cntx-ui 3.0.7 → 3.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bin/cntx-ui.js +70 -0
- package/dist/lib/agent-runtime.js +269 -0
- package/dist/lib/agent-tools.js +162 -0
- package/dist/lib/api-router.js +387 -0
- package/dist/lib/bundle-manager.js +236 -0
- package/dist/lib/configuration-manager.js +230 -0
- package/dist/lib/database-manager.js +277 -0
- package/dist/lib/file-system-manager.js +305 -0
- package/dist/lib/function-level-chunker.js +144 -0
- package/dist/lib/heuristics-manager.js +491 -0
- package/dist/lib/mcp-server.js +159 -0
- package/dist/lib/mcp-transport.js +10 -0
- package/dist/lib/semantic-splitter.js +335 -0
- package/dist/lib/simple-vector-store.js +98 -0
- package/dist/lib/treesitter-semantic-chunker.js +277 -0
- package/dist/lib/websocket-manager.js +268 -0
- package/dist/server.js +225 -0
- package/package.json +18 -8
- package/bin/cntx-ui-mcp.sh +0 -3
- package/bin/cntx-ui.js +0 -123
- package/lib/agent-runtime.js +0 -371
- package/lib/agent-tools.js +0 -370
- package/lib/api-router.js +0 -1026
- package/lib/bundle-manager.js +0 -326
- package/lib/configuration-manager.js +0 -760
- package/lib/database-manager.js +0 -397
- package/lib/file-system-manager.js +0 -489
- package/lib/function-level-chunker.js +0 -406
- package/lib/heuristics-manager.js +0 -529
- package/lib/mcp-server.js +0 -1380
- package/lib/mcp-transport.js +0 -97
- package/lib/semantic-splitter.js +0 -304
- package/lib/simple-vector-store.js +0 -108
- package/lib/treesitter-semantic-chunker.js +0 -1485
- package/lib/websocket-manager.js +0 -470
- package/server.js +0 -687
|
@@ -0,0 +1,335 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Semantic Splitter - High-Performance AST-based Chunker
|
|
3
|
+
* Uses tree-sitter for surgical, function-level code extraction
|
|
4
|
+
* Integrated with HeuristicsManager for intelligent categorization
|
|
5
|
+
*/
|
|
6
|
+
import { readFileSync, existsSync } from 'fs';
|
|
7
|
+
import { join, extname } from 'path';
|
|
8
|
+
import Parser from 'tree-sitter';
|
|
9
|
+
import JavaScript from 'tree-sitter-javascript';
|
|
10
|
+
import TypeScript from 'tree-sitter-typescript';
|
|
11
|
+
import Rust from 'tree-sitter-rust';
|
|
12
|
+
import HeuristicsManager from './heuristics-manager.js';
|
|
13
|
+
/**
 * Splits source files into function-level chunks using tree-sitter and
 * annotates each chunk with heuristics (purpose, domains, patterns), tags,
 * a rough complexity score and the bundles its file belongs to.
 */
export default class SemanticSplitter {
    /** Node types captured as function chunks (JS/TS plus Rust `function_item`). */
    static FUNCTION_NODE_TYPES = new Set([
        'function_declaration',
        'method_definition',
        'arrow_function',
        'function_item'
    ]);
    /** Node types recorded as type definitions (TS plus Rust). */
    static TYPE_NODE_TYPES = new Set([
        'interface_declaration',
        'type_alias_declaration',
        'struct_item',
        'enum_item',
        'trait_item'
    ]);
    /** Captured node types whose children are NOT traversed any further. */
    static OPAQUE_NODE_TYPES = new Set([
        'function_declaration',
        'method_definition',
        'function_item'
    ]);
    /** For an arrow function, which field of the parent node carries its name. */
    static ARROW_NAME_FIELDS = new Map([
        ['variable_declarator', 'name'], // const foo = () => {}
        ['pair', 'key'], // { foo: () => {} }
        ['assignment_expression', 'left'] // this.foo = () => {}
    ]);
    /** Use-tree leaves that do not introduce a usable name. */
    static RUST_NON_NAMES = new Set(['', '*', '_', 'self', 'super', 'crate']);
    /** Pre-compiled matchers for the complexity indicators (word-like ones use \b). */
    static COMPLEXITY_PATTERNS = ['if', 'else', 'for', 'while', 'switch', 'case', 'catch', '?', '&&', '||', 'match', 'loop', 'unsafe', 'unwrap', 'expect']
        .map(indicator => {
            const escaped = SemanticSplitter.escapeRegExp(indicator);
            const source = /^[a-zA-Z]+$/.test(indicator) ? `\\b${escaped}\\b` : escaped;
            return new RegExp(source, 'g');
        });
    options;
    parsers;
    heuristicsManager;
    bundleConfig;
    /**
     * @param {object} [options] Overrides for the defaults below.
     * @param {number} [options.maxChunkSize=3000] Max chars per chunk.
     * @param {boolean} [options.includeContext=true] Prepend relevant imports to chunk code.
     * @param {number} [options.minFunctionSize=40] Functions whose text is not longer than this are skipped.
     */
    constructor(options = {}) {
        this.options = {
            maxChunkSize: 3000, // Max chars per chunk
            includeContext: true, // Include imports/types needed
            minFunctionSize: 40, // Skip tiny functions
            ...options
        };
        // Initialize tree-sitter parsers
        this.parsers = {
            javascript: new Parser(),
            typescript: new Parser(),
            tsx: new Parser(),
            rust: new Parser()
        };
        this.parsers.javascript.setLanguage(JavaScript);
        this.parsers.typescript.setLanguage(TypeScript.typescript);
        this.parsers.tsx.setLanguage(TypeScript.tsx);
        this.parsers.rust.setLanguage(Rust);
        this.heuristicsManager = new HeuristicsManager();
    }
    /**
     * Escape every regular-expression metacharacter in `text`.
     * @param {string} text
     * @returns {string}
     */
    static escapeRegExp(text) {
        return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    }
    /**
     * Pick the parser for a file from its extension (case-insensitive).
     * `.ts`/`.mts`/`.cts` use the TypeScript grammar, `.tsx` the TSX grammar,
     * `.rs` Rust; everything else falls back to JavaScript.
     * @param {string} filePath
     * @returns {object} One of the parsers created in the constructor.
     */
    getParser(filePath) {
        switch (extname(filePath).toLowerCase()) {
            case '.ts':
            case '.mts':
            case '.cts':
                return this.parsers.typescript;
            case '.tsx':
                return this.parsers.tsx;
            case '.rs':
                return this.parsers.rust;
            default:
                return this.parsers.javascript;
        }
    }
    /**
     * Main entry point - extract semantic chunks from project.
     * Accepts a pre-filtered list of files; a file that fails to process is
     * logged with console.warn and skipped.
     * @param {string} projectPath Directory the file paths are relative to.
     * @param {string[]} [files] Relative file paths to process.
     * @param {object|null} [bundleConfig] Stored on the instance and consulted by getFileBundles.
     * @returns {Promise<{summary: {totalFiles: number, totalChunks: number, averageSize: number}, chunks: object[]}>}
     */
    async extractSemanticChunks(projectPath, files = [], bundleConfig = null) {
        console.log('🔪 Starting surgical semantic splitting via tree-sitter...');
        console.log(`📂 Project path: ${projectPath}`);
        this.bundleConfig = bundleConfig;
        console.log(`📁 Processing ${files.length} filtered files`);
        const allChunks = [];
        for (const filePath of files) {
            try {
                // Push one by one: spreading a very large array into push() can
                // exceed the engine's argument limit.
                for (const chunk of this.processFile(filePath, projectPath)) {
                    allChunks.push(chunk);
                }
            }
            catch (error) {
                console.warn(`Failed to process ${filePath}: ${error.message}`);
            }
        }
        console.log(`🧩 Created ${allChunks.length} semantic chunks across project`);
        const totalChars = allChunks.reduce((sum, c) => sum + (c.code || '').length, 0);
        return {
            summary: {
                totalFiles: files.length,
                totalChunks: allChunks.length,
                averageSize: allChunks.length > 0 ? totalChars / allChunks.length : 0
            },
            chunks: allChunks
        };
    }
    /**
     * Parse one file and turn its functions into chunks.
     * @param {string} relativePath Path relative to `projectPath`.
     * @param {string} projectPath
     * @returns {object[]} Chunks for the file; empty when the file does not exist.
     */
    processFile(relativePath, projectPath) {
        const fullPath = join(projectPath, relativePath);
        if (!existsSync(fullPath)) {
            return [];
        }
        const content = readFileSync(fullPath, 'utf8');
        const root = this.getParser(relativePath).parse(content).rootNode;
        const elements = {
            functions: [],
            types: [],
            imports: this.extractImports(root, content, relativePath)
        };
        // Traverse AST for functions and types
        this.traverse(root, content, relativePath, elements);
        // Create chunks from elements
        return this.createChunks(elements, content, relativePath);
    }
    /**
     * Walk the syntax tree depth-first, collecting functions and type
     * definitions into `elements.functions` / `elements.types`.
     * @param {object} node Current syntax node.
     * @param {string} content Full file text.
     * @param {string} filePath
     * @param {{functions: object[], types: object[]}} elements Mutated in place.
     */
    traverse(node, content, filePath, elements) {
        const visitChildren = (parent) => {
            for (let i = 0; i < parent.namedChildCount; i++) {
                const child = parent.namedChild(i);
                if (child) {
                    this.traverse(child, content, filePath, elements);
                }
            }
        };
        if (SemanticSplitter.FUNCTION_NODE_TYPES.has(node.type)) {
            const func = this.mapFunctionNode(node, content, filePath);
            if (func && func.code.length > this.options.minFunctionSize) {
                elements.functions.push(func);
            }
        }
        if (SemanticSplitter.TYPE_NODE_TYPES.has(node.type)) {
            const typeDef = this.mapTypeNode(node, content, filePath);
            if (typeDef) {
                elements.types.push(typeDef);
            }
        }
        // Rust impl blocks: only the body is traversed (to reach the methods).
        if (node.type === 'impl_item') {
            const body = node.childForFieldName('body');
            if (body) {
                visitChildren(body);
            }
            return;
        }
        // Recurse unless we've already captured the block (like a function body).
        // NOTE(review): arrow functions are captured AND descended into, unlike
        // the other function kinds — confirm whether that asymmetry is intended.
        if (!SemanticSplitter.OPAQUE_NODE_TYPES.has(node.type)) {
            visitChildren(node);
        }
    }
    /**
     * Build the function descriptor for a function-like node.
     * The name comes from the node's `name` field, or for arrow functions from
     * the enclosing declarator / object pair / assignment; otherwise 'anonymous'.
     * @param {object} node
     * @param {string} content Full file text.
     * @param {string} filePath
     * @returns {{name: string, type: string, filePath: string, startLine: number, code: string, isExported: boolean, isAsync: boolean}}
     */
    mapFunctionNode(node, content, filePath) {
        const textOf = (n) => content.slice(n.startIndex, n.endIndex);
        let name = 'anonymous';
        let nameNode = null;
        if (node.type === 'arrow_function') {
            const parent = node.parent;
            const field = parent ? SemanticSplitter.ARROW_NAME_FIELDS.get(parent.type) : undefined;
            const labelNode = field ? parent.childForFieldName(field) : null;
            if (labelNode) {
                name = textOf(labelNode);
            }
        }
        else if (SemanticSplitter.OPAQUE_NODE_TYPES.has(node.type)) {
            nameNode = node.childForFieldName('name');
            if (nameNode) {
                name = textOf(nameNode);
            }
        }
        const code = textOf(node);
        return {
            name,
            type: node.type,
            filePath,
            startLine: node.startPosition.row + 1,
            code,
            isExported: this.isExported(node),
            isAsync: this.isAsyncFunction(node, code, nameNode)
        };
    }
    /**
     * Decide whether a function is declared `async` by looking only at its
     * header, so an "async" that merely appears inside the body (a string, a
     * nested function, an identifier such as `asyncQueue`) does not count.
     * @param {object} node The function node.
     * @param {string} code Source text of the node.
     * @param {object|null} nameNode The node's name child, when it has one.
     * @returns {boolean}
     */
    isAsyncFunction(node, code, nameNode) {
        if (node.type === 'arrow_function') {
            // `async => x` is an arrow whose single parameter is named "async".
            return /^async\b(?!\s*=>)/.test(code);
        }
        let header;
        if (nameNode) {
            // Everything before the name: modifiers such as `pub(crate) async fn`.
            header = code.slice(0, nameNode.startIndex - node.startIndex);
        }
        else {
            const paren = code.indexOf('(');
            header = paren === -1 ? code : code.slice(0, paren);
        }
        return /\basync\b/.test(header);
    }
    /**
     * Build the descriptor for a type-definition node.
     * @param {object} node
     * @param {string} content Full file text.
     * @param {string} filePath
     * @returns {{name: string, type: string, filePath: string, startLine: number, code: string, isExported: boolean}|null}
     *   null when the node has no `name` field.
     */
    mapTypeNode(node, content, filePath) {
        const nameNode = node.childForFieldName('name');
        if (!nameNode) {
            return null;
        }
        return {
            name: content.slice(nameNode.startIndex, nameNode.endIndex),
            type: node.type,
            filePath,
            startLine: node.startPosition.row + 1,
            code: content.slice(node.startIndex, node.endIndex),
            isExported: this.isExported(node)
        };
    }
    /**
     * Collect the import/use statements that are direct children of the root.
     * @param {object} root Root syntax node.
     * @param {string} content Full file text.
     * @param {string} filePath
     * @returns {{statement: string, filePath: string}[]}
     */
    extractImports(root, content, filePath) {
        const imports = [];
        for (let i = 0; i < root.namedChildCount; i++) {
            const node = root.namedChild(i);
            if (!node) {
                continue;
            }
            if (node.type === 'import_statement' || node.type === 'use_declaration') {
                imports.push({
                    statement: content.slice(node.startIndex, node.endIndex),
                    filePath
                });
            }
        }
        return imports;
    }
    /**
     * A node counts as exported when it has a `visibility_modifier` named child
     * (Rust `pub`) or when any ancestor is an export statement/declaration.
     * @param {object} node
     * @returns {boolean}
     */
    isExported(node) {
        for (let i = 0; i < node.namedChildCount; i++) {
            const child = node.namedChild(i);
            if (child && child.type === 'visibility_modifier') {
                return true;
            }
        }
        for (let ancestor = node.parent; ancestor; ancestor = ancestor.parent) {
            if (ancestor.type === 'export_statement' || ancestor.type === 'export_declaration') {
                return true;
            }
        }
        return false;
    }
    /**
     * Turn the collected functions of one file into chunk records.
     * @param {{functions: object[], types: object[], imports: {statement: string}[]}} elements
     * @param {string} content Full file text (currently unused).
     * @param {string} filePath
     * @returns {object[]}
     */
    createChunks(elements, content, filePath) {
        const pathParts = filePath.toLowerCase().split(/[\\\/]/);
        const bundles = this.getFileBundles(filePath);
        return elements.functions.map(func => {
            // Pass full context to heuristics
            const heuristicContext = {
                ...func,
                includes: elements,
                pathParts
            };
            const purpose = this.heuristicsManager.determinePurpose(heuristicContext);
            const businessDomain = this.heuristicsManager.inferBusinessDomains(heuristicContext);
            const technicalPatterns = this.heuristicsManager.inferTechnicalPatterns(heuristicContext);
            const tags = this.generateTags(func);
            let chunkCode = '';
            if (this.options.includeContext) {
                const relevantImports = elements.imports
                    .filter(imp => this.isImportRelevant(imp.statement, func.code))
                    .map(imp => imp.statement)
                    .join('\n');
                if (relevantImports) {
                    chunkCode += relevantImports + '\n\n';
                }
            }
            chunkCode += func.code;
            return {
                id: `${filePath}:${func.name}:${func.startLine}`,
                name: func.name,
                filePath,
                type: 'function',
                subtype: func.type,
                code: chunkCode,
                startLine: func.startLine,
                complexity: this.calculateComplexity(func.code),
                purpose,
                tags,
                businessDomain,
                technicalPatterns,
                includes: {
                    imports: elements.imports.map(i => i.statement),
                    types: elements.types.map(t => t.name)
                },
                bundles: [...bundles]
            };
        });
    }
    /**
     * True when the function body references at least one name that the
     * import/use statement brings into scope. Names are matched as whole
     * identifiers, so importing `fs` is not "used" by the word `offset`.
     * @param {string} importStatement Text of a JS/TS import or Rust use statement.
     * @param {string} functionCode
     * @returns {boolean}
     */
    isImportRelevant(importStatement, functionCode) {
        return this.extractImportedNames(importStatement).some(name => {
            const pattern = new RegExp(`(?<![\\w$])${SemanticSplitter.escapeRegExp(name)}(?![\\w$])`);
            return pattern.test(functionCode);
        });
    }
    /**
     * List the local names introduced by an import/use statement.
     * Handles Rust `use a::b::C;`, groups `{A, B}`, `as` aliases and `pub use`,
     * and JS/TS default, named (with `as` / inline `type`) and namespace imports.
     * Side-effect-only imports and glob imports yield no names.
     * @param {string} importStatement
     * @returns {string[]}
     */
    extractImportedNames(importStatement) {
        const statement = importStatement.trim();
        // Rust: the lazy capture leaves the trailing ';' out of the path.
        const useMatch = statement.match(/^(?:pub(?:\s*\([^)]*\))?\s+)?use\s+([\s\S]+?)\s*;?\s*$/);
        if (useMatch) {
            return useMatch[1]
                .split(/[{},]/)
                .map(leaf => {
                    const aliasParts = leaf.trim().split(/\s+as\s+/);
                    if (aliasParts.length > 1) {
                        return aliasParts[aliasParts.length - 1].trim();
                    }
                    const segments = aliasParts[0].split('::');
                    return segments[segments.length - 1].trim();
                })
                .filter(name => !SemanticSplitter.RUST_NON_NAMES.has(name));
        }
        // JS/TS: everything between `import` (and an optional `type`) and `from '...'`.
        const clauseMatch = statement.match(/^import\s+(?:type\s+)?([\s\S]*?)\s*from\s*['"]/);
        if (!clauseMatch) {
            return [];
        }
        const clause = clauseMatch[1];
        const names = [];
        const named = clause.match(/\{([^}]*)\}/);
        if (named) {
            for (const specifier of named[1].split(',')) {
                const parts = specifier.trim().split(/\s+as\s+/);
                // Local binding is the alias when present; drop an inline `type` marker.
                names.push(parts[parts.length - 1].trim().replace(/^type\s+(?=\S)/, ''));
            }
        }
        const namespace = clause.match(/\*\s*as\s+([^\s,{}]+)/);
        if (namespace) {
            names.push(namespace[1]);
        }
        const defaultName = clause
            .replace(/\{[^}]*\}/, '')
            .replace(/\*\s*as\s+[^\s,{}]+/, '')
            .split(',')
            .map(part => part.trim())
            .find(part => part.length > 0);
        if (defaultName) {
            names.push(defaultName);
        }
        return names.filter(name => name.length > 0);
    }
    /**
     * Rough complexity estimate: 1 plus the number of occurrences of branching
     * and risk indicators (keywords are matched on word boundaries).
     * @param {string} code
     * @returns {{score: number, level: 'low'|'medium'|'high'}} low < 5, medium < 15, else high.
     */
    calculateComplexity(code) {
        let score = 1;
        for (const pattern of SemanticSplitter.COMPLEXITY_PATTERNS) {
            score += (code.match(pattern) || []).length;
        }
        return {
            score,
            level: score < 5 ? 'low' : score < 15 ? 'medium' : 'high'
        };
    }
    /**
     * Tags for a function: its node type plus 'exported', 'async' and 'large'
     * (code longer than 2000 chars) when they apply.
     * @param {{type: string, isExported: boolean, isAsync: boolean, code: string}} func
     * @returns {string[]}
     */
    generateTags(func) {
        const tags = [func.type];
        if (func.isExported) {
            tags.push('exported');
        }
        if (func.isAsync) {
            tags.push('async');
        }
        if (func.code.length > 2000) {
            tags.push('large');
        }
        return tags;
    }
    /**
     * Names of the configured bundles (except 'master') that have at least one
     * pattern matching the file.
     * @param {string} filePath
     * @returns {string[]} Empty when no bundle config is set.
     */
    getFileBundles(filePath) {
        if (!this.bundleConfig?.bundles) {
            return [];
        }
        const bundles = [];
        for (const [name, patterns] of Object.entries(this.bundleConfig.bundles)) {
            if (name === 'master') {
                continue;
            }
            // A lone string is one pattern (not a list of characters); any other
            // non-array value is ignored instead of throwing mid-file.
            const patternList = typeof patterns === 'string'
                ? [patterns]
                : Array.isArray(patterns) ? patterns : [];
            if (patternList.some(pattern => this.matchesPattern(filePath, pattern))) {
                bundles.push(name);
            }
        }
        return bundles;
    }
    /**
     * Glob-style match of a whole path: `**` matches anything (including '/'),
     * `*` matches anything except '/', and every other character is literal.
     * The pattern is split on the wildcards before escaping so the generated
     * regex fragments are never re-processed by a later replacement.
     * @param {string} filePath
     * @param {string} pattern
     * @returns {boolean}
     */
    matchesPattern(filePath, pattern) {
        const source = pattern
            .split('**')
            .map(part => part
                .split('*')
                .map(literal => SemanticSplitter.escapeRegExp(literal))
                .join('[^/]*'))
            .join('.*');
        return new RegExp(`^${source}$`).test(filePath);
    }
}
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Simple Vector Store with SQLite Persistence
|
|
3
|
+
* Powered by Transformers.js for local embeddings
|
|
4
|
+
* Persists vectors to SQLite for instant startup
|
|
5
|
+
*/
|
|
6
|
+
import { pipeline } from '@xenova/transformers';
|
|
7
|
+
/**
 * Vector store that generates embeddings with a local feature-extraction
 * pipeline and persists/loads them through the supplied database manager.
 */
export default class SimpleVectorStore {
    db;
    modelName;
    pipe;
    initialized;
    /** In-flight model load shared by concurrent init() callers; null when idle. */
    initPromise;
    /**
     * @param {object} databaseManager Must provide getEmbedding, saveEmbedding,
     *   mapChunkRow and a `db` handle exposing prepare(sql).
     * @param {object} [options]
     * @param {string} [options.modelName='Xenova/all-MiniLM-L6-v2']
     */
    constructor(databaseManager, options = {}) {
        this.db = databaseManager;
        this.modelName = options.modelName || 'Xenova/all-MiniLM-L6-v2';
        this.pipe = null;
        this.initialized = false;
        this.initPromise = null;
    }
    /**
     * Load the embedding pipeline once. Concurrent callers share a single
     * load; if the load fails the error propagates and a later call retries.
     * @returns {Promise<void>}
     */
    async init() {
        if (this.initialized) {
            return;
        }
        if (!this.initPromise) {
            this.initPromise = this.loadPipeline();
        }
        try {
            await this.initPromise;
        }
        finally {
            // Clearing is harmless after success (initialized is already true)
            // and lets the next call retry after a failure.
            this.initPromise = null;
        }
    }
    /**
     * Create the feature-extraction pipeline and mark the store initialized.
     * @returns {Promise<void>}
     */
    async loadPipeline() {
        console.log(`🤖 Initializing local RAG engine (${this.modelName})...`);
        this.pipe = await pipeline('feature-extraction', this.modelName);
        this.initialized = true;
        console.log('✅ Local RAG engine ready');
    }
    /**
     * Embed a piece of text (mean pooling, normalized).
     * @param {string} text
     * @returns {Promise<Float32Array>}
     */
    async generateEmbedding(text) {
        await this.init();
        const output = await this.pipe(text, { pooling: 'mean', normalize: true });
        return new Float32Array(output.data);
    }
    /**
     * Upsert a chunk's embedding to persistence.
     * Returns the stored embedding when one already exists for the chunk id;
     * otherwise embeds "name purpose code", saves it and returns it.
     * @param {{id: string, name?: string, purpose?: string, code?: string}} chunk
     * @returns {Promise<*>} The existing stored value or the new Float32Array.
     */
    async upsertChunk(chunk) {
        const chunkId = chunk.id;
        // Check if we already have it in DB
        const existing = this.db.getEmbedding(chunkId);
        if (existing) {
            return existing;
        }
        // Skip missing parts so the text never contains a literal "undefined".
        const textToEmbed = [chunk.name, chunk.purpose, chunk.code]
            .filter(part => part !== undefined && part !== null && part !== '')
            .join(' ');
        const embedding = await this.generateEmbedding(textToEmbed);
        // Save to SQLite
        this.db.saveEmbedding(chunkId, embedding, this.modelName);
        return embedding;
    }
    /**
     * Semantic search across persistent embeddings for the current model.
     * @param {string} query
     * @param {object} [options]
     * @param {number} [options.limit=10] Maximum number of results.
     * @param {number} [options.threshold=0.5] Minimum cosine similarity.
     * @returns {Promise<object[]>} Mapped chunk rows plus `similarity`, best
     *   first. Embeddings whose chunk row no longer exists are skipped and do
     *   not count towards `limit`.
     */
    async search(query, options = {}) {
        const { limit = 10, threshold = 0.5 } = options;
        const queryEmbedding = await this.generateEmbedding(query);
        // Load all embeddings from DB
        const rows = this.db.db.prepare('SELECT chunk_id, embedding FROM vector_embeddings WHERE model_name = ?').all(this.modelName);
        const scored = [];
        const batchSize = 100;
        // Process in batches to prevent blocking the event loop
        for (let i = 0; i < rows.length; i += batchSize) {
            for (const row of rows.slice(i, i + batchSize)) {
                if (!row.embedding) {
                    continue;
                }
                const similarity = this.cosineSimilarity(queryEmbedding, this.toFloat32Array(row.embedding));
                if (similarity >= threshold) {
                    scored.push({ chunkId: row.chunk_id, similarity });
                }
            }
            // Give other tasks a chance to run
            if (i + batchSize < rows.length) {
                await new Promise(resolve => setImmediate(resolve));
            }
        }
        scored.sort((a, b) => b.similarity - a.similarity);
        // Prepare the lookup once instead of once per result.
        const chunkLookup = this.db.db.prepare('SELECT * FROM semantic_chunks WHERE id = ?');
        const results = [];
        for (const hit of scored) {
            if (results.length >= limit) {
                break;
            }
            const chunkRow = chunkLookup.get(hit.chunkId);
            if (!chunkRow) {
                continue; // orphaned embedding: its chunk row is gone
            }
            results.push({
                ...this.db.mapChunkRow(chunkRow),
                similarity: hit.similarity
            });
        }
        return results;
    }
    /**
     * View a stored embedding blob as a Float32Array.
     * A typed-array view needs a byte offset that is a multiple of 4; when the
     * blob is not aligned its bytes are copied into a fresh buffer first.
     * @param {Uint8Array|Float32Array} blob
     * @returns {Float32Array}
     */
    toFloat32Array(blob) {
        if (blob instanceof Float32Array) {
            return blob;
        }
        const width = Float32Array.BYTES_PER_ELEMENT;
        const count = Math.floor(blob.byteLength / width);
        if (blob.byteOffset % width === 0) {
            return new Float32Array(blob.buffer, blob.byteOffset, count);
        }
        const aligned = new Uint8Array(count * width);
        aligned.set(new Uint8Array(blob.buffer, blob.byteOffset, count * width));
        return new Float32Array(aligned.buffer);
    }
    /**
     * Cosine similarity of two vectors, iterating over vecA's length.
     * @param {ArrayLike<number>} vecA
     * @param {ArrayLike<number>} vecB
     * @returns {number} 0 when the value is undefined (a zero-length/zero-norm
     *   vector, or vecB shorter than vecA) instead of NaN.
     */
    cosineSimilarity(vecA, vecB) {
        let dotProduct = 0;
        let normA = 0;
        let normB = 0;
        for (let i = 0; i < vecA.length; i++) {
            dotProduct += vecA[i] * vecB[i];
            normA += vecA[i] * vecA[i];
            normB += vecB[i] * vecB[i];
        }
        const similarity = dotProduct / (Math.sqrt(normA) * Math.sqrt(normB));
        return Number.isFinite(similarity) ? similarity : 0;
    }
}
|