@terronex-dev/aifbin-recall 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. package/CONTRIBUTING.md +65 -0
  2. package/LICENSE +21 -0
  3. package/NOTICE +36 -0
  4. package/README.md +250 -0
  5. package/dist/cli.d.ts +6 -0
  6. package/dist/cli.d.ts.map +1 -0
  7. package/dist/cli.js +182 -0
  8. package/dist/cli.js.map +1 -0
  9. package/dist/db.d.ts +29 -0
  10. package/dist/db.d.ts.map +1 -0
  11. package/dist/db.js +252 -0
  12. package/dist/db.js.map +1 -0
  13. package/dist/embedder.d.ts +47 -0
  14. package/dist/embedder.d.ts.map +1 -0
  15. package/dist/embedder.js +152 -0
  16. package/dist/embedder.js.map +1 -0
  17. package/dist/index.d.ts +27 -0
  18. package/dist/index.d.ts.map +1 -0
  19. package/dist/index.js +45 -0
  20. package/dist/index.js.map +1 -0
  21. package/dist/indexer.d.ts +34 -0
  22. package/dist/indexer.d.ts.map +1 -0
  23. package/dist/indexer.js +246 -0
  24. package/dist/indexer.js.map +1 -0
  25. package/dist/mcp.d.ts +7 -0
  26. package/dist/mcp.d.ts.map +1 -0
  27. package/dist/mcp.js +207 -0
  28. package/dist/mcp.js.map +1 -0
  29. package/dist/search.d.ts +27 -0
  30. package/dist/search.d.ts.map +1 -0
  31. package/dist/search.js +159 -0
  32. package/dist/search.js.map +1 -0
  33. package/dist/server.d.ts +13 -0
  34. package/dist/server.d.ts.map +1 -0
  35. package/dist/server.js +250 -0
  36. package/dist/server.js.map +1 -0
  37. package/dist/types.d.ts +79 -0
  38. package/dist/types.d.ts.map +1 -0
  39. package/dist/types.js +20 -0
  40. package/dist/types.js.map +1 -0
  41. package/package.json +64 -0
  42. package/src/cli.ts +195 -0
  43. package/src/db.ts +295 -0
  44. package/src/embedder.ts +175 -0
  45. package/src/index.ts +46 -0
  46. package/src/indexer.ts +272 -0
  47. package/src/mcp.ts +244 -0
  48. package/src/search.ts +201 -0
  49. package/src/server.ts +270 -0
  50. package/src/types.ts +103 -0
  51. package/tsconfig.json +20 -0
package/src/indexer.ts ADDED
@@ -0,0 +1,272 @@
1
+ /**
2
+ * AIF-BIN file indexer for AIF-BIN Recall
3
+ * Parses AIF-BIN v2 binary format
4
+ */
5
+
6
+ import fs from 'fs';
7
+ import path from 'path';
8
+ import { unpack } from 'msgpackr';
9
+ import type { AifBinFile, AifBinChunk, AifBinHeader, MemoryChunk, IndexOptions } from './types.js';
10
+ import { EngramDB } from './db.js';
11
+
12
// AIF-BIN v2 layout constants

/** File signature: the ASCII bytes "AIFBIN" followed by 0x00 0x01. */
const MAGIC = Buffer.from('AIFBIN\x00\x01', 'latin1');

/** Minimum file size in bytes; the parser rejects anything shorter as having no complete header. */
const HEADER_SIZE = 64;

/** Section offset with all 64 bits set; the parser treats such a section as absent. */
const ABSENT_OFFSET = BigInt.asUintN(64, BigInt(-1));
16
+
17
// Type tag read from the start of each content-chunk record.
enum ChunkType {
  /** Payload is decoded as UTF-8 text by the parser. */
  TEXT = 0x01,
  /** Payload is parsed as JSON and re-serialised; the raw text is used if parsing fails. */
  TABLE_JSON = 0x02,
  /** The parser extracts no text for this type. */
  IMAGE = 0x03,
  /** The parser extracts no text for this type. */
  AUDIO = 0x04,
  /** The parser extracts no text for this type. */
  VIDEO = 0x05,
  /** Payload is decoded as UTF-8 text by the parser. */
  CODE = 0x06,
}
26
+
27
/**
 * Coerce a decoded MessagePack value to a plain record.
 * Anything that is not a non-null, non-array object becomes an empty record so
 * that later property reads cannot throw.
 */
function toRecord(value: unknown): Record<string, unknown> {
  return typeof value === 'object' && value !== null && !Array.isArray(value)
    ? (value as Record<string, unknown>)
    : {};
}

/**
 * Convert a metadata timestamp to epoch milliseconds.
 * Returns 0 (the header's "unknown" default) when the value is missing or
 * cannot be parsed as a date.
 */
function parseTimestamp(value: unknown): number {
  if (!value) return 0;
  const millis = new Date(value as string | number).getTime();
  return Number.isNaN(millis) ? 0 : millis;
}

/**
 * Parse an AIF-BIN v2 file.
 *
 * Reads the whole file synchronously, validates its size and magic bytes,
 * decodes the optional metadata section and walks the content-chunk section.
 * Chunk parsing is best-effort: the first malformed or truncated chunk is
 * reported on stderr and parsing stops, keeping the chunks decoded so far.
 *
 * @param filePath - Path of the `.aif-bin` file to read.
 * @returns The parsed header, the decoded chunks and the source path.
 * @throws Error if the file is shorter than the header or has bad magic bytes.
 * @throws RangeError if the metadata or chunk section offset points outside the file.
 */
export function parseAifBinFile(filePath: string): AifBinFile {
  const buffer = fs.readFileSync(filePath);

  if (buffer.length < HEADER_SIZE) {
    throw new Error(`File too small: ${filePath}`);
  }

  // Verify magic bytes
  const magic = buffer.subarray(0, 8);
  if (!magic.equals(MAGIC)) {
    throw new Error(`Invalid AIF-BIN file: bad magic bytes in ${filePath}`);
  }

  // Header layout (little-endian):
  //    8  u32  version
  //   12       padding
  //   16  u64  metadata offset
  //   24  u64  original-raw offset    (not needed for indexing)
  //   32  u64  content-chunks offset
  //   40  u64  versions offset        (not needed for indexing)
  //   48  u64  footer offset          (not needed for indexing)
  //   56  u64  total size             (not needed for indexing)
  const view = new DataView(buffer.buffer, buffer.byteOffset, buffer.byteLength);

  const version = view.getUint32(8, true);
  const metadataOffset = view.getBigUint64(16, true);
  const contentChunksOffset = view.getBigUint64(32, true);

  const header: AifBinHeader = {
    magic: new Uint8Array(magic),
    version,
    flags: 0,
    chunkCount: 0,
    embeddingDim: 0,
    createdAt: 0,
    modifiedAt: 0,
  };

  // Metadata section: u64 length followed by a MessagePack payload.
  let metadata: Record<string, unknown> = {};
  if (metadataOffset !== ABSENT_OFFSET) {
    const metaStart = Number(metadataOffset);
    const metaLength = Number(view.getBigUint64(metaStart, true));
    const metaData = buffer.subarray(metaStart + 8, metaStart + 8 + metaLength);
    try {
      metadata = toRecord(unpack(metaData));
    } catch {
      // Metadata parse failed, continue with empty
    }
  }

  // Content-chunks section: u32 count, then one record per chunk.
  const chunks: AifBinChunk[] = [];
  if (contentChunksOffset !== ABSENT_OFFSET) {
    const chunksStart = Number(contentChunksOffset);
    const chunkCount = view.getUint32(chunksStart, true);
    header.chunkCount = chunkCount;

    let offset = chunksStart + 4;

    for (let i = 0; i < chunkCount; i++) {
      try {
        // Record header: u32 type, u64 data length, u64 metadata length.
        const chunkType = view.getUint32(offset, true);
        const dataLength = Number(view.getBigUint64(offset + 4, true));
        const metadataLength = Number(view.getBigUint64(offset + 12, true));

        const metaStart = offset + 20;
        const dataStart = metaStart + metadataLength;
        const dataEnd = dataStart + dataLength;

        // Buffer.subarray clamps silently, so check the declared lengths
        // explicitly; otherwise a truncated chunk would be accepted with
        // partial content.
        if (dataEnd > buffer.length) {
          throw new RangeError('chunk extends past end of file');
        }

        // Chunk metadata precedes the chunk payload.
        let chunkMeta: Record<string, unknown> = {};
        if (metadataLength > 0) {
          try {
            chunkMeta = toRecord(unpack(buffer.subarray(metaStart, dataStart)));
          } catch {
            // Skip bad metadata
          }
        }

        const chunkData = buffer.subarray(dataStart, dataEnd);
        offset = dataEnd;

        // Extract text content based on chunk type; other types yield ''.
        let text = '';
        if (chunkType === ChunkType.TEXT || chunkType === ChunkType.CODE) {
          text = chunkData.toString('utf-8');
        } else if (chunkType === ChunkType.TABLE_JSON) {
          const raw = chunkData.toString('utf-8');
          try {
            text = JSON.stringify(JSON.parse(raw));
          } catch {
            text = raw;
          }
        }

        // The embedding, when present, is carried in the chunk metadata.
        const embedding = (chunkMeta.embedding as number[]) || [];
        if (embedding.length > 0 && header.embeddingDim === 0) {
          // The first embedding seen defines the file's dimension.
          header.embeddingDim = embedding.length;
        }

        chunks.push({
          id: (chunkMeta.id as string) || crypto.randomUUID(),
          text,
          embedding,
          metadata: chunkMeta,
        });
      } catch {
        // Offsets after a malformed chunk cannot be trusted, so stop here.
        console.error(` Warning: Failed to parse chunk ${i} in ${path.basename(filePath)}`);
        break;
      }
    }
  }

  // Extract timestamps from metadata if available
  header.createdAt = parseTimestamp(metadata.created_at);
  header.modifiedAt = parseTimestamp(metadata.modified_at);

  return {
    header,
    chunks,
    sourcePath: filePath,
  };
}
161
+
162
/**
 * Collect the paths of all `.aif-bin` files under a directory.
 *
 * @param dir - Directory to scan.
 * @param recursive - When true (the default) subdirectories are descended into;
 *   when false only the entries directly inside `dir` are considered.
 * @returns Matching file paths, in directory-listing order with each
 *   subdirectory's matches placed where that subdirectory was listed.
 */
export function findAifBinFiles(dir: string, recursive: boolean = true): string[] {
  const collect = (folder: string): string[] =>
    fs.readdirSync(folder, { withFileTypes: true }).flatMap((item) => {
      const itemPath = path.join(folder, item.name);

      if (recursive && item.isDirectory()) {
        return collect(itemPath);
      }
      // Only regular files count; a directory named "*.aif-bin" is not a match.
      return item.isFile() && item.name.endsWith('.aif-bin') ? [itemPath] : [];
    });

  return collect(dir);
}
185
+
186
/**
 * Loads parsed AIF-BIN chunks into an EngramDB collection.
 */
export class Indexer {
  private db: EngramDB;

  constructor(db: EngramDB) {
    this.db = db;
  }

  /**
   * Index one AIF-BIN file into a collection.
   *
   * Only chunks that carry an embedding are stored. Chunks already stored for
   * the same source path are deleted first so the file can be re-indexed; a
   * file without any embedded chunk is skipped and leaves the store untouched.
   *
   * @param filePath - Path of the `.aif-bin` file.
   * @param collectionId - ID of the target collection.
   * @returns Number of chunks inserted (0 when the file was skipped).
   */
  indexFile(filePath: string, collectionId: string): number {
    const parsed = parseAifBinFile(filePath);

    const embedded = parsed.chunks.filter((candidate) => candidate.embedding.length > 0);
    if (embedded.length === 0) {
      console.log(` Skipped: ${path.basename(filePath)} (no embeddings)`);
      return 0;
    }

    // Clear out whatever an earlier indexing run stored for this file.
    this.db.deleteChunksBySource(filePath);

    const { embeddingDim, createdAt, modifiedAt } = parsed.header;
    const rows: Omit<MemoryChunk, 'createdAt' | 'updatedAt'>[] = [];
    for (const [position, chunk] of embedded.entries()) {
      rows.push({
        id: chunk.id || crypto.randomUUID(),
        collectionId,
        sourceFile: filePath,
        // Position among the chunks that have embeddings, not within the file.
        chunkIndex: position,
        text: chunk.text,
        embedding: chunk.embedding,
        metadata: {
          ...chunk.metadata,
          embeddingDim,
          originalCreatedAt: createdAt,
          originalModifiedAt: modifiedAt,
        },
      });
    }

    this.db.insertChunks(rows);
    return rows.length;
  }

  /**
   * Index every AIF-BIN file found in a directory.
   *
   * The target collection is looked up by name and created when missing.
   * A file that fails to index is reported on stderr and does not stop the run.
   *
   * @param dir - Directory to scan.
   * @param options - Target collection name and whether to recurse (default true).
   * @returns Count of files that contributed chunks and the total chunks inserted.
   */
  indexDirectory(dir: string, options: IndexOptions): { files: number; chunks: number } {
    const { collection, recursive = true } = options;

    const target = this.db.getCollection(collection) || this.db.createCollection(collection);

    let indexedFiles = 0;
    let indexedChunks = 0;

    for (const file of findAifBinFiles(dir, recursive)) {
      try {
        const added = this.indexFile(file, target.id);
        if (added > 0) {
          indexedChunks += added;
          indexedFiles += 1;
          console.log(` Indexed: ${path.basename(file)} (${added} chunks)`);
        }
      } catch (err) {
        console.error(` Failed: ${path.basename(file)} - ${err}`);
      }
    }

    // Refresh the collection's stored statistics after the batch.
    this.db.updateCollectionStats(target.id);

    return { files: indexedFiles, chunks: indexedChunks };
  }

  /**
   * Remove a file's chunks from the index.
   *
   * @param filePath - Source path whose chunks should be deleted.
   * @returns Whatever the database reports for the deletion.
   */
  removeFile(filePath: string): number {
    return this.db.deleteChunksBySource(filePath);
  }
}
package/src/mcp.ts ADDED
@@ -0,0 +1,244 @@
1
+ /**
2
+ * MCP (Model Context Protocol) server for AIF-BIN Recall
3
+ * Enables AI agents to query semantic memories
4
+ */
5
+
6
+ import { Server } from '@modelcontextprotocol/sdk/server/index.js';
7
+ import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
8
+ import {
9
+ CallToolRequestSchema,
10
+ ListToolsRequestSchema,
11
+ } from '@modelcontextprotocol/sdk/types.js';
12
+ import { EngramDB } from './db.js';
13
+ import { SearchEngine } from './search.js';
14
+ import { Indexer } from './indexer.js';
15
+ import { Embedder, type EmbeddingModelName } from './embedder.js';
16
+
17
/**
 * Start the MCP server for AIF-BIN Recall on stdio.
 *
 * Registers four tools (`recall_search`, `recall_get`, `recall_collections`,
 * `recall_index`) and connects the server to a stdio transport. Tool failures
 * are returned to the client as `isError` results rather than thrown.
 *
 * @param db - Database used for search, retrieval and indexing.
 * @returns A promise that resolves once the server is connected.
 */
export async function startMcpServer(db: EngramDB): Promise<void> {
  const search = new SearchEngine(db);
  const indexer = new Indexer(db);
  const embedder = new Embedder('minilm');

  const server = new Server(
    {
      name: 'aifbin-recall',
      version: '0.1.0',
    },
    {
      capabilities: {
        tools: {},
      },
    }
  );

  // List available tools
  server.setRequestHandler(ListToolsRequestSchema, async () => {
    return {
      tools: [
        {
          name: 'recall_search',
          description: 'Search semantic memories using natural language. Automatically embeds your query text. Returns relevant text chunks with similarity scores.',
          inputSchema: {
            type: 'object',
            properties: {
              query: {
                type: 'string',
                description: 'Natural language search query (will be embedded automatically)',
              },
              embedding: {
                type: 'array',
                items: { type: 'number' },
                description: 'Pre-computed query embedding vector (optional, query text is preferred)',
              },
              collection: {
                type: 'string',
                description: 'Collection name to search (optional, searches all if omitted)',
              },
              limit: {
                type: 'number',
                description: 'Maximum results to return (default: 10)',
              },
            },
            required: ['query'],
          },
        },
        {
          name: 'recall_get',
          description: 'Retrieve a specific memory chunk by ID',
          inputSchema: {
            type: 'object',
            properties: {
              id: {
                type: 'string',
                description: 'Chunk ID to retrieve',
              },
            },
            required: ['id'],
          },
        },
        {
          name: 'recall_collections',
          description: 'List all available memory collections',
          inputSchema: {
            type: 'object',
            properties: {},
          },
        },
        {
          name: 'recall_index',
          description: 'Index a directory of AIF-BIN files into a collection',
          inputSchema: {
            type: 'object',
            properties: {
              path: {
                type: 'string',
                description: 'Directory path containing .aif-bin files',
              },
              collection: {
                type: 'string',
                description: 'Collection name to index into',
              },
              recursive: {
                type: 'boolean',
                description: 'Search subdirectories (default: true)',
              },
            },
            required: ['path', 'collection'],
          },
        },
      ],
    };
  });

  // Handle tool calls
  server.setRequestHandler(CallToolRequestSchema, async (request) => {
    const { name, arguments: args } = request.params;

    try {
      switch (name) {
        case 'recall_search': {
          // `arguments` is optional on the request, so fall back to an empty
          // object and let the validation below produce the error message.
          const { embedding, query, collection, limit } = (args ?? {}) as {
            embedding?: number[];
            query?: string;
            collection?: string;
            limit?: number;
          };

          // Treat an empty string / empty array the same as "not supplied".
          const queryText = typeof query === 'string' && query.length > 0 ? query : undefined;
          const suppliedEmbedding =
            Array.isArray(embedding) && embedding.length > 0 ? embedding : undefined;

          const options = { collection, limit: limit || 10 };

          let results: Awaited<ReturnType<SearchEngine['hybridSearch']>>;
          if (queryText !== undefined) {
            // Use the caller's embedding when given, otherwise embed the query text.
            const queryEmbedding: number[] =
              suppliedEmbedding ?? (await embedder.embed(queryText));
            results = await search.hybridSearch(queryEmbedding, queryText, options);
          } else if (suppliedEmbedding !== undefined) {
            // No query text means no keyword component: rank by vector similarity only.
            results = await search.search(suppliedEmbedding, options);
          } else {
            return {
              content: [{ type: 'text', text: 'Error: query text required' }],
              isError: true,
            };
          }

          // Each hit shows at most the first 500 characters of its text.
          const formatted = results.map((r, i) =>
            `[${i + 1}] Score: ${r.score.toFixed(3)}\n` +
            ` Source: ${r.chunk.sourceFile}\n` +
            ` Text: ${r.chunk.text.slice(0, 500)}${r.chunk.text.length > 500 ? '...' : ''}\n` +
            ` ID: ${r.chunk.id}`
          ).join('\n\n');

          return {
            content: [{
              type: 'text',
              text: results.length > 0
                ? `Found ${results.length} results:\n\n${formatted}`
                : 'No results found.',
            }],
          };
        }

        case 'recall_get': {
          const { id } = args as { id: string };
          const chunk = search.recall(id);

          if (!chunk) {
            return {
              content: [{ type: 'text', text: `Chunk not found: ${id}` }],
              isError: true,
            };
          }

          return {
            content: [{
              type: 'text',
              text: `Source: ${chunk.sourceFile}\n` +
                `Chunk: ${chunk.chunkIndex}\n` +
                `Created: ${chunk.createdAt.toISOString()}\n\n` +
                `Text:\n${chunk.text}`,
            }],
          };
        }

        case 'recall_collections': {
          const collections = db.listCollections();

          if (collections.length === 0) {
            return {
              content: [{ type: 'text', text: 'No collections found. Use recall_index to create one.' }],
            };
          }

          const formatted = collections.map(c =>
            `• ${c.name}: ${c.chunkCount} chunks from ${c.fileCount} files` +
            (c.description ? ` - ${c.description}` : '')
          ).join('\n');

          return {
            content: [{
              type: 'text',
              text: `Available collections:\n\n${formatted}`,
            }],
          };
        }

        case 'recall_index': {
          const { path: dirPath, collection, recursive } = args as {
            path: string;
            collection: string;
            recursive?: boolean;
          };

          // Reject missing arguments up front: the indexer creates the
          // collection before it scans the directory.
          if (
            typeof dirPath !== 'string' || dirPath.length === 0 ||
            typeof collection !== 'string' || collection.length === 0
          ) {
            return {
              content: [{ type: 'text', text: 'Error: path and collection are required' }],
              isError: true,
            };
          }

          const result = indexer.indexDirectory(dirPath, {
            collection,
            // Anything other than an explicit `false` means recurse.
            recursive: recursive !== false,
          });

          return {
            content: [{
              type: 'text',
              text: `Indexed ${result.files} files (${result.chunks} chunks) into collection "${collection}"`,
            }],
          };
        }

        default:
          return {
            content: [{ type: 'text', text: `Unknown tool: ${name}` }],
            isError: true,
          };
      }
    } catch (err) {
      // Report any failure to the client as an error result.
      return {
        content: [{ type: 'text', text: `Error: ${err}` }],
        isError: true,
      };
    }
  });

  // Start the server
  const transport = new StdioServerTransport();
  await server.connect(transport);
  // Written to stderr; the transport in use here is the stdio one.
  console.error('AIF-BIN Recall MCP server running');
}
package/src/search.ts ADDED
@@ -0,0 +1,201 @@
1
+ /**
2
+ * Search functionality for AIF-BIN Recall
3
+ */
4
+
5
+ import type { MemoryChunk, SearchResult, SearchOptions, SearchConfig } from './types.js';
6
+ import { DEFAULT_CONFIG } from './types.js';
7
+ import { EngramDB } from './db.js';
8
+
9
/**
 * Cosine similarity of two equal-length vectors.
 *
 * @param a - First vector.
 * @param b - Second vector.
 * @returns The dot product divided by the product of the two magnitudes, or 0
 *   when that product is zero (for example when either vector is all zeros).
 * @throws Error if the vectors have different lengths.
 */
export function cosineSimilarity(a: number[], b: number[]): number {
  if (a.length !== b.length) {
    throw new Error(`Vector dimension mismatch: ${a.length} vs ${b.length}`);
  }

  let dot = 0;
  let sumSquaresA = 0;
  let sumSquaresB = 0;

  for (const [index, x] of a.entries()) {
    const y = b[index];
    dot += x * y;
    sumSquaresA += x * x;
    sumSquaresB += y * y;
  }

  const denominator = Math.sqrt(sumSquaresA) * Math.sqrt(sumSquaresB);
  return denominator === 0 ? 0 : dot / denominator;
}
32
+
33
/**
 * Normalize BM25 scores to the 0-1 range, inverted so that the best match
 * (lowest raw score) maps to 1 and the worst to 0.
 *
 * BM25 scores are negative in SQLite FTS5 (lower is better). When all scores
 * are equal, including the single-result case, every entry maps to 1.
 *
 * @param scores - Raw keyword-search hits.
 * @returns Map from chunk ID to normalized score; empty for empty input.
 */
function normalizeBM25Scores(scores: { id: string; score: number }[]): Map<string, number> {
  const normalized = new Map<string, number>();
  if (scores.length === 0) return normalized;

  // Fold min/max in one pass. Spreading the scores into Math.min/Math.max
  // passes every element as a call argument, which throws RangeError once
  // the list is large enough to exceed the engine's argument limit.
  let minScore = Infinity;
  let maxScore = -Infinity;
  for (const { score } of scores) {
    minScore = Math.min(minScore, score);
    maxScore = Math.max(maxScore, score);
  }

  // A zero range would divide by zero; fall back to 1.
  const range = maxScore - minScore || 1;

  for (const { id, score } of scores) {
    // Invert and normalize: best match (lowest BM25) becomes highest score
    normalized.set(id, 1 - (score - minScore) / range);
  }

  return normalized;
}
52
+
53
/**
 * Vector and hybrid (vector + keyword) search over chunks stored in an EngramDB.
 */
export class SearchEngine {
  private db: EngramDB;
  private config: SearchConfig;

  /**
   * @param db - Database to query.
   * @param config - Overrides merged on top of `DEFAULT_CONFIG.search`.
   */
  constructor(db: EngramDB, config?: Partial<SearchConfig>) {
    this.config = { ...DEFAULT_CONFIG.search, ...config };
    this.db = db;
  }

  /**
   * Translate an optional collection name into its ID.
   *
   * @returns `undefined` when no name is given.
   * @throws Error if a name is given but no such collection exists.
   */
  private resolveCollectionId(name?: string): string | undefined {
    if (!name) {
      return undefined;
    }
    const found = this.db.getCollection(name);
    if (!found) {
      throw new Error(`Collection not found: ${name}`);
    }
    return found.id;
  }

  /**
   * Perform semantic search using a query embedding.
   *
   * Ranking uses cosine similarity only: `options.hybridWeight` has no effect
   * on this method, because keyword scoring needs the query text (see
   * `hybridSearch`).
   *
   * @param queryEmbedding - Embedding of the query.
   * @param options - Optional collection name, result limit and score threshold.
   * @returns Up to `limit` results with similarity >= threshold, best first.
   * @throws Error if the named collection does not exist or an embedding's
   *   dimension differs from the query's.
   */
  async search(
    queryEmbedding: number[],
    options: SearchOptions = {}
  ): Promise<SearchResult[]> {
    const { collection, limit = this.config.defaultLimit, threshold = 0.0 } = options;

    const collectionId = this.resolveCollectionId(collection);

    const candidates = this.db.getAllChunksWithEmbeddings(collectionId);
    if (candidates.length === 0) {
      return [];
    }

    return candidates
      .map((chunk) => ({ chunk, score: cosineSimilarity(queryEmbedding, chunk.embedding) }))
      .filter((scored) => scored.score >= threshold)
      .sort((left, right) => right.score - left.score)
      .slice(0, limit)
      .map(({ chunk, score }) => ({ chunk, score, vectorScore: score }));
  }

  /**
   * Perform hybrid search combining vector similarity and keyword matching.
   *
   * The combined score is
   * `hybridWeight * vectorScore + (1 - hybridWeight) * keywordScore`, where the
   * keyword score is the normalized BM25 score (0 for chunks without a keyword
   * hit).
   *
   * @param queryEmbedding - Embedding of the query.
   * @param queryText - Query text for the keyword search.
   * @param options - Optional collection name, limit, threshold and hybrid weight.
   * @returns Up to `limit` results with combined score >= threshold, best first.
   * @throws Error if the named collection does not exist or an embedding's
   *   dimension differs from the query's.
   */
  async hybridSearch(
    queryEmbedding: number[],
    queryText: string,
    options: SearchOptions = {}
  ): Promise<SearchResult[]> {
    const {
      collection,
      limit = this.config.defaultLimit,
      threshold = 0.0,
      hybridWeight = this.config.hybridWeight,
    } = options;

    const collectionId = this.resolveCollectionId(collection);

    const candidates = this.db.getAllChunksWithEmbeddings(collectionId);
    if (candidates.length === 0) {
      return [];
    }

    // Vector similarity for every candidate chunk.
    const vectorById = new Map<string, number>();
    for (const candidate of candidates) {
      vectorById.set(candidate.id, cosineSimilarity(queryEmbedding, candidate.embedding));
    }

    // Keyword (BM25) scores; three times the limit is requested.
    const keywordHits = this.db.keywordSearch(queryText, collectionId, limit * 3);
    const keywordById = normalizeBM25Scores(keywordHits);

    const chunkById = new Map<string, MemoryChunk>();
    for (const candidate of candidates) {
      chunkById.set(candidate.id, candidate);
    }

    // Consider every ID that has a vector score or a keyword hit.
    const candidateIds = new Set([...vectorById.keys(), ...keywordById.keys()]);

    const ranked: SearchResult[] = [];
    for (const id of candidateIds) {
      const chunk = chunkById.get(id);
      if (!chunk) {
        // Keyword hit for a chunk that is not among the embedded candidates.
        continue;
      }

      const vectorScore = vectorById.get(id) || 0;
      const keywordScore = keywordById.get(id) || 0;
      const score = hybridWeight * vectorScore + (1 - hybridWeight) * keywordScore;

      if (score >= threshold) {
        ranked.push({ chunk, score, vectorScore, keywordScore });
      }
    }

    ranked.sort((left, right) => right.score - left.score);
    return ranked.slice(0, limit);
  }

  /**
   * Recall a specific chunk by ID.
   *
   * @param id - Chunk ID.
   * @returns Whatever the database returns for that ID.
   */
  recall(id: string): MemoryChunk | null {
    return this.db.getChunk(id);
  }
}