@terronex/aifbin-recall 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. package/CONTRIBUTING.md +65 -0
  2. package/LICENSE +21 -0
  3. package/NOTICE +36 -0
  4. package/README.md +250 -0
  5. package/dist/cli.d.ts +6 -0
  6. package/dist/cli.d.ts.map +1 -0
  7. package/dist/cli.js +182 -0
  8. package/dist/cli.js.map +1 -0
  9. package/dist/db.d.ts +29 -0
  10. package/dist/db.d.ts.map +1 -0
  11. package/dist/db.js +252 -0
  12. package/dist/db.js.map +1 -0
  13. package/dist/embedder.d.ts +47 -0
  14. package/dist/embedder.d.ts.map +1 -0
  15. package/dist/embedder.js +152 -0
  16. package/dist/embedder.js.map +1 -0
  17. package/dist/index.d.ts +27 -0
  18. package/dist/index.d.ts.map +1 -0
  19. package/dist/index.js +45 -0
  20. package/dist/index.js.map +1 -0
  21. package/dist/indexer.d.ts +34 -0
  22. package/dist/indexer.d.ts.map +1 -0
  23. package/dist/indexer.js +246 -0
  24. package/dist/indexer.js.map +1 -0
  25. package/dist/mcp.d.ts +7 -0
  26. package/dist/mcp.d.ts.map +1 -0
  27. package/dist/mcp.js +207 -0
  28. package/dist/mcp.js.map +1 -0
  29. package/dist/search.d.ts +27 -0
  30. package/dist/search.d.ts.map +1 -0
  31. package/dist/search.js +159 -0
  32. package/dist/search.js.map +1 -0
  33. package/dist/server.d.ts +13 -0
  34. package/dist/server.d.ts.map +1 -0
  35. package/dist/server.js +250 -0
  36. package/dist/server.js.map +1 -0
  37. package/dist/types.d.ts +79 -0
  38. package/dist/types.d.ts.map +1 -0
  39. package/dist/types.js +20 -0
  40. package/dist/types.js.map +1 -0
  41. package/package.json +64 -0
  42. package/src/cli.ts +195 -0
  43. package/src/db.ts +295 -0
  44. package/src/embedder.ts +175 -0
  45. package/src/index.ts +46 -0
  46. package/src/indexer.ts +272 -0
  47. package/src/mcp.ts +244 -0
  48. package/src/search.ts +201 -0
  49. package/src/server.ts +270 -0
  50. package/src/types.ts +103 -0
  51. package/tsconfig.json +20 -0
package/src/cli.ts ADDED
@@ -0,0 +1,195 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * AIF-BIN Recall CLI - Local-first memory server for AI agents
4
+ */
5
+
6
+ import { Command } from 'commander';
7
+ import path from 'path';
8
+ import os from 'os';
9
+ import fs from 'fs';
10
+ import { EngramDB } from './db.js';
11
+ import { SearchEngine } from './search.js';
12
+ import { Indexer } from './indexer.js';
13
+ import { startServer } from './server.js';
14
+ import { startMcpServer } from './mcp.js';
15
+ import { DEFAULT_CONFIG } from './types.js';
16
+ import { Embedder, EMBEDDING_MODELS, type EmbeddingModelName } from './embedder.js';
17
+
18
// Root commander instance; every subcommand below attaches to it and reads
// the global --db option via program.opts().db.
const program = new Command();
20
+ // Resolve default DB path
21
+ function getDefaultDbPath(): string {
22
+ const configDir = path.join(os.homedir(), '.aifbin-recall');
23
+ if (!fs.existsSync(configDir)) {
24
+ fs.mkdirSync(configDir, { recursive: true });
25
+ }
26
+ return path.join(configDir, 'index.db');
27
+ }
28
+
29
program
  .name('aifbin-recall')
  .description('Local-first memory server for AI agents')
  .version('0.1.0')
  // Global option: evaluated eagerly, so the default config dir is created
  // as soon as the CLI starts. Subcommands read it via program.opts().db.
  .option('-d, --db <path>', 'Database path', getDefaultDbPath());
34
+
35
+ // Index command
36
+ program
37
+ .command('index <directory>')
38
+ .description('Index AIF-BIN files from a directory')
39
+ .option('-c, --collection <name>', 'Collection name', 'default')
40
+ .option('-r, --recursive', 'Search subdirectories', true)
41
+ .option('--no-recursive', 'Do not search subdirectories')
42
+ .action((directory, options) => {
43
+ const dbPath = program.opts().db;
44
+ const db = new EngramDB(dbPath);
45
+ const indexer = new Indexer(db);
46
+
47
+ console.log(`Indexing ${directory} into collection "${options.collection}"...`);
48
+
49
+ const result = indexer.indexDirectory(path.resolve(directory), {
50
+ collection: options.collection,
51
+ recursive: options.recursive,
52
+ });
53
+
54
+ console.log(`\n✅ Indexed ${result.files} files (${result.chunks} chunks)`);
55
+ db.close();
56
+ });
57
+
58
+ // Serve command
59
+ program
60
+ .command('serve')
61
+ .description('Start the HTTP server')
62
+ .option('-p, --port <port>', 'Server port', String(DEFAULT_CONFIG.server.port))
63
+ .option('-h, --host <host>', 'Server host', DEFAULT_CONFIG.server.host)
64
+ .action((options) => {
65
+ const dbPath = program.opts().db;
66
+ const db = new EngramDB(dbPath);
67
+
68
+ startServer({
69
+ db,
70
+ config: {
71
+ port: parseInt(options.port, 10),
72
+ host: options.host,
73
+ },
74
+ });
75
+ });
76
+
77
+ // MCP command
78
+ program
79
+ .command('mcp')
80
+ .description('Start the MCP server for AI agent integration')
81
+ .action(async () => {
82
+ const dbPath = program.opts().db;
83
+ const db = new EngramDB(dbPath);
84
+ await startMcpServer(db);
85
+ });
86
+
87
+ // Collections command
88
+ program
89
+ .command('collections')
90
+ .description('List all collections')
91
+ .action(() => {
92
+ const dbPath = program.opts().db;
93
+ const db = new EngramDB(dbPath);
94
+ const collections = db.listCollections();
95
+
96
+ if (collections.length === 0) {
97
+ console.log('No collections found. Use "aifbin-recall index" to create one.');
98
+ } else {
99
+ console.log('Collections:\n');
100
+ for (const col of collections) {
101
+ console.log(` ${col.name}`);
102
+ console.log(` Files: ${col.fileCount}`);
103
+ console.log(` Chunks: ${col.chunkCount}`);
104
+ if (col.description) {
105
+ console.log(` Description: ${col.description}`);
106
+ }
107
+ console.log('');
108
+ }
109
+ }
110
+
111
+ db.close();
112
+ });
113
+
114
+ // Search command - now with built-in embedding!
115
+ program
116
+ .command('search <query>')
117
+ .description('Search memories using natural language')
118
+ .option('-c, --collection <name>', 'Collection to search')
119
+ .option('-n, --limit <count>', 'Number of results', '10')
120
+ .option('-m, --model <model>', 'Embedding model (minilm, mpnet, bge-small, bge-base, e5-small)', 'minilm')
121
+ .option('-e, --embedding <file>', 'JSON file containing pre-computed query embedding (optional)')
122
+ .action(async (query, options) => {
123
+ const dbPath = program.opts().db;
124
+ const db = new EngramDB(dbPath);
125
+ const search = new SearchEngine(db);
126
+
127
+ try {
128
+ let embedding: number[];
129
+
130
+ if (options.embedding) {
131
+ // Use provided embedding file
132
+ const embeddingData = JSON.parse(fs.readFileSync(options.embedding, 'utf-8'));
133
+ embedding = embeddingData.embedding || embeddingData;
134
+ if (!Array.isArray(embedding)) {
135
+ console.error('Error: embedding must be an array');
136
+ db.close();
137
+ return;
138
+ }
139
+ } else {
140
+ // Generate embedding locally
141
+ console.log(`Embedding query with ${options.model}...`);
142
+ const embedder = new Embedder(options.model as EmbeddingModelName);
143
+ embedding = await embedder.embed(query);
144
+ console.log(`Embedding generated (${embedding.length} dims)\n`);
145
+ }
146
+
147
+ const results = await search.hybridSearch(embedding, query, {
148
+ collection: options.collection,
149
+ limit: parseInt(options.limit, 10),
150
+ });
151
+
152
+ if (results.length === 0) {
153
+ console.log('No results found.');
154
+ } else {
155
+ console.log(`Found ${results.length} results:\n`);
156
+ for (const [i, r] of results.entries()) {
157
+ console.log(`[${i + 1}] Score: ${r.score.toFixed(4)} (vector: ${r.vectorScore.toFixed(4)}, keyword: ${r.keywordScore?.toFixed(4) || 'n/a'})`);
158
+ console.log(` Source: ${path.basename(r.chunk.sourceFile)}`);
159
+ console.log(` Text: ${r.chunk.text.slice(0, 200)}${r.chunk.text.length > 200 ? '...' : ''}`);
160
+ console.log('');
161
+ }
162
+ }
163
+ } catch (err) {
164
+ console.error('Error:', err);
165
+ }
166
+
167
+ db.close();
168
+ });
169
+
170
+ // Info command
171
+ program
172
+ .command('info')
173
+ .description('Show database information')
174
+ .action(() => {
175
+ const dbPath = program.opts().db;
176
+ console.log(`AIF-BIN Recall v0.1.0`);
177
+ console.log(`Database: ${dbPath}`);
178
+ console.log('');
179
+
180
+ if (fs.existsSync(dbPath)) {
181
+ const db = new EngramDB(dbPath);
182
+ const collections = db.listCollections();
183
+ const totalChunks = collections.reduce((sum, c) => sum + c.chunkCount, 0);
184
+ const totalFiles = collections.reduce((sum, c) => sum + c.fileCount, 0);
185
+
186
+ console.log(`Collections: ${collections.length}`);
187
+ console.log(`Total files: ${totalFiles}`);
188
+ console.log(`Total chunks: ${totalChunks}`);
189
+ db.close();
190
+ } else {
191
+ console.log('Database not found. Run "aifbin-recall index" to create one.');
192
+ }
193
+ });
194
+
195
+ program.parse();
package/src/db.ts ADDED
@@ -0,0 +1,295 @@
1
+ /**
2
+ * SQLite database management for AIF-BIN Recall
3
+ */
4
+
5
+ import Database from 'better-sqlite3';
6
+ import path from 'path';
7
+ import os from 'os';
8
+ import fs from 'fs';
9
+ import { DEFAULT_CONFIG, type Collection, type MemoryChunk, type EngramConfig } from './types.js';
10
+
11
+ export class EngramDB {
12
+ private db: Database.Database;
13
+
14
+ constructor(dbPath?: string) {
15
+ const resolvedPath = this.resolvePath(dbPath || DEFAULT_CONFIG.index.path);
16
+
17
+ // Ensure directory exists
18
+ const dir = path.dirname(resolvedPath);
19
+ if (!fs.existsSync(dir)) {
20
+ fs.mkdirSync(dir, { recursive: true });
21
+ }
22
+
23
+ this.db = new Database(resolvedPath);
24
+ this.init();
25
+ }
26
+
27
+ private resolvePath(p: string): string {
28
+ if (p.startsWith('~')) {
29
+ return path.join(os.homedir(), p.slice(1));
30
+ }
31
+ return path.resolve(p);
32
+ }
33
+
34
+ private init(): void {
35
+ // Enable WAL mode for better concurrency
36
+ this.db.pragma('journal_mode = WAL');
37
+
38
+ // Create collections table
39
+ this.db.exec(`
40
+ CREATE TABLE IF NOT EXISTS collections (
41
+ id TEXT PRIMARY KEY,
42
+ name TEXT UNIQUE NOT NULL,
43
+ description TEXT,
44
+ file_count INTEGER DEFAULT 0,
45
+ chunk_count INTEGER DEFAULT 0,
46
+ created_at TEXT DEFAULT CURRENT_TIMESTAMP,
47
+ updated_at TEXT DEFAULT CURRENT_TIMESTAMP
48
+ )
49
+ `);
50
+
51
+ // Create chunks table
52
+ this.db.exec(`
53
+ CREATE TABLE IF NOT EXISTS chunks (
54
+ id TEXT PRIMARY KEY,
55
+ collection_id TEXT NOT NULL,
56
+ source_file TEXT NOT NULL,
57
+ chunk_index INTEGER NOT NULL,
58
+ text TEXT NOT NULL,
59
+ embedding BLOB NOT NULL,
60
+ metadata TEXT DEFAULT '{}',
61
+ created_at TEXT DEFAULT CURRENT_TIMESTAMP,
62
+ updated_at TEXT DEFAULT CURRENT_TIMESTAMP,
63
+ FOREIGN KEY (collection_id) REFERENCES collections(id) ON DELETE CASCADE
64
+ )
65
+ `);
66
+
67
+ // Create FTS5 virtual table for keyword search
68
+ this.db.exec(`
69
+ CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(
70
+ text,
71
+ content='chunks',
72
+ content_rowid='rowid'
73
+ )
74
+ `);
75
+
76
+ // Triggers to keep FTS in sync
77
+ this.db.exec(`
78
+ CREATE TRIGGER IF NOT EXISTS chunks_ai AFTER INSERT ON chunks BEGIN
79
+ INSERT INTO chunks_fts(rowid, text) VALUES (NEW.rowid, NEW.text);
80
+ END
81
+ `);
82
+
83
+ this.db.exec(`
84
+ CREATE TRIGGER IF NOT EXISTS chunks_ad AFTER DELETE ON chunks BEGIN
85
+ INSERT INTO chunks_fts(chunks_fts, rowid, text) VALUES('delete', OLD.rowid, OLD.text);
86
+ END
87
+ `);
88
+
89
+ this.db.exec(`
90
+ CREATE TRIGGER IF NOT EXISTS chunks_au AFTER UPDATE ON chunks BEGIN
91
+ INSERT INTO chunks_fts(chunks_fts, rowid, text) VALUES('delete', OLD.rowid, OLD.text);
92
+ INSERT INTO chunks_fts(rowid, text) VALUES (NEW.rowid, NEW.text);
93
+ END
94
+ `);
95
+
96
+ // Indexes
97
+ this.db.exec(`CREATE INDEX IF NOT EXISTS idx_chunks_collection ON chunks(collection_id)`);
98
+ this.db.exec(`CREATE INDEX IF NOT EXISTS idx_chunks_source ON chunks(source_file)`);
99
+ }
100
+
101
+ // Collection operations
102
+ createCollection(name: string, description?: string): Collection {
103
+ const id = crypto.randomUUID();
104
+ const stmt = this.db.prepare(`
105
+ INSERT INTO collections (id, name, description)
106
+ VALUES (?, ?, ?)
107
+ `);
108
+ stmt.run(id, name, description || null);
109
+ return this.getCollection(name)!;
110
+ }
111
+
112
+ getCollection(name: string): Collection | null {
113
+ const stmt = this.db.prepare(`SELECT * FROM collections WHERE name = ?`);
114
+ const row = stmt.get(name) as any;
115
+ if (!row) return null;
116
+ return {
117
+ id: row.id,
118
+ name: row.name,
119
+ description: row.description,
120
+ fileCount: row.file_count,
121
+ chunkCount: row.chunk_count,
122
+ createdAt: new Date(row.created_at),
123
+ updatedAt: new Date(row.updated_at),
124
+ };
125
+ }
126
+
127
+ getCollectionById(id: string): Collection | null {
128
+ const stmt = this.db.prepare(`SELECT * FROM collections WHERE id = ?`);
129
+ const row = stmt.get(id) as any;
130
+ if (!row) return null;
131
+ return {
132
+ id: row.id,
133
+ name: row.name,
134
+ description: row.description,
135
+ fileCount: row.file_count,
136
+ chunkCount: row.chunk_count,
137
+ createdAt: new Date(row.created_at),
138
+ updatedAt: new Date(row.updated_at),
139
+ };
140
+ }
141
+
142
+ listCollections(): Collection[] {
143
+ const stmt = this.db.prepare(`SELECT * FROM collections ORDER BY name`);
144
+ const rows = stmt.all() as any[];
145
+ return rows.map(row => ({
146
+ id: row.id,
147
+ name: row.name,
148
+ description: row.description,
149
+ fileCount: row.file_count,
150
+ chunkCount: row.chunk_count,
151
+ createdAt: new Date(row.created_at),
152
+ updatedAt: new Date(row.updated_at),
153
+ }));
154
+ }
155
+
156
+ deleteCollection(name: string): boolean {
157
+ const stmt = this.db.prepare(`DELETE FROM collections WHERE name = ?`);
158
+ const result = stmt.run(name);
159
+ return result.changes > 0;
160
+ }
161
+
162
+ // Chunk operations
163
+ insertChunk(chunk: Omit<MemoryChunk, 'createdAt' | 'updatedAt'>): void {
164
+ const stmt = this.db.prepare(`
165
+ INSERT INTO chunks (id, collection_id, source_file, chunk_index, text, embedding, metadata)
166
+ VALUES (?, ?, ?, ?, ?, ?, ?)
167
+ `);
168
+
169
+ // Convert embedding array to binary buffer
170
+ const embeddingBuffer = Buffer.from(new Float32Array(chunk.embedding).buffer);
171
+
172
+ stmt.run(
173
+ chunk.id,
174
+ chunk.collectionId,
175
+ chunk.sourceFile,
176
+ chunk.chunkIndex,
177
+ chunk.text,
178
+ embeddingBuffer,
179
+ JSON.stringify(chunk.metadata)
180
+ );
181
+ }
182
+
183
+ insertChunks(chunks: Omit<MemoryChunk, 'createdAt' | 'updatedAt'>[]): void {
184
+ const insert = this.db.prepare(`
185
+ INSERT INTO chunks (id, collection_id, source_file, chunk_index, text, embedding, metadata)
186
+ VALUES (?, ?, ?, ?, ?, ?, ?)
187
+ `);
188
+
189
+ const insertMany = this.db.transaction((chunks: Omit<MemoryChunk, 'createdAt' | 'updatedAt'>[]) => {
190
+ for (const chunk of chunks) {
191
+ const embeddingBuffer = Buffer.from(new Float32Array(chunk.embedding).buffer);
192
+ insert.run(
193
+ chunk.id,
194
+ chunk.collectionId,
195
+ chunk.sourceFile,
196
+ chunk.chunkIndex,
197
+ chunk.text,
198
+ embeddingBuffer,
199
+ JSON.stringify(chunk.metadata)
200
+ );
201
+ }
202
+ });
203
+
204
+ insertMany(chunks);
205
+ }
206
+
207
+ getChunk(id: string): MemoryChunk | null {
208
+ const stmt = this.db.prepare(`SELECT * FROM chunks WHERE id = ?`);
209
+ const row = stmt.get(id) as any;
210
+ if (!row) return null;
211
+ return this.rowToChunk(row);
212
+ }
213
+
214
+ getChunksByCollection(collectionId: string): MemoryChunk[] {
215
+ const stmt = this.db.prepare(`SELECT * FROM chunks WHERE collection_id = ?`);
216
+ const rows = stmt.all(collectionId) as any[];
217
+ return rows.map(row => this.rowToChunk(row));
218
+ }
219
+
220
+ deleteChunksBySource(sourceFile: string): number {
221
+ const stmt = this.db.prepare(`DELETE FROM chunks WHERE source_file = ?`);
222
+ const result = stmt.run(sourceFile);
223
+ return result.changes;
224
+ }
225
+
226
+ // Search operations
227
+ getAllChunksWithEmbeddings(collectionId?: string): MemoryChunk[] {
228
+ let stmt;
229
+ if (collectionId) {
230
+ stmt = this.db.prepare(`SELECT * FROM chunks WHERE collection_id = ?`);
231
+ return (stmt.all(collectionId) as any[]).map(row => this.rowToChunk(row));
232
+ } else {
233
+ stmt = this.db.prepare(`SELECT * FROM chunks`);
234
+ return (stmt.all() as any[]).map(row => this.rowToChunk(row));
235
+ }
236
+ }
237
+
238
+ keywordSearch(query: string, collectionId?: string, limit: number = 10): { id: string; score: number }[] {
239
+ // Escape special FTS5 characters and wrap in quotes for phrase matching
240
+ const escapedQuery = '"' + query.replace(/"/g, '""') + '"';
241
+
242
+ let sql = `
243
+ SELECT chunks.id, bm25(chunks_fts) as score
244
+ FROM chunks_fts
245
+ JOIN chunks ON chunks.rowid = chunks_fts.rowid
246
+ WHERE chunks_fts MATCH ?
247
+ `;
248
+
249
+ const params: any[] = [escapedQuery];
250
+
251
+ if (collectionId) {
252
+ sql += ` AND chunks.collection_id = ?`;
253
+ params.push(collectionId);
254
+ }
255
+
256
+ sql += ` ORDER BY score LIMIT ?`;
257
+ params.push(limit);
258
+
259
+ const stmt = this.db.prepare(sql);
260
+ return stmt.all(...params) as { id: string; score: number }[];
261
+ }
262
+
263
+ updateCollectionStats(collectionId: string): void {
264
+ const stmt = this.db.prepare(`
265
+ UPDATE collections SET
266
+ file_count = (SELECT COUNT(DISTINCT source_file) FROM chunks WHERE collection_id = ?),
267
+ chunk_count = (SELECT COUNT(*) FROM chunks WHERE collection_id = ?),
268
+ updated_at = CURRENT_TIMESTAMP
269
+ WHERE id = ?
270
+ `);
271
+ stmt.run(collectionId, collectionId, collectionId);
272
+ }
273
+
274
+ private rowToChunk(row: any): MemoryChunk {
275
+ // Convert binary buffer back to Float32Array, then to regular array
276
+ const embeddingBuffer = row.embedding as Buffer;
277
+ const embedding = Array.from(new Float32Array(embeddingBuffer.buffer, embeddingBuffer.byteOffset, embeddingBuffer.length / 4));
278
+
279
+ return {
280
+ id: row.id,
281
+ collectionId: row.collection_id,
282
+ sourceFile: row.source_file,
283
+ chunkIndex: row.chunk_index,
284
+ text: row.text,
285
+ embedding,
286
+ metadata: JSON.parse(row.metadata || '{}'),
287
+ createdAt: new Date(row.created_at),
288
+ updatedAt: new Date(row.updated_at),
289
+ };
290
+ }
291
+
292
+ close(): void {
293
+ this.db.close();
294
+ }
295
+ }
@@ -0,0 +1,175 @@
1
+ /**
2
+ * Local embedding generation for AIF-BIN Recall
3
+ * Uses @xenova/transformers to run sentence-transformers models locally
4
+ */
5
+
6
+ import { pipeline, env } from '@xenova/transformers';
7
+
8
// Download models from the HuggingFace hub instead of checking for local copies.
env.allowLocalModels = false;

// Supported embedding models (same as AIF-BIN Pro).
// Keys are the user-facing names accepted by the CLI's --model option.
export const EMBEDDING_MODELS = {
  minilm: 'Xenova/all-MiniLM-L6-v2', // 384 dims, fastest
  mpnet: 'Xenova/all-mpnet-base-v2', // 768 dims, balanced
  'bge-small': 'Xenova/bge-small-en-v1.5', // 384 dims, good quality
  'bge-base': 'Xenova/bge-base-en-v1.5', // 768 dims, best quality
  'e5-small': 'Xenova/e5-small-v2', // 384 dims
} as const;

export type EmbeddingModelName = keyof typeof EMBEDDING_MODELS;

// Module-level cache so each model's pipeline is loaded at most once per process.
const pipelineCache = new Map<string, any>();
24
+
25
+ /**
26
+ * Get or create an embedding pipeline for the given model
27
+ */
28
+ async function getEmbedder(model: EmbeddingModelName = 'minilm') {
29
+ const modelPath = EMBEDDING_MODELS[model];
30
+
31
+ if (!pipelineCache.has(modelPath)) {
32
+ console.log(`Loading embedding model: ${model} (${modelPath})...`);
33
+ const embedder = await pipeline('feature-extraction', modelPath);
34
+ pipelineCache.set(modelPath, embedder);
35
+ console.log(`Model loaded: ${model}`);
36
+ }
37
+
38
+ return pipelineCache.get(modelPath);
39
+ }
40
+
41
+ /**
42
+ * Mean pooling for sentence embeddings
43
+ */
44
+ function meanPool(embeddings: number[][]): number[] {
45
+ const dims = embeddings[0].length;
46
+ const result = new Array(dims).fill(0);
47
+
48
+ for (const embedding of embeddings) {
49
+ for (let i = 0; i < dims; i++) {
50
+ result[i] += embedding[i];
51
+ }
52
+ }
53
+
54
+ for (let i = 0; i < dims; i++) {
55
+ result[i] /= embeddings.length;
56
+ }
57
+
58
+ return result;
59
+ }
60
+
61
+ /**
62
+ * Normalize a vector to unit length
63
+ */
64
+ function normalize(vector: number[]): number[] {
65
+ const magnitude = Math.sqrt(vector.reduce((sum, val) => sum + val * val, 0));
66
+ if (magnitude === 0) return vector;
67
+ return vector.map(val => val / magnitude);
68
+ }
69
+
70
+ export class Embedder {
71
+ private model: EmbeddingModelName;
72
+ private embedder: any = null;
73
+ private loading: Promise<void> | null = null;
74
+
75
+ constructor(model: EmbeddingModelName = 'minilm') {
76
+ this.model = model;
77
+ }
78
+
79
+ /**
80
+ * Ensure the model is loaded
81
+ */
82
+ async init(): Promise<void> {
83
+ if (this.embedder) return;
84
+
85
+ if (!this.loading) {
86
+ this.loading = (async () => {
87
+ this.embedder = await getEmbedder(this.model);
88
+ })();
89
+ }
90
+
91
+ await this.loading;
92
+ }
93
+
94
+ /**
95
+ * Embed a single text string
96
+ */
97
+ async embed(text: string): Promise<number[]> {
98
+ await this.init();
99
+
100
+ const output = await this.embedder(text, {
101
+ pooling: 'mean',
102
+ normalize: true,
103
+ });
104
+
105
+ // Convert to regular array
106
+ return Array.from(output.data);
107
+ }
108
+
109
+ /**
110
+ * Embed multiple texts in batch
111
+ */
112
+ async embedBatch(texts: string[]): Promise<number[][]> {
113
+ await this.init();
114
+
115
+ const results: number[][] = [];
116
+
117
+ // Process in batches to avoid memory issues
118
+ const batchSize = 32;
119
+ for (let i = 0; i < texts.length; i += batchSize) {
120
+ const batch = texts.slice(i, i + batchSize);
121
+
122
+ for (const text of batch) {
123
+ const embedding = await this.embed(text);
124
+ results.push(embedding);
125
+ }
126
+ }
127
+
128
+ return results;
129
+ }
130
+
131
+ /**
132
+ * Get the embedding dimension for the current model
133
+ */
134
+ getDimension(): number {
135
+ switch (this.model) {
136
+ case 'minilm':
137
+ case 'bge-small':
138
+ case 'e5-small':
139
+ return 384;
140
+ case 'mpnet':
141
+ case 'bge-base':
142
+ return 768;
143
+ default:
144
+ return 384;
145
+ }
146
+ }
147
+
148
+ /**
149
+ * Get the model name
150
+ */
151
+ getModelName(): string {
152
+ return this.model;
153
+ }
154
+ }
155
+
156
+ // Singleton instance for default model
157
+ let defaultEmbedder: Embedder | null = null;
158
+
159
+ /**
160
+ * Get the default embedder instance
161
+ */
162
+ export function getDefaultEmbedder(model: EmbeddingModelName = 'minilm'): Embedder {
163
+ if (!defaultEmbedder || defaultEmbedder.getModelName() !== model) {
164
+ defaultEmbedder = new Embedder(model);
165
+ }
166
+ return defaultEmbedder;
167
+ }
168
+
169
+ /**
170
+ * Quick helper to embed a single query
171
+ */
172
+ export async function embedQuery(text: string, model: EmbeddingModelName = 'minilm'): Promise<number[]> {
173
+ const embedder = getDefaultEmbedder(model);
174
+ return embedder.embed(text);
175
+ }
package/src/index.ts ADDED
@@ -0,0 +1,46 @@
1
/**
 * AIF-BIN Recall - Local-first memory server for AI agents
 *
 * @example
 * ```typescript
 * import { EngramDB, SearchEngine, Indexer } from '@terronex/aifbin-recall';
 *
 * const db = new EngramDB('~/.aifbin-recall/index.db');
 * const indexer = new Indexer(db);
 * const search = new SearchEngine(db);
 *
 * // Index AIF-BIN files
 * indexer.indexDirectory('./memories', { collection: 'my-project' });
 *
 * // Search with embedding
 * const results = await search.search(queryEmbedding, { collection: 'my-project' });
 * ```
 */

// Core classes
export { EngramDB } from './db.js';
export { SearchEngine, cosineSimilarity } from './search.js';
export { Indexer, parseAifBinFile, findAifBinFiles } from './indexer.js';
export { Embedder, embedQuery, getDefaultEmbedder, EMBEDDING_MODELS, type EmbeddingModelName } from './embedder.js';

// Server entry points (HTTP and MCP)
export { createServer, startServer } from './server.js';
export { startMcpServer } from './mcp.js';

// Public type surface (type-only re-exports; erased at compile time)
export type {
  MemoryChunk,
  Collection,
  SearchResult,
  SearchOptions,
  IndexOptions,
  EngramConfig,
  ServerConfig,
  IndexConfig,
  SearchConfig,
  AifBinFile,
  AifBinChunk,
  AifBinHeader,
} from './types.js';

export { DEFAULT_CONFIG } from './types.js';