@terronex/aifbin-recall 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CONTRIBUTING.md +65 -0
- package/LICENSE +21 -0
- package/NOTICE +36 -0
- package/README.md +250 -0
- package/dist/cli.d.ts +6 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +182 -0
- package/dist/cli.js.map +1 -0
- package/dist/db.d.ts +29 -0
- package/dist/db.d.ts.map +1 -0
- package/dist/db.js +252 -0
- package/dist/db.js.map +1 -0
- package/dist/embedder.d.ts +47 -0
- package/dist/embedder.d.ts.map +1 -0
- package/dist/embedder.js +152 -0
- package/dist/embedder.js.map +1 -0
- package/dist/index.d.ts +27 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +45 -0
- package/dist/index.js.map +1 -0
- package/dist/indexer.d.ts +34 -0
- package/dist/indexer.d.ts.map +1 -0
- package/dist/indexer.js +246 -0
- package/dist/indexer.js.map +1 -0
- package/dist/mcp.d.ts +7 -0
- package/dist/mcp.d.ts.map +1 -0
- package/dist/mcp.js +207 -0
- package/dist/mcp.js.map +1 -0
- package/dist/search.d.ts +27 -0
- package/dist/search.d.ts.map +1 -0
- package/dist/search.js +159 -0
- package/dist/search.js.map +1 -0
- package/dist/server.d.ts +13 -0
- package/dist/server.d.ts.map +1 -0
- package/dist/server.js +250 -0
- package/dist/server.js.map +1 -0
- package/dist/types.d.ts +79 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +20 -0
- package/dist/types.js.map +1 -0
- package/package.json +64 -0
- package/src/cli.ts +195 -0
- package/src/db.ts +295 -0
- package/src/embedder.ts +175 -0
- package/src/index.ts +46 -0
- package/src/indexer.ts +272 -0
- package/src/mcp.ts +244 -0
- package/src/search.ts +201 -0
- package/src/server.ts +270 -0
- package/src/types.ts +103 -0
- package/tsconfig.json +20 -0
package/src/cli.ts
ADDED
@@ -0,0 +1,195 @@

#!/usr/bin/env node
/**
 * AIF-BIN Recall CLI - Local-first memory server for AI agents
 */

import { Command } from 'commander';
import path from 'path';
import os from 'os';
import fs from 'fs';
import { EngramDB } from './db.js';
import { SearchEngine } from './search.js';
import { Indexer } from './indexer.js';
import { startServer } from './server.js';
import { startMcpServer } from './mcp.js';
import { DEFAULT_CONFIG } from './types.js';
import { Embedder, EMBEDDING_MODELS, type EmbeddingModelName } from './embedder.js';

const program = new Command();

// Resolve default DB path
function getDefaultDbPath(): string {
  const configDir = path.join(os.homedir(), '.aifbin-recall');
  if (!fs.existsSync(configDir)) {
    fs.mkdirSync(configDir, { recursive: true });
  }
  return path.join(configDir, 'index.db');
}

program
  .name('aifbin-recall')
  .description('Local-first memory server for AI agents')
  .version('0.1.0')
  .option('-d, --db <path>', 'Database path', getDefaultDbPath());

// Index command
program
  .command('index <directory>')
  .description('Index AIF-BIN files from a directory')
  .option('-c, --collection <name>', 'Collection name', 'default')
  .option('-r, --recursive', 'Search subdirectories', true)
  .option('--no-recursive', 'Do not search subdirectories')
  .action((directory, options) => {
    const dbPath = program.opts().db;
    const db = new EngramDB(dbPath);
    const indexer = new Indexer(db);

    console.log(`Indexing ${directory} into collection "${options.collection}"...`);

    const result = indexer.indexDirectory(path.resolve(directory), {
      collection: options.collection,
      recursive: options.recursive,
    });

    console.log(`\n✅ Indexed ${result.files} files (${result.chunks} chunks)`);
    db.close();
  });

// Serve command
program
  .command('serve')
  .description('Start the HTTP server')
  .option('-p, --port <port>', 'Server port', String(DEFAULT_CONFIG.server.port))
  .option('-h, --host <host>', 'Server host', DEFAULT_CONFIG.server.host)
  .action((options) => {
    const dbPath = program.opts().db;
    const db = new EngramDB(dbPath);

    startServer({
      db,
      config: {
        port: parseInt(options.port, 10),
        host: options.host,
      },
    });
  });

// MCP command
program
  .command('mcp')
  .description('Start the MCP server for AI agent integration')
  .action(async () => {
    const dbPath = program.opts().db;
    const db = new EngramDB(dbPath);
    await startMcpServer(db);
  });

// Collections command
program
  .command('collections')
  .description('List all collections')
  .action(() => {
    const dbPath = program.opts().db;
    const db = new EngramDB(dbPath);
    const collections = db.listCollections();

    if (collections.length === 0) {
      console.log('No collections found. Use "aifbin-recall index" to create one.');
    } else {
      console.log('Collections:\n');
      for (const col of collections) {
        console.log(`  ${col.name}`);
        console.log(`    Files: ${col.fileCount}`);
        console.log(`    Chunks: ${col.chunkCount}`);
        if (col.description) {
          console.log(`    Description: ${col.description}`);
        }
        console.log('');
      }
    }

    db.close();
  });

// Search command - now with built-in embedding!
program
  .command('search <query>')
  .description('Search memories using natural language')
  .option('-c, --collection <name>', 'Collection to search')
  .option('-n, --limit <count>', 'Number of results', '10')
  .option('-m, --model <model>', 'Embedding model (minilm, mpnet, bge-small, bge-base, e5-small)', 'minilm')
  .option('-e, --embedding <file>', 'JSON file containing pre-computed query embedding (optional)')
  .action(async (query, options) => {
    const dbPath = program.opts().db;
    const db = new EngramDB(dbPath);
    const search = new SearchEngine(db);

    try {
      let embedding: number[];

      if (options.embedding) {
        // Use provided embedding file
        const embeddingData = JSON.parse(fs.readFileSync(options.embedding, 'utf-8'));
        embedding = embeddingData.embedding || embeddingData;
        if (!Array.isArray(embedding)) {
          console.error('Error: embedding must be an array');
          db.close();
          return;
        }
      } else {
        // Generate embedding locally
        console.log(`Embedding query with ${options.model}...`);
        const embedder = new Embedder(options.model as EmbeddingModelName);
        embedding = await embedder.embed(query);
        console.log(`Embedding generated (${embedding.length} dims)\n`);
      }

      const results = await search.hybridSearch(embedding, query, {
        collection: options.collection,
        limit: parseInt(options.limit, 10),
      });

      if (results.length === 0) {
        console.log('No results found.');
      } else {
        console.log(`Found ${results.length} results:\n`);
        for (const [i, r] of results.entries()) {
          console.log(`[${i + 1}] Score: ${r.score.toFixed(4)} (vector: ${r.vectorScore.toFixed(4)}, keyword: ${r.keywordScore?.toFixed(4) || 'n/a'})`);
          console.log(`    Source: ${path.basename(r.chunk.sourceFile)}`);
          console.log(`    Text: ${r.chunk.text.slice(0, 200)}${r.chunk.text.length > 200 ? '...' : ''}`);
          console.log('');
        }
      }
    } catch (err) {
      console.error('Error:', err);
    }

    db.close();
  });

// Info command
program
  .command('info')
  .description('Show database information')
  .action(() => {
    const dbPath = program.opts().db;
    console.log(`AIF-BIN Recall v0.1.0`);
    console.log(`Database: ${dbPath}`);
    console.log('');

    if (fs.existsSync(dbPath)) {
      const db = new EngramDB(dbPath);
      const collections = db.listCollections();
      const totalChunks = collections.reduce((sum, c) => sum + c.chunkCount, 0);
      const totalFiles = collections.reduce((sum, c) => sum + c.fileCount, 0);

      console.log(`Collections: ${collections.length}`);
      console.log(`Total files: ${totalFiles}`);
      console.log(`Total chunks: ${totalChunks}`);
      db.close();
    } else {
      console.log('Database not found. Run "aifbin-recall index" to create one.');
    }
  });

program.parse();
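The `search` command above accepts a pre-computed query embedding via `--embedding`, reading either a bare JSON array or an object with an `embedding` field. A minimal sketch of producing such a file with the package's own `Embedder`; the file name and query text are placeholders:

```typescript
// Sketch: writing a pre-computed query embedding file for `search --embedding`.
// Assumes the Embedder export from this package; file name and query are placeholders.
import fs from 'fs';
import { Embedder } from '@terronex/aifbin-recall';

async function main() {
  const embedder = new Embedder('minilm'); // 384-dim model (see embedder.ts)
  const embedding = await embedder.embed('how do I rotate API keys?');

  // The CLI accepts either this object shape or a bare JSON array.
  fs.writeFileSync('query-embedding.json', JSON.stringify({ embedding }));
}

main();
```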
package/src/db.ts
ADDED
@@ -0,0 +1,295 @@

/**
 * SQLite database management for AIF-BIN Recall
 */

import Database from 'better-sqlite3';
import path from 'path';
import os from 'os';
import fs from 'fs';
import { DEFAULT_CONFIG, type Collection, type MemoryChunk, type EngramConfig } from './types.js';

export class EngramDB {
  private db: Database.Database;

  constructor(dbPath?: string) {
    const resolvedPath = this.resolvePath(dbPath || DEFAULT_CONFIG.index.path);

    // Ensure directory exists
    const dir = path.dirname(resolvedPath);
    if (!fs.existsSync(dir)) {
      fs.mkdirSync(dir, { recursive: true });
    }

    this.db = new Database(resolvedPath);
    this.init();
  }

  private resolvePath(p: string): string {
    if (p.startsWith('~')) {
      return path.join(os.homedir(), p.slice(1));
    }
    return path.resolve(p);
  }

  private init(): void {
    // Enable WAL mode for better concurrency
    this.db.pragma('journal_mode = WAL');

    // Create collections table
    this.db.exec(`
      CREATE TABLE IF NOT EXISTS collections (
        id TEXT PRIMARY KEY,
        name TEXT UNIQUE NOT NULL,
        description TEXT,
        file_count INTEGER DEFAULT 0,
        chunk_count INTEGER DEFAULT 0,
        created_at TEXT DEFAULT CURRENT_TIMESTAMP,
        updated_at TEXT DEFAULT CURRENT_TIMESTAMP
      )
    `);

    // Create chunks table
    this.db.exec(`
      CREATE TABLE IF NOT EXISTS chunks (
        id TEXT PRIMARY KEY,
        collection_id TEXT NOT NULL,
        source_file TEXT NOT NULL,
        chunk_index INTEGER NOT NULL,
        text TEXT NOT NULL,
        embedding BLOB NOT NULL,
        metadata TEXT DEFAULT '{}',
        created_at TEXT DEFAULT CURRENT_TIMESTAMP,
        updated_at TEXT DEFAULT CURRENT_TIMESTAMP,
        FOREIGN KEY (collection_id) REFERENCES collections(id) ON DELETE CASCADE
      )
    `);

    // Create FTS5 virtual table for keyword search
    this.db.exec(`
      CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(
        text,
        content='chunks',
        content_rowid='rowid'
      )
    `);

    // Triggers to keep FTS in sync
    this.db.exec(`
      CREATE TRIGGER IF NOT EXISTS chunks_ai AFTER INSERT ON chunks BEGIN
        INSERT INTO chunks_fts(rowid, text) VALUES (NEW.rowid, NEW.text);
      END
    `);

    this.db.exec(`
      CREATE TRIGGER IF NOT EXISTS chunks_ad AFTER DELETE ON chunks BEGIN
        INSERT INTO chunks_fts(chunks_fts, rowid, text) VALUES('delete', OLD.rowid, OLD.text);
      END
    `);

    this.db.exec(`
      CREATE TRIGGER IF NOT EXISTS chunks_au AFTER UPDATE ON chunks BEGIN
        INSERT INTO chunks_fts(chunks_fts, rowid, text) VALUES('delete', OLD.rowid, OLD.text);
        INSERT INTO chunks_fts(rowid, text) VALUES (NEW.rowid, NEW.text);
      END
    `);

    // Indexes
    this.db.exec(`CREATE INDEX IF NOT EXISTS idx_chunks_collection ON chunks(collection_id)`);
    this.db.exec(`CREATE INDEX IF NOT EXISTS idx_chunks_source ON chunks(source_file)`);
  }

  // Collection operations
  createCollection(name: string, description?: string): Collection {
    const id = crypto.randomUUID();
    const stmt = this.db.prepare(`
      INSERT INTO collections (id, name, description)
      VALUES (?, ?, ?)
    `);
    stmt.run(id, name, description || null);
    return this.getCollection(name)!;
  }

  getCollection(name: string): Collection | null {
    const stmt = this.db.prepare(`SELECT * FROM collections WHERE name = ?`);
    const row = stmt.get(name) as any;
    if (!row) return null;
    return {
      id: row.id,
      name: row.name,
      description: row.description,
      fileCount: row.file_count,
      chunkCount: row.chunk_count,
      createdAt: new Date(row.created_at),
      updatedAt: new Date(row.updated_at),
    };
  }

  getCollectionById(id: string): Collection | null {
    const stmt = this.db.prepare(`SELECT * FROM collections WHERE id = ?`);
    const row = stmt.get(id) as any;
    if (!row) return null;
    return {
      id: row.id,
      name: row.name,
      description: row.description,
      fileCount: row.file_count,
      chunkCount: row.chunk_count,
      createdAt: new Date(row.created_at),
      updatedAt: new Date(row.updated_at),
    };
  }

  listCollections(): Collection[] {
    const stmt = this.db.prepare(`SELECT * FROM collections ORDER BY name`);
    const rows = stmt.all() as any[];
    return rows.map(row => ({
      id: row.id,
      name: row.name,
      description: row.description,
      fileCount: row.file_count,
      chunkCount: row.chunk_count,
      createdAt: new Date(row.created_at),
      updatedAt: new Date(row.updated_at),
    }));
  }

  deleteCollection(name: string): boolean {
    const stmt = this.db.prepare(`DELETE FROM collections WHERE name = ?`);
    const result = stmt.run(name);
    return result.changes > 0;
  }

  // Chunk operations
  insertChunk(chunk: Omit<MemoryChunk, 'createdAt' | 'updatedAt'>): void {
    const stmt = this.db.prepare(`
      INSERT INTO chunks (id, collection_id, source_file, chunk_index, text, embedding, metadata)
      VALUES (?, ?, ?, ?, ?, ?, ?)
    `);

    // Convert embedding array to binary buffer
    const embeddingBuffer = Buffer.from(new Float32Array(chunk.embedding).buffer);

    stmt.run(
      chunk.id,
      chunk.collectionId,
      chunk.sourceFile,
      chunk.chunkIndex,
      chunk.text,
      embeddingBuffer,
      JSON.stringify(chunk.metadata)
    );
  }

  insertChunks(chunks: Omit<MemoryChunk, 'createdAt' | 'updatedAt'>[]): void {
    const insert = this.db.prepare(`
      INSERT INTO chunks (id, collection_id, source_file, chunk_index, text, embedding, metadata)
      VALUES (?, ?, ?, ?, ?, ?, ?)
    `);

    const insertMany = this.db.transaction((chunks: Omit<MemoryChunk, 'createdAt' | 'updatedAt'>[]) => {
      for (const chunk of chunks) {
        const embeddingBuffer = Buffer.from(new Float32Array(chunk.embedding).buffer);
        insert.run(
          chunk.id,
          chunk.collectionId,
          chunk.sourceFile,
          chunk.chunkIndex,
          chunk.text,
          embeddingBuffer,
          JSON.stringify(chunk.metadata)
        );
      }
    });

    insertMany(chunks);
  }

  getChunk(id: string): MemoryChunk | null {
    const stmt = this.db.prepare(`SELECT * FROM chunks WHERE id = ?`);
    const row = stmt.get(id) as any;
    if (!row) return null;
    return this.rowToChunk(row);
  }

  getChunksByCollection(collectionId: string): MemoryChunk[] {
    const stmt = this.db.prepare(`SELECT * FROM chunks WHERE collection_id = ?`);
    const rows = stmt.all(collectionId) as any[];
    return rows.map(row => this.rowToChunk(row));
  }

  deleteChunksBySource(sourceFile: string): number {
    const stmt = this.db.prepare(`DELETE FROM chunks WHERE source_file = ?`);
    const result = stmt.run(sourceFile);
    return result.changes;
  }

  // Search operations
  getAllChunksWithEmbeddings(collectionId?: string): MemoryChunk[] {
    let stmt;
    if (collectionId) {
      stmt = this.db.prepare(`SELECT * FROM chunks WHERE collection_id = ?`);
      return (stmt.all(collectionId) as any[]).map(row => this.rowToChunk(row));
    } else {
      stmt = this.db.prepare(`SELECT * FROM chunks`);
      return (stmt.all() as any[]).map(row => this.rowToChunk(row));
    }
  }

  keywordSearch(query: string, collectionId?: string, limit: number = 10): { id: string; score: number }[] {
    // Escape special FTS5 characters and wrap in quotes for phrase matching
    const escapedQuery = '"' + query.replace(/"/g, '""') + '"';

    let sql = `
      SELECT chunks.id, bm25(chunks_fts) as score
      FROM chunks_fts
      JOIN chunks ON chunks.rowid = chunks_fts.rowid
      WHERE chunks_fts MATCH ?
    `;

    const params: any[] = [escapedQuery];

    if (collectionId) {
      sql += ` AND chunks.collection_id = ?`;
      params.push(collectionId);
    }

    sql += ` ORDER BY score LIMIT ?`;
    params.push(limit);

    const stmt = this.db.prepare(sql);
    return stmt.all(...params) as { id: string; score: number }[];
  }

  updateCollectionStats(collectionId: string): void {
    const stmt = this.db.prepare(`
      UPDATE collections SET
        file_count = (SELECT COUNT(DISTINCT source_file) FROM chunks WHERE collection_id = ?),
        chunk_count = (SELECT COUNT(*) FROM chunks WHERE collection_id = ?),
        updated_at = CURRENT_TIMESTAMP
      WHERE id = ?
    `);
    stmt.run(collectionId, collectionId, collectionId);
  }

  private rowToChunk(row: any): MemoryChunk {
    // Convert binary buffer back to Float32Array, then to regular array
    const embeddingBuffer = row.embedding as Buffer;
    const embedding = Array.from(new Float32Array(embeddingBuffer.buffer, embeddingBuffer.byteOffset, embeddingBuffer.length / 4));

    return {
      id: row.id,
      collectionId: row.collection_id,
      sourceFile: row.source_file,
      chunkIndex: row.chunk_index,
      text: row.text,
      embedding,
      metadata: JSON.parse(row.metadata || '{}'),
      createdAt: new Date(row.created_at),
      updatedAt: new Date(row.updated_at),
    };
  }

  close(): void {
    this.db.close();
  }
}
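A minimal sketch of driving `EngramDB` directly, using only the methods defined above; the database path, chunk values, and the zero-filled 384-dim embedding are placeholders (a real embedding would come from `Embedder.embed()`):

```typescript
// Sketch: direct EngramDB usage; all values below are placeholders.
import { randomUUID } from 'crypto';
import { EngramDB } from '@terronex/aifbin-recall';

const db = new EngramDB('./demo.db');
const collection = db.getCollection('notes') ?? db.createCollection('notes', 'Scratch notes');

db.insertChunks([
  {
    id: randomUUID(),
    collectionId: collection.id,
    sourceFile: '/tmp/notes.aifbin',
    chunkIndex: 0,
    text: 'Rotate API keys every 90 days.',
    embedding: new Array(384).fill(0), // placeholder; normally Embedder.embed() output
    metadata: {},
  },
]);
db.updateCollectionStats(collection.id);

// FTS5/bm25-backed keyword search over the chunks table.
console.log(db.keywordSearch('API keys', collection.id, 5));
db.close();
```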
package/src/embedder.ts
ADDED
@@ -0,0 +1,175 @@

/**
 * Local embedding generation for AIF-BIN Recall
 * Uses @xenova/transformers to run sentence-transformers models locally
 */

import { pipeline, env } from '@xenova/transformers';

// Disable local model check (download from HuggingFace)
env.allowLocalModels = false;

// Supported embedding models (same as AIF-BIN Pro)
export const EMBEDDING_MODELS = {
  minilm: 'Xenova/all-MiniLM-L6-v2',        // 384 dims, fastest
  mpnet: 'Xenova/all-mpnet-base-v2',        // 768 dims, balanced
  'bge-small': 'Xenova/bge-small-en-v1.5',  // 384 dims, good quality
  'bge-base': 'Xenova/bge-base-en-v1.5',    // 768 dims, best quality
  'e5-small': 'Xenova/e5-small-v2',         // 384 dims
} as const;

export type EmbeddingModelName = keyof typeof EMBEDDING_MODELS;

// Cache for loaded pipelines
const pipelineCache = new Map<string, any>();

/**
 * Get or create an embedding pipeline for the given model
 */
async function getEmbedder(model: EmbeddingModelName = 'minilm') {
  const modelPath = EMBEDDING_MODELS[model];

  if (!pipelineCache.has(modelPath)) {
    console.log(`Loading embedding model: ${model} (${modelPath})...`);
    const embedder = await pipeline('feature-extraction', modelPath);
    pipelineCache.set(modelPath, embedder);
    console.log(`Model loaded: ${model}`);
  }

  return pipelineCache.get(modelPath);
}

/**
 * Mean pooling for sentence embeddings
 */
function meanPool(embeddings: number[][]): number[] {
  const dims = embeddings[0].length;
  const result = new Array(dims).fill(0);

  for (const embedding of embeddings) {
    for (let i = 0; i < dims; i++) {
      result[i] += embedding[i];
    }
  }

  for (let i = 0; i < dims; i++) {
    result[i] /= embeddings.length;
  }

  return result;
}

/**
 * Normalize a vector to unit length
 */
function normalize(vector: number[]): number[] {
  const magnitude = Math.sqrt(vector.reduce((sum, val) => sum + val * val, 0));
  if (magnitude === 0) return vector;
  return vector.map(val => val / magnitude);
}

export class Embedder {
  private model: EmbeddingModelName;
  private embedder: any = null;
  private loading: Promise<void> | null = null;

  constructor(model: EmbeddingModelName = 'minilm') {
    this.model = model;
  }

  /**
   * Ensure the model is loaded
   */
  async init(): Promise<void> {
    if (this.embedder) return;

    if (!this.loading) {
      this.loading = (async () => {
        this.embedder = await getEmbedder(this.model);
      })();
    }

    await this.loading;
  }

  /**
   * Embed a single text string
   */
  async embed(text: string): Promise<number[]> {
    await this.init();

    const output = await this.embedder(text, {
      pooling: 'mean',
      normalize: true,
    });

    // Convert to regular array
    return Array.from(output.data);
  }

  /**
   * Embed multiple texts in batch
   */
  async embedBatch(texts: string[]): Promise<number[][]> {
    await this.init();

    const results: number[][] = [];

    // Process in batches to avoid memory issues
    const batchSize = 32;
    for (let i = 0; i < texts.length; i += batchSize) {
      const batch = texts.slice(i, i + batchSize);

      for (const text of batch) {
        const embedding = await this.embed(text);
        results.push(embedding);
      }
    }

    return results;
  }

  /**
   * Get the embedding dimension for the current model
   */
  getDimension(): number {
    switch (this.model) {
      case 'minilm':
      case 'bge-small':
      case 'e5-small':
        return 384;
      case 'mpnet':
      case 'bge-base':
        return 768;
      default:
        return 384;
    }
  }

  /**
   * Get the model name
   */
  getModelName(): string {
    return this.model;
  }
}

// Singleton instance for default model
let defaultEmbedder: Embedder | null = null;

/**
 * Get the default embedder instance
 */
export function getDefaultEmbedder(model: EmbeddingModelName = 'minilm'): Embedder {
  if (!defaultEmbedder || defaultEmbedder.getModelName() !== model) {
    defaultEmbedder = new Embedder(model);
  }
  return defaultEmbedder;
}

/**
 * Quick helper to embed a single query
 */
export async function embedQuery(text: string, model: EmbeddingModelName = 'minilm'): Promise<number[]> {
  const embedder = getDefaultEmbedder(model);
  return embedder.embed(text);
}
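A short sketch of the embedding helpers above, assuming the package exports from index.ts; model names come from `EMBEDDING_MODELS`, and the input strings are placeholders:

```typescript
// Sketch: local embedding via embedQuery (cached singleton) and a reusable Embedder instance.
import { Embedder, embedQuery } from '@terronex/aifbin-recall';

async function main() {
  // One-off helper: lazily loads and caches a default Embedder for the chosen model.
  const queryVec = await embedQuery('local-first memory for agents', 'minilm');
  console.log(queryVec.length); // 384 for minilm

  // Reusable instance: the underlying pipeline is loaded once and kept in the cache.
  const embedder = new Embedder('bge-small');
  const batch = await embedder.embedBatch(['first note', 'second note']);
  console.log(embedder.getDimension(), batch.length); // 384, 2
}

main();
```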
package/src/index.ts
ADDED
@@ -0,0 +1,46 @@

/**
 * AIF-BIN Recall - Local-first memory server for AI agents
 *
 * @example
 * ```typescript
 * import { EngramDB, SearchEngine, Indexer } from '@terronex/engram';
 *
 * const db = new EngramDB('~/.engram/index.db');
 * const indexer = new Indexer(db);
 * const search = new SearchEngine(db);
 *
 * // Index AIF-BIN files
 * indexer.indexDirectory('./memories', { collection: 'my-project' });
 *
 * // Search with embedding
 * const results = await search.search(queryEmbedding, { collection: 'my-project' });
 * ```
 */

// Core classes
export { EngramDB } from './db.js';
export { SearchEngine, cosineSimilarity } from './search.js';
export { Indexer, parseAifBinFile, findAifBinFiles } from './indexer.js';
export { Embedder, embedQuery, getDefaultEmbedder, EMBEDDING_MODELS, type EmbeddingModelName } from './embedder.js';

// Server functions
export { createServer, startServer } from './server.js';
export { startMcpServer } from './mcp.js';

// Types
export type {
  MemoryChunk,
  Collection,
  SearchResult,
  SearchOptions,
  IndexOptions,
  EngramConfig,
  ServerConfig,
  IndexConfig,
  SearchConfig,
  AifBinFile,
  AifBinChunk,
  AifBinHeader,
} from './types.js';

export { DEFAULT_CONFIG } from './types.js';
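Assuming the published package name `@terronex/aifbin-recall` (the doc comment above still imports from `@terronex/engram`), a minimal end-to-end sketch of the exported API; the directory, collection name, and query are placeholders, and the `hybridSearch` call mirrors what the CLI's `search` command does:

```typescript
// Sketch: index a directory of AIF-BIN files, then run a hybrid (vector + keyword) search.
import { EngramDB, Indexer, SearchEngine, embedQuery } from '@terronex/aifbin-recall';

async function main() {
  const db = new EngramDB('~/.aifbin-recall/index.db'); // same default location the CLI uses
  const indexer = new Indexer(db);
  const search = new SearchEngine(db);

  // Index a directory of AIF-BIN files into a named collection.
  indexer.indexDirectory('./memories', { collection: 'my-project' });

  // Embed the query locally, then search the same way the CLI does.
  const embedding = await embedQuery('deployment checklist', 'minilm');
  const results = await search.hybridSearch(embedding, 'deployment checklist', {
    collection: 'my-project',
    limit: 5,
  });
  console.log(results);
  db.close();
}

main();
```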