@1mbrain/core 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,548 @@
1
+ /**
2
+ * SQLite Database Provider
3
+ *
4
+ * Implements the DatabaseProvider interface using better-sqlite3.
5
+ * Vector search is done via manual cosine similarity calculation since
6
+ * sqlite-vec availability varies. Falls back gracefully.
7
+ *
8
+ * Schema is created inline (no migration tool needed for SQLite).
9
+ */
10
+
11
+ import Database from 'better-sqlite3';
12
+ import { v4 as uuidv4 } from 'uuid';
13
+ import type {
14
+ DatabaseProvider,
15
+ Memory,
16
+ MemoryType,
17
+ Association,
18
+ AssociationOrigin,
19
+ AssociationRelationType,
20
+ } from '../types.js';
21
+ import { createChildLogger } from '../logger.js';
22
+
23
+ const log = createChildLogger('sqlite-provider');
24
+
25
/**
 * SQLite-backed implementation of {@link DatabaseProvider} built on better-sqlite3.
 *
 * - The schema (memories, associations, api_keys) is created on demand by
 *   {@link SqliteDatabaseProvider.initialize}; no migration tool is involved.
 * - Embeddings are stored as raw Float64 blobs and compared in JavaScript via
 *   cosine similarity, so no SQLite extension is required.
 * - Every timestamp this class writes is an ISO-8601 UTC string
 *   (`Date.prototype.toISOString`). Rows that still carry SQLite's zone-less
 *   `datetime('now')` text are interpreted as UTC when read back.
 *
 * The underlying driver calls are synchronous; the methods are `async` only
 * to satisfy the provider interface.
 */
export class SqliteDatabaseProvider implements DatabaseProvider {
  /** Matches SQLite's `datetime('now')` text: UTC, space-separated, no zone marker. */
  private static readonly SQLITE_UTC_TEXT = /^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}(?:\.\d+)?$/;

  /** Shared INSERT used by both single and bulk memory creation. */
  private static readonly INSERT_MEMORY_SQL = `
    INSERT INTO memories (id, agent_id, type, content, embedding_model, embedding, importance, decay_score, created_at, last_accessed_at, tags, metadata)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
  `;

  private db!: Database.Database;
  private readonly dbPath: string;

  /**
   * @param dbPath Path handed to better-sqlite3 when {@link initialize} opens the database.
   */
  constructor(dbPath: string) {
    this.dbPath = dbPath;
  }

  /**
   * Opens the database, applies connection pragmas and creates the schema.
   * Must be called before any other method, because `db` is unset until then.
   */
  async initialize(): Promise<void> {
    log.info({ path: this.dbPath }, 'Initializing SQLite database');

    this.db = new Database(this.dbPath);

    // Performance pragmas. WAL is best-effort: if it cannot be enabled we log
    // and keep the default journal mode instead of failing start-up.
    try {
      this.db.pragma('journal_mode = WAL');
    } catch (err) {
      log.warn({ err }, 'WAL journal mode unavailable; continuing with default journaling');
    }
    this.db.pragma('busy_timeout = 5000');
    this.db.pragma('synchronous = NORMAL');
    this.db.pragma('cache_size = -64000'); // 64MB
    this.db.pragma('foreign_keys = ON'); // required for the ON DELETE CASCADE on associations

    this.createTables();

    log.info('SQLite database initialized');
  }

  /** Creates all tables and indexes if they do not exist yet. */
  private createTables(): void {
    this.db.exec(`
      CREATE TABLE IF NOT EXISTS memories (
        id TEXT PRIMARY KEY,
        agent_id TEXT NOT NULL,
        type TEXT NOT NULL CHECK(type IN ('episodic', 'semantic', 'procedural', 'entity', 'warning')),
        content TEXT NOT NULL,
        embedding_model TEXT,
        embedding BLOB,
        importance REAL NOT NULL DEFAULT 0.5,
        decay_score REAL NOT NULL DEFAULT 1.0,
        created_at TEXT NOT NULL DEFAULT (datetime('now')),
        last_accessed_at TEXT NOT NULL DEFAULT (datetime('now')),
        tags TEXT NOT NULL DEFAULT '[]',
        metadata TEXT
      );

      CREATE INDEX IF NOT EXISTS idx_memories_agent_id ON memories(agent_id);
      CREATE INDEX IF NOT EXISTS idx_memories_type ON memories(agent_id, type);
      CREATE INDEX IF NOT EXISTS idx_memories_decay ON memories(decay_score);
      CREATE INDEX IF NOT EXISTS idx_memories_created ON memories(agent_id, created_at);

      CREATE TABLE IF NOT EXISTS associations (
        source_id TEXT NOT NULL,
        target_id TEXT NOT NULL,
        strength REAL NOT NULL DEFAULT 0.5,
        origin TEXT NOT NULL CHECK(origin IN ('co-occurrence', 'similarity', 'explicit')),
        relation_type TEXT NOT NULL DEFAULT 'relates_to',
        created_at TEXT NOT NULL DEFAULT (datetime('now')),
        PRIMARY KEY (source_id, target_id),
        FOREIGN KEY (source_id) REFERENCES memories(id) ON DELETE CASCADE,
        FOREIGN KEY (target_id) REFERENCES memories(id) ON DELETE CASCADE
      );

      CREATE INDEX IF NOT EXISTS idx_associations_source ON associations(source_id);
      CREATE INDEX IF NOT EXISTS idx_associations_target ON associations(target_id);

      CREATE TABLE IF NOT EXISTS api_keys (
        id TEXT PRIMARY KEY,
        key_hash TEXT NOT NULL UNIQUE,
        agent_id TEXT NOT NULL,
        name TEXT NOT NULL,
        created_at TEXT NOT NULL DEFAULT (datetime('now')),
        last_used_at TEXT,
        is_active INTEGER NOT NULL DEFAULT 1
      );

      CREATE INDEX IF NOT EXISTS idx_api_keys_hash ON api_keys(key_hash);
      CREATE INDEX IF NOT EXISTS idx_api_keys_agent ON api_keys(agent_id);
    `);
  }

  // ─── Memory CRUD ──────────────────────────────────────

  /**
   * Inserts one memory. A UUID v4 is generated when `memory.id` is empty.
   *
   * @returns The stored memory with `createdAt` / `lastAccessedAt` set to the insert time.
   */
  async createMemory(memory: Omit<Memory, 'createdAt' | 'lastAccessedAt'>): Promise<Memory> {
    const id = memory.id || uuidv4();
    const now = new Date().toISOString();

    this.db
      .prepare(SqliteDatabaseProvider.INSERT_MEMORY_SQL)
      .run(...SqliteDatabaseProvider.toInsertParams(id, memory, now));

    log.debug({ id, agentId: memory.agentId, type: memory.type }, 'Memory created');

    return {
      ...memory,
      id,
      createdAt: new Date(now),
      lastAccessedAt: new Date(now),
    };
  }

  /**
   * Loads a memory owned by `agentId` and records the access: `last_accessed_at`
   * is set to now and `decay_score` is raised by 0.05 (capped at 1.0).
   *
   * @returns The memory as it was read, i.e. before this access was recorded,
   *          or `null` when no row matches both `id` and `agentId`.
   */
  async getMemoryById(id: string, agentId: string): Promise<Memory | null> {
    const row = this.readRow(id, agentId);
    if (!row) return null;

    // Bind an ISO timestamp instead of using datetime('now') so the column
    // keeps one format that JavaScript parses as UTC.
    this.db
      .prepare(
        'UPDATE memories SET last_accessed_at = ?, decay_score = MIN(1.0, decay_score + 0.05) WHERE id = ? AND agent_id = ?',
      )
      .run(new Date().toISOString(), id, agentId);

    return this.rowToMemory(row);
  }

  /**
   * Applies a partial update to a memory owned by `agentId`.
   *
   * Only `content`, `type`, `importance`, `decayScore`, `tags`, `metadata` and
   * `embedding` (together with `embeddingModel`) are written; other keys in
   * `updates` are ignored. When none of those keys is present the call behaves
   * like {@link getMemoryById}.
   *
   * An update counts as one access: `last_accessed_at` is refreshed and, unless
   * the caller sets `decayScore` explicitly, `decay_score` is raised by 0.05
   * (capped at 1.0). An explicit `decayScore` is stored exactly as given.
   *
   * @returns The memory as stored after the update, or `null` if it does not exist.
   */
  async updateMemory(
    id: string,
    agentId: string,
    updates: Partial<Memory>,
  ): Promise<Memory | null> {
    // Existence check without side effects, so the row is not reinforced twice.
    if (!this.readRow(id, agentId)) return null;

    const fields: string[] = [];
    const values: unknown[] = [];

    if (updates.content !== undefined) {
      fields.push('content = ?');
      values.push(updates.content);
    }
    if (updates.type !== undefined) {
      fields.push('type = ?');
      values.push(updates.type);
    }
    if (updates.importance !== undefined) {
      fields.push('importance = ?');
      values.push(updates.importance);
    }
    if (updates.decayScore !== undefined) {
      fields.push('decay_score = ?');
      values.push(updates.decayScore);
    }
    if (updates.tags !== undefined) {
      fields.push('tags = ?');
      values.push(JSON.stringify(updates.tags));
    }
    if (updates.metadata !== undefined) {
      fields.push('metadata = ?');
      values.push(JSON.stringify(updates.metadata));
    }
    if (updates.embedding !== undefined) {
      // The model column always travels with the vector; a missing model clears it.
      fields.push('embedding = ?', 'embedding_model = ?');
      values.push(
        SqliteDatabaseProvider.vectorToBlob(updates.embedding),
        updates.embeddingModel ?? null,
      );
    }

    if (fields.length === 0) return this.getMemoryById(id, agentId);

    if (updates.decayScore === undefined) {
      // SQL fragment only, no bound value: reinforce like a single read would.
      fields.push('decay_score = MIN(1.0, decay_score + 0.05)');
    }
    fields.push('last_accessed_at = ?');
    values.push(new Date().toISOString(), id, agentId);

    this.db
      .prepare(`UPDATE memories SET ${fields.join(', ')} WHERE id = ? AND agent_id = ?`)
      .run(...values);

    const updated = this.readRow(id, agentId);
    return updated ? this.rowToMemory(updated) : null;
  }

  /**
   * Deletes a memory owned by `agentId`.
   *
   * @returns `true` when a row was removed.
   */
  async deleteMemory(id: string, agentId: string): Promise<boolean> {
    const result = this.db
      .prepare('DELETE FROM memories WHERE id = ? AND agent_id = ?')
      .run(id, agentId);

    log.debug({ id, agentId, deleted: result.changes > 0 }, 'Memory deleted');
    return result.changes > 0;
  }

  // ─── Vector Search ────────────────────────────────────

  /**
   * Ranks an agent's memories by cosine similarity to `embedding`.
   *
   * All rows of the agent that have an embedding (optionally restricted to
   * `type`) are loaded and scored in JavaScript. Rows below `threshold` are
   * dropped, then rows sharing none of `tags` (when given) are dropped, and the
   * best `limit` matches are returned. Returned rows are marked as accessed:
   * `last_accessed_at` is refreshed and `decay_score` is raised by 0.02
   * (capped at 1.0).
   *
   * @param options.limit Maximum number of results (default 10).
   * @param options.threshold Minimum similarity to keep a row (default 0.3).
   * @param options.type Restrict the search to one memory type.
   * @param options.tags Keep only memories carrying at least one of these tags.
   * @returns Matches sorted by similarity, highest first. The memory objects
   *          reflect the rows as read, before the access was recorded.
   */
  async searchByVector(
    agentId: string,
    embedding: number[],
    options: {
      limit?: number;
      threshold?: number;
      type?: MemoryType;
      tags?: string[];
    } = {},
  ): Promise<Array<{ memory: Memory; similarity: number }>> {
    const { limit = 10, threshold = 0.3, type, tags } = options;

    // Build WHERE clause
    let whereClause = 'agent_id = ? AND embedding IS NOT NULL';
    const params: unknown[] = [agentId];

    if (type) {
      whereClause += ' AND type = ?';
      params.push(type);
    }

    const rows = this.db
      .prepare(`SELECT * FROM memories WHERE ${whereClause}`)
      .all(...params) as MemoryRow[];

    // Score first and materialize afterwards, so JSON parsing and the second
    // blob decode only happen for rows that survive the threshold.
    const matches: Array<{ memory: Memory; similarity: number }> = [];
    for (const row of rows) {
      const storedEmbedding = this.blobToVector(row.embedding);
      if (!storedEmbedding) continue;

      const similarity = cosineSimilarity(embedding, storedEmbedding);
      // Written as a negated >= so a NaN similarity is rejected as well.
      if (!(similarity >= threshold)) continue;

      const memory = this.rowToMemory(row);
      if (tags?.length && !tags.some((t) => memory.tags.includes(t))) continue;

      matches.push({ memory, similarity });
    }

    // Sort by similarity descending, take top N
    matches.sort((a, b) => b.similarity - a.similarity);
    const topResults = matches.slice(0, limit);

    // Record the access for the memories that are actually returned
    if (topResults.length > 0) {
      const ids = topResults.map((r) => r.memory.id);
      const placeholders = ids.map(() => '?').join(',');
      this.db
        .prepare(
          `UPDATE memories SET last_accessed_at = ?, decay_score = MIN(1.0, decay_score + 0.02) WHERE id IN (${placeholders})`,
        )
        .run(new Date().toISOString(), ...ids);
    }

    return topResults;
  }

  // ─── Associations ─────────────────────────────────────

  /**
   * Inserts an association or, when the (source, target) pair already exists,
   * keeps the larger strength and overwrites origin and relation type.
   * `relationType` defaults to `'relates_to'`.
   *
   * @returns The association as requested with `relationType` defaulted and
   *          `createdAt` set to now. For an existing pair the stored strength
   *          and creation time may differ from the returned values.
   */
  async createAssociation(association: Omit<Association, 'createdAt'>): Promise<Association> {
    const now = new Date().toISOString();
    const relationType = association.relationType ?? 'relates_to';

    // Upsert: if association exists, update strength
    this.db
      .prepare(
        `INSERT INTO associations (source_id, target_id, strength, origin, relation_type, created_at)
         VALUES (?, ?, ?, ?, ?, ?)
         ON CONFLICT(source_id, target_id) DO UPDATE SET
           strength = MAX(associations.strength, excluded.strength),
           origin = excluded.origin,
           relation_type = excluded.relation_type`,
      )
      .run(
        association.sourceId,
        association.targetId,
        association.strength,
        association.origin,
        relationType,
        now,
      );

    log.debug(
      { sourceId: association.sourceId, targetId: association.targetId },
      'Association created/updated',
    );

    // Report the relation type that was actually written, matching bulkCreateAssociations.
    return { ...association, relationType, createdAt: new Date(now) };
  }

  /** Returns every association in which `memoryId` is the source or the target. */
  async getAssociations(memoryId: string): Promise<Association[]> {
    const rows = this.db
      .prepare(`SELECT * FROM associations WHERE source_id = ? OR target_id = ?`)
      .all(memoryId, memoryId) as AssociationRow[];

    return rows.map((row) => this.rowToAssociation(row));
  }

  /**
   * Removes every association in which `memoryId` is the source or the target.
   *
   * @returns Number of deleted rows.
   */
  async deleteAssociations(memoryId: string): Promise<number> {
    const result = this.db
      .prepare('DELETE FROM associations WHERE source_id = ? OR target_id = ?')
      .run(memoryId, memoryId);

    return result.changes;
  }

  // ─── Bulk Operations ──────────────────────────────────

  /** Lists the distinct agent ids that own at least one memory, sorted ascending. */
  async listAgentIds(): Promise<string[]> {
    const rows = this.db
      .prepare('SELECT DISTINCT agent_id FROM memories ORDER BY agent_id ASC')
      .all() as Array<{ agent_id: string }>;

    return rows.map((row) => row.agent_id);
  }

  /** Returns all memories of an agent, newest first. Does not record an access. */
  async getAllMemories(agentId: string): Promise<Memory[]> {
    const rows = this.db
      .prepare('SELECT * FROM memories WHERE agent_id = ? ORDER BY created_at DESC')
      .all(agentId) as MemoryRow[];

    return rows.map((row) => this.rowToMemory(row));
  }

  /** Returns all associations whose source memory belongs to `agentId`. */
  async getAllAssociations(agentId: string): Promise<Association[]> {
    const rows = this.db
      .prepare(
        `SELECT a.* FROM associations a
         JOIN memories m ON a.source_id = m.id
         WHERE m.agent_id = ?`,
      )
      .all(agentId) as AssociationRow[];

    return rows.map((row) => this.rowToAssociation(row));
  }

  /**
   * Inserts many memories inside one transaction; if any insert throws, none
   * of them is kept. Ids are generated for entries without one, and all rows
   * share the same creation timestamp.
   */
  async bulkCreateMemories(
    memories: Array<Omit<Memory, 'createdAt' | 'lastAccessedAt'>>,
  ): Promise<Memory[]> {
    const now = new Date().toISOString();
    const stmt = this.db.prepare(SqliteDatabaseProvider.INSERT_MEMORY_SQL);

    const insertMany = this.db.transaction(
      (items: Array<Omit<Memory, 'createdAt' | 'lastAccessedAt'>>) =>
        items.map((m) => {
          const id = m.id || uuidv4();
          stmt.run(...SqliteDatabaseProvider.toInsertParams(id, m, now));

          return {
            ...m,
            id,
            createdAt: new Date(now),
            lastAccessedAt: new Date(now),
          };
        }),
    );

    return insertMany(memories);
  }

  /**
   * Inserts many associations inside one transaction. Unlike
   * {@link createAssociation}, an existing (source, target) pair is replaced
   * wholesale (`INSERT OR REPLACE`), including its strength and creation time.
   */
  async bulkCreateAssociations(
    associations: Array<Omit<Association, 'createdAt'>>,
  ): Promise<Association[]> {
    const now = new Date().toISOString();
    const stmt = this.db.prepare(`
      INSERT OR REPLACE INTO associations (source_id, target_id, strength, origin, relation_type, created_at)
      VALUES (?, ?, ?, ?, ?, ?)
    `);

    const insertMany = this.db.transaction((items: Array<Omit<Association, 'createdAt'>>) => {
      return items.map((a) => {
        stmt.run(a.sourceId, a.targetId, a.strength, a.origin, a.relationType ?? 'relates_to', now);
        return { ...a, relationType: a.relationType ?? 'relates_to', createdAt: new Date(now) };
      });
    });

    return insertMany(associations);
  }

  // ─── Decay ────────────────────────────────────────────

  /**
   * Multiplies every `decay_score` above `minScore` by `1 - decayRate`, never
   * letting it fall below `minScore`. Applies to all agents.
   *
   * @returns Number of memories whose score was updated.
   */
  async applyDecay(decayRate: number, minScore: number): Promise<number> {
    const result = this.db
      .prepare(
        `UPDATE memories SET decay_score = MAX(?, decay_score * ?)
         WHERE decay_score > ?`,
      )
      .run(minScore, 1 - decayRate, minScore);

    log.debug({ affected: result.changes, decayRate }, 'Decay applied');
    return result.changes;
  }

  /**
   * Multiplies the strength of every non-explicit association above
   * `minStrength` by `1 - decayRate`, never letting it fall below
   * `minStrength`. Associations with origin `'explicit'` are left untouched.
   *
   * @returns Number of associations whose strength was updated.
   */
  async applyAssociationDecay(decayRate: number, minStrength: number): Promise<number> {
    const result = this.db
      .prepare(
        `UPDATE associations SET strength = MAX(?, strength * ?)
         WHERE strength > ? AND origin != 'explicit'`,
      )
      .run(minStrength, 1 - decayRate, minStrength);

    log.debug({ affected: result.changes, decayRate }, 'Association decay applied');
    return result.changes;
  }

  // ─── Lifecycle ────────────────────────────────────────

  /** Closes the underlying database connection. */
  async close(): Promise<void> {
    this.db.close();
    log.info('SQLite database closed');
  }

  // ─── Private Helpers ──────────────────────────────────

  /** Reads a memory row scoped to its owner without recording an access. */
  private readRow(id: string, agentId: string): MemoryRow | undefined {
    return this.db
      .prepare('SELECT * FROM memories WHERE id = ? AND agent_id = ?')
      .get(id, agentId) as MemoryRow | undefined;
  }

  /** Builds the positional parameters for {@link INSERT_MEMORY_SQL}. */
  private static toInsertParams(
    id: string,
    memory: Omit<Memory, 'createdAt' | 'lastAccessedAt'>,
    now: string,
  ): unknown[] {
    return [
      id,
      memory.agentId,
      memory.type,
      memory.content,
      memory.embeddingModel ?? null, // bind null rather than undefined when no model is given
      SqliteDatabaseProvider.vectorToBlob(memory.embedding),
      memory.importance,
      memory.decayScore,
      now,
      now,
      JSON.stringify(memory.tags),
      memory.metadata ? JSON.stringify(memory.metadata) : null,
    ];
  }

  /** Serializes a vector as the raw bytes of a Float64Array (platform byte order). */
  private static vectorToBlob(vector: number[] | null | undefined): Buffer | null {
    return vector ? Buffer.from(new Float64Array(vector).buffer) : null;
  }

  /**
   * Parses a stored timestamp. ISO-8601 strings written by this class pass
   * through unchanged. Zone-less `YYYY-MM-DD HH:MM:SS` text (what SQLite's
   * `datetime('now')` produces, in UTC) is rewritten to ISO-8601 with a `Z`
   * suffix first, because JavaScript would otherwise read it as local time.
   */
  private static parseTimestamp(value: string): Date {
    const normalized = SqliteDatabaseProvider.SQLITE_UTC_TEXT.test(value)
      ? `${value.replace(' ', 'T')}Z`
      : value;
    return new Date(normalized);
  }

  /** Converts a raw `memories` row into the public {@link Memory} shape. */
  private rowToMemory(row: MemoryRow): Memory {
    return {
      id: row.id,
      agentId: row.agent_id,
      type: row.type as MemoryType,
      content: row.content,
      embeddingModel: row.embedding_model,
      embedding: this.blobToVector(row.embedding),
      importance: row.importance,
      decayScore: row.decay_score,
      createdAt: SqliteDatabaseProvider.parseTimestamp(row.created_at),
      lastAccessedAt: SqliteDatabaseProvider.parseTimestamp(row.last_accessed_at),
      tags: JSON.parse(row.tags) as string[],
      metadata: row.metadata ? (JSON.parse(row.metadata) as Record<string, unknown>) : undefined,
    };
  }

  /** Converts a raw `associations` row into the public {@link Association} shape. */
  private rowToAssociation(row: AssociationRow): Association {
    return {
      sourceId: row.source_id,
      targetId: row.target_id,
      strength: row.strength,
      origin: row.origin as AssociationOrigin,
      relationType: (row.relation_type ?? 'relates_to') as AssociationRelationType,
      createdAt: SqliteDatabaseProvider.parseTimestamp(row.created_at),
    };
  }

  /**
   * Decodes a Float64 blob back into a number array.
   *
   * A Float64Array view requires its byte offset to be a multiple of 8. A
   * Buffer can be a window into a larger allocation at any offset, so a
   * misaligned blob is copied into a fresh buffer before the view is created.
   * Trailing bytes that do not form a complete 8-byte value are ignored.
   */
  private blobToVector(blob: Buffer | null): number[] | null {
    if (!blob) return null;

    const width = Float64Array.BYTES_PER_ELEMENT;
    const count = Math.floor(blob.byteLength / width);
    if (blob.byteOffset % width === 0) {
      return Array.from(new Float64Array(blob.buffer, blob.byteOffset, count));
    }

    const copy = new Uint8Array(blob); // copies into a new ArrayBuffer starting at offset 0
    return Array.from(new Float64Array(copy.buffer, 0, count));
  }
}
502
+
503
+ // ─── Cosine Similarity ──────────────────────────────────
504
+
505
/**
 * Cosine of the angle between two vectors.
 *
 * @returns `dot(a, b) / (|a| * |b|)`, or 0 when the vectors differ in length
 *          or when either of them has zero magnitude (which includes two
 *          empty vectors).
 */
function cosineSimilarity(a: number[], b: number[]): number {
  const size = a.length;
  if (size !== b.length) {
    return 0;
  }

  // Accumulate the dot product and both squared magnitudes in a single pass.
  let dot = 0;
  let squaresA = 0;
  let squaresB = 0;
  let idx = 0;
  while (idx < size) {
    const x = a[idx];
    const y = b[idx];
    dot += x * y;
    squaresA += x * x;
    squaresB += y * y;
    idx += 1;
  }

  const magnitudes = Math.sqrt(squaresA) * Math.sqrt(squaresB);
  return magnitudes === 0 ? 0 : dot / magnitudes;
}
523
+
524
+ // ─── Row Types ──────────────────────────────────────────
525
+
526
/**
 * Shape of a row in the `memories` table as returned by the SQLite driver.
 * Column names stay snake_case; structured values are still serialized.
 */
interface MemoryRow {
  /** Primary key. */
  id: string;
  /** Owner of the memory. */
  agent_id: string;
  /** Memory type stored as plain text. */
  type: string;
  content: string;
  /** Name of the model that produced `embedding`, if recorded. */
  embedding_model: string | null;
  /** Raw embedding bytes, or null when the memory has no embedding. */
  embedding: Buffer | null;
  importance: number;
  decay_score: number;
  /** Creation time as text. */
  created_at: string;
  /** Last access time as text. */
  last_accessed_at: string;
  /** JSON-encoded array of tag strings. */
  tags: string;
  /** JSON-encoded metadata object, or null when absent. */
  metadata: string | null;
}
540
+
541
/**
 * Shape of a row in the `associations` table as returned by the SQLite driver.
 * Column names stay snake_case.
 */
interface AssociationRow {
  /** Id of the memory the association starts from. */
  source_id: string;
  /** Id of the memory the association points to. */
  target_id: string;
  strength: number;
  /** Association origin stored as plain text. */
  origin: string;
  /** Relation type stored as plain text. */
  relation_type: string;
  /** Creation time as text. */
  created_at: string;
}
@@ -0,0 +1,56 @@
1
+ /**
2
+ * Embedding Provider Factory
3
+ *
4
+ * Creates the appropriate embedding provider based on configuration.
5
+ */
6
+
7
+ import type { EmbeddingProvider, OneMBrainConfig } from '../types.js';
8
+ import { OpenAIEmbeddingProvider } from './openai-provider.js';
9
+ import { OllamaEmbeddingProvider } from './ollama-provider.js';
10
+ import { KeywordEmbeddingProvider } from './keyword-provider.js';
11
+ import { createChildLogger } from '../logger.js';
12
+
13
+ const log = createChildLogger('embedding-factory');
14
+
15
/**
 * Builds the embedding provider selected by `config.provider`.
 *
 * - `openai`: requires `config.openai.apiKey`.
 * - `ollama`: falls back to `http://localhost:11434` and `nomic-embed-text`
 *   when base URL or model are missing or empty.
 * - `claude`: always rejected; no adapter exists.
 * - `local-keyword`: falls back to 256 dimensions when none (or 0) is configured.
 *
 * @throws Error when the OpenAI key is missing, when `claude` is selected, or
 *         when the provider name is not one of the above.
 */
export function createEmbeddingProvider(
  config: OneMBrainConfig['embedding'],
): EmbeddingProvider {
  if (config.provider === 'openai') {
    const openai = config.openai;
    if (!openai?.apiKey) {
      throw new Error('OpenAI API key is required for OpenAI embedding provider');
    }
    log.info({ provider: 'openai', model: openai.model }, 'Creating OpenAI embedding provider');
    return new OpenAIEmbeddingProvider(openai.apiKey, openai.model);
  }

  if (config.provider === 'ollama') {
    const ollama = config.ollama;
    const baseUrl = ollama?.baseUrl || 'http://localhost:11434';
    const model = ollama?.model || 'nomic-embed-text';
    log.info({ provider: 'ollama', model, baseUrl }, 'Creating Ollama embedding provider');
    return new OllamaEmbeddingProvider(baseUrl, model);
  }

  if (config.provider === 'claude') {
    // No embeddings adapter for Claude exists here; add one in this branch
    // once it becomes available.
    throw new Error(
      'Claude embedding provider is not yet available. ' +
        'Use OpenAI or Ollama for embeddings, or implement a custom adapter.',
    );
  }

  if (config.provider === 'local-keyword') {
    const dimensions = config.localKeyword?.dimensions || 256;
    log.info({ provider: 'local-keyword', dimensions }, 'Creating local keyword embedding provider');
    return new KeywordEmbeddingProvider(dimensions);
  }

  throw new Error(`Unknown embedding provider: ${config.provider}`);
}
53
+
54
+ export { OpenAIEmbeddingProvider } from './openai-provider.js';
55
+ export { OllamaEmbeddingProvider } from './ollama-provider.js';
56
+ export { KeywordEmbeddingProvider } from './keyword-provider.js';
@@ -0,0 +1,71 @@
1
+ /**
2
+ * Deterministic local embedding provider.
3
+ *
4
+ * Uses hashed token features so the API can run fully offline without
5
+ * external embedding services. The output is stable across runs.
6
+ */
7
+
8
+ import type { EmbeddingProvider } from '../types.js';
9
+
10
/** Vector size used when the provider is constructed without an explicit one. */
const DEFAULT_DIMENSIONS = 256;

/** A token is a maximal run of lowercase ASCII letters and digits. */
const TOKEN_RE = /[a-z0-9]+/g;

/** Lowercases the text so that tokenization is case-insensitive. */
function normalizeText(text: string): string {
  const lowered = text.toLowerCase();
  return lowered;
}

/**
 * Splits text into lowercase ASCII alphanumeric tokens.
 * Every other character acts as a separator; returns an empty array when the
 * text contains no token at all.
 */
function tokenize(text: string): string[] {
  const found = normalizeText(text).match(TOKEN_RE);
  if (found === null) {
    return [];
  }
  return found;
}
20
+
21
/**
 * 32-bit FNV-1a hash computed over the token's UTF-16 code units.
 *
 * @returns An unsigned 32-bit integer; the empty string hashes to the offset
 *          basis 2166136261.
 */
function hashToken(token: string): number {
  const prime = 16777619;
  let state = 2166136261;

  let pos = 0;
  while (pos < token.length) {
    // XOR the code unit in, then multiply with 32-bit wrap-around.
    state = Math.imul(state ^ token.charCodeAt(pos), prime);
    pos += 1;
  }

  // Reinterpret the signed 32-bit result as unsigned.
  return state >>> 0;
}
29
+
30
/**
 * Offline embedding provider based on feature hashing.
 *
 * Each token adds a weight to the bucket selected by its hash; tokens longer
 * than three characters additionally add a smaller weight to the bucket of
 * their three-character prefix. The result is scaled to unit length. Output
 * depends only on the input text and the configured dimension count.
 */
export class KeywordEmbeddingProvider implements EmbeddingProvider {
  readonly name = 'local-keyword';
  readonly model = 'local-keyword-v1';
  readonly dimensions: number;

  /**
   * @param dimensions Length of the produced vectors (defaults to `DEFAULT_DIMENSIONS`).
   */
  constructor(dimensions: number = DEFAULT_DIMENSIONS) {
    this.dimensions = dimensions;
  }

  /**
   * Embeds one text.
   *
   * @returns A vector of `dimensions` numbers: all zeros when the text has no
   *          tokens, otherwise normalized to unit length.
   */
  async embed(text: string): Promise<number[]> {
    const size = this.dimensions;
    const buckets = new Array<number>(size).fill(0);
    const words = tokenize(text);

    if (words.length === 0) {
      return buckets;
    }

    words.forEach((word) => {
      const digest = hashToken(word);
      // Whole-token feature: weight between 1.0 and 1.6, derived from the hash.
      buckets[digest % size] += 1 + (digest % 7) * 0.1;

      // Prefix feature: a fixed, smaller weight for the first three characters.
      if (word.length > 3) {
        const prefixDigest = hashToken(word.slice(0, 3));
        buckets[prefixDigest % size] += 0.35;
      }
    });

    const sumOfSquares = buckets.reduce((acc, value) => acc + value * value, 0);
    // Fall back to 1 so an all-zero vector is returned unchanged instead of dividing by zero.
    const length = Math.sqrt(sumOfSquares) || 1;

    return buckets.map((value) => value / length);
  }

  /** Embeds several texts; results are returned in input order. */
  async embedBatch(texts: string[]): Promise<number[][]> {
    const pending = texts.map((text) => this.embed(text));
    return Promise.all(pending);
  }
}