memory-lancedb-pro 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,354 @@
1
+ /**
2
+ * Embedding Abstraction Layer
3
+ * OpenAI-compatible API for various embedding providers.
4
+ *
5
+ * Note: Some providers (e.g. Jina) support extra parameters like `task` and
6
+ * `normalized` on the embeddings endpoint. The OpenAI SDK types do not include
7
+ * these fields, so we pass them via a narrow `any` cast.
8
+ */
9
+
10
+ import OpenAI from "openai";
11
+ import { createHash } from "node:crypto";
12
+
13
+ // ============================================================================
14
+ // Embedding Cache (LRU with TTL)
15
+ // ============================================================================
16
+
17
/** A single cached embedding plus the time it was written. */
interface CacheEntry {
  /** `Date.now()` value captured when the entry was stored; compared against the TTL on reads. */
  createdAt: number;
  /** The embedding vector that was cached. */
  vector: number[];
}
21
+
22
/**
 * In-memory embedding cache with least-recently-used eviction and a per-entry TTL.
 *
 * Entries are kept in a `Map`, which iterates in insertion order: the first key
 * is the least recently used one and the last key is the most recently used.
 * Keys are the first 24 hex characters of the SHA-256 of `"<task>:<text>"`, so
 * the same text embedded under different tasks is cached separately.
 */
class EmbeddingCache {
  private cache = new Map<string, CacheEntry>();
  private readonly maxSize: number;
  private readonly ttlMs: number;
  /** Number of `get` calls that returned a live entry. */
  public hits = 0;
  /** Number of `get` calls that found nothing or found an expired entry. */
  public misses = 0;

  /**
   * @param maxSize Maximum number of entries kept before the oldest is evicted.
   * @param ttlMinutes Lifetime of an entry, in minutes.
   */
  constructor(maxSize = 256, ttlMinutes = 30) {
    this.maxSize = maxSize;
    this.ttlMs = ttlMinutes * 60_000;
  }

  /** Derives the map key for a text/task pair (a missing task hashes as the empty string). */
  private key(text: string, task?: string): string {
    const hash = createHash("sha256").update(`${task || ""}:${text}`).digest("hex").slice(0, 24);
    return hash;
  }

  /**
   * Looks up a cached vector.
   *
   * @returns The cached vector, or `undefined` when there is no entry or the
   *          entry is older than the TTL (expired entries are removed).
   */
  get(text: string, task?: string): number[] | undefined {
    const k = this.key(text, task);
    const entry = this.cache.get(k);
    if (!entry) {
      this.misses++;
      return undefined;
    }
    if (Date.now() - entry.createdAt > this.ttlMs) {
      this.cache.delete(k);
      this.misses++;
      return undefined;
    }
    // Re-insert so the entry moves to the end of the Map (most recently used).
    this.cache.delete(k);
    this.cache.set(k, entry);
    this.hits++;
    return entry.vector;
  }

  /**
   * Stores a vector, evicting the least recently used entry when the cache is full.
   *
   * Overwriting an existing key refreshes its timestamp and LRU position and
   * never evicts another entry.
   */
  set(text: string, task: string | undefined, vector: number[]): void {
    const k = this.key(text, task);
    // Remove any previous entry for this key first. Otherwise an overwrite in a
    // full cache would evict an unrelated entry, and Map.set on an existing key
    // would leave the refreshed entry at its old (stale) position.
    this.cache.delete(k);
    if (this.cache.size >= this.maxSize) {
      const firstKey = this.cache.keys().next().value;
      if (firstKey !== undefined) this.cache.delete(firstKey);
    }
    this.cache.set(k, { vector, createdAt: Date.now() });
  }

  /** Current number of entries (expired entries count until a read removes them). */
  get size(): number {
    return this.cache.size;
  }

  /** Snapshot of the counters; `hitRate` is a percentage string, or "N/A" before any lookup. */
  get stats(): { size: number; hits: number; misses: number; hitRate: string } {
    const total = this.hits + this.misses;
    return {
      size: this.cache.size,
      hits: this.hits,
      misses: this.misses,
      hitRate: total > 0 ? `${((this.hits / total) * 100).toFixed(1)}%` : "N/A",
    };
  }
}
79
+
80
+ // ============================================================================
81
+ // Types & Configuration
82
+ // ============================================================================
83
+
84
/** Settings accepted by the {@link Embedder} constructor. */
export interface EmbeddingConfig {
  /** Provider kind; the only accepted value is "openai-compatible". */
  provider: "openai-compatible";
  /** Model identifier sent with every embeddings request. */
  model: string;
  /** API key; `${NAME}` placeholders are replaced from the environment by the Embedder. */
  apiKey: string;
  /** Base URL handed to the OpenAI client when set. */
  baseURL?: string;
  /** Explicit vector size; when positive it takes precedence over the known-model table. */
  dimensions?: number;

  /** Optional task type for query embeddings (e.g. "retrieval.query") */
  taskQuery?: string;
  /** Optional task type for passage/document embeddings (e.g. "retrieval.passage") */
  taskPassage?: string;
  /** Optional flag to request normalized embeddings (provider-dependent, e.g. Jina v5) */
  normalized?: boolean;
}
98
+
99
/**
 * Vector sizes for the embedding models this module knows about, keyed by model
 * name. Models missing from this table need an explicit `dimensions` override.
 */
const EMBEDDING_DIMENSIONS: Record<string, number> = {
  "text-embedding-3-small": 1536,
  "text-embedding-3-large": 3072,
  "text-embedding-004": 768,
  "gemini-embedding-001": 3072,
  "nomic-embed-text": 768,
  "mxbai-embed-large": 1024,
  "BAAI/bge-m3": 1024,
  "all-MiniLM-L6-v2": 384,
  "all-mpnet-base-v2": 768,

  // Jina v5
  "jina-embeddings-v5-text-small": 1024,
  "jina-embeddings-v5-text-nano": 768,
};
115
+
116
+ // ============================================================================
117
+ // Utility Functions
118
+ // ============================================================================
119
+
120
/**
 * Expands every `${NAME}` placeholder in `value` with the matching environment
 * variable.
 *
 * @param value Text that may contain `${NAME}` placeholders.
 * @returns The text with all placeholders substituted.
 * @throws Error when a referenced variable is unset or empty.
 */
function resolveEnvVars(value: string): string {
  const placeholder = /\$\{([^}]+)\}/g;

  const substitute = (_match: string, envVar: string): string => {
    const resolved = process.env[envVar];
    if (resolved) {
      return resolved;
    }
    throw new Error(`Environment variable ${envVar} is not set`);
  };

  return value.replace(placeholder, substitute);
}
129
+
130
/**
 * Resolves the embedding vector size for a model.
 *
 * @param model Model name to look up in `EMBEDDING_DIMENSIONS`.
 * @param overrideDims Explicit size; used as-is when it is a positive number.
 * @returns The override when given, otherwise the table entry for `model`.
 * @throws Error when there is no positive override and the model is not in the table.
 */
export function getVectorDimensions(model: string, overrideDims?: number): number {
  if (overrideDims && overrideDims > 0) {
    return overrideDims;
  }

  // Only consult own keys: a bare `table[model]` also resolves inherited
  // members, so a model named "constructor" or "toString" would come back as a
  // function instead of being rejected.
  const dims = Object.prototype.hasOwnProperty.call(EMBEDDING_DIMENSIONS, model)
    ? EMBEDDING_DIMENSIONS[model]
    : undefined;

  if (!dims) {
    throw new Error(
      `Unsupported embedding model: ${model}. Either add it to EMBEDDING_DIMENSIONS or set embedding.dimensions in config.`
    );
  }

  return dims;
}
144
+
145
+ // ============================================================================
146
+ // Embedder Class
147
+ // ============================================================================
148
+
149
/**
 * Embedding client for OpenAI-compatible endpoints.
 *
 * Wraps the OpenAI SDK client, adds optional provider-specific request fields
 * (`task`, `normalized`), checks that every returned vector has the configured
 * size, and caches single-text embeddings.
 */
export class Embedder {
  private client: OpenAI;
  /** Vector size every returned embedding is validated against. */
  public readonly dimensions: number;
  private readonly _cache: EmbeddingCache;

  private readonly _model: string;
  private readonly _taskQuery?: string;
  private readonly _taskPassage?: string;
  private readonly _normalized?: boolean;

  /**
   * @param config Provider settings. `${NAME}` placeholders in `apiKey` are
   *               resolved from the environment.
   * @throws Error when a referenced environment variable is missing, or when the
   *         model has no known dimension and no override is configured.
   */
  constructor(config: EmbeddingConfig) {
    // Resolve environment variables in API key
    const resolvedApiKey = resolveEnvVars(config.apiKey);

    this._model = config.model;
    this._taskQuery = config.taskQuery;
    this._taskPassage = config.taskPassage;
    this._normalized = config.normalized;

    this.client = new OpenAI({
      apiKey: resolvedApiKey,
      ...(config.baseURL ? { baseURL: config.baseURL } : {}),
    });

    this.dimensions = getVectorDimensions(config.model, config.dimensions);
    this._cache = new EmbeddingCache(256, 30); // 256 entries, 30 min TTL
  }

  // --------------------------------------------------------------------------
  // Backward-compatible API
  // --------------------------------------------------------------------------

  /**
   * Backward-compatible embedding API.
   *
   * Historically the plugin used a single `embed()` method for both query and
   * passage embeddings. With task-aware providers we treat this as passage.
   */
  async embed(text: string): Promise<number[]> {
    return this.embedPassage(text);
  }

  /** Backward-compatible batch embedding API (treated as passage). */
  async embedBatch(texts: string[]): Promise<number[][]> {
    return this.embedBatchPassage(texts);
  }

  // --------------------------------------------------------------------------
  // Task-aware API
  // --------------------------------------------------------------------------

  /** Embeds one text using the configured query task, if any. */
  async embedQuery(text: string): Promise<number[]> {
    return this.embedSingle(text, this._taskQuery);
  }

  /** Embeds one text using the configured passage task, if any. */
  async embedPassage(text: string): Promise<number[]> {
    return this.embedSingle(text, this._taskPassage);
  }

  /** Embeds several texts in one request using the configured query task, if any. */
  async embedBatchQuery(texts: string[]): Promise<number[][]> {
    return this.embedMany(texts, this._taskQuery);
  }

  /** Embeds several texts in one request using the configured passage task, if any. */
  async embedBatchPassage(texts: string[]): Promise<number[][]> {
    return this.embedMany(texts, this._taskPassage);
  }

  // --------------------------------------------------------------------------
  // Internals
  // --------------------------------------------------------------------------

  /**
   * Ensures a provider result is an array of exactly `this.dimensions` elements.
   *
   * @throws Error when the value is not an array or has the wrong length.
   */
  private validateEmbedding(embedding: number[]): void {
    if (!Array.isArray(embedding)) {
      throw new Error(`Embedding is not an array (got ${typeof embedding})`);
    }
    if (embedding.length !== this.dimensions) {
      throw new Error(
        `Embedding dimension mismatch: expected ${this.dimensions}, got ${embedding.length}`
      );
    }
  }

  /**
   * Builds the request body. `task` and `normalized` are not part of the OpenAI
   * SDK types, so the payload is deliberately typed as `any`; both fields are
   * only added when they are configured.
   */
  private buildPayload(input: string | string[], task?: string): any {
    const payload: any = {
      model: this.model,
      input,
    };

    if (task) payload.task = task;
    if (this._normalized !== undefined) payload.normalized = this._normalized;

    return payload;
  }

  /**
   * Embeds a single text, serving it from the cache when possible.
   *
   * @throws Error for empty/whitespace-only text, and a wrapped
   *         "Failed to generate embedding" error (original error as `cause`)
   *         for any provider or validation failure.
   */
  private async embedSingle(text: string, task?: string): Promise<number[]> {
    if (!text || text.trim().length === 0) {
      throw new Error("Cannot embed empty text");
    }

    // Check cache first
    const cached = this._cache.get(text, task);
    if (cached) return cached;

    try {
      const response = await this.client.embeddings.create(this.buildPayload(text, task) as any);
      const embedding = response.data[0]?.embedding as number[] | undefined;
      if (!embedding) {
        throw new Error("No embedding returned from provider");
      }

      this.validateEmbedding(embedding);
      this._cache.set(text, task, embedding);
      return embedding;
    } catch (error) {
      if (error instanceof Error) {
        throw new Error(`Failed to generate embedding: ${error.message}`, { cause: error });
      }
      throw new Error(`Failed to generate embedding: ${String(error)}`);
    }
  }

  /**
   * Embeds several texts with one request.
   *
   * Empty or whitespace-only texts are not sent; their slot in the result is an
   * empty array. The result always has the same length and order as `texts`.
   * This path does not read or write the cache.
   *
   * @throws Error wrapped as "Failed to generate batch embeddings" (original
   *         error as `cause`) when the request fails, a vector fails validation,
   *         or the provider returns a different number of vectors than inputs.
   */
  private async embedMany(texts: string[], task?: string): Promise<number[][]> {
    if (!texts || texts.length === 0) {
      return [];
    }

    // Filter out empty texts and remember where each kept text came from
    const validTexts: string[] = [];
    const validIndices: number[] = [];

    texts.forEach((text, index) => {
      if (text && text.trim().length > 0) {
        validTexts.push(text);
        validIndices.push(index);
      }
    });

    if (validTexts.length === 0) {
      return texts.map(() => []);
    }

    try {
      const response = await this.client.embeddings.create(
        this.buildPayload(validTexts, task) as any
      );
      const items = response.data;

      // A short or long response cannot be mapped back to the inputs reliably;
      // fail loudly instead of returning empty vectors for real texts.
      if (items.length !== validTexts.length) {
        throw new Error(
          `Provider returned ${items.length} embeddings for ${validTexts.length} inputs`
        );
      }

      // Each response item carries the `index` of the input it belongs to. Use
      // it when the reported indices form a complete permutation of the inputs;
      // otherwise fall back to array position.
      const reported = items.map((item) => (item as { index?: unknown }).index);
      const indicesUsable =
        new Set(reported).size === reported.length &&
        reported.every(
          (i) => typeof i === "number" && Number.isInteger(i) && i >= 0 && i < validTexts.length
        );

      // Start with empty vectors everywhere; skipped (blank) texts keep them.
      const results: number[][] = texts.map(() => []);

      items.forEach((item, position) => {
        const slot = indicesUsable ? (reported[position] as number) : position;
        const embedding = item.embedding as number[];

        this.validateEmbedding(embedding);
        results[validIndices[slot]] = embedding;
      });

      return results;
    } catch (error) {
      if (error instanceof Error) {
        throw new Error(`Failed to generate batch embeddings: ${error.message}`, { cause: error });
      }
      throw new Error(`Failed to generate batch embeddings: ${String(error)}`);
    }
  }

  /** Model name this embedder was configured with. */
  get model(): string {
    return this._model;
  }

  /**
   * Tests the connection and configuration by embedding the word "test".
   *
   * @returns `success: true` with the returned vector length, or
   *          `success: false` with the error message. Never throws.
   */
  async test(): Promise<{ success: boolean; error?: string; dimensions?: number }> {
    try {
      const testEmbedding = await this.embedPassage("test");
      return {
        success: true,
        dimensions: testEmbedding.length,
      };
    } catch (error) {
      return {
        success: false,
        error: error instanceof Error ? error.message : String(error),
      };
    }
  }

  /** Hit/miss statistics of the single-text embedding cache. */
  get cacheStats() {
    return this._cache.stats;
  }
}
347
+
348
+ // ============================================================================
349
+ // Factory Function
350
+ // ============================================================================
351
+
352
/**
 * Factory for {@link Embedder}.
 *
 * @param config Embedding provider settings, passed to the constructor unchanged.
 * @returns A newly constructed embedder.
 */
export function createEmbedder(config: EmbeddingConfig): Embedder {
  const embedder = new Embedder(config);
  return embedder;
}
package/src/migrate.ts ADDED
@@ -0,0 +1,356 @@
1
+ /**
2
+ * Migration Utilities
3
+ * Migrates data from old memory-lancedb plugin to memory-lancedb-pro
4
+ */
5
+
6
+ import { homedir } from "node:os";
7
+ import { join } from "node:path";
8
+ import fs from "node:fs/promises";
9
+ import type { MemoryStore, MemoryEntry } from "./store.js";
10
+ import { loadLanceDB } from "./store.js";
11
+
12
+ // ============================================================================
13
+ // Types
14
+ // ============================================================================
15
+
16
/** Shape of one row read from the legacy `memories` table. */
interface LegacyMemoryEntry {
  /** Identifier in the legacy store; kept in the migrated entry's metadata as `originalId`. */
  id: string;
  /** Stored memory text. */
  text: string;
  /** Embedding vector stored with the text. */
  vector: number[];
  /** Classification of the memory; rows without one are read as "other". */
  category: "preference" | "fact" | "decision" | "entity" | "other";
  /** Importance value carried over unchanged. */
  importance: number;
  /** Legacy creation timestamp; kept in the migrated entry's metadata as `originalCreatedAt`. */
  createdAt: number;
  /** Scope of the entry; when absent the migration's default scope is used. */
  scope?: string;
}
25
+
26
/** Outcome reported by a migration run. */
interface MigrationResult {
  /** True when the run finished without recording any error. */
  success: boolean;
  /** Human-readable one-line description of the outcome. */
  summary: string;
  /** Number of entries written to the target store. */
  migratedCount: number;
  /** Number of entries not written (already present, or failed). */
  skippedCount: number;
  /** Error messages collected during the run. */
  errors: string[];
}
33
+
34
/** Optional switches for a migration run. */
interface MigrationOptions {
  /** Explicit legacy database path; when omitted the default legacy locations are searched. */
  sourceDbPath?: string;
  /** Scope assigned to entries that carry none; "global" when not provided. */
  defaultScope?: string;
  /** When true, entries with a near-identical vector already in the target scope are skipped. */
  skipExisting?: boolean;
  /** When true, legacy data is loaded and counted but nothing is written. */
  dryRun?: boolean;
}
40
+
41
+ // ============================================================================
42
+ // Default Paths
43
+ // ============================================================================
44
+
45
/**
 * Lists the directories under the current user's home where a legacy LanceDB
 * memory store is looked for, in search order.
 *
 * @returns Absolute candidate paths; none of them is checked for existence here.
 */
function getDefaultLegacyPaths(): string[] {
  const home = homedir();
  // Add more legacy paths as needed
  const vendorDirs = [".openclaw", ".claude"];
  return vendorDirs.map((vendorDir) => join(home, vendorDir, "memory", "lancedb"));
}
53
+
54
+ // ============================================================================
55
+ // Migration Functions
56
+ // ============================================================================
57
+
58
/**
 * Copies memories from a legacy LanceDB store into the target `MemoryStore`.
 *
 * The public methods never throw: failures are reported through the returned
 * result objects.
 */
export class MemoryMigrator {
  /** @param targetStore Store that receives the migrated entries. */
  constructor(private targetStore: MemoryStore) {}

  /**
   * Runs a migration.
   *
   * Locates the legacy database, loads all rows of its `memories` table and,
   * unless `dryRun` is set, writes them to the target store one by one.
   *
   * @param options See `MigrationOptions`.
   * @returns Counts, collected errors and a summary line. `success` is false
   *          when no source was found, the source could not be read, or any
   *          entry failed.
   */
  async migrate(options: MigrationOptions = {}): Promise<MigrationResult> {
    const result: MigrationResult = {
      success: false,
      migratedCount: 0,
      skippedCount: 0,
      errors: [],
      summary: "",
    };

    try {
      // Find source database
      const sourceDbPath = await this.findSourceDatabase(options.sourceDbPath);
      if (!sourceDbPath) {
        result.errors.push("No legacy database found to migrate from");
        result.summary = "Migration failed: No source database found";
        return result;
      }

      console.log(`Migrating from: ${sourceDbPath}`);

      // Load legacy data (throws when the source cannot be read; handled below)
      const legacyEntries = await this.loadLegacyData(sourceDbPath);
      if (legacyEntries.length === 0) {
        result.summary = "Migration completed: No data to migrate";
        result.success = true;
        return result;
      }

      console.log(`Found ${legacyEntries.length} entries to migrate`);

      // Dry run: report what would happen without touching the target store
      if (options.dryRun) {
        result.summary = `Dry run: Would migrate ${legacyEntries.length} entries`;
        result.success = true;
        return result;
      }

      // Migrate entries
      const migrationStats = await this.migrateEntries(legacyEntries, options);
      result.migratedCount = migrationStats.migrated;
      result.skippedCount = migrationStats.skipped;
      result.errors.push(...migrationStats.errors);

      result.success = result.errors.length === 0;
      result.summary = `Migration ${result.success ? 'completed' : 'completed with errors'}: ` +
        `${result.migratedCount} migrated, ${result.skippedCount} skipped`;

    } catch (error) {
      result.errors.push(`Migration failed: ${error instanceof Error ? error.message : String(error)}`);
      result.summary = "Migration failed due to unexpected error";
    }

    return result;
  }

  /**
   * Resolves the legacy database directory.
   *
   * @param explicitPath When given, only this path is considered; it just has
   *                     to be accessible.
   * @returns The explicit path if accessible, otherwise the first default
   *          legacy path containing a `*.lance` entry, otherwise `null`.
   */
  private async findSourceDatabase(explicitPath?: string): Promise<string | null> {
    if (explicitPath) {
      try {
        await fs.access(explicitPath);
        return explicitPath;
      } catch {
        return null;
      }
    }

    // Check default legacy paths
    for (const path of getDefaultLegacyPaths()) {
      try {
        await fs.access(path);
        const files = await fs.readdir(path);
        // Check for LanceDB files (this also matches "memories.lance")
        if (files.some((f) => f.endsWith(".lance"))) {
          return path;
        }
      } catch {
        continue;
      }
    }

    return null;
  }

  /**
   * Reads rows from the `memories` table of the legacy database.
   *
   * @param sourceDbPath Directory of the legacy LanceDB database.
   * @param limit Optional maximum number of rows to read.
   * @returns The rows mapped to `LegacyMemoryEntry`; a missing category becomes "other".
   * @throws Error when the database or table cannot be opened or read. The
   *         failure is propagated rather than reported as "no rows", so callers
   *         can tell an unreadable source from an empty one.
   */
  private async loadLegacyData(sourceDbPath: string, limit?: number): Promise<LegacyMemoryEntry[]> {
    // NOTE(review): the vector column may come back as a typed array or another
    // iterable rather than a plain array - confirm against the LanceDB client in
    // use. Plain arrays pass through untouched; non-iterables are left as-is.
    const toNumberArray = (value: unknown): number[] => {
      if (Array.isArray(value)) return value as number[];
      if (value != null && typeof (value as { [Symbol.iterator]?: unknown })[Symbol.iterator] === "function") {
        return Array.from(value as Iterable<number>);
      }
      return value as number[];
    };

    try {
      const lancedb = await loadLanceDB();
      const db = await lancedb.connect(sourceDbPath);
      const table = await db.openTable("memories");
      let query = table.query();
      if (limit) query = query.limit(limit);
      const entries = await query.toArray();

      return entries.map((row): LegacyMemoryEntry => ({
        id: row.id as string,
        text: row.text as string,
        vector: toNumberArray(row.vector),
        importance: row.importance as number,
        category: (row.category as LegacyMemoryEntry["category"]) || "other",
        createdAt: row.createdAt as number,
        scope: row.scope as string | undefined,
      }));
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error);
      throw new Error(`Failed to load legacy data from ${sourceDbPath}: ${reason}`, { cause: error });
    }
  }

  /**
   * Writes legacy entries to the target store, one at a time.
   *
   * With `skipExisting`, an entry is skipped when the target scope already
   * holds a vector scoring above 0.95 against it. An entry that fails is
   * recorded in `errors` and also counted as skipped; the loop continues.
   *
   * @returns Number of entries written, number skipped, and per-entry error messages.
   */
  private async migrateEntries(
    legacyEntries: LegacyMemoryEntry[],
    options: MigrationOptions
  ): Promise<{ migrated: number; skipped: number; errors: string[] }> {
    let migrated = 0;
    let skipped = 0;
    const errors: string[] = [];

    const defaultScope = options.defaultScope || "global";

    for (const legacy of legacyEntries) {
      try {
        // Check if entry already exists (if skipExisting is enabled)
        if (options.skipExisting) {
          const existing = await this.targetStore.vectorSearch(
            legacy.vector, 1, 0.9, [legacy.scope || defaultScope]
          );
          if (existing.length > 0 && existing[0].score > 0.95) {
            skipped++;
            continue;
          }
        }

        // Convert legacy entry to new format
        const newEntry: Omit<MemoryEntry, "id" | "timestamp"> = {
          text: legacy.text,
          vector: legacy.vector,
          category: legacy.category,
          scope: legacy.scope || defaultScope, // Use legacy scope or default
          importance: legacy.importance,
          // Keep provenance so migrated rows can be traced back to the old store
          metadata: JSON.stringify({
            migratedFrom: "memory-lancedb",
            originalId: legacy.id,
            originalCreatedAt: legacy.createdAt,
          }),
        };

        await this.targetStore.store(newEntry);
        migrated++;

        if (migrated % 100 === 0) {
          console.log(`Migrated ${migrated}/${legacyEntries.length} entries...`);
        }

      } catch (error) {
        errors.push(`Failed to migrate entry ${legacy.id}: ${error}`);
        skipped++;
      }
    }

    return { migrated, skipped, errors };
  }

  /**
   * Checks whether there is legacy data to migrate.
   *
   * Reads at most one row, so no full scan is done. `entryCount` is 0 when the
   * table is empty and left undefined (unknown) when at least one row exists.
   *
   * @param sourceDbPath Optional explicit legacy database path.
   * @returns `sourceFound: false` when no database was located;
   *          `needed: false, sourceFound: true` when it was located but is
   *          empty or could not be read.
   */
  async checkMigrationNeeded(sourceDbPath?: string): Promise<{
    needed: boolean;
    sourceFound: boolean;
    sourceDbPath?: string;
    entryCount?: number;
  }> {
    const sourcePath = await this.findSourceDatabase(sourceDbPath);

    if (!sourcePath) {
      return {
        needed: false,
        sourceFound: false,
      };
    }

    try {
      const entries = await this.loadLegacyData(sourcePath, 1);
      return {
        needed: entries.length > 0,
        sourceFound: true,
        sourceDbPath: sourcePath,
        entryCount: entries.length > 0 ? undefined : 0, // Avoid full scan; count unknown
      };
    } catch {
      // The source exists but could not be read
      return {
        needed: false,
        sourceFound: true,
        sourceDbPath: sourcePath,
      };
    }
  }

  /**
   * Compares the number of legacy rows with the target store's total count.
   *
   * @param sourceDbPath Optional explicit legacy database path.
   * @returns `valid: true` only when the source could be read and the target
   *          holds at least as many entries as the source; otherwise `issues`
   *          explains why.
   */
  async verifyMigration(sourceDbPath?: string): Promise<{
    valid: boolean;
    sourceCount: number;
    targetCount: number;
    issues: string[];
  }> {
    const issues: string[] = [];

    try {
      const sourcePath = await this.findSourceDatabase(sourceDbPath);
      if (!sourcePath) {
        return {
          valid: false,
          sourceCount: 0,
          targetCount: 0,
          issues: ["Source database not found"],
        };
      }

      const sourceEntries = await this.loadLegacyData(sourcePath);
      const targetStats = await this.targetStore.stats();

      const sourceCount = sourceEntries.length;
      const targetCount = targetStats.totalCount;

      // Basic validation - target should have at least as many entries as source
      if (targetCount < sourceCount) {
        issues.push(`Target has fewer entries (${targetCount}) than source (${sourceCount})`);
      }

      return {
        valid: issues.length === 0,
        sourceCount,
        targetCount,
        issues,
      };

    } catch (error) {
      return {
        valid: false,
        sourceCount: 0,
        targetCount: 0,
        issues: [`Verification failed: ${error}`],
      };
    }
  }
}
301
+
302
+ // ============================================================================
303
+ // Factory Function
304
+ // ============================================================================
305
+
306
/**
 * Factory for {@link MemoryMigrator}.
 *
 * @param targetStore Store that will receive migrated entries.
 * @returns A migrator bound to `targetStore`.
 */
export function createMigrator(targetStore: MemoryStore): MemoryMigrator {
  const migrator = new MemoryMigrator(targetStore);
  return migrator;
}
309
+
310
+ // ============================================================================
311
+ // Standalone Migration Function
312
+ // ============================================================================
313
+
314
/**
 * One-call migration: builds a migrator for `targetStore` and runs it.
 *
 * @param targetStore Store that will receive migrated entries.
 * @param options Migration switches; defaults to an empty options object.
 * @returns The result reported by the migrator's `migrate` call.
 */
export async function migrateFromLegacy(
  targetStore: MemoryStore,
  options: MigrationOptions = {}
): Promise<MigrationResult> {
  return createMigrator(targetStore).migrate(options);
}
321
+
322
+ // ============================================================================
323
+ // CLI Helper Functions
324
+ // ============================================================================
325
+
326
/**
 * Scans the default legacy locations and reports which ones hold memories.
 *
 * For each candidate path the `memories` table is opened and its `id` column
 * is read to count rows. Paths that cannot be opened or read are ignored.
 *
 * @returns Whether any populated store was found, the populated paths, and the
 *          sum of their row counts.
 */
export async function checkForLegacyData(): Promise<{
  found: boolean;
  paths: string[];
  totalEntries: number;
}> {
  const populatedPaths: string[] = [];
  let entryTotal = 0;

  for (const candidate of getDefaultLegacyPaths()) {
    let rowCount: number;

    try {
      const lancedb = await loadLanceDB();
      const db = await lancedb.connect(candidate);
      const table = await db.openTable("memories");
      const rows = await table.query().select(["id"]).toArray();
      rowCount = rows.length;
    } catch {
      // Path doesn't exist or isn't a valid LanceDB
      continue;
    }

    if (rowCount === 0) {
      continue;
    }

    populatedPaths.push(candidate);
    entryTotal += rowCount;
  }

  return {
    found: populatedPaths.length > 0,
    paths: populatedPaths,
    totalEntries: entryTotal,
  };
}