memory-lancedb-pro 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +489 -0
- package/README_CN.md +406 -0
- package/cli.ts +611 -0
- package/index.ts +698 -0
- package/openclaw.plugin.json +385 -0
- package/package.json +38 -0
- package/skills/lesson/SKILL.md +28 -0
- package/src/adaptive-retrieval.ts +60 -0
- package/src/embedder.ts +354 -0
- package/src/migrate.ts +356 -0
- package/src/noise-filter.ts +78 -0
- package/src/retriever.ts +722 -0
- package/src/scopes.ts +374 -0
- package/src/store.ts +567 -0
- package/src/tools.ts +639 -0
package/src/embedder.ts
ADDED
|
@@ -0,0 +1,354 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Embedding Abstraction Layer
|
|
3
|
+
* OpenAI-compatible API for various embedding providers.
|
|
4
|
+
*
|
|
5
|
+
* Note: Some providers (e.g. Jina) support extra parameters like `task` and
|
|
6
|
+
* `normalized` on the embeddings endpoint. The OpenAI SDK types do not include
|
|
7
|
+
* these fields, so we pass them via a narrow `any` cast.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import OpenAI from "openai";
|
|
11
|
+
import { createHash } from "node:crypto";
|
|
12
|
+
|
|
13
|
+
// ============================================================================
|
|
14
|
+
// Embedding Cache (LRU with TTL)
|
|
15
|
+
// ============================================================================
|
|
16
|
+
|
|
17
|
+
interface CacheEntry {
|
|
18
|
+
vector: number[];
|
|
19
|
+
createdAt: number;
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
class EmbeddingCache {
|
|
23
|
+
private cache = new Map<string, CacheEntry>();
|
|
24
|
+
private readonly maxSize: number;
|
|
25
|
+
private readonly ttlMs: number;
|
|
26
|
+
public hits = 0;
|
|
27
|
+
public misses = 0;
|
|
28
|
+
|
|
29
|
+
constructor(maxSize = 256, ttlMinutes = 30) {
|
|
30
|
+
this.maxSize = maxSize;
|
|
31
|
+
this.ttlMs = ttlMinutes * 60_000;
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
private key(text: string, task?: string): string {
|
|
35
|
+
const hash = createHash("sha256").update(`${task || ""}:${text}`).digest("hex").slice(0, 24);
|
|
36
|
+
return hash;
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
get(text: string, task?: string): number[] | undefined {
|
|
40
|
+
const k = this.key(text, task);
|
|
41
|
+
const entry = this.cache.get(k);
|
|
42
|
+
if (!entry) {
|
|
43
|
+
this.misses++;
|
|
44
|
+
return undefined;
|
|
45
|
+
}
|
|
46
|
+
if (Date.now() - entry.createdAt > this.ttlMs) {
|
|
47
|
+
this.cache.delete(k);
|
|
48
|
+
this.misses++;
|
|
49
|
+
return undefined;
|
|
50
|
+
}
|
|
51
|
+
// Move to end (most recently used)
|
|
52
|
+
this.cache.delete(k);
|
|
53
|
+
this.cache.set(k, entry);
|
|
54
|
+
this.hits++;
|
|
55
|
+
return entry.vector;
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
set(text: string, task: string | undefined, vector: number[]): void {
|
|
59
|
+
const k = this.key(text, task);
|
|
60
|
+
// Evict oldest if full
|
|
61
|
+
if (this.cache.size >= this.maxSize) {
|
|
62
|
+
const firstKey = this.cache.keys().next().value;
|
|
63
|
+
if (firstKey !== undefined) this.cache.delete(firstKey);
|
|
64
|
+
}
|
|
65
|
+
this.cache.set(k, { vector, createdAt: Date.now() });
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
get size(): number { return this.cache.size; }
|
|
69
|
+
get stats(): { size: number; hits: number; misses: number; hitRate: string } {
|
|
70
|
+
const total = this.hits + this.misses;
|
|
71
|
+
return {
|
|
72
|
+
size: this.cache.size,
|
|
73
|
+
hits: this.hits,
|
|
74
|
+
misses: this.misses,
|
|
75
|
+
hitRate: total > 0 ? `${((this.hits / total) * 100).toFixed(1)}%` : "N/A",
|
|
76
|
+
};
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
// ============================================================================
|
|
81
|
+
// Types & Configuration
|
|
82
|
+
// ============================================================================
|
|
83
|
+
|
|
84
|
+
export interface EmbeddingConfig {
|
|
85
|
+
provider: "openai-compatible";
|
|
86
|
+
apiKey: string;
|
|
87
|
+
model: string;
|
|
88
|
+
baseURL?: string;
|
|
89
|
+
dimensions?: number;
|
|
90
|
+
|
|
91
|
+
/** Optional task type for query embeddings (e.g. "retrieval.query") */
|
|
92
|
+
taskQuery?: string;
|
|
93
|
+
/** Optional task type for passage/document embeddings (e.g. "retrieval.passage") */
|
|
94
|
+
taskPassage?: string;
|
|
95
|
+
/** Optional flag to request normalized embeddings (provider-dependent, e.g. Jina v5) */
|
|
96
|
+
normalized?: boolean;
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
// Known embedding model dimensions
|
|
100
|
+
const EMBEDDING_DIMENSIONS: Record<string, number> = {
|
|
101
|
+
"text-embedding-3-small": 1536,
|
|
102
|
+
"text-embedding-3-large": 3072,
|
|
103
|
+
"text-embedding-004": 768,
|
|
104
|
+
"gemini-embedding-001": 3072,
|
|
105
|
+
"nomic-embed-text": 768,
|
|
106
|
+
"mxbai-embed-large": 1024,
|
|
107
|
+
"BAAI/bge-m3": 1024,
|
|
108
|
+
"all-MiniLM-L6-v2": 384,
|
|
109
|
+
"all-mpnet-base-v2": 768,
|
|
110
|
+
|
|
111
|
+
// Jina v5
|
|
112
|
+
"jina-embeddings-v5-text-small": 1024,
|
|
113
|
+
"jina-embeddings-v5-text-nano": 768,
|
|
114
|
+
};
|
|
115
|
+
|
|
116
|
+
// ============================================================================
|
|
117
|
+
// Utility Functions
|
|
118
|
+
// ============================================================================
|
|
119
|
+
|
|
120
|
+
function resolveEnvVars(value: string): string {
|
|
121
|
+
return value.replace(/\$\{([^}]+)\}/g, (_, envVar) => {
|
|
122
|
+
const envValue = process.env[envVar];
|
|
123
|
+
if (!envValue) {
|
|
124
|
+
throw new Error(`Environment variable ${envVar} is not set`);
|
|
125
|
+
}
|
|
126
|
+
return envValue;
|
|
127
|
+
});
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
export function getVectorDimensions(model: string, overrideDims?: number): number {
|
|
131
|
+
if (overrideDims && overrideDims > 0) {
|
|
132
|
+
return overrideDims;
|
|
133
|
+
}
|
|
134
|
+
|
|
135
|
+
const dims = EMBEDDING_DIMENSIONS[model];
|
|
136
|
+
if (!dims) {
|
|
137
|
+
throw new Error(
|
|
138
|
+
`Unsupported embedding model: ${model}. Either add it to EMBEDDING_DIMENSIONS or set embedding.dimensions in config.`
|
|
139
|
+
);
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
return dims;
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
// ============================================================================
|
|
146
|
+
// Embedder Class
|
|
147
|
+
// ============================================================================
|
|
148
|
+
|
|
149
|
+
export class Embedder {
|
|
150
|
+
private client: OpenAI;
|
|
151
|
+
public readonly dimensions: number;
|
|
152
|
+
private readonly _cache: EmbeddingCache;
|
|
153
|
+
|
|
154
|
+
private readonly _model: string;
|
|
155
|
+
private readonly _taskQuery?: string;
|
|
156
|
+
private readonly _taskPassage?: string;
|
|
157
|
+
private readonly _normalized?: boolean;
|
|
158
|
+
|
|
159
|
+
constructor(config: EmbeddingConfig) {
|
|
160
|
+
// Resolve environment variables in API key
|
|
161
|
+
const resolvedApiKey = resolveEnvVars(config.apiKey);
|
|
162
|
+
|
|
163
|
+
this._model = config.model;
|
|
164
|
+
this._taskQuery = config.taskQuery;
|
|
165
|
+
this._taskPassage = config.taskPassage;
|
|
166
|
+
this._normalized = config.normalized;
|
|
167
|
+
|
|
168
|
+
this.client = new OpenAI({
|
|
169
|
+
apiKey: resolvedApiKey,
|
|
170
|
+
...(config.baseURL ? { baseURL: config.baseURL } : {}),
|
|
171
|
+
});
|
|
172
|
+
|
|
173
|
+
this.dimensions = getVectorDimensions(config.model, config.dimensions);
|
|
174
|
+
this._cache = new EmbeddingCache(256, 30); // 256 entries, 30 min TTL
|
|
175
|
+
}
|
|
176
|
+
|
|
177
|
+
// --------------------------------------------------------------------------
|
|
178
|
+
// Backward-compatible API
|
|
179
|
+
// --------------------------------------------------------------------------
|
|
180
|
+
|
|
181
|
+
/**
|
|
182
|
+
* Backward-compatible embedding API.
|
|
183
|
+
*
|
|
184
|
+
* Historically the plugin used a single `embed()` method for both query and
|
|
185
|
+
* passage embeddings. With task-aware providers we treat this as passage.
|
|
186
|
+
*/
|
|
187
|
+
async embed(text: string): Promise<number[]> {
|
|
188
|
+
return this.embedPassage(text);
|
|
189
|
+
}
|
|
190
|
+
|
|
191
|
+
/** Backward-compatible batch embedding API (treated as passage). */
|
|
192
|
+
async embedBatch(texts: string[]): Promise<number[][]> {
|
|
193
|
+
return this.embedBatchPassage(texts);
|
|
194
|
+
}
|
|
195
|
+
|
|
196
|
+
// --------------------------------------------------------------------------
|
|
197
|
+
// Task-aware API
|
|
198
|
+
// --------------------------------------------------------------------------
|
|
199
|
+
|
|
200
|
+
async embedQuery(text: string): Promise<number[]> {
|
|
201
|
+
return this.embedSingle(text, this._taskQuery);
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
async embedPassage(text: string): Promise<number[]> {
|
|
205
|
+
return this.embedSingle(text, this._taskPassage);
|
|
206
|
+
}
|
|
207
|
+
|
|
208
|
+
async embedBatchQuery(texts: string[]): Promise<number[][]> {
|
|
209
|
+
return this.embedMany(texts, this._taskQuery);
|
|
210
|
+
}
|
|
211
|
+
|
|
212
|
+
async embedBatchPassage(texts: string[]): Promise<number[][]> {
|
|
213
|
+
return this.embedMany(texts, this._taskPassage);
|
|
214
|
+
}
|
|
215
|
+
|
|
216
|
+
// --------------------------------------------------------------------------
|
|
217
|
+
// Internals
|
|
218
|
+
// --------------------------------------------------------------------------
|
|
219
|
+
|
|
220
|
+
private validateEmbedding(embedding: number[]): void {
|
|
221
|
+
if (!Array.isArray(embedding)) {
|
|
222
|
+
throw new Error(`Embedding is not an array (got ${typeof embedding})`);
|
|
223
|
+
}
|
|
224
|
+
if (embedding.length !== this.dimensions) {
|
|
225
|
+
throw new Error(
|
|
226
|
+
`Embedding dimension mismatch: expected ${this.dimensions}, got ${embedding.length}`
|
|
227
|
+
);
|
|
228
|
+
}
|
|
229
|
+
}
|
|
230
|
+
|
|
231
|
+
private buildPayload(input: string | string[], task?: string): any {
|
|
232
|
+
const payload: any = {
|
|
233
|
+
model: this.model,
|
|
234
|
+
input,
|
|
235
|
+
};
|
|
236
|
+
|
|
237
|
+
if (task) payload.task = task;
|
|
238
|
+
if (this._normalized !== undefined) payload.normalized = this._normalized;
|
|
239
|
+
|
|
240
|
+
return payload;
|
|
241
|
+
}
|
|
242
|
+
|
|
243
|
+
private async embedSingle(text: string, task?: string): Promise<number[]> {
|
|
244
|
+
if (!text || text.trim().length === 0) {
|
|
245
|
+
throw new Error("Cannot embed empty text");
|
|
246
|
+
}
|
|
247
|
+
|
|
248
|
+
// Check cache first
|
|
249
|
+
const cached = this._cache.get(text, task);
|
|
250
|
+
if (cached) return cached;
|
|
251
|
+
|
|
252
|
+
try {
|
|
253
|
+
const response = await this.client.embeddings.create(this.buildPayload(text, task) as any);
|
|
254
|
+
const embedding = response.data[0]?.embedding as number[] | undefined;
|
|
255
|
+
if (!embedding) {
|
|
256
|
+
throw new Error("No embedding returned from provider");
|
|
257
|
+
}
|
|
258
|
+
|
|
259
|
+
this.validateEmbedding(embedding);
|
|
260
|
+
this._cache.set(text, task, embedding);
|
|
261
|
+
return embedding;
|
|
262
|
+
} catch (error) {
|
|
263
|
+
if (error instanceof Error) {
|
|
264
|
+
throw new Error(`Failed to generate embedding: ${error.message}`, { cause: error });
|
|
265
|
+
}
|
|
266
|
+
throw new Error(`Failed to generate embedding: ${String(error)}`);
|
|
267
|
+
}
|
|
268
|
+
}
|
|
269
|
+
|
|
270
|
+
private async embedMany(texts: string[], task?: string): Promise<number[][]> {
|
|
271
|
+
if (!texts || texts.length === 0) {
|
|
272
|
+
return [];
|
|
273
|
+
}
|
|
274
|
+
|
|
275
|
+
// Filter out empty texts and track indices
|
|
276
|
+
const validTexts: string[] = [];
|
|
277
|
+
const validIndices: number[] = [];
|
|
278
|
+
|
|
279
|
+
texts.forEach((text, index) => {
|
|
280
|
+
if (text && text.trim().length > 0) {
|
|
281
|
+
validTexts.push(text);
|
|
282
|
+
validIndices.push(index);
|
|
283
|
+
}
|
|
284
|
+
});
|
|
285
|
+
|
|
286
|
+
if (validTexts.length === 0) {
|
|
287
|
+
return texts.map(() => []);
|
|
288
|
+
}
|
|
289
|
+
|
|
290
|
+
try {
|
|
291
|
+
const response = await this.client.embeddings.create(
|
|
292
|
+
this.buildPayload(validTexts, task) as any
|
|
293
|
+
);
|
|
294
|
+
|
|
295
|
+
// Create result array with proper length
|
|
296
|
+
const results: number[][] = new Array(texts.length);
|
|
297
|
+
|
|
298
|
+
// Fill in embeddings for valid texts
|
|
299
|
+
response.data.forEach((item, idx) => {
|
|
300
|
+
const originalIndex = validIndices[idx];
|
|
301
|
+
const embedding = item.embedding as number[];
|
|
302
|
+
|
|
303
|
+
this.validateEmbedding(embedding);
|
|
304
|
+
results[originalIndex] = embedding;
|
|
305
|
+
});
|
|
306
|
+
|
|
307
|
+
// Fill empty arrays for invalid texts
|
|
308
|
+
for (let i = 0; i < texts.length; i++) {
|
|
309
|
+
if (!results[i]) {
|
|
310
|
+
results[i] = [];
|
|
311
|
+
}
|
|
312
|
+
}
|
|
313
|
+
|
|
314
|
+
return results;
|
|
315
|
+
} catch (error) {
|
|
316
|
+
if (error instanceof Error) {
|
|
317
|
+
throw new Error(`Failed to generate batch embeddings: ${error.message}`, { cause: error });
|
|
318
|
+
}
|
|
319
|
+
throw new Error(`Failed to generate batch embeddings: ${String(error)}`);
|
|
320
|
+
}
|
|
321
|
+
}
|
|
322
|
+
|
|
323
|
+
get model(): string {
|
|
324
|
+
return this._model;
|
|
325
|
+
}
|
|
326
|
+
|
|
327
|
+
// Test connection and validate configuration
|
|
328
|
+
async test(): Promise<{ success: boolean; error?: string; dimensions?: number }> {
|
|
329
|
+
try {
|
|
330
|
+
const testEmbedding = await this.embedPassage("test");
|
|
331
|
+
return {
|
|
332
|
+
success: true,
|
|
333
|
+
dimensions: testEmbedding.length,
|
|
334
|
+
};
|
|
335
|
+
} catch (error) {
|
|
336
|
+
return {
|
|
337
|
+
success: false,
|
|
338
|
+
error: error instanceof Error ? error.message : String(error),
|
|
339
|
+
};
|
|
340
|
+
}
|
|
341
|
+
}
|
|
342
|
+
|
|
343
|
+
get cacheStats() {
|
|
344
|
+
return this._cache.stats;
|
|
345
|
+
}
|
|
346
|
+
}
|
|
347
|
+
|
|
348
|
+
// ============================================================================
|
|
349
|
+
// Factory Function
|
|
350
|
+
// ============================================================================
|
|
351
|
+
|
|
352
|
+
export function createEmbedder(config: EmbeddingConfig): Embedder {
|
|
353
|
+
return new Embedder(config);
|
|
354
|
+
}
|
package/src/migrate.ts
ADDED
|
@@ -0,0 +1,356 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Migration Utilities
|
|
3
|
+
* Migrates data from old memory-lancedb plugin to memory-lancedb-pro
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { homedir } from "node:os";
|
|
7
|
+
import { join } from "node:path";
|
|
8
|
+
import fs from "node:fs/promises";
|
|
9
|
+
import type { MemoryStore, MemoryEntry } from "./store.js";
|
|
10
|
+
import { loadLanceDB } from "./store.js";
|
|
11
|
+
|
|
12
|
+
// ============================================================================
|
|
13
|
+
// Types
|
|
14
|
+
// ============================================================================
|
|
15
|
+
|
|
16
|
+
interface LegacyMemoryEntry {
|
|
17
|
+
id: string;
|
|
18
|
+
text: string;
|
|
19
|
+
vector: number[];
|
|
20
|
+
importance: number;
|
|
21
|
+
category: "preference" | "fact" | "decision" | "entity" | "other";
|
|
22
|
+
createdAt: number;
|
|
23
|
+
scope?: string;
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
interface MigrationResult {
|
|
27
|
+
success: boolean;
|
|
28
|
+
migratedCount: number;
|
|
29
|
+
skippedCount: number;
|
|
30
|
+
errors: string[];
|
|
31
|
+
summary: string;
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
interface MigrationOptions {
|
|
35
|
+
sourceDbPath?: string;
|
|
36
|
+
dryRun?: boolean;
|
|
37
|
+
defaultScope?: string;
|
|
38
|
+
skipExisting?: boolean;
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
// ============================================================================
|
|
42
|
+
// Default Paths
|
|
43
|
+
// ============================================================================
|
|
44
|
+
|
|
45
|
+
function getDefaultLegacyPaths(): string[] {
|
|
46
|
+
const home = homedir();
|
|
47
|
+
return [
|
|
48
|
+
join(home, ".openclaw", "memory", "lancedb"),
|
|
49
|
+
join(home, ".claude", "memory", "lancedb"),
|
|
50
|
+
// Add more legacy paths as needed
|
|
51
|
+
];
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
// ============================================================================
|
|
55
|
+
// Migration Functions
|
|
56
|
+
// ============================================================================
|
|
57
|
+
|
|
58
|
+
/**
 * Migrates entries from a legacy `memory-lancedb` database into the new
 * pro-format target store. The source database is only ever read, never
 * modified. All public methods report failures through their return values
 * rather than throwing.
 */
export class MemoryMigrator {
  constructor(private targetStore: MemoryStore) {}

  /**
   * Run a full migration.
   *
   * Locates a source database (explicit path or known defaults), loads all
   * legacy rows, and writes them into the target store. With `options.dryRun`
   * set, returns early after reporting how many entries would be migrated.
   *
   * @returns A MigrationResult; `success` is true only when no errors were
   *   recorded (dry runs and empty sources also count as success).
   */
  async migrate(options: MigrationOptions = {}): Promise<MigrationResult> {
    const result: MigrationResult = {
      success: false,
      migratedCount: 0,
      skippedCount: 0,
      errors: [],
      summary: "",
    };

    try {
      // Find source database
      const sourceDbPath = await this.findSourceDatabase(options.sourceDbPath);
      if (!sourceDbPath) {
        result.errors.push("No legacy database found to migrate from");
        result.summary = "Migration failed: No source database found";
        return result;
      }

      console.log(`Migrating from: ${sourceDbPath}`);

      // Load legacy data
      const legacyEntries = await this.loadLegacyData(sourceDbPath);
      if (legacyEntries.length === 0) {
        // Nothing to do counts as a successful migration.
        result.summary = "Migration completed: No data to migrate";
        result.success = true;
        return result;
      }

      console.log(`Found ${legacyEntries.length} entries to migrate`);

      // Migrate entries
      if (!options.dryRun) {
        const migrationStats = await this.migrateEntries(legacyEntries, options);
        result.migratedCount = migrationStats.migrated;
        result.skippedCount = migrationStats.skipped;
        result.errors.push(...migrationStats.errors);
      } else {
        // Dry run: report the would-be count and stop before any writes.
        result.summary = `Dry run: Would migrate ${legacyEntries.length} entries`;
        result.success = true;
        return result;
      }

      result.success = result.errors.length === 0;
      result.summary = `Migration ${result.success ? 'completed' : 'completed with errors'}: ` +
        `${result.migratedCount} migrated, ${result.skippedCount} skipped`;

    } catch (error) {
      result.errors.push(`Migration failed: ${error instanceof Error ? error.message : String(error)}`);
      result.summary = "Migration failed due to unexpected error";
    }

    return result;
  }

  /**
   * Resolve the source database directory.
   *
   * When `explicitPath` is given it is used only if it exists — there is no
   * fallback to the defaults in that case. Otherwise each default legacy path
   * is probed for LanceDB table files.
   *
   * @returns The resolved directory, or null when nothing usable was found.
   */
  private async findSourceDatabase(explicitPath?: string): Promise<string | null> {
    if (explicitPath) {
      try {
        await fs.access(explicitPath);
        return explicitPath;
      } catch {
        return null;
      }
    }

    // Check default legacy paths
    for (const path of getDefaultLegacyPaths()) {
      try {
        await fs.access(path);
        const files = await fs.readdir(path);
        // Check for LanceDB files
        if (files.some(f => f.endsWith('.lance') || f === 'memories.lance')) {
          return path;
        }
      } catch {
        // Unreadable/missing path: try the next candidate.
        continue;
      }
    }

    return null;
  }

  /**
   * Read rows from the legacy "memories" table.
   *
   * Returns [] (with a console warning) if the table cannot be opened or
   * read, so callers can treat "no data" and "unreadable" identically.
   *
   * @param limit Optional row cap, used for cheap existence checks.
   */
  private async loadLegacyData(sourceDbPath: string, limit?: number): Promise<LegacyMemoryEntry[]> {
    const lancedb = await loadLanceDB();
    const db = await lancedb.connect(sourceDbPath);

    try {
      const table = await db.openTable("memories");
      let query = table.query();
      if (limit) query = query.limit(limit);
      const entries = await query.toArray();

      // Coerce raw rows into the legacy shape; category falls back to "other".
      return entries.map((row): LegacyMemoryEntry => ({
        id: row.id as string,
        text: row.text as string,
        vector: row.vector as number[],
        importance: row.importance as number,
        category: (row.category as LegacyMemoryEntry["category"]) || "other",
        createdAt: row.createdAt as number,
        scope: row.scope as string | undefined,
      }));
    } catch (error) {
      console.warn(`Failed to load legacy data: ${error}`);
      return [];
    }
  }

  /**
   * Write legacy entries into the target store, one at a time.
   *
   * With `skipExisting`, a vector search serves as a near-duplicate check
   * (top hit with score > 0.95 counts as already present). Per-entry failures
   * are recorded in `errors` AND counted as skipped; the loop keeps going.
   */
  private async migrateEntries(
    legacyEntries: LegacyMemoryEntry[],
    options: MigrationOptions
  ): Promise<{ migrated: number; skipped: number; errors: string[] }> {
    let migrated = 0;
    let skipped = 0;
    const errors: string[] = [];

    const defaultScope = options.defaultScope || "global";

    for (const legacy of legacyEntries) {
      try {
        // Check if entry already exists (if skipExisting is enabled)
        if (options.skipExisting) {
          // NOTE(review): assumes vectorSearch(vector, k, minScore, scopes)
          // with score as a similarity in [0, 1] — confirm against store.ts.
          const existing = await this.targetStore.vectorSearch(
            legacy.vector, 1, 0.9, [legacy.scope || defaultScope]
          );
          if (existing.length > 0 && existing[0].score > 0.95) {
            skipped++;
            continue;
          }
        }

        // Convert legacy entry to new format
        const newEntry: Omit<MemoryEntry, "id" | "timestamp"> = {
          text: legacy.text,
          vector: legacy.vector,
          category: legacy.category,
          scope: legacy.scope || defaultScope, // Use legacy scope or default
          importance: legacy.importance,
          // Provenance is kept in metadata so the original id and creation
          // time remain recoverable after migration.
          metadata: JSON.stringify({
            migratedFrom: "memory-lancedb",
            originalId: legacy.id,
            originalCreatedAt: legacy.createdAt,
          }),
        };

        await this.targetStore.store(newEntry);
        migrated++;

        // Progress log every 100 successful writes.
        if (migrated % 100 === 0) {
          console.log(`Migrated ${migrated}/${legacyEntries.length} entries...`);
        }

      } catch (error) {
        errors.push(`Failed to migrate entry ${legacy.id}: ${error}`);
        skipped++;
      }
    }

    return { migrated, skipped, errors };
  }

  // Check if migration is needed
  /**
   * Cheap probe: does a source database exist and contain at least one row?
   * Loads at most one row; when data exists, `entryCount` is deliberately
   * left undefined to avoid a full table scan.
   */
  async checkMigrationNeeded(sourceDbPath?: string): Promise<{
    needed: boolean;
    sourceFound: boolean;
    sourceDbPath?: string;
    entryCount?: number;
  }> {
    const sourcePath = await this.findSourceDatabase(sourceDbPath);

    if (!sourcePath) {
      return {
        needed: false,
        sourceFound: false,
      };
    }

    try {
      const entries = await this.loadLegacyData(sourcePath, 1);
      return {
        needed: entries.length > 0,
        sourceFound: true,
        sourceDbPath: sourcePath,
        entryCount: entries.length > 0 ? undefined : 0, // Avoid full scan; count unknown
      };
    } catch (error) {
      // Source exists but could not be read; report it found, not needed.
      return {
        needed: false,
        sourceFound: true,
        sourceDbPath: sourcePath,
      };
    }
  }

  // Verify migration results
  /**
   * Sanity-check a completed migration by comparing row counts only: the
   * target must hold at least as many entries as the source. Entry contents
   * are NOT compared.
   */
  async verifyMigration(sourceDbPath?: string): Promise<{
    valid: boolean;
    sourceCount: number;
    targetCount: number;
    issues: string[];
  }> {
    const issues: string[] = [];

    try {
      const sourcePath = await this.findSourceDatabase(sourceDbPath);
      if (!sourcePath) {
        return {
          valid: false,
          sourceCount: 0,
          targetCount: 0,
          issues: ["Source database not found"],
        };
      }

      const sourceEntries = await this.loadLegacyData(sourcePath);
      const targetStats = await this.targetStore.stats();

      const sourceCount = sourceEntries.length;
      const targetCount = targetStats.totalCount;

      // Basic validation - target should have at least as many entries as source
      if (targetCount < sourceCount) {
        issues.push(`Target has fewer entries (${targetCount}) than source (${sourceCount})`);
      }

      return {
        valid: issues.length === 0,
        sourceCount,
        targetCount,
        issues,
      };

    } catch (error) {
      return {
        valid: false,
        sourceCount: 0,
        targetCount: 0,
        issues: [`Verification failed: ${error}`],
      };
    }
  }
}
|
|
301
|
+
|
|
302
|
+
// ============================================================================
|
|
303
|
+
// Factory Function
|
|
304
|
+
// ============================================================================
|
|
305
|
+
|
|
306
|
+
export function createMigrator(targetStore: MemoryStore): MemoryMigrator {
|
|
307
|
+
return new MemoryMigrator(targetStore);
|
|
308
|
+
}
|
|
309
|
+
|
|
310
|
+
// ============================================================================
|
|
311
|
+
// Standalone Migration Function
|
|
312
|
+
// ============================================================================
|
|
313
|
+
|
|
314
|
+
export async function migrateFromLegacy(
|
|
315
|
+
targetStore: MemoryStore,
|
|
316
|
+
options: MigrationOptions = {}
|
|
317
|
+
): Promise<MigrationResult> {
|
|
318
|
+
const migrator = createMigrator(targetStore);
|
|
319
|
+
return migrator.migrate(options);
|
|
320
|
+
}
|
|
321
|
+
|
|
322
|
+
// ============================================================================
|
|
323
|
+
// CLI Helper Functions
|
|
324
|
+
// ============================================================================
|
|
325
|
+
|
|
326
|
+
export async function checkForLegacyData(): Promise<{
|
|
327
|
+
found: boolean;
|
|
328
|
+
paths: string[];
|
|
329
|
+
totalEntries: number;
|
|
330
|
+
}> {
|
|
331
|
+
const paths: string[] = [];
|
|
332
|
+
let totalEntries = 0;
|
|
333
|
+
|
|
334
|
+
for (const path of getDefaultLegacyPaths()) {
|
|
335
|
+
try {
|
|
336
|
+
const lancedb = await loadLanceDB();
|
|
337
|
+
const db = await lancedb.connect(path);
|
|
338
|
+
const table = await db.openTable("memories");
|
|
339
|
+
const entries = await table.query().select(["id"]).toArray();
|
|
340
|
+
|
|
341
|
+
if (entries.length > 0) {
|
|
342
|
+
paths.push(path);
|
|
343
|
+
totalEntries += entries.length;
|
|
344
|
+
}
|
|
345
|
+
} catch {
|
|
346
|
+
// Path doesn't exist or isn't a valid LanceDB
|
|
347
|
+
continue;
|
|
348
|
+
}
|
|
349
|
+
}
|
|
350
|
+
|
|
351
|
+
return {
|
|
352
|
+
found: paths.length > 0,
|
|
353
|
+
paths,
|
|
354
|
+
totalEntries,
|
|
355
|
+
};
|
|
356
|
+
}
|