@twelvehart/supermemory-runtime 1.0.0-next.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +57 -0
- package/README.md +374 -0
- package/dist/index.js +189 -0
- package/dist/mcp/index.js +1132 -0
- package/docker-compose.prod.yml +91 -0
- package/docker-compose.yml +358 -0
- package/drizzle/0000_dapper_the_professor.sql +159 -0
- package/drizzle/0001_api_keys.sql +51 -0
- package/drizzle/meta/0000_snapshot.json +1532 -0
- package/drizzle/meta/_journal.json +13 -0
- package/drizzle.config.ts +20 -0
- package/package.json +114 -0
- package/scripts/add-extraction-job.ts +122 -0
- package/scripts/benchmark-pgvector.ts +122 -0
- package/scripts/bootstrap.sh +209 -0
- package/scripts/check-runtime-pack.ts +111 -0
- package/scripts/claude-mcp-config.ts +336 -0
- package/scripts/docker-entrypoint.sh +183 -0
- package/scripts/doctor.ts +377 -0
- package/scripts/init-db.sql +33 -0
- package/scripts/install.sh +1110 -0
- package/scripts/mcp-setup.ts +271 -0
- package/scripts/migrations/001_create_pgvector_extension.sql +31 -0
- package/scripts/migrations/002_create_memory_embeddings_table.sql +75 -0
- package/scripts/migrations/003_create_hnsw_index.sql +94 -0
- package/scripts/migrations/004_create_memory_embeddings_standalone.sql +70 -0
- package/scripts/migrations/005_create_chunks_table.sql +95 -0
- package/scripts/migrations/006_create_processing_queue.sql +45 -0
- package/scripts/migrations/generate_test_data.sql +42 -0
- package/scripts/migrations/phase1_comprehensive_test.sql +204 -0
- package/scripts/migrations/run_migrations.sh +286 -0
- package/scripts/migrations/test_hnsw_index.sql +255 -0
- package/scripts/pre-commit-secrets +282 -0
- package/scripts/run-extraction-worker.ts +46 -0
- package/scripts/run-phase1-tests.sh +291 -0
- package/scripts/setup.ts +222 -0
- package/scripts/smoke-install.sh +12 -0
- package/scripts/test-health-endpoint.sh +328 -0
- package/src/api/index.ts +2 -0
- package/src/api/middleware/auth.ts +80 -0
- package/src/api/middleware/csrf.ts +308 -0
- package/src/api/middleware/errorHandler.ts +166 -0
- package/src/api/middleware/rateLimit.ts +360 -0
- package/src/api/middleware/validation.ts +514 -0
- package/src/api/routes/documents.ts +286 -0
- package/src/api/routes/profiles.ts +237 -0
- package/src/api/routes/search.ts +71 -0
- package/src/api/stores/index.ts +58 -0
- package/src/config/bootstrap-env.ts +3 -0
- package/src/config/env.ts +71 -0
- package/src/config/feature-flags.ts +25 -0
- package/src/config/index.ts +140 -0
- package/src/config/secrets.config.ts +291 -0
- package/src/db/client.ts +92 -0
- package/src/db/index.ts +73 -0
- package/src/db/postgres.ts +72 -0
- package/src/db/schema/chunks.schema.ts +31 -0
- package/src/db/schema/containers.schema.ts +46 -0
- package/src/db/schema/documents.schema.ts +49 -0
- package/src/db/schema/embeddings.schema.ts +32 -0
- package/src/db/schema/index.ts +11 -0
- package/src/db/schema/memories.schema.ts +72 -0
- package/src/db/schema/profiles.schema.ts +34 -0
- package/src/db/schema/queue.schema.ts +59 -0
- package/src/db/schema/relationships.schema.ts +42 -0
- package/src/db/schema.ts +223 -0
- package/src/db/worker-connection.ts +47 -0
- package/src/index.ts +235 -0
- package/src/mcp/CLAUDE.md +1 -0
- package/src/mcp/index.ts +1380 -0
- package/src/mcp/legacyState.ts +22 -0
- package/src/mcp/rateLimit.ts +358 -0
- package/src/mcp/resources.ts +309 -0
- package/src/mcp/results.ts +104 -0
- package/src/mcp/tools.ts +401 -0
- package/src/queues/config.ts +119 -0
- package/src/queues/index.ts +289 -0
- package/src/sdk/client.ts +225 -0
- package/src/sdk/errors.ts +266 -0
- package/src/sdk/http.ts +560 -0
- package/src/sdk/index.ts +244 -0
- package/src/sdk/resources/base.ts +65 -0
- package/src/sdk/resources/connections.ts +204 -0
- package/src/sdk/resources/documents.ts +163 -0
- package/src/sdk/resources/index.ts +10 -0
- package/src/sdk/resources/memories.ts +150 -0
- package/src/sdk/resources/search.ts +60 -0
- package/src/sdk/resources/settings.ts +36 -0
- package/src/sdk/types.ts +674 -0
- package/src/services/chunking/index.ts +451 -0
- package/src/services/chunking.service.ts +650 -0
- package/src/services/csrf.service.ts +252 -0
- package/src/services/documents.repository.ts +219 -0
- package/src/services/documents.service.ts +191 -0
- package/src/services/embedding.service.ts +404 -0
- package/src/services/extraction.service.ts +300 -0
- package/src/services/extractors/code.extractor.ts +451 -0
- package/src/services/extractors/index.ts +9 -0
- package/src/services/extractors/markdown.extractor.ts +461 -0
- package/src/services/extractors/pdf.extractor.ts +315 -0
- package/src/services/extractors/text.extractor.ts +118 -0
- package/src/services/extractors/url.extractor.ts +243 -0
- package/src/services/index.ts +235 -0
- package/src/services/ingestion.service.ts +177 -0
- package/src/services/llm/anthropic.ts +400 -0
- package/src/services/llm/base.ts +460 -0
- package/src/services/llm/contradiction-detector.service.ts +526 -0
- package/src/services/llm/heuristics.ts +148 -0
- package/src/services/llm/index.ts +309 -0
- package/src/services/llm/memory-classifier.service.ts +383 -0
- package/src/services/llm/memory-extension-detector.service.ts +523 -0
- package/src/services/llm/mock.ts +470 -0
- package/src/services/llm/openai.ts +398 -0
- package/src/services/llm/prompts.ts +438 -0
- package/src/services/llm/types.ts +373 -0
- package/src/services/memory.repository.ts +1769 -0
- package/src/services/memory.service.ts +1338 -0
- package/src/services/memory.types.ts +234 -0
- package/src/services/persistence/index.ts +295 -0
- package/src/services/pipeline.service.ts +509 -0
- package/src/services/profile.repository.ts +436 -0
- package/src/services/profile.service.ts +560 -0
- package/src/services/profile.types.ts +270 -0
- package/src/services/relationships/detector.ts +1128 -0
- package/src/services/relationships/index.ts +268 -0
- package/src/services/relationships/memory-integration.ts +459 -0
- package/src/services/relationships/strategies.ts +132 -0
- package/src/services/relationships/types.ts +370 -0
- package/src/services/search.service.ts +761 -0
- package/src/services/search.types.ts +220 -0
- package/src/services/secrets.service.ts +384 -0
- package/src/services/vectorstore/base.ts +327 -0
- package/src/services/vectorstore/index.ts +444 -0
- package/src/services/vectorstore/memory.ts +286 -0
- package/src/services/vectorstore/migration.ts +295 -0
- package/src/services/vectorstore/mock.ts +403 -0
- package/src/services/vectorstore/pgvector.ts +695 -0
- package/src/services/vectorstore/types.ts +247 -0
- package/src/startup.ts +389 -0
- package/src/types/api.types.ts +193 -0
- package/src/types/document.types.ts +103 -0
- package/src/types/index.ts +241 -0
- package/src/types/profile.base.ts +133 -0
- package/src/utils/errors.ts +447 -0
- package/src/utils/id.ts +15 -0
- package/src/utils/index.ts +101 -0
- package/src/utils/logger.ts +313 -0
- package/src/utils/sanitization.ts +501 -0
- package/src/utils/secret-validation.ts +273 -0
- package/src/utils/synonyms.ts +188 -0
- package/src/utils/validation.ts +581 -0
- package/src/workers/chunking.worker.ts +242 -0
- package/src/workers/embedding.worker.ts +358 -0
- package/src/workers/extraction.worker.ts +346 -0
- package/src/workers/indexing.worker.ts +505 -0
- package/tsconfig.json +38 -0
|
@@ -0,0 +1,286 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* In-Memory Vector Store
|
|
3
|
+
*
|
|
4
|
+
* A fast, ephemeral vector store implementation that stores all vectors in memory.
|
|
5
|
+
* Suitable for development, testing, and small-scale production use.
|
|
6
|
+
*
|
|
7
|
+
* Features:
|
|
8
|
+
* - O(n) linear search with optimized similarity calculation
|
|
9
|
+
* - Metadata filtering support
|
|
10
|
+
* - No external dependencies
|
|
11
|
+
* - Thread-safe operations
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
import {
|
|
15
|
+
VectorEntry,
|
|
16
|
+
VectorSearchResult,
|
|
17
|
+
SearchOptions,
|
|
18
|
+
AddOptions,
|
|
19
|
+
DeleteOptions,
|
|
20
|
+
VectorStoreConfig,
|
|
21
|
+
VectorStoreStats,
|
|
22
|
+
BatchResult,
|
|
23
|
+
} from './types.js'
|
|
24
|
+
import { BaseVectorStore, validateVector } from './base.js'
|
|
25
|
+
import { ConflictError } from '../../utils/errors.js'
|
|
26
|
+
|
|
27
|
+
/**
 * Shape of an entry as it is held inside the in-memory store: every public
 * `VectorEntry` field plus the namespace the entry was stored under.
 */
interface InternalEntry extends VectorEntry {
  /** Namespace recorded when the entry was added; stripped again before entries are returned by `get()` / `getAllEntries()`. */
  namespace: string
}
|
|
33
|
+
|
|
34
|
+
/**
 * In-Memory Vector Store implementation.
 *
 * All entries live in a single `Map` keyed by entry ID. The namespace is stored
 * on each entry but is not part of the key, so an ID is unique across all
 * namespaces. Searching scans every stored entry.
 *
 * Validation, option merging, filtering, similarity scoring and event emission
 * are delegated to helpers inherited from `BaseVectorStore` (defined in base.js).
 */
export class InMemoryVectorStore extends BaseVectorStore {
  /** Every stored entry, keyed by entry ID. */
  private entries: Map<string, InternalEntry> = new Map()
  /** Set by `initialize()` and reset by `close()`; makes repeated `initialize()` calls a no-op. */
  private initialized = false

  /**
   * @param config - Store configuration; `provider` is always forced to `'memory'`.
   */
  constructor(config: VectorStoreConfig) {
    super({
      ...config,
      provider: 'memory',
    })
  }

  /**
   * Initialize the in-memory store.
   *
   * The first call (or the first call after `close()`) empties the store;
   * later calls return immediately and leave stored entries untouched.
   */
  async initialize(): Promise<void> {
    if (this.initialized) return
    this.entries.clear()
    this.initialized = true
  }

  /**
   * Add a single vector entry.
   *
   * @param entry - Entry to store; checked with the inherited `validateEntry` first.
   * @param options - `namespace` selects the namespace (falls back to
   *   `config.defaultNamespace`, then `'default'`); `overwrite` allows replacing
   *   an existing entry with the same ID.
   * @throws ConflictError if the ID already exists and `overwrite` is not set.
   */
  async add(entry: VectorEntry, options?: AddOptions): Promise<void> {
    this.validateEntry(entry)
    const namespace = options?.namespace ?? this.config.defaultNamespace ?? 'default'

    if (this.entries.has(entry.id) && !options?.overwrite) {
      throw new ConflictError(`Entry with ID ${entry.id} already exists`, 'duplicate', { entryId: entry.id })
    }

    const internalEntry: InternalEntry = {
      ...entry,
      namespace,
      // Keep a caller-supplied creation time; `updatedAt` is always "now".
      createdAt: entry.createdAt ?? new Date(),
      updatedAt: new Date(),
    }

    this.entries.set(entry.id, internalEntry)
    this.emit('add', { id: entry.id })
  }

  /**
   * Add multiple vector entries, one `add()` call per entry.
   *
   * A failing entry does not abort the batch: its ID and error message are
   * recorded and processing continues with the next entry.
   *
   * @returns Counts of successful and failed entries plus the collected errors.
   */
  async addBatch(entries: VectorEntry[], options?: AddOptions): Promise<BatchResult> {
    const result: BatchResult = {
      successful: 0,
      failed: 0,
      errors: [],
    }

    for (const entry of entries) {
      try {
        await this.add(entry, options)
        result.successful++
      } catch (error) {
        result.failed++
        result.errors?.push({
          id: entry.id,
          error: error instanceof Error ? error.message : String(error),
        })
      }
    }

    return result
  }

  /**
   * Update an existing vector entry by merging `updates` into it.
   *
   * Keys in `updates` whose value is `undefined` are ignored, so passing e.g.
   * `{ embedding: undefined }` leaves the stored embedding in place instead of
   * erasing it. The entry's `id` and `namespace` can never be changed here, and
   * `updatedAt` is always set to the current time.
   *
   * @param id - ID of the entry to update.
   * @param updates - Fields to overwrite; a supplied embedding is validated
   *   against the configured dimensions.
   * @returns `false` if no entry with this ID exists, otherwise `true`.
   */
  async update(id: string, updates: Partial<VectorEntry>): Promise<boolean> {
    const existing = this.entries.get(id)
    if (!existing) {
      return false
    }

    // Object spread copies `undefined`-valued keys too, which would wipe stored
    // fields (and bypass the embedding validation below). Drop them first.
    const definedUpdates = Object.fromEntries(
      Object.entries(updates).filter(([, value]) => value !== undefined)
    ) as Partial<VectorEntry>

    // Validate embedding if provided
    if (definedUpdates.embedding) {
      validateVector(definedUpdates.embedding, this.config.dimensions)
    }

    const updated: InternalEntry = {
      ...existing,
      ...definedUpdates,
      id, // Ensure ID cannot be changed
      namespace: existing.namespace, // Preserve namespace
      updatedAt: new Date(),
    }

    this.entries.set(id, updated)
    this.emit('update', { id })
    return true
  }

  /**
   * Delete vector entries.
   *
   * Exactly one selector is applied, in this priority order:
   * 1. `deleteAll` - removes every entry of one namespace (`options.namespace`,
   *    else `config.defaultNamespace`, else `'default'`).
   * 2. `ids` - removes the listed IDs, regardless of namespace.
   * 3. `filter` - removes every entry whose metadata satisfies the inherited
   *    `matchesFilter`, regardless of namespace.
   *
   * A `'delete'` event is emitted only when at least one entry was removed.
   *
   * @returns Number of entries removed.
   */
  async delete(options: DeleteOptions): Promise<number> {
    let deleted = 0

    if (options.deleteAll) {
      const namespace = options.namespace ?? this.config.defaultNamespace ?? 'default'
      for (const [id, entry] of this.entries) {
        if (entry.namespace === namespace) {
          this.entries.delete(id)
          deleted++
        }
      }
    } else if (options.ids && options.ids.length > 0) {
      for (const id of options.ids) {
        if (this.entries.delete(id)) {
          deleted++
        }
      }
    } else if (options.filter) {
      for (const [id, entry] of this.entries) {
        if (this.matchesFilter(entry.metadata, options.filter)) {
          this.entries.delete(id)
          deleted++
        }
      }
    }

    if (deleted > 0) {
      this.emit('delete', { count: deleted })
    }

    return deleted
  }

  /**
   * Get a vector entry by ID.
   *
   * @returns A shallow copy of the entry without the internal `namespace`
   *   field, or `null` if the ID is unknown. The embedding and metadata are
   *   the same object references that are held in the store.
   */
  async get(id: string): Promise<VectorEntry | null> {
    const entry = this.entries.get(id)
    if (!entry) return null

    // Return copy without internal fields
    const { namespace: _namespace, ...publicEntry } = entry
    return publicEntry
  }

  /**
   * Check if a vector entry exists (in any namespace).
   */
  async exists(id: string): Promise<boolean> {
    return this.entries.has(id)
  }

  /**
   * Search for the stored vectors most similar to `query`.
   *
   * Every stored entry is passed through the inherited `applyFilters`, scored
   * with the inherited `calculateSimilarity`, dropped if its score is below
   * `threshold`, then sorted by descending score and truncated to `limit`.
   * NOTE(review): the effective defaults for `limit`, `threshold`,
   * `includeVectors` and `includeMetadata` come from the inherited
   * `mergeOptions` - confirm in base.js.
   *
   * @param query - Query vector; validated against the configured dimensions.
   * @returns Up to `limit` results, best match first. `embedding` is only set
   *   when `includeVectors` is on; `metadata` is `{}` when `includeMetadata` is off.
   */
  async search(query: number[], options?: SearchOptions): Promise<VectorSearchResult[]> {
    validateVector(query, this.config.dimensions)
    const opts = this.mergeOptions(options)

    // Get all entries and apply filters
    const allEntries = Array.from(this.entries.values())
    const candidates = this.applyFilters(allEntries as VectorEntry[], opts.filters) as InternalEntry[]

    // Calculate similarities
    const results: VectorSearchResult[] = []
    for (const entry of candidates) {
      const score = this.calculateSimilarity(query, entry.embedding)

      if (score >= opts.threshold) {
        results.push({
          id: entry.id,
          score,
          embedding: opts.includeVectors ? entry.embedding : undefined,
          metadata: opts.includeMetadata ? entry.metadata : {},
        })
      }
    }

    // Sort by score descending and apply limit
    results.sort((a, b) => b.score - a.score)

    this.emit('search', {
      resultsCount: Math.min(results.length, opts.limit),
      totalCandidates: candidates.length,
    })

    return results.slice(0, opts.limit)
  }

  /**
   * Get statistics about the vector store.
   *
   * @returns Total entry count, configured dimensions, the fixed index type
   *   `'flat'`, the configured metric (defaulting to `'cosine'`), and the
   *   distinct namespaces currently present among stored entries.
   */
  async getStats(): Promise<VectorStoreStats> {
    const namespaces = new Set<string>()
    for (const entry of this.entries.values()) {
      namespaces.add(entry.namespace)
    }

    return {
      totalVectors: this.entries.size,
      dimensions: this.config.dimensions,
      indexType: 'flat',
      metric: this.config.metric ?? 'cosine',
      indexBuilt: true, // Always true for in-memory
      namespaces: Array.from(namespaces),
    }
  }

  /**
   * Clear all vectors from the store (every namespace) and emit a `'delete'`
   * event with `{ deleteAll: true }`.
   */
  async clear(): Promise<void> {
    this.entries.clear()
    this.emit('delete', { deleteAll: true })
  }

  /**
   * Close the vector store: drops all entries and resets the initialized flag
   * so that a later `initialize()` runs again. No event is emitted.
   */
  async close(): Promise<void> {
    this.entries.clear()
    this.initialized = false
  }

  /**
   * Get all entries (for migration/export).
   *
   * @returns Shallow copies of every stored entry, in insertion order of the
   *   underlying `Map`, with the internal `namespace` field removed.
   */
  async getAllEntries(): Promise<VectorEntry[]> {
    return Array.from(this.entries.values()).map(({ namespace: _namespace, ...entry }) => entry)
  }

  /**
   * Get the number of entries currently stored across all namespaces.
   */
  size(): number {
    return this.entries.size
  }
}
|
|
273
|
+
|
|
274
|
+
/**
 * Convenience factory for an `InMemoryVectorStore`.
 *
 * @param dimensions - Vector dimensionality placed in the store configuration.
 * @param options - Any remaining configuration fields; they are spread into
 *   the configuration after `provider` and `dimensions`.
 * @returns A newly constructed store. `initialize()` is not called here.
 */
export function createInMemoryVectorStore(
  dimensions: number,
  options?: Partial<Omit<VectorStoreConfig, 'provider' | 'dimensions'>>
): InMemoryVectorStore {
  const storeConfig: VectorStoreConfig = { provider: 'memory', dimensions, ...options }
  return new InMemoryVectorStore(storeConfig)
}
|
|
@@ -0,0 +1,295 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Vector Store Migration Utilities
|
|
3
|
+
*
|
|
4
|
+
* Utilities for migrating vector data between different vector store implementations.
|
|
5
|
+
* Supports batch processing with progress tracking and error handling.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { MigrationProgress, BatchResult } from './types.js'
|
|
9
|
+
import { InMemoryVectorStore } from './memory.js'
|
|
10
|
+
import { PgVectorStore } from './pgvector.js'
|
|
11
|
+
|
|
12
|
+
/**
 * Migrate vectors from InMemoryVectorStore to PgVectorStore.
 *
 * Reads every entry from `source` with `getAllEntries()` and writes them to
 * `target` in consecutive slices via `addBatch(slice, { overwrite: true })`,
 * so entries that already exist in the target are replaced. Batches are
 * written sequentially; a batch with failures does not stop the migration.
 *
 * @param source - Store to read from.
 * @param target - Store to write to.
 * @param options - `batchSize`: entries per `addBatch` call (default 100, must
 *   be >= 1). `onProgress`: called once after every batch.
 * @returns Summed `successful` / `failed` counts and the concatenated errors
 *   of all batches.
 * @throws RangeError if `batchSize` is not a number >= 1.
 */
export async function migrateMemoryToPgVector(
  source: InMemoryVectorStore,
  target: PgVectorStore,
  options?: {
    batchSize?: number
    onProgress?: (progress: MigrationProgress) => void
  }
): Promise<BatchResult> {
  const batchSize = options?.batchSize ?? 100

  // The batch loop advances by `batchSize`: zero or a negative value would
  // never reach the end of the entries (endless loop), and NaN would stop
  // after one empty batch. Fail fast before reading any data.
  if (!(batchSize >= 1)) {
    throw new RangeError(`batchSize must be a number >= 1, received ${batchSize}`)
  }

  // Get all entries from source
  const entries = await source.getAllEntries()
  const total = entries.length

  const result: BatchResult = {
    successful: 0,
    failed: 0,
    errors: [],
  }

  if (total === 0) {
    return result
  }

  const totalBatches = Math.ceil(total / batchSize)
  let currentBatch = 0
  const startTime = Date.now()

  // Process in batches
  for (let i = 0; i < total; i += batchSize) {
    const batch = entries.slice(i, i + batchSize)
    currentBatch++

    // Add batch to target
    const batchResult = await target.addBatch(batch, { overwrite: true })

    // Fold the batch outcome into the overall result
    result.successful += batchResult.successful
    result.failed += batchResult.failed
    if (batchResult.errors && batchResult.errors.length > 0) {
      result.errors?.push(...batchResult.errors)
    }

    // Report progress
    if (options?.onProgress) {
      // `migrated` counts entries attempted so far (successful or not).
      const migrated = Math.min(i + batchSize, total)
      const elapsed = Date.now() - startTime
      // Linear extrapolation from the average time per attempted entry.
      const estimatedTotal = (elapsed / migrated) * total

      const progress: MigrationProgress = {
        total,
        migrated,
        percentage: (migrated / total) * 100,
        currentBatch,
        totalBatches,
        estimatedTimeRemaining: (estimatedTotal - elapsed) / 1000, // seconds
      }
      options.onProgress(progress)
    }
  }

  return result
}
|
|
87
|
+
|
|
88
|
+
/**
 * Migrate all vectors from `source` to `target`, choosing the routine from the
 * runtime classes of the two stores.
 *
 * - memory -> pgvector uses `migrateMemoryToPgVector`.
 * - every other combination uses `genericMigration`.
 * - when both stores are of the same class a warning is logged first, and the
 *   migration still runs.
 *
 * @param options - Passed through unchanged to the selected routine.
 * @returns The `BatchResult` produced by the selected routine.
 */
export async function migrateVectorStore(
  source: InMemoryVectorStore | PgVectorStore,
  target: InMemoryVectorStore | PgVectorStore,
  options?: {
    batchSize?: number
    onProgress?: (progress: MigrationProgress) => void
  }
): Promise<BatchResult> {
  const sameStoreType =
    (source instanceof InMemoryVectorStore && target instanceof InMemoryVectorStore) ||
    (source instanceof PgVectorStore && target instanceof PgVectorStore)

  if (sameStoreType) {
    console.warn('Migrating between same store types. Consider using copy instead.')
  }

  // Dedicated path for the memory -> pgvector direction
  if (source instanceof InMemoryVectorStore && target instanceof PgVectorStore) {
    return migrateMemoryToPgVector(source, target, options)
  }

  // Everything else goes through the generic routine
  return genericMigration(source, target, options)
}
|
|
116
|
+
|
|
117
|
+
/**
 * Generic migration implementation used for every store combination that has
 * no dedicated routine.
 *
 * Reads every entry from `source` with `getAllEntries()` and writes them to
 * `target` in consecutive slices via `addBatch(slice, { overwrite: true })`.
 * Batches are written sequentially; failures inside a batch are accumulated
 * and do not stop the migration.
 *
 * @param source - Store to read from.
 * @param target - Store to write to.
 * @param options - `batchSize`: entries per `addBatch` call (default 100, must
 *   be >= 1). `onProgress`: called once after every batch.
 * @returns Summed `successful` / `failed` counts and the concatenated errors
 *   of all batches.
 * @throws RangeError if `batchSize` is not a number >= 1.
 */
async function genericMigration(
  source: InMemoryVectorStore | PgVectorStore,
  target: InMemoryVectorStore | PgVectorStore,
  options?: {
    batchSize?: number
    onProgress?: (progress: MigrationProgress) => void
  }
): Promise<BatchResult> {
  const batchSize = options?.batchSize ?? 100

  // The loop below steps by `batchSize`; zero or negative values would loop
  // forever and NaN would end after a single empty batch, so reject them.
  if (!(batchSize >= 1)) {
    throw new RangeError(`batchSize must be a number >= 1, received ${batchSize}`)
  }

  // Get all entries
  const entries = await source.getAllEntries()
  const total = entries.length

  const result: BatchResult = {
    successful: 0,
    failed: 0,
    errors: [],
  }

  if (total === 0) {
    return result
  }

  const totalBatches = Math.ceil(total / batchSize)
  let currentBatch = 0
  const startTime = Date.now()

  // Process in batches
  for (let i = 0; i < total; i += batchSize) {
    const batch = entries.slice(i, i + batchSize)
    currentBatch++

    // Add batch to target
    const batchResult = await target.addBatch(batch, { overwrite: true })

    // Fold the batch outcome into the overall result
    result.successful += batchResult.successful
    result.failed += batchResult.failed
    if (batchResult.errors && batchResult.errors.length > 0) {
      result.errors?.push(...batchResult.errors)
    }

    // Report progress
    if (options?.onProgress) {
      // `migrated` counts entries attempted so far (successful or not).
      const migrated = Math.min(i + batchSize, total)
      const elapsed = Date.now() - startTime
      // Linear extrapolation from the average time per attempted entry.
      const estimatedTotal = (elapsed / migrated) * total

      const progress: MigrationProgress = {
        total,
        migrated,
        percentage: (migrated / total) * 100,
        currentBatch,
        totalBatches,
        estimatedTimeRemaining: (estimatedTotal - elapsed) / 1000, // seconds
      }
      options.onProgress(progress)
    }
  }

  return result
}
|
|
192
|
+
|
|
193
|
+
/**
 * Serialize a value to JSON with the keys of every plain object sorted, so two
 * values that differ only in property order produce the same string. All other
 * `JSON.stringify` behaviour (e.g. `toJSON`, dropped `undefined` values) is kept.
 */
function stableJsonForComparison(value: unknown): string | undefined {
  return JSON.stringify(value, (_key: string, val: unknown) => {
    if (val === null || typeof val !== 'object' || Array.isArray(val)) return val
    const proto: unknown = Object.getPrototypeOf(val)
    // Only plain objects are re-ordered; anything else keeps default handling.
    if (proto !== Object.prototype && proto !== null) return val
    return Object.fromEntries(
      Object.entries(val as Record<string, unknown>).sort(([a], [b]) => (a < b ? -1 : a > b ? 1 : 0))
    )
  })
}

/**
 * Verify a migration by comparing vector counts, dimensions and a random
 * sample of entries between `source` and `target`.
 *
 * For each sampled source entry the target entry with the same ID must exist,
 * its embedding must have the same length and agree element-wise within a
 * small absolute tolerance, and its metadata must be equal when serialized to
 * JSON irrespective of object key order. An entry counts as mismatched at the
 * first check that fails.
 *
 * @param source - Store the data was migrated from.
 * @param target - Store the data was migrated to.
 * @param sampleSize - Maximum number of randomly chosen entries to compare
 *   (default 10; capped at the number of source entries).
 * @returns `success` is true only when `issues` is empty; the counts describe
 *   the totals reported by `getStats()` and the outcome of the sample checks.
 */
export async function verifyMigration(
  source: InMemoryVectorStore | PgVectorStore,
  target: InMemoryVectorStore | PgVectorStore,
  sampleSize = 10
): Promise<{
  success: boolean
  issues: string[]
  sourceCount: number
  targetCount: number
  samplesMatch: number
  samplesMismatch: number
}> {
  // Absolute per-component tolerance for embedding comparison.
  // NOTE(review): presumably allows for reduced float precision in the target store - confirm.
  const embeddingTolerance = 0.0001

  const issues: string[] = []
  let samplesMatch = 0
  let samplesMismatch = 0

  // Compare counts
  const sourceStats = await source.getStats()
  const targetStats = await target.getStats()
  const sourceCount = sourceStats.totalVectors
  const targetCount = targetStats.totalVectors

  if (sourceCount !== targetCount) {
    issues.push(`Vector count mismatch: source has ${sourceCount}, target has ${targetCount}`)
  }

  // Compare dimensions
  if (sourceStats.dimensions !== targetStats.dimensions) {
    issues.push(`Dimension mismatch: source has ${sourceStats.dimensions}, target has ${targetStats.dimensions}`)
  }

  // Sample verification
  const sourceEntries = await source.getAllEntries()
  const sampleIndices = new Set<number>()

  // Draw distinct random indices until the sample is full
  while (sampleIndices.size < Math.min(sampleSize, sourceEntries.length)) {
    sampleIndices.add(Math.floor(Math.random() * sourceEntries.length))
  }

  // Compare samples
  for (const index of sampleIndices) {
    const sourceEntry = sourceEntries[index]
    if (!sourceEntry) continue

    const targetEntry = await target.get(sourceEntry.id)

    if (!targetEntry) {
      issues.push(`Entry ${sourceEntry.id} not found in target`)
      samplesMismatch++
      continue
    }

    // Compare embeddings: lengths must agree first, otherwise extra target
    // components would go unchecked and missing ones could pass as zeros.
    const embeddingsMatch =
      sourceEntry.embedding.length === targetEntry.embedding.length &&
      sourceEntry.embedding.every(
        // A missing component becomes NaN, which never satisfies the tolerance.
        (val, i) => Math.abs(val - (targetEntry.embedding[i] ?? Number.NaN)) < embeddingTolerance
      )

    if (!embeddingsMatch) {
      issues.push(`Embedding mismatch for entry ${sourceEntry.id}`)
      samplesMismatch++
      continue
    }

    // Compare metadata independent of key order, so a store that returns
    // object keys in a different order is not reported as a mismatch.
    const metadataMatch =
      stableJsonForComparison(sourceEntry.metadata) === stableJsonForComparison(targetEntry.metadata)

    if (!metadataMatch) {
      issues.push(`Metadata mismatch for entry ${sourceEntry.id}`)
      samplesMismatch++
      continue
    }

    samplesMatch++
  }

  return {
    success: issues.length === 0,
    issues,
    sourceCount,
    targetCount,
    samplesMatch,
    samplesMismatch,
  }
}
|
|
281
|
+
|
|
282
|
+
/**
 * Build an `onProgress` callback that renders each progress update as a
 * single human-readable line.
 *
 * The line has the form
 * `Migration progress: <migrated>/<total> (<pct>%) - Batch <n>/<m>` followed by
 * ` - ETA: <seconds>s` when `estimatedTimeRemaining` is truthy (so an ETA of 0
 * or a missing ETA is omitted).
 *
 * @param onUpdate - Receives the rendered line; when omitted the line is
 *   written with `console.log`.
 * @returns A function suitable for the `onProgress` migration option.
 */
export function createProgressReporter(onUpdate?: (message: string) => void): (progress: MigrationProgress) => void {
  return (progress: MigrationProgress) => {
    const segments = [
      `Migration progress: ${progress.migrated}/${progress.total} (${progress.percentage.toFixed(1)}%)`,
      `Batch ${progress.currentBatch}/${progress.totalBatches}`,
    ]

    if (progress.estimatedTimeRemaining) {
      segments.push(`ETA: ${Math.round(progress.estimatedTimeRemaining)}s`)
    }

    const message = segments.join(' - ')

    if (!onUpdate) {
      console.log(message)
      return
    }
    onUpdate(message)
  }
}
|