@twelvehart/supermemory-runtime 1.0.0-next.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (156) hide show
  1. package/.env.example +57 -0
  2. package/README.md +374 -0
  3. package/dist/index.js +189 -0
  4. package/dist/mcp/index.js +1132 -0
  5. package/docker-compose.prod.yml +91 -0
  6. package/docker-compose.yml +358 -0
  7. package/drizzle/0000_dapper_the_professor.sql +159 -0
  8. package/drizzle/0001_api_keys.sql +51 -0
  9. package/drizzle/meta/0000_snapshot.json +1532 -0
  10. package/drizzle/meta/_journal.json +13 -0
  11. package/drizzle.config.ts +20 -0
  12. package/package.json +114 -0
  13. package/scripts/add-extraction-job.ts +122 -0
  14. package/scripts/benchmark-pgvector.ts +122 -0
  15. package/scripts/bootstrap.sh +209 -0
  16. package/scripts/check-runtime-pack.ts +111 -0
  17. package/scripts/claude-mcp-config.ts +336 -0
  18. package/scripts/docker-entrypoint.sh +183 -0
  19. package/scripts/doctor.ts +377 -0
  20. package/scripts/init-db.sql +33 -0
  21. package/scripts/install.sh +1110 -0
  22. package/scripts/mcp-setup.ts +271 -0
  23. package/scripts/migrations/001_create_pgvector_extension.sql +31 -0
  24. package/scripts/migrations/002_create_memory_embeddings_table.sql +75 -0
  25. package/scripts/migrations/003_create_hnsw_index.sql +94 -0
  26. package/scripts/migrations/004_create_memory_embeddings_standalone.sql +70 -0
  27. package/scripts/migrations/005_create_chunks_table.sql +95 -0
  28. package/scripts/migrations/006_create_processing_queue.sql +45 -0
  29. package/scripts/migrations/generate_test_data.sql +42 -0
  30. package/scripts/migrations/phase1_comprehensive_test.sql +204 -0
  31. package/scripts/migrations/run_migrations.sh +286 -0
  32. package/scripts/migrations/test_hnsw_index.sql +255 -0
  33. package/scripts/pre-commit-secrets +282 -0
  34. package/scripts/run-extraction-worker.ts +46 -0
  35. package/scripts/run-phase1-tests.sh +291 -0
  36. package/scripts/setup.ts +222 -0
  37. package/scripts/smoke-install.sh +12 -0
  38. package/scripts/test-health-endpoint.sh +328 -0
  39. package/src/api/index.ts +2 -0
  40. package/src/api/middleware/auth.ts +80 -0
  41. package/src/api/middleware/csrf.ts +308 -0
  42. package/src/api/middleware/errorHandler.ts +166 -0
  43. package/src/api/middleware/rateLimit.ts +360 -0
  44. package/src/api/middleware/validation.ts +514 -0
  45. package/src/api/routes/documents.ts +286 -0
  46. package/src/api/routes/profiles.ts +237 -0
  47. package/src/api/routes/search.ts +71 -0
  48. package/src/api/stores/index.ts +58 -0
  49. package/src/config/bootstrap-env.ts +3 -0
  50. package/src/config/env.ts +71 -0
  51. package/src/config/feature-flags.ts +25 -0
  52. package/src/config/index.ts +140 -0
  53. package/src/config/secrets.config.ts +291 -0
  54. package/src/db/client.ts +92 -0
  55. package/src/db/index.ts +73 -0
  56. package/src/db/postgres.ts +72 -0
  57. package/src/db/schema/chunks.schema.ts +31 -0
  58. package/src/db/schema/containers.schema.ts +46 -0
  59. package/src/db/schema/documents.schema.ts +49 -0
  60. package/src/db/schema/embeddings.schema.ts +32 -0
  61. package/src/db/schema/index.ts +11 -0
  62. package/src/db/schema/memories.schema.ts +72 -0
  63. package/src/db/schema/profiles.schema.ts +34 -0
  64. package/src/db/schema/queue.schema.ts +59 -0
  65. package/src/db/schema/relationships.schema.ts +42 -0
  66. package/src/db/schema.ts +223 -0
  67. package/src/db/worker-connection.ts +47 -0
  68. package/src/index.ts +235 -0
  69. package/src/mcp/CLAUDE.md +1 -0
  70. package/src/mcp/index.ts +1380 -0
  71. package/src/mcp/legacyState.ts +22 -0
  72. package/src/mcp/rateLimit.ts +358 -0
  73. package/src/mcp/resources.ts +309 -0
  74. package/src/mcp/results.ts +104 -0
  75. package/src/mcp/tools.ts +401 -0
  76. package/src/queues/config.ts +119 -0
  77. package/src/queues/index.ts +289 -0
  78. package/src/sdk/client.ts +225 -0
  79. package/src/sdk/errors.ts +266 -0
  80. package/src/sdk/http.ts +560 -0
  81. package/src/sdk/index.ts +244 -0
  82. package/src/sdk/resources/base.ts +65 -0
  83. package/src/sdk/resources/connections.ts +204 -0
  84. package/src/sdk/resources/documents.ts +163 -0
  85. package/src/sdk/resources/index.ts +10 -0
  86. package/src/sdk/resources/memories.ts +150 -0
  87. package/src/sdk/resources/search.ts +60 -0
  88. package/src/sdk/resources/settings.ts +36 -0
  89. package/src/sdk/types.ts +674 -0
  90. package/src/services/chunking/index.ts +451 -0
  91. package/src/services/chunking.service.ts +650 -0
  92. package/src/services/csrf.service.ts +252 -0
  93. package/src/services/documents.repository.ts +219 -0
  94. package/src/services/documents.service.ts +191 -0
  95. package/src/services/embedding.service.ts +404 -0
  96. package/src/services/extraction.service.ts +300 -0
  97. package/src/services/extractors/code.extractor.ts +451 -0
  98. package/src/services/extractors/index.ts +9 -0
  99. package/src/services/extractors/markdown.extractor.ts +461 -0
  100. package/src/services/extractors/pdf.extractor.ts +315 -0
  101. package/src/services/extractors/text.extractor.ts +118 -0
  102. package/src/services/extractors/url.extractor.ts +243 -0
  103. package/src/services/index.ts +235 -0
  104. package/src/services/ingestion.service.ts +177 -0
  105. package/src/services/llm/anthropic.ts +400 -0
  106. package/src/services/llm/base.ts +460 -0
  107. package/src/services/llm/contradiction-detector.service.ts +526 -0
  108. package/src/services/llm/heuristics.ts +148 -0
  109. package/src/services/llm/index.ts +309 -0
  110. package/src/services/llm/memory-classifier.service.ts +383 -0
  111. package/src/services/llm/memory-extension-detector.service.ts +523 -0
  112. package/src/services/llm/mock.ts +470 -0
  113. package/src/services/llm/openai.ts +398 -0
  114. package/src/services/llm/prompts.ts +438 -0
  115. package/src/services/llm/types.ts +373 -0
  116. package/src/services/memory.repository.ts +1769 -0
  117. package/src/services/memory.service.ts +1338 -0
  118. package/src/services/memory.types.ts +234 -0
  119. package/src/services/persistence/index.ts +295 -0
  120. package/src/services/pipeline.service.ts +509 -0
  121. package/src/services/profile.repository.ts +436 -0
  122. package/src/services/profile.service.ts +560 -0
  123. package/src/services/profile.types.ts +270 -0
  124. package/src/services/relationships/detector.ts +1128 -0
  125. package/src/services/relationships/index.ts +268 -0
  126. package/src/services/relationships/memory-integration.ts +459 -0
  127. package/src/services/relationships/strategies.ts +132 -0
  128. package/src/services/relationships/types.ts +370 -0
  129. package/src/services/search.service.ts +761 -0
  130. package/src/services/search.types.ts +220 -0
  131. package/src/services/secrets.service.ts +384 -0
  132. package/src/services/vectorstore/base.ts +327 -0
  133. package/src/services/vectorstore/index.ts +444 -0
  134. package/src/services/vectorstore/memory.ts +286 -0
  135. package/src/services/vectorstore/migration.ts +295 -0
  136. package/src/services/vectorstore/mock.ts +403 -0
  137. package/src/services/vectorstore/pgvector.ts +695 -0
  138. package/src/services/vectorstore/types.ts +247 -0
  139. package/src/startup.ts +389 -0
  140. package/src/types/api.types.ts +193 -0
  141. package/src/types/document.types.ts +103 -0
  142. package/src/types/index.ts +241 -0
  143. package/src/types/profile.base.ts +133 -0
  144. package/src/utils/errors.ts +447 -0
  145. package/src/utils/id.ts +15 -0
  146. package/src/utils/index.ts +101 -0
  147. package/src/utils/logger.ts +313 -0
  148. package/src/utils/sanitization.ts +501 -0
  149. package/src/utils/secret-validation.ts +273 -0
  150. package/src/utils/synonyms.ts +188 -0
  151. package/src/utils/validation.ts +581 -0
  152. package/src/workers/chunking.worker.ts +242 -0
  153. package/src/workers/embedding.worker.ts +358 -0
  154. package/src/workers/extraction.worker.ts +346 -0
  155. package/src/workers/indexing.worker.ts +505 -0
  156. package/tsconfig.json +38 -0
@@ -0,0 +1,286 @@
1
+ /**
2
+ * In-Memory Vector Store
3
+ *
4
+ * A fast, ephemeral vector store implementation that stores all vectors in memory.
5
+ * Suitable for development, testing, and small-scale production use.
6
+ *
7
+ * Features:
8
+ * - O(n) linear search with optimized similarity calculation
9
+ * - Metadata filtering support
10
+ * - No external dependencies
11
+ * - Thread-safe operations
12
+ */
13
+
14
+ import {
15
+ VectorEntry,
16
+ VectorSearchResult,
17
+ SearchOptions,
18
+ AddOptions,
19
+ DeleteOptions,
20
+ VectorStoreConfig,
21
+ VectorStoreStats,
22
+ BatchResult,
23
+ } from './types.js'
24
+ import { BaseVectorStore, validateVector } from './base.js'
25
+ import { ConflictError } from '../../utils/errors.js'
26
+
27
/**
 * Stored form of a vector entry: the caller-supplied `VectorEntry` fields plus
 * the namespace the entry was added under.
 */
interface InternalEntry extends VectorEntry {
  /** Namespace resolved when the entry was added; removed again before entries are returned to callers. */
  namespace: string
}
33
+
34
/**
 * Vector store that keeps every entry in a process-local `Map` keyed by entry ID.
 *
 * Nothing is persisted: the first `initialize()` and every `close()` empty the
 * map. Searching is a linear pass over the stored entries. Entry validation,
 * option merging, filtering, similarity scoring and event emission are all
 * delegated to members inherited from `BaseVectorStore`.
 */
export class InMemoryVectorStore extends BaseVectorStore {
  private entries: Map<string, InternalEntry> = new Map()
  private initialized = false

  /**
   * @param config - Store configuration; `provider` is always forced to `'memory'`.
   */
  constructor(config: VectorStoreConfig) {
    super({ ...config, provider: 'memory' })
  }

  /**
   * Prepare the store for use. The first call empties the map and marks the
   * store initialized; later calls do nothing until `close()` resets the flag.
   */
  async initialize(): Promise<void> {
    if (!this.initialized) {
      this.entries.clear()
      this.initialized = true
    }
  }

  /**
   * Store a single entry.
   *
   * @param entry - Entry to store; validated with the inherited `validateEntry`.
   * @param options - `namespace` falls back to the configured default and then
   *   to `'default'`; `overwrite` allows replacing an entry with the same ID.
   * @throws ConflictError if the ID is already present and `overwrite` is not set.
   */
  async add(entry: VectorEntry, options?: AddOptions): Promise<void> {
    this.validateEntry(entry)

    const targetNamespace = options?.namespace ?? this.config.defaultNamespace ?? 'default'
    const alreadyStored = this.entries.has(entry.id)
    if (alreadyStored && !options?.overwrite) {
      throw new ConflictError(`Entry with ID ${entry.id} already exists`, 'duplicate', { entryId: entry.id })
    }

    const stored: InternalEntry = {
      ...entry,
      namespace: targetNamespace,
      createdAt: entry.createdAt ?? new Date(),
      updatedAt: new Date(),
    }
    this.entries.set(entry.id, stored)
    this.emit('add', { id: entry.id })
  }

  /**
   * Store several entries one after another via `add`.
   *
   * A failing entry is recorded (ID plus error message) and the loop continues
   * with the next one.
   *
   * @returns Counts of stored and failed entries plus the collected errors.
   */
  async addBatch(entries: VectorEntry[], options?: AddOptions): Promise<BatchResult> {
    const errors: NonNullable<BatchResult['errors']> = []
    let successful = 0

    for (const entry of entries) {
      try {
        await this.add(entry, options)
        successful += 1
      } catch (cause) {
        errors.push({
          id: entry.id,
          error: cause instanceof Error ? cause.message : String(cause),
        })
      }
    }

    return { successful, failed: errors.length, errors }
  }

  /**
   * Merge `updates` into an existing entry.
   *
   * The ID and namespace of the stored entry are always kept, and `updatedAt`
   * is refreshed. A supplied embedding is validated against the configured
   * dimensions first.
   *
   * @returns `false` when no entry with `id` exists, otherwise `true`.
   */
  async update(id: string, updates: Partial<VectorEntry>): Promise<boolean> {
    const current = this.entries.get(id)
    if (!current) return false

    if (updates.embedding) {
      validateVector(updates.embedding, this.config.dimensions)
    }

    const merged: InternalEntry = {
      ...current,
      ...updates,
      // Identity fields are written last so `updates` cannot change them.
      id,
      namespace: current.namespace,
      updatedAt: new Date(),
    }
    this.entries.set(id, merged)
    this.emit('update', { id })
    return true
  }

  /**
   * Remove entries. Only the first applicable selector is used, in this order:
   * `deleteAll` (limited to one namespace), a non-empty `ids` list, then
   * a metadata `filter`.
   *
   * @returns Number of entries removed; a `'delete'` event is emitted only when it is non-zero.
   */
  async delete(options: DeleteOptions): Promise<number> {
    let removed = 0

    // Removing the current key while iterating a Map is well-defined in JS.
    const removeWhere = (shouldRemove: (entry: InternalEntry) => boolean): void => {
      for (const [id, entry] of this.entries) {
        if (shouldRemove(entry)) {
          this.entries.delete(id)
          removed++
        }
      }
    }

    if (options.deleteAll) {
      const targetNamespace = options.namespace ?? this.config.defaultNamespace ?? 'default'
      removeWhere((entry) => entry.namespace === targetNamespace)
    } else if (options.ids && options.ids.length > 0) {
      for (const id of options.ids) {
        if (this.entries.delete(id)) removed++
      }
    } else if (options.filter) {
      const { filter } = options
      removeWhere((entry) => this.matchesFilter(entry.metadata, filter))
    }

    if (removed > 0) {
      this.emit('delete', { count: removed })
    }
    return removed
  }

  /**
   * Look up one entry.
   *
   * @returns A shallow copy of the entry without the internal `namespace`
   *   field, or `null` if the ID is unknown.
   */
  async get(id: string): Promise<VectorEntry | null> {
    const stored = this.entries.get(id)
    if (!stored) return null

    const { namespace: _namespace, ...withoutNamespace } = stored
    return withoutNamespace
  }

  /**
   * @returns Whether an entry with `id` is currently stored.
   */
  async exists(id: string): Promise<boolean> {
    return this.entries.has(id)
  }

  /**
   * Rank stored entries against `query`.
   *
   * Entries surviving the inherited `applyFilters` are scored with the
   * inherited `calculateSimilarity`; those scoring at least the merged
   * `threshold` are sorted best-first and cut to `limit`. A `'search'` event
   * reports the returned count and the number of candidates scored.
   */
  async search(query: number[], options?: SearchOptions): Promise<VectorSearchResult[]> {
    validateVector(query, this.config.dimensions)
    const opts = this.mergeOptions(options)

    const stored = Array.from(this.entries.values())
    const candidates = this.applyFilters(stored as VectorEntry[], opts.filters) as InternalEntry[]

    const ranked = candidates
      .map((entry) => ({ entry, score: this.calculateSimilarity(query, entry.embedding) }))
      .filter(({ score }) => score >= opts.threshold)
      .map(
        ({ entry, score }): VectorSearchResult => ({
          id: entry.id,
          score,
          embedding: opts.includeVectors ? entry.embedding : undefined,
          metadata: opts.includeMetadata ? entry.metadata : {},
        })
      )
      .sort((a, b) => b.score - a.score)

    this.emit('search', {
      resultsCount: Math.min(ranked.length, opts.limit),
      totalCandidates: candidates.length,
    })

    return ranked.slice(0, opts.limit)
  }

  /**
   * Summarize the store: entry count, configured dimensions and metric
   * (defaulting to `'cosine'`), and the distinct namespaces currently in use.
   */
  async getStats(): Promise<VectorStoreStats> {
    const namespaces = new Set(Array.from(this.entries.values(), (entry) => entry.namespace))

    return {
      totalVectors: this.entries.size,
      dimensions: this.config.dimensions,
      indexType: 'flat',
      metric: this.config.metric ?? 'cosine',
      indexBuilt: true, // There is no index to build for a linear scan.
      namespaces: Array.from(namespaces),
    }
  }

  /**
   * Remove every entry in every namespace and emit a `'delete'` event flagged `deleteAll`.
   */
  async clear(): Promise<void> {
    this.entries.clear()
    this.emit('delete', { deleteAll: true })
  }

  /**
   * Drop all entries and mark the store uninitialized so `initialize()` runs again.
   */
  async close(): Promise<void> {
    this.entries.clear()
    this.initialized = false
  }

  /**
   * Export every stored entry (used for migration/export).
   *
   * @returns Shallow copies of all entries without the internal `namespace` field.
   */
  async getAllEntries(): Promise<VectorEntry[]> {
    const exported: VectorEntry[] = []
    for (const { namespace: _namespace, ...entry } of this.entries.values()) {
      exported.push(entry)
    }
    return exported
  }

  /**
   * @returns Number of stored entries across all namespaces.
   */
  size(): number {
    return this.entries.size
  }
}
273
+
274
/**
 * Convenience factory for an `InMemoryVectorStore`.
 *
 * @param dimensions - Vector dimensionality the store is configured with.
 * @param options - Remaining `VectorStoreConfig` fields, spread after the
 *   values set here.
 * @returns A newly constructed store; `initialize()` is not called here.
 */
export function createInMemoryVectorStore(
  dimensions: number,
  options?: Partial<Omit<VectorStoreConfig, 'provider' | 'dimensions'>>
): InMemoryVectorStore {
  const config: VectorStoreConfig = { provider: 'memory', dimensions, ...options }
  return new InMemoryVectorStore(config)
}
@@ -0,0 +1,295 @@
1
+ /**
2
+ * Vector Store Migration Utilities
3
+ *
4
+ * Utilities for migrating vector data between different vector store implementations.
5
+ * Supports batch processing with progress tracking and error handling.
6
+ */
7
+
8
+ import { MigrationProgress, BatchResult } from './types.js'
9
+ import { InMemoryVectorStore } from './memory.js'
10
+ import { PgVectorStore } from './pgvector.js'
11
+
12
/**
 * Copy every vector from an in-memory store into a pgvector-backed store.
 *
 * Entries are read once with `source.getAllEntries()` and written with
 * `target.addBatch(slice, { overwrite: true })`, one slice of `batchSize`
 * entries at a time. The counts and errors reported by each `addBatch` call
 * are accumulated into a single result; an exception thrown by `addBatch` or
 * by `onProgress` aborts the migration.
 *
 * @param source - Store to read from.
 * @param target - Store to write to.
 * @param options - `batchSize`: entries per `addBatch` call (default 100, must
 *   be >= 1). `onProgress`: called after each batch with running totals and a
 *   linearly extrapolated time remaining in seconds.
 * @returns Totals across all batches.
 * @throws RangeError if `batchSize` is NaN or smaller than 1.
 */
export async function migrateMemoryToPgVector(
  source: InMemoryVectorStore,
  target: PgVectorStore,
  options?: {
    batchSize?: number
    onProgress?: (progress: MigrationProgress) => void
  }
): Promise<BatchResult> {
  const batchSize = options?.batchSize ?? 100
  // A zero or negative step never advances the loop below, and NaN ends it
  // after an empty first slice, so reject those before touching either store.
  if (!(batchSize >= 1)) {
    throw new RangeError(`batchSize must be a number >= 1, received ${batchSize}`)
  }

  const entries = await source.getAllEntries()
  const total = entries.length
  const result: BatchResult = {
    successful: 0,
    failed: 0,
    errors: [],
  }
  if (total === 0) {
    return result
  }

  const totalBatches = Math.ceil(total / batchSize)
  const startTime = Date.now()
  let currentBatch = 0

  for (let offset = 0; offset < total; offset += batchSize) {
    currentBatch++
    const batch = entries.slice(offset, offset + batchSize)
    const batchResult = await target.addBatch(batch, { overwrite: true })

    result.successful += batchResult.successful
    result.failed += batchResult.failed
    if (batchResult.errors && batchResult.errors.length > 0) {
      result.errors?.push(...batchResult.errors)
    }

    // Progress figures are only needed when someone is listening.
    if (options?.onProgress) {
      const migrated = Math.min(offset + batchSize, total)
      const elapsed = Date.now() - startTime
      // Assume the remaining entries take as long per entry as those done so far.
      const estimatedTotal = (elapsed / migrated) * total
      const progress: MigrationProgress = {
        total,
        migrated,
        percentage: (migrated / total) * 100,
        currentBatch,
        totalBatches,
        estimatedTimeRemaining: (estimatedTotal - elapsed) / 1000,
      }
      options.onProgress(progress)
    }
  }

  return result
}
87
+
88
/**
 * Migrate all vectors from `source` to `target`, picking the routine from the
 * concrete store classes.
 *
 * An in-memory source with a pgvector target goes through
 * `migrateMemoryToPgVector`; every other combination uses the generic routine.
 * When both stores are of the same class a warning is logged first, and the
 * migration still runs.
 *
 * @param source - Store to read from.
 * @param target - Store to write to.
 * @param options - Passed through unchanged to the selected routine.
 * @returns The batch totals reported by the selected routine.
 */
export async function migrateVectorStore(
  source: InMemoryVectorStore | PgVectorStore,
  target: InMemoryVectorStore | PgVectorStore,
  options?: {
    batchSize?: number
    onProgress?: (progress: MigrationProgress) => void
  }
): Promise<BatchResult> {
  const sameStoreType =
    (source instanceof InMemoryVectorStore && target instanceof InMemoryVectorStore) ||
    (source instanceof PgVectorStore && target instanceof PgVectorStore)
  if (sameStoreType) {
    console.warn('Migrating between same store types. Consider using copy instead.')
  }

  return source instanceof InMemoryVectorStore && target instanceof PgVectorStore
    ? migrateMemoryToPgVector(source, target, options)
    : genericMigration(source, target, options)
}
116
+
117
/**
 * Store-agnostic migration: copy every entry from `source` to `target`.
 *
 * Entries are read once with `source.getAllEntries()` and written with
 * `target.addBatch(slice, { overwrite: true })`, one slice of `batchSize`
 * entries at a time. The counts and errors reported by each `addBatch` call
 * are accumulated into a single result; an exception thrown by `addBatch` or
 * by `onProgress` aborts the migration.
 *
 * @param source - Store to read from.
 * @param target - Store to write to.
 * @param options - `batchSize`: entries per `addBatch` call (default 100, must
 *   be >= 1). `onProgress`: called after each batch with running totals and a
 *   linearly extrapolated time remaining in seconds.
 * @returns Totals across all batches.
 * @throws RangeError if `batchSize` is NaN or smaller than 1.
 */
async function genericMigration(
  source: InMemoryVectorStore | PgVectorStore,
  target: InMemoryVectorStore | PgVectorStore,
  options?: {
    batchSize?: number
    onProgress?: (progress: MigrationProgress) => void
  }
): Promise<BatchResult> {
  const batchSize = options?.batchSize ?? 100
  // A zero or negative step never advances the loop below, and NaN ends it
  // after an empty first slice, so reject those before touching either store.
  if (!(batchSize >= 1)) {
    throw new RangeError(`batchSize must be a number >= 1, received ${batchSize}`)
  }

  const entries = await source.getAllEntries()
  const total = entries.length
  const result: BatchResult = {
    successful: 0,
    failed: 0,
    errors: [],
  }
  if (total === 0) {
    return result
  }

  const totalBatches = Math.ceil(total / batchSize)
  const startTime = Date.now()
  let currentBatch = 0

  for (let offset = 0; offset < total; offset += batchSize) {
    currentBatch++
    const batch = entries.slice(offset, offset + batchSize)
    const batchResult = await target.addBatch(batch, { overwrite: true })

    result.successful += batchResult.successful
    result.failed += batchResult.failed
    if (batchResult.errors && batchResult.errors.length > 0) {
      result.errors?.push(...batchResult.errors)
    }

    // Progress figures are only needed when someone is listening.
    if (options?.onProgress) {
      const migrated = Math.min(offset + batchSize, total)
      const elapsed = Date.now() - startTime
      // Assume the remaining entries take as long per entry as those done so far.
      const estimatedTotal = (elapsed / migrated) * total
      const progress: MigrationProgress = {
        total,
        migrated,
        percentage: (migrated / total) * 100,
        currentBatch,
        totalBatches,
        estimatedTimeRemaining: (estimatedTotal - elapsed) / 1000,
      }
      options.onProgress(progress)
    }
  }

  return result
}
192
+
193
/**
 * Spot-check a finished migration.
 *
 * Compares the `totalVectors` and `dimensions` reported by both stores'
 * `getStats()`, then picks up to `sampleSize` distinct random source entries
 * and fetches each from `target` by ID. A sample matches when:
 * - the target entry exists,
 * - both embeddings have the same length and every component differs by less
 *   than 0.0001, and
 * - the metadata is equal after JSON serialization with object keys sorted.
 *
 * @param source - Store the data was migrated from.
 * @param target - Store the data was migrated to.
 * @param sampleSize - Maximum number of entries to compare (default 10).
 * @returns `success` is true only when no issue was recorded; `issues` holds
 *   one message per problem, alongside both counts and the sample tallies.
 */
export async function verifyMigration(
  source: InMemoryVectorStore | PgVectorStore,
  target: InMemoryVectorStore | PgVectorStore,
  sampleSize = 10
): Promise<{
  success: boolean
  issues: string[]
  sourceCount: number
  targetCount: number
  samplesMatch: number
  samplesMismatch: number
}> {
  const issues: string[] = []
  let samplesMatch = 0
  let samplesMismatch = 0

  // Serialize with object keys sorted at every level so two metadata objects
  // holding the same data compare equal even when a backend returns keys in a
  // different order (PostgreSQL jsonb, for example, does not preserve key
  // order — NOTE(review): confirm how PgVectorStore stores metadata).
  const canonicalJson = (value: unknown): string | undefined =>
    JSON.stringify(value, (_key, nested: unknown) =>
      nested !== null && typeof nested === 'object' && !Array.isArray(nested)
        ? Object.fromEntries(Object.entries(nested).sort(([a], [b]) => (a < b ? -1 : a > b ? 1 : 0)))
        : nested
    )

  // Compare counts
  const sourceStats = await source.getStats()
  const targetStats = await target.getStats()
  const sourceCount = sourceStats.totalVectors
  const targetCount = targetStats.totalVectors

  if (sourceCount !== targetCount) {
    issues.push(`Vector count mismatch: source has ${sourceCount}, target has ${targetCount}`)
  }

  // Compare dimensions
  if (sourceStats.dimensions !== targetStats.dimensions) {
    issues.push(`Dimension mismatch: source has ${sourceStats.dimensions}, target has ${targetStats.dimensions}`)
  }

  // Draw distinct random indices; the Set discards repeats until enough are collected.
  const sourceEntries = await source.getAllEntries()
  const wanted = Math.min(sampleSize, sourceEntries.length)
  const sampleIndices = new Set<number>()
  while (sampleIndices.size < wanted) {
    sampleIndices.add(Math.floor(Math.random() * sourceEntries.length))
  }

  for (const index of sampleIndices) {
    const sourceEntry = sourceEntries[index]
    if (!sourceEntry) continue

    const targetEntry = await target.get(sourceEntry.id)
    if (!targetEntry) {
      issues.push(`Entry ${sourceEntry.id} not found in target`)
      samplesMismatch++
      continue
    }

    // A length difference is a mismatch in itself: without this check a
    // longer target embedding, or a shorter one whose missing components
    // happen to be near zero in the source, would pass the comparison.
    const sourceEmbedding = sourceEntry.embedding
    const targetEmbedding = targetEntry.embedding
    const embeddingsMatch =
      sourceEmbedding.length === targetEmbedding.length &&
      sourceEmbedding.every((val, i) => Math.abs(val - (targetEmbedding[i] ?? Number.NaN)) < 0.0001)

    if (!embeddingsMatch) {
      issues.push(`Embedding mismatch for entry ${sourceEntry.id}`)
      samplesMismatch++
      continue
    }

    if (canonicalJson(sourceEntry.metadata) !== canonicalJson(targetEntry.metadata)) {
      issues.push(`Metadata mismatch for entry ${sourceEntry.id}`)
      samplesMismatch++
      continue
    }

    samplesMatch++
  }

  return {
    success: issues.length === 0,
    issues,
    sourceCount,
    targetCount,
    samplesMatch,
    samplesMismatch,
  }
}
281
+
282
/**
 * Build an `onProgress` callback that renders each progress update as one line of text.
 *
 * The line contains the migrated/total counts, the percentage with one
 * decimal, the batch position and, when `estimatedTimeRemaining` is truthy,
 * the ETA rounded to whole seconds.
 *
 * @param onUpdate - Receives each rendered line; when omitted the line is
 *   written with `console.log`.
 * @returns A function suitable for the `onProgress` migration option.
 */
export function createProgressReporter(onUpdate?: (message: string) => void): (progress: MigrationProgress) => void {
  const deliver = onUpdate ? onUpdate : (message: string): void => console.log(message)

  return (progress: MigrationProgress) => {
    const { migrated, total, percentage, currentBatch, totalBatches, estimatedTimeRemaining } = progress

    const segments = [
      `Migration progress: ${migrated}/${total} (${percentage.toFixed(1)}%)`,
      `Batch ${currentBatch}/${totalBatches}`,
    ]
    // A zero (or missing) estimate is left out rather than printed as "0s".
    if (estimatedTimeRemaining) {
      segments.push(`ETA: ${Math.round(estimatedTimeRemaining)}s`)
    }

    deliver(segments.join(' - '))
  }
}