@twelvehart/supermemory-runtime 1.0.0-next.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +57 -0
- package/README.md +374 -0
- package/dist/index.js +189 -0
- package/dist/mcp/index.js +1132 -0
- package/docker-compose.prod.yml +91 -0
- package/docker-compose.yml +358 -0
- package/drizzle/0000_dapper_the_professor.sql +159 -0
- package/drizzle/0001_api_keys.sql +51 -0
- package/drizzle/meta/0000_snapshot.json +1532 -0
- package/drizzle/meta/_journal.json +13 -0
- package/drizzle.config.ts +20 -0
- package/package.json +114 -0
- package/scripts/add-extraction-job.ts +122 -0
- package/scripts/benchmark-pgvector.ts +122 -0
- package/scripts/bootstrap.sh +209 -0
- package/scripts/check-runtime-pack.ts +111 -0
- package/scripts/claude-mcp-config.ts +336 -0
- package/scripts/docker-entrypoint.sh +183 -0
- package/scripts/doctor.ts +377 -0
- package/scripts/init-db.sql +33 -0
- package/scripts/install.sh +1110 -0
- package/scripts/mcp-setup.ts +271 -0
- package/scripts/migrations/001_create_pgvector_extension.sql +31 -0
- package/scripts/migrations/002_create_memory_embeddings_table.sql +75 -0
- package/scripts/migrations/003_create_hnsw_index.sql +94 -0
- package/scripts/migrations/004_create_memory_embeddings_standalone.sql +70 -0
- package/scripts/migrations/005_create_chunks_table.sql +95 -0
- package/scripts/migrations/006_create_processing_queue.sql +45 -0
- package/scripts/migrations/generate_test_data.sql +42 -0
- package/scripts/migrations/phase1_comprehensive_test.sql +204 -0
- package/scripts/migrations/run_migrations.sh +286 -0
- package/scripts/migrations/test_hnsw_index.sql +255 -0
- package/scripts/pre-commit-secrets +282 -0
- package/scripts/run-extraction-worker.ts +46 -0
- package/scripts/run-phase1-tests.sh +291 -0
- package/scripts/setup.ts +222 -0
- package/scripts/smoke-install.sh +12 -0
- package/scripts/test-health-endpoint.sh +328 -0
- package/src/api/index.ts +2 -0
- package/src/api/middleware/auth.ts +80 -0
- package/src/api/middleware/csrf.ts +308 -0
- package/src/api/middleware/errorHandler.ts +166 -0
- package/src/api/middleware/rateLimit.ts +360 -0
- package/src/api/middleware/validation.ts +514 -0
- package/src/api/routes/documents.ts +286 -0
- package/src/api/routes/profiles.ts +237 -0
- package/src/api/routes/search.ts +71 -0
- package/src/api/stores/index.ts +58 -0
- package/src/config/bootstrap-env.ts +3 -0
- package/src/config/env.ts +71 -0
- package/src/config/feature-flags.ts +25 -0
- package/src/config/index.ts +140 -0
- package/src/config/secrets.config.ts +291 -0
- package/src/db/client.ts +92 -0
- package/src/db/index.ts +73 -0
- package/src/db/postgres.ts +72 -0
- package/src/db/schema/chunks.schema.ts +31 -0
- package/src/db/schema/containers.schema.ts +46 -0
- package/src/db/schema/documents.schema.ts +49 -0
- package/src/db/schema/embeddings.schema.ts +32 -0
- package/src/db/schema/index.ts +11 -0
- package/src/db/schema/memories.schema.ts +72 -0
- package/src/db/schema/profiles.schema.ts +34 -0
- package/src/db/schema/queue.schema.ts +59 -0
- package/src/db/schema/relationships.schema.ts +42 -0
- package/src/db/schema.ts +223 -0
- package/src/db/worker-connection.ts +47 -0
- package/src/index.ts +235 -0
- package/src/mcp/CLAUDE.md +1 -0
- package/src/mcp/index.ts +1380 -0
- package/src/mcp/legacyState.ts +22 -0
- package/src/mcp/rateLimit.ts +358 -0
- package/src/mcp/resources.ts +309 -0
- package/src/mcp/results.ts +104 -0
- package/src/mcp/tools.ts +401 -0
- package/src/queues/config.ts +119 -0
- package/src/queues/index.ts +289 -0
- package/src/sdk/client.ts +225 -0
- package/src/sdk/errors.ts +266 -0
- package/src/sdk/http.ts +560 -0
- package/src/sdk/index.ts +244 -0
- package/src/sdk/resources/base.ts +65 -0
- package/src/sdk/resources/connections.ts +204 -0
- package/src/sdk/resources/documents.ts +163 -0
- package/src/sdk/resources/index.ts +10 -0
- package/src/sdk/resources/memories.ts +150 -0
- package/src/sdk/resources/search.ts +60 -0
- package/src/sdk/resources/settings.ts +36 -0
- package/src/sdk/types.ts +674 -0
- package/src/services/chunking/index.ts +451 -0
- package/src/services/chunking.service.ts +650 -0
- package/src/services/csrf.service.ts +252 -0
- package/src/services/documents.repository.ts +219 -0
- package/src/services/documents.service.ts +191 -0
- package/src/services/embedding.service.ts +404 -0
- package/src/services/extraction.service.ts +300 -0
- package/src/services/extractors/code.extractor.ts +451 -0
- package/src/services/extractors/index.ts +9 -0
- package/src/services/extractors/markdown.extractor.ts +461 -0
- package/src/services/extractors/pdf.extractor.ts +315 -0
- package/src/services/extractors/text.extractor.ts +118 -0
- package/src/services/extractors/url.extractor.ts +243 -0
- package/src/services/index.ts +235 -0
- package/src/services/ingestion.service.ts +177 -0
- package/src/services/llm/anthropic.ts +400 -0
- package/src/services/llm/base.ts +460 -0
- package/src/services/llm/contradiction-detector.service.ts +526 -0
- package/src/services/llm/heuristics.ts +148 -0
- package/src/services/llm/index.ts +309 -0
- package/src/services/llm/memory-classifier.service.ts +383 -0
- package/src/services/llm/memory-extension-detector.service.ts +523 -0
- package/src/services/llm/mock.ts +470 -0
- package/src/services/llm/openai.ts +398 -0
- package/src/services/llm/prompts.ts +438 -0
- package/src/services/llm/types.ts +373 -0
- package/src/services/memory.repository.ts +1769 -0
- package/src/services/memory.service.ts +1338 -0
- package/src/services/memory.types.ts +234 -0
- package/src/services/persistence/index.ts +295 -0
- package/src/services/pipeline.service.ts +509 -0
- package/src/services/profile.repository.ts +436 -0
- package/src/services/profile.service.ts +560 -0
- package/src/services/profile.types.ts +270 -0
- package/src/services/relationships/detector.ts +1128 -0
- package/src/services/relationships/index.ts +268 -0
- package/src/services/relationships/memory-integration.ts +459 -0
- package/src/services/relationships/strategies.ts +132 -0
- package/src/services/relationships/types.ts +370 -0
- package/src/services/search.service.ts +761 -0
- package/src/services/search.types.ts +220 -0
- package/src/services/secrets.service.ts +384 -0
- package/src/services/vectorstore/base.ts +327 -0
- package/src/services/vectorstore/index.ts +444 -0
- package/src/services/vectorstore/memory.ts +286 -0
- package/src/services/vectorstore/migration.ts +295 -0
- package/src/services/vectorstore/mock.ts +403 -0
- package/src/services/vectorstore/pgvector.ts +695 -0
- package/src/services/vectorstore/types.ts +247 -0
- package/src/startup.ts +389 -0
- package/src/types/api.types.ts +193 -0
- package/src/types/document.types.ts +103 -0
- package/src/types/index.ts +241 -0
- package/src/types/profile.base.ts +133 -0
- package/src/utils/errors.ts +447 -0
- package/src/utils/id.ts +15 -0
- package/src/utils/index.ts +101 -0
- package/src/utils/logger.ts +313 -0
- package/src/utils/sanitization.ts +501 -0
- package/src/utils/secret-validation.ts +273 -0
- package/src/utils/synonyms.ts +188 -0
- package/src/utils/validation.ts +581 -0
- package/src/workers/chunking.worker.ts +242 -0
- package/src/workers/embedding.worker.ts +358 -0
- package/src/workers/extraction.worker.ts +346 -0
- package/src/workers/indexing.worker.ts +505 -0
- package/tsconfig.json +38 -0
|
@@ -0,0 +1,444 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Vector Store Module
|
|
3
|
+
*
|
|
4
|
+
* Provides an abstraction layer for vector similarity search with multiple backends:
|
|
5
|
+
* - InMemoryVectorStore: Fast, ephemeral storage for development/testing
|
|
6
|
+
* - PgVectorStore: PostgreSQL with pgvector extension for production deployments
|
|
7
|
+
* - MockVectorStore: Testing mock with configurable behavior
|
|
8
|
+
*
|
|
9
|
+
* Usage:
|
|
10
|
+
* ```typescript
|
|
11
|
+
* import { createVectorStore, VectorStoreConfig } from './vectorstore';
|
|
12
|
+
*
|
|
13
|
+
* const config: VectorStoreConfig = {
|
|
14
|
+
* provider: 'pgvector',
|
|
15
|
+
* dimensions: 1536,
|
|
16
|
+
* metric: 'cosine',
|
|
17
|
+
* };
|
|
18
|
+
*
|
|
19
|
+
* const store = await createVectorStore(config);
|
|
20
|
+
* await store.initialize();
|
|
21
|
+
*
|
|
22
|
+
* // Add vectors
|
|
23
|
+
* await store.add({
|
|
24
|
+
* id: 'memory-1',
|
|
25
|
+
* embedding: [...],
|
|
26
|
+
* metadata: { containerTag: 'default' }
|
|
27
|
+
* });
|
|
28
|
+
*
|
|
29
|
+
* // Search
|
|
30
|
+
* const results = await store.search(queryEmbedding, { limit: 10 });
|
|
31
|
+
* ```
|
|
32
|
+
*/
|
|
33
|
+
|
|
34
|
+
// Type exports
|
|
35
|
+
export type {
|
|
36
|
+
VectorStoreProvider,
|
|
37
|
+
SimilarityMetric,
|
|
38
|
+
IndexType,
|
|
39
|
+
FilterOperator,
|
|
40
|
+
MetadataFilter,
|
|
41
|
+
VectorEntry,
|
|
42
|
+
SearchOptions,
|
|
43
|
+
VectorSearchResult,
|
|
44
|
+
AddOptions,
|
|
45
|
+
DeleteOptions,
|
|
46
|
+
VectorStoreConfig,
|
|
47
|
+
HNSWConfig,
|
|
48
|
+
VectorStoreStats,
|
|
49
|
+
BatchResult,
|
|
50
|
+
MigrationOptions,
|
|
51
|
+
MigrationProgress,
|
|
52
|
+
VectorStoreEvent,
|
|
53
|
+
VectorStoreEventListener,
|
|
54
|
+
} from './types.js'
|
|
55
|
+
|
|
56
|
+
// Constants
|
|
57
|
+
export { DEFAULT_SEARCH_OPTIONS, DEFAULT_HNSW_CONFIG } from './types.js'
|
|
58
|
+
|
|
59
|
+
// Base class and utilities
|
|
60
|
+
export {
|
|
61
|
+
BaseVectorStore,
|
|
62
|
+
cosineSimilarity,
|
|
63
|
+
euclideanDistance,
|
|
64
|
+
dotProduct,
|
|
65
|
+
normalizeVector,
|
|
66
|
+
validateVector,
|
|
67
|
+
} from './base.js'
|
|
68
|
+
|
|
69
|
+
// Implementations
|
|
70
|
+
export { InMemoryVectorStore, createInMemoryVectorStore } from './memory.js'
|
|
71
|
+
export { PgVectorStore, createPgVectorStore } from './pgvector.js'
|
|
72
|
+
export type { PgVectorStoreConfig } from './pgvector.js'
|
|
73
|
+
export { MockVectorStore, createMockVectorStore } from './mock.js'
|
|
74
|
+
export type { MockVectorStoreOptions, RecordedOperation } from './mock.js'
|
|
75
|
+
|
|
76
|
+
// Migration utilities
|
|
77
|
+
export {
|
|
78
|
+
migrateMemoryToPgVector,
|
|
79
|
+
migrateVectorStore as migrateVectorStores,
|
|
80
|
+
verifyMigration,
|
|
81
|
+
createProgressReporter,
|
|
82
|
+
} from './migration.js'
|
|
83
|
+
|
|
84
|
+
// Import implementations for factory
|
|
85
|
+
import type { VectorStoreConfig, VectorStoreProvider } from './types.js'
|
|
86
|
+
import { BaseVectorStore } from './base.js'
|
|
87
|
+
import { InMemoryVectorStore } from './memory.js'
|
|
88
|
+
import { getLogger } from '../../utils/logger.js'
|
|
89
|
+
import { ValidationError } from '../../utils/errors.js'
|
|
90
|
+
|
|
91
|
+
const logger = getLogger('VectorStoreFactory')
|
|
92
|
+
|
|
93
|
+
/**
 * Constructor signature shared by the vector store implementations used by the factory.
 *
 * The parameter is intentionally typed as `any` so that implementations taking
 * either the base VectorStoreConfig or an extended configuration object can be
 * stored under the same type.
 */
type VectorStoreConstructor = new (config: any) => BaseVectorStore
|
|
98
|
+
|
|
99
|
+
/**
 * Per-provider loaders that resolve to the store constructor.
 *
 * Each entry is a function rather than a direct class reference so that an
 * optional backend is only imported when that provider is requested.
 */
const implementationLoaders: Record<VectorStoreProvider, () => Promise<VectorStoreConstructor>> = {
  // The in-memory store is imported statically, so it can be handed back directly.
  memory: () => Promise.resolve(InMemoryVectorStore),

  // The pgvector module is imported on demand. If that import throws, a warning
  // is logged and the in-memory store is returned in its place.
  async pgvector() {
    try {
      const pgModule = await import('./pgvector.js')
      return pgModule.PgVectorStore
    } catch (importError) {
      logger.warn('pgvector not available, falling back to memory store', { error: importError })
      return InMemoryVectorStore
    }
  },
}
|
|
116
|
+
|
|
117
|
+
/**
 * Create a vector store instance based on configuration.
 *
 * The implementation is resolved through `implementationLoaders`; when
 * `config.provider` is unset the in-memory provider is used. The returned store
 * is only constructed: call `initialize()` on it, as in the example below, or
 * use `createAndInitializeVectorStore()` instead.
 *
 * @param config - Vector store configuration
 * @returns A newly constructed vector store instance (not yet initialized)
 * @throws ValidationError when `config.provider` is not a registered provider
 *
 * @example
 * ```typescript
 * const store = await createVectorStore({
 *   provider: 'pgvector',
 *   dimensions: 1536,
 * });
 * await store.initialize();
 * ```
 */
export async function createVectorStore(config: VectorStoreConfig): Promise<BaseVectorStore> {
  const provider = config.provider ?? 'memory'

  logger.debug('Creating vector store', { provider, dimensions: config.dimensions })

  // Look up own properties only. A malformed provider string such as
  // 'constructor' or '__proto__' would otherwise resolve to a member inherited
  // from Object.prototype and fail later with a confusing TypeError instead of
  // the ValidationError below.
  const loader = Object.prototype.hasOwnProperty.call(implementationLoaders, provider)
    ? implementationLoaders[provider]
    : undefined

  if (!loader) {
    throw new ValidationError(`Unknown vector store provider: ${provider}`, {
      provider: [`Invalid provider '${provider}'. Valid providers: ${Object.keys(implementationLoaders).join(', ')}`],
    })
  }

  const StoreClass = await loader()
  return new StoreClass(config)
}
|
|
148
|
+
|
|
149
|
+
/**
 * Convenience wrapper: builds a store with createVectorStore() and awaits its
 * initialize() before handing it back.
 *
 * @param config - Vector store configuration
 * @returns The store, after its initialize() call has resolved
 */
export async function createAndInitializeVectorStore(config: VectorStoreConfig): Promise<BaseVectorStore> {
  const readyStore = await createVectorStore(config)
  await readyStore.initialize()
  return readyStore
}
|
|
160
|
+
|
|
161
|
+
// ============================================================================
|
|
162
|
+
// Singleton Pattern for Application-wide Vector Store
|
|
163
|
+
// ============================================================================
|
|
164
|
+
|
|
165
|
+
// Application-wide store; null until getVectorStore() creates it, and again
// after resetVectorStore().
let _vectorStoreInstance: BaseVectorStore | null = null
// Configuration recorded by configureVectorStore(); null means getVectorStore()
// uses its built-in in-memory default.
let _vectorStoreConfig: VectorStoreConfig | null = null
|
|
167
|
+
|
|
168
|
+
/**
 * Record the configuration the singleton vector store will be created with.
 *
 * Has an effect only while no singleton exists yet. Once getVectorStore() has
 * created the instance, the call logs a warning and leaves the stored
 * configuration untouched.
 *
 * @param config - Vector store configuration
 */
export function configureVectorStore(config: VectorStoreConfig): void {
  if (!_vectorStoreInstance) {
    _vectorStoreConfig = config
    return
  }

  logger.warn('Vector store already initialized, configuration will be ignored')
}
|
|
182
|
+
|
|
183
|
+
/**
 * Return the singleton vector store, creating it on first use.
 *
 * The store is built from the configuration passed to configureVectorStore(),
 * or from an in-memory, 1536-dimension default when none was provided.
 * initialize() is not called here.
 *
 * NOTE(review): the existence check and the assignment are separated by an
 * await, so overlapping first calls each construct a store and the last
 * assignment wins.
 *
 * @returns The vector store instance (may not be initialized)
 */
export async function getVectorStore(): Promise<BaseVectorStore> {
  if (_vectorStoreInstance) {
    return _vectorStoreInstance
  }

  const fallbackConfig: VectorStoreConfig = {
    provider: 'memory',
    dimensions: 1536, // Default to OpenAI dimensions
  }

  const created = await createVectorStore(_vectorStoreConfig ?? fallbackConfig)
  _vectorStoreInstance = created
  return created
}
|
|
201
|
+
|
|
202
|
+
/**
 * Return the singleton vector store after awaiting its initialize().
 *
 * initialize() is invoked on every call to this function.
 *
 * @returns The singleton store, once initialize() has resolved
 */
export async function getInitializedVectorStore(): Promise<BaseVectorStore> {
  const singleton = await getVectorStore()
  await singleton.initialize()
  return singleton
}
|
|
212
|
+
|
|
213
|
+
/**
 * Reset the singleton vector store (for testing).
 *
 * Closes the current instance, if any, and clears both the instance and the
 * stored configuration. The state is cleared even when close() rejects, so a
 * failed close cannot leave a stale store behind for the next caller; the
 * rejection itself still propagates.
 */
export async function resetVectorStore(): Promise<void> {
  try {
    if (_vectorStoreInstance) {
      await _vectorStoreInstance.close()
    }
  } finally {
    _vectorStoreInstance = null
    _vectorStoreConfig = null
  }
}
|
|
223
|
+
|
|
224
|
+
// ============================================================================
|
|
225
|
+
// Provider Detection
|
|
226
|
+
// ============================================================================
|
|
227
|
+
|
|
228
|
+
/**
 * Report which vector store providers can be used in this environment.
 *
 * `memory` is always reported as available. `pgvector` is reported as
 * available when the `pg` package can be imported; no database connection is
 * attempted.
 *
 * @returns Object mapping provider names to availability status
 */
export async function getAvailableProviders(): Promise<Record<VectorStoreProvider, boolean>> {
  let pgInstalled = true
  try {
    await import('pg')
  } catch {
    // The import failed, so the pgvector backend cannot be used.
    pgInstalled = false
  }

  return {
    memory: true, // Always available
    pgvector: pgInstalled,
  }
}
|
|
249
|
+
|
|
250
|
+
/**
 * Pick the preferred provider among those reported by getAvailableProviders().
 *
 * Priority: pgvector > memory (production-first approach)
 *
 * @returns 'pgvector' when it is available, otherwise 'memory'
 */
export async function getBestProvider(): Promise<VectorStoreProvider> {
  const { pgvector: pgvectorAvailable } = await getAvailableProviders()
  return pgvectorAvailable ? 'pgvector' : 'memory'
}
|
|
266
|
+
|
|
267
|
+
// ============================================================================
|
|
268
|
+
// Migration Support
|
|
269
|
+
// ============================================================================
|
|
270
|
+
|
|
271
|
+
import type { MigrationProgress, VectorEntry } from './types.js'
|
|
272
|
+
|
|
273
|
+
/**
 * Collect every entry held by a vector store.
 *
 * Stores exposing a `getAllEntries` function are asked directly. Any other
 * store is queried through search() with an all-zero vector, a threshold of -1
 * and a limit of 100000, and the hits are converted back into entries.
 *
 * NOTE(review): the fallback returns at most 100000 entries, and whether a
 * zero-vector query matches every stored vector depends on the backend —
 * confirm before relying on it for large or non-memory stores.
 */
async function getAllEntriesFromStore(store: BaseVectorStore): Promise<VectorEntry[]> {
  const supportsEnumeration = 'getAllEntries' in store && typeof store.getAllEntries === 'function'
  if (supportsEnumeration) {
    return (store as InMemoryVectorStore).getAllEntries()
  }

  // Fallback: search with very low threshold to get all vectors
  const zeroQuery = new Array(store.getDimensions()).fill(0)
  const hits = await store.search(zeroQuery, {
    limit: 100000,
    threshold: -1,
    includeVectors: true,
    includeMetadata: true,
  })

  const collected: VectorEntry[] = []
  for (const hit of hits) {
    collected.push({
      id: hit.id,
      // includeVectors was requested above, so the embedding is expected to be present.
      embedding: hit.embedding!,
      metadata: hit.metadata,
    })
  }
  return collected
}
|
|
296
|
+
|
|
297
|
+
/**
 * Migrate vectors between stores.
 *
 * All entries are read from `source` up front, then written to `target` in
 * batches via addBatch() with `overwrite: true`. After each batch the progress
 * object is updated and passed to `options.onProgress`.
 *
 * Notes:
 * - The same progress object is mutated and reused for every callback; copy it
 *   if a snapshot is needed.
 * - `estimatedTimeRemaining` is expressed in seconds.
 * - The result of addBatch() is not inspected, so `migrated` counts entries
 *   submitted to the target.
 *
 * @param source - Source vector store
 * @param target - Target vector store
 * @param options - Migration options; `batchSize` defaults to 100
 * @returns Final migration progress
 * @throws ValidationError when `options.batchSize` is not a positive number
 */
export async function migrateVectorStore(
  source: BaseVectorStore,
  target: BaseVectorStore,
  options?: {
    batchSize?: number
    onProgress?: (progress: MigrationProgress) => void
  }
): Promise<MigrationProgress> {
  const batchSize = options?.batchSize ?? 100

  // A zero, negative or NaN batch size would stop the loop index from ever
  // reaching the end of the entry list, so reject it before doing any work.
  if (!(batchSize > 0)) {
    throw new ValidationError('Invalid migration batch size', {
      batchSize: [`batchSize must be a positive number, received ${batchSize}`],
    })
  }

  const entries = await getAllEntriesFromStore(source)
  const total = entries.length
  const totalBatches = Math.ceil(total / batchSize)

  const progress: MigrationProgress = {
    total,
    migrated: 0,
    percentage: 0,
    currentBatch: 0,
    totalBatches,
  }

  // Nothing to copy: report completion immediately.
  if (total === 0) {
    progress.percentage = 100
    return progress
  }

  const startTime = Date.now()

  for (let i = 0; i < entries.length; i += batchSize) {
    progress.currentBatch++
    const batch = entries.slice(i, i + batchSize)

    await target.addBatch(batch, { overwrite: true })

    progress.migrated += batch.length
    progress.percentage = Math.round((progress.migrated / total) * 100)

    // Throughput so far (entries per second) drives the remaining-time estimate.
    const elapsed = Date.now() - startTime
    const rate = progress.migrated / (elapsed / 1000)
    const remaining = total - progress.migrated
    progress.estimatedTimeRemaining = remaining > 0 ? Math.round(remaining / rate) : 0

    options?.onProgress?.(progress)
  }

  return progress
}
|
|
354
|
+
|
|
355
|
+
/**
 * Re-index all vectors in a store with new embeddings.
 *
 * Every entry currently in the store is visited sequentially. For each one the
 * content is fetched with `getContent`; when content is returned, a new
 * embedding is generated and written back with store.update(). Entries whose
 * content is `null` or an empty string are left unchanged but still counted in
 * `migrated`.
 *
 * Progress is reported once per batch. The same progress object is reused for
 * every callback, and `estimatedTimeRemaining` is expressed in seconds.
 *
 * @param store - Vector store to re-index
 * @param generateEmbedding - Function to generate new embeddings
 * @param getContent - Function to get content for an ID
 * @param options - Re-indexing options; `batchSize` defaults to 50
 * @returns Final progress
 * @throws ValidationError when `options.batchSize` is not a positive number
 */
export async function reindexVectorStore(
  store: BaseVectorStore,
  generateEmbedding: (id: string, content: string) => Promise<number[]>,
  getContent: (id: string) => Promise<string | null>,
  options?: {
    batchSize?: number
    onProgress?: (progress: MigrationProgress) => void
  }
): Promise<MigrationProgress> {
  const batchSize = options?.batchSize ?? 50

  // A zero, negative or NaN batch size would stop the loop index from ever
  // reaching the end of the entry list, so reject it before doing any work.
  if (!(batchSize > 0)) {
    throw new ValidationError('Invalid re-index batch size', {
      batchSize: [`batchSize must be a positive number, received ${batchSize}`],
    })
  }

  const entries = await getAllEntriesFromStore(store)
  const total = entries.length
  const totalBatches = Math.ceil(total / batchSize)

  const progress: MigrationProgress = {
    total,
    migrated: 0,
    percentage: 0,
    currentBatch: 0,
    totalBatches,
  }

  // Nothing to re-index: report completion immediately.
  if (total === 0) {
    progress.percentage = 100
    return progress
  }

  const startTime = Date.now()

  for (let i = 0; i < entries.length; i += batchSize) {
    progress.currentBatch++
    const batch = entries.slice(i, i + batchSize)

    for (const entry of batch) {
      const content = await getContent(entry.id)
      if (content) {
        const embedding = await generateEmbedding(entry.id, content)
        await store.update(entry.id, { embedding })
      }
      // Counted whether or not an update happened, so progress reaches 100%.
      progress.migrated++
    }

    progress.percentage = Math.round((progress.migrated / total) * 100)

    // Throughput so far (entries per second) drives the remaining-time estimate.
    const elapsed = Date.now() - startTime
    const rate = progress.migrated / (elapsed / 1000)
    const remaining = total - progress.migrated
    progress.estimatedTimeRemaining = remaining > 0 ? Math.round(remaining / rate) : 0

    options?.onProgress?.(progress)
  }

  return progress
}
|
|
420
|
+
|
|
421
|
+
/**
 * Get default vector store configuration from environment.
 *
 * Environment variables read:
 * - `VECTOR_STORE_PROVIDER` — provider name; unset or blank means 'memory'
 * - `VECTOR_DIMENSIONS` — embedding dimensions; default 1536
 * - `PGVECTOR_HNSW_M` — HNSW `M` (pgvector only); default 16
 * - `PGVECTOR_HNSW_EF_CONSTRUCTION` — HNSW `efConstruction` (pgvector only); default 64
 *
 * Numeric variables that are blank, non-numeric or not positive are ignored
 * with a warning and replaced by their default. The provider name is not
 * validated here; createVectorStore() rejects unknown providers.
 *
 * @returns Configuration using the cosine metric and the 'default' namespace,
 *          with an HNSW index for pgvector and a flat index otherwise
 */
export function getDefaultVectorStoreConfig(): VectorStoreConfig {
  // Reads a positive integer from the environment, falling back when the
  // variable is unset, blank, non-numeric or not positive.
  const readPositiveInt = (name: string, fallback: number): number => {
    const raw = process.env[name]?.trim()
    if (!raw) {
      return fallback
    }
    const parsed = Number.parseInt(raw, 10)
    if (Number.isNaN(parsed) || parsed <= 0) {
      logger.warn('Ignoring invalid numeric environment variable, using default', {
        name,
        value: raw,
        fallback,
      })
      return fallback
    }
    return parsed
  }

  // `||` rather than `??`: a blank VECTOR_STORE_PROVIDER (e.g. an empty line in
  // an env file) is treated as unset instead of producing an empty provider.
  const provider = (process.env.VECTOR_STORE_PROVIDER?.trim() || 'memory') as VectorStoreProvider
  const dimensions = readPositiveInt('VECTOR_DIMENSIONS', 1536)

  const config: VectorStoreConfig = {
    provider,
    dimensions,
    metric: 'cosine',
    indexType: provider === 'pgvector' ? 'hnsw' : 'flat',
    defaultNamespace: 'default',
  }

  if (provider === 'pgvector') {
    config.hnswConfig = {
      M: readPositiveInt('PGVECTOR_HNSW_M', 16),
      efConstruction: readPositiveInt('PGVECTOR_HNSW_EF_CONSTRUCTION', 64),
    }
  }

  return config
}
|