@twelvehart/supermemory-runtime 1.0.0-next.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (156) hide show
  1. package/.env.example +57 -0
  2. package/README.md +374 -0
  3. package/dist/index.js +189 -0
  4. package/dist/mcp/index.js +1132 -0
  5. package/docker-compose.prod.yml +91 -0
  6. package/docker-compose.yml +358 -0
  7. package/drizzle/0000_dapper_the_professor.sql +159 -0
  8. package/drizzle/0001_api_keys.sql +51 -0
  9. package/drizzle/meta/0000_snapshot.json +1532 -0
  10. package/drizzle/meta/_journal.json +13 -0
  11. package/drizzle.config.ts +20 -0
  12. package/package.json +114 -0
  13. package/scripts/add-extraction-job.ts +122 -0
  14. package/scripts/benchmark-pgvector.ts +122 -0
  15. package/scripts/bootstrap.sh +209 -0
  16. package/scripts/check-runtime-pack.ts +111 -0
  17. package/scripts/claude-mcp-config.ts +336 -0
  18. package/scripts/docker-entrypoint.sh +183 -0
  19. package/scripts/doctor.ts +377 -0
  20. package/scripts/init-db.sql +33 -0
  21. package/scripts/install.sh +1110 -0
  22. package/scripts/mcp-setup.ts +271 -0
  23. package/scripts/migrations/001_create_pgvector_extension.sql +31 -0
  24. package/scripts/migrations/002_create_memory_embeddings_table.sql +75 -0
  25. package/scripts/migrations/003_create_hnsw_index.sql +94 -0
  26. package/scripts/migrations/004_create_memory_embeddings_standalone.sql +70 -0
  27. package/scripts/migrations/005_create_chunks_table.sql +95 -0
  28. package/scripts/migrations/006_create_processing_queue.sql +45 -0
  29. package/scripts/migrations/generate_test_data.sql +42 -0
  30. package/scripts/migrations/phase1_comprehensive_test.sql +204 -0
  31. package/scripts/migrations/run_migrations.sh +286 -0
  32. package/scripts/migrations/test_hnsw_index.sql +255 -0
  33. package/scripts/pre-commit-secrets +282 -0
  34. package/scripts/run-extraction-worker.ts +46 -0
  35. package/scripts/run-phase1-tests.sh +291 -0
  36. package/scripts/setup.ts +222 -0
  37. package/scripts/smoke-install.sh +12 -0
  38. package/scripts/test-health-endpoint.sh +328 -0
  39. package/src/api/index.ts +2 -0
  40. package/src/api/middleware/auth.ts +80 -0
  41. package/src/api/middleware/csrf.ts +308 -0
  42. package/src/api/middleware/errorHandler.ts +166 -0
  43. package/src/api/middleware/rateLimit.ts +360 -0
  44. package/src/api/middleware/validation.ts +514 -0
  45. package/src/api/routes/documents.ts +286 -0
  46. package/src/api/routes/profiles.ts +237 -0
  47. package/src/api/routes/search.ts +71 -0
  48. package/src/api/stores/index.ts +58 -0
  49. package/src/config/bootstrap-env.ts +3 -0
  50. package/src/config/env.ts +71 -0
  51. package/src/config/feature-flags.ts +25 -0
  52. package/src/config/index.ts +140 -0
  53. package/src/config/secrets.config.ts +291 -0
  54. package/src/db/client.ts +92 -0
  55. package/src/db/index.ts +73 -0
  56. package/src/db/postgres.ts +72 -0
  57. package/src/db/schema/chunks.schema.ts +31 -0
  58. package/src/db/schema/containers.schema.ts +46 -0
  59. package/src/db/schema/documents.schema.ts +49 -0
  60. package/src/db/schema/embeddings.schema.ts +32 -0
  61. package/src/db/schema/index.ts +11 -0
  62. package/src/db/schema/memories.schema.ts +72 -0
  63. package/src/db/schema/profiles.schema.ts +34 -0
  64. package/src/db/schema/queue.schema.ts +59 -0
  65. package/src/db/schema/relationships.schema.ts +42 -0
  66. package/src/db/schema.ts +223 -0
  67. package/src/db/worker-connection.ts +47 -0
  68. package/src/index.ts +235 -0
  69. package/src/mcp/CLAUDE.md +1 -0
  70. package/src/mcp/index.ts +1380 -0
  71. package/src/mcp/legacyState.ts +22 -0
  72. package/src/mcp/rateLimit.ts +358 -0
  73. package/src/mcp/resources.ts +309 -0
  74. package/src/mcp/results.ts +104 -0
  75. package/src/mcp/tools.ts +401 -0
  76. package/src/queues/config.ts +119 -0
  77. package/src/queues/index.ts +289 -0
  78. package/src/sdk/client.ts +225 -0
  79. package/src/sdk/errors.ts +266 -0
  80. package/src/sdk/http.ts +560 -0
  81. package/src/sdk/index.ts +244 -0
  82. package/src/sdk/resources/base.ts +65 -0
  83. package/src/sdk/resources/connections.ts +204 -0
  84. package/src/sdk/resources/documents.ts +163 -0
  85. package/src/sdk/resources/index.ts +10 -0
  86. package/src/sdk/resources/memories.ts +150 -0
  87. package/src/sdk/resources/search.ts +60 -0
  88. package/src/sdk/resources/settings.ts +36 -0
  89. package/src/sdk/types.ts +674 -0
  90. package/src/services/chunking/index.ts +451 -0
  91. package/src/services/chunking.service.ts +650 -0
  92. package/src/services/csrf.service.ts +252 -0
  93. package/src/services/documents.repository.ts +219 -0
  94. package/src/services/documents.service.ts +191 -0
  95. package/src/services/embedding.service.ts +404 -0
  96. package/src/services/extraction.service.ts +300 -0
  97. package/src/services/extractors/code.extractor.ts +451 -0
  98. package/src/services/extractors/index.ts +9 -0
  99. package/src/services/extractors/markdown.extractor.ts +461 -0
  100. package/src/services/extractors/pdf.extractor.ts +315 -0
  101. package/src/services/extractors/text.extractor.ts +118 -0
  102. package/src/services/extractors/url.extractor.ts +243 -0
  103. package/src/services/index.ts +235 -0
  104. package/src/services/ingestion.service.ts +177 -0
  105. package/src/services/llm/anthropic.ts +400 -0
  106. package/src/services/llm/base.ts +460 -0
  107. package/src/services/llm/contradiction-detector.service.ts +526 -0
  108. package/src/services/llm/heuristics.ts +148 -0
  109. package/src/services/llm/index.ts +309 -0
  110. package/src/services/llm/memory-classifier.service.ts +383 -0
  111. package/src/services/llm/memory-extension-detector.service.ts +523 -0
  112. package/src/services/llm/mock.ts +470 -0
  113. package/src/services/llm/openai.ts +398 -0
  114. package/src/services/llm/prompts.ts +438 -0
  115. package/src/services/llm/types.ts +373 -0
  116. package/src/services/memory.repository.ts +1769 -0
  117. package/src/services/memory.service.ts +1338 -0
  118. package/src/services/memory.types.ts +234 -0
  119. package/src/services/persistence/index.ts +295 -0
  120. package/src/services/pipeline.service.ts +509 -0
  121. package/src/services/profile.repository.ts +436 -0
  122. package/src/services/profile.service.ts +560 -0
  123. package/src/services/profile.types.ts +270 -0
  124. package/src/services/relationships/detector.ts +1128 -0
  125. package/src/services/relationships/index.ts +268 -0
  126. package/src/services/relationships/memory-integration.ts +459 -0
  127. package/src/services/relationships/strategies.ts +132 -0
  128. package/src/services/relationships/types.ts +370 -0
  129. package/src/services/search.service.ts +761 -0
  130. package/src/services/search.types.ts +220 -0
  131. package/src/services/secrets.service.ts +384 -0
  132. package/src/services/vectorstore/base.ts +327 -0
  133. package/src/services/vectorstore/index.ts +444 -0
  134. package/src/services/vectorstore/memory.ts +286 -0
  135. package/src/services/vectorstore/migration.ts +295 -0
  136. package/src/services/vectorstore/mock.ts +403 -0
  137. package/src/services/vectorstore/pgvector.ts +695 -0
  138. package/src/services/vectorstore/types.ts +247 -0
  139. package/src/startup.ts +389 -0
  140. package/src/types/api.types.ts +193 -0
  141. package/src/types/document.types.ts +103 -0
  142. package/src/types/index.ts +241 -0
  143. package/src/types/profile.base.ts +133 -0
  144. package/src/utils/errors.ts +447 -0
  145. package/src/utils/id.ts +15 -0
  146. package/src/utils/index.ts +101 -0
  147. package/src/utils/logger.ts +313 -0
  148. package/src/utils/sanitization.ts +501 -0
  149. package/src/utils/secret-validation.ts +273 -0
  150. package/src/utils/synonyms.ts +188 -0
  151. package/src/utils/validation.ts +581 -0
  152. package/src/workers/chunking.worker.ts +242 -0
  153. package/src/workers/embedding.worker.ts +358 -0
  154. package/src/workers/extraction.worker.ts +346 -0
  155. package/src/workers/indexing.worker.ts +505 -0
  156. package/tsconfig.json +38 -0
@@ -0,0 +1,509 @@
1
+ /**
2
+ * Document processing pipeline - orchestrates the full extraction workflow
3
+ */
4
+
5
+ import { v4 as uuidv4 } from 'uuid'
6
+ import { Document, DocumentStatus, Chunk, PipelineResult, ChunkingOptions } from '../types/document.types.js'
7
+ import { ExtractionService } from './extraction.service.js'
8
+ import { ChunkingService } from './chunking.service.js'
9
+ import { NotFoundError, ExtractionError, ErrorCode } from '../utils/errors.js'
10
+
11
/**
 * Minimal promise-based mutual-exclusion lock.
 *
 * A caller that finds the lock taken is parked in FIFO order. Releasing the
 * lock hands ownership directly to the oldest parked caller, so the `locked`
 * flag stays set across the hand-off.
 */
class Mutex {
  private locked = false
  private waitQueue: Array<() => void> = []

  /**
   * Resolves once the caller owns the lock.
   */
  async acquire(): Promise<void> {
    if (this.locked) {
      // Park the caller; release() will invoke the stored resolver later.
      return new Promise<void>((wake) => {
        this.waitQueue.push(wake)
      })
    }

    this.locked = true
  }

  /**
   * Gives the lock to the next parked caller, or marks it free when nobody
   * is waiting.
   */
  release(): void {
    const wakeNext = this.waitQueue.shift()

    if (wakeNext === undefined) {
      this.locked = false
      return
    }

    // Ownership transfers without ever clearing `locked`.
    wakeNext()
  }

  /**
   * Runs `fn` while holding the lock and always releases afterwards, even
   * when `fn` throws or rejects.
   *
   * @param fn - Synchronous or asynchronous critical section
   * @returns Whatever `fn` returns or resolves to
   */
  async withLock<T>(fn: () => T | Promise<T>): Promise<T> {
    await this.acquire()
    try {
      const outcome = await fn()
      return outcome
    } finally {
      this.release()
    }
  }
}
47
+
48
/**
 * FIFO queue whose every operation runs under a shared Mutex, so overlapping
 * async callers never interleave inside a single queue operation.
 */
class ConcurrentQueue<T> {
  private items: T[] = []
  private mutex = new Mutex()

  /**
   * Appends one item to the tail of the queue.
   */
  async enqueue(item: T): Promise<void> {
    await this.mutex.withLock(() => {
      this.items.push(item)
    })
  }

  /**
   * Appends every item of `items`, in order, to the tail of the queue.
   *
   * Items are appended one at a time rather than with `push(...items)`:
   * spreading a very large array into a call exceeds the engine's argument
   * limit and throws a RangeError.
   */
  async enqueueBatch(items: T[]): Promise<void> {
    await this.mutex.withLock(() => {
      for (const item of items) {
        this.items.push(item)
      }
    })
  }

  /**
   * Removes and returns the head of the queue.
   *
   * @returns The oldest item, or `undefined` when the queue is empty
   */
  async dequeue(): Promise<T | undefined> {
    return this.mutex.withLock(() => {
      return this.items.shift()
    })
  }

  /**
   * @returns Number of items currently queued
   */
  async size(): Promise<number> {
    return this.mutex.withLock(() => {
      return this.items.length
    })
  }

  /**
   * @returns `true` when no items are queued
   */
  async isEmpty(): Promise<boolean> {
    return this.mutex.withLock(() => {
      return this.items.length === 0
    })
  }
}
85
+
86
/**
 * Tunable settings for the document processing pipeline.
 */
interface PipelineConfig {
  /** Number of retries after the first attempt; a stage runs at most maxRetries + 1 times */
  maxRetries: number
  /** Base delay between attempts in milliseconds; doubled after every failed attempt */
  retryDelayMs: number
  /** Options forwarded to the chunking service */
  chunkingOptions?: ChunkingOptions

  /** Time budget for the extraction stage, in milliseconds (default: 30000) */
  extractionTimeoutMs?: number
  /** Time budget for the chunking stage, in milliseconds (default: 10000) */
  chunkingTimeoutMs?: number
  /** Time budget for the embedding stage, in milliseconds (default: 60000) */
  embeddingTimeoutMs?: number
  /** Time budget for the indexing stage, in milliseconds (default: 30000) */
  indexingTimeoutMs?: number

  /** Called every time a stored document's status is updated */
  onStatusChange?: (docId: string, status: DocumentStatus) => void
  /** Called when processing of a document fails */
  onError?: (docId: string, error: Error) => void
}
101
+
102
/**
 * Source of vector embeddings for chunk text.
 */
interface EmbeddingProvider {
  /** Produces the embedding vector for a single text */
  embed(text: string): Promise<number[]>

  /** Produces one embedding vector per input text */
  embedBatch(texts: string[]): Promise<number[][]>
}
106
+
107
/**
 * Destination that stores processed chunks and can drop them per document.
 */
interface IndexProvider {
  /** Stores the given chunks in the index */
  index(chunks: Chunk[]): Promise<void>

  /** Removes the index entries belonging to the given document ID */
  remove(documentId: string): Promise<void>
}
111
+
112
/**
 * Baseline pipeline settings; caller-supplied configuration is merged over
 * these values.
 */
const DEFAULT_CONFIG: PipelineConfig = {
  // Retry policy
  maxRetries: 3,
  retryDelayMs: 1_000,

  // Per-stage time budgets, in milliseconds
  extractionTimeoutMs: 30_000,
  chunkingTimeoutMs: 10_000,
  embeddingTimeoutMs: 60_000,
  indexingTimeoutMs: 30_000,
}
120
+
121
/**
 * Error raised when a pipeline stage exceeds its time budget.
 */
class PipelineTimeoutError extends Error {
  /**
   * @param stage - Name of the stage that ran out of time
   * @param timeoutMs - The budget that was exceeded, in milliseconds
   */
  constructor(stage: string, timeoutMs: number) {
    const message = `Pipeline ${stage} stage timed out after ${timeoutMs}ms`
    super(message)
    this.name = 'PipelineTimeoutError'
  }
}
130
+
131
/**
 * Races an operation against a deadline.
 *
 * If the deadline fires first, the returned promise rejects with a
 * PipelineTimeoutError. The timer is always cleared once the race settles.
 * Note that the operation itself is not interrupted: this function only
 * stops waiting for it.
 *
 * @param operation - Factory for the promise to wait on
 * @param timeoutMs - Deadline in milliseconds
 * @param stageName - Stage label used in the timeout error message
 * @returns The value the operation resolves to
 */
async function withTimeout<T>(operation: () => Promise<T>, timeoutMs: number, stageName: string): Promise<T> {
  let timer: NodeJS.Timeout | undefined

  const deadline = new Promise<never>((_resolve, reject) => {
    timer = setTimeout(() => reject(new PipelineTimeoutError(stageName, timeoutMs)), timeoutMs)
  })

  try {
    return await Promise.race([operation(), deadline])
  } finally {
    // Drop the pending timer so it cannot fire after the race has settled.
    if (timer !== undefined) {
      clearTimeout(timer)
    }
  }
}
152
+
153
/**
 * Orchestrates document processing: extraction, chunking, and the optional
 * embedding and indexing stages. Documents and their chunks are held in
 * in-memory maps owned by the service instance.
 */
export class PipelineService {
  private readonly extractionService: ExtractionService
  private readonly chunkingService: ChunkingService
  private readonly config: PipelineConfig

  // Document store (in-memory for now, could be replaced with database)
  private readonly documents: Map<string, Document> = new Map()
  private readonly chunks: Map<string, Chunk[]> = new Map()

  // Optional providers; the matching stage is skipped while one is unset
  private embeddingProvider?: EmbeddingProvider
  private indexProvider?: IndexProvider

  /**
   * @param config - Overrides merged on top of DEFAULT_CONFIG
   */
  constructor(config?: Partial<PipelineConfig>) {
    this.extractionService = new ExtractionService()
    this.chunkingService = new ChunkingService()
    this.config = { ...DEFAULT_CONFIG, ...config }
  }

  /**
   * Set embedding provider for generating vector embeddings
   */
  setEmbeddingProvider(provider: EmbeddingProvider): void {
    this.embeddingProvider = provider
  }

  /**
   * Set index provider for storing chunks
   */
  setIndexProvider(provider: IndexProvider): void {
    this.indexProvider = provider
  }

  /**
   * Create a new document with status 'queued' and store it.
   *
   * The document is only registered here; processing starts when
   * processDocument or processDocuments is called with its ID.
   *
   * @param content - Raw document content
   * @param metadata - Optional metadata; defaults to an empty object
   * @returns The stored document
   */
  async createDocument(content: string, metadata?: Document['metadata']): Promise<Document> {
    const now = new Date()
    const document: Document = {
      id: uuidv4(),
      content,
      status: 'queued',
      metadata: metadata ?? {},
      createdAt: now,
      updatedAt: now,
      retryCount: 0,
    }

    this.documents.set(document.id, document)
    return document
  }

  /**
   * Process a document through the full pipeline with configurable timeouts.
   *
   * Stage failures are not thrown: they are recorded on the document,
   * reported through `onError`, and returned as a result with status 'error'.
   *
   * A stage timeout only stops waiting for that stage. The stage's retry
   * loop is not cancelled and may keep running in the background.
   *
   * @param docId - ID of a document previously stored in this service
   * @returns The processing outcome, including chunks on success
   * @throws NotFoundError when no document with `docId` is stored
   */
  async processDocument(docId: string): Promise<PipelineResult> {
    const startTime = Date.now()
    const document = this.documents.get(docId)

    if (!document) {
      throw new NotFoundError('Document', docId, ErrorCode.DOCUMENT_NOT_FOUND)
    }

    try {
      // Stage 1: Extracting (with timeout)
      await this.updateStatus(docId, 'extracting')
      const extractionResult = await withTimeout(
        () => this.withRetry(() => this.extractionService.extract(document), 'extraction'),
        this.config.extractionTimeoutMs ?? DEFAULT_CONFIG.extractionTimeoutMs!,
        'extraction'
      )

      // Update document with extraction results
      document.contentType = extractionResult.contentType
      document.metadata = {
        ...document.metadata,
        ...extractionResult.metadata,
      }

      // Stage 2: Chunking (with timeout)
      await this.updateStatus(docId, 'chunking')
      const chunks = await withTimeout(
        () =>
          this.withRetry(
            // async wrapper turns a synchronous throw from chunk() into a rejection
            async () =>
              this.chunkingService.chunk(
                docId,
                extractionResult.content,
                extractionResult.contentType,
                this.config.chunkingOptions
              ),
            'chunking'
          ),
        this.config.chunkingTimeoutMs ?? DEFAULT_CONFIG.chunkingTimeoutMs!,
        'chunking'
      )

      // Stage 3: Embedding (if provider available, with timeout)
      if (this.embeddingProvider) {
        await this.updateStatus(docId, 'embedding')
        await withTimeout(
          () => this.withRetry(() => this.generateEmbeddings(chunks), 'embedding'),
          this.config.embeddingTimeoutMs ?? DEFAULT_CONFIG.embeddingTimeoutMs!,
          'embedding'
        )
      }

      // Stage 4: Indexing (if provider available, with timeout)
      const indexProvider = this.indexProvider
      if (indexProvider) {
        await this.updateStatus(docId, 'indexing')
        await withTimeout(
          () => this.withRetry(() => indexProvider.index(chunks), 'indexing'),
          this.config.indexingTimeoutMs ?? DEFAULT_CONFIG.indexingTimeoutMs!,
          'indexing'
        )
      }

      // Stage 5: Done
      // Store the chunks before announcing 'done' so that an onStatusChange
      // listener reacting to 'done' can already read them via getChunks().
      this.chunks.set(docId, chunks)
      await this.updateStatus(docId, 'done')

      return {
        documentId: docId,
        status: 'done',
        chunks,
        processingTimeMs: Date.now() - startTime,
      }
    } catch (error) {
      const errorMessage = error instanceof Error ? error.message : 'Unknown error'
      document.errorMessage = errorMessage
      await this.updateStatus(docId, 'error')

      // Always hand the listener a real Error, even if a non-Error was thrown
      const failure = error instanceof Error ? error : new Error(errorMessage)
      this.config.onError?.(docId, failure)

      return {
        documentId: docId,
        status: 'error',
        chunks: [],
        processingTimeMs: Date.now() - startTime,
        error: errorMessage,
      }
    }
  }

  /**
   * Process multiple documents with a bounded number of concurrent workers.
   *
   * Results are collected in completion order, which may differ from the
   * order of `docIds`. Empty-string IDs are skipped. If any ID is unknown,
   * processDocument throws NotFoundError and the returned promise rejects.
   *
   * @param docIds - IDs of the documents to process
   * @param concurrency - Maximum number of documents in flight (default 5).
   *   Values below 1, fractional values and NaN are normalized to a whole
   *   number of at least 1 so that a non-empty batch is always processed.
   * @returns One result per processed document
   */
  async processDocuments(docIds: string[], concurrency: number = 5): Promise<PipelineResult[]> {
    const results: PipelineResult[] = []
    const resultsMutex = new Mutex()
    const queue = new ConcurrentQueue<string>()

    // Enqueue all document IDs
    await queue.enqueueBatch(docIds)

    const processNext = async (): Promise<void> => {
      while (!(await queue.isEmpty())) {
        const docId = await queue.dequeue()
        if (docId) {
          const result = await this.processDocument(docId)
          // Serialized push to the shared results array
          await resultsMutex.withLock(() => {
            results.push(result)
          })
        }
      }
    }

    // Normalize the requested concurrency: Array(n) throws for negative,
    // fractional or NaN lengths, and zero workers would silently skip the batch.
    const requested = Number.isNaN(concurrency) ? 1 : Math.max(1, Math.floor(concurrency))
    const workerCount = Math.min(requested, docIds.length)

    // Create concurrent workers
    const workers = Array.from({ length: workerCount }, () => processNext())

    await Promise.all(workers)
    return results
  }

  /**
   * Reprocess a document after resetting its retry count, error message and
   * status.
   *
   * @throws NotFoundError when no document with `docId` is stored
   */
  async reprocessDocument(docId: string): Promise<PipelineResult> {
    const document = this.documents.get(docId)

    if (!document) {
      throw new NotFoundError('Document', docId, ErrorCode.DOCUMENT_NOT_FOUND)
    }

    // Reset retry count and clear error
    document.retryCount = 0
    document.errorMessage = undefined
    document.status = 'queued'

    return this.processDocument(docId)
  }

  /**
   * Get document by ID
   */
  getDocument(docId: string): Document | undefined {
    return this.documents.get(docId)
  }

  /**
   * Get chunks for a document; undefined until the document has been
   * processed successfully (or chunks were imported for it)
   */
  getChunks(docId: string): Chunk[] | undefined {
    return this.chunks.get(docId)
  }

  /**
   * Get all documents with a specific status
   */
  getDocumentsByStatus(status: DocumentStatus): Document[] {
    return Array.from(this.documents.values()).filter((doc) => doc.status === status)
  }

  /**
   * Delete a document and its chunks.
   *
   * The index provider, when set, is asked to remove the document first; if
   * that call rejects, the in-memory document and chunks are left in place.
   */
  async deleteDocument(docId: string): Promise<void> {
    if (this.indexProvider) {
      await this.indexProvider.remove(docId)
    }

    this.documents.delete(docId)
    this.chunks.delete(docId)
  }

  /**
   * Get pipeline statistics
   *
   * @returns Document count, per-status counts, and total stored chunk count
   */
  getStats(): {
    total: number
    byStatus: Record<DocumentStatus, number>
    totalChunks: number
  } {
    const docs = Array.from(this.documents.values())
    const statuses: DocumentStatus[] = ['queued', 'extracting', 'chunking', 'embedding', 'indexing', 'done', 'error']

    const byStatus = statuses.reduce(
      (acc, status) => {
        acc[status] = docs.filter((d) => d.status === status).length
        return acc
      },
      {} as Record<DocumentStatus, number>
    )

    const totalChunks = Array.from(this.chunks.values()).reduce((sum, chunks) => sum + chunks.length, 0)

    return {
      total: docs.length,
      byStatus,
      totalChunks,
    }
  }

  /**
   * Update document status and notify listeners.
   *
   * Does nothing when the document is no longer stored.
   */
  private async updateStatus(docId: string, status: DocumentStatus): Promise<void> {
    const document = this.documents.get(docId)
    if (document) {
      document.status = status
      document.updatedAt = new Date()
      this.config.onStatusChange?.(docId, status)
    }
  }

  /**
   * Execute an operation with retry logic and exponential backoff.
   *
   * The operation runs up to maxRetries + 1 times; the wait before retry k
   * is retryDelayMs * 2^(k-1).
   *
   * @param operation - The work to attempt
   * @param stageName - Stage label used in the failure message and details
   * @throws ExtractionError once every attempt has failed, whatever the stage
   */
  private async withRetry<T>(operation: () => Promise<T>, stageName: string): Promise<T> {
    let lastError: Error | undefined

    for (let attempt = 0; attempt <= this.config.maxRetries; attempt++) {
      try {
        return await operation()
      } catch (error) {
        lastError = error as Error

        if (attempt < this.config.maxRetries) {
          // Exponential backoff
          const delay = this.config.retryDelayMs * Math.pow(2, attempt)
          await this.delay(delay)
        }
      }
    }

    throw new ExtractionError(
      `${stageName} failed after ${this.config.maxRetries + 1} attempts: ${lastError?.message}`,
      undefined,
      {
        stage: stageName,
        attempts: this.config.maxRetries + 1,
        lastError: lastError?.message,
      }
    )
  }

  /**
   * Generate embeddings for chunks and attach them in place.
   *
   * Embeddings are matched to chunks by position; a chunk with no matching
   * embedding is left unchanged.
   */
  private async generateEmbeddings(chunks: Chunk[]): Promise<void> {
    if (!this.embeddingProvider) return

    const texts = chunks.map((c) => c.content)
    const embeddings = await this.embeddingProvider.embedBatch(texts)

    for (let i = 0; i < chunks.length; i++) {
      const chunk = chunks[i]
      const embedding = embeddings[i]
      if (chunk && embedding) {
        chunk.embedding = embedding
      }
    }
  }

  /**
   * Delay utility
   */
  private delay(ms: number): Promise<void> {
    return new Promise((resolve) => setTimeout(resolve, ms))
  }

  /**
   * Export documents for backup.
   *
   * The returned documents and chunk arrays are the stored objects
   * themselves, not copies.
   */
  exportDocuments(): { documents: Document[]; chunks: Record<string, Chunk[]> } {
    return {
      documents: Array.from(this.documents.values()),
      chunks: Object.fromEntries(this.chunks.entries()),
    }
  }

  /**
   * Import documents from backup.
   *
   * Entries with an ID that is already stored are overwritten.
   */
  importDocuments(data: { documents: Document[]; chunks: Record<string, Chunk[]> }): void {
    for (const doc of data.documents) {
      this.documents.set(doc.id, doc)
    }

    for (const [docId, docChunks] of Object.entries(data.chunks)) {
      this.chunks.set(docId, docChunks)
    }
  }

  /**
   * Clear all documents and chunks
   */
  clear(): void {
    this.documents.clear()
    this.chunks.clear()
  }
}