@twelvehart/supermemory-runtime 1.0.0-next.0
- package/.env.example +57 -0
- package/README.md +374 -0
- package/dist/index.js +189 -0
- package/dist/mcp/index.js +1132 -0
- package/docker-compose.prod.yml +91 -0
- package/docker-compose.yml +358 -0
- package/drizzle/0000_dapper_the_professor.sql +159 -0
- package/drizzle/0001_api_keys.sql +51 -0
- package/drizzle/meta/0000_snapshot.json +1532 -0
- package/drizzle/meta/_journal.json +13 -0
- package/drizzle.config.ts +20 -0
- package/package.json +114 -0
- package/scripts/add-extraction-job.ts +122 -0
- package/scripts/benchmark-pgvector.ts +122 -0
- package/scripts/bootstrap.sh +209 -0
- package/scripts/check-runtime-pack.ts +111 -0
- package/scripts/claude-mcp-config.ts +336 -0
- package/scripts/docker-entrypoint.sh +183 -0
- package/scripts/doctor.ts +377 -0
- package/scripts/init-db.sql +33 -0
- package/scripts/install.sh +1110 -0
- package/scripts/mcp-setup.ts +271 -0
- package/scripts/migrations/001_create_pgvector_extension.sql +31 -0
- package/scripts/migrations/002_create_memory_embeddings_table.sql +75 -0
- package/scripts/migrations/003_create_hnsw_index.sql +94 -0
- package/scripts/migrations/004_create_memory_embeddings_standalone.sql +70 -0
- package/scripts/migrations/005_create_chunks_table.sql +95 -0
- package/scripts/migrations/006_create_processing_queue.sql +45 -0
- package/scripts/migrations/generate_test_data.sql +42 -0
- package/scripts/migrations/phase1_comprehensive_test.sql +204 -0
- package/scripts/migrations/run_migrations.sh +286 -0
- package/scripts/migrations/test_hnsw_index.sql +255 -0
- package/scripts/pre-commit-secrets +282 -0
- package/scripts/run-extraction-worker.ts +46 -0
- package/scripts/run-phase1-tests.sh +291 -0
- package/scripts/setup.ts +222 -0
- package/scripts/smoke-install.sh +12 -0
- package/scripts/test-health-endpoint.sh +328 -0
- package/src/api/index.ts +2 -0
- package/src/api/middleware/auth.ts +80 -0
- package/src/api/middleware/csrf.ts +308 -0
- package/src/api/middleware/errorHandler.ts +166 -0
- package/src/api/middleware/rateLimit.ts +360 -0
- package/src/api/middleware/validation.ts +514 -0
- package/src/api/routes/documents.ts +286 -0
- package/src/api/routes/profiles.ts +237 -0
- package/src/api/routes/search.ts +71 -0
- package/src/api/stores/index.ts +58 -0
- package/src/config/bootstrap-env.ts +3 -0
- package/src/config/env.ts +71 -0
- package/src/config/feature-flags.ts +25 -0
- package/src/config/index.ts +140 -0
- package/src/config/secrets.config.ts +291 -0
- package/src/db/client.ts +92 -0
- package/src/db/index.ts +73 -0
- package/src/db/postgres.ts +72 -0
- package/src/db/schema/chunks.schema.ts +31 -0
- package/src/db/schema/containers.schema.ts +46 -0
- package/src/db/schema/documents.schema.ts +49 -0
- package/src/db/schema/embeddings.schema.ts +32 -0
- package/src/db/schema/index.ts +11 -0
- package/src/db/schema/memories.schema.ts +72 -0
- package/src/db/schema/profiles.schema.ts +34 -0
- package/src/db/schema/queue.schema.ts +59 -0
- package/src/db/schema/relationships.schema.ts +42 -0
- package/src/db/schema.ts +223 -0
- package/src/db/worker-connection.ts +47 -0
- package/src/index.ts +235 -0
- package/src/mcp/CLAUDE.md +1 -0
- package/src/mcp/index.ts +1380 -0
- package/src/mcp/legacyState.ts +22 -0
- package/src/mcp/rateLimit.ts +358 -0
- package/src/mcp/resources.ts +309 -0
- package/src/mcp/results.ts +104 -0
- package/src/mcp/tools.ts +401 -0
- package/src/queues/config.ts +119 -0
- package/src/queues/index.ts +289 -0
- package/src/sdk/client.ts +225 -0
- package/src/sdk/errors.ts +266 -0
- package/src/sdk/http.ts +560 -0
- package/src/sdk/index.ts +244 -0
- package/src/sdk/resources/base.ts +65 -0
- package/src/sdk/resources/connections.ts +204 -0
- package/src/sdk/resources/documents.ts +163 -0
- package/src/sdk/resources/index.ts +10 -0
- package/src/sdk/resources/memories.ts +150 -0
- package/src/sdk/resources/search.ts +60 -0
- package/src/sdk/resources/settings.ts +36 -0
- package/src/sdk/types.ts +674 -0
- package/src/services/chunking/index.ts +451 -0
- package/src/services/chunking.service.ts +650 -0
- package/src/services/csrf.service.ts +252 -0
- package/src/services/documents.repository.ts +219 -0
- package/src/services/documents.service.ts +191 -0
- package/src/services/embedding.service.ts +404 -0
- package/src/services/extraction.service.ts +300 -0
- package/src/services/extractors/code.extractor.ts +451 -0
- package/src/services/extractors/index.ts +9 -0
- package/src/services/extractors/markdown.extractor.ts +461 -0
- package/src/services/extractors/pdf.extractor.ts +315 -0
- package/src/services/extractors/text.extractor.ts +118 -0
- package/src/services/extractors/url.extractor.ts +243 -0
- package/src/services/index.ts +235 -0
- package/src/services/ingestion.service.ts +177 -0
- package/src/services/llm/anthropic.ts +400 -0
- package/src/services/llm/base.ts +460 -0
- package/src/services/llm/contradiction-detector.service.ts +526 -0
- package/src/services/llm/heuristics.ts +148 -0
- package/src/services/llm/index.ts +309 -0
- package/src/services/llm/memory-classifier.service.ts +383 -0
- package/src/services/llm/memory-extension-detector.service.ts +523 -0
- package/src/services/llm/mock.ts +470 -0
- package/src/services/llm/openai.ts +398 -0
- package/src/services/llm/prompts.ts +438 -0
- package/src/services/llm/types.ts +373 -0
- package/src/services/memory.repository.ts +1769 -0
- package/src/services/memory.service.ts +1338 -0
- package/src/services/memory.types.ts +234 -0
- package/src/services/persistence/index.ts +295 -0
- package/src/services/pipeline.service.ts +509 -0
- package/src/services/profile.repository.ts +436 -0
- package/src/services/profile.service.ts +560 -0
- package/src/services/profile.types.ts +270 -0
- package/src/services/relationships/detector.ts +1128 -0
- package/src/services/relationships/index.ts +268 -0
- package/src/services/relationships/memory-integration.ts +459 -0
- package/src/services/relationships/strategies.ts +132 -0
- package/src/services/relationships/types.ts +370 -0
- package/src/services/search.service.ts +761 -0
- package/src/services/search.types.ts +220 -0
- package/src/services/secrets.service.ts +384 -0
- package/src/services/vectorstore/base.ts +327 -0
- package/src/services/vectorstore/index.ts +444 -0
- package/src/services/vectorstore/memory.ts +286 -0
- package/src/services/vectorstore/migration.ts +295 -0
- package/src/services/vectorstore/mock.ts +403 -0
- package/src/services/vectorstore/pgvector.ts +695 -0
- package/src/services/vectorstore/types.ts +247 -0
- package/src/startup.ts +389 -0
- package/src/types/api.types.ts +193 -0
- package/src/types/document.types.ts +103 -0
- package/src/types/index.ts +241 -0
- package/src/types/profile.base.ts +133 -0
- package/src/utils/errors.ts +447 -0
- package/src/utils/id.ts +15 -0
- package/src/utils/index.ts +101 -0
- package/src/utils/logger.ts +313 -0
- package/src/utils/sanitization.ts +501 -0
- package/src/utils/secret-validation.ts +273 -0
- package/src/utils/synonyms.ts +188 -0
- package/src/utils/validation.ts +581 -0
- package/src/workers/chunking.worker.ts +242 -0
- package/src/workers/embedding.worker.ts +358 -0
- package/src/workers/extraction.worker.ts +346 -0
- package/src/workers/indexing.worker.ts +505 -0
- package/tsconfig.json +38 -0
package/src/workers/chunking.worker.ts (new file)
@@ -0,0 +1,242 @@

/**
 * Chunking Worker
 *
 * BullMQ worker that processes documents from extraction queue,
 * chunks them using appropriate strategies, stores chunks in database,
 * and chains to embedding queue.
 *
 * Part of TASK-008: Content Processing Pipeline
 */

import { Job, Worker, Queue } from 'bullmq'
import { v4 as uuidv4 } from 'uuid'
import { eq } from 'drizzle-orm'
import { chunks } from '../db/schema/chunks.schema.js'
import { memories } from '../db/schema/memories.schema.js'
import { chunkContent, detectContentType } from '../services/chunking/index.js'
import { workerDb as db } from '../db/worker-connection.js'
import { getLogger } from '../utils/logger.js'
import { NotFoundError, ErrorCode } from '../utils/errors.js'

const logger = getLogger('ChunkingWorker')

// Job data interfaces
export interface ChunkingJobData {
  documentId: string
  memoryId: string
  content: string
  contentType?: 'markdown' | 'code' | 'text'
  chunkSize?: number
  overlap?: number
}

export interface ChunkingJobResult {
  documentId: string
  memoryId: string
  chunkCount: number
  chunkIds: string[]
  contentType: 'markdown' | 'code' | 'text'
  totalTokens: number
}

export interface EmbeddingJobData {
  documentId: string
  memoryId: string
  chunkIds: string[]
}

// Queue configuration
const REDIS_URL = process.env.REDIS_URL || 'redis://localhost:6379'
const QUEUE_NAME = 'chunking'
const EMBEDDING_QUEUE_NAME = 'embedding'
const CONCURRENCY = parseInt(process.env.BULLMQ_CONCURRENCY_CHUNKING || '3', 10)

// Retry configuration
const MAX_ATTEMPTS = 3
const BACKOFF_DELAY = 2000 // 2 seconds

/**
 * Process a chunking job
 */
async function processChunkingJob(job: Job<ChunkingJobData>): Promise<ChunkingJobResult> {
  const { documentId, memoryId, content, contentType, chunkSize, overlap } = job.data

  try {
    // Update progress: starting
    await job.updateProgress(0)
    await job.log(`Starting chunking for document ${documentId}`)

    // Detect content type if not provided
    const detectedType = contentType || detectContentType(content)
    await job.log(`Detected content type: ${detectedType}`)

    // Update progress: content type detected
    await job.updateProgress(20)

    // Chunk the content using appropriate strategy
    const contentChunks = chunkContent(content, memoryId, {
      chunkSize,
      overlap,
      contentType: detectedType,
    })

    await job.log(`Generated ${contentChunks.length} chunks`)
    await job.updateProgress(50)

    // Verify memory exists
    const memory = await db.query.memories.findFirst({
      where: eq(memories.id, memoryId),
    })

    if (!memory) {
      throw new NotFoundError('Memory', memoryId, ErrorCode.MEMORY_NOT_FOUND)
    }

    // Store chunks in database
    const chunkIds: string[] = []
    const totalTokens = contentChunks.reduce((sum, chunk) => sum + chunk.tokenCount, 0)

    for (let i = 0; i < contentChunks.length; i++) {
      const chunk = contentChunks[i]
      if (!chunk) continue
      const chunkId = uuidv4()

      await db.insert(chunks).values({
        id: chunkId,
        memoryId: memoryId,
        content: chunk.content,
        chunkIndex: i,
        startOffset: chunk.metadata.startOffset,
        endOffset: chunk.metadata.endOffset,
        tokenCount: chunk.tokenCount,
        metadata: {
          contentType: chunk.metadata.contentType,
          language: chunk.metadata.language,
          heading: chunk.metadata.heading,
          position: chunk.metadata.position,
        },
      })

      chunkIds.push(chunkId)

      // Update progress per chunk
      const progress = 50 + Math.floor(((i + 1) / contentChunks.length) * 40)
      await job.updateProgress(progress)
    }

    await job.log(`Stored ${chunkIds.length} chunks in database`)
    await job.updateProgress(90)

    // Chain to embedding queue
    const embeddingQueue = new Queue<EmbeddingJobData>(EMBEDDING_QUEUE_NAME, {
      connection: {
        host: new URL(REDIS_URL).hostname,
        port: parseInt(new URL(REDIS_URL).port || '6379', 10),
      },
    })

    await embeddingQueue.add(
      'embed',
      {
        documentId,
        memoryId,
        chunkIds,
      },
      {
        priority: 5, // Medium priority
        attempts: MAX_ATTEMPTS,
        backoff: {
          type: 'exponential',
          delay: BACKOFF_DELAY,
        },
      }
    )

    await job.log(`Chained to embedding queue with ${chunkIds.length} chunks`)
    await job.updateProgress(100)

    return {
      documentId,
      memoryId,
      chunkCount: contentChunks.length,
      chunkIds,
      contentType: detectedType,
      totalTokens,
    }
  } catch (error) {
    const errorMessage = error instanceof Error ? error.message : String(error)
    await job.log(`Error: ${errorMessage}`)
    throw error
  }
}

/**
 * Create and start chunking worker
 */
export function createChunkingWorker(): Worker<ChunkingJobData, ChunkingJobResult> {
  const worker = new Worker<ChunkingJobData, ChunkingJobResult>(QUEUE_NAME, async (job) => processChunkingJob(job), {
    connection: {
      host: new URL(REDIS_URL).hostname,
      port: parseInt(new URL(REDIS_URL).port || '6379', 10),
    },
    concurrency: CONCURRENCY,
    autorun: true,
    removeOnComplete: {
      count: 100, // Keep last 100 completed jobs
    },
    removeOnFail: {
      count: 500, // Keep last 500 failed jobs for debugging
    },
  })

  // Event handlers
  worker.on('completed', (job, result) => {
    logger.info('Job completed', { jobId: job.id, chunkCount: result.chunkCount })
  })

  worker.on('failed', (job, error) => {
    logger.error('Job failed', { jobId: job?.id, error: error.message })
  })

  worker.on('error', (error) => {
    logger.error('Worker error', { error: error.message })
  })

  worker.on('stalled', (jobId) => {
    logger.warn('Job stalled', { jobId })
  })

  logger.info('Worker started', { concurrency: CONCURRENCY })

  return worker
}

/**
 * Create chunking queue (for adding jobs)
 */
export function createChunkingQueue(): Queue<ChunkingJobData> {
  return new Queue<ChunkingJobData>(QUEUE_NAME, {
    connection: {
      host: new URL(REDIS_URL).hostname,
      port: parseInt(new URL(REDIS_URL).port || '6379', 10),
    },
    defaultJobOptions: {
      attempts: MAX_ATTEMPTS,
      backoff: {
        type: 'exponential',
        delay: BACKOFF_DELAY,
      },
      removeOnComplete: 100,
      removeOnFail: 500,
    },
  })
}

/**
 * Graceful shutdown
 */
export async function shutdownChunkingWorker(worker: Worker): Promise<void> {
  logger.info('Shutting down...')
  await worker.close()
  logger.info('Shutdown complete')
}
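For orientation, here is a minimal sketch of how this file's exports might be wired together from an application entry point. The entry-point file, the relative import path, the job name 'chunk', and the IDs are assumptions for illustration; only `createChunkingWorker`, `createChunkingQueue`, `shutdownChunkingWorker`, and `ChunkingJobData` come from the file itself.

```ts
// Hypothetical entry point; the import path is an assumption based on the
// package layout shown in the file list above.
import {
  createChunkingQueue,
  createChunkingWorker,
  shutdownChunkingWorker,
  type ChunkingJobData,
} from './src/workers/chunking.worker.js'

async function main(): Promise<void> {
  // Start a worker consuming the 'chunking' queue.
  const worker = createChunkingWorker()

  // Enqueue a job; retry/backoff defaults come from createChunkingQueue.
  const queue = createChunkingQueue()
  const jobData: ChunkingJobData = {
    documentId: 'doc-123', // illustrative IDs, not real values
    memoryId: 'mem-456',
    content: '# Title\n\nSome markdown to chunk...',
    contentType: 'markdown', // optional; detected from content when omitted
  }
  await queue.add('chunk', jobData)

  // Shut down cleanly on SIGTERM.
  process.on('SIGTERM', async () => {
    await shutdownChunkingWorker(worker)
    await queue.close()
    process.exit(0)
  })
}

main().catch((err) => {
  console.error(err)
  process.exit(1)
})
```

Because `shutdownChunkingWorker` delegates to BullMQ's `worker.close()`, the worker stops taking new jobs and lets in-flight jobs finish before the process exits.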
package/src/workers/embedding.worker.ts (new file)
@@ -0,0 +1,358 @@

/**
 * Embedding Worker - Generates embeddings for chunks in batches
 *
 * Responsibilities:
 * - Receive chunks from chunking queue
 * - Group into batches of 100 (OpenAI API limit)
 * - Generate embeddings using EmbeddingService
 * - Store in vector_embeddings staging table via PgVectorStore
 * - Chain to indexing queue with embedding IDs
 * - Track cost and progress per batch
 * - Rate limiting: 3500 RPM (58 req/sec)
 */

import { Queue, Worker, Job } from 'bullmq'
import pLimit from 'p-limit'
import { getEmbeddingService } from '../services/embedding.service.js'
import { createPgVectorStore } from '../services/vectorstore/pgvector.js'
import type { PgVectorStore } from '../services/vectorstore/pgvector.js'
import type { VectorEntry } from '../services/vectorstore/types.js'
import { getLogger } from '../utils/logger.js'
import { DatabaseError, EmbeddingError, ErrorCode } from '../utils/errors.js'

const logger = getLogger('EmbeddingWorker')

/**
 * Job data structure for embedding worker
 */
export interface EmbeddingJobData {
  /** Document ID for tracking */
  documentId: string
  /** Chunks to embed */
  chunks: Array<{
    id: string
    content: string
    metadata?: Record<string, unknown>
  }>
  /** Optional: Override default batch size */
  batchSize?: number
  /** Optional: Processing queue ID for status updates */
  processingQueueId?: string
}

/**
 * Job result structure
 */
export interface EmbeddingJobResult {
  /** Total number of embeddings generated */
  embeddingCount: number
  /** Total cost in USD */
  costUsd: number
  /** Number of batches processed */
  batchesProcessed: number
  /** Embedding IDs for chaining to indexing queue */
  embeddingIds: string[]
  /** Processing time in milliseconds */
  processingTimeMs: number
}

/**
 * Embedding cost constants
 * Based on OpenAI text-embedding-3-small pricing: $0.0001 per 1K tokens
 */
const COST_PER_1K_TOKENS = 0.0001
const AVG_TOKENS_PER_CHAR = 0.25 // Rough estimate: 4 chars = 1 token

/**
 * Rate limiting constants
 * 3500 RPM = 58.33 requests per second
 * Conservative limit: 58 concurrent requests
 */
const MAX_CONCURRENT_REQUESTS = 58

/**
 * Default batch size for OpenAI API
 */
const DEFAULT_BATCH_SIZE = 100

/**
 * Estimate token count from text length
 */
function estimateTokens(text: string): number {
  return Math.ceil(text.length * AVG_TOKENS_PER_CHAR)
}

/**
 * Calculate cost based on token count
 */
function calculateCost(tokens: number): number {
  return (tokens / 1000) * COST_PER_1K_TOKENS
}

/**
 * Group chunks into batches
 */
function createBatches<T>(items: T[], batchSize: number): T[][] {
  const batches: T[][] = []
  for (let i = 0; i < items.length; i += batchSize) {
    batches.push(items.slice(i, i + batchSize))
  }
  return batches
}

/**
 * Embedding Worker class
 */
export class EmbeddingWorker {
  private worker: Worker<EmbeddingJobData, EmbeddingJobResult> | null = null
  private readonly queueName: string
  private readonly connectionString: string
  private vectorStore: PgVectorStore | null = null
  private rateLimiter = pLimit(MAX_CONCURRENT_REQUESTS)

  constructor(queueName: string = 'embedding', connectionString?: string) {
    this.queueName = queueName
    this.connectionString = connectionString || process.env.DATABASE_URL || 'postgresql://localhost:5432/supermemory'
  }

  /**
   * Initialize the worker
   */
  async initialize(): Promise<void> {
    // Initialize vector store
    const embeddingService = getEmbeddingService()
    const dimensions = embeddingService.getDimensions()

    this.vectorStore = createPgVectorStore(this.connectionString, dimensions, {
      batchSize: DEFAULT_BATCH_SIZE,
      hnswConfig: { M: 16, efConstruction: 64 },
      metric: 'cosine',
    })

    await this.vectorStore.initialize()

    // Create worker
    this.worker = new Worker<EmbeddingJobData, EmbeddingJobResult>(this.queueName, this.processJob.bind(this), {
      connection: {
        host: 'localhost',
        port: 6379,
      },
      concurrency: 1, // Process one job at a time to control rate limiting globally
      removeOnComplete: { count: 100 }, // Keep last 100 completed jobs
      removeOnFail: { count: 500 }, // Keep last 500 failed jobs
    })

    // Error handling
    this.worker.on('error', (error) => {
      logger.error('Worker error', { error: error.message })
    })

    this.worker.on('failed', (job, error) => {
      logger.error('Job failed', { jobId: job?.id, error: error.message })
    })

    logger.info('Worker initialized', { queueName: this.queueName })
  }

  /**
   * Process embedding job
   */
  private async processJob(job: Job<EmbeddingJobData, EmbeddingJobResult>): Promise<EmbeddingJobResult> {
    const startTime = Date.now()
    const { documentId, chunks, batchSize = DEFAULT_BATCH_SIZE } = job.data

    logger.info('Processing job', { jobId: job.id, documentId, chunkCount: chunks.length })

    if (!this.vectorStore) {
      throw new DatabaseError('Vector store not initialized', 'embedding', {
        code: ErrorCode.DATABASE_NOT_INITIALIZED,
      })
    }

    // Filter out empty chunks
    const validChunks = chunks.filter((chunk) => chunk.content && chunk.content.trim().length > 0)

    if (validChunks.length === 0) {
      logger.warn('No valid chunks to process', { jobId: job.id })
      return {
        embeddingCount: 0,
        costUsd: 0,
        batchesProcessed: 0,
        embeddingIds: [],
        processingTimeMs: Date.now() - startTime,
      }
    }

    // Create batches
    const batches = createBatches(validChunks, batchSize)
    logger.info('Created batches', { batchCount: batches.length, batchSize })

    const embeddingService = getEmbeddingService()
    const embeddingIds: string[] = []
    let totalCost = 0
    let totalTokens = 0

    // Process batches with rate limiting
    for (let i = 0; i < batches.length; i++) {
      const batch = batches[i]
      if (!batch) continue

      const batchProgress = Math.round(((i + 1) / batches.length) * 100)
      await job.updateProgress(batchProgress)
      logger.info('Processing batch', {
        batchNum: i + 1,
        totalBatches: batches.length,
        progress: batchProgress,
      })

      // Extract texts from batch
      const texts = batch.map((chunk) => chunk.content)

      // Estimate tokens and cost
      const batchTokens = texts.reduce((sum, text) => sum + estimateTokens(text), 0)
      const batchCost = calculateCost(batchTokens)
      totalTokens += batchTokens
      totalCost += batchCost

      // Generate embeddings with rate limiting
      const embeddings = await this.rateLimiter(async () => {
        try {
          return await embeddingService.batchEmbed(texts)
        } catch (error) {
          logger.error('Batch failed, retrying', { batchNum: i + 1, error })
          // Retry once after exponential backoff
          await new Promise((resolve) => setTimeout(resolve, 1000 * Math.pow(2, i)))
          return await embeddingService.batchEmbed(texts)
        }
      })

      // Store embeddings in vector store
      const vectorEntries: VectorEntry[] = batch.map((chunk, idx) => {
        const embedding = embeddings[idx]
        if (!embedding || embedding.length === 0) {
          throw new EmbeddingError(`Empty embedding for chunk ${chunk.id}`, undefined, {
            chunkId: chunk.id,
            batchIndex: idx,
          })
        }

        return {
          id: chunk.id,
          embedding,
          metadata: {
            ...chunk.metadata,
            documentId,
            chunkId: chunk.id,
            createdAt: new Date().toISOString(),
          },
        }
      })

      // Add to vector store in batch
      const batchResult = await this.vectorStore.addBatch(vectorEntries, {
        overwrite: false,
        namespace: 'memories',
      })

      if (batchResult.failed > 0) {
        logger.warn('Batch had failures', {
          batchNum: i + 1,
          failures: batchResult.failed,
          errors: batchResult.errors,
        })
      }

      // Collect embedding IDs
      embeddingIds.push(...vectorEntries.map((entry) => entry.id))

      logger.info('Batch complete', {
        batchNum: i + 1,
        totalBatches: batches.length,
        embeddingCount: vectorEntries.length,
        tokens: batchTokens,
        cost: batchCost.toFixed(6),
      })
    }

    const processingTimeMs = Date.now() - startTime

    logger.info('Job complete', {
      jobId: job.id,
      embeddingCount: embeddingIds.length,
      tokens: totalTokens,
      cost: totalCost.toFixed(6),
      processingTimeMs,
    })

    // Chain to indexing queue (if configured)
    await this.chainToIndexingQueue(documentId, embeddingIds)

    return {
      embeddingCount: embeddingIds.length,
      costUsd: totalCost,
      batchesProcessed: batches.length,
      embeddingIds,
      processingTimeMs,
    }
  }

  /**
   * Chain to indexing queue with embedding IDs
   */
  private async chainToIndexingQueue(documentId: string, embeddingIds: string[]): Promise<void> {
    try {
      const indexingQueue = new Queue('indexing', {
        connection: {
          host: 'localhost',
          port: 6379,
        },
      })

      await indexingQueue.add(
        'index',
        {
          documentId,
          embeddingIds,
        },
        {
          attempts: 3,
          backoff: {
            type: 'exponential',
            delay: 2000,
          },
        }
      )

      logger.info('Chained to indexing queue', { documentId })
    } catch (error) {
      logger.error('Failed to chain to indexing queue', { documentId, error })
      // Don't throw - embedding job succeeded
    }
  }

  /**
   * Close the worker and cleanup resources
   */
  async close(): Promise<void> {
    if (this.worker) {
      await this.worker.close()
      this.worker = null
    }

    if (this.vectorStore) {
      await this.vectorStore.close()
      this.vectorStore = null
    }

    logger.info('Worker closed')
  }
}

/**
 * Create and initialize an embedding worker
 */
export async function createEmbeddingWorker(queueName?: string, connectionString?: string): Promise<EmbeddingWorker> {
  const worker = new EmbeddingWorker(queueName, connectionString)
  await worker.initialize()
  return worker
}
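Note that this file's `EmbeddingJobData` (full `{ id, content }` chunk payloads) differs from the `EmbeddingJobData` the chunking worker enqueues (bare `chunkIds`), so a producer targeting this worker must supply the chunk text itself. Below is a minimal sketch along those lines; the entry-point file, import path, and IDs are assumptions for illustration, while `createEmbeddingWorker`, the job shape, and the Redis defaults come from the file above.

```ts
// Hypothetical entry point; the import path and example IDs are assumptions.
import { Queue } from 'bullmq'
import { createEmbeddingWorker, type EmbeddingJobData } from './src/workers/embedding.worker.js'

async function main(): Promise<void> {
  // Initializes the pgvector store and starts consuming the 'embedding' queue
  // (the worker itself is hardcoded to Redis at localhost:6379).
  const worker = await createEmbeddingWorker()

  // Producer side: enqueue full chunk payloads, matching this file's job shape.
  const queue = new Queue<EmbeddingJobData>('embedding', {
    connection: { host: 'localhost', port: 6379 },
  })
  await queue.add('embed', {
    documentId: 'doc-123', // illustrative IDs, not real values
    chunks: [
      { id: 'chunk-1', content: 'First chunk of text.' },
      { id: 'chunk-2', content: 'Second chunk of text.' },
    ],
  })

  process.on('SIGTERM', async () => {
    await queue.close()
    await worker.close() // closes both the BullMQ worker and the vector store
    process.exit(0)
  })
}

main().catch((err) => {
  console.error(err)
  process.exit(1)
})
```

Under this file's pricing assumption ($0.0001 per 1K tokens at roughly 4 characters per token), a 100,000-character document estimates to about 25,000 tokens, i.e. about $0.0025 per embedding pass.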