@twelvehart/supermemory-runtime 1.0.0-next.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (156)
  1. package/.env.example +57 -0
  2. package/README.md +374 -0
  3. package/dist/index.js +189 -0
  4. package/dist/mcp/index.js +1132 -0
  5. package/docker-compose.prod.yml +91 -0
  6. package/docker-compose.yml +358 -0
  7. package/drizzle/0000_dapper_the_professor.sql +159 -0
  8. package/drizzle/0001_api_keys.sql +51 -0
  9. package/drizzle/meta/0000_snapshot.json +1532 -0
  10. package/drizzle/meta/_journal.json +13 -0
  11. package/drizzle.config.ts +20 -0
  12. package/package.json +114 -0
  13. package/scripts/add-extraction-job.ts +122 -0
  14. package/scripts/benchmark-pgvector.ts +122 -0
  15. package/scripts/bootstrap.sh +209 -0
  16. package/scripts/check-runtime-pack.ts +111 -0
  17. package/scripts/claude-mcp-config.ts +336 -0
  18. package/scripts/docker-entrypoint.sh +183 -0
  19. package/scripts/doctor.ts +377 -0
  20. package/scripts/init-db.sql +33 -0
  21. package/scripts/install.sh +1110 -0
  22. package/scripts/mcp-setup.ts +271 -0
  23. package/scripts/migrations/001_create_pgvector_extension.sql +31 -0
  24. package/scripts/migrations/002_create_memory_embeddings_table.sql +75 -0
  25. package/scripts/migrations/003_create_hnsw_index.sql +94 -0
  26. package/scripts/migrations/004_create_memory_embeddings_standalone.sql +70 -0
  27. package/scripts/migrations/005_create_chunks_table.sql +95 -0
  28. package/scripts/migrations/006_create_processing_queue.sql +45 -0
  29. package/scripts/migrations/generate_test_data.sql +42 -0
  30. package/scripts/migrations/phase1_comprehensive_test.sql +204 -0
  31. package/scripts/migrations/run_migrations.sh +286 -0
  32. package/scripts/migrations/test_hnsw_index.sql +255 -0
  33. package/scripts/pre-commit-secrets +282 -0
  34. package/scripts/run-extraction-worker.ts +46 -0
  35. package/scripts/run-phase1-tests.sh +291 -0
  36. package/scripts/setup.ts +222 -0
  37. package/scripts/smoke-install.sh +12 -0
  38. package/scripts/test-health-endpoint.sh +328 -0
  39. package/src/api/index.ts +2 -0
  40. package/src/api/middleware/auth.ts +80 -0
  41. package/src/api/middleware/csrf.ts +308 -0
  42. package/src/api/middleware/errorHandler.ts +166 -0
  43. package/src/api/middleware/rateLimit.ts +360 -0
  44. package/src/api/middleware/validation.ts +514 -0
  45. package/src/api/routes/documents.ts +286 -0
  46. package/src/api/routes/profiles.ts +237 -0
  47. package/src/api/routes/search.ts +71 -0
  48. package/src/api/stores/index.ts +58 -0
  49. package/src/config/bootstrap-env.ts +3 -0
  50. package/src/config/env.ts +71 -0
  51. package/src/config/feature-flags.ts +25 -0
  52. package/src/config/index.ts +140 -0
  53. package/src/config/secrets.config.ts +291 -0
  54. package/src/db/client.ts +92 -0
  55. package/src/db/index.ts +73 -0
  56. package/src/db/postgres.ts +72 -0
  57. package/src/db/schema/chunks.schema.ts +31 -0
  58. package/src/db/schema/containers.schema.ts +46 -0
  59. package/src/db/schema/documents.schema.ts +49 -0
  60. package/src/db/schema/embeddings.schema.ts +32 -0
  61. package/src/db/schema/index.ts +11 -0
  62. package/src/db/schema/memories.schema.ts +72 -0
  63. package/src/db/schema/profiles.schema.ts +34 -0
  64. package/src/db/schema/queue.schema.ts +59 -0
  65. package/src/db/schema/relationships.schema.ts +42 -0
  66. package/src/db/schema.ts +223 -0
  67. package/src/db/worker-connection.ts +47 -0
  68. package/src/index.ts +235 -0
  69. package/src/mcp/CLAUDE.md +1 -0
  70. package/src/mcp/index.ts +1380 -0
  71. package/src/mcp/legacyState.ts +22 -0
  72. package/src/mcp/rateLimit.ts +358 -0
  73. package/src/mcp/resources.ts +309 -0
  74. package/src/mcp/results.ts +104 -0
  75. package/src/mcp/tools.ts +401 -0
  76. package/src/queues/config.ts +119 -0
  77. package/src/queues/index.ts +289 -0
  78. package/src/sdk/client.ts +225 -0
  79. package/src/sdk/errors.ts +266 -0
  80. package/src/sdk/http.ts +560 -0
  81. package/src/sdk/index.ts +244 -0
  82. package/src/sdk/resources/base.ts +65 -0
  83. package/src/sdk/resources/connections.ts +204 -0
  84. package/src/sdk/resources/documents.ts +163 -0
  85. package/src/sdk/resources/index.ts +10 -0
  86. package/src/sdk/resources/memories.ts +150 -0
  87. package/src/sdk/resources/search.ts +60 -0
  88. package/src/sdk/resources/settings.ts +36 -0
  89. package/src/sdk/types.ts +674 -0
  90. package/src/services/chunking/index.ts +451 -0
  91. package/src/services/chunking.service.ts +650 -0
  92. package/src/services/csrf.service.ts +252 -0
  93. package/src/services/documents.repository.ts +219 -0
  94. package/src/services/documents.service.ts +191 -0
  95. package/src/services/embedding.service.ts +404 -0
  96. package/src/services/extraction.service.ts +300 -0
  97. package/src/services/extractors/code.extractor.ts +451 -0
  98. package/src/services/extractors/index.ts +9 -0
  99. package/src/services/extractors/markdown.extractor.ts +461 -0
  100. package/src/services/extractors/pdf.extractor.ts +315 -0
  101. package/src/services/extractors/text.extractor.ts +118 -0
  102. package/src/services/extractors/url.extractor.ts +243 -0
  103. package/src/services/index.ts +235 -0
  104. package/src/services/ingestion.service.ts +177 -0
  105. package/src/services/llm/anthropic.ts +400 -0
  106. package/src/services/llm/base.ts +460 -0
  107. package/src/services/llm/contradiction-detector.service.ts +526 -0
  108. package/src/services/llm/heuristics.ts +148 -0
  109. package/src/services/llm/index.ts +309 -0
  110. package/src/services/llm/memory-classifier.service.ts +383 -0
  111. package/src/services/llm/memory-extension-detector.service.ts +523 -0
  112. package/src/services/llm/mock.ts +470 -0
  113. package/src/services/llm/openai.ts +398 -0
  114. package/src/services/llm/prompts.ts +438 -0
  115. package/src/services/llm/types.ts +373 -0
  116. package/src/services/memory.repository.ts +1769 -0
  117. package/src/services/memory.service.ts +1338 -0
  118. package/src/services/memory.types.ts +234 -0
  119. package/src/services/persistence/index.ts +295 -0
  120. package/src/services/pipeline.service.ts +509 -0
  121. package/src/services/profile.repository.ts +436 -0
  122. package/src/services/profile.service.ts +560 -0
  123. package/src/services/profile.types.ts +270 -0
  124. package/src/services/relationships/detector.ts +1128 -0
  125. package/src/services/relationships/index.ts +268 -0
  126. package/src/services/relationships/memory-integration.ts +459 -0
  127. package/src/services/relationships/strategies.ts +132 -0
  128. package/src/services/relationships/types.ts +370 -0
  129. package/src/services/search.service.ts +761 -0
  130. package/src/services/search.types.ts +220 -0
  131. package/src/services/secrets.service.ts +384 -0
  132. package/src/services/vectorstore/base.ts +327 -0
  133. package/src/services/vectorstore/index.ts +444 -0
  134. package/src/services/vectorstore/memory.ts +286 -0
  135. package/src/services/vectorstore/migration.ts +295 -0
  136. package/src/services/vectorstore/mock.ts +403 -0
  137. package/src/services/vectorstore/pgvector.ts +695 -0
  138. package/src/services/vectorstore/types.ts +247 -0
  139. package/src/startup.ts +389 -0
  140. package/src/types/api.types.ts +193 -0
  141. package/src/types/document.types.ts +103 -0
  142. package/src/types/index.ts +241 -0
  143. package/src/types/profile.base.ts +133 -0
  144. package/src/utils/errors.ts +447 -0
  145. package/src/utils/id.ts +15 -0
  146. package/src/utils/index.ts +101 -0
  147. package/src/utils/logger.ts +313 -0
  148. package/src/utils/sanitization.ts +501 -0
  149. package/src/utils/secret-validation.ts +273 -0
  150. package/src/utils/synonyms.ts +188 -0
  151. package/src/utils/validation.ts +581 -0
  152. package/src/workers/chunking.worker.ts +242 -0
  153. package/src/workers/embedding.worker.ts +358 -0
  154. package/src/workers/extraction.worker.ts +346 -0
  155. package/src/workers/indexing.worker.ts +505 -0
  156. package/tsconfig.json +38 -0
@@ -0,0 +1,505 @@
1
+ /**
2
+ * Indexing Worker
3
+ *
4
+ * Processes memories with embeddings, detects duplicates via similarity_hash,
5
+ * detects relationships using EmbeddingRelationshipDetector, and updates
6
+ * database status.
7
+ *
8
+ * Flow:
9
+ * 1. Receive embeddings from embedding queue
10
+ * 2. Check for duplicates using similarity_hash
11
+ * 3. Insert memories into memories table
12
+ * 4. Link embeddings via memory_embeddings table
13
+ * 5. Detect relationships using EmbeddingRelationshipDetector
14
+ * 6. Insert relationships into memory_relationships table
15
+ * 7. Update documents.status = 'processed'
16
+ * 8. Mark processing_queue job as 'completed'
17
+ */
18
+
19
+ import { and, eq, inArray, notInArray } from 'drizzle-orm'
20
+ import { documents } from '../db/schema/documents.schema.js'
21
+ import { memories } from '../db/schema/memories.schema.js'
22
+ import { memoryEmbeddings } from '../db/schema/embeddings.schema.js'
23
+ import { processingQueue } from '../db/schema/queue.schema.js'
24
+ import { memoryRelationships } from '../db/schema/relationships.schema.js'
25
+ import { getLogger } from '../utils/logger.js'
26
+ import { AppError, ErrorCode, DatabaseError } from '../utils/errors.js'
27
+ import { generateId } from '../utils/id.js'
28
+ import { EmbeddingRelationshipDetector, InMemoryVectorStoreAdapter } from '../services/relationships/detector.js'
29
+ import type { EmbeddingService } from '../services/embedding.service.js'
30
+ import { createHash } from 'node:crypto'
31
+ import { workerDb as db, type WorkerTransaction as DbTransaction } from '../db/worker-connection.js'
32
+ import type { MemoryType } from '../types/index.js'
33
+
34
+ const logger = getLogger('IndexingWorker')
35
+
36
+ // ============================================================================
37
+ // Type Utilities
38
+ // ============================================================================
39
+
40
/**
 * Lookup table from a stored memory type to the vector-store `MemoryType`.
 *
 * Database types: fact, preference, episode, belief, skill, context.
 * Vector-store types (MemoryType from types/index.ts): fact, event, preference,
 * skill, relationship, context, note.
 *
 * A Map is used instead of a plain object so that only the keys listed here
 * match; a plain-object lookup would also resolve inherited keys such as
 * "constructor" or "toString" and hand back a function instead of a type.
 */
const DB_TO_VECTOR_STORE_TYPE: ReadonlyMap<string, MemoryType> = new Map<string, MemoryType>([
  ['fact', 'fact'],
  ['preference', 'preference'],
  ['episode', 'event'], // Map episode to event
  ['belief', 'fact'], // Map belief to fact
  ['skill', 'skill'],
  ['context', 'context'],
  // Values that already are vector-store types pass through unchanged.
  // IndexingJobData.memoryType accepts them, so they can reach this function.
  ['event', 'event'],
  ['relationship', 'relationship'],
  ['note', 'note'],
])

/**
 * Maps a database memory type to the vector store type used by the
 * relationship detector.
 *
 * @param dbType - Memory type string as stored on the memory row
 * @returns The matching vector-store type, or 'note' for any unrecognised value
 */
function mapToVectorStoreType(dbType: string): MemoryType {
  return DB_TO_VECTOR_STORE_TYPE.get(dbType) ?? 'note'
}
59
+
60
+ // ============================================================================
61
+ // Types
62
+ // ============================================================================
63
+
64
+ export interface IndexingJobData {
65
+ /** ID of the document being indexed; the job fails if no document with this ID exists */
66
+ documentId: string
67
+ /** Container tag stored on every inserted memory and used to select existing memories for relationship detection */
68
+ containerTag: string
69
+ /** ID of the processing_queue row that is set to 'completed' on success or 'failed' on error */
70
+ queueJobId: string
71
+ /** Memories to index, each carrying its text content and its embedding vector */
72
+ memories: Array<{
73
+ content: string
74
+ embedding: number[]
75
+ memoryType?: 'fact' | 'preference' | 'episode' | 'belief' | 'skill' | 'context' | 'note' | 'event' | 'relationship' // stored as 'fact' when omitted
76
+ confidenceScore?: number // stored as '1.000' when omitted
77
+ metadata?: Record<string, unknown> // stored as {} when omitted
78
+ }>
79
+ }
80
+
81
+ export interface IndexingJobResult {
82
+ /** Number of memories inserted by the job (duplicates are not counted) */
83
+ memoriesIndexed: number
84
+ /** Number of input memories skipped because a memory with the same similarity hash already exists */
85
+ duplicatesSkipped: number
86
+ /** Number of relationships detected and stored (0 when detection is disabled or fails) */
87
+ relationshipsDetected: number
88
+ /** IDs generated for the newly inserted memories */
89
+ memoryIds: string[]
90
+ /** Elapsed time of processJob in milliseconds, measured with Date.now() */
91
+ processingTimeMs: number
92
+ }
93
+
94
+ export interface IndexingWorkerConfig {
95
+ /** Embedding service handed to the relationship detector */
96
+ embeddingService: EmbeddingService
97
+ /** Enable relationship detection (default: true) */
98
+ enableRelationshipDetection?: boolean
99
+ /** Duplicate handling strategy (default: 'skip'); 'merge' is accepted but is currently handled the same as 'skip' */
100
+ duplicateStrategy?: 'skip' | 'merge'
101
+ /** Batch size passed to the relationship detector (default: 50) */
102
+ relationshipBatchSize?: number
103
+ }
104
+
105
+ // ============================================================================
106
+ // Indexing Worker
107
+ // ============================================================================
108
+
109
/**
 * Worker that persists embedded memories for a document.
 *
 * For each job it skips memories whose similarity hash already exists, inserts
 * the remaining memories together with their embeddings, optionally detects
 * and stores relationships, and finally updates the document and
 * processing-queue rows. All writes for one job happen in one transaction.
 */
export class IndexingWorker {
  /** Upper bound on previously stored memories loaded as relationship candidates. */
  private static readonly maxExistingMemories = 1000

  private readonly embeddingService: EmbeddingService
  private readonly enableRelationshipDetection: boolean
  private readonly duplicateStrategy: 'skip' | 'merge'
  private readonly relationshipBatchSize: number

  /**
   * @param config - Worker configuration. Relationship detection defaults to
   *   enabled, the duplicate strategy to 'skip' and the relationship batch
   *   size to 50.
   */
  constructor(config: IndexingWorkerConfig) {
    this.embeddingService = config.embeddingService
    this.enableRelationshipDetection = config.enableRelationshipDetection ?? true
    this.duplicateStrategy = config.duplicateStrategy ?? 'skip'
    this.relationshipBatchSize = config.relationshipBatchSize ?? 50

    logger.info('IndexingWorker initialized', {
      enableRelationshipDetection: this.enableRelationshipDetection,
      duplicateStrategy: this.duplicateStrategy,
      relationshipBatchSize: this.relationshipBatchSize,
    })
  }

  /**
   * Process an indexing job.
   *
   * @param jobData - Document, queue job and memories to index
   * @returns Counters and IDs describing what was indexed
   * @throws AppError (built with ErrorCode.DATABASE_ERROR via AppError.from)
   *   when the document is missing or any step of the transaction fails. The
   *   queue job is marked 'failed' on a best-effort basis before rethrowing.
   */
  async processJob(jobData: IndexingJobData): Promise<IndexingJobResult> {
    const startTime = Date.now()
    const result: IndexingJobResult = {
      memoriesIndexed: 0,
      duplicatesSkipped: 0,
      relationshipsDetected: 0,
      memoryIds: [],
      processingTimeMs: 0,
    }

    try {
      logger.info('Processing indexing job', {
        documentId: jobData.documentId,
        memoryCount: jobData.memories.length,
        containerTag: jobData.containerTag,
      })

      // Validate document exists
      const document = await db.query.documents.findFirst({
        where: eq(documents.id, jobData.documentId),
      })

      if (!document) {
        throw new DatabaseError(`Document not found: ${jobData.documentId}`, 'findDocument')
      }

      // Start transaction for atomicity
      await db.transaction(async (tx) => {
        // Step 1: Process each memory (duplicate detection + insertion)
        for (const memoryData of jobData.memories) {
          const similarityHash = this.generateSimilarityHash(memoryData.content)

          // NOTE(review): the duplicate lookup is not scoped to containerTag, so
          // identical content in another container also counts as a duplicate.
          // Confirm this is intended before narrowing it.
          const existingMemory = await tx.query.memories.findFirst({
            where: eq(memories.similarityHash, similarityHash),
          })

          if (existingMemory) {
            logger.debug('Duplicate memory detected', {
              similarityHash,
              existingMemoryId: existingMemory.id,
            })
            result.duplicatesSkipped++
            // Both strategies skip for now: 'merge' (this.duplicateStrategy) is
            // accepted by the config but updating the existing memory is not
            // implemented yet.
            continue
          }

          // Insert memory
          const memoryId = generateId()
          await tx.insert(memories).values({
            id: memoryId,
            documentId: jobData.documentId,
            content: memoryData.content,
            memoryType: memoryData.memoryType ?? 'fact',
            similarityHash,
            containerTag: jobData.containerTag,
            confidenceScore: memoryData.confidenceScore?.toString() ?? '1.000',
            metadata: memoryData.metadata ?? {},
            isLatest: true,
            version: 1,
          })

          // Insert embedding
          // NOTE(review): the model name is hard-coded; confirm it matches the
          // model that actually produced jobData embeddings.
          await tx.insert(memoryEmbeddings).values({
            memoryId,
            embedding: memoryData.embedding,
            model: 'text-embedding-3-small',
            normalized: true,
          })

          result.memoryIds.push(memoryId)
          result.memoriesIndexed++

          logger.debug('Memory indexed', { memoryId, similarityHash })
        }

        // Step 2: Detect relationships if enabled
        if (this.enableRelationshipDetection && result.memoryIds.length > 0) {
          result.relationshipsDetected = await this.detectAndStoreRelationships(
            tx,
            result.memoryIds,
            jobData.containerTag
          )
        }

        // Step 3: Update document status
        await tx
          .update(documents)
          .set({
            status: 'processed',
            updatedAt: new Date(),
          })
          .where(eq(documents.id, jobData.documentId))

        // Step 4: Mark processing queue job as completed
        await tx
          .update(processingQueue)
          .set({
            status: 'completed',
            completedAt: new Date(),
          })
          .where(eq(processingQueue.id, jobData.queueJobId))
      })

      // Logged only after db.transaction() has resolved, i.e. after the commit.
      logger.info('Transaction committed successfully', {
        documentId: jobData.documentId,
        memoriesIndexed: result.memoriesIndexed,
        duplicatesSkipped: result.duplicatesSkipped,
        relationshipsDetected: result.relationshipsDetected,
      })

      result.processingTimeMs = Date.now() - startTime

      logger.info('Indexing job completed', {
        documentId: jobData.documentId,
        result,
      })

      return result
    } catch (error) {
      logger.errorWithException('Indexing job failed', error, {
        documentId: jobData.documentId,
        queueJobId: jobData.queueJobId,
      })

      // Update processing queue to failed status. This runs on the plain
      // connection, outside the (rolled back) transaction, and is best-effort.
      try {
        await db
          .update(processingQueue)
          .set({
            status: 'failed',
            error: error instanceof Error ? error.message : 'Unknown error',
            errorCode: error instanceof AppError ? error.code : ErrorCode.INTERNAL_ERROR,
            completedAt: new Date(),
          })
          .where(eq(processingQueue.id, jobData.queueJobId))
      } catch (updateError) {
        logger.errorWithException('Failed to update queue status to failed', updateError)
      }

      throw AppError.from(error, ErrorCode.DATABASE_ERROR)
    }
  }

  /**
   * Detect relationships between memories and store them in the database.
   *
   * Detection runs inside a nested transaction (savepoint). If anything in it
   * fails, only the relationship writes are rolled back and the outer job
   * transaction stays usable; without the savepoint a failed statement would
   * leave the outer PostgreSQL transaction aborted and the job would fail on
   * its next write.
   *
   * @param tx - Transaction of the surrounding indexing job
   * @param memoryIds - IDs of the memories inserted by this job
   * @param containerTag - Container whose existing memories are candidates
   * @returns Number of relationships stored; 0 when detection fails
   */
  private async detectAndStoreRelationships(
    tx: DbTransaction,
    memoryIds: string[],
    containerTag: string
  ): Promise<number> {
    if (memoryIds.length === 0) {
      return 0
    }

    try {
      return await tx.transaction(async (savepoint) =>
        this.runRelationshipDetection(savepoint, memoryIds, containerTag)
      )
    } catch (error) {
      logger.errorWithException('Relationship detection failed', error)
      // Don't fail the job for relationship detection errors
      return 0
    }
  }

  /**
   * Runs one relationship-detection pass for the newly inserted memories.
   *
   * A fresh vector store and detector are used for every pass so that memories
   * from earlier jobs, other containers or rolled-back transactions never
   * accumulate in the worker or leak into later detections.
   */
  private async runRelationshipDetection(
    tx: DbTransaction,
    memoryIds: string[],
    containerTag: string
  ): Promise<number> {
    const newMemories = await this.loadDetectorMemories(tx, inArray(memories.id, memoryIds))

    if (newMemories.length === 0) {
      return 0
    }

    // Existing memories from the same container; capped to prevent memory issues
    const existingMemories = await this.loadDetectorMemories(
      tx,
      and(eq(memories.containerTag, containerTag), notInArray(memories.id, memoryIds)),
      IndexingWorker.maxExistingMemories
    )

    const { vectorStore, detector } = this.createRelationshipDetector()

    for (const existing of existingMemories) {
      vectorStore.addMemory(existing.memory, existing.embedding)
    }

    let totalRelationships = 0

    for (const candidate of newMemories) {
      const detectionResult = await detector.detectRelationships(
        { ...candidate.memory, embedding: candidate.embedding },
        { containerTag }
      )

      // Insert detected relationships
      for (const rel of detectionResult.relationships) {
        await tx.insert(memoryRelationships).values({
          sourceMemoryId: rel.relationship.sourceMemoryId,
          targetMemoryId: rel.relationship.targetMemoryId,
          relationshipType: rel.relationship.type,
          weight: rel.score.toString(),
          bidirectional: false,
          metadata: {
            vectorSimilarity: rel.score,
            detectedAt: new Date().toISOString(),
            llmVerified: rel.llmVerified ?? false,
          },
        })
        totalRelationships++
      }

      // Make the memory visible to the detections of the following new memories
      vectorStore.addMemory(candidate.memory, candidate.embedding)
    }

    logger.info('Relationships detected and stored', {
      newMemoriesCount: newMemories.length,
      existingMemoriesCount: existingMemories.length,
      relationshipsDetected: totalRelationships,
    })

    return totalRelationships
  }

  /**
   * Builds the in-memory vector store and the detector bound to it.
   * LLM verification is disabled for performance in the worker.
   */
  private createRelationshipDetector(): {
    vectorStore: InMemoryVectorStoreAdapter
    detector: EmbeddingRelationshipDetector
  } {
    const vectorStore = new InMemoryVectorStoreAdapter()
    const detector = new EmbeddingRelationshipDetector(this.embeddingService, vectorStore, {
      maxCandidates: 20,
      batchSize: this.relationshipBatchSize,
      enableContradictionDetection: true,
      enableLLMVerification: false, // Disable for performance in worker
    })

    return { vectorStore, detector }
  }

  /**
   * Loads memories matching `filter` together with their embeddings and
   * converts them to the shape expected by the relationship detector.
   *
   * Rows without an array embedding, without a container tag or without a
   * confidence score are dropped.
   *
   * @param tx - Transaction to read through
   * @param filter - Row filter for the memories table
   * @param maxRows - Optional cap on the number of rows fetched
   */
  private async loadDetectorMemories(tx: DbTransaction, filter: ReturnType<typeof and>, maxRows?: number) {
    const query = tx
      .select({ memory: memories, embedding: memoryEmbeddings })
      .from(memories)
      .leftJoin(memoryEmbeddings, eq(memoryEmbeddings.memoryId, memories.id))
      .where(filter)

    const rows = await (maxRows === undefined ? query : query.limit(maxRows))

    return rows.flatMap(({ memory, embedding }) => {
      const vector: unknown = embedding?.embedding

      if (!Array.isArray(vector) || memory.containerTag === null || memory.confidenceScore === null) {
        return []
      }

      const confidence = parseFloat(memory.confidenceScore)

      return [
        {
          memory: {
            id: memory.id,
            content: memory.content,
            type: mapToVectorStoreType(memory.memoryType),
            relationships: [],
            isLatest: memory.isLatest,
            containerTag: memory.containerTag,
            createdAt: memory.createdAt,
            updatedAt: memory.updatedAt,
            confidence,
            metadata: {
              ...(memory.metadata as Record<string, unknown>),
              confidence,
              originalDbType: memory.memoryType, // Preserve original type
            },
          },
          embedding: vector as number[],
        },
      ]
    })
  }

  /**
   * Generate similarity hash for duplicate detection.
   * Uses content normalization (lowercase, collapsed whitespace, trim) + SHA256.
   *
   * @param content - Raw memory content
   * @returns Hex-encoded SHA-256 digest of the normalized content
   */
  private generateSimilarityHash(content: string): string {
    const normalized = content.toLowerCase().replace(/\s+/g, ' ').trim()

    return createHash('sha256').update(normalized).digest('hex')
  }

  /**
   * Health check for the worker.
   *
   * Probes the database with a single document lookup. The embedding-service
   * flag reflects only whether a service was configured, so it is reported
   * independently of the database probe.
   */
  async healthCheck(): Promise<{
    healthy: boolean
    dbConnected: boolean
    embeddingServiceReady: boolean
  }> {
    const embeddingServiceReady = Boolean(this.embeddingService)

    try {
      // Test database connection
      await db.query.documents.findFirst()

      return {
        healthy: embeddingServiceReady,
        dbConnected: true,
        embeddingServiceReady,
      }
    } catch (error) {
      logger.errorWithException('Health check failed', error)
      return {
        healthy: false,
        dbConnected: false,
        embeddingServiceReady,
      }
    }
  }
}
495
+
496
+ // ============================================================================
497
+ // Factory Function
498
+ // ============================================================================
499
+
500
/**
 * Factory helper that builds a new IndexingWorker from the given configuration.
 *
 * @param config - Settings forwarded unchanged to the IndexingWorker constructor
 * @returns The newly constructed worker
 */
export function createIndexingWorker(config: IndexingWorkerConfig): IndexingWorker {
  const worker = new IndexingWorker(config)
  return worker
}
package/tsconfig.json ADDED
@@ -0,0 +1,38 @@
1
+ {
2
+ "compilerOptions": {
3
+ "target": "ES2022",
4
+ "module": "NodeNext",
5
+ "moduleResolution": "NodeNext",
6
+ "lib": ["ES2022"],
7
+ "outDir": "./dist",
8
+ "rootDir": "./src",
9
+ "strict": true,
10
+ "esModuleInterop": true,
11
+ "skipLibCheck": true,
12
+ "forceConsistentCasingInFileNames": true,
13
+ "resolveJsonModule": true,
14
+ "declaration": true,
15
+ "declarationMap": true,
16
+ "sourceMap": true,
17
+ "noImplicitAny": true,
18
+ "strictNullChecks": true,
19
+ "strictFunctionTypes": true,
20
+ "noImplicitReturns": true,
21
+ "noFallthroughCasesInSwitch": true,
22
+ "noUncheckedIndexedAccess": true,
23
+ "noImplicitOverride": true,
24
+ "allowUnusedLabels": false,
25
+ "allowUnreachableCode": false,
26
+ "allowSyntheticDefaultImports": true,
27
+ "downlevelIteration": true,
28
+ "exactOptionalPropertyTypes": false,
29
+ "noPropertyAccessFromIndexSignature": false,
30
+ "paths": {
31
+ "@/*": ["./src/*"],
32
+ "@tests/*": ["./tests/*"]
33
+ },
34
+ "baseUrl": "."
35
+ },
36
+ "include": ["src/**/*"],
37
+ "exclude": ["node_modules", "dist", "tests"]
38
+ }