@twelvehart/supermemory-runtime 1.0.0-next.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +57 -0
- package/README.md +374 -0
- package/dist/index.js +189 -0
- package/dist/mcp/index.js +1132 -0
- package/docker-compose.prod.yml +91 -0
- package/docker-compose.yml +358 -0
- package/drizzle/0000_dapper_the_professor.sql +159 -0
- package/drizzle/0001_api_keys.sql +51 -0
- package/drizzle/meta/0000_snapshot.json +1532 -0
- package/drizzle/meta/_journal.json +13 -0
- package/drizzle.config.ts +20 -0
- package/package.json +114 -0
- package/scripts/add-extraction-job.ts +122 -0
- package/scripts/benchmark-pgvector.ts +122 -0
- package/scripts/bootstrap.sh +209 -0
- package/scripts/check-runtime-pack.ts +111 -0
- package/scripts/claude-mcp-config.ts +336 -0
- package/scripts/docker-entrypoint.sh +183 -0
- package/scripts/doctor.ts +377 -0
- package/scripts/init-db.sql +33 -0
- package/scripts/install.sh +1110 -0
- package/scripts/mcp-setup.ts +271 -0
- package/scripts/migrations/001_create_pgvector_extension.sql +31 -0
- package/scripts/migrations/002_create_memory_embeddings_table.sql +75 -0
- package/scripts/migrations/003_create_hnsw_index.sql +94 -0
- package/scripts/migrations/004_create_memory_embeddings_standalone.sql +70 -0
- package/scripts/migrations/005_create_chunks_table.sql +95 -0
- package/scripts/migrations/006_create_processing_queue.sql +45 -0
- package/scripts/migrations/generate_test_data.sql +42 -0
- package/scripts/migrations/phase1_comprehensive_test.sql +204 -0
- package/scripts/migrations/run_migrations.sh +286 -0
- package/scripts/migrations/test_hnsw_index.sql +255 -0
- package/scripts/pre-commit-secrets +282 -0
- package/scripts/run-extraction-worker.ts +46 -0
- package/scripts/run-phase1-tests.sh +291 -0
- package/scripts/setup.ts +222 -0
- package/scripts/smoke-install.sh +12 -0
- package/scripts/test-health-endpoint.sh +328 -0
- package/src/api/index.ts +2 -0
- package/src/api/middleware/auth.ts +80 -0
- package/src/api/middleware/csrf.ts +308 -0
- package/src/api/middleware/errorHandler.ts +166 -0
- package/src/api/middleware/rateLimit.ts +360 -0
- package/src/api/middleware/validation.ts +514 -0
- package/src/api/routes/documents.ts +286 -0
- package/src/api/routes/profiles.ts +237 -0
- package/src/api/routes/search.ts +71 -0
- package/src/api/stores/index.ts +58 -0
- package/src/config/bootstrap-env.ts +3 -0
- package/src/config/env.ts +71 -0
- package/src/config/feature-flags.ts +25 -0
- package/src/config/index.ts +140 -0
- package/src/config/secrets.config.ts +291 -0
- package/src/db/client.ts +92 -0
- package/src/db/index.ts +73 -0
- package/src/db/postgres.ts +72 -0
- package/src/db/schema/chunks.schema.ts +31 -0
- package/src/db/schema/containers.schema.ts +46 -0
- package/src/db/schema/documents.schema.ts +49 -0
- package/src/db/schema/embeddings.schema.ts +32 -0
- package/src/db/schema/index.ts +11 -0
- package/src/db/schema/memories.schema.ts +72 -0
- package/src/db/schema/profiles.schema.ts +34 -0
- package/src/db/schema/queue.schema.ts +59 -0
- package/src/db/schema/relationships.schema.ts +42 -0
- package/src/db/schema.ts +223 -0
- package/src/db/worker-connection.ts +47 -0
- package/src/index.ts +235 -0
- package/src/mcp/CLAUDE.md +1 -0
- package/src/mcp/index.ts +1380 -0
- package/src/mcp/legacyState.ts +22 -0
- package/src/mcp/rateLimit.ts +358 -0
- package/src/mcp/resources.ts +309 -0
- package/src/mcp/results.ts +104 -0
- package/src/mcp/tools.ts +401 -0
- package/src/queues/config.ts +119 -0
- package/src/queues/index.ts +289 -0
- package/src/sdk/client.ts +225 -0
- package/src/sdk/errors.ts +266 -0
- package/src/sdk/http.ts +560 -0
- package/src/sdk/index.ts +244 -0
- package/src/sdk/resources/base.ts +65 -0
- package/src/sdk/resources/connections.ts +204 -0
- package/src/sdk/resources/documents.ts +163 -0
- package/src/sdk/resources/index.ts +10 -0
- package/src/sdk/resources/memories.ts +150 -0
- package/src/sdk/resources/search.ts +60 -0
- package/src/sdk/resources/settings.ts +36 -0
- package/src/sdk/types.ts +674 -0
- package/src/services/chunking/index.ts +451 -0
- package/src/services/chunking.service.ts +650 -0
- package/src/services/csrf.service.ts +252 -0
- package/src/services/documents.repository.ts +219 -0
- package/src/services/documents.service.ts +191 -0
- package/src/services/embedding.service.ts +404 -0
- package/src/services/extraction.service.ts +300 -0
- package/src/services/extractors/code.extractor.ts +451 -0
- package/src/services/extractors/index.ts +9 -0
- package/src/services/extractors/markdown.extractor.ts +461 -0
- package/src/services/extractors/pdf.extractor.ts +315 -0
- package/src/services/extractors/text.extractor.ts +118 -0
- package/src/services/extractors/url.extractor.ts +243 -0
- package/src/services/index.ts +235 -0
- package/src/services/ingestion.service.ts +177 -0
- package/src/services/llm/anthropic.ts +400 -0
- package/src/services/llm/base.ts +460 -0
- package/src/services/llm/contradiction-detector.service.ts +526 -0
- package/src/services/llm/heuristics.ts +148 -0
- package/src/services/llm/index.ts +309 -0
- package/src/services/llm/memory-classifier.service.ts +383 -0
- package/src/services/llm/memory-extension-detector.service.ts +523 -0
- package/src/services/llm/mock.ts +470 -0
- package/src/services/llm/openai.ts +398 -0
- package/src/services/llm/prompts.ts +438 -0
- package/src/services/llm/types.ts +373 -0
- package/src/services/memory.repository.ts +1769 -0
- package/src/services/memory.service.ts +1338 -0
- package/src/services/memory.types.ts +234 -0
- package/src/services/persistence/index.ts +295 -0
- package/src/services/pipeline.service.ts +509 -0
- package/src/services/profile.repository.ts +436 -0
- package/src/services/profile.service.ts +560 -0
- package/src/services/profile.types.ts +270 -0
- package/src/services/relationships/detector.ts +1128 -0
- package/src/services/relationships/index.ts +268 -0
- package/src/services/relationships/memory-integration.ts +459 -0
- package/src/services/relationships/strategies.ts +132 -0
- package/src/services/relationships/types.ts +370 -0
- package/src/services/search.service.ts +761 -0
- package/src/services/search.types.ts +220 -0
- package/src/services/secrets.service.ts +384 -0
- package/src/services/vectorstore/base.ts +327 -0
- package/src/services/vectorstore/index.ts +444 -0
- package/src/services/vectorstore/memory.ts +286 -0
- package/src/services/vectorstore/migration.ts +295 -0
- package/src/services/vectorstore/mock.ts +403 -0
- package/src/services/vectorstore/pgvector.ts +695 -0
- package/src/services/vectorstore/types.ts +247 -0
- package/src/startup.ts +389 -0
- package/src/types/api.types.ts +193 -0
- package/src/types/document.types.ts +103 -0
- package/src/types/index.ts +241 -0
- package/src/types/profile.base.ts +133 -0
- package/src/utils/errors.ts +447 -0
- package/src/utils/id.ts +15 -0
- package/src/utils/index.ts +101 -0
- package/src/utils/logger.ts +313 -0
- package/src/utils/sanitization.ts +501 -0
- package/src/utils/secret-validation.ts +273 -0
- package/src/utils/synonyms.ts +188 -0
- package/src/utils/validation.ts +581 -0
- package/src/workers/chunking.worker.ts +242 -0
- package/src/workers/embedding.worker.ts +358 -0
- package/src/workers/extraction.worker.ts +346 -0
- package/src/workers/indexing.worker.ts +505 -0
- package/tsconfig.json +38 -0
|
@@ -0,0 +1,505 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Indexing Worker
|
|
3
|
+
*
|
|
4
|
+
* Processes memories with embeddings, detects duplicates via similarity_hash,
|
|
5
|
+
* detects relationships using EmbeddingRelationshipDetector, and updates
|
|
6
|
+
* database status.
|
|
7
|
+
*
|
|
8
|
+
* Flow:
|
|
9
|
+
* 1. Receive embeddings from embedding queue
|
|
10
|
+
* 2. Check for duplicates using similarity_hash
|
|
11
|
+
* 3. Insert memories into memories table
|
|
12
|
+
* 4. Link embeddings via memory_embeddings table
|
|
13
|
+
* 5. Detect relationships using EmbeddingRelationshipDetector
|
|
14
|
+
* 6. Insert relationships into memory_relationships table
|
|
15
|
+
* 7. Update documents.status = 'processed'
|
|
16
|
+
* 8. Mark processing_queue job as 'completed'
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
import { and, eq, inArray, notInArray } from 'drizzle-orm'
|
|
20
|
+
import { documents } from '../db/schema/documents.schema.js'
|
|
21
|
+
import { memories } from '../db/schema/memories.schema.js'
|
|
22
|
+
import { memoryEmbeddings } from '../db/schema/embeddings.schema.js'
|
|
23
|
+
import { processingQueue } from '../db/schema/queue.schema.js'
|
|
24
|
+
import { memoryRelationships } from '../db/schema/relationships.schema.js'
|
|
25
|
+
import { getLogger } from '../utils/logger.js'
|
|
26
|
+
import { AppError, ErrorCode, DatabaseError } from '../utils/errors.js'
|
|
27
|
+
import { generateId } from '../utils/id.js'
|
|
28
|
+
import { EmbeddingRelationshipDetector, InMemoryVectorStoreAdapter } from '../services/relationships/detector.js'
|
|
29
|
+
import type { EmbeddingService } from '../services/embedding.service.js'
|
|
30
|
+
import { createHash } from 'node:crypto'
|
|
31
|
+
import { workerDb as db, type WorkerTransaction as DbTransaction } from '../db/worker-connection.js'
|
|
32
|
+
import type { MemoryType } from '../types/index.js'
|
|
33
|
+
|
|
34
|
+
// Module-scoped logger; every log line from this worker is tagged 'IndexingWorker'.
const logger = getLogger('IndexingWorker')
|
|
35
|
+
|
|
36
|
+
// ============================================================================
|
|
37
|
+
// Type Utilities
|
|
38
|
+
// ============================================================================
|
|
39
|
+
|
|
40
|
+
/**
|
|
41
|
+
* Database allows: fact, preference, episode, belief, skill, context
|
|
42
|
+
* Vector store type (MemoryType from types/index.ts) allows: fact, event, preference, skill, relationship, context, note
|
|
43
|
+
*
|
|
44
|
+
* This function maps database types to vector store types for the relationship detector
|
|
45
|
+
*/
|
|
46
|
+
function mapToVectorStoreType(dbType: string): MemoryType {
|
|
47
|
+
// Map database types to vector store types
|
|
48
|
+
const mapping: Record<string, MemoryType> = {
|
|
49
|
+
fact: 'fact',
|
|
50
|
+
preference: 'preference',
|
|
51
|
+
episode: 'event', // Map episode to event
|
|
52
|
+
belief: 'fact', // Map belief to fact
|
|
53
|
+
skill: 'skill',
|
|
54
|
+
context: 'context',
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
return mapping[dbType] ?? 'note'
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
// ============================================================================
// Types
// ============================================================================

/** Payload consumed by the indexing worker for a single document. */
export interface IndexingJobData {
  /** ID of the document being indexed */
  documentId: string
  /** Container tag for the document; also scopes relationship detection */
  containerTag: string
  /** Processing queue job ID; its status is updated on completion/failure */
  queueJobId: string
  /** Memories with their content and precomputed embeddings */
  memories: Array<{
    content: string
    /** Precomputed embedding vector for the memory content */
    embedding: number[]
    /** Memory classification; defaults to 'fact' when omitted */
    memoryType?: 'fact' | 'preference' | 'episode' | 'belief' | 'skill' | 'context' | 'note' | 'event' | 'relationship'
    /** Confidence score; persisted as its string form, defaulting to '1.000' when omitted */
    confidenceScore?: number
    /** Arbitrary metadata stored alongside the memory row */
    metadata?: Record<string, unknown>
  }>
}

/** Summary returned after an indexing job finishes successfully. */
export interface IndexingJobResult {
  /** Number of memories indexed (after duplicate detection) */
  memoriesIndexed: number
  /** Number of duplicates skipped (matched by similarity hash) */
  duplicatesSkipped: number
  /** Number of relationships detected and persisted */
  relationshipsDetected: number
  /** IDs of indexed memories */
  memoryIds: string[]
  /** Processing time in milliseconds */
  processingTimeMs: number
}

/** Construction options for {@link IndexingWorker}. */
export interface IndexingWorkerConfig {
  /** Embedding service for relationship detection */
  embeddingService: EmbeddingService
  /** Enable relationship detection (default: true) */
  enableRelationshipDetection?: boolean
  /** Skip duplicates or merge (default: 'skip'; 'merge' currently behaves like 'skip') */
  duplicateStrategy?: 'skip' | 'merge'
  /** Batch size for relationship detection (default: 50) */
  relationshipBatchSize?: number
}
|
|
104
|
+
|
|
105
|
+
// ============================================================================
|
|
106
|
+
// Indexing Worker
|
|
107
|
+
// ============================================================================
|
|
108
|
+
|
|
109
|
+
export class IndexingWorker {
|
|
110
|
+
private readonly embeddingService: EmbeddingService
|
|
111
|
+
private readonly enableRelationshipDetection: boolean
|
|
112
|
+
private readonly duplicateStrategy: 'skip' | 'merge'
|
|
113
|
+
private readonly relationshipBatchSize: number
|
|
114
|
+
private readonly vectorStore: InMemoryVectorStoreAdapter
|
|
115
|
+
private readonly relationshipDetector: EmbeddingRelationshipDetector
|
|
116
|
+
|
|
117
|
+
constructor(config: IndexingWorkerConfig) {
|
|
118
|
+
this.embeddingService = config.embeddingService
|
|
119
|
+
this.enableRelationshipDetection = config.enableRelationshipDetection ?? true
|
|
120
|
+
this.duplicateStrategy = config.duplicateStrategy ?? 'skip'
|
|
121
|
+
this.relationshipBatchSize = config.relationshipBatchSize ?? 50
|
|
122
|
+
|
|
123
|
+
// Initialize vector store for relationship detection
|
|
124
|
+
this.vectorStore = new InMemoryVectorStoreAdapter()
|
|
125
|
+
this.relationshipDetector = new EmbeddingRelationshipDetector(this.embeddingService, this.vectorStore, {
|
|
126
|
+
maxCandidates: 20,
|
|
127
|
+
batchSize: this.relationshipBatchSize,
|
|
128
|
+
enableContradictionDetection: true,
|
|
129
|
+
enableLLMVerification: false, // Disable for performance in worker
|
|
130
|
+
})
|
|
131
|
+
|
|
132
|
+
logger.info('IndexingWorker initialized', {
|
|
133
|
+
enableRelationshipDetection: this.enableRelationshipDetection,
|
|
134
|
+
duplicateStrategy: this.duplicateStrategy,
|
|
135
|
+
relationshipBatchSize: this.relationshipBatchSize,
|
|
136
|
+
})
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
/**
|
|
140
|
+
* Process an indexing job
|
|
141
|
+
*/
|
|
142
|
+
async processJob(jobData: IndexingJobData): Promise<IndexingJobResult> {
|
|
143
|
+
const startTime = Date.now()
|
|
144
|
+
const result: IndexingJobResult = {
|
|
145
|
+
memoriesIndexed: 0,
|
|
146
|
+
duplicatesSkipped: 0,
|
|
147
|
+
relationshipsDetected: 0,
|
|
148
|
+
memoryIds: [],
|
|
149
|
+
processingTimeMs: 0,
|
|
150
|
+
}
|
|
151
|
+
|
|
152
|
+
try {
|
|
153
|
+
logger.info('Processing indexing job', {
|
|
154
|
+
documentId: jobData.documentId,
|
|
155
|
+
memoryCount: jobData.memories.length,
|
|
156
|
+
containerTag: jobData.containerTag,
|
|
157
|
+
})
|
|
158
|
+
|
|
159
|
+
// Validate document exists
|
|
160
|
+
const document = await db.query.documents.findFirst({
|
|
161
|
+
where: eq(documents.id, jobData.documentId),
|
|
162
|
+
})
|
|
163
|
+
|
|
164
|
+
if (!document) {
|
|
165
|
+
throw new DatabaseError(`Document not found: ${jobData.documentId}`, 'findDocument')
|
|
166
|
+
}
|
|
167
|
+
|
|
168
|
+
// Start transaction for atomicity
|
|
169
|
+
await db.transaction(async (tx) => {
|
|
170
|
+
// Step 1: Process each memory (duplicate detection + insertion)
|
|
171
|
+
for (const memoryData of jobData.memories) {
|
|
172
|
+
const similarityHash = this.generateSimilarityHash(memoryData.content)
|
|
173
|
+
|
|
174
|
+
// Check for duplicates
|
|
175
|
+
const existingMemory = await tx.query.memories.findFirst({
|
|
176
|
+
where: eq(memories.similarityHash, similarityHash),
|
|
177
|
+
})
|
|
178
|
+
|
|
179
|
+
if (existingMemory) {
|
|
180
|
+
logger.debug('Duplicate memory detected', {
|
|
181
|
+
similarityHash,
|
|
182
|
+
existingMemoryId: existingMemory.id,
|
|
183
|
+
})
|
|
184
|
+
result.duplicatesSkipped++
|
|
185
|
+
|
|
186
|
+
if (this.duplicateStrategy === 'skip') {
|
|
187
|
+
continue
|
|
188
|
+
}
|
|
189
|
+
// If merge strategy, we would update the existing memory here
|
|
190
|
+
// For now, we skip to keep it simple
|
|
191
|
+
continue
|
|
192
|
+
}
|
|
193
|
+
|
|
194
|
+
// Insert memory
|
|
195
|
+
const memoryId = generateId()
|
|
196
|
+
await tx.insert(memories).values({
|
|
197
|
+
id: memoryId,
|
|
198
|
+
documentId: jobData.documentId,
|
|
199
|
+
content: memoryData.content,
|
|
200
|
+
memoryType: memoryData.memoryType ?? 'fact',
|
|
201
|
+
similarityHash,
|
|
202
|
+
containerTag: jobData.containerTag,
|
|
203
|
+
confidenceScore: memoryData.confidenceScore?.toString() ?? '1.000',
|
|
204
|
+
metadata: memoryData.metadata ?? {},
|
|
205
|
+
isLatest: true,
|
|
206
|
+
version: 1,
|
|
207
|
+
})
|
|
208
|
+
|
|
209
|
+
// Insert embedding
|
|
210
|
+
await tx.insert(memoryEmbeddings).values({
|
|
211
|
+
memoryId,
|
|
212
|
+
embedding: memoryData.embedding,
|
|
213
|
+
model: 'text-embedding-3-small',
|
|
214
|
+
normalized: true,
|
|
215
|
+
})
|
|
216
|
+
|
|
217
|
+
result.memoryIds.push(memoryId)
|
|
218
|
+
result.memoriesIndexed++
|
|
219
|
+
|
|
220
|
+
logger.debug('Memory indexed', { memoryId, similarityHash })
|
|
221
|
+
}
|
|
222
|
+
|
|
223
|
+
// Step 2: Detect relationships if enabled
|
|
224
|
+
if (this.enableRelationshipDetection && result.memoriesIndexed > 0) {
|
|
225
|
+
const relationshipCount = await this.detectAndStoreRelationships(tx, result.memoryIds, jobData.containerTag)
|
|
226
|
+
result.relationshipsDetected = relationshipCount
|
|
227
|
+
}
|
|
228
|
+
|
|
229
|
+
// Step 3: Update document status
|
|
230
|
+
await tx
|
|
231
|
+
.update(documents)
|
|
232
|
+
.set({
|
|
233
|
+
status: 'processed',
|
|
234
|
+
updatedAt: new Date(),
|
|
235
|
+
})
|
|
236
|
+
.where(eq(documents.id, jobData.documentId))
|
|
237
|
+
|
|
238
|
+
// Step 4: Mark processing queue job as completed
|
|
239
|
+
await tx
|
|
240
|
+
.update(processingQueue)
|
|
241
|
+
.set({
|
|
242
|
+
status: 'completed',
|
|
243
|
+
completedAt: new Date(),
|
|
244
|
+
})
|
|
245
|
+
.where(eq(processingQueue.id, jobData.queueJobId))
|
|
246
|
+
|
|
247
|
+
logger.info('Transaction committed successfully', {
|
|
248
|
+
documentId: jobData.documentId,
|
|
249
|
+
memoriesIndexed: result.memoriesIndexed,
|
|
250
|
+
duplicatesSkipped: result.duplicatesSkipped,
|
|
251
|
+
relationshipsDetected: result.relationshipsDetected,
|
|
252
|
+
})
|
|
253
|
+
})
|
|
254
|
+
|
|
255
|
+
result.processingTimeMs = Date.now() - startTime
|
|
256
|
+
|
|
257
|
+
logger.info('Indexing job completed', {
|
|
258
|
+
documentId: jobData.documentId,
|
|
259
|
+
result,
|
|
260
|
+
})
|
|
261
|
+
|
|
262
|
+
return result
|
|
263
|
+
} catch (error) {
|
|
264
|
+
logger.errorWithException('Indexing job failed', error, {
|
|
265
|
+
documentId: jobData.documentId,
|
|
266
|
+
queueJobId: jobData.queueJobId,
|
|
267
|
+
})
|
|
268
|
+
|
|
269
|
+
// Update processing queue to failed status
|
|
270
|
+
try {
|
|
271
|
+
await db
|
|
272
|
+
.update(processingQueue)
|
|
273
|
+
.set({
|
|
274
|
+
status: 'failed',
|
|
275
|
+
error: error instanceof Error ? error.message : 'Unknown error',
|
|
276
|
+
errorCode: error instanceof AppError ? error.code : ErrorCode.INTERNAL_ERROR,
|
|
277
|
+
completedAt: new Date(),
|
|
278
|
+
})
|
|
279
|
+
.where(eq(processingQueue.id, jobData.queueJobId))
|
|
280
|
+
} catch (updateError) {
|
|
281
|
+
logger.errorWithException('Failed to update queue status to failed', updateError)
|
|
282
|
+
}
|
|
283
|
+
|
|
284
|
+
throw AppError.from(error, ErrorCode.DATABASE_ERROR)
|
|
285
|
+
}
|
|
286
|
+
}
|
|
287
|
+
|
|
288
|
+
/**
|
|
289
|
+
* Detect relationships between memories and store in database
|
|
290
|
+
*/
|
|
291
|
+
private async detectAndStoreRelationships(
|
|
292
|
+
tx: DbTransaction,
|
|
293
|
+
memoryIds: string[],
|
|
294
|
+
containerTag: string
|
|
295
|
+
): Promise<number> {
|
|
296
|
+
try {
|
|
297
|
+
// Load memories with embeddings
|
|
298
|
+
const memoryRowsRaw = await tx
|
|
299
|
+
.select({ memory: memories, embedding: memoryEmbeddings })
|
|
300
|
+
.from(memories)
|
|
301
|
+
.leftJoin(memoryEmbeddings, eq(memoryEmbeddings.memoryId, memories.id))
|
|
302
|
+
.where(inArray(memories.id, memoryIds))
|
|
303
|
+
|
|
304
|
+
// Filter memories to those with valid embeddings
|
|
305
|
+
const memoryRows = memoryRowsRaw
|
|
306
|
+
.map(({ memory, embedding }) => ({
|
|
307
|
+
...memory,
|
|
308
|
+
embedding: embedding ? { embedding: embedding.embedding } : null,
|
|
309
|
+
}))
|
|
310
|
+
.filter((m) => {
|
|
311
|
+
const emb = m.embedding as { embedding: number[] | null } | null
|
|
312
|
+
return (
|
|
313
|
+
emb !== null &&
|
|
314
|
+
emb.embedding !== null &&
|
|
315
|
+
Array.isArray(emb.embedding) &&
|
|
316
|
+
m.containerTag !== null &&
|
|
317
|
+
m.confidenceScore !== null
|
|
318
|
+
)
|
|
319
|
+
})
|
|
320
|
+
|
|
321
|
+
if (memoryRows.length === 0) {
|
|
322
|
+
return 0
|
|
323
|
+
}
|
|
324
|
+
|
|
325
|
+
// Load existing memories from the same container for relationship detection
|
|
326
|
+
const existingMemoryRowsRaw = await tx
|
|
327
|
+
.select({ memory: memories, embedding: memoryEmbeddings })
|
|
328
|
+
.from(memories)
|
|
329
|
+
.leftJoin(memoryEmbeddings, eq(memoryEmbeddings.memoryId, memories.id))
|
|
330
|
+
.where(and(eq(memories.containerTag, containerTag), notInArray(memories.id, memoryIds)))
|
|
331
|
+
.limit(1000) // Limit to prevent memory issues
|
|
332
|
+
|
|
333
|
+
// Filter existing memories to those with valid embeddings
|
|
334
|
+
const existingMemoryRows = existingMemoryRowsRaw
|
|
335
|
+
.map(({ memory, embedding }) => ({
|
|
336
|
+
...memory,
|
|
337
|
+
embedding: embedding ? { embedding: embedding.embedding } : null,
|
|
338
|
+
}))
|
|
339
|
+
.filter((m) => {
|
|
340
|
+
const emb = m.embedding as { embedding: number[] | null } | null
|
|
341
|
+
return (
|
|
342
|
+
emb !== null &&
|
|
343
|
+
emb.embedding !== null &&
|
|
344
|
+
Array.isArray(emb.embedding) &&
|
|
345
|
+
m.containerTag !== null &&
|
|
346
|
+
m.confidenceScore !== null
|
|
347
|
+
)
|
|
348
|
+
})
|
|
349
|
+
|
|
350
|
+
// Add existing memories to vector store
|
|
351
|
+
for (const memory of existingMemoryRows) {
|
|
352
|
+
// Type assertion: We've already filtered for non-null embeddings
|
|
353
|
+
const embedding = (memory.embedding as { embedding: number[] }).embedding
|
|
354
|
+
this.vectorStore.addMemory(
|
|
355
|
+
{
|
|
356
|
+
id: memory.id,
|
|
357
|
+
content: memory.content,
|
|
358
|
+
type: mapToVectorStoreType(memory.memoryType),
|
|
359
|
+
relationships: [],
|
|
360
|
+
isLatest: memory.isLatest,
|
|
361
|
+
containerTag: memory.containerTag!,
|
|
362
|
+
createdAt: memory.createdAt,
|
|
363
|
+
updatedAt: memory.updatedAt,
|
|
364
|
+
confidence: parseFloat(memory.confidenceScore!),
|
|
365
|
+
metadata: {
|
|
366
|
+
...(memory.metadata as Record<string, unknown>),
|
|
367
|
+
confidence: parseFloat(memory.confidenceScore!),
|
|
368
|
+
originalDbType: memory.memoryType, // Preserve original type
|
|
369
|
+
},
|
|
370
|
+
},
|
|
371
|
+
embedding
|
|
372
|
+
)
|
|
373
|
+
}
|
|
374
|
+
|
|
375
|
+
let totalRelationships = 0
|
|
376
|
+
|
|
377
|
+
// Detect relationships for each new memory (already filtered to have embeddings)
|
|
378
|
+
for (const memory of memoryRows) {
|
|
379
|
+
// Type assertion: We've already filtered for non-null embeddings
|
|
380
|
+
const embedding = (memory.embedding as { embedding: number[] }).embedding
|
|
381
|
+
|
|
382
|
+
const detectionResult = await this.relationshipDetector.detectRelationships(
|
|
383
|
+
{
|
|
384
|
+
id: memory.id,
|
|
385
|
+
content: memory.content,
|
|
386
|
+
type: mapToVectorStoreType(memory.memoryType),
|
|
387
|
+
relationships: [],
|
|
388
|
+
isLatest: memory.isLatest,
|
|
389
|
+
containerTag: memory.containerTag!,
|
|
390
|
+
createdAt: memory.createdAt,
|
|
391
|
+
updatedAt: memory.updatedAt,
|
|
392
|
+
confidence: parseFloat(memory.confidenceScore!),
|
|
393
|
+
embedding,
|
|
394
|
+
metadata: {
|
|
395
|
+
...(memory.metadata as Record<string, unknown>),
|
|
396
|
+
confidence: parseFloat(memory.confidenceScore!),
|
|
397
|
+
originalDbType: memory.memoryType, // Preserve original type
|
|
398
|
+
},
|
|
399
|
+
},
|
|
400
|
+
{ containerTag }
|
|
401
|
+
)
|
|
402
|
+
|
|
403
|
+
// Insert detected relationships
|
|
404
|
+
for (const rel of detectionResult.relationships) {
|
|
405
|
+
await tx.insert(memoryRelationships).values({
|
|
406
|
+
sourceMemoryId: rel.relationship.sourceMemoryId,
|
|
407
|
+
targetMemoryId: rel.relationship.targetMemoryId,
|
|
408
|
+
relationshipType: rel.relationship.type,
|
|
409
|
+
weight: rel.score.toString(),
|
|
410
|
+
bidirectional: false,
|
|
411
|
+
metadata: {
|
|
412
|
+
vectorSimilarity: rel.score,
|
|
413
|
+
detectedAt: new Date().toISOString(),
|
|
414
|
+
llmVerified: rel.llmVerified ?? false,
|
|
415
|
+
},
|
|
416
|
+
})
|
|
417
|
+
totalRelationships++
|
|
418
|
+
}
|
|
419
|
+
|
|
420
|
+
// Add newly indexed memory to vector store for subsequent detections
|
|
421
|
+
this.vectorStore.addMemory(
|
|
422
|
+
{
|
|
423
|
+
id: memory.id,
|
|
424
|
+
content: memory.content,
|
|
425
|
+
type: mapToVectorStoreType(memory.memoryType),
|
|
426
|
+
relationships: [],
|
|
427
|
+
isLatest: memory.isLatest,
|
|
428
|
+
containerTag: memory.containerTag!,
|
|
429
|
+
createdAt: memory.createdAt,
|
|
430
|
+
updatedAt: memory.updatedAt,
|
|
431
|
+
confidence: parseFloat(memory.confidenceScore!),
|
|
432
|
+
metadata: {
|
|
433
|
+
...(memory.metadata as Record<string, unknown>),
|
|
434
|
+
confidence: parseFloat(memory.confidenceScore!),
|
|
435
|
+
originalDbType: memory.memoryType, // Preserve original type
|
|
436
|
+
},
|
|
437
|
+
},
|
|
438
|
+
embedding
|
|
439
|
+
)
|
|
440
|
+
}
|
|
441
|
+
|
|
442
|
+
logger.info('Relationships detected and stored', {
|
|
443
|
+
newMemoriesCount: memoryRows.length,
|
|
444
|
+
existingMemoriesCount: existingMemoryRows.length,
|
|
445
|
+
relationshipsDetected: totalRelationships,
|
|
446
|
+
})
|
|
447
|
+
|
|
448
|
+
return totalRelationships
|
|
449
|
+
} catch (error) {
|
|
450
|
+
logger.errorWithException('Relationship detection failed', error)
|
|
451
|
+
// Don't fail the job for relationship detection errors
|
|
452
|
+
return 0
|
|
453
|
+
}
|
|
454
|
+
}
|
|
455
|
+
|
|
456
|
+
/**
|
|
457
|
+
* Generate similarity hash for duplicate detection
|
|
458
|
+
* Uses content normalization + SHA256
|
|
459
|
+
*/
|
|
460
|
+
private generateSimilarityHash(content: string): string {
|
|
461
|
+
// Normalize content: lowercase, remove extra whitespace, trim
|
|
462
|
+
const normalized = content.toLowerCase().replace(/\s+/g, ' ').trim()
|
|
463
|
+
|
|
464
|
+
// Generate SHA256 hash
|
|
465
|
+
return createHash('sha256').update(normalized).digest('hex')
|
|
466
|
+
}
|
|
467
|
+
|
|
468
|
+
/**
|
|
469
|
+
* Health check for the worker
|
|
470
|
+
*/
|
|
471
|
+
async healthCheck(): Promise<{
|
|
472
|
+
healthy: boolean
|
|
473
|
+
dbConnected: boolean
|
|
474
|
+
embeddingServiceReady: boolean
|
|
475
|
+
}> {
|
|
476
|
+
try {
|
|
477
|
+
// Test database connection
|
|
478
|
+
await db.query.documents.findFirst()
|
|
479
|
+
|
|
480
|
+
return {
|
|
481
|
+
healthy: true,
|
|
482
|
+
dbConnected: true,
|
|
483
|
+
embeddingServiceReady: !!this.embeddingService,
|
|
484
|
+
}
|
|
485
|
+
} catch (error) {
|
|
486
|
+
logger.errorWithException('Health check failed', error)
|
|
487
|
+
return {
|
|
488
|
+
healthy: false,
|
|
489
|
+
dbConnected: false,
|
|
490
|
+
embeddingServiceReady: false,
|
|
491
|
+
}
|
|
492
|
+
}
|
|
493
|
+
}
|
|
494
|
+
}
|
|
495
|
+
|
|
496
|
+
// ============================================================================
|
|
497
|
+
// Factory Function
|
|
498
|
+
// ============================================================================
|
|
499
|
+
|
|
500
|
+
/**
 * Create an indexing worker instance.
 *
 * Thin factory over the `IndexingWorker` constructor, for call sites that
 * prefer a function over `new` (e.g. dependency injection or worker bootstrap).
 *
 * @param config Worker configuration (embedding service is required)
 * @returns A fully initialized `IndexingWorker`
 */
export function createIndexingWorker(config: IndexingWorkerConfig): IndexingWorker {
  return new IndexingWorker(config)
}
|
package/tsconfig.json
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
{
|
|
2
|
+
"compilerOptions": {
|
|
3
|
+
"target": "ES2022",
|
|
4
|
+
"module": "NodeNext",
|
|
5
|
+
"moduleResolution": "NodeNext",
|
|
6
|
+
"lib": ["ES2022"],
|
|
7
|
+
"outDir": "./dist",
|
|
8
|
+
"rootDir": "./src",
|
|
9
|
+
"strict": true,
|
|
10
|
+
"esModuleInterop": true,
|
|
11
|
+
"skipLibCheck": true,
|
|
12
|
+
"forceConsistentCasingInFileNames": true,
|
|
13
|
+
"resolveJsonModule": true,
|
|
14
|
+
"declaration": true,
|
|
15
|
+
"declarationMap": true,
|
|
16
|
+
"sourceMap": true,
|
|
17
|
+
"noImplicitAny": true,
|
|
18
|
+
"strictNullChecks": true,
|
|
19
|
+
"strictFunctionTypes": true,
|
|
20
|
+
"noImplicitReturns": true,
|
|
21
|
+
"noFallthroughCasesInSwitch": true,
|
|
22
|
+
"noUncheckedIndexedAccess": true,
|
|
23
|
+
"noImplicitOverride": true,
|
|
24
|
+
"allowUnusedLabels": false,
|
|
25
|
+
"allowUnreachableCode": false,
|
|
26
|
+
"allowSyntheticDefaultImports": true,
|
|
27
|
+
"downlevelIteration": true,
|
|
28
|
+
"exactOptionalPropertyTypes": false,
|
|
29
|
+
"noPropertyAccessFromIndexSignature": false,
|
|
30
|
+
"paths": {
|
|
31
|
+
"@/*": ["./src/*"],
|
|
32
|
+
"@tests/*": ["./tests/*"]
|
|
33
|
+
},
|
|
34
|
+
"baseUrl": "."
|
|
35
|
+
},
|
|
36
|
+
"include": ["src/**/*"],
|
|
37
|
+
"exclude": ["node_modules", "dist", "tests"]
|
|
38
|
+
}
|