@twelvehart/supermemory-runtime 1.0.0-next.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (156) hide show
  1. package/.env.example +57 -0
  2. package/README.md +374 -0
  3. package/dist/index.js +189 -0
  4. package/dist/mcp/index.js +1132 -0
  5. package/docker-compose.prod.yml +91 -0
  6. package/docker-compose.yml +358 -0
  7. package/drizzle/0000_dapper_the_professor.sql +159 -0
  8. package/drizzle/0001_api_keys.sql +51 -0
  9. package/drizzle/meta/0000_snapshot.json +1532 -0
  10. package/drizzle/meta/_journal.json +13 -0
  11. package/drizzle.config.ts +20 -0
  12. package/package.json +114 -0
  13. package/scripts/add-extraction-job.ts +122 -0
  14. package/scripts/benchmark-pgvector.ts +122 -0
  15. package/scripts/bootstrap.sh +209 -0
  16. package/scripts/check-runtime-pack.ts +111 -0
  17. package/scripts/claude-mcp-config.ts +336 -0
  18. package/scripts/docker-entrypoint.sh +183 -0
  19. package/scripts/doctor.ts +377 -0
  20. package/scripts/init-db.sql +33 -0
  21. package/scripts/install.sh +1110 -0
  22. package/scripts/mcp-setup.ts +271 -0
  23. package/scripts/migrations/001_create_pgvector_extension.sql +31 -0
  24. package/scripts/migrations/002_create_memory_embeddings_table.sql +75 -0
  25. package/scripts/migrations/003_create_hnsw_index.sql +94 -0
  26. package/scripts/migrations/004_create_memory_embeddings_standalone.sql +70 -0
  27. package/scripts/migrations/005_create_chunks_table.sql +95 -0
  28. package/scripts/migrations/006_create_processing_queue.sql +45 -0
  29. package/scripts/migrations/generate_test_data.sql +42 -0
  30. package/scripts/migrations/phase1_comprehensive_test.sql +204 -0
  31. package/scripts/migrations/run_migrations.sh +286 -0
  32. package/scripts/migrations/test_hnsw_index.sql +255 -0
  33. package/scripts/pre-commit-secrets +282 -0
  34. package/scripts/run-extraction-worker.ts +46 -0
  35. package/scripts/run-phase1-tests.sh +291 -0
  36. package/scripts/setup.ts +222 -0
  37. package/scripts/smoke-install.sh +12 -0
  38. package/scripts/test-health-endpoint.sh +328 -0
  39. package/src/api/index.ts +2 -0
  40. package/src/api/middleware/auth.ts +80 -0
  41. package/src/api/middleware/csrf.ts +308 -0
  42. package/src/api/middleware/errorHandler.ts +166 -0
  43. package/src/api/middleware/rateLimit.ts +360 -0
  44. package/src/api/middleware/validation.ts +514 -0
  45. package/src/api/routes/documents.ts +286 -0
  46. package/src/api/routes/profiles.ts +237 -0
  47. package/src/api/routes/search.ts +71 -0
  48. package/src/api/stores/index.ts +58 -0
  49. package/src/config/bootstrap-env.ts +3 -0
  50. package/src/config/env.ts +71 -0
  51. package/src/config/feature-flags.ts +25 -0
  52. package/src/config/index.ts +140 -0
  53. package/src/config/secrets.config.ts +291 -0
  54. package/src/db/client.ts +92 -0
  55. package/src/db/index.ts +73 -0
  56. package/src/db/postgres.ts +72 -0
  57. package/src/db/schema/chunks.schema.ts +31 -0
  58. package/src/db/schema/containers.schema.ts +46 -0
  59. package/src/db/schema/documents.schema.ts +49 -0
  60. package/src/db/schema/embeddings.schema.ts +32 -0
  61. package/src/db/schema/index.ts +11 -0
  62. package/src/db/schema/memories.schema.ts +72 -0
  63. package/src/db/schema/profiles.schema.ts +34 -0
  64. package/src/db/schema/queue.schema.ts +59 -0
  65. package/src/db/schema/relationships.schema.ts +42 -0
  66. package/src/db/schema.ts +223 -0
  67. package/src/db/worker-connection.ts +47 -0
  68. package/src/index.ts +235 -0
  69. package/src/mcp/CLAUDE.md +1 -0
  70. package/src/mcp/index.ts +1380 -0
  71. package/src/mcp/legacyState.ts +22 -0
  72. package/src/mcp/rateLimit.ts +358 -0
  73. package/src/mcp/resources.ts +309 -0
  74. package/src/mcp/results.ts +104 -0
  75. package/src/mcp/tools.ts +401 -0
  76. package/src/queues/config.ts +119 -0
  77. package/src/queues/index.ts +289 -0
  78. package/src/sdk/client.ts +225 -0
  79. package/src/sdk/errors.ts +266 -0
  80. package/src/sdk/http.ts +560 -0
  81. package/src/sdk/index.ts +244 -0
  82. package/src/sdk/resources/base.ts +65 -0
  83. package/src/sdk/resources/connections.ts +204 -0
  84. package/src/sdk/resources/documents.ts +163 -0
  85. package/src/sdk/resources/index.ts +10 -0
  86. package/src/sdk/resources/memories.ts +150 -0
  87. package/src/sdk/resources/search.ts +60 -0
  88. package/src/sdk/resources/settings.ts +36 -0
  89. package/src/sdk/types.ts +674 -0
  90. package/src/services/chunking/index.ts +451 -0
  91. package/src/services/chunking.service.ts +650 -0
  92. package/src/services/csrf.service.ts +252 -0
  93. package/src/services/documents.repository.ts +219 -0
  94. package/src/services/documents.service.ts +191 -0
  95. package/src/services/embedding.service.ts +404 -0
  96. package/src/services/extraction.service.ts +300 -0
  97. package/src/services/extractors/code.extractor.ts +451 -0
  98. package/src/services/extractors/index.ts +9 -0
  99. package/src/services/extractors/markdown.extractor.ts +461 -0
  100. package/src/services/extractors/pdf.extractor.ts +315 -0
  101. package/src/services/extractors/text.extractor.ts +118 -0
  102. package/src/services/extractors/url.extractor.ts +243 -0
  103. package/src/services/index.ts +235 -0
  104. package/src/services/ingestion.service.ts +177 -0
  105. package/src/services/llm/anthropic.ts +400 -0
  106. package/src/services/llm/base.ts +460 -0
  107. package/src/services/llm/contradiction-detector.service.ts +526 -0
  108. package/src/services/llm/heuristics.ts +148 -0
  109. package/src/services/llm/index.ts +309 -0
  110. package/src/services/llm/memory-classifier.service.ts +383 -0
  111. package/src/services/llm/memory-extension-detector.service.ts +523 -0
  112. package/src/services/llm/mock.ts +470 -0
  113. package/src/services/llm/openai.ts +398 -0
  114. package/src/services/llm/prompts.ts +438 -0
  115. package/src/services/llm/types.ts +373 -0
  116. package/src/services/memory.repository.ts +1769 -0
  117. package/src/services/memory.service.ts +1338 -0
  118. package/src/services/memory.types.ts +234 -0
  119. package/src/services/persistence/index.ts +295 -0
  120. package/src/services/pipeline.service.ts +509 -0
  121. package/src/services/profile.repository.ts +436 -0
  122. package/src/services/profile.service.ts +560 -0
  123. package/src/services/profile.types.ts +270 -0
  124. package/src/services/relationships/detector.ts +1128 -0
  125. package/src/services/relationships/index.ts +268 -0
  126. package/src/services/relationships/memory-integration.ts +459 -0
  127. package/src/services/relationships/strategies.ts +132 -0
  128. package/src/services/relationships/types.ts +370 -0
  129. package/src/services/search.service.ts +761 -0
  130. package/src/services/search.types.ts +220 -0
  131. package/src/services/secrets.service.ts +384 -0
  132. package/src/services/vectorstore/base.ts +327 -0
  133. package/src/services/vectorstore/index.ts +444 -0
  134. package/src/services/vectorstore/memory.ts +286 -0
  135. package/src/services/vectorstore/migration.ts +295 -0
  136. package/src/services/vectorstore/mock.ts +403 -0
  137. package/src/services/vectorstore/pgvector.ts +695 -0
  138. package/src/services/vectorstore/types.ts +247 -0
  139. package/src/startup.ts +389 -0
  140. package/src/types/api.types.ts +193 -0
  141. package/src/types/document.types.ts +103 -0
  142. package/src/types/index.ts +241 -0
  143. package/src/types/profile.base.ts +133 -0
  144. package/src/utils/errors.ts +447 -0
  145. package/src/utils/id.ts +15 -0
  146. package/src/utils/index.ts +101 -0
  147. package/src/utils/logger.ts +313 -0
  148. package/src/utils/sanitization.ts +501 -0
  149. package/src/utils/secret-validation.ts +273 -0
  150. package/src/utils/synonyms.ts +188 -0
  151. package/src/utils/validation.ts +581 -0
  152. package/src/workers/chunking.worker.ts +242 -0
  153. package/src/workers/embedding.worker.ts +358 -0
  154. package/src/workers/extraction.worker.ts +346 -0
  155. package/src/workers/indexing.worker.ts +505 -0
  156. package/tsconfig.json +38 -0
@@ -0,0 +1,761 @@
1
+ /**
2
+ * Search Service for Supermemory Clone
3
+ *
4
+ * Provides hybrid search combining vector similarity and memory graph search
5
+ * with reranking and query rewriting capabilities.
6
+ */
7
+
8
+ import { EmbeddingService, createEmbeddingService } from './embedding.service.js'
9
+ import {
10
+ SearchOptions,
11
+ SearchResult,
12
+ SearchResponse,
13
+ Memory,
14
+ Chunk,
15
+ MetadataFilter,
16
+ DEFAULT_SEARCH_OPTIONS,
17
+ RerankOptions,
18
+ QueryRewriteOptions,
19
+ } from './search.types.js'
20
+ import {
21
+ BaseVectorStore,
22
+ createVectorStore,
23
+ createPgVectorStore,
24
+ createInMemoryVectorStore,
25
+ getDefaultVectorStoreConfig,
26
+ VectorStoreConfig,
27
+ VectorSearchResult as VectorStoreSearchResult,
28
+ } from './vectorstore/index.js'
29
+ import { expandQuery } from '../utils/synonyms.js'
30
+ import { getDatabaseUrl, isPostgresUrl } from '../db/client.js'
31
+ import { getPostgresDatabase } from '../db/postgres.js'
32
+ import { documents } from '../db/schema/documents.schema.js'
33
+ import { and, desc, eq, sql } from 'drizzle-orm'
34
+
35
/**
 * Legacy-shaped vector hit: the stored entry nested under `entry`, paired
 * with its similarity to the query. Consumed by
 * `SearchService.vectorResultToSearchResult`.
 */
interface InternalVectorSearchResult {
  /** The stored vector entry that matched. */
  entry: {
    /** Identifier of the entry (a memory id or a chunk id). */
    id: string
    /** Raw embedding vector of the entry. */
    embedding: number[]
    /** Free-form metadata saved alongside the vector. */
    metadata: Record<string, unknown>
  }
  /** Similarity score between the query and this entry. */
  similarity: number
}
46
+
47
/**
 * In-memory memory graph for development/testing.
 *
 * Holds memories keyed by id and chunks grouped by the id of their parent
 * memory. All state lives in the two maps below; nothing is persisted.
 */
class InMemoryMemoryGraph {
  private memories: Map<string, Memory> = new Map()
  private chunksByMemoryId: Map<string, Chunk[]> = new Map()

  /** Store a memory under its id, replacing any previous entry with that id. */
  addMemory(memory: Memory): void {
    this.memories.set(memory.id, memory)
  }

  /**
   * Attach a chunk to its parent memory.
   *
   * A chunk whose id is already registered for that memory replaces the
   * existing entry in place rather than being appended again, so indexing the
   * same memory repeatedly does not accumulate duplicate chunks.
   */
  addChunk(chunk: Chunk): void {
    const chunks = this.chunksByMemoryId.get(chunk.memoryId) ?? []
    const existingIndex = chunks.findIndex((c) => c.id === chunk.id)
    if (existingIndex >= 0) {
      chunks[existingIndex] = chunk
    } else {
      chunks.push(chunk)
    }
    this.chunksByMemoryId.set(chunk.memoryId, chunks)
  }

  /** Look up a memory by id. */
  getMemory(id: string): Memory | undefined {
    return this.memories.get(id)
  }

  /** Chunks registered for a memory, or an empty array when there are none. */
  getChunks(memoryId: string): Chunk[] {
    return this.chunksByMemoryId.get(memoryId) ?? []
  }

  /** Snapshot array of every stored memory. */
  getAllMemories(): Memory[] {
    return Array.from(this.memories.values())
  }

  /**
   * Remove a memory together with its chunks.
   *
   * @returns `removed` - whether a memory with that id existed;
   *          `chunkCount` - how many chunks were dropped with it.
   */
  removeMemory(memoryId: string): { removed: boolean; chunkCount: number } {
    const removed = this.memories.delete(memoryId)
    // Count before deleting the chunk list, otherwise the count is always 0.
    const chunkCount = this.getChunks(memoryId).length
    this.chunksByMemoryId.delete(memoryId)
    return { removed, chunkCount }
  }

  /** All memories whose `containerTag` equals the given tag exactly. */
  searchByTag(containerTag: string): Memory[] {
    return Array.from(this.memories.values()).filter((m) => m.containerTag === containerTag)
  }

  /**
   * Case-insensitive token search over memory content.
   *
   * The query is split on whitespace; a memory's score is the fraction of
   * query tokens that occur as substrings of its content. Memories scoring
   * above 0.3 are returned, best score first.
   */
  searchByContent(query: string): Memory[] {
    const tokens = query
      .toLowerCase()
      .split(/\s+/)
      .filter((t) => t.length > 0)

    // A blank query has no tokens; bail out instead of dividing by zero below.
    if (tokens.length === 0) return []

    return Array.from(this.memories.values())
      .map((memory) => {
        const content = memory.content.toLowerCase()
        const matchCount = tokens.filter((token) => content.includes(token)).length
        return { memory, score: matchCount / tokens.length }
      })
      .filter(({ score }) => score > 0.3)
      .sort((a, b) => b.score - a.score)
      .map(({ memory }) => memory)
  }

  /** Drop every memory and chunk. */
  clear(): void {
    this.memories.clear()
    this.chunksByMemoryId.clear()
  }
}
108
+
109
/**
 * Search Service class.
 *
 * Owns an embedding service, a vector store and an in-process memory graph,
 * and exposes indexing plus four search modes (`vector`, `memory`,
 * `fulltext`, `hybrid`) with optional query rewriting, metadata/date
 * filtering and reranking.
 */
export class SearchService {
  private readonly embeddingService: EmbeddingService
  private vectorStore: BaseVectorStore
  private readonly memoryGraph: InMemoryMemoryGraph
  private initialized = false

  /**
   * @param options.embeddingService Embedding provider; defaults to `createEmbeddingService()`.
   * @param options.vectorStore Vector store to use. When omitted, a pgvector
   *   store is created if the configured database URL is a Postgres URL and
   *   `NODE_ENV` is not `test`; otherwise an in-memory store is created.
   */
  constructor(options?: { embeddingService?: EmbeddingService; vectorStore?: BaseVectorStore }) {
    this.embeddingService = options?.embeddingService || createEmbeddingService()
    // Default to pgvector-backed store for runtime usage
    const connectionString = getDatabaseUrl()
    const defaultConfig = getDefaultVectorStoreConfig()
    let vectorStore = options?.vectorStore
    if (!vectorStore) {
      // Both store flavours are configured from the same defaults.
      const storeOptions = {
        metric: defaultConfig.metric,
        hnswConfig: defaultConfig.hnswConfig,
        defaultNamespace: defaultConfig.defaultNamespace,
        indexType: defaultConfig.indexType,
      }
      const dimensions = this.embeddingService.getDimensions()
      const useInMemory = process.env.NODE_ENV === 'test' || !isPostgresUrl(connectionString)
      if (!useInMemory) {
        vectorStore = createPgVectorStore(connectionString, dimensions, storeOptions)
      } else {
        vectorStore = createInMemoryVectorStore(dimensions, storeOptions)
      }
    }
    this.vectorStore = vectorStore
    this.memoryGraph = new InMemoryMemoryGraph()
  }

  /**
   * Initialize the search service (initializes vector store).
   * Subsequent calls are no-ops until the store is swapped or the service is closed.
   */
  async initialize(): Promise<void> {
    if (this.initialized) return
    await this.vectorStore.initialize()
    this.initialized = true
  }

  /**
   * Set a custom vector store (useful for testing or changing providers).
   * Resets the initialized flag so the new store is initialized on next use.
   */
  setVectorStore(vectorStore: BaseVectorStore): void {
    this.vectorStore = vectorStore
    this.initialized = false
  }

  /**
   * Get the vector store
   */
  getVectorStore(): BaseVectorStore {
    return this.vectorStore
  }

  /**
   * Get the embedding service
   */
  getEmbeddingService(): EmbeddingService {
    return this.embeddingService
  }

  /**
   * Index a memory and, optionally, its chunks.
   *
   * Note: this writes the generated embedding back onto `memory.embedding`
   * (when it was missing) and onto each `chunk.embedding`.
   *
   * @param memory Memory to index; embedded from `content` if it has no embedding yet.
   * @param chunks Optional chunks of the memory; embedded in one batch call.
   */
  async indexMemory(memory: Memory, chunks?: Chunk[]): Promise<void> {
    // Ensure vector store is initialized
    if (!this.initialized) {
      await this.initialize()
    }

    // Generate embedding for memory if not provided
    if (!memory.embedding) {
      memory.embedding = await this.embeddingService.generateEmbedding(memory.content)
    }

    // Add to memory graph
    this.memoryGraph.addMemory(memory)

    // Add to vector store. `memory.metadata` is spread last, so its keys win
    // over the `type` / `containerTag` defaults set here.
    await this.vectorStore.add(
      {
        id: memory.id,
        embedding: memory.embedding,
        metadata: {
          type: 'memory',
          containerTag: memory.containerTag,
          ...memory.metadata,
        },
      },
      { overwrite: true }
    )

    // Index chunks if provided
    if (chunks && chunks.length > 0) {
      const chunkTexts = chunks.map((c) => c.content)
      const chunkEmbeddings = await this.embeddingService.batchEmbed(chunkTexts)

      const vectorEntries = []
      for (let i = 0; i < chunks.length; i++) {
        const chunk = chunks[i]
        const embedding = chunkEmbeddings[i]
        // Skip positions for which no embedding came back.
        if (!chunk || !embedding) continue

        chunk.embedding = embedding

        this.memoryGraph.addChunk(chunk)
        vectorEntries.push({
          id: chunk.id,
          embedding: embedding,
          metadata: {
            type: 'chunk',
            memoryId: chunk.memoryId,
            chunkIndex: chunk.chunkIndex,
            ...chunk.metadata,
          },
        })
      }

      // Batch add chunks to vector store
      if (vectorEntries.length > 0) {
        await this.vectorStore.addBatch(vectorEntries, { overwrite: true })
      }
    }
  }

  /**
   * Run a search in the mode selected by `options.searchMode`
   * (`hybrid` when unset or unrecognised).
   *
   * Pipeline: optional query rewrite -> mode-specific search -> container tag
   * filter -> metadata filters -> date range filter -> optional rerank ->
   * sort by `rerankScore ?? similarity` -> limit -> strip embeddings unless
   * `includeEmbeddings` is set.
   *
   * @param query Raw user query.
   * @param containerTag When given, only results belonging to this tag are kept.
   * @param options Overrides merged on top of `DEFAULT_SEARCH_OPTIONS`.
   * @returns The page of results plus `totalCount` (matches after filtering,
   *   before the limit is applied), the effective and original query, the
   *   elapsed time and the resolved options.
   */
  async hybridSearch(query: string, containerTag?: string, options?: Partial<SearchOptions>): Promise<SearchResponse> {
    const startTime = Date.now()
    const searchOptions: SearchOptions = { ...DEFAULT_SEARCH_OPTIONS, ...options }

    let searchQuery = query
    let originalQuery: string | undefined

    // Rewrite query if enabled
    if (searchOptions.rewriteQuery) {
      originalQuery = query
      searchQuery = await this.rewriteQuery(query)
    }

    let results: SearchResult[] = []

    switch (searchOptions.searchMode) {
      case 'vector':
        results = await this.vectorSearchInternal(searchQuery, searchOptions)
        break
      case 'memory':
        results = this.memorySearchInternal(searchQuery, containerTag, searchOptions)
        break
      case 'fulltext':
        results = await this.fullTextSearchInternal(searchQuery, containerTag, searchOptions)
        break
      case 'hybrid':
      default:
        results = await this.combineSearchResults(searchQuery, containerTag, searchOptions)
        break
    }

    // Apply container tag filter
    if (containerTag) {
      results = results.filter((r) => {
        if (r.memory) return r.memory.containerTag === containerTag
        if (r.chunk) {
          const parent = this.memoryGraph.getMemory(r.chunk.memoryId)
          if (parent) return parent.containerTag === containerTag
        }
        // The memory is not in the in-process graph (e.g. the vector lives in
        // a persistent store but the graph was never populated in this
        // process). Fall back to the tag that indexMemory() writes into the
        // vector metadata instead of silently dropping the hit.
        return r.metadata.containerTag === containerTag
      })
    }

    // Apply metadata filters
    if (searchOptions.filters && searchOptions.filters.length > 0) {
      results = this.applyFilters(results, searchOptions.filters)
    }

    // Apply date range filter
    if (searchOptions.dateRange) {
      results = this.applyDateFilter(results, searchOptions.dateRange)
    }

    // Rerank if enabled
    if (searchOptions.rerank && results.length > 1) {
      results = await this.rerank(results, searchQuery)
    }

    // Sort by similarity (or rerank score if available)
    results.sort((a, b) => {
      const scoreA = a.rerankScore ?? a.similarity
      const scoreB = b.rerankScore ?? b.similarity
      return scoreB - scoreA
    })

    // Apply limit
    const totalCount = results.length
    results = results.slice(0, searchOptions.limit)

    // Remove embeddings if not requested
    if (!searchOptions.includeEmbeddings) {
      results = results.map((r) => ({
        ...r,
        memory: r.memory ? { ...r.memory, embedding: undefined } : undefined,
        chunk: r.chunk ? { ...r.chunk, embedding: undefined } : undefined,
      }))
    }

    const searchTimeMs = Date.now() - startTime

    return {
      results,
      totalCount,
      query: searchQuery,
      originalQuery,
      searchTimeMs,
      options: searchOptions,
    }
  }

  /**
   * Pure vector similarity search for a precomputed embedding.
   *
   * @param embedding Query vector.
   * @param limit Maximum number of hits (default 10).
   * @param threshold Minimum score passed to the vector store (default 0.7).
   */
  async vectorSearch(embedding: number[], limit: number = 10, threshold: number = 0.7): Promise<SearchResult[]> {
    // Ensure vector store is initialized
    if (!this.initialized) {
      await this.initialize()
    }

    const vectorResults = await this.vectorStore.search(embedding, {
      limit,
      threshold,
      includeMetadata: true,
    })

    return vectorResults.map((vr) => this.vectorStoreResultToSearchResult(vr))
  }

  /**
   * Search through the in-process memory graph using `DEFAULT_SEARCH_OPTIONS`.
   */
  memorySearch(query: string, containerTag?: string): SearchResult[] {
    return this.memorySearchInternal(query, containerTag, DEFAULT_SEARCH_OPTIONS)
  }

  /**
   * Rerank results with a lightweight lexical heuristic.
   * Note: In production, this would use a proper cross-encoder model.
   *
   * The first `topK` results (all of them when `topK` is unset or 0) receive a
   * `rerankScore` of `0.5 * similarity + 0.3 * termOverlap + 0.2 * positionBoost`,
   * capped at 1, and are re-ordered by it. Results beyond `topK` are appended
   * unchanged.
   *
   * @param results Results to rerank; the array itself is not mutated.
   * @param query Query whose whitespace-separated tokens (longer than one
   *   character) are matched against each result's content.
   * @param options Optional `topK` cut-off.
   */
  async rerank(results: SearchResult[], query: string, options?: RerankOptions): Promise<SearchResult[]> {
    const topK = options?.topK || results.length
    const toRerank = results.slice(0, topK)

    // Simple reranking based on query term overlap and position
    // In production, use a proper cross-encoder model
    const queryTokens = new Set(
      query
        .toLowerCase()
        .split(/\s+/)
        .filter((t) => t.length > 1)
    )

    const reranked = toRerank.map((result) => {
      const content = (result.memory?.content || result.chunk?.content || '').toLowerCase()
      const contentTokens = content.split(/\s+/)

      let matchCount = 0
      let positionBoost = 0

      for (let i = 0; i < contentTokens.length; i++) {
        const token = contentTokens[i]
        if (token && queryTokens.has(token)) {
          matchCount++
          // Boost for matches early in content
          positionBoost += 1 / (1 + i * 0.01)
        }
      }

      // Combine original similarity with reranking factors.
      // With no usable query tokens the overlap is defined as 0; dividing by a
      // zero-sized set would yield NaN, which survives Math.min and then
      // poisons every later sort on rerankScore.
      const termOverlap = queryTokens.size > 0 ? matchCount / queryTokens.size : 0
      const rerankScore = result.similarity * 0.5 + termOverlap * 0.3 + positionBoost * 0.2

      return {
        ...result,
        rerankScore: Math.min(1, rerankScore),
      }
    })

    // Sort by rerank score
    reranked.sort((a, b) => (b.rerankScore || 0) - (a.rerankScore || 0))

    // Combine with remaining results
    return [...reranked, ...results.slice(topK)]
  }

  /**
   * Expand/rewrite query for better recall.
   *
   * Uses the shared synonyms utility for consistent expansion across services.
   * In production, this would be augmented with an LLM for intelligent query rewriting.
   *
   * Synonym inclusion and abbreviation expansion are on unless explicitly set
   * to `false` in `options`.
   */
  async rewriteQuery(query: string, options?: QueryRewriteOptions): Promise<string> {
    return expandQuery(query, {
      includeSynonyms: options?.includeSynonyms !== false,
      expandAbbreviations: options?.expandAbbreviations !== false,
      maxSynonymsPerTerm: 2,
    })
  }

  /**
   * Clear all indexed data from both the vector store and the memory graph.
   */
  async clear(): Promise<void> {
    await this.vectorStore.clear()
    this.memoryGraph.clear()
  }

  /**
   * Get statistics about indexed data.
   *
   * @returns `vectorCount` from the vector store's stats and `memoryCount`
   *   from the in-process memory graph.
   */
  async getStats(): Promise<{ vectorCount: number; memoryCount: number }> {
    const stats = await this.vectorStore.getStats()
    return {
      vectorCount: stats.totalVectors,
      memoryCount: this.memoryGraph.getAllMemories().length,
    }
  }

  /**
   * Remove a memory, and the chunks the memory graph knows for it, from the index.
   *
   * @returns `removed` - true if anything was deleted from the vector store
   *   or the graph; `vectorsDeleted` - number of vectors the store reported deleted.
   */
  async removeMemory(memoryId: string): Promise<{ removed: boolean; vectorsDeleted: number }> {
    // Remove from vector store
    const deleted = await this.vectorStore.delete({ ids: [memoryId] })

    // Also remove any chunks associated with this memory.
    // Chunk ids are read from the graph before the graph entry is dropped.
    const chunks = this.memoryGraph.getChunks(memoryId)
    let chunkDeleted = 0
    if (chunks.length > 0) {
      const chunkIds = chunks.map((c) => c.id)
      chunkDeleted = await this.vectorStore.delete({ ids: chunkIds })
    }

    const removedFromGraph = this.memoryGraph.removeMemory(memoryId)

    return {
      removed: deleted > 0 || chunkDeleted > 0 || removedFromGraph.removed || removedFromGraph.chunkCount > 0,
      vectorsDeleted: deleted + chunkDeleted,
    }
  }

  /**
   * Close the vector store, empty the memory graph and mark the service as
   * uninitialized.
   */
  async close(): Promise<void> {
    await this.vectorStore.close()
    this.memoryGraph.clear()
    this.initialized = false
  }

  // Private methods

  /**
   * Embed the query and search the vector store, asking for twice the
   * requested limit so later filtering still leaves enough results.
   */
  private async vectorSearchInternal(query: string, options: SearchOptions): Promise<SearchResult[]> {
    // Ensure vector store is initialized
    if (!this.initialized) {
      await this.initialize()
    }

    const queryEmbedding = await this.embeddingService.generateEmbedding(query)
    const vectorResults = await this.vectorStore.search(queryEmbedding, {
      limit: options.limit * 2, // Get more to allow for filtering
      threshold: options.threshold,
      includeMetadata: true,
    })

    return vectorResults.map((vr) => this.vectorStoreResultToSearchResult(vr))
  }

  /**
   * Search the in-process memory graph.
   *
   * With a container tag: memories of that tag whose content contains the
   * whole lower-cased query as a substring. Without one: the graph's
   * token-based content search. Results get a synthetic similarity that
   * decays by 0.05 per rank position and never drops below 0.
   */
  private memorySearchInternal(
    query: string,
    containerTag: string | undefined,
    options: SearchOptions
  ): SearchResult[] {
    let memories: Memory[]

    if (containerTag) {
      memories = this.memoryGraph.searchByTag(containerTag)
      // Further filter by content
      const lowerQuery = query.toLowerCase()
      memories = memories.filter((m) => m.content.toLowerCase().includes(lowerQuery))
    } else {
      memories = this.memoryGraph.searchByContent(query)
    }

    return memories.slice(0, options.limit).map((memory, index) => ({
      id: memory.id,
      memory,
      // Decay based on position; clamped so limits above 20 cannot produce
      // negative similarities.
      similarity: Math.max(0, 1 - index * 0.05),
      metadata: memory.metadata || {},
      updatedAt: memory.updatedAt,
      source: 'memory' as const,
    }))
  }

  /**
   * Hybrid mode: run vector and full-text search concurrently and merge by id.
   * An id found by both keeps the vector result's fields, takes the higher of
   * the two similarities and is tagged `source: 'hybrid'`.
   */
  private async combineSearchResults(
    query: string,
    containerTag: string | undefined,
    options: SearchOptions
  ): Promise<SearchResult[]> {
    // Run vector + full-text searches.
    const [vectorResults, fullTextResults] = await Promise.all([
      this.vectorSearchInternal(query, options),
      this.fullTextSearchInternal(query, containerTag, options),
    ])

    // Merge and deduplicate
    const resultMap = new Map<string, SearchResult>()

    // Add vector results first (higher priority for similarity)
    for (const result of vectorResults) {
      resultMap.set(result.id, result)
    }

    // Add full-text results, merging if exists
    for (const result of fullTextResults) {
      const existing = resultMap.get(result.id)
      if (existing) {
        // Combine scores - keep higher similarity, mark as hybrid
        resultMap.set(result.id, {
          ...existing,
          similarity: Math.max(existing.similarity, result.similarity),
          source: 'hybrid',
        })
      } else {
        resultMap.set(result.id, result)
      }
    }

    return Array.from(resultMap.values())
  }

  /**
   * Full-text search over the `documents` table using PostgreSQL
   * `to_tsvector` / `plainto_tsquery` with the `english` configuration,
   * ranked by `ts_rank_cd` and then by most recent update.
   *
   * Falls back to the in-memory graph search when `NODE_ENV` is `test` or the
   * configured database URL is not a Postgres URL. Each row is returned as a
   * synthetic `fact` memory whose similarity is the rank clamped to [0, 1].
   */
  private async fullTextSearchInternal(
    query: string,
    containerTag: string | undefined,
    options: SearchOptions
  ): Promise<SearchResult[]> {
    const connectionString = getDatabaseUrl()

    // Keep test behavior deterministic by using in-memory fallback.
    if (process.env.NODE_ENV === 'test' || !isPostgresUrl(connectionString)) {
      return this.memorySearchInternal(query, containerTag, options)
    }

    const db = getPostgresDatabase(connectionString)
    // The query is interpolated through the drizzle `sql` tag, i.e. bound as
    // a parameter rather than concatenated into the statement text.
    const rankExpr = sql<number>`
      ts_rank_cd(
        to_tsvector('english', ${documents.content}),
        plainto_tsquery('english', ${query})
      )
    `

    const textMatch = sql<boolean>`
      to_tsvector('english', ${documents.content})
      @@
      plainto_tsquery('english', ${query})
    `
    const whereClause = containerTag ? and(textMatch, eq(documents.containerTag, containerTag)) : textMatch

    const rows = await db
      .select({
        id: documents.id,
        content: documents.content,
        containerTag: documents.containerTag,
        metadata: documents.metadata,
        createdAt: documents.createdAt,
        updatedAt: documents.updatedAt,
        rank: rankExpr,
      })
      .from(documents)
      .where(whereClause)
      .orderBy(desc(rankExpr), desc(documents.updatedAt))
      .limit(options.limit * 2)

    return rows.map((row) => {
      const metadata = row.metadata && typeof row.metadata === 'object' ? (row.metadata as Record<string, unknown>) : {}
      // Timestamps may arrive as Date objects or as serialized values.
      const createdAt = row.createdAt instanceof Date ? row.createdAt : new Date(row.createdAt)
      const updatedAt = row.updatedAt instanceof Date ? row.updatedAt : new Date(row.updatedAt)
      const score = Math.max(0, Math.min(1, Number(row.rank ?? 0)))

      return {
        id: row.id,
        memory: {
          id: row.id,
          content: row.content,
          type: 'fact',
          relationships: [],
          isLatest: true,
          containerTag: row.containerTag,
          metadata,
          createdAt,
          updatedAt,
          confidence: 1,
          sourceId: row.id,
        },
        similarity: score,
        metadata,
        updatedAt,
        source: 'fulltext',
      }
    })
  }

  /**
   * Convert a legacy-shaped `InternalVectorSearchResult` to a SearchResult.
   * Currently has no callers within this class.
   */
  private vectorResultToSearchResult(vr: InternalVectorSearchResult): SearchResult {
    const isChunk = vr.entry.metadata.type === 'chunk'
    const memoryId = isChunk ? (vr.entry.metadata.memoryId as string) : vr.entry.id

    const memory = this.memoryGraph.getMemory(memoryId)
    const chunk = isChunk ? this.memoryGraph.getChunks(memoryId).find((c) => c.id === vr.entry.id) : undefined

    return {
      id: vr.entry.id,
      memory,
      chunk,
      similarity: vr.similarity,
      metadata: { ...vr.entry.metadata, ...(memory?.metadata || {}) },
      updatedAt: memory?.updatedAt || new Date(),
      source: 'vector',
    }
  }

  /**
   * Convert VectorStoreSearchResult to SearchResult.
   *
   * The memory (and, for chunk hits, the chunk) is looked up in the memory
   * graph and is `undefined` when the graph does not hold it. Memory metadata
   * overrides vector metadata on key clashes, and `updatedAt` falls back to
   * the current time when the memory is unknown.
   */
  private vectorStoreResultToSearchResult(vr: VectorStoreSearchResult): SearchResult {
    const isChunk = vr.metadata.type === 'chunk'
    const memoryId = isChunk ? (vr.metadata.memoryId as string) : vr.id

    const memory = this.memoryGraph.getMemory(memoryId)
    const chunk = isChunk ? this.memoryGraph.getChunks(memoryId).find((c) => c.id === vr.id) : undefined

    return {
      id: vr.id,
      memory,
      chunk,
      similarity: vr.score,
      metadata: { ...vr.metadata, ...(memory?.metadata || {}) },
      updatedAt: memory?.updatedAt || new Date(),
      source: 'vector',
    }
  }

  /**
   * Keep only results whose metadata satisfies every filter.
   *
   * A result lacking the filtered key fails that filter regardless of the
   * operator (including `ne`). Ordering operators require numbers on both
   * sides; `contains` / `startsWith` require strings on both sides. The
   * operator defaults to `eq`; unknown operators match nothing.
   */
  private applyFilters(results: SearchResult[], filters: MetadataFilter[]): SearchResult[] {
    return results.filter((result) => {
      const metadata = result.metadata
      return filters.every((filter) => {
        const value = metadata[filter.key]
        if (value === undefined) return false

        const op = filter.operator || 'eq'
        switch (op) {
          case 'eq':
            return value === filter.value
          case 'ne':
            return value !== filter.value
          case 'gt':
            return typeof value === 'number' && typeof filter.value === 'number' && value > filter.value
          case 'gte':
            return typeof value === 'number' && typeof filter.value === 'number' && value >= filter.value
          case 'lt':
            return typeof value === 'number' && typeof filter.value === 'number' && value < filter.value
          case 'lte':
            return typeof value === 'number' && typeof filter.value === 'number' && value <= filter.value
          case 'contains':
            return typeof value === 'string' && typeof filter.value === 'string' && value.includes(filter.value)
          case 'startsWith':
            return typeof value === 'string' && typeof filter.value === 'string' && value.startsWith(filter.value)
          default:
            return false
        }
      })
    })
  }

  /**
   * Keep only results whose `updatedAt` lies within the inclusive
   * `[from, to]` range; either bound may be omitted.
   */
  private applyDateFilter(results: SearchResult[], dateRange: { from?: Date; to?: Date }): SearchResult[] {
    return results.filter((result) => {
      const date = result.updatedAt
      if (dateRange.from && date < dateRange.from) return false
      if (dateRange.to && date > dateRange.to) return false
      return true
    })
  }
}
704
+
705
/**
 * Factory for a fresh, independent SearchService.
 *
 * @param options Optional embedding service and/or vector store, forwarded
 *   unchanged to the SearchService constructor.
 * @returns A new (not yet initialized) SearchService.
 */
export function createSearchService(options?: {
  embeddingService?: EmbeddingService
  vectorStore?: BaseVectorStore
}): SearchService {
  const service = new SearchService(options)
  return service
}
714
+
715
/**
 * Create a search service with a specific vector store provider.
 *
 * The store is built from `vectorStoreConfig`, handed to a new SearchService
 * and initialized exactly once, through the service, before the service is
 * returned.
 *
 * @param vectorStoreConfig Configuration passed to `createVectorStore`.
 * @param embeddingService Optional embedding service; the SearchService
 *   default is used when omitted.
 * @returns An initialized SearchService backed by the requested store.
 */
export async function createSearchServiceWithVectorStore(
  vectorStoreConfig: VectorStoreConfig,
  embeddingService?: EmbeddingService
): Promise<SearchService> {
  const vectorStore = await createVectorStore(vectorStoreConfig)

  const service = new SearchService({
    embeddingService,
    vectorStore,
  })

  // service.initialize() initializes the vector store and records that on the
  // service. Calling vectorStore.initialize() separately beforehand would run
  // the store's initialization twice.
  await service.initialize()

  return service
}
735
+
736
// Module-level slot for the lazily created shared instance (null until first use)
let _searchService: SearchService | null = null

/**
 * Return the shared SearchService, constructing it with default options the
 * first time it is requested.
 */
export function getSearchService(): SearchService {
  const existing = _searchService
  if (existing) {
    return existing
  }
  const created = new SearchService()
  _searchService = created
  return created
}
748
+
749
/**
 * Forget the shared instance so the next `getSearchService()` call builds a
 * new one (useful for testing). The previous instance is not closed here.
 */
export function resetSearchService(): void {
  _searchService = null
}
755
+
756
// Export default instance (lazy getter for backwards compatibility).
//
// Every property read is forwarded to the current singleton. Methods are
// returned bound to that singleton: if they ran with `this` set to the proxy,
// their writes (e.g. `this.initialized = true`, `this.vectorStore = ...`)
// would land on the proxy's empty target object while reads kept coming from
// the real instance, so the writes would be silently lost.
export const searchService = new Proxy({} as SearchService, {
  get(_target, prop) {
    const service = getSearchService()
    const value: unknown = Reflect.get(service, prop, service)
    return typeof value === 'function' ? value.bind(service) : value
  },
})