@twelvehart/supermemory-runtime 1.0.0-next.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +57 -0
- package/README.md +374 -0
- package/dist/index.js +189 -0
- package/dist/mcp/index.js +1132 -0
- package/docker-compose.prod.yml +91 -0
- package/docker-compose.yml +358 -0
- package/drizzle/0000_dapper_the_professor.sql +159 -0
- package/drizzle/0001_api_keys.sql +51 -0
- package/drizzle/meta/0000_snapshot.json +1532 -0
- package/drizzle/meta/_journal.json +13 -0
- package/drizzle.config.ts +20 -0
- package/package.json +114 -0
- package/scripts/add-extraction-job.ts +122 -0
- package/scripts/benchmark-pgvector.ts +122 -0
- package/scripts/bootstrap.sh +209 -0
- package/scripts/check-runtime-pack.ts +111 -0
- package/scripts/claude-mcp-config.ts +336 -0
- package/scripts/docker-entrypoint.sh +183 -0
- package/scripts/doctor.ts +377 -0
- package/scripts/init-db.sql +33 -0
- package/scripts/install.sh +1110 -0
- package/scripts/mcp-setup.ts +271 -0
- package/scripts/migrations/001_create_pgvector_extension.sql +31 -0
- package/scripts/migrations/002_create_memory_embeddings_table.sql +75 -0
- package/scripts/migrations/003_create_hnsw_index.sql +94 -0
- package/scripts/migrations/004_create_memory_embeddings_standalone.sql +70 -0
- package/scripts/migrations/005_create_chunks_table.sql +95 -0
- package/scripts/migrations/006_create_processing_queue.sql +45 -0
- package/scripts/migrations/generate_test_data.sql +42 -0
- package/scripts/migrations/phase1_comprehensive_test.sql +204 -0
- package/scripts/migrations/run_migrations.sh +286 -0
- package/scripts/migrations/test_hnsw_index.sql +255 -0
- package/scripts/pre-commit-secrets +282 -0
- package/scripts/run-extraction-worker.ts +46 -0
- package/scripts/run-phase1-tests.sh +291 -0
- package/scripts/setup.ts +222 -0
- package/scripts/smoke-install.sh +12 -0
- package/scripts/test-health-endpoint.sh +328 -0
- package/src/api/index.ts +2 -0
- package/src/api/middleware/auth.ts +80 -0
- package/src/api/middleware/csrf.ts +308 -0
- package/src/api/middleware/errorHandler.ts +166 -0
- package/src/api/middleware/rateLimit.ts +360 -0
- package/src/api/middleware/validation.ts +514 -0
- package/src/api/routes/documents.ts +286 -0
- package/src/api/routes/profiles.ts +237 -0
- package/src/api/routes/search.ts +71 -0
- package/src/api/stores/index.ts +58 -0
- package/src/config/bootstrap-env.ts +3 -0
- package/src/config/env.ts +71 -0
- package/src/config/feature-flags.ts +25 -0
- package/src/config/index.ts +140 -0
- package/src/config/secrets.config.ts +291 -0
- package/src/db/client.ts +92 -0
- package/src/db/index.ts +73 -0
- package/src/db/postgres.ts +72 -0
- package/src/db/schema/chunks.schema.ts +31 -0
- package/src/db/schema/containers.schema.ts +46 -0
- package/src/db/schema/documents.schema.ts +49 -0
- package/src/db/schema/embeddings.schema.ts +32 -0
- package/src/db/schema/index.ts +11 -0
- package/src/db/schema/memories.schema.ts +72 -0
- package/src/db/schema/profiles.schema.ts +34 -0
- package/src/db/schema/queue.schema.ts +59 -0
- package/src/db/schema/relationships.schema.ts +42 -0
- package/src/db/schema.ts +223 -0
- package/src/db/worker-connection.ts +47 -0
- package/src/index.ts +235 -0
- package/src/mcp/CLAUDE.md +1 -0
- package/src/mcp/index.ts +1380 -0
- package/src/mcp/legacyState.ts +22 -0
- package/src/mcp/rateLimit.ts +358 -0
- package/src/mcp/resources.ts +309 -0
- package/src/mcp/results.ts +104 -0
- package/src/mcp/tools.ts +401 -0
- package/src/queues/config.ts +119 -0
- package/src/queues/index.ts +289 -0
- package/src/sdk/client.ts +225 -0
- package/src/sdk/errors.ts +266 -0
- package/src/sdk/http.ts +560 -0
- package/src/sdk/index.ts +244 -0
- package/src/sdk/resources/base.ts +65 -0
- package/src/sdk/resources/connections.ts +204 -0
- package/src/sdk/resources/documents.ts +163 -0
- package/src/sdk/resources/index.ts +10 -0
- package/src/sdk/resources/memories.ts +150 -0
- package/src/sdk/resources/search.ts +60 -0
- package/src/sdk/resources/settings.ts +36 -0
- package/src/sdk/types.ts +674 -0
- package/src/services/chunking/index.ts +451 -0
- package/src/services/chunking.service.ts +650 -0
- package/src/services/csrf.service.ts +252 -0
- package/src/services/documents.repository.ts +219 -0
- package/src/services/documents.service.ts +191 -0
- package/src/services/embedding.service.ts +404 -0
- package/src/services/extraction.service.ts +300 -0
- package/src/services/extractors/code.extractor.ts +451 -0
- package/src/services/extractors/index.ts +9 -0
- package/src/services/extractors/markdown.extractor.ts +461 -0
- package/src/services/extractors/pdf.extractor.ts +315 -0
- package/src/services/extractors/text.extractor.ts +118 -0
- package/src/services/extractors/url.extractor.ts +243 -0
- package/src/services/index.ts +235 -0
- package/src/services/ingestion.service.ts +177 -0
- package/src/services/llm/anthropic.ts +400 -0
- package/src/services/llm/base.ts +460 -0
- package/src/services/llm/contradiction-detector.service.ts +526 -0
- package/src/services/llm/heuristics.ts +148 -0
- package/src/services/llm/index.ts +309 -0
- package/src/services/llm/memory-classifier.service.ts +383 -0
- package/src/services/llm/memory-extension-detector.service.ts +523 -0
- package/src/services/llm/mock.ts +470 -0
- package/src/services/llm/openai.ts +398 -0
- package/src/services/llm/prompts.ts +438 -0
- package/src/services/llm/types.ts +373 -0
- package/src/services/memory.repository.ts +1769 -0
- package/src/services/memory.service.ts +1338 -0
- package/src/services/memory.types.ts +234 -0
- package/src/services/persistence/index.ts +295 -0
- package/src/services/pipeline.service.ts +509 -0
- package/src/services/profile.repository.ts +436 -0
- package/src/services/profile.service.ts +560 -0
- package/src/services/profile.types.ts +270 -0
- package/src/services/relationships/detector.ts +1128 -0
- package/src/services/relationships/index.ts +268 -0
- package/src/services/relationships/memory-integration.ts +459 -0
- package/src/services/relationships/strategies.ts +132 -0
- package/src/services/relationships/types.ts +370 -0
- package/src/services/search.service.ts +761 -0
- package/src/services/search.types.ts +220 -0
- package/src/services/secrets.service.ts +384 -0
- package/src/services/vectorstore/base.ts +327 -0
- package/src/services/vectorstore/index.ts +444 -0
- package/src/services/vectorstore/memory.ts +286 -0
- package/src/services/vectorstore/migration.ts +295 -0
- package/src/services/vectorstore/mock.ts +403 -0
- package/src/services/vectorstore/pgvector.ts +695 -0
- package/src/services/vectorstore/types.ts +247 -0
- package/src/startup.ts +389 -0
- package/src/types/api.types.ts +193 -0
- package/src/types/document.types.ts +103 -0
- package/src/types/index.ts +241 -0
- package/src/types/profile.base.ts +133 -0
- package/src/utils/errors.ts +447 -0
- package/src/utils/id.ts +15 -0
- package/src/utils/index.ts +101 -0
- package/src/utils/logger.ts +313 -0
- package/src/utils/sanitization.ts +501 -0
- package/src/utils/secret-validation.ts +273 -0
- package/src/utils/synonyms.ts +188 -0
- package/src/utils/validation.ts +581 -0
- package/src/workers/chunking.worker.ts +242 -0
- package/src/workers/embedding.worker.ts +358 -0
- package/src/workers/extraction.worker.ts +346 -0
- package/src/workers/indexing.worker.ts +505 -0
- package/tsconfig.json +38 -0
|
@@ -0,0 +1,761 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Search Service for Supermemory Clone
|
|
3
|
+
*
|
|
4
|
+
* Provides hybrid search combining vector similarity and memory graph search
|
|
5
|
+
* with reranking and query rewriting capabilities.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { EmbeddingService, createEmbeddingService } from './embedding.service.js'
|
|
9
|
+
import {
|
|
10
|
+
SearchOptions,
|
|
11
|
+
SearchResult,
|
|
12
|
+
SearchResponse,
|
|
13
|
+
Memory,
|
|
14
|
+
Chunk,
|
|
15
|
+
MetadataFilter,
|
|
16
|
+
DEFAULT_SEARCH_OPTIONS,
|
|
17
|
+
RerankOptions,
|
|
18
|
+
QueryRewriteOptions,
|
|
19
|
+
} from './search.types.js'
|
|
20
|
+
import {
|
|
21
|
+
BaseVectorStore,
|
|
22
|
+
createVectorStore,
|
|
23
|
+
createPgVectorStore,
|
|
24
|
+
createInMemoryVectorStore,
|
|
25
|
+
getDefaultVectorStoreConfig,
|
|
26
|
+
VectorStoreConfig,
|
|
27
|
+
VectorSearchResult as VectorStoreSearchResult,
|
|
28
|
+
} from './vectorstore/index.js'
|
|
29
|
+
import { expandQuery } from '../utils/synonyms.js'
|
|
30
|
+
import { getDatabaseUrl, isPostgresUrl } from '../db/client.js'
|
|
31
|
+
import { getPostgresDatabase } from '../db/postgres.js'
|
|
32
|
+
import { documents } from '../db/schema/documents.schema.js'
|
|
33
|
+
import { and, desc, eq, sql } from 'drizzle-orm'
|
|
34
|
+
|
|
35
|
+
/**
|
|
36
|
+
* Internal result type for compatibility with search types
|
|
37
|
+
*/
|
|
38
|
+
interface InternalVectorSearchResult {
  /** The stored vector entry this hit refers to: its id, raw embedding and attached metadata. */
  entry: { id: string; embedding: number[]; metadata: Record<string, unknown> }
  /** Similarity score reported for the hit. */
  similarity: number
}
|
|
46
|
+
|
|
47
|
+
/**
|
|
48
|
+
* In-memory memory graph for development/testing
|
|
49
|
+
*/
|
|
50
|
+
class InMemoryMemoryGraph {
  /** Memories keyed by memory id. */
  private memories: Map<string, Memory> = new Map()
  /** Chunks grouped by the id of the memory they belong to. */
  private chunksByMemoryId: Map<string, Chunk[]> = new Map()

  /** Insert a memory, replacing any memory already stored under the same id. */
  addMemory(memory: Memory): void {
    this.memories.set(memory.id, memory)
  }

  /**
   * Insert a chunk under its parent memory.
   *
   * A chunk whose id is already registered for that memory is replaced in place
   * rather than appended, so adding the same chunk twice does not accumulate
   * duplicates in the list returned by `getChunks`.
   */
  addChunk(chunk: Chunk): void {
    const chunks = this.chunksByMemoryId.get(chunk.memoryId) ?? []
    const existingIndex = chunks.findIndex((c) => c.id === chunk.id)
    if (existingIndex >= 0) {
      chunks[existingIndex] = chunk
    } else {
      chunks.push(chunk)
    }
    this.chunksByMemoryId.set(chunk.memoryId, chunks)
  }

  /** Look up a memory by id; `undefined` when it is not stored. */
  getMemory(id: string): Memory | undefined {
    return this.memories.get(id)
  }

  /** Chunks registered for a memory id; an empty array when there are none. */
  getChunks(memoryId: string): Chunk[] {
    return this.chunksByMemoryId.get(memoryId) ?? []
  }

  /** Snapshot array of every stored memory. */
  getAllMemories(): Memory[] {
    return Array.from(this.memories.values())
  }

  /**
   * Remove a memory together with its chunks.
   *
   * @returns whether a memory entry was actually deleted, and how many chunks were dropped with it.
   */
  removeMemory(memoryId: string): { removed: boolean; chunkCount: number } {
    const removed = this.memories.delete(memoryId)
    // Count before deleting the chunk list, otherwise the count is always 0.
    const chunkCount = this.getChunks(memoryId).length
    this.chunksByMemoryId.delete(memoryId)
    return { removed, chunkCount }
  }

  /** Memories whose `containerTag` equals the given tag exactly. */
  searchByTag(containerTag: string): Memory[] {
    return Array.from(this.memories.values()).filter((m) => m.containerTag === containerTag)
  }

  /**
   * Case-insensitive token search over memory content.
   *
   * The query is split on whitespace; a memory's score is the fraction of query
   * tokens that occur as substrings of its content. Memories scoring above 0.3
   * are returned, best score first.
   */
  searchByContent(query: string): Memory[] {
    const tokens = query
      .toLowerCase()
      .split(/\s+/)
      .filter((t) => t.length > 0)

    // A blank query has no tokens; return early instead of dividing by zero below.
    if (tokens.length === 0) return []

    return Array.from(this.memories.values())
      .map((memory) => {
        const content = memory.content.toLowerCase()
        const matchCount = tokens.filter((token) => content.includes(token)).length
        return { memory, score: matchCount / tokens.length }
      })
      .filter(({ score }) => score > 0.3)
      .sort((a, b) => b.score - a.score)
      .map(({ memory }) => memory)
  }

  /** Drop every memory and chunk. */
  clear(): void {
    this.memories.clear()
    this.chunksByMemoryId.clear()
  }
}
|
|
108
|
+
|
|
109
|
+
/**
|
|
110
|
+
* Search Service class
|
|
111
|
+
*/
|
|
112
|
+
export class SearchService {
|
|
113
|
+
private readonly embeddingService: EmbeddingService
|
|
114
|
+
private vectorStore: BaseVectorStore
|
|
115
|
+
private readonly memoryGraph: InMemoryMemoryGraph
|
|
116
|
+
private initialized = false
|
|
117
|
+
|
|
118
|
+
constructor(options?: { embeddingService?: EmbeddingService; vectorStore?: BaseVectorStore }) {
  this.embeddingService = options?.embeddingService || createEmbeddingService()

  // Both lookups run unconditionally, even when a vector store is injected.
  const connectionString = getDatabaseUrl()
  const defaultConfig = getDefaultVectorStoreConfig()

  // Builds the store used when the caller does not inject one: an in-memory store
  // under NODE_ENV=test or without a Postgres URL, a pgvector-backed store otherwise.
  const buildDefaultStore = (): BaseVectorStore => {
    const inMemory = process.env.NODE_ENV === 'test' || !isPostgresUrl(connectionString)
    const dimensions = this.embeddingService.getDimensions()
    const storeOptions = {
      metric: defaultConfig.metric,
      hnswConfig: defaultConfig.hnswConfig,
      defaultNamespace: defaultConfig.defaultNamespace,
      indexType: defaultConfig.indexType,
    }
    return inMemory
      ? createInMemoryVectorStore(dimensions, storeOptions)
      : createPgVectorStore(connectionString, dimensions, storeOptions)
  }

  this.vectorStore = options?.vectorStore || buildDefaultStore()
  this.memoryGraph = new InMemoryMemoryGraph()
}
|
|
145
|
+
|
|
146
|
+
/**
|
|
147
|
+
* Initialize the search service (initializes vector store)
|
|
148
|
+
*/
|
|
149
|
+
async initialize(): Promise<void> {
  // Only the first call (or the first after a reset of the flag) initializes the vector store.
  if (!this.initialized) {
    await this.vectorStore.initialize()
    this.initialized = true
  }
}
|
|
154
|
+
|
|
155
|
+
/**
|
|
156
|
+
* Set a custom vector store (useful for testing or changing providers)
|
|
157
|
+
*/
|
|
158
|
+
setVectorStore(vectorStore: BaseVectorStore): void {
  // Reset the flag first so the replacement store is initialized on next use.
  this.initialized = false
  this.vectorStore = vectorStore
}
|
|
162
|
+
|
|
163
|
+
/**
|
|
164
|
+
* Get the vector store
|
|
165
|
+
*/
|
|
166
|
+
getVectorStore(): BaseVectorStore {
  // Returns the live store instance currently held by the service (not a copy).
  const { vectorStore } = this
  return vectorStore
}
|
|
169
|
+
|
|
170
|
+
/**
|
|
171
|
+
* Get the embedding service
|
|
172
|
+
*/
|
|
173
|
+
getEmbeddingService(): EmbeddingService {
  // Returns the embedding service chosen at construction time.
  const { embeddingService } = this
  return embeddingService
}
|
|
176
|
+
|
|
177
|
+
/**
|
|
178
|
+
* Index a memory and its chunks
|
|
179
|
+
*/
|
|
180
|
+
/**
 * Index a memory (and optionally its chunks) in both the in-process memory graph
 * and the vector store. Existing vector entries with the same ids are overwritten.
 *
 * Side effects on the arguments: a missing `memory.embedding` is generated and
 * written back onto `memory`; every indexed chunk gets its `embedding` assigned.
 *
 * @param memory - memory to index; embedded from `memory.content` when it has no embedding yet
 * @param chunks - optional chunks of the memory, embedded in one batch call
 */
async indexMemory(memory: Memory, chunks?: Chunk[]): Promise<void> {
  // Lazily bring up the vector store on first use.
  if (!this.initialized) {
    await this.initialize()
  }

  if (!memory.embedding) {
    memory.embedding = await this.embeddingService.generateEmbedding(memory.content)
  }

  this.memoryGraph.addMemory(memory)

  await this.vectorStore.add(
    {
      id: memory.id,
      embedding: memory.embedding,
      metadata: {
        // User metadata is spread FIRST so the reserved keys below always win.
        // `type` is the discriminator used to tell memories from chunks when
        // vector hits are converted back into search results.
        ...memory.metadata,
        type: 'memory',
        containerTag: memory.containerTag,
      },
    },
    { overwrite: true }
  )

  if (!chunks || chunks.length === 0) return

  const chunkEmbeddings = await this.embeddingService.batchEmbed(chunks.map((c) => c.content))

  const vectorEntries = []
  for (let i = 0; i < chunks.length; i++) {
    const chunk = chunks[i]
    const embedding = chunkEmbeddings[i]
    // Skip positions with no chunk or no embedding (e.g. a shorter batch result).
    if (!chunk || !embedding) continue

    chunk.embedding = embedding
    this.memoryGraph.addChunk(chunk)

    vectorEntries.push({
      id: chunk.id,
      embedding,
      metadata: {
        // Same precedence rule as for the memory entry: reserved keys win, so a
        // `type` or `memoryId` key in chunk metadata cannot detach the chunk
        // from its parent memory.
        ...chunk.metadata,
        type: 'chunk',
        memoryId: chunk.memoryId,
        chunkIndex: chunk.chunkIndex,
      },
    })
  }

  // One batched write for all chunk vectors.
  if (vectorEntries.length > 0) {
    await this.vectorStore.addBatch(vectorEntries, { overwrite: true })
  }
}
|
|
240
|
+
|
|
241
|
+
/**
|
|
242
|
+
* Perform hybrid search combining vector and memory graph search
|
|
243
|
+
*/
|
|
244
|
+
/**
 * Run a search in the configured mode and post-process the hits.
 *
 * Pipeline: optional query rewrite -> mode-specific search -> container tag
 * filter -> metadata filters -> date range filter -> optional rerank -> sort by
 * score -> limit -> optional stripping of embeddings.
 *
 * @param query - user query text
 * @param containerTag - when given, only hits belonging to this container are kept
 * @param options - overrides merged on top of `DEFAULT_SEARCH_OPTIONS`
 * @returns the hits plus the effective query, options, pre-limit hit count and elapsed time
 */
async hybridSearch(query: string, containerTag?: string, options?: Partial<SearchOptions>): Promise<SearchResponse> {
  const startTime = Date.now()
  const searchOptions: SearchOptions = { ...DEFAULT_SEARCH_OPTIONS, ...options }

  let searchQuery = query
  let originalQuery: string | undefined

  // Keep the untouched query only when it was actually rewritten.
  if (searchOptions.rewriteQuery) {
    originalQuery = query
    searchQuery = await this.rewriteQuery(query)
  }

  let results: SearchResult[] = []

  switch (searchOptions.searchMode) {
    case 'vector':
      results = await this.vectorSearchInternal(searchQuery, searchOptions)
      break
    case 'memory':
      results = this.memorySearchInternal(searchQuery, containerTag, searchOptions)
      break
    case 'fulltext':
      results = await this.fullTextSearchInternal(searchQuery, containerTag, searchOptions)
      break
    case 'hybrid':
    default:
      results = await this.combineSearchResults(searchQuery, containerTag, searchOptions)
      break
  }

  if (containerTag) {
    results = results.filter((r) => {
      if (r.memory) return r.memory.containerTag === containerTag
      if (r.chunk) {
        const parent = this.memoryGraph.getMemory(r.chunk.memoryId)
        if (parent) return parent.containerTag === containerTag
      }
      // The in-process graph does not know this hit (it only holds what this
      // instance indexed). Fall back to the tag stored in the hit's metadata
      // rather than discarding a hit that may belong to the container.
      return r.metadata.containerTag === containerTag
    })
  }

  if (searchOptions.filters && searchOptions.filters.length > 0) {
    results = this.applyFilters(results, searchOptions.filters)
  }

  if (searchOptions.dateRange) {
    results = this.applyDateFilter(results, searchOptions.dateRange)
  }

  // Reranking a single hit cannot change anything, so it is skipped.
  if (searchOptions.rerank && results.length > 1) {
    results = await this.rerank(results, searchQuery)
  }

  // Best first; the rerank score takes precedence over raw similarity when present.
  results.sort((a, b) => {
    const scoreA = a.rerankScore ?? a.similarity
    const scoreB = b.rerankScore ?? b.similarity
    return scoreB - scoreA
  })

  // totalCount is the number of hits that survived filtering, before the limit.
  const totalCount = results.length
  results = results.slice(0, searchOptions.limit)

  // Return copies without embeddings unless the caller asked for them.
  if (!searchOptions.includeEmbeddings) {
    results = results.map((r) => ({
      ...r,
      memory: r.memory ? { ...r.memory, embedding: undefined } : undefined,
      chunk: r.chunk ? { ...r.chunk, embedding: undefined } : undefined,
    }))
  }

  const searchTimeMs = Date.now() - startTime

  return {
    results,
    totalCount,
    query: searchQuery,
    originalQuery,
    searchTimeMs,
    options: searchOptions,
  }
}
|
|
333
|
+
|
|
334
|
+
/**
|
|
335
|
+
* Pure vector similarity search
|
|
336
|
+
*/
|
|
337
|
+
async vectorSearch(embedding: number[], limit: number = 10, threshold: number = 0.7): Promise<SearchResult[]> {
  // The store is brought up on demand.
  if (!this.initialized) await this.initialize()

  const searchParams = { limit, threshold, includeMetadata: true }
  const hits = await this.vectorStore.search(embedding, searchParams)

  // Convert each store hit into the service's SearchResult shape.
  const converted: SearchResult[] = []
  for (const hit of hits) {
    converted.push(this.vectorStoreResultToSearchResult(hit))
  }
  return converted
}
|
|
351
|
+
|
|
352
|
+
/**
|
|
353
|
+
* Search through memory graph
|
|
354
|
+
*/
|
|
355
|
+
memorySearch(query: string, containerTag?: string): SearchResult[] {
  // Public wrapper: memory-graph search using the default search options.
  const hits = this.memorySearchInternal(query, containerTag, DEFAULT_SEARCH_OPTIONS)
  return hits
}
|
|
358
|
+
|
|
359
|
+
/**
|
|
360
|
+
* Rerank results using cross-encoder scoring
|
|
361
|
+
* Note: In production, this would use a proper cross-encoder model
|
|
362
|
+
*/
|
|
363
|
+
/**
 * Lexical reranker: blends each hit's similarity with query-term overlap and a
 * boost for matches that appear early in the content.
 *
 * Only the first `options.topK` hits (all hits when unset) are rescored and
 * re-sorted; the remaining hits are appended unchanged after them.
 *
 * @param results - hits to rerank; the input array is not mutated
 * @param query - query whose whitespace-separated terms (longer than one character) drive the scoring
 * @param options - optional `topK` cut-off
 * @returns a new array; rescored hits carry a `rerankScore` capped at 1
 */
async rerank(results: SearchResult[], query: string, options?: RerankOptions): Promise<SearchResult[]> {
  const queryTokens = new Set(
    query
      .toLowerCase()
      .split(/\s+/)
      .filter((t) => t.length > 1)
  )

  // With no usable query term the overlap ratio below would be 0/0 = NaN, and a
  // NaN rerankScore breaks score-based sorting downstream. There is nothing to
  // rerank on, so return the hits in their existing order with no rerankScore.
  if (queryTokens.size === 0) {
    return [...results]
  }

  const topK = options?.topK || results.length
  const toRerank = results.slice(0, topK)

  const reranked = toRerank.map((result) => {
    const content = (result.memory?.content || result.chunk?.content || '').toLowerCase()
    const contentTokens = content.split(/\s+/)

    let matchCount = 0
    let positionBoost = 0

    for (let i = 0; i < contentTokens.length; i++) {
      const token = contentTokens[i]
      if (token && queryTokens.has(token)) {
        matchCount++
        // Matches earlier in the content contribute more.
        positionBoost += 1 / (1 + i * 0.01)
      }
    }

    // Weights: 50% original similarity, 30% term overlap, 20% position boost.
    const termOverlap = matchCount / queryTokens.size
    const rerankScore = result.similarity * 0.5 + termOverlap * 0.3 + positionBoost * 0.2

    return {
      ...result,
      rerankScore: Math.min(1, rerankScore),
    }
  })

  reranked.sort((a, b) => (b.rerankScore || 0) - (a.rerankScore || 0))

  // Hits beyond topK keep their relative order after the reranked ones.
  return [...reranked, ...results.slice(topK)]
}
|
|
409
|
+
|
|
410
|
+
/**
|
|
411
|
+
* Expand/rewrite query for better recall.
|
|
412
|
+
*
|
|
413
|
+
* Uses the shared synonyms utility for consistent expansion across services.
|
|
414
|
+
* In production, this would be augmented with an LLM for intelligent query rewriting.
|
|
415
|
+
*/
|
|
416
|
+
async rewriteQuery(query: string, options?: QueryRewriteOptions): Promise<string> {
  // Both expansions are on unless the caller explicitly passes `false`.
  const synonymsEnabled = options?.includeSynonyms !== false
  const abbreviationsEnabled = options?.expandAbbreviations !== false

  return expandQuery(query, {
    includeSynonyms: synonymsEnabled,
    expandAbbreviations: abbreviationsEnabled,
    maxSynonymsPerTerm: 2,
  })
}
|
|
423
|
+
|
|
424
|
+
/**
|
|
425
|
+
* Clear all indexed data
|
|
426
|
+
*/
|
|
427
|
+
async clear(): Promise<void> {
  // Vector store first; the in-process graph is only wiped once that succeeded.
  const { vectorStore, memoryGraph } = this
  await vectorStore.clear()
  memoryGraph.clear()
}
|
|
431
|
+
|
|
432
|
+
/**
|
|
433
|
+
* Get statistics about indexed data
|
|
434
|
+
*/
|
|
435
|
+
async getStats(): Promise<{ vectorCount: number; memoryCount: number }> {
  // vectorCount comes from the vector store, memoryCount from the in-process graph.
  const { totalVectors } = await this.vectorStore.getStats()
  const memoryCount = this.memoryGraph.getAllMemories().length
  return { vectorCount: totalVectors, memoryCount }
}
|
|
442
|
+
|
|
443
|
+
/**
|
|
444
|
+
* Remove a memory from the index
|
|
445
|
+
*/
|
|
446
|
+
async removeMemory(memoryId: string): Promise<{ removed: boolean; vectorsDeleted: number }> {
  // 1. The memory's own vector.
  const memoryVectorsDeleted = await this.vectorStore.delete({ ids: [memoryId] })

  // 2. Vectors of the chunks the in-process graph knows for this memory.
  const chunkIds = this.memoryGraph.getChunks(memoryId).map((c) => c.id)
  const chunkVectorsDeleted = chunkIds.length > 0 ? await this.vectorStore.delete({ ids: chunkIds }) : 0

  // 3. The graph entries themselves.
  const graphRemoval = this.memoryGraph.removeMemory(memoryId)

  // "removed" is true as soon as anything was deleted anywhere.
  const touchedAnything =
    memoryVectorsDeleted > 0 || chunkVectorsDeleted > 0 || graphRemoval.removed || graphRemoval.chunkCount > 0

  return {
    removed: touchedAnything,
    vectorsDeleted: memoryVectorsDeleted + chunkVectorsDeleted,
  }
}
|
|
465
|
+
|
|
466
|
+
async close(): Promise<void> {
  // Close the store, then drop in-process state and reset the flag so a later
  // call that checks it initializes the store again.
  const { vectorStore, memoryGraph } = this
  await vectorStore.close()
  memoryGraph.clear()
  this.initialized = false
}
|
|
471
|
+
|
|
472
|
+
// Private methods
|
|
473
|
+
|
|
474
|
+
private async vectorSearchInternal(query: string, options: SearchOptions): Promise<SearchResult[]> {
  // The store is brought up on demand.
  if (!this.initialized) await this.initialize()

  const queryEmbedding = await this.embeddingService.generateEmbedding(query)

  // Over-fetch (twice the limit) so later filtering still leaves enough hits.
  const overFetchLimit = options.limit * 2
  const hits = await this.vectorStore.search(queryEmbedding, {
    limit: overFetchLimit,
    threshold: options.threshold,
    includeMetadata: true,
  })

  const converted: SearchResult[] = []
  for (const hit of hits) {
    converted.push(this.vectorStoreResultToSearchResult(hit))
  }
  return converted
}
|
|
489
|
+
|
|
490
|
+
/**
 * Search the in-process memory graph.
 *
 * With a container tag, memories of that container are kept when their content
 * contains the whole query as a case-insensitive substring. Without a tag, the
 * graph's token-based content search is used.
 * NOTE(review): the two paths use different matching rules (whole-query substring
 * vs. per-token overlap) — confirm this asymmetry is intended.
 *
 * Hits get a synthetic similarity that decays by 0.05 per rank position and is
 * floored at 0, so limits above 20 never produce negative scores.
 */
private memorySearchInternal(
  query: string,
  containerTag: string | undefined,
  options: SearchOptions
): SearchResult[] {
  let memories: Memory[]

  if (containerTag) {
    const lowerQuery = query.toLowerCase()
    memories = this.memoryGraph
      .searchByTag(containerTag)
      .filter((m) => m.content.toLowerCase().includes(lowerQuery))
  } else {
    memories = this.memoryGraph.searchByContent(query)
  }

  return memories.slice(0, options.limit).map((memory, index) => ({
    id: memory.id,
    memory,
    // Position-based decay, clamped so it cannot go below zero.
    similarity: Math.max(0, 1 - index * 0.05),
    metadata: memory.metadata || {},
    updatedAt: memory.updatedAt,
    source: 'memory' as const,
  }))
}
|
|
515
|
+
|
|
516
|
+
private async combineSearchResults(
  query: string,
  containerTag: string | undefined,
  options: SearchOptions
): Promise<SearchResult[]> {
  // Vector and full-text searches run concurrently.
  const [vectorHits, textHits] = await Promise.all([
    this.vectorSearchInternal(query, options),
    this.fullTextSearchInternal(query, containerTag, options),
  ])

  // Seed the merge map with the vector hits, keyed by result id.
  const merged = new Map<string, SearchResult>()
  vectorHits.forEach((hit) => merged.set(hit.id, hit))

  // Fold in the full-text hits. An id found by both searches keeps the vector
  // hit's fields, takes the higher of the two similarities and is tagged 'hybrid'.
  for (const hit of textHits) {
    const prior = merged.get(hit.id)
    if (!prior) {
      merged.set(hit.id, hit)
      continue
    }
    merged.set(hit.id, {
      ...prior,
      similarity: Math.max(prior.similarity, hit.similarity),
      source: 'hybrid',
    })
  }

  return [...merged.values()]
}
|
|
552
|
+
|
|
553
|
+
private async fullTextSearchInternal(
  query: string,
  containerTag: string | undefined,
  options: SearchOptions
): Promise<SearchResult[]> {
  const connectionString = getDatabaseUrl()

  // Under NODE_ENV=test, or without a Postgres URL, fall back to the in-memory
  // search so behavior stays deterministic.
  const usePostgres = process.env.NODE_ENV !== 'test' && isPostgresUrl(connectionString)
  if (!usePostgres) {
    return this.memorySearchInternal(query, containerTag, options)
  }

  const db = getPostgresDatabase(connectionString)

  // Relevance rank of the document content against the query (passed as a bound parameter).
  const rankExpr = sql<number>`
    ts_rank_cd(
      to_tsvector('english', ${documents.content}),
      plainto_tsquery('english', ${query})
    )
  `

  // Match predicate: the document's text-search vector matches the query.
  const textMatch = sql<boolean>`
    to_tsvector('english', ${documents.content})
    @@
    plainto_tsquery('english', ${query})
  `
  const whereClause = containerTag ? and(textMatch, eq(documents.containerTag, containerTag)) : textMatch

  // Best rank first, most recently updated as tie-breaker; over-fetch twice the limit.
  const rows = await db
    .select({
      id: documents.id,
      content: documents.content,
      containerTag: documents.containerTag,
      metadata: documents.metadata,
      createdAt: documents.createdAt,
      updatedAt: documents.updatedAt,
      rank: rankExpr,
    })
    .from(documents)
    .where(whereClause)
    .orderBy(desc(rankExpr), desc(documents.updatedAt))
    .limit(options.limit * 2)

  return rows.map((row) => {
    // Normalize loosely typed columns before building the result.
    const metadata = row.metadata && typeof row.metadata === 'object' ? (row.metadata as Record<string, unknown>) : {}
    const createdAt = row.createdAt instanceof Date ? row.createdAt : new Date(row.createdAt)
    const updatedAt = row.updatedAt instanceof Date ? row.updatedAt : new Date(row.updatedAt)
    // Clamp the rank into [0, 1] so it is comparable with similarity scores.
    const rawRank = Number(row.rank ?? 0)
    const score = Math.max(0, Math.min(1, rawRank))

    return {
      id: row.id,
      // Each document row is surfaced as a synthetic 'fact' memory.
      memory: {
        id: row.id,
        content: row.content,
        type: 'fact',
        relationships: [],
        isLatest: true,
        containerTag: row.containerTag,
        metadata,
        createdAt,
        updatedAt,
        confidence: 1,
        sourceId: row.id,
      },
      similarity: score,
      metadata,
      updatedAt,
      source: 'fulltext',
    }
  })
}
|
|
623
|
+
|
|
624
|
+
private vectorResultToSearchResult(vr: InternalVectorSearchResult): SearchResult {
  const { entry, similarity } = vr

  // A chunk hit points at its parent through metadata.memoryId; a memory hit is its own parent.
  const isChunk = entry.metadata.type === 'chunk'
  const memoryId = isChunk ? (entry.metadata.memoryId as string) : entry.id

  // Resolve the memory (and, for chunk hits, the chunk) from the in-process graph.
  const memory = this.memoryGraph.getMemory(memoryId)
  const chunk = isChunk ? this.memoryGraph.getChunks(memoryId).find((c) => c.id === entry.id) : undefined

  return {
    id: entry.id,
    memory,
    chunk,
    similarity,
    // Metadata on the resolved memory overrides the metadata stored with the vector.
    metadata: { ...entry.metadata, ...(memory?.metadata || {}) },
    // Falls back to "now" when the memory is not in the graph.
    updatedAt: memory?.updatedAt || new Date(),
    source: 'vector',
  }
}
|
|
641
|
+
|
|
642
|
+
/**
 * Convert a VectorStoreSearchResult into a SearchResult.
 *
 * A hit whose metadata `type` is 'chunk' is resolved to its parent memory via
 * `metadata.memoryId`, and the matching chunk is located by the hit's id.
 * Any other hit uses its own id as the memory id and carries no chunk.
 *
 * @param vr - Hit returned by the vector store
 * @returns Search result tagged with source 'vector', using the hit's score as similarity
 */
private vectorStoreResultToSearchResult(vr: VectorStoreSearchResult): SearchResult {
  const hitId = vr.id
  const hitMetadata = vr.metadata
  const isChunk = hitMetadata.type === 'chunk'
  const memoryId = isChunk ? (hitMetadata.memoryId as string) : hitId

  const memory = this.memoryGraph.getMemory(memoryId)
  const chunk = isChunk
    ? this.memoryGraph.getChunks(memoryId).find((candidate) => candidate.id === hitId)
    : undefined

  // Memory metadata is spread last, so its keys win over the hit's on conflict.
  const mergedMetadata = { ...hitMetadata, ...(memory?.metadata || {}) }
  // Without a memory (or a memory timestamp) the current time is used instead.
  const updatedAt = memory?.updatedAt || new Date()

  return {
    id: hitId,
    memory,
    chunk,
    similarity: vr.score,
    metadata: mergedMetadata,
    updatedAt,
    source: 'vector',
  }
}
|
|
662
|
+
|
|
663
|
+
/**
 * Keep only the results whose metadata satisfies every filter.
 *
 * A filter fails whenever its key is absent (undefined) in the result's
 * metadata, whatever the operator. The operator defaults to 'eq'.
 * 'gt' / 'gte' / 'lt' / 'lte' match only when both sides are numbers,
 * 'contains' / 'startsWith' match only when both sides are strings, and an
 * unrecognized operator never matches.
 *
 * @param results - Candidate search results
 * @param filters - Metadata filters that must all hold
 * @returns The results that pass every filter, in their original order
 */
private applyFilters(results: SearchResult[], filters: MetadataFilter[]): SearchResult[] {
  const kept: SearchResult[] = []

  for (const candidate of results) {
    const fields = candidate.metadata
    let passesAll = true

    for (const rule of filters) {
      const actual = fields[rule.key]
      const expected = rule.value

      // Ordered comparisons are only meaningful between two numbers.
      const numeric = (compare: (a: number, b: number) => boolean): boolean =>
        typeof actual === 'number' && typeof expected === 'number' && compare(actual, expected)
      // Substring / prefix checks are only meaningful between two strings.
      const textual = (compare: (a: string, b: string) => boolean): boolean =>
        typeof actual === 'string' && typeof expected === 'string' && compare(actual, expected)

      let passes: boolean
      if (actual === undefined) {
        // A missing key fails the filter before the operator is even considered.
        passes = false
      } else {
        switch (rule.operator || 'eq') {
          case 'eq':
            passes = actual === expected
            break
          case 'ne':
            passes = actual !== expected
            break
          case 'gt':
            passes = numeric((a, b) => a > b)
            break
          case 'gte':
            passes = numeric((a, b) => a >= b)
            break
          case 'lt':
            passes = numeric((a, b) => a < b)
            break
          case 'lte':
            passes = numeric((a, b) => a <= b)
            break
          case 'contains':
            passes = textual((a, b) => a.includes(b))
            break
          case 'startsWith':
            passes = textual((a, b) => a.startsWith(b))
            break
          default:
            passes = false
        }
      }

      if (!passes) {
        // Stop at the first failing filter; the result is rejected.
        passesAll = false
        break
      }
    }

    if (passesAll) {
      kept.push(candidate)
    }
  }

  return kept
}
|
|
694
|
+
|
|
695
|
+
/**
 * Keep only the results whose `updatedAt` falls inside the given range.
 *
 * Both bounds are optional and inclusive: a result is dropped only when it is
 * strictly earlier than `from` or strictly later than `to`.
 *
 * @param results - Candidate search results
 * @param dateRange - Optional lower (`from`) and upper (`to`) bounds
 * @returns The results inside the range, in their original order
 */
private applyDateFilter(results: SearchResult[], dateRange: { from?: Date; to?: Date }): SearchResult[] {
  return results.filter(({ updatedAt }) => {
    const beforeStart = dateRange.from && updatedAt < dateRange.from
    if (beforeStart) {
      return false
    }
    const afterEnd = dateRange.to && updatedAt > dateRange.to
    return !afterEnd
  })
}
|
|
703
|
+
}
|
|
704
|
+
|
|
705
|
+
/**
 * Build a fresh SearchService.
 *
 * @param options - Optional embedding service and vector store, passed
 *   unchanged to the SearchService constructor
 * @returns The newly constructed service
 */
export function createSearchService(options?: {
  embeddingService?: EmbeddingService
  vectorStore?: BaseVectorStore
}): SearchService {
  const service = new SearchService(options)
  return service
}
|
|
714
|
+
|
|
715
|
+
/**
 * Build a SearchService backed by a vector store created from the given config.
 *
 * The vector store is created and initialized first, then handed to a new
 * SearchService whose own `initialize()` is awaited before it is returned.
 *
 * @param vectorStoreConfig - Configuration passed to `createVectorStore`
 * @param embeddingService - Optional embedding service forwarded to the SearchService
 * @returns The initialized search service
 */
export async function createSearchServiceWithVectorStore(
  vectorStoreConfig: VectorStoreConfig,
  embeddingService?: EmbeddingService
): Promise<SearchService> {
  const store = await createVectorStore(vectorStoreConfig)
  await store.initialize()

  const service = new SearchService({ embeddingService, vectorStore: store })

  // The store was initialized above; the service still runs its own initialize() here.
  await service.initialize()

  return service
}
|
|
735
|
+
|
|
736
|
+
// Module-level singleton; stays null until getSearchService() is first called
let _searchService: SearchService | null = null

/**
 * Return the shared SearchService, constructing it with default options on
 * first use and reusing that instance afterwards.
 */
export function getSearchService(): SearchService {
  if (_searchService) {
    return _searchService
  }
  const created = new SearchService()
  _searchService = created
  return created
}
|
|
748
|
+
|
|
749
|
+
/**
 * Drop the shared SearchService so the next getSearchService() call builds a
 * new one (useful for testing).
 */
export function resetSearchService(): void {
  _searchService = null
}
|
|
755
|
+
|
|
756
|
+
/**
 * Default search service export (lazy getter for backwards compatibility).
 *
 * Every property read is forwarded to the instance returned by
 * getSearchService(), so the service is only constructed on first access.
 */
export const searchService = new Proxy({} as SearchService, {
  get(_, prop) {
    const instance = getSearchService()
    const value: unknown = Reflect.get(instance, prop, instance)
    // Bind methods to the real instance. Returned unbound, a call such as
    // `searchService.initialize()` would run with `this` set to this proxy, and
    // any `this.x = ...` inside the method would land on the empty proxy target
    // where later reads (which go through this trap) never see it.
    return typeof value === 'function' ? value.bind(instance) : value
  },
})
|