@twelvehart/supermemory-runtime 1.0.0-next.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +57 -0
- package/README.md +374 -0
- package/dist/index.js +189 -0
- package/dist/mcp/index.js +1132 -0
- package/docker-compose.prod.yml +91 -0
- package/docker-compose.yml +358 -0
- package/drizzle/0000_dapper_the_professor.sql +159 -0
- package/drizzle/0001_api_keys.sql +51 -0
- package/drizzle/meta/0000_snapshot.json +1532 -0
- package/drizzle/meta/_journal.json +13 -0
- package/drizzle.config.ts +20 -0
- package/package.json +114 -0
- package/scripts/add-extraction-job.ts +122 -0
- package/scripts/benchmark-pgvector.ts +122 -0
- package/scripts/bootstrap.sh +209 -0
- package/scripts/check-runtime-pack.ts +111 -0
- package/scripts/claude-mcp-config.ts +336 -0
- package/scripts/docker-entrypoint.sh +183 -0
- package/scripts/doctor.ts +377 -0
- package/scripts/init-db.sql +33 -0
- package/scripts/install.sh +1110 -0
- package/scripts/mcp-setup.ts +271 -0
- package/scripts/migrations/001_create_pgvector_extension.sql +31 -0
- package/scripts/migrations/002_create_memory_embeddings_table.sql +75 -0
- package/scripts/migrations/003_create_hnsw_index.sql +94 -0
- package/scripts/migrations/004_create_memory_embeddings_standalone.sql +70 -0
- package/scripts/migrations/005_create_chunks_table.sql +95 -0
- package/scripts/migrations/006_create_processing_queue.sql +45 -0
- package/scripts/migrations/generate_test_data.sql +42 -0
- package/scripts/migrations/phase1_comprehensive_test.sql +204 -0
- package/scripts/migrations/run_migrations.sh +286 -0
- package/scripts/migrations/test_hnsw_index.sql +255 -0
- package/scripts/pre-commit-secrets +282 -0
- package/scripts/run-extraction-worker.ts +46 -0
- package/scripts/run-phase1-tests.sh +291 -0
- package/scripts/setup.ts +222 -0
- package/scripts/smoke-install.sh +12 -0
- package/scripts/test-health-endpoint.sh +328 -0
- package/src/api/index.ts +2 -0
- package/src/api/middleware/auth.ts +80 -0
- package/src/api/middleware/csrf.ts +308 -0
- package/src/api/middleware/errorHandler.ts +166 -0
- package/src/api/middleware/rateLimit.ts +360 -0
- package/src/api/middleware/validation.ts +514 -0
- package/src/api/routes/documents.ts +286 -0
- package/src/api/routes/profiles.ts +237 -0
- package/src/api/routes/search.ts +71 -0
- package/src/api/stores/index.ts +58 -0
- package/src/config/bootstrap-env.ts +3 -0
- package/src/config/env.ts +71 -0
- package/src/config/feature-flags.ts +25 -0
- package/src/config/index.ts +140 -0
- package/src/config/secrets.config.ts +291 -0
- package/src/db/client.ts +92 -0
- package/src/db/index.ts +73 -0
- package/src/db/postgres.ts +72 -0
- package/src/db/schema/chunks.schema.ts +31 -0
- package/src/db/schema/containers.schema.ts +46 -0
- package/src/db/schema/documents.schema.ts +49 -0
- package/src/db/schema/embeddings.schema.ts +32 -0
- package/src/db/schema/index.ts +11 -0
- package/src/db/schema/memories.schema.ts +72 -0
- package/src/db/schema/profiles.schema.ts +34 -0
- package/src/db/schema/queue.schema.ts +59 -0
- package/src/db/schema/relationships.schema.ts +42 -0
- package/src/db/schema.ts +223 -0
- package/src/db/worker-connection.ts +47 -0
- package/src/index.ts +235 -0
- package/src/mcp/CLAUDE.md +1 -0
- package/src/mcp/index.ts +1380 -0
- package/src/mcp/legacyState.ts +22 -0
- package/src/mcp/rateLimit.ts +358 -0
- package/src/mcp/resources.ts +309 -0
- package/src/mcp/results.ts +104 -0
- package/src/mcp/tools.ts +401 -0
- package/src/queues/config.ts +119 -0
- package/src/queues/index.ts +289 -0
- package/src/sdk/client.ts +225 -0
- package/src/sdk/errors.ts +266 -0
- package/src/sdk/http.ts +560 -0
- package/src/sdk/index.ts +244 -0
- package/src/sdk/resources/base.ts +65 -0
- package/src/sdk/resources/connections.ts +204 -0
- package/src/sdk/resources/documents.ts +163 -0
- package/src/sdk/resources/index.ts +10 -0
- package/src/sdk/resources/memories.ts +150 -0
- package/src/sdk/resources/search.ts +60 -0
- package/src/sdk/resources/settings.ts +36 -0
- package/src/sdk/types.ts +674 -0
- package/src/services/chunking/index.ts +451 -0
- package/src/services/chunking.service.ts +650 -0
- package/src/services/csrf.service.ts +252 -0
- package/src/services/documents.repository.ts +219 -0
- package/src/services/documents.service.ts +191 -0
- package/src/services/embedding.service.ts +404 -0
- package/src/services/extraction.service.ts +300 -0
- package/src/services/extractors/code.extractor.ts +451 -0
- package/src/services/extractors/index.ts +9 -0
- package/src/services/extractors/markdown.extractor.ts +461 -0
- package/src/services/extractors/pdf.extractor.ts +315 -0
- package/src/services/extractors/text.extractor.ts +118 -0
- package/src/services/extractors/url.extractor.ts +243 -0
- package/src/services/index.ts +235 -0
- package/src/services/ingestion.service.ts +177 -0
- package/src/services/llm/anthropic.ts +400 -0
- package/src/services/llm/base.ts +460 -0
- package/src/services/llm/contradiction-detector.service.ts +526 -0
- package/src/services/llm/heuristics.ts +148 -0
- package/src/services/llm/index.ts +309 -0
- package/src/services/llm/memory-classifier.service.ts +383 -0
- package/src/services/llm/memory-extension-detector.service.ts +523 -0
- package/src/services/llm/mock.ts +470 -0
- package/src/services/llm/openai.ts +398 -0
- package/src/services/llm/prompts.ts +438 -0
- package/src/services/llm/types.ts +373 -0
- package/src/services/memory.repository.ts +1769 -0
- package/src/services/memory.service.ts +1338 -0
- package/src/services/memory.types.ts +234 -0
- package/src/services/persistence/index.ts +295 -0
- package/src/services/pipeline.service.ts +509 -0
- package/src/services/profile.repository.ts +436 -0
- package/src/services/profile.service.ts +560 -0
- package/src/services/profile.types.ts +270 -0
- package/src/services/relationships/detector.ts +1128 -0
- package/src/services/relationships/index.ts +268 -0
- package/src/services/relationships/memory-integration.ts +459 -0
- package/src/services/relationships/strategies.ts +132 -0
- package/src/services/relationships/types.ts +370 -0
- package/src/services/search.service.ts +761 -0
- package/src/services/search.types.ts +220 -0
- package/src/services/secrets.service.ts +384 -0
- package/src/services/vectorstore/base.ts +327 -0
- package/src/services/vectorstore/index.ts +444 -0
- package/src/services/vectorstore/memory.ts +286 -0
- package/src/services/vectorstore/migration.ts +295 -0
- package/src/services/vectorstore/mock.ts +403 -0
- package/src/services/vectorstore/pgvector.ts +695 -0
- package/src/services/vectorstore/types.ts +247 -0
- package/src/startup.ts +389 -0
- package/src/types/api.types.ts +193 -0
- package/src/types/document.types.ts +103 -0
- package/src/types/index.ts +241 -0
- package/src/types/profile.base.ts +133 -0
- package/src/utils/errors.ts +447 -0
- package/src/utils/id.ts +15 -0
- package/src/utils/index.ts +101 -0
- package/src/utils/logger.ts +313 -0
- package/src/utils/sanitization.ts +501 -0
- package/src/utils/secret-validation.ts +273 -0
- package/src/utils/synonyms.ts +188 -0
- package/src/utils/validation.ts +581 -0
- package/src/workers/chunking.worker.ts +242 -0
- package/src/workers/embedding.worker.ts +358 -0
- package/src/workers/extraction.worker.ts +346 -0
- package/src/workers/indexing.worker.ts +505 -0
- package/tsconfig.json +38 -0
|
@@ -0,0 +1,695 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* PostgreSQL pgvector Vector Store
|
|
3
|
+
*
|
|
4
|
+
* Production-ready vector store implementation using PostgreSQL with pgvector extension.
|
|
5
|
+
* Supports HNSW indexing for fast approximate nearest neighbor search.
|
|
6
|
+
*
|
|
7
|
+
* Features:
|
|
8
|
+
* - HNSW index support for O(log n) search performance
|
|
9
|
+
* - Connection pooling with production-ready settings
|
|
10
|
+
* - Batch operations with transaction support
|
|
11
|
+
* - Metadata filtering and threshold-based search
|
|
12
|
+
* - Automatic pgvector extension enablement
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
import {
|
|
16
|
+
VectorEntry,
|
|
17
|
+
VectorSearchResult,
|
|
18
|
+
SearchOptions,
|
|
19
|
+
AddOptions,
|
|
20
|
+
DeleteOptions,
|
|
21
|
+
VectorStoreConfig,
|
|
22
|
+
VectorStoreStats,
|
|
23
|
+
BatchResult,
|
|
24
|
+
MetadataFilter,
|
|
25
|
+
} from './types.js'
|
|
26
|
+
import { BaseVectorStore, validateVector } from './base.js'
|
|
27
|
+
import pkg from 'pg'
|
|
28
|
+
const { Pool } = pkg
|
|
29
|
+
import type { Pool as PgPool } from 'pg'
|
|
30
|
+
import { DatabaseError, ConflictError, ErrorCode } from '../../utils/errors.js'
|
|
31
|
+
import {
|
|
32
|
+
getPostgresDatabase,
|
|
33
|
+
closePostgresDatabase,
|
|
34
|
+
getPostgresPoolConfig,
|
|
35
|
+
type PostgresDatabaseInstance,
|
|
36
|
+
} from '../../db/postgres.js'
|
|
37
|
+
|
|
38
|
+
/**
 * Configuration accepted by the pgvector-backed vector store.
 *
 * Extends the generic vector store configuration with the PostgreSQL-specific
 * settings consumed by the PgVectorStore constructor.
 */
export interface PgVectorStoreConfig extends VectorStoreConfig {
  /** Connection string used to reach the PostgreSQL server. */
  connectionString: string
  /** Table that holds the vectors; 'vector_embeddings' is used when omitted. */
  tableName?: string
  /** Number of entries handled per bulk-operation batch; 100 is used when omitted. */
  batchSize?: number
}
|
|
49
|
+
|
|
50
|
+
/**
 * Shape of a row read back from the vector table.
 *
 * Column names follow the table created by createTableIfNotExists().
 */
interface PgVectorEntry {
  /** Primary key of the entry. */
  id: string
  /** Embedding in pgvector text form, e.g. '[1,2,3]'. */
  embedding: string
  /** JSONB metadata; the pg driver hands it over already parsed. */
  metadata: unknown
  /** Namespace the entry belongs to. */
  namespace: string
  /** Row creation timestamp. */
  created_at: Date
  /** Timestamp of the last modification. */
  updated_at: Date
}
|
|
61
|
+
|
|
62
|
+
/**
|
|
63
|
+
* PostgreSQL pgvector Vector Store implementation
|
|
64
|
+
*/
|
|
65
|
+
export class PgVectorStore extends BaseVectorStore {
|
|
66
|
+
/** Handle returned by getPostgresDatabase(); null until initialize() has run. */
private db: PostgresDatabaseInstance | null = null
/** pg connection pool used for all direct SQL; null until initialize() has run. */
private pool: PgPool | null = null
/** Connection string taken from the configuration. */
private readonly connectionString: string
/** Name of the table that stores the vectors. */
private readonly tableName: string
/** Number of entries processed per batch in addBatch(). */
private readonly batchSize: number
/** Set once initialize() has completed; reset by close(). */
private initialized = false

/**
 * Build a store from its configuration.
 *
 * The provider is forced to 'pgvector' and the index type is derived from the
 * presence of `hnswConfig` ('hnsw' when present, otherwise 'flat'). No database
 * work happens here; call initialize() before using the store.
 */
constructor(config: PgVectorStoreConfig) {
  super({
    ...config,
    provider: 'pgvector',
    indexType: config.hnswConfig ? 'hnsw' : 'flat',
  })

  const { connectionString, tableName, batchSize } = config
  this.connectionString = connectionString
  this.tableName = tableName ?? 'vector_embeddings'
  this.batchSize = batchSize ?? 100
}
|
|
84
|
+
|
|
85
|
+
/**
 * Initialize the pgvector store.
 *
 * Obtains the database handle, opens a connection pool, creates the vector
 * table if it does not exist and, when `hnswConfig` is configured, the HNSW
 * index. Calling it again after a successful run is a no-op.
 *
 * If table or index creation fails, the pool opened by this call is closed
 * and cleared before the error is rethrown, so a later retry does not leak
 * the connections of the failed attempt.
 *
 * @throws whatever the table/index creation queries throw
 */
async initialize(): Promise<void> {
  if (this.initialized) return

  // Get database connection
  this.db = getPostgresDatabase(this.connectionString)

  // Create connection pool for direct queries
  const pool = new Pool({
    connectionString: this.connectionString,
    ...getPostgresPoolConfig(),
  })
  this.pool = pool

  try {
    await this.createTableIfNotExists()

    // The index is only created when HNSW settings were supplied.
    if (this.config.hnswConfig) {
      await this.createHNSWIndex()
    }
  } catch (error) {
    // Do not keep a half-initialized store holding open connections.
    this.pool = null
    try {
      await pool.end()
    } catch {
      // Best effort: the original failure is the error worth reporting.
    }
    throw error
  }

  this.initialized = true
}
|
|
111
|
+
|
|
112
|
+
/**
 * Ensure the vector table exists.
 *
 * Issues a `CREATE TABLE IF NOT EXISTS` for the configured table name with an
 * embedding column sized to the configured dimensions.
 *
 * @throws DatabaseError when the connection pool has not been created yet
 */
private async createTableIfNotExists(): Promise<void> {
  const pool = this.pool
  if (!pool) {
    throw new DatabaseError('Database not initialized', 'connection', {
      code: ErrorCode.DATABASE_NOT_INITIALIZED,
      table: this.tableName,
    })
  }

  const table = this.tableName
  const dims = this.config.dimensions

  await pool.query(`
    CREATE TABLE IF NOT EXISTS ${table} (
      id VARCHAR(255) PRIMARY KEY,
      embedding vector(${dims}) NOT NULL,
      metadata JSONB NOT NULL DEFAULT '{}',
      namespace VARCHAR(255) NOT NULL DEFAULT 'default',
      created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),
      updated_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW()
    )
  `)
}
|
|
136
|
+
|
|
137
|
+
/**
 * Ensure the HNSW index on the embedding column exists.
 *
 * The index is named `<table>_hnsw_idx`. The operator class follows the
 * configured metric: cosine -> vector_cosine_ops, euclidean -> vector_l2_ops,
 * anything else (including dot_product) -> vector_ip_ops. Build parameters
 * come from `hnswConfig`, falling back to M = 16 and efConstruction = 64.
 *
 * @throws DatabaseError when the connection pool has not been created yet
 */
private async createHNSWIndex(): Promise<void> {
  const pool = this.pool
  if (!pool) {
    throw new DatabaseError('Database not initialized', 'connection', {
      code: ErrorCode.DATABASE_NOT_INITIALIZED,
      table: this.tableName,
    })
  }

  const params = this.config.hnswConfig ?? { M: 16, efConstruction: 64 }
  const table = this.tableName
  const metric = this.config.metric ?? 'cosine'

  // Pick the operator class that matches the similarity metric.
  const opClass =
    metric === 'cosine' ? 'vector_cosine_ops' : metric === 'euclidean' ? 'vector_l2_ops' : 'vector_ip_ops'

  await pool.query(`
    CREATE INDEX IF NOT EXISTS ${table}_hnsw_idx
    ON ${table}
    USING hnsw (embedding ${opClass})
    WITH (m = ${params.M}, ef_construction = ${params.efConstruction})
  `)
}
|
|
176
|
+
|
|
177
|
+
/**
 * Add a single vector entry.
 *
 * The entry is validated, then written with one INSERT statement. The
 * conflict handling is part of that statement, so the duplicate check and the
 * write are atomic:
 * - `options.overwrite` truthy: an existing row with the same id has its
 *   embedding, metadata, namespace and updated_at replaced.
 * - otherwise: an existing row is left untouched and a ConflictError is thrown.
 *
 * Emits an 'add' event with the entry id after a successful write.
 *
 * @param entry   entry to store
 * @param options optional namespace and overwrite flag
 * @throws DatabaseError when the store has not been initialized
 * @throws ConflictError when the id already exists and overwrite is not set
 */
async add(entry: VectorEntry, options?: AddOptions): Promise<void> {
  this.validateEntry(entry)
  if (!this.pool) {
    throw new DatabaseError('Database not initialized', 'connection', {
      code: ErrorCode.DATABASE_NOT_INITIALIZED,
      table: this.tableName,
    })
  }

  const namespace = options?.namespace ?? this.config.defaultNamespace ?? 'default'
  const overwrite = Boolean(options?.overwrite)

  // Convert embedding to pgvector format
  const embeddingStr = `[${entry.embedding.join(',').replace(/\s+/g, '')}]`

  // Letting the database arbitrate the conflict avoids the check-then-insert
  // race in which a concurrent writer would be silently overwritten.
  const conflictAction = overwrite
    ? `DO UPDATE SET
        embedding = EXCLUDED.embedding,
        metadata = EXCLUDED.metadata,
        namespace = EXCLUDED.namespace,
        updated_at = EXCLUDED.updated_at`
    : 'DO NOTHING'

  const insertSQL = `
    INSERT INTO ${this.tableName} (id, embedding, metadata, namespace, created_at, updated_at)
    VALUES ($1, $2::vector, $3::jsonb, $4, $5, $6)
    ON CONFLICT (id) ${conflictAction}
  `

  const now = new Date()
  const result = await this.pool.query(insertSQL, [
    entry.id,
    embeddingStr,
    JSON.stringify(entry.metadata),
    namespace,
    entry.createdAt ?? now,
    now,
  ])

  // With DO NOTHING, zero affected rows means the id was already present.
  if (!overwrite && (result.rowCount ?? 0) === 0) {
    throw new ConflictError(`Entry with ID ${entry.id} already exists`, 'duplicate', {
      entryId: entry.id,
      table: this.tableName,
    })
  }

  this.emit('add', { id: entry.id })
}
|
|
226
|
+
|
|
227
|
+
/**
 * Add multiple vector entries in batches.
 *
 * Entries are processed in slices of `batchSize`. Each slice runs in one
 * transaction on a dedicated client, and every entry is written inside its
 * own savepoint so a failing entry does not abort the rest of the slice.
 * Per-entry failures (validation errors, duplicates when `overwrite` is not
 * set, database errors) are reported in the result rather than thrown.
 *
 * If the transaction itself fails (BEGIN, COMMIT or savepoint handling), the
 * whole slice is rolled back and every entry of that slice is reported as
 * failed exactly once.
 *
 * An 'add' event is emitted for each entry only after its slice has been
 * committed.
 *
 * @param entries entries to store
 * @param options optional namespace and overwrite flag, applied to all entries
 * @returns counts of successful/failed entries and per-entry error messages
 * @throws DatabaseError when the store has not been initialized
 */
async addBatch(entries: VectorEntry[], options?: AddOptions): Promise<BatchResult> {
  if (!this.pool) {
    throw new DatabaseError('Database not initialized', 'connection', {
      code: ErrorCode.DATABASE_NOT_INITIALIZED,
      table: this.tableName,
    })
  }

  const result: BatchResult = {
    successful: 0,
    failed: 0,
    errors: [],
  }

  const describe = (error: unknown): string => (error instanceof Error ? error.message : String(error))

  const overwrite = Boolean(options?.overwrite)
  const namespace = options?.namespace ?? this.config.defaultNamespace ?? 'default'

  const conflictAction = overwrite
    ? `DO UPDATE SET
        embedding = EXCLUDED.embedding,
        metadata = EXCLUDED.metadata,
        namespace = EXCLUDED.namespace,
        updated_at = EXCLUDED.updated_at`
    : 'DO NOTHING'

  const insertSQL = `
    INSERT INTO ${this.tableName} (id, embedding, metadata, namespace, created_at, updated_at)
    VALUES ($1, $2::vector, $3::jsonb, $4, $5, $6)
    ON CONFLICT (id) ${conflictAction}
  `

  // A zero, negative or non-numeric batch size would never advance the loop.
  const step = Math.max(1, Math.floor(this.batchSize) || 1)

  for (let offset = 0; offset < entries.length; offset += step) {
    const batch = entries.slice(offset, offset + step)
    const addedIds: string[] = []
    const failures: Array<{ id: string; error: string }> = []
    let committed = false

    const client = await this.pool.connect()
    try {
      await client.query('BEGIN')

      for (const entry of batch) {
        try {
          this.validateEntry(entry)
        } catch (error) {
          failures.push({ id: entry.id, error: describe(error) })
          continue
        }

        // All statements go through `client` so they belong to the transaction.
        await client.query('SAVEPOINT add_batch_entry')
        try {
          const now = new Date()
          const inserted = await client.query(insertSQL, [
            entry.id,
            `[${entry.embedding.join(',').replace(/\s+/g, '')}]`,
            JSON.stringify(entry.metadata),
            namespace,
            entry.createdAt ?? now,
            now,
          ])
          await client.query('RELEASE SAVEPOINT add_batch_entry')

          if (!overwrite && (inserted.rowCount ?? 0) === 0) {
            // DO NOTHING affected no row: the id already exists.
            failures.push({ id: entry.id, error: `Entry with ID ${entry.id} already exists` })
          } else {
            addedIds.push(entry.id)
          }
        } catch (error) {
          // Undo only this entry; the transaction stays usable for the rest.
          await client.query('ROLLBACK TO SAVEPOINT add_batch_entry')
          failures.push({ id: entry.id, error: describe(error) })
        }
      }

      await client.query('COMMIT')
      committed = true
    } catch (error) {
      try {
        await client.query('ROLLBACK')
      } catch {
        // The connection may already be unusable; nothing more can be done here.
      }
      // Nothing from this slice was persisted: report every entry as failed once.
      for (const entry of batch) {
        result.failed++
        result.errors?.push({
          id: entry.id,
          error: `Transaction failed: ${describe(error)}`,
        })
      }
    } finally {
      client.release()
    }

    if (committed) {
      result.successful += addedIds.length
      result.failed += failures.length
      result.errors?.push(...failures)
      for (const id of addedIds) {
        this.emit('add', { id })
      }
    }
  }

  return result
}
|
|
284
|
+
|
|
285
|
+
/**
 * Update the embedding and/or metadata of an existing entry.
 *
 * Only `updates.embedding` and `updates.metadata` are applied; `updated_at`
 * is refreshed whenever one of them is written. When neither is supplied
 * nothing is written and the return value only reports whether the entry
 * exists.
 *
 * The outcome is taken from the UPDATE statement's affected-row count, so the
 * method does not load the stored row first and cannot report success for a
 * row that disappeared in the meantime. An 'update' event is emitted only
 * when a row was actually changed.
 *
 * @param id      id of the entry to update
 * @param updates fields to change
 * @returns true when the entry exists (and was updated, if there was anything
 *          to update); false when no entry has this id
 * @throws DatabaseError when the store has not been initialized
 */
async update(id: string, updates: Partial<VectorEntry>): Promise<boolean> {
  if (!this.pool) {
    throw new DatabaseError('Database not initialized', 'connection', {
      code: ErrorCode.DATABASE_NOT_INITIALIZED,
      table: this.tableName,
    })
  }

  // Validate embedding if provided
  if (updates.embedding) {
    validateVector(updates.embedding, this.config.dimensions)
  }

  const assignments: string[] = []
  const values: unknown[] = []

  // Each placeholder number is the position of the value just pushed.
  if (updates.embedding) {
    values.push(`[${updates.embedding.join(',')}]`)
    assignments.push(`embedding = $${values.length}::vector`)
  }

  if (updates.metadata) {
    values.push(JSON.stringify(updates.metadata))
    assignments.push(`metadata = $${values.length}::jsonb`)
  }

  if (assignments.length === 0) {
    // Nothing to write; just report whether the entry is there.
    return this.exists(id)
  }

  values.push(new Date())
  assignments.push(`updated_at = $${values.length}`)

  values.push(id)
  const updateSQL = `
    UPDATE ${this.tableName}
    SET ${assignments.join(', ')}
    WHERE id = $${values.length}
  `

  const result = await this.pool.query(updateSQL, values)
  if ((result.rowCount ?? 0) === 0) {
    return false
  }

  this.emit('update', { id })
  return true
}
|
|
341
|
+
|
|
342
|
+
/**
 * Delete vector entries.
 *
 * Exactly one selection mode is applied, checked in this order:
 * 1. `deleteAll`: every row of the given (or default) namespace,
 * 2. `ids`: the rows whose id is in the non-empty list,
 * 3. `filter`: the rows matching the metadata filter.
 * When none applies nothing is deleted. A 'delete' event carrying the count
 * is emitted when at least one row was removed.
 *
 * @param options selection of the entries to delete
 * @returns number of rows removed
 * @throws DatabaseError when the store has not been initialized
 */
async delete(options: DeleteOptions): Promise<number> {
  const pool = this.pool
  if (!pool) {
    throw new DatabaseError('Database not initialized', 'connection', {
      code: ErrorCode.DATABASE_NOT_INITIALIZED,
      table: this.tableName,
    })
  }

  const table = this.tableName
  let removed = 0

  if (options.deleteAll) {
    const targetNamespace = options.namespace ?? this.config.defaultNamespace ?? 'default'
    const outcome = await pool.query(
      `
      DELETE FROM ${table}
      WHERE namespace = $1
    `,
      [targetNamespace]
    )
    removed = outcome.rowCount ?? 0
  } else if (options.ids && options.ids.length > 0) {
    const outcome = await pool.query(
      `
      DELETE FROM ${table}
      WHERE id = ANY($1::varchar[])
    `,
      [options.ids]
    )
    removed = outcome.rowCount ?? 0
  } else if (options.filter) {
    // The metadata filter is rendered to a SQL condition by the helper.
    const condition = this.buildMetadataFilterSQL(options.filter)
    const outcome = await pool.query(`
      DELETE FROM ${table}
      WHERE ${condition}
    `)
    removed = outcome.rowCount ?? 0
  }

  if (removed > 0) {
    this.emit('delete', { count: removed })
  }

  return removed
}
|
|
387
|
+
|
|
388
|
+
/**
 * Fetch a single vector entry by id.
 *
 * @param id id of the entry
 * @returns the entry, or null when no row has this id
 * @throws DatabaseError when the store has not been initialized
 */
async get(id: string): Promise<VectorEntry | null> {
  const pool = this.pool
  if (!pool) {
    throw new DatabaseError('Database not initialized', 'connection', {
      code: ErrorCode.DATABASE_NOT_INITIALIZED,
      table: this.tableName,
    })
  }

  // The embedding is read as text and parsed back into numbers below.
  const { rows } = await pool.query(
    `
    SELECT id, embedding::text, metadata, created_at, updated_at
    FROM ${this.tableName}
    WHERE id = $1
  `,
    [id]
  )

  const first = rows[0] as PgVectorEntry | undefined
  return first ? this.rowToVectorEntry(first) : null
}
|
|
412
|
+
|
|
413
|
+
/**
 * Tell whether an entry with the given id is stored.
 *
 * @param id id to look up
 * @returns true when at least one row has this id
 * @throws DatabaseError when the store has not been initialized
 */
async exists(id: string): Promise<boolean> {
  const pool = this.pool
  if (!pool) {
    throw new DatabaseError('Database not initialized', 'connection', {
      code: ErrorCode.DATABASE_NOT_INITIALIZED,
      table: this.tableName,
    })
  }

  const { rows } = await pool.query(
    `
    SELECT 1 FROM ${this.tableName} WHERE id = $1
  `,
    [id]
  )
  return rows.length > 0
}
|
|
431
|
+
|
|
432
|
+
/**
 * Search for the entries most similar to a query vector.
 *
 * Rows are ordered by the pgvector distance operator of the configured
 * metric (closest first), filtered by the optional metadata filters and by
 * `score >= threshold`, and capped at `limit`.
 *
 * The score is always expressed so that a larger value means a closer match,
 * which is what the `>= threshold` filter requires:
 * - cosine:      1 - cosine distance
 * - euclidean:   1 / (1 + L2 distance), i.e. 1 for identical vectors, tending to 0
 * - dot_product: the inner product (pgvector's `<#>` returns its negative)
 * NOTE(review): before this change the non-cosine metrics exposed the raw
 * operator result as the score, which made the threshold keep the worst
 * matches; confirm that no caller relies on those raw values.
 *
 * Emits a 'search' event with the number of rows returned.
 *
 * @param query   query vector; must match the configured dimensions
 * @param options limit, threshold, metadata filters and include flags
 * @returns matching entries with their scores
 * @throws DatabaseError when the store has not been initialized
 */
async search(query: number[], options?: SearchOptions): Promise<VectorSearchResult[]> {
  validateVector(query, this.config.dimensions)
  if (!this.pool) {
    throw new DatabaseError('Database not initialized', 'connection', {
      code: ErrorCode.DATABASE_NOT_INITIALIZED,
      table: this.tableName,
    })
  }

  const opts = this.mergeOptions(options)
  const queryVector = `[${query.join(',')}]`

  // Distance operator (used for ordering) and similarity score per metric.
  const metric = this.config.metric ?? 'cosine'
  let distanceOp: string
  let scoreExpression: string
  switch (metric) {
    case 'cosine':
      distanceOp = '<=>'
      scoreExpression = `(1 - (embedding <=> $1::vector))`
      break
    case 'euclidean':
      distanceOp = '<->'
      scoreExpression = `(1 / (1 + (embedding <-> $1::vector)))`
      break
    case 'dot_product':
    default:
      distanceOp = '<#>' // negative inner product
      scoreExpression = `((embedding <#> $1::vector) * -1)`
      break
  }

  // Build WHERE clause for metadata filters
  let whereClause = 'TRUE'
  if (opts.filters && opts.filters.length > 0) {
    whereClause = opts.filters.map((filter) => this.buildMetadataFilterSQL(filter)).join(' AND ')
  }

  // Build SELECT fields based on options
  const selectFields = ['id']
  if (opts.includeVectors) {
    selectFields.push('embedding::text as embedding')
  }
  if (opts.includeMetadata) {
    selectFields.push('metadata')
  }

  // Ordering uses the bare distance expression so an index on the embedding
  // column can serve the nearest-neighbour scan.
  const searchSQL = `
    SELECT
      ${selectFields.join(', ')},
      ${scoreExpression} as score
    FROM ${this.tableName}
    WHERE ${whereClause}
      AND ${scoreExpression} >= $2
    ORDER BY embedding ${distanceOp} $1::vector
    LIMIT $3
  `

  const result = await this.pool.query(searchSQL, [queryVector, opts.threshold, opts.limit])

  this.emit('search', {
    resultsCount: result.rows.length,
  })

  const rows = result.rows as Array<{
    id: string
    score: number
    embedding?: string
    metadata?: Record<string, unknown>
  }>

  return rows.map((row) => ({
    id: row.id,
    score: row.score,
    embedding: opts.includeVectors ? this.parseEmbedding(row.embedding ?? '[]') : undefined,
    metadata: opts.includeMetadata ? (row.metadata ?? {}) : {},
  }))
}
|
|
514
|
+
|
|
515
|
+
/**
 * Report statistics about the store.
 *
 * Runs a count query and a distinct-namespace query in parallel. Dimensions,
 * index type and metric come from the configuration; `indexBuilt` is true
 * when the configured index type is 'hnsw' (the database is not inspected).
 *
 * @returns total vector count, configuration values and the namespaces in use
 * @throws DatabaseError when the store has not been initialized
 */
async getStats(): Promise<VectorStoreStats> {
  const pool = this.pool
  if (!pool) {
    throw new DatabaseError('Database not initialized', 'connection', {
      code: ErrorCode.DATABASE_NOT_INITIALIZED,
      table: this.tableName,
    })
  }

  const table = this.tableName
  const [totals, distinctNamespaces] = await Promise.all([
    pool.query(`
      SELECT COUNT(*) as total, COUNT(DISTINCT namespace) as namespace_count
      FROM ${table}
    `),
    pool.query(`
      SELECT DISTINCT namespace FROM ${table}
    `),
  ])

  // COUNT(*) is read as a string and converted below.
  const counts = totals.rows[0] as { total: string; namespace_count: string }
  const namespaces: string[] = []
  for (const row of distinctNamespaces.rows as Array<{ namespace: string }>) {
    namespaces.push(row.namespace)
  }

  const { dimensions, indexType, metric } = this.config
  return {
    totalVectors: parseInt(counts.total, 10),
    dimensions,
    indexType: indexType ?? 'flat',
    metric: metric ?? 'cosine',
    indexBuilt: indexType === 'hnsw',
    namespaces,
  }
}
|
|
552
|
+
|
|
553
|
+
/**
 * Remove every vector from the store.
 *
 * Truncates the whole table (all namespaces) and emits a 'delete' event
 * flagged with `deleteAll`.
 *
 * @throws DatabaseError when the store has not been initialized
 */
async clear(): Promise<void> {
  const pool = this.pool
  if (!pool) {
    throw new DatabaseError('Database not initialized', 'connection', {
      code: ErrorCode.DATABASE_NOT_INITIALIZED,
      table: this.tableName,
    })
  }

  await pool.query(`TRUNCATE TABLE ${this.tableName}`)
  this.emit('delete', { deleteAll: true })
}
|
|
568
|
+
|
|
569
|
+
/**
 * Shut the store down and release its resources.
 *
 * Ends the connection pool if one is open, calls closePostgresDatabase(),
 * and resets the store so that initialize() can be run again.
 */
async close(): Promise<void> {
  const pool = this.pool
  if (pool) {
    await pool.end()
    this.pool = null
  }

  await closePostgresDatabase()
  this.db = null
  this.initialized = false
}
|
|
581
|
+
|
|
582
|
+
/**
 * Load every stored entry (intended for migration/export).
 *
 * Reads the whole table in a single query, without paging.
 *
 * @returns all entries of the table
 * @throws DatabaseError when the store has not been initialized
 */
async getAllEntries(): Promise<VectorEntry[]> {
  const pool = this.pool
  if (!pool) {
    throw new DatabaseError('Database not initialized', 'connection', {
      code: ErrorCode.DATABASE_NOT_INITIALIZED,
      table: this.tableName,
    })
  }

  const { rows } = await pool.query(`
    SELECT id, embedding::text, metadata, created_at, updated_at
    FROM ${this.tableName}
  `)

  const entries: VectorEntry[] = []
  for (const row of rows) {
    entries.push(this.rowToVectorEntry(row as PgVectorEntry))
  }
  return entries
}
|
|
601
|
+
|
|
602
|
+
/**
 * Count the entries in the table.
 *
 * @returns number of rows, across all namespaces
 * @throws DatabaseError when the store has not been initialized
 */
async size(): Promise<number> {
  const pool = this.pool
  if (!pool) {
    throw new DatabaseError('Database not initialized', 'connection', {
      code: ErrorCode.DATABASE_NOT_INITIALIZED,
      table: this.tableName,
    })
  }

  const { rows } = await pool.query(`SELECT COUNT(*) as total FROM ${this.tableName}`)
  // The count is read as a string and converted to a number.
  const { total } = rows[0] as { total: string }
  return parseInt(total, 10)
}
|
|
618
|
+
|
|
619
|
+
/**
 * Map a database row onto the public VectorEntry shape.
 *
 * The embedding text is parsed into numbers, missing metadata becomes an
 * empty object, and the snake_case timestamps are exposed in camelCase.
 */
private rowToVectorEntry(row: PgVectorEntry): VectorEntry {
  const { id, embedding, metadata, created_at, updated_at } = row
  // JSONB arrives already parsed from the driver, so no JSON.parse is needed.
  const parsedMetadata = (metadata ?? {}) as Record<string, unknown>

  return {
    id,
    embedding: this.parseEmbedding(embedding),
    metadata: parsedMetadata,
    createdAt: created_at,
    updatedAt: updated_at,
  }
}
|
|
631
|
+
|
|
632
|
+
/**
 * Parse a pgvector text value such as '[1,2,3]' into a number array.
 *
 * An empty vector ('[]' or an empty string) yields an empty array; without
 * this guard, splitting the empty string would produce a single NaN element.
 *
 * @param embeddingStr embedding in pgvector text form
 * @returns the parsed components
 */
private parseEmbedding(embeddingStr: string): number[] {
  // Strip surrounding whitespace and the enclosing brackets.
  const inner = embeddingStr
    .trim()
    .replace(/^\[|\]$/g, '')
    .trim()

  if (inner === '') {
    return []
  }

  return inner.split(',').map((component) => parseFloat(component))
}
|
|
642
|
+
|
|
643
|
+
/**
 * Render a metadata filter as a SQL condition on the `metadata` JSONB column.
 *
 * The condition is returned as text and spliced into the caller's statement,
 * so every dynamic part is made safe here:
 * - keys and text values become escape-string literals (E'...') with
 *   backslashes and single quotes escaped, which holds regardless of the
 *   server's standard_conforming_strings setting;
 * - operands of gt/gte/lt/lte must be finite numbers (or numeric strings);
 * - contains/startsWith treat the value literally: '%', '_' and the escape
 *   character '!' are escaped in the LIKE pattern;
 * - an empty list for `in` matches nothing and for `nin` matches everything,
 *   instead of producing the invalid SQL `IN ()`.
 *
 * Unknown operators render as 'TRUE' (no restriction).
 *
 * @param filter key, operator and value to compare against
 * @returns a SQL boolean expression
 * @throws TypeError when a numeric comparison receives a non-numeric value
 */
private buildMetadataFilterSQL(filter: MetadataFilter): string {
  // Escape-string literal: safe for arbitrary text.
  const literal = (raw: unknown): string =>
    `E'${String(raw).replace(/\\/g, '\\\\').replace(/'/g, "''")}'`

  // Numeric operand: only finite numbers are allowed into the SQL text.
  const numeric = (raw: unknown): string => {
    const parsed =
      typeof raw === 'number' ? raw : typeof raw === 'string' && raw.trim() !== '' ? Number(raw) : NaN
    if (!Number.isFinite(parsed)) {
      throw new TypeError(
        `Metadata filter operator '${String(filter.operator)}' requires a finite numeric value`
      )
    }
    return String(parsed)
  }

  // LIKE pattern with the user text matched literally; '!' is the escape character.
  const likePattern = (raw: unknown, leadingWildcard: boolean): string => {
    const escaped = String(raw).replace(/[!%_]/g, (ch) => `!${ch}`)
    return `${literal(`${leadingWildcard ? '%' : ''}${escaped}%`)} ESCAPE '!'`
  }

  const listOf = (raw: unknown): string[] => (Array.isArray(raw) ? raw : [raw]).map((item) => literal(item))

  const field = `metadata->>${literal(filter.key)}`
  const numericField = `(${field})::numeric`
  const value = filter.value

  switch (filter.operator) {
    case 'eq':
      return `${field} = ${literal(value)}`
    case 'ne':
      return `${field} != ${literal(value)}`
    case 'gt':
      return `${numericField} > ${numeric(value)}`
    case 'gte':
      return `${numericField} >= ${numeric(value)}`
    case 'lt':
      return `${numericField} < ${numeric(value)}`
    case 'lte':
      return `${numericField} <= ${numeric(value)}`
    case 'in': {
      const items = listOf(value)
      return items.length === 0 ? 'FALSE' : `${field} IN (${items.join(',')})`
    }
    case 'nin': {
      const items = listOf(value)
      return items.length === 0 ? 'TRUE' : `${field} NOT IN (${items.join(',')})`
    }
    case 'contains':
      return `${field} LIKE ${likePattern(value, true)}`
    case 'startsWith':
      return `${field} LIKE ${likePattern(value, false)}`
    default:
      return 'TRUE'
  }
}
|
|
679
|
+
}
|
|
680
|
+
|
|
681
|
+
/**
 * Convenience factory for a PgVectorStore.
 *
 * The returned store is constructed only; initialize() still has to be called
 * before it is used.
 *
 * @param connectionString PostgreSQL connection string
 * @param dimensions       dimensionality of the stored vectors
 * @param options          any further store settings (table name, batch size, metric, ...)
 * @returns a new, not yet initialized store
 */
export function createPgVectorStore(
  connectionString: string,
  dimensions: number,
  options?: Partial<Omit<PgVectorStoreConfig, 'provider' | 'dimensions' | 'connectionString'>>
): PgVectorStore {
  const config: PgVectorStoreConfig = {
    provider: 'pgvector',
    dimensions,
    connectionString,
    ...options,
  }
  return new PgVectorStore(config)
}
|