@strav/rag 0.4.31 → 1.0.0-alpha.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +21 -23
- package/src/chunking/chunker.ts +7 -2
- package/src/chunking/fixed_size_chunker.ts +24 -8
- package/src/chunking/recursive_chunker.ts +89 -28
- package/src/console/index.ts +3 -0
- package/src/console/rag_console_provider.ts +17 -0
- package/src/console/rag_flush.ts +51 -0
- package/src/console/rag_list.ts +48 -0
- package/src/drivers/memory_driver.ts +110 -85
- package/src/drivers/pgvector_driver.ts +203 -109
- package/src/index.ts +46 -36
- package/src/migrations.ts +116 -0
- package/src/rag_error.ts +76 -0
- package/src/rag_manager.ts +289 -66
- package/src/rag_provider.ts +85 -7
- package/src/rag_vector_schema.ts +56 -0
- package/src/retrievable.ts +236 -145
- package/src/types.ts +80 -22
- package/src/vector_store.ts +45 -5
- package/src/commands/rag_flush.ts +0 -41
- package/src/commands/rag_ingest.ts +0 -45
- package/src/drivers/null_driver.ts +0 -21
- package/src/errors.ts +0 -21
- package/src/helpers.ts +0 -186
- package/stubs/config/rag.ts +0 -33
- package/tsconfig.json +0 -5
|
@@ -1,157 +1,251 @@
|
|
|
1
|
-
|
|
1
|
+
/**
|
|
2
|
+
* `PgvectorDriver` — `VectorStore` backed by Postgres + the
|
|
3
|
+
* `pgvector` extension. Single table per app (`rag_vector` by
|
|
4
|
+
* default), `collection` is a column inside it.
|
|
5
|
+
*
|
|
6
|
+
* Multitenancy: every query relies on RLS scoping by
|
|
7
|
+
* `current_setting('app.tenant_id')`. Apps wrap calls in
|
|
8
|
+
* `tenants.withTenant(tenantId, async () => { ... })` — the
|
|
9
|
+
* driver itself has no tenant awareness.
|
|
10
|
+
*
|
|
11
|
+
* Why one table instead of one-per-collection:
|
|
12
|
+
*
|
|
13
|
+
* - `defineSchema` doesn't support runtime table creation.
|
|
14
|
+
* - HNSW indexes work fine with `collection` as a leading
|
|
15
|
+
* column; if a collection grows past tens of millions and
|
|
16
|
+
* wants its own partial HNSW, that's a one-line follow-up
|
|
17
|
+
* migration.
|
|
18
|
+
* - One RLS policy, one set of grants, fewer surprises.
|
|
19
|
+
*
|
|
20
|
+
* Why this driver doesn't extend `Repository`:
|
|
21
|
+
*
|
|
22
|
+
* - The framework repository hydrates rows into a `Model`, but
|
|
23
|
+
* `embedding vector(N)` isn't expressible in the framework's
|
|
24
|
+
* type system. The driver uses raw `db.query` / `db.execute`
|
|
25
|
+
* on the table and returns plain objects.
|
|
26
|
+
* - All vector ops (`<=>`, `vector_cosine_ops`) are
|
|
27
|
+
* pgvector-specific; the framework's query builder can't
|
|
28
|
+
* model them.
|
|
29
|
+
*/
|
|
30
|
+
|
|
31
|
+
import {
|
|
32
|
+
currentTransactionalContext,
|
|
33
|
+
type DatabaseExecutor,
|
|
34
|
+
type PostgresDatabase,
|
|
35
|
+
} from '@strav/database'
|
|
36
|
+
import { VectorQueryError } from '../rag_error.ts'
|
|
37
|
+
import { ragVectorSchema } from '../rag_vector_schema.ts'
|
|
38
|
+
import type {
|
|
39
|
+
QueryOptions,
|
|
40
|
+
QueryResult,
|
|
41
|
+
StoreConfig,
|
|
42
|
+
VectorDocument,
|
|
43
|
+
VectorMatch,
|
|
44
|
+
} from '../types.ts'
|
|
2
45
|
import type { VectorStore } from '../vector_store.ts'
|
|
3
|
-
|
|
4
|
-
|
|
46
|
+
|
|
47
|
+
export interface PgvectorDriverOptions {
|
|
48
|
+
/** PostgresDatabase instance — typically resolved from the container. */
|
|
49
|
+
db: PostgresDatabase
|
|
50
|
+
/** Override table name. Defaults to `rag_vector`. */
|
|
51
|
+
table?: string
|
|
52
|
+
}
|
|
5
53
|
|
|
6
54
|
export class PgvectorDriver implements VectorStore {
|
|
7
55
|
readonly name = 'pgvector'
|
|
8
|
-
private initialized = false
|
|
9
56
|
|
|
10
|
-
|
|
57
|
+
private readonly db: PostgresDatabase
|
|
58
|
+
private readonly table: string
|
|
11
59
|
|
|
12
|
-
|
|
13
|
-
|
|
60
|
+
constructor(options: PgvectorDriverOptions) {
|
|
61
|
+
this.db = options.db
|
|
62
|
+
this.table = options.table ?? ragVectorSchema.name
|
|
63
|
+
}
|
|
14
64
|
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
65
|
+
/**
|
|
66
|
+
* Factory used by `RagManager.createStore` — accepts the raw
|
|
67
|
+
* `StoreConfig` from `config.rag.stores[<name>]` and resolves
|
|
68
|
+
* the `db` from the container. Apps that want explicit control
|
|
69
|
+
* call `new PgvectorDriver({ db, table })` directly.
|
|
70
|
+
*/
|
|
71
|
+
static fromConfig(db: PostgresDatabase, config: StoreConfig): PgvectorDriver {
|
|
72
|
+
return new PgvectorDriver({
|
|
73
|
+
db,
|
|
74
|
+
...(typeof config.table === 'string' ? { table: config.table } : {}),
|
|
75
|
+
})
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
/**
|
|
79
|
+
* Route reads + writes through the ambient `UnitOfWork`
|
|
80
|
+
* transaction when one is active (e.g., inside
|
|
81
|
+
* `tenants.withTenant(...)`); fall back to the raw pool
|
|
82
|
+
* otherwise. Mirrors how `Repository.executor(opts)` works in
|
|
83
|
+
* `@strav/database`, so RLS scoping + transactional event
|
|
84
|
+
* flushing apply uniformly across framework + driver code.
|
|
85
|
+
*/
|
|
86
|
+
private exec(): DatabaseExecutor {
|
|
87
|
+
const ambient = currentTransactionalContext()
|
|
88
|
+
if (ambient) return ambient.tx
|
|
89
|
+
return this.db as unknown as DatabaseExecutor
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
// ─── Collections ──────────────────────────────────────────────────────
|
|
93
|
+
|
|
94
|
+
async createCollection(_collection: string, _dimension: number): Promise<void> {
|
|
95
|
+
// No-op: every collection lives in the same table. The
|
|
96
|
+
// `applyRagVectorMigration` helper attached the
|
|
97
|
+
// `vector(<dimension>)` column at migration time, so the
|
|
98
|
+
// dimension is fixed per table and enforced at INSERT.
|
|
25
99
|
}
|
|
26
100
|
|
|
27
101
|
async deleteCollection(collection: string): Promise<void> {
|
|
28
|
-
await
|
|
29
|
-
`DELETE FROM
|
|
30
|
-
[collection]
|
|
102
|
+
await this.exec().execute(
|
|
103
|
+
`DELETE FROM "${this.table}" WHERE "collection" = $1`,
|
|
104
|
+
[collection],
|
|
31
105
|
)
|
|
32
106
|
}
|
|
33
107
|
|
|
34
|
-
|
|
35
|
-
const sql = Database.raw
|
|
108
|
+
// ─── Mutations ────────────────────────────────────────────────────────
|
|
36
109
|
|
|
110
|
+
async upsert(
|
|
111
|
+
collection: string,
|
|
112
|
+
documents: readonly VectorDocument[],
|
|
113
|
+
): Promise<void> {
|
|
114
|
+
if (documents.length === 0) return
|
|
115
|
+
// pgvector accepts the vector as a stringified array literal —
|
|
116
|
+
// `[0.12,0.34,...]` — cast with `::vector` at the boundary.
|
|
117
|
+
//
|
|
118
|
+
// Tenant scoping: the `tenant_id` column on `rag_vector` is
|
|
119
|
+
// NOT NULL with no default, so apps wrapping the call in
|
|
120
|
+
// `tenants.withTenant(...)` need a value supplied. We read
|
|
121
|
+
// `current_setting('app.tenant_id')` inside the SQL itself —
|
|
122
|
+
// the same session var the RLS policy reads — so the INSERT
|
|
123
|
+
// works under tenant scope without the driver knowing the PK
|
|
124
|
+
// type ahead of time. The `true` second arg makes the
|
|
125
|
+
// setting return NULL (not throw) outside `withTenant`; the
|
|
126
|
+
// INSERT then fails the NOT NULL constraint with a clear
|
|
127
|
+
// error message that nudges the app toward the right wrap.
|
|
37
128
|
for (const doc of documents) {
|
|
38
|
-
const
|
|
39
|
-
const
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
129
|
+
const id = doc.id ?? crypto.randomUUID()
|
|
130
|
+
const embeddingLiteral = `[${doc.embedding.join(',')}]`
|
|
131
|
+
await this.exec().execute(
|
|
132
|
+
`INSERT INTO "${this.table}"
|
|
133
|
+
("id", "tenant_id", "collection", "source_id", "content", "metadata", "embedding", "created_at")
|
|
134
|
+
VALUES ($1, current_setting('app.tenant_id', true), $2, $3, $4, $5::jsonb, $6::vector, NOW())
|
|
135
|
+
ON CONFLICT ("id") DO UPDATE SET
|
|
136
|
+
"collection" = EXCLUDED."collection",
|
|
137
|
+
"source_id" = EXCLUDED."source_id",
|
|
138
|
+
"content" = EXCLUDED."content",
|
|
139
|
+
"metadata" = EXCLUDED."metadata",
|
|
140
|
+
"embedding" = EXCLUDED."embedding"`,
|
|
141
|
+
[
|
|
142
|
+
id,
|
|
143
|
+
collection,
|
|
144
|
+
doc.sourceId ?? null,
|
|
145
|
+
doc.content,
|
|
146
|
+
JSON.stringify(doc.metadata ?? {}),
|
|
147
|
+
embeddingLiteral,
|
|
148
|
+
],
|
|
47
149
|
)
|
|
48
150
|
}
|
|
49
151
|
}
|
|
50
152
|
|
|
51
|
-
async delete(collection: string, ids:
|
|
153
|
+
async delete(collection: string, ids: readonly string[]): Promise<void> {
|
|
52
154
|
if (ids.length === 0) return
|
|
53
155
|
const placeholders = ids.map((_, i) => `$${i + 2}`).join(', ')
|
|
54
|
-
await
|
|
55
|
-
`DELETE FROM
|
|
56
|
-
[collection, ...ids]
|
|
156
|
+
await this.exec().execute(
|
|
157
|
+
`DELETE FROM "${this.table}" WHERE "collection" = $1 AND "id" IN (${placeholders})`,
|
|
158
|
+
[collection, ...ids],
|
|
57
159
|
)
|
|
58
160
|
}
|
|
59
161
|
|
|
60
|
-
async deleteBySource(collection: string, sourceId: string
|
|
61
|
-
await
|
|
62
|
-
`DELETE FROM
|
|
63
|
-
[collection,
|
|
162
|
+
async deleteBySource(collection: string, sourceId: string): Promise<void> {
|
|
163
|
+
await this.exec().execute(
|
|
164
|
+
`DELETE FROM "${this.table}" WHERE "collection" = $1 AND "source_id" = $2`,
|
|
165
|
+
[collection, sourceId],
|
|
64
166
|
)
|
|
65
167
|
}
|
|
66
168
|
|
|
67
169
|
async flush(collection: string): Promise<void> {
|
|
68
|
-
await
|
|
69
|
-
`DELETE FROM
|
|
70
|
-
[collection]
|
|
170
|
+
await this.exec().execute(
|
|
171
|
+
`DELETE FROM "${this.table}" WHERE "collection" = $1`,
|
|
172
|
+
[collection],
|
|
71
173
|
)
|
|
72
174
|
}
|
|
73
175
|
|
|
176
|
+
// ─── Query ────────────────────────────────────────────────────────────
|
|
177
|
+
|
|
74
178
|
async query(
|
|
75
179
|
collection: string,
|
|
76
|
-
vector: number[],
|
|
77
|
-
options
|
|
180
|
+
vector: readonly number[],
|
|
181
|
+
options: QueryOptions = {},
|
|
78
182
|
): Promise<QueryResult> {
|
|
79
183
|
const start = performance.now()
|
|
80
|
-
const topK = options
|
|
81
|
-
const threshold = options
|
|
82
|
-
const embeddingStr = `[${vector.join(',')}]`
|
|
184
|
+
const topK = options.topK ?? 5
|
|
185
|
+
const threshold = options.threshold
|
|
83
186
|
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
187
|
+
// pgvector's `<=>` is cosine distance in [0, 2]; `1 - (a <=> b)`
|
|
188
|
+
// is cosine similarity. We further map cos similarity in
|
|
189
|
+
// [-1, 1] → [0, 1] via `(s + 1) / 2` to match MemoryDriver so
|
|
190
|
+
// scores are comparable across drivers.
|
|
191
|
+
const params: unknown[] = [collection, `[${vector.join(',')}]`]
|
|
192
|
+
const where: string[] = [`"collection" = $1`]
|
|
87
193
|
|
|
88
|
-
if (options
|
|
194
|
+
if (options.filter) {
|
|
89
195
|
for (const [key, value] of Object.entries(options.filter)) {
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
paramIndex++
|
|
196
|
+
params.push(JSON.stringify(value))
|
|
197
|
+
where.push(`"metadata" @> jsonb_build_object('${escapeJsonbKey(key)}', $${params.length}::jsonb)`)
|
|
93
198
|
}
|
|
94
199
|
}
|
|
95
200
|
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
201
|
+
let sql = `
|
|
202
|
+
SELECT "id", "source_id", "content", "metadata",
|
|
203
|
+
((1 - ("embedding" <=> $2::vector)) + 1) / 2 AS score
|
|
204
|
+
FROM "${this.table}"
|
|
205
|
+
WHERE ${where.join(' AND ')}
|
|
206
|
+
`
|
|
207
|
+
if (threshold !== undefined) {
|
|
208
|
+
params.push(threshold)
|
|
209
|
+
sql += ` AND ((1 - ("embedding" <=> $2::vector)) + 1) / 2 >= $${params.length}`
|
|
100
210
|
}
|
|
101
|
-
|
|
211
|
+
params.push(topK)
|
|
212
|
+
sql += ` ORDER BY "embedding" <=> $2::vector LIMIT $${params.length}`
|
|
213
|
+
|
|
214
|
+
let rows: Array<{
|
|
215
|
+
id: string
|
|
216
|
+
source_id: string | null
|
|
217
|
+
content: string
|
|
218
|
+
metadata: Record<string, unknown> | string
|
|
219
|
+
score: number | string
|
|
220
|
+
}>
|
|
102
221
|
try {
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
ORDER BY embedding <=> $${paramIndex}::vector
|
|
109
|
-
LIMIT $${paramIndex + 1}`,
|
|
110
|
-
[...params, embeddingStr, topK]
|
|
111
|
-
)) as any[]
|
|
112
|
-
|
|
113
|
-
const matches: VectorMatch[] = rows.map((row: any) => ({
|
|
114
|
-
id: row.source_id ?? row.id,
|
|
115
|
-
content: row.content,
|
|
116
|
-
score: parseFloat(row.score),
|
|
117
|
-
metadata: typeof row.metadata === 'string' ? JSON.parse(row.metadata) : row.metadata ?? {},
|
|
118
|
-
}))
|
|
119
|
-
|
|
120
|
-
return {
|
|
121
|
-
matches,
|
|
122
|
-
processingTimeMs: performance.now() - start,
|
|
123
|
-
}
|
|
124
|
-
} catch (err) {
|
|
125
|
-
throw new VectorQueryError(collection, err instanceof Error ? err.message : String(err))
|
|
126
|
-
}
|
|
127
|
-
}
|
|
128
|
-
|
|
129
|
-
private async ensureTable(dimension: number): Promise<void> {
|
|
130
|
-
if (this.initialized) return
|
|
131
|
-
|
|
132
|
-
const sql = Database.raw
|
|
133
|
-
|
|
134
|
-
await sql.unsafe(`CREATE EXTENSION IF NOT EXISTS vector`)
|
|
135
|
-
|
|
136
|
-
await sql.unsafe(`
|
|
137
|
-
CREATE TABLE IF NOT EXISTS _strav_vectors (
|
|
138
|
-
id BIGSERIAL PRIMARY KEY,
|
|
139
|
-
collection VARCHAR(255) NOT NULL,
|
|
140
|
-
source_id VARCHAR(255),
|
|
141
|
-
content TEXT NOT NULL,
|
|
142
|
-
metadata JSONB DEFAULT '{}',
|
|
143
|
-
embedding vector(${dimension}),
|
|
144
|
-
created_at TIMESTAMPTZ DEFAULT NOW()
|
|
222
|
+
rows = await this.exec().query(sql, params)
|
|
223
|
+
} catch (cause) {
|
|
224
|
+
throw new VectorQueryError(
|
|
225
|
+
`pgvector query failed for collection "${collection}".`,
|
|
226
|
+
{ context: { collection, table: this.table }, cause },
|
|
145
227
|
)
|
|
146
|
-
|
|
228
|
+
}
|
|
147
229
|
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
230
|
+
const matches: VectorMatch[] = rows.map((r) => ({
|
|
231
|
+
id: r.id,
|
|
232
|
+
content: r.content,
|
|
233
|
+
score: typeof r.score === 'string' ? Number.parseFloat(r.score) : r.score,
|
|
234
|
+
metadata: typeof r.metadata === 'string' ? JSON.parse(r.metadata) : r.metadata,
|
|
235
|
+
sourceId: r.source_id,
|
|
236
|
+
}))
|
|
237
|
+
return { matches, processingTimeMs: performance.now() - start }
|
|
238
|
+
}
|
|
239
|
+
}
|
|
154
240
|
|
|
155
|
-
|
|
241
|
+
/**
|
|
242
|
+
* Escape a JSONB object key for embedding in an SQL string. Keys
|
|
243
|
+
* are app-supplied so we sanitize defensively — double any
|
|
244
|
+
* single quotes; refuse keys with NUL bytes.
|
|
245
|
+
*/
|
|
246
|
+
function escapeJsonbKey(key: string): string {
|
|
247
|
+
if (key.includes('\0')) {
|
|
248
|
+
throw new VectorQueryError(`pgvector filter key contains NUL byte: ${JSON.stringify(key)}`)
|
|
156
249
|
}
|
|
250
|
+
return key.replace(/'/g, "''")
|
|
157
251
|
}
|
package/src/index.ts
CHANGED
|
@@ -1,48 +1,58 @@
|
|
|
1
|
-
//
|
|
2
|
-
|
|
1
|
+
// Public API of `@strav/rag`.
|
|
2
|
+
//
|
|
3
|
+
// V1: vector store abstraction + memory & pgvector drivers +
|
|
4
|
+
// fixed-size & recursive chunkers + RagManager + RagProvider.
|
|
5
|
+
// Composes with `@strav/brain` for embeddings and `@strav/database`
|
|
6
|
+
// for pgvector persistence + multitenancy.
|
|
7
|
+
//
|
|
8
|
+
// Deferred to follow-up slices: the `rag:reindex` CLI command and
|
|
9
|
+
// re-ranking strategies. (`retrievable()` and `rag:flush` are exported below.)
|
|
3
10
|
|
|
4
|
-
// Provider
|
|
5
|
-
export { default as RagProvider } from './rag_provider.ts'
|
|
6
|
-
|
|
7
|
-
// Store interface
|
|
8
|
-
export type { VectorStore } from './vector_store.ts'
|
|
9
|
-
|
|
10
|
-
// Drivers
|
|
11
|
-
export { NullDriver } from './drivers/null_driver.ts'
|
|
12
|
-
export { MemoryDriver } from './drivers/memory_driver.ts'
|
|
13
|
-
export { PgvectorDriver } from './drivers/pgvector_driver.ts'
|
|
14
|
-
|
|
15
|
-
// Mixin
|
|
16
|
-
export { retrievable } from './retrievable.ts'
|
|
17
|
-
export type { RetrievableInstance, RetrievableModel } from './retrievable.ts'
|
|
18
|
-
|
|
19
|
-
// Helper
|
|
20
|
-
export { rag } from './helpers.ts'
|
|
21
|
-
|
|
22
|
-
// Chunking
|
|
23
11
|
export { createChunker } from './chunking/chunker.ts'
|
|
24
12
|
export { FixedSizeChunker } from './chunking/fixed_size_chunker.ts'
|
|
25
13
|
export { RecursiveChunker } from './chunking/recursive_chunker.ts'
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
14
|
+
export { MemoryDriver } from './drivers/memory_driver.ts'
|
|
15
|
+
export {
|
|
16
|
+
PgvectorDriver,
|
|
17
|
+
type PgvectorDriverOptions,
|
|
18
|
+
} from './drivers/pgvector_driver.ts'
|
|
19
|
+
export {
|
|
20
|
+
applyRagVectorMigration,
|
|
21
|
+
type ApplyRagVectorMigrationOptions,
|
|
22
|
+
} from './migrations.ts'
|
|
23
|
+
export {
|
|
24
|
+
CollectionNotFoundError,
|
|
25
|
+
EmbeddingError,
|
|
26
|
+
RagError,
|
|
27
|
+
VectorQueryError,
|
|
28
|
+
} from './rag_error.ts'
|
|
29
|
+
export {
|
|
30
|
+
type IngestOptions,
|
|
31
|
+
RagManager,
|
|
32
|
+
type RagManagerOptions,
|
|
33
|
+
type StoreFactory,
|
|
34
|
+
} from './rag_manager.ts'
|
|
35
|
+
export {
|
|
36
|
+
RagConsoleProvider,
|
|
37
|
+
RagFlush,
|
|
38
|
+
RagList,
|
|
39
|
+
} from './console/index.ts'
|
|
40
|
+
export { RagProvider } from './rag_provider.ts'
|
|
41
|
+
export { ragVectorSchema } from './rag_vector_schema.ts'
|
|
42
|
+
export { retrievable } from './retrievable.ts'
|
|
31
43
|
export type {
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
EmbeddingConfig,
|
|
44
|
+
Chunk,
|
|
45
|
+
Chunker,
|
|
35
46
|
ChunkingConfig,
|
|
36
|
-
|
|
47
|
+
EmbeddingConfig,
|
|
37
48
|
QueryOptions,
|
|
38
49
|
QueryResult,
|
|
39
|
-
|
|
50
|
+
RagConfig,
|
|
40
51
|
RetrieveOptions,
|
|
41
|
-
RerankOptions,
|
|
42
52
|
RetrieveResult,
|
|
43
53
|
RetrievedDocument,
|
|
44
|
-
|
|
45
|
-
|
|
54
|
+
StoreConfig,
|
|
55
|
+
VectorDocument,
|
|
56
|
+
VectorMatch,
|
|
46
57
|
} from './types.ts'
|
|
47
|
-
|
|
48
|
-
export type { IngestOptions } from './helpers.ts'
|
|
58
|
+
export type { VectorStore } from './vector_store.ts'
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Migration helpers — emit the DDL apps need to put `rag_vector`
|
|
3
|
+
* into a working state. The framework's `emitCreateTable` handles
|
|
4
|
+
* everything except the pgvector-specific bits (the `vector(N)`
|
|
5
|
+
* column type and the HNSW index). This module fills the gap.
|
|
6
|
+
*
|
|
7
|
+
* Apps drop one call into their migration:
|
|
8
|
+
*
|
|
9
|
+
* ```ts
|
|
10
|
+
* import { SchemaRegistry, emitDropTable, type Migration } from '@strav/database'
|
|
11
|
+
* import { applyRagVectorMigration, ragVectorSchema } from '@strav/rag'
|
|
12
|
+
*
|
|
13
|
+
* export const migration: Migration = {
|
|
14
|
+
* name: '20260601000000_create_rag_vector',
|
|
15
|
+
* async up(db) {
|
|
16
|
+
* await applyRagVectorMigration(db, {
|
|
17
|
+
* dimension: 1536, // match the embedding model
|
|
18
|
+
* registry,
|
|
19
|
+
* })
|
|
20
|
+
* },
|
|
21
|
+
* async down(db) {
|
|
22
|
+
* await db.execute(emitDropTable(ragVectorSchema.name).sql)
|
|
23
|
+
* },
|
|
24
|
+
* }
|
|
25
|
+
* ```
|
|
26
|
+
*
|
|
27
|
+
* The helper is idempotent by way of `IF NOT EXISTS` clauses where
|
|
28
|
+
* Postgres supports them, but apps should still rely on the
|
|
29
|
+
* migration runner's tracking table for re-run safety rather than
|
|
30
|
+
* the helper itself.
|
|
31
|
+
*/
|
|
32
|
+
|
|
33
|
+
import {
|
|
34
|
+
emitCreateTable,
|
|
35
|
+
type DatabaseExecutor,
|
|
36
|
+
type SchemaRegistry,
|
|
37
|
+
} from '@strav/database'
|
|
38
|
+
import { ragVectorSchema } from './rag_vector_schema.ts'
|
|
39
|
+
|
|
40
|
+
export interface ApplyRagVectorMigrationOptions {
|
|
41
|
+
/**
|
|
42
|
+
* Vector dimension. Must match the configured embedding model
|
|
43
|
+
* (OpenAI's `text-embedding-3-small` → 1536,
|
|
44
|
+
* `text-embedding-3-large` → 3072, Gemini's
|
|
45
|
+
* `text-embedding-004` → 768, etc.). Mismatched dimensions
|
|
46
|
+
* cause `vector` casts at INSERT to throw.
|
|
47
|
+
*/
|
|
48
|
+
dimension: number
|
|
49
|
+
/**
|
|
50
|
+
* Schema registry — required for `emitCreateTable` to resolve
|
|
51
|
+
* foreign-key references (the tenant registry, in this case).
|
|
52
|
+
*/
|
|
53
|
+
registry: SchemaRegistry
|
|
54
|
+
/**
|
|
55
|
+
* Optional override table name. Defaults to `rag_vector` (the
|
|
56
|
+
* `ragVectorSchema.name`). Apps that need multiple vector
|
|
57
|
+
* tables (e.g., one per dimension) override this here AND
|
|
58
|
+
* register their own schema variant under the override name.
|
|
59
|
+
*/
|
|
60
|
+
table?: string
|
|
61
|
+
/**
|
|
62
|
+
* HNSW construction parameter `m`. Omit to use pgvector's
|
|
63
|
+
* default (16). Higher = better recall, slower builds.
|
|
64
|
+
*/
|
|
65
|
+
hnswM?: number
|
|
66
|
+
/**
|
|
67
|
+
* HNSW construction parameter `ef_construction`. Default 64.
|
|
68
|
+
* Higher = better recall, slower builds.
|
|
69
|
+
*/
|
|
70
|
+
hnswEfConstruction?: number
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
export async function applyRagVectorMigration(
|
|
74
|
+
db: DatabaseExecutor,
|
|
75
|
+
options: ApplyRagVectorMigrationOptions,
|
|
76
|
+
): Promise<void> {
|
|
77
|
+
const table = options.table ?? ragVectorSchema.name
|
|
78
|
+
const { dimension, registry } = options
|
|
79
|
+
|
|
80
|
+
await db.execute(`CREATE EXTENSION IF NOT EXISTS vector`)
|
|
81
|
+
|
|
82
|
+
// Framework table + RLS + tenant_id column come from emitCreateTable.
|
|
83
|
+
await db.execute(emitCreateTable(ragVectorSchema, { registry }).sql)
|
|
84
|
+
|
|
85
|
+
// Vector column — pgvector-specific. NOT NULL because every
|
|
86
|
+
// ingested chunk has an embedding by construction.
|
|
87
|
+
await db.execute(
|
|
88
|
+
`ALTER TABLE "${table}" ADD COLUMN IF NOT EXISTS "embedding" vector(${dimension}) NOT NULL`,
|
|
89
|
+
)
|
|
90
|
+
|
|
91
|
+
// HNSW index on cosine ops — pgvector's default for similarity
|
|
92
|
+
// search. Partial index per collection isn't possible at
|
|
93
|
+
// CREATE INDEX time without a literal value; apps that have
|
|
94
|
+
// very large per-collection corpora add `WHERE collection = '...'`
|
|
95
|
+
// partial indexes in a separate migration.
|
|
96
|
+
const hnswOpts: string[] = []
|
|
97
|
+
if (options.hnswM !== undefined) hnswOpts.push(`m = ${options.hnswM}`)
|
|
98
|
+
if (options.hnswEfConstruction !== undefined) {
|
|
99
|
+
hnswOpts.push(`ef_construction = ${options.hnswEfConstruction}`)
|
|
100
|
+
}
|
|
101
|
+
const withClause = hnswOpts.length > 0 ? ` WITH (${hnswOpts.join(', ')})` : ''
|
|
102
|
+
await db.execute(
|
|
103
|
+
`CREATE INDEX IF NOT EXISTS "idx_${table}_embedding_hnsw"
|
|
104
|
+
ON "${table}" USING hnsw ("embedding" vector_cosine_ops)${withClause}`,
|
|
105
|
+
)
|
|
106
|
+
|
|
107
|
+
// Helpful secondary indexes for the standard access patterns.
|
|
108
|
+
await db.execute(
|
|
109
|
+
`CREATE INDEX IF NOT EXISTS "idx_${table}_collection"
|
|
110
|
+
ON "${table}" ("collection")`,
|
|
111
|
+
)
|
|
112
|
+
await db.execute(
|
|
113
|
+
`CREATE INDEX IF NOT EXISTS "idx_${table}_source_id"
|
|
114
|
+
ON "${table}" ("source_id") WHERE "source_id" IS NOT NULL`,
|
|
115
|
+
)
|
|
116
|
+
}
|
package/src/rag_error.ts
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `RagError` hierarchy — typed wrappers for failures in the RAG
|
|
3
|
+
* stack. Each subclass carries a specific error code so apps can
|
|
4
|
+
* branch on the failure mode at the call site instead of parsing
|
|
5
|
+
* error messages.
|
|
6
|
+
*
|
|
7
|
+
* Three concrete subclasses ship in V1:
|
|
8
|
+
*
|
|
9
|
+
* - `CollectionNotFoundError` — `rag.retrieve` against a
|
|
10
|
+
* collection that doesn't exist on the active store. Apps
|
|
11
|
+
* create the collection via `rag.createCollection(...)`
|
|
12
|
+
* before the first ingest.
|
|
13
|
+
*
|
|
14
|
+
* - `VectorQueryError` — the underlying store rejected the
|
|
15
|
+
* query (bad dimension, malformed filter, etc.). Cause
|
|
16
|
+
* carries the driver-native error.
|
|
17
|
+
*
|
|
18
|
+
* - `EmbeddingError` — the brain provider rejected the
|
|
19
|
+
* embedding call. Wraps the brain-side error so apps can test
|
|
20
|
+
* `error.cause instanceof BrainError` for retry logic.
|
|
21
|
+
*/
|
|
22
|
+
|
|
23
|
+
import { StravError } from '@strav/kernel'
|
|
24
|
+
|
|
25
|
+
export class RagError extends StravError {
|
|
26
|
+
constructor(
|
|
27
|
+
message: string,
|
|
28
|
+
options: {
|
|
29
|
+
code?: string
|
|
30
|
+
status?: number
|
|
31
|
+
context?: Record<string, unknown>
|
|
32
|
+
cause?: unknown
|
|
33
|
+
} = {},
|
|
34
|
+
) {
|
|
35
|
+
super(
|
|
36
|
+
message,
|
|
37
|
+
{ code: options.code ?? 'rag.error', status: options.status ?? 500 },
|
|
38
|
+
{ ...(options.context ? { context: options.context } : {}), ...(options.cause !== undefined ? { cause: options.cause } : {}) },
|
|
39
|
+
)
|
|
40
|
+
}
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
export class CollectionNotFoundError extends RagError {
|
|
44
|
+
constructor(collection: string, store: string) {
|
|
45
|
+
super(
|
|
46
|
+
`RAG collection "${collection}" does not exist on store "${store}". Call \`rag.createCollection("${collection}", dim)\` before the first ingest.`,
|
|
47
|
+
{
|
|
48
|
+
code: 'rag.collection_not_found',
|
|
49
|
+
status: 404,
|
|
50
|
+
context: { collection, store },
|
|
51
|
+
},
|
|
52
|
+
)
|
|
53
|
+
}
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
export class VectorQueryError extends RagError {
|
|
57
|
+
constructor(message: string, options: { context?: Record<string, unknown>; cause?: unknown } = {}) {
|
|
58
|
+
super(message, {
|
|
59
|
+
code: 'rag.vector_query',
|
|
60
|
+
status: 500,
|
|
61
|
+
...(options.context ? { context: options.context } : {}),
|
|
62
|
+
...(options.cause !== undefined ? { cause: options.cause } : {}),
|
|
63
|
+
})
|
|
64
|
+
}
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
export class EmbeddingError extends RagError {
|
|
68
|
+
constructor(message: string, options: { context?: Record<string, unknown>; cause?: unknown } = {}) {
|
|
69
|
+
super(message, {
|
|
70
|
+
code: 'rag.embedding',
|
|
71
|
+
status: 500,
|
|
72
|
+
...(options.context ? { context: options.context } : {}),
|
|
73
|
+
...(options.cause !== undefined ? { cause: options.cause } : {}),
|
|
74
|
+
})
|
|
75
|
+
}
|
|
76
|
+
}
|