@strav/rag 1.0.0-alpha.29 → 1.0.0-alpha.31
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +5 -5
- package/src/console/index.ts +1 -0
- package/src/console/rag_console_provider.ts +2 -1
- package/src/console/rag_flush.ts +5 -7
- package/src/console/rag_reindex.ts +100 -0
- package/src/drivers/memory/memory_driver.ts +2 -10
- package/src/drivers/pgvector/pgvector_driver.ts +11 -18
- package/src/index.ts +31 -16
- package/src/rag_error.ts +12 -3
- package/src/rag_manager.ts +28 -27
- package/src/rag_provider.ts +5 -8
- package/src/rerankers/index.ts +3 -0
- package/src/rerankers/keyword_reranker.ts +69 -0
- package/src/rerankers/mmr_reranker.ts +107 -0
- package/src/rerankers/reranker.ts +35 -0
- package/src/retrievable.ts +4 -14
- package/src/retrievable_registry.ts +60 -0
- package/src/types.ts +19 -0
- package/src/vector_store.ts +1 -5
- package/src/vectors/apply_rag_vector_migration.ts +1 -5
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@strav/rag",
|
|
3
|
-
"version": "1.0.0-alpha.29",
|
|
3
|
+
"version": "1.0.0-alpha.31",
|
|
4
4
|
"description": "Strav RAG module — vector store abstraction, pgvector + in-memory drivers, chunking strategies. Composes with @strav/brain for embeddings and @strav/database for persistence.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./src/index.ts",
|
|
@@ -19,10 +19,10 @@
|
|
|
19
19
|
"access": "public"
|
|
20
20
|
},
|
|
21
21
|
"dependencies": {
|
|
22
|
-
"@strav/brain": "1.0.0-alpha.
|
|
23
|
-
"@strav/cli": "1.0.0-alpha.
|
|
24
|
-
"@strav/database": "1.0.0-alpha.
|
|
25
|
-
"@strav/kernel": "1.0.0-alpha.
|
|
22
|
+
"@strav/brain": "1.0.0-alpha.31",
|
|
23
|
+
"@strav/cli": "1.0.0-alpha.31",
|
|
24
|
+
"@strav/database": "1.0.0-alpha.31",
|
|
25
|
+
"@strav/kernel": "1.0.0-alpha.31"
|
|
26
26
|
},
|
|
27
27
|
"peerDependencies": {
|
|
28
28
|
"@types/bun": ">=1.3.14"
|
package/src/console/index.ts
CHANGED
|
package/src/console/rag_console_provider.ts
CHANGED
|
@@ -10,8 +10,9 @@
|
|
|
10
10
|
import { ConsoleProvider } from '@strav/cli'
|
|
11
11
|
import { RagFlush } from './rag_flush.ts'
|
|
12
12
|
import { RagList } from './rag_list.ts'
|
|
13
|
+
import { RagReindex } from './rag_reindex.ts'
|
|
13
14
|
|
|
14
15
|
export class RagConsoleProvider extends ConsoleProvider {
|
|
15
16
|
override readonly name = 'console.rag'
|
|
16
|
-
override readonly commands = [RagFlush, RagList] as const
|
|
17
|
+
override readonly commands = [RagFlush, RagList, RagReindex] as const
|
|
17
18
|
}
|
package/src/console/rag_flush.ts
CHANGED
|
@@ -19,14 +19,14 @@ import { RagManager } from '../rag_manager.ts'
|
|
|
19
19
|
|
|
20
20
|
export class RagFlush extends Command {
|
|
21
21
|
static signature = 'rag:flush {collection} {--store=} {--force}'
|
|
22
|
-
static description =
|
|
22
|
+
static description =
|
|
23
|
+
'Delete every vector in a collection (on the active or --store= named store).'
|
|
23
24
|
static providers = ['config', 'logger', 'brain', 'rag']
|
|
24
25
|
|
|
25
26
|
override async execute({ args, flags }: ExecuteArgs): Promise<number> {
|
|
26
27
|
const collection = args.collection as string
|
|
27
|
-
const storeName =
|
|
28
|
-
? flags.store
|
|
29
|
-
: undefined
|
|
28
|
+
const storeName =
|
|
29
|
+
typeof flags.store === 'string' && flags.store.length > 0 ? flags.store : undefined
|
|
30
30
|
|
|
31
31
|
const manager = this.app.resolve(RagManager)
|
|
32
32
|
const fullCollection = manager.collectionName(collection)
|
|
@@ -43,9 +43,7 @@ export class RagFlush extends Command {
|
|
|
43
43
|
}
|
|
44
44
|
|
|
45
45
|
await manager.store(storeName).flush(fullCollection)
|
|
46
|
-
this.success(
|
|
47
|
-
`Flushed collection "${fullCollection}" on store "${storeLabel}".`,
|
|
48
|
-
)
|
|
46
|
+
this.success(`Flushed collection "${fullCollection}" on store "${storeLabel}".`)
|
|
49
47
|
return ExitCode.Success
|
|
50
48
|
}
|
|
51
49
|
}
|
package/src/console/rag_reindex.ts
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `bun strav rag:reindex {name?} [--all] [--batch=100]` —
|
|
3
|
+
* walk a registered repository and re-vectorize every row.
|
|
4
|
+
*
|
|
5
|
+
* Apps register repos at boot:
|
|
6
|
+
*
|
|
7
|
+
* const registry = app.resolve(RetrievableRegistry)
|
|
8
|
+
* registry.register('articles', ArticleRepository)
|
|
9
|
+
*
|
|
10
|
+
* Then:
|
|
11
|
+
*
|
|
12
|
+
* bun strav rag:reindex articles # one repo
|
|
13
|
+
* bun strav rag:reindex --all # every registered repo
|
|
14
|
+
*
|
|
15
|
+
* The repo class must implement `reindexAll(batchSize?)` — the
|
|
16
|
+
* `retrievable()` mixin already does. Batch size defaults to 100;
|
|
17
|
+
* apps hitting embedding rate limits drop it lower.
|
|
18
|
+
*
|
|
19
|
+
* Long-running on large corpora — apps that need cron-driven or
|
|
20
|
+
* queued re-index typically ship a custom command pointing at the
|
|
21
|
+
* same `reindexAll` method.
|
|
22
|
+
*/
|
|
23
|
+
|
|
24
|
+
import { Command, type ExecuteArgs, ExitCode } from '@strav/cli'
|
|
25
|
+
import { RagError } from '../rag_error.ts'
|
|
26
|
+
import { RetrievableRegistry } from '../retrievable_registry.ts'
|
|
27
|
+
|
|
28
|
+
export class RagReindex extends Command {
|
|
29
|
+
static signature = 'rag:reindex {name?} {--all} {--batch=100}'
|
|
30
|
+
static description =
|
|
31
|
+
'Re-vectorize one registered retrievable repository (or every one with --all).'
|
|
32
|
+
static providers = ['config', 'logger', 'brain', 'rag', 'database']
|
|
33
|
+
|
|
34
|
+
override async execute({ args, flags }: ExecuteArgs): Promise<number> {
|
|
35
|
+
const registry = this.app.resolve(RetrievableRegistry)
|
|
36
|
+
const batchSize = parseBatch(flags.batch)
|
|
37
|
+
|
|
38
|
+
if (flags.all === true) {
|
|
39
|
+
const names = registry.names()
|
|
40
|
+
if (names.length === 0) {
|
|
41
|
+
this.warn(
|
|
42
|
+
'No retrievables registered. Call `registry.register(name, Repo)` from a service provider first.',
|
|
43
|
+
)
|
|
44
|
+
return ExitCode.Success
|
|
45
|
+
}
|
|
46
|
+
let total = 0
|
|
47
|
+
for (const name of names) {
|
|
48
|
+
const processed = await this.reindexOne(registry, name, batchSize)
|
|
49
|
+
total += processed
|
|
50
|
+
}
|
|
51
|
+
this.success(
|
|
52
|
+
`Re-indexed ${total} rows across ${names.length} repositor${names.length === 1 ? 'y' : 'ies'}.`,
|
|
53
|
+
)
|
|
54
|
+
return ExitCode.Success
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
const name = args.name
|
|
58
|
+
if (typeof name !== 'string' || name.length === 0) {
|
|
59
|
+
this.error(
|
|
60
|
+
'rag:reindex requires a repository name, or --all to re-index every registered repository.',
|
|
61
|
+
)
|
|
62
|
+
this.info(`Registered: ${registry.names().join(', ') || '(none)'}`)
|
|
63
|
+
return ExitCode.UsageError
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
try {
|
|
67
|
+
const processed = await this.reindexOne(registry, name, batchSize)
|
|
68
|
+
this.success(`Re-indexed ${processed} rows in "${name}".`)
|
|
69
|
+
return ExitCode.Success
|
|
70
|
+
} catch (err) {
|
|
71
|
+
if (err instanceof RagError) {
|
|
72
|
+
this.error(err.message)
|
|
73
|
+
this.info(`Registered: ${registry.names().join(', ') || '(none)'}`)
|
|
74
|
+
return ExitCode.GenericFailure
|
|
75
|
+
}
|
|
76
|
+
throw err
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
private async reindexOne(
|
|
81
|
+
registry: RetrievableRegistry,
|
|
82
|
+
name: string,
|
|
83
|
+
batchSize: number,
|
|
84
|
+
): Promise<number> {
|
|
85
|
+
this.info(`Re-indexing "${name}"…`)
|
|
86
|
+
const repo = this.app.resolve(registry.resolve(name))
|
|
87
|
+
const processed = await repo.reindexAll(batchSize)
|
|
88
|
+
this.info(` ${processed} rows.`)
|
|
89
|
+
return processed
|
|
90
|
+
}
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
function parseBatch(raw: unknown): number {
|
|
94
|
+
if (typeof raw === 'number' && raw > 0) return Math.floor(raw)
|
|
95
|
+
if (typeof raw === 'string') {
|
|
96
|
+
const n = Number.parseInt(raw, 10)
|
|
97
|
+
if (Number.isFinite(n) && n > 0) return n
|
|
98
|
+
}
|
|
99
|
+
return 100
|
|
100
|
+
}
|
package/src/drivers/memory/memory_driver.ts
CHANGED
|
@@ -21,12 +21,7 @@
|
|
|
21
21
|
*/
|
|
22
22
|
|
|
23
23
|
import { CollectionNotFoundError } from '../../rag_error.ts'
|
|
24
|
-
import type {
|
|
25
|
-
QueryOptions,
|
|
26
|
-
QueryResult,
|
|
27
|
-
VectorDocument,
|
|
28
|
-
VectorMatch,
|
|
29
|
-
} from '../../types.ts'
|
|
24
|
+
import type { QueryOptions, QueryResult, VectorDocument, VectorMatch } from '../../types.ts'
|
|
30
25
|
import type { VectorStore } from '../../vector_store.ts'
|
|
31
26
|
|
|
32
27
|
interface StoredDoc {
|
|
@@ -55,10 +50,7 @@ export class MemoryDriver implements VectorStore {
|
|
|
55
50
|
this.dimensions.delete(collection)
|
|
56
51
|
}
|
|
57
52
|
|
|
58
|
-
async upsert(
|
|
59
|
-
collection: string,
|
|
60
|
-
documents: readonly VectorDocument[],
|
|
61
|
-
): Promise<void> {
|
|
53
|
+
async upsert(collection: string, documents: readonly VectorDocument[]): Promise<void> {
|
|
62
54
|
const bucket = this.requireBucket(collection)
|
|
63
55
|
for (const doc of documents) {
|
|
64
56
|
const id = doc.id ?? crypto.randomUUID()
|
package/src/drivers/pgvector/pgvector_driver.ts
CHANGED
|
@@ -34,7 +34,6 @@ import {
|
|
|
34
34
|
type PostgresDatabase,
|
|
35
35
|
} from '@strav/database'
|
|
36
36
|
import { VectorQueryError } from '../../rag_error.ts'
|
|
37
|
-
import { ragVectorSchema } from '../../vectors/rag_vector_schema.ts'
|
|
38
37
|
import type {
|
|
39
38
|
QueryOptions,
|
|
40
39
|
QueryResult,
|
|
@@ -43,6 +42,7 @@ import type {
|
|
|
43
42
|
VectorMatch,
|
|
44
43
|
} from '../../types.ts'
|
|
45
44
|
import type { VectorStore } from '../../vector_store.ts'
|
|
45
|
+
import { ragVectorSchema } from '../../vectors/rag_vector_schema.ts'
|
|
46
46
|
|
|
47
47
|
export interface PgvectorDriverOptions {
|
|
48
48
|
/** PostgresDatabase instance — typically resolved from the container. */
|
|
@@ -99,18 +99,12 @@ export class PgvectorDriver implements VectorStore {
|
|
|
99
99
|
}
|
|
100
100
|
|
|
101
101
|
async deleteCollection(collection: string): Promise<void> {
|
|
102
|
-
await this.exec().execute(
|
|
103
|
-
`DELETE FROM "${this.table}" WHERE "collection" = $1`,
|
|
104
|
-
[collection],
|
|
105
|
-
)
|
|
102
|
+
await this.exec().execute(`DELETE FROM "${this.table}" WHERE "collection" = $1`, [collection])
|
|
106
103
|
}
|
|
107
104
|
|
|
108
105
|
// ─── Mutations ────────────────────────────────────────────────────────
|
|
109
106
|
|
|
110
|
-
async upsert(
|
|
111
|
-
collection: string,
|
|
112
|
-
documents: readonly VectorDocument[],
|
|
113
|
-
): Promise<void> {
|
|
107
|
+
async upsert(collection: string, documents: readonly VectorDocument[]): Promise<void> {
|
|
114
108
|
if (documents.length === 0) return
|
|
115
109
|
// pgvector accepts the vector as a stringified array literal —
|
|
116
110
|
// `[0.12,0.34,...]` — cast with `::vector` at the boundary.
|
|
@@ -167,10 +161,7 @@ export class PgvectorDriver implements VectorStore {
|
|
|
167
161
|
}
|
|
168
162
|
|
|
169
163
|
async flush(collection: string): Promise<void> {
|
|
170
|
-
await this.exec().execute(
|
|
171
|
-
`DELETE FROM "${this.table}" WHERE "collection" = $1`,
|
|
172
|
-
[collection],
|
|
173
|
-
)
|
|
164
|
+
await this.exec().execute(`DELETE FROM "${this.table}" WHERE "collection" = $1`, [collection])
|
|
174
165
|
}
|
|
175
166
|
|
|
176
167
|
// ─── Query ────────────────────────────────────────────────────────────
|
|
@@ -194,7 +185,9 @@ export class PgvectorDriver implements VectorStore {
|
|
|
194
185
|
if (options.filter) {
|
|
195
186
|
for (const [key, value] of Object.entries(options.filter)) {
|
|
196
187
|
params.push(JSON.stringify(value))
|
|
197
|
-
where.push(
|
|
188
|
+
where.push(
|
|
189
|
+
`"metadata" @> jsonb_build_object('${escapeJsonbKey(key)}', $${params.length}::jsonb)`,
|
|
190
|
+
)
|
|
198
191
|
}
|
|
199
192
|
}
|
|
200
193
|
|
|
@@ -221,10 +214,10 @@ export class PgvectorDriver implements VectorStore {
|
|
|
221
214
|
try {
|
|
222
215
|
rows = await this.exec().query(sql, params)
|
|
223
216
|
} catch (cause) {
|
|
224
|
-
throw new VectorQueryError(
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
)
|
|
217
|
+
throw new VectorQueryError(`pgvector query failed for collection "${collection}".`, {
|
|
218
|
+
context: { collection, table: this.table },
|
|
219
|
+
cause,
|
|
220
|
+
})
|
|
228
221
|
}
|
|
229
222
|
|
|
230
223
|
const matches: VectorMatch[] = rows.map((r) => ({
|
package/src/index.ts
CHANGED
|
@@ -1,25 +1,30 @@
|
|
|
1
1
|
// Public API of `@strav/rag`.
|
|
2
2
|
//
|
|
3
|
-
//
|
|
4
|
-
//
|
|
3
|
+
// Shipped:
|
|
4
|
+
// - Vector store abstraction + Memory & Pgvector drivers.
|
|
5
|
+
// - Fixed-size + recursive chunkers.
|
|
6
|
+
// - `RagManager` + `RagProvider` service wiring.
|
|
7
|
+
// - `retrievable()` repository mixin + `RetrievableRegistry`.
|
|
8
|
+
// - CLI: `rag:list`, `rag:flush`, `rag:reindex {name|--all}`.
|
|
9
|
+
// - Re-ranking — `Reranker` interface + `KeywordReranker` +
|
|
10
|
+
// `MMRReranker` + `RetrieveOptions.rerank` / `rerankPool`.
|
|
5
11
|
// Composes with `@strav/brain` for embeddings and `@strav/database`
|
|
6
12
|
// for pgvector persistence + multitenancy.
|
|
7
|
-
//
|
|
8
|
-
// Deferred to follow-up slices: `retrievable()` repository mixin,
|
|
9
|
-
// CLI commands (`rag:reindex`, `rag:flush`), re-ranking strategies.
|
|
10
13
|
|
|
11
14
|
export { createChunker } from './chunking/chunker.ts'
|
|
12
15
|
export { FixedSizeChunker } from './chunking/fixed_size_chunker.ts'
|
|
13
16
|
export { RecursiveChunker } from './chunking/recursive_chunker.ts'
|
|
17
|
+
export {
|
|
18
|
+
RagConsoleProvider,
|
|
19
|
+
RagFlush,
|
|
20
|
+
RagList,
|
|
21
|
+
RagReindex,
|
|
22
|
+
} from './console/index.ts'
|
|
14
23
|
export { MemoryDriver } from './drivers/memory/memory_driver.ts'
|
|
15
24
|
export {
|
|
16
25
|
PgvectorDriver,
|
|
17
26
|
type PgvectorDriverOptions,
|
|
18
27
|
} from './drivers/pgvector/pgvector_driver.ts'
|
|
19
|
-
export {
|
|
20
|
-
applyRagVectorMigration,
|
|
21
|
-
type ApplyRagVectorMigrationOptions,
|
|
22
|
-
} from './vectors/apply_rag_vector_migration.ts'
|
|
23
28
|
export {
|
|
24
29
|
CollectionNotFoundError,
|
|
25
30
|
EmbeddingError,
|
|
@@ -32,14 +37,19 @@ export {
|
|
|
32
37
|
type RagManagerOptions,
|
|
33
38
|
type StoreFactory,
|
|
34
39
|
} from './rag_manager.ts'
|
|
35
|
-
export {
|
|
36
|
-
RagConsoleProvider,
|
|
37
|
-
RagFlush,
|
|
38
|
-
RagList,
|
|
39
|
-
} from './console/index.ts'
|
|
40
40
|
export { RagProvider } from './rag_provider.ts'
|
|
41
|
-
export {
|
|
41
|
+
export {
|
|
42
|
+
KeywordReranker,
|
|
43
|
+
type KeywordRerankerOptions,
|
|
44
|
+
MMRReranker,
|
|
45
|
+
type MMRRerankerOptions,
|
|
46
|
+
type Reranker,
|
|
47
|
+
} from './rerankers/index.ts'
|
|
42
48
|
export { retrievable } from './retrievable.ts'
|
|
49
|
+
export {
|
|
50
|
+
RetrievableRegistry,
|
|
51
|
+
type RetrievableTarget,
|
|
52
|
+
} from './retrievable_registry.ts'
|
|
43
53
|
export type {
|
|
44
54
|
Chunk,
|
|
45
55
|
Chunker,
|
|
@@ -48,11 +58,16 @@ export type {
|
|
|
48
58
|
QueryOptions,
|
|
49
59
|
QueryResult,
|
|
50
60
|
RagConfig,
|
|
61
|
+
RetrievedDocument,
|
|
51
62
|
RetrieveOptions,
|
|
52
63
|
RetrieveResult,
|
|
53
|
-
RetrievedDocument,
|
|
54
64
|
StoreConfig,
|
|
55
65
|
VectorDocument,
|
|
56
66
|
VectorMatch,
|
|
57
67
|
} from './types.ts'
|
|
58
68
|
export type { VectorStore } from './vector_store.ts'
|
|
69
|
+
export {
|
|
70
|
+
type ApplyRagVectorMigrationOptions,
|
|
71
|
+
applyRagVectorMigration,
|
|
72
|
+
} from './vectors/apply_rag_vector_migration.ts'
|
|
73
|
+
export { ragVectorSchema } from './vectors/rag_vector_schema.ts'
|
package/src/rag_error.ts
CHANGED
|
@@ -35,7 +35,10 @@ export class RagError extends StravError {
|
|
|
35
35
|
super(
|
|
36
36
|
message,
|
|
37
37
|
{ code: options.code ?? 'rag.error', status: options.status ?? 500 },
|
|
38
|
-
{
|
|
38
|
+
{
|
|
39
|
+
...(options.context ? { context: options.context } : {}),
|
|
40
|
+
...(options.cause !== undefined ? { cause: options.cause } : {}),
|
|
41
|
+
},
|
|
39
42
|
)
|
|
40
43
|
}
|
|
41
44
|
}
|
|
@@ -54,7 +57,10 @@ export class CollectionNotFoundError extends RagError {
|
|
|
54
57
|
}
|
|
55
58
|
|
|
56
59
|
export class VectorQueryError extends RagError {
|
|
57
|
-
constructor(
|
|
60
|
+
constructor(
|
|
61
|
+
message: string,
|
|
62
|
+
options: { context?: Record<string, unknown>; cause?: unknown } = {},
|
|
63
|
+
) {
|
|
58
64
|
super(message, {
|
|
59
65
|
code: 'rag.vector_query',
|
|
60
66
|
status: 500,
|
|
@@ -65,7 +71,10 @@ export class VectorQueryError extends RagError {
|
|
|
65
71
|
}
|
|
66
72
|
|
|
67
73
|
export class EmbeddingError extends RagError {
|
|
68
|
-
constructor(
|
|
74
|
+
constructor(
|
|
75
|
+
message: string,
|
|
76
|
+
options: { context?: Record<string, unknown>; cause?: unknown } = {},
|
|
77
|
+
) {
|
|
69
78
|
super(message, {
|
|
70
79
|
code: 'rag.embedding',
|
|
71
80
|
status: 500,
|
package/src/rag_manager.ts
CHANGED
|
@@ -23,10 +23,10 @@
|
|
|
23
23
|
* `tenants.withTenant(...)` get per-tenant isolation for free.
|
|
24
24
|
*/
|
|
25
25
|
|
|
26
|
-
// biome-ignore lint/style/useImportType: PostgresDatabase value import for the container path that wires PgvectorDriver.
|
|
27
|
-
import { PostgresDatabase } from '@strav/database'
|
|
28
26
|
// biome-ignore lint/style/useImportType: BrainManager value import for @inject() param-type metadata.
|
|
29
27
|
import { BrainManager } from '@strav/brain'
|
|
28
|
+
// biome-ignore lint/style/useImportType: PostgresDatabase value import for the container path that wires PgvectorDriver.
|
|
29
|
+
import { PostgresDatabase } from '@strav/database'
|
|
30
30
|
// biome-ignore lint/style/useImportType: Application value import for the container handle.
|
|
31
31
|
import { Application, inject, ulid } from '@strav/kernel'
|
|
32
32
|
import { createChunker } from './chunking/chunker.ts'
|
|
@@ -34,13 +34,13 @@ import { MemoryDriver } from './drivers/memory/memory_driver.ts'
|
|
|
34
34
|
import { PgvectorDriver } from './drivers/pgvector/pgvector_driver.ts'
|
|
35
35
|
import { EmbeddingError, RagError } from './rag_error.ts'
|
|
36
36
|
import type {
|
|
37
|
-
ChunkingConfig,
|
|
38
37
|
Chunk,
|
|
39
38
|
Chunker,
|
|
39
|
+
ChunkingConfig,
|
|
40
40
|
RagConfig,
|
|
41
|
+
RetrievedDocument,
|
|
41
42
|
RetrieveOptions,
|
|
42
43
|
RetrieveResult,
|
|
43
|
-
RetrievedDocument,
|
|
44
44
|
StoreConfig,
|
|
45
45
|
VectorDocument,
|
|
46
46
|
} from './types.ts'
|
|
@@ -168,7 +168,7 @@ export class RagManager {
|
|
|
168
168
|
strategy: options.chunking?.strategy ?? this.config.chunking.strategy,
|
|
169
169
|
chunkSize: options.chunking?.chunkSize ?? this.config.chunking.chunkSize,
|
|
170
170
|
overlap: options.chunking?.overlap ?? this.config.chunking.overlap,
|
|
171
|
-
...(options.chunking?.separators ?? this.config.chunking.separators
|
|
171
|
+
...((options.chunking?.separators ?? this.config.chunking.separators)
|
|
172
172
|
? { separators: options.chunking?.separators ?? this.config.chunking.separators }
|
|
173
173
|
: {}),
|
|
174
174
|
}
|
|
@@ -198,10 +198,10 @@ export class RagManager {
|
|
|
198
198
|
})
|
|
199
199
|
embeddings = result.embeddings as number[][]
|
|
200
200
|
} catch (cause) {
|
|
201
|
-
throw new EmbeddingError(
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
)
|
|
201
|
+
throw new EmbeddingError(`RagManager.ingest: embedding ${texts.length} chunks failed.`, {
|
|
202
|
+
context: { collection: fullCollection },
|
|
203
|
+
cause,
|
|
204
|
+
})
|
|
205
205
|
}
|
|
206
206
|
|
|
207
207
|
const documents: VectorDocument[] = chunks.map((chunk, i) => ({
|
|
@@ -223,13 +223,8 @@ export class RagManager {
|
|
|
223
223
|
|
|
224
224
|
// ─── Retrieve ─────────────────────────────────────────────────────────
|
|
225
225
|
|
|
226
|
-
async retrieve(
|
|
227
|
-
|
|
228
|
-
options: RetrieveOptions = {},
|
|
229
|
-
): Promise<RetrieveResult> {
|
|
230
|
-
const fullCollection = this.collectionName(
|
|
231
|
-
options.collection ?? this.config.default,
|
|
232
|
-
)
|
|
226
|
+
async retrieve(query: string, options: RetrieveOptions = {}): Promise<RetrieveResult> {
|
|
227
|
+
const fullCollection = this.collectionName(options.collection ?? this.config.default)
|
|
233
228
|
const start = performance.now()
|
|
234
229
|
|
|
235
230
|
let embedding: number[]
|
|
@@ -240,24 +235,23 @@ export class RagManager {
|
|
|
240
235
|
})
|
|
241
236
|
embedding = result.embeddings[0] as number[]
|
|
242
237
|
} catch (cause) {
|
|
243
|
-
throw new EmbeddingError(
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
)
|
|
238
|
+
throw new EmbeddingError(`RagManager.retrieve: embedding query failed.`, {
|
|
239
|
+
context: { collection: fullCollection },
|
|
240
|
+
cause,
|
|
241
|
+
})
|
|
247
242
|
}
|
|
248
243
|
|
|
244
|
+
const finalTopK = options.topK
|
|
245
|
+
const fetchK = options.rerank !== undefined ? (options.rerankPool ?? finalTopK) : finalTopK
|
|
246
|
+
|
|
249
247
|
const queryOpts: { topK?: number; threshold?: number; filter?: Record<string, unknown> } = {}
|
|
250
|
-
if (
|
|
248
|
+
if (fetchK !== undefined) queryOpts.topK = fetchK
|
|
251
249
|
if (options.threshold !== undefined) queryOpts.threshold = options.threshold
|
|
252
250
|
if (options.filter !== undefined) queryOpts.filter = options.filter
|
|
253
251
|
|
|
254
|
-
const result = await this.store(options.store).query(
|
|
255
|
-
fullCollection,
|
|
256
|
-
embedding,
|
|
257
|
-
queryOpts,
|
|
258
|
-
)
|
|
252
|
+
const result = await this.store(options.store).query(fullCollection, embedding, queryOpts)
|
|
259
253
|
|
|
260
|
-
|
|
254
|
+
let matches: RetrievedDocument[] = result.matches.map((m) => ({
|
|
261
255
|
id: m.id,
|
|
262
256
|
content: m.content,
|
|
263
257
|
score: m.score,
|
|
@@ -266,6 +260,13 @@ export class RagManager {
|
|
|
266
260
|
...(m.sourceId !== undefined ? { sourceId: m.sourceId } : {}),
|
|
267
261
|
}))
|
|
268
262
|
|
|
263
|
+
if (options.rerank !== undefined && matches.length > 0) {
|
|
264
|
+
matches = [...(await options.rerank.rerank(query, matches))]
|
|
265
|
+
if (finalTopK !== undefined && matches.length > finalTopK) {
|
|
266
|
+
matches = matches.slice(0, finalTopK)
|
|
267
|
+
}
|
|
268
|
+
}
|
|
269
|
+
|
|
269
270
|
return {
|
|
270
271
|
query,
|
|
271
272
|
matches,
|
package/src/rag_provider.ts
CHANGED
|
@@ -16,17 +16,13 @@
|
|
|
16
16
|
* via a real `config/rag.ts`.
|
|
17
17
|
*/
|
|
18
18
|
|
|
19
|
-
// biome-ignore lint/style/useImportType: PostgresDatabase value import — required when any pgvector store is configured. Loaded conditionally below.
|
|
20
|
-
import { PostgresDatabase } from '@strav/database'
|
|
21
19
|
// biome-ignore lint/style/useImportType: BrainManager value import for c.resolve.
|
|
22
20
|
import { BrainManager } from '@strav/brain'
|
|
23
|
-
import
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
ConfigRepository,
|
|
27
|
-
ServiceProvider,
|
|
28
|
-
} from '@strav/kernel'
|
|
21
|
+
// biome-ignore lint/style/useImportType: PostgresDatabase value import — required when any pgvector store is configured. Loaded conditionally below.
|
|
22
|
+
import { PostgresDatabase } from '@strav/database'
|
|
23
|
+
import { type Application, ConfigError, ConfigRepository, ServiceProvider } from '@strav/kernel'
|
|
29
24
|
import { RagManager, type RagManagerOptions } from './rag_manager.ts'
|
|
25
|
+
import { RetrievableRegistry } from './retrievable_registry.ts'
|
|
30
26
|
import type { RagConfig } from './types.ts'
|
|
31
27
|
|
|
32
28
|
export class RagProvider extends ServiceProvider {
|
|
@@ -34,6 +30,7 @@ export class RagProvider extends ServiceProvider {
|
|
|
34
30
|
override readonly dependencies = ['config', 'brain']
|
|
35
31
|
|
|
36
32
|
override register(app: Application): void {
|
|
33
|
+
app.singleton(RetrievableRegistry, () => new RetrievableRegistry())
|
|
37
34
|
app.singleton(RagManager, (c) => {
|
|
38
35
|
const raw = c.resolve(ConfigRepository).get('rag') as Partial<RagConfig> | undefined
|
|
39
36
|
const config = applyDefaults(raw)
|
package/src/rerankers/keyword_reranker.ts
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `KeywordReranker` — boost matches whose content literally contains
|
|
3
|
+
* tokens from the query.
|
|
4
|
+
*
|
|
5
|
+
* Useful when the embedder is too smooth for short, jargon-heavy
|
|
6
|
+
* queries (product SKUs, error codes, acronyms). Combines the raw
|
|
7
|
+
* vector similarity with a token-overlap score:
|
|
8
|
+
*
|
|
9
|
+
* score = (1 - weight) * similarity + weight * overlap
|
|
10
|
+
*
|
|
11
|
+
* where `overlap = matched tokens / total query tokens`. Pure
|
|
12
|
+
* lexical with no inverted index — fine at top-K sizes of a few
|
|
13
|
+
* dozen. Apps that need real BM25 wire a custom `Reranker`.
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
import type { RetrievedDocument } from '../types.ts'
|
|
17
|
+
import type { Reranker } from './reranker.ts'
|
|
18
|
+
|
|
19
|
+
export interface KeywordRerankerOptions {
|
|
20
|
+
/**
|
|
21
|
+
* Blend factor between vector similarity (0) and keyword overlap
|
|
22
|
+
* (1). Default `0.3` — similarity stays the dominant signal,
|
|
23
|
+
* keyword overlap nudges exact-match docs higher.
|
|
24
|
+
*/
|
|
25
|
+
weight?: number
|
|
26
|
+
/** Case-sensitive matching. Default `false`. */
|
|
27
|
+
caseSensitive?: boolean
|
|
28
|
+
/**
|
|
29
|
+
* Custom tokenizer. Default splits on Unicode whitespace and
|
|
30
|
+
* drops empty fragments. Apps with stricter requirements (stem,
|
|
31
|
+
* stop-word filter, etc.) pass their own.
|
|
32
|
+
*/
|
|
33
|
+
tokenize?(input: string): readonly string[]
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
export class KeywordReranker implements Reranker {
|
|
37
|
+
private readonly weight: number
|
|
38
|
+
private readonly caseSensitive: boolean
|
|
39
|
+
private readonly tokenize: (input: string) => readonly string[]
|
|
40
|
+
|
|
41
|
+
constructor(options: KeywordRerankerOptions = {}) {
|
|
42
|
+
this.weight = options.weight ?? 0.3
|
|
43
|
+
this.caseSensitive = options.caseSensitive ?? false
|
|
44
|
+
this.tokenize = options.tokenize ?? defaultTokenize
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
rerank(query: string, matches: readonly RetrievedDocument[]): RetrievedDocument[] {
|
|
48
|
+
const queryStr = this.caseSensitive ? query : query.toLowerCase()
|
|
49
|
+
const tokens = [...new Set(this.tokenize(queryStr))].filter(Boolean)
|
|
50
|
+
if (tokens.length === 0) return [...matches]
|
|
51
|
+
|
|
52
|
+
const scored = matches.map((m) => {
|
|
53
|
+
const haystack = this.caseSensitive ? m.content : m.content.toLowerCase()
|
|
54
|
+
let hits = 0
|
|
55
|
+
for (const token of tokens) {
|
|
56
|
+
if (haystack.includes(token)) hits++
|
|
57
|
+
}
|
|
58
|
+
const overlap = hits / tokens.length
|
|
59
|
+
const blended = m.similarity * (1 - this.weight) + overlap * this.weight
|
|
60
|
+
return { ...m, score: blended }
|
|
61
|
+
})
|
|
62
|
+
|
|
63
|
+
return scored.sort((a, b) => b.score - a.score)
|
|
64
|
+
}
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
function defaultTokenize(input: string): readonly string[] {
|
|
68
|
+
return input.split(/\s+/)
|
|
69
|
+
}
|
package/src/rerankers/mmr_reranker.ts
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `MMRReranker` — Maximal Marginal Relevance.
|
|
3
|
+
*
|
|
4
|
+
* Reorders matches to balance relevance to the query against
|
|
5
|
+
* diversity among the chosen documents. Useful when the raw top-K
|
|
6
|
+
* contains near-duplicate chunks from the same source.
|
|
7
|
+
*
|
|
8
|
+
* MMR(d) = λ * sim(d, query) - (1 - λ) * max_{s ∈ selected} sim(d, s)
|
|
9
|
+
*
|
|
10
|
+
* `λ = 1.0` collapses to pure similarity (no diversity bias);
|
|
11
|
+
* `λ = 0.0` greedily maximizes diversity ignoring the query.
|
|
12
|
+
* `0.5` (default) is a balanced middle.
|
|
13
|
+
*
|
|
14
|
+
* Requires document embeddings the vector store didn't return. The
|
|
15
|
+
* caller provides an `embed(text)` callback — typically
|
|
16
|
+
* `(t) => brain.embed([t]).then(r => r.embeddings[0])`. The reranker
|
|
17
|
+
* embeds the query + every document once (`matches.length + 1`
|
|
18
|
+
* calls); apps that rerank pools >50 should cache or pre-compute
|
|
19
|
+
* upstream.
|
|
20
|
+
*/
|
|
21
|
+
|
|
22
|
+
import type { RetrievedDocument } from '../types.ts'
|
|
23
|
+
import type { Reranker } from './reranker.ts'
|
|
24
|
+
|
|
25
|
+
export interface MMRRerankerOptions {
|
|
26
|
+
/**
|
|
27
|
+
* Compute an embedding for a single piece of text. Apps wire
|
|
28
|
+
* this from `BrainManager.embed`:
|
|
29
|
+
*
|
|
30
|
+
* embed: async (t) => {
|
|
31
|
+
* const r = await brain.embed([t], { model: 'text-embedding-3-small' })
|
|
32
|
+
* return r.embeddings[0]!
|
|
33
|
+
* }
|
|
34
|
+
*/
|
|
35
|
+
embed(text: string): Promise<number[]>
|
|
36
|
+
/**
|
|
37
|
+
* Relevance/diversity blend in `[0, 1]`. Default `0.5`. `1.0` =
|
|
38
|
+
* pure similarity; `0.0` = pure diversity.
|
|
39
|
+
*/
|
|
40
|
+
lambda?: number
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
export class MMRReranker implements Reranker {
|
|
44
|
+
private readonly embed: (text: string) => Promise<number[]>
|
|
45
|
+
private readonly lambda: number
|
|
46
|
+
|
|
47
|
+
constructor(options: MMRRerankerOptions) {
|
|
48
|
+
this.embed = options.embed
|
|
49
|
+
this.lambda = options.lambda ?? 0.5
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
async rerank(query: string, matches: readonly RetrievedDocument[]): Promise<RetrievedDocument[]> {
|
|
53
|
+
if (matches.length <= 1) return [...matches]
|
|
54
|
+
|
|
55
|
+
const queryEmbedding = await this.embed(query)
|
|
56
|
+
const docEmbeddings = await Promise.all(matches.map((m) => this.embed(m.content)))
|
|
57
|
+
|
|
58
|
+
const remaining = matches.map((_, i) => i)
|
|
59
|
+
const ordered: { index: number; mmr: number }[] = []
|
|
60
|
+
|
|
61
|
+
while (remaining.length > 0) {
|
|
62
|
+
let bestSlot = 0
|
|
63
|
+
let bestMmr = Number.NEGATIVE_INFINITY
|
|
64
|
+
for (let r = 0; r < remaining.length; r++) {
|
|
65
|
+
const i = remaining[r] as number
|
|
66
|
+
const relevance = cosineSimilarity(queryEmbedding, docEmbeddings[i] as number[])
|
|
67
|
+
let maxOverlap = 0
|
|
68
|
+
for (const { index: s } of ordered) {
|
|
69
|
+
const overlap = cosineSimilarity(
|
|
70
|
+
docEmbeddings[i] as number[],
|
|
71
|
+
docEmbeddings[s] as number[],
|
|
72
|
+
)
|
|
73
|
+
if (overlap > maxOverlap) maxOverlap = overlap
|
|
74
|
+
}
|
|
75
|
+
const mmr = this.lambda * relevance - (1 - this.lambda) * maxOverlap
|
|
76
|
+
if (mmr > bestMmr) {
|
|
77
|
+
bestMmr = mmr
|
|
78
|
+
bestSlot = r
|
|
79
|
+
}
|
|
80
|
+
}
|
|
81
|
+
const chosen = remaining[bestSlot] as number
|
|
82
|
+
ordered.push({ index: chosen, mmr: bestMmr })
|
|
83
|
+
remaining.splice(bestSlot, 1)
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
return ordered.map(({ index, mmr }) => ({
|
|
87
|
+
...(matches[index] as RetrievedDocument),
|
|
88
|
+
score: mmr,
|
|
89
|
+
}))
|
|
90
|
+
}
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
function cosineSimilarity(a: readonly number[], b: readonly number[]): number {
|
|
94
|
+
const n = Math.min(a.length, b.length)
|
|
95
|
+
let dot = 0
|
|
96
|
+
let normA = 0
|
|
97
|
+
let normB = 0
|
|
98
|
+
for (let i = 0; i < n; i++) {
|
|
99
|
+
const ai = a[i] as number
|
|
100
|
+
const bi = b[i] as number
|
|
101
|
+
dot += ai * bi
|
|
102
|
+
normA += ai * ai
|
|
103
|
+
normB += bi * bi
|
|
104
|
+
}
|
|
105
|
+
if (normA === 0 || normB === 0) return 0
|
|
106
|
+
return dot / (Math.sqrt(normA) * Math.sqrt(normB))
|
|
107
|
+
}
|
package/src/rerankers/reranker.ts
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Re-ranking — reorder a `topK` set of retrieved documents using
|
|
3
|
+
* a signal richer than raw vector similarity.
|
|
4
|
+
*
|
|
5
|
+
* The vector store hands back matches sorted by cosine similarity.
|
|
6
|
+
* That's a strong baseline but loses information: keyword overlap,
|
|
7
|
+
* diversity, recency, source authority, second-stage cross-encoder
|
|
8
|
+
* scores, etc. A `Reranker` consumes the initial top-K and returns
|
|
9
|
+
* the same documents in a (possibly different) order with new
|
|
10
|
+
* `score` values. The raw vector `similarity` field is preserved
|
|
11
|
+
* verbatim so apps that want to display both can.
|
|
12
|
+
*
|
|
13
|
+
* Common usage:
|
|
14
|
+
*
|
|
15
|
+
* ```ts
|
|
16
|
+
* const { matches } = await rag.retrieve(query, {
|
|
17
|
+
* topK: 5,
|
|
18
|
+
* rerankPool: 25, // fetch a wider pool…
|
|
19
|
+
* rerank: new MMRReranker({ ... }),// reorder for diversity…
|
|
20
|
+
* }) // …then slice to topK.
|
|
21
|
+
* ```
|
|
22
|
+
*
|
|
23
|
+
* The contract is intentionally narrow: the reranker decides the
|
|
24
|
+
* order. The framework handles the over-fetch-then-truncate pattern
|
|
25
|
+
* via `rerankPool` so apps don't have to manage it.
|
|
26
|
+
*/
|
|
27
|
+
|
|
28
|
+
import type { RetrievedDocument } from '../types.ts'
|
|
29
|
+
|
|
30
|
+
/**
 * Strategy that reorders an already-retrieved set of documents for a
 * given query.
 */
export interface Reranker {
  /**
   * Produce the re-ranked document list.
   *
   * @param query - the query text the documents were retrieved for
   * @param matches - candidate documents; typed read-only, so
   *   implementations build a new array instead of mutating the input
   * @returns the re-ranked documents, either directly or via a promise
   */
  rerank(
    query: string,
    matches: readonly RetrievedDocument[],
  ): RetrievedDocument[] | Promise<RetrievedDocument[]>
}
|
package/src/retrievable.ts
CHANGED
|
@@ -65,11 +65,7 @@
|
|
|
65
65
|
|
|
66
66
|
import type { Repository } from '@strav/database'
|
|
67
67
|
import type { RagManager } from './rag_manager.ts'
|
|
68
|
-
import type {
|
|
69
|
-
RetrieveOptions,
|
|
70
|
-
RetrieveResult,
|
|
71
|
-
VectorMatch,
|
|
72
|
-
} from './types.ts'
|
|
68
|
+
import type { RetrieveOptions, RetrieveResult, VectorMatch } from './types.ts'
|
|
73
69
|
|
|
74
70
|
/** Minimal constructor type we can mix into. Wider than `typeof Repository` so subclasses with extra ctor args still type-check. */
|
|
75
71
|
// biome-ignore lint/suspicious/noExplicitAny: mixin constructor signatures intentionally accept any[]; the user-side subclass narrows.
|
|
@@ -157,9 +153,7 @@ export function retrievable<TModel extends object, TBase extends RepositoryConst
|
|
|
157
153
|
// replace cleanly. (RagManager.ingest writes fresh ids per
|
|
158
154
|
// call; without this step every re-vectorize would
|
|
159
155
|
// duplicate.)
|
|
160
|
-
await this.rag
|
|
161
|
-
.store()
|
|
162
|
-
.deleteBySource(this.rag.collectionName(collection), id)
|
|
156
|
+
await this.rag.store().deleteBySource(this.rag.collectionName(collection), id)
|
|
163
157
|
|
|
164
158
|
if (!this.shouldRetrieve(model)) return []
|
|
165
159
|
|
|
@@ -182,9 +176,7 @@ export function retrievable<TModel extends object, TBase extends RepositoryConst
|
|
|
182
176
|
async vectorRemove(model: TModel): Promise<void> {
|
|
183
177
|
const collection = this.collectionName()
|
|
184
178
|
const id = modelId(model)
|
|
185
|
-
await this.rag
|
|
186
|
-
.store()
|
|
187
|
-
.deleteBySource(this.rag.collectionName(collection), id)
|
|
179
|
+
await this.rag.store().deleteBySource(this.rag.collectionName(collection), id)
|
|
188
180
|
}
|
|
189
181
|
|
|
190
182
|
/**
|
|
@@ -238,9 +230,7 @@ export function retrievable<TModel extends object, TBase extends RepositoryConst
|
|
|
238
230
|
const ids = [...new Set(matches.map((m) => m.sourceId).filter((s): s is string => !!s))]
|
|
239
231
|
if (ids.length === 0) return []
|
|
240
232
|
const found = await this.findMany(ids as unknown as readonly string[])
|
|
241
|
-
const byId = new Map<string, TModel>(
|
|
242
|
-
found.map((m) => [modelId(m), m]),
|
|
243
|
-
)
|
|
233
|
+
const byId = new Map<string, TModel>(found.map((m) => [modelId(m), m]))
|
|
244
234
|
const out: TModel[] = []
|
|
245
235
|
for (const match of matches) {
|
|
246
236
|
if (!match.sourceId) continue
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `RetrievableRegistry` — bag of named pointers to retrievable
|
|
3
|
+
* repositories so the `rag:reindex` console command can resolve
|
|
4
|
+
* them at runtime.
|
|
5
|
+
*
|
|
6
|
+
* The framework can't statically discover which repositories use
|
|
7
|
+
* the `retrievable()` mixin — apps register them at boot:
|
|
8
|
+
*
|
|
9
|
+
* const registry = app.resolve(RetrievableRegistry)
|
|
10
|
+
* registry.register('articles', ArticleRepository)
|
|
11
|
+
*
|
|
12
|
+
* Then:
|
|
13
|
+
*
|
|
14
|
+
* bun strav rag:reindex articles
|
|
15
|
+
* bun strav rag:reindex --all
|
|
16
|
+
*
|
|
17
|
+
* resolves the repository through the container and calls
|
|
18
|
+
* `reindexAll(batchSize)`. The repo class must implement
|
|
19
|
+
* `reindexAll(batchSize?: number): Promise<number>` — the
|
|
20
|
+
* `retrievable()` mixin provides exactly that shape.
|
|
21
|
+
*/
|
|
22
|
+
|
|
23
|
+
import { inject } from '@strav/kernel'
|
|
24
|
+
import { RagError } from './rag_error.ts'
|
|
25
|
+
|
|
26
|
+
/**
 * Minimal surface a class must expose to be stored in the
 * `RetrievableRegistry`.
 */
export interface RetrievableTarget {
  /**
   * Rebuild the target's index.
   *
   * @param batchSize - optional batch size hint
   * @returns a promise of a number — presumably the count of records
   *   processed; confirm against the `retrievable()` mixin
   */
  reindexAll(batchSize?: number): Promise<number>
}
|
|
29
|
+
|
|
30
|
+
/** Any class whose instances satisfy `RetrievableTarget`, whatever its constructor arguments. */
// biome-ignore lint/suspicious/noExplicitAny: container-resolved constructor; the user-side class narrows.
type RetrievableConstructor = new (...ctorArgs: any[]) => RetrievableTarget
|
|
32
|
+
|
|
33
|
+
/**
 * Name-to-constructor lookup table for classes that implement
 * `RetrievableTarget`.
 */
@inject()
export class RetrievableRegistry {
  private readonly targets = new Map<string, RetrievableConstructor>()

  /**
   * Associate `ctor` with `name`. Registering the same name again
   * replaces the earlier entry.
   *
   * @param name - lookup key
   * @param ctor - class to hand back from `resolve(name)`
   */
  register(name: string, ctor: RetrievableConstructor): void {
    this.targets.set(name, ctor)
  }

  /**
   * All registered names, in registration order.
   *
   * @returns a fresh array on every call
   */
  names(): readonly string[] {
    return Array.from(this.targets.keys())
  }

  /**
   * Look up the constructor registered under `name`.
   *
   * @param name - key previously passed to `register`
   * @returns the registered constructor
   * @throws RagError when nothing is registered under `name`; the error
   *   context carries the requested name and the list of known names
   */
  resolve(name: string): RetrievableConstructor {
    const registered = this.targets.get(name)
    if (registered !== undefined) return registered

    throw new RagError(`RetrievableRegistry: no retrievable registered under "${name}".`, {
      context: { requested: name, available: this.names() },
    })
  }
}
|
package/src/types.ts
CHANGED
|
@@ -79,6 +79,25 @@ export interface RetrieveOptions {
|
|
|
79
79
|
embedModel?: string
|
|
80
80
|
/** Override the brain provider used for embedding. */
|
|
81
81
|
embedProvider?: string
|
|
82
|
+
/**
|
|
83
|
+
* Optional re-ranker. When set, the framework fetches `rerankPool`
|
|
84
|
+
* (or `topK` if unset) matches from the store, runs the reranker,
|
|
85
|
+
* then slices the result to `topK`. The reranker decides the
|
|
86
|
+
* final order + `score`; `similarity` carries the raw vector
|
|
87
|
+
* cosine.
|
|
88
|
+
*
|
|
89
|
+
* Built-in strategies live under `@strav/rag` —
|
|
90
|
+
* `KeywordReranker` (lexical overlap blend) and `MMRReranker`
|
|
91
|
+
* (Maximal Marginal Relevance for diversity).
|
|
92
|
+
*/
|
|
93
|
+
rerank?: import('./rerankers/reranker.ts').Reranker
|
|
94
|
+
/**
|
|
95
|
+
* Size of the candidate pool fetched from the store before
|
|
96
|
+
* re-ranking. Ignored when `rerank` is unset. Defaults to
|
|
97
|
+
* `topK` — set higher (`topK * 3` to `topK * 5` is typical) to
|
|
98
|
+
* give the reranker room to reorder.
|
|
99
|
+
*/
|
|
100
|
+
rerankPool?: number
|
|
82
101
|
}
|
|
83
102
|
|
|
84
103
|
export interface RetrieveResult {
|
package/src/vector_store.ts
CHANGED
|
@@ -47,9 +47,5 @@ export interface VectorStore {
|
|
|
47
47
|
deleteBySource(collection: string, sourceId: string): Promise<void>
|
|
48
48
|
flush(collection: string): Promise<void>
|
|
49
49
|
|
|
50
|
-
query(
|
|
51
|
-
collection: string,
|
|
52
|
-
vector: readonly number[],
|
|
53
|
-
options?: QueryOptions,
|
|
54
|
-
): Promise<QueryResult>
|
|
50
|
+
query(collection: string, vector: readonly number[], options?: QueryOptions): Promise<QueryResult>
|
|
55
51
|
}
|
|
@@ -30,11 +30,7 @@
|
|
|
30
30
|
* the helper itself.
|
|
31
31
|
*/
|
|
32
32
|
|
|
33
|
-
import {
|
|
34
|
-
emitCreateTable,
|
|
35
|
-
type DatabaseExecutor,
|
|
36
|
-
type SchemaRegistry,
|
|
37
|
-
} from '@strav/database'
|
|
33
|
+
import { type DatabaseExecutor, emitCreateTable, type SchemaRegistry } from '@strav/database'
|
|
38
34
|
import { ragVectorSchema } from './rag_vector_schema.ts'
|
|
39
35
|
|
|
40
36
|
export interface ApplyRagVectorMigrationOptions {
|