@strav/rag 1.0.0-alpha.28 → 1.0.0-alpha.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@strav/rag",
3
- "version": "1.0.0-alpha.28",
3
+ "version": "1.0.0-alpha.30",
4
4
  "description": "Strav RAG module — vector store abstraction, pgvector + in-memory drivers, chunking strategies. Composes with @strav/brain for embeddings and @strav/database for persistence.",
5
5
  "type": "module",
6
6
  "main": "./src/index.ts",
@@ -19,10 +19,10 @@
19
19
  "access": "public"
20
20
  },
21
21
  "dependencies": {
22
- "@strav/brain": "1.0.0-alpha.28",
23
- "@strav/cli": "1.0.0-alpha.28",
24
- "@strav/database": "1.0.0-alpha.28",
25
- "@strav/kernel": "1.0.0-alpha.28"
22
+ "@strav/brain": "1.0.0-alpha.30",
23
+ "@strav/cli": "1.0.0-alpha.30",
24
+ "@strav/database": "1.0.0-alpha.30",
25
+ "@strav/kernel": "1.0.0-alpha.30"
26
26
  },
27
27
  "peerDependencies": {
28
28
  "@types/bun": ">=1.3.14"
@@ -1,3 +1,4 @@
1
1
  export { RagConsoleProvider } from './rag_console_provider.ts'
2
2
  export { RagFlush } from './rag_flush.ts'
3
3
  export { RagList } from './rag_list.ts'
4
+ export { RagReindex } from './rag_reindex.ts'
@@ -10,8 +10,9 @@
10
10
  import { ConsoleProvider } from '@strav/cli'
11
11
  import { RagFlush } from './rag_flush.ts'
12
12
  import { RagList } from './rag_list.ts'
13
+ import { RagReindex } from './rag_reindex.ts'
13
14
 
14
15
  export class RagConsoleProvider extends ConsoleProvider {
15
16
  override readonly name = 'console.rag'
16
- override readonly commands = [RagFlush, RagList] as const
17
+ override readonly commands = [RagFlush, RagList, RagReindex] as const
17
18
  }
@@ -19,14 +19,14 @@ import { RagManager } from '../rag_manager.ts'
19
19
 
20
20
  export class RagFlush extends Command {
21
21
  static signature = 'rag:flush {collection} {--store=} {--force}'
22
- static description = 'Delete every vector in a collection (on the active or --store= named store).'
22
+ static description =
23
+ 'Delete every vector in a collection (on the active or --store= named store).'
23
24
  static providers = ['config', 'logger', 'brain', 'rag']
24
25
 
25
26
  override async execute({ args, flags }: ExecuteArgs): Promise<number> {
26
27
  const collection = args.collection as string
27
- const storeName = typeof flags.store === 'string' && flags.store.length > 0
28
- ? flags.store
29
- : undefined
28
+ const storeName =
29
+ typeof flags.store === 'string' && flags.store.length > 0 ? flags.store : undefined
30
30
 
31
31
  const manager = this.app.resolve(RagManager)
32
32
  const fullCollection = manager.collectionName(collection)
@@ -43,9 +43,7 @@ export class RagFlush extends Command {
43
43
  }
44
44
 
45
45
  await manager.store(storeName).flush(fullCollection)
46
- this.success(
47
- `Flushed collection "${fullCollection}" on store "${storeLabel}".`,
48
- )
46
+ this.success(`Flushed collection "${fullCollection}" on store "${storeLabel}".`)
49
47
  return ExitCode.Success
50
48
  }
51
49
  }
@@ -0,0 +1,100 @@
1
+ /**
2
+ * `bun strav rag:reindex {name?} [--all] [--batch=100]` —
3
+ * walk a registered repository and re-vectorize every row.
4
+ *
5
+ * Apps register repos at boot:
6
+ *
7
+ * const registry = app.resolve(RetrievableRegistry)
8
+ * registry.register('articles', ArticleRepository)
9
+ *
10
+ * Then:
11
+ *
12
+ * bun strav rag:reindex articles # one repo
13
+ * bun strav rag:reindex --all # every registered repo
14
+ *
15
+ * The repo class must implement `reindexAll(batchSize?)` — the
16
+ * `retrievable()` mixin already does. Batch size defaults to 100;
17
+ * apps hitting embedding rate limits drop it lower.
18
+ *
19
+ * Long-running on large corpora — apps that need cron-driven or
20
+ * queued re-index typically ship a custom command pointing at the
21
+ * same `reindexAll` method.
22
+ */
23
+
24
+ import { Command, type ExecuteArgs, ExitCode } from '@strav/cli'
25
+ import { RagError } from '../rag_error.ts'
26
+ import { RetrievableRegistry } from '../retrievable_registry.ts'
27
+
28
/**
 * Console command behind `rag:reindex {name?} {--all} {--batch=100}`.
 *
 * Resolves the `RetrievableRegistry` from the container, then calls
 * `reindexAll(batchSize)` on either the single named repository or on
 * every registered repository when `--all` is passed.
 */
export class RagReindex extends Command {
  static signature = 'rag:reindex {name?} {--all} {--batch=100}'
  static description =
    'Re-vectorize one registered retrievable repository (or every one with --all).'
  static providers = ['config', 'logger', 'brain', 'rag', 'database']

  /**
   * Run the command.
   *
   * @returns `ExitCode.Success` when re-indexing completes (or when `--all`
   *   finds nothing registered), `ExitCode.UsageError` when neither a name
   *   nor `--all` is given, and `ExitCode.GenericFailure` when a `RagError`
   *   is raised while re-indexing. Any other error is re-thrown.
   */
  override async execute({ args, flags }: ExecuteArgs): Promise<number> {
    const registry = this.app.resolve(RetrievableRegistry)
    const batchSize = parseBatch(flags.batch)

    // `--all` takes precedence over a positional name.
    if (flags.all === true) {
      return this.reindexEvery(registry, batchSize)
    }

    const name = args.name
    if (typeof name !== 'string' || name.length === 0) {
      this.error(
        'rag:reindex requires a repository name, or --all to re-index every registered repository.',
      )
      this.info(this.registeredHint(registry))
      return ExitCode.UsageError
    }

    try {
      const processed = await this.reindexOne(registry, name, batchSize)
      this.success(`Re-indexed ${processed} rows in "${name}".`)
      return ExitCode.Success
    } catch (err) {
      if (err instanceof RagError) {
        this.error(err.message)
        this.info(this.registeredHint(registry))
        return ExitCode.GenericFailure
      }
      throw err
    }
  }

  /**
   * Re-index every registered repository in registration-list order.
   *
   * A `RagError` from any repository is reported and stops the run with
   * `ExitCode.GenericFailure`, mirroring the single-repository path instead
   * of letting the error escape the command unhandled.
   */
  private async reindexEvery(registry: RetrievableRegistry, batchSize: number): Promise<number> {
    const names = registry.names()
    if (names.length === 0) {
      this.warn(
        'No retrievables registered. Call `registry.register(name, Repo)` from a service provider first.',
      )
      return ExitCode.Success
    }

    let total = 0
    for (const name of names) {
      try {
        total += await this.reindexOne(registry, name, batchSize)
      } catch (err) {
        if (!(err instanceof RagError)) throw err
        this.error(err.message)
        this.info(
          `Aborted while re-indexing "${name}"; ${total} rows were re-indexed before the failure.`,
        )
        return ExitCode.GenericFailure
      }
    }

    this.success(
      `Re-indexed ${total} rows across ${names.length} repositor${names.length === 1 ? 'y' : 'ies'}.`,
    )
    return ExitCode.Success
  }

  /**
   * Resolve the repository registered under `name` through the container,
   * run `reindexAll(batchSize)` on it and return the row count it reports.
   */
  private async reindexOne(
    registry: RetrievableRegistry,
    name: string,
    batchSize: number,
  ): Promise<number> {
    this.info(`Re-indexing "${name}"…`)
    const repo = this.app.resolve(registry.resolve(name))
    const processed = await repo.reindexAll(batchSize)
    this.info(`  ${processed} rows.`)
    return processed
  }

  /** One-line listing of the registered repository names, or `(none)`. */
  private registeredHint(registry: RetrievableRegistry): string {
    return `Registered: ${registry.names().join(', ') || '(none)'}`
  }
}
92
+
93
/**
 * Normalize the raw `--batch` flag into a usable batch size.
 *
 * Numbers are floored; strings are parsed as base-10 integers (leading
 * digits are accepted, as with `Number.parseInt`). Anything that does not
 * end up as a safe integer of at least 1 — including fractions below 1,
 * `Infinity`, `NaN`, non-numeric strings and other types — falls back to
 * the default of 100.
 *
 * @param raw Flag value as handed over by the CLI parser.
 * @returns A positive integer batch size.
 */
function parseBatch(raw: unknown): number {
  const fallback = 100

  let candidate: number
  if (typeof raw === 'number') {
    candidate = Math.floor(raw)
  } else if (typeof raw === 'string') {
    candidate = Number.parseInt(raw, 10)
  } else {
    return fallback
  }

  // Validate AFTER flooring so 0 < raw < 1 cannot yield a batch size of 0.
  return Number.isSafeInteger(candidate) && candidate >= 1 ? candidate : fallback
}
@@ -21,12 +21,7 @@
21
21
  */
22
22
 
23
23
  import { CollectionNotFoundError } from '../../rag_error.ts'
24
- import type {
25
- QueryOptions,
26
- QueryResult,
27
- VectorDocument,
28
- VectorMatch,
29
- } from '../../types.ts'
24
+ import type { QueryOptions, QueryResult, VectorDocument, VectorMatch } from '../../types.ts'
30
25
  import type { VectorStore } from '../../vector_store.ts'
31
26
 
32
27
  interface StoredDoc {
@@ -55,10 +50,7 @@ export class MemoryDriver implements VectorStore {
55
50
  this.dimensions.delete(collection)
56
51
  }
57
52
 
58
- async upsert(
59
- collection: string,
60
- documents: readonly VectorDocument[],
61
- ): Promise<void> {
53
+ async upsert(collection: string, documents: readonly VectorDocument[]): Promise<void> {
62
54
  const bucket = this.requireBucket(collection)
63
55
  for (const doc of documents) {
64
56
  const id = doc.id ?? crypto.randomUUID()
@@ -34,7 +34,6 @@ import {
34
34
  type PostgresDatabase,
35
35
  } from '@strav/database'
36
36
  import { VectorQueryError } from '../../rag_error.ts'
37
- import { ragVectorSchema } from '../../vectors/rag_vector_schema.ts'
38
37
  import type {
39
38
  QueryOptions,
40
39
  QueryResult,
@@ -43,6 +42,7 @@ import type {
43
42
  VectorMatch,
44
43
  } from '../../types.ts'
45
44
  import type { VectorStore } from '../../vector_store.ts'
45
+ import { ragVectorSchema } from '../../vectors/rag_vector_schema.ts'
46
46
 
47
47
  export interface PgvectorDriverOptions {
48
48
  /** PostgresDatabase instance — typically resolved from the container. */
@@ -99,18 +99,12 @@ export class PgvectorDriver implements VectorStore {
99
99
  }
100
100
 
101
101
  async deleteCollection(collection: string): Promise<void> {
102
- await this.exec().execute(
103
- `DELETE FROM "${this.table}" WHERE "collection" = $1`,
104
- [collection],
105
- )
102
+ await this.exec().execute(`DELETE FROM "${this.table}" WHERE "collection" = $1`, [collection])
106
103
  }
107
104
 
108
105
  // ─── Mutations ────────────────────────────────────────────────────────
109
106
 
110
- async upsert(
111
- collection: string,
112
- documents: readonly VectorDocument[],
113
- ): Promise<void> {
107
+ async upsert(collection: string, documents: readonly VectorDocument[]): Promise<void> {
114
108
  if (documents.length === 0) return
115
109
  // pgvector accepts the vector as a stringified array literal —
116
110
  // `[0.12,0.34,...]` — cast with `::vector` at the boundary.
@@ -167,10 +161,7 @@ export class PgvectorDriver implements VectorStore {
167
161
  }
168
162
 
169
163
  async flush(collection: string): Promise<void> {
170
- await this.exec().execute(
171
- `DELETE FROM "${this.table}" WHERE "collection" = $1`,
172
- [collection],
173
- )
164
+ await this.exec().execute(`DELETE FROM "${this.table}" WHERE "collection" = $1`, [collection])
174
165
  }
175
166
 
176
167
  // ─── Query ────────────────────────────────────────────────────────────
@@ -194,7 +185,9 @@ export class PgvectorDriver implements VectorStore {
194
185
  if (options.filter) {
195
186
  for (const [key, value] of Object.entries(options.filter)) {
196
187
  params.push(JSON.stringify(value))
197
- where.push(`"metadata" @> jsonb_build_object('${escapeJsonbKey(key)}', $${params.length}::jsonb)`)
188
+ where.push(
189
+ `"metadata" @> jsonb_build_object('${escapeJsonbKey(key)}', $${params.length}::jsonb)`,
190
+ )
198
191
  }
199
192
  }
200
193
 
@@ -221,10 +214,10 @@ export class PgvectorDriver implements VectorStore {
221
214
  try {
222
215
  rows = await this.exec().query(sql, params)
223
216
  } catch (cause) {
224
- throw new VectorQueryError(
225
- `pgvector query failed for collection "${collection}".`,
226
- { context: { collection, table: this.table }, cause },
227
- )
217
+ throw new VectorQueryError(`pgvector query failed for collection "${collection}".`, {
218
+ context: { collection, table: this.table },
219
+ cause,
220
+ })
228
221
  }
229
222
 
230
223
  const matches: VectorMatch[] = rows.map((r) => ({
package/src/index.ts CHANGED
@@ -1,25 +1,30 @@
1
1
  // Public API of `@strav/rag`.
2
2
  //
3
- // V1: vector store abstraction + memory & pgvector drivers +
4
- // fixed-size & recursive chunkers + RagManager + RagProvider.
3
+ // Shipped:
4
+ // - Vector store abstraction + Memory & Pgvector drivers.
5
+ // - Fixed-size + recursive chunkers.
6
+ // - `RagManager` + `RagProvider` service wiring.
7
+ // - `retrievable()` repository mixin + `RetrievableRegistry`.
8
+ // - CLI: `rag:list`, `rag:flush`, `rag:reindex {name|--all}`.
9
+ // - Re-ranking — `Reranker` interface + `KeywordReranker` +
10
+ // `MMRReranker` + `RetrieveOptions.rerank` / `rerankPool`.
5
11
  // Composes with `@strav/brain` for embeddings and `@strav/database`
6
12
  // for pgvector persistence + multitenancy.
7
- //
8
- // Deferred to follow-up slices: `retrievable()` repository mixin,
9
- // CLI commands (`rag:reindex`, `rag:flush`), re-ranking strategies.
10
13
 
11
14
  export { createChunker } from './chunking/chunker.ts'
12
15
  export { FixedSizeChunker } from './chunking/fixed_size_chunker.ts'
13
16
  export { RecursiveChunker } from './chunking/recursive_chunker.ts'
17
+ export {
18
+ RagConsoleProvider,
19
+ RagFlush,
20
+ RagList,
21
+ RagReindex,
22
+ } from './console/index.ts'
14
23
  export { MemoryDriver } from './drivers/memory/memory_driver.ts'
15
24
  export {
16
25
  PgvectorDriver,
17
26
  type PgvectorDriverOptions,
18
27
  } from './drivers/pgvector/pgvector_driver.ts'
19
- export {
20
- applyRagVectorMigration,
21
- type ApplyRagVectorMigrationOptions,
22
- } from './vectors/apply_rag_vector_migration.ts'
23
28
  export {
24
29
  CollectionNotFoundError,
25
30
  EmbeddingError,
@@ -32,14 +37,19 @@ export {
32
37
  type RagManagerOptions,
33
38
  type StoreFactory,
34
39
  } from './rag_manager.ts'
35
- export {
36
- RagConsoleProvider,
37
- RagFlush,
38
- RagList,
39
- } from './console/index.ts'
40
40
  export { RagProvider } from './rag_provider.ts'
41
- export { ragVectorSchema } from './vectors/rag_vector_schema.ts'
41
+ export {
42
+ KeywordReranker,
43
+ type KeywordRerankerOptions,
44
+ MMRReranker,
45
+ type MMRRerankerOptions,
46
+ type Reranker,
47
+ } from './rerankers/index.ts'
42
48
  export { retrievable } from './retrievable.ts'
49
+ export {
50
+ RetrievableRegistry,
51
+ type RetrievableTarget,
52
+ } from './retrievable_registry.ts'
43
53
  export type {
44
54
  Chunk,
45
55
  Chunker,
@@ -48,11 +58,16 @@ export type {
48
58
  QueryOptions,
49
59
  QueryResult,
50
60
  RagConfig,
61
+ RetrievedDocument,
51
62
  RetrieveOptions,
52
63
  RetrieveResult,
53
- RetrievedDocument,
54
64
  StoreConfig,
55
65
  VectorDocument,
56
66
  VectorMatch,
57
67
  } from './types.ts'
58
68
  export type { VectorStore } from './vector_store.ts'
69
+ export {
70
+ type ApplyRagVectorMigrationOptions,
71
+ applyRagVectorMigration,
72
+ } from './vectors/apply_rag_vector_migration.ts'
73
+ export { ragVectorSchema } from './vectors/rag_vector_schema.ts'
package/src/rag_error.ts CHANGED
@@ -35,7 +35,10 @@ export class RagError extends StravError {
35
35
  super(
36
36
  message,
37
37
  { code: options.code ?? 'rag.error', status: options.status ?? 500 },
38
- { ...(options.context ? { context: options.context } : {}), ...(options.cause !== undefined ? { cause: options.cause } : {}) },
38
+ {
39
+ ...(options.context ? { context: options.context } : {}),
40
+ ...(options.cause !== undefined ? { cause: options.cause } : {}),
41
+ },
39
42
  )
40
43
  }
41
44
  }
@@ -54,7 +57,10 @@ export class CollectionNotFoundError extends RagError {
54
57
  }
55
58
 
56
59
  export class VectorQueryError extends RagError {
57
- constructor(message: string, options: { context?: Record<string, unknown>; cause?: unknown } = {}) {
60
+ constructor(
61
+ message: string,
62
+ options: { context?: Record<string, unknown>; cause?: unknown } = {},
63
+ ) {
58
64
  super(message, {
59
65
  code: 'rag.vector_query',
60
66
  status: 500,
@@ -65,7 +71,10 @@ export class VectorQueryError extends RagError {
65
71
  }
66
72
 
67
73
  export class EmbeddingError extends RagError {
68
- constructor(message: string, options: { context?: Record<string, unknown>; cause?: unknown } = {}) {
74
+ constructor(
75
+ message: string,
76
+ options: { context?: Record<string, unknown>; cause?: unknown } = {},
77
+ ) {
69
78
  super(message, {
70
79
  code: 'rag.embedding',
71
80
  status: 500,
@@ -23,10 +23,10 @@
23
23
  * `tenants.withTenant(...)` get per-tenant isolation for free.
24
24
  */
25
25
 
26
- // biome-ignore lint/style/useImportType: PostgresDatabase value import for the container path that wires PgvectorDriver.
27
- import { PostgresDatabase } from '@strav/database'
28
26
  // biome-ignore lint/style/useImportType: BrainManager value import for @inject() param-type metadata.
29
27
  import { BrainManager } from '@strav/brain'
28
+ // biome-ignore lint/style/useImportType: PostgresDatabase value import for the container path that wires PgvectorDriver.
29
+ import { PostgresDatabase } from '@strav/database'
30
30
  // biome-ignore lint/style/useImportType: Application value import for the container handle.
31
31
  import { Application, inject, ulid } from '@strav/kernel'
32
32
  import { createChunker } from './chunking/chunker.ts'
@@ -34,13 +34,13 @@ import { MemoryDriver } from './drivers/memory/memory_driver.ts'
34
34
  import { PgvectorDriver } from './drivers/pgvector/pgvector_driver.ts'
35
35
  import { EmbeddingError, RagError } from './rag_error.ts'
36
36
  import type {
37
- ChunkingConfig,
38
37
  Chunk,
39
38
  Chunker,
39
+ ChunkingConfig,
40
40
  RagConfig,
41
+ RetrievedDocument,
41
42
  RetrieveOptions,
42
43
  RetrieveResult,
43
- RetrievedDocument,
44
44
  StoreConfig,
45
45
  VectorDocument,
46
46
  } from './types.ts'
@@ -168,7 +168,7 @@ export class RagManager {
168
168
  strategy: options.chunking?.strategy ?? this.config.chunking.strategy,
169
169
  chunkSize: options.chunking?.chunkSize ?? this.config.chunking.chunkSize,
170
170
  overlap: options.chunking?.overlap ?? this.config.chunking.overlap,
171
- ...(options.chunking?.separators ?? this.config.chunking.separators
171
+ ...((options.chunking?.separators ?? this.config.chunking.separators)
172
172
  ? { separators: options.chunking?.separators ?? this.config.chunking.separators }
173
173
  : {}),
174
174
  }
@@ -198,10 +198,10 @@ export class RagManager {
198
198
  })
199
199
  embeddings = result.embeddings as number[][]
200
200
  } catch (cause) {
201
- throw new EmbeddingError(
202
- `RagManager.ingest: embedding ${texts.length} chunks failed.`,
203
- { context: { collection: fullCollection }, cause },
204
- )
201
+ throw new EmbeddingError(`RagManager.ingest: embedding ${texts.length} chunks failed.`, {
202
+ context: { collection: fullCollection },
203
+ cause,
204
+ })
205
205
  }
206
206
 
207
207
  const documents: VectorDocument[] = chunks.map((chunk, i) => ({
@@ -223,13 +223,8 @@ export class RagManager {
223
223
 
224
224
  // ─── Retrieve ─────────────────────────────────────────────────────────
225
225
 
226
- async retrieve(
227
- query: string,
228
- options: RetrieveOptions = {},
229
- ): Promise<RetrieveResult> {
230
- const fullCollection = this.collectionName(
231
- options.collection ?? this.config.default,
232
- )
226
+ async retrieve(query: string, options: RetrieveOptions = {}): Promise<RetrieveResult> {
227
+ const fullCollection = this.collectionName(options.collection ?? this.config.default)
233
228
  const start = performance.now()
234
229
 
235
230
  let embedding: number[]
@@ -240,24 +235,23 @@ export class RagManager {
240
235
  })
241
236
  embedding = result.embeddings[0] as number[]
242
237
  } catch (cause) {
243
- throw new EmbeddingError(
244
- `RagManager.retrieve: embedding query failed.`,
245
- { context: { collection: fullCollection }, cause },
246
- )
238
+ throw new EmbeddingError(`RagManager.retrieve: embedding query failed.`, {
239
+ context: { collection: fullCollection },
240
+ cause,
241
+ })
247
242
  }
248
243
 
244
+ const finalTopK = options.topK
245
+ const fetchK = options.rerank !== undefined ? (options.rerankPool ?? finalTopK) : finalTopK
246
+
249
247
  const queryOpts: { topK?: number; threshold?: number; filter?: Record<string, unknown> } = {}
250
- if (options.topK !== undefined) queryOpts.topK = options.topK
248
+ if (fetchK !== undefined) queryOpts.topK = fetchK
251
249
  if (options.threshold !== undefined) queryOpts.threshold = options.threshold
252
250
  if (options.filter !== undefined) queryOpts.filter = options.filter
253
251
 
254
- const result = await this.store(options.store).query(
255
- fullCollection,
256
- embedding,
257
- queryOpts,
258
- )
252
+ const result = await this.store(options.store).query(fullCollection, embedding, queryOpts)
259
253
 
260
- const matches: RetrievedDocument[] = result.matches.map((m) => ({
254
+ let matches: RetrievedDocument[] = result.matches.map((m) => ({
261
255
  id: m.id,
262
256
  content: m.content,
263
257
  score: m.score,
@@ -266,6 +260,13 @@ export class RagManager {
266
260
  ...(m.sourceId !== undefined ? { sourceId: m.sourceId } : {}),
267
261
  }))
268
262
 
263
+ if (options.rerank !== undefined && matches.length > 0) {
264
+ matches = [...(await options.rerank.rerank(query, matches))]
265
+ if (finalTopK !== undefined && matches.length > finalTopK) {
266
+ matches = matches.slice(0, finalTopK)
267
+ }
268
+ }
269
+
269
270
  return {
270
271
  query,
271
272
  matches,
@@ -16,17 +16,13 @@
16
16
  * via a real `config/rag.ts`.
17
17
  */
18
18
 
19
- // biome-ignore lint/style/useImportType: PostgresDatabase value import — required when any pgvector store is configured. Loaded conditionally below.
20
- import { PostgresDatabase } from '@strav/database'
21
19
  // biome-ignore lint/style/useImportType: BrainManager value import for c.resolve.
22
20
  import { BrainManager } from '@strav/brain'
23
- import {
24
- type Application,
25
- ConfigError,
26
- ConfigRepository,
27
- ServiceProvider,
28
- } from '@strav/kernel'
21
+ // biome-ignore lint/style/useImportType: PostgresDatabase value import — required when any pgvector store is configured. Loaded conditionally below.
22
+ import { PostgresDatabase } from '@strav/database'
23
+ import { type Application, ConfigError, ConfigRepository, ServiceProvider } from '@strav/kernel'
29
24
  import { RagManager, type RagManagerOptions } from './rag_manager.ts'
25
+ import { RetrievableRegistry } from './retrievable_registry.ts'
30
26
  import type { RagConfig } from './types.ts'
31
27
 
32
28
  export class RagProvider extends ServiceProvider {
@@ -34,6 +30,7 @@ export class RagProvider extends ServiceProvider {
34
30
  override readonly dependencies = ['config', 'brain']
35
31
 
36
32
  override register(app: Application): void {
33
+ app.singleton(RetrievableRegistry, () => new RetrievableRegistry())
37
34
  app.singleton(RagManager, (c) => {
38
35
  const raw = c.resolve(ConfigRepository).get('rag') as Partial<RagConfig> | undefined
39
36
  const config = applyDefaults(raw)
@@ -0,0 +1,3 @@
1
+ export { KeywordReranker, type KeywordRerankerOptions } from './keyword_reranker.ts'
2
+ export { MMRReranker, type MMRRerankerOptions } from './mmr_reranker.ts'
3
+ export type { Reranker } from './reranker.ts'
@@ -0,0 +1,69 @@
1
+ /**
2
+ * `KeywordReranker` — boost matches whose content literally contains
3
+ * tokens from the query.
4
+ *
5
+ * Useful when the embedder is too smooth for short, jargon-heavy
6
+ * queries (product SKUs, error codes, acronyms). Combines the raw
7
+ * vector similarity with a token-overlap score:
8
+ *
9
+ * score = (1 - weight) * similarity + weight * overlap
10
+ *
11
+ * where `overlap = matched tokens / total query tokens`. Pure
12
+ * lexical with no inverted index — fine at top-K sizes of a few
13
+ * dozen. Apps that need real BM25 wire a custom `Reranker`.
14
+ */
15
+
16
+ import type { RetrievedDocument } from '../types.ts'
17
+ import type { Reranker } from './reranker.ts'
18
+
19
/** Construction options for `KeywordReranker`; every field is optional. */
export interface KeywordRerankerOptions {
  /**
   * How much the keyword-overlap score contributes to the final score:
   * `0` keeps pure vector similarity, `1` uses keyword overlap only.
   * Defaults to `0.3`, so similarity remains the dominant signal.
   */
  weight?: number
  /** Match tokens case-sensitively. Defaults to `false`. */
  caseSensitive?: boolean
  /**
   * Replacement tokenizer for the query string. When omitted, the query
   * is split on whitespace. Supply your own for stemming, stop-word
   * removal and similar needs.
   */
  tokenize?(input: string): readonly string[]
}
35
+
36
/**
 * Reranker that blends each match's vector `similarity` with the share of
 * distinct query tokens found verbatim in the match content:
 *
 *     score = (1 - weight) * similarity + weight * overlap
 */
export class KeywordReranker implements Reranker {
  private readonly weight: number
  private readonly caseSensitive: boolean
  private readonly tokenize: (input: string) => readonly string[]

  /**
   * @param options See `KeywordRerankerOptions`. `weight` is clamped into
   *   `[0, 1]`; a `NaN` weight falls back to the default `0.3` so blended
   *   scores always stay meaningful.
   */
  constructor(options: KeywordRerankerOptions = {}) {
    const defaultWeight = 0.3
    const requested = options.weight ?? defaultWeight
    this.weight = Number.isNaN(requested) ? defaultWeight : Math.min(1, Math.max(0, requested))
    this.caseSensitive = options.caseSensitive ?? false
    this.tokenize = options.tokenize ?? defaultTokenize
  }

  /**
   * Re-score and re-order `matches` for `query`.
   *
   * @param query Raw query text; lower-cased before tokenizing unless
   *   `caseSensitive` is set.
   * @param matches Candidate documents; the input array is not mutated.
   * @returns A new array sorted by descending blended `score`. Every other
   *   field of each match (including `similarity`) is copied through. When
   *   the query produces no tokens, a shallow copy in the original order is
   *   returned with scores untouched.
   */
  rerank(query: string, matches: readonly RetrievedDocument[]): RetrievedDocument[] {
    const normalize = (text: string): string => (this.caseSensitive ? text : text.toLowerCase())

    // De-duplicate tokens so a repeated query word is not counted twice,
    // and drop empty fragments a tokenizer may emit.
    const tokens = [...new Set(this.tokenize(normalize(query)))].filter(Boolean)
    if (tokens.length === 0) return [...matches]

    const similarityShare = 1 - this.weight
    const scored = matches.map((match) => {
      const haystack = normalize(match.content)
      const hits = tokens.filter((token) => haystack.includes(token)).length
      const overlap = hits / tokens.length
      return { ...match, score: match.similarity * similarityShare + overlap * this.weight }
    })

    // `scored` is a fresh array, so the in-place sort cannot affect the caller.
    return scored.sort((a, b) => b.score - a.score)
  }
}
66
+
67
/**
 * Default tokenizer: split on runs of whitespace and drop empty fragments
 * (which `split` would otherwise produce for leading/trailing whitespace
 * or an empty input).
 *
 * @param input Text to tokenize.
 * @returns The non-empty whitespace-delimited pieces, in order.
 */
function defaultTokenize(input: string): readonly string[] {
  return input.split(/\s+/).filter((fragment) => fragment.length > 0)
}
@@ -0,0 +1,107 @@
1
+ /**
2
+ * `MMRReranker` — Maximal Marginal Relevance.
3
+ *
4
+ * Reorders matches to balance relevance to the query against
5
+ * diversity among the chosen documents. Useful when the raw top-K
6
+ * contains near-duplicate chunks from the same source.
7
+ *
8
+ * MMR(d) = λ * sim(d, query) - (1 - λ) * max_{s ∈ selected} sim(d, s)
9
+ *
10
+ * `λ = 1.0` collapses to pure similarity (no diversity bias);
11
+ * `λ = 0.0` greedily maximizes diversity ignoring the query.
12
+ * `0.5` (default) is a balanced middle.
13
+ *
14
+ * Requires document embeddings the vector store didn't return. The
15
+ * caller provides an `embed(text)` callback — typically
16
+ * `(t) => brain.embed([t]).then(r => r.embeddings[0])`. The reranker
17
+ * embeds the query + every document once (`matches.length + 1`
18
+ * calls); apps that rerank pools >50 should cache or pre-compute
19
+ * upstream.
20
+ */
21
+
22
+ import type { RetrievedDocument } from '../types.ts'
23
+ import type { Reranker } from './reranker.ts'
24
+
25
/** Construction options for `MMRReranker`. */
export interface MMRRerankerOptions {
  /**
   * Produces the embedding vector for one text. Typically a thin wrapper
   * around `BrainManager.embed`, for example:
   *
   *     embed: async (t) => {
   *       const r = await brain.embed([t], { model: 'text-embedding-3-small' })
   *       return r.embeddings[0]!
   *     }
   */
  embed(text: string): Promise<number[]>
  /**
   * Trade-off between relevance and diversity, expected in `[0, 1]`:
   * `1.0` ranks purely by similarity to the query, `0.0` purely by
   * diversity. Defaults to `0.5`.
   */
  lambda?: number
}
42
+
43
/**
 * Maximal Marginal Relevance reranker.
 *
 * Greedily picks, one at a time, the remaining document with the highest
 *
 *     λ * sim(doc, query) - (1 - λ) * max over already-picked s of sim(doc, s)
 *
 * using embeddings obtained from the caller-supplied `embed` callback.
 */
export class MMRReranker implements Reranker {
  private readonly embed: (text: string) => Promise<number[]>
  private readonly lambda: number

  /**
   * @param options See `MMRRerankerOptions`. `lambda` is clamped into
   *   `[0, 1]`; a `NaN` lambda falls back to the default `0.5`.
   */
  constructor(options: MMRRerankerOptions) {
    const defaultLambda = 0.5
    const requested = options.lambda ?? defaultLambda
    this.embed = options.embed
    this.lambda = Number.isNaN(requested) ? defaultLambda : Math.min(1, Math.max(0, requested))
  }

  /**
   * Reorder `matches` by MMR.
   *
   * @param query Query text; embedded once.
   * @param matches Candidate documents; each `content` is embedded once
   *   (all document embeddings are requested concurrently). Not mutated.
   * @returns The same documents in selection order, each with `score`
   *   replaced by its MMR value at the time it was picked. Inputs of
   *   length 0 or 1 are returned as a shallow copy without embedding.
   */
  async rerank(query: string, matches: readonly RetrievedDocument[]): Promise<RetrievedDocument[]> {
    if (matches.length <= 1) return [...matches]

    const queryEmbedding = await this.embed(query)
    const docEmbeddings = await Promise.all(matches.map((m) => this.embed(m.content)))

    // Relevance to the query never changes between rounds — compute it once.
    const relevance = docEmbeddings.map((embedding) => cosineSimilarity(queryEmbedding, embedding))
    // Running maximum similarity of each document to anything picked so far.
    // Starts at 0, so negative similarities never reduce the penalty below zero.
    const maxOverlap = new Array<number>(matches.length).fill(0)

    const remaining = matches.map((_, i) => i)
    const ordered: RetrievedDocument[] = []

    while (remaining.length > 0) {
      let bestSlot = 0
      let bestMmr = Number.NEGATIVE_INFINITY
      for (let slot = 0; slot < remaining.length; slot++) {
        const i = remaining[slot] as number
        const mmr =
          this.lambda * (relevance[i] as number) - (1 - this.lambda) * (maxOverlap[i] as number)
        // Strict comparison: ties go to the earlier (originally higher-ranked) document.
        if (mmr > bestMmr) {
          bestMmr = mmr
          bestSlot = slot
        }
      }

      const chosen = remaining[bestSlot] as number
      remaining.splice(bestSlot, 1)
      ordered.push({ ...(matches[chosen] as RetrievedDocument), score: bestMmr })

      // Only the newly picked document can raise a remaining document's overlap.
      const chosenEmbedding = docEmbeddings[chosen] as number[]
      for (const i of remaining) {
        const overlap = cosineSimilarity(docEmbeddings[i] as number[], chosenEmbedding)
        if (overlap > (maxOverlap[i] as number)) maxOverlap[i] = overlap
      }
    }

    return ordered
  }
}
92
+
93
/**
 * Cosine similarity of two vectors.
 *
 * Only the first `min(a.length, b.length)` components are compared, so
 * vectors of different lengths are silently truncated to the shorter one.
 *
 * @param a First vector.
 * @param b Second vector.
 * @returns A value in `[-1, 1]`, or `0` when either (truncated) vector has
 *   zero magnitude.
 */
function cosineSimilarity(a: readonly number[], b: readonly number[]): number {
  const n = Math.min(a.length, b.length)
  let dot = 0
  let normA = 0
  let normB = 0
  for (let i = 0; i < n; i++) {
    const ai = a[i] as number
    const bi = b[i] as number
    dot += ai * bi
    normA += ai * ai
    normB += bi * bi
  }
  if (normA === 0 || normB === 0) return 0
  const cosine = dot / (Math.sqrt(normA) * Math.sqrt(normB))
  // Floating-point rounding can push the ratio a hair outside [-1, 1].
  return Math.max(-1, Math.min(1, cosine))
}
@@ -0,0 +1,35 @@
1
+ /**
2
+ * Re-ranking — reorder a `topK` set of retrieved documents using
3
+ * a signal richer than raw vector similarity.
4
+ *
5
+ * The vector store hands back matches sorted by cosine similarity.
6
+ * That's a strong baseline but loses information: keyword overlap,
7
+ * diversity, recency, source authority, second-stage cross-encoder
8
+ * scores, etc. A `Reranker` consumes the initial top-K and returns
9
+ * the same documents in a (possibly different) order with new
10
+ * `score` values. The raw vector `similarity` field is preserved
11
+ * verbatim so apps that want to display both can.
12
+ *
13
+ * Common usage:
14
+ *
15
+ * ```ts
16
+ * const { matches } = await rag.retrieve(query, {
17
+ * topK: 5,
18
+ * rerankPool: 25, // fetch a wider pool…
19
+ * rerank: new MMRReranker({ ... }),// reorder for diversity…
20
+ * }) // …then slice to topK.
21
+ * ```
22
+ *
23
+ * The contract is intentionally narrow: the reranker decides the
24
+ * order. The framework handles the over-fetch-then-truncate pattern
25
+ * via `rerankPool` so apps don't have to manage it.
26
+ */
27
+
28
+ import type { RetrievedDocument } from '../types.ts'
29
+
30
/**
 * Second-stage ranking strategy applied to an initial set of retrieved
 * documents.
 */
export interface Reranker {
  /**
   * Produce the reranked documents for `query`.
   *
   * @param query The query text the matches were retrieved for.
   * @param matches The initial candidates, read-only.
   * @returns The reranked documents, either directly or as a promise.
   */
  rerank(
    query: string,
    matches: readonly RetrievedDocument[],
  ): RetrievedDocument[] | Promise<RetrievedDocument[]>
}
@@ -65,11 +65,7 @@
65
65
 
66
66
  import type { Repository } from '@strav/database'
67
67
  import type { RagManager } from './rag_manager.ts'
68
- import type {
69
- RetrieveOptions,
70
- RetrieveResult,
71
- VectorMatch,
72
- } from './types.ts'
68
+ import type { RetrieveOptions, RetrieveResult, VectorMatch } from './types.ts'
73
69
 
74
70
  /** Minimal constructor type we can mix into. Wider than `typeof Repository` so subclasses with extra ctor args still type-check. */
75
71
  // biome-ignore lint/suspicious/noExplicitAny: mixin constructor signatures intentionally accept any[]; the user-side subclass narrows.
@@ -157,9 +153,7 @@ export function retrievable<TModel extends object, TBase extends RepositoryConst
157
153
  // replace cleanly. (RagManager.ingest writes fresh ids per
158
154
  // call; without this step every re-vectorize would
159
155
  // duplicate.)
160
- await this.rag
161
- .store()
162
- .deleteBySource(this.rag.collectionName(collection), id)
156
+ await this.rag.store().deleteBySource(this.rag.collectionName(collection), id)
163
157
 
164
158
  if (!this.shouldRetrieve(model)) return []
165
159
 
@@ -182,9 +176,7 @@ export function retrievable<TModel extends object, TBase extends RepositoryConst
182
176
  async vectorRemove(model: TModel): Promise<void> {
183
177
  const collection = this.collectionName()
184
178
  const id = modelId(model)
185
- await this.rag
186
- .store()
187
- .deleteBySource(this.rag.collectionName(collection), id)
179
+ await this.rag.store().deleteBySource(this.rag.collectionName(collection), id)
188
180
  }
189
181
 
190
182
  /**
@@ -238,9 +230,7 @@ export function retrievable<TModel extends object, TBase extends RepositoryConst
238
230
  const ids = [...new Set(matches.map((m) => m.sourceId).filter((s): s is string => !!s))]
239
231
  if (ids.length === 0) return []
240
232
  const found = await this.findMany(ids as unknown as readonly string[])
241
- const byId = new Map<string, TModel>(
242
- found.map((m) => [modelId(m), m]),
243
- )
233
+ const byId = new Map<string, TModel>(found.map((m) => [modelId(m), m]))
244
234
  const out: TModel[] = []
245
235
  for (const match of matches) {
246
236
  if (!match.sourceId) continue
@@ -0,0 +1,60 @@
1
+ /**
2
+ * `RetrievableRegistry` — name → class map of retrievable
3
+ * repositories so the `rag:reindex` console command can resolve
4
+ * them at runtime.
5
+ *
6
+ * The framework can't statically discover which repositories use
7
+ * the `retrievable()` mixin — apps register them at boot:
8
+ *
9
+ * const registry = app.resolve(RetrievableRegistry)
10
+ * registry.register('articles', ArticleRepository)
11
+ *
12
+ * Then:
13
+ *
14
+ * bun strav rag:reindex articles
15
+ * bun strav rag:reindex --all
16
+ *
17
+ * resolves the repository through the container and calls
18
+ * `reindexAll(batchSize)`. The repo class must implement
19
+ * `reindexAll(batchSize?: number): Promise<number>` — the
20
+ * `retrievable()` mixin provides exactly that shape.
21
+ */
22
+
23
+ import { inject } from '@strav/kernel'
24
+ import { RagError } from './rag_error.ts'
25
+
26
+ export interface RetrievableTarget {
27
+ reindexAll(batchSize?: number): Promise<number>
28
+ }
29
+
30
+ // biome-ignore lint/suspicious/noExplicitAny: container-resolved constructor; the user-side class narrows.
31
+ type RetrievableConstructor = new (...args: any[]) => RetrievableTarget
32
+
33
+ @inject()
34
+ export class RetrievableRegistry {
35
+ private readonly targets = new Map<string, RetrievableConstructor>()
36
+
37
+ /**
38
+ * Register a repository class under `name`. The class will be
39
+ * resolved from the container on `rag:reindex <name>`.
40
+ */
41
+ register(name: string, ctor: RetrievableConstructor): void {
42
+ this.targets.set(name, ctor)
43
+ }
44
+
45
+ /** List every registered name — used by `rag:reindex --all`. */
46
+ names(): readonly string[] {
47
+ return [...this.targets.keys()]
48
+ }
49
+
50
+ /** Resolve the constructor for one name. Throws when unregistered. */
51
+ resolve(name: string): RetrievableConstructor {
52
+ const ctor = this.targets.get(name)
53
+ if (ctor === undefined) {
54
+ throw new RagError(`RetrievableRegistry: no retrievable registered under "${name}".`, {
55
+ context: { requested: name, available: this.names() },
56
+ })
57
+ }
58
+ return ctor
59
+ }
60
+ }
package/src/types.ts CHANGED
@@ -79,6 +79,25 @@ export interface RetrieveOptions {
79
79
  embedModel?: string
80
80
  /** Override the brain provider used for embedding. */
81
81
  embedProvider?: string
82
+ /**
83
+ * Optional re-ranker. When set, the framework fetches `rerankPool`
84
+ * (or `topK` if unset) matches from the store, runs the reranker,
85
+ * then slices the result to `topK`. The reranker decides the
86
+ * final order + `score`; `similarity` carries the raw vector
87
+ * cosine similarity.
88
+ *
89
+ * Built-in strategies live under `@strav/rag` —
90
+ * `KeywordReranker` (lexical overlap blend) and `MMRReranker`
91
+ * (Maximal Marginal Relevance for diversity).
92
+ */
93
+ rerank?: import('./rerankers/reranker.ts').Reranker
94
+ /**
95
+ * Size of the candidate pool fetched from the store before
96
+ * re-ranking. Ignored when `rerank` is unset. Defaults to
97
+ * `topK` — set higher (`topK * 3` to `topK * 5` is typical) to
98
+ * give the reranker room to reorder.
99
+ */
100
+ rerankPool?: number
82
101
  }
83
102
 
84
103
  export interface RetrieveResult {
@@ -47,9 +47,5 @@ export interface VectorStore {
47
47
  deleteBySource(collection: string, sourceId: string): Promise<void>
48
48
  flush(collection: string): Promise<void>
49
49
 
50
- query(
51
- collection: string,
52
- vector: readonly number[],
53
- options?: QueryOptions,
54
- ): Promise<QueryResult>
50
+ query(collection: string, vector: readonly number[], options?: QueryOptions): Promise<QueryResult>
55
51
  }
@@ -30,11 +30,7 @@
30
30
  * the helper itself.
31
31
  */
32
32
 
33
- import {
34
- emitCreateTable,
35
- type DatabaseExecutor,
36
- type SchemaRegistry,
37
- } from '@strav/database'
33
+ import { type DatabaseExecutor, emitCreateTable, type SchemaRegistry } from '@strav/database'
38
34
  import { ragVectorSchema } from './rag_vector_schema.ts'
39
35
 
40
36
  export interface ApplyRagVectorMigrationOptions {