@strav/rag 0.4.31 → 1.0.0-alpha.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/types.ts CHANGED
@@ -1,47 +1,84 @@
1
- // ── Vector Documents ─────────────────────────────────────────────────────
2
-
1
+ /**
2
+ * `@strav/rag` types — the data shapes apps see when reading and
3
+ * writing vectors and when running retrieval.
4
+ *
5
+ * Three concept clusters:
6
+ *
7
+ * - **Vector docs + queries** — the storage layer. A
8
+ * `VectorDocument` is one indexed unit (a chunk of source
9
+ * content, its embedding, and free-form metadata).
10
+ * `query()` returns `VectorMatch[]` ranked by similarity.
11
+ *
12
+ * - **Retrieval pipeline** — `RetrieveOptions` /
13
+ * `RetrieveResult`. Apps call `rag.retrieve(query, ...)`,
14
+ * the manager embeds the query through `@strav/brain`,
15
+ * queries the active store, and returns matches with
16
+ * normalized similarity scores.
17
+ *
18
+ * - **Chunking** — `Chunk`, `Chunker`. The chunker takes raw
19
+ * content and produces overlapping segments suitable for
20
+ * embedding. Two strategies ship: `fixed` (mechanical N-char
21
+ * windows with overlap) and `recursive` (paragraph-aware,
22
+ * better for prose).
23
+ */
24
+
25
+ // ─── Vector documents + queries ──────────────────────────────────────────
26
+
27
+ /**
28
+ * One indexed unit. `id` is provider-assigned (ULID by default);
29
+ * `sourceId` is the optional app-defined pointer back to the row
30
+ * the chunk came from (e.g., `article_id`) — `deleteBySource`
31
+ * removes every chunk for one source in a single call.
32
+ */
3
33
  export interface VectorDocument {
4
- id?: string | number
5
- sourceId?: string | number
34
+ id?: string
35
+ sourceId?: string | null
6
36
  content: string
7
37
  embedding: number[]
8
38
  metadata?: Record<string, unknown>
9
39
  }
10
40
 
11
- // ── Query Options & Results ──────────────────────────────────────────────
12
-
13
41
  export interface QueryOptions {
42
+ /** Top-K matches to return. Default `5`. */
14
43
  topK?: number
44
+ /** Minimum similarity threshold (0–1). Matches below this are filtered out. */
15
45
  threshold?: number
46
+ /** Metadata filter — flat key/value AND. Driver-specific operators are NOT supported in V1. */
16
47
  filter?: Record<string, unknown>
17
48
  }
18
49
 
19
50
  export interface QueryResult {
20
51
  matches: VectorMatch[]
21
- processingTimeMs?: number
52
+ /** Time the underlying store took to compute the query, in ms. */
53
+ processingTimeMs: number
22
54
  }
23
55
 
24
56
  export interface VectorMatch {
25
- id: string | number
57
+ id: string
26
58
  content: string
59
+ /** Similarity score in [0, 1]. 1.0 = identical embeddings, 0 = orthogonal. */
27
60
  score: number
28
61
  metadata: Record<string, unknown>
62
+ sourceId?: string | null
29
63
  }
30
64
 
31
- // ── Retrieval (high-level pipeline) ──────────────────────────────────────
65
+ // ─── Retrieval pipeline ─────────────────────────────────────────────────
32
66
 
33
67
  export interface RetrieveOptions {
68
+ /** Override the collection. Defaults to the manager's default. */
34
69
  collection?: string
70
+ /** Top-K matches. Default `5`. */
35
71
  topK?: number
72
+ /** Minimum similarity threshold. */
36
73
  threshold?: number
74
+ /** Metadata filter — flat key/value AND. */
37
75
  filter?: Record<string, unknown>
38
- rerank?: RerankOptions
39
- }
40
-
41
- export interface RerankOptions {
42
- authorityWeight?: number
43
- recencyWeight?: number
44
- similarityWeight?: number
76
+ /** Override the store. Defaults to the manager's default store. */
77
+ store?: string
78
+ /** Override the embedding model used to encode the query. */
79
+ embedModel?: string
80
+ /** Override the brain provider used for embedding. */
81
+ embedProvider?: string
45
82
  }
46
83
 
47
84
  export interface RetrieveResult {
@@ -51,19 +88,24 @@ export interface RetrieveResult {
51
88
  }
52
89
 
53
90
  export interface RetrievedDocument {
54
- id: string | number
91
+ id: string
55
92
  content: string
93
+ /** Same as `VectorMatch.score` for now — kept separate from `similarity` so that future re-ranking can make `score` diverge from the raw `similarity`. */
56
94
  score: number
57
95
  similarity: number
58
96
  metadata: Record<string, unknown>
97
+ sourceId?: string | null
59
98
  }
60
99
 
61
- // ── Chunking ─────────────────────────────────────────────────────────────
100
+ // ─── Chunking ────────────────────────────────────────────────────────────
62
101
 
63
102
  export interface Chunk {
64
103
  content: string
104
+ /** 0-based ordinal within the source. */
65
105
  index: number
106
+ /** Character offset of the chunk's first character in the source. */
66
107
  startOffset: number
108
+ /** Character offset one past the chunk's last character. */
67
109
  endOffset: number
68
110
  }
69
111
 
@@ -71,30 +113,46 @@ export interface Chunker {
71
113
  chunk(content: string): Chunk[]
72
114
  }
73
115
 
74
- // ── Configuration ────────────────────────────────────────────────────────
116
+ // ─── Configuration ──────────────────────────────────────────────────────
75
117
 
118
+ /**
119
+ * `config.rag` shape. Apps that don't configure rag get a sensible
120
+ * default (memory driver, OpenAI text-embedding-3-small, recursive
121
+ * chunking) — see `RagProvider.boot()` for the defaults.
122
+ */
76
123
  export interface RagConfig {
124
+ /** Default store name — must be a key in `stores`. */
77
125
  default: string
78
- prefix: string
126
+ /** Optional collection-name prefix. Used to namespace per-app or per-tenant. */
127
+ prefix?: string
79
128
  embedding: EmbeddingConfig
80
129
  chunking: ChunkingConfig
81
130
  stores: Record<string, StoreConfig>
82
131
  }
83
132
 
84
133
  export interface EmbeddingConfig {
134
+ /** `@strav/brain` provider key (e.g., `'openai'`, `'gemini'`, `'ollama'`). */
85
135
  provider: string
136
+ /** Model identifier — passed to `brain.embed(..., { model })`. */
86
137
  model: string
138
+ /** Vector dimension. Must match the chosen model. */
87
139
  dimension: number
88
140
  }
89
141
 
90
142
  export interface ChunkingConfig {
91
- strategy: string
143
+ /** `'fixed'` or `'recursive'`. Custom strategies aren't pluggable in V1. */
144
+ strategy: 'fixed' | 'recursive'
92
145
  chunkSize: number
93
146
  overlap: number
94
- separators?: string[]
147
+ /** Custom separators for the recursive strategy. Defaults to `['\n\n', '\n', '. ', ' ']`. */
148
+ separators?: readonly string[]
95
149
  }
96
150
 
97
151
  export interface StoreConfig {
152
+ /** `'memory'` or `'pgvector'`; custom drivers register via `rag.extend(name, factory)`. */
98
153
  driver: string
154
+ /** Pgvector: explicit table name override. Default `'rag_vector'`. */
155
+ table?: string
156
+ /** Free-form, driver-specific fields (e.g., HNSW tuning for pgvector). */
99
157
  [key: string]: unknown
100
158
  }
@@ -1,15 +1,55 @@
1
- import type { VectorDocument, QueryOptions, QueryResult } from './types.ts'
1
+ /**
2
+ * `VectorStore` — the storage abstraction every driver
3
+ * (`MemoryDriver`, `PgvectorDriver`, custom drivers registered
4
+ * via `rag.extend(...)`) implements.
5
+ *
6
+ * Lifecycle:
7
+ *
8
+ * - `createCollection(name, dimension)` — idempotent. For
9
+ * pgvector this is mostly a no-op (the table holds every
10
+ * collection); the dimension is enforced at INSERT.
11
+ * - `deleteCollection(name)` — drops every vector under
12
+ * `collection = name`.
13
+ *
14
+ * Reads + writes:
15
+ *
16
+ * - `upsert(collection, docs)` — inserts (and overwrites by id
17
+ * when supplied).
18
+ * - `delete(collection, ids)` — removes specific vectors.
19
+ * - `deleteBySource(collection, sourceId)` — removes every
20
+ * vector with the matching `source_id`. Apps call this when
21
+ * re-indexing a source row.
22
+ * - `flush(collection)` — drops every vector in the
23
+ * collection. Faster than `deleteCollection` for the common
24
+ * "wipe + re-ingest" pattern because the collection's
25
+ * identity stays intact.
26
+ * - `query(collection, vector, opts)` — top-K similarity
27
+ * search.
28
+ *
29
+ * Multitenancy lives BELOW this interface — the pgvector driver
30
+ * relies on `app.tenant_id` session settings (set by
31
+ * `tenants.withTenant`) to enforce isolation via RLS. The
32
+ * `MemoryDriver` is single-tenant by construction and ignores
33
+ * tenancy.
34
+ */
35
+
36
+ import type { QueryOptions, QueryResult, VectorDocument } from './types.ts'
2
37
 
3
38
  export interface VectorStore {
39
+ /** Driver identifier — `'memory'`, `'pgvector'`, or the name passed to `rag.extend`. */
4
40
  readonly name: string
5
41
 
6
42
  createCollection(collection: string, dimension: number): Promise<void>
7
43
  deleteCollection(collection: string): Promise<void>
8
44
 
9
- upsert(collection: string, documents: VectorDocument[]): Promise<void>
10
- delete(collection: string, ids: (string | number)[]): Promise<void>
11
- deleteBySource(collection: string, sourceId: string | number): Promise<void>
45
+ upsert(collection: string, documents: readonly VectorDocument[]): Promise<void>
46
+ delete(collection: string, ids: readonly string[]): Promise<void>
47
+ deleteBySource(collection: string, sourceId: string): Promise<void>
12
48
  flush(collection: string): Promise<void>
13
49
 
14
- query(collection: string, vector: number[], options?: QueryOptions): Promise<QueryResult>
50
+ query(
51
+ collection: string,
52
+ vector: readonly number[],
53
+ options?: QueryOptions,
54
+ ): Promise<QueryResult>
15
55
  }
@@ -1,41 +0,0 @@
1
- import type { Command } from 'commander'
2
- import chalk from 'chalk'
3
- import { bootstrap, shutdown } from '@strav/cli'
4
- import { BaseModel } from '@strav/database'
5
- import RagManager from '../rag_manager.ts'
6
-
7
- export function register(program: Command): void {
8
- program
9
- .command('rag:flush <model>')
10
- .description("Flush all vectors from a model's vector collection")
11
- .action(async (modelPath: string) => {
12
- let db
13
- try {
14
- const { db: database, config } = await bootstrap()
15
- db = database
16
-
17
- new BaseModel(db)
18
- new RagManager(config)
19
-
20
- const resolved = require.resolve(`${process.cwd()}/${modelPath}`)
21
- const module = await import(resolved)
22
- const ModelClass = module.default ?? (Object.values(module)[0] as any)
23
-
24
- if (typeof ModelClass?.flushVectors !== 'function') {
25
- console.error(chalk.red(`Model "${modelPath}" does not use the retrievable() mixin.`))
26
- process.exit(1)
27
- }
28
-
29
- const collectionName = ModelClass.retrievableAs()
30
- console.log(chalk.dim(`Flushing "${collectionName}"...`))
31
-
32
- await ModelClass.flushVectors()
33
- console.log(chalk.green(`Flushed all vectors from "${collectionName}".`))
34
- } catch (err) {
35
- console.error(chalk.red(`Error: ${err instanceof Error ? err.message : err}`))
36
- process.exit(1)
37
- } finally {
38
- if (db) await shutdown(db)
39
- }
40
- })
41
- }
@@ -1,45 +0,0 @@
1
- import type { Command } from 'commander'
2
- import chalk from 'chalk'
3
- import { bootstrap, shutdown } from '@strav/cli'
4
- import { BaseModel } from '@strav/database'
5
- import { BrainManager } from '@strav/brain'
6
- import RagManager from '../rag_manager.ts'
7
-
8
- export function register(program: Command): void {
9
- program
10
- .command('rag:ingest <model>')
11
- .description('Vectorize all records for a model into the vector store')
12
- .option('--chunk <size>', 'Records per batch', '100')
13
- .action(async (modelPath: string, options: { chunk: string }) => {
14
- let db
15
- try {
16
- const { db: database, config } = await bootstrap()
17
- db = database
18
-
19
- new BaseModel(db)
20
- new RagManager(config)
21
- new BrainManager(config)
22
-
23
- const resolved = require.resolve(`${process.cwd()}/${modelPath}`)
24
- const module = await import(resolved)
25
- const ModelClass = module.default ?? (Object.values(module)[0] as any)
26
-
27
- if (typeof ModelClass?.importAll !== 'function') {
28
- console.error(chalk.red(`Model "${modelPath}" does not use the retrievable() mixin.`))
29
- process.exit(1)
30
- }
31
-
32
- const chunkSize = parseInt(options.chunk, 10)
33
- const collectionName = ModelClass.retrievableAs()
34
- console.log(chalk.dim(`Vectorizing ${ModelClass.name} into "${collectionName}"...`))
35
-
36
- const count = await ModelClass.importAll(chunkSize)
37
- console.log(chalk.green(`Vectorized ${count} record(s) into "${collectionName}".`))
38
- } catch (err) {
39
- console.error(chalk.red(`Error: ${err instanceof Error ? err.message : err}`))
40
- process.exit(1)
41
- } finally {
42
- if (db) await shutdown(db)
43
- }
44
- })
45
- }
@@ -1,21 +0,0 @@
1
- import type { VectorStore } from '../vector_store.ts'
2
- import type { VectorDocument, QueryOptions, QueryResult } from '../types.ts'
3
-
4
- export class NullDriver implements VectorStore {
5
- readonly name = 'null'
6
-
7
- async createCollection(_collection: string, _dimension: number): Promise<void> {}
8
- async deleteCollection(_collection: string): Promise<void> {}
9
- async upsert(_collection: string, _documents: VectorDocument[]): Promise<void> {}
10
- async delete(_collection: string, _ids: (string | number)[]): Promise<void> {}
11
- async deleteBySource(_collection: string, _sourceId: string | number): Promise<void> {}
12
- async flush(_collection: string): Promise<void> {}
13
-
14
- async query(
15
- _collection: string,
16
- _vector: number[],
17
- _options?: QueryOptions
18
- ): Promise<QueryResult> {
19
- return { matches: [] }
20
- }
21
- }
package/src/errors.ts DELETED
@@ -1,21 +0,0 @@
1
- import { StravError } from '@strav/kernel'
2
-
3
- export class RagError extends StravError {}
4
-
5
- export class CollectionNotFoundError extends RagError {
6
- constructor(collection: string) {
7
- super(`Vector collection "${collection}" not found.`)
8
- }
9
- }
10
-
11
- export class VectorQueryError extends RagError {
12
- constructor(collection: string, cause?: string) {
13
- super(`Vector query on "${collection}" failed${cause ? `: ${cause}` : ''}.`)
14
- }
15
- }
16
-
17
- export class EmbeddingError extends RagError {
18
- constructor(cause?: string) {
19
- super(`Embedding generation failed${cause ? `: ${cause}` : ''}.`)
20
- }
21
- }
package/src/helpers.ts DELETED
@@ -1,186 +0,0 @@
1
- import { brain } from '@strav/brain'
2
- import RagManager from './rag_manager.ts'
3
- import type { VectorStore } from './vector_store.ts'
4
- import type {
5
- RetrieveOptions,
6
- RetrieveResult,
7
- RetrievedDocument,
8
- VectorDocument,
9
- StoreConfig,
10
- } from './types.ts'
11
- import { createChunker } from './chunking/chunker.ts'
12
- import { EmbeddingError } from './errors.ts'
13
-
14
- export interface IngestOptions {
15
- metadata?: Record<string, unknown>
16
- sourceId?: string | number
17
- chunkSize?: number
18
- overlap?: number
19
- strategy?: string
20
- /**
21
- * Optional per-chunk sanitizer applied AFTER chunking, BEFORE
22
- * embedding. Use to scrub PII, secrets, or prompt-injection markers
23
- * out of untrusted source content before it lands in the vector
24
- * store. Return `null` to drop a chunk; otherwise return the
25
- * (possibly modified) text.
26
- *
27
- * The hook is the caller's escape valve — RAG cannot judge what's
28
- * sensitive in your domain. See `docs/rag/rag.md` "Content trust
29
- * model" for the threat surface (prompt injection at retrieval
30
- * time, indexed PII, accidental secret indexing).
31
- */
32
- sanitize?: (chunk: { content: string; index: number }) => string | null | Promise<string | null>
33
- }
34
-
35
- export const rag = {
36
- store(name?: string): VectorStore {
37
- return RagManager.store(name)
38
- },
39
-
40
- extend(name: string, factory: (config: StoreConfig) => VectorStore): void {
41
- RagManager.extend(name, factory)
42
- },
43
-
44
- async ingest(
45
- collection: string,
46
- content: string,
47
- options: IngestOptions = {}
48
- ): Promise<string[]> {
49
- const config = RagManager.config
50
- const fullCollection = RagManager.collectionName(collection)
51
-
52
- const chunkerConfig = {
53
- strategy: options.strategy ?? config.chunking.strategy,
54
- chunkSize: options.chunkSize ?? config.chunking.chunkSize,
55
- overlap: options.overlap ?? config.chunking.overlap,
56
- separators: config.chunking.separators,
57
- }
58
- const chunker = createChunker(chunkerConfig)
59
- let chunks = chunker.chunk(content)
60
-
61
- if (chunks.length === 0) return []
62
-
63
- // Apply the optional sanitize hook before embedding. Drops chunks
64
- // where the hook returns null (e.g., a chunk that's all PII).
65
- if (options.sanitize) {
66
- const sanitized: typeof chunks = []
67
- for (const chunk of chunks) {
68
- const result = await options.sanitize({ content: chunk.content, index: chunk.index })
69
- if (result === null) continue
70
- sanitized.push({ ...chunk, content: result })
71
- }
72
- chunks = sanitized
73
- if (chunks.length === 0) return []
74
- }
75
-
76
- const chunkTexts = chunks.map(c => c.content)
77
- let embeddings: number[][]
78
- try {
79
- embeddings = await brain.embed(chunkTexts, {
80
- provider: config.embedding.provider,
81
- model: config.embedding.model,
82
- })
83
- } catch (err) {
84
- throw new EmbeddingError(err instanceof Error ? err.message : String(err))
85
- }
86
-
87
- const baseId = crypto.randomUUID()
88
- const documents: VectorDocument[] = chunks.map((chunk, i) => ({
89
- id: `${baseId}_${i}`,
90
- sourceId: options.sourceId,
91
- content: chunk.content,
92
- embedding: embeddings[i]!,
93
- metadata: {
94
- ...options.metadata,
95
- chunkIndex: chunk.index,
96
- startOffset: chunk.startOffset,
97
- endOffset: chunk.endOffset,
98
- },
99
- }))
100
-
101
- await RagManager.store().upsert(fullCollection, documents)
102
- return documents.map(d => String(d.id))
103
- },
104
-
105
- async retrieve(query: string, options: RetrieveOptions = {}): Promise<RetrieveResult> {
106
- const start = performance.now()
107
- const config = RagManager.config
108
- const collection = RagManager.collectionName(options.collection ?? 'default')
109
-
110
- let queryVector: number[]
111
- try {
112
- const vectors = await brain.embed(query, {
113
- provider: config.embedding.provider,
114
- model: config.embedding.model,
115
- })
116
- queryVector = vectors[0]!
117
- } catch (err) {
118
- throw new EmbeddingError(err instanceof Error ? err.message : String(err))
119
- }
120
-
121
- const queryResult = await RagManager.store().query(collection, queryVector, {
122
- topK: options.topK,
123
- threshold: options.threshold,
124
- filter: options.filter,
125
- })
126
-
127
- let matches: RetrievedDocument[] = queryResult.matches.map(m => ({
128
- id: m.id,
129
- content: m.content,
130
- score: m.score,
131
- similarity: m.score,
132
- metadata: m.metadata,
133
- }))
134
-
135
- if (options.rerank) {
136
- const {
137
- similarityWeight = 0.6,
138
- authorityWeight = 0.2,
139
- recencyWeight = 0.2,
140
- } = options.rerank
141
-
142
- matches = matches.map(m => {
143
- const authority =
144
- typeof m.metadata.authority === 'number' ? m.metadata.authority : 0
145
- const createdAt = m.metadata.createdAt
146
- const recencyScore = createdAt
147
- ? 1 / (1 + daysSince(new Date(createdAt as string)) / 30)
148
- : 0.5
149
-
150
- const finalScore =
151
- m.similarity * similarityWeight +
152
- authority * authorityWeight +
153
- recencyScore * recencyWeight
154
-
155
- return { ...m, score: finalScore }
156
- })
157
-
158
- matches.sort((a, b) => b.score - a.score)
159
- }
160
-
161
- return {
162
- matches,
163
- query,
164
- processingTimeMs: performance.now() - start,
165
- }
166
- },
167
-
168
- async delete(collection: string, ids: (string | number)[]): Promise<void> {
169
- const fullCollection = RagManager.collectionName(collection)
170
- await RagManager.store().delete(fullCollection, ids)
171
- },
172
-
173
- async deleteBySource(collection: string, sourceId: string | number): Promise<void> {
174
- const fullCollection = RagManager.collectionName(collection)
175
- await RagManager.store().deleteBySource(fullCollection, sourceId)
176
- },
177
-
178
- async flush(collection: string): Promise<void> {
179
- const fullCollection = RagManager.collectionName(collection)
180
- await RagManager.store().flush(fullCollection)
181
- },
182
- }
183
-
184
- function daysSince(date: Date): number {
185
- return (Date.now() - date.getTime()) / (1000 * 60 * 60 * 24)
186
- }