@strav/rag 0.4.31 → 1.0.0-alpha.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,179 +0,0 @@
1
- import type { BaseModel } from '@strav/database'
2
- import type { NormalizeConstructor } from '@strav/kernel'
3
- import { Emitter } from '@strav/kernel'
4
- import { brain } from '@strav/brain'
5
- import RagManager from './rag_manager.ts'
6
- import { createChunker } from './chunking/chunker.ts'
7
- import type { VectorDocument, RetrieveOptions, RetrieveResult } from './types.ts'
8
-
9
/**
 * Mixin that adds vector-store ("retrievable") behaviour to a model class.
 *
 * The returned class extends `Base` and adds:
 * - overridable hooks deciding what gets indexed (`toRetrievableContent`,
 *   `toRetrievableMetadata`, `shouldBeRetrievable`, `retrievableAs`);
 * - instance methods that write/remove the instance's chunks in the
 *   configured store (`vectorize`, `vectorRemove`);
 * - static helpers for querying, bulk import and collection management.
 *
 * @param Base - Model constructor to extend.
 * @returns A class extending `Base` with the retrieval API.
 */
export function retrievable<T extends NormalizeConstructor<typeof BaseModel>>(Base: T) {
  return class Retrievable extends Base {
    /** Set to `true` on a class once `bootRetrieval` has registered its listeners. */
    private static _retrievalBooted = false

    /** Logical collection name for this model; defaults to the model's table name. */
    static retrievableAs(): string {
      return (this as unknown as typeof BaseModel).tableName
    }

    /**
     * Text that gets chunked and embedded for this instance.
     *
     * The default joins, with newlines, every non-empty string held in an own
     * enumerable property whose name does not start with `_`.
     */
    toRetrievableContent(): string {
      const parts: string[] = []
      for (const key of Object.keys(this)) {
        if (key.startsWith('_')) continue
        const val = (this as any)[key]
        if (typeof val === 'string' && val.length > 0) parts.push(val)
      }
      return parts.join('\n')
    }

    /** Extra metadata merged into every chunk document; empty by default. */
    toRetrievableMetadata(): Record<string, unknown> {
      return {}
    }

    /** Gate consulted by `vectorize` and `importAll`; `true` by default. */
    shouldBeRetrievable(): boolean {
      return true
    }

    // ── Instance methods ──────────────────────────────────────────────

    /**
     * Chunk, embed and store this instance's content, replacing any chunks
     * previously stored for it.
     *
     * Does nothing when `shouldBeRetrievable()` is false or the content is
     * empty. When the chunker yields no chunks, the previously stored chunks
     * are removed and nothing is written.
     *
     * @throws Error when the embedding call yields no vector for a chunk.
     *   Errors from the chunker, `brain.embed` or the store also propagate.
     */
    async vectorize(): Promise<void> {
      if (!this.shouldBeRetrievable()) return

      const ctor = this.constructor as typeof Retrievable
      const collection = RagManager.collectionName(ctor.retrievableAs())
      const config = RagManager.config
      const pkProp = (ctor as unknown as typeof BaseModel).primaryKeyProperty
      const id = (this as any)[pkProp]

      const content = this.toRetrievableContent()
      if (!content) return

      const chunker = createChunker(config.chunking)
      const chunks = chunker.chunk(content)
      if (chunks.length === 0) {
        // Nothing to index: only clear what was stored for this instance.
        await RagManager.store().deleteBySource(collection, id)
        return
      }

      const texts = chunks.map(c => c.content)
      const embeddings = await brain.embed(texts, {
        provider: config.embedding.provider,
        model: config.embedding.model,
      })

      const metadata = this.toRetrievableMetadata()
      const documents: VectorDocument[] = chunks.map((chunk, i) => {
        const embedding = embeddings[i]
        if (embedding == null) {
          // Never upsert a document without a vector.
          throw new Error(`Missing embedding for chunk ${i} of source ${String(id)}`)
        }
        return {
          id: `${id}_${i}`,
          sourceId: id,
          content: chunk.content,
          embedding,
          metadata: {
            ...metadata,
            modelId: id,
            chunkIndex: chunk.index,
          },
        }
      })

      // Delete only once the replacement documents are ready, so a failed
      // chunking or embedding step leaves the existing vectors untouched.
      await RagManager.store().deleteBySource(collection, id)
      await RagManager.store().upsert(collection, documents)
    }

    /** Remove every chunk stored for this instance from its collection. */
    async vectorRemove(): Promise<void> {
      const ctor = this.constructor as typeof Retrievable
      const collection = RagManager.collectionName(ctor.retrievableAs())
      const pkProp = (ctor as unknown as typeof BaseModel).primaryKeyProperty
      const id = (this as any)[pkProp]
      await RagManager.store().deleteBySource(collection, id)
    }

    // ── Static methods ────────────────────────────────────────────────

    /**
     * Run a retrieval query through the `rag` helper.
     *
     * @param query - Query text.
     * @param options - Retrieval options; `collection` defaults to `retrievableAs()`.
     */
    static async retrieve(query: string, options?: RetrieveOptions): Promise<RetrieveResult> {
      // Loaded lazily; presumably to avoid a circular import with helpers.ts — TODO confirm.
      const { rag } = await import('./helpers.ts')
      return rag.retrieve(query, {
        ...options,
        collection: options?.collection ?? this.retrievableAs(),
      })
    }

    /**
     * Vectorize every row of the model's table, reading it in pages ordered
     * by primary key.
     *
     * The collection is created first. Rows whose `shouldBeRetrievable()` is
     * false are skipped, and a row whose `vectorize()` throws is skipped
     * without aborting the import.
     *
     * @param chunkSize - Number of rows fetched per page (default 100).
     * @returns Number of rows for which `vectorize()` completed without throwing.
     */
    static async importAll(chunkSize: number = 100): Promise<number> {
      const ModelCtor = this as unknown as typeof BaseModel & typeof Retrievable
      const collection = RagManager.collectionName(this.retrievableAs())
      const config = RagManager.config
      const db = ModelCtor.db
      const table = ModelCtor.tableName
      const pkCol = ModelCtor.primaryKeyColumn

      await RagManager.store().createCollection(collection, config.embedding.dimension)

      let imported = 0
      let offset = 0

      while (true) {
        // Identifiers come from the model definition; page bounds are bound parameters.
        const rows = (await db.sql.unsafe(
          `SELECT * FROM "${table}" ORDER BY "${pkCol}" LIMIT $1 OFFSET $2`,
          [chunkSize, offset]
        )) as Record<string, unknown>[]

        if (rows.length === 0) break

        for (const row of rows) {
          const instance = ModelCtor.hydrate(row) as InstanceType<typeof Retrievable>
          if (instance.shouldBeRetrievable()) {
            try {
              await instance.vectorize()
              imported++
            } catch {
              // Vectorization is secondary — continue on failure
            }
          }
        }

        offset += chunkSize
        // A short page means the table is exhausted; skip the extra empty query.
        if (rows.length < chunkSize) break
      }

      return imported
    }

    /** Flush this model's collection in the store. */
    static async flushVectors(): Promise<void> {
      const collection = RagManager.collectionName(this.retrievableAs())
      await RagManager.store().flush(collection)
    }

    /** Create this model's collection using the configured embedding dimension. */
    static async createVectorCollection(): Promise<void> {
      const collection = RagManager.collectionName(this.retrievableAs())
      await RagManager.store().createCollection(collection, RagManager.config.embedding.dimension)
    }

    /**
     * Register event listeners that keep the vector store in sync with the model.
     *
     * `<prefix>.created`, `<prefix>.updated` and `<prefix>.synced` trigger
     * `vectorize()` on the emitted model; `<prefix>.deleted` triggers
     * `vectorRemove()`. Listener failures are swallowed so they cannot break
     * the event pipeline. Calling this again on the same class is a no-op.
     *
     * @param eventPrefix - Prefix of the model's lifecycle event names.
     */
    static bootRetrieval(eventPrefix: string): void {
      // Check an OWN flag: static fields are inherited through the prototype
      // chain, so a subclass of an already-booted model would otherwise read
      // its parent's `true` and never register listeners for its own prefix.
      const bootedHere =
        Object.prototype.hasOwnProperty.call(this, '_retrievalBooted') && this._retrievalBooted
      if (bootedHere) return
      this._retrievalBooted = true

      const vectorizeFn = async (model: unknown) => {
        if (model && typeof (model as any).vectorize === 'function') {
          try {
            await (model as any).vectorize()
          } catch {
            // Vectorization is secondary — failures should not break the event pipeline
          }
        }
      }

      const removeFn = async (model: unknown) => {
        if (model && typeof (model as any).vectorRemove === 'function') {
          try {
            await (model as any).vectorRemove()
          } catch {
            // Vector removal is secondary
          }
        }
      }

      Emitter.on(`${eventPrefix}.created`, vectorizeFn)
      Emitter.on(`${eventPrefix}.updated`, vectorizeFn)
      Emitter.on(`${eventPrefix}.synced`, vectorizeFn)
      Emitter.on(`${eventPrefix}.deleted`, removeFn)
    }
  }
}
177
-
178
/** Constructor type of the class returned by the `retrievable` mixin. */
export type RetrievableModel = ReturnType<typeof retrievable>

/** Instance type of the class returned by the `retrievable` mixin. */
export type RetrievableInstance = InstanceType<RetrievableModel>
@@ -1,33 +0,0 @@
1
- import { env } from '@strav/kernel'
2
-
3
/**
 * Default RAG configuration.
 *
 * String settings are read through `env(name, fallback)`; the fallback is
 * used when the variable is not set.
 */
export default {
  // Name of the store entry (see `stores` below) used by default.
  default: env('RAG_DRIVER', 'pgvector'),

  // Collection-name prefix; presumably applied by RagManager.collectionName — TODO confirm.
  prefix: env('RAG_PREFIX', ''),

  embedding: {
    provider: env('RAG_EMBEDDING_PROVIDER', 'openai'),
    model: env('RAG_EMBEDDING_MODEL', 'text-embedding-3-small'),
    // Vector size passed to createCollection. It has to match the output size
    // of the selected model, so it is overridable alongside the model name;
    // an unset or non-numeric value falls back to 1536.
    dimension: Number.parseInt(String(env('RAG_EMBEDDING_DIMENSION', '1536')), 10) || 1536,
  },

  // Options handed to createChunker when content is vectorized.
  chunking: {
    strategy: 'recursive',
    chunkSize: 512,
    overlap: 64,
  },

  // Available store definitions, keyed by name.
  stores: {
    pgvector: {
      driver: 'pgvector',
    },

    memory: {
      driver: 'memory',
    },

    null: {
      driver: 'null',
    },
  },
}
package/tsconfig.json DELETED
@@ -1,5 +0,0 @@
1
- {
2
- "extends": "../../tsconfig.json",
3
- "include": ["src/**/*.ts"],
4
- "exclude": ["node_modules", "tests"]
5
- }