@opensaas/stack-rag 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +4 -0
- package/CHANGELOG.md +10 -0
- package/CLAUDE.md +565 -0
- package/LICENSE +21 -0
- package/README.md +406 -0
- package/dist/config/index.d.ts +63 -0
- package/dist/config/index.d.ts.map +1 -0
- package/dist/config/index.js +94 -0
- package/dist/config/index.js.map +1 -0
- package/dist/config/plugin.d.ts +38 -0
- package/dist/config/plugin.d.ts.map +1 -0
- package/dist/config/plugin.js +215 -0
- package/dist/config/plugin.js.map +1 -0
- package/dist/config/plugin.test.d.ts +2 -0
- package/dist/config/plugin.test.d.ts.map +1 -0
- package/dist/config/plugin.test.js +554 -0
- package/dist/config/plugin.test.js.map +1 -0
- package/dist/config/types.d.ts +249 -0
- package/dist/config/types.d.ts.map +1 -0
- package/dist/config/types.js +5 -0
- package/dist/config/types.js.map +1 -0
- package/dist/fields/embedding.d.ts +85 -0
- package/dist/fields/embedding.d.ts.map +1 -0
- package/dist/fields/embedding.js +81 -0
- package/dist/fields/embedding.js.map +1 -0
- package/dist/fields/embedding.test.d.ts +2 -0
- package/dist/fields/embedding.test.d.ts.map +1 -0
- package/dist/fields/embedding.test.js +323 -0
- package/dist/fields/embedding.test.js.map +1 -0
- package/dist/fields/index.d.ts +6 -0
- package/dist/fields/index.d.ts.map +1 -0
- package/dist/fields/index.js +5 -0
- package/dist/fields/index.js.map +1 -0
- package/dist/index.d.ts +8 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +9 -0
- package/dist/index.js.map +1 -0
- package/dist/mcp/index.d.ts +19 -0
- package/dist/mcp/index.d.ts.map +1 -0
- package/dist/mcp/index.js +18 -0
- package/dist/mcp/index.js.map +1 -0
- package/dist/providers/index.d.ts +38 -0
- package/dist/providers/index.d.ts.map +1 -0
- package/dist/providers/index.js +68 -0
- package/dist/providers/index.js.map +1 -0
- package/dist/providers/ollama.d.ts +49 -0
- package/dist/providers/ollama.d.ts.map +1 -0
- package/dist/providers/ollama.js +151 -0
- package/dist/providers/ollama.js.map +1 -0
- package/dist/providers/openai.d.ts +41 -0
- package/dist/providers/openai.d.ts.map +1 -0
- package/dist/providers/openai.js +126 -0
- package/dist/providers/openai.js.map +1 -0
- package/dist/providers/providers.test.d.ts +2 -0
- package/dist/providers/providers.test.d.ts.map +1 -0
- package/dist/providers/providers.test.js +224 -0
- package/dist/providers/providers.test.js.map +1 -0
- package/dist/providers/types.d.ts +88 -0
- package/dist/providers/types.d.ts.map +1 -0
- package/dist/providers/types.js +2 -0
- package/dist/providers/types.js.map +1 -0
- package/dist/runtime/batch.d.ts +183 -0
- package/dist/runtime/batch.d.ts.map +1 -0
- package/dist/runtime/batch.js +240 -0
- package/dist/runtime/batch.js.map +1 -0
- package/dist/runtime/batch.test.d.ts +2 -0
- package/dist/runtime/batch.test.d.ts.map +1 -0
- package/dist/runtime/batch.test.js +251 -0
- package/dist/runtime/batch.test.js.map +1 -0
- package/dist/runtime/chunking.d.ts +42 -0
- package/dist/runtime/chunking.d.ts.map +1 -0
- package/dist/runtime/chunking.js +264 -0
- package/dist/runtime/chunking.js.map +1 -0
- package/dist/runtime/chunking.test.d.ts +2 -0
- package/dist/runtime/chunking.test.d.ts.map +1 -0
- package/dist/runtime/chunking.test.js +212 -0
- package/dist/runtime/chunking.test.js.map +1 -0
- package/dist/runtime/embeddings.d.ts +147 -0
- package/dist/runtime/embeddings.d.ts.map +1 -0
- package/dist/runtime/embeddings.js +201 -0
- package/dist/runtime/embeddings.js.map +1 -0
- package/dist/runtime/embeddings.test.d.ts +2 -0
- package/dist/runtime/embeddings.test.d.ts.map +1 -0
- package/dist/runtime/embeddings.test.js +366 -0
- package/dist/runtime/embeddings.test.js.map +1 -0
- package/dist/runtime/index.d.ts +14 -0
- package/dist/runtime/index.d.ts.map +1 -0
- package/dist/runtime/index.js +18 -0
- package/dist/runtime/index.js.map +1 -0
- package/dist/runtime/search.d.ts +135 -0
- package/dist/runtime/search.d.ts.map +1 -0
- package/dist/runtime/search.js +101 -0
- package/dist/runtime/search.js.map +1 -0
- package/dist/storage/index.d.ts +41 -0
- package/dist/storage/index.d.ts.map +1 -0
- package/dist/storage/index.js +73 -0
- package/dist/storage/index.js.map +1 -0
- package/dist/storage/json.d.ts +34 -0
- package/dist/storage/json.d.ts.map +1 -0
- package/dist/storage/json.js +82 -0
- package/dist/storage/json.js.map +1 -0
- package/dist/storage/pgvector.d.ts +53 -0
- package/dist/storage/pgvector.d.ts.map +1 -0
- package/dist/storage/pgvector.js +168 -0
- package/dist/storage/pgvector.js.map +1 -0
- package/dist/storage/sqlite-vss.d.ts +49 -0
- package/dist/storage/sqlite-vss.d.ts.map +1 -0
- package/dist/storage/sqlite-vss.js +148 -0
- package/dist/storage/sqlite-vss.js.map +1 -0
- package/dist/storage/storage.test.d.ts +2 -0
- package/dist/storage/storage.test.d.ts.map +1 -0
- package/dist/storage/storage.test.js +440 -0
- package/dist/storage/storage.test.js.map +1 -0
- package/dist/storage/types.d.ts +79 -0
- package/dist/storage/types.d.ts.map +1 -0
- package/dist/storage/types.js +49 -0
- package/dist/storage/types.js.map +1 -0
- package/package.json +82 -0
- package/src/config/index.ts +116 -0
- package/src/config/plugin.test.ts +664 -0
- package/src/config/plugin.ts +257 -0
- package/src/config/types.ts +283 -0
- package/src/fields/embedding.test.ts +408 -0
- package/src/fields/embedding.ts +150 -0
- package/src/fields/index.ts +6 -0
- package/src/index.ts +33 -0
- package/src/mcp/index.ts +21 -0
- package/src/providers/index.ts +81 -0
- package/src/providers/ollama.ts +186 -0
- package/src/providers/openai.ts +161 -0
- package/src/providers/providers.test.ts +275 -0
- package/src/providers/types.ts +100 -0
- package/src/runtime/batch.test.ts +332 -0
- package/src/runtime/batch.ts +424 -0
- package/src/runtime/chunking.test.ts +258 -0
- package/src/runtime/chunking.ts +334 -0
- package/src/runtime/embeddings.test.ts +441 -0
- package/src/runtime/embeddings.ts +380 -0
- package/src/runtime/index.ts +51 -0
- package/src/runtime/search.ts +243 -0
- package/src/storage/index.ts +86 -0
- package/src/storage/json.ts +106 -0
- package/src/storage/pgvector.ts +206 -0
- package/src/storage/sqlite-vss.ts +193 -0
- package/src/storage/storage.test.ts +521 -0
- package/src/storage/types.ts +126 -0
- package/tsconfig.json +13 -0
- package/tsconfig.tsbuildinfo +1 -0
- package/vitest.config.ts +18 -0
|
@@ -0,0 +1,408 @@
|
|
|
1
|
+
import { describe, it, expect } from 'vitest'
|
|
2
|
+
import { embedding } from './embedding.js'
|
|
3
|
+
import type { EmbeddingField } from './embedding.js'
|
|
4
|
+
import { z } from 'zod'
|
|
5
|
+
|
|
6
|
+
describe('Embedding Field', () => {
|
|
7
|
+
describe('embedding field builder', () => {
|
|
8
|
+
it('should create embedding field with default options', () => {
|
|
9
|
+
const field = embedding()
|
|
10
|
+
|
|
11
|
+
expect(field.type).toBe('embedding')
|
|
12
|
+
expect(field.dimensions).toBe(1536) // Default OpenAI dimensions
|
|
13
|
+
expect(field.autoGenerate).toBe(false) // No sourceField
|
|
14
|
+
})
|
|
15
|
+
|
|
16
|
+
it('should create embedding field with custom dimensions', () => {
|
|
17
|
+
const field = embedding({ dimensions: 3072 })
|
|
18
|
+
|
|
19
|
+
expect(field.dimensions).toBe(3072)
|
|
20
|
+
})
|
|
21
|
+
|
|
22
|
+
it('should set autoGenerate to true when sourceField is provided', () => {
|
|
23
|
+
const field = embedding({ sourceField: 'content' })
|
|
24
|
+
|
|
25
|
+
expect(field.autoGenerate).toBe(true)
|
|
26
|
+
expect(field.sourceField).toBe('content')
|
|
27
|
+
})
|
|
28
|
+
|
|
29
|
+
it('should allow explicit autoGenerate override', () => {
|
|
30
|
+
const field = embedding({ sourceField: 'content', autoGenerate: false })
|
|
31
|
+
|
|
32
|
+
expect(field.autoGenerate).toBe(false)
|
|
33
|
+
expect(field.sourceField).toBe('content')
|
|
34
|
+
})
|
|
35
|
+
|
|
36
|
+
it('should support provider configuration', () => {
|
|
37
|
+
const field = embedding({ provider: 'openai' })
|
|
38
|
+
|
|
39
|
+
expect(field.provider).toBe('openai')
|
|
40
|
+
})
|
|
41
|
+
|
|
42
|
+
it('should support chunking configuration', () => {
|
|
43
|
+
const field = embedding({
|
|
44
|
+
sourceField: 'content',
|
|
45
|
+
chunking: {
|
|
46
|
+
strategy: 'recursive',
|
|
47
|
+
maxTokens: 500,
|
|
48
|
+
overlap: 50,
|
|
49
|
+
},
|
|
50
|
+
})
|
|
51
|
+
|
|
52
|
+
expect(field.chunking).toEqual({
|
|
53
|
+
strategy: 'recursive',
|
|
54
|
+
maxTokens: 500,
|
|
55
|
+
overlap: 50,
|
|
56
|
+
})
|
|
57
|
+
})
|
|
58
|
+
|
|
59
|
+
it('should support UI configuration', () => {
|
|
60
|
+
const field = embedding({
|
|
61
|
+
ui: {
|
|
62
|
+
showVector: true,
|
|
63
|
+
showMetadata: true,
|
|
64
|
+
},
|
|
65
|
+
})
|
|
66
|
+
|
|
67
|
+
expect(field.ui).toEqual({
|
|
68
|
+
showVector: true,
|
|
69
|
+
showMetadata: true,
|
|
70
|
+
})
|
|
71
|
+
})
|
|
72
|
+
})
|
|
73
|
+
|
|
74
|
+
describe('getZodSchema', () => {
|
|
75
|
+
it('should generate valid Zod schema', () => {
|
|
76
|
+
const field = embedding({ dimensions: 3 })
|
|
77
|
+
const schema = field.getZodSchema!('embedding', 'create')
|
|
78
|
+
|
|
79
|
+
expect(schema).toBeDefined()
|
|
80
|
+
expect(schema instanceof z.ZodType).toBe(true)
|
|
81
|
+
})
|
|
82
|
+
|
|
83
|
+
it('should validate correct embedding structure', () => {
|
|
84
|
+
const field = embedding({ dimensions: 3 })
|
|
85
|
+
const schema = field.getZodSchema!('embedding', 'create')
|
|
86
|
+
|
|
87
|
+
const validEmbedding = {
|
|
88
|
+
vector: [0.1, 0.2, 0.3],
|
|
89
|
+
metadata: {
|
|
90
|
+
model: 'text-embedding-3-small',
|
|
91
|
+
provider: 'openai',
|
|
92
|
+
dimensions: 3,
|
|
93
|
+
generatedAt: new Date().toISOString(),
|
|
94
|
+
},
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
expect(() => schema.parse(validEmbedding)).not.toThrow()
|
|
98
|
+
})
|
|
99
|
+
|
|
100
|
+
it('should accept null embedding', () => {
|
|
101
|
+
const field = embedding({ dimensions: 3 })
|
|
102
|
+
const schema = field.getZodSchema!('embedding', 'create')
|
|
103
|
+
|
|
104
|
+
expect(() => schema.parse(null)).not.toThrow()
|
|
105
|
+
})
|
|
106
|
+
|
|
107
|
+
it('should accept undefined embedding', () => {
|
|
108
|
+
const field = embedding({ dimensions: 3 })
|
|
109
|
+
const schema = field.getZodSchema!('embedding', 'create')
|
|
110
|
+
|
|
111
|
+
expect(() => schema.parse(undefined)).not.toThrow()
|
|
112
|
+
})
|
|
113
|
+
|
|
114
|
+
it('should reject incorrect vector dimensions', () => {
|
|
115
|
+
const field = embedding({ dimensions: 3 })
|
|
116
|
+
const schema = field.getZodSchema!('embedding', 'create')
|
|
117
|
+
|
|
118
|
+
const invalidEmbedding = {
|
|
119
|
+
vector: [0.1, 0.2], // Only 2 dimensions, expected 3
|
|
120
|
+
metadata: {
|
|
121
|
+
model: 'test',
|
|
122
|
+
provider: 'test',
|
|
123
|
+
dimensions: 2,
|
|
124
|
+
generatedAt: new Date().toISOString(),
|
|
125
|
+
},
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
expect(() => schema.parse(invalidEmbedding)).toThrow(/exactly 3 dimensions/)
|
|
129
|
+
})
|
|
130
|
+
|
|
131
|
+
it('should validate metadata structure', () => {
|
|
132
|
+
const field = embedding({ dimensions: 3 })
|
|
133
|
+
const schema = field.getZodSchema!('embedding', 'create')
|
|
134
|
+
|
|
135
|
+
const invalidEmbedding = {
|
|
136
|
+
vector: [0.1, 0.2, 0.3],
|
|
137
|
+
metadata: {
|
|
138
|
+
// Missing required fields
|
|
139
|
+
model: 'test',
|
|
140
|
+
},
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
expect(() => schema.parse(invalidEmbedding)).toThrow()
|
|
144
|
+
})
|
|
145
|
+
|
|
146
|
+
it('should accept optional sourceHash in metadata', () => {
|
|
147
|
+
const field = embedding({ dimensions: 3 })
|
|
148
|
+
const schema = field.getZodSchema!('embedding', 'create')
|
|
149
|
+
|
|
150
|
+
const embeddingWithHash = {
|
|
151
|
+
vector: [0.1, 0.2, 0.3],
|
|
152
|
+
metadata: {
|
|
153
|
+
model: 'test',
|
|
154
|
+
provider: 'test',
|
|
155
|
+
dimensions: 3,
|
|
156
|
+
generatedAt: new Date().toISOString(),
|
|
157
|
+
sourceHash: 'abc123',
|
|
158
|
+
},
|
|
159
|
+
}
|
|
160
|
+
|
|
161
|
+
expect(() => schema.parse(embeddingWithHash)).not.toThrow()
|
|
162
|
+
})
|
|
163
|
+
|
|
164
|
+
it('should work for both create and update operations', () => {
|
|
165
|
+
const field = embedding({ dimensions: 3 })
|
|
166
|
+
|
|
167
|
+
const createSchema = field.getZodSchema!('embedding', 'create')
|
|
168
|
+
const updateSchema = field.getZodSchema!('embedding', 'update')
|
|
169
|
+
|
|
170
|
+
const validEmbedding = {
|
|
171
|
+
vector: [0.1, 0.2, 0.3],
|
|
172
|
+
metadata: {
|
|
173
|
+
model: 'test',
|
|
174
|
+
provider: 'test',
|
|
175
|
+
dimensions: 3,
|
|
176
|
+
generatedAt: new Date().toISOString(),
|
|
177
|
+
},
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
expect(() => createSchema.parse(validEmbedding)).not.toThrow()
|
|
181
|
+
expect(() => updateSchema.parse(validEmbedding)).not.toThrow()
|
|
182
|
+
})
|
|
183
|
+
})
|
|
184
|
+
|
|
185
|
+
describe('getPrismaType', () => {
|
|
186
|
+
it('should return Json type', () => {
|
|
187
|
+
const field = embedding()
|
|
188
|
+
const prismaType = field.getPrismaType!('embedding')
|
|
189
|
+
|
|
190
|
+
expect(prismaType.type).toBe('Json')
|
|
191
|
+
})
|
|
192
|
+
|
|
193
|
+
it('should return optional modifier', () => {
|
|
194
|
+
const field = embedding()
|
|
195
|
+
const prismaType = field.getPrismaType!('embedding')
|
|
196
|
+
|
|
197
|
+
expect(prismaType.modifiers).toBe('?')
|
|
198
|
+
})
|
|
199
|
+
|
|
200
|
+
it('should work regardless of field options', () => {
|
|
201
|
+
const field1 = embedding()
|
|
202
|
+
const field2 = embedding({ dimensions: 3072, provider: 'openai' })
|
|
203
|
+
|
|
204
|
+
const type1 = field1.getPrismaType!('embedding')
|
|
205
|
+
const type2 = field2.getPrismaType!('embedding')
|
|
206
|
+
|
|
207
|
+
expect(type1).toEqual(type2)
|
|
208
|
+
expect(type1.type).toBe('Json')
|
|
209
|
+
expect(type1.modifiers).toBe('?')
|
|
210
|
+
})
|
|
211
|
+
})
|
|
212
|
+
|
|
213
|
+
describe('getTypeScriptType', () => {
|
|
214
|
+
it('should return StoredEmbedding type', () => {
|
|
215
|
+
const field = embedding()
|
|
216
|
+
const tsType = field.getTypeScriptType!()
|
|
217
|
+
|
|
218
|
+
expect(tsType.type).toBe('StoredEmbedding | null')
|
|
219
|
+
})
|
|
220
|
+
|
|
221
|
+
it('should be optional', () => {
|
|
222
|
+
const field = embedding()
|
|
223
|
+
const tsType = field.getTypeScriptType!()
|
|
224
|
+
|
|
225
|
+
expect(tsType.optional).toBe(true)
|
|
226
|
+
})
|
|
227
|
+
|
|
228
|
+
it('should work regardless of field options', () => {
|
|
229
|
+
const field1 = embedding()
|
|
230
|
+
const field2 = embedding({ dimensions: 3072, provider: 'openai' })
|
|
231
|
+
|
|
232
|
+
const type1 = field1.getTypeScriptType!()
|
|
233
|
+
const type2 = field2.getTypeScriptType!()
|
|
234
|
+
|
|
235
|
+
expect(type1).toEqual(type2)
|
|
236
|
+
expect(type1.type).toBe('StoredEmbedding | null')
|
|
237
|
+
expect(type1.optional).toBe(true)
|
|
238
|
+
})
|
|
239
|
+
})
|
|
240
|
+
|
|
241
|
+
describe('Field configuration combinations', () => {
|
|
242
|
+
it('should support manual embedding storage', () => {
|
|
243
|
+
const field = embedding({
|
|
244
|
+
dimensions: 1536,
|
|
245
|
+
provider: 'openai',
|
|
246
|
+
})
|
|
247
|
+
|
|
248
|
+
expect(field.autoGenerate).toBe(false)
|
|
249
|
+
expect(field.sourceField).toBeUndefined()
|
|
250
|
+
expect(field.dimensions).toBe(1536)
|
|
251
|
+
expect(field.provider).toBe('openai')
|
|
252
|
+
})
|
|
253
|
+
|
|
254
|
+
it('should support automatic embedding generation', () => {
|
|
255
|
+
const field = embedding({
|
|
256
|
+
sourceField: 'content',
|
|
257
|
+
provider: 'openai',
|
|
258
|
+
dimensions: 1536,
|
|
259
|
+
autoGenerate: true,
|
|
260
|
+
})
|
|
261
|
+
|
|
262
|
+
expect(field.autoGenerate).toBe(true)
|
|
263
|
+
expect(field.sourceField).toBe('content')
|
|
264
|
+
expect(field.dimensions).toBe(1536)
|
|
265
|
+
expect(field.provider).toBe('openai')
|
|
266
|
+
})
|
|
267
|
+
|
|
268
|
+
it('should support chunking for long documents', () => {
|
|
269
|
+
const field = embedding({
|
|
270
|
+
sourceField: 'content',
|
|
271
|
+
chunking: {
|
|
272
|
+
strategy: 'recursive',
|
|
273
|
+
maxTokens: 500,
|
|
274
|
+
overlap: 50,
|
|
275
|
+
},
|
|
276
|
+
})
|
|
277
|
+
|
|
278
|
+
expect(field.chunking?.strategy).toBe('recursive')
|
|
279
|
+
expect(field.chunking?.maxTokens).toBe(500)
|
|
280
|
+
expect(field.chunking?.overlap).toBe(50)
|
|
281
|
+
})
|
|
282
|
+
|
|
283
|
+
it('should support multiple providers via provider name', () => {
|
|
284
|
+
const openaiField = embedding({ provider: 'openai' })
|
|
285
|
+
const ollamaField = embedding({ provider: 'ollama' })
|
|
286
|
+
|
|
287
|
+
expect(openaiField.provider).toBe('openai')
|
|
288
|
+
expect(ollamaField.provider).toBe('ollama')
|
|
289
|
+
})
|
|
290
|
+
})
|
|
291
|
+
|
|
292
|
+
describe('Field type conformance', () => {
|
|
293
|
+
it('should conform to BaseFieldConfig interface', () => {
|
|
294
|
+
const field: EmbeddingField = embedding()
|
|
295
|
+
|
|
296
|
+
expect(field).toHaveProperty('type')
|
|
297
|
+
expect(field).toHaveProperty('getZodSchema')
|
|
298
|
+
expect(field).toHaveProperty('getPrismaType')
|
|
299
|
+
expect(field).toHaveProperty('getTypeScriptType')
|
|
300
|
+
})
|
|
301
|
+
|
|
302
|
+
it('should have all required builder methods', () => {
|
|
303
|
+
const field = embedding()
|
|
304
|
+
|
|
305
|
+
expect(typeof field.getZodSchema).toBe('function')
|
|
306
|
+
expect(typeof field.getPrismaType).toBe('function')
|
|
307
|
+
expect(typeof field.getTypeScriptType).toBe('function')
|
|
308
|
+
})
|
|
309
|
+
|
|
310
|
+
it('should return consistent types across methods', () => {
|
|
311
|
+
const field = embedding({ dimensions: 1536 })
|
|
312
|
+
|
|
313
|
+
const zodSchema = field.getZodSchema!('embedding', 'create')
|
|
314
|
+
const prismaType = field.getPrismaType!('embedding')
|
|
315
|
+
const tsType = field.getTypeScriptType!()
|
|
316
|
+
|
|
317
|
+
// All methods should be callable and return valid results
|
|
318
|
+
expect(zodSchema).toBeDefined()
|
|
319
|
+
expect(prismaType).toBeDefined()
|
|
320
|
+
expect(tsType).toBeDefined()
|
|
321
|
+
|
|
322
|
+
// Types should be consistent (Json in Prisma, StoredEmbedding in TS)
|
|
323
|
+
expect(prismaType.type).toBe('Json')
|
|
324
|
+
expect(tsType.type).toContain('StoredEmbedding')
|
|
325
|
+
})
|
|
326
|
+
})
|
|
327
|
+
|
|
328
|
+
describe('UI configuration', () => {
|
|
329
|
+
it('should default to hiding vector display', () => {
|
|
330
|
+
const field = embedding()
|
|
331
|
+
|
|
332
|
+
expect(field.ui?.showVector).toBeUndefined()
|
|
333
|
+
})
|
|
334
|
+
|
|
335
|
+
it('should allow showing vector in UI', () => {
|
|
336
|
+
const field = embedding({
|
|
337
|
+
ui: { showVector: true },
|
|
338
|
+
})
|
|
339
|
+
|
|
340
|
+
expect(field.ui?.showVector).toBe(true)
|
|
341
|
+
})
|
|
342
|
+
|
|
343
|
+
it('should default to showing metadata', () => {
|
|
344
|
+
const field = embedding()
|
|
345
|
+
|
|
346
|
+
expect(field.ui?.showMetadata).toBeUndefined()
|
|
347
|
+
})
|
|
348
|
+
|
|
349
|
+
it('should allow customizing metadata display', () => {
|
|
350
|
+
const field = embedding({
|
|
351
|
+
ui: { showMetadata: false },
|
|
352
|
+
})
|
|
353
|
+
|
|
354
|
+
expect(field.ui?.showMetadata).toBe(false)
|
|
355
|
+
})
|
|
356
|
+
})
|
|
357
|
+
|
|
358
|
+
describe('Edge cases', () => {
|
|
359
|
+
it('should handle very large dimensions', () => {
|
|
360
|
+
const field = embedding({ dimensions: 10000 })
|
|
361
|
+
|
|
362
|
+
expect(field.dimensions).toBe(10000)
|
|
363
|
+
|
|
364
|
+
const schema = field.getZodSchema!('embedding', 'create')
|
|
365
|
+
const largeVector = Array(10000).fill(0.1)
|
|
366
|
+
|
|
367
|
+
const validEmbedding = {
|
|
368
|
+
vector: largeVector,
|
|
369
|
+
metadata: {
|
|
370
|
+
model: 'test',
|
|
371
|
+
provider: 'test',
|
|
372
|
+
dimensions: 10000,
|
|
373
|
+
generatedAt: new Date().toISOString(),
|
|
374
|
+
},
|
|
375
|
+
}
|
|
376
|
+
|
|
377
|
+
expect(() => schema.parse(validEmbedding)).not.toThrow()
|
|
378
|
+
})
|
|
379
|
+
|
|
380
|
+
it('should handle minimum dimensions', () => {
|
|
381
|
+
const field = embedding({ dimensions: 1 })
|
|
382
|
+
|
|
383
|
+
expect(field.dimensions).toBe(1)
|
|
384
|
+
|
|
385
|
+
const schema = field.getZodSchema!('embedding', 'create')
|
|
386
|
+
const validEmbedding = {
|
|
387
|
+
vector: [0.5],
|
|
388
|
+
metadata: {
|
|
389
|
+
model: 'test',
|
|
390
|
+
provider: 'test',
|
|
391
|
+
dimensions: 1,
|
|
392
|
+
generatedAt: new Date().toISOString(),
|
|
393
|
+
},
|
|
394
|
+
}
|
|
395
|
+
|
|
396
|
+
expect(() => schema.parse(validEmbedding)).not.toThrow()
|
|
397
|
+
})
|
|
398
|
+
|
|
399
|
+
it('should handle empty sourceField gracefully', () => {
|
|
400
|
+
const field = embedding({ sourceField: '' })
|
|
401
|
+
|
|
402
|
+
expect(field.sourceField).toBe('')
|
|
403
|
+
// Empty string is falsy but still a defined (non-null) value, so autoGenerate will be true
|
|
404
|
+
// unless explicitly set to false
|
|
405
|
+
expect(field.autoGenerate).toBe(true)
|
|
406
|
+
})
|
|
407
|
+
})
|
|
408
|
+
})
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
import { z } from 'zod'
|
|
2
|
+
import type { BaseFieldConfig } from '@opensaas/stack-core'
|
|
3
|
+
import type { StoredEmbedding, EmbeddingProviderName, ChunkingConfig } from '../config/types.js'
|
|
4
|
+
|
|
5
|
+
/**
|
|
6
|
+
* Embedding field configuration
|
|
7
|
+
* Stores vector embeddings as JSON with metadata
|
|
8
|
+
*/
|
|
9
|
+
export type EmbeddingField = BaseFieldConfig<StoredEmbedding | null, StoredEmbedding | null> & {
|
|
10
|
+
type: 'embedding'
|
|
11
|
+
|
|
12
|
+
/**
|
|
13
|
+
* Source field name to generate embeddings from
|
|
14
|
+
* When this field changes, embeddings will be automatically regenerated
|
|
15
|
+
*
|
|
16
|
+
* @example 'content' or 'title'
|
|
17
|
+
*/
|
|
18
|
+
sourceField?: string
|
|
19
|
+
|
|
20
|
+
/**
|
|
21
|
+
* Embedding provider to use
|
|
22
|
+
* References a provider name from RAG config
|
|
23
|
+
* Falls back to default provider if not specified
|
|
24
|
+
*
|
|
25
|
+
* @example 'openai' or 'ollama'
|
|
26
|
+
*/
|
|
27
|
+
provider?: EmbeddingProviderName
|
|
28
|
+
|
|
29
|
+
/**
|
|
30
|
+
* Vector dimensions
|
|
31
|
+
* Must match the provider's output dimensions
|
|
32
|
+
* @default 1536 (OpenAI text-embedding-3-small); a falsy value such as 0 also falls back to 1536
|
|
33
|
+
*/
|
|
34
|
+
dimensions?: number
|
|
35
|
+
|
|
36
|
+
/**
|
|
37
|
+
* Chunking configuration for long texts
|
|
38
|
+
* Only applies if sourceField is set
|
|
39
|
+
*/
|
|
40
|
+
chunking?: ChunkingConfig
|
|
41
|
+
|
|
42
|
+
/**
|
|
43
|
+
* Whether to automatically generate embeddings
|
|
44
|
+
* @default true if sourceField is provided (any non-null value, including an empty string), otherwise false
|
|
45
|
+
*/
|
|
46
|
+
autoGenerate?: boolean
|
|
47
|
+
|
|
48
|
+
/**
|
|
49
|
+
* UI configuration
|
|
50
|
+
*/
|
|
51
|
+
ui?: {
|
|
52
|
+
/**
|
|
53
|
+
* Whether to show the embedding vector in the UI
|
|
54
|
+
* @default false (usually too large to display)
|
|
55
|
+
*/
|
|
56
|
+
showVector?: boolean
|
|
57
|
+
|
|
58
|
+
/**
|
|
59
|
+
* Whether to show embedding metadata
|
|
60
|
+
* @default true
|
|
61
|
+
*/
|
|
62
|
+
showMetadata?: boolean
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
/**
|
|
67
|
+
* Embedding field builder
|
|
68
|
+
* Creates a field that stores vector embeddings with metadata
|
|
69
|
+
*
|
|
70
|
+
* @example
|
|
71
|
+
* ```typescript
|
|
72
|
+
* import { embedding } from '@opensaas/stack-rag/fields'
|
|
73
|
+
*
|
|
74
|
+
* // Manual embedding storage
|
|
75
|
+
* fields: {
|
|
76
|
+
* embedding: embedding({
|
|
77
|
+
* dimensions: 1536,
|
|
78
|
+
* provider: 'openai'
|
|
79
|
+
* })
|
|
80
|
+
* }
|
|
81
|
+
*
|
|
82
|
+
* // Automatic embedding generation from source field
|
|
83
|
+
* fields: {
|
|
84
|
+
* content: text(),
|
|
85
|
+
* contentEmbedding: embedding({
|
|
86
|
+
* sourceField: 'content',
|
|
87
|
+
* provider: 'openai',
|
|
88
|
+
* dimensions: 1536,
|
|
89
|
+
* autoGenerate: true
|
|
90
|
+
* })
|
|
91
|
+
* }
|
|
92
|
+
* ```
|
|
93
|
+
*/
|
|
94
|
+
export function embedding(options?: Omit<EmbeddingField, 'type'>): EmbeddingField {
|
|
95
|
+
const dimensions = options?.dimensions || 1536
|
|
96
|
+
const autoGenerate = options?.autoGenerate ?? options?.sourceField != null
|
|
97
|
+
|
|
98
|
+
return {
|
|
99
|
+
type: 'embedding',
|
|
100
|
+
...options,
|
|
101
|
+
dimensions,
|
|
102
|
+
autoGenerate,
|
|
103
|
+
|
|
104
|
+
getZodSchema: (_fieldName: string, _operation: 'create' | 'update') => {
|
|
105
|
+
// Embedding schema validation
|
|
106
|
+
const embeddingSchema = z.object({
|
|
107
|
+
vector: z.array(z.number()).length(dimensions, {
|
|
108
|
+
message: `Embedding vector must have exactly ${dimensions} dimensions`,
|
|
109
|
+
}),
|
|
110
|
+
metadata: z.object({
|
|
111
|
+
model: z.string(),
|
|
112
|
+
provider: z.string(),
|
|
113
|
+
dimensions: z.number(),
|
|
114
|
+
generatedAt: z.string(),
|
|
115
|
+
sourceHash: z.string().optional(),
|
|
116
|
+
}),
|
|
117
|
+
})
|
|
118
|
+
|
|
119
|
+
// Embeddings are always optional in input
|
|
120
|
+
// They are generated automatically via hooks if sourceField is set
|
|
121
|
+
return embeddingSchema.nullable().optional() as unknown as z.ZodTypeAny
|
|
122
|
+
},
|
|
123
|
+
|
|
124
|
+
getPrismaType: (_fieldName: string) => {
|
|
125
|
+
// Store as JSON in database
|
|
126
|
+
// For pgvector, we could use vector() type, but JSON is more portable
|
|
127
|
+
return {
|
|
128
|
+
type: 'Json',
|
|
129
|
+
modifiers: '?', // Always optional
|
|
130
|
+
}
|
|
131
|
+
},
|
|
132
|
+
|
|
133
|
+
getTypeScriptType: () => {
|
|
134
|
+
return {
|
|
135
|
+
type: 'StoredEmbedding | null',
|
|
136
|
+
optional: true,
|
|
137
|
+
}
|
|
138
|
+
},
|
|
139
|
+
|
|
140
|
+
getTypeScriptImports: () => {
|
|
141
|
+
return [
|
|
142
|
+
{
|
|
143
|
+
names: ['StoredEmbedding'],
|
|
144
|
+
from: '@opensaas/stack-rag',
|
|
145
|
+
typeOnly: true,
|
|
146
|
+
},
|
|
147
|
+
]
|
|
148
|
+
},
|
|
149
|
+
}
|
|
150
|
+
}
|
package/src/index.ts
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @opensaas/stack-rag
|
|
3
|
+
* RAG and AI embeddings integration for OpenSaas Stack
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
// Config exports
|
|
7
|
+
export {
|
|
8
|
+
openaiEmbeddings,
|
|
9
|
+
ollamaEmbeddings,
|
|
10
|
+
pgvectorStorage,
|
|
11
|
+
sqliteVssStorage,
|
|
12
|
+
jsonStorage,
|
|
13
|
+
} from './config/index.js'
|
|
14
|
+
|
|
15
|
+
// Plugin export
|
|
16
|
+
export { ragPlugin } from './config/plugin.js'
|
|
17
|
+
|
|
18
|
+
export type {
|
|
19
|
+
RAGConfig,
|
|
20
|
+
NormalizedRAGConfig,
|
|
21
|
+
EmbeddingProviderConfig,
|
|
22
|
+
OpenAIEmbeddingConfig,
|
|
23
|
+
OllamaEmbeddingConfig,
|
|
24
|
+
VectorStorageConfig,
|
|
25
|
+
PgVectorStorageConfig,
|
|
26
|
+
SqliteVssStorageConfig,
|
|
27
|
+
JsonStorageConfig,
|
|
28
|
+
ChunkingConfig,
|
|
29
|
+
ChunkingStrategy,
|
|
30
|
+
EmbeddingMetadata,
|
|
31
|
+
StoredEmbedding,
|
|
32
|
+
SearchResult,
|
|
33
|
+
} from './config/types.js'
|
package/src/mcp/index.ts
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* MCP (Model Context Protocol) integration for RAG
|
|
3
|
+
*
|
|
4
|
+
* MCP tools for semantic search are automatically generated by the ragPlugin.
|
|
5
|
+
* This export is provided for future extensibility and custom MCP tool generation.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
// Re-export types that may be useful for MCP tool development
|
|
9
|
+
export type { SearchResult } from '../config/types.js'
|
|
10
|
+
export type { SemanticSearchOptions } from '../runtime/search.js'
|
|
11
|
+
|
|
12
|
+
/**
|
|
13
|
+
* MCP tools are automatically generated by ragPlugin when:
|
|
14
|
+
* - MCP is enabled in main config
|
|
15
|
+
* - ragPlugin has enableMcpTools: true (default)
|
|
16
|
+
*
|
|
17
|
+
* Generated tools:
|
|
18
|
+
* - semantic_search_{listKey} - Search by natural language query
|
|
19
|
+
*
|
|
20
|
+
* See packages/rag/src/config/plugin.ts for implementation details.
|
|
21
|
+
*/
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
import type { EmbeddingProvider } from './types.js'
|
|
2
|
+
import type { EmbeddingProviderConfig } from '../config/types.js'
|
|
3
|
+
import { createOpenAIProvider } from './openai.js'
|
|
4
|
+
import { createOllamaProvider } from './ollama.js'
|
|
5
|
+
|
|
6
|
+
/**
|
|
7
|
+
* Provider factory registry
|
|
8
|
+
* Maps provider types to factory functions
|
|
9
|
+
*/
|
|
10
|
+
const providerFactories = new Map<string, (config: EmbeddingProviderConfig) => EmbeddingProvider>()
|
|
11
|
+
|
|
12
|
+
/**
|
|
13
|
+
* Register the built-in providers
|
|
14
|
+
*/
|
|
15
|
+
providerFactories.set('openai', (config) => {
|
|
16
|
+
if (config.type !== 'openai') {
|
|
17
|
+
throw new Error('Invalid config type for OpenAI provider')
|
|
18
|
+
}
|
|
19
|
+
return createOpenAIProvider(config as import('../config/types.js').OpenAIEmbeddingConfig)
|
|
20
|
+
})
|
|
21
|
+
providerFactories.set('ollama', (config) => {
|
|
22
|
+
if (config.type !== 'ollama') {
|
|
23
|
+
throw new Error('Invalid config type for Ollama provider')
|
|
24
|
+
}
|
|
25
|
+
return createOllamaProvider(config as import('../config/types.js').OllamaEmbeddingConfig)
|
|
26
|
+
})
|
|
27
|
+
|
|
28
|
+
/**
|
|
29
|
+
* Register a custom embedding provider factory
|
|
30
|
+
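* Use this to add support for custom embedding providers.
* Registering a type that already exists (including the built-in 'openai' and 'ollama') replaces its factory.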
|
|
31
|
+
*
|
|
32
|
+
* @example
|
|
33
|
+
* ```typescript
|
|
34
|
+
* import { registerEmbeddingProvider } from '@opensaas/stack-rag/providers'
|
|
35
|
+
*
|
|
36
|
+
* registerEmbeddingProvider('custom', (config) => {
|
|
37
|
+
* return new CustomEmbeddingProvider(config)
|
|
38
|
+
* })
|
|
39
|
+
* ```
|
|
40
|
+
*/
|
|
41
|
+
export function registerEmbeddingProvider(
|
|
42
|
+
type: string,
|
|
43
|
+
factory: (config: EmbeddingProviderConfig) => EmbeddingProvider,
|
|
44
|
+
): void {
|
|
45
|
+
providerFactories.set(type, factory)
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
/**
|
|
49
|
+
* Create an embedding provider instance from configuration
|
|
50
|
+
* Automatically selects the correct provider based on config.type
|
|
51
|
+
*
|
|
52
|
+
* @example
|
|
53
|
+
* ```typescript
|
|
54
|
+
* import { createEmbeddingProvider } from '@opensaas/stack-rag/providers'
|
|
55
|
+
*
|
|
56
|
+
* const provider = createEmbeddingProvider({
|
|
57
|
+
* type: 'openai',
|
|
58
|
+
* apiKey: process.env.OPENAI_API_KEY!,
|
|
59
|
+
* model: 'text-embedding-3-small'
|
|
60
|
+
* })
|
|
61
|
+
*
|
|
62
|
+
* const embedding = await provider.embed('Hello world')
|
|
63
|
+
* ```
|
|
64
|
+
*/
|
|
65
|
+
export function createEmbeddingProvider(config: EmbeddingProviderConfig): EmbeddingProvider {
|
|
66
|
+
const factory = providerFactories.get(config.type)
|
|
67
|
+
|
|
68
|
+
if (!factory) {
|
|
69
|
+
throw new Error(
|
|
70
|
+
`Unknown embedding provider type: ${config.type}. ` +
|
|
71
|
+
`Available providers: ${Array.from(providerFactories.keys()).join(', ')}`,
|
|
72
|
+
)
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
return factory(config)
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
// Export types and individual providers
|
|
79
|
+
export * from './types.js'
|
|
80
|
+
export { OpenAIEmbeddingProvider, createOpenAIProvider } from './openai.js'
|
|
81
|
+
export { OllamaEmbeddingProvider, createOllamaProvider } from './ollama.js'
|