@opensaas/stack-rag 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +4 -0
- package/CHANGELOG.md +10 -0
- package/CLAUDE.md +565 -0
- package/LICENSE +21 -0
- package/README.md +406 -0
- package/dist/config/index.d.ts +63 -0
- package/dist/config/index.d.ts.map +1 -0
- package/dist/config/index.js +94 -0
- package/dist/config/index.js.map +1 -0
- package/dist/config/plugin.d.ts +38 -0
- package/dist/config/plugin.d.ts.map +1 -0
- package/dist/config/plugin.js +215 -0
- package/dist/config/plugin.js.map +1 -0
- package/dist/config/plugin.test.d.ts +2 -0
- package/dist/config/plugin.test.d.ts.map +1 -0
- package/dist/config/plugin.test.js +554 -0
- package/dist/config/plugin.test.js.map +1 -0
- package/dist/config/types.d.ts +249 -0
- package/dist/config/types.d.ts.map +1 -0
- package/dist/config/types.js +5 -0
- package/dist/config/types.js.map +1 -0
- package/dist/fields/embedding.d.ts +85 -0
- package/dist/fields/embedding.d.ts.map +1 -0
- package/dist/fields/embedding.js +81 -0
- package/dist/fields/embedding.js.map +1 -0
- package/dist/fields/embedding.test.d.ts +2 -0
- package/dist/fields/embedding.test.d.ts.map +1 -0
- package/dist/fields/embedding.test.js +323 -0
- package/dist/fields/embedding.test.js.map +1 -0
- package/dist/fields/index.d.ts +6 -0
- package/dist/fields/index.d.ts.map +1 -0
- package/dist/fields/index.js +5 -0
- package/dist/fields/index.js.map +1 -0
- package/dist/index.d.ts +8 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +9 -0
- package/dist/index.js.map +1 -0
- package/dist/mcp/index.d.ts +19 -0
- package/dist/mcp/index.d.ts.map +1 -0
- package/dist/mcp/index.js +18 -0
- package/dist/mcp/index.js.map +1 -0
- package/dist/providers/index.d.ts +38 -0
- package/dist/providers/index.d.ts.map +1 -0
- package/dist/providers/index.js +68 -0
- package/dist/providers/index.js.map +1 -0
- package/dist/providers/ollama.d.ts +49 -0
- package/dist/providers/ollama.d.ts.map +1 -0
- package/dist/providers/ollama.js +151 -0
- package/dist/providers/ollama.js.map +1 -0
- package/dist/providers/openai.d.ts +41 -0
- package/dist/providers/openai.d.ts.map +1 -0
- package/dist/providers/openai.js +126 -0
- package/dist/providers/openai.js.map +1 -0
- package/dist/providers/providers.test.d.ts +2 -0
- package/dist/providers/providers.test.d.ts.map +1 -0
- package/dist/providers/providers.test.js +224 -0
- package/dist/providers/providers.test.js.map +1 -0
- package/dist/providers/types.d.ts +88 -0
- package/dist/providers/types.d.ts.map +1 -0
- package/dist/providers/types.js +2 -0
- package/dist/providers/types.js.map +1 -0
- package/dist/runtime/batch.d.ts +183 -0
- package/dist/runtime/batch.d.ts.map +1 -0
- package/dist/runtime/batch.js +240 -0
- package/dist/runtime/batch.js.map +1 -0
- package/dist/runtime/batch.test.d.ts +2 -0
- package/dist/runtime/batch.test.d.ts.map +1 -0
- package/dist/runtime/batch.test.js +251 -0
- package/dist/runtime/batch.test.js.map +1 -0
- package/dist/runtime/chunking.d.ts +42 -0
- package/dist/runtime/chunking.d.ts.map +1 -0
- package/dist/runtime/chunking.js +264 -0
- package/dist/runtime/chunking.js.map +1 -0
- package/dist/runtime/chunking.test.d.ts +2 -0
- package/dist/runtime/chunking.test.d.ts.map +1 -0
- package/dist/runtime/chunking.test.js +212 -0
- package/dist/runtime/chunking.test.js.map +1 -0
- package/dist/runtime/embeddings.d.ts +147 -0
- package/dist/runtime/embeddings.d.ts.map +1 -0
- package/dist/runtime/embeddings.js +201 -0
- package/dist/runtime/embeddings.js.map +1 -0
- package/dist/runtime/embeddings.test.d.ts +2 -0
- package/dist/runtime/embeddings.test.d.ts.map +1 -0
- package/dist/runtime/embeddings.test.js +366 -0
- package/dist/runtime/embeddings.test.js.map +1 -0
- package/dist/runtime/index.d.ts +14 -0
- package/dist/runtime/index.d.ts.map +1 -0
- package/dist/runtime/index.js +18 -0
- package/dist/runtime/index.js.map +1 -0
- package/dist/runtime/search.d.ts +135 -0
- package/dist/runtime/search.d.ts.map +1 -0
- package/dist/runtime/search.js +101 -0
- package/dist/runtime/search.js.map +1 -0
- package/dist/storage/index.d.ts +41 -0
- package/dist/storage/index.d.ts.map +1 -0
- package/dist/storage/index.js +73 -0
- package/dist/storage/index.js.map +1 -0
- package/dist/storage/json.d.ts +34 -0
- package/dist/storage/json.d.ts.map +1 -0
- package/dist/storage/json.js +82 -0
- package/dist/storage/json.js.map +1 -0
- package/dist/storage/pgvector.d.ts +53 -0
- package/dist/storage/pgvector.d.ts.map +1 -0
- package/dist/storage/pgvector.js +168 -0
- package/dist/storage/pgvector.js.map +1 -0
- package/dist/storage/sqlite-vss.d.ts +49 -0
- package/dist/storage/sqlite-vss.d.ts.map +1 -0
- package/dist/storage/sqlite-vss.js +148 -0
- package/dist/storage/sqlite-vss.js.map +1 -0
- package/dist/storage/storage.test.d.ts +2 -0
- package/dist/storage/storage.test.d.ts.map +1 -0
- package/dist/storage/storage.test.js +440 -0
- package/dist/storage/storage.test.js.map +1 -0
- package/dist/storage/types.d.ts +79 -0
- package/dist/storage/types.d.ts.map +1 -0
- package/dist/storage/types.js +49 -0
- package/dist/storage/types.js.map +1 -0
- package/package.json +82 -0
- package/src/config/index.ts +116 -0
- package/src/config/plugin.test.ts +664 -0
- package/src/config/plugin.ts +257 -0
- package/src/config/types.ts +283 -0
- package/src/fields/embedding.test.ts +408 -0
- package/src/fields/embedding.ts +150 -0
- package/src/fields/index.ts +6 -0
- package/src/index.ts +33 -0
- package/src/mcp/index.ts +21 -0
- package/src/providers/index.ts +81 -0
- package/src/providers/ollama.ts +186 -0
- package/src/providers/openai.ts +161 -0
- package/src/providers/providers.test.ts +275 -0
- package/src/providers/types.ts +100 -0
- package/src/runtime/batch.test.ts +332 -0
- package/src/runtime/batch.ts +424 -0
- package/src/runtime/chunking.test.ts +258 -0
- package/src/runtime/chunking.ts +334 -0
- package/src/runtime/embeddings.test.ts +441 -0
- package/src/runtime/embeddings.ts +380 -0
- package/src/runtime/index.ts +51 -0
- package/src/runtime/search.ts +243 -0
- package/src/storage/index.ts +86 -0
- package/src/storage/json.ts +106 -0
- package/src/storage/pgvector.ts +206 -0
- package/src/storage/sqlite-vss.ts +193 -0
- package/src/storage/storage.test.ts +521 -0
- package/src/storage/types.ts +126 -0
- package/tsconfig.json +13 -0
- package/tsconfig.tsbuildinfo +1 -0
- package/vitest.config.ts +18 -0
|
@@ -0,0 +1,521 @@
|
|
|
1
|
+
import { describe, it, expect, beforeEach, vi } from 'vitest'
|
|
2
|
+
import { JsonVectorStorage } from './json.js'
|
|
3
|
+
import { createVectorStorage } from './index.js'
|
|
4
|
+
import { cosineSimilarity, dotProduct, l2Distance } from './types.js'
|
|
5
|
+
import type { StoredEmbedding } from '../config/types.js'
|
|
6
|
+
import type { AccessContext } from '@opensaas/stack-core'
|
|
7
|
+
|
|
8
|
+
/**
 * Builds a minimal AccessContext stand-in for the storage tests.
 *
 * @param dbOverrides - Object exposed as the context's `db` property (defaults to an empty object)
 * @returns An object cast to AccessContext with a null session, a mocked `sudo`,
 *   and empty placeholders for `prisma` and `storage`
 */
function createMockContext(dbOverrides: Record<string, unknown> = {}): AccessContext<unknown> {
  const stub = {
    db: dbOverrides,
    session: null,
    sudo: vi.fn(),
    prisma: {} as unknown,
    storage: {} as unknown,
  }
  // The stub only carries the properties listed above, hence the assertion.
  return stub as AccessContext<unknown>
}
|
|
18
|
+
|
|
19
|
+
describe('Vector Storage', () => {
  /** Wraps a raw vector in the StoredEmbedding shape used by every fixture below. */
  const makeEmbedding = (vector: number[]): StoredEmbedding => ({
    vector,
    metadata: {
      model: 'test',
      provider: 'test',
      dimensions: vector.length,
      generatedAt: new Date().toISOString(),
    },
  })

  /** Creates a mock context whose `article.findMany` resolves to `items`, and exposes the mock. */
  const contextWithArticles = (items: unknown[]) => {
    const findMany = vi.fn().mockResolvedValue(items)
    return { context: createMockContext({ article: { findMany } }), findMany }
  }

  /** Reads the `id` of a search result's item (results are typed as unknown here). */
  const idOf = (result: { item: unknown }): string => (result.item as { id: string }).id

  describe('JsonVectorStorage', () => {
    let storage: JsonVectorStorage

    beforeEach(() => {
      storage = new JsonVectorStorage()
    })

    describe('constructor', () => {
      it('should initialize with correct type', () => {
        expect(storage.type).toBe('json')
      })
    })

    describe('search', () => {
      it('should throw error for non-existent list', async () => {
        const mockContext = createMockContext({})

        const queryVector = [0.1, 0.2, 0.3]

        await expect(
          storage.search('NonExistentList', 'embedding', queryVector, {
            context: mockContext,
          }),
        ).rejects.toThrow(/List 'NonExistentList' not found/)
      })

      it('should return empty results when no items match', async () => {
        const { context } = contextWithArticles([])

        const results = await storage.search('Article', 'embedding', [0.1, 0.2, 0.3], {
          context,
        })

        expect(results).toEqual([])
      })

      it('should filter items with null embeddings', async () => {
        const { context } = contextWithArticles([
          { id: '1', title: 'Article 1', embedding: null },
          { id: '2', title: 'Article 2', embedding: makeEmbedding([0.1, 0.2, 0.3]) },
        ])

        const results = await storage.search('Article', 'embedding', [0.1, 0.2, 0.3], {
          context,
        })

        expect(results).toHaveLength(1)
        expect(idOf(results[0])).toBe('2')
      })

      it('should calculate cosine similarity correctly', async () => {
        const { context } = contextWithArticles([
          { id: '1', embedding: makeEmbedding([1.0, 0.0, 0.0]) },
          { id: '2', embedding: makeEmbedding([0.0, 1.0, 0.0]) },
          { id: '3', embedding: makeEmbedding([1.0, 0.0, 0.0]) },
        ])

        const queryVector = [1.0, 0.0, 0.0] // Same as items 1 and 3

        const results = await storage.search('Article', 'embedding', queryVector, {
          context,
          limit: 10,
        })

        expect(results).toHaveLength(3)
        // Items 1 and 3 should have perfect similarity (score = 1.0)
        expect(results[0].score).toBeCloseTo(1.0, 5)
        expect(results[1].score).toBeCloseTo(1.0, 5)
        // Item 2 should have perpendicular vectors (cosine similarity normalized to 0.5)
        expect(results[2].score).toBeCloseTo(0.5, 5)
      })

      it('should respect limit parameter', async () => {
        // Deterministic, non-zero fixtures: random vectors would make failures unreproducible.
        const mockItems = Array.from({ length: 20 }, (_, i) => ({
          id: String(i + 1),
          embedding: makeEmbedding([i + 1, 1, 1]),
        }))
        const { context } = contextWithArticles(mockItems)

        const results = await storage.search('Article', 'embedding', [0.5, 0.5, 0.5], {
          context,
          limit: 5,
        })

        expect(results).toHaveLength(5)
      })

      it('should respect minScore parameter', async () => {
        const { context } = contextWithArticles([
          { id: '1', embedding: makeEmbedding([1.0, 0.0, 0.0]) }, // Perfect match
          { id: '2', embedding: makeEmbedding([0.0, 1.0, 0.0]) }, // No match (perpendicular)
        ])

        const results = await storage.search('Article', 'embedding', [1.0, 0.0, 0.0], {
          context,
          minScore: 0.9, // Only items with >90% similarity
        })

        expect(results).toHaveLength(1)
        expect(idOf(results[0])).toBe('1')
      })

      it('should sort results by score descending', async () => {
        // The stored vectors must point in different directions relative to the query;
        // parallel vectors all score 1.0 and would make the ordering check meaningless.
        const { context } = contextWithArticles([
          { id: '1', embedding: makeEmbedding([0.0, 1.0, 0.0]) }, // perpendicular -> lowest
          { id: '2', embedding: makeEmbedding([1.0, 0.0, 0.0]) }, // identical -> highest
          { id: '3', embedding: makeEmbedding([1.0, 1.0, 0.0]) }, // 45 degrees -> middle
        ])

        const results = await storage.search('Article', 'embedding', [1.0, 0.0, 0.0], {
          context,
        })

        // Results should be sorted by similarity score descending
        expect(results).toHaveLength(3)
        expect(results.map(idOf)).toEqual(['2', '3', '1'])
        expect(results[0].score).toBeGreaterThanOrEqual(results[1].score)
        expect(results[1].score).toBeGreaterThanOrEqual(results[2].score)
        expect(results[0].score).toBeGreaterThan(results[2].score)
      })

      it('should skip items with dimension mismatch', async () => {
        const { context } = contextWithArticles([
          { id: '1', embedding: makeEmbedding([0.1, 0.2]) }, // 2 dimensions
          { id: '2', embedding: makeEmbedding([0.1, 0.2, 0.3]) }, // 3 dimensions - correct
        ])

        const queryVector = [0.1, 0.2, 0.3] // 3 dimensions

        const consoleWarnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {})

        try {
          const results = await storage.search('Article', 'embedding', queryVector, {
            context,
          })

          expect(results).toHaveLength(1)
          expect(idOf(results[0])).toBe('2')
          expect(consoleWarnSpy).toHaveBeenCalledWith(
            expect.stringContaining('Vector dimension mismatch'),
          )
        } finally {
          // Restore even when an assertion fails so the spy cannot leak into other tests.
          consoleWarnSpy.mockRestore()
        }
      })

      it('should pass through where clause to Prisma', async () => {
        const { context, findMany } = contextWithArticles([])

        const whereClause = { published: true }

        await storage.search('Article', 'embedding', [0.1, 0.2, 0.3], {
          context,
          where: whereClause,
        })

        expect(findMany).toHaveBeenCalledWith({
          where: {
            published: true,
            embedding: { not: null },
          },
        })
      })

      it('should include distance in results', async () => {
        const { context } = contextWithArticles([
          { id: '1', embedding: makeEmbedding([1.0, 0.0, 0.0]) },
        ])

        const results = await storage.search('Article', 'embedding', [1.0, 0.0, 0.0], {
          context,
        })

        expect(results[0]).toHaveProperty('distance')
        expect(results[0].distance).toBeCloseTo(0.0, 5) // distance = 1 - score, perfect match = 0 distance
      })
    })

    describe('cosineSimilarity', () => {
      it('should calculate similarity for identical vectors', () => {
        const similarity = storage.cosineSimilarity([1.0, 2.0, 3.0], [1.0, 2.0, 3.0])
        expect(similarity).toBeCloseTo(1.0, 5)
      })

      it('should calculate similarity for perpendicular vectors', () => {
        const similarity = storage.cosineSimilarity([1.0, 0.0, 0.0], [0.0, 1.0, 0.0])
        // Cosine similarity is normalized to 0-1 range in the implementation
        // Perpendicular vectors have cosine 0, which becomes 0.5 after normalization
        expect(similarity).toBeCloseTo(0.5, 5)
      })

      it('should calculate similarity for opposite vectors', () => {
        const similarity = storage.cosineSimilarity([1.0, 0.0, 0.0], [-1.0, 0.0, 0.0])
        // Cosine similarity is normalized to 0-1 range in the implementation
        // Opposite vectors have cosine -1, which becomes 0.0 after normalization
        expect(similarity).toBeCloseTo(0.0, 5)
      })
    })
  })

  describe('Similarity utility functions', () => {
    describe('cosineSimilarity', () => {
      it('should calculate cosine similarity correctly', () => {
        expect(cosineSimilarity([1.0, 0.0, 0.0], [1.0, 0.0, 0.0])).toBeCloseTo(1.0, 5)
      })

      it('should handle zero vectors', () => {
        expect(cosineSimilarity([0.0, 0.0, 0.0], [1.0, 0.0, 0.0])).toBe(0)
      })

      it('should throw for mismatched dimensions', () => {
        expect(() => cosineSimilarity([1.0, 0.0], [1.0, 0.0, 0.0])).toThrow(
          'Vector dimension mismatch',
        )
      })
    })

    describe('dotProduct', () => {
      it('should calculate dot product correctly', () => {
        // 1*4 + 2*5 + 3*6 = 4 + 10 + 18 = 32
        expect(dotProduct([1.0, 2.0, 3.0], [4.0, 5.0, 6.0])).toBe(32)
      })

      it('should return 0 for perpendicular vectors', () => {
        expect(dotProduct([1.0, 0.0, 0.0], [0.0, 1.0, 0.0])).toBe(0)
      })

      it('should throw for mismatched dimensions', () => {
        expect(() => dotProduct([1.0, 0.0], [1.0, 0.0, 0.0])).toThrow('Vector dimension mismatch')
      })
    })

    describe('l2Distance', () => {
      it('should calculate L2 distance correctly', () => {
        // sqrt((3-0)^2 + (4-0)^2 + (0-0)^2) = sqrt(9 + 16) = 5
        expect(l2Distance([0.0, 0.0, 0.0], [3.0, 4.0, 0.0])).toBe(5)
      })

      it('should return 0 for identical vectors', () => {
        expect(l2Distance([1.0, 2.0, 3.0], [1.0, 2.0, 3.0])).toBe(0)
      })

      it('should throw for mismatched dimensions', () => {
        expect(() => l2Distance([1.0, 0.0], [1.0, 0.0, 0.0])).toThrow('Vector dimension mismatch')
      })
    })
  })

  describe('createVectorStorage factory', () => {
    it('should create JSON storage', () => {
      const storage = createVectorStorage({ type: 'json' })
      expect(storage).toBeInstanceOf(JsonVectorStorage)
      expect(storage.type).toBe('json')
    })

    it('should throw error for unknown storage type', () => {
      expect(() => {
        // Cast deliberately smuggles an invalid type past the compiler.
        createVectorStorage({ type: 'unknown' as 'json' })
      }).toThrow(/Unknown vector storage type/)
    })
  })

  describe('Storage interface compliance', () => {
    it('JSON storage should implement VectorStorage interface', () => {
      const storage = new JsonVectorStorage()

      expect(storage).toHaveProperty('type')
      expect(storage).toHaveProperty('search')
      expect(typeof storage.search).toBe('function')
    })
  })
})
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
import type { SearchResult } from '../config/types.js'
|
|
2
|
+
|
|
3
|
+
/**
 * Contract every vector storage backend has to fulfil.
 */
export interface VectorStorage {
  /** Identifier of the storage backend. */
  readonly type: string

  /**
   * Find stored items whose embeddings are similar to a query vector.
   *
   * @param listKey - Name of the list to search (e.g., 'Post', 'Article')
   * @param fieldName - Name of the field that holds the embeddings
   * @param queryVector - Embedding vector to compare against
   * @param options - Search options (see SearchOptions)
   * @returns Promise of the matching items together with their scores
   */
  search<T = unknown>(
    listKey: string,
    fieldName: string,
    queryVector: number[],
    options: SearchOptions,
  ): Promise<SearchResult<T>[]>

  /**
   * Cosine similarity of two vectors, used to score results.
   *
   * @param a - First vector
   * @param b - Second vector
   * @returns Similarity score (0-1, higher is more similar)
   */
  cosineSimilarity(a: number[], b: number[]): number
}
|
|
39
|
+
|
|
40
|
+
/**
 * Options accepted by a vector search.
 */
export type SearchOptions = {
  /**
   * Upper bound on the number of results returned.
   * @default 10
   */
  limit?: number

  /**
   * Lowest similarity score (0-1) a result may have;
   * anything scoring below it is filtered out.
   * @default 0.0
   */
  minScore?: number

  /**
   * Access context used to enforce access control, so that
   * users only see items they have access to. Required.
   */
  context: import('@opensaas/stack-core').AccessContext

  /**
   * Extra Prisma where clause for filtering results.
   * It is merged with the access control filters.
   */
  where?: Record<string, unknown>
}
|
|
69
|
+
|
|
70
|
+
/**
 * Names of the distance functions usable for vector similarity.
 */
export type DistanceFunction =
  | 'cosine'
  | 'l2'
  | 'inner_product'
|
|
74
|
+
|
|
75
|
+
/**
 * Normalize a vector to unit length
 * Required for cosine similarity
 *
 * A zero-magnitude vector cannot be scaled, so its components are returned
 * unchanged. The result is always a new array: the input is never mutated and
 * never handed back by reference, so callers may freely modify the result.
 *
 * @param vector - Components to normalize
 * @returns A new array with the normalized components
 */
export function normalizeVector(vector: readonly number[]): number[] {
  const magnitude = Math.sqrt(vector.reduce((sum, val) => sum + val * val, 0))
  // Copy instead of returning the input so both branches have the same ownership semantics.
  if (magnitude === 0) return vector.slice()
  return vector.map((val) => val / magnitude)
}
|
|
84
|
+
|
|
85
|
+
/**
 * Calculate dot product of two vectors
 *
 * Neither input is modified, so read-only arrays and tuples are accepted.
 *
 * @param a - First vector
 * @param b - Second vector; must have the same length as `a`
 * @returns Sum of the pairwise products (0 for two empty vectors)
 * @throws Error with message "Vector dimension mismatch: <a.length> vs <b.length>"
 *   when the lengths differ
 */
export function dotProduct(a: readonly number[], b: readonly number[]): number {
  if (a.length !== b.length) {
    throw new Error(`Vector dimension mismatch: ${a.length} vs ${b.length}`)
  }
  return a.reduce((sum, val, i) => sum + val * b[i], 0)
}
|
|
94
|
+
|
|
95
|
+
/**
 * Calculate L2 (Euclidean) distance between two vectors
 *
 * Neither input is modified, so read-only arrays and tuples are accepted.
 *
 * @param a - First vector
 * @param b - Second vector; must have the same length as `a`
 * @returns Square root of the summed squared component differences
 * @throws Error with message "Vector dimension mismatch: <a.length> vs <b.length>"
 *   when the lengths differ
 */
export function l2Distance(a: readonly number[], b: readonly number[]): number {
  if (a.length !== b.length) {
    throw new Error(`Vector dimension mismatch: ${a.length} vs ${b.length}`)
  }
  const sumSquaredDiff = a.reduce((sum, val, i) => {
    const diff = val - b[i]
    return sum + diff * diff
  }, 0)
  return Math.sqrt(sumSquaredDiff)
}
|
|
108
|
+
|
|
109
|
+
/**
 * Calculate cosine similarity between two vectors
 * Returns a value between 0 and 1 (higher is more similar)
 *
 * The raw cosine (-1 to 1) is rescaled with `(cosine + 1) / 2`, so identical
 * directions score 1, perpendicular vectors 0.5 and opposite directions 0.
 * If either vector has zero magnitude the function returns 0.
 *
 * @param a - First vector
 * @param b - Second vector; must have the same length as `a`
 * @returns Similarity score in the closed range [0, 1]
 * @throws Error with message "Vector dimension mismatch: <a.length> vs <b.length>"
 *   when the lengths differ
 */
export function cosineSimilarity(a: readonly number[], b: readonly number[]): number {
  if (a.length !== b.length) {
    throw new Error(`Vector dimension mismatch: ${a.length} vs ${b.length}`)
  }

  // Accumulate the dot product and both squared magnitudes in a single pass.
  let dotProd = 0
  let squaredA = 0
  let squaredB = 0
  for (let i = 0; i < a.length; i++) {
    const x = a[i]
    const y = b[i]
    dotProd += x * y
    squaredA += x * x
    squaredB += y * y
  }

  const magnitudeA = Math.sqrt(squaredA)
  const magnitudeB = Math.sqrt(squaredB)

  if (magnitudeA === 0 || magnitudeB === 0) {
    return 0
  }

  // Floating-point rounding can push the ratio marginally outside [-1, 1];
  // clamp it so the score stays within 0-1 and `1 - score` is never negative.
  const similarity = Math.min(1, Math.max(-1, dotProd / (magnitudeA * magnitudeB)))

  // Cosine similarity ranges from -1 to 1
  // We normalize to 0-1 for consistency
  return (similarity + 1) / 2
}
|
package/tsconfig.json
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
{
|
|
2
|
+
"extends": "../../tsconfig.json",
|
|
3
|
+
"compilerOptions": {
|
|
4
|
+
"outDir": "./dist",
|
|
5
|
+
"rootDir": "./src",
|
|
6
|
+
"composite": true,
|
|
7
|
+
"declaration": true,
|
|
8
|
+
"declarationMap": true,
|
|
9
|
+
"lib": ["ES2022"]
|
|
10
|
+
},
|
|
11
|
+
"include": ["src/**/*"],
|
|
12
|
+
"exclude": ["node_modules", "dist"]
|
|
13
|
+
}
|