@opensaas/stack-rag 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (149)
  1. package/.turbo/turbo-build.log +4 -0
  2. package/CHANGELOG.md +10 -0
  3. package/CLAUDE.md +565 -0
  4. package/LICENSE +21 -0
  5. package/README.md +406 -0
  6. package/dist/config/index.d.ts +63 -0
  7. package/dist/config/index.d.ts.map +1 -0
  8. package/dist/config/index.js +94 -0
  9. package/dist/config/index.js.map +1 -0
  10. package/dist/config/plugin.d.ts +38 -0
  11. package/dist/config/plugin.d.ts.map +1 -0
  12. package/dist/config/plugin.js +215 -0
  13. package/dist/config/plugin.js.map +1 -0
  14. package/dist/config/plugin.test.d.ts +2 -0
  15. package/dist/config/plugin.test.d.ts.map +1 -0
  16. package/dist/config/plugin.test.js +554 -0
  17. package/dist/config/plugin.test.js.map +1 -0
  18. package/dist/config/types.d.ts +249 -0
  19. package/dist/config/types.d.ts.map +1 -0
  20. package/dist/config/types.js +5 -0
  21. package/dist/config/types.js.map +1 -0
  22. package/dist/fields/embedding.d.ts +85 -0
  23. package/dist/fields/embedding.d.ts.map +1 -0
  24. package/dist/fields/embedding.js +81 -0
  25. package/dist/fields/embedding.js.map +1 -0
  26. package/dist/fields/embedding.test.d.ts +2 -0
  27. package/dist/fields/embedding.test.d.ts.map +1 -0
  28. package/dist/fields/embedding.test.js +323 -0
  29. package/dist/fields/embedding.test.js.map +1 -0
  30. package/dist/fields/index.d.ts +6 -0
  31. package/dist/fields/index.d.ts.map +1 -0
  32. package/dist/fields/index.js +5 -0
  33. package/dist/fields/index.js.map +1 -0
  34. package/dist/index.d.ts +8 -0
  35. package/dist/index.d.ts.map +1 -0
  36. package/dist/index.js +9 -0
  37. package/dist/index.js.map +1 -0
  38. package/dist/mcp/index.d.ts +19 -0
  39. package/dist/mcp/index.d.ts.map +1 -0
  40. package/dist/mcp/index.js +18 -0
  41. package/dist/mcp/index.js.map +1 -0
  42. package/dist/providers/index.d.ts +38 -0
  43. package/dist/providers/index.d.ts.map +1 -0
  44. package/dist/providers/index.js +68 -0
  45. package/dist/providers/index.js.map +1 -0
  46. package/dist/providers/ollama.d.ts +49 -0
  47. package/dist/providers/ollama.d.ts.map +1 -0
  48. package/dist/providers/ollama.js +151 -0
  49. package/dist/providers/ollama.js.map +1 -0
  50. package/dist/providers/openai.d.ts +41 -0
  51. package/dist/providers/openai.d.ts.map +1 -0
  52. package/dist/providers/openai.js +126 -0
  53. package/dist/providers/openai.js.map +1 -0
  54. package/dist/providers/providers.test.d.ts +2 -0
  55. package/dist/providers/providers.test.d.ts.map +1 -0
  56. package/dist/providers/providers.test.js +224 -0
  57. package/dist/providers/providers.test.js.map +1 -0
  58. package/dist/providers/types.d.ts +88 -0
  59. package/dist/providers/types.d.ts.map +1 -0
  60. package/dist/providers/types.js +2 -0
  61. package/dist/providers/types.js.map +1 -0
  62. package/dist/runtime/batch.d.ts +183 -0
  63. package/dist/runtime/batch.d.ts.map +1 -0
  64. package/dist/runtime/batch.js +240 -0
  65. package/dist/runtime/batch.js.map +1 -0
  66. package/dist/runtime/batch.test.d.ts +2 -0
  67. package/dist/runtime/batch.test.d.ts.map +1 -0
  68. package/dist/runtime/batch.test.js +251 -0
  69. package/dist/runtime/batch.test.js.map +1 -0
  70. package/dist/runtime/chunking.d.ts +42 -0
  71. package/dist/runtime/chunking.d.ts.map +1 -0
  72. package/dist/runtime/chunking.js +264 -0
  73. package/dist/runtime/chunking.js.map +1 -0
  74. package/dist/runtime/chunking.test.d.ts +2 -0
  75. package/dist/runtime/chunking.test.d.ts.map +1 -0
  76. package/dist/runtime/chunking.test.js +212 -0
  77. package/dist/runtime/chunking.test.js.map +1 -0
  78. package/dist/runtime/embeddings.d.ts +147 -0
  79. package/dist/runtime/embeddings.d.ts.map +1 -0
  80. package/dist/runtime/embeddings.js +201 -0
  81. package/dist/runtime/embeddings.js.map +1 -0
  82. package/dist/runtime/embeddings.test.d.ts +2 -0
  83. package/dist/runtime/embeddings.test.d.ts.map +1 -0
  84. package/dist/runtime/embeddings.test.js +366 -0
  85. package/dist/runtime/embeddings.test.js.map +1 -0
  86. package/dist/runtime/index.d.ts +14 -0
  87. package/dist/runtime/index.d.ts.map +1 -0
  88. package/dist/runtime/index.js +18 -0
  89. package/dist/runtime/index.js.map +1 -0
  90. package/dist/runtime/search.d.ts +135 -0
  91. package/dist/runtime/search.d.ts.map +1 -0
  92. package/dist/runtime/search.js +101 -0
  93. package/dist/runtime/search.js.map +1 -0
  94. package/dist/storage/index.d.ts +41 -0
  95. package/dist/storage/index.d.ts.map +1 -0
  96. package/dist/storage/index.js +73 -0
  97. package/dist/storage/index.js.map +1 -0
  98. package/dist/storage/json.d.ts +34 -0
  99. package/dist/storage/json.d.ts.map +1 -0
  100. package/dist/storage/json.js +82 -0
  101. package/dist/storage/json.js.map +1 -0
  102. package/dist/storage/pgvector.d.ts +53 -0
  103. package/dist/storage/pgvector.d.ts.map +1 -0
  104. package/dist/storage/pgvector.js +168 -0
  105. package/dist/storage/pgvector.js.map +1 -0
  106. package/dist/storage/sqlite-vss.d.ts +49 -0
  107. package/dist/storage/sqlite-vss.d.ts.map +1 -0
  108. package/dist/storage/sqlite-vss.js +148 -0
  109. package/dist/storage/sqlite-vss.js.map +1 -0
  110. package/dist/storage/storage.test.d.ts +2 -0
  111. package/dist/storage/storage.test.d.ts.map +1 -0
  112. package/dist/storage/storage.test.js +440 -0
  113. package/dist/storage/storage.test.js.map +1 -0
  114. package/dist/storage/types.d.ts +79 -0
  115. package/dist/storage/types.d.ts.map +1 -0
  116. package/dist/storage/types.js +49 -0
  117. package/dist/storage/types.js.map +1 -0
  118. package/package.json +82 -0
  119. package/src/config/index.ts +116 -0
  120. package/src/config/plugin.test.ts +664 -0
  121. package/src/config/plugin.ts +257 -0
  122. package/src/config/types.ts +283 -0
  123. package/src/fields/embedding.test.ts +408 -0
  124. package/src/fields/embedding.ts +150 -0
  125. package/src/fields/index.ts +6 -0
  126. package/src/index.ts +33 -0
  127. package/src/mcp/index.ts +21 -0
  128. package/src/providers/index.ts +81 -0
  129. package/src/providers/ollama.ts +186 -0
  130. package/src/providers/openai.ts +161 -0
  131. package/src/providers/providers.test.ts +275 -0
  132. package/src/providers/types.ts +100 -0
  133. package/src/runtime/batch.test.ts +332 -0
  134. package/src/runtime/batch.ts +424 -0
  135. package/src/runtime/chunking.test.ts +258 -0
  136. package/src/runtime/chunking.ts +334 -0
  137. package/src/runtime/embeddings.test.ts +441 -0
  138. package/src/runtime/embeddings.ts +380 -0
  139. package/src/runtime/index.ts +51 -0
  140. package/src/runtime/search.ts +243 -0
  141. package/src/storage/index.ts +86 -0
  142. package/src/storage/json.ts +106 -0
  143. package/src/storage/pgvector.ts +206 -0
  144. package/src/storage/sqlite-vss.ts +193 -0
  145. package/src/storage/storage.test.ts +521 -0
  146. package/src/storage/types.ts +126 -0
  147. package/tsconfig.json +13 -0
  148. package/tsconfig.tsbuildinfo +1 -0
  149. package/vitest.config.ts +18 -0
@@ -0,0 +1,441 @@
1
+ import { describe, it, expect, vi } from 'vitest'
2
+ import {
3
+ generateEmbedding,
4
+ generateEmbeddings,
5
+ shouldRegenerateEmbedding,
6
+ hashText,
7
+ validateEmbeddingDimensions,
8
+ mergeEmbeddings,
9
+ } from './embeddings.js'
10
+ import type { EmbeddingProvider } from '../providers/types.js'
11
+ import type { StoredEmbedding } from '../config/types.js'
12
+
13
/**
 * Builds a stub embedding provider for the tests in this file.
 *
 * `embed` and `embedBatch` are both `vi.fn` spies, so tests can assert on how
 * they were invoked. Every text maps to a 3-element vector computed only from
 * the text's length.
 */
function createMockProvider(): EmbeddingProvider {
  // Return simple mock vector based on text length
  const vectorFor = (text: string): number[] => [
    text.length / 10,
    text.length / 20,
    text.length / 30,
  ]

  const embed = vi.fn(async (text: string) => vectorFor(text))
  const embedBatch = vi.fn(async (texts: string[]) => texts.map((entry) => vectorFor(entry)))

  return {
    type: 'mock',
    model: 'mock-model',
    dimensions: 3,
    embed,
    embedBatch,
  }
}
28
+
29
// Suite for generateEmbedding: single vs. chunked output, source-hash toggling,
// metadata passthrough, and which provider method gets called.
describe('generateEmbedding', () => {
  it('should generate single embedding without chunking', async () => {
    const result = await generateEmbedding({
      provider: createMockProvider(),
      text: 'Hello world',
      enableChunking: false,
    })

    expect(result).toHaveProperty('vector')
    expect(result).toHaveProperty('metadata')
    expect(result.vector).toHaveLength(3)

    // The metadata mirrors the mock provider's model/type/dimensions.
    expect(result.metadata.model).toBe('mock-model')
    expect(result.metadata.provider).toBe('mock')
    expect(result.metadata.dimensions).toBe(3)
    expect(result.metadata.sourceHash).toBeDefined()
  })

  it('should generate chunked embeddings with chunking enabled', async () => {
    const chunks = await generateEmbedding({
      provider: createMockProvider(),
      text: 'A'.repeat(1000),
      enableChunking: true,
      chunking: { chunkSize: 200, chunkOverlap: 0 },
    })

    expect(Array.isArray(chunks)).toBe(true)
    expect(chunks.length).toBeGreaterThan(1)

    // Every entry pairs a chunk with its own embedding and a chunk index.
    chunks.forEach((entry) => {
      expect(entry).toHaveProperty('chunk')
      expect(entry).toHaveProperty('embedding')
      expect(entry.embedding.vector).toHaveLength(3)
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      expect((entry.embedding.metadata as any).chunkIndex).toBeDefined()
    })
  })

  it('should include source hash when enabled', async () => {
    const { metadata } = await generateEmbedding({
      provider: createMockProvider(),
      text: 'Test text',
      includeSourceHash: true,
    })

    expect(metadata.sourceHash).toBeDefined()
    expect(typeof metadata.sourceHash).toBe('string')
    expect(metadata.sourceHash).toHaveLength(64) // SHA-256 hex
  })

  it('should exclude source hash when disabled', async () => {
    const { metadata } = await generateEmbedding({
      provider: createMockProvider(),
      text: 'Test text',
      includeSourceHash: false,
    })

    expect(metadata.sourceHash).toBeUndefined()
  })

  it('should include additional metadata', async () => {
    const result = await generateEmbedding({
      provider: createMockProvider(),
      text: 'Test',
      metadata: { customField: 'customValue' },
    })

    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    expect((result.metadata as any).customField).toBe('customValue')
  })

  it('should call provider.embed for single embedding', async () => {
    const mockProvider = createMockProvider()

    await generateEmbedding({
      provider: mockProvider,
      text: 'Test',
      enableChunking: false,
    })

    expect(mockProvider.embed).toHaveBeenCalledWith('Test')
  })

  it('should call provider.embedBatch for chunked embedding', async () => {
    const mockProvider = createMockProvider()

    await generateEmbedding({
      provider: mockProvider,
      text: 'A'.repeat(1000),
      enableChunking: true,
      chunking: { chunkSize: 200, chunkOverlap: 50 },
    })

    expect(mockProvider.embedBatch).toHaveBeenCalled()
  })
})
136
+
137
// Suite for generateEmbeddings: multi-text output, batching, source hashes,
// and the empty-input case.
describe('generateEmbeddings', () => {
  it('should generate embeddings for multiple texts', async () => {
    const results = await generateEmbeddings({
      provider: createMockProvider(),
      texts: ['Text 1', 'Text 2', 'Text 3'],
    })

    expect(results).toHaveLength(3)

    results.forEach((entry) => {
      expect(entry.vector).toHaveLength(3)
      expect(entry.metadata.model).toBe('mock-model')
    })
  })

  it('should process texts in batches', async () => {
    const mockProvider = createMockProvider()
    const repeatedTexts = Array.from({ length: 25 }, () => 'Test')

    await generateEmbeddings({
      provider: mockProvider,
      texts: repeatedTexts,
      batchSize: 10,
    })

    // Should be called 3 times (10 + 10 + 5)
    expect(mockProvider.embedBatch).toHaveBeenCalledTimes(3)
  })

  it('should include source hashes for all embeddings', async () => {
    const results = await generateEmbeddings({
      provider: createMockProvider(),
      texts: ['Text 1', 'Text 2', 'Text 3'],
      includeSourceHash: true,
    })

    results.forEach((entry) => {
      expect(entry.metadata.sourceHash).toBeDefined()
    })
  })

  it('should handle empty text array', async () => {
    const results = await generateEmbeddings({
      provider: createMockProvider(),
      texts: [],
    })

    expect(results).toHaveLength(0)
  })
})
195
+
196
// Suite for shouldRegenerateEmbedding: decides from the stored source hash
// whether a text needs a fresh embedding.
describe('shouldRegenerateEmbedding', () => {
  // Builds a stored embedding fixture; the `sourceHash` key is left out
  // entirely when no hash is supplied.
  const storedWith = (sourceHash?: string): StoredEmbedding => ({
    vector: [1, 2, 3],
    metadata: {
      model: 'test',
      provider: 'test',
      dimensions: 3,
      generatedAt: new Date().toISOString(),
      ...(sourceHash === undefined ? {} : { sourceHash }),
    },
  })

  it('should return true when no existing embedding', () => {
    expect(shouldRegenerateEmbedding('New text', null)).toBe(true)
  })

  it('should return true when source text changed', () => {
    const previous = storedWith(hashText('Old text'))

    expect(shouldRegenerateEmbedding('New text', previous)).toBe(true)
  })

  it('should return false when source text unchanged', () => {
    const unchangedText = 'Same text'
    const previous = storedWith(hashText(unchangedText))

    expect(shouldRegenerateEmbedding(unchangedText, previous)).toBe(false)
  })

  it('should return false when no source hash in metadata', () => {
    const previous = storedWith()

    // Conservative: don't regenerate if we can't tell
    expect(shouldRegenerateEmbedding('Any text', previous)).toBe(false)
  })
})
250
+
251
// Suite for hashText: stability, sensitivity to content and case, and the
// 64-character lowercase hex output format.
describe('hashText', () => {
  it('should generate consistent hash for same text', () => {
    const input = 'Test text'
    const [first, second] = [hashText(input), hashText(input)]

    expect(first).toBe(second)
  })

  it('should generate different hashes for different text', () => {
    const [first, second] = [hashText('Text 1'), hashText('Text 2')]

    expect(first).not.toBe(second)
  })

  it('should generate SHA-256 hex string', () => {
    const digest = hashText('Test')

    expect(digest).toMatch(/^[0-9a-f]{64}$/)
  })

  it('should be case-sensitive', () => {
    const [capitalised, lowercased] = [hashText('Test'), hashText('test')]

    expect(capitalised).not.toBe(lowercased)
  })
})
279
+
280
// Suite for validateEmbeddingDimensions: checks both the vector length and the
// dimension count recorded in metadata against the expected value.
describe('validateEmbeddingDimensions', () => {
  // Fixture with a fixed 3-element vector and a caller-chosen metadata
  // dimension, so the two can be made to disagree.
  const threeVectorWithMetadataDimensions = (dimensions: number): StoredEmbedding => ({
    vector: [1, 2, 3],
    metadata: {
      model: 'test',
      provider: 'test',
      dimensions,
      generatedAt: new Date().toISOString(),
    },
  })

  it('should pass validation for correct dimensions', () => {
    const candidate = threeVectorWithMetadataDimensions(3)

    expect(() => validateEmbeddingDimensions(candidate, 3)).not.toThrow()
  })

  it('should throw error for dimension mismatch', () => {
    const candidate = threeVectorWithMetadataDimensions(3)

    expect(() => validateEmbeddingDimensions(candidate, 5)).toThrow(
      'Embedding dimension mismatch: expected 5, got 3',
    )
  })

  it('should throw error for metadata dimension mismatch', () => {
    const candidate = threeVectorWithMetadataDimensions(5) // Wrong metadata

    expect(() => validateEmbeddingDimensions(candidate, 3)).toThrow(
      'Embedding metadata dimension mismatch',
    )
  })
})
329
+
330
// Suite for mergeEmbeddings: average and max pooling, the single-item
// shortcut, and the error cases for empty or mismatched input.
describe('mergeEmbeddings', () => {
  // Fixture whose metadata dimension count always equals the vector length.
  const embeddingOf = (vector: number[]): StoredEmbedding => ({
    vector,
    metadata: {
      model: 'test',
      provider: 'test',
      dimensions: vector.length,
      generatedAt: new Date().toISOString(),
    },
  })

  it('should merge embeddings using average pooling', () => {
    const inputs: StoredEmbedding[] = [embeddingOf([1, 2, 3]), embeddingOf([3, 4, 5])]

    const pooled = mergeEmbeddings(inputs, 'average')

    expect(pooled.vector).toEqual([2, 3, 4]) // Average of [1,2,3] and [3,4,5]
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    expect((pooled.metadata as any).mergedFrom).toBe(2)
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    expect((pooled.metadata as any).mergeMethod).toBe('average')
  })

  it('should merge embeddings using max pooling', () => {
    const inputs: StoredEmbedding[] = [embeddingOf([1, 5, 3]), embeddingOf([4, 2, 6])]

    const pooled = mergeEmbeddings(inputs, 'max')

    expect(pooled.vector).toEqual([4, 5, 6]) // Max of each dimension
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    expect((pooled.metadata as any).mergeMethod).toBe('max')
  })

  it('should return single embedding if array has one item', () => {
    const inputs: StoredEmbedding[] = [embeddingOf([1, 2, 3])]

    const pooled = mergeEmbeddings(inputs)

    // Identity check: the very same object comes back, not a copy.
    expect(pooled).toBe(inputs[0])
  })

  it('should throw error for empty array', () => {
    expect(() => mergeEmbeddings([])).toThrow('Cannot merge empty array of embeddings')
  })

  it('should throw error for dimension mismatch', () => {
    const inputs: StoredEmbedding[] = [
      embeddingOf([1, 2, 3]),
      embeddingOf([1, 2]), // Different dimensions
    ]

    expect(() => mergeEmbeddings(inputs)).toThrow(
      'Cannot merge embeddings with different dimensions',
    )
  })
})