@opensaas/stack-rag 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (149)
  1. package/.turbo/turbo-build.log +4 -0
  2. package/CHANGELOG.md +10 -0
  3. package/CLAUDE.md +565 -0
  4. package/LICENSE +21 -0
  5. package/README.md +406 -0
  6. package/dist/config/index.d.ts +63 -0
  7. package/dist/config/index.d.ts.map +1 -0
  8. package/dist/config/index.js +94 -0
  9. package/dist/config/index.js.map +1 -0
  10. package/dist/config/plugin.d.ts +38 -0
  11. package/dist/config/plugin.d.ts.map +1 -0
  12. package/dist/config/plugin.js +215 -0
  13. package/dist/config/plugin.js.map +1 -0
  14. package/dist/config/plugin.test.d.ts +2 -0
  15. package/dist/config/plugin.test.d.ts.map +1 -0
  16. package/dist/config/plugin.test.js +554 -0
  17. package/dist/config/plugin.test.js.map +1 -0
  18. package/dist/config/types.d.ts +249 -0
  19. package/dist/config/types.d.ts.map +1 -0
  20. package/dist/config/types.js +5 -0
  21. package/dist/config/types.js.map +1 -0
  22. package/dist/fields/embedding.d.ts +85 -0
  23. package/dist/fields/embedding.d.ts.map +1 -0
  24. package/dist/fields/embedding.js +81 -0
  25. package/dist/fields/embedding.js.map +1 -0
  26. package/dist/fields/embedding.test.d.ts +2 -0
  27. package/dist/fields/embedding.test.d.ts.map +1 -0
  28. package/dist/fields/embedding.test.js +323 -0
  29. package/dist/fields/embedding.test.js.map +1 -0
  30. package/dist/fields/index.d.ts +6 -0
  31. package/dist/fields/index.d.ts.map +1 -0
  32. package/dist/fields/index.js +5 -0
  33. package/dist/fields/index.js.map +1 -0
  34. package/dist/index.d.ts +8 -0
  35. package/dist/index.d.ts.map +1 -0
  36. package/dist/index.js +9 -0
  37. package/dist/index.js.map +1 -0
  38. package/dist/mcp/index.d.ts +19 -0
  39. package/dist/mcp/index.d.ts.map +1 -0
  40. package/dist/mcp/index.js +18 -0
  41. package/dist/mcp/index.js.map +1 -0
  42. package/dist/providers/index.d.ts +38 -0
  43. package/dist/providers/index.d.ts.map +1 -0
  44. package/dist/providers/index.js +68 -0
  45. package/dist/providers/index.js.map +1 -0
  46. package/dist/providers/ollama.d.ts +49 -0
  47. package/dist/providers/ollama.d.ts.map +1 -0
  48. package/dist/providers/ollama.js +151 -0
  49. package/dist/providers/ollama.js.map +1 -0
  50. package/dist/providers/openai.d.ts +41 -0
  51. package/dist/providers/openai.d.ts.map +1 -0
  52. package/dist/providers/openai.js +126 -0
  53. package/dist/providers/openai.js.map +1 -0
  54. package/dist/providers/providers.test.d.ts +2 -0
  55. package/dist/providers/providers.test.d.ts.map +1 -0
  56. package/dist/providers/providers.test.js +224 -0
  57. package/dist/providers/providers.test.js.map +1 -0
  58. package/dist/providers/types.d.ts +88 -0
  59. package/dist/providers/types.d.ts.map +1 -0
  60. package/dist/providers/types.js +2 -0
  61. package/dist/providers/types.js.map +1 -0
  62. package/dist/runtime/batch.d.ts +183 -0
  63. package/dist/runtime/batch.d.ts.map +1 -0
  64. package/dist/runtime/batch.js +240 -0
  65. package/dist/runtime/batch.js.map +1 -0
  66. package/dist/runtime/batch.test.d.ts +2 -0
  67. package/dist/runtime/batch.test.d.ts.map +1 -0
  68. package/dist/runtime/batch.test.js +251 -0
  69. package/dist/runtime/batch.test.js.map +1 -0
  70. package/dist/runtime/chunking.d.ts +42 -0
  71. package/dist/runtime/chunking.d.ts.map +1 -0
  72. package/dist/runtime/chunking.js +264 -0
  73. package/dist/runtime/chunking.js.map +1 -0
  74. package/dist/runtime/chunking.test.d.ts +2 -0
  75. package/dist/runtime/chunking.test.d.ts.map +1 -0
  76. package/dist/runtime/chunking.test.js +212 -0
  77. package/dist/runtime/chunking.test.js.map +1 -0
  78. package/dist/runtime/embeddings.d.ts +147 -0
  79. package/dist/runtime/embeddings.d.ts.map +1 -0
  80. package/dist/runtime/embeddings.js +201 -0
  81. package/dist/runtime/embeddings.js.map +1 -0
  82. package/dist/runtime/embeddings.test.d.ts +2 -0
  83. package/dist/runtime/embeddings.test.d.ts.map +1 -0
  84. package/dist/runtime/embeddings.test.js +366 -0
  85. package/dist/runtime/embeddings.test.js.map +1 -0
  86. package/dist/runtime/index.d.ts +14 -0
  87. package/dist/runtime/index.d.ts.map +1 -0
  88. package/dist/runtime/index.js +18 -0
  89. package/dist/runtime/index.js.map +1 -0
  90. package/dist/runtime/search.d.ts +135 -0
  91. package/dist/runtime/search.d.ts.map +1 -0
  92. package/dist/runtime/search.js +101 -0
  93. package/dist/runtime/search.js.map +1 -0
  94. package/dist/storage/index.d.ts +41 -0
  95. package/dist/storage/index.d.ts.map +1 -0
  96. package/dist/storage/index.js +73 -0
  97. package/dist/storage/index.js.map +1 -0
  98. package/dist/storage/json.d.ts +34 -0
  99. package/dist/storage/json.d.ts.map +1 -0
  100. package/dist/storage/json.js +82 -0
  101. package/dist/storage/json.js.map +1 -0
  102. package/dist/storage/pgvector.d.ts +53 -0
  103. package/dist/storage/pgvector.d.ts.map +1 -0
  104. package/dist/storage/pgvector.js +168 -0
  105. package/dist/storage/pgvector.js.map +1 -0
  106. package/dist/storage/sqlite-vss.d.ts +49 -0
  107. package/dist/storage/sqlite-vss.d.ts.map +1 -0
  108. package/dist/storage/sqlite-vss.js +148 -0
  109. package/dist/storage/sqlite-vss.js.map +1 -0
  110. package/dist/storage/storage.test.d.ts +2 -0
  111. package/dist/storage/storage.test.d.ts.map +1 -0
  112. package/dist/storage/storage.test.js +440 -0
  113. package/dist/storage/storage.test.js.map +1 -0
  114. package/dist/storage/types.d.ts +79 -0
  115. package/dist/storage/types.d.ts.map +1 -0
  116. package/dist/storage/types.js +49 -0
  117. package/dist/storage/types.js.map +1 -0
  118. package/package.json +82 -0
  119. package/src/config/index.ts +116 -0
  120. package/src/config/plugin.test.ts +664 -0
  121. package/src/config/plugin.ts +257 -0
  122. package/src/config/types.ts +283 -0
  123. package/src/fields/embedding.test.ts +408 -0
  124. package/src/fields/embedding.ts +150 -0
  125. package/src/fields/index.ts +6 -0
  126. package/src/index.ts +33 -0
  127. package/src/mcp/index.ts +21 -0
  128. package/src/providers/index.ts +81 -0
  129. package/src/providers/ollama.ts +186 -0
  130. package/src/providers/openai.ts +161 -0
  131. package/src/providers/providers.test.ts +275 -0
  132. package/src/providers/types.ts +100 -0
  133. package/src/runtime/batch.test.ts +332 -0
  134. package/src/runtime/batch.ts +424 -0
  135. package/src/runtime/chunking.test.ts +258 -0
  136. package/src/runtime/chunking.ts +334 -0
  137. package/src/runtime/embeddings.test.ts +441 -0
  138. package/src/runtime/embeddings.ts +380 -0
  139. package/src/runtime/index.ts +51 -0
  140. package/src/runtime/search.ts +243 -0
  141. package/src/storage/index.ts +86 -0
  142. package/src/storage/json.ts +106 -0
  143. package/src/storage/pgvector.ts +206 -0
  144. package/src/storage/sqlite-vss.ts +193 -0
  145. package/src/storage/storage.test.ts +521 -0
  146. package/src/storage/types.ts +126 -0
  147. package/tsconfig.json +13 -0
  148. package/tsconfig.tsbuildinfo +1 -0
  149. package/vitest.config.ts +18 -0
package/src/providers/types.ts
@@ -0,0 +1,100 @@
/**
 * Contract shared by every embedding provider.
 *
 * Any provider must implement this interface: it exposes identifying
 * metadata plus single-text and batched embedding generation.
 */
export interface EmbeddingProvider {
  /** Identifier of the provider type */
  readonly type: string

  /** Name of the model in use */
  readonly model: string

  /** Number of dimensions in each embedding vector */
  readonly dimensions: number

  /**
   * Generates the embedding for one text.
   *
   * @param text - The text to embed
   * @returns The embedding vector
   */
  embed(text: string): Promise<number[]>

  /**
   * Generates embeddings for several texts as one batch.
   * More efficient than calling embed() once per text.
   *
   * @param texts - Array of texts to embed
   * @returns Array of embedding vectors, in the same order as the inputs
   */
  embedBatch(texts: string[]): Promise<number[][]>
}
38
+
/**
 * Outcome of generating a single embedding
 */
export type EmbeddingResult = {
  /** The embedding vector */
  vector: number[]

  /** Number of tokens in the input text (optional) */
  tokens?: number

  /** Model used for generation */
  model: string
}
58
+
/**
 * Outcome of generating embeddings for a batch
 */
export type BatchEmbeddingResult = {
  /** Array of embedding vectors */
  vectors: number[][]

  /** Total tokens processed (optional) */
  totalTokens?: number

  /** Model used for generation */
  model: string
}
78
+
/**
 * Tuning options for embedding generation
 */
export type EmbedOptions = {
  /**
   * Upper bound on how many texts go into a single batch
   * @default 10
   */
  batchSize?: number

  /**
   * Rate limit, expressed in requests per minute
   * @default 100
   */
  rateLimit?: number

  /**
   * Whether progress should be shown for large batches
   * @default false
   */
  showProgress?: boolean
}
package/src/runtime/batch.test.ts
@@ -0,0 +1,332 @@
1
+ import { describe, it, expect, vi, beforeEach } from 'vitest'
2
+ import {
3
+ batchProcess,
4
+ RateLimiter,
5
+ ProcessingQueue,
6
+ type BatchProgress,
7
+ type BatchError,
8
+ } from './batch.js'
9
+ import type { EmbeddingProvider } from '../providers/types.js'
10
+
/**
 * Builds a mock embedding provider whose `embed` and `embedBatch` are
 * `vi.fn` spies returning a 3-element vector derived from the text length.
 *
 * @param delayMs - Artificial latency applied before each call resolves;
 *   no wait happens when it is 0 or less
 * @returns A provider with type 'mock', model 'mock-model' and 3 dimensions
 */
function createMockProvider(delayMs: number = 0): EmbeddingProvider {
  // Simulated latency shared by both spies
  const sleep = () => new Promise<void>((resolve) => setTimeout(resolve, delayMs))

  // Vector is [length / 10, length / 20, length / 30]
  const toVector = (text: string): number[] => [
    text.length / 10,
    text.length / 20,
    text.length / 30,
  ]

  const embed = vi.fn(async (text: string) => {
    if (delayMs > 0) {
      await sleep()
    }
    return toVector(text)
  })

  const embedBatch = vi.fn(async (texts: string[]) => {
    if (delayMs > 0) {
      await sleep()
    }
    return texts.map((text) => toVector(text))
  })

  return {
    type: 'mock',
    model: 'mock-model',
    dimensions: 3,
    embed,
    embedBatch,
  }
}
27
+
/**
 * Suite for `batchProcess`: batching, progress reporting, error handling,
 * retries, duration stats and source-hash metadata.
 */
describe('batchProcess', () => {
  // Clear all vitest mocks before every test
  beforeEach(() => {
    vi.clearAllMocks()
  })

  it('should process all texts successfully', async () => {
    const mockProvider = createMockProvider()

    const outcome = await batchProcess({
      provider: mockProvider,
      texts: ['Text 1', 'Text 2', 'Text 3'],
      batchSize: 2,
    })

    expect(outcome.embeddings).toHaveLength(3)
    expect(outcome.failed).toHaveLength(0)
    expect(outcome.stats.successful).toBe(3)
    expect(outcome.stats.failed).toBe(0)
  })

  it('should respect batch size', async () => {
    const mockProvider = createMockProvider()
    const inputs = Array.from({ length: 10 }, () => 'Test')

    await batchProcess({ provider: mockProvider, texts: inputs, batchSize: 3 })

    // 10 texts in batches of 3 -> 3 + 3 + 3 + 1 = 4 calls
    expect(mockProvider.embedBatch).toHaveBeenCalledTimes(4)
  })

  it('should report progress', async () => {
    const mockProvider = createMockProvider()
    const inputs = Array.from({ length: 10 }, () => 'Test')
    const updates: BatchProgress[] = []

    await batchProcess({
      provider: mockProvider,
      texts: inputs,
      batchSize: 3,
      onProgress: (update) => {
        updates.push(update)
      },
    })

    expect(updates.length).toBeGreaterThan(0)

    // The final update must describe a fully completed run
    const finalUpdate = updates[updates.length - 1]
    expect(finalUpdate.processed).toBe(10)
    expect(finalUpdate.total).toBe(10)
    expect(finalUpdate.percentage).toBe(100)
  })

  it('should handle errors with error callback', async () => {
    const mockProvider = createMockProvider()
    mockProvider.embedBatch = vi.fn().mockRejectedValue(new Error('API Error'))

    const reported: BatchError[] = []

    const outcome = await batchProcess({
      provider: mockProvider,
      texts: ['Text 1', 'Text 2'],
      batchSize: 1,
      maxRetries: 0,
      onError: (batchError) => {
        reported.push(batchError)
      },
    })

    expect(reported.length).toBeGreaterThan(0)
    expect(outcome.failed.length).toBe(2)
    expect(outcome.stats.failed).toBe(2)
  })

  it('should throw error without error callback', async () => {
    const mockProvider = createMockProvider()
    mockProvider.embedBatch = vi.fn().mockRejectedValue(new Error('API Error'))

    const attempt = batchProcess({
      provider: mockProvider,
      texts: ['Text 1'],
      batchSize: 1,
      maxRetries: 0,
    })

    await expect(attempt).rejects.toThrow('API Error')
  })

  it('should retry failed batches', async () => {
    const mockProvider = createMockProvider()

    // The first two calls fail, every later call succeeds
    mockProvider.embedBatch = vi
      .fn()
      .mockRejectedValueOnce(new Error('Temporary error'))
      .mockRejectedValueOnce(new Error('Temporary error'))
      .mockResolvedValue([[1, 2, 3]])

    const outcome = await batchProcess({
      provider: mockProvider,
      texts: ['Test'],
      batchSize: 1,
      maxRetries: 3,
      retryDelay: 10,
    })

    expect(outcome.embeddings).toHaveLength(1)
    expect(mockProvider.embedBatch).toHaveBeenCalledTimes(3) // Initial + 2 retries
  })

  it('should track duration', async () => {
    const slowProvider = createMockProvider(10) // 10ms delay

    const outcome = await batchProcess({
      provider: slowProvider,
      texts: ['Text 1', 'Text 2'],
      batchSize: 1,
      rateLimit: 1000, // High rate limit to minimize delay
    })

    expect(outcome.stats.duration).toBeGreaterThan(0)
  })

  it('should include source hash in embeddings', async () => {
    const mockProvider = createMockProvider()

    const outcome = await batchProcess({
      provider: mockProvider,
      texts: ['Text 1'],
      includeSourceHash: true,
    })

    expect(outcome.embeddings[0].metadata.sourceHash).toBeDefined()
  })
})
174
+
/**
 * Suite for `RateLimiter`: slot acquisition under and over the limit, and
 * the `execute` wrapper for both resolving and rejecting callbacks.
 */
describe('RateLimiter', () => {
  it('should allow requests under rate limit', async () => {
    const limiter = new RateLimiter(100) // 100 requests per minute

    const startedAt = Date.now()

    // Three slots are well under the limit, so none should wait
    for (let n = 0; n < 3; n++) {
      await limiter.waitForSlot()
    }

    const elapsed = Date.now() - startedAt

    // Should be nearly instant
    expect(elapsed).toBeLessThan(100)
  })

  // Measured with Date.now() and no fake timers, hence the 70 second timeout
  it('should throttle requests exceeding rate limit', { timeout: 70000 }, async () => {
    const limiter = new RateLimiter(2) // Only 2 requests per minute

    const timestamps: number[] = []

    // Acquire 3 slots, one more than the limit allows
    for (let attempt = 0; attempt < 3; attempt++) {
      await limiter.waitForSlot()
      timestamps.push(Date.now())
    }

    const gapFirstToSecond = timestamps[1] - timestamps[0]
    const gapSecondToThird = timestamps[2] - timestamps[1]

    expect(gapFirstToSecond).toBeLessThan(1000) // First two are quick
    expect(gapSecondToThird).toBeGreaterThan(100) // Third is delayed
  })

  it('should execute function with rate limiting', async () => {
    const limiter = new RateLimiter(100)

    const value = await limiter.execute(async () => 'success')

    expect(value).toBe('success')
  })

  it('should handle errors in executed function', async () => {
    const limiter = new RateLimiter(100)

    const failing = limiter.execute(async () => {
      throw new Error('Test error')
    })

    await expect(failing).rejects.toThrow('Test error')
  })
})
231
+
/**
 * Suite for `ProcessingQueue`: ordering at concurrency 1, parallelism above
 * 1, the `size` / `activeCount` accessors, error propagation and empty
 * batches.
 */
describe('ProcessingQueue', () => {
  it('should process items sequentially with concurrency 1', async () => {
    const processed: number[] = []
    const queue = new ProcessingQueue(
      async (item: number) => {
        processed.push(item)
        await new Promise((resolve) => setTimeout(resolve, 10))
        return item * 2
      },
      1, // concurrency
    )

    const results = await Promise.all([queue.add(1), queue.add(2), queue.add(3)])

    expect(results).toEqual([2, 4, 6])
    // Items must have been started in insertion order
    expect(processed).toEqual([1, 2, 3])
  })

  it('should process items concurrently with concurrency > 1', async () => {
    const queue = new ProcessingQueue(
      async (item: number) => {
        await new Promise((resolve) => setTimeout(resolve, 10))
        return item * 2
      },
      3, // concurrency
    )

    const start = Date.now()
    const results = await queue.addBatch([1, 2, 3, 4, 5])
    const duration = Date.now() - start

    expect(results).toEqual([2, 4, 6, 8, 10])

    // With concurrency 3, should be faster than sequential
    // 5 items with 10ms each sequentially = 50ms
    // With concurrency 3: ceil(5/3) * 10ms = 20ms
    expect(duration).toBeLessThan(40)
  })

  it('should track queue size', async () => {
    const queue = new ProcessingQueue(async (item: number) => {
      await new Promise((resolve) => setTimeout(resolve, 50))
      return item
    }, 1)

    // Add items quickly, keeping the promises so they can be awaited below
    const pending = [queue.add(1), queue.add(2), queue.add(3)]

    try {
      // Queue should have items waiting
      expect(queue.size).toBeGreaterThan(0)
    } finally {
      // Drain the queue so no promises or timers outlive the test
      await Promise.all(pending)
    }
  })

  it('should track active count', async () => {
    const queue = new ProcessingQueue(async (item: number) => {
      await new Promise((resolve) => setTimeout(resolve, 50))
      return item
    }, 2)

    // Start processing
    const pending = [queue.add(1), queue.add(2), queue.add(3)]

    // Give time for processing to start
    await new Promise((resolve) => setTimeout(resolve, 10))

    // Active count must never exceed the concurrency limit of 2
    expect(queue.activeCount).toBeLessThanOrEqual(2)

    await Promise.all(pending)
  })

  it('should handle errors in processor', async () => {
    const queue = new ProcessingQueue(async (item: number) => {
      if (item === 2) {
        throw new Error('Processing error')
      }
      return item * 2
    }, 1)

    const result1 = await queue.add(1)
    expect(result1).toBe(2)

    await expect(queue.add(2)).rejects.toThrow('Processing error')

    // A failed item must not stop later items from being processed
    const result3 = await queue.add(3)
    expect(result3).toBe(6)
  })

  it('should process empty batch', async () => {
    const queue = new ProcessingQueue(async (item: number) => item * 2, 1)

    const results = await queue.addBatch([])

    expect(results).toEqual([])
  })
})