@opensaas/stack-rag 0.1.6 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +1 -1
- package/CHANGELOG.md +141 -0
- package/README.md +82 -6
- package/dist/config/index.d.ts.map +1 -1
- package/dist/config/index.js +9 -0
- package/dist/config/index.js.map +1 -1
- package/dist/config/plugin.d.ts.map +1 -1
- package/dist/config/plugin.js +61 -1
- package/dist/config/plugin.js.map +1 -1
- package/dist/config/plugin.test.js +70 -14
- package/dist/config/plugin.test.js.map +1 -1
- package/dist/config/types.d.ts +186 -0
- package/dist/config/types.d.ts.map +1 -1
- package/dist/fields/index.d.ts +1 -0
- package/dist/fields/index.d.ts.map +1 -1
- package/dist/fields/index.js +1 -0
- package/dist/fields/index.js.map +1 -1
- package/dist/fields/searchable.d.ts +42 -0
- package/dist/fields/searchable.d.ts.map +1 -0
- package/dist/fields/searchable.js +51 -0
- package/dist/fields/searchable.js.map +1 -0
- package/dist/fields/searchable.test.d.ts +2 -0
- package/dist/fields/searchable.test.d.ts.map +1 -0
- package/dist/fields/searchable.test.js +112 -0
- package/dist/fields/searchable.test.js.map +1 -0
- package/dist/index.d.ts +2 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/providers/openai.d.ts +2 -0
- package/dist/providers/openai.d.ts.map +1 -1
- package/dist/providers/openai.js +35 -20
- package/dist/providers/openai.js.map +1 -1
- package/dist/runtime/batch.test.js +1 -1
- package/dist/runtime/build-time.d.ts +100 -0
- package/dist/runtime/build-time.d.ts.map +1 -0
- package/dist/runtime/build-time.js +185 -0
- package/dist/runtime/build-time.js.map +1 -0
- package/dist/runtime/index.d.ts +3 -0
- package/dist/runtime/index.d.ts.map +1 -1
- package/dist/runtime/index.js +6 -0
- package/dist/runtime/index.js.map +1 -1
- package/dist/runtime/markdown.d.ts +33 -0
- package/dist/runtime/markdown.d.ts.map +1 -0
- package/dist/runtime/markdown.js +94 -0
- package/dist/runtime/markdown.js.map +1 -0
- package/dist/runtime/provider-helpers.d.ts +56 -0
- package/dist/runtime/provider-helpers.d.ts.map +1 -0
- package/dist/runtime/provider-helpers.js +95 -0
- package/dist/runtime/provider-helpers.js.map +1 -0
- package/dist/runtime/types.d.ts +29 -0
- package/dist/runtime/types.d.ts.map +1 -0
- package/dist/runtime/types.js +6 -0
- package/dist/runtime/types.js.map +1 -0
- package/dist/storage/access-filter.d.ts +30 -0
- package/dist/storage/access-filter.d.ts.map +1 -0
- package/dist/storage/access-filter.js +241 -0
- package/dist/storage/access-filter.js.map +1 -0
- package/dist/storage/index.d.ts +2 -0
- package/dist/storage/index.d.ts.map +1 -1
- package/dist/storage/index.js +3 -0
- package/dist/storage/index.js.map +1 -1
- package/dist/storage/json-file.d.ts +53 -0
- package/dist/storage/json-file.d.ts.map +1 -0
- package/dist/storage/json-file.js +124 -0
- package/dist/storage/json-file.js.map +1 -0
- package/dist/storage/pgvector.d.ts.map +1 -1
- package/dist/storage/pgvector.js +26 -11
- package/dist/storage/pgvector.js.map +1 -1
- package/dist/storage/storage.test.js +2 -0
- package/dist/storage/storage.test.js.map +1 -1
- package/dist/storage/types.d.ts +5 -0
- package/dist/storage/types.d.ts.map +1 -1
- package/dist/storage/types.js.map +1 -1
- package/package.json +6 -5
- package/src/config/index.ts +9 -0
- package/src/config/plugin.test.ts +70 -14
- package/src/config/plugin.ts +72 -2
- package/src/config/types.ts +217 -0
- package/src/fields/index.ts +2 -0
- package/src/fields/searchable.test.ts +136 -0
- package/src/fields/searchable.ts +57 -0
- package/src/index.ts +6 -0
- package/src/providers/openai.ts +37 -22
- package/src/runtime/batch.test.ts +1 -1
- package/src/runtime/build-time.ts +216 -0
- package/src/runtime/index.ts +18 -0
- package/src/runtime/markdown.ts +119 -0
- package/src/runtime/provider-helpers.ts +115 -0
- package/src/runtime/types.ts +30 -0
- package/src/storage/access-filter.ts +303 -0
- package/src/storage/index.ts +4 -0
- package/src/storage/json-file.ts +157 -0
- package/src/storage/pgvector.ts +31 -11
- package/src/storage/storage.test.ts +2 -0
- package/src/storage/types.ts +6 -0
- package/tsconfig.tsbuildinfo +1 -1
package/src/config/types.ts
CHANGED
|
@@ -155,6 +155,42 @@ export type VectorStorageConfig =
|
|
|
155
155
|
| JsonStorageConfig
|
|
156
156
|
| CustomStorageConfig
|
|
157
157
|
|
|
158
|
+
/**
|
|
159
|
+
* Build-time embedding generation configuration
|
|
160
|
+
*/
|
|
161
|
+
export type BuildTimeConfig = {
|
|
162
|
+
/**
|
|
163
|
+
* Enable build-time embedding generation
|
|
164
|
+
*/
|
|
165
|
+
enabled: boolean
|
|
166
|
+
|
|
167
|
+
/**
|
|
168
|
+
* Output path for embeddings JSON file
|
|
169
|
+
* Relative to project root
|
|
170
|
+
* @default '.embeddings/embeddings.json'
|
|
171
|
+
*/
|
|
172
|
+
outputPath?: string
|
|
173
|
+
|
|
174
|
+
/**
|
|
175
|
+
* Chunk size for text splitting (in characters)
|
|
176
|
+
* @default 500
|
|
177
|
+
*/
|
|
178
|
+
chunkSize?: number
|
|
179
|
+
|
|
180
|
+
/**
|
|
181
|
+
* Overlap between chunks (in characters)
|
|
182
|
+
* @default 50
|
|
183
|
+
*/
|
|
184
|
+
chunkOverlap?: number
|
|
185
|
+
|
|
186
|
+
/**
|
|
187
|
+
* Whether to enable differential updates
|
|
188
|
+
* Only regenerate embeddings for changed content
|
|
189
|
+
* @default true
|
|
190
|
+
*/
|
|
191
|
+
differential?: boolean
|
|
192
|
+
}
|
|
193
|
+
|
|
158
194
|
/**
|
|
159
195
|
* Main RAG configuration
|
|
160
196
|
*/
|
|
@@ -191,6 +227,13 @@ export type RAGConfig = {
|
|
|
191
227
|
*/
|
|
192
228
|
chunking?: ChunkingConfig
|
|
193
229
|
|
|
230
|
+
/**
|
|
231
|
+
* Build-time embedding generation configuration
|
|
232
|
+
* When enabled, embeddings are generated at build time and stored in a JSON file
|
|
233
|
+
* instead of being generated at runtime via hooks
|
|
234
|
+
*/
|
|
235
|
+
buildTime?: BuildTimeConfig
|
|
236
|
+
|
|
194
237
|
/**
|
|
195
238
|
* Whether to enable MCP tools for semantic search
|
|
196
239
|
* Requires MCP to be enabled in main config
|
|
@@ -219,6 +262,7 @@ export type NormalizedRAGConfig = {
|
|
|
219
262
|
providers: Record<string, EmbeddingProviderConfig>
|
|
220
263
|
storage: VectorStorageConfig
|
|
221
264
|
chunking: Required<ChunkingConfig>
|
|
265
|
+
buildTime: Required<BuildTimeConfig> | null
|
|
222
266
|
enableMcpTools: boolean
|
|
223
267
|
batchSize: number
|
|
224
268
|
rateLimit: number
|
|
@@ -281,3 +325,176 @@ export type SearchResult<T = unknown> = {
|
|
|
281
325
|
*/
|
|
282
326
|
distance: number
|
|
283
327
|
}
|
|
328
|
+
|
|
329
|
+
/**
|
|
330
|
+
* Options for searchable() field wrapper
|
|
331
|
+
* Simplified options for common use cases
|
|
332
|
+
*/
|
|
333
|
+
export type SearchableOptions = {
|
|
334
|
+
/**
|
|
335
|
+
* Embedding provider to use
|
|
336
|
+
* References a provider name from RAG config
|
|
337
|
+
* Falls back to default provider if not specified
|
|
338
|
+
*/
|
|
339
|
+
provider?: EmbeddingProviderName
|
|
340
|
+
|
|
341
|
+
/**
|
|
342
|
+
* Vector dimensions
|
|
343
|
+
* Must match the provider's output dimensions
|
|
344
|
+
* @default 1536 (OpenAI text-embedding-3-small)
|
|
345
|
+
*/
|
|
346
|
+
dimensions?: number
|
|
347
|
+
|
|
348
|
+
/**
|
|
349
|
+
* Chunking configuration for long texts
|
|
350
|
+
*/
|
|
351
|
+
chunking?: ChunkingConfig
|
|
352
|
+
|
|
353
|
+
/**
|
|
354
|
+
* Custom name for the generated embedding field
|
|
355
|
+
* If not provided, defaults to `${fieldName}Embedding`
|
|
356
|
+
* @example 'contentVector' instead of 'contentEmbedding'
|
|
357
|
+
*/
|
|
358
|
+
embeddingFieldName?: string
|
|
359
|
+
}
|
|
360
|
+
|
|
361
|
+
/**
|
|
362
|
+
* Internal metadata attached to searchable fields
|
|
363
|
+
* Used by ragPlugin to identify and inject embedding fields
|
|
364
|
+
* @internal
|
|
365
|
+
*/
|
|
366
|
+
export type SearchableMetadata = {
|
|
367
|
+
/**
|
|
368
|
+
* Name for the generated embedding field
|
|
369
|
+
*/
|
|
370
|
+
embeddingFieldName: string
|
|
371
|
+
|
|
372
|
+
/**
|
|
373
|
+
* Embedding provider to use
|
|
374
|
+
*/
|
|
375
|
+
provider?: EmbeddingProviderName
|
|
376
|
+
|
|
377
|
+
/**
|
|
378
|
+
* Vector dimensions
|
|
379
|
+
*/
|
|
380
|
+
dimensions?: number
|
|
381
|
+
|
|
382
|
+
/**
|
|
383
|
+
* Chunking configuration
|
|
384
|
+
*/
|
|
385
|
+
chunking?: ChunkingConfig
|
|
386
|
+
}
|
|
387
|
+
|
|
388
|
+
/**
|
|
389
|
+
* A chunk of text with its embedding
|
|
390
|
+
* Used in build-time generation output
|
|
391
|
+
*/
|
|
392
|
+
export type EmbeddingChunk = {
|
|
393
|
+
/**
|
|
394
|
+
* The text content of this chunk
|
|
395
|
+
*/
|
|
396
|
+
text: string
|
|
397
|
+
|
|
398
|
+
/**
|
|
399
|
+
* The embedding vector for this chunk
|
|
400
|
+
*/
|
|
401
|
+
embedding: number[]
|
|
402
|
+
|
|
403
|
+
/**
|
|
404
|
+
* Metadata about the chunk
|
|
405
|
+
*/
|
|
406
|
+
metadata: {
|
|
407
|
+
/**
|
|
408
|
+
* Index of this chunk within the document
|
|
409
|
+
*/
|
|
410
|
+
chunkIndex: number
|
|
411
|
+
|
|
412
|
+
/**
|
|
413
|
+
* Start character position in original text
|
|
414
|
+
*/
|
|
415
|
+
startOffset: number
|
|
416
|
+
|
|
417
|
+
/**
|
|
418
|
+
* End character position in original text
|
|
419
|
+
*/
|
|
420
|
+
endOffset: number
|
|
421
|
+
|
|
422
|
+
/**
|
|
423
|
+
* Whether this chunk represents a document title
|
|
424
|
+
* Title chunks receive boosted scoring during search
|
|
425
|
+
*/
|
|
426
|
+
isTitle?: boolean
|
|
427
|
+
|
|
428
|
+
/**
|
|
429
|
+
* Additional custom metadata
|
|
430
|
+
*/
|
|
431
|
+
[key: string]: unknown
|
|
432
|
+
}
|
|
433
|
+
}
|
|
434
|
+
|
|
435
|
+
/**
|
|
436
|
+
* Document with embeddings
|
|
437
|
+
* Used in build-time generation output
|
|
438
|
+
*/
|
|
439
|
+
export type EmbeddedDocument = {
|
|
440
|
+
/**
|
|
441
|
+
* Document ID or slug
|
|
442
|
+
*/
|
|
443
|
+
id: string
|
|
444
|
+
|
|
445
|
+
/**
|
|
446
|
+
* Document title
|
|
447
|
+
*/
|
|
448
|
+
title?: string
|
|
449
|
+
|
|
450
|
+
/**
|
|
451
|
+
* The chunks of this document with embeddings
|
|
452
|
+
*/
|
|
453
|
+
chunks: EmbeddingChunk[]
|
|
454
|
+
|
|
455
|
+
/**
|
|
456
|
+
* Embedding metadata
|
|
457
|
+
*/
|
|
458
|
+
embeddingMetadata: EmbeddingMetadata
|
|
459
|
+
|
|
460
|
+
/**
|
|
461
|
+
* When the embeddings were generated
|
|
462
|
+
*/
|
|
463
|
+
generatedAt: string
|
|
464
|
+
|
|
465
|
+
/**
|
|
466
|
+
* Hash of the source content (for differential updates)
|
|
467
|
+
*/
|
|
468
|
+
contentHash: string
|
|
469
|
+
}
|
|
470
|
+
|
|
471
|
+
/**
|
|
472
|
+
* Build-time embeddings index file format
|
|
473
|
+
*/
|
|
474
|
+
export type EmbeddingsIndex = {
|
|
475
|
+
/**
|
|
476
|
+
* Version of the embeddings format
|
|
477
|
+
*/
|
|
478
|
+
version: string
|
|
479
|
+
|
|
480
|
+
/**
|
|
481
|
+
* Embedding configuration used to generate these embeddings
|
|
482
|
+
*/
|
|
483
|
+
config: {
|
|
484
|
+
provider: string
|
|
485
|
+
model: string
|
|
486
|
+
dimensions: number
|
|
487
|
+
chunkSize: number
|
|
488
|
+
chunkOverlap: number
|
|
489
|
+
}
|
|
490
|
+
|
|
491
|
+
/**
|
|
492
|
+
* Documents with embeddings
|
|
493
|
+
*/
|
|
494
|
+
documents: Record<string, EmbeddedDocument>
|
|
495
|
+
|
|
496
|
+
/**
|
|
497
|
+
* When the index was generated
|
|
498
|
+
*/
|
|
499
|
+
generatedAt: string
|
|
500
|
+
}
|
package/src/fields/index.ts
CHANGED
|
package/src/fields/searchable.test.ts
ADDED
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
import { describe, it, expect } from 'vitest'
|
|
2
|
+
import { searchable } from './searchable.js'
|
|
3
|
+
import type { BaseFieldConfig } from '@opensaas/stack-core'
|
|
4
|
+
import type { SearchableOptions } from '../config/types.js'
|
|
5
|
+
|
|
6
|
+
// Mock text field for testing
|
|
7
|
+
function mockTextField(): BaseFieldConfig {
|
|
8
|
+
return {
|
|
9
|
+
type: 'text',
|
|
10
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
11
|
+
getZodSchema: () => null as any,
|
|
12
|
+
getPrismaType: () => ({ type: 'String', modifiers: '' }),
|
|
13
|
+
getTypeScriptType: () => ({ type: 'string', optional: false }),
|
|
14
|
+
}
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
describe('searchable() field wrapper', () => {
|
|
18
|
+
it('should preserve original field properties', () => {
|
|
19
|
+
const field = mockTextField()
|
|
20
|
+
const wrapped = searchable(field)
|
|
21
|
+
|
|
22
|
+
expect(wrapped.type).toBe('text')
|
|
23
|
+
expect(wrapped.getZodSchema).toBe(field.getZodSchema)
|
|
24
|
+
expect(wrapped.getPrismaType).toBe(field.getPrismaType)
|
|
25
|
+
expect(wrapped.getTypeScriptType).toBe(field.getTypeScriptType)
|
|
26
|
+
})
|
|
27
|
+
|
|
28
|
+
it('should attach _searchable metadata', () => {
|
|
29
|
+
const field = mockTextField()
|
|
30
|
+
const wrapped = searchable(field, { provider: 'openai' })
|
|
31
|
+
|
|
32
|
+
expect(wrapped._searchable).toBeDefined()
|
|
33
|
+
expect(wrapped._searchable.provider).toBe('openai')
|
|
34
|
+
})
|
|
35
|
+
|
|
36
|
+
it('should use default options when not provided', () => {
|
|
37
|
+
const field = mockTextField()
|
|
38
|
+
const wrapped = searchable(field)
|
|
39
|
+
|
|
40
|
+
expect(wrapped._searchable).toBeDefined()
|
|
41
|
+
expect(wrapped._searchable.embeddingFieldName).toBe('')
|
|
42
|
+
expect(wrapped._searchable.provider).toBeUndefined()
|
|
43
|
+
expect(wrapped._searchable.dimensions).toBeUndefined()
|
|
44
|
+
})
|
|
45
|
+
|
|
46
|
+
it('should accept all searchable options', () => {
|
|
47
|
+
const field = mockTextField()
|
|
48
|
+
const options: SearchableOptions = {
|
|
49
|
+
provider: 'ollama',
|
|
50
|
+
dimensions: 768,
|
|
51
|
+
chunking: {
|
|
52
|
+
strategy: 'sentence',
|
|
53
|
+
maxTokens: 300,
|
|
54
|
+
overlap: 25,
|
|
55
|
+
},
|
|
56
|
+
embeddingFieldName: 'customEmbedding',
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
const wrapped = searchable(field, options)
|
|
60
|
+
|
|
61
|
+
expect(wrapped._searchable.provider).toBe('ollama')
|
|
62
|
+
expect(wrapped._searchable.dimensions).toBe(768)
|
|
63
|
+
expect(wrapped._searchable.chunking).toEqual({
|
|
64
|
+
strategy: 'sentence',
|
|
65
|
+
maxTokens: 300,
|
|
66
|
+
overlap: 25,
|
|
67
|
+
})
|
|
68
|
+
expect(wrapped._searchable.embeddingFieldName).toBe('customEmbedding')
|
|
69
|
+
})
|
|
70
|
+
|
|
71
|
+
it('should preserve field with validation options', () => {
|
|
72
|
+
const fieldWithValidation = {
|
|
73
|
+
...mockTextField(),
|
|
74
|
+
validation: {
|
|
75
|
+
isRequired: true,
|
|
76
|
+
length: { min: 10, max: 1000 },
|
|
77
|
+
},
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
const wrapped = searchable(fieldWithValidation, { provider: 'openai' })
|
|
81
|
+
|
|
82
|
+
expect(wrapped.validation).toEqual({
|
|
83
|
+
isRequired: true,
|
|
84
|
+
length: { min: 10, max: 1000 },
|
|
85
|
+
})
|
|
86
|
+
expect(wrapped._searchable).toBeDefined()
|
|
87
|
+
})
|
|
88
|
+
|
|
89
|
+
it('should preserve field with hooks', () => {
|
|
90
|
+
const resolveInputHook = () => {}
|
|
91
|
+
const fieldWithHooks = {
|
|
92
|
+
...mockTextField(),
|
|
93
|
+
hooks: {
|
|
94
|
+
resolveInput: resolveInputHook,
|
|
95
|
+
},
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
const wrapped = searchable(fieldWithHooks)
|
|
99
|
+
|
|
100
|
+
expect(wrapped.hooks).toBeDefined()
|
|
101
|
+
expect(wrapped.hooks?.resolveInput).toBe(resolveInputHook)
|
|
102
|
+
expect(wrapped._searchable).toBeDefined()
|
|
103
|
+
})
|
|
104
|
+
|
|
105
|
+
it('should work with different field types', () => {
|
|
106
|
+
const richTextField = {
|
|
107
|
+
...mockTextField(),
|
|
108
|
+
type: 'richText' as const,
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
const wrapped = searchable(richTextField, { provider: 'openai' })
|
|
112
|
+
|
|
113
|
+
expect(wrapped.type).toBe('richText')
|
|
114
|
+
expect(wrapped._searchable).toBeDefined()
|
|
115
|
+
})
|
|
116
|
+
|
|
117
|
+
it('should handle empty embeddingFieldName option', () => {
|
|
118
|
+
const field = mockTextField()
|
|
119
|
+
const wrapped = searchable(field, { embeddingFieldName: '' })
|
|
120
|
+
|
|
121
|
+
expect(wrapped._searchable.embeddingFieldName).toBe('')
|
|
122
|
+
})
|
|
123
|
+
|
|
124
|
+
it('should handle partial chunking config', () => {
|
|
125
|
+
const field = mockTextField()
|
|
126
|
+
const wrapped = searchable(field, {
|
|
127
|
+
chunking: {
|
|
128
|
+
strategy: 'recursive',
|
|
129
|
+
},
|
|
130
|
+
})
|
|
131
|
+
|
|
132
|
+
expect(wrapped._searchable.chunking).toEqual({
|
|
133
|
+
strategy: 'recursive',
|
|
134
|
+
})
|
|
135
|
+
})
|
|
136
|
+
})
|
|
package/src/fields/searchable.ts
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import type { BaseFieldConfig } from '@opensaas/stack-core'
|
|
2
|
+
import type { SearchableOptions, SearchableMetadata } from '../config/types.js'
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* High-level field wrapper that automatically adds embedding field and hooks
|
|
6
|
+
*
|
|
7
|
+
* This wrapper makes it easy to add semantic search to any text field by
|
|
8
|
+
* automatically creating a companion embedding field that stays in sync.
|
|
9
|
+
*
|
|
10
|
+
* @example
|
|
11
|
+
* ```typescript
|
|
12
|
+
* import { text } from '@opensaas/stack-core/fields'
|
|
13
|
+
* import { searchable } from '@opensaas/stack-rag/fields'
|
|
14
|
+
*
|
|
15
|
+
* fields: {
|
|
16
|
+
* content: searchable(text(), {
|
|
17
|
+
* provider: 'openai',
|
|
18
|
+
* dimensions: 1536
|
|
19
|
+
* })
|
|
20
|
+
* }
|
|
21
|
+
* ```
|
|
22
|
+
*
|
|
23
|
+
* This is equivalent to the manual pattern:
|
|
24
|
+
* ```typescript
|
|
25
|
+
* fields: {
|
|
26
|
+
* content: text(),
|
|
27
|
+
* contentEmbedding: embedding({
|
|
28
|
+
* sourceField: 'content',
|
|
29
|
+
* provider: 'openai',
|
|
30
|
+
* dimensions: 1536,
|
|
31
|
+
* autoGenerate: true
|
|
32
|
+
* })
|
|
33
|
+
* }
|
|
34
|
+
* ```
|
|
35
|
+
*
|
|
36
|
+
* @param field - The field to make searchable (usually text() or richText())
|
|
37
|
+
* @param options - Embedding configuration options
|
|
38
|
+
* @returns The same field with searchable metadata attached
|
|
39
|
+
*/
|
|
40
|
+
export function searchable<T extends BaseFieldConfig>(
|
|
41
|
+
field: T,
|
|
42
|
+
options: SearchableOptions = {},
|
|
43
|
+
): T & { _searchable: SearchableMetadata } {
|
|
44
|
+
const { embeddingFieldName, provider, dimensions, chunking } = options
|
|
45
|
+
|
|
46
|
+
// Attach metadata to the field for ragPlugin to detect
|
|
47
|
+
return {
|
|
48
|
+
...field,
|
|
49
|
+
_searchable: {
|
|
50
|
+
// Use custom name if provided, otherwise will be set by plugin based on field name
|
|
51
|
+
embeddingFieldName: embeddingFieldName || '',
|
|
52
|
+
provider,
|
|
53
|
+
dimensions,
|
|
54
|
+
chunking,
|
|
55
|
+
},
|
|
56
|
+
}
|
|
57
|
+
}
|
package/src/index.ts
CHANGED
|
@@ -15,6 +15,9 @@ export {
|
|
|
15
15
|
// Plugin export
|
|
16
16
|
export { ragPlugin } from './config/plugin.js'
|
|
17
17
|
|
|
18
|
+
// Runtime type exports
|
|
19
|
+
export type { RAGRuntimeServices } from './runtime/types.js'
|
|
20
|
+
|
|
18
21
|
export type {
|
|
19
22
|
RAGConfig,
|
|
20
23
|
NormalizedRAGConfig,
|
|
@@ -30,4 +33,7 @@ export type {
|
|
|
30
33
|
EmbeddingMetadata,
|
|
31
34
|
StoredEmbedding,
|
|
32
35
|
SearchResult,
|
|
36
|
+
EmbeddingsIndex,
|
|
37
|
+
EmbeddedDocument,
|
|
38
|
+
EmbeddingChunk,
|
|
33
39
|
} from './config/types.js'
|
package/src/providers/openai.ts
CHANGED
|
@@ -10,6 +10,21 @@ const MODEL_DIMENSIONS: Record<OpenAIEmbeddingModel, number> = {
|
|
|
10
10
|
'text-embedding-ada-002': 1536,
|
|
11
11
|
}
|
|
12
12
|
|
|
13
|
+
/**
 * Lazily load the OpenAI SDK so that importing this module does not require
 * the `openai` package to be installed.
 *
 * @returns The default export of the `openai` module.
 * @throws Error when the dynamic import fails. The message contains the
 *   install instruction followed by the underlying failure reason.
 */
async function getOpenAI() {
  try {
    const openaiModule = await import('openai')
    return openaiModule.default
  } catch (error) {
    // Keep the underlying reason in the message: the import can fail for
    // causes other than a missing package (for example a broken install),
    // and dropping it makes those failures impossible to diagnose.
    const reason = error instanceof Error ? error.message : String(error)
    throw new Error(
      'OpenAI package not found. Install it with: npm install openai\n' +
        'Error: ' +
        reason,
    )
  }
}
|
|
27
|
+
|
|
13
28
|
/**
|
|
14
29
|
* Type for OpenAI client (avoids direct dependency)
|
|
15
30
|
*/
|
|
@@ -34,35 +49,33 @@ export class OpenAIEmbeddingProvider implements EmbeddingProvider {
|
|
|
34
49
|
readonly model: string
|
|
35
50
|
readonly dimensions: number
|
|
36
51
|
|
|
37
|
-
private client: OpenAIClient
|
|
52
|
+
private client: OpenAIClient | null = null
|
|
38
53
|
private config: OpenAIEmbeddingConfig
|
|
54
|
+
private clientPromise: Promise<OpenAIClient> | null = null
|
|
39
55
|
|
|
40
56
|
constructor(config: OpenAIEmbeddingConfig) {
|
|
41
57
|
this.config = config
|
|
42
58
|
this.model = config.model || 'text-embedding-3-small'
|
|
43
59
|
this.dimensions = MODEL_DIMENSIONS[this.model as OpenAIEmbeddingModel] || 1536
|
|
60
|
+
}
|
|
44
61
|
|
|
45
|
-
|
|
46
|
-
this.client
|
|
62
|
+
/**
 * Return the OpenAI client, creating it on first use.
 *
 * Concurrent callers share a single in-flight initialization. If that
 * initialization fails, the cached promise is cleared so that a later call
 * can retry instead of receiving the same rejection forever.
 *
 * @returns The initialized client.
 * @throws Whatever `initializeClient()` rejects with.
 */
private async ensureClient(): Promise<OpenAIClient> {
  if (this.client) return this.client

  if (!this.clientPromise) {
    this.clientPromise = this.initializeClient().catch((error: unknown) => {
      // Do not cache a failed initialization; allow the next call to retry.
      this.clientPromise = null
      throw error
    })
  }

  this.client = await this.clientPromise
  return this.client
}
|
|
48
70
|
|
|
49
|
-
private initializeClient(): OpenAIClient {
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
baseURL: this.config.baseURL,
|
|
58
|
-
}) as OpenAIClient
|
|
59
|
-
} catch (error) {
|
|
60
|
-
throw new Error(
|
|
61
|
-
'OpenAI package not found. Install it with: npm install openai\n' +
|
|
62
|
-
'Error: ' +
|
|
63
|
-
(error as Error).message,
|
|
64
|
-
)
|
|
65
|
-
}
|
|
71
|
+
private async initializeClient(): Promise<OpenAIClient> {
|
|
72
|
+
const OpenAI = await getOpenAI()
|
|
73
|
+
|
|
74
|
+
return new OpenAI({
|
|
75
|
+
apiKey: this.config.apiKey,
|
|
76
|
+
organization: this.config.organization,
|
|
77
|
+
baseURL: this.config.baseURL,
|
|
78
|
+
}) as OpenAIClient
|
|
66
79
|
}
|
|
67
80
|
|
|
68
81
|
/**
|
|
@@ -74,7 +87,8 @@ export class OpenAIEmbeddingProvider implements EmbeddingProvider {
|
|
|
74
87
|
}
|
|
75
88
|
|
|
76
89
|
try {
|
|
77
|
-
const
|
|
90
|
+
const client = await this.ensureClient()
|
|
91
|
+
const response = await client.embeddings.create({
|
|
78
92
|
model: this.model,
|
|
79
93
|
input: text,
|
|
80
94
|
encoding_format: 'float',
|
|
@@ -111,7 +125,8 @@ export class OpenAIEmbeddingProvider implements EmbeddingProvider {
|
|
|
111
125
|
|
|
112
126
|
try {
|
|
113
127
|
// OpenAI supports batch embedding
|
|
114
|
-
const
|
|
128
|
+
const client = await this.ensureClient()
|
|
129
|
+
const response = await client.embeddings.create({
|
|
115
130
|
model: this.model,
|
|
116
131
|
input: validTexts,
|
|
117
132
|
encoding_format: 'float',
|
|
package/src/runtime/batch.test.ts
CHANGED
|
@@ -267,7 +267,7 @@ describe('ProcessingQueue', () => {
|
|
|
267
267
|
// With concurrency 3, should be faster than sequential
|
|
268
268
|
// 5 items with 10ms each sequentially = 50ms
|
|
269
269
|
// With concurrency 3: ceil(5/3) * 10ms = 20ms
|
|
270
|
-
expect(duration).toBeLessThan(
|
|
270
|
+
expect(duration).toBeLessThan(50)
|
|
271
271
|
})
|
|
272
272
|
|
|
273
273
|
it('should track queue size', async () => {
|