@opensaas/stack-rag 0.1.6 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95)
  1. package/.turbo/turbo-build.log +1 -1
  2. package/CHANGELOG.md +141 -0
  3. package/README.md +82 -6
  4. package/dist/config/index.d.ts.map +1 -1
  5. package/dist/config/index.js +9 -0
  6. package/dist/config/index.js.map +1 -1
  7. package/dist/config/plugin.d.ts.map +1 -1
  8. package/dist/config/plugin.js +61 -1
  9. package/dist/config/plugin.js.map +1 -1
  10. package/dist/config/plugin.test.js +70 -14
  11. package/dist/config/plugin.test.js.map +1 -1
  12. package/dist/config/types.d.ts +186 -0
  13. package/dist/config/types.d.ts.map +1 -1
  14. package/dist/fields/index.d.ts +1 -0
  15. package/dist/fields/index.d.ts.map +1 -1
  16. package/dist/fields/index.js +1 -0
  17. package/dist/fields/index.js.map +1 -1
  18. package/dist/fields/searchable.d.ts +42 -0
  19. package/dist/fields/searchable.d.ts.map +1 -0
  20. package/dist/fields/searchable.js +51 -0
  21. package/dist/fields/searchable.js.map +1 -0
  22. package/dist/fields/searchable.test.d.ts +2 -0
  23. package/dist/fields/searchable.test.d.ts.map +1 -0
  24. package/dist/fields/searchable.test.js +112 -0
  25. package/dist/fields/searchable.test.js.map +1 -0
  26. package/dist/index.d.ts +2 -1
  27. package/dist/index.d.ts.map +1 -1
  28. package/dist/providers/openai.d.ts +2 -0
  29. package/dist/providers/openai.d.ts.map +1 -1
  30. package/dist/providers/openai.js +35 -20
  31. package/dist/providers/openai.js.map +1 -1
  32. package/dist/runtime/batch.test.js +1 -1
  33. package/dist/runtime/build-time.d.ts +100 -0
  34. package/dist/runtime/build-time.d.ts.map +1 -0
  35. package/dist/runtime/build-time.js +185 -0
  36. package/dist/runtime/build-time.js.map +1 -0
  37. package/dist/runtime/index.d.ts +3 -0
  38. package/dist/runtime/index.d.ts.map +1 -1
  39. package/dist/runtime/index.js +6 -0
  40. package/dist/runtime/index.js.map +1 -1
  41. package/dist/runtime/markdown.d.ts +33 -0
  42. package/dist/runtime/markdown.d.ts.map +1 -0
  43. package/dist/runtime/markdown.js +94 -0
  44. package/dist/runtime/markdown.js.map +1 -0
  45. package/dist/runtime/provider-helpers.d.ts +56 -0
  46. package/dist/runtime/provider-helpers.d.ts.map +1 -0
  47. package/dist/runtime/provider-helpers.js +95 -0
  48. package/dist/runtime/provider-helpers.js.map +1 -0
  49. package/dist/runtime/types.d.ts +29 -0
  50. package/dist/runtime/types.d.ts.map +1 -0
  51. package/dist/runtime/types.js +6 -0
  52. package/dist/runtime/types.js.map +1 -0
  53. package/dist/storage/access-filter.d.ts +30 -0
  54. package/dist/storage/access-filter.d.ts.map +1 -0
  55. package/dist/storage/access-filter.js +241 -0
  56. package/dist/storage/access-filter.js.map +1 -0
  57. package/dist/storage/index.d.ts +2 -0
  58. package/dist/storage/index.d.ts.map +1 -1
  59. package/dist/storage/index.js +3 -0
  60. package/dist/storage/index.js.map +1 -1
  61. package/dist/storage/json-file.d.ts +53 -0
  62. package/dist/storage/json-file.d.ts.map +1 -0
  63. package/dist/storage/json-file.js +124 -0
  64. package/dist/storage/json-file.js.map +1 -0
  65. package/dist/storage/pgvector.d.ts.map +1 -1
  66. package/dist/storage/pgvector.js +26 -11
  67. package/dist/storage/pgvector.js.map +1 -1
  68. package/dist/storage/storage.test.js +2 -0
  69. package/dist/storage/storage.test.js.map +1 -1
  70. package/dist/storage/types.d.ts +5 -0
  71. package/dist/storage/types.d.ts.map +1 -1
  72. package/dist/storage/types.js.map +1 -1
  73. package/package.json +6 -5
  74. package/src/config/index.ts +9 -0
  75. package/src/config/plugin.test.ts +70 -14
  76. package/src/config/plugin.ts +72 -2
  77. package/src/config/types.ts +217 -0
  78. package/src/fields/index.ts +2 -0
  79. package/src/fields/searchable.test.ts +136 -0
  80. package/src/fields/searchable.ts +57 -0
  81. package/src/index.ts +6 -0
  82. package/src/providers/openai.ts +37 -22
  83. package/src/runtime/batch.test.ts +1 -1
  84. package/src/runtime/build-time.ts +216 -0
  85. package/src/runtime/index.ts +18 -0
  86. package/src/runtime/markdown.ts +119 -0
  87. package/src/runtime/provider-helpers.ts +115 -0
  88. package/src/runtime/types.ts +30 -0
  89. package/src/storage/access-filter.ts +303 -0
  90. package/src/storage/index.ts +4 -0
  91. package/src/storage/json-file.ts +157 -0
  92. package/src/storage/pgvector.ts +31 -11
  93. package/src/storage/storage.test.ts +2 -0
  94. package/src/storage/types.ts +6 -0
  95. package/tsconfig.tsbuildinfo +1 -1
package/src/config/types.ts CHANGED
@@ -155,6 +155,42 @@ export type VectorStorageConfig =
155
155
  | JsonStorageConfig
156
156
  | CustomStorageConfig
157
157
 
158
+ /**
159
+ * Build-time embedding generation configuration
160
+ */
161
+ export type BuildTimeConfig = {
162
+ /**
163
+ * Enable build-time embedding generation
164
+ */
165
+ enabled: boolean
166
+
167
+ /**
168
+ * Output path for embeddings JSON file
169
+ * Relative to project root
170
+ * @default '.embeddings/embeddings.json'
171
+ */
172
+ outputPath?: string
173
+
174
+ /**
175
+ * Chunk size for text splitting (in characters)
176
+ * @default 500
177
+ */
178
+ chunkSize?: number
179
+
180
+ /**
181
+ * Overlap between chunks (in characters)
182
+ * @default 50
183
+ */
184
+ chunkOverlap?: number
185
+
186
+ /**
187
+ * Whether to enable differential updates
188
+ * Only regenerate embeddings for changed content
189
+ * @default true
190
+ */
191
+ differential?: boolean
192
+ }
193
+
158
194
  /**
159
195
  * Main RAG configuration
160
196
  */
@@ -191,6 +227,13 @@ export type RAGConfig = {
191
227
  */
192
228
  chunking?: ChunkingConfig
193
229
 
230
+ /**
231
+ * Build-time embedding generation configuration
232
+ * When enabled, embeddings are generated at build time and stored in a JSON file
233
+ * instead of being generated at runtime via hooks
234
+ */
235
+ buildTime?: BuildTimeConfig
236
+
194
237
  /**
195
238
  * Whether to enable MCP tools for semantic search
196
239
  * Requires MCP to be enabled in main config
@@ -219,6 +262,7 @@ export type NormalizedRAGConfig = {
219
262
  providers: Record<string, EmbeddingProviderConfig>
220
263
  storage: VectorStorageConfig
221
264
  chunking: Required<ChunkingConfig>
265
+ buildTime: Required<BuildTimeConfig> | null
222
266
  enableMcpTools: boolean
223
267
  batchSize: number
224
268
  rateLimit: number
@@ -281,3 +325,176 @@ export type SearchResult<T = unknown> = {
281
325
  */
282
326
  distance: number
283
327
  }
328
+
329
+ /**
330
+ * Options for searchable() field wrapper
331
+ * Simplified options for common use cases
332
+ */
333
+ export type SearchableOptions = {
334
+ /**
335
+ * Embedding provider to use
336
+ * References a provider name from RAG config
337
+ * Falls back to default provider if not specified
338
+ */
339
+ provider?: EmbeddingProviderName
340
+
341
+ /**
342
+ * Vector dimensions
343
+ * Must match the provider's output dimensions
344
+ * @default 1536 (OpenAI text-embedding-3-small)
345
+ */
346
+ dimensions?: number
347
+
348
+ /**
349
+ * Chunking configuration for long texts
350
+ */
351
+ chunking?: ChunkingConfig
352
+
353
+ /**
354
+ * Custom name for the generated embedding field
355
+ * If not provided, defaults to `${fieldName}Embedding`
356
+ * @example 'contentVector' instead of 'contentEmbedding'
357
+ */
358
+ embeddingFieldName?: string
359
+ }
360
+
361
+ /**
362
+ * Internal metadata attached to searchable fields
363
+ * Used by ragPlugin to identify and inject embedding fields
364
+ * @internal
365
+ */
366
+ export type SearchableMetadata = {
367
+ /**
368
+ * Name for the generated embedding field
369
+ */
370
+ embeddingFieldName: string
371
+
372
+ /**
373
+ * Embedding provider to use
374
+ */
375
+ provider?: EmbeddingProviderName
376
+
377
+ /**
378
+ * Vector dimensions
379
+ */
380
+ dimensions?: number
381
+
382
+ /**
383
+ * Chunking configuration
384
+ */
385
+ chunking?: ChunkingConfig
386
+ }
387
+
388
+ /**
389
+ * A chunk of text with its embedding
390
+ * Used in build-time generation output
391
+ */
392
+ export type EmbeddingChunk = {
393
+ /**
394
+ * The text content of this chunk
395
+ */
396
+ text: string
397
+
398
+ /**
399
+ * The embedding vector for this chunk
400
+ */
401
+ embedding: number[]
402
+
403
+ /**
404
+ * Metadata about the chunk
405
+ */
406
+ metadata: {
407
+ /**
408
+ * Index of this chunk within the document
409
+ */
410
+ chunkIndex: number
411
+
412
+ /**
413
+ * Start character position in original text
414
+ */
415
+ startOffset: number
416
+
417
+ /**
418
+ * End character position in original text
419
+ */
420
+ endOffset: number
421
+
422
+ /**
423
+ * Whether this chunk represents a document title
424
+ * Title chunks receive boosted scoring during search
425
+ */
426
+ isTitle?: boolean
427
+
428
+ /**
429
+ * Additional custom metadata
430
+ */
431
+ [key: string]: unknown
432
+ }
433
+ }
434
+
435
+ /**
436
+ * Document with embeddings
437
+ * Used in build-time generation output
438
+ */
439
+ export type EmbeddedDocument = {
440
+ /**
441
+ * Document ID or slug
442
+ */
443
+ id: string
444
+
445
+ /**
446
+ * Document title
447
+ */
448
+ title?: string
449
+
450
+ /**
451
+ * The chunks of this document with embeddings
452
+ */
453
+ chunks: EmbeddingChunk[]
454
+
455
+ /**
456
+ * Embedding metadata
457
+ */
458
+ embeddingMetadata: EmbeddingMetadata
459
+
460
+ /**
461
+ * When the embeddings were generated
462
+ */
463
+ generatedAt: string
464
+
465
+ /**
466
+ * Hash of the source content (for differential updates)
467
+ */
468
+ contentHash: string
469
+ }
470
+
471
+ /**
472
+ * Build-time embeddings index file format
473
+ */
474
+ export type EmbeddingsIndex = {
475
+ /**
476
+ * Version of the embeddings format
477
+ */
478
+ version: string
479
+
480
+ /**
481
+ * Embedding configuration used to generate these embeddings
482
+ */
483
+ config: {
484
+ provider: string
485
+ model: string
486
+ dimensions: number
487
+ chunkSize: number
488
+ chunkOverlap: number
489
+ }
490
+
491
+ /**
492
+ * Documents with embeddings
493
+ */
494
+ documents: Record<string, EmbeddedDocument>
495
+
496
+ /**
497
+ * When the index was generated
498
+ */
499
+ generatedAt: string
500
+ }
package/src/fields/index.ts CHANGED
@@ -4,3 +4,5 @@
4
4
 
5
5
  export { embedding } from './embedding.js'
6
6
  export type { EmbeddingField } from './embedding.js'
7
+
8
+ export { searchable } from './searchable.js'
package/src/fields/searchable.test.ts ADDED
@@ -0,0 +1,136 @@
1
+ import { describe, it, expect } from 'vitest'
2
+ import { searchable } from './searchable.js'
3
+ import type { BaseFieldConfig } from '@opensaas/stack-core'
4
+ import type { SearchableOptions } from '../config/types.js'
5
+
6
+ // Mock text field for testing
7
+ function mockTextField(): BaseFieldConfig {
8
+ return {
9
+ type: 'text',
10
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
11
+ getZodSchema: () => null as any,
12
+ getPrismaType: () => ({ type: 'String', modifiers: '' }),
13
+ getTypeScriptType: () => ({ type: 'string', optional: false }),
14
+ }
15
+ }
16
+
17
+ describe('searchable() field wrapper', () => {
18
+ it('should preserve original field properties', () => {
19
+ const field = mockTextField()
20
+ const wrapped = searchable(field)
21
+
22
+ expect(wrapped.type).toBe('text')
23
+ expect(wrapped.getZodSchema).toBe(field.getZodSchema)
24
+ expect(wrapped.getPrismaType).toBe(field.getPrismaType)
25
+ expect(wrapped.getTypeScriptType).toBe(field.getTypeScriptType)
26
+ })
27
+
28
+ it('should attach _searchable metadata', () => {
29
+ const field = mockTextField()
30
+ const wrapped = searchable(field, { provider: 'openai' })
31
+
32
+ expect(wrapped._searchable).toBeDefined()
33
+ expect(wrapped._searchable.provider).toBe('openai')
34
+ })
35
+
36
+ it('should use default options when not provided', () => {
37
+ const field = mockTextField()
38
+ const wrapped = searchable(field)
39
+
40
+ expect(wrapped._searchable).toBeDefined()
41
+ expect(wrapped._searchable.embeddingFieldName).toBe('')
42
+ expect(wrapped._searchable.provider).toBeUndefined()
43
+ expect(wrapped._searchable.dimensions).toBeUndefined()
44
+ })
45
+
46
+ it('should accept all searchable options', () => {
47
+ const field = mockTextField()
48
+ const options: SearchableOptions = {
49
+ provider: 'ollama',
50
+ dimensions: 768,
51
+ chunking: {
52
+ strategy: 'sentence',
53
+ maxTokens: 300,
54
+ overlap: 25,
55
+ },
56
+ embeddingFieldName: 'customEmbedding',
57
+ }
58
+
59
+ const wrapped = searchable(field, options)
60
+
61
+ expect(wrapped._searchable.provider).toBe('ollama')
62
+ expect(wrapped._searchable.dimensions).toBe(768)
63
+ expect(wrapped._searchable.chunking).toEqual({
64
+ strategy: 'sentence',
65
+ maxTokens: 300,
66
+ overlap: 25,
67
+ })
68
+ expect(wrapped._searchable.embeddingFieldName).toBe('customEmbedding')
69
+ })
70
+
71
+ it('should preserve field with validation options', () => {
72
+ const fieldWithValidation = {
73
+ ...mockTextField(),
74
+ validation: {
75
+ isRequired: true,
76
+ length: { min: 10, max: 1000 },
77
+ },
78
+ }
79
+
80
+ const wrapped = searchable(fieldWithValidation, { provider: 'openai' })
81
+
82
+ expect(wrapped.validation).toEqual({
83
+ isRequired: true,
84
+ length: { min: 10, max: 1000 },
85
+ })
86
+ expect(wrapped._searchable).toBeDefined()
87
+ })
88
+
89
+ it('should preserve field with hooks', () => {
90
+ const resolveInputHook = () => {}
91
+ const fieldWithHooks = {
92
+ ...mockTextField(),
93
+ hooks: {
94
+ resolveInput: resolveInputHook,
95
+ },
96
+ }
97
+
98
+ const wrapped = searchable(fieldWithHooks)
99
+
100
+ expect(wrapped.hooks).toBeDefined()
101
+ expect(wrapped.hooks?.resolveInput).toBe(resolveInputHook)
102
+ expect(wrapped._searchable).toBeDefined()
103
+ })
104
+
105
+ it('should work with different field types', () => {
106
+ const richTextField = {
107
+ ...mockTextField(),
108
+ type: 'richText' as const,
109
+ }
110
+
111
+ const wrapped = searchable(richTextField, { provider: 'openai' })
112
+
113
+ expect(wrapped.type).toBe('richText')
114
+ expect(wrapped._searchable).toBeDefined()
115
+ })
116
+
117
+ it('should handle empty embeddingFieldName option', () => {
118
+ const field = mockTextField()
119
+ const wrapped = searchable(field, { embeddingFieldName: '' })
120
+
121
+ expect(wrapped._searchable.embeddingFieldName).toBe('')
122
+ })
123
+
124
+ it('should handle partial chunking config', () => {
125
+ const field = mockTextField()
126
+ const wrapped = searchable(field, {
127
+ chunking: {
128
+ strategy: 'recursive',
129
+ },
130
+ })
131
+
132
+ expect(wrapped._searchable.chunking).toEqual({
133
+ strategy: 'recursive',
134
+ })
135
+ })
136
+ })
package/src/fields/searchable.ts ADDED
@@ -0,0 +1,57 @@
1
+ import type { BaseFieldConfig } from '@opensaas/stack-core'
2
+ import type { SearchableOptions, SearchableMetadata } from '../config/types.js'
3
+
4
+ /**
5
+ * High-level field wrapper that automatically adds embedding field and hooks
6
+ *
7
+ * This wrapper makes it easy to add semantic search to any text field by
8
+ * automatically creating a companion embedding field that stays in sync.
9
+ *
10
+ * @example
11
+ * ```typescript
12
+ * import { text } from '@opensaas/stack-core/fields'
13
+ * import { searchable } from '@opensaas/stack-rag/fields'
14
+ *
15
+ * fields: {
16
+ * content: searchable(text(), {
17
+ * provider: 'openai',
18
+ * dimensions: 1536
19
+ * })
20
+ * }
21
+ * ```
22
+ *
23
+ * This is equivalent to the manual pattern:
24
+ * ```typescript
25
+ * fields: {
26
+ * content: text(),
27
+ * contentEmbedding: embedding({
28
+ * sourceField: 'content',
29
+ * provider: 'openai',
30
+ * dimensions: 1536,
31
+ * autoGenerate: true
32
+ * })
33
+ * }
34
+ * ```
35
+ *
36
+ * @param field - The field to make searchable (usually text() or richText())
37
+ * @param options - Embedding configuration options
38
+ * @returns The same field with searchable metadata attached
39
+ */
40
+ export function searchable<T extends BaseFieldConfig>(
41
+ field: T,
42
+ options: SearchableOptions = {},
43
+ ): T & { _searchable: SearchableMetadata } {
44
+ const { embeddingFieldName, provider, dimensions, chunking } = options
45
+
46
+ // Attach metadata to the field for ragPlugin to detect
47
+ return {
48
+ ...field,
49
+ _searchable: {
50
+ // Use custom name if provided, otherwise will be set by plugin based on field name
51
+ embeddingFieldName: embeddingFieldName || '',
52
+ provider,
53
+ dimensions,
54
+ chunking,
55
+ },
56
+ }
57
+ }
package/src/index.ts CHANGED
@@ -15,6 +15,9 @@ export {
15
15
  // Plugin export
16
16
  export { ragPlugin } from './config/plugin.js'
17
17
 
18
+ // Runtime type exports
19
+ export type { RAGRuntimeServices } from './runtime/types.js'
20
+
18
21
  export type {
19
22
  RAGConfig,
20
23
  NormalizedRAGConfig,
@@ -30,4 +33,7 @@ export type {
30
33
  EmbeddingMetadata,
31
34
  StoredEmbedding,
32
35
  SearchResult,
36
+ EmbeddingsIndex,
37
+ EmbeddedDocument,
38
+ EmbeddingChunk,
33
39
  } from './config/types.js'
package/src/providers/openai.ts CHANGED
@@ -10,6 +10,21 @@ const MODEL_DIMENSIONS: Record<OpenAIEmbeddingModel, number> = {
10
10
  'text-embedding-ada-002': 1536,
11
11
  }
12
12
 
13
+ /**
14
+ * Lazily load OpenAI to avoid requiring it at import time
15
+ */
16
+ async function getOpenAI() {
17
+ try {
18
+ const module = await import('openai')
19
+ return module.default
20
+ } catch {
21
+ throw new Error(
22
+ 'OpenAI package not found. Install it with: npm install openai\n' +
23
+ 'Make sure to run: pnpm install openai',
24
+ )
25
+ }
26
+ }
27
+
13
28
  /**
14
29
  * Type for OpenAI client (avoids direct dependency)
15
30
  */
@@ -34,35 +49,33 @@ export class OpenAIEmbeddingProvider implements EmbeddingProvider {
34
49
  readonly model: string
35
50
  readonly dimensions: number
36
51
 
37
- private client: OpenAIClient
52
+ private client: OpenAIClient | null = null
38
53
  private config: OpenAIEmbeddingConfig
54
+ private clientPromise: Promise<OpenAIClient> | null = null
39
55
 
40
56
  constructor(config: OpenAIEmbeddingConfig) {
41
57
  this.config = config
42
58
  this.model = config.model || 'text-embedding-3-small'
43
59
  this.dimensions = MODEL_DIMENSIONS[this.model as OpenAIEmbeddingModel] || 1536
60
+ }
44
61
 
45
- // Initialize OpenAI client
46
- this.client = this.initializeClient()
62
+ private async ensureClient(): Promise<OpenAIClient> {
63
+ if (this.client) return this.client
64
+ if (this.clientPromise) return this.clientPromise
65
+
66
+ this.clientPromise = this.initializeClient()
67
+ this.client = await this.clientPromise
68
+ return this.client
47
69
  }
48
70
 
49
- private initializeClient(): OpenAIClient {
50
- try {
51
- // eslint-disable-next-line @typescript-eslint/no-require-imports
52
- const { OpenAI } = require('openai')
53
-
54
- return new OpenAI({
55
- apiKey: this.config.apiKey,
56
- organization: this.config.organization,
57
- baseURL: this.config.baseURL,
58
- }) as OpenAIClient
59
- } catch (error) {
60
- throw new Error(
61
- 'OpenAI package not found. Install it with: npm install openai\n' +
62
- 'Error: ' +
63
- (error as Error).message,
64
- )
65
- }
71
+ private async initializeClient(): Promise<OpenAIClient> {
72
+ const OpenAI = await getOpenAI()
73
+
74
+ return new OpenAI({
75
+ apiKey: this.config.apiKey,
76
+ organization: this.config.organization,
77
+ baseURL: this.config.baseURL,
78
+ }) as OpenAIClient
66
79
  }
67
80
 
68
81
  /**
@@ -74,7 +87,8 @@ export class OpenAIEmbeddingProvider implements EmbeddingProvider {
74
87
  }
75
88
 
76
89
  try {
77
- const response = await this.client.embeddings.create({
90
+ const client = await this.ensureClient()
91
+ const response = await client.embeddings.create({
78
92
  model: this.model,
79
93
  input: text,
80
94
  encoding_format: 'float',
@@ -111,7 +125,8 @@ export class OpenAIEmbeddingProvider implements EmbeddingProvider {
111
125
 
112
126
  try {
113
127
  // OpenAI supports batch embedding
114
- const response = await this.client.embeddings.create({
128
+ const client = await this.ensureClient()
129
+ const response = await client.embeddings.create({
115
130
  model: this.model,
116
131
  input: validTexts,
117
132
  encoding_format: 'float',
package/src/runtime/batch.test.ts CHANGED
@@ -267,7 +267,7 @@ describe('ProcessingQueue', () => {
267
267
  // With concurrency 3, should be faster than sequential
268
268
  // 5 items with 10ms each sequentially = 50ms
269
269
  // With concurrency 3: ceil(5/3) * 10ms = 20ms
270
- expect(duration).toBeLessThan(40)
270
+ expect(duration).toBeLessThan(50)
271
271
  })
272
272
 
273
273
  it('should track queue size', async () => {