npm - @opensaas/stack-rag - Versions diffs - 0.1.6 → 0.3.0 - Mend

@opensaas/stack-rag 0.1.6 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (95) hide show

package/.turbo/turbo-build.log +1 -1
package/CHANGELOG.md +141 -0
package/README.md +82 -6
package/dist/config/index.d.ts.map +1 -1
package/dist/config/index.js +9 -0
package/dist/config/index.js.map +1 -1
package/dist/config/plugin.d.ts.map +1 -1
package/dist/config/plugin.js +61 -1
package/dist/config/plugin.js.map +1 -1
package/dist/config/plugin.test.js +70 -14
package/dist/config/plugin.test.js.map +1 -1
package/dist/config/types.d.ts +186 -0
package/dist/config/types.d.ts.map +1 -1
package/dist/fields/index.d.ts +1 -0
package/dist/fields/index.d.ts.map +1 -1
package/dist/fields/index.js +1 -0
package/dist/fields/index.js.map +1 -1
package/dist/fields/searchable.d.ts +42 -0
package/dist/fields/searchable.d.ts.map +1 -0
package/dist/fields/searchable.js +51 -0
package/dist/fields/searchable.js.map +1 -0
package/dist/fields/searchable.test.d.ts +2 -0
package/dist/fields/searchable.test.d.ts.map +1 -0
package/dist/fields/searchable.test.js +112 -0
package/dist/fields/searchable.test.js.map +1 -0
package/dist/index.d.ts +2 -1
package/dist/index.d.ts.map +1 -1
package/dist/providers/openai.d.ts +2 -0
package/dist/providers/openai.d.ts.map +1 -1
package/dist/providers/openai.js +35 -20
package/dist/providers/openai.js.map +1 -1
package/dist/runtime/batch.test.js +1 -1
package/dist/runtime/build-time.d.ts +100 -0
package/dist/runtime/build-time.d.ts.map +1 -0
package/dist/runtime/build-time.js +185 -0
package/dist/runtime/build-time.js.map +1 -0
package/dist/runtime/index.d.ts +3 -0
package/dist/runtime/index.d.ts.map +1 -1
package/dist/runtime/index.js +6 -0
package/dist/runtime/index.js.map +1 -1
package/dist/runtime/markdown.d.ts +33 -0
package/dist/runtime/markdown.d.ts.map +1 -0
package/dist/runtime/markdown.js +94 -0
package/dist/runtime/markdown.js.map +1 -0
package/dist/runtime/provider-helpers.d.ts +56 -0
package/dist/runtime/provider-helpers.d.ts.map +1 -0
package/dist/runtime/provider-helpers.js +95 -0
package/dist/runtime/provider-helpers.js.map +1 -0
package/dist/runtime/types.d.ts +29 -0
package/dist/runtime/types.d.ts.map +1 -0
package/dist/runtime/types.js +6 -0
package/dist/runtime/types.js.map +1 -0
package/dist/storage/access-filter.d.ts +30 -0
package/dist/storage/access-filter.d.ts.map +1 -0
package/dist/storage/access-filter.js +241 -0
package/dist/storage/access-filter.js.map +1 -0
package/dist/storage/index.d.ts +2 -0
package/dist/storage/index.d.ts.map +1 -1
package/dist/storage/index.js +3 -0
package/dist/storage/index.js.map +1 -1
package/dist/storage/json-file.d.ts +53 -0
package/dist/storage/json-file.d.ts.map +1 -0
package/dist/storage/json-file.js +124 -0
package/dist/storage/json-file.js.map +1 -0
package/dist/storage/pgvector.d.ts.map +1 -1
package/dist/storage/pgvector.js +26 -11
package/dist/storage/pgvector.js.map +1 -1
package/dist/storage/storage.test.js +2 -0
package/dist/storage/storage.test.js.map +1 -1
package/dist/storage/types.d.ts +5 -0
package/dist/storage/types.d.ts.map +1 -1
package/dist/storage/types.js.map +1 -1
package/package.json +6 -5
package/src/config/index.ts +9 -0
package/src/config/plugin.test.ts +70 -14
package/src/config/plugin.ts +72 -2
package/src/config/types.ts +217 -0
package/src/fields/index.ts +2 -0
package/src/fields/searchable.test.ts +136 -0
package/src/fields/searchable.ts +57 -0
package/src/index.ts +6 -0
package/src/providers/openai.ts +37 -22
package/src/runtime/batch.test.ts +1 -1
package/src/runtime/build-time.ts +216 -0
package/src/runtime/index.ts +18 -0
package/src/runtime/markdown.ts +119 -0
package/src/runtime/provider-helpers.ts +115 -0
package/src/runtime/types.ts +30 -0
package/src/storage/access-filter.ts +303 -0
package/src/storage/index.ts +4 -0
package/src/storage/json-file.ts +157 -0
package/src/storage/pgvector.ts +31 -11
package/src/storage/storage.test.ts +2 -0
package/src/storage/types.ts +6 -0
package/tsconfig.tsbuildinfo +1 -1

package/src/config/types.ts CHANGED Viewed

@@ -155,6 +155,42 @@ export type VectorStorageConfig =
   | JsonStorageConfig
   | CustomStorageConfig
+/**
+ * Build-time embedding generation configuration
+ */
+export type BuildTimeConfig = {
+  /**
+   * Enable build-time embedding generation
+   */
+  enabled: boolean
+  /**
+   * Output path for embeddings JSON file
+   * Relative to project root
+   * @default '.embeddings/embeddings.json'
+   */
+  outputPath?: string
+  /**
+   * Chunk size for text splitting (in characters)
+   * @default 500
+   */
+  chunkSize?: number
+  /**
+   * Overlap between chunks (in characters)
+   * @default 50
+   */
+  chunkOverlap?: number
+  /**
+   * Whether to enable differential updates
+   * Only regenerate embeddings for changed content
+   * @default true
+   */
+  differential?: boolean
+}
 /**
  * Main RAG configuration
  */
@@ -191,6 +227,13 @@ export type RAGConfig = {
    */
   chunking?: ChunkingConfig
+  /**
+   * Build-time embedding generation configuration
+   * When enabled, embeddings are generated at build time and stored in a JSON file
+   * instead of being generated at runtime via hooks
+   */
+  buildTime?: BuildTimeConfig
   /**
    * Whether to enable MCP tools for semantic search
    * Requires MCP to be enabled in main config
@@ -219,6 +262,7 @@ export type NormalizedRAGConfig = {
   providers: Record<string, EmbeddingProviderConfig>
   storage: VectorStorageConfig
   chunking: Required<ChunkingConfig>
+  buildTime: Required<BuildTimeConfig> | null
   enableMcpTools: boolean
   batchSize: number
   rateLimit: number
@@ -281,3 +325,176 @@ export type SearchResult<T = unknown> = {
    */
   distance: number
 }
+/**
+ * Options for searchable() field wrapper
+ * Simplified options for common use cases
+ */
+export type SearchableOptions = {
+  /**
+   * Embedding provider to use
+   * References a provider name from RAG config
+   * Falls back to default provider if not specified
+   */
+  provider?: EmbeddingProviderName
+  /**
+   * Vector dimensions
+   * Must match the provider's output dimensions
+   * @default 1536 (OpenAI text-embedding-3-small)
+   */
+  dimensions?: number
+  /**
+   * Chunking configuration for long texts
+   */
+  chunking?: ChunkingConfig
+  /**
+   * Custom name for the generated embedding field
+   * If not provided, defaults to `${fieldName}Embedding`
+   * @example 'contentVector' instead of 'contentEmbedding'
+   */
+  embeddingFieldName?: string
+}
+/**
+ * Internal metadata attached to searchable fields
+ * Used by ragPlugin to identify and inject embedding fields
+ * @internal
+ */
+export type SearchableMetadata = {
+  /**
+   * Name for the generated embedding field
+   */
+  embeddingFieldName: string
+  /**
+   * Embedding provider to use
+   */
+  provider?: EmbeddingProviderName
+  /**
+   * Vector dimensions
+   */
+  dimensions?: number
+  /**
+   * Chunking configuration
+   */
+  chunking?: ChunkingConfig
+}
+/**
+ * A chunk of text with its embedding
+ * Used in build-time generation output
+ */
+export type EmbeddingChunk = {
+  /**
+   * The text content of this chunk
+   */
+  text: string
+  /**
+   * The embedding vector for this chunk
+   */
+  embedding: number[]
+  /**
+   * Metadata about the chunk
+   */
+  metadata: {
+    /**
+     * Index of this chunk within the document
+     */
+    chunkIndex: number
+    /**
+     * Start character position in original text
+     */
+    startOffset: number
+    /**
+     * End character position in original text
+     */
+    endOffset: number
+    /**
+     * Whether this chunk represents a document title
+     * Title chunks receive boosted scoring during search
+     */
+    isTitle?: boolean
+    /**
+     * Additional custom metadata
+     */
+    [key: string]: unknown
+  }
+}
+/**
+ * Document with embeddings
+ * Used in build-time generation output
+ */
+export type EmbeddedDocument = {
+  /**
+   * Document ID or slug
+   */
+  id: string
+  /**
+   * Document title
+   */
+  title?: string
+  /**
+   * The chunks of this document with embeddings
+   */
+  chunks: EmbeddingChunk[]
+  /**
+   * Embedding metadata
+   */
+  embeddingMetadata: EmbeddingMetadata
+  /**
+   * When the embeddings were generated
+   */
+  generatedAt: string
+  /**
+   * Hash of the source content (for differential updates)
+   */
+  contentHash: string
+}
+/**
+ * Build-time embeddings index file format
+ */
+export type EmbeddingsIndex = {
+  /**
+   * Version of the embeddings format
+   */
+  version: string
+  /**
+   * Embedding configuration used to generate these embeddings
+   */
+  config: {
+    provider: string
+    model: string
+    dimensions: number
+    chunkSize: number
+    chunkOverlap: number
+  }
+  /**
+   * Documents with embeddings
+   */
+  documents: Record<string, EmbeddedDocument>
+  /**
+   * When the index was generated
+   */
+  generatedAt: string
+}

package/src/fields/index.ts CHANGED Viewed

@@ -4,3 +4,5 @@
 export { embedding } from './embedding.js'
 export type { EmbeddingField } from './embedding.js'
+export { searchable } from './searchable.js'

package/src/fields/searchable.test.ts ADDED Viewed

@@ -0,0 +1,136 @@
+import { describe, it, expect } from 'vitest'
+import { searchable } from './searchable.js'
+import type { BaseFieldConfig } from '@opensaas/stack-core'
+import type { SearchableOptions } from '../config/types.js'
+// Mock text field for testing
+function mockTextField(): BaseFieldConfig {
+  return {
+    type: 'text',
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+    getZodSchema: () => null as any,
+    getPrismaType: () => ({ type: 'String', modifiers: '' }),
+    getTypeScriptType: () => ({ type: 'string', optional: false }),
+  }
+}
+describe('searchable() field wrapper', () => {
+  it('should preserve original field properties', () => {
+    const field = mockTextField()
+    const wrapped = searchable(field)
+    expect(wrapped.type).toBe('text')
+    expect(wrapped.getZodSchema).toBe(field.getZodSchema)
+    expect(wrapped.getPrismaType).toBe(field.getPrismaType)
+    expect(wrapped.getTypeScriptType).toBe(field.getTypeScriptType)
+  })
+  it('should attach _searchable metadata', () => {
+    const field = mockTextField()
+    const wrapped = searchable(field, { provider: 'openai' })
+    expect(wrapped._searchable).toBeDefined()
+    expect(wrapped._searchable.provider).toBe('openai')
+  })
+  it('should use default options when not provided', () => {
+    const field = mockTextField()
+    const wrapped = searchable(field)
+    expect(wrapped._searchable).toBeDefined()
+    expect(wrapped._searchable.embeddingFieldName).toBe('')
+    expect(wrapped._searchable.provider).toBeUndefined()
+    expect(wrapped._searchable.dimensions).toBeUndefined()
+  })
+  it('should accept all searchable options', () => {
+    const field = mockTextField()
+    const options: SearchableOptions = {
+      provider: 'ollama',
+      dimensions: 768,
+      chunking: {
+        strategy: 'sentence',
+        maxTokens: 300,
+        overlap: 25,
+      },
+      embeddingFieldName: 'customEmbedding',
+    }
+    const wrapped = searchable(field, options)
+    expect(wrapped._searchable.provider).toBe('ollama')
+    expect(wrapped._searchable.dimensions).toBe(768)
+    expect(wrapped._searchable.chunking).toEqual({
+      strategy: 'sentence',
+      maxTokens: 300,
+      overlap: 25,
+    })
+    expect(wrapped._searchable.embeddingFieldName).toBe('customEmbedding')
+  })
+  it('should preserve field with validation options', () => {
+    const fieldWithValidation = {
+      ...mockTextField(),
+      validation: {
+        isRequired: true,
+        length: { min: 10, max: 1000 },
+      },
+    }
+    const wrapped = searchable(fieldWithValidation, { provider: 'openai' })
+    expect(wrapped.validation).toEqual({
+      isRequired: true,
+      length: { min: 10, max: 1000 },
+    })
+    expect(wrapped._searchable).toBeDefined()
+  })
+  it('should preserve field with hooks', () => {
+    const resolveInputHook = () => {}
+    const fieldWithHooks = {
+      ...mockTextField(),
+      hooks: {
+        resolveInput: resolveInputHook,
+      },
+    }
+    const wrapped = searchable(fieldWithHooks)
+    expect(wrapped.hooks).toBeDefined()
+    expect(wrapped.hooks?.resolveInput).toBe(resolveInputHook)
+    expect(wrapped._searchable).toBeDefined()
+  })
+  it('should work with different field types', () => {
+    const richTextField = {
+      ...mockTextField(),
+      type: 'richText' as const,
+    }
+    const wrapped = searchable(richTextField, { provider: 'openai' })
+    expect(wrapped.type).toBe('richText')
+    expect(wrapped._searchable).toBeDefined()
+  })
+  it('should handle empty embeddingFieldName option', () => {
+    const field = mockTextField()
+    const wrapped = searchable(field, { embeddingFieldName: '' })
+    expect(wrapped._searchable.embeddingFieldName).toBe('')
+  })
+  it('should handle partial chunking config', () => {
+    const field = mockTextField()
+    const wrapped = searchable(field, {
+      chunking: {
+        strategy: 'recursive',
+      },
+    })
+    expect(wrapped._searchable.chunking).toEqual({
+      strategy: 'recursive',
+    })
+  })
+})

package/src/fields/searchable.ts ADDED Viewed

@@ -0,0 +1,57 @@
+import type { BaseFieldConfig } from '@opensaas/stack-core'
+import type { SearchableOptions, SearchableMetadata } from '../config/types.js'
+/**
+ * High-level field wrapper that automatically adds embedding field and hooks
+ *
+ * This wrapper makes it easy to add semantic search to any text field by
+ * automatically creating a companion embedding field that stays in sync.
+ *
+ * @example
+ * ```typescript
+ * import { text } from '@opensaas/stack-core/fields'
+ * import { searchable } from '@opensaas/stack-rag/fields'
+ *
+ * fields: {
+ *   content: searchable(text(), {
+ *     provider: 'openai',
+ *     dimensions: 1536
+ *   })
+ * }
+ * ```
+ *
+ * This is equivalent to the manual pattern:
+ * ```typescript
+ * fields: {
+ *   content: text(),
+ *   contentEmbedding: embedding({
+ *     sourceField: 'content',
+ *     provider: 'openai',
+ *     dimensions: 1536,
+ *     autoGenerate: true
+ *   })
+ * }
+ * ```
+ *
+ * @param field - The field to make searchable (usually text() or richText())
+ * @param options - Embedding configuration options
+ * @returns The same field with searchable metadata attached
+ */
+export function searchable<T extends BaseFieldConfig>(
+  field: T,
+  options: SearchableOptions = {},
+): T & { _searchable: SearchableMetadata } {
+  const { embeddingFieldName, provider, dimensions, chunking } = options
+  // Attach metadata to the field for ragPlugin to detect
+  return {
+    ...field,
+    _searchable: {
+      // Use custom name if provided, otherwise will be set by plugin based on field name
+      embeddingFieldName: embeddingFieldName || '',
+      provider,
+      dimensions,
+      chunking,
+    },
+  }
+}

package/src/index.ts CHANGED Viewed

@@ -15,6 +15,9 @@ export {
 // Plugin export
 export { ragPlugin } from './config/plugin.js'
+// Runtime type exports
+export type { RAGRuntimeServices } from './runtime/types.js'
 export type {
   RAGConfig,
   NormalizedRAGConfig,
@@ -30,4 +33,7 @@ export type {
   EmbeddingMetadata,
   StoredEmbedding,
   SearchResult,
+  EmbeddingsIndex,
+  EmbeddedDocument,
+  EmbeddingChunk,
 } from './config/types.js'

package/src/providers/openai.ts CHANGED Viewed

@@ -10,6 +10,21 @@ const MODEL_DIMENSIONS: Record<OpenAIEmbeddingModel, number> = {
   'text-embedding-ada-002': 1536,
 }
+/**
+ * Lazily load OpenAI to avoid requiring it at import time
+ */
+async function getOpenAI() {
+  try {
+    const module = await import('openai')
+    return module.default
+  } catch {
+    throw new Error(
+      'OpenAI package not found. Install it with: npm install openai\n' +
+        'Make sure to run: pnpm install openai',
+    )
+  }
+}
 /**
  * Type for OpenAI client (avoids direct dependency)
  */
@@ -34,35 +49,33 @@ export class OpenAIEmbeddingProvider implements EmbeddingProvider {
   readonly model: string
   readonly dimensions: number
-  private client: OpenAIClient
+  private client: OpenAIClient | null = null
   private config: OpenAIEmbeddingConfig
+  private clientPromise: Promise<OpenAIClient> | null = null
   constructor(config: OpenAIEmbeddingConfig) {
     this.config = config
     this.model = config.model || 'text-embedding-3-small'
     this.dimensions = MODEL_DIMENSIONS[this.model as OpenAIEmbeddingModel] || 1536
+  }
-    // Initialize OpenAI client
-    this.client = this.initializeClient()
+  private async ensureClient(): Promise<OpenAIClient> {
+    if (this.client) return this.client
+    if (this.clientPromise) return this.clientPromise
+    this.clientPromise = this.initializeClient()
+    this.client = await this.clientPromise
+    return this.client
   }
-  private initializeClient(): OpenAIClient {
-    try {
-      // eslint-disable-next-line @typescript-eslint/no-require-imports
-      const { OpenAI } = require('openai')
-      return new OpenAI({
-        apiKey: this.config.apiKey,
-        organization: this.config.organization,
-        baseURL: this.config.baseURL,
-      }) as OpenAIClient
-    } catch (error) {
-      throw new Error(
-        'OpenAI package not found. Install it with: npm install openai\n' +
-          'Error: ' +
-          (error as Error).message,
-      )
-    }
+  private async initializeClient(): Promise<OpenAIClient> {
+    const OpenAI = await getOpenAI()
+    return new OpenAI({
+      apiKey: this.config.apiKey,
+      organization: this.config.organization,
+      baseURL: this.config.baseURL,
+    }) as OpenAIClient
   }
   /**
@@ -74,7 +87,8 @@ export class OpenAIEmbeddingProvider implements EmbeddingProvider {
     }
     try {
-      const response = await this.client.embeddings.create({
+      const client = await this.ensureClient()
+      const response = await client.embeddings.create({
         model: this.model,
         input: text,
         encoding_format: 'float',
@@ -111,7 +125,8 @@ export class OpenAIEmbeddingProvider implements EmbeddingProvider {
     try {
       // OpenAI supports batch embedding
-      const response = await this.client.embeddings.create({
+      const client = await this.ensureClient()
+      const response = await client.embeddings.create({
         model: this.model,
         input: validTexts,
         encoding_format: 'float',

package/src/runtime/batch.test.ts CHANGED Viewed

@@ -267,7 +267,7 @@ describe('ProcessingQueue', () => {
     // With concurrency 3, should be faster than sequential
     // 5 items with 10ms each sequentially = 50ms
     // With concurrency 3: ceil(5/3) * 10ms = 20ms
-    expect(duration).toBeLessThan(40)
+    expect(duration).toBeLessThan(50)
   })
   it('should track queue size', async () => {