@opensaas/stack-rag 0.1.7 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +1 -1
- package/CHANGELOG.md +223 -0
- package/dist/config/index.d.ts.map +1 -1
- package/dist/config/index.js +9 -0
- package/dist/config/index.js.map +1 -1
- package/dist/config/plugin.d.ts.map +1 -1
- package/dist/config/plugin.js +32 -0
- package/dist/config/plugin.js.map +1 -1
- package/dist/config/plugin.test.js +70 -14
- package/dist/config/plugin.test.js.map +1 -1
- package/dist/config/types.d.ts +135 -0
- package/dist/config/types.d.ts.map +1 -1
- package/dist/fields/embedding.d.ts +4 -4
- package/dist/fields/embedding.d.ts.map +1 -1
- package/dist/fields/embedding.js.map +1 -1
- package/dist/fields/searchable.d.ts +1 -1
- package/dist/fields/searchable.d.ts.map +1 -1
- package/dist/fields/searchable.js +1 -0
- package/dist/fields/searchable.js.map +1 -1
- package/dist/fields/searchable.test.js +1 -0
- package/dist/fields/searchable.test.js.map +1 -1
- package/dist/index.d.ts +2 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/providers/ollama.js +4 -4
- package/dist/providers/ollama.js.map +1 -1
- package/dist/providers/openai.js +1 -1
- package/dist/providers/openai.js.map +1 -1
- package/dist/runtime/build-time.d.ts +100 -0
- package/dist/runtime/build-time.d.ts.map +1 -0
- package/dist/runtime/build-time.js +185 -0
- package/dist/runtime/build-time.js.map +1 -0
- package/dist/runtime/index.d.ts +3 -0
- package/dist/runtime/index.d.ts.map +1 -1
- package/dist/runtime/index.js +6 -0
- package/dist/runtime/index.js.map +1 -1
- package/dist/runtime/markdown.d.ts +33 -0
- package/dist/runtime/markdown.d.ts.map +1 -0
- package/dist/runtime/markdown.js +94 -0
- package/dist/runtime/markdown.js.map +1 -0
- package/dist/runtime/provider-helpers.d.ts +56 -0
- package/dist/runtime/provider-helpers.d.ts.map +1 -0
- package/dist/runtime/provider-helpers.js +95 -0
- package/dist/runtime/provider-helpers.js.map +1 -0
- package/dist/runtime/types.d.ts +29 -0
- package/dist/runtime/types.d.ts.map +1 -0
- package/dist/runtime/types.js +6 -0
- package/dist/runtime/types.js.map +1 -0
- package/dist/storage/index.d.ts +1 -0
- package/dist/storage/index.d.ts.map +1 -1
- package/dist/storage/index.js +1 -0
- package/dist/storage/index.js.map +1 -1
- package/dist/storage/json-file.d.ts +53 -0
- package/dist/storage/json-file.d.ts.map +1 -0
- package/dist/storage/json-file.js +124 -0
- package/dist/storage/json-file.js.map +1 -0
- package/dist/storage/storage.test.js +1 -0
- package/dist/storage/storage.test.js.map +1 -1
- package/package.json +11 -10
- package/src/config/index.ts +9 -0
- package/src/config/plugin.test.ts +70 -14
- package/src/config/plugin.ts +37 -0
- package/src/config/types.ts +158 -0
- package/src/fields/embedding.ts +6 -4
- package/src/fields/searchable.test.ts +2 -1
- package/src/fields/searchable.ts +2 -1
- package/src/index.ts +6 -0
- package/src/providers/ollama.ts +5 -5
- package/src/providers/openai.ts +1 -1
- package/src/runtime/build-time.ts +216 -0
- package/src/runtime/index.ts +18 -0
- package/src/runtime/markdown.ts +119 -0
- package/src/runtime/provider-helpers.ts +115 -0
- package/src/runtime/types.ts +30 -0
- package/src/storage/index.ts +1 -0
- package/src/storage/json-file.ts +157 -0
- package/src/storage/storage.test.ts +1 -0
- package/tsconfig.tsbuildinfo +1 -1
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Helper utilities for working with embedding providers
|
|
3
|
+
* Simplifies provider creation from environment variables
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { createEmbeddingProvider } from '../providers/index.js'
|
|
7
|
+
import type { EmbeddingProvider } from '../providers/types.js'
|
|
8
|
+
import type { EmbeddingProviderConfig } from '../config/types.js'
|
|
9
|
+
import 'dotenv/config'
|
|
10
|
+
|
|
11
|
+
/**
 * Names of the embedding provider backends that can be selected,
 * either through the environment or through explicit configuration
 */
export type ProviderType =
  | 'openai'
  | 'ollama'
|
|
15
|
+
|
|
16
|
+
/**
 * Optional values that take precedence over the corresponding environment
 * variables when the provider configuration is resolved.
 */
interface ProviderOverrides {
  /** Provider to use instead of EMBEDDING_PROVIDER */
  provider?: ProviderType
  /** API key to use instead of OPENAI_API_KEY */
  openaiApiKey?: string
  /** Endpoint to use instead of OLLAMA_BASE_URL */
  ollamaBaseUrl?: string
  /** Embedding model name; each provider falls back to its own default */
  model?: string
}

/**
 * Resolve the embedding provider configuration from overrides and environment.
 *
 * Single source of truth for createProviderFromEnv and getProviderConfigFromEnv,
 * so both always agree on defaults, precedence and error messages.
 *
 * Precedence for every setting is: override, then environment variable, then
 * built-in default. Empty strings count as "not set".
 *
 * @param overrides - Optional overrides for environment config
 * @returns Provider configuration object
 * @throws Error if OpenAI is selected without an API key, or if the selected
 *   provider is not one of the supported types
 */
function resolveProviderConfig(overrides?: ProviderOverrides): EmbeddingProviderConfig {
  // Environment values are free-form text, so tolerate stray whitespace and
  // casing (e.g. "OpenAI") instead of asserting the raw string is a ProviderType.
  const envProvider = process.env.EMBEDDING_PROVIDER?.trim().toLowerCase()
  const providerType: string = overrides?.provider || envProvider || 'openai'

  if (providerType === 'openai') {
    const apiKey = overrides?.openaiApiKey || process.env.OPENAI_API_KEY

    if (!apiKey) {
      throw new Error('OPENAI_API_KEY environment variable is required when using OpenAI provider')
    }

    return {
      type: 'openai',
      apiKey,
      // NOTE(review): the model override is only asserted, not validated,
      // against the two model names listed here.
      model:
        (overrides?.model as 'text-embedding-3-small' | 'text-embedding-3-large') ||
        'text-embedding-3-small',
    }
  }

  if (providerType === 'ollama') {
    return {
      type: 'ollama',
      baseURL: overrides?.ollamaBaseUrl || process.env.OLLAMA_BASE_URL || 'http://localhost:11434',
      model: overrides?.model || 'nomic-embed-text',
    }
  }

  throw new Error(`Unknown provider type: ${providerType}. Supported: openai, ollama`)
}

/**
 * Create an embedding provider from environment variables
 *
 * Reads configuration from environment variables:
 * - EMBEDDING_PROVIDER: 'openai' or 'ollama' (default: 'openai'),
 *   matched case-insensitively with surrounding whitespace ignored
 * - OPENAI_API_KEY: Required if using OpenAI
 * - OLLAMA_BASE_URL: Ollama endpoint (default: 'http://localhost:11434')
 *
 * @param overrides - Optional overrides for environment config
 * @returns Configured embedding provider
 * @throws Error if OpenAI is selected without an API key, or if the provider
 *   type is not supported
 *
 * @example
 * ```typescript
 * import { createProviderFromEnv } from '@opensaas/stack-rag/runtime'
 *
 * // Uses EMBEDDING_PROVIDER and OPENAI_API_KEY from env
 * const provider = createProviderFromEnv()
 *
 * // Override provider type
 * const ollamaProvider = createProviderFromEnv({ provider: 'ollama' })
 * ```
 */
export function createProviderFromEnv(overrides?: ProviderOverrides): EmbeddingProvider {
  return createEmbeddingProvider(resolveProviderConfig(overrides))
}

/**
 * Get provider configuration from environment
 *
 * Useful for inspecting what provider would be used without creating it.
 * Accepts the same overrides as createProviderFromEnv so the reported
 * configuration matches what that function would build.
 *
 * @param overrides - Optional overrides for environment config
 * @returns Provider configuration object
 * @throws Error if OpenAI is selected without an API key, or if the provider
 *   type is not supported
 *
 * @example
 * ```typescript
 * import { getProviderConfigFromEnv } from '@opensaas/stack-rag/runtime'
 *
 * const config = getProviderConfigFromEnv()
 * console.log(`Using ${config.type} provider`)
 * ```
 */
export function getProviderConfigFromEnv(overrides?: ProviderOverrides): EmbeddingProviderConfig {
  return resolveProviderConfig(overrides)
}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Type definitions for RAG plugin runtime services
|
|
3
|
+
* These types are used for type-safe access to context.plugins.rag
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
/**
 * Services the RAG plugin exposes at runtime.
 * Reached through context.plugins.rag.
 */
export interface RAGRuntimeServices {
  /**
   * Embed a single piece of text with the configured embedding provider.
   *
   * @param text - Text to turn into an embedding
   * @param providerName - Which provider to use when more than one is configured (optional)
   * @returns Promise resolving to the embedding vector as an array of numbers
   */
  generateEmbedding: (text: string, providerName?: string) => Promise<number[]>

  /**
   * Embed several texts in one call (batch form of generateEmbedding).
   * Intended to be more efficient than calling generateEmbedding once per text.
   *
   * @param texts - Texts to turn into embeddings
   * @param providerName - Which provider to use when more than one is configured (optional)
   * @returns Promise resolving to an array of embedding vectors
   */
  generateEmbeddings: (texts: string[], providerName?: string) => Promise<number[][]>
}
|
package/src/storage/index.ts
CHANGED
|
@@ -82,6 +82,7 @@ export function createVectorStorage(config: VectorStorageConfig): VectorStorage
|
|
|
82
82
|
// Export types and individual storage backends
|
|
83
83
|
export * from './types.js'
|
|
84
84
|
export { JsonVectorStorage, createJsonStorage } from './json.js'
|
|
85
|
+
export { JsonFileStorage, createJsonFileStorage } from './json-file.js'
|
|
85
86
|
export { PgVectorStorage, createPgVectorStorage } from './pgvector.js'
|
|
86
87
|
export { SqliteVssStorage, createSqliteVssStorage } from './sqlite-vss.js'
|
|
87
88
|
|
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
import { readFileSync, existsSync } from 'node:fs'
|
|
2
|
+
import type { VectorStorage, SearchOptions } from './types.js'
|
|
3
|
+
import type { SearchResult, EmbeddingsIndex } from '../config/types.js'
|
|
4
|
+
import { cosineSimilarity as calculateCosineSimilarity } from './types.js'
|
|
5
|
+
|
|
6
|
+
/**
 * JSON file-based vector storage
 *
 * Reads an embeddings index from a JSON file (generated at build time), keeps
 * it cached in memory and answers similarity queries by scoring every chunk in
 * JavaScript, without database queries.
 * Ideal for static sites, documentation, and build-time generated embeddings
 */
export class JsonFileStorage implements VectorStorage {
  readonly type = 'json-file'

  /** Parsed index, or null until the first successful load */
  private index: EmbeddingsIndex | null = null
  private filePath: string

  /**
   * @param filePath - Path of the embeddings JSON file. The file is not read
   *   here; it is loaded lazily on first use.
   */
  constructor(filePath: string) {
    this.filePath = filePath
  }

  /**
   * Shallow structural check for the parts of the index that search relies on:
   * a numeric `config.dimensions` and a `documents` object.
   */
  private static hasIndexShape(value: unknown): boolean {
    if (typeof value !== 'object' || value === null) {
      return false
    }

    const { config, documents } = value as { config?: unknown; documents?: unknown }
    if (typeof config !== 'object' || config === null) {
      return false
    }
    if (typeof (config as { dimensions?: unknown }).dimensions !== 'number') {
      return false
    }

    return typeof documents === 'object' && documents !== null
  }

  /**
   * Load embeddings index from file
   * Caches the result in memory; only a structurally valid index is cached.
   *
   * @returns The cached or freshly loaded index
   * @throws Error if the file does not exist, cannot be read or parsed, or does
   *   not have the expected top-level shape
   */
  private loadIndex(): EmbeddingsIndex {
    if (this.index) {
      return this.index
    }

    if (!existsSync(this.filePath)) {
      throw new Error(
        `Embeddings file not found: ${this.filePath}. Run embeddings generation first.`,
      )
    }

    try {
      const content = readFileSync(this.filePath, 'utf-8')
      const parsed: unknown = JSON.parse(content)

      // Fail here with a clear message rather than later inside search()
      // with an unrelated TypeError on a missing property.
      if (!JsonFileStorage.hasIndexShape(parsed)) {
        throw new Error(
          'invalid embeddings index: expected an object with a numeric "config.dimensions" and a "documents" object',
        )
      }

      // Safe after the runtime check above, which the compiler cannot follow.
      this.index = parsed as EmbeddingsIndex
      return this.index
    } catch (error) {
      throw new Error(`Failed to load embeddings from ${this.filePath}: ${error}`)
    }
  }

  /**
   * Search for similar documents using JavaScript-based cosine similarity
   * Note: listKey and fieldName are included for interface compatibility but
   * not used, since this storage is typically used for standalone content
   * (e.g., docs)
   *
   * @param _listKey - Unused
   * @param _fieldName - Unused
   * @param queryVector - Query embedding; its length must equal the index dimensions
   * @param options - `limit` (default 10), `minScore` (default 0) and `where`,
   *   a set of key/value pairs matched by strict equality against top-level
   *   document fields
   * @returns Matching chunks sorted by descending score, at most `limit` of
   *   them (none when `limit` is zero or negative)
   * @throws Error if the index cannot be loaded or the query dimensions differ
   *   from the index dimensions
   */
  async search<T = unknown>(
    _listKey: string,
    _fieldName: string,
    queryVector: number[],
    options: SearchOptions,
  ): Promise<SearchResult<T>[]> {
    const { limit = 10, minScore = 0.0, where = {} } = options

    const index = this.loadIndex()

    // Validate query vector dimensions against index config
    if (queryVector.length !== index.config.dimensions) {
      throw new Error(
        `Query vector dimensions (${queryVector.length}) don't match index dimensions (${index.config.dimensions})`,
      )
    }

    // The filter does not change between documents, so expand it once.
    // `?? {}` keeps an explicit null filter meaning "no filter".
    const filters = Object.entries(where ?? {})

    const results: Array<{
      item: T
      score: number
      distance: number
      documentId: string
      chunkIndex: number
    }> = []

    // Search through all documents and chunks
    for (const [documentId, document] of Object.entries(index.documents)) {
      // Apply where filters if provided (simple equality check)
      const fields = document as Record<string, unknown>
      if (!filters.every(([key, value]) => fields[key] === value)) {
        continue
      }

      // Search through each chunk
      for (const chunk of document.chunks) {
        const score = this.cosineSimilarity(queryVector, chunk.embedding)

        if (score >= minScore) {
          results.push({
            item: {
              documentId,
              title: document.title,
              content: chunk.text,
              chunkIndex: chunk.metadata.chunkIndex,
              metadata: chunk.metadata,
            } as T,
            score,
            distance: 1 - score,
            documentId,
            chunkIndex: chunk.metadata.chunkIndex,
          })
        }
      }
    }

    // Sort by score (descending) and limit results
    results.sort((a, b) => b.score - a.score)

    // Clamp so a negative limit yields no results instead of slice(0, -n),
    // which would return everything except the last n entries.
    return results.slice(0, Math.max(0, limit))
  }

  /**
   * Calculate cosine similarity between two vectors
   * Delegates to the shared cosineSimilarity helper.
   */
  cosineSimilarity(a: number[], b: number[]): number {
    return calculateCosineSimilarity(a, b)
  }

  /**
   * Get the loaded index for inspection
   *
   * @returns The cached index, or null if nothing has been loaded yet
   */
  getIndex(): EmbeddingsIndex | null {
    return this.index
  }

  /**
   * Force reload the index from disk
   * Drops the cached index first, so a failed reload leaves nothing cached.
   *
   * @throws Error under the same conditions as the initial load
   */
  reloadIndex(): void {
    this.index = null
    this.loadIndex()
  }
}
|
|
140
|
+
|
|
141
|
+
/**
 * Factory for a JSON file vector storage instance
 *
 * Only constructs the storage object for the given path and hands it back.
 *
 * @param filePath - Path of the embeddings JSON file the storage should use
 * @returns A new JsonFileStorage bound to that path
 *
 * @example
 * ```typescript
 * import { createJsonFileStorage } from '@opensaas/stack-rag/storage'
 *
 * const storage = createJsonFileStorage('.embeddings/docs.json')
 * const results = await storage.search('', '', queryVector, {
 *   limit: 10,
 *   minScore: 0.7
 * })
 * ```
 */
export function createJsonFileStorage(filePath: string): JsonFileStorage {
  const storage = new JsonFileStorage(filePath)
  return storage
}
|