@opensaas/stack-rag 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +4 -0
- package/CHANGELOG.md +10 -0
- package/CLAUDE.md +565 -0
- package/LICENSE +21 -0
- package/README.md +406 -0
- package/dist/config/index.d.ts +63 -0
- package/dist/config/index.d.ts.map +1 -0
- package/dist/config/index.js +94 -0
- package/dist/config/index.js.map +1 -0
- package/dist/config/plugin.d.ts +38 -0
- package/dist/config/plugin.d.ts.map +1 -0
- package/dist/config/plugin.js +215 -0
- package/dist/config/plugin.js.map +1 -0
- package/dist/config/plugin.test.d.ts +2 -0
- package/dist/config/plugin.test.d.ts.map +1 -0
- package/dist/config/plugin.test.js +554 -0
- package/dist/config/plugin.test.js.map +1 -0
- package/dist/config/types.d.ts +249 -0
- package/dist/config/types.d.ts.map +1 -0
- package/dist/config/types.js +5 -0
- package/dist/config/types.js.map +1 -0
- package/dist/fields/embedding.d.ts +85 -0
- package/dist/fields/embedding.d.ts.map +1 -0
- package/dist/fields/embedding.js +81 -0
- package/dist/fields/embedding.js.map +1 -0
- package/dist/fields/embedding.test.d.ts +2 -0
- package/dist/fields/embedding.test.d.ts.map +1 -0
- package/dist/fields/embedding.test.js +323 -0
- package/dist/fields/embedding.test.js.map +1 -0
- package/dist/fields/index.d.ts +6 -0
- package/dist/fields/index.d.ts.map +1 -0
- package/dist/fields/index.js +5 -0
- package/dist/fields/index.js.map +1 -0
- package/dist/index.d.ts +8 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +9 -0
- package/dist/index.js.map +1 -0
- package/dist/mcp/index.d.ts +19 -0
- package/dist/mcp/index.d.ts.map +1 -0
- package/dist/mcp/index.js +18 -0
- package/dist/mcp/index.js.map +1 -0
- package/dist/providers/index.d.ts +38 -0
- package/dist/providers/index.d.ts.map +1 -0
- package/dist/providers/index.js +68 -0
- package/dist/providers/index.js.map +1 -0
- package/dist/providers/ollama.d.ts +49 -0
- package/dist/providers/ollama.d.ts.map +1 -0
- package/dist/providers/ollama.js +151 -0
- package/dist/providers/ollama.js.map +1 -0
- package/dist/providers/openai.d.ts +41 -0
- package/dist/providers/openai.d.ts.map +1 -0
- package/dist/providers/openai.js +126 -0
- package/dist/providers/openai.js.map +1 -0
- package/dist/providers/providers.test.d.ts +2 -0
- package/dist/providers/providers.test.d.ts.map +1 -0
- package/dist/providers/providers.test.js +224 -0
- package/dist/providers/providers.test.js.map +1 -0
- package/dist/providers/types.d.ts +88 -0
- package/dist/providers/types.d.ts.map +1 -0
- package/dist/providers/types.js +2 -0
- package/dist/providers/types.js.map +1 -0
- package/dist/runtime/batch.d.ts +183 -0
- package/dist/runtime/batch.d.ts.map +1 -0
- package/dist/runtime/batch.js +240 -0
- package/dist/runtime/batch.js.map +1 -0
- package/dist/runtime/batch.test.d.ts +2 -0
- package/dist/runtime/batch.test.d.ts.map +1 -0
- package/dist/runtime/batch.test.js +251 -0
- package/dist/runtime/batch.test.js.map +1 -0
- package/dist/runtime/chunking.d.ts +42 -0
- package/dist/runtime/chunking.d.ts.map +1 -0
- package/dist/runtime/chunking.js +264 -0
- package/dist/runtime/chunking.js.map +1 -0
- package/dist/runtime/chunking.test.d.ts +2 -0
- package/dist/runtime/chunking.test.d.ts.map +1 -0
- package/dist/runtime/chunking.test.js +212 -0
- package/dist/runtime/chunking.test.js.map +1 -0
- package/dist/runtime/embeddings.d.ts +147 -0
- package/dist/runtime/embeddings.d.ts.map +1 -0
- package/dist/runtime/embeddings.js +201 -0
- package/dist/runtime/embeddings.js.map +1 -0
- package/dist/runtime/embeddings.test.d.ts +2 -0
- package/dist/runtime/embeddings.test.d.ts.map +1 -0
- package/dist/runtime/embeddings.test.js +366 -0
- package/dist/runtime/embeddings.test.js.map +1 -0
- package/dist/runtime/index.d.ts +14 -0
- package/dist/runtime/index.d.ts.map +1 -0
- package/dist/runtime/index.js +18 -0
- package/dist/runtime/index.js.map +1 -0
- package/dist/runtime/search.d.ts +135 -0
- package/dist/runtime/search.d.ts.map +1 -0
- package/dist/runtime/search.js +101 -0
- package/dist/runtime/search.js.map +1 -0
- package/dist/storage/index.d.ts +41 -0
- package/dist/storage/index.d.ts.map +1 -0
- package/dist/storage/index.js +73 -0
- package/dist/storage/index.js.map +1 -0
- package/dist/storage/json.d.ts +34 -0
- package/dist/storage/json.d.ts.map +1 -0
- package/dist/storage/json.js +82 -0
- package/dist/storage/json.js.map +1 -0
- package/dist/storage/pgvector.d.ts +53 -0
- package/dist/storage/pgvector.d.ts.map +1 -0
- package/dist/storage/pgvector.js +168 -0
- package/dist/storage/pgvector.js.map +1 -0
- package/dist/storage/sqlite-vss.d.ts +49 -0
- package/dist/storage/sqlite-vss.d.ts.map +1 -0
- package/dist/storage/sqlite-vss.js +148 -0
- package/dist/storage/sqlite-vss.js.map +1 -0
- package/dist/storage/storage.test.d.ts +2 -0
- package/dist/storage/storage.test.d.ts.map +1 -0
- package/dist/storage/storage.test.js +440 -0
- package/dist/storage/storage.test.js.map +1 -0
- package/dist/storage/types.d.ts +79 -0
- package/dist/storage/types.d.ts.map +1 -0
- package/dist/storage/types.js +49 -0
- package/dist/storage/types.js.map +1 -0
- package/package.json +82 -0
- package/src/config/index.ts +116 -0
- package/src/config/plugin.test.ts +664 -0
- package/src/config/plugin.ts +257 -0
- package/src/config/types.ts +283 -0
- package/src/fields/embedding.test.ts +408 -0
- package/src/fields/embedding.ts +150 -0
- package/src/fields/index.ts +6 -0
- package/src/index.ts +33 -0
- package/src/mcp/index.ts +21 -0
- package/src/providers/index.ts +81 -0
- package/src/providers/ollama.ts +186 -0
- package/src/providers/openai.ts +161 -0
- package/src/providers/providers.test.ts +275 -0
- package/src/providers/types.ts +100 -0
- package/src/runtime/batch.test.ts +332 -0
- package/src/runtime/batch.ts +424 -0
- package/src/runtime/chunking.test.ts +258 -0
- package/src/runtime/chunking.ts +334 -0
- package/src/runtime/embeddings.test.ts +441 -0
- package/src/runtime/embeddings.ts +380 -0
- package/src/runtime/index.ts +51 -0
- package/src/runtime/search.ts +243 -0
- package/src/storage/index.ts +86 -0
- package/src/storage/json.ts +106 -0
- package/src/storage/pgvector.ts +206 -0
- package/src/storage/sqlite-vss.ts +193 -0
- package/src/storage/storage.test.ts +521 -0
- package/src/storage/types.ts +126 -0
- package/tsconfig.json +13 -0
- package/tsconfig.tsbuildinfo +1 -0
- package/vitest.config.ts +18 -0
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
import type { VectorStorage } from './types.js'
|
|
2
|
+
import type { VectorStorageConfig } from '../config/types.js'
|
|
3
|
+
import { createJsonStorage } from './json.js'
|
|
4
|
+
import { createPgVectorStorage } from './pgvector.js'
|
|
5
|
+
import { createSqliteVssStorage } from './sqlite-vss.js'
|
|
6
|
+
|
|
7
|
+
/**
 * Registry of vector storage factories, keyed by the `type` value of a storage
 * config. It is seeded here with the built-in backends (`json`, `pgvector`,
 * `sqlite-vss`); `registerVectorStorage` adds or replaces entries later.
 */
const storageFactories = new Map<string, (config: VectorStorageConfig) => VectorStorage>([
  // The JSON backend takes no options, so the config argument is ignored.
  ['json', () => createJsonStorage()],
  [
    'pgvector',
    (storageConfig) => {
      // Guard against a factory being invoked with a config meant for another backend.
      if (storageConfig.type === 'pgvector') {
        return createPgVectorStorage(
          storageConfig as import('../config/types.js').PgVectorStorageConfig,
        )
      }
      throw new Error('Invalid config type for pgvector storage')
    },
  ],
  [
    'sqlite-vss',
    (storageConfig) => {
      // Same guard as above, for the sqlite-vss backend.
      if (storageConfig.type === 'sqlite-vss') {
        return createSqliteVssStorage(
          storageConfig as import('../config/types.js').SqliteVssStorageConfig,
        )
      }
      throw new Error('Invalid config type for sqlite-vss storage')
    },
  ],
])
|
|
29
|
+
|
|
30
|
+
/**
 * Add a factory for a custom vector storage backend (e.g., Pinecone, Qdrant)
 * to the factory registry.
 *
 * The registry is a plain map, so registering a `type` that is already present
 * replaces the factory previously stored under that key.
 *
 * @param type - Key the factory is stored under; `createVectorStorage` looks it up by `config.type`
 * @param factory - Function that builds the storage instance from the storage config
 *
 * @example
 * ```typescript
 * import { registerVectorStorage } from '@opensaas/stack-rag/storage'
 *
 * registerVectorStorage('pinecone', (config) => {
 *   return new PineconeVectorStorage(config)
 * })
 * ```
 */
export function registerVectorStorage(
  type: string,
  factory: (config: VectorStorageConfig) => VectorStorage,
): void {
  storageFactories.set(type, factory)
}
|
|
49
|
+
|
|
50
|
+
/**
 * Build a vector storage instance for the given configuration.
 * The backend is chosen by looking up `config.type` in the factory registry.
 *
 * @param config - Storage configuration; `type` selects the backend
 * @returns The storage instance produced by the registered factory
 * @throws Error when no factory is registered for `config.type`; the message
 *   lists the registered backend names
 *
 * @example
 * ```typescript
 * import { createVectorStorage } from '@opensaas/stack-rag/storage'
 *
 * const storage = createVectorStorage({
 *   type: 'pgvector',
 *   distanceFunction: 'cosine'
 * })
 *
 * const results = await storage.search('Article', 'contentEmbedding', queryVector, {
 *   limit: 10,
 *   context
 * })
 * ```
 */
export function createVectorStorage(config: VectorStorageConfig): VectorStorage {
  const createBackend = storageFactories.get(config.type)

  if (createBackend) {
    return createBackend(config)
  }

  const registeredTypes = [...storageFactories.keys()].join(', ')
  throw new Error(
    `Unknown vector storage type: ${config.type}. Available backends: ${registeredTypes}`,
  )
}
|
|
81
|
+
|
|
82
|
+
// Export types and individual storage backends
|
|
83
|
+
export * from './types.js'
|
|
84
|
+
export { JsonVectorStorage, createJsonStorage } from './json.js'
|
|
85
|
+
export { PgVectorStorage, createPgVectorStorage } from './pgvector.js'
|
|
86
|
+
export { SqliteVssStorage, createSqliteVssStorage } from './sqlite-vss.js'
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
import type { VectorStorage, SearchOptions } from './types.js'
|
|
2
|
+
import type { SearchResult, StoredEmbedding } from '../config/types.js'
|
|
3
|
+
import { cosineSimilarity as calculateCosineSimilarity } from './types.js'
|
|
4
|
+
import { getDbKey } from '@opensaas/stack-core'
|
|
5
|
+
|
|
6
|
+
/**
 * Vector storage that reads embeddings stored as JSON and ranks them in JavaScript.
 *
 * Every search loads the candidate rows through `context.db` and scores each one
 * with cosine similarity in-process, so no database extension is required.
 * Intended for development and small datasets.
 */
export class JsonVectorStorage implements VectorStorage {
  readonly type = 'json'

  /**
   * Rank the items of a list by cosine similarity to `queryVector`.
   *
   * @param listKey - List to search; mapped to a `context.db` key with `getDbKey`
   * @param fieldName - Field that holds the stored embedding
   * @param queryVector - Vector the stored embeddings are compared against
   * @param options - Supplies `context`, plus optional `where` (default `{}`),
   *   `limit` (default 10) and `minScore` (default 0)
   * @returns Items whose score is at least `minScore`, highest score first,
   *   truncated to `limit` entries; `distance` is `1 - score`
   * @throws Error when the list is not present on `context.db`
   */
  async search<T = unknown>(
    listKey: string,
    fieldName: string,
    queryVector: number[],
    options: SearchOptions,
  ): Promise<SearchResult<T>[]> {
    const { context, where = {}, limit = 10, minScore = 0.0 } = options

    const model = context.db[getDbKey(listKey)]
    if (!model) {
      throw new Error(`List '${listKey}' not found in context.db`)
    }

    // Scoring happens in JS, so every row that has an embedding has to be loaded.
    // The rows come from context.db (access-controlled, per the original author's note).
    const candidates = await model.findMany({
      where: { ...where, [fieldName]: { not: null } },
    })

    const matches: Array<{ item: T; score: number; distance: number }> = []

    for (const candidate of candidates) {
      const stored = (candidate[fieldName] as StoredEmbedding | null)?.vector

      // Rows without a usable embedding are skipped silently.
      if (!stored) {
        continue
      }

      // Vectors of a different dimension cannot be compared; warn and skip.
      if (stored.length !== queryVector.length) {
        console.warn(
          `Vector dimension mismatch for ${listKey}.${candidate.id}.${fieldName}: expected ${queryVector.length}, got ${stored.length}. Skipping.`,
        )
        continue
      }

      const score = this.cosineSimilarity(queryVector, stored)

      // Written as a positive comparison so that a NaN score is dropped as well.
      if (score >= minScore) {
        matches.push({ item: candidate as T, score, distance: 1 - score })
      }
    }

    // Best match first, then cap the result size.
    matches.sort((left, right) => right.score - left.score)
    return matches.slice(0, limit)
  }

  /**
   * Cosine similarity of two vectors; delegates to the shared helper from `./types.js`.
   */
  cosineSimilarity(a: number[], b: number[]): number {
    return calculateCosineSimilarity(a, b)
  }
}
|
|
89
|
+
|
|
90
|
+
/**
 * Factory for the JSON vector storage backend. Takes no options.
 *
 * @returns A new `JsonVectorStorage` instance
 *
 * @example
 * ```typescript
 * import { createJsonStorage } from '@opensaas/stack-rag/storage'
 *
 * const storage = createJsonStorage()
 * const results = await storage.search('Article', 'contentEmbedding', queryVector, {
 *   limit: 10,
 *   context
 * })
 * ```
 */
export function createJsonStorage(): JsonVectorStorage {
  const storage = new JsonVectorStorage()
  return storage
}
|
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
import type { VectorStorage, SearchOptions } from './types.js'
|
|
2
|
+
import type { SearchResult } from '../config/types.js'
|
|
3
|
+
import type { PgVectorStorageConfig } from '../config/types.js'
|
|
4
|
+
import { cosineSimilarity as calculateCosineSimilarity } from './types.js'
|
|
5
|
+
import { getDbKey } from '@opensaas/stack-core'
|
|
6
|
+
|
|
7
|
+
/**
 * pgvector storage backend
 * Ranks rows by vector distance inside PostgreSQL using the pgvector extension
 * Requires: CREATE EXTENSION vector;
 *
 * Embeddings are read from the `vector` key of a JSON column and cast to the
 * pgvector `vector` type at query time.
 */
export class PgVectorStorage implements VectorStorage {
  readonly type = 'pgvector'
  private distanceFunction: 'cosine' | 'l2' | 'inner_product'

  /**
   * @param config - Storage config; `distanceFunction` falls back to `'cosine'` when not set
   */
  constructor(config: PgVectorStorageConfig) {
    this.distanceFunction = config.distanceFunction || 'cosine'
  }

  /**
   * Quote a SQL identifier for interpolation into a raw query.
   * Embedded double quotes are doubled so the value cannot close the quoted
   * identifier early and inject SQL.
   */
  private static quoteIdentifier(identifier: string): string {
    return `"${String(identifier).replace(/"/g, '""')}"`
  }

  /**
   * Serialise a query vector into pgvector's text format, e.g. `[1,2,3]`.
   *
   * @throws Error if the value is not an array made only of finite numbers
   */
  private static toVectorLiteral(vector: number[]): string {
    const isValid =
      Array.isArray(vector) &&
      vector.every((value) => typeof value === 'number' && Number.isFinite(value))

    if (!isValid) {
      throw new Error('pgvector search requires queryVector to be an array of finite numbers')
    }

    return `[${vector.join(',')}]`
  }

  /**
   * Get the appropriate distance operator for pgvector
   */
  private getDistanceOperator(): string {
    switch (this.distanceFunction) {
      case 'cosine':
        return '<=>' // Cosine distance
      case 'l2':
        return '<->' // L2 distance
      case 'inner_product':
        return '<#>' // Inner product (negative, so smaller is more similar)
      default:
        return '<=>' // Default to cosine
    }
  }

  /**
   * Convert a pgvector distance into a score where higher means more similar.
   *
   * - cosine: `1 - distance`, i.e. the cosine similarity (can be negative)
   * - l2: `1 / (1 + distance)`
   * - inner_product: `-distance`; the operator yields the negated inner product,
   *   so this is the raw inner product and is not limited to the 0-1 range
   */
  private distanceToScore(distance: number): number {
    switch (this.distanceFunction) {
      case 'cosine':
        return 1 - distance
      case 'l2':
        return 1 / (1 + distance)
      case 'inner_product':
        return -distance
      default:
        // Mirrors getDistanceOperator, which also falls back to cosine.
        return 1 - distance
    }
  }

  /**
   * Search for similar vectors using pgvector.
   *
   * Two steps: a raw SQL query ranks rows by distance and returns ids, then the
   * full items are loaded through `context.db` (with the caller's `where`
   * filter) and returned in ranking order. Ids that the second query does not
   * return are dropped from the result.
   *
   * When `context.prisma` is not available the search is delegated to the
   * JSON-based backend.
   *
   * @param listKey - List to search
   * @param fieldName - JSON column holding the embedding
   * @param queryVector - Vector to compare against; must contain only finite numbers
   * @param options - Supplies `context`, plus optional `where` (default `{}`),
   *   `limit` (default 10) and `minScore` (default 0)
   * @throws Error if the list is missing from `context.db`, if `queryVector` is
   *   invalid, or if the database query fails
   */
  async search<T = unknown>(
    listKey: string,
    fieldName: string,
    queryVector: number[],
    options: SearchOptions,
  ): Promise<SearchResult<T>[]> {
    const { limit = 10, minScore = 0.0, context, where = {} } = options

    const dbKey = getDbKey(listKey)
    const model = context.db[dbKey]

    if (!model) {
      throw new Error(`List '${listKey}' not found in context.db`)
    }

    // pgvector operators need a raw query, which the access-controlled
    // context.db does not expose; the underlying Prisma client is used instead.
    const prisma = context.prisma

    if (!prisma) {
      // NOTE(review): the message refers to `_prisma` while the code reads
      // `context.prisma` — confirm which name is intended.
      console.warn(
        'pgvector: Could not access Prisma client directly. ' +
          'Falling back to JSON-based search. ' +
          'For full pgvector support, ensure the context exposes _prisma.',
      )
      return this.fallbackSearch(listKey, fieldName, queryVector, options)
    }

    // Validate before touching the database; the literal is additionally sent
    // as a bound parameter rather than spliced into the SQL text.
    const vectorLiteral = PgVectorStorage.toVectorLiteral(queryVector)
    const distanceOp = this.getDistanceOperator()
    const column = PgVectorStorage.quoteIdentifier(fieldName)
    // NOTE(review): assumes the table name is the lower-cased list key —
    // confirm against the generated Prisma schema.
    const table = PgVectorStorage.quoteIdentifier(listKey.toLowerCase())

    try {
      // Step 1: rank by distance. This query bypasses access control; it only
      // yields ids and distances, and access control is enforced in step 2.
      // `limit * 2` is always numeric, so it cannot carry SQL text.
      // NOTE(review): the reason for the 2x over-fetch is not documented.
      const rows = (await prisma.$queryRawUnsafe(
        `
        SELECT id,
          ((${column}->>'vector')::vector ${distanceOp} $1::vector) as distance
        FROM ${table}
        WHERE ${column} IS NOT NULL
          AND ${column}->>'vector' IS NOT NULL
        ORDER BY distance
        LIMIT ${limit * 2}
        `,
        vectorLiteral,
      )) as Array<{ id: string; distance: string }>

      // Keep only rows that reach the score threshold, capped at `limit`.
      const ranked = rows
        .map((row) => {
          const distance = Number(row.distance)
          return { id: row.id, distance, score: this.distanceToScore(distance) }
        })
        .filter((row) => row.score >= minScore)
        .slice(0, limit)

      if (ranked.length === 0) {
        return []
      }

      // Step 2: fetch full items via the access-controlled context.
      const items = await model.findMany({
        where: {
          ...where,
          id: {
            in: ranked.map((row) => row.id),
          },
        },
      })

      // Index by id once instead of scanning the item list for every ranked row.
      const itemsById = new Map<unknown, unknown>()
      for (const item of items as unknown[]) {
        const id = (item as { id: unknown }).id
        if (!itemsById.has(id)) {
          itemsById.set(id, item)
        }
      }

      // Re-attach scores in ranking order.
      const searchResults: SearchResult<T>[] = []
      for (const match of ranked) {
        const item = itemsById.get(match.id)
        if (item) {
          searchResults.push({
            item: item as T,
            score: match.score,
            distance: match.distance,
          })
        }
      }

      return searchResults
    } catch (error) {
      // Anything can be thrown in JS; avoid printing "undefined" for non-Error values.
      const reason = error instanceof Error ? error.message : String(error)
      throw new Error(
        `pgvector search failed: ${reason}\n` +
          'Ensure pgvector extension is installed: CREATE EXTENSION vector;',
      )
    }
  }

  /**
   * Fallback to JSON-based search if we can't access Prisma directly
   */
  private async fallbackSearch<T = unknown>(
    listKey: string,
    fieldName: string,
    queryVector: number[],
    options: SearchOptions,
  ): Promise<SearchResult<T>[]> {
    const { JsonVectorStorage } = await import('./json.js')
    const jsonStorage = new JsonVectorStorage()
    return jsonStorage.search(listKey, fieldName, queryVector, options)
  }

  /**
   * Cosine similarity of two vectors; delegates to the shared helper from `./types.js`.
   */
  cosineSimilarity(a: number[], b: number[]): number {
    return calculateCosineSimilarity(a, b)
  }
}
|
|
185
|
+
|
|
186
|
+
/**
 * Factory for the pgvector storage backend.
 *
 * @param config - pgvector storage configuration, passed to the `PgVectorStorage` constructor
 * @returns A new `PgVectorStorage` instance
 *
 * @example
 * ```typescript
 * import { createPgVectorStorage } from '@opensaas/stack-rag/storage'
 *
 * const storage = createPgVectorStorage({
 *   type: 'pgvector',
 *   distanceFunction: 'cosine'
 * })
 *
 * const results = await storage.search('Article', 'contentEmbedding', queryVector, {
 *   limit: 10,
 *   context
 * })
 * ```
 */
export function createPgVectorStorage(config: PgVectorStorageConfig): PgVectorStorage {
  const storage = new PgVectorStorage(config)
  return storage
}
|
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
import type { VectorStorage, SearchOptions } from './types.js'
|
|
2
|
+
import type { SearchResult } from '../config/types.js'
|
|
3
|
+
import type { SqliteVssStorageConfig } from '../config/types.js'
|
|
4
|
+
import { cosineSimilarity as calculateCosineSimilarity } from './types.js'
|
|
5
|
+
import { getDbKey } from '@opensaas/stack-core'
|
|
6
|
+
|
|
7
|
+
/**
 * SQLite VSS storage backend
 * Intended for the sqlite-vss extension.
 *
 * The native sqlite-vss query is not implemented in this class yet: rows are
 * loaded through `context.db` and distances are computed in JavaScript, using
 * the configured distance function.
 */
export class SqliteVssStorage implements VectorStorage {
  readonly type = 'sqlite-vss'
  private distanceFunction: 'cosine' | 'l2'

  /**
   * @param config - Storage config; `distanceFunction` falls back to `'cosine'` when not set
   */
  constructor(config: SqliteVssStorageConfig) {
    this.distanceFunction = config.distanceFunction || 'cosine'
  }

  /**
   * Convert a distance into a score where higher means more similar.
   *
   * - cosine: `1 - distance`, i.e. the cosine similarity (can be negative)
   * - anything else is treated as L2: `1 / (1 + distance)`
   */
  private distanceToScore(distance: number): number {
    if (this.distanceFunction === 'cosine') {
      return 1 - distance
    } else {
      return 1 / (1 + distance)
    }
  }

  /**
   * Rank the items of a list by distance to `queryVector`.
   *
   * @param listKey - List to search; mapped to a `context.db` key with `getDbKey`
   * @param fieldName - Field that holds the stored embedding
   * @param queryVector - Vector the stored embeddings are compared against
   * @param options - Supplies `context`, plus optional `where` (default `{}`),
   *   `limit` (default 10) and `minScore` (default 0)
   * @returns Items whose score is at least `minScore`, highest score first,
   *   truncated to `limit` entries
   * @throws Error if the list is missing from `context.db` or the search fails
   */
  async search<T = unknown>(
    listKey: string,
    fieldName: string,
    queryVector: number[],
    options: SearchOptions,
  ): Promise<SearchResult<T>[]> {
    const { limit = 10, minScore = 0.0, context, where = {} } = options

    const dbKey = getDbKey(listKey)
    const model = context.db[dbKey]

    if (!model) {
      throw new Error(`List '${listKey}' not found in context.db`)
    }

    try {
      // Get the underlying Prisma client
      const prisma = context.prisma

      if (!prisma) {
        console.warn(
          'sqlite-vss: Could not access Prisma client directly. ' +
            'Falling back to JSON-based search. ' +
            'For full sqlite-vss support, ensure the context exposes _prisma.',
        )
        // The JSON backend ranks by cosine similarity only, so delegating to it
        // is right only for the cosine configuration. For any other distance
        // function, continue with the scan below, which honours the configured
        // function and does not need the Prisma client.
        if (this.distanceFunction === 'cosine') {
          return this.fallbackSearch(listKey, fieldName, queryVector, options)
        }
      }

      // A native sqlite-vss query would need virtual tables created at schema
      // generation time. Until then the vectors are loaded and compared in JS.
      const items = await model.findMany({
        where: {
          ...where,
          [fieldName]: {
            not: null,
          },
        },
      })

      const results: Array<{ item: T; score: number; distance: number }> = []

      for (const item of items) {
        const embeddingData = item[fieldName] as
          | import('../config/types.js').StoredEmbedding
          | null
          | undefined

        // Rows without a usable embedding are skipped silently.
        if (!embeddingData || !embeddingData.vector) {
          continue
        }

        const storedVector = embeddingData.vector

        // Vectors of a different dimension cannot be compared; warn and skip.
        if (storedVector.length !== queryVector.length) {
          console.warn(
            `Vector dimension mismatch for ${listKey}.${item.id}.${fieldName}: ` +
              `expected ${queryVector.length}, got ${storedVector.length}. Skipping.`,
          )
          continue
        }

        let distance: number
        if (this.distanceFunction === 'cosine') {
          const similarity = this.cosineSimilarity(queryVector, storedVector)
          distance = 1 - similarity
        } else {
          // Euclidean (L2) distance: square root of the summed squared differences.
          distance = Math.sqrt(
            storedVector.reduce((sum: number, val: number, i: number) => {
              const diff = val - queryVector[i]
              return sum + diff * diff
            }, 0),
          )
        }

        const score = this.distanceToScore(distance)

        // Positive comparison so that a NaN score is dropped as well.
        if (score >= minScore) {
          results.push({
            item: item as T,
            score,
            distance,
          })
        }
      }

      // Sort by score (descending) and limit results
      results.sort((a, b) => b.score - a.score)

      return results.slice(0, limit)
    } catch (error) {
      // Anything can be thrown in JS; avoid printing "undefined" for non-Error values.
      const reason = error instanceof Error ? error.message : String(error)
      throw new Error(
        `sqlite-vss search failed: ${reason}\n` +
          'Ensure sqlite-vss extension is loaded in your SQLite connection.',
      )
    }
  }

  /**
   * Fallback to JSON-based search if we can't access Prisma directly.
   * The JSON backend always ranks by cosine similarity.
   */
  private async fallbackSearch<T = unknown>(
    listKey: string,
    fieldName: string,
    queryVector: number[],
    options: SearchOptions,
  ): Promise<SearchResult<T>[]> {
    const { JsonVectorStorage } = await import('./json.js')
    const jsonStorage = new JsonVectorStorage()
    return jsonStorage.search(listKey, fieldName, queryVector, options)
  }

  /**
   * Cosine similarity of two vectors; delegates to the shared helper from `./types.js`.
   */
  cosineSimilarity(a: number[], b: number[]): number {
    return calculateCosineSimilarity(a, b)
  }
}
|
|
172
|
+
|
|
173
|
+
/**
 * Factory for the SQLite VSS storage backend.
 *
 * @param config - sqlite-vss storage configuration, passed to the `SqliteVssStorage` constructor
 * @returns A new `SqliteVssStorage` instance
 *
 * @example
 * ```typescript
 * import { createSqliteVssStorage } from '@opensaas/stack-rag/storage'
 *
 * const storage = createSqliteVssStorage({
 *   type: 'sqlite-vss',
 *   distanceFunction: 'cosine'
 * })
 *
 * const results = await storage.search('Article', 'contentEmbedding', queryVector, {
 *   limit: 10,
 *   context
 * })
 * ```
 */
export function createSqliteVssStorage(config: SqliteVssStorageConfig): SqliteVssStorage {
  const storage = new SqliteVssStorage(config)
  return storage
}
|