@opensaas/stack-rag 0.1.6 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +1 -1
- package/CHANGELOG.md +141 -0
- package/README.md +82 -6
- package/dist/config/index.d.ts.map +1 -1
- package/dist/config/index.js +9 -0
- package/dist/config/index.js.map +1 -1
- package/dist/config/plugin.d.ts.map +1 -1
- package/dist/config/plugin.js +61 -1
- package/dist/config/plugin.js.map +1 -1
- package/dist/config/plugin.test.js +70 -14
- package/dist/config/plugin.test.js.map +1 -1
- package/dist/config/types.d.ts +186 -0
- package/dist/config/types.d.ts.map +1 -1
- package/dist/fields/index.d.ts +1 -0
- package/dist/fields/index.d.ts.map +1 -1
- package/dist/fields/index.js +1 -0
- package/dist/fields/index.js.map +1 -1
- package/dist/fields/searchable.d.ts +42 -0
- package/dist/fields/searchable.d.ts.map +1 -0
- package/dist/fields/searchable.js +51 -0
- package/dist/fields/searchable.js.map +1 -0
- package/dist/fields/searchable.test.d.ts +2 -0
- package/dist/fields/searchable.test.d.ts.map +1 -0
- package/dist/fields/searchable.test.js +112 -0
- package/dist/fields/searchable.test.js.map +1 -0
- package/dist/index.d.ts +2 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/providers/openai.d.ts +2 -0
- package/dist/providers/openai.d.ts.map +1 -1
- package/dist/providers/openai.js +35 -20
- package/dist/providers/openai.js.map +1 -1
- package/dist/runtime/batch.test.js +1 -1
- package/dist/runtime/build-time.d.ts +100 -0
- package/dist/runtime/build-time.d.ts.map +1 -0
- package/dist/runtime/build-time.js +185 -0
- package/dist/runtime/build-time.js.map +1 -0
- package/dist/runtime/index.d.ts +3 -0
- package/dist/runtime/index.d.ts.map +1 -1
- package/dist/runtime/index.js +6 -0
- package/dist/runtime/index.js.map +1 -1
- package/dist/runtime/markdown.d.ts +33 -0
- package/dist/runtime/markdown.d.ts.map +1 -0
- package/dist/runtime/markdown.js +94 -0
- package/dist/runtime/markdown.js.map +1 -0
- package/dist/runtime/provider-helpers.d.ts +56 -0
- package/dist/runtime/provider-helpers.d.ts.map +1 -0
- package/dist/runtime/provider-helpers.js +95 -0
- package/dist/runtime/provider-helpers.js.map +1 -0
- package/dist/runtime/types.d.ts +29 -0
- package/dist/runtime/types.d.ts.map +1 -0
- package/dist/runtime/types.js +6 -0
- package/dist/runtime/types.js.map +1 -0
- package/dist/storage/access-filter.d.ts +30 -0
- package/dist/storage/access-filter.d.ts.map +1 -0
- package/dist/storage/access-filter.js +241 -0
- package/dist/storage/access-filter.js.map +1 -0
- package/dist/storage/index.d.ts +2 -0
- package/dist/storage/index.d.ts.map +1 -1
- package/dist/storage/index.js +3 -0
- package/dist/storage/index.js.map +1 -1
- package/dist/storage/json-file.d.ts +53 -0
- package/dist/storage/json-file.d.ts.map +1 -0
- package/dist/storage/json-file.js +124 -0
- package/dist/storage/json-file.js.map +1 -0
- package/dist/storage/pgvector.d.ts.map +1 -1
- package/dist/storage/pgvector.js +26 -11
- package/dist/storage/pgvector.js.map +1 -1
- package/dist/storage/storage.test.js +2 -0
- package/dist/storage/storage.test.js.map +1 -1
- package/dist/storage/types.d.ts +5 -0
- package/dist/storage/types.d.ts.map +1 -1
- package/dist/storage/types.js.map +1 -1
- package/package.json +6 -5
- package/src/config/index.ts +9 -0
- package/src/config/plugin.test.ts +70 -14
- package/src/config/plugin.ts +72 -2
- package/src/config/types.ts +217 -0
- package/src/fields/index.ts +2 -0
- package/src/fields/searchable.test.ts +136 -0
- package/src/fields/searchable.ts +57 -0
- package/src/index.ts +6 -0
- package/src/providers/openai.ts +37 -22
- package/src/runtime/batch.test.ts +1 -1
- package/src/runtime/build-time.ts +216 -0
- package/src/runtime/index.ts +18 -0
- package/src/runtime/markdown.ts +119 -0
- package/src/runtime/provider-helpers.ts +115 -0
- package/src/runtime/types.ts +30 -0
- package/src/storage/access-filter.ts +303 -0
- package/src/storage/index.ts +4 -0
- package/src/storage/json-file.ts +157 -0
- package/src/storage/pgvector.ts +31 -11
- package/src/storage/storage.test.ts +2 -0
- package/src/storage/types.ts +6 -0
- package/tsconfig.tsbuildinfo +1 -1
|
@@ -0,0 +1,303 @@
|
|
|
1
|
+
import type { AccessContext, PrismaFilter, AccessControl } from '@opensaas/stack-core'
|
|
2
|
+
import type { OpenSaasConfig } from '@opensaas/stack-core'
|
|
3
|
+
|
|
4
|
+
/**
 * Run a list's access control function and hand back its decision
 * (mirrors the helper of the same name in @opensaas/stack-core/access).
 *
 * @param accessControl - The access rule to evaluate, if the list declares one
 * @param args - Session, optional item and access context forwarded to the rule
 * @returns `false` when no rule is supplied, otherwise the rule's awaited result
 */
async function checkAccess<T = Record<string, unknown>>(
  accessControl: AccessControl<T> | undefined,
  args: {
    session: AccessContext['session']
    item?: T
    context: AccessContext
  },
): Promise<boolean | PrismaFilter<T>> {
  // A missing rule is treated as "deny"; otherwise the rule's verdict is passed through untouched
  return accessControl ? await accessControl(args) : false
}
|
|
25
|
+
|
|
26
|
+
/**
 * Combine a caller-supplied filter with the outcome of an access check
 * (mirrors the helper of the same name in @opensaas/stack-core/access).
 *
 * @param userFilter - Filter supplied by the caller, if any
 * @param accessFilter - `false` (denied), `true` (unrestricted) or a restricting filter
 * @returns `null` when access is denied, otherwise the filter to apply
 */
function mergeFilters(
  userFilter: PrismaFilter | undefined,
  accessFilter: boolean | PrismaFilter,
): PrismaFilter | null {
  if (typeof accessFilter === 'boolean') {
    // true  -> only the caller's filter (or an empty one) applies
    // false -> denied outright
    return accessFilter ? (userFilter || {}) : null
  }

  // With no caller filter the access filter stands alone; otherwise both must hold
  return userFilter ? { AND: [accessFilter, userFilter] } : accessFilter
}
|
|
53
|
+
|
|
54
|
+
/**
 * Build the access control filter for a given list and context.
 *
 * Evaluates the list's `access.operation.query` rule and normalises its
 * outcome into either a Prisma filter or `null` (access denied).
 *
 * @param listKey - The list name (e.g., 'Post', 'Article')
 * @param context - The access context with session
 * @param config - The OpenSaas configuration
 * @returns `{}` when access is unrestricted, a Prisma filter when access is
 *   restricted, or `null` when access is denied
 * @throws Error if `listKey` is not present in `config.lists`
 */
export async function buildAccessControlFilter(
  listKey: string,
  context: AccessContext,
  config: OpenSaasConfig,
): Promise<PrismaFilter | null> {
  const listConfig = config.lists[listKey]

  if (!listConfig) {
    throw new Error(`List '${listKey}' not found in config`)
  }

  const queryAccess = listConfig.access?.operation?.query

  if (!queryAccess) {
    // No access control means deny by default (following OpenSaaS Stack pattern)
    return null
  }

  // Typed as unknown on purpose: the rule is user code and may return something
  // outside its declared type (e.g. a forgotten `return` yields undefined).
  const accessResult: unknown = await checkAccess(queryAccess, {
    session: context.session,
    context,
  })

  // Full access: an empty filter places no restriction on the query
  if (accessResult === true) {
    return {}
  }

  // Fail closed: `false`, `undefined`, `null` and any other non-object result
  // is a denial. Passing such a value through would let callers that test the
  // filter for truthiness skip access control altogether.
  if (typeof accessResult !== 'object' || accessResult === null) {
    return null
  }

  return accessResult as PrismaFilter
}
|
|
101
|
+
|
|
102
|
+
/**
 * Combine an access control filter with a caller-provided where clause.
 *
 * @param accessFilter - Filter from access control, or `null` when access is denied
 * @param userWhere - User-provided where clause (defaults to an empty object)
 * @returns The combined filter, or `null` if access is denied
 */
export function mergeAccessFilter(
  accessFilter: PrismaFilter | null,
  userWhere: Record<string, unknown> = {},
): PrismaFilter | null {
  // A denied access check short-circuits; nothing the caller supplies can widen it
  return accessFilter === null ? null : mergeFilters(userWhere, accessFilter)
}
|
|
119
|
+
|
|
120
|
+
/**
 * Convert a Prisma filter object to a SQL WHERE clause.
 *
 * Supported: `AND` (array or single object), `OR` (array), `NOT` (object or
 * array), direct equality (including `null` and `Date`), and the field
 * operators `equals`, `not`, `in`, `notIn`, `lt`, `lte`, `gt`, `gte`,
 * `contains`, `startsWith`, `endsWith` and `isNull`.
 *
 * Conditions that can never match are rendered as `FALSE` rather than being
 * dropped, so an empty `in` list or an `OR` with no effective branches does
 * not silently widen the result set.
 *
 * @param filter - Prisma filter object
 * @param tableName - Table name for column references
 * @returns SQL WHERE clause string (without "WHERE" keyword); `'TRUE'` when
 *   the filter places no restriction
 * @throws Error if a filter value is a non-finite number (NaN / Infinity)
 */
export function prismaFilterToSQL(filter: PrismaFilter, tableName?: string): string {
  if (!filter || Object.keys(filter).length === 0) {
    return 'TRUE' // No filter means all records
  }

  const conditions: string[] = []

  for (const [key, value] of Object.entries(filter) as Array<[string, unknown]>) {
    // Handle logical operators
    if (key === 'AND') {
      const andConditions = toFilterList(value)
        .map((subFilter) => prismaFilterToSQL(subFilter, tableName))
        .filter((c) => c !== 'TRUE')
      if (andConditions.length > 0) {
        conditions.push(`(${andConditions.join(' AND ')})`)
      }
      continue
    }

    if (key === 'OR') {
      if (!Array.isArray(value)) continue
      const orConditions = (value as PrismaFilter[])
        .map((subFilter) => prismaFilterToSQL(subFilter, tableName))
        .filter((c) => c !== 'TRUE')
      // An OR with no effective branches matches nothing. Dropping it would
      // turn "no rows" into "all rows".
      conditions.push(orConditions.length > 0 ? `(${orConditions.join(' OR ')})` : 'FALSE')
      continue
    }

    if (key === 'NOT') {
      // NOT may be a single filter or a list; every listed filter must be false
      for (const subFilter of toFilterList(value)) {
        const notCondition = prismaFilterToSQL(subFilter, tableName)
        if (notCondition !== 'TRUE') {
          conditions.push(`NOT (${notCondition})`)
        }
      }
      continue
    }

    // Handle field conditions
    const columnName = tableName
      ? `${quoteIdentifier(tableName)}.${quoteIdentifier(key)}`
      : quoteIdentifier(key)

    if (value === null) {
      conditions.push(`${columnName} IS NULL`)
      continue
    }

    // Scalars and Date instances are direct equality. A Date is an object with
    // no enumerable keys, so it must not fall through to the operator loop.
    if (typeof value !== 'object' || value instanceof Date) {
      conditions.push(`${columnName} = ${escapeSQLValue(value)}`)
      continue
    }

    // Handle nested field conditions
    const fieldConditions = fieldOperatorsToSQL(columnName, value as Record<string, unknown>)

    if (fieldConditions.length > 0) {
      conditions.push(fieldConditions.join(' AND '))
    }
  }

  if (conditions.length === 0) {
    return 'TRUE'
  }

  return conditions.join(' AND ')
}

/**
 * Quote a SQL identifier, doubling embedded double quotes so that a filter
 * key cannot terminate the identifier and inject SQL.
 */
function quoteIdentifier(name: string): string {
  return `"${name.replace(/"/g, '""')}"`
}

/**
 * Normalise the value of an `AND` / `NOT` key to a list of filters.
 * Accepts an array or a single filter object; anything else yields an empty list.
 */
function toFilterList(value: unknown): PrismaFilter[] {
  if (Array.isArray(value)) {
    return value as PrismaFilter[]
  }
  if (typeof value === 'object' && value !== null) {
    return [value as PrismaFilter]
  }
  return []
}

/**
 * Translate the operator object of one field (e.g. `{ gt: 1, lt: 5 }`) into
 * SQL conditions on `columnName`. The caller joins the results with AND.
 */
function fieldOperatorsToSQL(columnName: string, operators: Record<string, unknown>): string[] {
  const fieldConditions: string[] = []

  for (const [operator, operatorValue] of Object.entries(operators)) {
    switch (operator) {
      case 'equals':
        if (operatorValue === null) {
          fieldConditions.push(`${columnName} IS NULL`)
        } else {
          fieldConditions.push(`${columnName} = ${escapeSQLValue(operatorValue)}`)
        }
        break

      case 'not':
        if (operatorValue === null) {
          fieldConditions.push(`${columnName} IS NOT NULL`)
        } else if (typeof operatorValue === 'object' && !(operatorValue instanceof Date)) {
          // Nested form, e.g. { not: { in: [...] } }: negate the nested conditions
          const nested = fieldOperatorsToSQL(columnName, operatorValue as Record<string, unknown>)
          if (nested.length > 0) {
            fieldConditions.push(`NOT (${nested.join(' AND ')})`)
          }
        } else {
          fieldConditions.push(`${columnName} != ${escapeSQLValue(operatorValue)}`)
        }
        break

      case 'in':
        if (Array.isArray(operatorValue)) {
          if (operatorValue.length > 0) {
            const values = operatorValue.map((v) => escapeSQLValue(v)).join(', ')
            fieldConditions.push(`${columnName} IN (${values})`)
          } else {
            // Membership in an empty list can never hold
            fieldConditions.push('FALSE')
          }
        }
        break

      case 'notIn':
        // An empty list excludes nothing, so no condition is emitted for it
        if (Array.isArray(operatorValue) && operatorValue.length > 0) {
          const values = operatorValue.map((v) => escapeSQLValue(v)).join(', ')
          fieldConditions.push(`${columnName} NOT IN (${values})`)
        }
        break

      case 'lt':
        fieldConditions.push(`${columnName} < ${escapeSQLValue(operatorValue)}`)
        break

      case 'lte':
        fieldConditions.push(`${columnName} <= ${escapeSQLValue(operatorValue)}`)
        break

      case 'gt':
        fieldConditions.push(`${columnName} > ${escapeSQLValue(operatorValue)}`)
        break

      case 'gte':
        fieldConditions.push(`${columnName} >= ${escapeSQLValue(operatorValue)}`)
        break

      case 'contains':
        fieldConditions.push(`${columnName} LIKE ${escapeSQLValue(`%${operatorValue}%`)}`)
        break

      case 'startsWith':
        fieldConditions.push(`${columnName} LIKE ${escapeSQLValue(`${operatorValue}%`)}`)
        break

      case 'endsWith':
        fieldConditions.push(`${columnName} LIKE ${escapeSQLValue(`%${operatorValue}`)}`)
        break

      case 'isNull':
        if (operatorValue === true) {
          fieldConditions.push(`${columnName} IS NULL`)
        } else {
          fieldConditions.push(`${columnName} IS NOT NULL`)
        }
        break

      // Add more operators as needed
      default:
        // NOTE(review): the condition is dropped after this warning, which
        // widens the filter. Consider failing closed once callers are audited.
        console.warn(`Unsupported Prisma filter operator: ${operator}`)
    }
  }

  return fieldConditions
}

/**
 * Render a JavaScript value as a SQL literal.
 *
 * Strings are single-quoted with embedded quotes doubled. This is basic
 * escaping only; prefer parameterized queries wherever the driver allows it.
 *
 * @throws Error if `value` is a non-finite number, which has no bare SQL literal
 */
function escapeSQLValue(value: unknown): string {
  if (value === null) {
    return 'NULL'
  }

  if (typeof value === 'string') {
    // Escape single quotes by doubling them
    return `'${value.replace(/'/g, "''")}'`
  }

  if (typeof value === 'number') {
    // NaN / Infinity would be emitted as bare identifiers and break the query
    if (!Number.isFinite(value)) {
      throw new Error(`Cannot use non-finite number in SQL filter: ${value}`)
    }
    return value.toString()
  }

  if (typeof value === 'boolean') {
    return value ? 'TRUE' : 'FALSE'
  }

  if (value instanceof Date) {
    return `'${value.toISOString()}'`
  }

  // Fallback for other types
  return `'${String(value).replace(/'/g, "''")}'`
}
|
package/src/storage/index.ts
CHANGED
|
@@ -82,5 +82,9 @@ export function createVectorStorage(config: VectorStorageConfig): VectorStorage
|
|
|
82
82
|
// Export types and individual storage backends
|
|
83
83
|
export * from './types.js'
|
|
84
84
|
export { JsonVectorStorage, createJsonStorage } from './json.js'
|
|
85
|
+
export { JsonFileStorage, createJsonFileStorage } from './json-file.js'
|
|
85
86
|
export { PgVectorStorage, createPgVectorStorage } from './pgvector.js'
|
|
86
87
|
export { SqliteVssStorage, createSqliteVssStorage } from './sqlite-vss.js'
|
|
88
|
+
|
|
89
|
+
// Export access control utilities
|
|
90
|
+
export { buildAccessControlFilter, mergeAccessFilter, prismaFilterToSQL } from './access-filter.js'
|
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
import { readFileSync, existsSync } from 'node:fs'
|
|
2
|
+
import type { VectorStorage, SearchOptions } from './types.js'
|
|
3
|
+
import type { SearchResult, EmbeddingsIndex } from '../config/types.js'
|
|
4
|
+
import { cosineSimilarity as calculateCosineSimilarity } from './types.js'
|
|
5
|
+
|
|
6
|
+
/**
 * JSON file-based vector storage
 * Loads embeddings from a JSON file generated at build time
 * Performs similarity search in JavaScript without database queries
 * Ideal for static sites, documentation, and build-time generated embeddings
 */
export class JsonFileStorage implements VectorStorage {
  readonly type = 'json-file'

  // Parsed index, populated on first use and kept until reloadIndex() is called
  private index: EmbeddingsIndex | null = null
  private filePath: string

  /**
   * @param filePath - Path of the embeddings JSON file; it is read lazily on first search
   */
  constructor(filePath: string) {
    this.filePath = filePath
  }

  /**
   * Load embeddings index from file
   * Caches the result in memory
   *
   * @throws Error if the file is missing, cannot be read or parsed, or does
   *   not have the `config.dimensions` / `documents` shape that search relies on
   */
  private loadIndex(): EmbeddingsIndex {
    if (this.index) {
      return this.index
    }

    if (!existsSync(this.filePath)) {
      throw new Error(
        `Embeddings file not found: ${this.filePath}. Run embeddings generation first.`,
      )
    }

    try {
      const content = readFileSync(this.filePath, 'utf-8')
      const parsed: unknown = JSON.parse(content)

      // Validate the parts of the index that search() dereferences, so a
      // malformed file fails here with a clear message instead of a TypeError later.
      const candidate = parsed as { config?: { dimensions?: unknown }; documents?: unknown } | null
      if (
        typeof candidate !== 'object' ||
        candidate === null ||
        typeof candidate.config?.dimensions !== 'number' ||
        typeof candidate.documents !== 'object' ||
        candidate.documents === null
      ) {
        throw new Error('expected an object with "config.dimensions" (number) and "documents"')
      }

      const loaded = parsed as EmbeddingsIndex
      this.index = loaded
      return loaded
    } catch (error) {
      throw new Error(`Failed to load embeddings from ${this.filePath}: ${error}`)
    }
  }

  /**
   * Search for similar documents using JavaScript-based cosine similarity
   * Note: listKey parameter is included for interface compatibility but not used
   * since this storage is typically used for standalone content (e.g., docs)
   *
   * @param _listKey - Ignored
   * @param _fieldName - Ignored
   * @param queryVector - Query embedding; its length must equal the index's `config.dimensions`
   * @param options - `limit` (default 10), `minScore` (default 0.0) and `where`
   *   (top-level document properties compared with strict equality)
   * @returns One result per matching chunk, best score first, at most `limit` entries
   * @throws Error if the index cannot be loaded or the query vector has the wrong length
   */
  async search<T = unknown>(
    _listKey: string,
    _fieldName: string,
    queryVector: number[],
    options: SearchOptions,
  ): Promise<SearchResult<T>[]> {
    const { limit = 10, minScore = 0.0, where = {} } = options

    const index = this.loadIndex()

    // Validate query vector dimensions against index config
    if (queryVector.length !== index.config.dimensions) {
      throw new Error(
        `Query vector dimensions (${queryVector.length}) don't match index dimensions (${index.config.dimensions})`,
      )
    }

    const results: Array<{
      item: T
      score: number
      distance: number
      documentId: string
      chunkIndex: number
    }> = []

    // Hoisted out of the document loop: the where clause does not change per document
    const whereEntries = where ? Object.entries(where) : []

    // Search through all documents and chunks
    for (const [documentId, document] of Object.entries(index.documents)) {
      // Apply where filters if provided (simple equality check)
      const rejected = whereEntries.some(
        ([key, value]) => (document as Record<string, unknown>)[key] !== value,
      )
      if (rejected) continue

      // Search through each chunk
      for (const chunk of document.chunks) {
        const score = this.cosineSimilarity(queryVector, chunk.embedding)

        if (score >= minScore) {
          results.push({
            item: {
              documentId,
              title: document.title,
              content: chunk.text,
              chunkIndex: chunk.metadata.chunkIndex,
              metadata: chunk.metadata,
            } as T,
            score,
            distance: 1 - score,
            documentId,
            chunkIndex: chunk.metadata.chunkIndex,
          })
        }
      }
    }

    // Sort by score (descending) and limit results
    results.sort((a, b) => b.score - a.score)

    // A non-positive limit yields no results. Passing a negative limit to
    // slice() would instead drop items from the end of the list.
    return limit > 0 ? results.slice(0, limit) : []
  }

  /**
   * Calculate cosine similarity between two vectors
   */
  cosineSimilarity(a: number[], b: number[]): number {
    return calculateCosineSimilarity(a, b)
  }

  /**
   * Get the loaded index for inspection
   *
   * @returns The cached index, or `null` if nothing has been loaded yet
   */
  getIndex(): EmbeddingsIndex | null {
    return this.index
  }

  /**
   * Force reload the index from disk
   *
   * @throws Error if the file cannot be loaded; the cache is left empty in that case
   */
  reloadIndex(): void {
    this.index = null
    this.loadIndex()
  }
}
|
|
140
|
+
|
|
141
|
+
/**
 * Factory for a {@link JsonFileStorage} backed by the given embeddings file.
 *
 * @param filePath - Path of the embeddings JSON file to search
 * @returns A new storage instance for that file
 *
 * @example
 * ```typescript
 * import { createJsonFileStorage } from '@opensaas/stack-rag/storage'
 *
 * const storage = createJsonFileStorage('.embeddings/docs.json')
 * const results = await storage.search('', '', queryVector, {
 *   limit: 10,
 *   minScore: 0.7
 * })
 * ```
 */
export function createJsonFileStorage(filePath: string): JsonFileStorage {
  const storage = new JsonFileStorage(filePath)
  return storage
}
|
package/src/storage/pgvector.ts
CHANGED
|
@@ -3,6 +3,7 @@ import type { SearchResult } from '../config/types.js'
|
|
|
3
3
|
import type { PgVectorStorageConfig } from '../config/types.js'
|
|
4
4
|
import { cosineSimilarity as calculateCosineSimilarity } from './types.js'
|
|
5
5
|
import { getDbKey } from '@opensaas/stack-core'
|
|
6
|
+
import { buildAccessControlFilter, mergeAccessFilter, prismaFilterToSQL } from './access-filter.js'
|
|
6
7
|
|
|
7
8
|
/**
|
|
8
9
|
* pgvector storage backend
|
|
@@ -62,7 +63,7 @@ export class PgVectorStorage implements VectorStorage {
|
|
|
62
63
|
queryVector: number[],
|
|
63
64
|
options: SearchOptions,
|
|
64
65
|
): Promise<SearchResult<T>[]> {
|
|
65
|
-
const { limit = 10, minScore = 0.0, context, where = {} } = options
|
|
66
|
+
const { limit = 10, minScore = 0.0, context, where = {}, config } = options
|
|
66
67
|
|
|
67
68
|
const dbKey = getDbKey(listKey)
|
|
68
69
|
const model = context.db[dbKey]
|
|
@@ -78,15 +79,11 @@ export class PgVectorStorage implements VectorStorage {
|
|
|
78
79
|
const vectorString = `[${queryVector.join(',')}]`
|
|
79
80
|
|
|
80
81
|
// We need to use Prisma.$queryRaw to access pgvector operators
|
|
81
|
-
//
|
|
82
|
-
//
|
|
83
|
-
// 1. Get all matching IDs using raw query
|
|
84
|
-
// 2. Fetch full items via access-controlled context
|
|
82
|
+
// However, we must enforce access control in the raw query itself
|
|
83
|
+
// to ensure users only see items they have access to
|
|
85
84
|
|
|
86
85
|
try {
|
|
87
86
|
// Get the underlying Prisma client
|
|
88
|
-
// Note: This bypasses access control for the similarity search,
|
|
89
|
-
// but we enforce it in the second query
|
|
90
87
|
const prisma = context.prisma
|
|
91
88
|
|
|
92
89
|
if (!prisma) {
|
|
@@ -94,20 +91,43 @@ export class PgVectorStorage implements VectorStorage {
|
|
|
94
91
|
console.warn(
|
|
95
92
|
'pgvector: Could not access Prisma client directly. ' +
|
|
96
93
|
'Falling back to JSON-based search. ' +
|
|
97
|
-
'For full pgvector support, ensure the context exposes
|
|
94
|
+
'For full pgvector support, ensure the context exposes prisma.',
|
|
98
95
|
)
|
|
99
96
|
return this.fallbackSearch(listKey, fieldName, queryVector, options)
|
|
100
97
|
}
|
|
101
98
|
|
|
102
|
-
//
|
|
99
|
+
// Build access control filter
|
|
100
|
+
let accessFilter = null
|
|
101
|
+
if (config) {
|
|
102
|
+
accessFilter = await buildAccessControlFilter(listKey, context, config)
|
|
103
|
+
|
|
104
|
+
// If access is denied, return empty results
|
|
105
|
+
if (accessFilter === null) {
|
|
106
|
+
return []
|
|
107
|
+
}
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
// Merge access filter with user where clause
|
|
111
|
+
const combinedFilter = accessFilter ? mergeAccessFilter(accessFilter, where) : where
|
|
112
|
+
|
|
113
|
+
// If merged filter is null (access denied), return empty results
|
|
114
|
+
if (combinedFilter === null) {
|
|
115
|
+
return []
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
// Convert Prisma filter to SQL WHERE clause
|
|
119
|
+
const tableName = listKey // Prisma table names match the model name (PascalCase by default)
|
|
120
|
+
const sqlWhereClause = prismaFilterToSQL(combinedFilter, tableName)
|
|
121
|
+
|
|
122
|
+
// Raw query to get IDs and distances with access control
|
|
103
123
|
// We extract the vector from the JSON field and cast it to vector type
|
|
104
|
-
const tableName = listKey.toLowerCase() // Prisma table names are lowercase
|
|
105
124
|
const results = (await prisma.$queryRawUnsafe(`
|
|
106
125
|
SELECT id,
|
|
107
126
|
(("${fieldName}"->>'vector')::vector ${distanceOp} '${vectorString}'::vector) as distance
|
|
108
127
|
FROM "${tableName}"
|
|
109
128
|
WHERE "${fieldName}" IS NOT NULL
|
|
110
129
|
AND "${fieldName}"->>'vector' IS NOT NULL
|
|
130
|
+
AND (${sqlWhereClause})
|
|
111
131
|
ORDER BY distance
|
|
112
132
|
LIMIT ${limit * 2}
|
|
113
133
|
`)) as Array<{ id: string; distance: string }>
|
|
@@ -128,9 +148,9 @@ export class PgVectorStorage implements VectorStorage {
|
|
|
128
148
|
}
|
|
129
149
|
|
|
130
150
|
// Fetch full items via access-controlled context
|
|
151
|
+
// This applies field-level access control and resolveOutput hooks
|
|
131
152
|
const items = await model.findMany({
|
|
132
153
|
where: {
|
|
133
|
-
...where,
|
|
134
154
|
id: {
|
|
135
155
|
in: itemIds.map((r) => r.id),
|
|
136
156
|
},
|
package/src/storage/types.ts
CHANGED
|
@@ -65,6 +65,12 @@ export type SearchOptions = {
|
|
|
65
65
|
* This is merged with access control filters
|
|
66
66
|
*/
|
|
67
67
|
where?: Record<string, unknown>
|
|
68
|
+
|
|
69
|
+
/**
|
|
70
|
+
* OpenSaaS config for access control integration
|
|
71
|
+
* Required to properly enforce access control in raw SQL queries
|
|
72
|
+
*/
|
|
73
|
+
config?: import('@opensaas/stack-core').OpenSaasConfig
|
|
68
74
|
}
|
|
69
75
|
|
|
70
76
|
/**
|