@opensaas/stack-rag 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (149) hide show
  1. package/.turbo/turbo-build.log +4 -0
  2. package/CHANGELOG.md +10 -0
  3. package/CLAUDE.md +565 -0
  4. package/LICENSE +21 -0
  5. package/README.md +406 -0
  6. package/dist/config/index.d.ts +63 -0
  7. package/dist/config/index.d.ts.map +1 -0
  8. package/dist/config/index.js +94 -0
  9. package/dist/config/index.js.map +1 -0
  10. package/dist/config/plugin.d.ts +38 -0
  11. package/dist/config/plugin.d.ts.map +1 -0
  12. package/dist/config/plugin.js +215 -0
  13. package/dist/config/plugin.js.map +1 -0
  14. package/dist/config/plugin.test.d.ts +2 -0
  15. package/dist/config/plugin.test.d.ts.map +1 -0
  16. package/dist/config/plugin.test.js +554 -0
  17. package/dist/config/plugin.test.js.map +1 -0
  18. package/dist/config/types.d.ts +249 -0
  19. package/dist/config/types.d.ts.map +1 -0
  20. package/dist/config/types.js +5 -0
  21. package/dist/config/types.js.map +1 -0
  22. package/dist/fields/embedding.d.ts +85 -0
  23. package/dist/fields/embedding.d.ts.map +1 -0
  24. package/dist/fields/embedding.js +81 -0
  25. package/dist/fields/embedding.js.map +1 -0
  26. package/dist/fields/embedding.test.d.ts +2 -0
  27. package/dist/fields/embedding.test.d.ts.map +1 -0
  28. package/dist/fields/embedding.test.js +323 -0
  29. package/dist/fields/embedding.test.js.map +1 -0
  30. package/dist/fields/index.d.ts +6 -0
  31. package/dist/fields/index.d.ts.map +1 -0
  32. package/dist/fields/index.js +5 -0
  33. package/dist/fields/index.js.map +1 -0
  34. package/dist/index.d.ts +8 -0
  35. package/dist/index.d.ts.map +1 -0
  36. package/dist/index.js +9 -0
  37. package/dist/index.js.map +1 -0
  38. package/dist/mcp/index.d.ts +19 -0
  39. package/dist/mcp/index.d.ts.map +1 -0
  40. package/dist/mcp/index.js +18 -0
  41. package/dist/mcp/index.js.map +1 -0
  42. package/dist/providers/index.d.ts +38 -0
  43. package/dist/providers/index.d.ts.map +1 -0
  44. package/dist/providers/index.js +68 -0
  45. package/dist/providers/index.js.map +1 -0
  46. package/dist/providers/ollama.d.ts +49 -0
  47. package/dist/providers/ollama.d.ts.map +1 -0
  48. package/dist/providers/ollama.js +151 -0
  49. package/dist/providers/ollama.js.map +1 -0
  50. package/dist/providers/openai.d.ts +41 -0
  51. package/dist/providers/openai.d.ts.map +1 -0
  52. package/dist/providers/openai.js +126 -0
  53. package/dist/providers/openai.js.map +1 -0
  54. package/dist/providers/providers.test.d.ts +2 -0
  55. package/dist/providers/providers.test.d.ts.map +1 -0
  56. package/dist/providers/providers.test.js +224 -0
  57. package/dist/providers/providers.test.js.map +1 -0
  58. package/dist/providers/types.d.ts +88 -0
  59. package/dist/providers/types.d.ts.map +1 -0
  60. package/dist/providers/types.js +2 -0
  61. package/dist/providers/types.js.map +1 -0
  62. package/dist/runtime/batch.d.ts +183 -0
  63. package/dist/runtime/batch.d.ts.map +1 -0
  64. package/dist/runtime/batch.js +240 -0
  65. package/dist/runtime/batch.js.map +1 -0
  66. package/dist/runtime/batch.test.d.ts +2 -0
  67. package/dist/runtime/batch.test.d.ts.map +1 -0
  68. package/dist/runtime/batch.test.js +251 -0
  69. package/dist/runtime/batch.test.js.map +1 -0
  70. package/dist/runtime/chunking.d.ts +42 -0
  71. package/dist/runtime/chunking.d.ts.map +1 -0
  72. package/dist/runtime/chunking.js +264 -0
  73. package/dist/runtime/chunking.js.map +1 -0
  74. package/dist/runtime/chunking.test.d.ts +2 -0
  75. package/dist/runtime/chunking.test.d.ts.map +1 -0
  76. package/dist/runtime/chunking.test.js +212 -0
  77. package/dist/runtime/chunking.test.js.map +1 -0
  78. package/dist/runtime/embeddings.d.ts +147 -0
  79. package/dist/runtime/embeddings.d.ts.map +1 -0
  80. package/dist/runtime/embeddings.js +201 -0
  81. package/dist/runtime/embeddings.js.map +1 -0
  82. package/dist/runtime/embeddings.test.d.ts +2 -0
  83. package/dist/runtime/embeddings.test.d.ts.map +1 -0
  84. package/dist/runtime/embeddings.test.js +366 -0
  85. package/dist/runtime/embeddings.test.js.map +1 -0
  86. package/dist/runtime/index.d.ts +14 -0
  87. package/dist/runtime/index.d.ts.map +1 -0
  88. package/dist/runtime/index.js +18 -0
  89. package/dist/runtime/index.js.map +1 -0
  90. package/dist/runtime/search.d.ts +135 -0
  91. package/dist/runtime/search.d.ts.map +1 -0
  92. package/dist/runtime/search.js +101 -0
  93. package/dist/runtime/search.js.map +1 -0
  94. package/dist/storage/index.d.ts +41 -0
  95. package/dist/storage/index.d.ts.map +1 -0
  96. package/dist/storage/index.js +73 -0
  97. package/dist/storage/index.js.map +1 -0
  98. package/dist/storage/json.d.ts +34 -0
  99. package/dist/storage/json.d.ts.map +1 -0
  100. package/dist/storage/json.js +82 -0
  101. package/dist/storage/json.js.map +1 -0
  102. package/dist/storage/pgvector.d.ts +53 -0
  103. package/dist/storage/pgvector.d.ts.map +1 -0
  104. package/dist/storage/pgvector.js +168 -0
  105. package/dist/storage/pgvector.js.map +1 -0
  106. package/dist/storage/sqlite-vss.d.ts +49 -0
  107. package/dist/storage/sqlite-vss.d.ts.map +1 -0
  108. package/dist/storage/sqlite-vss.js +148 -0
  109. package/dist/storage/sqlite-vss.js.map +1 -0
  110. package/dist/storage/storage.test.d.ts +2 -0
  111. package/dist/storage/storage.test.d.ts.map +1 -0
  112. package/dist/storage/storage.test.js +440 -0
  113. package/dist/storage/storage.test.js.map +1 -0
  114. package/dist/storage/types.d.ts +79 -0
  115. package/dist/storage/types.d.ts.map +1 -0
  116. package/dist/storage/types.js +49 -0
  117. package/dist/storage/types.js.map +1 -0
  118. package/package.json +82 -0
  119. package/src/config/index.ts +116 -0
  120. package/src/config/plugin.test.ts +664 -0
  121. package/src/config/plugin.ts +257 -0
  122. package/src/config/types.ts +283 -0
  123. package/src/fields/embedding.test.ts +408 -0
  124. package/src/fields/embedding.ts +150 -0
  125. package/src/fields/index.ts +6 -0
  126. package/src/index.ts +33 -0
  127. package/src/mcp/index.ts +21 -0
  128. package/src/providers/index.ts +81 -0
  129. package/src/providers/ollama.ts +186 -0
  130. package/src/providers/openai.ts +161 -0
  131. package/src/providers/providers.test.ts +275 -0
  132. package/src/providers/types.ts +100 -0
  133. package/src/runtime/batch.test.ts +332 -0
  134. package/src/runtime/batch.ts +424 -0
  135. package/src/runtime/chunking.test.ts +258 -0
  136. package/src/runtime/chunking.ts +334 -0
  137. package/src/runtime/embeddings.test.ts +441 -0
  138. package/src/runtime/embeddings.ts +380 -0
  139. package/src/runtime/index.ts +51 -0
  140. package/src/runtime/search.ts +243 -0
  141. package/src/storage/index.ts +86 -0
  142. package/src/storage/json.ts +106 -0
  143. package/src/storage/pgvector.ts +206 -0
  144. package/src/storage/sqlite-vss.ts +193 -0
  145. package/src/storage/storage.test.ts +521 -0
  146. package/src/storage/types.ts +126 -0
  147. package/tsconfig.json +13 -0
  148. package/tsconfig.tsbuildinfo +1 -0
  149. package/vitest.config.ts +18 -0
@@ -0,0 +1,380 @@
1
+ /**
2
+ * High-level embedding generation utilities
3
+ */
4
+
5
+ import type { EmbeddingProvider } from '../providers/types.js'
6
+ import type { StoredEmbedding, EmbeddingMetadata } from '../config/types.js'
7
+ import { chunkText, type ChunkingOptions, type TextChunk } from './chunking.js'
8
+ import { createHash } from 'node:crypto'
9
+
10
export interface GenerateEmbeddingOptions {
  /** Provider that computes the vector; its `model`, `type` and `dimensions` are copied into the metadata. */
  provider: EmbeddingProvider

  /** Source text to embed. */
  text: string

  /**
   * Split `text` into chunks and embed every chunk separately.
   * @default false
   */
  enableChunking?: boolean

  /** Options forwarded to `chunkText`; only read when `enableChunking` is true. */
  chunking?: ChunkingOptions

  /**
   * Store a hash of `text` in the metadata so later changes to the source can be detected.
   * @default true
   */
  includeSourceHash?: boolean

  /** Extra metadata entries, spread over the generated metadata. */
  metadata?: Record<string, unknown>
}
43
+
44
export interface ChunkedEmbedding {
  /** The text chunk this entry was generated from. */
  chunk: TextChunk

  /** Vector and metadata generated for `chunk`. */
  embedding: StoredEmbedding
}
55
+
56
/**
 * Generate an embedding for a piece of text, optionally chunking it first.
 *
 * - Without chunking (the default) the whole text is embedded with a single
 *   `provider.embed()` call and one `StoredEmbedding` is returned.
 * - With `enableChunking: true` the text is split by `chunkText()`, all chunks
 *   are embedded with one `provider.embedBatch()` call, and one
 *   `ChunkedEmbedding` per chunk is returned in chunk order. If chunking
 *   produces no chunks, an empty array is returned without calling the provider.
 *
 * Caller-supplied `metadata` is spread after the generated metadata, so it wins
 * on key collisions; in chunked mode `chunkIndex`, `chunkStart` and `chunkEnd`
 * are written last. When `includeSourceHash` is enabled, `sourceHash` is the
 * hash of the complete input text, including on every chunk.
 *
 * @throws Error if the provider returns a different number of vectors than
 *   there are chunks
 *
 * @example
 * ```typescript
 * // Simple embedding
 * const embedding = await generateEmbedding({
 *   provider: createEmbeddingProvider({ type: 'openai', apiKey: '...' }),
 *   text: 'Hello world',
 * })
 *
 * // Chunked embedding for long text
 * const chunks = await generateEmbedding({
 *   provider: createEmbeddingProvider({ type: 'openai', apiKey: '...' }),
 *   text: longDocument,
 *   enableChunking: true,
 *   chunking: { chunkSize: 1000, chunkOverlap: 200 },
 * })
 * ```
 */
// Overload signatures

export function generateEmbedding(
  options: GenerateEmbeddingOptions & { enableChunking: true },
): Promise<ChunkedEmbedding[]>
// eslint-disable-next-line no-redeclare
export function generateEmbedding(
  options: GenerateEmbeddingOptions & { enableChunking?: false },
): Promise<StoredEmbedding>
// eslint-disable-next-line no-redeclare
export function generateEmbedding(
  options: GenerateEmbeddingOptions,
): Promise<StoredEmbedding | ChunkedEmbedding[]>
// Implementation
// eslint-disable-next-line no-redeclare
export async function generateEmbedding(
  options: GenerateEmbeddingOptions,
): Promise<StoredEmbedding | ChunkedEmbedding[]> {
  const {
    provider,
    text,
    enableChunking = false,
    chunking,
    includeSourceHash = true,
    metadata: additionalMetadata,
  } = options

  const sourceHash = includeSourceHash ? hashText(text) : undefined

  // Metadata shared by the single embedding and by every chunk embedding
  const baseMetadata: EmbeddingMetadata = {
    model: provider.model,
    provider: provider.type,
    dimensions: provider.dimensions,
    generatedAt: new Date().toISOString(),
    sourceHash,
  }

  // Without chunking, embed the whole text in one call
  if (!enableChunking) {
    const vector = await provider.embed(text)

    return {
      vector,
      metadata: {
        ...baseMetadata,
        ...additionalMetadata,
      },
    }
  }

  const chunks = chunkText(text, chunking)

  // Nothing to embed: skip the provider round-trip instead of sending an empty batch
  if (chunks.length === 0) {
    return []
  }

  const vectors = await provider.embedBatch(chunks.map((c) => c.text))

  // Vectors are paired with chunks by position, so a short or long response
  // would silently attach missing or wrong vectors to chunks
  if (vectors.length !== chunks.length) {
    throw new Error(
      `Embedding provider returned ${vectors.length} vectors for ${chunks.length} chunks`,
    )
  }

  return chunks.map(
    (chunk, index): ChunkedEmbedding => ({
      chunk,
      embedding: {
        vector: vectors[index],
        metadata: {
          ...baseMetadata,
          ...additionalMetadata,
          chunkIndex: index,
          chunkStart: chunk.start,
          chunkEnd: chunk.end,
        },
      },
    }),
  )
}
156
+
157
export interface GenerateEmbeddingsOptions {
  /** Provider used to embed every batch. */
  provider: EmbeddingProvider

  /** Texts to embed; one embedding is produced per text, in the same order. */
  texts: string[]

  /**
   * Store a per-text hash in the metadata so later changes to a source can be detected.
   * @default true
   */
  includeSourceHash?: boolean

  /** Extra metadata entries applied to every generated embedding. */
  metadata?: Record<string, unknown>

  /**
   * Maximum number of texts passed to the provider per `embedBatch` call.
   * @default 10
   */
  batchSize?: number
}
185
+
186
/**
 * Generate embeddings for multiple texts in batches.
 *
 * The texts are sliced into groups of at most `batchSize` and each group is
 * sent to `provider.embedBatch()` sequentially. The result contains one
 * `StoredEmbedding` per input text, in input order. All embeddings share the
 * same `generatedAt` timestamp; `sourceHash` (when enabled) is computed per
 * text. Caller-supplied `metadata` is spread last and therefore wins on key
 * collisions.
 *
 * @throws Error if `batchSize` is not a positive number (zero, negative or NaN
 *   would prevent the batching loop from making progress)
 * @throws Error if the provider returns a different number of vectors than
 *   texts in a batch
 *
 * @example
 * ```typescript
 * const embeddings = await generateEmbeddings({
 *   provider: createEmbeddingProvider({ type: 'openai', apiKey: '...' }),
 *   texts: ['text 1', 'text 2', 'text 3'],
 *   batchSize: 10,
 * })
 * ```
 */
export async function generateEmbeddings(
  options: GenerateEmbeddingsOptions,
): Promise<StoredEmbedding[]> {
  const {
    provider,
    texts,
    includeSourceHash = true,
    metadata: additionalMetadata,
    batchSize = 10,
  } = options

  // `!(x > 0)` also rejects NaN; with such a value the loop below would either
  // spin forever (0 / negative) or stop after an empty first batch (NaN)
  if (!(batchSize > 0)) {
    throw new Error(`batchSize must be a positive number, got ${batchSize}`)
  }

  const baseMetadata: Omit<EmbeddingMetadata, 'sourceHash'> = {
    model: provider.model,
    provider: provider.type,
    dimensions: provider.dimensions,
    generatedAt: new Date().toISOString(),
  }

  const embeddings: StoredEmbedding[] = []

  for (let start = 0; start < texts.length; start += batchSize) {
    const batch = texts.slice(start, start + batchSize)
    const vectors = await provider.embedBatch(batch)

    // Vectors are matched to texts by position; refuse a mismatched response
    // rather than storing `undefined` vectors
    if (vectors.length !== batch.length) {
      throw new Error(
        `Embedding provider returned ${vectors.length} vectors for ${batch.length} texts`,
      )
    }

    batch.forEach((text, offset) => {
      embeddings.push({
        vector: vectors[offset],
        metadata: {
          ...baseMetadata,
          sourceHash: includeSourceHash ? hashText(text) : undefined,
          ...additionalMetadata,
        },
      })
    })
  }

  return embeddings
}
247
+
248
/**
 * Decide whether a stored embedding is stale relative to its source text.
 *
 * @param sourceText - Current source text
 * @param currentEmbedding - Previously stored embedding, if any
 * @returns `true` when there is no stored embedding or its recorded
 *   `sourceHash` differs from the hash of `sourceText`; `false` when the hashes
 *   match or when the stored embedding carries no `sourceHash`
 */
export function shouldRegenerateEmbedding(
  sourceText: string,
  currentEmbedding: StoredEmbedding | null | undefined,
): boolean {
  // Nothing stored yet: an embedding has to be generated
  if (!currentEmbedding) {
    return true
  }

  const storedHash = currentEmbedding.metadata.sourceHash

  // Without a recorded hash a change cannot be detected; stay conservative
  // and keep the existing embedding
  return storedHash ? hashText(sourceText) !== storedHash : false
}
273
+
274
/**
 * Compute the change-detection fingerprint of a text.
 *
 * @param text - Text to fingerprint
 * @returns Hex-encoded SHA-256 digest of `text`
 */
export function hashText(text: string): string {
  const hasher = createHash('sha256')
  hasher.update(text, 'utf8')
  return hasher.digest('hex')
}
281
+
282
/**
 * Assert that an embedding has the expected number of dimensions.
 *
 * Two checks run in order: the vector length must equal `expectedDimensions`,
 * and the `dimensions` value recorded in the metadata must equal the vector
 * length.
 *
 * @param embedding - The embedding to validate
 * @param expectedDimensions - Expected number of dimensions
 * @throws Error if either check fails
 */
export function validateEmbeddingDimensions(
  embedding: StoredEmbedding,
  expectedDimensions: number,
): void {
  const { vector, metadata } = embedding
  const vectorLength = vector.length

  if (vectorLength !== expectedDimensions) {
    const mismatch = `Embedding dimension mismatch: expected ${expectedDimensions}, got ${vectorLength}. `
    const origin = `Provider: ${metadata.provider}, Model: ${metadata.model}`
    throw new Error(mismatch + origin)
  }

  if (metadata.dimensions !== vectorLength) {
    const claimed = `Embedding metadata dimension mismatch: metadata says ${metadata.dimensions}, `
    const actual = `but vector has ${vectorLength} dimensions`
    throw new Error(claimed + actual)
  }
}
309
+
310
/**
 * Merge multiple embeddings into a single embedding by element-wise pooling.
 *
 * Useful for combining chunk embeddings into a single document embedding.
 *
 * The merged metadata is based on the first embedding's metadata with a fresh
 * `generatedAt` plus `mergedFrom` (number of inputs) and `mergeMethod`. The
 * chunk-position fields `chunkIndex`, `chunkStart` and `chunkEnd` are dropped,
 * because the merged vector no longer describes that single chunk.
 *
 * A single-element input is returned as-is (same object, metadata untouched).
 *
 * @param embeddings - Array of embeddings to merge
 * @param method - `'average'` (element-wise mean, default) or `'max'`
 *   (element-wise maximum)
 * @returns Merged embedding
 * @throws Error if `embeddings` is empty or the vectors differ in length
 */
export function mergeEmbeddings(
  embeddings: StoredEmbedding[],
  method: 'average' | 'max' = 'average',
): StoredEmbedding {
  if (embeddings.length === 0) {
    throw new Error('Cannot merge empty array of embeddings')
  }

  if (embeddings.length === 1) {
    return embeddings[0]
  }

  // Every vector must have the same length as the first one
  const dimensions = embeddings[0].vector.length
  for (const emb of embeddings) {
    if (emb.vector.length !== dimensions) {
      throw new Error(
        `Cannot merge embeddings with different dimensions: ${dimensions} vs ${emb.vector.length}`,
      )
    }
  }

  let mergedVector: number[]

  if (method === 'average') {
    // Average pooling: sum every component, then divide by the input count
    mergedVector = new Array<number>(dimensions).fill(0)

    for (const emb of embeddings) {
      for (let i = 0; i < dimensions; i++) {
        mergedVector[i] += emb.vector[i]
      }
    }

    for (let i = 0; i < dimensions; i++) {
      mergedVector[i] /= embeddings.length
    }
  } else {
    // Max pooling: keep the largest value seen for every component
    mergedVector = new Array<number>(dimensions).fill(-Infinity)

    for (const emb of embeddings) {
      for (let i = 0; i < dimensions; i++) {
        mergedVector[i] = Math.max(mergedVector[i], emb.vector[i])
      }
    }
  }

  // Start from the first embedding's metadata, but strip the fields that
  // identify it as one particular chunk
  const {
    chunkIndex: _chunkIndex,
    chunkStart: _chunkStart,
    chunkEnd: _chunkEnd,
    ...sharedMetadata
  } = embeddings[0].metadata as EmbeddingMetadata & Record<string, unknown>

  return {
    vector: mergedVector,
    metadata: {
      ...sharedMetadata,
      generatedAt: new Date().toISOString(),
      mergedFrom: embeddings.length,
      mergeMethod: method,
    } as EmbeddingMetadata,
  }
}
@@ -0,0 +1,51 @@
1
+ /**
2
+ * Runtime utilities for RAG operations
3
+ *
4
+ * This module provides high-level APIs for:
5
+ * - Text chunking
6
+ * - Embedding generation
7
+ * - Semantic search
8
+ * - Batch processing with rate limiting
9
+ */
10
+
11
+ // Text chunking
12
+ export {
13
+ chunkText,
14
+ estimateTokenCount,
15
+ mergeSmallChunks,
16
+ type ChunkingStrategy,
17
+ type ChunkingOptions,
18
+ type TextChunk,
19
+ } from './chunking.js'
20
+
21
+ // Embedding generation
22
+ export {
23
+ generateEmbedding,
24
+ generateEmbeddings,
25
+ shouldRegenerateEmbedding,
26
+ hashText,
27
+ validateEmbeddingDimensions,
28
+ mergeEmbeddings,
29
+ type GenerateEmbeddingOptions,
30
+ type GenerateEmbeddingsOptions,
31
+ type ChunkedEmbedding,
32
+ } from './embeddings.js'
33
+
34
+ // Semantic search
35
+ export {
36
+ semanticSearch,
37
+ findSimilar,
38
+ type SemanticSearchOptions,
39
+ type FindSimilarOptions,
40
+ } from './search.js'
41
+
42
+ // Batch processing
43
+ export {
44
+ batchProcess,
45
+ RateLimiter,
46
+ ProcessingQueue,
47
+ type BatchProcessOptions,
48
+ type BatchProgress,
49
+ type BatchError,
50
+ type BatchProcessResult,
51
+ } from './batch.js'
@@ -0,0 +1,243 @@
1
+ /**
2
+ * High-level semantic search APIs
3
+ */
4
+
5
+ import type { AccessContext } from '@opensaas/stack-core'
6
+ import type { SearchResult } from '../config/types.js'
7
+ import type { EmbeddingProvider } from '../providers/types.js'
8
+ import type { VectorStorage } from '../storage/types.js'
9
+
10
export interface SemanticSearchOptions {
  /** Key of the list to search (e.g., 'Article', 'Post'). */
  listKey: string

  /** Name of the field that holds the embeddings. */
  fieldName: string

  /** Natural-language query; it is embedded with `provider` before searching. */
  query: string

  /** Provider used to turn `query` into a vector. */
  provider: EmbeddingProvider

  /** Vector storage backend that executes the similarity search. */
  storage: VectorStorage

  /** Access context, forwarded to the storage backend with the search. */
  context: AccessContext

  /**
   * Maximum number of results to return.
   * @default 10
   */
  limit?: number

  /**
   * Minimum similarity score (0-1) a result must reach.
   * @default 0.0
   */
  minScore?: number

  /** Additional Prisma where clause, forwarded to the storage backend. */
  where?: Record<string, unknown>
}
58
+
59
/**
 * Run a semantic search for a natural-language query.
 *
 * Steps:
 * 1. Embed `query` with `provider.embed()`
 * 2. Call `storage.search()` with the resulting vector, passing `limit`,
 *    `minScore`, `context` and `where` through unchanged
 *
 * The access `context` is only forwarded here; any access-control filtering is
 * left to the storage backend.
 *
 * @example
 * ```typescript
 * const results = await semanticSearch({
 *   listKey: 'Article',
 *   fieldName: 'contentEmbedding',
 *   query: 'articles about machine learning',
 *   provider: createEmbeddingProvider({ type: 'openai', apiKey: '...' }),
 *   storage: createVectorStorage({ type: 'pgvector' }),
 *   context: await getContext(),
 *   limit: 10,
 *   minScore: 0.7,
 * })
 * ```
 */
export async function semanticSearch<T = unknown>(
  options: SemanticSearchOptions,
): Promise<SearchResult<T>[]> {
  const { listKey, fieldName, query, provider, storage } = options
  const { context, limit = 10, minScore = 0.0, where } = options

  // Turn the query text into a vector
  const queryVector = await provider.embed(query)

  // Delegate the similarity lookup to the storage backend
  const searchOptions = { limit, minScore, context, where }
  const matches = await storage.search<T>(listKey, fieldName, queryVector, searchOptions)

  return matches
}
109
+
110
export interface FindSimilarOptions {
  /** Key of the list to search (e.g., 'Article', 'Post'). */
  listKey: string

  /** Name of the field that holds the embeddings. */
  fieldName: string

  /** ID of the item whose stored embedding is used as the query vector. */
  itemId: string

  /** Vector storage backend that executes the similarity search. */
  storage: VectorStorage

  /** Access context; its `db` is used to load the source item and it is forwarded to the storage backend. */
  context: AccessContext

  /**
   * Maximum number of results to return.
   * @default 10
   */
  limit?: number

  /**
   * Minimum similarity score (0-1) a result must reach.
   * @default 0.0
   */
  minScore?: number

  /**
   * Exclude the source item itself from the results.
   * @default true
   */
  excludeSelf?: boolean

  /** Additional Prisma where clause, forwarded to the storage backend. */
  where?: Record<string, unknown>
}
159
+
160
/**
 * Find items similar to a given item by ID.
 *
 * Steps:
 * 1. Load the source item's embedding through `context.db`
 * 2. Use its vector as the query for `storage.search()`, forwarding `limit`,
 *    `minScore` and `context`
 * 3. Optionally exclude the source item from the results
 *
 * When `excludeSelf` is enabled and the caller's `where` already filters on
 * `id`, both conditions are combined with Prisma's `AND` operator so that the
 * caller's filter is kept. Otherwise `id: { not: itemId }` is added directly to
 * the where clause.
 *
 * @throws Error if the list is not present on `context.db`, the item cannot be
 *   loaded, or the item has no (or an empty) vector in `fieldName`
 *
 * @example
 * ```typescript
 * const similar = await findSimilar({
 *   listKey: 'Article',
 *   fieldName: 'contentEmbedding',
 *   itemId: 'article-123',
 *   storage: createVectorStorage({ type: 'pgvector' }),
 *   context: await getContext(),
 *   limit: 5,
 *   excludeSelf: true,
 * })
 * ```
 */
export async function findSimilar<T = unknown>(
  options: FindSimilarOptions,
): Promise<SearchResult<T>[]> {
  const {
    listKey,
    fieldName,
    itemId,
    storage,
    context,
    limit = 10,
    minScore = 0.0,
    excludeSelf = true,
    where = {},
  } = options

  // The source item's embedding is read through the context's database handle
  const dbKey = getDbKey(listKey)
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const model = (context.db as any)[dbKey]

  if (!model) {
    throw new Error(`List "${listKey}" not found in database`)
  }

  const item = await model.findUnique({
    where: { id: itemId },
    select: { [fieldName]: true },
  })

  if (!item) {
    throw new Error(`Item with id "${itemId}" not found in list "${listKey}"`)
  }

  // An empty vector is as unusable as a missing one, so reject both
  const embedding = item[fieldName]
  if (!embedding || !embedding.vector || embedding.vector.length === 0) {
    throw new Error(`Item "${itemId}" does not have an embedding in field "${fieldName}"`)
  }

  const queryVector = embedding.vector

  // Build the where clause. A plain spread would overwrite a caller-supplied
  // `id` filter, so in that case both conditions are AND-ed instead.
  const selfExclusion = { id: { not: itemId } }
  let searchWhere: Record<string, unknown> = where
  if (excludeSelf) {
    searchWhere = Object.prototype.hasOwnProperty.call(where, 'id')
      ? { AND: [where, selfExclusion] }
      : { ...where, ...selfExclusion }
  }

  const results = await storage.search<T>(listKey, fieldName, queryVector, {
    limit,
    minScore,
    context,
    where: searchWhere,
  })

  return results
}
236
+
237
/**
 * Derive the database key for a list key by lower-casing its first character
 * (e.g. 'BlogPost' becomes 'blogPost'); the rest of the key is left untouched.
 */
function getDbKey(listKey: string): string {
  const head = listKey.slice(0, 1)
  const tail = listKey.slice(1)
  return `${head.toLowerCase()}${tail}`
}