@convex-dev/rag 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +201 -0
- package/README.md +371 -0
- package/dist/client/_generated/_ignore.d.ts +1 -0
- package/dist/client/_generated/_ignore.d.ts.map +1 -0
- package/dist/client/_generated/_ignore.js +3 -0
- package/dist/client/_generated/_ignore.js.map +1 -0
- package/dist/client/defaultChunker.d.ts +15 -0
- package/dist/client/defaultChunker.d.ts.map +1 -0
- package/dist/client/defaultChunker.js +148 -0
- package/dist/client/defaultChunker.js.map +1 -0
- package/dist/client/fileUtils.d.ts +24 -0
- package/dist/client/fileUtils.d.ts.map +1 -0
- package/dist/client/fileUtils.js +179 -0
- package/dist/client/fileUtils.js.map +1 -0
- package/dist/client/index.d.ts +442 -0
- package/dist/client/index.d.ts.map +1 -0
- package/dist/client/index.js +597 -0
- package/dist/client/index.js.map +1 -0
- package/dist/client/types.d.ts +29 -0
- package/dist/client/types.d.ts.map +1 -0
- package/dist/client/types.js +2 -0
- package/dist/client/types.js.map +1 -0
- package/dist/component/_generated/api.d.ts +439 -0
- package/dist/component/_generated/api.d.ts.map +1 -0
- package/dist/component/_generated/api.js +22 -0
- package/dist/component/_generated/api.js.map +1 -0
- package/dist/component/_generated/dataModel.d.ts +60 -0
- package/dist/component/_generated/server.d.ts +149 -0
- package/dist/component/_generated/server.d.ts.map +1 -0
- package/dist/component/_generated/server.js +74 -0
- package/dist/component/_generated/server.js.map +1 -0
- package/dist/component/chunks.d.ts +139 -0
- package/dist/component/chunks.d.ts.map +1 -0
- package/dist/component/chunks.js +413 -0
- package/dist/component/chunks.js.map +1 -0
- package/dist/component/convex.config.d.ts +3 -0
- package/dist/component/convex.config.d.ts.map +1 -0
- package/dist/component/convex.config.js +6 -0
- package/dist/component/convex.config.js.map +1 -0
- package/dist/component/embeddings/importance.d.ts +21 -0
- package/dist/component/embeddings/importance.d.ts.map +1 -0
- package/dist/component/embeddings/importance.js +67 -0
- package/dist/component/embeddings/importance.js.map +1 -0
- package/dist/component/embeddings/index.d.ts +23 -0
- package/dist/component/embeddings/index.d.ts.map +1 -0
- package/dist/component/embeddings/index.js +54 -0
- package/dist/component/embeddings/index.js.map +1 -0
- package/dist/component/embeddings/tables.d.ts +39 -0
- package/dist/component/embeddings/tables.d.ts.map +1 -0
- package/dist/component/embeddings/tables.js +53 -0
- package/dist/component/embeddings/tables.js.map +1 -0
- package/dist/component/entries.d.ts +167 -0
- package/dist/component/entries.d.ts.map +1 -0
- package/dist/component/entries.js +409 -0
- package/dist/component/entries.js.map +1 -0
- package/dist/component/filters.d.ts +46 -0
- package/dist/component/filters.d.ts.map +1 -0
- package/dist/component/filters.js +72 -0
- package/dist/component/filters.js.map +1 -0
- package/dist/component/namespaces.d.ts +131 -0
- package/dist/component/namespaces.d.ts.map +1 -0
- package/dist/component/namespaces.js +222 -0
- package/dist/component/namespaces.js.map +1 -0
- package/dist/component/schema.d.ts +1697 -0
- package/dist/component/schema.d.ts.map +1 -0
- package/dist/component/schema.js +88 -0
- package/dist/component/schema.js.map +1 -0
- package/dist/component/search.d.ts +20 -0
- package/dist/component/search.d.ts.map +1 -0
- package/dist/component/search.js +69 -0
- package/dist/component/search.js.map +1 -0
- package/dist/package.json +3 -0
- package/dist/react/index.d.ts +2 -0
- package/dist/react/index.d.ts.map +1 -0
- package/dist/react/index.js +6 -0
- package/dist/react/index.js.map +1 -0
- package/dist/shared.d.ts +479 -0
- package/dist/shared.d.ts.map +1 -0
- package/dist/shared.js +98 -0
- package/dist/shared.js.map +1 -0
- package/package.json +97 -0
- package/src/client/_generated/_ignore.ts +1 -0
- package/src/client/defaultChunker.test.ts +243 -0
- package/src/client/defaultChunker.ts +183 -0
- package/src/client/fileUtils.ts +179 -0
- package/src/client/index.test.ts +475 -0
- package/src/client/index.ts +1125 -0
- package/src/client/setup.test.ts +28 -0
- package/src/client/types.ts +69 -0
- package/src/component/_generated/api.d.ts +439 -0
- package/src/component/_generated/api.js +23 -0
- package/src/component/_generated/dataModel.d.ts +60 -0
- package/src/component/_generated/server.d.ts +149 -0
- package/src/component/_generated/server.js +90 -0
- package/src/component/chunks.test.ts +915 -0
- package/src/component/chunks.ts +555 -0
- package/src/component/convex.config.ts +7 -0
- package/src/component/embeddings/importance.test.ts +249 -0
- package/src/component/embeddings/importance.ts +75 -0
- package/src/component/embeddings/index.test.ts +482 -0
- package/src/component/embeddings/index.ts +99 -0
- package/src/component/embeddings/tables.ts +114 -0
- package/src/component/entries.test.ts +341 -0
- package/src/component/entries.ts +546 -0
- package/src/component/filters.ts +119 -0
- package/src/component/namespaces.ts +299 -0
- package/src/component/schema.ts +106 -0
- package/src/component/search.test.ts +445 -0
- package/src/component/search.ts +97 -0
- package/src/component/setup.test.ts +5 -0
- package/src/react/index.ts +7 -0
- package/src/shared.ts +247 -0
- package/src/vitest.config.ts +7 -0
|
@@ -0,0 +1,445 @@
|
|
|
1
|
+
/// <reference types="vite/client" />
|
|
2
|
+
|
|
3
|
+
import { describe, expect, test } from "vitest";
|
|
4
|
+
import { convexTest, type TestConvex } from "convex-test";
|
|
5
|
+
import schema from "./schema.js";
|
|
6
|
+
import { api } from "./_generated/api.js";
|
|
7
|
+
import { modules } from "./setup.test.js";
|
|
8
|
+
import { insertChunks } from "./chunks.js";
|
|
9
|
+
import type { Id } from "./_generated/dataModel.js";
|
|
10
|
+
import type { Value } from "convex/values";
|
|
11
|
+
|
|
12
|
+
type ConvexTest = TestConvex<typeof schema>;
|
|
13
|
+
|
|
14
|
+
describe("search", () => {
|
|
15
|
+
/**
 * Inserts a row into the "namespaces" table through `t.run` and returns
 * whatever `ctx.db.insert` resolves to (the new document's id).
 *
 * The row always uses version 1, model "test-model" and a "ready" status;
 * only the name, embedding dimension and filter names are configurable.
 *
 * @param t - Test harness used to run code against the test database.
 * @param namespace - Namespace name to store.
 * @param dimension - Embedding dimension recorded on the namespace.
 * @param filterNames - Filter names the namespace declares.
 */
async function setupTestNamespace(
  t: ConvexTest,
  namespace = "test-namespace",
  dimension = 128,
  filterNames: string[] = []
) {
  // Build the document up front so the database callback is a single call.
  const namespaceDoc = {
    namespace,
    version: 1,
    modelId: "test-model",
    dimension,
    filterNames,
    status: { kind: "ready" as const },
  };
  return await t.run(async (ctx) => ctx.db.insert("namespaces", namespaceDoc));
}
|
|
32
|
+
|
|
33
|
+
/**
 * Inserts a row into the "entries" table through `t.run` and returns
 * whatever `ctx.db.insert` resolves to (the new document's id).
 *
 * The row is always "ready" with importance 0.5; its content hash is derived
 * from `key` and `version` so distinct key/version pairs get distinct hashes.
 *
 * @param t - Test harness used to run code against the test database.
 * @param namespaceId - Id of the namespace the entry belongs to.
 * @param key - Entry key.
 * @param version - Entry version number.
 * @param filterValues - Named filter values stored on the entry.
 */
async function setupTestEntry(
  t: ConvexTest,
  namespaceId: Id<"namespaces">,
  key = "test-entry",
  version = 0,
  filterValues: Array<{ name: string; value: Value }> = []
) {
  const contentHash = `test-content-hash-${key}-${version}`;
  const entryDoc = {
    namespaceId,
    key,
    version,
    status: { kind: "ready" as const },
    contentHash,
    importance: 0.5,
    filterValues,
  };
  return await t.run(async (ctx) => ctx.db.insert("entries", entryDoc));
}
|
|
52
|
+
|
|
53
|
+
/**
 * Builds `count` synthetic chunks for insertion in tests.
 *
 * Each chunk has text "Test chunk content <n>" (1-based), metadata
 * `{ index }` (0-based) and an embedding of `dimension` components: the first
 * `dimension - 1` are 0.01 and the last is `baseEmbedding + i * 0.01`, so the
 * chunks differ only in their final component.
 *
 * @param count - Number of chunks to generate.
 * @param baseEmbedding - Final embedding component of the first chunk.
 * @param dimension - Embedding length. Defaults to 128, matching the default
 *   dimension of `setupTestNamespace`; pass another value when the namespace
 *   under test was created with a different dimension.
 * @returns An array of `{ content, embedding }` objects.
 * @throws RangeError if `dimension` is not a positive integer.
 */
function createTestChunks(count = 3, baseEmbedding = 0.1, dimension = 128) {
  if (!Number.isInteger(dimension) || dimension < 1) {
    throw new RangeError(
      `dimension must be a positive integer, got ${dimension}`
    );
  }
  // Shared constant prefix; it is copied into every embedding via spread.
  const padding = Array<number>(dimension - 1).fill(0.01);
  return Array.from({ length: count }, (_, i) => ({
    content: {
      text: `Test chunk content ${i + 1}`,
      metadata: { index: i },
    },
    embedding: [...padding, baseEmbedding + i * 0.01],
  }));
}
|
|
62
|
+
|
|
63
|
+
test("if a namespace doesn't exist yet, returns nothing", async () => {
  // No namespace is inserted in this test, so the name queried below is unknown.
  const t = convexTest(schema, modules);
  const queryEmbedding = Array(128).fill(0.1);

  const { results, entries } = await t.action(api.search.search, {
    namespace: "non-existent-namespace",
    embedding: queryEmbedding,
    modelId: "test-model",
    filters: [],
    limit: 10,
  });

  // Both halves of the response must be empty.
  expect(results).toHaveLength(0);
  expect(entries).toHaveLength(0);
});
|
|
78
|
+
|
|
79
|
+
test("if a namespace exists and is compatible, it finds the correct embedding for a query", async () => {
  const t = convexTest(schema, modules);
  const namespaceId = await setupTestNamespace(t);
  const entryId = await setupTestEntry(t, namespaceId);

  // Two chunks in one entry: the first uses exactly the vector we will query
  // with, the second uses a different vector.
  const targetEmbedding = [...Array(127).fill(0.5), 1];
  const otherEmbedding = [...Array(127).fill(0.1), 0];
  const chunks = [
    {
      content: { text: "Target chunk content", metadata: { target: true } },
      embedding: targetEmbedding,
    },
    {
      content: { text: "Other chunk content", metadata: { target: false } },
      embedding: otherEmbedding,
    },
  ];
  await t.run(async (ctx) => {
    await insertChunks(ctx, { entryId, startOrder: 0, chunks });
  });

  // Query with the target chunk's own embedding.
  const { results, entries } = await t.action(api.search.search, {
    namespace: "test-namespace",
    embedding: targetEmbedding,
    modelId: "test-model",
    filters: [],
    limit: 10,
  });

  expect(results).toHaveLength(2);
  expect(entries).toHaveLength(1);
  expect(entries[0].entryId).toBe(entryId);

  // The exact match must outrank the other chunk and come back first.
  const [best, runnerUp] = results;
  expect(best.score).toBeGreaterThan(runnerUp.score);
  expect(best.content[0].text).toBe("Target chunk content");
});
|
|
128
|
+
|
|
129
|
+
test("if the limit is 0, it returns nothing", async () => {
  const t = convexTest(schema, modules);
  const namespaceId = await setupTestNamespace(t);
  const entryId = await setupTestEntry(t, namespaceId);

  // Make sure there is searchable data, so an empty response is due to the
  // limit and not to an empty namespace.
  const chunks = createTestChunks(3);
  await t.run(async (ctx) => {
    await insertChunks(ctx, { entryId, startOrder: 0, chunks });
  });

  const { results, entries } = await t.action(api.search.search, {
    namespace: "test-namespace",
    embedding: Array(128).fill(0.1),
    modelId: "test-model",
    filters: [],
    limit: 0,
  });

  expect(results).toHaveLength(0);
  expect(entries).toHaveLength(0);
});
|
|
156
|
+
|
|
157
|
+
test("it filters out results where the vectorScoreThreshold is too low", async () => {
  const t = convexTest(schema, modules);
  const namespaceId = await setupTestNamespace(t);
  const entryId = await setupTestEntry(t, namespaceId);

  // One chunk identical to the query vector and one all-zero chunk, so the
  // two are expected to score very differently.
  const searchEmbedding = Array(128).fill(0.5);
  const chunks = [
    {
      content: {
        text: "High similarity chunk",
        metadata: { similarity: "high" },
      },
      embedding: Array(128).fill(0.5),
    },
    {
      content: {
        text: "Low similarity chunk",
        metadata: { similarity: "low" },
      },
      embedding: Array(128).fill(0.0),
    },
  ];
  await t.run(async (ctx) => {
    await insertChunks(ctx, { entryId, startOrder: 0, chunks });
  });

  // The same query is issued twice; only the threshold differs.
  const baseQuery = {
    namespace: "test-namespace",
    embedding: searchEmbedding,
    modelId: "test-model",
    filters: [],
    limit: 10,
  };
  const minScore = 0.8;
  const resultWithThreshold = await t.action(api.search.search, {
    ...baseQuery,
    vectorScoreThreshold: minScore,
  });
  const resultWithoutThreshold = await t.action(api.search.search, baseQuery);

  // The thresholded query must drop at least one of the two chunks.
  expect(resultWithThreshold.results.length).toBeLessThan(
    resultWithoutThreshold.results.length
  );
  expect(resultWithoutThreshold.results).toHaveLength(2);

  // Whatever survives the threshold must actually meet it.
  resultWithThreshold.results.forEach((hit) => {
    expect(hit.score).toBeGreaterThanOrEqual(0.8);
  });
});
|
|
219
|
+
|
|
220
|
+
test("it successfully uses filters to search for entries that match", async () => {
  const t = convexTest(schema, modules);

  // The namespace declares a single filter field, "category".
  const namespaceId = await setupTestNamespace(t, "filtered-namespace", 128, [
    "category",
  ]);

  // doc1 and doc3 share category1; doc2 is alone in category2.
  const doc1Id = await setupTestEntry(t, namespaceId, "doc1", 0, [
    { name: "category", value: "category1" },
  ]);
  const doc2Id = await setupTestEntry(t, namespaceId, "doc2", 0, [
    { name: "category", value: "category2" },
  ]);
  const doc3Id = await setupTestEntry(t, namespaceId, "doc3", 0, [
    { name: "category", value: "category1" },
  ]);

  // Two chunks per entry, inserted in doc1, doc2, doc3 order.
  await t.run(async (ctx) => {
    for (const entryId of [doc1Id, doc2Id, doc3Id]) {
      await insertChunks(ctx, {
        entryId,
        startOrder: 0,
        chunks: createTestChunks(2, 0.1),
      });
    }
  });

  const baseEmbedding = Array(128).fill(0.1);
  const baseQuery = {
    namespace: "filtered-namespace",
    embedding: baseEmbedding,
    modelId: "test-model",
    limit: 10,
  };

  // category1 -> doc1 and doc3, two chunks each.
  const category1Results = await t.action(api.search.search, {
    ...baseQuery,
    filters: [{ name: "category", value: "category1" }],
  });
  expect(category1Results.entries).toHaveLength(2);
  expect(category1Results.results).toHaveLength(4);
  const entryIds = category1Results.entries.map((d) => d.entryId).sort();
  expect(entryIds).toEqual([doc1Id, doc3Id].sort());

  // category2 -> doc2 only, two chunks.
  const category2Results = await t.action(api.search.search, {
    ...baseQuery,
    filters: [{ name: "category", value: "category2" }],
  });
  expect(category2Results.entries).toHaveLength(1);
  expect(category2Results.results).toHaveLength(2);
  expect(category2Results.entries[0].entryId).toBe(doc2Id);

  // No filters -> every entry and every chunk.
  const noFilterResults = await t.action(api.search.search, {
    ...baseQuery,
    filters: [],
  });
  expect(noFilterResults.entries).toHaveLength(3);
  expect(noFilterResults.results).toHaveLength(6);
});
|
|
299
|
+
|
|
300
|
+
test("it handles multiple filter fields correctly", async () => {
  const t = convexTest(schema, modules);

  // The namespace declares two filter fields; "priority_category" holds an
  // object value combining priority and category.
  const namespaceId = await setupTestNamespace(
    t,
    "multi-filter-namespace",
    128,
    ["category", "priority_category"]
  );

  // Three entries, each with a distinct priority/category combination.
  const doc1Id = await setupTestEntry(t, namespaceId, "doc1", 0, [
    { name: "category", value: "articles" },
    {
      name: "priority_category",
      value: { priority: "high", category: "articles" },
    },
  ]);
  const doc2Id = await setupTestEntry(t, namespaceId, "doc2", 0, [
    { name: "category", value: "articles" },
    {
      name: "priority_category",
      value: { priority: "low", category: "articles" },
    },
  ]);
  const doc3Id = await setupTestEntry(t, namespaceId, "doc3", 0, [
    { name: "category", value: "blogs" },
    {
      name: "priority_category",
      value: { priority: "high", category: "blogs" },
    },
  ]);

  // One chunk per entry, inserted in doc1, doc2, doc3 order.
  await t.run(async (ctx) => {
    for (const entryId of [doc1Id, doc2Id, doc3Id]) {
      await insertChunks(ctx, {
        entryId,
        startOrder: 0,
        chunks: createTestChunks(1, 0.1),
      });
    }
  });

  // Query on the composite field only: high-priority articles.
  const baseEmbedding = Array(128).fill(0.1);
  const { results, entries } = await t.action(api.search.search, {
    namespace: "multi-filter-namespace",
    embedding: baseEmbedding,
    modelId: "test-model",
    filters: [
      {
        name: "priority_category",
        value: { priority: "high", category: "articles" },
      },
    ],
    limit: 10,
  });

  // Only doc1 was stored with exactly this composite value.
  expect(entries).toHaveLength(1);
  expect(entries[0].entryId).toBe(doc1Id);
  expect(results).toHaveLength(1);
});
|
|
372
|
+
|
|
373
|
+
test("it returns empty results for incompatible namespace dimensions", async () => {
  const t = convexTest(schema, modules);

  // The namespace is registered with 256 dimensions...
  await setupTestNamespace(t, "high-dim-namespace", 256);

  // ...but the query vector only has 128 components.
  const mismatchedEmbedding = Array(128).fill(0.1);
  const { results, entries } = await t.action(api.search.search, {
    namespace: "high-dim-namespace",
    embedding: mismatchedEmbedding,
    modelId: "test-model",
    filters: [],
    limit: 10,
  });

  expect(results).toHaveLength(0);
  expect(entries).toHaveLength(0);
});
|
|
391
|
+
|
|
392
|
+
test("it returns empty results for incompatible model IDs", async () => {
  const t = convexTest(schema, modules);

  // setupTestNamespace always stores modelId "test-model".
  await setupTestNamespace(t, "model-specific-namespace", 128);

  // Same name and dimension, but a model id the namespace was not created with.
  const { results, entries } = await t.action(api.search.search, {
    namespace: "model-specific-namespace",
    embedding: Array(128).fill(0.1),
    modelId: "different-model",
    filters: [],
    limit: 10,
  });

  expect(results).toHaveLength(0);
  expect(entries).toHaveLength(0);
});
|
|
410
|
+
|
|
411
|
+
test("it respects the limit parameter", async () => {
  const t = convexTest(schema, modules);
  const namespaceId = await setupTestNamespace(t);
  const entryId = await setupTestEntry(t, namespaceId);

  // Store more chunks (10) than the limit requested below (3).
  const chunks = createTestChunks(10);
  await t.run(async (ctx) => {
    await insertChunks(ctx, { entryId, startOrder: 0, chunks });
  });

  const { results, entries } = await t.action(api.search.search, {
    namespace: "test-namespace",
    embedding: Array(128).fill(0.1),
    modelId: "test-model",
    filters: [],
    limit: 3,
  });

  expect(results).toHaveLength(3);
  expect(entries).toHaveLength(1);

  // Scores must be non-increasing: each result is compared with its predecessor.
  results.slice(1).forEach((current, idx) => {
    expect(results[idx].score).toBeGreaterThanOrEqual(current.score);
  });
});
|
|
445
|
+
});
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
import { v, type Infer } from "convex/values";
|
|
2
|
+
import { action } from "./_generated/server.js";
|
|
3
|
+
import { searchEmbeddings } from "./embeddings/index.js";
|
|
4
|
+
import { numberedFiltersFromNamedFilters, vNamedFilter } from "./filters.js";
|
|
5
|
+
import { internal } from "./_generated/api.js";
|
|
6
|
+
import {
|
|
7
|
+
vEntry,
|
|
8
|
+
type Entry,
|
|
9
|
+
vSearchResult,
|
|
10
|
+
type SearchResult,
|
|
11
|
+
type EntryId,
|
|
12
|
+
} from "../shared.js";
|
|
13
|
+
import type { vRangeResult } from "./chunks.js";
|
|
14
|
+
|
|
15
|
+
/**
 * Vector search over one namespace.
 *
 * Steps, in order:
 * 1. Look up a namespace compatible with the given name, model id, embedding
 *    length and filter names. If none is found, log at debug level and return
 *    empty `results` and `entries`.
 * 2. Run `searchEmbeddings` with the query vector, numbered filters and limit.
 * 3. Drop matches whose `_score` is below `vectorScoreThreshold` (default -1).
 * 4. Fetch chunk ranges and entries for the surviving embedding ids, using
 *    `chunkContext` (default: no chunks before or after).
 * 5. Pair each returned range with the score at the same index and drop null
 *    ranges.
 */
export const search = action({
  args: {
    namespace: v.string(),
    embedding: v.array(v.number()),
    modelId: v.string(),
    // These are all OR'd together
    filters: v.array(vNamedFilter),
    limit: v.number(),
    vectorScoreThreshold: v.optional(v.number()),
    chunkContext: v.optional(
      v.object({ before: v.number(), after: v.number() })
    ),
  },
  returns: v.object({
    results: v.array(vSearchResult),
    entries: v.array(vEntry),
  }),
  handler: async (
    ctx,
    args
  ): Promise<{
    results: SearchResult[];
    entries: Entry[];
  }> => {
    const { modelId, embedding, filters, limit } = args;

    const namespace = await ctx.runQuery(
      internal.namespaces.getCompatibleNamespace,
      {
        namespace: args.namespace,
        modelId,
        dimension: embedding.length,
        filterNames: filters.map((f) => f.name),
      }
    );
    if (!namespace) {
      console.debug(
        `No compatible namespace found for ${args.namespace} with model ${args.modelId} and dimension ${embedding.length} and filters ${filters.map((f) => f.name).join(", ")}.`
      );
      return { results: [], entries: [] };
    }

    const matches = await searchEmbeddings(ctx, {
      embedding,
      namespaceId: namespace._id,
      filters: numberedFiltersFromNamedFilters(filters, namespace.filterNames),
      limit,
    });

    // Without an explicit threshold, -1 is used as the cut-off.
    const minScore = args.vectorScoreThreshold ?? -1;
    const kept = matches.filter((match) => match._score >= minScore);
    const chunkContext = args.chunkContext ?? { before: 0, after: 0 };

    // TODO: break this up if there are too many results
    const { ranges, entries } = await ctx.runQuery(
      internal.chunks.getRangesOfChunks,
      {
        embeddingIds: kept.map((match) => match._id),
        chunkContext,
      }
    );

    // ranges[i] is paired with kept[i]; null ranges are skipped.
    const results: SearchResult[] = [];
    ranges.forEach((range, i) => {
      const hit = publicSearchResult(range, kept[i]._score);
      if (hit !== null) {
        results.push(hit);
      }
    });
    return { results, entries };
  },
});
|
|
84
|
+
|
|
85
|
+
/**
 * Converts an internal range result into the public `SearchResult` shape.
 *
 * @param r - Range result to convert, or null when there is none.
 * @param score - Score to attach to the result.
 * @returns null when `r` is null; otherwise a copy of `r` with `score` added
 *   and `entryId` re-typed as the public `EntryId`.
 */
function publicSearchResult(
  r: Infer<typeof vRangeResult> | null,
  score: number
): SearchResult | null {
  // The entryId cast only changes the static type; the value is untouched.
  return r === null
    ? null
    : { ...r, score, entryId: r.entryId as unknown as EntryId };
}
|