@convex-dev/rag 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +201 -0
- package/README.md +371 -0
- package/dist/client/_generated/_ignore.d.ts +1 -0
- package/dist/client/_generated/_ignore.d.ts.map +1 -0
- package/dist/client/_generated/_ignore.js +3 -0
- package/dist/client/_generated/_ignore.js.map +1 -0
- package/dist/client/defaultChunker.d.ts +15 -0
- package/dist/client/defaultChunker.d.ts.map +1 -0
- package/dist/client/defaultChunker.js +148 -0
- package/dist/client/defaultChunker.js.map +1 -0
- package/dist/client/fileUtils.d.ts +24 -0
- package/dist/client/fileUtils.d.ts.map +1 -0
- package/dist/client/fileUtils.js +179 -0
- package/dist/client/fileUtils.js.map +1 -0
- package/dist/client/index.d.ts +442 -0
- package/dist/client/index.d.ts.map +1 -0
- package/dist/client/index.js +597 -0
- package/dist/client/index.js.map +1 -0
- package/dist/client/types.d.ts +29 -0
- package/dist/client/types.d.ts.map +1 -0
- package/dist/client/types.js +2 -0
- package/dist/client/types.js.map +1 -0
- package/dist/component/_generated/api.d.ts +439 -0
- package/dist/component/_generated/api.d.ts.map +1 -0
- package/dist/component/_generated/api.js +22 -0
- package/dist/component/_generated/api.js.map +1 -0
- package/dist/component/_generated/dataModel.d.ts +60 -0
- package/dist/component/_generated/server.d.ts +149 -0
- package/dist/component/_generated/server.d.ts.map +1 -0
- package/dist/component/_generated/server.js +74 -0
- package/dist/component/_generated/server.js.map +1 -0
- package/dist/component/chunks.d.ts +139 -0
- package/dist/component/chunks.d.ts.map +1 -0
- package/dist/component/chunks.js +413 -0
- package/dist/component/chunks.js.map +1 -0
- package/dist/component/convex.config.d.ts +3 -0
- package/dist/component/convex.config.d.ts.map +1 -0
- package/dist/component/convex.config.js +6 -0
- package/dist/component/convex.config.js.map +1 -0
- package/dist/component/embeddings/importance.d.ts +21 -0
- package/dist/component/embeddings/importance.d.ts.map +1 -0
- package/dist/component/embeddings/importance.js +67 -0
- package/dist/component/embeddings/importance.js.map +1 -0
- package/dist/component/embeddings/index.d.ts +23 -0
- package/dist/component/embeddings/index.d.ts.map +1 -0
- package/dist/component/embeddings/index.js +54 -0
- package/dist/component/embeddings/index.js.map +1 -0
- package/dist/component/embeddings/tables.d.ts +39 -0
- package/dist/component/embeddings/tables.d.ts.map +1 -0
- package/dist/component/embeddings/tables.js +53 -0
- package/dist/component/embeddings/tables.js.map +1 -0
- package/dist/component/entries.d.ts +167 -0
- package/dist/component/entries.d.ts.map +1 -0
- package/dist/component/entries.js +409 -0
- package/dist/component/entries.js.map +1 -0
- package/dist/component/filters.d.ts +46 -0
- package/dist/component/filters.d.ts.map +1 -0
- package/dist/component/filters.js +72 -0
- package/dist/component/filters.js.map +1 -0
- package/dist/component/namespaces.d.ts +131 -0
- package/dist/component/namespaces.d.ts.map +1 -0
- package/dist/component/namespaces.js +222 -0
- package/dist/component/namespaces.js.map +1 -0
- package/dist/component/schema.d.ts +1697 -0
- package/dist/component/schema.d.ts.map +1 -0
- package/dist/component/schema.js +88 -0
- package/dist/component/schema.js.map +1 -0
- package/dist/component/search.d.ts +20 -0
- package/dist/component/search.d.ts.map +1 -0
- package/dist/component/search.js +69 -0
- package/dist/component/search.js.map +1 -0
- package/dist/package.json +3 -0
- package/dist/react/index.d.ts +2 -0
- package/dist/react/index.d.ts.map +1 -0
- package/dist/react/index.js +6 -0
- package/dist/react/index.js.map +1 -0
- package/dist/shared.d.ts +479 -0
- package/dist/shared.d.ts.map +1 -0
- package/dist/shared.js +98 -0
- package/dist/shared.js.map +1 -0
- package/package.json +97 -0
- package/src/client/_generated/_ignore.ts +1 -0
- package/src/client/defaultChunker.test.ts +243 -0
- package/src/client/defaultChunker.ts +183 -0
- package/src/client/fileUtils.ts +179 -0
- package/src/client/index.test.ts +475 -0
- package/src/client/index.ts +1125 -0
- package/src/client/setup.test.ts +28 -0
- package/src/client/types.ts +69 -0
- package/src/component/_generated/api.d.ts +439 -0
- package/src/component/_generated/api.js +23 -0
- package/src/component/_generated/dataModel.d.ts +60 -0
- package/src/component/_generated/server.d.ts +149 -0
- package/src/component/_generated/server.js +90 -0
- package/src/component/chunks.test.ts +915 -0
- package/src/component/chunks.ts +555 -0
- package/src/component/convex.config.ts +7 -0
- package/src/component/embeddings/importance.test.ts +249 -0
- package/src/component/embeddings/importance.ts +75 -0
- package/src/component/embeddings/index.test.ts +482 -0
- package/src/component/embeddings/index.ts +99 -0
- package/src/component/embeddings/tables.ts +114 -0
- package/src/component/entries.test.ts +341 -0
- package/src/component/entries.ts +546 -0
- package/src/component/filters.ts +119 -0
- package/src/component/namespaces.ts +299 -0
- package/src/component/schema.ts +106 -0
- package/src/component/search.test.ts +445 -0
- package/src/component/search.ts +97 -0
- package/src/component/setup.test.ts +5 -0
- package/src/react/index.ts +7 -0
- package/src/shared.ts +247 -0
- package/src/vitest.config.ts +7 -0
|
@@ -0,0 +1,482 @@
|
|
|
1
|
+
/// <reference types="vite/client" />
|
|
2
|
+
|
|
3
|
+
import { describe, expect, test } from "vitest";
|
|
4
|
+
import { convexTest } from "convex-test";
|
|
5
|
+
import schema, { v } from "../schema.js";
|
|
6
|
+
import { modules } from "../setup.test.js";
|
|
7
|
+
import { insertEmbedding, searchEmbeddings } from "./index.js";
|
|
8
|
+
import { vectorWithImportanceDimension } from "./importance.js";
|
|
9
|
+
import { action } from "../_generated/server.js";
|
|
10
|
+
import { anyApi, type ApiFromModules } from "convex/server";
|
|
11
|
+
|
|
12
|
+
/**
 * Test-only action exposing `searchEmbeddings` so the tests below can run a
 * vector search through `t.action(...)`.
 *
 * The arguments are forwarded unchanged to `searchEmbeddings`.
 */
export const search = action({
  args: {
    embedding: v.array(v.number()),
    namespaceId: v.id("namespaces"),
    filters: v.array(v.any()),
    limit: v.number(),
  },
  handler: async (ctx, args) => searchEmbeddings(ctx, args),
});
|
|
23
|
+
|
|
24
|
+
const testApi: ApiFromModules<{
|
|
25
|
+
fns: {
|
|
26
|
+
search: typeof search;
|
|
27
|
+
};
|
|
28
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
29
|
+
}>["fns"] = anyApi["embeddings"]["index.test"] as any;
|
|
30
|
+
|
|
31
|
+
describe("embeddings", () => {
|
|
32
|
+
test("insertEmbedding with no filters or importance works", async () => {
|
|
33
|
+
const t = convexTest(schema, modules);
|
|
34
|
+
|
|
35
|
+
// Create a namespace first
|
|
36
|
+
const namespaceId = await t.run(async (ctx) => {
|
|
37
|
+
return ctx.db.insert("namespaces", {
|
|
38
|
+
namespace: "test-namespace",
|
|
39
|
+
version: 1,
|
|
40
|
+
modelId: "test-model",
|
|
41
|
+
dimension: 128,
|
|
42
|
+
filterNames: [],
|
|
43
|
+
status: { kind: "ready" },
|
|
44
|
+
});
|
|
45
|
+
});
|
|
46
|
+
|
|
47
|
+
// Create a simple 128-dimension embedding
|
|
48
|
+
const embedding = Array(128).fill(0.1);
|
|
49
|
+
|
|
50
|
+
// Insert embedding without filters or importance
|
|
51
|
+
const vectorId = await t.run(async (ctx) => {
|
|
52
|
+
return insertEmbedding(ctx, embedding, namespaceId, undefined, undefined);
|
|
53
|
+
});
|
|
54
|
+
|
|
55
|
+
expect(vectorId).toBeDefined();
|
|
56
|
+
|
|
57
|
+
// Verify the vector was inserted correctly
|
|
58
|
+
const insertedVector = await t.run(async (ctx) => {
|
|
59
|
+
return ctx.db.get(vectorId);
|
|
60
|
+
});
|
|
61
|
+
|
|
62
|
+
expect(insertedVector).toBeDefined();
|
|
63
|
+
expect(insertedVector!.namespaceId).toBe(namespaceId);
|
|
64
|
+
expect(insertedVector!.vector).toHaveLength(
|
|
65
|
+
vectorWithImportanceDimension(128)
|
|
66
|
+
);
|
|
67
|
+
expect(insertedVector!.filter0).toBeUndefined();
|
|
68
|
+
expect(insertedVector!.filter1).toBeUndefined();
|
|
69
|
+
expect(insertedVector!.filter2).toBeUndefined();
|
|
70
|
+
expect(insertedVector!.filter3).toBeUndefined();
|
|
71
|
+
});
|
|
72
|
+
|
|
73
|
+
test("insertEmbedding with importance modifies the vector", async () => {
|
|
74
|
+
const t = convexTest(schema, modules);
|
|
75
|
+
|
|
76
|
+
const namespaceId = await t.run(async (ctx) => {
|
|
77
|
+
return ctx.db.insert("namespaces", {
|
|
78
|
+
namespace: "test-namespace-importance",
|
|
79
|
+
version: 1,
|
|
80
|
+
modelId: "test-model",
|
|
81
|
+
dimension: 128,
|
|
82
|
+
filterNames: [],
|
|
83
|
+
status: { kind: "ready" },
|
|
84
|
+
});
|
|
85
|
+
});
|
|
86
|
+
|
|
87
|
+
const embedding = Array(128).fill(0.1);
|
|
88
|
+
const importance = 0.5;
|
|
89
|
+
|
|
90
|
+
// Insert embedding with importance
|
|
91
|
+
const vectorId = await t.run(async (ctx) => {
|
|
92
|
+
return insertEmbedding(
|
|
93
|
+
ctx,
|
|
94
|
+
embedding,
|
|
95
|
+
namespaceId,
|
|
96
|
+
importance,
|
|
97
|
+
undefined
|
|
98
|
+
);
|
|
99
|
+
});
|
|
100
|
+
|
|
101
|
+
const insertedVector = await t.run(async (ctx) => {
|
|
102
|
+
return ctx.db.get(vectorId);
|
|
103
|
+
});
|
|
104
|
+
|
|
105
|
+
expect(insertedVector).toBeDefined();
|
|
106
|
+
expect(insertedVector!.vector).toHaveLength(129);
|
|
107
|
+
|
|
108
|
+
// The importance should affect the vector - it should not be the same as without importance
|
|
109
|
+
const vectorWithoutImportance = await t.run(async (ctx) => {
|
|
110
|
+
return insertEmbedding(ctx, embedding, namespaceId, undefined, undefined);
|
|
111
|
+
});
|
|
112
|
+
|
|
113
|
+
const vectorWithoutImportanceData = await t.run(async (ctx) => {
|
|
114
|
+
return ctx.db.get(vectorWithoutImportance);
|
|
115
|
+
});
|
|
116
|
+
|
|
117
|
+
// Vectors should be different due to importance scaling
|
|
118
|
+
expect(insertedVector!.vector).not.toEqual(
|
|
119
|
+
vectorWithoutImportanceData!.vector
|
|
120
|
+
);
|
|
121
|
+
|
|
122
|
+
// The last element should be the weight: sqrt(1 - importance^2)
|
|
123
|
+
const expectedWeight = Math.sqrt(1 - importance ** 2);
|
|
124
|
+
expect(insertedVector!.vector[128]).toBeCloseTo(expectedWeight, 5);
|
|
125
|
+
});
|
|
126
|
+
|
|
127
|
+
test("search for vectors sorted by importance when identical otherwise", async () => {
|
|
128
|
+
const t = convexTest(schema, modules);
|
|
129
|
+
|
|
130
|
+
const namespaceId = await t.run(async (ctx) => {
|
|
131
|
+
return ctx.db.insert("namespaces", {
|
|
132
|
+
namespace: "importance-sort-test",
|
|
133
|
+
version: 1,
|
|
134
|
+
modelId: "test-model",
|
|
135
|
+
dimension: 128,
|
|
136
|
+
filterNames: [],
|
|
137
|
+
status: { kind: "ready" },
|
|
138
|
+
});
|
|
139
|
+
});
|
|
140
|
+
|
|
141
|
+
const embedding = Array(128).fill(0.1);
|
|
142
|
+
|
|
143
|
+
// Insert same embedding with different importance levels
|
|
144
|
+
await t.run(async (ctx) => {
|
|
145
|
+
await insertEmbedding(ctx, embedding, namespaceId, 0.2, undefined); // Low importance
|
|
146
|
+
await insertEmbedding(ctx, embedding, namespaceId, 0.8, undefined); // High importance
|
|
147
|
+
await insertEmbedding(ctx, embedding, namespaceId, 0.5, undefined); // Medium importance
|
|
148
|
+
});
|
|
149
|
+
|
|
150
|
+
// Search for the vectors
|
|
151
|
+
const results = await t.action(testApi.search, {
|
|
152
|
+
embedding,
|
|
153
|
+
namespaceId,
|
|
154
|
+
filters: [],
|
|
155
|
+
limit: 10,
|
|
156
|
+
});
|
|
157
|
+
|
|
158
|
+
expect(results).toHaveLength(3);
|
|
159
|
+
|
|
160
|
+
// Results should be sorted by similarity (which correlates with importance)
|
|
161
|
+
// Higher importance vectors should have higher similarity scores
|
|
162
|
+
expect(results[0]._score).toBeGreaterThan(results[1]._score);
|
|
163
|
+
expect(results[1]._score).toBeGreaterThan(results[2]._score);
|
|
164
|
+
});
|
|
165
|
+
|
|
166
|
+
test("filters are added to the correct field", async () => {
|
|
167
|
+
const t = convexTest(schema, modules);
|
|
168
|
+
|
|
169
|
+
const namespaceId = await t.run(async (ctx) => {
|
|
170
|
+
return ctx.db.insert("namespaces", {
|
|
171
|
+
namespace: "filter-test",
|
|
172
|
+
version: 1,
|
|
173
|
+
modelId: "test-model",
|
|
174
|
+
dimension: 128,
|
|
175
|
+
filterNames: ["category", "priority", "status", "author"],
|
|
176
|
+
status: { kind: "ready" },
|
|
177
|
+
});
|
|
178
|
+
});
|
|
179
|
+
|
|
180
|
+
const embedding = Array(128).fill(0.1);
|
|
181
|
+
|
|
182
|
+
// Insert embedding with filter on position 0
|
|
183
|
+
const vectorId0 = await t.run(async (ctx) => {
|
|
184
|
+
return insertEmbedding(ctx, embedding, namespaceId, undefined, {
|
|
185
|
+
0: "entries",
|
|
186
|
+
});
|
|
187
|
+
});
|
|
188
|
+
|
|
189
|
+
// Insert embedding with filter on position 2
|
|
190
|
+
const vectorId2 = await t.run(async (ctx) => {
|
|
191
|
+
return insertEmbedding(ctx, embedding, namespaceId, undefined, {
|
|
192
|
+
2: "active",
|
|
193
|
+
});
|
|
194
|
+
});
|
|
195
|
+
|
|
196
|
+
// Verify filters are in correct fields
|
|
197
|
+
const vector0 = await t.run(async (ctx) => ctx.db.get(vectorId0));
|
|
198
|
+
const vector2 = await t.run(async (ctx) => ctx.db.get(vectorId2));
|
|
199
|
+
|
|
200
|
+
expect(vector0!.filter0).toEqual([namespaceId, "entries"]);
|
|
201
|
+
expect(vector0!.filter1).toBeUndefined();
|
|
202
|
+
expect(vector0!.filter2).toBeUndefined();
|
|
203
|
+
expect(vector0!.filter3).toBeUndefined();
|
|
204
|
+
|
|
205
|
+
expect(vector2!.filter0).toBeUndefined();
|
|
206
|
+
expect(vector2!.filter1).toBeUndefined();
|
|
207
|
+
expect(vector2!.filter2).toEqual([namespaceId, "active"]);
|
|
208
|
+
expect(vector2!.filter3).toBeUndefined();
|
|
209
|
+
});
|
|
210
|
+
|
|
211
|
+
test("embeddings have namespace prefixed on filter fields", async () => {
|
|
212
|
+
const t = convexTest(schema, modules);
|
|
213
|
+
|
|
214
|
+
const namespace1Id = await t.run(async (ctx) => {
|
|
215
|
+
return ctx.db.insert("namespaces", {
|
|
216
|
+
namespace: "namespace1",
|
|
217
|
+
version: 1,
|
|
218
|
+
modelId: "test-model",
|
|
219
|
+
dimension: 128,
|
|
220
|
+
filterNames: ["type"],
|
|
221
|
+
status: { kind: "ready" },
|
|
222
|
+
});
|
|
223
|
+
});
|
|
224
|
+
|
|
225
|
+
const namespace2Id = await t.run(async (ctx) => {
|
|
226
|
+
return ctx.db.insert("namespaces", {
|
|
227
|
+
namespace: "namespace2",
|
|
228
|
+
version: 1,
|
|
229
|
+
modelId: "test-model",
|
|
230
|
+
dimension: 128,
|
|
231
|
+
filterNames: ["type"],
|
|
232
|
+
status: { kind: "ready" },
|
|
233
|
+
});
|
|
234
|
+
});
|
|
235
|
+
|
|
236
|
+
const embedding = Array(128).fill(0.1);
|
|
237
|
+
|
|
238
|
+
// Insert same filter value in different namespaces
|
|
239
|
+
const vector1Id = await t.run(async (ctx) => {
|
|
240
|
+
return insertEmbedding(ctx, embedding, namespace1Id, undefined, {
|
|
241
|
+
0: "article",
|
|
242
|
+
});
|
|
243
|
+
});
|
|
244
|
+
|
|
245
|
+
const vector2Id = await t.run(async (ctx) => {
|
|
246
|
+
return insertEmbedding(ctx, embedding, namespace2Id, undefined, {
|
|
247
|
+
0: "article",
|
|
248
|
+
});
|
|
249
|
+
});
|
|
250
|
+
|
|
251
|
+
const vector1 = await t.run(async (ctx) => ctx.db.get(vector1Id));
|
|
252
|
+
const vector2 = await t.run(async (ctx) => ctx.db.get(vector2Id));
|
|
253
|
+
|
|
254
|
+
// Both have the same filter value but different namespace prefixes
|
|
255
|
+
expect(vector1!.filter0).toEqual([namespace1Id, "article"]);
|
|
256
|
+
expect(vector2!.filter0).toEqual([namespace2Id, "article"]);
|
|
257
|
+
expect(vector1!.filter0).not.toEqual(vector2!.filter0);
|
|
258
|
+
});
|
|
259
|
+
|
|
260
|
+
test("search without filters returns only vectors in the target namespace", async () => {
|
|
261
|
+
const t = convexTest(schema, modules);
|
|
262
|
+
|
|
263
|
+
const namespace1Id = await t.run(async (ctx) => {
|
|
264
|
+
return ctx.db.insert("namespaces", {
|
|
265
|
+
namespace: "namespace1",
|
|
266
|
+
version: 1,
|
|
267
|
+
modelId: "test-model",
|
|
268
|
+
dimension: 128,
|
|
269
|
+
filterNames: [],
|
|
270
|
+
status: { kind: "ready" },
|
|
271
|
+
});
|
|
272
|
+
});
|
|
273
|
+
|
|
274
|
+
const namespace2Id = await t.run(async (ctx) => {
|
|
275
|
+
return ctx.db.insert("namespaces", {
|
|
276
|
+
namespace: "namespace2",
|
|
277
|
+
version: 1,
|
|
278
|
+
modelId: "test-model",
|
|
279
|
+
dimension: 128,
|
|
280
|
+
filterNames: [],
|
|
281
|
+
status: { kind: "ready" },
|
|
282
|
+
});
|
|
283
|
+
});
|
|
284
|
+
|
|
285
|
+
const embedding = Array(128).fill(0.1);
|
|
286
|
+
|
|
287
|
+
// Insert vectors in both namespaces
|
|
288
|
+
await t.run(async (ctx) => {
|
|
289
|
+
await insertEmbedding(ctx, embedding, namespace1Id, undefined, undefined);
|
|
290
|
+
await insertEmbedding(ctx, embedding, namespace1Id, undefined, undefined);
|
|
291
|
+
await insertEmbedding(ctx, embedding, namespace2Id, undefined, undefined);
|
|
292
|
+
});
|
|
293
|
+
|
|
294
|
+
// Search in namespace1 only
|
|
295
|
+
const results1 = await t.action(testApi.search, {
|
|
296
|
+
embedding,
|
|
297
|
+
namespaceId: namespace1Id,
|
|
298
|
+
filters: [],
|
|
299
|
+
limit: 10,
|
|
300
|
+
});
|
|
301
|
+
|
|
302
|
+
// Search in namespace2 only
|
|
303
|
+
const results2 = await t.action(testApi.search, {
|
|
304
|
+
embedding,
|
|
305
|
+
namespaceId: namespace2Id,
|
|
306
|
+
filters: [],
|
|
307
|
+
limit: 10,
|
|
308
|
+
});
|
|
309
|
+
|
|
310
|
+
expect(results1).toHaveLength(2);
|
|
311
|
+
expect(results2).toHaveLength(1);
|
|
312
|
+
|
|
313
|
+
// All results should be from the correct namespace
|
|
314
|
+
for (const result of results1) {
|
|
315
|
+
const vector = await t.run(async (ctx) => ctx.db.get(result._id));
|
|
316
|
+
expect(vector!.namespaceId).toBe(namespace1Id);
|
|
317
|
+
}
|
|
318
|
+
|
|
319
|
+
for (const result of results2) {
|
|
320
|
+
const vector = await t.run(async (ctx) => ctx.db.get(result._id));
|
|
321
|
+
expect(vector!.namespaceId).toBe(namespace2Id);
|
|
322
|
+
}
|
|
323
|
+
});
|
|
324
|
+
|
|
325
|
+
test("search with filters returns only matching vectors in namespace", async () => {
|
|
326
|
+
const t = convexTest(schema, modules);
|
|
327
|
+
|
|
328
|
+
const namespaceId = await t.run(async (ctx) => {
|
|
329
|
+
return ctx.db.insert("namespaces", {
|
|
330
|
+
namespace: "filtered-search",
|
|
331
|
+
version: 1,
|
|
332
|
+
modelId: "test-model",
|
|
333
|
+
dimension: 128,
|
|
334
|
+
filterNames: ["category", "status"],
|
|
335
|
+
status: { kind: "ready" },
|
|
336
|
+
});
|
|
337
|
+
});
|
|
338
|
+
|
|
339
|
+
const embedding = Array(128).fill(0.1);
|
|
340
|
+
|
|
341
|
+
// Insert vectors with different filter combinations
|
|
342
|
+
await t.run(async (ctx) => {
|
|
343
|
+
await insertEmbedding(ctx, embedding, namespaceId, undefined, {
|
|
344
|
+
0: "articles",
|
|
345
|
+
});
|
|
346
|
+
await insertEmbedding(ctx, embedding, namespaceId, undefined, {
|
|
347
|
+
0: "blogs",
|
|
348
|
+
});
|
|
349
|
+
await insertEmbedding(ctx, embedding, namespaceId, undefined, {
|
|
350
|
+
1: "published",
|
|
351
|
+
});
|
|
352
|
+
await insertEmbedding(ctx, embedding, namespaceId, undefined, {
|
|
353
|
+
0: "articles",
|
|
354
|
+
1: "draft",
|
|
355
|
+
});
|
|
356
|
+
await insertEmbedding(ctx, embedding, namespaceId, undefined, undefined); // No filters
|
|
357
|
+
});
|
|
358
|
+
|
|
359
|
+
// Search for articles only
|
|
360
|
+
const articlesResults = await t.action(testApi.search, {
|
|
361
|
+
embedding,
|
|
362
|
+
namespaceId,
|
|
363
|
+
filters: [{ 0: "articles" }],
|
|
364
|
+
limit: 10,
|
|
365
|
+
});
|
|
366
|
+
|
|
367
|
+
expect(articlesResults).toHaveLength(2); // Two vectors with category "articles"
|
|
368
|
+
|
|
369
|
+
// Search for published status only
|
|
370
|
+
const publishedResults = await t.action(testApi.search, {
|
|
371
|
+
embedding,
|
|
372
|
+
namespaceId,
|
|
373
|
+
filters: [{ 1: "published" }],
|
|
374
|
+
limit: 10,
|
|
375
|
+
});
|
|
376
|
+
|
|
377
|
+
expect(publishedResults).toHaveLength(1); // One vector with status "published"
|
|
378
|
+
});
|
|
379
|
+
|
|
380
|
+
test("multiple filters perform OR operation", async () => {
|
|
381
|
+
const t = convexTest(schema, modules);
|
|
382
|
+
|
|
383
|
+
const namespaceId = await t.run(async (ctx) => {
|
|
384
|
+
return ctx.db.insert("namespaces", {
|
|
385
|
+
namespace: "multi-filter-or",
|
|
386
|
+
version: 1,
|
|
387
|
+
modelId: "test-model",
|
|
388
|
+
dimension: 128,
|
|
389
|
+
filterNames: ["category", "priority"],
|
|
390
|
+
status: { kind: "ready" },
|
|
391
|
+
});
|
|
392
|
+
});
|
|
393
|
+
|
|
394
|
+
const embedding = Array(128).fill(0.1);
|
|
395
|
+
|
|
396
|
+
// Insert vectors with different filter values
|
|
397
|
+
await t.run(async (ctx) => {
|
|
398
|
+
await insertEmbedding(ctx, embedding, namespaceId, undefined, {
|
|
399
|
+
0: "articles",
|
|
400
|
+
});
|
|
401
|
+
await insertEmbedding(ctx, embedding, namespaceId, undefined, {
|
|
402
|
+
0: "blogs",
|
|
403
|
+
});
|
|
404
|
+
await insertEmbedding(ctx, embedding, namespaceId, undefined, {
|
|
405
|
+
1: "high",
|
|
406
|
+
});
|
|
407
|
+
await insertEmbedding(ctx, embedding, namespaceId, undefined, {
|
|
408
|
+
1: "low",
|
|
409
|
+
});
|
|
410
|
+
await insertEmbedding(ctx, embedding, namespaceId, undefined, undefined); // No filters
|
|
411
|
+
});
|
|
412
|
+
|
|
413
|
+
// Search with OR filters: articles OR high priority
|
|
414
|
+
const orResults = await t.action(testApi.search, {
|
|
415
|
+
embedding,
|
|
416
|
+
namespaceId,
|
|
417
|
+
filters: [
|
|
418
|
+
{ 0: "articles" }, // category = articles
|
|
419
|
+
{ 1: "high" }, // OR priority = high
|
|
420
|
+
],
|
|
421
|
+
limit: 10,
|
|
422
|
+
});
|
|
423
|
+
|
|
424
|
+
expect(orResults).toHaveLength(2); // Should match both "articles" and "high priority" vectors
|
|
425
|
+
|
|
426
|
+
// Verify the results contain the expected filters
|
|
427
|
+
const vectorIds = orResults.map((r) => r._id);
|
|
428
|
+
const vectors = await t.run(async (ctx) => {
|
|
429
|
+
return Promise.all(vectorIds.map((id) => ctx.db.get(id)));
|
|
430
|
+
});
|
|
431
|
+
|
|
432
|
+
const hasArticles = vectors.some((v) => v!.filter0?.[1] === "articles");
|
|
433
|
+
const hasHighPriority = vectors.some((v) => v!.filter1?.[1] === "high");
|
|
434
|
+
|
|
435
|
+
expect(hasArticles).toBe(true);
|
|
436
|
+
expect(hasHighPriority).toBe(true);
|
|
437
|
+
});
|
|
438
|
+
|
|
439
|
+
test("searchEmbeddings", async () => {
|
|
440
|
+
const t = convexTest(schema, modules);
|
|
441
|
+
|
|
442
|
+
const namespaceId = await t.run(async (ctx) => {
|
|
443
|
+
return ctx.db.insert("namespaces", {
|
|
444
|
+
namespace: "search-test",
|
|
445
|
+
version: 1,
|
|
446
|
+
modelId: "test-model",
|
|
447
|
+
dimension: 128,
|
|
448
|
+
filterNames: [],
|
|
449
|
+
status: { kind: "ready" },
|
|
450
|
+
});
|
|
451
|
+
});
|
|
452
|
+
|
|
453
|
+
const embedding1 = Array(128).fill(0.1);
|
|
454
|
+
embedding1[0] = 1;
|
|
455
|
+
const embedding2 = Array(128).fill(0.1);
|
|
456
|
+
embedding2[0] = 0;
|
|
457
|
+
const searchEmbedding = Array(128).fill(0.1);
|
|
458
|
+
searchEmbedding[0] = 0.8; // Closer to embedding1
|
|
459
|
+
|
|
460
|
+
// Insert two different embeddings
|
|
461
|
+
await t.run(async (ctx) => {
|
|
462
|
+
await insertEmbedding(ctx, embedding1, namespaceId, undefined, undefined);
|
|
463
|
+
await insertEmbedding(ctx, embedding2, namespaceId, undefined, undefined);
|
|
464
|
+
});
|
|
465
|
+
|
|
466
|
+
// Search should return results ordered by similarity
|
|
467
|
+
const results = await t.action(testApi.search, {
|
|
468
|
+
embedding: searchEmbedding,
|
|
469
|
+
namespaceId,
|
|
470
|
+
filters: [],
|
|
471
|
+
limit: 10,
|
|
472
|
+
});
|
|
473
|
+
|
|
474
|
+
expect(results).toHaveLength(2);
|
|
475
|
+
expect(results[0]._score).toBeGreaterThan(results[1]._score);
|
|
476
|
+
|
|
477
|
+
// The first result should be embedding1 (first component 1) rather than embedding2 (first component 0),
|
|
478
|
+
// since searchEmbedding (first component 0.8) points closer to embedding1.
|
|
479
|
+
const firstVector = await t.run(async (ctx) => ctx.db.get(results[0]._id));
|
|
480
|
+
expect(firstVector).toBeDefined();
|
|
481
|
+
});
|
|
482
|
+
});
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* This file is the interface for interacting with vectors.
|
|
3
|
+
* It translates from embeddings to the underlying vector storage and search.
|
|
4
|
+
* It modifies embeddings to include importance.
|
|
5
|
+
* The outer world deals with filters with user names.
|
|
6
|
+
* The underlying vector storage has its own names.
|
|
7
|
+
* This file takes in numbered filters (0-3) to translate without knowing about
|
|
8
|
+
* user names.
|
|
9
|
+
*/
|
|
10
|
+
import type { Id } from "../_generated/dataModel.js";
|
|
11
|
+
import { type ActionCtx, type MutationCtx } from "../_generated/server.js";
|
|
12
|
+
import { getVectorTableName, validateVectorDimension } from "./tables.js";
|
|
13
|
+
import { filterFieldsFromNumbers, type NumberedFilter } from "../filters.js";
|
|
14
|
+
import { searchVector, vectorWithImportance } from "./importance.js";
|
|
15
|
+
|
|
16
|
+
// TODO: wait to see if this is needed.
|
|
17
|
+
// export const insertBatch = mutation({
|
|
18
|
+
// args: {
|
|
19
|
+
// vectorDimension: vVectorDimension,
|
|
20
|
+
// vectors: v.array(
|
|
21
|
+
// v.object({
|
|
22
|
+
// vector: v.array(v.number()),
|
|
23
|
+
// namespace: v.id("namespaces"),
|
|
24
|
+
// importance: v.optional(v.number()),
|
|
25
|
+
// filters: v.optional(v.any()),
|
|
26
|
+
// })
|
|
27
|
+
// ),
|
|
28
|
+
// },
|
|
29
|
+
// returns: v.array(vVectorId),
|
|
30
|
+
// handler: async (ctx, args) => {
|
|
31
|
+
// return Promise.all(
|
|
32
|
+
// args.vectors.map(async (vector) =>
|
|
33
|
+
// insertEmbedding(
|
|
34
|
+
// ctx,
|
|
35
|
+
// vector.vector,
|
|
36
|
+
// vector.namespace,
|
|
37
|
+
// vector.importance,
|
|
38
|
+
// vector.filters
|
|
39
|
+
// )
|
|
40
|
+
// )
|
|
41
|
+
// );
|
|
42
|
+
// },
|
|
43
|
+
// });
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
/**
 * Inserts a single embedding into the vector table that matches its length.
 *
 * @param ctx - Mutation context used for the database insert.
 * @param embedding - The raw embedding; its length must be one of the
 *   supported vector dimensions, otherwise `validateVectorDimension` throws.
 * @param namespaceId - Namespace the vector belongs to; stored on the row.
 * @param importance - Importance passed to `vectorWithImportance`; treated as
 *   1 when undefined.
 * @param filters - Numbered filter values, translated to storage fields by
 *   `filterFieldsFromNumbers`; may be undefined.
 * @returns The result of `ctx.db.insert` for the new vector row.
 */
export async function insertEmbedding(
  ctx: MutationCtx,
  embedding: number[],
  namespaceId: Id<"namespaces">,
  importance: number | undefined,
  filters: NumberedFilter | undefined
) {
  // Translate the numbered filters first, then pick the table by dimension.
  const storedFilters = filterFieldsFromNumbers(namespaceId, filters);
  const tableName = getVectorTableName(
    validateVectorDimension(embedding.length)
  );
  const effectiveImportance = importance ?? 1;
  return ctx.db.insert(tableName, {
    namespaceId,
    vector: vectorWithImportance(embedding, effectiveImportance),
    ...storedFilters,
  });
}
|
|
61
|
+
|
|
62
|
+
/**
 * Runs a vector search against the table that matches the embedding's length.
 *
 * With no filters, results are restricted by `namespaceId` equality. With
 * filters, every field/value pair produced by `filterFieldsFromNumbers` becomes
 * one equality clause and the clauses are combined with OR.
 *
 * @param ctx - Action context providing `vectorSearch`.
 * @returns The result of `ctx.vectorSearch` on the "vector" index.
 * @throws If the embedding length is not a supported vector dimension.
 */
export async function searchEmbeddings(
  ctx: ActionCtx,
  {
    embedding,
    namespaceId,
    filters,
    limit,
  }: {
    embedding: number[];
    namespaceId: Id<"namespaces">;
    // NOTE: Current vector search supports OR logic between filters.
    // Multiple filters will match if ANY condition is met,
    // e.g. [{3: filter3}, {1: filter1}, {2: filter2}] will match if any of
    // filter3, filter1, or filter2 is present.
    filters: Array<NumberedFilter>;
    limit: number;
  }
) {
  const tableName = getVectorTableName(
    validateVectorDimension(embedding.length)
  );
  const orFilters = filters.flatMap((numbered) =>
    filterFieldsFromNumbers(namespaceId, numbered)
  );
  return ctx.vectorSearch(tableName, "vector", {
    vector: searchVector(embedding),
    filter: (q) => {
      // No filters requested: scope the search to the namespace only.
      if (orFilters.length === 0) {
        return q.eq("namespaceId", namespaceId);
      }
      // Otherwise build one equality clause per stored filter field.
      const clauses = orFilters.flatMap((namedFilter) =>
        Object.entries(namedFilter).map(([filterField, value]) =>
          q.eq(filterField as keyof (typeof orFilters)[number], value)
        )
      );
      return q.or(...clauses);
    },
    limit,
  });
}
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
import { literals } from "convex-helpers/validators";
|
|
2
|
+
import {
|
|
3
|
+
defineTable,
|
|
4
|
+
type GenericTableSearchIndexes,
|
|
5
|
+
type SchemaDefinition,
|
|
6
|
+
type TableDefinition,
|
|
7
|
+
} from "convex/server";
|
|
8
|
+
import {
|
|
9
|
+
type GenericId,
|
|
10
|
+
type ObjectType,
|
|
11
|
+
v,
|
|
12
|
+
type VId,
|
|
13
|
+
type VObject,
|
|
14
|
+
type VUnion,
|
|
15
|
+
} from "convex/values";
|
|
16
|
+
import { vectorWithImportanceDimension } from "./importance.js";
|
|
17
|
+
import { allFilterFieldNames, vAllFilterFields } from "../filters.js";
|
|
18
|
+
|
|
19
|
+
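// Fields shared by every vector table: the stored vector plus all filter fields.
// NOTE(review): the earlier note here ("We only generate embeddings for
// non-tool, non-system messages") does not describe these fields; confirm
// whether it still applies to this component.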
|
|
20
|
+
const embeddingsFields = {
|
|
21
|
+
vector: v.array(v.number()),
|
|
22
|
+
...vAllFilterFields,
|
|
23
|
+
};
|
|
24
|
+
|
|
25
|
+
/**
 * Builds the table definition for one supported embedding dimension.
 *
 * The vector index is sized with `vectorWithImportanceDimension(dimensions)`
 * rather than the raw dimension, and is filterable on `allFilterFieldNames`.
 */
function table(dimensions: VectorDimension): Table {
  const base = defineTable(embeddingsFields);
  return base.vectorIndex("vector", {
    vectorField: "vector",
    dimensions: vectorWithImportanceDimension(dimensions),
    filterFields: allFilterFieldNames,
  });
}
|
|
32
|
+
|
|
33
|
+
type Table = TableDefinition<
|
|
34
|
+
VObject<ObjectType<typeof embeddingsFields>, typeof embeddingsFields>,
|
|
35
|
+
{ model_table_threadId: ["model", "table", "threadId", "_creationTime"] },
|
|
36
|
+
GenericTableSearchIndexes,
|
|
37
|
+
VectorIndex
|
|
38
|
+
>;
|
|
39
|
+
|
|
40
|
+
type VectorIndex = {
|
|
41
|
+
vector: {
|
|
42
|
+
vectorField: "vector";
|
|
43
|
+
dimensions: number;
|
|
44
|
+
filterFields: string;
|
|
45
|
+
};
|
|
46
|
+
};
|
|
47
|
+
|
|
48
|
+
export type VectorSchema = SchemaDefinition<
|
|
49
|
+
{ [key in VectorTableName]: Table },
|
|
50
|
+
true
|
|
51
|
+
>;
|
|
52
|
+
|
|
53
|
+
/** Embedding lengths that have a dedicated vector table. */
export const VectorDimensions = [
  128, 256, 512, 768, 1024, 1408, 1536, 2048, 3072, 4096,
] as const;

/**
 * Asserts that `dimension` is one of the supported `VectorDimensions`.
 *
 * @param dimension - The embedding length to check.
 * @throws Error naming the rejected dimension and listing the supported ones.
 */
export function assertVectorDimension(
  dimension: number
): asserts dimension is VectorDimension {
  if (!VectorDimensions.includes(dimension as VectorDimension)) {
    // A space separates the label from the value so the message reads
    // "dimension 100" instead of "dimension100".
    throw new Error(
      `Unsupported vector dimension ${dimension}. Supported: ${VectorDimensions.join(", ")}`
    );
  }
}

/**
 * Returns `dimension` typed as a `VectorDimension` after checking it.
 *
 * @param dimension - The embedding length to check.
 * @returns The same value, narrowed to `VectorDimension`.
 * @throws Error (from `assertVectorDimension`) when the dimension is unsupported.
 */
export function validateVectorDimension(dimension: number): VectorDimension {
  // Delegate so the check and the error message live in one place.
  assertVectorDimension(dimension);
  return dimension;
}

/** Union of the supported embedding lengths. */
export type VectorDimension = (typeof VectorDimensions)[number];
|
|
76
|
+
export const VectorTableNames = VectorDimensions.map(
|
|
77
|
+
(d) => `vectors_${d}`
|
|
78
|
+
) as `vectors_${(typeof VectorDimensions)[number]}`[];
|
|
79
|
+
export type VectorTableName = (typeof VectorTableNames)[number];
|
|
80
|
+
export type VectorTableId = GenericId<(typeof VectorTableNames)[number]>;
|
|
81
|
+
|
|
82
|
+
export const vVectorDimension = literals(...VectorDimensions);
|
|
83
|
+
export const vVectorTableName = literals(...VectorTableNames);
|
|
84
|
+
export const vVectorId = v.union(
|
|
85
|
+
...VectorTableNames.map((name) => v.id(name))
|
|
86
|
+
) as VUnion<
|
|
87
|
+
GenericId<(typeof VectorTableNames)[number]>,
|
|
88
|
+
VId<(typeof VectorTableNames)[number]>[]
|
|
89
|
+
>;
|
|
90
|
+
|
|
91
|
+
/**
 * Maps a supported dimension to its vector table name, `vectors_<dimension>`.
 */
export function getVectorTableName(dimension: VectorDimension) {
  const prefix = "vectors_";
  return (prefix + dimension) as VectorTableName;
}
|
|
94
|
+
// export function getVectorIdInfo(ctx: QueryCtx, id: VectorTableId) {
|
|
95
|
+
// for (const dimension of VectorDimensions) {
|
|
96
|
+
// const tableName = getVectorTableName(dimension);
|
|
97
|
+
// if (ctx.db.normalizeId(tableName, id)) {
|
|
98
|
+
// return { tableName, dimension };
|
|
99
|
+
// }
|
|
100
|
+
// }
|
|
101
|
+
// throw new Error(`Unknown vector table id: ${id}`);
|
|
102
|
+
// }
|
|
103
|
+
|
|
104
|
+
const tables: {
|
|
105
|
+
[K in keyof typeof VectorDimensions &
|
|
106
|
+
number as `vectors_${(typeof VectorDimensions)[K]}`]: Table;
|
|
107
|
+
} = Object.fromEntries(
|
|
108
|
+
VectorDimensions.map((dimensions) => [
|
|
109
|
+
`vectors_${dimensions}`,
|
|
110
|
+
table(dimensions),
|
|
111
|
+
])
|
|
112
|
+
) as Record<`vectors_${(typeof VectorDimensions)[number]}`, Table>;
|
|
113
|
+
|
|
114
|
+
export default tables;
|