@convex-dev/rag 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113) hide show
  1. package/LICENSE +201 -0
  2. package/README.md +371 -0
  3. package/dist/client/_generated/_ignore.d.ts +1 -0
  4. package/dist/client/_generated/_ignore.d.ts.map +1 -0
  5. package/dist/client/_generated/_ignore.js +3 -0
  6. package/dist/client/_generated/_ignore.js.map +1 -0
  7. package/dist/client/defaultChunker.d.ts +15 -0
  8. package/dist/client/defaultChunker.d.ts.map +1 -0
  9. package/dist/client/defaultChunker.js +148 -0
  10. package/dist/client/defaultChunker.js.map +1 -0
  11. package/dist/client/fileUtils.d.ts +24 -0
  12. package/dist/client/fileUtils.d.ts.map +1 -0
  13. package/dist/client/fileUtils.js +179 -0
  14. package/dist/client/fileUtils.js.map +1 -0
  15. package/dist/client/index.d.ts +442 -0
  16. package/dist/client/index.d.ts.map +1 -0
  17. package/dist/client/index.js +597 -0
  18. package/dist/client/index.js.map +1 -0
  19. package/dist/client/types.d.ts +29 -0
  20. package/dist/client/types.d.ts.map +1 -0
  21. package/dist/client/types.js +2 -0
  22. package/dist/client/types.js.map +1 -0
  23. package/dist/component/_generated/api.d.ts +439 -0
  24. package/dist/component/_generated/api.d.ts.map +1 -0
  25. package/dist/component/_generated/api.js +22 -0
  26. package/dist/component/_generated/api.js.map +1 -0
  27. package/dist/component/_generated/dataModel.d.ts +60 -0
  28. package/dist/component/_generated/server.d.ts +149 -0
  29. package/dist/component/_generated/server.d.ts.map +1 -0
  30. package/dist/component/_generated/server.js +74 -0
  31. package/dist/component/_generated/server.js.map +1 -0
  32. package/dist/component/chunks.d.ts +139 -0
  33. package/dist/component/chunks.d.ts.map +1 -0
  34. package/dist/component/chunks.js +413 -0
  35. package/dist/component/chunks.js.map +1 -0
  36. package/dist/component/convex.config.d.ts +3 -0
  37. package/dist/component/convex.config.d.ts.map +1 -0
  38. package/dist/component/convex.config.js +6 -0
  39. package/dist/component/convex.config.js.map +1 -0
  40. package/dist/component/embeddings/importance.d.ts +21 -0
  41. package/dist/component/embeddings/importance.d.ts.map +1 -0
  42. package/dist/component/embeddings/importance.js +67 -0
  43. package/dist/component/embeddings/importance.js.map +1 -0
  44. package/dist/component/embeddings/index.d.ts +23 -0
  45. package/dist/component/embeddings/index.d.ts.map +1 -0
  46. package/dist/component/embeddings/index.js +54 -0
  47. package/dist/component/embeddings/index.js.map +1 -0
  48. package/dist/component/embeddings/tables.d.ts +39 -0
  49. package/dist/component/embeddings/tables.d.ts.map +1 -0
  50. package/dist/component/embeddings/tables.js +53 -0
  51. package/dist/component/embeddings/tables.js.map +1 -0
  52. package/dist/component/entries.d.ts +167 -0
  53. package/dist/component/entries.d.ts.map +1 -0
  54. package/dist/component/entries.js +409 -0
  55. package/dist/component/entries.js.map +1 -0
  56. package/dist/component/filters.d.ts +46 -0
  57. package/dist/component/filters.d.ts.map +1 -0
  58. package/dist/component/filters.js +72 -0
  59. package/dist/component/filters.js.map +1 -0
  60. package/dist/component/namespaces.d.ts +131 -0
  61. package/dist/component/namespaces.d.ts.map +1 -0
  62. package/dist/component/namespaces.js +222 -0
  63. package/dist/component/namespaces.js.map +1 -0
  64. package/dist/component/schema.d.ts +1697 -0
  65. package/dist/component/schema.d.ts.map +1 -0
  66. package/dist/component/schema.js +88 -0
  67. package/dist/component/schema.js.map +1 -0
  68. package/dist/component/search.d.ts +20 -0
  69. package/dist/component/search.d.ts.map +1 -0
  70. package/dist/component/search.js +69 -0
  71. package/dist/component/search.js.map +1 -0
  72. package/dist/package.json +3 -0
  73. package/dist/react/index.d.ts +2 -0
  74. package/dist/react/index.d.ts.map +1 -0
  75. package/dist/react/index.js +6 -0
  76. package/dist/react/index.js.map +1 -0
  77. package/dist/shared.d.ts +479 -0
  78. package/dist/shared.d.ts.map +1 -0
  79. package/dist/shared.js +98 -0
  80. package/dist/shared.js.map +1 -0
  81. package/package.json +97 -0
  82. package/src/client/_generated/_ignore.ts +1 -0
  83. package/src/client/defaultChunker.test.ts +243 -0
  84. package/src/client/defaultChunker.ts +183 -0
  85. package/src/client/fileUtils.ts +179 -0
  86. package/src/client/index.test.ts +475 -0
  87. package/src/client/index.ts +1125 -0
  88. package/src/client/setup.test.ts +28 -0
  89. package/src/client/types.ts +69 -0
  90. package/src/component/_generated/api.d.ts +439 -0
  91. package/src/component/_generated/api.js +23 -0
  92. package/src/component/_generated/dataModel.d.ts +60 -0
  93. package/src/component/_generated/server.d.ts +149 -0
  94. package/src/component/_generated/server.js +90 -0
  95. package/src/component/chunks.test.ts +915 -0
  96. package/src/component/chunks.ts +555 -0
  97. package/src/component/convex.config.ts +7 -0
  98. package/src/component/embeddings/importance.test.ts +249 -0
  99. package/src/component/embeddings/importance.ts +75 -0
  100. package/src/component/embeddings/index.test.ts +482 -0
  101. package/src/component/embeddings/index.ts +99 -0
  102. package/src/component/embeddings/tables.ts +114 -0
  103. package/src/component/entries.test.ts +341 -0
  104. package/src/component/entries.ts +546 -0
  105. package/src/component/filters.ts +119 -0
  106. package/src/component/namespaces.ts +299 -0
  107. package/src/component/schema.ts +106 -0
  108. package/src/component/search.test.ts +445 -0
  109. package/src/component/search.ts +97 -0
  110. package/src/component/setup.test.ts +5 -0
  111. package/src/react/index.ts +7 -0
  112. package/src/shared.ts +247 -0
  113. package/src/vitest.config.ts +7 -0
@@ -0,0 +1,482 @@
1
+ /// <reference types="vite/client" />
2
+
3
+ import { describe, expect, test } from "vitest";
4
+ import { convexTest } from "convex-test";
5
+ import schema, { v } from "../schema.js";
6
+ import { modules } from "../setup.test.js";
7
+ import { insertEmbedding, searchEmbeddings } from "./index.js";
8
+ import { vectorWithImportanceDimension } from "./importance.js";
9
+ import { action } from "../_generated/server.js";
10
+ import { anyApi, type ApiFromModules } from "convex/server";
11
+
12
/**
 * Thin action wrapper so the tests below can invoke `searchEmbeddings`,
 * which takes an action context.
 */
export const search = action({
  args: {
    namespaceId: v.id("namespaces"),
    embedding: v.array(v.number()),
    limit: v.number(),
    filters: v.array(v.any()),
  },
  handler: async (ctx, searchArgs) => searchEmbeddings(ctx, searchArgs),
});
23
+
24
/** Functions exported by this test module that the tests call by reference. */
type TestFunctions = { search: typeof search };

// Typed function references for this module, resolved through `anyApi`.
const testApi: ApiFromModules<{ fns: TestFunctions }>["fns"] =
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  anyApi["embeddings"]["index.test"] as any;
30
+
31
+ describe("embeddings", () => {
32
test("insertEmbedding with no filters or importance works", async () => {
  const t = convexTest(schema, modules);

  // A namespace has to exist before a vector can reference it.
  const namespaceId = await t.run(async (ctx) => {
    return ctx.db.insert("namespaces", {
      namespace: "test-namespace",
      version: 1,
      modelId: "test-model",
      dimension: 128,
      filterNames: [],
      status: { kind: "ready" },
    });
  });

  // A simple 128-dimension embedding.
  const embedding = Array(128).fill(0.1);

  // Insert it with neither importance nor filters.
  const vectorId = await t.run(async (ctx) => {
    return insertEmbedding(ctx, embedding, namespaceId, undefined, undefined);
  });

  expect(vectorId).toBeDefined();

  // Read the stored document back.
  const insertedVector = await t.run(async (ctx) => {
    return ctx.db.get(vectorId);
  });

  // `ctx.db.get` yields null (not undefined) for a missing document, and
  // `toBeDefined()` would accept null, so check for null explicitly.
  expect(insertedVector).not.toBeNull();
  expect(insertedVector!.namespaceId).toBe(namespaceId);
  // The stored vector has the length reported by the importance helper.
  expect(insertedVector!.vector).toHaveLength(
    vectorWithImportanceDimension(128)
  );
  // No filter slot should have been populated.
  expect(insertedVector!.filter0).toBeUndefined();
  expect(insertedVector!.filter1).toBeUndefined();
  expect(insertedVector!.filter2).toBeUndefined();
  expect(insertedVector!.filter3).toBeUndefined();
});
72
+
73
test("insertEmbedding with importance modifies the vector", async () => {
  const t = convexTest(schema, modules);

  const namespaceId = await t.run(async (ctx) => {
    return ctx.db.insert("namespaces", {
      namespace: "test-namespace-importance",
      version: 1,
      modelId: "test-model",
      dimension: 128,
      filterNames: [],
      status: { kind: "ready" },
    });
  });

  const embedding = Array(128).fill(0.1);
  const importance = 0.5;

  // Insert the embedding with an explicit importance.
  const vectorId = await t.run(async (ctx) => {
    return insertEmbedding(
      ctx,
      embedding,
      namespaceId,
      importance,
      undefined
    );
  });

  const insertedVector = await t.run(async (ctx) => {
    return ctx.db.get(vectorId);
  });

  // `ctx.db.get` yields null for a missing document and `toBeDefined()`
  // accepts null, so assert non-null instead.
  expect(insertedVector).not.toBeNull();
  expect(insertedVector!.vector).toHaveLength(129);

  // Insert the same embedding without importance to compare against.
  const vectorWithoutImportance = await t.run(async (ctx) => {
    return insertEmbedding(ctx, embedding, namespaceId, undefined, undefined);
  });

  const vectorWithoutImportanceData = await t.run(async (ctx) => {
    return ctx.db.get(vectorWithoutImportance);
  });
  expect(vectorWithoutImportanceData).not.toBeNull();

  // The two stored vectors must differ because of the importance scaling.
  expect(insertedVector!.vector).not.toEqual(
    vectorWithoutImportanceData!.vector
  );

  // The last element should be the weight: sqrt(1 - importance^2)
  const expectedWeight = Math.sqrt(1 - importance ** 2);
  expect(insertedVector!.vector[128]).toBeCloseTo(expectedWeight, 5);
});
126
+
127
test("search for vectors sorted by importance when identical otherwise", async () => {
  const t = convexTest(schema, modules);

  const namespaceId = await t.run(async (ctx) =>
    ctx.db.insert("namespaces", {
      namespace: "importance-sort-test",
      version: 1,
      modelId: "test-model",
      dimension: 128,
      filterNames: [],
      status: { kind: "ready" },
    })
  );

  const embedding = Array(128).fill(0.1);

  // Store the identical embedding three times, varying only the importance:
  // low, then high, then medium.
  await t.run(async (ctx) => {
    for (const importance of [0.2, 0.8, 0.5]) {
      await insertEmbedding(ctx, embedding, namespaceId, importance, undefined);
    }
  });

  const results = await t.action(testApi.search, {
    embedding,
    namespaceId,
    filters: [],
    limit: 10,
  });

  expect(results).toHaveLength(3);

  // With identical embeddings the scores must still be strictly decreasing,
  // i.e. the importance has to separate the three hits.
  const [first, second, third] = results;
  expect(first._score).toBeGreaterThan(second._score);
  expect(second._score).toBeGreaterThan(third._score);
});
165
+
166
test("filters are added to the correct field", async () => {
  const t = convexTest(schema, modules);

  const namespaceId = await t.run(async (ctx) =>
    ctx.db.insert("namespaces", {
      namespace: "filter-test",
      version: 1,
      modelId: "test-model",
      dimension: 128,
      filterNames: ["category", "priority", "status", "author"],
      status: { kind: "ready" },
    })
  );

  const embedding = Array(128).fill(0.1);

  // One vector carries a filter in slot 0, the other in slot 2.
  const slot0Id = await t.run(async (ctx) =>
    insertEmbedding(ctx, embedding, namespaceId, undefined, { 0: "entries" })
  );
  const slot2Id = await t.run(async (ctx) =>
    insertEmbedding(ctx, embedding, namespaceId, undefined, { 2: "active" })
  );

  const slot0Doc = await t.run(async (ctx) => ctx.db.get(slot0Id));
  const slot2Doc = await t.run(async (ctx) => ctx.db.get(slot2Id));

  // Slot 0 only: stored as a [namespaceId, value] pair in filter0.
  expect(slot0Doc!.filter0).toEqual([namespaceId, "entries"]);
  expect(slot0Doc!.filter1).toBeUndefined();
  expect(slot0Doc!.filter2).toBeUndefined();
  expect(slot0Doc!.filter3).toBeUndefined();

  // Slot 2 only: stored in filter2, every other slot untouched.
  expect(slot2Doc!.filter0).toBeUndefined();
  expect(slot2Doc!.filter1).toBeUndefined();
  expect(slot2Doc!.filter2).toEqual([namespaceId, "active"]);
  expect(slot2Doc!.filter3).toBeUndefined();
});
210
+
211
test("embeddings have namespace prefixed on filter fields", async () => {
  const t = convexTest(schema, modules);

  // Both namespaces are identical apart from their name.
  const createNamespace = (namespace: string) =>
    t.run(async (ctx) =>
      ctx.db.insert("namespaces", {
        namespace,
        version: 1,
        modelId: "test-model",
        dimension: 128,
        filterNames: ["type"],
        status: { kind: "ready" },
      })
    );

  const namespace1Id = await createNamespace("namespace1");
  const namespace2Id = await createNamespace("namespace2");

  const embedding = Array(128).fill(0.1);

  // Store the same filter value once per namespace.
  const insertArticle = (ownerId: typeof namespace1Id) =>
    t.run(async (ctx) =>
      insertEmbedding(ctx, embedding, ownerId, undefined, { 0: "article" })
    );

  const vector1Id = await insertArticle(namespace1Id);
  const vector2Id = await insertArticle(namespace2Id);

  const vector1 = await t.run(async (ctx) => ctx.db.get(vector1Id));
  const vector2 = await t.run(async (ctx) => ctx.db.get(vector2Id));

  // Same value, but each stored filter is prefixed with its own namespace id.
  expect(vector1!.filter0).toEqual([namespace1Id, "article"]);
  expect(vector2!.filter0).toEqual([namespace2Id, "article"]);
  expect(vector1!.filter0).not.toEqual(vector2!.filter0);
});
259
+
260
test("search without filters returns only vectors in the target namespace", async () => {
  const t = convexTest(schema, modules);

  // Two namespaces that differ only by name.
  const createNamespace = (namespace: string) =>
    t.run(async (ctx) =>
      ctx.db.insert("namespaces", {
        namespace,
        version: 1,
        modelId: "test-model",
        dimension: 128,
        filterNames: [],
        status: { kind: "ready" },
      })
    );

  const namespace1Id = await createNamespace("namespace1");
  const namespace2Id = await createNamespace("namespace2");

  const embedding = Array(128).fill(0.1);

  // Two vectors go into namespace1 and one into namespace2.
  await t.run(async (ctx) => {
    for (const ownerId of [namespace1Id, namespace1Id, namespace2Id]) {
      await insertEmbedding(ctx, embedding, ownerId, undefined, undefined);
    }
  });

  // Unfiltered search scoped to a single namespace.
  const searchIn = (namespaceId: typeof namespace1Id) =>
    t.action(testApi.search, {
      embedding,
      namespaceId,
      filters: [],
      limit: 10,
    });

  const results1 = await searchIn(namespace1Id);
  const results2 = await searchIn(namespace2Id);

  expect(results1).toHaveLength(2);
  expect(results2).toHaveLength(1);

  // Every hit must belong to the namespace that was searched.
  const expectations = [
    [results1, namespace1Id],
    [results2, namespace2Id],
  ] as const;
  for (const [hits, expectedNamespaceId] of expectations) {
    for (const hit of hits) {
      const stored = await t.run(async (ctx) => ctx.db.get(hit._id));
      expect(stored!.namespaceId).toBe(expectedNamespaceId);
    }
  }
});
324
+
325
test("search with filters returns only matching vectors in namespace", async () => {
  const t = convexTest(schema, modules);

  const namespaceId = await t.run(async (ctx) =>
    ctx.db.insert("namespaces", {
      namespace: "filtered-search",
      version: 1,
      modelId: "test-model",
      dimension: 128,
      filterNames: ["category", "status"],
      status: { kind: "ready" },
    })
  );

  const embedding = Array(128).fill(0.1);

  // Filter combinations to store, in insertion order.
  const filterSets: Parameters<typeof insertEmbedding>[4][] = [
    { 0: "articles" },
    { 0: "blogs" },
    { 1: "published" },
    { 0: "articles", 1: "draft" },
    undefined, // No filters
  ];
  await t.run(async (ctx) => {
    for (const filterSet of filterSets) {
      await insertEmbedding(ctx, embedding, namespaceId, undefined, filterSet);
    }
  });

  // Category "articles" was stored on two vectors.
  const articlesResults = await t.action(testApi.search, {
    embedding,
    namespaceId,
    filters: [{ 0: "articles" }],
    limit: 10,
  });
  expect(articlesResults).toHaveLength(2);

  // Status "published" was stored on exactly one vector.
  const publishedResults = await t.action(testApi.search, {
    embedding,
    namespaceId,
    filters: [{ 1: "published" }],
    limit: 10,
  });
  expect(publishedResults).toHaveLength(1);
});
379
+
380
test("multiple filters perform OR operation", async () => {
  const t = convexTest(schema, modules);

  const namespaceId = await t.run(async (ctx) =>
    ctx.db.insert("namespaces", {
      namespace: "multi-filter-or",
      version: 1,
      modelId: "test-model",
      dimension: 128,
      filterNames: ["category", "priority"],
      status: { kind: "ready" },
    })
  );

  const embedding = Array(128).fill(0.1);

  // Filter values to store, in insertion order.
  const filterSets: Parameters<typeof insertEmbedding>[4][] = [
    { 0: "articles" },
    { 0: "blogs" },
    { 1: "high" },
    { 1: "low" },
    undefined, // No filters
  ];
  await t.run(async (ctx) => {
    for (const filterSet of filterSets) {
      await insertEmbedding(ctx, embedding, namespaceId, undefined, filterSet);
    }
  });

  // category = articles OR priority = high
  const orResults = await t.action(testApi.search, {
    embedding,
    namespaceId,
    filters: [{ 0: "articles" }, { 1: "high" }],
    limit: 10,
  });

  // One vector matches each branch of the OR.
  expect(orResults).toHaveLength(2);

  // Load the matched documents and confirm both branches are represented.
  const matchedDocs = await t.run(async (ctx) =>
    Promise.all(orResults.map((hit) => ctx.db.get(hit._id)))
  );

  const hasArticles = matchedDocs.some(
    (doc) => doc!.filter0?.[1] === "articles"
  );
  const hasHighPriority = matchedDocs.some(
    (doc) => doc!.filter1?.[1] === "high"
  );

  expect(hasArticles).toBe(true);
  expect(hasHighPriority).toBe(true);
});
438
+
439
test("searchEmbeddings", async () => {
  const t = convexTest(schema, modules);

  const namespaceId = await t.run(async (ctx) => {
    return ctx.db.insert("namespaces", {
      namespace: "search-test",
      version: 1,
      modelId: "test-model",
      dimension: 128,
      filterNames: [],
      status: { kind: "ready" },
    });
  });

  // The three vectors differ only in their first component.
  const embedding1 = Array(128).fill(0.1);
  embedding1[0] = 1;
  const embedding2 = Array(128).fill(0.1);
  embedding2[0] = 0;
  const searchEmbedding = Array(128).fill(0.1);
  searchEmbedding[0] = 0.8; // Closer to embedding1

  // Insert both embeddings and keep their ids so the ranking can be checked.
  const { id1, id2 } = await t.run(async (ctx) => {
    const id1 = await insertEmbedding(
      ctx,
      embedding1,
      namespaceId,
      undefined,
      undefined
    );
    const id2 = await insertEmbedding(
      ctx,
      embedding2,
      namespaceId,
      undefined,
      undefined
    );
    return { id1, id2 };
  });

  // Search should return results ordered by similarity
  const results = await t.action(testApi.search, {
    embedding: searchEmbedding,
    namespaceId,
    filters: [],
    limit: 10,
  });

  expect(results).toHaveLength(2);
  expect(results[0]._score).toBeGreaterThan(results[1]._score);

  // searchEmbedding (first component 0.8) is closer to embedding1 (first
  // component 1) than to embedding2 (first component 0), so embedding1 must
  // be ranked first.
  expect(results[0]._id).toBe(id1);
  expect(results[1]._id).toBe(id2);

  // `ctx.db.get` yields null for a missing document and `toBeDefined()`
  // accepts null, so assert non-null instead.
  const firstVector = await t.run(async (ctx) => ctx.db.get(results[0]._id));
  expect(firstVector).not.toBeNull();
});
482
+ });
@@ -0,0 +1,99 @@
1
+ /**
2
+ * This file is the interface for interacting with vectors.
3
+ * It translates from embeddings to the underlying vector storage and search.
4
+ * It modifies embeddings to include importance.
5
+ * Callers outside this file refer to filters by user-defined names.
6
+ * The underlying vector storage has its own field names.
7
+ * This file takes in numbered filters (0-3) to translate without knowing about
8
+ * the user-defined names.
9
+ */
10
+ import type { Id } from "../_generated/dataModel.js";
11
+ import { type ActionCtx, type MutationCtx } from "../_generated/server.js";
12
+ import { getVectorTableName, validateVectorDimension } from "./tables.js";
13
+ import { filterFieldsFromNumbers, type NumberedFilter } from "../filters.js";
14
+ import { searchVector, vectorWithImportance } from "./importance.js";
15
+
16
+ // TODO: wait to see if this is needed.
17
+ // export const insertBatch = mutation({
18
+ // args: {
19
+ // vectorDimension: vVectorDimension,
20
+ // vectors: v.array(
21
+ // v.object({
22
+ // vector: v.array(v.number()),
23
+ // namespace: v.id("namespaces"),
24
+ // importance: v.optional(v.number()),
25
+ // filters: v.optional(v.any()),
26
+ // })
27
+ // ),
28
+ // },
29
+ // returns: v.array(vVectorId),
30
+ // handler: async (ctx, args) => {
31
+ // return Promise.all(
32
+ // args.vectors.map(async (vector) =>
33
+ // insertEmbedding(
34
+ // ctx,
35
+ // vector.vector,
36
+ // vector.namespace,
37
+ // vector.importance,
38
+ // vector.filters
39
+ // )
40
+ // )
41
+ // );
42
+ // },
43
+ // });
44
+
45
+
46
/**
 * Stores a single embedding in the vector table that matches its length.
 *
 * @param ctx Mutation context used for the insert.
 * @param embedding Raw embedding; its length selects the vector table.
 * @param namespaceId Namespace the vector belongs to.
 * @param importance Optional importance; treated as 1 when undefined.
 * @param filters Optional numbered filters, translated to storage fields.
 * @returns The result of `ctx.db.insert` for the new vector document.
 * @throws If `validateVectorDimension` rejects the embedding length.
 */
export async function insertEmbedding(
  ctx: MutationCtx,
  embedding: number[],
  namespaceId: Id<"namespaces">,
  importance: number | undefined,
  filters: NumberedFilter | undefined
) {
  const filterFields = filterFieldsFromNumbers(namespaceId, filters);
  const tableName = getVectorTableName(
    validateVectorDimension(embedding.length)
  );
  // The importance is folded into the vector itself before it is stored.
  const vector = vectorWithImportance(embedding, importance ?? 1);
  return ctx.db.insert(tableName, { namespaceId, vector, ...filterFields });
}
61
+
62
/**
 * Runs a vector search over the table matching the embedding's length.
 *
 * With no filters the search is restricted by `namespaceId` alone. With
 * filters, every translated filter field becomes one equality condition and
 * the conditions are combined with OR.
 *
 * @param ctx Action context providing `vectorSearch`.
 * @returns The result of `ctx.vectorSearch`.
 * @throws If `validateVectorDimension` rejects the embedding length.
 */
export async function searchEmbeddings(
  ctx: ActionCtx,
  {
    embedding,
    namespaceId,
    filters,
    limit,
  }: {
    embedding: number[];
    namespaceId: Id<"namespaces">;
    // NOTE: Current vector search supports OR logic between filters
    // Multiple filters will match if ANY condition is met
    // e.g. [{3: filter3}, {1: filter1}, {2: filter2}] will match if any of
    // filter3, filter1, or filter2 is present.
    filters: Array<NumberedFilter>;
    limit: number;
  }
) {
  const tableName = getVectorTableName(
    validateVectorDimension(embedding.length)
  );
  // Translate each numbered filter into its storage field(s).
  const orFilters = filters.flatMap((filter) =>
    filterFieldsFromNumbers(namespaceId, filter)
  );
  return ctx.vectorSearch(tableName, "vector", {
    vector: searchVector(embedding),
    filter: (q) => {
      // No filters: scope the search by namespace only.
      if (orFilters.length === 0) {
        return q.eq("namespaceId", namespaceId);
      }
      // Otherwise OR together one equality per stored filter field.
      const conditions = orFilters.flatMap((fields) =>
        Object.entries(fields).map(([fieldName, fieldValue]) =>
          q.eq(fieldName as keyof (typeof orFilters)[number], fieldValue)
        )
      );
      return q.or(...conditions);
    },
    limit,
  });
}
@@ -0,0 +1,114 @@
1
+ import { literals } from "convex-helpers/validators";
2
+ import {
3
+ defineTable,
4
+ type GenericTableSearchIndexes,
5
+ type SchemaDefinition,
6
+ type TableDefinition,
7
+ } from "convex/server";
8
+ import {
9
+ type GenericId,
10
+ type ObjectType,
11
+ v,
12
+ type VId,
13
+ type VObject,
14
+ type VUnion,
15
+ } from "convex/values";
16
+ import { vectorWithImportanceDimension } from "./importance.js";
17
+ import { allFilterFieldNames, vAllFilterFields } from "../filters.js";
18
+
19
+ // Fields of every vector table row: the stored vector plus the filter fields from ../filters.js.
20
+ const embeddingsFields = {
21
+ vector: v.array(v.number()),
22
+ ...vAllFilterFields,
23
+ };
24
+
25
/**
 * Builds the table definition for one embedding size, with a vector index
 * named "vector" over the `vector` field.
 *
 * @param dimensions Embedding size; the index dimension is derived from it
 *   via `vectorWithImportanceDimension`.
 */
function table(dimensions: VectorDimension): Table {
  const definition = defineTable(embeddingsFields);
  const indexDimensions = vectorWithImportanceDimension(dimensions);
  return definition.vectorIndex("vector", {
    vectorField: "vector",
    dimensions: indexDimensions,
    filterFields: allFilterFieldNames,
  });
}
32
+
33
+ type Table = TableDefinition< // Declared type of each vector table returned by table().
34
+ VObject<ObjectType<typeof embeddingsFields>, typeof embeddingsFields>,
35
+ { model_table_threadId: ["model", "table", "threadId", "_creationTime"] }, // NOTE(review): table() declares no such index and embeddingsFields shows no such fields; looks like a leftover, confirm.
36
+ GenericTableSearchIndexes,
37
+ VectorIndex
38
+ >;
39
+
40
+ type VectorIndex = { // Type-level description of the single vector index on each table.
41
+ vector: { // Index name, matching the "vector" index created in table().
42
+ vectorField: "vector";
43
+ dimensions: number;
44
+ filterFields: string;
45
+ };
46
+ };
47
+
48
+ export type VectorSchema = SchemaDefinition< // Schema type holding one Table per vector table name.
49
+ { [key in VectorTableName]: Table },
50
+ true
51
+ >;
52
+
53
/** Embedding sizes for which a dedicated vector table is defined. */
export const VectorDimensions = [
  128, 256, 512, 768, 1024, 1408, 1536, 2048, 3072, 4096,
] as const;

/**
 * Asserts that `dimension` is one of `VectorDimensions`, narrowing its type
 * for the caller.
 *
 * @param dimension Candidate embedding size.
 * @throws Error naming the rejected value and listing the supported sizes.
 */
export function assertVectorDimension(
  dimension: number
): asserts dimension is VectorDimension {
  if (!VectorDimensions.includes(dimension as VectorDimension)) {
    throw new Error(
      `Unsupported vector dimension ${dimension}. Supported: ${VectorDimensions.join(", ")}`
    );
  }
}

/**
 * Same check as `assertVectorDimension`, but returns the narrowed value so
 * it can be used inline in an expression.
 *
 * @param dimension Candidate embedding size.
 * @returns `dimension`, typed as a supported `VectorDimension`.
 * @throws Error if the dimension is not supported.
 */
export function validateVectorDimension(dimension: number): VectorDimension {
  // Delegate so the check and the error message live in one place.
  assertVectorDimension(dimension);
  return dimension;
}
/** Union of the supported embedding sizes. */
export type VectorDimension = (typeof VectorDimensions)[number];
76
+ export const VectorTableNames = VectorDimensions.map( // One table name per supported dimension.
77
+ (d) => `vectors_${d}`
78
+ ) as `vectors_${(typeof VectorDimensions)[number]}`[]; // Cast keeps the literal names instead of string[].
79
+ export type VectorTableName = (typeof VectorTableNames)[number]; // Union of the vector table names.
80
+ export type VectorTableId = GenericId<(typeof VectorTableNames)[number]>; // Id of a document in any vector table.
81
+
82
+ export const vVectorDimension = literals(...VectorDimensions); // Validator built from the supported dimensions.
83
+ export const vVectorTableName = literals(...VectorTableNames); // Validator built from the vector table names.
84
+ export const vVectorId = v.union( // Union of v.id(...) validators, one per vector table.
85
+ ...VectorTableNames.map((name) => v.id(name))
86
+ ) as VUnion<
87
+ GenericId<(typeof VectorTableNames)[number]>,
88
+ VId<(typeof VectorTableNames)[number]>[]
89
+ >;
90
+
91
/**
 * Maps an embedding size to the name of its vector table,
 * i.e. "vectors_" followed by the dimension.
 */
export function getVectorTableName(dimension: VectorDimension) {
  const tableName = "vectors_" + String(dimension);
  return tableName as VectorTableName;
}
94
+ // export function getVectorIdInfo(ctx: QueryCtx, id: VectorTableId) {
95
+ // for (const dimension of VectorDimensions) {
96
+ // const tableName = getVectorTableName(dimension);
97
+ // if (ctx.db.normalizeId(tableName, id)) {
98
+ // return { tableName, dimension };
99
+ // }
100
+ // }
101
+ // throw new Error(`Unknown vector table id: ${id}`);
102
+ // }
103
+
104
+ const tables: { // Maps each vector table name to its table definition.
105
+ [K in keyof typeof VectorDimensions & // Iterate the numeric indices of the VectorDimensions tuple...
106
+ number as `vectors_${(typeof VectorDimensions)[K]}`]: Table; // ...and re-key each entry as `vectors_<dimension>`.
107
+ } = Object.fromEntries(
108
+ VectorDimensions.map((dimensions) => [ // Build one [tableName, definition] pair per supported dimension.
109
+ `vectors_${dimensions}`,
110
+ table(dimensions),
111
+ ])
112
+ ) as Record<`vectors_${(typeof VectorDimensions)[number]}`, Table>; // Object.fromEntries loses the key type, so it is restored by cast.
113
+
114
+ export default tables;