@convex-dev/rag 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113)
  1. package/LICENSE +201 -0
  2. package/README.md +371 -0
  3. package/dist/client/_generated/_ignore.d.ts +1 -0
  4. package/dist/client/_generated/_ignore.d.ts.map +1 -0
  5. package/dist/client/_generated/_ignore.js +3 -0
  6. package/dist/client/_generated/_ignore.js.map +1 -0
  7. package/dist/client/defaultChunker.d.ts +15 -0
  8. package/dist/client/defaultChunker.d.ts.map +1 -0
  9. package/dist/client/defaultChunker.js +148 -0
  10. package/dist/client/defaultChunker.js.map +1 -0
  11. package/dist/client/fileUtils.d.ts +24 -0
  12. package/dist/client/fileUtils.d.ts.map +1 -0
  13. package/dist/client/fileUtils.js +179 -0
  14. package/dist/client/fileUtils.js.map +1 -0
  15. package/dist/client/index.d.ts +442 -0
  16. package/dist/client/index.d.ts.map +1 -0
  17. package/dist/client/index.js +597 -0
  18. package/dist/client/index.js.map +1 -0
  19. package/dist/client/types.d.ts +29 -0
  20. package/dist/client/types.d.ts.map +1 -0
  21. package/dist/client/types.js +2 -0
  22. package/dist/client/types.js.map +1 -0
  23. package/dist/component/_generated/api.d.ts +439 -0
  24. package/dist/component/_generated/api.d.ts.map +1 -0
  25. package/dist/component/_generated/api.js +22 -0
  26. package/dist/component/_generated/api.js.map +1 -0
  27. package/dist/component/_generated/dataModel.d.ts +60 -0
  28. package/dist/component/_generated/server.d.ts +149 -0
  29. package/dist/component/_generated/server.d.ts.map +1 -0
  30. package/dist/component/_generated/server.js +74 -0
  31. package/dist/component/_generated/server.js.map +1 -0
  32. package/dist/component/chunks.d.ts +139 -0
  33. package/dist/component/chunks.d.ts.map +1 -0
  34. package/dist/component/chunks.js +413 -0
  35. package/dist/component/chunks.js.map +1 -0
  36. package/dist/component/convex.config.d.ts +3 -0
  37. package/dist/component/convex.config.d.ts.map +1 -0
  38. package/dist/component/convex.config.js +6 -0
  39. package/dist/component/convex.config.js.map +1 -0
  40. package/dist/component/embeddings/importance.d.ts +21 -0
  41. package/dist/component/embeddings/importance.d.ts.map +1 -0
  42. package/dist/component/embeddings/importance.js +67 -0
  43. package/dist/component/embeddings/importance.js.map +1 -0
  44. package/dist/component/embeddings/index.d.ts +23 -0
  45. package/dist/component/embeddings/index.d.ts.map +1 -0
  46. package/dist/component/embeddings/index.js +54 -0
  47. package/dist/component/embeddings/index.js.map +1 -0
  48. package/dist/component/embeddings/tables.d.ts +39 -0
  49. package/dist/component/embeddings/tables.d.ts.map +1 -0
  50. package/dist/component/embeddings/tables.js +53 -0
  51. package/dist/component/embeddings/tables.js.map +1 -0
  52. package/dist/component/entries.d.ts +167 -0
  53. package/dist/component/entries.d.ts.map +1 -0
  54. package/dist/component/entries.js +409 -0
  55. package/dist/component/entries.js.map +1 -0
  56. package/dist/component/filters.d.ts +46 -0
  57. package/dist/component/filters.d.ts.map +1 -0
  58. package/dist/component/filters.js +72 -0
  59. package/dist/component/filters.js.map +1 -0
  60. package/dist/component/namespaces.d.ts +131 -0
  61. package/dist/component/namespaces.d.ts.map +1 -0
  62. package/dist/component/namespaces.js +222 -0
  63. package/dist/component/namespaces.js.map +1 -0
  64. package/dist/component/schema.d.ts +1697 -0
  65. package/dist/component/schema.d.ts.map +1 -0
  66. package/dist/component/schema.js +88 -0
  67. package/dist/component/schema.js.map +1 -0
  68. package/dist/component/search.d.ts +20 -0
  69. package/dist/component/search.d.ts.map +1 -0
  70. package/dist/component/search.js +69 -0
  71. package/dist/component/search.js.map +1 -0
  72. package/dist/package.json +3 -0
  73. package/dist/react/index.d.ts +2 -0
  74. package/dist/react/index.d.ts.map +1 -0
  75. package/dist/react/index.js +6 -0
  76. package/dist/react/index.js.map +1 -0
  77. package/dist/shared.d.ts +479 -0
  78. package/dist/shared.d.ts.map +1 -0
  79. package/dist/shared.js +98 -0
  80. package/dist/shared.js.map +1 -0
  81. package/package.json +97 -0
  82. package/src/client/_generated/_ignore.ts +1 -0
  83. package/src/client/defaultChunker.test.ts +243 -0
  84. package/src/client/defaultChunker.ts +183 -0
  85. package/src/client/fileUtils.ts +179 -0
  86. package/src/client/index.test.ts +475 -0
  87. package/src/client/index.ts +1125 -0
  88. package/src/client/setup.test.ts +28 -0
  89. package/src/client/types.ts +69 -0
  90. package/src/component/_generated/api.d.ts +439 -0
  91. package/src/component/_generated/api.js +23 -0
  92. package/src/component/_generated/dataModel.d.ts +60 -0
  93. package/src/component/_generated/server.d.ts +149 -0
  94. package/src/component/_generated/server.js +90 -0
  95. package/src/component/chunks.test.ts +915 -0
  96. package/src/component/chunks.ts +555 -0
  97. package/src/component/convex.config.ts +7 -0
  98. package/src/component/embeddings/importance.test.ts +249 -0
  99. package/src/component/embeddings/importance.ts +75 -0
  100. package/src/component/embeddings/index.test.ts +482 -0
  101. package/src/component/embeddings/index.ts +99 -0
  102. package/src/component/embeddings/tables.ts +114 -0
  103. package/src/component/entries.test.ts +341 -0
  104. package/src/component/entries.ts +546 -0
  105. package/src/component/filters.ts +119 -0
  106. package/src/component/namespaces.ts +299 -0
  107. package/src/component/schema.ts +106 -0
  108. package/src/component/search.test.ts +445 -0
  109. package/src/component/search.ts +97 -0
  110. package/src/component/setup.test.ts +5 -0
  111. package/src/react/index.ts +7 -0
  112. package/src/shared.ts +247 -0
  113. package/src/vitest.config.ts +7 -0
@@ -0,0 +1,915 @@
1
+ /// <reference types="vite/client" />
2
+
3
+ import { describe, expect, test } from "vitest";
4
+ import { convexTest, type TestConvex } from "convex-test";
5
+ import schema from "./schema.js";
6
+ import { api, internal } from "./_generated/api.js";
7
+ import { modules } from "./setup.test.js";
8
+ import { insertChunks, deleteChunksPage } from "./chunks.js";
9
+ import type { Id } from "./_generated/dataModel.js";
10
+ import { assert } from "convex-helpers";
11
+
12
+ type ConvexTest = TestConvex<typeof schema>;
13
+
14
+ describe("chunks", () => {
15
+ async function setupTestNamespace(t: ConvexTest) {
16
+ return await t.run(async (ctx) => {
17
+ return ctx.db.insert("namespaces", {
18
+ namespace: "test-namespace",
19
+ version: 1,
20
+ modelId: "test-model",
21
+ dimension: 128,
22
+ filterNames: [],
23
+ status: { kind: "ready" },
24
+ });
25
+ });
26
+ }
27
+
28
+ async function setupTestEntry(
29
+ t: ConvexTest,
30
+ namespaceId: Id<"namespaces">,
31
+ key = "test-entry",
32
+ version = 0,
33
+ status: "ready" | "pending" = "ready"
34
+ ) {
35
+ return await t.run(async (ctx) => {
36
+ return ctx.db.insert("entries", {
37
+ namespaceId,
38
+ key,
39
+ version,
40
+ status: { kind: status },
41
+ contentHash: `test-content-hash-${key}-${version}`,
42
+ importance: 0.5,
43
+ filterValues: [],
44
+ });
45
+ });
46
+ }
47
+
48
+ function createTestChunks(count = 3) {
49
+ return Array.from({ length: count }, (_, i) => ({
50
+ content: {
51
+ text: `Test chunk content ${i + 1}`,
52
+ metadata: { index: i },
53
+ },
54
+ embedding: Array(128).fill(0.1 + i * 0.01),
55
+ }));
56
+ }
57
+
58
+ test("inserting chunks when there's no entry throws error", async () => {
59
+ const t = convexTest(schema, modules);
60
+ await setupTestNamespace(t);
61
+
62
+ // Try to insert chunks for a non-existent entry
63
+ const nonExistentDocId = "j57c3xc4x6j3c4x6j3c4x6j3c4x6" as Id<"entries">;
64
+ const chunks = createTestChunks(2);
65
+
66
+ await expect(
67
+ t.run(async (ctx) => {
68
+ return insertChunks(ctx, {
69
+ entryId: nonExistentDocId,
70
+ startOrder: 0,
71
+ chunks,
72
+ });
73
+ })
74
+ ).rejects.toThrow(`Entry ${nonExistentDocId} not found`);
75
+ });
76
+
77
+ test("overwriting chunks with insert works", async () => {
78
+ const t = convexTest(schema, modules);
79
+ const namespaceId = await setupTestNamespace(t);
80
+ const entryId = await setupTestEntry(t, namespaceId);
81
+
82
+ // Insert initial chunks
83
+ const initialChunks = createTestChunks(3);
84
+ await t.run(async (ctx) => {
85
+ return insertChunks(ctx, {
86
+ entryId,
87
+ startOrder: 0,
88
+ chunks: initialChunks,
89
+ });
90
+ });
91
+
92
+ // Verify initial chunks exist
93
+ const initialChunksList = await t.run(async (ctx) => {
94
+ return ctx.db
95
+ .query("chunks")
96
+ .withIndex("entryId_order", (q) => q.eq("entryId", entryId))
97
+ .collect();
98
+ });
99
+ expect(initialChunksList).toHaveLength(3);
100
+
101
+ // Overwrite chunks 1 and 2 with new content
102
+ const overwriteChunks = [
103
+ {
104
+ content: {
105
+ text: "Overwritten chunk 1 content",
106
+ metadata: { overwritten: true, index: 1 },
107
+ },
108
+ embedding: Array(128).fill(0.9),
109
+ },
110
+ {
111
+ content: {
112
+ text: "Overwritten chunk 2 content",
113
+ metadata: { overwritten: true, index: 2 },
114
+ },
115
+ embedding: Array(128).fill(0.8),
116
+ },
117
+ ];
118
+
119
+ await t.run(async (ctx) => {
120
+ return insertChunks(ctx, {
121
+ entryId,
122
+ startOrder: 1,
123
+ chunks: overwriteChunks,
124
+ });
125
+ });
126
+
127
+ // Verify total chunks is still correct (original chunk 0 + 2 overwritten)
128
+ const finalChunksList = await t.run(async (ctx) => {
129
+ return ctx.db
130
+ .query("chunks")
131
+ .withIndex("entryId_order", (q) => q.eq("entryId", entryId))
132
+ .collect();
133
+ });
134
+ expect(finalChunksList).toHaveLength(3);
135
+
136
+ // Verify the overwritten chunks have new content
137
+ const overwrittenChunk1 = finalChunksList.find((c) => c.order === 1);
138
+ const overwrittenChunk2 = finalChunksList.find((c) => c.order === 2);
139
+
140
+ expect(overwrittenChunk1).toBeDefined();
141
+ expect(overwrittenChunk2).toBeDefined();
142
+
143
+ const content1 = await t.run(async (ctx) =>
144
+ ctx.db.get(overwrittenChunk1!.contentId)
145
+ );
146
+ const content2 = await t.run(async (ctx) =>
147
+ ctx.db.get(overwrittenChunk2!.contentId)
148
+ );
149
+
150
+ expect(content1!.text).toBe("Overwritten chunk 1 content");
151
+ expect(content1!.metadata?.overwritten).toBe(true);
152
+ expect(content2!.text).toBe("Overwritten chunk 2 content");
153
+ expect(content2!.metadata?.overwritten).toBe(true);
154
+ });
155
+
156
+ test("when replacing an older version, older one is marked as replaced and only new one shows up in search results", async () => {
157
+ const t = convexTest(schema, modules);
158
+ const namespaceId = await setupTestNamespace(t);
159
+
160
+ // Create version 1 of entry
161
+ const docV1Id = await setupTestEntry(t, namespaceId, "versioned-entry", 1);
162
+
163
+ // Insert chunks in version 1
164
+ const v1Chunks = createTestChunks(2);
165
+ await t.run(async (ctx) => {
166
+ return insertChunks(ctx, {
167
+ entryId: docV1Id,
168
+ startOrder: 0,
169
+ chunks: v1Chunks,
170
+ });
171
+ });
172
+
173
+ // Create version 2 of the same entry
174
+ const docV2Id = await setupTestEntry(
175
+ t,
176
+ namespaceId,
177
+ "versioned-entry",
178
+ 2,
179
+ "pending"
180
+ );
181
+
182
+ // Insert chunks in version 2 (this should mark v1 chunks as replaced)
183
+ const v2Chunks = createTestChunks(2);
184
+ await t.run(async (ctx) => {
185
+ return insertChunks(ctx, {
186
+ entryId: docV2Id,
187
+ startOrder: 0,
188
+ chunks: v2Chunks,
189
+ });
190
+ });
191
+
192
+ // Run replaceChunksPage to actually perform the replacement
193
+ let isDone = false;
194
+ let startOrder = 0;
195
+ while (!isDone) {
196
+ const result = await t.mutation(api.chunks.replaceChunksPage, {
197
+ entryId: docV2Id,
198
+ startOrder,
199
+ });
200
+ isDone = result.status !== "pending";
201
+ startOrder = result.nextStartOrder;
202
+ }
203
+
204
+ // Check that v1 chunks are marked as replaced
205
+ const v1ChunksList = await t.run(async (ctx) => {
206
+ return ctx.db
207
+ .query("chunks")
208
+ .withIndex("entryId_order", (q) => q.eq("entryId", docV1Id))
209
+ .collect();
210
+ });
211
+
212
+ for (const chunk of v1ChunksList) {
213
+ if (chunk.state.kind !== "pending") {
214
+ expect(chunk.state.kind).toBe("replaced");
215
+ }
216
+ }
217
+
218
+ // Check that v2 chunks are ready
219
+ const v2ChunksList = await t.run(async (ctx) => {
220
+ return ctx.db
221
+ .query("chunks")
222
+ .withIndex("entryId_order", (q) => q.eq("entryId", docV2Id))
223
+ .collect();
224
+ });
225
+
226
+ for (const chunk of v2ChunksList) {
227
+ expect(chunk.state.kind).toBe("ready");
228
+ }
229
+ });
230
+
231
+ test("chunks can be created on different entries and fetched separately", async () => {
232
+ const t = convexTest(schema, modules);
233
+ const namespaceId = await setupTestNamespace(t);
234
+
235
+ // Create two entries
236
+ const doc1Id = await setupTestEntry(t, namespaceId, "doc1");
237
+ const doc2Id = await setupTestEntry(t, namespaceId, "doc2");
238
+
239
+ // Insert chunks in both entries
240
+ const doc1Chunks = createTestChunks(5);
241
+ const doc2Chunks = createTestChunks(3);
242
+
243
+ await t.run(async (ctx) => {
244
+ await insertChunks(ctx, {
245
+ entryId: doc1Id,
246
+ startOrder: 0,
247
+ chunks: doc1Chunks,
248
+ });
249
+ return insertChunks(ctx, {
250
+ entryId: doc2Id,
251
+ startOrder: 0,
252
+ chunks: doc2Chunks,
253
+ });
254
+ });
255
+
256
+ // Verify chunks exist in both entries
257
+ const doc1ChunksList = await t.run(async (ctx) => {
258
+ return ctx.db
259
+ .query("chunks")
260
+ .withIndex("entryId_order", (q) => q.eq("entryId", doc1Id))
261
+ .collect();
262
+ });
263
+
264
+ const doc2ChunksList = await t.run(async (ctx) => {
265
+ return ctx.db
266
+ .query("chunks")
267
+ .withIndex("entryId_order", (q) => q.eq("entryId", doc2Id))
268
+ .collect();
269
+ });
270
+
271
+ expect(doc1ChunksList).toHaveLength(5);
272
+ expect(doc2ChunksList).toHaveLength(3);
273
+
274
+ // Verify chunk order and content
275
+ expect(doc1ChunksList[0].order).toBe(0);
276
+ expect(doc1ChunksList[4].order).toBe(4);
277
+ expect(doc2ChunksList[0].order).toBe(0);
278
+ expect(doc2ChunksList[2].order).toBe(2);
279
+
280
+ // Verify chunk content
281
+ const doc1Content0 = await t.run(async (ctx) =>
282
+ ctx.db.get(doc1ChunksList[0].contentId)
283
+ );
284
+ const doc2Content0 = await t.run(async (ctx) =>
285
+ ctx.db.get(doc2ChunksList[0].contentId)
286
+ );
287
+
288
+ expect(doc1Content0!.text).toBe("Test chunk content 1");
289
+ expect(doc2Content0!.text).toBe("Test chunk content 1");
290
+ });
291
+
292
+ test("chunks support zero-range queries", async () => {
293
+ const t = convexTest(schema, modules);
294
+ const namespaceId = await setupTestNamespace(t);
295
+ const entryId = await setupTestEntry(t, namespaceId);
296
+
297
+ // Insert chunks
298
+ const chunks = createTestChunks(5);
299
+ await t.run(async (ctx) => {
300
+ return insertChunks(ctx, {
301
+ entryId,
302
+ startOrder: 0,
303
+ chunks,
304
+ });
305
+ });
306
+
307
+ // Get a single chunk (simulating zero range)
308
+ const singleChunk = await t.run(async (ctx) => {
309
+ return ctx.db
310
+ .query("chunks")
311
+ .withIndex("entryId_order", (q) =>
312
+ q.eq("entryId", entryId).eq("order", 2)
313
+ )
314
+ .first();
315
+ });
316
+
317
+ expect(singleChunk).toBeDefined();
318
+ expect(singleChunk!.order).toBe(2);
319
+
320
+ // Verify content
321
+ const content = await t.run(async (ctx) =>
322
+ ctx.db.get(singleChunk!.contentId)
323
+ );
324
+ expect(content!.text).toBe("Test chunk content 3");
325
+ });
326
+
327
+ test("deleting pages should work", async () => {
328
+ const t = convexTest(schema, modules);
329
+ const namespaceId = await setupTestNamespace(t);
330
+ const entryId = await setupTestEntry(t, namespaceId);
331
+
332
+ // Insert a large number of chunks
333
+ const chunks = createTestChunks(10);
334
+ await t.run(async (ctx) => {
335
+ return insertChunks(ctx, {
336
+ entryId,
337
+ startOrder: 0,
338
+ chunks,
339
+ });
340
+ });
341
+
342
+ // Verify chunks exist
343
+ const initialChunksList = await t.run(async (ctx) => {
344
+ return ctx.db
345
+ .query("chunks")
346
+ .withIndex("entryId_order", (q) => q.eq("entryId", entryId))
347
+ .collect();
348
+ });
349
+ expect(initialChunksList).toHaveLength(10);
350
+
351
+ // Delete chunks starting from order 3
352
+ const deleteResult = await t.run(async (ctx) => {
353
+ return deleteChunksPage(ctx, {
354
+ entryId,
355
+ startOrder: 3,
356
+ });
357
+ });
358
+
359
+ expect(deleteResult.isDone).toBe(true);
360
+
361
+ // Verify only first 3 chunks remain
362
+ const remainingChunksList = await t.run(async (ctx) => {
363
+ return ctx.db
364
+ .query("chunks")
365
+ .withIndex("entryId_order", (q) => q.eq("entryId", entryId))
366
+ .collect();
367
+ });
368
+ expect(remainingChunksList).toHaveLength(3);
369
+
370
+ // Verify the remaining chunks are orders 0, 1, 2
371
+ const orders = remainingChunksList.map((c) => c.order).sort();
372
+ expect(orders).toEqual([0, 1, 2]);
373
+
374
+ // Verify content was also deleted
375
+ const allContent = await t.run(async (ctx) => {
376
+ return ctx.db.query("content").collect();
377
+ });
378
+ // Should have only 3 content records remaining (for the 3 remaining chunks)
379
+ expect(allContent).toHaveLength(3);
380
+ });
381
+
382
+ test("listing chunks returns correct pagination", async () => {
383
+ const t = convexTest(schema, modules);
384
+ const namespaceId = await setupTestNamespace(t);
385
+ const entryId = await setupTestEntry(t, namespaceId);
386
+
387
+ // Insert chunks
388
+ const chunks = createTestChunks(5);
389
+ await t.run(async (ctx) => {
390
+ return insertChunks(ctx, {
391
+ entryId,
392
+ startOrder: 0,
393
+ chunks,
394
+ });
395
+ });
396
+
397
+ // Test listing with pagination
398
+ const result = await t.query(api.chunks.list, {
399
+ entryId,
400
+ paginationOpts: { numItems: 3, cursor: null },
401
+ });
402
+
403
+ expect(result.page).toHaveLength(3);
404
+ expect(result.isDone).toBe(false);
405
+
406
+ // Verify chunk content and order
407
+ expect(result.page[0].order).toBe(0);
408
+ expect(result.page[0].text).toBe("Test chunk content 1");
409
+ expect(result.page[0].state).toBe("ready");
410
+
411
+ expect(result.page[1].order).toBe(1);
412
+ expect(result.page[1].text).toBe("Test chunk content 2");
413
+
414
+ expect(result.page[2].order).toBe(2);
415
+ expect(result.page[2].text).toBe("Test chunk content 3");
416
+
417
+ // Get next page
418
+ const nextResult = await t.query(api.chunks.list, {
419
+ entryId,
420
+ paginationOpts: { numItems: 3, cursor: result.continueCursor },
421
+ });
422
+
423
+ expect(nextResult.page).toHaveLength(2);
424
+ expect(nextResult.isDone).toBe(true);
425
+ expect(nextResult.page[0].order).toBe(3);
426
+ expect(nextResult.page[1].order).toBe(4);
427
+ });
428
+
429
+ describe("getRangesOfChunks", () => {
430
+ test("it returns the correct number of chunks when given a range", async () => {
431
+ const t = convexTest(schema, modules);
432
+ const namespaceId = await setupTestNamespace(t);
433
+ const entryId = await setupTestEntry(t, namespaceId);
434
+
435
+ // Insert chunks
436
+ const chunks = createTestChunks(5);
437
+ await t.run(async (ctx) => {
438
+ const result = await insertChunks(ctx, {
439
+ entryId,
440
+ startOrder: 0,
441
+ chunks,
442
+ });
443
+ expect(result.status).toBe("ready");
444
+ });
445
+
446
+ const chunkDocs = await t.run(async (ctx) => {
447
+ return ctx.db
448
+ .query("chunks")
449
+ .withIndex("entryId_order", (q) => q.eq("entryId", entryId))
450
+ .collect();
451
+ });
452
+ assert(chunkDocs.length === 5);
453
+ assert(chunkDocs[2].state.kind === "ready");
454
+
455
+ const { ranges, entries } = await t.query(
456
+ internal.chunks.getRangesOfChunks,
457
+ {
458
+ embeddingIds: [chunkDocs[2].state.embeddingId],
459
+ chunkContext: { before: 1, after: 2 },
460
+ }
461
+ );
462
+ expect(entries).toHaveLength(1);
463
+ expect(entries[0].entryId).toBe(entryId);
464
+ expect(ranges).toHaveLength(1);
465
+ expect(ranges[0]?.startOrder).toBe(1);
466
+ expect(ranges[0]?.order).toBe(2);
467
+ expect(ranges[0]?.entryId).toBe(entryId);
468
+ expect(ranges[0]?.content).toHaveLength(4);
469
+ expect(ranges[0]?.content[0].text).toBe("Test chunk content 2");
470
+ expect(ranges[0]?.content[1].text).toBe("Test chunk content 3");
471
+ expect(ranges[0]?.content[2].text).toBe("Test chunk content 4");
472
+ expect(ranges[0]?.content[3].text).toBe("Test chunk content 5");
473
+ });
474
+
475
+ test("works finding chunks from multiple entries", async () => {
476
+ const t = convexTest(schema, modules);
477
+ const namespaceId = await setupTestNamespace(t);
478
+
479
+ // Create two entries
480
+ const doc1Id = await setupTestEntry(t, namespaceId, "doc1");
481
+ const doc2Id = await setupTestEntry(t, namespaceId, "doc2");
482
+
483
+ // Insert chunks in both entries
484
+ const doc1Chunks = createTestChunks(3);
485
+ const doc2Chunks = createTestChunks(4);
486
+
487
+ await t.run(async (ctx) => {
488
+ await insertChunks(ctx, {
489
+ entryId: doc1Id,
490
+ startOrder: 0,
491
+ chunks: doc1Chunks,
492
+ });
493
+ await insertChunks(ctx, {
494
+ entryId: doc2Id,
495
+ startOrder: 0,
496
+ chunks: doc2Chunks,
497
+ });
498
+ });
499
+
500
+ // Get chunks from both entries
501
+ const doc1ChunkDocs = await t.run(async (ctx) => {
502
+ return ctx.db
503
+ .query("chunks")
504
+ .withIndex("entryId_order", (q) => q.eq("entryId", doc1Id))
505
+ .collect();
506
+ });
507
+ const doc2ChunkDocs = await t.run(async (ctx) => {
508
+ return ctx.db
509
+ .query("chunks")
510
+ .withIndex("entryId_order", (q) => q.eq("entryId", doc2Id))
511
+ .collect();
512
+ });
513
+
514
+ assert(doc1ChunkDocs[1].state.kind === "ready");
515
+ assert(doc2ChunkDocs[2].state.kind === "ready");
516
+
517
+ const { ranges, entries } = await t.query(
518
+ internal.chunks.getRangesOfChunks,
519
+ {
520
+ embeddingIds: [
521
+ doc1ChunkDocs[1].state.embeddingId, // doc1, chunk at order 1
522
+ doc2ChunkDocs[2].state.embeddingId, // doc2, chunk at order 2
523
+ ],
524
+ chunkContext: { before: 1, after: 1 },
525
+ }
526
+ );
527
+
528
+ expect(entries).toHaveLength(2);
529
+ expect(ranges).toHaveLength(2);
530
+
531
+ // First range should be from doc1
532
+ expect(ranges[0]?.entryId).toBe(doc1Id);
533
+ expect(ranges[0]?.order).toBe(1);
534
+ expect(ranges[0]?.startOrder).toBe(0);
535
+ expect(ranges[0]?.content).toHaveLength(3); // orders 0, 1, 2
536
+
537
+ // Second range should be from doc2
538
+ expect(ranges[1]?.entryId).toBe(doc2Id);
539
+ expect(ranges[1]?.order).toBe(2);
540
+ expect(ranges[1]?.startOrder).toBe(1);
541
+ expect(ranges[1]?.content).toHaveLength(3); // orders 1, 2, 3
542
+ });
543
+
544
+ test("finds chunks on both a ready and replaced version of the same entry", async () => {
545
+ const t = convexTest(schema, modules);
546
+ const namespaceId = await setupTestNamespace(t);
547
+
548
+ // Create version 1 (ready) and version 2 (ready) of the same entry
549
+ // (We'll test with ready versions since pending chunks don't have embeddingIds)
550
+ const docV1Id = await setupTestEntry(
551
+ t,
552
+ namespaceId,
553
+ "versioned-entry",
554
+ 1,
555
+ "ready"
556
+ );
557
+
558
+ // Insert chunks in version 1
559
+ const v1Chunks = createTestChunks(3);
560
+ await t.run(async (ctx) => {
561
+ await insertChunks(ctx, {
562
+ entryId: docV1Id,
563
+ startOrder: 0,
564
+ chunks: v1Chunks,
565
+ });
566
+ });
567
+
568
+ const docV2Id = await setupTestEntry(
569
+ t,
570
+ namespaceId,
571
+ "versioned-entry",
572
+ 2,
573
+ "pending"
574
+ );
575
+
576
+ // Insert chunks in version 2
577
+ const v2Chunks = createTestChunks(3);
578
+ await t.run(async (ctx) => {
579
+ await insertChunks(ctx, {
580
+ entryId: docV2Id,
581
+ startOrder: 0,
582
+ chunks: v2Chunks,
583
+ });
584
+ });
585
+ while (true) {
586
+ const result = await t.mutation(api.chunks.replaceChunksPage, {
587
+ entryId: docV2Id,
588
+ startOrder: 0,
589
+ });
590
+ if (result.status !== "pending") {
591
+ break;
592
+ }
593
+ }
594
+
595
+ // Get chunks from both versions
596
+ const v1ChunkDocs = await t.run(async (ctx) => {
597
+ return ctx.db
598
+ .query("chunks")
599
+ .withIndex("entryId_order", (q) => q.eq("entryId", docV1Id))
600
+ .collect();
601
+ });
602
+ const v2ChunkDocs = await t.run(async (ctx) => {
603
+ return ctx.db
604
+ .query("chunks")
605
+ .withIndex("entryId_order", (q) => q.eq("entryId", docV2Id))
606
+ .collect();
607
+ });
608
+
609
+ expect(v1ChunkDocs[1].state.kind).toBe("replaced");
610
+ expect(v2ChunkDocs[1].state.kind).toBe("ready");
611
+
612
+ // Type guard to ensure we have ready chunks
613
+ assert(v1ChunkDocs[1].state.kind === "replaced");
614
+ assert(v2ChunkDocs[1].state.kind === "ready");
615
+
616
+ const { ranges, entries } = await t.query(
617
+ internal.chunks.getRangesOfChunks,
618
+ {
619
+ embeddingIds: [
620
+ v1ChunkDocs[1].state.embeddingId, // v1, chunk at order 1
621
+ v2ChunkDocs[1].state.embeddingId, // v2, chunk at order 1
622
+ ],
623
+ chunkContext: { before: 1, after: 1 },
624
+ }
625
+ );
626
+
627
+ expect(entries).toHaveLength(2);
628
+ expect(ranges).toHaveLength(2);
629
+ expect(ranges[0]?.entryId).toBe(docV1Id);
630
+ expect(ranges[0]?.order).toBe(1);
631
+ expect(ranges[1]?.entryId).toBe(docV2Id);
632
+ expect(ranges[1]?.order).toBe(1);
633
+ });
634
+
635
+ test("finds chunks before and after a chunk", async () => {
636
+ const t = convexTest(schema, modules);
637
+ const namespaceId = await setupTestNamespace(t);
638
+ const entryId = await setupTestEntry(t, namespaceId);
639
+
640
+ // Insert chunks
641
+ const chunks = createTestChunks(7);
642
+ await t.run(async (ctx) => {
643
+ await insertChunks(ctx, {
644
+ entryId,
645
+ startOrder: 0,
646
+ chunks,
647
+ });
648
+ });
649
+
650
+ const chunkDocs = await t.run(async (ctx) => {
651
+ return ctx.db
652
+ .query("chunks")
653
+ .withIndex("entryId_order", (q) => q.eq("entryId", entryId))
654
+ .collect();
655
+ });
656
+ assert(chunkDocs[3].state.kind === "ready");
657
+
658
+ const { ranges } = await t.query(internal.chunks.getRangesOfChunks, {
659
+ embeddingIds: [chunkDocs[3].state.embeddingId], // chunk at order 3
660
+ chunkContext: { before: 2, after: 2 },
661
+ });
662
+
663
+ expect(ranges).toHaveLength(1);
664
+ expect(ranges[0]?.order).toBe(3);
665
+ expect(ranges[0]?.startOrder).toBe(1); // 3 - 2 = 1
666
+ expect(ranges[0]?.content).toHaveLength(5); // orders 1, 2, 3, 4, 5
667
+ expect(ranges[0]?.content[0].text).toBe("Test chunk content 2"); // order 1
668
+ expect(ranges[0]?.content[1].text).toBe("Test chunk content 3"); // order 2
669
+ expect(ranges[0]?.content[2].text).toBe("Test chunk content 4"); // order 3 (target)
670
+ expect(ranges[0]?.content[3].text).toBe("Test chunk content 5"); // order 4
671
+ expect(ranges[0]?.content[4].text).toBe("Test chunk content 6"); // order 5
672
+ });
673
+
674
+ test("accepts ranges outside of the entry order bounds", async () => {
675
+ const t = convexTest(schema, modules);
676
+ const namespaceId = await setupTestNamespace(t);
677
+ const entryId = await setupTestEntry(t, namespaceId);
678
+
679
+ // Insert only 3 chunks (orders 0, 1, 2)
680
+ const chunks = createTestChunks(3);
681
+ await t.run(async (ctx) => {
682
+ await insertChunks(ctx, {
683
+ entryId,
684
+ startOrder: 0,
685
+ chunks,
686
+ });
687
+ });
688
+
689
+ const chunkDocs = await t.run(async (ctx) => {
690
+ return ctx.db
691
+ .query("chunks")
692
+ .withIndex("entryId_order", (q) => q.eq("entryId", entryId))
693
+ .collect();
694
+ });
695
+ assert(chunkDocs[2].state.kind === "ready");
696
+
697
+ // Request a large range that extends beyond entry bounds
698
+ const { ranges } = await t.query(internal.chunks.getRangesOfChunks, {
699
+ embeddingIds: [chunkDocs[2].state.embeddingId], // chunk at order 2
700
+ chunkContext: { before: 5, after: 5 }, // Large range
701
+ });
702
+
703
+ expect(ranges).toHaveLength(1);
704
+ expect(ranges[0]?.order).toBe(2);
705
+ expect(ranges[0]?.startOrder).toBe(0); // Should be clamped to 0
706
+ expect(ranges[0]?.content).toHaveLength(3); // All available chunks (0, 1, 2)
707
+ expect(ranges[0]?.content[0].text).toBe("Test chunk content 1"); // order 0
708
+ expect(ranges[0]?.content[1].text).toBe("Test chunk content 2"); // order 1
709
+ expect(ranges[0]?.content[2].text).toBe("Test chunk content 3"); // order 2
710
+ });
711
+
712
+ test("when two ranges overlap, the later range gets priority on the chunks in between", async () => {
713
+ const t = convexTest(schema, modules);
714
+ const namespaceId = await setupTestNamespace(t);
715
+ const entryId = await setupTestEntry(t, namespaceId);
716
+
717
+ // Insert chunks
718
+ const chunks = createTestChunks(10);
719
+ await t.run(async (ctx) => {
720
+ await insertChunks(ctx, {
721
+ entryId,
722
+ startOrder: 0,
723
+ chunks,
724
+ });
725
+ });
726
+
727
+ const chunkDocs = await t.run(async (ctx) => {
728
+ return ctx.db
729
+ .query("chunks")
730
+ .withIndex("entryId_order", (q) => q.eq("entryId", entryId))
731
+ .collect();
732
+ });
733
+ assert(chunkDocs[2].state.kind === "ready");
734
+ assert(chunkDocs[6].state.kind === "ready");
735
+
736
+ const { ranges } = await t.query(internal.chunks.getRangesOfChunks, {
737
+ embeddingIds: [
738
+ chunkDocs[2].state.embeddingId, // chunk at order 2
739
+ chunkDocs[6].state.embeddingId, // chunk at order 6
740
+ ],
741
+ chunkContext: { before: 3, after: 3 },
742
+ });
743
+
744
+ expect(ranges).toHaveLength(2);
745
+
746
+ // First range (order 2): should stop before the second range's territory
747
+ expect(ranges[0]?.order).toBe(2);
748
+ expect(ranges[0]?.startOrder).toBe(0);
749
+ // The end should be limited by the second range's before context
750
+ expect(ranges[0]?.content.length).toBe(3); // orders 0, 1, 2
751
+
752
+ // Second range (order 6): should get priority for overlapping chunks
753
+ expect(ranges[1]?.order).toBe(6);
754
+ expect(ranges[1]?.startOrder).toBe(3); // start at 6, 3 before
755
+ expect(ranges[1]?.content).toHaveLength(7); // orders 3, 4, 5, 6, 7, 8, 9
756
+ });
757
+
758
+ test("when three ranges overlap, the middle chunk gets priority on before chunk but not after chunk", async () => {
759
+ const t = convexTest(schema, modules);
760
+ const namespaceId = await setupTestNamespace(t);
761
+ const entryId = await setupTestEntry(t, namespaceId);
762
+
763
+ // Insert chunks
764
+ const chunks = createTestChunks(15);
765
+ await t.run(async (ctx) => {
766
+ await insertChunks(ctx, {
767
+ entryId,
768
+ startOrder: 0,
769
+ chunks,
770
+ });
771
+ });
772
+
773
+ const chunkDocs = await t.run(async (ctx) => {
774
+ return ctx.db
775
+ .query("chunks")
776
+ .withIndex("entryId_order", (q) => q.eq("entryId", entryId))
777
+ .collect();
778
+ });
779
+ assert(chunkDocs[2].state.kind === "ready");
780
+ assert(chunkDocs[7].state.kind === "ready");
781
+ assert(chunkDocs[12].state.kind === "ready");
782
+
783
+ const { ranges } = await t.query(internal.chunks.getRangesOfChunks, {
784
+ embeddingIds: [
785
+ chunkDocs[2].state.embeddingId, // chunk at order 2
786
+ chunkDocs[7].state.embeddingId, // chunk at order 7 (middle)
787
+ chunkDocs[12].state.embeddingId, // chunk at order 12
788
+ ],
789
+ chunkContext: { before: 4, after: 4 },
790
+ });
791
+
792
+ expect(ranges).toHaveLength(3);
793
+
794
+ // First range (order 2)
795
+ expect(ranges[0]?.order).toBe(2);
796
+ expect(ranges[0]?.startOrder).toBe(0);
797
+
798
+ // Middle range (order 7): should get priority over first range's after context
799
+ expect(ranges[1]?.order).toBe(7);
800
+ expect(ranges[1]?.startOrder).toBe(3); // Should start after first range's territory
801
+
802
+ // Last range (order 12): should get priority over middle range's after context
803
+ expect(ranges[2]?.order).toBe(12);
804
+ expect(ranges[2]?.startOrder).toBe(8); // Should start after middle range's territory
805
+ expect(ranges[2]?.content.length).toBeLessThanOrEqual(7); // Should not extend beyond entry
806
+ });
807
+
808
+ test("it works with before/after of 0", async () => {
809
+ const t = convexTest(schema, modules);
810
+ const namespaceId = await setupTestNamespace(t);
811
+ const entryId = await setupTestEntry(t, namespaceId);
812
+
813
+ // Insert chunks
814
+ const chunks = createTestChunks(5);
815
+ await t.run(async (ctx) => {
816
+ await insertChunks(ctx, {
817
+ entryId,
818
+ startOrder: 0,
819
+ chunks,
820
+ });
821
+ });
822
+
823
+ const chunkDocs = await t.run(async (ctx) => {
824
+ return ctx.db
825
+ .query("chunks")
826
+ .withIndex("entryId_order", (q) => q.eq("entryId", entryId))
827
+ .collect();
828
+ });
829
+ assert(chunkDocs[2].state.kind === "ready");
830
+
831
+ const { ranges } = await t.query(internal.chunks.getRangesOfChunks, {
832
+ embeddingIds: [chunkDocs[2].state.embeddingId], // chunk at order 2
833
+ chunkContext: { before: 0, after: 0 },
834
+ });
835
+
836
+ expect(ranges).toHaveLength(1);
837
+ expect(ranges[0]?.order).toBe(2);
838
+ expect(ranges[0]?.startOrder).toBe(2);
839
+ expect(ranges[0]?.content).toHaveLength(1); // Only the target chunk
840
+ expect(ranges[0]?.content[0].text).toBe("Test chunk content 3"); // order 2
841
+ });
842
+
843
+ test("it returns de-duplicated entries in the order of the associated embedding ids", async () => {
844
+ const t = convexTest(schema, modules);
845
+ const namespaceId = await setupTestNamespace(t);
846
+
847
+ // Create three entries
848
+ const doc1Id = await setupTestEntry(t, namespaceId, "doc1");
849
+ const doc2Id = await setupTestEntry(t, namespaceId, "doc2");
850
+ const doc3Id = await setupTestEntry(t, namespaceId, "doc3");
851
+
852
+ // Insert chunks in all entries
853
+ await t.run(async (ctx) => {
854
+ await insertChunks(ctx, {
855
+ entryId: doc1Id,
856
+ startOrder: 0,
857
+ chunks: createTestChunks(2),
858
+ });
859
+ await insertChunks(ctx, {
860
+ entryId: doc2Id,
861
+ startOrder: 0,
862
+ chunks: createTestChunks(2),
863
+ });
864
+ await insertChunks(ctx, {
865
+ entryId: doc3Id,
866
+ startOrder: 0,
867
+ chunks: createTestChunks(2),
868
+ });
869
+ });
870
+
871
+ // Get chunks from all entries
872
+ const [doc1Chunks, doc2Chunks, doc3Chunks] = await t.run(async (ctx) => {
873
+ return Promise.all([
874
+ ctx.db
875
+ .query("chunks")
876
+ .withIndex("entryId_order", (q) => q.eq("entryId", doc1Id))
877
+ .collect(),
878
+ ctx.db
879
+ .query("chunks")
880
+ .withIndex("entryId_order", (q) => q.eq("entryId", doc2Id))
881
+ .collect(),
882
+ ctx.db
883
+ .query("chunks")
884
+ .withIndex("entryId_order", (q) => q.eq("entryId", doc3Id))
885
+ .collect(),
886
+ ]);
887
+ });
888
+
889
+ assert(doc1Chunks[0].state.kind === "ready");
890
+ assert(doc2Chunks[1].state.kind === "ready");
891
+ assert(doc3Chunks[0].state.kind === "ready");
892
+ assert(doc1Chunks[1].state.kind === "ready");
893
+ assert(doc2Chunks[0].state.kind === "ready");
894
+
895
+ const { entries } = await t.query(internal.chunks.getRangesOfChunks, {
896
+ embeddingIds: [
897
+ doc2Chunks[1].state.embeddingId, // doc2 first
898
+ doc1Chunks[0].state.embeddingId, // doc1 second
899
+ doc3Chunks[0].state.embeddingId, // doc3 third
900
+ doc1Chunks[1].state.embeddingId, // doc1 again (should be deduplicated)
901
+ doc2Chunks[0].state.embeddingId, // doc2 again (should be deduplicated)
902
+ ],
903
+ chunkContext: { before: 0, after: 0 },
904
+ });
905
+
906
+ // Should return only 3 entries (deduplicated)
907
+ expect(entries).toHaveLength(3);
908
+
909
+ // Should be in the order they first appeared in the embedding IDs
910
+ expect(entries[0].entryId).toBe(doc2Id); // First appearance
911
+ expect(entries[1].entryId).toBe(doc1Id); // Second appearance
912
+ expect(entries[2].entryId).toBe(doc3Id); // Third appearance
913
+ });
914
+ });
915
+ });