@convex-dev/rag 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +201 -0
- package/README.md +371 -0
- package/dist/client/_generated/_ignore.d.ts +1 -0
- package/dist/client/_generated/_ignore.d.ts.map +1 -0
- package/dist/client/_generated/_ignore.js +3 -0
- package/dist/client/_generated/_ignore.js.map +1 -0
- package/dist/client/defaultChunker.d.ts +15 -0
- package/dist/client/defaultChunker.d.ts.map +1 -0
- package/dist/client/defaultChunker.js +148 -0
- package/dist/client/defaultChunker.js.map +1 -0
- package/dist/client/fileUtils.d.ts +24 -0
- package/dist/client/fileUtils.d.ts.map +1 -0
- package/dist/client/fileUtils.js +179 -0
- package/dist/client/fileUtils.js.map +1 -0
- package/dist/client/index.d.ts +442 -0
- package/dist/client/index.d.ts.map +1 -0
- package/dist/client/index.js +597 -0
- package/dist/client/index.js.map +1 -0
- package/dist/client/types.d.ts +29 -0
- package/dist/client/types.d.ts.map +1 -0
- package/dist/client/types.js +2 -0
- package/dist/client/types.js.map +1 -0
- package/dist/component/_generated/api.d.ts +439 -0
- package/dist/component/_generated/api.d.ts.map +1 -0
- package/dist/component/_generated/api.js +22 -0
- package/dist/component/_generated/api.js.map +1 -0
- package/dist/component/_generated/dataModel.d.ts +60 -0
- package/dist/component/_generated/server.d.ts +149 -0
- package/dist/component/_generated/server.d.ts.map +1 -0
- package/dist/component/_generated/server.js +74 -0
- package/dist/component/_generated/server.js.map +1 -0
- package/dist/component/chunks.d.ts +139 -0
- package/dist/component/chunks.d.ts.map +1 -0
- package/dist/component/chunks.js +413 -0
- package/dist/component/chunks.js.map +1 -0
- package/dist/component/convex.config.d.ts +3 -0
- package/dist/component/convex.config.d.ts.map +1 -0
- package/dist/component/convex.config.js +6 -0
- package/dist/component/convex.config.js.map +1 -0
- package/dist/component/embeddings/importance.d.ts +21 -0
- package/dist/component/embeddings/importance.d.ts.map +1 -0
- package/dist/component/embeddings/importance.js +67 -0
- package/dist/component/embeddings/importance.js.map +1 -0
- package/dist/component/embeddings/index.d.ts +23 -0
- package/dist/component/embeddings/index.d.ts.map +1 -0
- package/dist/component/embeddings/index.js +54 -0
- package/dist/component/embeddings/index.js.map +1 -0
- package/dist/component/embeddings/tables.d.ts +39 -0
- package/dist/component/embeddings/tables.d.ts.map +1 -0
- package/dist/component/embeddings/tables.js +53 -0
- package/dist/component/embeddings/tables.js.map +1 -0
- package/dist/component/entries.d.ts +167 -0
- package/dist/component/entries.d.ts.map +1 -0
- package/dist/component/entries.js +409 -0
- package/dist/component/entries.js.map +1 -0
- package/dist/component/filters.d.ts +46 -0
- package/dist/component/filters.d.ts.map +1 -0
- package/dist/component/filters.js +72 -0
- package/dist/component/filters.js.map +1 -0
- package/dist/component/namespaces.d.ts +131 -0
- package/dist/component/namespaces.d.ts.map +1 -0
- package/dist/component/namespaces.js +222 -0
- package/dist/component/namespaces.js.map +1 -0
- package/dist/component/schema.d.ts +1697 -0
- package/dist/component/schema.d.ts.map +1 -0
- package/dist/component/schema.js +88 -0
- package/dist/component/schema.js.map +1 -0
- package/dist/component/search.d.ts +20 -0
- package/dist/component/search.d.ts.map +1 -0
- package/dist/component/search.js +69 -0
- package/dist/component/search.js.map +1 -0
- package/dist/package.json +3 -0
- package/dist/react/index.d.ts +2 -0
- package/dist/react/index.d.ts.map +1 -0
- package/dist/react/index.js +6 -0
- package/dist/react/index.js.map +1 -0
- package/dist/shared.d.ts +479 -0
- package/dist/shared.d.ts.map +1 -0
- package/dist/shared.js +98 -0
- package/dist/shared.js.map +1 -0
- package/package.json +97 -0
- package/src/client/_generated/_ignore.ts +1 -0
- package/src/client/defaultChunker.test.ts +243 -0
- package/src/client/defaultChunker.ts +183 -0
- package/src/client/fileUtils.ts +179 -0
- package/src/client/index.test.ts +475 -0
- package/src/client/index.ts +1125 -0
- package/src/client/setup.test.ts +28 -0
- package/src/client/types.ts +69 -0
- package/src/component/_generated/api.d.ts +439 -0
- package/src/component/_generated/api.js +23 -0
- package/src/component/_generated/dataModel.d.ts +60 -0
- package/src/component/_generated/server.d.ts +149 -0
- package/src/component/_generated/server.js +90 -0
- package/src/component/chunks.test.ts +915 -0
- package/src/component/chunks.ts +555 -0
- package/src/component/convex.config.ts +7 -0
- package/src/component/embeddings/importance.test.ts +249 -0
- package/src/component/embeddings/importance.ts +75 -0
- package/src/component/embeddings/index.test.ts +482 -0
- package/src/component/embeddings/index.ts +99 -0
- package/src/component/embeddings/tables.ts +114 -0
- package/src/component/entries.test.ts +341 -0
- package/src/component/entries.ts +546 -0
- package/src/component/filters.ts +119 -0
- package/src/component/namespaces.ts +299 -0
- package/src/component/schema.ts +106 -0
- package/src/component/search.test.ts +445 -0
- package/src/component/search.ts +97 -0
- package/src/component/setup.test.ts +5 -0
- package/src/react/index.ts +7 -0
- package/src/shared.ts +247 -0
- package/src/vitest.config.ts +7 -0
|
@@ -0,0 +1,915 @@
|
|
|
1
|
+
/// <reference types="vite/client" />
|
|
2
|
+
|
|
3
|
+
import { describe, expect, test } from "vitest";
|
|
4
|
+
import { convexTest, type TestConvex } from "convex-test";
|
|
5
|
+
import schema from "./schema.js";
|
|
6
|
+
import { api, internal } from "./_generated/api.js";
|
|
7
|
+
import { modules } from "./setup.test.js";
|
|
8
|
+
import { insertChunks, deleteChunksPage } from "./chunks.js";
|
|
9
|
+
import type { Id } from "./_generated/dataModel.js";
|
|
10
|
+
import { assert } from "convex-helpers";
|
|
11
|
+
|
|
12
|
+
// Type of the test handle produced by convexTest(schema, ...); the setup helpers in this file take it as `t`.
type ConvexTest = TestConvex<typeof schema>;
|
|
13
|
+
|
|
14
|
+
describe("chunks", () => {
|
|
15
|
+
/**
 * Inserts one "ready" namespace row ("test-namespace", version 1,
 * model "test-model", dimension 128, no filter names) and resolves
 * to whatever `ctx.db.insert` returns for it.
 */
async function setupTestNamespace(t: ConvexTest) {
  const namespaceId = await t.run(async (ctx) =>
    ctx.db.insert("namespaces", {
      namespace: "test-namespace",
      version: 1,
      modelId: "test-model",
      dimension: 128,
      filterNames: [],
      status: { kind: "ready" },
    })
  );
  return namespaceId;
}
|
|
27
|
+
|
|
28
|
+
/**
 * Inserts one row into "entries" for the given namespace and resolves to
 * the value returned by `ctx.db.insert`.
 *
 * @param t - test handle used to run the insert
 * @param namespaceId - namespace the entry belongs to
 * @param key - entry key (default "test-entry")
 * @param version - entry version (default 0)
 * @param status - stored as `status.kind` (default "ready")
 */
async function setupTestEntry(
  t: ConvexTest,
  namespaceId: Id<"namespaces">,
  key = "test-entry",
  version = 0,
  status: "ready" | "pending" = "ready"
) {
  // The hash is derived from key + version so each (key, version) pair differs.
  const contentHash = `test-content-hash-${key}-${version}`;
  const entryId = await t.run(async (ctx) =>
    ctx.db.insert("entries", {
      namespaceId,
      key,
      version,
      status: { kind: status },
      contentHash,
      importance: 0.5,
      filterValues: [],
    })
  );
  return entryId;
}
|
|
47
|
+
|
|
48
|
+
/**
 * Builds `count` synthetic chunks for tests.
 *
 * Chunk `i` (0-based) has text "Test chunk content <i + 1>", metadata
 * `{ index: i }`, and an embedding of `dimension` identical values
 * (`0.1 + i * 0.01`), so chunks differ from one another by a small offset.
 *
 * @param count - number of chunks to create (default 3)
 * @param dimension - embedding length (default 128, the value this helper
 *   previously hard-coded)
 * @returns array of `{ content: { text, metadata }, embedding }` objects
 */
function createTestChunks(count = 3, dimension = 128) {
  return Array.from({ length: count }, (_, i) => ({
    content: {
      text: `Test chunk content ${i + 1}`,
      metadata: { index: i },
    },
    embedding: Array(dimension).fill(0.1 + i * 0.01),
  }));
}
|
|
57
|
+
|
|
58
|
+
test("inserting chunks when there's no entry throws error", async () => {
  const t = convexTest(schema, modules);
  await setupTestNamespace(t);

  // An id that was never inserted into "entries" by this test.
  const missingEntryId = "j57c3xc4x6j3c4x6j3c4x6j3c4x6" as Id<"entries">;
  const expectedMessage = `Entry ${missingEntryId} not found`;
  const chunks = createTestChunks(2);

  const insertion = t.run(async (ctx) =>
    insertChunks(ctx, {
      entryId: missingEntryId,
      startOrder: 0,
      chunks,
    })
  );

  await expect(insertion).rejects.toThrow(expectedMessage);
});
|
|
76
|
+
|
|
77
|
+
test("overwriting chunks with insert works", async () => {
  const t = convexTest(schema, modules);
  const namespaceId = await setupTestNamespace(t);
  const entryId = await setupTestEntry(t, namespaceId);

  // Reads every chunk row of the entry through the entryId_order index.
  const fetchEntryChunks = () =>
    t.run(async (ctx) =>
      ctx.db
        .query("chunks")
        .withIndex("entryId_order", (q) => q.eq("entryId", entryId))
        .collect()
    );

  // Seed three chunks starting at order 0.
  const seedChunks = createTestChunks(3);
  await t.run(async (ctx) =>
    insertChunks(ctx, {
      entryId,
      startOrder: 0,
      chunks: seedChunks,
    })
  );
  expect(await fetchEntryChunks()).toHaveLength(3);

  // A second insert starting at order 1 targets the existing orders 1 and 2.
  const replacements = [
    {
      content: {
        text: "Overwritten chunk 1 content",
        metadata: { overwritten: true, index: 1 },
      },
      embedding: Array(128).fill(0.9),
    },
    {
      content: {
        text: "Overwritten chunk 2 content",
        metadata: { overwritten: true, index: 2 },
      },
      embedding: Array(128).fill(0.8),
    },
  ];
  await t.run(async (ctx) =>
    insertChunks(ctx, {
      entryId,
      startOrder: 1,
      chunks: replacements,
    })
  );

  // The count must be unchanged: order 0 untouched, orders 1 and 2 replaced.
  const chunksAfter = await fetchEntryChunks();
  expect(chunksAfter).toHaveLength(3);

  const atOrder1 = chunksAfter.find((chunk) => chunk.order === 1);
  const atOrder2 = chunksAfter.find((chunk) => chunk.order === 2);
  expect(atOrder1).toBeDefined();
  expect(atOrder2).toBeDefined();

  // Follow each chunk's contentId to check the stored text and metadata.
  const content1 = await t.run(async (ctx) => ctx.db.get(atOrder1!.contentId));
  const content2 = await t.run(async (ctx) => ctx.db.get(atOrder2!.contentId));

  expect(content1!.text).toBe("Overwritten chunk 1 content");
  expect(content1!.metadata?.overwritten).toBe(true);
  expect(content2!.text).toBe("Overwritten chunk 2 content");
  expect(content2!.metadata?.overwritten).toBe(true);
});
|
|
155
|
+
|
|
156
|
+
test("when replacing an older version, older one is marked as replaced and only new one shows up in search results", async () => {
  const t = convexTest(schema, modules);
  const namespaceId = await setupTestNamespace(t);

  // Version 1 uses the helper's default status ("ready") and gets two chunks.
  const docV1Id = await setupTestEntry(t, namespaceId, "versioned-entry", 1);
  const v1Chunks = createTestChunks(2);
  await t.run(async (ctx) => {
    return insertChunks(ctx, {
      entryId: docV1Id,
      startOrder: 0,
      chunks: v1Chunks,
    });
  });

  // Version 2 of the same key is created as "pending" and also gets two chunks.
  const docV2Id = await setupTestEntry(
    t,
    namespaceId,
    "versioned-entry",
    2,
    "pending"
  );
  const v2Chunks = createTestChunks(2);
  await t.run(async (ctx) => {
    return insertChunks(ctx, {
      entryId: docV2Id,
      startOrder: 0,
      chunks: v2Chunks,
    });
  });

  // Page through replaceChunksPage until it stops reporting "pending",
  // resuming each call from the order returned by the previous one.
  let isDone = false;
  let startOrder = 0;
  while (!isDone) {
    const result = await t.mutation(api.chunks.replaceChunksPage, {
      entryId: docV2Id,
      startOrder,
    });
    isDone = result.status !== "pending";
    startOrder = result.nextStartOrder;
  }

  // Check that v1 chunks are marked as replaced.
  const v1ChunksList = await t.run(async (ctx) => {
    return ctx.db
      .query("chunks")
      .withIndex("entryId_order", (q) => q.eq("entryId", docV1Id))
      .collect();
  });
  // Without this, the loop below would pass vacuously on an empty list.
  expect(v1ChunksList).toHaveLength(2);
  for (const chunk of v1ChunksList) {
    if (chunk.state.kind !== "pending") {
      expect(chunk.state.kind).toBe("replaced");
    }
  }

  // Check that v2 chunks are ready.
  const v2ChunksList = await t.run(async (ctx) => {
    return ctx.db
      .query("chunks")
      .withIndex("entryId_order", (q) => q.eq("entryId", docV2Id))
      .collect();
  });
  // Same guard: make sure there is something for the loop to assert on.
  expect(v2ChunksList).toHaveLength(2);
  for (const chunk of v2ChunksList) {
    expect(chunk.state.kind).toBe("ready");
  }
});
|
|
230
|
+
|
|
231
|
+
test("chunks can be created on different entries and fetched separately", async () => {
  const t = convexTest(schema, modules);
  const namespaceId = await setupTestNamespace(t);

  // Two independent entries in the same namespace.
  const doc1Id = await setupTestEntry(t, namespaceId, "doc1");
  const doc2Id = await setupTestEntry(t, namespaceId, "doc2");

  // Reads all chunk rows of one entry through the entryId_order index.
  const fetchChunksOf = (id: Id<"entries">) =>
    t.run(async (ctx) =>
      ctx.db
        .query("chunks")
        .withIndex("entryId_order", (q) => q.eq("entryId", id))
        .collect()
    );

  // Five chunks for doc1 and three for doc2, inserted within one t.run call.
  const doc1Chunks = createTestChunks(5);
  const doc2Chunks = createTestChunks(3);
  await t.run(async (ctx) => {
    await insertChunks(ctx, {
      entryId: doc1Id,
      startOrder: 0,
      chunks: doc1Chunks,
    });
    return insertChunks(ctx, {
      entryId: doc2Id,
      startOrder: 0,
      chunks: doc2Chunks,
    });
  });

  const doc1ChunksList = await fetchChunksOf(doc1Id);
  const doc2ChunksList = await fetchChunksOf(doc2Id);

  // Each entry sees only its own chunks.
  expect(doc1ChunksList).toHaveLength(5);
  expect(doc2ChunksList).toHaveLength(3);

  // First and last order values of each list.
  expect(doc1ChunksList[0].order).toBe(0);
  expect(doc1ChunksList[4].order).toBe(4);
  expect(doc2ChunksList[0].order).toBe(0);
  expect(doc2ChunksList[2].order).toBe(2);

  // Both entries were built by createTestChunks, so their first texts match.
  const doc1Content0 = await t.run(async (ctx) =>
    ctx.db.get(doc1ChunksList[0].contentId)
  );
  const doc2Content0 = await t.run(async (ctx) =>
    ctx.db.get(doc2ChunksList[0].contentId)
  );
  expect(doc1Content0!.text).toBe("Test chunk content 1");
  expect(doc2Content0!.text).toBe("Test chunk content 1");
});
|
|
291
|
+
|
|
292
|
+
test("chunks support zero-range queries", async () => {
  const t = convexTest(schema, modules);
  const namespaceId = await setupTestNamespace(t);
  const entryId = await setupTestEntry(t, namespaceId);

  // Insert chunks at orders 0..4.
  const chunks = createTestChunks(5);
  await t.run(async (ctx) => {
    return insertChunks(ctx, {
      entryId,
      startOrder: 0,
      chunks,
    });
  });

  // Get a single chunk (simulating zero range) by pinning both index fields.
  const singleChunk = await t.run(async (ctx) => {
    return ctx.db
      .query("chunks")
      .withIndex("entryId_order", (q) =>
        q.eq("entryId", entryId).eq("order", 2)
      )
      .first();
  });

  // `.first()` signals "no match" with null, which toBeDefined() would accept;
  // assert non-null so a miss fails here with a clear message.
  expect(singleChunk).not.toBeNull();
  expect(singleChunk!.order).toBe(2);

  // Verify content: order 2 is the third chunk created by createTestChunks.
  const content = await t.run(async (ctx) =>
    ctx.db.get(singleChunk!.contentId)
  );
  expect(content!.text).toBe("Test chunk content 3");
});
|
|
326
|
+
|
|
327
|
+
test("deleting pages should work", async () => {
  const t = convexTest(schema, modules);
  const namespaceId = await setupTestNamespace(t);
  const entryId = await setupTestEntry(t, namespaceId);

  // Insert ten chunks at orders 0..9.
  const chunks = createTestChunks(10);
  await t.run(async (ctx) => {
    return insertChunks(ctx, {
      entryId,
      startOrder: 0,
      chunks,
    });
  });

  // Verify chunks exist.
  const initialChunksList = await t.run(async (ctx) => {
    return ctx.db
      .query("chunks")
      .withIndex("entryId_order", (q) => q.eq("entryId", entryId))
      .collect();
  });
  expect(initialChunksList).toHaveLength(10);

  // Delete chunks starting from order 3.
  const deleteResult = await t.run(async (ctx) => {
    return deleteChunksPage(ctx, {
      entryId,
      startOrder: 3,
    });
  });
  expect(deleteResult.isDone).toBe(true);

  // Verify only the first 3 chunks remain.
  const remainingChunksList = await t.run(async (ctx) => {
    return ctx.db
      .query("chunks")
      .withIndex("entryId_order", (q) => q.eq("entryId", entryId))
      .collect();
  });
  expect(remainingChunksList).toHaveLength(3);

  // Verify the remaining chunks are orders 0, 1, 2. A numeric comparator is
  // required: the default sort compares values as strings (e.g. 10 before 2).
  const orders = remainingChunksList
    .map((c) => c.order)
    .sort((a, b) => a - b);
  expect(orders).toEqual([0, 1, 2]);

  // Verify content was also deleted: one content row per remaining chunk.
  const allContent = await t.run(async (ctx) => {
    return ctx.db.query("content").collect();
  });
  expect(allContent).toHaveLength(3);
});
|
|
381
|
+
|
|
382
|
+
test("listing chunks returns correct pagination", async () => {
  const t = convexTest(schema, modules);
  const namespaceId = await setupTestNamespace(t);
  const entryId = await setupTestEntry(t, namespaceId);

  // Five chunks in total, so a page size of 3 yields pages of 3 and 2.
  const chunks = createTestChunks(5);
  await t.run(async (ctx) =>
    insertChunks(ctx, {
      entryId,
      startOrder: 0,
      chunks,
    })
  );

  // First page: no cursor yet.
  const result = await t.query(api.chunks.list, {
    entryId,
    paginationOpts: { numItems: 3, cursor: null },
  });
  expect(result.page).toHaveLength(3);
  expect(result.isDone).toBe(false);

  const [first, second, third] = result.page;
  expect(first.order).toBe(0);
  expect(first.text).toBe("Test chunk content 1");
  expect(first.state).toBe("ready");

  expect(second.order).toBe(1);
  expect(second.text).toBe("Test chunk content 2");

  expect(third.order).toBe(2);
  expect(third.text).toBe("Test chunk content 3");

  // Second page: continue from the cursor the first page returned.
  const nextResult = await t.query(api.chunks.list, {
    entryId,
    paginationOpts: { numItems: 3, cursor: result.continueCursor },
  });
  expect(nextResult.page).toHaveLength(2);
  expect(nextResult.isDone).toBe(true);
  expect(nextResult.page[0].order).toBe(3);
  expect(nextResult.page[1].order).toBe(4);
});
|
|
428
|
+
|
|
429
|
+
describe("getRangesOfChunks", () => {
|
|
430
|
+
test("it returns the correct number of chunks when given a range", async () => {
  const t = convexTest(schema, modules);
  const namespaceId = await setupTestNamespace(t);
  const entryId = await setupTestEntry(t, namespaceId);

  // Five chunks at orders 0..4; the insert itself must report "ready".
  const chunks = createTestChunks(5);
  await t.run(async (ctx) => {
    const insertResult = await insertChunks(ctx, {
      entryId,
      startOrder: 0,
      chunks,
    });
    expect(insertResult.status).toBe("ready");
  });

  const chunkDocs = await t.run(async (ctx) =>
    ctx.db
      .query("chunks")
      .withIndex("entryId_order", (q) => q.eq("entryId", entryId))
      .collect()
  );
  assert(chunkDocs.length === 5);
  // The assert narrows the state so that `embeddingId` can be read below.
  assert(chunkDocs[2].state.kind === "ready");

  // Ask for the chunk at order 2 with 1 chunk before and 2 after it.
  const { ranges, entries } = await t.query(
    internal.chunks.getRangesOfChunks,
    {
      embeddingIds: [chunkDocs[2].state.embeddingId],
      chunkContext: { before: 1, after: 2 },
    }
  );

  expect(entries).toHaveLength(1);
  expect(entries[0].entryId).toBe(entryId);

  expect(ranges).toHaveLength(1);
  const range = ranges[0];
  expect(range?.startOrder).toBe(1);
  expect(range?.order).toBe(2);
  expect(range?.entryId).toBe(entryId);

  // Orders 1..4 map to the texts numbered 2..5 (texts are 1-based).
  expect(range?.content).toHaveLength(4);
  expect(range?.content[0].text).toBe("Test chunk content 2");
  expect(range?.content[1].text).toBe("Test chunk content 3");
  expect(range?.content[2].text).toBe("Test chunk content 4");
  expect(range?.content[3].text).toBe("Test chunk content 5");
});
|
|
474
|
+
|
|
475
|
+
test("works finding chunks from multiple entries", async () => {
  const t = convexTest(schema, modules);
  const namespaceId = await setupTestNamespace(t);

  // Two entries: doc1 gets three chunks, doc2 gets four.
  const doc1Id = await setupTestEntry(t, namespaceId, "doc1");
  const doc2Id = await setupTestEntry(t, namespaceId, "doc2");
  const doc1Chunks = createTestChunks(3);
  const doc2Chunks = createTestChunks(4);

  await t.run(async (ctx) => {
    await insertChunks(ctx, {
      entryId: doc1Id,
      startOrder: 0,
      chunks: doc1Chunks,
    });
    await insertChunks(ctx, {
      entryId: doc2Id,
      startOrder: 0,
      chunks: doc2Chunks,
    });
  });

  // Reads all chunk rows of one entry through the entryId_order index.
  const fetchChunksOf = (id: Id<"entries">) =>
    t.run(async (ctx) =>
      ctx.db
        .query("chunks")
        .withIndex("entryId_order", (q) => q.eq("entryId", id))
        .collect()
    );
  const doc1ChunkDocs = await fetchChunksOf(doc1Id);
  const doc2ChunkDocs = await fetchChunksOf(doc2Id);

  // The asserts narrow each state so that `embeddingId` can be read below.
  assert(doc1ChunkDocs[1].state.kind === "ready");
  assert(doc2ChunkDocs[2].state.kind === "ready");

  const { ranges, entries } = await t.query(
    internal.chunks.getRangesOfChunks,
    {
      embeddingIds: [
        doc1ChunkDocs[1].state.embeddingId, // doc1, chunk at order 1
        doc2ChunkDocs[2].state.embeddingId, // doc2, chunk at order 2
      ],
      chunkContext: { before: 1, after: 1 },
    }
  );

  expect(entries).toHaveLength(2);
  expect(ranges).toHaveLength(2);
  const [doc1Range, doc2Range] = ranges;

  // Range around doc1 order 1: orders 0, 1, 2.
  expect(doc1Range?.entryId).toBe(doc1Id);
  expect(doc1Range?.order).toBe(1);
  expect(doc1Range?.startOrder).toBe(0);
  expect(doc1Range?.content).toHaveLength(3);

  // Range around doc2 order 2: orders 1, 2, 3.
  expect(doc2Range?.entryId).toBe(doc2Id);
  expect(doc2Range?.order).toBe(2);
  expect(doc2Range?.startOrder).toBe(1);
  expect(doc2Range?.content).toHaveLength(3);
});
|
|
543
|
+
|
|
544
|
+
test("finds chunks on both a ready and replaced version of the same entry", async () => {
  const t = convexTest(schema, modules);
  const namespaceId = await setupTestNamespace(t);

  // Version 1 is created "ready"; version 2 (below) is created "pending" and
  // is then run through replaceChunksPage. Both versions share the same key.
  const docV1Id = await setupTestEntry(
    t,
    namespaceId,
    "versioned-entry",
    1,
    "ready"
  );

  // Insert chunks in version 1.
  const v1Chunks = createTestChunks(3);
  await t.run(async (ctx) => {
    await insertChunks(ctx, {
      entryId: docV1Id,
      startOrder: 0,
      chunks: v1Chunks,
    });
  });

  const docV2Id = await setupTestEntry(
    t,
    namespaceId,
    "versioned-entry",
    2,
    "pending"
  );

  // Insert chunks in version 2.
  const v2Chunks = createTestChunks(3);
  await t.run(async (ctx) => {
    await insertChunks(ctx, {
      entryId: docV2Id,
      startOrder: 0,
      chunks: v2Chunks,
    });
  });

  // Page through replaceChunksPage until it stops reporting "pending".
  // Each call resumes from the returned nextStartOrder (as the sibling
  // replacement test does) instead of re-sending 0, which could loop forever
  // if a call ever returned "pending".
  let startOrder = 0;
  while (true) {
    const result = await t.mutation(api.chunks.replaceChunksPage, {
      entryId: docV2Id,
      startOrder,
    });
    if (result.status !== "pending") {
      break;
    }
    startOrder = result.nextStartOrder;
  }

  // Get chunks from both versions.
  const v1ChunkDocs = await t.run(async (ctx) => {
    return ctx.db
      .query("chunks")
      .withIndex("entryId_order", (q) => q.eq("entryId", docV1Id))
      .collect();
  });
  const v2ChunkDocs = await t.run(async (ctx) => {
    return ctx.db
      .query("chunks")
      .withIndex("entryId_order", (q) => q.eq("entryId", docV2Id))
      .collect();
  });

  expect(v1ChunkDocs[1].state.kind).toBe("replaced");
  expect(v2ChunkDocs[1].state.kind).toBe("ready");

  // Type guards: narrow both states (v1 "replaced", v2 "ready") so that
  // `embeddingId` can be read from each below.
  assert(v1ChunkDocs[1].state.kind === "replaced");
  assert(v2ChunkDocs[1].state.kind === "ready");

  const { ranges, entries } = await t.query(
    internal.chunks.getRangesOfChunks,
    {
      embeddingIds: [
        v1ChunkDocs[1].state.embeddingId, // v1, chunk at order 1
        v2ChunkDocs[1].state.embeddingId, // v2, chunk at order 1
      ],
      chunkContext: { before: 1, after: 1 },
    }
  );

  expect(entries).toHaveLength(2);
  expect(ranges).toHaveLength(2);
  expect(ranges[0]?.entryId).toBe(docV1Id);
  expect(ranges[0]?.order).toBe(1);
  expect(ranges[1]?.entryId).toBe(docV2Id);
  expect(ranges[1]?.order).toBe(1);
});
|
|
634
|
+
|
|
635
|
+
test("finds chunks before and after a chunk", async () => {
  const t = convexTest(schema, modules);
  const namespaceId = await setupTestNamespace(t);
  const entryId = await setupTestEntry(t, namespaceId);

  // Seven chunks at orders 0..6, so order 3 has two neighbours on each side.
  const chunks = createTestChunks(7);
  await t.run(async (ctx) => {
    await insertChunks(ctx, {
      entryId,
      startOrder: 0,
      chunks,
    });
  });

  const chunkDocs = await t.run(async (ctx) =>
    ctx.db
      .query("chunks")
      .withIndex("entryId_order", (q) => q.eq("entryId", entryId))
      .collect()
  );
  // The assert narrows the state so that `embeddingId` can be read below.
  assert(chunkDocs[3].state.kind === "ready");

  const { ranges } = await t.query(internal.chunks.getRangesOfChunks, {
    embeddingIds: [chunkDocs[3].state.embeddingId], // chunk at order 3
    chunkContext: { before: 2, after: 2 },
  });

  expect(ranges).toHaveLength(1);
  const range = ranges[0];
  expect(range?.order).toBe(3);
  expect(range?.startOrder).toBe(1); // 3 - 2 = 1

  // Orders 1..5 map to the texts numbered 2..6; order 3 is the target chunk.
  expect(range?.content).toHaveLength(5);
  expect(range?.content[0].text).toBe("Test chunk content 2");
  expect(range?.content[1].text).toBe("Test chunk content 3");
  expect(range?.content[2].text).toBe("Test chunk content 4");
  expect(range?.content[3].text).toBe("Test chunk content 5");
  expect(range?.content[4].text).toBe("Test chunk content 6");
});
|
|
673
|
+
|
|
674
|
+
test("accepts ranges outside of the entry order bounds", async () => {
  const t = convexTest(schema, modules);
  const namespaceId = await setupTestNamespace(t);
  const entryId = await setupTestEntry(t, namespaceId);

  // Only three chunks exist (orders 0, 1, 2).
  const chunks = createTestChunks(3);
  await t.run(async (ctx) => {
    await insertChunks(ctx, {
      entryId,
      startOrder: 0,
      chunks,
    });
  });

  const chunkDocs = await t.run(async (ctx) =>
    ctx.db
      .query("chunks")
      .withIndex("entryId_order", (q) => q.eq("entryId", entryId))
      .collect()
  );
  // The assert narrows the state so that `embeddingId` can be read below.
  assert(chunkDocs[2].state.kind === "ready");

  // A context of 5 on each side reaches past both ends of the entry.
  const { ranges } = await t.query(internal.chunks.getRangesOfChunks, {
    embeddingIds: [chunkDocs[2].state.embeddingId], // chunk at order 2
    chunkContext: { before: 5, after: 5 },
  });

  expect(ranges).toHaveLength(1);
  const range = ranges[0];
  expect(range?.order).toBe(2);
  expect(range?.startOrder).toBe(0); // expected to be clamped to 0

  // All available chunks (orders 0, 1, 2) are expected, in order.
  expect(range?.content).toHaveLength(3);
  expect(range?.content[0].text).toBe("Test chunk content 1");
  expect(range?.content[1].text).toBe("Test chunk content 2");
  expect(range?.content[2].text).toBe("Test chunk content 3");
});
|
|
711
|
+
|
|
712
|
+
test("when two ranges overlap, the later range gets priority on the chunks in between", async () => {
  const t = convexTest(schema, modules);
  const namespaceId = await setupTestNamespace(t);
  const entryId = await setupTestEntry(t, namespaceId);

  // Ten chunks, inserted starting at order 0.
  const chunks = createTestChunks(10);
  await t.run(async (ctx) => {
    await insertChunks(ctx, { entryId, startOrder: 0, chunks });
  });

  const chunkDocs = await t.run(async (ctx) => {
    const byOrder = ctx.db
      .query("chunks")
      .withIndex("entryId_order", (q) => q.eq("entryId", entryId));
    return byOrder.collect();
  });
  const earlierHit = chunkDocs[2];
  const laterHit = chunkDocs[6];
  assert(earlierHit.state.kind === "ready");
  assert(laterHit.state.kind === "ready");

  // Hits at orders 2 and 6 with 3 chunks of context each: the requested
  // windows both cover orders 3..5.
  const { ranges } = await t.query(internal.chunks.getRangesOfChunks, {
    embeddingIds: [earlierHit.state.embeddingId, laterHit.state.embeddingId],
    chunkContext: { before: 3, after: 3 },
  });

  expect(ranges).toHaveLength(2);
  const [earlier, later] = ranges;

  // Earlier range (hit at order 2): it gives up the contested chunks, so it
  // is expected to hold only orders 0, 1, 2.
  expect(earlier?.order).toBe(2);
  expect(earlier?.startOrder).toBe(0);
  expect(earlier?.content.length).toBe(3);

  // Later range (hit at order 6): keeps its full "before" context and runs
  // to the last chunk, i.e. orders 3 through 9.
  expect(later?.order).toBe(6);
  expect(later?.startOrder).toBe(3);
  expect(later?.content).toHaveLength(7);
});
|
|
757
|
+
|
|
758
|
+
test("when three ranges overlap, the middle chunk gets priority on before chunk but not after chunk", async () => {
  const t = convexTest(schema, modules);
  const namespaceId = await setupTestNamespace(t);
  const entryId = await setupTestEntry(t, namespaceId);

  // Fifteen chunks, inserted starting at order 0.
  const chunks = createTestChunks(15);
  await t.run(async (ctx) => {
    await insertChunks(ctx, { entryId, startOrder: 0, chunks });
  });

  const chunkDocs = await t.run(async (ctx) => {
    const byOrder = ctx.db
      .query("chunks")
      .withIndex("entryId_order", (q) => q.eq("entryId", entryId));
    return byOrder.collect();
  });
  const firstHit = chunkDocs[2];
  const middleHit = chunkDocs[7];
  const lastHit = chunkDocs[12];
  assert(firstHit.state.kind === "ready");
  assert(middleHit.state.kind === "ready");
  assert(lastHit.state.kind === "ready");

  // Hits at orders 2, 7 and 12 with 4 chunks of context each, so every pair
  // of neighbouring windows overlaps.
  const { ranges } = await t.query(internal.chunks.getRangesOfChunks, {
    embeddingIds: [
      firstHit.state.embeddingId,
      middleHit.state.embeddingId,
      lastHit.state.embeddingId,
    ],
    chunkContext: { before: 4, after: 4 },
  });

  expect(ranges).toHaveLength(3);
  const [first, middle, last] = ranges;

  // First range (hit at order 2).
  expect(first?.order).toBe(2);
  expect(first?.startOrder).toBe(0);

  // Middle range (hit at order 7): wins the chunks it shares with the first
  // range's "after" context, so it starts at 7 - 4 = 3.
  expect(middle?.order).toBe(7);
  expect(middle?.startOrder).toBe(3);

  // Last range (hit at order 12): wins the chunks it shares with the middle
  // range's "after" context, so it starts at 12 - 4 = 8.
  expect(last?.order).toBe(12);
  expect(last?.startOrder).toBe(8);
  // Must not extend past the end of the entry.
  expect(last?.content.length).toBeLessThanOrEqual(7);
});
|
|
807
|
+
|
|
808
|
+
test("it works with before/after of 0", async () => {
  const t = convexTest(schema, modules);
  const namespaceId = await setupTestNamespace(t);
  const entryId = await setupTestEntry(t, namespaceId);

  // Five chunks, inserted starting at order 0.
  const chunks = createTestChunks(5);
  await t.run(async (ctx) => {
    await insertChunks(ctx, { entryId, startOrder: 0, chunks });
  });

  const chunkDocs = await t.run(async (ctx) => {
    const byOrder = ctx.db
      .query("chunks")
      .withIndex("entryId_order", (q) => q.eq("entryId", entryId));
    return byOrder.collect();
  });
  const hit = chunkDocs[2];
  assert(hit.state.kind === "ready");

  // No surrounding context requested for the hit at order 2.
  const { ranges } = await t.query(internal.chunks.getRangesOfChunks, {
    embeddingIds: [hit.state.embeddingId],
    chunkContext: { before: 0, after: 0 },
  });

  expect(ranges).toHaveLength(1);
  const [range] = ranges;
  expect(range?.order).toBe(2);
  expect(range?.startOrder).toBe(2);
  // The range is expected to contain the matched chunk and nothing else.
  expect(range?.content).toHaveLength(1);
  expect(range?.content[0].text).toBe("Test chunk content 3");
});
|
|
842
|
+
|
|
843
|
+
test("it returns de-duplicated entries in the order of the associated embedding ids", async () => {
  const t = convexTest(schema, modules);
  const namespaceId = await setupTestNamespace(t);

  // Three separate entries in the same namespace.
  const doc1Id = await setupTestEntry(t, namespaceId, "doc1");
  const doc2Id = await setupTestEntry(t, namespaceId, "doc2");
  const doc3Id = await setupTestEntry(t, namespaceId, "doc3");
  const entryIds = [doc1Id, doc2Id, doc3Id];

  // Give every entry two chunks, inserted one entry after another.
  await t.run(async (ctx) => {
    for (const entryId of entryIds) {
      await insertChunks(ctx, {
        entryId,
        startOrder: 0,
        chunks: createTestChunks(2),
      });
    }
  });

  // Load each entry's chunks through the entryId_order index.
  const [doc1Chunks, doc2Chunks, doc3Chunks] = await t.run(async (ctx) => {
    const pending = entryIds.map((entryId) =>
      ctx.db
        .query("chunks")
        .withIndex("entryId_order", (q) => q.eq("entryId", entryId))
        .collect(),
    );
    return Promise.all(pending);
  });

  const doc1First = doc1Chunks[0];
  assert(doc1First.state.kind === "ready");
  const doc2Second = doc2Chunks[1];
  assert(doc2Second.state.kind === "ready");
  const doc3First = doc3Chunks[0];
  assert(doc3First.state.kind === "ready");
  const doc1Second = doc1Chunks[1];
  assert(doc1Second.state.kind === "ready");
  const doc2First = doc2Chunks[0];
  assert(doc2First.state.kind === "ready");

  // Entry order by first appearance below is doc2, doc1, doc3; the trailing
  // two ids point back at entries that were already seen.
  const { entries } = await t.query(internal.chunks.getRangesOfChunks, {
    embeddingIds: [
      doc2Second.state.embeddingId,
      doc1First.state.embeddingId,
      doc3First.state.embeddingId,
      doc1Second.state.embeddingId, // doc1 repeated
      doc2First.state.embeddingId, // doc2 repeated
    ],
    chunkContext: { before: 0, after: 0 },
  });

  // Five embedding ids, but only three distinct entries.
  expect(entries).toHaveLength(3);

  // Entries come back in order of first appearance among the embedding ids.
  const expectedOrder = [doc2Id, doc1Id, doc3Id];
  expectedOrder.forEach((expectedId, position) => {
    expect(entries[position].entryId).toBe(expectedId);
  });
});
|
|
914
|
+
});
|
|
915
|
+
});
|