@convex-dev/rag 0.5.4 → 0.6.1
This diff shows the publicly available contents of the two package versions as released to a supported registry. It is provided for informational purposes only.
- package/README.md +89 -82
- package/dist/client/index.d.ts +30 -26
- package/dist/client/index.d.ts.map +1 -1
- package/dist/client/index.js +2 -2
- package/dist/client/index.js.map +1 -1
- package/dist/component/_generated/api.d.ts +25 -482
- package/dist/component/_generated/api.d.ts.map +1 -1
- package/dist/component/_generated/api.js +10 -1
- package/dist/component/_generated/api.js.map +1 -1
- package/dist/component/_generated/component.d.ts +380 -0
- package/dist/component/_generated/component.d.ts.map +1 -0
- package/dist/component/_generated/component.js +11 -0
- package/dist/component/_generated/component.js.map +1 -0
- package/dist/component/_generated/dataModel.d.ts +4 -18
- package/dist/component/_generated/dataModel.d.ts.map +1 -0
- package/dist/component/_generated/dataModel.js +11 -0
- package/dist/component/_generated/dataModel.js.map +1 -0
- package/dist/component/_generated/server.d.ts +10 -38
- package/dist/component/_generated/server.d.ts.map +1 -1
- package/dist/component/_generated/server.js +9 -5
- package/dist/component/_generated/server.js.map +1 -1
- package/dist/component/chunks.d.ts +5 -5
- package/dist/component/chunks.d.ts.map +1 -1
- package/dist/component/chunks.js +21 -51
- package/dist/component/chunks.js.map +1 -1
- package/dist/component/embeddings/tables.d.ts +4 -5
- package/dist/component/embeddings/tables.d.ts.map +1 -1
- package/dist/component/embeddings/tables.js.map +1 -1
- package/dist/component/entries.d.ts +6 -6
- package/dist/component/namespaces.d.ts +8 -8
- package/dist/component/namespaces.d.ts.map +1 -1
- package/dist/component/namespaces.js +2 -2
- package/dist/component/namespaces.js.map +1 -1
- package/dist/component/schema.d.ts +185 -224
- package/dist/component/schema.d.ts.map +1 -1
- package/dist/component/search.d.ts +4 -3
- package/dist/component/search.d.ts.map +1 -1
- package/dist/component/search.js +1 -1
- package/dist/component/search.js.map +1 -1
- package/dist/shared.d.ts +9 -4
- package/dist/shared.d.ts.map +1 -1
- package/dist/shared.js +1 -4
- package/dist/shared.js.map +1 -1
- package/package.json +72 -44
- package/src/client/defaultChunker.test.ts +1 -1
- package/src/client/defaultChunker.ts +7 -7
- package/src/client/fileUtils.ts +3 -3
- package/src/client/hybridRank.ts +1 -1
- package/src/client/index.test.ts +18 -18
- package/src/client/index.ts +104 -84
- package/src/client/setup.test.ts +2 -2
- package/src/component/_generated/api.ts +66 -0
- package/src/component/_generated/component.ts +442 -0
- package/src/component/_generated/{server.d.ts → server.ts} +33 -21
- package/src/component/chunks.test.ts +14 -14
- package/src/component/chunks.ts +59 -88
- package/src/component/embeddings/importance.test.ts +4 -4
- package/src/component/embeddings/importance.ts +1 -1
- package/src/component/embeddings/index.test.ts +3 -4
- package/src/component/embeddings/index.ts +6 -6
- package/src/component/embeddings/tables.ts +9 -8
- package/src/component/entries.test.ts +10 -10
- package/src/component/entries.ts +29 -29
- package/src/component/filters.ts +8 -8
- package/src/component/namespaces.ts +31 -34
- package/src/component/schema.ts +2 -2
- package/src/component/search.test.ts +5 -5
- package/src/component/search.ts +8 -9
- package/src/component/setup.test.ts +2 -8
- package/src/shared.ts +47 -45
- package/src/test.ts +20 -0
- package/dist/client/types.d.ts +0 -29
- package/dist/client/types.d.ts.map +0 -1
- package/dist/client/types.js +0 -2
- package/dist/client/types.js.map +0 -1
- package/dist/package.json +0 -3
- package/src/client/types.ts +0 -69
- package/src/component/_generated/api.d.ts +0 -507
- package/src/component/_generated/api.js +0 -23
- package/src/component/_generated/server.js +0 -90
- package/src/vitest.config.ts +0 -7
- package/src/component/_generated/{dataModel.d.ts → dataModel.ts} +0 -0
package/src/component/chunks.ts
CHANGED
@@ -50,7 +50,7 @@ export const insert = mutation({
 
 export async function insertChunks(
   ctx: MutationCtx,
-  { entryId, startOrder, chunks }: InsertChunksArgs
+  { entryId, startOrder, chunks }: InsertChunksArgs,
 ) {
   const entry = await ctx.db.get(entryId);
   if (!entry) {
@@ -71,12 +71,12 @@ export async function insertChunks(
       q
         .eq("entryId", entryId)
         .gte("order", startOrder)
-        .lt("order", startOrder + chunks.length)
+        .lt("order", startOrder + chunks.length),
     )
     .collect();
   if (existingChunks.length > 0) {
     console.debug(
-      `Deleting ${existingChunks.length} existing chunks for entry ${entryId} at version ${entry.version}`
+      `Deleting ${existingChunks.length} existing chunks for entry ${entryId} at version ${entry.version}`,
     );
   }
   // TODO: avoid writing if they're the same
@@ -87,11 +87,11 @@ export async function insertChunks(
       }
       await ctx.db.delete(c.contentId);
       await ctx.db.delete(c._id);
-    })
+    }),
   );
   const numberedFilter = numberedFilterFromNamedFilters(
     entry.filterValues,
-    namespace!.filterNames
+    namespace!.filterNames,
   );
   for (const chunk of chunks) {
     const contentId = await ctx.db.insert("content", {
@@ -110,7 +110,7 @@ export async function insertChunks(
       chunk.embedding,
       entry.namespaceId,
       entry.importance,
-      numberedFilter
+      numberedFilter,
     );
     state = {
       kind: "ready",
@@ -126,13 +126,11 @@ export async function insertChunks(
        contentId,
        namespaceId: entry.namespaceId,
        ...filterFieldsFromNumbers(entry.namespaceId, numberedFilter),
-      })
+      }),
     );
     order++;
   }
-  return {
-    status: previousEntry ? ("pending" as const) : ("ready" as const),
-  };
+  return { status: previousEntry ? ("pending" as const) : ("ready" as const) };
 }
 
 async function ensureLatestEntryVersion(ctx: QueryCtx, entry: Doc<"entries">) {
@@ -148,14 +146,14 @@ async function ensureLatestEntryVersion(ctx: QueryCtx, entry: Doc<"entries">) {
          .eq("namespaceId", entry.namespaceId)
          .eq("status.kind", status)
          .eq("key", entry.key)
-          .gt("version", entry.version)
-      )
+          .gt("version", entry.version),
+      ),
     ),
-    ["version"]
+    ["version"],
   ).first();
   if (newerEntry) {
     console.warn(
-      `Bailing from inserting chunks for entry ${entry.key} at version ${entry.version} since there's a newer version ${newerEntry.version} (status ${newerEntry.status}) creation time difference ${(newerEntry._creationTime - entry._creationTime).toFixed(0)}ms`
+      `Bailing from inserting chunks for entry ${entry.key} at version ${entry.version} since there's a newer version ${newerEntry.version} (status ${newerEntry.status}) creation time difference ${(newerEntry._creationTime - entry._creationTime).toFixed(0)}ms`,
     );
     return false;
   }
@@ -163,14 +161,8 @@ async function ensureLatestEntryVersion(ctx: QueryCtx, entry: Doc<"entries">) {
 }
 
 export const replaceChunksPage = mutation({
-  args: v.object({
-    entryId: v.id("entries"),
-    startOrder: v.number(),
-  }),
-  returns: v.object({
-    status: vStatus,
-    nextStartOrder: v.number(),
-  }),
+  args: v.object({ entryId: v.id("entries"), startOrder: v.number() }),
+  returns: v.object({ status: vStatus, nextStartOrder: v.number() }),
   handler: async (ctx, args) => {
     const { entryId, startOrder } = args;
     const entryOrNull = await ctx.db.get(entryId);
@@ -180,10 +172,7 @@ export const replaceChunksPage = mutation({
     const entry = entryOrNull;
     const isLatest = await ensureLatestEntryVersion(ctx, entry);
     if (!isLatest) {
-      return {
-        status: "replaced" as const,
-        nextStartOrder: startOrder,
-      };
+      return { status: "replaced" as const, nextStartOrder: startOrder };
     }
 
     // Get the namespace for filter conversion
@@ -200,7 +189,7 @@ export const replaceChunksPage = mutation({
           q
             .eq("namespaceId", entry.namespaceId)
             .eq("status.kind", "pending")
-            .eq("key", entry.key)
+            .eq("key", entry.key),
         )
         .collect()
     ).filter((e) => e._id !== entry._id)
@@ -212,29 +201,27 @@ export const replaceChunksPage = mutation({
        stream(ctx.db, schema)
          .query("chunks")
          .withIndex("entryId_order", (q) =>
-            q.eq("entryId", entry._id).gte("order", startOrder)
-          )
+            q.eq("entryId", entry._id).gte("order", startOrder),
+          ),
       ),
-      ["order"]
+      ["order"],
     );
     const namespaceId = entry.namespaceId;
     const namedFilters = numberedFilterFromNamedFilters(
       entry.filterValues,
-      namespace!.filterNames
+      namespace!.filterNames,
     );
     async function addChunk(
-      chunk: Doc<"chunks"> & { state: { kind: "pending" } }
+      chunk: Doc<"chunks"> & { state: { kind: "pending" } },
     ) {
       const embeddingId = await insertEmbedding(
         ctx,
         chunk.state.embedding,
         namespaceId,
         entry.importance,
-        namedFilters
+        namedFilters,
       );
-      await ctx.db.patch(chunk._id, {
-        state: { kind: "ready", embeddingId },
-      });
+      await ctx.db.patch(chunk._id, { state: { kind: "ready", embeddingId } });
     }
     let dataUsedSoFar = 0;
     let indexToDelete = startOrder;
@@ -247,6 +234,8 @@ export const replaceChunksPage = mutation({
        assert(chunk.state.kind === "ready");
        const vector = await ctx.db.get(chunk.state.embeddingId);
        assert(vector, `Vector ${chunk.state.embeddingId} not found`);
+        // get and delete both count as bandwidth reads
+        dataUsedSoFar += estimateEmbeddingSize(vector) * 2;
        await ctx.db.delete(chunk.state.embeddingId);
        await ctx.db.patch(chunk._id, {
          state: {
@@ -256,7 +245,7 @@ export const replaceChunksPage = mutation({
            pendingSearchableText: chunk.state.searchableText,
          },
        });
-      })
+      }),
     );
     chunksToDeleteEmbeddings = [];
     if (chunkToAdd) {
@@ -265,9 +254,9 @@ export const replaceChunksPage = mutation({
      chunkToAdd = null;
    }
    for await (const chunk of chunkStream) {
-
-
-
+      // one for the stream read, one for patching / replacing
+      dataUsedSoFar += estimateChunkSize(chunk) * 2;
+      if (chunk.state.kind !== "pending") {
        dataUsedSoFar += 17 * KB; // embedding conservative estimate
      }
      if (chunk.order > indexToDelete) {
@@ -277,23 +266,17 @@ export const replaceChunksPage = mutation({
        // check if we're close to the limit
        // if so, bail and pick up on this chunk.order.
        if (dataUsedSoFar > BANDWIDTH_PER_TRANSACTION_SOFT_LIMIT) {
-          return {
-            status: "pending" as const,
-            nextStartOrder: indexToDelete,
-          };
+          return { status: "pending" as const, nextStartOrder: indexToDelete };
        }
      }
      if (dataUsedSoFar > BANDWIDTH_PER_TRANSACTION_HARD_LIMIT) {
-        return {
-          status: "pending" as const,
-          nextStartOrder: indexToDelete,
-        };
+        return { status: "pending" as const, nextStartOrder: indexToDelete };
      }
      if (chunk.state.kind === "pending") {
        if (chunk.entryId === entryId) {
          if (chunkToAdd) {
            console.warn(
-              `Multiple pending chunks before changing order ${chunk.order} for entry ${entryId} version ${entry.version}: ${chunkToAdd._id} and ${chunk._id}`
+              `Multiple pending chunks before changing order ${chunk.order} for entry ${entryId} version ${entry.version}: ${chunkToAdd._id} and ${chunk._id}`,
            );
            await addChunk(chunkToAdd);
          }
@@ -304,7 +287,7 @@ export const replaceChunksPage = mutation({
          chunksToDeleteEmbeddings.push(chunk);
        } else {
          console.debug(
-            `Skipping adding chunk ${chunk._id} for entry ${entryId} version ${entry.version} since it's already ready`
+            `Skipping adding chunk ${chunk._id} for entry ${entryId} version ${entry.version} since it's already ready`,
          );
        }
      }
@@ -312,10 +295,7 @@ export const replaceChunksPage = mutation({
     // handle the last batch
     await handleBatch();
 
-    return {
-      status: "ready" as const,
-      nextStartOrder: 0,
-    };
+    return { status: "ready" as const, nextStartOrder: 0 };
   },
 });
 
@@ -327,7 +307,7 @@ export const vRangeResult = v.object({
     v.object({
       text: v.string(),
       metadata: v.optional(v.record(v.string(), v.any())),
-    })
+    }),
   ),
 });
 
@@ -342,7 +322,7 @@ export const getRangesOfChunks = internalQuery({
   }),
   handler: async (
     ctx,
-    args
+    args,
   ): Promise<{
     ranges: (null | Infer<typeof vRangeResult>)[];
     entries: Entry[];
@@ -353,19 +333,19 @@ export const getRangesOfChunks = internalQuery({
      ctx.db
        .query("chunks")
        .withIndex("embeddingId", (q) =>
-          q.eq("state.embeddingId", embeddingId)
+          q.eq("state.embeddingId", embeddingId),
        )
        .order("desc")
-        .first()
-    )
+        .first(),
+    ),
   );
 
   // Note: This preserves order of entries as they first appeared.
   const entries = (
     await Promise.all(
       Array.from(
-        new Set(chunks.filter((c) => c !== null).map((c) => c.entryId))
-      ).map((id) => ctx.db.get(id))
+        new Set(chunks.filter((c) => c !== null).map((c) => c.entryId)),
+      ).map((id) => ctx.db.get(id)),
     )
   )
     .filter((d) => d !== null)
@@ -383,7 +363,7 @@ export const getRangesOfChunks = internalQuery({
       acc[entryId] = [...(acc[entryId] ?? []), order].sort((a, b) => a - b);
       return acc;
     },
-    {} as Record<Id<"entries">, number[]>
+    {} as Record<Id<"entries">, number[]>,
   );
 
   const result: Array<Infer<typeof vRangeResult> | null> = [];
@@ -397,7 +377,7 @@ export const getRangesOfChunks = internalQuery({
     // instead we'd check that other chunks are not the same doc/order
     if (
       result.find(
-        (r) => r?.entryId === chunk.entryId && r?.order === chunk.order
+        (r) => r?.entryId === chunk.entryId && r?.order === chunk.order,
       )
     ) {
       // De-dupe chunks
@@ -415,12 +395,12 @@ export const getRangesOfChunks = internalQuery({
     const startOrder = Math.max(
       chunk.order - chunkContext.before,
       0,
-      Math.min(previousOrder + 1, chunk.order)
+      Math.min(previousOrder + 1, chunk.order),
     );
     // We stop short if the next chunk order's "before" context will cover it.
     const endOrder = Math.min(
       chunk.order + chunkContext.after + 1,
-      Math.max(nextOrder - chunkContext.before, chunk.order + 1)
+      Math.max(nextOrder - chunkContext.before, chunk.order + 1),
     );
     const contentIds: Id<"content">[] = [];
     if (startOrder === chunk.order && endOrder === chunk.order + 1) {
@@ -432,7 +412,7 @@ export const getRangesOfChunks = internalQuery({
          q
            .eq("entryId", entryId)
            .gte("order", startOrder)
-            .lt("order", endOrder)
+            .lt("order", endOrder),
        )
        .collect();
      for (const chunk of chunks) {
@@ -444,21 +424,13 @@ export const getRangesOfChunks = internalQuery({
        const content = await ctx.db.get(contentId);
        assert(content, `Content ${contentId} not found`);
        return { text: content.text, metadata: content.metadata };
-      })
+      }),
     );
 
-    result.push({
-      entryId,
-      order: chunk.order,
-      startOrder,
-      content,
-    });
+    result.push({ entryId, order: chunk.order, startOrder, content });
   }
 
-  return {
-    ranges: result,
-    entries,
-  };
+  return { ranges: result, entries };
  },
});
 
@@ -483,7 +455,7 @@ export const list = query({
        const content = await ctx.db.get(chunk.contentId);
        assert(content, `Content ${chunk.contentId} not found`);
        return publicChunk(chunk, content);
-      })
+      }),
     ),
   };
  },
@@ -516,31 +488,30 @@ async function publicChunk(chunk: Doc<"chunks">, content: Doc<"content">) {
 }
 
 export const deleteChunksPage = internalMutation({
-  args: v.object({
-    entryId: v.id("entries"),
-    startOrder: v.number(),
-  }),
+  args: v.object({ entryId: v.id("entries"), startOrder: v.number() }),
   returns: v.object({ isDone: v.boolean(), nextStartOrder: v.number() }),
   handler: deleteChunksPageHandler,
 });
 
 export async function deleteChunksPageHandler(
   ctx: MutationCtx,
-  { entryId, startOrder }: { entryId: Id<"entries">; startOrder: number }
+  { entryId, startOrder }: { entryId: Id<"entries">; startOrder: number },
 ) {
   const chunkStream = ctx.db
     .query("chunks")
     .withIndex("entryId_order", (q) =>
-      q.eq("entryId", entryId).gte("order", startOrder)
+      q.eq("entryId", entryId).gte("order", startOrder),
     );
   let dataUsedSoFar = 0;
   for await (const chunk of chunkStream) {
-
+    // one for the stream read, one for deleting
+    dataUsedSoFar += estimateChunkSize(chunk) * 2;
    await ctx.db.delete(chunk._id);
    if (chunk.state.kind === "ready") {
      const embedding = await ctx.db.get(chunk.state.embeddingId);
      if (embedding) {
-
+        // get and delete both count as bandwidth reads
+        dataUsedSoFar += estimateEmbeddingSize(embedding) * 2;
        await ctx.db.delete(chunk.state.embeddingId);
      }
    }
@@ -572,7 +543,7 @@ function estimateEmbeddingSize(embedding: Doc<VectorTableName>) {
   return dataUsedSoFar;
 }
 
-
+function estimateChunkSize(chunk: Doc<"chunks">) {
   let dataUsedSoFar = 100; // constant metadata - roughly
   if (chunk.state.kind === "pending") {
     dataUsedSoFar += chunk.state.embedding.length * 8;
@@ -590,7 +561,7 @@ async function estimateContentSize(ctx: QueryCtx, contentId: Id<"content">) {
   if (content) {
     dataUsedSoFar += content.text.length;
     dataUsedSoFar += JSON.stringify(
-      convexToJson(content.metadata ?? {})
+      convexToJson(content.metadata ?? {}),
     ).length;
   }
   return dataUsedSoFar;
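Note: the `* 2` bandwidth accounting added above supports a resumable-pagination pattern: each mutation page tallies an estimate of bytes read and written, bails with `{ status: "pending", nextStartOrder }` once it nears the transaction budget, and a follow-up invocation resumes from that order. Below is a minimal standalone sketch of the pattern; the types and the budget constant are hypothetical stand-ins (the real code uses Convex's MutationCtx, estimateChunkSize, and the BANDWIDTH_PER_TRANSACTION_* constants).

type Chunk = { order: number; sizeBytes: number };

const SOFT_LIMIT = 4 * 1024 * 1024; // assumed budget, not the package's actual constant

async function deletePage(
  chunks: AsyncIterable<Chunk>,
  deleteChunk: (c: Chunk) => Promise<void>,
): Promise<{ status: "ready" | "pending"; nextStartOrder: number }> {
  let dataUsedSoFar = 0;
  for await (const chunk of chunks) {
    // The chunk is read from the stream and then deleted, so it counts
    // against the budget twice (the same "* 2" accounting the diff adds).
    dataUsedSoFar += chunk.sizeBytes * 2;
    if (dataUsedSoFar > SOFT_LIMIT) {
      // Bail before exceeding the transaction budget; the caller re-runs
      // the mutation starting from this order.
      return { status: "pending", nextStartOrder: chunk.order };
    }
    await deleteChunk(chunk);
  }
  return { status: "ready", nextStartOrder: 0 };
}

Counting each document twice reflects that a streamed read and a subsequent delete or patch each consume bandwidth, which is the rationale the new comments in replaceChunksPage and deleteChunksPageHandler state.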
@@ -45,7 +45,7 @@ describe("importance.ts", () => {
|
|
|
45
45
|
expect(result[0]).toBeCloseTo(embedding[0] * importance);
|
|
46
46
|
expect(result[1]).toBeCloseTo(embedding[1] * importance);
|
|
47
47
|
expect(
|
|
48
|
-
Math.sqrt(result[0] ** 2 + result[1] ** 2 + result[2] ** 2)
|
|
48
|
+
Math.sqrt(result[0] ** 2 + result[1] ** 2 + result[2] ** 2),
|
|
49
49
|
).toBeCloseTo(1);
|
|
50
50
|
});
|
|
51
51
|
|
|
@@ -175,7 +175,7 @@ describe("importance.ts", () => {
|
|
|
175
175
|
|
|
176
176
|
expect(retrievedImportance).toBeCloseTo(importance, 3);
|
|
177
177
|
expect(Math.abs(retrievedImportance - importance)).toBeLessThan(
|
|
178
|
-
tolerance
|
|
178
|
+
tolerance,
|
|
179
179
|
);
|
|
180
180
|
});
|
|
181
181
|
});
|
|
@@ -198,13 +198,13 @@ describe("importance.ts", () => {
|
|
|
198
198
|
// Create vector with initial importance
|
|
199
199
|
const vectorWithInitialImp = vectorWithImportance(
|
|
200
200
|
embedding,
|
|
201
|
-
initialImportance
|
|
201
|
+
initialImportance,
|
|
202
202
|
);
|
|
203
203
|
|
|
204
204
|
// Modify importance
|
|
205
205
|
const vectorWithModifiedImp = modifyImportance(
|
|
206
206
|
vectorWithInitialImp,
|
|
207
|
-
newImportance
|
|
207
|
+
newImportance,
|
|
208
208
|
);
|
|
209
209
|
|
|
210
210
|
// Retrieve and verify
|
|
@@ -35,7 +35,7 @@ export function vectorWithImportance(embedding: number[], importance: number) {
|
|
|
35
35
|
// We drop the final dimension if it'd make it larger than 4096.
|
|
36
36
|
// Unfortunate current limitation of Convex vector search.
|
|
37
37
|
const vectorToModify = normalizeVector(
|
|
38
|
-
embedding.length === 4096 ? embedding.slice(0, 4095) : embedding
|
|
38
|
+
embedding.length === 4096 ? embedding.slice(0, 4095) : embedding,
|
|
39
39
|
);
|
|
40
40
|
const scaled = scaleVector(vectorToModify, importance);
|
|
41
41
|
|
|
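Note: the test expectations above pin down how importance is encoded: the embedding is normalized, scaled by `importance`, and a final weight dimension of sqrt(1 - importance^2) is appended so the result stays unit-length; 4096-dimension embeddings first lose their last dimension to stay within Convex's 4096-dimension limit. A sketch reconstructed from those expectations follows; `normalizeVector`'s body here is an assumption, not the package's implementation.

function normalizeVector(v: number[]): number[] {
  // Assumed helper: scale to unit length.
  const norm = Math.sqrt(v.reduce((sum, x) => sum + x * x, 0));
  return v.map((x) => x / norm);
}

function vectorWithImportance(embedding: number[], importance: number): number[] {
  // Convex vector search caps vectors at 4096 dimensions, so a 4096-dim
  // embedding drops its last dimension to make room for the weight.
  const base = normalizeVector(
    embedding.length === 4096 ? embedding.slice(0, 4095) : embedding,
  );
  // Scale the content dimensions by importance and append a weight of
  // sqrt(1 - importance^2); the result stays unit-length, as the test checks:
  // importance^2 * |base|^2 + (1 - importance^2) = 1.
  return [...base.map((x) => x * importance), Math.sqrt(1 - importance ** 2)];
}

Keeping the stored vector unit-length preserves cosine-similarity semantics while down-weighting the content dimensions of low-importance entries.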
package/src/component/embeddings/index.test.ts
CHANGED

@@ -25,7 +25,6 @@ const testApi: ApiFromModules<{
   fns: {
     search: typeof search;
   };
-  // eslint-disable-next-line @typescript-eslint/no-explicit-any
 }>["fns"] = anyApi["embeddings"]["index.test"] as any;
 
 describe("embeddings", () => {
@@ -62,7 +61,7 @@ describe("embeddings", () => {
     expect(insertedVector).toBeDefined();
     expect(insertedVector!.namespaceId).toBe(namespaceId);
     expect(insertedVector!.vector).toHaveLength(
-      vectorWithImportanceDimension(128)
+      vectorWithImportanceDimension(128),
     );
     expect(insertedVector!.filter0).toBeUndefined();
     expect(insertedVector!.filter1).toBeUndefined();
@@ -94,7 +93,7 @@ describe("embeddings", () => {
       embedding,
       namespaceId,
       importance,
-      undefined
+      undefined,
     );
   });
 
@@ -116,7 +115,7 @@ describe("embeddings", () => {
 
     // Vectors should be different due to importance scaling
     expect(insertedVector!.vector).not.toEqual(
-      vectorWithoutImportanceData!.vector
+      vectorWithoutImportanceData!.vector,
     );
 
     // The last element should be the weight: sqrt(1 - importance^2)

package/src/component/embeddings/index.ts
CHANGED

@@ -47,7 +47,7 @@ export async function insertEmbedding(
   embedding: number[],
   namespaceId: Id<"namespaces">,
   importance: number | undefined,
-  filters: NumberedFilter | undefined
+  filters: NumberedFilter | undefined,
 ) {
   const filterFields = filterFieldsFromNumbers(namespaceId, filters);
   const dimension = validateVectorDimension(embedding.length);
@@ -74,12 +74,12 @@ export async function searchEmbeddings(
     // filter3, filter1, or filter2 is present.
     filters: Array<NumberedFilter>;
     limit: number;
-  }
+  },
 ) {
   const dimension = validateVectorDimension(embedding.length);
   const tableName = getVectorTableName(dimension);
   const orFilters = filters.flatMap((filter) =>
-    filterFieldsFromNumbers(namespaceId, filter)
+    filterFieldsFromNumbers(namespaceId, filter),
   );
   return ctx.vectorSearch(tableName, "vector", {
     vector: searchVector(embedding),
@@ -89,9 +89,9 @@ export async function searchEmbeddings(
        : q.or(
            ...orFilters.flatMap((namedFilter) =>
              Object.entries(namedFilter).map(([filterField, filter]) =>
-                q.eq(filterField as keyof (typeof orFilters)[number], filter)
-              )
-            )
+                q.eq(filterField as keyof (typeof orFilters)[number], filter),
+              ),
+            ),
          ),
     limit,
   });
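Note: searchEmbeddings above builds its filter by flattening every named filter into equality checks on numbered fields (filter0, filter1, ...) and OR-ing them all together. A standalone sketch of that shape follows; `FilterBuilder` is a stand-in interface for this illustration, not Convex's actual vector-search filter-builder type.

type NumberedFilter = Record<string, unknown>; // e.g. { filter0: "user-123" }

// Stand-in for the filter builder: only the two methods the pattern needs.
interface FilterBuilder<Expr> {
  eq(field: string, value: unknown): Expr;
  or(...exprs: Expr[]): Expr;
}

function buildOrFilter<Expr>(
  q: FilterBuilder<Expr>,
  filters: NumberedFilter[],
): Expr {
  // One equality check per (field, value) pair, flattened across all filter
  // objects and OR'ed together, mirroring orFilters.flatMap(...) above.
  return q.or(
    ...filters.flatMap((f) =>
      Object.entries(f).map(([field, value]) => q.eq(field, value)),
    ),
  );
}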
package/src/component/embeddings/tables.ts
CHANGED

@@ -1,6 +1,7 @@
 import { literals } from "convex-helpers/validators";
 import {
   defineTable,
+  GenericTableIndexes,
   type GenericTableSearchIndexes,
   type SchemaDefinition,
   type TableDefinition,
@@ -32,7 +33,7 @@ function table(dimensions: VectorDimension): Table {
 
 type Table = TableDefinition<
   VObject<ObjectType<typeof embeddingsFields>, typeof embeddingsFields>,
-
+  GenericTableIndexes,
   GenericTableSearchIndexes,
   VectorIndex
 >;
@@ -41,7 +42,7 @@ type VectorIndex = {
   vector: {
     vectorField: "vector";
     dimensions: number;
-    filterFields: string;
+    filterFields: keyof typeof vAllFilterFields & string;
   };
 };
 
@@ -55,11 +56,11 @@ export const VectorDimensions = [
 ] as const;
 
 export function assertVectorDimension(
-  dimension: number
+  dimension: number,
 ): asserts dimension is VectorDimension {
   if (!VectorDimensions.includes(dimension as VectorDimension)) {
     throw new Error(
-      `Unsupported vector dimension${dimension}. Supported: ${VectorDimensions.join(", ")}`
+      `Unsupported vector dimension${dimension}. Supported: ${VectorDimensions.join(", ")}`,
     );
   }
 }
@@ -67,14 +68,14 @@ export function assertVectorDimension(
 export function validateVectorDimension(dimension: number): VectorDimension {
   if (!VectorDimensions.includes(dimension as VectorDimension)) {
     throw new Error(
-      `Unsupported vector dimension${dimension}. Supported: ${VectorDimensions.join(", ")}`
+      `Unsupported vector dimension${dimension}. Supported: ${VectorDimensions.join(", ")}`,
     );
   }
   return dimension as VectorDimension;
 }
 export type VectorDimension = (typeof VectorDimensions)[number];
 export const VectorTableNames = VectorDimensions.map(
-  (d) => `vectors_${d}`
+  (d) => `vectors_${d}`,
 ) as `vectors_${(typeof VectorDimensions)[number]}`[];
 export type VectorTableName = (typeof VectorTableNames)[number];
 export type VectorTableId = GenericId<(typeof VectorTableNames)[number]>;
@@ -82,7 +83,7 @@ export type VectorTableId = GenericId<(typeof VectorTableNames)[number]>;
 export const vVectorDimension = literals(...VectorDimensions);
 export const vVectorTableName = literals(...VectorTableNames);
 export const vVectorId = v.union(
-  ...VectorTableNames.map((name) => v.id(name))
+  ...VectorTableNames.map((name) => v.id(name)),
 ) as VUnion<
   GenericId<(typeof VectorTableNames)[number]>,
   VId<(typeof VectorTableNames)[number]>[]
@@ -108,7 +109,7 @@ const tables: {
   VectorDimensions.map((dimensions) => [
     `vectors_${dimensions}`,
     table(dimensions),
-  ])
+  ]),
 ) as Record<`vectors_${(typeof VectorDimensions)[number]}`, Table>;
 
 export default tables;
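Note: tables.ts defines one vector table per supported dimension, named vectors_<d>, because a Convex vector index is declared with a fixed `dimensions` value. A sketch of that mapping follows; the dimension list here is illustrative, not the package's actual VectorDimensions array (which is truncated in the hunk above).

// Illustrative dimension list; the real values live in tables.ts.
const VectorDimensions = [128, 256, 512, 768, 1536, 3072, 4096] as const;
type VectorDimension = (typeof VectorDimensions)[number];

// Each supported dimension gets its own table, since embeddings of
// different sizes cannot share a single vector index.
function getVectorTableName(d: VectorDimension): `vectors_${VectorDimension}` {
  return `vectors_${d}`;
}

// e.g. a 1536-dimension embedding would be stored in "vectors_1536"
const tableName = getVectorTableName(1536);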
@@ -97,8 +97,8 @@ describe("entries", () => {
|
|
|
97
97
|
.filter((q) =>
|
|
98
98
|
q.and(
|
|
99
99
|
q.eq(q.field("namespaceId"), namespaceId),
|
|
100
|
-
q.eq(q.field("key"), entry.key)
|
|
101
|
-
)
|
|
100
|
+
q.eq(q.field("key"), entry.key),
|
|
101
|
+
),
|
|
102
102
|
)
|
|
103
103
|
.collect();
|
|
104
104
|
});
|
|
@@ -143,8 +143,8 @@ describe("entries", () => {
|
|
|
143
143
|
.filter((q) =>
|
|
144
144
|
q.and(
|
|
145
145
|
q.eq(q.field("namespaceId"), namespaceId),
|
|
146
|
-
q.eq(q.field("key"), entry.key)
|
|
147
|
-
)
|
|
146
|
+
q.eq(q.field("key"), entry.key),
|
|
147
|
+
),
|
|
148
148
|
)
|
|
149
149
|
.collect();
|
|
150
150
|
});
|
|
@@ -537,7 +537,7 @@ describe("entries", () => {
|
|
|
537
537
|
{
|
|
538
538
|
namespaceId,
|
|
539
539
|
key: "shared-key",
|
|
540
|
-
}
|
|
540
|
+
},
|
|
541
541
|
);
|
|
542
542
|
expect(sharedBefore).toHaveLength(2);
|
|
543
543
|
|
|
@@ -563,7 +563,7 @@ describe("entries", () => {
|
|
|
563
563
|
|
|
564
564
|
const sharedAfter = await t.query(
|
|
565
565
|
internal.entries.getEntriesForNamespaceByKey,
|
|
566
|
-
{ namespaceId, key: "shared-key" }
|
|
566
|
+
{ namespaceId, key: "shared-key" },
|
|
567
567
|
);
|
|
568
568
|
expect(sharedAfter).toHaveLength(0);
|
|
569
569
|
|
|
@@ -722,8 +722,8 @@ describe("entries", () => {
|
|
|
722
722
|
.filter((q) =>
|
|
723
723
|
q.and(
|
|
724
724
|
q.eq(q.field("namespaceId"), namespaceId),
|
|
725
|
-
q.eq(q.field("key"), "versioned-key")
|
|
726
|
-
)
|
|
725
|
+
q.eq(q.field("key"), "versioned-key"),
|
|
726
|
+
),
|
|
727
727
|
)
|
|
728
728
|
.collect();
|
|
729
729
|
});
|
|
@@ -747,8 +747,8 @@ describe("entries", () => {
|
|
|
747
747
|
.filter((q) =>
|
|
748
748
|
q.and(
|
|
749
749
|
q.eq(q.field("namespaceId"), namespaceId),
|
|
750
|
-
q.eq(q.field("key"), "versioned-key")
|
|
751
|
-
)
|
|
750
|
+
q.eq(q.field("key"), "versioned-key"),
|
|
751
|
+
),
|
|
752
752
|
)
|
|
753
753
|
.collect();
|
|
754
754
|
});
|