@convex-dev/rag 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113)
  1. package/LICENSE +201 -0
  2. package/README.md +371 -0
  3. package/dist/client/_generated/_ignore.d.ts +1 -0
  4. package/dist/client/_generated/_ignore.d.ts.map +1 -0
  5. package/dist/client/_generated/_ignore.js +3 -0
  6. package/dist/client/_generated/_ignore.js.map +1 -0
  7. package/dist/client/defaultChunker.d.ts +15 -0
  8. package/dist/client/defaultChunker.d.ts.map +1 -0
  9. package/dist/client/defaultChunker.js +148 -0
  10. package/dist/client/defaultChunker.js.map +1 -0
  11. package/dist/client/fileUtils.d.ts +24 -0
  12. package/dist/client/fileUtils.d.ts.map +1 -0
  13. package/dist/client/fileUtils.js +179 -0
  14. package/dist/client/fileUtils.js.map +1 -0
  15. package/dist/client/index.d.ts +442 -0
  16. package/dist/client/index.d.ts.map +1 -0
  17. package/dist/client/index.js +597 -0
  18. package/dist/client/index.js.map +1 -0
  19. package/dist/client/types.d.ts +29 -0
  20. package/dist/client/types.d.ts.map +1 -0
  21. package/dist/client/types.js +2 -0
  22. package/dist/client/types.js.map +1 -0
  23. package/dist/component/_generated/api.d.ts +439 -0
  24. package/dist/component/_generated/api.d.ts.map +1 -0
  25. package/dist/component/_generated/api.js +22 -0
  26. package/dist/component/_generated/api.js.map +1 -0
  27. package/dist/component/_generated/dataModel.d.ts +60 -0
  28. package/dist/component/_generated/server.d.ts +149 -0
  29. package/dist/component/_generated/server.d.ts.map +1 -0
  30. package/dist/component/_generated/server.js +74 -0
  31. package/dist/component/_generated/server.js.map +1 -0
  32. package/dist/component/chunks.d.ts +139 -0
  33. package/dist/component/chunks.d.ts.map +1 -0
  34. package/dist/component/chunks.js +413 -0
  35. package/dist/component/chunks.js.map +1 -0
  36. package/dist/component/convex.config.d.ts +3 -0
  37. package/dist/component/convex.config.d.ts.map +1 -0
  38. package/dist/component/convex.config.js +6 -0
  39. package/dist/component/convex.config.js.map +1 -0
  40. package/dist/component/embeddings/importance.d.ts +21 -0
  41. package/dist/component/embeddings/importance.d.ts.map +1 -0
  42. package/dist/component/embeddings/importance.js +67 -0
  43. package/dist/component/embeddings/importance.js.map +1 -0
  44. package/dist/component/embeddings/index.d.ts +23 -0
  45. package/dist/component/embeddings/index.d.ts.map +1 -0
  46. package/dist/component/embeddings/index.js +54 -0
  47. package/dist/component/embeddings/index.js.map +1 -0
  48. package/dist/component/embeddings/tables.d.ts +39 -0
  49. package/dist/component/embeddings/tables.d.ts.map +1 -0
  50. package/dist/component/embeddings/tables.js +53 -0
  51. package/dist/component/embeddings/tables.js.map +1 -0
  52. package/dist/component/entries.d.ts +167 -0
  53. package/dist/component/entries.d.ts.map +1 -0
  54. package/dist/component/entries.js +409 -0
  55. package/dist/component/entries.js.map +1 -0
  56. package/dist/component/filters.d.ts +46 -0
  57. package/dist/component/filters.d.ts.map +1 -0
  58. package/dist/component/filters.js +72 -0
  59. package/dist/component/filters.js.map +1 -0
  60. package/dist/component/namespaces.d.ts +131 -0
  61. package/dist/component/namespaces.d.ts.map +1 -0
  62. package/dist/component/namespaces.js +222 -0
  63. package/dist/component/namespaces.js.map +1 -0
  64. package/dist/component/schema.d.ts +1697 -0
  65. package/dist/component/schema.d.ts.map +1 -0
  66. package/dist/component/schema.js +88 -0
  67. package/dist/component/schema.js.map +1 -0
  68. package/dist/component/search.d.ts +20 -0
  69. package/dist/component/search.d.ts.map +1 -0
  70. package/dist/component/search.js +69 -0
  71. package/dist/component/search.js.map +1 -0
  72. package/dist/package.json +3 -0
  73. package/dist/react/index.d.ts +2 -0
  74. package/dist/react/index.d.ts.map +1 -0
  75. package/dist/react/index.js +6 -0
  76. package/dist/react/index.js.map +1 -0
  77. package/dist/shared.d.ts +479 -0
  78. package/dist/shared.d.ts.map +1 -0
  79. package/dist/shared.js +98 -0
  80. package/dist/shared.js.map +1 -0
  81. package/package.json +97 -0
  82. package/src/client/_generated/_ignore.ts +1 -0
  83. package/src/client/defaultChunker.test.ts +243 -0
  84. package/src/client/defaultChunker.ts +183 -0
  85. package/src/client/fileUtils.ts +179 -0
  86. package/src/client/index.test.ts +475 -0
  87. package/src/client/index.ts +1125 -0
  88. package/src/client/setup.test.ts +28 -0
  89. package/src/client/types.ts +69 -0
  90. package/src/component/_generated/api.d.ts +439 -0
  91. package/src/component/_generated/api.js +23 -0
  92. package/src/component/_generated/dataModel.d.ts +60 -0
  93. package/src/component/_generated/server.d.ts +149 -0
  94. package/src/component/_generated/server.js +90 -0
  95. package/src/component/chunks.test.ts +915 -0
  96. package/src/component/chunks.ts +555 -0
  97. package/src/component/convex.config.ts +7 -0
  98. package/src/component/embeddings/importance.test.ts +249 -0
  99. package/src/component/embeddings/importance.ts +75 -0
  100. package/src/component/embeddings/index.test.ts +482 -0
  101. package/src/component/embeddings/index.ts +99 -0
  102. package/src/component/embeddings/tables.ts +114 -0
  103. package/src/component/entries.test.ts +341 -0
  104. package/src/component/entries.ts +546 -0
  105. package/src/component/filters.ts +119 -0
  106. package/src/component/namespaces.ts +299 -0
  107. package/src/component/schema.ts +106 -0
  108. package/src/component/search.test.ts +445 -0
  109. package/src/component/search.ts +97 -0
  110. package/src/component/setup.test.ts +5 -0
  111. package/src/react/index.ts +7 -0
  112. package/src/shared.ts +247 -0
  113. package/src/vitest.config.ts +7 -0
package/src/component/chunks.ts
@@ -0,0 +1,555 @@
+ import { assert } from "convex-helpers";
+ import { paginator } from "convex-helpers/server/pagination";
+ import { mergedStream, stream } from "convex-helpers/server/stream";
+ import { paginationOptsValidator } from "convex/server";
+ import { convexToJson, type Infer } from "convex/values";
+ import {
+   statuses,
+   vChunk,
+   vCreateChunkArgs,
+   vEntry,
+   vPaginationResult,
+   vStatus,
+   type Entry,
+ } from "../shared.js";
+ import type { Doc, Id } from "./_generated/dataModel.js";
+ import {
+   internalQuery,
+   mutation,
+   query,
+   type MutationCtx,
+   type QueryCtx,
+ } from "./_generated/server.js";
+ import { insertEmbedding } from "./embeddings/index.js";
+ import { vVectorId } from "./embeddings/tables.js";
+ import { schema, v } from "./schema.js";
+ import { getPreviousEntry, publicEntry } from "./entries.js";
+ import {
+   filterFieldsFromNumbers,
+   numberedFilterFromNamedFilters,
+ } from "./filters.js";
+
+ const KB = 1_024;
+ const MB = 1_024 * KB;
+ const BANDWIDTH_PER_TRANSACTION_HARD_LIMIT = 8 * MB;
+ const BANDWIDTH_PER_TRANSACTION_SOFT_LIMIT = 4 * MB;
+
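For a sense of scale, a back-of-envelope sketch (editorial, not part of the package; it assumes 1536-dimension embeddings, matching the `length * 8` byte estimate in `estimateChunkSize` near the end of this file):

    // Editorial sketch: rough bandwidth budget math, assuming 1536-dim embeddings.
    const KB = 1_024;
    const MB = 1_024 * KB;
    const embeddingBytes = 1_536 * 8; // 12,288 bytes ≈ 12 KB per vector
    const softLimit = 4 * MB; // 4,194,304 bytes: stop early, report "pending"
    const hardLimit = 8 * MB; // 8,388,608 bytes: absolute per-transaction cap
    // With the conservative 17 KB-per-ready-chunk estimate used below:
    const chunksPerPage = Math.floor(softLimit / (17 * KB)); // ≈ 240 chunks

So a single transaction processes on the order of a couple hundred chunks before yielding.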
+ export const vInsertChunksArgs = v.object({
+   entryId: v.id("entries"),
+   startOrder: v.number(),
+   chunks: v.array(vCreateChunkArgs),
+ });
+ type InsertChunksArgs = Infer<typeof vInsertChunksArgs>;
+
+ export const insert = mutation({
+   args: vInsertChunksArgs,
+   returns: v.object({ status: vStatus }),
+   handler: insertChunks,
+ });
+
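`insert` writes a contiguous slice of an entry's chunks beginning at `startOrder`, deleting whatever already occupies that range, so a retried page is idempotent. A minimal driver sketch; `components.rag`, `entryId`, and `chunksWithEmbeddings` are hypothetical names for illustration:

    // Hypothetical caller paging precomputed chunks into the component.
    // Each element: { content: { text: string }, embedding: number[], searchableText?: string }
    const BATCH_SIZE = 64;
    for (let start = 0; start < chunksWithEmbeddings.length; start += BATCH_SIZE) {
      const { status } = await ctx.runMutation(components.rag.chunks.insert, {
        entryId, // assumed to exist from a prior entry-creation call
        startOrder: start,
        chunks: chunksWithEmbeddings.slice(start, start + BATCH_SIZE),
      });
      // "ready" for a first version; "pending" while an older version of the
      // same key is still live (the swap happens in replaceChunksPage below).
    }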
+ export async function insertChunks(
+   ctx: MutationCtx,
+   { entryId, startOrder, chunks }: InsertChunksArgs
+ ) {
+   const entry = await ctx.db.get(entryId);
+   if (!entry) {
+     throw new Error(`Entry ${entryId} not found`);
+   }
+   await ensureLatestEntryVersion(ctx, entry);
+
+   // Get the namespace for filter conversion
+   const namespace = await ctx.db.get(entry.namespaceId);
+   assert(namespace, `Namespace ${entry.namespaceId} not found`);
+
+   const previousEntry = await getPreviousEntry(ctx, entry);
+   let order = startOrder;
+   const chunkIds: Id<"chunks">[] = [];
+   const existingChunks = await ctx.db
+     .query("chunks")
+     .withIndex("entryId_order", (q) =>
+       q
+         .eq("entryId", entryId)
+         .gte("order", startOrder)
+         .lt("order", startOrder + chunks.length)
+     )
+     .collect();
+   if (existingChunks.length > 0) {
+     console.debug(
+       `Deleting ${existingChunks.length} existing chunks for entry ${entryId} at version ${entry.version}`
+     );
+   }
+   // TODO: avoid writing if they're the same
+   await Promise.all(
+     existingChunks.map(async (c) => {
+       if (c.state.kind === "ready") {
+         await ctx.db.delete(c.state.embeddingId);
+       }
+       await ctx.db.delete(c.contentId);
+       await ctx.db.delete(c._id);
+     })
+   );
+   const numberedFilter = numberedFilterFromNamedFilters(
+     entry.filterValues,
+     namespace!.filterNames
+   );
+   for (const chunk of chunks) {
+     const contentId = await ctx.db.insert("content", {
+       text: chunk.content.text,
+       metadata: chunk.content.metadata,
+     });
+     let state: Doc<"chunks">["state"] = {
+       kind: "pending",
+       embedding: chunk.embedding,
+       importance: entry.importance,
+       pendingSearchableText: chunk.searchableText,
+     };
+     if (!previousEntry) {
+       const embeddingId = await insertEmbedding(
+         ctx,
+         chunk.embedding,
+         entry.namespaceId,
+         entry.importance,
+         numberedFilter
+       );
+       state = {
+         kind: "ready",
+         embeddingId,
+         searchableText: chunk.searchableText,
+       };
+     }
+     chunkIds.push(
+       await ctx.db.insert("chunks", {
+         entryId,
+         order,
+         state,
+         contentId,
+         namespaceId: entry.namespaceId,
+         ...filterFieldsFromNumbers(entry.namespaceId, numberedFilter),
+       })
+     );
+     order++;
+   }
+   return {
+     status: previousEntry ? ("pending" as const) : ("ready" as const),
+   };
+ }
+
+ async function ensureLatestEntryVersion(ctx: QueryCtx, entry: Doc<"entries">) {
+   if (!entry.key) {
+     return true;
+   }
+   const newerEntry = await mergedStream(
+     statuses.map((status) =>
+       stream(ctx.db, schema)
+         .query("entries")
+         .withIndex("namespaceId_status_key_version", (q) =>
+           q
+             .eq("namespaceId", entry.namespaceId)
+             .eq("status.kind", status)
+             .eq("key", entry.key)
+             .gt("version", entry.version)
+         )
+     ),
+     ["version"]
+   ).first();
+   if (newerEntry) {
+     console.warn(
+       `Bailing from inserting chunks for entry ${entry.key} at version ${entry.version} since there's a newer version ${newerEntry.version} (status ${newerEntry.status}) creation time difference ${(newerEntry._creationTime - entry._creationTime).toFixed(0)}ms`
+     );
+     return false;
+   }
+   return true;
+ }
+
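Why the merged stream: the `namespaceId_status_key_version` index fixes `status.kind` by equality before `key` and `version` can be constrained, so one scan cannot cover all statuses at once. The function opens a stream per status, merges them ordered by `version`, and `.first()` finds any strictly newer version. Reconstructed from usage, the index declaration in schema.ts presumably looks like this (an assumption, not copied from the package):

    // Presumed index shape (reconstructed from the withIndex calls, unverified):
    // entries: defineTable({ ... }).index("namespaceId_status_key_version", [
    //   "namespaceId",
    //   "status.kind",
    //   "key",
    //   "version",
    // ]),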
+ export const replaceChunksPage = mutation({
+   args: v.object({
+     entryId: v.id("entries"),
+     startOrder: v.number(),
+   }),
+   returns: v.object({
+     status: vStatus,
+     nextStartOrder: v.number(),
+   }),
+   handler: async (ctx, args) => {
+     const { entryId, startOrder } = args;
+     const entryOrNull = await ctx.db.get(entryId);
+     if (!entryOrNull) {
+       throw new Error(`Entry ${entryId} not found`);
+     }
+     const entry = entryOrNull;
+     const isLatest = await ensureLatestEntryVersion(ctx, entry);
+     if (!isLatest) {
+       return {
+         status: "replaced" as const,
+         nextStartOrder: startOrder,
+       };
+     }
+
+     // Get the namespace for filter conversion
+     const namespace = await ctx.db.get(entry.namespaceId);
+     assert(namespace, `Namespace ${entry.namespaceId} not found`);
+
+     const previousEntry = await getPreviousEntry(ctx, entry);
+     const pendingEntries =
+       entry.key && previousEntry
+         ? await ctx.db
+             .query("entries")
+             .withIndex("namespaceId_status_key_version", (q) =>
+               q
+                 .eq("namespaceId", entry.namespaceId)
+                 .eq("status.kind", "pending")
+                 .eq("key", entry.key)
+             )
+             .collect()
+         : [];
+     const chunkStream = mergedStream(
+       [entry, ...pendingEntries, previousEntry]
+         .filter((d) => d !== null)
+         .map((entry) =>
+           stream(ctx.db, schema)
+             .query("chunks")
+             .withIndex("entryId_order", (q) =>
+               q.eq("entryId", entry._id).gte("order", startOrder)
+             )
+         ),
+       ["order"]
+     );
+     const namespaceId = entry.namespaceId;
+     const namedFilters = numberedFilterFromNamedFilters(
+       entry.filterValues,
+       namespace!.filterNames
+     );
+     async function addChunk(
+       chunk: Doc<"chunks"> & { state: { kind: "pending" } }
+     ) {
+       const embeddingId = await insertEmbedding(
+         ctx,
+         chunk.state.embedding,
+         namespaceId,
+         entry.importance,
+         namedFilters
+       );
+       await ctx.db.patch(chunk._id, {
+         state: { kind: "ready", embeddingId },
+       });
+     }
+     let dataUsedSoFar = 0;
+     let indexToDelete = startOrder;
+     let chunksToDeleteEmbeddings: Doc<"chunks">[] = [];
+     let chunkToAdd: (Doc<"chunks"> & { state: { kind: "pending" } }) | null =
+       null;
+     async function handleBatch() {
+       await Promise.all(
+         chunksToDeleteEmbeddings.map(async (chunk) => {
+           assert(chunk.state.kind === "ready");
+           const vector = await ctx.db.get(chunk.state.embeddingId);
+           assert(vector, `Vector ${chunk.state.embeddingId} not found`);
+           await ctx.db.delete(chunk.state.embeddingId);
+           await ctx.db.patch(chunk._id, {
+             state: {
+               kind: "replaced",
+               embeddingId: chunk.state.embeddingId,
+               vector: vector.vector,
+               pendingSearchableText: chunk.state.searchableText,
+             },
+           });
+         })
+       );
+       chunksToDeleteEmbeddings = [];
+       if (chunkToAdd) {
+         await addChunk(chunkToAdd);
+       }
+       chunkToAdd = null;
+     }
+     for await (const chunk of chunkStream) {
+       if (chunk.state.kind === "pending") {
+         dataUsedSoFar += await estimateChunkSize(chunk);
+       } else {
+         dataUsedSoFar += 17 * KB; // embedding conservative estimate
+       }
+       if (chunk.order > indexToDelete) {
+         await handleBatch();
+         indexToDelete = chunk.order;
+         // delete the chunks
+         // check if we're close to the limit
+         // if so, bail and pick up on this chunk.order.
+         if (dataUsedSoFar > BANDWIDTH_PER_TRANSACTION_SOFT_LIMIT) {
+           return {
+             status: "pending" as const,
+             nextStartOrder: indexToDelete,
+           };
+         }
+       }
+       if (dataUsedSoFar > BANDWIDTH_PER_TRANSACTION_HARD_LIMIT) {
+         return {
+           status: "pending" as const,
+           nextStartOrder: indexToDelete,
+         };
+       }
+       if (chunk.state.kind === "pending") {
+         if (chunk.entryId === entryId) {
+           if (chunkToAdd) {
+             console.warn(
+               `Multiple pending chunks before changing order ${chunk.order} for entry ${entryId} version ${entry.version}: ${chunkToAdd._id} and ${chunk._id}`
+             );
+             await addChunk(chunkToAdd);
+           }
+           chunkToAdd = chunk as Doc<"chunks"> & { state: { kind: "pending" } };
+         }
+       } else {
+         if (chunk.entryId !== entryId && chunk.state.kind === "ready") {
+           chunksToDeleteEmbeddings.push(chunk);
+         } else {
+           console.debug(
+             `Skipping adding chunk ${chunk._id} for entry ${entryId} version ${entry.version} since it's already ready`
+           );
+         }
+       }
+     }
+     // handle the last batch
+     await handleBatch();
+
+     return {
+       status: "ready" as const,
+       nextStartOrder: 0,
+     };
+   },
+ });
+
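`replaceChunksPage` is resumable: each call stays under the bandwidth limits above, swaps what it can, and reports where to resume. A sketch of the driving loop (hypothetical caller; the package presumably runs this from the workpool mounted in convex.config.ts below):

    // Hypothetical driver: repeat until the entry's swap completes.
    let startOrder = 0;
    for (;;) {
      const { status, nextStartOrder } = await ctx.runMutation(
        components.rag.chunks.replaceChunksPage, // assumed mount point
        { entryId, startOrder }
      );
      if (status !== "pending") break; // "ready": done; "replaced": a newer version took over
      startOrder = nextStartOrder;
    }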
+ export const vRangeResult = v.object({
+   entryId: v.id("entries"),
+   order: v.number(),
+   startOrder: v.number(),
+   content: v.array(
+     v.object({
+       text: v.string(),
+       metadata: v.optional(v.record(v.string(), v.any())),
+     })
+   ),
+ });
+
+ export const getRangesOfChunks = internalQuery({
+   args: {
+     embeddingIds: v.array(vVectorId),
+     chunkContext: v.object({ before: v.number(), after: v.number() }),
+   },
+   returns: v.object({
+     ranges: v.array(v.union(v.null(), vRangeResult)),
+     entries: v.array(vEntry),
+   }),
+   handler: async (
+     ctx,
+     args
+   ): Promise<{
+     ranges: (null | Infer<typeof vRangeResult>)[];
+     entries: Entry[];
+   }> => {
+     const { embeddingIds, chunkContext } = args;
+     const chunks = await Promise.all(
+       embeddingIds.map((embeddingId) =>
+         ctx.db
+           .query("chunks")
+           .withIndex("embeddingId", (q) =>
+             q.eq("state.embeddingId", embeddingId)
+           )
+           .order("desc")
+           .first()
+       )
+     );
+
+     // Note: This preserves order of entries as they first appeared.
+     const entries = (
+       await Promise.all(
+         Array.from(
+           new Set(chunks.filter((c) => c !== null).map((c) => c.entryId))
+         ).map((id) => ctx.db.get(id))
+       )
+     )
+       .filter((d) => d !== null)
+       .map(publicEntry);
+
+     const entryOders = chunks
+       .filter((c) => c !== null)
+       .map((c) => [c.entryId, c.order] as const)
+       .reduce(
+         (acc, [entryId, order]) => {
+           if (acc[entryId]?.includes(order)) {
+             // De-dupe orders
+             return acc;
+           }
+           acc[entryId] = [...(acc[entryId] ?? []), order].sort((a, b) => a - b);
+           return acc;
+         },
+         {} as Record<Id<"entries">, number[]>
+       );
+
+     const result: Array<Infer<typeof vRangeResult> | null> = [];
+
+     for (const chunk of chunks) {
+       if (chunk === null) {
+         result.push(null);
+         continue;
+       }
+       // Note: if we parallelize this in the future, we could have a race
+       // instead we'd check that other chunks are not the same doc/order
+       if (
+         result.find(
+           (r) => r?.entryId === chunk.entryId && r?.order === chunk.order
+         )
+       ) {
+         // De-dupe chunks
+         result.push(null);
+         continue;
+       }
+       const entryId = chunk.entryId;
+       const entry = await ctx.db.get(entryId);
+       assert(entry, `Entry ${entryId} not found`);
+       const otherOrders = entryOders[entryId] ?? [chunk.order];
+       const ourOrderIndex = otherOrders.indexOf(chunk.order);
+       const previousOrder = otherOrders[ourOrderIndex - 1] ?? -Infinity;
+       const nextOrder = otherOrders[ourOrderIndex + 1] ?? Infinity;
+       // We absorb all previous context up to the previous chunk.
+       const startOrder = Math.max(
+         chunk.order - chunkContext.before,
+         0,
+         Math.min(previousOrder + 1, chunk.order)
+       );
+       // We stop short if the next chunk order's "before" context will cover it.
+       const endOrder = Math.min(
+         chunk.order + chunkContext.after + 1,
+         Math.max(nextOrder - chunkContext.before, chunk.order + 1)
+       );
+       const contentIds: Id<"content">[] = [];
+       if (startOrder === chunk.order && endOrder === chunk.order + 1) {
+         contentIds.push(chunk.contentId);
+       } else {
+         const chunks = await ctx.db
+           .query("chunks")
+           .withIndex("entryId_order", (q) =>
+             q
+               .eq("entryId", entryId)
+               .gte("order", startOrder)
+               .lt("order", endOrder)
+           )
+           .collect();
+         for (const chunk of chunks) {
+           contentIds.push(chunk.contentId);
+         }
+       }
+       const content = await Promise.all(
+         contentIds.map(async (contentId) => {
+           const content = await ctx.db.get(contentId);
+           assert(content, `Content ${contentId} not found`);
+           return { text: content.text, metadata: content.metadata };
+         })
+       );
+
+       result.push({
+         entryId,
+         order: chunk.order,
+         startOrder,
+         content,
+       });
+     }
+
+     return {
+       ranges: result,
+       entries,
+     };
+   },
+ });
+
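A worked example of the window math above, hand-computed with `chunkContext = { before: 1, after: 1 }` and two hits at orders 4 and 6 of the same entry:

    // Hit at order 4: previousOrder = -Infinity, nextOrder = 6
    //   startOrder = Math.max(4 - 1, 0, Math.min(-Infinity + 1, 4)) = 3
    //   endOrder   = Math.min(4 + 1 + 1, Math.max(6 - 1, 4 + 1))    = 5
    // Hit at order 6: previousOrder = 4, nextOrder = Infinity
    //   startOrder = Math.max(6 - 1, 0, Math.min(4 + 1, 6))         = 5
    //   endOrder   = Math.min(6 + 1 + 1, Math.max(Infinity, 7))     = 8
    // Ranges [3, 5) and [5, 8) are adjacent: orders 3–7 are returned exactly once.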
+ export const list = query({
+   args: v.object({
+     entryId: v.id("entries"),
+     paginationOpts: paginationOptsValidator,
+   }),
+   returns: vPaginationResult(vChunk),
+   handler: async (ctx, args) => {
+     const { entryId, paginationOpts } = args;
+     const chunks = await paginator(ctx.db, schema)
+       .query("chunks")
+       .withIndex("entryId_order", (q) => q.eq("entryId", entryId))
+       .order("asc")
+       .paginate(paginationOpts);
+     return {
+       ...chunks,
+       page: await Promise.all(
+         chunks.page.map(async (chunk) => {
+           const content = await ctx.db.get(chunk.contentId);
+           assert(content, `Content ${chunk.contentId} not found`);
+           return publicChunk(chunk, content);
+         })
+       ),
+     };
+   },
+ });
+
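`list` exposes an entry's chunks with standard Convex pagination. A usage sketch (hypothetical caller; `numItems` is arbitrary):

    // Hypothetical paged read of an entry's chunks via the public query.
    let cursor: string | null = null;
    do {
      const result = await ctx.runQuery(components.rag.chunks.list, {
        entryId,
        paginationOpts: { numItems: 100, cursor },
      });
      for (const chunk of result.page) {
        console.log(chunk.order, chunk.state, chunk.text);
      }
      cursor = result.isDone ? null : result.continueCursor;
    } while (cursor !== null);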
+ // export async function findLastChunk(
+ //   ctx: MutationCtx,
+ //   entryId: Id<"entries">
+ // ): Promise<Chunk | null> {
+ //   const chunk = await ctx.db
+ //     .query("chunks")
+ //     .withIndex("entryId_order", (q) => q.eq("entryId", entryId))
+ //     .order("desc")
+ //     .first();
+ //   if (!chunk) {
+ //     return null;
+ //   }
+ //   const content = await ctx.db.get(chunk.contentId);
+ //   assert(content, `Content for chunk ${chunk._id} not found`);
+ //   return publicChunk(chunk, content);
+ // }
+
+ async function publicChunk(chunk: Doc<"chunks">, content: Doc<"content">) {
+   return {
+     order: chunk.order,
+     state: chunk.state.kind,
+     text: content.text,
+     metadata: content.metadata,
+   };
+ }
+
+ export async function deleteChunksPage(
+   ctx: MutationCtx,
+   { entryId, startOrder }: { entryId: Id<"entries">; startOrder: number }
+ ) {
+   const chunkStream = ctx.db
+     .query("chunks")
+     .withIndex("entryId_order", (q) =>
+       q.eq("entryId", entryId).gte("order", startOrder)
+     );
+   let dataUsedSoFar = 0;
+   for await (const chunk of chunkStream) {
+     dataUsedSoFar += await estimateChunkSize(chunk);
+     await ctx.db.delete(chunk._id);
+     dataUsedSoFar += await estimateContentSize(ctx, chunk.contentId);
+     await ctx.db.delete(chunk.contentId);
+     if (dataUsedSoFar > BANDWIDTH_PER_TRANSACTION_HARD_LIMIT) {
+       // TODO: schedule follow-up - workpool?
+       return { isDone: false, nextStartOrder: chunk.order };
+     }
+   }
+   return { isDone: true, nextStartOrder: -1 };
+ }
+
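`deleteChunksPage` pages deletions against the same hard limit and reports progress (the follow-up scheduling is still a TODO above). A caller would loop roughly like this (sketch, assuming a MutationCtx and entryId in scope):

    // Hypothetical loop around deleteChunksPage until the entry is empty.
    let startOrder = 0;
    for (;;) {
      const { isDone, nextStartOrder } = await deleteChunksPage(ctx, {
        entryId,
        startOrder,
      });
      if (isDone) break;
      startOrder = nextStartOrder; // order of the last chunk reached
    }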
+ async function estimateChunkSize(chunk: Doc<"chunks">) {
+   let dataUsedSoFar = 100; // constant metadata - roughly
+   if (chunk.state.kind === "pending") {
+     dataUsedSoFar += chunk.state.embedding.length * 8;
+   }
+   return dataUsedSoFar;
+ }
+ async function estimateContentSize(ctx: QueryCtx, contentId: Id<"content">) {
+   let dataUsedSoFar = 0;
+   // TODO: if/when deletions don't count as bandwidth, we can remove this.
+   const content = await ctx.db.get(contentId);
+   if (content) {
+     dataUsedSoFar += content.text.length;
+     dataUsedSoFar += JSON.stringify(
+       convexToJson(content.metadata ?? {})
+     ).length;
+   }
+   return dataUsedSoFar;
+ }
package/src/component/convex.config.ts
@@ -0,0 +1,7 @@
+ import { defineComponent } from "convex/server";
+ import workpool from "@convex-dev/workpool/convex.config";
+
+ const component = defineComponent("rag");
+ component.use(workpool);
+
+ export default component;
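This file defines the `rag` component and mounts the workpool it depends on. A consuming app wires it in through its own convex.config.ts, roughly like this (sketch; the `@convex-dev/rag/convex.config` import path is inferred from the package layout above):

    // convex/convex.config.ts in the consuming app (sketch).
    import { defineApp } from "convex/server";
    import rag from "@convex-dev/rag/convex.config";

    const app = defineApp();
    app.use(rag); // also brings along the workpool the component uses internally
    export default app;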