@convex-dev/rag 0.5.3 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +89 -82
- package/dist/client/index.d.ts +39 -26
- package/dist/client/index.d.ts.map +1 -1
- package/dist/client/index.js +26 -8
- package/dist/client/index.js.map +1 -1
- package/dist/component/_generated/api.d.ts +100 -481
- package/dist/component/_generated/api.d.ts.map +1 -1
- package/dist/component/_generated/api.js +10 -1
- package/dist/component/_generated/api.js.map +1 -1
- package/dist/component/_generated/component.d.ts +380 -0
- package/dist/component/_generated/component.d.ts.map +1 -0
- package/dist/component/_generated/component.js +11 -0
- package/dist/component/_generated/component.js.map +1 -0
- package/dist/component/_generated/dataModel.d.ts +4 -18
- package/dist/component/_generated/dataModel.d.ts.map +1 -0
- package/dist/component/_generated/dataModel.js +11 -0
- package/dist/component/_generated/dataModel.js.map +1 -0
- package/dist/component/_generated/server.d.ts +10 -38
- package/dist/component/_generated/server.d.ts.map +1 -1
- package/dist/component/_generated/server.js +9 -5
- package/dist/component/_generated/server.js.map +1 -1
- package/dist/component/chunks.d.ts +5 -5
- package/dist/component/chunks.d.ts.map +1 -1
- package/dist/component/chunks.js +11 -44
- package/dist/component/chunks.js.map +1 -1
- package/dist/component/embeddings/tables.d.ts +4 -5
- package/dist/component/embeddings/tables.d.ts.map +1 -1
- package/dist/component/embeddings/tables.js.map +1 -1
- package/dist/component/entries.d.ts +6 -6
- package/dist/component/namespaces.d.ts +8 -8
- package/dist/component/namespaces.d.ts.map +1 -1
- package/dist/component/namespaces.js +2 -2
- package/dist/component/namespaces.js.map +1 -1
- package/dist/component/schema.d.ts +185 -224
- package/dist/component/schema.d.ts.map +1 -1
- package/dist/component/search.d.ts +4 -3
- package/dist/component/search.d.ts.map +1 -1
- package/dist/component/search.js +1 -1
- package/dist/component/search.js.map +1 -1
- package/dist/shared.d.ts +9 -4
- package/dist/shared.d.ts.map +1 -1
- package/dist/shared.js +1 -4
- package/dist/shared.js.map +1 -1
- package/package.json +71 -42
- package/src/client/defaultChunker.test.ts +1 -1
- package/src/client/defaultChunker.ts +7 -7
- package/src/client/fileUtils.ts +3 -3
- package/src/client/hybridRank.ts +1 -1
- package/src/client/index.test.ts +18 -18
- package/src/client/index.ts +135 -90
- package/src/client/setup.test.ts +2 -2
- package/src/component/_generated/api.ts +152 -0
- package/src/component/_generated/component.ts +442 -0
- package/src/component/_generated/{server.d.ts → server.ts} +33 -21
- package/src/component/chunks.test.ts +14 -14
- package/src/component/chunks.ts +49 -82
- package/src/component/embeddings/importance.test.ts +4 -4
- package/src/component/embeddings/importance.ts +1 -1
- package/src/component/embeddings/index.test.ts +3 -4
- package/src/component/embeddings/index.ts +6 -6
- package/src/component/embeddings/tables.ts +9 -8
- package/src/component/entries.test.ts +10 -10
- package/src/component/entries.ts +29 -29
- package/src/component/filters.ts +8 -8
- package/src/component/namespaces.ts +31 -34
- package/src/component/schema.ts +2 -2
- package/src/component/search.test.ts +5 -5
- package/src/component/search.ts +8 -9
- package/src/component/setup.test.ts +2 -8
- package/src/shared.ts +47 -45
- package/src/test.ts +20 -0
- package/dist/client/types.d.ts +0 -29
- package/dist/client/types.d.ts.map +0 -1
- package/dist/client/types.js +0 -2
- package/dist/client/types.js.map +0 -1
- package/dist/package.json +0 -3
- package/src/client/types.ts +0 -69
- package/src/component/_generated/api.d.ts +0 -507
- package/src/component/_generated/api.js +0 -23
- package/src/component/_generated/server.js +0 -90
- package/src/vitest.config.ts +0 -7
- package/src/component/_generated/{dataModel.d.ts → dataModel.ts} +0 -0
package/src/component/_generated/{server.d.ts → server.ts}
CHANGED

```diff
@@ -8,9 +8,8 @@
  * @module
  */
 
-import {
+import type {
   ActionBuilder,
-  AnyComponents,
   HttpActionBuilder,
   MutationBuilder,
   QueryBuilder,
@@ -19,15 +18,18 @@ import {
   GenericQueryCtx,
   GenericDatabaseReader,
   GenericDatabaseWriter,
-
+} from "convex/server";
+import {
+  actionGeneric,
+  httpActionGeneric,
+  queryGeneric,
+  mutationGeneric,
+  internalActionGeneric,
+  internalMutationGeneric,
+  internalQueryGeneric,
 } from "convex/server";
 import type { DataModel } from "./dataModel.js";
 
-type GenericCtx =
-  | GenericActionCtx<DataModel>
-  | GenericMutationCtx<DataModel>
-  | GenericQueryCtx<DataModel>;
-
 /**
  * Define a query in this Convex app's public API.
  *
@@ -36,7 +38,7 @@ type GenericCtx =
  * @param func - The query function. It receives a {@link QueryCtx} as its first argument.
  * @returns The wrapped query. Include this as an `export` to name it and make it accessible.
  */
-export declare const query: QueryBuilder<DataModel, "public">;
+export const query: QueryBuilder<DataModel, "public"> = queryGeneric;
 
 /**
  * Define a query that is only accessible from other Convex functions (but not from the client).
@@ -46,7 +48,8 @@ export declare const query: QueryBuilder<DataModel, "public">;
  * @param func - The query function. It receives a {@link QueryCtx} as its first argument.
  * @returns The wrapped query. Include this as an `export` to name it and make it accessible.
  */
-export declare const internalQuery: QueryBuilder<DataModel, "internal">;
+export const internalQuery: QueryBuilder<DataModel, "internal"> =
+  internalQueryGeneric;
 
 /**
  * Define a mutation in this Convex app's public API.
@@ -56,7 +59,7 @@ export declare const internalQuery: QueryBuilder<DataModel, "internal">;
  * @param func - The mutation function. It receives a {@link MutationCtx} as its first argument.
  * @returns The wrapped mutation. Include this as an `export` to name it and make it accessible.
  */
-export declare const mutation: MutationBuilder<DataModel, "public">;
+export const mutation: MutationBuilder<DataModel, "public"> = mutationGeneric;
 
 /**
  * Define a mutation that is only accessible from other Convex functions (but not from the client).
@@ -66,7 +69,8 @@ export declare const mutation: MutationBuilder<DataModel, "public">;
  * @param func - The mutation function. It receives a {@link MutationCtx} as its first argument.
  * @returns The wrapped mutation. Include this as an `export` to name it and make it accessible.
  */
-export declare const internalMutation: MutationBuilder<DataModel, "internal">;
+export const internalMutation: MutationBuilder<DataModel, "internal"> =
+  internalMutationGeneric;
 
 /**
  * Define an action in this Convex app's public API.
@@ -79,7 +83,7 @@ export declare const internalMutation: MutationBuilder<DataModel, "internal">;
  * @param func - The action. It receives an {@link ActionCtx} as its first argument.
  * @returns The wrapped action. Include this as an `export` to name it and make it accessible.
  */
-export declare const action: ActionBuilder<DataModel, "public">;
+export const action: ActionBuilder<DataModel, "public"> = actionGeneric;
 
 /**
  * Define an action that is only accessible from other Convex functions (but not from the client).
@@ -87,19 +91,26 @@ export declare const action: ActionBuilder<DataModel, "public">;
  * @param func - The function. It receives an {@link ActionCtx} as its first argument.
  * @returns The wrapped function. Include this as an `export` to name it and make it accessible.
  */
-export declare const internalAction: ActionBuilder<DataModel, "internal">;
+export const internalAction: ActionBuilder<DataModel, "internal"> =
+  internalActionGeneric;
 
 /**
  * Define an HTTP action.
  *
- *
- * deployment if the requests matches the path and method where
- * is routed. Be sure to route your
+ * The wrapped function will be used to respond to HTTP requests received
+ * by a Convex deployment if the requests matches the path and method where
+ * this action is routed. Be sure to route your httpAction in `convex/http.js`.
  *
- * @param func - The function. It receives an {@link ActionCtx} as its first argument
+ * @param func - The function. It receives an {@link ActionCtx} as its first argument
+ * and a Fetch API `Request` object as its second.
  * @returns The wrapped function. Import this function from `convex/http.js` and route it to hook it up.
  */
-export declare const httpAction: HttpActionBuilder;
+export const httpAction: HttpActionBuilder = httpActionGeneric;
+
+type GenericCtx =
+  | GenericActionCtx<DataModel>
+  | GenericMutationCtx<DataModel>
+  | GenericQueryCtx<DataModel>;
 
 /**
  * A set of services for use within Convex query functions.
@@ -107,8 +118,7 @@ export declare const httpAction: HttpActionBuilder;
  * The query context is passed as the first argument to any Convex query
  * function run on the server.
  *
- *
- * read-only.
+ * If you're using code generation, use the `QueryCtx` type in `convex/_generated/server.d.ts` instead.
  */
 export type QueryCtx = GenericQueryCtx<DataModel>;
 
@@ -117,6 +127,8 @@ export type QueryCtx = GenericQueryCtx<DataModel>;
  *
  * The mutation context is passed as the first argument to any Convex mutation
  * function run on the server.
+ *
+ * If you're using code generation, use the `MutationCtx` type in `convex/_generated/server.d.ts` instead.
  */
 export type MutationCtx = GenericMutationCtx<DataModel>;
 
```
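The net effect of this file's change is that the generated module now exports runtime bindings (e.g. `query = queryGeneric`) instead of ambient `declare` statements, which is why it moved from `server.d.ts` to `server.ts`. A minimal sketch of how component code consumes these builders (the `countChunks` function below is hypothetical, for illustration only; the `chunks` table and `entryId_order` index appear later in this diff):

```ts
// Hypothetical consumer of the generated builders above (not part of this
// package). Because `query` is now a value, not just a type declaration,
// compiled component code can import and call it at runtime.
import { v } from "convex/values";
import { query } from "./_generated/server";

export const countChunks = query({
  args: { entryId: v.id("entries") },
  returns: v.number(),
  handler: async (ctx, { entryId }) => {
    const chunks = await ctx.db
      .query("chunks")
      .withIndex("entryId_order", (q) => q.eq("entryId", entryId))
      .collect();
    return chunks.length;
  },
});
```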
package/src/component/chunks.test.ts
CHANGED

```diff
@@ -30,7 +30,7 @@ describe("chunks", () => {
     namespaceId: Id<"namespaces">,
     key = "test-entry",
     version = 0,
-    status: "ready" | "pending" = "ready"
+    status: "ready" | "pending" = "ready",
   ) {
     return await t.run(async (ctx) => {
       return ctx.db.insert("entries", {
@@ -70,7 +70,7 @@ describe("chunks", () => {
           startOrder: 0,
           chunks,
         });
-      })
+      }),
     ).rejects.toThrow(`Entry ${nonExistentDocId} not found`);
   });
 
@@ -141,10 +141,10 @@ describe("chunks", () => {
     expect(overwrittenChunk2).toBeDefined();
 
     const content1 = await t.run(async (ctx) =>
-      ctx.db.get(overwrittenChunk1!.contentId)
+      ctx.db.get(overwrittenChunk1!.contentId),
     );
     const content2 = await t.run(async (ctx) =>
-      ctx.db.get(overwrittenChunk2!.contentId)
+      ctx.db.get(overwrittenChunk2!.contentId),
     );
 
     expect(content1!.text).toBe("Overwritten chunk 1 content");
@@ -176,7 +176,7 @@ describe("chunks", () => {
       namespaceId,
       "versioned-entry",
       2,
-      "pending"
+      "pending",
     );
 
     // Insert chunks in version 2 (this should mark v1 chunks as replaced)
@@ -279,10 +279,10 @@ describe("chunks", () => {
 
     // Verify chunk content
     const doc1Content0 = await t.run(async (ctx) =>
-      ctx.db.get(doc1ChunksList[0].contentId)
+      ctx.db.get(doc1ChunksList[0].contentId),
     );
     const doc2Content0 = await t.run(async (ctx) =>
-      ctx.db.get(doc2ChunksList[0].contentId)
+      ctx.db.get(doc2ChunksList[0].contentId),
     );
 
     expect(doc1Content0!.text).toBe("Test chunk content 1");
@@ -309,7 +309,7 @@ describe("chunks", () => {
       return ctx.db
         .query("chunks")
         .withIndex("entryId_order", (q) =>
-          q.eq("entryId", entryId).eq("order", 2)
+          q.eq("entryId", entryId).eq("order", 2),
         )
         .first();
     });
@@ -319,7 +319,7 @@ describe("chunks", () => {
 
     // Verify content
     const content = await t.run(async (ctx) =>
-      ctx.db.get(singleChunk!.contentId)
+      ctx.db.get(singleChunk!.contentId),
     );
     expect(content!.text).toBe("Test chunk content 3");
   });
@@ -467,7 +467,7 @@ describe("chunks", () => {
       {
         embeddingIds: [chunkDocs[2].state.embeddingId],
         chunkContext: { before: 1, after: 2 },
-      }
+      },
     );
     expect(entries).toHaveLength(1);
     expect(entries[0].entryId).toBe(entryId);
@@ -532,7 +532,7 @@ describe("chunks", () => {
         doc2ChunkDocs[2].state.embeddingId, // doc2, chunk at order 2
       ],
       chunkContext: { before: 1, after: 1 },
-      }
+      },
     );
 
     expect(entries).toHaveLength(2);
@@ -562,7 +562,7 @@ describe("chunks", () => {
       namespaceId,
       "versioned-entry",
       1,
-      "ready"
+      "ready",
     );
 
     // Insert chunks in version 1
@@ -580,7 +580,7 @@ describe("chunks", () => {
       namespaceId,
       "versioned-entry",
       2,
-      "pending"
+      "pending",
     );
 
     // Insert chunks in version 2
@@ -631,7 +631,7 @@ describe("chunks", () => {
         v2ChunkDocs[1].state.embeddingId, // v2, chunk at order 1
       ],
       chunkContext: { before: 1, after: 1 },
-      }
+      },
     );
 
     expect(entries).toHaveLength(2);
```
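The test changes above are mechanical trailing-comma updates, consistent with a formatter upgrade. For orientation, the `t.run(...)` calls they touch come from `convex-test`, which runs a callback against the component's database inside a single transaction; a minimal sketch, assuming the component's `schema` module:

```ts
// Minimal convex-test sketch of the pattern used throughout chunks.test.ts.
import { expect, test } from "vitest";
import { convexTest } from "convex-test";
import schema from "./schema.js";

test("t.run gives direct transactional db access", async () => {
  const t = convexTest(schema);
  // Same shape as the tests above: seed or read documents directly.
  const firstChunk = await t.run(async (ctx) => {
    return ctx.db.query("chunks").first();
  });
  expect(firstChunk).toBeNull(); // nothing has been inserted yet
});
```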
package/src/component/chunks.ts
CHANGED

```diff
@@ -50,7 +50,7 @@ export const insert = mutation({
 
 export async function insertChunks(
   ctx: MutationCtx,
-  { entryId, startOrder, chunks }: InsertChunksArgs
+  { entryId, startOrder, chunks }: InsertChunksArgs,
 ) {
   const entry = await ctx.db.get(entryId);
   if (!entry) {
@@ -71,12 +71,12 @@ export async function insertChunks(
       q
         .eq("entryId", entryId)
         .gte("order", startOrder)
-        .lt("order", startOrder + chunks.length)
+        .lt("order", startOrder + chunks.length),
     )
     .collect();
   if (existingChunks.length > 0) {
     console.debug(
-      `Deleting ${existingChunks.length} existing chunks for entry ${entryId} at version ${entry.version}`
+      `Deleting ${existingChunks.length} existing chunks for entry ${entryId} at version ${entry.version}`,
     );
   }
   // TODO: avoid writing if they're the same
@@ -87,11 +87,11 @@ export async function insertChunks(
       }
       await ctx.db.delete(c.contentId);
       await ctx.db.delete(c._id);
-    })
+    }),
   );
   const numberedFilter = numberedFilterFromNamedFilters(
     entry.filterValues,
-    namespace!.filterNames
+    namespace!.filterNames,
   );
   for (const chunk of chunks) {
     const contentId = await ctx.db.insert("content", {
@@ -110,7 +110,7 @@ export async function insertChunks(
         chunk.embedding,
         entry.namespaceId,
         entry.importance,
-        numberedFilter
+        numberedFilter,
       );
       state = {
         kind: "ready",
@@ -126,13 +126,11 @@ export async function insertChunks(
         contentId,
         namespaceId: entry.namespaceId,
         ...filterFieldsFromNumbers(entry.namespaceId, numberedFilter),
-      })
+      }),
     );
     order++;
   }
-  return {
-    status: previousEntry ? ("pending" as const) : ("ready" as const),
-  };
+  return { status: previousEntry ? ("pending" as const) : ("ready" as const) };
 }
 
 async function ensureLatestEntryVersion(ctx: QueryCtx, entry: Doc<"entries">) {
@@ -148,14 +146,14 @@ async function ensureLatestEntryVersion(ctx: QueryCtx, entry: Doc<"entries">) {
           .eq("namespaceId", entry.namespaceId)
           .eq("status.kind", status)
           .eq("key", entry.key)
-          .gt("version", entry.version)
-      )
+          .gt("version", entry.version),
+      ),
     ),
-    ["version"]
+    ["version"],
   ).first();
   if (newerEntry) {
     console.warn(
-      `Bailing from inserting chunks for entry ${entry.key} at version ${entry.version} since there's a newer version ${newerEntry.version} (status ${newerEntry.status}) creation time difference ${(newerEntry._creationTime - entry._creationTime).toFixed(0)}ms`
+      `Bailing from inserting chunks for entry ${entry.key} at version ${entry.version} since there's a newer version ${newerEntry.version} (status ${newerEntry.status}) creation time difference ${(newerEntry._creationTime - entry._creationTime).toFixed(0)}ms`,
     );
     return false;
   }
@@ -163,14 +161,8 @@ async function ensureLatestEntryVersion(ctx: QueryCtx, entry: Doc<"entries">) {
 }
 
 export const replaceChunksPage = mutation({
-  args: v.object({
-    entryId: v.id("entries"),
-    startOrder: v.number(),
-  }),
-  returns: v.object({
-    status: vStatus,
-    nextStartOrder: v.number(),
-  }),
+  args: v.object({ entryId: v.id("entries"), startOrder: v.number() }),
+  returns: v.object({ status: vStatus, nextStartOrder: v.number() }),
   handler: async (ctx, args) => {
     const { entryId, startOrder } = args;
     const entryOrNull = await ctx.db.get(entryId);
@@ -180,10 +172,7 @@ export const replaceChunksPage = mutation({
     const entry = entryOrNull;
     const isLatest = await ensureLatestEntryVersion(ctx, entry);
     if (!isLatest) {
-      return {
-        status: "replaced" as const,
-        nextStartOrder: startOrder,
-      };
+      return { status: "replaced" as const, nextStartOrder: startOrder };
     }
 
     // Get the namespace for filter conversion
@@ -200,7 +189,7 @@ export const replaceChunksPage = mutation({
           q
             .eq("namespaceId", entry.namespaceId)
             .eq("status.kind", "pending")
-            .eq("key", entry.key)
+            .eq("key", entry.key),
         )
         .collect()
     ).filter((e) => e._id !== entry._id)
@@ -212,29 +201,27 @@ export const replaceChunksPage = mutation({
       stream(ctx.db, schema)
         .query("chunks")
         .withIndex("entryId_order", (q) =>
-          q.eq("entryId", entry._id).gte("order", startOrder)
-        )
+          q.eq("entryId", entry._id).gte("order", startOrder),
+        ),
       ),
-      ["order"]
+      ["order"],
     );
     const namespaceId = entry.namespaceId;
     const namedFilters = numberedFilterFromNamedFilters(
       entry.filterValues,
-      namespace!.filterNames
+      namespace!.filterNames,
     );
     async function addChunk(
-      chunk: Doc<"chunks"> & { state: { kind: "pending" } }
+      chunk: Doc<"chunks"> & { state: { kind: "pending" } },
    ) {
      const embeddingId = await insertEmbedding(
        ctx,
        chunk.state.embedding,
        namespaceId,
        entry.importance,
-        namedFilters
+        namedFilters,
      );
-      await ctx.db.patch(chunk._id, {
-        state: { kind: "ready", embeddingId },
-      });
+      await ctx.db.patch(chunk._id, { state: { kind: "ready", embeddingId } });
    }
    let dataUsedSoFar = 0;
    let indexToDelete = startOrder;
@@ -256,7 +243,7 @@ export const replaceChunksPage = mutation({
             pendingSearchableText: chunk.state.searchableText,
           },
         });
-      })
+      }),
     );
     chunksToDeleteEmbeddings = [];
     if (chunkToAdd) {
@@ -277,23 +264,17 @@ export const replaceChunksPage = mutation({
       // check if we're close to the limit
       // if so, bail and pick up on this chunk.order.
       if (dataUsedSoFar > BANDWIDTH_PER_TRANSACTION_SOFT_LIMIT) {
-        return {
-          status: "pending" as const,
-          nextStartOrder: indexToDelete,
-        };
+        return { status: "pending" as const, nextStartOrder: indexToDelete };
       }
     }
     if (dataUsedSoFar > BANDWIDTH_PER_TRANSACTION_HARD_LIMIT) {
-      return {
-        status: "pending" as const,
-        nextStartOrder: indexToDelete,
-      };
+      return { status: "pending" as const, nextStartOrder: indexToDelete };
     }
     if (chunk.state.kind === "pending") {
       if (chunk.entryId === entryId) {
         if (chunkToAdd) {
           console.warn(
-            `Multiple pending chunks before changing order ${chunk.order} for entry ${entryId} version ${entry.version}: ${chunkToAdd._id} and ${chunk._id}`
+            `Multiple pending chunks before changing order ${chunk.order} for entry ${entryId} version ${entry.version}: ${chunkToAdd._id} and ${chunk._id}`,
           );
           await addChunk(chunkToAdd);
         }
@@ -304,7 +285,7 @@ export const replaceChunksPage = mutation({
         chunksToDeleteEmbeddings.push(chunk);
       } else {
         console.debug(
-          `Skipping adding chunk ${chunk._id} for entry ${entryId} version ${entry.version} since it's already ready`
+          `Skipping adding chunk ${chunk._id} for entry ${entryId} version ${entry.version} since it's already ready`,
         );
       }
     }
@@ -312,10 +293,7 @@ export const replaceChunksPage = mutation({
     // handle the last batch
     await handleBatch();
 
-    return {
-      status: "ready" as const,
-      nextStartOrder: 0,
-    };
+    return { status: "ready" as const, nextStartOrder: 0 };
   },
 });
 
@@ -327,7 +305,7 @@ export const vRangeResult = v.object({
     v.object({
       text: v.string(),
       metadata: v.optional(v.record(v.string(), v.any())),
-    })
+    }),
   ),
 });
 
@@ -342,7 +320,7 @@ export const getRangesOfChunks = internalQuery({
   }),
   handler: async (
     ctx,
-    args
+    args,
   ): Promise<{
     ranges: (null | Infer<typeof vRangeResult>)[];
     entries: Entry[];
@@ -353,19 +331,19 @@ export const getRangesOfChunks = internalQuery({
       ctx.db
        .query("chunks")
        .withIndex("embeddingId", (q) =>
-          q.eq("state.embeddingId", embeddingId)
+          q.eq("state.embeddingId", embeddingId),
        )
        .order("desc")
-        .first()
-    )
+        .first(),
+    ),
   );
 
   // Note: This preserves order of entries as they first appeared.
   const entries = (
     await Promise.all(
       Array.from(
-        new Set(chunks.filter((c) => c !== null).map((c) => c.entryId))
-      ).map((id) => ctx.db.get(id))
+        new Set(chunks.filter((c) => c !== null).map((c) => c.entryId)),
+      ).map((id) => ctx.db.get(id)),
     )
   )
     .filter((d) => d !== null)
@@ -383,7 +361,7 @@ export const getRangesOfChunks = internalQuery({
       acc[entryId] = [...(acc[entryId] ?? []), order].sort((a, b) => a - b);
       return acc;
     },
-    {} as Record<Id<"entries">, number[]>
+    {} as Record<Id<"entries">, number[]>,
   );
 
   const result: Array<Infer<typeof vRangeResult> | null> = [];
@@ -397,7 +375,7 @@ export const getRangesOfChunks = internalQuery({
     // instead we'd check that other chunks are not the same doc/order
     if (
       result.find(
-        (r) => r?.entryId === chunk.entryId && r?.order === chunk.order
+        (r) => r?.entryId === chunk.entryId && r?.order === chunk.order,
      )
    ) {
      // De-dupe chunks
@@ -415,12 +393,12 @@ export const getRangesOfChunks = internalQuery({
     const startOrder = Math.max(
       chunk.order - chunkContext.before,
       0,
-      Math.min(previousOrder + 1, chunk.order)
+      Math.min(previousOrder + 1, chunk.order),
     );
     // We stop short if the next chunk order's "before" context will cover it.
     const endOrder = Math.min(
       chunk.order + chunkContext.after + 1,
-      Math.max(nextOrder - chunkContext.before, chunk.order + 1)
+      Math.max(nextOrder - chunkContext.before, chunk.order + 1),
     );
     const contentIds: Id<"content">[] = [];
     if (startOrder === chunk.order && endOrder === chunk.order + 1) {
@@ -432,7 +410,7 @@ export const getRangesOfChunks = internalQuery({
           q
             .eq("entryId", entryId)
             .gte("order", startOrder)
-            .lt("order", endOrder)
+            .lt("order", endOrder),
         )
         .collect();
       for (const chunk of chunks) {
@@ -444,21 +422,13 @@ export const getRangesOfChunks = internalQuery({
         const content = await ctx.db.get(contentId);
         assert(content, `Content ${contentId} not found`);
         return { text: content.text, metadata: content.metadata };
-      })
+      }),
     );
 
-    result.push({
-      entryId,
-      order: chunk.order,
-      startOrder,
-      content,
-    });
+    result.push({ entryId, order: chunk.order, startOrder, content });
   }
 
-  return {
-    ranges: result,
-    entries,
-  };
+  return { ranges: result, entries };
  },
});
 
@@ -483,7 +453,7 @@ export const list = query({
       const content = await ctx.db.get(chunk.contentId);
       assert(content, `Content ${chunk.contentId} not found`);
       return publicChunk(chunk, content);
-      })
+      }),
     ),
   };
 },
@@ -516,22 +486,19 @@ async function publicChunk(chunk: Doc<"chunks">, content: Doc<"content">) {
 }
 
 export const deleteChunksPage = internalMutation({
-  args: v.object({
-    entryId: v.id("entries"),
-    startOrder: v.number(),
-  }),
+  args: v.object({ entryId: v.id("entries"), startOrder: v.number() }),
   returns: v.object({ isDone: v.boolean(), nextStartOrder: v.number() }),
   handler: deleteChunksPageHandler,
 });
 
 export async function deleteChunksPageHandler(
   ctx: MutationCtx,
-  { entryId, startOrder }: { entryId: Id<"entries">; startOrder: number }
+  { entryId, startOrder }: { entryId: Id<"entries">; startOrder: number },
 ) {
   const chunkStream = ctx.db
     .query("chunks")
     .withIndex("entryId_order", (q) =>
-      q.eq("entryId", entryId).gte("order", startOrder)
+      q.eq("entryId", entryId).gte("order", startOrder),
     );
   let dataUsedSoFar = 0;
   for await (const chunk of chunkStream) {
@@ -590,7 +557,7 @@ async function estimateContentSize(ctx: QueryCtx, contentId: Id<"content">) {
   if (content) {
     dataUsedSoFar += content.text.length;
     dataUsedSoFar += JSON.stringify(
-      convexToJson(content.metadata ?? {})
+      convexToJson(content.metadata ?? {}),
     ).length;
   }
   return dataUsedSoFar;
```
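Note that the `replaceChunksPage` rewrites above preserve its paging contract: each call does a bounded amount of work (the `BANDWIDTH_PER_TRANSACTION_*` soft and hard limits) and returns `{ status, nextStartOrder }` so the caller can resume. A sketch of the driver loop such a contract implies (a hypothetical caller; the exact function-reference path is an assumption):

```ts
// Hypothetical driver for a paged mutation like replaceChunksPage: keep
// calling with the returned cursor until it stops reporting "pending".
import { v } from "convex/values";
import { internalAction } from "./_generated/server";
import { api } from "./_generated/api";

export const replaceAllChunks = internalAction({
  args: { entryId: v.id("entries") },
  handler: async (ctx, { entryId }) => {
    let startOrder = 0;
    for (;;) {
      const { status, nextStartOrder } = await ctx.runMutation(
        api.chunks.replaceChunksPage,
        { entryId, startOrder },
      );
      // "ready" means done; "replaced" means a newer entry version took over.
      if (status !== "pending") break;
      startOrder = nextStartOrder;
    }
  },
});
```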
package/src/component/embeddings/importance.test.ts
CHANGED

```diff
@@ -45,7 +45,7 @@ describe("importance.ts", () => {
     expect(result[0]).toBeCloseTo(embedding[0] * importance);
     expect(result[1]).toBeCloseTo(embedding[1] * importance);
     expect(
-      Math.sqrt(result[0] ** 2 + result[1] ** 2 + result[2] ** 2)
+      Math.sqrt(result[0] ** 2 + result[1] ** 2 + result[2] ** 2),
     ).toBeCloseTo(1);
   });
 
@@ -175,7 +175,7 @@ describe("importance.ts", () => {
 
     expect(retrievedImportance).toBeCloseTo(importance, 3);
     expect(Math.abs(retrievedImportance - importance)).toBeLessThan(
-      tolerance
+      tolerance,
     );
   });
 });
@@ -198,13 +198,13 @@ describe("importance.ts", () => {
     // Create vector with initial importance
     const vectorWithInitialImp = vectorWithImportance(
       embedding,
-      initialImportance
+      initialImportance,
     );
 
     // Modify importance
     const vectorWithModifiedImp = modifyImportance(
       vectorWithInitialImp,
-      newImportance
+      newImportance,
     );
 
     // Retrieve and verify
```
package/src/component/embeddings/importance.ts
CHANGED

```diff
@@ -35,7 +35,7 @@ export function vectorWithImportance(embedding: number[], importance: number) {
   // We drop the final dimension if it'd make it larger than 4096.
   // Unfortunate current limitation of Convex vector search.
   const vectorToModify = normalizeVector(
-    embedding.length === 4096 ? embedding.slice(0, 4095) : embedding
+    embedding.length === 4096 ? embedding.slice(0, 4095) : embedding,
   );
   const scaled = scaleVector(vectorToModify, importance);
 
```
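The `vectorWithImportance` hunk is again only formatting, but it sits at the heart of the importance encoding these tests exercise: normalize the embedding, scale it by `importance`, and append a weight of `sqrt(1 - importance^2)` so the stored vector stays unit length, since `importance^2 + (1 - importance^2) = 1` (matching the norm-close-to-1 assertion above). A self-contained sketch of the idea, not the package's exact code:

```ts
// Sketch of the importance encoding, assuming 0 <= importance <= 1 and a
// nonzero embedding. Stored vector = [importance * unit(e), w] with
// w = sqrt(1 - importance^2), so its Euclidean norm is exactly 1.
export function normalize(v: number[]): number[] {
  const norm = Math.sqrt(v.reduce((sum, x) => sum + x * x, 0));
  return v.map((x) => x / norm);
}

export function withImportance(embedding: number[], importance: number) {
  const scaled = normalize(embedding).map((x) => x * importance);
  return [...scaled, Math.sqrt(1 - importance ** 2)];
}

// The importance is recoverable from the stored vector's last element,
// which is what the modifyImportance round-trip tests rely on:
export function importanceOf(vector: number[]): number {
  const w = vector[vector.length - 1];
  return Math.sqrt(1 - w * w);
}
```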
package/src/component/embeddings/index.test.ts
CHANGED

```diff
@@ -25,7 +25,6 @@ const testApi: ApiFromModules<{
   fns: {
     search: typeof search;
   };
-  // eslint-disable-next-line @typescript-eslint/no-explicit-any
 }>["fns"] = anyApi["embeddings"]["index.test"] as any;
 
 describe("embeddings", () => {
@@ -62,7 +61,7 @@ describe("embeddings", () => {
     expect(insertedVector).toBeDefined();
     expect(insertedVector!.namespaceId).toBe(namespaceId);
     expect(insertedVector!.vector).toHaveLength(
-      vectorWithImportanceDimension(128)
+      vectorWithImportanceDimension(128),
     );
     expect(insertedVector!.filter0).toBeUndefined();
     expect(insertedVector!.filter1).toBeUndefined();
@@ -94,7 +93,7 @@ describe("embeddings", () => {
       embedding,
       namespaceId,
       importance,
-      undefined
+      undefined,
     );
   });
 
@@ -116,7 +115,7 @@ describe("embeddings", () => {
 
     // Vectors should be different due to importance scaling
     expect(insertedVector!.vector).not.toEqual(
-      vectorWithoutImportanceData!.vector
+      vectorWithoutImportanceData!.vector,
     );
 
     // The last element should be the weight: sqrt(1 - importance^2)
```