@convex-dev/rag 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113)
  1. package/LICENSE +201 -0
  2. package/README.md +371 -0
  3. package/dist/client/_generated/_ignore.d.ts +1 -0
  4. package/dist/client/_generated/_ignore.d.ts.map +1 -0
  5. package/dist/client/_generated/_ignore.js +3 -0
  6. package/dist/client/_generated/_ignore.js.map +1 -0
  7. package/dist/client/defaultChunker.d.ts +15 -0
  8. package/dist/client/defaultChunker.d.ts.map +1 -0
  9. package/dist/client/defaultChunker.js +148 -0
  10. package/dist/client/defaultChunker.js.map +1 -0
  11. package/dist/client/fileUtils.d.ts +24 -0
  12. package/dist/client/fileUtils.d.ts.map +1 -0
  13. package/dist/client/fileUtils.js +179 -0
  14. package/dist/client/fileUtils.js.map +1 -0
  15. package/dist/client/index.d.ts +442 -0
  16. package/dist/client/index.d.ts.map +1 -0
  17. package/dist/client/index.js +597 -0
  18. package/dist/client/index.js.map +1 -0
  19. package/dist/client/types.d.ts +29 -0
  20. package/dist/client/types.d.ts.map +1 -0
  21. package/dist/client/types.js +2 -0
  22. package/dist/client/types.js.map +1 -0
  23. package/dist/component/_generated/api.d.ts +439 -0
  24. package/dist/component/_generated/api.d.ts.map +1 -0
  25. package/dist/component/_generated/api.js +22 -0
  26. package/dist/component/_generated/api.js.map +1 -0
  27. package/dist/component/_generated/dataModel.d.ts +60 -0
  28. package/dist/component/_generated/server.d.ts +149 -0
  29. package/dist/component/_generated/server.d.ts.map +1 -0
  30. package/dist/component/_generated/server.js +74 -0
  31. package/dist/component/_generated/server.js.map +1 -0
  32. package/dist/component/chunks.d.ts +139 -0
  33. package/dist/component/chunks.d.ts.map +1 -0
  34. package/dist/component/chunks.js +413 -0
  35. package/dist/component/chunks.js.map +1 -0
  36. package/dist/component/convex.config.d.ts +3 -0
  37. package/dist/component/convex.config.d.ts.map +1 -0
  38. package/dist/component/convex.config.js +6 -0
  39. package/dist/component/convex.config.js.map +1 -0
  40. package/dist/component/embeddings/importance.d.ts +21 -0
  41. package/dist/component/embeddings/importance.d.ts.map +1 -0
  42. package/dist/component/embeddings/importance.js +67 -0
  43. package/dist/component/embeddings/importance.js.map +1 -0
  44. package/dist/component/embeddings/index.d.ts +23 -0
  45. package/dist/component/embeddings/index.d.ts.map +1 -0
  46. package/dist/component/embeddings/index.js +54 -0
  47. package/dist/component/embeddings/index.js.map +1 -0
  48. package/dist/component/embeddings/tables.d.ts +39 -0
  49. package/dist/component/embeddings/tables.d.ts.map +1 -0
  50. package/dist/component/embeddings/tables.js +53 -0
  51. package/dist/component/embeddings/tables.js.map +1 -0
  52. package/dist/component/entries.d.ts +167 -0
  53. package/dist/component/entries.d.ts.map +1 -0
  54. package/dist/component/entries.js +409 -0
  55. package/dist/component/entries.js.map +1 -0
  56. package/dist/component/filters.d.ts +46 -0
  57. package/dist/component/filters.d.ts.map +1 -0
  58. package/dist/component/filters.js +72 -0
  59. package/dist/component/filters.js.map +1 -0
  60. package/dist/component/namespaces.d.ts +131 -0
  61. package/dist/component/namespaces.d.ts.map +1 -0
  62. package/dist/component/namespaces.js +222 -0
  63. package/dist/component/namespaces.js.map +1 -0
  64. package/dist/component/schema.d.ts +1697 -0
  65. package/dist/component/schema.d.ts.map +1 -0
  66. package/dist/component/schema.js +88 -0
  67. package/dist/component/schema.js.map +1 -0
  68. package/dist/component/search.d.ts +20 -0
  69. package/dist/component/search.d.ts.map +1 -0
  70. package/dist/component/search.js +69 -0
  71. package/dist/component/search.js.map +1 -0
  72. package/dist/package.json +3 -0
  73. package/dist/react/index.d.ts +2 -0
  74. package/dist/react/index.d.ts.map +1 -0
  75. package/dist/react/index.js +6 -0
  76. package/dist/react/index.js.map +1 -0
  77. package/dist/shared.d.ts +479 -0
  78. package/dist/shared.d.ts.map +1 -0
  79. package/dist/shared.js +98 -0
  80. package/dist/shared.js.map +1 -0
  81. package/package.json +97 -0
  82. package/src/client/_generated/_ignore.ts +1 -0
  83. package/src/client/defaultChunker.test.ts +243 -0
  84. package/src/client/defaultChunker.ts +183 -0
  85. package/src/client/fileUtils.ts +179 -0
  86. package/src/client/index.test.ts +475 -0
  87. package/src/client/index.ts +1125 -0
  88. package/src/client/setup.test.ts +28 -0
  89. package/src/client/types.ts +69 -0
  90. package/src/component/_generated/api.d.ts +439 -0
  91. package/src/component/_generated/api.js +23 -0
  92. package/src/component/_generated/dataModel.d.ts +60 -0
  93. package/src/component/_generated/server.d.ts +149 -0
  94. package/src/component/_generated/server.js +90 -0
  95. package/src/component/chunks.test.ts +915 -0
  96. package/src/component/chunks.ts +555 -0
  97. package/src/component/convex.config.ts +7 -0
  98. package/src/component/embeddings/importance.test.ts +249 -0
  99. package/src/component/embeddings/importance.ts +75 -0
  100. package/src/component/embeddings/index.test.ts +482 -0
  101. package/src/component/embeddings/index.ts +99 -0
  102. package/src/component/embeddings/tables.ts +114 -0
  103. package/src/component/entries.test.ts +341 -0
  104. package/src/component/entries.ts +546 -0
  105. package/src/component/filters.ts +119 -0
  106. package/src/component/namespaces.ts +299 -0
  107. package/src/component/schema.ts +106 -0
  108. package/src/component/search.test.ts +445 -0
  109. package/src/component/search.ts +97 -0
  110. package/src/component/setup.test.ts +5 -0
  111. package/src/react/index.ts +7 -0
  112. package/src/shared.ts +247 -0
  113. package/src/vitest.config.ts +7 -0
@@ -0,0 +1,179 @@
1
/**
 * Lower-cased file extension -> MIME type. Extensions that are not listed
 * here are reported as "application/octet-stream" by
 * guessMimeTypeFromExtension.
 */
const MIME_TYPE_BY_EXTENSION: ReadonlyMap<string, string> = new Map([
  ["pdf", "application/pdf"],
  ["txt", "text/plain"],
  ["rtf", "text/plain"],
  ["json", "application/json"],
  ["xml", "application/xml"],
  ["html", "text/html"],
  ["css", "text/css"],
  ["js", "text/javascript"],
  ["cjs", "text/javascript"],
  ["mjs", "text/javascript"],
  ["jsx", "text/javascript"],
  ["ts", "text/javascript"],
  ["tsx", "text/javascript"],
  ["md", "text/markdown"],
  ["mdx", "text/markdown"],
  ["csv", "text/csv"],
  ["zip", "application/zip"],
  ["apng", "image/apng"],
  ["png", "image/png"],
  ["avif", "image/avif"],
  ["gif", "image/gif"],
  ["svg", "image/svg+xml"],
  ["webp", "image/webp"],
  ["tiff", "image/tiff"],
  ["ico", "image/x-icon"],
  ["jpeg", "image/jpeg"],
  ["jpg", "image/jpeg"],
  ["mp1", "audio/mpeg"],
  ["mp2", "audio/mpeg"],
  ["mp3", "audio/mpeg"],
  ["mp4", "video/mp4"],
]);

/**
 * Guess a MIME type from the extension of a filename (case-insensitive).
 *
 * @param filename The file name (or title) to inspect.
 * @returns `undefined` when the name has no usable extension: no "." at all,
 * nothing after the last ".", or a space after the last "." (which suggests a
 * sentence rather than a file name). Otherwise the mapped MIME type, or
 * "application/octet-stream" for an extension that is not recognized.
 */
export function guessMimeTypeFromExtension(
  filename: string
): string | undefined {
  // A name without any "." has no extension; without this guard the whole
  // name (e.g. "pdf" or "README") would be treated as the extension.
  const lastDot = filename.lastIndexOf(".");
  if (lastDot === -1) {
    return undefined;
  }
  const extension = filename.slice(lastDot + 1);
  if (!extension || extension.includes(" ")) {
    return undefined;
  }
  return (
    MIME_TYPE_BY_EXTENSION.get(extension.toLowerCase()) ??
    "application/octet-stream"
  );
}
65
/**
 * Return a best-guess MIME type for some file contents.
 *
 * - String input: if it is a base64 data URL
 *   (`data:<type>/<subtype>[;param]...;base64`), the media type named in the
 *   URL is returned; any other string is reported as `"text/plain"`.
 * - ArrayBuffer input: the magic-number signature found at the start of the
 *   buffer is matched against a list of known formats.
 *
 * @param buf – the source ArrayBuffer, or the contents as a string
 * @returns the detected MIME type, or `"application/octet-stream"` if unknown
 */
export function guessMimeTypeFromContents(buf: ArrayBuffer | string): string {
  if (typeof buf === "string") {
    // Media types may contain ".", "+" and "-" (e.g. image/svg+xml), and
    // parameters such as ";charset=utf-8" may precede ";base64".
    const dataUrl = /^data:([\w.+-]+\/[\w.+-]+)(?:;[^;,]+)*;base64/.exec(buf);
    return dataUrl ? dataUrl[1]! : "text/plain";
  }
  if (buf.byteLength < 4) return "application/octet-stream";

  // Read the first 12 bytes (enough for all fixed signatures below)
  const bytes = new Uint8Array(buf.slice(0, 12));
  const hex = Array.from(bytes, (b) => b.toString(16).padStart(2, "0")).join(
    ""
  );

  // Helper so we can look at only the needed prefix
  const startsWith = (sig: string) => hex.startsWith(sig.toLowerCase());
  // Hex of the 4 bytes that begin at the given byte offset
  const fourBytesAt = (byteOffset: number) =>
    hex.slice(byteOffset * 2, byteOffset * 2 + 8);

  // --- image formats ---
  if (startsWith("89504e47")) return "image/png"; // PNG - 89 50 4E 47
  if (
    startsWith("ffd8ffdb") ||
    startsWith("ffd8ffe0") ||
    startsWith("ffd8ffee") ||
    startsWith("ffd8ffe1")
  )
    return "image/jpeg"; // JPEG
  if (startsWith("47494638")) return "image/gif"; // GIF
  if (startsWith("424d")) return "image/bmp"; // BMP
  if (startsWith("52494646") && fourBytesAt(8) === "57454250")
    return "image/webp"; // WEBP (RIFF....WEBP)
  if (startsWith("49492a00") || startsWith("4d4d002a")) return "image/tiff"; // TIFF (little / big endian)

  // "<svg" in hex is 3c 73 76 67
  if (startsWith("3c737667")) return "image/svg+xml";
  // "<?xml" in hex is 3c 3f 78 6d 6c. An XML prolog alone does not say whether
  // the document is SVG, so look for an "<svg" tag near the start.
  if (startsWith("3c3f786d6c")) {
    const head = String.fromCharCode(...new Uint8Array(buf.slice(0, 2048)));
    return head.includes("<svg") ? "image/svg+xml" : "application/xml";
  }

  // --- audio/video ---
  if (startsWith("494433")) return "audio/mpeg"; // MP3 (ID3)
  if (startsWith("000001ba") || startsWith("000001b3")) return "video/mpeg"; // MPEG container
  if (startsWith("1a45dfa3")) return "video/webm"; // WEBM / Matroska
  // ISO base media files: bytes 0-3 are the box size (varies), bytes 4-7 are
  // "ftyp", bytes 8-11 are the major brand that identifies the flavor.
  if (fourBytesAt(4) === "66747970") {
    const brand = String.fromCharCode(...bytes.slice(8, 12));
    if (brand === "avif" || brand === "avis") return "image/avif";
    const mp4Brands = [
      "isom",
      "iso2",
      "iso4",
      "iso5",
      "iso6",
      "mp41",
      "mp42",
      "avc1",
      "M4V ",
    ];
    if (mp4Brands.includes(brand)) return "video/mp4"; // MP4
    // Other brands (HEIC, QuickTime, ...) are not classified here.
  }
  if (startsWith("4f676753")) return "audio/ogg"; // OGG / Opus

  // --- documents & archives ---
  if (startsWith("25504446")) return "application/pdf"; // PDF
  if (
    startsWith("504b0304") ||
    startsWith("504b0506") ||
    startsWith("504b0708")
  )
    return "application/zip"; // ZIP / DOCX / PPTX / XLSX / EPUB
  if (startsWith("52617221")) return "application/x-rar-compressed"; // RAR
  if (startsWith("7f454c46")) return "application/x-elf"; // ELF binaries
  if (startsWith("1f8b08")) return "application/gzip"; // GZIP
  if (startsWith("425a68")) return "application/x-bzip2"; // BZIP2

  // Plain text, JSON and others are trickier—fallback:
  return "application/octet-stream";
}
133
/**
 * Compute a content hash for a buffer so identical contents can be recognized.
 * The original note says this is meant to match the hash in File Storage
 * metadata; that cannot be confirmed from this file.
 * @param buffer The contents to hash
 * @returns lowercase hex string of the SHA-256 digest of the contents
 */
export async function contentHashFromArrayBuffer(buffer: ArrayBuffer) {
  const digest = await crypto.subtle.digest("SHA-256", buffer);
  let hex = "";
  for (const byte of new Uint8Array(digest)) {
    // Two hex characters per byte, zero-padded
    hex += byte.toString(16).padStart(2, "0");
  }
  return hex;
}
147
+
148
/**
 * Expand a filename into a keyword-friendly string: the original title,
 * followed by the individual pieces of every camelCase / TitleCase word found
 * before the extension, all separated by spaces.
 * e.g. "MyFile is soGreat.txt" -> "MyFile is soGreat.txt My File so Great"
 * A title whose text after the last "." contains a space is not treated as a
 * filename and is returned unchanged. An empty or missing title gives
 * undefined.
 */
export function splitFilename(title: string | undefined): string | undefined {
  if (!title) return undefined;

  const segments = title.split(".");
  const lastSegment = segments.pop();
  if (lastSegment?.includes(" ")) {
    // Text after the final "." reads like a sentence, not an extension
    return title;
  }

  const keywords: string[] = [title];
  for (const segment of segments) {
    for (const word of segment.split(" ")) {
      // Break before every capital letter: "soGreat" -> ["so", "Great"]
      const pieces = word.split(/(?=[A-Z])/);
      // Single-piece words already appear verbatim in the title itself
      if (pieces.length > 1) {
        keywords.push(...pieces);
      }
    }
  }
  return keywords.join(" ");
}
@@ -0,0 +1,475 @@
1
+ import { describe, expect, test } from "vitest";
2
+ import { RAG } from "./index.js";
3
+ import type { DataModelFromSchemaDefinition } from "convex/server";
4
+ import {
5
+ anyApi,
6
+ queryGeneric,
7
+ mutationGeneric,
8
+ actionGeneric,
9
+ } from "convex/server";
10
+ import type {
11
+ ApiFromModules,
12
+ ActionBuilder,
13
+ MutationBuilder,
14
+ QueryBuilder,
15
+ } from "convex/server";
16
+ import { v } from "convex/values";
17
+ import { defineSchema } from "convex/server";
18
+ import { components, initConvexTest } from "./setup.test.js";
19
+ import { openai } from "@ai-sdk/openai";
20
+
21
// Test-only Convex scaffolding: an empty schema plus public function builders
// typed against the data model derived from it.
const schema = defineSchema({});
type DataModel = DataModelFromSchemaDefinition<typeof schema>;
type PublicBuilders = {
  query: QueryBuilder<DataModel, "public">;
  mutation: MutationBuilder<DataModel, "public">;
  action: ActionBuilder<DataModel, "public">;
};
const { query, mutation, action } = {
  query: queryGeneric,
  mutation: mutationGeneric,
  action: actionGeneric,
} as PublicBuilders;

// RAG client under test. The dimension here is the same 1536 that
// dummyEmbeddings uses for its vector length.
const rag = new RAG(components.rag, {
  textEmbeddingModel: openai.textEmbeddingModel("text-embedding-3-small"),
  embeddingDimension: 1536,
  filterNames: ["simpleString", "arrayOfStrings", "customObject"],
});
34
+
35
// Public query that forwards its three arguments to
// rag.findExistingEntryByContentHash and returns the result.
export const findExistingEntryByContentHash = query({
  args: { namespace: v.string(), key: v.string(), contentHash: v.string() },
  handler: async (ctx, { namespace, key, contentHash }) =>
    rag.findExistingEntryByContentHash(ctx, { namespace, key, contentHash }),
});
45
+
46
// Shape of one pre-embedded chunk accepted by the `add` mutation.
const chunkValidator = v.object({
  text: v.string(),
  metadata: v.record(v.string(), v.any()),
  embedding: v.array(v.number()),
});

// One validator per filter name configured on the RAG client.
const filterValueValidator = v.union(
  v.object({
    name: v.literal("simpleString"),
    value: v.string(),
  }),
  v.object({
    name: v.literal("arrayOfStrings"),
    value: v.array(v.string()),
  }),
  v.object({
    name: v.literal("customObject"),
    value: v.record(v.string(), v.any()),
  })
);

// Public mutation that passes its validated arguments straight to rag.add.
export const add = mutation({
  args: {
    key: v.string(),
    chunks: v.array(chunkValidator),
    namespace: v.string(),
    title: v.optional(v.string()),
    filterValues: v.optional(v.array(filterValueValidator)),
    importance: v.optional(v.number()),
    contentHash: v.optional(v.string()),
  },
  handler: async (ctx, args) => rag.add(ctx, args),
});
83
+
84
// Public action wrapping rag.search with a caller-supplied embedding.
// Defaults: limit 10 and no surrounding chunk context. Only `results`, `text`
// and `entries` from the search response are returned.
export const search = action({
  args: {
    embedding: v.array(v.number()),
    namespace: v.string(),
    limit: v.optional(v.number()),
    chunkContext: v.optional(
      v.object({
        before: v.number(),
        after: v.number(),
      })
    ),
  },
  handler: async (ctx, { embedding, namespace, limit, chunkContext }) => {
    const found = await rag.search(ctx, {
      embedding,
      namespace,
      limit: limit ?? 10,
      chunkContext: chunkContext ?? { before: 0, after: 0 },
    });
    return {
      results: found.results,
      text: found.text,
      entries: found.entries,
    };
  },
});
111
+
112
// Typed references to the functions exported above. `anyApi` is untyped, so
// the "index.test" module entry is cast to the API shape those exports imply.
type TestFunctions = {
  findExistingEntryByContentHash: typeof findExistingEntryByContentHash;
  add: typeof add;
  search: typeof search;
};
const testApi = anyApi["index.test"] as unknown as ApiFromModules<{
  fns: TestFunctions;
}>["fns"];
120
+
121
// Deterministic stand-in for a real embedding: a 1536-element vector that is
// 0.1 everywhere except index 0, which is the first character's code / 256.
// Only the first character of `text` influences the result.
function dummyEmbeddings(text: string) {
  const vector = new Array<number>(1536).fill(0.1);
  vector[0] = text.charCodeAt(0) / 256;
  return vector;
}
126
+
127
describe("RAG thick client", () => {
  // Build one chunk for the `add` mutation. `embedFrom` lets a test embed a
  // different string than the stored text; dummyEmbeddings only looks at the
  // first character, so that character decides the vector.
  const chunk = (text: string, embedFrom: string = text) => ({
    text,
    metadata: {},
    embedding: dummyEmbeddings(embedFrom),
  });

  test("should add a entry and be able to list it", async () => {
    const t = initConvexTest(schema);
    const added = await t.mutation(testApi.add, {
      key: "test",
      chunks: ["A", "B", "C"].map((letter) => chunk(letter)),
      namespace: "test",
    });
    expect(added.entryId).toBeDefined();
    expect(added.status).toBe("ready");

    await t.run(async (ctx) => {
      const listing = await rag.listChunks(ctx, {
        entryId: added.entryId,
        paginationOpts: { numItems: 10, cursor: null },
      });
      expect(listing.page.length).toBe(3);
      expect(listing.isDone).toBe(true);
      // Chunks come back in insertion order: 0, 1, 2
      for (let i = 0; i < 3; i++) {
        expect(listing.page[i].order).toBe(i);
      }
    });
  });

  test("should work from a test function", async () => {
    // Smoke test: there are no assertions, so it passes as long as the
    // mutation does not throw.
    const t = initConvexTest(schema);
    await t.mutation(testApi.add, {
      key: "test",
      chunks: ["A", "B", "C"].map((letter) => chunk(letter)),
      namespace: "test",
    });
  });

  describe("text formatting validation", () => {
    test("should format single entry with sequential chunks correctly", async () => {
      const t = initConvexTest(schema);

      // One titled entry with three consecutive chunks
      await t.mutation(testApi.add, {
        key: "sequential-test",
        chunks: [
          chunk("Chunk 1 content"),
          chunk("Chunk 2 content"),
          chunk("Chunk 3 content"),
        ],
        namespace: "format-test",
        title: "Test Document",
      });

      const found = await t.action(testApi.search, {
        embedding: dummyEmbeddings("content"),
        namespace: "format-test",
        limit: 10,
      });

      // README format: "# Title:\n{entry.text}"
      expect(found.text).toContain("# Test Document:");
      expect(found.entries).toHaveLength(1);
      expect(found.entries[0].text).toBe(
        "Chunk 1 content\nChunk 2 content\nChunk 3 content"
      );

      // Combined text is the title line followed by the entry text
      expect(found.text).toBe(
        "# Test Document:\nChunk 1 content\nChunk 2 content\nChunk 3 content"
      );
    });

    test("should format single entry without title correctly", async () => {
      const t = initConvexTest(schema);

      // Entry added without a title
      await t.mutation(testApi.add, {
        key: "no-title-test",
        chunks: [chunk("Content without title")],
        namespace: "format-test-notitle",
      });

      const found = await t.action(testApi.search, {
        embedding: dummyEmbeddings("content"),
        namespace: "format-test-notitle",
        limit: 10,
      });

      // No title means no "# " heading line
      expect(found.text).not.toContain("# ");
      expect(found.entries).toHaveLength(1);
      expect(found.entries[0].text).toBe("Content without title");
      expect(found.text).toBe("Content without title");
    });

    test("should format non-sequential chunks with ellipsis separator", async () => {
      const t = initConvexTest(schema);

      // Five chunks in one entry. The third and fifth are embedded from
      // strings starting with "A" and "B"; the others start with "C".
      await t.mutation(testApi.add, {
        key: "doc1",
        chunks: [
          chunk("Chunk 1 content"),
          chunk("Chunk 2 content"),
          chunk("Important chunk", "A important chunk"),
          chunk("Chunk 4 content"),
          chunk("Another important chunk", "B important chunk"),
        ],
        namespace: "ellipsis-test",
        title: "Document with gaps",
      });

      // Ask for the two best matches and no surrounding context
      const found = await t.action(testApi.search, {
        embedding: dummyEmbeddings("A important chunk"),
        namespace: "ellipsis-test",
        limit: 2,
        chunkContext: { before: 0, after: 0 },
      });

      expect(found.entries).toHaveLength(1);

      // Both non-adjacent chunks are expected in the single entry
      expect(found.entries[0].text).toContain("Important chunk");
      expect(found.entries[0].text).toContain("Another important chunk");

      // Non-sequential chunks are joined with "\n...\n"
      expect(found.text).toMatch(/\n\.\.\.\n/);
    });

    test("should format multiple entries with separators", async () => {
      const t = initConvexTest(schema);

      // Two separate titled entries in the same namespace
      await t.mutation(testApi.add, {
        key: "first-doc",
        chunks: [chunk("First document content")],
        namespace: "multi-entry-test",
        title: "First Document",
      });

      await t.mutation(testApi.add, {
        key: "second-doc",
        chunks: [chunk("Second document content")],
        namespace: "multi-entry-test",
        title: "Second Document",
      });

      const found = await t.action(testApi.search, {
        embedding: dummyEmbeddings("document"),
        namespace: "multi-entry-test",
        limit: 10,
      });

      // Entries are separated by "\n---\n" as per README
      expect(found.text).toContain("---");
      expect(found.text).toMatch(/# .+:\n.+\n---\n# .+:\n.+/);

      // Both titles appear with the "# " prefix
      expect(found.text).toContain("# First Document:");
      expect(found.text).toContain("# Second Document:");

      expect(found.entries).toHaveLength(2);
    });

    test("should format mixed entries (with and without titles)", async () => {
      const t = initConvexTest(schema);

      // Titled entry
      await t.mutation(testApi.add, {
        key: "titled-doc",
        chunks: [chunk("Content with title")],
        namespace: "mixed-test",
        title: "Titled Document",
      });

      // Untitled entry
      await t.mutation(testApi.add, {
        key: "untitled-doc",
        chunks: [chunk("Content without title")],
        namespace: "mixed-test",
      });

      const found = await t.action(testApi.search, {
        embedding: dummyEmbeddings("content"),
        namespace: "mixed-test",
        limit: 10,
      });

      expect(found.text).toContain("---"); // entries are separated
      expect(found.text).toContain("# Titled Document:"); // titled entry has its heading

      // Exactly two sections: one with a heading, one without
      const sections = found.text.split("\n---\n");
      expect(sections).toHaveLength(2);

      const titledSections = sections.filter((section) =>
        section.startsWith("# ")
      );
      expect(titledSections.length > 0).toBe(true);
      expect(titledSections.length < sections.length).toBe(true);

      expect(found.entries).toHaveLength(2);
    });

    test("should match exact README format specification", async () => {
      const t = initConvexTest(schema);

      // Reproduce the README example: two titled entries, two chunks each
      await t.mutation(testApi.add, {
        key: "title1-doc",
        chunks: [chunk("Chunk 1 contents"), chunk("Chunk 2 contents")],
        namespace: "readme-format-test",
        title: "Title 1",
      });

      await t.mutation(testApi.add, {
        key: "title2-doc",
        chunks: [chunk("Chunk 3 contents"), chunk("Chunk 4 contents")],
        namespace: "readme-format-test",
        title: "Title 2",
      });

      const found = await t.action(testApi.search, {
        embedding: dummyEmbeddings("contents"),
        namespace: "readme-format-test",
        limit: 10,
      });

      // Basic structure
      expect(found.text).toContain("# Title 1:");
      expect(found.text).toContain("# Title 2:");
      expect(found.text).toContain("---");

      // Two sections, each opening with its "# Title X:" heading
      const sections = found.text.split("\n---\n");
      expect(sections).toHaveLength(2);
      for (const section of sections) {
        expect(section).toMatch(/^# Title \d+:/);
      }

      expect(found.entries).toHaveLength(2);

      // Full text, line by line
      expect(found.text).toBe(
        [
          "# Title 1:",
          "Chunk 1 contents",
          "Chunk 2 contents",
          "---",
          "# Title 2:",
          "Chunk 3 contents",
          "Chunk 4 contents",
        ].join("\n")
      );
    });
  });
});