@convex-dev/rag 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +201 -0
- package/README.md +371 -0
- package/dist/client/_generated/_ignore.d.ts +1 -0
- package/dist/client/_generated/_ignore.d.ts.map +1 -0
- package/dist/client/_generated/_ignore.js +3 -0
- package/dist/client/_generated/_ignore.js.map +1 -0
- package/dist/client/defaultChunker.d.ts +15 -0
- package/dist/client/defaultChunker.d.ts.map +1 -0
- package/dist/client/defaultChunker.js +148 -0
- package/dist/client/defaultChunker.js.map +1 -0
- package/dist/client/fileUtils.d.ts +24 -0
- package/dist/client/fileUtils.d.ts.map +1 -0
- package/dist/client/fileUtils.js +179 -0
- package/dist/client/fileUtils.js.map +1 -0
- package/dist/client/index.d.ts +442 -0
- package/dist/client/index.d.ts.map +1 -0
- package/dist/client/index.js +597 -0
- package/dist/client/index.js.map +1 -0
- package/dist/client/types.d.ts +29 -0
- package/dist/client/types.d.ts.map +1 -0
- package/dist/client/types.js +2 -0
- package/dist/client/types.js.map +1 -0
- package/dist/component/_generated/api.d.ts +439 -0
- package/dist/component/_generated/api.d.ts.map +1 -0
- package/dist/component/_generated/api.js +22 -0
- package/dist/component/_generated/api.js.map +1 -0
- package/dist/component/_generated/dataModel.d.ts +60 -0
- package/dist/component/_generated/server.d.ts +149 -0
- package/dist/component/_generated/server.d.ts.map +1 -0
- package/dist/component/_generated/server.js +74 -0
- package/dist/component/_generated/server.js.map +1 -0
- package/dist/component/chunks.d.ts +139 -0
- package/dist/component/chunks.d.ts.map +1 -0
- package/dist/component/chunks.js +413 -0
- package/dist/component/chunks.js.map +1 -0
- package/dist/component/convex.config.d.ts +3 -0
- package/dist/component/convex.config.d.ts.map +1 -0
- package/dist/component/convex.config.js +6 -0
- package/dist/component/convex.config.js.map +1 -0
- package/dist/component/embeddings/importance.d.ts +21 -0
- package/dist/component/embeddings/importance.d.ts.map +1 -0
- package/dist/component/embeddings/importance.js +67 -0
- package/dist/component/embeddings/importance.js.map +1 -0
- package/dist/component/embeddings/index.d.ts +23 -0
- package/dist/component/embeddings/index.d.ts.map +1 -0
- package/dist/component/embeddings/index.js +54 -0
- package/dist/component/embeddings/index.js.map +1 -0
- package/dist/component/embeddings/tables.d.ts +39 -0
- package/dist/component/embeddings/tables.d.ts.map +1 -0
- package/dist/component/embeddings/tables.js +53 -0
- package/dist/component/embeddings/tables.js.map +1 -0
- package/dist/component/entries.d.ts +167 -0
- package/dist/component/entries.d.ts.map +1 -0
- package/dist/component/entries.js +409 -0
- package/dist/component/entries.js.map +1 -0
- package/dist/component/filters.d.ts +46 -0
- package/dist/component/filters.d.ts.map +1 -0
- package/dist/component/filters.js +72 -0
- package/dist/component/filters.js.map +1 -0
- package/dist/component/namespaces.d.ts +131 -0
- package/dist/component/namespaces.d.ts.map +1 -0
- package/dist/component/namespaces.js +222 -0
- package/dist/component/namespaces.js.map +1 -0
- package/dist/component/schema.d.ts +1697 -0
- package/dist/component/schema.d.ts.map +1 -0
- package/dist/component/schema.js +88 -0
- package/dist/component/schema.js.map +1 -0
- package/dist/component/search.d.ts +20 -0
- package/dist/component/search.d.ts.map +1 -0
- package/dist/component/search.js +69 -0
- package/dist/component/search.js.map +1 -0
- package/dist/package.json +3 -0
- package/dist/react/index.d.ts +2 -0
- package/dist/react/index.d.ts.map +1 -0
- package/dist/react/index.js +6 -0
- package/dist/react/index.js.map +1 -0
- package/dist/shared.d.ts +479 -0
- package/dist/shared.d.ts.map +1 -0
- package/dist/shared.js +98 -0
- package/dist/shared.js.map +1 -0
- package/package.json +97 -0
- package/src/client/_generated/_ignore.ts +1 -0
- package/src/client/defaultChunker.test.ts +243 -0
- package/src/client/defaultChunker.ts +183 -0
- package/src/client/fileUtils.ts +179 -0
- package/src/client/index.test.ts +475 -0
- package/src/client/index.ts +1125 -0
- package/src/client/setup.test.ts +28 -0
- package/src/client/types.ts +69 -0
- package/src/component/_generated/api.d.ts +439 -0
- package/src/component/_generated/api.js +23 -0
- package/src/component/_generated/dataModel.d.ts +60 -0
- package/src/component/_generated/server.d.ts +149 -0
- package/src/component/_generated/server.js +90 -0
- package/src/component/chunks.test.ts +915 -0
- package/src/component/chunks.ts +555 -0
- package/src/component/convex.config.ts +7 -0
- package/src/component/embeddings/importance.test.ts +249 -0
- package/src/component/embeddings/importance.ts +75 -0
- package/src/component/embeddings/index.test.ts +482 -0
- package/src/component/embeddings/index.ts +99 -0
- package/src/component/embeddings/tables.ts +114 -0
- package/src/component/entries.test.ts +341 -0
- package/src/component/entries.ts +546 -0
- package/src/component/filters.ts +119 -0
- package/src/component/namespaces.ts +299 -0
- package/src/component/schema.ts +106 -0
- package/src/component/search.test.ts +445 -0
- package/src/component/search.ts +97 -0
- package/src/component/setup.test.ts +5 -0
- package/src/react/index.ts +7 -0
- package/src/shared.ts +247 -0
- package/src/vitest.config.ts +7 -0
|
@@ -0,0 +1,249 @@
|
|
|
1
|
+
import { describe, it, expect } from "vitest";
|
|
2
|
+
import {
|
|
3
|
+
searchVector,
|
|
4
|
+
vectorWithImportance,
|
|
5
|
+
modifyImportance,
|
|
6
|
+
getImportance,
|
|
7
|
+
vectorWithImportanceDimension,
|
|
8
|
+
} from "./importance.js";
|
|
9
|
+
|
|
10
|
+
describe("importance.ts", () => {
|
|
11
|
+
describe("searchVector", () => {
  // Ordinary embeddings just gain one trailing 0 slot.
  it("should add a 0 to the end of a normal embedding", () => {
    const input = [0.1, 0.2, 0.3, 0.4];
    const padded = searchVector(input);

    expect(padded).toEqual([0.1, 0.2, 0.3, 0.4, 0]);
    expect(padded).toHaveLength(input.length + 1);
  });

  // A 4096-wide embedding must stay 4096 wide: its last value is replaced by 0.
  it("should handle 4096 dimension embeddings by slicing to 4095 and adding 0", () => {
    const input = Array.from({ length: 4096 }, (_, i) => i / 4096);
    const padded = searchVector(input);
    const untouchedPrefix = input.slice(0, 4095);

    expect(padded).toHaveLength(4096);
    expect(padded[4095]).toBe(0);
    expect(padded.slice(0, 4095)).toEqual(untouchedPrefix);
  });
});
|
|
29
|
+
|
|
30
|
+
/**
 * Test-local helper that rescales a vector to unit length.
 * An all-zero input has no direction, so it is returned as all zeros.
 */
function normalizeVector(vector: number[]) {
  let squaredTotal = 0;
  vector.forEach((component) => {
    squaredTotal += component * component;
  });
  const length = Math.sqrt(squaredTotal);
  if (length === 0) {
    return vector.map(() => 0);
  }
  return vector.map((component) => component / length);
}
|
|
37
|
+
|
|
38
|
+
describe("vectorWithImportance", () => {
  it("should return normalized vector relative to importance", () => {
    const unitEmbedding = [0.6, 0.8]; // length is already 1.0
    const level = 0.5;
    const weighted = vectorWithImportance(unitEmbedding, level);

    expect(weighted).toHaveLength(3);
    expect(weighted[0]).toBeCloseTo(unitEmbedding[0] * level);
    expect(weighted[1]).toBeCloseTo(unitEmbedding[1] * level);
    // Scaled embedding plus the trailing weight should still have length 1.
    const magnitude = Math.sqrt(
      weighted.reduce((sum, component) => sum + component ** 2, 0)
    );
    expect(magnitude).toBeCloseTo(1);
  });

  it("should handle maximum importance", () => {
    // Importance 1 leaves the (unit) embedding components unchanged.
    const weighted = vectorWithImportance([0.6, 0.8], 1.0);

    expect(weighted).toHaveLength(3);
    expect(weighted[0]).toBeCloseTo(0.6);
    expect(weighted[1]).toBeCloseTo(0.8);
  });

  it("should handle minimum importance", () => {
    // Importance 0 scales every embedding component down to 0.
    const weighted = vectorWithImportance([0.6, 0.8], 0.0);

    expect(weighted).toHaveLength(3);
    expect(weighted[0]).toBeCloseTo(0);
    expect(weighted[1]).toBeCloseTo(0);
  });

  it("should handle 4096 dimension embedding by slicing to 4095", () => {
    const wideEmbedding = Array.from({ length: 4096 }, () => 0.1);
    const level = 0.5;
    const weighted = vectorWithImportance(wideEmbedding, level);

    // The output may not grow past 4096 entries.
    expect(weighted).toHaveLength(4096);
    expect(getImportance(weighted)).toBeCloseTo(0.5);
  });

  it("should properly normalize non-unit vectors", () => {
    // [3, 4] has magnitude 5, so normalization yields [3/5, 4/5] = [0.6, 0.8].
    const weighted = vectorWithImportance([3, 4], 1);

    expect(weighted).toHaveLength(3);
    expect(weighted[0]).toBeCloseTo(0.6);
    expect(weighted[1]).toBeCloseTo(0.8);
  });
});
|
|
92
|
+
|
|
93
|
+
describe("getImportance", () => {
  it("should correctly extract importance from vector", () => {
    const vector = vectorWithImportance([0.1, 0.2, 0.3], 0.49);
    const importance = getImportance(vector);

    expect(importance).toBeCloseTo(0.49);
  });

  it("should handle zero importance", () => {
    // Zero importance is stored as a trailing weight of 1
    // (weight = sqrt(1 - importance^2)) with the embedding scaled to 0.
    const vector = [0, 0, 0, 1];
    const importance = getImportance(vector);

    expect(importance).toBe(0);
  });

  it("should handle maximum importance", () => {
    // Full importance is stored as a trailing weight of 0.
    const vector = [0.1, 0.2, 0.3, 0];
    const importance = getImportance(vector);

    expect(importance).toBe(1);
  });
});
|
|
115
|
+
|
|
116
|
+
describe("modifyImportance", () => {
  it("should modify importance of existing vector", () => {
    // A trailing weight of sqrt(0.75) corresponds to a stored importance of
    // sqrt(1 - 0.75) = 0.5.
    const stored = [0.6, 0.8, Math.sqrt(0.75)];
    const target = 0.64;
    const updated = modifyImportance(stored, target);

    expect(updated).toHaveLength(3);
    expect(updated[0]).toBeCloseTo(0.6 * target);
    expect(updated[1]).toBeCloseTo(0.8 * target);
    expect(getImportance(updated)).toBeCloseTo(0.64);
  });

  it("should handle zero importance modification", () => {
    const stored = [0.6, 0.8, 0.5];
    const updated = modifyImportance(stored, 0);

    // Everything collapses onto the weight slot when importance becomes 0.
    expect(updated).toHaveLength(3);
    expect(updated[0]).toBe(0);
    expect(updated[1]).toBe(0);
    expect(updated[2]).toBe(1);
    expect(getImportance(updated)).toBe(0);
  });
});
|
|
140
|
+
|
|
141
|
+
describe("vectorWithImportanceDimension", () => {
  it("should return dimensions + 1 for normal dimensions", () => {
    // Each common embedding size gains exactly one slot for the weight.
    for (const dims of [128, 256, 512, 1024, 1536]) {
      expect(vectorWithImportanceDimension(dims)).toBe(dims + 1);
    }
  });

  it("should return 4096 for 4096 input (respecting global limit)", () => {
    const stored = vectorWithImportanceDimension(4096);
    expect(stored).toBe(4096);
  });

  it("should handle edge cases", () => {
    for (const [dims, expected] of [
      [0, 1],
      [1, 2],
    ]) {
      expect(vectorWithImportanceDimension(dims)).toBe(expected);
    }
  });
});
|
|
159
|
+
|
|
160
|
+
describe("round-trip importance testing", () => {
  // Every importance level shares the same absolute tolerance.
  const roundTripCases = [0.0, 0.1, 0.25, 0.5, 0.75, 1.0].map(
    (importance) => ({ importance, tolerance: 0.001 })
  );

  for (const { importance, tolerance } of roundTripCases) {
    it(`should round-trip importance value ${importance} approximately`, () => {
      // [0.6, 0.8] is a unit vector, so normalization leaves it as-is.
      const encoded = vectorWithImportance([0.6, 0.8], importance);
      const decoded = getImportance(encoded);

      expect(decoded).toBeCloseTo(importance, 3);
      const drift = Math.abs(decoded - importance);
      expect(drift).toBeLessThan(tolerance);
    });
  }

  it("should round-trip with non-unit vectors", () => {
    // [3, 4, 5] has magnitude sqrt(50).
    const expected = 0.36;
    const encoded = vectorWithImportance([3, 4, 5], expected);

    expect(getImportance(encoded)).toBeCloseTo(expected, 3);
  });

  it("should round-trip after modifyImportance", () => {
    const before = 0.5;
    const after = 0.8;

    // Encode with the first importance, then re-weight to the second one.
    const encoded = vectorWithImportance([0.1, 0.2, 0.3], before);
    const reweighted = modifyImportance(encoded, after);

    // Only the most recent importance should be recoverable.
    const decoded = getImportance(reweighted);
    expect(decoded).toBeCloseTo(after, 3);
  });
});
|
|
215
|
+
|
|
216
|
+
describe("edge cases and error conditions", () => {
  it("should handle very small importance values", () => {
    const tiny = 1e-10;
    const weighted = vectorWithImportance([1, 0], tiny);

    // NOTE(review): toBeCloseTo is called without a precision argument, so
    // with its default this likely only checks that the result is roughly 0.
    // Confirm whether a tighter tolerance was intended.
    expect(getImportance(weighted)).toBeCloseTo(tiny);
  });

  it("should handle very large embeddings", () => {
    const wideEmbedding = Array.from({ length: 2048 }, () => 0.1);
    const weighted = vectorWithImportance(wideEmbedding, 0.234);

    // 2048 embedding slots plus one weight slot.
    expect(weighted).toHaveLength(2049);
  });

  it("should maintain vector properties after importance weighting", () => {
    const unitEmbedding = [0.6, 0.8];
    const weighted = vectorWithImportance(unitEmbedding, 0.25);

    // Re-normalizing the embedding part should recover the original direction.
    const direction = normalizeVector(weighted.slice(0, 2));
    expect(direction[0]).toBeCloseTo(0.6);
    expect(direction[1]).toBeCloseTo(0.8);

    // The search-side vector keeps the embedding components untouched.
    const query = searchVector(unitEmbedding);
    expect(query[0]).toBeCloseTo(0.6);
    expect(query[1]).toBeCloseTo(0.8);
  });
});
|
|
249
|
+
});
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* This file contains functions for modifying embeddings to include importance.
|
|
3
|
+
* Terminology is roughly: a "vector" is an "embedding" + importance.
|
|
4
|
+
* Users pass in embeddings, the tables and vector search deal with vectors.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
/**
 * Builds the vector used on the query side of a search: the embedding with a
 * trailing 0, so the stored weight slot contributes nothing to the match.
 *
 * A 4096-dimension embedding first loses its final value, so the result never
 * exceeds 4096 entries.
 */
export function searchVector(embedding: number[]) {
  const kept =
    embedding.length === 4096 ? embedding.slice(0, 4095) : embedding;
  return [...kept, 0];
}
|
|
17
|
+
/**
 * Turns an embedding into the vector that gets stored: the embedding is
 * normalized, scaled by `importance`, and a trailing weight is appended so
 * that the result has magnitude 1 (for any non-zero embedding).
 *
 * @param embedding - The embedding to modify with an importance weight. A
 *   4096-dimension embedding loses its final value to make room for the weight.
 * @param importance - 0 - 1, where 0 is no importance and 1 is full importance.
 * @returns The normalized, scaled embedding followed by the weight.
 * @throws RangeError if `importance` is NaN or outside of [0, 1]. Such values
 *   would otherwise yield a NaN weight or a flipped embedding.
 */
export function vectorWithImportance(
  embedding: number[],
  importance: number
): number[] {
  // Written as a negated range check so that NaN is rejected as well.
  if (!(importance >= 0 && importance <= 1)) {
    throw new RangeError(
      `importance must be a number between 0 and 1, got ${importance}`
    );
  }
  /*
   * Goal: add a weighting that reduces the magnitude of the target vector after
   * normalization.
   * 1. Scale the existing vector by importance and add a weight so |v| = 1.
   * 2. Search with [...embedding, 0].
   * e.g.:
   * Say we have an embedding of 2 numbers [.6, .8]
   * For 50% importance: [.6 * i, .8 * i, sqrt(1 - i^2)] -> [.3, .4, .866]
   * For [.6, .8] we used to get 1.0.
   * Now we get .6*.3 + .8*.4 + 0*.866 = 0.5
   */
  // We drop the final dimension if it'd make it larger than 4096.
  // Unfortunate current limitation of Convex vector search.
  const vectorToModify = normalizeVector(
    embedding.length === 4096 ? embedding.slice(0, 4095) : embedding
  );
  const scaled = scaleVector(vectorToModify, importance);

  // |embedding| == 1
  // weight^2 + importance^2(|embedding|^2) == 1
  // weight = sqrt(1 - importance^2)
  const weight = Math.sqrt(1 - importance ** 2);
  return [...scaled, weight];
}

/**
 * Rescales a vector to unit length. An all-zero vector has no direction and is
 * returned as all zeros instead of dividing by zero.
 */
function normalizeVector(vector: number[]): number[] {
  const sumOfSquares = vector.reduce((acc, v) => acc + v * v, 0);
  const magnitude = Math.sqrt(sumOfSquares);
  if (magnitude === 0) {
    return vector.map(() => 0);
  }
  return vector.map((v) => v / magnitude);
}

/** Multiplies every component of `vector` by `scale`, returning a new array. */
function scaleVector(vector: number[], scale: number): number[] {
  return vector.map((v) => v * scale);
}
|
|
60
|
+
|
|
61
|
+
/**
 * Re-weights an already stored vector with a new importance.
 *
 * The trailing weight slot is discarded and the remaining components are
 * passed back through `vectorWithImportance`, which normalizes them again
 * before applying the new importance.
 */
export function modifyImportance(vector: number[], importance: number) {
  // No special casing for 4096 here: stripping the weight leaves 4095
  // values, and vectorWithImportance brings that back up to 4096.
  const withoutWeight = vector.slice(0, -1);
  return vectorWithImportance(withoutWeight, importance);
}
|
|
67
|
+
|
|
68
|
+
/**
 * Recovers the importance from a stored vector by inverting
 * weight = sqrt(1 - importance^2) on the vector's last component.
 */
export function getImportance(vector: number[]) {
  const weight = vector[vector.length - 1];
  const importanceSquared = 1 - weight ** 2;
  return Math.sqrt(importanceSquared);
}
|
|
71
|
+
|
|
72
|
+
/**
 * Number of dimensions of a stored vector for an embedding of the given size:
 * one extra slot for the importance weight, except that 4096 stays at 4096
 * to respect the global limit.
 */
export function vectorWithImportanceDimension(dimensions: number) {
  if (dimensions === 4096) {
    return 4096;
  }
  return dimensions + 1;
}
|