eigen-db 4.3.0 → 4.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +4 -0
- package/README.md +41 -3
- package/dist/compute.d.ts +20 -0
- package/dist/eigen-db.js +165 -113
- package/dist/eigen-db.js.map +1 -1
- package/dist/eigen-db.umd.cjs +1 -1
- package/dist/eigen-db.umd.cjs.map +1 -1
- package/dist/errors.d.ts +7 -0
- package/dist/index.d.ts +12 -0
- package/dist/lexicon.d.ts +28 -0
- package/dist/memory-manager.d.ts +68 -0
- package/dist/result-set.d.ts +35 -0
- package/dist/simd-binary.d.ts +1 -0
- package/dist/storage.d.ts +38 -0
- package/dist/types.d.ts +44 -0
- package/dist/vector-db.d.ts +131 -0
- package/dist/wasm-compute.d.ts +13 -0
- package/package.json +4 -4
- package/src/lib/__tests__/vector-db.test.ts +288 -0
- package/src/lib/vector-db.ts +91 -16
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* VectorDB — Key-Value Vector Database
|
|
3
|
+
*
|
|
4
|
+
* Decoupled from embedding providers. Users pass pre-computed vectors
|
|
5
|
+
* as number arrays (or Float32Array) with string keys.
|
|
6
|
+
*
|
|
7
|
+
* Supports:
|
|
8
|
+
* - set/get/setMany/getMany for key-value CRUD
|
|
9
|
+
* - query for similarity search (dot product on normalized vectors)
|
|
10
|
+
* - flush to persist, close to flush+release, clear to wipe
|
|
11
|
+
* - Streaming export/import using Web Streams API
|
|
12
|
+
* - Last-write-wins semantics for duplicate keys (append-only storage)
|
|
13
|
+
*/
|
|
14
|
+
import type { ResultItem } from "./result-set";
|
|
15
|
+
import type { OpenOptions, OpenOptionsInternal, QueryOptions, SetOptions, VectorInput } from "./types";
|
|
16
|
+
export declare class VectorDB {
|
|
17
|
+
private readonly memoryManager;
|
|
18
|
+
private readonly storage;
|
|
19
|
+
private readonly _dimensions;
|
|
20
|
+
private readonly shouldNormalize;
|
|
21
|
+
private wasmExports;
|
|
22
|
+
/** Maps key to its slot index in the vector array */
|
|
23
|
+
private keyToSlot;
|
|
24
|
+
/** Maps slot index back to its key */
|
|
25
|
+
private slotToKey;
|
|
26
|
+
/** Whether this instance has been closed */
|
|
27
|
+
private closed;
|
|
28
|
+
private constructor();
|
|
29
|
+
/**
|
|
30
|
+
* Opens a VectorDB instance.
|
|
31
|
+
* Loads existing data from storage into WASM memory.
|
|
32
|
+
*/
|
|
33
|
+
static open(options: OpenOptions): Promise<VectorDB>;
|
|
34
|
+
static open(options: OpenOptionsInternal): Promise<VectorDB>;
|
|
35
|
+
/** Total number of key-value pairs in the database */
|
|
36
|
+
get size(): number;
|
|
37
|
+
/** Number of dimensions per vector */
|
|
38
|
+
get dimensions(): number;
|
|
39
|
+
/**
|
|
40
|
+
* Check whether a key exists in the database.
|
|
41
|
+
* Uses the internal key-to-slot map for O(1) lookup.
|
|
42
|
+
*/
|
|
43
|
+
has(key: string): boolean;
|
|
44
|
+
/**
|
|
45
|
+
* Delete an entry by key. Returns true if the key existed, false otherwise.
|
|
46
|
+
* Uses swap-and-pop to avoid gaps in the underlying vector array.
|
|
47
|
+
*/
|
|
48
|
+
delete(key: string): boolean;
|
|
49
|
+
/**
|
|
50
|
+
* Returns an iterable of all keys in the database.
|
|
51
|
+
*/
|
|
52
|
+
keys(): IterableIterator<string>;
|
|
53
|
+
/**
|
|
54
|
+
* Returns an iterable of [key, value] pairs.
|
|
55
|
+
* Values are returned as plain number array copies.
|
|
56
|
+
*/
|
|
57
|
+
entries(): IterableIterator<[string, number[]]>;
|
|
58
|
+
/**
|
|
59
|
+
* Implements the iterable protocol. Same as entries().
|
|
60
|
+
*/
|
|
61
|
+
[Symbol.iterator](): IterableIterator<[string, number[]]>;
|
|
62
|
+
/**
|
|
63
|
+
* Set a key-value pair. If the key already exists, its vector is overwritten (last-write-wins).
|
|
64
|
+
* The value is a number[] or Float32Array of length equal to the configured dimensions.
|
|
65
|
+
*/
|
|
66
|
+
set(key: string, value: VectorInput, options?: SetOptions): void;
|
|
67
|
+
/**
|
|
68
|
+
* Get the stored vector for a key. Returns undefined if the key does not exist.
|
|
69
|
+
* Returns a copy of the stored vector as a plain number array.
|
|
70
|
+
*/
|
|
71
|
+
get(key: string): number[] | undefined;
|
|
72
|
+
/**
|
|
73
|
+
* Set multiple key-value pairs at once. Last-write-wins applies within the batch.
|
|
74
|
+
*/
|
|
75
|
+
setMany(entries: [string, VectorInput][]): void;
|
|
76
|
+
/**
|
|
77
|
+
* Get vectors for multiple keys. Returns undefined for keys that don't exist.
|
|
78
|
+
*/
|
|
79
|
+
getMany(keys: string[]): (number[] | undefined)[];
|
|
80
|
+
/**
|
|
81
|
+
* Search for the most similar vectors to the given query vector.
|
|
82
|
+
*
|
|
83
|
+
* Default: returns a plain ResultItem[] sorted by descending similarity.
|
|
84
|
+
* With `{ iterable: true }`: returns a lazy Iterable<ResultItem> where keys
|
|
85
|
+
* are resolved only as each item is consumed.
|
|
86
|
+
*
|
|
87
|
+
* Similarity is the dot product of query and stored vectors. With
|
|
88
|
+
* normalization (default), this equals cosine similarity: 1 = identical,
|
|
89
|
+
* -1 = opposite.
|
|
90
|
+
*/
|
|
91
|
+
query(value: VectorInput, options: QueryOptions & {
|
|
92
|
+
iterable: true;
|
|
93
|
+
}): Iterable<ResultItem>;
|
|
94
|
+
query(value: VectorInput, options?: QueryOptions): ResultItem[];
|
|
95
|
+
/**
|
|
96
|
+
* Persist the current in-memory state to storage.
|
|
97
|
+
*/
|
|
98
|
+
flush(): Promise<void>;
|
|
99
|
+
/**
|
|
100
|
+
* Flush data to storage and release the instance.
|
|
101
|
+
* The instance cannot be used after close.
|
|
102
|
+
*/
|
|
103
|
+
close(): Promise<void>;
|
|
104
|
+
/**
|
|
105
|
+
* Clear all data from the database and storage.
|
|
106
|
+
*/
|
|
107
|
+
clear(): Promise<void>;
|
|
108
|
+
/**
|
|
109
|
+
* Export the entire database as a ReadableStream of binary chunks.
|
|
110
|
+
*
|
|
111
|
+
* Format: [Header 24 bytes][Vector data][Keys data]
|
|
112
|
+
* Header: magic(4) + version(4) + dimensions(4) + vectorCount(4) + vectorDataLen(4) + keysDataLen(4)
|
|
113
|
+
*
|
|
114
|
+
* Vectors are streamed in 64KB chunks from WASM memory to avoid large
|
|
115
|
+
* heap allocations.
|
|
116
|
+
*/
|
|
117
|
+
export(): Promise<ReadableStream<Uint8Array>>;
|
|
118
|
+
/**
|
|
119
|
+
* Import data from a ReadableStream, replacing all existing data.
|
|
120
|
+
* Performs a dimension check against the configured dimensions.
|
|
121
|
+
*
|
|
122
|
+
* Vectors are streamed directly into WASM memory in chunks to avoid
|
|
123
|
+
* large heap allocations.
|
|
124
|
+
*/
|
|
125
|
+
import(stream: ReadableStream<Uint8Array>): Promise<void>;
|
|
126
|
+
/**
|
|
127
|
+
* Normalize a vector using WASM (if available) or JS fallback.
|
|
128
|
+
*/
|
|
129
|
+
private normalizeVector;
|
|
130
|
+
private assertOpen;
|
|
131
|
+
}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* WASM SIMD compute layer.
|
|
3
|
+
* Compiles the hand-written WAT module and provides typed wrappers
|
|
4
|
+
* that operate on shared WebAssembly.Memory.
|
|
5
|
+
*/
|
|
6
|
+
export interface WasmExports {
|
|
7
|
+
normalize(ptr: number, dimensions: number): void;
|
|
8
|
+
search_all(queryPtr: number, dbPtr: number, scoresPtr: number, dbSize: number, dimensions: number): void;
|
|
9
|
+
}
|
|
10
|
+
/**
|
|
11
|
+
* Instantiates a WASM module with the given memory and returns typed exports.
|
|
12
|
+
*/
|
|
13
|
+
export declare function instantiateWasm(wasmBinary: Uint8Array, memory: WebAssembly.Memory): Promise<WasmExports>;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "eigen-db",
|
|
3
|
-
"version": "4.3.0",
|
|
3
|
+
"version": "4.4.0",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"files": [
|
|
6
6
|
"dist",
|
|
@@ -12,16 +12,16 @@
|
|
|
12
12
|
"module": "./dist/eigen-db.js",
|
|
13
13
|
"exports": {
|
|
14
14
|
".": {
|
|
15
|
-
"types": "./
|
|
15
|
+
"types": "./dist/index.d.ts",
|
|
16
16
|
"import": "./dist/eigen-db.js",
|
|
17
17
|
"require": "./dist/eigen-db.umd.cjs"
|
|
18
18
|
}
|
|
19
19
|
},
|
|
20
|
-
"types": "./
|
|
20
|
+
"types": "./dist/index.d.ts",
|
|
21
21
|
"scripts": {
|
|
22
22
|
"dev": "vite",
|
|
23
23
|
"compile-wat": "tsx scripts/compile-wat.ts",
|
|
24
|
-
"build": "npm run compile-wat && tsc && vite build",
|
|
24
|
+
"build": "npm run compile-wat && tsc && vite build && tsc -p tsconfig.build.json",
|
|
25
25
|
"preview": "vite preview",
|
|
26
26
|
"test": "vitest run",
|
|
27
27
|
"test:watch": "vitest",
|
|
@@ -94,6 +94,16 @@ describe("VectorDB", () => {
|
|
|
94
94
|
expect(db.size).toBe(0);
|
|
95
95
|
});
|
|
96
96
|
|
|
97
|
+
it("exposes dimensions property", async () => {
|
|
98
|
+
const db = await VectorDB.open({
|
|
99
|
+
dimensions: 4,
|
|
100
|
+
storage,
|
|
101
|
+
wasmBinary,
|
|
102
|
+
});
|
|
103
|
+
|
|
104
|
+
expect(db.dimensions).toBe(4);
|
|
105
|
+
});
|
|
106
|
+
|
|
97
107
|
// --- set and get ---
|
|
98
108
|
it("stores and retrieves a vector by key", async () => {
|
|
99
109
|
const db = await VectorDB.open({
|
|
@@ -838,6 +848,284 @@ describe("VectorDB", () => {
|
|
|
838
848
|
expect(db2.get("gamma")![2]).toBeCloseTo(1);
|
|
839
849
|
});
|
|
840
850
|
|
|
851
|
+
// --- has ---
|
|
852
|
+
it("has returns true for existing key", async () => {
|
|
853
|
+
const db = await VectorDB.open({
|
|
854
|
+
dimensions: 4,
|
|
855
|
+
normalize: false,
|
|
856
|
+
storage,
|
|
857
|
+
wasmBinary,
|
|
858
|
+
});
|
|
859
|
+
|
|
860
|
+
db.set("a", [1, 0, 0, 0]);
|
|
861
|
+
expect(db.has("a")).toBe(true);
|
|
862
|
+
});
|
|
863
|
+
|
|
864
|
+
it("has returns false for non-existent key", async () => {
|
|
865
|
+
const db = await VectorDB.open({
|
|
866
|
+
dimensions: 4,
|
|
867
|
+
storage,
|
|
868
|
+
wasmBinary,
|
|
869
|
+
});
|
|
870
|
+
|
|
871
|
+
expect(db.has("nonexistent")).toBe(false);
|
|
872
|
+
});
|
|
873
|
+
|
|
874
|
+
it("has throws on closed database", async () => {
|
|
875
|
+
const db = await VectorDB.open({
|
|
876
|
+
dimensions: 4,
|
|
877
|
+
storage,
|
|
878
|
+
wasmBinary,
|
|
879
|
+
});
|
|
880
|
+
|
|
881
|
+
await db.close();
|
|
882
|
+
expect(() => db.has("a")).toThrow("closed");
|
|
883
|
+
});
|
|
884
|
+
|
|
885
|
+
// --- delete ---
|
|
886
|
+
it("delete removes an existing entry and returns true", async () => {
|
|
887
|
+
const db = await VectorDB.open({
|
|
888
|
+
dimensions: 4,
|
|
889
|
+
normalize: false,
|
|
890
|
+
storage,
|
|
891
|
+
wasmBinary,
|
|
892
|
+
});
|
|
893
|
+
|
|
894
|
+
db.set("a", [1, 0, 0, 0]);
|
|
895
|
+
db.set("b", [0, 1, 0, 0]);
|
|
896
|
+
expect(db.size).toBe(2);
|
|
897
|
+
|
|
898
|
+
const result = db.delete("a");
|
|
899
|
+
expect(result).toBe(true);
|
|
900
|
+
expect(db.size).toBe(1);
|
|
901
|
+
expect(db.get("a")).toBeUndefined();
|
|
902
|
+
expect(db.has("a")).toBe(false);
|
|
903
|
+
expect(db.get("b")).toBeDefined();
|
|
904
|
+
});
|
|
905
|
+
|
|
906
|
+
it("delete returns false for non-existent key", async () => {
|
|
907
|
+
const db = await VectorDB.open({
|
|
908
|
+
dimensions: 4,
|
|
909
|
+
storage,
|
|
910
|
+
wasmBinary,
|
|
911
|
+
});
|
|
912
|
+
|
|
913
|
+
expect(db.delete("nonexistent")).toBe(false);
|
|
914
|
+
});
|
|
915
|
+
|
|
916
|
+
it("delete last entry leaves empty database", async () => {
|
|
917
|
+
const db = await VectorDB.open({
|
|
918
|
+
dimensions: 4,
|
|
919
|
+
normalize: false,
|
|
920
|
+
storage,
|
|
921
|
+
wasmBinary,
|
|
922
|
+
});
|
|
923
|
+
|
|
924
|
+
db.set("only", [1, 2, 3, 4]);
|
|
925
|
+
db.delete("only");
|
|
926
|
+
expect(db.size).toBe(0);
|
|
927
|
+
expect(db.get("only")).toBeUndefined();
|
|
928
|
+
});
|
|
929
|
+
|
|
930
|
+
it("delete preserves remaining entries and query works", async () => {
|
|
931
|
+
const db = await VectorDB.open({
|
|
932
|
+
dimensions: 4,
|
|
933
|
+
storage,
|
|
934
|
+
wasmBinary,
|
|
935
|
+
});
|
|
936
|
+
|
|
937
|
+
db.set("x-axis", [1, 0, 0, 0]);
|
|
938
|
+
db.set("y-axis", [0, 1, 0, 0]);
|
|
939
|
+
db.set("z-axis", [0, 0, 1, 0]);
|
|
940
|
+
|
|
941
|
+
db.delete("y-axis");
|
|
942
|
+
expect(db.size).toBe(2);
|
|
943
|
+
|
|
944
|
+
const results = db.query([1, 0, 0, 0]);
|
|
945
|
+
expect(results.length).toBe(2);
|
|
946
|
+
expect(results[0].key).toBe("x-axis");
|
|
947
|
+
expect(results.find((r) => r.key === "y-axis")).toBeUndefined();
|
|
948
|
+
});
|
|
949
|
+
|
|
950
|
+
it("delete then set reuses the database correctly", async () => {
|
|
951
|
+
const db = await VectorDB.open({
|
|
952
|
+
dimensions: 4,
|
|
953
|
+
normalize: false,
|
|
954
|
+
storage,
|
|
955
|
+
wasmBinary,
|
|
956
|
+
});
|
|
957
|
+
|
|
958
|
+
db.set("a", [1, 0, 0, 0]);
|
|
959
|
+
db.set("b", [0, 1, 0, 0]);
|
|
960
|
+
db.delete("a");
|
|
961
|
+
|
|
962
|
+
db.set("c", [0, 0, 1, 0]);
|
|
963
|
+
expect(db.size).toBe(2);
|
|
964
|
+
expect(db.get("a")).toBeUndefined();
|
|
965
|
+
expect(db.get("b")![1]).toBeCloseTo(1);
|
|
966
|
+
expect(db.get("c")![2]).toBeCloseTo(1);
|
|
967
|
+
});
|
|
968
|
+
|
|
969
|
+
it("delete persists correctly after flush", async () => {
|
|
970
|
+
const db1 = await VectorDB.open({
|
|
971
|
+
dimensions: 4,
|
|
972
|
+
normalize: false,
|
|
973
|
+
storage,
|
|
974
|
+
wasmBinary,
|
|
975
|
+
});
|
|
976
|
+
|
|
977
|
+
db1.set("a", [1, 0, 0, 0]);
|
|
978
|
+
db1.set("b", [0, 1, 0, 0]);
|
|
979
|
+
db1.delete("a");
|
|
980
|
+
await db1.flush();
|
|
981
|
+
|
|
982
|
+
const db2 = await VectorDB.open({
|
|
983
|
+
dimensions: 4,
|
|
984
|
+
normalize: false,
|
|
985
|
+
storage,
|
|
986
|
+
wasmBinary,
|
|
987
|
+
});
|
|
988
|
+
|
|
989
|
+
expect(db2.size).toBe(1);
|
|
990
|
+
expect(db2.get("a")).toBeUndefined();
|
|
991
|
+
expect(db2.get("b")![1]).toBeCloseTo(1);
|
|
992
|
+
});
|
|
993
|
+
|
|
994
|
+
it("delete throws on closed database", async () => {
|
|
995
|
+
const db = await VectorDB.open({
|
|
996
|
+
dimensions: 4,
|
|
997
|
+
storage,
|
|
998
|
+
wasmBinary,
|
|
999
|
+
});
|
|
1000
|
+
|
|
1001
|
+
await db.close();
|
|
1002
|
+
expect(() => db.delete("a")).toThrow("closed");
|
|
1003
|
+
});
|
|
1004
|
+
|
|
1005
|
+
// --- keys ---
|
|
1006
|
+
it("keys returns an iterable of all keys", async () => {
|
|
1007
|
+
const db = await VectorDB.open({
|
|
1008
|
+
dimensions: 4,
|
|
1009
|
+
normalize: false,
|
|
1010
|
+
storage,
|
|
1011
|
+
wasmBinary,
|
|
1012
|
+
});
|
|
1013
|
+
|
|
1014
|
+
db.set("a", [1, 0, 0, 0]);
|
|
1015
|
+
db.set("b", [0, 1, 0, 0]);
|
|
1016
|
+
db.set("c", [0, 0, 1, 0]);
|
|
1017
|
+
|
|
1018
|
+
const keys = [...db.keys()];
|
|
1019
|
+
expect(keys).toHaveLength(3);
|
|
1020
|
+
expect(keys).toContain("a");
|
|
1021
|
+
expect(keys).toContain("b");
|
|
1022
|
+
expect(keys).toContain("c");
|
|
1023
|
+
});
|
|
1024
|
+
|
|
1025
|
+
it("keys returns empty iterable for empty database", async () => {
|
|
1026
|
+
const db = await VectorDB.open({
|
|
1027
|
+
dimensions: 4,
|
|
1028
|
+
storage,
|
|
1029
|
+
wasmBinary,
|
|
1030
|
+
});
|
|
1031
|
+
|
|
1032
|
+
expect([...db.keys()]).toEqual([]);
|
|
1033
|
+
});
|
|
1034
|
+
|
|
1035
|
+
it("keys throws on closed database", async () => {
|
|
1036
|
+
const db = await VectorDB.open({
|
|
1037
|
+
dimensions: 4,
|
|
1038
|
+
storage,
|
|
1039
|
+
wasmBinary,
|
|
1040
|
+
});
|
|
1041
|
+
|
|
1042
|
+
await db.close();
|
|
1043
|
+
expect(() => db.keys()).toThrow("closed");
|
|
1044
|
+
});
|
|
1045
|
+
|
|
1046
|
+
// --- entries ---
|
|
1047
|
+
it("entries returns an iterable of [key, value] pairs", async () => {
|
|
1048
|
+
const db = await VectorDB.open({
|
|
1049
|
+
dimensions: 4,
|
|
1050
|
+
normalize: false,
|
|
1051
|
+
storage,
|
|
1052
|
+
wasmBinary,
|
|
1053
|
+
});
|
|
1054
|
+
|
|
1055
|
+
db.set("a", [1, 0, 0, 0]);
|
|
1056
|
+
db.set("b", [0, 1, 0, 0]);
|
|
1057
|
+
|
|
1058
|
+
const entries = [...db.entries()];
|
|
1059
|
+
expect(entries).toHaveLength(2);
|
|
1060
|
+
|
|
1061
|
+
const aEntry = entries.find(([key]) => key === "a");
|
|
1062
|
+
expect(aEntry).toBeDefined();
|
|
1063
|
+
expect(aEntry![1][0]).toBeCloseTo(1);
|
|
1064
|
+
|
|
1065
|
+
const bEntry = entries.find(([key]) => key === "b");
|
|
1066
|
+
expect(bEntry).toBeDefined();
|
|
1067
|
+
expect(bEntry![1][1]).toBeCloseTo(1);
|
|
1068
|
+
});
|
|
1069
|
+
|
|
1070
|
+
it("entries returns empty iterable for empty database", async () => {
|
|
1071
|
+
const db = await VectorDB.open({
|
|
1072
|
+
dimensions: 4,
|
|
1073
|
+
storage,
|
|
1074
|
+
wasmBinary,
|
|
1075
|
+
});
|
|
1076
|
+
|
|
1077
|
+
expect([...db.entries()]).toEqual([]);
|
|
1078
|
+
});
|
|
1079
|
+
|
|
1080
|
+
it("entries throws on closed database", async () => {
|
|
1081
|
+
const db = await VectorDB.open({
|
|
1082
|
+
dimensions: 4,
|
|
1083
|
+
storage,
|
|
1084
|
+
wasmBinary,
|
|
1085
|
+
});
|
|
1086
|
+
|
|
1087
|
+
await db.close();
|
|
1088
|
+
expect(() => db.entries()).toThrow("closed");
|
|
1089
|
+
});
|
|
1090
|
+
|
|
1091
|
+
// --- Symbol.iterator ---
|
|
1092
|
+
it("supports spread operator via Symbol.iterator", async () => {
|
|
1093
|
+
const db = await VectorDB.open({
|
|
1094
|
+
dimensions: 4,
|
|
1095
|
+
normalize: false,
|
|
1096
|
+
storage,
|
|
1097
|
+
wasmBinary,
|
|
1098
|
+
});
|
|
1099
|
+
|
|
1100
|
+
db.set("a", [1, 0, 0, 0]);
|
|
1101
|
+
db.set("b", [0, 1, 0, 0]);
|
|
1102
|
+
|
|
1103
|
+
const spread = [...db];
|
|
1104
|
+
expect(spread).toHaveLength(2);
|
|
1105
|
+
|
|
1106
|
+
// Same as entries()
|
|
1107
|
+
const entries = [...db.entries()];
|
|
1108
|
+
expect(spread).toEqual(entries);
|
|
1109
|
+
});
|
|
1110
|
+
|
|
1111
|
+
it("supports for-of iteration", async () => {
|
|
1112
|
+
const db = await VectorDB.open({
|
|
1113
|
+
dimensions: 4,
|
|
1114
|
+
normalize: false,
|
|
1115
|
+
storage,
|
|
1116
|
+
wasmBinary,
|
|
1117
|
+
});
|
|
1118
|
+
|
|
1119
|
+
db.set("a", [1, 0, 0, 0]);
|
|
1120
|
+
db.set("b", [0, 1, 0, 0]);
|
|
1121
|
+
|
|
1122
|
+
const collected: [string, number[]][] = [];
|
|
1123
|
+
for (const entry of db) {
|
|
1124
|
+
collected.push(entry);
|
|
1125
|
+
}
|
|
1126
|
+
expect(collected).toHaveLength(2);
|
|
1127
|
+
});
|
|
1128
|
+
|
|
841
1129
|
it("import works correctly with single-byte stream chunks", async () => {
|
|
842
1130
|
const db1 = await VectorDB.open({
|
|
843
1131
|
dimensions: 4,
|
package/src/lib/vector-db.ts
CHANGED
|
@@ -38,7 +38,7 @@ const STREAM_CHUNK_SIZE = 65536;
|
|
|
38
38
|
export class VectorDB {
|
|
39
39
|
private readonly memoryManager: MemoryManager;
|
|
40
40
|
private readonly storage: StorageProvider;
|
|
41
|
-
private readonly dimensions: number;
|
|
41
|
+
private readonly _dimensions: number;
|
|
42
42
|
private readonly shouldNormalize: boolean;
|
|
43
43
|
private wasmExports: WasmExports | null;
|
|
44
44
|
|
|
@@ -62,7 +62,7 @@ export class VectorDB {
|
|
|
62
62
|
) {
|
|
63
63
|
this.memoryManager = memoryManager;
|
|
64
64
|
this.storage = storage;
|
|
65
|
-
this.dimensions = dimensions;
|
|
65
|
+
this._dimensions = dimensions;
|
|
66
66
|
this.shouldNormalize = shouldNormalize;
|
|
67
67
|
this.wasmExports = wasmExports;
|
|
68
68
|
this.keyToSlot = keyToSlot;
|
|
@@ -122,6 +122,81 @@ export class VectorDB {
|
|
|
122
122
|
return this.keyToSlot.size;
|
|
123
123
|
}
|
|
124
124
|
|
|
125
|
+
/** Number of dimensions per vector */
|
|
126
|
+
get dimensions(): number {
|
|
127
|
+
return this._dimensions;
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
/**
|
|
131
|
+
* Check whether a key exists in the database.
|
|
132
|
+
* Uses the internal key-to-slot map for O(1) lookup.
|
|
133
|
+
*/
|
|
134
|
+
has(key: string): boolean {
|
|
135
|
+
this.assertOpen();
|
|
136
|
+
return this.keyToSlot.has(key);
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
/**
|
|
140
|
+
* Delete an entry by key. Returns true if the key existed, false otherwise.
|
|
141
|
+
* Uses swap-and-pop to avoid gaps in the underlying vector array.
|
|
142
|
+
*/
|
|
143
|
+
delete(key: string): boolean {
|
|
144
|
+
this.assertOpen();
|
|
145
|
+
|
|
146
|
+
const slot = this.keyToSlot.get(key);
|
|
147
|
+
if (slot === undefined) return false;
|
|
148
|
+
|
|
149
|
+
const lastSlot = this.memoryManager.vectorCount - 1;
|
|
150
|
+
|
|
151
|
+
if (slot !== lastSlot) {
|
|
152
|
+
// Move last vector into the deleted slot
|
|
153
|
+
const lastVector = new Float32Array(this.memoryManager.readVector(lastSlot));
|
|
154
|
+
this.memoryManager.writeVector(slot, lastVector);
|
|
155
|
+
|
|
156
|
+
// Update mappings for the moved key
|
|
157
|
+
const movedKey = this.slotToKey[lastSlot];
|
|
158
|
+
this.keyToSlot.set(movedKey, slot);
|
|
159
|
+
this.slotToKey[slot] = movedKey;
|
|
160
|
+
}
|
|
161
|
+
|
|
162
|
+
// Remove the deleted key and shrink
|
|
163
|
+
this.keyToSlot.delete(key);
|
|
164
|
+
this.slotToKey.length = lastSlot;
|
|
165
|
+
this.memoryManager.setVectorCount(lastSlot);
|
|
166
|
+
|
|
167
|
+
return true;
|
|
168
|
+
}
|
|
169
|
+
|
|
170
|
+
/**
|
|
171
|
+
* Returns an iterable of all keys in the database.
|
|
172
|
+
*/
|
|
173
|
+
keys(): IterableIterator<string> {
|
|
174
|
+
this.assertOpen();
|
|
175
|
+
return this.keyToSlot.keys();
|
|
176
|
+
}
|
|
177
|
+
|
|
178
|
+
/**
|
|
179
|
+
* Returns an iterable of [key, value] pairs.
|
|
180
|
+
* Values are returned as plain number array copies.
|
|
181
|
+
*/
|
|
182
|
+
entries(): IterableIterator<[string, number[]]> {
|
|
183
|
+
this.assertOpen();
|
|
184
|
+
const keyToSlot = this.keyToSlot;
|
|
185
|
+
const mm = this.memoryManager;
|
|
186
|
+
return (function* () {
|
|
187
|
+
for (const [key, slot] of keyToSlot) {
|
|
188
|
+
yield [key, Array.from(mm.readVector(slot))] as [string, number[]];
|
|
189
|
+
}
|
|
190
|
+
})();
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
/**
|
|
194
|
+
* Implements the iterable protocol. Same as entries().
|
|
195
|
+
*/
|
|
196
|
+
[Symbol.iterator](): IterableIterator<[string, number[]]> {
|
|
197
|
+
return this.entries();
|
|
198
|
+
}
|
|
199
|
+
|
|
125
200
|
/**
|
|
126
201
|
* Set a key-value pair. If the key already exists, its vector is overwritten (last-write-wins).
|
|
127
202
|
* The value is a number[] or Float32Array of length equal to the configured dimensions.
|
|
@@ -129,8 +204,8 @@ export class VectorDB {
|
|
|
129
204
|
set(key: string, value: VectorInput, options?: SetOptions): void {
|
|
130
205
|
this.assertOpen();
|
|
131
206
|
|
|
132
|
-
if (value.length !== this.dimensions) {
|
|
133
|
-
throw new Error(`Vector dimension mismatch: expected ${this.dimensions}, got ${value.length}`);
|
|
207
|
+
if (value.length !== this._dimensions) {
|
|
208
|
+
throw new Error(`Vector dimension mismatch: expected ${this._dimensions}, got ${value.length}`);
|
|
134
209
|
}
|
|
135
210
|
|
|
136
211
|
// Convert to Float32Array (also clones to avoid mutating caller's array)
|
|
@@ -214,8 +289,8 @@ export class VectorDB {
|
|
|
214
289
|
return [];
|
|
215
290
|
}
|
|
216
291
|
|
|
217
|
-
if (value.length !== this.dimensions) {
|
|
218
|
-
throw new Error(`Query vector dimension mismatch: expected ${this.dimensions}, got ${value.length}`);
|
|
292
|
+
if (value.length !== this._dimensions) {
|
|
293
|
+
throw new Error(`Query vector dimension mismatch: expected ${this._dimensions}, got ${value.length}`);
|
|
219
294
|
}
|
|
220
295
|
|
|
221
296
|
// Convert to Float32Array and optionally normalize the query vector
|
|
@@ -242,21 +317,21 @@ export class VectorDB {
|
|
|
242
317
|
this.memoryManager.dbOffset,
|
|
243
318
|
scoresOffset,
|
|
244
319
|
totalVectors,
|
|
245
|
-
this.dimensions,
|
|
320
|
+
this._dimensions,
|
|
246
321
|
);
|
|
247
322
|
} else {
|
|
248
323
|
const queryView = new Float32Array(
|
|
249
324
|
this.memoryManager.memory.buffer,
|
|
250
325
|
this.memoryManager.queryOffset,
|
|
251
|
-
this.dimensions,
|
|
326
|
+
this._dimensions,
|
|
252
327
|
);
|
|
253
328
|
const dbView = new Float32Array(
|
|
254
329
|
this.memoryManager.memory.buffer,
|
|
255
330
|
this.memoryManager.dbOffset,
|
|
256
|
-
totalVectors * this.dimensions,
|
|
331
|
+
totalVectors * this._dimensions,
|
|
257
332
|
);
|
|
258
333
|
const scoresView = new Float32Array(this.memoryManager.memory.buffer, scoresOffset, totalVectors);
|
|
259
|
-
searchAll(queryView, dbView, scoresView, totalVectors, this.dimensions);
|
|
334
|
+
searchAll(queryView, dbView, scoresView, totalVectors, this._dimensions);
|
|
260
335
|
}
|
|
261
336
|
|
|
262
337
|
// Read scores (make a copy so the buffer can be reused)
|
|
@@ -283,12 +358,12 @@ export class VectorDB {
|
|
|
283
358
|
const totalVectors = this.memoryManager.vectorCount;
|
|
284
359
|
|
|
285
360
|
// Serialize vectors from WASM memory
|
|
286
|
-
const vectorBytes = new Uint8Array(totalVectors * this.dimensions * 4);
|
|
361
|
+
const vectorBytes = new Uint8Array(totalVectors * this._dimensions * 4);
|
|
287
362
|
if (totalVectors > 0) {
|
|
288
363
|
const src = new Uint8Array(
|
|
289
364
|
this.memoryManager.memory.buffer,
|
|
290
365
|
this.memoryManager.dbOffset,
|
|
291
|
-
totalVectors * this.dimensions * 4,
|
|
366
|
+
totalVectors * this._dimensions * 4,
|
|
292
367
|
);
|
|
293
368
|
vectorBytes.set(src);
|
|
294
369
|
}
|
|
@@ -335,7 +410,7 @@ export class VectorDB {
|
|
|
335
410
|
this.assertOpen();
|
|
336
411
|
|
|
337
412
|
const totalVectors = this.memoryManager.vectorCount;
|
|
338
|
-
const vectorDataLen = totalVectors * this.dimensions * 4;
|
|
413
|
+
const vectorDataLen = totalVectors * this._dimensions * 4;
|
|
339
414
|
|
|
340
415
|
// Encode keys (typically much smaller than vectors)
|
|
341
416
|
const keysBytes = encodeLexicon(this.slotToKey);
|
|
@@ -346,7 +421,7 @@ export class VectorDB {
|
|
|
346
421
|
const headerView = new DataView(header);
|
|
347
422
|
headerView.setUint32(0, EXPORT_MAGIC, true);
|
|
348
423
|
headerView.setUint32(4, EXPORT_VERSION, true);
|
|
349
|
-
headerView.setUint32(8, this.dimensions, true);
|
|
424
|
+
headerView.setUint32(8, this._dimensions, true);
|
|
350
425
|
headerView.setUint32(12, totalVectors, true);
|
|
351
426
|
headerView.setUint32(16, vectorDataLen, true);
|
|
352
427
|
headerView.setUint32(20, keysDataLen, true);
|
|
@@ -415,8 +490,8 @@ export class VectorDB {
|
|
|
415
490
|
}
|
|
416
491
|
|
|
417
492
|
const dimensions = headerView.getUint32(8, true);
|
|
418
|
-
if (dimensions !== this.dimensions) {
|
|
419
|
-
throw new Error(`Import dimension mismatch: expected ${this.dimensions}, got ${dimensions}`);
|
|
493
|
+
if (dimensions !== this._dimensions) {
|
|
494
|
+
throw new Error(`Import dimension mismatch: expected ${this._dimensions}, got ${dimensions}`);
|
|
420
495
|
}
|
|
421
496
|
|
|
422
497
|
const vectorCount = headerView.getUint32(12, true);
|