eigen-db 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -0
- package/dist/eigen-db.js +446 -0
- package/dist/eigen-db.js.map +1 -0
- package/dist/eigen-db.umd.cjs +2 -0
- package/dist/eigen-db.umd.cjs.map +1 -0
- package/package.json +27 -0
- package/src/lib/__tests__/compute.bench.ts +70 -0
- package/src/lib/__tests__/compute.test.ts +121 -0
- package/src/lib/__tests__/lexicon.test.ts +96 -0
- package/src/lib/__tests__/memory-manager.test.ts +94 -0
- package/src/lib/__tests__/result-set.test.ts +90 -0
- package/src/lib/__tests__/vector-db.test.ts +443 -0
- package/src/lib/__tests__/wasm-compute.test.ts +152 -0
- package/src/lib/compute.ts +48 -0
- package/src/lib/errors.ts +10 -0
- package/src/lib/index.ts +14 -0
- package/src/lib/lexicon.ts +95 -0
- package/src/lib/memory-manager.ts +147 -0
- package/src/lib/result-set.ts +89 -0
- package/src/lib/simd-binary.ts +11 -0
- package/src/lib/simd.wat +220 -0
- package/src/lib/storage.ts +111 -0
- package/src/lib/types.ts +39 -0
- package/src/lib/vector-db.ts +346 -0
- package/src/lib/wasm-compute.ts +41 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"eigen-db.umd.cjs","sources":["../src/lib/compute.ts","../src/lib/errors.ts","../src/lib/lexicon.ts","../src/lib/memory-manager.ts","../src/lib/result-set.ts","../src/lib/simd-binary.ts","../src/lib/storage.ts","../src/lib/wasm-compute.ts","../src/lib/vector-db.ts"],"sourcesContent":["/**\n * Pure JavaScript compute functions for vector operations.\n * These serve as the reference implementation and fallback when WASM SIMD is unavailable.\n */\n\n/**\n * Normalizes a vector in-place to unit length.\n * After normalization, cosine similarity reduces to a simple dot product.\n */\nexport function normalize(vec: Float32Array): void {\n let sumSq = 0;\n for (let i = 0; i < vec.length; i++) {\n sumSq += vec[i] * vec[i];\n }\n const mag = Math.sqrt(sumSq);\n if (mag === 0) return;\n const invMag = 1 / mag;\n for (let i = 0; i < vec.length; i++) {\n vec[i] *= invMag;\n }\n}\n\n/**\n * Computes dot products of query against all vectors in the database.\n * Writes scores to the output array.\n *\n * @param query - Normalized query vector (length = dimensions)\n * @param db - Contiguous flat array of normalized vectors (length = dbSize * dimensions)\n * @param scores - Output array for dot product scores (length = dbSize)\n * @param dbSize - Number of vectors in the database\n * @param dimensions - Dimensionality of each vector\n */\nexport function searchAll(\n query: Float32Array,\n db: Float32Array,\n scores: Float32Array,\n dbSize: number,\n dimensions: number,\n): void {\n for (let i = 0; i < dbSize; i++) {\n let dot = 0;\n const offset = i * dimensions;\n for (let j = 0; j < dimensions; j++) {\n dot += query[j] * db[offset + j];\n }\n scores[i] = dot;\n }\n}\n","/**\n * Thrown when the database exceeds the 4GB WebAssembly 32-bit memory limit,\n * or the browser's available RAM.\n */\nexport class VectorCapacityExceededError extends Error {\n constructor(maxVectors: number) {\n super(`Capacity exceeded. 
Max vectors for this dimension size is ~${maxVectors}.`);\n this.name = \"VectorCapacityExceededError\";\n }\n}\n","/**\n * Lexicon: length-prefixed UTF-8 encoding for text strings.\n *\n * Format: Each entry is [4-byte uint32 length][UTF-8 bytes]\n * This allows efficient sequential reading and appending.\n */\n\nconst encoder = new TextEncoder();\nconst decoder = new TextDecoder();\n\n/**\n * Encodes an array of strings into a length-prefixed binary format.\n */\nexport function encodeLexicon(texts: string[]): Uint8Array {\n const encoded = texts.map((t) => encoder.encode(t));\n const totalSize = encoded.reduce((sum, e) => sum + 4 + e.byteLength, 0);\n\n const buffer = new ArrayBuffer(totalSize);\n const view = new DataView(buffer);\n const bytes = new Uint8Array(buffer);\n let offset = 0;\n\n for (const e of encoded) {\n view.setUint32(offset, e.byteLength, true); // little-endian\n offset += 4;\n bytes.set(e, offset);\n offset += e.byteLength;\n }\n\n return bytes;\n}\n\n/**\n * Decodes all strings from a length-prefixed binary buffer.\n */\nexport function decodeLexicon(data: Uint8Array): string[] {\n const result: string[] = [];\n const view = new DataView(data.buffer, data.byteOffset, data.byteLength);\n let offset = 0;\n\n while (offset < data.byteLength) {\n const len = view.getUint32(offset, true);\n offset += 4;\n const text = decoder.decode(data.subarray(offset, offset + len));\n result.push(text);\n offset += len;\n }\n\n return result;\n}\n\n/**\n * Decodes a single string at a given index from the lexicon.\n * Returns the string and the byte offset of the next entry.\n */\nexport function decodeLexiconAt(data: Uint8Array, index: number): string {\n const view = new DataView(data.buffer, data.byteOffset, data.byteLength);\n let offset = 0;\n\n for (let i = 0; i < index; i++) {\n const len = view.getUint32(offset, true);\n offset += 4 + len;\n }\n\n const len = view.getUint32(offset, true);\n offset += 4;\n return decoder.decode(data.subarray(offset, 
offset + len));\n}\n\n/**\n * Builds an index of byte offsets for each entry in the lexicon.\n * Enables O(1) access to any entry by index.\n */\nexport function buildLexiconIndex(data: Uint8Array): Uint32Array {\n const offsets: number[] = [];\n const view = new DataView(data.buffer, data.byteOffset, data.byteLength);\n let offset = 0;\n\n while (offset < data.byteLength) {\n offsets.push(offset);\n const len = view.getUint32(offset, true);\n offset += 4 + len;\n }\n\n return new Uint32Array(offsets);\n}\n\n/**\n * Decodes a string at a given byte offset in the lexicon.\n */\nexport function decodeLexiconAtOffset(data: Uint8Array, byteOffset: number): string {\n const view = new DataView(data.buffer, data.byteOffset, data.byteLength);\n const len = view.getUint32(byteOffset, true);\n return decoder.decode(data.subarray(byteOffset + 4, byteOffset + 4 + len));\n}\n","/**\n * Memory Manager for WASM shared memory.\n *\n * Memory Layout:\n * [ 0x00000 ] -> Query Vector Buffer (Fixed, dimensions * 4 bytes, aligned to 64KB page)\n * [ DB_OFFSET ] -> Vector Database (Grows dynamically)\n * [ Dynamic ] -> Scores Buffer (Mapped after DB during search)\n */\n\n/** WASM page size is 64KB */\nconst PAGE_SIZE = 65536;\n\n/** Maximum WASM memory: ~4GB (65536 pages of 64KB each) */\nconst MAX_PAGES = 65536;\n\nexport class MemoryManager {\n readonly memory: WebAssembly.Memory;\n readonly dimensions: number;\n readonly queryOffset: number;\n readonly dbOffset: number;\n private _vectorCount: number;\n\n constructor(dimensions: number, initialVectorCount: number = 0) {\n this.dimensions = dimensions;\n\n // Query buffer: dimensions * 4 bytes, aligned to page boundary\n this.queryOffset = 0;\n const queryBytes = dimensions * 4;\n this.dbOffset = Math.ceil(queryBytes / PAGE_SIZE) * PAGE_SIZE;\n\n // Calculate initial memory needed\n const dbBytes = initialVectorCount * dimensions * 4;\n const totalBytes = this.dbOffset + dbBytes;\n const initialPages = Math.max(1, 
Math.ceil(totalBytes / PAGE_SIZE));\n\n this.memory = new WebAssembly.Memory({ initial: initialPages });\n this._vectorCount = initialVectorCount;\n }\n\n /** Current number of vectors stored */\n get vectorCount(): number {\n return this._vectorCount;\n }\n\n /** Byte offset where the scores buffer starts (right after DB) */\n get scoresOffset(): number {\n return this.dbOffset + this._vectorCount * this.dimensions * 4;\n }\n\n /** Total bytes needed for scores buffer */\n get scoresBytes(): number {\n return this._vectorCount * 4;\n }\n\n /**\n * Maximum vectors that can be stored given the 4GB WASM memory limit.\n * Accounts for query buffer, DB space, and scores buffer.\n */\n get maxVectors(): number {\n const availableBytes = MAX_PAGES * PAGE_SIZE - this.dbOffset;\n // Each vector needs: dimensions * 4 bytes (DB) + 4 bytes (score)\n const bytesPerVector = this.dimensions * 4 + 4;\n return Math.floor(availableBytes / bytesPerVector);\n }\n\n /**\n * Ensures memory is large enough for the current DB + scores buffer.\n * Calls memory.grow() if needed.\n */\n ensureCapacity(additionalVectors: number): void {\n const newTotal = this._vectorCount + additionalVectors;\n const requiredBytes =\n this.dbOffset + newTotal * this.dimensions * 4 + newTotal * 4; // DB + scores\n const currentBytes = this.memory.buffer.byteLength;\n\n if (requiredBytes > currentBytes) {\n const pagesNeeded = Math.ceil((requiredBytes - currentBytes) / PAGE_SIZE);\n const currentPages = currentBytes / PAGE_SIZE;\n if (currentPages + pagesNeeded > MAX_PAGES) {\n throw new Error(\"WASM memory limit exceeded\");\n }\n this.memory.grow(pagesNeeded);\n }\n }\n\n /**\n * Write a query vector into the query buffer region.\n */\n writeQuery(vector: Float32Array): void {\n new Float32Array(this.memory.buffer, this.queryOffset, this.dimensions).set(vector);\n }\n\n /**\n * Append vectors to the database region.\n * Returns the byte offset where the new vectors were written.\n */\n 
appendVectors(vectors: Float32Array[]): number {\n const startOffset = this.dbOffset + this._vectorCount * this.dimensions * 4;\n let offset = startOffset;\n for (const vec of vectors) {\n new Float32Array(this.memory.buffer, offset, this.dimensions).set(vec);\n offset += this.dimensions * 4;\n }\n this._vectorCount += vectors.length;\n return startOffset;\n }\n\n /**\n * Load raw vector bytes directly into the database region.\n * Used for bulk loading from OPFS.\n */\n loadVectorBytes(data: Uint8Array, vectorCount: number): void {\n new Uint8Array(this.memory.buffer, this.dbOffset, data.byteLength).set(data);\n this._vectorCount = vectorCount;\n }\n\n /**\n * Read the scores buffer as a Float32Array view.\n */\n readScores(): Float32Array {\n return new Float32Array(this.memory.buffer, this.scoresOffset, this._vectorCount);\n }\n\n /**\n * Read the DB region for a specific vector index.\n */\n readVector(index: number): Float32Array {\n const offset = this.dbOffset + index * this.dimensions * 4;\n return new Float32Array(this.memory.buffer, offset, this.dimensions);\n }\n\n /**\n * Write a vector to a specific slot in the database region.\n */\n writeVector(index: number, vector: Float32Array): void {\n const offset = this.dbOffset + index * this.dimensions * 4;\n new Float32Array(this.memory.buffer, offset, this.dimensions).set(vector);\n }\n\n /**\n * Reset the vector count to zero, logically clearing the database.\n * WASM memory is not freed but will be overwritten on next writes.\n */\n reset(): void {\n this._vectorCount = 0;\n }\n}\n","/**\n * LAZY RESULT SET\n *\n * Holds pointers to sorted TypedArrays. 
Prevents JS heap overflow when K is massive.\n * Strings are only instantiated from the Lexicon when explicitly requested.\n */\n\nexport interface ResultItem {\n key: string;\n score: number;\n}\n\nexport type KeyResolver = (index: number) => string;\n\nexport class ResultSet {\n /** Total number of results */\n readonly length: number;\n\n /**\n * Sorted indices into the original database (by descending score).\n * sortedIndices[0] is the index of the best match.\n */\n private readonly sortedIndices: Uint32Array;\n\n /** Raw scores array (not sorted, indexed by original DB position) */\n private readonly scores: Float32Array;\n\n /** Function to lazily resolve key from the slot index */\n private readonly resolveKey: KeyResolver;\n\n constructor(\n scores: Float32Array,\n sortedIndices: Uint32Array,\n resolveKey: KeyResolver,\n topK: number,\n ) {\n this.scores = scores;\n this.sortedIndices = sortedIndices;\n this.resolveKey = resolveKey;\n this.length = Math.min(topK, sortedIndices.length);\n }\n\n /**\n * Sort scores and return a ResultSet with lazy key resolution.\n *\n * @param scores - Float32Array of scores (one per DB vector)\n * @param resolveKey - Function to resolve key by original index\n * @param topK - Maximum number of results to include\n */\n static fromScores(\n scores: Float32Array,\n resolveKey: KeyResolver,\n topK: number,\n ): ResultSet {\n const n = scores.length;\n\n // Create index array for sorting\n const indices = new Uint32Array(n);\n for (let i = 0; i < n; i++) indices[i] = i;\n\n // Sort indices by descending score\n indices.sort((a, b) => scores[b] - scores[a]);\n\n return new ResultSet(scores, indices, resolveKey, topK);\n }\n\n /** Fetch a single result by its rank (0 is best match) */\n get(rank: number): ResultItem {\n if (rank < 0 || rank >= this.length) {\n throw new RangeError(`Rank ${rank} out of bounds [0, ${this.length})`);\n }\n const dbIndex = this.sortedIndices[rank];\n return {\n key: this.resolveKey(dbIndex),\n 
score: this.scores[dbIndex],\n };\n }\n\n /** Helper for UI pagination. Instantiates strings only for the requested page. */\n getPage(page: number, pageSize: number): ResultItem[] {\n const start = page * pageSize;\n const end = Math.min(start + pageSize, this.length);\n const results: ResultItem[] = [];\n for (let i = start; i < end; i++) {\n results.push(this.get(i));\n }\n return results;\n }\n}\n","// AUTO-GENERATED - Do not edit. Run: npx tsx scripts/compile-wat.ts\nconst SIMD_WASM_BASE64 = \"AGFzbQEAAAABDgJgAn9/AGAFf39/f38AAg8BA2VudgZtZW1vcnkCAAEDAwIAAQcaAglub3JtYWxpemUAAApzZWFyY2hfYWxsAAEKsgQCtQIFAX8BewN9AXsDf/0MAAAAAAAAAAAAAAAAAAAAACEDIAFBfHEhCEEAIQICQANAIAIgCE8NASAAIAJBAnRqIQogAyAK/QAEACAK/QAEAP3mAf3kASEDIAJBBGohAgwACwsgA/0fACAD/R8BkiAD/R8CIAP9HwOSkiEEIAghCQJAA0AgCSABTw0BIAAgCUECdGohCiAEIAoqAgAgCioCAJSSIQQgCUEBaiEJDAALCyAEkSEFIAVDAAAAAFsEQA8LQwAAgD8gBZUhBiAG/RMhB0EAIQICQANAIAIgCE8NASAAIAJBAnRqIQogCiAK/QAEACAH/eYB/QsEACACQQRqIQIMAAsLIAghCQJAA0AgCSABTw0BIAAgCUECdGohCiAKIAoqAgAgBpQ4AgAgCUEBaiEJDAALCwv4AQQCfwF7AX0GfyAEQXxxIQogBEECdCEOQQAhBQJAA0AgBSADTw0BIAEgBSAObGohCf0MAAAAAAAAAAAAAAAAAAAAACEHQQAhBgJAA0AgBiAKTw0BIAAgBkECdGohDCAJIAZBAnRqIQ0gByAM/QAEACAN/QAEAP3mAf3kASEHIAZBBGohBgwACwsgB/0fACAH/R8BkiAH/R8CIAf9HwOSkiEIIAohCwJAA0AgCyAETw0BIAAgC0ECdGohDCAJIAtBAnRqIQ0gCCAMKgIAIA0qAgCUkiEIIAtBAWohCwwACwsgAiAFQQJ0aiAIOAIAIAVBAWohBQwACwsL\";\n\nexport function getSimdWasmBinary(): Uint8Array {\n const binaryString = atob(SIMD_WASM_BASE64);\n const bytes = new Uint8Array(binaryString.length);\n for (let i = 0; i < binaryString.length; i++) {\n bytes[i] = binaryString.charCodeAt(i);\n }\n return bytes;\n}\n","/**\n * Storage abstraction for append-only binary files.\n * Supports OPFS for browser and in-memory for testing.\n */\n\nexport interface StorageProvider {\n /** Read the entire contents of a file. Returns empty Uint8Array if file doesn't exist. */\n readAll(fileName: string): Promise<Uint8Array>;\n\n /** Append data to a file (creates if it doesn't exist). 
*/\n append(fileName: string, data: Uint8Array): Promise<void>;\n\n /** Write data to a file, replacing all existing content. */\n write(fileName: string, data: Uint8Array): Promise<void>;\n\n /** Delete the storage directory and all files. */\n destroy(): Promise<void>;\n}\n\n/**\n * OPFS-backed storage provider for browser environments.\n * Uses Origin Private File System for high-performance persistent storage.\n */\nexport class OPFSStorageProvider implements StorageProvider {\n private dirHandle: FileSystemDirectoryHandle | null = null;\n private dirName: string;\n\n constructor(dirName: string) {\n this.dirName = dirName;\n }\n\n private async getDir(): Promise<FileSystemDirectoryHandle> {\n if (!this.dirHandle) {\n const root = await navigator.storage.getDirectory();\n this.dirHandle = await root.getDirectoryHandle(this.dirName, { create: true });\n }\n return this.dirHandle;\n }\n\n async readAll(fileName: string): Promise<Uint8Array> {\n try {\n const dir = await this.getDir();\n const fileHandle = await dir.getFileHandle(fileName);\n const file = await fileHandle.getFile();\n const buffer = await file.arrayBuffer();\n return new Uint8Array(buffer);\n } catch {\n return new Uint8Array(0);\n }\n }\n\n async append(fileName: string, data: Uint8Array): Promise<void> {\n const dir = await this.getDir();\n const fileHandle = await dir.getFileHandle(fileName, { create: true });\n const writable = await fileHandle.createWritable({ keepExistingData: true });\n const file = await fileHandle.getFile();\n await writable.seek(file.size);\n await writable.write(data as unknown as BufferSource);\n await writable.close();\n }\n\n async write(fileName: string, data: Uint8Array): Promise<void> {\n const dir = await this.getDir();\n const fileHandle = await dir.getFileHandle(fileName, { create: true });\n const writable = await fileHandle.createWritable({ keepExistingData: false });\n await writable.write(data as unknown as BufferSource);\n await writable.close();\n }\n\n 
async destroy(): Promise<void> {\n const root = await navigator.storage.getDirectory();\n await root.removeEntry(this.dirName, { recursive: true });\n this.dirHandle = null;\n }\n}\n\n/**\n * In-memory storage provider for testing.\n */\nexport class InMemoryStorageProvider implements StorageProvider {\n private files = new Map<string, Uint8Array[]>();\n\n async readAll(fileName: string): Promise<Uint8Array> {\n const chunks = this.files.get(fileName);\n if (!chunks || chunks.length === 0) return new Uint8Array(0);\n\n const totalSize = chunks.reduce((sum, c) => sum + c.byteLength, 0);\n const result = new Uint8Array(totalSize);\n let offset = 0;\n for (const chunk of chunks) {\n result.set(chunk, offset);\n offset += chunk.byteLength;\n }\n return result;\n }\n\n async append(fileName: string, data: Uint8Array): Promise<void> {\n if (!this.files.has(fileName)) {\n this.files.set(fileName, []);\n }\n this.files.get(fileName)!.push(new Uint8Array(data));\n }\n\n async write(fileName: string, data: Uint8Array): Promise<void> {\n this.files.set(fileName, [new Uint8Array(data)]);\n }\n\n async destroy(): Promise<void> {\n this.files.clear();\n }\n}\n","/**\n * WASM SIMD compute layer.\n * Compiles the hand-written WAT module and provides typed wrappers\n * that operate on shared WebAssembly.Memory.\n */\n\nexport interface WasmExports {\n normalize(ptr: number, dimensions: number): void;\n search_all(queryPtr: number, dbPtr: number, scoresPtr: number, dbSize: number, dimensions: number): void;\n}\n\n/**\n * Compiles a WAT string into a WASM module using the wabt library.\n * This is used at build time or test time; at runtime, the pre-compiled binary is used.\n */\nexport async function compileWatToWasm(watSource: string): Promise<Uint8Array> {\n const wabt = await import(\"wabt\");\n const wabtModule = await wabt.default();\n const parsed = wabtModule.parseWat(\"simd.wat\", watSource, {\n simd: true,\n });\n parsed.resolveNames();\n parsed.validate();\n const { buffer 
} = parsed.toBinary({});\n parsed.destroy();\n return new Uint8Array(buffer);\n}\n\n/**\n * Instantiates a WASM module with the given memory and returns typed exports.\n */\nexport async function instantiateWasm(\n wasmBinary: Uint8Array,\n memory: WebAssembly.Memory,\n): Promise<WasmExports> {\n const importObject = { env: { memory } };\n const result = await WebAssembly.instantiate(wasmBinary, importObject);\n // WebAssembly.instantiate with a buffer returns { instance, module }\n const instance = (result as unknown as { instance: WebAssembly.Instance }).instance;\n return instance.exports as unknown as WasmExports;\n}\n","/**\n * VectorDB — Key-Value Vector Database\n *\n * Decoupled from embedding providers. Users pass pre-computed vectors\n * as Float32Array values with string keys.\n *\n * Supports:\n * - set/get/setMany/getMany for key-value CRUD\n * - query for similarity search (dot product on normalized vectors)\n * - flush to persist, close to flush+release, clear to wipe\n * - Last-write-wins semantics for duplicate keys (append-only storage)\n */\n\nimport { normalize, searchAll } from \"./compute\";\nimport { VectorCapacityExceededError } from \"./errors\";\nimport { encodeLexicon, decodeLexicon } from \"./lexicon\";\nimport { MemoryManager } from \"./memory-manager\";\nimport { ResultSet } from \"./result-set\";\nimport { getSimdWasmBinary } from \"./simd-binary\";\nimport type { StorageProvider } from \"./storage\";\nimport { OPFSStorageProvider } from \"./storage\";\nimport type { OpenOptions, OpenOptionsInternal, SetOptions, QueryOptions } from \"./types\";\nimport { instantiateWasm, type WasmExports } from \"./wasm-compute\";\n\nconst VECTORS_FILE = \"vectors.bin\";\nconst KEYS_FILE = \"keys.bin\";\n\nexport class VectorDB {\n private readonly memoryManager: MemoryManager;\n private readonly storage: StorageProvider;\n private readonly dimensions: number;\n private readonly shouldNormalize: boolean;\n private wasmExports: WasmExports | null;\n\n 
/** Maps key to its slot index in the vector array */\n private keyToSlot: Map<string, number>;\n\n /** Maps slot index back to its key */\n private slotToKey: string[];\n\n /** Whether this instance has been closed */\n private closed = false;\n\n private constructor(\n memoryManager: MemoryManager,\n storage: StorageProvider,\n dimensions: number,\n shouldNormalize: boolean,\n wasmExports: WasmExports | null,\n keyToSlot: Map<string, number>,\n slotToKey: string[],\n ) {\n this.memoryManager = memoryManager;\n this.storage = storage;\n this.dimensions = dimensions;\n this.shouldNormalize = shouldNormalize;\n this.wasmExports = wasmExports;\n this.keyToSlot = keyToSlot;\n this.slotToKey = slotToKey;\n }\n\n /**\n * Opens a VectorDB instance.\n * Loads existing data from storage into WASM memory.\n */\n static async open(options: OpenOptions): Promise<VectorDB>;\n static async open(options: OpenOptionsInternal): Promise<VectorDB>;\n static async open(options: OpenOptionsInternal): Promise<VectorDB> {\n const name = options.name ?? \"default\";\n const storage = options.storage ?? new OPFSStorageProvider(name);\n const shouldNormalize = options.normalize !== false;\n\n // Load existing data from storage\n const [vectorBytes, keysBytes] = await Promise.all([\n storage.readAll(VECTORS_FILE),\n storage.readAll(KEYS_FILE),\n ]);\n\n // Decode stored keys\n const keys = keysBytes.byteLength > 0 ? 
decodeLexicon(keysBytes) : [];\n const vectorCount = vectorBytes.byteLength / (options.dimensions * 4);\n\n // Build key-to-slot mapping.\n // flush() always writes deduplicated state, so keys are unique on load.\n const keyToSlot = new Map<string, number>();\n const slotToKey: string[] = [];\n\n for (let i = 0; i < keys.length; i++) {\n keyToSlot.set(keys[i], i);\n slotToKey[i] = keys[i];\n }\n\n // Initialize memory manager\n const mm = new MemoryManager(options.dimensions, vectorCount);\n\n if (vectorBytes.byteLength > 0) {\n mm.loadVectorBytes(vectorBytes, vectorCount);\n }\n\n // Try to instantiate WASM SIMD module\n let wasmExports: WasmExports | null = null;\n const wasmBinary = options.wasmBinary !== undefined ? options.wasmBinary : getSimdWasmBinary();\n if (wasmBinary !== null) {\n try {\n wasmExports = await instantiateWasm(wasmBinary, mm.memory);\n } catch {\n // Fall back to JS compute\n }\n }\n\n return new VectorDB(\n mm,\n storage,\n options.dimensions,\n shouldNormalize,\n wasmExports,\n keyToSlot,\n slotToKey,\n );\n }\n\n /** Total number of key-value pairs in the database */\n get size(): number {\n return this.keyToSlot.size;\n }\n\n /**\n * Set a key-value pair. If the key already exists, its vector is overwritten (last-write-wins).\n * The value is a Float32Array of length equal to the configured dimensions.\n */\n set(key: string, value: Float32Array, options?: SetOptions): void {\n this.assertOpen();\n\n if (value.length !== this.dimensions) {\n throw new Error(\n `Vector dimension mismatch: expected ${this.dimensions}, got ${value.length}`,\n );\n }\n\n // Clone to avoid mutating caller's array during normalization\n const vec = new Float32Array(value);\n\n // Normalize if needed\n const doNormalize = options?.normalize ?? 
this.shouldNormalize;\n if (doNormalize) {\n this.normalizeVector(vec);\n }\n\n const existingSlot = this.keyToSlot.get(key);\n if (existingSlot !== undefined) {\n // Overwrite existing slot\n this.memoryManager.writeVector(existingSlot, vec);\n } else {\n // Append new entry\n const newTotal = this.memoryManager.vectorCount + 1;\n if (newTotal > this.memoryManager.maxVectors) {\n throw new VectorCapacityExceededError(this.memoryManager.maxVectors);\n }\n this.memoryManager.ensureCapacity(1);\n const slotIndex = this.memoryManager.vectorCount;\n this.memoryManager.appendVectors([vec]);\n this.keyToSlot.set(key, slotIndex);\n this.slotToKey[slotIndex] = key;\n }\n }\n\n /**\n * Get the stored vector for a key. Returns undefined if the key does not exist.\n * Returns a copy of the stored vector.\n */\n get(key: string): Float32Array | undefined {\n this.assertOpen();\n\n const slot = this.keyToSlot.get(key);\n if (slot === undefined) return undefined;\n\n // Return a copy so callers can't corrupt WASM memory\n return new Float32Array(this.memoryManager.readVector(slot));\n }\n\n /**\n * Set multiple key-value pairs at once. Last-write-wins applies within the batch.\n */\n setMany(entries: [string, Float32Array][]): void {\n for (const [key, value] of entries) {\n this.set(key, value);\n }\n }\n\n /**\n * Get vectors for multiple keys. Returns undefined for keys that don't exist.\n */\n getMany(keys: string[]): (Float32Array | undefined)[] {\n return keys.map((key) => this.get(key));\n }\n\n /**\n * Search for the most similar vectors to the given query vector.\n * Returns a ResultSet sorted by descending similarity score.\n */\n query(value: Float32Array, options?: QueryOptions): ResultSet {\n this.assertOpen();\n\n const k = options?.topK ?? 
this.size;\n\n if (this.size === 0) {\n return ResultSet.fromScores(new Float32Array(0), () => \"\", 0);\n }\n\n if (value.length !== this.dimensions) {\n throw new Error(\n `Query vector dimension mismatch: expected ${this.dimensions}, got ${value.length}`,\n );\n }\n\n // Clone and optionally normalize the query vector\n const queryVec = new Float32Array(value);\n const doNormalize = options?.normalize ?? this.shouldNormalize;\n if (doNormalize) {\n this.normalizeVector(queryVec);\n }\n\n // Write query to WASM memory\n this.memoryManager.writeQuery(queryVec);\n\n // Ensure memory has space for scores buffer\n this.memoryManager.ensureCapacity(0);\n\n // Total vectors in memory\n const totalVectors = this.memoryManager.vectorCount;\n\n // Execute search\n const scoresOffset = this.memoryManager.scoresOffset;\n if (this.wasmExports) {\n this.wasmExports.search_all(\n this.memoryManager.queryOffset,\n this.memoryManager.dbOffset,\n scoresOffset,\n totalVectors,\n this.dimensions,\n );\n } else {\n const queryView = new Float32Array(\n this.memoryManager.memory.buffer,\n this.memoryManager.queryOffset,\n this.dimensions,\n );\n const dbView = new Float32Array(\n this.memoryManager.memory.buffer,\n this.memoryManager.dbOffset,\n totalVectors * this.dimensions,\n );\n const scoresView = new Float32Array(\n this.memoryManager.memory.buffer,\n scoresOffset,\n totalVectors,\n );\n searchAll(queryView, dbView, scoresView, totalVectors, this.dimensions);\n }\n\n // Read scores (make a copy so the buffer can be reused)\n const scores = new Float32Array(this.memoryManager.readScores());\n\n // Resolve key from slot index\n const slotToKey = this.slotToKey;\n const resolveKey = (slotIndex: number): string => {\n return slotToKey[slotIndex];\n };\n\n return ResultSet.fromScores(scores, resolveKey, k);\n }\n\n /**\n * Persist the current in-memory state to storage.\n */\n async flush(): Promise<void> {\n this.assertOpen();\n\n const totalVectors = 
this.memoryManager.vectorCount;\n\n // Serialize vectors from WASM memory\n const vectorBytes = new Uint8Array(\n totalVectors * this.dimensions * 4,\n );\n if (totalVectors > 0) {\n const src = new Uint8Array(\n this.memoryManager.memory.buffer,\n this.memoryManager.dbOffset,\n totalVectors * this.dimensions * 4,\n );\n vectorBytes.set(src);\n }\n\n // Serialize keys using lexicon format\n const keysBytes = encodeLexicon(this.slotToKey);\n\n await Promise.all([\n this.storage.write(VECTORS_FILE, vectorBytes),\n this.storage.write(KEYS_FILE, keysBytes),\n ]);\n }\n\n /**\n * Flush data to storage and release the instance.\n * The instance cannot be used after close.\n */\n async close(): Promise<void> {\n if (this.closed) return;\n await this.flush();\n this.closed = true;\n }\n\n /**\n * Clear all data from the database and storage.\n */\n async clear(): Promise<void> {\n this.assertOpen();\n\n this.keyToSlot.clear();\n this.slotToKey.length = 0;\n this.memoryManager.reset();\n\n await this.storage.destroy();\n }\n\n /**\n * Normalize a vector using WASM (if available) or JS fallback.\n */\n private normalizeVector(vec: Float32Array): void {\n if (this.wasmExports) {\n const ptr = this.memoryManager.queryOffset;\n new Float32Array(this.memoryManager.memory.buffer, ptr, vec.length).set(vec);\n this.wasmExports.normalize(ptr, vec.length);\n const normalized = new Float32Array(this.memoryManager.memory.buffer, ptr, vec.length);\n vec.set(normalized);\n } else {\n normalize(vec);\n }\n }\n\n private assertOpen(): void {\n if (this.closed) {\n throw new Error(\"VectorDB instance has been closed\");\n }\n 
}\n}\n"],"names":["normalize","vec","sumSq","i","mag","invMag","searchAll","query","db","scores","dbSize","dimensions","dot","offset","j","VectorCapacityExceededError","maxVectors","encoder","decoder","encodeLexicon","texts","encoded","t","totalSize","sum","e","buffer","view","bytes","decodeLexicon","data","result","len","text","PAGE_SIZE","MAX_PAGES","MemoryManager","initialVectorCount","queryBytes","dbBytes","totalBytes","initialPages","availableBytes","bytesPerVector","additionalVectors","newTotal","requiredBytes","currentBytes","pagesNeeded","vector","vectors","startOffset","vectorCount","index","ResultSet","sortedIndices","resolveKey","topK","n","indices","a","b","rank","dbIndex","page","pageSize","start","end","results","SIMD_WASM_BASE64","getSimdWasmBinary","binaryString","OPFSStorageProvider","dirName","root","fileName","fileHandle","writable","file","InMemoryStorageProvider","chunks","c","chunk","instantiateWasm","wasmBinary","memory","importObject","VECTORS_FILE","KEYS_FILE","VectorDB","memoryManager","storage","shouldNormalize","wasmExports","keyToSlot","slotToKey","options","name","vectorBytes","keysBytes","keys","mm","key","value","existingSlot","slotIndex","slot","entries","k","queryVec","totalVectors","scoresOffset","queryView","dbView","scoresView","src","ptr","normalized"],"mappings":"gOASO,SAASA,EAAUC,EAAyB,CACjD,IAAIC,EAAQ,EACZ,QAASC,EAAI,EAAGA,EAAIF,EAAI,OAAQE,IAC9BD,GAASD,EAAIE,CAAC,EAAIF,EAAIE,CAAC,EAEzB,MAAMC,EAAM,KAAK,KAAKF,CAAK,EAC3B,GAAIE,IAAQ,EAAG,OACf,MAAMC,EAAS,EAAID,EACnB,QAASD,EAAI,EAAGA,EAAIF,EAAI,OAAQE,IAC9BF,EAAIE,CAAC,GAAKE,CAEd,CAYO,SAASC,EACdC,EACAC,EACAC,EACAC,EACAC,EACM,CACN,QAASR,EAAI,EAAGA,EAAIO,EAAQP,IAAK,CAC/B,IAAIS,EAAM,EACV,MAAMC,EAASV,EAAIQ,EACnB,QAASG,EAAI,EAAGA,EAAIH,EAAYG,IAC9BF,GAAOL,EAAMO,CAAC,EAAIN,EAAGK,EAASC,CAAC,EAEjCL,EAAON,CAAC,EAAIS,CACd,CACF,CC3CO,MAAMG,UAAoC,KAAM,CACrD,YAAYC,EAAoB,CAC9B,MAAM,8DAA8DA,CAAU,GAAG,EACjF,KAAK,KAAO,6BACd,CACF,CCFA,MAAMC,EAAU,IAAI,YACdC,EAAU,IAAI,YAKb,SAASC,EAAcC,EAA6B,CACzD,MAAMC,
EAAUD,EAAM,IAAKE,GAAML,EAAQ,OAAOK,CAAC,CAAC,EAC5CC,EAAYF,EAAQ,OAAO,CAACG,EAAKC,IAAMD,EAAM,EAAIC,EAAE,WAAY,CAAC,EAEhEC,EAAS,IAAI,YAAYH,CAAS,EAClCI,EAAO,IAAI,SAASD,CAAM,EAC1BE,EAAQ,IAAI,WAAWF,CAAM,EACnC,IAAIb,EAAS,EAEb,UAAWY,KAAKJ,EACdM,EAAK,UAAUd,EAAQY,EAAE,WAAY,EAAI,EACzCZ,GAAU,EACVe,EAAM,IAAIH,EAAGZ,CAAM,EACnBA,GAAUY,EAAE,WAGd,OAAOG,CACT,CAKO,SAASC,EAAcC,EAA4B,CACxD,MAAMC,EAAmB,CAAA,EACnBJ,EAAO,IAAI,SAASG,EAAK,OAAQA,EAAK,WAAYA,EAAK,UAAU,EACvE,IAAIjB,EAAS,EAEb,KAAOA,EAASiB,EAAK,YAAY,CAC/B,MAAME,EAAML,EAAK,UAAUd,EAAQ,EAAI,EACvCA,GAAU,EACV,MAAMoB,EAAOf,EAAQ,OAAOY,EAAK,SAASjB,EAAQA,EAASmB,CAAG,CAAC,EAC/DD,EAAO,KAAKE,CAAI,EAChBpB,GAAUmB,CACZ,CAEA,OAAOD,CACT,CCvCA,MAAMG,EAAY,MAGZC,EAAY,MAEX,MAAMC,CAAc,CAChB,OACA,WACA,YACA,SACD,aAER,YAAYzB,EAAoB0B,EAA6B,EAAG,CAC9D,KAAK,WAAa1B,EAGlB,KAAK,YAAc,EACnB,MAAM2B,EAAa3B,EAAa,EAChC,KAAK,SAAW,KAAK,KAAK2B,EAAaJ,CAAS,EAAIA,EAGpD,MAAMK,EAAUF,EAAqB1B,EAAa,EAC5C6B,EAAa,KAAK,SAAWD,EAC7BE,EAAe,KAAK,IAAI,EAAG,KAAK,KAAKD,EAAaN,CAAS,CAAC,EAElE,KAAK,OAAS,IAAI,YAAY,OAAO,CAAE,QAASO,EAAc,EAC9D,KAAK,aAAeJ,CACtB,CAGA,IAAI,aAAsB,CACxB,OAAO,KAAK,YACd,CAGA,IAAI,cAAuB,CACzB,OAAO,KAAK,SAAW,KAAK,aAAe,KAAK,WAAa,CAC/D,CAGA,IAAI,aAAsB,CACxB,OAAO,KAAK,aAAe,CAC7B,CAMA,IAAI,YAAqB,CACvB,MAAMK,EAAiBP,EAAYD,EAAY,KAAK,SAE9CS,EAAiB,KAAK,WAAa,EAAI,EAC7C,OAAO,KAAK,MAAMD,EAAiBC,CAAc,CACnD,CAMA,eAAeC,EAAiC,CAC9C,MAAMC,EAAW,KAAK,aAAeD,EAC/BE,EACJ,KAAK,SAAWD,EAAW,KAAK,WAAa,EAAIA,EAAW,EACxDE,EAAe,KAAK,OAAO,OAAO,WAExC,GAAID,EAAgBC,EAAc,CAChC,MAAMC,EAAc,KAAK,MAAMF,EAAgBC,GAAgBb,CAAS,EAExE,GADqBa,EAAeb,EACjBc,EAAcb,EAC/B,MAAM,IAAI,MAAM,4BAA4B,EAE9C,KAAK,OAAO,KAAKa,CAAW,CAC9B,CACF,CAKA,WAAWC,EAA4B,CACrC,IAAI,aAAa,KAAK,OAAO,OAAQ,KAAK,YAAa,KAAK,UAAU,EAAE,IAAIA,CAAM,CACpF,CAMA,cAAcC,EAAiC,CAC7C,MAAMC,EAAc,KAAK,SAAW,KAAK,aAAe,KAAK,WAAa,EAC1E,IAAItC,EAASsC,EACb,UAAWlD,KAAOiD,EAChB,IAAI,aAAa,KAAK,OAAO,OAAQrC,EAAQ,KAAK,UAAU,EAAE,IAAIZ,CAAG,EACrEY,GAAU,KAAK,WAAa,EAE9B,YAAK,cAAgBqC,EAAQ,OACtBC,CACT,CAMA,gBAAgBrB,EAAkBsB,EAA2B,CAC3D,IAAI,WAAW,KAAK,OAAO,OAAQ,KAAK,SAAUtB,EAAK,UAAU,EAAE,I
AAIA,CAAI,EAC3E,KAAK,aAAesB,CACtB,CAKA,YAA2B,CACzB,OAAO,IAAI,aAAa,KAAK,OAAO,OAAQ,KAAK,aAAc,KAAK,YAAY,CAClF,CAKA,WAAWC,EAA6B,CACtC,MAAMxC,EAAS,KAAK,SAAWwC,EAAQ,KAAK,WAAa,EACzD,OAAO,IAAI,aAAa,KAAK,OAAO,OAAQxC,EAAQ,KAAK,UAAU,CACrE,CAKA,YAAYwC,EAAeJ,EAA4B,CACrD,MAAMpC,EAAS,KAAK,SAAWwC,EAAQ,KAAK,WAAa,EACzD,IAAI,aAAa,KAAK,OAAO,OAAQxC,EAAQ,KAAK,UAAU,EAAE,IAAIoC,CAAM,CAC1E,CAMA,OAAc,CACZ,KAAK,aAAe,CACtB,CACF,CCpIO,MAAMK,CAAU,CAEZ,OAMQ,cAGA,OAGA,WAEjB,YACE7C,EACA8C,EACAC,EACAC,EACA,CACA,KAAK,OAAShD,EACd,KAAK,cAAgB8C,EACrB,KAAK,WAAaC,EAClB,KAAK,OAAS,KAAK,IAAIC,EAAMF,EAAc,MAAM,CACnD,CASA,OAAO,WACL9C,EACA+C,EACAC,EACW,CACX,MAAMC,EAAIjD,EAAO,OAGXkD,EAAU,IAAI,YAAYD,CAAC,EACjC,QAASvD,EAAI,EAAGA,EAAIuD,EAAGvD,IAAKwD,EAAQxD,CAAC,EAAIA,EAGzC,OAAAwD,EAAQ,KAAK,CAACC,EAAGC,IAAMpD,EAAOoD,CAAC,EAAIpD,EAAOmD,CAAC,CAAC,EAErC,IAAIN,EAAU7C,EAAQkD,EAASH,EAAYC,CAAI,CACxD,CAGA,IAAIK,EAA0B,CAC5B,GAAIA,EAAO,GAAKA,GAAQ,KAAK,OAC3B,MAAM,IAAI,WAAW,QAAQA,CAAI,sBAAsB,KAAK,MAAM,GAAG,EAEvE,MAAMC,EAAU,KAAK,cAAcD,CAAI,EACvC,MAAO,CACL,IAAK,KAAK,WAAWC,CAAO,EAC5B,MAAO,KAAK,OAAOA,CAAO,CAAA,CAE9B,CAGA,QAAQC,EAAcC,EAAgC,CACpD,MAAMC,EAAQF,EAAOC,EACfE,EAAM,KAAK,IAAID,EAAQD,EAAU,KAAK,MAAM,EAC5CG,EAAwB,CAAA,EAC9B,QAASjE,EAAI+D,EAAO/D,EAAIgE,EAAKhE,IAC3BiE,EAAQ,KAAK,KAAK,IAAIjE,CAAC,CAAC,EAE1B,OAAOiE,CACT,CACF,CCvFA,MAAMC,EAAmB,u1BAElB,SAASC,GAAgC,CAC9C,MAAMC,EAAe,KAAKF,CAAgB,EACpCzC,EAAQ,IAAI,WAAW2C,EAAa,MAAM,EAChD,QAASpE,EAAI,EAAGA,EAAIoE,EAAa,OAAQpE,IACvCyB,EAAMzB,CAAC,EAAIoE,EAAa,WAAWpE,CAAC,EAEtC,OAAOyB,CACT,CCaO,MAAM4C,CAA+C,CAClD,UAA8C,KAC9C,QAER,YAAYC,EAAiB,CAC3B,KAAK,QAAUA,CACjB,CAEA,MAAc,QAA6C,CACzD,GAAI,CAAC,KAAK,UAAW,CACnB,MAAMC,EAAO,MAAM,UAAU,QAAQ,aAAA,EACrC,KAAK,UAAY,MAAMA,EAAK,mBAAmB,KAAK,QAAS,CAAE,OAAQ,GAAM,CAC/E,CACA,OAAO,KAAK,SACd,CAEA,MAAM,QAAQC,EAAuC,CACnD,GAAI,CAIF,MAAMjD,EAAS,MADF,MADM,MADP,MAAM,KAAK,OAAA,GACM,cAAciD,CAAQ,GACrB,QAAA,GACJ,YAAA,EAC1B,OAAO,IAAI,WAAWjD,CAAM,CAC9B,MAAQ,CACN,OAAO,IAAI,WAAW,CAAC,CACzB,CACF,CAEA,MAAM,OAAOiD,EAAkB7C,EAAiC,CAE9D,MAAM8C,EAAa,MADP,MAAM,KAAK,OAAA,GACM,cAAcD,E
AAU,CAAE,OAAQ,GAAM,EAC/DE,EAAW,MAAMD,EAAW,eAAe,CAAE,iBAAkB,GAAM,EACrEE,EAAO,MAAMF,EAAW,QAAA,EAC9B,MAAMC,EAAS,KAAKC,EAAK,IAAI,EAC7B,MAAMD,EAAS,MAAM/C,CAA+B,EACpD,MAAM+C,EAAS,MAAA,CACjB,CAEA,MAAM,MAAMF,EAAkB7C,EAAiC,CAG7D,MAAM+C,EAAW,MADE,MADP,MAAM,KAAK,OAAA,GACM,cAAcF,EAAU,CAAE,OAAQ,GAAM,GACnC,eAAe,CAAE,iBAAkB,GAAO,EAC5E,MAAME,EAAS,MAAM/C,CAA+B,EACpD,MAAM+C,EAAS,MAAA,CACjB,CAEA,MAAM,SAAyB,CAE7B,MADa,MAAM,UAAU,QAAQ,aAAA,GAC1B,YAAY,KAAK,QAAS,CAAE,UAAW,GAAM,EACxD,KAAK,UAAY,IACnB,CACF,CAKO,MAAME,CAAmD,CACtD,UAAY,IAEpB,MAAM,QAAQJ,EAAuC,CACnD,MAAMK,EAAS,KAAK,MAAM,IAAIL,CAAQ,EACtC,GAAI,CAACK,GAAUA,EAAO,SAAW,EAAG,OAAO,IAAI,WAAW,CAAC,EAE3D,MAAMzD,EAAYyD,EAAO,OAAO,CAACxD,EAAKyD,IAAMzD,EAAMyD,EAAE,WAAY,CAAC,EAC3DlD,EAAS,IAAI,WAAWR,CAAS,EACvC,IAAIV,EAAS,EACb,UAAWqE,KAASF,EAClBjD,EAAO,IAAImD,EAAOrE,CAAM,EACxBA,GAAUqE,EAAM,WAElB,OAAOnD,CACT,CAEA,MAAM,OAAO4C,EAAkB7C,EAAiC,CACzD,KAAK,MAAM,IAAI6C,CAAQ,GAC1B,KAAK,MAAM,IAAIA,EAAU,CAAA,CAAE,EAE7B,KAAK,MAAM,IAAIA,CAAQ,EAAG,KAAK,IAAI,WAAW7C,CAAI,CAAC,CACrD,CAEA,MAAM,MAAM6C,EAAkB7C,EAAiC,CAC7D,KAAK,MAAM,IAAI6C,EAAU,CAAC,IAAI,WAAW7C,CAAI,CAAC,CAAC,CACjD,CAEA,MAAM,SAAyB,CAC7B,KAAK,MAAM,MAAA,CACb,CACF,CC/EA,eAAsBqD,EACpBC,EACAC,EACsB,CACtB,MAAMC,EAAe,CAAE,IAAK,CAAE,OAAAD,EAAO,EAIrC,OAHe,MAAM,YAAY,YAAYD,EAAYE,CAAY,GAEM,SAC3D,OAClB,CChBA,MAAMC,EAAe,cACfC,EAAY,WAEX,MAAMC,CAAS,CACH,cACA,QACA,WACA,gBACT,YAGA,UAGA,UAGA,OAAS,GAET,YACNC,EACAC,EACAhF,EACAiF,EACAC,EACAC,EACAC,EACA,CACA,KAAK,cAAgBL,EACrB,KAAK,QAAUC,EACf,KAAK,WAAahF,EAClB,KAAK,gBAAkBiF,EACvB,KAAK,YAAcC,EACnB,KAAK,UAAYC,EACjB,KAAK,UAAYC,CACnB,CAQA,aAAa,KAAKC,EAAiD,CACjE,MAAMC,EAAOD,EAAQ,MAAQ,UACvBL,EAAUK,EAAQ,SAAW,IAAIxB,EAAoByB,CAAI,EACzDL,EAAkBI,EAAQ,YAAc,GAGxC,CAACE,EAAaC,CAAS,EAAI,MAAM,QAAQ,IAAI,CACjDR,EAAQ,QAAQJ,CAAY,EAC5BI,EAAQ,QAAQH,CAAS,CAAA,CAC1B,EAGKY,EAAOD,EAAU,WAAa,EAAItE,EAAcsE,CAAS,EAAI,CAAA,EAC7D/C,EAAc8C,EAAY,YAAcF,EAAQ,WAAa,GAI7DF,MAAgB,IAChBC,EAAsB,CAAA,EAE5B,QAAS5F,EAAI,EAAGA,EAAIiG,EAAK,OAAQjG,IAC/B2F,EAAU,IAAIM,EAAKjG,CAAC,EAAGA,CAAC,EACxB4F,EAAU5F,CAAC,EAAIiG,EAAKjG,
CAAC,EAIvB,MAAMkG,EAAK,IAAIjE,EAAc4D,EAAQ,WAAY5C,CAAW,EAExD8C,EAAY,WAAa,GAC3BG,EAAG,gBAAgBH,EAAa9C,CAAW,EAI7C,IAAIyC,EAAkC,KACtC,MAAMT,EAAaY,EAAQ,aAAe,OAAYA,EAAQ,WAAa1B,EAAA,EAC3E,GAAIc,IAAe,KACjB,GAAI,CACFS,EAAc,MAAMV,EAAgBC,EAAYiB,EAAG,MAAM,CAC3D,MAAQ,CAER,CAGF,OAAO,IAAIZ,EACTY,EACAV,EACAK,EAAQ,WACRJ,EACAC,EACAC,EACAC,CAAA,CAEJ,CAGA,IAAI,MAAe,CACjB,OAAO,KAAK,UAAU,IACxB,CAMA,IAAIO,EAAaC,EAAqBP,EAA4B,CAGhE,GAFA,KAAK,WAAA,EAEDO,EAAM,SAAW,KAAK,WACxB,MAAM,IAAI,MACR,uCAAuC,KAAK,UAAU,SAASA,EAAM,MAAM,EAAA,EAK/E,MAAMtG,EAAM,IAAI,aAAasG,CAAK,GAGdP,GAAS,WAAa,KAAK,kBAE7C,KAAK,gBAAgB/F,CAAG,EAG1B,MAAMuG,EAAe,KAAK,UAAU,IAAIF,CAAG,EAC3C,GAAIE,IAAiB,OAEnB,KAAK,cAAc,YAAYA,EAAcvG,CAAG,MAC3C,CAGL,GADiB,KAAK,cAAc,YAAc,EACnC,KAAK,cAAc,WAChC,MAAM,IAAIc,EAA4B,KAAK,cAAc,UAAU,EAErE,KAAK,cAAc,eAAe,CAAC,EACnC,MAAM0F,EAAY,KAAK,cAAc,YACrC,KAAK,cAAc,cAAc,CAACxG,CAAG,CAAC,EACtC,KAAK,UAAU,IAAIqG,EAAKG,CAAS,EACjC,KAAK,UAAUA,CAAS,EAAIH,CAC9B,CACF,CAMA,IAAIA,EAAuC,CACzC,KAAK,WAAA,EAEL,MAAMI,EAAO,KAAK,UAAU,IAAIJ,CAAG,EACnC,GAAII,IAAS,OAGb,OAAO,IAAI,aAAa,KAAK,cAAc,WAAWA,CAAI,CAAC,CAC7D,CAKA,QAAQC,EAAyC,CAC/C,SAAW,CAACL,EAAKC,CAAK,IAAKI,EACzB,KAAK,IAAIL,EAAKC,CAAK,CAEvB,CAKA,QAAQH,EAA8C,CACpD,OAAOA,EAAK,IAAKE,GAAQ,KAAK,IAAIA,CAAG,CAAC,CACxC,CAMA,MAAMC,EAAqBP,EAAmC,CAC5D,KAAK,WAAA,EAEL,MAAMY,EAAIZ,GAAS,MAAQ,KAAK,KAEhC,GAAI,KAAK,OAAS,EAChB,OAAO1C,EAAU,WAAW,IAAI,aAAa,CAAC,EAAG,IAAM,GAAI,CAAC,EAG9D,GAAIiD,EAAM,SAAW,KAAK,WACxB,MAAM,IAAI,MACR,6CAA6C,KAAK,UAAU,SAASA,EAAM,MAAM,EAAA,EAKrF,MAAMM,EAAW,IAAI,aAAaN,CAAK,GACnBP,GAAS,WAAa,KAAK,kBAE7C,KAAK,gBAAgBa,CAAQ,EAI/B,KAAK,cAAc,WAAWA,CAAQ,EAGtC,KAAK,cAAc,eAAe,CAAC,EAGnC,MAAMC,EAAe,KAAK,cAAc,YAGlCC,EAAe,KAAK,cAAc,aACxC,GAAI,KAAK,YACP,KAAK,YAAY,WACf,KAAK,cAAc,YACnB,KAAK,cAAc,SACnBA,EACAD,EACA,KAAK,UAAA,MAEF,CACL,MAAME,EAAY,IAAI,aACpB,KAAK,cAAc,OAAO,OAC1B,KAAK,cAAc,YACnB,KAAK,UAAA,EAEDC,EAAS,IAAI,aACjB,KAAK,cAAc,OAAO,OAC1B,KAAK,cAAc,SACnBH,EAAe,KAAK,UAAA,EAEhBI,EAAa,IAAI,aACrB,KAAK,cAAc,OAAO,OAC1BH,EACAD,CAAA,EAEFxG,EAAU0G,EAAWC,EAAQC,EAAYJ,EAAc,KAAK,UAAU,CACxE,CA
GA,MAAMrG,EAAS,IAAI,aAAa,KAAK,cAAc,YAAY,EAGzDsF,EAAY,KAAK,UACjBvC,EAAciD,GACXV,EAAUU,CAAS,EAG5B,OAAOnD,EAAU,WAAW7C,EAAQ+C,EAAYoD,CAAC,CACnD,CAKA,MAAM,OAAuB,CAC3B,KAAK,WAAA,EAEL,MAAME,EAAe,KAAK,cAAc,YAGlCZ,EAAc,IAAI,WACtBY,EAAe,KAAK,WAAa,CAAA,EAEnC,GAAIA,EAAe,EAAG,CACpB,MAAMK,EAAM,IAAI,WACd,KAAK,cAAc,OAAO,OAC1B,KAAK,cAAc,SACnBL,EAAe,KAAK,WAAa,CAAA,EAEnCZ,EAAY,IAAIiB,CAAG,CACrB,CAGA,MAAMhB,EAAYhF,EAAc,KAAK,SAAS,EAE9C,MAAM,QAAQ,IAAI,CAChB,KAAK,QAAQ,MAAMoE,EAAcW,CAAW,EAC5C,KAAK,QAAQ,MAAMV,EAAWW,CAAS,CAAA,CACxC,CACH,CAMA,MAAM,OAAuB,CACvB,KAAK,SACT,MAAM,KAAK,MAAA,EACX,KAAK,OAAS,GAChB,CAKA,MAAM,OAAuB,CAC3B,KAAK,WAAA,EAEL,KAAK,UAAU,MAAA,EACf,KAAK,UAAU,OAAS,EACxB,KAAK,cAAc,MAAA,EAEnB,MAAM,KAAK,QAAQ,QAAA,CACrB,CAKQ,gBAAgBlG,EAAyB,CAC/C,GAAI,KAAK,YAAa,CACpB,MAAMmH,EAAM,KAAK,cAAc,YAC/B,IAAI,aAAa,KAAK,cAAc,OAAO,OAAQA,EAAKnH,EAAI,MAAM,EAAE,IAAIA,CAAG,EAC3E,KAAK,YAAY,UAAUmH,EAAKnH,EAAI,MAAM,EAC1C,MAAMoH,EAAa,IAAI,aAAa,KAAK,cAAc,OAAO,OAAQD,EAAKnH,EAAI,MAAM,EACrFA,EAAI,IAAIoH,CAAU,CACpB,MACErH,EAAUC,CAAG,CAEjB,CAEQ,YAAmB,CACzB,GAAI,KAAK,OACP,MAAM,IAAI,MAAM,mCAAmC,CAEvD,CACF"}
|
package/package.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "eigen-db",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"type": "module",
|
|
5
|
+
"files": [
|
|
6
|
+
"dist",
|
|
7
|
+
"src/lib"
|
|
8
|
+
],
|
|
9
|
+
"main": "./dist/",
|
|
10
|
+
"scripts": {
|
|
11
|
+
"dev": "vite",
|
|
12
|
+
"compile-wat": "tsx scripts/compile-wat.ts",
|
|
13
|
+
"build": "npm run compile-wat && tsc && vite build",
|
|
14
|
+
"preview": "vite preview",
|
|
15
|
+
"test": "vitest run",
|
|
16
|
+
"test:watch": "vitest",
|
|
17
|
+
"bench": "vitest bench"
|
|
18
|
+
},
|
|
19
|
+
"devDependencies": {
|
|
20
|
+
"@types/node": "^25.3.0",
|
|
21
|
+
"tsx": "^4.21.0",
|
|
22
|
+
"typescript": "~5.9.3",
|
|
23
|
+
"vite": "^7.3.1",
|
|
24
|
+
"vitest": "^4.0.18",
|
|
25
|
+
"wabt": "^1.0.39"
|
|
26
|
+
}
|
|
27
|
+
}
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
import { describe, bench } from "vitest";
|
|
2
|
+
import { readFileSync } from "fs";
|
|
3
|
+
import { resolve } from "path";
|
|
4
|
+
import { normalize as jsNormalize, searchAll as jsSearchAll } from "../compute";
|
|
5
|
+
import { compileWatToWasm, instantiateWasm } from "../wasm-compute";
|
|
6
|
+
|
|
7
|
+
const watSource = readFileSync(resolve(__dirname, "../simd.wat"), "utf-8");
|
|
8
|
+
|
|
9
|
+
/**
|
|
10
|
+
* Benchmarks comparing JS vs WASM SIMD performance for vector operations.
|
|
11
|
+
*/
|
|
12
|
+
describe("normalize benchmark", async () => {
|
|
13
|
+
const dimensions = 1536;
|
|
14
|
+
const vec = new Float32Array(dimensions);
|
|
15
|
+
for (let i = 0; i < dimensions; i++) vec[i] = Math.random();
|
|
16
|
+
|
|
17
|
+
const wasmBinary = await compileWatToWasm(watSource);
|
|
18
|
+
const memory = new WebAssembly.Memory({ initial: 1 });
|
|
19
|
+
const wasm = await instantiateWasm(wasmBinary, memory);
|
|
20
|
+
const ptr = 0;
|
|
21
|
+
|
|
22
|
+
bench("JS normalize (1536 dims)", () => {
|
|
23
|
+
const v = new Float32Array(vec);
|
|
24
|
+
jsNormalize(v);
|
|
25
|
+
});
|
|
26
|
+
|
|
27
|
+
bench("WASM SIMD normalize (1536 dims)", () => {
|
|
28
|
+
new Float32Array(memory.buffer, ptr, dimensions).set(vec);
|
|
29
|
+
wasm.normalize(ptr, dimensions);
|
|
30
|
+
});
|
|
31
|
+
});
|
|
32
|
+
|
|
33
|
+
describe("searchAll benchmark", async () => {
|
|
34
|
+
const dimensions = 1536;
|
|
35
|
+
const dbSizes = [100, 1000, 10000];
|
|
36
|
+
|
|
37
|
+
for (const dbSize of dbSizes) {
|
|
38
|
+
// Prepare data
|
|
39
|
+
const query = new Float32Array(dimensions);
|
|
40
|
+
for (let i = 0; i < dimensions; i++) query[i] = Math.random();
|
|
41
|
+
jsNormalize(query);
|
|
42
|
+
|
|
43
|
+
const db = new Float32Array(dbSize * dimensions);
|
|
44
|
+
for (let i = 0; i < db.length; i++) db[i] = Math.random();
|
|
45
|
+
|
|
46
|
+
const jsScores = new Float32Array(dbSize);
|
|
47
|
+
|
|
48
|
+
bench(`JS searchAll (${dbSize} vectors × ${dimensions} dims)`, () => {
|
|
49
|
+
jsSearchAll(query, db, jsScores, dbSize, dimensions);
|
|
50
|
+
});
|
|
51
|
+
|
|
52
|
+
// WASM benchmark
|
|
53
|
+
const wasmBinary = await compileWatToWasm(watSource);
|
|
54
|
+
const totalBytes = dimensions * 4 + dbSize * dimensions * 4 + dbSize * 4;
|
|
55
|
+
const pages = Math.ceil(totalBytes / 65536);
|
|
56
|
+
const memory = new WebAssembly.Memory({ initial: pages });
|
|
57
|
+
const wasm = await instantiateWasm(wasmBinary, memory);
|
|
58
|
+
|
|
59
|
+
const queryPtr = 0;
|
|
60
|
+
const dbPtr = dimensions * 4;
|
|
61
|
+
const scoresPtr = dbPtr + dbSize * dimensions * 4;
|
|
62
|
+
|
|
63
|
+
new Float32Array(memory.buffer, queryPtr, dimensions).set(query);
|
|
64
|
+
new Float32Array(memory.buffer, dbPtr, dbSize * dimensions).set(db);
|
|
65
|
+
|
|
66
|
+
bench(`WASM SIMD searchAll (${dbSize} vectors × ${dimensions} dims)`, () => {
|
|
67
|
+
wasm.search_all(queryPtr, dbPtr, scoresPtr, dbSize, dimensions);
|
|
68
|
+
});
|
|
69
|
+
}
|
|
70
|
+
});
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
import { describe, it, expect } from "vitest";
|
|
2
|
+
import { normalize, searchAll } from "../compute";
|
|
3
|
+
|
|
4
|
+
describe("normalize", () => {
|
|
5
|
+
it("normalizes a vector to unit length", () => {
|
|
6
|
+
const vec = new Float32Array([3, 4]);
|
|
7
|
+
normalize(vec);
|
|
8
|
+
expect(vec[0]).toBeCloseTo(0.6, 5);
|
|
9
|
+
expect(vec[1]).toBeCloseTo(0.8, 5);
|
|
10
|
+
// Verify unit length
|
|
11
|
+
const mag = Math.sqrt(vec[0] * vec[0] + vec[1] * vec[1]);
|
|
12
|
+
expect(mag).toBeCloseTo(1.0, 5);
|
|
13
|
+
});
|
|
14
|
+
|
|
15
|
+
it("normalizes a higher-dimensional vector", () => {
|
|
16
|
+
const vec = new Float32Array([1, 2, 3, 4]);
|
|
17
|
+
normalize(vec);
|
|
18
|
+
let sumSq = 0;
|
|
19
|
+
for (let i = 0; i < vec.length; i++) sumSq += vec[i] * vec[i];
|
|
20
|
+
expect(Math.sqrt(sumSq)).toBeCloseTo(1.0, 5);
|
|
21
|
+
});
|
|
22
|
+
|
|
23
|
+
it("handles a zero vector gracefully", () => {
|
|
24
|
+
const vec = new Float32Array([0, 0, 0]);
|
|
25
|
+
normalize(vec);
|
|
26
|
+
expect(vec[0]).toBe(0);
|
|
27
|
+
expect(vec[1]).toBe(0);
|
|
28
|
+
expect(vec[2]).toBe(0);
|
|
29
|
+
});
|
|
30
|
+
|
|
31
|
+
it("handles a single-element vector", () => {
|
|
32
|
+
const vec = new Float32Array([5]);
|
|
33
|
+
normalize(vec);
|
|
34
|
+
expect(vec[0]).toBeCloseTo(1.0, 5);
|
|
35
|
+
});
|
|
36
|
+
|
|
37
|
+
it("handles already-normalized vector", () => {
|
|
38
|
+
const vec = new Float32Array([0.6, 0.8]);
|
|
39
|
+
normalize(vec);
|
|
40
|
+
expect(vec[0]).toBeCloseTo(0.6, 5);
|
|
41
|
+
expect(vec[1]).toBeCloseTo(0.8, 5);
|
|
42
|
+
});
|
|
43
|
+
});
|
|
44
|
+
|
|
45
|
+
describe("searchAll", () => {
|
|
46
|
+
it("computes dot products for a single database vector", () => {
|
|
47
|
+
const dimensions = 3;
|
|
48
|
+
const query = new Float32Array([1, 0, 0]);
|
|
49
|
+
const db = new Float32Array([0.5, 0.5, 0]);
|
|
50
|
+
const scores = new Float32Array(1);
|
|
51
|
+
|
|
52
|
+
searchAll(query, db, scores, 1, dimensions);
|
|
53
|
+
expect(scores[0]).toBeCloseTo(0.5, 5);
|
|
54
|
+
});
|
|
55
|
+
|
|
56
|
+
it("computes dot products for multiple database vectors", () => {
|
|
57
|
+
const dimensions = 2;
|
|
58
|
+
// Two normalized vectors
|
|
59
|
+
const query = new Float32Array([1, 0]);
|
|
60
|
+
const db = new Float32Array([
|
|
61
|
+
1, 0, // identical to query
|
|
62
|
+
0, 1, // orthogonal to query
|
|
63
|
+
]);
|
|
64
|
+
const scores = new Float32Array(2);
|
|
65
|
+
|
|
66
|
+
searchAll(query, db, scores, 2, dimensions);
|
|
67
|
+
expect(scores[0]).toBeCloseTo(1.0, 5); // identical
|
|
68
|
+
expect(scores[1]).toBeCloseTo(0.0, 5); // orthogonal
|
|
69
|
+
});
|
|
70
|
+
|
|
71
|
+
it("computes correct scores for normalized vectors (cosine similarity)", () => {
|
|
72
|
+
const dimensions = 3;
|
|
73
|
+
const q = new Float32Array([1, 2, 3]);
|
|
74
|
+
normalize(q);
|
|
75
|
+
|
|
76
|
+
const v1 = new Float32Array([1, 2, 3]); // same direction
|
|
77
|
+
normalize(v1);
|
|
78
|
+
const v2 = new Float32Array([-1, -2, -3]); // opposite direction
|
|
79
|
+
normalize(v2);
|
|
80
|
+
const v3 = new Float32Array([0, 0, 1]); // different direction
|
|
81
|
+
normalize(v3);
|
|
82
|
+
|
|
83
|
+
const db = new Float32Array([...v1, ...v2, ...v3]);
|
|
84
|
+
const scores = new Float32Array(3);
|
|
85
|
+
|
|
86
|
+
searchAll(q, db, scores, 3, dimensions);
|
|
87
|
+
expect(scores[0]).toBeCloseTo(1.0, 4); // same direction = 1
|
|
88
|
+
expect(scores[1]).toBeCloseTo(-1.0, 4); // opposite = -1
|
|
89
|
+
expect(scores[2]).toBeGreaterThan(-1);
|
|
90
|
+
expect(scores[2]).toBeLessThan(1);
|
|
91
|
+
});
|
|
92
|
+
|
|
93
|
+
it("handles empty database", () => {
|
|
94
|
+
const query = new Float32Array([1, 0, 0]);
|
|
95
|
+
const db = new Float32Array(0);
|
|
96
|
+
const scores = new Float32Array(0);
|
|
97
|
+
|
|
98
|
+
searchAll(query, db, scores, 0, 3);
|
|
99
|
+
expect(scores.length).toBe(0);
|
|
100
|
+
});
|
|
101
|
+
|
|
102
|
+
it("handles high-dimensional vectors (128 dims)", () => {
|
|
103
|
+
const dimensions = 128;
|
|
104
|
+
const query = new Float32Array(dimensions);
|
|
105
|
+
query[0] = 1; // unit vector along first axis
|
|
106
|
+
|
|
107
|
+
const dbSize = 10;
|
|
108
|
+
const db = new Float32Array(dbSize * dimensions);
|
|
109
|
+
for (let i = 0; i < dbSize; i++) {
|
|
110
|
+
db[i * dimensions + i % dimensions] = 1; // unit vectors along different axes
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
const scores = new Float32Array(dbSize);
|
|
114
|
+
searchAll(query, db, scores, dbSize, dimensions);
|
|
115
|
+
|
|
116
|
+
expect(scores[0]).toBeCloseTo(1.0, 5); // same axis as query
|
|
117
|
+
for (let i = 1; i < dbSize; i++) {
|
|
118
|
+
expect(scores[i]).toBeCloseTo(0.0, 5); // orthogonal
|
|
119
|
+
}
|
|
120
|
+
});
|
|
121
|
+
});
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
import { describe, it, expect } from "vitest";
|
|
2
|
+
import {
|
|
3
|
+
encodeLexicon,
|
|
4
|
+
decodeLexicon,
|
|
5
|
+
decodeLexiconAt,
|
|
6
|
+
buildLexiconIndex,
|
|
7
|
+
decodeLexiconAtOffset,
|
|
8
|
+
} from "../lexicon";
|
|
9
|
+
|
|
10
|
+
describe("Lexicon", () => {
|
|
11
|
+
describe("encodeLexicon / decodeLexicon", () => {
|
|
12
|
+
it("round-trips an array of strings", () => {
|
|
13
|
+
const texts = ["hello", "world", "vector search"];
|
|
14
|
+
const encoded = encodeLexicon(texts);
|
|
15
|
+
const decoded = decodeLexicon(encoded);
|
|
16
|
+
expect(decoded).toEqual(texts);
|
|
17
|
+
});
|
|
18
|
+
|
|
19
|
+
it("handles empty array", () => {
|
|
20
|
+
const encoded = encodeLexicon([]);
|
|
21
|
+
expect(encoded.byteLength).toBe(0);
|
|
22
|
+
expect(decodeLexicon(encoded)).toEqual([]);
|
|
23
|
+
});
|
|
24
|
+
|
|
25
|
+
it("handles empty strings", () => {
|
|
26
|
+
const texts = ["", "hello", ""];
|
|
27
|
+
const encoded = encodeLexicon(texts);
|
|
28
|
+
const decoded = decodeLexicon(encoded);
|
|
29
|
+
expect(decoded).toEqual(texts);
|
|
30
|
+
});
|
|
31
|
+
|
|
32
|
+
it("handles unicode strings", () => {
|
|
33
|
+
const texts = ["日本語", "émojis 🎉", "Ñoño"];
|
|
34
|
+
const encoded = encodeLexicon(texts);
|
|
35
|
+
const decoded = decodeLexicon(encoded);
|
|
36
|
+
expect(decoded).toEqual(texts);
|
|
37
|
+
});
|
|
38
|
+
|
|
39
|
+
it("encodes with correct binary format", () => {
|
|
40
|
+
const texts = ["hi"];
|
|
41
|
+
const encoded = encodeLexicon(texts);
|
|
42
|
+
// "hi" = 2 bytes UTF-8, so total = 4 (length) + 2 (data) = 6 bytes
|
|
43
|
+
expect(encoded.byteLength).toBe(6);
|
|
44
|
+
const view = new DataView(encoded.buffer);
|
|
45
|
+
expect(view.getUint32(0, true)).toBe(2); // length prefix
|
|
46
|
+
});
|
|
47
|
+
});
|
|
48
|
+
|
|
49
|
+
describe("decodeLexiconAt", () => {
|
|
50
|
+
it("accesses entries by index", () => {
|
|
51
|
+
const texts = ["alpha", "beta", "gamma"];
|
|
52
|
+
const encoded = encodeLexicon(texts);
|
|
53
|
+
|
|
54
|
+
expect(decodeLexiconAt(encoded, 0)).toBe("alpha");
|
|
55
|
+
expect(decodeLexiconAt(encoded, 1)).toBe("beta");
|
|
56
|
+
expect(decodeLexiconAt(encoded, 2)).toBe("gamma");
|
|
57
|
+
});
|
|
58
|
+
});
|
|
59
|
+
|
|
60
|
+
describe("buildLexiconIndex / decodeLexiconAtOffset", () => {
|
|
61
|
+
it("builds offset index for O(1) access", () => {
|
|
62
|
+
const texts = ["one", "two", "three"];
|
|
63
|
+
const encoded = encodeLexicon(texts);
|
|
64
|
+
const index = buildLexiconIndex(encoded);
|
|
65
|
+
|
|
66
|
+
expect(index.length).toBe(3);
|
|
67
|
+
expect(index[0]).toBe(0);
|
|
68
|
+
|
|
69
|
+
for (let i = 0; i < texts.length; i++) {
|
|
70
|
+
expect(decodeLexiconAtOffset(encoded, index[i])).toBe(texts[i]);
|
|
71
|
+
}
|
|
72
|
+
});
|
|
73
|
+
|
|
74
|
+
it("handles single entry", () => {
|
|
75
|
+
const encoded = encodeLexicon(["solo"]);
|
|
76
|
+
const index = buildLexiconIndex(encoded);
|
|
77
|
+
expect(index.length).toBe(1);
|
|
78
|
+
expect(decodeLexiconAtOffset(encoded, index[0])).toBe("solo");
|
|
79
|
+
});
|
|
80
|
+
});
|
|
81
|
+
|
|
82
|
+
describe("incremental append", () => {
|
|
83
|
+
it("supports appending encoded chunks", () => {
|
|
84
|
+
const part1 = encodeLexicon(["first", "second"]);
|
|
85
|
+
const part2 = encodeLexicon(["third"]);
|
|
86
|
+
|
|
87
|
+
// Simulate append: concatenate buffers
|
|
88
|
+
const combined = new Uint8Array(part1.byteLength + part2.byteLength);
|
|
89
|
+
combined.set(part1, 0);
|
|
90
|
+
combined.set(part2, part1.byteLength);
|
|
91
|
+
|
|
92
|
+
const decoded = decodeLexicon(combined);
|
|
93
|
+
expect(decoded).toEqual(["first", "second", "third"]);
|
|
94
|
+
});
|
|
95
|
+
});
|
|
96
|
+
});
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
import { describe, it, expect } from "vitest";
|
|
2
|
+
import { MemoryManager } from "../memory-manager";
|
|
3
|
+
|
|
4
|
+
describe("MemoryManager", () => {
|
|
5
|
+
it("initializes with correct layout", () => {
|
|
6
|
+
const mm = new MemoryManager(4);
|
|
7
|
+
expect(mm.queryOffset).toBe(0);
|
|
8
|
+
expect(mm.dbOffset).toBe(65536); // aligned to 64KB page
|
|
9
|
+
expect(mm.vectorCount).toBe(0);
|
|
10
|
+
});
|
|
11
|
+
|
|
12
|
+
it("calculates dbOffset as page-aligned", () => {
|
|
13
|
+
// For 1536-dim vectors: 1536 * 4 = 6144 bytes < 64KB
|
|
14
|
+
const mm = new MemoryManager(1536);
|
|
15
|
+
expect(mm.dbOffset).toBe(65536);
|
|
16
|
+
|
|
17
|
+
// For very large dimensions that exceed one page
|
|
18
|
+
const mm2 = new MemoryManager(20000); // 20000 * 4 = 80000 bytes > 64KB
|
|
19
|
+
expect(mm2.dbOffset).toBe(131072); // 2 pages
|
|
20
|
+
});
|
|
21
|
+
|
|
22
|
+
it("writes and reads query vector", () => {
|
|
23
|
+
const mm = new MemoryManager(4);
|
|
24
|
+
const query = new Float32Array([1, 2, 3, 4]);
|
|
25
|
+
mm.writeQuery(query);
|
|
26
|
+
|
|
27
|
+
const read = new Float32Array(mm.memory.buffer, mm.queryOffset, 4);
|
|
28
|
+
expect(read[0]).toBe(1);
|
|
29
|
+
expect(read[1]).toBe(2);
|
|
30
|
+
expect(read[2]).toBe(3);
|
|
31
|
+
expect(read[3]).toBe(4);
|
|
32
|
+
});
|
|
33
|
+
|
|
34
|
+
it("appends vectors and updates count", () => {
|
|
35
|
+
const mm = new MemoryManager(3);
|
|
36
|
+
const v1 = new Float32Array([1, 0, 0]);
|
|
37
|
+
const v2 = new Float32Array([0, 1, 0]);
|
|
38
|
+
|
|
39
|
+
mm.ensureCapacity(2);
|
|
40
|
+
mm.appendVectors([v1, v2]);
|
|
41
|
+
|
|
42
|
+
expect(mm.vectorCount).toBe(2);
|
|
43
|
+
|
|
44
|
+
const read1 = mm.readVector(0);
|
|
45
|
+
expect(read1[0]).toBe(1);
|
|
46
|
+
expect(read1[1]).toBe(0);
|
|
47
|
+
|
|
48
|
+
const read2 = mm.readVector(1);
|
|
49
|
+
expect(read2[0]).toBe(0);
|
|
50
|
+
expect(read2[1]).toBe(1);
|
|
51
|
+
});
|
|
52
|
+
|
|
53
|
+
it("computes scores offset after DB", () => {
|
|
54
|
+
const mm = new MemoryManager(4);
|
|
55
|
+
mm.ensureCapacity(10);
|
|
56
|
+
mm.appendVectors(Array.from({ length: 10 }, () => new Float32Array(4)));
|
|
57
|
+
|
|
58
|
+
// DB starts at page 1 (byte 65536); 10 vectors × 4 dims × 4 bytes = 160 bytes
|
|
59
|
+
expect(mm.scoresOffset).toBe(65536 + 10 * 4 * 4);
|
|
60
|
+
});
|
|
61
|
+
|
|
62
|
+
it("loads bulk vector bytes", () => {
|
|
63
|
+
const mm = new MemoryManager(3);
|
|
64
|
+
const data = new Float32Array([1, 2, 3, 4, 5, 6]);
|
|
65
|
+
const bytes = new Uint8Array(data.buffer);
|
|
66
|
+
|
|
67
|
+
mm.ensureCapacity(2);
|
|
68
|
+
mm.loadVectorBytes(bytes, 2);
|
|
69
|
+
|
|
70
|
+
expect(mm.vectorCount).toBe(2);
|
|
71
|
+
const v0 = mm.readVector(0);
|
|
72
|
+
expect(v0[0]).toBe(1);
|
|
73
|
+
expect(v0[1]).toBe(2);
|
|
74
|
+
expect(v0[2]).toBe(3);
|
|
75
|
+
});
|
|
76
|
+
|
|
77
|
+
it("grows memory when needed", () => {
|
|
78
|
+
const mm = new MemoryManager(1536);
|
|
79
|
+
const initialSize = mm.memory.buffer.byteLength;
|
|
80
|
+
|
|
81
|
+
// Ensure capacity for many vectors
|
|
82
|
+
mm.ensureCapacity(1000);
|
|
83
|
+
expect(mm.memory.buffer.byteLength).toBeGreaterThan(initialSize);
|
|
84
|
+
});
|
|
85
|
+
|
|
86
|
+
it("calculates maxVectors based on WASM limit", () => {
|
|
87
|
+
const mm = new MemoryManager(1536);
|
|
88
|
+
// With 1536 dims: each vector = 1536*4 + 4 = 6148 bytes
|
|
89
|
+
// Available = 4 GiB - 64 KiB = 4294901760 bytes (exact)
|
|
90
|
+
// Max = floor(4294901760 / 6148) = 698,585
|
|
91
|
+
expect(mm.maxVectors).toBeGreaterThan(600000);
|
|
92
|
+
expect(mm.maxVectors).toBeLessThan(800000);
|
|
93
|
+
});
|
|
94
|
+
});
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
import { describe, it, expect } from "vitest";
|
|
2
|
+
import { ResultSet } from "../result-set";
|
|
3
|
+
|
|
4
|
+
describe("ResultSet", () => {
|
|
5
|
+
const keys = ["apple", "banana", "cherry", "date", "elderberry"];
|
|
6
|
+
const resolveKey = (index: number) => keys[index];
|
|
7
|
+
|
|
8
|
+
it("sorts results by descending score", () => {
|
|
9
|
+
const scores = new Float32Array([0.3, 0.9, 0.1, 0.7, 0.5]);
|
|
10
|
+
const rs = ResultSet.fromScores(scores, resolveKey, 5);
|
|
11
|
+
|
|
12
|
+
expect(rs.length).toBe(5);
|
|
13
|
+
expect(rs.get(0).key).toBe("banana");
|
|
14
|
+
expect(rs.get(0).score).toBeCloseTo(0.9, 4);
|
|
15
|
+
expect(rs.get(1).key).toBe("date");
|
|
16
|
+
expect(rs.get(1).score).toBeCloseTo(0.7, 4);
|
|
17
|
+
expect(rs.get(2).key).toBe("elderberry");
|
|
18
|
+
expect(rs.get(3).key).toBe("apple");
|
|
19
|
+
expect(rs.get(4).key).toBe("cherry");
|
|
20
|
+
});
|
|
21
|
+
|
|
22
|
+
it("respects topK limit", () => {
|
|
23
|
+
const scores = new Float32Array([0.3, 0.9, 0.1, 0.7, 0.5]);
|
|
24
|
+
const rs = ResultSet.fromScores(scores, resolveKey, 3);
|
|
25
|
+
|
|
26
|
+
expect(rs.length).toBe(3);
|
|
27
|
+
expect(rs.get(0).key).toBe("banana");
|
|
28
|
+
expect(rs.get(0).score).toBeCloseTo(0.9, 4);
|
|
29
|
+
expect(rs.get(2).key).toBe("elderberry");
|
|
30
|
+
});
|
|
31
|
+
|
|
32
|
+
it("throws on out-of-bounds rank", () => {
|
|
33
|
+
const scores = new Float32Array([0.5, 0.8]);
|
|
34
|
+
const rs = ResultSet.fromScores(scores, resolveKey, 2);
|
|
35
|
+
|
|
36
|
+
expect(() => rs.get(-1)).toThrow(RangeError);
|
|
37
|
+
expect(() => rs.get(2)).toThrow(RangeError);
|
|
38
|
+
});
|
|
39
|
+
|
|
40
|
+
it("returns correct pages", () => {
|
|
41
|
+
const scores = new Float32Array([0.1, 0.2, 0.3, 0.4, 0.5]);
|
|
42
|
+
const rs = ResultSet.fromScores(scores, resolveKey, 5);
|
|
43
|
+
|
|
44
|
+
const page0 = rs.getPage(0, 2);
|
|
45
|
+
expect(page0).toHaveLength(2);
|
|
46
|
+
expect(page0[0].key).toBe("elderberry"); // score 0.5
|
|
47
|
+
expect(page0[1].key).toBe("date"); // score 0.4
|
|
48
|
+
|
|
49
|
+
const page1 = rs.getPage(1, 2);
|
|
50
|
+
expect(page1).toHaveLength(2);
|
|
51
|
+
expect(page1[0].key).toBe("cherry"); // score 0.3
|
|
52
|
+
expect(page1[1].key).toBe("banana"); // score 0.2
|
|
53
|
+
|
|
54
|
+
const page2 = rs.getPage(2, 2);
|
|
55
|
+
expect(page2).toHaveLength(1); // only 1 remaining
|
|
56
|
+
expect(page2[0].key).toBe("apple"); // score 0.1
|
|
57
|
+
});
|
|
58
|
+
|
|
59
|
+
it("handles empty results", () => {
|
|
60
|
+
const scores = new Float32Array(0);
|
|
61
|
+
const rs = ResultSet.fromScores(scores, resolveKey, 10);
|
|
62
|
+
expect(rs.length).toBe(0);
|
|
63
|
+
expect(rs.getPage(0, 10)).toEqual([]);
|
|
64
|
+
});
|
|
65
|
+
|
|
66
|
+
it("only resolves keys lazily (on access)", () => {
|
|
67
|
+
let callCount = 0;
|
|
68
|
+
const lazyResolver = (index: number) => {
|
|
69
|
+
callCount++;
|
|
70
|
+
return keys[index];
|
|
71
|
+
};
|
|
72
|
+
|
|
73
|
+
const scores = new Float32Array([0.3, 0.9, 0.1]);
|
|
74
|
+
const rs = ResultSet.fromScores(scores, lazyResolver, 3);
|
|
75
|
+
|
|
76
|
+
expect(callCount).toBe(0); // no key resolved yet
|
|
77
|
+
|
|
78
|
+
rs.get(0);
|
|
79
|
+
expect(callCount).toBe(1); // resolved only 1
|
|
80
|
+
|
|
81
|
+
rs.getPage(0, 2);
|
|
82
|
+
expect(callCount).toBe(3); // resolved 2 more
|
|
83
|
+
});
|
|
84
|
+
|
|
85
|
+
it("handles topK larger than result count", () => {
|
|
86
|
+
const scores = new Float32Array([0.5, 0.8]);
|
|
87
|
+
const rs = ResultSet.fromScores(scores, resolveKey, 100);
|
|
88
|
+
expect(rs.length).toBe(2);
|
|
89
|
+
});
|
|
90
|
+
});
|