hnswlib-wasm-node 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +370 -0
- package/dist/index.cjs +357 -0
- package/dist/index.js +320 -0
- package/package.json +51 -0
package/README.md
ADDED
|
@@ -0,0 +1,370 @@
|
|
|
1
|
+
# hnswlib-wasm-node
|
|
2
|
+
|
|
3
|
+
Persistence layer for [hnswlib-wasm](https://www.npmjs.com/package/hnswlib-wasm) with JSON and binary format support for Node.js. This package enables saving and loading HNSW vector indexes to disk, making it easy to persist your vector search indexes across application restarts.
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
- ✅ **Dual Package Support**: Works with both ESM (`import`) and CommonJS (`require()`)
|
|
8
|
+
- ✅ **Dual Format Support**: Save/load indexes in both JSON (human-readable) and binary (compact) formats
|
|
9
|
+
- ✅ **Node.js Optimized**: Automatic environment setup for Node.js
|
|
10
|
+
- ✅ **Type Safe**: Full JSDoc documentation
|
|
11
|
+
- ✅ **Error Handling**: Comprehensive validation and error messages
|
|
12
|
+
- ✅ **Optional Logging**: Configurable logger interface
|
|
13
|
+
- ✅ **Production Ready**: Buffer bounds checking, input validation, edge case handling
|
|
14
|
+
|
|
15
|
+
## Installation
|
|
16
|
+
|
|
17
|
+
```bash
|
|
18
|
+
npm install hnswlib-wasm-node
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
**Peer Dependency**: This package requires `hnswlib-wasm` to be installed:
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
npm install hnswlib-wasm
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
## Quick Start
|
|
28
|
+
|
|
29
|
+
### ESM (Recommended)
|
|
30
|
+
|
|
31
|
+
```javascript
|
|
32
|
+
import { loadHnswlib, saveIndexToFile, loadIndexFromFile } from 'hnswlib-wasm-node';
|
|
33
|
+
|
|
34
|
+
// Load the library (environment setup is automatic)
|
|
35
|
+
const hnswlib = await loadHnswlib();
|
|
36
|
+
|
|
37
|
+
// Create an index
|
|
38
|
+
const index = new hnswlib.HierarchicalNSW('l2', 128, '');
|
|
39
|
+
index.initIndex(1000, 16, 200, 100);
|
|
40
|
+
|
|
41
|
+
// Add vectors
|
|
42
|
+
const vectors = [
|
|
43
|
+
[1.0, 2.0, 3.0, ...], // 128-dimensional vector
|
|
44
|
+
[4.0, 5.0, 6.0, ...],
|
|
45
|
+
// ... more vectors
|
|
46
|
+
];
|
|
47
|
+
|
|
48
|
+
vectors.forEach((vector, i) => {
|
|
49
|
+
index.addPoint(vector, i, false);
|
|
50
|
+
});
|
|
51
|
+
|
|
52
|
+
// Save to disk (JSON format)
|
|
53
|
+
const metadata = {
|
|
54
|
+
spaceName: 'l2',
|
|
55
|
+
maxElements: 1000,
|
|
56
|
+
m: 16,
|
|
57
|
+
efConstruction: 200,
|
|
58
|
+
randomSeed: 100
|
|
59
|
+
};
|
|
60
|
+
|
|
61
|
+
await saveIndexToFile(index, 'my-index.json', metadata);
|
|
62
|
+
|
|
63
|
+
// Later, load from disk
|
|
64
|
+
const { index: loadedIndex, metadata: loadedMetadata } = await loadIndexFromFile(hnswlib, 'my-index.json');
|
|
65
|
+
|
|
66
|
+
// Use the loaded index
|
|
67
|
+
const results = loadedIndex.searchKnn([1.0, 2.0, 3.0, ...], 5, undefined);
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
### CommonJS
|
|
71
|
+
|
|
72
|
+
```javascript
|
|
73
|
+
const { loadHnswlib, saveIndexToFile, loadIndexFromFile } = require('hnswlib-wasm-node');
|
|
74
|
+
|
|
75
|
+
// Load the library (environment setup is automatic).
// Note: CommonJS has no top-level `await` — run the code below inside an async function.
|
|
76
|
+
const hnswlib = await loadHnswlib();
|
|
77
|
+
|
|
78
|
+
// ... rest of the code is the same as ESM example ...
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
## API Reference
|
|
82
|
+
|
|
83
|
+
### `loadHnswlib()`
|
|
84
|
+
|
|
85
|
+
Loads the hnswlib-wasm library with automatic Node.js environment setup.
|
|
86
|
+
|
|
87
|
+
```javascript
|
|
88
|
+
import { loadHnswlib } from 'hnswlib-wasm-node';
|
|
89
|
+
|
|
90
|
+
const hnswlib = await loadHnswlib();
|
|
91
|
+
// Returns the loaded hnswlib module
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
**Returns**: `Promise<Object>` - The loaded hnswlib module
|
|
95
|
+
|
|
96
|
+
**Throws**: `Error` if library fails to load
|
|
97
|
+
|
|
98
|
+
---
|
|
99
|
+
|
|
100
|
+
### `saveIndexToFile(index, filename, metadata?)`
|
|
101
|
+
|
|
102
|
+
Saves an HNSW index to disk in JSON or binary format (determined by file extension).
|
|
103
|
+
|
|
104
|
+
```javascript
|
|
105
|
+
import { saveIndexToFile } from 'hnswlib-wasm-node';
|
|
106
|
+
|
|
107
|
+
await saveIndexToFile(index, 'index.json', {
|
|
108
|
+
spaceName: 'l2', // 'l2', 'ip', or 'cosine'
|
|
109
|
+
maxElements: 1000, // Maximum number of elements
|
|
110
|
+
m: 16, // Number of bi-directional links
|
|
111
|
+
efConstruction: 200, // Construction time/accuracy trade-off
|
|
112
|
+
randomSeed: 100 // Random seed
|
|
113
|
+
});
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
**Parameters**:
|
|
117
|
+
- `index` (Object): The hnswlib index object
|
|
118
|
+
- `filename` (string): Output filename. A name ending in `.bin` or `.dat` is written in binary format; any other name (e.g. `.json`) is written as JSON
|
|
119
|
+
- `metadata` (Object, optional): Index metadata
|
|
120
|
+
- `spaceName` (string): 'l2', 'ip', or 'cosine' (default: 'l2')
|
|
121
|
+
- `maxElements` (number): Maximum number of elements (default: the value returned by `index.getMaxElements()`)
|
|
122
|
+
- `m` (number): Number of bi-directional links (default: 16)
|
|
123
|
+
- `efConstruction` (number): Construction parameter (default: 200)
|
|
124
|
+
- `randomSeed` (number): Random seed (default: 100)
|
|
125
|
+
|
|
126
|
+
**Returns**: `Promise<void>`
|
|
127
|
+
|
|
128
|
+
**Throws**: `Error` if save operation fails
|
|
129
|
+
|
|
130
|
+
---
|
|
131
|
+
|
|
132
|
+
### `loadIndexFromFile(hnswlib, filename)`
|
|
133
|
+
|
|
134
|
+
Loads an HNSW index from disk (JSON or binary format).
|
|
135
|
+
|
|
136
|
+
```javascript
|
|
137
|
+
import { loadIndexFromFile } from 'hnswlib-wasm-node';
|
|
138
|
+
|
|
139
|
+
const { index, metadata } = await loadIndexFromFile(hnswlib, 'index.json');
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
**Parameters**:
|
|
143
|
+
- `hnswlib` (Object): The loaded hnswlib module (from `loadHnswlib()`)
|
|
144
|
+
- `filename` (string): Input filename (`.json` for JSON, `.bin` or `.dat` for binary)
|
|
145
|
+
|
|
146
|
+
**Returns**: `Promise<{index: Object, metadata: Object}>`
|
|
147
|
+
- `index`: The recreated index object
|
|
148
|
+
- `metadata`: Index metadata (spaceName, numDimensions, maxElements, m, efConstruction, randomSeed)
|
|
149
|
+
|
|
150
|
+
**Throws**: `Error` if load operation fails
|
|
151
|
+
|
|
152
|
+
---
|
|
153
|
+
|
|
154
|
+
### `setLogger(logger)`
|
|
155
|
+
|
|
156
|
+
Configure a custom logger for the library (optional). By default, informational messages are suppressed and error messages are written with `console.error`.
|
|
157
|
+
|
|
158
|
+
```javascript
|
|
159
|
+
import { setLogger } from 'hnswlib-wasm-node';
|
|
160
|
+
|
|
161
|
+
// Enable logging
|
|
162
|
+
setLogger({
|
|
163
|
+
log: console.log, // Info messages
|
|
164
|
+
error: console.error // Error messages (accepts message and error object)
|
|
165
|
+
});
|
|
166
|
+
|
|
167
|
+
// Disable all logging
|
|
168
|
+
setLogger({
|
|
169
|
+
log: () => {},
|
|
170
|
+
error: () => {}
|
|
171
|
+
});
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
**Parameters**:
|
|
175
|
+
- `logger` (Object): Logger object with `log` and `error` methods
|
|
176
|
+
|
|
177
|
+
---
|
|
178
|
+
|
|
179
|
+
## Format Comparison
|
|
180
|
+
|
|
181
|
+
### JSON Format (`.json`)
|
|
182
|
+
|
|
183
|
+
**Pros**:
|
|
184
|
+
- Human-readable and editable
|
|
185
|
+
- Easy to debug
|
|
186
|
+
- Portable across systems
|
|
187
|
+
|
|
188
|
+
**Cons**:
|
|
189
|
+
- Larger file size
|
|
190
|
+
- Slower to parse
|
|
191
|
+
|
|
192
|
+
**Example**:
|
|
193
|
+
```json
|
|
194
|
+
{
|
|
195
|
+
"version": 1,
|
|
196
|
+
"spaceName": "l2",
|
|
197
|
+
"numDimensions": 128,
|
|
198
|
+
"maxElements": 1000,
|
|
199
|
+
"m": 16,
|
|
200
|
+
"efConstruction": 200,
|
|
201
|
+
"randomSeed": 100,
|
|
202
|
+
"numVectors": 3,
|
|
203
|
+
"vectors": [
|
|
204
|
+
{
|
|
205
|
+
"label": 0,
|
|
206
|
+
"point": [1.0, 2.0, 3.0, ...]
|
|
207
|
+
},
|
|
208
|
+
...
|
|
209
|
+
]
|
|
210
|
+
}
|
|
211
|
+
```
|
|
212
|
+
|
|
213
|
+
### Binary Format (`.bin` or `.dat`)
|
|
214
|
+
|
|
215
|
+
**Pros**:
|
|
216
|
+
- Compact file size (~83% smaller than JSON)
|
|
217
|
+
- Fast read/write operations
|
|
218
|
+
- Efficient for large datasets
|
|
219
|
+
|
|
220
|
+
**Cons**:
|
|
221
|
+
- Not human-readable
|
|
222
|
+
- Requires a reader to inspect
|
|
223
|
+
|
|
224
|
+
**File Structure**:
|
|
225
|
+
- Header (40 bytes): version, spaceName, dimensions, parameters, vector count, reserved space (14 bytes for future use)
|
|
226
|
+
- Vector records: label (4 bytes) + point (numDimensions × 4 bytes)
|
|
227
|
+
|
|
228
|
+
---
|
|
229
|
+
|
|
230
|
+
## Complete Examples
|
|
231
|
+
|
|
232
|
+
### Example 1: Basic Usage
|
|
233
|
+
|
|
234
|
+
```javascript
|
|
235
|
+
import { loadHnswlib, saveIndexToFile, loadIndexFromFile } from 'hnswlib-wasm-node';
|
|
236
|
+
|
|
237
|
+
async function main() {
|
|
238
|
+
// Load library
|
|
239
|
+
const hnswlib = await loadHnswlib();
|
|
240
|
+
|
|
241
|
+
// Create index
|
|
242
|
+
const index = new hnswlib.HierarchicalNSW('l2', 3, '');
|
|
243
|
+
index.initIndex(10, 16, 200, 100);
|
|
244
|
+
|
|
245
|
+
// Add vectors
|
|
246
|
+
const vectors = [
|
|
247
|
+
[1, 0, 0],
|
|
248
|
+
[0, 1, 0],
|
|
249
|
+
[0, 0, 1]
|
|
250
|
+
];
|
|
251
|
+
|
|
252
|
+
vectors.forEach((vector, i) => {
|
|
253
|
+
index.addPoint(vector, i, false);
|
|
254
|
+
});
|
|
255
|
+
|
|
256
|
+
// Save
|
|
257
|
+
await saveIndexToFile(index, 'vectors.json', {
|
|
258
|
+
spaceName: 'l2',
|
|
259
|
+
maxElements: 10,
|
|
260
|
+
m: 16,
|
|
261
|
+
efConstruction: 200,
|
|
262
|
+
randomSeed: 100
|
|
263
|
+
});
|
|
264
|
+
|
|
265
|
+
// Load
|
|
266
|
+
const { index: loadedIndex } = await loadIndexFromFile(hnswlib, 'vectors.json');
|
|
267
|
+
|
|
268
|
+
// Search
|
|
269
|
+
const results = loadedIndex.searchKnn([0.9, 0.1, 0], 2, undefined);
|
|
270
|
+
console.log(results);
|
|
271
|
+
}
|
|
272
|
+
|
|
273
|
+
main().catch(console.error);
|
|
274
|
+
```
|
|
275
|
+
|
|
276
|
+
### Example 2: Using Binary Format
|
|
277
|
+
|
|
278
|
+
```javascript
|
|
279
|
+
import { loadHnswlib, saveIndexToFile, loadIndexFromFile } from 'hnswlib-wasm-node';
|
|
280
|
+
|
|
281
|
+
async function main() {
|
|
282
|
+
const hnswlib = await loadHnswlib();
|
|
283
|
+
const index = new hnswlib.HierarchicalNSW('cosine', 128, '');
|
|
284
|
+
index.initIndex(10000, 32, 200, 100);
|
|
285
|
+
|
|
286
|
+
// ... add vectors ...
|
|
287
|
+
|
|
288
|
+
// Save as binary (more efficient for large datasets)
|
|
289
|
+
await saveIndexToFile(index, 'large-index.bin', {
|
|
290
|
+
spaceName: 'cosine',
|
|
291
|
+
maxElements: 10000,
|
|
292
|
+
m: 32,
|
|
293
|
+
efConstruction: 200,
|
|
294
|
+
randomSeed: 100
|
|
295
|
+
});
|
|
296
|
+
|
|
297
|
+
// Load binary file
|
|
298
|
+
const { index: loadedIndex } = await loadIndexFromFile(hnswlib, 'large-index.bin');
|
|
299
|
+
}
|
|
300
|
+
```
|
|
301
|
+
|
|
302
|
+
### Example 3: Error Handling
|
|
303
|
+
|
|
304
|
+
```javascript
|
|
305
|
+
import { loadHnswlib, saveIndexToFile, loadIndexFromFile } from 'hnswlib-wasm-node';
|
|
306
|
+
|
|
307
|
+
async function main() {
|
|
308
|
+
const hnswlib = await loadHnswlib();
|
|
309
|
+
|
|
310
|
+
try {
|
|
311
|
+
// This will throw if file doesn't exist
|
|
312
|
+
const { index } = await loadIndexFromFile(hnswlib, 'nonexistent.json');
|
|
313
|
+
} catch (error) {
|
|
314
|
+
console.error('Load failed:', error.message);
|
|
315
|
+
// Error: File not found: nonexistent.json
|
|
316
|
+
}
|
|
317
|
+
|
|
318
|
+
try {
|
|
319
|
+
// This will throw if index is empty
|
|
320
|
+
const emptyIndex = new hnswlib.HierarchicalNSW('l2', 3, '');
|
|
321
|
+
emptyIndex.initIndex(10, 16, 200, 100);
|
|
322
|
+
await saveIndexToFile(emptyIndex, 'empty.json');
|
|
323
|
+
} catch (error) {
|
|
324
|
+
console.error('Save failed:', error.message);
|
|
325
|
+
// Error: Cannot save empty index (no vectors added)
|
|
326
|
+
}
|
|
327
|
+
}
|
|
328
|
+
```
|
|
329
|
+
|
|
330
|
+
### Example 4: Custom Logging
|
|
331
|
+
|
|
332
|
+
```javascript
|
|
333
|
+
import { loadHnswlib, saveIndexToFile, setLogger } from 'hnswlib-wasm-node';
|
|
334
|
+
|
|
335
|
+
// Enable verbose logging
|
|
336
|
+
setLogger({
|
|
337
|
+
log: (message) => console.log(`[INFO] ${message}`),
|
|
338
|
+
error: (message, error) => console.error(`[ERROR] ${message}`, error)
|
|
339
|
+
});
|
|
340
|
+
|
|
341
|
+
const hnswlib = await loadHnswlib();
|
|
342
|
+
const index = new hnswlib.HierarchicalNSW('l2', 128, '');
|
|
343
|
+
// ... create and save index ...
|
|
344
|
+
// Will output: [INFO] Index saved to index.json (JSON format, 100 vectors)
|
|
345
|
+
```
|
|
346
|
+
|
|
347
|
+
---
|
|
348
|
+
|
|
349
|
+
## Requirements
|
|
350
|
+
|
|
351
|
+
- **Node.js**: >= 18.0.0
|
|
352
|
+
- **Peer Dependency**: `hnswlib-wasm` ^0.8.2
|
|
353
|
+
|
|
354
|
+
## License
|
|
355
|
+
|
|
356
|
+
MIT
|
|
357
|
+
|
|
358
|
+
## Related Projects
|
|
359
|
+
|
|
360
|
+
- [hnswlib-wasm](https://www.npmjs.com/package/hnswlib-wasm) - WebAssembly bindings for HNSWlib
|
|
361
|
+
- [hnswlib-node](https://www.npmjs.com/package/hnswlib-node) - Native Node.js bindings for HNSWlib
|
|
362
|
+
|
|
363
|
+
## Contributing
|
|
364
|
+
|
|
365
|
+
Contributions are welcome! Please feel free to submit a Pull Request.
|
|
366
|
+
|
|
367
|
+
## Support
|
|
368
|
+
|
|
369
|
+
For issues and questions, please open an issue on the GitHub repository.
|
|
370
|
+
|
package/dist/index.cjs
ADDED
|
@@ -0,0 +1,357 @@
|
|
|
1
|
+
var __create = Object.create;
|
|
2
|
+
var __defProp = Object.defineProperty;
|
|
3
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
4
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
5
|
+
var __getProtoOf = Object.getPrototypeOf;
|
|
6
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
7
|
+
var __export = (target, all) => {
|
|
8
|
+
for (var name in all)
|
|
9
|
+
__defProp(target, name, { get: all[name], enumerable: true });
|
|
10
|
+
};
|
|
11
|
+
var __copyProps = (to, from, except, desc) => {
|
|
12
|
+
if (from && typeof from === "object" || typeof from === "function") {
|
|
13
|
+
for (let key of __getOwnPropNames(from))
|
|
14
|
+
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
15
|
+
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
16
|
+
}
|
|
17
|
+
return to;
|
|
18
|
+
};
|
|
19
|
+
var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
|
|
20
|
+
// If the importer is in node compatibility mode or this is not an ESM
|
|
21
|
+
// file that has been converted to a CommonJS file using a Babel-
|
|
22
|
+
// compatible transform (i.e. "__esModule" has not been set), then set
|
|
23
|
+
// "default" to the CommonJS "module.exports" for node compatibility.
|
|
24
|
+
isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
|
|
25
|
+
mod
|
|
26
|
+
));
|
|
27
|
+
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
28
|
+
|
|
29
|
+
// index.js
|
|
30
|
+
var index_exports = {};
|
|
31
|
+
__export(index_exports, {
|
|
32
|
+
loadHnswlib: () => loadHnswlib,
|
|
33
|
+
loadIndexFromFile: () => loadIndexFromFile,
|
|
34
|
+
saveIndexToFile: () => saveIndexToFile,
|
|
35
|
+
setLogger: () => setLogger
|
|
36
|
+
});
|
|
37
|
+
module.exports = __toCommonJS(index_exports);
|
|
38
|
+
var import_promises = __toESM(require("fs/promises"), 1);
|
|
39
|
+
var defaultLogger = {
|
|
40
|
+
log: () => {
|
|
41
|
+
},
|
|
42
|
+
error: console.error
|
|
43
|
+
};
|
|
44
|
+
var logger = defaultLogger;
|
|
45
|
+
function setLogger(customLogger) {
|
|
46
|
+
logger = { ...defaultLogger, ...customLogger };
|
|
47
|
+
}
|
|
48
|
+
/**
 * Install minimal browser-global stand-ins when they are missing.
 *
 * - `indexedDB`: a stub whose `open()` returns a request-shaped object
 *   (null handlers plus a `result` exposing `createObjectStore` and
 *   `objectStoreNames.contains`).
 * - `window`: an empty object.
 *
 * Globals that already exist are left untouched. `globalThis` is used instead
 * of the Node-specific `global` alias; in Node both name the same object.
 *
 * @returns {void}
 */
function setupNodeEnvironment() {
  if (typeof indexedDB === "undefined") {
    globalThis.indexedDB = {
      open: () => ({
        onerror: null,
        onsuccess: null,
        onupgradeneeded: null,
        result: {
          createObjectStore: () => ({}),
          objectStoreNames: { contains: () => false }
        }
      })
    };
  }
  if (typeof window === "undefined") {
    globalThis.window = {};
  }
}
|
|
66
|
+
/**
 * Load the hnswlib-wasm module after preparing the global environment.
 *
 * @returns {Promise<Object>} Whatever the upstream `loadHnswlib()` resolves to.
 * @throws {Error} "Failed to load hnswlib: …" when the import or the upstream
 *   loader fails; the underlying error is attached as `cause`.
 */
async function loadHnswlib() {
  try {
    setupNodeEnvironment();
    const { loadHnswlib: loadHnswlibOriginal } = await import("hnswlib-wasm/dist/hnswlib.js");
    return await loadHnswlibOriginal();
  } catch (error) {
    logger.error("Failed to load hnswlib:", error);
    // Keep the original error (and its stack) reachable through `cause`.
    throw new Error(`Failed to load hnswlib: ${error?.message ?? error}`, { cause: error });
  }
}
|
|
76
|
+
/**
 * Collect every labelled vector stored in an index.
 *
 * @param {Object} index - Object exposing `getUsedLabels()` and `getPoint(label)`.
 * @returns {Array<{label: *, point: Array}>} One entry per label returned by
 *   `getUsedLabels()`; points that are not plain arrays (e.g. typed arrays) are
 *   converted with `Array.from`.
 * @throws {Error} "Failed to extract vectors from index: …" with the original
 *   error attached as `cause`.
 */
function extractVectorsFromIndex(index) {
  try {
    const vectors = [];
    for (const label of index.getUsedLabels()) {
      const point = index.getPoint(label);
      vectors.push({ label, point: Array.isArray(point) ? point : Array.from(point) });
    }
    return vectors;
  } catch (error) {
    throw new Error(`Failed to extract vectors from index: ${error?.message ?? error}`, { cause: error });
  }
}
|
|
90
|
+
/**
 * Ensure `index` looks like an hnswlib index before it is read.
 *
 * @param {Object} index - Candidate index object.
 * @returns {void}
 * @throws {Error} When `index` is falsy, or when any of `getNumDimensions`,
 *   `getCurrentCount`, `getUsedLabels`, `getPoint` is not a function.
 */
function validateIndex(index) {
  if (!index) {
    throw new Error("Index parameter is required");
  }
  const requiredMethods = ["getNumDimensions", "getCurrentCount", "getUsedLabels", "getPoint"];
  const hasAllMethods = requiredMethods.every((method) => typeof index[method] === "function");
  if (!hasAllMethods) {
    throw new Error("Invalid index object: missing required methods");
  }
}
|
|
98
|
+
/**
 * Ensure `filename` is a usable, non-blank string.
 *
 * @param {string} filename - Candidate file name.
 * @returns {void}
 * @throws {Error} "Filename must be a non-empty string" for non-strings and
 *   the empty string; "Filename cannot be empty" for whitespace-only strings.
 */
function validateFilename(filename) {
  const isNonEmptyString = typeof filename === "string" && filename.length > 0;
  if (!isNonEmptyString) {
    throw new Error("Filename must be a non-empty string");
  }
  if (filename.trim() === "") {
    throw new Error("Filename cannot be empty");
  }
}
|
|
106
|
+
/**
 * Save an index to disk; the format is chosen from the file name.
 *
 * Names ending in `.bin` or `.dat` are written in binary format, everything
 * else as JSON.
 *
 * @param {Object} index - hnswlib index (checked with `validateIndex`).
 * @param {string} filename - Destination path (checked with `validateFilename`).
 * @param {Object|null} [metadata={}] - Optional construction parameters
 *   (`spaceName`, `maxElements`, `m`, `efConstruction`, `randomSeed`).
 *   `null` is treated like an omitted argument.
 * @returns {Promise<void>}
 * @throws {Error} When validation fails, the index reports zero vectors, or
 *   writing fails. Failures inside the save step are logged through
 *   `logger.error` and then rethrown unchanged.
 */
async function saveIndexToFile(index, filename, metadata = {}) {
  validateIndex(index);
  validateFilename(filename);
  // Default parameters only cover `undefined`; an explicit null would
  // otherwise fail later with "Cannot read properties of null".
  const meta = metadata ?? {};
  try {
    const numDimensions = index.getNumDimensions();
    if (index.getCurrentCount() === 0) {
      throw new Error("Cannot save empty index (no vectors added)");
    }
    const vectors = extractVectorsFromIndex(index);
    const isBinary = filename.endsWith(".bin") || filename.endsWith(".dat");
    if (isBinary) {
      await saveIndexBinary(index, filename, meta, numDimensions, vectors);
    } else {
      await saveIndexJSON(index, filename, meta, numDimensions, vectors);
    }
  } catch (error) {
    logger.error(`Failed to save index to ${filename}:`, error);
    throw error;
  }
}
|
|
127
|
+
/**
 * Write the index as a pretty-printed JSON document (format version 1).
 *
 * @param {Object} index - Source index; `getMaxElements()` is consulted only
 *   when `metadata.maxElements` is missing or falsy.
 * @param {string} filename - Destination path.
 * @param {Object} metadata - Construction parameters; missing values fall back
 *   to `spaceName: "l2"`, `m: 16`, `efConstruction: 200`, `randomSeed: 100`.
 * @param {number} numDimensions - Dimensionality recorded in the file.
 * @param {Array<{label: *, point: Array}>} vectors - Vectors to serialize.
 * @returns {Promise<void>}
 * @throws {Error} "Failed to write JSON file: …" with the original error
 *   attached as `cause`.
 */
async function saveIndexJSON(index, filename, metadata, numDimensions, vectors) {
  const data = {
    version: 1,
    spaceName: metadata.spaceName || "l2",
    numDimensions,
    maxElements: metadata.maxElements || index.getMaxElements(),
    m: metadata.m || 16,
    efConstruction: metadata.efConstruction || 200,
    // `??` instead of `||`: a seed of 0 must be stored as 0, not replaced by 100.
    randomSeed: metadata.randomSeed ?? 100,
    numVectors: vectors.length,
    vectors
  };
  try {
    await import_promises.default.writeFile(filename, JSON.stringify(data, null, 2), "utf8");
    logger.log(`Index saved to ${filename} (JSON format, ${vectors.length} vectors)`);
  } catch (error) {
    throw new Error(`Failed to write JSON file: ${error?.message ?? error}`, { cause: error });
  }
}
|
|
146
|
+
/**
 * Write the index in the compact binary format (version 1, little-endian).
 *
 * Layout: a 40-byte header — version (u8), space code (u8), numDimensions,
 * maxElements, m, efConstruction, randomSeed, vector count (u32 each) and
 * 14 zeroed reserved bytes — followed by one record per vector:
 * label (u32) + `numDimensions` float32 values.
 *
 * @param {Object} index - Source index; `getMaxElements()` is consulted only
 *   when `metadata.maxElements` is missing or falsy.
 * @param {string} filename - Destination path.
 * @param {Object} metadata - Construction parameters; missing values fall back
 *   to `spaceName: "l2"`, `m: 16`, `efConstruction: 200`, `randomSeed: 100`.
 * @param {number} numDimensions - Expected length of every vector.
 * @param {Array<{label: number, point: Array<number>}>} vectors - Vectors to write.
 * @returns {Promise<void>}
 * @throws {Error} For an unrecognized `spaceName` (previously this was
 *   silently stored as "l2"), or "Failed to write binary file: …" (original
 *   error attached as `cause`) for dimension mismatches, non-finite values and
 *   I/O failures.
 */
async function saveIndexBinary(index, filename, metadata, numDimensions, vectors) {
  const spaceNameCodes = new Map([["l2", 0], ["ip", 1], ["cosine", 2]]);
  const spaceName = metadata.spaceName || "l2";
  const spaceNameCode = spaceNameCodes.get(spaceName);
  if (spaceNameCode === undefined) {
    // Writing code 0 here would relabel the metric as l2 without any warning.
    throw new Error(`Invalid spaceName: ${spaceName}. Must be 'l2', 'ip', or 'cosine'`);
  }
  const headerSize = 40;
  const reservedSize = 14;
  const vectorSize = 4 + numDimensions * 4;
  const bufferSize = headerSize + vectors.length * vectorSize;
  try {
    // Every byte is written below (the reserved area is zero-filled), so the
    // uninitialized allocation never reaches the file.
    const buffer = Buffer.allocUnsafe(bufferSize);
    let offset = 0;
    const writeU32 = (value) => {
      buffer.writeUInt32LE(value, offset);
      offset += 4;
    };
    buffer.writeUInt8(1, offset);
    offset += 1;
    buffer.writeUInt8(spaceNameCode, offset);
    offset += 1;
    writeU32(numDimensions);
    writeU32(metadata.maxElements || index.getMaxElements());
    writeU32(metadata.m || 16);
    writeU32(metadata.efConstruction || 200);
    // `??` instead of `||`: a seed of 0 must be stored as 0, not replaced by 100.
    writeU32(metadata.randomSeed ?? 100);
    writeU32(vectors.length);
    buffer.fill(0, offset, offset + reservedSize);
    offset += reservedSize;
    for (const { label, point } of vectors) {
      if (point.length !== numDimensions) {
        throw new Error(`Vector dimension mismatch: expected ${numDimensions}, got ${point.length}`);
      }
      writeU32(label);
      for (const value of point) {
        if (typeof value !== "number" || !Number.isFinite(value)) {
          throw new Error(`Invalid vector value: ${value}`);
        }
        buffer.writeFloatLE(value, offset);
        offset += 4;
      }
    }
    await import_promises.default.writeFile(filename, buffer);
    logger.log(`Index saved to ${filename} (binary format, ${vectors.length} vectors, ${bufferSize} bytes)`);
  } catch (error) {
    throw new Error(`Failed to write binary file: ${error?.message ?? error}`, { cause: error });
  }
}
|
|
194
|
+
/**
 * Load an index from disk; the format is chosen from the file name.
 *
 * Names ending in `.bin` or `.dat` are read as binary, everything else as JSON.
 *
 * @param {Object} hnswlib - Loaded hnswlib module exposing `HierarchicalNSW`.
 * @param {string} filename - Source path (checked with `validateFilename`).
 * @returns {Promise<{index: Object, metadata: Object}>} The rebuilt index and
 *   the parameters it was created with.
 * @throws {Error} When arguments are invalid, the file does not exist
 *   ("File not found: …"), the file exists but cannot be accessed
 *   ("Cannot access file: …"), or parsing/rebuilding fails. Load failures are
 *   logged through `logger.error` and rethrown unchanged.
 */
async function loadIndexFromFile(hnswlib, filename) {
  validateFilename(filename);
  if (!hnswlib || typeof hnswlib.HierarchicalNSW !== "function") {
    throw new Error("Invalid hnswlib module: HierarchicalNSW constructor not found");
  }
  try {
    await import_promises.default.access(filename);
  } catch (error) {
    if (error?.code === "ENOENT") {
      throw new Error(`File not found: ${filename}`, { cause: error });
    }
    // Permission problems and similar must not be reported as a missing file.
    throw new Error(`Cannot access file: ${filename} (${error?.message ?? error})`, { cause: error });
  }
  const isBinary = filename.endsWith(".bin") || filename.endsWith(".dat");
  try {
    return isBinary
      ? await loadIndexBinary(hnswlib, filename)
      : await loadIndexJSON(hnswlib, filename);
  } catch (error) {
    logger.error(`Failed to load index from ${filename}:`, error);
    throw error;
  }
}
|
|
216
|
+
/**
 * Rebuild an index from a JSON index file.
 *
 * @param {Object} hnswlib - Loaded hnswlib module exposing `HierarchicalNSW`.
 * @param {string} filename - Path of the JSON file.
 * @returns {Promise<{index: Object, metadata: Object}>} The rebuilt index and
 *   the parameters actually used to create it.
 * @throws {Error} "Invalid JSON file: …" / "Failed to read file: …" when the
 *   file cannot be read or parsed; "Invalid index file format: …" or
 *   "Invalid spaceName: …" when required fields are missing or malformed;
 *   "Failed to recreate index: …" when rebuilding fails. Wrapped errors carry
 *   the original as `cause`.
 */
async function loadIndexJSON(hnswlib, filename) {
  let data;
  try {
    const content = await import_promises.default.readFile(filename, "utf8");
    data = JSON.parse(content);
  } catch (error) {
    if (error instanceof SyntaxError) {
      throw new Error(`Invalid JSON file: ${error.message}`, { cause: error });
    }
    throw new Error(`Failed to read file: ${error?.message ?? error}`, { cause: error });
  }
  // `null`, numbers and strings are valid JSON documents; reject them here
  // instead of failing with a TypeError on property access.
  if (data === null || typeof data !== "object" || !Array.isArray(data.vectors)) {
    throw new Error("Invalid index file format: missing or invalid vectors array");
  }
  if (!Number.isInteger(data.numDimensions) || data.numDimensions <= 0) {
    throw new Error("Invalid index file format: invalid numDimensions");
  }
  if (!data.spaceName || !["l2", "ip", "cosine"].includes(data.spaceName)) {
    throw new Error(`Invalid spaceName: ${data.spaceName}. Must be 'l2', 'ip', or 'cosine'`);
  }
  const declaredMax = Number.isInteger(data.maxElements) && data.maxElements > 0 ? data.maxElements : 100;
  const metadata = {
    spaceName: data.spaceName,
    numDimensions: data.numDimensions,
    // Capacity must cover every stored vector, otherwise the rebuild below
    // cannot insert them all.
    maxElements: Math.max(declaredMax, data.vectors.length),
    m: data.m || 16,
    efConstruction: data.efConstruction || 200,
    // `??` instead of `||`: a stored seed of 0 must be honoured.
    randomSeed: data.randomSeed ?? 100
  };
  try {
    const index = new hnswlib.HierarchicalNSW(metadata.spaceName, metadata.numDimensions, "");
    index.initIndex(metadata.maxElements, metadata.m, metadata.efConstruction, metadata.randomSeed);
    for (const { label, point } of data.vectors) {
      if (typeof label !== "number") {
        throw new Error(`Invalid label: ${label}. Must be a number`);
      }
      if (!Array.isArray(point) || point.length !== metadata.numDimensions) {
        throw new Error(`Invalid vector: expected array of length ${metadata.numDimensions}`);
      }
      index.addPoint(point, label, false);
    }
    logger.log(`Index loaded from ${filename} (JSON format, ${data.vectors.length} vectors)`);
    return { index, metadata };
  } catch (error) {
    throw new Error(`Failed to recreate index: ${error?.message ?? error}`, { cause: error });
  }
}
|
|
269
|
+
/**
 * Rebuild an index from a binary index file (version 1, little-endian).
 *
 * Expected layout: a 40-byte header — version (u8), space code (u8),
 * numDimensions, maxElements, m, efConstruction, randomSeed, vector count
 * (u32 each) and 14 reserved bytes — followed by one record per vector:
 * label (u32) + `numDimensions` float32 values.
 *
 * @param {Object} hnswlib - Loaded hnswlib module exposing `HierarchicalNSW`.
 * @param {string} filename - Path of the binary file.
 * @returns {Promise<{index: Object, metadata: Object}>} The rebuilt index and
 *   the parameters actually used to create it.
 * @throws {Error} When the file cannot be read, is shorter than its header
 *   declares, has an unsupported version, an unknown space code (previously
 *   treated as "l2"), or out-of-range counts; "Failed to recreate index: …"
 *   when rebuilding fails. Wrapped errors carry the original as `cause`.
 */
async function loadIndexBinary(hnswlib, filename) {
  const headerSize = 40;
  const reservedSize = 14;
  const spaceNames = ["l2", "ip", "cosine"];
  let buffer;
  try {
    buffer = await import_promises.default.readFile(filename);
  } catch (error) {
    throw new Error(`Failed to read file: ${error?.message ?? error}`, { cause: error });
  }
  if (buffer.length < headerSize) {
    throw new Error(`File too small to be a valid index file (${buffer.length} bytes, minimum ${headerSize} bytes)`);
  }
  let offset = 0;
  const readU32 = () => {
    const value = buffer.readUInt32LE(offset);
    offset += 4;
    return value;
  };
  const version = buffer.readUInt8(offset);
  offset += 1;
  if (version !== 1) {
    throw new Error(`Unsupported binary format version: ${version}. Expected version 1`);
  }
  const spaceNameCode = buffer.readUInt8(offset);
  offset += 1;
  const spaceName = spaceNames[spaceNameCode];
  if (spaceName === undefined) {
    // Falling back to l2 would silently change the distance metric.
    throw new Error(`Invalid spaceName code: ${spaceNameCode}. Expected 0 (l2), 1 (ip), or 2 (cosine)`);
  }
  const numDimensions = readU32();
  const storedMaxElements = readU32();
  const m = readU32();
  const efConstruction = readU32();
  const randomSeed = readU32();
  const numVectors = readU32();
  offset += reservedSize;
  if (numDimensions <= 0 || numDimensions > 1e5) {
    throw new Error(`Invalid numDimensions: ${numDimensions}`);
  }
  if (numVectors > 1e8) {
    throw new Error(`Invalid numVectors: ${numVectors}`);
  }
  const vectorSize = 4 + numDimensions * 4;
  const expectedSize = headerSize + numVectors * vectorSize;
  // After this check every record read below is known to be in bounds.
  if (buffer.length < expectedSize) {
    throw new Error(`File size mismatch: expected ${expectedSize} bytes, got ${buffer.length} bytes`);
  }
  // Capacity must cover every stored vector, otherwise the rebuild below
  // cannot insert them all.
  const maxElements = Math.max(storedMaxElements, numVectors);
  try {
    const index = new hnswlib.HierarchicalNSW(spaceName, numDimensions, "");
    index.initIndex(maxElements, m, efConstruction, randomSeed);
    for (let i = 0; i < numVectors; i++) {
      const label = readU32();
      const point = new Array(numDimensions);
      for (let j = 0; j < numDimensions; j++) {
        point[j] = buffer.readFloatLE(offset);
        offset += 4;
      }
      index.addPoint(point, label, false);
    }
    logger.log(`Index loaded from ${filename} (binary format, ${numVectors} vectors)`);
    return {
      index,
      metadata: {
        spaceName,
        numDimensions,
        maxElements,
        m,
        efConstruction,
        randomSeed
      }
    };
  } catch (error) {
    throw new Error(`Failed to recreate index: ${error?.message ?? error}`, { cause: error });
  }
}
|
|
351
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
352
|
+
0 && (module.exports = {
|
|
353
|
+
loadHnswlib,
|
|
354
|
+
loadIndexFromFile,
|
|
355
|
+
saveIndexToFile,
|
|
356
|
+
setLogger
|
|
357
|
+
});
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,320 @@
|
|
|
1
|
+
// index.js
|
|
2
|
+
import fs from "fs/promises";
|
|
3
|
+
/**
 * Logger used until `setLogger` installs something else:
 * informational messages are dropped, errors go to `console.error`.
 */
const defaultLogger = {
  log: () => {
  },
  error: console.error
};

/** Logger currently used by every function in this module. */
let logger = defaultLogger;

/**
 * Install a custom logger.
 *
 * Only the `log` and `error` members are read. Each one is used when it is a
 * function (own or inherited, so class-based loggers work) and is bound to the
 * supplied object so methods relying on `this` keep working. Missing or
 * non-function members fall back to the default logger instead of leaving the
 * module with an uncallable `logger.log` / `logger.error`.
 *
 * @param {{log?: Function, error?: Function}} [customLogger] - Logger object;
 *   omit (or pass null) to restore the defaults.
 * @returns {void}
 */
function setLogger(customLogger) {
  const next = { ...defaultLogger };
  for (const method of ["log", "error"]) {
    const candidate = customLogger?.[method];
    if (typeof candidate === "function") {
      next[method] = candidate.bind(customLogger);
    }
  }
  logger = next;
}
|
|
12
|
+
/**
 * Install minimal browser-global stand-ins when they are missing.
 *
 * - `indexedDB`: a stub whose `open()` returns a request-shaped object
 *   (null handlers plus a `result` exposing `createObjectStore` and
 *   `objectStoreNames.contains`).
 * - `window`: an empty object.
 *
 * Globals that already exist are left untouched. `globalThis` is used instead
 * of the Node-specific `global` alias; in Node both name the same object.
 *
 * @returns {void}
 */
function setupNodeEnvironment() {
  if (typeof indexedDB === "undefined") {
    globalThis.indexedDB = {
      open: () => ({
        onerror: null,
        onsuccess: null,
        onupgradeneeded: null,
        result: {
          createObjectStore: () => ({}),
          objectStoreNames: { contains: () => false }
        }
      })
    };
  }
  if (typeof window === "undefined") {
    globalThis.window = {};
  }
}
|
|
30
|
+
/**
 * Load the hnswlib-wasm module after preparing the global environment.
 *
 * @returns {Promise<Object>} Whatever the upstream `loadHnswlib()` resolves to.
 * @throws {Error} "Failed to load hnswlib: …" when the import or the upstream
 *   loader fails; the underlying error is attached as `cause`.
 */
async function loadHnswlib() {
  try {
    setupNodeEnvironment();
    const { loadHnswlib: loadHnswlibOriginal } = await import("hnswlib-wasm/dist/hnswlib.js");
    return await loadHnswlibOriginal();
  } catch (error) {
    logger.error("Failed to load hnswlib:", error);
    // Keep the original error (and its stack) reachable through `cause`.
    throw new Error(`Failed to load hnswlib: ${error?.message ?? error}`, { cause: error });
  }
}
|
|
40
|
+
/**
 * Collect every labelled vector stored in an index.
 *
 * @param {Object} index - Object exposing `getUsedLabels()` and `getPoint(label)`.
 * @returns {Array<{label: *, point: Array}>} One entry per label returned by
 *   `getUsedLabels()`; points that are not plain arrays (e.g. typed arrays) are
 *   converted with `Array.from`.
 * @throws {Error} "Failed to extract vectors from index: …" with the original
 *   error attached as `cause`.
 */
function extractVectorsFromIndex(index) {
  try {
    const vectors = [];
    for (const label of index.getUsedLabels()) {
      const point = index.getPoint(label);
      vectors.push({ label, point: Array.isArray(point) ? point : Array.from(point) });
    }
    return vectors;
  } catch (error) {
    throw new Error(`Failed to extract vectors from index: ${error?.message ?? error}`, { cause: error });
  }
}
|
|
54
|
+
/**
 * Ensure `index` looks like an hnswlib index before it is read.
 *
 * @param {Object} index - Candidate index object.
 * @returns {void}
 * @throws {Error} When `index` is falsy, or when any of `getNumDimensions`,
 *   `getCurrentCount`, `getUsedLabels`, `getPoint` is not a function.
 */
function validateIndex(index) {
  if (!index) {
    throw new Error("Index parameter is required");
  }
  const requiredMethods = ["getNumDimensions", "getCurrentCount", "getUsedLabels", "getPoint"];
  const hasAllMethods = requiredMethods.every((method) => typeof index[method] === "function");
  if (!hasAllMethods) {
    throw new Error("Invalid index object: missing required methods");
  }
}
|
|
62
|
+
/**
 * Ensure `filename` is a usable, non-blank string.
 *
 * @param {string} filename - Candidate file name.
 * @returns {void}
 * @throws {Error} "Filename must be a non-empty string" for non-strings and
 *   the empty string; "Filename cannot be empty" for whitespace-only strings.
 */
function validateFilename(filename) {
  const isNonEmptyString = typeof filename === "string" && filename.length > 0;
  if (!isNonEmptyString) {
    throw new Error("Filename must be a non-empty string");
  }
  if (filename.trim() === "") {
    throw new Error("Filename cannot be empty");
  }
}
|
|
70
|
+
/**
 * Saves an index to disk, choosing the format from the file extension.
 *
 * Filenames ending in ".bin" or ".dat" are written with `saveIndexBinary`;
 * everything else is written with `saveIndexJSON`.
 *
 * @param {object} index - Index accepted by `validateIndex`.
 * @param {string} filename - Destination path accepted by `validateFilename`.
 * @param {object|null} [metadata={}] - Optional index parameters forwarded to
 *   the format writer; `null` is treated like an empty object.
 * @returns {Promise<void>}
 * @throws {Error} On invalid arguments, when the index reports zero vectors,
 *   or when extraction/writing fails (logged via `logger.error`, then rethrown).
 */
async function saveIndexToFile(index, filename, metadata = {}) {
  validateIndex(index);
  validateFilename(filename);
  // The default parameter only covers `undefined`; an explicit `null` would
  // otherwise crash on the first property read inside the format writers.
  const effectiveMetadata = metadata ?? {};
  try {
    const numDimensions = index.getNumDimensions();
    if (index.getCurrentCount() === 0) {
      throw new Error("Cannot save empty index (no vectors added)");
    }
    const vectors = extractVectorsFromIndex(index);
    const isBinary = filename.endsWith(".bin") || filename.endsWith(".dat");
    const writeIndex = isBinary ? saveIndexBinary : saveIndexJSON;
    await writeIndex(index, filename, effectiveMetadata, numDimensions, vectors);
  } catch (error) {
    logger.error(`Failed to save index to ${filename}:`, error);
    throw error;
  }
}
|
|
91
|
+
/**
 * Writes the index as a pretty-printed (2-space) JSON document.
 *
 * The document contains `version` (1), the index parameters, `numVectors`
 * and the `vectors` array. Missing metadata falls back to: spaceName "l2",
 * maxElements `index.getMaxElements()`, m 16, efConstruction 200,
 * randomSeed 100.
 *
 * @param {object} index - Index; `getMaxElements()` is used only when
 *   `metadata.maxElements` is not provided (or falsy).
 * @param {string} filename - Destination path.
 * @param {object} metadata - Index parameters (all optional).
 * @param {number} numDimensions - Vector dimensionality recorded in the file.
 * @param {Array<{label: *, point: Array}>} vectors - Records to persist.
 * @returns {Promise<void>}
 * @throws {Error} If `spaceName` is not 'l2', 'ip' or 'cosine', or if the file
 *   cannot be written ("Failed to write JSON file: …", original in `cause`).
 */
async function saveIndexJSON(index, filename, metadata, numDimensions, vectors) {
  const spaceName = metadata.spaceName || "l2";
  // Reject here what the JSON loader would reject later, so we never produce
  // a file that cannot be loaded back.
  if (!["l2", "ip", "cosine"].includes(spaceName)) {
    throw new Error(`Invalid spaceName: ${spaceName}. Must be 'l2', 'ip', or 'cosine'`);
  }
  const data = {
    version: 1,
    spaceName,
    numDimensions,
    maxElements: metadata.maxElements || index.getMaxElements(),
    m: metadata.m || 16,
    efConstruction: metadata.efConstruction || 200,
    // `??` rather than `||`: a seed of 0 is legitimate and must be preserved.
    randomSeed: metadata.randomSeed ?? 100,
    numVectors: vectors.length,
    vectors
  };
  try {
    await fs.writeFile(filename, JSON.stringify(data, null, 2), "utf8");
    logger.log(`Index saved to ${filename} (JSON format, ${vectors.length} vectors)`);
  } catch (error) {
    throw new Error(`Failed to write JSON file: ${error.message}`, { cause: error });
  }
}
|
|
110
|
+
/**
 * Writes the index in the package's little-endian binary format (version 1).
 *
 * Layout: a 40-byte header — version (u8), space code (u8; 0 = l2, 1 = ip,
 * 2 = cosine), numDimensions, maxElements, m, efConstruction, randomSeed,
 * numVectors (u32 each), 14 zeroed reserved bytes — followed by one record
 * per vector: label (u32) and `numDimensions` float32 values.
 *
 * Missing metadata falls back to: spaceName "l2", maxElements
 * `index.getMaxElements()`, m 16, efConstruction 200, randomSeed 100.
 *
 * @param {object} index - Index; `getMaxElements()` is used only when
 *   `metadata.maxElements` is not provided (or falsy).
 * @param {string} filename - Destination path.
 * @param {object} metadata - Index parameters (all optional).
 * @param {number} numDimensions - Expected length of every vector.
 * @param {Array<{label: number, point: number[]}>} vectors - Records to persist.
 * @returns {Promise<void>}
 * @throws {Error} If `spaceName` is not 'l2', 'ip' or 'cosine'; or
 *   "Failed to write binary file: …" (original in `cause`) for dimension
 *   mismatches, non-finite values, out-of-range integers or I/O failures.
 */
async function saveIndexBinary(index, filename, metadata, numDimensions, vectors) {
  const HEADER_SIZE = 40;
  const RESERVED_SIZE = 14;
  const SPACE_NAMES = ["l2", "ip", "cosine"]; // position in the list === on-disk code

  const spaceName = metadata.spaceName || "l2";
  const spaceNameCode = SPACE_NAMES.indexOf(spaceName);
  // Storing an unknown space as "l2" would silently change search semantics
  // after a reload, so refuse instead.
  if (spaceNameCode === -1) {
    throw new Error(`Invalid spaceName: ${spaceName}. Must be 'l2', 'ip', or 'cosine'`);
  }

  const vectorSize = 4 + numDimensions * 4;
  const bufferSize = HEADER_SIZE + vectors.length * vectorSize;
  try {
    // allocUnsafe is fine: every byte (header, reserved area, records) is
    // written below before the buffer leaves this function.
    const buffer = Buffer.allocUnsafe(bufferSize);
    // Each Buffer write method returns the offset just past what it wrote.
    let offset = buffer.writeUInt8(1, 0);
    offset = buffer.writeUInt8(spaceNameCode, offset);
    offset = buffer.writeUInt32LE(numDimensions, offset);
    offset = buffer.writeUInt32LE(metadata.maxElements || index.getMaxElements(), offset);
    offset = buffer.writeUInt32LE(metadata.m || 16, offset);
    offset = buffer.writeUInt32LE(metadata.efConstruction || 200, offset);
    // `??` rather than `||`: a seed of 0 is legitimate and must be preserved.
    offset = buffer.writeUInt32LE(metadata.randomSeed ?? 100, offset);
    offset = buffer.writeUInt32LE(vectors.length, offset);
    buffer.fill(0, offset, offset + RESERVED_SIZE);
    offset += RESERVED_SIZE;

    for (const { label, point } of vectors) {
      if (point.length !== numDimensions) {
        throw new Error(`Vector dimension mismatch: expected ${numDimensions}, got ${point.length}`);
      }
      offset = buffer.writeUInt32LE(label, offset);
      for (const value of point) {
        if (!Number.isFinite(value)) {
          throw new Error(`Invalid vector value: ${value}`);
        }
        offset = buffer.writeFloatLE(value, offset);
      }
    }

    await fs.writeFile(filename, buffer);
    logger.log(`Index saved to ${filename} (binary format, ${vectors.length} vectors, ${bufferSize} bytes)`);
  } catch (error) {
    throw new Error(`Failed to write binary file: ${error.message}`, { cause: error });
  }
}
|
|
158
|
+
/**
 * Loads an index from disk, choosing the parser from the file extension.
 *
 * Filenames ending in ".bin" or ".dat" are read with `loadIndexBinary`;
 * everything else is read with `loadIndexJSON`.
 *
 * @param {object} hnswlib - Loaded hnswlib module; must expose a
 *   `HierarchicalNSW` constructor.
 * @param {string} filename - Path of the saved index.
 * @returns {Promise<*>} Whatever the selected loader resolves to.
 * @throws {Error} On invalid arguments; "File not found: <filename>" when the
 *   path does not exist; "Cannot access file …" for any other access failure;
 *   or the loader's own error (logged via `logger.error`, then rethrown).
 */
async function loadIndexFromFile(hnswlib, filename) {
  validateFilename(filename);
  if (!hnswlib || typeof hnswlib.HierarchicalNSW !== "function") {
    throw new Error("Invalid hnswlib module: HierarchicalNSW constructor not found");
  }
  try {
    await fs.access(filename);
  } catch (error) {
    // Only a missing path is "not found"; permission problems and the like
    // deserve an accurate message. The original error stays on `cause`.
    if (error?.code === "ENOENT" || error?.code === "ENOTDIR") {
      throw new Error(`File not found: ${filename}`, { cause: error });
    }
    throw new Error(`Cannot access file ${filename}: ${error?.message ?? String(error)}`, { cause: error });
  }
  const isBinary = filename.endsWith(".bin") || filename.endsWith(".dat");
  const readIndex = isBinary ? loadIndexBinary : loadIndexJSON;
  try {
    return await readIndex(hnswlib, filename);
  } catch (error) {
    logger.error(`Failed to load index from ${filename}:`, error);
    throw error;
  }
}
|
|
180
|
+
/**
 * Rebuilds an index from a JSON file.
 *
 * The file must contain an object with a `vectors` array, a positive integer
 * `numDimensions` and a `spaceName` of 'l2', 'ip' or 'cosine'. Missing
 * parameters fall back to: maxElements 100, m 16, efConstruction 200,
 * randomSeed 100. Every vector is re-inserted with
 * `index.addPoint(point, label, false)`.
 *
 * @param {object} hnswlib - Module exposing the `HierarchicalNSW` constructor.
 * @param {string} filename - Path of the JSON file.
 * @returns {Promise<{index: object, metadata: {spaceName: string,
 *   numDimensions: number, maxElements: number, m: number,
 *   efConstruction: number, randomSeed: number}}>} The rebuilt index and the
 *   parameters it was initialised with.
 * @throws {Error} "Invalid JSON file: …", "Failed to read file: …",
 *   "Invalid index file format: …", "Invalid spaceName: …", or
 *   "Failed to recreate index: …" (underlying error in `cause`).
 */
async function loadIndexJSON(hnswlib, filename) {
  let data;
  try {
    data = JSON.parse(await fs.readFile(filename, "utf8"));
  } catch (error) {
    if (error instanceof SyntaxError) {
      throw new Error(`Invalid JSON file: ${error.message}`, { cause: error });
    }
    throw new Error(`Failed to read file: ${error.message}`, { cause: error });
  }

  // `null`, numbers, strings, ... are valid JSON but not an index document;
  // catch them here rather than crash on the property reads below.
  if (data === null || typeof data !== "object") {
    throw new Error("Invalid index file format: expected a JSON object");
  }
  if (!Array.isArray(data.vectors)) {
    throw new Error("Invalid index file format: missing or invalid vectors array");
  }
  // Dimensions are a count: fractional values are rejected too.
  if (!Number.isInteger(data.numDimensions) || data.numDimensions <= 0) {
    throw new Error("Invalid index file format: invalid numDimensions");
  }
  if (!data.spaceName || !["l2", "ip", "cosine"].includes(data.spaceName)) {
    throw new Error(`Invalid spaceName: ${data.spaceName}. Must be 'l2', 'ip', or 'cosine'`);
  }

  // Resolve defaults once so the index is initialised with exactly the
  // values reported back to the caller.
  const metadata = {
    spaceName: data.spaceName,
    numDimensions: data.numDimensions,
    maxElements: data.maxElements || 100,
    m: data.m || 16,
    efConstruction: data.efConstruction || 200,
    // `??` rather than `||`: a stored seed of 0 must not turn into 100.
    randomSeed: data.randomSeed ?? 100
  };

  try {
    const index = new hnswlib.HierarchicalNSW(metadata.spaceName, metadata.numDimensions, "");
    index.initIndex(metadata.maxElements, metadata.m, metadata.efConstruction, metadata.randomSeed);
    for (const { label, point } of data.vectors) {
      if (typeof label !== "number") {
        throw new Error(`Invalid label: ${label}. Must be a number`);
      }
      if (!Array.isArray(point) || point.length !== metadata.numDimensions) {
        throw new Error(`Invalid vector: expected array of length ${metadata.numDimensions}`);
      }
      index.addPoint(point, label, false);
    }
    logger.log(`Index loaded from ${filename} (JSON format, ${data.vectors.length} vectors)`);
    return { index, metadata };
  } catch (error) {
    throw new Error(`Failed to recreate index: ${error.message}`, { cause: error });
  }
}
|
|
233
|
+
/**
 * Rebuilds an index from the package's little-endian binary format (version 1).
 *
 * Layout: a 40-byte header — version (u8), space code (u8; 0 = l2, 1 = ip,
 * 2 = cosine), numDimensions, maxElements, m, efConstruction, randomSeed,
 * numVectors (u32 each), 14 reserved bytes — followed by `numVectors`
 * records of label (u32) plus `numDimensions` float32 values. Bytes after
 * the last record are ignored. Every vector is re-inserted with
 * `index.addPoint(point, label, false)`.
 *
 * @param {object} hnswlib - Module exposing the `HierarchicalNSW` constructor.
 * @param {string} filename - Path of the binary file.
 * @returns {Promise<{index: object, metadata: {spaceName: string,
 *   numDimensions: number, maxElements: number, m: number,
 *   efConstruction: number, randomSeed: number}}>} The rebuilt index and the
 *   parameters read from the header.
 * @throws {Error} If the file cannot be read, is shorter than its header or
 *   declared payload, has an unsupported version or space code, declares
 *   out-of-range sizes, or if the index cannot be rebuilt
 *   ("Failed to recreate index: …", underlying error in `cause`).
 */
async function loadIndexBinary(hnswlib, filename) {
  const HEADER_SIZE = 40;
  const RESERVED_SIZE = 14;
  const SPACE_NAMES = ["l2", "ip", "cosine"]; // position in the list === on-disk code

  let buffer;
  try {
    buffer = await fs.readFile(filename);
  } catch (error) {
    throw new Error(`Failed to read file: ${error.message}`, { cause: error });
  }
  // Once this passes, every header read below is in bounds.
  if (buffer.length < HEADER_SIZE) {
    throw new Error(`File too small to be a valid index file (${buffer.length} bytes, minimum ${HEADER_SIZE} bytes)`);
  }

  let offset = 0;
  const version = buffer.readUInt8(offset);
  offset += 1;
  if (version !== 1) {
    throw new Error(`Unsupported binary format version: ${version}. Expected version 1`);
  }

  const spaceNameCode = buffer.readUInt8(offset);
  offset += 1;
  const spaceName = SPACE_NAMES[spaceNameCode];
  // An unknown code means a corrupt or foreign file; guessing "l2" would
  // silently change search semantics, and the JSON loader rejects unknown
  // spaces as well.
  if (spaceName === undefined) {
    throw new Error(`Invalid spaceName code: ${spaceNameCode}. Expected 0 (l2), 1 (ip), or 2 (cosine)`);
  }

  const readHeaderUInt32 = () => {
    const value = buffer.readUInt32LE(offset);
    offset += 4;
    return value;
  };
  const numDimensions = readHeaderUInt32();
  const maxElements = readHeaderUInt32();
  const m = readHeaderUInt32();
  const efConstruction = readHeaderUInt32();
  const randomSeed = readHeaderUInt32();
  const numVectors = readHeaderUInt32();
  offset += RESERVED_SIZE;

  // Values are unsigned, so only zero and the upper sanity limits need checking.
  if (numDimensions === 0 || numDimensions > 1e5) {
    throw new Error(`Invalid numDimensions: ${numDimensions}`);
  }
  if (numVectors > 1e8) {
    throw new Error(`Invalid numVectors: ${numVectors}`);
  }

  const vectorSize = 4 + numDimensions * 4;
  const expectedSize = HEADER_SIZE + numVectors * vectorSize;
  // Once this passes, every record read below is in bounds.
  if (buffer.length < expectedSize) {
    throw new Error(`File size mismatch: expected ${expectedSize} bytes, got ${buffer.length} bytes`);
  }

  try {
    const index = new hnswlib.HierarchicalNSW(spaceName, numDimensions, "");
    index.initIndex(maxElements, m, efConstruction, randomSeed);
    for (let i = 0; i < numVectors; i++) {
      const label = buffer.readUInt32LE(offset);
      offset += 4;
      const point = [];
      for (let j = 0; j < numDimensions; j++) {
        point.push(buffer.readFloatLE(offset));
        offset += 4;
      }
      index.addPoint(point, label, false);
    }
    logger.log(`Index loaded from ${filename} (binary format, ${numVectors} vectors)`);
    return {
      index,
      metadata: { spaceName, numDimensions, maxElements, m, efConstruction, randomSeed }
    };
  } catch (error) {
    throw new Error(`Failed to recreate index: ${error.message}`, { cause: error });
  }
}
|
|
315
|
+
export {
|
|
316
|
+
loadHnswlib,
|
|
317
|
+
loadIndexFromFile,
|
|
318
|
+
saveIndexToFile,
|
|
319
|
+
setLogger
|
|
320
|
+
};
|
package/package.json
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "hnswlib-wasm-node",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "Persistence layer for hnswlib-wasm with JSON and binary format support for Node.js",
|
|
5
|
+
"main": "./dist/index.cjs",
|
|
6
|
+
"type": "module",
|
|
7
|
+
"exports": {
|
|
8
|
+
".": {
|
|
9
|
+
"import": "./dist/index.js",
|
|
10
|
+
"require": "./dist/index.cjs"
|
|
11
|
+
}
|
|
12
|
+
},
|
|
13
|
+
"files": [
|
|
14
|
+
"dist"
|
|
15
|
+
],
|
|
16
|
+
"keywords": [
|
|
17
|
+
"hnswlib",
|
|
18
|
+
"hnswlib-wasm",
|
|
19
|
+
"vector-search",
|
|
20
|
+
"similarity-search",
|
|
21
|
+
"persistence",
|
|
22
|
+
"index",
|
|
23
|
+
"nearest-neighbor",
|
|
24
|
+
"nodejs"
|
|
25
|
+
],
|
|
26
|
+
"author": "",
|
|
27
|
+
"license": "MIT",
|
|
28
|
+
"repository": {
|
|
29
|
+
"type": "git",
|
|
30
|
+
"url": ""
|
|
31
|
+
},
|
|
32
|
+
"bugs": {
|
|
33
|
+
"url": ""
|
|
34
|
+
},
|
|
35
|
+
"homepage": "",
|
|
36
|
+
"peerDependencies": {
|
|
37
|
+
"hnswlib-wasm": "^0.8.2"
|
|
38
|
+
},
|
|
39
|
+
"engines": {
|
|
40
|
+
"node": ">=18.0.0"
|
|
41
|
+
},
|
|
42
|
+
"scripts": {
|
|
43
|
+
"build": "tsup",
|
|
44
|
+
"prepublishOnly": "npm run build",
|
|
45
|
+
"test": "echo \"Error: no test specified\" && exit 1"
|
|
46
|
+
},
|
|
47
|
+
"devDependencies": {
|
|
48
|
+
"tsup": "^8.5.1",
|
|
49
|
+
"typescript": "^5.9.3"
|
|
50
|
+
}
|
|
51
|
+
}
|