@faiss-node/native 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json ADDED
@@ -0,0 +1,69 @@
1
+ {
2
+ "name": "@faiss-node/native",
3
+ "version": "0.1.4",
4
+ "description": "High-performance Node.js native bindings for Facebook FAISS - the fastest vector similarity search library. Supports FLAT_L2, IVF_FLAT, and HNSW index types with async operations, persistence, and batch search.",
5
+ "main": "src/js/index.js",
6
+ "types": "src/js/types.d.ts",
7
+ "repository": {
8
+ "type": "git",
9
+ "url": "https://github.com/anupammaurya6767/faiss-node-native.git"
10
+ },
11
+ "homepage": "https://github.com/anupammaurya6767/faiss-node-native#readme",
12
+ "bugs": {
13
+ "url": "https://github.com/anupammaurya6767/faiss-node-native/issues"
14
+ },
15
+ "author": "Anupam Maurya <anupammaurya6767@gmail.com>",
16
+ "license": "MIT",
17
+ "keywords": [
18
+ "faiss",
19
+ "vector",
20
+ "similarity",
21
+ "search",
22
+ "embedding",
23
+ "nearest-neighbor",
24
+ "ann",
25
+ "approximate-nearest-neighbor",
26
+ "machine-learning",
27
+ "ai",
28
+ "semantic-search",
29
+ "rag",
30
+ "vector-database",
31
+ "hnsw",
32
+ "ivf",
33
+ "native",
34
+ "performance"
35
+ ],
36
+ "scripts": {
37
+ "build": "node-gyp rebuild",
38
+ "clean": "node-gyp clean",
39
+ "test": "jest",
40
+ "test:ci": "jest --ci --coverage --config=jest.ci.config.js --testPathIgnorePatterns=manual",
41
+ "test:unit": "jest test/unit",
42
+ "test:integration": "jest test/integration",
43
+ "test:watch": "jest --watch",
44
+ "test:memory": "node --expose-gc test/memory.test.js",
45
+ "test:local": "npm run clean && npm run build && npm test",
46
+ "ci:status": "bash scripts/check-ci-status.sh",
47
+ "docs": "npm run docs:js && npm run docs:cpp",
48
+ "docs:js": "typedoc",
49
+ "docs:cpp": "doxygen Doxyfile",
50
+ "docs:watch": "npm run docs:js -- --watch",
51
+ "docs:serve": "cd docs/api && python3 -m http.server 8000",
52
+ "docs:serve:cpp": "cd docs/html && python3 -m http.server 8001",
53
+ "docs:auto": "npm run docs && npm run docs:serve",
54
+ "prepublishOnly": "npm run docs",
55
+ "publish:local": "bash -c 'export NODE_AUTH_TOKEN=$(grep NPM_TOKEN .env | cut -d \\\"=\\\" -f2) && npm config set //registry.npmjs.org/:_authToken $NODE_AUTH_TOKEN && npm publish --access public'",
56
+ "publish:dry-run": "bash -c 'export NODE_AUTH_TOKEN=$(grep NPM_TOKEN .env | cut -d \\\"=\\\" -f2) && npm config set //registry.npmjs.org/:_authToken $NODE_AUTH_TOKEN && npm publish --access public --dry-run'"
57
+ },
58
+ "engines": {
59
+ "node": ">=18.0.0"
60
+ },
61
+ "devDependencies": {
62
+ "express": "^4.22.1",
63
+ "jest": "^29.7.0",
64
+ "typedoc": "^0.25.0"
65
+ },
66
+ "dependencies": {
67
+ "node-addon-api": "^7.0.0"
68
+ }
69
+ }
@@ -0,0 +1,300 @@
1
+ // Include FAISS headers FIRST, before our header
2
+ // This ensures all FAISS types are properly defined
3
+ #include <faiss/impl/FaissAssert.h>
4
+ #include <faiss/MetricType.h>
5
+ #include <faiss/Index.h>
6
+ #include <faiss/IndexFlat.h>
7
+ #include <faiss/IndexIVF.h>
8
+ #include <faiss/index_factory.h>
9
+ #include <faiss/index_io.h>
10
+ #include <faiss/impl/io.h>
11
+ #include <fstream>
12
+ #include <sstream>
13
+ #include <cstdio>
14
+ #include <cstring>
15
+
16
+ // Now include our header
17
+ #include "faiss_index.h"
18
+ #include <stdexcept>
19
+
20
+ FaissIndexWrapper::FaissIndexWrapper(int dims, const std::string& indexDescription, int metric)
21
+ : dims_(dims), disposed_(false) {
22
+ if (dims <= 0) {
23
+ throw std::invalid_argument("Dimensions must be positive");
24
+ }
25
+
26
+ // Create index using index_factory
27
+ // Examples: "Flat" -> IndexFlatL2, "IVF100,Flat" -> IndexIVFFlat, "HNSW32" -> IndexHNSW
28
+ faiss::MetricType metricType = static_cast<faiss::MetricType>(metric);
29
+ index_ = std::unique_ptr<faiss::Index>(faiss::index_factory(dims, indexDescription.c_str(), metricType));
30
+ }
31
+
32
+ FaissIndexWrapper::FaissIndexWrapper(int dims)
33
+ : FaissIndexWrapper(dims, "Flat", 1) { // Default to IndexFlatL2 with L2 metric
34
+ }
35
+
36
+ FaissIndexWrapper::~FaissIndexWrapper() {
37
+ if (!disposed_) {
38
+ Dispose();
39
+ }
40
+ }
41
+
42
+ void FaissIndexWrapper::Add(const float* vectors, size_t n) {
43
+ std::lock_guard<std::mutex> lock(mutex_);
44
+
45
+ if (disposed_) {
46
+ throw std::runtime_error("Index has been disposed");
47
+ }
48
+
49
+ if (vectors == nullptr) {
50
+ throw std::invalid_argument("Vectors pointer cannot be null");
51
+ }
52
+
53
+ if (n == 0) {
54
+ return; // Nothing to add
55
+ }
56
+
57
+ // FAISS expects vectors as a flat array: [v1[0..d-1], v2[0..d-1], ...]
58
+ // This matches how Float32Array is laid out in memory
59
+ index_->add(n, vectors);
60
+ }
61
+
62
+ void FaissIndexWrapper::Search(const float* query, int k, float* distances, int64_t* labels) const {
63
+ std::lock_guard<std::mutex> lock(mutex_);
64
+
65
+ if (disposed_) {
66
+ throw std::runtime_error("Index has been disposed");
67
+ }
68
+
69
+ if (query == nullptr) {
70
+ throw std::invalid_argument("Query pointer cannot be null");
71
+ }
72
+
73
+ if (distances == nullptr || labels == nullptr) {
74
+ throw std::invalid_argument("Output arrays cannot be null");
75
+ }
76
+
77
+ if (k <= 0) {
78
+ throw std::invalid_argument("k must be positive");
79
+ }
80
+
81
+ size_t ntotal = index_->ntotal;
82
+ if (ntotal == 0) {
83
+ throw std::runtime_error("Cannot search empty index");
84
+ }
85
+
86
+ // Clamp k to available vectors
87
+ int actual_k = (k > static_cast<int>(ntotal)) ? static_cast<int>(ntotal) : k;
88
+
89
+ // FAISS search: nq=1 (single query), k neighbors
90
+ // Cast labels to faiss::idx_t* for FAISS API
91
+ index_->search(1, query, actual_k, distances, reinterpret_cast<faiss::idx_t*>(labels));
92
+ }
93
+
94
+ void FaissIndexWrapper::SearchBatch(const float* queries, size_t nq, int k, float* distances, int64_t* labels) const {
95
+ std::lock_guard<std::mutex> lock(mutex_);
96
+
97
+ if (disposed_) {
98
+ throw std::runtime_error("Index has been disposed");
99
+ }
100
+
101
+ if (queries == nullptr) {
102
+ throw std::invalid_argument("Queries pointer cannot be null");
103
+ }
104
+
105
+ if (distances == nullptr || labels == nullptr) {
106
+ throw std::invalid_argument("Output arrays cannot be null");
107
+ }
108
+
109
+ if (nq == 0) {
110
+ throw std::invalid_argument("Number of queries must be positive");
111
+ }
112
+
113
+ if (k <= 0) {
114
+ throw std::invalid_argument("k must be positive");
115
+ }
116
+
117
+ size_t ntotal = index_->ntotal;
118
+ if (ntotal == 0) {
119
+ throw std::runtime_error("Cannot search empty index");
120
+ }
121
+
122
+ // Clamp k to available vectors
123
+ int actual_k = (k > static_cast<int>(ntotal)) ? static_cast<int>(ntotal) : k;
124
+
125
+ // FAISS batch search: nq queries, k neighbors per query
126
+ // Results are stored as: [q1_results, q2_results, ..., qn_results]
127
+ // Each query's results: [k distances, k labels]
128
+ // Cast labels to faiss::idx_t* for FAISS API
129
+ index_->search(nq, queries, actual_k, distances, reinterpret_cast<faiss::idx_t*>(labels));
130
+ }
131
+
132
+ size_t FaissIndexWrapper::GetTotalVectors() const {
133
+ std::lock_guard<std::mutex> lock(mutex_);
134
+ if (disposed_) {
135
+ return 0;
136
+ }
137
+ return index_->ntotal;
138
+ }
139
+
140
+ int FaissIndexWrapper::GetDimensions() const {
141
+ return dims_;
142
+ }
143
+
144
+ void FaissIndexWrapper::Train(const float* vectors, size_t n) {
145
+ std::lock_guard<std::mutex> lock(mutex_);
146
+
147
+ if (disposed_) {
148
+ throw std::runtime_error("Index has been disposed");
149
+ }
150
+
151
+ if (vectors == nullptr) {
152
+ throw std::invalid_argument("Vectors pointer cannot be null");
153
+ }
154
+
155
+ if (n == 0) {
156
+ throw std::invalid_argument("Number of training vectors must be positive");
157
+ }
158
+
159
+ index_->train(n, vectors);
160
+ }
161
+
162
+ void FaissIndexWrapper::SetNprobe(int nprobe) {
163
+ std::lock_guard<std::mutex> lock(mutex_);
164
+
165
+ if (disposed_) {
166
+ throw std::runtime_error("Index has been disposed");
167
+ }
168
+
169
+ // Try to cast to IndexIVF to set nprobe
170
+ // This is safe even if not an IVF index (will just do nothing)
171
+ faiss::IndexIVF* ivf_index = dynamic_cast<faiss::IndexIVF*>(index_.get());
172
+ if (ivf_index) {
173
+ ivf_index->nprobe = nprobe;
174
+ }
175
+ }
176
+
177
+ bool FaissIndexWrapper::IsTrained() const {
178
+ std::lock_guard<std::mutex> lock(mutex_);
179
+ if (disposed_) {
180
+ return false;
181
+ }
182
+ return index_->is_trained;
183
+ }
184
+
185
+ void FaissIndexWrapper::Dispose() {
186
+ std::lock_guard<std::mutex> lock(mutex_);
187
+ if (disposed_) {
188
+ return;
189
+ }
190
+
191
+ disposed_ = true;
192
+ index_.reset();
193
+ }
194
+
195
+ void FaissIndexWrapper::Save(const std::string& filename) const {
196
+ std::lock_guard<std::mutex> lock(mutex_);
197
+ if (disposed_) {
198
+ throw std::runtime_error("Index has been disposed");
199
+ }
200
+
201
+ if (filename.empty()) {
202
+ throw std::invalid_argument("Filename cannot be empty");
203
+ }
204
+
205
+ try {
206
+ faiss::write_index(index_.get(), filename.c_str());
207
+ } catch (const std::exception& e) {
208
+ throw std::runtime_error(std::string("Failed to save index: ") + e.what());
209
+ }
210
+ }
211
+
212
+ std::unique_ptr<FaissIndexWrapper> FaissIndexWrapper::Load(const std::string& filename) {
213
+ if (filename.empty()) {
214
+ throw std::invalid_argument("Filename cannot be empty");
215
+ }
216
+
217
+ try {
218
+ faiss::Index* loaded_index = faiss::read_index(filename.c_str());
219
+
220
+ // Create wrapper with loaded index (supports any index type)
221
+ auto wrapper = std::make_unique<FaissIndexWrapper>(loaded_index->d);
222
+ wrapper->index_.reset(loaded_index);
223
+ wrapper->dims_ = loaded_index->d;
224
+
225
+ return wrapper;
226
+ } catch (const std::exception& e) {
227
+ throw std::runtime_error(std::string("Failed to load index: ") + e.what());
228
+ }
229
+ }
230
+
231
+ std::vector<uint8_t> FaissIndexWrapper::ToBuffer() const {
232
+ std::lock_guard<std::mutex> lock(mutex_);
233
+ if (disposed_) {
234
+ throw std::runtime_error("Index has been disposed");
235
+ }
236
+
237
+ try {
238
+ // Use FAISS VectorIOWriter for direct memory serialization (no temp files)
239
+ // This is the same approach used by ewfian/faiss-node
240
+ faiss::VectorIOWriter writer;
241
+ faiss::write_index(index_.get(), &writer);
242
+
243
+ // Return the buffer directly
244
+ return writer.data;
245
+ } catch (const std::exception& e) {
246
+ throw std::runtime_error(std::string("Failed to serialize index: ") + e.what());
247
+ }
248
+ }
249
+
250
+ std::unique_ptr<FaissIndexWrapper> FaissIndexWrapper::FromBuffer(const uint8_t* data, size_t length) {
251
+ if (data == nullptr || length == 0) {
252
+ throw std::invalid_argument("Invalid buffer data");
253
+ }
254
+
255
+ try {
256
+ // Use FAISS VectorIOReader for direct memory deserialization (no temp files)
257
+ // This is the same approach used by ewfian/faiss-node
258
+ faiss::VectorIOReader reader;
259
+ reader.data.assign(data, data + length);
260
+
261
+ faiss::Index* loaded_index = faiss::read_index(&reader);
262
+
263
+ // Create wrapper with loaded index (supports any index type)
264
+ auto wrapper = std::make_unique<FaissIndexWrapper>(loaded_index->d);
265
+ wrapper->index_.reset(loaded_index);
266
+ wrapper->dims_ = loaded_index->d;
267
+
268
+ return wrapper;
269
+ } catch (const std::exception& e) {
270
+ throw std::runtime_error(std::string("Failed to deserialize index: ") + e.what());
271
+ }
272
+ }
273
+
274
+ void FaissIndexWrapper::MergeFrom(const FaissIndexWrapper& other) {
275
+ // Lock both mutexes to prevent deadlock (always lock in same order)
276
+ // We'll use a simple approach: lock this first, then other
277
+ // Note: This could deadlock if two threads merge in opposite directions
278
+ // In practice, this is unlikely, but we should document it
279
+ std::lock_guard<std::mutex> lock1(mutex_);
280
+ std::lock_guard<std::mutex> lock2(other.mutex_);
281
+
282
+ if (disposed_) {
283
+ throw std::runtime_error("Index has been disposed");
284
+ }
285
+
286
+ if (other.disposed_) {
287
+ throw std::runtime_error("Cannot merge from disposed index");
288
+ }
289
+
290
+ if (other.dims_ != dims_) {
291
+ throw std::invalid_argument("Merging index must have the same dimensions");
292
+ }
293
+
294
+ try {
295
+ // FAISS merge_from copies all vectors from other index
296
+ index_->merge_from(*(other.index_));
297
+ } catch (const std::exception& e) {
298
+ throw std::runtime_error(std::string("Failed to merge index: ") + e.what());
299
+ }
300
+ }
@@ -0,0 +1,99 @@
1
+ #ifndef FAISS_NODE_INDEX_H
2
+ #define FAISS_NODE_INDEX_H
3
+
4
+ #include <memory>
5
+ #include <cstdint>
6
+ #include <string>
7
+ #include <vector>
8
+ #include <mutex>
9
+
10
+ // Forward declarations - full definitions in .cpp file
11
+ namespace faiss {
12
+ struct Index; // FAISS defines Index as struct, not class
13
+ using idx_t = int64_t; // idx_t is int64_t (defined in MetricType.h)
14
+ }
15
+
16
+ /**
17
+ * Wrapper class for FAISS index that manages memory and provides
18
+ * a clean interface for N-API bindings.
19
+ */
20
+ class FaissIndexWrapper {
21
+ public:
22
+ // Constructor: creates index using index_factory string
23
+ // Examples: "Flat" for IndexFlatL2, "IVF100,Flat" for IndexIVFFlat, "HNSW32" for IndexHNSW
24
+ FaissIndexWrapper(int dims, const std::string& indexDescription, int metric = 1);
25
+
26
+ // Constructor: creates IndexFlatL2 (for backward compatibility)
27
+ explicit FaissIndexWrapper(int dims);
28
+
29
+ // Destructor: automatic cleanup via RAII
30
+ ~FaissIndexWrapper();
31
+
32
+ // Prevent copying (we own the FAISS index)
33
+ FaissIndexWrapper(const FaissIndexWrapper&) = delete;
34
+ FaissIndexWrapper& operator=(const FaissIndexWrapper&) = delete;
35
+
36
+ // Add vectors to the index
37
+ // vectors: pointer to float array (n * dims elements)
38
+ // n: number of vectors
39
+ void Add(const float* vectors, size_t n);
40
+
41
+ // Search for k nearest neighbors (single query)
42
+ // query: pointer to query vector (dims elements)
43
+ // k: number of neighbors to return
44
+ // distances: output array (k elements) - caller must allocate
45
+ // labels: output array (k elements) - caller must allocate
46
+ void Search(const float* query, int k, float* distances, int64_t* labels) const;
47
+
48
+ // Batch search for k nearest neighbors (multiple queries)
49
+ // queries: pointer to query vectors (nq * dims elements)
50
+ // nq: number of queries
51
+ // k: number of neighbors to return per query
52
+ // distances: output array (nq * k elements) - caller must allocate
53
+ // labels: output array (nq * k elements) - caller must allocate
54
+ void SearchBatch(const float* queries, size_t nq, int k, float* distances, int64_t* labels) const;
55
+
56
+ // Train the index (required for IVF indexes)
57
+ void Train(const float* vectors, size_t n);
58
+
59
+ // Get index statistics
60
+ size_t GetTotalVectors() const;
61
+ int GetDimensions() const;
62
+ bool IsTrained() const;
63
+
64
+ // Set nprobe for IVF indexes
65
+ void SetNprobe(int nprobe);
66
+
67
+ // Dispose: explicitly free resources
68
+ void Dispose();
69
+
70
+ // Check if disposed (thread-safe)
71
+ bool IsDisposed() const {
72
+ std::lock_guard<std::mutex> lock(mutex_);
73
+ return disposed_;
74
+ }
75
+
76
+ // Save index to file
77
+ void Save(const std::string& filename) const;
78
+
79
+ // Load index from file (static factory method)
80
+ static std::unique_ptr<FaissIndexWrapper> Load(const std::string& filename);
81
+
82
+ // Serialize index to buffer
83
+ std::vector<uint8_t> ToBuffer() const;
84
+
85
+ // Deserialize index from buffer (static factory method)
86
+ static std::unique_ptr<FaissIndexWrapper> FromBuffer(const uint8_t* data, size_t length);
87
+
88
+ // Merge vectors from another index
89
+ // other: reference to another FaissIndexWrapper
90
+ void MergeFrom(const FaissIndexWrapper& other);
91
+
92
+ private:
93
+ std::unique_ptr<faiss::Index> index_; // Base Index pointer (can hold any index type)
94
+ int dims_;
95
+ bool disposed_;
96
+ mutable std::mutex mutex_; // Protect concurrent access
97
+ };
98
+
99
+ #endif // FAISS_NODE_INDEX_H