native-vector-store 0.1.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +242 -12
- package/binding.gyp +22 -10
- package/deps/simdjson/simdjson.cpp +56403 -0
- package/deps/simdjson/simdjson.h +123534 -0
- package/docs/PERFORMANCE_CASE_STUDY.md +130 -0
- package/docs/PREBUILDS.md +69 -0
- package/docs/VectorStore.html +180 -0
- package/docs/VectorStoreWrapper.html +1356 -0
- package/docs/fonts/OpenSans-Bold-webfont.eot +0 -0
- package/docs/fonts/OpenSans-Bold-webfont.svg +1830 -0
- package/docs/fonts/OpenSans-Bold-webfont.woff +0 -0
- package/docs/fonts/OpenSans-BoldItalic-webfont.eot +0 -0
- package/docs/fonts/OpenSans-BoldItalic-webfont.svg +1830 -0
- package/docs/fonts/OpenSans-BoldItalic-webfont.woff +0 -0
- package/docs/fonts/OpenSans-Italic-webfont.eot +0 -0
- package/docs/fonts/OpenSans-Italic-webfont.svg +1830 -0
- package/docs/fonts/OpenSans-Italic-webfont.woff +0 -0
- package/docs/fonts/OpenSans-Light-webfont.eot +0 -0
- package/docs/fonts/OpenSans-Light-webfont.svg +1831 -0
- package/docs/fonts/OpenSans-Light-webfont.woff +0 -0
- package/docs/fonts/OpenSans-LightItalic-webfont.eot +0 -0
- package/docs/fonts/OpenSans-LightItalic-webfont.svg +1835 -0
- package/docs/fonts/OpenSans-LightItalic-webfont.woff +0 -0
- package/docs/fonts/OpenSans-Regular-webfont.eot +0 -0
- package/docs/fonts/OpenSans-Regular-webfont.svg +1831 -0
- package/docs/fonts/OpenSans-Regular-webfont.woff +0 -0
- package/docs/global.html +561 -0
- package/docs/index.html +570 -0
- package/docs/scripts/linenumber.js +25 -0
- package/docs/scripts/prettify/Apache-License-2.0.txt +202 -0
- package/docs/scripts/prettify/lang-css.js +2 -0
- package/docs/scripts/prettify/prettify.js +28 -0
- package/docs/styles/jsdoc-default.css +358 -0
- package/docs/styles/prettify-jsdoc.css +111 -0
- package/docs/styles/prettify-tomorrow.css +132 -0
- package/index.js +162 -0
- package/package.json +30 -7
- package/prebuilds/darwin-arm64/native-vector-store.node +0 -0
- package/prebuilds/darwin-x64/native-vector-store.node +0 -0
- package/prebuilds/linux-arm64/native-vector-store.node +0 -0
- package/prebuilds/linux-x64/native-vector-store.node +0 -0
- package/prebuilds/linux-x64-musl/napi-v9/native-vector-store.node +0 -0
- package/prebuilds/linux-x64-musl/native-vector-store.node +0 -0
- package/prebuilds/win32-x64/native-vector-store.node +0 -0
- package/src/Makefile +87 -0
- package/src/test_main.cpp +173 -0
- package/src/test_stress.cpp +394 -0
- package/src/vector_store.cpp +344 -0
- package/src/vector_store.h +21 -323
- package/native-vector-store-0.1.0.tgz +0 -0
- package/scripts/build-prebuilds.sh +0 -23
- /package/{src → deps/atomic_queue}/atomic_queue.h +0 -0
- /package/{src → deps/atomic_queue}/defs.h +0 -0
package/src/vector_store.h
CHANGED
@@ -26,74 +26,9 @@ class ArenaAllocator {
|
|
26
26
|
std::mutex chunk_creation_mutex_;
|
27
27
|
|
28
28
|
public:
|
29
|
-
ArenaAllocator()
|
30
|
-
|
31
|
-
|
32
|
-
void* allocate(size_t size, size_t align = 64) {
|
33
|
-
// Validate alignment is power of 2 and reasonable
|
34
|
-
assert(align > 0 && (align & (align - 1)) == 0);
|
35
|
-
if (align > 4096) {
|
36
|
-
return nullptr; // Alignment too large
|
37
|
-
}
|
38
|
-
|
39
|
-
// Validate size
|
40
|
-
if (size > CHUNK_SIZE) {
|
41
|
-
return nullptr; // Cannot allocate larger than chunk size
|
42
|
-
}
|
43
|
-
|
44
|
-
Chunk* chunk = current_.load(std::memory_order_acquire);
|
45
|
-
while (true) {
|
46
|
-
size_t old_offset = chunk->offset.load(std::memory_order_relaxed);
|
47
|
-
|
48
|
-
// Calculate the pointer that would result from current offset
|
49
|
-
void* ptr = chunk->data + old_offset;
|
50
|
-
|
51
|
-
// Calculate how much padding we need for alignment
|
52
|
-
size_t misalignment = (uintptr_t)ptr & (align - 1);
|
53
|
-
size_t padding = misalignment ? (align - misalignment) : 0;
|
54
|
-
|
55
|
-
size_t aligned_offset = old_offset + padding;
|
56
|
-
size_t new_offset = aligned_offset + size;
|
57
|
-
|
58
|
-
if (new_offset > CHUNK_SIZE) {
|
59
|
-
// Need new chunk
|
60
|
-
Chunk* next = chunk->next.load(std::memory_order_acquire);
|
61
|
-
if (!next) {
|
62
|
-
// Lock to prevent multiple threads creating chunks
|
63
|
-
std::lock_guard<std::mutex> lock(chunk_creation_mutex_);
|
64
|
-
// Double-check after acquiring lock
|
65
|
-
next = chunk->next.load(std::memory_order_acquire);
|
66
|
-
if (!next) {
|
67
|
-
auto new_chunk = std::make_unique<Chunk>();
|
68
|
-
next = new_chunk.get();
|
69
|
-
chunk->next.store(next, std::memory_order_release);
|
70
|
-
// Transfer ownership after setting atomic pointer
|
71
|
-
new_chunk.release();
|
72
|
-
}
|
73
|
-
}
|
74
|
-
// Update current to the new chunk
|
75
|
-
current_.store(next, std::memory_order_release);
|
76
|
-
chunk = next;
|
77
|
-
continue;
|
78
|
-
}
|
79
|
-
|
80
|
-
if (chunk->offset.compare_exchange_weak(old_offset, new_offset,
|
81
|
-
std::memory_order_release,
|
82
|
-
std::memory_order_relaxed)) {
|
83
|
-
return chunk->data + aligned_offset;
|
84
|
-
}
|
85
|
-
}
|
86
|
-
}
|
87
|
-
|
88
|
-
~ArenaAllocator() {
|
89
|
-
// Clean up linked chunks
|
90
|
-
Chunk* chunk = head_->next.load(std::memory_order_acquire);
|
91
|
-
while (chunk) {
|
92
|
-
Chunk* next = chunk->next.load(std::memory_order_acquire);
|
93
|
-
delete chunk;
|
94
|
-
chunk = next;
|
95
|
-
}
|
96
|
-
}
|
29
|
+
ArenaAllocator();
|
30
|
+
void* allocate(size_t size, size_t align = 64);
|
31
|
+
~ArenaAllocator();
|
97
32
|
};
|
98
33
|
|
99
34
|
struct Document {
|
@@ -107,9 +42,7 @@ struct TopK {
|
|
107
42
|
size_t k;
|
108
43
|
std::vector<std::pair<float, size_t>> heap; // min-heap by score
|
109
44
|
|
110
|
-
explicit TopK(size_t k = 0)
|
111
|
-
heap.reserve(k + 1); // Reserve k+1 to avoid reallocation during push
|
112
|
-
}
|
45
|
+
explicit TopK(size_t k = 0);
|
113
46
|
|
114
47
|
// Make TopK move-only to prevent copy-construction races
|
115
48
|
TopK(const TopK&) = delete;
|
@@ -117,46 +50,24 @@ struct TopK {
|
|
117
50
|
TopK(TopK&&) = default;
|
118
51
|
TopK& operator=(TopK&&) = default;
|
119
52
|
|
120
|
-
void push(float score, size_t idx)
|
121
|
-
if (heap.size() < k) {
|
122
|
-
heap.emplace_back(score, idx);
|
123
|
-
std::push_heap(heap.begin(), heap.end(), cmp);
|
124
|
-
} else if (k > 0 && score > heap.front().first) {
|
125
|
-
// Replace the minimum element
|
126
|
-
std::pop_heap(heap.begin(), heap.end(), cmp);
|
127
|
-
heap.back() = {score, idx};
|
128
|
-
std::push_heap(heap.begin(), heap.end(), cmp);
|
129
|
-
}
|
130
|
-
}
|
53
|
+
void push(float score, size_t idx);
|
131
54
|
|
132
55
|
// Comparator for min-heap (greater than for min-heap behavior)
|
133
|
-
static bool cmp(const std::pair<float, size_t>& a, const std::pair<float, size_t>& b)
|
134
|
-
return a.first > b.first;
|
135
|
-
}
|
56
|
+
static bool cmp(const std::pair<float, size_t>& a, const std::pair<float, size_t>& b);
|
136
57
|
|
137
|
-
void merge(const TopK& other)
|
138
|
-
// More efficient: if we have space, bulk insert then re-heapify
|
139
|
-
if (heap.size() + other.heap.size() <= k) {
|
140
|
-
heap.insert(heap.end(), other.heap.begin(), other.heap.end());
|
141
|
-
std::make_heap(heap.begin(), heap.end(), cmp);
|
142
|
-
} else {
|
143
|
-
// Otherwise, insert one by one
|
144
|
-
for (const auto& [score, idx] : other.heap) {
|
145
|
-
push(score, idx);
|
146
|
-
}
|
147
|
-
}
|
148
|
-
}
|
58
|
+
void merge(const TopK& other);
|
149
59
|
};
|
150
60
|
|
151
|
-
|
152
61
|
class VectorStore {
|
153
|
-
|
154
|
-
ArenaAllocator arena_;
|
155
|
-
|
62
|
+
public:
|
156
63
|
struct Entry {
|
157
64
|
Document doc;
|
158
65
|
float* embedding; // Extracted pointer for fast access
|
159
66
|
};
|
67
|
+
|
68
|
+
private:
|
69
|
+
const size_t dim_;
|
70
|
+
ArenaAllocator arena_;
|
160
71
|
|
161
72
|
std::vector<Entry> entries_;
|
162
73
|
std::atomic<size_t> count_{0}; // Atomic for parallel loading
|
@@ -164,238 +75,25 @@ class VectorStore {
|
|
164
75
|
mutable std::shared_mutex search_mutex_; // Protects against overlapping OpenMP teams
|
165
76
|
|
166
77
|
public:
|
167
|
-
explicit VectorStore(size_t dim)
|
168
|
-
entries_.resize(1'000'000); // Pre-size with default-constructed entries
|
169
|
-
}
|
78
|
+
explicit VectorStore(size_t dim);
|
170
79
|
|
171
80
|
// Overload for document type (used in test_main.cpp)
|
172
|
-
simdjson::error_code add_document(simdjson::ondemand::document& json_doc)
|
173
|
-
simdjson::ondemand::object obj;
|
174
|
-
auto error = json_doc.get_object().get(obj);
|
175
|
-
if (error) {
|
176
|
-
return error;
|
177
|
-
}
|
178
|
-
return add_document(obj);
|
179
|
-
}
|
81
|
+
simdjson::error_code add_document(simdjson::ondemand::document& json_doc);
|
180
82
|
|
181
|
-
simdjson::error_code add_document(simdjson::ondemand::object& json_doc)
|
182
|
-
// Cannot add documents after finalization
|
183
|
-
if (is_finalized_.load(std::memory_order_acquire)) {
|
184
|
-
return simdjson::INCORRECT_TYPE;
|
185
|
-
}
|
186
|
-
|
187
|
-
// Parse with error handling
|
188
|
-
std::string_view id, text;
|
189
|
-
auto error = json_doc["id"].get_string().get(id);
|
190
|
-
if (error) return error;
|
191
|
-
|
192
|
-
error = json_doc["text"].get_string().get(text);
|
193
|
-
if (error) return error;
|
194
|
-
|
195
|
-
// Calculate sizes
|
196
|
-
size_t emb_size = dim_ * sizeof(float);
|
197
|
-
size_t id_size = id.size() + 1;
|
198
|
-
size_t text_size = text.size() + 1;
|
199
|
-
|
200
|
-
// Allocate temporary buffer for embedding
|
201
|
-
std::vector<float> temp_embedding;
|
202
|
-
temp_embedding.reserve(dim_);
|
203
|
-
|
204
|
-
// Process metadata and embedding first
|
205
|
-
simdjson::ondemand::object metadata;
|
206
|
-
error = json_doc["metadata"].get_object().get(metadata);
|
207
|
-
if (error) return error;
|
208
|
-
|
209
|
-
simdjson::ondemand::array emb_array;
|
210
|
-
error = metadata["embedding"].get_array().get(emb_array);
|
211
|
-
if (error) return error;
|
212
|
-
|
213
|
-
// Consume the array before touching anything else
|
214
|
-
size_t i = 0;
|
215
|
-
for (auto value_result : emb_array) {
|
216
|
-
simdjson::ondemand::value v;
|
217
|
-
error = value_result.get(v);
|
218
|
-
if (error) return error;
|
219
|
-
double val;
|
220
|
-
error = v.get_double().get(val);
|
221
|
-
if (error) return error;
|
222
|
-
|
223
|
-
if (i >= dim_) {
|
224
|
-
return simdjson::CAPACITY; // Too many embedding values
|
225
|
-
}
|
226
|
-
temp_embedding.push_back(float(val));
|
227
|
-
i++;
|
228
|
-
}
|
229
|
-
|
230
|
-
// Verify we got the expected number of embedding values
|
231
|
-
if (i != dim_) {
|
232
|
-
return simdjson::INCORRECT_TYPE; // Wrong embedding dimension
|
233
|
-
}
|
234
|
-
|
235
|
-
// Now it is safe to take the raw metadata JSON
|
236
|
-
std::string_view raw_json;
|
237
|
-
error = metadata.raw_json().get(raw_json);
|
238
|
-
if (error) return error;
|
239
|
-
size_t meta_size = raw_json.size() + 1;
|
240
|
-
|
241
|
-
// Single arena allocation
|
242
|
-
char* base = (char*)arena_.allocate(emb_size + id_size + text_size + meta_size);
|
243
|
-
if (!base) {
|
244
|
-
return simdjson::MEMALLOC; // Allocation failed
|
245
|
-
}
|
246
|
-
|
247
|
-
// Layout: [embedding][id][text][metadata_json]
|
248
|
-
float* emb_ptr = (float*)base;
|
249
|
-
char* id_ptr = base + emb_size;
|
250
|
-
char* text_ptr = id_ptr + id_size;
|
251
|
-
char* meta_ptr = text_ptr + text_size;
|
252
|
-
|
253
|
-
// Copy embedding from temporary buffer
|
254
|
-
std::memcpy(emb_ptr, temp_embedding.data(), emb_size);
|
255
|
-
|
256
|
-
// Copy strings (adding null terminator)
|
257
|
-
std::memcpy(id_ptr, id.data(), id.size());
|
258
|
-
id_ptr[id.size()] = '\0';
|
259
|
-
|
260
|
-
std::memcpy(text_ptr, text.data(), text.size());
|
261
|
-
text_ptr[text.size()] = '\0';
|
262
|
-
|
263
|
-
std::memcpy(meta_ptr, raw_json.data(), raw_json.size());
|
264
|
-
meta_ptr[raw_json.size()] = '\0';
|
265
|
-
|
266
|
-
// Atomic increment for parallel loading
|
267
|
-
size_t idx = count_.fetch_add(1, std::memory_order_relaxed);
|
268
|
-
|
269
|
-
// Bounds check
|
270
|
-
if (idx >= entries_.size()) {
|
271
|
-
count_.fetch_sub(1, std::memory_order_relaxed);
|
272
|
-
return simdjson::CAPACITY;
|
273
|
-
}
|
274
|
-
|
275
|
-
// Construct entry directly - no synchronization needed
|
276
|
-
entries_[idx] = Entry{
|
277
|
-
.doc = Document{
|
278
|
-
.id = std::string_view(id_ptr, id.size()),
|
279
|
-
.text = std::string_view(text_ptr, text.size()),
|
280
|
-
.metadata_json = std::string_view(meta_ptr, raw_json.size())
|
281
|
-
},
|
282
|
-
.embedding = emb_ptr
|
283
|
-
};
|
284
|
-
|
285
|
-
return simdjson::SUCCESS;
|
286
|
-
}
|
83
|
+
simdjson::error_code add_document(simdjson::ondemand::object& json_doc);
|
287
84
|
|
288
85
|
// Finalize the store: normalize and switch to serving phase
|
289
|
-
void finalize()
|
290
|
-
// If already finalized, do nothing
|
291
|
-
if (is_finalized_.load(std::memory_order_acquire)) {
|
292
|
-
return;
|
293
|
-
}
|
294
|
-
|
295
|
-
// Get final count
|
296
|
-
size_t final_count = count_.load(std::memory_order_acquire);
|
297
|
-
|
298
|
-
// Normalize all embeddings (single-threaded, no races)
|
299
|
-
for (size_t i = 0; i < final_count; ++i) {
|
300
|
-
float* emb = entries_[i].embedding;
|
301
|
-
if (!emb) continue; // Skip uninitialized entries
|
302
|
-
|
303
|
-
float sum = 0.0f;
|
304
|
-
#pragma omp simd reduction(+:sum)
|
305
|
-
for (size_t j = 0; j < dim_; ++j) {
|
306
|
-
sum += emb[j] * emb[j];
|
307
|
-
}
|
308
|
-
|
309
|
-
if (sum > 1e-10f) { // Avoid division by zero
|
310
|
-
float inv_norm = 1.0f / std::sqrt(sum);
|
311
|
-
#pragma omp simd
|
312
|
-
for (size_t j = 0; j < dim_; ++j) {
|
313
|
-
emb[j] *= inv_norm;
|
314
|
-
}
|
315
|
-
}
|
316
|
-
}
|
317
|
-
|
318
|
-
// Ensure all threads see the normalized data
|
319
|
-
#pragma omp barrier
|
320
|
-
|
321
|
-
// Mark as finalized - this is the ONLY place this flag is set
|
322
|
-
is_finalized_.store(true, std::memory_order_seq_cst);
|
323
|
-
}
|
86
|
+
void finalize();
|
324
87
|
|
325
88
|
// Deprecated: use finalize() instead
|
326
|
-
void normalize_all()
|
327
|
-
finalize();
|
328
|
-
}
|
89
|
+
void normalize_all();
|
329
90
|
|
330
91
|
std::vector<std::pair<float, size_t>>
|
331
|
-
search(const float* query, size_t k) const
|
332
|
-
// Exclusive lock: prevent overlapping OpenMP teams
|
333
|
-
// Since each search uses all threads via OpenMP, concurrent searches provide no benefit
|
334
|
-
std::unique_lock<std::shared_mutex> lock(search_mutex_);
|
335
|
-
|
336
|
-
// Search can ONLY run if finalized
|
337
|
-
if (!is_finalized_.load(std::memory_order_acquire)) {
|
338
|
-
return {};
|
339
|
-
}
|
340
|
-
|
341
|
-
size_t n = count_.load(std::memory_order_acquire);
|
342
|
-
if (n == 0 || k == 0) return {};
|
343
|
-
|
344
|
-
k = std::min(k, n); // Ensure k doesn't exceed count
|
345
|
-
|
346
|
-
|
347
|
-
// Always use per-thread heaps to avoid any shared memory races
|
348
|
-
const int num_threads = omp_get_max_threads();
|
349
|
-
std::vector<TopK> thread_heaps;
|
350
|
-
thread_heaps.reserve(num_threads);
|
351
|
-
for (int i = 0; i < num_threads; ++i) {
|
352
|
-
thread_heaps.emplace_back(k); // in-place construction, no copies
|
353
|
-
}
|
354
|
-
|
355
|
-
std::vector<std::pair<float,std::size_t>> result;
|
356
|
-
|
357
|
-
#pragma omp parallel
|
358
|
-
{
|
359
|
-
const int tid = omp_get_thread_num();
|
360
|
-
TopK& local_heap = thread_heaps[tid];
|
361
|
-
|
362
|
-
#pragma omp for // default barrier kept - ensures all threads finish before merge
|
363
|
-
for (size_t i = 0; i < n; ++i) {
|
364
|
-
float score = 0.0f;
|
365
|
-
const float* emb = entries_[i].embedding;
|
366
|
-
|
367
|
-
#pragma omp simd reduction(+:score)
|
368
|
-
for (size_t j = 0; j < dim_; ++j) {
|
369
|
-
score += emb[j] * query[j];
|
370
|
-
}
|
371
|
-
|
372
|
-
local_heap.push(score, i);
|
373
|
-
}
|
374
|
-
|
375
|
-
#pragma omp barrier
|
376
|
-
#pragma omp single
|
377
|
-
{
|
378
|
-
TopK final_heap(k);
|
379
|
-
for (auto& th : thread_heaps) final_heap.merge(th);
|
380
|
-
result = std::move(final_heap.heap);
|
381
|
-
}
|
382
|
-
}
|
383
|
-
|
384
|
-
std::sort(result.begin(), result.end(),
|
385
|
-
[](const auto& a, const auto& b) { return a.first > b.first; });
|
386
|
-
|
387
|
-
return result;
|
388
|
-
}
|
92
|
+
search(const float* query, size_t k) const;
|
389
93
|
|
390
|
-
const Entry& get_entry(size_t idx) const
|
391
|
-
return entries_[idx];
|
392
|
-
}
|
94
|
+
const Entry& get_entry(size_t idx) const;
|
393
95
|
|
394
|
-
size_t size() const
|
395
|
-
return count_.load(std::memory_order_acquire);
|
396
|
-
}
|
96
|
+
size_t size() const;
|
397
97
|
|
398
|
-
bool is_finalized() const
|
399
|
-
return is_finalized_.load(std::memory_order_acquire);
|
400
|
-
}
|
98
|
+
bool is_finalized() const;
|
401
99
|
};
|
Binary file
|
@@ -1,23 +0,0 @@
|
|
1
|
-
#!/bin/bash
|
2
|
-
|
3
|
-
# Build prebuilds for current platform
|
4
|
-
echo "Building prebuilds for native-vector-store..."
|
5
|
-
|
6
|
-
# Clean previous builds
|
7
|
-
rm -rf prebuilds/
|
8
|
-
|
9
|
-
# Build for current platform and architecture
|
10
|
-
echo "Building for current platform..."
|
11
|
-
npx prebuildify --napi --strip
|
12
|
-
|
13
|
-
# For local testing on macOS, build both architectures if on Apple Silicon
|
14
|
-
if [[ "$OSTYPE" == "darwin"* ]] && [[ "$(uname -m)" == "arm64" ]]; then
|
15
|
-
echo "Building universal binary for macOS..."
|
16
|
-
npx prebuildify --napi --strip --arch x64
|
17
|
-
npx prebuildify --napi --strip --arch arm64
|
18
|
-
fi
|
19
|
-
|
20
|
-
echo "Prebuilds created:"
|
21
|
-
find prebuilds -type f -name "*.node" | sort
|
22
|
-
|
23
|
-
echo "Done!"
|
File without changes
|
File without changes
|