native-vector-store 0.1.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. package/README.md +242 -12
  2. package/binding.gyp +22 -10
  3. package/deps/simdjson/simdjson.cpp +56403 -0
  4. package/deps/simdjson/simdjson.h +123534 -0
  5. package/docs/PERFORMANCE_CASE_STUDY.md +130 -0
  6. package/docs/PREBUILDS.md +69 -0
  7. package/docs/VectorStore.html +180 -0
  8. package/docs/VectorStoreWrapper.html +1356 -0
  9. package/docs/fonts/OpenSans-Bold-webfont.eot +0 -0
  10. package/docs/fonts/OpenSans-Bold-webfont.svg +1830 -0
  11. package/docs/fonts/OpenSans-Bold-webfont.woff +0 -0
  12. package/docs/fonts/OpenSans-BoldItalic-webfont.eot +0 -0
  13. package/docs/fonts/OpenSans-BoldItalic-webfont.svg +1830 -0
  14. package/docs/fonts/OpenSans-BoldItalic-webfont.woff +0 -0
  15. package/docs/fonts/OpenSans-Italic-webfont.eot +0 -0
  16. package/docs/fonts/OpenSans-Italic-webfont.svg +1830 -0
  17. package/docs/fonts/OpenSans-Italic-webfont.woff +0 -0
  18. package/docs/fonts/OpenSans-Light-webfont.eot +0 -0
  19. package/docs/fonts/OpenSans-Light-webfont.svg +1831 -0
  20. package/docs/fonts/OpenSans-Light-webfont.woff +0 -0
  21. package/docs/fonts/OpenSans-LightItalic-webfont.eot +0 -0
  22. package/docs/fonts/OpenSans-LightItalic-webfont.svg +1835 -0
  23. package/docs/fonts/OpenSans-LightItalic-webfont.woff +0 -0
  24. package/docs/fonts/OpenSans-Regular-webfont.eot +0 -0
  25. package/docs/fonts/OpenSans-Regular-webfont.svg +1831 -0
  26. package/docs/fonts/OpenSans-Regular-webfont.woff +0 -0
  27. package/docs/global.html +561 -0
  28. package/docs/index.html +570 -0
  29. package/docs/scripts/linenumber.js +25 -0
  30. package/docs/scripts/prettify/Apache-License-2.0.txt +202 -0
  31. package/docs/scripts/prettify/lang-css.js +2 -0
  32. package/docs/scripts/prettify/prettify.js +28 -0
  33. package/docs/styles/jsdoc-default.css +358 -0
  34. package/docs/styles/prettify-jsdoc.css +111 -0
  35. package/docs/styles/prettify-tomorrow.css +132 -0
  36. package/index.js +162 -0
  37. package/package.json +30 -7
  38. package/prebuilds/darwin-arm64/native-vector-store.node +0 -0
  39. package/prebuilds/darwin-x64/native-vector-store.node +0 -0
  40. package/prebuilds/linux-arm64/native-vector-store.node +0 -0
  41. package/prebuilds/linux-x64/native-vector-store.node +0 -0
  42. package/prebuilds/linux-x64-musl/napi-v9/native-vector-store.node +0 -0
  43. package/prebuilds/linux-x64-musl/native-vector-store.node +0 -0
  44. package/prebuilds/win32-x64/native-vector-store.node +0 -0
  45. package/src/Makefile +87 -0
  46. package/src/test_main.cpp +173 -0
  47. package/src/test_stress.cpp +394 -0
  48. package/src/vector_store.cpp +344 -0
  49. package/src/vector_store.h +21 -323
  50. package/native-vector-store-0.1.0.tgz +0 -0
  51. package/scripts/build-prebuilds.sh +0 -23
  52. /package/{src → deps/atomic_queue}/atomic_queue.h +0 -0
  53. /package/{src → deps/atomic_queue}/defs.h +0 -0
package/src/vector_store.cpp
@@ -0,0 +1,344 @@
+ #include "vector_store.h"
+
+ // ArenaAllocator implementation
+
+ ArenaAllocator::ArenaAllocator()
+     : head_(std::make_unique<Chunk>()), current_(head_.get()) {}
+
+ void* ArenaAllocator::allocate(size_t size, size_t align) {
+     // Validate alignment is power of 2 and reasonable
+     assert(align > 0 && (align & (align - 1)) == 0);
+     if (align > 4096) {
+         return nullptr; // Alignment too large
+     }
+
+     // Validate size
+     if (size > CHUNK_SIZE) {
+         return nullptr; // Cannot allocate larger than chunk size
+     }
+
+     Chunk* chunk = current_.load(std::memory_order_acquire);
+     while (true) {
+         size_t old_offset = chunk->offset.load(std::memory_order_relaxed);
+
+         // Calculate the pointer that would result from current offset
+         void* ptr = chunk->data + old_offset;
+
+         // Calculate how much padding we need for alignment
+         size_t misalignment = (uintptr_t)ptr & (align - 1);
+         size_t padding = misalignment ? (align - misalignment) : 0;
+
+         size_t aligned_offset = old_offset + padding;
+         size_t new_offset = aligned_offset + size;
+
+         if (new_offset > CHUNK_SIZE) {
+             // Need new chunk
+             Chunk* next = chunk->next.load(std::memory_order_acquire);
+             if (!next) {
+                 // Lock to prevent multiple threads creating chunks
+                 std::lock_guard<std::mutex> lock(chunk_creation_mutex_);
+                 // Double-check after acquiring lock
+                 next = chunk->next.load(std::memory_order_acquire);
+                 if (!next) {
+                     auto new_chunk = std::make_unique<Chunk>();
+                     next = new_chunk.get();
+                     chunk->next.store(next, std::memory_order_release);
+                     // Transfer ownership after setting atomic pointer
+                     new_chunk.release();
+                 }
+             }
+             // Update current to the new chunk
+             current_.store(next, std::memory_order_release);
+             chunk = next;
+             continue;
+         }
+
+         if (chunk->offset.compare_exchange_weak(old_offset, new_offset,
+                                                 std::memory_order_release,
+                                                 std::memory_order_relaxed)) {
+             return chunk->data + aligned_offset;
+         }
+     }
+ }
+
+ ArenaAllocator::~ArenaAllocator() {
+     // Clean up linked chunks
+     Chunk* chunk = head_->next.load(std::memory_order_acquire);
+     while (chunk) {
+         Chunk* next = chunk->next.load(std::memory_order_acquire);
+         delete chunk;
+         chunk = next;
+     }
+ }
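
As a quick illustration of the padding arithmetic in allocate() above, here is a minimal self-contained sketch of the same power-of-two alignment math; the address and alignment values are hypothetical:

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    // Same padding computation as ArenaAllocator::allocate.
    int main() {
        const std::uintptr_t addr = 0x1003;  // hypothetical bump-pointer address
        const std::size_t align = 8;         // must be a power of two
        std::size_t misalignment = addr & (align - 1);                    // 3
        std::size_t padding = misalignment ? (align - misalignment) : 0;  // 5
        assert((addr + padding) % align == 0);  // 0x1008 is 8-byte aligned
        return 0;
    }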
+
+ // TopK implementation
+
+ TopK::TopK(size_t k) : k(k) {
+     heap.reserve(k + 1); // Reserve k+1 to avoid reallocation during push
+ }
+
+ void TopK::push(float score, size_t idx) {
+     if (heap.size() < k) {
+         heap.emplace_back(score, idx);
+         std::push_heap(heap.begin(), heap.end(), cmp);
+     } else if (k > 0 && score > heap.front().first) {
+         // Replace the minimum element
+         std::pop_heap(heap.begin(), heap.end(), cmp);
+         heap.back() = {score, idx};
+         std::push_heap(heap.begin(), heap.end(), cmp);
+     }
+ }
+
+ bool TopK::cmp(const std::pair<float, size_t>& a, const std::pair<float, size_t>& b) {
+     return a.first > b.first;
+ }
+
+ void TopK::merge(const TopK& other) {
+     // More efficient: if we have space, bulk insert then re-heapify
+     if (heap.size() + other.heap.size() <= k) {
+         heap.insert(heap.end(), other.heap.begin(), other.heap.end());
+         std::make_heap(heap.begin(), heap.end(), cmp);
+     } else {
+         // Otherwise, insert one by one
+         for (const auto& [score, idx] : other.heap) {
+             push(score, idx);
+         }
+     }
+ }
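
TopK keeps the k highest scores in a min-heap whose root is the smallest retained score, so any better candidate simply evicts the root. A standalone sketch of the same idea (it deliberately does not use the TopK class itself):

    #include <algorithm>
    #include <cstddef>
    #include <cstdio>
    #include <utility>
    #include <vector>

    int main() {
        const std::size_t k = 2;
        std::vector<std::pair<float, std::size_t>> heap;
        auto cmp = [](const std::pair<float, std::size_t>& a,
                      const std::pair<float, std::size_t>& b) {
            return a.first > b.first;  // greater-than comparator -> min-heap
        };
        const float scores[] = {0.1f, 0.9f, 0.4f, 0.7f};
        for (std::size_t i = 0; i < 4; ++i) {
            if (heap.size() < k) {
                heap.emplace_back(scores[i], i);
                std::push_heap(heap.begin(), heap.end(), cmp);
            } else if (scores[i] > heap.front().first) {
                std::pop_heap(heap.begin(), heap.end(), cmp);  // move min to back
                heap.back() = {scores[i], i};
                std::push_heap(heap.begin(), heap.end(), cmp);
            }
        }
        for (const auto& [s, i] : heap)
            std::printf("%.1f (doc %zu)\n", s, i);  // keeps 0.9 and 0.7
        return 0;
    }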
+
+ // VectorStore implementation
+
+ VectorStore::VectorStore(size_t dim) : dim_(dim) {
+     entries_.resize(1'000'000); // Pre-size with default-constructed entries
+ }
+
+ simdjson::error_code VectorStore::add_document(simdjson::ondemand::document& json_doc) {
+     simdjson::ondemand::object obj;
+     auto error = json_doc.get_object().get(obj);
+     if (error) {
+         return error;
+     }
+     return add_document(obj);
+ }
+
+ simdjson::error_code VectorStore::add_document(simdjson::ondemand::object& json_doc) {
+     // Cannot add documents after finalization
+     if (is_finalized_.load(std::memory_order_acquire)) {
+         return simdjson::INCORRECT_TYPE;
+     }
+
+     // Parse with error handling
+     std::string_view id, text;
+     auto error = json_doc["id"].get_string().get(id);
+     if (error) return error;
+
+     error = json_doc["text"].get_string().get(text);
+     if (error) return error;
+
+     // Calculate sizes
+     size_t emb_size = dim_ * sizeof(float);
+     size_t id_size = id.size() + 1;
+     size_t text_size = text.size() + 1;
+
+     // Allocate temporary buffer for embedding
+     std::vector<float> temp_embedding;
+     temp_embedding.reserve(dim_);
+
+     // Process metadata and embedding first
+     simdjson::ondemand::object metadata;
+     error = json_doc["metadata"].get_object().get(metadata);
+     if (error) return error;
+
+     simdjson::ondemand::array emb_array;
+     error = metadata["embedding"].get_array().get(emb_array);
+     if (error) return error;
+
+     // Consume the array before touching anything else
+     size_t i = 0;
+     for (auto value_result : emb_array) {
+         simdjson::ondemand::value v;
+         error = value_result.get(v);
+         if (error) return error;
+         double val;
+         error = v.get_double().get(val);
+         if (error) return error;
+
+         if (i >= dim_) {
+             return simdjson::CAPACITY; // Too many embedding values
+         }
+         temp_embedding.push_back(float(val));
+         i++;
+     }
+
+     // Verify we got the expected number of embedding values
+     if (i != dim_) {
+         return simdjson::INCORRECT_TYPE; // Wrong embedding dimension
+     }
+
+     // Now it is safe to take the raw metadata JSON
+     std::string_view raw_json;
+     error = metadata.raw_json().get(raw_json);
+     if (error) return error;
+     size_t meta_size = raw_json.size() + 1;
+
+     // Single arena allocation
+     char* base = (char*)arena_.allocate(emb_size + id_size + text_size + meta_size);
+     if (!base) {
+         return simdjson::MEMALLOC; // Allocation failed
+     }
+
+     // Layout: [embedding][id][text][metadata_json]
+     float* emb_ptr = (float*)base;
+     char* id_ptr = base + emb_size;
+     char* text_ptr = id_ptr + id_size;
+     char* meta_ptr = text_ptr + text_size;
+
+     // Copy embedding from temporary buffer
+     std::memcpy(emb_ptr, temp_embedding.data(), emb_size);
+
+     // Copy strings (adding null terminators)
+     std::memcpy(id_ptr, id.data(), id.size());
+     id_ptr[id.size()] = '\0';
+
+     std::memcpy(text_ptr, text.data(), text.size());
+     text_ptr[text.size()] = '\0';
+
+     std::memcpy(meta_ptr, raw_json.data(), raw_json.size());
+     meta_ptr[raw_json.size()] = '\0';
+
+     // Atomic increment for parallel loading
+     size_t idx = count_.fetch_add(1, std::memory_order_relaxed);
+
+     // Bounds check
+     if (idx >= entries_.size()) {
+         count_.fetch_sub(1, std::memory_order_relaxed);
+         return simdjson::CAPACITY;
+     }
+
+     // Construct entry directly - no synchronization needed
+     // Use traditional initialization for C++17 compatibility
+     Document doc;
+     doc.id = std::string_view(id_ptr, id.size());
+     doc.text = std::string_view(text_ptr, text.size());
+     doc.metadata_json = std::string_view(meta_ptr, raw_json.size());
+
+     Entry entry;
+     entry.doc = doc;
+     entry.embedding = emb_ptr;
+
+     entries_[idx] = entry;
+
+     return simdjson::SUCCESS;
+ }
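
For reference, a hedged sketch of the record shape add_document() accepts, driven through simdjson's ondemand parser. The field names ("id", "text", "metadata" with an "embedding" array) mirror the lookups above; the 3-dimensional store and the values are purely illustrative:

    #include <string>

    #include "vector_store.h"

    int main() {
        VectorStore store(3);
        simdjson::ondemand::parser parser;
        simdjson::padded_string json(std::string(
            R"({"id":"doc-1","text":"hello world",)"
            R"("metadata":{"embedding":[0.1,0.2,0.3]}})"));
        simdjson::ondemand::document doc;
        if (parser.iterate(json).get(doc)) return 1;  // parse error
        return store.add_document(doc) == simdjson::SUCCESS ? 0 : 1;
    }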
+
+ void VectorStore::finalize() {
+     // If already finalized, do nothing
+     if (is_finalized_.load(std::memory_order_acquire)) {
+         return;
+     }
+
+     // Get final count
+     size_t final_count = count_.load(std::memory_order_acquire);
+
+     // Normalize all embeddings (single-threaded, no races)
+     for (size_t i = 0; i < final_count; ++i) {
+         float* emb = entries_[i].embedding;
+         if (!emb) continue; // Skip uninitialized entries
+
+         float sum = 0.0f;
+         #pragma omp simd reduction(+:sum)
+         for (size_t j = 0; j < dim_; ++j) {
+             sum += emb[j] * emb[j];
+         }
+
+         if (sum > 1e-10f) { // Avoid division by zero
+             float inv_norm = 1.0f / std::sqrt(sum);
+             #pragma omp simd
+             for (size_t j = 0; j < dim_; ++j) {
+                 emb[j] *= inv_norm;
+             }
+         }
+     }
+
+     // Mark as finalized - this is the ONLY place this flag is set.
+     // The seq_cst store publishes the normalized data to other threads
+     // (a standalone "#pragma omp barrier" here would bind to no parallel
+     // region and is not needed).
+     is_finalized_.store(true, std::memory_order_seq_cst);
+ }
+
+ void VectorStore::normalize_all() {
+     finalize();
+ }
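
Because finalize() rescales every stored embedding to unit length, the raw dot product computed in search() below is exactly cosine similarity against the stored vectors. A tiny sanity check of the normalization step, with hypothetical values:

    #include <cassert>
    #include <cmath>

    // Normalizing (3, 4) by 1/5 yields (0.6, 0.8), a unit vector, so a dot
    // product against it equals |query| * cos(angle); ranking by the dot
    // product therefore ranks by cosine similarity.
    int main() {
        float v[2] = {3.0f, 4.0f};
        float inv_norm = 1.0f / std::sqrt(v[0] * v[0] + v[1] * v[1]);  // 1/5
        v[0] *= inv_norm; v[1] *= inv_norm;
        assert(std::fabs(v[0] * v[0] + v[1] * v[1] - 1.0f) < 1e-6f);
        return 0;
    }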
+
+ std::vector<std::pair<float, size_t>>
+ VectorStore::search(const float* query, size_t k) const {
+     // Exclusive lock: prevent overlapping OpenMP teams. Since each search
+     // uses all threads via OpenMP, concurrent searches provide no benefit.
+     std::unique_lock<std::shared_mutex> lock(search_mutex_);
+
+     // Search can ONLY run if finalized
+     if (!is_finalized_.load(std::memory_order_acquire)) {
+         return {};
+     }
+
+     size_t n = count_.load(std::memory_order_acquire);
+     if (n == 0 || k == 0) return {};
+
+     k = std::min(k, n); // Ensure k doesn't exceed count
+
+     // Always use per-thread heaps to avoid any shared memory races
+     const int num_threads = omp_get_max_threads();
+     std::vector<TopK> thread_heaps;
+     thread_heaps.reserve(num_threads);
+     for (int i = 0; i < num_threads; ++i) {
+         thread_heaps.emplace_back(k); // in-place construction, no copies
+     }
+
+     std::vector<std::pair<float, std::size_t>> result;
+
+     #pragma omp parallel
+     {
+         const int tid = omp_get_thread_num();
+         TopK& local_heap = thread_heaps[tid];
+
+         // The implicit barrier at the end of this loop ensures all threads
+         // finish scoring before the single-threaded merge below.
+         #pragma omp for
+         for (int i = 0; i < static_cast<int>(n); ++i) {
+             float score = 0.0f;
+             const float* emb = entries_[i].embedding;
+
+             #pragma omp simd reduction(+:score)
+             for (size_t j = 0; j < dim_; ++j) {
+                 score += emb[j] * query[j];
+             }
+
+             local_heap.push(score, i);
+         }
+
+         #pragma omp single
+         {
+             TopK final_heap(k);
+             for (auto& th : thread_heaps) final_heap.merge(th);
+             result = std::move(final_heap.heap);
+         }
+     }
+
+     std::sort(result.begin(), result.end(),
+               [](const auto& a, const auto& b) { return a.first > b.first; });
+
+     return result;
+ }
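
And a hedged sketch of the query path, assuming a 3-dimensional store already loaded and finalized as above (the query values are illustrative; get_entry is defined just below):

    #include <cstdio>
    #include <vector>

    #include "vector_store.h"

    // Print the top-5 matches. search() returns (score, index) pairs sorted
    // by descending score. The query need not be unit length: scaling it
    // scales every dot product equally and leaves the ranking unchanged.
    void run_query(const VectorStore& store) {
        std::vector<float> query = {0.1f, 0.2f, 0.3f};
        auto results = store.search(query.data(), 5);
        for (const auto& [score, idx] : results) {
            const auto& entry = store.get_entry(idx);
            std::printf("%.4f  %.*s\n", score,
                        (int)entry.doc.id.size(), entry.doc.id.data());
        }
    }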
+
+ const VectorStore::Entry& VectorStore::get_entry(size_t idx) const {
+     return entries_[idx];
+ }
+
+ size_t VectorStore::size() const {
+     return count_.load(std::memory_order_acquire);
+ }
+
+ bool VectorStore::is_finalized() const {
+     return is_finalized_.load(std::memory_order_acquire);
+ }