native-vector-store 0.1.0 → 0.3.0
This diff shows the changes between publicly released versions of the package, as they appear in their respective public registries, and is provided for informational purposes only.
- package/README.md +242 -12
- package/binding.gyp +22 -10
- package/deps/simdjson/simdjson.cpp +56403 -0
- package/deps/simdjson/simdjson.h +123534 -0
- package/docs/PERFORMANCE_CASE_STUDY.md +130 -0
- package/docs/PREBUILDS.md +69 -0
- package/docs/VectorStore.html +180 -0
- package/docs/VectorStoreWrapper.html +1356 -0
- package/docs/fonts/OpenSans-Bold-webfont.eot +0 -0
- package/docs/fonts/OpenSans-Bold-webfont.svg +1830 -0
- package/docs/fonts/OpenSans-Bold-webfont.woff +0 -0
- package/docs/fonts/OpenSans-BoldItalic-webfont.eot +0 -0
- package/docs/fonts/OpenSans-BoldItalic-webfont.svg +1830 -0
- package/docs/fonts/OpenSans-BoldItalic-webfont.woff +0 -0
- package/docs/fonts/OpenSans-Italic-webfont.eot +0 -0
- package/docs/fonts/OpenSans-Italic-webfont.svg +1830 -0
- package/docs/fonts/OpenSans-Italic-webfont.woff +0 -0
- package/docs/fonts/OpenSans-Light-webfont.eot +0 -0
- package/docs/fonts/OpenSans-Light-webfont.svg +1831 -0
- package/docs/fonts/OpenSans-Light-webfont.woff +0 -0
- package/docs/fonts/OpenSans-LightItalic-webfont.eot +0 -0
- package/docs/fonts/OpenSans-LightItalic-webfont.svg +1835 -0
- package/docs/fonts/OpenSans-LightItalic-webfont.woff +0 -0
- package/docs/fonts/OpenSans-Regular-webfont.eot +0 -0
- package/docs/fonts/OpenSans-Regular-webfont.svg +1831 -0
- package/docs/fonts/OpenSans-Regular-webfont.woff +0 -0
- package/docs/global.html +561 -0
- package/docs/index.html +570 -0
- package/docs/scripts/linenumber.js +25 -0
- package/docs/scripts/prettify/Apache-License-2.0.txt +202 -0
- package/docs/scripts/prettify/lang-css.js +2 -0
- package/docs/scripts/prettify/prettify.js +28 -0
- package/docs/styles/jsdoc-default.css +358 -0
- package/docs/styles/prettify-jsdoc.css +111 -0
- package/docs/styles/prettify-tomorrow.css +132 -0
- package/index.js +162 -0
- package/package.json +30 -7
- package/prebuilds/darwin-arm64/native-vector-store.node +0 -0
- package/prebuilds/darwin-x64/native-vector-store.node +0 -0
- package/prebuilds/linux-arm64/native-vector-store.node +0 -0
- package/prebuilds/linux-x64/native-vector-store.node +0 -0
- package/prebuilds/linux-x64-musl/napi-v9/native-vector-store.node +0 -0
- package/prebuilds/linux-x64-musl/native-vector-store.node +0 -0
- package/prebuilds/win32-x64/native-vector-store.node +0 -0
- package/src/Makefile +87 -0
- package/src/test_main.cpp +173 -0
- package/src/test_stress.cpp +394 -0
- package/src/vector_store.cpp +344 -0
- package/src/vector_store.h +21 -323
- package/native-vector-store-0.1.0.tgz +0 -0
- package/scripts/build-prebuilds.sh +0 -23
- /package/{src → deps/atomic_queue}/atomic_queue.h +0 -0
- /package/{src → deps/atomic_queue}/defs.h +0 -0
package/src/vector_store.cpp
@@ -0,0 +1,344 @@
```cpp
#include "vector_store.h"

// ArenaAllocator implementation

ArenaAllocator::ArenaAllocator() : head_(std::make_unique<Chunk>()),
                                   current_(head_.get()) {}

void* ArenaAllocator::allocate(size_t size, size_t align) {
    // Validate alignment is power of 2 and reasonable
    assert(align > 0 && (align & (align - 1)) == 0);
    if (align > 4096) {
        return nullptr; // Alignment too large
    }

    // Validate size
    if (size > CHUNK_SIZE) {
        return nullptr; // Cannot allocate larger than chunk size
    }

    Chunk* chunk = current_.load(std::memory_order_acquire);
    while (true) {
        size_t old_offset = chunk->offset.load(std::memory_order_relaxed);

        // Calculate the pointer that would result from current offset
        void* ptr = chunk->data + old_offset;

        // Calculate how much padding we need for alignment
        size_t misalignment = (uintptr_t)ptr & (align - 1);
        size_t padding = misalignment ? (align - misalignment) : 0;

        size_t aligned_offset = old_offset + padding;
        size_t new_offset = aligned_offset + size;

        if (new_offset > CHUNK_SIZE) {
            // Need new chunk
            Chunk* next = chunk->next.load(std::memory_order_acquire);
            if (!next) {
                // Lock to prevent multiple threads creating chunks
                std::lock_guard<std::mutex> lock(chunk_creation_mutex_);
                // Double-check after acquiring lock
                next = chunk->next.load(std::memory_order_acquire);
                if (!next) {
                    auto new_chunk = std::make_unique<Chunk>();
                    next = new_chunk.get();
                    chunk->next.store(next, std::memory_order_release);
                    // Transfer ownership after setting atomic pointer
                    new_chunk.release();
                }
            }
            // Update current to the new chunk
            current_.store(next, std::memory_order_release);
            chunk = next;
            continue;
        }

        if (chunk->offset.compare_exchange_weak(old_offset, new_offset,
                                                std::memory_order_release,
                                                std::memory_order_relaxed)) {
            return chunk->data + aligned_offset;
        }
    }
}

ArenaAllocator::~ArenaAllocator() {
    // Clean up linked chunks
    Chunk* chunk = head_->next.load(std::memory_order_acquire);
    while (chunk) {
        Chunk* next = chunk->next.load(std::memory_order_acquire);
        delete chunk;
        chunk = next;
    }
}

// TopK implementation

TopK::TopK(size_t k) : k(k) {
    heap.reserve(k + 1); // Reserve k+1 to avoid reallocation during push
}

void TopK::push(float score, size_t idx) {
    if (heap.size() < k) {
        heap.emplace_back(score, idx);
        std::push_heap(heap.begin(), heap.end(), cmp);
    } else if (k > 0 && score > heap.front().first) {
        // Replace the minimum element
        std::pop_heap(heap.begin(), heap.end(), cmp);
        heap.back() = {score, idx};
        std::push_heap(heap.begin(), heap.end(), cmp);
    }
}

bool TopK::cmp(const std::pair<float, size_t>& a, const std::pair<float, size_t>& b) {
    return a.first > b.first;
}

void TopK::merge(const TopK& other) {
    // More efficient: if we have space, bulk insert then re-heapify
    if (heap.size() + other.heap.size() <= k) {
        heap.insert(heap.end(), other.heap.begin(), other.heap.end());
        std::make_heap(heap.begin(), heap.end(), cmp);
    } else {
        // Otherwise, insert one by one
        for (const auto& [score, idx] : other.heap) {
            push(score, idx);
        }
    }
}

// VectorStore implementation

VectorStore::VectorStore(size_t dim) : dim_(dim) {
    entries_.resize(1'000'000); // Pre-size with default-constructed entries
}

simdjson::error_code VectorStore::add_document(simdjson::ondemand::document& json_doc) {
    simdjson::ondemand::object obj;
    auto error = json_doc.get_object().get(obj);
    if (error) {
        return error;
    }
    return add_document(obj);
}

simdjson::error_code VectorStore::add_document(simdjson::ondemand::object& json_doc) {
    // Cannot add documents after finalization
    if (is_finalized_.load(std::memory_order_acquire)) {
        return simdjson::INCORRECT_TYPE;
    }

    // Parse with error handling
    std::string_view id, text;
    auto error = json_doc["id"].get_string().get(id);
    if (error) return error;

    error = json_doc["text"].get_string().get(text);
    if (error) return error;

    // Calculate sizes
    size_t emb_size = dim_ * sizeof(float);
    size_t id_size = id.size() + 1;
    size_t text_size = text.size() + 1;

    // Allocate temporary buffer for embedding
    std::vector<float> temp_embedding;
    temp_embedding.reserve(dim_);

    // Process metadata and embedding first
    simdjson::ondemand::object metadata;
    error = json_doc["metadata"].get_object().get(metadata);
    if (error) return error;

    simdjson::ondemand::array emb_array;
    error = metadata["embedding"].get_array().get(emb_array);
    if (error) return error;

    // Consume the array before touching anything else
    size_t i = 0;
    for (auto value_result : emb_array) {
        simdjson::ondemand::value v;
        error = value_result.get(v);
        if (error) return error;
        double val;
        error = v.get_double().get(val);
        if (error) return error;

        if (i >= dim_) {
            return simdjson::CAPACITY; // Too many embedding values
        }
        temp_embedding.push_back(float(val));
        i++;
    }

    // Verify we got the expected number of embedding values
    if (i != dim_) {
        return simdjson::INCORRECT_TYPE; // Wrong embedding dimension
    }

    // Now it is safe to take the raw metadata JSON
    std::string_view raw_json;
    error = metadata.raw_json().get(raw_json);
    if (error) return error;
    size_t meta_size = raw_json.size() + 1;

    // Single arena allocation
    char* base = (char*)arena_.allocate(emb_size + id_size + text_size + meta_size);
    if (!base) {
        return simdjson::MEMALLOC; // Allocation failed
    }

    // Layout: [embedding][id][text][metadata_json]
    float* emb_ptr = (float*)base;
    char* id_ptr = base + emb_size;
    char* text_ptr = id_ptr + id_size;
    char* meta_ptr = text_ptr + text_size;

    // Copy embedding from temporary buffer
    std::memcpy(emb_ptr, temp_embedding.data(), emb_size);

    // Copy strings (adding null terminator)
    std::memcpy(id_ptr, id.data(), id.size());
    id_ptr[id.size()] = '\0';

    std::memcpy(text_ptr, text.data(), text.size());
    text_ptr[text.size()] = '\0';

    std::memcpy(meta_ptr, raw_json.data(), raw_json.size());
    meta_ptr[raw_json.size()] = '\0';

    // Atomic increment for parallel loading
    size_t idx = count_.fetch_add(1, std::memory_order_relaxed);

    // Bounds check
    if (idx >= entries_.size()) {
        count_.fetch_sub(1, std::memory_order_relaxed);
        return simdjson::CAPACITY;
    }

    // Construct entry directly - no synchronization needed
    // Use traditional initialization for C++17 compatibility
    Document doc;
    doc.id = std::string_view(id_ptr, id.size());
    doc.text = std::string_view(text_ptr, text.size());
    doc.metadata_json = std::string_view(meta_ptr, raw_json.size());

    Entry entry;
    entry.doc = doc;
    entry.embedding = emb_ptr;

    entries_[idx] = entry;

    return simdjson::SUCCESS;
}

void VectorStore::finalize() {
    // If already finalized, do nothing
    if (is_finalized_.load(std::memory_order_acquire)) {
        return;
    }

    // Get final count
    size_t final_count = count_.load(std::memory_order_acquire);

    // Normalize all embeddings (single-threaded, no races)
    for (size_t i = 0; i < final_count; ++i) {
        float* emb = entries_[i].embedding;
        if (!emb) continue; // Skip uninitialized entries

        float sum = 0.0f;
        #pragma omp simd reduction(+:sum)
        for (size_t j = 0; j < dim_; ++j) {
            sum += emb[j] * emb[j];
        }

        if (sum > 1e-10f) { // Avoid division by zero
            float inv_norm = 1.0f / std::sqrt(sum);
            #pragma omp simd
            for (size_t j = 0; j < dim_; ++j) {
                emb[j] *= inv_norm;
            }
        }
    }

    // Ensure all threads see the normalized data
    #pragma omp barrier

    // Mark as finalized - this is the ONLY place this flag is set
    is_finalized_.store(true, std::memory_order_seq_cst);
}

void VectorStore::normalize_all() {
    finalize();
}

std::vector<std::pair<float, size_t>>
VectorStore::search(const float* query, size_t k) const {
    // Exclusive lock: prevent overlapping OpenMP teams
    // Since each search uses all threads via OpenMP, concurrent searches provide no benefit
    std::unique_lock<std::shared_mutex> lock(search_mutex_);

    // Search can ONLY run if finalized
    if (!is_finalized_.load(std::memory_order_acquire)) {
        return {};
    }

    size_t n = count_.load(std::memory_order_acquire);
    if (n == 0 || k == 0) return {};

    k = std::min(k, n); // Ensure k doesn't exceed count


    // Always use per-thread heaps to avoid any shared memory races
    const int num_threads = omp_get_max_threads();
    std::vector<TopK> thread_heaps;
    thread_heaps.reserve(num_threads);
    for (int i = 0; i < num_threads; ++i) {
        thread_heaps.emplace_back(k); // in-place construction, no copies
    }

    std::vector<std::pair<float,std::size_t>> result;

    #pragma omp parallel
    {
        const int tid = omp_get_thread_num();
        TopK& local_heap = thread_heaps[tid];

        #pragma omp for // default barrier kept - ensures all threads finish before merge
        for (int i = 0; i < static_cast<int>(n); ++i) {
            float score = 0.0f;
            const float* emb = entries_[i].embedding;

            #pragma omp simd reduction(+:score)
            for (size_t j = 0; j < dim_; ++j) {
                score += emb[j] * query[j];
            }

            local_heap.push(score, i);
        }

        #pragma omp barrier
        #pragma omp single
        {
            TopK final_heap(k);
            for (auto& th : thread_heaps) final_heap.merge(th);
            result = std::move(final_heap.heap);
        }
    }

    std::sort(result.begin(), result.end(),
              [](const auto& a, const auto& b) { return a.first > b.first; });

    return result;
}

const VectorStore::Entry& VectorStore::get_entry(size_t idx) const {
    return entries_[idx];
}

size_t VectorStore::size() const {
    return count_.load(std::memory_order_acquire);
}

bool VectorStore::is_finalized() const {
    return is_finalized_.load(std::memory_order_acquire);
}
```
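For context, the sketch below shows how the `VectorStore` API added in this file fits together: parse a JSON document with the bundled simdjson, pass it to `add_document()`, call `finalize()` once (which L2-normalizes every embedding and flips the finalized flag), then call `search()`, which returns `(score, index)` pairs usable with `get_entry()`. This is a minimal illustration, not code from the package: it assumes the declarations in `vector_store.h` match the definitions above and that the translation unit is compiled with OpenMP and the bundled simdjson sources; the file name `usage_sketch.cpp` is hypothetical.

```cpp
// usage_sketch.cpp - illustrative only; not part of the package.
#include <cstdio>
#include "simdjson.h"
#include "vector_store.h"

int main() {
    using namespace simdjson;

    VectorStore store(3); // 3-dimensional embeddings for illustration

    // Document shape expected by add_document(): top-level "id" and
    // "text" strings, plus an "embedding" array inside "metadata".
    auto json = R"({
        "id": "doc-1",
        "text": "hello world",
        "metadata": {"embedding": [0.1, 0.2, 0.3]}
    })"_padded;

    ondemand::parser parser;
    ondemand::document doc;
    if (parser.iterate(json).get(doc)) return 1;
    if (store.add_document(doc)) return 1; // nonzero simdjson::error_code on failure

    // finalize() normalizes every stored embedding in place and sets the
    // finalized flag; search() returns empty until this has happened.
    store.finalize();

    float query[3] = {0.1f, 0.2f, 0.3f};
    auto results = store.search(query, 5); // k is clamped to the store size
    for (const auto& [score, idx] : results) {
        const auto& entry = store.get_entry(idx);
        std::printf("%.*s: %f\n",
                    (int)entry.doc.id.size(), entry.doc.id.data(), score);
    }
    return 0;
}
```

Because the stored embeddings are unit-length after `finalize()`, the returned scores are plain dot products, which coincide with cosine similarity whenever the query vector is itself unit-length.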