native-vector-store 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/binding.cc ADDED
@@ -0,0 +1,169 @@
+ #include <napi.h>
+ #include "vector_store.h"
+ #include "vector_store_loader.h"
+ #include <cmath>
+
+ class VectorStoreWrapper : public Napi::ObjectWrap<VectorStoreWrapper> {
+     std::unique_ptr<VectorStore> store_;
+     size_t dim_;
+
+ public:
+     static Napi::Object Init(Napi::Env env, Napi::Object exports) {
+         Napi::Function func = DefineClass(env, "VectorStore", {
+             InstanceMethod("loadDir", &VectorStoreWrapper::LoadDir),
+             InstanceMethod("loadDirMMap", &VectorStoreWrapper::LoadDirMMap),
+             InstanceMethod("loadDirAdaptive", &VectorStoreWrapper::LoadDirAdaptive),
+             InstanceMethod("addDocument", &VectorStoreWrapper::AddDocument),
+             InstanceMethod("search", &VectorStoreWrapper::Search),
+             InstanceMethod("normalize", &VectorStoreWrapper::Normalize),
+             InstanceMethod("finalize", &VectorStoreWrapper::FinalizeStore),
+             InstanceMethod("isFinalized", &VectorStoreWrapper::IsFinalized),
+             InstanceMethod("size", &VectorStoreWrapper::Size)
+         });
+
+         exports.Set("VectorStore", func);
+         return exports;
+     }
+
+     VectorStoreWrapper(const Napi::CallbackInfo& info)
+         : Napi::ObjectWrap<VectorStoreWrapper>(info) {
+         dim_ = info[0].As<Napi::Number>().Uint32Value();
+         store_ = std::make_unique<VectorStore>(dim_);
+     }
+
+     void LoadDir(const Napi::CallbackInfo& info) {
+         std::string path = info[0].As<Napi::String>();
+         // Use adaptive loader as default for best performance
+         VectorStoreLoader::loadDirectoryAdaptive(store_.get(), path);
+     }
+
+     void LoadDirMMap(const Napi::CallbackInfo& info) {
+         std::string path = info[0].As<Napi::String>();
+         VectorStoreLoader::loadDirectoryMMap(store_.get(), path);
+     }
+
+     void LoadDirAdaptive(const Napi::CallbackInfo& info) {
+         std::string path = info[0].As<Napi::String>();
+         VectorStoreLoader::loadDirectoryAdaptive(store_.get(), path);
+     }
+
+     void AddDocument(const Napi::CallbackInfo& info) {
+         Napi::Object doc = info[0].As<Napi::Object>();
+
+         // Escape characters that would otherwise break the hand-built
+         // JSON below (quotes, backslashes, common control characters)
+         auto escape = [](const std::string& in) {
+             std::string out;
+             out.reserve(in.size());
+             for (char c : in) {
+                 switch (c) {
+                     case '"':  out += "\\\""; break;
+                     case '\\': out += "\\\\"; break;
+                     case '\n': out += "\\n";  break;
+                     case '\r': out += "\\r";  break;
+                     case '\t': out += "\\t";  break;
+                     default:   out += c;      break;
+                 }
+             }
+             return out;
+         };
+
+         // Convert JS object to JSON string
+         std::string json_str = "{";
+         json_str += "\"id\":\"" + escape(doc.Get("id").As<Napi::String>().Utf8Value()) + "\",";
+         json_str += "\"text\":\"" + escape(doc.Get("text").As<Napi::String>().Utf8Value()) + "\",";
+         json_str += "\"metadata\":{\"embedding\":[";
+
+         // Get embedding from metadata
+         Napi::Object metadata = doc.Get("metadata").As<Napi::Object>();
+         Napi::Array embedding = metadata.Get("embedding").As<Napi::Array>();
+
+         for (uint32_t i = 0; i < embedding.Length(); ++i) {
+             if (i > 0) json_str += ",";
+             json_str += std::to_string(embedding.Get(i).As<Napi::Number>().DoubleValue());
+         }
+         json_str += "]}}";
+
+         // Parse and add
+         simdjson::ondemand::parser parser;
+         simdjson::padded_string padded(json_str);
+         simdjson::ondemand::document json_doc;
+         auto parse_error = parser.iterate(padded).get(json_doc);
+         if (parse_error) {
+             Napi::Error::New(info.Env(),
+                 std::string("JSON parse error: ") + simdjson::error_message(parse_error))
+                 .ThrowAsJavaScriptException();
+             return;
+         }
+
+         auto add_error = store_->add_document(json_doc);
+         if (add_error) {
+             Napi::Error::New(info.Env(),
+                 std::string("Document add error: ") + simdjson::error_message(add_error))
+                 .ThrowAsJavaScriptException();
+             return;
+         }
+     }
+
+     Napi::Value Search(const Napi::CallbackInfo& info) {
+         Napi::Env env = info.Env();
+         Napi::Float32Array query_array = info[0].As<Napi::Float32Array>();
+         size_t k = info[1].As<Napi::Number>().Uint32Value();
+
+         // Normalize the query by default; pass false as the third argument to skip
+         bool normalize_query = info.Length() > 2 ? info[2].As<Napi::Boolean>().Value() : true;
+
+         std::vector<float> query(query_array.Data(),
+                                  query_array.Data() + query_array.ElementLength());
+
+         if (normalize_query) {
+             float sum = 0.0f;
+             for (float v : query) sum += v * v;
+             if (sum > 1e-10f) {
+                 float inv_norm = 1.0f / std::sqrt(sum);
+                 for (float& v : query) v *= inv_norm;
+             }
+         }
+
+         auto results = store_->search(query.data(), k);
+
+         Napi::Array output = Napi::Array::New(env, results.size());
+         for (size_t i = 0; i < results.size(); ++i) {
+             const auto& entry = store_->get_entry(results[i].second);
+
+             Napi::Object result = Napi::Object::New(env);
+             result.Set("score", results[i].first);
+             result.Set("id", std::string(entry.doc.id));
+             result.Set("text", std::string(entry.doc.text));
+
+             // For now, return metadata as JSON string
+             result.Set("metadata_json", std::string(entry.doc.metadata_json));
+
+             output[i] = result;
+         }
+
+         return output;
+     }
+
+     void Normalize(const Napi::CallbackInfo& info) {
+         store_->normalize_all();
+     }
+
+     void FinalizeStore(const Napi::CallbackInfo& info) {
+         store_->finalize();
+     }
+
+     Napi::Value IsFinalized(const Napi::CallbackInfo& info) {
+         return Napi::Boolean::New(info.Env(), store_->is_finalized());
+     }
+
+     Napi::Value Size(const Napi::CallbackInfo& info) {
+         return Napi::Number::New(info.Env(), store_->size());
+     }
+ };
+
+ Napi::Object Init(Napi::Env env, Napi::Object exports) {
+     return VectorStoreWrapper::Init(env, exports);
+ }
+
+ NODE_API_MODULE(vector_store, Init)
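
A note on the Search() scoring above: normalizing the query to unit length means the store's raw dot products behave as cosine similarity, provided the stored vectors were normalized too (the normalize binding, which calls normalize_all()). Below is a minimal standalone C++ sketch of that arithmetic, independent of the addon; the vector values are made up for illustration.

#include <cmath>
#include <cstdio>
#include <vector>

// Same L2 normalization as in Search(): scale a vector to unit length
// so a plain dot product against other unit vectors equals cosine similarity.
static void normalize(std::vector<float>& v) {
    float sum = 0.0f;
    for (float x : v) sum += x * x;
    if (sum > 1e-10f) {                 // same epsilon guard as the binding
        float inv = 1.0f / std::sqrt(sum);
        for (float& x : v) x *= inv;
    }
}

static float dot(const std::vector<float>& a, const std::vector<float>& b) {
    float s = 0.0f;
    for (size_t i = 0; i < a.size(); ++i) s += a[i] * b[i];
    return s;
}

int main() {
    std::vector<float> q = {3.0f, 4.0f};  // norm 5 -> {0.6, 0.8}
    std::vector<float> d = {1.0f, 0.0f};  // already unit length
    normalize(q);
    normalize(d);
    std::printf("cosine similarity: %f\n", dot(q, d));  // prints 0.600000
    return 0;
}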
package/src/defs.h ADDED
@@ -0,0 +1,107 @@
+ /* -*- mode: c++; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 4 -*- */
+ #ifndef ATOMIC_QUEUE_DEFS_H_INCLUDED
+ #define ATOMIC_QUEUE_DEFS_H_INCLUDED
+
+ // Copyright (c) 2019 Maxim Egorushkin. MIT License. See the full licence in file LICENSE.
+
+ #include <atomic>
+
+ #if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86)
+ #include <emmintrin.h>
+ namespace atomic_queue {
+ constexpr int CACHE_LINE_SIZE = 64;
+ static inline void spin_loop_pause() noexcept {
+     _mm_pause();
+ }
+ } // namespace atomic_queue
+ #elif defined(__arm__) || defined(__aarch64__) || defined(_M_ARM64)
+ namespace atomic_queue {
+ constexpr int CACHE_LINE_SIZE = 64;
+ static inline void spin_loop_pause() noexcept {
+ #if (defined(__ARM_ARCH_6K__) || \
+      defined(__ARM_ARCH_6Z__) || \
+      defined(__ARM_ARCH_6ZK__) || \
+      defined(__ARM_ARCH_6T2__) || \
+      defined(__ARM_ARCH_7__) || \
+      defined(__ARM_ARCH_7A__) || \
+      defined(__ARM_ARCH_7R__) || \
+      defined(__ARM_ARCH_7M__) || \
+      defined(__ARM_ARCH_7S__) || \
+      defined(__ARM_ARCH_8A__) || \
+      defined(__aarch64__))
+     asm volatile ("yield" ::: "memory");
+ #elif defined(_M_ARM64)
+     __yield();
+ #else
+     asm volatile ("nop" ::: "memory");
+ #endif
+ }
+ } // namespace atomic_queue
+ #elif defined(__ppc64__) || defined(__powerpc64__)
+ namespace atomic_queue {
+ constexpr int CACHE_LINE_SIZE = 128; // TODO: Review that this is the correct value.
+ static inline void spin_loop_pause() noexcept {
+     asm volatile("or 31,31,31 # very low priority"); // TODO: Review and benchmark that this is the right instruction.
+ }
+ } // namespace atomic_queue
+ #elif defined(__s390x__)
+ namespace atomic_queue {
+ constexpr int CACHE_LINE_SIZE = 256; // TODO: Review that this is the correct value.
+ static inline void spin_loop_pause() noexcept {} // TODO: Find the right instruction to use here, if any.
+ } // namespace atomic_queue
+ #elif defined(__riscv)
+ namespace atomic_queue {
+ constexpr int CACHE_LINE_SIZE = 64;
+ static inline void spin_loop_pause() noexcept {
+     asm volatile (".insn i 0x0F, 0, x0, x0, 0x010");
+ }
+ } // namespace atomic_queue
+ #elif defined(__loongarch__)
+ namespace atomic_queue {
+ constexpr int CACHE_LINE_SIZE = 64;
+ static inline void spin_loop_pause() noexcept
+ {
+     asm volatile("nop \n nop \n nop \n nop \n nop \n nop \n nop \n nop");
+ }
+ } // namespace atomic_queue
+ #else
+ #ifdef _MSC_VER
+ #pragma message("Unknown CPU architecture. Using L1 cache line size of 64 bytes and no spinloop pause instruction.")
+ #else
+ #warning "Unknown CPU architecture. Using L1 cache line size of 64 bytes and no spinloop pause instruction."
+ #endif
+ namespace atomic_queue {
+ constexpr int CACHE_LINE_SIZE = 64; // TODO: Review that this is the correct value.
+ static inline void spin_loop_pause() noexcept {}
+ } // namespace atomic_queue
+ #endif
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+ namespace atomic_queue {
+
+ #if defined(__GNUC__) || defined(__clang__)
+ #define ATOMIC_QUEUE_LIKELY(expr) __builtin_expect(static_cast<bool>(expr), 1)
+ #define ATOMIC_QUEUE_UNLIKELY(expr) __builtin_expect(static_cast<bool>(expr), 0)
+ #define ATOMIC_QUEUE_NOINLINE __attribute__((noinline))
+ #else
+ #define ATOMIC_QUEUE_LIKELY(expr) (expr)
+ #define ATOMIC_QUEUE_UNLIKELY(expr) (expr)
+ #define ATOMIC_QUEUE_NOINLINE
+ #endif
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+ auto constexpr A = std::memory_order_acquire;
+ auto constexpr R = std::memory_order_release;
+ auto constexpr X = std::memory_order_relaxed;
+ auto constexpr C = std::memory_order_seq_cst;
+ auto constexpr AR = std::memory_order_acq_rel;
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+ } // namespace atomic_queue
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+ #endif // ATOMIC_QUEUE_DEFS_H_INCLUDED
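
defs.h above supplies the vendored atomic_queue code with a portable spin_loop_pause() and terse memory-order aliases (A, R, X, C, AR). As a hedged illustration of how those primitives combine, here is a minimal test-and-test-and-set spinlock sketch; it assumes the header is reachable as "defs.h" and is not itself part of the package.

#include <atomic>
#include "defs.h"  // assumed include path for the vendored header above

// Minimal test-and-test-and-set spinlock using the portable
// spin_loop_pause() and the memory-order aliases from defs.h.
class SpinLock {
    std::atomic<bool> locked_{false};
public:
    void lock() noexcept {
        for (;;) {
            if (!locked_.exchange(true, atomic_queue::A))  // acquire on success
                return;
            while (locked_.load(atomic_queue::X))          // relaxed read-only spin
                atomic_queue::spin_loop_pause();           // yield pipeline resources
        }
    }
    void unlock() noexcept {
        locked_.store(false, atomic_queue::R);             // release
    }
};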
@@ -0,0 +1,159 @@
+ #pragma once
+ #include <string>
+ #include <memory>
+
+ #ifdef _WIN32
+ #include <windows.h>
+ #else
+ #include <sys/mman.h>
+ #include <sys/stat.h>
+ #include <fcntl.h>
+ #include <unistd.h>
+ #endif
+
+ // Cross-platform memory-mapped file wrapper
+ class MMapFile {
+ public:
+     MMapFile() = default;
+     ~MMapFile() { close(); }
+
+     // Disable copy, enable move
+     MMapFile(const MMapFile&) = delete;
+     MMapFile& operator=(const MMapFile&) = delete;
+     MMapFile(MMapFile&& other) noexcept { *this = std::move(other); }
+     MMapFile& operator=(MMapFile&& other) noexcept {
+         if (this != &other) {
+             close();
+             data_ = other.data_;
+             size_ = other.size_;
+ #ifdef _WIN32
+             file_handle_ = other.file_handle_;
+             map_handle_ = other.map_handle_;
+             other.file_handle_ = INVALID_HANDLE_VALUE;
+             other.map_handle_ = nullptr;
+ #else
+             fd_ = other.fd_;
+             other.fd_ = -1;
+ #endif
+             other.data_ = nullptr;
+             other.size_ = 0;
+         }
+         return *this;
+     }
+
+     bool open(const std::string& filepath) {
+         close();
+
+ #ifdef _WIN32
+         // Windows implementation
+         file_handle_ = CreateFileA(filepath.c_str(), GENERIC_READ, FILE_SHARE_READ,
+                                    nullptr, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, nullptr);
+         if (file_handle_ == INVALID_HANDLE_VALUE) {
+             return false;
+         }
+
+         LARGE_INTEGER file_size;
+         if (!GetFileSizeEx(file_handle_, &file_size)) {
+             CloseHandle(file_handle_);
+             file_handle_ = INVALID_HANDLE_VALUE;
+             return false;
+         }
+         size_ = static_cast<size_t>(file_size.QuadPart);
+
+         if (size_ == 0) {
+             return true; // Empty file, nothing to map
+         }
+
+         map_handle_ = CreateFileMappingA(file_handle_, nullptr, PAGE_READONLY, 0, 0, nullptr);
+         if (!map_handle_) {
+             CloseHandle(file_handle_);
+             file_handle_ = INVALID_HANDLE_VALUE;
+             return false;
+         }
+
+         data_ = MapViewOfFile(map_handle_, FILE_MAP_READ, 0, 0, 0);
+         if (!data_) {
+             CloseHandle(map_handle_);
+             CloseHandle(file_handle_);
+             map_handle_ = nullptr;
+             file_handle_ = INVALID_HANDLE_VALUE;
+             return false;
+         }
+ #else
+         // POSIX implementation
+         fd_ = ::open(filepath.c_str(), O_RDONLY);
+         if (fd_ < 0) {
+             return false;
+         }
+
+         struct stat st;
+         if (fstat(fd_, &st) < 0) {
+             ::close(fd_);
+             fd_ = -1;
+             return false;
+         }
+         size_ = static_cast<size_t>(st.st_size);
+
+         if (size_ == 0) {
+             return true; // Empty file, nothing to map
+         }
+
+         data_ = mmap(nullptr, size_, PROT_READ, MAP_PRIVATE, fd_, 0);
+         if (data_ == MAP_FAILED) {
+             ::close(fd_);
+             fd_ = -1;
+             data_ = nullptr;
+             return false;
+         }
+
+         // Advise kernel about access pattern
+         madvise(data_, size_, MADV_SEQUENTIAL);
+ #endif
+
+         return true;
+     }
+
+     void close() {
+         if (data_) {
+ #ifdef _WIN32
+             UnmapViewOfFile(data_);
+ #else
+             munmap(data_, size_);
+ #endif
+             data_ = nullptr;
+         }
+
+ #ifdef _WIN32
+         if (map_handle_) {
+             CloseHandle(map_handle_);
+             map_handle_ = nullptr;
+         }
+         if (file_handle_ != INVALID_HANDLE_VALUE) {
+             CloseHandle(file_handle_);
+             file_handle_ = INVALID_HANDLE_VALUE;
+         }
+ #else
+         if (fd_ >= 0) {
+             ::close(fd_);
+             fd_ = -1;
+         }
+ #endif
+
+         size_ = 0;
+     }
+
+     const char* data() const { return static_cast<const char*>(data_); }
+     size_t size() const { return size_; }
+     bool is_open() const { return data_ != nullptr || size_ == 0; }
+
+ private:
+     void* data_ = nullptr;
+     size_t size_ = 0;
+
+ #ifdef _WIN32
+     HANDLE file_handle_ = INVALID_HANDLE_VALUE;
+     HANDLE map_handle_ = nullptr;
+ #else
+     int fd_ = -1;
+ #endif
+ };
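
The diff does not show which header in the package declares the MMapFile class above (the file path line for this hunk is missing from the listing). Assuming it is exposed through a header, a minimal usage sketch would look like the following; the "mmap_file.h" include path is hypothetical.

#include <cstdio>
// NOTE: hypothetical include; the diff above does not name the header
// that declares MMapFile.
#include "mmap_file.h"

int main(int argc, char** argv) {
    if (argc < 2) {
        std::fprintf(stderr, "usage: %s <file>\n", argv[0]);
        return 1;
    }
    MMapFile file;
    if (!file.open(argv[1])) {           // maps the file read-only
        std::fprintf(stderr, "failed to map %s\n", argv[1]);
        return 1;
    }
    std::printf("mapped %zu bytes\n", file.size());
    if (file.size() > 0)
        std::printf("first byte: 0x%02x\n",
                    static_cast<unsigned>(static_cast<unsigned char>(file.data()[0])));
    return 0;                            // destructor unmaps and closes
}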