native-vector-store 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -3
- package/binding.gyp +21 -10
- package/deps/simdjson.cpp +56403 -0
- package/deps/simdjson.h +123534 -0
- package/package.json +25 -6
- package/prebuilds/darwin-arm64/native-vector-store.node +0 -0
- package/prebuilds/darwin-x64/native-vector-store.node +0 -0
- package/prebuilds/linux-arm64/native-vector-store.node +0 -0
- package/prebuilds/linux-x64/native-vector-store.node +0 -0
- package/prebuilds/linux-x64-musl/napi-v9/native-vector-store.node +0 -0
- package/prebuilds/linux-x64-musl/native-vector-store.node +0 -0
- package/prebuilds/win32-x64/native-vector-store.node +0 -0
- package/src/Makefile +87 -0
- package/src/test_main.cpp +173 -0
- package/src/test_stress.cpp +394 -0
- package/src/vector_store.cpp +344 -0
- package/src/vector_store.h +21 -323
- package/native-vector-store-0.1.0.tgz +0 -0
- package/scripts/build-prebuilds.sh +0 -23
package/package.json
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
{
|
2
2
|
"name": "native-vector-store",
|
3
|
-
"version": "0.
|
3
|
+
"version": "0.2.0",
|
4
4
|
"description": "High-performance local vector store with SIMD optimization for MCP servers",
|
5
5
|
"main": "index.js",
|
6
6
|
"types": "lib/index.d.ts",
|
@@ -36,17 +36,36 @@
|
|
36
36
|
"dependencies": {
|
37
37
|
"dotenv": "^17.2.0",
|
38
38
|
"node-addon-api": "^7.0.0",
|
39
|
-
"node-gyp-build": "^4.8.
|
39
|
+
"node-gyp-build": "^4.8.4",
|
40
40
|
"openai": "^5.8.3"
|
41
41
|
},
|
42
|
+
"optionalDependencies": {
|
43
|
+
"@modelcontextprotocol/sdk": "^1.15.1"
|
44
|
+
},
|
42
45
|
"devDependencies": {
|
43
46
|
"node-gyp": "^10.0.0",
|
44
|
-
"prebuildify": "^6.0.0"
|
47
|
+
"prebuildify": "^6.0.0",
|
48
|
+
"prebuildify-cross": "^5.0.0"
|
45
49
|
},
|
46
|
-
"os": [
|
47
|
-
|
50
|
+
"os": [
|
51
|
+
"darwin",
|
52
|
+
"linux",
|
53
|
+
"win32"
|
54
|
+
],
|
55
|
+
"cpu": [
|
56
|
+
"x64",
|
57
|
+
"arm64"
|
58
|
+
],
|
48
59
|
"engines": {
|
49
60
|
"node": ">=14.0.0"
|
50
61
|
},
|
51
|
-
"gypfile": true
|
62
|
+
"gypfile": true,
|
63
|
+
"files": [
|
64
|
+
"index.js",
|
65
|
+
"lib/",
|
66
|
+
"src/",
|
67
|
+
"deps/",
|
68
|
+
"binding.gyp",
|
69
|
+
"prebuilds/"
|
70
|
+
]
|
52
71
|
}
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
package/src/Makefile
ADDED
@@ -0,0 +1,87 @@
|
|
1
|
+
# Build file for the standalone C++ test programs (test_vector_store and
# test_stress). Everything is compiled as an unoptimized debug build with
# OpenMP enabled; sanitizer variants are available through the test-asan,
# test-tsan and stress targets.

# Detect OS
UNAME_S := $(shell uname -s)

# Compiler
ifeq ($(UNAME_S),Darwin)
    CXX = clang++
else
    CXX = g++
endif

# Base flags
CXXFLAGS = -std=c++17 -g -O0 -fno-omit-frame-pointer -DDEBUG

# OS-specific configuration
ifeq ($(UNAME_S),Darwin)
    # macOS configuration
    INCLUDES = -I. -I../deps -I/opt/homebrew/opt/libomp/include
    LDFLAGS = -L/opt/homebrew/opt/libomp/lib
    LIBS = -lomp
    # Add flags for better debugging with lldb
    CXXFLAGS += -glldb -gdwarf-4
    # OpenMP flags for macOS
    CXXFLAGS += -Xpreprocessor -fopenmp
else
    # Linux configuration
    INCLUDES = -I. -I../deps
    LDFLAGS = -L/usr/lib
    LIBS = -lgomp
    # OpenMP flags for Linux
    CXXFLAGS += -fopenmp
endif

TARGET = test_vector_store
STRESS_TARGET = test_stress
SOURCES = test_main.cpp vector_store.cpp ../deps/simdjson.cpp
STRESS_SOURCES = test_stress.cpp vector_store.cpp vector_store_loader.cpp vector_store_loader_mmap.cpp vector_store_loader_adaptive.cpp ../deps/simdjson.cpp
OBJECTS = $(SOURCES:.cpp=.o)
STRESS_OBJECTS = $(STRESS_SOURCES:.cpp=.o)

all: $(TARGET) $(STRESS_TARGET)

$(TARGET): $(OBJECTS)
	$(CXX) $(CXXFLAGS) $(OBJECTS) -o $(TARGET) $(LDFLAGS) $(LIBS)

$(STRESS_TARGET): $(STRESS_OBJECTS)
	$(CXX) $(CXXFLAGS) $(STRESS_OBJECTS) -o $(STRESS_TARGET) $(LDFLAGS) $(LIBS)

%.o: %.cpp
	$(CXX) $(CXXFLAGS) $(INCLUDES) -c $< -o $@

clean:
	rm -f $(OBJECTS) $(STRESS_OBJECTS) $(TARGET) $(STRESS_TARGET)

# Force rebuild (useful for CI)
# NOTE: relies on prerequisites being processed left to right; with `make -j`
# the ordering between `clean` and `all` is not guaranteed.
rebuild: clean all

run: $(TARGET)
	./$(TARGET) ../test

debug: $(TARGET)
	lldb ./$(TARGET)

# Sanitizer builds
# The target-specific CXXFLAGS/LDFLAGS below also apply to the prerequisites,
# so the objects are recompiled with the sanitizer after `clean`.
test-asan: CXXFLAGS += -fsanitize=address -fno-sanitize-recover=all
test-asan: LDFLAGS += -fsanitize=address
test-asan: clean $(STRESS_TARGET)
	./$(STRESS_TARGET)

test-tsan: CXXFLAGS += -fsanitize=thread -fno-sanitize-recover=all
test-tsan: LDFLAGS += -fsanitize=thread
test-tsan: clean $(STRESS_TARGET)
	./$(STRESS_TARGET)

# Run stress tests with sanitizers by default
# Use SANITIZER=none to disable, SANITIZER=thread for TSan
SANITIZER ?= address

ifeq ($(SANITIZER),address)
stress: test-asan
else ifeq ($(SANITIZER),thread)
stress: test-tsan
else
stress: $(STRESS_TARGET)
	./$(STRESS_TARGET)
endif

# `rebuild` only runs other targets and never creates a file of that name, so
# it is declared phony alongside the rest.
.PHONY: all clean rebuild run debug test-asan test-tsan stress
|
@@ -0,0 +1,173 @@
|
|
1
|
+
#include "vector_store.h"
|
2
|
+
#include <iostream>
|
3
|
+
#include <filesystem>
|
4
|
+
#include <vector>
|
5
|
+
#include <simdjson.h>
|
6
|
+
#include <cctype>
|
7
|
+
|
8
|
+
void test_single_document() {
|
9
|
+
std::cout << "=== Testing Single Document ===" << std::endl;
|
10
|
+
|
11
|
+
VectorStore store(20);
|
12
|
+
|
13
|
+
// Create a test document manually
|
14
|
+
std::string json_str = R"({
|
15
|
+
"id": "test1",
|
16
|
+
"text": "Test document for debugging",
|
17
|
+
"metadata": {
|
18
|
+
"embedding": [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0,
|
19
|
+
0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
|
20
|
+
"category": "test"
|
21
|
+
}
|
22
|
+
})";
|
23
|
+
|
24
|
+
try {
|
25
|
+
simdjson::ondemand::parser parser;
|
26
|
+
simdjson::padded_string padded(json_str);
|
27
|
+
simdjson::ondemand::document doc;
|
28
|
+
auto parse_error = parser.iterate(padded).get(doc);
|
29
|
+
if (parse_error) {
|
30
|
+
std::cerr << "JSON parse error: " << simdjson::error_message(parse_error) << std::endl;
|
31
|
+
return;
|
32
|
+
}
|
33
|
+
|
34
|
+
std::cout << "Adding document..." << std::endl;
|
35
|
+
auto add_error = store.add_document(doc);
|
36
|
+
if (add_error) {
|
37
|
+
std::cerr << "Document add error: " << simdjson::error_message(add_error) << std::endl;
|
38
|
+
return;
|
39
|
+
}
|
40
|
+
std::cout << "Document added successfully. Store size: " << store.size() << std::endl;
|
41
|
+
|
42
|
+
// Test search
|
43
|
+
float query[20] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0,
|
44
|
+
0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0};
|
45
|
+
|
46
|
+
auto results = store.search(query, 1);
|
47
|
+
std::cout << "Search completed. Found " << results.size() << " results" << std::endl;
|
48
|
+
if (!results.empty()) {
|
49
|
+
std::cout << "Top result score: " << results[0].first << std::endl;
|
50
|
+
}
|
51
|
+
|
52
|
+
} catch (const std::exception& e) {
|
53
|
+
std::cerr << "Error: " << e.what() << std::endl;
|
54
|
+
}
|
55
|
+
}
|
56
|
+
|
57
|
+
void test_load_directory(const std::string& path) {
|
58
|
+
std::cout << "\n=== Testing Load Directory ===" << std::endl;
|
59
|
+
std::cout << "Loading from: " << path << std::endl;
|
60
|
+
|
61
|
+
VectorStore store(20);
|
62
|
+
|
63
|
+
try {
|
64
|
+
// Collect JSON files
|
65
|
+
std::vector<std::filesystem::path> json_files;
|
66
|
+
for (const auto& entry : std::filesystem::directory_iterator(path)) {
|
67
|
+
if (entry.path().extension() == ".json") {
|
68
|
+
std::cout << "Found JSON file: " << entry.path() << std::endl;
|
69
|
+
json_files.push_back(entry.path());
|
70
|
+
}
|
71
|
+
}
|
72
|
+
|
73
|
+
std::cout << "Total JSON files found: " << json_files.size() << std::endl;
|
74
|
+
|
75
|
+
// Process files
|
76
|
+
simdjson::ondemand::parser file_parser;
|
77
|
+
for (size_t i = 0; i < json_files.size(); ++i) {
|
78
|
+
std::cout << "Processing file " << (i+1) << "/" << json_files.size()
|
79
|
+
<< ": " << json_files[i].filename() << std::endl;
|
80
|
+
|
81
|
+
try {
|
82
|
+
std::cout << " Loading file..." << std::endl;
|
83
|
+
auto json = simdjson::padded_string::load(json_files[i].string()).value_unsafe();
|
84
|
+
std::cout << " File loaded, size: " << json.size() << " bytes" << std::endl;
|
85
|
+
|
86
|
+
std::cout << " Parsing JSON..." << std::endl;
|
87
|
+
auto json_doc = file_parser.iterate(json).value_unsafe();
|
88
|
+
std::cout << " JSON parsed successfully" << std::endl;
|
89
|
+
|
90
|
+
// Check first character to determine if it's an array
|
91
|
+
const char* json_start = json.data();
|
92
|
+
while (json_start && *json_start && std::isspace(*json_start)) {
|
93
|
+
json_start++;
|
94
|
+
}
|
95
|
+
|
96
|
+
if (json_start && *json_start == '[') {
|
97
|
+
// It's an array
|
98
|
+
std::cout << " Detected array of documents" << std::endl;
|
99
|
+
simdjson::ondemand::array doc_array;
|
100
|
+
auto array_error = json_doc.get_array().get(doc_array);
|
101
|
+
if (array_error) {
|
102
|
+
std::cerr << " Error getting array: " << simdjson::error_message(array_error) << std::endl;
|
103
|
+
continue;
|
104
|
+
}
|
105
|
+
|
106
|
+
size_t doc_count = 0;
|
107
|
+
size_t error_count = 0;
|
108
|
+
for (auto doc_element : doc_array) {
|
109
|
+
simdjson::ondemand::object doc_obj;
|
110
|
+
auto obj_error = doc_element.get_object().get(doc_obj);
|
111
|
+
if (obj_error) {
|
112
|
+
std::cerr << " Error getting object from array element: " << simdjson::error_message(obj_error) << std::endl;
|
113
|
+
error_count++;
|
114
|
+
continue;
|
115
|
+
}
|
116
|
+
|
117
|
+
auto add_error = store.add_document(doc_obj);
|
118
|
+
if (add_error) {
|
119
|
+
std::cerr << " Error adding document: " << simdjson::error_message(add_error) << std::endl;
|
120
|
+
error_count++;
|
121
|
+
} else {
|
122
|
+
doc_count++;
|
123
|
+
}
|
124
|
+
}
|
125
|
+
std::cout << " Added " << doc_count << " documents";
|
126
|
+
if (error_count > 0) {
|
127
|
+
std::cout << " (with " << error_count << " errors)";
|
128
|
+
}
|
129
|
+
std::cout << ". Current store size: " << store.size() << std::endl;
|
130
|
+
} else {
|
131
|
+
// Single document
|
132
|
+
std::cout << " Detected single document" << std::endl;
|
133
|
+
std::cout << " Adding to store..." << std::endl;
|
134
|
+
auto add_error = store.add_document(json_doc);
|
135
|
+
if (add_error) {
|
136
|
+
std::cerr << " Error adding document: " << simdjson::error_message(add_error) << std::endl;
|
137
|
+
} else {
|
138
|
+
std::cout << " Document added successfully";
|
139
|
+
}
|
140
|
+
std::cout << ". Current store size: " << store.size() << std::endl;
|
141
|
+
}
|
142
|
+
|
143
|
+
} catch (const std::exception& e) {
|
144
|
+
std::cerr << " Error processing file: " << e.what() << std::endl;
|
145
|
+
}
|
146
|
+
}
|
147
|
+
|
148
|
+
std::cout << "\nAll files processed. Final store size: " << store.size() << std::endl;
|
149
|
+
|
150
|
+
// Test normalization
|
151
|
+
std::cout << "Testing normalization..." << std::endl;
|
152
|
+
store.normalize_all();
|
153
|
+
std::cout << "Normalization completed" << std::endl;
|
154
|
+
|
155
|
+
} catch (const std::exception& e) {
|
156
|
+
std::cerr << "Error in load directory: " << e.what() << std::endl;
|
157
|
+
}
|
158
|
+
}
|
159
|
+
|
160
|
+
int main(int argc, char** argv) {
|
161
|
+
std::cout << "Vector Store C++ Test Program" << std::endl;
|
162
|
+
std::cout << "=============================" << std::endl;
|
163
|
+
|
164
|
+
// Test single document first
|
165
|
+
test_single_document();
|
166
|
+
|
167
|
+
// Test directory loading
|
168
|
+
std::string test_dir = (argc > 1) ? argv[1] : "test";
|
169
|
+
test_load_directory(test_dir);
|
170
|
+
|
171
|
+
std::cout << "\nAll tests completed!" << std::endl;
|
172
|
+
return 0;
|
173
|
+
}
|