native-vector-store 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "native-vector-store",
3
- "version": "0.1.0",
3
+ "version": "0.2.0",
4
4
  "description": "High-performance local vector store with SIMD optimization for MCP servers",
5
5
  "main": "index.js",
6
6
  "types": "lib/index.d.ts",
@@ -36,17 +36,36 @@
36
36
  "dependencies": {
37
37
  "dotenv": "^17.2.0",
38
38
  "node-addon-api": "^7.0.0",
39
- "node-gyp-build": "^4.8.0",
39
+ "node-gyp-build": "^4.8.4",
40
40
  "openai": "^5.8.3"
41
41
  },
42
+ "optionalDependencies": {
43
+ "@modelcontextprotocol/sdk": "^1.15.1"
44
+ },
42
45
  "devDependencies": {
43
46
  "node-gyp": "^10.0.0",
44
- "prebuildify": "^6.0.0"
47
+ "prebuildify": "^6.0.0",
48
+ "prebuildify-cross": "^5.0.0"
45
49
  },
46
- "os": ["darwin", "linux", "win32"],
47
- "cpu": ["x64", "arm64"],
50
+ "os": [
51
+ "darwin",
52
+ "linux",
53
+ "win32"
54
+ ],
55
+ "cpu": [
56
+ "x64",
57
+ "arm64"
58
+ ],
48
59
  "engines": {
49
60
  "node": ">=14.0.0"
50
61
  },
51
- "gypfile": true
62
+ "gypfile": true,
63
+ "files": [
64
+ "index.js",
65
+ "lib/",
66
+ "src/",
67
+ "deps/",
68
+ "binding.gyp",
69
+ "prebuilds/"
70
+ ]
52
71
  }
package/src/Makefile ADDED
@@ -0,0 +1,87 @@
1
# Makefile for the native C++ test binaries built from this directory.
#
# Targets:
#   all        build both test binaries (default goal)
#   clean      remove object files and binaries
#   rebuild    clean, then all
#   run        run the basic test binary against ../test
#   debug      open the basic test binary in lldb
#   test-asan  clean, rebuild the stress binary with AddressSanitizer, run it
#   test-tsan  clean, rebuild the stress binary with ThreadSanitizer, run it
#   stress     run the stress binary; instrumentation is chosen by SANITIZER

# Detect OS
UNAME_S := $(shell uname -s)

# Compiler
ifeq ($(UNAME_S),Darwin)
    CXX = clang++
else
    CXX = g++
endif

# Base flags: unoptimized debug build with frame pointers kept
CXXFLAGS = -std=c++17 -g -O0 -fno-omit-frame-pointer -DDEBUG

# OS-specific configuration
ifeq ($(UNAME_S),Darwin)
    # macOS configuration (libomp taken from the Homebrew prefix below)
    INCLUDES = -I. -I../deps -I/opt/homebrew/opt/libomp/include
    LDFLAGS = -L/opt/homebrew/opt/libomp/lib
    LIBS = -lomp
    # Add flags for better debugging with lldb
    CXXFLAGS += -glldb -gdwarf-4
    # OpenMP flags for macOS
    CXXFLAGS += -Xpreprocessor -fopenmp
else
    # Linux configuration
    INCLUDES = -I. -I../deps
    LDFLAGS = -L/usr/lib
    LIBS = -lgomp
    # OpenMP flags for Linux
    CXXFLAGS += -fopenmp
endif

TARGET = test_vector_store
STRESS_TARGET = test_stress
SOURCES = test_main.cpp vector_store.cpp ../deps/simdjson.cpp
STRESS_SOURCES = test_stress.cpp vector_store.cpp vector_store_loader.cpp vector_store_loader_mmap.cpp vector_store_loader_adaptive.cpp ../deps/simdjson.cpp
OBJECTS = $(SOURCES:.cpp=.o)
STRESS_OBJECTS = $(STRESS_SOURCES:.cpp=.o)

all: $(TARGET) $(STRESS_TARGET)

$(TARGET): $(OBJECTS)
	$(CXX) $(CXXFLAGS) $(OBJECTS) -o $(TARGET) $(LDFLAGS) $(LIBS)

$(STRESS_TARGET): $(STRESS_OBJECTS)
	$(CXX) $(CXXFLAGS) $(STRESS_OBJECTS) -o $(STRESS_TARGET) $(LDFLAGS) $(LIBS)

%.o: %.cpp
	$(CXX) $(CXXFLAGS) $(INCLUDES) -c $< -o $@

clean:
	rm -f $(OBJECTS) $(STRESS_OBJECTS) $(TARGET) $(STRESS_TARGET)

# Force rebuild (useful for CI)
# NOTE: `clean` and `all` are unordered prerequisites, so run this target
# without -j; a parallel make may interleave the two.
rebuild: clean all

run: $(TARGET)
	./$(TARGET) ../test

debug: $(TARGET)
	lldb ./$(TARGET)

# Sanitizer builds
# The target-specific flags below are inherited by the prerequisites, and
# `clean` runs first so every object is recompiled with instrumentation.
test-asan: CXXFLAGS += -fsanitize=address -fno-sanitize-recover=all
test-asan: LDFLAGS += -fsanitize=address
test-asan: clean $(STRESS_TARGET)
	./$(STRESS_TARGET)

test-tsan: CXXFLAGS += -fsanitize=thread -fno-sanitize-recover=all
test-tsan: LDFLAGS += -fsanitize=thread
test-tsan: clean $(STRESS_TARGET)
	./$(STRESS_TARGET)

# Run stress tests with sanitizers by default
# Use SANITIZER=none to disable, SANITIZER=thread for TSan
SANITIZER ?= address

ifeq ($(SANITIZER),address)
stress: test-asan
else ifeq ($(SANITIZER),thread)
stress: test-tsan
else
stress: $(STRESS_TARGET)
	./$(STRESS_TARGET)
endif

# Every target that names a command rather than a file is listed here so a
# stray file with the same name can never make it look up to date.
.PHONY: all clean rebuild run debug test-asan test-tsan stress
@@ -0,0 +1,173 @@
1
+ #include "vector_store.h"
2
+ #include <iostream>
3
+ #include <filesystem>
4
+ #include <vector>
5
+ #include <simdjson.h>
6
+ #include <cctype>
7
+
8
+ void test_single_document() {
9
+ std::cout << "=== Testing Single Document ===" << std::endl;
10
+
11
+ VectorStore store(20);
12
+
13
+ // Create a test document manually
14
+ std::string json_str = R"({
15
+ "id": "test1",
16
+ "text": "Test document for debugging",
17
+ "metadata": {
18
+ "embedding": [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0,
19
+ 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
20
+ "category": "test"
21
+ }
22
+ })";
23
+
24
+ try {
25
+ simdjson::ondemand::parser parser;
26
+ simdjson::padded_string padded(json_str);
27
+ simdjson::ondemand::document doc;
28
+ auto parse_error = parser.iterate(padded).get(doc);
29
+ if (parse_error) {
30
+ std::cerr << "JSON parse error: " << simdjson::error_message(parse_error) << std::endl;
31
+ return;
32
+ }
33
+
34
+ std::cout << "Adding document..." << std::endl;
35
+ auto add_error = store.add_document(doc);
36
+ if (add_error) {
37
+ std::cerr << "Document add error: " << simdjson::error_message(add_error) << std::endl;
38
+ return;
39
+ }
40
+ std::cout << "Document added successfully. Store size: " << store.size() << std::endl;
41
+
42
+ // Test search
43
+ float query[20] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0,
44
+ 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0};
45
+
46
+ auto results = store.search(query, 1);
47
+ std::cout << "Search completed. Found " << results.size() << " results" << std::endl;
48
+ if (!results.empty()) {
49
+ std::cout << "Top result score: " << results[0].first << std::endl;
50
+ }
51
+
52
+ } catch (const std::exception& e) {
53
+ std::cerr << "Error: " << e.what() << std::endl;
54
+ }
55
+ }
56
+
57
+ void test_load_directory(const std::string& path) {
58
+ std::cout << "\n=== Testing Load Directory ===" << std::endl;
59
+ std::cout << "Loading from: " << path << std::endl;
60
+
61
+ VectorStore store(20);
62
+
63
+ try {
64
+ // Collect JSON files
65
+ std::vector<std::filesystem::path> json_files;
66
+ for (const auto& entry : std::filesystem::directory_iterator(path)) {
67
+ if (entry.path().extension() == ".json") {
68
+ std::cout << "Found JSON file: " << entry.path() << std::endl;
69
+ json_files.push_back(entry.path());
70
+ }
71
+ }
72
+
73
+ std::cout << "Total JSON files found: " << json_files.size() << std::endl;
74
+
75
+ // Process files
76
+ simdjson::ondemand::parser file_parser;
77
+ for (size_t i = 0; i < json_files.size(); ++i) {
78
+ std::cout << "Processing file " << (i+1) << "/" << json_files.size()
79
+ << ": " << json_files[i].filename() << std::endl;
80
+
81
+ try {
82
+ std::cout << " Loading file..." << std::endl;
83
+ auto json = simdjson::padded_string::load(json_files[i].string()).value_unsafe();
84
+ std::cout << " File loaded, size: " << json.size() << " bytes" << std::endl;
85
+
86
+ std::cout << " Parsing JSON..." << std::endl;
87
+ auto json_doc = file_parser.iterate(json).value_unsafe();
88
+ std::cout << " JSON parsed successfully" << std::endl;
89
+
90
+ // Check first character to determine if it's an array
91
+ const char* json_start = json.data();
92
+ while (json_start && *json_start && std::isspace(*json_start)) {
93
+ json_start++;
94
+ }
95
+
96
+ if (json_start && *json_start == '[') {
97
+ // It's an array
98
+ std::cout << " Detected array of documents" << std::endl;
99
+ simdjson::ondemand::array doc_array;
100
+ auto array_error = json_doc.get_array().get(doc_array);
101
+ if (array_error) {
102
+ std::cerr << " Error getting array: " << simdjson::error_message(array_error) << std::endl;
103
+ continue;
104
+ }
105
+
106
+ size_t doc_count = 0;
107
+ size_t error_count = 0;
108
+ for (auto doc_element : doc_array) {
109
+ simdjson::ondemand::object doc_obj;
110
+ auto obj_error = doc_element.get_object().get(doc_obj);
111
+ if (obj_error) {
112
+ std::cerr << " Error getting object from array element: " << simdjson::error_message(obj_error) << std::endl;
113
+ error_count++;
114
+ continue;
115
+ }
116
+
117
+ auto add_error = store.add_document(doc_obj);
118
+ if (add_error) {
119
+ std::cerr << " Error adding document: " << simdjson::error_message(add_error) << std::endl;
120
+ error_count++;
121
+ } else {
122
+ doc_count++;
123
+ }
124
+ }
125
+ std::cout << " Added " << doc_count << " documents";
126
+ if (error_count > 0) {
127
+ std::cout << " (with " << error_count << " errors)";
128
+ }
129
+ std::cout << ". Current store size: " << store.size() << std::endl;
130
+ } else {
131
+ // Single document
132
+ std::cout << " Detected single document" << std::endl;
133
+ std::cout << " Adding to store..." << std::endl;
134
+ auto add_error = store.add_document(json_doc);
135
+ if (add_error) {
136
+ std::cerr << " Error adding document: " << simdjson::error_message(add_error) << std::endl;
137
+ } else {
138
+ std::cout << " Document added successfully";
139
+ }
140
+ std::cout << ". Current store size: " << store.size() << std::endl;
141
+ }
142
+
143
+ } catch (const std::exception& e) {
144
+ std::cerr << " Error processing file: " << e.what() << std::endl;
145
+ }
146
+ }
147
+
148
+ std::cout << "\nAll files processed. Final store size: " << store.size() << std::endl;
149
+
150
+ // Test normalization
151
+ std::cout << "Testing normalization..." << std::endl;
152
+ store.normalize_all();
153
+ std::cout << "Normalization completed" << std::endl;
154
+
155
+ } catch (const std::exception& e) {
156
+ std::cerr << "Error in load directory: " << e.what() << std::endl;
157
+ }
158
+ }
159
+
160
+ int main(int argc, char** argv) {
161
+ std::cout << "Vector Store C++ Test Program" << std::endl;
162
+ std::cout << "=============================" << std::endl;
163
+
164
+ // Test single document first
165
+ test_single_document();
166
+
167
+ // Test directory loading
168
+ std::string test_dir = (argc > 1) ? argv[1] : "test";
169
+ test_load_directory(test_dir);
170
+
171
+ std::cout << "\nAll tests completed!" << std::endl;
172
+ return 0;
173
+ }