native-vector-store 0.1.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. package/README.md +242 -12
  2. package/binding.gyp +22 -10
  3. package/deps/simdjson/simdjson.cpp +56403 -0
  4. package/deps/simdjson/simdjson.h +123534 -0
  5. package/docs/PERFORMANCE_CASE_STUDY.md +130 -0
  6. package/docs/PREBUILDS.md +69 -0
  7. package/docs/VectorStore.html +180 -0
  8. package/docs/VectorStoreWrapper.html +1356 -0
  9. package/docs/fonts/OpenSans-Bold-webfont.eot +0 -0
  10. package/docs/fonts/OpenSans-Bold-webfont.svg +1830 -0
  11. package/docs/fonts/OpenSans-Bold-webfont.woff +0 -0
  12. package/docs/fonts/OpenSans-BoldItalic-webfont.eot +0 -0
  13. package/docs/fonts/OpenSans-BoldItalic-webfont.svg +1830 -0
  14. package/docs/fonts/OpenSans-BoldItalic-webfont.woff +0 -0
  15. package/docs/fonts/OpenSans-Italic-webfont.eot +0 -0
  16. package/docs/fonts/OpenSans-Italic-webfont.svg +1830 -0
  17. package/docs/fonts/OpenSans-Italic-webfont.woff +0 -0
  18. package/docs/fonts/OpenSans-Light-webfont.eot +0 -0
  19. package/docs/fonts/OpenSans-Light-webfont.svg +1831 -0
  20. package/docs/fonts/OpenSans-Light-webfont.woff +0 -0
  21. package/docs/fonts/OpenSans-LightItalic-webfont.eot +0 -0
  22. package/docs/fonts/OpenSans-LightItalic-webfont.svg +1835 -0
  23. package/docs/fonts/OpenSans-LightItalic-webfont.woff +0 -0
  24. package/docs/fonts/OpenSans-Regular-webfont.eot +0 -0
  25. package/docs/fonts/OpenSans-Regular-webfont.svg +1831 -0
  26. package/docs/fonts/OpenSans-Regular-webfont.woff +0 -0
  27. package/docs/global.html +561 -0
  28. package/docs/index.html +570 -0
  29. package/docs/scripts/linenumber.js +25 -0
  30. package/docs/scripts/prettify/Apache-License-2.0.txt +202 -0
  31. package/docs/scripts/prettify/lang-css.js +2 -0
  32. package/docs/scripts/prettify/prettify.js +28 -0
  33. package/docs/styles/jsdoc-default.css +358 -0
  34. package/docs/styles/prettify-jsdoc.css +111 -0
  35. package/docs/styles/prettify-tomorrow.css +132 -0
  36. package/index.js +162 -0
  37. package/package.json +30 -7
  38. package/prebuilds/darwin-arm64/native-vector-store.node +0 -0
  39. package/prebuilds/darwin-x64/native-vector-store.node +0 -0
  40. package/prebuilds/linux-arm64/native-vector-store.node +0 -0
  41. package/prebuilds/linux-x64/native-vector-store.node +0 -0
  42. package/prebuilds/linux-x64-musl/napi-v9/native-vector-store.node +0 -0
  43. package/prebuilds/linux-x64-musl/native-vector-store.node +0 -0
  44. package/prebuilds/win32-x64/native-vector-store.node +0 -0
  45. package/src/Makefile +87 -0
  46. package/src/test_main.cpp +173 -0
  47. package/src/test_stress.cpp +394 -0
  48. package/src/vector_store.cpp +344 -0
  49. package/src/vector_store.h +21 -323
  50. package/native-vector-store-0.1.0.tgz +0 -0
  51. package/scripts/build-prebuilds.sh +0 -23
  52. /package/{src → deps/atomic_queue}/atomic_queue.h +0 -0
  53. /package/{src → deps/atomic_queue}/defs.h +0 -0
@@ -0,0 +1,111 @@
1
+ /* JSDoc prettify.js theme */
2
+
3
+ /* plain text */
4
+ .pln {
5
+ color: #000000;
6
+ font-weight: normal;
7
+ font-style: normal;
8
+ }
9
+
10
+ /* string content */
11
+ .str {
12
+ color: #006400;
13
+ font-weight: normal;
14
+ font-style: normal;
15
+ }
16
+
17
+ /* a keyword */
18
+ .kwd {
19
+ color: #000000;
20
+ font-weight: bold;
21
+ font-style: normal;
22
+ }
23
+
24
+ /* a comment */
25
+ .com {
26
+ font-weight: normal;
27
+ font-style: italic;
28
+ }
29
+
30
+ /* a type name */
31
+ .typ {
32
+ color: #000000;
33
+ font-weight: normal;
34
+ font-style: normal;
35
+ }
36
+
37
+ /* a literal value */
38
+ .lit {
39
+ color: #006400;
40
+ font-weight: normal;
41
+ font-style: normal;
42
+ }
43
+
44
+ /* punctuation */
45
+ .pun {
46
+ color: #000000;
47
+ font-weight: bold;
48
+ font-style: normal;
49
+ }
50
+
51
+ /* lisp open bracket */
52
+ .opn {
53
+ color: #000000;
54
+ font-weight: bold;
55
+ font-style: normal;
56
+ }
57
+
58
+ /* lisp close bracket */
59
+ .clo {
60
+ color: #000000;
61
+ font-weight: bold;
62
+ font-style: normal;
63
+ }
64
+
65
+ /* a markup tag name */
66
+ .tag {
67
+ color: #006400;
68
+ font-weight: normal;
69
+ font-style: normal;
70
+ }
71
+
72
+ /* a markup attribute name */
73
+ .atn {
74
+ color: #006400;
75
+ font-weight: normal;
76
+ font-style: normal;
77
+ }
78
+
79
+ /* a markup attribute value */
80
+ .atv {
81
+ color: #006400;
82
+ font-weight: normal;
83
+ font-style: normal;
84
+ }
85
+
86
+ /* a declaration */
87
+ .dec {
88
+ color: #000000;
89
+ font-weight: bold;
90
+ font-style: normal;
91
+ }
92
+
93
+ /* a variable name */
94
+ .var {
95
+ color: #000000;
96
+ font-weight: normal;
97
+ font-style: normal;
98
+ }
99
+
100
+ /* a function name */
101
+ .fun {
102
+ color: #000000;
103
+ font-weight: bold;
104
+ font-style: normal;
105
+ }
106
+
107
+ /* Specify class=linenums on a pre to get line numbering */
108
+ ol.linenums {
109
+ margin-top: 0;
110
+ margin-bottom: 0;
111
+ }
@@ -0,0 +1,132 @@
1
+ /* Tomorrow Theme */
2
+ /* Original theme - https://github.com/chriskempson/tomorrow-theme */
3
+ /* Pretty printing styles. Used with prettify.js. */
4
+ /* SPAN elements with the classes below are added by prettyprint. */
5
+ /* plain text */
6
+ .pln {
7
+ color: #4d4d4c; }
8
+
9
+ @media screen {
10
+ /* string content */
11
+ .str {
12
+ color: #718c00; }
13
+
14
+ /* a keyword */
15
+ .kwd {
16
+ color: #8959a8; }
17
+
18
+ /* a comment */
19
+ .com {
20
+ color: #8e908c; }
21
+
22
+ /* a type name */
23
+ .typ {
24
+ color: #4271ae; }
25
+
26
+ /* a literal value */
27
+ .lit {
28
+ color: #f5871f; }
29
+
30
+ /* punctuation */
31
+ .pun {
32
+ color: #4d4d4c; }
33
+
34
+ /* lisp open bracket */
35
+ .opn {
36
+ color: #4d4d4c; }
37
+
38
+ /* lisp close bracket */
39
+ .clo {
40
+ color: #4d4d4c; }
41
+
42
+ /* a markup tag name */
43
+ .tag {
44
+ color: #c82829; }
45
+
46
+ /* a markup attribute name */
47
+ .atn {
48
+ color: #f5871f; }
49
+
50
+ /* a markup attribute value */
51
+ .atv {
52
+ color: #3e999f; }
53
+
54
+ /* a declaration */
55
+ .dec {
56
+ color: #f5871f; }
57
+
58
+ /* a variable name */
59
+ .var {
60
+ color: #c82829; }
61
+
62
+ /* a function name */
63
+ .fun {
64
+ color: #4271ae; } }
65
+ /* Use higher contrast and text-weight for printable form. */
66
+ @media print, projection {
67
+ .str {
68
+ color: #060; }
69
+
70
+ .kwd {
71
+ color: #006;
72
+ font-weight: bold; }
73
+
74
+ .com {
75
+ color: #600;
76
+ font-style: italic; }
77
+
78
+ .typ {
79
+ color: #404;
80
+ font-weight: bold; }
81
+
82
+ .lit {
83
+ color: #044; }
84
+
85
+ .pun, .opn, .clo {
86
+ color: #440; }
87
+
88
+ .tag {
89
+ color: #006;
90
+ font-weight: bold; }
91
+
92
+ .atn {
93
+ color: #404; }
94
+
95
+ .atv {
96
+ color: #060; } }
97
+ /* Style */
98
+ /*
99
+ pre.prettyprint {
100
+ background: white;
101
+ font-family: Consolas, Monaco, 'Andale Mono', monospace;
102
+ font-size: 12px;
103
+ line-height: 1.5;
104
+ border: 1px solid #ccc;
105
+ padding: 10px; }
106
+ */
107
+
108
+ /* Specify class=linenums on a pre to get line numbering */
109
+ ol.linenums {
110
+ margin-top: 0;
111
+ margin-bottom: 0; }
112
+
113
+ /* IE indents via margin-left */
114
+ li.L0,
115
+ li.L1,
116
+ li.L2,
117
+ li.L3,
118
+ li.L4,
119
+ li.L5,
120
+ li.L6,
121
+ li.L7,
122
+ li.L8,
123
+ li.L9 {
124
+ /* */ }
125
+
126
+ /* Alternate shading for lines */
127
+ li.L1,
128
+ li.L3,
129
+ li.L5,
130
+ li.L7,
131
+ li.L9 {
132
+ /* */ }
package/index.js CHANGED
@@ -1,3 +1,165 @@
1
1
  const { VectorStore } = require('node-gyp-build')(__dirname);
2
2
 
3
+ /**
4
+ * @typedef {Object} Document
5
+ * @property {string} id - Unique identifier for the document
6
+ * @property {string} text - The text content of the document
7
+ * @property {Object} metadata - Document metadata
8
+ * @property {number[]} metadata.embedding - The embedding vector for the document
9
+ * @property {*} [metadata.*] - Additional metadata properties
10
+ */
11
+
12
+ /**
13
+ * @typedef {Object} SearchResult
14
+ * @property {number} score - Similarity score (0-1, higher is more similar)
15
+ * @property {string} id - Document identifier
16
+ * @property {string} text - Document text content
17
+ * @property {string} metadata_json - Serialized metadata as JSON string
18
+ */
19
+
20
+ /**
21
+ * High-performance vector store with SIMD optimization for similarity search.
22
+ * Designed for immutable, one-time loading scenarios with fast searches over focused corpora.
23
+ *
24
+ * @class VectorStore
25
+ * @example
26
+ * // Basic usage
27
+ * const store = new VectorStore(1536);
28
+ * store.loadDir('./documents');
29
+ * const results = store.search(queryEmbedding, 10);
30
+ *
31
+ * @example
32
+ * // Multiple domain-specific stores
33
+ * const productStore = new VectorStore(1536);
34
+ * const supportStore = new VectorStore(1536);
35
+ * productStore.loadDir('./knowledge/products');
36
+ * supportStore.loadDir('./knowledge/support');
37
+ */
38
+ class VectorStoreWrapper {
39
+ /**
40
+ * Creates a new VectorStore instance
41
+ * @param {number} dimensions - The dimensionality of embedding vectors (e.g., 1536 for OpenAI embeddings)
42
+ * @throws {TypeError} If dimensions is not a positive integer
43
+ */
44
+ constructor(dimensions) {
45
+ return new VectorStore(dimensions);
46
+ }
47
+
48
+ /**
49
+ * Load all JSON documents from a directory and automatically finalize the store.
50
+ * Documents should contain embedding vectors in their metadata field.
51
+ * Supports both single documents and arrays of documents per file.
52
+ *
53
+ * @param {string} path - Absolute or relative path to directory containing JSON files
54
+ * @returns {void}
55
+ * @throws {Error} If directory doesn't exist or contains invalid JSON
56
+ *
57
+ * @example
58
+ * // Load documents from a directory
59
+ * store.loadDir('./knowledge-base');
60
+ * // Store is automatically finalized and ready for searches
61
+ *
62
+ * @example
63
+ * // Document format in JSON files
64
+ * {
65
+ * "id": "doc-123",
66
+ * "text": "Document content...",
67
+ * "metadata": {
68
+ * "embedding": [0.1, 0.2, ...], // Required: embedding vector
69
+ * "category": "product" // Optional: additional metadata
70
+ * }
71
+ * }
72
+ */
73
+ loadDir(path) {}
74
+
75
+ /**
76
+ * Add a single document to the store. Only works before finalization.
77
+ *
78
+ * @param {Document} doc - Document object with embedding in metadata
79
+ * @returns {void}
80
+ * @throws {Error} If called after finalization or document format is invalid
81
+ *
82
+ * @example
83
+ * store.addDocument({
84
+ * id: 'doc-1',
85
+ * text: 'Sample document',
86
+ * metadata: {
87
+ * embedding: new Array(1536).fill(0).map(() => Math.random())
88
+ * }
89
+ * });
90
+ */
91
+ addDocument(doc) {}
92
+
93
+ /**
94
+ * Search for the k most similar documents to a query embedding.
95
+ * Uses SIMD-optimized cosine similarity for fast performance.
96
+ *
97
+ * @param {Float32Array} query - Query embedding vector (must match store dimensions)
98
+ * @param {number} k - Number of results to return (top-k nearest neighbors)
99
+ * @param {boolean} [normalizeQuery=true] - Whether to L2-normalize the query vector
100
+ * @returns {SearchResult[]} Array of search results sorted by similarity (highest first)
101
+ * @throws {Error} If store is not finalized or query dimensions don't match
102
+ *
103
+ * @example
104
+ * // Basic search
105
+ * const queryEmbedding = new Float32Array(1536);
106
+ * const results = store.search(queryEmbedding, 10);
107
+ *
108
+ * @example
109
+ * // Search with pre-normalized query
110
+ * const normalized = normalizeVector(queryEmbedding);
111
+ * const results = store.search(normalized, 5, false);
112
+ *
113
+ * @example
114
+ * // Filter results by score threshold
115
+ * const results = store.search(queryEmbedding, 20)
116
+ * .filter(r => r.score > 0.7);
117
+ */
118
+ search(query, k, normalizeQuery = true) {}
119
+
120
+ /**
121
+ * Finalize the store: normalize all embeddings and switch to serving mode.
122
+ * After calling this, no more documents can be added but searches become available.
123
+ * This is automatically called by loadDir().
124
+ *
125
+ * @returns {void}
126
+ *
127
+ * @example
128
+ * // Manual finalization after adding documents
129
+ * store.addDocument(doc1);
130
+ * store.addDocument(doc2);
131
+ * store.finalize(); // Must call before searching
132
+ */
133
+ finalize() {}
134
+
135
+ /**
136
+ * Check if the store has been finalized and is ready for searching.
137
+ *
138
+ * @returns {boolean} True if finalized, false otherwise
139
+ *
140
+ * @example
141
+ * if (store.isFinalized()) {
142
+ * const results = store.search(query, 10);
143
+ * }
144
+ */
145
+ isFinalized() {}
146
+
147
+ /**
148
+ * Get the number of documents in the store.
149
+ *
150
+ * @returns {number} Number of documents loaded
151
+ *
152
+ * @example
153
+ * console.log(`Loaded ${store.size()} documents`);
154
+ */
155
+ size() {}
156
+
157
+ /**
158
+ * @deprecated Use finalize() instead
159
+ * @returns {void}
160
+ */
161
+ normalize() {}
162
+ }
163
+
164
+ // Export the native VectorStore directly; VectorStoreWrapper above is a documentation-only stub for JSDoc (its methods are empty) and is not exported
3
165
  module.exports = { VectorStore };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "native-vector-store",
3
- "version": "0.1.0",
3
+ "version": "0.3.0",
4
4
  "description": "High-performance local vector store with SIMD optimization for MCP servers",
5
5
  "main": "index.js",
6
6
  "types": "lib/index.d.ts",
@@ -23,7 +23,9 @@
23
23
  "prebuildify-cross": "prebuildify-cross -i centos7-devtoolset7 -i alpine -i linux-arm64 -i darwin-arm64 -i darwin-x64+arm64",
24
24
  "benchmark": "node test/test.js",
25
25
  "example": "node examples/mcp-server.js demo",
26
- "embed": "node embed_dir.js"
26
+ "embed": "node embed_dir.js",
27
+ "docs": "jsdoc -c jsdoc.json",
28
+ "prepublishOnly": "npm run docs"
27
29
  },
28
30
  "keywords": [
29
31
  "vector",
@@ -36,17 +38,38 @@
36
38
  "dependencies": {
37
39
  "dotenv": "^17.2.0",
38
40
  "node-addon-api": "^7.0.0",
39
- "node-gyp-build": "^4.8.0",
41
+ "node-gyp-build": "^4.8.4",
40
42
  "openai": "^5.8.3"
41
43
  },
44
+ "optionalDependencies": {
45
+ "@modelcontextprotocol/sdk": "^1.15.1"
46
+ },
42
47
  "devDependencies": {
48
+ "jsdoc": "^4.0.4",
43
49
  "node-gyp": "^10.0.0",
44
- "prebuildify": "^6.0.0"
50
+ "prebuildify": "^6.0.0",
51
+ "prebuildify-cross": "^5.0.0"
45
52
  },
46
- "os": ["darwin", "linux", "win32"],
47
- "cpu": ["x64", "arm64"],
53
+ "os": [
54
+ "darwin",
55
+ "linux",
56
+ "win32"
57
+ ],
58
+ "cpu": [
59
+ "x64",
60
+ "arm64"
61
+ ],
48
62
  "engines": {
49
63
  "node": ">=14.0.0"
50
64
  },
51
- "gypfile": true
65
+ "gypfile": true,
66
+ "files": [
67
+ "index.js",
68
+ "lib/",
69
+ "src/",
70
+ "deps/",
71
+ "binding.gyp",
72
+ "prebuilds/",
73
+ "docs/"
74
+ ]
52
75
  }
package/src/Makefile ADDED
@@ -0,0 +1,87 @@
1
+ # Detect OS
2
+ UNAME_S := $(shell uname -s)
3
+
4
+ # Compiler
5
+ ifeq ($(UNAME_S),Darwin)
6
+ CXX = clang++
7
+ else
8
+ CXX = g++
9
+ endif
10
+
11
+ # Base flags
12
+ CXXFLAGS = -std=c++17 -g -O0 -fno-omit-frame-pointer -DDEBUG
13
+
14
+ # OS-specific configuration
15
+ ifeq ($(UNAME_S),Darwin)
16
+ # macOS configuration
17
+ INCLUDES = -I. -I../deps/simdjson -I../deps/atomic_queue -I/opt/homebrew/opt/libomp/include
18
+ LDFLAGS = -L/opt/homebrew/opt/libomp/lib
19
+ LIBS = -lomp
20
+ # Add flags for better debugging with lldb
21
+ CXXFLAGS += -glldb -gdwarf-4
22
+ # OpenMP flags for macOS
23
+ CXXFLAGS += -Xpreprocessor -fopenmp
24
+ else
25
+ # Linux configuration
26
+ INCLUDES = -I. -I../deps/simdjson -I../deps/atomic_queue
27
+ LDFLAGS = -L/usr/lib
28
+ LIBS = -lgomp
29
+ # OpenMP flags for Linux
30
+ CXXFLAGS += -fopenmp
31
+ endif
32
+
33
+ TARGET = test_vector_store
34
+ STRESS_TARGET = test_stress
35
+ SOURCES = test_main.cpp vector_store.cpp ../deps/simdjson/simdjson.cpp
36
+ STRESS_SOURCES = test_stress.cpp vector_store.cpp vector_store_loader.cpp vector_store_loader_mmap.cpp vector_store_loader_adaptive.cpp ../deps/simdjson/simdjson.cpp
37
+ OBJECTS = $(SOURCES:.cpp=.o)
38
+ STRESS_OBJECTS = $(STRESS_SOURCES:.cpp=.o)
39
+
40
+ all: $(TARGET) $(STRESS_TARGET)
41
+
42
+ $(TARGET): $(OBJECTS)
43
+ $(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS) $(LIBS)
44
+
45
+ $(STRESS_TARGET): $(STRESS_OBJECTS)
46
+ $(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS) $(LIBS)
47
+
48
+ %.o: %.cpp
49
+ $(CXX) $(CXXFLAGS) $(INCLUDES) -c $< -o $@
50
+
51
+ clean:
52
+ rm -f $(OBJECTS) $(STRESS_OBJECTS) $(TARGET) $(STRESS_TARGET)
53
+
54
+ # Force rebuild (useful for CI)
55
+ rebuild: clean all
56
+
57
+ run: $(TARGET)
58
+ ./$(TARGET) ../test
59
+
60
+ debug: $(TARGET)
61
+ lldb ./$(TARGET)
62
+
63
+ # Sanitizer builds
64
+ test-asan: CXXFLAGS += -fsanitize=address -fno-sanitize-recover=all
65
+ test-asan: LDFLAGS += -fsanitize=address
66
+ test-asan: clean $(STRESS_TARGET)
67
+ ./$(STRESS_TARGET)
68
+
69
+ test-tsan: CXXFLAGS += -fsanitize=thread -fno-sanitize-recover=all
70
+ test-tsan: LDFLAGS += -fsanitize=thread
71
+ test-tsan: clean $(STRESS_TARGET)
72
+ ./$(STRESS_TARGET)
73
+
74
+ # Run stress tests with sanitizers by default
75
+ # Use SANITIZER=none to disable, SANITIZER=thread for TSan
76
+ SANITIZER ?= address
77
+
78
+ ifeq ($(SANITIZER),address)
79
+ stress: test-asan
80
+ else ifeq ($(SANITIZER),thread)
81
+ stress: test-tsan
82
+ else
83
+ stress: $(STRESS_TARGET)
84
+ ./$(STRESS_TARGET)
85
+ endif
86
+
87
+ .PHONY: all clean rebuild run debug test-asan test-tsan stress # command targets, never files on disk