native-vector-store 0.3.2 → 0.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +19 -6
- package/docs/VectorStoreWrapper.html +11 -1
- package/docs/global.html +54 -1
- package/docs/index.html +17 -6
- package/index.js +14 -2
- package/lib/index.d.ts +2 -1
- package/package.json +2 -2
- package/prebuilds/darwin-arm64/native-vector-store.node +0 -0
- package/prebuilds/darwin-x64/native-vector-store.node +0 -0
- package/prebuilds/linux-arm64/native-vector-store.node +0 -0
- package/prebuilds/linux-x64/native-vector-store.node +0 -0
- package/prebuilds/linux-x64-musl/napi-v9/native-vector-store.node +0 -0
- package/prebuilds/linux-x64-musl/native-vector-store.node +0 -0
- package/prebuilds/win32-x64/native-vector-store.node +0 -0
- package/src/vector_store.cpp +63 -5
- package/src/vector_store.h +4 -0
- package/src/vector_store_loader.cpp +16 -0
package/README.md
CHANGED
@@ -192,16 +192,29 @@ Each JSON file contains self-contained documents with embeddings:
|
|
192
192
|
|
193
193
|
```json
|
194
194
|
{
|
195
|
-
"id": "unique-id",
|
196
|
-
"text": "Document content...",
|
197
|
-
"metadata": {
|
198
|
-
"embedding": [0.1, 0.2, ...],
|
199
|
-
"category": "product",
|
200
|
-
"lastUpdated": "2024-01-01"
|
195
|
+
"id": "unique-id", // Required: unique document identifier
|
196
|
+
"text": "Document content...", // Required: searchable text content (or use "content" for Spring AI)
|
197
|
+
"metadata": { // Required: metadata object
|
198
|
+
"embedding": [0.1, 0.2, ...], // Required: array of numbers matching vector dimensions
|
199
|
+
"category": "product", // Optional: additional metadata
|
200
|
+
"lastUpdated": "2024-01-01" // Optional: additional metadata
|
201
201
|
}
|
202
202
|
}
|
203
203
|
```
|
204
204
|
|
205
|
+
**Spring AI Compatibility**: You can use `"content"` instead of `"text"` for the document field. The library auto-detects which field name you're using from the first document and optimizes subsequent lookups.
|
206
|
+
|
207
|
+
**Common Mistakes:**
|
208
|
+
- ❌ Putting `embedding` at the root level instead of inside `metadata`
|
209
|
+
- ❌ Using string format for embeddings instead of number array
|
210
|
+
- ❌ Missing required fields (`id`, `text`, or `metadata`)
|
211
|
+
- ❌ Wrong embedding dimensions (must match VectorStore constructor)
|
212
|
+
|
213
|
+
**Validate your JSON format:**
|
214
|
+
```bash
|
215
|
+
node node_modules/native-vector-store/examples/validate-format.js your-file.json
|
216
|
+
```
|
217
|
+
|
205
218
|
### Deployment Strategies
|
206
219
|
|
207
220
|
#### Blue-Green Deployment
|
package/docs/VectorStoreWrapper.html
CHANGED
@@ -815,7 +815,7 @@ Supports both single documents and arrays of documents per file.</p>
|
|
815
815
|
store.loadDir('./knowledge-base');
|
816
816
|
// Store is automatically finalized and ready for searches</code></pre>
|
817
817
|
|
818
|
-
<pre class="prettyprint"><code>//
|
818
|
+
<pre class="prettyprint"><code>// Standard format with 'text' field
|
819
819
|
{
|
820
820
|
"id": "doc-123",
|
821
821
|
"text": "Document content...",
|
@@ -825,6 +825,16 @@ store.loadDir('./knowledge-base');
|
|
825
825
|
}
|
826
826
|
}</code></pre>
|
827
827
|
|
828
|
+
<pre class="prettyprint"><code>// Spring AI format with 'content' field
|
829
|
+
{
|
830
|
+
"id": "doc-456",
|
831
|
+
"content": "Document content...", // 'content' instead of 'text'
|
832
|
+
"metadata": {
|
833
|
+
"embedding": [0.1, 0.2, ...],
|
834
|
+
"category": "spring-ai"
|
835
|
+
}
|
836
|
+
}</code></pre>
|
837
|
+
|
828
838
|
|
829
839
|
|
830
840
|
|
package/docs/global.html
CHANGED
@@ -137,6 +137,8 @@
|
|
137
137
|
<th>Type</th>
|
138
138
|
|
139
139
|
|
140
|
+
<th>Attributes</th>
|
141
|
+
|
140
142
|
|
141
143
|
|
142
144
|
|
@@ -162,6 +164,12 @@
|
|
162
164
|
</td>
|
163
165
|
|
164
166
|
|
167
|
+
<td class="attributes">
|
168
|
+
|
169
|
+
|
170
|
+
|
171
|
+
</td>
|
172
|
+
|
165
173
|
|
166
174
|
|
167
175
|
|
@@ -185,10 +193,49 @@
|
|
185
193
|
</td>
|
186
194
|
|
187
195
|
|
196
|
+
<td class="attributes">
|
197
|
+
|
198
|
+
<optional><br>
|
199
|
+
|
200
|
+
|
201
|
+
|
202
|
+
</td>
|
203
|
+
|
188
204
|
|
189
205
|
|
190
206
|
|
191
|
-
<td class="description last"><p>The text content of the document</p></td>
|
207
|
+
<td class="description last"><p>The text content of the document (mutually exclusive with content)</p></td>
|
208
|
+
</tr>
|
209
|
+
|
210
|
+
|
211
|
+
|
212
|
+
<tr>
|
213
|
+
|
214
|
+
<td class="name"><code>content</code></td>
|
215
|
+
|
216
|
+
|
217
|
+
<td class="type">
|
218
|
+
|
219
|
+
|
220
|
+
<span class="param-type">string</span>
|
221
|
+
|
222
|
+
|
223
|
+
|
224
|
+
</td>
|
225
|
+
|
226
|
+
|
227
|
+
<td class="attributes">
|
228
|
+
|
229
|
+
<optional><br>
|
230
|
+
|
231
|
+
|
232
|
+
|
233
|
+
</td>
|
234
|
+
|
235
|
+
|
236
|
+
|
237
|
+
|
238
|
+
<td class="description last"><p>Alternative to text for Spring AI compatibility (mutually exclusive with text)</p></td>
|
192
239
|
</tr>
|
193
240
|
|
194
241
|
|
@@ -208,6 +255,12 @@
|
|
208
255
|
</td>
|
209
256
|
|
210
257
|
|
258
|
+
<td class="attributes">
|
259
|
+
|
260
|
+
|
261
|
+
|
262
|
+
</td>
|
263
|
+
|
211
264
|
|
212
265
|
|
213
266
|
|
package/docs/index.html
CHANGED
@@ -212,15 +212,26 @@ const supportResults = supportStore.search(queryEmbedding, 5);
|
|
212
212
|
</code></pre>
|
213
213
|
<p>Each JSON file contains self-contained documents with embeddings:</p>
|
214
214
|
<pre class="prettyprint source lang-json"><code>{
|
215
|
-
"id": "unique-id",
|
216
|
-
"text": "Document content...",
|
217
|
-
"metadata": {
|
218
|
-
"embedding": [0.1, 0.2, ...],
|
219
|
-
"category": "product",
|
220
|
-
"lastUpdated": "2024-01-01"
|
215
|
+
"id": "unique-id", // Required: unique document identifier
|
216
|
+
"text": "Document content...", // Required: searchable text content (or use "content" for Spring AI)
|
217
|
+
"metadata": { // Required: metadata object
|
218
|
+
"embedding": [0.1, 0.2, ...], // Required: array of numbers matching vector dimensions
|
219
|
+
"category": "product", // Optional: additional metadata
|
220
|
+
"lastUpdated": "2024-01-01" // Optional: additional metadata
|
221
221
|
}
|
222
222
|
}
|
223
223
|
</code></pre>
|
224
|
+
<p><strong>Spring AI Compatibility</strong>: You can use <code>"content"</code> instead of <code>"text"</code> for the document field. The library auto-detects which field name you're using from the first document and optimizes subsequent lookups.</p>
|
225
|
+
<p><strong>Common Mistakes:</strong></p>
|
226
|
+
<ul>
|
227
|
+
<li>❌ Putting <code>embedding</code> at the root level instead of inside <code>metadata</code></li>
|
228
|
+
<li>❌ Using string format for embeddings instead of number array</li>
|
229
|
+
<li>❌ Missing required fields (<code>id</code>, <code>text</code>, or <code>metadata</code>)</li>
|
230
|
+
<li>❌ Wrong embedding dimensions (must match VectorStore constructor)</li>
|
231
|
+
</ul>
|
232
|
+
<p><strong>Validate your JSON format:</strong></p>
|
233
|
+
<pre class="prettyprint source lang-bash"><code>node node_modules/native-vector-store/examples/validate-format.js your-file.json
|
234
|
+
</code></pre>
|
224
235
|
<h3 id="deployment-strategies">Deployment Strategies</h3>
|
225
236
|
<h4 id="blue-green-deployment">Blue-Green Deployment</h4>
|
226
237
|
<pre class="prettyprint source lang-javascript"><code>// Load new version without downtime
|
package/index.js
CHANGED
@@ -3,7 +3,8 @@ const { VectorStore } = require('node-gyp-build')(__dirname);
|
|
3
3
|
/**
|
4
4
|
* @typedef {Object} Document
|
5
5
|
* @property {string} id - Unique identifier for the document
|
6
|
-
* @property {string} text - The text content of the document
|
6
|
+
* @property {string} [text] - The text content of the document (mutually exclusive with content)
|
7
|
+
* @property {string} [content] - Alternative to text for Spring AI compatibility (mutually exclusive with text)
|
7
8
|
* @property {Object} metadata - Document metadata
|
8
9
|
* @property {number[]} metadata.embedding - The embedding vector for the document
|
9
10
|
* @property {*} [metadata.*] - Additional metadata properties
|
@@ -60,7 +61,7 @@ class VectorStoreWrapper {
|
|
60
61
|
* // Store is automatically finalized and ready for searches
|
61
62
|
*
|
62
63
|
* @example
|
63
|
-
* //
|
64
|
+
* // Standard format with 'text' field
|
64
65
|
* {
|
65
66
|
* "id": "doc-123",
|
66
67
|
* "text": "Document content...",
|
@@ -69,6 +70,17 @@ class VectorStoreWrapper {
|
|
69
70
|
* "category": "product" // Optional: additional metadata
|
70
71
|
* }
|
71
72
|
* }
|
73
|
+
*
|
74
|
+
* @example
|
75
|
+
* // Spring AI format with 'content' field
|
76
|
+
* {
|
77
|
+
* "id": "doc-456",
|
78
|
+
* "content": "Document content...", // 'content' instead of 'text'
|
79
|
+
* "metadata": {
|
80
|
+
* "embedding": [0.1, 0.2, ...],
|
81
|
+
* "category": "spring-ai"
|
82
|
+
* }
|
83
|
+
* }
|
72
84
|
*/
|
73
85
|
loadDir(path) {}
|
74
86
|
|
package/lib/index.d.ts
CHANGED
package/package.json
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
{
|
2
2
|
"name": "native-vector-store",
|
3
|
-
"version": "0.3.2",
|
3
|
+
"version": "0.3.4",
|
4
4
|
"description": "High-performance local vector store with SIMD optimization for MCP servers",
|
5
5
|
"main": "index.js",
|
6
6
|
"types": "lib/index.d.ts",
|
@@ -13,7 +13,7 @@
|
|
13
13
|
"bugs": {
|
14
14
|
"url": "https://github.com/mboros1/native-vector-store/issues"
|
15
15
|
},
|
16
|
-
"homepage": "https://github.
|
16
|
+
"homepage": "https://mboros1.github.io/native-vector-store/",
|
17
17
|
"scripts": {
|
18
18
|
"build": "node-gyp rebuild",
|
19
19
|
"clean": "node-gyp clean",
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
package/src/vector_store.cpp
CHANGED
@@ -130,10 +130,58 @@ simdjson::error_code VectorStore::add_document(simdjson::ondemand::object& json_
|
|
130
130
|
// Parse with error handling
|
131
131
|
std::string_view id, text;
|
132
132
|
auto error = json_doc["id"].get_string().get(id);
|
133
|
-
if (error)
|
133
|
+
if (error) {
|
134
|
+
if (error == simdjson::NO_SUCH_FIELD) {
|
135
|
+
fprintf(stderr, "Missing required field 'id'\n");
|
136
|
+
}
|
137
|
+
return error;
|
138
|
+
}
|
134
139
|
|
135
|
-
|
136
|
-
|
140
|
+
// Auto-detect text field type on first document, then use that for all subsequent documents
|
141
|
+
TextFieldType field_type = text_field_type_.load(std::memory_order_acquire);
|
142
|
+
|
143
|
+
if (field_type == TextFieldType::UNKNOWN) {
|
144
|
+
// First document - detect field type
|
145
|
+
error = json_doc["text"].get_string().get(text);
|
146
|
+
if (!error) {
|
147
|
+
// Found 'text' field - use it for all documents
|
148
|
+
TextFieldType expected = TextFieldType::UNKNOWN;
|
149
|
+
text_field_type_.compare_exchange_strong(expected, TextFieldType::TEXT, std::memory_order_release);
|
150
|
+
} else if (error == simdjson::NO_SUCH_FIELD) {
|
151
|
+
// Try 'content' field
|
152
|
+
error = json_doc["content"].get_string().get(text);
|
153
|
+
if (!error) {
|
154
|
+
// Found 'content' field - use it for all documents
|
155
|
+
TextFieldType expected = TextFieldType::UNKNOWN;
|
156
|
+
text_field_type_.compare_exchange_strong(expected, TextFieldType::CONTENT, std::memory_order_release);
|
157
|
+
} else {
|
158
|
+
if (error == simdjson::NO_SUCH_FIELD) {
|
159
|
+
fprintf(stderr, "Missing required field 'text' or 'content'\n");
|
160
|
+
}
|
161
|
+
return error;
|
162
|
+
}
|
163
|
+
} else {
|
164
|
+
return error;
|
165
|
+
}
|
166
|
+
} else if (field_type == TextFieldType::TEXT) {
|
167
|
+
// Use 'text' field directly
|
168
|
+
error = json_doc["text"].get_string().get(text);
|
169
|
+
if (error) {
|
170
|
+
if (error == simdjson::NO_SUCH_FIELD) {
|
171
|
+
fprintf(stderr, "Missing required field 'text' (detected from first document)\n");
|
172
|
+
}
|
173
|
+
return error;
|
174
|
+
}
|
175
|
+
} else { // TextFieldType::CONTENT
|
176
|
+
// Use 'content' field directly
|
177
|
+
error = json_doc["content"].get_string().get(text);
|
178
|
+
if (error) {
|
179
|
+
if (error == simdjson::NO_SUCH_FIELD) {
|
180
|
+
fprintf(stderr, "Missing required field 'content' (detected from first document)\n");
|
181
|
+
}
|
182
|
+
return error;
|
183
|
+
}
|
184
|
+
}
|
137
185
|
|
138
186
|
// Calculate sizes
|
139
187
|
size_t emb_size = dim_ * sizeof(float);
|
@@ -147,11 +195,21 @@ simdjson::error_code VectorStore::add_document(simdjson::ondemand::object& json_
|
|
147
195
|
// Process metadata and embedding first
|
148
196
|
simdjson::ondemand::object metadata;
|
149
197
|
error = json_doc["metadata"].get_object().get(metadata);
|
150
|
-
if (error)
|
198
|
+
if (error) {
|
199
|
+
if (error == simdjson::NO_SUCH_FIELD) {
|
200
|
+
fprintf(stderr, "Missing required field 'metadata'\n");
|
201
|
+
}
|
202
|
+
return error;
|
203
|
+
}
|
151
204
|
|
152
205
|
simdjson::ondemand::array emb_array;
|
153
206
|
error = metadata["embedding"].get_array().get(emb_array);
|
154
|
-
if (error)
|
207
|
+
if (error) {
|
208
|
+
if (error == simdjson::NO_SUCH_FIELD) {
|
209
|
+
fprintf(stderr, "Missing required field 'embedding' inside 'metadata'\n");
|
210
|
+
}
|
211
|
+
return error;
|
212
|
+
}
|
155
213
|
|
156
214
|
// Consume the array before touching anything else
|
157
215
|
size_t i = 0;
|
package/src/vector_store.h
CHANGED
@@ -74,6 +74,10 @@ private:
|
|
74
74
|
std::atomic<bool> is_finalized_{false}; // Simple flag: false = loading, true = serving
|
75
75
|
mutable std::shared_mutex search_mutex_; // Protects against overlapping OpenMP teams
|
76
76
|
|
77
|
+
// Auto-detect text field name for Spring AI compatibility
|
78
|
+
enum class TextFieldType { UNKNOWN, TEXT, CONTENT };
|
79
|
+
std::atomic<TextFieldType> text_field_type_{TextFieldType::UNKNOWN};
|
80
|
+
|
77
81
|
public:
|
78
82
|
explicit VectorStore(size_t dim);
|
79
83
|
|
package/src/vector_store_loader.cpp
CHANGED
@@ -135,6 +135,14 @@ void VectorStoreLoader::loadDirectory(VectorStore* store, const std::string& pat
|
|
135
135
|
if (add_error) {
|
136
136
|
fprintf(stderr, "Error adding document from %s: %s\n",
|
137
137
|
data->filename.c_str(), simdjson::error_message(add_error));
|
138
|
+
if (add_error == simdjson::NO_SUCH_FIELD) {
|
139
|
+
fprintf(stderr, " Expected JSON format: {\"id\": string, \"text\": string, \"metadata\": {\"embedding\": [numbers...]}}\n");
|
140
|
+
fprintf(stderr, " Required fields: id, text (or content), metadata.embedding\n");
|
141
|
+
fprintf(stderr, " Note: 'embedding' must be inside 'metadata' object\n");
|
142
|
+
fprintf(stderr, " Note: 'text' and 'content' are interchangeable (Spring AI compatibility)\n");
|
143
|
+
} else if (add_error == simdjson::INCORRECT_TYPE) {
|
144
|
+
fprintf(stderr, " Possible causes: wrong embedding dimensions or store already finalized\n");
|
145
|
+
}
|
138
146
|
}
|
139
147
|
}
|
140
148
|
}
|
@@ -147,6 +155,14 @@ void VectorStoreLoader::loadDirectory(VectorStore* store, const std::string& pat
|
|
147
155
|
if (add_error) {
|
148
156
|
fprintf(stderr, "Error adding document from %s: %s\n",
|
149
157
|
data->filename.c_str(), simdjson::error_message(add_error));
|
158
|
+
if (add_error == simdjson::NO_SUCH_FIELD) {
|
159
|
+
fprintf(stderr, " Expected JSON format: {\"id\": string, \"text\": string, \"metadata\": {\"embedding\": [numbers...]}}\n");
|
160
|
+
fprintf(stderr, " Required fields: id, text (or content), metadata.embedding\n");
|
161
|
+
fprintf(stderr, " Note: 'embedding' must be inside 'metadata' object\n");
|
162
|
+
fprintf(stderr, " Note: 'text' and 'content' are interchangeable (Spring AI compatibility)\n");
|
163
|
+
} else if (add_error == simdjson::INCORRECT_TYPE) {
|
164
|
+
fprintf(stderr, " Possible causes: wrong embedding dimensions or store already finalized\n");
|
165
|
+
}
|
150
166
|
}
|
151
167
|
}
|
152
168
|
}
|