native-vector-store 0.3.3 → 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -192,16 +192,29 @@ Each JSON file contains self-contained documents with embeddings:
192
192
 
193
193
  ```json
194
194
  {
195
- "id": "unique-id",
196
- "text": "Document content...",
197
- "metadata": {
198
- "embedding": [0.1, 0.2, ...],
199
- "category": "product",
200
- "lastUpdated": "2024-01-01"
195
+ "id": "unique-id", // Required: unique document identifier
196
+ "text": "Document content...", // Required: searchable text content (or use "content" for Spring AI)
197
+ "metadata": { // Required: metadata object
198
+ "embedding": [0.1, 0.2, ...], // Required: array of numbers matching vector dimensions
199
+ "category": "product", // Optional: additional metadata
200
+ "lastUpdated": "2024-01-01" // Optional: additional metadata
201
201
  }
202
202
  }
203
203
  ```
204
204
 
205
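+ **Spring AI Compatibility**: You can use `"content"` instead of `"text"` for the document field. The library auto-detects which field name you're using from the first document and optimizes subsequent lookups by reading only that field, so every document loaded into the same store must use the same field name (do not mix `"text"` and `"content"` documents).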
206
+
207
+ **Common Mistakes:**
208
+ - ❌ Putting `embedding` at the root level instead of inside `metadata`
209
+ - ❌ Using string format for embeddings instead of number array
210
+ - ❌ Missing required fields (`id`, `text` or `content`, or `metadata`)
211
+ - ❌ Wrong embedding dimensions (must match VectorStore constructor)
212
+
213
+ **Validate your JSON format:**
214
+ ```bash
215
+ node node_modules/native-vector-store/examples/validate-format.js your-file.json
216
+ ```
217
+
205
218
  ### Deployment Strategies
206
219
 
207
220
  #### Blue-Green Deployment
@@ -815,7 +815,7 @@ Supports both single documents and arrays of documents per file.</p>
815
815
  store.loadDir('./knowledge-base');
816
816
  // Store is automatically finalized and ready for searches</code></pre>
817
817
 
818
- <pre class="prettyprint"><code>// Document format in JSON files
818
+ <pre class="prettyprint"><code>// Standard format with 'text' field
819
819
  {
820
820
  "id": "doc-123",
821
821
  "text": "Document content...",
@@ -825,6 +825,16 @@ store.loadDir('./knowledge-base');
825
825
  }
826
826
  }</code></pre>
827
827
 
828
+ <pre class="prettyprint"><code>// Spring AI format with 'content' field
829
+ {
830
+ "id": "doc-456",
831
+ "content": "Document content...", // 'content' instead of 'text'
832
+ "metadata": {
833
+ "embedding": [0.1, 0.2, ...],
834
+ "category": "spring-ai"
835
+ }
836
+ }</code></pre>
837
+
828
838
 
829
839
 
830
840
 
package/docs/global.html CHANGED
@@ -137,6 +137,8 @@
137
137
  <th>Type</th>
138
138
 
139
139
 
140
+ <th>Attributes</th>
141
+
140
142
 
141
143
 
142
144
 
@@ -162,6 +164,12 @@
162
164
  </td>
163
165
 
164
166
 
167
+ <td class="attributes">
168
+
169
+
170
+
171
+ </td>
172
+
165
173
 
166
174
 
167
175
 
@@ -185,10 +193,49 @@
185
193
  </td>
186
194
 
187
195
 
196
+ <td class="attributes">
197
+
198
+ &lt;optional><br>
199
+
200
+
201
+
202
+ </td>
203
+
188
204
 
189
205
 
190
206
 
191
- <td class="description last"><p>The text content of the document</p></td>
207
+ <td class="description last"><p>The text content of the document (mutually exclusive with content)</p></td>
208
+ </tr>
209
+
210
+
211
+
212
+ <tr>
213
+
214
+ <td class="name"><code>content</code></td>
215
+
216
+
217
+ <td class="type">
218
+
219
+
220
+ <span class="param-type">string</span>
221
+
222
+
223
+
224
+ </td>
225
+
226
+
227
+ <td class="attributes">
228
+
229
+ &lt;optional><br>
230
+
231
+
232
+
233
+ </td>
234
+
235
+
236
+
237
+
238
+ <td class="description last"><p>Alternative to text for Spring AI compatibility (mutually exclusive with text)</p></td>
192
239
  </tr>
193
240
 
194
241
 
@@ -208,6 +255,12 @@
208
255
  </td>
209
256
 
210
257
 
258
+ <td class="attributes">
259
+
260
+
261
+
262
+ </td>
263
+
211
264
 
212
265
 
213
266
 
package/docs/index.html CHANGED
@@ -212,15 +212,26 @@ const supportResults = supportStore.search(queryEmbedding, 5);
212
212
  </code></pre>
213
213
  <p>Each JSON file contains self-contained documents with embeddings:</p>
214
214
  <pre class="prettyprint source lang-json"><code>{
215
- &quot;id&quot;: &quot;unique-id&quot;,
216
- &quot;text&quot;: &quot;Document content...&quot;,
217
- &quot;metadata&quot;: {
218
- &quot;embedding&quot;: [0.1, 0.2, ...],
219
- &quot;category&quot;: &quot;product&quot;,
220
- &quot;lastUpdated&quot;: &quot;2024-01-01&quot;
215
+ &quot;id&quot;: &quot;unique-id&quot;, // Required: unique document identifier
216
+ &quot;text&quot;: &quot;Document content...&quot;, // Required: searchable text content (or use &quot;content&quot; for Spring AI)
217
+ &quot;metadata&quot;: { // Required: metadata object
218
+ &quot;embedding&quot;: [0.1, 0.2, ...], // Required: array of numbers matching vector dimensions
219
+ &quot;category&quot;: &quot;product&quot;, // Optional: additional metadata
220
+ &quot;lastUpdated&quot;: &quot;2024-01-01&quot; // Optional: additional metadata
221
221
  }
222
222
  }
223
223
  </code></pre>
224
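+ <p><strong>Spring AI Compatibility</strong>: You can use <code>&quot;content&quot;</code> instead of <code>&quot;text&quot;</code> for the document field. The library auto-detects which field name you're using from the first document and optimizes subsequent lookups by reading only that field, so every document loaded into the same store must use the same field name (do not mix <code>&quot;text&quot;</code> and <code>&quot;content&quot;</code> documents).</p>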
225
+ <p><strong>Common Mistakes:</strong></p>
226
+ <ul>
227
+ <li>❌ Putting <code>embedding</code> at the root level instead of inside <code>metadata</code></li>
228
+ <li>❌ Using string format for embeddings instead of number array</li>
229
+ <li>❌ Missing required fields (<code>id</code>, <code>text</code> or <code>content</code>, or <code>metadata</code>)</li>
230
+ <li>❌ Wrong embedding dimensions (must match VectorStore constructor)</li>
231
+ </ul>
232
+ <p><strong>Validate your JSON format:</strong></p>
233
+ <pre class="prettyprint source lang-bash"><code>node node_modules/native-vector-store/examples/validate-format.js your-file.json
234
+ </code></pre>
224
235
  <h3 id="deployment-strategies">Deployment Strategies</h3>
225
236
  <h4 id="blue-green-deployment">Blue-Green Deployment</h4>
226
237
  <pre class="prettyprint source lang-javascript"><code>// Load new version without downtime
package/index.js CHANGED
@@ -3,7 +3,8 @@ const { VectorStore } = require('node-gyp-build')(__dirname);
3
3
  /**
4
4
  * @typedef {Object} Document
5
5
  * @property {string} id - Unique identifier for the document
6
- * @property {string} text - The text content of the document
6
+ * @property {string} [text] - The text content of the document (mutually exclusive with content)
7
+ * @property {string} [content] - Alternative to text for Spring AI compatibility (mutually exclusive with text)
7
8
  * @property {Object} metadata - Document metadata
8
9
  * @property {number[]} metadata.embedding - The embedding vector for the document
9
10
  * @property {*} [metadata.*] - Additional metadata properties
@@ -60,7 +61,7 @@ class VectorStoreWrapper {
60
61
  * // Store is automatically finalized and ready for searches
61
62
  *
62
63
  * @example
63
- * // Document format in JSON files
64
+ * // Standard format with 'text' field
64
65
  * {
65
66
  * "id": "doc-123",
66
67
  * "text": "Document content...",
@@ -69,6 +70,17 @@ class VectorStoreWrapper {
69
70
  * "category": "product" // Optional: additional metadata
70
71
  * }
71
72
  * }
73
+ *
74
+ * @example
75
+ * // Spring AI format with 'content' field
76
+ * {
77
+ * "id": "doc-456",
78
+ * "content": "Document content...", // 'content' instead of 'text'
79
+ * "metadata": {
80
+ * "embedding": [0.1, 0.2, ...],
81
+ * "category": "spring-ai"
82
+ * }
83
+ * }
72
84
  */
73
85
  loadDir(path) {}
74
86
 
package/lib/index.d.ts CHANGED
@@ -1,6 +1,7 @@
1
1
  export interface Document {
2
2
  id: string;
3
- text: string;
3
+ text?: string; // Either text or content is required
4
+ content?: string; // Spring AI compatibility
4
5
  metadata: {
5
6
  embedding?: number[];
6
7
  [key: string]: any;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "native-vector-store",
3
- "version": "0.3.3",
3
+ "version": "0.3.4",
4
4
  "description": "High-performance local vector store with SIMD optimization for MCP servers",
5
5
  "main": "index.js",
6
6
  "types": "lib/index.d.ts",
@@ -130,10 +130,58 @@ simdjson::error_code VectorStore::add_document(simdjson::ondemand::object& json_
130
130
  // Parse with error handling
131
131
  std::string_view id, text;
132
132
  auto error = json_doc["id"].get_string().get(id);
133
- if (error) return error;
133
+ if (error) {
134
+ if (error == simdjson::NO_SUCH_FIELD) {
135
+ fprintf(stderr, "Missing required field 'id'\n");
136
+ }
137
+ return error;
138
+ }
134
139
 
135
- error = json_doc["text"].get_string().get(text);
136
- if (error) return error;
140
+ // Auto-detect text field type on first document, then use that for all subsequent documents
141
+ TextFieldType field_type = text_field_type_.load(std::memory_order_acquire);
142
+
143
+ if (field_type == TextFieldType::UNKNOWN) {
144
+ // First document - detect field type
145
+ error = json_doc["text"].get_string().get(text);
146
+ if (!error) {
147
+ // Found 'text' field - use it for all documents
148
+ TextFieldType expected = TextFieldType::UNKNOWN;
149
+ text_field_type_.compare_exchange_strong(expected, TextFieldType::TEXT, std::memory_order_release);
150
+ } else if (error == simdjson::NO_SUCH_FIELD) {
151
+ // Try 'content' field
152
+ error = json_doc["content"].get_string().get(text);
153
+ if (!error) {
154
+ // Found 'content' field - use it for all documents
155
+ TextFieldType expected = TextFieldType::UNKNOWN;
156
+ text_field_type_.compare_exchange_strong(expected, TextFieldType::CONTENT, std::memory_order_release);
157
+ } else {
158
+ if (error == simdjson::NO_SUCH_FIELD) {
159
+ fprintf(stderr, "Missing required field 'text' or 'content'\n");
160
+ }
161
+ return error;
162
+ }
163
+ } else {
164
+ return error;
165
+ }
166
+ } else if (field_type == TextFieldType::TEXT) {
167
+ // Use 'text' field directly
168
+ error = json_doc["text"].get_string().get(text);
169
+ if (error) {
170
+ if (error == simdjson::NO_SUCH_FIELD) {
171
+ fprintf(stderr, "Missing required field 'text' (detected from first document)\n");
172
+ }
173
+ return error;
174
+ }
175
+ } else { // TextFieldType::CONTENT
176
+ // Use 'content' field directly
177
+ error = json_doc["content"].get_string().get(text);
178
+ if (error) {
179
+ if (error == simdjson::NO_SUCH_FIELD) {
180
+ fprintf(stderr, "Missing required field 'content' (detected from first document)\n");
181
+ }
182
+ return error;
183
+ }
184
+ }
137
185
 
138
186
  // Calculate sizes
139
187
  size_t emb_size = dim_ * sizeof(float);
@@ -147,11 +195,21 @@ simdjson::error_code VectorStore::add_document(simdjson::ondemand::object& json_
147
195
  // Process metadata and embedding first
148
196
  simdjson::ondemand::object metadata;
149
197
  error = json_doc["metadata"].get_object().get(metadata);
150
- if (error) return error;
198
+ if (error) {
199
+ if (error == simdjson::NO_SUCH_FIELD) {
200
+ fprintf(stderr, "Missing required field 'metadata'\n");
201
+ }
202
+ return error;
203
+ }
151
204
 
152
205
  simdjson::ondemand::array emb_array;
153
206
  error = metadata["embedding"].get_array().get(emb_array);
154
- if (error) return error;
207
+ if (error) {
208
+ if (error == simdjson::NO_SUCH_FIELD) {
209
+ fprintf(stderr, "Missing required field 'embedding' inside 'metadata'\n");
210
+ }
211
+ return error;
212
+ }
155
213
 
156
214
  // Consume the array before touching anything else
157
215
  size_t i = 0;
@@ -74,6 +74,10 @@ private:
74
74
  std::atomic<bool> is_finalized_{false}; // Simple flag: false = loading, true = serving
75
75
  mutable std::shared_mutex search_mutex_; // Protects against overlapping OpenMP teams
76
76
 
77
+ // Auto-detect text field name for Spring AI compatibility
78
+ enum class TextFieldType { UNKNOWN, TEXT, CONTENT };
79
+ std::atomic<TextFieldType> text_field_type_{TextFieldType::UNKNOWN};
80
+
77
81
  public:
78
82
  explicit VectorStore(size_t dim);
79
83
 
@@ -135,6 +135,14 @@ void VectorStoreLoader::loadDirectory(VectorStore* store, const std::string& pat
135
135
  if (add_error) {
136
136
  fprintf(stderr, "Error adding document from %s: %s\n",
137
137
  data->filename.c_str(), simdjson::error_message(add_error));
138
+ if (add_error == simdjson::NO_SUCH_FIELD) {
139
+ fprintf(stderr, " Expected JSON format: {\"id\": string, \"text\": string, \"metadata\": {\"embedding\": [numbers...]}}\n");
140
+ fprintf(stderr, " Required fields: id, text (or content), metadata.embedding\n");
141
+ fprintf(stderr, " Note: 'embedding' must be inside 'metadata' object\n");
142
+ fprintf(stderr, " Note: 'text' and 'content' are interchangeable (Spring AI compatibility)\n");
143
+ } else if (add_error == simdjson::INCORRECT_TYPE) {
144
+ fprintf(stderr, " Possible causes: wrong embedding dimensions or store already finalized\n");
145
+ }
138
146
  }
139
147
  }
140
148
  }
@@ -147,6 +155,14 @@ void VectorStoreLoader::loadDirectory(VectorStore* store, const std::string& pat
147
155
  if (add_error) {
148
156
  fprintf(stderr, "Error adding document from %s: %s\n",
149
157
  data->filename.c_str(), simdjson::error_message(add_error));
158
+ if (add_error == simdjson::NO_SUCH_FIELD) {
159
+ fprintf(stderr, " Expected JSON format: {\"id\": string, \"text\": string, \"metadata\": {\"embedding\": [numbers...]}}\n");
160
+ fprintf(stderr, " Required fields: id, text (or content), metadata.embedding\n");
161
+ fprintf(stderr, " Note: 'embedding' must be inside 'metadata' object\n");
162
+ fprintf(stderr, " Note: 'text' and 'content' are interchangeable (Spring AI compatibility)\n");
163
+ } else if (add_error == simdjson::INCORRECT_TYPE) {
164
+ fprintf(stderr, " Possible causes: wrong embedding dimensions or store already finalized\n");
165
+ }
150
166
  }
151
167
  }
152
168
  }