native-vector-store 0.3.6 → 0.3.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -3
- package/docs/index.html +12 -6
- package/package.json +1 -1
- package/prebuilds/darwin-arm64/native-vector-store.node +0 -0
- package/prebuilds/darwin-x64/native-vector-store.node +0 -0
- package/prebuilds/linux-arm64/native-vector-store.node +0 -0
- package/prebuilds/linux-x64/native-vector-store.node +0 -0
- package/prebuilds/win32-x64/native-vector-store.node +0 -0
- package/src/binding.cc +2 -2
- package/src/test_main.cpp +6 -6
- package/src/test_stress.cpp +7 -7
- package/src/vector_store.cpp +64 -34
- package/src/vector_store.h +160 -2
- package/src/vector_store_loader.cpp +20 -13
- package/src/vector_store_loader_adaptive.cpp +10 -7
- package/src/vector_store_loader_mmap.cpp +9 -6
- package/prebuilds/linux-x64-musl/napi-v9/native-vector-store.node +0 -0
- package/prebuilds/linux-x64-musl/native-vector-store.node +0 -0
package/README.md
CHANGED
|
@@ -390,9 +390,10 @@ Performance on typical hardware (M1 MacBook Pro):
|
|
|
390
390
|
|
|
391
391
|
| Operation | Documents | Time | Throughput |
|
|
392
392
|
|-----------|-----------|------|------------|
|
|
393
|
+
| Loading (from disk) | 10,000 | 153ms | 65k docs/sec |
|
|
393
394
|
| Loading (from disk) | 100,000 | ~560ms | 178k docs/sec |
|
|
394
395
|
| Loading (production) | 65,000 | 15-20s | 3.2-4.3k docs/sec |
|
|
395
|
-
| Search (k=10) | 10,000 corpus |
|
|
396
|
+
| Search (k=10) | 10,000 corpus | 2ms | 500 queries/sec |
|
|
396
397
|
| Search (k=10) | 65,000 corpus | 40-45ms | 20-25 queries/sec |
|
|
397
398
|
| Search (k=100) | 100,000 corpus | 8-12ms | 80-125 queries/sec |
|
|
398
399
|
| Normalization | 100,000 | <100ms | 1M+ docs/sec |
|
|
@@ -503,8 +504,8 @@ Performance on M1 MacBook Pro with 1536-dimensional embeddings:
|
|
|
503
504
|
|
|
504
505
|
| Operation | Document Count | Time | Rate |
|
|
505
506
|
|-----------|---------------|------|------|
|
|
506
|
-
| Load | 10,000 |
|
|
507
|
-
| Search | 10,000 |
|
|
507
|
+
| Load | 10,000 | 153ms | 65.4k docs/sec |
|
|
508
|
+
| Search | 10,000 | 2ms | 5M docs/sec |
|
|
508
509
|
| Normalize | 10,000 | 12ms | 833k docs/sec |
|
|
509
510
|
|
|
510
511
|
*Results may vary based on hardware and document characteristics.*
|
package/docs/index.html
CHANGED
|
@@ -383,6 +383,12 @@ const response = await server.handleMCPRequest('vector_search', {
|
|
|
383
383
|
<tbody>
|
|
384
384
|
<tr>
|
|
385
385
|
<td>Loading (from disk)</td>
|
|
386
|
+
<td>10,000</td>
|
|
387
|
+
<td>153ms</td>
|
|
388
|
+
<td>65k docs/sec</td>
|
|
389
|
+
</tr>
|
|
390
|
+
<tr>
|
|
391
|
+
<td>Loading (from disk)</td>
|
|
386
392
|
<td>100,000</td>
|
|
387
393
|
<td>~560ms</td>
|
|
388
394
|
<td>178k docs/sec</td>
|
|
@@ -396,8 +402,8 @@ const response = await server.handleMCPRequest('vector_search', {
|
|
|
396
402
|
<tr>
|
|
397
403
|
<td>Search (k=10)</td>
|
|
398
404
|
<td>10,000 corpus</td>
|
|
399
|
-
<td>
|
|
400
|
-
<td>500
|
|
405
|
+
<td>2ms</td>
|
|
406
|
+
<td>500 queries/sec</td>
|
|
401
407
|
</tr>
|
|
402
408
|
<tr>
|
|
403
409
|
<td>Search (k=10)</td>
|
|
@@ -591,14 +597,14 @@ npm run example
|
|
|
591
597
|
<tr>
|
|
592
598
|
<td>Load</td>
|
|
593
599
|
<td>10,000</td>
|
|
594
|
-
<td>
|
|
595
|
-
<td>
|
|
600
|
+
<td>153ms</td>
|
|
601
|
+
<td>65.4k docs/sec</td>
|
|
596
602
|
</tr>
|
|
597
603
|
<tr>
|
|
598
604
|
<td>Search</td>
|
|
599
605
|
<td>10,000</td>
|
|
600
|
-
<td>
|
|
601
|
-
<td>
|
|
606
|
+
<td>2ms</td>
|
|
607
|
+
<td>5M docs/sec</td>
|
|
602
608
|
</tr>
|
|
603
609
|
<tr>
|
|
604
610
|
<td>Normalize</td>
|
package/package.json
CHANGED
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
package/src/binding.cc
CHANGED
|
@@ -79,9 +79,9 @@ public:
|
|
|
79
79
|
}
|
|
80
80
|
|
|
81
81
|
auto add_error = store_->add_document(json_doc);
|
|
82
|
-
if (add_error) {
|
|
82
|
+
if (add_error != VectorStoreError::SUCCESS) {
|
|
83
83
|
Napi::Error::New(info.Env(),
|
|
84
|
-
std::string("Document add error: ") +
|
|
84
|
+
std::string("Document add error: ") + vector_store_error_message(add_error))
|
|
85
85
|
.ThrowAsJavaScriptException();
|
|
86
86
|
return;
|
|
87
87
|
}
|
package/src/test_main.cpp
CHANGED
|
@@ -33,8 +33,8 @@ void test_single_document() {
|
|
|
33
33
|
|
|
34
34
|
std::cout << "Adding document..." << std::endl;
|
|
35
35
|
auto add_error = store.add_document(doc);
|
|
36
|
-
if (add_error) {
|
|
37
|
-
std::cerr << "Document add error: " <<
|
|
36
|
+
if (add_error != VectorStoreError::SUCCESS) {
|
|
37
|
+
std::cerr << "Document add error: " << vector_store_error_message(add_error) << std::endl;
|
|
38
38
|
return;
|
|
39
39
|
}
|
|
40
40
|
std::cout << "Document added successfully. Store size: " << store.size() << std::endl;
|
|
@@ -115,8 +115,8 @@ void test_load_directory(const std::string& path) {
|
|
|
115
115
|
}
|
|
116
116
|
|
|
117
117
|
auto add_error = store.add_document(doc_obj);
|
|
118
|
-
if (add_error) {
|
|
119
|
-
std::cerr << " Error adding document: " <<
|
|
118
|
+
if (add_error != VectorStoreError::SUCCESS) {
|
|
119
|
+
std::cerr << " Error adding document: " << vector_store_error_message(add_error) << std::endl;
|
|
120
120
|
error_count++;
|
|
121
121
|
} else {
|
|
122
122
|
doc_count++;
|
|
@@ -132,8 +132,8 @@ void test_load_directory(const std::string& path) {
|
|
|
132
132
|
std::cout << " Detected single document" << std::endl;
|
|
133
133
|
std::cout << " Adding to store..." << std::endl;
|
|
134
134
|
auto add_error = store.add_document(json_doc);
|
|
135
|
-
if (add_error) {
|
|
136
|
-
std::cerr << " Error adding document: " <<
|
|
135
|
+
if (add_error != VectorStoreError::SUCCESS) {
|
|
136
|
+
std::cerr << " Error adding document: " << vector_store_error_message(add_error) << std::endl;
|
|
137
137
|
} else {
|
|
138
138
|
std::cout << " Document added successfully";
|
|
139
139
|
}
|
package/src/test_stress.cpp
CHANGED
|
@@ -107,7 +107,7 @@ void test_phase_enforcement() {
|
|
|
107
107
|
simdjson::ondemand::document doc;
|
|
108
108
|
if (!parser.iterate(padded).get(doc)) {
|
|
109
109
|
auto error = store.add_document(doc);
|
|
110
|
-
assert(error ==
|
|
110
|
+
assert(error == VectorStoreError::SUCCESS);
|
|
111
111
|
}
|
|
112
112
|
}
|
|
113
113
|
|
|
@@ -127,7 +127,7 @@ void test_phase_enforcement() {
|
|
|
127
127
|
simdjson::ondemand::document doc;
|
|
128
128
|
parser.iterate(padded).get(doc);
|
|
129
129
|
auto error = store.add_document(doc);
|
|
130
|
-
assert(error ==
|
|
130
|
+
assert(error == VectorStoreError::STORE_ALREADY_FINALIZED);
|
|
131
131
|
std::cout << " ✅ Document addition correctly blocked after finalization\n";
|
|
132
132
|
}
|
|
133
133
|
|
|
@@ -159,11 +159,11 @@ void test_oversize_allocation() {
|
|
|
159
159
|
auto error = parser.iterate(padded).get(doc);
|
|
160
160
|
if (!error) {
|
|
161
161
|
// This should fail in the allocator
|
|
162
|
-
error = store.add_document(doc);
|
|
163
|
-
if (error ==
|
|
162
|
+
auto error = store.add_document(doc);
|
|
163
|
+
if (error == VectorStoreError::MEMORY_ALLOCATION_FAILED) {
|
|
164
164
|
std::cout << "✅ Correctly rejected oversize allocation\n";
|
|
165
165
|
} else {
|
|
166
|
-
std::cout << "❌ Should have failed with MEMALLOC error, got: " <<
|
|
166
|
+
std::cout << "❌ Should have failed with MEMALLOC error, got: " << vector_store_error_message(error) << "\n";
|
|
167
167
|
std::exit(1);
|
|
168
168
|
}
|
|
169
169
|
} else {
|
|
@@ -231,7 +231,7 @@ void test_phase_separation() {
|
|
|
231
231
|
simdjson::ondemand::document doc;
|
|
232
232
|
if (!parser.iterate(padded).get(doc)) {
|
|
233
233
|
auto error = store.add_document(doc);
|
|
234
|
-
if (
|
|
234
|
+
if (error == VectorStoreError::SUCCESS) {
|
|
235
235
|
docs_loaded++;
|
|
236
236
|
}
|
|
237
237
|
}
|
|
@@ -260,7 +260,7 @@ void test_phase_separation() {
|
|
|
260
260
|
simdjson::ondemand::document doc;
|
|
261
261
|
parser.iterate(padded).get(doc);
|
|
262
262
|
auto error = store.add_document(doc);
|
|
263
|
-
assert(error ==
|
|
263
|
+
assert(error == VectorStoreError::STORE_ALREADY_FINALIZED);
|
|
264
264
|
std::cout << " ✅ Document additions correctly blocked after finalization\n";
|
|
265
265
|
}
|
|
266
266
|
|
package/src/vector_store.cpp
CHANGED
|
@@ -71,6 +71,7 @@ ArenaAllocator::~ArenaAllocator() {
|
|
|
71
71
|
}
|
|
72
72
|
}
|
|
73
73
|
|
|
74
|
+
|
|
74
75
|
// TopK implementation
|
|
75
76
|
|
|
76
77
|
TopK::TopK(size_t k) : k(k) {
|
|
@@ -110,21 +111,33 @@ void TopK::merge(const TopK& other) {
|
|
|
110
111
|
|
|
111
112
|
// Builds an empty store for `dim`-dimensional embeddings.
//
// Pre-sizes the entry table to 1,000,000 default-constructed entries and
// creates the per-thread arena allocators used by add_document().
VectorStore::VectorStore(size_t dim) : dim_(dim) {
    entries_.resize(1'000'000);  // Pre-size with default-constructed entries

    // One arena per potential OpenMP thread. The OpenMP call is guarded the
    // same way add_document() guards omp_get_thread_num(), so a build without
    // OpenMP still compiles; such a build gets a single arena, which is the
    // arena add_document() falls back to for out-of-range thread ids.
#ifdef _OPENMP
    const int max_threads = omp_get_max_threads();
#else
    const int max_threads = 1;
#endif

    // Never end up with zero arenas: add_document() indexes thread_arenas_[0]
    // as its fallback.
    const size_t arena_count = max_threads > 0 ? static_cast<size_t>(max_threads) : 1;

    thread_arenas_.reserve(arena_count);
    for (size_t i = 0; i < arena_count; ++i) {
        thread_arenas_.emplace_back(std::make_unique<ArenaAllocator>());
    }
}
|
|
114
122
|
|
|
115
|
-
|
|
123
|
+
// Returns the calling thread's BatchState. The object is a function-local
// thread_local, default-constructed the first time a given thread calls this.
VectorStore::BatchState& VectorStore::get_batch_state() {
    static thread_local BatchState per_thread_state;
    BatchState& ref = per_thread_state;
    return ref;
}
|
|
127
|
+
|
|
128
|
+
// Document overload: extracts the root JSON object and forwards it to the
// object overload. If the root is not readable as an object, the simdjson
// error is translated with map_simdjson_error() and returned.
VectorStoreError VectorStore::add_document(simdjson::ondemand::document& json_doc) {
    simdjson::ondemand::object root;
    if (auto status = json_doc.get_object().get(root); status) {
        return map_simdjson_error(status);
    }
    return add_document(root);
}
|
|
123
136
|
|
|
124
|
-
|
|
137
|
+
VectorStoreError VectorStore::add_document(simdjson::ondemand::object& json_doc) {
|
|
125
138
|
// Cannot add documents after finalization
|
|
126
139
|
if (is_finalized_.load(std::memory_order_acquire)) {
|
|
127
|
-
return
|
|
140
|
+
return VectorStoreError::STORE_ALREADY_FINALIZED;
|
|
128
141
|
}
|
|
129
142
|
|
|
130
143
|
// Parse with error handling
|
|
@@ -134,7 +147,7 @@ simdjson::error_code VectorStore::add_document(simdjson::ondemand::object& json_
|
|
|
134
147
|
if (error == simdjson::NO_SUCH_FIELD) {
|
|
135
148
|
fprintf(stderr, "Missing required field 'id'\n");
|
|
136
149
|
}
|
|
137
|
-
return error;
|
|
150
|
+
return map_simdjson_error(error);
|
|
138
151
|
}
|
|
139
152
|
|
|
140
153
|
// Auto-detect text field type on first document, then use that for all subsequent documents
|
|
@@ -158,10 +171,10 @@ simdjson::error_code VectorStore::add_document(simdjson::ondemand::object& json_
|
|
|
158
171
|
if (error == simdjson::NO_SUCH_FIELD) {
|
|
159
172
|
fprintf(stderr, "Missing required field 'text' or 'content'\n");
|
|
160
173
|
}
|
|
161
|
-
return error;
|
|
174
|
+
return map_simdjson_error(error);
|
|
162
175
|
}
|
|
163
176
|
} else {
|
|
164
|
-
return error;
|
|
177
|
+
return map_simdjson_error(error);
|
|
165
178
|
}
|
|
166
179
|
} else if (field_type == TextFieldType::TEXT) {
|
|
167
180
|
// Use 'text' field directly
|
|
@@ -170,7 +183,7 @@ simdjson::error_code VectorStore::add_document(simdjson::ondemand::object& json_
|
|
|
170
183
|
if (error == simdjson::NO_SUCH_FIELD) {
|
|
171
184
|
fprintf(stderr, "Missing required field 'text' (detected from first document)\n");
|
|
172
185
|
}
|
|
173
|
-
return error;
|
|
186
|
+
return map_simdjson_error(error);
|
|
174
187
|
}
|
|
175
188
|
} else { // TextFieldType::CONTENT
|
|
176
189
|
// Use 'content' field directly
|
|
@@ -179,27 +192,18 @@ simdjson::error_code VectorStore::add_document(simdjson::ondemand::object& json_
|
|
|
179
192
|
if (error == simdjson::NO_SUCH_FIELD) {
|
|
180
193
|
fprintf(stderr, "Missing required field 'content' (detected from first document)\n");
|
|
181
194
|
}
|
|
182
|
-
return error;
|
|
195
|
+
return map_simdjson_error(error);
|
|
183
196
|
}
|
|
184
197
|
}
|
|
185
198
|
|
|
186
|
-
//
|
|
187
|
-
size_t emb_size = dim_ * sizeof(float);
|
|
188
|
-
size_t id_size = id.size() + 1;
|
|
189
|
-
size_t text_size = text.size() + 1;
|
|
190
|
-
|
|
191
|
-
// Allocate temporary buffer for embedding
|
|
192
|
-
std::vector<float> temp_embedding;
|
|
193
|
-
temp_embedding.reserve(dim_);
|
|
194
|
-
|
|
195
|
-
// Process metadata and embedding first
|
|
199
|
+
// Process metadata and embedding first to get raw JSON before allocation
|
|
196
200
|
simdjson::ondemand::object metadata;
|
|
197
201
|
error = json_doc["metadata"].get_object().get(metadata);
|
|
198
202
|
if (error) {
|
|
199
203
|
if (error == simdjson::NO_SUCH_FIELD) {
|
|
200
204
|
fprintf(stderr, "Missing required field 'metadata'\n");
|
|
201
205
|
}
|
|
202
|
-
return error;
|
|
206
|
+
return map_simdjson_error(error);
|
|
203
207
|
}
|
|
204
208
|
|
|
205
209
|
simdjson::ondemand::array emb_array;
|
|
@@ -208,21 +212,27 @@ simdjson::error_code VectorStore::add_document(simdjson::ondemand::object& json_
|
|
|
208
212
|
if (error == simdjson::NO_SUCH_FIELD) {
|
|
209
213
|
fprintf(stderr, "Missing required field 'embedding' inside 'metadata'\n");
|
|
210
214
|
}
|
|
211
|
-
return error;
|
|
215
|
+
return map_simdjson_error(error);
|
|
212
216
|
}
|
|
213
217
|
|
|
214
|
-
//
|
|
218
|
+
// Use thread-local temporary buffer for embedding to avoid allocation/free per document
|
|
219
|
+
thread_local std::vector<float> temp_embedding;
|
|
220
|
+
temp_embedding.clear();
|
|
221
|
+
temp_embedding.reserve(dim_);
|
|
222
|
+
|
|
223
|
+
// Fill embedding into temporary buffer
|
|
215
224
|
size_t i = 0;
|
|
216
225
|
for (auto value_result : emb_array) {
|
|
217
226
|
simdjson::ondemand::value v;
|
|
218
227
|
error = value_result.get(v);
|
|
219
|
-
if (error) return error;
|
|
228
|
+
if (error) return map_simdjson_error(error);
|
|
220
229
|
double val;
|
|
221
230
|
error = v.get_double().get(val);
|
|
222
|
-
if (error) return error;
|
|
231
|
+
if (error) return map_simdjson_error(error);
|
|
223
232
|
|
|
224
233
|
if (i >= dim_) {
|
|
225
|
-
|
|
234
|
+
fprintf(stderr, "Too many embedding values: expected %zu, got at least %zu\n", dim_, i+1);
|
|
235
|
+
return VectorStoreError::DIMENSION_MISMATCH;
|
|
226
236
|
}
|
|
227
237
|
temp_embedding.push_back(float(val));
|
|
228
238
|
i++;
|
|
@@ -230,19 +240,39 @@ simdjson::error_code VectorStore::add_document(simdjson::ondemand::object& json_
|
|
|
230
240
|
|
|
231
241
|
// Verify we got the expected number of embedding values
|
|
232
242
|
if (i != dim_) {
|
|
233
|
-
|
|
243
|
+
fprintf(stderr, "Wrong embedding dimension: expected %zu, got %zu\n", dim_, i);
|
|
244
|
+
return VectorStoreError::DIMENSION_MISMATCH;
|
|
234
245
|
}
|
|
235
246
|
|
|
236
247
|
// Now it is safe to take the raw metadata JSON
|
|
237
248
|
std::string_view raw_json;
|
|
238
249
|
error = metadata.raw_json().get(raw_json);
|
|
239
|
-
if (error) return error;
|
|
250
|
+
if (error) return map_simdjson_error(error);
|
|
251
|
+
|
|
252
|
+
// Calculate sizes
|
|
253
|
+
size_t emb_size = dim_ * sizeof(float);
|
|
254
|
+
size_t id_size = id.size() + 1;
|
|
255
|
+
size_t text_size = text.size() + 1;
|
|
240
256
|
size_t meta_size = raw_json.size() + 1;
|
|
241
257
|
|
|
242
|
-
//
|
|
243
|
-
|
|
258
|
+
// Use per-thread arena allocator for zero-contention allocation
|
|
259
|
+
// Get thread ID and dispatch to appropriate arena
|
|
260
|
+
#ifdef _OPENMP
|
|
261
|
+
int tid = omp_get_thread_num();
|
|
262
|
+
#else
|
|
263
|
+
// For non-OpenMP builds, assign each std::thread a small integer ID
|
|
264
|
+
static std::atomic<size_t> counter{0};
|
|
265
|
+
static thread_local size_t tid = counter++;
|
|
266
|
+
#endif
|
|
267
|
+
|
|
268
|
+
// Ensure thread ID is within bounds
|
|
269
|
+
if (tid >= static_cast<int>(thread_arenas_.size())) {
|
|
270
|
+
tid = 0; // Fallback to first arena
|
|
271
|
+
}
|
|
272
|
+
|
|
273
|
+
char* base = (char*)thread_arenas_[tid]->allocate(emb_size + id_size + text_size + meta_size);
|
|
244
274
|
if (!base) {
|
|
245
|
-
return
|
|
275
|
+
return VectorStoreError::MEMORY_ALLOCATION_FAILED;
|
|
246
276
|
}
|
|
247
277
|
|
|
248
278
|
// Layout: [embedding][id][text][metadata_json]
|
|
@@ -251,7 +281,7 @@ simdjson::error_code VectorStore::add_document(simdjson::ondemand::object& json_
|
|
|
251
281
|
char* text_ptr = id_ptr + id_size;
|
|
252
282
|
char* meta_ptr = text_ptr + text_size;
|
|
253
283
|
|
|
254
|
-
// Copy embedding from
|
|
284
|
+
// Copy embedding from thread-local buffer (no heap allocation per call)
|
|
255
285
|
std::memcpy(emb_ptr, temp_embedding.data(), emb_size);
|
|
256
286
|
|
|
257
287
|
// Copy strings (adding null terminator)
|
|
@@ -270,7 +300,7 @@ simdjson::error_code VectorStore::add_document(simdjson::ondemand::object& json_
|
|
|
270
300
|
// Bounds check
|
|
271
301
|
if (idx >= entries_.size()) {
|
|
272
302
|
count_.fetch_sub(1, std::memory_order_relaxed);
|
|
273
|
-
return
|
|
303
|
+
return VectorStoreError::CAPACITY_EXCEEDED;
|
|
274
304
|
}
|
|
275
305
|
|
|
276
306
|
// Construct entry directly - no synchronization needed
|
|
@@ -286,7 +316,7 @@ simdjson::error_code VectorStore::add_document(simdjson::ondemand::object& json_
|
|
|
286
316
|
|
|
287
317
|
entries_[idx] = entry;
|
|
288
318
|
|
|
289
|
-
return
|
|
319
|
+
return VectorStoreError::SUCCESS;
|
|
290
320
|
}
|
|
291
321
|
|
|
292
322
|
void VectorStore::finalize() {
|
|
@@ -399,4 +429,4 @@ size_t VectorStore::size() const {
|
|
|
399
429
|
|
|
400
430
|
bool VectorStore::is_finalized() const {
|
|
401
431
|
return is_finalized_.load(std::memory_order_acquire);
|
|
402
|
-
}
|
|
432
|
+
}
|
package/src/vector_store.h
CHANGED
|
@@ -13,6 +13,138 @@
|
|
|
13
13
|
#include <algorithm>
|
|
14
14
|
#include <functional>
|
|
15
15
|
|
|
16
|
+
// Custom error codes for VectorStore
|
|
17
|
+
// Error codes returned by VectorStore operations. SUCCESS is explicitly 0;
// the remaining enumerators take consecutive values in declaration order.
// Human-readable text for each value is provided by vector_store_error_message().
enum class VectorStoreError {
    SUCCESS = 0,
    MEMORY_ALLOCATION_FAILED,   // also the mapping target for simdjson MEMALLOC
    DIMENSION_MISMATCH,         // embedding length differs from the store dimension
    MISSING_FIELD,
    WRONG_TYPE,
    STORE_NOT_FINALIZED,
    STORE_ALREADY_FINALIZED,    // add_document() called after finalization
    CAPACITY_EXCEEDED,          // no free slot left in the pre-sized entry table
    JSON_PARSE_ERROR,           // fallback for simdjson codes without a dedicated entry
    FILE_IO_ERROR,
    UNKNOWN_ERROR,
    // JSON parsing specific errors (mapped from simdjson)
    JSON_CAPACITY,
    JSON_TAPE_ERROR,
    JSON_DEPTH_ERROR,
    JSON_STRING_ERROR,
    JSON_T_ATOM_ERROR,
    JSON_F_ATOM_ERROR,
    JSON_N_ATOM_ERROR,
    JSON_NUMBER_ERROR,
    JSON_UTF8_ERROR,
    JSON_UNINITIALIZED,
    JSON_EMPTY,
    JSON_UNESCAPED_CHARS,
    JSON_UNCLOSED_STRING,
    JSON_UNSUPPORTED_ARCHITECTURE,
    JSON_INCORRECT_TYPE,
    JSON_NUMBER_OUT_OF_RANGE,
    JSON_INDEX_OUT_OF_BOUNDS,
    JSON_NO_SUCH_FIELD,
    JSON_IO_ERROR,
    JSON_INVALID_JSON_POINTER,
    JSON_INVALID_URI_FRAGMENT,
    JSON_UNEXPECTED_ERROR,
    JSON_PARSER_IN_USE,
    JSON_OUT_OF_ORDER_ITERATION,
    JSON_INSUFFICIENT_PADDING,
    JSON_INCOMPLETE_ARRAY_OR_OBJECT,
    JSON_SCALAR_DOCUMENT_AS_VALUE,
    JSON_OUT_OF_BOUNDS,
    JSON_TRAILING_CONTENT
};
|
|
60
|
+
|
|
61
|
+
// Map simdjson error to VectorStoreError
|
|
62
|
+
// Translates a simdjson error code into the corresponding VectorStoreError.
// MEMALLOC maps to MEMORY_ALLOCATION_FAILED; every other listed code maps to
// its JSON_-prefixed counterpart. Codes not listed yield JSON_PARSE_ERROR.
inline VectorStoreError map_simdjson_error(simdjson::error_code error) {
    using E = VectorStoreError;
    namespace sj = simdjson;

    switch (error) {
        case sj::SUCCESS:                    return E::SUCCESS;
        case sj::CAPACITY:                   return E::JSON_CAPACITY;
        case sj::MEMALLOC:                   return E::MEMORY_ALLOCATION_FAILED;
        case sj::TAPE_ERROR:                 return E::JSON_TAPE_ERROR;
        case sj::DEPTH_ERROR:                return E::JSON_DEPTH_ERROR;
        case sj::STRING_ERROR:               return E::JSON_STRING_ERROR;
        case sj::T_ATOM_ERROR:               return E::JSON_T_ATOM_ERROR;
        case sj::F_ATOM_ERROR:               return E::JSON_F_ATOM_ERROR;
        case sj::N_ATOM_ERROR:               return E::JSON_N_ATOM_ERROR;
        case sj::NUMBER_ERROR:               return E::JSON_NUMBER_ERROR;
        case sj::UTF8_ERROR:                 return E::JSON_UTF8_ERROR;
        case sj::UNINITIALIZED:              return E::JSON_UNINITIALIZED;
        case sj::EMPTY:                      return E::JSON_EMPTY;
        case sj::UNESCAPED_CHARS:            return E::JSON_UNESCAPED_CHARS;
        case sj::UNCLOSED_STRING:            return E::JSON_UNCLOSED_STRING;
        case sj::UNSUPPORTED_ARCHITECTURE:   return E::JSON_UNSUPPORTED_ARCHITECTURE;
        case sj::INCORRECT_TYPE:             return E::JSON_INCORRECT_TYPE;
        case sj::NUMBER_OUT_OF_RANGE:        return E::JSON_NUMBER_OUT_OF_RANGE;
        case sj::INDEX_OUT_OF_BOUNDS:        return E::JSON_INDEX_OUT_OF_BOUNDS;
        case sj::NO_SUCH_FIELD:              return E::JSON_NO_SUCH_FIELD;
        case sj::IO_ERROR:                   return E::JSON_IO_ERROR;
        case sj::INVALID_JSON_POINTER:       return E::JSON_INVALID_JSON_POINTER;
        case sj::INVALID_URI_FRAGMENT:       return E::JSON_INVALID_URI_FRAGMENT;
        case sj::UNEXPECTED_ERROR:           return E::JSON_UNEXPECTED_ERROR;
        case sj::PARSER_IN_USE:              return E::JSON_PARSER_IN_USE;
        case sj::OUT_OF_ORDER_ITERATION:     return E::JSON_OUT_OF_ORDER_ITERATION;
        case sj::INSUFFICIENT_PADDING:       return E::JSON_INSUFFICIENT_PADDING;
        case sj::INCOMPLETE_ARRAY_OR_OBJECT: return E::JSON_INCOMPLETE_ARRAY_OR_OBJECT;
        case sj::SCALAR_DOCUMENT_AS_VALUE:   return E::JSON_SCALAR_DOCUMENT_AS_VALUE;
        case sj::OUT_OF_BOUNDS:              return E::JSON_OUT_OF_BOUNDS;
        case sj::TRAILING_CONTENT:           return E::JSON_TRAILING_CONTENT;
        default:
            break;
    }
    // Any simdjson code without a dedicated entry above.
    return E::JSON_PARSE_ERROR;
}
|
|
99
|
+
|
|
100
|
+
// Convert VectorStoreError to string for error messages
|
|
101
|
+
// Returns a static, human-readable description of a VectorStoreError.
// Values without a dedicated entry yield "Unknown error".
inline const char* vector_store_error_message(VectorStoreError error) {
    using E = VectorStoreError;

    switch (error) {
        // General store errors
        case E::SUCCESS:                         return "Success";
        case E::MEMORY_ALLOCATION_FAILED:        return "Memory allocation failed";
        case E::DIMENSION_MISMATCH:              return "Embedding dimension mismatch";
        case E::MISSING_FIELD:                   return "Required field missing";
        case E::WRONG_TYPE:                      return "Wrong field type";
        case E::STORE_NOT_FINALIZED:             return "Store must be finalized before searching";
        case E::STORE_ALREADY_FINALIZED:         return "Store already finalized, cannot add more documents";
        case E::CAPACITY_EXCEEDED:               return "Store capacity exceeded";
        case E::JSON_PARSE_ERROR:                return "JSON parsing error";
        case E::FILE_IO_ERROR:                   return "File I/O error";
        case E::UNKNOWN_ERROR:                   return "Unknown error";

        // JSON specific errors
        case E::JSON_CAPACITY:                   return "JSON parser capacity exceeded";
        case E::JSON_TAPE_ERROR:                 return "JSON tape error";
        case E::JSON_DEPTH_ERROR:                return "JSON depth error";
        case E::JSON_STRING_ERROR:               return "JSON string error";
        case E::JSON_T_ATOM_ERROR:               return "JSON 'true' atom error";
        case E::JSON_F_ATOM_ERROR:               return "JSON 'false' atom error";
        case E::JSON_N_ATOM_ERROR:               return "JSON 'null' atom error";
        case E::JSON_NUMBER_ERROR:               return "JSON number error";
        case E::JSON_UTF8_ERROR:                 return "JSON UTF-8 error";
        case E::JSON_UNINITIALIZED:              return "JSON parser uninitialized";
        case E::JSON_EMPTY:                      return "JSON document empty";
        case E::JSON_UNESCAPED_CHARS:            return "JSON unescaped characters";
        case E::JSON_UNCLOSED_STRING:            return "JSON unclosed string";
        case E::JSON_UNSUPPORTED_ARCHITECTURE:   return "JSON unsupported architecture";
        case E::JSON_INCORRECT_TYPE:             return "JSON incorrect type";
        case E::JSON_NUMBER_OUT_OF_RANGE:        return "JSON number out of range";
        case E::JSON_INDEX_OUT_OF_BOUNDS:        return "JSON index out of bounds";
        case E::JSON_NO_SUCH_FIELD:              return "JSON field not found";
        case E::JSON_IO_ERROR:                   return "JSON I/O error";
        case E::JSON_INVALID_JSON_POINTER:       return "JSON invalid pointer";
        case E::JSON_INVALID_URI_FRAGMENT:       return "JSON invalid URI fragment";
        case E::JSON_UNEXPECTED_ERROR:           return "JSON unexpected error";
        case E::JSON_PARSER_IN_USE:              return "JSON parser in use";
        case E::JSON_OUT_OF_ORDER_ITERATION:     return "JSON out of order iteration";
        case E::JSON_INSUFFICIENT_PADDING:       return "JSON insufficient padding";
        case E::JSON_INCOMPLETE_ARRAY_OR_OBJECT: return "JSON incomplete array or object";
        case E::JSON_SCALAR_DOCUMENT_AS_VALUE:   return "JSON scalar document as value";
        case E::JSON_OUT_OF_BOUNDS:              return "JSON out of bounds";
        case E::JSON_TRAILING_CONTENT:           return "JSON trailing content";

        default:
            break;
    }
    return "Unknown error";
}
|
|
147
|
+
|
|
16
148
|
class ArenaAllocator {
|
|
17
149
|
static constexpr size_t CHUNK_SIZE = 1 << 26; // 64MB chunks
|
|
18
150
|
struct Chunk {
|
|
@@ -31,6 +163,7 @@ public:
|
|
|
31
163
|
~ArenaAllocator();
|
|
32
164
|
};
|
|
33
165
|
|
|
166
|
+
|
|
34
167
|
struct Document {
|
|
35
168
|
std::string_view id;
|
|
36
169
|
std::string_view text;
|
|
@@ -69,6 +202,9 @@ private:
|
|
|
69
202
|
const size_t dim_;
|
|
70
203
|
ArenaAllocator arena_;
|
|
71
204
|
|
|
205
|
+
// Per-thread arena allocators for zero-contention parallel allocation
|
|
206
|
+
std::vector<std::unique_ptr<ArenaAllocator>> thread_arenas_;
|
|
207
|
+
|
|
72
208
|
std::vector<Entry> entries_;
|
|
73
209
|
std::atomic<size_t> count_{0}; // Atomic for parallel loading
|
|
74
210
|
std::atomic<bool> is_finalized_{false}; // Simple flag: false = loading, true = serving
|
|
@@ -82,9 +218,31 @@ public:
|
|
|
82
218
|
explicit VectorStore(size_t dim);
|
|
83
219
|
|
|
84
220
|
// Overload for document type (used in test_main.cpp)
|
|
85
|
-
|
|
221
|
+
VectorStoreError add_document(simdjson::ondemand::document& json_doc);
|
|
222
|
+
|
|
223
|
+
VectorStoreError add_document(simdjson::ondemand::object& json_doc);
|
|
224
|
+
|
|
225
|
+
// Batch processing with index reservation
|
|
226
|
+
// Thread-local state for batched operations
|
|
227
|
+
// Hands out indices to one caller in contiguous batches, so the shared
// counter is touched once per `batch_size` reservations instead of once per
// call. Note that the shared counter therefore advances in whole batches.
struct BatchState {
    size_t batch_size;    // number of indices claimed from the shared counter at a time
    size_t batch_start;   // first index of the batch currently being handed out
    size_t batch_offset;  // how many indices of the current batch are already used

    // Starts with an exhausted batch (offset == size) so the very first
    // reserve_next() claims a real range from the shared counter. Starting at
    // offset 0 would hand out indices 0..size-1 without reserving them, and
    // they would then be handed out a second time by the first real
    // reservation. A size of 0 is treated as 1 so every call still advances
    // the counter.
    BatchState(size_t size = 128)
        : batch_size(size == 0 ? 1 : size), batch_start(0), batch_offset(batch_size) {}

    // Returns the next reserved index, claiming a fresh batch of `batch_size`
    // indices from `count` whenever the current batch is used up.
    size_t reserve_next(std::atomic<size_t>& count) {
        if (batch_offset >= batch_size) {
            // Need new batch
            batch_start = count.fetch_add(batch_size, std::memory_order_relaxed);
            batch_offset = 0;
        }
        return batch_start + batch_offset++;
    }
};
|
|
86
243
|
|
|
87
|
-
|
|
244
|
+
// Get thread-local batch state
|
|
245
|
+
static BatchState& get_batch_state();
|
|
88
246
|
|
|
89
247
|
// Finalize the store: normalize and switch to serving phase
|
|
90
248
|
void finalize();
|
|
@@ -92,13 +92,16 @@ void VectorStoreLoader::loadDirectory(VectorStore* store, const std::string& pat
|
|
|
92
92
|
|
|
93
93
|
for (size_t w = 0; w < num_workers; ++w) {
|
|
94
94
|
consumers.emplace_back([&]() {
|
|
95
|
-
// Each thread needs its own parser
|
|
96
|
-
simdjson::ondemand::parser doc_parser;
|
|
95
|
+
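// Each thread needs its own parser. NOTE(review): the simdjson ondemand::parser constructor argument is the parser's MAXIMUM capacity, not an initial capacity - verify against the simdjson version in use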
|
|
96
|
+
simdjson::ondemand::parser doc_parser(16 * 1024 * 1024); // 16MB initial capacity
|
|
97
|
+
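// NOTE(review): allocate() does not raise the maximum capacity; a 512MB request on a parser constructed with a 16MB maximum is expected to return CAPACITY, and the result is ignored here - verify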
|
|
98
|
+
doc_parser.allocate(512 * 1024 * 1024);
|
|
97
99
|
FileData* data = nullptr;
|
|
98
100
|
|
|
99
101
|
while (true) {
|
|
100
102
|
// Try to get work from queue
|
|
101
103
|
if (queue.try_pop(data)) {
|
|
104
|
+
|
|
102
105
|
// Process the file
|
|
103
106
|
simdjson::padded_string json(data->content);
|
|
104
107
|
|
|
@@ -132,16 +135,18 @@ void VectorStoreLoader::loadDirectory(VectorStore* store, const std::string& pat
|
|
|
132
135
|
error = doc_element.get_object().get(obj);
|
|
133
136
|
if (!error) {
|
|
134
137
|
auto add_error = store->add_document(obj);
|
|
135
|
-
if (add_error) {
|
|
138
|
+
if (add_error != VectorStoreError::SUCCESS) {
|
|
136
139
|
fprintf(stderr, "Error adding document from %s: %s\n",
|
|
137
|
-
data->filename.c_str(),
|
|
138
|
-
if (add_error ==
|
|
140
|
+
data->filename.c_str(), vector_store_error_message(add_error));
|
|
141
|
+
if (add_error == VectorStoreError::JSON_NO_SUCH_FIELD || add_error == VectorStoreError::MISSING_FIELD) {
|
|
139
142
|
fprintf(stderr, " Expected JSON format: {\"id\": string, \"text\": string, \"metadata\": {\"embedding\": [numbers...]}}\n");
|
|
140
143
|
fprintf(stderr, " Required fields: id, text (or content), metadata.embedding\n");
|
|
141
144
|
fprintf(stderr, " Note: 'embedding' must be inside 'metadata' object\n");
|
|
142
145
|
fprintf(stderr, " Note: 'text' and 'content' are interchangeable (Spring AI compatibility)\n");
|
|
143
|
-
} else if (add_error ==
|
|
144
|
-
fprintf(stderr, "
|
|
146
|
+
} else if (add_error == VectorStoreError::DIMENSION_MISMATCH) {
|
|
147
|
+
fprintf(stderr, " Check that all embeddings have the same dimensions\n");
|
|
148
|
+
} else if (add_error == VectorStoreError::STORE_ALREADY_FINALIZED) {
|
|
149
|
+
fprintf(stderr, " Store has been finalized and cannot accept new documents\n");
|
|
145
150
|
}
|
|
146
151
|
}
|
|
147
152
|
}
|
|
@@ -152,16 +157,18 @@ void VectorStoreLoader::loadDirectory(VectorStore* store, const std::string& pat
|
|
|
152
157
|
error = doc.get_object().get(obj);
|
|
153
158
|
if (!error) {
|
|
154
159
|
auto add_error = store->add_document(obj);
|
|
155
|
-
if (add_error) {
|
|
160
|
+
if (add_error != VectorStoreError::SUCCESS) {
|
|
156
161
|
fprintf(stderr, "Error adding document from %s: %s\n",
|
|
157
|
-
data->filename.c_str(),
|
|
158
|
-
if (add_error ==
|
|
162
|
+
data->filename.c_str(), vector_store_error_message(add_error));
|
|
163
|
+
if (add_error == VectorStoreError::JSON_NO_SUCH_FIELD || add_error == VectorStoreError::MISSING_FIELD) {
|
|
159
164
|
fprintf(stderr, " Expected JSON format: {\"id\": string, \"text\": string, \"metadata\": {\"embedding\": [numbers...]}}\n");
|
|
160
165
|
fprintf(stderr, " Required fields: id, text (or content), metadata.embedding\n");
|
|
161
166
|
fprintf(stderr, " Note: 'embedding' must be inside 'metadata' object\n");
|
|
162
167
|
fprintf(stderr, " Note: 'text' and 'content' are interchangeable (Spring AI compatibility)\n");
|
|
163
|
-
} else if (add_error ==
|
|
164
|
-
fprintf(stderr, "
|
|
168
|
+
} else if (add_error == VectorStoreError::DIMENSION_MISMATCH) {
|
|
169
|
+
fprintf(stderr, " Check that all embeddings have the same dimensions\n");
|
|
170
|
+
} else if (add_error == VectorStoreError::STORE_ALREADY_FINALIZED) {
|
|
171
|
+
fprintf(stderr, " Store has been finalized and cannot accept new documents\n");
|
|
165
172
|
}
|
|
166
173
|
}
|
|
167
174
|
}
|
|
@@ -189,4 +196,4 @@ void VectorStoreLoader::loadDirectory(VectorStore* store, const std::string& pat
|
|
|
189
196
|
|
|
190
197
|
// Finalize after batch load - normalize and switch to serving phase
|
|
191
198
|
store->finalize();
|
|
192
|
-
}
|
|
199
|
+
}
|
|
@@ -132,8 +132,10 @@ void VectorStoreLoader::loadDirectoryAdaptive(VectorStore* store, const std::str
|
|
|
132
132
|
|
|
133
133
|
for (size_t w = 0; w < num_workers; ++w) {
|
|
134
134
|
consumers.emplace_back([&]() {
|
|
135
|
-
// Each thread needs its own parser
|
|
136
|
-
simdjson::ondemand::parser doc_parser;
|
|
135
|
+
// Each thread needs its own parser with initial capacity
|
|
136
|
+
simdjson::ondemand::parser doc_parser(16 * 1024 * 1024); // 16MB initial capacity
|
|
137
|
+
// Set a larger maximum capacity for very large files (up to 512MB)
|
|
138
|
+
doc_parser.allocate(512 * 1024 * 1024);
|
|
137
139
|
MixedFileData* data = nullptr;
|
|
138
140
|
|
|
139
141
|
while (true) {
|
|
@@ -144,6 +146,7 @@ void VectorStoreLoader::loadDirectoryAdaptive(VectorStore* store, const std::str
|
|
|
144
146
|
? simdjson::padded_string(data->mmap->data(), data->mmap->size())
|
|
145
147
|
: simdjson::padded_string(data->content);
|
|
146
148
|
|
|
149
|
+
|
|
147
150
|
// Check if it's an array or object
|
|
148
151
|
const char* json_start = json.data();
|
|
149
152
|
while (json_start && *json_start && std::isspace(*json_start)) {
|
|
@@ -176,9 +179,9 @@ void VectorStoreLoader::loadDirectoryAdaptive(VectorStore* store, const std::str
|
|
|
176
179
|
error = doc_element.get_object().get(obj);
|
|
177
180
|
if (!error) {
|
|
178
181
|
auto add_error = store->add_document(obj);
|
|
179
|
-
if (add_error) {
|
|
182
|
+
if (add_error != VectorStoreError::SUCCESS) {
|
|
180
183
|
fprintf(stderr, "Error adding document from %s: %s\n",
|
|
181
|
-
data->filename.c_str(),
|
|
184
|
+
data->filename.c_str(), vector_store_error_message(add_error));
|
|
182
185
|
}
|
|
183
186
|
}
|
|
184
187
|
}
|
|
@@ -188,9 +191,9 @@ void VectorStoreLoader::loadDirectoryAdaptive(VectorStore* store, const std::str
|
|
|
188
191
|
error = doc.get_object().get(obj);
|
|
189
192
|
if (!error) {
|
|
190
193
|
auto add_error = store->add_document(obj);
|
|
191
|
-
if (add_error) {
|
|
194
|
+
if (add_error != VectorStoreError::SUCCESS) {
|
|
192
195
|
fprintf(stderr, "Error adding document from %s: %s\n",
|
|
193
|
-
data->filename.c_str(),
|
|
196
|
+
data->filename.c_str(), vector_store_error_message(add_error));
|
|
194
197
|
}
|
|
195
198
|
}
|
|
196
199
|
}
|
|
@@ -217,4 +220,4 @@ void VectorStoreLoader::loadDirectoryAdaptive(VectorStore* store, const std::str
|
|
|
217
220
|
|
|
218
221
|
// Finalize after batch load - normalize and switch to serving phase
|
|
219
222
|
store->finalize();
|
|
220
|
-
}
|
|
223
|
+
}
|
|
@@ -67,13 +67,16 @@ void VectorStoreLoader::loadDirectoryMMap(VectorStore* store, const std::string&
|
|
|
67
67
|
|
|
68
68
|
for (size_t w = 0; w < num_workers; ++w) {
|
|
69
69
|
consumers.emplace_back([&]() {
|
|
70
|
-
// Each thread needs its own parser
|
|
71
|
-
simdjson::ondemand::parser doc_parser;
|
|
70
|
+
// Each thread needs its own parser with initial capacity
|
|
71
|
+
simdjson::ondemand::parser doc_parser(16 * 1024 * 1024); // 16MB initial capacity
|
|
72
|
+
// Set a larger maximum capacity for very large files (up to 512MB)
|
|
73
|
+
doc_parser.set_max_capacity(512 * 1024 * 1024);
|
|
72
74
|
MMapFileData* data = nullptr;
|
|
73
75
|
|
|
74
76
|
while (true) {
|
|
75
77
|
// Try to get work from queue
|
|
76
78
|
if (queue.try_pop(data)) {
|
|
79
|
+
|
|
77
80
|
// Process the memory-mapped file
|
|
78
81
|
// For mmap, we need to copy to ensure padding
|
|
79
82
|
simdjson::padded_string json(data->mmap->data(), data->mmap->size());
|
|
@@ -110,9 +113,9 @@ void VectorStoreLoader::loadDirectoryMMap(VectorStore* store, const std::string&
|
|
|
110
113
|
error = doc_element.get_object().get(obj);
|
|
111
114
|
if (!error) {
|
|
112
115
|
auto add_error = store->add_document(obj);
|
|
113
|
-
if (add_error) {
|
|
116
|
+
if (add_error != VectorStoreError::SUCCESS) {
|
|
114
117
|
fprintf(stderr, "Error adding document from %s: %s\n",
|
|
115
|
-
data->filename.c_str(),
|
|
118
|
+
data->filename.c_str(), vector_store_error_message(add_error));
|
|
116
119
|
}
|
|
117
120
|
}
|
|
118
121
|
}
|
|
@@ -122,9 +125,9 @@ void VectorStoreLoader::loadDirectoryMMap(VectorStore* store, const std::string&
|
|
|
122
125
|
error = doc.get_object().get(obj);
|
|
123
126
|
if (!error) {
|
|
124
127
|
auto add_error = store->add_document(obj);
|
|
125
|
-
if (add_error) {
|
|
128
|
+
if (add_error != VectorStoreError::SUCCESS) {
|
|
126
129
|
fprintf(stderr, "Error adding document from %s: %s\n",
|
|
127
|
-
data->filename.c_str(),
|
|
130
|
+
data->filename.c_str(), vector_store_error_message(add_error));
|
|
128
131
|
}
|
|
129
132
|
}
|
|
130
133
|
}
|
|
Binary file
|
|
Binary file
|