vectlite 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1214 @@
1
+ use std::collections::BTreeMap;
2
+ use std::path::Path;
3
+ use std::sync::{Arc, Mutex, MutexGuard, RwLock, RwLockReadGuard, RwLockWriteGuard};
4
+
5
+ use napi::Error as NapiError;
6
+ use napi::bindgen_prelude::*;
7
+ use napi_derive::napi;
8
+ use serde_json::{Map, Number, Value, json};
9
+ use vectlite::{
10
+ Database as CoreDatabase, FusionStrategy, HybridSearchOptions, Metadata, MetadataFilter,
11
+ MetadataValue, NamedVectors, Record, SearchOutcome, SearchResult, SparseVector,
12
+ Store as CoreStore, WriteOperation,
13
+ };
14
+
15
/// Handle to a core database, exported to JavaScript under the name `NativeDatabase`.
+ #[napi(js_name = "NativeDatabase")]
16
+ pub struct NativeDatabase {
17
// Lock-guarded core database; the `Arc` is cloned into every `NativeTransaction` created by `transaction()`.
+ inner: Arc<RwLock<CoreDatabase>>,
18
+ }
19
+
20
/// Buffer of staged write operations, exported to JavaScript under the name `NativeTransaction`.
+ #[napi(js_name = "NativeTransaction")]
21
+ pub struct NativeTransaction {
22
// Same lock-guarded core database as the `NativeDatabase` that created this transaction.
+ inner: Arc<RwLock<CoreDatabase>>,
23
// Staged operations plus the closed flag, guarded by a mutex because methods only receive `&self`.
+ staged: Mutex<TransactionState>,
24
+ }
25
+
26
// Mutable state of a transaction; the derived default is an empty, open transaction.
+ #[derive(Default)]
27
+ struct TransactionState {
28
// Write operations staged so far, in the order they were staged.
+ ops: Vec<WriteOperation>,
29
// Set by `commit` and `rollback`; once set, staging further operations is rejected.
+ closed: bool,
30
+ }
31
+
32
/// Handle to a core store of named collections, exported to JavaScript under the name `NativeStore`.
+ #[napi(js_name = "NativeStore")]
33
+ pub struct NativeStore {
34
// Wrapped core store; every exported method delegates to it.
+ inner: CoreStore,
35
+ }
36
+
37
// Search options after parsing the JSON payload handed over from JavaScript (built by `parse_search_request`).
+ struct SearchRequest {
38
// Namespace to search when `all_namespaces` is false.
+ namespace: String,
39
// When true, the search runs across all namespaces and `namespace` is not used.
+ all_namespaces: bool,
40
// Sparse query terms; an empty vector is passed to the core as "no sparse query".
+ sparse: SparseVector,
41
// Options forwarded to the core hybrid search.
+ options: HybridSearchOptions,
42
// Whether each serialized result gets an additional "explain" object.
+ explain: bool,
43
// Fusion strategy name as given by the caller; echoed in the "explain" output.
+ fusion_name: String,
44
+ }
45
+
46
+ #[napi]
47
+ impl NativeStore {
48
+ #[napi(getter)]
49
+ pub fn root(&self) -> String {
50
+ self.inner.root().display().to_string()
51
+ }
52
+
53
+ #[napi(js_name = "createCollection")]
54
+ pub fn create_collection(&self, name: String, dimension: u32) -> Result<NativeDatabase> {
55
+ let database = self
56
+ .inner
57
+ .create_collection(&name, dimension as usize)
58
+ .map_err(to_napi_error)?;
59
+ Ok(NativeDatabase {
60
+ inner: Arc::new(RwLock::new(database)),
61
+ })
62
+ }
63
+
64
+ #[napi(js_name = "openCollection")]
65
+ pub fn open_collection(&self, name: String) -> Result<NativeDatabase> {
66
+ let database = self.inner.open_collection(&name).map_err(to_napi_error)?;
67
+ Ok(NativeDatabase {
68
+ inner: Arc::new(RwLock::new(database)),
69
+ })
70
+ }
71
+
72
+ #[napi(js_name = "openOrCreateCollection")]
73
+ pub fn open_or_create_collection(
74
+ &self,
75
+ name: String,
76
+ dimension: u32,
77
+ ) -> Result<NativeDatabase> {
78
+ let database = self
79
+ .inner
80
+ .open_or_create_collection(&name, dimension as usize)
81
+ .map_err(to_napi_error)?;
82
+ Ok(NativeDatabase {
83
+ inner: Arc::new(RwLock::new(database)),
84
+ })
85
+ }
86
+
87
+ #[napi(js_name = "openCollectionReadOnly")]
88
+ pub fn open_collection_read_only(&self, name: String) -> Result<NativeDatabase> {
89
+ let database = self
90
+ .inner
91
+ .open_collection_read_only(&name)
92
+ .map_err(to_napi_error)?;
93
+ Ok(NativeDatabase {
94
+ inner: Arc::new(RwLock::new(database)),
95
+ })
96
+ }
97
+
98
+ #[napi(js_name = "dropCollection")]
99
+ pub fn drop_collection(&self, name: String) -> Result<bool> {
100
+ self.inner.drop_collection(&name).map_err(to_napi_error)
101
+ }
102
+
103
+ #[napi]
104
+ pub fn collections(&self) -> Result<Vec<String>> {
105
+ self.inner.collections().map_err(to_napi_error)
106
+ }
107
+ }
108
+
109
+ #[napi]
110
+ impl NativeDatabase {
111
+ #[napi(getter)]
112
+ pub fn path(&self) -> Result<String> {
113
+ let database = self.read()?;
114
+ Ok(database.path().display().to_string())
115
+ }
116
+
117
+ #[napi(getter, js_name = "walPath")]
118
+ pub fn wal_path(&self) -> Result<String> {
119
+ let database = self.read()?;
120
+ Ok(database.wal_path().display().to_string())
121
+ }
122
+
123
+ #[napi(getter)]
124
+ pub fn dimension(&self) -> Result<u32> {
125
+ let database = self.read()?;
126
+ Ok(database.dimension() as u32)
127
+ }
128
+
129
+ #[napi(getter, js_name = "readOnly")]
130
+ pub fn read_only(&self) -> Result<bool> {
131
+ let database = self.read()?;
132
+ Ok(database.is_read_only())
133
+ }
134
+
135
+ #[napi]
136
+ pub fn count(&self) -> Result<u32> {
137
+ let database = self.read()?;
138
+ Ok(database.len() as u32)
139
+ }
140
+
141
+ #[napi]
142
+ pub fn namespaces(&self) -> Result<Vec<String>> {
143
+ let database = self.read()?;
144
+ Ok(database.namespaces())
145
+ }
146
+
147
+ #[napi]
148
+ pub fn transaction(&self) -> NativeTransaction {
149
+ NativeTransaction {
150
+ inner: Arc::clone(&self.inner),
151
+ staged: Mutex::new(TransactionState::default()),
152
+ }
153
+ }
154
+
155
+ #[napi]
156
+ pub fn insert(
157
+ &self,
158
+ id: String,
159
+ vector: Vec<f64>,
160
+ metadata_json: Option<String>,
161
+ namespace: Option<String>,
162
+ sparse_json: Option<String>,
163
+ vectors_json: Option<String>,
164
+ ) -> Result<()> {
165
+ let metadata = parse_metadata_json(metadata_json)?;
166
+ let sparse = parse_sparse_json(sparse_json)?;
167
+ let vectors = parse_named_vectors_json(vectors_json)?;
168
+ let vector = js_vector_to_core(vector, "vector")?;
169
+ let mut database = self.write()?;
170
+ database
171
+ .insert_with_vectors_in_namespace(
172
+ namespace.unwrap_or_default(),
173
+ &id,
174
+ vector,
175
+ vectors,
176
+ sparse,
177
+ metadata,
178
+ )
179
+ .map_err(to_napi_error)
180
+ }
181
+
182
+ #[napi]
183
+ pub fn upsert(
184
+ &self,
185
+ id: String,
186
+ vector: Vec<f64>,
187
+ metadata_json: Option<String>,
188
+ namespace: Option<String>,
189
+ sparse_json: Option<String>,
190
+ vectors_json: Option<String>,
191
+ ) -> Result<()> {
192
+ let metadata = parse_metadata_json(metadata_json)?;
193
+ let sparse = parse_sparse_json(sparse_json)?;
194
+ let vectors = parse_named_vectors_json(vectors_json)?;
195
+ let vector = js_vector_to_core(vector, "vector")?;
196
+ let mut database = self.write()?;
197
+ database
198
+ .upsert_with_vectors_in_namespace(
199
+ namespace.unwrap_or_default(),
200
+ &id,
201
+ vector,
202
+ vectors,
203
+ sparse,
204
+ metadata,
205
+ )
206
+ .map_err(to_napi_error)
207
+ }
208
+
209
+ #[napi(js_name = "insertMany")]
210
+ pub fn insert_many(&self, records_json: String, namespace: Option<String>) -> Result<u32> {
211
+ let records = parse_record_batch_json(&records_json, namespace.as_deref())?;
212
+ let mut database = self.write()?;
213
+ database
214
+ .insert_many(records)
215
+ .map(|count| count as u32)
216
+ .map_err(to_napi_error)
217
+ }
218
+
219
+ #[napi(js_name = "upsertMany")]
220
+ pub fn upsert_many(&self, records_json: String, namespace: Option<String>) -> Result<u32> {
221
+ let records = parse_record_batch_json(&records_json, namespace.as_deref())?;
222
+ let mut database = self.write()?;
223
+ database
224
+ .upsert_many(records)
225
+ .map(|count| count as u32)
226
+ .map_err(to_napi_error)
227
+ }
228
+
229
+ #[napi(js_name = "bulkIngest")]
230
+ pub fn bulk_ingest(
231
+ &self,
232
+ records_json: String,
233
+ namespace: Option<String>,
234
+ batch_size: u32,
235
+ ) -> Result<u32> {
236
+ let records = parse_record_batch_json(&records_json, namespace.as_deref())?;
237
+ let mut database = self.write()?;
238
+ database
239
+ .bulk_ingest(records, batch_size as usize)
240
+ .map(|count| count as u32)
241
+ .map_err(to_napi_error)
242
+ }
243
+
244
+ #[napi]
245
+ pub fn get(&self, id: String, namespace: Option<String>) -> Result<Option<String>> {
246
+ let record = {
247
+ let database = self.read()?;
248
+ database
249
+ .get_in_namespace(&namespace.unwrap_or_default(), &id)
250
+ .cloned()
251
+ };
252
+
253
+ record
254
+ .map(|record| stringify_value(record_to_json(&record)))
255
+ .transpose()
256
+ }
257
+
258
+ #[napi]
259
+ pub fn delete(&self, id: String, namespace: Option<String>) -> Result<bool> {
260
+ let mut database = self.write()?;
261
+ database
262
+ .delete_in_namespace(&namespace.unwrap_or_default(), &id)
263
+ .map_err(to_napi_error)
264
+ }
265
+
266
+ #[napi(js_name = "deleteMany")]
267
+ pub fn delete_many(&self, ids: Vec<String>, namespace: Option<String>) -> Result<u32> {
268
+ let mut database = self.write()?;
269
+ database
270
+ .delete_many_in_namespace(&namespace.unwrap_or_default(), ids)
271
+ .map(|count| count as u32)
272
+ .map_err(to_napi_error)
273
+ }
274
+
275
+ #[napi]
276
+ pub fn flush(&self) -> Result<()> {
277
+ let mut database = self.write()?;
278
+ database.flush().map_err(to_napi_error)
279
+ }
280
+
281
+ #[napi]
282
+ pub fn compact(&self) -> Result<()> {
283
+ let mut database = self.write()?;
284
+ database.compact().map_err(to_napi_error)
285
+ }
286
+
287
+ #[napi]
288
+ pub fn snapshot(&self, dest: String) -> Result<()> {
289
+ let database = self.read()?;
290
+ database.snapshot(&dest).map_err(to_napi_error)
291
+ }
292
+
293
+ #[napi]
294
+ pub fn backup(&self, dest: String) -> Result<()> {
295
+ let database = self.read()?;
296
+ database.backup(&dest).map_err(to_napi_error)
297
+ }
298
+
299
+ #[napi]
300
+ pub fn search(&self, query: Option<Vec<f64>>, options_json: Option<String>) -> Result<String> {
301
+ let request = parse_search_request(options_json)?;
302
+ let query = query
303
+ .map(|vector| js_vector_to_core(vector, "query vector"))
304
+ .transpose()?;
305
+ let outcome = self.execute_search(query, &request)?;
306
+ stringify_value(search_results_to_json(
307
+ &outcome.results,
308
+ request.explain,
309
+ &request.fusion_name,
310
+ ))
311
+ }
312
+
313
+ #[napi(js_name = "searchWithStats")]
314
+ pub fn search_with_stats(
315
+ &self,
316
+ query: Option<Vec<f64>>,
317
+ options_json: Option<String>,
318
+ ) -> Result<String> {
319
+ let request = parse_search_request(options_json)?;
320
+ let query = query
321
+ .map(|vector| js_vector_to_core(vector, "query vector"))
322
+ .transpose()?;
323
+ let outcome = self.execute_search(query, &request)?;
324
+ stringify_value(search_outcome_to_json(
325
+ &outcome,
326
+ request.explain,
327
+ &request.fusion_name,
328
+ ))
329
+ }
330
+ }
331
+
332
+ #[napi]
333
+ impl NativeTransaction {
334
+ #[napi]
335
+ pub fn count(&self) -> Result<u32> {
336
+ let state = self.state()?;
337
+ Ok(state.ops.len() as u32)
338
+ }
339
+
340
+ #[napi]
341
+ pub fn insert(
342
+ &self,
343
+ id: String,
344
+ vector: Vec<f64>,
345
+ metadata_json: Option<String>,
346
+ namespace: Option<String>,
347
+ sparse_json: Option<String>,
348
+ vectors_json: Option<String>,
349
+ ) -> Result<()> {
350
+ let metadata = parse_metadata_json(metadata_json)?;
351
+ let sparse = parse_sparse_json(sparse_json)?;
352
+ let vectors = parse_named_vectors_json(vectors_json)?;
353
+ let vector = js_vector_to_core(vector, "vector")?;
354
+ self.stage(WriteOperation::Insert(Record {
355
+ namespace: namespace.unwrap_or_default(),
356
+ id,
357
+ vector,
358
+ vectors,
359
+ sparse,
360
+ metadata,
361
+ }))
362
+ }
363
+
364
+ #[napi]
365
+ pub fn upsert(
366
+ &self,
367
+ id: String,
368
+ vector: Vec<f64>,
369
+ metadata_json: Option<String>,
370
+ namespace: Option<String>,
371
+ sparse_json: Option<String>,
372
+ vectors_json: Option<String>,
373
+ ) -> Result<()> {
374
+ let metadata = parse_metadata_json(metadata_json)?;
375
+ let sparse = parse_sparse_json(sparse_json)?;
376
+ let vectors = parse_named_vectors_json(vectors_json)?;
377
+ let vector = js_vector_to_core(vector, "vector")?;
378
+ self.stage(WriteOperation::Upsert(Record {
379
+ namespace: namespace.unwrap_or_default(),
380
+ id,
381
+ vector,
382
+ vectors,
383
+ sparse,
384
+ metadata,
385
+ }))
386
+ }
387
+
388
+ #[napi(js_name = "insertMany")]
389
+ pub fn insert_many(&self, records_json: String, namespace: Option<String>) -> Result<u32> {
390
+ let records = parse_record_batch_json(&records_json, namespace.as_deref())?;
391
+ let count = records.len() as u32;
392
+ for record in records {
393
+ self.stage(WriteOperation::Insert(record))?;
394
+ }
395
+ Ok(count)
396
+ }
397
+
398
+ #[napi(js_name = "upsertMany")]
399
+ pub fn upsert_many(&self, records_json: String, namespace: Option<String>) -> Result<u32> {
400
+ let records = parse_record_batch_json(&records_json, namespace.as_deref())?;
401
+ let count = records.len() as u32;
402
+ for record in records {
403
+ self.stage(WriteOperation::Upsert(record))?;
404
+ }
405
+ Ok(count)
406
+ }
407
+
408
+ #[napi]
409
+ pub fn delete(&self, id: String, namespace: Option<String>) -> Result<bool> {
410
+ self.stage(WriteOperation::Delete {
411
+ namespace: namespace.unwrap_or_default(),
412
+ id,
413
+ })?;
414
+ Ok(true)
415
+ }
416
+
417
+ #[napi(js_name = "deleteMany")]
418
+ pub fn delete_many(&self, ids: Vec<String>, namespace: Option<String>) -> Result<u32> {
419
+ let namespace = namespace.unwrap_or_default();
420
+ let count = ids.len() as u32;
421
+ for id in ids {
422
+ self.stage(WriteOperation::Delete {
423
+ namespace: namespace.clone(),
424
+ id,
425
+ })?;
426
+ }
427
+ Ok(count)
428
+ }
429
+
430
+ #[napi]
431
+ pub fn commit(&self) -> Result<()> {
432
+ let ops = {
433
+ let mut state = self.state()?;
434
+ if state.closed {
435
+ return Ok(());
436
+ }
437
+ state.closed = true;
438
+ std::mem::take(&mut state.ops)
439
+ };
440
+ if ops.is_empty() {
441
+ return Ok(());
442
+ }
443
+ let mut database = self.write_db()?;
444
+ database.apply_operations(ops).map_err(to_napi_error)
445
+ }
446
+
447
+ #[napi]
448
+ pub fn rollback(&self) -> Result<()> {
449
+ let mut state = self.state()?;
450
+ state.closed = true;
451
+ state.ops.clear();
452
+ Ok(())
453
+ }
454
+ }
455
+
456
+ impl NativeTransaction {
457
+ fn stage(&self, op: WriteOperation) -> Result<()> {
458
+ let mut state = self.state()?;
459
+ if state.closed {
460
+ return Err(err("transaction is already closed"));
461
+ }
462
+ state.ops.push(op);
463
+ Ok(())
464
+ }
465
+
466
+ fn state(&self) -> Result<MutexGuard<'_, TransactionState>> {
467
+ self.staged
468
+ .lock()
469
+ .map_err(|_| err("transaction state lock poisoned"))
470
+ }
471
+
472
+ fn write_db(&self) -> Result<RwLockWriteGuard<'_, CoreDatabase>> {
473
+ self.inner
474
+ .write()
475
+ .map_err(|_| err("database write lock poisoned"))
476
+ }
477
+ }
478
+
479
+ impl NativeDatabase {
480
+ fn read(&self) -> Result<RwLockReadGuard<'_, CoreDatabase>> {
481
+ self.inner
482
+ .read()
483
+ .map_err(|_| err("database read lock poisoned"))
484
+ }
485
+
486
+ fn write(&self) -> Result<RwLockWriteGuard<'_, CoreDatabase>> {
487
+ self.inner
488
+ .write()
489
+ .map_err(|_| err("database write lock poisoned"))
490
+ }
491
+
492
+ fn execute_search(
493
+ &self,
494
+ query: Option<Vec<f32>>,
495
+ request: &SearchRequest,
496
+ ) -> Result<SearchOutcome> {
497
+ let sparse_ref = if request.sparse.is_empty() {
498
+ None
499
+ } else {
500
+ Some(&request.sparse)
501
+ };
502
+ let database = self.read()?;
503
+ if request.all_namespaces {
504
+ database
505
+ .hybrid_search_all_namespaces_with_stats(
506
+ query.as_deref(),
507
+ sparse_ref,
508
+ request.options.clone(),
509
+ )
510
+ .map_err(to_napi_error)
511
+ } else {
512
+ database
513
+ .hybrid_search_in_namespace_with_stats(
514
+ &request.namespace,
515
+ query.as_deref(),
516
+ sparse_ref,
517
+ request.options.clone(),
518
+ )
519
+ .map_err(to_napi_error)
520
+ }
521
+ }
522
+ }
523
+
524
+ #[napi]
525
+ pub fn open(path: String, dimension: Option<u32>, read_only: bool) -> Result<NativeDatabase> {
526
+ let database = if read_only {
527
+ if !Path::new(&path).exists() {
528
+ return Err(err("cannot open non-existent database in read-only mode"));
529
+ }
530
+ CoreDatabase::open_read_only(&path).map_err(to_napi_error)?
531
+ } else if Path::new(&path).exists() {
532
+ match dimension {
533
+ Some(dimension) => {
534
+ CoreDatabase::open_or_create(&path, dimension as usize).map_err(to_napi_error)?
535
+ }
536
+ None => CoreDatabase::open(&path).map_err(to_napi_error)?,
537
+ }
538
+ } else {
539
+ let Some(dimension) = dimension else {
540
+ return Err(err("dimension is required when creating a new database"));
541
+ };
542
+ CoreDatabase::create(&path, dimension as usize).map_err(to_napi_error)?
543
+ };
544
+
545
+ Ok(NativeDatabase {
546
+ inner: Arc::new(RwLock::new(database)),
547
+ })
548
+ }
549
+
550
+ #[napi(js_name = "openStore")]
551
+ pub fn open_store(root: String) -> Result<NativeStore> {
552
+ let store = CoreStore::open(&root).map_err(to_napi_error)?;
553
+ Ok(NativeStore { inner: store })
554
+ }
555
+
556
+ #[napi]
557
+ pub fn restore(source: String, dest: String) -> Result<NativeDatabase> {
558
+ let database = CoreDatabase::restore(&source, &dest).map_err(to_napi_error)?;
559
+ Ok(NativeDatabase {
560
+ inner: Arc::new(RwLock::new(database)),
561
+ })
562
+ }
563
+
564
+ fn parse_search_request(options_json: Option<String>) -> Result<SearchRequest> {
565
+ let value = parse_optional_json(options_json)?;
566
+ let object = expect_optional_object(value.as_ref(), "search options")?;
567
+
568
+ let top_k = get_usize(object, "k")?.unwrap_or(10);
569
+ let filter = object
570
+ .and_then(|obj| obj.get("filter"))
571
+ .filter(|value| !value.is_null())
572
+ .map(json_to_filter)
573
+ .transpose()?;
574
+ let namespace = get_string(object, "namespace")?.unwrap_or_default();
575
+ let all_namespaces = get_bool(object, "allNamespaces")?.unwrap_or(false);
576
+ let sparse = object
577
+ .and_then(|obj| obj.get("sparse"))
578
+ .map(json_to_sparse_value)
579
+ .transpose()?
580
+ .unwrap_or_default();
581
+ let dense_weight = get_f32(object, "denseWeight")?.unwrap_or(1.0);
582
+ let sparse_weight = get_f32(object, "sparseWeight")?.unwrap_or(1.0);
583
+ let fetch_k = get_usize(object, "fetchK")?.unwrap_or(0);
584
+ let mmr_lambda = get_optional_f32(object, "mmrLambda")?;
585
+ let vector_name = get_string(object, "vectorName")?;
586
+ let fusion_name = get_string(object, "fusion")?.unwrap_or_else(|| "linear".to_owned());
587
+ let rrf_k = get_usize(object, "rrfK")?.unwrap_or(60);
588
+ let explain = get_bool(object, "explain")?.unwrap_or(false);
589
+ let query_vectors = parse_multi_vector_queries(
590
+ object.and_then(|obj| obj.get("queryVectors")),
591
+ object.and_then(|obj| obj.get("vectorWeights")),
592
+ )?;
593
+
594
+ Ok(SearchRequest {
595
+ namespace,
596
+ all_namespaces,
597
+ sparse,
598
+ explain,
599
+ fusion_name: fusion_name.clone(),
600
+ options: HybridSearchOptions {
601
+ top_k,
602
+ filter,
603
+ dense_weight,
604
+ sparse_weight,
605
+ fetch_k,
606
+ mmr_lambda,
607
+ vector_name,
608
+ fusion: parse_fusion(&fusion_name, rrf_k)?,
609
+ multi_vector_queries: query_vectors,
610
+ },
611
+ })
612
+ }
613
+
614
+ fn parse_metadata_json(input: Option<String>) -> Result<Metadata> {
615
+ let value = parse_optional_json(input)?;
616
+ match value {
617
+ None | Some(Value::Null) => Ok(Metadata::new()),
618
+ Some(value) => json_to_metadata(&value),
619
+ }
620
+ }
621
+
622
+ fn parse_sparse_json(input: Option<String>) -> Result<SparseVector> {
623
+ let value = parse_optional_json(input)?;
624
+ match value {
625
+ None | Some(Value::Null) => Ok(SparseVector::new()),
626
+ Some(value) => json_to_sparse_value(&value),
627
+ }
628
+ }
629
+
630
+ fn parse_named_vectors_json(input: Option<String>) -> Result<NamedVectors> {
631
+ let value = parse_optional_json(input)?;
632
+ match value {
633
+ None | Some(Value::Null) => Ok(NamedVectors::new()),
634
+ Some(value) => json_to_named_vectors(&value),
635
+ }
636
+ }
637
+
638
+ fn parse_record_batch_json(input: &str, default_namespace: Option<&str>) -> Result<Vec<Record>> {
639
+ let value: Value = serde_json::from_str(input).map_err(|error| {
640
+ err(format!(
641
+ "records must be valid JSON for batch operations: {error}"
642
+ ))
643
+ })?;
644
+ let items = value
645
+ .as_array()
646
+ .ok_or_else(|| err("records must be a JSON array"))?;
647
+ let mut parsed = Vec::with_capacity(items.len());
648
+ for item in items {
649
+ let object = item
650
+ .as_object()
651
+ .ok_or_else(|| err("each batch record must be a JSON object"))?;
652
+ parsed.push(json_to_record(object, default_namespace)?);
653
+ }
654
+ Ok(parsed)
655
+ }
656
+
657
+ fn parse_optional_json(input: Option<String>) -> Result<Option<Value>> {
658
+ input
659
+ .map(|value| {
660
+ serde_json::from_str(&value)
661
+ .map_err(|error| err(format!("invalid JSON payload: {error}")))
662
+ })
663
+ .transpose()
664
+ }
665
+
666
+ fn json_to_metadata(value: &Value) -> Result<Metadata> {
667
+ let object = value
668
+ .as_object()
669
+ .ok_or_else(|| err("metadata must be a JSON object"))?;
670
+ let mut metadata = Metadata::new();
671
+ for (key, value) in object {
672
+ metadata.insert(key.clone(), json_to_metadata_value(value)?);
673
+ }
674
+ Ok(metadata)
675
+ }
676
+
677
+ fn json_to_metadata_value(value: &Value) -> Result<MetadataValue> {
678
+ match value {
679
+ Value::Null => Ok(MetadataValue::Null),
680
+ Value::Bool(value) => Ok(MetadataValue::Boolean(*value)),
681
+ Value::String(value) => Ok(MetadataValue::String(value.clone())),
682
+ Value::Number(value) => {
683
+ if let Some(integer) = value.as_i64() {
684
+ Ok(MetadataValue::Integer(integer))
685
+ } else if let Some(float) = value.as_f64() {
686
+ Ok(MetadataValue::Float(float))
687
+ } else {
688
+ Err(err("metadata number must fit into an integer or float"))
689
+ }
690
+ }
691
+ Value::Array(items) => {
692
+ let mut converted = Vec::with_capacity(items.len());
693
+ for item in items {
694
+ converted.push(json_to_metadata_value(item)?);
695
+ }
696
+ Ok(MetadataValue::List(converted))
697
+ }
698
+ Value::Object(entries) => {
699
+ let mut converted = BTreeMap::new();
700
+ for (key, value) in entries {
701
+ converted.insert(key.clone(), json_to_metadata_value(value)?);
702
+ }
703
+ Ok(MetadataValue::Map(converted))
704
+ }
705
+ }
706
+ }
707
+
708
+ fn json_to_sparse_value(value: &Value) -> Result<SparseVector> {
709
+ let object = value
710
+ .as_object()
711
+ .ok_or_else(|| err("sparse vector must be a JSON object"))?;
712
+ let mut sparse = SparseVector::new();
713
+ for (term, value) in object {
714
+ sparse.insert(term.clone(), value_to_f32(value, "sparse weights")?);
715
+ }
716
+ Ok(sparse)
717
+ }
718
+
719
+ fn json_to_named_vectors(value: &Value) -> Result<NamedVectors> {
720
+ let object = value
721
+ .as_object()
722
+ .ok_or_else(|| err("named vectors must be a JSON object"))?;
723
+ let mut vectors = NamedVectors::new();
724
+ for (name, vector) in object {
725
+ if name.is_empty() {
726
+ return Err(err("named vectors must not use an empty name"));
727
+ }
728
+ vectors.insert(name.clone(), value_to_vector(vector, "named vector")?);
729
+ }
730
+ Ok(vectors)
731
+ }
732
+
733
+ fn json_to_filter(value: &Value) -> Result<MetadataFilter> {
734
+ let object = value
735
+ .as_object()
736
+ .ok_or_else(|| err("filter must be a JSON object"))?;
737
+ let mut filters = Vec::new();
738
+ for (key, value) in object {
739
+ match key.as_str() {
740
+ "$and" => filters.push(MetadataFilter::and(parse_filter_group(value)?)),
741
+ "$or" => filters.push(MetadataFilter::or(parse_filter_group(value)?)),
742
+ "$not" => filters.push(MetadataFilter::not(json_to_filter(value)?)),
743
+ field => filters.push(parse_field_filter(field, value)?),
744
+ }
745
+ }
746
+ collapse_filters(filters, "filter")
747
+ }
748
+
749
+ fn parse_filter_group(value: &Value) -> Result<Vec<MetadataFilter>> {
750
+ let items = value
751
+ .as_array()
752
+ .ok_or_else(|| err("logical filter groups must be JSON arrays"))?;
753
+ let mut filters = Vec::with_capacity(items.len());
754
+ for item in items {
755
+ filters.push(json_to_filter(item)?);
756
+ }
757
+ Ok(filters)
758
+ }
759
+
760
+ fn parse_field_filter(key: &str, value: &Value) -> Result<MetadataFilter> {
761
+ if let Some(operators) = value.as_object() {
762
+ let mut filters = Vec::new();
763
+ for (operator, operand) in operators {
764
+ match operator.as_str() {
765
+ "$eq" => filters.push(MetadataFilter::eq(key, json_to_metadata_value(operand)?)),
766
+ "$ne" => filters.push(MetadataFilter::ne(key, json_to_metadata_value(operand)?)),
767
+ "$in" => filters.push(MetadataFilter::r#in(key, extract_metadata_values(operand)?)),
768
+ "$nin" => filters.push(MetadataFilter::nin(key, extract_metadata_values(operand)?)),
769
+ "$not" => filters.push(MetadataFilter::not(parse_field_filter(key, operand)?)),
770
+ "$contains" => filters.push(MetadataFilter::contains(
771
+ key,
772
+ operand
773
+ .as_str()
774
+ .ok_or_else(|| err("$contains expects a string"))?,
775
+ )),
776
+ "$gt" => filters.push(MetadataFilter::gt(key, extract_numeric(operand)?)),
777
+ "$gte" => filters.push(MetadataFilter::gte(key, extract_numeric(operand)?)),
778
+ "$lt" => filters.push(MetadataFilter::lt(key, extract_numeric(operand)?)),
779
+ "$lte" => filters.push(MetadataFilter::lte(key, extract_numeric(operand)?)),
780
+ "$exists" => {
781
+ let exists = operand
782
+ .as_bool()
783
+ .ok_or_else(|| err("$exists expects a boolean"))?;
784
+ if exists {
785
+ filters.push(MetadataFilter::exists(key));
786
+ } else {
787
+ filters.push(MetadataFilter::not(MetadataFilter::exists(key)));
788
+ }
789
+ }
790
+ "$elemMatch" => {
791
+ let dict = operand
792
+ .as_object()
793
+ .ok_or_else(|| err("$elemMatch expects a JSON object"))?;
794
+ let all_operators = dict.keys().all(|item| item.starts_with('$'));
795
+ let sub_filter = if all_operators {
796
+ parse_field_filter("_", operand)?
797
+ } else {
798
+ json_to_filter(operand)?
799
+ };
800
+ filters.push(MetadataFilter::elem_match(key, sub_filter));
801
+ }
802
+ "$size" => {
803
+ filters.push(MetadataFilter::size(key, value_to_usize(operand, "$size")?))
804
+ }
805
+ other => {
806
+ return Err(err(format!("unsupported filter operator: {other}")));
807
+ }
808
+ }
809
+ }
810
+ collapse_filters(filters, "field filter")
811
+ } else {
812
+ Ok(MetadataFilter::eq(key, json_to_metadata_value(value)?))
813
+ }
814
+ }
815
+
816
+ fn collapse_filters(filters: Vec<MetadataFilter>, context: &str) -> Result<MetadataFilter> {
817
+ match filters.len() {
818
+ 0 => Err(err(format!("{context} cannot be empty"))),
819
+ 1 => Ok(filters
820
+ .into_iter()
821
+ .next()
822
+ .expect("single filter must exist")),
823
+ _ => Ok(MetadataFilter::and(filters)),
824
+ }
825
+ }
826
+
827
+ fn extract_metadata_values(value: &Value) -> Result<Vec<MetadataValue>> {
828
+ let items = value
829
+ .as_array()
830
+ .ok_or_else(|| err("list filter operands must be JSON arrays"))?;
831
+ let mut values = Vec::with_capacity(items.len());
832
+ for item in items {
833
+ values.push(json_to_metadata_value(item)?);
834
+ }
835
+ Ok(values)
836
+ }
837
+
838
+ fn extract_numeric(value: &Value) -> Result<f64> {
839
+ value
840
+ .as_f64()
841
+ .ok_or_else(|| err("numeric filter operands must be numbers"))
842
+ }
843
+
844
+ fn parse_multi_vector_queries(
845
+ query_vectors: Option<&Value>,
846
+ vector_weights: Option<&Value>,
847
+ ) -> Result<BTreeMap<String, (Vec<f32>, f32)>> {
848
+ let mut queries = BTreeMap::new();
849
+ let Some(query_vectors) = query_vectors else {
850
+ return Ok(queries);
851
+ };
852
+ let vectors = query_vectors
853
+ .as_object()
854
+ .ok_or_else(|| err("queryVectors must be a JSON object"))?;
855
+ let weights = match vector_weights {
856
+ Some(value) => Some(
857
+ value
858
+ .as_object()
859
+ .ok_or_else(|| err("vectorWeights must be a JSON object"))?,
860
+ ),
861
+ None => None,
862
+ };
863
+
864
+ for (name, vector) in vectors {
865
+ let weight = weights
866
+ .and_then(|weights| weights.get(name))
867
+ .map(|value| value_to_f32(value, "vector weights"))
868
+ .transpose()?
869
+ .unwrap_or(1.0);
870
+ queries.insert(
871
+ name.clone(),
872
+ (value_to_vector(vector, "query vector")?, weight),
873
+ );
874
+ }
875
+ Ok(queries)
876
+ }
877
+
878
+ fn parse_fusion(fusion: &str, rrf_k: usize) -> Result<FusionStrategy> {
879
+ match fusion {
880
+ "linear" => Ok(FusionStrategy::Linear),
881
+ "rrf" => Ok(FusionStrategy::Rrf {
882
+ rank_constant: rrf_k.max(1),
883
+ }),
884
+ other => Err(err(format!("unsupported fusion strategy: {other}"))),
885
+ }
886
+ }
887
+
888
+ fn json_to_record(object: &Map<String, Value>, default_namespace: Option<&str>) -> Result<Record> {
889
+ let namespace = object
890
+ .get("namespace")
891
+ .map(value_to_string)
892
+ .transpose()?
893
+ .unwrap_or_else(|| default_namespace.unwrap_or_default().to_owned());
894
+ let id = object
895
+ .get("id")
896
+ .ok_or_else(|| err("batch record is missing 'id'"))?;
897
+ let vector = object
898
+ .get("vector")
899
+ .ok_or_else(|| err("batch record is missing 'vector'"))?;
900
+
901
+ let vectors = object
902
+ .get("vectors")
903
+ .map(json_to_named_vectors)
904
+ .transpose()?
905
+ .unwrap_or_default();
906
+ let sparse = object
907
+ .get("sparse")
908
+ .map(json_to_sparse_value)
909
+ .transpose()?
910
+ .unwrap_or_default();
911
+ let metadata = object
912
+ .get("metadata")
913
+ .map(json_to_metadata)
914
+ .transpose()?
915
+ .unwrap_or_default();
916
+
917
+ Ok(Record {
918
+ namespace,
919
+ id: value_to_string(id)?,
920
+ vector: value_to_vector(vector, "batch vector")?,
921
+ vectors,
922
+ sparse,
923
+ metadata,
924
+ })
925
+ }
926
+
927
+ fn record_to_json(record: &Record) -> Value {
928
+ json!({
929
+ "namespace": record.namespace,
930
+ "id": record.id,
931
+ "vector": record.vector,
932
+ "vectors": named_vectors_to_json(&record.vectors),
933
+ "sparse": sparse_to_json(&record.sparse),
934
+ "metadata": metadata_to_json(&record.metadata),
935
+ })
936
+ }
937
+
938
+ fn search_results_to_json(results: &[SearchResult], explain: bool, fusion: &str) -> Value {
939
+ Value::Array(
940
+ results
941
+ .iter()
942
+ .map(|result| search_result_to_json(result, explain, fusion))
943
+ .collect(),
944
+ )
945
+ }
946
+
947
+ fn search_outcome_to_json(outcome: &SearchOutcome, explain: bool, fusion: &str) -> Value {
948
+ json!({
949
+ "results": search_results_to_json(&outcome.results, explain, fusion),
950
+ "stats": search_stats_to_json(&outcome.stats),
951
+ })
952
+ }
953
+
954
+ fn search_result_to_json(result: &SearchResult, explain: bool, fusion: &str) -> Value {
955
+ let mut object = Map::new();
956
+ object.insert(
957
+ "namespace".to_owned(),
958
+ Value::String(result.namespace.clone()),
959
+ );
960
+ object.insert("id".to_owned(), Value::String(result.id.clone()));
961
+ object.insert("score".to_owned(), float_value(result.score as f64));
962
+ object.insert(
963
+ "dense_score".to_owned(),
964
+ float_value(result.dense_score as f64),
965
+ );
966
+ object.insert(
967
+ "sparse_score".to_owned(),
968
+ float_value(result.sparse_score as f64),
969
+ );
970
+ object.insert(
971
+ "vector_name".to_owned(),
972
+ result
973
+ .vector_name
974
+ .clone()
975
+ .map(Value::String)
976
+ .unwrap_or(Value::Null),
977
+ );
978
+ object.insert(
979
+ "matched_terms".to_owned(),
980
+ Value::Array(
981
+ result
982
+ .matched_terms
983
+ .iter()
984
+ .cloned()
985
+ .map(Value::String)
986
+ .collect(),
987
+ ),
988
+ );
989
+ object.insert(
990
+ "dense_rank".to_owned(),
991
+ result
992
+ .dense_rank
993
+ .map(|rank| Value::Number(Number::from(rank)))
994
+ .unwrap_or(Value::Null),
995
+ );
996
+ object.insert(
997
+ "sparse_rank".to_owned(),
998
+ result
999
+ .sparse_rank
1000
+ .map(|rank| Value::Number(Number::from(rank)))
1001
+ .unwrap_or(Value::Null),
1002
+ );
1003
+ object.insert("metadata".to_owned(), metadata_to_json(&result.metadata));
1004
+
1005
+ if explain {
1006
+ object.insert(
1007
+ "explain".to_owned(),
1008
+ json!({
1009
+ "fusion": fusion,
1010
+ "dense_score": result.dense_score,
1011
+ "sparse_score": result.sparse_score,
1012
+ "matched_terms": result.matched_terms,
1013
+ "vector_name": result.vector_name,
1014
+ "dense_rank": result.dense_rank,
1015
+ "sparse_rank": result.sparse_rank,
1016
+ "bm25_term_scores": result.bm25_term_scores,
1017
+ }),
1018
+ );
1019
+ }
1020
+
1021
+ Value::Object(object)
1022
+ }
1023
+
1024
+ fn search_stats_to_json(stats: &vectlite::SearchStats) -> Value {
1025
+ json!({
1026
+ "used_ann": stats.used_ann,
1027
+ "ann_candidate_count": stats.ann_candidate_count,
1028
+ "exact_fallback": stats.exact_fallback,
1029
+ "considered_count": stats.considered_count,
1030
+ "fetch_k": stats.fetch_k,
1031
+ "mmr_applied": stats.mmr_applied,
1032
+ "sparse_candidate_count": stats.sparse_candidate_count,
1033
+ "ann_loaded_from_disk": stats.ann_loaded_from_disk,
1034
+ "wal_entries_replayed": stats.wal_entries_replayed,
1035
+ "fusion": stats.fusion,
1036
+ "rerank_applied": false,
1037
+ "rerank_count": 0,
1038
+ "timings": {
1039
+ "dense_us": stats.timings.dense_us,
1040
+ "sparse_us": stats.timings.sparse_us,
1041
+ "fusion_us": stats.timings.fusion_us,
1042
+ "total_us": stats.timings.total_us,
1043
+ },
1044
+ })
1045
+ }
1046
+
1047
+ fn metadata_to_json(metadata: &Metadata) -> Value {
1048
+ let mut object = Map::new();
1049
+ for (key, value) in metadata {
1050
+ object.insert(key.clone(), metadata_value_to_json(value));
1051
+ }
1052
+ Value::Object(object)
1053
+ }
1054
+
1055
+ fn metadata_value_to_json(value: &MetadataValue) -> Value {
1056
+ match value {
1057
+ MetadataValue::String(value) => Value::String(value.clone()),
1058
+ MetadataValue::Integer(value) => Value::Number(Number::from(*value)),
1059
+ MetadataValue::Float(value) => float_value(*value),
1060
+ MetadataValue::Boolean(value) => Value::Bool(*value),
1061
+ MetadataValue::Null => Value::Null,
1062
+ MetadataValue::List(items) => {
1063
+ Value::Array(items.iter().map(metadata_value_to_json).collect())
1064
+ }
1065
+ MetadataValue::Map(entries) => {
1066
+ let mut object = Map::new();
1067
+ for (key, value) in entries {
1068
+ object.insert(key.clone(), metadata_value_to_json(value));
1069
+ }
1070
+ Value::Object(object)
1071
+ }
1072
+ }
1073
+ }
1074
+
1075
+ fn sparse_to_json(sparse: &SparseVector) -> Value {
1076
+ let mut object = Map::new();
1077
+ for (term, weight) in sparse {
1078
+ object.insert(term.clone(), float_value(*weight as f64));
1079
+ }
1080
+ Value::Object(object)
1081
+ }
1082
+
1083
+ fn named_vectors_to_json(vectors: &NamedVectors) -> Value {
1084
+ let mut object = Map::new();
1085
+ for (name, vector) in vectors {
1086
+ object.insert(
1087
+ name.clone(),
1088
+ Value::Array(
1089
+ vector
1090
+ .iter()
1091
+ .map(|value| float_value(*value as f64))
1092
+ .collect(),
1093
+ ),
1094
+ );
1095
+ }
1096
+ Value::Object(object)
1097
+ }
1098
+
1099
+ fn stringify_value(value: Value) -> Result<String> {
1100
+ serde_json::to_string(&value).map_err(|error| err(format!("failed to serialize JSON: {error}")))
1101
+ }
1102
+
1103
+ fn float_value(value: f64) -> Value {
1104
+ Number::from_f64(value)
1105
+ .map(Value::Number)
1106
+ .unwrap_or(Value::Null)
1107
+ }
1108
+
1109
+ fn expect_optional_object<'a>(
1110
+ value: Option<&'a Value>,
1111
+ label: &str,
1112
+ ) -> Result<Option<&'a Map<String, Value>>> {
1113
+ match value {
1114
+ None | Some(Value::Null) => Ok(None),
1115
+ Some(Value::Object(object)) => Ok(Some(object)),
1116
+ Some(_) => Err(err(format!("{label} must be a JSON object"))),
1117
+ }
1118
+ }
1119
+
1120
+ fn get_string(object: Option<&Map<String, Value>>, key: &str) -> Result<Option<String>> {
1121
+ let Some(object) = object else {
1122
+ return Ok(None);
1123
+ };
1124
+ match object.get(key) {
1125
+ None | Some(Value::Null) => Ok(None),
1126
+ Some(value) => Ok(Some(value_to_string(value)?)),
1127
+ }
1128
+ }
1129
+
1130
+ fn get_bool(object: Option<&Map<String, Value>>, key: &str) -> Result<Option<bool>> {
1131
+ let Some(object) = object else {
1132
+ return Ok(None);
1133
+ };
1134
+ match object.get(key) {
1135
+ None | Some(Value::Null) => Ok(None),
1136
+ Some(Value::Bool(value)) => Ok(Some(*value)),
1137
+ Some(_) => Err(err(format!("{key} must be a boolean"))),
1138
+ }
1139
+ }
1140
+
1141
+ fn get_usize(object: Option<&Map<String, Value>>, key: &str) -> Result<Option<usize>> {
1142
+ let Some(object) = object else {
1143
+ return Ok(None);
1144
+ };
1145
+ match object.get(key) {
1146
+ None | Some(Value::Null) => Ok(None),
1147
+ Some(value) => Ok(Some(value_to_usize(value, key)?)),
1148
+ }
1149
+ }
1150
+
1151
+ fn get_f32(object: Option<&Map<String, Value>>, key: &str) -> Result<Option<f32>> {
1152
+ let Some(object) = object else {
1153
+ return Ok(None);
1154
+ };
1155
+ match object.get(key) {
1156
+ None | Some(Value::Null) => Ok(None),
1157
+ Some(value) => Ok(Some(value_to_f32(value, key)?)),
1158
+ }
1159
+ }
1160
+
1161
+ fn get_optional_f32(object: Option<&Map<String, Value>>, key: &str) -> Result<Option<f32>> {
1162
+ get_f32(object, key)
1163
+ }
1164
+
1165
+ fn value_to_string(value: &Value) -> Result<String> {
1166
+ value
1167
+ .as_str()
1168
+ .map(ToOwned::to_owned)
1169
+ .ok_or_else(|| err("value must be a string"))
1170
+ }
1171
+
1172
+ fn value_to_vector(value: &Value, label: &str) -> Result<Vec<f32>> {
1173
+ let items = value
1174
+ .as_array()
1175
+ .ok_or_else(|| err(format!("{label} must be a JSON array of numbers")))?;
1176
+ let mut vector = Vec::with_capacity(items.len());
1177
+ for item in items {
1178
+ vector.push(value_to_f32(item, label)?);
1179
+ }
1180
+ Ok(vector)
1181
+ }
1182
+
1183
+ fn value_to_f32(value: &Value, label: &str) -> Result<f32> {
1184
+ value
1185
+ .as_f64()
1186
+ .map(|value| value as f32)
1187
+ .ok_or_else(|| err(format!("{label} must contain numeric values")))
1188
+ }
1189
+
1190
+ fn js_vector_to_core(values: Vec<f64>, label: &str) -> Result<Vec<f32>> {
1191
+ let mut vector = Vec::with_capacity(values.len());
1192
+ for value in values {
1193
+ if !value.is_finite() {
1194
+ return Err(err(format!("{label} must contain finite numeric values")));
1195
+ }
1196
+ vector.push(value as f32);
1197
+ }
1198
+ Ok(vector)
1199
+ }
1200
+
1201
+ fn value_to_usize(value: &Value, label: &str) -> Result<usize> {
1202
+ value
1203
+ .as_u64()
1204
+ .map(|value| value as usize)
1205
+ .ok_or_else(|| err(format!("{label} must be an unsigned integer")))
1206
+ }
1207
+
1208
+ fn err(message: impl Into<String>) -> NapiError {
1209
+ NapiError::from_reason(message.into())
1210
+ }
1211
+
1212
+ fn to_napi_error(error: vectlite::VectLiteError) -> NapiError {
1213
+ err(error.to_string())
1214
+ }