vectlite 0.1.11 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/native/src/lib.rs CHANGED
@@ -6,10 +6,15 @@ use napi::Error as NapiError;
6
6
  use napi::bindgen_prelude::*;
7
7
  use napi_derive::napi;
8
8
  use serde_json::{Map, Number, Value, json};
9
+ use vectlite::quantization::{
10
+ BinaryQuantizationConfig, MultiVectorQuantizationConfig, ProductQuantizationConfig,
11
+ QuantizationConfig, ScalarQuantizationConfig, TwoBitQuantizationConfig,
12
+ };
9
13
  use vectlite::{
10
- Database as CoreDatabase, FusionStrategy, HybridSearchOptions, Metadata, MetadataFilter,
11
- MetadataValue, NamedVectors, Record, SearchOutcome, SearchResult, SparseVector,
12
- Store as CoreStore, WriteOperation,
14
+ Database as CoreDatabase, DistanceMetric, FusionStrategy, HybridSearchOptions, Metadata,
15
+ MetadataFilter, MetadataValue, MultiVectorSearchOptions, MultiVectors, NamedVectors,
16
+ PayloadIndexType, Record, SearchOutcome, SearchResult, SparseVector, Store as CoreStore,
17
+ WriteOperation,
13
18
  };
14
19
 
15
20
  #[napi(js_name = "NativeDatabase")]
@@ -126,6 +131,12 @@ impl NativeDatabase {
126
131
  Ok(database.dimension() as u32)
127
132
  }
128
133
 
134
+ #[napi(getter)]
135
+ pub fn metric(&self) -> Result<String> {
136
+ let database = self.read()?;
137
+ Ok(database.metric().name().to_owned())
138
+ }
139
+
129
140
  #[napi(getter, js_name = "readOnly")]
130
141
  pub fn read_only(&self) -> Result<bool> {
131
142
  let database = self.read()?;
@@ -142,10 +153,6 @@ impl NativeDatabase {
142
153
  json_to_filter(&value)
143
154
  })
144
155
  .transpose()?;
145
- if namespace.is_none() && filter.is_none() {
146
- let database = self.read()?;
147
- return Ok(database.len() as u32);
148
- }
149
156
  let database = self.read()?;
150
157
  Ok(database.count_filtered(namespace.as_deref(), filter.as_ref()) as u32)
151
158
  }
@@ -195,6 +202,38 @@ impl NativeDatabase {
195
202
  stringify_value(Value::Array(json_records))
196
203
  }
197
204
 
205
+ #[napi(js_name = "listCursor")]
206
+ pub fn list_cursor(
207
+ &self,
208
+ namespace: Option<String>,
209
+ filter_json: Option<String>,
210
+ limit: Option<u32>,
211
+ cursor: Option<String>,
212
+ ) -> Result<String> {
213
+ let filter = filter_json
214
+ .as_ref()
215
+ .map(|json_str| {
216
+ let value: serde_json::Value = serde_json::from_str(json_str)
217
+ .map_err(|e| err(format!("invalid filter JSON: {e}")))?;
218
+ json_to_filter(&value)
219
+ })
220
+ .transpose()?;
221
+ let database = self.read()?;
222
+ let (records, next_cursor) = database.list_cursor(
223
+ namespace.as_deref(),
224
+ filter.as_ref(),
225
+ limit.unwrap_or(0) as usize,
226
+ cursor.as_deref(),
227
+ );
228
+ let records: Vec<Record> = records.into_iter().cloned().collect();
229
+ let json_records: Vec<Value> = records.iter().map(record_to_json).collect();
230
+ let result = serde_json::json!({
231
+ "records": json_records,
232
+ "cursor": next_cursor,
233
+ });
234
+ stringify_value(result)
235
+ }
236
+
198
237
  #[napi(js_name = "deleteByFilter")]
199
238
  pub fn delete_by_filter(&self, filter_json: String, namespace: Option<String>) -> Result<u32> {
200
239
  let value: serde_json::Value = serde_json::from_str(&filter_json)
@@ -207,6 +246,70 @@ impl NativeDatabase {
207
246
  .map_err(to_napi_error)
208
247
  }
209
248
 
249
+ #[napi]
250
+ pub fn update_metadata(
251
+ &self,
252
+ id: String,
253
+ metadata_json: String,
254
+ namespace: Option<String>,
255
+ ) -> Result<bool> {
256
+ let patch = parse_metadata_json(Some(metadata_json))?;
257
+ let mut database = self.write_open()?;
258
+ database
259
+ .update_metadata_in_namespace(namespace.unwrap_or_default(), &id, patch)
260
+ .map_err(to_napi_error)
261
+ }
262
+
263
+ // -------------------------------------------------------------------
264
+ // TTL / Expiry
265
+ // -------------------------------------------------------------------
266
+
267
+ #[napi(js_name = "setTtl")]
268
+ pub fn set_ttl(&self, id: String, ttl: f64, namespace: Option<String>) -> Result<bool> {
269
+ let mut database = self.write_open()?;
270
+ database
271
+ .set_ttl_in_namespace(&namespace.unwrap_or_default(), &id, ttl)
272
+ .map_err(to_napi_error)
273
+ }
274
+
275
+ #[napi(js_name = "clearTtl")]
276
+ pub fn clear_ttl(&self, id: String, namespace: Option<String>) -> Result<bool> {
277
+ let mut database = self.write_open()?;
278
+ database
279
+ .clear_ttl_in_namespace(&namespace.unwrap_or_default(), &id)
280
+ .map_err(to_napi_error)
281
+ }
282
+
283
+ // -------------------------------------------------------------------
284
+ // Payload Indexes
285
+ // -------------------------------------------------------------------
286
+
287
+ #[napi(js_name = "createIndex")]
288
+ pub fn create_index(&self, field: String, index_type: String) -> Result<bool> {
289
+ let ty = parse_payload_index_type(&index_type)?;
290
+ let mut database = self.write_open()?;
291
+ database.create_index(&field, ty).map_err(to_napi_error)
292
+ }
293
+
294
+ #[napi(js_name = "dropIndex")]
295
+ pub fn drop_index(&self, field: String) -> Result<bool> {
296
+ let mut database = self.write_open()?;
297
+ database.drop_index(&field).map_err(to_napi_error)
298
+ }
299
+
300
+ #[napi(js_name = "listIndexes")]
301
+ pub fn list_indexes(&self) -> Result<String> {
302
+ let database = self.read()?;
303
+ let indexes = database.list_indexes();
304
+ let arr: Vec<Value> = indexes
305
+ .into_iter()
306
+ .map(|(field, index_type)| {
307
+ json!({ "field": field, "type": index_type.name() })
308
+ })
309
+ .collect();
310
+ serde_json::to_string(&arr).map_err(|e| err(format!("JSON serialize: {e}")))
311
+ }
312
+
210
313
  #[napi]
211
314
  pub fn transaction(&self) -> Result<NativeTransaction> {
212
315
  drop(self.read()?);
@@ -225,21 +328,27 @@ impl NativeDatabase {
225
328
  namespace: Option<String>,
226
329
  sparse_json: Option<String>,
227
330
  vectors_json: Option<String>,
331
+ ttl: Option<f64>,
228
332
  ) -> Result<()> {
229
333
  let metadata = parse_metadata_json(metadata_json)?;
230
334
  let sparse = parse_sparse_json(sparse_json)?;
231
335
  let vectors = parse_named_vectors_json(vectors_json)?;
232
336
  let vector = js_vector_to_core(vector, "vector")?;
337
+ let expires_at = ttl_to_expires_at(ttl)?;
338
+ let record = Record {
339
+ namespace: namespace.unwrap_or_default(),
340
+ id,
341
+ vector,
342
+ vectors,
343
+ sparse,
344
+ metadata,
345
+ multi_vectors: MultiVectors::new(),
346
+ expires_at,
347
+ };
233
348
  let mut database = self.write_open()?;
234
349
  database
235
- .insert_with_vectors_in_namespace(
236
- namespace.unwrap_or_default(),
237
- &id,
238
- vector,
239
- vectors,
240
- sparse,
241
- metadata,
242
- )
350
+ .insert_many(std::iter::once(record))
351
+ .map(|_| ())
243
352
  .map_err(to_napi_error)
244
353
  }
245
354
 
@@ -252,21 +361,27 @@ impl NativeDatabase {
252
361
  namespace: Option<String>,
253
362
  sparse_json: Option<String>,
254
363
  vectors_json: Option<String>,
364
+ ttl: Option<f64>,
255
365
  ) -> Result<()> {
256
366
  let metadata = parse_metadata_json(metadata_json)?;
257
367
  let sparse = parse_sparse_json(sparse_json)?;
258
368
  let vectors = parse_named_vectors_json(vectors_json)?;
259
369
  let vector = js_vector_to_core(vector, "vector")?;
370
+ let expires_at = ttl_to_expires_at(ttl)?;
371
+ let record = Record {
372
+ namespace: namespace.unwrap_or_default(),
373
+ id,
374
+ vector,
375
+ vectors,
376
+ sparse,
377
+ metadata,
378
+ multi_vectors: MultiVectors::new(),
379
+ expires_at,
380
+ };
260
381
  let mut database = self.write_open()?;
261
382
  database
262
- .upsert_with_vectors_in_namespace(
263
- namespace.unwrap_or_default(),
264
- &id,
265
- vector,
266
- vectors,
267
- sparse,
268
- metadata,
269
- )
383
+ .upsert_many(std::iter::once(record))
384
+ .map(|_| ())
270
385
  .map_err(to_napi_error)
271
386
  }
272
387
 
@@ -348,6 +463,242 @@ impl NativeDatabase {
348
463
  database.compact().map_err(to_napi_error)
349
464
  }
350
465
 
466
+ // -------------------------------------------------------------------
467
+ // Quantization
468
+ // -------------------------------------------------------------------
469
+
470
+ /// Enable quantization on the database.
471
+ /// `method`: "scalar", "binary", or "product"
472
+ /// `options_json`: JSON with optional keys: rescore_multiplier, num_sub_vectors, num_centroids, training_iterations
473
+ #[napi(js_name = "enableQuantization")]
474
+ pub fn enable_quantization(
475
+ &self,
476
+ method: Option<String>,
477
+ options_json: Option<String>,
478
+ ) -> Result<()> {
479
+ let method = method.as_deref().unwrap_or("scalar");
480
+ let (rescore_multiplier, num_sub_vectors, num_centroids, training_iterations) =
481
+ parse_quantization_options(options_json.as_deref())?;
482
+ let config = build_quantization_config(
483
+ method,
484
+ rescore_multiplier,
485
+ num_sub_vectors,
486
+ num_centroids,
487
+ training_iterations,
488
+ )?;
489
+ let mut database = self.write_open()?;
490
+ database.enable_quantization(config).map_err(to_napi_error)
491
+ }
492
+
493
+ /// Disable quantization and remove persisted parameters.
494
+ #[napi(js_name = "disableQuantization")]
495
+ pub fn disable_quantization(&self) -> Result<()> {
496
+ let mut database = self.write_open()?;
497
+ database.disable_quantization().map_err(to_napi_error)
498
+ }
499
+
500
+ /// Returns true if quantization is enabled.
501
+ #[napi(getter, js_name = "isQuantized")]
502
+ pub fn is_quantized(&self) -> Result<bool> {
503
+ let database = self.read()?;
504
+ Ok(database.is_quantized())
505
+ }
506
+
507
+ /// Returns the quantization method name if enabled, else null.
508
+ #[napi(getter, js_name = "quantizationMethod")]
509
+ pub fn quantization_method(&self) -> Result<Option<String>> {
510
+ let database = self.read()?;
511
+ Ok(database.quantization_config().map(|config| match config {
512
+ QuantizationConfig::Scalar(_) => "scalar".to_owned(),
513
+ QuantizationConfig::Binary(_) => "binary".to_owned(),
514
+ QuantizationConfig::Product(_) => "product".to_owned(),
515
+ }))
516
+ }
517
+
518
+ // ---- Multi-vector / ColBERT-style late interaction ----
519
+
520
+ /// Upsert a record with multi-vector token embeddings (ColBERT-style).
521
+ ///
522
+ /// `multi_vectors_json` is a JSON string mapping space names to arrays of
523
+ /// token vectors, e.g. `{"colbert": [[0.1, 0.2], [0.3, 0.4]]}`.
524
+ #[napi(js_name = "upsertMultiVectors")]
525
+ pub fn upsert_multi_vectors(
526
+ &self,
527
+ id: String,
528
+ vector: Vec<f64>,
529
+ multi_vectors_json: String,
530
+ options_json: Option<String>,
531
+ ) -> Result<()> {
532
+ let vector: Vec<f32> = vector.iter().map(|&v| v as f32).collect();
533
+ let mv_value: Value = serde_json::from_str(&multi_vectors_json)
534
+ .map_err(|e| to_napi_error(vectlite::VectLiteError::InvalidFormat(e.to_string())))?;
535
+ let mv = json_to_multi_vectors(&mv_value)?;
536
+
537
+ let (metadata, namespace) = if let Some(opts) = options_json {
538
+ let opts: Value = serde_json::from_str(&opts)
539
+ .map_err(|e| to_napi_error(vectlite::VectLiteError::InvalidFormat(e.to_string())))?;
540
+ let metadata = opts
541
+ .get("metadata")
542
+ .map(|v| json_to_metadata(v))
543
+ .transpose()?
544
+ .unwrap_or_default();
545
+ let namespace = opts
546
+ .get("namespace")
547
+ .and_then(|v| v.as_str())
548
+ .unwrap_or("")
549
+ .to_string();
550
+ (metadata, namespace)
551
+ } else {
552
+ (Metadata::new(), String::new())
553
+ };
554
+
555
+ let mut database = self.write_open()?;
556
+ database
557
+ .upsert_multi_vectors_in_namespace(namespace, id, vector, metadata, mv)
558
+ .map_err(to_napi_error)
559
+ }
560
+
561
+ /// Search using multi-vector late interaction (MaxSim) scoring.
562
+ ///
563
+ /// Returns a JSON array of results with id, score, namespace, and metadata.
564
+ #[napi(js_name = "searchMultiVector")]
565
+ pub fn search_multi_vector(
566
+ &self,
567
+ space: String,
568
+ query_tokens_json: String,
569
+ options_json: Option<String>,
570
+ ) -> Result<String> {
571
+ let qt_value: Value = serde_json::from_str(&query_tokens_json)
572
+ .map_err(|e| to_napi_error(vectlite::VectLiteError::InvalidFormat(e.to_string())))?;
573
+ let qt_arr = qt_value
574
+ .as_array()
575
+ .ok_or_else(|| to_napi_error(vectlite::VectLiteError::InvalidFormat(
576
+ "query_tokens must be a JSON array of arrays".to_owned(),
577
+ )))?;
578
+ let query_tokens: Vec<Vec<f32>> = qt_arr
579
+ .iter()
580
+ .map(|v| {
581
+ v.as_array()
582
+ .ok_or_else(|| {
583
+ to_napi_error(vectlite::VectLiteError::InvalidFormat(
584
+ "each query token must be an array of numbers".to_owned(),
585
+ ))
586
+ })?
587
+ .iter()
588
+ .map(|n| {
589
+ n.as_f64()
590
+ .map(|f| f as f32)
591
+ .ok_or_else(|| {
592
+ to_napi_error(vectlite::VectLiteError::InvalidFormat(
593
+ "token values must be numbers".to_owned(),
594
+ ))
595
+ })
596
+ })
597
+ .collect::<Result<Vec<f32>>>()
598
+ })
599
+ .collect::<Result<Vec<Vec<f32>>>>()?;
600
+
601
+ let (top_k, filter, namespace) = if let Some(opts) = options_json {
602
+ let opts: Value = serde_json::from_str(&opts)
603
+ .map_err(|e| to_napi_error(vectlite::VectLiteError::InvalidFormat(e.to_string())))?;
604
+ let top_k = opts.get("k").and_then(|v| v.as_u64()).unwrap_or(10) as usize;
605
+ let filter = opts
606
+ .get("filter")
607
+ .map(|v| json_to_filter(v))
608
+ .transpose()?;
609
+ let namespace = opts
610
+ .get("namespace")
611
+ .and_then(|v| v.as_str())
612
+ .map(String::from);
613
+ (top_k, filter, namespace)
614
+ } else {
615
+ (10, None, None)
616
+ };
617
+
618
+ let options = MultiVectorSearchOptions {
619
+ top_k,
620
+ filter,
621
+ namespace,
622
+ };
623
+
624
+ let database = self.read()?;
625
+ let results = database
626
+ .search_multi_vector(&space, &query_tokens, options)
627
+ .map_err(to_napi_error)?;
628
+
629
+ let json_results: Vec<Value> = results
630
+ .into_iter()
631
+ .map(|r| {
632
+ json!({
633
+ "id": r.id,
634
+ "score": r.score,
635
+ "namespace": r.namespace,
636
+ "metadata": metadata_to_json(&r.metadata),
637
+ })
638
+ })
639
+ .collect();
640
+
641
+ serde_json::to_string(&json_results)
642
+ .map_err(|e| to_napi_error(vectlite::VectLiteError::InvalidFormat(e.to_string())))
643
+ }
644
+
645
+ /// Enable 2-bit quantization for a multi-vector space.
646
+ #[napi(js_name = "enableMultiVectorQuantization")]
647
+ pub fn enable_multi_vector_quantization(
648
+ &self,
649
+ space: String,
650
+ options_json: Option<String>,
651
+ ) -> Result<()> {
652
+ let (method, rescore_multiplier) = if let Some(opts) = options_json {
653
+ let opts: Value = serde_json::from_str(&opts)
654
+ .map_err(|e| to_napi_error(vectlite::VectLiteError::InvalidFormat(e.to_string())))?;
655
+ let method = opts
656
+ .get("method")
657
+ .and_then(|v| v.as_str())
658
+ .unwrap_or("two_bit")
659
+ .to_string();
660
+ let rescore = opts
661
+ .get("rescoreMultiplier")
662
+ .and_then(|v| v.as_u64())
663
+ .map(|v| v as usize);
664
+ (method, rescore)
665
+ } else {
666
+ ("two_bit".to_string(), None)
667
+ };
668
+
669
+ let config = match method.as_str() {
670
+ "two_bit" => MultiVectorQuantizationConfig::TwoBit(TwoBitQuantizationConfig {
671
+ rescore_multiplier: rescore_multiplier.unwrap_or(4),
672
+ }),
673
+ other => {
674
+ return Err(to_napi_error(vectlite::VectLiteError::InvalidFormat(
675
+ format!("unknown multi-vector quantization method: {other}. Supported: two_bit"),
676
+ )));
677
+ }
678
+ };
679
+
680
+ let mut database = self.write_open()?;
681
+ database
682
+ .enable_multi_vector_quantization(&space, config)
683
+ .map_err(to_napi_error)
684
+ }
685
+
686
+ /// Disable multi-vector quantization for a space.
687
+ #[napi(js_name = "disableMultiVectorQuantization")]
688
+ pub fn disable_multi_vector_quantization(&self, space: String) -> Result<()> {
689
+ let mut database = self.write_open()?;
690
+ database
691
+ .disable_multi_vector_quantization(&space)
692
+ .map_err(to_napi_error)
693
+ }
694
+
695
+ /// Returns true if multi-vector quantization is enabled for the given space.
696
+ #[napi(js_name = "isMultiVectorQuantized")]
697
+ pub fn is_multi_vector_quantized(&self, space: String) -> Result<bool> {
698
+ let database = self.read()?;
699
+ Ok(database.is_multi_vector_quantized(&space))
700
+ }
701
+
351
702
  #[napi]
352
703
  pub fn snapshot(&self, dest: String) -> Result<()> {
353
704
  let database = self.read()?;
@@ -393,6 +744,206 @@ impl NativeDatabase {
393
744
  }
394
745
  }
395
746
 
747
+ // -------------------------------------------------------------------
748
+ // Async tasks (run on libuv threadpool via napi::Task)
749
+ // -------------------------------------------------------------------
750
+
751
+ pub struct SearchTask {
752
+ db: Arc<RwLock<CoreDatabase>>,
753
+ query: Option<Vec<f32>>,
754
+ request: SearchRequest,
755
+ with_stats: bool,
756
+ }
757
+
758
+ impl napi::Task for SearchTask {
759
+ type Output = String;
760
+ type JsValue = String;
761
+
762
+ fn compute(&mut self) -> Result<Self::Output> {
763
+ let database = self
764
+ .db
765
+ .read()
766
+ .map_err(|e| err(format!("lock poisoned: {e}")))?;
767
+ let sparse_ref = if self.request.sparse.is_empty() {
768
+ None
769
+ } else {
770
+ Some(&self.request.sparse)
771
+ };
772
+ let outcome = if self.request.all_namespaces {
773
+ database
774
+ .hybrid_search_all_namespaces_with_stats(
775
+ self.query.as_deref(),
776
+ sparse_ref,
777
+ self.request.options.clone(),
778
+ )
779
+ .map_err(to_napi_error)?
780
+ } else {
781
+ database
782
+ .hybrid_search_in_namespace_with_stats(
783
+ &self.request.namespace,
784
+ self.query.as_deref(),
785
+ sparse_ref,
786
+ self.request.options.clone(),
787
+ )
788
+ .map_err(to_napi_error)?
789
+ };
790
+ if self.with_stats {
791
+ stringify_value(search_outcome_to_json(
792
+ &outcome,
793
+ self.request.explain,
794
+ &self.request.fusion_name,
795
+ ))
796
+ } else {
797
+ stringify_value(search_results_to_json(
798
+ &outcome.results,
799
+ self.request.explain,
800
+ &self.request.fusion_name,
801
+ ))
802
+ }
803
+ }
804
+
805
+ fn resolve(&mut self, _env: napi::Env, output: Self::Output) -> Result<Self::JsValue> {
806
+ Ok(output)
807
+ }
808
+ }
809
+
810
+ pub struct FlushTask {
811
+ db: Arc<RwLock<CoreDatabase>>,
812
+ }
813
+
814
+ impl napi::Task for FlushTask {
815
+ type Output = ();
816
+ type JsValue = ();
817
+
818
+ fn compute(&mut self) -> Result<Self::Output> {
819
+ let mut database = self
820
+ .db
821
+ .write()
822
+ .map_err(|e| err(format!("lock poisoned: {e}")))?;
823
+ database.flush().map_err(to_napi_error)
824
+ }
825
+
826
+ fn resolve(&mut self, _env: napi::Env, _output: Self::Output) -> Result<Self::JsValue> {
827
+ Ok(())
828
+ }
829
+ }
830
+
831
+ pub struct CompactTask {
832
+ db: Arc<RwLock<CoreDatabase>>,
833
+ }
834
+
835
+ impl napi::Task for CompactTask {
836
+ type Output = ();
837
+ type JsValue = ();
838
+
839
+ fn compute(&mut self) -> Result<Self::Output> {
840
+ let mut database = self
841
+ .db
842
+ .write()
843
+ .map_err(|e| err(format!("lock poisoned: {e}")))?;
844
+ database.compact().map_err(to_napi_error)
845
+ }
846
+
847
+ fn resolve(&mut self, _env: napi::Env, _output: Self::Output) -> Result<Self::JsValue> {
848
+ Ok(())
849
+ }
850
+ }
851
+
852
+ pub struct BulkIngestTask {
853
+ db: Arc<RwLock<CoreDatabase>>,
854
+ records: Vec<Record>,
855
+ batch_size: usize,
856
+ }
857
+
858
+ impl napi::Task for BulkIngestTask {
859
+ type Output = u32;
860
+ type JsValue = u32;
861
+
862
+ fn compute(&mut self) -> Result<Self::Output> {
863
+ let records = std::mem::take(&mut self.records);
864
+ let mut database = self
865
+ .db
866
+ .write()
867
+ .map_err(|e| err(format!("lock poisoned: {e}")))?;
868
+ database
869
+ .bulk_ingest(records, self.batch_size)
870
+ .map(|count| count as u32)
871
+ .map_err(to_napi_error)
872
+ }
873
+
874
+ fn resolve(&mut self, _env: napi::Env, output: Self::Output) -> Result<Self::JsValue> {
875
+ Ok(output)
876
+ }
877
+ }
878
+
879
+ #[napi]
880
+ impl NativeDatabase {
881
+ #[napi(js_name = "searchAsync")]
882
+ pub fn search_async(
883
+ &self,
884
+ query: Option<Vec<f64>>,
885
+ options_json: Option<String>,
886
+ ) -> Result<AsyncTask<SearchTask>> {
887
+ let request = parse_search_request(options_json)?;
888
+ let query = query
889
+ .map(|vector| js_vector_to_core(vector, "query vector"))
890
+ .transpose()?;
891
+ Ok(AsyncTask::new(SearchTask {
892
+ db: self.inner.clone(),
893
+ query,
894
+ request,
895
+ with_stats: false,
896
+ }))
897
+ }
898
+
899
+ #[napi(js_name = "searchWithStatsAsync")]
900
+ pub fn search_with_stats_async(
901
+ &self,
902
+ query: Option<Vec<f64>>,
903
+ options_json: Option<String>,
904
+ ) -> Result<AsyncTask<SearchTask>> {
905
+ let request = parse_search_request(options_json)?;
906
+ let query = query
907
+ .map(|vector| js_vector_to_core(vector, "query vector"))
908
+ .transpose()?;
909
+ Ok(AsyncTask::new(SearchTask {
910
+ db: self.inner.clone(),
911
+ query,
912
+ request,
913
+ with_stats: true,
914
+ }))
915
+ }
916
+
917
+ #[napi(js_name = "flushAsync")]
918
+ pub fn flush_async(&self) -> AsyncTask<FlushTask> {
919
+ AsyncTask::new(FlushTask {
920
+ db: self.inner.clone(),
921
+ })
922
+ }
923
+
924
+ #[napi(js_name = "compactAsync")]
925
+ pub fn compact_async(&self) -> AsyncTask<CompactTask> {
926
+ AsyncTask::new(CompactTask {
927
+ db: self.inner.clone(),
928
+ })
929
+ }
930
+
931
+ #[napi(js_name = "bulkIngestAsync")]
932
+ pub fn bulk_ingest_async(
933
+ &self,
934
+ records_json: String,
935
+ namespace: Option<String>,
936
+ batch_size: u32,
937
+ ) -> Result<AsyncTask<BulkIngestTask>> {
938
+ let records = parse_record_batch_json(&records_json, namespace.as_deref())?;
939
+ Ok(AsyncTask::new(BulkIngestTask {
940
+ db: self.inner.clone(),
941
+ records,
942
+ batch_size: batch_size as usize,
943
+ }))
944
+ }
945
+ }
946
+
396
947
  #[napi]
397
948
  impl NativeTransaction {
398
949
  #[napi]
@@ -410,11 +961,13 @@ impl NativeTransaction {
410
961
  namespace: Option<String>,
411
962
  sparse_json: Option<String>,
412
963
  vectors_json: Option<String>,
964
+ ttl: Option<f64>,
413
965
  ) -> Result<()> {
414
966
  let metadata = parse_metadata_json(metadata_json)?;
415
967
  let sparse = parse_sparse_json(sparse_json)?;
416
968
  let vectors = parse_named_vectors_json(vectors_json)?;
417
969
  let vector = js_vector_to_core(vector, "vector")?;
970
+ let expires_at = ttl_to_expires_at(ttl)?;
418
971
  self.stage(WriteOperation::Insert(Record {
419
972
  namespace: namespace.unwrap_or_default(),
420
973
  id,
@@ -422,6 +975,8 @@ impl NativeTransaction {
422
975
  vectors,
423
976
  sparse,
424
977
  metadata,
978
+ multi_vectors: MultiVectors::new(),
979
+ expires_at,
425
980
  }))
426
981
  }
427
982
 
@@ -434,11 +989,13 @@ impl NativeTransaction {
434
989
  namespace: Option<String>,
435
990
  sparse_json: Option<String>,
436
991
  vectors_json: Option<String>,
992
+ ttl: Option<f64>,
437
993
  ) -> Result<()> {
438
994
  let metadata = parse_metadata_json(metadata_json)?;
439
995
  let sparse = parse_sparse_json(sparse_json)?;
440
996
  let vectors = parse_named_vectors_json(vectors_json)?;
441
997
  let vector = js_vector_to_core(vector, "vector")?;
998
+ let expires_at = ttl_to_expires_at(ttl)?;
442
999
  self.stage(WriteOperation::Upsert(Record {
443
1000
  namespace: namespace.unwrap_or_default(),
444
1001
  id,
@@ -446,6 +1003,8 @@ impl NativeTransaction {
446
1003
  vectors,
447
1004
  sparse,
448
1005
  metadata,
1006
+ multi_vectors: MultiVectors::new(),
1007
+ expires_at,
449
1008
  }))
450
1009
  }
451
1010
 
@@ -604,7 +1163,13 @@ pub fn open(
604
1163
  dimension: Option<u32>,
605
1164
  read_only: bool,
606
1165
  lock_timeout: Option<f64>,
1166
+ metric: Option<String>,
607
1167
  ) -> Result<NativeDatabase> {
1168
+ let parsed_metric = match metric.as_deref() {
1169
+ Some(name) => DistanceMetric::from_name(name).map_err(to_napi_error)?,
1170
+ None => DistanceMetric::Cosine,
1171
+ };
1172
+
608
1173
  let database = if read_only {
609
1174
  if !Path::new(&path).exists() {
610
1175
  return Err(err("cannot open non-existent database in read-only mode"));
@@ -627,7 +1192,8 @@ pub fn open(
627
1192
  db
628
1193
  }
629
1194
  (Some(dimension), None) => {
630
- CoreDatabase::open_or_create(&path, dimension as usize).map_err(to_napi_error)?
1195
+ CoreDatabase::open_or_create_with_metric(&path, dimension as usize, parsed_metric)
1196
+ .map_err(to_napi_error)?
631
1197
  }
632
1198
  (None, Some(timeout)) => {
633
1199
  CoreDatabase::open_with_timeout(&path, timeout).map_err(to_napi_error)?
@@ -638,7 +1204,8 @@ pub fn open(
638
1204
  let Some(dimension) = dimension else {
639
1205
  return Err(err("dimension is required when creating a new database"));
640
1206
  };
641
- CoreDatabase::create(&path, dimension as usize).map_err(to_napi_error)?
1207
+ CoreDatabase::create_with_metric(&path, dimension as usize, parsed_metric)
1208
+ .map_err(to_napi_error)?
642
1209
  };
643
1210
 
644
1211
  Ok(NativeDatabase {
@@ -682,6 +1249,7 @@ fn parse_search_request(options_json: Option<String>) -> Result<SearchRequest> {
682
1249
  let fetch_k = get_usize(object, "fetchK")?.unwrap_or(0);
683
1250
  let mmr_lambda = get_optional_f32(object, "mmrLambda")?;
684
1251
  let vector_name = get_string(object, "vectorName")?;
1252
+ let truncate_dim = get_usize(object, "truncateDim")?;
685
1253
  let fusion_name = get_string(object, "fusion")?.unwrap_or_else(|| "linear".to_owned());
686
1254
  let rrf_k = get_usize(object, "rrfK")?.unwrap_or(60);
687
1255
  let explain = get_bool(object, "explain")?.unwrap_or(false);
@@ -705,6 +1273,7 @@ fn parse_search_request(options_json: Option<String>) -> Result<SearchRequest> {
705
1273
  mmr_lambda,
706
1274
  vector_name,
707
1275
  fusion: parse_fusion(&fusion_name, rrf_k)?,
1276
+ truncate_dim,
708
1277
  multi_vector_queries: query_vectors,
709
1278
  },
710
1279
  })
@@ -829,6 +1398,39 @@ fn json_to_named_vectors(value: &Value) -> Result<NamedVectors> {
829
1398
  Ok(vectors)
830
1399
  }
831
1400
 
1401
+ fn json_to_multi_vectors(value: &Value) -> Result<MultiVectors> {
1402
+ let object = value
1403
+ .as_object()
1404
+ .ok_or_else(|| err("multi_vectors must be a JSON object"))?;
1405
+ let mut multi_vectors = MultiVectors::new();
1406
+ for (name, token_array) in object {
1407
+ if name.is_empty() {
1408
+ return Err(err("multi-vector space names must not be empty"));
1409
+ }
1410
+ let arr = token_array
1411
+ .as_array()
1412
+ .ok_or_else(|| err("multi-vector space value must be an array of arrays"))?;
1413
+ let mut token_vectors = Vec::with_capacity(arr.len());
1414
+ for item in arr {
1415
+ token_vectors.push(value_to_vector(item, "multi-vector token")?);
1416
+ }
1417
+ multi_vectors.insert(name.clone(), token_vectors);
1418
+ }
1419
+ Ok(multi_vectors)
1420
+ }
1421
+
1422
+ fn multi_vectors_to_json(mv: &MultiVectors) -> Value {
1423
+ let mut map = Map::new();
1424
+ for (name, token_vectors) in mv {
1425
+ let arr: Vec<Value> = token_vectors
1426
+ .iter()
1427
+ .map(|v| Value::Array(v.iter().map(|&f| json!(f)).collect()))
1428
+ .collect();
1429
+ map.insert(name.clone(), Value::Array(arr));
1430
+ }
1431
+ Value::Object(map)
1432
+ }
1433
+
832
1434
  fn json_to_filter(value: &Value) -> Result<MetadataFilter> {
833
1435
  let object = value
834
1436
  .as_object()
@@ -1013,6 +1615,18 @@ fn json_to_record(object: &Map<String, Value>, default_namespace: Option<&str>)
1013
1615
  .transpose()?
1014
1616
  .unwrap_or_default();
1015
1617
 
1618
+ let multi_vectors = object
1619
+ .get("multi_vectors")
1620
+ .or_else(|| object.get("multiVectors"))
1621
+ .map(json_to_multi_vectors)
1622
+ .transpose()?
1623
+ .unwrap_or_default();
1624
+
1625
+ let ttl = object
1626
+ .get("ttl")
1627
+ .and_then(|v| v.as_f64());
1628
+ let expires_at = ttl_to_expires_at(ttl)?;
1629
+
1016
1630
  Ok(Record {
1017
1631
  namespace,
1018
1632
  id: value_to_string(id)?,
@@ -1020,6 +1634,8 @@ fn json_to_record(object: &Map<String, Value>, default_namespace: Option<&str>)
1020
1634
  vectors,
1021
1635
  sparse,
1022
1636
  metadata,
1637
+ multi_vectors,
1638
+ expires_at,
1023
1639
  })
1024
1640
  }
1025
1641
 
@@ -1031,6 +1647,8 @@ fn record_to_json(record: &Record) -> Value {
1031
1647
  "vectors": named_vectors_to_json(&record.vectors),
1032
1648
  "sparse": sparse_to_json(&record.sparse),
1033
1649
  "metadata": metadata_to_json(&record.metadata),
1650
+ "multi_vectors": multi_vectors_to_json(&record.multi_vectors),
1651
+ "expires_at": record.expires_at,
1034
1652
  })
1035
1653
  }
1036
1654
 
@@ -1132,6 +1750,8 @@ fn search_stats_to_json(stats: &vectlite::SearchStats) -> Value {
1132
1750
  "ann_loaded_from_disk": stats.ann_loaded_from_disk,
1133
1751
  "wal_entries_replayed": stats.wal_entries_replayed,
1134
1752
  "fusion": stats.fusion,
1753
+ "effective_dimension": stats.effective_dimension,
1754
+ "matryoshka_truncated": stats.matryoshka_truncated,
1135
1755
  "rerank_applied": false,
1136
1756
  "rerank_count": 0,
1137
1757
  "timings": {
@@ -1286,6 +1906,23 @@ fn value_to_f32(value: &Value, label: &str) -> Result<f32> {
1286
1906
  .ok_or_else(|| err(format!("{label} must contain numeric values")))
1287
1907
  }
1288
1908
 
1909
+ /// Convert an optional TTL (seconds from now) to an absolute `expires_at` timestamp.
1910
+ fn ttl_to_expires_at(ttl: Option<f64>) -> Result<Option<f64>> {
1911
+ match ttl {
1912
+ None => Ok(None),
1913
+ Some(t) if t < 0.0 || t.is_nan() => {
1914
+ Err(err("ttl must be a non-negative finite number"))
1915
+ }
1916
+ Some(t) => {
1917
+ let now = std::time::SystemTime::now()
1918
+ .duration_since(std::time::UNIX_EPOCH)
1919
+ .unwrap_or_default()
1920
+ .as_secs_f64();
1921
+ Ok(Some(now + t))
1922
+ }
1923
+ }
1924
+ }
1925
+
1289
1926
  fn js_vector_to_core(values: Vec<f64>, label: &str) -> Result<Vec<f32>> {
1290
1927
  let mut vector = Vec::with_capacity(values.len());
1291
1928
  for value in values {
@@ -1315,3 +1952,74 @@ fn to_napi_error(error: vectlite::VectLiteError) -> NapiError {
1315
1952
  fn closed_database_error() -> vectlite::VectLiteError {
1316
1953
  vectlite::VectLiteError::InvalidFormat("database is closed".to_owned())
1317
1954
  }
1955
+
1956
+ fn parse_payload_index_type(name: &str) -> Result<PayloadIndexType> {
1957
+ PayloadIndexType::from_name(name).map_err(to_napi_error)
1958
+ }
1959
+
1960
+ fn parse_quantization_options(
1961
+ options_json: Option<&str>,
1962
+ ) -> Result<(Option<usize>, Option<usize>, Option<usize>, Option<usize>)> {
1963
+ let Some(json_str) = options_json else {
1964
+ return Ok((None, None, None, None));
1965
+ };
1966
+ let value: Value = serde_json::from_str(json_str)
1967
+ .map_err(|e| err(format!("invalid quantization options JSON: {e}")))?;
1968
+ let obj = value
1969
+ .as_object()
1970
+ .ok_or_else(|| err("quantization options must be a JSON object"))?;
1971
+
1972
+ let rescore_multiplier = obj
1973
+ .get("rescoreMultiplier")
1974
+ .or_else(|| obj.get("rescore_multiplier"))
1975
+ .and_then(|v| v.as_u64())
1976
+ .map(|v| v as usize);
1977
+ let num_sub_vectors = obj
1978
+ .get("numSubVectors")
1979
+ .or_else(|| obj.get("num_sub_vectors"))
1980
+ .and_then(|v| v.as_u64())
1981
+ .map(|v| v as usize);
1982
+ let num_centroids = obj
1983
+ .get("numCentroids")
1984
+ .or_else(|| obj.get("num_centroids"))
1985
+ .and_then(|v| v.as_u64())
1986
+ .map(|v| v as usize);
1987
+ let training_iterations = obj
1988
+ .get("trainingIterations")
1989
+ .or_else(|| obj.get("training_iterations"))
1990
+ .and_then(|v| v.as_u64())
1991
+ .map(|v| v as usize);
1992
+
1993
+ Ok((
1994
+ rescore_multiplier,
1995
+ num_sub_vectors,
1996
+ num_centroids,
1997
+ training_iterations,
1998
+ ))
1999
+ }
2000
+
2001
+ fn build_quantization_config(
2002
+ method: &str,
2003
+ rescore_multiplier: Option<usize>,
2004
+ num_sub_vectors: Option<usize>,
2005
+ num_centroids: Option<usize>,
2006
+ training_iterations: Option<usize>,
2007
+ ) -> Result<QuantizationConfig> {
2008
+ match method {
2009
+ "scalar" | "int8" => Ok(QuantizationConfig::Scalar(ScalarQuantizationConfig {
2010
+ rescore_multiplier: rescore_multiplier.unwrap_or(5),
2011
+ })),
2012
+ "binary" => Ok(QuantizationConfig::Binary(BinaryQuantizationConfig {
2013
+ rescore_multiplier: rescore_multiplier.unwrap_or(10),
2014
+ })),
2015
+ "product" | "pq" => Ok(QuantizationConfig::Product(ProductQuantizationConfig {
2016
+ num_sub_vectors: num_sub_vectors.unwrap_or(16),
2017
+ num_centroids: num_centroids.unwrap_or(256),
2018
+ training_iterations: training_iterations.unwrap_or(20),
2019
+ rescore_multiplier: rescore_multiplier.unwrap_or(10),
2020
+ })),
2021
+ other => Err(err(format!(
2022
+ "unknown quantization method '{other}'. Expected: 'scalar', 'binary', or 'product'"
2023
+ ))),
2024
+ }
2025
+ }