vectlite 0.1.12 → 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/native/src/lib.rs CHANGED
@@ -7,13 +7,15 @@ use napi::bindgen_prelude::*;
7
7
  use napi_derive::napi;
8
8
  use serde_json::{Map, Number, Value, json};
9
9
  use vectlite::quantization::{
10
- BinaryQuantizationConfig, ProductQuantizationConfig, QuantizationConfig,
11
- ScalarQuantizationConfig,
10
+ BinaryQuantizationConfig, MultiVectorQuantizationConfig, ProductQuantizationConfig,
11
+ QuantizationConfig, ScalarQuantizationConfig, TwoBitQuantizationConfig,
12
+ default_product_num_sub_vectors,
12
13
  };
13
14
  use vectlite::{
14
- Database as CoreDatabase, FusionStrategy, HybridSearchOptions, Metadata, MetadataFilter,
15
- MetadataValue, NamedVectors, Record, SearchOutcome, SearchResult, SparseVector,
16
- Store as CoreStore, WriteOperation,
15
+ Database as CoreDatabase, DistanceMetric, FusionStrategy, HybridSearchOptions, Metadata,
16
+ MetadataFilter, MetadataValue, MultiVectorSearchOptions, MultiVectors, NamedVectors,
17
+ PayloadIndexType, Record, SearchOutcome, SearchResult, SparseVector, Store as CoreStore,
18
+ WriteOperation,
17
19
  };
18
20
 
19
21
  #[napi(js_name = "NativeDatabase")]
@@ -108,6 +110,13 @@ impl NativeStore {
108
110
  pub fn collections(&self) -> Result<Vec<String>> {
109
111
  self.inner.collections().map_err(to_napi_error)
110
112
  }
113
+
114
+ /// Close the store. This is a no-op (the store holds no open file handles)
115
+ /// but is provided for symmetry with `Database.close()`.
116
+ #[napi]
117
+ pub fn close(&self) -> Result<()> {
118
+ Ok(())
119
+ }
111
120
  }
112
121
 
113
122
  #[napi]
@@ -130,6 +139,12 @@ impl NativeDatabase {
130
139
  Ok(database.dimension() as u32)
131
140
  }
132
141
 
142
+ #[napi(getter)]
143
+ pub fn metric(&self) -> Result<String> {
144
+ let database = self.read()?;
145
+ Ok(database.metric().name().to_owned())
146
+ }
147
+
133
148
  #[napi(getter, js_name = "readOnly")]
134
149
  pub fn read_only(&self) -> Result<bool> {
135
150
  let database = self.read()?;
@@ -146,10 +161,6 @@ impl NativeDatabase {
146
161
  json_to_filter(&value)
147
162
  })
148
163
  .transpose()?;
149
- if namespace.is_none() && filter.is_none() {
150
- let database = self.read()?;
151
- return Ok(database.len() as u32);
152
- }
153
164
  let database = self.read()?;
154
165
  Ok(database.count_filtered(namespace.as_deref(), filter.as_ref()) as u32)
155
166
  }
@@ -199,6 +210,38 @@ impl NativeDatabase {
199
210
  stringify_value(Value::Array(json_records))
200
211
  }
201
212
 
213
+ #[napi(js_name = "listCursor")]
214
+ pub fn list_cursor(
215
+ &self,
216
+ namespace: Option<String>,
217
+ filter_json: Option<String>,
218
+ limit: Option<u32>,
219
+ cursor: Option<String>,
220
+ ) -> Result<String> {
221
+ let filter = filter_json
222
+ .as_ref()
223
+ .map(|json_str| {
224
+ let value: serde_json::Value = serde_json::from_str(json_str)
225
+ .map_err(|e| err(format!("invalid filter JSON: {e}")))?;
226
+ json_to_filter(&value)
227
+ })
228
+ .transpose()?;
229
+ let database = self.read()?;
230
+ let (records, next_cursor) = database.list_cursor(
231
+ namespace.as_deref(),
232
+ filter.as_ref(),
233
+ limit.unwrap_or(0) as usize,
234
+ cursor.as_deref(),
235
+ );
236
+ let records: Vec<Record> = records.into_iter().cloned().collect();
237
+ let json_records: Vec<Value> = records.iter().map(record_to_json).collect();
238
+ let result = serde_json::json!({
239
+ "records": json_records,
240
+ "cursor": next_cursor,
241
+ });
242
+ stringify_value(result)
243
+ }
244
+
202
245
  #[napi(js_name = "deleteByFilter")]
203
246
  pub fn delete_by_filter(&self, filter_json: String, namespace: Option<String>) -> Result<u32> {
204
247
  let value: serde_json::Value = serde_json::from_str(&filter_json)
@@ -211,6 +254,68 @@ impl NativeDatabase {
211
254
  .map_err(to_napi_error)
212
255
  }
213
256
 
257
+ #[napi]
258
+ pub fn update_metadata(
259
+ &self,
260
+ id: String,
261
+ metadata_json: String,
262
+ namespace: Option<String>,
263
+ ) -> Result<bool> {
264
+ let patch = parse_metadata_json(Some(metadata_json))?;
265
+ let mut database = self.write_open()?;
266
+ database
267
+ .update_metadata_in_namespace(namespace.unwrap_or_default(), &id, patch)
268
+ .map_err(to_napi_error)
269
+ }
270
+
271
+ // -------------------------------------------------------------------
272
+ // TTL / Expiry
273
+ // -------------------------------------------------------------------
274
+
275
+ #[napi(js_name = "setTtl")]
276
+ pub fn set_ttl(&self, id: String, ttl: f64, namespace: Option<String>) -> Result<bool> {
277
+ let mut database = self.write_open()?;
278
+ database
279
+ .set_ttl_in_namespace(&namespace.unwrap_or_default(), &id, ttl)
280
+ .map_err(to_napi_error)
281
+ }
282
+
283
+ #[napi(js_name = "clearTtl")]
284
+ pub fn clear_ttl(&self, id: String, namespace: Option<String>) -> Result<bool> {
285
+ let mut database = self.write_open()?;
286
+ database
287
+ .clear_ttl_in_namespace(&namespace.unwrap_or_default(), &id)
288
+ .map_err(to_napi_error)
289
+ }
290
+
291
+ // -------------------------------------------------------------------
292
+ // Payload Indexes
293
+ // -------------------------------------------------------------------
294
+
295
+ #[napi(js_name = "createIndex")]
296
+ pub fn create_index(&self, field: String, index_type: String) -> Result<bool> {
297
+ let ty = parse_payload_index_type(&index_type)?;
298
+ let mut database = self.write_open()?;
299
+ database.create_index(&field, ty).map_err(to_napi_error)
300
+ }
301
+
302
+ #[napi(js_name = "dropIndex")]
303
+ pub fn drop_index(&self, field: String) -> Result<bool> {
304
+ let mut database = self.write_open()?;
305
+ database.drop_index(&field).map_err(to_napi_error)
306
+ }
307
+
308
+ #[napi(js_name = "listIndexes")]
309
+ pub fn list_indexes(&self) -> Result<String> {
310
+ let database = self.read()?;
311
+ let indexes = database.list_indexes();
312
+ let arr: Vec<Value> = indexes
313
+ .into_iter()
314
+ .map(|(field, index_type)| json!({ "field": field, "type": index_type.name() }))
315
+ .collect();
316
+ serde_json::to_string(&arr).map_err(|e| err(format!("JSON serialize: {e}")))
317
+ }
318
+
214
319
  #[napi]
215
320
  pub fn transaction(&self) -> Result<NativeTransaction> {
216
321
  drop(self.read()?);
@@ -229,21 +334,27 @@ impl NativeDatabase {
229
334
  namespace: Option<String>,
230
335
  sparse_json: Option<String>,
231
336
  vectors_json: Option<String>,
337
+ ttl: Option<f64>,
232
338
  ) -> Result<()> {
233
339
  let metadata = parse_metadata_json(metadata_json)?;
234
340
  let sparse = parse_sparse_json(sparse_json)?;
235
341
  let vectors = parse_named_vectors_json(vectors_json)?;
236
342
  let vector = js_vector_to_core(vector, "vector")?;
343
+ let expires_at = ttl_to_expires_at(ttl)?;
344
+ let record = Record {
345
+ namespace: namespace.unwrap_or_default(),
346
+ id,
347
+ vector,
348
+ vectors,
349
+ sparse,
350
+ metadata,
351
+ multi_vectors: MultiVectors::new(),
352
+ expires_at,
353
+ };
237
354
  let mut database = self.write_open()?;
238
355
  database
239
- .insert_with_vectors_in_namespace(
240
- namespace.unwrap_or_default(),
241
- &id,
242
- vector,
243
- vectors,
244
- sparse,
245
- metadata,
246
- )
356
+ .insert_many(std::iter::once(record))
357
+ .map(|_| ())
247
358
  .map_err(to_napi_error)
248
359
  }
249
360
 
@@ -256,21 +367,27 @@ impl NativeDatabase {
256
367
  namespace: Option<String>,
257
368
  sparse_json: Option<String>,
258
369
  vectors_json: Option<String>,
370
+ ttl: Option<f64>,
259
371
  ) -> Result<()> {
260
372
  let metadata = parse_metadata_json(metadata_json)?;
261
373
  let sparse = parse_sparse_json(sparse_json)?;
262
374
  let vectors = parse_named_vectors_json(vectors_json)?;
263
375
  let vector = js_vector_to_core(vector, "vector")?;
376
+ let expires_at = ttl_to_expires_at(ttl)?;
377
+ let record = Record {
378
+ namespace: namespace.unwrap_or_default(),
379
+ id,
380
+ vector,
381
+ vectors,
382
+ sparse,
383
+ metadata,
384
+ multi_vectors: MultiVectors::new(),
385
+ expires_at,
386
+ };
264
387
  let mut database = self.write_open()?;
265
388
  database
266
- .upsert_with_vectors_in_namespace(
267
- namespace.unwrap_or_default(),
268
- &id,
269
- vector,
270
- vectors,
271
- sparse,
272
- metadata,
273
- )
389
+ .upsert_many(std::iter::once(record))
390
+ .map(|_| ())
274
391
  .map_err(to_napi_error)
275
392
  }
276
393
 
@@ -368,14 +485,15 @@ impl NativeDatabase {
368
485
  let method = method.as_deref().unwrap_or("scalar");
369
486
  let (rescore_multiplier, num_sub_vectors, num_centroids, training_iterations) =
370
487
  parse_quantization_options(options_json.as_deref())?;
488
+ let mut database = self.write_open()?;
371
489
  let config = build_quantization_config(
372
490
  method,
373
491
  rescore_multiplier,
374
492
  num_sub_vectors,
375
493
  num_centroids,
376
494
  training_iterations,
495
+ database.dimension(),
377
496
  )?;
378
- let mut database = self.write_open()?;
379
497
  database.enable_quantization(config).map_err(to_napi_error)
380
498
  }
381
499
 
@@ -404,6 +522,208 @@ impl NativeDatabase {
404
522
  }))
405
523
  }
406
524
 
525
+ /// Returns valid Product Quantization num_sub_vectors values for this database.
526
+ #[napi(js_name = "validNumSubVectors")]
527
+ pub fn valid_num_sub_vectors(&self) -> Result<Vec<u32>> {
528
+ let database = self.read()?;
529
+ database
530
+ .valid_num_sub_vectors()
531
+ .into_iter()
532
+ .map(|value| {
533
+ u32::try_from(value).map_err(|_| {
534
+ to_napi_error(vectlite::VectLiteError::InvalidFormat(
535
+ "num_sub_vectors value exceeds u32".to_owned(),
536
+ ))
537
+ })
538
+ })
539
+ .collect()
540
+ }
541
+
542
+ // ---- Multi-vector / ColBERT-style late interaction ----
543
+
544
+ /// Upsert a record with multi-vector token embeddings (ColBERT-style).
545
+ ///
546
+ /// `multi_vectors_json` is a JSON string mapping space names to arrays of
547
+ /// token vectors, e.g. `{"colbert": [[0.1, 0.2], [0.3, 0.4]]}`.
548
+ #[napi(js_name = "upsertMultiVectors")]
549
+ pub fn upsert_multi_vectors(
550
+ &self,
551
+ id: String,
552
+ vector: Vec<f64>,
553
+ multi_vectors_json: String,
554
+ options_json: Option<String>,
555
+ ) -> Result<()> {
556
+ let vector: Vec<f32> = vector.iter().map(|&v| v as f32).collect();
557
+ let mv_value: Value = serde_json::from_str(&multi_vectors_json)
558
+ .map_err(|e| to_napi_error(vectlite::VectLiteError::InvalidFormat(e.to_string())))?;
559
+ let mv = json_to_multi_vectors(&mv_value)?;
560
+
561
+ let (metadata, namespace) = if let Some(opts) = options_json {
562
+ let opts: Value = serde_json::from_str(&opts).map_err(|e| {
563
+ to_napi_error(vectlite::VectLiteError::InvalidFormat(e.to_string()))
564
+ })?;
565
+ let metadata = opts
566
+ .get("metadata")
567
+ .map(|v| json_to_metadata(v))
568
+ .transpose()?
569
+ .unwrap_or_default();
570
+ let namespace = opts
571
+ .get("namespace")
572
+ .and_then(|v| v.as_str())
573
+ .unwrap_or("")
574
+ .to_string();
575
+ (metadata, namespace)
576
+ } else {
577
+ (Metadata::new(), String::new())
578
+ };
579
+
580
+ let mut database = self.write_open()?;
581
+ database
582
+ .upsert_multi_vectors_in_namespace(namespace, id, vector, metadata, mv)
583
+ .map_err(to_napi_error)
584
+ }
585
+
586
+ /// Search using multi-vector late interaction (MaxSim) scoring.
587
+ ///
588
+ /// Returns a JSON array of results with id, score, namespace, and metadata.
589
+ #[napi(js_name = "searchMultiVector")]
590
+ pub fn search_multi_vector(
591
+ &self,
592
+ space: String,
593
+ query_tokens_json: String,
594
+ options_json: Option<String>,
595
+ ) -> Result<String> {
596
+ let qt_value: Value = serde_json::from_str(&query_tokens_json)
597
+ .map_err(|e| to_napi_error(vectlite::VectLiteError::InvalidFormat(e.to_string())))?;
598
+ let qt_arr = qt_value.as_array().ok_or_else(|| {
599
+ to_napi_error(vectlite::VectLiteError::InvalidFormat(
600
+ "query_tokens must be a JSON array of arrays".to_owned(),
601
+ ))
602
+ })?;
603
+ let query_tokens: Vec<Vec<f32>> = qt_arr
604
+ .iter()
605
+ .map(|v| {
606
+ v.as_array()
607
+ .ok_or_else(|| {
608
+ to_napi_error(vectlite::VectLiteError::InvalidFormat(
609
+ "each query token must be an array of numbers".to_owned(),
610
+ ))
611
+ })?
612
+ .iter()
613
+ .map(|n| {
614
+ n.as_f64().map(|f| f as f32).ok_or_else(|| {
615
+ to_napi_error(vectlite::VectLiteError::InvalidFormat(
616
+ "token values must be numbers".to_owned(),
617
+ ))
618
+ })
619
+ })
620
+ .collect::<Result<Vec<f32>>>()
621
+ })
622
+ .collect::<Result<Vec<Vec<f32>>>>()?;
623
+
624
+ let (top_k, filter, namespace) = if let Some(opts) = options_json {
625
+ let opts: Value = serde_json::from_str(&opts).map_err(|e| {
626
+ to_napi_error(vectlite::VectLiteError::InvalidFormat(e.to_string()))
627
+ })?;
628
+ let top_k = opts.get("k").and_then(|v| v.as_u64()).unwrap_or(10) as usize;
629
+ let filter = opts.get("filter").map(|v| json_to_filter(v)).transpose()?;
630
+ let namespace = opts
631
+ .get("namespace")
632
+ .and_then(|v| v.as_str())
633
+ .map(String::from);
634
+ (top_k, filter, namespace)
635
+ } else {
636
+ (10, None, None)
637
+ };
638
+
639
+ let options = MultiVectorSearchOptions {
640
+ top_k,
641
+ filter,
642
+ namespace,
643
+ };
644
+
645
+ let database = self.read()?;
646
+ let results = database
647
+ .search_multi_vector(&space, &query_tokens, options)
648
+ .map_err(to_napi_error)?;
649
+
650
+ let json_results: Vec<Value> = results
651
+ .into_iter()
652
+ .map(|r| {
653
+ json!({
654
+ "id": r.id,
655
+ "score": r.score,
656
+ "namespace": r.namespace,
657
+ "metadata": metadata_to_json(&r.metadata),
658
+ })
659
+ })
660
+ .collect();
661
+
662
+ serde_json::to_string(&json_results)
663
+ .map_err(|e| to_napi_error(vectlite::VectLiteError::InvalidFormat(e.to_string())))
664
+ }
665
+
666
+ /// Enable 2-bit quantization for a multi-vector space.
667
+ #[napi(js_name = "enableMultiVectorQuantization")]
668
+ pub fn enable_multi_vector_quantization(
669
+ &self,
670
+ space: String,
671
+ options_json: Option<String>,
672
+ ) -> Result<()> {
673
+ let (method, rescore_multiplier) = if let Some(opts) = options_json {
674
+ let opts: Value = serde_json::from_str(&opts).map_err(|e| {
675
+ to_napi_error(vectlite::VectLiteError::InvalidFormat(e.to_string()))
676
+ })?;
677
+ let method = opts
678
+ .get("method")
679
+ .and_then(|v| v.as_str())
680
+ .unwrap_or("two_bit")
681
+ .to_string();
682
+ let rescore = opts
683
+ .get("rescoreMultiplier")
684
+ .or_else(|| opts.get("rescore_multiplier"))
685
+ .and_then(|v| v.as_u64())
686
+ .map(|v| v as usize);
687
+ (method, rescore)
688
+ } else {
689
+ ("two_bit".to_string(), None)
690
+ };
691
+
692
+ let config = match method.as_str() {
693
+ "two_bit" => MultiVectorQuantizationConfig::TwoBit(TwoBitQuantizationConfig {
694
+ rescore_multiplier: rescore_multiplier.unwrap_or(4),
695
+ }),
696
+ other => {
697
+ return Err(to_napi_error(vectlite::VectLiteError::InvalidFormat(
698
+ format!(
699
+ "unknown multi-vector quantization method: {other}. Supported: two_bit"
700
+ ),
701
+ )));
702
+ }
703
+ };
704
+
705
+ let mut database = self.write_open()?;
706
+ database
707
+ .enable_multi_vector_quantization(&space, config)
708
+ .map_err(to_napi_error)
709
+ }
710
+
711
+ /// Disable multi-vector quantization for a space.
712
+ #[napi(js_name = "disableMultiVectorQuantization")]
713
+ pub fn disable_multi_vector_quantization(&self, space: String) -> Result<()> {
714
+ let mut database = self.write_open()?;
715
+ database
716
+ .disable_multi_vector_quantization(&space)
717
+ .map_err(to_napi_error)
718
+ }
719
+
720
+ /// Returns true if multi-vector quantization is enabled for the given space.
721
+ #[napi(js_name = "isMultiVectorQuantized")]
722
+ pub fn is_multi_vector_quantized(&self, space: String) -> Result<bool> {
723
+ let database = self.read()?;
724
+ Ok(database.is_multi_vector_quantized(&space))
725
+ }
726
+
407
727
  #[napi]
408
728
  pub fn snapshot(&self, dest: String) -> Result<()> {
409
729
  let database = self.read()?;
@@ -449,6 +769,206 @@ impl NativeDatabase {
449
769
  }
450
770
  }
451
771
 
772
+ // -------------------------------------------------------------------
773
+ // Async tasks (run on libuv threadpool via napi::Task)
774
+ // -------------------------------------------------------------------
775
+
776
+ pub struct SearchTask {
777
+ db: Arc<RwLock<CoreDatabase>>,
778
+ query: Option<Vec<f32>>,
779
+ request: SearchRequest,
780
+ with_stats: bool,
781
+ }
782
+
783
+ impl napi::Task for SearchTask {
784
+ type Output = String;
785
+ type JsValue = String;
786
+
787
+ fn compute(&mut self) -> Result<Self::Output> {
788
+ let database = self
789
+ .db
790
+ .read()
791
+ .map_err(|e| err(format!("lock poisoned: {e}")))?;
792
+ let sparse_ref = if self.request.sparse.is_empty() {
793
+ None
794
+ } else {
795
+ Some(&self.request.sparse)
796
+ };
797
+ let outcome = if self.request.all_namespaces {
798
+ database
799
+ .hybrid_search_all_namespaces_with_stats(
800
+ self.query.as_deref(),
801
+ sparse_ref,
802
+ self.request.options.clone(),
803
+ )
804
+ .map_err(to_napi_error)?
805
+ } else {
806
+ database
807
+ .hybrid_search_in_namespace_with_stats(
808
+ &self.request.namespace,
809
+ self.query.as_deref(),
810
+ sparse_ref,
811
+ self.request.options.clone(),
812
+ )
813
+ .map_err(to_napi_error)?
814
+ };
815
+ if self.with_stats {
816
+ stringify_value(search_outcome_to_json(
817
+ &outcome,
818
+ self.request.explain,
819
+ &self.request.fusion_name,
820
+ ))
821
+ } else {
822
+ stringify_value(search_results_to_json(
823
+ &outcome.results,
824
+ self.request.explain,
825
+ &self.request.fusion_name,
826
+ ))
827
+ }
828
+ }
829
+
830
+ fn resolve(&mut self, _env: napi::Env, output: Self::Output) -> Result<Self::JsValue> {
831
+ Ok(output)
832
+ }
833
+ }
834
+
835
+ pub struct FlushTask {
836
+ db: Arc<RwLock<CoreDatabase>>,
837
+ }
838
+
839
+ impl napi::Task for FlushTask {
840
+ type Output = ();
841
+ type JsValue = ();
842
+
843
+ fn compute(&mut self) -> Result<Self::Output> {
844
+ let mut database = self
845
+ .db
846
+ .write()
847
+ .map_err(|e| err(format!("lock poisoned: {e}")))?;
848
+ database.flush().map_err(to_napi_error)
849
+ }
850
+
851
+ fn resolve(&mut self, _env: napi::Env, _output: Self::Output) -> Result<Self::JsValue> {
852
+ Ok(())
853
+ }
854
+ }
855
+
856
+ pub struct CompactTask {
857
+ db: Arc<RwLock<CoreDatabase>>,
858
+ }
859
+
860
+ impl napi::Task for CompactTask {
861
+ type Output = ();
862
+ type JsValue = ();
863
+
864
+ fn compute(&mut self) -> Result<Self::Output> {
865
+ let mut database = self
866
+ .db
867
+ .write()
868
+ .map_err(|e| err(format!("lock poisoned: {e}")))?;
869
+ database.compact().map_err(to_napi_error)
870
+ }
871
+
872
+ fn resolve(&mut self, _env: napi::Env, _output: Self::Output) -> Result<Self::JsValue> {
873
+ Ok(())
874
+ }
875
+ }
876
+
877
+ pub struct BulkIngestTask {
878
+ db: Arc<RwLock<CoreDatabase>>,
879
+ records: Vec<Record>,
880
+ batch_size: usize,
881
+ }
882
+
883
+ impl napi::Task for BulkIngestTask {
884
+ type Output = u32;
885
+ type JsValue = u32;
886
+
887
+ fn compute(&mut self) -> Result<Self::Output> {
888
+ let records = std::mem::take(&mut self.records);
889
+ let mut database = self
890
+ .db
891
+ .write()
892
+ .map_err(|e| err(format!("lock poisoned: {e}")))?;
893
+ database
894
+ .bulk_ingest(records, self.batch_size)
895
+ .map(|count| count as u32)
896
+ .map_err(to_napi_error)
897
+ }
898
+
899
+ fn resolve(&mut self, _env: napi::Env, output: Self::Output) -> Result<Self::JsValue> {
900
+ Ok(output)
901
+ }
902
+ }
903
+
904
+ #[napi]
905
+ impl NativeDatabase {
906
+ #[napi(js_name = "searchAsync")]
907
+ pub fn search_async(
908
+ &self,
909
+ query: Option<Vec<f64>>,
910
+ options_json: Option<String>,
911
+ ) -> Result<AsyncTask<SearchTask>> {
912
+ let request = parse_search_request(options_json)?;
913
+ let query = query
914
+ .map(|vector| js_vector_to_core(vector, "query vector"))
915
+ .transpose()?;
916
+ Ok(AsyncTask::new(SearchTask {
917
+ db: self.inner.clone(),
918
+ query,
919
+ request,
920
+ with_stats: false,
921
+ }))
922
+ }
923
+
924
+ #[napi(js_name = "searchWithStatsAsync")]
925
+ pub fn search_with_stats_async(
926
+ &self,
927
+ query: Option<Vec<f64>>,
928
+ options_json: Option<String>,
929
+ ) -> Result<AsyncTask<SearchTask>> {
930
+ let request = parse_search_request(options_json)?;
931
+ let query = query
932
+ .map(|vector| js_vector_to_core(vector, "query vector"))
933
+ .transpose()?;
934
+ Ok(AsyncTask::new(SearchTask {
935
+ db: self.inner.clone(),
936
+ query,
937
+ request,
938
+ with_stats: true,
939
+ }))
940
+ }
941
+
942
+ #[napi(js_name = "flushAsync")]
943
+ pub fn flush_async(&self) -> AsyncTask<FlushTask> {
944
+ AsyncTask::new(FlushTask {
945
+ db: self.inner.clone(),
946
+ })
947
+ }
948
+
949
+ #[napi(js_name = "compactAsync")]
950
+ pub fn compact_async(&self) -> AsyncTask<CompactTask> {
951
+ AsyncTask::new(CompactTask {
952
+ db: self.inner.clone(),
953
+ })
954
+ }
955
+
956
+ #[napi(js_name = "bulkIngestAsync")]
957
+ pub fn bulk_ingest_async(
958
+ &self,
959
+ records_json: String,
960
+ namespace: Option<String>,
961
+ batch_size: u32,
962
+ ) -> Result<AsyncTask<BulkIngestTask>> {
963
+ let records = parse_record_batch_json(&records_json, namespace.as_deref())?;
964
+ Ok(AsyncTask::new(BulkIngestTask {
965
+ db: self.inner.clone(),
966
+ records,
967
+ batch_size: batch_size as usize,
968
+ }))
969
+ }
970
+ }
971
+
452
972
  #[napi]
453
973
  impl NativeTransaction {
454
974
  #[napi]
@@ -466,11 +986,13 @@ impl NativeTransaction {
466
986
  namespace: Option<String>,
467
987
  sparse_json: Option<String>,
468
988
  vectors_json: Option<String>,
989
+ ttl: Option<f64>,
469
990
  ) -> Result<()> {
470
991
  let metadata = parse_metadata_json(metadata_json)?;
471
992
  let sparse = parse_sparse_json(sparse_json)?;
472
993
  let vectors = parse_named_vectors_json(vectors_json)?;
473
994
  let vector = js_vector_to_core(vector, "vector")?;
995
+ let expires_at = ttl_to_expires_at(ttl)?;
474
996
  self.stage(WriteOperation::Insert(Record {
475
997
  namespace: namespace.unwrap_or_default(),
476
998
  id,
@@ -478,6 +1000,8 @@ impl NativeTransaction {
478
1000
  vectors,
479
1001
  sparse,
480
1002
  metadata,
1003
+ multi_vectors: MultiVectors::new(),
1004
+ expires_at,
481
1005
  }))
482
1006
  }
483
1007
 
@@ -490,11 +1014,13 @@ impl NativeTransaction {
490
1014
  namespace: Option<String>,
491
1015
  sparse_json: Option<String>,
492
1016
  vectors_json: Option<String>,
1017
+ ttl: Option<f64>,
493
1018
  ) -> Result<()> {
494
1019
  let metadata = parse_metadata_json(metadata_json)?;
495
1020
  let sparse = parse_sparse_json(sparse_json)?;
496
1021
  let vectors = parse_named_vectors_json(vectors_json)?;
497
1022
  let vector = js_vector_to_core(vector, "vector")?;
1023
+ let expires_at = ttl_to_expires_at(ttl)?;
498
1024
  self.stage(WriteOperation::Upsert(Record {
499
1025
  namespace: namespace.unwrap_or_default(),
500
1026
  id,
@@ -502,6 +1028,8 @@ impl NativeTransaction {
502
1028
  vectors,
503
1029
  sparse,
504
1030
  metadata,
1031
+ multi_vectors: MultiVectors::new(),
1032
+ expires_at,
505
1033
  }))
506
1034
  }
507
1035
 
@@ -660,7 +1188,13 @@ pub fn open(
660
1188
  dimension: Option<u32>,
661
1189
  read_only: bool,
662
1190
  lock_timeout: Option<f64>,
1191
+ metric: Option<String>,
663
1192
  ) -> Result<NativeDatabase> {
1193
+ let parsed_metric = match metric.as_deref() {
1194
+ Some(name) => DistanceMetric::from_name(name).map_err(to_napi_error)?,
1195
+ None => DistanceMetric::Cosine,
1196
+ };
1197
+
664
1198
  let database = if read_only {
665
1199
  if !Path::new(&path).exists() {
666
1200
  return Err(err("cannot open non-existent database in read-only mode"));
@@ -683,7 +1217,8 @@ pub fn open(
683
1217
  db
684
1218
  }
685
1219
  (Some(dimension), None) => {
686
- CoreDatabase::open_or_create(&path, dimension as usize).map_err(to_napi_error)?
1220
+ CoreDatabase::open_or_create_with_metric(&path, dimension as usize, parsed_metric)
1221
+ .map_err(to_napi_error)?
687
1222
  }
688
1223
  (None, Some(timeout)) => {
689
1224
  CoreDatabase::open_with_timeout(&path, timeout).map_err(to_napi_error)?
@@ -694,7 +1229,8 @@ pub fn open(
694
1229
  let Some(dimension) = dimension else {
695
1230
  return Err(err("dimension is required when creating a new database"));
696
1231
  };
697
- CoreDatabase::create(&path, dimension as usize).map_err(to_napi_error)?
1232
+ CoreDatabase::create_with_metric(&path, dimension as usize, parsed_metric)
1233
+ .map_err(to_napi_error)?
698
1234
  };
699
1235
 
700
1236
  Ok(NativeDatabase {
@@ -738,6 +1274,7 @@ fn parse_search_request(options_json: Option<String>) -> Result<SearchRequest> {
738
1274
  let fetch_k = get_usize(object, "fetchK")?.unwrap_or(0);
739
1275
  let mmr_lambda = get_optional_f32(object, "mmrLambda")?;
740
1276
  let vector_name = get_string(object, "vectorName")?;
1277
+ let truncate_dim = get_usize(object, "truncateDim")?;
741
1278
  let fusion_name = get_string(object, "fusion")?.unwrap_or_else(|| "linear".to_owned());
742
1279
  let rrf_k = get_usize(object, "rrfK")?.unwrap_or(60);
743
1280
  let explain = get_bool(object, "explain")?.unwrap_or(false);
@@ -761,6 +1298,7 @@ fn parse_search_request(options_json: Option<String>) -> Result<SearchRequest> {
761
1298
  mmr_lambda,
762
1299
  vector_name,
763
1300
  fusion: parse_fusion(&fusion_name, rrf_k)?,
1301
+ truncate_dim,
764
1302
  multi_vector_queries: query_vectors,
765
1303
  },
766
1304
  })
@@ -885,6 +1423,39 @@ fn json_to_named_vectors(value: &Value) -> Result<NamedVectors> {
885
1423
  Ok(vectors)
886
1424
  }
887
1425
 
1426
+ fn json_to_multi_vectors(value: &Value) -> Result<MultiVectors> {
1427
+ let object = value
1428
+ .as_object()
1429
+ .ok_or_else(|| err("multi_vectors must be a JSON object"))?;
1430
+ let mut multi_vectors = MultiVectors::new();
1431
+ for (name, token_array) in object {
1432
+ if name.is_empty() {
1433
+ return Err(err("multi-vector space names must not be empty"));
1434
+ }
1435
+ let arr = token_array
1436
+ .as_array()
1437
+ .ok_or_else(|| err("multi-vector space value must be an array of arrays"))?;
1438
+ let mut token_vectors = Vec::with_capacity(arr.len());
1439
+ for item in arr {
1440
+ token_vectors.push(value_to_vector(item, "multi-vector token")?);
1441
+ }
1442
+ multi_vectors.insert(name.clone(), token_vectors);
1443
+ }
1444
+ Ok(multi_vectors)
1445
+ }
1446
+
1447
+ fn multi_vectors_to_json(mv: &MultiVectors) -> Value {
1448
+ let mut map = Map::new();
1449
+ for (name, token_vectors) in mv {
1450
+ let arr: Vec<Value> = token_vectors
1451
+ .iter()
1452
+ .map(|v| Value::Array(v.iter().map(|&f| json!(f)).collect()))
1453
+ .collect();
1454
+ map.insert(name.clone(), Value::Array(arr));
1455
+ }
1456
+ Value::Object(map)
1457
+ }
1458
+
888
1459
  fn json_to_filter(value: &Value) -> Result<MetadataFilter> {
889
1460
  let object = value
890
1461
  .as_object()
@@ -1069,6 +1640,16 @@ fn json_to_record(object: &Map<String, Value>, default_namespace: Option<&str>)
1069
1640
  .transpose()?
1070
1641
  .unwrap_or_default();
1071
1642
 
1643
+ let multi_vectors = object
1644
+ .get("multi_vectors")
1645
+ .or_else(|| object.get("multiVectors"))
1646
+ .map(json_to_multi_vectors)
1647
+ .transpose()?
1648
+ .unwrap_or_default();
1649
+
1650
+ let ttl = object.get("ttl").and_then(|v| v.as_f64());
1651
+ let expires_at = ttl_to_expires_at(ttl)?;
1652
+
1072
1653
  Ok(Record {
1073
1654
  namespace,
1074
1655
  id: value_to_string(id)?,
@@ -1076,6 +1657,8 @@ fn json_to_record(object: &Map<String, Value>, default_namespace: Option<&str>)
1076
1657
  vectors,
1077
1658
  sparse,
1078
1659
  metadata,
1660
+ multi_vectors,
1661
+ expires_at,
1079
1662
  })
1080
1663
  }
1081
1664
 
@@ -1087,6 +1670,8 @@ fn record_to_json(record: &Record) -> Value {
1087
1670
  "vectors": named_vectors_to_json(&record.vectors),
1088
1671
  "sparse": sparse_to_json(&record.sparse),
1089
1672
  "metadata": metadata_to_json(&record.metadata),
1673
+ "multi_vectors": multi_vectors_to_json(&record.multi_vectors),
1674
+ "expires_at": record.expires_at,
1090
1675
  })
1091
1676
  }
1092
1677
 
@@ -1188,6 +1773,8 @@ fn search_stats_to_json(stats: &vectlite::SearchStats) -> Value {
1188
1773
  "ann_loaded_from_disk": stats.ann_loaded_from_disk,
1189
1774
  "wal_entries_replayed": stats.wal_entries_replayed,
1190
1775
  "fusion": stats.fusion,
1776
+ "effective_dimension": stats.effective_dimension,
1777
+ "matryoshka_truncated": stats.matryoshka_truncated,
1191
1778
  "rerank_applied": false,
1192
1779
  "rerank_count": 0,
1193
1780
  "timings": {
@@ -1342,6 +1929,21 @@ fn value_to_f32(value: &Value, label: &str) -> Result<f32> {
1342
1929
  .ok_or_else(|| err(format!("{label} must contain numeric values")))
1343
1930
  }
1344
1931
 
1932
+ /// Convert an optional TTL (seconds from now) to an absolute `expires_at` timestamp.
1933
+ fn ttl_to_expires_at(ttl: Option<f64>) -> Result<Option<f64>> {
1934
+ match ttl {
1935
+ None => Ok(None),
1936
+ Some(t) if t < 0.0 || t.is_nan() => Err(err("ttl must be a non-negative finite number")),
1937
+ Some(t) => {
1938
+ let now = std::time::SystemTime::now()
1939
+ .duration_since(std::time::UNIX_EPOCH)
1940
+ .unwrap_or_default()
1941
+ .as_secs_f64();
1942
+ Ok(Some(now + t))
1943
+ }
1944
+ }
1945
+ }
1946
+
1345
1947
  fn js_vector_to_core(values: Vec<f64>, label: &str) -> Result<Vec<f32>> {
1346
1948
  let mut vector = Vec::with_capacity(values.len());
1347
1949
  for value in values {
@@ -1372,6 +1974,10 @@ fn closed_database_error() -> vectlite::VectLiteError {
1372
1974
  vectlite::VectLiteError::InvalidFormat("database is closed".to_owned())
1373
1975
  }
1374
1976
 
1977
+ fn parse_payload_index_type(name: &str) -> Result<PayloadIndexType> {
1978
+ PayloadIndexType::from_name(name).map_err(to_napi_error)
1979
+ }
1980
+
1375
1981
  fn parse_quantization_options(
1376
1982
  options_json: Option<&str>,
1377
1983
  ) -> Result<(Option<usize>, Option<usize>, Option<usize>, Option<usize>)> {
@@ -1419,22 +2025,34 @@ fn build_quantization_config(
1419
2025
  num_sub_vectors: Option<usize>,
1420
2026
  num_centroids: Option<usize>,
1421
2027
  training_iterations: Option<usize>,
2028
+ dimension: usize,
1422
2029
  ) -> Result<QuantizationConfig> {
1423
- match method {
1424
- "scalar" | "int8" => Ok(QuantizationConfig::Scalar(ScalarQuantizationConfig {
1425
- rescore_multiplier: rescore_multiplier.unwrap_or(5),
1426
- })),
1427
- "binary" => Ok(QuantizationConfig::Binary(BinaryQuantizationConfig {
1428
- rescore_multiplier: rescore_multiplier.unwrap_or(10),
1429
- })),
1430
- "product" | "pq" => Ok(QuantizationConfig::Product(ProductQuantizationConfig {
1431
- num_sub_vectors: num_sub_vectors.unwrap_or(16),
1432
- num_centroids: num_centroids.unwrap_or(256),
1433
- training_iterations: training_iterations.unwrap_or(20),
1434
- rescore_multiplier: rescore_multiplier.unwrap_or(10),
1435
- })),
1436
- other => Err(err(format!(
1437
- "unknown quantization method '{other}'. Expected: 'scalar', 'binary', or 'product'"
2030
+ let normalized = method.to_ascii_lowercase();
2031
+ match normalized.as_str() {
2032
+ "scalar" | "int8" => {
2033
+ let default = ScalarQuantizationConfig::default();
2034
+ Ok(QuantizationConfig::Scalar(ScalarQuantizationConfig {
2035
+ rescore_multiplier: rescore_multiplier.unwrap_or(default.rescore_multiplier),
2036
+ }))
2037
+ }
2038
+ "binary" => {
2039
+ let default = BinaryQuantizationConfig::default();
2040
+ Ok(QuantizationConfig::Binary(BinaryQuantizationConfig {
2041
+ rescore_multiplier: rescore_multiplier.unwrap_or(default.rescore_multiplier),
2042
+ }))
2043
+ }
2044
+ "product" | "pq" => {
2045
+ let default = ProductQuantizationConfig::default();
2046
+ Ok(QuantizationConfig::Product(ProductQuantizationConfig {
2047
+ num_sub_vectors: num_sub_vectors
2048
+ .unwrap_or_else(|| default_product_num_sub_vectors(dimension)),
2049
+ num_centroids: num_centroids.unwrap_or(default.num_centroids),
2050
+ training_iterations: training_iterations.unwrap_or(default.training_iterations),
2051
+ rescore_multiplier: rescore_multiplier.unwrap_or(default.rescore_multiplier),
2052
+ }))
2053
+ }
2054
+ _ => Err(err(format!(
2055
+ "unknown quantization method '{method}'. Expected: 'scalar', 'binary', or 'pq' (alias: 'product')"
1438
2056
  ))),
1439
2057
  }
1440
2058
  }