vectlite 0.1.11 → 0.1.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,5 @@
1
+ pub mod quantization;
2
+
1
3
  use std::collections::{BTreeMap, BTreeSet};
2
4
  use std::error::Error as StdError;
3
5
  use std::fmt;
@@ -9,6 +11,8 @@ use std::time::Instant;
9
11
  use fs2::FileExt;
10
12
  use hnsw_rs::prelude::*;
11
13
 
14
+ use quantization::{QuantizationConfig, QuantizedIndex};
15
+
12
16
  const MAGIC: &[u8; 4] = b"VDB1";
13
17
  const VERSION: u16 = 4;
14
18
  const WAL_MAGIC: &[u8; 4] = b"VWL1";
@@ -687,6 +691,8 @@ impl Store {
687
691
  let _ = fs::remove_file(&wal);
688
692
  let manifest = ann_manifest_path(&path);
689
693
  let _ = fs::remove_file(&manifest);
694
+ let quant = quantization_params_path(&path);
695
+ let _ = fs::remove_file(&quant);
690
696
  // Remove any .hnsw.* sidecar files
691
697
  if let Some(parent) = path.parent() {
692
698
  if let Some(stem) = path.file_name().and_then(|n| n.to_str()) {
@@ -738,6 +744,12 @@ pub struct Database {
738
744
  /// Holds the lock file open for the lifetime of the database.
739
745
  /// Dropping this releases the advisory lock.
740
746
  _lock_file: Option<File>,
747
+ /// Optional quantized index for accelerated search.
748
+ quantized: Option<QuantizedIndex>,
749
+ /// Configuration used to build the quantized index (persisted).
750
+ quantization_config: Option<QuantizationConfig>,
751
+ /// Ordered keys mapping quantized index positions to record keys.
752
+ quantized_keys: Vec<RecordKey>,
741
753
  }
742
754
 
743
755
  #[derive(Default)]
@@ -788,6 +800,9 @@ impl Database {
788
800
  ann_loaded_from_disk: false,
789
801
  read_only: false,
790
802
  _lock_file: Some(lock),
803
+ quantized: None,
804
+ quantization_config: None,
805
+ quantized_keys: Vec::new(),
791
806
  };
792
807
 
793
808
  database.flush()?;
@@ -807,6 +822,7 @@ impl Database {
807
822
  if !database.ann_loaded_from_disk {
808
823
  database.rebuild_ann();
809
824
  }
825
+ database.try_load_quantization();
810
826
  Ok(database)
811
827
  }
812
828
 
@@ -827,6 +843,7 @@ impl Database {
827
843
  if !database.ann_loaded_from_disk {
828
844
  database.rebuild_ann();
829
845
  }
846
+ database.try_load_quantization();
830
847
  Ok(database)
831
848
  }
832
849
 
@@ -858,6 +875,7 @@ impl Database {
858
875
  if !database.ann_loaded_from_disk {
859
876
  database.rebuild_ann();
860
877
  }
878
+ database.try_load_quantization();
861
879
  Ok(database)
862
880
  }
863
881
 
@@ -887,6 +905,9 @@ impl Database {
887
905
  self.records.clear();
888
906
  self.ann = AnnCatalog::default();
889
907
  self.sparse_index = SparseIndex::default();
908
+ self.quantized = None;
909
+ self.quantization_config = None;
910
+ self.quantized_keys.clear();
890
911
  self.dimension = 0;
891
912
  Ok(())
892
913
  }
@@ -1190,6 +1211,7 @@ impl Database {
1190
1211
  self.rebuild_ann();
1191
1212
  self.ann_loaded_from_disk = false;
1192
1213
  self.persist_ann_to_disk()?;
1214
+ self.rebuild_quantized_index();
1193
1215
  Ok(count)
1194
1216
  }
1195
1217
 
@@ -1216,6 +1238,7 @@ impl Database {
1216
1238
  self.rebuild_ann();
1217
1239
  self.ann_loaded_from_disk = false;
1218
1240
  self.persist_ann_to_disk()?;
1241
+ self.rebuild_quantized_index();
1219
1242
  Ok(count)
1220
1243
  }
1221
1244
 
@@ -1414,6 +1437,7 @@ impl Database {
1414
1437
  self.rebuild_ann();
1415
1438
  self.ann_loaded_from_disk = false;
1416
1439
  self.persist_ann_to_disk()?;
1440
+ self.rebuild_quantized_index();
1417
1441
  Ok(())
1418
1442
  }
1419
1443
 
@@ -1464,8 +1488,22 @@ impl Database {
1464
1488
  let vector_name = options.vector_name.as_deref();
1465
1489
 
1466
1490
  let dense_start = Instant::now();
1467
- let ann_candidates = dense_query
1468
- .and_then(|query| self.ann_candidate_keys(namespace, vector_name, query, fetch_k));
1491
+ // Use quantized index for candidate selection if available (2-stage pipeline).
1492
+ // The quantized index operates on the default vector only and globally (not per-namespace).
1493
+ let quantized_candidates =
1494
+ if vector_name.is_none() || vector_name == Some(DEFAULT_VECTOR_NAME) {
1495
+ dense_query.and_then(|query| self.quantized_candidate_keys(query, fetch_k))
1496
+ } else {
1497
+ None
1498
+ };
1499
+ let ann_candidates = if quantized_candidates.is_some() {
1500
+ // Skip HNSW if quantized index provided candidates
1501
+ None
1502
+ } else {
1503
+ dense_query
1504
+ .and_then(|query| self.ann_candidate_keys(namespace, vector_name, query, fetch_k))
1505
+ };
1506
+ let effective_dense_candidates = quantized_candidates.or(ann_candidates);
1469
1507
  let dense_us = dense_start.elapsed().as_micros() as u64;
1470
1508
 
1471
1509
  let sparse_start = Instant::now();
@@ -1476,17 +1514,17 @@ impl Database {
1476
1514
 
1477
1515
  let candidate_keys = if dense_query.is_none() {
1478
1516
  Some(sparse_candidates.clone())
1479
- } else if dense_query.is_some() && ann_candidates.is_none() {
1517
+ } else if dense_query.is_some() && effective_dense_candidates.is_none() {
1480
1518
  None
1481
1519
  } else {
1482
1520
  merge_candidate_keys(
1483
- ann_candidates.as_deref(),
1521
+ effective_dense_candidates.as_deref(),
1484
1522
  Some(sparse_candidates.as_slice()),
1485
1523
  )
1486
1524
  };
1487
1525
  let mut stats = SearchStats {
1488
- used_ann: ann_candidates.is_some(),
1489
- ann_candidate_count: ann_candidates.as_ref().map_or(0, Vec::len),
1526
+ used_ann: effective_dense_candidates.is_some(),
1527
+ ann_candidate_count: effective_dense_candidates.as_ref().map_or(0, Vec::len),
1490
1528
  fetch_k,
1491
1529
  sparse_candidate_count: sparse_candidates.len(),
1492
1530
  ann_loaded_from_disk: self.ann_loaded_from_disk,
@@ -1504,7 +1542,7 @@ impl Database {
1504
1542
  );
1505
1543
  stats.considered_count = results.len();
1506
1544
 
1507
- if ann_candidates.is_some() && results.len() < fetch_k {
1545
+ if effective_dense_candidates.is_some() && results.len() < fetch_k {
1508
1546
  stats.exact_fallback = true;
1509
1547
  results = self.collect_results(dense_query, sparse_query, &options, namespace, None);
1510
1548
  stats.considered_count = results.len();
@@ -1596,6 +1634,7 @@ impl Database {
1596
1634
  self.rebuild_ann();
1597
1635
  self.ann_loaded_from_disk = false;
1598
1636
  self.persist_ann_to_disk()?;
1637
+ self.rebuild_quantized_index();
1599
1638
  }
1600
1639
 
1601
1640
  Ok(total)
@@ -1606,6 +1645,142 @@ impl Database {
1606
1645
  self.compact_inner()
1607
1646
  }
1608
1647
 
1648
+ // -----------------------------------------------------------------------
1649
+ // Quantization API
1650
+ // -----------------------------------------------------------------------
1651
+
1652
+ /// Enable quantization on this database. Trains the quantizer on all current
1653
+ /// vectors and persists the configuration. Subsequent searches will use the
1654
+ /// quantized index for fast candidate selection followed by exact rescoring.
1655
+ pub fn enable_quantization(&mut self, config: QuantizationConfig) -> Result<()> {
1656
+ self.check_writable()?;
1657
+ if self.records.is_empty() {
1658
+ return Err(VectLiteError::InvalidFormat(
1659
+ "cannot enable quantization on an empty database".to_owned(),
1660
+ ));
1661
+ }
1662
+ self.quantization_config = Some(config);
1663
+ self.rebuild_quantized_index();
1664
+ self.persist_quantization_params()?;
1665
+ Ok(())
1666
+ }
1667
+
1668
+ /// Disable quantization and remove persisted parameters.
1669
+ pub fn disable_quantization(&mut self) -> Result<()> {
1670
+ self.check_writable()?;
1671
+ self.quantized = None;
1672
+ self.quantization_config = None;
1673
+ self.quantized_keys.clear();
1674
+ // Remove the sidecar file
1675
+ let params_path = quantization_params_path(&self.path);
1676
+ if params_path.exists() {
1677
+ fs::remove_file(&params_path)?;
1678
+ }
1679
+ Ok(())
1680
+ }
1681
+
1682
+ /// Returns true if quantization is enabled.
1683
+ pub fn is_quantized(&self) -> bool {
1684
+ self.quantized.is_some()
1685
+ }
1686
+
1687
+ /// Returns the quantization configuration if enabled.
1688
+ pub fn quantization_config(&self) -> Option<&QuantizationConfig> {
1689
+ self.quantization_config.as_ref()
1690
+ }
1691
+
1692
+ /// Rebuild the quantized index from current records.
1693
+ fn rebuild_quantized_index(&mut self) {
1694
+ let config = match &self.quantization_config {
1695
+ Some(config) => config.clone(),
1696
+ None => return,
1697
+ };
1698
+
1699
+ if self.records.is_empty() {
1700
+ self.quantized = None;
1701
+ self.quantized_keys.clear();
1702
+ return;
1703
+ }
1704
+
1705
+ let mut keys = Vec::with_capacity(self.records.len());
1706
+ let mut vectors: Vec<Vec<f32>> = Vec::with_capacity(self.records.len());
1707
+
1708
+ for (key, record) in &self.records {
1709
+ keys.push(key.clone());
1710
+ vectors.push(record.vector.clone());
1711
+ }
1712
+
1713
+ let refs: Vec<&[f32]> = vectors.iter().map(Vec::as_slice).collect();
1714
+ let index = QuantizedIndex::build(&refs, self.dimension, &config);
1715
+
1716
+ self.quantized = Some(index);
1717
+ self.quantized_keys = keys;
1718
+ }
1719
+
1720
+ /// Persist quantization parameters to a sidecar file.
1721
+ fn persist_quantization_params(&self) -> Result<()> {
1722
+ let params_path = quantization_params_path(&self.path);
1723
+ if let Some(index) = &self.quantized {
1724
+ let mut file = File::create(&params_path)?;
1725
+ index.write_params(&mut file).map_err(VectLiteError::Io)?;
1726
+ file.sync_all()?;
1727
+ } else {
1728
+ if params_path.exists() {
1729
+ fs::remove_file(&params_path)?;
1730
+ }
1731
+ }
1732
+ Ok(())
1733
+ }
1734
+
1735
+ /// Try to load quantization parameters from disk and rebuild codes.
1736
+ fn try_load_quantization(&mut self) -> bool {
1737
+ let params_path = quantization_params_path(&self.path);
1738
+ if !params_path.exists() {
1739
+ return false;
1740
+ }
1741
+
1742
+ let file = match File::open(&params_path) {
1743
+ Ok(f) => f,
1744
+ Err(_) => return false,
1745
+ };
1746
+ let mut reader = BufReader::new(file);
1747
+ let mut index = match QuantizedIndex::read_params(&mut reader) {
1748
+ Ok(idx) => idx,
1749
+ Err(_) => return false,
1750
+ };
1751
+
1752
+ // Rebuild codes from current records
1753
+ let mut keys = Vec::with_capacity(self.records.len());
1754
+ let mut vectors: Vec<Vec<f32>> = Vec::with_capacity(self.records.len());
1755
+ for (key, record) in &self.records {
1756
+ keys.push(key.clone());
1757
+ vectors.push(record.vector.clone());
1758
+ }
1759
+ let refs: Vec<&[f32]> = vectors.iter().map(Vec::as_slice).collect();
1760
+ index.rebuild_codes(&refs);
1761
+
1762
+ self.quantization_config = Some(index.config());
1763
+ self.quantized = Some(index);
1764
+ self.quantized_keys = keys;
1765
+ true
1766
+ }
1767
+
1768
+ /// Use the quantized index to get candidate record keys for rescoring.
1769
+ fn quantized_candidate_keys(&self, query: &[f32], top_k: usize) -> Option<Vec<RecordKey>> {
1770
+ let index = self.quantized.as_ref()?;
1771
+ if index.count() == 0 {
1772
+ return None;
1773
+ }
1774
+
1775
+ let candidate_indices = index.search_candidates(query, top_k);
1776
+ Some(
1777
+ candidate_indices
1778
+ .into_iter()
1779
+ .filter_map(|idx| self.quantized_keys.get(idx).cloned())
1780
+ .collect(),
1781
+ )
1782
+ }
1783
+
1609
1784
  fn compact_inner(&mut self) -> Result<()> {
1610
1785
  if let Some(parent) = self.path.parent() {
1611
1786
  if !parent.as_os_str().is_empty() {
@@ -1765,6 +1940,7 @@ impl Database {
1765
1940
  self.rebuild_ann();
1766
1941
  self.ann_loaded_from_disk = false;
1767
1942
  self.persist_ann_to_disk()?;
1943
+ self.rebuild_quantized_index();
1768
1944
  Ok(())
1769
1945
  }
1770
1946
 
@@ -1964,6 +2140,9 @@ impl Database {
1964
2140
  ann_loaded_from_disk: false,
1965
2141
  read_only: false,
1966
2142
  _lock_file: None,
2143
+ quantized: None,
2144
+ quantization_config: None,
2145
+ quantized_keys: Vec::new(),
1967
2146
  })
1968
2147
  }
1969
2148
 
@@ -2640,6 +2819,12 @@ fn lock_path(path: &Path) -> PathBuf {
2640
2819
  PathBuf::from(lock)
2641
2820
  }
2642
2821
 
2822
/// Returns the quantization sidecar path: `path` with `.quant` appended to
/// its full name, keeping any existing extension.
fn quantization_params_path(path: &Path) -> PathBuf {
    let mut sidecar = path.to_path_buf().into_os_string();
    sidecar.push(".quant");
    sidecar.into()
}
2827
+
2643
2828
  fn acquire_exclusive_lock(path: &Path) -> Result<File> {
2644
2829
  acquire_exclusive_lock_with_timeout(path, None)
2645
2830
  }
@@ -3942,5 +4127,234 @@ mod tests {
3942
4127
 
3943
4128
  fn cleanup(path: &Path) {
3944
4129
  let _ = std::fs::remove_file(path);
4130
+ // Also clean up sidecar files
4131
+ let mut quant = path.as_os_str().to_os_string();
4132
+ quant.push(".quant");
4133
+ let _ = std::fs::remove_file(PathBuf::from(&quant));
4134
+ let mut wal = path.as_os_str().to_os_string();
4135
+ wal.push(".wal");
4136
+ let _ = std::fs::remove_file(PathBuf::from(&wal));
4137
+ let mut lock = path.as_os_str().to_os_string();
4138
+ lock.push(".lock");
4139
+ let _ = std::fs::remove_file(PathBuf::from(&lock));
4140
+ }
4141
+
4142
+ // -----------------------------------------------------------------------
4143
+ // Quantization integration tests
4144
+ // -----------------------------------------------------------------------
4145
+
4146
/// Scalar quantization can be enabled, serves searches, and is still active
/// after the database is reopened.
#[test]
fn scalar_quantization_enables_search_and_persists() {
    use super::quantization::{QuantizationConfig, ScalarQuantizationConfig};

    let dim = 32;
    let path = temp_file("quant-scalar");

    // Unit vector along axis 0, used as the query in both sessions.
    let axis_zero_query = || {
        let mut q = vec![0.0_f32; dim];
        q[0] = 1.0;
        q
    };

    // First session: populate, enable quantization, search.
    {
        let mut db = Database::create(&path, dim).expect("create");

        // Each document has 1.0 on one axis and 0.5 on the next (wrapping).
        // NOTE(review): with 50 documents and 32 axes, doc32 gets the same
        // vector as doc0, so the "doc0 first" assertions below depend on how
        // ties are ordered.
        for i in 0..50 {
            let mut vector = vec![0.0_f32; dim];
            vector[i % dim] = 1.0;
            vector[(i + 1) % dim] = 0.5;
            db.upsert(format!("doc{i}"), vector, Metadata::new())
                .expect("upsert");
        }

        let config = QuantizationConfig::Scalar(ScalarQuantizationConfig {
            rescore_multiplier: 5,
        });
        db.enable_quantization(config).expect("enable quant");
        assert!(db.is_quantized());

        let options = SearchOptions {
            top_k: 5,
            filter: None,
        };
        let hits = db.search(&axis_zero_query(), options).expect("search");
        assert!(!hits.is_empty());
        assert_eq!(hits[0].id, "doc0");
    }

    // Second session: reopening must pick the quantization up again.
    {
        let db = Database::open(&path).expect("reopen");
        assert!(db.is_quantized());
        assert!(matches!(
            db.quantization_config(),
            Some(QuantizationConfig::Scalar(_))
        ));

        let options = SearchOptions {
            top_k: 5,
            filter: None,
        };
        let hits = db
            .search(&axis_zero_query(), options)
            .expect("search after reopen");
        assert!(!hits.is_empty());
        assert_eq!(hits[0].id, "doc0");
    }

    cleanup(&path);
}
4221
+
4222
/// Binary quantization can be enabled and serves searches.
#[test]
fn binary_quantization_enables_search() {
    use super::quantization::{BinaryQuantizationConfig, QuantizationConfig};

    let dim = 64;
    let path = temp_file("quant-binary");

    // +1.0 where (offset + axis) is a multiple of three, -1.0 elsewhere.
    let sign_pattern = |offset: usize| -> Vec<f32> {
        (0..dim)
            .map(|axis| if (offset + axis) % 3 == 0 { 1.0 } else { -1.0 })
            .collect()
    };

    let mut db = Database::create(&path, dim).expect("create");
    for i in 0..100 {
        db.upsert(format!("doc{i}"), sign_pattern(i), Metadata::new())
            .expect("upsert");
    }

    let config = QuantizationConfig::Binary(BinaryQuantizationConfig {
        rescore_multiplier: 10,
    });
    db.enable_quantization(config).expect("enable quant");
    assert!(db.is_quantized());

    // The query reproduces doc0's pattern.
    // NOTE(review): the pattern repeats every three documents, so doc3, doc6,
    // ... hold the same vector as doc0; expecting doc0 first depends on how
    // ties are ordered.
    let query = sign_pattern(0);
    let options = SearchOptions {
        top_k: 5,
        filter: None,
    };
    let hits = db.search(&query, options).expect("search");
    assert!(!hits.is_empty());
    assert_eq!(hits[0].id, "doc0");

    cleanup(&path);
}
4266
+
4267
/// Product quantization can be enabled and serves searches.
#[test]
fn product_quantization_enables_search() {
    use super::quantization::{ProductQuantizationConfig, QuantizationConfig};

    let dim = 32;
    let path = temp_file("quant-pq");

    // Deterministic pseudo-random components in [0, 1).
    let synthetic = |seed: usize| -> Vec<f32> {
        let mut components = Vec::with_capacity(dim);
        for axis in 0..dim {
            components.push(((seed * 7 + axis * 13) % 100) as f32 / 100.0);
        }
        components
    };

    let mut db = Database::create(&path, dim).expect("create");
    for i in 0..100 {
        db.upsert(format!("doc{i}"), synthetic(i), Metadata::new())
            .expect("upsert");
    }

    let config = QuantizationConfig::Product(ProductQuantizationConfig {
        num_sub_vectors: 4,
        num_centroids: 16,
        training_iterations: 5,
        rescore_multiplier: 10,
    });
    db.enable_quantization(config).expect("enable quant");
    assert!(db.is_quantized());

    // Seed 0 reproduces doc0's vector exactly.
    let query = synthetic(0);
    let options = SearchOptions {
        top_k: 5,
        filter: None,
    };
    let hits = db.search(&query, options).expect("search");
    assert!(!hits.is_empty());
    assert_eq!(hits[0].id, "doc0");

    cleanup(&path);
}
4309
+
4310
/// Enabling quantization creates the `.quant` sidecar and disabling it
/// removes the file again.
#[test]
fn disable_quantization_removes_sidecar() {
    use super::quantization::{QuantizationConfig, ScalarQuantizationConfig};

    let dim = 8;
    let path = temp_file("quant-disable");

    let mut db = Database::create(&path, dim).expect("create");
    for i in 0..10 {
        let mut vector: Vec<f32> = Vec::with_capacity(dim);
        for j in 0..dim {
            vector.push((i + j) as f32);
        }
        db.upsert(format!("doc{i}"), vector, Metadata::new())
            .expect("upsert");
    }

    let config = QuantizationConfig::Scalar(ScalarQuantizationConfig::default());
    db.enable_quantization(config).expect("enable");
    assert!(db.is_quantized());

    // The sidecar sits next to the database file with ".quant" appended.
    let mut sidecar_name = path.as_os_str().to_os_string();
    sidecar_name.push(".quant");
    let quant_path = PathBuf::from(sidecar_name);
    assert!(quant_path.exists());

    db.disable_quantization().expect("disable");
    assert!(!db.is_quantized());
    assert!(!quant_path.exists());

    cleanup(&path);
}
4344
+
4345
/// Enabling quantization on a database with no records is rejected and
/// leaves quantization off.
#[test]
fn quantization_empty_database_returns_error() {
    use super::quantization::{QuantizationConfig, ScalarQuantizationConfig};

    let path = temp_file("quant-empty");
    let mut db = Database::create(&path, 4).expect("create");

    let config = QuantizationConfig::Scalar(ScalarQuantizationConfig::default());
    let outcome = db.enable_quantization(config);
    assert!(outcome.is_err());
    assert!(!db.is_quantized());

    cleanup(&path);
}
3946
4360
  }