vectlite 0.1.12 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -578,6 +578,330 @@ impl ProductQuantizer {
578
578
  }
579
579
  }
580
580
 
581
+ // ---------------------------------------------------------------------------
582
+ // Two-Bit Quantization (ColBERTv2-style)
583
+ // ---------------------------------------------------------------------------
584
+
585
/// Settings for 2-bit (ColBERTv2-style) multi-vector quantization.
#[derive(Debug, Clone, PartialEq)]
pub struct TwoBitQuantizationConfig {
    /// Factor applied to `top_k` to size the candidate list produced by the
    /// quantized search; those candidates are meant to be rescored with exact
    /// float32 MaxSim afterwards. The `Default` impl uses 4, and the search
    /// routines never request fewer than 50 candidates.
    pub rescore_multiplier: usize,
}
592
+
593
+ impl Default for TwoBitQuantizationConfig {
594
+ fn default() -> Self {
595
+ Self {
596
+ rescore_multiplier: 4,
597
+ }
598
+ }
599
+ }
600
+
601
+ /// Two-bit quantizer: maps each dimension to 2 bits (4 levels) using
602
+ /// per-dimension quartile boundaries. ~16x compression vs float32.
603
+ /// Designed for ColBERT-style token-level vectors.
604
+ #[derive(Clone, Debug)]
605
+ pub struct TwoBitQuantizer {
606
+ pub dimension: usize,
607
+ /// Per-dimension boundary values: [q25, q50, q75] for each dimension.
608
+ /// Shape: dimension * 3.
609
+ pub boundaries: Vec<f32>,
610
+ /// Quantized codes: 2 bits per dimension, packed into bytes.
611
+ /// Each vector uses ceil(dimension / 4) bytes.
612
+ pub codes: Vec<u8>,
613
+ /// Number of quantized vectors.
614
+ pub count: usize,
615
+ /// Bytes per quantized vector.
616
+ pub bytes_per_vector: usize,
617
+ pub config: TwoBitQuantizationConfig,
618
+ }
619
+
620
+ impl TwoBitQuantizer {
621
+ /// Train a 2-bit quantizer by computing per-dimension quartiles.
622
+ pub fn train(
623
+ vectors: &[&[f32]],
624
+ dimension: usize,
625
+ config: TwoBitQuantizationConfig,
626
+ ) -> Self {
627
+ assert!(!vectors.is_empty(), "need at least one vector to train");
628
+
629
+ // Collect values per dimension and compute quartile boundaries
630
+ let mut boundaries = Vec::with_capacity(dimension * 3);
631
+ for d in 0..dimension {
632
+ let mut values: Vec<f32> = vectors.iter().map(|v| v[d]).collect();
633
+ values.sort_unstable_by(|a, b| a.total_cmp(b));
634
+ let n = values.len();
635
+ let q25 = values[n / 4];
636
+ let q50 = values[n / 2];
637
+ let q75 = values[(3 * n) / 4];
638
+ boundaries.push(q25);
639
+ boundaries.push(q50);
640
+ boundaries.push(q75);
641
+ }
642
+
643
+ let bytes_per_vector = (dimension + 3) / 4;
644
+ let mut codes = Vec::with_capacity(vectors.len() * bytes_per_vector);
645
+ for vector in vectors {
646
+ codes.extend_from_slice(&quantize_two_bit(vector, &boundaries, bytes_per_vector));
647
+ }
648
+
649
+ Self {
650
+ dimension,
651
+ boundaries,
652
+ codes,
653
+ count: vectors.len(),
654
+ bytes_per_vector,
655
+ config,
656
+ }
657
+ }
658
+
659
+ /// Quantize a single vector to 2-bit codes.
660
+ pub fn quantize_vector(&self, vector: &[f32]) -> Vec<u8> {
661
+ quantize_two_bit(vector, &self.boundaries, self.bytes_per_vector)
662
+ }
663
+
664
+ /// Compute approximate dot product between a 2-bit quantized query and
665
+ /// a stored quantized vector. Returns a score where higher = more similar.
666
+ pub fn approx_dot(&self, query_codes: &[u8], idx: usize) -> i32 {
667
+ let offset = idx * self.bytes_per_vector;
668
+ let stored = &self.codes[offset..offset + self.bytes_per_vector];
669
+ two_bit_approx_dot(query_codes, stored, self.dimension)
670
+ }
671
+
672
+ /// Search for top-k candidates using approximate 2-bit dot products.
673
+ /// Returns (index, approx_score) pairs sorted best-first.
674
+ pub fn search(&self, query: &[f32], top_k: usize) -> Vec<(usize, i32)> {
675
+ let rescore_count = (top_k * self.config.rescore_multiplier)
676
+ .max(50)
677
+ .min(self.count);
678
+ let query_codes = self.quantize_vector(query);
679
+
680
+ let mut scores: Vec<(usize, i32)> = (0..self.count)
681
+ .map(|idx| (idx, self.approx_dot(&query_codes, idx)))
682
+ .collect();
683
+
684
+ scores.sort_unstable_by(|a, b| b.1.cmp(&a.1));
685
+ scores.truncate(rescore_count);
686
+ scores
687
+ }
688
+
689
+ /// Rebuild codes from vectors.
690
+ pub fn rebuild_codes(&mut self, vectors: &[&[f32]]) {
691
+ self.codes.clear();
692
+ self.codes.reserve(vectors.len() * self.bytes_per_vector);
693
+ for vector in vectors {
694
+ self.codes
695
+ .extend_from_slice(&quantize_two_bit(vector, &self.boundaries, self.bytes_per_vector));
696
+ }
697
+ self.count = vectors.len();
698
+ }
699
+
700
+ /// Serialize parameters (boundaries only, codes rebuilt on load).
701
+ pub fn write_params(&self, writer: &mut impl Write) -> std::io::Result<()> {
702
+ // Tag byte: 4 = two_bit
703
+ writer.write_all(&[4u8])?;
704
+ write_usize(writer, self.dimension)?;
705
+ write_usize(writer, self.config.rescore_multiplier)?;
706
+ // Write boundaries (dimension * 3 floats)
707
+ for &b in &self.boundaries {
708
+ writer.write_all(&b.to_le_bytes())?;
709
+ }
710
+ Ok(())
711
+ }
712
+
713
+ /// Deserialize parameters.
714
+ pub fn read_params(reader: &mut impl Read) -> std::io::Result<Self> {
715
+ let dimension = read_usize(reader)?;
716
+ let rescore_multiplier = read_usize(reader)?;
717
+ let mut boundaries = vec![0.0_f32; dimension * 3];
718
+ for b in &mut boundaries {
719
+ let mut buf = [0u8; 4];
720
+ reader.read_exact(&mut buf)?;
721
+ *b = f32::from_le_bytes(buf);
722
+ }
723
+ let bytes_per_vector = (dimension + 3) / 4;
724
+ Ok(Self {
725
+ dimension,
726
+ boundaries,
727
+ codes: Vec::new(),
728
+ count: 0,
729
+ bytes_per_vector,
730
+ config: TwoBitQuantizationConfig { rescore_multiplier },
731
+ })
732
+ }
733
+ }
734
+
735
+ // ---------------------------------------------------------------------------
736
+ // Multi-vector quantized index (for ColBERT token-level search)
737
+ // ---------------------------------------------------------------------------
738
+
739
+ /// Configuration for multi-vector quantization.
740
+ #[derive(Clone, Debug, PartialEq)]
741
+ pub enum MultiVectorQuantizationConfig {
742
+ TwoBit(TwoBitQuantizationConfig),
743
+ }
744
+
745
+ /// A quantized index for multi-vector (late interaction) search.
746
+ /// Stores all token vectors from all documents in a flat quantized array,
747
+ /// with a mapping from document index to token range.
748
+ #[derive(Clone, Debug)]
749
+ pub struct MultiVectorQuantizedIndex {
750
+ pub quantizer: TwoBitQuantizer,
751
+ /// For each document: (start_index, count) into the quantized vector array.
752
+ pub doc_ranges: Vec<(usize, usize)>,
753
+ }
754
+
755
+ impl MultiVectorQuantizedIndex {
756
+ /// Build a multi-vector quantized index from per-document token vectors.
757
+ /// `doc_token_vectors[i]` is a slice of token-level vectors for document i.
758
+ pub fn build(
759
+ doc_token_vectors: &[&[Vec<f32>]],
760
+ token_dimension: usize,
761
+ config: &MultiVectorQuantizationConfig,
762
+ ) -> Self {
763
+ // Flatten all token vectors for training
764
+ let all_tokens: Vec<&[f32]> = doc_token_vectors
765
+ .iter()
766
+ .flat_map(|tokens| tokens.iter().map(|v| v.as_slice()))
767
+ .collect();
768
+
769
+ let MultiVectorQuantizationConfig::TwoBit(cfg) = config;
770
+
771
+ let quantizer = if all_tokens.is_empty() {
772
+ // Empty case: create minimal quantizer
773
+ TwoBitQuantizer {
774
+ dimension: token_dimension,
775
+ boundaries: vec![0.0; token_dimension * 3],
776
+ codes: Vec::new(),
777
+ count: 0,
778
+ bytes_per_vector: (token_dimension + 3) / 4,
779
+ config: cfg.clone(),
780
+ }
781
+ } else {
782
+ TwoBitQuantizer::train(&all_tokens, token_dimension, cfg.clone())
783
+ };
784
+
785
+ // Build doc_ranges
786
+ let mut doc_ranges = Vec::with_capacity(doc_token_vectors.len());
787
+ let mut offset = 0;
788
+ for tokens in doc_token_vectors {
789
+ doc_ranges.push((offset, tokens.len()));
790
+ offset += tokens.len();
791
+ }
792
+
793
+ Self {
794
+ quantizer,
795
+ doc_ranges,
796
+ }
797
+ }
798
+
799
+ /// Compute approximate MaxSim score for a document given query token codes.
800
+ /// For each query token, finds the max approximate dot with any document token.
801
+ pub fn approx_maxsim(&self, query_codes: &[Vec<u8>], doc_idx: usize) -> i32 {
802
+ let (start, count) = self.doc_ranges[doc_idx];
803
+ if count == 0 || query_codes.is_empty() {
804
+ return 0;
805
+ }
806
+ let mut total = 0i32;
807
+ for q_code in query_codes {
808
+ let mut best = i32::MIN;
809
+ for i in start..start + count {
810
+ let score = two_bit_approx_dot(
811
+ q_code,
812
+ &self.quantizer.codes[i * self.quantizer.bytes_per_vector
813
+ ..(i + 1) * self.quantizer.bytes_per_vector],
814
+ self.quantizer.dimension,
815
+ );
816
+ if score > best {
817
+ best = score;
818
+ }
819
+ }
820
+ total += best;
821
+ }
822
+ total
823
+ }
824
+
825
+ /// Search: returns candidate document indices sorted by approximate MaxSim.
826
+ pub fn search(&self, query_tokens: &[&[f32]], top_k: usize) -> Vec<usize> {
827
+ let rescore_count = (top_k * self.quantizer.config.rescore_multiplier)
828
+ .max(50)
829
+ .min(self.doc_ranges.len());
830
+ if query_tokens.is_empty() || self.doc_ranges.is_empty() {
831
+ return Vec::new();
832
+ }
833
+
834
+ let query_codes: Vec<Vec<u8>> = query_tokens
835
+ .iter()
836
+ .map(|t| self.quantizer.quantize_vector(t))
837
+ .collect();
838
+
839
+ let mut scores: Vec<(usize, i32)> = (0..self.doc_ranges.len())
840
+ .map(|doc_idx| (doc_idx, self.approx_maxsim(&query_codes, doc_idx)))
841
+ .collect();
842
+
843
+ scores.sort_unstable_by(|a, b| b.1.cmp(&a.1));
844
+ scores.truncate(rescore_count);
845
+ scores.into_iter().map(|(idx, _)| idx).collect()
846
+ }
847
+
848
+ /// Rebuild from document token vectors (after loading parameters from disk).
849
+ pub fn rebuild(
850
+ &mut self,
851
+ doc_token_vectors: &[&[Vec<f32>]],
852
+ ) {
853
+ let all_tokens: Vec<&[f32]> = doc_token_vectors
854
+ .iter()
855
+ .flat_map(|tokens| tokens.iter().map(|v| v.as_slice()))
856
+ .collect();
857
+ self.quantizer.rebuild_codes(&all_tokens);
858
+
859
+ self.doc_ranges.clear();
860
+ let mut offset = 0;
861
+ for tokens in doc_token_vectors {
862
+ self.doc_ranges.push((offset, tokens.len()));
863
+ offset += tokens.len();
864
+ }
865
+ }
866
+
867
+ /// Serialize parameters.
868
+ pub fn write_params(&self, writer: &mut impl Write) -> std::io::Result<()> {
869
+ self.quantizer.write_params(writer)?;
870
+ // Write doc_ranges
871
+ write_usize(writer, self.doc_ranges.len())?;
872
+ for &(start, count) in &self.doc_ranges {
873
+ write_usize(writer, start)?;
874
+ write_usize(writer, count)?;
875
+ }
876
+ Ok(())
877
+ }
878
+
879
+ /// Deserialize parameters.
880
+ pub fn read_params(reader: &mut impl Read) -> std::io::Result<Self> {
881
+ // Consume the tag byte written by TwoBitQuantizer::write_params
882
+ let mut tag = [0u8; 1];
883
+ reader.read_exact(&mut tag)?;
884
+ if tag[0] != 4 {
885
+ return Err(std::io::Error::new(
886
+ std::io::ErrorKind::InvalidData,
887
+ format!("expected two_bit tag (4), got {}", tag[0]),
888
+ ));
889
+ }
890
+ let quantizer = TwoBitQuantizer::read_params(reader)?;
891
+ let num_docs = read_usize(reader)?;
892
+ let mut doc_ranges = Vec::with_capacity(num_docs);
893
+ for _ in 0..num_docs {
894
+ let start = read_usize(reader)?;
895
+ let count = read_usize(reader)?;
896
+ doc_ranges.push((start, count));
897
+ }
898
+ Ok(Self {
899
+ quantizer,
900
+ doc_ranges,
901
+ })
902
+ }
903
+ }
904
+
581
905
  // ---------------------------------------------------------------------------
582
906
  // Unified quantization index
583
907
  // ---------------------------------------------------------------------------
@@ -740,6 +1064,45 @@ fn hamming_distance(a: &[u8], b: &[u8]) -> u32 {
740
1064
  dist
741
1065
  }
742
1066
 
1067
/// Encodes a float vector as packed 2-bit levels.
///
/// `boundaries` holds `[q25, q50, q75]` per dimension. A component maps to
/// level 0 if it is `<= q25`, 1 if `<= q50`, 2 if `<= q75`, and 3 otherwise
/// (which includes NaN). Four levels share one byte, with the lowest
/// dimension in the least-significant bits. Bytes not covered by `vector`
/// stay zero.
///
/// Panics if `vector` has more components than `boundaries` or the output
/// buffer can describe.
fn quantize_two_bit(vector: &[f32], boundaries: &[f32], bytes_per_vector: usize) -> Vec<u8> {
    let mut packed = vec![0u8; bytes_per_vector];
    for (dim, component) in vector.iter().copied().enumerate() {
        let base = dim * 3;
        // Guards run top to bottom, so thresholds are read only as needed.
        let level: u8 = match component {
            v if v <= boundaries[base] => 0,
            v if v <= boundaries[base + 1] => 1,
            v if v <= boundaries[base + 2] => 2,
            _ => 3,
        };
        let shift = (dim % 4) * 2;
        packed[dim / 4] |= level << shift;
    }
    packed
}
1089
+
1090
/// Approximate dot product of two packed 2-bit vectors.
///
/// The levels 0..=3 of the first `dimension` components stand in for the
/// original float magnitudes; the products of matching levels are summed.
/// A higher score means more similar.
#[inline]
fn two_bit_approx_dot(a: &[u8], b: &[u8], dimension: usize) -> i32 {
    (0..dimension)
        .map(|dim| {
            let byte = dim / 4;
            let shift = (dim % 4) * 2;
            let lhs = i32::from((a[byte] >> shift) & 0b11);
            let rhs = i32::from((b[byte] >> shift) & 0b11);
            lhs * rhs
        })
        .sum()
}
1105
+
743
1106
  /// Squared L2 distance between two vectors.
744
1107
  #[inline]
745
1108
  fn l2_distance_sq(a: &[f32], b: &[f32]) -> f32 {
@@ -1084,4 +1447,141 @@ mod tests {
1084
1447
  // Bit 7: 0.9 > 0 -> 1
1085
1448
  assert_eq!(binary[0], 0b10100101);
1086
1449
  }
1450
+
1451
#[test]
fn two_bit_quantization_basic() {
    // 100 random 64-dimensional vectors, fixed seed.
    let data = random_vectors(100, 64, 42);
    let views: Vec<&[f32]> = data.iter().map(|v| v.as_slice()).collect();

    let quantizer = TwoBitQuantizer::train(
        &views,
        64,
        TwoBitQuantizationConfig {
            rescore_multiplier: 4,
        },
    );

    // Shape of the trained quantizer.
    assert_eq!(quantizer.dimension, 64);
    assert_eq!(quantizer.count, 100);
    assert_eq!(quantizer.bytes_per_vector, 16); // 64 dims * 2 bits / 8 = 16
    assert_eq!(quantizer.boundaries.len(), 64 * 3);

    // Querying with a stored vector must rank that vector near the top.
    let hits = quantizer.search(&data[0], 10);
    assert!(!hits.is_empty());
    let head = &hits[..hits.len().min(5)];
    assert!(head.iter().any(|&(idx, _)| idx == 0));
}
1471
+
1472
#[test]
fn two_bit_quantize_and_approx_dot() {
    // Four dimensions sharing the same thresholds [q25, q50, q75].
    let boundaries: Vec<f32> = [-0.5, 0.0, 0.5].repeat(4);
    // 4 dims * 2 bits = 8 bits = 1 byte
    let bytes_per_vector = 1;

    // One component per quantization level: 0, 1, 2, 3.
    let lhs = [-1.0, -0.25, 0.25, 1.0];
    let rhs = [-1.0, -0.25, 0.25, 1.0];

    let lhs_codes = quantize_two_bit(&lhs, &boundaries, bytes_per_vector);
    let rhs_codes = quantize_two_bit(&rhs, &boundaries, bytes_per_vector);

    // Identical inputs: the score is the sum of squared levels,
    // 0*0 + 1*1 + 2*2 + 3*3 = 0 + 1 + 4 + 9 = 14.
    let dot = two_bit_approx_dot(&lhs_codes, &rhs_codes, 4);
    assert!(dot > 0);
    assert_eq!(dot, 14);
}
1495
+
1496
#[test]
fn two_bit_serialization_roundtrip() {
    use std::io::Read;

    let data = random_vectors(50, 32, 99);
    let views: Vec<&[f32]> = data.iter().map(|v| v.as_slice()).collect();
    let original = TwoBitQuantizer::train(
        &views,
        32,
        TwoBitQuantizationConfig {
            rescore_multiplier: 6,
        },
    );

    let mut bytes: Vec<u8> = Vec::new();
    original.write_params(&mut bytes).unwrap();

    let mut cursor = std::io::Cursor::new(&bytes);
    // `read_params` expects the tag byte emitted by `write_params` to be consumed already.
    let mut tag = [0u8; 1];
    cursor.read_exact(&mut tag).unwrap();
    assert_eq!(tag[0], 4);
    let restored = TwoBitQuantizer::read_params(&mut cursor).unwrap();

    assert_eq!(original.dimension, restored.dimension);
    assert_eq!(original.boundaries.len(), restored.boundaries.len());
    for (expected, actual) in original.boundaries.iter().zip(&restored.boundaries) {
        assert!((expected - actual).abs() < 1e-6);
    }
    assert_eq!(
        original.config.rescore_multiplier,
        restored.config.rescore_multiplier
    );
}
1525
+
1526
#[test]
fn multi_vector_quantized_index_basic() {
    // Five "documents" with 3, 4, 5, 3, 4 token vectors of dimension 16.
    let mut docs: Vec<Vec<Vec<f32>>> = Vec::with_capacity(5);
    for doc_idx in 0..5 {
        let n_tokens = 3 + (doc_idx % 3);
        docs.push(random_vectors(n_tokens, 16, 100 + doc_idx as u64));
    }
    let doc_views: Vec<&[Vec<f32>]> = docs.iter().map(Vec::as_slice).collect();

    let config = MultiVectorQuantizationConfig::TwoBit(TwoBitQuantizationConfig {
        rescore_multiplier: 4,
    });
    let index = MultiVectorQuantizedIndex::build(&doc_views, 16, &config);

    assert_eq!(index.doc_ranges.len(), 5);
    // Total token count: 3+4+5+3+4 = 19
    let total_tokens: usize = index.doc_ranges.iter().map(|&(_, count)| count).sum();
    assert_eq!(total_tokens, 19);

    // Query with document 0's own tokens: it should appear among the top
    // results, since its tokens are expected to score highest against themselves.
    let query: Vec<&[f32]> = docs[0].iter().map(|t| t.as_slice()).collect();
    let ranked = index.search(&query, 3);
    assert!(!ranked.is_empty());
    assert!(ranked.iter().take(3).any(|&doc| doc == 0));
}
1556
+
1557
#[test]
fn multi_vector_quantized_index_serialization_roundtrip() {
    // Three documents of four 8-dimensional tokens each.
    let docs: Vec<Vec<Vec<f32>>> = (0..3).map(|i| random_vectors(4, 8, 200 + i)).collect();
    let doc_views: Vec<&[Vec<f32>]> = docs.iter().map(Vec::as_slice).collect();

    let original = MultiVectorQuantizedIndex::build(
        &doc_views,
        8,
        &MultiVectorQuantizationConfig::TwoBit(TwoBitQuantizationConfig {
            rescore_multiplier: 2,
        }),
    );

    let mut bytes: Vec<u8> = Vec::new();
    original.write_params(&mut bytes).unwrap();

    let mut cursor = std::io::Cursor::new(&bytes);
    let restored = MultiVectorQuantizedIndex::read_params(&mut cursor).unwrap();

    // Ranges and quantizer parameters survive the round trip.
    assert_eq!(original.doc_ranges, restored.doc_ranges);
    assert_eq!(original.quantizer.dimension, restored.quantizer.dimension);
    assert_eq!(
        original.quantizer.boundaries.len(),
        restored.quantizer.boundaries.len()
    );
    assert_eq!(
        original.quantizer.config.rescore_multiplier,
        restored.quantizer.config.rescore_multiplier,
    );
}
1087
1587
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "vectlite",
3
- "version": "0.1.12",
3
+ "version": "0.9.0",
4
4
  "description": "Embedded vector store for local-first AI applications.",
5
5
  "main": "index.js",
6
6
  "types": "index.d.ts",
Binary file