vectlite 0.9.0 → 0.9.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -14,6 +14,7 @@ use simsimd::SpatialSimilarity;
14
14
 
15
15
  use quantization::{
16
16
  MultiVectorQuantizationConfig, MultiVectorQuantizedIndex, QuantizationConfig, QuantizedIndex,
17
+ valid_product_num_sub_vectors, validate_quantization_config,
17
18
  };
18
19
 
19
20
  const MAGIC: &[u8; 4] = b"VDB1";
@@ -233,7 +234,10 @@ fn scalar_manhattan_distance(left: &[f32], right: &[f32]) -> f32 {
233
234
  #[derive(Clone, Debug)]
234
235
  enum WalOp {
235
236
  Upsert(Record),
236
- Delete { namespace: String, id: String },
237
+ Delete {
238
+ namespace: String,
239
+ id: String,
240
+ },
237
241
  UpdateMetadata {
238
242
  namespace: String,
239
243
  id: String,
@@ -1147,7 +1151,10 @@ impl NumericIndex {
1147
1151
  /// Return keys where value > threshold.
1148
1152
  fn range_gt(&self, threshold: f64) -> HashSet<RecordKey> {
1149
1153
  let mut result = HashSet::new();
1150
- for (_, set) in self.tree.range((std::ops::Bound::Excluded(OrdF64(threshold)), std::ops::Bound::Unbounded)) {
1154
+ for (_, set) in self.tree.range((
1155
+ std::ops::Bound::Excluded(OrdF64(threshold)),
1156
+ std::ops::Bound::Unbounded,
1157
+ )) {
1151
1158
  result.extend(set.iter().cloned());
1152
1159
  }
1153
1160
  result
@@ -1255,9 +1262,9 @@ impl AnnHnsw {
1255
1262
  AnnHnsw::DotProduct(h) => h.file_dump(directory, basename),
1256
1263
  AnnHnsw::Manhattan(h) => h.file_dump(directory, basename),
1257
1264
  };
1258
- result
1259
- .map(|_| ())
1260
- .map_err(|err| VectLiteError::InvalidFormat(format!("failed to persist ANN index: {err}")))
1265
+ result.map(|_| ()).map_err(|err| {
1266
+ VectLiteError::InvalidFormat(format!("failed to persist ANN index: {err}"))
1267
+ })
1261
1268
  }
1262
1269
  }
1263
1270
 
@@ -1514,9 +1521,7 @@ impl Database {
1514
1521
  .filter_map(|key| self.records.get(key))
1515
1522
  .filter(|record| {
1516
1523
  !record.is_expired_at(now)
1517
- && filter
1518
- .map(|f| f.matches(&record.metadata))
1519
- .unwrap_or(true)
1524
+ && filter.map(|f| f.matches(&record.metadata)).unwrap_or(true)
1520
1525
  })
1521
1526
  .count()
1522
1527
  } else {
@@ -1563,9 +1568,7 @@ impl Database {
1563
1568
  .filter_map(|key| self.records.get(*key))
1564
1569
  .filter(|record| {
1565
1570
  !record.is_expired_at(now)
1566
- && filter
1567
- .map(|f| f.matches(&record.metadata))
1568
- .unwrap_or(true)
1571
+ && filter.map(|f| f.matches(&record.metadata)).unwrap_or(true)
1569
1572
  })
1570
1573
  .skip(offset)
1571
1574
  .take(if limit == 0 { usize::MAX } else { limit })
@@ -1659,9 +1662,7 @@ impl Database {
1659
1662
 
1660
1663
  let next_cursor = if results.len() > limit {
1661
1664
  results.pop(); // remove the extra
1662
- results
1663
- .last()
1664
- .map(|r| format!("{}\0{}", r.namespace, r.id))
1665
+ results.last().map(|r| format!("{}\0{}", r.namespace, r.id))
1665
1666
  } else {
1666
1667
  None
1667
1668
  };
@@ -1710,11 +1711,7 @@ impl Database {
1710
1711
  ///
1711
1712
  /// Returns `true` if the record exists and was updated, `false` if the
1712
1713
  /// record was not found (no error is raised).
1713
- pub fn update_metadata(
1714
- &mut self,
1715
- id: impl Into<String>,
1716
- metadata: Metadata,
1717
- ) -> Result<bool> {
1714
+ pub fn update_metadata(&mut self, id: impl Into<String>, metadata: Metadata) -> Result<bool> {
1718
1715
  self.update_metadata_in_namespace(DEFAULT_NAMESPACE, id, metadata)
1719
1716
  }
1720
1717
 
@@ -1841,7 +1838,11 @@ impl Database {
1841
1838
  PayloadIndexType::Numeric => {
1842
1839
  let mut num = NumericIndex::default();
1843
1840
  for (key, record) in &self.records {
1844
- if let Some(val) = record.metadata.get(&field).and_then(MetadataValue::as_number) {
1841
+ if let Some(val) = record
1842
+ .metadata
1843
+ .get(&field)
1844
+ .and_then(MetadataValue::as_number)
1845
+ {
1845
1846
  num.insert(val, key.clone());
1846
1847
  }
1847
1848
  }
@@ -2290,6 +2291,18 @@ impl Database {
2290
2291
  "search requires a dense query, a sparse query, or both".to_owned(),
2291
2292
  ));
2292
2293
  }
2294
+ // Reject zero-norm query vectors for metrics where similarity is undefined.
2295
+ if let Some(query) = dense_query {
2296
+ if self.metric.is_similarity() {
2297
+ let norm_sq: f32 = query.iter().map(|v| v * v).sum();
2298
+ if norm_sq == 0.0 {
2299
+ return Err(VectLiteError::InvalidFormat(
2300
+ "query vector has zero norm; cosine/dot-product similarity is undefined"
2301
+ .to_owned(),
2302
+ ));
2303
+ }
2304
+ }
2305
+ }
2293
2306
  if let Some(mmr_lambda) = options.mmr_lambda {
2294
2307
  if !(0.0..=1.0).contains(&mmr_lambda) {
2295
2308
  return Err(VectLiteError::InvalidFormat(
@@ -2326,14 +2339,13 @@ impl Database {
2326
2339
  let dense_start = Instant::now();
2327
2340
  // Use quantized index for candidate selection if available (2-stage pipeline).
2328
2341
  // The quantized index operates on the default vector only and globally (not per-namespace).
2329
- let quantized_candidates =
2330
- if !matryoshka_truncated
2331
- && (vector_name.is_none() || vector_name == Some(DEFAULT_VECTOR_NAME))
2332
- {
2333
- dense_query.and_then(|query| self.quantized_candidate_keys(query, fetch_k))
2334
- } else {
2335
- None
2336
- };
2342
+ let quantized_candidates = if !matryoshka_truncated
2343
+ && (vector_name.is_none() || vector_name == Some(DEFAULT_VECTOR_NAME))
2344
+ {
2345
+ dense_query.and_then(|query| self.quantized_candidate_keys(query, fetch_k))
2346
+ } else {
2347
+ None
2348
+ };
2337
2349
  let ann_candidates = if quantized_candidates.is_some() {
2338
2350
  // Skip HNSW if quantized index provided candidates
2339
2351
  None
@@ -2363,13 +2375,18 @@ impl Database {
2363
2375
  };
2364
2376
 
2365
2377
  // Use payload indexes to narrow candidates when doing a full scan.
2366
- let payload_candidates = options.filter.as_ref().and_then(|f| {
2367
- self.payload_index_candidates(f, namespace)
2368
- });
2378
+ let payload_candidates = options
2379
+ .filter
2380
+ .as_ref()
2381
+ .and_then(|f| self.payload_index_candidates(f, namespace));
2369
2382
  let candidate_keys = match (candidate_keys, payload_candidates) {
2370
2383
  (Some(ck), Some(pc)) => {
2371
2384
  // Intersect ANN/sparse candidates with payload index candidates.
2372
- Some(ck.into_iter().filter(|k| pc.contains(k)).collect::<Vec<_>>())
2385
+ Some(
2386
+ ck.into_iter()
2387
+ .filter(|k| pc.contains(k))
2388
+ .collect::<Vec<_>>(),
2389
+ )
2373
2390
  }
2374
2391
  (None, Some(pc)) => {
2375
2392
  // No ANN candidates but payload index narrowed the set.
@@ -2403,8 +2420,14 @@ impl Database {
2403
2420
 
2404
2421
  if effective_dense_candidates.is_some() && results.len() < fetch_k {
2405
2422
  stats.exact_fallback = true;
2406
- results =
2407
- self.collect_results(dense_query, sparse_query, &options, namespace, None, effective_dimension);
2423
+ results = self.collect_results(
2424
+ dense_query,
2425
+ sparse_query,
2426
+ &options,
2427
+ namespace,
2428
+ None,
2429
+ effective_dimension,
2430
+ );
2408
2431
  stats.considered_count = results.len();
2409
2432
  }
2410
2433
 
@@ -2522,6 +2545,7 @@ impl Database {
2522
2545
  "cannot enable quantization on an empty database".to_owned(),
2523
2546
  ));
2524
2547
  }
2548
+ validate_quantization_config(&config, self.dimension)?;
2525
2549
  self.quantization_config = Some(config);
2526
2550
  self.rebuild_quantized_index();
2527
2551
  self.persist_quantization_params()?;
@@ -2552,6 +2576,11 @@ impl Database {
2552
2576
  self.quantization_config.as_ref()
2553
2577
  }
2554
2578
 
2579
+ /// Returns all valid Product Quantization `num_sub_vectors` values for this database.
2580
+ pub fn valid_num_sub_vectors(&self) -> Vec<usize> {
2581
+ valid_product_num_sub_vectors(self.dimension)
2582
+ }
2583
+
2555
2584
  /// Rebuild the quantized index from current records.
2556
2585
  fn rebuild_quantized_index(&mut self) {
2557
2586
  let config = match &self.quantization_config {
@@ -2635,7 +2664,7 @@ impl Database {
2635
2664
  return None;
2636
2665
  }
2637
2666
 
2638
- let candidate_indices = index.search_candidates(query, top_k);
2667
+ let candidate_indices = index.search_candidates_with_metric(query, top_k, self.metric);
2639
2668
  Some(
2640
2669
  candidate_indices
2641
2670
  .into_iter()
@@ -2657,7 +2686,11 @@ impl Database {
2657
2686
  multi_vectors: MultiVectors,
2658
2687
  ) -> Result<()> {
2659
2688
  self.upsert_multi_vectors_in_namespace(
2660
- DEFAULT_NAMESPACE, id, vector, metadata, multi_vectors,
2689
+ DEFAULT_NAMESPACE,
2690
+ id,
2691
+ vector,
2692
+ metadata,
2693
+ multi_vectors,
2661
2694
  )
2662
2695
  }
2663
2696
 
@@ -2716,10 +2749,8 @@ impl Database {
2716
2749
 
2717
2750
  // Try quantized multi-vector search first for candidate selection
2718
2751
  let query_refs: Vec<&[f32]> = query_tokens.iter().map(Vec::as_slice).collect();
2719
- let candidate_keys: Option<Vec<RecordKey>> = self
2720
- .multi_vector_quantized
2721
- .get(space)
2722
- .and_then(|index| {
2752
+ let candidate_keys: Option<Vec<RecordKey>> =
2753
+ self.multi_vector_quantized.get(space).and_then(|index| {
2723
2754
  let keys = self.multi_vector_quantized_keys.get(space)?;
2724
2755
  let candidate_indices = index.search(&query_refs, top_k);
2725
2756
  Some(
@@ -2740,9 +2771,7 @@ impl Database {
2740
2771
  let mut scored: Vec<(f32, &Record)> = record_iter
2741
2772
  .filter(|record| {
2742
2773
  !record.is_expired_at(now)
2743
- && namespace
2744
- .map(|ns| record.namespace == ns)
2745
- .unwrap_or(true)
2774
+ && namespace.map(|ns| record.namespace == ns).unwrap_or(true)
2746
2775
  && record.multi_vectors.contains_key(space)
2747
2776
  && options
2748
2777
  .filter
@@ -2840,14 +2869,9 @@ impl Database {
2840
2869
  return;
2841
2870
  }
2842
2871
 
2843
- let index = MultiVectorQuantizedIndex::build(
2844
- &doc_token_vectors,
2845
- token_dimension,
2846
- &config,
2847
- );
2872
+ let index = MultiVectorQuantizedIndex::build(&doc_token_vectors, token_dimension, &config);
2848
2873
 
2849
- self.multi_vector_quantized
2850
- .insert(space.to_owned(), index);
2874
+ self.multi_vector_quantized.insert(space.to_owned(), index);
2851
2875
  self.multi_vector_quantized_keys
2852
2876
  .insert(space.to_owned(), keys);
2853
2877
  }
@@ -2928,15 +2952,15 @@ impl Database {
2928
2952
 
2929
2953
  if !doc_token_vectors.is_empty() {
2930
2954
  index.rebuild(&doc_token_vectors);
2931
- let MultiVectorQuantizationConfig::TwoBit(ref cfg) = {
2932
- MultiVectorQuantizationConfig::TwoBit(index.quantizer.config.clone())
2933
- };
2934
- self.multi_vector_quantization_config
2935
- .insert(space.to_owned(), MultiVectorQuantizationConfig::TwoBit(cfg.clone()));
2955
+ let MultiVectorQuantizationConfig::TwoBit(ref cfg) =
2956
+ { MultiVectorQuantizationConfig::TwoBit(index.quantizer.config.clone()) };
2957
+ self.multi_vector_quantization_config.insert(
2958
+ space.to_owned(),
2959
+ MultiVectorQuantizationConfig::TwoBit(cfg.clone()),
2960
+ );
2936
2961
  self.multi_vector_quantized_keys
2937
2962
  .insert(space.to_owned(), keys);
2938
- self.multi_vector_quantized
2939
- .insert(space.to_owned(), index);
2963
+ self.multi_vector_quantized.insert(space.to_owned(), index);
2940
2964
  }
2941
2965
  }
2942
2966
  }
@@ -3020,7 +3044,11 @@ impl Database {
3020
3044
  PayloadIndexType::Numeric => {
3021
3045
  let mut num = NumericIndex::default();
3022
3046
  for (key, record) in &self.records {
3023
- if let Some(val) = record.metadata.get(field).and_then(MetadataValue::as_number) {
3047
+ if let Some(val) = record
3048
+ .metadata
3049
+ .get(field)
3050
+ .and_then(MetadataValue::as_number)
3051
+ {
3024
3052
  num.insert(val, key.clone());
3025
3053
  }
3026
3054
  }
@@ -3071,14 +3099,22 @@ impl Database {
3071
3099
  /// Use payload indexes to narrow down candidate keys for a filter.
3072
3100
  /// Returns `None` if no indexes can help with this filter (fallback to scan).
3073
3101
  /// Returns `Some(set)` with the set of record keys that *may* match the filter.
3074
- fn payload_index_candidates(&self, filter: &MetadataFilter, namespace: Option<&str>) -> Option<HashSet<RecordKey>> {
3102
+ fn payload_index_candidates(
3103
+ &self,
3104
+ filter: &MetadataFilter,
3105
+ namespace: Option<&str>,
3106
+ ) -> Option<HashSet<RecordKey>> {
3075
3107
  if self.payload_indexes.is_empty() {
3076
3108
  return None;
3077
3109
  }
3078
3110
  self.payload_index_candidates_inner(filter, namespace)
3079
3111
  }
3080
3112
 
3081
- fn payload_index_candidates_inner(&self, filter: &MetadataFilter, namespace: Option<&str>) -> Option<HashSet<RecordKey>> {
3113
+ fn payload_index_candidates_inner(
3114
+ &self,
3115
+ filter: &MetadataFilter,
3116
+ namespace: Option<&str>,
3117
+ ) -> Option<HashSet<RecordKey>> {
3082
3118
  match filter {
3083
3119
  MetadataFilter::Eq { key, value } => {
3084
3120
  // Try keyword index for string equality
@@ -3172,7 +3208,11 @@ impl Database {
3172
3208
  }
3173
3209
  }
3174
3210
 
3175
- fn filter_by_namespace(&self, keys: HashSet<RecordKey>, namespace: Option<&str>) -> HashSet<RecordKey> {
3211
+ fn filter_by_namespace(
3212
+ &self,
3213
+ keys: HashSet<RecordKey>,
3214
+ namespace: Option<&str>,
3215
+ ) -> HashSet<RecordKey> {
3176
3216
  match namespace {
3177
3217
  Some(ns) => keys.into_iter().filter(|(n, _)| n == ns).collect(),
3178
3218
  None => keys,
@@ -3357,7 +3397,9 @@ impl Database {
3357
3397
  WalOp::UpdateMetadata { .. } | WalOp::SetTtl { .. } => false,
3358
3398
  });
3359
3399
 
3360
- let metadata_only = ops.iter().all(|op| matches!(op, WalOp::UpdateMetadata { .. } | WalOp::SetTtl { .. }));
3400
+ let metadata_only = ops
3401
+ .iter()
3402
+ .all(|op| matches!(op, WalOp::UpdateMetadata { .. } | WalOp::SetTtl { .. }));
3361
3403
 
3362
3404
  self.append_wal_batch(&ops)?;
3363
3405
  self.apply_ops_in_memory(ops);
@@ -3721,22 +3763,32 @@ impl Database {
3721
3763
  Some(0) => {
3722
3764
  return Err(VectLiteError::InvalidFormat(
3723
3765
  "truncate_dim must be greater than zero".to_owned(),
3724
- ))
3766
+ ));
3725
3767
  }
3726
3768
  Some(dim) if dim > self.dimension => {
3727
3769
  return Err(VectLiteError::DimensionMismatch {
3728
3770
  expected: self.dimension,
3729
3771
  found: dim,
3730
- })
3772
+ });
3731
3773
  }
3732
3774
  Some(dim) if dim > query.len() => {
3733
3775
  return Err(VectLiteError::InvalidFormat(format!(
3734
3776
  "truncate_dim ({dim}) cannot exceed query vector length ({})",
3735
3777
  query.len()
3736
- )))
3778
+ )));
3737
3779
  }
3738
3780
  Some(dim) => dim,
3739
- None => query.len(),
3781
+ None => {
3782
+ // Without explicit truncate_dim, require exact dimension match.
3783
+ // Users must pass truncate_dim to opt into Matryoshka truncation.
3784
+ if query.len() != self.dimension {
3785
+ return Err(VectLiteError::DimensionMismatch {
3786
+ expected: self.dimension,
3787
+ found: query.len(),
3788
+ });
3789
+ }
3790
+ query.len()
3791
+ }
3740
3792
  };
3741
3793
 
3742
3794
  Ok(Some(effective))
@@ -4058,8 +4110,13 @@ impl Database {
4058
4110
  let mut weighted_sum = 0.0_f32;
4059
4111
  for (name, (query, weight)) in &options.multi_vector_queries {
4060
4112
  if let Some(vector) = record.vector_for(Some(name.as_str())) {
4061
- weighted_sum +=
4062
- weight * score_dense_prefix(self.metric, query, vector, effective_dimension);
4113
+ weighted_sum += weight
4114
+ * score_dense_prefix(
4115
+ self.metric,
4116
+ query,
4117
+ vector,
4118
+ effective_dimension,
4119
+ );
4063
4120
  }
4064
4121
  }
4065
4122
  (weighted_sum, None)
@@ -4375,11 +4432,7 @@ fn ensure_dimension(dimension: usize) -> Result<()> {
4375
4432
  /// MaxSim scoring (ColBERT-style late interaction).
4376
4433
  /// For each query token, find the maximum similarity against any document
4377
4434
  /// token using the given metric, then sum those maxima across all query tokens.
4378
- fn maxsim_score(
4379
- query_tokens: &[&[f32]],
4380
- doc_tokens: &[Vec<f32>],
4381
- metric: DistanceMetric,
4382
- ) -> f32 {
4435
+ fn maxsim_score(query_tokens: &[&[f32]], doc_tokens: &[Vec<f32>], metric: DistanceMetric) -> f32 {
4383
4436
  if query_tokens.is_empty() || doc_tokens.is_empty() {
4384
4437
  return 0.0;
4385
4438
  }
@@ -4428,8 +4481,13 @@ fn build_ann_index(records: Vec<(RecordKey, &Vec<f32>)>, metric: DistanceMetric)
4428
4481
 
4429
4482
  macro_rules! build_hnsw {
4430
4483
  ($dist_type:ty, $dist_val:expr, $variant:ident) => {{
4431
- let mut hnsw =
4432
- Hnsw::<f32, $dist_type>::new(ANN_M, count, max_layer, ANN_EF_CONSTRUCTION, $dist_val);
4484
+ let mut hnsw = Hnsw::<f32, $dist_type>::new(
4485
+ ANN_M,
4486
+ count,
4487
+ max_layer,
4488
+ ANN_EF_CONSTRUCTION,
4489
+ $dist_val,
4490
+ );
4433
4491
  let mut keys = Vec::with_capacity(count);
4434
4492
  for (origin_id, (key, vector)) in records.into_iter().enumerate() {
4435
4493
  hnsw.insert((vector.as_slice(), origin_id));
@@ -4616,11 +4674,13 @@ fn ann_basename(path: &Path, namespace: Option<&str>, vector_name: &str) -> Stri
4616
4674
  .file_name()
4617
4675
  .and_then(|name| name.to_str())
4618
4676
  .unwrap_or("vectlite");
4619
- format!(
4620
- "{stem}.ann.{}.{}",
4621
- hex_encode(namespace.unwrap_or(DEFAULT_NAMESPACE).as_bytes()),
4622
- hex_encode(vector_name.as_bytes())
4623
- )
4677
+ let ns_hex = hex_encode(namespace.unwrap_or(DEFAULT_NAMESPACE).as_bytes());
4678
+ let vn_hex = hex_encode(vector_name.as_bytes());
4679
+ // Use "_" sentinel for empty components to avoid triple-dot filenames
4680
+ // like "c.vdb.ann...hnsw.data".
4681
+ let ns_part = if ns_hex.is_empty() { "_" } else { &ns_hex };
4682
+ let vn_part = if vn_hex.is_empty() { "_" } else { &vn_hex };
4683
+ format!("{stem}.ann.{ns_part}.{vn_part}")
4624
4684
  }
4625
4685
 
4626
4686
  fn hex_encode(bytes: &[u8]) -> String {
@@ -5350,9 +5410,9 @@ fn usize_from_u64(value: u64) -> Result<usize> {
5350
5410
  #[cfg(test)]
5351
5411
  mod tests {
5352
5412
  use super::{
5353
- Database, HybridSearchOptions, Metadata, MetadataFilter, MetadataValue, MultiVectors,
5354
- MultiVectorSearchOptions, NamedVectors, PayloadIndexType, Record, SearchOptions,
5355
- SparseVector, VectLiteError,
5413
+ Database, DistanceMetric, HybridSearchOptions, Metadata, MetadataFilter, MetadataValue,
5414
+ MultiVectorSearchOptions, MultiVectors, NamedVectors, PayloadIndexType, Record,
5415
+ SearchOptions, SparseVector, VectLiteError,
5356
5416
  };
5357
5417
  use std::path::{Path, PathBuf};
5358
5418
  use std::time::{SystemTime, UNIX_EPOCH};
@@ -6046,6 +6106,54 @@ mod tests {
6046
6106
  cleanup(&path);
6047
6107
  }
6048
6108
 
6109
+ #[test]
6110
+ fn scalar_quantization_keeps_signed_cosine_neighbor_in_candidate_set() {
6111
+ use super::quantization::{QuantizationConfig, ScalarQuantizationConfig};
6112
+
6113
+ let path = temp_file("quant-scalar-signed-recall");
6114
+ let dim = 146;
6115
+
6116
+ let mut query = vec![-1.0_f32; dim];
6117
+ for value in &mut query[..10] {
6118
+ *value = 1.0;
6119
+ }
6120
+
6121
+ let mut db = Database::create(&path, dim).expect("create");
6122
+ for i in 0..120 {
6123
+ db.upsert(format!("high{i:03}"), vec![2.0_f32; dim], Metadata::new())
6124
+ .expect("upsert high distractor");
6125
+ }
6126
+
6127
+ let mut calibration_low = vec![2.0_f32; dim];
6128
+ for value in &mut calibration_low[..10] {
6129
+ *value = -1.0;
6130
+ }
6131
+ db.upsert("calibration-low", calibration_low, Metadata::new())
6132
+ .expect("upsert calibration low");
6133
+ db.upsert("target", query.clone(), Metadata::new())
6134
+ .expect("upsert target");
6135
+
6136
+ db.enable_quantization(QuantizationConfig::Scalar(ScalarQuantizationConfig {
6137
+ rescore_multiplier: 1,
6138
+ }))
6139
+ .expect("enable quant");
6140
+
6141
+ let results = db
6142
+ .search(
6143
+ &query,
6144
+ SearchOptions {
6145
+ top_k: 1,
6146
+ filter: None,
6147
+ truncate_dim: None,
6148
+ },
6149
+ )
6150
+ .expect("search");
6151
+
6152
+ assert_eq!(results[0].id, "target");
6153
+
6154
+ cleanup(&path);
6155
+ }
6156
+
6049
6157
  #[test]
6050
6158
  fn binary_quantization_enables_search() {
6051
6159
  use super::quantization::{BinaryQuantizationConfig, QuantizationConfig};
@@ -6187,6 +6295,40 @@ mod tests {
6187
6295
  cleanup(&path);
6188
6296
  }
6189
6297
 
6298
+ #[test]
6299
+ fn product_quantization_invalid_subvector_count_returns_error() {
6300
+ use super::quantization::{ProductQuantizationConfig, QuantizationConfig};
6301
+
6302
+ let path = temp_file("quant-pq-invalid-subvectors");
6303
+ let mut db = Database::create(&path, 146).expect("create");
6304
+ for i in 0..4 {
6305
+ db.upsert(
6306
+ format!("doc{i}"),
6307
+ vec![0.1_f32 + i as f32; 146],
6308
+ Metadata::new(),
6309
+ )
6310
+ .expect("upsert");
6311
+ }
6312
+ assert_eq!(db.valid_num_sub_vectors(), vec![1, 2, 73, 146]);
6313
+
6314
+ let result =
6315
+ db.enable_quantization(QuantizationConfig::Product(ProductQuantizationConfig {
6316
+ num_sub_vectors: 16,
6317
+ num_centroids: 4,
6318
+ training_iterations: 1,
6319
+ rescore_multiplier: 1,
6320
+ }));
6321
+
6322
+ assert!(matches!(
6323
+ result,
6324
+ Err(VectLiteError::InvalidFormat(message))
6325
+ if message.contains("dimension (146) must be divisible by num_sub_vectors (16)")
6326
+ ));
6327
+ assert!(!db.is_quantized());
6328
+
6329
+ cleanup(&path);
6330
+ }
6331
+
6190
6332
  // -----------------------------------------------------------------------
6191
6333
  // Multi-vector / ColBERT-style integration tests
6192
6334
  // -----------------------------------------------------------------------
@@ -6200,10 +6342,7 @@ mod tests {
6200
6342
  let mut mv1 = MultiVectors::new();
6201
6343
  mv1.insert(
6202
6344
  "colbert".to_owned(),
6203
- vec![
6204
- vec![1.0, 0.0, 0.0],
6205
- vec![0.0, 1.0, 0.0],
6206
- ],
6345
+ vec![vec![1.0, 0.0, 0.0], vec![0.0, 1.0, 0.0]],
6207
6346
  );
6208
6347
  db.upsert_multi_vectors("doc1", vec![1.0, 0.0, 0.0], Metadata::new(), mv1)
6209
6348
  .expect("upsert doc1");
@@ -6211,10 +6350,7 @@ mod tests {
6211
6350
  let mut mv2 = MultiVectors::new();
6212
6351
  mv2.insert(
6213
6352
  "colbert".to_owned(),
6214
- vec![
6215
- vec![0.0, 0.0, 1.0],
6216
- vec![0.0, 1.0, 0.0],
6217
- ],
6353
+ vec![vec![0.0, 0.0, 1.0], vec![0.0, 1.0, 0.0]],
6218
6354
  );
6219
6355
  db.upsert_multi_vectors("doc2", vec![0.0, 0.0, 1.0], Metadata::new(), mv2)
6220
6356
  .expect("upsert doc2");
@@ -6222,13 +6358,14 @@ mod tests {
6222
6358
  assert_eq!(db.len(), 2);
6223
6359
 
6224
6360
  // Search with query tokens that strongly match doc1
6225
- let query_tokens = vec![
6226
- vec![1.0, 0.0, 0.0],
6227
- vec![0.0, 1.0, 0.0],
6228
- ];
6361
+ let query_tokens = vec![vec![1.0, 0.0, 0.0], vec![0.0, 1.0, 0.0]];
6229
6362
 
6230
6363
  let results = db
6231
- .search_multi_vector("colbert", &query_tokens, MultiVectorSearchOptions::default())
6364
+ .search_multi_vector(
6365
+ "colbert",
6366
+ &query_tokens,
6367
+ MultiVectorSearchOptions::default(),
6368
+ )
6232
6369
  .expect("search");
6233
6370
 
6234
6371
  assert_eq!(results.len(), 2);
@@ -6255,7 +6392,11 @@ mod tests {
6255
6392
  let db = Database::create(&path, 3).expect("create");
6256
6393
 
6257
6394
  let query_tokens: Vec<Vec<f32>> = vec![];
6258
- let result = db.search_multi_vector("colbert", &query_tokens, MultiVectorSearchOptions::default());
6395
+ let result = db.search_multi_vector(
6396
+ "colbert",
6397
+ &query_tokens,
6398
+ MultiVectorSearchOptions::default(),
6399
+ );
6259
6400
  assert!(result.is_err());
6260
6401
 
6261
6402
  cleanup(&path);
@@ -6268,10 +6409,22 @@ mod tests {
6268
6409
 
6269
6410
  let mut mv = MultiVectors::new();
6270
6411
  mv.insert("colbert".to_owned(), vec![vec![1.0, 0.0, 0.0]]);
6271
- db.upsert_multi_vectors_in_namespace("ns1", "doc1", vec![1.0, 0.0, 0.0], Metadata::new(), mv.clone())
6272
- .expect("upsert ns1");
6273
- db.upsert_multi_vectors_in_namespace("ns2", "doc2", vec![0.0, 1.0, 0.0], Metadata::new(), mv.clone())
6274
- .expect("upsert ns2");
6412
+ db.upsert_multi_vectors_in_namespace(
6413
+ "ns1",
6414
+ "doc1",
6415
+ vec![1.0, 0.0, 0.0],
6416
+ Metadata::new(),
6417
+ mv.clone(),
6418
+ )
6419
+ .expect("upsert ns1");
6420
+ db.upsert_multi_vectors_in_namespace(
6421
+ "ns2",
6422
+ "doc2",
6423
+ vec![0.0, 1.0, 0.0],
6424
+ Metadata::new(),
6425
+ mv.clone(),
6426
+ )
6427
+ .expect("upsert ns2");
6275
6428
 
6276
6429
  let query_tokens = vec![vec![1.0, 0.0, 0.0]];
6277
6430
  let options = MultiVectorSearchOptions {
@@ -6279,7 +6432,9 @@ mod tests {
6279
6432
  filter: None,
6280
6433
  namespace: Some("ns1".to_owned()),
6281
6434
  };
6282
- let results = db.search_multi_vector("colbert", &query_tokens, options).expect("search");
6435
+ let results = db
6436
+ .search_multi_vector("colbert", &query_tokens, options)
6437
+ .expect("search");
6283
6438
 
6284
6439
  assert_eq!(results.len(), 1);
6285
6440
  assert_eq!(results[0].id, "doc1");
@@ -6331,13 +6486,18 @@ mod tests {
6331
6486
  // Search should still work
6332
6487
  let query_tokens = vec![vec![9.0, 0.0, 0.0], vec![0.0, 9.0, 0.0]];
6333
6488
  let results = db
6334
- .search_multi_vector("colbert", &query_tokens, MultiVectorSearchOptions::default())
6489
+ .search_multi_vector(
6490
+ "colbert",
6491
+ &query_tokens,
6492
+ MultiVectorSearchOptions::default(),
6493
+ )
6335
6494
  .expect("search");
6336
6495
 
6337
6496
  assert!(!results.is_empty());
6338
6497
 
6339
6498
  // Disable quantization
6340
- db.disable_multi_vector_quantization("colbert").expect("disable");
6499
+ db.disable_multi_vector_quantization("colbert")
6500
+ .expect("disable");
6341
6501
  assert!(!db.is_multi_vector_quantized("colbert"));
6342
6502
 
6343
6503
  cleanup(&path);
@@ -6387,7 +6547,11 @@ mod tests {
6387
6547
  // Search should work on reopened database
6388
6548
  let query_tokens = vec![vec![0.9, 0.5, 0.5]];
6389
6549
  let results = db
6390
- .search_multi_vector("colbert", &query_tokens, MultiVectorSearchOptions::default())
6550
+ .search_multi_vector(
6551
+ "colbert",
6552
+ &query_tokens,
6553
+ MultiVectorSearchOptions::default(),
6554
+ )
6391
6555
  .expect("search");
6392
6556
  assert!(!results.is_empty());
6393
6557
 
@@ -6421,7 +6585,7 @@ mod tests {
6421
6585
 
6422
6586
  #[test]
6423
6587
  fn multi_vector_maxsim_scoring_correctness() {
6424
- use super::{maxsim_score, DistanceMetric};
6588
+ use super::{DistanceMetric, maxsim_score};
6425
6589
 
6426
6590
  // Two identical sets: MaxSim should be sum of 1.0 per query token
6427
6591
  let query = [&[1.0_f32, 0.0, 0.0][..], &[0.0, 1.0, 0.0]];
@@ -6479,17 +6643,44 @@ mod tests {
6479
6643
  fn distance_metric_name_aliases() {
6480
6644
  use super::DistanceMetric;
6481
6645
  // Euclidean aliases
6482
- assert_eq!(DistanceMetric::from_name("l2").unwrap(), DistanceMetric::Euclidean);
6483
- assert_eq!(DistanceMetric::from_name("L2").unwrap(), DistanceMetric::Euclidean);
6484
- assert_eq!(DistanceMetric::from_name("EUCLIDEAN").unwrap(), DistanceMetric::Euclidean);
6646
+ assert_eq!(
6647
+ DistanceMetric::from_name("l2").unwrap(),
6648
+ DistanceMetric::Euclidean
6649
+ );
6650
+ assert_eq!(
6651
+ DistanceMetric::from_name("L2").unwrap(),
6652
+ DistanceMetric::Euclidean
6653
+ );
6654
+ assert_eq!(
6655
+ DistanceMetric::from_name("EUCLIDEAN").unwrap(),
6656
+ DistanceMetric::Euclidean
6657
+ );
6485
6658
  // DotProduct aliases
6486
- assert_eq!(DistanceMetric::from_name("dot").unwrap(), DistanceMetric::DotProduct);
6487
- assert_eq!(DistanceMetric::from_name("dot_product").unwrap(), DistanceMetric::DotProduct);
6488
- assert_eq!(DistanceMetric::from_name("ip").unwrap(), DistanceMetric::DotProduct);
6489
- assert_eq!(DistanceMetric::from_name("inner_product").unwrap(), DistanceMetric::DotProduct);
6659
+ assert_eq!(
6660
+ DistanceMetric::from_name("dot").unwrap(),
6661
+ DistanceMetric::DotProduct
6662
+ );
6663
+ assert_eq!(
6664
+ DistanceMetric::from_name("dot_product").unwrap(),
6665
+ DistanceMetric::DotProduct
6666
+ );
6667
+ assert_eq!(
6668
+ DistanceMetric::from_name("ip").unwrap(),
6669
+ DistanceMetric::DotProduct
6670
+ );
6671
+ assert_eq!(
6672
+ DistanceMetric::from_name("inner_product").unwrap(),
6673
+ DistanceMetric::DotProduct
6674
+ );
6490
6675
  // Manhattan aliases
6491
- assert_eq!(DistanceMetric::from_name("l1").unwrap(), DistanceMetric::Manhattan);
6492
- assert_eq!(DistanceMetric::from_name("L1").unwrap(), DistanceMetric::Manhattan);
6676
+ assert_eq!(
6677
+ DistanceMetric::from_name("l1").unwrap(),
6678
+ DistanceMetric::Manhattan
6679
+ );
6680
+ assert_eq!(
6681
+ DistanceMetric::from_name("L1").unwrap(),
6682
+ DistanceMetric::Manhattan
6683
+ );
6493
6684
  // Invalid
6494
6685
  assert!(DistanceMetric::from_name("hamming").is_err());
6495
6686
  }
@@ -6634,8 +6825,8 @@ mod tests {
6634
6825
  fn search_with_euclidean_metric() {
6635
6826
  use super::DistanceMetric;
6636
6827
  let path = temp_file("metric-search-euclidean");
6637
- let mut db = Database::create_with_metric(&path, 3, DistanceMetric::Euclidean)
6638
- .expect("create");
6828
+ let mut db =
6829
+ Database::create_with_metric(&path, 3, DistanceMetric::Euclidean).expect("create");
6639
6830
 
6640
6831
  // Insert vectors at known distances from query [0, 0, 0]
6641
6832
  db.insert("close", vec![1.0, 0.0, 0.0], Metadata::new())
@@ -6671,8 +6862,8 @@ mod tests {
6671
6862
  fn search_with_dotproduct_metric() {
6672
6863
  use super::DistanceMetric;
6673
6864
  let path = temp_file("metric-search-dot");
6674
- let mut db = Database::create_with_metric(&path, 3, DistanceMetric::DotProduct)
6675
- .expect("create");
6865
+ let mut db =
6866
+ Database::create_with_metric(&path, 3, DistanceMetric::DotProduct).expect("create");
6676
6867
 
6677
6868
  // Vectors with different dot products with query [1, 0, 0]
6678
6869
  db.insert("high", vec![10.0, 0.0, 0.0], Metadata::new())
@@ -6707,8 +6898,8 @@ mod tests {
6707
6898
  fn search_with_manhattan_metric() {
6708
6899
  use super::DistanceMetric;
6709
6900
  let path = temp_file("metric-search-manhattan");
6710
- let mut db = Database::create_with_metric(&path, 3, DistanceMetric::Manhattan)
6711
- .expect("create");
6901
+ let mut db =
6902
+ Database::create_with_metric(&path, 3, DistanceMetric::Manhattan).expect("create");
6712
6903
 
6713
6904
  // Vectors at known Manhattan distances from query [0, 0, 0]
6714
6905
  db.insert("close", vec![1.0, 0.0, 0.0], Metadata::new())
@@ -6755,6 +6946,7 @@ mod tests {
6755
6946
  None,
6756
6947
  HybridSearchOptions {
6757
6948
  top_k: 2,
6949
+ truncate_dim: Some(2),
6758
6950
  ..HybridSearchOptions::default()
6759
6951
  },
6760
6952
  )
@@ -6801,8 +6993,8 @@ mod tests {
6801
6993
  fn search_with_cosine_metric_explicit() {
6802
6994
  use super::DistanceMetric;
6803
6995
  let path = temp_file("metric-search-cosine-explicit");
6804
- let mut db = Database::create_with_metric(&path, 3, DistanceMetric::Cosine)
6805
- .expect("create");
6996
+ let mut db =
6997
+ Database::create_with_metric(&path, 3, DistanceMetric::Cosine).expect("create");
6806
6998
 
6807
6999
  db.insert("aligned", vec![2.0, 0.0, 0.0], Metadata::new())
6808
7000
  .expect("insert aligned"); // cosine = 1.0
@@ -6836,8 +7028,8 @@ mod tests {
6836
7028
  use super::DistanceMetric;
6837
7029
  let path = temp_file("metric-upsert-cycle");
6838
7030
  {
6839
- let mut db = Database::create_with_metric(&path, 3, DistanceMetric::Manhattan)
6840
- .expect("create");
7031
+ let mut db =
7032
+ Database::create_with_metric(&path, 3, DistanceMetric::Manhattan).expect("create");
6841
7033
  db.upsert("a", vec![1.0, 0.0, 0.0], Metadata::new())
6842
7034
  .expect("upsert a");
6843
7035
  db.upsert("b", vec![0.0, 5.0, 0.0], Metadata::new())
@@ -6916,7 +7108,8 @@ mod tests {
6916
7108
  let mut meta = Metadata::new();
6917
7109
  meta.insert("source".into(), "blog".into());
6918
7110
  meta.insert("version".into(), MetadataValue::Integer(1));
6919
- db.upsert("doc1", vec![1.0, 0.0, 0.0], meta).expect("upsert");
7111
+ db.upsert("doc1", vec![1.0, 0.0, 0.0], meta)
7112
+ .expect("upsert");
6920
7113
 
6921
7114
  // Patch: update version, add new key
6922
7115
  let mut patch = Metadata::new();
@@ -6966,7 +7159,8 @@ mod tests {
6966
7159
 
6967
7160
  let mut meta = Metadata::new();
6968
7161
  meta.insert("source".into(), "blog".into());
6969
- db.upsert("doc1", vec![1.0, 2.0, 3.0], meta).expect("upsert");
7162
+ db.upsert("doc1", vec![1.0, 2.0, 3.0], meta)
7163
+ .expect("upsert");
6970
7164
 
6971
7165
  let mut patch = Metadata::new();
6972
7166
  patch.insert("source".into(), "updated".into());
@@ -6986,7 +7180,8 @@ mod tests {
6986
7180
  let mut db = Database::create(&path, 3).expect("create");
6987
7181
  let mut meta = Metadata::new();
6988
7182
  meta.insert("source".into(), "blog".into());
6989
- db.upsert("doc1", vec![1.0, 0.0, 0.0], meta).expect("upsert");
7183
+ db.upsert("doc1", vec![1.0, 0.0, 0.0], meta)
7184
+ .expect("upsert");
6990
7185
 
6991
7186
  let mut patch = Metadata::new();
6992
7187
  patch.insert("source".into(), "updated".into());
@@ -7052,7 +7247,8 @@ mod tests {
7052
7247
 
7053
7248
  let mut meta = Metadata::new();
7054
7249
  meta.insert("status".into(), "draft".into());
7055
- db.upsert("doc1", vec![1.0, 0.0, 0.0], meta).expect("upsert");
7250
+ db.upsert("doc1", vec![1.0, 0.0, 0.0], meta)
7251
+ .expect("upsert");
7056
7252
 
7057
7253
  // Before patch: filter matches
7058
7254
  let count = db.count_filtered(None, Some(&MetadataFilter::eq("status", "draft")));
@@ -7320,7 +7516,8 @@ mod tests {
7320
7516
 
7321
7517
  let mut meta = Metadata::new();
7322
7518
  meta.insert("source".into(), "blog".into());
7323
- db.upsert("doc1", vec![1.0, 0.0, 0.0], meta).expect("upsert");
7519
+ db.upsert("doc1", vec![1.0, 0.0, 0.0], meta)
7520
+ .expect("upsert");
7324
7521
 
7325
7522
  let mut meta2 = Metadata::new();
7326
7523
  meta2.insert("source".into(), "docs".into());
@@ -7356,7 +7553,8 @@ mod tests {
7356
7553
  // Now upsert records — they should be indexed incrementally
7357
7554
  let mut meta = Metadata::new();
7358
7555
  meta.insert("source".into(), "blog".into());
7359
- db.upsert("doc1", vec![1.0, 0.0, 0.0], meta).expect("upsert");
7556
+ db.upsert("doc1", vec![1.0, 0.0, 0.0], meta)
7557
+ .expect("upsert");
7360
7558
 
7361
7559
  let count = db.count_filtered(None, Some(&MetadataFilter::eq("source", "blog")));
7362
7560
  assert_eq!(count, 1);
@@ -7381,7 +7579,8 @@ mod tests {
7381
7579
 
7382
7580
  let mut meta = Metadata::new();
7383
7581
  meta.insert("source".into(), "blog".into());
7384
- db.upsert("doc1", vec![1.0, 0.0, 0.0], meta).expect("upsert");
7582
+ db.upsert("doc1", vec![1.0, 0.0, 0.0], meta)
7583
+ .expect("upsert");
7385
7584
 
7386
7585
  let mut meta2 = Metadata::new();
7387
7586
  meta2.insert("source".into(), "blog".into());
@@ -7454,7 +7653,8 @@ mod tests {
7454
7653
 
7455
7654
  let mut meta = Metadata::new();
7456
7655
  meta.insert("status".into(), "draft".into());
7457
- db.upsert("doc1", vec![1.0, 0.0, 0.0], meta).expect("upsert");
7656
+ db.upsert("doc1", vec![1.0, 0.0, 0.0], meta)
7657
+ .expect("upsert");
7458
7658
 
7459
7659
  db.create_index("status", PayloadIndexType::Keyword)
7460
7660
  .expect("create");
@@ -7490,7 +7690,10 @@ mod tests {
7490
7690
  // Insert records with source and priority
7491
7691
  for i in 0..30 {
7492
7692
  let mut meta = Metadata::new();
7493
- meta.insert("source".into(), if i % 2 == 0 { "blog" } else { "docs" }.into());
7693
+ meta.insert(
7694
+ "source".into(),
7695
+ if i % 2 == 0 { "blog" } else { "docs" }.into(),
7696
+ );
7494
7697
  meta.insert("priority".into(), MetadataValue::Float((i % 3) as f64));
7495
7698
  db.upsert(format!("doc{}", i), vec![1.0, 0.0, 0.0], meta)
7496
7699
  .expect("upsert");
@@ -7659,7 +7862,10 @@ mod tests {
7659
7862
 
7660
7863
  for i in 0..20 {
7661
7864
  let mut meta = Metadata::new();
7662
- meta.insert("type".into(), if i % 2 == 0 { "even" } else { "odd" }.into());
7865
+ meta.insert(
7866
+ "type".into(),
7867
+ if i % 2 == 0 { "even" } else { "odd" }.into(),
7868
+ );
7663
7869
  db.upsert(format!("doc{}", i), vec![1.0, 0.0, 0.0], meta)
7664
7870
  .expect("upsert");
7665
7871
  }
@@ -7929,7 +8135,10 @@ mod tests {
7929
8135
  assert!(db.get_in_namespace("ns1", "doc1").is_none());
7930
8136
 
7931
8137
  // Wrong namespace returns false
7932
- assert!(!db.set_ttl_in_namespace("ns2", "doc1", 60.0).expect("set wrong ns"));
8138
+ assert!(
8139
+ !db.set_ttl_in_namespace("ns2", "doc1", 60.0)
8140
+ .expect("set wrong ns")
8141
+ );
7933
8142
 
7934
8143
  cleanup(&path);
7935
8144
  }
@@ -7943,12 +8152,8 @@ mod tests {
7943
8152
  let path = temp_file("cursor-basic");
7944
8153
  let mut db = Database::create(&path, 3).expect("create");
7945
8154
  for i in 0..5 {
7946
- db.upsert(
7947
- &format!("doc{i}"),
7948
- vec![1.0, 0.0, 0.0],
7949
- Metadata::new(),
7950
- )
7951
- .expect("upsert");
8155
+ db.upsert(&format!("doc{i}"), vec![1.0, 0.0, 0.0], Metadata::new())
8156
+ .expect("upsert");
7952
8157
  }
7953
8158
 
7954
8159
  // First page of 2
@@ -7983,12 +8188,22 @@ mod tests {
7983
8188
  let path = temp_file("cursor-ns");
7984
8189
  let mut db = Database::create(&path, 3).expect("create");
7985
8190
  for i in 0..3 {
7986
- db.upsert_in_namespace("ns1", &format!("doc{i}"), vec![1.0, 0.0, 0.0], Metadata::new())
7987
- .expect("upsert");
8191
+ db.upsert_in_namespace(
8192
+ "ns1",
8193
+ &format!("doc{i}"),
8194
+ vec![1.0, 0.0, 0.0],
8195
+ Metadata::new(),
8196
+ )
8197
+ .expect("upsert");
7988
8198
  }
7989
8199
  for i in 0..2 {
7990
- db.upsert_in_namespace("ns2", &format!("doc{i}"), vec![0.0, 1.0, 0.0], Metadata::new())
7991
- .expect("upsert");
8200
+ db.upsert_in_namespace(
8201
+ "ns2",
8202
+ &format!("doc{i}"),
8203
+ vec![0.0, 1.0, 0.0],
8204
+ Metadata::new(),
8205
+ )
8206
+ .expect("upsert");
7992
8207
  }
7993
8208
 
7994
8209
  let (page1, cursor1) = db.list_cursor(Some("ns1"), None, 2, None);
@@ -8007,12 +8222,8 @@ mod tests {
8007
8222
  let path = temp_file("cursor-ttl");
8008
8223
  let mut db = Database::create(&path, 3).expect("create");
8009
8224
  for i in 0..5 {
8010
- db.upsert(
8011
- &format!("doc{i}"),
8012
- vec![1.0, 0.0, 0.0],
8013
- Metadata::new(),
8014
- )
8015
- .expect("upsert");
8225
+ db.upsert(&format!("doc{i}"), vec![1.0, 0.0, 0.0], Metadata::new())
8226
+ .expect("upsert");
8016
8227
  }
8017
8228
 
8018
8229
  // Expire doc1
@@ -8045,4 +8256,153 @@ mod tests {
8045
8256
  assert!(cursor.is_none());
8046
8257
  cleanup(&path);
8047
8258
  }
8259
+
8260
+ // ---------------------------------------------------------------
8261
+ // Bug #14: zero-norm query vector should be rejected for cosine
8262
+ // ---------------------------------------------------------------
8263
+
8264
+ #[test]
8265
+ fn search_zero_norm_query_cosine_rejected() {
8266
+ let path = temp_file("zero-norm-cosine");
8267
+ let mut db = Database::create(&path, 3).expect("create");
8268
+ db.insert("a", vec![1.0, 0.0, 0.0], Metadata::new())
8269
+ .expect("insert");
8270
+
8271
+ let result = db.search(
8272
+ &[0.0, 0.0, 0.0],
8273
+ SearchOptions {
8274
+ top_k: 5,
8275
+ ..Default::default()
8276
+ },
8277
+ );
8278
+ assert!(result.is_err(), "zero-norm cosine search should fail");
8279
+ let err_msg = result.unwrap_err().to_string();
8280
+ assert!(
8281
+ err_msg.contains("zero norm"),
8282
+ "error should mention zero norm: {err_msg}"
8283
+ );
8284
+ cleanup(&path);
8285
+ }
8286
+
8287
+ #[test]
8288
+ fn search_zero_norm_query_dotproduct_rejected() {
8289
+ let path = temp_file("zero-norm-dot");
8290
+ let mut db =
8291
+ Database::create_with_metric(&path, 3, DistanceMetric::DotProduct).expect("create");
8292
+ db.insert("a", vec![1.0, 0.0, 0.0], Metadata::new())
8293
+ .expect("insert");
8294
+
8295
+ let result = db.search(
8296
+ &[0.0, 0.0, 0.0],
8297
+ SearchOptions {
8298
+ top_k: 5,
8299
+ ..Default::default()
8300
+ },
8301
+ );
8302
+ assert!(result.is_err(), "zero-norm dotproduct search should fail");
8303
+ cleanup(&path);
8304
+ }
8305
+
8306
+ #[test]
8307
+ fn search_zero_norm_query_euclidean_allowed() {
8308
+ let path = temp_file("zero-norm-euclidean");
8309
+ let mut db =
8310
+ Database::create_with_metric(&path, 3, DistanceMetric::Euclidean).expect("create");
8311
+ db.insert("a", vec![1.0, 0.0, 0.0], Metadata::new())
8312
+ .expect("insert");
8313
+
8314
+ // Euclidean distance from the origin is well-defined; should succeed.
8315
+ let result = db.search(
8316
+ &[0.0, 0.0, 0.0],
8317
+ SearchOptions {
8318
+ top_k: 5,
8319
+ ..Default::default()
8320
+ },
8321
+ );
8322
+ assert!(
8323
+ result.is_ok(),
8324
+ "zero-norm euclidean search should succeed: {:?}",
8325
+ result.err()
8326
+ );
8327
+ cleanup(&path);
8328
+ }
8329
+
8330
+ // ---------------------------------------------------------------
8331
+ // Bug #15: dimension mismatch in search query should be rejected
8332
+ // ---------------------------------------------------------------
8333
+
8334
+ #[test]
8335
+ fn search_undersized_query_rejected() {
8336
+ let path = temp_file("dim-under");
8337
+ let mut db = Database::create(&path, 4).expect("create");
8338
+ db.insert("a", vec![1.0, 0.0, 0.0, 0.0], Metadata::new())
8339
+ .expect("insert");
8340
+
8341
+ // Query dim=2 on a dim=4 database without truncate_dim.
8342
+ let result = db.search(
8343
+ &[1.0, 0.0],
8344
+ SearchOptions {
8345
+ top_k: 5,
8346
+ ..Default::default()
8347
+ },
8348
+ );
8349
+ assert!(result.is_err(), "undersized query should fail");
8350
+ match result.unwrap_err() {
8351
+ VectLiteError::DimensionMismatch { expected, found } => {
8352
+ assert_eq!(expected, 4);
8353
+ assert_eq!(found, 2);
8354
+ }
8355
+ other => panic!("expected DimensionMismatch, got: {other}"),
8356
+ }
8357
+ cleanup(&path);
8358
+ }
8359
+
8360
+ #[test]
8361
+ fn search_oversized_query_rejected() {
8362
+ let path = temp_file("dim-over");
8363
+ let mut db = Database::create(&path, 3).expect("create");
8364
+ db.insert("a", vec![1.0, 0.0, 0.0], Metadata::new())
8365
+ .expect("insert");
8366
+
8367
+ let result = db.search(
8368
+ &[1.0, 0.0, 0.0, 0.0, 0.0],
8369
+ SearchOptions {
8370
+ top_k: 5,
8371
+ ..Default::default()
8372
+ },
8373
+ );
8374
+ assert!(result.is_err(), "oversized query should fail");
8375
+ match result.unwrap_err() {
8376
+ VectLiteError::DimensionMismatch { expected, found } => {
8377
+ assert_eq!(expected, 3);
8378
+ assert_eq!(found, 5);
8379
+ }
8380
+ other => panic!("expected DimensionMismatch, got: {other}"),
8381
+ }
8382
+ cleanup(&path);
8383
+ }
8384
+
8385
+ #[test]
8386
+ fn search_undersized_query_with_truncate_dim_allowed() {
8387
+ let path = temp_file("dim-matryoshka");
8388
+ let mut db = Database::create(&path, 4).expect("create");
8389
+ db.insert("a", vec![1.0, 0.0, 0.0, 0.0], Metadata::new())
8390
+ .expect("insert");
8391
+
8392
+ // With explicit truncate_dim, undersized queries are Matryoshka-truncated.
8393
+ let result = db.search(
8394
+ &[1.0, 0.0],
8395
+ SearchOptions {
8396
+ top_k: 5,
8397
+ truncate_dim: Some(2),
8398
+ ..Default::default()
8399
+ },
8400
+ );
8401
+ assert!(
8402
+ result.is_ok(),
8403
+ "truncate_dim query should succeed: {:?}",
8404
+ result.err()
8405
+ );
8406
+ cleanup(&path);
8407
+ }
8048
8408
  }