faiss 0.4.3 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (186)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +10 -0
  3. data/README.md +2 -0
  4. data/ext/faiss/index.cpp +33 -6
  5. data/ext/faiss/index_binary.cpp +17 -4
  6. data/ext/faiss/kmeans.cpp +6 -6
  7. data/lib/faiss/version.rb +1 -1
  8. data/vendor/faiss/faiss/AutoTune.cpp +2 -3
  9. data/vendor/faiss/faiss/AutoTune.h +1 -1
  10. data/vendor/faiss/faiss/Clustering.cpp +2 -2
  11. data/vendor/faiss/faiss/Clustering.h +2 -2
  12. data/vendor/faiss/faiss/IVFlib.cpp +26 -51
  13. data/vendor/faiss/faiss/IVFlib.h +1 -1
  14. data/vendor/faiss/faiss/Index.cpp +11 -0
  15. data/vendor/faiss/faiss/Index.h +34 -11
  16. data/vendor/faiss/faiss/Index2Layer.cpp +1 -1
  17. data/vendor/faiss/faiss/Index2Layer.h +2 -2
  18. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +1 -0
  19. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +9 -4
  20. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +5 -1
  21. data/vendor/faiss/faiss/IndexBinary.h +7 -7
  22. data/vendor/faiss/faiss/IndexBinaryFromFloat.h +1 -1
  23. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +8 -2
  24. data/vendor/faiss/faiss/IndexBinaryHNSW.h +1 -1
  25. data/vendor/faiss/faiss/IndexBinaryHash.cpp +3 -3
  26. data/vendor/faiss/faiss/IndexBinaryHash.h +5 -5
  27. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +7 -6
  28. data/vendor/faiss/faiss/IndexFastScan.cpp +125 -49
  29. data/vendor/faiss/faiss/IndexFastScan.h +102 -7
  30. data/vendor/faiss/faiss/IndexFlat.cpp +374 -4
  31. data/vendor/faiss/faiss/IndexFlat.h +81 -1
  32. data/vendor/faiss/faiss/IndexHNSW.cpp +93 -2
  33. data/vendor/faiss/faiss/IndexHNSW.h +58 -2
  34. data/vendor/faiss/faiss/IndexIDMap.cpp +14 -13
  35. data/vendor/faiss/faiss/IndexIDMap.h +6 -6
  36. data/vendor/faiss/faiss/IndexIVF.cpp +1 -1
  37. data/vendor/faiss/faiss/IndexIVF.h +5 -5
  38. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +1 -1
  39. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +9 -3
  40. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +3 -1
  41. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +176 -90
  42. data/vendor/faiss/faiss/IndexIVFFastScan.h +173 -18
  43. data/vendor/faiss/faiss/IndexIVFFlat.cpp +1 -0
  44. data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +251 -0
  45. data/vendor/faiss/faiss/IndexIVFFlatPanorama.h +64 -0
  46. data/vendor/faiss/faiss/IndexIVFPQ.cpp +3 -1
  47. data/vendor/faiss/faiss/IndexIVFPQ.h +1 -1
  48. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +134 -2
  49. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +7 -1
  50. data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +99 -8
  51. data/vendor/faiss/faiss/IndexIVFRaBitQ.h +4 -1
  52. data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +828 -0
  53. data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +252 -0
  54. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +1 -1
  55. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +1 -1
  56. data/vendor/faiss/faiss/IndexNNDescent.cpp +1 -1
  57. data/vendor/faiss/faiss/IndexNSG.cpp +1 -1
  58. data/vendor/faiss/faiss/IndexNeuralNetCodec.h +1 -1
  59. data/vendor/faiss/faiss/IndexPQ.cpp +4 -1
  60. data/vendor/faiss/faiss/IndexPQ.h +1 -1
  61. data/vendor/faiss/faiss/IndexPQFastScan.cpp +6 -2
  62. data/vendor/faiss/faiss/IndexPQFastScan.h +5 -1
  63. data/vendor/faiss/faiss/IndexPreTransform.cpp +14 -0
  64. data/vendor/faiss/faiss/IndexPreTransform.h +9 -0
  65. data/vendor/faiss/faiss/IndexRaBitQ.cpp +96 -13
  66. data/vendor/faiss/faiss/IndexRaBitQ.h +11 -2
  67. data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +731 -0
  68. data/vendor/faiss/faiss/IndexRaBitQFastScan.h +175 -0
  69. data/vendor/faiss/faiss/IndexRefine.cpp +49 -0
  70. data/vendor/faiss/faiss/IndexRefine.h +17 -0
  71. data/vendor/faiss/faiss/IndexShards.cpp +1 -1
  72. data/vendor/faiss/faiss/MatrixStats.cpp +3 -3
  73. data/vendor/faiss/faiss/MetricType.h +1 -1
  74. data/vendor/faiss/faiss/VectorTransform.h +2 -2
  75. data/vendor/faiss/faiss/clone_index.cpp +5 -1
  76. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +1 -1
  77. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +3 -1
  78. data/vendor/faiss/faiss/gpu/GpuIndex.h +11 -11
  79. data/vendor/faiss/faiss/gpu/GpuIndexBinaryCagra.h +1 -1
  80. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +1 -1
  81. data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +11 -7
  82. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +1 -1
  83. data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +2 -0
  84. data/vendor/faiss/faiss/gpu/test/TestGpuIcmEncoder.cpp +7 -0
  85. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +1 -1
  86. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +1 -1
  87. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +1 -1
  88. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +2 -2
  89. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +1 -1
  90. data/vendor/faiss/faiss/impl/CodePacker.h +2 -2
  91. data/vendor/faiss/faiss/impl/DistanceComputer.h +77 -6
  92. data/vendor/faiss/faiss/impl/FastScanDistancePostProcessing.h +53 -0
  93. data/vendor/faiss/faiss/impl/HNSW.cpp +295 -16
  94. data/vendor/faiss/faiss/impl/HNSW.h +35 -6
  95. data/vendor/faiss/faiss/impl/IDSelector.cpp +2 -2
  96. data/vendor/faiss/faiss/impl/IDSelector.h +4 -4
  97. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +4 -4
  98. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +1 -1
  99. data/vendor/faiss/faiss/impl/LookupTableScaler.h +1 -1
  100. data/vendor/faiss/faiss/impl/NNDescent.cpp +1 -1
  101. data/vendor/faiss/faiss/impl/NNDescent.h +2 -2
  102. data/vendor/faiss/faiss/impl/NSG.cpp +1 -1
  103. data/vendor/faiss/faiss/impl/Panorama.cpp +193 -0
  104. data/vendor/faiss/faiss/impl/Panorama.h +204 -0
  105. data/vendor/faiss/faiss/impl/PanoramaStats.cpp +33 -0
  106. data/vendor/faiss/faiss/impl/PanoramaStats.h +38 -0
  107. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +5 -5
  108. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +1 -1
  109. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.h +1 -1
  110. data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +2 -0
  111. data/vendor/faiss/faiss/impl/ProductQuantizer.h +1 -1
  112. data/vendor/faiss/faiss/impl/RaBitQStats.cpp +29 -0
  113. data/vendor/faiss/faiss/impl/RaBitQStats.h +56 -0
  114. data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +294 -0
  115. data/vendor/faiss/faiss/impl/RaBitQUtils.h +330 -0
  116. data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +304 -223
  117. data/vendor/faiss/faiss/impl/RaBitQuantizer.h +72 -4
  118. data/vendor/faiss/faiss/impl/RaBitQuantizerMultiBit.cpp +362 -0
  119. data/vendor/faiss/faiss/impl/RaBitQuantizerMultiBit.h +112 -0
  120. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +1 -1
  121. data/vendor/faiss/faiss/impl/ResultHandler.h +4 -4
  122. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +7 -10
  123. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +2 -4
  124. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +7 -4
  125. data/vendor/faiss/faiss/impl/index_read.cpp +238 -10
  126. data/vendor/faiss/faiss/impl/index_write.cpp +212 -19
  127. data/vendor/faiss/faiss/impl/io.cpp +2 -2
  128. data/vendor/faiss/faiss/impl/io.h +4 -4
  129. data/vendor/faiss/faiss/impl/kmeans1d.cpp +1 -1
  130. data/vendor/faiss/faiss/impl/kmeans1d.h +1 -1
  131. data/vendor/faiss/faiss/impl/lattice_Zn.h +2 -2
  132. data/vendor/faiss/faiss/impl/mapped_io.cpp +2 -2
  133. data/vendor/faiss/faiss/impl/mapped_io.h +4 -3
  134. data/vendor/faiss/faiss/impl/maybe_owned_vector.h +8 -1
  135. data/vendor/faiss/faiss/impl/platform_macros.h +12 -0
  136. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +30 -4
  137. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +14 -8
  138. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +5 -6
  139. data/vendor/faiss/faiss/impl/simd_result_handlers.h +55 -11
  140. data/vendor/faiss/faiss/impl/svs_io.cpp +86 -0
  141. data/vendor/faiss/faiss/impl/svs_io.h +67 -0
  142. data/vendor/faiss/faiss/impl/zerocopy_io.h +1 -1
  143. data/vendor/faiss/faiss/index_factory.cpp +217 -8
  144. data/vendor/faiss/faiss/index_factory.h +1 -1
  145. data/vendor/faiss/faiss/index_io.h +1 -1
  146. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +1 -1
  147. data/vendor/faiss/faiss/invlists/DirectMap.cpp +1 -1
  148. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +115 -1
  149. data/vendor/faiss/faiss/invlists/InvertedLists.h +46 -0
  150. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +1 -1
  151. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
  152. data/vendor/faiss/faiss/svs/IndexSVSFaissUtils.h +261 -0
  153. data/vendor/faiss/faiss/svs/IndexSVSFlat.cpp +117 -0
  154. data/vendor/faiss/faiss/svs/IndexSVSFlat.h +66 -0
  155. data/vendor/faiss/faiss/svs/IndexSVSVamana.cpp +245 -0
  156. data/vendor/faiss/faiss/svs/IndexSVSVamana.h +137 -0
  157. data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.cpp +39 -0
  158. data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.h +42 -0
  159. data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.cpp +149 -0
  160. data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.h +58 -0
  161. data/vendor/faiss/faiss/utils/AlignedTable.h +1 -1
  162. data/vendor/faiss/faiss/utils/Heap.cpp +2 -2
  163. data/vendor/faiss/faiss/utils/Heap.h +3 -3
  164. data/vendor/faiss/faiss/utils/NeuralNet.cpp +1 -1
  165. data/vendor/faiss/faiss/utils/NeuralNet.h +3 -3
  166. data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +2 -2
  167. data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +2 -2
  168. data/vendor/faiss/faiss/utils/approx_topk/mode.h +1 -1
  169. data/vendor/faiss/faiss/utils/distances.cpp +0 -3
  170. data/vendor/faiss/faiss/utils/distances.h +2 -2
  171. data/vendor/faiss/faiss/utils/extra_distances-inl.h +3 -1
  172. data/vendor/faiss/faiss/utils/hamming-inl.h +2 -0
  173. data/vendor/faiss/faiss/utils/hamming.cpp +7 -6
  174. data/vendor/faiss/faiss/utils/hamming.h +1 -1
  175. data/vendor/faiss/faiss/utils/hamming_distance/common.h +1 -2
  176. data/vendor/faiss/faiss/utils/partitioning.cpp +5 -5
  177. data/vendor/faiss/faiss/utils/partitioning.h +2 -2
  178. data/vendor/faiss/faiss/utils/rabitq_simd.h +222 -336
  179. data/vendor/faiss/faiss/utils/random.cpp +1 -1
  180. data/vendor/faiss/faiss/utils/simdlib_avx2.h +1 -1
  181. data/vendor/faiss/faiss/utils/simdlib_avx512.h +1 -1
  182. data/vendor/faiss/faiss/utils/simdlib_neon.h +2 -2
  183. data/vendor/faiss/faiss/utils/transpose/transpose-avx512-inl.h +1 -1
  184. data/vendor/faiss/faiss/utils/utils.cpp +9 -2
  185. data/vendor/faiss/faiss/utils/utils.h +2 -2
  186. metadata +29 -1
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 61316c4fa0bbf7f85dfaeb6890d75b35e20e013a5ed25b8d87a3d252dfea2f50
- data.tar.gz: af0de6135077f184092dbfec00f9bf7492eb38da66d5ef5f4f2d020ce0486648
+ metadata.gz: 439ff96f613cd71e2a32197194d3814cf5ea516bd31489fdc2bd7a98747ec8ff
+ data.tar.gz: b2eaf07886acc74aaee4e6f1956a8c09f1d76da0ebaca14b6d4f3e889d728ef7
  SHA512:
- metadata.gz: 6d21e6186d57eec3852ab285f0bcd89affacb56577c661b514763478ef6d610f808a7d1e4cbc062278aec6cbc11959b204de237b9669dcfb58de4b3070be2c1e
- data.tar.gz: a65646746c2f558b48285d8c192d246ce4a95cead1f4413349e8eac4393ac4aa47ad3919764e7caa1abf517edb57153e385d7a012a3dbb22258a98c2795aa5b1
+ metadata.gz: 03d9a8aa01c86c176437adfab3038b91eb9b9991b2446f4b442114fac8e16133f213eec779d8bdedf80f13c643e96ec2ae39cfa9e1de4313148d53efa565d5c8
+ data.tar.gz: aa3ef7993a7411cbd480134e054528f069931c57ec55a8dd74c33642fa9debf203507f5744fa1ad178c04d142ac6efd482d63e6df1ced32e890fedd1def73769
data/CHANGELOG.md CHANGED
@@ -1,3 +1,13 @@
+ ## 0.5.1 (2025-12-20)
+
+ - Updated Faiss to 1.13.2
+ - Added `reconstruct` method
+
+ ## 0.5.0 (2025-11-12)
+
+ - Updated Faiss to 1.13.0
+ - Added support for releasing GVL
+
  ## 0.4.3 (2025-10-26)

  - Fixed error with Rice 4.7
data/README.md CHANGED
@@ -53,6 +53,8 @@ Search
  distances, ids = index.search(objects, 3)
  ```

+ > Use `index.freeze` to release the GVL for searches
+
  Save an index

  ```ruby
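
For context, the README note added above corresponds to the following usage pattern. This is a minimal sketch based on the gem's README; the index type, dimensionality, and data are illustrative:

```ruby
require "faiss"
require "numo/narray"

objects = Numo::SFloat.new(100, 4).rand  # illustrative data: 100 vectors of dimension 4
index = Faiss::IndexFlatL2.new(4)
index.add(objects)

# A frozen index can no longer be modified (train/add/add_with_ids raise on a frozen object),
# but searches on it release the GVL, so multiple Ruby threads can search in parallel.
index.freeze
distances, ids = index.search(objects, 3)
```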
data/ext/faiss/index.cpp CHANGED
@@ -111,19 +111,28 @@ void init_index(Rice::Module& m) {
  })
  .define_method(
  "train",
- [](faiss::Index &self, numo::SFloat objects) {
+ [](Rice::Object rb_self, numo::SFloat objects) {
+ rb_check_frozen(rb_self.value());
+
+ auto &self = *Rice::Data_Object<faiss::Index>{rb_self};
  auto n = check_shape(objects, self.d);
  self.train(n, objects.read_ptr());
  })
  .define_method(
  "add",
- [](faiss::Index &self, numo::SFloat objects) {
+ [](Rice::Object rb_self, numo::SFloat objects) {
+ rb_check_frozen(rb_self.value());
+
+ auto &self = *Rice::Data_Object<faiss::Index>{rb_self};
  auto n = check_shape(objects, self.d);
  self.add(n, objects.read_ptr());
  })
  .define_method(
  "add_with_ids",
- [](faiss::Index &self, numo::SFloat objects, numo::Int64 ids) {
+ [](Rice::Object rb_self, numo::SFloat objects, numo::Int64 ids) {
+ rb_check_frozen(rb_self.value());
+
+ auto &self = *Rice::Data_Object<faiss::Index>{rb_self};
  auto n = check_shape(objects, self.d);
  if (ids.ndim() != 1 || ids.shape()[0] != n) {
  throw Rice::Exception(rb_eArgError, "expected ids to be 1d array with size %d", n);
@@ -132,13 +141,20 @@ void init_index(Rice::Module& m) {
  })
  .define_method(
  "search",
- [](faiss::Index &self, numo::SFloat objects, size_t k) {
+ [](Rice::Object rb_self, numo::SFloat objects, size_t k) {
+ auto &self = *Rice::Data_Object<faiss::Index>{rb_self};
  auto n = check_shape(objects, self.d);

  auto distances = numo::SFloat({n, k});
  auto labels = numo::Int64({n, k});

- self.search(n, objects.read_ptr(), k, distances.write_ptr(), labels.write_ptr());
+ if (rb_self.is_frozen()) {
+ Rice::detail::no_gvl([&] {
+ self.search(n, objects.read_ptr(), k, distances.write_ptr(), labels.write_ptr());
+ });
+ } else {
+ self.search(n, objects.read_ptr(), k, distances.write_ptr(), labels.write_ptr());
+ }

  Rice::Array ret;
  ret.push(std::move(distances), false);
@@ -147,9 +163,20 @@ void init_index(Rice::Module& m) {
  })
  .define_method(
  "nprobe=",
- [](faiss::Index &self, double val) {
+ [](Rice::Object rb_self, double val) {
+ rb_check_frozen(rb_self.value());
+
+ auto &self = *Rice::Data_Object<faiss::Index>{rb_self};
  faiss::ParameterSpace().set_index_parameter(&self, "nprobe", val);
  })
+ .define_method(
+ "reconstruct",
+ [](faiss::Index &self, int64_t key) {
+ auto d = static_cast<std::size_t>(self.d);
+ auto recons = numo::SFloat({d});
+ self.reconstruct(key, recons.write_ptr());
+ return recons;
+ })
  .define_method(
  "save",
  [](faiss::Index &self, Rice::String fname) {
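
The `reconstruct` binding added above returns the stored vector for a given id as a d-dimensional Numo::SFloat. A minimal sketch of calling it from Ruby (index type and data are illustrative):

```ruby
index = Faiss::IndexFlatL2.new(4)
index.add(Numo::SFloat.new(10, 4).rand)

vector = index.reconstruct(0)  # => Numo::SFloat of length 4, the vector stored with id 0
```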
data/ext/faiss/index_binary.cpp CHANGED
@@ -27,25 +27,38 @@ void init_index_binary(Rice::Module& m) {
  })
  .define_method(
  "train",
- [](faiss::IndexBinary &self, numo::UInt8 objects) {
+ [](Rice::Object rb_self, numo::UInt8 objects) {
+ rb_check_frozen(rb_self.value());
+
+ auto &self = *Rice::Data_Object<faiss::IndexBinary>{rb_self};
  auto n = check_shape(objects, self.d / 8);
  self.train(n, objects.read_ptr());
  })
  .define_method(
  "add",
- [](faiss::IndexBinary &self, numo::UInt8 objects) {
+ [](Rice::Object rb_self, numo::UInt8 objects) {
+ rb_check_frozen(rb_self.value());
+
+ auto &self = *Rice::Data_Object<faiss::IndexBinary>{rb_self};
  auto n = check_shape(objects, self.d / 8);
  self.add(n, objects.read_ptr());
  })
  .define_method(
  "search",
- [](faiss::IndexBinary &self, numo::UInt8 objects, size_t k) {
+ [](Rice::Object rb_self, numo::UInt8 objects, size_t k) {
+ auto &self = *Rice::Data_Object<faiss::IndexBinary>{rb_self};
  auto n = check_shape(objects, self.d / 8);

  auto distances = numo::Int32({n, k});
  auto labels = numo::Int64({n, k});

- self.search(n, objects.read_ptr(), k, distances.write_ptr(), labels.write_ptr());
+ if (rb_self.is_frozen()) {
+ Rice::detail::no_gvl([&] {
+ self.search(n, objects.read_ptr(), k, distances.write_ptr(), labels.write_ptr());
+ });
+ } else {
+ self.search(n, objects.read_ptr(), k, distances.write_ptr(), labels.write_ptr());
+ }

  Rice::Array ret;
  ret.push(std::move(distances), false);
data/ext/faiss/kmeans.cpp CHANGED
@@ -32,11 +32,11 @@ void init_kmeans(Rice::Module& m) {
  })
  .define_method(
  "train",
- [](Rice::Object self, numo::SFloat objects) {
- auto self_ptr = Rice::detail::From_Ruby<faiss::Clustering*>().convert(self.value());
- auto n = check_shape(objects, self_ptr->d);
- auto index = faiss::IndexFlatL2(self_ptr->d);
- self.iv_set("@index", Rice::Object(Rice::detail::To_Ruby<faiss::IndexFlatL2>().convert(index)));
- self_ptr->train(n, objects.read_ptr(), index);
+ [](Rice::Object rb_self, numo::SFloat objects) {
+ auto &self = *Rice::Data_Object<faiss::Clustering>{rb_self};
+ auto n = check_shape(objects, self.d);
+ auto index = faiss::IndexFlatL2(self.d);
+ rb_self.iv_set("@index", index);
+ self.train(n, objects.read_ptr(), index);
  });
  }
data/lib/faiss/version.rb CHANGED
@@ -1,3 +1,3 @@
  module Faiss
- VERSION = "0.4.3"
+ VERSION = "0.5.1"
  end
data/vendor/faiss/faiss/AutoTune.cpp CHANGED
@@ -20,15 +20,14 @@
  #include <faiss/utils/utils.h>

  #include <faiss/IndexHNSW.h>
+ #include <faiss/IndexIDMap.h>
  #include <faiss/IndexIVF.h>
- #include <faiss/IndexIVFFlat.h>
  #include <faiss/IndexIVFPQ.h>
  #include <faiss/IndexIVFPQR.h>
  #include <faiss/IndexPQ.h>
  #include <faiss/IndexPreTransform.h>
  #include <faiss/IndexRefine.h>
  #include <faiss/IndexShardsIVF.h>
- #include <faiss/MetaIndexes.h>

  namespace faiss {

@@ -126,7 +125,7 @@ bool OperatingPoints::add(
  }
  } else {
  int i;
- // stricto sensu this should be a bissection
+ // stricto sensu this should be a bisection
  for (i = 0; i < a.size(); i++) {
  if (a[i].perf >= perf) {
  break;
data/vendor/faiss/faiss/AutoTune.h CHANGED
@@ -32,7 +32,7 @@ struct AutoTuneCriterion {

  AutoTuneCriterion(idx_t nq, idx_t nnn);

- /** Intitializes the gt_D and gt_I vectors. Must be called before evaluating
+ /** Initializes the gt_D and gt_I vectors. Must be called before evaluating
  *
  * @param gt_D_in size nq * gt_nnn
  * @param gt_I_in size nq * gt_nnn
data/vendor/faiss/faiss/Clustering.cpp CHANGED
@@ -212,7 +212,7 @@ void compute_centroids(
  * It works by slightly changing the centroids to make 2 clusters from
  * a single one. Takes the same arguments as compute_centroids.
  *
- * @return nb of spliting operations (larger is worse)
+ * @return nb of splitting operations (larger is worse)
  */
  int split_clusters(
  size_t d,
@@ -242,7 +242,7 @@ int split_clusters(
  centroids + cj * d,
  sizeof(*centroids) * d);

- /* small symmetric pertubation */
+ /* small symmetric perturbation */
  for (size_t j = 0; j < d; j++) {
  if (j % 2 == 0) {
  centroids[ci * d + j] *= 1 + EPS;
data/vendor/faiss/faiss/Clustering.h CHANGED
@@ -73,7 +73,7 @@ struct ClusteringIterationStats {
  * points to the centroids. Therefore, at each iteration the centroids
  * are added to the index.
  *
- * On output, the centoids table is set to the latest version
+ * On output, the centroids table is set to the latest version
  * of the centroids and they are also added to the index. If the
  * centroids table it is not empty on input, it is also used for
  * initialization.
@@ -109,7 +109,7 @@ struct Clustering : ClusteringParameters {

  /** run with encoded vectors
  *
- * win addition to train()'s parameters takes a codec as parameter
+ * in addition to train()'s parameters takes a codec as parameter
  * to decode the input vectors.
  *
  * @param codec codec used to decode the vectors (nullptr =
data/vendor/faiss/faiss/IVFlib.cpp CHANGED
@@ -9,7 +9,6 @@
  #include <omp.h>

  #include <memory>
- #include <numeric>

  #include <faiss/IndexAdditiveQuantizer.h>
  #include <faiss/IndexIVFAdditiveQuantizer.h>
@@ -58,7 +57,7 @@ void check_compatible_for_merge(const Index* index0, const Index* index1) {
  ivf0->check_compatible_for_merge(*ivf1);
  }

- // TODO: check as thoroughfully for other index types
+ // TODO: check as thoroughly for other index types
  }

  const IndexIVF* try_extract_index_ivf(const Index* index) {
@@ -559,19 +558,6 @@ void handle_ivf(
  const std::string& filename_template,
  ShardingFunction* sharding_function,
  bool generate_ids) {
- std::vector<faiss::IndexIVF*> sharded_indexes(shard_count);
- auto clone = static_cast<faiss::IndexIVF*>(faiss::clone_index(index));
- clone->quantizer->reset();
- for (int64_t i = 0; i < shard_count; i++) {
- sharded_indexes[i] =
- static_cast<faiss::IndexIVF*>(faiss::clone_index(clone));
- if (generate_ids) {
- // Assume the quantizer does not natively support add_with_ids.
- sharded_indexes[i]->quantizer =
- new IndexIDMap2(sharded_indexes[i]->quantizer);
- }
- }
-
  // assign centroids to each sharded Index based on sharding_function, and
  // add them to the quantizer of each sharded index
  std::vector<std::vector<float>> sharded_centroids(shard_count);
@@ -589,27 +575,29 @@ void handle_ivf(
  &reconstructed[index->quantizer->d]);
  delete[] reconstructed;
  }
+
+ auto clone = static_cast<faiss::IndexIVF*>(faiss::clone_index(index));
+ clone->quantizer->reset();
  for (int64_t i = 0; i < shard_count; i++) {
+ auto sharded_index =
+ static_cast<faiss::IndexIVF*>(faiss::clone_index(clone));
  if (generate_ids) {
- sharded_indexes[i]->quantizer->add_with_ids(
+ // Assume the quantizer does not natively support add_with_ids.
+ sharded_index->quantizer =
+ new IndexIDMap2(sharded_index->quantizer);
+ sharded_index->quantizer->add_with_ids(
  sharded_centroids[i].size() / index->quantizer->d,
  sharded_centroids[i].data(),
  xids[i].data());
  } else {
- sharded_indexes[i]->quantizer->add(
+ sharded_index->quantizer->add(
  sharded_centroids[i].size() / index->quantizer->d,
  sharded_centroids[i].data());
  }
- }
-
- for (int64_t i = 0; i < shard_count; i++) {
  char fname[256];
  snprintf(fname, 256, filename_template.c_str(), i);
- faiss::write_index(sharded_indexes[i], fname);
- }
-
- for (int64_t i = 0; i < shard_count; i++) {
- delete sharded_indexes[i];
+ faiss::write_index(sharded_index, fname);
+ delete sharded_index;
  }
  }

@@ -619,22 +607,6 @@ void handle_binary_ivf(
  const std::string& filename_template,
  ShardingFunction* sharding_function,
  bool generate_ids) {
- std::vector<faiss::IndexBinaryIVF*> sharded_indexes(shard_count);
-
- auto clone = static_cast<faiss::IndexBinaryIVF*>(
- faiss::clone_binary_index(index));
- clone->quantizer->reset();
-
- for (int64_t i = 0; i < shard_count; i++) {
- sharded_indexes[i] = static_cast<faiss::IndexBinaryIVF*>(
- faiss::clone_binary_index(clone));
- if (generate_ids) {
- // Assume the quantizer does not natively support add_with_ids.
- sharded_indexes[i]->quantizer =
- new IndexBinaryIDMap2(sharded_indexes[i]->quantizer);
- }
- }
-
  // assign centroids to each sharded Index based on sharding_function, and
  // add them to the quantizer of each sharded index
  int64_t reconstruction_size = index->quantizer->d / 8;
@@ -653,27 +625,30 @@ void handle_binary_ivf(
  &reconstructed[reconstruction_size]);
  delete[] reconstructed;
  }
+
+ auto clone = static_cast<faiss::IndexBinaryIVF*>(
+ faiss::clone_binary_index(index));
+ clone->quantizer->reset();
  for (int64_t i = 0; i < shard_count; i++) {
+ auto sharded_index = static_cast<faiss::IndexBinaryIVF*>(
+ faiss::clone_binary_index(clone));
  if (generate_ids) {
- sharded_indexes[i]->quantizer->add_with_ids(
+ // Assume the quantizer does not natively support add_with_ids.
+ sharded_index->quantizer =
+ new IndexBinaryIDMap2(sharded_index->quantizer);
+ sharded_index->quantizer->add_with_ids(
  sharded_centroids[i].size() / reconstruction_size,
  sharded_centroids[i].data(),
  xids[i].data());
  } else {
- sharded_indexes[i]->quantizer->add(
+ sharded_index->quantizer->add(
  sharded_centroids[i].size() / reconstruction_size,
  sharded_centroids[i].data());
  }
- }
-
- for (int64_t i = 0; i < shard_count; i++) {
  char fname[256];
  snprintf(fname, 256, filename_template.c_str(), i);
- faiss::write_index_binary(sharded_indexes[i], fname);
- }
-
- for (int64_t i = 0; i < shard_count; i++) {
- delete sharded_indexes[i];
+ faiss::write_index_binary(sharded_index, fname);
+ delete sharded_index;
  }
  }

data/vendor/faiss/faiss/IVFlib.h CHANGED
@@ -100,7 +100,7 @@ struct SlidingIndexWindow {
  std::vector<std::vector<size_t>> sizes;

  /// index should be initially empty and trained
- SlidingIndexWindow(Index* index);
+ explicit SlidingIndexWindow(Index* index);

  /** Add one index to the current index and remove the oldest one.
  *
data/vendor/faiss/faiss/Index.cpp CHANGED
@@ -104,6 +104,17 @@ void Index::search_and_reconstruct(
  }
  }

+ void Index::search_subset(
+ idx_t n,
+ const float* x,
+ idx_t k_base,
+ const idx_t* base_labels,
+ idx_t k,
+ float* distances,
+ idx_t* labels) const {
+ FAISS_THROW_MSG("search_subset not implemented for this type of index");
+ }
+
  void Index::compute_residual(const float* x, float* residual, idx_t key) const {
  reconstruct(key, residual);
  for (size_t i = 0; i < d; i++) {
data/vendor/faiss/faiss/Index.h CHANGED
@@ -17,8 +17,8 @@
  #include <sstream>

  #define FAISS_VERSION_MAJOR 1
- #define FAISS_VERSION_MINOR 12
- #define FAISS_VERSION_PATCH 0
+ #define FAISS_VERSION_MINOR 13
+ #define FAISS_VERSION_PATCH 2

  // Macro to combine the version components into a single string
  #ifndef FAISS_STRINGIFY
@@ -78,7 +78,7 @@ inline size_t get_numeric_type_size(NumericType numeric_type) {
  }
  }

- /** Parent class for the optional search paramenters.
+ /** Parent class for the optional search parameters.
  *
  * Sub-classes with additional search parameters should inherit this class.
  * Ownership of the object fields is always to the caller.
@@ -125,11 +125,11 @@ struct Index {
  /** Perform training on a representative set of vectors
  *
  * @param n nb of training vectors
- * @param x training vecors, size n * d
+ * @param x training vectors, size n * d
  */
  virtual void train(idx_t n, const float* x);

- virtual void trainEx(idx_t n, const void* x, NumericType numeric_type) {
+ virtual void train_ex(idx_t n, const void* x, NumericType numeric_type) {
  if (numeric_type == NumericType::Float32) {
  train(n, static_cast<const float*>(x));
  } else {
@@ -147,7 +147,7 @@ struct Index {
  */
  virtual void add(idx_t n, const float* x) = 0;

- virtual void addEx(idx_t n, const void* x, NumericType numeric_type) {
+ virtual void add_ex(idx_t n, const void* x, NumericType numeric_type) {
  if (numeric_type == NumericType::Float32) {
  add(n, static_cast<const float*>(x));
  } else {
@@ -165,7 +165,7 @@ struct Index {
  * @param xids if non-null, ids to store for the vectors (size n)
  */
  virtual void add_with_ids(idx_t n, const float* x, const idx_t* xids);
- virtual void add_with_idsEx(
+ virtual void add_with_ids_ex(
  idx_t n,
  const void* x,
  NumericType numeric_type,
@@ -196,7 +196,7 @@ struct Index {
  idx_t* labels,
  const SearchParameters* params = nullptr) const = 0;

- virtual void searchEx(
+ virtual void search_ex(
  idx_t n,
  const void* x,
  NumericType numeric_type,
@@ -258,7 +258,7 @@ struct Index {
  *
  * this function may not be defined for some indexes
  * @param key id of the vector to reconstruct
- * @param recons reconstucted vector (size d)
+ * @param recons reconstructed vector (size d)
  */
  virtual void reconstruct(idx_t key, float* recons) const;

@@ -268,7 +268,7 @@ struct Index {
  * this function may not be defined for some indexes
  * @param n number of vectors to reconstruct
  * @param keys ids of the vectors to reconstruct (size n)
- * @param recons reconstucted vector (size n * d)
+ * @param recons reconstructed vector (size n * d)
  */
  virtual void reconstruct_batch(idx_t n, const idx_t* keys, float* recons)
  const;
@@ -278,7 +278,7 @@ struct Index {
  * this function may not be defined for some indexes
  * @param i0 index of the first vector in the sequence
  * @param ni number of vectors in the sequence
- * @param recons reconstucted vector (size ni * d)
+ * @param recons reconstructed vector (size ni * d)
  */
  virtual void reconstruct_n(idx_t i0, idx_t ni, float* recons) const;

@@ -304,6 +304,29 @@ struct Index {
  float* recons,
  const SearchParameters* params = nullptr) const;

+ /** Similar to search, but operates on a potentially different subset
+ * of the dataset for each query.
+ *
+ * The default implementation fails with an assertion, as it is
+ * not supported by all indexes.
+ *
+ * @param n number of vectors
+ * @param x input vectors, size n * d
+ * @param k_base number of vectors to search from
+ * @param base_labels ids of the vectors to search from
+ * @param k desired number of results per query
+ * @param distances output pairwise distances, size n*k
+ * @param labels output labels of the NNs, size n*k
+ */
+ virtual void search_subset(
+ idx_t n,
+ const float* x,
+ idx_t k_base,
+ const idx_t* base_labels,
+ idx_t k,
+ float* distances,
+ idx_t* labels) const;
+
  /** Computes a residual vector after indexing encoding.
  *
  * The residual vector is the difference between a vector and the
data/vendor/faiss/faiss/Index2Layer.cpp CHANGED
@@ -82,7 +82,7 @@ void Index2Layer::train(idx_t n, const float* x) {

  std::unique_ptr<const float[]> del_x(x_in == x ? nullptr : x);

- std::vector<idx_t> assign(n); // assignement to coarse centroids
+ std::vector<idx_t> assign(n); // assignment to coarse centroids
  q1.quantizer->assign(n, x, assign.data());
  std::vector<float> residuals(n * d);
  for (idx_t i = 0; i < n; i++) {
data/vendor/faiss/faiss/Index2Layer.h CHANGED
@@ -23,7 +23,7 @@ struct IndexIVFPQ;
  /** Same as an IndexIVFPQ without the inverted lists: codes are stored
  * sequentially
  *
- * The class is mainly inteded to store encoded vectors that can be
+ * The class is mainly intended to store encoded vectors that can be
  * accessed randomly, the search function is not implemented.
  */
  struct Index2Layer : IndexFlatCodes {
@@ -47,7 +47,7 @@ struct Index2Layer : IndexFlatCodes {
  MetricType metric = METRIC_L2);

  Index2Layer();
- ~Index2Layer();
+ ~Index2Layer() override;
  void train(idx_t n, const float* x) override;

data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp CHANGED
@@ -86,6 +86,7 @@ struct AQDistanceComputerLUT : FlatCodesDistanceComputer {

  float bias;
  void set_query(const float* x) final {
+ q = x;
  // this is quite sub-optimal for multiple queries
  aq.compute_LUT(1, x, LUT.data());
  if (is_IP) {
data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp CHANGED
@@ -11,6 +11,7 @@
  #include <memory>

  #include <faiss/impl/FaissAssert.h>
+ #include <faiss/impl/FastScanDistancePostProcessing.h>
  #include <faiss/impl/LocalSearchQuantizer.h>
  #include <faiss/impl/LookupTableScaler.h>
  #include <faiss/impl/ResidualQuantizer.h>
@@ -123,7 +124,8 @@ void IndexAdditiveQuantizerFastScan::estimate_norm_scale(
  }

  std::vector<float> dis_tables(n * M * ksub);
- compute_float_LUT(dis_tables.data(), n, x);
+ FastScanDistancePostProcessing empty_context;
+ compute_float_LUT(dis_tables.data(), n, x, empty_context);

  // here we compute the mean of scales for each query
  // TODO: try max of scales
@@ -153,7 +155,8 @@ void IndexAdditiveQuantizerFastScan::compute_codes(
  void IndexAdditiveQuantizerFastScan::compute_float_LUT(
  float* lut,
  idx_t n,
- const float* x) const {
+ const float* x,
+ const FastScanDistancePostProcessing&) const {
  if (metric_type == METRIC_INNER_PRODUCT) {
  aq->compute_LUT(n, x, lut, 1.0f);
  } else {
@@ -200,10 +203,12 @@ void IndexAdditiveQuantizerFastScan::search(
  }

  NormTableScaler scaler(norm_scale);
+ FastScanDistancePostProcessing context;
+ context.norm_scaler = &scaler;
  if (metric_type == METRIC_L2) {
- search_dispatch_implem<true>(n, x, k, distances, labels, &scaler);
+ search_dispatch_implem<true>(n, x, k, distances, labels, context);
  } else {
- search_dispatch_implem<false>(n, x, k, distances, labels, &scaler);
+ search_dispatch_implem<false>(n, x, k, distances, labels, context);
  }
  }

data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h CHANGED
@@ -62,7 +62,11 @@ struct IndexAdditiveQuantizerFastScan : IndexFastScan {

  void compute_codes(uint8_t* codes, idx_t n, const float* x) const override;

- void compute_float_LUT(float* lut, idx_t n, const float* x) const override;
+ void compute_float_LUT(
+ float* lut,
+ idx_t n,
+ const float* x,
+ const FastScanDistancePostProcessing& context) const override;

  void search(
  idx_t n,