RubyGems - faiss - Versions diffs - 0.2.4 → 0.2.5 - Mend

faiss 0.2.4 → 0.2.5

Files changed (177) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +5 -0
data/README.md +23 -21
data/ext/faiss/extconf.rb +11 -0
data/ext/faiss/index.cpp +4 -4
data/ext/faiss/index_binary.cpp +6 -6
data/ext/faiss/product_quantizer.cpp +4 -4
data/lib/faiss/version.rb +1 -1
data/vendor/faiss/faiss/AutoTune.cpp +13 -0
data/vendor/faiss/faiss/IVFlib.cpp +101 -2
data/vendor/faiss/faiss/IVFlib.h +26 -2
data/vendor/faiss/faiss/Index.cpp +36 -3
data/vendor/faiss/faiss/Index.h +43 -6
data/vendor/faiss/faiss/Index2Layer.cpp +6 -2
data/vendor/faiss/faiss/Index2Layer.h +6 -1
data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +219 -16
data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +63 -5
data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +299 -0
data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +199 -0
data/vendor/faiss/faiss/IndexBinary.cpp +20 -4
data/vendor/faiss/faiss/IndexBinary.h +18 -3
data/vendor/faiss/faiss/IndexBinaryFlat.cpp +9 -2
data/vendor/faiss/faiss/IndexBinaryFlat.h +4 -2
data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -1
data/vendor/faiss/faiss/IndexBinaryFromFloat.h +2 -1
data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +5 -1
data/vendor/faiss/faiss/IndexBinaryHNSW.h +2 -1
data/vendor/faiss/faiss/IndexBinaryHash.cpp +17 -4
data/vendor/faiss/faiss/IndexBinaryHash.h +8 -4
data/vendor/faiss/faiss/IndexBinaryIVF.cpp +28 -13
data/vendor/faiss/faiss/IndexBinaryIVF.h +10 -7
data/vendor/faiss/faiss/IndexFastScan.cpp +626 -0
data/vendor/faiss/faiss/IndexFastScan.h +145 -0
data/vendor/faiss/faiss/IndexFlat.cpp +34 -21
data/vendor/faiss/faiss/IndexFlat.h +7 -4
data/vendor/faiss/faiss/IndexFlatCodes.cpp +35 -1
data/vendor/faiss/faiss/IndexFlatCodes.h +12 -0
data/vendor/faiss/faiss/IndexHNSW.cpp +66 -138
data/vendor/faiss/faiss/IndexHNSW.h +4 -2
data/vendor/faiss/faiss/IndexIDMap.cpp +247 -0
data/vendor/faiss/faiss/IndexIDMap.h +107 -0
data/vendor/faiss/faiss/IndexIVF.cpp +121 -33
data/vendor/faiss/faiss/IndexIVF.h +35 -16
data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +84 -7
data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +63 -1
data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +590 -0
data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +171 -0
data/vendor/faiss/faiss/IndexIVFFastScan.cpp +1290 -0
data/vendor/faiss/faiss/IndexIVFFastScan.h +213 -0
data/vendor/faiss/faiss/IndexIVFFlat.cpp +37 -17
data/vendor/faiss/faiss/IndexIVFFlat.h +4 -2
data/vendor/faiss/faiss/IndexIVFPQ.cpp +234 -50
data/vendor/faiss/faiss/IndexIVFPQ.h +5 -1
data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +23 -852
data/vendor/faiss/faiss/IndexIVFPQFastScan.h +7 -112
data/vendor/faiss/faiss/IndexIVFPQR.cpp +3 -3
data/vendor/faiss/faiss/IndexIVFPQR.h +1 -1
data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +3 -1
data/vendor/faiss/faiss/IndexIVFSpectralHash.h +2 -1
data/vendor/faiss/faiss/IndexLSH.cpp +4 -2
data/vendor/faiss/faiss/IndexLSH.h +2 -1
data/vendor/faiss/faiss/IndexLattice.cpp +7 -1
data/vendor/faiss/faiss/IndexLattice.h +3 -1
data/vendor/faiss/faiss/IndexNNDescent.cpp +4 -3
data/vendor/faiss/faiss/IndexNNDescent.h +2 -1
data/vendor/faiss/faiss/IndexNSG.cpp +37 -3
data/vendor/faiss/faiss/IndexNSG.h +25 -1
data/vendor/faiss/faiss/IndexPQ.cpp +106 -69
data/vendor/faiss/faiss/IndexPQ.h +19 -5
data/vendor/faiss/faiss/IndexPQFastScan.cpp +15 -450
data/vendor/faiss/faiss/IndexPQFastScan.h +15 -78
data/vendor/faiss/faiss/IndexPreTransform.cpp +47 -8
data/vendor/faiss/faiss/IndexPreTransform.h +15 -3
data/vendor/faiss/faiss/IndexRefine.cpp +8 -4
data/vendor/faiss/faiss/IndexRefine.h +4 -2
data/vendor/faiss/faiss/IndexReplicas.cpp +4 -2
data/vendor/faiss/faiss/IndexReplicas.h +2 -1
data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +438 -0
data/vendor/faiss/faiss/IndexRowwiseMinMax.h +92 -0
data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +26 -15
data/vendor/faiss/faiss/IndexScalarQuantizer.h +6 -7
data/vendor/faiss/faiss/IndexShards.cpp +4 -1
data/vendor/faiss/faiss/IndexShards.h +2 -1
data/vendor/faiss/faiss/MetaIndexes.cpp +5 -178
data/vendor/faiss/faiss/MetaIndexes.h +3 -81
data/vendor/faiss/faiss/VectorTransform.cpp +43 -0
data/vendor/faiss/faiss/VectorTransform.h +22 -4
data/vendor/faiss/faiss/clone_index.cpp +23 -1
data/vendor/faiss/faiss/clone_index.h +3 -0
data/vendor/faiss/faiss/cppcontrib/SaDecodeKernels.h +300 -0
data/vendor/faiss/faiss/cppcontrib/detail/CoarseBitType.h +24 -0
data/vendor/faiss/faiss/cppcontrib/detail/UintReader.h +195 -0
data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +2058 -0
data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +408 -0
data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-neon-inl.h +2147 -0
data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMax-inl.h +460 -0
data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMaxFP16-inl.h +465 -0
data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +1618 -0
data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +251 -0
data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-neon-inl.h +1452 -0
data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +1 -0
data/vendor/faiss/faiss/gpu/GpuCloner.cpp +0 -4
data/vendor/faiss/faiss/gpu/GpuIndex.h +28 -4
data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +2 -1
data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +10 -8
data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +75 -14
data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +19 -32
data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +22 -31
data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +22 -28
data/vendor/faiss/faiss/gpu/GpuResources.cpp +14 -0
data/vendor/faiss/faiss/gpu/GpuResources.h +16 -3
data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +3 -3
data/vendor/faiss/faiss/gpu/impl/IndexUtils.h +32 -0
data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +1 -0
data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +311 -75
data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +10 -0
data/vendor/faiss/faiss/gpu/test/TestUtils.h +3 -0
data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +2 -2
data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +5 -4
data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +116 -47
data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +44 -13
data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +0 -54
data/vendor/faiss/faiss/impl/AuxIndexStructures.h +0 -76
data/vendor/faiss/faiss/impl/DistanceComputer.h +64 -0
data/vendor/faiss/faiss/impl/HNSW.cpp +123 -27
data/vendor/faiss/faiss/impl/HNSW.h +19 -16
data/vendor/faiss/faiss/impl/IDSelector.cpp +125 -0
data/vendor/faiss/faiss/impl/IDSelector.h +135 -0
data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +6 -28
data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +6 -1
data/vendor/faiss/faiss/impl/LookupTableScaler.h +77 -0
data/vendor/faiss/faiss/impl/NNDescent.cpp +1 -0
data/vendor/faiss/faiss/impl/NSG.cpp +1 -1
data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +383 -0
data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.h +154 -0
data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +225 -145
data/vendor/faiss/faiss/impl/ProductQuantizer.h +29 -10
data/vendor/faiss/faiss/impl/Quantizer.h +43 -0
data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +192 -36
data/vendor/faiss/faiss/impl/ResidualQuantizer.h +40 -20
data/vendor/faiss/faiss/impl/ResultHandler.h +96 -0
data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +97 -173
data/vendor/faiss/faiss/impl/ScalarQuantizer.h +18 -18
data/vendor/faiss/faiss/impl/index_read.cpp +240 -9
data/vendor/faiss/faiss/impl/index_write.cpp +237 -5
data/vendor/faiss/faiss/impl/kmeans1d.cpp +6 -4
data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +56 -16
data/vendor/faiss/faiss/impl/pq4_fast_scan.h +25 -8
data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +66 -25
data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +75 -27
data/vendor/faiss/faiss/index_factory.cpp +196 -7
data/vendor/faiss/faiss/index_io.h +5 -0
data/vendor/faiss/faiss/invlists/DirectMap.cpp +1 -0
data/vendor/faiss/faiss/invlists/InvertedLists.cpp +4 -1
data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +2 -1
data/vendor/faiss/faiss/python/python_callbacks.cpp +27 -0
data/vendor/faiss/faiss/python/python_callbacks.h +15 -0
data/vendor/faiss/faiss/utils/Heap.h +31 -15
data/vendor/faiss/faiss/utils/distances.cpp +380 -56
data/vendor/faiss/faiss/utils/distances.h +113 -15
data/vendor/faiss/faiss/utils/distances_simd.cpp +726 -6
data/vendor/faiss/faiss/utils/extra_distances.cpp +12 -7
data/vendor/faiss/faiss/utils/extra_distances.h +3 -1
data/vendor/faiss/faiss/utils/fp16-fp16c.h +21 -0
data/vendor/faiss/faiss/utils/fp16-inl.h +101 -0
data/vendor/faiss/faiss/utils/fp16.h +11 -0
data/vendor/faiss/faiss/utils/hamming-inl.h +54 -0
data/vendor/faiss/faiss/utils/hamming.cpp +0 -48
data/vendor/faiss/faiss/utils/ordered_key_value.h +10 -0
data/vendor/faiss/faiss/utils/quantize_lut.cpp +62 -0
data/vendor/faiss/faiss/utils/quantize_lut.h +20 -0
data/vendor/faiss/faiss/utils/random.cpp +53 -0
data/vendor/faiss/faiss/utils/random.h +5 -0
data/vendor/faiss/faiss/utils/simdlib_avx2.h +4 -0
data/vendor/faiss/faiss/utils/simdlib_emulated.h +6 -1
data/vendor/faiss/faiss/utils/simdlib_neon.h +7 -2
metadata +37 -3

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 15428eb8dd7d27f8a94e3a7797dd765827e5454def33fa785055adb7ef0d20c5
-  data.tar.gz: 3e8eafebc49163c928bcab8d0ebd0f7b69e6659e49f36d7aaeb44e8651853ac9
+  metadata.gz: bdc2effbe6e2d827ffe473531be0864cf710ae6f3ad34f8324087695c367d140
+  data.tar.gz: 6e5b80b1f4281766b17d208af58b44e768576d259de1c6edb25630c700215c10
 SHA512:
-  metadata.gz: 598f6e626d5c970e408cff68ec479bf1aa2d6ee18adeeeb2489d1c4fbf627dacbc6e398ce149f0483720080a439df4d2887e5b6c9dc9f465e8ffa1bbeede84a8
-  data.tar.gz: fefebfbbbbceb58ac6c6b02636630943d30db7af4ee92521586b4fb40e73bf1aa2b401e28e4c27872bf9344f60e3c9fb288ec6420abc1ffccd05ffd3ec7379fd
+  metadata.gz: 404d064f14734a23d946ad0fc0576673e5e685d1001884981453e3bec23ecf6ba7b75857e578b47ef5303fde27342dfb25e85bbb7a60883d6ba09a964049bbfa
+  data.tar.gz: 536f403109d3773a3cd0ca27b4b46bfa26eaeedd8d5e2f4e6fa116e3b922d21f8a964a2d6aa1cbbff951da9444e9184bf7c60955263ed68c91ba91342f3f9196

data/CHANGELOG.md CHANGED Viewed

@@ -1,3 +1,8 @@
+## 0.2.5 (2022-12-27)
+- Updated Faiss to 1.7.3
+- Fixed installation errors on Mac
 ## 0.2.4 (2022-01-10)
 - Updated Faiss to 1.7.2

data/README.md CHANGED Viewed

@@ -8,23 +8,25 @@ Learn more about [Faiss](https://engineering.fb.com/data-infrastructure/faiss-a-
 ## Installation
-First, install BLAS, LAPACK, and OpenMP:
+First, ensure BLAS, LAPACK, and OpenMP are installed. For Mac, use:
 ```sh
-# Mac
-brew install openblas lapack libomp
+brew install libomp
+```
+For Ubuntu, use:
-# Ubuntu
-sudo apt install libblas-dev liblapack-dev
+```sh
+sudo apt-get install libblas-dev liblapack-dev
 ```
-Add this line to your application’s Gemfile:
+Then add this line to your application’s Gemfile:
 ```ruby
-gem 'faiss'
+gem "faiss"
 ```
-It can take a few minutes to compile the gem. Faiss is not available for Windows.
+It can take a few minutes to compile the gem. Windows is not currently supported.
 ## Getting Started
@@ -69,61 +71,61 @@ index = Faiss::Index.load("index.bin")
 Exact search for L2
-```rb
+```ruby
 Faiss::IndexFlatL2.new(d)
 ```
 Exact search for inner product
-```rb
+```ruby
 Faiss::IndexFlatIP.new(d)
 ```
 Hierarchical navigable small world graph exploration
-```rb
+```ruby
 Faiss::IndexHNSWFlat.new(d, m)
 ```
 Inverted file with exact post-verification
-```rb
+```ruby
 Faiss::IndexIVFFlat.new(quantizer, d, nlists)
 ```
 Locality-sensitive hashing
-```rb
+```ruby
 Faiss::IndexLSH.new(d, nbits)
 ```
 Scalar quantizer (SQ) in flat mode
-```rb
+```ruby
 Faiss::IndexScalarQuantizer.new(d, qtype)
 ```
 Product quantizer (PQ) in flat mode
-```rb
+```ruby
 Faiss::IndexPQ.new(d, m, nbits)
 ```
 IVF and scalar quantizer
-```rb
+```ruby
 Faiss::IndexIVFScalarQuantizer.new(quantizer, d, nlists, qtype)
 ```
 IVFADC (coarse quantizer+PQ on residuals)
-```rb
+```ruby
 Faiss::IndexIVFPQ.new(quantizer, d, nlists, m, nbits)
 ```
 IVFADC+R (same as IVFADC with re-ranking based on codes)
-```rb
+```ruby
 Faiss::IndexIVFPQR.new(quantizer, d, nlists, m, nbits, m_refine, nbits_refine)
 ```
@@ -131,13 +133,13 @@ Faiss::IndexIVFPQR.new(quantizer, d, nlists, m, nbits, m_refine, nbits_refine)
 Index binary vectors
-```rb
+```ruby
 Faiss::IndexBinaryFlat.new(d)
 ```
 Speed up search with an inverse vector file
-```rb
+```ruby
 Faiss::IndexBinaryIVF.new(quantizer, d, nlists)
 ```
@@ -220,7 +222,7 @@ Numo::NArray.cast([[1, 2, 3], [4, 5, 6]])
 ## History
-View the [changelog](https://github.com/ankane/faiss-ruby/blob/master/CHANGELOG.md)
+View the [changelog](CHANGELOG.md)
 ## Contributing

data/ext/faiss/extconf.rb CHANGED Viewed

@@ -1,6 +1,14 @@
 require "mkmf-rice"
 require "numo/narray"
+# libomp changed to keg-only
+# https://github.com/Homebrew/homebrew-core/issues/112107
+if RbConfig::CONFIG["host_os"] =~ /darwin/i
+  brew_prefix = RbConfig::CONFIG["host_cpu"] =~ /arm|aarch64/i ? "/opt/homebrew" : "/usr/local"
+  find_library("omp", nil, "#{brew_prefix}/opt/libomp/lib")
+  find_header("omp.h", "#{brew_prefix}/opt/libomp/include")
+end
 abort "BLAS not found" unless have_library("blas")
 abort "LAPACK not found" unless have_library("lapack")
 abort "OpenMP not found" unless have_library("omp") || have_library("gomp")
@@ -8,6 +16,9 @@ abort "OpenMP not found" unless have_library("omp") || have_library("gomp")
 numo = File.join(Gem.loaded_specs["numo-narray"].require_path, "numo")
 abort "Numo not found" unless find_header("numo/narray.h", numo)
+# for https://bugs.ruby-lang.org/issues/19005
+$LDFLAGS += " -Wl,-undefined,dynamic_lookup" if RbConfig::CONFIG["host_os"] =~ /darwin/i
 # -march=native not supported with ARM Mac
 default_optflags = RbConfig::CONFIG["host_os"] =~ /darwin/i && RbConfig::CONFIG["host_cpu"] =~ /arm|aarch64/i ? "" : "-march=native"
 $CXXFLAGS << " -std=c++17 $(optflags) -DFINTEGER=int " << with_config("optflags", default_optflags)

data/ext/faiss/index.cpp CHANGED Viewed

@@ -140,13 +140,13 @@ void init_index(Rice::Module& m) {
       })
     .define_method(
       "save",
-      [](faiss::Index &self, const char *fname) {
-        faiss::write_index(&self, fname);
+      [](faiss::Index &self, Rice::String fname) {
+        faiss::write_index(&self, fname.c_str());
       })
     .define_singleton_function(
       "load",
-      [](const char *fname) {
-        return faiss::read_index(fname);
+      [](Rice::String fname) {
+        return faiss::read_index(fname.c_str());
       });
   Rice::define_class_under<faiss::IndexFlatL2, faiss::Index>(m, "IndexFlatL2")

data/ext/faiss/index_binary.cpp CHANGED Viewed

@@ -52,13 +52,13 @@ void init_index_binary(Rice::Module& m) {
       })
     .define_method(
       "save",
-      [](faiss::IndexBinary &self, const char *fname) {
-        faiss::write_index_binary(&self, fname);
+      [](faiss::IndexBinary &self, Rice::String fname) {
+        faiss::write_index_binary(&self, fname.c_str());
       })
     .define_singleton_function(
       "load",
-      [](const char *fname) {
-        return faiss::read_index_binary(fname);
+      [](Rice::String fname) {
+        return faiss::read_index_binary(fname.c_str());
       });
   Rice::define_class_under<faiss::IndexBinaryFlat, faiss::IndexBinary>(m, "IndexBinaryFlat")
@@ -69,7 +69,7 @@ void init_index_binary(Rice::Module& m) {
   m.define_singleton_function(
     "index_binary_factory",
-    [](int d, const char *description) {
-      return faiss::index_binary_factory(d, description);
+    [](int d, Rice::String description) {
+      return faiss::index_binary_factory(d, description.c_str());
     });
 }

data/ext/faiss/product_quantizer.cpp CHANGED Viewed

@@ -42,12 +42,12 @@ void init_product_quantizer(Rice::Module& m) {
       })
     .define_method(
       "save",
-      [](faiss::ProductQuantizer &self, const char *fname) {
-        faiss::write_ProductQuantizer(&self, fname);
+      [](faiss::ProductQuantizer &self, Rice::String fname) {
+        faiss::write_ProductQuantizer(&self, fname.c_str());
       })
     .define_singleton_function(
       "load",
-      [](const char *fname) {
-        return faiss::read_ProductQuantizer(fname);
+      [](Rice::String fname) {
+        return faiss::read_ProductQuantizer(fname.c_str());
       });
 }

data/lib/faiss/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module Faiss
-  VERSION = "0.2.4"
+  VERSION = "0.2.5"
 end

data/vendor/faiss/faiss/AutoTune.cpp CHANGED Viewed

@@ -523,6 +523,19 @@ void ParameterSpace::set_index_parameter(
         }
     }
+    if (name == "efConstruction") {
+        if (DC(IndexHNSW)) {
+            ix->hnsw.efConstruction = int(val);
+            return;
+        }
+        if (DC(IndexIVF)) {
+            if (IndexHNSW* cq = dynamic_cast<IndexHNSW*>(ix->quantizer)) {
+                cq->hnsw.efConstruction = int(val);
+                return;
+            }
+        }
+    }
     if (name == "efSearch") {
         if (DC(IndexHNSW)) {
             ix->hnsw.efSearch = int(val);

data/vendor/faiss/faiss/IVFlib.cpp CHANGED Viewed

@@ -5,15 +5,18 @@
  * LICENSE file in the root directory of this source tree.
  */
-// -*- c++ -*-
 #include <faiss/IVFlib.h>
+#include <omp.h>
 #include <memory>
+#include <faiss/IndexAdditiveQuantizer.h>
+#include <faiss/IndexIVFAdditiveQuantizer.h>
 #include <faiss/IndexPreTransform.h>
 #include <faiss/MetaIndexes.h>
 #include <faiss/impl/FaissAssert.h>
+#include <faiss/utils/distances.h>
+#include <faiss/utils/hamming.h>
 #include <faiss/utils/utils.h>
 namespace faiss {
@@ -349,6 +352,7 @@ void search_with_parameters(
     index_ivf->search_preassigned(
             n, x, k, Iq.data(), Dq.data(), distances, labels, false, params);
     double t3 = getmillisecs();
     if (ms_per_stage) {
         ms_per_stage[0] = t1 - t0;
         ms_per_stage[1] = t2 - t1;
@@ -406,5 +410,100 @@ void range_search_with_parameters(
     }
 }
+IndexIVFResidualQuantizer* ivf_residual_from_quantizer(
+        const ResidualQuantizer& rq,
+        int nlevel) {
+    FAISS_THROW_IF_NOT(nlevel > 0 && nlevel + 1 < rq.M);
+    std::vector<size_t> nbits(nlevel);
+    std::copy(rq.nbits.begin(), rq.nbits.begin() + nlevel, nbits.begin());
+    std::unique_ptr<ResidualCoarseQuantizer> rcq(
+            new ResidualCoarseQuantizer(rq.d, nbits));
+    // set the coarse quantizer from the first nlevel quantizers
+    rcq->rq.initialize_from(rq);
+    rcq->is_trained = true;
+    rcq->ntotal = (idx_t)1 << rcq->rq.tot_bits;
+    // settings for exhaustive search in RCQ
+    rcq->centroid_norms.resize(rcq->ntotal);
+    rcq->aq->compute_centroid_norms(rcq->centroid_norms.data());
+    rcq->beam_factor = -1.0; // use exact search
+    size_t nlist = rcq->ntotal;
+    // build an IVFResidualQuantizer from that
+    std::vector<size_t> nbits_refined;
+    for (int i = nlevel; i < rq.M; i++) {
+        nbits_refined.push_back(rq.nbits[i]);
+    }
+    std::unique_ptr<IndexIVFResidualQuantizer> index(
+            new IndexIVFResidualQuantizer(
+                    rcq.get(),
+                    rq.d,
+                    nlist,
+                    nbits_refined,
+                    faiss::METRIC_L2,
+                    rq.search_type));
+    index->own_fields = true;
+    rcq.release();
+    index->by_residual = true;
+    index->rq.initialize_from(rq, nlevel);
+    index->is_trained = true;
+    return index.release();
+}
+void ivf_residual_add_from_flat_codes(
+        IndexIVFResidualQuantizer* index,
+        size_t nb,
+        const uint8_t* raw_codes,
+        int64_t code_size) {
+    const ResidualCoarseQuantizer* rcq =
+            dynamic_cast<const faiss::ResidualCoarseQuantizer*>(
+                    index->quantizer);
+    FAISS_THROW_IF_NOT_MSG(rcq, "the coarse quantizer must be a RCQ");
+    if (code_size < 0) {
+        code_size = index->code_size;
+    }
+    InvertedLists& invlists = *index->invlists;
+    const ResidualQuantizer& rq = index->rq;
+    // populate inverted lists
+#pragma omp parallel if (nb > 10000)
+    {
+        std::vector<uint8_t> tmp_code(index->code_size);
+        std::vector<float> tmp(rq.d);
+        int nt = omp_get_num_threads();
+        int rank = omp_get_thread_num();
+#pragma omp for
+        for (idx_t i = 0; i < nb; i++) {
+            const uint8_t* code = &raw_codes[i * code_size];
+            BitstringReader rd(code, code_size);
+            idx_t list_no = rd.read(rcq->rq.tot_bits);
+            if (list_no % nt ==
+                rank) { // each thread takes care of 1/nt of the invlists
+                // copy AQ indexes one by one
+                BitstringWriter wr(tmp_code.data(), tmp_code.size());
+                for (int j = 0; j < rq.M; j++) {
+                    int nbit = rq.nbits[j];
+                    wr.write(rd.read(nbit), nbit);
+                }
+                // we need to recompute the norm
+                // decode first, does not use the norm component, so that's
+                // ok
+                index->rq.decode(tmp_code.data(), tmp.data(), 1);
+                float norm = fvec_norm_L2sqr(tmp.data(), rq.d);
+                wr.write(rq.encode_norm(norm), rq.norm_bits);
+                // add code to the inverted list
+                invlists.add_entry(list_no, i, tmp_code.data());
+            }
+        }
+    }
+    index->ntotal += nb;
+}
 } // namespace ivflib
 } // namespace faiss

data/vendor/faiss/faiss/IVFlib.h CHANGED Viewed

@@ -5,8 +5,6 @@
  * LICENSE file in the root directory of this source tree.
  */
-// -*- c++ -*-
 #ifndef FAISS_IVFLIB_H
 #define FAISS_IVFLIB_H
@@ -20,6 +18,11 @@
 #include <vector>
 namespace faiss {
+struct IndexIVFResidualQuantizer;
+struct IndexResidualQuantizer;
+struct ResidualQuantizer;
 namespace ivflib {
 /** check if two indexes have the same parameters and are trained in
@@ -145,6 +148,27 @@ void range_search_with_parameters(
         size_t* nb_dis = nullptr,
         double* ms_per_stage = nullptr);
+/** Build an IndexIVFResidualQuantizer from a ResidualQuantizer, using the
+ * nlevel first components as coarse quantizer and the rest as codes in invlists
+ */
+IndexIVFResidualQuantizer* ivf_residual_from_quantizer(
+        const ResidualQuantizer&,
+        int nlevel);
+/** add from codes. NB that the norm component is not used, so the code_size can
+ * be provided.
+ *
+ * @param ivfrq      index to populate with the codes
+ * @param codes      codes to add, size (ncode, code_size)
+ * @param code_size  override the ivfrq's code_size, useful if the norm encoding
+ *                   is different
+ */
+void ivf_residual_add_from_flat_codes(
+        IndexIVFResidualQuantizer* ivfrq,
+        size_t ncode,
+        const uint8_t* codes,
+        int64_t code_size = -1);
 } // namespace ivflib
 } // namespace faiss

data/vendor/faiss/faiss/Index.cpp CHANGED Viewed

@@ -10,6 +10,7 @@
 #include <faiss/Index.h>
 #include <faiss/impl/AuxIndexStructures.h>
+#include <faiss/impl/DistanceComputer.h>
 #include <faiss/impl/FaissAssert.h>
 #include <faiss/utils/distances.h>
@@ -23,7 +24,12 @@ void Index::train(idx_t /*n*/, const float* /*x*/) {
     // does nothing by default
 }
-void Index::range_search(idx_t, const float*, float, RangeSearchResult*) const {
+void Index::range_search(
+        idx_t,
+        const float*,
+        float,
+        RangeSearchResult*,
+        const SearchParameters* params) const {
     FAISS_THROW_MSG("range search not implemented");
 }
@@ -48,7 +54,25 @@ void Index::reconstruct(idx_t, float*) const {
     FAISS_THROW_MSG("reconstruct not implemented for this type of index");
 }
+void Index::reconstruct_batch(idx_t n, const idx_t* keys, float* recons) const {
+    std::mutex exception_mutex;
+    std::string exception_string;
+#pragma omp parallel for if (n > 1000)
+    for (idx_t i = 0; i < n; i++) {
+        try {
+            reconstruct(keys[i], &recons[i * d]);
+        } catch (const std::exception& e) {
+            std::lock_guard<std::mutex> lock(exception_mutex);
+            exception_string = e.what();
+        }
+    }
+    if (!exception_string.empty()) {
+        FAISS_THROW_MSG(exception_string.c_str());
+    }
+}
 void Index::reconstruct_n(idx_t i0, idx_t ni, float* recons) const {
+#pragma omp parallel for if (ni > 1000)
     for (idx_t i = 0; i < ni; i++) {
         reconstruct(i0 + i, recons + i * d);
     }
@@ -60,10 +84,11 @@ void Index::search_and_reconstruct(
         idx_t k,
         float* distances,
         idx_t* labels,
-        float* recons) const {
+        float* recons,
+        const SearchParameters* params) const {
     FAISS_THROW_IF_NOT(k > 0);
-    search(n, x, k, distances, labels);
+    search(n, x, k, distances, labels, params);
     for (idx_t i = 0; i < n; ++i) {
         for (idx_t j = 0; j < k; ++j) {
             idx_t ij = i * k + j;
@@ -149,4 +174,12 @@ DistanceComputer* Index::get_distance_computer() const {
     }
 }
+void Index::merge_from(Index& /* otherIndex */, idx_t /* add_id */) {
+    FAISS_THROW_MSG("merge_from() not implemented");
+}
+void Index::check_compatible_for_merge(const Index& /* otherIndex */) const {
+    FAISS_THROW_MSG("check_compatible_for_merge() not implemented");
+}
 } // namespace faiss

data/vendor/faiss/faiss/Index.h CHANGED Viewed

@@ -18,7 +18,7 @@
 #define FAISS_VERSION_MAJOR 1
 #define FAISS_VERSION_MINOR 7
-#define FAISS_VERSION_PATCH 2
+#define FAISS_VERSION_PATCH 3
 /**
  * @namespace faiss
@@ -38,11 +38,24 @@
 namespace faiss {
-/// Forward declarations see AuxIndexStructures.h
+/// Forward declarations see impl/AuxIndexStructures.h, impl/IDSelector.h and
+/// impl/DistanceComputer.h
 struct IDSelector;
 struct RangeSearchResult;
 struct DistanceComputer;
+/** Parent class for the optional search parameters.
+ *
+ * Sub-classes with additional search parameters should inherit this class.
+ * Ownership of the object fields is always to the caller.
+ */
+struct SearchParameters {
+    /// if non-null, only these IDs will be considered during search.
+    IDSelector* sel = nullptr;
+    /// make sure we can dynamic_cast this
+    virtual ~SearchParameters() {}
+};
 /** Abstract structure for an index, supports adding vectors and searching them.
  *
  * All vectors provided at add or search time are 32-bit float arrays,
@@ -114,7 +127,8 @@ struct Index {
             const float* x,
             idx_t k,
             float* distances,
-            idx_t* labels) const = 0;
+            idx_t* labels,
+            const SearchParameters* params = nullptr) const = 0;
     /** query n vectors of dimension d to the index.
      *
@@ -130,7 +144,8 @@ struct Index {
             idx_t n,
             const float* x,
             float radius,
-            RangeSearchResult* result) const;
+            RangeSearchResult* result,
+            const SearchParameters* params = nullptr) const;
     /** return the indexes of the k vectors closest to the query x.
      *
@@ -157,6 +172,16 @@ struct Index {
      */
     virtual void reconstruct(idx_t key, float* recons) const;
+    /** Reconstruct several stored vectors (or an approximation if lossy coding)
+     *
+     * this function may not be defined for some indexes
+     * @param n        number of vectors to reconstruct
+     * @param keys        ids of the vectors to reconstruct (size n)
+     * @param recons      reconstructed vector (size n * d)
+     */
+    virtual void reconstruct_batch(idx_t n, const idx_t* keys, float* recons)
+            const;
     /** Reconstruct vectors i0 to i0 + ni - 1
      *
      * this function may not be defined for some indexes
@@ -178,7 +203,8 @@ struct Index {
             idx_t k,
             float* distances,
             idx_t* labels,
-            float* recons) const;
+            float* recons,
+            const SearchParameters* params = nullptr) const;
     /** Computes a residual vector after indexing encoding.
      *
@@ -234,13 +260,24 @@ struct Index {
      */
     virtual void sa_encode(idx_t n, const float* x, uint8_t* bytes) const;
-    /** encode a set of vectors
+    /** decode a set of vectors
      *
      * @param n       number of vectors
      * @param bytes   input encoded vectors, size n * sa_code_size()
      * @param x       output vectors, size n * d
      */
     virtual void sa_decode(idx_t n, const uint8_t* bytes, float* x) const;
+    /** moves the entries from another dataset to self.
+     * On output, other is empty.
+     * add_id is added to all moved ids
+     * (for sequential ids, this would be this->ntotal) */
+    virtual void merge_from(Index& otherIndex, idx_t add_id = 0);
+    /** check that the two indexes are compatible (ie, they are
+     * trained in the same way and have the same
+     * parameters). Otherwise throw. */
+    virtual void check_compatible_for_merge(const Index& otherIndex) const;
 };
 } // namespace faiss

data/vendor/faiss/faiss/Index2Layer.cpp CHANGED Viewed

@@ -111,7 +111,8 @@ void Index2Layer::search(
         const float* /*x*/,
         idx_t /*k*/,
         float* /*distances*/,
-        idx_t* /*labels*/) const {
+        idx_t* /*labels*/,
+        const SearchParameters* /* params */) const {
     FAISS_THROW_MSG("not implemented");
 }
@@ -282,10 +283,13 @@ DistanceComputer* Index2Layer::get_distance_computer() const {
 /* The standalone codec interface */
+// block size used in Index2Layer::sa_encode
+int index2layer_sa_encode_bs = 32768;
 void Index2Layer::sa_encode(idx_t n, const float* x, uint8_t* bytes) const {
     FAISS_THROW_IF_NOT(is_trained);
-    idx_t bs = 32768;
+    idx_t bs = index2layer_sa_encode_bs;
     if (n > bs) {
         for (idx_t i0 = 0; i0 < n; i0 += bs) {
             idx_t i1 = std::min(i0 + bs, n);

data/vendor/faiss/faiss/Index2Layer.h CHANGED Viewed

@@ -14,6 +14,7 @@
 #include <faiss/IndexFlatCodes.h>
 #include <faiss/IndexIVF.h>
 #include <faiss/IndexPQ.h>
+#include <faiss/impl/platform_macros.h>
 namespace faiss {
@@ -56,7 +57,8 @@ struct Index2Layer : IndexFlatCodes {
             const float* x,
             idx_t k,
             float* distances,
-            idx_t* labels) const override;
+            idx_t* labels,
+            const SearchParameters* params = nullptr) const override;
     DistanceComputer* get_distance_computer() const override;
@@ -68,4 +70,7 @@ struct Index2Layer : IndexFlatCodes {
     void sa_decode(idx_t n, const uint8_t* bytes, float* x) const override;
 };
+// block size used in Index2Layer::sa_encode
+FAISS_API extern int index2layer_sa_encode_bs;
 } // namespace faiss