RubyGems - faiss - Versions diffs - 0.1.3 → 0.1.4 - Mend

faiss 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (184) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +4 -0
data/LICENSE.txt +1 -1
data/README.md +1 -1
data/ext/faiss/extconf.rb +1 -1
data/lib/faiss/version.rb +1 -1
data/vendor/faiss/faiss/AutoTune.cpp +36 -33
data/vendor/faiss/faiss/AutoTune.h +6 -3
data/vendor/faiss/faiss/Clustering.cpp +16 -12
data/vendor/faiss/faiss/Index.cpp +3 -4
data/vendor/faiss/faiss/Index.h +3 -3
data/vendor/faiss/faiss/IndexBinary.cpp +3 -4
data/vendor/faiss/faiss/IndexBinary.h +1 -1
data/vendor/faiss/faiss/IndexBinaryHash.cpp +2 -12
data/vendor/faiss/faiss/IndexBinaryIVF.cpp +1 -2
data/vendor/faiss/faiss/IndexFlat.cpp +0 -148
data/vendor/faiss/faiss/IndexFlat.h +0 -51
data/vendor/faiss/faiss/IndexHNSW.cpp +4 -5
data/vendor/faiss/faiss/IndexIVF.cpp +118 -31
data/vendor/faiss/faiss/IndexIVF.h +22 -15
data/vendor/faiss/faiss/IndexIVFFlat.cpp +3 -3
data/vendor/faiss/faiss/IndexIVFFlat.h +2 -1
data/vendor/faiss/faiss/IndexIVFPQ.cpp +39 -15
data/vendor/faiss/faiss/IndexIVFPQ.h +25 -9
data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +1116 -0
data/vendor/faiss/faiss/IndexIVFPQFastScan.h +166 -0
data/vendor/faiss/faiss/IndexIVFPQR.cpp +8 -9
data/vendor/faiss/faiss/IndexIVFPQR.h +2 -1
data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +1 -2
data/vendor/faiss/faiss/IndexPQ.cpp +34 -18
data/vendor/faiss/faiss/IndexPQFastScan.cpp +536 -0
data/vendor/faiss/faiss/IndexPQFastScan.h +111 -0
data/vendor/faiss/faiss/IndexPreTransform.cpp +47 -0
data/vendor/faiss/faiss/IndexPreTransform.h +2 -0
data/vendor/faiss/faiss/IndexRefine.cpp +256 -0
data/vendor/faiss/faiss/IndexRefine.h +73 -0
data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +2 -2
data/vendor/faiss/faiss/IndexScalarQuantizer.h +1 -1
data/vendor/faiss/faiss/gpu/GpuDistance.h +1 -1
data/vendor/faiss/faiss/gpu/GpuIndex.h +16 -9
data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +8 -1
data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +11 -11
data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +19 -2
data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +28 -2
data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +24 -14
data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +29 -2
data/vendor/faiss/faiss/gpu/GpuResources.h +4 -0
data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +60 -27
data/vendor/faiss/faiss/gpu/StandardGpuResources.h +28 -6
data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +547 -0
data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +51 -0
data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +3 -3
data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +3 -2
data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +274 -0
data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +7 -2
data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +5 -1
data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +231 -0
data/vendor/faiss/faiss/gpu/test/TestUtils.h +33 -0
data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +1 -0
data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +6 -0
data/vendor/faiss/faiss/gpu/utils/Timer.cpp +5 -6
data/vendor/faiss/faiss/gpu/utils/Timer.h +2 -2
data/vendor/faiss/faiss/impl/AuxIndexStructures.h +5 -4
data/vendor/faiss/faiss/impl/HNSW.cpp +2 -4
data/vendor/faiss/faiss/impl/PolysemousTraining.h +4 -4
data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +22 -12
data/vendor/faiss/faiss/impl/ProductQuantizer.h +2 -0
data/vendor/faiss/faiss/impl/ResultHandler.h +452 -0
data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +29 -19
data/vendor/faiss/faiss/impl/ScalarQuantizer.h +6 -0
data/vendor/faiss/faiss/impl/index_read.cpp +64 -96
data/vendor/faiss/faiss/impl/index_write.cpp +34 -25
data/vendor/faiss/faiss/impl/io.cpp +33 -2
data/vendor/faiss/faiss/impl/io.h +7 -2
data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -15
data/vendor/faiss/faiss/impl/platform_macros.h +44 -0
data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +272 -0
data/vendor/faiss/faiss/impl/pq4_fast_scan.h +169 -0
data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +180 -0
data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +354 -0
data/vendor/faiss/faiss/impl/simd_result_handlers.h +559 -0
data/vendor/faiss/faiss/index_factory.cpp +112 -7
data/vendor/faiss/faiss/index_io.h +1 -48
data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +151 -0
data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +76 -0
data/vendor/faiss/faiss/{DirectMap.cpp → invlists/DirectMap.cpp} +1 -1
data/vendor/faiss/faiss/{DirectMap.h → invlists/DirectMap.h} +1 -1
data/vendor/faiss/faiss/{InvertedLists.cpp → invlists/InvertedLists.cpp} +72 -1
data/vendor/faiss/faiss/{InvertedLists.h → invlists/InvertedLists.h} +32 -1
data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +107 -0
data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +63 -0
data/vendor/faiss/faiss/{OnDiskInvertedLists.cpp → invlists/OnDiskInvertedLists.cpp} +21 -6
data/vendor/faiss/faiss/{OnDiskInvertedLists.h → invlists/OnDiskInvertedLists.h} +5 -2
data/vendor/faiss/faiss/python/python_callbacks.h +8 -1
data/vendor/faiss/faiss/utils/AlignedTable.h +141 -0
data/vendor/faiss/faiss/utils/Heap.cpp +2 -4
data/vendor/faiss/faiss/utils/Heap.h +61 -50
data/vendor/faiss/faiss/utils/distances.cpp +164 -319
data/vendor/faiss/faiss/utils/distances.h +28 -20
data/vendor/faiss/faiss/utils/distances_simd.cpp +277 -49
data/vendor/faiss/faiss/utils/extra_distances.cpp +1 -2
data/vendor/faiss/faiss/utils/hamming-inl.h +4 -4
data/vendor/faiss/faiss/utils/hamming.cpp +3 -6
data/vendor/faiss/faiss/utils/hamming.h +2 -7
data/vendor/faiss/faiss/utils/ordered_key_value.h +98 -0
data/vendor/faiss/faiss/utils/partitioning.cpp +1256 -0
data/vendor/faiss/faiss/utils/partitioning.h +69 -0
data/vendor/faiss/faiss/utils/quantize_lut.cpp +277 -0
data/vendor/faiss/faiss/utils/quantize_lut.h +80 -0
data/vendor/faiss/faiss/utils/simdlib.h +31 -0
data/vendor/faiss/faiss/utils/simdlib_avx2.h +461 -0
data/vendor/faiss/faiss/utils/simdlib_emulated.h +589 -0
metadata +43 -141
data/vendor/faiss/benchs/bench_6bit_codec.cpp +0 -80
data/vendor/faiss/c_api/AutoTune_c.cpp +0 -83
data/vendor/faiss/c_api/AutoTune_c.h +0 -66
data/vendor/faiss/c_api/Clustering_c.cpp +0 -145
data/vendor/faiss/c_api/Clustering_c.h +0 -123
data/vendor/faiss/c_api/IndexFlat_c.cpp +0 -140
data/vendor/faiss/c_api/IndexFlat_c.h +0 -115
data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +0 -64
data/vendor/faiss/c_api/IndexIVFFlat_c.h +0 -58
data/vendor/faiss/c_api/IndexIVF_c.cpp +0 -99
data/vendor/faiss/c_api/IndexIVF_c.h +0 -142
data/vendor/faiss/c_api/IndexLSH_c.cpp +0 -37
data/vendor/faiss/c_api/IndexLSH_c.h +0 -40
data/vendor/faiss/c_api/IndexPreTransform_c.cpp +0 -21
data/vendor/faiss/c_api/IndexPreTransform_c.h +0 -32
data/vendor/faiss/c_api/IndexShards_c.cpp +0 -38
data/vendor/faiss/c_api/IndexShards_c.h +0 -39
data/vendor/faiss/c_api/Index_c.cpp +0 -105
data/vendor/faiss/c_api/Index_c.h +0 -183
data/vendor/faiss/c_api/MetaIndexes_c.cpp +0 -49
data/vendor/faiss/c_api/MetaIndexes_c.h +0 -49
data/vendor/faiss/c_api/clone_index_c.cpp +0 -23
data/vendor/faiss/c_api/clone_index_c.h +0 -32
data/vendor/faiss/c_api/error_c.h +0 -42
data/vendor/faiss/c_api/error_impl.cpp +0 -27
data/vendor/faiss/c_api/error_impl.h +0 -16
data/vendor/faiss/c_api/faiss_c.h +0 -58
data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +0 -98
data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +0 -56
data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +0 -52
data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +0 -68
data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +0 -17
data/vendor/faiss/c_api/gpu/GpuIndex_c.h +0 -30
data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +0 -38
data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +0 -86
data/vendor/faiss/c_api/gpu/GpuResources_c.h +0 -66
data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +0 -54
data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +0 -53
data/vendor/faiss/c_api/gpu/macros_impl.h +0 -42
data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +0 -220
data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +0 -149
data/vendor/faiss/c_api/index_factory_c.cpp +0 -26
data/vendor/faiss/c_api/index_factory_c.h +0 -30
data/vendor/faiss/c_api/index_io_c.cpp +0 -42
data/vendor/faiss/c_api/index_io_c.h +0 -50
data/vendor/faiss/c_api/macros_impl.h +0 -110
data/vendor/faiss/demos/demo_imi_flat.cpp +0 -154
data/vendor/faiss/demos/demo_imi_pq.cpp +0 -203
data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +0 -151
data/vendor/faiss/demos/demo_sift1M.cpp +0 -252
data/vendor/faiss/demos/demo_weighted_kmeans.cpp +0 -185
data/vendor/faiss/misc/test_blas.cpp +0 -87
data/vendor/faiss/tests/test_binary_flat.cpp +0 -62
data/vendor/faiss/tests/test_dealloc_invlists.cpp +0 -188
data/vendor/faiss/tests/test_ivfpq_codec.cpp +0 -70
data/vendor/faiss/tests/test_ivfpq_indexing.cpp +0 -100
data/vendor/faiss/tests/test_lowlevel_ivf.cpp +0 -573
data/vendor/faiss/tests/test_merge.cpp +0 -260
data/vendor/faiss/tests/test_omp_threads.cpp +0 -14
data/vendor/faiss/tests/test_ondisk_ivf.cpp +0 -225
data/vendor/faiss/tests/test_pairs_decoding.cpp +0 -193
data/vendor/faiss/tests/test_params_override.cpp +0 -236
data/vendor/faiss/tests/test_pq_encoding.cpp +0 -98
data/vendor/faiss/tests/test_sliding_ivf.cpp +0 -246
data/vendor/faiss/tests/test_threaded_index.cpp +0 -253
data/vendor/faiss/tests/test_transfer_invlists.cpp +0 -159
data/vendor/faiss/tutorial/cpp/1-Flat.cpp +0 -104
data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +0 -85
data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +0 -98
data/vendor/faiss/tutorial/cpp/4-GPU.cpp +0 -122
data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +0 -104

data/vendor/faiss/faiss/gpu/test/TestUtils.h CHANGED Viewed

@@ -10,7 +10,10 @@
 #include <faiss/impl/FaissAssert.h>
 #include <faiss/Index.h>
+#include <faiss/invlists/InvertedLists.h>
 #include <initializer_list>
+#include <gtest/gtest.h>
+#include <cstring>
 #include <memory>
 #include <string>
 #include <vector>
@@ -90,4 +93,34 @@ void compareLists(const float* refDist,
                   float pctMaxDiff1 = 0.1f,
                   float pctMaxDiffN = 0.005f);
+/// Compare IVF lists between a CPU and GPU index: checks nlist, then for every list its length, its raw code bytes and its ids. Mismatches are reported through EXPECT_EQ, so the function keeps going after a failure.
+template <typename A, typename B>
+void testIVFEquality(A& cpuIndex, B& gpuIndex) {  // A must expose nlist and invlists; B must expose nlist, getListLength, getListVectorData and getListIndices
+  // Ensure equality of the inverted lists (first the number of lists, then the content of each list)
+  EXPECT_EQ(cpuIndex.nlist, gpuIndex.nlist);
+  for (int i = 0; i < cpuIndex.nlist; ++i) {  // iterates over the CPU list count; if the counts differ the GPU index is still queried for each i
+    auto cpuLists = cpuIndex.invlists;  // loop-invariant handle, re-read on every iteration
+    // Code equality: same list length, then byte-for-byte identical codes
+    EXPECT_EQ(cpuLists->list_size(i), gpuIndex.getListLength(i));
+    std::vector<uint8_t> cpuCodes(cpuLists->list_size(i) * cpuLists->code_size);  // list_size(i) entries of code_size bytes each
+    auto sc = faiss::InvertedLists::ScopedCodes(cpuLists, i);  // scoped accessor for the codes of list i; sc.get() is the source pointer copied from below
+    std::memcpy(cpuCodes.data(), sc.get(),
+                cpuLists->list_size(i) * cpuLists->code_size);
+    auto gpuCodes = gpuIndex.getListVectorData(i, false);  // NOTE(review): the meaning of the `false` flag is not visible here — confirm against the GPU index API
+    EXPECT_EQ(cpuCodes, gpuCodes);
+    // Index equality: the ids stored in list i must match element-wise
+    std::vector<Index::idx_t> cpuIndices(cpuLists->list_size(i));
+    auto si = faiss::InvertedLists::ScopedIds(cpuLists, i);  // scoped accessor for the ids of list i
+    std::memcpy(cpuIndices.data(), si.get(),
+                cpuLists->list_size(i) * sizeof(faiss::Index::idx_t));
+    EXPECT_EQ(cpuIndices, gpuIndex.getListIndices(i));
+  }
+}
 } }

data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp CHANGED Viewed

@@ -10,6 +10,7 @@
 #include <faiss/gpu/utils/DeviceUtils.h>
 #include <faiss/gpu/utils/StaticUtils.h>
 #include <faiss/impl/FaissAssert.h>
+#include <algorithm>
 #include <sstream>
 namespace faiss { namespace gpu {

data/vendor/faiss/faiss/gpu/utils/StaticUtils.h CHANGED Viewed

@@ -10,6 +10,12 @@
 #include <cuda.h>
+// allow usage for non-CUDA files
+#ifndef __host__
+#define __host__
+#define __device__
+#endif
 namespace faiss { namespace gpu { namespace utils {
 template <typename U, typename V>

data/vendor/faiss/faiss/gpu/utils/Timer.cpp CHANGED Viewed

@@ -9,6 +9,7 @@
 #include <faiss/gpu/utils/Timer.h>
 #include <faiss/gpu/utils/DeviceUtils.h>
 #include <faiss/impl/FaissAssert.h>
+#include <chrono>
 namespace faiss { namespace gpu {
@@ -43,18 +44,16 @@ KernelTimer::elapsedMilliseconds() {
 }
 CpuTimer::CpuTimer() {
-  clock_gettime(CLOCK_REALTIME, &start_);
+  start_ = std::chrono::steady_clock::now();
 }
 float
 CpuTimer::elapsedMilliseconds() {
-  struct timespec end;
-  clock_gettime(CLOCK_REALTIME, &end);
+  auto end = std::chrono::steady_clock::now();
-  auto diffS = end.tv_sec - start_.tv_sec;
-  auto diffNs = end.tv_nsec - start_.tv_nsec;
+  std::chrono::duration<float, std::milli> duration = end - start_;
-  return 1000.0f * (float) diffS + ((float) diffNs) / 1000000.0f;
+  return duration.count();
 }
 } } // namespace

data/vendor/faiss/faiss/gpu/utils/Timer.h CHANGED Viewed

@@ -9,7 +9,7 @@
 #pragma once
 #include <cuda_runtime.h>
-#include <time.h>
+#include <chrono>
 namespace faiss { namespace gpu {
@@ -46,7 +46,7 @@ class CpuTimer {
   float elapsedMilliseconds();
  private:
-  struct timespec start_;
+  std::chrono::time_point<std::chrono::steady_clock> start_;
 };
 } } // namespace

data/vendor/faiss/faiss/impl/AuxIndexStructures.h CHANGED Viewed

@@ -199,12 +199,13 @@ struct RangeSearchPartialResult: BufferList {
  *
  * The DistanceComputer is not intended to be thread-safe (eg. because
  * it maintains counters) so the distance functions are not const,
- * instanciate one from each thread if needed.
+ * instantiate one from each thread if needed.
  ***********************************************************/
 struct DistanceComputer {
      using idx_t = Index::idx_t;
-     /// called before computing distances
+     /// called before computing distances. Pointer x should remain valid
+     /// while operator () is called
      virtual void set_query(const float *x) = 0;
      /// compute distance of vector i to current query
@@ -233,9 +234,9 @@ struct FAISS_API InterruptCallback {
     /** check if:
      * - an interrupt callback is set
-     * - the callback retuns true
+     * - the callback returns true
      * if this is the case, then throw an exception. Should not be called
-     * from multiple threds.
+     * from multiple threads.
      */
     static void check ();

data/vendor/faiss/faiss/impl/HNSW.cpp CHANGED Viewed

@@ -539,8 +539,7 @@ int HNSW::search_from_candidates(
     if (nres < k) {
       faiss::maxheap_push(++nres, D, I, d, v1);
     } else if (d < D[0]) {
-      faiss::maxheap_pop(nres--, D, I);
-      faiss::maxheap_push(++nres, D, I, d, v1);
+      faiss::maxheap_replace_top(nres, D, I, d, v1);
     }
     vt.set(v1);
   }
@@ -578,8 +577,7 @@ int HNSW::search_from_candidates(
       if (nres < k) {
         faiss::maxheap_push(++nres, D, I, d, v1);
       } else if (d < D[0]) {
-        faiss::maxheap_pop(nres--, D, I);
-        faiss::maxheap_push(++nres, D, I, d, v1);
+        faiss::maxheap_replace_top(nres, D, I, d, v1);
       }
       candidates.push(v1, d);
     }

data/vendor/faiss/faiss/impl/PolysemousTraining.h CHANGED Viewed

@@ -21,14 +21,14 @@ namespace faiss {
 struct SimulatedAnnealingParameters {
     // optimization parameters
-    double init_temperature;   // init probaility of accepting a bad swap
+    double init_temperature;   // init probability of accepting a bad swap
     double temperature_decay;  // at each iteration the temp is multiplied by this
     int n_iter; // nb of iterations
     int n_redo; // nb of runs of the simulation
     int seed;   // random seed
     int verbose;
     bool only_bit_flips; // restrict permutation changes to bit flips
-    bool init_random; // intialize with a random permutation (not identity)
+    bool init_random; // initialize with a random permutation (not identity)
     // set reasonable defaults
     SimulatedAnnealingParameters ();
@@ -57,7 +57,7 @@ struct ReproduceDistancesObjective : PermutationObjective {
     static double sqr (double x) { return x * x; }
-    // weihgting of distances: it is more important to reproduce small
+    // weighting of distances: it is more important to reproduce small
     // distances well
     double dis_weight (double x) const;
@@ -139,7 +139,7 @@ struct PolysemousTraining: SimulatedAnnealingParameters {
     // sets default values
     PolysemousTraining ();
-    /// reorder the centroids so that the Hamming distace becomes a
+    /// reorder the centroids so that the Hamming distance becomes a
     /// good approximation of the SDC distance (called by train)
     void optimize_pq_for_hamming (ProductQuantizer & pq,
                                   size_t n, const float *x) const;

data/vendor/faiss/faiss/impl/ProductQuantizer.cpp CHANGED Viewed

@@ -63,8 +63,7 @@ void pq_estimators_from_tables_Mmul4 (int M, const CT * codes,
         }
         if (C::cmp (heap_dis[0], dis)) {
-            heap_pop<C> (k, heap_dis, heap_ids);
-            heap_push<C> (k, heap_dis, heap_ids, dis, j);
+            heap_replace_top<C> (k, heap_dis, heap_ids, dis, j);
         }
     }
 }
@@ -89,8 +88,7 @@ void pq_estimators_from_tables_M4 (const CT * codes,
         dis += dt[*codes++];
         if (C::cmp (heap_dis[0], dis)) {
-            heap_pop<C> (k, heap_dis, heap_ids);
-            heap_push<C> (k, heap_dis, heap_ids, dis, j);
+            heap_replace_top<C> (k, heap_dis, heap_ids, dis, j);
         }
     }
 }
@@ -132,8 +130,7 @@ static inline void pq_estimators_from_tables (const ProductQuantizer& pq,
             dt += ksub;
         }
         if (C::cmp (heap_dis[0], dis)) {
-            heap_pop<C> (k, heap_dis, heap_ids);
-            heap_push<C> (k, heap_dis, heap_ids, dis, j);
+            heap_replace_top<C> (k, heap_dis, heap_ids, dis, j);
         }
     }
 }
@@ -163,8 +160,7 @@ static inline void pq_estimators_from_tables_generic(const ProductQuantizer& pq,
     }
     if (C::cmp(heap_dis[0], dis)) {
-      heap_pop<C>(k, heap_dis, heap_ids);
-      heap_push<C>(k, heap_dis, heap_ids, dis, j);
+      heap_replace_top<C>(k, heap_dis, heap_ids, dis, j);
     }
   }
 }
@@ -186,7 +182,7 @@ ProductQuantizer::ProductQuantizer ()
 void ProductQuantizer::set_derived_values () {
     // quite a few derived values
-    FAISS_THROW_IF_NOT (d % M == 0);
+    FAISS_THROW_IF_NOT_MSG (d % M == 0, "The dimension of the vector (d) should be a multiple of the number of subquantizers (M)");
     dsub = d / M;
     code_size = (nbits * M + 7) / 8;
     ksub = 1 << nbits;
@@ -549,6 +545,14 @@ void ProductQuantizer::compute_distance_tables (
            float * dis_tables) const
 {
+#ifdef __AVX2__
+    if (dsub == 2 && nbits < 8) { // interesting for a narrow range of settings
+        compute_PQ_dis_tables_dsub2(
+            d, ksub, centroids.data(),
+            nx, x, false, dis_tables
+        );
+    } else
+#endif
     if (dsub < 16) {
 #pragma omp parallel for
@@ -573,7 +577,14 @@ void ProductQuantizer::compute_inner_prod_tables (
            const float * x,
            float * dis_tables) const
 {
+#ifdef __AVX2__
+    if (dsub == 2 && nbits < 8) {
+        compute_PQ_dis_tables_dsub2(
+            d, ksub, centroids.data(),
+            nx, x, true, dis_tables
+        );
+    } else
+#endif
     if (dsub < 16) {
 #pragma omp parallel for
@@ -747,8 +758,7 @@ void ProductQuantizer::search_sdc (const uint8_t * qcodes,
                 tab += ksub * ksub;
             }
             if (dis < heap_dis[0]) {
-                maxheap_pop (k, heap_dis, heap_ids);
-                maxheap_push (k, heap_dis, heap_ids, dis, j);
+                maxheap_replace_top (k, heap_dis, heap_ids, dis, j);
             }
             bcode += code_size;
         }

data/vendor/faiss/faiss/impl/ProductQuantizer.h CHANGED Viewed

@@ -219,12 +219,14 @@ struct PQDecoderGeneric {
 };
 struct PQDecoder8 {
+    static const int nbits = 8;
     const uint8_t *code;
     PQDecoder8(const uint8_t *code, int nbits);
     uint64_t decode();
 };
 struct PQDecoder16 {
+    static const int nbits = 16;
     const uint16_t *code;
     PQDecoder16(const uint8_t *code, int nbits);
     uint64_t decode();

data/vendor/faiss/faiss/impl/ResultHandler.h ADDED Viewed

@@ -0,0 +1,452 @@
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+/*
+ * Structures that collect search results from distance computations
+ */
+#pragma once
+#include <faiss/utils/Heap.h>
+#include <faiss/utils/partitioning.h>
+#include <faiss/impl/AuxIndexStructures.h>
+namespace faiss {
+/*****************************************************************
+ * Heap based result handler
+ *****************************************************************/
+template<class C>
+struct HeapResultHandler {  // keeps k results per query in caller-provided tables; C supplies the value type T, the id type TI and the comparison C::cmp
+    using T = typename C::T;  // type of the stored values (named as distances throughout)
+    using TI = typename C::TI;  // type of the stored ids
+    int nq;  // presumably the number of queries; narrowed from the size_t constructor argument and not read inside this struct
+    T *heap_dis_tab;  // value table: the row of query i starts at heap_dis_tab + i * k
+    TI *heap_ids_tab;  // id table, same layout as heap_dis_tab
+    int64_t k;  // number of results to keep
+    HeapResultHandler(
+        size_t nq,
+        T * heap_dis_tab, TI * heap_ids_tab,
+        size_t k):
+        nq(nq),
+        heap_dis_tab(heap_dis_tab), heap_ids_tab(heap_ids_tab), k(k)
+    {  // only stores its arguments; the tables are first touched by begin() / begin_multiple()
+    }
+    /******************************************************
+     * API for 1 result at a time (each SingleResultHandler is
+     * called from 1 thread)
+     */
+    struct SingleResultHandler {
+        HeapResultHandler & hr;  // parent handler holding the table pointers
+        size_t k;  // copy of hr.k
+        T *heap_dis;  // set by begin(): row of the current query in hr.heap_dis_tab
+        TI *heap_ids;  // set by begin(): row of the current query in hr.heap_ids_tab
+        T thresh;  // copy of heap_dis[0], refreshed by begin() and after every replacement; add_result() itself tests heap_dis[0]
+        SingleResultHandler(HeapResultHandler &hr): hr(hr), k(hr.k) {}
+        /// begin results for query # i: select row i of both tables and run heap_heapify on it
+        void begin(size_t i) {
+            heap_dis = hr.heap_dis_tab + i * k;
+            heap_ids = hr.heap_ids_tab + i * k;
+            heap_heapify<C> (k, heap_dis, heap_ids);  // presumably initialises the row as an empty heap — defined in Heap.h, not shown here
+            thresh = heap_dis[0];
+        }
+        /// add one result for query i: the heap top is replaced only when C::cmp(heap_dis[0], dis) holds
+        void add_result(T dis, TI idx) {
+            if (C::cmp(heap_dis[0], dis)) {
+                heap_replace_top<C>(k, heap_dis, heap_ids, dis, idx);
+                thresh = heap_dis[0];
+            }
+        }
+        /// series of results for query i is done
+        void end() {
+            heap_reorder<C> (k, heap_dis, heap_ids);  // presumably turns the heap into a sorted result row — confirm in Heap.h
+        }
+    };
+    /******************************************************
+     * API for multiple results (called from 1 thread)
+     */
+    size_t i0, i1;  // query range [i0, i1) set by begin_multiple(); uninitialised before that call
+    /// begin: remember the query range and run heap_heapify on each of its rows
+    void begin_multiple(size_t i0, size_t i1) {
+        this->i0 = i0;
+        this->i1 = i1;
+        for(size_t i = i0; i < i1; i++) {
+            heap_heapify<C> (k, heap_dis_tab + i * k, heap_ids_tab + i * k);
+        }
+    }
+    /// add results for query i0..i1 and j0..j1; dis_tab is read sequentially, (j1 - j0) values per query, and j is used as the result id
+    void add_results(size_t j0, size_t j1, const T *dis_tab) {
+        // maybe parallel for
+        for (size_t i = i0; i < i1; i++) {
+            T * heap_dis = heap_dis_tab + i * k;
+            TI * heap_ids = heap_ids_tab + i * k;
+            T thresh = heap_dis[0];  // local copy of the heap top, refreshed after each replacement
+            for (size_t j = j0; j < j1; j++) {
+                T dis = *dis_tab++;
+                if (C::cmp(thresh, dis)) {
+                    heap_replace_top<C>(k, heap_dis, heap_ids, dis, j);
+                    thresh = heap_dis[0];
+                }
+            }
+        }
+    }
+    /// series of results for queries i0..i1 is done
+    void end_multiple() {
+        // maybe parallel for
+        for(size_t i = i0; i < i1; i++) {
+            heap_reorder<C> (k, heap_dis_tab + i * k, heap_ids_tab + i * k);
+        }
+    }
+};
+/*****************************************************************
+ * Reservoir result handler
+ *
+ * A reservoir is a result array of size capacity > n (the number of requested
+ * results). All results below a threshold are stored in arbitrary order. When
+ * the capacity is reached, a new threshold is chosen by partitioning the
+ * distance array.
+ *****************************************************************/
+/// Reservoir for a single query: an unordered buffer of (value, id) pairs in caller-provided storage, filtered by a threshold that is tightened each time the buffer fills up
+template<class C>
+struct ReservoirTopN {
+    using T = typename C::T;  // value type
+    using TI = typename C::TI;  // id type
+    T *vals;  // caller-provided value storage, written at indices 0..capacity-1
+    TI *ids;  // caller-provided id storage, parallel to vals
+    size_t i; // number of stored elements
+    size_t n; // number of requested elements
+    size_t capacity;  // size of storage
+    T threshold; // current threshold
+    ReservoirTopN() {}  // leaves every member uninitialised; the object must be assigned from the other constructor before use
+    ReservoirTopN(
+        size_t n, size_t capacity,
+        T *vals, TI *ids
+    ):
+        vals(vals), ids(ids),
+        i(0), n(n), capacity(capacity) {
+        assert(n < capacity);  // checked only when assertions are enabled
+        threshold = C::neutral();  // starting threshold supplied by C
+    }
+    void add(T val, TI id) {  // store (val, id) if C::cmp(threshold, val) holds; otherwise the pair is dropped
+        if (C::cmp(threshold, val)) {
+            if (i == capacity) {
+                shrink_fuzzy();  // makes room and updates threshold; val is not re-tested against the new threshold before being stored
+            }
+            vals[i] = val;
+            ids[i] = id;
+            i++;
+        }
+    }
+    // reduce storage from capacity to anything
+    // between n and (capacity + n) / 2
+    void shrink_fuzzy() {
+        assert(i == capacity);
+        threshold = partition_fuzzy<C>(
+            vals, ids, capacity, n, (capacity + n) / 2,
+            &i);  // partition_fuzzy (partitioning.h, not shown) returns the new threshold and writes the kept count into i
+    }
+    void to_result(T *heap_dis, TI *heap_ids) const {  // writes n output entries to heap_dis / heap_ids from the i stored elements
+        for (int j = 0; j < std::min(i, n); j++) {  // int index compared against size_t; pushes the first min(i, n) stored elements
+            heap_push<C>(
+                j + 1, heap_dis, heap_ids,
+                vals[j], ids[j]
+            );
+        }
+        if (i < n) {
+            heap_reorder<C> (i, heap_dis, heap_ids);
+            // add empty results
+            heap_heapify<C> (n - i, heap_dis + i, heap_ids + i);
+        } else {
+            // add remaining elements
+            heap_addn<C> (n, heap_dis, heap_ids, vals + n, ids + n, i - n);
+            heap_reorder<C> (n, heap_dis, heap_ids);
+        }
+    }
+};
+template<class C>
+struct ReservoirResultHandler {  // same interface as HeapResultHandler, but candidates are buffered in ReservoirTopN objects and only written to the output tables at end() / end_multiple()
+    using T = typename C::T;  // value type
+    using TI = typename C::TI;  // id type
+    int nq;  // presumably the number of queries; narrowed from size_t and not read inside this struct
+    T *heap_dis_tab;  // output value table: the row of query i starts at heap_dis_tab + i * k
+    TI *heap_ids_tab;  // output id table, same layout as heap_dis_tab
+    int64_t k;  // number of results to keep
+    size_t capacity; // capacity of the reservoirs
+    ReservoirResultHandler(
+        size_t nq,
+        T * heap_dis_tab, TI * heap_ids_tab,
+        size_t k):
+        nq(nq),
+        heap_dis_tab(heap_dis_tab), heap_ids_tab(heap_ids_tab), k(k)
+    {
+        // double then round up to multiple of 16 (for SIMD alignment); note that k == 0 gives capacity 0, which violates the n < capacity assertion in ReservoirTopN
+        capacity = (2 * k + 15) & ~15;
+    }
+    /******************************************************
+     * API for 1 result at a time (each SingleResultHandler is
+     * called from 1 thread)
+     */
+    struct SingleResultHandler {
+        ReservoirResultHandler & hr;  // parent handler holding k, capacity and the output tables
+        std::vector<T> reservoir_dis;  // value storage of hr.capacity entries, allocated once and reused for every query
+        std::vector<TI> reservoir_ids;  // id storage, parallel to reservoir_dis
+        ReservoirTopN<C> res1;  // reservoir over the two vectors above; default-constructed here and re-created by begin()
+        SingleResultHandler(ReservoirResultHandler &hr):
+            hr(hr), reservoir_dis(hr.capacity), reservoir_ids(hr.capacity)
+        {
+        }
+        size_t i;  // index of the current query, set by begin()
+        /// begin results for query # i: start from a fresh, empty reservoir
+        void begin(size_t i) {
+            res1 = ReservoirTopN<C>(
+                hr.k, hr.capacity, reservoir_dis.data(), reservoir_ids.data());
+            this->i = i;
+        }
+        /// add one result for query i
+        void add_result(T dis, TI idx) {
+            res1.add(dis, idx);
+        }
+        /// series of results for query i is done: write the reservoir content to row i of the output tables
+        void end() {
+            T * heap_dis = hr.heap_dis_tab + i * hr.k;
+            TI * heap_ids = hr.heap_ids_tab + i * hr.k;
+            res1.to_result(heap_dis, heap_ids);
+        }
+    };
+    /******************************************************
+     * API for multiple results (called from 1 thread)
+     */
+    size_t i0, i1;  // query range [i0, i1) set by begin_multiple(); uninitialised before that call
+    std::vector<T> reservoir_dis;  // backing value storage, capacity entries per query of the current range
+    std::vector<TI> reservoir_ids;  // backing id storage, parallel to reservoir_dis
+    std::vector<ReservoirTopN<C>> reservoirs;  // one reservoir per query of the current range, indexed by i - i0
+    /// begin: size the backing storage for the range and create one empty reservoir per query
+    void begin_multiple(size_t i0, size_t i1) {
+        this->i0 = i0;
+        this->i1 = i1;
+        reservoir_dis.resize((i1 - i0) * capacity);
+        reservoir_ids.resize((i1 - i0) * capacity);
+        reservoirs.clear();
+        for (size_t i = i0; i < i1; i++) {
+            reservoirs.emplace_back(
+                k, capacity,
+                reservoir_dis.data() + (i - i0) * capacity,
+                reservoir_ids.data() + (i - i0) * capacity
+            );
+        }
+    }
+    /// add results for query i0..i1 and j0..j1; dis_tab is read sequentially, (j1 - j0) values per query, and j is used as the result id
+    void add_results(size_t j0, size_t j1, const T *dis_tab) {
+        // maybe parallel for
+        for (size_t i = i0; i < i1; i++) {
+            ReservoirTopN<C> & reservoir = reservoirs[i - i0];
+            for (size_t j = j0; j < j1; j++) {
+                T dis = *dis_tab++;
+                reservoir.add(dis, j);
+            }
+        }
+    }
+    /// series of results for queries i0..i1 is done: write each reservoir to its row of the output tables
+    void end_multiple() {
+        // maybe parallel for
+        for(size_t i = i0; i < i1; i++) {
+            reservoirs[i - i0].to_result(
+                heap_dis_tab + i * k, heap_ids_tab + i * k);
+        }
+    }
+};
+/*****************************************************************
+ * Result handler for range searches
+ *****************************************************************/
+template<class C>
+struct RangeSearchResultHandler {  // collects every result for which C::cmp(radius, dis) holds into a RangeSearchResult
+    using T = typename C::T;  // value type
+    using TI = typename C::TI;  // id type
+    RangeSearchResult *res;  // destination result object supplied by the caller; stored, not owned as far as this struct shows
+    float radius;  // threshold passed as first argument to C::cmp
+    RangeSearchResultHandler(RangeSearchResult *res, float radius):
+        res(res), radius(radius)
+    {}
+    /******************************************************
+     * API for 1 result at a time (each SingleResultHandler is
+     * called from 1 thread)
+     ******************************************************/
+    struct SingleResultHandler {
+        // almost the same interface as RangeSearchResultHandler
+        RangeSearchPartialResult pres;  // partial result built on rh.res; finalized in the destructor
+        float radius;  // copy of rh.radius
+        RangeQueryResult *qr = nullptr;  // result slot of the current query, set by begin(); add_result() dereferences it without a null check
+        SingleResultHandler(RangeSearchResultHandler &rh):
+            pres(rh.res), radius(rh.radius)
+        {}
+        /// begin results for query # i
+        void begin(size_t i) {
+            qr = &pres.new_result(i);
+        }
+        /// add one result for query i (kept only when C::cmp(radius, dis) holds)
+        void add_result(T dis, TI idx) {
+            if (C::cmp(radius, dis)) {
+                qr->add(dis, idx);
+            }
+        }
+        /// series of results for query i is done
+        void end() {
+        }
+        ~SingleResultHandler() {
+            pres.finalize();  // hands the collected results over when the handler goes out of scope — exact semantics live in AuxIndexStructures, not shown here
+        }
+    };
+    /******************************************************
+     * API for multiple results (called from 1 thread)
+     ******************************************************/
+    size_t i0, i1;  // query range [i0, i1) set by begin_multiple(); uninitialised before that call
+    std::vector <RangeSearchPartialResult *> partial_results;  // raw pointers created with new in add_results(); passed to merge() in the destructor
+    std::vector <size_t> j0s;  // j0 value associated with each entry of partial_results (same index)
+    int pr = 0;  // index of the partial result expected for the next add_results() call
+    /// begin
+    void begin_multiple(size_t i0, size_t i1) {
+        this->i0 = i0;
+        this->i1 = i1;
+    }
+    /// add results for query i0..i1 and j0..j1; dis_tab holds (j1 - j0) values per query, row (i - i0) for query i
+    void add_results(size_t j0, size_t j1, const T *dis_tab) {
+        RangeSearchPartialResult *pres;
+        // there is one RangeSearchPartialResult structure per j0
+        // (= block of columns of the large distance matrix)
+        // it is a bit tricky to find the proper PartialResult structure
+        // because the inner loop is on db not on queries.
+        if (pr < j0s.size() && j0 == j0s[pr]) {  // int compared against size_t; the expected next block matches
+            pres = partial_results[pr];
+            pr++;
+        } else if (j0 == 0 && j0s.size() > 0) {  // the column blocks restarted from 0: rewind to the first partial result
+            pr = 0;
+            pres = partial_results[pr];
+            pr++;
+        } else { // did not find this j0
+            pres = new RangeSearchPartialResult (res);  // raw allocation; NOTE(review): release presumably happens inside merge() — confirm
+            partial_results.push_back(pres);
+            j0s.push_back(j0);
+            pr = partial_results.size();
+        }
+        for (size_t i = i0; i < i1; i++) {
+            const float *ip_line = dis_tab + (i - i0) * (j1 - j0);  // typed as float rather than T
+            RangeQueryResult & qres = pres->new_result (i);
+            for (size_t j = j0; j < j1; j++) {
+                float dis = *ip_line++;
+                if (C::cmp(radius, dis)) {
+                    qres.add (dis, j);
+                }
+            }
+        }
+    }
+    void end_multiple() {  // nothing to do per range; merging is deferred to the destructor
+    }
+    ~RangeSearchResultHandler() {
+        if (partial_results.size() > 0) {  // only the multiple-result API fills partial_results
+            RangeSearchPartialResult::merge (partial_results);
+        }
+    }
+};
+}  // namespace faiss