RubyGems - faiss - Versions diffs - 0.1.3 → 0.1.4 - Mend

faiss 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (184) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +4 -0
data/LICENSE.txt +1 -1
data/README.md +1 -1
data/ext/faiss/extconf.rb +1 -1
data/lib/faiss/version.rb +1 -1
data/vendor/faiss/faiss/AutoTune.cpp +36 -33
data/vendor/faiss/faiss/AutoTune.h +6 -3
data/vendor/faiss/faiss/Clustering.cpp +16 -12
data/vendor/faiss/faiss/Index.cpp +3 -4
data/vendor/faiss/faiss/Index.h +3 -3
data/vendor/faiss/faiss/IndexBinary.cpp +3 -4
data/vendor/faiss/faiss/IndexBinary.h +1 -1
data/vendor/faiss/faiss/IndexBinaryHash.cpp +2 -12
data/vendor/faiss/faiss/IndexBinaryIVF.cpp +1 -2
data/vendor/faiss/faiss/IndexFlat.cpp +0 -148
data/vendor/faiss/faiss/IndexFlat.h +0 -51
data/vendor/faiss/faiss/IndexHNSW.cpp +4 -5
data/vendor/faiss/faiss/IndexIVF.cpp +118 -31
data/vendor/faiss/faiss/IndexIVF.h +22 -15
data/vendor/faiss/faiss/IndexIVFFlat.cpp +3 -3
data/vendor/faiss/faiss/IndexIVFFlat.h +2 -1
data/vendor/faiss/faiss/IndexIVFPQ.cpp +39 -15
data/vendor/faiss/faiss/IndexIVFPQ.h +25 -9
data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +1116 -0
data/vendor/faiss/faiss/IndexIVFPQFastScan.h +166 -0
data/vendor/faiss/faiss/IndexIVFPQR.cpp +8 -9
data/vendor/faiss/faiss/IndexIVFPQR.h +2 -1
data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +1 -2
data/vendor/faiss/faiss/IndexPQ.cpp +34 -18
data/vendor/faiss/faiss/IndexPQFastScan.cpp +536 -0
data/vendor/faiss/faiss/IndexPQFastScan.h +111 -0
data/vendor/faiss/faiss/IndexPreTransform.cpp +47 -0
data/vendor/faiss/faiss/IndexPreTransform.h +2 -0
data/vendor/faiss/faiss/IndexRefine.cpp +256 -0
data/vendor/faiss/faiss/IndexRefine.h +73 -0
data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +2 -2
data/vendor/faiss/faiss/IndexScalarQuantizer.h +1 -1
data/vendor/faiss/faiss/gpu/GpuDistance.h +1 -1
data/vendor/faiss/faiss/gpu/GpuIndex.h +16 -9
data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +8 -1
data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +11 -11
data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +19 -2
data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +28 -2
data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +24 -14
data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +29 -2
data/vendor/faiss/faiss/gpu/GpuResources.h +4 -0
data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +60 -27
data/vendor/faiss/faiss/gpu/StandardGpuResources.h +28 -6
data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +547 -0
data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +51 -0
data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +3 -3
data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +3 -2
data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +274 -0
data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +7 -2
data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +5 -1
data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +231 -0
data/vendor/faiss/faiss/gpu/test/TestUtils.h +33 -0
data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +1 -0
data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +6 -0
data/vendor/faiss/faiss/gpu/utils/Timer.cpp +5 -6
data/vendor/faiss/faiss/gpu/utils/Timer.h +2 -2
data/vendor/faiss/faiss/impl/AuxIndexStructures.h +5 -4
data/vendor/faiss/faiss/impl/HNSW.cpp +2 -4
data/vendor/faiss/faiss/impl/PolysemousTraining.h +4 -4
data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +22 -12
data/vendor/faiss/faiss/impl/ProductQuantizer.h +2 -0
data/vendor/faiss/faiss/impl/ResultHandler.h +452 -0
data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +29 -19
data/vendor/faiss/faiss/impl/ScalarQuantizer.h +6 -0
data/vendor/faiss/faiss/impl/index_read.cpp +64 -96
data/vendor/faiss/faiss/impl/index_write.cpp +34 -25
data/vendor/faiss/faiss/impl/io.cpp +33 -2
data/vendor/faiss/faiss/impl/io.h +7 -2
data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -15
data/vendor/faiss/faiss/impl/platform_macros.h +44 -0
data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +272 -0
data/vendor/faiss/faiss/impl/pq4_fast_scan.h +169 -0
data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +180 -0
data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +354 -0
data/vendor/faiss/faiss/impl/simd_result_handlers.h +559 -0
data/vendor/faiss/faiss/index_factory.cpp +112 -7
data/vendor/faiss/faiss/index_io.h +1 -48
data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +151 -0
data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +76 -0
data/vendor/faiss/faiss/{DirectMap.cpp → invlists/DirectMap.cpp} +1 -1
data/vendor/faiss/faiss/{DirectMap.h → invlists/DirectMap.h} +1 -1
data/vendor/faiss/faiss/{InvertedLists.cpp → invlists/InvertedLists.cpp} +72 -1
data/vendor/faiss/faiss/{InvertedLists.h → invlists/InvertedLists.h} +32 -1
data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +107 -0
data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +63 -0
data/vendor/faiss/faiss/{OnDiskInvertedLists.cpp → invlists/OnDiskInvertedLists.cpp} +21 -6
data/vendor/faiss/faiss/{OnDiskInvertedLists.h → invlists/OnDiskInvertedLists.h} +5 -2
data/vendor/faiss/faiss/python/python_callbacks.h +8 -1
data/vendor/faiss/faiss/utils/AlignedTable.h +141 -0
data/vendor/faiss/faiss/utils/Heap.cpp +2 -4
data/vendor/faiss/faiss/utils/Heap.h +61 -50
data/vendor/faiss/faiss/utils/distances.cpp +164 -319
data/vendor/faiss/faiss/utils/distances.h +28 -20
data/vendor/faiss/faiss/utils/distances_simd.cpp +277 -49
data/vendor/faiss/faiss/utils/extra_distances.cpp +1 -2
data/vendor/faiss/faiss/utils/hamming-inl.h +4 -4
data/vendor/faiss/faiss/utils/hamming.cpp +3 -6
data/vendor/faiss/faiss/utils/hamming.h +2 -7
data/vendor/faiss/faiss/utils/ordered_key_value.h +98 -0
data/vendor/faiss/faiss/utils/partitioning.cpp +1256 -0
data/vendor/faiss/faiss/utils/partitioning.h +69 -0
data/vendor/faiss/faiss/utils/quantize_lut.cpp +277 -0
data/vendor/faiss/faiss/utils/quantize_lut.h +80 -0
data/vendor/faiss/faiss/utils/simdlib.h +31 -0
data/vendor/faiss/faiss/utils/simdlib_avx2.h +461 -0
data/vendor/faiss/faiss/utils/simdlib_emulated.h +589 -0
metadata +43 -141
data/vendor/faiss/benchs/bench_6bit_codec.cpp +0 -80
data/vendor/faiss/c_api/AutoTune_c.cpp +0 -83
data/vendor/faiss/c_api/AutoTune_c.h +0 -66
data/vendor/faiss/c_api/Clustering_c.cpp +0 -145
data/vendor/faiss/c_api/Clustering_c.h +0 -123
data/vendor/faiss/c_api/IndexFlat_c.cpp +0 -140
data/vendor/faiss/c_api/IndexFlat_c.h +0 -115
data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +0 -64
data/vendor/faiss/c_api/IndexIVFFlat_c.h +0 -58
data/vendor/faiss/c_api/IndexIVF_c.cpp +0 -99
data/vendor/faiss/c_api/IndexIVF_c.h +0 -142
data/vendor/faiss/c_api/IndexLSH_c.cpp +0 -37
data/vendor/faiss/c_api/IndexLSH_c.h +0 -40
data/vendor/faiss/c_api/IndexPreTransform_c.cpp +0 -21
data/vendor/faiss/c_api/IndexPreTransform_c.h +0 -32
data/vendor/faiss/c_api/IndexShards_c.cpp +0 -38
data/vendor/faiss/c_api/IndexShards_c.h +0 -39
data/vendor/faiss/c_api/Index_c.cpp +0 -105
data/vendor/faiss/c_api/Index_c.h +0 -183
data/vendor/faiss/c_api/MetaIndexes_c.cpp +0 -49
data/vendor/faiss/c_api/MetaIndexes_c.h +0 -49
data/vendor/faiss/c_api/clone_index_c.cpp +0 -23
data/vendor/faiss/c_api/clone_index_c.h +0 -32
data/vendor/faiss/c_api/error_c.h +0 -42
data/vendor/faiss/c_api/error_impl.cpp +0 -27
data/vendor/faiss/c_api/error_impl.h +0 -16
data/vendor/faiss/c_api/faiss_c.h +0 -58
data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +0 -98
data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +0 -56
data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +0 -52
data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +0 -68
data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +0 -17
data/vendor/faiss/c_api/gpu/GpuIndex_c.h +0 -30
data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +0 -38
data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +0 -86
data/vendor/faiss/c_api/gpu/GpuResources_c.h +0 -66
data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +0 -54
data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +0 -53
data/vendor/faiss/c_api/gpu/macros_impl.h +0 -42
data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +0 -220
data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +0 -149
data/vendor/faiss/c_api/index_factory_c.cpp +0 -26
data/vendor/faiss/c_api/index_factory_c.h +0 -30
data/vendor/faiss/c_api/index_io_c.cpp +0 -42
data/vendor/faiss/c_api/index_io_c.h +0 -50
data/vendor/faiss/c_api/macros_impl.h +0 -110
data/vendor/faiss/demos/demo_imi_flat.cpp +0 -154
data/vendor/faiss/demos/demo_imi_pq.cpp +0 -203
data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +0 -151
data/vendor/faiss/demos/demo_sift1M.cpp +0 -252
data/vendor/faiss/demos/demo_weighted_kmeans.cpp +0 -185
data/vendor/faiss/misc/test_blas.cpp +0 -87
data/vendor/faiss/tests/test_binary_flat.cpp +0 -62
data/vendor/faiss/tests/test_dealloc_invlists.cpp +0 -188
data/vendor/faiss/tests/test_ivfpq_codec.cpp +0 -70
data/vendor/faiss/tests/test_ivfpq_indexing.cpp +0 -100
data/vendor/faiss/tests/test_lowlevel_ivf.cpp +0 -573
data/vendor/faiss/tests/test_merge.cpp +0 -260
data/vendor/faiss/tests/test_omp_threads.cpp +0 -14
data/vendor/faiss/tests/test_ondisk_ivf.cpp +0 -225
data/vendor/faiss/tests/test_pairs_decoding.cpp +0 -193
data/vendor/faiss/tests/test_params_override.cpp +0 -236
data/vendor/faiss/tests/test_pq_encoding.cpp +0 -98
data/vendor/faiss/tests/test_sliding_ivf.cpp +0 -246
data/vendor/faiss/tests/test_threaded_index.cpp +0 -253
data/vendor/faiss/tests/test_transfer_invlists.cpp +0 -159
data/vendor/faiss/tutorial/cpp/1-Flat.cpp +0 -104
data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +0 -85
data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +0 -98
data/vendor/faiss/tutorial/cpp/4-GPU.cpp +0 -122
data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +0 -104

data/vendor/faiss/faiss/utils/AlignedTable.h ADDED Viewed

@@ -0,0 +1,141 @@
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+#pragma once
+#include <cstdint>
+#include <cstdlib>
+#include <cassert>
+#include <cstring>
+#include <algorithm>
+#include <faiss/impl/platform_macros.h>
+namespace faiss {
+// Tell whether the address held in x is a multiple of A bytes (32 by default).
+template<int A=32>
+inline bool is_aligned_pointer(const void* x)
+{
+    const size_t address = reinterpret_cast<size_t>(x);
+    const size_t remainder = address % A;
+    return remainder == 0;
+}
+// Class that manages suitably aligned arrays for SIMD.
+// T should be a POD type: elements are copied with memcpy and zeroed with
+// memset, no constructor or destructor is ever run on them.
+// The default alignment is 32 for AVX. The allocation holds exactly numel
+// elements (no spare capacity).
+template<class T, int A=32>
+struct AlignedTableTightAlloc {
+    T * ptr;       // start of the aligned storage, nullptr when empty
+    size_t numel;  // number of elements of type T stored at ptr
+    AlignedTableTightAlloc(): ptr(nullptr), numel(0)
+    { }
+    explicit AlignedTableTightAlloc(size_t n): ptr(nullptr), numel(0)
+    { resize(n); }
+    size_t itemsize() const {return sizeof(T); }
+    // Resize to n elements. The first min(old size, n) elements are kept,
+    // any additional elements are left uninitialized. Throws std::bad_alloc
+    // when the aligned allocation fails, leaving the table untouched.
+    void resize(size_t n) {
+        if (numel == n) {
+            return;
+        }
+        T * new_ptr;
+        if (n > 0) {
+            int ret = posix_memalign((void**)&new_ptr, A, n * sizeof(T));
+            if (ret != 0) {
+                throw std::bad_alloc();
+            }
+            if (numel > 0) {
+                memcpy(new_ptr, ptr, sizeof(T) * std::min(numel, n));
+            }
+        } else {
+            new_ptr = nullptr;
+        }
+        numel = n;
+        posix_memalign_free(ptr);
+        ptr = new_ptr;
+    }
+    // Set every element to zero bytes. An empty table has ptr == nullptr,
+    // which must not be handed to memset, so it is skipped.
+    void clear() {
+        if (numel > 0) {
+            memset(ptr, 0, nbytes());
+        }
+    }
+    size_t size() const {return numel; }
+    size_t nbytes() const {return numel * sizeof(T); }
+    T * get() {return ptr; }
+    const T * get() const {return ptr; }
+    T * data() {return ptr; }
+    const T * data() const {return ptr; }
+    // unchecked element access; the const overload returns a copy
+    T & operator [] (size_t i)  {return ptr[i]; }
+    T operator [] (size_t i) const {return ptr[i]; }
+    ~AlignedTableTightAlloc() {posix_memalign_free(ptr); }
+    // Deep copy: resize to the size of other, then copy its elements.
+    AlignedTableTightAlloc<T, A> & operator =
+            (const AlignedTableTightAlloc<T, A> & other) {
+        // self-assignment would memcpy a buffer onto itself
+        if (this == &other) {
+            return *this;
+        }
+        resize(other.numel);
+        // both pointers are nullptr when other is empty: nothing to copy
+        if (numel > 0) {
+            memcpy(ptr, other.ptr, sizeof(T) * numel);
+        }
+        return *this;
+    }
+    // ptr and numel have to be initialized before delegating to operator=,
+    // because it calls resize(), which reads numel and frees ptr.
+    AlignedTableTightAlloc(const AlignedTableTightAlloc<T, A> & other):
+        ptr(nullptr), numel(0)
+    {
+        *this = other;
+    }
+};
+// Same interface as AlignedTableTightAlloc, but with geometric re-allocation:
+// the underlying table is sized to round_capacity(n), so the allocation only
+// changes when that rounded capacity changes.
+template<class T, int A=32>
+struct AlignedTable {
+    AlignedTableTightAlloc<T, A> tab;  // storage, sized to the rounded capacity
+    size_t numel = 0;                  // number of elements exposed to the user
+    // Capacity used to hold n elements: 0 for an empty table, otherwise
+    // 8 * A doubled until it is at least n.
+    static size_t round_capacity(size_t n) {
+        if (n == 0) {
+            return 0;
+        }
+        size_t capacity = 8 * A;
+        for (; capacity < n; capacity *= 2) {
+        }
+        return capacity;
+    }
+    AlignedTable() {}
+    explicit AlignedTable(size_t n):
+        tab(round_capacity(n)),
+        numel(n)
+    { }
+    size_t itemsize() const {return sizeof(T); }
+    void resize(size_t n) {
+        const size_t capacity = round_capacity(n);
+        tab.resize(capacity);
+        numel = n;
+    }
+    // zeroes the whole allocated capacity, not only the first numel elements
+    void clear() { tab.clear(); }
+    size_t size() const {return numel; }
+    size_t nbytes() const {return sizeof(T) * numel; }
+    T * get() {return tab.ptr; }
+    const T * get() const {return tab.ptr; }
+    T * data() {return tab.ptr; }
+    const T * data() const {return tab.ptr; }
+    // unchecked element access; the const overload returns a copy
+    T & operator [] (size_t i)  {return tab.get()[i]; }
+    T operator [] (size_t i) const {return tab.get()[i]; }
+    // assignment and copy construction are the compiler-generated ones and
+    // rely on those of the tab member
+};
+} // namespace faiss

data/vendor/faiss/faiss/utils/Heap.cpp CHANGED Viewed

@@ -46,8 +46,7 @@ void HeapArray<C>::addn (size_t nj, const T *vin, TI j0,
         for (size_t j = 0; j < nj; j++) {
             T ip = ip_line [j];
             if (C::cmp(simi[0], ip)) {
-                heap_pop<C> (k, simi, idxi);
-                heap_push<C> (k, simi, idxi, ip, j + j0);
+                heap_replace_top<C> (k, simi, idxi, ip, j + j0);
             }
         }
     }
@@ -74,8 +73,7 @@ void HeapArray<C>::addn_with_ids (
         for (size_t j = 0; j < nj; j++) {
             T ip = ip_line [j];
             if (C::cmp(simi[0], ip)) {
-                heap_pop<C> (k, simi, idxi);
-                heap_push<C> (k, simi, idxi, ip, id_line [j]);
+                heap_replace_top<C> (k, simi, idxi, ip, id_line [j]);
             }
         }
     }

data/vendor/faiss/faiss/utils/Heap.h CHANGED Viewed

@@ -5,16 +5,18 @@
  * LICENSE file in the root directory of this source tree.
  */
-// -*- c++ -*-
 /*
- * C++ support for heaps. The set of functions is tailored for
- * efficient similarity search.
+ * C++ support for heaps. The set of functions is tailored for efficient
+ * similarity search.
  *
- * There is no specific object for a heap, and the functions that
- * operate on a signle heap are inlined, because heaps are often
- * small. More complex functions are implemented in Heaps.cpp
+ * There is no specific object for a heap, and the functions that operate on a
+ * single heap are inlined, because heaps are often small. More complex
+ * functions are implemented in Heap.cpp
  *
+ * All heap functions rely on a C template class that defines the type of the
+ * keys and values and their ordering (increasing with CMax and decreasing with
+ * CMin). The C types are defined in ordered_key_value.h
  */
@@ -31,51 +33,12 @@
 #include <limits>
+#include <faiss/utils/ordered_key_value.h>
 namespace faiss {
-/*******************************************************************
- * C object: uniform handling of min and max heap
- *******************************************************************/
-/** The C object gives the type T of the values in the heap, the type
- *  of the keys, TI and the comparison that is done: > for the minheap
- *  and < for the maxheap. The neutral value will always be dropped in
- *  favor of any other value in the heap.
- */
-template <typename T_, typename TI_>
-struct CMax;
-// traits of minheaps = heaps where the minimum value is stored on top
-// useful to find the *max* values of an array
-template <typename T_, typename TI_>
-struct CMin {
-    typedef T_ T;
-    typedef TI_ TI;
-    typedef CMax<T_, TI_> Crev;
-    inline static bool cmp (T a, T b) {
-        return a < b;
-    }
-    inline static T neutral () {
-        return std::numeric_limits<T>::lowest();
-    }
-};
-template <typename T_, typename TI_>
-struct CMax {
-    typedef T_ T;
-    typedef TI_ TI;
-    typedef CMin<T_, TI_> Crev;
-    inline static bool cmp (T a, T b) {
-        return a > b;
-    }
-    inline static T neutral () {
-        return std::numeric_limits<T>::max();
-    }
-};
 /*******************************************************************
  * Basic heap ops: push and pop
@@ -142,6 +105,43 @@ void heap_push (size_t k,
+/** Replace the top element of the heap defined by bh_val[0..k-1] and
+ * bh_ids[0..k-1] with the pair (val, ids).
+ *
+ * The new pair is moved down from the root: at each level the child selected
+ * with C::cmp is moved up by one level, until C::cmp(val, child) holds or the
+ * current node has no child. The heap size k is not modified.
+ */
+template <class C> inline
+void heap_replace_top (size_t k,
+                       typename C::T * bh_val, typename C::TI * bh_ids,
+                       typename C::T val, typename C::TI ids)
+{
+    /* 1-based views of the arrays: node n has children 2n and 2n+1 */
+    typename C::T * const vals = bh_val - 1;
+    typename C::TI * const labels = bh_ids - 1;
+    size_t node = 1;
+    for (;;) {
+        const size_t left = 2 * node;
+        const size_t right = left + 1;
+        if (left > k) {
+            break; /* node has no child */
+        }
+        /* take the left child when it is the only one or when it compares
+         * ahead of the right one; the right child is not read when it lies
+         * outside of the heap */
+        const size_t child =
+                (right == k + 1 || C::cmp(vals[left], vals[right]))
+                ? left : right;
+        if (C::cmp(val, vals[child])) {
+            break; /* val stays above this child */
+        }
+        vals[node] = vals[child];
+        labels[node] = labels[child];
+        node = child;
+    }
+    vals[node] = val;
+    labels[node] = ids;
+}
 /* Partial instantiation for heaps with TI = int64_t */
 template <typename T> inline
@@ -158,6 +158,13 @@ void minheap_push (size_t k, T * bh_val, int64_t * bh_ids, T val, int64_t ids)
 }
+/** heap_replace_top for int64_t ids, using the CMin<T, int64_t> ordering */
+template <typename T> inline
+void minheap_replace_top (size_t k, T * bh_val, int64_t * bh_ids, T val, int64_t ids)
+{
+    using MinOrder = CMin<T, int64_t>;
+    heap_replace_top<MinOrder> (k, bh_val, bh_ids, val, ids);
+}
 template <typename T> inline
 void maxheap_pop (size_t k, T * bh_val, int64_t * bh_ids)
 {
@@ -172,6 +179,12 @@ void maxheap_push (size_t k, T * bh_val, int64_t * bh_ids, T val, int64_t ids)
 }
+/** heap_replace_top for int64_t ids, using the CMax<T, int64_t> ordering */
+template <typename T> inline
+void maxheap_replace_top (size_t k, T * bh_val, int64_t * bh_ids, T val, int64_t ids)
+{
+    using MaxOrder = CMax<T, int64_t>;
+    heap_replace_top<MaxOrder> (k, bh_val, bh_ids, val, ids);
+}
 /*******************************************************************
  * Heap initialization
@@ -249,15 +262,13 @@ void heap_addn (size_t k,
     if (ids)
         for (i = 0; i < n; i++) {
             if (C::cmp (bh_val[0], x[i])) {
-                heap_pop<C> (k, bh_val, bh_ids);
-                heap_push<C> (k, bh_val, bh_ids, x[i], ids[i]);
+                heap_replace_top<C> (k, bh_val, bh_ids, x[i], ids[i]);
             }
         }
     else
         for (i = 0; i < n; i++) {
             if (C::cmp (bh_val[0], x[i])) {
-                heap_pop<C> (k, bh_val, bh_ids);
-                heap_push<C> (k, bh_val, bh_ids, x[i], i);
+                heap_replace_top<C> (k, bh_val, bh_ids, x[i], i);
             }
         }
 }

data/vendor/faiss/faiss/utils/distances.cpp CHANGED Viewed

@@ -19,6 +19,7 @@
 #include <faiss/impl/AuxIndexStructures.h>
 #include <faiss/impl/FaissAssert.h>
+#include <faiss/impl/ResultHandler.h>
@@ -36,14 +37,6 @@ int sgemm_ (const char *transa, const char *transb, FINTEGER *m, FINTEGER *
             FINTEGER *lda, const float *b, FINTEGER *
             ldb, float *beta, float *c, FINTEGER *ldc);
-/* Lapack functions, see http://www.netlib.org/clapack/old/single/sgeqrf.c */
-int sgeqrf_ (FINTEGER *m, FINTEGER *n, float *a, FINTEGER *lda,
-                 float *tau, float *work, FINTEGER *lwork, FINTEGER *info);
-int sgemv_(const char *trans, FINTEGER *m, FINTEGER *n, float *alpha,
-           const float *a, FINTEGER *lda, const float *x, FINTEGER *incx,
-           float *beta, float *y, FINTEGER *incy);
 }
@@ -58,34 +51,6 @@ namespace faiss {
-/* Compute the inner product between a vector x and
-   a set of ny vectors y.
-   These functions are not intended to replace BLAS matrix-matrix, as they
-   would be significantly less efficient in this case. */
-void fvec_inner_products_ny (float * ip,
-                             const float * x,
-                             const float * y,
-                             size_t d, size_t ny)
-{
-    // Not sure which one is fastest
-#if 0
-    {
-        FINTEGER di = d;
-        FINTEGER nyi = ny;
-        float one = 1.0, zero = 0.0;
-        FINTEGER onei = 1;
-        sgemv_ ("T", &di, &nyi, &one, y, &di, x, &onei, &zero, ip, &onei);
-    }
-#endif
-    for (size_t i = 0; i < ny; i++) {
-        ip[i] = fvec_inner_product (x, y, d);
-        y += d;
-    }
-}
 /* Compute the L2 norm of a set of nx vectors */
 void fvec_norms_L2 (float * __restrict nr,
@@ -142,109 +107,112 @@ void fvec_renorm_L2 (size_t d, size_t nx, float * __restrict x)
  * KNN functions
  ***************************************************************************/
+namespace {
 /* Find the nearest neighbors for nx queries in a set of ny vectors */
-static void knn_inner_product_sse (const float * x,
-                        const float * y,
-                        size_t d, size_t nx, size_t ny,
-                        float_minheap_array_t * res)
+template<class ResultHandler>
+void exhaustive_inner_product_seq (
+        const float * x,
+        const float * y,
+        size_t d, size_t nx, size_t ny,
+        ResultHandler &res)
 {
-    size_t k = res->k;
     size_t check_period = InterruptCallback::get_period_hint (ny * d);
     check_period *= omp_get_max_threads();
+    using SingleResultHandler = typename ResultHandler::SingleResultHandler;
     for (size_t i0 = 0; i0 < nx; i0 += check_period) {
         size_t i1 = std::min(i0 + check_period, nx);
-#pragma omp parallel for
-        for (int64_t i = i0; i < i1; i++) {
-            const float * x_i = x + i * d;
-            const float * y_j = y;
-            float * __restrict simi = res->get_val(i);
-            int64_t * __restrict idxi = res->get_ids (i);
-            minheap_heapify (k, simi, idxi);
+#pragma omp parallel
+        {
+            SingleResultHandler resi(res);
+#pragma omp for
+            for (int64_t i = i0; i < i1; i++) {
+                const float * x_i = x + i * d;
+                const float * y_j = y;
-            for (size_t j = 0; j < ny; j++) {
-                float ip = fvec_inner_product (x_i, y_j, d);
+                resi.begin(i);
-                if (ip > simi[0]) {
-                    minheap_pop (k, simi, idxi);
-                    minheap_push (k, simi, idxi, ip, j);
+                for (size_t j = 0; j < ny; j++) {
+                    float ip = fvec_inner_product (x_i, y_j, d);
+                    resi.add_result(ip, j);
+                    y_j += d;
                 }
-                y_j += d;
+                resi.end();
             }
-            minheap_reorder (k, simi, idxi);
         }
         InterruptCallback::check ();
     }
 }
-static void knn_L2sqr_sse (
+template<class ResultHandler>
+void exhaustive_L2sqr_seq (
                 const float * x,
                 const float * y,
                 size_t d, size_t nx, size_t ny,
-                float_maxheap_array_t * res)
+                ResultHandler & res)
 {
-    size_t k = res->k;
     size_t check_period = InterruptCallback::get_period_hint (ny * d);
     check_period *= omp_get_max_threads();
+    using SingleResultHandler = typename ResultHandler::SingleResultHandler;
     for (size_t i0 = 0; i0 < nx; i0 += check_period) {
         size_t i1 = std::min(i0 + check_period, nx);
-#pragma omp parallel for
-        for (int64_t i = i0; i < i1; i++) {
-            const float * x_i = x + i * d;
-            const float * y_j = y;
-            size_t j;
-            float * simi = res->get_val(i);
-            int64_t * idxi = res->get_ids (i);
-            maxheap_heapify (k, simi, idxi);
-            for (j = 0; j < ny; j++) {
-                float disij = fvec_L2sqr (x_i, y_j, d);
-                if (disij < simi[0]) {
-                    maxheap_pop (k, simi, idxi);
-                    maxheap_push (k, simi, idxi, disij, j);
+#pragma omp parallel
+        {
+            SingleResultHandler resi(res);
+#pragma omp for
+            for (int64_t i = i0; i < i1; i++) {
+                const float * x_i = x + i * d;
+                const float * y_j = y;
+                resi.begin(i);
+                for (size_t j = 0; j < ny; j++) {
+                    float disij = fvec_L2sqr (x_i, y_j, d);
+                    resi.add_result(disij, j);
+                    y_j += d;
                 }
-                y_j += d;
+                resi.end();
             }
-            maxheap_reorder (k, simi, idxi);
         }
         InterruptCallback::check ();
     }
-}
+};
 /** Find the nearest neighbors for nx queries in a set of ny vectors */
-static void knn_inner_product_blas (
+template<class ResultHandler>
+void exhaustive_inner_product_blas (
         const float * x,
         const float * y,
         size_t d, size_t nx, size_t ny,
-        float_minheap_array_t * res)
+        ResultHandler & res)
 {
-    res->heapify ();
     // BLAS does not like empty matrices
     if (nx == 0 || ny == 0) return;
     /* block sizes */
-    const size_t bs_x = 4096, bs_y = 1024;
-    // const size_t bs_x = 16, bs_y = 16;
+    const size_t bs_x = distance_compute_blas_query_bs;
+    const size_t bs_y = distance_compute_blas_database_bs;
     std::unique_ptr<float[]> ip_block(new float[bs_x * bs_y]);
     for (size_t i0 = 0; i0 < nx; i0 += bs_x) {
         size_t i1 = i0 + bs_x;
         if(i1 > nx) i1 = nx;
+        res.begin_multiple(i0, i1);
         for (size_t j0 = 0; j0 < ny; j0 += bs_y) {
             size_t j1 = j0 + bs_y;
             if (j1 > ny) j1 = ny;
@@ -258,46 +226,54 @@ static void knn_inner_product_blas (
                         ip_block.get(), &nyi);
             }
-            /* collect maxima */
-            res->addn (j1 - j0, ip_block.get(), j0, i0, i1 - i0);
+            res.add_results(j0, j1, ip_block.get());
         }
+        res.end_multiple();
         InterruptCallback::check ();
     }
-    res->reorder ();
 }
 // distance correction is an operator that can be applied to transform
 // the distances
-template<class DistanceCorrection>
-static void knn_L2sqr_blas (const float * x,
+template<class ResultHandler>
+void exhaustive_L2sqr_blas (
+        const float * x,
         const float * y,
         size_t d, size_t nx, size_t ny,
-        float_maxheap_array_t * res,
-        const DistanceCorrection &corr)
+        ResultHandler & res,
+        const float *y_norms = nullptr)
 {
-    res->heapify ();
     // BLAS does not like empty matrices
     if (nx == 0 || ny == 0) return;
-    size_t k = res->k;
     /* block sizes */
-    const size_t bs_x = 4096, bs_y = 1024;
+    const size_t bs_x = distance_compute_blas_query_bs;
+    const size_t bs_y = distance_compute_blas_database_bs;
     // const size_t bs_x = 16, bs_y = 16;
-    float *ip_block = new float[bs_x * bs_y];
-    float *x_norms = new float[nx];
-    float *y_norms = new float[ny];
-    ScopeDeleter<float> del1(ip_block), del3(x_norms), del2(y_norms);
+    std::unique_ptr<float []> ip_block(new float[bs_x * bs_y]);
+    std::unique_ptr<float []> x_norms(new float[nx]);
+    std::unique_ptr<float []> del2;
-    fvec_norms_L2sqr (x_norms, x, d, nx);
-    fvec_norms_L2sqr (y_norms, y, d, ny);
+    fvec_norms_L2sqr (x_norms.get(), x, d, nx);
+    if (!y_norms) {
+        float *y_norms2 = new float[ny];
+        del2.reset(y_norms2);
+        fvec_norms_L2sqr (y_norms2, y, d, ny);
+        y_norms = y_norms2;
+    }
     for (size_t i0 = 0; i0 < nx; i0 += bs_x) {
         size_t i1 = i0 + bs_x;
         if(i1 > nx) i1 = nx;
+        res.begin_multiple(i0, i1);
         for (size_t j0 = 0; j0 < ny; j0 += bs_y) {
             size_t j1 = j0 + bs_y;
             if (j1 > ny) j1 = ny;
@@ -308,42 +284,34 @@ static void knn_L2sqr_blas (const float * x,
                 sgemm_ ("Transpose", "Not transpose", &nyi, &nxi, &di, &one,
                         y + j0 * d, &di,
                         x + i0 * d, &di, &zero,
-                        ip_block, &nyi);
+                        ip_block.get(), &nyi);
             }
-            /* collect minima */
-#pragma omp parallel for
             for (int64_t i = i0; i < i1; i++) {
-                float * __restrict simi = res->get_val(i);
-                int64_t * __restrict idxi = res->get_ids (i);
-                const float *ip_line = ip_block + (i - i0) * (j1 - j0);
+                float *ip_line = ip_block.get() + (i - i0) * (j1 - j0);
                 for (size_t j = j0; j < j1; j++) {
-                    float ip = *ip_line++;
+                    float ip = *ip_line;
                     float dis = x_norms[i] + y_norms[j] - 2 * ip;
                     // negative values can occur for identical vectors
                     // due to roundoff errors
                     if (dis < 0) dis = 0;
-                    dis = corr (dis, i, j);
-                    if (dis < simi[0]) {
-                        maxheap_pop (k, simi, idxi);
-                        maxheap_push (k, simi, idxi, dis, j);
-                    }
+                    *ip_line = dis;
+                    ip_line++;
                 }
             }
+            res.add_results(j0, j1, ip_block.get());
         }
+        res.end_multiple();
         InterruptCallback::check ();
     }
-    res->reorder ();
 }
+} // anonymous namespace
@@ -354,58 +322,103 @@ static void knn_L2sqr_blas (const float * x,
  *******************************************************/
 int distance_compute_blas_threshold = 20;
+int distance_compute_blas_query_bs = 4096;
+int distance_compute_blas_database_bs = 1024;
+int distance_compute_min_k_reservoir = 100;
 void knn_inner_product (const float * x,
         const float * y,
         size_t d, size_t nx, size_t ny,
-        float_minheap_array_t * res)
+        float_minheap_array_t * ha)
 {
-    if (nx < distance_compute_blas_threshold) {
-        knn_inner_product_sse (x, y, d, nx, ny, res);
+    if (ha->k < distance_compute_min_k_reservoir) {
+        HeapResultHandler<CMin<float, int64_t>> res(
+            ha->nh, ha->val, ha->ids, ha->k);
+        if (nx < distance_compute_blas_threshold) {
+            exhaustive_inner_product_seq (x, y, d, nx, ny, res);
+        } else {
+            exhaustive_inner_product_blas (x, y, d, nx, ny, res);
+        }
     } else {
-        knn_inner_product_blas (x, y, d, nx, ny, res);
+        ReservoirResultHandler<CMin<float, int64_t>> res(
+            ha->nh, ha->val, ha->ids, ha->k);
+        if (nx < distance_compute_blas_threshold) {
+            exhaustive_inner_product_seq (x, y, d, nx, ny, res);
+        } else {
+            exhaustive_inner_product_blas (x, y, d, nx, ny, res);
+        }
     }
 }
-struct NopDistanceCorrection {
-  float operator()(float dis, size_t /*qno*/, size_t /*bno*/) const {
-    return dis;
+void knn_L2sqr (
+        const float * x,
+        const float * y,
+        size_t d, size_t nx, size_t ny,
+        float_maxheap_array_t * ha,
+        const float *y_norm2
+) {
+    if (ha->k < distance_compute_min_k_reservoir) {
+        HeapResultHandler<CMax<float, int64_t>> res(
+            ha->nh, ha->val, ha->ids, ha->k);
+        if (nx < distance_compute_blas_threshold) {
+            exhaustive_L2sqr_seq (x, y, d, nx, ny, res);
+        } else {
+            exhaustive_L2sqr_blas (x, y, d, nx, ny, res, y_norm2);
+        }
+    } else {
+        ReservoirResultHandler<CMax<float, int64_t>> res(
+            ha->nh, ha->val, ha->ids, ha->k);
+        if (nx < distance_compute_blas_threshold) {
+            exhaustive_L2sqr_seq (x, y, d, nx, ny, res);
+        } else {
+            exhaustive_L2sqr_blas (x, y, d, nx, ny, res, y_norm2);
+        }
     }
-};
+}
-void knn_L2sqr (const float * x,
-                const float * y,
-                size_t d, size_t nx, size_t ny,
-                float_maxheap_array_t * res)
+/***************************************************************************
+ * Range search
+ ***************************************************************************/
+void range_search_L2sqr (
+        const float * x,
+        const float * y,
+        size_t d, size_t nx, size_t ny,
+        float radius,
+        RangeSearchResult *res)
 {
+    RangeSearchResultHandler<CMax<float, int64_t>> resh(res, radius);
     if (nx < distance_compute_blas_threshold) {
-        knn_L2sqr_sse (x, y, d, nx, ny, res);
+        exhaustive_L2sqr_seq (x, y, d, nx, ny, resh);
     } else {
-        NopDistanceCorrection nop;
-        knn_L2sqr_blas (x, y, d, nx, ny, res, nop);
+        exhaustive_L2sqr_blas (x, y, d, nx, ny, resh);
     }
 }
-struct BaseShiftDistanceCorrection {
-    const float *base_shift;
-    float operator()(float dis, size_t /*qno*/, size_t bno) const {
-      return dis - base_shift[bno];
-    }
-};
-void knn_L2sqr_base_shift (
-         const float * x,
-         const float * y,
-         size_t d, size_t nx, size_t ny,
-         float_maxheap_array_t * res,
-         const float *base_shift)
+void range_search_inner_product (
+        const float * x,
+        const float * y,
+        size_t d, size_t nx, size_t ny,
+        float radius,
+        RangeSearchResult *res)
 {
-    BaseShiftDistanceCorrection corr = {base_shift};
-    knn_L2sqr_blas (x, y, d, nx, ny, res, corr);
-}
+    RangeSearchResultHandler<CMin<float, int64_t>> resh(res, radius);
+    if (nx < distance_compute_blas_threshold) {
+        exhaustive_inner_product_seq (x, y, d, nx, ny, resh);
+    } else {
+        exhaustive_inner_product_blas (x, y, d, nx, ny, resh);
+    }
+}
 /***************************************************************************
@@ -509,8 +522,7 @@ void knn_inner_products_by_idx (const float * x,
             float ip = fvec_inner_product (x_, y + d * idsi[j], d);
             if (ip > simi[0]) {
-                minheap_pop (k, simi, idxi);
-                minheap_push (k, simi, idxi, ip, idsi[j]);
+                minheap_replace_top (k, simi, idxi, ip, idsi[j]);
             }
         }
         minheap_reorder (k, simi, idxi);
@@ -537,8 +549,7 @@ void knn_L2sqr_by_idx (const float * x,
             float disij = fvec_L2sqr (x_, y + d * idsi[j], d);
             if (disij < simi[0]) {
-                maxheap_pop (k, simi, idxi);
-                maxheap_push (k, simi, idxi, disij, idsi[j]);
+                maxheap_replace_top (k, simi, idxi, disij, idsi[j]);
             }
         }
         maxheap_reorder (res->k, simi, idxi);
@@ -550,172 +561,6 @@ void knn_L2sqr_by_idx (const float * x,
-/***************************************************************************
- * Range search
- ***************************************************************************/
-/** Find the nearest neighbors for nx queries in a set of ny vectors
- * compute_l2 = compute pairwise squared L2 distance rather than inner prod
- */
-template <bool compute_l2>
-static void range_search_blas (
-        const float * x,
-        const float * y,
-        size_t d, size_t nx, size_t ny,
-        float radius,
-        RangeSearchResult *result)
-{
-    // BLAS does not like empty matrices
-    if (nx == 0 || ny == 0) return;
-    /* block sizes */
-    const size_t bs_x = 4096, bs_y = 1024;
-    // const size_t bs_x = 16, bs_y = 16;
-    float *ip_block = new float[bs_x * bs_y];
-    ScopeDeleter<float> del0(ip_block);
-    float *x_norms = nullptr, *y_norms = nullptr;
-    ScopeDeleter<float> del1, del2;
-    if (compute_l2) {
-        x_norms = new float[nx];
-        del1.set (x_norms);
-        fvec_norms_L2sqr (x_norms, x, d, nx);
-        y_norms = new float[ny];
-        del2.set (y_norms);
-        fvec_norms_L2sqr (y_norms, y, d, ny);
-    }
-    std::vector <RangeSearchPartialResult *> partial_results;
-    for (size_t j0 = 0; j0 < ny; j0 += bs_y) {
-        size_t j1 = j0 + bs_y;
-        if (j1 > ny) j1 = ny;
-        RangeSearchPartialResult * pres = new RangeSearchPartialResult (result);
-        partial_results.push_back (pres);
-        for (size_t i0 = 0; i0 < nx; i0 += bs_x) {
-            size_t i1 = i0 + bs_x;
-            if(i1 > nx) i1 = nx;
-            /* compute the actual dot products */
-            {
-                float one = 1, zero = 0;
-                FINTEGER nyi = j1 - j0, nxi = i1 - i0, di = d;
-                sgemm_ ("Transpose", "Not transpose", &nyi, &nxi, &di, &one,
-                        y + j0 * d, &di,
-                        x + i0 * d, &di, &zero,
-                        ip_block, &nyi);
-            }
-            for (size_t i = i0; i < i1; i++) {
-                const float *ip_line = ip_block + (i - i0) * (j1 - j0);
-                RangeQueryResult & qres = pres->new_result (i);
-                for (size_t j = j0; j < j1; j++) {
-                    float ip = *ip_line++;
-                    if (compute_l2) {
-                        float dis =  x_norms[i] + y_norms[j] - 2 * ip;
-                        if (dis < radius) {
-                            qres.add (dis, j);
-                        }
-                    } else {
-                        if (ip > radius) {
-                            qres.add (ip, j);
-                        }
-                    }
-                }
-            }
-        }
-        InterruptCallback::check ();
-    }
-    RangeSearchPartialResult::merge (partial_results);
-}
-template <bool compute_l2>
-static void range_search_sse (const float * x,
-                const float * y,
-                size_t d, size_t nx, size_t ny,
-                float radius,
-                RangeSearchResult *res)
-{
-#pragma omp parallel
-    {
-        RangeSearchPartialResult pres (res);
-#pragma omp for
-        for (int64_t i = 0; i < nx; i++) {
-            const float * x_ = x + i * d;
-            const float * y_ = y;
-            size_t j;
-            RangeQueryResult & qres = pres.new_result (i);
-            for (j = 0; j < ny; j++) {
-                if (compute_l2) {
-                    float disij = fvec_L2sqr (x_, y_, d);
-                    if (disij < radius) {
-                        qres.add (disij, j);
-                    }
-                } else {
-                    float ip = fvec_inner_product (x_, y_, d);
-                    if (ip > radius) {
-                        qres.add (ip, j);
-                    }
-                }
-                y_ += d;
-            }
-        }
-        pres.finalize ();
-    }
-    // check just at the end because the use case is typically just
-    // when the nb of queries is low.
-    InterruptCallback::check();
-}
-void range_search_L2sqr (
-        const float * x,
-        const float * y,
-        size_t d, size_t nx, size_t ny,
-        float radius,
-        RangeSearchResult *res)
-{
-    if (nx < distance_compute_blas_threshold) {
-        range_search_sse<true> (x, y, d, nx, ny, radius, res);
-    } else {
-        range_search_blas<true> (x, y, d, nx, ny, radius, res);
-    }
-}
-void range_search_inner_product (
-        const float * x,
-        const float * y,
-        size_t d, size_t nx, size_t ny,
-        float radius,
-        RangeSearchResult *res)
-{
-    if (nx < distance_compute_blas_threshold) {
-        range_search_sse<false> (x, y, d, nx, ny, radius, res);
-    } else {
-        range_search_blas<false> (x, y, d, nx, ny, radius, res);
-    }
-}
 void pairwise_L2sqr (int64_t d,
                      int64_t nq, const float *xq,
                      int64_t nb, const float *xb,