RubyGems - faiss - Versions diffs - 0.1.3 → 0.1.4 - Mend

faiss 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (184) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +4 -0
data/LICENSE.txt +1 -1
data/README.md +1 -1
data/ext/faiss/extconf.rb +1 -1
data/lib/faiss/version.rb +1 -1
data/vendor/faiss/faiss/AutoTune.cpp +36 -33
data/vendor/faiss/faiss/AutoTune.h +6 -3
data/vendor/faiss/faiss/Clustering.cpp +16 -12
data/vendor/faiss/faiss/Index.cpp +3 -4
data/vendor/faiss/faiss/Index.h +3 -3
data/vendor/faiss/faiss/IndexBinary.cpp +3 -4
data/vendor/faiss/faiss/IndexBinary.h +1 -1
data/vendor/faiss/faiss/IndexBinaryHash.cpp +2 -12
data/vendor/faiss/faiss/IndexBinaryIVF.cpp +1 -2
data/vendor/faiss/faiss/IndexFlat.cpp +0 -148
data/vendor/faiss/faiss/IndexFlat.h +0 -51
data/vendor/faiss/faiss/IndexHNSW.cpp +4 -5
data/vendor/faiss/faiss/IndexIVF.cpp +118 -31
data/vendor/faiss/faiss/IndexIVF.h +22 -15
data/vendor/faiss/faiss/IndexIVFFlat.cpp +3 -3
data/vendor/faiss/faiss/IndexIVFFlat.h +2 -1
data/vendor/faiss/faiss/IndexIVFPQ.cpp +39 -15
data/vendor/faiss/faiss/IndexIVFPQ.h +25 -9
data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +1116 -0
data/vendor/faiss/faiss/IndexIVFPQFastScan.h +166 -0
data/vendor/faiss/faiss/IndexIVFPQR.cpp +8 -9
data/vendor/faiss/faiss/IndexIVFPQR.h +2 -1
data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +1 -2
data/vendor/faiss/faiss/IndexPQ.cpp +34 -18
data/vendor/faiss/faiss/IndexPQFastScan.cpp +536 -0
data/vendor/faiss/faiss/IndexPQFastScan.h +111 -0
data/vendor/faiss/faiss/IndexPreTransform.cpp +47 -0
data/vendor/faiss/faiss/IndexPreTransform.h +2 -0
data/vendor/faiss/faiss/IndexRefine.cpp +256 -0
data/vendor/faiss/faiss/IndexRefine.h +73 -0
data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +2 -2
data/vendor/faiss/faiss/IndexScalarQuantizer.h +1 -1
data/vendor/faiss/faiss/gpu/GpuDistance.h +1 -1
data/vendor/faiss/faiss/gpu/GpuIndex.h +16 -9
data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +8 -1
data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +11 -11
data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +19 -2
data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +28 -2
data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +24 -14
data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +29 -2
data/vendor/faiss/faiss/gpu/GpuResources.h +4 -0
data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +60 -27
data/vendor/faiss/faiss/gpu/StandardGpuResources.h +28 -6
data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +547 -0
data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +51 -0
data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +3 -3
data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +3 -2
data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +274 -0
data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +7 -2
data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +5 -1
data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +231 -0
data/vendor/faiss/faiss/gpu/test/TestUtils.h +33 -0
data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +1 -0
data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +6 -0
data/vendor/faiss/faiss/gpu/utils/Timer.cpp +5 -6
data/vendor/faiss/faiss/gpu/utils/Timer.h +2 -2
data/vendor/faiss/faiss/impl/AuxIndexStructures.h +5 -4
data/vendor/faiss/faiss/impl/HNSW.cpp +2 -4
data/vendor/faiss/faiss/impl/PolysemousTraining.h +4 -4
data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +22 -12
data/vendor/faiss/faiss/impl/ProductQuantizer.h +2 -0
data/vendor/faiss/faiss/impl/ResultHandler.h +452 -0
data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +29 -19
data/vendor/faiss/faiss/impl/ScalarQuantizer.h +6 -0
data/vendor/faiss/faiss/impl/index_read.cpp +64 -96
data/vendor/faiss/faiss/impl/index_write.cpp +34 -25
data/vendor/faiss/faiss/impl/io.cpp +33 -2
data/vendor/faiss/faiss/impl/io.h +7 -2
data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -15
data/vendor/faiss/faiss/impl/platform_macros.h +44 -0
data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +272 -0
data/vendor/faiss/faiss/impl/pq4_fast_scan.h +169 -0
data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +180 -0
data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +354 -0
data/vendor/faiss/faiss/impl/simd_result_handlers.h +559 -0
data/vendor/faiss/faiss/index_factory.cpp +112 -7
data/vendor/faiss/faiss/index_io.h +1 -48
data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +151 -0
data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +76 -0
data/vendor/faiss/faiss/{DirectMap.cpp → invlists/DirectMap.cpp} +1 -1
data/vendor/faiss/faiss/{DirectMap.h → invlists/DirectMap.h} +1 -1
data/vendor/faiss/faiss/{InvertedLists.cpp → invlists/InvertedLists.cpp} +72 -1
data/vendor/faiss/faiss/{InvertedLists.h → invlists/InvertedLists.h} +32 -1
data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +107 -0
data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +63 -0
data/vendor/faiss/faiss/{OnDiskInvertedLists.cpp → invlists/OnDiskInvertedLists.cpp} +21 -6
data/vendor/faiss/faiss/{OnDiskInvertedLists.h → invlists/OnDiskInvertedLists.h} +5 -2
data/vendor/faiss/faiss/python/python_callbacks.h +8 -1
data/vendor/faiss/faiss/utils/AlignedTable.h +141 -0
data/vendor/faiss/faiss/utils/Heap.cpp +2 -4
data/vendor/faiss/faiss/utils/Heap.h +61 -50
data/vendor/faiss/faiss/utils/distances.cpp +164 -319
data/vendor/faiss/faiss/utils/distances.h +28 -20
data/vendor/faiss/faiss/utils/distances_simd.cpp +277 -49
data/vendor/faiss/faiss/utils/extra_distances.cpp +1 -2
data/vendor/faiss/faiss/utils/hamming-inl.h +4 -4
data/vendor/faiss/faiss/utils/hamming.cpp +3 -6
data/vendor/faiss/faiss/utils/hamming.h +2 -7
data/vendor/faiss/faiss/utils/ordered_key_value.h +98 -0
data/vendor/faiss/faiss/utils/partitioning.cpp +1256 -0
data/vendor/faiss/faiss/utils/partitioning.h +69 -0
data/vendor/faiss/faiss/utils/quantize_lut.cpp +277 -0
data/vendor/faiss/faiss/utils/quantize_lut.h +80 -0
data/vendor/faiss/faiss/utils/simdlib.h +31 -0
data/vendor/faiss/faiss/utils/simdlib_avx2.h +461 -0
data/vendor/faiss/faiss/utils/simdlib_emulated.h +589 -0
metadata +43 -141
data/vendor/faiss/benchs/bench_6bit_codec.cpp +0 -80
data/vendor/faiss/c_api/AutoTune_c.cpp +0 -83
data/vendor/faiss/c_api/AutoTune_c.h +0 -66
data/vendor/faiss/c_api/Clustering_c.cpp +0 -145
data/vendor/faiss/c_api/Clustering_c.h +0 -123
data/vendor/faiss/c_api/IndexFlat_c.cpp +0 -140
data/vendor/faiss/c_api/IndexFlat_c.h +0 -115
data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +0 -64
data/vendor/faiss/c_api/IndexIVFFlat_c.h +0 -58
data/vendor/faiss/c_api/IndexIVF_c.cpp +0 -99
data/vendor/faiss/c_api/IndexIVF_c.h +0 -142
data/vendor/faiss/c_api/IndexLSH_c.cpp +0 -37
data/vendor/faiss/c_api/IndexLSH_c.h +0 -40
data/vendor/faiss/c_api/IndexPreTransform_c.cpp +0 -21
data/vendor/faiss/c_api/IndexPreTransform_c.h +0 -32
data/vendor/faiss/c_api/IndexShards_c.cpp +0 -38
data/vendor/faiss/c_api/IndexShards_c.h +0 -39
data/vendor/faiss/c_api/Index_c.cpp +0 -105
data/vendor/faiss/c_api/Index_c.h +0 -183
data/vendor/faiss/c_api/MetaIndexes_c.cpp +0 -49
data/vendor/faiss/c_api/MetaIndexes_c.h +0 -49
data/vendor/faiss/c_api/clone_index_c.cpp +0 -23
data/vendor/faiss/c_api/clone_index_c.h +0 -32
data/vendor/faiss/c_api/error_c.h +0 -42
data/vendor/faiss/c_api/error_impl.cpp +0 -27
data/vendor/faiss/c_api/error_impl.h +0 -16
data/vendor/faiss/c_api/faiss_c.h +0 -58
data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +0 -98
data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +0 -56
data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +0 -52
data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +0 -68
data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +0 -17
data/vendor/faiss/c_api/gpu/GpuIndex_c.h +0 -30
data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +0 -38
data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +0 -86
data/vendor/faiss/c_api/gpu/GpuResources_c.h +0 -66
data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +0 -54
data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +0 -53
data/vendor/faiss/c_api/gpu/macros_impl.h +0 -42
data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +0 -220
data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +0 -149
data/vendor/faiss/c_api/index_factory_c.cpp +0 -26
data/vendor/faiss/c_api/index_factory_c.h +0 -30
data/vendor/faiss/c_api/index_io_c.cpp +0 -42
data/vendor/faiss/c_api/index_io_c.h +0 -50
data/vendor/faiss/c_api/macros_impl.h +0 -110
data/vendor/faiss/demos/demo_imi_flat.cpp +0 -154
data/vendor/faiss/demos/demo_imi_pq.cpp +0 -203
data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +0 -151
data/vendor/faiss/demos/demo_sift1M.cpp +0 -252
data/vendor/faiss/demos/demo_weighted_kmeans.cpp +0 -185
data/vendor/faiss/misc/test_blas.cpp +0 -87
data/vendor/faiss/tests/test_binary_flat.cpp +0 -62
data/vendor/faiss/tests/test_dealloc_invlists.cpp +0 -188
data/vendor/faiss/tests/test_ivfpq_codec.cpp +0 -70
data/vendor/faiss/tests/test_ivfpq_indexing.cpp +0 -100
data/vendor/faiss/tests/test_lowlevel_ivf.cpp +0 -573
data/vendor/faiss/tests/test_merge.cpp +0 -260
data/vendor/faiss/tests/test_omp_threads.cpp +0 -14
data/vendor/faiss/tests/test_ondisk_ivf.cpp +0 -225
data/vendor/faiss/tests/test_pairs_decoding.cpp +0 -193
data/vendor/faiss/tests/test_params_override.cpp +0 -236
data/vendor/faiss/tests/test_pq_encoding.cpp +0 -98
data/vendor/faiss/tests/test_sliding_ivf.cpp +0 -246
data/vendor/faiss/tests/test_threaded_index.cpp +0 -253
data/vendor/faiss/tests/test_transfer_invlists.cpp +0 -159
data/vendor/faiss/tutorial/cpp/1-Flat.cpp +0 -104
data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +0 -85
data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +0 -98
data/vendor/faiss/tutorial/cpp/4-GPU.cpp +0 -122
data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +0 -104

data/vendor/faiss/faiss/utils/partitioning.h ADDED Viewed

@@ -0,0 +1,69 @@
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+#pragma once
+#include <stdint.h>
+#include <stdio.h>
+#include <faiss/impl/platform_macros.h>
+namespace faiss {
+/** partitions the table into 0:q and q:n where all elements above q are >= all
+ * elements below q (for C = CMax, for CMin comparisons are reversed)
+ *
+ * Returns the partition threshold. The elements q:n are destroyed on output.
+ */
+template<class C>
+typename C::T partition_fuzzy(
+    typename C::T *vals, typename C::TI * ids, size_t n,
+    size_t q_min, size_t q_max, size_t * q_out);
+/** Simplified interface for when the partition is not fuzzy.
+ *
+ * Forwards to partition_fuzzy with q_min = q_max = q and a null q_out, and
+ * returns whatever threshold partition_fuzzy reports.
+ *
+ * @param vals  table of values handed to partition_fuzzy
+ * @param ids   table of ids handed to partition_fuzzy alongside vals
+ * @param n     number of elements in the table
+ * @param q     exact split position (used as both q_min and q_max)
+ * @return      the partition threshold
+ */
+template<class C>
+inline typename C::T partition(
+    typename C::T *vals, typename C::TI * ids, size_t n,
+    size_t q)
+{
+    return partition_fuzzy<C>(vals, ids, n, q, q, nullptr);
+}
+/** low level SIMD histogramming functions */
+/** 8-bin histogram of (x - min) >> shift
+ * values outside the range are ignored.
+ * the data table should be aligned on 32 bytes */
+void simd_histogram_8(
+    const uint16_t *data, int n,
+    uint16_t min, int shift,
+    int *hist);
+/** same for 16-bin histogram */
+void simd_histogram_16(
+    const uint16_t *data, int n,
+    uint16_t min, int shift,
+    int *hist);
+struct PartitionStats {
+    uint64_t bissect_cycles; /* NOTE(review): presumably cycles spent in the bisection step, going by the name -- confirm in partitioning.cpp */
+    uint64_t compress_cycles; /* NOTE(review): presumably cycles spent in the compress step, going by the name -- confirm in partitioning.cpp */
+    PartitionStats () {reset (); } /* construction delegates to reset() */
+    void reset (); /* defined out of line; its body is not visible in this header */
+};
+// global var that collects them all
+FAISS_API extern PartitionStats partition_stats;
+} // namespace faiss

data/vendor/faiss/faiss/utils/quantize_lut.cpp ADDED Viewed

@@ -0,0 +1,277 @@
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+#include <faiss/utils/quantize_lut.h>
+#include <cmath>
+#include <cstring>
+#include <vector>
+#include <algorithm>
+#include <faiss/impl/FaissAssert.h>
+namespace faiss {
+namespace quantize_lut {
+/******************************************************
+ * Quantize look-up tables
+ ******************************************************/
+namespace {
+/** Rescales tab in place so that its largest absolute value maps to 127,
+ * then adds 128 and rounds down.
+ *
+ * @param tab  input/output table, size n
+ * @param n    number of entries in tab
+ * @return     the multiplier 127 / max(|tab[i]|) applied to the entries
+ *
+ * NOTE(review): if every entry is 0 (or n == 0) the multiplier is 127 / 0;
+ * that case is not special-cased here.
+ */
+float round_uint8_and_mul(float *tab, size_t n) {
+    float max = 0;
+    // size_t counter: same type as n, so there is no signed/unsigned
+    // comparison and no int overflow on very large tables
+    for(size_t i = 0; i < n; i++) {
+        if(fabs(tab[i]) > max) {
+            max = fabs(tab[i]);
+        }
+    }
+    float multiplier = 127 / max;
+    for(size_t i = 0; i < n; i++) {
+        tab[i] = floorf(tab[i] * multiplier + 128);
+    }
+    return multiplier;
+}
+/** Smallest entry of tab[0..n).
+ *
+ * There can be NaNs in tables: a NaN never compares below the running
+ * minimum, so it is ignored. The result is HUGE_VAL (converted to float)
+ * when no entry is selected.
+ */
+float tab_min(const float *tab, size_t n) {
+    float min = HUGE_VAL;
+    for(size_t i = 0; i < n; i++) {
+        if (tab[i] < min) min = tab[i];
+    }
+    return min;
+}
+/** Largest entry of tab[0..n); NaNs are ignored the same way as in tab_min.
+ * The result is -HUGE_VAL (converted to float) when no entry is selected.
+ */
+float tab_max(const float *tab, size_t n) {
+    float max = -HUGE_VAL;
+    for(size_t i = 0; i < n; i++) {
+        if (tab[i] > max) max = tab[i];
+    }
+    return max;
+}
+/** In-place affine rounding: tab[i] <- floor((tab[i] - bi) * a + 0.5).
+ *
+ * @param tab  input/output table, size n
+ * @param a    scale applied after the offset is removed
+ * @param bi   offset subtracted from every entry
+ */
+void round_tab(float *tab, size_t n, float a, float bi) {
+    for(size_t i = 0; i < n; i++) {
+        tab[i] = floorf((tab[i] - bi) * a + 0.5);
+    }
+}
+/** Same affine rounding as above, but the result is cast to T and written
+ * to tab_out (size n); the input table is left untouched.
+ */
+template<typename T>
+void round_tab(const float *tab, size_t n, float a, float bi, T *tab_out) {
+    for(size_t i = 0; i < n; i++) {
+        tab_out[i] = (T)floorf((tab[i] - bi) * a + 0.5);
+    }
+}
+} // anonymous namespace
+/** Affine quantization of a (n, d) table, done in place.
+ *
+ * Each row i (the d entries starting at tab + i * d) is shifted by its own
+ * minimum; all rows share one scale a, chosen so that the widest row spans
+ * 0..255:
+ *
+ *      tab[i, j] <- floor((tab[i, j] - min_i) * a + 0.5)
+ *
+ * The results are still stored as float.
+ *
+ * @param tab    input/output, size (n, d)
+ * @param n      number of rows
+ * @param d      length of a row
+ * @param a_out  if not null, receives the scale a
+ * @param b_out  if not null, receives the sum of the row minima
+ */
+void round_uint8_per_column(
+        float *tab, size_t n, size_t d,
+        float *a_out, float *b_out)
+{
+    float max_span = 0;
+    std::vector<float> mins(n);
+    // size_t counters: n is a size_t, an int counter would be compared as
+    // unsigned and overflow for n > INT_MAX
+    for(size_t i = 0; i < n; i++) {
+        mins[i] = tab_min(tab + i * d, d);
+        float span = tab_max(tab + i * d, d) - mins[i];
+        if(span > max_span) {
+            max_span = span;
+        }
+    }
+    // when every row is constant the span is 0: dividing would give an
+    // infinite scale and turn the table into NaNs (0 * inf). Any finite
+    // scale maps such a table to all zeros, so use 1.
+    float a = max_span > 0 ? 255 / max_span : 1;
+    float b = 0;
+    for(size_t i = 0; i < n; i++) {
+        b += mins[i];
+        round_tab(tab + i * d, d, a, mins[i]);
+    }
+    if (a_out) *a_out = a;
+    if (b_out) *b_out = b;
+}
+/** Affine quantization of a (m, n, d) table, done in place.
+ *
+ * For every i in [0, n) the minimum and maximum are taken over the m slices
+ * (j, i, :), so the m tables share the same per-i offset. All entries share
+ * one scale a, chosen so that the widest span maps to 0..255:
+ *
+ *      tab[j, i, c] <- floor((tab[j, i, c] - min_i) * a + 0.5)
+ *
+ * The results are still stored as float.
+ *
+ * @param tab    input/output, size (m, n, d)
+ * @param a_out  if not null, receives the scale a
+ * @param b_out  if not null, receives the sum of the n offsets min_i
+ */
+void round_uint8_per_column_multi(
+        float *tab, size_t m, size_t n, size_t d,
+        float *a_out, float *b_out)
+{
+    float max_span = 0;
+    std::vector<float> mins(n);
+    // size_t counters: m and n are size_t, int counters would be compared
+    // as unsigned and overflow for sizes above INT_MAX
+    for(size_t i = 0; i < n; i++) {
+        float min_i = HUGE_VAL;
+        float max_i = -HUGE_VAL;
+        for(size_t j = 0; j < m; j++) {
+            min_i = std::min(min_i, tab_min(tab + (j * n + i) * d, d));
+            max_i = std::max(max_i, tab_max(tab + (j * n + i) * d, d));
+        }
+        mins[i] = min_i;
+        float span = max_i - min_i;
+        if(span > max_span) {
+            max_span = span;
+        }
+    }
+    // a zero span would give an infinite scale and NaN entries (0 * inf);
+    // any finite scale maps such a table to all zeros, so use 1.
+    float a = max_span > 0 ? 255 / max_span : 1;
+    float b = 0;
+    for(size_t i = 0; i < n; i++) {
+        b += mins[i];
+        for(size_t j = 0; j < m; j++) {
+            round_tab(tab + (j * n + i) * d, d, a, mins[i]);
+        }
+    }
+    if (a_out) *a_out = a;
+    if (b_out) *b_out = b;
+}
+// translation of
+// https://github.com/fairinternal/faiss_improvements/blob/7122c3cc6ddb0a371d8aa6f1309cd8bcf2335e61/LUT_quantization.ipynb
+/** Quantizes a float LUT to uint8 and, when requested, the per-probe bias to
+ * uint16, with one common scale a: quantized = floor((value - min) * a + 0.5).
+ *
+ * Four cases are handled:
+ *  - bias == nullptr: single (M, ksub) LUT; lut_is_3d must be false.
+ *  - bias given, 2D LUT: the LUT is quantized per sub-quantizer and the bias
+ *    is quantized separately into biasq (which must not be null).
+ *  - bias given, 3D LUT, biasq != nullptr: the per-probe sum of LUT minima is
+ *    folded into the bias before it is quantized into biasq.
+ *  - bias given, 3D LUT, biasq == nullptr: bias[j] / M is added to every
+ *    entry of probe j's LUT, which is then quantized on its own.
+ *
+ * In every case each quantized (M, ksub) matrix is padded with zero rows up
+ * to M2 rows, so M2 >= M is required.
+ *
+ * NOTE(review): when all spans are 0 the scale a is a division by zero; that
+ * degenerate case is left as in the original.
+ *
+ * @param a_out  if not null, receives the scale a
+ * @param b_out  if not null, receives the offset b
+ */
+void quantize_LUT_and_bias(
+        size_t nprobe, size_t M, size_t ksub,
+        bool lut_is_3d,
+        const float *LUT,
+        const float *bias,
+        uint8_t *LUTq, size_t M2,
+        uint16_t *biasq,
+        float *a_out, float *b_out)
+{
+    // the zero padding writes ksub * (M2 - M) bytes; with M2 < M that size
+    // wraps around and memset would run far past the end of LUTq
+    FAISS_THROW_IF_NOT(M2 >= M);
+    float a, b;
+    // all counters below are size_t to match nprobe / M / ksub (no
+    // signed/unsigned comparison, no int overflow on large sizes)
+    if (!bias) {
+        FAISS_THROW_IF_NOT(!lut_is_3d);
+        std::vector<float> mins(M);
+        float max_span_LUT = -HUGE_VAL, max_span_dis = 0;
+        b = 0;
+        for(size_t i = 0; i < M; i++) {
+            mins[i] = tab_min(LUT + i * ksub, ksub);
+            float span = tab_max(LUT + i * ksub, ksub) - mins[i];
+            max_span_LUT = std::max(max_span_LUT, span);
+            max_span_dis += span;
+            b += mins[i];
+        }
+        // each entry must fit in 8 bits, the sum of M entries in 16 bits
+        a = std::min(255 / max_span_LUT, 65535 / max_span_dis);
+        for(size_t i = 0; i < M; i++) {
+            round_tab(LUT + i * ksub, ksub, a, mins[i], LUTq + i * ksub);
+        }
+        memset(LUTq + M * ksub, 0, ksub * (M2 - M));
+    } else if (!lut_is_3d) {
+        // this branch always writes the quantized bias
+        FAISS_THROW_IF_NOT(biasq);
+        std::vector<float> mins(M);
+        float max_span_LUT = -HUGE_VAL, max_span_dis;
+        float bias_min = tab_min(bias, nprobe);
+        float bias_max = tab_max(bias, nprobe);
+        max_span_dis = bias_max - bias_min;
+        b = 0;
+        for(size_t i = 0; i < M; i++) {
+            mins[i] = tab_min(LUT + i * ksub, ksub);
+            float span = tab_max(LUT + i * ksub, ksub) - mins[i];
+            max_span_LUT = std::max(max_span_LUT, span);
+            max_span_dis += span;
+            b += mins[i];
+        }
+        a = std::min(255 / max_span_LUT, 65535 / max_span_dis);
+        b += bias_min;
+        for(size_t i = 0; i < M; i++) {
+            round_tab(LUT + i * ksub, ksub, a, mins[i], LUTq + i * ksub);
+        }
+        memset(LUTq + M * ksub, 0, ksub * (M2 - M));
+        round_tab(bias, nprobe, a, bias_min, biasq);
+    } else if (biasq) {
+        // LUT is 3D
+        std::vector<float> mins(nprobe * M);
+        std::vector<float> bias2(nprobe);
+        float bias_min = tab_min(bias, nprobe);
+        float max_span_LUT = -HUGE_VAL, max_span_dis = -HUGE_VAL;
+        b = HUGE_VAL;
+        size_t ij = 0;
+        for (size_t j = 0; j < nprobe; j++) {
+            float max_span_dis_j = bias[j] - bias_min;
+            float b2j = bias[j];
+            for(size_t i = 0; i < M; i++) {
+                mins[ij] = tab_min(LUT + ij * ksub, ksub);
+                float span = tab_max(LUT + ij * ksub, ksub) - mins[ij];
+                max_span_LUT = std::max(max_span_LUT, span);
+                max_span_dis_j += span;
+                b2j += mins[ij];
+                ij++;
+            }
+            max_span_dis = std::max(max_span_dis, max_span_dis_j);
+            // bias of probe j plus the minima removed from its LUT rows
+            bias2[j] = b2j;
+            b = std::min(b, b2j);
+        }
+        a = std::min(255 / max_span_LUT, 65535 / max_span_dis);
+        ij = 0;
+        // ij walks the input rows (M per probe), ij_2 the output rows
+        // (M2 per probe)
+        size_t ij_2 = 0;
+        for (size_t j = 0; j < nprobe; j++) {
+            for(size_t i = 0; i < M; i++) {
+                round_tab(LUT + ij * ksub, ksub, a, mins[ij], LUTq + ij_2 * ksub);
+                ij++; ij_2++;
+            }
+            memset(LUTq + ij_2 * ksub, 0, ksub * (M2 - M));
+            ij_2 += M2 - M;
+        }
+        round_tab(bias2.data(), nprobe, a, b, biasq);
+    } else { // !biasq
+        // then we integrate the bias into the LUTs
+        std::vector<float> LUT2_storage(nprobe * M * ksub);
+        float *LUT2 = LUT2_storage.data();
+        size_t ijc = 0;
+        for (size_t j = 0; j < nprobe; j++) {
+            // spread the bias evenly over the M rows so that their sum
+            // carries the full bias
+            float bias_j = bias[j] / M;
+            for(size_t i = 0; i < M; i++) {
+                for (size_t c = 0; c < ksub; c++) {
+                    LUT2[ijc] = LUT[ijc] + bias_j;
+                    ijc++;
+                }
+            }
+        }
+        // min / max per sub-quantizer, taken over all probes
+        std::vector<float> mins(M, HUGE_VAL), maxs(M, -HUGE_VAL);
+        size_t ij = 0;
+        for (size_t j = 0; j < nprobe; j++) {
+            for(size_t i = 0; i < M; i++) {
+                mins[i] = std::min(mins[i], tab_min(LUT2 + ij * ksub, ksub));
+                maxs[i] = std::max(maxs[i], tab_max(LUT2 + ij * ksub, ksub));
+                ij++;
+            }
+        }
+        float max_span = -HUGE_VAL;
+        b = 0;
+        for(size_t i = 0; i < M; i++) {
+            float span = maxs[i] - mins[i];
+            max_span = std::max(max_span, span);
+            b += mins[i];
+        }
+        a = 255 / max_span;
+        ij = 0;
+        size_t ij_2 = 0;
+        for (size_t j = 0; j < nprobe; j++) {
+            for(size_t i = 0; i < M; i++) {
+                round_tab(LUT2 + ij * ksub, ksub, a, mins[i], LUTq + ij_2 * ksub);
+                ij++; ij_2++;
+            }
+            memset(LUTq + ij_2 * ksub, 0, ksub * (M2 - M));
+            ij_2 += M2 - M;
+        }
+    }
+    if (a_out) *a_out = a;
+    if (b_out) *b_out = b;
+}
+} // namespace quantize_lut
+} // namespace faiss

data/vendor/faiss/faiss/utils/quantize_lut.h ADDED Viewed

@@ -0,0 +1,80 @@
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+#pragma once
+#include <cstdio>
+#include <cstdint>
+namespace faiss {
+/** Functions to quantize PQ floating-point Look Up Tables (LUT) to uint8, and
+ * biases to uint16. The accumulation is supposed to take place in uint16.
+ * The quantization coefficients are float (a, b) such that
+ *
+ *      original_value ~= quantized_value / a + b
+ *
+ * The hardest part of the quantization is with multiple LUTs that need to be
+ * added up together. In that case, coefficient a has to be chosen so that
+ * the sum fits in a uint16 accumulator.
+ */
+namespace quantize_lut {
+/* affine quantizer, a and b are the affine coefficients, marginalize over d
+ *
+ * @param tab input/output, size (n, d)
+ */
+void round_uint8_per_column(
+        float *tab, size_t n, size_t d,
+        float *a_out = nullptr,
+        float *b_out = nullptr
+);
+/* affine quantizer, a and b are the affine coefficients
+ *
+ * @param tab input/output, size (m, n, d)
+ */
+void round_uint8_per_column_multi(
+        float *tab, size_t m, size_t n, size_t d,
+        float *a_out = nullptr, float *b_out = nullptr);
+/** LUT quantization to uint8 and bias to uint16.
+ *
+ * (nprobe, M, ksub, lut_is_3d) determine the size of the LUT
+ *
+ *  LUT input:
+ *  - 2D size (M, ksub): a single matrix shared by all probes (lut_is_3d=false)
+ *  - 3D size (nprobe, M, ksub): separate LUT per probe (lut_is_3d=true)
+ *  bias input:
+ *  - nullptr: bias is 0
+ *  - size (nprobe): one bias per probe
+ *  Output:
+ *  - LUTq uint8 version of the LUT (M size is rounded up to M2)
+ *  - biasq (or nullptr): uint16 version of the bias
+ *  - a, b: scalars to approximate the true distance
+ */
+void quantize_LUT_and_bias(
+        size_t nprobe, size_t M, size_t ksub,
+        bool lut_is_3d,
+        const float *LUT,
+        const float *bias,
+        uint8_t *LUTq, size_t M2,
+        uint16_t *biasq,
+        float *a_out = nullptr, float *b_out = nullptr
+);
+} // namespace quantize_lut
+} // namespace faiss

data/vendor/faiss/faiss/utils/simdlib.h ADDED Viewed

@@ -0,0 +1,31 @@
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+#pragma once
+/** Abstractions for 256-bit registers
+ *
+ * The objective is to separate the different interpretations of the same
+ * registers (as a vector of uint8, uint16 or uint32), to provide printing
+ * functions.
+ */
+#ifdef __AVX2__
+#include <faiss/utils/simdlib_avx2.h>
+#else
+// emulated = all operations are implemented as scalars
+#include <faiss/utils/simdlib_emulated.h>
+// FIXME: make a SSE version
+// is this ever going to happen? We will probably rather implement AVX512
+#endif