RubyGems - faiss - Versions diffs - 0.3.1 → 0.3.2 - Mend

faiss 0.3.1 → 0.3.2

Files changed (119) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +4 -0
data/lib/faiss/version.rb +1 -1
data/vendor/faiss/faiss/AutoTune.h +1 -1
data/vendor/faiss/faiss/Clustering.cpp +35 -4
data/vendor/faiss/faiss/Clustering.h +10 -1
data/vendor/faiss/faiss/IVFlib.cpp +4 -1
data/vendor/faiss/faiss/Index.h +21 -6
data/vendor/faiss/faiss/IndexBinaryHNSW.h +1 -1
data/vendor/faiss/faiss/IndexBinaryIVF.cpp +1 -1
data/vendor/faiss/faiss/IndexFastScan.cpp +22 -4
data/vendor/faiss/faiss/IndexFlat.cpp +11 -7
data/vendor/faiss/faiss/IndexFlatCodes.cpp +159 -5
data/vendor/faiss/faiss/IndexFlatCodes.h +20 -3
data/vendor/faiss/faiss/IndexHNSW.cpp +143 -90
data/vendor/faiss/faiss/IndexHNSW.h +52 -3
data/vendor/faiss/faiss/IndexIVF.cpp +3 -3
data/vendor/faiss/faiss/IndexIVF.h +9 -1
data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +15 -0
data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +3 -0
data/vendor/faiss/faiss/IndexIVFFastScan.cpp +130 -57
data/vendor/faiss/faiss/IndexIVFFastScan.h +14 -7
data/vendor/faiss/faiss/IndexIVFPQ.cpp +1 -3
data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +21 -2
data/vendor/faiss/faiss/IndexLattice.cpp +1 -19
data/vendor/faiss/faiss/IndexLattice.h +3 -22
data/vendor/faiss/faiss/IndexNNDescent.cpp +0 -29
data/vendor/faiss/faiss/IndexNNDescent.h +1 -1
data/vendor/faiss/faiss/IndexNSG.h +1 -1
data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +56 -0
data/vendor/faiss/faiss/IndexNeuralNetCodec.h +49 -0
data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
data/vendor/faiss/faiss/IndexRefine.cpp +5 -5
data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +3 -1
data/vendor/faiss/faiss/MetricType.h +7 -2
data/vendor/faiss/faiss/cppcontrib/detail/UintReader.h +95 -17
data/vendor/faiss/faiss/cppcontrib/factory_tools.cpp +152 -0
data/vendor/faiss/faiss/cppcontrib/factory_tools.h +24 -0
data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +83 -30
data/vendor/faiss/faiss/gpu/GpuCloner.cpp +36 -4
data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +6 -0
data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -1
data/vendor/faiss/faiss/gpu/GpuIndex.h +2 -8
data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +282 -0
data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +6 -0
data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +2 -0
data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +25 -0
data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +26 -21
data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +6 -0
data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +8 -5
data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +65 -0
data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +1 -1
data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +6 -0
data/vendor/faiss/faiss/gpu/utils/Timer.cpp +4 -1
data/vendor/faiss/faiss/gpu/utils/Timer.h +1 -1
data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +25 -0
data/vendor/faiss/faiss/impl/AuxIndexStructures.h +9 -1
data/vendor/faiss/faiss/impl/DistanceComputer.h +46 -0
data/vendor/faiss/faiss/impl/FaissAssert.h +4 -2
data/vendor/faiss/faiss/impl/HNSW.cpp +358 -190
data/vendor/faiss/faiss/impl/HNSW.h +43 -22
data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +8 -8
data/vendor/faiss/faiss/impl/LookupTableScaler.h +34 -0
data/vendor/faiss/faiss/impl/NNDescent.cpp +13 -8
data/vendor/faiss/faiss/impl/NSG.cpp +0 -29
data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +1 -0
data/vendor/faiss/faiss/impl/ProductQuantizer.h +5 -1
data/vendor/faiss/faiss/impl/ResultHandler.h +151 -32
data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +719 -102
data/vendor/faiss/faiss/impl/ScalarQuantizer.h +3 -0
data/vendor/faiss/faiss/impl/code_distance/code_distance-avx2.h +5 -0
data/vendor/faiss/faiss/impl/code_distance/code_distance-avx512.h +248 -0
data/vendor/faiss/faiss/impl/index_read.cpp +29 -15
data/vendor/faiss/faiss/impl/index_read_utils.h +37 -0
data/vendor/faiss/faiss/impl/index_write.cpp +28 -10
data/vendor/faiss/faiss/impl/io.cpp +13 -5
data/vendor/faiss/faiss/impl/io.h +4 -4
data/vendor/faiss/faiss/impl/io_macros.h +6 -0
data/vendor/faiss/faiss/impl/platform_macros.h +22 -0
data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +11 -0
data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +1 -1
data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +448 -1
data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +5 -5
data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +1 -1
data/vendor/faiss/faiss/impl/simd_result_handlers.h +143 -59
data/vendor/faiss/faiss/index_factory.cpp +31 -13
data/vendor/faiss/faiss/index_io.h +12 -5
data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +28 -8
data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +3 -0
data/vendor/faiss/faiss/invlists/DirectMap.cpp +9 -1
data/vendor/faiss/faiss/invlists/InvertedLists.cpp +55 -17
data/vendor/faiss/faiss/invlists/InvertedLists.h +18 -9
data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +21 -6
data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +2 -1
data/vendor/faiss/faiss/python/python_callbacks.cpp +3 -3
data/vendor/faiss/faiss/utils/Heap.h +105 -0
data/vendor/faiss/faiss/utils/NeuralNet.cpp +342 -0
data/vendor/faiss/faiss/utils/NeuralNet.h +147 -0
data/vendor/faiss/faiss/utils/bf16.h +36 -0
data/vendor/faiss/faiss/utils/distances.cpp +58 -88
data/vendor/faiss/faiss/utils/distances.h +5 -5
data/vendor/faiss/faiss/utils/distances_simd.cpp +997 -9
data/vendor/faiss/faiss/utils/extra_distances-inl.h +70 -0
data/vendor/faiss/faiss/utils/extra_distances.cpp +85 -137
data/vendor/faiss/faiss/utils/extra_distances.h +3 -2
data/vendor/faiss/faiss/utils/hamming.cpp +1 -1
data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +4 -1
data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +2 -1
data/vendor/faiss/faiss/utils/random.cpp +43 -0
data/vendor/faiss/faiss/utils/random.h +25 -0
data/vendor/faiss/faiss/utils/simdlib.h +10 -1
data/vendor/faiss/faiss/utils/simdlib_avx512.h +296 -0
data/vendor/faiss/faiss/utils/simdlib_neon.h +5 -2
data/vendor/faiss/faiss/utils/simdlib_ppc64.h +1084 -0
data/vendor/faiss/faiss/utils/transpose/transpose-avx512-inl.h +176 -0
data/vendor/faiss/faiss/utils/utils.cpp +10 -3
data/vendor/faiss/faiss/utils/utils.h +3 -0
metadata +16 -4
data/vendor/faiss/faiss/impl/code_distance/code_distance_avx512.h +0 -102

data/vendor/faiss/faiss/IndexNNDescent.cpp CHANGED Viewed

@@ -58,35 +58,6 @@ using storage_idx_t = NNDescent::storage_idx_t;
 namespace {
-/* Wrap the distance computer into one that negates the
-   distances. This makes supporting INNER_PRODUCE search easier */
-struct NegativeDistanceComputer : DistanceComputer {
-    /// owned by this
-    DistanceComputer* basedis;
-    explicit NegativeDistanceComputer(DistanceComputer* basedis)
-            : basedis(basedis) {}
-    void set_query(const float* x) override {
-        basedis->set_query(x);
-    }
-    /// compute distance of vector i to current query
-    float operator()(idx_t i) override {
-        return -(*basedis)(i);
-    }
-    /// compute distance between two stored vectors
-    float symmetric_dis(idx_t i, idx_t j) override {
-        return -basedis->symmetric_dis(i, j);
-    }
-    ~NegativeDistanceComputer() override {
-        delete basedis;
-    }
-};
 DistanceComputer* storage_distance_computer(const Index* storage) {
     if (is_similarity_metric(storage->metric_type)) {
         return new NegativeDistanceComputer(storage->get_distance_computer());

data/vendor/faiss/faiss/IndexNNDescent.h CHANGED Viewed

@@ -26,7 +26,7 @@ struct IndexNNDescent : Index {
     /// Faiss results are 64-bit
-    // the link strcuture
+    // the link structure
     NNDescent nndescent;
     // the sequential storage

data/vendor/faiss/faiss/IndexNSG.h CHANGED Viewed

@@ -24,7 +24,7 @@ namespace faiss {
  * link structure built on top */
 struct IndexNSG : Index {
-    /// the link strcuture
+    /// the link structure
     NSG nsg;
     /// the sequential storage

data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp ADDED Viewed

@@ -0,0 +1,56 @@
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+#include <faiss/IndexNeuralNetCodec.h>
+#include <faiss/impl/FaissAssert.h>
+#include <faiss/utils/hamming.h>
+namespace faiss {
+/*********************************************************
+ * IndexNeuralNetCodec implementation
+ *********************************************************/
+/**
+ * Construct a codec index for d-dimensional vectors encoded as M sub-codes
+ * of nbits bits each. (M * nbits + 7) / 8, i.e. the bit count rounded up to
+ * whole bytes, is passed as the first IndexFlatCodes argument (presumably
+ * the per-vector code size -- confirm against IndexFlatCodes).
+ *
+ * This constructor does not assign `net`.
+ */
+IndexNeuralNetCodec::IndexNeuralNetCodec(
+        int d,
+        int M,
+        int nbits,
+        MetricType metric)
+        : IndexFlatCodes((M * nbits + 7) / 8, d, metric), M(M), nbits(nbits) {
+    // the index starts out flagged as untrained
+    is_trained = false;
+}
+/// Unconditionally raises through FAISS_THROW_MSG: training is not available
+/// from C++. Neither n nor x is read.
+void IndexNeuralNetCodec::train(idx_t n, const float* x) {
+    FAISS_THROW_MSG("Training not implemented in C++, use Pytorch");
+}
+/**
+ * Encode n vectors with the attached network and bit-pack the result.
+ *
+ * @param n      number of vectors in x
+ * @param x      input vectors, wrapped as an n x d tensor
+ * @param codes  output buffer for the packed codes (code_size bytes are
+ *               passed to pack_bitstrings as the per-vector size)
+ *
+ * Raises through FAISS_THROW_MSG when no network is attached, instead of
+ * dereferencing a null `net`.
+ */
+void IndexNeuralNetCodec::sa_encode(idx_t n, const float* x, uint8_t* codes)
+        const {
+    // `net` defaults to nullptr and is only set by subclasses such as
+    // IndexQINCo, so a directly constructed index has nothing to encode with
+    if (net == nullptr) {
+        FAISS_THROW_MSG("IndexNeuralNetCodec: no network attached (net is null)");
+    }
+    nn::Tensor2D x_tensor(n, d, x);
+    nn::Int32Tensor2D codes_tensor = net->encode(x_tensor);
+    // M integer codes of nbits bits each per vector -> packed byte string
+    pack_bitstrings(n, M, nbits, codes_tensor.data(), codes, code_size);
+}
+/**
+ * Unpack n bit-packed codes and decode them with the attached network.
+ *
+ * @param n      number of codes to decode
+ * @param codes  packed input codes
+ * @param x      output buffer; d * n floats are written
+ *
+ * Raises through FAISS_THROW_MSG when no network is attached, instead of
+ * dereferencing a null `net`.
+ */
+void IndexNeuralNetCodec::sa_decode(idx_t n, const uint8_t* codes, float* x)
+        const {
+    // `net` defaults to nullptr and is only set by subclasses such as
+    // IndexQINCo, so a directly constructed index has nothing to decode with
+    if (net == nullptr) {
+        FAISS_THROW_MSG("IndexNeuralNetCodec: no network attached (net is null)");
+    }
+    nn::Int32Tensor2D codes_tensor(n, M);
+    // packed byte strings -> M integer codes of nbits bits each per vector
+    unpack_bitstrings(n, M, nbits, codes, code_size, codes_tensor.data());
+    nn::Tensor2D x_tensor = net->decode(codes_tensor);
+    memcpy(x, x_tensor.data(), d * n * sizeof(float));
+}
+/*********************************************************
+ * IndexQINCo implementation
+ *********************************************************/
+/**
+ * Construct the base codec index with (d, M, nbits, metric), build the
+ * embedded `qinco` member with (d, 1 << nbits, L, M, h), and point `net`
+ * at that member.
+ */
+IndexQINCo::IndexQINCo(int d, int M, int nbits, int L, int h, MetricType metric)
+        : IndexNeuralNetCodec(d, M, nbits, metric),
+          qinco(d, 1 << nbits, L, M, h) {
+    // NOTE(review): net aliases a member of this very object; a default
+    // copy would presumably keep pointing at the source object's qinco --
+    // confirm whether copying is meant to be supported.
+    net = &qinco;
+}
+} // namespace faiss

data/vendor/faiss/faiss/IndexNeuralNetCodec.h ADDED Viewed

@@ -0,0 +1,49 @@
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+#pragma once
+#include <vector>
+#include <faiss/IndexFlatCodes.h>
+#include <faiss/utils/NeuralNet.h>
+namespace faiss {
+/**
+ * Flat-codes index whose standalone encode / decode operations go through a
+ * NeuralNetCodec reached via `net`.
+ */
+struct IndexNeuralNetCodec : IndexFlatCodes {
+    /// network used for encoding and decoding; null unless something assigns
+    /// it. The destructor declared below has an empty body and does not
+    /// delete it.
+    NeuralNetCodec* net = nullptr;
+    /// M and nbits as given to the constructor (stored as size_t)
+    size_t M, nbits;
+    /// All arguments are defaulted, so this also acts as the default
+    /// constructor.
+    explicit IndexNeuralNetCodec(
+            int d = 0,
+            int M = 0,
+            int nbits = 0,
+            MetricType metric = METRIC_L2);
+    void train(idx_t n, const float* x) override;
+    void sa_encode(idx_t n, const float* x, uint8_t* codes) const override;
+    void sa_decode(idx_t n, const uint8_t* codes, float* x) const override;
+    ~IndexNeuralNetCodec() {}
+};
+/**
+ * IndexNeuralNetCodec specialization that embeds its own QINCo network as a
+ * data member.
+ */
+struct IndexQINCo : IndexNeuralNetCodec {
+    /// the network, stored by value inside the index
+    QINCo qinco;
+    /// d, M, nbits and metric mirror the base-class constructor parameters;
+    /// L and h are extra QINCo arguments (meaning not visible here -- see
+    /// the QINCo declaration).
+    IndexQINCo(
+            int d,
+            int M,
+            int nbits,
+            int L,
+            int h,
+            MetricType metric = METRIC_L2);
+    ~IndexQINCo() {}
+};
+} // namespace faiss

data/vendor/faiss/faiss/IndexPreTransform.h CHANGED Viewed

@@ -23,7 +23,7 @@ struct SearchParametersPreTransform : SearchParameters {
 /** Index that applies a LinearTransform transform on vectors before
  *  handing them over to a sub-index */
 struct IndexPreTransform : Index {
-    std::vector<VectorTransform*> chain; ///! chain of tranforms
+    std::vector<VectorTransform*> chain; ///! chain of transforms
     Index* index;                        ///! the sub-index
     bool own_fields; ///! whether pointers are deleted in destructor

data/vendor/faiss/faiss/IndexRefine.cpp CHANGED Viewed

@@ -68,12 +68,12 @@ template <class C>
 static void reorder_2_heaps(
         idx_t n,
         idx_t k,
-        idx_t* labels,
-        float* distances,
+        idx_t* __restrict labels,
+        float* __restrict distances,
         idx_t k_base,
-        const idx_t* base_labels,
-        const float* base_distances) {
-#pragma omp parallel for
+        const idx_t* __restrict base_labels,
+        const float* __restrict base_distances) {
+#pragma omp parallel for if (n > 1)
     for (idx_t i = 0; i < n; i++) {
         idx_t* idxo = labels + i * k;
         float* diso = distances + i * k;

data/vendor/faiss/faiss/IndexScalarQuantizer.cpp CHANGED Viewed

@@ -32,7 +32,9 @@ IndexScalarQuantizer::IndexScalarQuantizer(
         MetricType metric)
         : IndexFlatCodes(0, d, metric), sq(d, qtype) {
     is_trained = qtype == ScalarQuantizer::QT_fp16 ||
-            qtype == ScalarQuantizer::QT_8bit_direct;
+            qtype == ScalarQuantizer::QT_8bit_direct ||
+            qtype == ScalarQuantizer::QT_bf16 ||
+            qtype == ScalarQuantizer::QT_8bit_direct_signed;
     code_size = sq.code_size;
 }

data/vendor/faiss/faiss/MetricType.h CHANGED Viewed

@@ -31,8 +31,13 @@ enum MetricType {
     METRIC_Canberra = 20,
     METRIC_BrayCurtis,
     METRIC_JensenShannon,
-    METRIC_Jaccard, ///< defined as: sum_i(min(a_i, b_i)) / sum_i(max(a_i, b_i))
-                    ///< where a_i, b_i > 0
+    /// sum_i(min(a_i, b_i)) / sum_i(max(a_i, b_i)) where a_i, b_i > 0
+    METRIC_Jaccard,
+    /// Squared Euclidean distance, ignoring NaNs
+    METRIC_NaNEuclidean,
+    /// abs(x | y): the distance to a hyperplane
+    METRIC_ABS_INNER_PRODUCT,
 };
 /// all vector indices are this type

data/vendor/faiss/faiss/cppcontrib/detail/UintReader.h CHANGED Viewed

@@ -7,6 +7,7 @@
 #pragma once
+#include <faiss/impl/platform_macros.h>
 #include <cstdint>
 namespace faiss {
@@ -31,7 +32,11 @@ struct Uint8Reader {
                 if (N_ELEMENTS > CPOS + 3) {
                     const uint32_t code32 = *reinterpret_cast<const uint32_t*>(
                             codes + ELEMENT_TO_READ * 4);
+#ifdef FAISS_BIG_ENDIAN
+                    return (code32) >> 24;
+#else
                     return (code32 & 0x000000FF);
+#endif
                 } else {
                     return codes[CPOS];
                 }
@@ -40,7 +45,11 @@ struct Uint8Reader {
                 if (N_ELEMENTS > CPOS + 2) {
                     const uint32_t code32 = *reinterpret_cast<const uint32_t*>(
                             codes + ELEMENT_TO_READ * 4);
+#ifdef FAISS_BIG_ENDIAN
+                    return (code32 & 0x00FF0000) >> 16;
+#else
                     return (code32 & 0x0000FF00) >> 8;
+#endif
                 } else {
                     return codes[CPOS];
                 }
@@ -49,7 +58,11 @@ struct Uint8Reader {
                 if (N_ELEMENTS > CPOS + 1) {
                     const uint32_t code32 = *reinterpret_cast<const uint32_t*>(
                             codes + ELEMENT_TO_READ * 4);
+#ifdef FAISS_BIG_ENDIAN
+                    return (code32 & 0x0000FF00) >> 8;
+#else
                     return (code32 & 0x00FF0000) >> 16;
+#endif
                 } else {
                     return codes[CPOS];
                 }
@@ -58,7 +71,11 @@ struct Uint8Reader {
                 if (N_ELEMENTS > CPOS) {
                     const uint32_t code32 = *reinterpret_cast<const uint32_t*>(
                             codes + ELEMENT_TO_READ * 4);
+#ifdef FAISS_BIG_ENDIAN
+                    return (code32 & 0x000000FF);
+#else
                     return (code32) >> 24;
+#endif
                 } else {
                     return codes[CPOS];
                 }
@@ -87,40 +104,61 @@ struct Uint10Reader {
         switch (SUB_ELEMENT) {
             case 0: {
                 if (N_ELEMENTS > CPOS + 2) {
-                    const uint32_t code32 = *reinterpret_cast<const uint32_t*>(
+                    uint32_t code32 = *reinterpret_cast<const uint32_t*>(
                             codes + ELEMENT_TO_READ * 5);
+#ifdef FAISS_BIG_ENDIAN
+                    code32 = Swap4Bytes(code32);
+#endif
                     return (code32 & 0b0000001111111111);
                 } else {
-                    const uint16_t code16 = *reinterpret_cast<const uint16_t*>(
+                    uint16_t code16 = *reinterpret_cast<const uint16_t*>(
                             codes + ELEMENT_TO_READ * 5 + 0);
+#ifdef FAISS_BIG_ENDIAN
+                    code16 = Swap2Bytes(code16);
+#endif
                     return (code16 & 0b0000001111111111);
                 }
             }
             case 1: {
                 if (N_ELEMENTS > CPOS + 1) {
-                    const uint32_t code32 = *reinterpret_cast<const uint32_t*>(
+                    uint32_t code32 = *reinterpret_cast<const uint32_t*>(
                             codes + ELEMENT_TO_READ * 5);
+#ifdef FAISS_BIG_ENDIAN
+                    code32 = Swap4Bytes(code32);
+#endif
                     return (code32 & 0b000011111111110000000000) >> 10;
                 } else {
-                    const uint16_t code16 = *reinterpret_cast<const uint16_t*>(
+                    uint16_t code16 = *reinterpret_cast<const uint16_t*>(
                             codes + ELEMENT_TO_READ * 5 + 1);
+#ifdef FAISS_BIG_ENDIAN
+                    code16 = Swap2Bytes(code16);
+#endif
                     return (code16 & 0b0000111111111100) >> 2;
                 }
             }
             case 2: {
                 if (N_ELEMENTS > CPOS) {
-                    const uint32_t code32 = *reinterpret_cast<const uint32_t*>(
+                    uint32_t code32 = *reinterpret_cast<const uint32_t*>(
                             codes + ELEMENT_TO_READ * 5);
+#ifdef FAISS_BIG_ENDIAN
+                    code32 = Swap4Bytes(code32);
+#endif
                     return (code32 & 0b00111111111100000000000000000000) >> 20;
                 } else {
-                    const uint16_t code16 = *reinterpret_cast<const uint16_t*>(
+                    uint16_t code16 = *reinterpret_cast<const uint16_t*>(
                             codes + ELEMENT_TO_READ * 5 + 2);
+#ifdef FAISS_BIG_ENDIAN
+                    code16 = Swap2Bytes(code16);
+#endif
                     return (code16 & 0b0011111111110000) >> 4;
                 }
             }
             case 3: {
-                const uint16_t code16 = *reinterpret_cast<const uint16_t*>(
+                uint16_t code16 = *reinterpret_cast<const uint16_t*>(
                         codes + ELEMENT_TO_READ * 5 + 3);
+#ifdef FAISS_BIG_ENDIAN
+                code16 = Swap2Bytes(code16);
+#endif
                 return (code16 & 0b1111111111000000) >> 6;
             }
         }
@@ -147,45 +185,69 @@ struct Uint12Reader {
         switch (SUB_ELEMENT) {
             case 0: {
                 if (N_ELEMENTS > CPOS + 2) {
-                    const uint32_t code32 = *reinterpret_cast<const uint32_t*>(
+                    uint32_t code32 = *reinterpret_cast<const uint32_t*>(
                             codes + ELEMENT_TO_READ * 6);
+#ifdef FAISS_BIG_ENDIAN
+                    code32 = Swap4Bytes(code32);
+#endif
                     return (code32 & 0b0000111111111111);
                 } else {
-                    const uint16_t code16 = *reinterpret_cast<const uint16_t*>(
+                    uint16_t code16 = *reinterpret_cast<const uint16_t*>(
                             codes + ELEMENT_TO_READ * 6 + 0);
+#ifdef FAISS_BIG_ENDIAN
+                    code16 = Swap2Bytes(code16);
+#endif
                     return (code16 & 0b0000111111111111);
                 }
             }
             case 1: {
                 if (N_ELEMENTS > CPOS + 1) {
-                    const uint32_t code32 = *reinterpret_cast<const uint32_t*>(
+                    uint32_t code32 = *reinterpret_cast<const uint32_t*>(
                             codes + ELEMENT_TO_READ * 6);
+#ifdef FAISS_BIG_ENDIAN
+                    code32 = Swap4Bytes(code32);
+#endif
                     return (code32 & 0b111111111111000000000000) >> 12;
                 } else {
-                    const uint16_t code16 = *reinterpret_cast<const uint16_t*>(
+                    uint16_t code16 = *reinterpret_cast<const uint16_t*>(
                             codes + ELEMENT_TO_READ * 6 + 1);
+#ifdef FAISS_BIG_ENDIAN
+                    code16 = Swap2Bytes(code16);
+#endif
                     return (code16 & 0b1111111111110000) >> 4;
                 }
             }
             case 2: {
                 if (N_ELEMENTS > CPOS + 1) {
-                    const uint32_t code32 = *reinterpret_cast<const uint32_t*>(
+                    uint32_t code32 = *reinterpret_cast<const uint32_t*>(
                             codes + ELEMENT_TO_READ * 6 + 2);
+#ifdef FAISS_BIG_ENDIAN
+                    code32 = Swap4Bytes(code32);
+#endif
                     return (code32 & 0b000011111111111100000000) >> 8;
                 } else {
-                    const uint16_t code16 = *reinterpret_cast<const uint16_t*>(
+                    uint16_t code16 = *reinterpret_cast<const uint16_t*>(
                             codes + ELEMENT_TO_READ * 6 + 3);
+#ifdef FAISS_BIG_ENDIAN
+                    code16 = Swap2Bytes(code16);
+#endif
                     return (code16 & 0b0000111111111111);
                 }
             }
             case 3: {
                 if (N_ELEMENTS > CPOS) {
-                    const uint32_t code32 = *reinterpret_cast<const uint32_t*>(
+                    uint32_t code32 = *reinterpret_cast<const uint32_t*>(
                             codes + ELEMENT_TO_READ * 6 + 2);
+#ifdef FAISS_BIG_ENDIAN
+                    code32 = Swap4Bytes(code32);
+#endif
                     return (code32 & 0b11111111111100000000000000000000) >> 20;
                 } else {
-                    const uint16_t code16 = *reinterpret_cast<const uint16_t*>(
+                    uint16_t code16 = *reinterpret_cast<const uint16_t*>(
                             codes + ELEMENT_TO_READ * 6 + 4);
+#ifdef FAISS_BIG_ENDIAN
+                    code16 = Swap2Bytes(code16);
+#endif
                     return (code16 & 0b1111111111110000) >> 4;
                 }
             }
@@ -208,23 +270,39 @@ struct Uint16Reader {
         switch (SUB_ELEMENT) {
             case 0: {
                 if (N_ELEMENTS > CPOS + 1) {
-                    const uint32_t code32 = *reinterpret_cast<const uint32_t*>(
+                    uint32_t code32 = *reinterpret_cast<const uint32_t*>(
                             codes + ELEMENT_TO_READ * 4);
+#ifdef FAISS_BIG_ENDIAN
+                    code32 = Swap4Bytes(code32);
+#endif
                     return (code32 & 0x0000FFFF);
                 } else {
                     const uint16_t* const __restrict codesFp16 =
                             reinterpret_cast<const uint16_t*>(codes);
+#ifdef FAISS_BIG_ENDIAN
+                    uint16_t rt = codesFp16[CPOS];
+                    rt = Swap2Bytes(rt);
+                    return rt;
+#endif
                     return codesFp16[CPOS];
                 }
             }
             case 1: {
                 if (N_ELEMENTS > CPOS) {
-                    const uint32_t code32 = *reinterpret_cast<const uint32_t*>(
+                    uint32_t code32 = *reinterpret_cast<const uint32_t*>(
                             codes + ELEMENT_TO_READ * 4);
+#ifdef FAISS_BIG_ENDIAN
+                    code32 = Swap4Bytes(code32);
+#endif
                     return code32 >> 16;
                 } else {
                     const uint16_t* const __restrict codesFp16 =
                             reinterpret_cast<const uint16_t*>(codes);
+#ifdef FAISS_BIG_ENDIAN
+                    uint16_t rt = codesFp16[CPOS];
+                    rt = Swap2Bytes(rt);
+                    return rt;
+#endif
                     return codesFp16[CPOS];
                 }
             }

data/vendor/faiss/faiss/cppcontrib/factory_tools.cpp ADDED Viewed

@@ -0,0 +1,152 @@
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+// -*- c++ -*-
+#include <faiss/cppcontrib/factory_tools.h>
+#include <map>
+namespace faiss {
+namespace {
+// Scalar quantizer type -> name string. Every value already starts with
+// "SQ". Types without an entry here are not mapped.
+const std::map<faiss::ScalarQuantizer::QuantizerType, std::string> sq_types = {
+        {faiss::ScalarQuantizer::QT_8bit, "SQ8"},
+        {faiss::ScalarQuantizer::QT_4bit, "SQ4"},
+        {faiss::ScalarQuantizer::QT_6bit, "SQ6"},
+        {faiss::ScalarQuantizer::QT_fp16, "SQfp16"},
+        {faiss::ScalarQuantizer::QT_bf16, "SQbf16"},
+        {faiss::ScalarQuantizer::QT_8bit_direct_signed, "SQ8_direct_signed"},
+        {faiss::ScalarQuantizer::QT_8bit_direct, "SQ8_direct"},
+};
+// Returns half of cum_nneighbor_per_level[1] for the given HNSW index, or 0
+// when that table has fewer than two entries.
+int get_hnsw_M(const faiss::IndexHNSW* index) {
+    // Element [1] is read below, so at least two entries are required; a
+    // size of exactly 1 would otherwise be an out-of-bounds access.
+    if (index->hnsw.cum_nneighbor_per_level.size() >= 2) {
+        return index->hnsw.cum_nneighbor_per_level[1] / 2;
+    }
+    // Avoid runtime error, just return 0.
+    return 0;
+}
+} // namespace
+// Reference for reverse_index_factory:
+// https://github.com/facebookresearch/faiss/blob/838612c9d7f2f619811434ec9209c020f44107cb/contrib/factory_tools.py#L81
+// Best-effort reconstruction of a factory-style description string for an
+// index, determined by dynamic_cast-ing it against the index types handled
+// below. Intended for logging: any combination that is not recognized yields
+// an empty string rather than an error.
+std::string reverse_index_factory(const faiss::Index* index) {
+    // Name for a scalar quantizer type as listed in sq_types (the values
+    // already carry the "SQ" prefix). Empty when the type has no entry, so an
+    // unmapped type cannot raise std::out_of_range from this helper.
+    const auto sq_token = [](faiss::ScalarQuantizer::QuantizerType qtype) {
+        const auto it = sq_types.find(qtype);
+        return it == sq_types.end() ? std::string() : it->second;
+    };
+    std::string prefix;
+    if (dynamic_cast<const faiss::IndexFlat*>(index)) {
+        return "Flat";
+    } else if (
+            const faiss::IndexIVF* ivf_index =
+                    dynamic_cast<const faiss::IndexIVF*>(index)) {
+        // First describe the coarse quantizer ...
+        const faiss::Index* quantizer = ivf_index->quantizer;
+        if (dynamic_cast<const faiss::IndexFlat*>(quantizer)) {
+            prefix = "IVF" + std::to_string(ivf_index->nlist);
+        } else if (
+                const faiss::MultiIndexQuantizer* miq =
+                        dynamic_cast<const faiss::MultiIndexQuantizer*>(
+                                quantizer)) {
+            prefix = "IMI" + std::to_string(miq->pq.M) + "x" +
+                    std::to_string(miq->pq.nbits);
+        } else if (
+                const faiss::IndexHNSW* hnsw_index =
+                        dynamic_cast<const faiss::IndexHNSW*>(quantizer)) {
+            prefix = "IVF" + std::to_string(ivf_index->nlist) + "_HNSW" +
+                    std::to_string(get_hnsw_M(hnsw_index));
+        } else {
+            prefix = "IVF" + std::to_string(ivf_index->nlist) + "(" +
+                    reverse_index_factory(quantizer) + ")";
+        }
+        // ... then the encoding used inside the inverted lists.
+        if (dynamic_cast<const faiss::IndexIVFFlat*>(ivf_index)) {
+            return prefix + ",Flat";
+        } else if (
+                auto sq_index =
+                        dynamic_cast<const faiss::IndexIVFScalarQuantizer*>(
+                                ivf_index)) {
+            const std::string token = sq_token(sq_index->sq.qtype);
+            if (token.empty()) {
+                // Avoid runtime error, just return empty string for logging.
+                return "";
+            }
+            return prefix + "," + token;
+        } else if (
+                const faiss::IndexIVFPQ* ivfpq_index =
+                        dynamic_cast<const faiss::IndexIVFPQ*>(ivf_index)) {
+            return prefix + ",PQ" + std::to_string(ivfpq_index->pq.M) + "x" +
+                    std::to_string(ivfpq_index->pq.nbits);
+        } else if (
+                const faiss::IndexIVFPQFastScan* ivfpqfs_index =
+                        dynamic_cast<const faiss::IndexIVFPQFastScan*>(
+                                ivf_index)) {
+            return prefix + ",PQ" + std::to_string(ivfpqfs_index->pq.M) + "x" +
+                    std::to_string(ivfpqfs_index->pq.nbits) + "fs";
+        }
+        // Unrecognized IVF variant: falls through to the final return "".
+    } else if (
+            const faiss::IndexPreTransform* pretransform_index =
+                    dynamic_cast<const faiss::IndexPreTransform*>(index)) {
+        // Only a chain of exactly one transform is handled.
+        if (pretransform_index->chain.size() != 1) {
+            // Avoid runtime error, just return empty string for logging.
+            return "";
+        }
+        const faiss::VectorTransform* vt = pretransform_index->chain.at(0);
+        if (const faiss::OPQMatrix* opq_matrix =
+                    dynamic_cast<const faiss::OPQMatrix*>(vt)) {
+            prefix = "OPQ" + std::to_string(opq_matrix->M) + "_" +
+                    std::to_string(opq_matrix->d_out);
+        } else if (
+                const faiss::ITQTransform* itq_transform =
+                        dynamic_cast<const faiss::ITQTransform*>(vt)) {
+            prefix = "ITQ" + std::to_string(itq_transform->itq.d_out);
+        } else if (
+                const faiss::PCAMatrix* pca_matrix =
+                        dynamic_cast<const faiss::PCAMatrix*>(vt)) {
+            assert(pca_matrix->eigen_power == 0);
+            prefix = "PCA" +
+                    std::string(pca_matrix->random_rotation ? "R" : "") +
+                    std::to_string(pca_matrix->d_out);
+        } else {
+            // Avoid runtime error, just return empty string for logging.
+            return "";
+        }
+        return prefix + "," + reverse_index_factory(pretransform_index->index);
+    } else if (
+            const faiss::IndexHNSW* hnsw_index =
+                    dynamic_cast<const faiss::IndexHNSW*>(index)) {
+        return "HNSW" + std::to_string(get_hnsw_M(hnsw_index));
+    } else if (
+            const faiss::IndexRefine* refine_index =
+                    dynamic_cast<const faiss::IndexRefine*>(index)) {
+        return reverse_index_factory(refine_index->base_index) + ",Refine(" +
+                reverse_index_factory(refine_index->refine_index) + ")";
+    } else if (
+            const faiss::IndexPQFastScan* pqfs_index =
+                    dynamic_cast<const faiss::IndexPQFastScan*>(index)) {
+        return std::string("PQ") + std::to_string(pqfs_index->pq.M) + "x" +
+                std::to_string(pqfs_index->pq.nbits) + "fs";
+    } else if (
+            const faiss::IndexPQ* pq_index =
+                    dynamic_cast<const faiss::IndexPQ*>(index)) {
+        return std::string("PQ") + std::to_string(pq_index->pq.M) + "x" +
+                std::to_string(pq_index->pq.nbits);
+    } else if (
+            const faiss::IndexLSH* lsh_index =
+                    dynamic_cast<const faiss::IndexLSH*>(index)) {
+        std::string result = "LSH";
+        if (lsh_index->rotate_data) {
+            result += "r";
+        }
+        if (lsh_index->train_thresholds) {
+            result += "t";
+        }
+        return result;
+    } else if (
+            const faiss::IndexScalarQuantizer* sq_index =
+                    dynamic_cast<const faiss::IndexScalarQuantizer*>(index)) {
+        // The sq_types values already start with "SQ"; prepending another
+        // "SQ" here would produce strings such as "SQSQ8". An unmapped type
+        // yields the empty string.
+        return sq_token(sq_index->sq.qtype);
+    }
+    // Avoid runtime error, just return empty string for logging.
+    return "";
+}
+} // namespace faiss

data/vendor/faiss/faiss/cppcontrib/factory_tools.h ADDED Viewed

@@ -0,0 +1,24 @@
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+// -*- c++ -*-
+#pragma once
+#include <faiss/IndexHNSW.h>
+#include <faiss/IndexIVFFlat.h>
+#include <faiss/IndexIVFPQFastScan.h>
+#include <faiss/IndexLSH.h>
+#include <faiss/IndexPQFastScan.h>
+#include <faiss/IndexPreTransform.h>
+#include <faiss/IndexRefine.h>
+namespace faiss {
+/// Builds a factory-style description string for `index` by inspecting its
+/// dynamic type. Returns an empty string for index types or combinations
+/// the implementation does not recognize.
+std::string reverse_index_factory(const faiss::Index* index);
+} // namespace faiss