faiss 0.2.3 → 0.2.4

Files changed (63)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/LICENSE.txt +1 -1
  4. data/lib/faiss/version.rb +1 -1
  5. data/vendor/faiss/faiss/Clustering.cpp +32 -0
  6. data/vendor/faiss/faiss/Clustering.h +14 -0
  7. data/vendor/faiss/faiss/Index.h +1 -1
  8. data/vendor/faiss/faiss/Index2Layer.cpp +19 -92
  9. data/vendor/faiss/faiss/Index2Layer.h +2 -16
  10. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +407 -0
  11. data/vendor/faiss/faiss/{IndexResidual.h → IndexAdditiveQuantizer.h} +101 -58
  12. data/vendor/faiss/faiss/IndexFlat.cpp +22 -52
  13. data/vendor/faiss/faiss/IndexFlat.h +9 -15
  14. data/vendor/faiss/faiss/IndexFlatCodes.cpp +67 -0
  15. data/vendor/faiss/faiss/IndexFlatCodes.h +47 -0
  16. data/vendor/faiss/faiss/IndexIVF.cpp +79 -7
  17. data/vendor/faiss/faiss/IndexIVF.h +25 -7
  18. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +316 -0
  19. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +121 -0
  20. data/vendor/faiss/faiss/IndexIVFFlat.cpp +9 -12
  21. data/vendor/faiss/faiss/IndexIVFPQ.cpp +5 -4
  22. data/vendor/faiss/faiss/IndexIVFPQ.h +1 -1
  23. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +60 -39
  24. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +21 -6
  25. data/vendor/faiss/faiss/IndexLSH.cpp +4 -30
  26. data/vendor/faiss/faiss/IndexLSH.h +2 -15
  27. data/vendor/faiss/faiss/IndexNNDescent.cpp +0 -2
  28. data/vendor/faiss/faiss/IndexNSG.cpp +0 -2
  29. data/vendor/faiss/faiss/IndexPQ.cpp +2 -51
  30. data/vendor/faiss/faiss/IndexPQ.h +2 -17
  31. data/vendor/faiss/faiss/IndexRefine.cpp +28 -0
  32. data/vendor/faiss/faiss/IndexRefine.h +10 -0
  33. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +2 -28
  34. data/vendor/faiss/faiss/IndexScalarQuantizer.h +2 -16
  35. data/vendor/faiss/faiss/VectorTransform.cpp +2 -1
  36. data/vendor/faiss/faiss/VectorTransform.h +3 -0
  37. data/vendor/faiss/faiss/clone_index.cpp +3 -2
  38. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +2 -2
  39. data/vendor/faiss/faiss/gpu/GpuIcmEncoder.h +60 -0
  40. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +257 -24
  41. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +69 -9
  42. data/vendor/faiss/faiss/impl/HNSW.cpp +10 -5
  43. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +393 -210
  44. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +100 -28
  45. data/vendor/faiss/faiss/impl/NSG.cpp +0 -3
  46. data/vendor/faiss/faiss/impl/NSG.h +1 -1
  47. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +357 -47
  48. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +65 -7
  49. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +12 -19
  50. data/vendor/faiss/faiss/impl/index_read.cpp +102 -19
  51. data/vendor/faiss/faiss/impl/index_write.cpp +66 -16
  52. data/vendor/faiss/faiss/impl/io.cpp +1 -1
  53. data/vendor/faiss/faiss/impl/io_macros.h +20 -0
  54. data/vendor/faiss/faiss/impl/kmeans1d.cpp +301 -0
  55. data/vendor/faiss/faiss/impl/kmeans1d.h +48 -0
  56. data/vendor/faiss/faiss/index_factory.cpp +585 -414
  57. data/vendor/faiss/faiss/index_factory.h +3 -0
  58. data/vendor/faiss/faiss/utils/distances.cpp +4 -2
  59. data/vendor/faiss/faiss/utils/distances.h +36 -3
  60. data/vendor/faiss/faiss/utils/distances_simd.cpp +50 -0
  61. data/vendor/faiss/faiss/utils/utils.h +1 -1
  62. metadata +12 -5
  63. data/vendor/faiss/faiss/IndexResidual.cpp +0 -291
data/vendor/faiss/faiss/VectorTransform.cpp

@@ -357,6 +357,7 @@ PCAMatrix::PCAMatrix(
  is_trained = false;
  max_points_per_d = 1000;
  balanced_bins = 0;
+ epsilon = 0;
  }

  namespace {
@@ -620,7 +621,7 @@ void PCAMatrix::prepare_Ab() {
  if (eigen_power != 0) {
  float* ai = A.data();
  for (int i = 0; i < d_out; i++) {
- float factor = pow(eigenvalues[i], eigen_power);
+ float factor = pow(eigenvalues[i] + epsilon, eigen_power);
  for (int j = 0; j < d_in; j++)
  *ai++ *= factor;
  }

data/vendor/faiss/faiss/VectorTransform.h

@@ -129,6 +129,9 @@ struct PCAMatrix : LinearTransform {
  */
  float eigen_power;

+ /// value added to eigenvalues to avoid division by 0 when whitening
+ float epsilon;
+
  /// random rotation after PCA
  bool random_rotation;
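
The effect of the new epsilon field is easiest to see with whitening, where eigen_power is negative: a zero (or near-zero) eigenvalue used to send the scaling factor to infinity. A minimal standalone sketch of the arithmetic (plain C++, not faiss API; the non-zero epsilon below is only illustrative, the new field defaults to 0):

#include <cmath>
#include <cstdio>

int main() {
    float eigen_power = -0.5f;   // whitening uses a negative exponent
    float epsilon = 1e-6f;       // hypothetical value; faiss keeps the default at 0
    float eigenvalues[] = {4.0f, 1.0f, 0.0f};
    for (float ev : eigenvalues) {
        float factor_old = std::pow(ev, eigen_power);           // inf when ev == 0
        float factor_new = std::pow(ev + epsilon, eigen_power); // stays finite
        std::printf("ev=%g old=%g new=%g\n", ev, factor_old, factor_new);
    }
    return 0;
}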

data/vendor/faiss/faiss/clone_index.cpp

@@ -15,6 +15,7 @@
  #include <faiss/impl/FaissAssert.h>

  #include <faiss/Index2Layer.h>
+ #include <faiss/IndexAdditiveQuantizer.h>
  #include <faiss/IndexFlat.h>
  #include <faiss/IndexHNSW.h>
  #include <faiss/IndexIVF.h>
@@ -27,7 +28,6 @@
  #include <faiss/IndexNSG.h>
  #include <faiss/IndexPQ.h>
  #include <faiss/IndexPreTransform.h>
- #include <faiss/IndexResidual.h>
  #include <faiss/IndexScalarQuantizer.h>
  #include <faiss/MetaIndexes.h>
  #include <faiss/VectorTransform.h>
@@ -80,9 +80,10 @@ Index* Cloner::clone_Index(const Index* index) {
  TRYCLONE(IndexFlatIP, index)
  TRYCLONE(IndexFlat, index)
  TRYCLONE(IndexLattice, index)
- TRYCLONE(IndexResidual, index)
+ TRYCLONE(IndexResidualQuantizer, index)
  TRYCLONE(IndexScalarQuantizer, index)
  TRYCLONE(MultiIndexQuantizer, index)
+ TRYCLONE(ResidualCoarseQuantizer, index)
  if (const IndexIVF* ivf = dynamic_cast<const IndexIVF*>(index)) {
  IndexIVF* res = clone_IndexIVF(ivf);
  if (ivf->invlists == nullptr) {

data/vendor/faiss/faiss/gpu/GpuCloner.cpp

@@ -40,7 +40,7 @@ void ToCPUCloner::merge_index(Index* dst, Index* src, bool successive_ids) {
  auto ifl2 = dynamic_cast<const IndexFlat*>(src);
  FAISS_ASSERT(ifl2);
  FAISS_ASSERT(successive_ids);
- ifl->add(ifl2->ntotal, ifl2->xb.data());
+ ifl->add(ifl2->ntotal, ifl2->get_xb());
  } else if (auto ifl = dynamic_cast<IndexIVFFlat*>(dst)) {
  auto ifl2 = dynamic_cast<IndexIVFFlat*>(src);
  FAISS_ASSERT(ifl2);
@@ -329,7 +329,7 @@ Index* ToGpuClonerMultiple::clone_Index_to_shards(const Index* index) {
  if (index->ntotal > 0) {
  long i0 = index->ntotal * i / n;
  long i1 = index->ntotal * (i + 1) / n;
- shards[i]->add(i1 - i0, index_flat->xb.data() + i0 * index->d);
+ shards[i]->add(i1 - i0, index_flat->get_xb() + i0 * index->d);
  }
  }
  }
data/vendor/faiss/faiss/gpu/GpuIcmEncoder.h (new file)

@@ -0,0 +1,60 @@
+ /**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+ #pragma once
+
+ #include <faiss/impl/LocalSearchQuantizer.h>
+
+ #include <memory>
+
+ namespace faiss {
+ namespace gpu {
+
+ class GpuResourcesProvider;
+ struct IcmEncoderShards;
+
+ /** Perform LSQ encoding on GPU.
+ *
+ * Split input vectors to different devices and call IcmEncoderImpl::encode
+ * to encode them
+ */
+ class GpuIcmEncoder : public lsq::IcmEncoder {
+ public:
+ GpuIcmEncoder(
+ const LocalSearchQuantizer* lsq,
+ const std::vector<GpuResourcesProvider*>& provs,
+ const std::vector<int>& devices);
+
+ ~GpuIcmEncoder();
+
+ GpuIcmEncoder(const GpuIcmEncoder&) = delete;
+ GpuIcmEncoder& operator=(const GpuIcmEncoder&) = delete;
+
+ void set_binary_term() override;
+
+ void encode(
+ int32_t* codes,
+ const float* x,
+ std::mt19937& gen,
+ size_t n,
+ size_t ils_iters) const override;
+
+ private:
+ std::unique_ptr<IcmEncoderShards> shards;
+ };
+
+ struct GpuIcmEncoderFactory : public lsq::IcmEncoderFactory {
+ explicit GpuIcmEncoderFactory(int ngpus = 1);
+
+ lsq::IcmEncoder* get(const LocalSearchQuantizer* lsq) override;
+
+ std::vector<GpuResourcesProvider*> provs;
+ std::vector<int> devices;
+ };
+
+ } // namespace gpu
+ } // namespace faiss
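
The header above only declares the GPU encoder; the intended wiring is through the icm_encoder_factory hook that this release adds to LocalSearchQuantizer, so the iterated conditional modes (ICM) encoding step of LSQ runs on one or more GPUs. A hedged usage sketch (it assumes the icm_encoder_factory member declared in impl/LocalSearchQuantizer.h of this version; this call pattern is not shown in the diff itself):

#include <faiss/gpu/GpuIcmEncoder.h>
#include <faiss/impl/LocalSearchQuantizer.h>

void use_gpu_icm_encoder(faiss::LocalSearchQuantizer& lsq) {
    // Route LSQ's ICM encoding to 2 GPUs; the factory builds a GpuIcmEncoder
    // that shards the input vectors across the devices.
    lsq.icm_encoder_factory = new faiss::gpu::GpuIcmEncoderFactory(/*ngpus=*/2);
}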

data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp

@@ -8,7 +8,6 @@
  // -*- c++ -*-

  #include <faiss/impl/AdditiveQuantizer.h>
- #include <faiss/impl/FaissAssert.h>

  #include <cstddef>
  #include <cstdio>
@@ -18,9 +17,10 @@

  #include <algorithm>

+ #include <faiss/impl/FaissAssert.h>
  #include <faiss/utils/Heap.h>
  #include <faiss/utils/distances.h>
- #include <faiss/utils/hamming.h> // BitstringWriter
+ #include <faiss/utils/hamming.h>
  #include <faiss/utils/utils.h>

  extern "C" {
@@ -42,51 +42,125 @@ int sgemm_(
  FINTEGER* ldc);
  }

- namespace {
-
- // c and a and b can overlap
- void fvec_add(size_t d, const float* a, const float* b, float* c) {
- for (size_t i = 0; i < d; i++) {
- c[i] = a[i] + b[i];
- }
- }
+ namespace faiss {

- void fvec_add(size_t d, const float* a, float b, float* c) {
- for (size_t i = 0; i < d; i++) {
- c[i] = a[i] + b;
- }
+ AdditiveQuantizer::AdditiveQuantizer(
+ size_t d,
+ const std::vector<size_t>& nbits,
+ Search_type_t search_type)
+ : d(d),
+ M(nbits.size()),
+ nbits(nbits),
+ verbose(false),
+ is_trained(false),
+ search_type(search_type) {
+ norm_max = norm_min = NAN;
+ code_size = 0;
+ tot_bits = 0;
+ total_codebook_size = 0;
+ only_8bit = false;
+ set_derived_values();
  }

- } // namespace
-
- namespace faiss {
+ AdditiveQuantizer::AdditiveQuantizer()
+ : AdditiveQuantizer(0, std::vector<size_t>()) {}

  void AdditiveQuantizer::set_derived_values() {
  tot_bits = 0;
- is_byte_aligned = true;
+ only_8bit = true;
  codebook_offsets.resize(M + 1, 0);
  for (int i = 0; i < M; i++) {
  int nbit = nbits[i];
  size_t k = 1 << nbit;
  codebook_offsets[i + 1] = codebook_offsets[i] + k;
  tot_bits += nbit;
- if (nbit % 8 != 0) {
- is_byte_aligned = false;
+ if (nbit != 0) {
+ only_8bit = false;
  }
  }
  total_codebook_size = codebook_offsets[M];
+ switch (search_type) {
+ case ST_decompress:
+ case ST_LUT_nonorm:
+ case ST_norm_from_LUT:
+ break; // nothing to add
+ case ST_norm_float:
+ tot_bits += 32;
+ break;
+ case ST_norm_qint8:
+ case ST_norm_cqint8:
+ tot_bits += 8;
+ break;
+ case ST_norm_qint4:
+ case ST_norm_cqint4:
+ tot_bits += 4;
+ break;
+ }
+
  // convert bits to bytes
  code_size = (tot_bits + 7) / 8;
  }

+ namespace {
+
+ // TODO
+ // https://stackoverflow.com/questions/31631224/hacks-for-clamping-integer-to-0-255-and-doubles-to-0-0-1-0
+
+ uint8_t encode_qint8(float x, float amin, float amax) {
+ float x1 = (x - amin) / (amax - amin) * 256;
+ int32_t xi = int32_t(floor(x1));
+
+ return xi < 0 ? 0 : xi > 255 ? 255 : xi;
+ }
+
+ uint8_t encode_qint4(float x, float amin, float amax) {
+ float x1 = (x - amin) / (amax - amin) * 16;
+ int32_t xi = int32_t(floor(x1));
+
+ return xi < 0 ? 0 : xi > 15 ? 15 : xi;
+ }
+
+ float decode_qint8(uint8_t i, float amin, float amax) {
+ return (i + 0.5) / 256 * (amax - amin) + amin;
+ }
+
+ float decode_qint4(uint8_t i, float amin, float amax) {
+ return (i + 0.5) / 16 * (amax - amin) + amin;
+ }
+
+ } // anonymous namespace
+
+ uint32_t AdditiveQuantizer::encode_qcint(float x) const {
+ idx_t id;
+ qnorm.assign(idx_t(1), &x, &id, idx_t(1));
+ return uint32_t(id);
+ }
+
+ float AdditiveQuantizer::decode_qcint(uint32_t c) const {
+ return qnorm.get_xb()[c];
+ }
+
  void AdditiveQuantizer::pack_codes(
  size_t n,
  const int32_t* codes,
  uint8_t* packed_codes,
- int64_t ld_codes) const {
+ int64_t ld_codes,
+ const float* norms) const {
  if (ld_codes == -1) {
  ld_codes = M;
  }
+ std::vector<float> norm_buf;
+ if (search_type == ST_norm_float || search_type == ST_norm_qint4 ||
+ search_type == ST_norm_qint8 || search_type == ST_norm_cqint8 ||
+ search_type == ST_norm_cqint4) {
+ if (!norms) {
+ norm_buf.resize(n);
+ std::vector<float> x_recons(n * d);
+ decode_unpacked(codes, x_recons.data(), n, ld_codes);
+ fvec_norms_L2sqr(norm_buf.data(), x_recons.data(), d, n);
+ norms = norm_buf.data();
+ }
+ }
  #pragma omp parallel for if (n > 1000)
  for (int64_t i = 0; i < n; i++) {
  const int32_t* codes1 = codes + i * ld_codes;
@@ -94,6 +168,35 @@ void AdditiveQuantizer::pack_codes(
  for (int m = 0; m < M; m++) {
  bsw.write(codes1[m], nbits[m]);
  }
+ switch (search_type) {
+ case ST_decompress:
+ case ST_LUT_nonorm:
+ case ST_norm_from_LUT:
+ break;
+ case ST_norm_float:
+ bsw.write(*(uint32_t*)&norms[i], 32);
+ break;
+ case ST_norm_qint8: {
+ uint8_t b = encode_qint8(norms[i], norm_min, norm_max);
+ bsw.write(b, 8);
+ break;
+ }
+ case ST_norm_qint4: {
+ uint8_t b = encode_qint4(norms[i], norm_min, norm_max);
+ bsw.write(b, 4);
+ break;
+ }
+ case ST_norm_cqint8: {
+ uint32_t b = encode_qcint(norms[i]);
+ bsw.write(b, 8);
+ break;
+ }
+ case ST_norm_cqint4: {
+ uint32_t b = encode_qcint(norms[i]);
+ bsw.write(b, 4);
+ break;
+ }
+ }
  }
  }
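
The 8-bit uniform norm quantizer used above maps a norm into [norm_min, norm_max] and reconstructs at the bin center. A standalone roundtrip of that arithmetic (the helpers are copied from the anonymous namespace above so the snippet compiles on its own; the min/max/norm values are made up):

#include <cmath>
#include <cstdint>
#include <cstdio>

static uint8_t encode_qint8(float x, float amin, float amax) {
    float x1 = (x - amin) / (amax - amin) * 256;
    int32_t xi = int32_t(std::floor(x1));
    return xi < 0 ? 0 : xi > 255 ? 255 : xi; // clamp to [0, 255]
}

static float decode_qint8(uint8_t i, float amin, float amax) {
    return (i + 0.5f) / 256 * (amax - amin) + amin; // bin center
}

int main() {
    float norm_min = 0.0f, norm_max = 100.0f; // bounds would come from training; made up here
    float norm = 42.3f;
    uint8_t code = encode_qint8(norm, norm_min, norm_max); // -> 108
    float back = decode_qint8(code, norm_min, norm_max);   // -> ~42.4
    std::printf("norm=%.2f code=%u decoded=%.2f\n", norm, code, back);
    return 0;
}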

@@ -118,10 +221,39 @@ void AdditiveQuantizer::decode(const uint8_t* code, float* x, size_t n) const {
  }
  }

+ void AdditiveQuantizer::decode_unpacked(
+ const int32_t* code,
+ float* x,
+ size_t n,
+ int64_t ld_codes) const {
+ FAISS_THROW_IF_NOT_MSG(
+ is_trained, "The additive quantizer is not trained yet.");
+
+ if (ld_codes == -1) {
+ ld_codes = M;
+ }
+
+ // standard additive quantizer decoding
+ #pragma omp parallel for if (n > 1000)
+ for (int64_t i = 0; i < n; i++) {
+ const int32_t* codesi = code + i * ld_codes;
+ float* xi = x + i * d;
+ for (int m = 0; m < M; m++) {
+ int idx = codesi[m];
+ const float* c = codebooks.data() + d * (codebook_offsets[m] + idx);
+ if (m == 0) {
+ memcpy(xi, c, sizeof(*x) * d);
+ } else {
+ fvec_add(d, xi, c, xi);
+ }
+ }
+ }
+ }
+
  AdditiveQuantizer::~AdditiveQuantizer() {}

  /****************************************************************************
- * Support for fast distance computations and search with additive quantizer
+ * Support for fast distance computations in centroids
  ****************************************************************************/

  void AdditiveQuantizer::compute_centroid_norms(float* norms) const {
@@ -201,7 +333,7 @@ void compute_inner_prod_with_LUT(

  } // anonymous namespace

- void AdditiveQuantizer::knn_exact_inner_product(
+ void AdditiveQuantizer::knn_centroids_inner_product(
  idx_t n,
  const float* xq,
  idx_t k,
@@ -227,7 +359,7 @@ void AdditiveQuantizer::knn_exact_inner_product(
  }
  }

- void AdditiveQuantizer::knn_exact_L2(
+ void AdditiveQuantizer::knn_centroids_L2(
  idx_t n,
  const float* xq,
  idx_t k,
@@ -267,4 +399,105 @@ void AdditiveQuantizer::knn_exact_L2(
  }
  }

+ /****************************************************************************
+ * Support for fast distance computations in codes
+ ****************************************************************************/
+
+ namespace {
+
+ float accumulate_IPs(
+ const AdditiveQuantizer& aq,
+ BitstringReader& bs,
+ const uint8_t* codes,
+ const float* LUT) {
+ float accu = 0;
+ for (int m = 0; m < aq.M; m++) {
+ size_t nbit = aq.nbits[m];
+ int idx = bs.read(nbit);
+ accu += LUT[idx];
+ LUT += (uint64_t)1 << nbit;
+ }
+ return accu;
+ }
+
+ } // anonymous namespace
+
+ template <>
+ float AdditiveQuantizer::
+ compute_1_distance_LUT<true, AdditiveQuantizer::ST_LUT_nonorm>(
+ const uint8_t* codes,
+ const float* LUT) const {
+ BitstringReader bs(codes, code_size);
+ return accumulate_IPs(*this, bs, codes, LUT);
+ }
+
+ template <>
+ float AdditiveQuantizer::
+ compute_1_distance_LUT<false, AdditiveQuantizer::ST_LUT_nonorm>(
+ const uint8_t* codes,
+ const float* LUT) const {
+ BitstringReader bs(codes, code_size);
+ return -accumulate_IPs(*this, bs, codes, LUT);
+ }
+
+ template <>
+ float AdditiveQuantizer::
+ compute_1_distance_LUT<false, AdditiveQuantizer::ST_norm_float>(
+ const uint8_t* codes,
+ const float* LUT) const {
+ BitstringReader bs(codes, code_size);
+ float accu = accumulate_IPs(*this, bs, codes, LUT);
+ uint32_t norm_i = bs.read(32);
+ float norm2 = *(float*)&norm_i;
+ return norm2 - 2 * accu;
+ }
+
+ template <>
+ float AdditiveQuantizer::
+ compute_1_distance_LUT<false, AdditiveQuantizer::ST_norm_cqint8>(
+ const uint8_t* codes,
+ const float* LUT) const {
+ BitstringReader bs(codes, code_size);
+ float accu = accumulate_IPs(*this, bs, codes, LUT);
+ uint32_t norm_i = bs.read(8);
+ float norm2 = decode_qcint(norm_i);
+ return norm2 - 2 * accu;
+ }
+
+ template <>
+ float AdditiveQuantizer::
+ compute_1_distance_LUT<false, AdditiveQuantizer::ST_norm_cqint4>(
+ const uint8_t* codes,
+ const float* LUT) const {
+ BitstringReader bs(codes, code_size);
+ float accu = accumulate_IPs(*this, bs, codes, LUT);
+ uint32_t norm_i = bs.read(4);
+ float norm2 = decode_qcint(norm_i);
+ return norm2 - 2 * accu;
+ }
+
+ template <>
+ float AdditiveQuantizer::
+ compute_1_distance_LUT<false, AdditiveQuantizer::ST_norm_qint8>(
+ const uint8_t* codes,
+ const float* LUT) const {
+ BitstringReader bs(codes, code_size);
+ float accu = accumulate_IPs(*this, bs, codes, LUT);
+ uint32_t norm_i = bs.read(8);
+ float norm2 = decode_qint8(norm_i, norm_min, norm_max);
+ return norm2 - 2 * accu;
+ }
+
+ template <>
+ float AdditiveQuantizer::
+ compute_1_distance_LUT<false, AdditiveQuantizer::ST_norm_qint4>(
+ const uint8_t* codes,
+ const float* LUT) const {
+ BitstringReader bs(codes, code_size);
+ float accu = accumulate_IPs(*this, bs, codes, LUT);
+ uint32_t norm_i = bs.read(4);
+ float norm2 = decode_qint4(norm_i, norm_min, norm_max);
+ return norm2 - 2 * accu;
+ }
+
  } // namespace faiss
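
What the new specializations compute, in one equation: accumulate_IPs sums one look-up-table entry per codebook, which (assuming compute_LUT fills the tables with inner products between the query x and the codebook entries, as the compute_inner_prod_with_LUT helper above suggests) yields the inner product <x, y> for an encoded vector y. The L2 variants then rely on the decomposition

    ||x - y||^2 = ||x||^2 - 2 * <x, y> + ||y||^2

where ||y||^2 is read back from the norm stored with the codes (float32 or quantized) and the query-only term ||x||^2 is dropped because it does not affect the ranking, which is why every L2 specialization returns norm2 - 2 * accu.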

data/vendor/faiss/faiss/impl/AdditiveQuantizer.h

@@ -11,6 +11,7 @@
  #include <vector>

  #include <faiss/Index.h>
+ #include <faiss/IndexFlat.h>

  namespace faiss {

@@ -27,15 +28,44 @@ struct AdditiveQuantizer {
  std::vector<float> codebooks; ///< codebooks

  // derived values
- std::vector<size_t> codebook_offsets;
+ std::vector<uint64_t> codebook_offsets;
  size_t code_size; ///< code size in bytes
  size_t tot_bits; ///< total number of bits
  size_t total_codebook_size; ///< size of the codebook in vectors
- bool is_byte_aligned;
+ bool only_8bit; ///< are all nbits = 8 (use faster decoder)

  bool verbose; ///< verbose during training?
  bool is_trained; ///< is trained or not

+ IndexFlat1D qnorm; ///< store and search norms
+
+ uint32_t encode_qcint(
+ float x) const; ///< encode norm by non-uniform scalar quantization
+
+ float decode_qcint(uint32_t c)
+ const; ///< decode norm by non-uniform scalar quantization
+
+ /// Encodes how search is performed and how vectors are encoded
+ enum Search_type_t {
+ ST_decompress, ///< decompress database vector
+ ST_LUT_nonorm, ///< use a LUT, don't include norms (OK for IP or
+ ///< normalized vectors)
+ ST_norm_from_LUT, ///< compute the norms from the look-up tables (cost
+ ///< is in O(M^2))
+ ST_norm_float, ///< use a LUT, and store float32 norm with the vectors
+ ST_norm_qint8, ///< use a LUT, and store 8bit-quantized norm
+ ST_norm_qint4,
+ ST_norm_cqint8, ///< use a LUT, and store non-uniform quantized norm
+ ST_norm_cqint4,
+ };
+
+ AdditiveQuantizer(
+ size_t d,
+ const std::vector<size_t>& nbits,
+ Search_type_t search_type = ST_decompress);
+
+ AdditiveQuantizer();
+
  ///< compute derived values when d, M and nbits have been set
  void set_derived_values();
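
A quick illustration of how the new search types feed into code_size via set_derived_values (the four 8-bit codebooks below are an example configuration, not taken from the diff):

#include <cstdio>
#include <vector>

int main() {
    std::vector<size_t> nbits = {8, 8, 8, 8}; // M = 4 codebooks of 8 bits each
    size_t tot_bits = 0;
    for (size_t b : nbits) tot_bits += b;     // 32 bits of additive codes
    tot_bits += 8;                            // ST_norm_qint8 appends an 8-bit norm
    size_t code_size = (tot_bits + 7) / 8;    // rounded up to bytes -> 5
    std::printf("code_size = %zu bytes\n", code_size);
    return 0;
}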

@@ -52,15 +82,18 @@ struct AdditiveQuantizer {

  /** pack a series of code to bit-compact format
  *
- * @param codes codes to be packed, size n * code_size
+ * @param codes codes to be packed, size n * code_size
  * @param packed_codes output bit-compact codes
- * @param ld_codes leading dimension of codes
+ * @param ld_codes leading dimension of codes
+ * @param norms norms of the vectors (size n). Will be computed if
+ * needed but not provided
  */
  void pack_codes(
  size_t n,
  const int32_t* codes,
  uint8_t* packed_codes,
- int64_t ld_codes = -1) const;
+ int64_t ld_codes = -1,
+ const float* norms = nullptr) const;

  /** Decode a set of vectors
  *
@@ -69,9 +102,36 @@ struct AdditiveQuantizer {
  */
  void decode(const uint8_t* codes, float* x, size_t n) const;

+ /** Decode a set of vectors in non-packed format
+ *
+ * @param codes codes to decode, size n * ld_codes
+ * @param x output vectors, size n * d
+ */
+ void decode_unpacked(
+ const int32_t* codes,
+ float* x,
+ size_t n,
+ int64_t ld_codes = -1) const;
+
+ /****************************************************************************
+ * Search functions in an external set of codes.
+ ****************************************************************************/
+
+ /// Also determines what's in the codes
+ Search_type_t search_type;
+
+ /// min/max for quantization of norms
+ float norm_min, norm_max;
+
+ template <bool is_IP, Search_type_t effective_search_type>
+ float compute_1_distance_LUT(const uint8_t* codes, const float* LUT) const;
+
+ /*
+ float compute_1_L2sqr(const uint8_t* codes, const float* LUT);
+ */
  /****************************************************************************
- * Support for exhaustive distance computations with the centroids.
- * Hence, the number of elements that can be enumerated is not too large.
+ * Support for exhaustive distance computations with all the centroids.
+ * Hence, the number of these centroids should not be too large.
  ****************************************************************************/
  using idx_t = Index::idx_t;

@@ -87,7 +147,7 @@ struct AdditiveQuantizer {
  void compute_LUT(size_t n, const float* xq, float* LUT) const;

  /// exact IP search
- void knn_exact_inner_product(
+ void knn_centroids_inner_product(
  idx_t n,
  const float* xq,
  idx_t k,
@@ -101,7 +161,7 @@ struct AdditiveQuantizer {
  void compute_centroid_norms(float* norms) const;

  /** Exact L2 search, with precomputed norms */
- void knn_exact_L2(
+ void knn_centroids_L2(
  idx_t n,
  const float* xq,
  idx_t k,
data/vendor/faiss/faiss/impl/HNSW.cpp

@@ -434,17 +434,22 @@ void HNSW::add_links_starting_from(

  ::faiss::shrink_neighbor_list(ptdis, link_targets, M);

+ std::vector<storage_idx_t> neighbors;
+ neighbors.reserve(link_targets.size());
  while (!link_targets.empty()) {
- int other_id = link_targets.top().id;
+ storage_idx_t other_id = link_targets.top().id;
+ add_link(*this, ptdis, pt_id, other_id, level);
+ neighbors.push_back(other_id);
+ link_targets.pop();
+ }

+ omp_unset_lock(&locks[pt_id]);
+ for (storage_idx_t other_id : neighbors) {
  omp_set_lock(&locks[other_id]);
  add_link(*this, ptdis, other_id, pt_id, level);
  omp_unset_lock(&locks[other_id]);
-
- add_link(*this, ptdis, pt_id, other_id, level);
-
- link_targets.pop();
  }
+ omp_set_lock(&locks[pt_id]);
  }

  /**************************************************************