RubyGems - faiss - Versions diffs - 0.2.6 → 0.2.7 - Mend

faiss 0.2.6 → 0.2.7

Files changed (189) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +4 -0
data/ext/faiss/extconf.rb +1 -1
data/lib/faiss/version.rb +1 -1
data/lib/faiss.rb +2 -2
data/vendor/faiss/faiss/AutoTune.cpp +15 -4
data/vendor/faiss/faiss/AutoTune.h +0 -1
data/vendor/faiss/faiss/Clustering.cpp +1 -5
data/vendor/faiss/faiss/Clustering.h +0 -2
data/vendor/faiss/faiss/IVFlib.h +0 -2
data/vendor/faiss/faiss/Index.h +1 -2
data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +17 -3
data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +10 -1
data/vendor/faiss/faiss/IndexBinary.h +0 -1
data/vendor/faiss/faiss/IndexBinaryFlat.cpp +2 -1
data/vendor/faiss/faiss/IndexBinaryFlat.h +4 -0
data/vendor/faiss/faiss/IndexBinaryHash.cpp +1 -3
data/vendor/faiss/faiss/IndexBinaryIVF.cpp +273 -48
data/vendor/faiss/faiss/IndexBinaryIVF.h +18 -11
data/vendor/faiss/faiss/IndexFastScan.cpp +13 -10
data/vendor/faiss/faiss/IndexFastScan.h +5 -1
data/vendor/faiss/faiss/IndexFlat.cpp +16 -3
data/vendor/faiss/faiss/IndexFlat.h +1 -1
data/vendor/faiss/faiss/IndexFlatCodes.cpp +5 -0
data/vendor/faiss/faiss/IndexFlatCodes.h +7 -2
data/vendor/faiss/faiss/IndexHNSW.cpp +3 -6
data/vendor/faiss/faiss/IndexHNSW.h +0 -1
data/vendor/faiss/faiss/IndexIDMap.cpp +4 -4
data/vendor/faiss/faiss/IndexIDMap.h +0 -2
data/vendor/faiss/faiss/IndexIVF.cpp +155 -129
data/vendor/faiss/faiss/IndexIVF.h +121 -61
data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +2 -2
data/vendor/faiss/faiss/IndexIVFFastScan.cpp +12 -11
data/vendor/faiss/faiss/IndexIVFFastScan.h +6 -1
data/vendor/faiss/faiss/IndexIVFPQ.cpp +221 -165
data/vendor/faiss/faiss/IndexIVFPQ.h +1 -0
data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +6 -1
data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +0 -2
data/vendor/faiss/faiss/IndexNNDescent.cpp +1 -2
data/vendor/faiss/faiss/IndexNNDescent.h +0 -1
data/vendor/faiss/faiss/IndexNSG.cpp +1 -2
data/vendor/faiss/faiss/IndexPQ.cpp +7 -9
data/vendor/faiss/faiss/IndexRefine.cpp +1 -1
data/vendor/faiss/faiss/IndexReplicas.cpp +3 -4
data/vendor/faiss/faiss/IndexReplicas.h +0 -1
data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +8 -1
data/vendor/faiss/faiss/IndexRowwiseMinMax.h +7 -0
data/vendor/faiss/faiss/IndexShards.cpp +26 -109
data/vendor/faiss/faiss/IndexShards.h +2 -3
data/vendor/faiss/faiss/IndexShardsIVF.cpp +246 -0
data/vendor/faiss/faiss/IndexShardsIVF.h +42 -0
data/vendor/faiss/faiss/MetaIndexes.cpp +86 -0
data/vendor/faiss/faiss/MetaIndexes.h +29 -0
data/vendor/faiss/faiss/MetricType.h +14 -0
data/vendor/faiss/faiss/VectorTransform.cpp +8 -10
data/vendor/faiss/faiss/VectorTransform.h +1 -3
data/vendor/faiss/faiss/clone_index.cpp +232 -18
data/vendor/faiss/faiss/cppcontrib/SaDecodeKernels.h +25 -3
data/vendor/faiss/faiss/cppcontrib/detail/CoarseBitType.h +7 -0
data/vendor/faiss/faiss/cppcontrib/detail/UintReader.h +78 -0
data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +20 -6
data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +7 -1
data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-neon-inl.h +21 -7
data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMax-inl.h +7 -0
data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMaxFP16-inl.h +7 -0
data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +10 -3
data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +7 -1
data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-neon-inl.h +11 -3
data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +25 -2
data/vendor/faiss/faiss/gpu/GpuCloner.cpp +76 -29
data/vendor/faiss/faiss/gpu/GpuCloner.h +2 -2
data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +14 -13
data/vendor/faiss/faiss/gpu/GpuDistance.h +18 -6
data/vendor/faiss/faiss/gpu/GpuIndex.h +23 -21
data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +10 -10
data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +11 -12
data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +29 -50
data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +3 -3
data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +8 -8
data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +4 -4
data/vendor/faiss/faiss/gpu/impl/IndexUtils.h +2 -5
data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +9 -7
data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +4 -4
data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +2 -2
data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +1 -1
data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +55 -6
data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +20 -6
data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +95 -25
data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +67 -16
data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +4 -4
data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +7 -7
data/vendor/faiss/faiss/gpu/test/TestUtils.h +4 -4
data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +1 -1
data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +6 -0
data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +0 -7
data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +9 -9
data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +1 -1
data/vendor/faiss/faiss/impl/AuxIndexStructures.h +2 -7
data/vendor/faiss/faiss/impl/CodePacker.cpp +67 -0
data/vendor/faiss/faiss/impl/CodePacker.h +71 -0
data/vendor/faiss/faiss/impl/DistanceComputer.h +0 -2
data/vendor/faiss/faiss/impl/HNSW.cpp +3 -7
data/vendor/faiss/faiss/impl/HNSW.h +6 -9
data/vendor/faiss/faiss/impl/IDSelector.cpp +1 -1
data/vendor/faiss/faiss/impl/IDSelector.h +39 -1
data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +62 -51
data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +11 -12
data/vendor/faiss/faiss/impl/NNDescent.cpp +3 -9
data/vendor/faiss/faiss/impl/NNDescent.h +10 -10
data/vendor/faiss/faiss/impl/NSG.cpp +1 -6
data/vendor/faiss/faiss/impl/NSG.h +4 -7
data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +1 -15
data/vendor/faiss/faiss/impl/PolysemousTraining.h +11 -10
data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +0 -7
data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +25 -12
data/vendor/faiss/faiss/impl/ProductQuantizer.h +2 -4
data/vendor/faiss/faiss/impl/Quantizer.h +6 -3
data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +796 -174
data/vendor/faiss/faiss/impl/ResidualQuantizer.h +16 -8
data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +3 -5
data/vendor/faiss/faiss/impl/ScalarQuantizer.h +4 -4
data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +3 -3
data/vendor/faiss/faiss/impl/ThreadedIndex.h +4 -4
data/vendor/faiss/faiss/impl/code_distance/code_distance-avx2.h +291 -0
data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +74 -0
data/vendor/faiss/faiss/impl/code_distance/code_distance.h +123 -0
data/vendor/faiss/faiss/impl/code_distance/code_distance_avx512.h +102 -0
data/vendor/faiss/faiss/impl/index_read.cpp +13 -10
data/vendor/faiss/faiss/impl/index_write.cpp +3 -4
data/vendor/faiss/faiss/impl/kmeans1d.cpp +0 -1
data/vendor/faiss/faiss/impl/kmeans1d.h +3 -3
data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -1
data/vendor/faiss/faiss/impl/platform_macros.h +61 -0
data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +48 -4
data/vendor/faiss/faiss/impl/pq4_fast_scan.h +18 -4
data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +2 -2
data/vendor/faiss/faiss/index_factory.cpp +8 -10
data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +29 -12
data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +8 -2
data/vendor/faiss/faiss/invlists/DirectMap.cpp +1 -1
data/vendor/faiss/faiss/invlists/DirectMap.h +2 -4
data/vendor/faiss/faiss/invlists/InvertedLists.cpp +118 -18
data/vendor/faiss/faiss/invlists/InvertedLists.h +44 -4
data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +3 -3
data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
data/vendor/faiss/faiss/python/python_callbacks.cpp +1 -1
data/vendor/faiss/faiss/python/python_callbacks.h +1 -1
data/vendor/faiss/faiss/utils/AlignedTable.h +3 -1
data/vendor/faiss/faiss/utils/Heap.cpp +139 -3
data/vendor/faiss/faiss/utils/Heap.h +35 -1
data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +84 -0
data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +196 -0
data/vendor/faiss/faiss/utils/approx_topk/generic.h +138 -0
data/vendor/faiss/faiss/utils/approx_topk/mode.h +34 -0
data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +367 -0
data/vendor/faiss/faiss/utils/distances.cpp +61 -7
data/vendor/faiss/faiss/utils/distances.h +11 -0
data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +346 -0
data/vendor/faiss/faiss/utils/distances_fused/avx512.h +36 -0
data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +42 -0
data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +40 -0
data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +352 -0
data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.h +32 -0
data/vendor/faiss/faiss/utils/distances_simd.cpp +515 -327
data/vendor/faiss/faiss/utils/extra_distances-inl.h +17 -1
data/vendor/faiss/faiss/utils/extra_distances.cpp +37 -8
data/vendor/faiss/faiss/utils/extra_distances.h +2 -1
data/vendor/faiss/faiss/utils/fp16-fp16c.h +7 -0
data/vendor/faiss/faiss/utils/fp16-inl.h +7 -0
data/vendor/faiss/faiss/utils/fp16.h +7 -0
data/vendor/faiss/faiss/utils/hamming-inl.h +0 -456
data/vendor/faiss/faiss/utils/hamming.cpp +104 -120
data/vendor/faiss/faiss/utils/hamming.h +21 -10
data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +535 -0
data/vendor/faiss/faiss/utils/hamming_distance/common.h +48 -0
data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +519 -0
data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +26 -0
data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +614 -0
data/vendor/faiss/faiss/utils/partitioning.cpp +21 -25
data/vendor/faiss/faiss/utils/simdlib_avx2.h +344 -3
data/vendor/faiss/faiss/utils/simdlib_emulated.h +390 -0
data/vendor/faiss/faiss/utils/simdlib_neon.h +655 -130
data/vendor/faiss/faiss/utils/sorting.cpp +692 -0
data/vendor/faiss/faiss/utils/sorting.h +71 -0
data/vendor/faiss/faiss/utils/transpose/transpose-avx2-inl.h +165 -0
data/vendor/faiss/faiss/utils/utils.cpp +4 -176
data/vendor/faiss/faiss/utils/utils.h +2 -9
metadata +29 -3
data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +0 -26

data/vendor/faiss/faiss/impl/code_distance/code_distance_avx512.h ADDED Viewed

@@ -0,0 +1,102 @@
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+// // // AVX-512 version. It is currently unused, but is kept for future
+// // // needs.
+// // template <class SearchResultType, typename T = PQDecoder>
+// // typename std::enable_if<(std::is_same<T, PQDecoder8>::value), void>::
+// //         type distance_four_codes(
+// //     const uint8_t* __restrict code0,
+// //     const uint8_t* __restrict code1,
+// //     const uint8_t* __restrict code2,
+// //     const uint8_t* __restrict code3,
+// //     float& result0,
+// //     float& result1,
+// //     float& result2,
+// //     float& result3
+// // ) const {
+// //     result0 = 0;
+// //     result1 = 0;
+// //     result2 = 0;
+// //     result3 = 0;
+// //     size_t m = 0;
+// //     const size_t pqM16 = pq.M / 16;
+// //     constexpr intptr_t N = 4;
+// //     const float* tab = sim_table;
+// //     if (pqM16 > 0) {
+// //         // process 16 values per loop
+// //         const __m512i ksub = _mm512_set1_epi32(pq.ksub);
+// //         __m512i offsets_0 = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7,
+// //              8, 9, 10, 11, 12, 13, 14, 15);
+// //         offsets_0 = _mm512_mullo_epi32(offsets_0, ksub);
+// //         // accumulators of partial sums
+// //         __m512 partialSums[N];
+// //         for (intptr_t j = 0; j < N; j++) {
+// //             partialSums[j] = _mm512_setzero_ps();
+// //         }
+// //         // loop
+// //         for (m = 0; m < pqM16 * 16; m += 16) {
+// //             // load 16 uint8 values
+// //             __m128i mm1[N];
+// //             mm1[0] = _mm_loadu_si128((const __m128i_u*)(code0 + m));
+// //             mm1[1] = _mm_loadu_si128((const __m128i_u*)(code1 + m));
+// //             mm1[2] = _mm_loadu_si128((const __m128i_u*)(code2 + m));
+// //             mm1[3] = _mm_loadu_si128((const __m128i_u*)(code3 + m));
+// //             // process the 16 codes loaded from each of the N inputs
+// //             for (intptr_t j = 0; j < N; j++) {
+// //                 // convert the 16 uint8 values of the __m128i to int32
+// //                 // values
+// //                 const __m512i idx1 = _mm512_cvtepu8_epi32(mm1[j]);
+// //                 // add offsets
+// //                 const __m512i indices_to_read_from =
+// //                     _mm512_add_epi32(idx1, offsets_0);
+// //                 // gather 16 values, similar to 16 operations of
+// //                 // tab[idx]
+// //                 __m512 collected =
+// //                        _mm512_i32gather_ps(
+// //                             indices_to_read_from, tab, sizeof(float));
+// //                 // collect partial sums
+// //                 partialSums[j] = _mm512_add_ps(partialSums[j],
+// //                    collected);
+// //             }
+// //             tab += pq.ksub * 16;
+// //         }
+// //         // horizontal sum for partialSum
+// //         result0 += _mm512_reduce_add_ps(partialSums[0]);
+// //         result1 += _mm512_reduce_add_ps(partialSums[1]);
+// //         result2 += _mm512_reduce_add_ps(partialSums[2]);
+// //         result3 += _mm512_reduce_add_ps(partialSums[3]);
+// //     }
+// //     //
+// //     if (m < pq.M) {
+// //         // process leftovers
+// //         PQDecoder decoder0(code0 + m, pq.nbits);
+// //         PQDecoder decoder1(code1 + m, pq.nbits);
+// //         PQDecoder decoder2(code2 + m, pq.nbits);
+// //         PQDecoder decoder3(code3 + m, pq.nbits);
+// //         for (; m < pq.M; m++) {
+// //             result0 += tab[decoder0.decode()];
+// //             result1 += tab[decoder1.decode()];
+// //             result2 += tab[decoder2.decode()];
+// //             result3 += tab[decoder3.decode()];
+// //             tab += pq.ksub;
+// //         }
+// //     }
+// // }

data/vendor/faiss/faiss/impl/index_read.cpp CHANGED Viewed

@@ -65,7 +65,7 @@ namespace faiss {
 static void read_index_header(Index* idx, IOReader* f) {
     READ1(idx->d);
     READ1(idx->ntotal);
-    Index::idx_t dummy;
+    idx_t dummy;
     READ1(dummy);
     READ1(dummy);
     READ1(idx->is_trained);
@@ -279,6 +279,8 @@ static void read_AdditiveQuantizer(AdditiveQuantizer* aq, IOReader* f) {
         aq->search_type == AdditiveQuantizer::ST_norm_lsq2x4 ||
         aq->search_type == AdditiveQuantizer::ST_norm_rq2x4) {
         READXBVECTOR(aq->qnorm.codes);
+        aq->qnorm.ntotal = aq->qnorm.codes.size() / 4;
+        aq->qnorm.update_permutation();
     }
     if (aq->search_type == AdditiveQuantizer::ST_norm_lsq2x4 ||
@@ -439,7 +441,6 @@ static void read_direct_map(DirectMap* dm, IOReader* f) {
     dm->type = (DirectMap::Type)maintain_direct_map;
     READVECTOR(dm->array);
     if (dm->type == DirectMap::Hashtable) {
-        using idx_t = Index::idx_t;
         std::vector<std::pair<idx_t, idx_t>> v;
         READVECTOR(v);
         std::unordered_map<idx_t, idx_t>& map = dm->hashtable;
@@ -453,7 +454,7 @@ static void read_direct_map(DirectMap* dm, IOReader* f) {
 static void read_ivf_header(
         IndexIVF* ivf,
         IOReader* f,
-        std::vector<std::vector<Index::idx_t>>* ids = nullptr) {
+        std::vector<std::vector<idx_t>>* ids = nullptr) {
     read_index_header(ivf, f);
     READ1(ivf->nlist);
     READ1(ivf->nprobe);
@@ -470,7 +471,7 @@ static void read_ivf_header(
 // used for legacy formats
 static ArrayInvertedLists* set_array_invlist(
         IndexIVF* ivf,
-        std::vector<std::vector<Index::idx_t>>& ids) {
+        std::vector<std::vector<idx_t>>& ids) {
     ArrayInvertedLists* ail =
             new ArrayInvertedLists(ivf->nlist, ivf->code_size);
     std::swap(ail->ids, ids);
@@ -487,7 +488,7 @@ static IndexIVFPQ* read_ivfpq(IOReader* f, uint32_t h, int io_flags) {
             : nullptr;
     IndexIVFPQ* ivpq = ivfpqr ? ivfpqr : new IndexIVFPQ();
-    std::vector<std::vector<Index::idx_t>> ids;
+    std::vector<std::vector<idx_t>> ids;
     read_ivf_header(ivpq, f, legacy ? &ids : nullptr);
     READ1(ivpq->by_residual);
     READ1(ivpq->code_size);
@@ -728,10 +729,11 @@ Index* read_index(IOReader* f, int io_flags) {
         READ1(ivaqfs->max_train_points);
         read_InvertedLists(ivaqfs, f, io_flags);
+        ivaqfs->init_code_packer();
         idx = ivaqfs;
     } else if (h == fourcc("IvFl") || h == fourcc("IvFL")) { // legacy
         IndexIVFFlat* ivfl = new IndexIVFFlat();
-        std::vector<std::vector<Index::idx_t>> ids;
+        std::vector<std::vector<idx_t>> ids;
         read_ivf_header(ivfl, f, &ids);
         ivfl->code_size = ivfl->d * sizeof(float);
         ArrayInvertedLists* ail = set_array_invlist(ivfl, ids);
@@ -754,10 +756,10 @@ Index* read_index(IOReader* f, int io_flags) {
         read_ivf_header(ivfl, f);
         ivfl->code_size = ivfl->d * sizeof(float);
         {
-            std::vector<Index::idx_t> tab;
+            std::vector<idx_t> tab;
             READVECTOR(tab);
             for (long i = 0; i < tab.size(); i += 2) {
-                std::pair<Index::idx_t, Index::idx_t> pair(tab[i], tab[i + 1]);
+                std::pair<idx_t, idx_t> pair(tab[i], tab[i + 1]);
                 ivfl->instances.insert(pair);
             }
         }
@@ -788,7 +790,7 @@ Index* read_index(IOReader* f, int io_flags) {
         idx = idxl;
     } else if (h == fourcc("IvSQ")) { // legacy
         IndexIVFScalarQuantizer* ivsc = new IndexIVFScalarQuantizer();
-        std::vector<std::vector<Index::idx_t>> ids;
+        std::vector<std::vector<idx_t>> ids;
         read_ivf_header(ivsc, f, &ids);
         read_ScalarQuantizer(&ivsc->sq, f);
         READ1(ivsc->code_size);
@@ -1002,6 +1004,7 @@ Index* read_index(IOReader* f, int io_flags) {
         ivpq->nbits = pq.nbits;
         ivpq->ksub = (1 << pq.nbits);
         ivpq->code_size = pq.code_size;
+        ivpq->init_code_packer();
         idx = ivpq;
     } else if (h == fourcc("IRMf")) {
@@ -1072,7 +1075,7 @@ static void read_index_binary_header(IndexBinary* idx, IOReader* f) {
 static void read_binary_ivf_header(
         IndexBinaryIVF* ivf,
         IOReader* f,
-        std::vector<std::vector<Index::idx_t>>* ids = nullptr) {
+        std::vector<std::vector<idx_t>>* ids = nullptr) {
     read_index_binary_header(ivf, f);
     READ1(ivf->nlist);
     READ1(ivf->nprobe);

data/vendor/faiss/faiss/impl/index_write.cpp CHANGED Viewed

@@ -84,7 +84,7 @@ namespace faiss {
 static void write_index_header(const Index* idx, IOWriter* f) {
     WRITE1(idx->d);
     WRITE1(idx->ntotal);
-    Index::idx_t dummy = 1 << 20;
+    idx_t dummy = 1 << 20;
     WRITE1(dummy);
     WRITE1(dummy);
     WRITE1(idx->is_trained);
@@ -373,7 +373,6 @@ static void write_direct_map(const DirectMap* dm, IOWriter* f) {
     WRITE1(maintain_direct_map);
     WRITEVECTOR(dm->array);
     if (dm->type == DirectMap::Hashtable) {
-        using idx_t = Index::idx_t;
         std::vector<std::pair<idx_t, idx_t>> v;
         const std::unordered_map<idx_t, idx_t>& map = dm->hashtable;
         v.resize(map.size());
@@ -615,7 +614,7 @@ void write_index(const Index* idx, IOWriter* f) {
         WRITE1(h);
         write_ivf_header(ivfl, f);
         {
-            std::vector<Index::idx_t> tab(2 * ivfl->instances.size());
+            std::vector<idx_t> tab(2 * ivfl->instances.size());
             long i = 0;
             for (auto it = ivfl->instances.begin(); it != ivfl->instances.end();
                  ++it) {
@@ -900,7 +899,7 @@ static void write_binary_multi_hash_map(
         size_t ntotal,
         IOWriter* f) {
     int id_bits = 0;
-    while ((ntotal > ((Index::idx_t)1 << id_bits))) {
+    while ((ntotal > ((idx_t)1 << id_bits))) {
         id_bits++;
     }
     WRITE1(id_bits);

data/vendor/faiss/faiss/impl/kmeans1d.cpp CHANGED Viewed

@@ -20,7 +20,6 @@
 namespace faiss {
-using idx_t = Index::idx_t;
 using LookUpFunc = std::function<float(idx_t, idx_t)>;
 void reduce(

data/vendor/faiss/faiss/impl/kmeans1d.h CHANGED Viewed

@@ -22,10 +22,10 @@ namespace faiss {
  * @param argmins  argmin of each row
  */
 void smawk(
-        const Index::idx_t nrows,
-        const Index::idx_t ncols,
+        const idx_t nrows,
+        const idx_t ncols,
         const float* x,
-        Index::idx_t* argmins);
+        idx_t* argmins);
 /** Exact 1D K-Means by dynamic programming
  *

data/vendor/faiss/faiss/impl/lattice_Zn.cpp CHANGED Viewed

@@ -636,7 +636,7 @@ void ZnSphereCodecRec::decode(uint64_t code, float* c) const {
     }
 }
-// if not use_rec, instanciate an arbitrary harmless znc_rec
+// if not use_rec, instantiate an arbitrary harmless znc_rec
 ZnSphereCodecAlt::ZnSphereCodecAlt(int dim, int r2)
         : ZnSphereCodec(dim, r2),
           use_rec((dim & (dim - 1)) == 0),

data/vendor/faiss/faiss/impl/platform_macros.h CHANGED Viewed

@@ -7,6 +7,10 @@
 #pragma once
+// basic int types and size_t
+#include <cstdint>
+#include <cstdio>
 #ifdef _MSC_VER
 /*******************************************************
@@ -19,6 +23,10 @@
 #define FAISS_API __declspec(dllimport)
 #endif // FAISS_MAIN_LIB
+#ifdef _MSC_VER
+#define strtok_r strtok_s
+#endif // _MSC_VER
 #define __PRETTY_FUNCTION__ __FUNCSIG__
 #define posix_memalign(p, a, s) \
@@ -87,3 +95,56 @@ inline int __builtin_clzll(uint64_t x) {
 #define ALIGNED(x) __attribute__((aligned(x)))
 #endif // _MSC_VER
+#if defined(__GNUC__) || defined(__clang__)
+#define FAISS_DEPRECATED(msg) __attribute__((deprecated(msg)))
+#else
+#define FAISS_DEPRECATED(msg)
+#endif // GCC or Clang
+// Localized enablement of imprecise floating point operations
+// You need to use all 3 macros to cover all compilers.
+#if defined(_MSC_VER)
+#define FAISS_PRAGMA_IMPRECISE_LOOP
+#define FAISS_PRAGMA_IMPRECISE_FUNCTION_BEGIN \
+    __pragma(float_control(precise, off, push))
+#define FAISS_PRAGMA_IMPRECISE_FUNCTION_END __pragma(float_control(pop))
+#elif defined(__clang__)
+#define FAISS_PRAGMA_IMPRECISE_LOOP \
+    _Pragma("clang loop vectorize(enable) interleave(enable)")
+// clang-format off
+// the following ifdef is needed, because old versions of clang (prior to 14)
+// do not generate FMAs on x86 unless this pragma is used. On the other hand,
+// ARM does not support the following pragma flag.
+// TODO: find out how to enable FMAs on clang 10 and earlier.
+#if defined(__x86_64__) && (defined(__clang_major__) && (__clang_major__ > 10))
+#define FAISS_PRAGMA_IMPRECISE_FUNCTION_BEGIN \
+    _Pragma("float_control(precise, off, push)")
+#define FAISS_PRAGMA_IMPRECISE_FUNCTION_END _Pragma("float_control(pop)")
+#else
+#define FAISS_PRAGMA_IMPRECISE_FUNCTION_BEGIN
+#define FAISS_PRAGMA_IMPRECISE_FUNCTION_END
+#endif
+#elif defined(__GNUC__)
+// Unfortunately, GCC does not provide a macro that uniquely identifies it,
+// so we have to rely on __GNUC__, which is defined by MANY compilers.
+// This is why clang/icc needs to be checked first.
+// todo: add __INTEL_COMPILER check for the classic ICC
+// todo: add __INTEL_LLVM_COMPILER for ICX
+#define FAISS_PRAGMA_IMPRECISE_LOOP
+#define FAISS_PRAGMA_IMPRECISE_FUNCTION_BEGIN \
+    _Pragma("GCC push_options") \
+    _Pragma("GCC optimize (\"unroll-loops,associative-math,no-signed-zeros\")")
+#define FAISS_PRAGMA_IMPRECISE_FUNCTION_END \
+    _Pragma("GCC pop_options")
+#else
+#define FAISS_PRAGMA_IMPRECISE_LOOP
+#define FAISS_PRAGMA_IMPRECISE_FUNCTION_BEGIN
+#define FAISS_PRAGMA_IMPRECISE_FUNCTION_END
+#endif
+// clang-format on

data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp CHANGED Viewed

@@ -88,7 +88,7 @@ void pq4_pack_codes_range(
         size_t i0,
         size_t i1,
         size_t bbs,
-        size_t M2,
+        size_t nsq,
         uint8_t* blocks) {
     const uint8_t perm0[16] = {
             0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15};
@@ -98,9 +98,9 @@ void pq4_pack_codes_range(
     size_t block1 = ((i1 - 1) / bbs) + 1;
     for (size_t b = block0; b < block1; b++) {
-        uint8_t* codes2 = blocks + b * bbs * M2 / 2;
+        uint8_t* codes2 = blocks + b * bbs * nsq / 2;
         int64_t i_base = b * bbs - i0;
-        for (int sq = 0; sq < M2; sq += 2) {
+        for (int sq = 0; sq < nsq; sq += 2) {
             for (size_t i = 0; i < bbs; i += 32) {
                 std::array<uint8_t, 32> c, c0, c1;
                 get_matrix_column(
@@ -127,7 +127,7 @@ namespace {
 // get the specific address of the vector inside a block
 // shift is used to determine whether it is saved in bits 0..3 (false) or
 // bits 4..7 (true)
-uint8_t get_vector_specific_address(
+size_t get_vector_specific_address(
         size_t bbs,
         size_t vector_id,
         size_t sq,
@@ -189,6 +189,50 @@ void pq4_set_packed_element(
     }
 }
+/***************************************************************
+ * CodePackerPQ4 implementation
+ ***************************************************************/
+CodePackerPQ4::CodePackerPQ4(size_t nsq, size_t bbs) {
+    this->nsq = nsq;
+    nvec = bbs;
+    code_size = (nsq * 4 + 7) / 8;
+    block_size = ((nsq + 1) / 2) * bbs;
+}
+void CodePackerPQ4::pack_1(
+        const uint8_t* flat_code,
+        size_t offset,
+        uint8_t* block) const {
+    size_t bbs = nvec;
+    if (offset >= nvec) {
+        block += (offset / nvec) * block_size;
+        offset = offset % nvec;
+    }
+    for (size_t i = 0; i < code_size; i++) {
+        uint8_t code = flat_code[i];
+        pq4_set_packed_element(block, code & 15, bbs, nsq, offset, 2 * i);
+        pq4_set_packed_element(block, code >> 4, bbs, nsq, offset, 2 * i + 1);
+    }
+}
+void CodePackerPQ4::unpack_1(
+        const uint8_t* block,
+        size_t offset,
+        uint8_t* flat_code) const {
+    size_t bbs = nvec;
+    if (offset >= nvec) {
+        block += (offset / nvec) * block_size;
+        offset = offset % nvec;
+    }
+    for (size_t i = 0; i < code_size; i++) {
+        uint8_t code0, code1;
+        code0 = pq4_get_packed_element(block, bbs, nsq, offset, 2 * i);
+        code1 = pq4_get_packed_element(block, bbs, nsq, offset, 2 * i + 1);
+        flat_code[i] = code0 | (code1 << 4);
+    }
+}
 /***************************************************************
  * Packing functions for Look-Up Tables (LUT)
  ***************************************************************/

data/vendor/faiss/faiss/impl/pq4_fast_scan.h CHANGED Viewed

@@ -10,6 +10,8 @@
 #include <cstdint>
 #include <cstdlib>
+#include <faiss/impl/CodePacker.h>
 /** PQ4 SIMD packing and accumulation functions
  *
  * The basic kernel accumulates nq query vectors with bbs = nb * 2 * 16 vectors
@@ -17,7 +19,7 @@
  * otherwise register spilling becomes too large.
  *
  * The implementation of these functions is spread over 3 cpp files to reduce
- * parallel compile times. Templates are instanciated explicitly.
+ * parallel compile times. Templates are instantiated explicitly.
  */
 namespace faiss {
@@ -29,7 +31,7 @@ namespace faiss {
  * @param ntotal  number of input codes
  * @param nb      output number of codes (ntotal rounded up to a multiple of
  *                bbs)
- * @param M2      number of sub-quantizers (=M rounded up to a muliple of 2)
+ * @param nsq     number of sub-quantizers (=M rounded up to a multiple of 2)
  * @param bbs     size of database blocks (multiple of 32)
  * @param blocks  output array, size nb * nsq / 2.
  */
@@ -39,7 +41,7 @@ void pq4_pack_codes(
         size_t M,
         size_t nb,
         size_t bbs,
-        size_t M2,
+        size_t nsq,
         uint8_t* blocks);
 /** Same as pack_codes but write in a given range of the output,
@@ -56,7 +58,7 @@ void pq4_pack_codes_range(
         size_t i0,
         size_t i1,
         size_t bbs,
-        size_t M2,
+        size_t nsq,
         uint8_t* blocks);
 /** get a single element from a packed codes table
@@ -84,6 +86,18 @@ void pq4_set_packed_element(
         size_t vector_id,
         size_t sq);
+/** CodePacker API for the PQ4 fast-scan */
+struct CodePackerPQ4 : CodePacker {
+    size_t nsq;
+    CodePackerPQ4(size_t nsq, size_t bbs);
+    void pack_1(const uint8_t* flat_code, size_t offset, uint8_t* block)
+            const final;
+    void unpack_1(const uint8_t* block, size_t offset, uint8_t* flat_code)
+            const final;
+};
 /** Pack Look-up table for consumption by the kernel.
  *
  * @param nq      number of queries

data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp CHANGED Viewed

@@ -189,7 +189,7 @@ void accumulate(
         DISPATCH(3);
         DISPATCH(4);
     }
-    FAISS_THROW_FMT("accumulate nq=%d not instanciated", nq);
+    FAISS_THROW_FMT("accumulate nq=%d not instantiated", nq);
 #undef DISPATCH
 }
@@ -263,7 +263,7 @@ void pq4_accumulate_loop_qbs(
                 DISPATCH(4);
 #undef DISPATCH
                 default:
-                    FAISS_THROW_FMT("accumulate nq=%d not instanciated", nq);
+                    FAISS_THROW_FMT("accumulate nq=%d not instantiated", nq);
             }
             i0 += nq;
             LUT += nq * nsq * 16;

data/vendor/faiss/faiss/index_factory.cpp CHANGED Viewed

@@ -10,8 +10,6 @@
  */
 #include <faiss/index_factory.h>
-#include "faiss/MetricType.h"
-#include "faiss/impl/FaissAssert.h"
 #include <cinttypes>
 #include <cmath>
@@ -665,19 +663,19 @@ std::unique_ptr<Index> index_factory_sub(
         re_match(description, "(.+),Refine\\((.+)\\)", sm)) {
         std::unique_ptr<Index> filter_index =
                 index_factory_sub(d, sm[1].str(), metric);
-        std::unique_ptr<Index> refine_index;
+        IndexRefine* index_rf = nullptr;
         if (sm.size() == 3) { // Refine
-            refine_index = index_factory_sub(d, sm[2].str(), metric);
+            std::unique_ptr<Index> refine_index =
+                    index_factory_sub(d, sm[2].str(), metric);
+            index_rf = new IndexRefine(
+                    filter_index.release(), refine_index.release());
+            index_rf->own_refine_index = true;
         } else { // RFlat
-            refine_index.reset(new IndexFlat(d, metric));
+            index_rf = new IndexRefineFlat(filter_index.release(), nullptr);
         }
-        IndexRefine* index_rf =
-                new IndexRefine(filter_index.get(), refine_index.get());
+        FAISS_ASSERT(index_rf != nullptr);
         index_rf->own_fields = true;
-        filter_index.release();
-        refine_index.release();
-        index_rf->own_refine_index = true;
         return std::unique_ptr<Index>(index_rf);
     }

data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp CHANGED Viewed

@@ -7,6 +7,7 @@
 #include <faiss/invlists/BlockInvertedLists.h>
+#include <faiss/impl/CodePacker.h>
 #include <faiss/impl/FaissAssert.h>
 #include <faiss/impl/io.h>
@@ -25,29 +26,43 @@ BlockInvertedLists::BlockInvertedLists(
     codes.resize(nlist);
 }
+BlockInvertedLists::BlockInvertedLists(size_t nlist, const CodePacker* packer)
+        : InvertedLists(nlist, InvertedLists::INVALID_CODE_SIZE),
+          n_per_block(packer->nvec),
+          block_size(packer->block_size),
+          packer(packer) {
+    ids.resize(nlist);
+    codes.resize(nlist);
+}
 BlockInvertedLists::BlockInvertedLists()
-        : InvertedLists(0, InvertedLists::INVALID_CODE_SIZE),
-          n_per_block(0),
-          block_size(0) {}
+        : InvertedLists(0, InvertedLists::INVALID_CODE_SIZE) {}
 size_t BlockInvertedLists::add_entries(
         size_t list_no,
         size_t n_entry,
         const idx_t* ids_in,
         const uint8_t* code) {
-    if (n_entry == 0)
+    if (n_entry == 0) {
         return 0;
+    }
     FAISS_THROW_IF_NOT(list_no < nlist);
     size_t o = ids[list_no].size();
-    FAISS_THROW_IF_NOT(
-            o == 0); // not clear how we should handle subsequent adds
     ids[list_no].resize(o + n_entry);
     memcpy(&ids[list_no][o], ids_in, sizeof(ids_in[0]) * n_entry);
-    // copy whole blocks
-    size_t n_block = (n_entry + n_per_block - 1) / n_per_block;
+    size_t n_block = (o + n_entry + n_per_block - 1) / n_per_block;
     codes[list_no].resize(n_block * block_size);
-    memcpy(&codes[list_no][o * code_size], code, n_block * block_size);
+    if (o % block_size == 0) {
+        // copy whole blocks
+        memcpy(&codes[list_no][o * code_size], code, n_block * block_size);
+    } else {
+        FAISS_THROW_IF_NOT_MSG(packer, "missing code packer");
+        std::vector<uint8_t> buffer(packer->code_size);
+        for (size_t i = 0; i < n_entry; i++) {
+            packer->unpack_1(code, i, buffer.data());
+            packer->pack_1(buffer.data(), i + o, codes[list_no].data());
+        }
+    }
     return o;
 }
@@ -61,7 +76,7 @@ const uint8_t* BlockInvertedLists::get_codes(size_t list_no) const {
     return codes[list_no].get();
 }
-const InvertedLists::idx_t* BlockInvertedLists::get_ids(size_t list_no) const {
+const idx_t* BlockInvertedLists::get_ids(size_t list_no) const {
     assert(list_no < nlist);
     return ids[list_no].data();
 }
@@ -95,7 +110,9 @@ void BlockInvertedLists::update_entries(
     */
 }
-BlockInvertedLists::~BlockInvertedLists() {}
+BlockInvertedLists::~BlockInvertedLists() {
+    delete packer;
+}
 /**************************************************
  * IO hook implementation

data/vendor/faiss/faiss/invlists/BlockInvertedLists.h CHANGED Viewed

@@ -14,6 +14,8 @@
 namespace faiss {
+struct CodePacker;
 /** Inverted Lists that are organized by blocks.
  *
  * Different from the regular inverted lists, the codes are organized by blocks
@@ -28,13 +30,17 @@ namespace faiss {
  * data.
  */
 struct BlockInvertedLists : InvertedLists {
-    size_t n_per_block; // nb of vectors stored per block
-    size_t block_size;  // nb bytes per block
+    size_t n_per_block = 0; // nb of vectors stored per block
+    size_t block_size = 0;  // nb bytes per block
+    // required to interpret the content of the blocks (owned by this)
+    const CodePacker* packer = nullptr;
     std::vector<AlignedTable<uint8_t>> codes;
     std::vector<std::vector<idx_t>> ids;
     BlockInvertedLists(size_t nlist, size_t vec_per_block, size_t block_size);
+    BlockInvertedLists(size_t nlist, const CodePacker* packer);
     BlockInvertedLists();

data/vendor/faiss/faiss/invlists/DirectMap.cpp CHANGED Viewed

@@ -68,7 +68,7 @@ void DirectMap::clear() {
     hashtable.clear();
 }
-DirectMap::idx_t DirectMap::get(idx_t key) const {
+idx_t DirectMap::get(idx_t key) const {
     if (type == Array) {
         FAISS_THROW_IF_NOT_MSG(key >= 0 && key < array.size(), "invalid key");
         idx_t lo = array[key];