RubyGems - faiss - Versions diffs - 0.3.0 → 0.3.2 - Mend

faiss 0.3.0 → 0.3.2

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (216) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +9 -0
data/LICENSE.txt +1 -1
data/README.md +1 -1
data/ext/faiss/extconf.rb +9 -2
data/ext/faiss/index.cpp +1 -1
data/ext/faiss/index_binary.cpp +2 -2
data/ext/faiss/product_quantizer.cpp +1 -1
data/lib/faiss/version.rb +1 -1
data/vendor/faiss/faiss/AutoTune.cpp +7 -7
data/vendor/faiss/faiss/AutoTune.h +1 -2
data/vendor/faiss/faiss/Clustering.cpp +39 -22
data/vendor/faiss/faiss/Clustering.h +40 -21
data/vendor/faiss/faiss/IVFlib.cpp +26 -12
data/vendor/faiss/faiss/Index.cpp +1 -1
data/vendor/faiss/faiss/Index.h +40 -10
data/vendor/faiss/faiss/Index2Layer.cpp +7 -7
data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +176 -166
data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +15 -15
data/vendor/faiss/faiss/IndexBinary.cpp +9 -4
data/vendor/faiss/faiss/IndexBinary.h +8 -19
data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +2 -1
data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +24 -31
data/vendor/faiss/faiss/IndexBinaryHNSW.h +1 -1
data/vendor/faiss/faiss/IndexBinaryHash.cpp +25 -50
data/vendor/faiss/faiss/IndexBinaryIVF.cpp +107 -188
data/vendor/faiss/faiss/IndexFastScan.cpp +95 -146
data/vendor/faiss/faiss/IndexFastScan.h +9 -8
data/vendor/faiss/faiss/IndexFlat.cpp +206 -10
data/vendor/faiss/faiss/IndexFlat.h +20 -1
data/vendor/faiss/faiss/IndexFlatCodes.cpp +170 -5
data/vendor/faiss/faiss/IndexFlatCodes.h +23 -4
data/vendor/faiss/faiss/IndexHNSW.cpp +231 -382
data/vendor/faiss/faiss/IndexHNSW.h +62 -49
data/vendor/faiss/faiss/IndexIDMap.cpp +69 -28
data/vendor/faiss/faiss/IndexIDMap.h +24 -2
data/vendor/faiss/faiss/IndexIVF.cpp +162 -56
data/vendor/faiss/faiss/IndexIVF.h +46 -6
data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +33 -26
data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +6 -2
data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +19 -46
data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -3
data/vendor/faiss/faiss/IndexIVFFastScan.cpp +502 -401
data/vendor/faiss/faiss/IndexIVFFastScan.h +63 -26
data/vendor/faiss/faiss/IndexIVFFlat.cpp +15 -5
data/vendor/faiss/faiss/IndexIVFFlat.h +3 -2
data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +172 -0
data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.h +56 -0
data/vendor/faiss/faiss/IndexIVFPQ.cpp +79 -125
data/vendor/faiss/faiss/IndexIVFPQ.h +6 -7
data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +39 -52
data/vendor/faiss/faiss/IndexIVFPQFastScan.h +4 -3
data/vendor/faiss/faiss/IndexIVFPQR.cpp +45 -29
data/vendor/faiss/faiss/IndexIVFPQR.h +5 -2
data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +25 -27
data/vendor/faiss/faiss/IndexIVFSpectralHash.h +6 -6
data/vendor/faiss/faiss/IndexLSH.cpp +14 -16
data/vendor/faiss/faiss/IndexLattice.cpp +1 -19
data/vendor/faiss/faiss/IndexLattice.h +3 -22
data/vendor/faiss/faiss/IndexNNDescent.cpp +3 -33
data/vendor/faiss/faiss/IndexNNDescent.h +1 -1
data/vendor/faiss/faiss/IndexNSG.cpp +11 -27
data/vendor/faiss/faiss/IndexNSG.h +11 -11
data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +56 -0
data/vendor/faiss/faiss/IndexNeuralNetCodec.h +49 -0
data/vendor/faiss/faiss/IndexPQ.cpp +72 -88
data/vendor/faiss/faiss/IndexPQ.h +1 -4
data/vendor/faiss/faiss/IndexPQFastScan.cpp +1 -1
data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -31
data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
data/vendor/faiss/faiss/IndexRefine.cpp +54 -24
data/vendor/faiss/faiss/IndexRefine.h +7 -0
data/vendor/faiss/faiss/IndexReplicas.cpp +23 -26
data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +25 -17
data/vendor/faiss/faiss/IndexScalarQuantizer.h +6 -4
data/vendor/faiss/faiss/IndexShards.cpp +21 -29
data/vendor/faiss/faiss/IndexShardsIVF.cpp +1 -2
data/vendor/faiss/faiss/MatrixStats.cpp +17 -32
data/vendor/faiss/faiss/MatrixStats.h +21 -9
data/vendor/faiss/faiss/MetaIndexes.cpp +35 -35
data/vendor/faiss/faiss/MetricType.h +7 -2
data/vendor/faiss/faiss/VectorTransform.cpp +13 -26
data/vendor/faiss/faiss/VectorTransform.h +7 -7
data/vendor/faiss/faiss/clone_index.cpp +15 -10
data/vendor/faiss/faiss/clone_index.h +3 -0
data/vendor/faiss/faiss/cppcontrib/detail/UintReader.h +95 -17
data/vendor/faiss/faiss/cppcontrib/factory_tools.cpp +152 -0
data/vendor/faiss/faiss/cppcontrib/factory_tools.h +24 -0
data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +83 -30
data/vendor/faiss/faiss/gpu/GpuCloner.cpp +123 -8
data/vendor/faiss/faiss/gpu/GpuCloner.h +22 -0
data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +13 -0
data/vendor/faiss/faiss/gpu/GpuDistance.h +46 -38
data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -1
data/vendor/faiss/faiss/gpu/GpuIndex.h +30 -12
data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +282 -0
data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +4 -4
data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +14 -9
data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +20 -3
data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +22 -11
data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +1 -3
data/vendor/faiss/faiss/gpu/GpuResources.cpp +24 -3
data/vendor/faiss/faiss/gpu/GpuResources.h +39 -11
data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +142 -17
data/vendor/faiss/faiss/gpu/StandardGpuResources.h +57 -3
data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +26 -21
data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +7 -1
data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +8 -5
data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +25 -0
data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +129 -9
data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +332 -40
data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +299 -208
data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +1 -0
data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +1 -1
data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +6 -0
data/vendor/faiss/faiss/gpu/utils/RaftUtils.h +75 -0
data/vendor/faiss/faiss/gpu/utils/Timer.cpp +4 -1
data/vendor/faiss/faiss/gpu/utils/Timer.h +1 -1
data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +3 -1
data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +5 -5
data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +26 -1
data/vendor/faiss/faiss/impl/AuxIndexStructures.h +10 -3
data/vendor/faiss/faiss/impl/DistanceComputer.h +70 -1
data/vendor/faiss/faiss/impl/FaissAssert.h +4 -2
data/vendor/faiss/faiss/impl/FaissException.h +13 -34
data/vendor/faiss/faiss/impl/HNSW.cpp +605 -186
data/vendor/faiss/faiss/impl/HNSW.h +52 -30
data/vendor/faiss/faiss/impl/IDSelector.h +4 -4
data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +11 -9
data/vendor/faiss/faiss/impl/LookupTableScaler.h +34 -0
data/vendor/faiss/faiss/impl/NNDescent.cpp +42 -27
data/vendor/faiss/faiss/impl/NSG.cpp +0 -29
data/vendor/faiss/faiss/impl/NSG.h +1 -1
data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +14 -12
data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.h +1 -1
data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +25 -22
data/vendor/faiss/faiss/impl/ProductQuantizer.h +6 -2
data/vendor/faiss/faiss/impl/Quantizer.h +1 -1
data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +27 -1015
data/vendor/faiss/faiss/impl/ResidualQuantizer.h +5 -63
data/vendor/faiss/faiss/impl/ResultHandler.h +347 -172
data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +1104 -147
data/vendor/faiss/faiss/impl/ScalarQuantizer.h +3 -8
data/vendor/faiss/faiss/impl/code_distance/code_distance-avx2.h +285 -42
data/vendor/faiss/faiss/impl/code_distance/code_distance-avx512.h +248 -0
data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +21 -14
data/vendor/faiss/faiss/impl/code_distance/code_distance.h +22 -12
data/vendor/faiss/faiss/impl/index_read.cpp +74 -34
data/vendor/faiss/faiss/impl/index_read_utils.h +37 -0
data/vendor/faiss/faiss/impl/index_write.cpp +88 -51
data/vendor/faiss/faiss/impl/io.cpp +23 -15
data/vendor/faiss/faiss/impl/io.h +4 -4
data/vendor/faiss/faiss/impl/io_macros.h +6 -0
data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -1
data/vendor/faiss/faiss/impl/platform_macros.h +40 -1
data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +14 -0
data/vendor/faiss/faiss/impl/pq4_fast_scan.h +7 -6
data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +52 -38
data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +487 -49
data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +960 -0
data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +176 -0
data/vendor/faiss/faiss/impl/simd_result_handlers.h +481 -225
data/vendor/faiss/faiss/index_factory.cpp +41 -20
data/vendor/faiss/faiss/index_io.h +12 -5
data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +28 -8
data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +3 -0
data/vendor/faiss/faiss/invlists/DirectMap.cpp +10 -2
data/vendor/faiss/faiss/invlists/InvertedLists.cpp +73 -17
data/vendor/faiss/faiss/invlists/InvertedLists.h +26 -8
data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +24 -9
data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +2 -1
data/vendor/faiss/faiss/python/python_callbacks.cpp +4 -4
data/vendor/faiss/faiss/utils/Heap.cpp +3 -1
data/vendor/faiss/faiss/utils/Heap.h +105 -0
data/vendor/faiss/faiss/utils/NeuralNet.cpp +342 -0
data/vendor/faiss/faiss/utils/NeuralNet.h +147 -0
data/vendor/faiss/faiss/utils/WorkerThread.h +1 -0
data/vendor/faiss/faiss/utils/bf16.h +36 -0
data/vendor/faiss/faiss/utils/distances.cpp +147 -123
data/vendor/faiss/faiss/utils/distances.h +86 -9
data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +5 -5
data/vendor/faiss/faiss/utils/distances_fused/avx512.h +2 -2
data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +2 -2
data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +1 -1
data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +5 -5
data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.h +1 -1
data/vendor/faiss/faiss/utils/distances_simd.cpp +1589 -243
data/vendor/faiss/faiss/utils/extra_distances-inl.h +70 -0
data/vendor/faiss/faiss/utils/extra_distances.cpp +85 -137
data/vendor/faiss/faiss/utils/extra_distances.h +3 -2
data/vendor/faiss/faiss/utils/fp16-arm.h +29 -0
data/vendor/faiss/faiss/utils/fp16.h +2 -0
data/vendor/faiss/faiss/utils/hamming.cpp +163 -111
data/vendor/faiss/faiss/utils/hamming.h +58 -0
data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +16 -89
data/vendor/faiss/faiss/utils/hamming_distance/common.h +1 -0
data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +19 -88
data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +58 -0
data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +14 -104
data/vendor/faiss/faiss/utils/partitioning.cpp +3 -4
data/vendor/faiss/faiss/utils/prefetch.h +77 -0
data/vendor/faiss/faiss/utils/quantize_lut.cpp +0 -14
data/vendor/faiss/faiss/utils/random.cpp +43 -0
data/vendor/faiss/faiss/utils/random.h +25 -0
data/vendor/faiss/faiss/utils/simdlib.h +10 -1
data/vendor/faiss/faiss/utils/simdlib_avx2.h +0 -6
data/vendor/faiss/faiss/utils/simdlib_avx512.h +296 -0
data/vendor/faiss/faiss/utils/simdlib_neon.h +77 -79
data/vendor/faiss/faiss/utils/simdlib_ppc64.h +1084 -0
data/vendor/faiss/faiss/utils/sorting.cpp +140 -5
data/vendor/faiss/faiss/utils/sorting.h +27 -0
data/vendor/faiss/faiss/utils/transpose/transpose-avx512-inl.h +176 -0
data/vendor/faiss/faiss/utils/utils.cpp +120 -7
data/vendor/faiss/faiss/utils/utils.h +60 -20
metadata +23 -4
data/vendor/faiss/faiss/impl/code_distance/code_distance_avx512.h +0 -102

data/vendor/faiss/faiss/IndexIVFPQR.cpp CHANGED Viewed

@@ -35,10 +35,12 @@ IndexIVFPQR::IndexIVFPQR(
           refine_pq(d, M_refine, nbits_per_idx_refine),
           k_factor(4) {
     by_residual = true;
+    refine_pq.cp.max_points_per_centroid = 1000;
 }
 IndexIVFPQR::IndexIVFPQR() : k_factor(1) {
     by_residual = true;
+    refine_pq.cp.max_points_per_centroid = 1000;
 }
 void IndexIVFPQR::reset() {
@@ -46,24 +48,39 @@ void IndexIVFPQR::reset() {
     refine_codes.clear();
 }
-void IndexIVFPQR::train_residual(idx_t n, const float* x) {
-    float* residual_2 = new float[n * d];
-    ScopeDeleter<float> del(residual_2);
-    train_residual_o(n, x, residual_2);
-    if (verbose)
+void IndexIVFPQR::train_encoder(idx_t n, const float* x, const idx_t* assign) {
+    IndexIVFPQ::train_encoder(n, x, assign);
+    if (verbose) {
         printf("training %zdx%zd 2nd level PQ quantizer on %" PRId64
                " %dD-vectors\n",
                refine_pq.M,
                refine_pq.ksub,
                n,
                d);
-    refine_pq.cp.max_points_per_centroid = 1000;
+    }
     refine_pq.cp.verbose = verbose;
-    refine_pq.train(n, residual_2);
+    // 2nd level residual
+    std::vector<float> residual_2(n * d);
+    std::vector<uint8_t> train_codes(pq.code_size * n);
+    pq.compute_codes(x, train_codes.data(), n);
+    for (idx_t i = 0; i < n; i++) {
+        const float* xx = x + i * d;
+        float* res = residual_2.data() + i * d;
+        pq.decode(train_codes.data() + i * pq.code_size, res);
+        for (int j = 0; j < d; j++) {
+            res[j] = xx[j] - res[j];
+        }
+    }
+    refine_pq.train(n, residual_2.data());
+}
+idx_t IndexIVFPQR::train_encoder_num_vectors() const {
+    return std::max(
+            pq.cp.max_points_per_centroid * pq.ksub,
+            refine_pq.cp.max_points_per_centroid * refine_pq.ksub);
 }
 void IndexIVFPQR::add_with_ids(idx_t n, const float* x, const idx_t* xids) {
@@ -74,18 +91,18 @@ void IndexIVFPQR::add_core(
         idx_t n,
         const float* x,
         const idx_t* xids,
-        const idx_t* precomputed_idx) {
-    float* residual_2 = new float[n * d];
-    ScopeDeleter<float> del(residual_2);
+        const idx_t* precomputed_idx,
+        void* /*inverted_list_context*/) {
+    std::unique_ptr<float[]> residual_2(new float[n * d]);
     idx_t n0 = ntotal;
-    add_core_o(n, x, xids, residual_2, precomputed_idx);
+    add_core_o(n, x, xids, residual_2.get(), precomputed_idx);
     refine_codes.resize(ntotal * refine_pq.code_size);
     refine_pq.compute_codes(
-            residual_2, &refine_codes[n0 * refine_pq.code_size], n);
+            residual_2.get(), &refine_codes[n0 * refine_pq.code_size], n);
 }
 #define TIC t0 = get_cycles()
 #define TOC get_cycles() - t0
@@ -104,11 +121,10 @@ void IndexIVFPQR::search_preassigned(
     uint64_t t0;
     TIC;
     size_t k_coarse = long(k * k_factor);
-    idx_t* coarse_labels = new idx_t[k_coarse * n];
-    ScopeDeleter<idx_t> del1(coarse_labels);
-    { // query with quantizer levels 1 and 2.
-        float* coarse_distances = new float[k_coarse * n];
-        ScopeDeleter<float> del(coarse_distances);
+    std::unique_ptr<idx_t[]> coarse_labels(new idx_t[k_coarse * n]);
+    {
+        // query with quantizer levels 1 and 2.
+        std::unique_ptr<float[]> coarse_distances(new float[k_coarse * n]);
         IndexIVFPQ::search_preassigned(
                 n,
@@ -116,8 +132,8 @@ void IndexIVFPQR::search_preassigned(
                 k_coarse,
                 idx,
                 L1_dis,
-                coarse_distances,
-                coarse_labels,
+                coarse_distances.get(),
+                coarse_labels.get(),
                 true,
                 params);
     }
@@ -131,13 +147,12 @@ void IndexIVFPQR::search_preassigned(
 #pragma omp parallel reduction(+ : n_refine)
     {
         // tmp buffers
-        float* residual_1 = new float[2 * d];
-        ScopeDeleter<float> del(residual_1);
-        float* residual_2 = residual_1 + d;
+        std::unique_ptr<float[]> residual_1(new float[2 * d]);
+        float* residual_2 = residual_1.get() + d;
 #pragma omp for
         for (idx_t i = 0; i < n; i++) {
             const float* xq = x + i * d;
-            const idx_t* shortlist = coarse_labels + k_coarse * i;
+            const idx_t* shortlist = coarse_labels.get() + k_coarse * i;
             float* heap_sim = distances + k * i;
             idx_t* heap_ids = labels + k * i;
             maxheap_heapify(k, heap_sim, heap_ids);
@@ -155,7 +170,7 @@ void IndexIVFPQR::search_preassigned(
                 assert(ofs >= 0 && ofs < invlists->list_size(list_no));
                 // 1st level residual
-                quantizer->compute_residual(xq, residual_1, list_no);
+                quantizer->compute_residual(xq, residual_1.get(), list_no);
                 // 2nd level residual
                 const uint8_t* l2code = invlists->get_single_code(list_no, ofs);
@@ -168,9 +183,10 @@ void IndexIVFPQR::search_preassigned(
                 idx_t id = invlists->get_single_id(list_no, ofs);
                 assert(0 <= id && id < ntotal);
                 refine_pq.decode(
-                        &refine_codes[id * refine_pq.code_size], residual_1);
+                        &refine_codes[id * refine_pq.code_size],
+                        residual_1.get());
-                float dis = fvec_L2sqr(residual_1, residual_2, d);
+                float dis = fvec_L2sqr(residual_1.get(), residual_2, d);
                 if (dis < heap_sim[0]) {
                     idx_t id_or_pair = store_pairs ? sl : id;

data/vendor/faiss/faiss/IndexIVFPQR.h CHANGED Viewed

@@ -37,7 +37,9 @@ struct IndexIVFPQR : IndexIVFPQ {
     size_t remove_ids(const IDSelector& sel) override;
     /// trains the two product quantizers
-    void train_residual(idx_t n, const float* x) override;
+    void train_encoder(idx_t n, const float* x, const idx_t* assign) override;
+    idx_t train_encoder_num_vectors() const override;
     void add_with_ids(idx_t n, const float* x, const idx_t* xids) override;
@@ -46,7 +48,8 @@ struct IndexIVFPQR : IndexIVFPQ {
             idx_t n,
             const float* x,
             const idx_t* xids,
-            const idx_t* precomputed_idx) override;
+            const idx_t* precomputed_idx,
+            void* inverted_list_context = nullptr) override;
     void reconstruct_from_offset(int64_t list_no, int64_t offset, float* recons)
             const override;

data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp CHANGED Viewed

@@ -9,8 +9,8 @@
 #include <faiss/IndexIVFSpectralHash.h>
-#include <stdint.h>
 #include <algorithm>
+#include <cstdint>
 #include <memory>
 #include <faiss/IndexLSH.h>
@@ -31,22 +31,17 @@ IndexIVFSpectralHash::IndexIVFSpectralHash(
         float period)
         : IndexIVF(quantizer, d, nlist, (nbit + 7) / 8, METRIC_L2),
           nbit(nbit),
-          period(period),
-          threshold_type(Thresh_global) {
+          period(period) {
     RandomRotationMatrix* rr = new RandomRotationMatrix(d, nbit);
     rr->init(1234);
     vt = rr;
-    own_fields = true;
     is_trained = false;
+    by_residual = false;
 }
-IndexIVFSpectralHash::IndexIVFSpectralHash()
-        : IndexIVF(),
-          vt(nullptr),
-          own_fields(false),
-          nbit(0),
-          period(0),
-          threshold_type(Thresh_global) {}
+IndexIVFSpectralHash::IndexIVFSpectralHash() : IndexIVF() {
+    by_residual = false;
+}
 IndexIVFSpectralHash::~IndexIVFSpectralHash() {
     if (own_fields) {
@@ -67,10 +62,14 @@ float median(size_t n, float* x) {
 } // namespace
-void IndexIVFSpectralHash::train_residual(idx_t n, const float* x) {
+void IndexIVFSpectralHash::train_encoder(
+        idx_t n,
+        const float* x,
+        const idx_t* assign) {
     if (!vt->is_trained) {
         vt->train(n, x);
     }
+    FAISS_THROW_IF_NOT(!by_residual);
     if (threshold_type == Thresh_global) {
         // nothing to do
@@ -158,7 +157,7 @@ void binarize_with_freq(
     }
 }
-}; // namespace
+} // namespace
 void IndexIVFSpectralHash::encode_vectors(
         idx_t n,
@@ -167,6 +166,7 @@ void IndexIVFSpectralHash::encode_vectors(
         uint8_t* codes,
         bool include_listnos) const {
     FAISS_THROW_IF_NOT(is_trained);
+    FAISS_THROW_IF_NOT(!by_residual);
     float freq = 2.0 / period;
     size_t coarse_size = include_listnos ? coarse_code_size() : 0;
@@ -224,6 +224,7 @@ struct IVFScanner : InvertedListScanner {
               hc(qcode.data(), index->code_size) {
         this->store_pairs = store_pairs;
         this->code_size = index->code_size;
+        this->keep_max = is_similarity_metric(index->metric_type);
     }
     void set_query(const float* query) override {
@@ -288,26 +289,23 @@ struct IVFScanner : InvertedListScanner {
     }
 };
+struct BuildScanner {
+    using T = InvertedListScanner*;
+    template <class HammingComputer>
+    static T f(const IndexIVFSpectralHash* index, bool store_pairs) {
+        return new IVFScanner<HammingComputer>(index, store_pairs);
+    }
+};
 } // anonymous namespace
 InvertedListScanner* IndexIVFSpectralHash::get_InvertedListScanner(
         bool store_pairs,
         const IDSelector* sel) const {
     FAISS_THROW_IF_NOT(!sel);
-    switch (code_size) {
-#define HANDLE_CODE_SIZE(cs) \
-    case cs:                 \
-        return new IVFScanner<HammingComputer##cs>(this, store_pairs)
-        HANDLE_CODE_SIZE(4);
-        HANDLE_CODE_SIZE(8);
-        HANDLE_CODE_SIZE(16);
-        HANDLE_CODE_SIZE(20);
-        HANDLE_CODE_SIZE(32);
-        HANDLE_CODE_SIZE(64);
-#undef HANDLE_CODE_SIZE
-        default:
-            return new IVFScanner<HammingComputerDefault>(this, store_pairs);
-    }
+    BuildScanner bs;
+    return dispatch_HammingComputer(code_size, bs, this, store_pairs);
 }
 void IndexIVFSpectralHash::replace_vt(VectorTransform* vt_in, bool own) {

data/vendor/faiss/faiss/IndexIVFSpectralHash.h CHANGED Viewed

@@ -30,14 +30,14 @@ struct IndexPreTransform;
  */
 struct IndexIVFSpectralHash : IndexIVF {
     /// transformation from d to nbit dim
-    VectorTransform* vt;
+    VectorTransform* vt = nullptr;
     /// own the vt
-    bool own_fields;
+    bool own_fields = true;
     /// nb of bits of the binary signature
-    int nbit;
+    int nbit = 0;
     /// interval size for 0s and 1s
-    float period;
+    float period = 0;
     enum ThresholdType {
         Thresh_global,        ///< global threshold at 0
@@ -45,7 +45,7 @@ struct IndexIVFSpectralHash : IndexIVF {
         Thresh_centroid_half, ///< central interval around centroid
         Thresh_median         ///< median of training set
     };
-    ThresholdType threshold_type;
+    ThresholdType threshold_type = Thresh_global;
     /// Trained threshold.
     /// size nlist * nbit or 0 if Thresh_global
@@ -60,7 +60,7 @@ struct IndexIVFSpectralHash : IndexIVF {
     IndexIVFSpectralHash();
-    void train_residual(idx_t n, const float* x) override;
+    void train_encoder(idx_t n, const float* x, const idx_t* assign) override;
     void encode_vectors(
             idx_t n,

data/vendor/faiss/faiss/IndexLSH.cpp CHANGED Viewed

@@ -11,6 +11,7 @@
 #include <cstring>
 #include <algorithm>
+#include <memory>
 #include <faiss/impl/FaissAssert.h>
 #include <faiss/utils/hamming.h>
@@ -56,7 +57,7 @@ const float* IndexLSH::apply_preprocess(idx_t n, const float* x) const {
     }
     if (train_thresholds) {
-        if (xt == NULL) {
+        if (xt == nullptr) {
             xt = new float[nbits * n];
             memcpy(xt, x, sizeof(*x) * n * nbits);
         }
@@ -75,18 +76,17 @@ void IndexLSH::train(idx_t n, const float* x) {
         thresholds.resize(nbits);
         train_thresholds = false;
         const float* xt = apply_preprocess(n, x);
-        ScopeDeleter<float> del(xt == x ? nullptr : xt);
+        std::unique_ptr<const float[]> del(xt == x ? nullptr : xt);
         train_thresholds = true;
-        float* transposed_x = new float[n * nbits];
-        ScopeDeleter<float> del2(transposed_x);
+        std::unique_ptr<float[]> transposed_x(new float[n * nbits]);
         for (idx_t i = 0; i < n; i++)
             for (idx_t j = 0; j < nbits; j++)
                 transposed_x[j * n + i] = xt[i * nbits + j];
         for (idx_t i = 0; i < nbits; i++) {
-            float* xi = transposed_x + i * n;
+            float* xi = transposed_x.get() + i * n;
             // std::nth_element
             std::sort(xi, xi + n);
             if (n % 2 == 1)
@@ -110,19 +110,17 @@ void IndexLSH::search(
     FAISS_THROW_IF_NOT(k > 0);
     FAISS_THROW_IF_NOT(is_trained);
     const float* xt = apply_preprocess(n, x);
-    ScopeDeleter<float> del(xt == x ? nullptr : xt);
+    std::unique_ptr<const float[]> del(xt == x ? nullptr : xt);
-    uint8_t* qcodes = new uint8_t[n * code_size];
-    ScopeDeleter<uint8_t> del2(qcodes);
+    std::unique_ptr<uint8_t[]> qcodes(new uint8_t[n * code_size]);
-    fvecs2bitvecs(xt, qcodes, nbits, n);
+    fvecs2bitvecs(xt, qcodes.get(), nbits, n);
-    int* idistances = new int[n * k];
-    ScopeDeleter<int> del3(idistances);
+    std::unique_ptr<int[]> idistances(new int[n * k]);
-    int_maxheap_array_t res = {size_t(n), size_t(k), labels, idistances};
+    int_maxheap_array_t res = {size_t(n), size_t(k), labels, idistances.get()};
-    hammings_knn_hc(&res, qcodes, codes.data(), ntotal, code_size, true);
+    hammings_knn_hc(&res, qcodes.get(), codes.data(), ntotal, code_size, true);
     // convert distances to floats
     for (int i = 0; i < k * n; i++)
@@ -146,16 +144,16 @@ void IndexLSH::transfer_thresholds(LinearTransform* vt) {
 void IndexLSH::sa_encode(idx_t n, const float* x, uint8_t* bytes) const {
     FAISS_THROW_IF_NOT(is_trained);
     const float* xt = apply_preprocess(n, x);
-    ScopeDeleter<float> del(xt == x ? nullptr : xt);
+    std::unique_ptr<const float[]> del(xt == x ? nullptr : xt);
     fvecs2bitvecs(xt, bytes, nbits, n);
 }
 void IndexLSH::sa_decode(idx_t n, const uint8_t* bytes, float* x) const {
     float* xt = x;
-    ScopeDeleter<float> del;
+    std::unique_ptr<float[]> del;
     if (rotate_data || nbits != d) {
         xt = new float[n * nbits];
-        del.set(xt);
+        del.reset(xt);
     }
     bitvecs2fvecs(bytes, xt, nbits, n);

data/vendor/faiss/faiss/IndexLattice.cpp CHANGED Viewed

@@ -15,7 +15,7 @@
 namespace faiss {
 IndexLattice::IndexLattice(idx_t d, int nsq, int scale_nbit, int r2)
-        : Index(d),
+        : IndexFlatCodes(0, d, METRIC_L2),
           nsq(nsq),
           dsq(d / nsq),
           zn_sphere_codec(dsq, r2),
@@ -114,22 +114,4 @@ void IndexLattice::sa_decode(idx_t n, const uint8_t* codes, float* x) const {
     }
 }
-void IndexLattice::add(idx_t, const float*) {
-    FAISS_THROW_MSG("not implemented");
-}
-void IndexLattice::search(
-        idx_t,
-        const float*,
-        idx_t,
-        float*,
-        idx_t*,
-        const SearchParameters*) const {
-    FAISS_THROW_MSG("not implemented");
-}
-void IndexLattice::reset() {
-    FAISS_THROW_MSG("not implemented");
-}
 } // namespace faiss

data/vendor/faiss/faiss/IndexLattice.h CHANGED Viewed

@@ -5,21 +5,18 @@
  * LICENSE file in the root directory of this source tree.
  */
-// -*- c++ -*-
-#ifndef FAISS_INDEX_LATTICE_H
-#define FAISS_INDEX_LATTICE_H
+#pragma once
 #include <vector>
-#include <faiss/IndexIVF.h>
+#include <faiss/IndexFlatCodes.h>
 #include <faiss/impl/lattice_Zn.h>
 namespace faiss {
 /** Index that encodes a vector with a series of Zn lattice quantizers
  */
-struct IndexLattice : Index {
+struct IndexLattice : IndexFlatCodes {
     /// number of sub-vectors
     int nsq;
     /// dimension of sub-vectors
@@ -30,8 +27,6 @@ struct IndexLattice : Index {
     /// nb bits used to encode the scale, per subvector
     int scale_nbit, lattice_nbit;
-    /// total, in bytes
-    size_t code_size;
     /// mins and maxes of the vector norms, per subquantizer
     std::vector<float> trained;
@@ -46,20 +41,6 @@ struct IndexLattice : Index {
     void sa_encode(idx_t n, const float* x, uint8_t* bytes) const override;
     void sa_decode(idx_t n, const uint8_t* bytes, float* x) const override;
-    /// not implemented
-    void add(idx_t n, const float* x) override;
-    void search(
-            idx_t n,
-            const float* x,
-            idx_t k,
-            float* distances,
-            idx_t* labels,
-            const SearchParameters* params = nullptr) const override;
-    void reset() override;
 };
 } // namespace faiss
-#endif

data/vendor/faiss/faiss/IndexNNDescent.cpp CHANGED Viewed

@@ -58,35 +58,6 @@ using storage_idx_t = NNDescent::storage_idx_t;
 namespace {
-/* Wrap the distance computer into one that negates the
-   distances. This makes supporting INNER_PRODUCE search easier */
-struct NegativeDistanceComputer : DistanceComputer {
-    /// owned by this
-    DistanceComputer* basedis;
-    explicit NegativeDistanceComputer(DistanceComputer* basedis)
-            : basedis(basedis) {}
-    void set_query(const float* x) override {
-        basedis->set_query(x);
-    }
-    /// compute distance of vector i to current query
-    float operator()(idx_t i) override {
-        return -(*basedis)(i);
-    }
-    /// compute distance between two stored vectors
-    float symmetric_dis(idx_t i, idx_t j) override {
-        return -basedis->symmetric_dis(i, j);
-    }
-    ~NegativeDistanceComputer() override {
-        delete basedis;
-    }
-};
 DistanceComputer* storage_distance_computer(const Index* storage) {
     if (is_similarity_metric(storage->metric_type)) {
         return new NegativeDistanceComputer(storage->get_distance_computer());
@@ -158,8 +129,8 @@ void IndexNNDescent::search(
         {
             VisitedTable vt(ntotal);
-            DistanceComputer* dis = storage_distance_computer(storage);
-            ScopeDeleter1<DistanceComputer> del(dis);
+            std::unique_ptr<DistanceComputer> dis(
+                    storage_distance_computer(storage));
 #pragma omp for
             for (idx_t i = i0; i < i1; i++) {
@@ -197,8 +168,7 @@ void IndexNNDescent::add(idx_t n, const float* x) {
     storage->add(n, x);
     ntotal = storage->ntotal;
-    DistanceComputer* dis = storage_distance_computer(storage);
-    ScopeDeleter1<DistanceComputer> del(dis);
+    std::unique_ptr<DistanceComputer> dis(storage_distance_computer(storage));
     nndescent.build(*dis, ntotal, verbose);
 }

data/vendor/faiss/faiss/IndexNNDescent.h CHANGED Viewed

@@ -26,7 +26,7 @@ struct IndexNNDescent : Index {
     /// Faiss results are 64-bit
-    // the link strcuture
+    // the link structure
     NNDescent nndescent;
     // the sequential storage

data/vendor/faiss/faiss/IndexNSG.cpp CHANGED Viewed

@@ -29,32 +29,16 @@ using namespace nsg;
  * IndexNSG implementation
  **************************************************************/
-IndexNSG::IndexNSG(int d, int R, MetricType metric)
-        : Index(d, metric),
-          nsg(R),
-          own_fields(false),
-          storage(nullptr),
-          is_built(false),
-          GK(64),
-          build_type(0) {
-    nndescent_S = 10;
-    nndescent_R = 100;
+IndexNSG::IndexNSG(int d, int R, MetricType metric) : Index(d, metric), nsg(R) {
     nndescent_L = GK + 50;
-    nndescent_iter = 10;
 }
 IndexNSG::IndexNSG(Index* storage, int R)
         : Index(storage->d, storage->metric_type),
           nsg(R),
-          own_fields(false),
           storage(storage),
-          is_built(false),
-          GK(64),
           build_type(1) {
-    nndescent_S = 10;
-    nndescent_R = 100;
     nndescent_L = GK + 50;
-    nndescent_iter = 10;
 }
 IndexNSG::~IndexNSG() {
@@ -95,8 +79,8 @@ void IndexNSG::search(
         {
             VisitedTable vt(ntotal);
-            DistanceComputer* dis = storage_distance_computer(storage);
-            ScopeDeleter1<DistanceComputer> del(dis);
+            std::unique_ptr<DistanceComputer> dis(
+                    storage_distance_computer(storage));
 #pragma omp for
             for (idx_t i = i0; i < i1; i++) {
@@ -120,7 +104,7 @@ void IndexNSG::search(
     }
 }
-void IndexNSG::build(idx_t n, const float* x, idx_t* knn_graph, int GK) {
+void IndexNSG::build(idx_t n, const float* x, idx_t* knn_graph, int GK_2) {
     FAISS_THROW_IF_NOT_MSG(
             storage,
             "Please use IndexNSGFlat (or variants) instead of IndexNSG directly");
@@ -131,9 +115,9 @@ void IndexNSG::build(idx_t n, const float* x, idx_t* knn_graph, int GK) {
     ntotal = storage->ntotal;
     // check the knn graph
-    check_knn_graph(knn_graph, n, GK);
+    check_knn_graph(knn_graph, n, GK_2);
-    const nsg::Graph<idx_t> knng(knn_graph, n, GK);
+    const nsg::Graph<idx_t> knng(knn_graph, n, GK_2);
     nsg.build(storage, n, knng, verbose);
     is_built = true;
 }
@@ -302,10 +286,10 @@ IndexNSGFlat::IndexNSGFlat(int d, int R, MetricType metric)
  * IndexNSGPQ implementation
  **************************************************************/
-IndexNSGPQ::IndexNSGPQ() {}
+IndexNSGPQ::IndexNSGPQ() = default;
-IndexNSGPQ::IndexNSGPQ(int d, int pq_m, int M)
-        : IndexNSG(new IndexPQ(d, pq_m, 8), M) {
+IndexNSGPQ::IndexNSGPQ(int d, int pq_m, int M, int pq_nbits)
+        : IndexNSG(new IndexPQ(d, pq_m, pq_nbits), M) {
     own_fields = true;
     is_trained = false;
 }
@@ -325,10 +309,10 @@ IndexNSGSQ::IndexNSGSQ(
         int M,
         MetricType metric)
         : IndexNSG(new IndexScalarQuantizer(d, qtype, metric), M) {
-    is_trained = false;
+    is_trained = this->storage->is_trained;
     own_fields = true;
 }
-IndexNSGSQ::IndexNSGSQ() {}
+IndexNSGSQ::IndexNSGSQ() = default;
 } // namespace faiss