faiss 0.4.1 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/lib/faiss/version.rb +1 -1
  4. data/vendor/faiss/faiss/AutoTune.cpp +39 -29
  5. data/vendor/faiss/faiss/Clustering.cpp +4 -2
  6. data/vendor/faiss/faiss/IVFlib.cpp +14 -7
  7. data/vendor/faiss/faiss/Index.h +72 -3
  8. data/vendor/faiss/faiss/Index2Layer.cpp +2 -4
  9. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +0 -1
  10. data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +1 -0
  11. data/vendor/faiss/faiss/IndexBinary.h +46 -3
  12. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +118 -4
  13. data/vendor/faiss/faiss/IndexBinaryHNSW.h +41 -0
  14. data/vendor/faiss/faiss/IndexBinaryHash.cpp +0 -1
  15. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +18 -7
  16. data/vendor/faiss/faiss/IndexBinaryIVF.h +5 -1
  17. data/vendor/faiss/faiss/IndexFlat.cpp +6 -4
  18. data/vendor/faiss/faiss/IndexHNSW.cpp +65 -24
  19. data/vendor/faiss/faiss/IndexHNSW.h +10 -1
  20. data/vendor/faiss/faiss/IndexIDMap.cpp +96 -18
  21. data/vendor/faiss/faiss/IndexIDMap.h +20 -0
  22. data/vendor/faiss/faiss/IndexIVF.cpp +28 -10
  23. data/vendor/faiss/faiss/IndexIVF.h +16 -1
  24. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +84 -16
  25. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +18 -6
  26. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +33 -21
  27. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +16 -6
  28. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +24 -15
  29. data/vendor/faiss/faiss/IndexIVFFastScan.h +4 -2
  30. data/vendor/faiss/faiss/IndexIVFFlat.cpp +59 -43
  31. data/vendor/faiss/faiss/IndexIVFFlat.h +10 -2
  32. data/vendor/faiss/faiss/IndexIVFPQ.cpp +16 -3
  33. data/vendor/faiss/faiss/IndexIVFPQ.h +8 -1
  34. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +14 -6
  35. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +2 -1
  36. data/vendor/faiss/faiss/IndexIVFPQR.cpp +14 -4
  37. data/vendor/faiss/faiss/IndexIVFPQR.h +2 -1
  38. data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +28 -3
  39. data/vendor/faiss/faiss/IndexIVFRaBitQ.h +8 -1
  40. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +9 -2
  41. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +2 -1
  42. data/vendor/faiss/faiss/IndexLattice.cpp +8 -4
  43. data/vendor/faiss/faiss/IndexNNDescent.cpp +0 -7
  44. data/vendor/faiss/faiss/IndexNSG.cpp +3 -3
  45. data/vendor/faiss/faiss/IndexPQ.cpp +0 -1
  46. data/vendor/faiss/faiss/IndexPQ.h +1 -0
  47. data/vendor/faiss/faiss/IndexPQFastScan.cpp +0 -2
  48. data/vendor/faiss/faiss/IndexPreTransform.cpp +4 -2
  49. data/vendor/faiss/faiss/IndexRefine.cpp +11 -6
  50. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +16 -4
  51. data/vendor/faiss/faiss/IndexScalarQuantizer.h +10 -3
  52. data/vendor/faiss/faiss/IndexShards.cpp +7 -6
  53. data/vendor/faiss/faiss/MatrixStats.cpp +16 -8
  54. data/vendor/faiss/faiss/MetaIndexes.cpp +12 -6
  55. data/vendor/faiss/faiss/MetricType.h +5 -3
  56. data/vendor/faiss/faiss/clone_index.cpp +2 -4
  57. data/vendor/faiss/faiss/cppcontrib/factory_tools.cpp +6 -0
  58. data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +9 -4
  59. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +32 -10
  60. data/vendor/faiss/faiss/gpu/GpuIndex.h +88 -0
  61. data/vendor/faiss/faiss/gpu/GpuIndexBinaryCagra.h +125 -0
  62. data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +39 -4
  63. data/vendor/faiss/faiss/gpu/impl/IndexUtils.h +3 -3
  64. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +1 -1
  65. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +3 -2
  66. data/vendor/faiss/faiss/gpu/utils/CuvsFilterConvert.h +41 -0
  67. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +6 -3
  68. data/vendor/faiss/faiss/impl/HNSW.cpp +34 -19
  69. data/vendor/faiss/faiss/impl/IDSelector.cpp +2 -1
  70. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +2 -3
  71. data/vendor/faiss/faiss/impl/NNDescent.cpp +17 -9
  72. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +42 -21
  73. data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +6 -24
  74. data/vendor/faiss/faiss/impl/ResultHandler.h +56 -47
  75. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +28 -15
  76. data/vendor/faiss/faiss/impl/index_read.cpp +36 -11
  77. data/vendor/faiss/faiss/impl/index_write.cpp +19 -6
  78. data/vendor/faiss/faiss/impl/io.cpp +9 -5
  79. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +18 -11
  80. data/vendor/faiss/faiss/impl/mapped_io.cpp +4 -7
  81. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +0 -1
  82. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +0 -1
  83. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +6 -6
  84. data/vendor/faiss/faiss/impl/zerocopy_io.cpp +1 -1
  85. data/vendor/faiss/faiss/impl/zerocopy_io.h +2 -2
  86. data/vendor/faiss/faiss/index_factory.cpp +49 -33
  87. data/vendor/faiss/faiss/index_factory.h +8 -2
  88. data/vendor/faiss/faiss/index_io.h +0 -3
  89. data/vendor/faiss/faiss/invlists/DirectMap.cpp +2 -1
  90. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +12 -6
  91. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +8 -4
  92. data/vendor/faiss/faiss/utils/Heap.cpp +15 -8
  93. data/vendor/faiss/faiss/utils/Heap.h +23 -12
  94. data/vendor/faiss/faiss/utils/distances.cpp +42 -21
  95. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +2 -2
  96. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +1 -1
  97. data/vendor/faiss/faiss/utils/distances_simd.cpp +5 -3
  98. data/vendor/faiss/faiss/utils/extra_distances-inl.h +27 -4
  99. data/vendor/faiss/faiss/utils/extra_distances.cpp +8 -4
  100. data/vendor/faiss/faiss/utils/hamming.cpp +20 -10
  101. data/vendor/faiss/faiss/utils/partitioning.cpp +8 -4
  102. data/vendor/faiss/faiss/utils/quantize_lut.cpp +17 -9
  103. data/vendor/faiss/faiss/utils/rabitq_simd.h +539 -0
  104. data/vendor/faiss/faiss/utils/random.cpp +14 -7
  105. data/vendor/faiss/faiss/utils/utils.cpp +0 -3
  106. metadata +5 -2
@@ -79,6 +79,33 @@ struct ResultHandler {
     virtual ~ResultHandler() {}
 };
 
+/*****************************************************************
+ * Common ancestor for top-k search results.
+ *****************************************************************/
+
+template <class C, bool use_sel = false>
+struct TopkBlockResultHandler : BlockResultHandler<C, use_sel> {
+    using T = typename C::T;
+    using TI = typename C::TI;
+    T* dis_tab;
+    TI* ids_tab;
+
+    int64_t k; // number of results to keep
+
+    TopkBlockResultHandler(
+            size_t nq,
+            T* dis_tab,
+            TI* ids_tab,
+            size_t k,
+            const IDSelector* sel = nullptr)
+            : BlockResultHandler<C, use_sel>(nq, sel),
+              dis_tab(dis_tab),
+              ids_tab(ids_tab),
+              k(k) {}
+
+    ~TopkBlockResultHandler() {}
+};
+
 /*****************************************************************
  * Single best result handler.
  * Tracks the only best result, thus avoiding storing
@@ -86,25 +113,19 @@ struct ResultHandler {
  *****************************************************************/
 
 template <class C, bool use_sel = false>
-struct Top1BlockResultHandler : BlockResultHandler<C, use_sel> {
+struct Top1BlockResultHandler : TopkBlockResultHandler<C, use_sel> {
     using T = typename C::T;
     using TI = typename C::TI;
     using BlockResultHandler<C, use_sel>::i0;
     using BlockResultHandler<C, use_sel>::i1;
 
-    // contains exactly nq elements
-    T* dis_tab;
-    // contains exactly nq elements
-    TI* ids_tab;
-
     Top1BlockResultHandler(
             size_t nq,
             T* dis_tab,
             TI* ids_tab,
             const IDSelector* sel = nullptr)
-            : BlockResultHandler<C, use_sel>(nq, sel),
-              dis_tab(dis_tab),
-              ids_tab(ids_tab) {}
+            : TopkBlockResultHandler<C, use_sel>(nq, dis_tab, ids_tab, 1, sel) {
+    }
 
     struct SingleResultHandler : ResultHandler<C> {
         Top1BlockResultHandler& hr;
@@ -184,28 +205,21 @@ struct Top1BlockResultHandler : BlockResultHandler<C, use_sel> {
  *****************************************************************/
 
 template <class C, bool use_sel = false>
-struct HeapBlockResultHandler : BlockResultHandler<C, use_sel> {
+struct HeapBlockResultHandler : TopkBlockResultHandler<C, use_sel> {
     using T = typename C::T;
     using TI = typename C::TI;
     using BlockResultHandler<C, use_sel>::i0;
     using BlockResultHandler<C, use_sel>::i1;
-
-    T* heap_dis_tab;
-    TI* heap_ids_tab;
-
-    int64_t k; // number of results to keep
+    using TopkBlockResultHandler<C, use_sel>::k;
 
     HeapBlockResultHandler(
             size_t nq,
-            T* heap_dis_tab,
-            TI* heap_ids_tab,
+            T* dis_tab,
+            TI* ids_tab,
             size_t k,
             const IDSelector* sel = nullptr)
-            : BlockResultHandler<C, use_sel>(nq, sel),
-              heap_dis_tab(heap_dis_tab),
-              heap_ids_tab(heap_ids_tab),
-              k(k) {}
-
+            : TopkBlockResultHandler<C, use_sel>(nq, dis_tab, ids_tab, k, sel) {
+    }
     /******************************************************
      * API for 1 result at a time (each SingleResultHandler is
      * called from 1 thread)
@@ -224,8 +238,8 @@ struct HeapBlockResultHandler : BlockResultHandler<C, use_sel> {
 
         /// begin results for query # i
        void begin(size_t i) {
-            heap_dis = hr.heap_dis_tab + i * k;
-            heap_ids = hr.heap_ids_tab + i * k;
+            heap_dis = hr.dis_tab + i * k;
+            heap_ids = hr.ids_tab + i * k;
             heap_heapify<C>(k, heap_dis, heap_ids);
             threshold = heap_dis[0];
         }
@@ -255,7 +269,8 @@ struct HeapBlockResultHandler : BlockResultHandler<C, use_sel> {
         this->i0 = i0_2;
         this->i1 = i1_2;
         for (size_t i = i0; i < i1; i++) {
-            heap_heapify<C>(k, heap_dis_tab + i * k, heap_ids_tab + i * k);
+            heap_heapify<C>(
+                    k, this->dis_tab + i * this->k, this->ids_tab + i * k);
         }
     }
 
@@ -263,8 +278,8 @@ struct HeapBlockResultHandler : BlockResultHandler<C, use_sel> {
     void add_results(size_t j0, size_t j1, const T* dis_tab) final {
 #pragma omp parallel for
         for (int64_t i = i0; i < i1; i++) {
-            T* heap_dis = heap_dis_tab + i * k;
-            TI* heap_ids = heap_ids_tab + i * k;
+            T* heap_dis = this->dis_tab + i * k;
+            TI* heap_ids = this->ids_tab + i * k;
             const T* dis_tab_i = dis_tab + (j1 - j0) * (i - i0) - j0;
             T thresh = heap_dis[0];
             for (size_t j = j0; j < j1; j++) {
@@ -281,7 +296,7 @@ struct HeapBlockResultHandler : BlockResultHandler<C, use_sel> {
     void end_multiple() final {
         // maybe parallel for
         for (size_t i = i0; i < i1; i++) {
-            heap_reorder<C>(k, heap_dis_tab + i * k, heap_ids_tab + i * k);
+            heap_reorder<C>(k, this->dis_tab + i * k, this->ids_tab + i * k);
         }
     }
 };
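The hunks above fold the per-query output tables (`dis_tab`, `ids_tab`) and `k` into the new common base `TopkBlockResultHandler`, so that the top-1, heap and reservoir handlers only differ in how they fill those tables. Below is a minimal, self-contained sketch of that inheritance pattern; the comparator and the `BlockResultHandler`/`IDSelector` machinery from faiss are stubbed out, and every name ending in `Sketch` or `Stub` is illustrative only.

```cpp
#include <cstdint>
#include <cstdio>
#include <vector>

// Stand-in for faiss's comparator classes (CMax<float, int64_t> etc.).
struct CMaxFloat {
    using T = float;
    using TI = int64_t;
};

// Simplified stand-in for BlockResultHandler: only remembers nq.
template <class C>
struct BlockResultHandlerStub {
    size_t nq;
    explicit BlockResultHandlerStub(size_t nq) : nq(nq) {}
};

// The shared base introduced by the diff: one place that owns the output
// tables and k, so the top-1, heap and reservoir handlers stop duplicating
// these members.
template <class C>
struct TopkBlockResultHandlerSketch : BlockResultHandlerStub<C> {
    using T = typename C::T;
    using TI = typename C::TI;
    T* dis_tab;  // nq * k distances, owned by the caller
    TI* ids_tab; // nq * k ids, owned by the caller
    int64_t k;

    TopkBlockResultHandlerSketch(size_t nq, T* dis_tab, TI* ids_tab, size_t k)
            : BlockResultHandlerStub<C>(nq),
              dis_tab(dis_tab),
              ids_tab(ids_tab),
              k(k) {}
};

// A "top-1" handler now just forwards k = 1 to the base.
template <class C>
struct Top1Sketch : TopkBlockResultHandlerSketch<C> {
    Top1Sketch(size_t nq, typename C::T* dis, typename C::TI* ids)
            : TopkBlockResultHandlerSketch<C>(nq, dis, ids, 1) {}
};

int main() {
    size_t nq = 2;
    std::vector<float> dis(nq);
    std::vector<int64_t> ids(nq);
    Top1Sketch<CMaxFloat> handler(nq, dis.data(), ids.data());
    std::printf("k = %lld, nq = %zu\n", (long long)handler.k, handler.nq);
}
```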
@@ -290,9 +305,9 @@ struct HeapBlockResultHandler : BlockResultHandler<C, use_sel> {
 * Reservoir result handler
 *
 * A reservoir is a result array of size capacity > n (number of requested
- * results) all results below a threshold are stored in an arbitrary order. When
- * the capacity is reached, a new threshold is chosen by partitionning the
- * distance array.
+ * results) all results below a threshold are stored in an arbitrary order.
+ *When the capacity is reached, a new threshold is chosen by partitionning
+ *the distance array.
 *****************************************************************/
 
 /// Reservoir for a single query
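The comment above describes the reservoir strategy: keep a buffer larger than the number of requested results, accept anything below the current threshold, and partition the buffer to tighten the threshold once it fills up. A toy, standard-library-only sketch of that idea (not faiss's `ReservoirTopN`, which works on distance/id pairs and its own partitioning routine) might look like this:

```cpp
#include <algorithm>
#include <cstdio>
#include <limits>
#include <vector>

// Keep the n smallest distances seen so far. Candidates below the current
// threshold go into a buffer of size capacity > n; when the buffer is full,
// std::nth_element keeps the n best and the threshold tightens to the new
// n-th value, mirroring the "partition the distance array" step.
struct ReservoirSketch {
    size_t n, capacity;
    float threshold = std::numeric_limits<float>::max();
    std::vector<float> vals;

    ReservoirSketch(size_t n, size_t capacity) : n(n), capacity(capacity) {}

    void add(float d) {
        if (d >= threshold) {
            return;
        }
        vals.push_back(d);
        if (vals.size() == capacity) {
            std::nth_element(vals.begin(), vals.begin() + n - 1, vals.end());
            vals.resize(n);
            threshold = vals[n - 1]; // new, tighter acceptance threshold
        }
    }
};

int main() {
    ReservoirSketch res(3, 8);
    for (float d : {5.f, 1.f, 9.f, 2.f, 8.f, 0.5f, 7.f, 3.f, 0.1f, 6.f}) {
        res.add(d);
    }
    // Finalize: keep exactly the n best of whatever is left in the buffer.
    std::nth_element(
            res.vals.begin(), res.vals.begin() + res.n - 1, res.vals.end());
    res.vals.resize(res.n);
    for (float d : res.vals) {
        std::printf("%.1f ", d); // the three smallest distances, unordered
    }
    std::printf("\n");
}
```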
@@ -367,28 +382,21 @@ struct ReservoirTopN : ResultHandler<C> {
 };
 
 template <class C, bool use_sel = false>
-struct ReservoirBlockResultHandler : BlockResultHandler<C, use_sel> {
+struct ReservoirBlockResultHandler : TopkBlockResultHandler<C, use_sel> {
     using T = typename C::T;
     using TI = typename C::TI;
     using BlockResultHandler<C, use_sel>::i0;
     using BlockResultHandler<C, use_sel>::i1;
 
-    T* heap_dis_tab;
-    TI* heap_ids_tab;
-
-    int64_t k; // number of results to keep
     size_t capacity; // capacity of the reservoirs
 
     ReservoirBlockResultHandler(
             size_t nq,
-            T* heap_dis_tab,
-            TI* heap_ids_tab,
+            T* dis_tab,
+            TI* ids_tab,
             size_t k,
             const IDSelector* sel = nullptr)
-            : BlockResultHandler<C, use_sel>(nq, sel),
-              heap_dis_tab(heap_dis_tab),
-              heap_ids_tab(heap_ids_tab),
-              k(k) {
+            : TopkBlockResultHandler<C, use_sel>(nq, dis_tab, ids_tab, k, sel) {
         // double then round up to multiple of 16 (for SIMD alignment)
         capacity = (2 * k + 15) & ~15;
     }
@@ -423,8 +431,8 @@ struct ReservoirBlockResultHandler : BlockResultHandler<C, use_sel> {
 
         /// series of results for query qno is done
         void end() {
-            T* heap_dis = hr.heap_dis_tab + qno * hr.k;
-            TI* heap_ids = hr.heap_ids_tab + qno * hr.k;
+            T* heap_dis = hr.dis_tab + qno * hr.k;
+            TI* heap_ids = hr.ids_tab + qno * hr.k;
             this->to_result(heap_dis, heap_ids);
         }
     };
@@ -446,7 +454,7 @@ struct ReservoirBlockResultHandler : BlockResultHandler<C, use_sel> {
         reservoirs.clear();
         for (size_t i = i0_2; i < i1_2; i++) {
             reservoirs.emplace_back(
-                    k,
+                    this->k,
                     capacity,
                     reservoir_dis.data() + (i - i0_2) * capacity,
                     reservoir_ids.data() + (i - i0_2) * capacity);
@@ -471,7 +479,7 @@ struct ReservoirBlockResultHandler : BlockResultHandler<C, use_sel> {
         // maybe parallel for
         for (size_t i = i0; i < i1; i++) {
             reservoirs[i - i0].to_result(
-                    heap_dis_tab + i * k, heap_ids_tab + i * k);
+                    this->dis_tab + i * this->k, this->ids_tab + i * this->k);
         }
     }
 };
@@ -535,7 +543,8 @@ struct RangeSearchBlockResultHandler : BlockResultHandler<C, use_sel> {
             // finalize the partial result
             pres.finalize();
         } catch ([[maybe_unused]] const faiss::FaissException& e) {
-            // Do nothing if allocation fails in finalizing partial results.
+            // Do nothing if allocation fails in finalizing partial
+            // results.
 #ifndef NDEBUG
             std::cerr << e.what() << std::endl;
 #endif
@@ -13,7 +13,6 @@
 #include <cstdio>
 
 #include <faiss/impl/platform_macros.h>
-#include <omp.h>
 
 #ifdef __SSE__
 #include <immintrin.h>
@@ -985,10 +984,12 @@ void train_Uniform(
         vmin = HUGE_VAL;
         vmax = -HUGE_VAL;
         for (size_t i = 0; i < n; i++) {
-            if (x[i] < vmin)
+            if (x[i] < vmin) {
                 vmin = x[i];
-            if (x[i] > vmax)
+            }
+            if (x[i] > vmax) {
                 vmax = x[i];
+            }
         }
         float vexp = (vmax - vmin) * rs_arg;
         vmin -= vexp;
@@ -1011,10 +1012,12 @@ void train_Uniform(
         // TODO just do a quickselect
         std::sort(x_copy.begin(), x_copy.end());
         int o = int(rs_arg * n);
-        if (o < 0)
+        if (o < 0) {
             o = 0;
-        if (o > n - o)
+        }
+        if (o > n - o) {
             o = n / 2;
+        }
         vmin = x_copy[o];
         vmax = x_copy[n - 1 - o];
 
@@ -1024,10 +1027,12 @@ void train_Uniform(
     {
         vmin = HUGE_VAL, vmax = -HUGE_VAL;
         for (size_t i = 0; i < n; i++) {
-            if (x[i] < vmin)
+            if (x[i] < vmin) {
                 vmin = x[i];
-            if (x[i] > vmax)
+            }
+            if (x[i] > vmax) {
                 vmax = x[i];
+            }
             sx += x[i];
         }
         b = vmin;
@@ -1043,10 +1048,12 @@ void train_Uniform(
         for (idx_t i = 0; i < n; i++) {
             float xi = x[i];
             float ni = floor((xi - b) / a + 0.5);
-            if (ni < 0)
+            if (ni < 0) {
                 ni = 0;
-            if (ni >= k)
+            }
+            if (ni >= k) {
                 ni = k - 1;
+            }
             err1 += sqr(xi - (ni * a + b));
             sn += ni;
             sn2 += ni * ni;
@@ -1055,8 +1062,9 @@
 
         if (err1 == last_err) {
             iter_last_err++;
-            if (iter_last_err == 16)
+            if (iter_last_err == 16) {
                 break;
+            }
         } else {
             last_err = err1;
             iter_last_err = 0;
@@ -1071,8 +1079,9 @@
             fflush(stdout);
         }
     }
-    if (verbose)
+    if (verbose) {
         printf("\n");
+    }
 
     vmin = b;
     vmax = b + a * (k - 1);
@@ -1100,10 +1109,12 @@ void train_NonUniform(
         for (size_t i = 1; i < n; i++) {
             const float* xi = x + i * d;
             for (size_t j = 0; j < d; j++) {
-                if (xi[j] < vmin[j])
+                if (xi[j] < vmin[j]) {
                     vmin[j] = xi[j];
-                if (xi[j] > vmax[j])
+                }
+                if (xi[j] > vmax[j]) {
                     vmax[j] = xi[j];
+                }
             }
         }
         float* vdiff = vmax;
@@ -2066,16 +2077,18 @@ void ScalarQuantizer::compute_codes(const float* x, uint8_t* codes, size_t n)
 
     memset(codes, 0, code_size * n);
 #pragma omp parallel for
-    for (int64_t i = 0; i < n; i++)
+    for (int64_t i = 0; i < n; i++) {
         squant->encode_vector(x + i * d, codes + i * code_size);
+    }
 }
 
 void ScalarQuantizer::decode(const uint8_t* codes, float* x, size_t n) const {
     std::unique_ptr<SQuantizer> squant(select_quantizer());
 
 #pragma omp parallel for
-    for (int64_t i = 0; i < n; i++)
+    for (int64_t i = 0; i < n; i++) {
         squant->decode_vector(codes + i * code_size, x + i * d);
+    }
 }
 
 SQDistanceComputer* ScalarQuantizer::get_distance_computer(
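The `compute_codes`/`decode` hunk above is the entry point most callers reach indirectly through `IndexScalarQuantizer`, and the earlier `train_Uniform` hunks are driven by the quantizer's `rangestat`/`rangestat_arg` settings. As a rough usage sketch, assuming the vendored faiss headers are on the include path and the standard `ScalarQuantizer` API (constructor, `rangestat`, `train`, `compute_codes`, `decode`):

```cpp
#include <faiss/impl/ScalarQuantizer.h>

#include <cstdint>
#include <cstdio>
#include <random>
#include <vector>

int main() {
    size_t d = 16, n = 1000;
    std::vector<float> x(n * d);
    std::mt19937 rng(123);
    std::normal_distribution<float> dist;
    for (auto& v : x) {
        v = dist(rng);
    }

    // 8-bit scalar quantizer; rangestat_arg plays the role of rs_arg in
    // train_Uniform above (here: clip the range to the 1% / 99% quantiles).
    faiss::ScalarQuantizer sq(d, faiss::ScalarQuantizer::QT_8bit);
    sq.rangestat = faiss::ScalarQuantizer::RS_quantiles;
    sq.rangestat_arg = 0.01f;
    sq.train(n, x.data());

    std::vector<uint8_t> codes(n * sq.code_size);
    sq.compute_codes(x.data(), codes.data(), n); // OpenMP-parallel loop above

    std::vector<float> decoded(n * d);
    sq.decode(codes.data(), decoded.data(), n);
    std::printf("code_size = %zu bytes, x[0] = %.3f, decoded[0] = %.3f\n",
                sq.code_size, x[0], decoded[0]);
}
```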
@@ -16,7 +16,6 @@
 
 #include <faiss/impl/FaissAssert.h>
 #include <faiss/impl/io.h>
-#include <faiss/impl/io_macros.h>
 #include <faiss/utils/hamming.h>
 
 #include <faiss/invlists/InvertedListsIOHook.h>
@@ -1100,24 +1099,37 @@ Index* read_index(IOReader* f, int io_flags) {
         idx = idxp;
     } else if (
             h == fourcc("IHNf") || h == fourcc("IHNp") || h == fourcc("IHNs") ||
-            h == fourcc("IHN2") || h == fourcc("IHNc")) {
+            h == fourcc("IHN2") || h == fourcc("IHNc") || h == fourcc("IHc2")) {
         IndexHNSW* idxhnsw = nullptr;
-        if (h == fourcc("IHNf"))
+        if (h == fourcc("IHNf")) {
             idxhnsw = new IndexHNSWFlat();
-        if (h == fourcc("IHNp"))
+        }
+        if (h == fourcc("IHNp")) {
             idxhnsw = new IndexHNSWPQ();
-        if (h == fourcc("IHNs"))
+        }
+        if (h == fourcc("IHNs")) {
             idxhnsw = new IndexHNSWSQ();
-        if (h == fourcc("IHN2"))
+        }
+        if (h == fourcc("IHN2")) {
             idxhnsw = new IndexHNSW2Level();
-        if (h == fourcc("IHNc"))
+        }
+        if (h == fourcc("IHNc")) {
+            idxhnsw = new IndexHNSWCagra();
+        }
+        if (h == fourcc("IHc2")) {
             idxhnsw = new IndexHNSWCagra();
+        }
         read_index_header(idxhnsw, f);
-        if (h == fourcc("IHNc")) {
+        if (h == fourcc("IHNc") || h == fourcc("IHc2")) {
             READ1(idxhnsw->keep_max_size_level0);
             auto idx_hnsw_cagra = dynamic_cast<IndexHNSWCagra*>(idxhnsw);
             READ1(idx_hnsw_cagra->base_level_only);
             READ1(idx_hnsw_cagra->num_base_level_search_entrypoints);
+            if (h == fourcc("IHc2")) {
+                READ1(idx_hnsw_cagra->numeric_type_);
+            } else { // cagra before numeric_type_ was introduced
+                idx_hnsw_cagra->set_numeric_type(faiss::Float32);
+            }
         }
         read_HNSW(&idxhnsw->hnsw, f);
         idxhnsw->storage = read_index(f, io_flags);
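The read path above illustrates the usual fourcc-versioning pattern: the new `"IHc2"` tag carries an extra `numeric_type_` field, while streams written with the older `"IHNc"` tag fall back to `Float32`. Below is a self-contained toy reader showing the same branch-on-magic idea; the `Reader` type and the buffer layout are hypothetical and stand in for faiss's `IOReader`/`READ1`.

```cpp
#include <cstdint>
#include <cstdio>
#include <cstring>

// Packs a 4-character tag into a uint32 in the same little-endian order
// that faiss's fourcc() uses for index headers ("IHNc", "IHc2", ...).
constexpr uint32_t fourcc(const char s[5]) {
    return uint32_t(uint8_t(s[0])) | (uint32_t(uint8_t(s[1])) << 8) |
            (uint32_t(uint8_t(s[2])) << 16) | (uint32_t(uint8_t(s[3])) << 24);
}

// Hypothetical in-memory reader; not faiss's real IOReader.
struct Reader {
    const uint8_t* p;
    template <typename T>
    void read1(T& x) {
        std::memcpy(&x, p, sizeof(T));
        p += sizeof(T);
    }
};

int main() {
    // Simulate a "v2" stream: the new magic followed by one extra int field.
    uint8_t buf[8] = {};
    const uint32_t magic = fourcc("IHc2");
    const int numeric_type = 1;
    std::memcpy(buf, &magic, 4);
    std::memcpy(buf + 4, &numeric_type, 4);

    Reader r{buf};
    uint32_t h = 0;
    r.read1(h);
    int nt = 0; // default, as for the older "IHNc" format with no such field
    if (h == fourcc("IHc2")) {
        r.read1(nt);
    }
    std::printf("magic=%08x numeric_type=%d\n", h, nt);
}
```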
@@ -1129,12 +1141,15 @@ Index* read_index(IOReader* f, int io_flags) {
     } else if (
             h == fourcc("INSf") || h == fourcc("INSp") || h == fourcc("INSs")) {
         IndexNSG* idxnsg;
-        if (h == fourcc("INSf"))
+        if (h == fourcc("INSf")) {
             idxnsg = new IndexNSGFlat();
-        if (h == fourcc("INSp"))
+        }
+        if (h == fourcc("INSp")) {
             idxnsg = new IndexNSGPQ();
-        if (h == fourcc("INSs"))
+        }
+        if (h == fourcc("INSs")) {
             idxnsg = new IndexNSGSQ();
+        }
         read_index_header(idxnsg, f);
         READ1(idxnsg->GK);
         READ1(idxnsg->build_type);
@@ -1384,6 +1399,16 @@ IndexBinary* read_index_binary(IOReader* f, int io_flags) {
         idxhnsw->storage = read_index_binary(f, io_flags);
         idxhnsw->own_fields = true;
         idx = idxhnsw;
+    } else if (h == fourcc("IBHc")) {
+        IndexBinaryHNSWCagra* idxhnsw = new IndexBinaryHNSWCagra();
+        read_index_binary_header(idxhnsw, f);
+        READ1(idxhnsw->keep_max_size_level0);
+        READ1(idxhnsw->base_level_only);
+        READ1(idxhnsw->num_base_level_search_entrypoints);
+        read_HNSW(&idxhnsw->hnsw, f);
+        idxhnsw->storage = read_index_binary(f, io_flags);
+        idxhnsw->own_fields = true;
+        idx = idxhnsw;
     } else if (h == fourcc("IBMp") || h == fourcc("IBM2")) {
         bool is_map2 = h == fourcc("IBM2");
         IndexBinaryIDMap* idxmap =
@@ -16,7 +16,6 @@
 #include <faiss/invlists/InvertedListsIOHook.h>
 
 #include <faiss/impl/FaissAssert.h>
-#include <faiss/impl/io_macros.h>
 #include <faiss/utils/hamming.h>
 
 #include <faiss/Index2Layer.h>
@@ -255,8 +254,9 @@ void write_InvertedLists(const InvertedLists* ils, IOWriter* f) {
         // here we store either as a full or a sparse data buffer
         size_t n_non0 = 0;
         for (size_t i = 0; i < ails->nlist; i++) {
-            if (ails->ids[i].size() > 0)
+            if (ails->ids[i].size() > 0) {
                 n_non0++;
+            }
         }
         if (n_non0 > ails->nlist / 2) {
             uint32_t list_type = fourcc("full");
@@ -739,8 +739,9 @@ void write_index(const Index* idx, IOWriter* f, int io_flags) {
         write_index_header(ixpt, f);
         int nt = ixpt->chain.size();
         WRITE1(nt);
-        for (int i = 0; i < nt; i++)
+        for (int i = 0; i < nt; i++) {
             write_VectorTransform(ixpt->chain[i], f);
+        }
         write_index(ixpt->index, f);
     } else if (
             const MultiIndexQuantizer* imiq =
@@ -771,16 +772,17 @@ void write_index(const Index* idx, IOWriter* f, int io_flags) {
                 : dynamic_cast<const IndexHNSWPQ*>(idx) ? fourcc("IHNp")
                 : dynamic_cast<const IndexHNSWSQ*>(idx) ? fourcc("IHNs")
                 : dynamic_cast<const IndexHNSW2Level*>(idx) ? fourcc("IHN2")
-                : dynamic_cast<const IndexHNSWCagra*>(idx) ? fourcc("IHNc")
+                : dynamic_cast<const IndexHNSWCagra*>(idx) ? fourcc("IHc2")
                 : 0;
         FAISS_THROW_IF_NOT(h != 0);
         WRITE1(h);
         write_index_header(idxhnsw, f);
-        if (h == fourcc("IHNc")) {
+        if (h == fourcc("IHc2")) {
             WRITE1(idxhnsw->keep_max_size_level0);
             auto idx_hnsw_cagra = dynamic_cast<const IndexHNSWCagra*>(idxhnsw);
             WRITE1(idx_hnsw_cagra->base_level_only);
             WRITE1(idx_hnsw_cagra->num_base_level_search_entrypoints);
+            WRITE1(idx_hnsw_cagra->numeric_type_);
         }
         write_HNSW(&idxhnsw->hnsw, f);
         if (io_flags & IO_FLAG_SKIP_STORAGE) {
@@ -1004,9 +1006,20 @@ void write_index_binary(const IndexBinary* idx, IOWriter* f) {
     } else if (
             const IndexBinaryHNSW* idxhnsw =
                     dynamic_cast<const IndexBinaryHNSW*>(idx)) {
-        uint32_t h = fourcc("IBHf");
+        // Determine which type of binary HNSW index this is
+        uint32_t h = dynamic_cast<const IndexBinaryHNSWCagra*>(idx)
+                ? fourcc("IBHc")
+                : fourcc("IBHf");
         WRITE1(h);
         write_index_binary_header(idxhnsw, f);
+
+        if (h == fourcc("IBHc")) {
+            auto idxcagra = dynamic_cast<const IndexBinaryHNSWCagra*>(idxhnsw);
+            WRITE1(idxcagra->keep_max_size_level0);
+            WRITE1(idxcagra->base_level_only);
+            WRITE1(idxcagra->num_base_level_search_entrypoints);
+        }
+
         write_HNSW(&idxhnsw->hnsw, f);
         write_index_binary(idxhnsw->storage, f);
     } else if (
@@ -43,11 +43,13 @@ size_t VectorIOWriter::operator()(const void* ptr, size_t size, size_t nitems) {
 }
 
 size_t VectorIOReader::operator()(void* ptr, size_t size, size_t nitems) {
-    if (rp >= data.size())
+    if (rp >= data.size()) {
         return 0;
+    }
     size_t nremain = (data.size() - rp) / size;
-    if (nremain < nitems)
+    if (nremain < nitems) {
         nitems = nremain;
+    }
     if (size * nitems > 0) {
         memcpy(ptr, &data[rp], size * nitems);
         rp += size * nitems;
@@ -143,8 +145,9 @@ BufferedIOReader::BufferedIOReader(IOReader* reader, size_t bsz)
 
 size_t BufferedIOReader::operator()(void* ptr, size_t unitsize, size_t nitems) {
     size_t size = unitsize * nitems;
-    if (size == 0)
+    if (size == 0) {
         return 0;
+    }
     char* dst = (char*)ptr;
     size_t nb;
 
@@ -189,8 +192,9 @@ size_t BufferedIOWriter::operator()(
         size_t unitsize,
         size_t nitems) {
     size_t size = unitsize * nitems;
-    if (size == 0)
+    if (size == 0) {
         return 0;
+    }
     const char* src = (const char*)ptr;
     size_t nb;
 
@@ -260,7 +264,7 @@ std::string fourcc_inv(uint32_t x) {
 std::string fourcc_inv_printable(uint32_t x) {
     char cstr[5];
     fourcc_inv(x, cstr);
-    std::string str = "";
+    std::string str;
     for (int i = 0; i < 4; i++) {
         uint8_t c = cstr[i];
         if (32 <= c && c < 127) {
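These io.cpp readers and writers are what `write_index`/`read_index` use when serializing an index to and from memory. A short round-trip sketch, assuming the vendored faiss headers and a built library are available to a C++ compiler:

```cpp
#include <faiss/IndexFlat.h>
#include <faiss/impl/io.h>
#include <faiss/index_io.h>

#include <cstdio>
#include <memory>
#include <vector>

int main() {
    // Build a tiny flat L2 index.
    int d = 8;
    faiss::IndexFlatL2 index(d);
    std::vector<float> xb(16 * d, 0.5f);
    index.add(16, xb.data());

    // Serialize into memory through the IOWriter interface implemented in
    // io.cpp, then read it back through the matching IOReader.
    faiss::VectorIOWriter writer;
    faiss::write_index(&index, &writer);

    faiss::VectorIOReader reader;
    reader.data = writer.data;
    std::unique_ptr<faiss::Index> restored(faiss::read_index(&reader));

    std::printf("bytes = %zu, ntotal = %lld\n",
                writer.data.size(), (long long)restored->ntotal);
}
```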
@@ -18,7 +18,6 @@
 #include <queue>
 #include <unordered_set>
 
-#include <faiss/impl/platform_macros.h>
 #include <faiss/utils/distances.h>
 
 namespace faiss {
@@ -53,8 +52,9 @@ struct Comb {
 
     uint64_t operator()(int n, int p) const {
         assert(n < nmax && p < nmax);
-        if (p > n)
+        if (p > n) {
             return 0;
+        }
         return tab[n * nmax + p];
     }
 };
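`Comb::operator()` above reads binomial coefficients out of a flat `nmax * nmax` table so that the lattice encode/decode routines can look them up in constant time. A small sketch of how such a table can be filled with Pascal's rule (faiss builds its own table elsewhere; this version is illustrative only):

```cpp
#include <cassert>
#include <cstdint>
#include <cstdio>
#include <vector>

// Cache C(n, p) in a flat nmax x nmax table, as the Comb struct above does.
struct CombSketch {
    int nmax;
    std::vector<uint64_t> tab;

    explicit CombSketch(int nmax) : nmax(nmax), tab(size_t(nmax) * nmax, 0) {
        for (int n = 0; n < nmax; n++) {
            tab[n * nmax + 0] = 1;
            for (int p = 1; p <= n; p++) {
                // Pascal's rule: C(n, p) = C(n-1, p-1) + C(n-1, p)
                tab[n * nmax + p] =
                        tab[(n - 1) * nmax + (p - 1)] + tab[(n - 1) * nmax + p];
            }
        }
    }

    uint64_t operator()(int n, int p) const {
        assert(n < nmax && p < nmax);
        if (p > n) {
            return 0;
        }
        return tab[n * nmax + p];
    }
};

int main() {
    CombSketch comb(64);
    std::printf("C(10, 3) = %llu\n", (unsigned long long)comb(10, 3)); // 120
}
```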
@@ -66,8 +66,9 @@ point_list_t sum_of_sq(float total, int v, int n, float add = 0) {
     if (total < 0) {
         return point_list_t();
     } else if (n == 1) {
-        while (sqr(v + add) > total)
+        while (sqr(v + add) > total) {
             v--;
+        }
         if (sqr(v + add) == total) {
             return point_list_t(1, v + add);
         } else {
@@ -118,8 +119,9 @@ uint64_t repeats_encode_64(
                 code_comb += comb(rank, occ + 1);
                 occ++;
                 coded |= uint64_t{1} << i;
-                if (occ == r->n)
+                if (occ == r->n) {
                     break;
+                }
             }
             rank++;
         }
@@ -155,8 +157,9 @@ void repeats_decode_64(
                 decoded |= uint64_t{1} << i;
                 c[i] = r->val;
                 occ++;
-                if (occ == r->n)
+                if (occ == r->n) {
                     break;
+                }
                 next_rank = decode_comb_1(&code_comb, r->n - occ, next_rank);
             }
         }
@@ -210,8 +213,9 @@ uint64_t Repeats::encode(const float* c) const {
                 code_comb += comb(rank, occ + 1);
                 occ++;
                 coded[i] = true;
-                if (occ == r->n)
+                if (occ == r->n) {
                     break;
+                }
             }
             rank++;
         }
@@ -247,8 +251,9 @@ void Repeats::decode(uint64_t code, float* c) const {
                 decoded[i] = true;
                 c[i] = r->val;
                 occ++;
-                if (occ == r->n)
+                if (occ == r->n) {
                     break;
+                }
                 next_rank =
                         decode_comb_1(&code_comb, r->n - occ, next_rank);
             }
@@ -440,10 +445,11 @@ void ZnSphereCodec::decode(uint64_t code, float* c) const {
     int i0 = 0, i1 = natom;
     while (i0 + 1 < i1) {
         int imed = (i0 + i1) / 2;
-        if (code_segments[imed].c0 <= code)
+        if (code_segments[imed].c0 <= code) {
             i0 = imed;
-        else
+        } else {
             i1 = imed;
+        }
     }
     const CodeSegment& cs = code_segments[i0];
     code -= cs.c0;
@@ -592,10 +598,11 @@ void ZnSphereCodecRec::decode(uint64_t code, float* c) const {
                 &all_nv_cum[(ld * (r2 + 1) + r2sub) * (r2 + 1)];
         while (i1 > i0 + 1) {
             int imed = (i0 + i1) / 2;
-            if (cum[imed] <= codei)
+            if (cum[imed] <= codei) {
                 i0 = imed;
-            else
+            } else {
                 i1 = imed;
+            }
         }
         int r2a = i0, r2b = r2sub - i0;
         codei -= cum[r2a];
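Both decode routines above binary-search a cumulative-offset array (`code_segments[...].c0`, `cum[...]`) to find which segment a code falls into, then continue with the remainder `code - cum[bucket]`. The same lookup can be expressed with `std::upper_bound`; a toy version of the bucket search (not faiss's code) is:

```cpp
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

// Given cumulative bucket offsets cum[0..nb] (cum[0] = 0, cum[nb] = total),
// find the bucket containing `code` and the offset inside it. This mirrors
// the hand-written binary search on "cum[imed] <= codei" above.
int locate_bucket(
        const std::vector<uint64_t>& cum, uint64_t code, uint64_t* rest) {
    // upper_bound returns the first offset strictly greater than code;
    // the bucket index is the position just before it.
    auto it = std::upper_bound(cum.begin(), cum.end(), code);
    int bucket = int(it - cum.begin()) - 1;
    *rest = code - cum[bucket];
    return bucket;
}

int main() {
    // Buckets of sizes 4, 2, 5 -> cumulative offsets 0, 4, 6, 11.
    std::vector<uint64_t> cum = {0, 4, 6, 11};
    uint64_t rest = 0;
    int b = locate_bucket(cum, 7, &rest);
    // prints: code 7 -> bucket 2, offset 1
    std::printf("code 7 -> bucket %d, offset %llu\n",
                b, (unsigned long long)rest);
}
```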