RubyGems - faiss - Versions diffs - 0.6.1 → 0.6.2 - Mend

faiss 0.6.1 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (93)

checksums.yaml +4 -4
data/CHANGELOG.md +4 -0
data/lib/faiss/version.rb +1 -1
data/vendor/faiss/faiss/Index.h +1 -1
data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +6 -7
data/vendor/faiss/faiss/IndexBinaryIVF.cpp +3 -3
data/vendor/faiss/faiss/IndexHNSW.cpp +173 -143
data/vendor/faiss/faiss/IndexIVF.cpp +2 -2
data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +2 -2
data/vendor/faiss/faiss/IndexIVFFlat.cpp +3 -1
data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +3 -3
data/vendor/faiss/faiss/IndexIVFPQ.cpp +2 -3
data/vendor/faiss/faiss/IndexIVFPQR.cpp +2 -3
data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +4 -13
data/vendor/faiss/faiss/IndexNNDescent.cpp +1 -1
data/vendor/faiss/faiss/IndexNSG.cpp +1 -2
data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +68 -6
data/vendor/faiss/faiss/IndexScalarQuantizer.h +10 -0
data/vendor/faiss/faiss/cppcontrib/SaDecodeKernels.h +1 -1
data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-neon-inl.h +902 -12
data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-neon-inl.h +702 -10
data/vendor/faiss/faiss/factory_tools.cpp +4 -0
data/vendor/faiss/faiss/gpu/GpuResources.h +3 -2
data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +11 -12
data/vendor/faiss/faiss/gpu/StandardGpuResources.h +3 -3
data/vendor/faiss/faiss/gpu_metal/MetalDistance.h +87 -0
data/vendor/faiss/faiss/gpu_metal/MetalIndex.h +7 -0
data/vendor/faiss/faiss/gpu_metal/MetalIndexIVFFlat.h +181 -0
data/vendor/faiss/faiss/gpu_metal/MetalKernels.h +48 -3
data/vendor/faiss/faiss/gpu_metal/MetalPythonBridge.h +45 -0
data/vendor/faiss/faiss/gpu_metal/impl/MetalIVFFlat.h +193 -0
data/vendor/faiss/faiss/impl/HNSW.cpp +556 -199
data/vendor/faiss/faiss/impl/HNSW.h +51 -13
data/vendor/faiss/faiss/impl/NSG.cpp +15 -11
data/vendor/faiss/faiss/impl/Panorama.h +11 -0
data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +25 -2
data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +1 -1
data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +7 -1
data/vendor/faiss/faiss/impl/ResultHandler.h +1 -0
data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +271 -8
data/vendor/faiss/faiss/impl/ScalarQuantizer.h +50 -0
data/vendor/faiss/faiss/impl/VisitedTable.cpp +10 -10
data/vendor/faiss/faiss/impl/VisitedTable.h +69 -34
data/vendor/faiss/faiss/impl/fast_scan/dispatching.h +3 -1
data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.cpp +35 -43
data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.h +64 -15
data/vendor/faiss/faiss/impl/hnsw/avx2.cpp +86 -40
data/vendor/faiss/faiss/impl/hnsw/avx512.cpp +81 -50
data/vendor/faiss/faiss/impl/index_read.cpp +100 -39
data/vendor/faiss/faiss/impl/index_write.cpp +1 -0
data/vendor/faiss/faiss/impl/io_macros.h +25 -0
data/vendor/faiss/faiss/impl/platform_macros.h +12 -8
data/vendor/faiss/faiss/impl/pq_code_distance/avx2.cpp +2 -0
data/vendor/faiss/faiss/impl/pq_code_distance/avx512.cpp +2 -0
data/vendor/faiss/faiss/impl/pq_code_distance/neon.cpp +2 -0
data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.cpp +20 -0
data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-inl.h +36 -0
data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-sve.cpp +5 -0
data/vendor/faiss/faiss/impl/pq_code_distance/pq_scan_impl.h +105 -0
data/vendor/faiss/faiss/impl/pq_code_distance/rvv.cpp +2 -0
data/vendor/faiss/faiss/impl/scalar_quantizer/distance_computers.h +6 -0
data/vendor/faiss/faiss/impl/scalar_quantizer/quantizers.h +327 -18
data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx2.cpp +264 -27
data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512-impl.h +553 -0
data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512-spr.cpp +559 -0
data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512.cpp +199 -27
data/vendor/faiss/faiss/impl/scalar_quantizer/sq-dispatch.h +366 -3
data/vendor/faiss/faiss/impl/scalar_quantizer/sq-neon.cpp +144 -19
data/vendor/faiss/faiss/impl/scalar_quantizer/sq-rvv.cpp +26 -0
data/vendor/faiss/faiss/impl/simd_dispatch.h +65 -8
data/vendor/faiss/faiss/index_factory.cpp +5 -1
data/vendor/faiss/faiss/index_io.h +16 -0
data/vendor/faiss/faiss/invlists/DirectMap.cpp +4 -1
data/vendor/faiss/faiss/invlists/InvertedLists.cpp +13 -13
data/vendor/faiss/faiss/invlists/InvertedLists.h +2 -2
data/vendor/faiss/faiss/svs/IndexSVSVamana.cpp +119 -22
data/vendor/faiss/faiss/svs/IndexSVSVamana.h +15 -5
data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.cpp +3 -2
data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.h +2 -1
data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.cpp +65 -24
data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.h +3 -2
data/vendor/faiss/faiss/utils/bf16.h +34 -0
data/vendor/faiss/faiss/utils/distances_simd.cpp +0 -1
data/vendor/faiss/faiss/utils/hamming.cpp +8 -8
data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx2.cpp +2 -1
data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx512_spr.cpp +15 -0
data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512.h +6 -30
data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512_spr.h +171 -0
data/vendor/faiss/faiss/utils/partitioning.cpp +0 -2
data/vendor/faiss/faiss/utils/simd_impl/partitioning_simdlib256.h +14 -68
data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx512_spr.cpp +343 -0
data/vendor/faiss/faiss/utils/simd_levels.cpp +12 -2
metadata +12 -2

data/vendor/faiss/faiss/impl/hnsw/avx512.cpp CHANGED Viewed

@@ -16,96 +16,127 @@
 namespace faiss {
-template <>
-int MinimaxHeap::pop_min_tpl<SIMDLevel::AVX512>(float* vmin_out) {
-    assert(k > 0);
+namespace {
+/// Templated AVX512 implementation of "pop best" for both CMax (returns
+/// the smallest distance) and CMin (returns the largest similarity). Returns the popped id, or -1 when no lane holds a non-negative id; on success the chosen slot's id is overwritten with -1, heap.nvalid is decremented, and *vmin_out (when non-null) receives the best value.
+template <class HC>
+int pop_best_avx512(MinimaxHeapT<HC>& heap, float* vmin_out) {
+    using storage_idx_t = typename MinimaxHeapT<HC>::storage_idx_t;
     static_assert(
             std::is_same<storage_idx_t, int32_t>::value,
             "This code expects storage_idx_t to be int32_t");
+    assert(heap.k > 0);
-    int32_t min_idx = -1;
-    float min_dis = std::numeric_limits<float>::infinity();
+    constexpr float worst_v = HC::is_max
+            ? std::numeric_limits<float>::infinity()
+            : -std::numeric_limits<float>::infinity();
-    __m512i min_indices = _mm512_set1_epi32(-1);
-    __m512 min_distances =
-            _mm512_set1_ps(std::numeric_limits<float>::infinity());
+    int32_t best_idx = -1;
+    float best_dis = worst_v;
+    __m512i best_indices = _mm512_set1_epi32(-1);
+    __m512 best_distances = _mm512_set1_ps(worst_v);
     __m512i current_indices = _mm512_setr_epi32(
             0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
     __m512i offset = _mm512_set1_epi32(16);
-    // The following loop tracks the rightmost index with the min distance.
-    // -1 index values are ignored.
-    const size_t k16 = (k / 16) * 16;
+    auto best_vs_cand_mask = [](__m512 best_d, __m512 cand_d) -> __mmask16 {
+        // Returns the mask of lanes where the current best is already
+        // (strictly) better than the candidate.
+        if constexpr (HC::is_max) {
+            return _mm512_cmp_ps_mask(best_d, cand_d, _CMP_LT_OS);
+        } else {
+            return _mm512_cmp_ps_mask(best_d, cand_d, _CMP_GT_OS);
+        }
+    };
+    const size_t k16 = (heap.k / 16) * 16;
     for (size_t iii = 0; iii < k16; iii += 16) {
         __m512i indices =
-                _mm512_loadu_si512((const __m512i*)(ids.data() + iii));
-        __m512 distances = _mm512_loadu_ps(dis.data() + iii);
+                _mm512_loadu_si512((const __m512i*)(heap.ids.data() + iii));
+        __m512 distances = _mm512_loadu_ps(heap.dis.data() + iii);
-        // This mask filters out -1 values among indices.
         __mmask16 m1mask =
                 _mm512_cmpgt_epi32_mask(_mm512_setzero_si512(), indices);
-        __mmask16 dmask =
-                _mm512_cmp_ps_mask(min_distances, distances, _CMP_LT_OS);
+        __mmask16 dmask = best_vs_cand_mask(best_distances, distances);
         __mmask16 finalmask = m1mask | dmask;
-        const __m512i min_indices_new = _mm512_mask_blend_epi32(
-                finalmask, current_indices, min_indices);
-        const __m512 min_distances_new =
-                _mm512_mask_blend_ps(finalmask, distances, min_distances);
+        const __m512i best_indices_new = _mm512_mask_blend_epi32(
+                finalmask, current_indices, best_indices);
+        const __m512 best_distances_new =
+                _mm512_mask_blend_ps(finalmask, distances, best_distances);
-        min_indices = min_indices_new;
-        min_distances = min_distances_new;
+        best_indices = best_indices_new;
+        best_distances = best_distances_new;
         current_indices = _mm512_add_epi32(current_indices, offset);
     }
-    // leftovers
-    if (k16 != static_cast<size_t>(k)) {
-        const __mmask16 kmask = (1 << (k - k16)) - 1;
+    // Leftovers: the final heap.k - k16 (< 16) entries are read with a masked load; lanes beyond heap.k get id -1 and therefore never replace the current best.
+    if (k16 != static_cast<size_t>(heap.k)) {
+        const __mmask16 kmask = (1 << (heap.k - k16)) - 1;
         __m512i indices = _mm512_mask_loadu_epi32(
-                _mm512_set1_epi32(-1), kmask, ids.data() + k16);
-        __m512 distances = _mm512_maskz_loadu_ps(kmask, dis.data() + k16);
+                _mm512_set1_epi32(-1), kmask, heap.ids.data() + k16);
+        __m512 distances = _mm512_maskz_loadu_ps(kmask, heap.dis.data() + k16);
-        // This mask filters out -1 values among indices.
         __mmask16 m1mask =
                 _mm512_cmpgt_epi32_mask(_mm512_setzero_si512(), indices);
-        __mmask16 dmask =
-                _mm512_cmp_ps_mask(min_distances, distances, _CMP_LT_OS);
+        __mmask16 dmask = best_vs_cand_mask(best_distances, distances);
         __mmask16 finalmask = m1mask | dmask;
-        const __m512i min_indices_new = _mm512_mask_blend_epi32(
-                finalmask, current_indices, min_indices);
-        const __m512 min_distances_new =
-                _mm512_mask_blend_ps(finalmask, distances, min_distances);
+        const __m512i best_indices_new = _mm512_mask_blend_epi32(
+                finalmask, current_indices, best_indices);
+        const __m512 best_distances_new =
+                _mm512_mask_blend_ps(finalmask, distances, best_distances);
-        min_indices = min_indices_new;
-        min_distances = min_distances_new;
+        best_indices = best_indices_new;
+        best_distances = best_distances_new;
     }
-    // grab min distance
-    min_dis = _mm512_reduce_min_ps(min_distances);
-    // blend
-    __mmask16 mindmask =
-            _mm512_cmpeq_ps_mask(min_distances, _mm512_set1_ps(min_dis));
-    // pick the max one
-    min_idx = _mm512_mask_reduce_max_epi32(mindmask, min_indices);
+    // Horizontal best: min for CMax (distance), max for CMin (similarity).
+    if constexpr (HC::is_max) {
+        best_dis = _mm512_reduce_min_ps(best_distances);
+    } else {
+        best_dis = _mm512_reduce_max_ps(best_distances);
+    }
+    // Tiebreak by picking the rightmost (largest) index among lanes
+    // matching the best distance, matching the original behavior.
+    __mmask16 best_lane_mask =
+            _mm512_cmpeq_ps_mask(best_distances, _mm512_set1_ps(best_dis));
+    best_idx = _mm512_mask_reduce_max_epi32(best_lane_mask, best_indices);
-    if (min_idx == -1) {
+    if (best_idx == -1) {
         return -1;
     }
     if (vmin_out) {
-        *vmin_out = min_dis;
+        *vmin_out = best_dis;
     }
-    int ret = ids[min_idx];
-    ids[min_idx] = -1;
-    --nvalid;
+    int ret = heap.ids[best_idx];
+    heap.ids[best_idx] = -1;
+    --heap.nvalid;
     return ret;
 }
+} // namespace
+// Explicit specializations of pop_min_tpl for AVX512: the CMax and CMin heap variants both forward to pop_best_avx512.
+template <>
+int pop_min_tpl<CMax<float, int32_t>, SIMDLevel::AVX512>(
+        MinimaxHeapT<CMax<float, int32_t>>* heap,
+        float* vmin_out) {
+    return pop_best_avx512<CMax<float, int32_t>>(*heap, vmin_out);
+}
+template <>
+int pop_min_tpl<CMin<float, int32_t>, SIMDLevel::AVX512>(
+        MinimaxHeapT<CMin<float, int32_t>>* heap,
+        float* vmin_out) {
+    return pop_best_avx512<CMin<float, int32_t>>(*heap, vmin_out);
+}
 } // namespace faiss
 #endif // COMPILE_SIMD_AVX512

data/vendor/faiss/faiss/impl/index_read.cpp CHANGED Viewed

@@ -86,6 +86,7 @@ namespace faiss {
 namespace {
 size_t deserialization_loop_limit_ = 0;
 size_t deserialization_vector_byte_limit_ = uint64_t{1} << 40; // 1 TB
+size_t deserialization_lattice_r2_limit_ = 0;
 #ifdef FAISS_ENABLE_SVS
 // Read and validate an SVSStorageKind from the stream. Centralizes the
@@ -122,6 +123,14 @@ void set_deserialization_vector_byte_limit(size_t value) {
     deserialization_vector_byte_limit_ = value;
 }
+size_t get_deserialization_lattice_r2_limit() { // current IndexLattice r2 cap used while reading; 0 means the check is disabled
+    return deserialization_lattice_r2_limit_;
+}
+void set_deserialization_lattice_r2_limit(size_t value) { // overwrites deserialization_lattice_r2_limit_; pass 0 to turn the r2 check off
+    deserialization_lattice_r2_limit_ = value;
+}
 #define FAISS_CHECK_DESERIALIZATION_LOOP_LIMIT(val, field_name) \
     do {                                                        \
         auto limit_ = get_deserialization_loop_limit();         \
@@ -279,7 +288,7 @@ static void read_index_header(Index& idx, IOReader* f) {
     idx_t dummy;
     READ1(dummy);
     READ1(dummy);
-    READ1(idx.is_trained);
+    READ1_BOOL(idx.is_trained);
     int metric_type_int;
     READ1(metric_type_int);
     idx.metric_type = metric_type_from_int(metric_type_int);
@@ -307,7 +316,7 @@ std::unique_ptr<VectorTransform> read_VectorTransform_up(IOReader* f) {
             if (h == fourcc("Pcam")) {
                 READ1(pca->epsilon);
             }
-            READ1(pca->random_rotation);
+            READ1_BOOL(pca->random_rotation);
             if (h != fourcc("PCAm")) {
                 READ1(pca->balanced_bins);
             }
@@ -323,7 +332,7 @@ std::unique_ptr<VectorTransform> read_VectorTransform_up(IOReader* f) {
         } else if (h == fourcc("LTra")) {
             lt = std::make_unique<LinearTransform>();
         }
-        READ1(lt->have_bias);
+        READ1_BOOL(lt->have_bias);
         READVECTOR(lt->A);
         READVECTOR(lt->b);
         FAISS_THROW_IF_NOT(
@@ -347,7 +356,7 @@ std::unique_ptr<VectorTransform> read_VectorTransform_up(IOReader* f) {
         auto itqt = std::make_unique<ITQTransform>();
         READVECTOR(itqt->mean);
-        READ1(itqt->do_pca);
+        READ1_BOOL(itqt->do_pca);
         {
             // Read, dereference, discard.
             auto sub_vt = read_VectorTransform_up(f);
@@ -376,7 +385,7 @@ std::unique_ptr<VectorTransform> read_VectorTransform_up(IOReader* f) {
     }
     READ1(vt->d_in);
     READ1(vt->d_out);
-    READ1(vt->is_trained);
+    READ1_BOOL(vt->is_trained);
     FAISS_THROW_IF_NOT_FMT(
             vt->d_in >= 0,
             "invalid VectorTransform d_in=%d (must be >= 0)",
@@ -762,7 +771,7 @@ static void read_ResidualQuantizer_old(ResidualQuantizer& rq, IOReader* f) {
             "ResidualQuantizer nbits size %zd != M %zd",
             rq.nbits.size(),
             rq.M);
-    READ1(rq.is_trained);
+    READ1_BOOL(rq.is_trained);
     READ1(rq.train_type);
     READ1(rq.max_beam_size);
     READVECTOR(rq.codebooks);
@@ -780,7 +789,7 @@ static void read_AdditiveQuantizer(AdditiveQuantizer& aq, IOReader* f) {
     FAISS_THROW_IF_NOT_FMT(
             aq.M > 0, "invalid AdditiveQuantizer M %zd, must be > 0", aq.M);
     READVECTOR(aq.nbits);
-    READ1(aq.is_trained);
+    READ1_BOOL(aq.is_trained);
     READVECTOR(aq.codebooks);
     FAISS_THROW_IF_NOT_FMT(
             aq.nbits.size() == aq.M,
@@ -1035,6 +1044,8 @@ void read_ScalarQuantizer(
             ivsc->d,
             idx.d);
     READVECTOR(ivsc->trained);
+    // Populate bits/code_size before the validation block uses ivsc->bits.
+    ivsc->set_derived_sizes();
     // Validate trained vector size matches the quantizer type and dimension.
     // UNIFORM/NON_UNIFORM qtypes require training data; other qtypes
     // (fp16, bf16, 8bit_direct*) need none.
@@ -1075,6 +1086,16 @@ void read_ScalarQuantizer(
             case ScalarQuantizer::QT_8bit_tqmse:
                 expected = 256 + 255;
                 break;
+            case ScalarQuantizer::QT_2bit_tq:
+            case ScalarQuantizer::QT_3bit_tq:
+            case ScalarQuantizer::QT_4bit_tq:
+            case ScalarQuantizer::QT_5bit_tq: {
+                // k centroids + (k-1) boundaries + 3 extra trailing entries (2 packed-seed values + qjl_type)
+                size_t mse_bits = ivsc->bits - 1;
+                size_t k = size_t(1) << mse_bits;
+                expected = k + (k - 1) + 3;
+                break;
+            }
         }
         if (ivsc->trained.empty() && expected > 0) {
             // Empty trained is only valid for untrained indices.
@@ -1102,7 +1123,19 @@ void read_ScalarQuantizer(
             }
         }
     }
-    ivsc->set_derived_sizes();
+    // TurboQ full types: extract seed and qjl_type from trained,
+    // regenerate projection matrix.
+    if (ScalarQuantizer::TurboQuantRefine::is_turboq_full(ivsc->qtype) &&
+        ivsc->trained.size() >= 3) {
+        size_t n = ivsc->trained.size();
+        ivsc->turboq_refine.qjl_type =
+                static_cast<uint8_t>(ivsc->trained[n - 1]);
+        ivsc->turboq_refine.seed =
+                ScalarQuantizer::TurboQuantRefine::unpack_seed(
+                        ivsc->trained[n - 3], ivsc->trained[n - 2]);
+        ivsc->turboq_refine.init_projection(ivsc->d);
+    }
 }
 static void validate_HNSW(const HNSW& hnsw) {
@@ -1246,7 +1279,7 @@ static void read_NSG(NSG& nsg, IOReader* f) {
     READ1(nsg.C);
     READ1(nsg.search_L);
     READ1(nsg.enterpoint);
-    READ1(nsg.is_built);
+    READ1_BOOL(nsg.is_built);
     FAISS_THROW_IF_NOT_FMT(
             nsg.ntotal >= 0, "invalid NSG ntotal %d", nsg.ntotal);
@@ -1298,7 +1331,7 @@ static void read_NNDescent(NNDescent& nnd, IOReader* f) {
     READ1(nnd.iter);
     READ1(nnd.search_L);
     READ1(nnd.random_seed);
-    READ1(nnd.has_built);
+    READ1_BOOL(nnd.has_built);
     FAISS_THROW_IF_NOT_FMT(
             nnd.ntotal >= 0, "invalid NNDescent ntotal %d", nnd.ntotal);
@@ -1437,7 +1470,7 @@ static std::unique_ptr<IndexIVFPQ> read_ivfpq(
     std::vector<std::vector<idx_t>> ids;
     read_ivf_header(ivpq.get(), f, legacy ? &ids : nullptr);
-    READ1(ivpq->by_residual);
+    READ1_BOOL(ivpq->by_residual);
     READ1(ivpq->code_size);
     read_ProductQuantizer(&ivpq->pq, f);
@@ -1506,7 +1539,7 @@ std::unique_ptr<Index> read_index_up(IOReader* f, int io_flags) {
                     d, n_levels, batch_size);
         }
         READ1(idxp->ntotal);
-        READ1(idxp->is_trained);
+        READ1_BOOL(idxp->is_trained);
         READVECTOR(idxp->codes);
         READVECTOR(idxp->cum_sums);
         idxp->verbose = false;
@@ -1531,8 +1564,8 @@ std::unique_ptr<Index> read_index_up(IOReader* f, int io_flags) {
         auto idxl = std::make_unique<IndexLSH>();
         read_index_header(*idxl, f);
         READ1(idxl->nbits);
-        READ1(idxl->rotate_data);
-        READ1(idxl->train_thresholds);
+        READ1_BOOL(idxl->rotate_data);
+        READ1_BOOL(idxl->train_thresholds);
         READVECTOR(idxl->thresholds);
         int code_size_i;
         READ1(code_size_i);
@@ -1578,7 +1611,7 @@ std::unique_ptr<Index> read_index_up(IOReader* f, int io_flags) {
                 idxp->codes.size() == idxp->ntotal * idxp->code_size);
         if (h == fourcc("IxPo") || h == fourcc("IxPq")) {
             READ1(idxp->search_type);
-            READ1(idxp->encode_signs);
+            READ1_BOOL(idxp->encode_signs);
             READ1(idxp->polysemous_ht);
         }
         // Old versions of PQ all had metric_type set to INNER_PRODUCT
@@ -1741,7 +1774,7 @@ std::unique_ptr<Index> read_index_up(IOReader* f, int io_flags) {
         READ1(idxaqfs->ntotal2);
         READ1(idxaqfs->M2);
-        READ1(idxaqfs->rescale_norm);
+        READ1_BOOL(idxaqfs->rescale_norm);
         READ1(idxaqfs->norm_scale);
         READ1(idxaqfs->max_train_points);
@@ -1791,7 +1824,7 @@ std::unique_ptr<Index> read_index_up(IOReader* f, int io_flags) {
         validate_aq_dimension_match(
                 *ivaqfs->aq, ivaqfs->d, "IndexIVFAdditiveQuantizerFastScan");
-        READ1(ivaqfs->by_residual);
+        READ1_BOOL(ivaqfs->by_residual);
         READ1(ivaqfs->implem);
         READ1(ivaqfs->bbs);
         READ1(ivaqfs->qbs);
@@ -1804,7 +1837,7 @@ std::unique_ptr<Index> read_index_up(IOReader* f, int io_flags) {
         READ1(ivaqfs->qbs2);
         READ1(ivaqfs->M2);
-        READ1(ivaqfs->rescale_norm);
+        READ1_BOOL(ivaqfs->rescale_norm);
         READ1(ivaqfs->norm_scale);
         READ1(ivaqfs->max_train_points);
@@ -1902,6 +1935,24 @@ std::unique_ptr<Index> read_index_up(IOReader* f, int io_flags) {
                 nsq);
         FAISS_THROW_IF_NOT_FMT(
                 r2 > 0, "invalid IndexLattice r2 %d (must be > 0)", r2);
+        {
+            // ZnSphereCodecRec constructor populates a decode cache
+            // whose build cost grows polynomially in r2.  The
+            // in-codec memory cap (lattice_Zn.cpp) bounds the cache
+            // size but not the CPU cost of building it, so for small
+            // dsq the cap permits enough decode() iterations to far
+            // exceed reasonable load-time budgets.  Callers that
+            // operate on untrusted index payloads can opt in to a
+            // tighter bound via set_deserialization_lattice_r2_limit;
+            // the default of 0 preserves existing behavior.
+            auto limit_ = get_deserialization_lattice_r2_limit();
+            FAISS_THROW_IF_NOT_FMT(
+                    limit_ == 0 || static_cast<size_t>(r2) <= limit_,
+                    "IndexLattice r2=%d exceeds "
+                    "deserialization_lattice_r2_limit of %zd",
+                    r2,
+                    limit_);
+        }
         int dsq = d / nsq;
         FAISS_THROW_IF_NOT_FMT(
                 dsq >= 2 && (dsq & (dsq - 1)) == 0,
@@ -1956,7 +2007,7 @@ std::unique_ptr<Index> read_index_up(IOReader* f, int io_flags) {
         if (h == fourcc("IwSQ")) {
             ivsc->by_residual = true;
         } else {
-            READ1(ivsc->by_residual);
+            READ1_BOOL(ivsc->by_residual);
         }
         read_InvertedLists(*ivsc, f, io_flags);
         idx = std::move(ivsc);
@@ -1995,7 +2046,7 @@ std::unique_ptr<Index> read_index_up(IOReader* f, int io_flags) {
                 iva->code_size,
                 iva->aq->code_size,
                 "IndexIVFAdditiveQuantizer");
-        READ1(iva->by_residual);
+        READ1_BOOL(iva->by_residual);
         READ1(iva->use_precomputed_table);
         read_InvertedLists(*iva, f, io_flags);
         idx = std::move(iva);
@@ -2022,7 +2073,7 @@ std::unique_ptr<Index> read_index_up(IOReader* f, int io_flags) {
         read_index_header(*indep, f);
         indep->quantizer = read_index(f, io_flags);
         bool has_vt;
-        READ1(has_vt);
+        READ1_BOOL(has_vt);
         if (has_vt) {
             indep->vt = read_VectorTransform(f);
         }
@@ -2137,6 +2188,7 @@ std::unique_ptr<Index> read_index_up(IOReader* f, int io_flags) {
                 : std::make_unique<IndexIDMap>();
         read_index_header(*idxmap, f);
         idxmap->index = read_index(f, io_flags);
+        FAISS_THROW_IF_NOT_MSG(idxmap->index, "IndexIDMap inner index is null");
         idxmap->own_fields = true;
         READVECTOR(idxmap->id_map);
         FAISS_THROW_IF_NOT_FMT(
@@ -2217,11 +2269,11 @@ std::unique_ptr<Index> read_index_up(IOReader* f, int io_flags) {
             READVECTOR(idx_panorama->cum_sums);
         }
         if (h == fourcc("IHNc") || h == fourcc("IHc2")) {
-            READ1(idxhnsw->keep_max_size_level0);
+            READ1_BOOL(idxhnsw->keep_max_size_level0);
             auto idx_hnsw_cagra = dynamic_cast<IndexHNSWCagra*>(idxhnsw.get());
             FAISS_THROW_IF_NOT_MSG(
                     idx_hnsw_cagra, "dynamic_cast to IndexHNSWCagra failed");
-            READ1(idx_hnsw_cagra->base_level_only);
+            READ1_BOOL(idx_hnsw_cagra->base_level_only);
             READ1(idx_hnsw_cagra->num_base_level_search_entrypoints);
             if (h == fourcc("IHc2")) {
                 READ1(idx_hnsw_cagra->numeric_type_);
@@ -2237,6 +2289,12 @@ std::unique_ptr<Index> read_index_up(IOReader* f, int io_flags) {
                 idxhnsw->hnsw.levels.size(),
                 idxhnsw->ntotal);
         idxhnsw->hnsw.is_panorama = (h == fourcc("IHfP"));
+        // `HNSW::is_similarity` is intentionally not serialized, so we
+        // re-derive it here from the persisted metric type. Without this,
+        // a saved IP/similarity index would come back configured as a
+        // distance index and silently produce wrong rankings on search.
+        idxhnsw->hnsw.is_similarity =
+                is_similarity_metric(idxhnsw->metric_type);
         idxhnsw->storage = read_index(f, io_flags);
         idxhnsw->own_fields = idxhnsw->storage != nullptr;
         // Cross-check storage ntotal and d against index
@@ -2372,7 +2430,7 @@ std::unique_ptr<Index> read_index_up(IOReader* f, int io_flags) {
     } else if (h == fourcc("IwPf")) {
         auto ivpq = std::make_unique<IndexIVFPQFastScan>();
         read_ivf_header(ivpq.get(), f);
-        READ1(ivpq->by_residual);
+        READ1_BOOL(ivpq->by_residual);
         READ1(ivpq->code_size);
         READ1(ivpq->bbs);
         READ1(ivpq->M2);
@@ -2512,7 +2570,7 @@ std::unique_ptr<Index> read_index_up(IOReader* f, int io_flags) {
         read_ivf_header(ivrq.get(), f);
         read_RaBitQuantizer(ivrq->rabitq, f, ivrq->d, false);
         READ1(ivrq->code_size);
-        READ1(ivrq->by_residual);
+        READ1_BOOL(ivrq->by_residual);
         READ1(ivrq->qb);
         // qb=0: Not quantized - direct distance computation on given float32s.
         // qb>0 && qb<=8: Scalar-quantized with qb bits of precision.
@@ -2535,7 +2593,7 @@ std::unique_ptr<Index> read_index_up(IOReader* f, int io_flags) {
         read_RaBitQuantizer(
                 ivrq->rabitq, f, ivrq->d, true); // Reads nb_bits from file
         READ1(ivrq->code_size);
-        READ1(ivrq->by_residual);
+        READ1_BOOL(ivrq->by_residual);
         READ1(ivrq->qb);
         // qb=0: Not quantized - direct distance computation on given float32s.
         // qb>0 && qb<=8: Scalar-quantized with qb bits of precision.
@@ -2572,9 +2630,10 @@ std::unique_ptr<Index> read_index_up(IOReader* f, int io_flags) {
         READ1(svs->construction_window_size);
         READ1(svs->max_candidate_pool_size);
         READ1(svs->prune_to);
-        READ1(svs->use_full_search_history);
+        READ1_BOOL(svs->use_full_search_history);
         svs->storage_kind = read_svs_storage_kind(f);
+        READ1_BOOL(svs->is_static);
         if (h == fourcc("ISVL")) {
             auto* leanvec = dynamic_cast<IndexSVSVamanaLeanVec*>(svs.get());
@@ -2584,7 +2643,7 @@ std::unique_ptr<Index> read_index_up(IOReader* f, int io_flags) {
         }
         bool initialized;
-        READ1(initialized);
+        READ1_BOOL(initialized);
         if (initialized) {
             faiss::svs_io::ReaderStreambuf rbuf(
                     f, get_deserialization_vector_byte_limit());
@@ -2593,7 +2652,7 @@ std::unique_ptr<Index> read_index_up(IOReader* f, int io_flags) {
         }
         if (h == fourcc("ISVL")) {
             bool trained;
-            READ1(trained);
+            READ1_BOOL(trained);
             if (trained) {
                 faiss::svs_io::ReaderStreambuf rbuf(
                         f, get_deserialization_vector_byte_limit());
@@ -2616,7 +2675,7 @@ std::unique_ptr<Index> read_index_up(IOReader* f, int io_flags) {
         read_index_header(*svs, f);
         bool initialized;
-        READ1(initialized);
+        READ1_BOOL(initialized);
         if (initialized) {
             faiss::svs_io::ReaderStreambuf rbuf(
                     f, get_deserialization_vector_byte_limit());
@@ -2639,7 +2698,7 @@ std::unique_ptr<Index> read_index_up(IOReader* f, int io_flags) {
         READ1(svs_ivf->num_centroids);
         READ1(svs_ivf->minibatch_size);
         READ1(svs_ivf->num_iterations);
-        READ1(svs_ivf->is_hierarchical);
+        READ1_BOOL(svs_ivf->is_hierarchical);
         READ1(svs_ivf->training_fraction);
         READ1(svs_ivf->hierarchical_level1_clusters);
         READ1(svs_ivf->seed);
@@ -2648,7 +2707,7 @@ std::unique_ptr<Index> read_index_up(IOReader* f, int io_flags) {
         READ1(svs_ivf->num_threads);
         READ1(svs_ivf->intra_query_threads);
         svs_ivf->storage_kind = read_svs_storage_kind(f);
-        READ1(svs_ivf->is_static);
+        READ1_BOOL(svs_ivf->is_static);
         if (h == fourcc("ISIL")) {
             auto* leanvec = dynamic_cast<IndexSVSIVFLeanVec*>(svs_ivf.get());
             FAISS_THROW_IF_NOT_MSG(
@@ -2657,7 +2716,7 @@ std::unique_ptr<Index> read_index_up(IOReader* f, int io_flags) {
         }
         bool initialized;
-        READ1(initialized);
+        READ1_BOOL(initialized);
         if (initialized) {
             faiss::svs_io::ReaderStreambuf rbuf(f);
             std::istream is(&rbuf);
@@ -2665,7 +2724,7 @@ std::unique_ptr<Index> read_index_up(IOReader* f, int io_flags) {
         }
         if (h == fourcc("ISIL")) {
             bool trained;
-            READ1(trained);
+            READ1_BOOL(trained);
             if (trained) {
                 faiss::svs_io::ReaderStreambuf rbuf(f);
                 std::istream is(&rbuf);
@@ -2687,7 +2746,7 @@ std::unique_ptr<Index> read_index_up(IOReader* f, int io_flags) {
         auto ivrqfs = std::make_unique<IndexIVFRaBitQFastScan>();
         read_ivf_header(ivrqfs.get(), f);
         read_RaBitQuantizer(ivrqfs->rabitq, f, ivrqfs->d);
-        READ1(ivrqfs->by_residual);
+        READ1_BOOL(ivrqfs->by_residual);
         READ1(ivrqfs->code_size);
         READ1(ivrqfs->bbs);
         READ1(ivrqfs->qbs2);
@@ -2698,7 +2757,7 @@ std::unique_ptr<Index> read_index_up(IOReader* f, int io_flags) {
                 ivrqfs->qb > 0 && ivrqfs->qb <= 8,
                 "invalid RaBitQ qb=%d (must be in [1, 8])",
                 ivrqfs->qb);
-        READ1(ivrqfs->centered);
+        READ1_BOOL(ivrqfs->centered);
         std::vector<uint8_t> legacy_flat_storage;
         if (is_legacy) {
@@ -2825,7 +2884,7 @@ static void read_index_binary_header(IndexBinary& idx, IOReader* f) {
     READ1(idx.d);
     READ1(idx.code_size);
     READ1(idx.ntotal);
-    READ1(idx.is_trained);
+    READ1_BOOL(idx.is_trained);
     int metric_type_int;
     READ1(metric_type_int);
     idx.metric_type = metric_type_from_int(metric_type_int);
@@ -2984,6 +3043,8 @@ std::unique_ptr<IndexBinary> read_index_binary_up(IOReader* f, int io_flags) {
         read_index_binary_header(*idxff, f);
         idxff->own_fields = true;
         idxff->index = read_index(f, io_flags);
+        FAISS_THROW_IF_NOT_MSG(
+                idxff->index, "IndexBinaryFromFloat inner index is null");
         idx = std::move(idxff);
     } else if (h == fourcc("IBHf")) {
         auto idxhnsw = std::make_unique<IndexBinaryHNSW>();
@@ -3009,8 +3070,8 @@ std::unique_ptr<IndexBinary> read_index_binary_up(IOReader* f, int io_flags) {
     } else if (h == fourcc("IBHc")) {
         auto idxhnsw = std::make_unique<IndexBinaryHNSWCagra>();
         read_index_binary_header(*idxhnsw, f);
-        READ1(idxhnsw->keep_max_size_level0);
-        READ1(idxhnsw->base_level_only);
+        READ1_BOOL(idxhnsw->keep_max_size_level0);
+        READ1_BOOL(idxhnsw->base_level_only);
         READ1(idxhnsw->num_base_level_search_entrypoints);
         read_HNSW(idxhnsw->hnsw, f);
         idxhnsw->hnsw.is_panorama = false;

data/vendor/faiss/faiss/impl/index_write.cpp CHANGED Viewed

@@ -1044,6 +1044,7 @@ void write_index(const Index* idx, IOWriter* f, int io_flags) {
         WRITE1(svs->prune_to);
         WRITE1(svs->use_full_search_history);
         WRITE1(svs->storage_kind);
+        WRITE1(svs->is_static);
         if (lean != nullptr) {
             WRITE1(lean->leanvec_d);

data/vendor/faiss/faiss/impl/io_macros.h CHANGED Viewed

@@ -35,6 +35,31 @@ size_t get_deserialization_vector_byte_limit();
 #define READ1(x) READANDCHECK(&(x), 1)
+// Reads a single byte into a bool, rejecting any byte that is not the
+// canonical encoding for the platform's bool representation. Reading a
+// non-canonical byte directly into a bool is undefined behavior and
+// trips UBSan's invalid-bool-load check. To stay ABI-portable, we
+// assign via the language-defined conversion (b != 0) and then compare
+// the resulting bool's storage byte back against the byte we read - the
+// roundtrip succeeds iff the input byte was already canonical on this
+// platform. FAISS only ever writes the canonical encoding via
+// WRITE1(bool), so well-formed indices roundtrip cleanly; corrupt or
+// attacker-controlled input that places a non-canonical byte at a bool
+// offset is rejected as a FaissException. Usage note: the macro expands to a bare brace block and evaluates (x) more than once (assignment, then byte comparison), so pass a side-effect-free lvalue.
+#define READ1_BOOL(x)                                                      \
+    {                                                                      \
+        static_assert(                                                     \
+                sizeof(x) == 1, "READ1_BOOL: destination must be 1 byte"); \
+        uint8_t b;                                                         \
+        READANDCHECK(&b, 1);                                               \
+        (x) = (b != 0);                                                    \
+        FAISS_THROW_IF_NOT_FMT(                                            \
+                *reinterpret_cast<const uint8_t*>(&(x)) == b,              \
+                "invalid bool encoding 0x%02x for %s",                     \
+                b,                                                         \
+                #x);                                                       \
+    }
 #define READ1_DUMMY(x_type) \
     {                       \
         x_type x = {};      \