faiss 0.6.0 → 0.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/ext/faiss/extconf.rb +2 -1
- data/ext/faiss/{index_rb.cpp → index.cpp} +1 -1
- data/ext/faiss/index_binary.cpp +1 -1
- data/ext/faiss/kmeans.cpp +1 -1
- data/ext/faiss/pca_matrix.cpp +1 -1
- data/ext/faiss/product_quantizer.cpp +1 -1
- data/ext/faiss/{utils_rb.cpp → utils.cpp} +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +93 -80
- data/vendor/faiss/faiss/Clustering.cpp +39 -240
- data/vendor/faiss/faiss/Clustering.h +6 -0
- data/vendor/faiss/faiss/IVFlib.cpp +41 -21
- data/vendor/faiss/faiss/Index.cpp +6 -5
- data/vendor/faiss/faiss/Index.h +5 -5
- data/vendor/faiss/faiss/Index2Layer.cpp +37 -53
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +49 -37
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +36 -34
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +4 -1
- data/vendor/faiss/faiss/IndexBinary.cpp +5 -3
- data/vendor/faiss/faiss/IndexBinary.h +4 -4
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +1 -1
- data/vendor/faiss/faiss/IndexBinaryFlat.h +1 -1
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -4
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +88 -97
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +9 -3
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +45 -236
- data/vendor/faiss/faiss/IndexBinaryHash.h +6 -6
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +89 -417
- data/vendor/faiss/faiss/IndexFastScan.cpp +72 -109
- data/vendor/faiss/faiss/IndexFastScan.h +25 -23
- data/vendor/faiss/faiss/IndexFlat.cpp +27 -20
- data/vendor/faiss/faiss/IndexFlat.h +21 -18
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +42 -19
- data/vendor/faiss/faiss/IndexHNSW.cpp +374 -206
- data/vendor/faiss/faiss/IndexHNSW.h +16 -2
- data/vendor/faiss/faiss/IndexIDMap.cpp +25 -21
- data/vendor/faiss/faiss/IndexIDMap.h +9 -7
- data/vendor/faiss/faiss/IndexIVF.cpp +467 -364
- data/vendor/faiss/faiss/IndexIVF.h +33 -12
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +79 -76
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +96 -93
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -1
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +357 -238
- data/vendor/faiss/faiss/IndexIVFFastScan.h +42 -41
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +39 -69
- data/vendor/faiss/faiss/IndexIVFFlat.h +32 -0
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +56 -33
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.h +3 -1
- data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +18 -15
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +73 -846
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +151 -121
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +3 -0
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +23 -20
- data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +30 -52
- data/vendor/faiss/faiss/IndexIVFRaBitQ.h +2 -1
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +475 -476
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +248 -93
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +41 -127
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +1 -1
- data/vendor/faiss/faiss/IndexLSH.cpp +36 -19
- data/vendor/faiss/faiss/IndexLattice.cpp +13 -13
- data/vendor/faiss/faiss/IndexNNDescent.cpp +36 -21
- data/vendor/faiss/faiss/IndexNNDescent.h +2 -2
- data/vendor/faiss/faiss/IndexNSG.cpp +38 -23
- data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +31 -11
- data/vendor/faiss/faiss/IndexPQ.cpp +128 -221
- data/vendor/faiss/faiss/IndexPQ.h +3 -2
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +20 -14
- data/vendor/faiss/faiss/IndexPQFastScan.h +3 -0
- data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -18
- data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
- data/vendor/faiss/faiss/IndexRaBitQ.cpp +11 -36
- data/vendor/faiss/faiss/IndexRaBitQ.h +2 -1
- data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +41 -277
- data/vendor/faiss/faiss/IndexRaBitQFastScan.h +183 -27
- data/vendor/faiss/faiss/IndexRefine.cpp +30 -25
- data/vendor/faiss/faiss/IndexRefine.h +4 -4
- data/vendor/faiss/faiss/IndexReplicas.cpp +6 -6
- data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +15 -14
- data/vendor/faiss/faiss/IndexRowwiseMinMax.h +1 -1
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +150 -20
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +10 -0
- data/vendor/faiss/faiss/IndexShards.cpp +10 -9
- data/vendor/faiss/faiss/IndexShardsIVF.cpp +21 -15
- data/vendor/faiss/faiss/MatrixStats.cpp +5 -4
- data/vendor/faiss/faiss/MetaIndexes.cpp +19 -17
- data/vendor/faiss/faiss/MetaIndexes.h +1 -1
- data/vendor/faiss/faiss/MetricType.h +14 -7
- data/vendor/faiss/faiss/SuperKMeans.cpp +656 -0
- data/vendor/faiss/faiss/SuperKMeans.h +97 -0
- data/vendor/faiss/faiss/VectorTransform.cpp +237 -149
- data/vendor/faiss/faiss/VectorTransform.h +16 -16
- data/vendor/faiss/faiss/build.cpp +23 -0
- data/vendor/faiss/faiss/build.h +15 -0
- data/vendor/faiss/faiss/clone_index.cpp +48 -47
- data/vendor/faiss/faiss/cppcontrib/SaDecodeKernels.h +1 -1
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +47 -47
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +11 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-neon-inl.h +902 -12
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +38 -38
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +11 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-neon-inl.h +702 -10
- data/vendor/faiss/faiss/factory_tools.cpp +9 -0
- data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +6 -5
- data/vendor/faiss/faiss/gpu/GpuResources.h +3 -2
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +15 -16
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +5 -4
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +46 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +56 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +78 -1
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +72 -0
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +23 -0
- data/vendor/faiss/faiss/gpu/utils/CuvsFilterConvert.h +1 -1
- data/vendor/faiss/faiss/gpu/utils/CuvsUtils.h +21 -10
- data/vendor/faiss/faiss/gpu_metal/GpuIndexFlat.h +22 -0
- data/vendor/faiss/faiss/gpu_metal/MetalCloner.h +35 -0
- data/vendor/faiss/faiss/gpu_metal/MetalDistance.h +87 -0
- data/vendor/faiss/faiss/gpu_metal/MetalFlatKernels.h +40 -0
- data/vendor/faiss/faiss/gpu_metal/MetalIndex.h +58 -0
- data/vendor/faiss/faiss/gpu_metal/MetalIndexFlat.h +65 -0
- data/vendor/faiss/faiss/gpu_metal/MetalIndexIVFFlat.h +181 -0
- data/vendor/faiss/faiss/gpu_metal/MetalKernels.h +111 -0
- data/vendor/faiss/faiss/gpu_metal/MetalPythonBridge.h +45 -0
- data/vendor/faiss/faiss/gpu_metal/MetalResources.h +79 -0
- data/vendor/faiss/faiss/gpu_metal/StandardMetalResources.h +35 -0
- data/vendor/faiss/faiss/gpu_metal/impl/MetalIVFFlat.h +193 -0
- data/vendor/faiss/faiss/impl/AdSampling.cpp +103 -0
- data/vendor/faiss/faiss/impl/AdSampling.h +35 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +29 -25
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +1 -0
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +10 -9
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +3 -0
- data/vendor/faiss/faiss/impl/ClusteringHelpers.cpp +244 -0
- data/vendor/faiss/faiss/impl/ClusteringHelpers.h +94 -0
- data/vendor/faiss/faiss/impl/ClusteringInitialization.cpp +16 -16
- data/vendor/faiss/faiss/impl/CodePacker.cpp +3 -3
- data/vendor/faiss/faiss/impl/CodePackerRaBitQ.cpp +1 -1
- data/vendor/faiss/faiss/impl/DistanceComputer.h +8 -8
- data/vendor/faiss/faiss/impl/FaissAssert.h +6 -3
- data/vendor/faiss/faiss/impl/FaissException.h +50 -3
- data/vendor/faiss/faiss/impl/HNSW.cpp +639 -507
- data/vendor/faiss/faiss/impl/HNSW.h +61 -44
- data/vendor/faiss/faiss/impl/IDSelector.cpp +15 -11
- data/vendor/faiss/faiss/impl/IDSelector.h +8 -8
- data/vendor/faiss/faiss/impl/InvertedListScannerStats.h +26 -0
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +82 -77
- data/vendor/faiss/faiss/impl/NNDescent.cpp +62 -25
- data/vendor/faiss/faiss/impl/NNDescent.h +6 -2
- data/vendor/faiss/faiss/impl/NSG.cpp +53 -32
- data/vendor/faiss/faiss/impl/NSG.h +4 -4
- data/vendor/faiss/faiss/impl/Panorama.cpp +23 -6
- data/vendor/faiss/faiss/impl/Panorama.h +269 -87
- data/vendor/faiss/faiss/impl/PdxLayout.cpp +93 -0
- data/vendor/faiss/faiss/impl/PdxLayout.h +41 -0
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +46 -32
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +3 -3
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +35 -35
- data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +21 -16
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +55 -25
- data/vendor/faiss/faiss/impl/Quantizer.h +2 -2
- data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +55 -49
- data/vendor/faiss/faiss/impl/RaBitQUtils.h +65 -0
- data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +302 -283
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +26 -23
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ResultHandler.h +100 -75
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +318 -7
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +77 -1
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +14 -11
- data/vendor/faiss/faiss/impl/VisitedTable.cpp +10 -10
- data/vendor/faiss/faiss/impl/VisitedTable.h +70 -28
- data/vendor/faiss/faiss/impl/approx_topk/approx_topk.h +276 -0
- data/vendor/faiss/faiss/impl/approx_topk/avx2.cpp +68 -0
- data/vendor/faiss/faiss/{utils → impl}/approx_topk/generic.h +15 -8
- data/vendor/faiss/faiss/impl/approx_topk/neon.cpp +68 -0
- data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab-inl.h +169 -0
- data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab.h +117 -0
- data/vendor/faiss/faiss/impl/approx_topk/simdlib256-inl.h +146 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHNSW_impl.h +73 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHash_impl.h +270 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryIVF_impl.h +460 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexIVFSpectralHash_impl.h +159 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexPQ_impl.h +92 -0
- data/vendor/faiss/faiss/impl/binary_hamming/avx2.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/avx512.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/dispatch.h +143 -0
- data/vendor/faiss/faiss/impl/binary_hamming/neon.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/rvv.cpp +26 -0
- data/vendor/faiss/faiss/impl/expanded_scanners.h +8 -3
- data/vendor/faiss/faiss/impl/{FastScanDistancePostProcessing.h → fast_scan/FastScanDistancePostProcessing.h} +13 -6
- data/vendor/faiss/faiss/impl/{LookupTableScaler.h → fast_scan/LookupTableScaler.h} +16 -5
- data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops.h +237 -0
- data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops_512.h +185 -0
- data/vendor/faiss/faiss/impl/fast_scan/decompose_qbs.h +229 -0
- data/vendor/faiss/faiss/impl/fast_scan/dispatching.h +270 -0
- data/vendor/faiss/faiss/impl/{pq4_fast_scan.cpp → fast_scan/fast_scan.cpp} +169 -2
- data/vendor/faiss/faiss/impl/fast_scan/fast_scan.h +341 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-avx2.cpp +36 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-avx512.cpp +40 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-neon.cpp +120 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-riscv.cpp +104 -0
- data/vendor/faiss/faiss/impl/fast_scan/kernels_simd256.h +213 -0
- data/vendor/faiss/faiss/impl/{pq4_fast_scan_search_qbs.cpp → fast_scan/kernels_simd512.h} +26 -356
- data/vendor/faiss/faiss/impl/fast_scan/rabitq_dispatching.h +90 -0
- data/vendor/faiss/faiss/impl/fast_scan/rabitq_result_handler.h +108 -0
- data/vendor/faiss/faiss/impl/{simd_result_handlers.h → fast_scan/simd_result_handlers.h} +282 -134
- data/vendor/faiss/faiss/impl/hnsw/LockVector.cpp +54 -0
- data/vendor/faiss/faiss/impl/hnsw/LockVector.h +64 -0
- data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.cpp +83 -0
- data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.h +113 -0
- data/vendor/faiss/faiss/impl/hnsw/avx2.cpp +150 -0
- data/vendor/faiss/faiss/impl/hnsw/avx512.cpp +142 -0
- data/vendor/faiss/faiss/impl/index_read.cpp +1227 -79
- data/vendor/faiss/faiss/impl/index_read_utils.h +1 -1
- data/vendor/faiss/faiss/impl/index_write.cpp +96 -13
- data/vendor/faiss/faiss/impl/io.cpp +6 -6
- data/vendor/faiss/faiss/impl/io_macros.h +58 -16
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +10 -10
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +37 -23
- data/vendor/faiss/faiss/impl/lattice_Zn.h +6 -6
- data/vendor/faiss/faiss/impl/mapped_io.cpp +6 -6
- data/vendor/faiss/faiss/impl/platform_macros.h +15 -4
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQScanner_impl.h +549 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.cpp +245 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.h +105 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/PQDistanceComputer_impl.h +106 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/avx2.cpp +23 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/avx512.cpp +23 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/neon.cpp +23 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx2.cpp → pq_code_distance-avx2.h} +9 -13
- data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx512.cpp → pq_code_distance-avx512.h} +9 -57
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.cpp +45 -107
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.h +96 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-inl.h +274 -5
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-sve.cpp +10 -7
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_scan_impl.h +105 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/rvv.cpp +70 -0
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +311 -477
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +1 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/codecs.h +1 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/distance_computers.h +9 -2
- data/vendor/faiss/faiss/impl/scalar_quantizer/quantizers.h +419 -19
- data/vendor/faiss/faiss/impl/scalar_quantizer/scanners.h +27 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/similarities.h +3 -3
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx2.cpp +387 -2
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512-impl.h +553 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512-spr.cpp +559 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512.cpp +341 -2
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-dispatch.h +425 -3
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-neon.cpp +290 -2
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-rvv.cpp +337 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/training.cpp +192 -8
- data/vendor/faiss/faiss/impl/scalar_quantizer/training.h +12 -0
- data/vendor/faiss/faiss/impl/simd_dispatch.h +157 -66
- data/vendor/faiss/faiss/impl/simdlib/simdlib.h +57 -0
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_avx2.h +264 -172
- data/vendor/faiss/faiss/impl/simdlib/simdlib_avx512.h +414 -0
- data/vendor/faiss/faiss/impl/simdlib/simdlib_dispatch.h +44 -0
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_emulated.h +231 -166
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_neon.h +270 -218
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_ppc64.h +201 -160
- data/vendor/faiss/faiss/impl/svs_io.cpp +12 -3
- data/vendor/faiss/faiss/impl/svs_io.h +8 -2
- data/vendor/faiss/faiss/index_factory.cpp +90 -18
- data/vendor/faiss/faiss/index_io.h +40 -0
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +66 -16
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +28 -15
- data/vendor/faiss/faiss/invlists/DirectMap.h +4 -3
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +170 -86
- data/vendor/faiss/faiss/invlists/InvertedLists.h +88 -25
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +4 -4
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +13 -13
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSFaissUtils.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSFlat.cpp +2 -2
- data/vendor/faiss/faiss/svs/IndexSVSIVF.cpp +350 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVF.h +128 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.cpp +40 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.h +43 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.cpp +225 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.h +71 -0
- data/vendor/faiss/faiss/svs/IndexSVSVamana.cpp +142 -21
- data/vendor/faiss/faiss/svs/IndexSVSVamana.h +33 -7
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.cpp +3 -2
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.h +2 -1
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.cpp +77 -27
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.h +10 -4
- data/vendor/faiss/faiss/utils/Heap.cpp +10 -10
- data/vendor/faiss/faiss/utils/NeuralNet.cpp +47 -36
- data/vendor/faiss/faiss/utils/NeuralNet.h +1 -1
- data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +10 -4
- data/vendor/faiss/faiss/utils/bf16.h +34 -0
- data/vendor/faiss/faiss/utils/distances.cpp +390 -560
- data/vendor/faiss/faiss/utils/distances.h +20 -1
- data/vendor/faiss/faiss/utils/distances_dispatch.h +117 -37
- data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +8 -7
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +33 -14
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +12 -1
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +16 -293
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based_neon.cpp +57 -0
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_kernel-inl.h +290 -0
- data/vendor/faiss/faiss/utils/distances_simd.cpp +5 -178
- data/vendor/faiss/faiss/utils/extra_distances.cpp +9 -8
- data/vendor/faiss/faiss/utils/extra_distances.h +32 -6
- data/vendor/faiss/faiss/utils/hamming-inl.h +13 -11
- data/vendor/faiss/faiss/utils/hamming.cpp +66 -517
- data/vendor/faiss/faiss/utils/hamming.h +92 -2
- data/vendor/faiss/faiss/utils/hamming_distance/common.h +287 -10
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx2.cpp +16 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx512.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx512_spr.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx2.h +142 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512.h +210 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512_spr.h +171 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-generic.h +368 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-neon.h +322 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-rvv.h +39 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer.h +146 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_impl.h +481 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_neon.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_rvv.cpp +15 -0
- data/vendor/faiss/faiss/utils/partitioning.cpp +66 -989
- data/vendor/faiss/faiss/utils/partitioning.h +31 -0
- data/vendor/faiss/faiss/utils/popcount.h +29 -0
- data/vendor/faiss/faiss/utils/pq_code_distance.h +2 -2
- data/vendor/faiss/faiss/utils/prefetch.h +2 -2
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +30 -30
- data/vendor/faiss/faiss/utils/quantize_lut.h +1 -1
- data/vendor/faiss/faiss/utils/rabitq_simd.h +57 -536
- data/vendor/faiss/faiss/utils/random.cpp +6 -6
- data/vendor/faiss/faiss/utils/simd_impl/IVFFlatScanner-inl.h +51 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_aarch64.cpp +5 -1
- data/vendor/faiss/faiss/utils/simd_impl/distances_arm_sve.cpp +213 -4
- data/vendor/faiss/faiss/utils/simd_impl/distances_autovec-inl.h +163 -10
- data/vendor/faiss/faiss/utils/simd_impl/distances_avx2.cpp +250 -4
- data/vendor/faiss/faiss/utils/simd_impl/distances_avx512.cpp +7 -4
- data/vendor/faiss/faiss/utils/simd_impl/distances_rvv.cpp +189 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_simdlib256.h +195 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_sse-inl.h +2 -1
- data/vendor/faiss/faiss/utils/{distances_fused/simdlib_based.h → simd_impl/exhaustive_L2sqr_blas_cmax.h} +5 -10
- data/vendor/faiss/faiss/utils/simd_impl/hamming_impl.h +481 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_avx2.cpp +14 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_neon.cpp +14 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_simdlib256.h +1031 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx2.cpp +355 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx512.cpp +477 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx512_spr.cpp +343 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_neon.cpp +55 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_rvv.cpp +55 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_dispatch.h +32 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels.h +43 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx2.cpp +57 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx512.cpp +45 -0
- data/vendor/faiss/faiss/utils/simd_levels.cpp +29 -7
- data/vendor/faiss/faiss/utils/simd_levels.h +93 -1
- data/vendor/faiss/faiss/utils/sorting.cpp +48 -36
- data/vendor/faiss/faiss/utils/utils.cpp +5 -5
- data/vendor/faiss/faiss/utils/utils.h +3 -3
- metadata +129 -34
- data/vendor/faiss/faiss/impl/RaBitQStats.cpp +0 -29
- data/vendor/faiss/faiss/impl/RaBitQStats.h +0 -56
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +0 -224
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +0 -230
- data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +0 -84
- data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +0 -196
- data/vendor/faiss/faiss/utils/approx_topk/mode.h +0 -34
- data/vendor/faiss/faiss/utils/distances_fused/avx512.h +0 -36
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +0 -235
- data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +0 -462
- data/vendor/faiss/faiss/utils/hamming_distance/avx512-inl.h +0 -490
- data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +0 -449
- data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +0 -87
- data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +0 -524
- data/vendor/faiss/faiss/utils/simdlib.h +0 -42
- data/vendor/faiss/faiss/utils/simdlib_avx512.h +0 -365
- /data/ext/faiss/{utils_rb.h → utils.h} +0 -0
|
@@ -17,7 +17,6 @@
|
|
|
17
17
|
#include <faiss/impl/DistanceComputer.h>
|
|
18
18
|
#include <faiss/impl/FaissAssert.h>
|
|
19
19
|
#include <faiss/impl/maybe_owned_vector.h>
|
|
20
|
-
#include <faiss/impl/platform_macros.h>
|
|
21
20
|
#include <faiss/utils/Heap.h>
|
|
22
21
|
#include <faiss/utils/random.h>
|
|
23
22
|
|
|
@@ -26,6 +25,10 @@ namespace faiss {
|
|
|
26
25
|
// Forward declarations to avoid circular dependency.
|
|
27
26
|
struct IndexHNSW;
|
|
28
27
|
struct IndexHNSWFlatPanorama;
|
|
28
|
+
template <class HC_>
|
|
29
|
+
struct MinimaxHeapT;
|
|
30
|
+
using MinimaxHeap = MinimaxHeapT<CMax<float, int32_t>>;
|
|
31
|
+
class LockVector;
|
|
29
32
|
|
|
30
33
|
/** Implementation of the Hierarchical Navigable Small World
|
|
31
34
|
* datastructure.
|
|
@@ -59,56 +62,52 @@ struct HNSW {
|
|
|
59
62
|
/// internal storage of vectors (32 bits: this is expensive)
|
|
60
63
|
using storage_idx_t = int32_t;
|
|
61
64
|
|
|
62
|
-
//
|
|
63
|
-
|
|
65
|
+
// The two comparator flavors HNSW supports. CMax (smaller-is-better)
|
|
66
|
+
// is the default; CMin (larger-is-better) is used when `is_similarity`
|
|
67
|
+
// is set on the owning index.
|
|
68
|
+
using C_distance = CMax<float, int64_t>;
|
|
69
|
+
using C_similarity = CMin<float, int64_t>;
|
|
64
70
|
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
*/
|
|
69
|
-
struct MinimaxHeap {
|
|
70
|
-
int n;
|
|
71
|
-
int k;
|
|
72
|
-
int nvalid;
|
|
73
|
-
|
|
74
|
-
std::vector<storage_idx_t> ids;
|
|
75
|
-
std::vector<float> dis;
|
|
76
|
-
typedef faiss::CMax<float, storage_idx_t> HC;
|
|
77
|
-
|
|
78
|
-
explicit MinimaxHeap(int n) : n(n), k(0), nvalid(0), ids(n), dis(n) {}
|
|
79
|
-
|
|
80
|
-
void push(storage_idx_t i, float v);
|
|
81
|
-
|
|
82
|
-
float max() const;
|
|
83
|
-
|
|
84
|
-
int size() const;
|
|
85
|
-
|
|
86
|
-
void clear();
|
|
71
|
+
// Back-compat alias: keeps `HNSW::C` resolving to the distance
|
|
72
|
+
// (CMax) comparator everywhere the type is referenced directly.
|
|
73
|
+
using C = C_distance;
|
|
87
74
|
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
int count_below(float thresh);
|
|
91
|
-
};
|
|
75
|
+
typedef std::pair<float, storage_idx_t> Node;
|
|
92
76
|
|
|
93
77
|
/// to sort pairs of (id, distance) from nearest to farthest or the reverse
|
|
94
|
-
|
|
78
|
+
template <class CT>
|
|
79
|
+
struct NodeDistCloserT {
|
|
95
80
|
float d;
|
|
96
81
|
int id;
|
|
97
|
-
|
|
98
|
-
bool operator<(const
|
|
99
|
-
|
|
82
|
+
NodeDistCloserT(float d_in, int id_in) : d(d_in), id(id_in) {}
|
|
83
|
+
bool operator<(const NodeDistCloserT& obj1) const {
|
|
84
|
+
// priority_queue keeps the "worst" element at the top so that
|
|
85
|
+
// when the queue is full we can pop it. For CMax (distance) the
|
|
86
|
+
// worst element is the largest d; for CMin (similarity) it is
|
|
87
|
+
// the smallest d. Equivalent to: obj1.d "better than" d.
|
|
88
|
+
return CT::cmp(obj1.d, d);
|
|
100
89
|
}
|
|
101
90
|
};
|
|
102
91
|
|
|
103
|
-
|
|
92
|
+
template <class CT>
|
|
93
|
+
struct NodeDistFartherT {
|
|
104
94
|
float d;
|
|
105
95
|
int id;
|
|
106
|
-
|
|
107
|
-
bool operator<(const
|
|
108
|
-
|
|
96
|
+
NodeDistFartherT(float d_in, int id_in) : d(d_in), id(id_in) {}
|
|
97
|
+
bool operator<(const NodeDistFartherT& obj1) const {
|
|
98
|
+
// priority_queue here keeps the "best" element at the top so we
|
|
99
|
+
// can process the nearest candidate first. For CMax (distance)
|
|
100
|
+
// the best is the smallest d; for CMin (similarity) the best is
|
|
101
|
+
// the largest d. Equivalent to: d "better than" obj1.d.
|
|
102
|
+
return CT::cmp(d, obj1.d);
|
|
109
103
|
}
|
|
110
104
|
};
|
|
111
105
|
|
|
106
|
+
// Back-compat aliases: default to the distance (CMax) comparator so
|
|
107
|
+
// existing call sites that mention `HNSW::NodeDist*` keep working.
|
|
108
|
+
using NodeDistCloser = NodeDistCloserT<C_distance>;
|
|
109
|
+
using NodeDistFarther = NodeDistFartherT<C_distance>;
|
|
110
|
+
|
|
112
111
|
/// assignment probability to each layer (sum=1)
|
|
113
112
|
std::vector<double> assign_probas;
|
|
114
113
|
|
|
@@ -142,6 +141,10 @@ struct HNSW {
|
|
|
142
141
|
/// expansion factor at search time
|
|
143
142
|
int efSearch = 16;
|
|
144
143
|
|
|
144
|
+
/// when pruning, leave room for more neighbors to avoid O(n^2)
|
|
145
|
+
/// costs and lock contention on frequently-pruned nodes.
|
|
146
|
+
float prune_headroom = 0.2f;
|
|
147
|
+
|
|
145
148
|
/// during search: do we check whether the next best distance is good
|
|
146
149
|
/// enough?
|
|
147
150
|
bool check_relative_distance = true;
|
|
@@ -152,6 +155,12 @@ struct HNSW {
|
|
|
152
155
|
/// use Panorama progressive pruning in search
|
|
153
156
|
bool is_panorama = false;
|
|
154
157
|
|
|
158
|
+
/// distance comparison semantics: when true, distances are treated as
|
|
159
|
+
/// similarity scores (larger is better). Default false matches the
|
|
160
|
+
/// historical L2/Hamming behavior (smaller is better).
|
|
161
|
+
/// Not serialized: must be re-set by the owning Index after loading.
|
|
162
|
+
bool is_similarity = false;
|
|
163
|
+
|
|
155
164
|
// See impl/VisitedTable.h.
|
|
156
165
|
std::optional<bool> use_visited_hashset;
|
|
157
166
|
|
|
@@ -191,7 +200,7 @@ struct HNSW {
|
|
|
191
200
|
storage_idx_t nearest,
|
|
192
201
|
float d_nearest,
|
|
193
202
|
int level,
|
|
194
|
-
|
|
203
|
+
LockVector& locks,
|
|
195
204
|
VisitedTable& vt,
|
|
196
205
|
bool keep_max_size_level0 = false);
|
|
197
206
|
|
|
@@ -201,7 +210,7 @@ struct HNSW {
|
|
|
201
210
|
DistanceComputer& ptdis,
|
|
202
211
|
int pt_level,
|
|
203
212
|
int pt_id,
|
|
204
|
-
|
|
213
|
+
LockVector& locks,
|
|
205
214
|
VisitedTable& vt,
|
|
206
215
|
bool keep_max_size_level0 = false);
|
|
207
216
|
|
|
@@ -237,11 +246,12 @@ struct HNSW {
|
|
|
237
246
|
|
|
238
247
|
int prepare_level_tab(size_t n, bool preset_levels = false);
|
|
239
248
|
|
|
249
|
+
template <class C = C_distance>
|
|
240
250
|
static void shrink_neighbor_list(
|
|
241
251
|
DistanceComputer& qdis,
|
|
242
|
-
std::priority_queue<
|
|
243
|
-
std::vector<
|
|
244
|
-
|
|
252
|
+
std::priority_queue<NodeDistFartherT<C>>& input,
|
|
253
|
+
std::vector<NodeDistFartherT<C>>& output,
|
|
254
|
+
size_t max_size,
|
|
245
255
|
bool keep_max_size_level0 = false);
|
|
246
256
|
|
|
247
257
|
void permute_entries(const idx_t* map);
|
|
@@ -271,11 +281,16 @@ struct HNSWStats {
|
|
|
271
281
|
// global var that collects them all
|
|
272
282
|
FAISS_API extern HNSWStats hnsw_stats;
|
|
273
283
|
|
|
284
|
+
/// Internal HNSW algorithm helpers. These are not part of the public API; they
|
|
285
|
+
/// are exposed here only so that unit tests (and a few cross-TU callers such as
|
|
286
|
+
/// the Panorama search variant) can reach them.
|
|
287
|
+
namespace hnsw_detail {
|
|
288
|
+
|
|
274
289
|
int search_from_candidates(
|
|
275
290
|
const HNSW& hnsw,
|
|
276
291
|
DistanceComputer& qdis,
|
|
277
292
|
ResultHandler& res,
|
|
278
|
-
|
|
293
|
+
MinimaxHeap& candidates,
|
|
279
294
|
VisitedTable& vt,
|
|
280
295
|
HNSWStats& stats,
|
|
281
296
|
int level,
|
|
@@ -291,7 +306,7 @@ int search_from_candidates_panorama(
|
|
|
291
306
|
const IndexHNSW* index,
|
|
292
307
|
DistanceComputer& qdis,
|
|
293
308
|
ResultHandler& res,
|
|
294
|
-
|
|
309
|
+
MinimaxHeap& candidates,
|
|
295
310
|
VisitedTable& vt,
|
|
296
311
|
HNSWStats& stats,
|
|
297
312
|
int level,
|
|
@@ -323,4 +338,6 @@ void search_neighbors_to_add(
|
|
|
323
338
|
VisitedTable& vt,
|
|
324
339
|
bool reference_version = false);
|
|
325
340
|
|
|
341
|
+
} // namespace hnsw_detail
|
|
342
|
+
|
|
326
343
|
} // namespace faiss
|
|
@@ -14,8 +14,11 @@ namespace faiss {
|
|
|
14
14
|
* IDSelectorRange
|
|
15
15
|
***********************************************************************/
|
|
16
16
|
|
|
17
|
-
IDSelectorRange::IDSelectorRange(
|
|
18
|
-
|
|
17
|
+
IDSelectorRange::IDSelectorRange(
|
|
18
|
+
idx_t imin_in,
|
|
19
|
+
idx_t imax_in,
|
|
20
|
+
bool assume_sorted_in)
|
|
21
|
+
: imin(imin_in), imax(imax_in), assume_sorted(assume_sorted_in) {}
|
|
19
22
|
|
|
20
23
|
bool IDSelectorRange::is_member(idx_t id) const {
|
|
21
24
|
return id >= imin && id < imax;
|
|
@@ -67,10 +70,11 @@ void IDSelectorRange::find_sorted_ids_bounds(
|
|
|
67
70
|
* IDSelectorArray
|
|
68
71
|
***********************************************************************/
|
|
69
72
|
|
|
70
|
-
IDSelectorArray::IDSelectorArray(size_t
|
|
73
|
+
IDSelectorArray::IDSelectorArray(size_t n_in, const idx_t* ids_in)
|
|
74
|
+
: n(n_in), ids(ids_in) {}
|
|
71
75
|
|
|
72
76
|
bool IDSelectorArray::is_member(idx_t id) const {
|
|
73
|
-
for (
|
|
77
|
+
for (size_t i = 0; i < n; i++) {
|
|
74
78
|
if (ids[i] == id) {
|
|
75
79
|
return true;
|
|
76
80
|
}
|
|
@@ -84,15 +88,15 @@ bool IDSelectorArray::is_member(idx_t id) const {
|
|
|
84
88
|
|
|
85
89
|
IDSelectorBatch::IDSelectorBatch(size_t n, const idx_t* indices) {
|
|
86
90
|
nbits = 0;
|
|
87
|
-
while (n > (
|
|
91
|
+
while (n > (size_t{1} << nbits)) {
|
|
88
92
|
nbits++;
|
|
89
93
|
}
|
|
90
94
|
nbits += 5;
|
|
91
95
|
// for n = 1M, nbits = 25 is optimal, see P56659518
|
|
92
96
|
|
|
93
97
|
mask = ((idx_t)1 << nbits) - 1;
|
|
94
|
-
bloom.resize(
|
|
95
|
-
for (
|
|
98
|
+
bloom.resize(size_t{1} << (nbits - 3), 0);
|
|
99
|
+
for (size_t i = 0; i < n; i++) {
|
|
96
100
|
idx_t id = indices[i];
|
|
97
101
|
set.insert(id);
|
|
98
102
|
id &= mask;
|
|
@@ -101,9 +105,9 @@ IDSelectorBatch::IDSelectorBatch(size_t n, const idx_t* indices) {
|
|
|
101
105
|
}
|
|
102
106
|
|
|
103
107
|
bool IDSelectorBatch::is_member(idx_t i) const {
|
|
104
|
-
|
|
108
|
+
idx_t im = i & mask;
|
|
105
109
|
if (!(bloom[im >> 3] & (1 << (im & 7)))) {
|
|
106
|
-
return
|
|
110
|
+
return false;
|
|
107
111
|
}
|
|
108
112
|
return set.count(i);
|
|
109
113
|
}
|
|
@@ -112,8 +116,8 @@ bool IDSelectorBatch::is_member(idx_t i) const {
|
|
|
112
116
|
* IDSelectorBitmap
|
|
113
117
|
***********************************************************************/
|
|
114
118
|
|
|
115
|
-
IDSelectorBitmap::IDSelectorBitmap(size_t
|
|
116
|
-
: n(
|
|
119
|
+
IDSelectorBitmap::IDSelectorBitmap(size_t n_in, const uint8_t* bitmap_in)
|
|
120
|
+
: n(n_in), bitmap(bitmap_in) {}
|
|
117
121
|
|
|
118
122
|
bool IDSelectorBitmap::is_member(idx_t ii) const {
|
|
119
123
|
uint64_t i = ii;
|
|
@@ -116,7 +116,7 @@ struct IDSelectorBitmap : IDSelector {
|
|
|
116
116
|
/** reverts the membership test of another selector */
|
|
117
117
|
struct IDSelectorNot : IDSelector {
|
|
118
118
|
const IDSelector* sel;
|
|
119
|
-
explicit IDSelectorNot(const IDSelector*
|
|
119
|
+
explicit IDSelectorNot(const IDSelector* sel_) : sel(sel_) {}
|
|
120
120
|
bool is_member(idx_t id) const final {
|
|
121
121
|
return !sel->is_member(id);
|
|
122
122
|
}
|
|
@@ -125,7 +125,7 @@ struct IDSelectorNot : IDSelector {
|
|
|
125
125
|
|
|
126
126
|
/// selects all entries (useful for benchmarking)
|
|
127
127
|
struct IDSelectorAll : IDSelector {
|
|
128
|
-
bool is_member(idx_t id) const final {
|
|
128
|
+
bool is_member(idx_t /* id */) const final {
|
|
129
129
|
return true;
|
|
130
130
|
}
|
|
131
131
|
virtual ~IDSelectorAll() {}
|
|
@@ -136,8 +136,8 @@ struct IDSelectorAll : IDSelector {
|
|
|
136
136
|
struct IDSelectorAnd : IDSelector {
|
|
137
137
|
const IDSelector* lhs;
|
|
138
138
|
const IDSelector* rhs;
|
|
139
|
-
IDSelectorAnd(const IDSelector*
|
|
140
|
-
: lhs(
|
|
139
|
+
IDSelectorAnd(const IDSelector* lhs_, const IDSelector* rhs_)
|
|
140
|
+
: lhs(lhs_), rhs(rhs_) {}
|
|
141
141
|
bool is_member(idx_t id) const final {
|
|
142
142
|
return lhs->is_member(id) && rhs->is_member(id);
|
|
143
143
|
}
|
|
@@ -149,8 +149,8 @@ struct IDSelectorAnd : IDSelector {
|
|
|
149
149
|
struct IDSelectorOr : IDSelector {
|
|
150
150
|
const IDSelector* lhs;
|
|
151
151
|
const IDSelector* rhs;
|
|
152
|
-
IDSelectorOr(const IDSelector*
|
|
153
|
-
: lhs(
|
|
152
|
+
IDSelectorOr(const IDSelector* lhs_, const IDSelector* rhs_)
|
|
153
|
+
: lhs(lhs_), rhs(rhs_) {}
|
|
154
154
|
bool is_member(idx_t id) const final {
|
|
155
155
|
return lhs->is_member(id) || rhs->is_member(id);
|
|
156
156
|
}
|
|
@@ -162,8 +162,8 @@ struct IDSelectorOr : IDSelector {
|
|
|
162
162
|
struct IDSelectorXOr : IDSelector {
|
|
163
163
|
const IDSelector* lhs;
|
|
164
164
|
const IDSelector* rhs;
|
|
165
|
-
IDSelectorXOr(const IDSelector*
|
|
166
|
-
: lhs(
|
|
165
|
+
IDSelectorXOr(const IDSelector* lhs_, const IDSelector* rhs_)
|
|
166
|
+
: lhs(lhs_), rhs(rhs_) {}
|
|
167
167
|
bool is_member(idx_t id) const final {
|
|
168
168
|
return lhs->is_member(id) ^ rhs->is_member(id);
|
|
169
169
|
}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
#ifndef FAISS_INVERTED_LIST_SCANNER_STATS_H
|
|
9
|
+
#define FAISS_INVERTED_LIST_SCANNER_STATS_H
|
|
10
|
+
|
|
11
|
+
#include <cstddef>
|
|
12
|
+
|
|
13
|
+
namespace faiss {
|
|
14
|
+
|
|
15
|
+
/** Per-list statistics returned by inverted-list scanners. */
|
|
16
|
+
struct InvertedListScannerStats {
|
|
17
|
+
/// Number of distances computed after IDSelector filtering.
|
|
18
|
+
size_t scan_cnt = 0;
|
|
19
|
+
|
|
20
|
+
/// Number of heap updates.
|
|
21
|
+
size_t nheap_updates = 0;
|
|
22
|
+
};
|
|
23
|
+
|
|
24
|
+
} // namespace faiss
|
|
25
|
+
|
|
26
|
+
#endif
|
|
@@ -22,11 +22,11 @@
|
|
|
22
22
|
#include <faiss/utils/distances.h>
|
|
23
23
|
#include <faiss/utils/utils.h>
|
|
24
24
|
|
|
25
|
-
#include <faiss/
|
|
25
|
+
#include <faiss/impl/approx_topk/approx_topk.h>
|
|
26
26
|
|
|
27
27
|
// this is needed for prefetching
|
|
28
28
|
|
|
29
|
-
#ifdef
|
|
29
|
+
#ifdef COMPILE_SIMD_AVX2
|
|
30
30
|
#include <xmmintrin.h>
|
|
31
31
|
#endif
|
|
32
32
|
|
|
@@ -154,12 +154,15 @@ lsq::LSQTimer lsq_timer;
|
|
|
154
154
|
using lsq::LSQTimerScope;
|
|
155
155
|
|
|
156
156
|
LocalSearchQuantizer::LocalSearchQuantizer(
|
|
157
|
-
size_t
|
|
158
|
-
size_t
|
|
159
|
-
size_t
|
|
160
|
-
Search_type_t
|
|
161
|
-
: AdditiveQuantizer(
|
|
162
|
-
|
|
157
|
+
size_t d_in,
|
|
158
|
+
size_t M_in,
|
|
159
|
+
size_t nbits_in,
|
|
160
|
+
Search_type_t search_type_in)
|
|
161
|
+
: AdditiveQuantizer(
|
|
162
|
+
d_in,
|
|
163
|
+
std::vector<size_t>(M_in, nbits_in),
|
|
164
|
+
search_type_in) {
|
|
165
|
+
K = (1 << nbits_in);
|
|
163
166
|
std::srand(random_seed);
|
|
164
167
|
}
|
|
165
168
|
|
|
@@ -170,7 +173,7 @@ LocalSearchQuantizer::~LocalSearchQuantizer() {
|
|
|
170
173
|
LocalSearchQuantizer::LocalSearchQuantizer() : LocalSearchQuantizer(0, 0, 0) {}
|
|
171
174
|
|
|
172
175
|
void LocalSearchQuantizer::train(size_t n, const float* x) {
|
|
173
|
-
FAISS_THROW_IF_NOT(K == (1 << nbits[0]));
|
|
176
|
+
FAISS_THROW_IF_NOT(K == static_cast<size_t>(1 << nbits[0]));
|
|
174
177
|
nperts = std::min(nperts, M);
|
|
175
178
|
|
|
176
179
|
lsq_timer.reset();
|
|
@@ -194,7 +197,7 @@ void LocalSearchQuantizer::train(size_t n, const float* x) {
|
|
|
194
197
|
std::vector<float> stddev(d, 0);
|
|
195
198
|
|
|
196
199
|
#pragma omp parallel for
|
|
197
|
-
for (int64_t i = 0; i < d; i++) {
|
|
200
|
+
for (int64_t i = 0; i < static_cast<int64_t>(d); i++) {
|
|
198
201
|
float mean = 0;
|
|
199
202
|
for (size_t j = 0; j < n; j++) {
|
|
200
203
|
mean += x[j * d + i];
|
|
@@ -362,7 +365,7 @@ void LocalSearchQuantizer::update_codebooks(
|
|
|
362
365
|
}
|
|
363
366
|
|
|
364
367
|
// add a regularization term to B'B
|
|
365
|
-
for (
|
|
368
|
+
for (size_t i = 0; i < M * K; i++) {
|
|
366
369
|
bb[i * (M * K) + i] += lambd;
|
|
367
370
|
}
|
|
368
371
|
|
|
@@ -427,7 +430,7 @@ void LocalSearchQuantizer::update_codebooks(
|
|
|
427
430
|
}
|
|
428
431
|
|
|
429
432
|
// add a regularization term to B'B
|
|
430
|
-
for (
|
|
433
|
+
for (size_t i = 0; i < M * K; i++) {
|
|
431
434
|
bb[i * (M * K) + i] += lambd;
|
|
432
435
|
}
|
|
433
436
|
|
|
@@ -540,7 +543,7 @@ void LocalSearchQuantizer::icm_encode_impl(
|
|
|
540
543
|
std::mt19937& gen,
|
|
541
544
|
size_t n,
|
|
542
545
|
size_t ils_iters,
|
|
543
|
-
bool
|
|
546
|
+
bool verbose_in) const {
|
|
544
547
|
std::vector<float> unaries(n * M * K); // [M, n, K]
|
|
545
548
|
compute_unary_terms(x, unaries.data(), n);
|
|
546
549
|
|
|
@@ -564,7 +567,7 @@ void LocalSearchQuantizer::icm_encode_impl(
|
|
|
564
567
|
|
|
565
568
|
// select the best code for every vector xi
|
|
566
569
|
#pragma omp parallel for reduction(+ : n_betters, mean_obj)
|
|
567
|
-
for (int64_t i = 0; i < n; i++) {
|
|
570
|
+
for (int64_t i = 0; i < static_cast<int64_t>(n); i++) {
|
|
568
571
|
if (icm_objs[i] < best_objs[i]) {
|
|
569
572
|
best_objs[i] = icm_objs[i];
|
|
570
573
|
memcpy(best_codes.data() + i * M,
|
|
@@ -578,7 +581,7 @@ void LocalSearchQuantizer::icm_encode_impl(
|
|
|
578
581
|
|
|
579
582
|
memcpy(codes, best_codes.data(), sizeof(int32_t) * n * M);
|
|
580
583
|
|
|
581
|
-
if (
|
|
584
|
+
if (verbose_in) {
|
|
582
585
|
printf("\tils_iter %zd: obj = %lf, n_betters/n = %zd/%zd\n",
|
|
583
586
|
iter1,
|
|
584
587
|
mean_obj,
|
|
@@ -597,73 +600,75 @@ void LocalSearchQuantizer::icm_encode_step(
|
|
|
597
600
|
FAISS_THROW_IF_NOT(M != 0 && K != 0);
|
|
598
601
|
FAISS_THROW_IF_NOT(binaries != nullptr);
|
|
599
602
|
|
|
603
|
+
// Resolve SIMD level once, not per iteration of the n × n_iters × M loop.
|
|
604
|
+
with_simd_level_256bit([&]<SIMDLevel SL>() {
|
|
600
605
|
#pragma omp parallel for schedule(dynamic)
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
}
|
|
612
|
-
|
|
613
|
-
// compute objective function by adding unary
|
|
614
|
-
// and binary terms together
|
|
615
|
-
for (size_t other_m = 0; other_m < M; other_m++) {
|
|
616
|
-
if (other_m == m) {
|
|
617
|
-
continue;
|
|
606
|
+
for (int64_t i = 0; i < static_cast<int64_t>(n); i++) {
|
|
607
|
+
std::vector<float> objs(K);
|
|
608
|
+
|
|
609
|
+
for (size_t iter = 0; iter < n_iters; iter++) {
|
|
610
|
+
// condition on the m-th subcode
|
|
611
|
+
for (size_t m = 0; m < M; m++) {
|
|
612
|
+
// copy
|
|
613
|
+
auto u = unaries + m * n * K + i * K;
|
|
614
|
+
for (size_t code = 0; code < K; code++) {
|
|
615
|
+
objs[code] = u[code];
|
|
618
616
|
}
|
|
619
617
|
|
|
620
|
-
|
|
621
|
-
//
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
618
|
+
// compute objective function by adding unary
|
|
619
|
+
// and binary terms together
|
|
620
|
+
for (size_t other_m = 0; other_m < M; other_m++) {
|
|
621
|
+
if (other_m == m) {
|
|
622
|
+
continue;
|
|
623
|
+
}
|
|
624
|
+
|
|
625
|
+
#ifdef COMPILE_SIMD_AVX2
|
|
626
|
+
// TODO: add platform-independent compiler-independent
|
|
627
|
+
// prefetch utilities.
|
|
628
|
+
if (other_m + 1 < M) {
|
|
629
|
+
// do a single prefetch
|
|
630
|
+
int32_t code2 = codes[i * M + other_m + 1];
|
|
631
|
+
// for (int32_t code = 0; code < K; code += 64) {
|
|
632
|
+
int32_t code = 0;
|
|
633
|
+
{
|
|
634
|
+
size_t binary_idx = (other_m + 1) * M * K * K +
|
|
635
|
+
m * K * K + code2 * K + code;
|
|
636
|
+
_mm_prefetch(
|
|
637
|
+
(const char*)(binaries + binary_idx),
|
|
638
|
+
_MM_HINT_T0);
|
|
639
|
+
}
|
|
634
640
|
}
|
|
635
|
-
}
|
|
636
641
|
#endif
|
|
637
642
|
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
643
|
+
for (size_t code = 0; code < K; code++) {
|
|
644
|
+
int32_t code2 = codes[i * M + other_m];
|
|
645
|
+
size_t binary_idx = other_m * M * K * K +
|
|
646
|
+
m * K * K + code2 * K + code;
|
|
647
|
+
// binaries[m, other_m, code, code2].
|
|
648
|
+
// It is symmetric over (m <-> other_m)
|
|
649
|
+
// and (code <-> code2).
|
|
650
|
+
// So, replace the op with
|
|
651
|
+
// binaries[other_m, m, code2, code].
|
|
652
|
+
objs[code] += binaries[binary_idx];
|
|
653
|
+
}
|
|
648
654
|
}
|
|
649
|
-
}
|
|
650
655
|
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
656
|
+
// find the optimal value of the m-th subcode
|
|
657
|
+
float best_obj = HUGE_VALF;
|
|
658
|
+
int32_t best_code = 0;
|
|
654
659
|
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
K, objs.data(), 1, &best_obj, &best_code);
|
|
660
|
+
// find one using SIMD. The following operation is similar
|
|
661
|
+
// to the search of the smallest element in objs
|
|
662
|
+
HeapWithBucketsCMaxFloat<16, 1, SL>::addn(
|
|
663
|
+
K, objs.data(), 1, &best_obj, &best_code);
|
|
660
664
|
|
|
661
|
-
|
|
662
|
-
|
|
665
|
+
// done
|
|
666
|
+
codes[i * M + m] = best_code;
|
|
663
667
|
|
|
664
|
-
|
|
668
|
+
} // loop M
|
|
669
|
+
}
|
|
665
670
|
}
|
|
666
|
-
}
|
|
671
|
+
});
|
|
667
672
|
}
|
|
668
673
|
void LocalSearchQuantizer::perturb_codes(
|
|
669
674
|
int32_t* codes,
|
|
@@ -687,7 +692,7 @@ void LocalSearchQuantizer::compute_binary_terms(float* binaries) const {
|
|
|
687
692
|
|
|
688
693
|
with_simd_level([&]<SIMDLevel SL>() {
|
|
689
694
|
#pragma omp parallel for
|
|
690
|
-
for (int64_t m12 = 0; m12 < M * M; m12++) {
|
|
695
|
+
for (int64_t m12 = 0; m12 < static_cast<int64_t>(M * M); m12++) {
|
|
691
696
|
size_t m1 = m12 / M;
|
|
692
697
|
size_t m2 = m12 % M;
|
|
693
698
|
|
|
@@ -744,7 +749,7 @@ void LocalSearchQuantizer::compute_unary_terms(
|
|
|
744
749
|
fvec_norms_L2sqr(norms.data(), codebooks.data(), d, M * K);
|
|
745
750
|
|
|
746
751
|
#pragma omp parallel for
|
|
747
|
-
for (int64_t i = 0; i < n; i++) {
|
|
752
|
+
for (int64_t i = 0; i < static_cast<int64_t>(n); i++) {
|
|
748
753
|
for (size_t m = 0; m < M; m++) {
|
|
749
754
|
float* u = unaries + m * n * K + i * K;
|
|
750
755
|
fvec_add(K, u, norms.data() + m * K, u);
|
|
@@ -766,7 +771,7 @@ float LocalSearchQuantizer::evaluate(
|
|
|
766
771
|
with_simd_level([&]<SIMDLevel SL>() {
|
|
767
772
|
float local_obj = 0.0f;
|
|
768
773
|
#pragma omp parallel for reduction(+ : local_obj)
|
|
769
|
-
for (int64_t i = 0; i < n; i++) {
|
|
774
|
+
for (int64_t i = 0; i < static_cast<int64_t>(n); i++) {
|
|
770
775
|
const auto code = codes + i * M;
|
|
771
776
|
const auto decoded_i = decoded_x.data() + i * d;
|
|
772
777
|
for (size_t m = 0; m < M; m++) {
|
|
@@ -791,8 +796,8 @@ float LocalSearchQuantizer::evaluate(
|
|
|
791
796
|
|
|
792
797
|
namespace lsq {
|
|
793
798
|
|
|
794
|
-
IcmEncoder::IcmEncoder(const LocalSearchQuantizer*
|
|
795
|
-
: verbose(false), lsq(
|
|
799
|
+
IcmEncoder::IcmEncoder(const LocalSearchQuantizer* lsq_in)
|
|
800
|
+
: verbose(false), lsq(lsq_in) {}
|
|
796
801
|
|
|
797
802
|
void IcmEncoder::set_binary_term() {
|
|
798
803
|
auto M = lsq->M;
|
|
@@ -830,8 +835,8 @@ void LSQTimer::reset() {
|
|
|
830
835
|
t.clear();
|
|
831
836
|
}
|
|
832
837
|
|
|
833
|
-
LSQTimerScope::LSQTimerScope(LSQTimer*
|
|
834
|
-
: timer(
|
|
838
|
+
LSQTimerScope::LSQTimerScope(LSQTimer* timer_in, std::string name_in)
|
|
839
|
+
: timer(timer_in), name(std::move(name_in)), finished(false) {
|
|
835
840
|
t0 = getmillisecs();
|
|
836
841
|
}
|
|
837
842
|
|