faiss 0.5.3 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/ext/faiss/ext.cpp +1 -1
- data/ext/faiss/extconf.rb +4 -4
- data/ext/faiss/index.cpp +63 -45
- data/ext/faiss/index_binary.cpp +37 -27
- data/ext/faiss/kmeans.cpp +9 -8
- data/ext/faiss/pca_matrix.cpp +9 -7
- data/ext/faiss/product_quantizer.cpp +13 -11
- data/ext/faiss/utils.cpp +4 -2
- data/ext/faiss/utils.h +4 -0
- data/lib/faiss/version.rb +1 -1
- data/lib/faiss.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +214 -82
- data/vendor/faiss/faiss/AutoTune.h +14 -1
- data/vendor/faiss/faiss/Clustering.cpp +97 -249
- data/vendor/faiss/faiss/Clustering.h +18 -0
- data/vendor/faiss/faiss/IVFlib.cpp +67 -44
- data/vendor/faiss/faiss/Index.cpp +25 -12
- data/vendor/faiss/faiss/Index.h +26 -4
- data/vendor/faiss/faiss/Index2Layer.cpp +37 -53
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +68 -61
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +36 -34
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +4 -1
- data/vendor/faiss/faiss/IndexBinary.cpp +6 -3
- data/vendor/faiss/faiss/IndexBinary.h +4 -4
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +1 -1
- data/vendor/faiss/faiss/IndexBinaryFlat.h +1 -1
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -4
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +92 -95
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +9 -3
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +45 -236
- data/vendor/faiss/faiss/IndexBinaryHash.h +6 -6
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +120 -414
- data/vendor/faiss/faiss/IndexFastScan.cpp +105 -129
- data/vendor/faiss/faiss/IndexFastScan.h +35 -24
- data/vendor/faiss/faiss/IndexFlat.cpp +216 -152
- data/vendor/faiss/faiss/IndexFlat.h +32 -14
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +88 -41
- data/vendor/faiss/faiss/IndexFlatCodes.h +7 -1
- data/vendor/faiss/faiss/IndexHNSW.cpp +299 -187
- data/vendor/faiss/faiss/IndexHNSW.h +30 -14
- data/vendor/faiss/faiss/IndexIDMap.cpp +26 -22
- data/vendor/faiss/faiss/IndexIDMap.h +9 -7
- data/vendor/faiss/faiss/IndexIVF.cpp +535 -405
- data/vendor/faiss/faiss/IndexIVF.h +47 -16
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +77 -74
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +105 -99
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +6 -3
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +379 -249
- data/vendor/faiss/faiss/IndexIVFFastScan.h +65 -60
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +41 -124
- data/vendor/faiss/faiss/IndexIVFFlat.h +32 -0
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +89 -138
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.h +3 -1
- data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +18 -15
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +77 -907
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +184 -122
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +3 -0
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +23 -18
- data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +59 -60
- data/vendor/faiss/faiss/IndexIVFRaBitQ.h +4 -3
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +564 -416
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +269 -111
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +41 -127
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +1 -1
- data/vendor/faiss/faiss/IndexLSH.cpp +44 -25
- data/vendor/faiss/faiss/IndexLattice.cpp +41 -36
- data/vendor/faiss/faiss/IndexNNDescent.cpp +37 -21
- data/vendor/faiss/faiss/IndexNNDescent.h +2 -2
- data/vendor/faiss/faiss/IndexNSG.cpp +40 -23
- data/vendor/faiss/faiss/IndexNSG.h +0 -2
- data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +32 -12
- data/vendor/faiss/faiss/IndexPQ.cpp +129 -213
- data/vendor/faiss/faiss/IndexPQ.h +3 -2
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +20 -14
- data/vendor/faiss/faiss/IndexPQFastScan.h +3 -0
- data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -18
- data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
- data/vendor/faiss/faiss/IndexRaBitQ.cpp +31 -43
- data/vendor/faiss/faiss/IndexRaBitQ.h +4 -3
- data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +135 -317
- data/vendor/faiss/faiss/IndexRaBitQFastScan.h +192 -34
- data/vendor/faiss/faiss/IndexRefine.cpp +30 -55
- data/vendor/faiss/faiss/IndexRefine.h +4 -4
- data/vendor/faiss/faiss/IndexReplicas.cpp +6 -6
- data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +15 -14
- data/vendor/faiss/faiss/IndexRowwiseMinMax.h +1 -1
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +82 -14
- data/vendor/faiss/faiss/IndexShards.cpp +13 -13
- data/vendor/faiss/faiss/IndexShardsIVF.cpp +21 -15
- data/vendor/faiss/faiss/MatrixStats.cpp +5 -4
- data/vendor/faiss/faiss/MetaIndexes.cpp +19 -17
- data/vendor/faiss/faiss/MetaIndexes.h +1 -1
- data/vendor/faiss/faiss/MetricType.h +29 -6
- data/vendor/faiss/faiss/SuperKMeans.cpp +656 -0
- data/vendor/faiss/faiss/SuperKMeans.h +97 -0
- data/vendor/faiss/faiss/VectorTransform.cpp +349 -141
- data/vendor/faiss/faiss/VectorTransform.h +39 -16
- data/vendor/faiss/faiss/build.cpp +23 -0
- data/vendor/faiss/faiss/build.h +15 -0
- data/vendor/faiss/faiss/clone_index.cpp +55 -51
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +47 -47
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +11 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +38 -38
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +11 -0
- data/vendor/faiss/faiss/{cppcontrib/factory_tools.cpp → factory_tools.cpp} +6 -1
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +1 -1
- data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +6 -5
- data/vendor/faiss/faiss/gpu/GpuResources.h +1 -1
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +9 -9
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +4 -3
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +46 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +56 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +78 -1
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +72 -0
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +23 -0
- data/vendor/faiss/faiss/gpu/utils/CuvsFilterConvert.h +1 -1
- data/vendor/faiss/faiss/gpu/utils/CuvsUtils.h +21 -10
- data/vendor/faiss/faiss/gpu_metal/GpuIndexFlat.h +22 -0
- data/vendor/faiss/faiss/gpu_metal/MetalCloner.h +35 -0
- data/vendor/faiss/faiss/gpu_metal/MetalFlatKernels.h +40 -0
- data/vendor/faiss/faiss/gpu_metal/MetalIndex.h +51 -0
- data/vendor/faiss/faiss/gpu_metal/MetalIndexFlat.h +65 -0
- data/vendor/faiss/faiss/gpu_metal/MetalKernels.h +66 -0
- data/vendor/faiss/faiss/gpu_metal/MetalResources.h +79 -0
- data/vendor/faiss/faiss/gpu_metal/StandardMetalResources.h +35 -0
- data/vendor/faiss/faiss/impl/AdSampling.cpp +103 -0
- data/vendor/faiss/faiss/impl/AdSampling.h +35 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +64 -34
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +1 -0
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +10 -9
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +3 -28
- data/vendor/faiss/faiss/impl/ClusteringHelpers.cpp +244 -0
- data/vendor/faiss/faiss/impl/ClusteringHelpers.h +94 -0
- data/vendor/faiss/faiss/impl/ClusteringInitialization.cpp +367 -0
- data/vendor/faiss/faiss/impl/ClusteringInitialization.h +107 -0
- data/vendor/faiss/faiss/impl/CodePacker.cpp +7 -3
- data/vendor/faiss/faiss/impl/CodePacker.h +11 -3
- data/vendor/faiss/faiss/impl/CodePackerRaBitQ.cpp +83 -0
- data/vendor/faiss/faiss/impl/CodePackerRaBitQ.h +47 -0
- data/vendor/faiss/faiss/impl/DistanceComputer.h +8 -8
- data/vendor/faiss/faiss/impl/FaissAssert.h +64 -3
- data/vendor/faiss/faiss/impl/FaissException.h +50 -3
- data/vendor/faiss/faiss/impl/HNSW.cpp +117 -351
- data/vendor/faiss/faiss/impl/HNSW.h +21 -40
- data/vendor/faiss/faiss/impl/IDSelector.cpp +15 -11
- data/vendor/faiss/faiss/impl/IDSelector.h +8 -8
- data/vendor/faiss/faiss/impl/InvertedListScannerStats.h +26 -0
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +114 -102
- data/vendor/faiss/faiss/impl/NNDescent.cpp +63 -26
- data/vendor/faiss/faiss/impl/NNDescent.h +6 -2
- data/vendor/faiss/faiss/impl/NSG.cpp +44 -26
- data/vendor/faiss/faiss/impl/NSG.h +20 -10
- data/vendor/faiss/faiss/impl/Panorama.cpp +76 -52
- data/vendor/faiss/faiss/impl/Panorama.h +265 -78
- data/vendor/faiss/faiss/impl/PdxLayout.cpp +93 -0
- data/vendor/faiss/faiss/impl/PdxLayout.h +41 -0
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +62 -37
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +3 -3
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +35 -35
- data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +21 -16
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +99 -80
- data/vendor/faiss/faiss/impl/Quantizer.h +2 -2
- data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +135 -37
- data/vendor/faiss/faiss/impl/RaBitQUtils.h +148 -21
- data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +298 -301
- data/vendor/faiss/faiss/impl/RaBitQuantizer.h +3 -10
- data/vendor/faiss/faiss/impl/RaBitQuantizerMultiBit.cpp +15 -41
- data/vendor/faiss/faiss/impl/RaBitQuantizerMultiBit.h +0 -4
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +40 -32
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ResultHandler.h +218 -113
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +119 -2362
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +27 -3
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +14 -11
- data/vendor/faiss/faiss/impl/VisitedTable.cpp +42 -0
- data/vendor/faiss/faiss/impl/VisitedTable.h +76 -0
- data/vendor/faiss/faiss/impl/approx_topk/approx_topk.h +276 -0
- data/vendor/faiss/faiss/impl/approx_topk/avx2.cpp +68 -0
- data/vendor/faiss/faiss/{utils → impl}/approx_topk/generic.h +15 -8
- data/vendor/faiss/faiss/impl/approx_topk/neon.cpp +68 -0
- data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab-inl.h +169 -0
- data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab.h +117 -0
- data/vendor/faiss/faiss/impl/approx_topk/simdlib256-inl.h +146 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHNSW_impl.h +73 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHash_impl.h +270 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryIVF_impl.h +460 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexIVFSpectralHash_impl.h +159 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexPQ_impl.h +92 -0
- data/vendor/faiss/faiss/impl/binary_hamming/avx2.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/avx512.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/dispatch.h +143 -0
- data/vendor/faiss/faiss/impl/binary_hamming/neon.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/rvv.cpp +26 -0
- data/vendor/faiss/faiss/impl/expanded_scanners.h +163 -0
- data/vendor/faiss/faiss/impl/{FastScanDistancePostProcessing.h → fast_scan/FastScanDistancePostProcessing.h} +13 -6
- data/vendor/faiss/faiss/impl/{LookupTableScaler.h → fast_scan/LookupTableScaler.h} +16 -5
- data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops.h +237 -0
- data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops_512.h +185 -0
- data/vendor/faiss/faiss/impl/fast_scan/decompose_qbs.h +229 -0
- data/vendor/faiss/faiss/impl/fast_scan/dispatching.h +268 -0
- data/vendor/faiss/faiss/impl/{pq4_fast_scan.cpp → fast_scan/fast_scan.cpp} +176 -4
- data/vendor/faiss/faiss/impl/fast_scan/fast_scan.h +341 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-avx2.cpp +36 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-avx512.cpp +40 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-neon.cpp +120 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-riscv.cpp +104 -0
- data/vendor/faiss/faiss/impl/fast_scan/kernels_simd256.h +213 -0
- data/vendor/faiss/faiss/impl/{pq4_fast_scan_search_qbs.cpp → fast_scan/kernels_simd512.h} +26 -348
- data/vendor/faiss/faiss/impl/fast_scan/rabitq_dispatching.h +90 -0
- data/vendor/faiss/faiss/impl/fast_scan/rabitq_result_handler.h +108 -0
- data/vendor/faiss/faiss/impl/{simd_result_handlers.h → fast_scan/simd_result_handlers.h} +290 -142
- data/vendor/faiss/faiss/impl/hnsw/LockVector.cpp +54 -0
- data/vendor/faiss/faiss/impl/hnsw/LockVector.h +64 -0
- data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.cpp +91 -0
- data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.h +64 -0
- data/vendor/faiss/faiss/impl/hnsw/avx2.cpp +104 -0
- data/vendor/faiss/faiss/impl/hnsw/avx512.cpp +111 -0
- data/vendor/faiss/faiss/impl/index_read.cpp +1950 -505
- data/vendor/faiss/faiss/impl/index_read_utils.h +1 -2
- data/vendor/faiss/faiss/impl/index_write.cpp +112 -21
- data/vendor/faiss/faiss/impl/io.cpp +6 -6
- data/vendor/faiss/faiss/impl/io_macros.h +33 -16
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +10 -10
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +81 -40
- data/vendor/faiss/faiss/impl/lattice_Zn.h +6 -6
- data/vendor/faiss/faiss/impl/mapped_io.cpp +15 -8
- data/vendor/faiss/faiss/impl/platform_macros.h +11 -4
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQScanner_impl.h +549 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.cpp +245 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.h +105 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/PQDistanceComputer_impl.h +106 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/avx2.cpp +21 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/avx512.cpp +21 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/neon.cpp +21 -0
- data/vendor/faiss/faiss/impl/{code_distance/code_distance-avx2.h → pq_code_distance/pq_code_distance-avx2.h} +43 -220
- data/vendor/faiss/faiss/impl/{code_distance/code_distance-avx512.h → pq_code_distance/pq_code_distance-avx512.h} +25 -112
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.cpp +59 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.h +96 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-inl.h +256 -0
- data/vendor/faiss/faiss/impl/{code_distance/code_distance-sve.h → pq_code_distance/pq_code_distance-sve.cpp} +57 -146
- data/vendor/faiss/faiss/impl/pq_code_distance/rvv.cpp +68 -0
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +320 -483
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +1 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/codecs.h +121 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/distance_computers.h +137 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/quantizers.h +371 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/scanners.h +190 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/similarities.h +94 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx2.cpp +603 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512.cpp +597 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-dispatch.h +388 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-neon.cpp +630 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-rvv.cpp +311 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/training.cpp +387 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/training.h +54 -0
- data/vendor/faiss/faiss/impl/simd_dispatch.h +173 -0
- data/vendor/faiss/faiss/impl/simdlib/simdlib.h +57 -0
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_avx2.h +274 -171
- data/vendor/faiss/faiss/impl/simdlib/simdlib_avx512.h +414 -0
- data/vendor/faiss/faiss/impl/simdlib/simdlib_dispatch.h +44 -0
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_emulated.h +231 -166
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_neon.h +275 -217
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_ppc64.h +201 -160
- data/vendor/faiss/faiss/impl/svs_io.cpp +12 -3
- data/vendor/faiss/faiss/impl/svs_io.h +8 -2
- data/vendor/faiss/faiss/index_factory.cpp +115 -28
- data/vendor/faiss/faiss/index_io.h +53 -3
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +73 -20
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +24 -14
- data/vendor/faiss/faiss/invlists/DirectMap.h +4 -3
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +157 -73
- data/vendor/faiss/faiss/invlists/InvertedLists.h +86 -23
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +4 -4
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +14 -14
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSFaissUtils.h +9 -19
- data/vendor/faiss/faiss/svs/IndexSVSFlat.cpp +2 -2
- data/vendor/faiss/faiss/svs/IndexSVSFlat.h +2 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVF.cpp +350 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVF.h +128 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.cpp +40 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.h +43 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.cpp +225 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.h +71 -0
- data/vendor/faiss/faiss/svs/IndexSVSVamana.cpp +25 -1
- data/vendor/faiss/faiss/svs/IndexSVSVamana.h +19 -2
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.cpp +19 -2
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.h +14 -0
- data/vendor/faiss/faiss/utils/Heap.cpp +56 -10
- data/vendor/faiss/faiss/utils/Heap.h +21 -0
- data/vendor/faiss/faiss/utils/NeuralNet.cpp +54 -40
- data/vendor/faiss/faiss/utils/NeuralNet.h +1 -1
- data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +10 -4
- data/vendor/faiss/faiss/utils/distances.cpp +507 -559
- data/vendor/faiss/faiss/utils/distances.h +118 -1
- data/vendor/faiss/faiss/utils/distances_dispatch.h +250 -0
- data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +8 -7
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +33 -14
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +12 -1
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +16 -293
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based_neon.cpp +57 -0
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_kernel-inl.h +290 -0
- data/vendor/faiss/faiss/utils/distances_simd.cpp +72 -3681
- data/vendor/faiss/faiss/utils/extra_distances.cpp +60 -102
- data/vendor/faiss/faiss/utils/extra_distances.h +79 -7
- data/vendor/faiss/faiss/utils/hamming-inl.h +13 -11
- data/vendor/faiss/faiss/utils/hamming.cpp +66 -517
- data/vendor/faiss/faiss/utils/hamming.h +92 -2
- data/vendor/faiss/faiss/utils/hamming_distance/common.h +287 -10
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx2.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx512.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx2.h +142 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512.h +234 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-generic.h +368 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-neon.h +322 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-rvv.h +39 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer.h +146 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_impl.h +481 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_neon.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_rvv.cpp +15 -0
- data/vendor/faiss/faiss/utils/partitioning.cpp +66 -987
- data/vendor/faiss/faiss/utils/partitioning.h +31 -0
- data/vendor/faiss/faiss/utils/popcount.h +29 -0
- data/vendor/faiss/faiss/utils/pq_code_distance.h +251 -0
- data/vendor/faiss/faiss/utils/prefetch.h +2 -2
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +30 -30
- data/vendor/faiss/faiss/utils/quantize_lut.h +1 -1
- data/vendor/faiss/faiss/utils/rabitq_simd.h +124 -343
- data/vendor/faiss/faiss/utils/random.cpp +6 -6
- data/vendor/faiss/faiss/utils/simd_impl/IVFFlatScanner-inl.h +51 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_aarch64.cpp +154 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_arm_sve.cpp +777 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_autovec-inl.h +306 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_avx2.cpp +1431 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_avx512.cpp +1095 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_rvv.cpp +189 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_simdlib256.h +195 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_sse-inl.h +392 -0
- data/vendor/faiss/faiss/utils/{distances_fused/simdlib_based.h → simd_impl/exhaustive_L2sqr_blas_cmax.h} +5 -10
- data/vendor/faiss/faiss/utils/simd_impl/hamming_impl.h +481 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_avx2.cpp +14 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_neon.cpp +14 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_simdlib256.h +1085 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx2.cpp +355 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx512.cpp +477 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_neon.cpp +55 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_rvv.cpp +55 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_dispatch.h +32 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels.h +43 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx2.cpp +57 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx512.cpp +45 -0
- data/vendor/faiss/faiss/utils/simd_levels.cpp +334 -0
- data/vendor/faiss/faiss/utils/simd_levels.h +183 -0
- data/vendor/faiss/faiss/utils/sorting.cpp +48 -36
- data/vendor/faiss/faiss/utils/utils.cpp +21 -14
- data/vendor/faiss/faiss/utils/utils.h +3 -3
- metadata +156 -42
- data/vendor/faiss/faiss/impl/RaBitQStats.cpp +0 -29
- data/vendor/faiss/faiss/impl/RaBitQStats.h +0 -56
- data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +0 -81
- data/vendor/faiss/faiss/impl/code_distance/code_distance.h +0 -186
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +0 -216
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +0 -224
- data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +0 -84
- data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +0 -196
- data/vendor/faiss/faiss/utils/approx_topk/mode.h +0 -34
- data/vendor/faiss/faiss/utils/distances_fused/avx512.h +0 -36
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +0 -228
- data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +0 -462
- data/vendor/faiss/faiss/utils/hamming_distance/avx512-inl.h +0 -490
- data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +0 -450
- data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +0 -87
- data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +0 -524
- data/vendor/faiss/faiss/utils/simdlib.h +0 -42
- data/vendor/faiss/faiss/utils/simdlib_avx512.h +0 -296
- /data/vendor/faiss/faiss/{cppcontrib/factory_tools.h → factory_tools.h} +0 -0
|
@@ -9,417 +9,198 @@
|
|
|
9
9
|
|
|
10
10
|
#include <cstddef>
|
|
11
11
|
#include <cstdint>
|
|
12
|
+
#include <cstring>
|
|
12
13
|
|
|
13
|
-
|
|
14
|
-
#
|
|
15
|
-
defined(_M_IX86)
|
|
16
|
-
#include <immintrin.h>
|
|
17
|
-
#endif // defined(__x86_64__) || defined(_M_X64) || defined(__i386__) ||
|
|
14
|
+
#include <faiss/utils/popcount.h>
|
|
15
|
+
#include <faiss/utils/simd_levels.h>
|
|
18
16
|
|
|
19
17
|
namespace faiss::rabitq {
|
|
20
18
|
|
|
21
|
-
#if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || \
|
|
22
|
-
defined(_M_IX86)
|
|
23
19
|
/**
|
|
24
|
-
*
|
|
25
|
-
* This table is used for lookup-based popcount implementation.
|
|
20
|
+
* Compute dot product between query and binary data using popcount on AND.
|
|
26
21
|
*
|
|
27
|
-
*
|
|
28
|
-
*
|
|
29
|
-
* @
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
inline __m512i get_lookup_512() {
|
|
33
|
-
return _mm512_set_epi8(
|
|
34
|
-
/* f */ 4,
|
|
35
|
-
/* e */ 3,
|
|
36
|
-
/* d */ 3,
|
|
37
|
-
/* c */ 2,
|
|
38
|
-
/* b */ 3,
|
|
39
|
-
/* a */ 2,
|
|
40
|
-
/* 9 */ 2,
|
|
41
|
-
/* 8 */ 1,
|
|
42
|
-
/* 7 */ 3,
|
|
43
|
-
/* 6 */ 2,
|
|
44
|
-
/* 5 */ 2,
|
|
45
|
-
/* 4 */ 1,
|
|
46
|
-
/* 3 */ 2,
|
|
47
|
-
/* 2 */ 1,
|
|
48
|
-
/* 1 */ 1,
|
|
49
|
-
/* 0 */ 0,
|
|
50
|
-
/* f */ 4,
|
|
51
|
-
/* e */ 3,
|
|
52
|
-
/* d */ 3,
|
|
53
|
-
/* c */ 2,
|
|
54
|
-
/* b */ 3,
|
|
55
|
-
/* a */ 2,
|
|
56
|
-
/* 9 */ 2,
|
|
57
|
-
/* 8 */ 1,
|
|
58
|
-
/* 7 */ 3,
|
|
59
|
-
/* 6 */ 2,
|
|
60
|
-
/* 5 */ 2,
|
|
61
|
-
/* 4 */ 1,
|
|
62
|
-
/* 3 */ 2,
|
|
63
|
-
/* 2 */ 1,
|
|
64
|
-
/* 1 */ 1,
|
|
65
|
-
/* 0 */ 0,
|
|
66
|
-
/* f */ 4,
|
|
67
|
-
/* e */ 3,
|
|
68
|
-
/* d */ 3,
|
|
69
|
-
/* c */ 2,
|
|
70
|
-
/* b */ 3,
|
|
71
|
-
/* a */ 2,
|
|
72
|
-
/* 9 */ 2,
|
|
73
|
-
/* 8 */ 1,
|
|
74
|
-
/* 7 */ 3,
|
|
75
|
-
/* 6 */ 2,
|
|
76
|
-
/* 5 */ 2,
|
|
77
|
-
/* 4 */ 1,
|
|
78
|
-
/* 3 */ 2,
|
|
79
|
-
/* 2 */ 1,
|
|
80
|
-
/* 1 */ 1,
|
|
81
|
-
/* 0 */ 0,
|
|
82
|
-
/* f */ 4,
|
|
83
|
-
/* e */ 3,
|
|
84
|
-
/* d */ 3,
|
|
85
|
-
/* c */ 2,
|
|
86
|
-
/* b */ 3,
|
|
87
|
-
/* a */ 2,
|
|
88
|
-
/* 9 */ 2,
|
|
89
|
-
/* 8 */ 1,
|
|
90
|
-
/* 7 */ 3,
|
|
91
|
-
/* 6 */ 2,
|
|
92
|
-
/* 5 */ 2,
|
|
93
|
-
/* 4 */ 1,
|
|
94
|
-
/* 3 */ 2,
|
|
95
|
-
/* 2 */ 1,
|
|
96
|
-
/* 1 */ 1,
|
|
97
|
-
/* 0 */ 0);
|
|
98
|
-
}
|
|
99
|
-
#endif // defined(__AVX512F__)
|
|
100
|
-
#if defined(__AVX2__)
|
|
101
|
-
/**
|
|
102
|
-
* Returns the lookup table for AVX2 popcount operations.
|
|
103
|
-
* This table is used for lookup-based popcount implementation.
|
|
104
|
-
*
|
|
105
|
-
* @return Lookup table as __m256i register
|
|
22
|
+
* @param query Pointer to rearranged rotated query data
|
|
23
|
+
* @param data Pointer to binary data
|
|
24
|
+
* @param size Size in bytes
|
|
25
|
+
* @param qb Number of quantization bits
|
|
26
|
+
* @return Unsigned integer dot product
|
|
106
27
|
*/
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
/* 4 */ 1,
|
|
114
|
-
/* 5 */ 2,
|
|
115
|
-
/* 6 */ 2,
|
|
116
|
-
/* 7 */ 3,
|
|
117
|
-
/* 8 */ 1,
|
|
118
|
-
/* 9 */ 2,
|
|
119
|
-
/* a */ 2,
|
|
120
|
-
/* b */ 3,
|
|
121
|
-
/* c */ 2,
|
|
122
|
-
/* d */ 3,
|
|
123
|
-
/* e */ 3,
|
|
124
|
-
/* f */ 4,
|
|
125
|
-
/* 0 */ 0,
|
|
126
|
-
/* 1 */ 1,
|
|
127
|
-
/* 2 */ 1,
|
|
128
|
-
/* 3 */ 2,
|
|
129
|
-
/* 4 */ 1,
|
|
130
|
-
/* 5 */ 2,
|
|
131
|
-
/* 6 */ 2,
|
|
132
|
-
/* 7 */ 3,
|
|
133
|
-
/* 8 */ 1,
|
|
134
|
-
/* 9 */ 2,
|
|
135
|
-
/* a */ 2,
|
|
136
|
-
/* b */ 3,
|
|
137
|
-
/* c */ 2,
|
|
138
|
-
/* d */ 3,
|
|
139
|
-
/* e */ 3,
|
|
140
|
-
/* f */ 4);
|
|
141
|
-
}
|
|
142
|
-
#endif // defined(__AVX2__)
|
|
28
|
+
template <SIMDLevel SL = SINGLE_SIMD_LEVEL>
|
|
29
|
+
uint64_t bitwise_and_dot_product(
|
|
30
|
+
const uint8_t* query,
|
|
31
|
+
const uint8_t* data,
|
|
32
|
+
size_t size,
|
|
33
|
+
size_t qb);
|
|
143
34
|
|
|
144
|
-
#if defined(__AVX512F__)
|
|
145
35
|
/**
|
|
146
|
-
*
|
|
36
|
+
* Compute dot product between query and binary data using popcount on XOR.
|
|
147
37
|
*
|
|
148
|
-
* @param
|
|
149
|
-
* @
|
|
38
|
+
* @param query Pointer to rearranged rotated query data
|
|
39
|
+
* @param data Pointer to binary data
|
|
40
|
+
* @param size Size in bytes
|
|
41
|
+
* @param qb Number of quantization bits
|
|
42
|
+
* @return Unsigned integer dot product
|
|
150
43
|
*/
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
const __m512i lo = _mm512_and_si512(v, low_mask);
|
|
159
|
-
const __m512i hi = _mm512_and_si512(_mm512_srli_epi16(v, 4), low_mask);
|
|
160
|
-
const __m512i popcnt_lo = _mm512_shuffle_epi8(lookup, lo);
|
|
161
|
-
const __m512i popcnt_hi = _mm512_shuffle_epi8(lookup, hi);
|
|
162
|
-
const __m512i popcnt = _mm512_add_epi8(popcnt_lo, popcnt_hi);
|
|
163
|
-
return _mm512_sad_epu8(_mm512_setzero_si512(), popcnt);
|
|
164
|
-
#endif // defined(__AVX512VPOPCNTDQ__)
|
|
165
|
-
}
|
|
166
|
-
#endif // defined(__AVX512F__)
|
|
44
|
+
template <SIMDLevel SL = SINGLE_SIMD_LEVEL>
|
|
45
|
+
uint64_t bitwise_xor_dot_product(
|
|
46
|
+
const uint8_t* query,
|
|
47
|
+
const uint8_t* data,
|
|
48
|
+
size_t size,
|
|
49
|
+
size_t qb);
|
|
167
50
|
|
|
168
|
-
#if defined(__AVX2__)
|
|
169
51
|
/**
|
|
170
|
-
*
|
|
52
|
+
* Count total set bits in data.
|
|
171
53
|
*
|
|
172
|
-
* @param
|
|
173
|
-
* @
|
|
54
|
+
* @param data Pointer to binary data
|
|
55
|
+
* @param size Size in bytes
|
|
56
|
+
* @return Total popcount
|
|
174
57
|
*/
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
const __m256i low_mask = _mm256_set1_epi8(0x0f);
|
|
178
|
-
|
|
179
|
-
const __m256i lo = _mm256_and_si256(v, low_mask);
|
|
180
|
-
const __m256i hi = _mm256_and_si256(_mm256_srli_epi16(v, 4), low_mask);
|
|
181
|
-
const __m256i popcnt_lo = _mm256_shuffle_epi8(lookup, lo);
|
|
182
|
-
const __m256i popcnt_hi = _mm256_shuffle_epi8(lookup, hi);
|
|
183
|
-
const __m256i popcnt = _mm256_add_epi8(popcnt_lo, popcnt_hi);
|
|
184
|
-
// Reduce uint8_t[32] into uint64_t[4] by addition.
|
|
185
|
-
return _mm256_sad_epu8(_mm256_setzero_si256(), popcnt);
|
|
186
|
-
}
|
|
187
|
-
|
|
188
|
-
inline uint64_t reduce_add_256(__m256i v) {
|
|
189
|
-
alignas(32) uint64_t lanes[4];
|
|
190
|
-
_mm256_store_si256((__m256i*)lanes, v);
|
|
191
|
-
return lanes[0] + lanes[1] + lanes[2] + lanes[3];
|
|
192
|
-
}
|
|
193
|
-
#endif // defined(__AVX2__)
|
|
194
|
-
|
|
195
|
-
#if defined(__SSE4_1__)
|
|
196
|
-
inline __m128i popcount_128(__m128i v) {
|
|
197
|
-
// Scalar popcount for each 64-bit lane
|
|
198
|
-
uint64_t lane0 = _mm_extract_epi64(v, 0);
|
|
199
|
-
uint64_t lane1 = _mm_extract_epi64(v, 1);
|
|
200
|
-
uint64_t pop0 = __builtin_popcountll(lane0);
|
|
201
|
-
uint64_t pop1 = __builtin_popcountll(lane1);
|
|
202
|
-
return _mm_set_epi64x(pop1, pop0);
|
|
203
|
-
}
|
|
58
|
+
template <SIMDLevel SL = SINGLE_SIMD_LEVEL>
|
|
59
|
+
uint64_t popcount(const uint8_t* data, size_t size);
|
|
204
60
|
|
|
205
|
-
|
|
206
|
-
alignas(16) uint64_t lanes[2];
|
|
207
|
-
_mm_store_si128((__m128i*)lanes, v);
|
|
208
|
-
return lanes[0] + lanes[1];
|
|
209
|
-
}
|
|
210
|
-
#endif // defined(__SSE4_1__)
|
|
211
|
-
#endif // defined(__x86_64__) || defined(_M_X64) || defined(__i386__) ||
|
|
61
|
+
// NONE specializations — scalar fallbacks
|
|
212
62
|
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
*
|
|
216
|
-
* @param query Pointer to rearranged rotated query data
|
|
217
|
-
* @param data Pointer to binary data
|
|
218
|
-
* @param d Dimension
|
|
219
|
-
* @param qb Number of quantization bits
|
|
220
|
-
* @return Unsigned integer dot product
|
|
221
|
-
*/
|
|
222
|
-
inline uint64_t bitwise_and_dot_product(
|
|
63
|
+
template <>
|
|
64
|
+
inline uint64_t bitwise_and_dot_product<SIMDLevel::NONE>(
|
|
223
65
|
const uint8_t* query,
|
|
224
66
|
const uint8_t* data,
|
|
225
67
|
size_t size,
|
|
226
68
|
size_t qb) {
|
|
227
69
|
uint64_t sum = 0;
|
|
228
70
|
size_t offset = 0;
|
|
229
|
-
#if defined(__AVX512F__)
|
|
230
|
-
// Handle 512-bit chunks.
|
|
231
|
-
if (size_t step = 512 / 8; offset + step <= size) {
|
|
232
|
-
__m512i sum_512 = _mm512_setzero_si512();
|
|
233
|
-
for (; offset + step <= size; offset += step) {
|
|
234
|
-
__m512i v_x = _mm512_loadu_si512((const __m512i*)(data + offset));
|
|
235
|
-
for (int j = 0; j < qb; j++) {
|
|
236
|
-
__m512i v_q = _mm512_loadu_si512(
|
|
237
|
-
(const __m512i*)(query + j * size + offset));
|
|
238
|
-
__m512i v_and = _mm512_and_si512(v_q, v_x);
|
|
239
|
-
__m512i v_popcnt = popcount_512(v_and);
|
|
240
|
-
__m512i v_shifted = _mm512_slli_epi64(v_popcnt, j);
|
|
241
|
-
sum_512 = _mm512_add_epi64(sum_512, v_shifted);
|
|
242
|
-
}
|
|
243
|
-
}
|
|
244
|
-
sum += _mm512_reduce_add_epi64(sum_512);
|
|
245
|
-
}
|
|
246
|
-
#endif // defined(__AVX512F__)
|
|
247
|
-
#if defined(__AVX2__)
|
|
248
|
-
if (size_t step = 256 / 8; offset + step <= size) {
|
|
249
|
-
__m256i sum_256 = _mm256_setzero_si256();
|
|
250
|
-
for (; offset + step <= size; offset += step) {
|
|
251
|
-
__m256i v_x = _mm256_loadu_si256((const __m256i*)(data + offset));
|
|
252
|
-
for (int j = 0; j < qb; j++) {
|
|
253
|
-
__m256i v_q = _mm256_loadu_si256(
|
|
254
|
-
(const __m256i*)(query + j * size + offset));
|
|
255
|
-
__m256i v_and = _mm256_and_si256(v_q, v_x);
|
|
256
|
-
__m256i v_popcnt = popcount_256(v_and);
|
|
257
|
-
__m256i v_shifted = _mm256_slli_epi64(v_popcnt, j);
|
|
258
|
-
sum_256 = _mm256_add_epi64(sum_256, v_shifted);
|
|
259
|
-
}
|
|
260
|
-
}
|
|
261
|
-
sum += reduce_add_256(sum_256);
|
|
262
|
-
}
|
|
263
|
-
#endif // defined(__AVX2__)
|
|
264
|
-
#if defined(__SSE4_1__)
|
|
265
|
-
__m128i sum_128 = _mm_setzero_si128();
|
|
266
|
-
for (size_t step = 128 / 8; offset + step <= size; offset += step) {
|
|
267
|
-
__m128i v_x = _mm_loadu_si128((const __m128i*)(data + offset));
|
|
268
|
-
for (int j = 0; j < qb; j++) {
|
|
269
|
-
__m128i v_q = _mm_loadu_si128(
|
|
270
|
-
(const __m128i*)(query + j * size + offset));
|
|
271
|
-
__m128i v_and = _mm_and_si128(v_q, v_x);
|
|
272
|
-
__m128i v_popcnt = popcount_128(v_and);
|
|
273
|
-
__m128i v_shifted = _mm_slli_epi64(v_popcnt, j);
|
|
274
|
-
sum_128 = _mm_add_epi64(sum_128, v_shifted);
|
|
275
|
-
}
|
|
276
|
-
}
|
|
277
|
-
sum += reduce_add_128(sum_128);
|
|
278
|
-
#endif // defined(__SSE4_1__)
|
|
279
71
|
for (size_t step = 64 / 8; offset + step <= size; offset += step) {
|
|
280
72
|
const auto yv = *(const uint64_t*)(data + offset);
|
|
281
73
|
for (int j = 0; j < qb; j++) {
|
|
282
74
|
const auto qv = *(const uint64_t*)(query + j * size + offset);
|
|
283
|
-
sum +=
|
|
75
|
+
sum += popcount64(qv & yv) << j;
|
|
284
76
|
}
|
|
285
77
|
}
|
|
286
78
|
for (; offset < size; ++offset) {
|
|
287
79
|
const auto yv = *(data + offset);
|
|
288
80
|
for (int j = 0; j < qb; j++) {
|
|
289
81
|
const auto qv = *(query + j * size + offset);
|
|
290
|
-
sum +=
|
|
82
|
+
sum += popcount32(qv & yv) << j;
|
|
291
83
|
}
|
|
292
84
|
}
|
|
293
85
|
return sum;
|
|
294
86
|
}
|
|
295
87
|
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
*
|
|
299
|
-
* @param query Pointer to rearranged rotated query data
|
|
300
|
-
* @param data Pointer to binary data
|
|
301
|
-
* @param d Dimension
|
|
302
|
-
* @param qb Number of quantization bits
|
|
303
|
-
* @return Unsigned integer dot product
|
|
304
|
-
*/
|
|
305
|
-
inline uint64_t bitwise_xor_dot_product(
|
|
88
|
+
template <>
|
|
89
|
+
inline uint64_t bitwise_xor_dot_product<SIMDLevel::NONE>(
|
|
306
90
|
const uint8_t* query,
|
|
307
91
|
const uint8_t* data,
|
|
308
92
|
size_t size,
|
|
309
93
|
size_t qb) {
|
|
310
94
|
uint64_t sum = 0;
|
|
311
95
|
size_t offset = 0;
|
|
312
|
-
#if defined(__AVX512F__)
|
|
313
|
-
// Handle 512-bit chunks.
|
|
314
|
-
if (size_t step = 512 / 8; offset + step <= size) {
|
|
315
|
-
__m512i sum_512 = _mm512_setzero_si512();
|
|
316
|
-
for (; offset + step <= size; offset += step) {
|
|
317
|
-
__m512i v_x = _mm512_loadu_si512((const __m512i*)(data + offset));
|
|
318
|
-
for (int j = 0; j < qb; j++) {
|
|
319
|
-
__m512i v_q = _mm512_loadu_si512(
|
|
320
|
-
(const __m512i*)(query + j * size + offset));
|
|
321
|
-
__m512i v_xor = _mm512_xor_si512(v_q, v_x);
|
|
322
|
-
__m512i v_popcnt = popcount_512(v_xor);
|
|
323
|
-
__m512i v_shifted = _mm512_slli_epi64(v_popcnt, j);
|
|
324
|
-
sum_512 = _mm512_add_epi64(sum_512, v_shifted);
|
|
325
|
-
}
|
|
326
|
-
}
|
|
327
|
-
sum += _mm512_reduce_add_epi64(sum_512);
|
|
328
|
-
}
|
|
329
|
-
#endif
|
|
330
|
-
#if defined(__AVX2__)
|
|
331
|
-
if (size_t step = 256 / 8; offset + step <= size) {
|
|
332
|
-
__m256i sum_256 = _mm256_setzero_si256();
|
|
333
|
-
for (; offset + step <= size; offset += step) {
|
|
334
|
-
__m256i v_x = _mm256_loadu_si256((const __m256i*)(data + offset));
|
|
335
|
-
for (int j = 0; j < qb; j++) {
|
|
336
|
-
__m256i v_q = _mm256_loadu_si256(
|
|
337
|
-
(const __m256i*)(query + j * size + offset));
|
|
338
|
-
__m256i v_xor = _mm256_xor_si256(v_q, v_x);
|
|
339
|
-
__m256i v_popcnt = popcount_256(v_xor);
|
|
340
|
-
__m256i v_shifted = _mm256_slli_epi64(v_popcnt, j);
|
|
341
|
-
sum_256 = _mm256_add_epi64(sum_256, v_shifted);
|
|
342
|
-
}
|
|
343
|
-
}
|
|
344
|
-
sum += reduce_add_256(sum_256);
|
|
345
|
-
}
|
|
346
|
-
#endif
|
|
347
|
-
#if defined(__SSE4_1__)
|
|
348
|
-
__m128i sum_128 = _mm_setzero_si128();
|
|
349
|
-
for (size_t step = 128 / 8; offset + step <= size; offset += step) {
|
|
350
|
-
__m128i v_x = _mm_loadu_si128((const __m128i*)(data + offset));
|
|
351
|
-
for (int j = 0; j < qb; j++) {
|
|
352
|
-
__m128i v_q = _mm_loadu_si128(
|
|
353
|
-
(const __m128i*)(query + j * size + offset));
|
|
354
|
-
__m128i v_xor = _mm_xor_si128(v_q, v_x);
|
|
355
|
-
__m128i v_popcnt = popcount_128(v_xor);
|
|
356
|
-
__m128i v_shifted = _mm_slli_epi64(v_popcnt, j);
|
|
357
|
-
sum_128 = _mm_add_epi64(sum_128, v_shifted);
|
|
358
|
-
}
|
|
359
|
-
}
|
|
360
|
-
sum += reduce_add_128(sum_128);
|
|
361
|
-
#endif
|
|
362
96
|
for (size_t step = 64 / 8; offset + step <= size; offset += step) {
|
|
363
97
|
const auto yv = *(const uint64_t*)(data + offset);
|
|
364
98
|
for (int j = 0; j < qb; j++) {
|
|
365
99
|
const auto qv = *(const uint64_t*)(query + j * size + offset);
|
|
366
|
-
sum +=
|
|
100
|
+
sum += popcount64(qv ^ yv) << j;
|
|
367
101
|
}
|
|
368
102
|
}
|
|
369
103
|
for (; offset < size; ++offset) {
|
|
370
104
|
const auto yv = *(data + offset);
|
|
371
105
|
for (int j = 0; j < qb; j++) {
|
|
372
106
|
const auto qv = *(query + j * size + offset);
|
|
373
|
-
sum +=
|
|
107
|
+
sum += popcount32(qv ^ yv) << j;
|
|
374
108
|
}
|
|
375
109
|
}
|
|
376
110
|
return sum;
|
|
377
111
|
}
|
|
378
112
|
|
|
379
|
-
|
|
113
|
+
template <>
|
|
114
|
+
inline uint64_t popcount<SIMDLevel::NONE>(const uint8_t* data, size_t size) {
|
|
380
115
|
uint64_t sum = 0;
|
|
381
116
|
size_t offset = 0;
|
|
382
|
-
#if defined(__AVX512F__)
|
|
383
|
-
// Handle 512-bit chunks.
|
|
384
|
-
if (offset + 512 / 8 <= size) {
|
|
385
|
-
__m512i sum_512 = _mm512_setzero_si512();
|
|
386
|
-
for (size_t end; (end = offset + 512 / 8) <= size; offset = end) {
|
|
387
|
-
__m512i v_x = _mm512_loadu_si512((const __m512i*)(data + offset));
|
|
388
|
-
__m512i v_popcnt = popcount_512(v_x);
|
|
389
|
-
sum_512 = _mm512_add_epi64(sum_512, v_popcnt);
|
|
390
|
-
}
|
|
391
|
-
sum += _mm512_reduce_add_epi64(sum_512);
|
|
392
|
-
}
|
|
393
|
-
#endif // defined(__AVX512F__)
|
|
394
|
-
#if defined(__AVX2__)
|
|
395
|
-
if (offset + 256 / 8 <= size) {
|
|
396
|
-
__m256i sum_256 = _mm256_setzero_si256();
|
|
397
|
-
for (size_t end; (end = offset + 256 / 8) <= size; offset = end) {
|
|
398
|
-
__m256i v_x = _mm256_loadu_si256((const __m256i*)(data + offset));
|
|
399
|
-
__m256i v_popcnt = popcount_256(v_x);
|
|
400
|
-
sum_256 = _mm256_add_epi64(sum_256, v_popcnt);
|
|
401
|
-
}
|
|
402
|
-
sum += reduce_add_256(sum_256);
|
|
403
|
-
}
|
|
404
|
-
#endif // defined(__AVX2__)
|
|
405
|
-
#if defined(__SSE4_1__)
|
|
406
|
-
__m128i sum_128 = _mm_setzero_si128();
|
|
407
|
-
for (size_t step = 128 / 8; offset + step <= size; offset += step) {
|
|
408
|
-
__m128i v_x = _mm_loadu_si128((const __m128i*)(data + offset));
|
|
409
|
-
sum_128 = _mm_add_epi64(sum_128, popcount_128(v_x));
|
|
410
|
-
}
|
|
411
|
-
sum += reduce_add_128(sum_128);
|
|
412
|
-
#endif // defined(__SSE4_1__)
|
|
413
|
-
|
|
414
117
|
for (size_t step = 64 / 8; offset + step <= size; offset += step) {
|
|
415
118
|
const auto yv = *(const uint64_t*)(data + offset);
|
|
416
|
-
sum +=
|
|
119
|
+
sum += popcount64(yv);
|
|
417
120
|
}
|
|
418
121
|
for (; offset < size; ++offset) {
|
|
419
122
|
const auto yv = *(data + offset);
|
|
420
|
-
sum +=
|
|
123
|
+
sum += popcount32(yv);
|
|
421
124
|
}
|
|
422
125
|
return sum;
|
|
423
126
|
}
|
|
424
127
|
|
|
425
128
|
} // namespace faiss::rabitq
|
|
129
|
+
|
|
130
|
+
/*********************************************************
|
|
131
|
+
* Multi-bit RaBitQ inner product kernels.
|
|
132
|
+
*
|
|
133
|
+
* Compute: sum_i rotated_q[i] * ((sign_bit_i << ex_bits) + ex_code_val_i + cb)
|
|
134
|
+
*
|
|
135
|
+
* Strategy:
|
|
136
|
+
* ex_bits == 1: Specialized kernel — both sign_bits and ex_code are
|
|
137
|
+
* 1-bit-per-dim packed, enabling direct bit→mask→float
|
|
138
|
+
* conversion with zero per-element extraction.
|
|
139
|
+
* ex_bits >= 2: Bit-plane decomposition (BMI2 required) — PEXT extracts
|
|
140
|
+
* each bit plane in one instruction, then the same
|
|
141
|
+
* bit→mask→float kernel computes each plane's dot product.
|
|
142
|
+
* Fallback: Scalar extraction via 64-bit window read + shift + mask.
|
|
143
|
+
*********************************************************/
|
|
144
|
+
namespace faiss::rabitq::multibit {
|
|
145
|
+
|
|
146
|
+
/// Scalar inner product for multi-bit RaBitQ.
///
/// For every dimension i in [start, d) the reconstructed value is
/// `(sign_bit_i << ex_bits) + ex_code_val_i + cb`, and the function returns
/// the sum of `rotated_q[i]` times that value. Passing a non-zero @p start
/// lets SIMD kernels use this routine for their tail.
///
/// Each code value is extracted in O(1) from a 64-bit window that starts at
/// the byte containing its first bit, so `ex_bits` plus the in-byte offset
/// (at most 7) has to fit in 64 bits. The window never extends beyond the
/// `ceil(d * ex_bits / 8)` bytes that the packed codes occupy.
///
/// @param sign_bits  packed sign bits, 1 bit per dimension, LSB first
/// @param ex_code    packed extra-bit codes, `ex_bits` bits per dimension
/// @param rotated_q  query values, indexed by dimension
/// @param start      first dimension to process
/// @param d          total number of dimensions (exclusive upper bound)
/// @param ex_bits    bits per packed code value
/// @param cb         constant added to every reconstructed value
/// @return           partial inner product over [start, d)
inline float ip_scalar(
        const uint8_t* __restrict sign_bits,
        const uint8_t* __restrict ex_code,
        const float* __restrict rotated_q,
        size_t start,
        size_t d,
        size_t ex_bits,
        float cb) {
    float result = 0.0f;
    const int sign_shift = static_cast<int>(ex_bits);
    const uint64_t code_mask = (1ULL << ex_bits) - 1;
    // Number of bytes actually occupied by the d packed code values.
    const size_t code_bytes = (d * ex_bits + 7) / 8;
    for (size_t i = start; i < d; i++) {
        const int sb = (sign_bits[i / 8] >> (i % 8)) & 1;
        const size_t bit_pos = i * ex_bits;
        const size_t byte_idx = bit_pos / 8;
        const size_t bit_offset = bit_pos % 8;
        uint64_t raw = 0;
        if (byte_idx + sizeof(raw) <= code_bytes) {
            // Common case: the full 8-byte window lies inside the buffer.
            memcpy(&raw, ex_code + byte_idx, sizeof(raw));
        } else if (byte_idx < code_bytes) {
            // Near the end of the buffer: copy only the bytes that exist.
            // The missing high bytes stay zero and are masked off anyway.
            memcpy(&raw, ex_code + byte_idx, code_bytes - byte_idx);
        }
        const int ex_val = static_cast<int>((raw >> bit_offset) & code_mask);
        result += rotated_q[i] *
                (static_cast<float>((sb << sign_shift) + ex_val) + cb);
    }
    return result;
}
|
|
173
|
+
|
|
174
|
+
/**
|
|
175
|
+
* Dispatch to the best available kernel for the given ex_bits.
|
|
176
|
+
*
|
|
177
|
+
* @param sign_bits packed sign bits (1 bit/dim, standard byte packing)
|
|
178
|
+
* @param ex_code packed extra-bit codes (ex_bits bits/dim)
|
|
179
|
+
* @param rotated_q rotated query vector (float[d])
|
|
180
|
+
* @param d dimensionality
|
|
181
|
+
* @param ex_bits number of extra bits per dimension (nb_bits - 1)
|
|
182
|
+
* @param cb constant bias: -(2^ex_bits - 0.5)
|
|
183
|
+
* @return inner product value
|
|
184
|
+
*/
|
|
185
|
+
template <SIMDLevel SL = SINGLE_SIMD_LEVEL>
|
|
186
|
+
float compute_inner_product(
|
|
187
|
+
const uint8_t* __restrict sign_bits,
|
|
188
|
+
const uint8_t* __restrict ex_code,
|
|
189
|
+
const float* __restrict rotated_q,
|
|
190
|
+
size_t d,
|
|
191
|
+
size_t ex_bits,
|
|
192
|
+
float cb);
|
|
193
|
+
|
|
194
|
+
// NONE specialization — pure scalar
|
|
195
|
+
template <>
|
|
196
|
+
inline float compute_inner_product<SIMDLevel::NONE>(
|
|
197
|
+
const uint8_t* __restrict sign_bits,
|
|
198
|
+
const uint8_t* __restrict ex_code,
|
|
199
|
+
const float* __restrict rotated_q,
|
|
200
|
+
size_t d,
|
|
201
|
+
size_t ex_bits,
|
|
202
|
+
float cb) {
|
|
203
|
+
return ip_scalar(sign_bits, ex_code, rotated_q, 0, d, ex_bits, cb);
|
|
204
|
+
}
|
|
205
|
+
|
|
206
|
+
} // namespace faiss::rabitq::multibit
|
|
@@ -100,7 +100,7 @@ void float_rand(float* x, size_t n, int64_t seed) {
|
|
|
100
100
|
int a0 = rng0.rand_int(), b0 = rng0.rand_int();
|
|
101
101
|
|
|
102
102
|
#pragma omp parallel for
|
|
103
|
-
for (int64_t j = 0; j < nblock; j++) {
|
|
103
|
+
for (int64_t j = 0; j < static_cast<int64_t>(nblock); j++) {
|
|
104
104
|
RandomGenerator rng(a0 + j * b0);
|
|
105
105
|
|
|
106
106
|
const size_t istart = j * n / nblock;
|
|
@@ -120,7 +120,7 @@ void float_randn(float* x, size_t n, int64_t seed) {
|
|
|
120
120
|
int a0 = rng0.rand_int(), b0 = rng0.rand_int();
|
|
121
121
|
|
|
122
122
|
#pragma omp parallel for
|
|
123
|
-
for (int64_t j = 0; j < nblock; j++) {
|
|
123
|
+
for (int64_t j = 0; j < static_cast<int64_t>(nblock); j++) {
|
|
124
124
|
RandomGenerator rng(a0 + j * b0);
|
|
125
125
|
|
|
126
126
|
double a = 0, b = 0, s = 0;
|
|
@@ -155,7 +155,7 @@ void int64_rand(int64_t* x, size_t n, int64_t seed) {
|
|
|
155
155
|
int a0 = rng0.rand_int(), b0 = rng0.rand_int();
|
|
156
156
|
|
|
157
157
|
#pragma omp parallel for
|
|
158
|
-
for (int64_t j = 0; j < nblock; j++) {
|
|
158
|
+
for (int64_t j = 0; j < static_cast<int64_t>(nblock); j++) {
|
|
159
159
|
RandomGenerator rng(a0 + j * b0);
|
|
160
160
|
|
|
161
161
|
const size_t istart = j * n / nblock;
|
|
@@ -174,7 +174,7 @@ void int64_rand_max(int64_t* x, size_t n, uint64_t max, int64_t seed) {
|
|
|
174
174
|
int a0 = rng0.rand_int(), b0 = rng0.rand_int();
|
|
175
175
|
|
|
176
176
|
#pragma omp parallel for
|
|
177
|
-
for (int64_t j = 0; j < nblock; j++) {
|
|
177
|
+
for (int64_t j = 0; j < static_cast<int64_t>(nblock); j++) {
|
|
178
178
|
RandomGenerator rng(a0 + j * b0);
|
|
179
179
|
|
|
180
180
|
const size_t istart = j * n / nblock;
|
|
@@ -219,7 +219,7 @@ void byte_rand(uint8_t* x, size_t n, int64_t seed) {
|
|
|
219
219
|
int a0 = rng0.rand_int(), b0 = rng0.rand_int();
|
|
220
220
|
|
|
221
221
|
#pragma omp parallel for
|
|
222
|
-
for (int64_t j = 0; j < nblock; j++) {
|
|
222
|
+
for (int64_t j = 0; j < static_cast<int64_t>(nblock); j++) {
|
|
223
223
|
RandomGenerator rng(a0 + j * b0);
|
|
224
224
|
|
|
225
225
|
const size_t istart = j * n / nblock;
|
|
@@ -261,7 +261,7 @@ void rand_smooth_vectors(size_t n, size_t d, float* x, int64_t seed) {
|
|
|
261
261
|
float_rand(scales.data(), d, seed + 2);
|
|
262
262
|
|
|
263
263
|
#pragma omp parallel for if (n * d > 10000)
|
|
264
|
-
for (int64_t i = 0; i < n; i++) {
|
|
264
|
+
for (int64_t i = 0; i < static_cast<int64_t>(n); i++) {
|
|
265
265
|
for (size_t j = 0; j < d; j++) {
|
|
266
266
|
x[i * d + j] = sinf(x[i * d + j] * (scales[j] * 4 + 0.1));
|
|
267
267
|
}
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
#pragma once
|
|
9
|
+
|
|
10
|
+
#include <faiss/IndexIVFFlat.h>
|
|
11
|
+
#include <faiss/impl/expanded_scanners.h>
|
|
12
|
+
|
|
13
|
+
#ifndef THE_SIMD_LEVEL
|
|
14
|
+
#error "THE_SIMD_LEVEL not defined"
|
|
15
|
+
#endif
|
|
16
|
+
|
|
17
|
+
namespace faiss {
|
|
18
|
+
|
|
19
|
+
constexpr faiss::SIMDLevel THE_SL = THE_SIMD_LEVEL;
|
|
20
|
+
|
|
21
|
+
#define DEFINE_IVFFLAT_SCANNER_METHODS(mt) \
|
|
22
|
+
template <> \
|
|
23
|
+
float IVFFlatScanner<VectorDistance<mt, THE_SL>>::distance_to_code( \
|
|
24
|
+
const uint8_t* code) const { \
|
|
25
|
+
const float* yj = (float*)code; \
|
|
26
|
+
return vd(xi, yj); \
|
|
27
|
+
} \
|
|
28
|
+
template <> \
|
|
29
|
+
size_t IVFFlatScanner<VectorDistance<mt, THE_SL>>::scan_codes( \
|
|
30
|
+
size_t list_size, \
|
|
31
|
+
const uint8_t* codes, \
|
|
32
|
+
const idx_t* ids, \
|
|
33
|
+
ResultHandler& handler) const { \
|
|
34
|
+
return run_scan_codes_fix_C<C>(*this, list_size, codes, ids, handler); \
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
DEFINE_IVFFLAT_SCANNER_METHODS(METRIC_L2)
|
|
38
|
+
DEFINE_IVFFLAT_SCANNER_METHODS(METRIC_INNER_PRODUCT)
|
|
39
|
+
DEFINE_IVFFLAT_SCANNER_METHODS(METRIC_L1)
|
|
40
|
+
DEFINE_IVFFLAT_SCANNER_METHODS(METRIC_Linf)
|
|
41
|
+
DEFINE_IVFFLAT_SCANNER_METHODS(METRIC_Lp)
|
|
42
|
+
DEFINE_IVFFLAT_SCANNER_METHODS(METRIC_Canberra)
|
|
43
|
+
DEFINE_IVFFLAT_SCANNER_METHODS(METRIC_BrayCurtis)
|
|
44
|
+
DEFINE_IVFFLAT_SCANNER_METHODS(METRIC_JensenShannon)
|
|
45
|
+
DEFINE_IVFFLAT_SCANNER_METHODS(METRIC_Jaccard)
|
|
46
|
+
DEFINE_IVFFLAT_SCANNER_METHODS(METRIC_NaNEuclidean)
|
|
47
|
+
DEFINE_IVFFLAT_SCANNER_METHODS(METRIC_GOWER)
|
|
48
|
+
|
|
49
|
+
#undef DEFINE_IVFFLAT_SCANNER_METHODS
|
|
50
|
+
|
|
51
|
+
} // namespace faiss
|