faiss 0.5.3 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/ext/faiss/ext.cpp +1 -1
- data/ext/faiss/extconf.rb +4 -4
- data/ext/faiss/index.cpp +63 -45
- data/ext/faiss/index_binary.cpp +37 -27
- data/ext/faiss/kmeans.cpp +9 -8
- data/ext/faiss/pca_matrix.cpp +9 -7
- data/ext/faiss/product_quantizer.cpp +13 -11
- data/ext/faiss/utils.cpp +4 -2
- data/ext/faiss/utils.h +4 -0
- data/lib/faiss/version.rb +1 -1
- data/lib/faiss.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +214 -82
- data/vendor/faiss/faiss/AutoTune.h +14 -1
- data/vendor/faiss/faiss/Clustering.cpp +97 -249
- data/vendor/faiss/faiss/Clustering.h +18 -0
- data/vendor/faiss/faiss/IVFlib.cpp +67 -44
- data/vendor/faiss/faiss/Index.cpp +25 -12
- data/vendor/faiss/faiss/Index.h +26 -4
- data/vendor/faiss/faiss/Index2Layer.cpp +37 -53
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +68 -61
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +36 -34
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +4 -1
- data/vendor/faiss/faiss/IndexBinary.cpp +6 -3
- data/vendor/faiss/faiss/IndexBinary.h +4 -4
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +1 -1
- data/vendor/faiss/faiss/IndexBinaryFlat.h +1 -1
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -4
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +92 -95
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +9 -3
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +45 -236
- data/vendor/faiss/faiss/IndexBinaryHash.h +6 -6
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +120 -414
- data/vendor/faiss/faiss/IndexFastScan.cpp +105 -129
- data/vendor/faiss/faiss/IndexFastScan.h +35 -24
- data/vendor/faiss/faiss/IndexFlat.cpp +216 -152
- data/vendor/faiss/faiss/IndexFlat.h +32 -14
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +88 -41
- data/vendor/faiss/faiss/IndexFlatCodes.h +7 -1
- data/vendor/faiss/faiss/IndexHNSW.cpp +299 -187
- data/vendor/faiss/faiss/IndexHNSW.h +30 -14
- data/vendor/faiss/faiss/IndexIDMap.cpp +26 -22
- data/vendor/faiss/faiss/IndexIDMap.h +9 -7
- data/vendor/faiss/faiss/IndexIVF.cpp +535 -405
- data/vendor/faiss/faiss/IndexIVF.h +47 -16
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +77 -74
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +105 -99
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +6 -3
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +379 -249
- data/vendor/faiss/faiss/IndexIVFFastScan.h +65 -60
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +41 -124
- data/vendor/faiss/faiss/IndexIVFFlat.h +32 -0
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +89 -138
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.h +3 -1
- data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +18 -15
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +77 -907
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +184 -122
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +3 -0
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +23 -18
- data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +59 -60
- data/vendor/faiss/faiss/IndexIVFRaBitQ.h +4 -3
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +564 -416
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +269 -111
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +41 -127
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +1 -1
- data/vendor/faiss/faiss/IndexLSH.cpp +44 -25
- data/vendor/faiss/faiss/IndexLattice.cpp +41 -36
- data/vendor/faiss/faiss/IndexNNDescent.cpp +37 -21
- data/vendor/faiss/faiss/IndexNNDescent.h +2 -2
- data/vendor/faiss/faiss/IndexNSG.cpp +40 -23
- data/vendor/faiss/faiss/IndexNSG.h +0 -2
- data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +32 -12
- data/vendor/faiss/faiss/IndexPQ.cpp +129 -213
- data/vendor/faiss/faiss/IndexPQ.h +3 -2
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +20 -14
- data/vendor/faiss/faiss/IndexPQFastScan.h +3 -0
- data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -18
- data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
- data/vendor/faiss/faiss/IndexRaBitQ.cpp +31 -43
- data/vendor/faiss/faiss/IndexRaBitQ.h +4 -3
- data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +135 -317
- data/vendor/faiss/faiss/IndexRaBitQFastScan.h +192 -34
- data/vendor/faiss/faiss/IndexRefine.cpp +30 -55
- data/vendor/faiss/faiss/IndexRefine.h +4 -4
- data/vendor/faiss/faiss/IndexReplicas.cpp +6 -6
- data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +15 -14
- data/vendor/faiss/faiss/IndexRowwiseMinMax.h +1 -1
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +82 -14
- data/vendor/faiss/faiss/IndexShards.cpp +13 -13
- data/vendor/faiss/faiss/IndexShardsIVF.cpp +21 -15
- data/vendor/faiss/faiss/MatrixStats.cpp +5 -4
- data/vendor/faiss/faiss/MetaIndexes.cpp +19 -17
- data/vendor/faiss/faiss/MetaIndexes.h +1 -1
- data/vendor/faiss/faiss/MetricType.h +29 -6
- data/vendor/faiss/faiss/SuperKMeans.cpp +656 -0
- data/vendor/faiss/faiss/SuperKMeans.h +97 -0
- data/vendor/faiss/faiss/VectorTransform.cpp +349 -141
- data/vendor/faiss/faiss/VectorTransform.h +39 -16
- data/vendor/faiss/faiss/build.cpp +23 -0
- data/vendor/faiss/faiss/build.h +15 -0
- data/vendor/faiss/faiss/clone_index.cpp +55 -51
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +47 -47
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +11 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +38 -38
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +11 -0
- data/vendor/faiss/faiss/{cppcontrib/factory_tools.cpp → factory_tools.cpp} +6 -1
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +1 -1
- data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +6 -5
- data/vendor/faiss/faiss/gpu/GpuResources.h +1 -1
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +9 -9
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +4 -3
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +46 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +56 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +78 -1
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +72 -0
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +23 -0
- data/vendor/faiss/faiss/gpu/utils/CuvsFilterConvert.h +1 -1
- data/vendor/faiss/faiss/gpu/utils/CuvsUtils.h +21 -10
- data/vendor/faiss/faiss/gpu_metal/GpuIndexFlat.h +22 -0
- data/vendor/faiss/faiss/gpu_metal/MetalCloner.h +35 -0
- data/vendor/faiss/faiss/gpu_metal/MetalFlatKernels.h +40 -0
- data/vendor/faiss/faiss/gpu_metal/MetalIndex.h +51 -0
- data/vendor/faiss/faiss/gpu_metal/MetalIndexFlat.h +65 -0
- data/vendor/faiss/faiss/gpu_metal/MetalKernels.h +66 -0
- data/vendor/faiss/faiss/gpu_metal/MetalResources.h +79 -0
- data/vendor/faiss/faiss/gpu_metal/StandardMetalResources.h +35 -0
- data/vendor/faiss/faiss/impl/AdSampling.cpp +103 -0
- data/vendor/faiss/faiss/impl/AdSampling.h +35 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +64 -34
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +1 -0
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +10 -9
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +3 -28
- data/vendor/faiss/faiss/impl/ClusteringHelpers.cpp +244 -0
- data/vendor/faiss/faiss/impl/ClusteringHelpers.h +94 -0
- data/vendor/faiss/faiss/impl/ClusteringInitialization.cpp +367 -0
- data/vendor/faiss/faiss/impl/ClusteringInitialization.h +107 -0
- data/vendor/faiss/faiss/impl/CodePacker.cpp +7 -3
- data/vendor/faiss/faiss/impl/CodePacker.h +11 -3
- data/vendor/faiss/faiss/impl/CodePackerRaBitQ.cpp +83 -0
- data/vendor/faiss/faiss/impl/CodePackerRaBitQ.h +47 -0
- data/vendor/faiss/faiss/impl/DistanceComputer.h +8 -8
- data/vendor/faiss/faiss/impl/FaissAssert.h +64 -3
- data/vendor/faiss/faiss/impl/FaissException.h +50 -3
- data/vendor/faiss/faiss/impl/HNSW.cpp +117 -351
- data/vendor/faiss/faiss/impl/HNSW.h +21 -40
- data/vendor/faiss/faiss/impl/IDSelector.cpp +15 -11
- data/vendor/faiss/faiss/impl/IDSelector.h +8 -8
- data/vendor/faiss/faiss/impl/InvertedListScannerStats.h +26 -0
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +114 -102
- data/vendor/faiss/faiss/impl/NNDescent.cpp +63 -26
- data/vendor/faiss/faiss/impl/NNDescent.h +6 -2
- data/vendor/faiss/faiss/impl/NSG.cpp +44 -26
- data/vendor/faiss/faiss/impl/NSG.h +20 -10
- data/vendor/faiss/faiss/impl/Panorama.cpp +76 -52
- data/vendor/faiss/faiss/impl/Panorama.h +265 -78
- data/vendor/faiss/faiss/impl/PdxLayout.cpp +93 -0
- data/vendor/faiss/faiss/impl/PdxLayout.h +41 -0
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +62 -37
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +3 -3
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +35 -35
- data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +21 -16
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +99 -80
- data/vendor/faiss/faiss/impl/Quantizer.h +2 -2
- data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +135 -37
- data/vendor/faiss/faiss/impl/RaBitQUtils.h +148 -21
- data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +298 -301
- data/vendor/faiss/faiss/impl/RaBitQuantizer.h +3 -10
- data/vendor/faiss/faiss/impl/RaBitQuantizerMultiBit.cpp +15 -41
- data/vendor/faiss/faiss/impl/RaBitQuantizerMultiBit.h +0 -4
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +40 -32
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ResultHandler.h +218 -113
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +119 -2362
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +27 -3
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +14 -11
- data/vendor/faiss/faiss/impl/VisitedTable.cpp +42 -0
- data/vendor/faiss/faiss/impl/VisitedTable.h +76 -0
- data/vendor/faiss/faiss/impl/approx_topk/approx_topk.h +276 -0
- data/vendor/faiss/faiss/impl/approx_topk/avx2.cpp +68 -0
- data/vendor/faiss/faiss/{utils → impl}/approx_topk/generic.h +15 -8
- data/vendor/faiss/faiss/impl/approx_topk/neon.cpp +68 -0
- data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab-inl.h +169 -0
- data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab.h +117 -0
- data/vendor/faiss/faiss/impl/approx_topk/simdlib256-inl.h +146 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHNSW_impl.h +73 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHash_impl.h +270 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryIVF_impl.h +460 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexIVFSpectralHash_impl.h +159 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexPQ_impl.h +92 -0
- data/vendor/faiss/faiss/impl/binary_hamming/avx2.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/avx512.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/dispatch.h +143 -0
- data/vendor/faiss/faiss/impl/binary_hamming/neon.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/rvv.cpp +26 -0
- data/vendor/faiss/faiss/impl/expanded_scanners.h +163 -0
- data/vendor/faiss/faiss/impl/{FastScanDistancePostProcessing.h → fast_scan/FastScanDistancePostProcessing.h} +13 -6
- data/vendor/faiss/faiss/impl/{LookupTableScaler.h → fast_scan/LookupTableScaler.h} +16 -5
- data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops.h +237 -0
- data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops_512.h +185 -0
- data/vendor/faiss/faiss/impl/fast_scan/decompose_qbs.h +229 -0
- data/vendor/faiss/faiss/impl/fast_scan/dispatching.h +268 -0
- data/vendor/faiss/faiss/impl/{pq4_fast_scan.cpp → fast_scan/fast_scan.cpp} +176 -4
- data/vendor/faiss/faiss/impl/fast_scan/fast_scan.h +341 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-avx2.cpp +36 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-avx512.cpp +40 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-neon.cpp +120 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-riscv.cpp +104 -0
- data/vendor/faiss/faiss/impl/fast_scan/kernels_simd256.h +213 -0
- data/vendor/faiss/faiss/impl/{pq4_fast_scan_search_qbs.cpp → fast_scan/kernels_simd512.h} +26 -348
- data/vendor/faiss/faiss/impl/fast_scan/rabitq_dispatching.h +90 -0
- data/vendor/faiss/faiss/impl/fast_scan/rabitq_result_handler.h +108 -0
- data/vendor/faiss/faiss/impl/{simd_result_handlers.h → fast_scan/simd_result_handlers.h} +290 -142
- data/vendor/faiss/faiss/impl/hnsw/LockVector.cpp +54 -0
- data/vendor/faiss/faiss/impl/hnsw/LockVector.h +64 -0
- data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.cpp +91 -0
- data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.h +64 -0
- data/vendor/faiss/faiss/impl/hnsw/avx2.cpp +104 -0
- data/vendor/faiss/faiss/impl/hnsw/avx512.cpp +111 -0
- data/vendor/faiss/faiss/impl/index_read.cpp +1950 -505
- data/vendor/faiss/faiss/impl/index_read_utils.h +1 -2
- data/vendor/faiss/faiss/impl/index_write.cpp +112 -21
- data/vendor/faiss/faiss/impl/io.cpp +6 -6
- data/vendor/faiss/faiss/impl/io_macros.h +33 -16
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +10 -10
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +81 -40
- data/vendor/faiss/faiss/impl/lattice_Zn.h +6 -6
- data/vendor/faiss/faiss/impl/mapped_io.cpp +15 -8
- data/vendor/faiss/faiss/impl/platform_macros.h +11 -4
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQScanner_impl.h +549 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.cpp +245 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.h +105 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/PQDistanceComputer_impl.h +106 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/avx2.cpp +21 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/avx512.cpp +21 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/neon.cpp +21 -0
- data/vendor/faiss/faiss/impl/{code_distance/code_distance-avx2.h → pq_code_distance/pq_code_distance-avx2.h} +43 -220
- data/vendor/faiss/faiss/impl/{code_distance/code_distance-avx512.h → pq_code_distance/pq_code_distance-avx512.h} +25 -112
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.cpp +59 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.h +96 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-inl.h +256 -0
- data/vendor/faiss/faiss/impl/{code_distance/code_distance-sve.h → pq_code_distance/pq_code_distance-sve.cpp} +57 -146
- data/vendor/faiss/faiss/impl/pq_code_distance/rvv.cpp +68 -0
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +320 -483
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +1 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/codecs.h +121 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/distance_computers.h +137 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/quantizers.h +371 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/scanners.h +190 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/similarities.h +94 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx2.cpp +603 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512.cpp +597 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-dispatch.h +388 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-neon.cpp +630 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-rvv.cpp +311 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/training.cpp +387 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/training.h +54 -0
- data/vendor/faiss/faiss/impl/simd_dispatch.h +173 -0
- data/vendor/faiss/faiss/impl/simdlib/simdlib.h +57 -0
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_avx2.h +274 -171
- data/vendor/faiss/faiss/impl/simdlib/simdlib_avx512.h +414 -0
- data/vendor/faiss/faiss/impl/simdlib/simdlib_dispatch.h +44 -0
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_emulated.h +231 -166
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_neon.h +275 -217
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_ppc64.h +201 -160
- data/vendor/faiss/faiss/impl/svs_io.cpp +12 -3
- data/vendor/faiss/faiss/impl/svs_io.h +8 -2
- data/vendor/faiss/faiss/index_factory.cpp +115 -28
- data/vendor/faiss/faiss/index_io.h +53 -3
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +73 -20
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +24 -14
- data/vendor/faiss/faiss/invlists/DirectMap.h +4 -3
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +157 -73
- data/vendor/faiss/faiss/invlists/InvertedLists.h +86 -23
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +4 -4
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +14 -14
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSFaissUtils.h +9 -19
- data/vendor/faiss/faiss/svs/IndexSVSFlat.cpp +2 -2
- data/vendor/faiss/faiss/svs/IndexSVSFlat.h +2 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVF.cpp +350 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVF.h +128 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.cpp +40 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.h +43 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.cpp +225 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.h +71 -0
- data/vendor/faiss/faiss/svs/IndexSVSVamana.cpp +25 -1
- data/vendor/faiss/faiss/svs/IndexSVSVamana.h +19 -2
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.cpp +19 -2
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.h +14 -0
- data/vendor/faiss/faiss/utils/Heap.cpp +56 -10
- data/vendor/faiss/faiss/utils/Heap.h +21 -0
- data/vendor/faiss/faiss/utils/NeuralNet.cpp +54 -40
- data/vendor/faiss/faiss/utils/NeuralNet.h +1 -1
- data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +10 -4
- data/vendor/faiss/faiss/utils/distances.cpp +507 -559
- data/vendor/faiss/faiss/utils/distances.h +118 -1
- data/vendor/faiss/faiss/utils/distances_dispatch.h +250 -0
- data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +8 -7
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +33 -14
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +12 -1
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +16 -293
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based_neon.cpp +57 -0
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_kernel-inl.h +290 -0
- data/vendor/faiss/faiss/utils/distances_simd.cpp +72 -3681
- data/vendor/faiss/faiss/utils/extra_distances.cpp +60 -102
- data/vendor/faiss/faiss/utils/extra_distances.h +79 -7
- data/vendor/faiss/faiss/utils/hamming-inl.h +13 -11
- data/vendor/faiss/faiss/utils/hamming.cpp +66 -517
- data/vendor/faiss/faiss/utils/hamming.h +92 -2
- data/vendor/faiss/faiss/utils/hamming_distance/common.h +287 -10
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx2.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx512.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx2.h +142 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512.h +234 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-generic.h +368 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-neon.h +322 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-rvv.h +39 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer.h +146 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_impl.h +481 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_neon.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_rvv.cpp +15 -0
- data/vendor/faiss/faiss/utils/partitioning.cpp +66 -987
- data/vendor/faiss/faiss/utils/partitioning.h +31 -0
- data/vendor/faiss/faiss/utils/popcount.h +29 -0
- data/vendor/faiss/faiss/utils/pq_code_distance.h +251 -0
- data/vendor/faiss/faiss/utils/prefetch.h +2 -2
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +30 -30
- data/vendor/faiss/faiss/utils/quantize_lut.h +1 -1
- data/vendor/faiss/faiss/utils/rabitq_simd.h +124 -343
- data/vendor/faiss/faiss/utils/random.cpp +6 -6
- data/vendor/faiss/faiss/utils/simd_impl/IVFFlatScanner-inl.h +51 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_aarch64.cpp +154 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_arm_sve.cpp +777 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_autovec-inl.h +306 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_avx2.cpp +1431 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_avx512.cpp +1095 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_rvv.cpp +189 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_simdlib256.h +195 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_sse-inl.h +392 -0
- data/vendor/faiss/faiss/utils/{distances_fused/simdlib_based.h → simd_impl/exhaustive_L2sqr_blas_cmax.h} +5 -10
- data/vendor/faiss/faiss/utils/simd_impl/hamming_impl.h +481 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_avx2.cpp +14 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_neon.cpp +14 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_simdlib256.h +1085 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx2.cpp +355 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx512.cpp +477 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_neon.cpp +55 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_rvv.cpp +55 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_dispatch.h +32 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels.h +43 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx2.cpp +57 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx512.cpp +45 -0
- data/vendor/faiss/faiss/utils/simd_levels.cpp +334 -0
- data/vendor/faiss/faiss/utils/simd_levels.h +183 -0
- data/vendor/faiss/faiss/utils/sorting.cpp +48 -36
- data/vendor/faiss/faiss/utils/utils.cpp +21 -14
- data/vendor/faiss/faiss/utils/utils.h +3 -3
- metadata +156 -42
- data/vendor/faiss/faiss/impl/RaBitQStats.cpp +0 -29
- data/vendor/faiss/faiss/impl/RaBitQStats.h +0 -56
- data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +0 -81
- data/vendor/faiss/faiss/impl/code_distance/code_distance.h +0 -186
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +0 -216
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +0 -224
- data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +0 -84
- data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +0 -196
- data/vendor/faiss/faiss/utils/approx_topk/mode.h +0 -34
- data/vendor/faiss/faiss/utils/distances_fused/avx512.h +0 -36
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +0 -228
- data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +0 -462
- data/vendor/faiss/faiss/utils/hamming_distance/avx512-inl.h +0 -490
- data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +0 -450
- data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +0 -87
- data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +0 -524
- data/vendor/faiss/faiss/utils/simdlib.h +0 -42
- data/vendor/faiss/faiss/utils/simdlib_avx512.h +0 -296
- /data/vendor/faiss/faiss/{cppcontrib/factory_tools.h → factory_tools.h} +0 -0
|
@@ -7,47 +7,25 @@
|
|
|
7
7
|
|
|
8
8
|
#pragma once
|
|
9
9
|
|
|
10
|
-
#ifdef __AVX512F__
|
|
11
|
-
|
|
12
10
|
#include <immintrin.h>
|
|
13
11
|
|
|
14
|
-
#include <
|
|
15
|
-
|
|
16
|
-
#include <faiss/impl/ProductQuantizer.h>
|
|
17
|
-
#include <faiss/impl/code_distance/code_distance-generic.h>
|
|
12
|
+
#include <faiss/impl/pq_code_distance/pq_code_distance-inl.h>
|
|
18
13
|
|
|
19
14
|
namespace faiss {
|
|
15
|
+
namespace pq_code_distance {
|
|
20
16
|
|
|
21
17
|
// According to experiments, the AVX-512 version may be SLOWER than
|
|
22
|
-
//
|
|
23
|
-
// This version is
|
|
18
|
+
// the AVX2 version, which is somewhat unexpected.
|
|
19
|
+
// This version is kept for completeness.
|
|
24
20
|
//
|
|
25
21
|
// TODO: test for AMD CPUs.
|
|
26
22
|
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
const size_t nbits,
|
|
34
|
-
// precomputed distances, layout (M, ksub)
|
|
35
|
-
const float* sim_table,
|
|
36
|
-
const uint8_t* code) {
|
|
37
|
-
// default implementation
|
|
38
|
-
return distance_single_code_generic<PQDecoderT>(M, nbits, sim_table, code);
|
|
39
|
-
}
|
|
40
|
-
|
|
41
|
-
template <typename PQDecoderT>
|
|
42
|
-
typename std::enable_if<std::is_same<PQDecoderT, PQDecoder8>::value, float>::
|
|
43
|
-
type inline distance_single_code_avx512(
|
|
44
|
-
// number of subquantizers
|
|
45
|
-
const size_t M,
|
|
46
|
-
// number of bits per quantization index
|
|
47
|
-
const size_t nbits,
|
|
48
|
-
// precomputed distances, layout (M, ksub)
|
|
49
|
-
const float* sim_table,
|
|
50
|
-
const uint8_t* code0) {
|
|
23
|
+
// NOLINTNEXTLINE(facebook-hte-MisplacedTemplateSpecialization)
|
|
24
|
+
template <>
|
|
25
|
+
float pq_code_distance_8bit_single_impl<SIMDLevel::AVX512>(
|
|
26
|
+
size_t M,
|
|
27
|
+
const float* sim_table,
|
|
28
|
+
const uint8_t* code0) {
|
|
51
29
|
float result0 = 0;
|
|
52
30
|
constexpr size_t ksub = 1 << 8;
|
|
53
31
|
|
|
@@ -59,50 +37,38 @@ typename std::enable_if<std::is_same<PQDecoderT, PQDecoder8>::value, float>::
|
|
|
59
37
|
const float* tab = sim_table;
|
|
60
38
|
|
|
61
39
|
if (pqM16 > 0) {
|
|
62
|
-
// process 16 values per loop
|
|
63
40
|
const __m512i vksub = _mm512_set1_epi32(ksub);
|
|
64
41
|
__m512i offsets_0 = _mm512_setr_epi32(
|
|
65
42
|
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
|
|
66
43
|
offsets_0 = _mm512_mullo_epi32(offsets_0, vksub);
|
|
67
44
|
|
|
68
|
-
// accumulators of partial sums
|
|
69
45
|
__m512 partialSums[N];
|
|
70
46
|
for (intptr_t j = 0; j < N; j++) {
|
|
71
47
|
partialSums[j] = _mm512_setzero_ps();
|
|
72
48
|
}
|
|
73
49
|
|
|
74
|
-
// loop
|
|
50
|
+
// Process 16 values per loop iteration.
|
|
75
51
|
for (m = 0; m < pqM16 * 16; m += 16) {
|
|
76
|
-
// load 16 uint8 values
|
|
77
52
|
__m128i mm1[N];
|
|
78
53
|
mm1[0] = _mm_loadu_si128((const __m128i_u*)(code0 + m));
|
|
79
54
|
|
|
80
|
-
// process first 8 codes
|
|
81
55
|
for (intptr_t j = 0; j < N; j++) {
|
|
82
56
|
const __m512i idx1 = _mm512_cvtepu8_epi32(mm1[j]);
|
|
83
|
-
|
|
84
|
-
// add offsets
|
|
85
57
|
const __m512i indices_to_read_from =
|
|
86
58
|
_mm512_add_epi32(idx1, offsets_0);
|
|
87
|
-
|
|
88
|
-
// gather 16 values, similar to 16 operations of tab[idx]
|
|
89
59
|
__m512 collected = _mm512_i32gather_ps(
|
|
90
60
|
indices_to_read_from, tab, sizeof(float));
|
|
91
|
-
|
|
92
|
-
// collect partial sums
|
|
93
61
|
partialSums[j] = _mm512_add_ps(partialSums[j], collected);
|
|
94
62
|
}
|
|
95
63
|
tab += ksub * 16;
|
|
96
64
|
}
|
|
97
65
|
|
|
98
|
-
// horizontal sum for partialSum
|
|
99
66
|
result0 += _mm512_reduce_add_ps(partialSums[0]);
|
|
100
67
|
}
|
|
101
68
|
|
|
102
|
-
//
|
|
69
|
+
// Process leftovers.
|
|
103
70
|
if (m < M) {
|
|
104
|
-
|
|
105
|
-
PQDecoder8 decoder0(code0 + m, nbits);
|
|
71
|
+
PQDecoder8 decoder0(code0 + m, 8);
|
|
106
72
|
for (; m < M; m++) {
|
|
107
73
|
result0 += tab[decoder0.decode()];
|
|
108
74
|
tab += ksub;
|
|
@@ -112,56 +78,16 @@ typename std::enable_if<std::is_same<PQDecoderT, PQDecoder8>::value, float>::
|
|
|
112
78
|
return result0;
|
|
113
79
|
}
|
|
114
80
|
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
const size_t M,
|
|
121
|
-
// number of bits per quantization index
|
|
122
|
-
const size_t nbits,
|
|
123
|
-
// precomputed distances, layout (M, ksub)
|
|
124
|
-
const float* sim_table,
|
|
125
|
-
// codes
|
|
126
|
-
const uint8_t* __restrict code0,
|
|
127
|
-
const uint8_t* __restrict code1,
|
|
128
|
-
const uint8_t* __restrict code2,
|
|
129
|
-
const uint8_t* __restrict code3,
|
|
130
|
-
// computed distances
|
|
131
|
-
float& result0,
|
|
132
|
-
float& result1,
|
|
133
|
-
float& result2,
|
|
134
|
-
float& result3) {
|
|
135
|
-
distance_four_codes_generic<PQDecoderT>(
|
|
136
|
-
M,
|
|
137
|
-
nbits,
|
|
138
|
-
sim_table,
|
|
139
|
-
code0,
|
|
140
|
-
code1,
|
|
141
|
-
code2,
|
|
142
|
-
code3,
|
|
143
|
-
result0,
|
|
144
|
-
result1,
|
|
145
|
-
result2,
|
|
146
|
-
result3);
|
|
147
|
-
}
|
|
148
|
-
|
|
149
|
-
// Combines 4 operations of distance_single_code()
|
|
150
|
-
template <typename PQDecoderT>
|
|
151
|
-
typename std::enable_if<std::is_same<PQDecoderT, PQDecoder8>::value, void>::type
|
|
152
|
-
distance_four_codes_avx512(
|
|
153
|
-
// number of subquantizers
|
|
154
|
-
const size_t M,
|
|
155
|
-
// number of bits per quantization index
|
|
156
|
-
const size_t nbits,
|
|
157
|
-
// precomputed distances, layout (M, ksub)
|
|
81
|
+
// Combines 4 operations of pq_code_distance_8bit_single_impl().
|
|
82
|
+
// NOLINTNEXTLINE(facebook-hte-MisplacedTemplateSpecialization)
|
|
83
|
+
template <>
|
|
84
|
+
void pq_code_distance_8bit_four_impl<SIMDLevel::AVX512>(
|
|
85
|
+
size_t M,
|
|
158
86
|
const float* sim_table,
|
|
159
|
-
// codes
|
|
160
87
|
const uint8_t* __restrict code0,
|
|
161
88
|
const uint8_t* __restrict code1,
|
|
162
89
|
const uint8_t* __restrict code2,
|
|
163
90
|
const uint8_t* __restrict code3,
|
|
164
|
-
// computed distances
|
|
165
91
|
float& result0,
|
|
166
92
|
float& result1,
|
|
167
93
|
float& result2,
|
|
@@ -180,59 +106,47 @@ distance_four_codes_avx512(
|
|
|
180
106
|
const float* tab = sim_table;
|
|
181
107
|
|
|
182
108
|
if (pqM16 > 0) {
|
|
183
|
-
// process 16 values per loop
|
|
184
109
|
const __m512i vksub = _mm512_set1_epi32(ksub);
|
|
185
110
|
__m512i offsets_0 = _mm512_setr_epi32(
|
|
186
111
|
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
|
|
187
112
|
offsets_0 = _mm512_mullo_epi32(offsets_0, vksub);
|
|
188
113
|
|
|
189
|
-
// accumulators of partial sums
|
|
190
114
|
__m512 partialSums[N];
|
|
191
115
|
for (intptr_t j = 0; j < N; j++) {
|
|
192
116
|
partialSums[j] = _mm512_setzero_ps();
|
|
193
117
|
}
|
|
194
118
|
|
|
195
|
-
// loop
|
|
119
|
+
// Process 16 values per loop iteration.
|
|
196
120
|
for (m = 0; m < pqM16 * 16; m += 16) {
|
|
197
|
-
// load 16 uint8 values
|
|
198
121
|
__m128i mm1[N];
|
|
199
122
|
mm1[0] = _mm_loadu_si128((const __m128i_u*)(code0 + m));
|
|
200
123
|
mm1[1] = _mm_loadu_si128((const __m128i_u*)(code1 + m));
|
|
201
124
|
mm1[2] = _mm_loadu_si128((const __m128i_u*)(code2 + m));
|
|
202
125
|
mm1[3] = _mm_loadu_si128((const __m128i_u*)(code3 + m));
|
|
203
126
|
|
|
204
|
-
// process first 8 codes
|
|
205
127
|
for (intptr_t j = 0; j < N; j++) {
|
|
206
128
|
const __m512i idx1 = _mm512_cvtepu8_epi32(mm1[j]);
|
|
207
|
-
|
|
208
|
-
// add offsets
|
|
209
129
|
const __m512i indices_to_read_from =
|
|
210
130
|
_mm512_add_epi32(idx1, offsets_0);
|
|
211
|
-
|
|
212
|
-
// gather 16 values, similar to 16 operations of tab[idx]
|
|
213
131
|
__m512 collected = _mm512_i32gather_ps(
|
|
214
132
|
indices_to_read_from, tab, sizeof(float));
|
|
215
|
-
|
|
216
|
-
// collect partial sums
|
|
217
133
|
partialSums[j] = _mm512_add_ps(partialSums[j], collected);
|
|
218
134
|
}
|
|
219
135
|
tab += ksub * 16;
|
|
220
136
|
}
|
|
221
137
|
|
|
222
|
-
// horizontal sum for partialSum
|
|
223
138
|
result0 += _mm512_reduce_add_ps(partialSums[0]);
|
|
224
139
|
result1 += _mm512_reduce_add_ps(partialSums[1]);
|
|
225
140
|
result2 += _mm512_reduce_add_ps(partialSums[2]);
|
|
226
141
|
result3 += _mm512_reduce_add_ps(partialSums[3]);
|
|
227
142
|
}
|
|
228
143
|
|
|
229
|
-
//
|
|
144
|
+
// Process leftovers.
|
|
230
145
|
if (m < M) {
|
|
231
|
-
|
|
232
|
-
PQDecoder8
|
|
233
|
-
PQDecoder8
|
|
234
|
-
PQDecoder8
|
|
235
|
-
PQDecoder8 decoder3(code3 + m, nbits);
|
|
146
|
+
PQDecoder8 decoder0(code0 + m, 8);
|
|
147
|
+
PQDecoder8 decoder1(code1 + m, 8);
|
|
148
|
+
PQDecoder8 decoder2(code2 + m, 8);
|
|
149
|
+
PQDecoder8 decoder3(code3 + m, 8);
|
|
236
150
|
for (; m < M; m++) {
|
|
237
151
|
result0 += tab[decoder0.decode()];
|
|
238
152
|
result1 += tab[decoder1.decode()];
|
|
@@ -243,6 +157,5 @@ distance_four_codes_avx512(
|
|
|
243
157
|
}
|
|
244
158
|
}
|
|
245
159
|
|
|
160
|
+
} // namespace pq_code_distance
|
|
246
161
|
} // namespace faiss
|
|
247
|
-
|
|
248
|
-
#endif
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
// This TU provides non-templated PQ code distance dispatch wrappers
|
|
9
|
+
// (pq_code_distance_8bit_single, pq_code_distance_8bit_four) declared
|
|
10
|
+
// in pq_code_distance-inl.h. These use with_simd_level to route to the
|
|
11
|
+
// best available SIMD implementation via pq_code_distance_8bit_*_impl
|
|
12
|
+
// function template specializations.
|
|
13
|
+
//
|
|
14
|
+
// The NONE and ARM_NEON _impl specializations are defined inline in
|
|
15
|
+
// pq_code_distance-generic.h (included directly below). The AVX2, AVX512,
|
|
16
|
+
// and ARM_SVE specializations are in their respective per-SIMD files.
|
|
17
|
+
|
|
18
|
+
#include <faiss/impl/pq_code_distance/pq_code_distance-generic.h>
|
|
19
|
+
|
|
20
|
+
namespace faiss {
|
|
21
|
+
namespace pq_code_distance {
|
|
22
|
+
|
|
23
|
+
float pq_code_distance_8bit_single(
|
|
24
|
+
size_t M,
|
|
25
|
+
const float* sim_table,
|
|
26
|
+
const uint8_t* code) {
|
|
27
|
+
return with_simd_level([&]<SIMDLevel SL>() {
|
|
28
|
+
return pq_code_distance_8bit_single_impl<SL>(M, sim_table, code);
|
|
29
|
+
});
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
void pq_code_distance_8bit_four(
|
|
33
|
+
size_t M,
|
|
34
|
+
const float* sim_table,
|
|
35
|
+
const uint8_t* __restrict code0,
|
|
36
|
+
const uint8_t* __restrict code1,
|
|
37
|
+
const uint8_t* __restrict code2,
|
|
38
|
+
const uint8_t* __restrict code3,
|
|
39
|
+
float& result0,
|
|
40
|
+
float& result1,
|
|
41
|
+
float& result2,
|
|
42
|
+
float& result3) {
|
|
43
|
+
with_simd_level([&]<SIMDLevel SL>() {
|
|
44
|
+
pq_code_distance_8bit_four_impl<SL>(
|
|
45
|
+
M,
|
|
46
|
+
sim_table,
|
|
47
|
+
code0,
|
|
48
|
+
code1,
|
|
49
|
+
code2,
|
|
50
|
+
code3,
|
|
51
|
+
result0,
|
|
52
|
+
result1,
|
|
53
|
+
result2,
|
|
54
|
+
result3);
|
|
55
|
+
});
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
} // namespace pq_code_distance
|
|
59
|
+
} // namespace faiss
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
#pragma once
|
|
9
|
+
|
|
10
|
+
#include <faiss/impl/pq_code_distance/pq_code_distance-inl.h>
|
|
11
|
+
|
|
12
|
+
namespace faiss {
|
|
13
|
+
namespace pq_code_distance {
|
|
14
|
+
|
|
15
|
+
// NONE: use scalar directly.
|
|
16
|
+
|
|
17
|
+
// NOLINTNEXTLINE(facebook-hte-MisplacedTemplateSpecialization)
|
|
18
|
+
template <>
|
|
19
|
+
inline float pq_code_distance_8bit_single_impl<SIMDLevel::NONE>(
|
|
20
|
+
size_t M,
|
|
21
|
+
const float* sim_table,
|
|
22
|
+
const uint8_t* code) {
|
|
23
|
+
return PQCodeDistanceScalar<PQDecoder8>::distance_single_code(
|
|
24
|
+
M, 8, sim_table, code);
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
// NOLINTNEXTLINE(facebook-hte-MisplacedTemplateSpecialization)
|
|
28
|
+
template <>
|
|
29
|
+
inline void pq_code_distance_8bit_four_impl<SIMDLevel::NONE>(
|
|
30
|
+
size_t M,
|
|
31
|
+
const float* sim_table,
|
|
32
|
+
const uint8_t* __restrict code0,
|
|
33
|
+
const uint8_t* __restrict code1,
|
|
34
|
+
const uint8_t* __restrict code2,
|
|
35
|
+
const uint8_t* __restrict code3,
|
|
36
|
+
float& result0,
|
|
37
|
+
float& result1,
|
|
38
|
+
float& result2,
|
|
39
|
+
float& result3) {
|
|
40
|
+
PQCodeDistanceScalar<PQDecoder8>::distance_four_codes(
|
|
41
|
+
M,
|
|
42
|
+
8,
|
|
43
|
+
sim_table,
|
|
44
|
+
code0,
|
|
45
|
+
code1,
|
|
46
|
+
code2,
|
|
47
|
+
code3,
|
|
48
|
+
result0,
|
|
49
|
+
result1,
|
|
50
|
+
result2,
|
|
51
|
+
result3);
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
#ifdef COMPILE_SIMD_ARM_NEON
|
|
55
|
+
// ARM_NEON: No NEON-optimized PQ code distance exists. Use scalar.
|
|
56
|
+
|
|
57
|
+
// NOLINTNEXTLINE(facebook-hte-MisplacedTemplateSpecialization)
|
|
58
|
+
template <>
|
|
59
|
+
inline float pq_code_distance_8bit_single_impl<SIMDLevel::ARM_NEON>(
|
|
60
|
+
size_t M,
|
|
61
|
+
const float* sim_table,
|
|
62
|
+
const uint8_t* code) {
|
|
63
|
+
return PQCodeDistanceScalar<PQDecoder8>::distance_single_code(
|
|
64
|
+
M, 8, sim_table, code);
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
// NOLINTNEXTLINE(facebook-hte-MisplacedTemplateSpecialization)
|
|
68
|
+
template <>
|
|
69
|
+
inline void pq_code_distance_8bit_four_impl<SIMDLevel::ARM_NEON>(
|
|
70
|
+
size_t M,
|
|
71
|
+
const float* sim_table,
|
|
72
|
+
const uint8_t* __restrict code0,
|
|
73
|
+
const uint8_t* __restrict code1,
|
|
74
|
+
const uint8_t* __restrict code2,
|
|
75
|
+
const uint8_t* __restrict code3,
|
|
76
|
+
float& result0,
|
|
77
|
+
float& result1,
|
|
78
|
+
float& result2,
|
|
79
|
+
float& result3) {
|
|
80
|
+
PQCodeDistanceScalar<PQDecoder8>::distance_four_codes(
|
|
81
|
+
M,
|
|
82
|
+
8,
|
|
83
|
+
sim_table,
|
|
84
|
+
code0,
|
|
85
|
+
code1,
|
|
86
|
+
code2,
|
|
87
|
+
code3,
|
|
88
|
+
result0,
|
|
89
|
+
result1,
|
|
90
|
+
result2,
|
|
91
|
+
result3);
|
|
92
|
+
}
|
|
93
|
+
#endif // COMPILE_SIMD_ARM_NEON
|
|
94
|
+
|
|
95
|
+
} // namespace pq_code_distance
|
|
96
|
+
} // namespace faiss
|
|
@@ -0,0 +1,256 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
#pragma once
|
|
9
|
+
|
|
10
|
+
/**
|
|
11
|
+
* @file pq_code_distance-inl.h
|
|
12
|
+
* @brief PQ code distance SIMD-dispatched implementations.
|
|
13
|
+
*
|
|
14
|
+
* This is a PRIVATE header — do not include in public APIs or user code.
|
|
15
|
+
* Only faiss internal .cpp files (the per-SIMD implementation files and
|
|
16
|
+
* pq_code_distance-generic.cpp) should include this header.
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
#include <cstddef>
|
|
20
|
+
#include <cstdint>
|
|
21
|
+
#include <type_traits>
|
|
22
|
+
|
|
23
|
+
#include <faiss/impl/ProductQuantizer.h>
|
|
24
|
+
#include <faiss/impl/platform_macros.h>
|
|
25
|
+
#include <faiss/impl/simd_dispatch.h>
|
|
26
|
+
|
|
27
|
+
namespace faiss {
|
|
28
|
+
namespace pq_code_distance {
|
|
29
|
+
|
|
30
|
+
/*********************************************************************
|
|
31
|
+
* PQCodeDistance — SIMD-dispatched PQ code distance
|
|
32
|
+
*
|
|
33
|
+
* Computes the distance from a PQ-encoded vector to a query vector,
|
|
34
|
+
* given a precomputed table of sub-distances (one per subquantizer
|
|
35
|
+
* per centroid). Originally extracted from IndexIVFPQ.cpp.
|
|
36
|
+
*
|
|
37
|
+
* DESIGN:
|
|
38
|
+
*
|
|
39
|
+
* PQCodeDistance<PQDecoderT, SL> computes PQ code distances at a given
|
|
40
|
+
* SIMD level. The dispatch site (IndexIVFPQ.cpp, IndexPQ.cpp) uses
|
|
41
|
+
* with_simd_level to select SL at runtime, which instantiates
|
|
42
|
+
* PQCodeDistance for ALL decoder types (PQDecoder8, PQDecoder16,
|
|
43
|
+
* PQDecoderGeneric) at the chosen level.
|
|
44
|
+
*
|
|
45
|
+
* Only PQDecoder8 has SIMD-optimized implementations (AVX2, AVX512,
|
|
46
|
+
* ARM_SVE). The other decoders always use scalar code — their decode()
|
|
47
|
+
* method is inherently sequential, so SIMD doesn't help.
|
|
48
|
+
*
|
|
49
|
+
* The primary template is always complete (no forward declarations
|
|
50
|
+
* needed). For PQDecoder8, it delegates to _impl dispatch bridge
|
|
51
|
+
* functions whose specializations are defined in per-SIMD .cpp files
|
|
52
|
+
* and resolved at link time. For other decoders, it uses scalar.
|
|
53
|
+
*
|
|
54
|
+
* ADDING A NEW SIMD LEVEL:
|
|
55
|
+
*
|
|
56
|
+
* 1. Add the level to SIMDLevel enum (simd_levels.h)
|
|
57
|
+
* 2. Add dispatch_config entry (simd_dispatch.bzl)
|
|
58
|
+
* 3. Define pq_code_distance_8bit_single_impl<NEW_LEVEL> and
|
|
59
|
+
* pq_code_distance_8bit_four_impl<NEW_LEVEL> specializations in a
|
|
60
|
+
* new .cpp file compiled with appropriate SIMD flags
|
|
61
|
+
* 4. Add the .cpp to the build (CMakeLists.txt, xplat.bzl)
|
|
62
|
+
*********************************************************************/
|
|
63
|
+
|
|
64
|
+
/// Scalar PQ code distance implementation.
|
|
65
|
+
/// Templated only on decoder type, independent of SIMD level.
|
|
66
|
+
/// Used directly by non-PQDecoder8 decoders (PQDecoder16,
|
|
67
|
+
/// PQDecoderGeneric) and as fallback for PQDecoder8 at NONE/NEON.
|
|
68
|
+
template <typename PQDecoderT>
|
|
69
|
+
struct PQCodeDistanceScalar {
|
|
70
|
+
using PQDecoder = PQDecoderT;
|
|
71
|
+
|
|
72
|
+
static float distance_single_code(
|
|
73
|
+
// number of subquantizers
|
|
74
|
+
size_t M,
|
|
75
|
+
size_t nbits,
|
|
76
|
+
// precomputed distances, layout (M, ksub)
|
|
77
|
+
const float* sim_table,
|
|
78
|
+
const uint8_t* code) {
|
|
79
|
+
PQDecoderT decoder(code, nbits);
|
|
80
|
+
const size_t ksub = 1 << nbits;
|
|
81
|
+
|
|
82
|
+
const float* tab = sim_table;
|
|
83
|
+
float result = 0;
|
|
84
|
+
|
|
85
|
+
for (size_t m = 0; m < M; m++) {
|
|
86
|
+
result += tab[decoder.decode()];
|
|
87
|
+
tab += ksub;
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
return result;
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
static void distance_four_codes(
|
|
94
|
+
size_t M,
|
|
95
|
+
size_t nbits,
|
|
96
|
+
const float* sim_table,
|
|
97
|
+
const uint8_t* __restrict code0,
|
|
98
|
+
const uint8_t* __restrict code1,
|
|
99
|
+
const uint8_t* __restrict code2,
|
|
100
|
+
const uint8_t* __restrict code3,
|
|
101
|
+
float& result0,
|
|
102
|
+
float& result1,
|
|
103
|
+
float& result2,
|
|
104
|
+
float& result3) {
|
|
105
|
+
PQDecoderT decoder0(code0, nbits);
|
|
106
|
+
PQDecoderT decoder1(code1, nbits);
|
|
107
|
+
PQDecoderT decoder2(code2, nbits);
|
|
108
|
+
PQDecoderT decoder3(code3, nbits);
|
|
109
|
+
const size_t ksub = 1 << nbits;
|
|
110
|
+
|
|
111
|
+
const float* tab = sim_table;
|
|
112
|
+
result0 = 0;
|
|
113
|
+
result1 = 0;
|
|
114
|
+
result2 = 0;
|
|
115
|
+
result3 = 0;
|
|
116
|
+
|
|
117
|
+
for (size_t m = 0; m < M; m++) {
|
|
118
|
+
result0 += tab[decoder0.decode()];
|
|
119
|
+
result1 += tab[decoder1.decode()];
|
|
120
|
+
result2 += tab[decoder2.decode()];
|
|
121
|
+
result3 += tab[decoder3.decode()];
|
|
122
|
+
tab += ksub;
|
|
123
|
+
}
|
|
124
|
+
}
|
|
125
|
+
};
|
|
126
|
+
|
|
127
|
+
/*********************************************************************
|
|
128
|
+
* Dispatch bridge — function templates for PQDecoder8 SIMD dispatch.
|
|
129
|
+
*
|
|
130
|
+
* Primary declarations only; specializations are defined in per-SIMD
|
|
131
|
+
* .cpp files (AVX2, AVX512, ARM_SVE) and pq_code_distance-generic.h
|
|
132
|
+
* (NONE, ARM_NEON). Same pattern as fvec_L2sqr et al. in distances.h.
|
|
133
|
+
*********************************************************************/
|
|
134
|
+
|
|
135
|
+
template <SIMDLevel SL>
|
|
136
|
+
float pq_code_distance_8bit_single_impl(
|
|
137
|
+
size_t M,
|
|
138
|
+
const float* sim_table,
|
|
139
|
+
const uint8_t* code);
|
|
140
|
+
|
|
141
|
+
template <SIMDLevel SL>
|
|
142
|
+
void pq_code_distance_8bit_four_impl(
|
|
143
|
+
size_t M,
|
|
144
|
+
const float* sim_table,
|
|
145
|
+
const uint8_t* __restrict code0,
|
|
146
|
+
const uint8_t* __restrict code1,
|
|
147
|
+
const uint8_t* __restrict code2,
|
|
148
|
+
const uint8_t* __restrict code3,
|
|
149
|
+
float& result0,
|
|
150
|
+
float& result1,
|
|
151
|
+
float& result2,
|
|
152
|
+
float& result3);
|
|
153
|
+
|
|
154
|
+
/// Primary template — always complete.
|
|
155
|
+
/// For PQDecoder8, delegates to _impl dispatch bridges (resolved at
|
|
156
|
+
/// link time to per-SIMD implementations). For other decoders, uses
|
|
157
|
+
/// scalar — their sequential decode() methods don't benefit from SIMD.
|
|
158
|
+
template <typename PQDecoderT, SIMDLevel SL>
|
|
159
|
+
struct PQCodeDistance {
|
|
160
|
+
using PQDecoder = PQDecoderT;
|
|
161
|
+
static constexpr SIMDLevel simd_level = SL;
|
|
162
|
+
|
|
163
|
+
static float distance_single_code(
|
|
164
|
+
size_t M,
|
|
165
|
+
size_t nbits,
|
|
166
|
+
const float* sim_table,
|
|
167
|
+
const uint8_t* code) {
|
|
168
|
+
if constexpr (std::is_same_v<PQDecoderT, PQDecoder8>) {
|
|
169
|
+
return pq_code_distance_8bit_single_impl<SL>(M, sim_table, code);
|
|
170
|
+
} else {
|
|
171
|
+
return PQCodeDistanceScalar<PQDecoderT>::distance_single_code(
|
|
172
|
+
M, nbits, sim_table, code);
|
|
173
|
+
}
|
|
174
|
+
}
|
|
175
|
+
|
|
176
|
+
static void distance_four_codes(
|
|
177
|
+
size_t M,
|
|
178
|
+
size_t nbits,
|
|
179
|
+
const float* sim_table,
|
|
180
|
+
const uint8_t* __restrict code0,
|
|
181
|
+
const uint8_t* __restrict code1,
|
|
182
|
+
const uint8_t* __restrict code2,
|
|
183
|
+
const uint8_t* __restrict code3,
|
|
184
|
+
float& result0,
|
|
185
|
+
float& result1,
|
|
186
|
+
float& result2,
|
|
187
|
+
float& result3) {
|
|
188
|
+
if constexpr (std::is_same_v<PQDecoderT, PQDecoder8>) {
|
|
189
|
+
pq_code_distance_8bit_four_impl<SL>(
|
|
190
|
+
M,
|
|
191
|
+
sim_table,
|
|
192
|
+
code0,
|
|
193
|
+
code1,
|
|
194
|
+
code2,
|
|
195
|
+
code3,
|
|
196
|
+
result0,
|
|
197
|
+
result1,
|
|
198
|
+
result2,
|
|
199
|
+
result3);
|
|
200
|
+
} else {
|
|
201
|
+
PQCodeDistanceScalar<PQDecoderT>::distance_four_codes(
|
|
202
|
+
M,
|
|
203
|
+
nbits,
|
|
204
|
+
sim_table,
|
|
205
|
+
code0,
|
|
206
|
+
code1,
|
|
207
|
+
code2,
|
|
208
|
+
code3,
|
|
209
|
+
result0,
|
|
210
|
+
result1,
|
|
211
|
+
result2,
|
|
212
|
+
result3);
|
|
213
|
+
}
|
|
214
|
+
}
|
|
215
|
+
};
|
|
216
|
+
|
|
217
|
+
/*********************************************************************
|
|
218
|
+
* Non-templated PQ code distance dispatch (PQDecoder8 only).
|
|
219
|
+
*
|
|
220
|
+
* These follow the same pattern as distances.h: the caller does not
|
|
221
|
+
* name a SIMDLevel. Internally they dispatch via with_simd_level
|
|
222
|
+
* to the best available SIMD implementation (DD, i.e. dynamic dispatch: runtime detection,
|
|
223
|
+
* static: compile-time selection). Definitions are in
|
|
224
|
+
* pq_code_distance-generic.cpp.
|
|
225
|
+
*********************************************************************/
|
|
226
|
+
|
|
227
|
+
/// Compute PQ distance for a single code, dispatching to the best
|
|
228
|
+
/// available SIMD level.
|
|
229
|
+
FAISS_API float pq_code_distance_8bit_single(
|
|
230
|
+
size_t M,
|
|
231
|
+
const float* sim_table,
|
|
232
|
+
const uint8_t* code);
|
|
233
|
+
|
|
234
|
+
/// Compute PQ distances for four codes simultaneously, dispatching
|
|
235
|
+
/// to the best available SIMD level.
|
|
236
|
+
FAISS_API void pq_code_distance_8bit_four(
|
|
237
|
+
size_t M,
|
|
238
|
+
const float* sim_table,
|
|
239
|
+
const uint8_t* __restrict code0,
|
|
240
|
+
const uint8_t* __restrict code1,
|
|
241
|
+
const uint8_t* __restrict code2,
|
|
242
|
+
const uint8_t* __restrict code3,
|
|
243
|
+
float& result0,
|
|
244
|
+
float& result1,
|
|
245
|
+
float& result2,
|
|
246
|
+
float& result3);
|
|
247
|
+
|
|
248
|
+
} // namespace pq_code_distance
|
|
249
|
+
|
|
250
|
+
// Re-export public API into namespace faiss for convenience
|
|
251
|
+
using pq_code_distance::pq_code_distance_8bit_four;
|
|
252
|
+
using pq_code_distance::pq_code_distance_8bit_single;
|
|
253
|
+
using pq_code_distance::PQCodeDistance;
|
|
254
|
+
using pq_code_distance::PQCodeDistanceScalar;
|
|
255
|
+
|
|
256
|
+
} // namespace faiss
|