faiss 0.5.3 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/ext/faiss/ext.cpp +1 -1
- data/ext/faiss/extconf.rb +4 -4
- data/ext/faiss/index.cpp +63 -45
- data/ext/faiss/index_binary.cpp +37 -27
- data/ext/faiss/kmeans.cpp +9 -8
- data/ext/faiss/pca_matrix.cpp +9 -7
- data/ext/faiss/product_quantizer.cpp +13 -11
- data/ext/faiss/utils.cpp +4 -2
- data/ext/faiss/utils.h +4 -0
- data/lib/faiss/version.rb +1 -1
- data/lib/faiss.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +214 -82
- data/vendor/faiss/faiss/AutoTune.h +14 -1
- data/vendor/faiss/faiss/Clustering.cpp +97 -249
- data/vendor/faiss/faiss/Clustering.h +18 -0
- data/vendor/faiss/faiss/IVFlib.cpp +67 -44
- data/vendor/faiss/faiss/Index.cpp +25 -12
- data/vendor/faiss/faiss/Index.h +26 -4
- data/vendor/faiss/faiss/Index2Layer.cpp +37 -53
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +68 -61
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +36 -34
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +4 -1
- data/vendor/faiss/faiss/IndexBinary.cpp +6 -3
- data/vendor/faiss/faiss/IndexBinary.h +4 -4
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +1 -1
- data/vendor/faiss/faiss/IndexBinaryFlat.h +1 -1
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -4
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +92 -95
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +9 -3
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +45 -236
- data/vendor/faiss/faiss/IndexBinaryHash.h +6 -6
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +120 -414
- data/vendor/faiss/faiss/IndexFastScan.cpp +105 -129
- data/vendor/faiss/faiss/IndexFastScan.h +35 -24
- data/vendor/faiss/faiss/IndexFlat.cpp +216 -152
- data/vendor/faiss/faiss/IndexFlat.h +32 -14
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +88 -41
- data/vendor/faiss/faiss/IndexFlatCodes.h +7 -1
- data/vendor/faiss/faiss/IndexHNSW.cpp +299 -187
- data/vendor/faiss/faiss/IndexHNSW.h +30 -14
- data/vendor/faiss/faiss/IndexIDMap.cpp +26 -22
- data/vendor/faiss/faiss/IndexIDMap.h +9 -7
- data/vendor/faiss/faiss/IndexIVF.cpp +535 -405
- data/vendor/faiss/faiss/IndexIVF.h +47 -16
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +77 -74
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +105 -99
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +6 -3
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +379 -249
- data/vendor/faiss/faiss/IndexIVFFastScan.h +65 -60
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +41 -124
- data/vendor/faiss/faiss/IndexIVFFlat.h +32 -0
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +89 -138
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.h +3 -1
- data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +18 -15
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +77 -907
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +184 -122
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +3 -0
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +23 -18
- data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +59 -60
- data/vendor/faiss/faiss/IndexIVFRaBitQ.h +4 -3
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +564 -416
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +269 -111
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +41 -127
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +1 -1
- data/vendor/faiss/faiss/IndexLSH.cpp +44 -25
- data/vendor/faiss/faiss/IndexLattice.cpp +41 -36
- data/vendor/faiss/faiss/IndexNNDescent.cpp +37 -21
- data/vendor/faiss/faiss/IndexNNDescent.h +2 -2
- data/vendor/faiss/faiss/IndexNSG.cpp +40 -23
- data/vendor/faiss/faiss/IndexNSG.h +0 -2
- data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +32 -12
- data/vendor/faiss/faiss/IndexPQ.cpp +129 -213
- data/vendor/faiss/faiss/IndexPQ.h +3 -2
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +20 -14
- data/vendor/faiss/faiss/IndexPQFastScan.h +3 -0
- data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -18
- data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
- data/vendor/faiss/faiss/IndexRaBitQ.cpp +31 -43
- data/vendor/faiss/faiss/IndexRaBitQ.h +4 -3
- data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +135 -317
- data/vendor/faiss/faiss/IndexRaBitQFastScan.h +192 -34
- data/vendor/faiss/faiss/IndexRefine.cpp +30 -55
- data/vendor/faiss/faiss/IndexRefine.h +4 -4
- data/vendor/faiss/faiss/IndexReplicas.cpp +6 -6
- data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +15 -14
- data/vendor/faiss/faiss/IndexRowwiseMinMax.h +1 -1
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +82 -14
- data/vendor/faiss/faiss/IndexShards.cpp +13 -13
- data/vendor/faiss/faiss/IndexShardsIVF.cpp +21 -15
- data/vendor/faiss/faiss/MatrixStats.cpp +5 -4
- data/vendor/faiss/faiss/MetaIndexes.cpp +19 -17
- data/vendor/faiss/faiss/MetaIndexes.h +1 -1
- data/vendor/faiss/faiss/MetricType.h +29 -6
- data/vendor/faiss/faiss/SuperKMeans.cpp +656 -0
- data/vendor/faiss/faiss/SuperKMeans.h +97 -0
- data/vendor/faiss/faiss/VectorTransform.cpp +349 -141
- data/vendor/faiss/faiss/VectorTransform.h +39 -16
- data/vendor/faiss/faiss/build.cpp +23 -0
- data/vendor/faiss/faiss/build.h +15 -0
- data/vendor/faiss/faiss/clone_index.cpp +55 -51
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +47 -47
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +11 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +38 -38
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +11 -0
- data/vendor/faiss/faiss/{cppcontrib/factory_tools.cpp → factory_tools.cpp} +6 -1
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +1 -1
- data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +6 -5
- data/vendor/faiss/faiss/gpu/GpuResources.h +1 -1
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +9 -9
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +4 -3
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +46 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +56 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +78 -1
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +72 -0
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +23 -0
- data/vendor/faiss/faiss/gpu/utils/CuvsFilterConvert.h +1 -1
- data/vendor/faiss/faiss/gpu/utils/CuvsUtils.h +21 -10
- data/vendor/faiss/faiss/gpu_metal/GpuIndexFlat.h +22 -0
- data/vendor/faiss/faiss/gpu_metal/MetalCloner.h +35 -0
- data/vendor/faiss/faiss/gpu_metal/MetalFlatKernels.h +40 -0
- data/vendor/faiss/faiss/gpu_metal/MetalIndex.h +51 -0
- data/vendor/faiss/faiss/gpu_metal/MetalIndexFlat.h +65 -0
- data/vendor/faiss/faiss/gpu_metal/MetalKernels.h +66 -0
- data/vendor/faiss/faiss/gpu_metal/MetalResources.h +79 -0
- data/vendor/faiss/faiss/gpu_metal/StandardMetalResources.h +35 -0
- data/vendor/faiss/faiss/impl/AdSampling.cpp +103 -0
- data/vendor/faiss/faiss/impl/AdSampling.h +35 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +64 -34
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +1 -0
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +10 -9
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +3 -28
- data/vendor/faiss/faiss/impl/ClusteringHelpers.cpp +244 -0
- data/vendor/faiss/faiss/impl/ClusteringHelpers.h +94 -0
- data/vendor/faiss/faiss/impl/ClusteringInitialization.cpp +367 -0
- data/vendor/faiss/faiss/impl/ClusteringInitialization.h +107 -0
- data/vendor/faiss/faiss/impl/CodePacker.cpp +7 -3
- data/vendor/faiss/faiss/impl/CodePacker.h +11 -3
- data/vendor/faiss/faiss/impl/CodePackerRaBitQ.cpp +83 -0
- data/vendor/faiss/faiss/impl/CodePackerRaBitQ.h +47 -0
- data/vendor/faiss/faiss/impl/DistanceComputer.h +8 -8
- data/vendor/faiss/faiss/impl/FaissAssert.h +64 -3
- data/vendor/faiss/faiss/impl/FaissException.h +50 -3
- data/vendor/faiss/faiss/impl/HNSW.cpp +117 -351
- data/vendor/faiss/faiss/impl/HNSW.h +21 -40
- data/vendor/faiss/faiss/impl/IDSelector.cpp +15 -11
- data/vendor/faiss/faiss/impl/IDSelector.h +8 -8
- data/vendor/faiss/faiss/impl/InvertedListScannerStats.h +26 -0
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +114 -102
- data/vendor/faiss/faiss/impl/NNDescent.cpp +63 -26
- data/vendor/faiss/faiss/impl/NNDescent.h +6 -2
- data/vendor/faiss/faiss/impl/NSG.cpp +44 -26
- data/vendor/faiss/faiss/impl/NSG.h +20 -10
- data/vendor/faiss/faiss/impl/Panorama.cpp +76 -52
- data/vendor/faiss/faiss/impl/Panorama.h +265 -78
- data/vendor/faiss/faiss/impl/PdxLayout.cpp +93 -0
- data/vendor/faiss/faiss/impl/PdxLayout.h +41 -0
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +62 -37
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +3 -3
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +35 -35
- data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +21 -16
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +99 -80
- data/vendor/faiss/faiss/impl/Quantizer.h +2 -2
- data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +135 -37
- data/vendor/faiss/faiss/impl/RaBitQUtils.h +148 -21
- data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +298 -301
- data/vendor/faiss/faiss/impl/RaBitQuantizer.h +3 -10
- data/vendor/faiss/faiss/impl/RaBitQuantizerMultiBit.cpp +15 -41
- data/vendor/faiss/faiss/impl/RaBitQuantizerMultiBit.h +0 -4
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +40 -32
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ResultHandler.h +218 -113
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +119 -2362
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +27 -3
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +14 -11
- data/vendor/faiss/faiss/impl/VisitedTable.cpp +42 -0
- data/vendor/faiss/faiss/impl/VisitedTable.h +76 -0
- data/vendor/faiss/faiss/impl/approx_topk/approx_topk.h +276 -0
- data/vendor/faiss/faiss/impl/approx_topk/avx2.cpp +68 -0
- data/vendor/faiss/faiss/{utils → impl}/approx_topk/generic.h +15 -8
- data/vendor/faiss/faiss/impl/approx_topk/neon.cpp +68 -0
- data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab-inl.h +169 -0
- data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab.h +117 -0
- data/vendor/faiss/faiss/impl/approx_topk/simdlib256-inl.h +146 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHNSW_impl.h +73 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHash_impl.h +270 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryIVF_impl.h +460 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexIVFSpectralHash_impl.h +159 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexPQ_impl.h +92 -0
- data/vendor/faiss/faiss/impl/binary_hamming/avx2.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/avx512.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/dispatch.h +143 -0
- data/vendor/faiss/faiss/impl/binary_hamming/neon.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/rvv.cpp +26 -0
- data/vendor/faiss/faiss/impl/expanded_scanners.h +163 -0
- data/vendor/faiss/faiss/impl/{FastScanDistancePostProcessing.h → fast_scan/FastScanDistancePostProcessing.h} +13 -6
- data/vendor/faiss/faiss/impl/{LookupTableScaler.h → fast_scan/LookupTableScaler.h} +16 -5
- data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops.h +237 -0
- data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops_512.h +185 -0
- data/vendor/faiss/faiss/impl/fast_scan/decompose_qbs.h +229 -0
- data/vendor/faiss/faiss/impl/fast_scan/dispatching.h +268 -0
- data/vendor/faiss/faiss/impl/{pq4_fast_scan.cpp → fast_scan/fast_scan.cpp} +176 -4
- data/vendor/faiss/faiss/impl/fast_scan/fast_scan.h +341 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-avx2.cpp +36 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-avx512.cpp +40 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-neon.cpp +120 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-riscv.cpp +104 -0
- data/vendor/faiss/faiss/impl/fast_scan/kernels_simd256.h +213 -0
- data/vendor/faiss/faiss/impl/{pq4_fast_scan_search_qbs.cpp → fast_scan/kernels_simd512.h} +26 -348
- data/vendor/faiss/faiss/impl/fast_scan/rabitq_dispatching.h +90 -0
- data/vendor/faiss/faiss/impl/fast_scan/rabitq_result_handler.h +108 -0
- data/vendor/faiss/faiss/impl/{simd_result_handlers.h → fast_scan/simd_result_handlers.h} +290 -142
- data/vendor/faiss/faiss/impl/hnsw/LockVector.cpp +54 -0
- data/vendor/faiss/faiss/impl/hnsw/LockVector.h +64 -0
- data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.cpp +91 -0
- data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.h +64 -0
- data/vendor/faiss/faiss/impl/hnsw/avx2.cpp +104 -0
- data/vendor/faiss/faiss/impl/hnsw/avx512.cpp +111 -0
- data/vendor/faiss/faiss/impl/index_read.cpp +1950 -505
- data/vendor/faiss/faiss/impl/index_read_utils.h +1 -2
- data/vendor/faiss/faiss/impl/index_write.cpp +112 -21
- data/vendor/faiss/faiss/impl/io.cpp +6 -6
- data/vendor/faiss/faiss/impl/io_macros.h +33 -16
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +10 -10
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +81 -40
- data/vendor/faiss/faiss/impl/lattice_Zn.h +6 -6
- data/vendor/faiss/faiss/impl/mapped_io.cpp +15 -8
- data/vendor/faiss/faiss/impl/platform_macros.h +11 -4
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQScanner_impl.h +549 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.cpp +245 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.h +105 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/PQDistanceComputer_impl.h +106 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/avx2.cpp +21 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/avx512.cpp +21 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/neon.cpp +21 -0
- data/vendor/faiss/faiss/impl/{code_distance/code_distance-avx2.h → pq_code_distance/pq_code_distance-avx2.h} +43 -220
- data/vendor/faiss/faiss/impl/{code_distance/code_distance-avx512.h → pq_code_distance/pq_code_distance-avx512.h} +25 -112
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.cpp +59 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.h +96 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-inl.h +256 -0
- data/vendor/faiss/faiss/impl/{code_distance/code_distance-sve.h → pq_code_distance/pq_code_distance-sve.cpp} +57 -146
- data/vendor/faiss/faiss/impl/pq_code_distance/rvv.cpp +68 -0
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +320 -483
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +1 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/codecs.h +121 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/distance_computers.h +137 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/quantizers.h +371 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/scanners.h +190 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/similarities.h +94 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx2.cpp +603 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512.cpp +597 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-dispatch.h +388 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-neon.cpp +630 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-rvv.cpp +311 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/training.cpp +387 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/training.h +54 -0
- data/vendor/faiss/faiss/impl/simd_dispatch.h +173 -0
- data/vendor/faiss/faiss/impl/simdlib/simdlib.h +57 -0
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_avx2.h +274 -171
- data/vendor/faiss/faiss/impl/simdlib/simdlib_avx512.h +414 -0
- data/vendor/faiss/faiss/impl/simdlib/simdlib_dispatch.h +44 -0
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_emulated.h +231 -166
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_neon.h +275 -217
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_ppc64.h +201 -160
- data/vendor/faiss/faiss/impl/svs_io.cpp +12 -3
- data/vendor/faiss/faiss/impl/svs_io.h +8 -2
- data/vendor/faiss/faiss/index_factory.cpp +115 -28
- data/vendor/faiss/faiss/index_io.h +53 -3
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +73 -20
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +24 -14
- data/vendor/faiss/faiss/invlists/DirectMap.h +4 -3
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +157 -73
- data/vendor/faiss/faiss/invlists/InvertedLists.h +86 -23
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +4 -4
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +14 -14
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSFaissUtils.h +9 -19
- data/vendor/faiss/faiss/svs/IndexSVSFlat.cpp +2 -2
- data/vendor/faiss/faiss/svs/IndexSVSFlat.h +2 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVF.cpp +350 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVF.h +128 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.cpp +40 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.h +43 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.cpp +225 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.h +71 -0
- data/vendor/faiss/faiss/svs/IndexSVSVamana.cpp +25 -1
- data/vendor/faiss/faiss/svs/IndexSVSVamana.h +19 -2
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.cpp +19 -2
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.h +14 -0
- data/vendor/faiss/faiss/utils/Heap.cpp +56 -10
- data/vendor/faiss/faiss/utils/Heap.h +21 -0
- data/vendor/faiss/faiss/utils/NeuralNet.cpp +54 -40
- data/vendor/faiss/faiss/utils/NeuralNet.h +1 -1
- data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +10 -4
- data/vendor/faiss/faiss/utils/distances.cpp +507 -559
- data/vendor/faiss/faiss/utils/distances.h +118 -1
- data/vendor/faiss/faiss/utils/distances_dispatch.h +250 -0
- data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +8 -7
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +33 -14
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +12 -1
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +16 -293
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based_neon.cpp +57 -0
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_kernel-inl.h +290 -0
- data/vendor/faiss/faiss/utils/distances_simd.cpp +72 -3681
- data/vendor/faiss/faiss/utils/extra_distances.cpp +60 -102
- data/vendor/faiss/faiss/utils/extra_distances.h +79 -7
- data/vendor/faiss/faiss/utils/hamming-inl.h +13 -11
- data/vendor/faiss/faiss/utils/hamming.cpp +66 -517
- data/vendor/faiss/faiss/utils/hamming.h +92 -2
- data/vendor/faiss/faiss/utils/hamming_distance/common.h +287 -10
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx2.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx512.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx2.h +142 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512.h +234 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-generic.h +368 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-neon.h +322 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-rvv.h +39 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer.h +146 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_impl.h +481 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_neon.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_rvv.cpp +15 -0
- data/vendor/faiss/faiss/utils/partitioning.cpp +66 -987
- data/vendor/faiss/faiss/utils/partitioning.h +31 -0
- data/vendor/faiss/faiss/utils/popcount.h +29 -0
- data/vendor/faiss/faiss/utils/pq_code_distance.h +251 -0
- data/vendor/faiss/faiss/utils/prefetch.h +2 -2
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +30 -30
- data/vendor/faiss/faiss/utils/quantize_lut.h +1 -1
- data/vendor/faiss/faiss/utils/rabitq_simd.h +124 -343
- data/vendor/faiss/faiss/utils/random.cpp +6 -6
- data/vendor/faiss/faiss/utils/simd_impl/IVFFlatScanner-inl.h +51 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_aarch64.cpp +154 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_arm_sve.cpp +777 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_autovec-inl.h +306 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_avx2.cpp +1431 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_avx512.cpp +1095 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_rvv.cpp +189 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_simdlib256.h +195 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_sse-inl.h +392 -0
- data/vendor/faiss/faiss/utils/{distances_fused/simdlib_based.h → simd_impl/exhaustive_L2sqr_blas_cmax.h} +5 -10
- data/vendor/faiss/faiss/utils/simd_impl/hamming_impl.h +481 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_avx2.cpp +14 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_neon.cpp +14 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_simdlib256.h +1085 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx2.cpp +355 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx512.cpp +477 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_neon.cpp +55 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_rvv.cpp +55 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_dispatch.h +32 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels.h +43 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx2.cpp +57 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx512.cpp +45 -0
- data/vendor/faiss/faiss/utils/simd_levels.cpp +334 -0
- data/vendor/faiss/faiss/utils/simd_levels.h +183 -0
- data/vendor/faiss/faiss/utils/sorting.cpp +48 -36
- data/vendor/faiss/faiss/utils/utils.cpp +21 -14
- data/vendor/faiss/faiss/utils/utils.h +3 -3
- metadata +156 -42
- data/vendor/faiss/faiss/impl/RaBitQStats.cpp +0 -29
- data/vendor/faiss/faiss/impl/RaBitQStats.h +0 -56
- data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +0 -81
- data/vendor/faiss/faiss/impl/code_distance/code_distance.h +0 -186
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +0 -216
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +0 -224
- data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +0 -84
- data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +0 -196
- data/vendor/faiss/faiss/utils/approx_topk/mode.h +0 -34
- data/vendor/faiss/faiss/utils/distances_fused/avx512.h +0 -36
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +0 -228
- data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +0 -462
- data/vendor/faiss/faiss/utils/hamming_distance/avx512-inl.h +0 -490
- data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +0 -450
- data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +0 -87
- data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +0 -524
- data/vendor/faiss/faiss/utils/simdlib.h +0 -42
- data/vendor/faiss/faiss/utils/simdlib_avx512.h +0 -296
- /data/vendor/faiss/faiss/{cppcontrib/factory_tools.h → factory_tools.h} +0 -0
|
@@ -1,186 +0,0 @@
|
|
|
1
|
-
/*
|
|
2
|
-
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
-
*
|
|
4
|
-
* This source code is licensed under the MIT license found in the
|
|
5
|
-
* LICENSE file in the root directory of this source tree.
|
|
6
|
-
*/
|
|
7
|
-
|
|
8
|
-
#pragma once
|
|
9
|
-
|
|
10
|
-
#include <faiss/impl/platform_macros.h>
|
|
11
|
-
|
|
12
|
-
// This directory contains functions to compute a distance
|
|
13
|
-
// from a given PQ code to a query vector, given that the
|
|
14
|
-
// distances to a query vector for pq.M codebooks are precomputed.
|
|
15
|
-
//
|
|
16
|
-
// The code was originally part of IndexIVFPQ.cpp.
|
|
17
|
-
// The baseline implementation can be found in
|
|
18
|
-
// code_distance-generic.h, distance_single_code_generic().
|
|
19
|
-
|
|
20
|
-
// The reason for this somewhat unusual structure is that
|
|
21
|
-
// custom implementations may need to fall back to the generic
|
|
22
|
-
// implementation in certain cases. So, say, avx2 header file
|
|
23
|
-
// needs to reference the generic header file. This is
|
|
24
|
-
// why the names of the functions for custom implementations
|
|
25
|
-
// have this _generic or _avx2 suffix.
|
|
26
|
-
|
|
27
|
-
#ifdef __AVX2__
|
|
28
|
-
|
|
29
|
-
#include <faiss/impl/code_distance/code_distance-avx2.h>
|
|
30
|
-
|
|
31
|
-
namespace faiss {
|
|
32
|
-
|
|
33
|
-
template <typename PQDecoderT>
|
|
34
|
-
inline float distance_single_code(
|
|
35
|
-
// number of subquantizers
|
|
36
|
-
const size_t M,
|
|
37
|
-
// number of bits per quantization index
|
|
38
|
-
const size_t nbits,
|
|
39
|
-
// precomputed distances, layout (M, ksub)
|
|
40
|
-
const float* sim_table,
|
|
41
|
-
// the code
|
|
42
|
-
const uint8_t* code) {
|
|
43
|
-
return distance_single_code_avx2<PQDecoderT>(M, nbits, sim_table, code);
|
|
44
|
-
}
|
|
45
|
-
|
|
46
|
-
template <typename PQDecoderT>
|
|
47
|
-
inline void distance_four_codes(
|
|
48
|
-
// number of subquantizers
|
|
49
|
-
const size_t M,
|
|
50
|
-
// number of bits per quantization index
|
|
51
|
-
const size_t nbits,
|
|
52
|
-
// precomputed distances, layout (M, ksub)
|
|
53
|
-
const float* sim_table,
|
|
54
|
-
// codes
|
|
55
|
-
const uint8_t* __restrict code0,
|
|
56
|
-
const uint8_t* __restrict code1,
|
|
57
|
-
const uint8_t* __restrict code2,
|
|
58
|
-
const uint8_t* __restrict code3,
|
|
59
|
-
// computed distances
|
|
60
|
-
float& result0,
|
|
61
|
-
float& result1,
|
|
62
|
-
float& result2,
|
|
63
|
-
float& result3) {
|
|
64
|
-
distance_four_codes_avx2<PQDecoderT>(
|
|
65
|
-
M,
|
|
66
|
-
nbits,
|
|
67
|
-
sim_table,
|
|
68
|
-
code0,
|
|
69
|
-
code1,
|
|
70
|
-
code2,
|
|
71
|
-
code3,
|
|
72
|
-
result0,
|
|
73
|
-
result1,
|
|
74
|
-
result2,
|
|
75
|
-
result3);
|
|
76
|
-
}
|
|
77
|
-
|
|
78
|
-
} // namespace faiss
|
|
79
|
-
|
|
80
|
-
#elif defined(__ARM_FEATURE_SVE)
|
|
81
|
-
|
|
82
|
-
#include <faiss/impl/code_distance/code_distance-sve.h>
|
|
83
|
-
|
|
84
|
-
namespace faiss {
|
|
85
|
-
|
|
86
|
-
template <typename PQDecoderT>
|
|
87
|
-
inline float distance_single_code(
|
|
88
|
-
// number of subquantizers
|
|
89
|
-
const size_t M,
|
|
90
|
-
// number of bits per quantization index
|
|
91
|
-
const size_t nbits,
|
|
92
|
-
// precomputed distances, layout (M, ksub)
|
|
93
|
-
const float* sim_table,
|
|
94
|
-
// the code
|
|
95
|
-
const uint8_t* code) {
|
|
96
|
-
return distance_single_code_sve<PQDecoderT>(M, nbits, sim_table, code);
|
|
97
|
-
}
|
|
98
|
-
|
|
99
|
-
template <typename PQDecoderT>
|
|
100
|
-
inline void distance_four_codes(
|
|
101
|
-
// number of subquantizers
|
|
102
|
-
const size_t M,
|
|
103
|
-
// number of bits per quantization index
|
|
104
|
-
const size_t nbits,
|
|
105
|
-
// precomputed distances, layout (M, ksub)
|
|
106
|
-
const float* sim_table,
|
|
107
|
-
// codes
|
|
108
|
-
const uint8_t* __restrict code0,
|
|
109
|
-
const uint8_t* __restrict code1,
|
|
110
|
-
const uint8_t* __restrict code2,
|
|
111
|
-
const uint8_t* __restrict code3,
|
|
112
|
-
// computed distances
|
|
113
|
-
float& result0,
|
|
114
|
-
float& result1,
|
|
115
|
-
float& result2,
|
|
116
|
-
float& result3) {
|
|
117
|
-
distance_four_codes_sve<PQDecoderT>(
|
|
118
|
-
M,
|
|
119
|
-
nbits,
|
|
120
|
-
sim_table,
|
|
121
|
-
code0,
|
|
122
|
-
code1,
|
|
123
|
-
code2,
|
|
124
|
-
code3,
|
|
125
|
-
result0,
|
|
126
|
-
result1,
|
|
127
|
-
result2,
|
|
128
|
-
result3);
|
|
129
|
-
}
|
|
130
|
-
|
|
131
|
-
} // namespace faiss
|
|
132
|
-
|
|
133
|
-
#else
|
|
134
|
-
|
|
135
|
-
#include <faiss/impl/code_distance/code_distance-generic.h>
|
|
136
|
-
|
|
137
|
-
namespace faiss {
|
|
138
|
-
|
|
139
|
-
template <typename PQDecoderT>
|
|
140
|
-
inline float distance_single_code(
|
|
141
|
-
// number of subquantizers
|
|
142
|
-
const size_t M,
|
|
143
|
-
// number of bits per quantization index
|
|
144
|
-
const size_t nbits,
|
|
145
|
-
// precomputed distances, layout (M, ksub)
|
|
146
|
-
const float* sim_table,
|
|
147
|
-
// the code
|
|
148
|
-
const uint8_t* code) {
|
|
149
|
-
return distance_single_code_generic<PQDecoderT>(M, nbits, sim_table, code);
|
|
150
|
-
}
|
|
151
|
-
|
|
152
|
-
template <typename PQDecoderT>
|
|
153
|
-
inline void distance_four_codes(
|
|
154
|
-
// number of subquantizers
|
|
155
|
-
const size_t M,
|
|
156
|
-
// number of bits per quantization index
|
|
157
|
-
const size_t nbits,
|
|
158
|
-
// precomputed distances, layout (M, ksub)
|
|
159
|
-
const float* sim_table,
|
|
160
|
-
// codes
|
|
161
|
-
const uint8_t* __restrict code0,
|
|
162
|
-
const uint8_t* __restrict code1,
|
|
163
|
-
const uint8_t* __restrict code2,
|
|
164
|
-
const uint8_t* __restrict code3,
|
|
165
|
-
// computed distances
|
|
166
|
-
float& result0,
|
|
167
|
-
float& result1,
|
|
168
|
-
float& result2,
|
|
169
|
-
float& result3) {
|
|
170
|
-
distance_four_codes_generic<PQDecoderT>(
|
|
171
|
-
M,
|
|
172
|
-
nbits,
|
|
173
|
-
sim_table,
|
|
174
|
-
code0,
|
|
175
|
-
code1,
|
|
176
|
-
code2,
|
|
177
|
-
code3,
|
|
178
|
-
result0,
|
|
179
|
-
result1,
|
|
180
|
-
result2,
|
|
181
|
-
result3);
|
|
182
|
-
}
|
|
183
|
-
|
|
184
|
-
} // namespace faiss
|
|
185
|
-
|
|
186
|
-
#endif
|
|
@@ -1,216 +0,0 @@
|
|
|
1
|
-
/*
|
|
2
|
-
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
-
*
|
|
4
|
-
* This source code is licensed under the MIT license found in the
|
|
5
|
-
* LICENSE file in the root directory of this source tree.
|
|
6
|
-
*/
|
|
7
|
-
|
|
8
|
-
#pragma once
|
|
9
|
-
|
|
10
|
-
#include <cstdint>
|
|
11
|
-
#include <cstdlib>
|
|
12
|
-
|
|
13
|
-
#include <faiss/impl/CodePacker.h>
|
|
14
|
-
|
|
15
|
-
/** PQ4 SIMD packing and accumulation functions
|
|
16
|
-
*
|
|
17
|
-
* The basic kernel accumulates nq query vectors with bbs = nb * 2 * 16 vectors
|
|
18
|
-
* and produces an output matrix for that. It is interesting for nq * nb <= 4,
|
|
19
|
-
* otherwise register spilling becomes too large.
|
|
20
|
-
*
|
|
21
|
-
* The implementation of these functions is spread over 3 cpp files to reduce
|
|
22
|
-
* parallel compile times. Templates are instantiated explicitly.
|
|
23
|
-
*/
|
|
24
|
-
|
|
25
|
-
namespace faiss {
|
|
26
|
-
|
|
27
|
-
struct NormTableScaler;
|
|
28
|
-
struct SIMDResultHandler;
|
|
29
|
-
|
|
30
|
-
/** Pack codes for consumption by the SIMD kernels.
|
|
31
|
-
* The unused bytes are set to 0.
|
|
32
|
-
*
|
|
33
|
-
* @param codes input codes, size (ntotal, ceil(M / 2))
|
|
34
|
-
* @param ntotal number of input codes
|
|
35
|
-
* @param nb output number of codes (ntotal rounded up to a multiple of
|
|
36
|
-
* bbs)
|
|
37
|
-
* @param nsq number of sub-quantizers (=M rounded up to a multiple of 2)
|
|
38
|
-
* @param bbs size of database blocks (multiple of 32)
|
|
39
|
-
* @param blocks output array, size nb * nsq / 2.
|
|
40
|
-
* @param code_stride optional stride between consecutive codes (0 = use
|
|
41
|
-
default (M + 1) / 2)
|
|
42
|
-
*/
|
|
43
|
-
void pq4_pack_codes(
|
|
44
|
-
const uint8_t* codes,
|
|
45
|
-
size_t ntotal,
|
|
46
|
-
size_t M,
|
|
47
|
-
size_t nb,
|
|
48
|
-
size_t bbs,
|
|
49
|
-
size_t nsq,
|
|
50
|
-
uint8_t* blocks,
|
|
51
|
-
size_t code_stride = 0);
|
|
52
|
-
|
|
53
|
-
/** Same as pq4_pack_codes but writes into a given range of the output,
|
|
54
|
-
* leaving the rest untouched. Assumes allocated entries are 0 on input.
|
|
55
|
-
*
|
|
56
|
-
* @param codes input codes, size (i1 - i0, ceil(M / 2))
|
|
57
|
-
* @param i0 first output code to write
|
|
58
|
-
* @param i1      end of the range of output codes to write (exclusive)
|
|
59
|
-
* @param blocks output array, size at least ceil(i1 / bbs) * bbs * nsq / 2
|
|
60
|
-
* @param code_stride optional stride between consecutive codes (0 = use
|
|
61
|
-
* default (M + 1) / 2)
|
|
62
|
-
*/
|
|
63
|
-
void pq4_pack_codes_range(
|
|
64
|
-
const uint8_t* codes,
|
|
65
|
-
size_t M,
|
|
66
|
-
size_t i0,
|
|
67
|
-
size_t i1,
|
|
68
|
-
size_t bbs,
|
|
69
|
-
size_t nsq,
|
|
70
|
-
uint8_t* blocks,
|
|
71
|
-
size_t code_stride = 0);
|
|
72
|
-
|
|
73
|
-
/** get a single element from a packed codes table
|
|
74
|
-
*
|
|
75
|
-
* @param vector_id vector id
|
|
76
|
-
* @param sq subquantizer (< nsq)
|
|
77
|
-
*/
|
|
78
|
-
uint8_t pq4_get_packed_element(
|
|
79
|
-
const uint8_t* data,
|
|
80
|
-
size_t bbs,
|
|
81
|
-
size_t nsq,
|
|
82
|
-
size_t vector_id,
|
|
83
|
-
size_t sq);
|
|
84
|
-
|
|
85
|
-
/** set a single element "code" into a packed codes table
|
|
86
|
-
*
|
|
87
|
-
* @param vector_id vector id
|
|
88
|
-
* @param sq subquantizer (< nsq)
|
|
89
|
-
*/
|
|
90
|
-
void pq4_set_packed_element(
|
|
91
|
-
uint8_t* data,
|
|
92
|
-
uint8_t code,
|
|
93
|
-
size_t bbs,
|
|
94
|
-
size_t nsq,
|
|
95
|
-
size_t vector_id,
|
|
96
|
-
size_t sq);
|
|
97
|
-
|
|
98
|
-
/** CodePacker API for the PQ4 fast-scan */
|
|
99
|
-
struct CodePackerPQ4 : CodePacker {
|
|
100
|
-
size_t nsq;
|
|
101
|
-
|
|
102
|
-
CodePackerPQ4(size_t nsq, size_t bbs);
|
|
103
|
-
|
|
104
|
-
void pack_1(const uint8_t* flat_code, size_t offset, uint8_t* block)
|
|
105
|
-
const final;
|
|
106
|
-
void unpack_1(const uint8_t* block, size_t offset, uint8_t* flat_code)
|
|
107
|
-
const final;
|
|
108
|
-
};
|
|
109
|
-
|
|
110
|
-
/** Pack Look-up table for consumption by the kernel.
|
|
111
|
-
*
|
|
112
|
-
* @param nq number of queries
|
|
113
|
-
* @param nsq number of sub-quantizers (multiple of 2)
|
|
114
|
-
* @param src input array, size (nq, 16)
|
|
115
|
-
* @param dest output array, size (nq, 16)
|
|
116
|
-
*/
|
|
117
|
-
void pq4_pack_LUT(int nq, int nsq, const uint8_t* src, uint8_t* dest);
|
|
118
|
-
|
|
119
|
-
/** Loop over database elements and accumulate results into result handler
|
|
120
|
-
*
|
|
121
|
-
* @param nq number of queries
|
|
122
|
-
* @param nb number of database elements
|
|
123
|
-
* @param bbs size of database blocks (multiple of 32)
|
|
124
|
-
* @param nsq number of sub-quantizers (multiple of 2)
|
|
125
|
-
* @param codes packed codes array
|
|
126
|
-
* @param LUT packed look-up table
|
|
127
|
-
* @param scaler scaler to scale the encoded norm
|
|
128
|
-
*/
|
|
129
|
-
void pq4_accumulate_loop(
|
|
130
|
-
int nq,
|
|
131
|
-
size_t nb,
|
|
132
|
-
int bbs,
|
|
133
|
-
int nsq,
|
|
134
|
-
const uint8_t* codes,
|
|
135
|
-
const uint8_t* LUT,
|
|
136
|
-
SIMDResultHandler& res,
|
|
137
|
-
const NormTableScaler* scaler);
|
|
138
|
-
|
|
139
|
-
/* qbs versions, supported only for bbs=32.
|
|
140
|
-
*
|
|
141
|
-
* The kernel function runs the kernel for *several* query blocks
|
|
142
|
-
* and bbs database vectors. The sizes of the blocks are encoded in qbs as
|
|
143
|
-
* base-16 digits.
|
|
144
|
-
*
|
|
145
|
-
* For example, qbs = 0x1223 means that the kernel will be run 4 times, the
|
|
146
|
-
* first time with 3 query vectors, second time with 2 query vectors, then 2
|
|
147
|
-
* vectors again and finally with 1 query vector. The output block will thus be
|
|
148
|
-
* nq = 3 + 2 + 2 + 1 = 8 queries. For a given total block size, the optimal
|
|
149
|
-
* decomposition into sub-blocks (measured empirically) is given by
|
|
150
|
-
* preferred_qbs().
|
|
151
|
-
*/
|
|
152
|
-
|
|
153
|
-
/* compute the number of queries from a base-16 decomposition */
|
|
154
|
-
int pq4_qbs_to_nq(int qbs);
|
|
155
|
-
|
|
156
|
-
/** Return the preferred decomposition into blocks for a given number of queries. */
|
|
157
|
-
int pq4_preferred_qbs(int nq);
|
|
158
|
-
|
|
159
|
-
/** Pack Look-up table for consumption by the kernel.
|
|
160
|
-
*
|
|
161
|
-
* @param fqbs 4-bit encoded number of query blocks, the total number of
|
|
162
|
-
* queries handled (nq) is deduced from it
|
|
163
|
-
* @param nsq number of sub-quantizers (multiple of 2)
|
|
164
|
-
* @param src input array, size (nq, 16)
|
|
165
|
-
* @param dest output array, size (nq, 16)
|
|
166
|
-
* @return nq
|
|
167
|
-
*/
|
|
168
|
-
int pq4_pack_LUT_qbs(int fqbs, int nsq, const uint8_t* src, uint8_t* dest);
|
|
169
|
-
|
|
170
|
-
/** Same as pq4_pack_LUT_qbs, except the source vectors are remapped with q_map
|
|
171
|
-
*/
|
|
172
|
-
int pq4_pack_LUT_qbs_q_map(
|
|
173
|
-
int qbs,
|
|
174
|
-
int nsq,
|
|
175
|
-
const uint8_t* src,
|
|
176
|
-
const int* q_map,
|
|
177
|
-
uint8_t* dest);
|
|
178
|
-
|
|
179
|
-
/** Run accumulation loop.
|
|
180
|
-
*
|
|
181
|
-
* @param qbs 4-bit encoded number of queries
|
|
182
|
-
* @param nb number of database codes (multiple of bbs)
|
|
183
|
-
* @param nsq number of sub-quantizers
|
|
184
|
-
* @param codes encoded database vectors (packed)
|
|
185
|
-
* @param LUT look-up table (packed)
|
|
186
|
-
* @param res call-back for the results
|
|
187
|
-
* @param scaler scaler to scale the encoded norm
|
|
188
|
-
*/
|
|
189
|
-
void pq4_accumulate_loop_qbs(
|
|
190
|
-
int qbs,
|
|
191
|
-
size_t nb,
|
|
192
|
-
int nsq,
|
|
193
|
-
const uint8_t* codes,
|
|
194
|
-
const uint8_t* LUT,
|
|
195
|
-
SIMDResultHandler& res,
|
|
196
|
-
const NormTableScaler* scaler = nullptr);
|
|
197
|
-
|
|
198
|
-
/** Wrapper of pq4_accumulate_loop_qbs using simple StoreResultHandler
|
|
199
|
-
* and DummyScaler
|
|
200
|
-
*
|
|
201
|
-
* @param nq number of queries
|
|
202
|
-
* @param ntotal2 number of database elements (multiple of 32)
|
|
203
|
-
* @param nsq number of sub-quantizers (multiple of 2)
|
|
204
|
-
* @param codes packed codes array
|
|
205
|
-
* @param LUT packed look-up table
|
|
206
|
-
* @param accu array to store the results
|
|
207
|
-
*/
|
|
208
|
-
void accumulate_to_mem(
|
|
209
|
-
int nq,
|
|
210
|
-
size_t ntotal2,
|
|
211
|
-
int nsq,
|
|
212
|
-
const uint8_t* codes,
|
|
213
|
-
const uint8_t* LUT,
|
|
214
|
-
uint16_t* accu);
|
|
215
|
-
|
|
216
|
-
} // namespace faiss
|
|
@@ -1,224 +0,0 @@
|
|
|
1
|
-
/*
|
|
2
|
-
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
-
*
|
|
4
|
-
* This source code is licensed under the MIT license found in the
|
|
5
|
-
* LICENSE file in the root directory of this source tree.
|
|
6
|
-
*/
|
|
7
|
-
|
|
8
|
-
#include <faiss/impl/pq4_fast_scan.h>
|
|
9
|
-
|
|
10
|
-
#include <faiss/impl/FaissAssert.h>
|
|
11
|
-
#include <faiss/impl/LookupTableScaler.h>
|
|
12
|
-
#include <faiss/impl/simd_result_handlers.h>
|
|
13
|
-
|
|
14
|
-
namespace faiss {
|
|
15
|
-
|
|
16
|
-
using namespace simd_result_handlers;
|
|
17
|
-
|
|
18
|
-
/***************************************************************
|
|
19
|
-
* accumulation functions
|
|
20
|
-
***************************************************************/
|
|
21
|
-
|
|
22
|
-
namespace {
|
|
23
|
-
|
|
24
|
-
/*
|
|
25
|
-
* The computation kernel
|
|
26
|
-
* It accumulates results for NQ queries and BB * 32 database elements
|
|
27
|
-
* writes results in a ResultHandler
|
|
28
|
-
*/
|
|
29
|
-
|
|
30
|
-
template <int NQ, int BB, class ResultHandler, class Scaler>
|
|
31
|
-
void kernel_accumulate_block(
|
|
32
|
-
int nsq,
|
|
33
|
-
const uint8_t* codes,
|
|
34
|
-
const uint8_t* LUT,
|
|
35
|
-
ResultHandler& res,
|
|
36
|
-
const Scaler& scaler) {
|
|
37
|
-
// distance accumulators
|
|
38
|
-
simd16uint16 accu[NQ][BB][4];
|
|
39
|
-
|
|
40
|
-
for (int q = 0; q < NQ; q++) {
|
|
41
|
-
for (int b = 0; b < BB; b++) {
|
|
42
|
-
accu[q][b][0].clear();
|
|
43
|
-
accu[q][b][1].clear();
|
|
44
|
-
accu[q][b][2].clear();
|
|
45
|
-
accu[q][b][3].clear();
|
|
46
|
-
}
|
|
47
|
-
}
|
|
48
|
-
|
|
49
|
-
for (int sq = 0; sq < nsq - scaler.nscale; sq += 2) {
|
|
50
|
-
simd32uint8 lut_cache[NQ];
|
|
51
|
-
for (int q = 0; q < NQ; q++) {
|
|
52
|
-
lut_cache[q] = simd32uint8(LUT);
|
|
53
|
-
LUT += 32;
|
|
54
|
-
}
|
|
55
|
-
|
|
56
|
-
for (int b = 0; b < BB; b++) {
|
|
57
|
-
simd32uint8 c = simd32uint8(codes);
|
|
58
|
-
codes += 32;
|
|
59
|
-
simd32uint8 mask(15);
|
|
60
|
-
simd32uint8 chi = simd32uint8(simd16uint16(c) >> 4) & mask;
|
|
61
|
-
simd32uint8 clo = c & mask;
|
|
62
|
-
|
|
63
|
-
for (int q = 0; q < NQ; q++) {
|
|
64
|
-
simd32uint8 lut = lut_cache[q];
|
|
65
|
-
simd32uint8 res0 = lut.lookup_2_lanes(clo);
|
|
66
|
-
simd32uint8 res1 = lut.lookup_2_lanes(chi);
|
|
67
|
-
|
|
68
|
-
accu[q][b][0] += simd16uint16(res0);
|
|
69
|
-
accu[q][b][1] += simd16uint16(res0) >> 8;
|
|
70
|
-
|
|
71
|
-
accu[q][b][2] += simd16uint16(res1);
|
|
72
|
-
accu[q][b][3] += simd16uint16(res1) >> 8;
|
|
73
|
-
}
|
|
74
|
-
}
|
|
75
|
-
}
|
|
76
|
-
|
|
77
|
-
for (int sq = 0; sq < scaler.nscale; sq += 2) {
|
|
78
|
-
simd32uint8 lut_cache[NQ];
|
|
79
|
-
for (int q = 0; q < NQ; q++) {
|
|
80
|
-
lut_cache[q] = simd32uint8(LUT);
|
|
81
|
-
LUT += 32;
|
|
82
|
-
}
|
|
83
|
-
|
|
84
|
-
for (int b = 0; b < BB; b++) {
|
|
85
|
-
simd32uint8 c = simd32uint8(codes);
|
|
86
|
-
codes += 32;
|
|
87
|
-
simd32uint8 mask(15);
|
|
88
|
-
simd32uint8 chi = simd32uint8(simd16uint16(c) >> 4) & mask;
|
|
89
|
-
simd32uint8 clo = c & mask;
|
|
90
|
-
|
|
91
|
-
for (int q = 0; q < NQ; q++) {
|
|
92
|
-
simd32uint8 lut = lut_cache[q];
|
|
93
|
-
|
|
94
|
-
simd32uint8 res0 = scaler.lookup(lut, clo);
|
|
95
|
-
accu[q][b][0] += scaler.scale_lo(res0); // handle vectors 0..7
|
|
96
|
-
accu[q][b][1] += scaler.scale_hi(res0); // handle vectors 8..15
|
|
97
|
-
|
|
98
|
-
simd32uint8 res1 = scaler.lookup(lut, chi);
|
|
99
|
-
accu[q][b][2] += scaler.scale_lo(res1); // handle vectors 16..23
|
|
100
|
-
accu[q][b][3] +=
|
|
101
|
-
scaler.scale_hi(res1); // handle vectors 24..31
|
|
102
|
-
}
|
|
103
|
-
}
|
|
104
|
-
}
|
|
105
|
-
|
|
106
|
-
for (int q = 0; q < NQ; q++) {
|
|
107
|
-
for (int b = 0; b < BB; b++) {
|
|
108
|
-
accu[q][b][0] -= accu[q][b][1] << 8;
|
|
109
|
-
simd16uint16 dis0 = combine2x2(accu[q][b][0], accu[q][b][1]);
|
|
110
|
-
|
|
111
|
-
accu[q][b][2] -= accu[q][b][3] << 8;
|
|
112
|
-
simd16uint16 dis1 = combine2x2(accu[q][b][2], accu[q][b][3]);
|
|
113
|
-
|
|
114
|
-
res.handle(q, b, dis0, dis1);
|
|
115
|
-
}
|
|
116
|
-
}
|
|
117
|
-
}
|
|
118
|
-
|
|
119
|
-
template <int NQ, int BB, class ResultHandler, class Scaler>
|
|
120
|
-
void accumulate_fixed_blocks(
|
|
121
|
-
size_t nb,
|
|
122
|
-
int nsq,
|
|
123
|
-
const uint8_t* codes,
|
|
124
|
-
const uint8_t* LUT,
|
|
125
|
-
ResultHandler& res,
|
|
126
|
-
const Scaler& scaler) {
|
|
127
|
-
constexpr int bbs = 32 * BB;
|
|
128
|
-
for (size_t j0 = 0; j0 < nb; j0 += bbs) {
|
|
129
|
-
FixedStorageHandler<NQ, 2 * BB> res2;
|
|
130
|
-
kernel_accumulate_block<NQ, BB>(nsq, codes, LUT, res2, scaler);
|
|
131
|
-
res.set_block_origin(0, j0);
|
|
132
|
-
res2.to_other_handler(res);
|
|
133
|
-
codes += bbs * nsq / 2;
|
|
134
|
-
}
|
|
135
|
-
}
|
|
136
|
-
|
|
137
|
-
template <class ResultHandler, class Scaler>
|
|
138
|
-
void pq4_accumulate_loop_fixed_scaler(
|
|
139
|
-
int nq,
|
|
140
|
-
size_t nb,
|
|
141
|
-
int bbs,
|
|
142
|
-
int nsq,
|
|
143
|
-
const uint8_t* codes,
|
|
144
|
-
const uint8_t* LUT,
|
|
145
|
-
ResultHandler& res,
|
|
146
|
-
const Scaler& scaler) {
|
|
147
|
-
FAISS_THROW_IF_NOT(is_aligned_pointer(codes));
|
|
148
|
-
FAISS_THROW_IF_NOT(is_aligned_pointer(LUT));
|
|
149
|
-
FAISS_THROW_IF_NOT(bbs % 32 == 0);
|
|
150
|
-
FAISS_THROW_IF_NOT(nb % bbs == 0);
|
|
151
|
-
|
|
152
|
-
#define DISPATCH(NQ, BB) \
|
|
153
|
-
case NQ * 1000 + BB: \
|
|
154
|
-
accumulate_fixed_blocks<NQ, BB>(nb, nsq, codes, LUT, res, scaler); \
|
|
155
|
-
break
|
|
156
|
-
|
|
157
|
-
switch (nq * 1000 + bbs / 32) {
|
|
158
|
-
DISPATCH(1, 1);
|
|
159
|
-
DISPATCH(1, 2);
|
|
160
|
-
DISPATCH(1, 3);
|
|
161
|
-
DISPATCH(1, 4);
|
|
162
|
-
DISPATCH(1, 5);
|
|
163
|
-
DISPATCH(2, 1);
|
|
164
|
-
DISPATCH(2, 2);
|
|
165
|
-
DISPATCH(3, 1);
|
|
166
|
-
DISPATCH(4, 1);
|
|
167
|
-
default:
|
|
168
|
-
FAISS_THROW_FMT("nq=%d bbs=%d not instantiated", nq, bbs);
|
|
169
|
-
}
|
|
170
|
-
#undef DISPATCH
|
|
171
|
-
}
|
|
172
|
-
|
|
173
|
-
template <class ResultHandler>
|
|
174
|
-
void pq4_accumulate_loop_fixed_handler(
|
|
175
|
-
int nq,
|
|
176
|
-
size_t nb,
|
|
177
|
-
int bbs,
|
|
178
|
-
int nsq,
|
|
179
|
-
const uint8_t* codes,
|
|
180
|
-
const uint8_t* LUT,
|
|
181
|
-
ResultHandler& res,
|
|
182
|
-
const NormTableScaler* scaler) {
|
|
183
|
-
if (scaler) {
|
|
184
|
-
pq4_accumulate_loop_fixed_scaler(
|
|
185
|
-
nq, nb, bbs, nsq, codes, LUT, res, *scaler);
|
|
186
|
-
} else {
|
|
187
|
-
DummyScaler dscaler;
|
|
188
|
-
pq4_accumulate_loop_fixed_scaler(
|
|
189
|
-
nq, nb, bbs, nsq, codes, LUT, res, dscaler);
|
|
190
|
-
}
|
|
191
|
-
}
|
|
192
|
-
|
|
193
|
-
struct Run_pq4_accumulate_loop {
|
|
194
|
-
template <class ResultHandler>
|
|
195
|
-
void f(ResultHandler& res,
|
|
196
|
-
int nq,
|
|
197
|
-
size_t nb,
|
|
198
|
-
int bbs,
|
|
199
|
-
int nsq,
|
|
200
|
-
const uint8_t* codes,
|
|
201
|
-
const uint8_t* LUT,
|
|
202
|
-
const NormTableScaler* scaler) {
|
|
203
|
-
pq4_accumulate_loop_fixed_handler(
|
|
204
|
-
nq, nb, bbs, nsq, codes, LUT, res, scaler);
|
|
205
|
-
}
|
|
206
|
-
};
|
|
207
|
-
|
|
208
|
-
} // anonymous namespace
|
|
209
|
-
|
|
210
|
-
void pq4_accumulate_loop(
|
|
211
|
-
int nq,
|
|
212
|
-
size_t nb,
|
|
213
|
-
int bbs,
|
|
214
|
-
int nsq,
|
|
215
|
-
const uint8_t* codes,
|
|
216
|
-
const uint8_t* LUT,
|
|
217
|
-
SIMDResultHandler& res,
|
|
218
|
-
const NormTableScaler* scaler) {
|
|
219
|
-
Run_pq4_accumulate_loop consumer;
|
|
220
|
-
dispatch_SIMDResultHandler(
|
|
221
|
-
res, consumer, nq, nb, bbs, nsq, codes, LUT, scaler);
|
|
222
|
-
}
|
|
223
|
-
|
|
224
|
-
} // namespace faiss
|
|
@@ -1,84 +0,0 @@
|
|
|
1
|
-
/*
|
|
2
|
-
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
-
*
|
|
4
|
-
* This source code is licensed under the MIT license found in the
|
|
5
|
-
* LICENSE file in the root directory of this source tree.
|
|
6
|
-
*/
|
|
7
|
-
|
|
8
|
-
// This file contains an implementation of approximate top-k search
|
|
9
|
-
// using heap. It was initially created for a beam search.
|
|
10
|
-
//
|
|
11
|
-
// The core idea is the following.
|
|
12
|
-
// Say we need to find beam_size indices with the minimal distance
|
|
13
|
-
// values. It is done via heap (priority_queue) using the following
|
|
14
|
-
// pseudocode:
|
|
15
|
-
//
|
|
16
|
-
// def baseline():
|
|
17
|
-
// distances = np.empty([beam_size * n], dtype=float)
|
|
18
|
-
// indices = np.empty([beam_size * n], dtype=int)
|
|
19
|
-
//
|
|
20
|
-
// heap = Heap(max_heap_size=beam_size)
|
|
21
|
-
//
|
|
22
|
-
// for i in range(0, beam_size * n):
|
|
23
|
-
// heap.push(distances[i], indices[i])
|
|
24
|
-
//
|
|
25
|
-
// Basically, this is what heap_addn() function from utils/Heap.h does.
|
|
26
|
-
//
|
|
27
|
-
// The following scheme can be used for approximate beam search.
|
|
28
|
-
// Say, we need to find elements with min distance.
|
|
29
|
-
// Basically, we split n elements of every beam into NBUCKETS buckets
|
|
30
|
-
// and track the index with the minimal distance for every bucket.
|
|
31
|
-
// This can be effectively SIMD-ed and significantly lowers the number
|
|
32
|
-
// of operations, but yields approximate results for beam_size >= 2.
|
|
33
|
-
//
|
|
34
|
-
// def approximate_v1():
|
|
35
|
-
// distances = np.empty([beam_size * n], dtype=float)
|
|
36
|
-
// indices = np.empty([beam_size * n], dtype=int)
|
|
37
|
-
//
|
|
38
|
-
// heap = Heap(max_heap_size=beam_size)
|
|
39
|
-
//
|
|
40
|
-
// for beam in range(0, beam_size):
|
|
41
|
-
// # The value of 32 is just an example.
|
|
42
|
-
// # The value may be varied: the larger the value is,
|
|
43
|
-
// # the slower and the more precise vs baseline beam search is
|
|
44
|
-
// NBUCKETS = 32
|
|
45
|
-
//
|
|
46
|
-
// local_min_distances = [HUGE_VALF] * NBUCKETS
|
|
47
|
-
// local_min_indices = [0] * NBUCKETS
|
|
48
|
-
//
|
|
49
|
-
// for i in range(0, n / NBUCKETS):
|
|
50
|
-
// for j in range(0, NBUCKETS):
|
|
51
|
-
// idx = beam * n + i * NBUCKETS + j
|
|
52
|
-
// if distances[idx] < local_min_distances[j]:
|
|
53
|
-
// local_min_distances[j] = distances[idx]
|
|
54
|
-
// local_min_indices[j] = indices[idx]
|
|
55
|
-
//
|
|
56
|
-
// for j in range(0, NBUCKETS):
|
|
57
|
-
// heap.push(local_min_distances[j], local_min_indices[j])
|
|
58
|
-
//
|
|
59
|
-
// The accuracy can be improved by tracking min-2 elements for every
|
|
60
|
-
// bucket. Such a min-2 implementation with NBUCKETS buckets provides
|
|
61
|
-
// better accuracy than top-1 implementation with 2 * NBUCKETS buckets.
|
|
62
|
-
// Min-3 is also doable. One can use min-N approach, but I'm not sure
|
|
63
|
-
// whether min-4 and above are practical, because of the lack of SIMD
|
|
64
|
-
// registers (unless AVX-512 version is used).
|
|
65
|
-
//
|
|
66
|
-
// C++ template for top-N implementation is provided. The code
|
|
67
|
-
// assumes that indices[idx] == idx. One can write a code that lifts
|
|
68
|
-
// such an assumption easily.
|
|
69
|
-
//
|
|
70
|
-
// Currently, the code that tracks elements with min distances is implemented
|
|
71
|
-
// (Max Heap). Min Heap option can be added easily.
|
|
72
|
-
|
|
73
|
-
#pragma once
|
|
74
|
-
|
|
75
|
-
#include <faiss/impl/platform_macros.h>
|
|
76
|
-
|
|
77
|
-
// the list of available modes is in the following file
|
|
78
|
-
#include <faiss/utils/approx_topk/mode.h>
|
|
79
|
-
|
|
80
|
-
#ifdef __AVX2__
|
|
81
|
-
#include <faiss/utils/approx_topk/avx2-inl.h>
|
|
82
|
-
#else
|
|
83
|
-
#include <faiss/utils/approx_topk/generic.h>
|
|
84
|
-
#endif
|