faiss 0.5.3 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/ext/faiss/ext.cpp +1 -1
- data/ext/faiss/extconf.rb +4 -4
- data/ext/faiss/index.cpp +63 -45
- data/ext/faiss/index_binary.cpp +37 -27
- data/ext/faiss/kmeans.cpp +9 -8
- data/ext/faiss/pca_matrix.cpp +9 -7
- data/ext/faiss/product_quantizer.cpp +13 -11
- data/ext/faiss/utils.cpp +4 -2
- data/ext/faiss/utils.h +4 -0
- data/lib/faiss/version.rb +1 -1
- data/lib/faiss.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +214 -82
- data/vendor/faiss/faiss/AutoTune.h +14 -1
- data/vendor/faiss/faiss/Clustering.cpp +97 -249
- data/vendor/faiss/faiss/Clustering.h +18 -0
- data/vendor/faiss/faiss/IVFlib.cpp +67 -44
- data/vendor/faiss/faiss/Index.cpp +25 -12
- data/vendor/faiss/faiss/Index.h +26 -4
- data/vendor/faiss/faiss/Index2Layer.cpp +37 -53
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +68 -61
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +36 -34
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +4 -1
- data/vendor/faiss/faiss/IndexBinary.cpp +6 -3
- data/vendor/faiss/faiss/IndexBinary.h +4 -4
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +1 -1
- data/vendor/faiss/faiss/IndexBinaryFlat.h +1 -1
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -4
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +92 -95
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +9 -3
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +45 -236
- data/vendor/faiss/faiss/IndexBinaryHash.h +6 -6
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +120 -414
- data/vendor/faiss/faiss/IndexFastScan.cpp +105 -129
- data/vendor/faiss/faiss/IndexFastScan.h +35 -24
- data/vendor/faiss/faiss/IndexFlat.cpp +216 -152
- data/vendor/faiss/faiss/IndexFlat.h +32 -14
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +88 -41
- data/vendor/faiss/faiss/IndexFlatCodes.h +7 -1
- data/vendor/faiss/faiss/IndexHNSW.cpp +299 -187
- data/vendor/faiss/faiss/IndexHNSW.h +30 -14
- data/vendor/faiss/faiss/IndexIDMap.cpp +26 -22
- data/vendor/faiss/faiss/IndexIDMap.h +9 -7
- data/vendor/faiss/faiss/IndexIVF.cpp +535 -405
- data/vendor/faiss/faiss/IndexIVF.h +47 -16
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +77 -74
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +105 -99
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +6 -3
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +379 -249
- data/vendor/faiss/faiss/IndexIVFFastScan.h +65 -60
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +41 -124
- data/vendor/faiss/faiss/IndexIVFFlat.h +32 -0
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +89 -138
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.h +3 -1
- data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +18 -15
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +77 -907
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +184 -122
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +3 -0
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +23 -18
- data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +59 -60
- data/vendor/faiss/faiss/IndexIVFRaBitQ.h +4 -3
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +564 -416
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +269 -111
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +41 -127
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +1 -1
- data/vendor/faiss/faiss/IndexLSH.cpp +44 -25
- data/vendor/faiss/faiss/IndexLattice.cpp +41 -36
- data/vendor/faiss/faiss/IndexNNDescent.cpp +37 -21
- data/vendor/faiss/faiss/IndexNNDescent.h +2 -2
- data/vendor/faiss/faiss/IndexNSG.cpp +40 -23
- data/vendor/faiss/faiss/IndexNSG.h +0 -2
- data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +32 -12
- data/vendor/faiss/faiss/IndexPQ.cpp +129 -213
- data/vendor/faiss/faiss/IndexPQ.h +3 -2
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +20 -14
- data/vendor/faiss/faiss/IndexPQFastScan.h +3 -0
- data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -18
- data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
- data/vendor/faiss/faiss/IndexRaBitQ.cpp +31 -43
- data/vendor/faiss/faiss/IndexRaBitQ.h +4 -3
- data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +135 -317
- data/vendor/faiss/faiss/IndexRaBitQFastScan.h +192 -34
- data/vendor/faiss/faiss/IndexRefine.cpp +30 -55
- data/vendor/faiss/faiss/IndexRefine.h +4 -4
- data/vendor/faiss/faiss/IndexReplicas.cpp +6 -6
- data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +15 -14
- data/vendor/faiss/faiss/IndexRowwiseMinMax.h +1 -1
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +82 -14
- data/vendor/faiss/faiss/IndexShards.cpp +13 -13
- data/vendor/faiss/faiss/IndexShardsIVF.cpp +21 -15
- data/vendor/faiss/faiss/MatrixStats.cpp +5 -4
- data/vendor/faiss/faiss/MetaIndexes.cpp +19 -17
- data/vendor/faiss/faiss/MetaIndexes.h +1 -1
- data/vendor/faiss/faiss/MetricType.h +29 -6
- data/vendor/faiss/faiss/SuperKMeans.cpp +656 -0
- data/vendor/faiss/faiss/SuperKMeans.h +97 -0
- data/vendor/faiss/faiss/VectorTransform.cpp +349 -141
- data/vendor/faiss/faiss/VectorTransform.h +39 -16
- data/vendor/faiss/faiss/build.cpp +23 -0
- data/vendor/faiss/faiss/build.h +15 -0
- data/vendor/faiss/faiss/clone_index.cpp +55 -51
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +47 -47
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +11 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +38 -38
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +11 -0
- data/vendor/faiss/faiss/{cppcontrib/factory_tools.cpp → factory_tools.cpp} +6 -1
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +1 -1
- data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +6 -5
- data/vendor/faiss/faiss/gpu/GpuResources.h +1 -1
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +9 -9
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +4 -3
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +46 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +56 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +78 -1
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +72 -0
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +23 -0
- data/vendor/faiss/faiss/gpu/utils/CuvsFilterConvert.h +1 -1
- data/vendor/faiss/faiss/gpu/utils/CuvsUtils.h +21 -10
- data/vendor/faiss/faiss/gpu_metal/GpuIndexFlat.h +22 -0
- data/vendor/faiss/faiss/gpu_metal/MetalCloner.h +35 -0
- data/vendor/faiss/faiss/gpu_metal/MetalFlatKernels.h +40 -0
- data/vendor/faiss/faiss/gpu_metal/MetalIndex.h +51 -0
- data/vendor/faiss/faiss/gpu_metal/MetalIndexFlat.h +65 -0
- data/vendor/faiss/faiss/gpu_metal/MetalKernels.h +66 -0
- data/vendor/faiss/faiss/gpu_metal/MetalResources.h +79 -0
- data/vendor/faiss/faiss/gpu_metal/StandardMetalResources.h +35 -0
- data/vendor/faiss/faiss/impl/AdSampling.cpp +103 -0
- data/vendor/faiss/faiss/impl/AdSampling.h +35 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +64 -34
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +1 -0
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +10 -9
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +3 -28
- data/vendor/faiss/faiss/impl/ClusteringHelpers.cpp +244 -0
- data/vendor/faiss/faiss/impl/ClusteringHelpers.h +94 -0
- data/vendor/faiss/faiss/impl/ClusteringInitialization.cpp +367 -0
- data/vendor/faiss/faiss/impl/ClusteringInitialization.h +107 -0
- data/vendor/faiss/faiss/impl/CodePacker.cpp +7 -3
- data/vendor/faiss/faiss/impl/CodePacker.h +11 -3
- data/vendor/faiss/faiss/impl/CodePackerRaBitQ.cpp +83 -0
- data/vendor/faiss/faiss/impl/CodePackerRaBitQ.h +47 -0
- data/vendor/faiss/faiss/impl/DistanceComputer.h +8 -8
- data/vendor/faiss/faiss/impl/FaissAssert.h +64 -3
- data/vendor/faiss/faiss/impl/FaissException.h +50 -3
- data/vendor/faiss/faiss/impl/HNSW.cpp +117 -351
- data/vendor/faiss/faiss/impl/HNSW.h +21 -40
- data/vendor/faiss/faiss/impl/IDSelector.cpp +15 -11
- data/vendor/faiss/faiss/impl/IDSelector.h +8 -8
- data/vendor/faiss/faiss/impl/InvertedListScannerStats.h +26 -0
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +114 -102
- data/vendor/faiss/faiss/impl/NNDescent.cpp +63 -26
- data/vendor/faiss/faiss/impl/NNDescent.h +6 -2
- data/vendor/faiss/faiss/impl/NSG.cpp +44 -26
- data/vendor/faiss/faiss/impl/NSG.h +20 -10
- data/vendor/faiss/faiss/impl/Panorama.cpp +76 -52
- data/vendor/faiss/faiss/impl/Panorama.h +265 -78
- data/vendor/faiss/faiss/impl/PdxLayout.cpp +93 -0
- data/vendor/faiss/faiss/impl/PdxLayout.h +41 -0
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +62 -37
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +3 -3
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +35 -35
- data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +21 -16
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +99 -80
- data/vendor/faiss/faiss/impl/Quantizer.h +2 -2
- data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +135 -37
- data/vendor/faiss/faiss/impl/RaBitQUtils.h +148 -21
- data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +298 -301
- data/vendor/faiss/faiss/impl/RaBitQuantizer.h +3 -10
- data/vendor/faiss/faiss/impl/RaBitQuantizerMultiBit.cpp +15 -41
- data/vendor/faiss/faiss/impl/RaBitQuantizerMultiBit.h +0 -4
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +40 -32
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ResultHandler.h +218 -113
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +119 -2362
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +27 -3
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +14 -11
- data/vendor/faiss/faiss/impl/VisitedTable.cpp +42 -0
- data/vendor/faiss/faiss/impl/VisitedTable.h +76 -0
- data/vendor/faiss/faiss/impl/approx_topk/approx_topk.h +276 -0
- data/vendor/faiss/faiss/impl/approx_topk/avx2.cpp +68 -0
- data/vendor/faiss/faiss/{utils → impl}/approx_topk/generic.h +15 -8
- data/vendor/faiss/faiss/impl/approx_topk/neon.cpp +68 -0
- data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab-inl.h +169 -0
- data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab.h +117 -0
- data/vendor/faiss/faiss/impl/approx_topk/simdlib256-inl.h +146 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHNSW_impl.h +73 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHash_impl.h +270 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryIVF_impl.h +460 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexIVFSpectralHash_impl.h +159 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexPQ_impl.h +92 -0
- data/vendor/faiss/faiss/impl/binary_hamming/avx2.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/avx512.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/dispatch.h +143 -0
- data/vendor/faiss/faiss/impl/binary_hamming/neon.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/rvv.cpp +26 -0
- data/vendor/faiss/faiss/impl/expanded_scanners.h +163 -0
- data/vendor/faiss/faiss/impl/{FastScanDistancePostProcessing.h → fast_scan/FastScanDistancePostProcessing.h} +13 -6
- data/vendor/faiss/faiss/impl/{LookupTableScaler.h → fast_scan/LookupTableScaler.h} +16 -5
- data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops.h +237 -0
- data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops_512.h +185 -0
- data/vendor/faiss/faiss/impl/fast_scan/decompose_qbs.h +229 -0
- data/vendor/faiss/faiss/impl/fast_scan/dispatching.h +268 -0
- data/vendor/faiss/faiss/impl/{pq4_fast_scan.cpp → fast_scan/fast_scan.cpp} +176 -4
- data/vendor/faiss/faiss/impl/fast_scan/fast_scan.h +341 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-avx2.cpp +36 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-avx512.cpp +40 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-neon.cpp +120 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-riscv.cpp +104 -0
- data/vendor/faiss/faiss/impl/fast_scan/kernels_simd256.h +213 -0
- data/vendor/faiss/faiss/impl/{pq4_fast_scan_search_qbs.cpp → fast_scan/kernels_simd512.h} +26 -348
- data/vendor/faiss/faiss/impl/fast_scan/rabitq_dispatching.h +90 -0
- data/vendor/faiss/faiss/impl/fast_scan/rabitq_result_handler.h +108 -0
- data/vendor/faiss/faiss/impl/{simd_result_handlers.h → fast_scan/simd_result_handlers.h} +290 -142
- data/vendor/faiss/faiss/impl/hnsw/LockVector.cpp +54 -0
- data/vendor/faiss/faiss/impl/hnsw/LockVector.h +64 -0
- data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.cpp +91 -0
- data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.h +64 -0
- data/vendor/faiss/faiss/impl/hnsw/avx2.cpp +104 -0
- data/vendor/faiss/faiss/impl/hnsw/avx512.cpp +111 -0
- data/vendor/faiss/faiss/impl/index_read.cpp +1950 -505
- data/vendor/faiss/faiss/impl/index_read_utils.h +1 -2
- data/vendor/faiss/faiss/impl/index_write.cpp +112 -21
- data/vendor/faiss/faiss/impl/io.cpp +6 -6
- data/vendor/faiss/faiss/impl/io_macros.h +33 -16
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +10 -10
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +81 -40
- data/vendor/faiss/faiss/impl/lattice_Zn.h +6 -6
- data/vendor/faiss/faiss/impl/mapped_io.cpp +15 -8
- data/vendor/faiss/faiss/impl/platform_macros.h +11 -4
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQScanner_impl.h +549 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.cpp +245 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.h +105 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/PQDistanceComputer_impl.h +106 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/avx2.cpp +21 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/avx512.cpp +21 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/neon.cpp +21 -0
- data/vendor/faiss/faiss/impl/{code_distance/code_distance-avx2.h → pq_code_distance/pq_code_distance-avx2.h} +43 -220
- data/vendor/faiss/faiss/impl/{code_distance/code_distance-avx512.h → pq_code_distance/pq_code_distance-avx512.h} +25 -112
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.cpp +59 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.h +96 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-inl.h +256 -0
- data/vendor/faiss/faiss/impl/{code_distance/code_distance-sve.h → pq_code_distance/pq_code_distance-sve.cpp} +57 -146
- data/vendor/faiss/faiss/impl/pq_code_distance/rvv.cpp +68 -0
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +320 -483
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +1 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/codecs.h +121 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/distance_computers.h +137 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/quantizers.h +371 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/scanners.h +190 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/similarities.h +94 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx2.cpp +603 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512.cpp +597 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-dispatch.h +388 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-neon.cpp +630 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-rvv.cpp +311 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/training.cpp +387 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/training.h +54 -0
- data/vendor/faiss/faiss/impl/simd_dispatch.h +173 -0
- data/vendor/faiss/faiss/impl/simdlib/simdlib.h +57 -0
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_avx2.h +274 -171
- data/vendor/faiss/faiss/impl/simdlib/simdlib_avx512.h +414 -0
- data/vendor/faiss/faiss/impl/simdlib/simdlib_dispatch.h +44 -0
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_emulated.h +231 -166
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_neon.h +275 -217
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_ppc64.h +201 -160
- data/vendor/faiss/faiss/impl/svs_io.cpp +12 -3
- data/vendor/faiss/faiss/impl/svs_io.h +8 -2
- data/vendor/faiss/faiss/index_factory.cpp +115 -28
- data/vendor/faiss/faiss/index_io.h +53 -3
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +73 -20
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +24 -14
- data/vendor/faiss/faiss/invlists/DirectMap.h +4 -3
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +157 -73
- data/vendor/faiss/faiss/invlists/InvertedLists.h +86 -23
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +4 -4
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +14 -14
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSFaissUtils.h +9 -19
- data/vendor/faiss/faiss/svs/IndexSVSFlat.cpp +2 -2
- data/vendor/faiss/faiss/svs/IndexSVSFlat.h +2 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVF.cpp +350 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVF.h +128 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.cpp +40 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.h +43 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.cpp +225 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.h +71 -0
- data/vendor/faiss/faiss/svs/IndexSVSVamana.cpp +25 -1
- data/vendor/faiss/faiss/svs/IndexSVSVamana.h +19 -2
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.cpp +19 -2
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.h +14 -0
- data/vendor/faiss/faiss/utils/Heap.cpp +56 -10
- data/vendor/faiss/faiss/utils/Heap.h +21 -0
- data/vendor/faiss/faiss/utils/NeuralNet.cpp +54 -40
- data/vendor/faiss/faiss/utils/NeuralNet.h +1 -1
- data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +10 -4
- data/vendor/faiss/faiss/utils/distances.cpp +507 -559
- data/vendor/faiss/faiss/utils/distances.h +118 -1
- data/vendor/faiss/faiss/utils/distances_dispatch.h +250 -0
- data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +8 -7
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +33 -14
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +12 -1
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +16 -293
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based_neon.cpp +57 -0
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_kernel-inl.h +290 -0
- data/vendor/faiss/faiss/utils/distances_simd.cpp +72 -3681
- data/vendor/faiss/faiss/utils/extra_distances.cpp +60 -102
- data/vendor/faiss/faiss/utils/extra_distances.h +79 -7
- data/vendor/faiss/faiss/utils/hamming-inl.h +13 -11
- data/vendor/faiss/faiss/utils/hamming.cpp +66 -517
- data/vendor/faiss/faiss/utils/hamming.h +92 -2
- data/vendor/faiss/faiss/utils/hamming_distance/common.h +287 -10
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx2.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx512.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx2.h +142 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512.h +234 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-generic.h +368 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-neon.h +322 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-rvv.h +39 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer.h +146 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_impl.h +481 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_neon.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_rvv.cpp +15 -0
- data/vendor/faiss/faiss/utils/partitioning.cpp +66 -987
- data/vendor/faiss/faiss/utils/partitioning.h +31 -0
- data/vendor/faiss/faiss/utils/popcount.h +29 -0
- data/vendor/faiss/faiss/utils/pq_code_distance.h +251 -0
- data/vendor/faiss/faiss/utils/prefetch.h +2 -2
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +30 -30
- data/vendor/faiss/faiss/utils/quantize_lut.h +1 -1
- data/vendor/faiss/faiss/utils/rabitq_simd.h +124 -343
- data/vendor/faiss/faiss/utils/random.cpp +6 -6
- data/vendor/faiss/faiss/utils/simd_impl/IVFFlatScanner-inl.h +51 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_aarch64.cpp +154 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_arm_sve.cpp +777 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_autovec-inl.h +306 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_avx2.cpp +1431 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_avx512.cpp +1095 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_rvv.cpp +189 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_simdlib256.h +195 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_sse-inl.h +392 -0
- data/vendor/faiss/faiss/utils/{distances_fused/simdlib_based.h → simd_impl/exhaustive_L2sqr_blas_cmax.h} +5 -10
- data/vendor/faiss/faiss/utils/simd_impl/hamming_impl.h +481 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_avx2.cpp +14 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_neon.cpp +14 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_simdlib256.h +1085 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx2.cpp +355 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx512.cpp +477 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_neon.cpp +55 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_rvv.cpp +55 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_dispatch.h +32 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels.h +43 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx2.cpp +57 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx512.cpp +45 -0
- data/vendor/faiss/faiss/utils/simd_levels.cpp +334 -0
- data/vendor/faiss/faiss/utils/simd_levels.h +183 -0
- data/vendor/faiss/faiss/utils/sorting.cpp +48 -36
- data/vendor/faiss/faiss/utils/utils.cpp +21 -14
- data/vendor/faiss/faiss/utils/utils.h +3 -3
- metadata +156 -42
- data/vendor/faiss/faiss/impl/RaBitQStats.cpp +0 -29
- data/vendor/faiss/faiss/impl/RaBitQStats.h +0 -56
- data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +0 -81
- data/vendor/faiss/faiss/impl/code_distance/code_distance.h +0 -186
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +0 -216
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +0 -224
- data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +0 -84
- data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +0 -196
- data/vendor/faiss/faiss/utils/approx_topk/mode.h +0 -34
- data/vendor/faiss/faiss/utils/distances_fused/avx512.h +0 -36
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +0 -228
- data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +0 -462
- data/vendor/faiss/faiss/utils/hamming_distance/avx512-inl.h +0 -490
- data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +0 -450
- data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +0 -87
- data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +0 -524
- data/vendor/faiss/faiss/utils/simdlib.h +0 -42
- data/vendor/faiss/faiss/utils/simdlib_avx512.h +0 -296
- /data/vendor/faiss/faiss/{cppcontrib/factory_tools.h → factory_tools.h} +0 -0
|
@@ -21,6 +21,7 @@
|
|
|
21
21
|
#include <faiss/impl/FaissAssert.h>
|
|
22
22
|
#include <faiss/impl/LocalSearchQuantizer.h>
|
|
23
23
|
#include <faiss/impl/ResidualQuantizer.h>
|
|
24
|
+
#include <faiss/impl/simd_dispatch.h>
|
|
24
25
|
#include <faiss/utils/Heap.h>
|
|
25
26
|
#include <faiss/utils/distances.h>
|
|
26
27
|
#include <faiss/utils/hamming.h>
|
|
@@ -47,13 +48,13 @@ int sgemm_(
|
|
|
47
48
|
namespace faiss {
|
|
48
49
|
|
|
49
50
|
AdditiveQuantizer::AdditiveQuantizer(
|
|
50
|
-
size_t
|
|
51
|
-
const std::vector<size_t>&
|
|
52
|
-
Search_type_t
|
|
53
|
-
: Quantizer(
|
|
54
|
-
M(
|
|
55
|
-
nbits(
|
|
56
|
-
search_type(
|
|
51
|
+
size_t d_in,
|
|
52
|
+
const std::vector<size_t>& nbits_in,
|
|
53
|
+
Search_type_t search_type_in)
|
|
54
|
+
: Quantizer(d_in),
|
|
55
|
+
M(nbits_in.size()),
|
|
56
|
+
nbits(nbits_in),
|
|
57
|
+
search_type(search_type_in) {
|
|
57
58
|
set_derived_values();
|
|
58
59
|
}
|
|
59
60
|
|
|
@@ -64,10 +65,12 @@ void AdditiveQuantizer::set_derived_values() {
|
|
|
64
65
|
tot_bits = 0;
|
|
65
66
|
only_8bit = true;
|
|
66
67
|
codebook_offsets.resize(M + 1, 0);
|
|
67
|
-
for (
|
|
68
|
+
for (size_t i = 0; i < M; i++) {
|
|
68
69
|
int nbit = nbits[i];
|
|
69
|
-
|
|
70
|
-
|
|
70
|
+
FAISS_CHECK_RANGE(nbit, 0, 31);
|
|
71
|
+
size_t k = (size_t)1 << nbit;
|
|
72
|
+
codebook_offsets[i + 1] =
|
|
73
|
+
add_no_overflow(codebook_offsets[i], k, "codebook_offsets");
|
|
71
74
|
tot_bits += nbit;
|
|
72
75
|
if (nbit != 0) {
|
|
73
76
|
only_8bit = false;
|
|
@@ -104,7 +107,7 @@ void AdditiveQuantizer::set_derived_values() {
|
|
|
104
107
|
void AdditiveQuantizer::train_norm(size_t n, const float* norms) {
|
|
105
108
|
norm_min = HUGE_VALF;
|
|
106
109
|
norm_max = -HUGE_VALF;
|
|
107
|
-
for (
|
|
110
|
+
for (size_t i = 0; i < n; i++) {
|
|
108
111
|
if (norms[i] < norm_min) {
|
|
109
112
|
norm_min = norms[i];
|
|
110
113
|
}
|
|
@@ -153,21 +156,42 @@ void AdditiveQuantizer::train_norm(size_t n, const float* norms) {
|
|
|
153
156
|
|
|
154
157
|
void AdditiveQuantizer::compute_codebook_tables() {
|
|
155
158
|
centroid_norms.resize(total_codebook_size);
|
|
159
|
+
FAISS_THROW_IF_NOT_FMT(
|
|
160
|
+
codebooks.size() >=
|
|
161
|
+
mul_no_overflow(
|
|
162
|
+
total_codebook_size, d, "codebooks validation"),
|
|
163
|
+
"codebooks size %zd too small for total_codebook_size=%zd * d=%zd",
|
|
164
|
+
codebooks.size(),
|
|
165
|
+
total_codebook_size,
|
|
166
|
+
d);
|
|
156
167
|
fvec_norms_L2sqr(
|
|
157
168
|
centroid_norms.data(), codebooks.data(), d, total_codebook_size);
|
|
158
169
|
size_t cross_table_size = 0;
|
|
159
|
-
for (
|
|
170
|
+
for (size_t m = 0; m < M; m++) {
|
|
171
|
+
FAISS_CHECK_RANGE(nbits[m], 0, 31);
|
|
160
172
|
size_t K = (size_t)1 << nbits[m];
|
|
161
|
-
|
|
173
|
+
size_t product =
|
|
174
|
+
mul_no_overflow(K, codebook_offsets[m], "cross_table_size");
|
|
175
|
+
cross_table_size = add_no_overflow(
|
|
176
|
+
cross_table_size, product, "cross_table_size accumulation");
|
|
162
177
|
}
|
|
163
178
|
codebook_cross_products.resize(cross_table_size);
|
|
164
179
|
size_t ofs = 0;
|
|
165
|
-
for (
|
|
180
|
+
for (size_t m = 1; m < M; m++) {
|
|
166
181
|
FINTEGER ki = (size_t)1 << nbits[m];
|
|
167
182
|
FINTEGER kk = codebook_offsets[m];
|
|
168
183
|
FINTEGER di = d;
|
|
169
184
|
float zero = 0, one = 1;
|
|
170
|
-
|
|
185
|
+
size_t step_size = (size_t)ki * (size_t)kk;
|
|
186
|
+
FAISS_THROW_IF_NOT_FMT(
|
|
187
|
+
add_no_overflow(ofs, step_size, "cross product table offset") <=
|
|
188
|
+
cross_table_size,
|
|
189
|
+
"cross product table overflow at step %zd: "
|
|
190
|
+
"%zd + %zd > %zd",
|
|
191
|
+
m,
|
|
192
|
+
ofs,
|
|
193
|
+
step_size,
|
|
194
|
+
cross_table_size);
|
|
171
195
|
sgemm_("Transposed",
|
|
172
196
|
"Not transposed",
|
|
173
197
|
&ki,
|
|
@@ -181,7 +205,7 @@ void AdditiveQuantizer::compute_codebook_tables() {
|
|
|
181
205
|
&zero,
|
|
182
206
|
codebook_cross_products.data() + ofs,
|
|
183
207
|
&ki);
|
|
184
|
-
ofs +=
|
|
208
|
+
ofs += step_size;
|
|
185
209
|
}
|
|
186
210
|
}
|
|
187
211
|
|
|
@@ -276,11 +300,12 @@ void AdditiveQuantizer::pack_codes(
|
|
|
276
300
|
norms = norm_buf.data();
|
|
277
301
|
}
|
|
278
302
|
}
|
|
303
|
+
int64_t n_signed = n;
|
|
279
304
|
#pragma omp parallel for if (n > 1000)
|
|
280
|
-
for (int64_t i = 0; i <
|
|
305
|
+
for (int64_t i = 0; i < n_signed; i++) {
|
|
281
306
|
const int32_t* codes1 = codes + i * ld_codes;
|
|
282
307
|
BitstringWriter bsw(packed_codes + i * code_size, code_size);
|
|
283
|
-
for (
|
|
308
|
+
for (size_t m = 0; m < M; m++) {
|
|
284
309
|
bsw.write(codes1[m], nbits[m]);
|
|
285
310
|
}
|
|
286
311
|
if (norm_bits != 0) {
|
|
@@ -293,12 +318,13 @@ void AdditiveQuantizer::decode(const uint8_t* code, float* x, size_t n) const {
|
|
|
293
318
|
FAISS_THROW_IF_NOT_MSG(
|
|
294
319
|
is_trained, "The additive quantizer is not trained yet.");
|
|
295
320
|
|
|
321
|
+
int64_t n_signed = n;
|
|
296
322
|
// standard additive quantizer decoding
|
|
297
323
|
#pragma omp parallel for if (n > 100)
|
|
298
|
-
for (int64_t i = 0; i <
|
|
324
|
+
for (int64_t i = 0; i < n_signed; i++) {
|
|
299
325
|
BitstringReader bsr(code + i * code_size, code_size);
|
|
300
326
|
float* xi = x + i * d;
|
|
301
|
-
for (
|
|
327
|
+
for (size_t m = 0; m < M; m++) {
|
|
302
328
|
int idx = bsr.read(nbits[m]);
|
|
303
329
|
const float* c = codebooks.data() + d * (codebook_offsets[m] + idx);
|
|
304
330
|
if (m == 0) {
|
|
@@ -322,12 +348,13 @@ void AdditiveQuantizer::decode_unpacked(
|
|
|
322
348
|
ld_codes = M;
|
|
323
349
|
}
|
|
324
350
|
|
|
351
|
+
int64_t n_signed = n;
|
|
325
352
|
// standard additive quantizer decoding
|
|
326
353
|
#pragma omp parallel for if (n > 1000)
|
|
327
|
-
for (int64_t i = 0; i <
|
|
354
|
+
for (int64_t i = 0; i < n_signed; i++) {
|
|
328
355
|
const int32_t* codesi = code + i * ld_codes;
|
|
329
356
|
float* xi = x + i * d;
|
|
330
|
-
for (
|
|
357
|
+
for (size_t m = 0; m < M; m++) {
|
|
331
358
|
int idx = codesi[m];
|
|
332
359
|
const float* c = codebooks.data() + d * (codebook_offsets[m] + idx);
|
|
333
360
|
if (m == 0) {
|
|
@@ -347,20 +374,23 @@ AdditiveQuantizer::~AdditiveQuantizer() {}
|
|
|
347
374
|
|
|
348
375
|
void AdditiveQuantizer::compute_centroid_norms(float* norms) const {
|
|
349
376
|
size_t ntotal = (size_t)1 << tot_bits;
|
|
377
|
+
int64_t ntotal_signed = ntotal;
|
|
350
378
|
// TODO: make tree of partial sums
|
|
379
|
+
with_simd_level([&]<SIMDLevel SL>() {
|
|
351
380
|
#pragma omp parallel
|
|
352
|
-
|
|
353
|
-
|
|
381
|
+
{
|
|
382
|
+
std::vector<float> tmp(d);
|
|
354
383
|
#pragma omp for
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
384
|
+
for (int64_t i = 0; i < ntotal_signed; i++) {
|
|
385
|
+
decode_64bit(i, tmp.data());
|
|
386
|
+
norms[i] = fvec_norm_L2sqr<SL>(tmp.data(), d);
|
|
387
|
+
}
|
|
358
388
|
}
|
|
359
|
-
}
|
|
389
|
+
});
|
|
360
390
|
}
|
|
361
391
|
|
|
362
392
|
void AdditiveQuantizer::decode_64bit(idx_t bits, float* xi) const {
|
|
363
|
-
for (
|
|
393
|
+
for (size_t m = 0; m < M; m++) {
|
|
364
394
|
idx_t idx = bits & (((size_t)1 << nbits[m]) - 1);
|
|
365
395
|
bits >>= nbits[m];
|
|
366
396
|
const float* c = codebooks.data() + d * (codebook_offsets[m] + idx);
|
|
@@ -410,7 +440,7 @@ void compute_inner_prod_with_LUT(
|
|
|
410
440
|
const float* LUT,
|
|
411
441
|
float* ips) {
|
|
412
442
|
size_t prev_size = 1;
|
|
413
|
-
for (
|
|
443
|
+
for (size_t m = 0; m < aq.M; m++) {
|
|
414
444
|
const float* LUTm = LUT + aq.codebook_offsets[m];
|
|
415
445
|
int nb = aq.nbits[m];
|
|
416
446
|
size_t nc = (size_t)1 << nb;
|
|
@@ -483,7 +513,7 @@ void AdditiveQuantizer::knn_centroids_L2(
|
|
|
483
513
|
// ||x - y||^2 = ||x||^2 + ||y||^2 - 2 * <x,y>
|
|
484
514
|
|
|
485
515
|
maxheap_heapify(k, distances_i, labels_i);
|
|
486
|
-
for (
|
|
516
|
+
for (size_t j = 0; j < ntotal; j++) {
|
|
487
517
|
float disj = q_norms[i] + norms[j] - 2 * dis[j];
|
|
488
518
|
if (disj < distances_i[0]) {
|
|
489
519
|
heap_replace_top<CMax<float, int64_t>>(
|
|
@@ -506,7 +536,7 @@ float accumulate_IPs(
|
|
|
506
536
|
BitstringReader& bs,
|
|
507
537
|
const float* LUT) {
|
|
508
538
|
float accu = 0;
|
|
509
|
-
for (
|
|
539
|
+
for (size_t m = 0; m < aq.M; m++) {
|
|
510
540
|
size_t nbit = aq.nbits[m];
|
|
511
541
|
int idx = bs.read(nbit);
|
|
512
542
|
accu += LUT[idx];
|
|
@@ -519,7 +549,7 @@ float compute_norm_from_LUT(const AdditiveQuantizer& aq, BitstringReader& bs) {
|
|
|
519
549
|
float accu = 0;
|
|
520
550
|
std::vector<int> idx(aq.M);
|
|
521
551
|
const float* c = aq.codebook_cross_products.data();
|
|
522
|
-
for (
|
|
552
|
+
for (size_t m = 0; m < aq.M; m++) {
|
|
523
553
|
size_t nbit = aq.nbits[m];
|
|
524
554
|
int i = bs.read(nbit);
|
|
525
555
|
size_t K = 1 << nbit;
|
|
@@ -527,7 +557,7 @@ float compute_norm_from_LUT(const AdditiveQuantizer& aq, BitstringReader& bs) {
|
|
|
527
557
|
|
|
528
558
|
accu += aq.centroid_norms[aq.codebook_offsets[m] + i];
|
|
529
559
|
|
|
530
|
-
for (
|
|
560
|
+
for (size_t l = 0; l < m; l++) {
|
|
531
561
|
int j = idx[l];
|
|
532
562
|
accu += 2 * c[j * K + i];
|
|
533
563
|
c += (1 << aq.nbits[l]) * K;
|
|
@@ -20,10 +20,11 @@ namespace faiss {
|
|
|
20
20
|
* RangeSearchResult
|
|
21
21
|
***********************************************************************/
|
|
22
22
|
|
|
23
|
-
RangeSearchResult::RangeSearchResult(size_t
|
|
23
|
+
RangeSearchResult::RangeSearchResult(size_t nq_in, bool alloc_lims)
|
|
24
|
+
: nq(nq_in) {
|
|
24
25
|
if (alloc_lims) {
|
|
25
|
-
lims = new size_t[
|
|
26
|
-
memset(lims, 0, sizeof(*lims) * (
|
|
26
|
+
lims = new size_t[nq_in + 1];
|
|
27
|
+
memset(lims, 0, sizeof(*lims) * (nq_in + 1));
|
|
27
28
|
} else {
|
|
28
29
|
lims = nullptr;
|
|
29
30
|
}
|
|
@@ -39,7 +40,7 @@ void RangeSearchResult::do_allocation() {
|
|
|
39
40
|
// simultaneously
|
|
40
41
|
FAISS_THROW_IF_NOT(labels == nullptr && distances == nullptr);
|
|
41
42
|
size_t ofs = 0;
|
|
42
|
-
for (
|
|
43
|
+
for (size_t i = 0; i < nq; i++) {
|
|
43
44
|
size_t n = lims[i];
|
|
44
45
|
lims[i] = ofs;
|
|
45
46
|
ofs += n;
|
|
@@ -59,12 +60,12 @@ RangeSearchResult::~RangeSearchResult() {
|
|
|
59
60
|
* BufferList
|
|
60
61
|
***********************************************************************/
|
|
61
62
|
|
|
62
|
-
BufferList::BufferList(size_t
|
|
63
|
-
wp =
|
|
63
|
+
BufferList::BufferList(size_t buffer_size_in) : buffer_size(buffer_size_in) {
|
|
64
|
+
wp = buffer_size_in;
|
|
64
65
|
}
|
|
65
66
|
|
|
66
67
|
BufferList::~BufferList() {
|
|
67
|
-
for (
|
|
68
|
+
for (size_t i = 0; i < buffers.size(); i++) {
|
|
68
69
|
delete[] buffers[i].ids;
|
|
69
70
|
delete[] buffers[i].dis;
|
|
70
71
|
}
|
|
@@ -140,7 +141,7 @@ void RangeSearchPartialResult::finalize() {
|
|
|
140
141
|
|
|
141
142
|
/// called by range_search before do_allocation
|
|
142
143
|
void RangeSearchPartialResult::set_lims() {
|
|
143
|
-
for (
|
|
144
|
+
for (size_t i = 0; i < queries.size(); i++) {
|
|
144
145
|
RangeQueryResult& qres = queries[i];
|
|
145
146
|
res->lims[qres.qno] = qres.nres;
|
|
146
147
|
}
|
|
@@ -149,7 +150,7 @@ void RangeSearchPartialResult::set_lims() {
|
|
|
149
150
|
/// called by range_search after do_allocation
|
|
150
151
|
void RangeSearchPartialResult::copy_result(bool incremental) {
|
|
151
152
|
size_t ofs = 0;
|
|
152
|
-
for (
|
|
153
|
+
for (size_t i = 0; i < queries.size(); i++) {
|
|
153
154
|
RangeQueryResult& qres = queries[i];
|
|
154
155
|
|
|
155
156
|
copy_range(
|
|
@@ -18,6 +18,8 @@
|
|
|
18
18
|
#include <mutex>
|
|
19
19
|
#include <vector>
|
|
20
20
|
|
|
21
|
+
#include <faiss/impl/InvertedListScannerStats.h>
|
|
22
|
+
|
|
21
23
|
#include <faiss/MetricType.h>
|
|
22
24
|
#include <faiss/impl/platform_macros.h>
|
|
23
25
|
|
|
@@ -92,6 +94,7 @@ struct RangeQueryResult {
|
|
|
92
94
|
idx_t qno; //< id of the query
|
|
93
95
|
size_t nres; //< nb of results for this query
|
|
94
96
|
RangeSearchPartialResult* pres;
|
|
97
|
+
InvertedListScannerStats stats;
|
|
95
98
|
|
|
96
99
|
/// called by search function to report a new result
|
|
97
100
|
void add(float dis, idx_t id);
|
|
@@ -169,34 +172,6 @@ struct TimeoutCallback : InterruptCallback {
|
|
|
169
172
|
static void reset(double timeout_in_seconds);
|
|
170
173
|
};
|
|
171
174
|
|
|
172
|
-
/// set implementation optimized for fast access.
|
|
173
|
-
struct VisitedTable {
|
|
174
|
-
std::vector<uint8_t> visited;
|
|
175
|
-
uint8_t visno;
|
|
176
|
-
|
|
177
|
-
explicit VisitedTable(int size) : visited(size), visno(1) {}
|
|
178
|
-
|
|
179
|
-
/// set flag #no to true
|
|
180
|
-
void set(int no) {
|
|
181
|
-
visited[no] = visno;
|
|
182
|
-
}
|
|
183
|
-
|
|
184
|
-
/// get flag #no
|
|
185
|
-
bool get(int no) const {
|
|
186
|
-
return visited[no] == visno;
|
|
187
|
-
}
|
|
188
|
-
|
|
189
|
-
/// reset all flags to false
|
|
190
|
-
void advance() {
|
|
191
|
-
visno++;
|
|
192
|
-
if (visno == 250) {
|
|
193
|
-
// 250 rather than 255 because sometimes we use visno and visno+1
|
|
194
|
-
memset(visited.data(), 0, sizeof(visited[0]) * visited.size());
|
|
195
|
-
visno = 1;
|
|
196
|
-
}
|
|
197
|
-
}
|
|
198
|
-
};
|
|
199
|
-
|
|
200
175
|
} // namespace faiss
|
|
201
176
|
|
|
202
177
|
#endif
|
|
@@ -0,0 +1,244 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
#include <faiss/impl/ClusteringHelpers.h>
|
|
9
|
+
|
|
10
|
+
#include <cassert>
|
|
11
|
+
#include <chrono>
|
|
12
|
+
#include <cinttypes>
|
|
13
|
+
#include <cstdint>
|
|
14
|
+
#include <cstdio>
|
|
15
|
+
#include <cstring>
|
|
16
|
+
#include <limits>
|
|
17
|
+
#include <vector>
|
|
18
|
+
|
|
19
|
+
#include <omp.h>
|
|
20
|
+
|
|
21
|
+
#include <faiss/Index.h>
|
|
22
|
+
#include <faiss/impl/FaissAssert.h>
|
|
23
|
+
#include <faiss/utils/random.h>
|
|
24
|
+
|
|
25
|
+
namespace faiss {
|
|
26
|
+
namespace detail {
|
|
27
|
+
|
|
28
|
+
uint64_t get_actual_rng_seed(const int seed) {
|
|
29
|
+
return (seed >= 0)
|
|
30
|
+
? seed
|
|
31
|
+
: static_cast<uint64_t>(std::chrono::high_resolution_clock::now()
|
|
32
|
+
.time_since_epoch()
|
|
33
|
+
.count());
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
idx_t subsample_training_set(
|
|
37
|
+
const Clustering& clus,
|
|
38
|
+
idx_t nx,
|
|
39
|
+
const uint8_t* x,
|
|
40
|
+
size_t line_size,
|
|
41
|
+
const float* weights,
|
|
42
|
+
uint8_t** x_out,
|
|
43
|
+
float** weights_out) {
|
|
44
|
+
FAISS_THROW_IF_NOT(clus.k > 0 && clus.max_points_per_centroid > 0);
|
|
45
|
+
if (clus.verbose) {
|
|
46
|
+
printf("Sampling a subset of %zd / %" PRId64 " for training\n",
|
|
47
|
+
clus.k * clus.max_points_per_centroid,
|
|
48
|
+
nx);
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
const uint64_t actual_seed = get_actual_rng_seed(clus.seed);
|
|
52
|
+
|
|
53
|
+
std::vector<idx_t> perm;
|
|
54
|
+
if (clus.use_faster_subsampling) {
|
|
55
|
+
SplitMix64RandomGenerator rng(actual_seed);
|
|
56
|
+
|
|
57
|
+
const idx_t new_nx = clus.k * clus.max_points_per_centroid;
|
|
58
|
+
perm.resize(new_nx);
|
|
59
|
+
assert(!perm.empty());
|
|
60
|
+
for (idx_t i = 0; i < new_nx; i++) {
|
|
61
|
+
perm[i] = rng.rand_int64() % nx;
|
|
62
|
+
}
|
|
63
|
+
} else {
|
|
64
|
+
FAISS_THROW_IF_NOT_FMT(
|
|
65
|
+
nx <= static_cast<idx_t>(std::numeric_limits<int>::max()),
|
|
66
|
+
"Dataset too large (%" PRId64
|
|
67
|
+
") for standard subsampling; "
|
|
68
|
+
"set use_faster_subsampling=true",
|
|
69
|
+
nx);
|
|
70
|
+
std::vector<int> int_perm(nx);
|
|
71
|
+
rand_perm(int_perm.data(), nx, actual_seed);
|
|
72
|
+
perm.assign(int_perm.begin(), int_perm.end());
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
nx = clus.k * clus.max_points_per_centroid;
|
|
76
|
+
FAISS_THROW_IF_NOT_FMT(
|
|
77
|
+
perm.size() >= static_cast<size_t>(nx),
|
|
78
|
+
"subsample_training_set: perm size %zu < required nx %" PRId64,
|
|
79
|
+
perm.size(),
|
|
80
|
+
nx);
|
|
81
|
+
assert(!perm.empty());
|
|
82
|
+
|
|
83
|
+
uint8_t* x_new = new uint8_t[nx * line_size];
|
|
84
|
+
*x_out = x_new;
|
|
85
|
+
|
|
86
|
+
for (idx_t i = 0; i < nx; i++) {
|
|
87
|
+
memcpy(x_new + i * line_size, x + perm[i] * line_size, line_size);
|
|
88
|
+
}
|
|
89
|
+
if (weights) {
|
|
90
|
+
float* weights_new = new float[nx];
|
|
91
|
+
for (idx_t i = 0; i < nx; i++) {
|
|
92
|
+
weights_new[i] = weights[perm[i]];
|
|
93
|
+
}
|
|
94
|
+
*weights_out = weights_new;
|
|
95
|
+
} else {
|
|
96
|
+
*weights_out = nullptr;
|
|
97
|
+
}
|
|
98
|
+
return nx;
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
void compute_centroids(
|
|
102
|
+
size_t d,
|
|
103
|
+
size_t k,
|
|
104
|
+
size_t n,
|
|
105
|
+
size_t k_frozen,
|
|
106
|
+
const uint8_t* x,
|
|
107
|
+
const Index* codec,
|
|
108
|
+
const int64_t* assign,
|
|
109
|
+
const float* weights,
|
|
110
|
+
float* hassign,
|
|
111
|
+
float* centroids) {
|
|
112
|
+
k -= k_frozen;
|
|
113
|
+
centroids += k_frozen * d;
|
|
114
|
+
|
|
115
|
+
memset(centroids, 0, sizeof(*centroids) * d * k);
|
|
116
|
+
|
|
117
|
+
size_t line_size = codec ? codec->sa_code_size() : d * sizeof(float);
|
|
118
|
+
|
|
119
|
+
#pragma omp parallel
|
|
120
|
+
{
|
|
121
|
+
int nt = omp_get_num_threads();
|
|
122
|
+
int rank = omp_get_thread_num();
|
|
123
|
+
|
|
124
|
+
// this thread is taking care of centroids c0:c1
|
|
125
|
+
size_t c0 = (k * rank) / nt;
|
|
126
|
+
size_t c1 = (k * (rank + 1)) / nt;
|
|
127
|
+
std::vector<float> decode_buffer(d);
|
|
128
|
+
|
|
129
|
+
for (size_t i = 0; i < n; i++) {
|
|
130
|
+
int64_t ci = assign[i];
|
|
131
|
+
FAISS_THROW_IF_NOT_MSG(
|
|
132
|
+
ci >= 0 && ci < k + k_frozen, "invalid cluster assignment");
|
|
133
|
+
ci -= k_frozen;
|
|
134
|
+
if (ci >= static_cast<int64_t>(c0) &&
|
|
135
|
+
ci < static_cast<int64_t>(c1)) {
|
|
136
|
+
float* c = centroids + ci * d;
|
|
137
|
+
const float* xi;
|
|
138
|
+
if (!codec) {
|
|
139
|
+
xi = reinterpret_cast<const float*>(x + i * line_size);
|
|
140
|
+
} else {
|
|
141
|
+
float* xif = decode_buffer.data();
|
|
142
|
+
codec->sa_decode(1, x + i * line_size, xif);
|
|
143
|
+
xi = xif;
|
|
144
|
+
}
|
|
145
|
+
if (weights) {
|
|
146
|
+
float w = weights[i];
|
|
147
|
+
hassign[ci] += w;
|
|
148
|
+
for (size_t j = 0; j < d; j++) {
|
|
149
|
+
c[j] += xi[j] * w;
|
|
150
|
+
}
|
|
151
|
+
} else {
|
|
152
|
+
hassign[ci] += 1.0;
|
|
153
|
+
for (size_t j = 0; j < d; j++) {
|
|
154
|
+
c[j] += xi[j];
|
|
155
|
+
}
|
|
156
|
+
}
|
|
157
|
+
}
|
|
158
|
+
}
|
|
159
|
+
}
|
|
160
|
+
|
|
161
|
+
#pragma omp parallel for
|
|
162
|
+
for (idx_t ci = 0; ci < static_cast<idx_t>(k); ci++) {
|
|
163
|
+
if (hassign[ci] == 0) {
|
|
164
|
+
continue;
|
|
165
|
+
}
|
|
166
|
+
float norm = 1 / hassign[ci];
|
|
167
|
+
float* c = centroids + ci * d;
|
|
168
|
+
for (size_t j = 0; j < d; j++) {
|
|
169
|
+
c[j] *= norm;
|
|
170
|
+
}
|
|
171
|
+
}
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
// a bit above machine epsilon for float16
|
|
175
|
+
static constexpr float EPS = 1.f / 1024.f;
|
|
176
|
+
|
|
177
|
+
int split_clusters(
|
|
178
|
+
size_t d,
|
|
179
|
+
size_t k,
|
|
180
|
+
size_t n,
|
|
181
|
+
size_t k_frozen,
|
|
182
|
+
float* hassign,
|
|
183
|
+
float* centroids) {
|
|
184
|
+
k -= k_frozen;
|
|
185
|
+
centroids += k_frozen * d;
|
|
186
|
+
FAISS_THROW_IF_NOT_MSG(
|
|
187
|
+
n > k,
|
|
188
|
+
"split_clusters: n must exceed k to find a non-empty donor centroid");
|
|
189
|
+
|
|
190
|
+
size_t nsplit = 0;
|
|
191
|
+
RandomGenerator rng(1234);
|
|
192
|
+
for (size_t ci = 0; ci < k; ci++) {
|
|
193
|
+
if (hassign[ci] == 0) {
|
|
194
|
+
// Probabilistic donor pick weighted by hassign; deterministic
|
|
195
|
+
// fallback to the largest cluster if too many iterations pass.
|
|
196
|
+
size_t cj;
|
|
197
|
+
size_t max_tries = 10 * k;
|
|
198
|
+
size_t n_tries = 0;
|
|
199
|
+
bool found = false;
|
|
200
|
+
for (cj = 0; n_tries < max_tries; cj = (cj + 1) % k) {
|
|
201
|
+
float p = (hassign[cj] - 1.0) / (float)(n - k);
|
|
202
|
+
float r = rng.rand_float();
|
|
203
|
+
if (r < p) {
|
|
204
|
+
found = true;
|
|
205
|
+
break;
|
|
206
|
+
}
|
|
207
|
+
n_tries++;
|
|
208
|
+
}
|
|
209
|
+
if (!found) {
|
|
210
|
+
// Deterministic fallback: split the largest cluster.
|
|
211
|
+
cj = 0;
|
|
212
|
+
for (size_t j = 1; j < k; j++) {
|
|
213
|
+
if (hassign[j] > hassign[cj]) {
|
|
214
|
+
cj = j;
|
|
215
|
+
}
|
|
216
|
+
}
|
|
217
|
+
}
|
|
218
|
+
memcpy(centroids + ci * d,
|
|
219
|
+
centroids + cj * d,
|
|
220
|
+
sizeof(*centroids) * d);
|
|
221
|
+
|
|
222
|
+
/* small symmetric perturbation */
|
|
223
|
+
for (size_t j = 0; j < d; j++) {
|
|
224
|
+
if (j % 2 == 0) {
|
|
225
|
+
centroids[ci * d + j] *= 1 + EPS;
|
|
226
|
+
centroids[cj * d + j] *= 1 - EPS;
|
|
227
|
+
} else {
|
|
228
|
+
centroids[ci * d + j] *= 1 - EPS;
|
|
229
|
+
centroids[cj * d + j] *= 1 + EPS;
|
|
230
|
+
}
|
|
231
|
+
}
|
|
232
|
+
|
|
233
|
+
/* assume even split of the cluster */
|
|
234
|
+
hassign[ci] = hassign[cj] / 2;
|
|
235
|
+
hassign[cj] -= hassign[ci];
|
|
236
|
+
nsplit++;
|
|
237
|
+
}
|
|
238
|
+
}
|
|
239
|
+
|
|
240
|
+
return static_cast<int>(nsplit);
|
|
241
|
+
}
|
|
242
|
+
|
|
243
|
+
} // namespace detail
|
|
244
|
+
} // namespace faiss
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
#pragma once
|
|
9
|
+
|
|
10
|
+
#include <cstddef>
|
|
11
|
+
#include <cstdint>
|
|
12
|
+
|
|
13
|
+
#include <faiss/Clustering.h>
|
|
14
|
+
#include <faiss/Index.h>
|
|
15
|
+
|
|
16
|
+
namespace faiss {
|
|
17
|
+
namespace detail {
|
|
18
|
+
|
|
19
|
+
/** Resolve the actual RNG seed for clustering helpers.
|
|
20
|
+
*
|
|
21
|
+
* If `seed >= 0`, returns `seed`. Otherwise returns a high-resolution
|
|
22
|
+
* timestamp so that callers get a non-deterministic seed.
|
|
23
|
+
*
|
|
24
|
+
* @param seed user-provided seed; negative values request a time-based seed
|
|
25
|
+
* @return the resolved seed
|
|
26
|
+
*/
|
|
27
|
+
uint64_t get_actual_rng_seed(const int seed);
|
|
28
|
+
|
|
29
|
+
/** Subsample a training set down to `clus.k * clus.max_points_per_centroid`
|
|
30
|
+
* rows.
|
|
31
|
+
*
|
|
32
|
+
* Allocates `*x_out` (and `*weights_out` when `weights` is non-null) with
|
|
33
|
+
* `new[]`; ownership is transferred to the caller.
|
|
34
|
+
*
|
|
35
|
+
* @param clus clustering parameters (reads `k`,
|
|
36
|
+
* `max_points_per_centroid`, `use_faster_subsampling`, `seed`, `verbose`)
|
|
37
|
+
* @param nx number of input training rows
|
|
38
|
+
* @param x input training data, row-major, `nx * line_size` bytes
|
|
39
|
+
* @param line_size bytes per training row
|
|
40
|
+
* @param weights optional per-row weights (length `nx`), or null
|
|
41
|
+
* @param x_out output: newly allocated subsampled rows
|
|
42
|
+
* @param weights_out output: newly allocated subsampled weights, or null
|
|
43
|
+
* @return number of rows in the subsampled set
|
|
44
|
+
*/
|
|
45
|
+
idx_t subsample_training_set(
|
|
46
|
+
const Clustering& clus,
|
|
47
|
+
idx_t nx,
|
|
48
|
+
const uint8_t* x,
|
|
49
|
+
size_t line_size,
|
|
50
|
+
const float* weights,
|
|
51
|
+
uint8_t** x_out,
|
|
52
|
+
float** weights_out);
|
|
53
|
+
|
|
54
|
+
/** compute centroids as (weighted) sum of training points
|
|
55
|
+
*
|
|
56
|
+
* @param x training vectors, size n * code_size (from codec)
|
|
57
|
+
* @param codec how to decode the vectors (if NULL then cast to float*)
|
|
58
|
+
* @param weights per-training vector weight, size n (or NULL)
|
|
59
|
+
* @param assign nearest centroid for each training vector, size n
|
|
60
|
+
* @param k_frozen do not update the k_frozen first centroids
|
|
61
|
+
* @param centroids centroid vectors (output only), size k * d
|
|
62
|
+
* @param hassign histogram of assignments per centroid (size k),
|
|
63
|
+
* should be 0 on input
|
|
64
|
+
*
|
|
65
|
+
*/
|
|
66
|
+
void compute_centroids(
|
|
67
|
+
size_t d,
|
|
68
|
+
size_t k,
|
|
69
|
+
size_t n,
|
|
70
|
+
size_t k_frozen,
|
|
71
|
+
const uint8_t* x,
|
|
72
|
+
const Index* codec,
|
|
73
|
+
const int64_t* assign,
|
|
74
|
+
const float* weights,
|
|
75
|
+
float* hassign,
|
|
76
|
+
float* centroids);
|
|
77
|
+
|
|
78
|
+
/** Handle empty clusters by splitting larger ones.
|
|
79
|
+
*
|
|
80
|
+
* It works by slightly changing the centroids to make 2 clusters from
|
|
81
|
+
* a single one. Takes the same arguments as compute_centroids.
|
|
82
|
+
*
|
|
83
|
+
* @return nb of splitting operations (larger is worse)
|
|
84
|
+
*/
|
|
85
|
+
int split_clusters(
|
|
86
|
+
size_t d,
|
|
87
|
+
size_t k,
|
|
88
|
+
size_t n,
|
|
89
|
+
size_t k_frozen,
|
|
90
|
+
float* hassign,
|
|
91
|
+
float* centroids);
|
|
92
|
+
|
|
93
|
+
} // namespace detail
|
|
94
|
+
} // namespace faiss
|