umappp 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +25 -0
- data/README.md +110 -0
- data/ext/umappp/extconf.rb +25 -0
- data/ext/umappp/numo.hpp +867 -0
- data/ext/umappp/umappp.cpp +225 -0
- data/lib/umappp/version.rb +5 -0
- data/lib/umappp.rb +41 -0
- data/vendor/Eigen/Cholesky +45 -0
- data/vendor/Eigen/CholmodSupport +48 -0
- data/vendor/Eigen/Core +384 -0
- data/vendor/Eigen/Dense +7 -0
- data/vendor/Eigen/Eigen +2 -0
- data/vendor/Eigen/Eigenvalues +60 -0
- data/vendor/Eigen/Geometry +59 -0
- data/vendor/Eigen/Householder +29 -0
- data/vendor/Eigen/IterativeLinearSolvers +48 -0
- data/vendor/Eigen/Jacobi +32 -0
- data/vendor/Eigen/KLUSupport +41 -0
- data/vendor/Eigen/LU +47 -0
- data/vendor/Eigen/MetisSupport +35 -0
- data/vendor/Eigen/OrderingMethods +70 -0
- data/vendor/Eigen/PaStiXSupport +49 -0
- data/vendor/Eigen/PardisoSupport +35 -0
- data/vendor/Eigen/QR +50 -0
- data/vendor/Eigen/QtAlignedMalloc +39 -0
- data/vendor/Eigen/SPQRSupport +34 -0
- data/vendor/Eigen/SVD +50 -0
- data/vendor/Eigen/Sparse +34 -0
- data/vendor/Eigen/SparseCholesky +37 -0
- data/vendor/Eigen/SparseCore +69 -0
- data/vendor/Eigen/SparseLU +50 -0
- data/vendor/Eigen/SparseQR +36 -0
- data/vendor/Eigen/StdDeque +27 -0
- data/vendor/Eigen/StdList +26 -0
- data/vendor/Eigen/StdVector +27 -0
- data/vendor/Eigen/SuperLUSupport +64 -0
- data/vendor/Eigen/UmfPackSupport +40 -0
- data/vendor/Eigen/src/Cholesky/LDLT.h +688 -0
- data/vendor/Eigen/src/Cholesky/LLT.h +558 -0
- data/vendor/Eigen/src/Cholesky/LLT_LAPACKE.h +99 -0
- data/vendor/Eigen/src/CholmodSupport/CholmodSupport.h +682 -0
- data/vendor/Eigen/src/Core/ArithmeticSequence.h +413 -0
- data/vendor/Eigen/src/Core/Array.h +417 -0
- data/vendor/Eigen/src/Core/ArrayBase.h +226 -0
- data/vendor/Eigen/src/Core/ArrayWrapper.h +209 -0
- data/vendor/Eigen/src/Core/Assign.h +90 -0
- data/vendor/Eigen/src/Core/AssignEvaluator.h +1010 -0
- data/vendor/Eigen/src/Core/Assign_MKL.h +178 -0
- data/vendor/Eigen/src/Core/BandMatrix.h +353 -0
- data/vendor/Eigen/src/Core/Block.h +448 -0
- data/vendor/Eigen/src/Core/BooleanRedux.h +162 -0
- data/vendor/Eigen/src/Core/CommaInitializer.h +164 -0
- data/vendor/Eigen/src/Core/ConditionEstimator.h +175 -0
- data/vendor/Eigen/src/Core/CoreEvaluators.h +1741 -0
- data/vendor/Eigen/src/Core/CoreIterators.h +132 -0
- data/vendor/Eigen/src/Core/CwiseBinaryOp.h +183 -0
- data/vendor/Eigen/src/Core/CwiseNullaryOp.h +1001 -0
- data/vendor/Eigen/src/Core/CwiseTernaryOp.h +197 -0
- data/vendor/Eigen/src/Core/CwiseUnaryOp.h +103 -0
- data/vendor/Eigen/src/Core/CwiseUnaryView.h +132 -0
- data/vendor/Eigen/src/Core/DenseBase.h +701 -0
- data/vendor/Eigen/src/Core/DenseCoeffsBase.h +685 -0
- data/vendor/Eigen/src/Core/DenseStorage.h +652 -0
- data/vendor/Eigen/src/Core/Diagonal.h +258 -0
- data/vendor/Eigen/src/Core/DiagonalMatrix.h +391 -0
- data/vendor/Eigen/src/Core/DiagonalProduct.h +28 -0
- data/vendor/Eigen/src/Core/Dot.h +318 -0
- data/vendor/Eigen/src/Core/EigenBase.h +160 -0
- data/vendor/Eigen/src/Core/ForceAlignedAccess.h +150 -0
- data/vendor/Eigen/src/Core/Fuzzy.h +155 -0
- data/vendor/Eigen/src/Core/GeneralProduct.h +465 -0
- data/vendor/Eigen/src/Core/GenericPacketMath.h +1040 -0
- data/vendor/Eigen/src/Core/GlobalFunctions.h +194 -0
- data/vendor/Eigen/src/Core/IO.h +258 -0
- data/vendor/Eigen/src/Core/IndexedView.h +237 -0
- data/vendor/Eigen/src/Core/Inverse.h +117 -0
- data/vendor/Eigen/src/Core/Map.h +171 -0
- data/vendor/Eigen/src/Core/MapBase.h +310 -0
- data/vendor/Eigen/src/Core/MathFunctions.h +2057 -0
- data/vendor/Eigen/src/Core/MathFunctionsImpl.h +200 -0
- data/vendor/Eigen/src/Core/Matrix.h +565 -0
- data/vendor/Eigen/src/Core/MatrixBase.h +547 -0
- data/vendor/Eigen/src/Core/NestByValue.h +85 -0
- data/vendor/Eigen/src/Core/NoAlias.h +109 -0
- data/vendor/Eigen/src/Core/NumTraits.h +335 -0
- data/vendor/Eigen/src/Core/PartialReduxEvaluator.h +232 -0
- data/vendor/Eigen/src/Core/PermutationMatrix.h +605 -0
- data/vendor/Eigen/src/Core/PlainObjectBase.h +1128 -0
- data/vendor/Eigen/src/Core/Product.h +191 -0
- data/vendor/Eigen/src/Core/ProductEvaluators.h +1179 -0
- data/vendor/Eigen/src/Core/Random.h +218 -0
- data/vendor/Eigen/src/Core/Redux.h +515 -0
- data/vendor/Eigen/src/Core/Ref.h +381 -0
- data/vendor/Eigen/src/Core/Replicate.h +142 -0
- data/vendor/Eigen/src/Core/Reshaped.h +454 -0
- data/vendor/Eigen/src/Core/ReturnByValue.h +119 -0
- data/vendor/Eigen/src/Core/Reverse.h +217 -0
- data/vendor/Eigen/src/Core/Select.h +164 -0
- data/vendor/Eigen/src/Core/SelfAdjointView.h +365 -0
- data/vendor/Eigen/src/Core/SelfCwiseBinaryOp.h +47 -0
- data/vendor/Eigen/src/Core/Solve.h +188 -0
- data/vendor/Eigen/src/Core/SolveTriangular.h +235 -0
- data/vendor/Eigen/src/Core/SolverBase.h +168 -0
- data/vendor/Eigen/src/Core/StableNorm.h +251 -0
- data/vendor/Eigen/src/Core/StlIterators.h +463 -0
- data/vendor/Eigen/src/Core/Stride.h +116 -0
- data/vendor/Eigen/src/Core/Swap.h +68 -0
- data/vendor/Eigen/src/Core/Transpose.h +464 -0
- data/vendor/Eigen/src/Core/Transpositions.h +386 -0
- data/vendor/Eigen/src/Core/TriangularMatrix.h +1001 -0
- data/vendor/Eigen/src/Core/VectorBlock.h +96 -0
- data/vendor/Eigen/src/Core/VectorwiseOp.h +784 -0
- data/vendor/Eigen/src/Core/Visitor.h +381 -0
- data/vendor/Eigen/src/Core/arch/AVX/Complex.h +372 -0
- data/vendor/Eigen/src/Core/arch/AVX/MathFunctions.h +228 -0
- data/vendor/Eigen/src/Core/arch/AVX/PacketMath.h +1574 -0
- data/vendor/Eigen/src/Core/arch/AVX/TypeCasting.h +115 -0
- data/vendor/Eigen/src/Core/arch/AVX512/Complex.h +422 -0
- data/vendor/Eigen/src/Core/arch/AVX512/MathFunctions.h +362 -0
- data/vendor/Eigen/src/Core/arch/AVX512/PacketMath.h +2303 -0
- data/vendor/Eigen/src/Core/arch/AVX512/TypeCasting.h +89 -0
- data/vendor/Eigen/src/Core/arch/AltiVec/Complex.h +417 -0
- data/vendor/Eigen/src/Core/arch/AltiVec/MathFunctions.h +90 -0
- data/vendor/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2937 -0
- data/vendor/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +221 -0
- data/vendor/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +629 -0
- data/vendor/Eigen/src/Core/arch/AltiVec/PacketMath.h +2711 -0
- data/vendor/Eigen/src/Core/arch/CUDA/Complex.h +258 -0
- data/vendor/Eigen/src/Core/arch/Default/BFloat16.h +700 -0
- data/vendor/Eigen/src/Core/arch/Default/ConjHelper.h +117 -0
- data/vendor/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1649 -0
- data/vendor/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +110 -0
- data/vendor/Eigen/src/Core/arch/Default/Half.h +942 -0
- data/vendor/Eigen/src/Core/arch/Default/Settings.h +49 -0
- data/vendor/Eigen/src/Core/arch/Default/TypeCasting.h +120 -0
- data/vendor/Eigen/src/Core/arch/GPU/MathFunctions.h +103 -0
- data/vendor/Eigen/src/Core/arch/GPU/PacketMath.h +1685 -0
- data/vendor/Eigen/src/Core/arch/GPU/TypeCasting.h +80 -0
- data/vendor/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
- data/vendor/Eigen/src/Core/arch/MSA/Complex.h +648 -0
- data/vendor/Eigen/src/Core/arch/MSA/MathFunctions.h +387 -0
- data/vendor/Eigen/src/Core/arch/MSA/PacketMath.h +1233 -0
- data/vendor/Eigen/src/Core/arch/NEON/Complex.h +584 -0
- data/vendor/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +183 -0
- data/vendor/Eigen/src/Core/arch/NEON/MathFunctions.h +75 -0
- data/vendor/Eigen/src/Core/arch/NEON/PacketMath.h +4587 -0
- data/vendor/Eigen/src/Core/arch/NEON/TypeCasting.h +1419 -0
- data/vendor/Eigen/src/Core/arch/SSE/Complex.h +351 -0
- data/vendor/Eigen/src/Core/arch/SSE/MathFunctions.h +199 -0
- data/vendor/Eigen/src/Core/arch/SSE/PacketMath.h +1505 -0
- data/vendor/Eigen/src/Core/arch/SSE/TypeCasting.h +142 -0
- data/vendor/Eigen/src/Core/arch/SVE/MathFunctions.h +44 -0
- data/vendor/Eigen/src/Core/arch/SVE/PacketMath.h +752 -0
- data/vendor/Eigen/src/Core/arch/SVE/TypeCasting.h +49 -0
- data/vendor/Eigen/src/Core/arch/SYCL/InteropHeaders.h +232 -0
- data/vendor/Eigen/src/Core/arch/SYCL/MathFunctions.h +301 -0
- data/vendor/Eigen/src/Core/arch/SYCL/PacketMath.h +670 -0
- data/vendor/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +694 -0
- data/vendor/Eigen/src/Core/arch/SYCL/TypeCasting.h +85 -0
- data/vendor/Eigen/src/Core/arch/ZVector/Complex.h +426 -0
- data/vendor/Eigen/src/Core/arch/ZVector/MathFunctions.h +233 -0
- data/vendor/Eigen/src/Core/arch/ZVector/PacketMath.h +1060 -0
- data/vendor/Eigen/src/Core/functors/AssignmentFunctors.h +177 -0
- data/vendor/Eigen/src/Core/functors/BinaryFunctors.h +541 -0
- data/vendor/Eigen/src/Core/functors/NullaryFunctors.h +189 -0
- data/vendor/Eigen/src/Core/functors/StlFunctors.h +166 -0
- data/vendor/Eigen/src/Core/functors/TernaryFunctors.h +25 -0
- data/vendor/Eigen/src/Core/functors/UnaryFunctors.h +1131 -0
- data/vendor/Eigen/src/Core/products/GeneralBlockPanelKernel.h +2645 -0
- data/vendor/Eigen/src/Core/products/GeneralMatrixMatrix.h +517 -0
- data/vendor/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +317 -0
- data/vendor/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +145 -0
- data/vendor/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +124 -0
- data/vendor/Eigen/src/Core/products/GeneralMatrixVector.h +518 -0
- data/vendor/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +136 -0
- data/vendor/Eigen/src/Core/products/Parallelizer.h +180 -0
- data/vendor/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +544 -0
- data/vendor/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +295 -0
- data/vendor/Eigen/src/Core/products/SelfadjointMatrixVector.h +262 -0
- data/vendor/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +118 -0
- data/vendor/Eigen/src/Core/products/SelfadjointProduct.h +133 -0
- data/vendor/Eigen/src/Core/products/SelfadjointRank2Update.h +94 -0
- data/vendor/Eigen/src/Core/products/TriangularMatrixMatrix.h +472 -0
- data/vendor/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +317 -0
- data/vendor/Eigen/src/Core/products/TriangularMatrixVector.h +350 -0
- data/vendor/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +255 -0
- data/vendor/Eigen/src/Core/products/TriangularSolverMatrix.h +337 -0
- data/vendor/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +167 -0
- data/vendor/Eigen/src/Core/products/TriangularSolverVector.h +148 -0
- data/vendor/Eigen/src/Core/util/BlasUtil.h +583 -0
- data/vendor/Eigen/src/Core/util/ConfigureVectorization.h +512 -0
- data/vendor/Eigen/src/Core/util/Constants.h +563 -0
- data/vendor/Eigen/src/Core/util/DisableStupidWarnings.h +106 -0
- data/vendor/Eigen/src/Core/util/ForwardDeclarations.h +322 -0
- data/vendor/Eigen/src/Core/util/IndexedViewHelper.h +186 -0
- data/vendor/Eigen/src/Core/util/IntegralConstant.h +272 -0
- data/vendor/Eigen/src/Core/util/MKL_support.h +137 -0
- data/vendor/Eigen/src/Core/util/Macros.h +1464 -0
- data/vendor/Eigen/src/Core/util/Memory.h +1163 -0
- data/vendor/Eigen/src/Core/util/Meta.h +812 -0
- data/vendor/Eigen/src/Core/util/NonMPL2.h +3 -0
- data/vendor/Eigen/src/Core/util/ReenableStupidWarnings.h +31 -0
- data/vendor/Eigen/src/Core/util/ReshapedHelper.h +51 -0
- data/vendor/Eigen/src/Core/util/StaticAssert.h +221 -0
- data/vendor/Eigen/src/Core/util/SymbolicIndex.h +293 -0
- data/vendor/Eigen/src/Core/util/XprHelper.h +856 -0
- data/vendor/Eigen/src/Eigenvalues/ComplexEigenSolver.h +346 -0
- data/vendor/Eigen/src/Eigenvalues/ComplexSchur.h +462 -0
- data/vendor/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +91 -0
- data/vendor/Eigen/src/Eigenvalues/EigenSolver.h +622 -0
- data/vendor/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +418 -0
- data/vendor/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +226 -0
- data/vendor/Eigen/src/Eigenvalues/HessenbergDecomposition.h +374 -0
- data/vendor/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +158 -0
- data/vendor/Eigen/src/Eigenvalues/RealQZ.h +657 -0
- data/vendor/Eigen/src/Eigenvalues/RealSchur.h +558 -0
- data/vendor/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +77 -0
- data/vendor/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +904 -0
- data/vendor/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +87 -0
- data/vendor/Eigen/src/Eigenvalues/Tridiagonalization.h +561 -0
- data/vendor/Eigen/src/Geometry/AlignedBox.h +486 -0
- data/vendor/Eigen/src/Geometry/AngleAxis.h +247 -0
- data/vendor/Eigen/src/Geometry/EulerAngles.h +114 -0
- data/vendor/Eigen/src/Geometry/Homogeneous.h +501 -0
- data/vendor/Eigen/src/Geometry/Hyperplane.h +282 -0
- data/vendor/Eigen/src/Geometry/OrthoMethods.h +235 -0
- data/vendor/Eigen/src/Geometry/ParametrizedLine.h +232 -0
- data/vendor/Eigen/src/Geometry/Quaternion.h +870 -0
- data/vendor/Eigen/src/Geometry/Rotation2D.h +199 -0
- data/vendor/Eigen/src/Geometry/RotationBase.h +206 -0
- data/vendor/Eigen/src/Geometry/Scaling.h +188 -0
- data/vendor/Eigen/src/Geometry/Transform.h +1563 -0
- data/vendor/Eigen/src/Geometry/Translation.h +202 -0
- data/vendor/Eigen/src/Geometry/Umeyama.h +166 -0
- data/vendor/Eigen/src/Geometry/arch/Geometry_SIMD.h +168 -0
- data/vendor/Eigen/src/Householder/BlockHouseholder.h +110 -0
- data/vendor/Eigen/src/Householder/Householder.h +176 -0
- data/vendor/Eigen/src/Householder/HouseholderSequence.h +545 -0
- data/vendor/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +226 -0
- data/vendor/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +212 -0
- data/vendor/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +229 -0
- data/vendor/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +394 -0
- data/vendor/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +453 -0
- data/vendor/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +444 -0
- data/vendor/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +198 -0
- data/vendor/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +117 -0
- data/vendor/Eigen/src/Jacobi/Jacobi.h +483 -0
- data/vendor/Eigen/src/KLUSupport/KLUSupport.h +358 -0
- data/vendor/Eigen/src/LU/Determinant.h +117 -0
- data/vendor/Eigen/src/LU/FullPivLU.h +877 -0
- data/vendor/Eigen/src/LU/InverseImpl.h +432 -0
- data/vendor/Eigen/src/LU/PartialPivLU.h +624 -0
- data/vendor/Eigen/src/LU/PartialPivLU_LAPACKE.h +83 -0
- data/vendor/Eigen/src/LU/arch/InverseSize4.h +351 -0
- data/vendor/Eigen/src/MetisSupport/MetisSupport.h +137 -0
- data/vendor/Eigen/src/OrderingMethods/Amd.h +435 -0
- data/vendor/Eigen/src/OrderingMethods/Eigen_Colamd.h +1863 -0
- data/vendor/Eigen/src/OrderingMethods/Ordering.h +153 -0
- data/vendor/Eigen/src/PaStiXSupport/PaStiXSupport.h +678 -0
- data/vendor/Eigen/src/PardisoSupport/PardisoSupport.h +545 -0
- data/vendor/Eigen/src/QR/ColPivHouseholderQR.h +674 -0
- data/vendor/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +97 -0
- data/vendor/Eigen/src/QR/CompleteOrthogonalDecomposition.h +635 -0
- data/vendor/Eigen/src/QR/FullPivHouseholderQR.h +713 -0
- data/vendor/Eigen/src/QR/HouseholderQR.h +434 -0
- data/vendor/Eigen/src/QR/HouseholderQR_LAPACKE.h +68 -0
- data/vendor/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +335 -0
- data/vendor/Eigen/src/SVD/BDCSVD.h +1366 -0
- data/vendor/Eigen/src/SVD/JacobiSVD.h +812 -0
- data/vendor/Eigen/src/SVD/JacobiSVD_LAPACKE.h +91 -0
- data/vendor/Eigen/src/SVD/SVDBase.h +376 -0
- data/vendor/Eigen/src/SVD/UpperBidiagonalization.h +414 -0
- data/vendor/Eigen/src/SparseCholesky/SimplicialCholesky.h +697 -0
- data/vendor/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +174 -0
- data/vendor/Eigen/src/SparseCore/AmbiVector.h +378 -0
- data/vendor/Eigen/src/SparseCore/CompressedStorage.h +274 -0
- data/vendor/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +352 -0
- data/vendor/Eigen/src/SparseCore/MappedSparseMatrix.h +67 -0
- data/vendor/Eigen/src/SparseCore/SparseAssign.h +270 -0
- data/vendor/Eigen/src/SparseCore/SparseBlock.h +571 -0
- data/vendor/Eigen/src/SparseCore/SparseColEtree.h +206 -0
- data/vendor/Eigen/src/SparseCore/SparseCompressedBase.h +370 -0
- data/vendor/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +722 -0
- data/vendor/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +150 -0
- data/vendor/Eigen/src/SparseCore/SparseDenseProduct.h +342 -0
- data/vendor/Eigen/src/SparseCore/SparseDiagonalProduct.h +138 -0
- data/vendor/Eigen/src/SparseCore/SparseDot.h +98 -0
- data/vendor/Eigen/src/SparseCore/SparseFuzzy.h +29 -0
- data/vendor/Eigen/src/SparseCore/SparseMap.h +305 -0
- data/vendor/Eigen/src/SparseCore/SparseMatrix.h +1518 -0
- data/vendor/Eigen/src/SparseCore/SparseMatrixBase.h +398 -0
- data/vendor/Eigen/src/SparseCore/SparsePermutation.h +178 -0
- data/vendor/Eigen/src/SparseCore/SparseProduct.h +181 -0
- data/vendor/Eigen/src/SparseCore/SparseRedux.h +49 -0
- data/vendor/Eigen/src/SparseCore/SparseRef.h +397 -0
- data/vendor/Eigen/src/SparseCore/SparseSelfAdjointView.h +659 -0
- data/vendor/Eigen/src/SparseCore/SparseSolverBase.h +124 -0
- data/vendor/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +198 -0
- data/vendor/Eigen/src/SparseCore/SparseTranspose.h +92 -0
- data/vendor/Eigen/src/SparseCore/SparseTriangularView.h +189 -0
- data/vendor/Eigen/src/SparseCore/SparseUtil.h +186 -0
- data/vendor/Eigen/src/SparseCore/SparseVector.h +478 -0
- data/vendor/Eigen/src/SparseCore/SparseView.h +254 -0
- data/vendor/Eigen/src/SparseCore/TriangularSolver.h +315 -0
- data/vendor/Eigen/src/SparseLU/SparseLU.h +923 -0
- data/vendor/Eigen/src/SparseLU/SparseLUImpl.h +66 -0
- data/vendor/Eigen/src/SparseLU/SparseLU_Memory.h +226 -0
- data/vendor/Eigen/src/SparseLU/SparseLU_Structs.h +110 -0
- data/vendor/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +375 -0
- data/vendor/Eigen/src/SparseLU/SparseLU_Utils.h +80 -0
- data/vendor/Eigen/src/SparseLU/SparseLU_column_bmod.h +181 -0
- data/vendor/Eigen/src/SparseLU/SparseLU_column_dfs.h +179 -0
- data/vendor/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +107 -0
- data/vendor/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +280 -0
- data/vendor/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +126 -0
- data/vendor/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +130 -0
- data/vendor/Eigen/src/SparseLU/SparseLU_panel_bmod.h +223 -0
- data/vendor/Eigen/src/SparseLU/SparseLU_panel_dfs.h +258 -0
- data/vendor/Eigen/src/SparseLU/SparseLU_pivotL.h +137 -0
- data/vendor/Eigen/src/SparseLU/SparseLU_pruneL.h +136 -0
- data/vendor/Eigen/src/SparseLU/SparseLU_relax_snode.h +83 -0
- data/vendor/Eigen/src/SparseQR/SparseQR.h +758 -0
- data/vendor/Eigen/src/StlSupport/StdDeque.h +116 -0
- data/vendor/Eigen/src/StlSupport/StdList.h +106 -0
- data/vendor/Eigen/src/StlSupport/StdVector.h +131 -0
- data/vendor/Eigen/src/StlSupport/details.h +84 -0
- data/vendor/Eigen/src/SuperLUSupport/SuperLUSupport.h +1025 -0
- data/vendor/Eigen/src/UmfPackSupport/UmfPackSupport.h +642 -0
- data/vendor/Eigen/src/misc/Image.h +82 -0
- data/vendor/Eigen/src/misc/Kernel.h +79 -0
- data/vendor/Eigen/src/misc/RealSvd2x2.h +55 -0
- data/vendor/Eigen/src/misc/blas.h +440 -0
- data/vendor/Eigen/src/misc/lapack.h +152 -0
- data/vendor/Eigen/src/misc/lapacke.h +16292 -0
- data/vendor/Eigen/src/misc/lapacke_mangling.h +17 -0
- data/vendor/Eigen/src/plugins/ArrayCwiseBinaryOps.h +358 -0
- data/vendor/Eigen/src/plugins/ArrayCwiseUnaryOps.h +696 -0
- data/vendor/Eigen/src/plugins/BlockMethods.h +1442 -0
- data/vendor/Eigen/src/plugins/CommonCwiseBinaryOps.h +115 -0
- data/vendor/Eigen/src/plugins/CommonCwiseUnaryOps.h +177 -0
- data/vendor/Eigen/src/plugins/IndexedViewMethods.h +262 -0
- data/vendor/Eigen/src/plugins/MatrixCwiseBinaryOps.h +152 -0
- data/vendor/Eigen/src/plugins/MatrixCwiseUnaryOps.h +95 -0
- data/vendor/Eigen/src/plugins/ReshapedMethods.h +149 -0
- data/vendor/aarand/aarand.hpp +114 -0
- data/vendor/annoy/annoylib.h +1495 -0
- data/vendor/annoy/kissrandom.h +120 -0
- data/vendor/annoy/mman.h +242 -0
- data/vendor/hnswlib/bruteforce.h +152 -0
- data/vendor/hnswlib/hnswalg.h +1192 -0
- data/vendor/hnswlib/hnswlib.h +108 -0
- data/vendor/hnswlib/space_ip.h +282 -0
- data/vendor/hnswlib/space_l2.h +281 -0
- data/vendor/hnswlib/visited_list_pool.h +79 -0
- data/vendor/irlba/irlba.hpp +575 -0
- data/vendor/irlba/lanczos.hpp +212 -0
- data/vendor/irlba/parallel.hpp +474 -0
- data/vendor/irlba/utils.hpp +224 -0
- data/vendor/irlba/wrappers.hpp +228 -0
- data/vendor/kmeans/Base.hpp +75 -0
- data/vendor/kmeans/Details.hpp +79 -0
- data/vendor/kmeans/HartiganWong.hpp +492 -0
- data/vendor/kmeans/InitializeKmeansPP.hpp +144 -0
- data/vendor/kmeans/InitializeNone.hpp +44 -0
- data/vendor/kmeans/InitializePCAPartition.hpp +309 -0
- data/vendor/kmeans/InitializeRandom.hpp +91 -0
- data/vendor/kmeans/Kmeans.hpp +161 -0
- data/vendor/kmeans/Lloyd.hpp +134 -0
- data/vendor/kmeans/MiniBatch.hpp +269 -0
- data/vendor/kmeans/QuickSearch.hpp +179 -0
- data/vendor/kmeans/compute_centroids.hpp +32 -0
- data/vendor/kmeans/compute_wcss.hpp +27 -0
- data/vendor/kmeans/is_edge_case.hpp +42 -0
- data/vendor/kmeans/random.hpp +55 -0
- data/vendor/knncolle/Annoy/Annoy.hpp +193 -0
- data/vendor/knncolle/BruteForce/BruteForce.hpp +120 -0
- data/vendor/knncolle/Hnsw/Hnsw.hpp +225 -0
- data/vendor/knncolle/Kmknn/Kmknn.hpp +286 -0
- data/vendor/knncolle/VpTree/VpTree.hpp +256 -0
- data/vendor/knncolle/knncolle.hpp +34 -0
- data/vendor/knncolle/utils/Base.hpp +100 -0
- data/vendor/knncolle/utils/NeighborQueue.hpp +94 -0
- data/vendor/knncolle/utils/distances.hpp +98 -0
- data/vendor/knncolle/utils/find_nearest_neighbors.hpp +112 -0
- data/vendor/powerit/PowerIterations.hpp +157 -0
- data/vendor/umappp/NeighborList.hpp +37 -0
- data/vendor/umappp/Umap.hpp +662 -0
- data/vendor/umappp/combine_neighbor_sets.hpp +95 -0
- data/vendor/umappp/find_ab.hpp +157 -0
- data/vendor/umappp/neighbor_similarities.hpp +136 -0
- data/vendor/umappp/optimize_layout.hpp +285 -0
- data/vendor/umappp/spectral_init.hpp +181 -0
- data/vendor/umappp/umappp.hpp +13 -0
- metadata +465 -0
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
#ifndef KNNCOLLE_ANNOYBASE_HPP
|
|
2
|
+
#define KNNCOLLE_ANNOYBASE_HPP
|
|
3
|
+
|
|
4
|
+
#include <cstdint>
|
|
5
|
+
|
|
6
|
+
#include "../utils/Base.hpp"
|
|
7
|
+
|
|
8
|
+
#include "annoy/annoylib.h"
|
|
9
|
+
#include "annoy/kissrandom.h"
|
|
10
|
+
|
|
11
|
+
/**
|
|
12
|
+
* @file Annoy.hpp
|
|
13
|
+
*
|
|
14
|
+
* @brief Implements an approximate nearest neighbor search with Annoy.
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
namespace knncolle {
|
|
18
|
+
|
|
19
|
+
/**
|
|
20
|
+
* @brief Perform an approximate nearest neighbor search with Annoy.
|
|
21
|
+
*
|
|
22
|
+
* In the Approximate Nearest Neighbors Oh Yeah (Annoy) algorithm, a tree is constructed where a random hyperplane splits the points into two subsets at each internal node.
|
|
23
|
+
* Leaf nodes are defined when the number of points in a subset falls below a threshold (close to twice the number of dimensions for the settings used here).
|
|
24
|
+
* Multiple trees are constructed in this manner, each of which is different due to the random choice of hyperplanes.
|
|
25
|
+
* For a given query point, each tree is searched to identify the subset of all points in the same leaf node as the query point.
|
|
26
|
+
* The union of these subsets across all trees is exhaustively searched to identify the actual nearest neighbors to the query.
|
|
27
|
+
*
|
|
28
|
+
* @see
|
|
29
|
+
* Bernhardsson E (2018).
|
|
30
|
+
* Annoy.
|
|
31
|
+
* https://github.com/spotify/annoy
|
|
32
|
+
*
|
|
33
|
+
* @tparam DISTANCE An **Annoy**-derived class to compute the distance between vectors.
|
|
34
|
+
* Note that this is not the same as the classes in `distances.hpp`.
|
|
35
|
+
* @tparam INDEX_t Integer type for the indices.
|
|
36
|
+
* @tparam DISTANCE_t Floating point type for the distances.
|
|
37
|
+
* @tparam INTERNAL_INDEX_t Integer type for the internal indices.
|
|
38
|
+
* @tparam INTERNAL_DATA_t Floating point type for the internal data store.
|
|
39
|
+
* This uses a `float` instead of a `double` to sacrifice some accuracy for performance.
|
|
40
|
+
*/
|
|
41
|
+
template<class DISTANCE, typename INDEX_t = int, typename DISTANCE_t = double, typename QUERY_t = DISTANCE_t, typename INTERNAL_INDEX_t = int32_t, typename INTERNAL_DATA_t = float>
|
|
42
|
+
class Annoy : public Base<INDEX_t, DISTANCE_t, QUERY_t> {
|
|
43
|
+
public:
|
|
44
|
+
INDEX_t nobs() const {
|
|
45
|
+
return annoy_index.get_n_items();
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
INDEX_t ndim() const {
|
|
49
|
+
return num_dim;
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
public:
|
|
54
|
+
/**
|
|
55
|
+
* Defaults for the constructor parameters.
|
|
56
|
+
*/
|
|
57
|
+
struct Defaults {
|
|
58
|
+
/**
|
|
59
|
+
* See `ntrees` in the `Annoy()` constructor.
|
|
60
|
+
*/
|
|
61
|
+
static constexpr int ntrees = 50;
|
|
62
|
+
|
|
63
|
+
/**
|
|
64
|
+
* See `search_mult` in the `Annoy()` constructor.
|
|
65
|
+
*/
|
|
66
|
+
static constexpr double search_mult = -1;
|
|
67
|
+
};
|
|
68
|
+
|
|
69
|
+
public:
|
|
70
|
+
/**
|
|
71
|
+
* @param ndim Number of dimensions.
|
|
72
|
+
* @param nobs Number of observations.
|
|
73
|
+
* @param vals Pointer to an array of length `ndim * nobs`, corresponding to a dimension-by-observation matrix in column-major format,
|
|
74
|
+
* i.e., contiguous elements belong to the same observation.
|
|
75
|
+
* @param ntrees Number of trees to construct.
|
|
76
|
+
* Larger values improve accuracy at the cost of index size (i.e., memory usage), see [here](https://github.com/spotify/annoy#tradeoffs) for details.
|
|
77
|
+
* @param search_mult Factor that is multiplied by the number of neighbors `k` to determine the number of nodes to search in `find_nearest_neighbors()`.
|
|
78
|
+
* Larger values improve accuracy at the cost of runtime, see [here](https://github.com/spotify/annoy#tradeoffs) for details.
|
|
79
|
+
* If set to -1, it defaults to `ntrees`.
|
|
80
|
+
*
|
|
81
|
+
* @tparam INPUT Floating-point type of the input data.
|
|
82
|
+
*/
|
|
83
|
+
template<typename INPUT>
|
|
84
|
+
Annoy(INDEX_t ndim, INDEX_t nobs, const INPUT* vals, int ntrees = Defaults::ntrees, double search_mult = Defaults::search_mult) :
|
|
85
|
+
annoy_index(ndim), num_dim(ndim), search_k_mult(search_mult)
|
|
86
|
+
{
|
|
87
|
+
if constexpr(std::is_same<INPUT, INTERNAL_DATA_t>::value) {
|
|
88
|
+
for (INDEX_t i=0; i < nobs; ++i, vals += ndim) {
|
|
89
|
+
annoy_index.add_item(i, vals);
|
|
90
|
+
}
|
|
91
|
+
} else {
|
|
92
|
+
std::vector<INTERNAL_DATA_t> incoming(ndim);
|
|
93
|
+
for (INDEX_t i=0; i < nobs; ++i, vals += ndim) {
|
|
94
|
+
std::copy(vals, vals + ndim, incoming.begin());
|
|
95
|
+
annoy_index.add_item(i, incoming.data());
|
|
96
|
+
}
|
|
97
|
+
}
|
|
98
|
+
annoy_index.build(ntrees);
|
|
99
|
+
return;
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
std::vector<std::pair<INDEX_t, DISTANCE_t> > find_nearest_neighbors(INDEX_t index, int k) const {
|
|
103
|
+
std::vector<INTERNAL_INDEX_t> indices;
|
|
104
|
+
std::vector<INTERNAL_DATA_t> distances;
|
|
105
|
+
annoy_index.get_nns_by_item(index, k + 1, get_search_k(k + 1), &indices, &distances); // +1, as it forgets to discard 'self'.
|
|
106
|
+
|
|
107
|
+
bool self_found = false;
|
|
108
|
+
const INTERNAL_INDEX_t self = index;
|
|
109
|
+
std::vector<std::pair<INDEX_t, DISTANCE_t> > output;
|
|
110
|
+
output.reserve(k);
|
|
111
|
+
for (size_t i = 0; i < indices.size(); ++i) {
|
|
112
|
+
if (!self_found && indices[i] == self) {
|
|
113
|
+
self_found=true;
|
|
114
|
+
} else {
|
|
115
|
+
output.emplace_back(indices[i], distances[i]);
|
|
116
|
+
}
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
// Just in case we're full of ties at duplicate points, such that 'c'
|
|
120
|
+
// is not in the set. Note that, if self_found=false, we must have at
|
|
121
|
+
// least 'K+2' points for 'c' to not be detected as its own neighbor.
|
|
122
|
+
// Thus there is no need to worry whether we are popping off a non-'c'
|
|
123
|
+
// element at the end of the vector.
|
|
124
|
+
if (!self_found) {
|
|
125
|
+
output.pop_back();
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
return output;
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
std::vector<std::pair<INDEX_t, DISTANCE_t> > find_nearest_neighbors(const QUERY_t* query, int k) const {
|
|
132
|
+
std::vector<INTERNAL_INDEX_t> indices;
|
|
133
|
+
indices.reserve(k);
|
|
134
|
+
std::vector<INTERNAL_DATA_t> distances;
|
|
135
|
+
distances.reserve(k);
|
|
136
|
+
|
|
137
|
+
if constexpr(std::is_same<INTERNAL_DATA_t, QUERY_t>::value) {
|
|
138
|
+
annoy_index.get_nns_by_vector(query, k, get_search_k(k), &indices, &distances);
|
|
139
|
+
} else {
|
|
140
|
+
std::vector<INTERNAL_DATA_t> tmp(query, query + num_dim);
|
|
141
|
+
annoy_index.get_nns_by_vector(tmp.data(), k, get_search_k(k), &indices, &distances);
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
std::vector<std::pair<INDEX_t, DISTANCE_t> > output;
|
|
145
|
+
output.reserve(k);
|
|
146
|
+
for (size_t i = 0; i < indices.size(); ++i) {
|
|
147
|
+
output.emplace_back(indices[i], distances[i]);
|
|
148
|
+
}
|
|
149
|
+
return output;
|
|
150
|
+
}
|
|
151
|
+
|
|
152
|
+
const QUERY_t* observation(INDEX_t index, QUERY_t* buffer) const {
|
|
153
|
+
if constexpr(std::is_same<QUERY_t, INTERNAL_DATA_t>::value) {
|
|
154
|
+
annoy_index.get_item(index, buffer);
|
|
155
|
+
} else {
|
|
156
|
+
std::vector<INTERNAL_DATA_t> tmp(num_dim);
|
|
157
|
+
annoy_index.get_item(index, tmp.data());
|
|
158
|
+
std::copy(tmp.begin(), tmp.end(), buffer);
|
|
159
|
+
}
|
|
160
|
+
return buffer;
|
|
161
|
+
}
|
|
162
|
+
|
|
163
|
+
using Base<INDEX_t, DISTANCE_t, QUERY_t>::observation;
|
|
164
|
+
|
|
165
|
+
private:
|
|
166
|
+
::Annoy::AnnoyIndex<INTERNAL_INDEX_t, INTERNAL_DATA_t, DISTANCE, ::Annoy::Kiss64Random, ::Annoy::AnnoyIndexSingleThreadedBuildPolicy> annoy_index;
|
|
167
|
+
INDEX_t num_dim;
|
|
168
|
+
double search_k_mult;
|
|
169
|
+
|
|
170
|
+
int get_search_k(int k) const {
|
|
171
|
+
if (search_k_mult < 0) {
|
|
172
|
+
return -1;
|
|
173
|
+
} else {
|
|
174
|
+
return search_k_mult * k + 0.5; // rounded up.
|
|
175
|
+
}
|
|
176
|
+
}
|
|
177
|
+
};
|
|
178
|
+
|
|
179
|
+
/**
|
|
180
|
+
* Perform an Annoy search with Euclidean distances.
|
|
181
|
+
*/
|
|
182
|
+
template<typename INDEX_t = int, typename DISTANCE_t = double, typename QUERY_t = DISTANCE_t, typename INTERNAL_INDEX_t = int32_t, typename INTERNAL_DATA_t = float>
|
|
183
|
+
using AnnoyEuclidean = Annoy<::Annoy::Euclidean, INDEX_t, DISTANCE_t, QUERY_t, INTERNAL_INDEX_t, INTERNAL_DATA_t>;
|
|
184
|
+
|
|
185
|
+
/**
|
|
186
|
+
* Perform an Annoy search with Manhattan distances.
|
|
187
|
+
*/
|
|
188
|
+
template<typename INDEX_t = int, typename DISTANCE_t = double, typename QUERY_t = DISTANCE_t, typename INTERNAL_INDEX_t = int32_t, typename INTERNAL_DATA_t = float>
|
|
189
|
+
using AnnoyManhattan = Annoy<::Annoy::Manhattan, INDEX_t, DISTANCE_t, QUERY_t, INTERNAL_INDEX_t, INTERNAL_DATA_t>;
|
|
190
|
+
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
#endif
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
#ifndef KNNCOLLE_BRUTEFORCE_HPP
|
|
2
|
+
#define KNNCOLLE_BRUTEFORCE_HPP
|
|
3
|
+
|
|
4
|
+
#include "../utils/distances.hpp"
|
|
5
|
+
#include "../utils/NeighborQueue.hpp"
|
|
6
|
+
#include "../utils/Base.hpp"
|
|
7
|
+
|
|
8
|
+
#include <vector>
|
|
9
|
+
#include <type_traits>
|
|
10
|
+
|
|
11
|
+
/**
|
|
12
|
+
* @file BruteForce.hpp
|
|
13
|
+
*
|
|
14
|
+
* @brief Implements a brute-force search for nearest neighbors.
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
namespace knncolle {
|
|
18
|
+
|
|
19
|
+
/**
|
|
20
|
+
* @brief Perform a brute-force nearest neighbor search.
|
|
21
|
+
*
|
|
22
|
+
* The brute-force search computes all pairwise distances between data and query points to identify nearest neighbors of the latter.
|
|
23
|
+
* It has quadratic complexity and is theoretically the worst-performing method;
|
|
24
|
+
* however, it has effectively no overhead from constructing or querying indexing structures,
|
|
25
|
+
* potentially making it faster in cases where indexing provides little benefit (e.g., few data points, high dimensionality).
|
|
26
|
+
*
|
|
27
|
+
* @tparam DISTANCE Class to compute the distance between vectors, see `distance::Euclidean` for an example.
|
|
28
|
+
* @tparam INDEX_t Integer type for the indices.
|
|
29
|
+
* @tparam DISTANCE_t Floating point type for the distances.
|
|
30
|
+
* @tparam QUERY_t Floating point type for the query data.
|
|
31
|
+
* @tparam INTERNAL_t Floating point type for the internal calculations.
|
|
32
|
+
*/
|
|
33
|
+
template<class DISTANCE, typename INDEX_t = int, typename DISTANCE_t = double, typename QUERY_t = DISTANCE_t, typename INTERNAL_t = double>
|
|
34
|
+
class BruteForce : public Base<INDEX_t, DISTANCE_t, QUERY_t> {
|
|
35
|
+
private:
|
|
36
|
+
INDEX_t num_dim;
|
|
37
|
+
INDEX_t num_obs;
|
|
38
|
+
|
|
39
|
+
public:
|
|
40
|
+
INDEX_t nobs() const { return num_obs; }
|
|
41
|
+
|
|
42
|
+
INDEX_t ndim() const { return num_dim; }
|
|
43
|
+
|
|
44
|
+
private:
|
|
45
|
+
std::vector<INTERNAL_t> store;
|
|
46
|
+
|
|
47
|
+
public:
|
|
48
|
+
/**
|
|
49
|
+
* @param ndim Number of dimensions.
|
|
50
|
+
* @param nobs Number of observations.
|
|
51
|
+
* @param vals Pointer to an array of length `ndim * nobs`, corresponding to a dimension-by-observation matrix in column-major format,
|
|
52
|
+
* i.e., contiguous elements belong to the same observation.
|
|
53
|
+
*
|
|
54
|
+
* @tparam INPUT Floating-point type of the input data.
|
|
55
|
+
*/
|
|
56
|
+
template<typename INPUT>
|
|
57
|
+
BruteForce(INDEX_t ndim, INDEX_t nobs, const INPUT* vals) : num_dim(ndim), num_obs(nobs), store(vals, vals + ndim * nobs) {}
|
|
58
|
+
|
|
59
|
+
std::vector<std::pair<INDEX_t, DISTANCE_t> > find_nearest_neighbors(INDEX_t index, int k) const {
|
|
60
|
+
NeighborQueue<INDEX_t, INTERNAL_t> nearest(k, index);
|
|
61
|
+
search_nn(store.data() + index * num_dim, nearest);
|
|
62
|
+
|
|
63
|
+
auto output = nearest.template report<DISTANCE_t>();
|
|
64
|
+
normalize(output);
|
|
65
|
+
return output;
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
std::vector<std::pair<INDEX_t, DISTANCE_t> > find_nearest_neighbors(const QUERY_t* query, int k) const {
|
|
69
|
+
NeighborQueue<INDEX_t, INTERNAL_t> nearest(k);
|
|
70
|
+
search_nn(query, nearest);
|
|
71
|
+
auto output = nearest.template report<DISTANCE_t>();
|
|
72
|
+
normalize(output);
|
|
73
|
+
return output;
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
const QUERY_t* observation(INDEX_t index, QUERY_t* buffer) const {
|
|
77
|
+
auto candidate = store.data() + num_dim * index;
|
|
78
|
+
if constexpr(std::is_same<QUERY_t, INTERNAL_t>::value) {
|
|
79
|
+
return candidate;
|
|
80
|
+
} else {
|
|
81
|
+
std::copy(candidate, candidate + num_dim, buffer);
|
|
82
|
+
return buffer;
|
|
83
|
+
}
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
using Base<INDEX_t, DISTANCE_t, QUERY_t>::observation;
|
|
87
|
+
|
|
88
|
+
private:
|
|
89
|
+
template<class QUEUE>
|
|
90
|
+
void search_nn(const QUERY_t* query, QUEUE& nearest) const {
|
|
91
|
+
auto copy = store.data();
|
|
92
|
+
for (INDEX_t i = 0; i < num_obs; ++i, copy += num_dim) {
|
|
93
|
+
nearest.add(i, DISTANCE::template raw_distance<INTERNAL_t>(query, copy, num_dim));
|
|
94
|
+
}
|
|
95
|
+
return;
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
void normalize(std::vector<std::pair<INDEX_t, DISTANCE_t> >& results) const {
|
|
99
|
+
for (auto& d : results) {
|
|
100
|
+
d.second = DISTANCE::normalize(d.second);
|
|
101
|
+
}
|
|
102
|
+
return;
|
|
103
|
+
}
|
|
104
|
+
};
|
|
105
|
+
|
|
106
|
+
/**
|
|
107
|
+
* Perform a brute-force search with Euclidean distances.
|
|
108
|
+
*/
|
|
109
|
+
template<typename INDEX_t = int, typename DISTANCE_t = double, typename QUERY_t = DISTANCE_t, typename INTERNAL_t = double>
|
|
110
|
+
using BruteForceEuclidean = BruteForce<distances::Euclidean, INDEX_t, DISTANCE_t, QUERY_t, INTERNAL_t>;
|
|
111
|
+
|
|
112
|
+
/**
|
|
113
|
+
* Perform a brute-force search with Manhattan distances.
|
|
114
|
+
*/
|
|
115
|
+
template<typename INDEX_t = int, typename DISTANCE_t = double, typename QUERY_t = DISTANCE_t, typename INTERNAL_t = double>
|
|
116
|
+
using BruteForceManhattan = BruteForce<distances::Manhattan, INDEX_t, DISTANCE_t, QUERY_t, INTERNAL_t>;
|
|
117
|
+
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
#endif
|
|
@@ -0,0 +1,225 @@
|
|
|
1
|
+
#ifndef KNNCOLLE_HNSW_HPP
|
|
2
|
+
#define KNNCOLLE_HNSW_HPP
|
|
3
|
+
|
|
4
|
+
#include "../utils/Base.hpp"
|
|
5
|
+
#include "../utils/NeighborQueue.hpp"
|
|
6
|
+
|
|
7
|
+
#include "hnswlib/hnswalg.h"
|
|
8
|
+
#include <cmath>
|
|
9
|
+
|
|
10
|
+
/**
|
|
11
|
+
* @file Hnsw.hpp
|
|
12
|
+
*
|
|
13
|
+
* @brief Implements an approximate nearest neighbor search with HNSW.
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
namespace knncolle {
|
|
17
|
+
|
|
18
|
+
/**
|
|
19
|
+
* @brief Perform an approximate nearest neighbor search with HNSW.
|
|
20
|
+
*
|
|
21
|
+
* In the HNSW algorithm (Malkov and Yashunin, 2016), each point is a node in a "nagivable small world" graph.
|
|
22
|
+
* The nearest neighbor search proceeds by starting at a node and walking through the graph to obtain closer neighbors to a given query point.
|
|
23
|
+
* Nagivable small world graphs are used to maintain connectivity across the data set by creating links between distant points.
|
|
24
|
+
* This speeds up the search by ensuring that the algorithm does not need to take many small steps to move from one cluster to another.
|
|
25
|
+
* The HNSW algorithm extends this idea by using a hierarchy of such graphs containing links of different lengths,
|
|
26
|
+
* which avoids wasting time on small steps in the early stages of the search where the current node position is far from the query.
|
|
27
|
+
*
|
|
28
|
+
* @see
|
|
29
|
+
* Malkov YA, Yashunin DA (2016).
|
|
30
|
+
* Efficient and robust approximate nearest neighbor search using Hierarchical Navigable Small World graphs.
|
|
31
|
+
* _arXiv_.
|
|
32
|
+
* https://arxiv.org/abs/1603.09320
|
|
33
|
+
*
|
|
34
|
+
*
|
|
35
|
+
* @tparam DISTANCE An **hnswlib**-derived class to compute the distance between vectors.
|
|
36
|
+
* Note that this is not the same as the classes in `distances.hpp`.
|
|
37
|
+
* @tparam INDEX_t Integer type for the indices.
|
|
38
|
+
* @tparam DISTANCE_t Floating point type for the distances.
|
|
39
|
+
*/
|
|
40
|
+
template<class SPACE, typename INDEX_t = int, typename DISTANCE_t = double, typename QUERY_t = DISTANCE_t>
|
|
41
|
+
class Hnsw : public Base<INDEX_t, DISTANCE_t, QUERY_t> {
|
|
42
|
+
typedef float INTERNAL_DATA_t; // floats are effectively hard-coded into hnswlib, given that L2Space only uses floats.
|
|
43
|
+
|
|
44
|
+
public:
|
|
45
|
+
INDEX_t nobs() const {
|
|
46
|
+
return num_obs;
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
INDEX_t ndim() const {
|
|
50
|
+
return num_dim;
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
public:
|
|
54
|
+
/**
|
|
55
|
+
* Defaults for the constructor parameters.
|
|
56
|
+
*/
|
|
57
|
+
struct Defaults {
|
|
58
|
+
/**
|
|
59
|
+
* See `nlinks` in the `Hnsw()` constructor.
|
|
60
|
+
*/
|
|
61
|
+
static constexpr int nlinks = 16;
|
|
62
|
+
|
|
63
|
+
/**
|
|
64
|
+
* See `ef_construction` in the `Hnsw()` constructor.
|
|
65
|
+
*/
|
|
66
|
+
static constexpr int ef_construction = 200;
|
|
67
|
+
|
|
68
|
+
/**
|
|
69
|
+
* See `ef_search` in the `Hnsw()` constructor.
|
|
70
|
+
*/
|
|
71
|
+
static constexpr int ef_search = 10;
|
|
72
|
+
};
|
|
73
|
+
|
|
74
|
+
public:
|
|
75
|
+
/**
|
|
76
|
+
* @param ndim Number of dimensions.
|
|
77
|
+
* @param nobs Number of observations.
|
|
78
|
+
* @param vals Pointer to an array of length `ndim * nobs`, corresponding to a dimension-by-observation matrix in column-major format,
|
|
79
|
+
* i.e., contiguous elements belong to the same observation.
|
|
80
|
+
* @param nlinks Number of bidirectional links for each node.
|
|
81
|
+
* This is equivalent to the `M` parameter in the underlying **hnswlib** library, see [here](https://github.com/nmslib/hnswlib/blob/master/ALGO_PARAMS.md#construction-parameters) for details.
|
|
82
|
+
* @param ef_construction Size of the dynamic list of nearest neighbors during index construction.
|
|
83
|
+
* This controls the trade-off between indexing time and accuracy and is equivalent to the `ef_construct` parameter in the underlying **hnswlib** library,
|
|
84
|
+
* see [here](https://github.com/nmslib/hnswlib/blob/master/ALGO_PARAMS.md#construction-parameters) for details.
|
|
85
|
+
* @param ef_search Size of the dynamic list of nearest neighbors during searching.
|
|
86
|
+
* This controls the trade-off between search speed and accuracy and is equivalent to the `ef` parameter in the underlying **hnswlib** library,
|
|
87
|
+
* see [here](https://github.com/nmslib/hnswlib/blob/master/ALGO_PARAMS.md#search-parameters) for details.
|
|
88
|
+
*
|
|
89
|
+
* @tparam INPUT Floating-point type of the input data.
|
|
90
|
+
*/
|
|
91
|
+
template<typename INPUT>
|
|
92
|
+
Hnsw(INDEX_t ndim, INDEX_t nobs, const INPUT* vals, int nlinks = Defaults::nlinks, int ef_construction = Defaults::ef_construction, int ef_search = Defaults::ef_search) :
|
|
93
|
+
space(ndim), hnsw_index(&space, nobs, nlinks, ef_construction), num_dim(ndim), num_obs(nobs)
|
|
94
|
+
{
|
|
95
|
+
if constexpr(std::is_same<INPUT, INTERNAL_DATA_t>::value) {
|
|
96
|
+
for (INDEX_t i=0; i < nobs; ++i, vals += ndim) {
|
|
97
|
+
hnsw_index.addPoint(vals, i);
|
|
98
|
+
}
|
|
99
|
+
} else {
|
|
100
|
+
std::vector<INTERNAL_DATA_t> copy(ndim);
|
|
101
|
+
for (INDEX_t i=0; i < nobs; ++i, vals += ndim) {
|
|
102
|
+
std::copy(vals, vals + ndim, copy.begin());
|
|
103
|
+
hnsw_index.addPoint(copy.data(), i);
|
|
104
|
+
}
|
|
105
|
+
}
|
|
106
|
+
hnsw_index.setEf(ef_search);
|
|
107
|
+
return;
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
std::vector<std::pair<INDEX_t, DISTANCE_t> > find_nearest_neighbors(INDEX_t index, int k) const {
|
|
111
|
+
auto V = hnsw_index.getDataByLabel<INTERNAL_DATA_t>(index);
|
|
112
|
+
auto Q = hnsw_index.searchKnn(V.data(), k+1);
|
|
113
|
+
auto output = harvest_queue<INDEX_t, DISTANCE_t>(Q, true, index);
|
|
114
|
+
normalize(output);
|
|
115
|
+
return output;
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
std::vector<std::pair<INDEX_t, DISTANCE_t> > find_nearest_neighbors(const QUERY_t* query, int k) const {
|
|
119
|
+
if constexpr(std::is_same<QUERY_t, INTERNAL_DATA_t>::value) {
|
|
120
|
+
auto Q = hnsw_index.searchKnn(query, k);
|
|
121
|
+
auto output = harvest_queue<INDEX_t, DISTANCE_t>(Q);
|
|
122
|
+
normalize(output);
|
|
123
|
+
return output;
|
|
124
|
+
} else {
|
|
125
|
+
std::vector<INTERNAL_DATA_t> copy(query, query + num_dim);
|
|
126
|
+
auto Q = hnsw_index.searchKnn(copy.data(), k);
|
|
127
|
+
auto output = harvest_queue<INDEX_t, DISTANCE_t>(Q);
|
|
128
|
+
normalize(output);
|
|
129
|
+
return output;
|
|
130
|
+
}
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
const QUERY_t* observation(INDEX_t index, QUERY_t* buffer) const {
|
|
134
|
+
auto V = hnsw_index.getDataByLabel<INTERNAL_DATA_t>(index);
|
|
135
|
+
std::copy(V.begin(), V.begin() + num_dim, buffer);
|
|
136
|
+
return buffer;
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
std::vector<QUERY_t> observation(INDEX_t index) const {
|
|
140
|
+
if constexpr(std::is_same<QUERY_t, INTERNAL_DATA_t>::value) {
|
|
141
|
+
return hnsw_index.getDataByLabel<QUERY_t>(index);
|
|
142
|
+
} else {
|
|
143
|
+
auto V = hnsw_index.getDataByLabel<INTERNAL_DATA_t>(index);
|
|
144
|
+
return std::vector<QUERY_t>(V.begin(), V.end());
|
|
145
|
+
}
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
private:
|
|
149
|
+
SPACE space;
|
|
150
|
+
hnswlib::HierarchicalNSW<INTERNAL_DATA_t> hnsw_index;
|
|
151
|
+
INDEX_t num_dim, num_obs;
|
|
152
|
+
|
|
153
|
+
static void normalize(std::vector<std::pair<INDEX_t, DISTANCE_t> >& results) {
|
|
154
|
+
for (auto& d : results) {
|
|
155
|
+
d.second = SPACE::normalize(d.second);
|
|
156
|
+
}
|
|
157
|
+
return;
|
|
158
|
+
}
|
|
159
|
+
};
|
|
160
|
+
|
|
161
|
+
namespace hnsw_distances {
|
|
162
|
+
|
|
163
|
+
/**
 * Manhattan (L1) distance space for **hnswlib**, operating on `float` vectors.
 */
class Manhattan : public hnswlib::SpaceInterface<float> {
    size_t data_size_; // size of one vector in bytes.
    size_t dim_; // number of dimensions; its address is the distance function's parameter.
public:
    /**
     * @param ndim Number of dimensions.
     */
    Manhattan(size_t ndim) : data_size_(ndim * sizeof(float)), dim_(ndim) {}

    ~Manhattan() {}

    /**
     * @return Size of a single vector, in bytes.
     */
    size_t get_data_size() {
        return data_size_;
    }

    /**
     * @return Pointer to the `L1()` distance function.
     */
    hnswlib::DISTFUNC<float> get_dist_func() {
        return L1;
    }

    /**
     * @return Pointer to the number of dimensions, to be passed as the last argument of `L1()`.
     */
    void * get_dist_func_param() {
        return &dim_;
    }

    /**
     * Sum of absolute differences between two `float` vectors.
     *
     * @param pVect1v Pointer to the first array of `float`s.
     * @param pVect2v Pointer to the second array of `float`s.
     * @param qty_ptr Pointer to a `size_t` holding the number of elements in each array.
     *
     * @return The Manhattan distance between the two vectors.
     */
    static float L1(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
        const float* lhs = static_cast<const float*>(pVect1v);
        const float* rhs = static_cast<const float*>(pVect2v);
        const size_t ndim = *static_cast<const size_t*>(qty_ptr);

        float total = 0;
        for (size_t d = 0; d < ndim; ++d) {
            total += std::fabs(lhs[d] - rhs[d]);
        }
        return total;
    }

    /**
     * @param raw Distance as computed by `L1()`.
     *
     * @return `raw`, unchanged.
     */
    static float normalize(float raw) {
        return raw;
    }
};
|
|
199
|
+
|
|
200
|
+
/**
 * Euclidean distance space for **hnswlib**, built on `hnswlib::L2Space`.
 */
class Euclidean : public hnswlib::L2Space {
public:
    /**
     * @param ndim Number of dimensions.
     */
    Euclidean(size_t ndim) : hnswlib::L2Space(ndim) {}

    /**
     * @param raw Raw distance value from the search.
     * NOTE(review): presumably a squared distance from `hnswlib::L2Space` - confirm against hnswlib.
     *
     * @return Square root of `raw`.
     */
    static float normalize(float raw) {
        const float rooted = std::sqrt(raw);
        return rooted;
    }
};
|
|
208
|
+
|
|
209
|
+
}
|
|
210
|
+
|
|
211
|
+
/**
|
|
212
|
+
* Perform an Hnsw search with Euclidean distances.
|
|
213
|
+
*/
|
|
214
|
+
template<typename INDEX_t = int, typename DISTANCE_t = double, typename QUERY_t = DISTANCE_t>
|
|
215
|
+
using HnswEuclidean = Hnsw<hnsw_distances::Euclidean, INDEX_t, DISTANCE_t, QUERY_t>;
|
|
216
|
+
|
|
217
|
+
/**
|
|
218
|
+
* Perform an Hnsw search with Manhattan distances.
|
|
219
|
+
*/
|
|
220
|
+
template<typename INDEX_t = int, typename DISTANCE_t = double, typename QUERY_t = DISTANCE_t>
|
|
221
|
+
using HnswManhattan = Hnsw<hnsw_distances::Manhattan, INDEX_t, DISTANCE_t, QUERY_t>;
|
|
222
|
+
|
|
223
|
+
}
|
|
224
|
+
|
|
225
|
+
#endif
|