umappp 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +25 -0
- data/README.md +110 -0
- data/ext/umappp/extconf.rb +25 -0
- data/ext/umappp/numo.hpp +867 -0
- data/ext/umappp/umappp.cpp +225 -0
- data/lib/umappp/version.rb +5 -0
- data/lib/umappp.rb +41 -0
- data/vendor/Eigen/Cholesky +45 -0
- data/vendor/Eigen/CholmodSupport +48 -0
- data/vendor/Eigen/Core +384 -0
- data/vendor/Eigen/Dense +7 -0
- data/vendor/Eigen/Eigen +2 -0
- data/vendor/Eigen/Eigenvalues +60 -0
- data/vendor/Eigen/Geometry +59 -0
- data/vendor/Eigen/Householder +29 -0
- data/vendor/Eigen/IterativeLinearSolvers +48 -0
- data/vendor/Eigen/Jacobi +32 -0
- data/vendor/Eigen/KLUSupport +41 -0
- data/vendor/Eigen/LU +47 -0
- data/vendor/Eigen/MetisSupport +35 -0
- data/vendor/Eigen/OrderingMethods +70 -0
- data/vendor/Eigen/PaStiXSupport +49 -0
- data/vendor/Eigen/PardisoSupport +35 -0
- data/vendor/Eigen/QR +50 -0
- data/vendor/Eigen/QtAlignedMalloc +39 -0
- data/vendor/Eigen/SPQRSupport +34 -0
- data/vendor/Eigen/SVD +50 -0
- data/vendor/Eigen/Sparse +34 -0
- data/vendor/Eigen/SparseCholesky +37 -0
- data/vendor/Eigen/SparseCore +69 -0
- data/vendor/Eigen/SparseLU +50 -0
- data/vendor/Eigen/SparseQR +36 -0
- data/vendor/Eigen/StdDeque +27 -0
- data/vendor/Eigen/StdList +26 -0
- data/vendor/Eigen/StdVector +27 -0
- data/vendor/Eigen/SuperLUSupport +64 -0
- data/vendor/Eigen/UmfPackSupport +40 -0
- data/vendor/Eigen/src/Cholesky/LDLT.h +688 -0
- data/vendor/Eigen/src/Cholesky/LLT.h +558 -0
- data/vendor/Eigen/src/Cholesky/LLT_LAPACKE.h +99 -0
- data/vendor/Eigen/src/CholmodSupport/CholmodSupport.h +682 -0
- data/vendor/Eigen/src/Core/ArithmeticSequence.h +413 -0
- data/vendor/Eigen/src/Core/Array.h +417 -0
- data/vendor/Eigen/src/Core/ArrayBase.h +226 -0
- data/vendor/Eigen/src/Core/ArrayWrapper.h +209 -0
- data/vendor/Eigen/src/Core/Assign.h +90 -0
- data/vendor/Eigen/src/Core/AssignEvaluator.h +1010 -0
- data/vendor/Eigen/src/Core/Assign_MKL.h +178 -0
- data/vendor/Eigen/src/Core/BandMatrix.h +353 -0
- data/vendor/Eigen/src/Core/Block.h +448 -0
- data/vendor/Eigen/src/Core/BooleanRedux.h +162 -0
- data/vendor/Eigen/src/Core/CommaInitializer.h +164 -0
- data/vendor/Eigen/src/Core/ConditionEstimator.h +175 -0
- data/vendor/Eigen/src/Core/CoreEvaluators.h +1741 -0
- data/vendor/Eigen/src/Core/CoreIterators.h +132 -0
- data/vendor/Eigen/src/Core/CwiseBinaryOp.h +183 -0
- data/vendor/Eigen/src/Core/CwiseNullaryOp.h +1001 -0
- data/vendor/Eigen/src/Core/CwiseTernaryOp.h +197 -0
- data/vendor/Eigen/src/Core/CwiseUnaryOp.h +103 -0
- data/vendor/Eigen/src/Core/CwiseUnaryView.h +132 -0
- data/vendor/Eigen/src/Core/DenseBase.h +701 -0
- data/vendor/Eigen/src/Core/DenseCoeffsBase.h +685 -0
- data/vendor/Eigen/src/Core/DenseStorage.h +652 -0
- data/vendor/Eigen/src/Core/Diagonal.h +258 -0
- data/vendor/Eigen/src/Core/DiagonalMatrix.h +391 -0
- data/vendor/Eigen/src/Core/DiagonalProduct.h +28 -0
- data/vendor/Eigen/src/Core/Dot.h +318 -0
- data/vendor/Eigen/src/Core/EigenBase.h +160 -0
- data/vendor/Eigen/src/Core/ForceAlignedAccess.h +150 -0
- data/vendor/Eigen/src/Core/Fuzzy.h +155 -0
- data/vendor/Eigen/src/Core/GeneralProduct.h +465 -0
- data/vendor/Eigen/src/Core/GenericPacketMath.h +1040 -0
- data/vendor/Eigen/src/Core/GlobalFunctions.h +194 -0
- data/vendor/Eigen/src/Core/IO.h +258 -0
- data/vendor/Eigen/src/Core/IndexedView.h +237 -0
- data/vendor/Eigen/src/Core/Inverse.h +117 -0
- data/vendor/Eigen/src/Core/Map.h +171 -0
- data/vendor/Eigen/src/Core/MapBase.h +310 -0
- data/vendor/Eigen/src/Core/MathFunctions.h +2057 -0
- data/vendor/Eigen/src/Core/MathFunctionsImpl.h +200 -0
- data/vendor/Eigen/src/Core/Matrix.h +565 -0
- data/vendor/Eigen/src/Core/MatrixBase.h +547 -0
- data/vendor/Eigen/src/Core/NestByValue.h +85 -0
- data/vendor/Eigen/src/Core/NoAlias.h +109 -0
- data/vendor/Eigen/src/Core/NumTraits.h +335 -0
- data/vendor/Eigen/src/Core/PartialReduxEvaluator.h +232 -0
- data/vendor/Eigen/src/Core/PermutationMatrix.h +605 -0
- data/vendor/Eigen/src/Core/PlainObjectBase.h +1128 -0
- data/vendor/Eigen/src/Core/Product.h +191 -0
- data/vendor/Eigen/src/Core/ProductEvaluators.h +1179 -0
- data/vendor/Eigen/src/Core/Random.h +218 -0
- data/vendor/Eigen/src/Core/Redux.h +515 -0
- data/vendor/Eigen/src/Core/Ref.h +381 -0
- data/vendor/Eigen/src/Core/Replicate.h +142 -0
- data/vendor/Eigen/src/Core/Reshaped.h +454 -0
- data/vendor/Eigen/src/Core/ReturnByValue.h +119 -0
- data/vendor/Eigen/src/Core/Reverse.h +217 -0
- data/vendor/Eigen/src/Core/Select.h +164 -0
- data/vendor/Eigen/src/Core/SelfAdjointView.h +365 -0
- data/vendor/Eigen/src/Core/SelfCwiseBinaryOp.h +47 -0
- data/vendor/Eigen/src/Core/Solve.h +188 -0
- data/vendor/Eigen/src/Core/SolveTriangular.h +235 -0
- data/vendor/Eigen/src/Core/SolverBase.h +168 -0
- data/vendor/Eigen/src/Core/StableNorm.h +251 -0
- data/vendor/Eigen/src/Core/StlIterators.h +463 -0
- data/vendor/Eigen/src/Core/Stride.h +116 -0
- data/vendor/Eigen/src/Core/Swap.h +68 -0
- data/vendor/Eigen/src/Core/Transpose.h +464 -0
- data/vendor/Eigen/src/Core/Transpositions.h +386 -0
- data/vendor/Eigen/src/Core/TriangularMatrix.h +1001 -0
- data/vendor/Eigen/src/Core/VectorBlock.h +96 -0
- data/vendor/Eigen/src/Core/VectorwiseOp.h +784 -0
- data/vendor/Eigen/src/Core/Visitor.h +381 -0
- data/vendor/Eigen/src/Core/arch/AVX/Complex.h +372 -0
- data/vendor/Eigen/src/Core/arch/AVX/MathFunctions.h +228 -0
- data/vendor/Eigen/src/Core/arch/AVX/PacketMath.h +1574 -0
- data/vendor/Eigen/src/Core/arch/AVX/TypeCasting.h +115 -0
- data/vendor/Eigen/src/Core/arch/AVX512/Complex.h +422 -0
- data/vendor/Eigen/src/Core/arch/AVX512/MathFunctions.h +362 -0
- data/vendor/Eigen/src/Core/arch/AVX512/PacketMath.h +2303 -0
- data/vendor/Eigen/src/Core/arch/AVX512/TypeCasting.h +89 -0
- data/vendor/Eigen/src/Core/arch/AltiVec/Complex.h +417 -0
- data/vendor/Eigen/src/Core/arch/AltiVec/MathFunctions.h +90 -0
- data/vendor/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2937 -0
- data/vendor/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +221 -0
- data/vendor/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +629 -0
- data/vendor/Eigen/src/Core/arch/AltiVec/PacketMath.h +2711 -0
- data/vendor/Eigen/src/Core/arch/CUDA/Complex.h +258 -0
- data/vendor/Eigen/src/Core/arch/Default/BFloat16.h +700 -0
- data/vendor/Eigen/src/Core/arch/Default/ConjHelper.h +117 -0
- data/vendor/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1649 -0
- data/vendor/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +110 -0
- data/vendor/Eigen/src/Core/arch/Default/Half.h +942 -0
- data/vendor/Eigen/src/Core/arch/Default/Settings.h +49 -0
- data/vendor/Eigen/src/Core/arch/Default/TypeCasting.h +120 -0
- data/vendor/Eigen/src/Core/arch/GPU/MathFunctions.h +103 -0
- data/vendor/Eigen/src/Core/arch/GPU/PacketMath.h +1685 -0
- data/vendor/Eigen/src/Core/arch/GPU/TypeCasting.h +80 -0
- data/vendor/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
- data/vendor/Eigen/src/Core/arch/MSA/Complex.h +648 -0
- data/vendor/Eigen/src/Core/arch/MSA/MathFunctions.h +387 -0
- data/vendor/Eigen/src/Core/arch/MSA/PacketMath.h +1233 -0
- data/vendor/Eigen/src/Core/arch/NEON/Complex.h +584 -0
- data/vendor/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +183 -0
- data/vendor/Eigen/src/Core/arch/NEON/MathFunctions.h +75 -0
- data/vendor/Eigen/src/Core/arch/NEON/PacketMath.h +4587 -0
- data/vendor/Eigen/src/Core/arch/NEON/TypeCasting.h +1419 -0
- data/vendor/Eigen/src/Core/arch/SSE/Complex.h +351 -0
- data/vendor/Eigen/src/Core/arch/SSE/MathFunctions.h +199 -0
- data/vendor/Eigen/src/Core/arch/SSE/PacketMath.h +1505 -0
- data/vendor/Eigen/src/Core/arch/SSE/TypeCasting.h +142 -0
- data/vendor/Eigen/src/Core/arch/SVE/MathFunctions.h +44 -0
- data/vendor/Eigen/src/Core/arch/SVE/PacketMath.h +752 -0
- data/vendor/Eigen/src/Core/arch/SVE/TypeCasting.h +49 -0
- data/vendor/Eigen/src/Core/arch/SYCL/InteropHeaders.h +232 -0
- data/vendor/Eigen/src/Core/arch/SYCL/MathFunctions.h +301 -0
- data/vendor/Eigen/src/Core/arch/SYCL/PacketMath.h +670 -0
- data/vendor/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +694 -0
- data/vendor/Eigen/src/Core/arch/SYCL/TypeCasting.h +85 -0
- data/vendor/Eigen/src/Core/arch/ZVector/Complex.h +426 -0
- data/vendor/Eigen/src/Core/arch/ZVector/MathFunctions.h +233 -0
- data/vendor/Eigen/src/Core/arch/ZVector/PacketMath.h +1060 -0
- data/vendor/Eigen/src/Core/functors/AssignmentFunctors.h +177 -0
- data/vendor/Eigen/src/Core/functors/BinaryFunctors.h +541 -0
- data/vendor/Eigen/src/Core/functors/NullaryFunctors.h +189 -0
- data/vendor/Eigen/src/Core/functors/StlFunctors.h +166 -0
- data/vendor/Eigen/src/Core/functors/TernaryFunctors.h +25 -0
- data/vendor/Eigen/src/Core/functors/UnaryFunctors.h +1131 -0
- data/vendor/Eigen/src/Core/products/GeneralBlockPanelKernel.h +2645 -0
- data/vendor/Eigen/src/Core/products/GeneralMatrixMatrix.h +517 -0
- data/vendor/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +317 -0
- data/vendor/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +145 -0
- data/vendor/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +124 -0
- data/vendor/Eigen/src/Core/products/GeneralMatrixVector.h +518 -0
- data/vendor/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +136 -0
- data/vendor/Eigen/src/Core/products/Parallelizer.h +180 -0
- data/vendor/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +544 -0
- data/vendor/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +295 -0
- data/vendor/Eigen/src/Core/products/SelfadjointMatrixVector.h +262 -0
- data/vendor/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +118 -0
- data/vendor/Eigen/src/Core/products/SelfadjointProduct.h +133 -0
- data/vendor/Eigen/src/Core/products/SelfadjointRank2Update.h +94 -0
- data/vendor/Eigen/src/Core/products/TriangularMatrixMatrix.h +472 -0
- data/vendor/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +317 -0
- data/vendor/Eigen/src/Core/products/TriangularMatrixVector.h +350 -0
- data/vendor/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +255 -0
- data/vendor/Eigen/src/Core/products/TriangularSolverMatrix.h +337 -0
- data/vendor/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +167 -0
- data/vendor/Eigen/src/Core/products/TriangularSolverVector.h +148 -0
- data/vendor/Eigen/src/Core/util/BlasUtil.h +583 -0
- data/vendor/Eigen/src/Core/util/ConfigureVectorization.h +512 -0
- data/vendor/Eigen/src/Core/util/Constants.h +563 -0
- data/vendor/Eigen/src/Core/util/DisableStupidWarnings.h +106 -0
- data/vendor/Eigen/src/Core/util/ForwardDeclarations.h +322 -0
- data/vendor/Eigen/src/Core/util/IndexedViewHelper.h +186 -0
- data/vendor/Eigen/src/Core/util/IntegralConstant.h +272 -0
- data/vendor/Eigen/src/Core/util/MKL_support.h +137 -0
- data/vendor/Eigen/src/Core/util/Macros.h +1464 -0
- data/vendor/Eigen/src/Core/util/Memory.h +1163 -0
- data/vendor/Eigen/src/Core/util/Meta.h +812 -0
- data/vendor/Eigen/src/Core/util/NonMPL2.h +3 -0
- data/vendor/Eigen/src/Core/util/ReenableStupidWarnings.h +31 -0
- data/vendor/Eigen/src/Core/util/ReshapedHelper.h +51 -0
- data/vendor/Eigen/src/Core/util/StaticAssert.h +221 -0
- data/vendor/Eigen/src/Core/util/SymbolicIndex.h +293 -0
- data/vendor/Eigen/src/Core/util/XprHelper.h +856 -0
- data/vendor/Eigen/src/Eigenvalues/ComplexEigenSolver.h +346 -0
- data/vendor/Eigen/src/Eigenvalues/ComplexSchur.h +462 -0
- data/vendor/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +91 -0
- data/vendor/Eigen/src/Eigenvalues/EigenSolver.h +622 -0
- data/vendor/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +418 -0
- data/vendor/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +226 -0
- data/vendor/Eigen/src/Eigenvalues/HessenbergDecomposition.h +374 -0
- data/vendor/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +158 -0
- data/vendor/Eigen/src/Eigenvalues/RealQZ.h +657 -0
- data/vendor/Eigen/src/Eigenvalues/RealSchur.h +558 -0
- data/vendor/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +77 -0
- data/vendor/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +904 -0
- data/vendor/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +87 -0
- data/vendor/Eigen/src/Eigenvalues/Tridiagonalization.h +561 -0
- data/vendor/Eigen/src/Geometry/AlignedBox.h +486 -0
- data/vendor/Eigen/src/Geometry/AngleAxis.h +247 -0
- data/vendor/Eigen/src/Geometry/EulerAngles.h +114 -0
- data/vendor/Eigen/src/Geometry/Homogeneous.h +501 -0
- data/vendor/Eigen/src/Geometry/Hyperplane.h +282 -0
- data/vendor/Eigen/src/Geometry/OrthoMethods.h +235 -0
- data/vendor/Eigen/src/Geometry/ParametrizedLine.h +232 -0
- data/vendor/Eigen/src/Geometry/Quaternion.h +870 -0
- data/vendor/Eigen/src/Geometry/Rotation2D.h +199 -0
- data/vendor/Eigen/src/Geometry/RotationBase.h +206 -0
- data/vendor/Eigen/src/Geometry/Scaling.h +188 -0
- data/vendor/Eigen/src/Geometry/Transform.h +1563 -0
- data/vendor/Eigen/src/Geometry/Translation.h +202 -0
- data/vendor/Eigen/src/Geometry/Umeyama.h +166 -0
- data/vendor/Eigen/src/Geometry/arch/Geometry_SIMD.h +168 -0
- data/vendor/Eigen/src/Householder/BlockHouseholder.h +110 -0
- data/vendor/Eigen/src/Householder/Householder.h +176 -0
- data/vendor/Eigen/src/Householder/HouseholderSequence.h +545 -0
- data/vendor/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +226 -0
- data/vendor/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +212 -0
- data/vendor/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +229 -0
- data/vendor/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +394 -0
- data/vendor/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +453 -0
- data/vendor/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +444 -0
- data/vendor/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +198 -0
- data/vendor/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +117 -0
- data/vendor/Eigen/src/Jacobi/Jacobi.h +483 -0
- data/vendor/Eigen/src/KLUSupport/KLUSupport.h +358 -0
- data/vendor/Eigen/src/LU/Determinant.h +117 -0
- data/vendor/Eigen/src/LU/FullPivLU.h +877 -0
- data/vendor/Eigen/src/LU/InverseImpl.h +432 -0
- data/vendor/Eigen/src/LU/PartialPivLU.h +624 -0
- data/vendor/Eigen/src/LU/PartialPivLU_LAPACKE.h +83 -0
- data/vendor/Eigen/src/LU/arch/InverseSize4.h +351 -0
- data/vendor/Eigen/src/MetisSupport/MetisSupport.h +137 -0
- data/vendor/Eigen/src/OrderingMethods/Amd.h +435 -0
- data/vendor/Eigen/src/OrderingMethods/Eigen_Colamd.h +1863 -0
- data/vendor/Eigen/src/OrderingMethods/Ordering.h +153 -0
- data/vendor/Eigen/src/PaStiXSupport/PaStiXSupport.h +678 -0
- data/vendor/Eigen/src/PardisoSupport/PardisoSupport.h +545 -0
- data/vendor/Eigen/src/QR/ColPivHouseholderQR.h +674 -0
- data/vendor/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +97 -0
- data/vendor/Eigen/src/QR/CompleteOrthogonalDecomposition.h +635 -0
- data/vendor/Eigen/src/QR/FullPivHouseholderQR.h +713 -0
- data/vendor/Eigen/src/QR/HouseholderQR.h +434 -0
- data/vendor/Eigen/src/QR/HouseholderQR_LAPACKE.h +68 -0
- data/vendor/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +335 -0
- data/vendor/Eigen/src/SVD/BDCSVD.h +1366 -0
- data/vendor/Eigen/src/SVD/JacobiSVD.h +812 -0
- data/vendor/Eigen/src/SVD/JacobiSVD_LAPACKE.h +91 -0
- data/vendor/Eigen/src/SVD/SVDBase.h +376 -0
- data/vendor/Eigen/src/SVD/UpperBidiagonalization.h +414 -0
- data/vendor/Eigen/src/SparseCholesky/SimplicialCholesky.h +697 -0
- data/vendor/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +174 -0
- data/vendor/Eigen/src/SparseCore/AmbiVector.h +378 -0
- data/vendor/Eigen/src/SparseCore/CompressedStorage.h +274 -0
- data/vendor/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +352 -0
- data/vendor/Eigen/src/SparseCore/MappedSparseMatrix.h +67 -0
- data/vendor/Eigen/src/SparseCore/SparseAssign.h +270 -0
- data/vendor/Eigen/src/SparseCore/SparseBlock.h +571 -0
- data/vendor/Eigen/src/SparseCore/SparseColEtree.h +206 -0
- data/vendor/Eigen/src/SparseCore/SparseCompressedBase.h +370 -0
- data/vendor/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +722 -0
- data/vendor/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +150 -0
- data/vendor/Eigen/src/SparseCore/SparseDenseProduct.h +342 -0
- data/vendor/Eigen/src/SparseCore/SparseDiagonalProduct.h +138 -0
- data/vendor/Eigen/src/SparseCore/SparseDot.h +98 -0
- data/vendor/Eigen/src/SparseCore/SparseFuzzy.h +29 -0
- data/vendor/Eigen/src/SparseCore/SparseMap.h +305 -0
- data/vendor/Eigen/src/SparseCore/SparseMatrix.h +1518 -0
- data/vendor/Eigen/src/SparseCore/SparseMatrixBase.h +398 -0
- data/vendor/Eigen/src/SparseCore/SparsePermutation.h +178 -0
- data/vendor/Eigen/src/SparseCore/SparseProduct.h +181 -0
- data/vendor/Eigen/src/SparseCore/SparseRedux.h +49 -0
- data/vendor/Eigen/src/SparseCore/SparseRef.h +397 -0
- data/vendor/Eigen/src/SparseCore/SparseSelfAdjointView.h +659 -0
- data/vendor/Eigen/src/SparseCore/SparseSolverBase.h +124 -0
- data/vendor/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +198 -0
- data/vendor/Eigen/src/SparseCore/SparseTranspose.h +92 -0
- data/vendor/Eigen/src/SparseCore/SparseTriangularView.h +189 -0
- data/vendor/Eigen/src/SparseCore/SparseUtil.h +186 -0
- data/vendor/Eigen/src/SparseCore/SparseVector.h +478 -0
- data/vendor/Eigen/src/SparseCore/SparseView.h +254 -0
- data/vendor/Eigen/src/SparseCore/TriangularSolver.h +315 -0
- data/vendor/Eigen/src/SparseLU/SparseLU.h +923 -0
- data/vendor/Eigen/src/SparseLU/SparseLUImpl.h +66 -0
- data/vendor/Eigen/src/SparseLU/SparseLU_Memory.h +226 -0
- data/vendor/Eigen/src/SparseLU/SparseLU_Structs.h +110 -0
- data/vendor/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +375 -0
- data/vendor/Eigen/src/SparseLU/SparseLU_Utils.h +80 -0
- data/vendor/Eigen/src/SparseLU/SparseLU_column_bmod.h +181 -0
- data/vendor/Eigen/src/SparseLU/SparseLU_column_dfs.h +179 -0
- data/vendor/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +107 -0
- data/vendor/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +280 -0
- data/vendor/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +126 -0
- data/vendor/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +130 -0
- data/vendor/Eigen/src/SparseLU/SparseLU_panel_bmod.h +223 -0
- data/vendor/Eigen/src/SparseLU/SparseLU_panel_dfs.h +258 -0
- data/vendor/Eigen/src/SparseLU/SparseLU_pivotL.h +137 -0
- data/vendor/Eigen/src/SparseLU/SparseLU_pruneL.h +136 -0
- data/vendor/Eigen/src/SparseLU/SparseLU_relax_snode.h +83 -0
- data/vendor/Eigen/src/SparseQR/SparseQR.h +758 -0
- data/vendor/Eigen/src/StlSupport/StdDeque.h +116 -0
- data/vendor/Eigen/src/StlSupport/StdList.h +106 -0
- data/vendor/Eigen/src/StlSupport/StdVector.h +131 -0
- data/vendor/Eigen/src/StlSupport/details.h +84 -0
- data/vendor/Eigen/src/SuperLUSupport/SuperLUSupport.h +1025 -0
- data/vendor/Eigen/src/UmfPackSupport/UmfPackSupport.h +642 -0
- data/vendor/Eigen/src/misc/Image.h +82 -0
- data/vendor/Eigen/src/misc/Kernel.h +79 -0
- data/vendor/Eigen/src/misc/RealSvd2x2.h +55 -0
- data/vendor/Eigen/src/misc/blas.h +440 -0
- data/vendor/Eigen/src/misc/lapack.h +152 -0
- data/vendor/Eigen/src/misc/lapacke.h +16292 -0
- data/vendor/Eigen/src/misc/lapacke_mangling.h +17 -0
- data/vendor/Eigen/src/plugins/ArrayCwiseBinaryOps.h +358 -0
- data/vendor/Eigen/src/plugins/ArrayCwiseUnaryOps.h +696 -0
- data/vendor/Eigen/src/plugins/BlockMethods.h +1442 -0
- data/vendor/Eigen/src/plugins/CommonCwiseBinaryOps.h +115 -0
- data/vendor/Eigen/src/plugins/CommonCwiseUnaryOps.h +177 -0
- data/vendor/Eigen/src/plugins/IndexedViewMethods.h +262 -0
- data/vendor/Eigen/src/plugins/MatrixCwiseBinaryOps.h +152 -0
- data/vendor/Eigen/src/plugins/MatrixCwiseUnaryOps.h +95 -0
- data/vendor/Eigen/src/plugins/ReshapedMethods.h +149 -0
- data/vendor/aarand/aarand.hpp +114 -0
- data/vendor/annoy/annoylib.h +1495 -0
- data/vendor/annoy/kissrandom.h +120 -0
- data/vendor/annoy/mman.h +242 -0
- data/vendor/hnswlib/bruteforce.h +152 -0
- data/vendor/hnswlib/hnswalg.h +1192 -0
- data/vendor/hnswlib/hnswlib.h +108 -0
- data/vendor/hnswlib/space_ip.h +282 -0
- data/vendor/hnswlib/space_l2.h +281 -0
- data/vendor/hnswlib/visited_list_pool.h +79 -0
- data/vendor/irlba/irlba.hpp +575 -0
- data/vendor/irlba/lanczos.hpp +212 -0
- data/vendor/irlba/parallel.hpp +474 -0
- data/vendor/irlba/utils.hpp +224 -0
- data/vendor/irlba/wrappers.hpp +228 -0
- data/vendor/kmeans/Base.hpp +75 -0
- data/vendor/kmeans/Details.hpp +79 -0
- data/vendor/kmeans/HartiganWong.hpp +492 -0
- data/vendor/kmeans/InitializeKmeansPP.hpp +144 -0
- data/vendor/kmeans/InitializeNone.hpp +44 -0
- data/vendor/kmeans/InitializePCAPartition.hpp +309 -0
- data/vendor/kmeans/InitializeRandom.hpp +91 -0
- data/vendor/kmeans/Kmeans.hpp +161 -0
- data/vendor/kmeans/Lloyd.hpp +134 -0
- data/vendor/kmeans/MiniBatch.hpp +269 -0
- data/vendor/kmeans/QuickSearch.hpp +179 -0
- data/vendor/kmeans/compute_centroids.hpp +32 -0
- data/vendor/kmeans/compute_wcss.hpp +27 -0
- data/vendor/kmeans/is_edge_case.hpp +42 -0
- data/vendor/kmeans/random.hpp +55 -0
- data/vendor/knncolle/Annoy/Annoy.hpp +193 -0
- data/vendor/knncolle/BruteForce/BruteForce.hpp +120 -0
- data/vendor/knncolle/Hnsw/Hnsw.hpp +225 -0
- data/vendor/knncolle/Kmknn/Kmknn.hpp +286 -0
- data/vendor/knncolle/VpTree/VpTree.hpp +256 -0
- data/vendor/knncolle/knncolle.hpp +34 -0
- data/vendor/knncolle/utils/Base.hpp +100 -0
- data/vendor/knncolle/utils/NeighborQueue.hpp +94 -0
- data/vendor/knncolle/utils/distances.hpp +98 -0
- data/vendor/knncolle/utils/find_nearest_neighbors.hpp +112 -0
- data/vendor/powerit/PowerIterations.hpp +157 -0
- data/vendor/umappp/NeighborList.hpp +37 -0
- data/vendor/umappp/Umap.hpp +662 -0
- data/vendor/umappp/combine_neighbor_sets.hpp +95 -0
- data/vendor/umappp/find_ab.hpp +157 -0
- data/vendor/umappp/neighbor_similarities.hpp +136 -0
- data/vendor/umappp/optimize_layout.hpp +285 -0
- data/vendor/umappp/spectral_init.hpp +181 -0
- data/vendor/umappp/umappp.hpp +13 -0
- metadata +465 -0
|
@@ -0,0 +1,309 @@
|
|
|
1
|
+
#ifndef KMEANS_INITIALIZE_PCA_PARTITION_HPP
|
|
2
|
+
#define KMEANS_INITIALIZE_PCA_PARTITION_HPP
|
|
3
|
+
|
|
4
|
+
#include <iostream>
|
|
5
|
+
#include <random>
|
|
6
|
+
#include <vector>
|
|
7
|
+
#include <cmath>
|
|
8
|
+
#include <algorithm>
|
|
9
|
+
#include <numeric>
|
|
10
|
+
|
|
11
|
+
#include "aarand/aarand.hpp"
|
|
12
|
+
#include "powerit/PowerIterations.hpp"
|
|
13
|
+
#include "Base.hpp"
|
|
14
|
+
|
|
15
|
+
/**
|
|
16
|
+
* @file InitializePCAPartition.hpp
|
|
17
|
+
*
|
|
18
|
+
* @brief Class for k-means initialization with PCA partitioning.
|
|
19
|
+
*/
|
|
20
|
+
namespace kmeans {
|
|
21
|
+
|
|
22
|
+
/**
|
|
23
|
+
* @brief Implements the PCA partitioning method of Su and Dy (2007).
|
|
24
|
+
*
|
|
25
|
+
* This approach involves the selection of starting points via iterative partitioning based on principal components analysis.
|
|
26
|
+
* The aim is to obtain well-separated starting points for refinement with algorithms like Hartigan-Wong or Lloyd.
|
|
27
|
+
* This is achieved by selecting the most dispersed cluster for further partitioning.
|
|
28
|
+
*
|
|
29
|
+
* We start from a single cluster containing all points.
|
|
30
|
+
* At each iteration, we select the cluster with the largest within-cluster sum of squares (WCSS);
|
|
31
|
+
* we identify the first principal component within that cluster;
|
|
32
|
+
* and we split the cluster at its center along that axis to obtain two new clusters.
|
|
33
|
+
* This is repeated until the desired number of clusters is obtained, and the centers and cluster identifiers are then reported.
|
|
34
|
+
*
|
|
35
|
+
* The original algorithm favors selection and partitioning of the largest cluster, which has the greatest chance of having the highest WCSS.
|
|
36
|
+
* For more fine-grained control, we modify the procedure to adjust the effective number of observations contributing to the WCSS.
|
|
37
|
+
* Specifically, we choose the cluster to partition based on the product of $N$ and the mean squared difference within each cluster,
|
|
38
|
+
* where $N$ is the cluster size raised to some user-specified power (i.e., the "size adjustment") between 0 and 1.
|
|
39
|
+
* An adjustment of 1 recapitulates the original algorithm, while smaller values of the size adjustment will reduce the preference towards larger clusters.
|
|
40
|
+
* A value of zero means that the cluster size is completely ignored, though this seems unwise as it causes excessive splitting of small clusters with unstable WCSS.
|
|
41
|
+
*
|
|
42
|
+
* This method is not completely deterministic as a randomization step is used in the PCA.
|
|
43
|
+
* Nonetheless, the stochasticity is likely to have a much smaller effect than in the other initialization methods.
|
|
44
|
+
*
|
|
45
|
+
* @tparam DATA_t Floating-point type for the data and centroids.
|
|
46
|
+
* @tparam CLUSTER_t Integer type for the cluster index.
|
|
47
|
+
* @tparam INDEX_t Integer type for the observation index.
|
|
48
|
+
*
|
|
49
|
+
* @see
|
|
50
|
+
* Su, T. and Dy, J. G. (2007).
|
|
51
|
+
* In Search of Deterministic Methods for Initializing K-Means and Gaussian Mixture Clustering,
|
|
52
|
+
* _Intelligent Data Analysis_ 11, 319-338.
|
|
53
|
+
*/
|
|
54
|
+
template<typename DATA_t = double, typename CLUSTER_t = int, typename INDEX_t = int>
|
|
55
|
+
class InitializePCAPartition : public Initialize<DATA_t, CLUSTER_t, INDEX_t> {
|
|
56
|
+
public:
|
|
57
|
+
/**
|
|
58
|
+
* @brief Default parameter settings.
|
|
59
|
+
*/
|
|
60
|
+
struct Defaults {
|
|
61
|
+
/**
|
|
62
|
+
* See `set_size_adjustment()` for more details.
|
|
63
|
+
*/
|
|
64
|
+
static constexpr DATA_t size_adjustment = 1;
|
|
65
|
+
|
|
66
|
+
/**
|
|
67
|
+
* See `set_seed()` for more details.
|
|
68
|
+
*/
|
|
69
|
+
static constexpr uint64_t seed = 6523u;
|
|
70
|
+
};
|
|
71
|
+
|
|
72
|
+
public:
|
|
73
|
+
/**
|
|
74
|
+
* @param i Maximum number of power iterations to use in `powerit::PowerIterations::set_iterations()`.
|
|
75
|
+
* @return A reference to this `InitializePCAPartition` object.
|
|
76
|
+
*/
|
|
77
|
+
InitializePCAPartition& set_iterations(int i = powerit::PowerIterations::Defaults::iterations) {
    // Stored here and handed to the power iterations object inside compute_pc1().
    this->iters = i;
    return *this;
}
|
|
81
|
+
|
|
82
|
+
/**
|
|
83
|
+
* @param t Convergence threshold to use in `powerit::PowerIterations::set_tolerance()`.
|
|
84
|
+
* @return A reference to this `InitializePCAPartition` object.
|
|
85
|
+
*/
|
|
86
|
+
InitializePCAPartition& set_tolerance(DATA_t t = powerit::PowerIterations::Defaults::tolerance) {
    // Stored here and handed to the power iterations object inside compute_pc1().
    this->tol = t;
    return *this;
}
|
|
90
|
+
|
|
91
|
+
/**
|
|
92
|
+
* @param s Size adjustment value, should lie in [0, 1].
|
|
93
|
+
* @return A reference to this `InitializePCAPartition` object.
|
|
94
|
+
*/
|
|
95
|
+
InitializePCAPartition& set_size_adjustment(DATA_t s = Defaults::size_adjustment) {
    // The value is stored as given; no range check against [0, 1] is performed here.
    this->adjust = s;
    return *this;
}
|
|
99
|
+
|
|
100
|
+
/**
|
|
101
|
+
* @param s Random seed to use to construct the PRNG for the power method.
|
|
102
|
+
* @return A reference to this `InitializePCAPartition` object.
|
|
103
|
+
*/
|
|
104
|
+
InitializePCAPartition& set_seed(uint64_t s = Defaults::seed) {
    // Used to construct the std::mt19937_64 engine at the start of run().
    this->seed = s;
    return *this;
}
|
|
108
|
+
private:
|
|
109
|
+
int iters = powerit::PowerIterations::Defaults::iterations;
|
|
110
|
+
DATA_t tol = powerit::PowerIterations::Defaults::tolerance;
|
|
111
|
+
DATA_t adjust = Defaults::size_adjustment;
|
|
112
|
+
uint64_t seed = Defaults::seed;
|
|
113
|
+
|
|
114
|
+
public:
|
|
115
|
+
/**
|
|
116
|
+
* @cond
|
|
117
|
+
*/
|
|
118
|
+
// Scales the `ndim`-length vector `x` in place to unit Euclidean length.
//
// Returns the Euclidean length of `x` before scaling. If the sum of squares
// is zero, `x` is left untouched and zero is returned.
static DATA_t normalize(int ndim, DATA_t* x) {
    DATA_t sumsq = 0;
    for (int pos = 0; pos < ndim; ++pos) {
        sumsq += x[pos] * x[pos];
    }

    // Nothing to scale for an all-zero vector; also avoids dividing by zero.
    if (!sumsq) {
        return sumsq;
    }

    const DATA_t length = std::sqrt(sumsq);
    for (int pos = 0; pos < ndim; ++pos) {
        x[pos] /= length;
    }
    return length;
}
|
|
132
|
+
|
|
133
|
+
template<class Rng>
|
|
134
|
+
std::vector<DATA_t> compute_pc1(int ndim, const std::vector<INDEX_t>& chosen, const DATA_t* data, const DATA_t* center, Rng& eng) const {
|
|
135
|
+
std::vector<DATA_t> delta(ndim);
|
|
136
|
+
std::vector<DATA_t> cov(ndim * ndim);
|
|
137
|
+
|
|
138
|
+
// Computing the lower triangle of the covariance matrix.
|
|
139
|
+
for (auto i : chosen) {
|
|
140
|
+
auto dptr = data + i * ndim;
|
|
141
|
+
for (int j = 0; j < ndim; ++j) {
|
|
142
|
+
delta[j] = dptr[j] - center[j];
|
|
143
|
+
}
|
|
144
|
+
for (int j = 0; j < ndim; ++j) {
|
|
145
|
+
for (int k = 0; k <= j; ++k) {
|
|
146
|
+
cov[j * ndim + k] += delta[j] * delta[k];
|
|
147
|
+
}
|
|
148
|
+
}
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
// Filling in the other side of the matrix, to enable cache-efficient multiplication.
|
|
152
|
+
for (size_t j = 0; j < ndim; ++j) {
|
|
153
|
+
for (size_t k = j + 1; k < ndim; ++k) {
|
|
154
|
+
cov[j * ndim + k] = cov[k * ndim + j];
|
|
155
|
+
}
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
powerit::PowerIterations power;
|
|
159
|
+
power.set_iterations(iters).set_tolerance(tol);
|
|
160
|
+
|
|
161
|
+
std::vector<DATA_t> output(ndim);
|
|
162
|
+
power.run(ndim, cov.data(), output.data(), eng);
|
|
163
|
+
return output;
|
|
164
|
+
}
|
|
165
|
+
|
|
166
|
+
static void compute_center(int ndim, INDEX_t nobs, const DATA_t* data, DATA_t* center) {
|
|
167
|
+
std::fill(center, center + ndim, 0);
|
|
168
|
+
for (size_t i = 0; i < nobs; ++i) {
|
|
169
|
+
auto dptr = data + i * ndim;
|
|
170
|
+
for (int d = 0; d < ndim; ++d) {
|
|
171
|
+
center[d] += dptr[d];
|
|
172
|
+
}
|
|
173
|
+
}
|
|
174
|
+
for (int d = 0; d < ndim; ++d) {
|
|
175
|
+
center[d] /= nobs;
|
|
176
|
+
}
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
static void compute_center(int ndim, const std::vector<INDEX_t>& chosen, const DATA_t* data, DATA_t* center) {
|
|
180
|
+
std::fill(center, center + ndim, 0);
|
|
181
|
+
for (auto i : chosen) {
|
|
182
|
+
auto dptr = data + i * ndim;
|
|
183
|
+
for (int d = 0; d < ndim; ++d) {
|
|
184
|
+
center[d] += dptr[d];
|
|
185
|
+
}
|
|
186
|
+
}
|
|
187
|
+
for (int d = 0; d < ndim; ++d) {
|
|
188
|
+
center[d] /= chosen.size();
|
|
189
|
+
}
|
|
190
|
+
}
|
|
191
|
+
|
|
192
|
+
// Recomputes `center` as the centroid of the observations in `chosen`, then
// returns the sum of squared differences between those observations and the
// new center (over all dimensions), divided by `chosen.size()`.
//
// ndim:   number of dimensions, i.e., values per observation.
// chosen: indices of the observations in the cluster; an empty vector yields
//         a division by zero.
// data:   array in which observation `i` occupies the `ndim` consecutive
//         values starting at offset `i * ndim`.
// center: array of length `ndim`, overwritten with the cluster centroid.
static DATA_t update_mrse(int ndim, const std::vector<INDEX_t>& chosen, const DATA_t* data, DATA_t* center) {
    compute_center(ndim, chosen, data, center);

    DATA_t curmrse = 0;
    for (auto i : chosen) {
        // Widen before multiplying: with an `int` INDEX_t, `i * ndim` is
        // evaluated in int and overflows once the offset exceeds INT_MAX.
        auto dptr = data + static_cast<size_t>(i) * ndim;
        for (int d = 0; d < ndim; ++d) {
            DATA_t delta = dptr[d] - center[d];
            curmrse += delta * delta;
        }
    }

    return curmrse / chosen.size();
}
|
|
206
|
+
/**
|
|
207
|
+
* @endcond
|
|
208
|
+
*/
|
|
209
|
+
public:
|
|
210
|
+
/**
|
|
211
|
+
* @param ndim Number of dimensions.
|
|
212
|
+
* @param nobs Number of observations.
|
|
213
|
+
* @param data Pointer to an array where the dimensions are rows and the observations are columns.
|
|
214
|
+
* Data should be stored in column-major format.
|
|
215
|
+
* @param ncenters Number of centers to pick.
|
|
216
|
+
* @param[out] centers Pointer to a `ndim`-by-`ncenters` array where columns are cluster centers and rows are dimensions.
|
|
217
|
+
* On output, this will contain the final centroid locations for each cluster.
|
|
218
|
+
* Data should be stored in column-major order.
|
|
219
|
+
* @param clusters Pointer to an array of length `nobs`.
|
|
220
|
+
* This is used as a buffer and the contents on output should not be used.
|
|
221
|
+
*
|
|
222
|
+
* @return `centers` is filled with the new cluster centers.
|
|
223
|
+
* The number of filled centers is returned, see `Initialize::run()`.
|
|
224
|
+
*/
|
|
225
|
+
/**
 * Chooses initial centers by repeatedly splitting the cluster with the largest
 * (size-adjusted) within-cluster error along the axis returned by `compute_pc1()`.
 *
 * @param ndim Number of dimensions.
 * @param nobs Number of observations.
 * @param data Pointer to a column-major array where rows are dimensions and columns are observations.
 * @param ncenters Number of centers to pick.
 * @param[out] centers Pointer to a column-major `ndim`-by-`ncenters` array, filled with one center per column.
 * @param clusters Pointer to an array of length `nobs`, used as a working buffer.
 *
 * @return Number of centers actually written to `centers`.
 * This is zero if there are no observations or no centers were requested,
 * and less than `ncenters` if some cluster could not be split any further.
 */
CLUSTER_t run(int ndim, INDEX_t nobs, const DATA_t* data, CLUSTER_t ncenters, DATA_t* centers, CLUSTER_t* clusters) {
    // Without observations there is nothing to partition; without any requested
    // center, `centers` and `assignments` have no slot zero to write into.
    if (nobs == 0 || ncenters < 1) {
        return 0;
    }

    std::mt19937_64 rng(seed);
    std::vector<DATA_t> mrse(ncenters);
    std::vector<std::vector<INDEX_t> > assignments(ncenters);

    // Setting up the zero'th cluster, which starts out holding every observation.
    // (No need to actually compute the MRSE at this point, as there's nothing
    // to compare it to.)
    compute_center(ndim, nobs, data, centers);
    assignments[0].resize(nobs);
    std::iota(assignments.front().begin(), assignments.front().end(), 0);
    std::fill(clusters, clusters + nobs, 0);

    for (CLUSTER_t cluster = 1; cluster < ncenters; ++cluster) {
        // Pick the existing cluster with the largest MRSE, weighted by its size
        // raised to the power of `adjust`. Ties and all-zero errors fall back
        // to cluster zero.
        DATA_t worst_ss = 0;
        CLUSTER_t worst_cluster = 0;
        for (CLUSTER_t i = 0; i < cluster; ++i) {
            DATA_t multiplier = assignments[i].size();
            if (adjust != 1) {
                multiplier = std::pow(static_cast<DATA_t>(multiplier), adjust);
            }

            DATA_t pseudo_ss = mrse[i] * multiplier;
            if (pseudo_ss > worst_ss) {
                worst_ss = pseudo_ss;
                worst_cluster = i;
            }
        }

        // Extracting the principal component for the chosen cluster.
        auto worst_center = centers + worst_cluster * ndim;
        auto& worst_assignments = assignments[worst_cluster];
        auto pc1 = compute_pc1(ndim, worst_assignments, data, worst_center, rng);

        // Projecting all points in this cluster along PC1. The center lies
        // at zero, so everything positive (on one side of the hyperplane
        // orthogonal to PC1 and passing through the center) gets bumped to
        // the next cluster.
        std::vector<INDEX_t>& new_assignments = assignments[cluster];
        std::vector<INDEX_t> worst_assignments2;
        for (auto i : worst_assignments) {
            auto dptr = data + i * ndim;
            DATA_t proj = 0;
            for (int d = 0; d < ndim; ++d) {
                proj += (dptr[d] - worst_center[d]) * pc1[d];
            }

            if (proj > 0) {
                new_assignments.push_back(i);
            } else {
                worst_assignments2.push_back(i);
            }
        }

        // If one or the other is empty, then this entire procedure short
        // circuits as all future iterations will just re-select this
        // cluster (which won't get partitioned properly anyway). In the
        // bigger picture, the quick exit out of the iterations is correct
        // as we should only fail to partition in this manner if all points
        // within each remaining cluster are identical.
        if (new_assignments.empty() || worst_assignments2.empty()) {
            return cluster;
        }

        for (auto i : new_assignments) {
            clusters[i] = cluster;
        }
        worst_assignments.swap(worst_assignments2);

        // Computing centers and MRSE for both halves of the split.
        auto new_center = centers + cluster * ndim;
        mrse[cluster] = update_mrse(ndim, new_assignments, data, new_center);
        mrse[worst_cluster] = update_mrse(ndim, worst_assignments, data, worst_center);
    }

    return ncenters;
}
|
|
305
|
+
};
|
|
306
|
+
|
|
307
|
+
}
|
|
308
|
+
|
|
309
|
+
#endif
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
#ifndef KMEANS_INITIALIZE_RANDOM_HPP
|
|
2
|
+
#define KMEANS_INITIALIZE_RANDOM_HPP
|
|
3
|
+
|
|
4
|
+
#include <algorithm>
|
|
5
|
+
#include <cstdint>
|
|
6
|
+
#include <random>
|
|
7
|
+
|
|
8
|
+
#include "Base.hpp"
|
|
9
|
+
#include "random.hpp"
|
|
10
|
+
|
|
11
|
+
/**
|
|
12
|
+
* @file InitializeRandom.hpp
|
|
13
|
+
*
|
|
14
|
+
* @brief Class for random initialization.
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
namespace kmeans {
|
|
18
|
+
|
|
19
|
+
/**
|
|
20
|
+
* @cond
|
|
21
|
+
*/
|
|
22
|
+
/**
 * Copies the selected observations from `in` into consecutive columns of `out`.
 *
 * @tparam V Iterable container of integer observation indices.
 * @tparam DATA_t Type of the data values.
 *
 * @param chosen Indices of the observations to copy, in output order.
 * @param ndim Number of values per observation.
 * @param in Pointer to the source array; observation `c` occupies `ndim` values starting at `in + c * ndim`.
 * @param[out] out Pointer to the destination array, which must have room for `ndim` values per entry of `chosen`.
 */
template<class V, typename DATA_t>
void copy_into_array(const V& chosen, int ndim, const DATA_t* in, DATA_t* out) {
    for (auto c : chosen) {
        // Widen before multiplying so that the offset cannot overflow `int`
        // when the index times the number of dimensions exceeds its range.
        const DATA_t* src = in + static_cast<std::int64_t>(c) * ndim;

        // std::copy returns one-past-the-last written element, i.e., the next column.
        out = std::copy(src, src + ndim, out);
    }
}
|
|
31
|
+
/**
|
|
32
|
+
* @endcond
|
|
33
|
+
*/
|
|
34
|
+
|
|
35
|
+
/**
|
|
36
|
+
* @brief Initialize starting points by sampling random observations without replacement.
|
|
37
|
+
*
|
|
38
|
+
* @tparam DATA_t Floating-point type for the data and centroids.
|
|
39
|
+
* @tparam CLUSTER_t Integer type for the cluster index.
|
|
40
|
+
* @tparam INDEX_t Integer type for the observation index.
|
|
41
|
+
*/
|
|
42
|
+
template<typename DATA_t = double, typename CLUSTER_t = int, typename INDEX_t = int>
|
|
43
|
+
class InitializeRandom : public Initialize<DATA_t, CLUSTER_t, INDEX_t> {
|
|
44
|
+
public:
|
|
45
|
+
/**
|
|
46
|
+
* @brief Default parameter settings.
|
|
47
|
+
*/
|
|
48
|
+
struct Defaults {
|
|
49
|
+
/**
|
|
50
|
+
* See `set_seed()` for more details.
|
|
51
|
+
*/
|
|
52
|
+
static constexpr uint64_t seed = 6523u;
|
|
53
|
+
};
|
|
54
|
+
|
|
55
|
+
/**
|
|
56
|
+
* @param Random seed to use to construct the PRNG prior to sampling.
|
|
57
|
+
*
|
|
58
|
+
* @return A reference to this `InitializeRandom` object.
|
|
59
|
+
*/
|
|
60
|
+
InitializeRandom& set_seed(uint64_t s = Defaults::seed) {
|
|
61
|
+
seed = s;
|
|
62
|
+
return *this;
|
|
63
|
+
}
|
|
64
|
+
private:
|
|
65
|
+
uint64_t seed = Defaults::seed;
|
|
66
|
+
public:
|
|
67
|
+
/*
|
|
68
|
+
* @param ndim Number of dimensions.
|
|
69
|
+
* @param nobs Number of observations.
|
|
70
|
+
* @param data Pointer to an array where the dimensions are rows and the observations are columns.
|
|
71
|
+
* Data should be stored in column-major format.
|
|
72
|
+
* @param ncenters Number of centers to pick.
|
|
73
|
+
* @param[out] centers Pointer to a `ndim`-by-`ncenters` array where columns are cluster centers and rows are dimensions.
|
|
74
|
+
* On output, this will contain the final centroid locations for each cluster.
|
|
75
|
+
* Data should be stored in column-major order.
|
|
76
|
+
* @param clusters Ignored in this method.
|
|
77
|
+
*
|
|
78
|
+
* @return `centers` is filled with the new cluster centers.
|
|
79
|
+
* The number of filled centers is returned, see `Initializer::run()`.
|
|
80
|
+
*/
|
|
81
|
+
CLUSTER_t run(int ndim, INDEX_t nobs, const DATA_t* data, CLUSTER_t ncenters, DATA_t* centers, CLUSTER_t* clusters) {
|
|
82
|
+
std::mt19937_64 eng(seed);
|
|
83
|
+
auto chosen = sample_without_replacement(nobs, ncenters, eng);
|
|
84
|
+
copy_into_array(chosen, ndim, data, centers);
|
|
85
|
+
return chosen.size();
|
|
86
|
+
}
|
|
87
|
+
};
|
|
88
|
+
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
#endif
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
#ifndef KMEANS_KMEANS_HPP
|
|
2
|
+
#define KMEANS_KMEANS_HPP
|
|
3
|
+
|
|
4
|
+
#include "Base.hpp"
|
|
5
|
+
#include "HartiganWong.hpp"
|
|
6
|
+
#include "InitializeKmeansPP.hpp"
|
|
7
|
+
#include "Details.hpp"
|
|
8
|
+
#include <random>
|
|
9
|
+
|
|
10
|
+
/**
|
|
11
|
+
* @file Kmeans.hpp
|
|
12
|
+
*
|
|
13
|
+
* @brief Implements the full k-means clustering procedure.
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
namespace kmeans {
|
|
17
|
+
|
|
18
|
+
/**
|
|
19
|
+
* @brief Top-level class to run k-means clustering.
|
|
20
|
+
*
|
|
21
|
+
* k-means clustering aims to partition a dataset of `nobs` observations into `ncenters` clusters where `ncenters` is specified in advance.
|
|
22
|
+
* Each observation is assigned to its closest cluster based on the distance to the cluster centroids.
|
|
23
|
+
* The cluster centroids themselves are chosen to minimize the sum of squared Euclidean distances from each observation to its assigned cluster.
|
|
24
|
+
* This procedure involves some heuristics to choose a good initial set of centroids (see `weighted_initialization()` for details)
|
|
25
|
+
* and to converge to a local minimum (see `HartiganWong`, `Lloyd` or `MiniBatch` for details).
|
|
26
|
+
*
|
|
27
|
+
* @tparam DATA_t Floating-point type for the data and centroids.
|
|
28
|
+
* @tparam CLUSTER_t Integer type for the cluster assignments.
|
|
29
|
+
* @tparam INDEX_t Integer type for the observation index.
|
|
30
|
+
* This should have a maximum positive value that is at least 50 times greater than the maximum expected number of observations.
|
|
31
|
+
*/
|
|
32
|
+
template<typename DATA_t = double, typename CLUSTER_t = int, typename INDEX_t = int>
|
|
33
|
+
class Kmeans {
|
|
34
|
+
public:
|
|
35
|
+
/**
|
|
36
|
+
* @brief Default parameter values for `Kmeans`.
|
|
37
|
+
*/
|
|
38
|
+
struct Defaults {
|
|
39
|
+
/**
|
|
40
|
+
* See `set_seed()` for more details.
|
|
41
|
+
*/
|
|
42
|
+
static constexpr uint64_t seed = 5489u;
|
|
43
|
+
};
|
|
44
|
+
|
|
45
|
+
private:
|
|
46
|
+
uint64_t seed = Defaults::seed;
|
|
47
|
+
|
|
48
|
+
public:
|
|
49
|
+
/**
|
|
50
|
+
* @param s Seed to use for PRNG.
|
|
51
|
+
* Defaults to default seed for the `std::mt19937_64` constructor.
|
|
52
|
+
*
|
|
53
|
+
* @return A reference to this `Kmeans` object.
|
|
54
|
+
*
|
|
55
|
+
* This seed is only used for the default `refiner` and `initializer` instances in `run()`.
|
|
56
|
+
* Otherwise, the seed from individual instances is respected.
|
|
57
|
+
*/
|
|
58
|
+
Kmeans& set_seed(uint64_t s = 5489u) {
|
|
59
|
+
seed = s;
|
|
60
|
+
return *this;
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
public:
|
|
64
|
+
/**
|
|
65
|
+
* @param ndim Number of dimensions.
|
|
66
|
+
* @param nobs Number of observations.
|
|
67
|
+
* @param[in] data Pointer to a `ndim`-by-`nobs` array where columns are observations and rows are dimensions.
|
|
68
|
+
* Data should be stored in column-major order.
|
|
69
|
+
* @param ncenters Number of cluster centers.
|
|
70
|
+
* @param[in, out] centers Pointer to a `ndim`-by-`ncenters` array where columns are cluster centers and rows are dimensions.
|
|
71
|
+
* On input, this should contain the initial centroid locations for each cluster if `set_initialization_method()` is `NONE`, otherwise it is ignored.
|
|
72
|
+
* On output, this will contain the final centroid locations for each cluster.
|
|
73
|
+
* Data should be stored in column-major order.
|
|
74
|
+
* @param[in, out] clusters Pointer to an array of length `nobs`.
|
|
75
|
+
* On input, this should contain the identity of the closest cluster for each observation if `set_initialization_method()` is `REINIT_PRECOMPUTED`, otherwise it is ignored.
|
|
76
|
+
* On output, this will contain the (0-indexed) cluster assignment for each observation.
|
|
77
|
+
* @param initializer Pointer to a `Initialize` object containing the desired k-means initialization method, e.g., `InitializeNone`, `InitializeRandom`, `InitializeKmeansPP`.
|
|
78
|
+
* If `NULL`, this defaults to a default-constructed `InitializeKmeansPP` instance.
|
|
79
|
+
* @param refiner Pointer to a `Refine` object containing the desired k-means refinement algorithm, e.g., `HartiganWong`, `Lloyd`, `MiniBatch`.
|
|
80
|
+
* If `NULL`, this defaults to a default-constructed `HartiganWong` instance.
|
|
81
|
+
*
|
|
82
|
+
* @return `centers` and `clusters` are filled, and a `Details` object is returned containing clustering statistics.
|
|
83
|
+
* Note that the actual number of clusters may be less than `ncenters` in pathological cases -
|
|
84
|
+
* check the length of `Details::sizes` and the value of `Details::status`.
|
|
85
|
+
*/
|
|
86
|
+
Details<DATA_t, INDEX_t> run(int ndim, INDEX_t nobs, const DATA_t* data, CLUSTER_t ncenters, DATA_t* centers, CLUSTER_t* clusters,
|
|
87
|
+
Initialize<DATA_t, CLUSTER_t, INDEX_t>* initializer = NULL, Refine<DATA_t, CLUSTER_t, INDEX_t>* refiner = NULL)
|
|
88
|
+
{
|
|
89
|
+
if (initializer == NULL) {
|
|
90
|
+
InitializeKmeansPP<DATA_t, CLUSTER_t, INDEX_t> init;
|
|
91
|
+
ncenters = init.run(ndim, nobs, data, ncenters, centers, clusters);
|
|
92
|
+
} else {
|
|
93
|
+
ncenters = initializer->run(ndim, nobs, data, ncenters, centers, clusters);
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
if (refiner == NULL) {
|
|
97
|
+
HartiganWong<DATA_t, CLUSTER_t, INDEX_t> hw;
|
|
98
|
+
return hw.run(ndim, nobs, data, ncenters, centers, clusters);
|
|
99
|
+
} else {
|
|
100
|
+
return refiner->run(ndim, nobs, data, ncenters, centers, clusters);
|
|
101
|
+
}
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
public:
|
|
105
|
+
/**
|
|
106
|
+
* @brief Full statistics from k-means clustering.
|
|
107
|
+
*/
|
|
108
|
+
struct Results {
|
|
109
|
+
/**
|
|
110
|
+
* @cond
|
|
111
|
+
*/
|
|
112
|
+
Results(int ndim, INDEX_t nobs, CLUSTER_t ncenters) : centers(ndim * ncenters), clusters(nobs) {}
|
|
113
|
+
/**
|
|
114
|
+
* @endcond
|
|
115
|
+
*/
|
|
116
|
+
|
|
117
|
+
/**
|
|
118
|
+
* A column-major `ndim`-by-`ncenters` array containing per-cluster centroid coordinates.
|
|
119
|
+
*/
|
|
120
|
+
std::vector<DATA_t> centers;
|
|
121
|
+
|
|
122
|
+
/**
|
|
123
|
+
* An array of length `nobs` containing 0-indexed cluster assignments for each observation.
|
|
124
|
+
*/
|
|
125
|
+
std::vector<CLUSTER_t> clusters;
|
|
126
|
+
|
|
127
|
+
/**
|
|
128
|
+
* Further details from the chosen k-means algorithm.
|
|
129
|
+
*/
|
|
130
|
+
Details<DATA_t, INDEX_t> details;
|
|
131
|
+
};
|
|
132
|
+
|
|
133
|
+
/**
|
|
134
|
+
* @param ndim Number of dimensions.
|
|
135
|
+
* @param nobs Number of observations.
|
|
136
|
+
* @param[in] data Pointer to a `ndim`-by-`nobs` array where columns are observations and rows are dimensions.
|
|
137
|
+
* Data should be stored in column-major order.
|
|
138
|
+
* @param ncenters Number of cluster centers.
|
|
139
|
+
* @param initializer Pointer to a `Initialize` object containing the desired k-means initialization method.
|
|
140
|
+
* If `NULL`, this defaults to a default-constructed `InitializeKmeansPP` instance.
|
|
141
|
+
* @param refiner Pointer to a `Refine` object containing the desired k-means refinement algorithm.
|
|
142
|
+
* If `NULL`, this defaults to a default-constructed `HartiganWong` instance.
|
|
143
|
+
*
|
|
144
|
+
* @return `centers` and `clusters` are filled, and a `Results` object is returned containing clustering statistics.
|
|
145
|
+
* See `run()` for more details.
|
|
146
|
+
*
|
|
147
|
+
* For this method, it would be unwise to initialize with any methods that use information from the existing cluster centers.
|
|
148
|
+
* We suggest using only `InitializeRandom` or InitializeKmeansPP` here.
|
|
149
|
+
*/
|
|
150
|
+
Results run(int ndim, INDEX_t nobs, const DATA_t* data, CLUSTER_t ncenters,
|
|
151
|
+
Initialize<DATA_t, CLUSTER_t, INDEX_t>* initializer = NULL, Refine<DATA_t, CLUSTER_t, INDEX_t>* refiner = NULL)
|
|
152
|
+
{
|
|
153
|
+
Results output(ndim, nobs, ncenters);
|
|
154
|
+
output.details = run(ndim, nobs, data, ncenters, output.centers.data(), output.clusters.data(), initializer, refiner);
|
|
155
|
+
return output;
|
|
156
|
+
}
|
|
157
|
+
};
|
|
158
|
+
|
|
159
|
+
}
|
|
160
|
+
|
|
161
|
+
#endif
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
#ifndef KMEANS_LLOYD_HPP
|
|
2
|
+
#define KMEANS_LLOYD_HPP
|
|
3
|
+
|
|
4
|
+
#include <vector>
|
|
5
|
+
#include <algorithm>
|
|
6
|
+
#include <numeric>
|
|
7
|
+
#include <cstdint>
|
|
8
|
+
#include <stdexcept>
|
|
9
|
+
#include <limits>
|
|
10
|
+
|
|
11
|
+
#include "Base.hpp"
|
|
12
|
+
#include "Details.hpp"
|
|
13
|
+
#include "QuickSearch.hpp"
|
|
14
|
+
#include "is_edge_case.hpp"
|
|
15
|
+
#include "compute_centroids.hpp"
|
|
16
|
+
#include "compute_wcss.hpp"
|
|
17
|
+
|
|
18
|
+
/**
|
|
19
|
+
* @file Lloyd.hpp
|
|
20
|
+
*
|
|
21
|
+
* @brief Implements the Lloyd algorithm for k-means clustering.
|
|
22
|
+
*/
|
|
23
|
+
|
|
24
|
+
namespace kmeans {
|
|
25
|
+
|
|
26
|
+
/**
|
|
27
|
+
* @brief Implements the Lloyd algorithm for k-means clustering.
|
|
28
|
+
*
|
|
29
|
+
* The Lloyd algorithm is the simplest k-means clustering algorithm,
|
|
30
|
+
* involving several iterations of batch assignments and center calculations.
|
|
31
|
+
* Specifically, we assign each observation to its closest cluster, and once all points are assigned, we recompute the cluster centroids.
|
|
32
|
+
* This is repeated until there are no reassignments or the maximum number of iterations is reached.
|
|
33
|
+
*
|
|
34
|
+
* @tparam DATA_t Floating-point type for the data and centroids.
|
|
35
|
+
* @tparam CLUSTER_t Integer type for the cluster assignments.
|
|
36
|
+
* @tparam INDEX_t Integer type for the observation index.
|
|
37
|
+
*
|
|
38
|
+
* @see
|
|
39
|
+
* Lloyd, S. P. (1982).
|
|
40
|
+
* Least squares quantization in PCM.
|
|
41
|
+
* _IEEE Transactions on Information Theory_ 28, 128-137.
|
|
42
|
+
*/
|
|
43
|
+
template<typename DATA_t = double, typename CLUSTER_t = int, typename INDEX_t = int>
|
|
44
|
+
class Lloyd : public Refine<DATA_t, CLUSTER_t, INDEX_t> {
|
|
45
|
+
public:
|
|
46
|
+
/**
|
|
47
|
+
* @brief Default parameter values for `Lloyd`.
|
|
48
|
+
*/
|
|
49
|
+
struct Defaults {
|
|
50
|
+
/**
|
|
51
|
+
* See `Lloyd::set_max_iterations()`.
|
|
52
|
+
*/
|
|
53
|
+
static constexpr int max_iterations = 10;
|
|
54
|
+
};
|
|
55
|
+
|
|
56
|
+
private:
|
|
57
|
+
int maxiter = Defaults::max_iterations;
|
|
58
|
+
|
|
59
|
+
public:
|
|
60
|
+
/**
|
|
61
|
+
* @param m Maximum number of iterations.
|
|
62
|
+
* More iterations increase the opportunity for convergence at the cost of more computational time.
|
|
63
|
+
*
|
|
64
|
+
* @return A reference to this `Lloyd` object.
|
|
65
|
+
*/
|
|
66
|
+
Lloyd& set_max_iterations(int m = Defaults::max_iterations) {
|
|
67
|
+
maxiter = m;
|
|
68
|
+
return *this;
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
public:
|
|
72
|
+
Details<DATA_t, INDEX_t> run(int ndim, INDEX_t nobs, const DATA_t* data, CLUSTER_t ncenters, DATA_t* centers, CLUSTER_t* clusters) {
|
|
73
|
+
if (is_edge_case(nobs, ncenters)) {
|
|
74
|
+
return process_edge_case(ndim, nobs, data, ncenters, centers, clusters);
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
int iter = 0, status = 0;
|
|
78
|
+
std::vector<INDEX_t> sizes(ncenters);
|
|
79
|
+
std::vector<CLUSTER_t> copy(nobs);
|
|
80
|
+
|
|
81
|
+
for (iter = 1; iter <= maxiter; ++iter) {
|
|
82
|
+
// Nearest-neighbor search to assign to the closest cluster.
|
|
83
|
+
// Note that we move the `updated` check outside of this loop
|
|
84
|
+
// so that, in the future, this is more easily parallelized.
|
|
85
|
+
QuickSearch<DATA_t, CLUSTER_t> index(ndim, ncenters, centers);
|
|
86
|
+
#pragma omp parallel for
|
|
87
|
+
for (INDEX_t obs = 0; obs < nobs; ++obs) {
|
|
88
|
+
copy[obs] = index.find(data + obs * ndim);
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
bool updated = false;
|
|
92
|
+
for (INDEX_t obs = 0; obs < nobs; ++obs) {
|
|
93
|
+
if (copy[obs] != clusters[obs]) {
|
|
94
|
+
updated = true;
|
|
95
|
+
break;
|
|
96
|
+
}
|
|
97
|
+
}
|
|
98
|
+
if (!updated) {
|
|
99
|
+
break;
|
|
100
|
+
}
|
|
101
|
+
std::copy(copy.begin(), copy.end(), clusters);
|
|
102
|
+
|
|
103
|
+
// Counting the number in each cluster.
|
|
104
|
+
std::fill(sizes.begin(), sizes.end(), 0);
|
|
105
|
+
for (INDEX_t obs = 0; obs < nobs; ++obs) {
|
|
106
|
+
++sizes[clusters[obs]];
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
for (CLUSTER_t c = 0; c < ncenters; ++ c) {
|
|
110
|
+
if (!sizes[c]) {
|
|
111
|
+
status = 1;
|
|
112
|
+
break;
|
|
113
|
+
}
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
compute_centroids(ndim, nobs, data, ncenters, centers, clusters, sizes);
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
if (iter == maxiter + 1) {
|
|
120
|
+
status = 2;
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
return Details<DATA_t, INDEX_t>(
|
|
124
|
+
std::move(sizes),
|
|
125
|
+
compute_wcss(ndim, nobs, data, ncenters, centers, clusters),
|
|
126
|
+
iter,
|
|
127
|
+
status
|
|
128
|
+
);
|
|
129
|
+
}
|
|
130
|
+
};
|
|
131
|
+
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
#endif
|