umappp 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +25 -0
- data/README.md +110 -0
- data/ext/umappp/extconf.rb +25 -0
- data/ext/umappp/numo.hpp +867 -0
- data/ext/umappp/umappp.cpp +225 -0
- data/lib/umappp/version.rb +5 -0
- data/lib/umappp.rb +41 -0
- data/vendor/Eigen/Cholesky +45 -0
- data/vendor/Eigen/CholmodSupport +48 -0
- data/vendor/Eigen/Core +384 -0
- data/vendor/Eigen/Dense +7 -0
- data/vendor/Eigen/Eigen +2 -0
- data/vendor/Eigen/Eigenvalues +60 -0
- data/vendor/Eigen/Geometry +59 -0
- data/vendor/Eigen/Householder +29 -0
- data/vendor/Eigen/IterativeLinearSolvers +48 -0
- data/vendor/Eigen/Jacobi +32 -0
- data/vendor/Eigen/KLUSupport +41 -0
- data/vendor/Eigen/LU +47 -0
- data/vendor/Eigen/MetisSupport +35 -0
- data/vendor/Eigen/OrderingMethods +70 -0
- data/vendor/Eigen/PaStiXSupport +49 -0
- data/vendor/Eigen/PardisoSupport +35 -0
- data/vendor/Eigen/QR +50 -0
- data/vendor/Eigen/QtAlignedMalloc +39 -0
- data/vendor/Eigen/SPQRSupport +34 -0
- data/vendor/Eigen/SVD +50 -0
- data/vendor/Eigen/Sparse +34 -0
- data/vendor/Eigen/SparseCholesky +37 -0
- data/vendor/Eigen/SparseCore +69 -0
- data/vendor/Eigen/SparseLU +50 -0
- data/vendor/Eigen/SparseQR +36 -0
- data/vendor/Eigen/StdDeque +27 -0
- data/vendor/Eigen/StdList +26 -0
- data/vendor/Eigen/StdVector +27 -0
- data/vendor/Eigen/SuperLUSupport +64 -0
- data/vendor/Eigen/UmfPackSupport +40 -0
- data/vendor/Eigen/src/Cholesky/LDLT.h +688 -0
- data/vendor/Eigen/src/Cholesky/LLT.h +558 -0
- data/vendor/Eigen/src/Cholesky/LLT_LAPACKE.h +99 -0
- data/vendor/Eigen/src/CholmodSupport/CholmodSupport.h +682 -0
- data/vendor/Eigen/src/Core/ArithmeticSequence.h +413 -0
- data/vendor/Eigen/src/Core/Array.h +417 -0
- data/vendor/Eigen/src/Core/ArrayBase.h +226 -0
- data/vendor/Eigen/src/Core/ArrayWrapper.h +209 -0
- data/vendor/Eigen/src/Core/Assign.h +90 -0
- data/vendor/Eigen/src/Core/AssignEvaluator.h +1010 -0
- data/vendor/Eigen/src/Core/Assign_MKL.h +178 -0
- data/vendor/Eigen/src/Core/BandMatrix.h +353 -0
- data/vendor/Eigen/src/Core/Block.h +448 -0
- data/vendor/Eigen/src/Core/BooleanRedux.h +162 -0
- data/vendor/Eigen/src/Core/CommaInitializer.h +164 -0
- data/vendor/Eigen/src/Core/ConditionEstimator.h +175 -0
- data/vendor/Eigen/src/Core/CoreEvaluators.h +1741 -0
- data/vendor/Eigen/src/Core/CoreIterators.h +132 -0
- data/vendor/Eigen/src/Core/CwiseBinaryOp.h +183 -0
- data/vendor/Eigen/src/Core/CwiseNullaryOp.h +1001 -0
- data/vendor/Eigen/src/Core/CwiseTernaryOp.h +197 -0
- data/vendor/Eigen/src/Core/CwiseUnaryOp.h +103 -0
- data/vendor/Eigen/src/Core/CwiseUnaryView.h +132 -0
- data/vendor/Eigen/src/Core/DenseBase.h +701 -0
- data/vendor/Eigen/src/Core/DenseCoeffsBase.h +685 -0
- data/vendor/Eigen/src/Core/DenseStorage.h +652 -0
- data/vendor/Eigen/src/Core/Diagonal.h +258 -0
- data/vendor/Eigen/src/Core/DiagonalMatrix.h +391 -0
- data/vendor/Eigen/src/Core/DiagonalProduct.h +28 -0
- data/vendor/Eigen/src/Core/Dot.h +318 -0
- data/vendor/Eigen/src/Core/EigenBase.h +160 -0
- data/vendor/Eigen/src/Core/ForceAlignedAccess.h +150 -0
- data/vendor/Eigen/src/Core/Fuzzy.h +155 -0
- data/vendor/Eigen/src/Core/GeneralProduct.h +465 -0
- data/vendor/Eigen/src/Core/GenericPacketMath.h +1040 -0
- data/vendor/Eigen/src/Core/GlobalFunctions.h +194 -0
- data/vendor/Eigen/src/Core/IO.h +258 -0
- data/vendor/Eigen/src/Core/IndexedView.h +237 -0
- data/vendor/Eigen/src/Core/Inverse.h +117 -0
- data/vendor/Eigen/src/Core/Map.h +171 -0
- data/vendor/Eigen/src/Core/MapBase.h +310 -0
- data/vendor/Eigen/src/Core/MathFunctions.h +2057 -0
- data/vendor/Eigen/src/Core/MathFunctionsImpl.h +200 -0
- data/vendor/Eigen/src/Core/Matrix.h +565 -0
- data/vendor/Eigen/src/Core/MatrixBase.h +547 -0
- data/vendor/Eigen/src/Core/NestByValue.h +85 -0
- data/vendor/Eigen/src/Core/NoAlias.h +109 -0
- data/vendor/Eigen/src/Core/NumTraits.h +335 -0
- data/vendor/Eigen/src/Core/PartialReduxEvaluator.h +232 -0
- data/vendor/Eigen/src/Core/PermutationMatrix.h +605 -0
- data/vendor/Eigen/src/Core/PlainObjectBase.h +1128 -0
- data/vendor/Eigen/src/Core/Product.h +191 -0
- data/vendor/Eigen/src/Core/ProductEvaluators.h +1179 -0
- data/vendor/Eigen/src/Core/Random.h +218 -0
- data/vendor/Eigen/src/Core/Redux.h +515 -0
- data/vendor/Eigen/src/Core/Ref.h +381 -0
- data/vendor/Eigen/src/Core/Replicate.h +142 -0
- data/vendor/Eigen/src/Core/Reshaped.h +454 -0
- data/vendor/Eigen/src/Core/ReturnByValue.h +119 -0
- data/vendor/Eigen/src/Core/Reverse.h +217 -0
- data/vendor/Eigen/src/Core/Select.h +164 -0
- data/vendor/Eigen/src/Core/SelfAdjointView.h +365 -0
- data/vendor/Eigen/src/Core/SelfCwiseBinaryOp.h +47 -0
- data/vendor/Eigen/src/Core/Solve.h +188 -0
- data/vendor/Eigen/src/Core/SolveTriangular.h +235 -0
- data/vendor/Eigen/src/Core/SolverBase.h +168 -0
- data/vendor/Eigen/src/Core/StableNorm.h +251 -0
- data/vendor/Eigen/src/Core/StlIterators.h +463 -0
- data/vendor/Eigen/src/Core/Stride.h +116 -0
- data/vendor/Eigen/src/Core/Swap.h +68 -0
- data/vendor/Eigen/src/Core/Transpose.h +464 -0
- data/vendor/Eigen/src/Core/Transpositions.h +386 -0
- data/vendor/Eigen/src/Core/TriangularMatrix.h +1001 -0
- data/vendor/Eigen/src/Core/VectorBlock.h +96 -0
- data/vendor/Eigen/src/Core/VectorwiseOp.h +784 -0
- data/vendor/Eigen/src/Core/Visitor.h +381 -0
- data/vendor/Eigen/src/Core/arch/AVX/Complex.h +372 -0
- data/vendor/Eigen/src/Core/arch/AVX/MathFunctions.h +228 -0
- data/vendor/Eigen/src/Core/arch/AVX/PacketMath.h +1574 -0
- data/vendor/Eigen/src/Core/arch/AVX/TypeCasting.h +115 -0
- data/vendor/Eigen/src/Core/arch/AVX512/Complex.h +422 -0
- data/vendor/Eigen/src/Core/arch/AVX512/MathFunctions.h +362 -0
- data/vendor/Eigen/src/Core/arch/AVX512/PacketMath.h +2303 -0
- data/vendor/Eigen/src/Core/arch/AVX512/TypeCasting.h +89 -0
- data/vendor/Eigen/src/Core/arch/AltiVec/Complex.h +417 -0
- data/vendor/Eigen/src/Core/arch/AltiVec/MathFunctions.h +90 -0
- data/vendor/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2937 -0
- data/vendor/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +221 -0
- data/vendor/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +629 -0
- data/vendor/Eigen/src/Core/arch/AltiVec/PacketMath.h +2711 -0
- data/vendor/Eigen/src/Core/arch/CUDA/Complex.h +258 -0
- data/vendor/Eigen/src/Core/arch/Default/BFloat16.h +700 -0
- data/vendor/Eigen/src/Core/arch/Default/ConjHelper.h +117 -0
- data/vendor/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1649 -0
- data/vendor/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +110 -0
- data/vendor/Eigen/src/Core/arch/Default/Half.h +942 -0
- data/vendor/Eigen/src/Core/arch/Default/Settings.h +49 -0
- data/vendor/Eigen/src/Core/arch/Default/TypeCasting.h +120 -0
- data/vendor/Eigen/src/Core/arch/GPU/MathFunctions.h +103 -0
- data/vendor/Eigen/src/Core/arch/GPU/PacketMath.h +1685 -0
- data/vendor/Eigen/src/Core/arch/GPU/TypeCasting.h +80 -0
- data/vendor/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
- data/vendor/Eigen/src/Core/arch/MSA/Complex.h +648 -0
- data/vendor/Eigen/src/Core/arch/MSA/MathFunctions.h +387 -0
- data/vendor/Eigen/src/Core/arch/MSA/PacketMath.h +1233 -0
- data/vendor/Eigen/src/Core/arch/NEON/Complex.h +584 -0
- data/vendor/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +183 -0
- data/vendor/Eigen/src/Core/arch/NEON/MathFunctions.h +75 -0
- data/vendor/Eigen/src/Core/arch/NEON/PacketMath.h +4587 -0
- data/vendor/Eigen/src/Core/arch/NEON/TypeCasting.h +1419 -0
- data/vendor/Eigen/src/Core/arch/SSE/Complex.h +351 -0
- data/vendor/Eigen/src/Core/arch/SSE/MathFunctions.h +199 -0
- data/vendor/Eigen/src/Core/arch/SSE/PacketMath.h +1505 -0
- data/vendor/Eigen/src/Core/arch/SSE/TypeCasting.h +142 -0
- data/vendor/Eigen/src/Core/arch/SVE/MathFunctions.h +44 -0
- data/vendor/Eigen/src/Core/arch/SVE/PacketMath.h +752 -0
- data/vendor/Eigen/src/Core/arch/SVE/TypeCasting.h +49 -0
- data/vendor/Eigen/src/Core/arch/SYCL/InteropHeaders.h +232 -0
- data/vendor/Eigen/src/Core/arch/SYCL/MathFunctions.h +301 -0
- data/vendor/Eigen/src/Core/arch/SYCL/PacketMath.h +670 -0
- data/vendor/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +694 -0
- data/vendor/Eigen/src/Core/arch/SYCL/TypeCasting.h +85 -0
- data/vendor/Eigen/src/Core/arch/ZVector/Complex.h +426 -0
- data/vendor/Eigen/src/Core/arch/ZVector/MathFunctions.h +233 -0
- data/vendor/Eigen/src/Core/arch/ZVector/PacketMath.h +1060 -0
- data/vendor/Eigen/src/Core/functors/AssignmentFunctors.h +177 -0
- data/vendor/Eigen/src/Core/functors/BinaryFunctors.h +541 -0
- data/vendor/Eigen/src/Core/functors/NullaryFunctors.h +189 -0
- data/vendor/Eigen/src/Core/functors/StlFunctors.h +166 -0
- data/vendor/Eigen/src/Core/functors/TernaryFunctors.h +25 -0
- data/vendor/Eigen/src/Core/functors/UnaryFunctors.h +1131 -0
- data/vendor/Eigen/src/Core/products/GeneralBlockPanelKernel.h +2645 -0
- data/vendor/Eigen/src/Core/products/GeneralMatrixMatrix.h +517 -0
- data/vendor/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +317 -0
- data/vendor/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +145 -0
- data/vendor/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +124 -0
- data/vendor/Eigen/src/Core/products/GeneralMatrixVector.h +518 -0
- data/vendor/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +136 -0
- data/vendor/Eigen/src/Core/products/Parallelizer.h +180 -0
- data/vendor/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +544 -0
- data/vendor/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +295 -0
- data/vendor/Eigen/src/Core/products/SelfadjointMatrixVector.h +262 -0
- data/vendor/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +118 -0
- data/vendor/Eigen/src/Core/products/SelfadjointProduct.h +133 -0
- data/vendor/Eigen/src/Core/products/SelfadjointRank2Update.h +94 -0
- data/vendor/Eigen/src/Core/products/TriangularMatrixMatrix.h +472 -0
- data/vendor/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +317 -0
- data/vendor/Eigen/src/Core/products/TriangularMatrixVector.h +350 -0
- data/vendor/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +255 -0
- data/vendor/Eigen/src/Core/products/TriangularSolverMatrix.h +337 -0
- data/vendor/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +167 -0
- data/vendor/Eigen/src/Core/products/TriangularSolverVector.h +148 -0
- data/vendor/Eigen/src/Core/util/BlasUtil.h +583 -0
- data/vendor/Eigen/src/Core/util/ConfigureVectorization.h +512 -0
- data/vendor/Eigen/src/Core/util/Constants.h +563 -0
- data/vendor/Eigen/src/Core/util/DisableStupidWarnings.h +106 -0
- data/vendor/Eigen/src/Core/util/ForwardDeclarations.h +322 -0
- data/vendor/Eigen/src/Core/util/IndexedViewHelper.h +186 -0
- data/vendor/Eigen/src/Core/util/IntegralConstant.h +272 -0
- data/vendor/Eigen/src/Core/util/MKL_support.h +137 -0
- data/vendor/Eigen/src/Core/util/Macros.h +1464 -0
- data/vendor/Eigen/src/Core/util/Memory.h +1163 -0
- data/vendor/Eigen/src/Core/util/Meta.h +812 -0
- data/vendor/Eigen/src/Core/util/NonMPL2.h +3 -0
- data/vendor/Eigen/src/Core/util/ReenableStupidWarnings.h +31 -0
- data/vendor/Eigen/src/Core/util/ReshapedHelper.h +51 -0
- data/vendor/Eigen/src/Core/util/StaticAssert.h +221 -0
- data/vendor/Eigen/src/Core/util/SymbolicIndex.h +293 -0
- data/vendor/Eigen/src/Core/util/XprHelper.h +856 -0
- data/vendor/Eigen/src/Eigenvalues/ComplexEigenSolver.h +346 -0
- data/vendor/Eigen/src/Eigenvalues/ComplexSchur.h +462 -0
- data/vendor/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +91 -0
- data/vendor/Eigen/src/Eigenvalues/EigenSolver.h +622 -0
- data/vendor/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +418 -0
- data/vendor/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +226 -0
- data/vendor/Eigen/src/Eigenvalues/HessenbergDecomposition.h +374 -0
- data/vendor/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +158 -0
- data/vendor/Eigen/src/Eigenvalues/RealQZ.h +657 -0
- data/vendor/Eigen/src/Eigenvalues/RealSchur.h +558 -0
- data/vendor/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +77 -0
- data/vendor/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +904 -0
- data/vendor/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +87 -0
- data/vendor/Eigen/src/Eigenvalues/Tridiagonalization.h +561 -0
- data/vendor/Eigen/src/Geometry/AlignedBox.h +486 -0
- data/vendor/Eigen/src/Geometry/AngleAxis.h +247 -0
- data/vendor/Eigen/src/Geometry/EulerAngles.h +114 -0
- data/vendor/Eigen/src/Geometry/Homogeneous.h +501 -0
- data/vendor/Eigen/src/Geometry/Hyperplane.h +282 -0
- data/vendor/Eigen/src/Geometry/OrthoMethods.h +235 -0
- data/vendor/Eigen/src/Geometry/ParametrizedLine.h +232 -0
- data/vendor/Eigen/src/Geometry/Quaternion.h +870 -0
- data/vendor/Eigen/src/Geometry/Rotation2D.h +199 -0
- data/vendor/Eigen/src/Geometry/RotationBase.h +206 -0
- data/vendor/Eigen/src/Geometry/Scaling.h +188 -0
- data/vendor/Eigen/src/Geometry/Transform.h +1563 -0
- data/vendor/Eigen/src/Geometry/Translation.h +202 -0
- data/vendor/Eigen/src/Geometry/Umeyama.h +166 -0
- data/vendor/Eigen/src/Geometry/arch/Geometry_SIMD.h +168 -0
- data/vendor/Eigen/src/Householder/BlockHouseholder.h +110 -0
- data/vendor/Eigen/src/Householder/Householder.h +176 -0
- data/vendor/Eigen/src/Householder/HouseholderSequence.h +545 -0
- data/vendor/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +226 -0
- data/vendor/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +212 -0
- data/vendor/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +229 -0
- data/vendor/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +394 -0
- data/vendor/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +453 -0
- data/vendor/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +444 -0
- data/vendor/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +198 -0
- data/vendor/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +117 -0
- data/vendor/Eigen/src/Jacobi/Jacobi.h +483 -0
- data/vendor/Eigen/src/KLUSupport/KLUSupport.h +358 -0
- data/vendor/Eigen/src/LU/Determinant.h +117 -0
- data/vendor/Eigen/src/LU/FullPivLU.h +877 -0
- data/vendor/Eigen/src/LU/InverseImpl.h +432 -0
- data/vendor/Eigen/src/LU/PartialPivLU.h +624 -0
- data/vendor/Eigen/src/LU/PartialPivLU_LAPACKE.h +83 -0
- data/vendor/Eigen/src/LU/arch/InverseSize4.h +351 -0
- data/vendor/Eigen/src/MetisSupport/MetisSupport.h +137 -0
- data/vendor/Eigen/src/OrderingMethods/Amd.h +435 -0
- data/vendor/Eigen/src/OrderingMethods/Eigen_Colamd.h +1863 -0
- data/vendor/Eigen/src/OrderingMethods/Ordering.h +153 -0
- data/vendor/Eigen/src/PaStiXSupport/PaStiXSupport.h +678 -0
- data/vendor/Eigen/src/PardisoSupport/PardisoSupport.h +545 -0
- data/vendor/Eigen/src/QR/ColPivHouseholderQR.h +674 -0
- data/vendor/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +97 -0
- data/vendor/Eigen/src/QR/CompleteOrthogonalDecomposition.h +635 -0
- data/vendor/Eigen/src/QR/FullPivHouseholderQR.h +713 -0
- data/vendor/Eigen/src/QR/HouseholderQR.h +434 -0
- data/vendor/Eigen/src/QR/HouseholderQR_LAPACKE.h +68 -0
- data/vendor/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +335 -0
- data/vendor/Eigen/src/SVD/BDCSVD.h +1366 -0
- data/vendor/Eigen/src/SVD/JacobiSVD.h +812 -0
- data/vendor/Eigen/src/SVD/JacobiSVD_LAPACKE.h +91 -0
- data/vendor/Eigen/src/SVD/SVDBase.h +376 -0
- data/vendor/Eigen/src/SVD/UpperBidiagonalization.h +414 -0
- data/vendor/Eigen/src/SparseCholesky/SimplicialCholesky.h +697 -0
- data/vendor/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +174 -0
- data/vendor/Eigen/src/SparseCore/AmbiVector.h +378 -0
- data/vendor/Eigen/src/SparseCore/CompressedStorage.h +274 -0
- data/vendor/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +352 -0
- data/vendor/Eigen/src/SparseCore/MappedSparseMatrix.h +67 -0
- data/vendor/Eigen/src/SparseCore/SparseAssign.h +270 -0
- data/vendor/Eigen/src/SparseCore/SparseBlock.h +571 -0
- data/vendor/Eigen/src/SparseCore/SparseColEtree.h +206 -0
- data/vendor/Eigen/src/SparseCore/SparseCompressedBase.h +370 -0
- data/vendor/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +722 -0
- data/vendor/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +150 -0
- data/vendor/Eigen/src/SparseCore/SparseDenseProduct.h +342 -0
- data/vendor/Eigen/src/SparseCore/SparseDiagonalProduct.h +138 -0
- data/vendor/Eigen/src/SparseCore/SparseDot.h +98 -0
- data/vendor/Eigen/src/SparseCore/SparseFuzzy.h +29 -0
- data/vendor/Eigen/src/SparseCore/SparseMap.h +305 -0
- data/vendor/Eigen/src/SparseCore/SparseMatrix.h +1518 -0
- data/vendor/Eigen/src/SparseCore/SparseMatrixBase.h +398 -0
- data/vendor/Eigen/src/SparseCore/SparsePermutation.h +178 -0
- data/vendor/Eigen/src/SparseCore/SparseProduct.h +181 -0
- data/vendor/Eigen/src/SparseCore/SparseRedux.h +49 -0
- data/vendor/Eigen/src/SparseCore/SparseRef.h +397 -0
- data/vendor/Eigen/src/SparseCore/SparseSelfAdjointView.h +659 -0
- data/vendor/Eigen/src/SparseCore/SparseSolverBase.h +124 -0
- data/vendor/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +198 -0
- data/vendor/Eigen/src/SparseCore/SparseTranspose.h +92 -0
- data/vendor/Eigen/src/SparseCore/SparseTriangularView.h +189 -0
- data/vendor/Eigen/src/SparseCore/SparseUtil.h +186 -0
- data/vendor/Eigen/src/SparseCore/SparseVector.h +478 -0
- data/vendor/Eigen/src/SparseCore/SparseView.h +254 -0
- data/vendor/Eigen/src/SparseCore/TriangularSolver.h +315 -0
- data/vendor/Eigen/src/SparseLU/SparseLU.h +923 -0
- data/vendor/Eigen/src/SparseLU/SparseLUImpl.h +66 -0
- data/vendor/Eigen/src/SparseLU/SparseLU_Memory.h +226 -0
- data/vendor/Eigen/src/SparseLU/SparseLU_Structs.h +110 -0
- data/vendor/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +375 -0
- data/vendor/Eigen/src/SparseLU/SparseLU_Utils.h +80 -0
- data/vendor/Eigen/src/SparseLU/SparseLU_column_bmod.h +181 -0
- data/vendor/Eigen/src/SparseLU/SparseLU_column_dfs.h +179 -0
- data/vendor/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +107 -0
- data/vendor/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +280 -0
- data/vendor/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +126 -0
- data/vendor/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +130 -0
- data/vendor/Eigen/src/SparseLU/SparseLU_panel_bmod.h +223 -0
- data/vendor/Eigen/src/SparseLU/SparseLU_panel_dfs.h +258 -0
- data/vendor/Eigen/src/SparseLU/SparseLU_pivotL.h +137 -0
- data/vendor/Eigen/src/SparseLU/SparseLU_pruneL.h +136 -0
- data/vendor/Eigen/src/SparseLU/SparseLU_relax_snode.h +83 -0
- data/vendor/Eigen/src/SparseQR/SparseQR.h +758 -0
- data/vendor/Eigen/src/StlSupport/StdDeque.h +116 -0
- data/vendor/Eigen/src/StlSupport/StdList.h +106 -0
- data/vendor/Eigen/src/StlSupport/StdVector.h +131 -0
- data/vendor/Eigen/src/StlSupport/details.h +84 -0
- data/vendor/Eigen/src/SuperLUSupport/SuperLUSupport.h +1025 -0
- data/vendor/Eigen/src/UmfPackSupport/UmfPackSupport.h +642 -0
- data/vendor/Eigen/src/misc/Image.h +82 -0
- data/vendor/Eigen/src/misc/Kernel.h +79 -0
- data/vendor/Eigen/src/misc/RealSvd2x2.h +55 -0
- data/vendor/Eigen/src/misc/blas.h +440 -0
- data/vendor/Eigen/src/misc/lapack.h +152 -0
- data/vendor/Eigen/src/misc/lapacke.h +16292 -0
- data/vendor/Eigen/src/misc/lapacke_mangling.h +17 -0
- data/vendor/Eigen/src/plugins/ArrayCwiseBinaryOps.h +358 -0
- data/vendor/Eigen/src/plugins/ArrayCwiseUnaryOps.h +696 -0
- data/vendor/Eigen/src/plugins/BlockMethods.h +1442 -0
- data/vendor/Eigen/src/plugins/CommonCwiseBinaryOps.h +115 -0
- data/vendor/Eigen/src/plugins/CommonCwiseUnaryOps.h +177 -0
- data/vendor/Eigen/src/plugins/IndexedViewMethods.h +262 -0
- data/vendor/Eigen/src/plugins/MatrixCwiseBinaryOps.h +152 -0
- data/vendor/Eigen/src/plugins/MatrixCwiseUnaryOps.h +95 -0
- data/vendor/Eigen/src/plugins/ReshapedMethods.h +149 -0
- data/vendor/aarand/aarand.hpp +114 -0
- data/vendor/annoy/annoylib.h +1495 -0
- data/vendor/annoy/kissrandom.h +120 -0
- data/vendor/annoy/mman.h +242 -0
- data/vendor/hnswlib/bruteforce.h +152 -0
- data/vendor/hnswlib/hnswalg.h +1192 -0
- data/vendor/hnswlib/hnswlib.h +108 -0
- data/vendor/hnswlib/space_ip.h +282 -0
- data/vendor/hnswlib/space_l2.h +281 -0
- data/vendor/hnswlib/visited_list_pool.h +79 -0
- data/vendor/irlba/irlba.hpp +575 -0
- data/vendor/irlba/lanczos.hpp +212 -0
- data/vendor/irlba/parallel.hpp +474 -0
- data/vendor/irlba/utils.hpp +224 -0
- data/vendor/irlba/wrappers.hpp +228 -0
- data/vendor/kmeans/Base.hpp +75 -0
- data/vendor/kmeans/Details.hpp +79 -0
- data/vendor/kmeans/HartiganWong.hpp +492 -0
- data/vendor/kmeans/InitializeKmeansPP.hpp +144 -0
- data/vendor/kmeans/InitializeNone.hpp +44 -0
- data/vendor/kmeans/InitializePCAPartition.hpp +309 -0
- data/vendor/kmeans/InitializeRandom.hpp +91 -0
- data/vendor/kmeans/Kmeans.hpp +161 -0
- data/vendor/kmeans/Lloyd.hpp +134 -0
- data/vendor/kmeans/MiniBatch.hpp +269 -0
- data/vendor/kmeans/QuickSearch.hpp +179 -0
- data/vendor/kmeans/compute_centroids.hpp +32 -0
- data/vendor/kmeans/compute_wcss.hpp +27 -0
- data/vendor/kmeans/is_edge_case.hpp +42 -0
- data/vendor/kmeans/random.hpp +55 -0
- data/vendor/knncolle/Annoy/Annoy.hpp +193 -0
- data/vendor/knncolle/BruteForce/BruteForce.hpp +120 -0
- data/vendor/knncolle/Hnsw/Hnsw.hpp +225 -0
- data/vendor/knncolle/Kmknn/Kmknn.hpp +286 -0
- data/vendor/knncolle/VpTree/VpTree.hpp +256 -0
- data/vendor/knncolle/knncolle.hpp +34 -0
- data/vendor/knncolle/utils/Base.hpp +100 -0
- data/vendor/knncolle/utils/NeighborQueue.hpp +94 -0
- data/vendor/knncolle/utils/distances.hpp +98 -0
- data/vendor/knncolle/utils/find_nearest_neighbors.hpp +112 -0
- data/vendor/powerit/PowerIterations.hpp +157 -0
- data/vendor/umappp/NeighborList.hpp +37 -0
- data/vendor/umappp/Umap.hpp +662 -0
- data/vendor/umappp/combine_neighbor_sets.hpp +95 -0
- data/vendor/umappp/find_ab.hpp +157 -0
- data/vendor/umappp/neighbor_similarities.hpp +136 -0
- data/vendor/umappp/optimize_layout.hpp +285 -0
- data/vendor/umappp/spectral_init.hpp +181 -0
- data/vendor/umappp/umappp.hpp +13 -0
- metadata +465 -0
|
@@ -0,0 +1,492 @@
|
|
|
1
|
+
#ifndef KMEANS_HARTIGAN_WONG_HPP
|
|
2
|
+
#define KMEANS_HARTIGAN_WONG_HPP
|
|
3
|
+
|
|
4
|
+
#include <vector>
|
|
5
|
+
#include <algorithm>
|
|
6
|
+
#include <numeric>
|
|
7
|
+
#include <cstdint>
|
|
8
|
+
#include <stdexcept>
|
|
9
|
+
#include <limits>
|
|
10
|
+
|
|
11
|
+
#include "Base.hpp"
|
|
12
|
+
#include "Details.hpp"
|
|
13
|
+
#include "compute_centroids.hpp"
|
|
14
|
+
#include "compute_wcss.hpp"
|
|
15
|
+
#include "is_edge_case.hpp"
|
|
16
|
+
|
|
17
|
+
/**
|
|
18
|
+
* @file HartiganWong.hpp
|
|
19
|
+
*
|
|
20
|
+
* @brief Implements the Hartigan-Wong algorithm for k-means clustering.
|
|
21
|
+
*/
|
|
22
|
+
|
|
23
|
+
namespace kmeans {
|
|
24
|
+
|
|
25
|
+
/**
|
|
26
|
+
* @brief Implements the Hartigan-Wong algorithm for k-means clustering.
|
|
27
|
+
*
|
|
28
|
+
* The Hartigan-Wong algorithm performs several iterations of transferring points between clusters,
|
|
29
|
+
* involving a computationally expensive "optimal transfer" step that checks each observation against each cluster for the lowest squared distance;
|
|
30
|
+
* followed by a cheaper "quick transfer" step, which iterates between the best and second-best cluster choices for each observation.
|
|
31
|
+
* The latter enables rapid exploration of the local solution space without the unnecessary expense of repeatedly comparing to all clusters for all observations.
|
|
32
|
+
* In addition, each distance calculation to a cluster accounts for the shift in the means when a point is transferred.
|
|
33
|
+
* The algorithm terminates when no observation wishes to transfer between clusters.
|
|
34
|
+
*
|
|
35
|
+
* This implementation is derived from the Fortran code underlying the `kmeans` function in the **stats** R package,
|
|
36
|
+
* which in turn is derived from Hartigan and Wong (1979).
|
|
37
|
+
*
|
|
38
|
+
* @tparam DATA_t Floating-point type for the data and centroids.
|
|
39
|
+
* @tparam CLUSTER_t Integer type for the cluster assignments.
|
|
40
|
+
* @tparam INDEX_t Integer type for the observation index.
|
|
41
|
+
* This should have a maximum positive value that is at least 50 times greater than the maximum expected number of observations.
|
|
42
|
+
*
|
|
43
|
+
* @see
|
|
44
|
+
* Hartigan, J. A. and Wong, M. A. (1979).
|
|
45
|
+
* Algorithm AS 136: A K-means clustering algorithm.
|
|
46
|
+
* _Applied Statistics_, 28, 100-108.
|
|
47
|
+
*/
|
|
48
|
+
template<typename DATA_t = double, typename CLUSTER_t = int, typename INDEX_t = int>
|
|
49
|
+
class HartiganWong : public Refine<DATA_t, CLUSTER_t, INDEX_t> {
|
|
50
|
+
int num_dim;
|
|
51
|
+
INDEX_t num_obs;
|
|
52
|
+
const DATA_t* data_ptr;
|
|
53
|
+
|
|
54
|
+
CLUSTER_t num_centers;
|
|
55
|
+
DATA_t* centers_ptr;
|
|
56
|
+
|
|
57
|
+
// Array arguments in the same order as supplied to R's kmns_ function.
|
|
58
|
+
CLUSTER_t * ic1;
|
|
59
|
+
std::vector<CLUSTER_t> ic2;
|
|
60
|
+
std::vector<INDEX_t> nc;
|
|
61
|
+
std::vector<DATA_t> an1, an2;
|
|
62
|
+
std::vector<INDEX_t> ncp;
|
|
63
|
+
std::vector<DATA_t> d;
|
|
64
|
+
std::vector<uint8_t> itran;
|
|
65
|
+
std::vector<INDEX_t> live;
|
|
66
|
+
|
|
67
|
+
private:
|
|
68
|
+
static constexpr double big = 1e30; // Define BIG to be a very large positive number
|
|
69
|
+
|
|
70
|
+
/* The following comparisons to 'ncp' are wrapped in functions to account
|
|
71
|
+
* for the fact that we need to shift all the 'ncp' values by two to give
|
|
72
|
+
* some space for the error codes when dealing with unsigned integers in
|
|
73
|
+
* 'INDEX_t'. All interactions with 'ncp' should occur via these utilities.
|
|
74
|
+
*/
|
|
75
|
+
static constexpr INDEX_t ncp_init = 0;
|
|
76
|
+
static constexpr INDEX_t ncp_unchanged = 1;
|
|
77
|
+
static constexpr INDEX_t ncp_shift = 2;
|
|
78
|
+
|
|
79
|
+
/**
 * Put every entry of `ncp` into the "initial" state by filling the vector
 * with the `ncp_init` sentinel. `run()` calls this once, before the first
 * optimal-transfer pass.
 */
void initialize_ncp() {
    std::fill(ncp.begin(), ncp.end(), ncp_init);
}
|
|
82
|
+
|
|
83
|
+
/**
 * Mark every entry of `ncp` as "unchanged" by filling the vector with the
 * `ncp_unchanged` sentinel. `run()` calls this at the end of each iteration,
 * before re-entering the optimal-transfer stage.
 */
void reset_ncp() {
    std::fill(ncp.begin(), ncp.end(), ncp_unchanged);
}
|
|
86
|
+
|
|
87
|
+
/**
 * Record a step value in `ncp`.
 *
 * The value is stored shifted upwards by `ncp_shift`, so that the two
 * smallest representable values stay reserved for the `ncp_init` and
 * `ncp_unchanged` sentinels even when `INDEX_t` is unsigned.
 *
 * @param obs Index into `ncp`. Note that `run()` sizes `ncp` by the number
 * of centers, so despite its name this indexes a cluster.
 * @param val Unshifted step value to store.
 */
void set_ncp(INDEX_t obs, INDEX_t val) {
    ncp[obs] = val + ncp_shift;
}
|
|
90
|
+
|
|
91
|
+
/**
 * @param obs Index into `ncp` (sized by the number of centers in `run()`).
 *
 * @return Whether the entry still holds the `ncp_unchanged` sentinel,
 * i.e., it has not been overwritten via `set_ncp()` since the last `reset_ncp()`.
 */
bool unchanged_ncp(INDEX_t obs) const {
    return ncp[obs] == ncp_unchanged;
}
|
|
94
|
+
|
|
95
|
+
/**
 * Strict comparison of a step value against a stored `ncp` entry.
 *
 * The comparison is done in the shifted representation used by `set_ncp()`,
 * so it reads as "`val` < (step stored in `ncp[obs]`)".
 *
 * @param obs Index into `ncp` (sized by the number of centers in `run()`).
 * @param val Unshifted step value to compare.
 *
 * @return `true` if `val + ncp_shift` is strictly less than `ncp[obs]`.
 */
bool lt_ncp(INDEX_t obs, INDEX_t val) const {
    return ncp[obs] > val + ncp_shift;
}
|
|
98
|
+
|
|
99
|
+
/**
 * Non-strict comparison of a step value against a stored `ncp` entry.
 *
 * The comparison is done in the shifted representation used by `set_ncp()`,
 * so it reads as "`val` <= (step stored in `ncp[obs]`)".
 *
 * @param obs Index into `ncp` (sized by the number of centers in `run()`).
 * @param val Unshifted step value to compare.
 *
 * @return `true` if `val + ncp_shift` is less than or equal to `ncp[obs]`.
 */
bool le_ncp(INDEX_t obs, INDEX_t val) const {
    return ncp[obs] >= val + ncp_shift;
}
|
|
102
|
+
|
|
103
|
+
public:
|
|
104
|
+
/**
 * @brief Default parameter values for `HartiganWong`.
 */
struct Defaults {
    /**
     * Default maximum number of iterations.
     * See `HartiganWong::set_max_iterations()`.
     */
    static constexpr int max_iterations = 10;
};
|
|
113
|
+
|
|
114
|
+
private:
|
|
115
|
+
int maxiter = Defaults::max_iterations;
|
|
116
|
+
|
|
117
|
+
public:
|
|
118
|
+
/**
|
|
119
|
+
* @param m Maximum number of iterations.
|
|
120
|
+
* More iterations increase the opportunity for convergence at the cost of more computational time.
|
|
121
|
+
*
|
|
122
|
+
* @return A reference to this `HartiganWong` object.
|
|
123
|
+
*/
|
|
124
|
+
HartiganWong& set_max_iterations(int m = Defaults::max_iterations) {
|
|
125
|
+
maxiter = m;
|
|
126
|
+
return *this;
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
public:
|
|
130
|
+
/**
 * Run the Hartigan-Wong algorithm on a dataset.
 *
 * The arguments are stored in members of this object so that the helper
 * methods can see them, and all working buffers are re-sized on every call.
 *
 * @param ndim Number of dimensions.
 * @param nobs Number of observations.
 * @param data Pointer to the input data.
 * Observation `i` is read starting from `data + i * ndim`.
 * @param ncenters Number of clusters.
 * @param centers Pointer to the cluster centers.
 * These are used as the starting positions and are overwritten via `compute_centroids()`.
 * Presumably this holds `ncenters * ndim` values - confirm against `compute_centroids()`.
 * @param clusters Pointer to an array with (at least) `nobs` entries.
 * Entry `i` is overwritten with the cluster assigned to observation `i`.
 *
 * @return A `Details` object.
 * For edge cases, this is whatever `process_edge_case()` returns.
 * If a cluster is empty after the initial assignment, this is `Details(0, 1)`, i.e., status 1.
 * Otherwise it holds the cluster sizes, the result of `compute_wcss()`, the iteration counter and the status,
 * where the status is 0 on success, 2 if the maximum number of iterations was exceeded,
 * and 4 if the quick-transfer stage reported exhaustion of its step budget.
 *
 * @throws std::runtime_error if `nobs` is larger than 1/50-th of the maximum value of `INDEX_t`.
 */
Details<DATA_t, INDEX_t> run(int ndim, INDEX_t nobs, const DATA_t* data, CLUSTER_t ncenters, DATA_t* centers, CLUSTER_t* clusters) {
    num_dim = ndim;
    num_obs = nobs;
    data_ptr = data;
    num_centers = ncenters;
    centers_ptr = centers;
    ic1 = clusters;

    // Sizes taken from the .Fortran() call in stats::kmeans().
    ic2.resize(num_obs);
    nc.resize(num_centers);
    an1.resize(num_centers);
    an2.resize(num_centers);
    d.resize(num_obs);

    /* ITRAN(L) = 1 if cluster L is updated in the quick-transfer stage,
     *          = 0 otherwise
     * In the optimal-transfer stage, NCP(L) stores the step at which
     * cluster L is last updated.
     * In the quick-transfer stage, NCP(L) stores the step at which
     * cluster L is last updated plus M.
     */
    ncp.resize(num_centers);
    itran.resize(num_centers);
    live.resize(num_centers);

    // The loop below unconditionally reads clusters 0 and 1; presumably
    // is_edge_case() filters out any input with fewer than two centers.
    if (is_edge_case(num_obs, num_centers)) {
        return process_edge_case(num_dim, num_obs, data_ptr, num_centers, centers_ptr, ic1);
    }

    /* For each point I, find its two closest centres, IC1(I) and
     * IC2(I). Assign it to IC1(I).
     */
    #pragma omp parallel for
    for (INDEX_t obs = 0; obs < num_obs; ++obs) {
        auto& best = ic1[obs];
        best = 0;
        DATA_t best_dist = squared_distance_from_cluster(obs, best);

        auto& second = ic2[obs];
        second = 1;
        DATA_t second_dist = squared_distance_from_cluster(obs, second);

        if (best_dist > second_dist) {
            std::swap(best, second);
            std::swap(best_dist, second_dist);
        }

        for (CLUSTER_t cen = 2; cen < num_centers; ++cen) {
            DATA_t candidate_dist = squared_distance_from_cluster(obs, cen);
            if (candidate_dist < second_dist) {
                // The candidate displaces the runner-up first, and is then
                // promoted to the top spot if it also beats the current best.
                second_dist = candidate_dist;
                second = cen;
                if (candidate_dist < best_dist) {
                    std::swap(best_dist, second_dist);
                    std::swap(best, second);
                }
            }
        }
    }

    /* Update cluster centres to be the average of points contained
     * within them.
     * NC(L) := #{units in cluster L}, L = 1..K
     */
    std::fill(nc.begin(), nc.end(), 0);
    for (INDEX_t obs = 0; obs < num_obs; ++obs) {
        ++nc[ic1[obs]];
    }

    compute_centroids(num_dim, num_obs, data_ptr, num_centers, centers_ptr, ic1, nc);

    // Check to see if there is any empty cluster at this stage
    for (CLUSTER_t cen = 0; cen < num_centers; ++cen) {
        if (nc[cen] == 0) {
            return Details<DATA_t, INDEX_t>(0, 1); // i.e., ifault = 1 here.
        }

        /* Initialize AN1, AN2.
         * AN1(L) = NC(L) / (NC(L) - 1)
         * AN2(L) = NC(L) / (NC(L) + 1)
         */
        const DATA_t num = nc[cen];
        an2[cen] = num / (num + 1);
        an1[cen] = (num > 1 ? num / (num - 1) : big); // 'big' avoids a division by zero for singleton clusters.
    }

    INDEX_t indx = 0;

    // Guard the multiplication below against overflow of INDEX_t.
    if (std::numeric_limits<INDEX_t>::max() / 50 < num_obs) {
        throw std::runtime_error("too many observations for the index integer type");
    }
    INDEX_t imaxqtr = num_obs * 50; // default derived from stats::kmeans()

    initialize_ncp();
    std::fill(itran.begin(), itran.end(), true);
    std::fill(live.begin(), live.end(), 0);
    int iter = 0;
    int ifault = 0;

    for (iter = 1; iter <= maxiter; ++iter) {

        /* OPtimal-TRAnsfer stage: there is only one pass through the data.
         * Each point is re-allocated, if necessary, to the cluster that will
         * induce the maximum reduction in within-cluster sum of squares.
         */
        optimal_transfer(indx);

        // Stop if no transfer took place in the last M optimal transfer steps.
        if (indx == num_obs) {
            break;
        }

        /* Quick-TRANSfer stage: Each point is tested in turn to see if it should
         * be re-allocated to the cluster to which it is most likely to be
         * transferred, IC2(I), from its present cluster, IC1(I).
         * Loop through the data until no further change is to take place.
         */
        quick_transfer(indx, imaxqtr);

        // NOTE(review): if INDEX_t is an unsigned type, this comparison can
        // never be true and ifault = 4 is never reported - confirm how
        // quick_transfer() signals that its step budget was exhausted.
        if (imaxqtr < 0) {
            ifault = 4;
            break;
        }

        // If there are only two clusters, there is no need to re-enter the optimal transfer stage.
        if (num_centers == 2) {
            break;
        }

        // NCP has to be reset before entering optimal_transfer().
        reset_ncp();
    }

    /* Since the specified number of iterations has been exceeded, set
     * IFAULT = 2. This may indicate unforeseen looping.
     */
    if (iter == maxiter + 1) {
        ifault = 2;
    }

    compute_centroids(num_dim, num_obs, data_ptr, num_centers, centers_ptr, ic1, nc);

    // The cluster sizes are handed over to the result; 'nc' is re-sized at
    // the start of the next call to run().
    return Details(
        std::move(nc),
        compute_wcss(num_dim, num_obs, data_ptr, num_centers, centers_ptr, ic1),
        iter,
        ifault
    );
}
|
|
278
|
+
|
|
279
|
+
private:
|
|
280
|
+
#ifdef DEBUG
|
|
281
|
+
/**
 * Debug helper: writes a label followed by every element of a range to
 * standard output.
 *
 * @tparam T Any type usable in a range-based for loop whose elements can be
 * streamed to `std::cout`.
 * @param vec Range whose elements are printed, each followed by one space.
 * @param msg Label printed on its own line before the elements.
 */
template<class T>
void print_vector(const T& vec, const char* msg) {
    // Label line; std::endl flushes so the output appears immediately.
    std::cout << msg << std::endl;

    for (const auto& element : vec) {
        std::cout << element << " ";
    }

    // Terminate (and flush) the line of elements.
    std::cout << std::endl;
}
|
|
289
|
+
#endif
|
|
290
|
+
|
|
291
|
+
/**
 * Squared Euclidean distance between an observation and a cluster centre.
 *
 * Both `data_ptr` and `centers_ptr` are indexed as contiguous runs of
 * `num_dim` values per observation / centre.
 *
 * @param pt Index of the observation.
 * @param clust Index of the cluster centre.
 * @return Sum over all dimensions of the squared coordinate differences.
 */
DATA_t squared_distance_from_cluster(INDEX_t pt, CLUSTER_t clust) const {
    // Compute the element offsets in std::size_t: multiplying two narrow
    // integers (e.g. int * int) would overflow before the pointer addition
    // once the offset exceeds the range of the narrow type.
    const DATA_t* const point = data_ptr + static_cast<std::size_t>(pt) * num_dim;
    const DATA_t* const centre = centers_ptr + static_cast<std::size_t>(clust) * num_dim;

    DATA_t output = 0;
    for (int dim = 0; dim < num_dim; ++dim) {
        const DATA_t delta = point[dim] - centre[dim];
        output += delta * delta;
    }
    return output;
}
|
|
300
|
+
|
|
301
|
+
private:
|
|
302
|
+
/* ALGORITHM AS 136.1 APPL. STATIST. (1979) VOL.28, NO.1
|
|
303
|
+
* This is the OPtimal TRAnsfer stage.
|
|
304
|
+
* ----------------------
|
|
305
|
+
* Each point is re-allocated, if necessary, to the cluster that
|
|
306
|
+
* will induce a maximum reduction in the within-cluster sum of
|
|
307
|
+
* squares.
|
|
308
|
+
*/
|
|
309
|
+
void optimal_transfer(INDEX_t& indx) {
|
|
310
|
+
/* If cluster L is updated in the last quick-transfer stage, it
|
|
311
|
+
* belongs to the live set throughout this stage. Otherwise, at
|
|
312
|
+
* each step, it is not in the live set if it has not been updated
|
|
313
|
+
* in the last M optimal transfer steps. (AL: M being a synonym for
|
|
314
|
+
* the number of observations, here and in other functions.)
|
|
315
|
+
*/
|
|
316
|
+
for (CLUSTER_t cen = 0; cen < num_centers; ++cen) {
|
|
317
|
+
if (itran[cen]) {
|
|
318
|
+
live[cen] = num_obs; // AL: using 0-index, so no need for +1.
|
|
319
|
+
}
|
|
320
|
+
}
|
|
321
|
+
|
|
322
|
+
for (INDEX_t obs = 0; obs < num_obs; ++obs) {
|
|
323
|
+
++indx;
|
|
324
|
+
auto l1 = ic1[obs];
|
|
325
|
+
|
|
326
|
+
// If point I is the only member of cluster L1, no transfer.
|
|
327
|
+
if (nc[l1] != 1) {
|
|
328
|
+
// If L1 has not yet been updated in this stage, no need to re-compute D(I).
|
|
329
|
+
if (!unchanged_ncp(l1)) {
|
|
330
|
+
d[obs] = squared_distance_from_cluster(obs, l1) * an1[l1];
|
|
331
|
+
}
|
|
332
|
+
|
|
333
|
+
// Find the cluster with minimum R2.
|
|
334
|
+
auto l2 = ic2[obs];
|
|
335
|
+
auto ll = l2;
|
|
336
|
+
DATA_t r2 = squared_distance_from_cluster(obs, l2) * an2[l2];
|
|
337
|
+
|
|
338
|
+
for (CLUSTER_t cen = 0; cen < num_centers; ++cen) {
|
|
339
|
+
/* If I >= LIVE(L1), then L1 is not in the live set. If this is
|
|
340
|
+
* true, we only need to consider clusters that are in the live
|
|
341
|
+
* set for possible transfer of point I. Otherwise, we need to
|
|
342
|
+
* consider all possible clusters.
|
|
343
|
+
*/
|
|
344
|
+
if (obs >= live[l1] && obs >= live[cen] || cen == l1 || cen == ll) {
|
|
345
|
+
continue;
|
|
346
|
+
}
|
|
347
|
+
|
|
348
|
+
DATA_t rr = r2 / an2[cen];
|
|
349
|
+
DATA_t dc = squared_distance_from_cluster(obs, cen);
|
|
350
|
+
if (dc < rr) {
|
|
351
|
+
r2 = dc * an2[cen];
|
|
352
|
+
l2 = cen;
|
|
353
|
+
}
|
|
354
|
+
}
|
|
355
|
+
|
|
356
|
+
if (r2 >= d[obs]) {
|
|
357
|
+
// If no transfer is necessary, L2 is the new IC2(I).
|
|
358
|
+
ic2[obs] = l2;
|
|
359
|
+
|
|
360
|
+
} else {
|
|
361
|
+
/* Update cluster centres, LIVE, NCP, AN1 & AN2 for clusters L1 and
|
|
362
|
+
* L2, and update IC1(I) & IC2(I).
|
|
363
|
+
*/
|
|
364
|
+
indx = 0;
|
|
365
|
+
live[l1] = num_obs + obs;
|
|
366
|
+
live[l2] = num_obs + obs;
|
|
367
|
+
set_ncp(l1, obs);
|
|
368
|
+
set_ncp(l2, obs);
|
|
369
|
+
|
|
370
|
+
transfer_point(obs, l1, l2);
|
|
371
|
+
}
|
|
372
|
+
}
|
|
373
|
+
|
|
374
|
+
if (indx == num_obs) {
|
|
375
|
+
return;
|
|
376
|
+
}
|
|
377
|
+
}
|
|
378
|
+
|
|
379
|
+
for (CLUSTER_t cen = 0; cen < num_centers; ++cen) {
|
|
380
|
+
itran[cen] = false;
|
|
381
|
+
|
|
382
|
+
// LIVE(L) has to be decreased by M before re-entering OPTRA.
|
|
383
|
+
// This means that if I >= LIVE(L1) in the next OPTRA call,
|
|
384
|
+
// the last update must be >= M steps ago, as we effectively
|
|
385
|
+
// 'lapped' the previous update for this cluster.
|
|
386
|
+
live[cen] -= num_obs;
|
|
387
|
+
}
|
|
388
|
+
|
|
389
|
+
return;
|
|
390
|
+
}
|
|
391
|
+
|
|
392
|
+
private:
|
|
393
|
+
/* ALGORITHM AS 136.2 APPL. STATIST. (1979) VOL.28, NO.1
|
|
394
|
+
* This is the Quick TRANsfer stage.
|
|
395
|
+
* --------------------
|
|
396
|
+
* IC1(I) is the cluster which point I belongs to.
|
|
397
|
+
* IC2(I) is the cluster which point I is most likely to be
|
|
398
|
+
* transferred to.
|
|
399
|
+
*
|
|
400
|
+
* For each point I, IC1(I) & IC2(I) are switched, if necessary, to
|
|
401
|
+
* reduce within-cluster sum of squares. The cluster centres are
|
|
402
|
+
* updated after each step.
|
|
403
|
+
*/
|
|
404
|
+
void quick_transfer (INDEX_t& indx, INDEX_t& imaxqtr) {
|
|
405
|
+
INDEX_t icoun = 0;
|
|
406
|
+
INDEX_t istep = 0;
|
|
407
|
+
|
|
408
|
+
while (1) {
|
|
409
|
+
for (INDEX_t obs = 0; obs < num_obs; ++obs) {
|
|
410
|
+
++icoun;
|
|
411
|
+
auto l1 = ic1[obs];
|
|
412
|
+
|
|
413
|
+
// point I is the only member of cluster L1, no transfer.
|
|
414
|
+
if (nc[l1] != 1) {
|
|
415
|
+
|
|
416
|
+
/* NCP(L) is equal to the step at which cluster L is last updated plus M.
|
|
417
|
+
* (AL: M is the notation for the number of observations, a.k.a. 'num_obs').
|
|
418
|
+
*
|
|
419
|
+
* If ISTEP > NCP(L1), no need to re-compute distance from point I to
|
|
420
|
+
* cluster L1. Note that if cluster L1 is last updated exactly M
|
|
421
|
+
* steps ago, we still need to compute the distance from point I to
|
|
422
|
+
* cluster L1.
|
|
423
|
+
*/
|
|
424
|
+
if (le_ncp(l1, istep)) {
|
|
425
|
+
d[obs] = squared_distance_from_cluster(obs, l1) * an1[l1];
|
|
426
|
+
}
|
|
427
|
+
|
|
428
|
+
// If ISTEP >= both NCP(L1) & NCP(L2) there will be no transfer of point I at this step.
|
|
429
|
+
auto l2 = ic2[obs];
|
|
430
|
+
if (lt_ncp(l1, istep) || lt_ncp(l2, istep)) {
|
|
431
|
+
if (squared_distance_from_cluster(obs, l2) < d[obs] / an2[l2]) {
|
|
432
|
+
/* Update cluster centres, NCP, NC, ITRAN, AN1 & AN2 for clusters
|
|
433
|
+
* L1 & L2. Also update IC1(I) & IC2(I). Note that if any
|
|
434
|
+
* updating occurs in this stage, INDX is set back to 0.
|
|
435
|
+
*/
|
|
436
|
+
icoun = 0;
|
|
437
|
+
indx = 0;
|
|
438
|
+
|
|
439
|
+
itran[l1] = true;
|
|
440
|
+
itran[l2] = true;
|
|
441
|
+
set_ncp(l1, istep + num_obs);
|
|
442
|
+
set_ncp(l2, istep + num_obs);
|
|
443
|
+
transfer_point(obs, l1, l2);
|
|
444
|
+
}
|
|
445
|
+
}
|
|
446
|
+
}
|
|
447
|
+
|
|
448
|
+
// If no re-allocation took place in the last M steps, return.
|
|
449
|
+
if (icoun == num_obs) {
|
|
450
|
+
return;
|
|
451
|
+
}
|
|
452
|
+
|
|
453
|
+
// AL: incrementing ISTEP after checks against NCP(L1), to avoid off-by-one
|
|
454
|
+
// errors after switching to zero-indexing for the observations.
|
|
455
|
+
++istep;
|
|
456
|
+
if (istep >= imaxqtr) {
|
|
457
|
+
imaxqtr = -1;
|
|
458
|
+
return;
|
|
459
|
+
}
|
|
460
|
+
}
|
|
461
|
+
}
|
|
462
|
+
}
|
|
463
|
+
|
|
464
|
+
private:
|
|
465
|
+
/**
 * Moves one observation from cluster `l1` to cluster `l2`, updating all of
 * the per-cluster bookkeeping in place.
 *
 * The centres of both clusters are adjusted incrementally (removing the point
 * from `l1`'s mean and adding it to `l2`'s mean), the counts in `nc` are
 * decremented / incremented, `an1` / `an2` are recomputed for both clusters,
 * and finally `ic1[obs]` becomes `l2` while `ic2[obs]` becomes `l1`.
 *
 * Callers must not pass a cluster `l1` that has a single member: the new
 * centre of `l1` is divided by `nc[l1] - 1`.
 *
 * @param obs Index of the observation being moved.
 * @param l1 Cluster the observation currently belongs to.
 * @param l2 Cluster the observation is moved into.
 */
void transfer_point(INDEX_t obs, CLUSTER_t l1, CLUSTER_t l2) {
    const DATA_t al1 = nc[l1];
    const DATA_t alw = al1 - 1; // size of l1 after the move.
    const DATA_t al2 = nc[l2];
    const DATA_t alt = al2 + 1; // size of l2 after the move.

    // Offsets are computed in std::size_t so that the product of a narrow
    // index and num_dim cannot overflow before the pointer addition.
    auto copy1 = centers_ptr + static_cast<std::size_t>(l1) * num_dim;
    auto copy2 = centers_ptr + static_cast<std::size_t>(l2) * num_dim;
    auto acopy = data_ptr + static_cast<std::size_t>(obs) * num_dim;
    for (int dim = 0; dim < num_dim; ++dim, ++copy1, ++copy2, ++acopy) {
        *copy1 = (*copy1 * al1 - *acopy) / alw;
        *copy2 = (*copy2 * al2 + *acopy) / alt;
    }

    --nc[l1];
    ++nc[l2];

    // AN1(L) = NC(L) / (NC(L) - 1) and AN2(L) = NC(L) / (NC(L) + 1), using
    // the updated counts; AN1 falls back to `big` for a one-member cluster.
    an2[l1] = alw / al1;
    an1[l1] = (alw > 1 ? alw / (alw - 1) : big);
    an1[l2] = alt / al2;
    an2[l2] = alt / (alt + 1);

    ic1[obs] = l2;
    ic2[obs] = l1;
}
|
|
488
|
+
};
|
|
489
|
+
|
|
490
|
+
}
|
|
491
|
+
|
|
492
|
+
#endif
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
#ifndef KMEANS_INITIALIZE_KMEANSPP_HPP
|
|
2
|
+
#define KMEANS_INITIALIZE_KMEANSPP_HPP
|
|
3
|
+
|
|
4
|
+
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <vector>

#include "Base.hpp"
#include "InitializeRandom.hpp"
#include "random.hpp"
|
|
11
|
+
|
|
12
|
+
/**
|
|
13
|
+
* @file InitializeKmeansPP.hpp
|
|
14
|
+
*
|
|
15
|
+
* @brief Class for **kmeans++** initialization.
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
namespace kmeans {
|
|
19
|
+
|
|
20
|
+
/**
|
|
21
|
+
* @brief Implements the **k-means++** initialization of Arthur and Vassilvitskii (2007).
|
|
22
|
+
*
|
|
23
|
+
* This approach involves the selection of starting points via iterations of weighted sampling,
|
|
24
|
+
* where the sampling probability for each point is proportional to the squared distance to the closest starting point that was chosen in any of the previous iterations.
|
|
25
|
+
* The aim is to obtain well-separated starting points to encourage the formation of suitable clusters.
|
|
26
|
+
*
|
|
27
|
+
* @tparam DATA_t Floating-point type for the data and centroids.
|
|
28
|
+
* @tparam CLUSTER_t Integer type for the cluster index.
|
|
29
|
+
* @tparam INDEX_t Integer type for the observation index.
|
|
30
|
+
*
|
|
31
|
+
* @see
|
|
32
|
+
* Arthur, D. and Vassilvitskii, S. (2007).
|
|
33
|
+
* k-means++: the advantages of careful seeding.
|
|
34
|
+
* _Proceedings of the eighteenth annual ACM-SIAM symposium on Discrete algorithms_, 1027-1035.
|
|
35
|
+
*/
|
|
36
|
+
template<typename DATA_t = double, typename CLUSTER_t = int, typename INDEX_t = int>
|
|
37
|
+
class InitializeKmeansPP : public Initialize<DATA_t, CLUSTER_t, INDEX_t> {
|
|
38
|
+
public:
|
|
39
|
+
/**
|
|
40
|
+
* @brief Default parameter settings.
|
|
41
|
+
*/
|
|
42
|
+
struct Defaults {
|
|
43
|
+
/**
|
|
44
|
+
* See `set_seed()` for more details.
|
|
45
|
+
*/
|
|
46
|
+
static constexpr uint64_t seed = 6523u;
|
|
47
|
+
};
|
|
48
|
+
|
|
49
|
+
/**
|
|
50
|
+
* @param Random seed to use to construct the PRNG prior to sampling.
|
|
51
|
+
*
|
|
52
|
+
* @return A reference to this `InitializeKmeansPP` object.
|
|
53
|
+
*/
|
|
54
|
+
InitializeKmeansPP& set_seed(uint64_t s = Defaults::seed) {
|
|
55
|
+
seed = s;
|
|
56
|
+
return *this;
|
|
57
|
+
}
|
|
58
|
+
private:
|
|
59
|
+
uint64_t seed = Defaults::seed;
|
|
60
|
+
|
|
61
|
+
public:
|
|
62
|
+
/**
|
|
63
|
+
* @cond
|
|
64
|
+
*/
|
|
65
|
+
std::vector<INDEX_t> run(int ndim, INDEX_t nobs, const DATA_t* data, CLUSTER_t ncenters) {
|
|
66
|
+
std::vector<DATA_t> mindist(nobs, 1);
|
|
67
|
+
std::vector<DATA_t> cumulative(nobs);
|
|
68
|
+
std::vector<INDEX_t> sofar;
|
|
69
|
+
sofar.reserve(ncenters);
|
|
70
|
+
std::mt19937_64 eng(seed);
|
|
71
|
+
|
|
72
|
+
for (CLUSTER_t cen = 0; cen < ncenters; ++cen) {
|
|
73
|
+
INDEX_t counter = 0;
|
|
74
|
+
if (!sofar.empty()) {
|
|
75
|
+
auto last = sofar.back();
|
|
76
|
+
|
|
77
|
+
#pragma omp parallel for
|
|
78
|
+
for (INDEX_t obs = 0; obs < nobs; ++obs) {
|
|
79
|
+
if (mindist[obs]) {
|
|
80
|
+
const DATA_t* acopy = data + obs * ndim;
|
|
81
|
+
const DATA_t* scopy = data + last * ndim;
|
|
82
|
+
DATA_t r2 = 0;
|
|
83
|
+
for (int dim = 0; dim < ndim; ++dim, ++acopy, ++scopy) {
|
|
84
|
+
r2 += (*acopy - *scopy) * (*acopy - *scopy);
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
if (cen == 1 || r2 < mindist[obs]) {
|
|
88
|
+
mindist[obs] = r2;
|
|
89
|
+
}
|
|
90
|
+
}
|
|
91
|
+
}
|
|
92
|
+
} else {
|
|
93
|
+
counter = nobs;
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
cumulative[0] = mindist[0];
|
|
97
|
+
for (INDEX_t i = 1; i < nobs; ++i) {
|
|
98
|
+
cumulative[i] = cumulative[i-1] + mindist[i];
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
const auto total = cumulative.back();
|
|
102
|
+
if (total == 0) { // a.k.a. only duplicates left.
|
|
103
|
+
break;
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
auto chosen_id = weighted_sample(cumulative, mindist, nobs, eng);
|
|
107
|
+
mindist[chosen_id] = 0;
|
|
108
|
+
sofar.push_back(chosen_id);
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
return sofar;
|
|
112
|
+
}
|
|
113
|
+
/**
|
|
114
|
+
* @endcond
|
|
115
|
+
*/
|
|
116
|
+
|
|
117
|
+
public:
|
|
118
|
+
/*
|
|
119
|
+
* @param ndim Number of dimensions.
|
|
120
|
+
* @param nobs Number of observations.
|
|
121
|
+
* @param data Pointer to an array where the dimensions are rows and the observations are columns.
|
|
122
|
+
* Data should be stored in column-major format.
|
|
123
|
+
* @param ncenters Number of centers to pick.
|
|
124
|
+
* @param[out] centers Pointer to a `ndim`-by-`ncenters` array where columns are cluster centers and rows are dimensions.
|
|
125
|
+
* On output, this will contain the final centroid locations for each cluster.
|
|
126
|
+
* Data should be stored in column-major order.
|
|
127
|
+
* @param clusters Ignored in this method.
|
|
128
|
+
*
|
|
129
|
+
* @return `centers` is filled with the new cluster centers.
|
|
130
|
+
* The number of filled centers is returned, see `Initializer::run()`.
|
|
131
|
+
*/
|
|
132
|
+
CLUSTER_t run(int ndim, INDEX_t nobs, const DATA_t* data, CLUSTER_t ncenters, DATA_t* centers, CLUSTER_t* clusters) {
|
|
133
|
+
if (!nobs) {
|
|
134
|
+
return 0;
|
|
135
|
+
}
|
|
136
|
+
auto sofar = run(ndim, nobs, data, ncenters);
|
|
137
|
+
copy_into_array(sofar, ndim, data, centers);
|
|
138
|
+
return sofar.size();
|
|
139
|
+
}
|
|
140
|
+
};
|
|
141
|
+
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
#endif
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
#ifndef KMEANS_INITIALIZE_NONE_HPP
|
|
2
|
+
#define KMEANS_INITIALIZE_NONE_HPP
|
|
3
|
+
|
|
4
|
+
#include "Base.hpp"
|
|
5
|
+
#include <algorithm>
|
|
6
|
+
|
|
7
|
+
/**
|
|
8
|
+
* @file InitializeNone.hpp
|
|
9
|
+
*
|
|
10
|
+
* @brief Class for no initialization.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
namespace kmeans {
|
|
14
|
+
|
|
15
|
+
/**
|
|
16
|
+
* @brief Perform "initialization" by just using the input cluster centers.
|
|
17
|
+
*
|
|
18
|
+
* @tparam DATA_t Floating-point type for the data and centroids.
|
|
19
|
+
* @tparam CLUSTER_t Integer type for the cluster index.
|
|
20
|
+
* @tparam INDEX_t Integer type for the observation index.
|
|
21
|
+
*/
|
|
22
|
+
template<typename DATA_t = double, typename CLUSTER_t = int, typename INDEX_t = int>
|
|
23
|
+
class InitializeNone : public Initialize<DATA_t, CLUSTER_t, INDEX_t> {
|
|
24
|
+
public:
|
|
25
|
+
/*
|
|
26
|
+
* @param ndim Number of dimensions.
|
|
27
|
+
* @param nobs Number of observations.
|
|
28
|
+
* @param data Pointer to an array where the dimensions are rows and the observations are columns.
|
|
29
|
+
* Data should be stored in column-major format.
|
|
30
|
+
* @param ncenters Number of centers to pick.
|
|
31
|
+
* @param centers Pointer to a `ndim`-by-`ncenters` array where columns are cluster centers and rows are dimensions.
|
|
32
|
+
* This is left unchanged.
|
|
33
|
+
* @param clusters Ignored in this method.
|
|
34
|
+
*
|
|
35
|
+
* @return The smaller of `ncenters` and `nobs` is returned, see `Initialize::run()`.
|
|
36
|
+
*/
|
|
37
|
+
CLUSTER_t run(int ndim, INDEX_t nobs, const DATA_t* data, CLUSTER_t ncenters, DATA_t* centers, CLUSTER_t* clusters) {
|
|
38
|
+
return std::min(nobs, static_cast<INDEX_t>(ncenters));
|
|
39
|
+
}
|
|
40
|
+
};
|
|
41
|
+
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
#endif
|