umappp 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (395) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +25 -0
  3. data/README.md +110 -0
  4. data/ext/umappp/extconf.rb +25 -0
  5. data/ext/umappp/numo.hpp +867 -0
  6. data/ext/umappp/umappp.cpp +225 -0
  7. data/lib/umappp/version.rb +5 -0
  8. data/lib/umappp.rb +41 -0
  9. data/vendor/Eigen/Cholesky +45 -0
  10. data/vendor/Eigen/CholmodSupport +48 -0
  11. data/vendor/Eigen/Core +384 -0
  12. data/vendor/Eigen/Dense +7 -0
  13. data/vendor/Eigen/Eigen +2 -0
  14. data/vendor/Eigen/Eigenvalues +60 -0
  15. data/vendor/Eigen/Geometry +59 -0
  16. data/vendor/Eigen/Householder +29 -0
  17. data/vendor/Eigen/IterativeLinearSolvers +48 -0
  18. data/vendor/Eigen/Jacobi +32 -0
  19. data/vendor/Eigen/KLUSupport +41 -0
  20. data/vendor/Eigen/LU +47 -0
  21. data/vendor/Eigen/MetisSupport +35 -0
  22. data/vendor/Eigen/OrderingMethods +70 -0
  23. data/vendor/Eigen/PaStiXSupport +49 -0
  24. data/vendor/Eigen/PardisoSupport +35 -0
  25. data/vendor/Eigen/QR +50 -0
  26. data/vendor/Eigen/QtAlignedMalloc +39 -0
  27. data/vendor/Eigen/SPQRSupport +34 -0
  28. data/vendor/Eigen/SVD +50 -0
  29. data/vendor/Eigen/Sparse +34 -0
  30. data/vendor/Eigen/SparseCholesky +37 -0
  31. data/vendor/Eigen/SparseCore +69 -0
  32. data/vendor/Eigen/SparseLU +50 -0
  33. data/vendor/Eigen/SparseQR +36 -0
  34. data/vendor/Eigen/StdDeque +27 -0
  35. data/vendor/Eigen/StdList +26 -0
  36. data/vendor/Eigen/StdVector +27 -0
  37. data/vendor/Eigen/SuperLUSupport +64 -0
  38. data/vendor/Eigen/UmfPackSupport +40 -0
  39. data/vendor/Eigen/src/Cholesky/LDLT.h +688 -0
  40. data/vendor/Eigen/src/Cholesky/LLT.h +558 -0
  41. data/vendor/Eigen/src/Cholesky/LLT_LAPACKE.h +99 -0
  42. data/vendor/Eigen/src/CholmodSupport/CholmodSupport.h +682 -0
  43. data/vendor/Eigen/src/Core/ArithmeticSequence.h +413 -0
  44. data/vendor/Eigen/src/Core/Array.h +417 -0
  45. data/vendor/Eigen/src/Core/ArrayBase.h +226 -0
  46. data/vendor/Eigen/src/Core/ArrayWrapper.h +209 -0
  47. data/vendor/Eigen/src/Core/Assign.h +90 -0
  48. data/vendor/Eigen/src/Core/AssignEvaluator.h +1010 -0
  49. data/vendor/Eigen/src/Core/Assign_MKL.h +178 -0
  50. data/vendor/Eigen/src/Core/BandMatrix.h +353 -0
  51. data/vendor/Eigen/src/Core/Block.h +448 -0
  52. data/vendor/Eigen/src/Core/BooleanRedux.h +162 -0
  53. data/vendor/Eigen/src/Core/CommaInitializer.h +164 -0
  54. data/vendor/Eigen/src/Core/ConditionEstimator.h +175 -0
  55. data/vendor/Eigen/src/Core/CoreEvaluators.h +1741 -0
  56. data/vendor/Eigen/src/Core/CoreIterators.h +132 -0
  57. data/vendor/Eigen/src/Core/CwiseBinaryOp.h +183 -0
  58. data/vendor/Eigen/src/Core/CwiseNullaryOp.h +1001 -0
  59. data/vendor/Eigen/src/Core/CwiseTernaryOp.h +197 -0
  60. data/vendor/Eigen/src/Core/CwiseUnaryOp.h +103 -0
  61. data/vendor/Eigen/src/Core/CwiseUnaryView.h +132 -0
  62. data/vendor/Eigen/src/Core/DenseBase.h +701 -0
  63. data/vendor/Eigen/src/Core/DenseCoeffsBase.h +685 -0
  64. data/vendor/Eigen/src/Core/DenseStorage.h +652 -0
  65. data/vendor/Eigen/src/Core/Diagonal.h +258 -0
  66. data/vendor/Eigen/src/Core/DiagonalMatrix.h +391 -0
  67. data/vendor/Eigen/src/Core/DiagonalProduct.h +28 -0
  68. data/vendor/Eigen/src/Core/Dot.h +318 -0
  69. data/vendor/Eigen/src/Core/EigenBase.h +160 -0
  70. data/vendor/Eigen/src/Core/ForceAlignedAccess.h +150 -0
  71. data/vendor/Eigen/src/Core/Fuzzy.h +155 -0
  72. data/vendor/Eigen/src/Core/GeneralProduct.h +465 -0
  73. data/vendor/Eigen/src/Core/GenericPacketMath.h +1040 -0
  74. data/vendor/Eigen/src/Core/GlobalFunctions.h +194 -0
  75. data/vendor/Eigen/src/Core/IO.h +258 -0
  76. data/vendor/Eigen/src/Core/IndexedView.h +237 -0
  77. data/vendor/Eigen/src/Core/Inverse.h +117 -0
  78. data/vendor/Eigen/src/Core/Map.h +171 -0
  79. data/vendor/Eigen/src/Core/MapBase.h +310 -0
  80. data/vendor/Eigen/src/Core/MathFunctions.h +2057 -0
  81. data/vendor/Eigen/src/Core/MathFunctionsImpl.h +200 -0
  82. data/vendor/Eigen/src/Core/Matrix.h +565 -0
  83. data/vendor/Eigen/src/Core/MatrixBase.h +547 -0
  84. data/vendor/Eigen/src/Core/NestByValue.h +85 -0
  85. data/vendor/Eigen/src/Core/NoAlias.h +109 -0
  86. data/vendor/Eigen/src/Core/NumTraits.h +335 -0
  87. data/vendor/Eigen/src/Core/PartialReduxEvaluator.h +232 -0
  88. data/vendor/Eigen/src/Core/PermutationMatrix.h +605 -0
  89. data/vendor/Eigen/src/Core/PlainObjectBase.h +1128 -0
  90. data/vendor/Eigen/src/Core/Product.h +191 -0
  91. data/vendor/Eigen/src/Core/ProductEvaluators.h +1179 -0
  92. data/vendor/Eigen/src/Core/Random.h +218 -0
  93. data/vendor/Eigen/src/Core/Redux.h +515 -0
  94. data/vendor/Eigen/src/Core/Ref.h +381 -0
  95. data/vendor/Eigen/src/Core/Replicate.h +142 -0
  96. data/vendor/Eigen/src/Core/Reshaped.h +454 -0
  97. data/vendor/Eigen/src/Core/ReturnByValue.h +119 -0
  98. data/vendor/Eigen/src/Core/Reverse.h +217 -0
  99. data/vendor/Eigen/src/Core/Select.h +164 -0
  100. data/vendor/Eigen/src/Core/SelfAdjointView.h +365 -0
  101. data/vendor/Eigen/src/Core/SelfCwiseBinaryOp.h +47 -0
  102. data/vendor/Eigen/src/Core/Solve.h +188 -0
  103. data/vendor/Eigen/src/Core/SolveTriangular.h +235 -0
  104. data/vendor/Eigen/src/Core/SolverBase.h +168 -0
  105. data/vendor/Eigen/src/Core/StableNorm.h +251 -0
  106. data/vendor/Eigen/src/Core/StlIterators.h +463 -0
  107. data/vendor/Eigen/src/Core/Stride.h +116 -0
  108. data/vendor/Eigen/src/Core/Swap.h +68 -0
  109. data/vendor/Eigen/src/Core/Transpose.h +464 -0
  110. data/vendor/Eigen/src/Core/Transpositions.h +386 -0
  111. data/vendor/Eigen/src/Core/TriangularMatrix.h +1001 -0
  112. data/vendor/Eigen/src/Core/VectorBlock.h +96 -0
  113. data/vendor/Eigen/src/Core/VectorwiseOp.h +784 -0
  114. data/vendor/Eigen/src/Core/Visitor.h +381 -0
  115. data/vendor/Eigen/src/Core/arch/AVX/Complex.h +372 -0
  116. data/vendor/Eigen/src/Core/arch/AVX/MathFunctions.h +228 -0
  117. data/vendor/Eigen/src/Core/arch/AVX/PacketMath.h +1574 -0
  118. data/vendor/Eigen/src/Core/arch/AVX/TypeCasting.h +115 -0
  119. data/vendor/Eigen/src/Core/arch/AVX512/Complex.h +422 -0
  120. data/vendor/Eigen/src/Core/arch/AVX512/MathFunctions.h +362 -0
  121. data/vendor/Eigen/src/Core/arch/AVX512/PacketMath.h +2303 -0
  122. data/vendor/Eigen/src/Core/arch/AVX512/TypeCasting.h +89 -0
  123. data/vendor/Eigen/src/Core/arch/AltiVec/Complex.h +417 -0
  124. data/vendor/Eigen/src/Core/arch/AltiVec/MathFunctions.h +90 -0
  125. data/vendor/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2937 -0
  126. data/vendor/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +221 -0
  127. data/vendor/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +629 -0
  128. data/vendor/Eigen/src/Core/arch/AltiVec/PacketMath.h +2711 -0
  129. data/vendor/Eigen/src/Core/arch/CUDA/Complex.h +258 -0
  130. data/vendor/Eigen/src/Core/arch/Default/BFloat16.h +700 -0
  131. data/vendor/Eigen/src/Core/arch/Default/ConjHelper.h +117 -0
  132. data/vendor/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1649 -0
  133. data/vendor/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +110 -0
  134. data/vendor/Eigen/src/Core/arch/Default/Half.h +942 -0
  135. data/vendor/Eigen/src/Core/arch/Default/Settings.h +49 -0
  136. data/vendor/Eigen/src/Core/arch/Default/TypeCasting.h +120 -0
  137. data/vendor/Eigen/src/Core/arch/GPU/MathFunctions.h +103 -0
  138. data/vendor/Eigen/src/Core/arch/GPU/PacketMath.h +1685 -0
  139. data/vendor/Eigen/src/Core/arch/GPU/TypeCasting.h +80 -0
  140. data/vendor/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
  141. data/vendor/Eigen/src/Core/arch/MSA/Complex.h +648 -0
  142. data/vendor/Eigen/src/Core/arch/MSA/MathFunctions.h +387 -0
  143. data/vendor/Eigen/src/Core/arch/MSA/PacketMath.h +1233 -0
  144. data/vendor/Eigen/src/Core/arch/NEON/Complex.h +584 -0
  145. data/vendor/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +183 -0
  146. data/vendor/Eigen/src/Core/arch/NEON/MathFunctions.h +75 -0
  147. data/vendor/Eigen/src/Core/arch/NEON/PacketMath.h +4587 -0
  148. data/vendor/Eigen/src/Core/arch/NEON/TypeCasting.h +1419 -0
  149. data/vendor/Eigen/src/Core/arch/SSE/Complex.h +351 -0
  150. data/vendor/Eigen/src/Core/arch/SSE/MathFunctions.h +199 -0
  151. data/vendor/Eigen/src/Core/arch/SSE/PacketMath.h +1505 -0
  152. data/vendor/Eigen/src/Core/arch/SSE/TypeCasting.h +142 -0
  153. data/vendor/Eigen/src/Core/arch/SVE/MathFunctions.h +44 -0
  154. data/vendor/Eigen/src/Core/arch/SVE/PacketMath.h +752 -0
  155. data/vendor/Eigen/src/Core/arch/SVE/TypeCasting.h +49 -0
  156. data/vendor/Eigen/src/Core/arch/SYCL/InteropHeaders.h +232 -0
  157. data/vendor/Eigen/src/Core/arch/SYCL/MathFunctions.h +301 -0
  158. data/vendor/Eigen/src/Core/arch/SYCL/PacketMath.h +670 -0
  159. data/vendor/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +694 -0
  160. data/vendor/Eigen/src/Core/arch/SYCL/TypeCasting.h +85 -0
  161. data/vendor/Eigen/src/Core/arch/ZVector/Complex.h +426 -0
  162. data/vendor/Eigen/src/Core/arch/ZVector/MathFunctions.h +233 -0
  163. data/vendor/Eigen/src/Core/arch/ZVector/PacketMath.h +1060 -0
  164. data/vendor/Eigen/src/Core/functors/AssignmentFunctors.h +177 -0
  165. data/vendor/Eigen/src/Core/functors/BinaryFunctors.h +541 -0
  166. data/vendor/Eigen/src/Core/functors/NullaryFunctors.h +189 -0
  167. data/vendor/Eigen/src/Core/functors/StlFunctors.h +166 -0
  168. data/vendor/Eigen/src/Core/functors/TernaryFunctors.h +25 -0
  169. data/vendor/Eigen/src/Core/functors/UnaryFunctors.h +1131 -0
  170. data/vendor/Eigen/src/Core/products/GeneralBlockPanelKernel.h +2645 -0
  171. data/vendor/Eigen/src/Core/products/GeneralMatrixMatrix.h +517 -0
  172. data/vendor/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +317 -0
  173. data/vendor/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +145 -0
  174. data/vendor/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +124 -0
  175. data/vendor/Eigen/src/Core/products/GeneralMatrixVector.h +518 -0
  176. data/vendor/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +136 -0
  177. data/vendor/Eigen/src/Core/products/Parallelizer.h +180 -0
  178. data/vendor/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +544 -0
  179. data/vendor/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +295 -0
  180. data/vendor/Eigen/src/Core/products/SelfadjointMatrixVector.h +262 -0
  181. data/vendor/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +118 -0
  182. data/vendor/Eigen/src/Core/products/SelfadjointProduct.h +133 -0
  183. data/vendor/Eigen/src/Core/products/SelfadjointRank2Update.h +94 -0
  184. data/vendor/Eigen/src/Core/products/TriangularMatrixMatrix.h +472 -0
  185. data/vendor/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +317 -0
  186. data/vendor/Eigen/src/Core/products/TriangularMatrixVector.h +350 -0
  187. data/vendor/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +255 -0
  188. data/vendor/Eigen/src/Core/products/TriangularSolverMatrix.h +337 -0
  189. data/vendor/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +167 -0
  190. data/vendor/Eigen/src/Core/products/TriangularSolverVector.h +148 -0
  191. data/vendor/Eigen/src/Core/util/BlasUtil.h +583 -0
  192. data/vendor/Eigen/src/Core/util/ConfigureVectorization.h +512 -0
  193. data/vendor/Eigen/src/Core/util/Constants.h +563 -0
  194. data/vendor/Eigen/src/Core/util/DisableStupidWarnings.h +106 -0
  195. data/vendor/Eigen/src/Core/util/ForwardDeclarations.h +322 -0
  196. data/vendor/Eigen/src/Core/util/IndexedViewHelper.h +186 -0
  197. data/vendor/Eigen/src/Core/util/IntegralConstant.h +272 -0
  198. data/vendor/Eigen/src/Core/util/MKL_support.h +137 -0
  199. data/vendor/Eigen/src/Core/util/Macros.h +1464 -0
  200. data/vendor/Eigen/src/Core/util/Memory.h +1163 -0
  201. data/vendor/Eigen/src/Core/util/Meta.h +812 -0
  202. data/vendor/Eigen/src/Core/util/NonMPL2.h +3 -0
  203. data/vendor/Eigen/src/Core/util/ReenableStupidWarnings.h +31 -0
  204. data/vendor/Eigen/src/Core/util/ReshapedHelper.h +51 -0
  205. data/vendor/Eigen/src/Core/util/StaticAssert.h +221 -0
  206. data/vendor/Eigen/src/Core/util/SymbolicIndex.h +293 -0
  207. data/vendor/Eigen/src/Core/util/XprHelper.h +856 -0
  208. data/vendor/Eigen/src/Eigenvalues/ComplexEigenSolver.h +346 -0
  209. data/vendor/Eigen/src/Eigenvalues/ComplexSchur.h +462 -0
  210. data/vendor/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +91 -0
  211. data/vendor/Eigen/src/Eigenvalues/EigenSolver.h +622 -0
  212. data/vendor/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +418 -0
  213. data/vendor/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +226 -0
  214. data/vendor/Eigen/src/Eigenvalues/HessenbergDecomposition.h +374 -0
  215. data/vendor/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +158 -0
  216. data/vendor/Eigen/src/Eigenvalues/RealQZ.h +657 -0
  217. data/vendor/Eigen/src/Eigenvalues/RealSchur.h +558 -0
  218. data/vendor/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +77 -0
  219. data/vendor/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +904 -0
  220. data/vendor/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +87 -0
  221. data/vendor/Eigen/src/Eigenvalues/Tridiagonalization.h +561 -0
  222. data/vendor/Eigen/src/Geometry/AlignedBox.h +486 -0
  223. data/vendor/Eigen/src/Geometry/AngleAxis.h +247 -0
  224. data/vendor/Eigen/src/Geometry/EulerAngles.h +114 -0
  225. data/vendor/Eigen/src/Geometry/Homogeneous.h +501 -0
  226. data/vendor/Eigen/src/Geometry/Hyperplane.h +282 -0
  227. data/vendor/Eigen/src/Geometry/OrthoMethods.h +235 -0
  228. data/vendor/Eigen/src/Geometry/ParametrizedLine.h +232 -0
  229. data/vendor/Eigen/src/Geometry/Quaternion.h +870 -0
  230. data/vendor/Eigen/src/Geometry/Rotation2D.h +199 -0
  231. data/vendor/Eigen/src/Geometry/RotationBase.h +206 -0
  232. data/vendor/Eigen/src/Geometry/Scaling.h +188 -0
  233. data/vendor/Eigen/src/Geometry/Transform.h +1563 -0
  234. data/vendor/Eigen/src/Geometry/Translation.h +202 -0
  235. data/vendor/Eigen/src/Geometry/Umeyama.h +166 -0
  236. data/vendor/Eigen/src/Geometry/arch/Geometry_SIMD.h +168 -0
  237. data/vendor/Eigen/src/Householder/BlockHouseholder.h +110 -0
  238. data/vendor/Eigen/src/Householder/Householder.h +176 -0
  239. data/vendor/Eigen/src/Householder/HouseholderSequence.h +545 -0
  240. data/vendor/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +226 -0
  241. data/vendor/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +212 -0
  242. data/vendor/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +229 -0
  243. data/vendor/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +394 -0
  244. data/vendor/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +453 -0
  245. data/vendor/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +444 -0
  246. data/vendor/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +198 -0
  247. data/vendor/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +117 -0
  248. data/vendor/Eigen/src/Jacobi/Jacobi.h +483 -0
  249. data/vendor/Eigen/src/KLUSupport/KLUSupport.h +358 -0
  250. data/vendor/Eigen/src/LU/Determinant.h +117 -0
  251. data/vendor/Eigen/src/LU/FullPivLU.h +877 -0
  252. data/vendor/Eigen/src/LU/InverseImpl.h +432 -0
  253. data/vendor/Eigen/src/LU/PartialPivLU.h +624 -0
  254. data/vendor/Eigen/src/LU/PartialPivLU_LAPACKE.h +83 -0
  255. data/vendor/Eigen/src/LU/arch/InverseSize4.h +351 -0
  256. data/vendor/Eigen/src/MetisSupport/MetisSupport.h +137 -0
  257. data/vendor/Eigen/src/OrderingMethods/Amd.h +435 -0
  258. data/vendor/Eigen/src/OrderingMethods/Eigen_Colamd.h +1863 -0
  259. data/vendor/Eigen/src/OrderingMethods/Ordering.h +153 -0
  260. data/vendor/Eigen/src/PaStiXSupport/PaStiXSupport.h +678 -0
  261. data/vendor/Eigen/src/PardisoSupport/PardisoSupport.h +545 -0
  262. data/vendor/Eigen/src/QR/ColPivHouseholderQR.h +674 -0
  263. data/vendor/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +97 -0
  264. data/vendor/Eigen/src/QR/CompleteOrthogonalDecomposition.h +635 -0
  265. data/vendor/Eigen/src/QR/FullPivHouseholderQR.h +713 -0
  266. data/vendor/Eigen/src/QR/HouseholderQR.h +434 -0
  267. data/vendor/Eigen/src/QR/HouseholderQR_LAPACKE.h +68 -0
  268. data/vendor/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +335 -0
  269. data/vendor/Eigen/src/SVD/BDCSVD.h +1366 -0
  270. data/vendor/Eigen/src/SVD/JacobiSVD.h +812 -0
  271. data/vendor/Eigen/src/SVD/JacobiSVD_LAPACKE.h +91 -0
  272. data/vendor/Eigen/src/SVD/SVDBase.h +376 -0
  273. data/vendor/Eigen/src/SVD/UpperBidiagonalization.h +414 -0
  274. data/vendor/Eigen/src/SparseCholesky/SimplicialCholesky.h +697 -0
  275. data/vendor/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +174 -0
  276. data/vendor/Eigen/src/SparseCore/AmbiVector.h +378 -0
  277. data/vendor/Eigen/src/SparseCore/CompressedStorage.h +274 -0
  278. data/vendor/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +352 -0
  279. data/vendor/Eigen/src/SparseCore/MappedSparseMatrix.h +67 -0
  280. data/vendor/Eigen/src/SparseCore/SparseAssign.h +270 -0
  281. data/vendor/Eigen/src/SparseCore/SparseBlock.h +571 -0
  282. data/vendor/Eigen/src/SparseCore/SparseColEtree.h +206 -0
  283. data/vendor/Eigen/src/SparseCore/SparseCompressedBase.h +370 -0
  284. data/vendor/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +722 -0
  285. data/vendor/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +150 -0
  286. data/vendor/Eigen/src/SparseCore/SparseDenseProduct.h +342 -0
  287. data/vendor/Eigen/src/SparseCore/SparseDiagonalProduct.h +138 -0
  288. data/vendor/Eigen/src/SparseCore/SparseDot.h +98 -0
  289. data/vendor/Eigen/src/SparseCore/SparseFuzzy.h +29 -0
  290. data/vendor/Eigen/src/SparseCore/SparseMap.h +305 -0
  291. data/vendor/Eigen/src/SparseCore/SparseMatrix.h +1518 -0
  292. data/vendor/Eigen/src/SparseCore/SparseMatrixBase.h +398 -0
  293. data/vendor/Eigen/src/SparseCore/SparsePermutation.h +178 -0
  294. data/vendor/Eigen/src/SparseCore/SparseProduct.h +181 -0
  295. data/vendor/Eigen/src/SparseCore/SparseRedux.h +49 -0
  296. data/vendor/Eigen/src/SparseCore/SparseRef.h +397 -0
  297. data/vendor/Eigen/src/SparseCore/SparseSelfAdjointView.h +659 -0
  298. data/vendor/Eigen/src/SparseCore/SparseSolverBase.h +124 -0
  299. data/vendor/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +198 -0
  300. data/vendor/Eigen/src/SparseCore/SparseTranspose.h +92 -0
  301. data/vendor/Eigen/src/SparseCore/SparseTriangularView.h +189 -0
  302. data/vendor/Eigen/src/SparseCore/SparseUtil.h +186 -0
  303. data/vendor/Eigen/src/SparseCore/SparseVector.h +478 -0
  304. data/vendor/Eigen/src/SparseCore/SparseView.h +254 -0
  305. data/vendor/Eigen/src/SparseCore/TriangularSolver.h +315 -0
  306. data/vendor/Eigen/src/SparseLU/SparseLU.h +923 -0
  307. data/vendor/Eigen/src/SparseLU/SparseLUImpl.h +66 -0
  308. data/vendor/Eigen/src/SparseLU/SparseLU_Memory.h +226 -0
  309. data/vendor/Eigen/src/SparseLU/SparseLU_Structs.h +110 -0
  310. data/vendor/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +375 -0
  311. data/vendor/Eigen/src/SparseLU/SparseLU_Utils.h +80 -0
  312. data/vendor/Eigen/src/SparseLU/SparseLU_column_bmod.h +181 -0
  313. data/vendor/Eigen/src/SparseLU/SparseLU_column_dfs.h +179 -0
  314. data/vendor/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +107 -0
  315. data/vendor/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +280 -0
  316. data/vendor/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +126 -0
  317. data/vendor/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +130 -0
  318. data/vendor/Eigen/src/SparseLU/SparseLU_panel_bmod.h +223 -0
  319. data/vendor/Eigen/src/SparseLU/SparseLU_panel_dfs.h +258 -0
  320. data/vendor/Eigen/src/SparseLU/SparseLU_pivotL.h +137 -0
  321. data/vendor/Eigen/src/SparseLU/SparseLU_pruneL.h +136 -0
  322. data/vendor/Eigen/src/SparseLU/SparseLU_relax_snode.h +83 -0
  323. data/vendor/Eigen/src/SparseQR/SparseQR.h +758 -0
  324. data/vendor/Eigen/src/StlSupport/StdDeque.h +116 -0
  325. data/vendor/Eigen/src/StlSupport/StdList.h +106 -0
  326. data/vendor/Eigen/src/StlSupport/StdVector.h +131 -0
  327. data/vendor/Eigen/src/StlSupport/details.h +84 -0
  328. data/vendor/Eigen/src/SuperLUSupport/SuperLUSupport.h +1025 -0
  329. data/vendor/Eigen/src/UmfPackSupport/UmfPackSupport.h +642 -0
  330. data/vendor/Eigen/src/misc/Image.h +82 -0
  331. data/vendor/Eigen/src/misc/Kernel.h +79 -0
  332. data/vendor/Eigen/src/misc/RealSvd2x2.h +55 -0
  333. data/vendor/Eigen/src/misc/blas.h +440 -0
  334. data/vendor/Eigen/src/misc/lapack.h +152 -0
  335. data/vendor/Eigen/src/misc/lapacke.h +16292 -0
  336. data/vendor/Eigen/src/misc/lapacke_mangling.h +17 -0
  337. data/vendor/Eigen/src/plugins/ArrayCwiseBinaryOps.h +358 -0
  338. data/vendor/Eigen/src/plugins/ArrayCwiseUnaryOps.h +696 -0
  339. data/vendor/Eigen/src/plugins/BlockMethods.h +1442 -0
  340. data/vendor/Eigen/src/plugins/CommonCwiseBinaryOps.h +115 -0
  341. data/vendor/Eigen/src/plugins/CommonCwiseUnaryOps.h +177 -0
  342. data/vendor/Eigen/src/plugins/IndexedViewMethods.h +262 -0
  343. data/vendor/Eigen/src/plugins/MatrixCwiseBinaryOps.h +152 -0
  344. data/vendor/Eigen/src/plugins/MatrixCwiseUnaryOps.h +95 -0
  345. data/vendor/Eigen/src/plugins/ReshapedMethods.h +149 -0
  346. data/vendor/aarand/aarand.hpp +114 -0
  347. data/vendor/annoy/annoylib.h +1495 -0
  348. data/vendor/annoy/kissrandom.h +120 -0
  349. data/vendor/annoy/mman.h +242 -0
  350. data/vendor/hnswlib/bruteforce.h +152 -0
  351. data/vendor/hnswlib/hnswalg.h +1192 -0
  352. data/vendor/hnswlib/hnswlib.h +108 -0
  353. data/vendor/hnswlib/space_ip.h +282 -0
  354. data/vendor/hnswlib/space_l2.h +281 -0
  355. data/vendor/hnswlib/visited_list_pool.h +79 -0
  356. data/vendor/irlba/irlba.hpp +575 -0
  357. data/vendor/irlba/lanczos.hpp +212 -0
  358. data/vendor/irlba/parallel.hpp +474 -0
  359. data/vendor/irlba/utils.hpp +224 -0
  360. data/vendor/irlba/wrappers.hpp +228 -0
  361. data/vendor/kmeans/Base.hpp +75 -0
  362. data/vendor/kmeans/Details.hpp +79 -0
  363. data/vendor/kmeans/HartiganWong.hpp +492 -0
  364. data/vendor/kmeans/InitializeKmeansPP.hpp +144 -0
  365. data/vendor/kmeans/InitializeNone.hpp +44 -0
  366. data/vendor/kmeans/InitializePCAPartition.hpp +309 -0
  367. data/vendor/kmeans/InitializeRandom.hpp +91 -0
  368. data/vendor/kmeans/Kmeans.hpp +161 -0
  369. data/vendor/kmeans/Lloyd.hpp +134 -0
  370. data/vendor/kmeans/MiniBatch.hpp +269 -0
  371. data/vendor/kmeans/QuickSearch.hpp +179 -0
  372. data/vendor/kmeans/compute_centroids.hpp +32 -0
  373. data/vendor/kmeans/compute_wcss.hpp +27 -0
  374. data/vendor/kmeans/is_edge_case.hpp +42 -0
  375. data/vendor/kmeans/random.hpp +55 -0
  376. data/vendor/knncolle/Annoy/Annoy.hpp +193 -0
  377. data/vendor/knncolle/BruteForce/BruteForce.hpp +120 -0
  378. data/vendor/knncolle/Hnsw/Hnsw.hpp +225 -0
  379. data/vendor/knncolle/Kmknn/Kmknn.hpp +286 -0
  380. data/vendor/knncolle/VpTree/VpTree.hpp +256 -0
  381. data/vendor/knncolle/knncolle.hpp +34 -0
  382. data/vendor/knncolle/utils/Base.hpp +100 -0
  383. data/vendor/knncolle/utils/NeighborQueue.hpp +94 -0
  384. data/vendor/knncolle/utils/distances.hpp +98 -0
  385. data/vendor/knncolle/utils/find_nearest_neighbors.hpp +112 -0
  386. data/vendor/powerit/PowerIterations.hpp +157 -0
  387. data/vendor/umappp/NeighborList.hpp +37 -0
  388. data/vendor/umappp/Umap.hpp +662 -0
  389. data/vendor/umappp/combine_neighbor_sets.hpp +95 -0
  390. data/vendor/umappp/find_ab.hpp +157 -0
  391. data/vendor/umappp/neighbor_similarities.hpp +136 -0
  392. data/vendor/umappp/optimize_layout.hpp +285 -0
  393. data/vendor/umappp/spectral_init.hpp +181 -0
  394. data/vendor/umappp/umappp.hpp +13 -0
  395. metadata +465 -0
@@ -0,0 +1,269 @@
1
+ #ifndef KMEANS_MINIBATCH_HPP
2
+ #define KMEANS_MINIBATCH_HPP
3
+
4
+ #include <vector>
5
+ #include <algorithm>
6
+ #include <numeric>
7
+ #include <cstdint>
8
+ #include <stdexcept>
9
+ #include <limits>
10
+ #include <random>
11
+
12
+ #include "Base.hpp"
13
+ #include "Details.hpp"
14
+ #include "QuickSearch.hpp"
15
+ #include "compute_wcss.hpp"
16
+ #include "is_edge_case.hpp"
17
+ #include "random.hpp"
18
+
19
+ /**
20
+ * @file MiniBatch.hpp
21
+ *
22
+ * @brief Implements the mini-batch algorithm for k-means clustering.
23
+ */
24
+
25
+ namespace kmeans {
26
+
27
+ /**
28
+ * @brief Implements the mini-batch algorithm for k-means clustering.
29
+ *
30
+ * The mini-batch approach is similar to Lloyd's algorithm in that it runs through a set of observations, assigns each to the closest centroid, updates the centroids and repeats.
31
+ * The key difference is that each iteration is performed with a random subset of observations (i.e., a "mini-batch"), instead of the full set of observations.
32
+ * This reduces computational time at the cost of some solution quality.
33
+ * In theory, it can also reduce memory usage, though this is not particularly relevant here as we are already assuming that the data can be fully stored in memory.
34
+ *
35
+ * The update procedure for a cluster's centroid involves adjusting the coordinates by the assigned observations in the mini-batch.
36
+ * The resulting vector can be interpreted as the mean of all observations that have ever been sampled (possibly multiple times) to that cluster.
37
+ * Thus, the magnitude of the updates will decrease in later iterations as the relative effect of newly sampled points is reduced.
38
+ * This ensures that the centroids will stabilize at a sufficiently large number of iterations.
39
+ *
40
+ * We may stop the algorithm before the maximum number of iterations if only a few observations are reassigned at each iteration.
41
+ * Specifically, every \f$h\f$ iterations, we compute the proportion of sampled observations for each cluster in the past \f$h\f$ mini-batches that were reassigned to/from that cluster.
42
+ * If this proportion is less than some threshold \f$p\f$ for all clusters, we consider that the algorithm has converged.
43
+ *
44
+ * @tparam DATA_t Floating-point type for the data and centroids.
45
+ * @tparam CLUSTER_t Integer type for the cluster assignments.
46
+ * @tparam INDEX_t Integer type for the observation index.
47
+ */
48
+ template<typename DATA_t = double, typename CLUSTER_t = int, typename INDEX_t = int>
49
+ class MiniBatch : public Refine<DATA_t, CLUSTER_t, INDEX_t> {
50
+ public:
51
+ /**
52
+ * @brief Default parameter values for `MiniBatch`.
53
+ */
54
+ struct Defaults {
55
+ /**
56
+ * See `MiniBatch::set_max_iterations()`.
57
+ */
58
+ static constexpr int max_iterations = 100;
59
+
60
+ /**
61
+ * See `MiniBatch::set_batch_size()`.
62
+ */
63
+ static constexpr INDEX_t batch_size = 500;
64
+
65
+ /**
66
+ * See `MiniBatch::set_max_change_proportion()`.
67
+ */
68
+ static constexpr double max_change_proportion = 0.01;
69
+
70
+ /**
71
+ * See `MiniBatch::set_convergence_history()`.
72
+ */
73
+ static constexpr int convergence_history = 10;
74
+
75
+ /**
76
+ * See `MiniBatch::set_seed()`.
77
+ */
78
+ static constexpr uint64_t seed = 1234567890;
79
+ };
80
+
81
+ private:
82
+ int maxiter = Defaults::max_iterations;
83
+
84
+ INDEX_t batch_size = Defaults::batch_size;
85
+
86
+ int history = Defaults::convergence_history;
87
+
88
+ double max_change = Defaults::max_change_proportion;
89
+
90
+ uint64_t seed = Defaults::seed;
91
+ public:
92
+ /**
93
+ * @param i Maximum number of iterations.
94
+ * More iterations increase the opportunity for convergence at the cost of more computational time.
95
+ *
96
+ * @return A reference to this `MiniBatch` object.
97
+ */
98
+ MiniBatch& set_max_iterations(int i = Defaults::max_iterations) {
99
+ maxiter = i;
100
+ return *this;
101
+ }
102
+
103
+ /**
104
+ * @param s Number of observations in the mini-batch.
105
+ * Larger numbers improve quality at the cost of computational time.
106
+ *
107
+ * @return A reference to this `MiniBatch` object.
108
+ */
109
+ MiniBatch& set_batch_size(INDEX_t s = Defaults::batch_size) {
110
+ batch_size = s;
111
+ return *this;
112
+ }
113
+
114
+ /**
115
+ * @param p Maximum proportion of observations in each cluster that can be reassigned when checking for convergence (i.e., \f$p\f$).
116
+ * Lower values improve the quality of the result at the cost of computational time.
117
+ *
118
+ * @return A reference to this `MiniBatch` object.
119
+ */
120
+ MiniBatch& set_max_change_proportion(double p = Defaults::max_change_proportion) {
121
+ max_change = p;
122
+ return *this;
123
+ }
124
+
125
+ /**
126
+ * @param h Number of iterations to remember when checking for convergence (i.e., \f$h\f$).
127
+ * Larger values improve the quality of the result at the cost of computational time.
128
+ *
129
+ * @return A reference to this `MiniBatch` object.
130
+ */
131
+ MiniBatch& set_convergence_history(int h = Defaults::convergence_history) {
132
+ history = h;
133
+ return *this;
134
+ }
135
+
136
+ /**
137
+ * @param s Seed to use for PRNG when sampling observations to use in each mini-batch.
138
+ *
139
+ * @return A reference to this `MiniBatch` object.
140
+ */
141
+ MiniBatch& set_seed(uint64_t s = Defaults::seed) {
142
+ seed = s;
143
+ return *this;
144
+ }
145
+
146
+ public:
147
+ /**
148
+ * @param ndim Number of dimensions.
149
+ * @param nobs Number of observations.
150
+ * @param[in] data Pointer to a `ndim`-by-`nobs` array where columns are observations and rows are dimensions.
151
+ * Data should be stored in column-major order.
152
+ * @param ncenters Number of cluster centers.
153
+ * @param[in, out] centers Pointer to a `ndim`-by-`ncenters` array where columns are cluster centers and rows are dimensions.
154
+ * On input, this should contain the initial centroid locations for each cluster.
155
+ * Data should be stored in column-major order.
156
+ * On output, this will contain the final centroid locations for each cluster.
157
+ * @param[out] clusters Pointer to an array of length `nobs`.
158
+ * On output, this will contain the cluster assignment for each observation.
159
+ *
160
+ * @return `centers` and `clusters` are filled, and a `Details` object is returned containing clustering statistics.
161
+ * If `ncenters > nobs`, only the first `nobs` columns of the `centers` array will be filled.
162
+ */
163
+ Details<DATA_t, INDEX_t> run(int ndim, INDEX_t nobs, const DATA_t* data, CLUSTER_t ncenters, DATA_t* centers, CLUSTER_t* clusters) {
164
+ if (is_edge_case(nobs, ncenters)) {
165
+ return process_edge_case(ndim, nobs, data, ncenters, centers, clusters);
166
+ }
167
+
168
+ int iter = 0, status = 0;
169
+ std::vector<INDEX_t> total_sampled(ncenters);
170
+ std::vector<CLUSTER_t> previous(nobs);
171
+ std::vector<INDEX_t> last_changed(ncenters), last_sampled(ncenters);
172
+
173
+ auto actual_batch_size = std::min(batch_size, nobs);
174
+ int last_updated = 0;
175
+ std::mt19937_64 eng(seed);
176
+
177
+ for (iter = 1; iter <= maxiter; ++iter) {
178
+ auto chosen = sample_without_replacement(nobs, actual_batch_size, eng);
179
+ if (iter > 1) {
180
+ for (auto o : chosen) {
181
+ previous[o] = clusters[o];
182
+ }
183
+ }
184
+
185
+ QuickSearch<DATA_t, CLUSTER_t> index(ndim, ncenters, centers);
186
+ #pragma omp parallel for
187
+ for (size_t i = 0; i < chosen.size(); ++i) {
188
+ clusters[chosen[i]] = index.find(data + chosen[i] * ndim);
189
+ }
190
+
191
+ // Updating the means for each cluster.
192
+ for (auto o : chosen) {
193
+ const auto c = clusters[o];
194
+ auto& n = total_sampled[c];
195
+ ++n;
196
+
197
+ auto ocopy = data + o * ndim;
198
+ auto ccopy = centers + c * ndim;
199
+ for (int d = 0; d < ndim; ++d, ++ocopy, ++ccopy) {
200
+ (*ccopy) += (*ocopy - *ccopy)/n;
201
+ }
202
+ }
203
+
204
+ // Checking for updates.
205
+ if (iter != 1) {
206
+ for (auto o : chosen) {
207
+ ++(last_sampled[previous[o]]);
208
+ if (previous[o] != clusters[o]) {
209
+ ++(last_sampled[clusters[o]]);
210
+ ++(last_changed[previous[o]]);
211
+ ++(last_changed[clusters[o]]);
212
+ }
213
+ }
214
+
215
+ if (iter % history == 1) {
216
+ bool too_many_changes = false;
217
+ for (CLUSTER_t c = 0; c < ncenters; ++c) {
218
+ if (static_cast<double>(last_changed[c]) >= last_sampled[c] * max_change) {
219
+ too_many_changes = true;
220
+ break;
221
+ }
222
+ }
223
+
224
+ if (!too_many_changes) {
225
+ break;
226
+ }
227
+ std::fill(last_sampled.begin(), last_sampled.end(), 0);
228
+ std::fill(last_changed.begin(), last_changed.end(), 0);
229
+ }
230
+ }
231
+ }
232
+
233
+ if (iter == maxiter + 1) {
234
+ status = 2;
235
+ }
236
+
237
+ // Run through all observations to make sure they have the latest cluster assignments.
238
+ QuickSearch<DATA_t, CLUSTER_t> index(ndim, ncenters, centers);
239
+ #pragma omp parallel for
240
+ for (INDEX_t o = 0; o < nobs; ++o) {
241
+ clusters[o] = index.find(data + o * ndim);
242
+ }
243
+
244
+ std::fill(total_sampled.begin(), total_sampled.end(), 0);
245
+ for (INDEX_t o = 0; o < nobs; ++o) {
246
+ ++total_sampled[clusters[o]];
247
+ }
248
+
249
+ for (auto c : total_sampled) {
250
+ if (c == 0) {
251
+ status = 1;
252
+ break;
253
+ }
254
+ }
255
+
256
+ compute_centroids(ndim, nobs, data, ncenters, centers, clusters, total_sampled);
257
+
258
+ return Details<DATA_t, INDEX_t>(
259
+ std::move(total_sampled),
260
+ compute_wcss(ndim, nobs, data, ncenters, centers, clusters),
261
+ iter,
262
+ status
263
+ );
264
+ }
265
+ };
266
+
267
+ }
268
+
269
+ #endif
@@ -0,0 +1,179 @@
1
+ #ifndef KMEANS_QUICKSEARCH_HPP
2
+ #define KMEANS_QUICKSEARCH_HPP
3
+
4
+ #include <vector>
5
+ #include <random>
6
+ #include <limits>
7
+ #include <cmath>
8
+ #include <tuple>
9
+ #include <iostream>
10
+
11
+ namespace kmeans {
12
+
13
+ /* Adapted from http://stevehanov.ca/blog/index.php?id=130 */
14
/**
 * Exact nearest-neighbor lookup over a fixed set of points, implemented as a
 * vantage point (VP) tree.
 *
 * The points are read from a caller-owned array in which point `i` occupies
 * the `ndim` consecutive values starting at `vals + i * ndim`. Only the
 * pointer is stored, so the array must outlive this object.
 *
 * @tparam DATA_t Numeric type of the coordinates; distances are computed and reported in this type.
 * @tparam CLUSTER_t Integer type used to identify the reference points.
 */
template<typename DATA_t = double, typename CLUSTER_t = int>
class QuickSearch {
private:
    int num_dim;
    CLUSTER_t num_obs;
    const DATA_t* reference;

    // Converts a squared distance into the distance that is stored in the tree and compared during searches.
    static DATA_t normalize(DATA_t x) {
        return std::sqrt(x);
    }

    // Squared Euclidean distance between two `ndim`-dimensional points.
    static DATA_t raw_distance(const DATA_t* x, const DATA_t* y, int ndim) {
        DATA_t output = 0;
        for (int i = 0; i < ndim; ++i, ++x, ++y) {
            output += (*x - *y) * (*x - *y);
        }
        return output;
    }

private:
    typedef int NodeIndex_t;
    static const NodeIndex_t LEAF_MARKER=-1;

    // Single node of a VP tree (has a point and radius; left children are closer to point than the radius)
    struct Node {
        DATA_t threshold;  // radius
        CLUSTER_t index;   // original index of current vantage point
        NodeIndex_t left;  // node index of the next vantage point for all children closer than 'threshold' from the current vantage point
        NodeIndex_t right; // node index of the next vantage point for all children further than 'threshold' from the current vantage point
        Node(NodeIndex_t i=0) : threshold(0), index(i), left(LEAF_MARKER), right(LEAF_MARKER) {}
    };
    std::vector<Node> nodes;

    // (original index, pointer to coordinates, squared distance to the current vantage point).
    typedef std::tuple<CLUSTER_t, const DATA_t*, DATA_t> DataPoint;

    /**
     * Recursively builds the subtree for `items[lower, upper)`.
     *
     * @return Position in `nodes` of the subtree's root, or `LEAF_MARKER` if the interval is empty.
     */
    template<class SAMPLER>
    NodeIndex_t buildFromPoints(NodeIndex_t lower, NodeIndex_t upper, std::vector<DataPoint>& items, SAMPLER& rng) {
        if (upper == lower) { // indicates that we're done here!
            return LEAF_MARKER;
        }

        // The new node is always addressed as nodes[pos] rather than through a
        // long-lived reference: the recursive calls below grow 'nodes', and a
        // reference would dangle if that growth ever reallocated.
        const NodeIndex_t pos = static_cast<NodeIndex_t>(nodes.size());
        nodes.resize(pos + 1);

        const int gap = upper - lower;
        if (gap > 1) { // if we did not arrive at leaf yet

            /* Choose an arbitrary point and move it to the start of the [lower, upper)
             * interval in 'items'; this is our new vantage point.
             *
             * Yes, I know that the modulo method does not provide strictly
             * uniform values but statistical correctness doesn't really matter
             * here... but reproducibility across platforms does matter, and
             * std::uniform_int_distribution is implementation-dependent!
             */
            const NodeIndex_t chosen = static_cast<NodeIndex_t>(rng() % gap + lower);
            std::swap(items[lower], items[chosen]);

            // items[lower] is outside the range reordered by nth_element below, so this stays valid.
            const auto& vantage = items[lower];

            // Compute distances to the new vantage point.
            const DATA_t* ref = std::get<1>(vantage);
            for (NodeIndex_t j = lower + 1; j < upper; ++j) {
                const DATA_t* loc = std::get<1>(items[j]);
                std::get<2>(items[j]) = raw_distance(ref, loc, num_dim);
            }

            // Partition around the median distance from the vantage point.
            const NodeIndex_t median = lower + gap/2;
            std::nth_element(items.begin() + lower + 1, items.begin() + median, items.begin() + upper,
                [&](const DataPoint& left, const DataPoint& right) -> bool {
                    return std::get<2>(left) < std::get<2>(right);
                }
            );

            // Threshold of the new node will be the distance to the median
            nodes[pos].threshold = normalize(std::get<2>(items[median]));
            nodes[pos].index = std::get<0>(vantage);

            // Recursively build tree. Each child is built into a temporary
            // first so that nodes[pos] is only evaluated after 'nodes' has
            // finished growing.
            const NodeIndex_t left_child = buildFromPoints(lower + 1, median, items, rng);
            nodes[pos].left = left_child;
            const NodeIndex_t right_child = buildFromPoints(median, upper, items, rng);
            nodes[pos].right = right_child;
        } else {
            nodes[pos].index = std::get<0>(items[lower]);
        }

        return pos;
    }

private:
    /**
     * Searches the subtree rooted at `curnode_index` for the point closest to `target`.
     *
     * `closest` and `tau` hold the best candidate and its distance so far;
     * both are updated in place whenever a strictly closer point is found.
     */
    template<typename INPUT_t>
    void search_nn(NodeIndex_t curnode_index, const INPUT_t* target, CLUSTER_t& closest, DATA_t& tau) const {
        if (curnode_index == LEAF_MARKER) { // indicates that we're done here
            return;
        }

        // Compute distance between target and current node
        const auto& curnode=nodes[curnode_index];
        DATA_t dist = normalize(raw_distance(reference + curnode.index * num_dim, target, num_dim));

        // If current node within radius tau
        if (dist < tau) {
            closest = curnode.index;
            tau = dist;
        }

        // Return if we arrived at a leaf
        if (curnode.left == LEAF_MARKER && curnode.right == LEAF_MARKER) {
            return;
        }

        // If the target lies within the radius of ball
        if (dist < curnode.threshold) {
            if (dist - tau <= curnode.threshold) { // if there can still be neighbors inside the ball, recursively search left child first
                search_nn(curnode.left, target, closest, tau);
            }

            if (dist + tau >= curnode.threshold) { // if there can still be neighbors outside the ball, recursively search right child
                search_nn(curnode.right, target, closest, tau);
            }

        // If the target lies outside the radius of the ball
        } else {
            if (dist + tau >= curnode.threshold) { // if there can still be neighbors outside the ball, recursively search right child first
                search_nn(curnode.right, target, closest, tau);
            }

            if (dist - tau <= curnode.threshold) { // if there can still be neighbors inside the ball, recursively search left child
                search_nn(curnode.left, target, closest, tau);
            }
        }
    }

public:
    /**
     * Builds the tree over `nobs` points of `ndim` dimensions each.
     *
     * @param ndim Number of dimensions per point.
     * @param nobs Number of reference points. If this is not positive, the tree is left empty.
     * @param vals Pointer to the coordinates, laid out so that point `i` starts at `vals + i * ndim`.
     * This is not copied and must remain valid for the lifetime of the object.
     */
    QuickSearch(CLUSTER_t ndim, CLUSTER_t nobs, const DATA_t* vals) : num_dim(ndim), num_obs(nobs), reference(vals) {
        if (!(num_obs > 0)) {
            return; // nothing to index; searches on an empty tree report the defaults.
        }

        std::vector<DataPoint> items;
        items.reserve(num_obs);
        auto ptr = vals;
        for (CLUSTER_t i = 0; i < num_obs; ++i, ptr += num_dim) {
            items.emplace_back(i, ptr, static_cast<DATA_t>(0));
        }

        nodes.reserve(num_obs);
        std::mt19937_64 rand(1234567890); // seed doesn't really matter, we don't need statistical correctness here.
        buildFromPoints(0, num_obs, items, rand);
    }

    /**
     * @param query Pointer to the `ndim` coordinates of the query point.
     * @return Index of the reference point closest to `query`, or 0 if the tree is empty.
     */
    CLUSTER_t find(const DATA_t* query) const {
        DATA_t tau = std::numeric_limits<DATA_t>::max();
        CLUSTER_t closest = 0;
        if (!nodes.empty()) { // nodes[0] does not exist for an empty tree.
            search_nn(0, query, closest, tau);
        }
        return closest;
    }

    /**
     * @param query Pointer to the `ndim` coordinates of the query point.
     * @return Pair of the index of the closest reference point and its distance from `query`.
     * For an empty tree, this is 0 and the maximum finite value of `DATA_t`.
     */
    std::pair<CLUSTER_t, DATA_t> find_with_distance(const DATA_t* query) const {
        DATA_t tau = std::numeric_limits<DATA_t>::max();
        CLUSTER_t closest = 0;
        if (!nodes.empty()) { // nodes[0] does not exist for an empty tree.
            search_nn(0, query, closest, tau);
        }
        return std::make_pair(closest, tau);
    }
};
176
+
177
+ }
178
+
179
+ #endif
@@ -0,0 +1,32 @@
1
+ #ifndef KMEANS_COMPUTE_CENTROIDS_HPP
2
+ #define KMEANS_COMPUTE_CENTROIDS_HPP
3
+
4
+ #include <algorithm>
5
+
6
+ namespace kmeans {
7
+
8
/**
 * Overwrites `centers` with the per-cluster mean of the observations.
 *
 * @param ndim Number of dimensions per observation.
 * @param nobs Number of observations.
 * @param data Coordinates, with observation `i` starting at `data + i * ndim`.
 * @param ncenters Number of clusters.
 * @param[out] centers Array of `ndim * ncenters` values; cluster `c` starts at `centers + c * ndim`.
 * @param clusters Cluster assignment of each observation.
 * @param sizes Indexable container holding the number of observations in each cluster.
 * Clusters with a size of zero are left at all-zero coordinates.
 */
template<typename DATA_t = double, typename INDEX_t = int, typename CLUSTER_t = int, class V>
void compute_centroids(int ndim, INDEX_t nobs, const DATA_t* data, CLUSTER_t ncenters, DATA_t* centers, const CLUSTER_t* clusters, const V& sizes) {
    // Start every centroid from zero so that it can be used as a running sum.
    std::fill(centers, centers + ndim * ncenters, static_cast<DATA_t>(0));

    // Add each observation's coordinates onto the centroid of its assigned cluster.
    for (INDEX_t o = 0; o < nobs; ++o) {
        DATA_t* const target = centers + clusters[o] * ndim;
        const DATA_t* const point = data + o * ndim;
        for (int d = 0; d < ndim; ++d) {
            target[d] += point[d];
        }
    }

    // Turn the sums into means, skipping empty clusters to avoid dividing by zero.
    for (CLUSTER_t c = 0; c < ncenters; ++c) {
        if (!sizes[c]) {
            continue;
        }
        DATA_t* const target = centers + c * ndim;
        for (int d = 0; d < ndim; ++d) {
            target[d] /= sizes[c];
        }
    }
}
29
+
30
+ }
31
+
32
+ #endif
@@ -0,0 +1,27 @@
1
+ #ifndef KMEANS_COMPUTE_WCSS_HPP
2
+ #define KMEANS_COMPUTE_WCSS_HPP
3
+
4
+ #include <vector>
5
+
6
+ namespace kmeans {
7
+
8
/**
 * Computes the within-cluster sum of squares for every cluster.
 *
 * @param ndim Number of dimensions per observation.
 * @param nobs Number of observations.
 * @param data Coordinates, with observation `i` starting at `data + i * ndim`.
 * @param ncenters Number of clusters.
 * @param centers Cluster centers, with cluster `c` starting at `centers + c * ndim`.
 * @param clusters Cluster assignment of each observation.
 *
 * @return Vector of length `ncenters` where entry `c` is the sum of squared
 * differences between cluster `c`'s center and each observation assigned to it.
 */
template<typename DATA_t = double, typename INDEX_t = int, typename CLUSTER_t = int>
std::vector<DATA_t> compute_wcss(int ndim, INDEX_t nobs, const DATA_t* data, CLUSTER_t ncenters, const DATA_t* centers, const CLUSTER_t* clusters) {
    std::vector<DATA_t> totals(ncenters);

    for (INDEX_t o = 0; o < nobs; ++o) {
        const auto assigned = clusters[o];
        const DATA_t* const center = centers + assigned * ndim;
        const DATA_t* const point = data + o * ndim;

        // Accumulate straight into the cluster's slot, one dimension at a time.
        for (int d = 0; d < ndim; ++d) {
            totals[assigned] += (point[d] - center[d]) * (point[d] - center[d]);
        }
    }

    return totals;
}
24
+
25
+ }
26
+
27
+ #endif
@@ -0,0 +1,42 @@
1
+ #ifndef KMEANS_IS_EDGE_CASE_HPP
2
+ #define KMEANS_IS_EDGE_CASE_HPP
3
+
4
+ #include <numeric>
5
+ #include <algorithm>
6
+ #include "compute_wcss.hpp"
7
+ #include "compute_centroids.hpp"
8
+
9
+ namespace kmeans {
10
+
11
/**
 * Reports whether the clustering problem is an edge case.
 *
 * @param nobs Number of observations.
 * @param ncenters Number of requested centers.
 * @return `true` if there is at most one center, or at least as many centers as observations.
 */
template<typename INDEX_t = int, typename CLUSTER_t = int>
bool is_edge_case(INDEX_t nobs, CLUSTER_t ncenters) {
    if (ncenters <= 1) {
        return true;
    }
    return static_cast<INDEX_t>(ncenters) >= nobs;
}
15
+
16
+ template<typename DATA_t = double, typename INDEX_t = int, typename CLUSTER_t = int>
17
+ Details<DATA_t, INDEX_t> process_edge_case(int ndim, INDEX_t nobs, const DATA_t* data, CLUSTER_t ncenters, DATA_t* centers, CLUSTER_t* clusters) {
18
+ if (ncenters == 1) {
19
+ // All points in cluster 0.
20
+ std::fill(clusters, clusters + nobs, 0);
21
+ std::vector<INDEX_t> sizes(1, nobs);
22
+ compute_centroids(ndim, nobs, data, ncenters, centers, clusters, sizes);
23
+ auto wcss = compute_wcss(ndim, nobs, data, ncenters, centers, clusters);
24
+ return Details(std::move(sizes), std::move(wcss), 0, 0);
25
+
26
+ } else if (ncenters >= nobs) {
27
+ // Special case, each observation is a center.
28
+ std::iota(clusters, clusters + nobs, 0);
29
+ std::vector<INDEX_t> sizes(ncenters);
30
+ std::fill(sizes.begin(), sizes.begin() + nobs, 1);
31
+ compute_centroids(ndim, nobs, data, ncenters, centers, clusters, sizes);
32
+ auto wcss = compute_wcss(ndim, nobs, data, ncenters, centers, clusters);
33
+ return Details(std::move(sizes), std::move(wcss), 0, (ncenters > nobs ? 3 : 0));
34
+
35
+ } else { //i.e., ncenters == 0, provided is_edge_case is true.
36
+ return Details<DATA_t, INDEX_t>(0, 3);
37
+ }
38
+ }
39
+
40
+ }
41
+
42
+ #endif
@@ -0,0 +1,55 @@
1
+ #ifndef KMEANS_RANDOM_HPP
2
+ #define KMEANS_RANDOM_HPP
3
+
4
+ #include <vector>
5
+ #include <numeric>
6
+ #include "aarand/aarand.hpp"
7
+
8
+ namespace kmeans {
9
+
10
+ template<typename T = int, class ENGINE>
11
+ std::vector<T> sample_without_replacement(T population, size_t choose, ENGINE& eng) {
12
+ std::vector<T> sofar;
13
+
14
+ if (choose >= population) {
15
+ sofar.resize(population);
16
+ std::iota(sofar.begin(), sofar.end(), 0);
17
+ } else {
18
+ sofar.reserve(choose);
19
+ T traversed = 0;
20
+
21
+ while (sofar.size() < choose) {
22
+ if (static_cast<double>(choose - sofar.size()) > static_cast<double>(population - traversed) * aarand::standard_uniform(eng)) {
23
+ sofar.push_back(traversed);
24
+ }
25
+ ++traversed;
26
+ }
27
+ }
28
+
29
+ return sofar;
30
+ }
31
+
32
+ template<typename DATA_t, typename INDEX_t, class ENGINE>
33
+ INDEX_t weighted_sample(const std::vector<DATA_t>& cumulative, const std::vector<DATA_t>& mindist, INDEX_t nobs, ENGINE& eng) {
34
+ auto total = cumulative.back();
35
+ INDEX_t chosen_id = 0;
36
+
37
+ do {
38
+ const DATA_t sampled_weight = total * aarand::standard_uniform(eng);
39
+ chosen_id = std::lower_bound(cumulative.begin(), cumulative.end(), sampled_weight) - cumulative.begin();
40
+
41
+ // We wrap this in a do/while to defend against edge cases where
42
+ // ties are chosen. The most obvious of these is when you get a
43
+ // `sampled_weight` of zero _and_ there exists a bunch of zeros at
44
+ // the start of `cumulative`. One could also get unexpected ties
45
+ // from limited precision in floating point comparisons, so we'll
46
+ // just be safe and implement a loop here, in the same vein as
47
+ // uniform01.
48
+ } while (chosen_id == nobs || mindist[chosen_id] == 0);
49
+
50
+ return chosen_id;
51
+ }
52
+
53
+ }
54
+
55
+ #endif