umappp 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (395) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +25 -0
  3. data/README.md +110 -0
  4. data/ext/umappp/extconf.rb +25 -0
  5. data/ext/umappp/numo.hpp +867 -0
  6. data/ext/umappp/umappp.cpp +225 -0
  7. data/lib/umappp/version.rb +5 -0
  8. data/lib/umappp.rb +41 -0
  9. data/vendor/Eigen/Cholesky +45 -0
  10. data/vendor/Eigen/CholmodSupport +48 -0
  11. data/vendor/Eigen/Core +384 -0
  12. data/vendor/Eigen/Dense +7 -0
  13. data/vendor/Eigen/Eigen +2 -0
  14. data/vendor/Eigen/Eigenvalues +60 -0
  15. data/vendor/Eigen/Geometry +59 -0
  16. data/vendor/Eigen/Householder +29 -0
  17. data/vendor/Eigen/IterativeLinearSolvers +48 -0
  18. data/vendor/Eigen/Jacobi +32 -0
  19. data/vendor/Eigen/KLUSupport +41 -0
  20. data/vendor/Eigen/LU +47 -0
  21. data/vendor/Eigen/MetisSupport +35 -0
  22. data/vendor/Eigen/OrderingMethods +70 -0
  23. data/vendor/Eigen/PaStiXSupport +49 -0
  24. data/vendor/Eigen/PardisoSupport +35 -0
  25. data/vendor/Eigen/QR +50 -0
  26. data/vendor/Eigen/QtAlignedMalloc +39 -0
  27. data/vendor/Eigen/SPQRSupport +34 -0
  28. data/vendor/Eigen/SVD +50 -0
  29. data/vendor/Eigen/Sparse +34 -0
  30. data/vendor/Eigen/SparseCholesky +37 -0
  31. data/vendor/Eigen/SparseCore +69 -0
  32. data/vendor/Eigen/SparseLU +50 -0
  33. data/vendor/Eigen/SparseQR +36 -0
  34. data/vendor/Eigen/StdDeque +27 -0
  35. data/vendor/Eigen/StdList +26 -0
  36. data/vendor/Eigen/StdVector +27 -0
  37. data/vendor/Eigen/SuperLUSupport +64 -0
  38. data/vendor/Eigen/UmfPackSupport +40 -0
  39. data/vendor/Eigen/src/Cholesky/LDLT.h +688 -0
  40. data/vendor/Eigen/src/Cholesky/LLT.h +558 -0
  41. data/vendor/Eigen/src/Cholesky/LLT_LAPACKE.h +99 -0
  42. data/vendor/Eigen/src/CholmodSupport/CholmodSupport.h +682 -0
  43. data/vendor/Eigen/src/Core/ArithmeticSequence.h +413 -0
  44. data/vendor/Eigen/src/Core/Array.h +417 -0
  45. data/vendor/Eigen/src/Core/ArrayBase.h +226 -0
  46. data/vendor/Eigen/src/Core/ArrayWrapper.h +209 -0
  47. data/vendor/Eigen/src/Core/Assign.h +90 -0
  48. data/vendor/Eigen/src/Core/AssignEvaluator.h +1010 -0
  49. data/vendor/Eigen/src/Core/Assign_MKL.h +178 -0
  50. data/vendor/Eigen/src/Core/BandMatrix.h +353 -0
  51. data/vendor/Eigen/src/Core/Block.h +448 -0
  52. data/vendor/Eigen/src/Core/BooleanRedux.h +162 -0
  53. data/vendor/Eigen/src/Core/CommaInitializer.h +164 -0
  54. data/vendor/Eigen/src/Core/ConditionEstimator.h +175 -0
  55. data/vendor/Eigen/src/Core/CoreEvaluators.h +1741 -0
  56. data/vendor/Eigen/src/Core/CoreIterators.h +132 -0
  57. data/vendor/Eigen/src/Core/CwiseBinaryOp.h +183 -0
  58. data/vendor/Eigen/src/Core/CwiseNullaryOp.h +1001 -0
  59. data/vendor/Eigen/src/Core/CwiseTernaryOp.h +197 -0
  60. data/vendor/Eigen/src/Core/CwiseUnaryOp.h +103 -0
  61. data/vendor/Eigen/src/Core/CwiseUnaryView.h +132 -0
  62. data/vendor/Eigen/src/Core/DenseBase.h +701 -0
  63. data/vendor/Eigen/src/Core/DenseCoeffsBase.h +685 -0
  64. data/vendor/Eigen/src/Core/DenseStorage.h +652 -0
  65. data/vendor/Eigen/src/Core/Diagonal.h +258 -0
  66. data/vendor/Eigen/src/Core/DiagonalMatrix.h +391 -0
  67. data/vendor/Eigen/src/Core/DiagonalProduct.h +28 -0
  68. data/vendor/Eigen/src/Core/Dot.h +318 -0
  69. data/vendor/Eigen/src/Core/EigenBase.h +160 -0
  70. data/vendor/Eigen/src/Core/ForceAlignedAccess.h +150 -0
  71. data/vendor/Eigen/src/Core/Fuzzy.h +155 -0
  72. data/vendor/Eigen/src/Core/GeneralProduct.h +465 -0
  73. data/vendor/Eigen/src/Core/GenericPacketMath.h +1040 -0
  74. data/vendor/Eigen/src/Core/GlobalFunctions.h +194 -0
  75. data/vendor/Eigen/src/Core/IO.h +258 -0
  76. data/vendor/Eigen/src/Core/IndexedView.h +237 -0
  77. data/vendor/Eigen/src/Core/Inverse.h +117 -0
  78. data/vendor/Eigen/src/Core/Map.h +171 -0
  79. data/vendor/Eigen/src/Core/MapBase.h +310 -0
  80. data/vendor/Eigen/src/Core/MathFunctions.h +2057 -0
  81. data/vendor/Eigen/src/Core/MathFunctionsImpl.h +200 -0
  82. data/vendor/Eigen/src/Core/Matrix.h +565 -0
  83. data/vendor/Eigen/src/Core/MatrixBase.h +547 -0
  84. data/vendor/Eigen/src/Core/NestByValue.h +85 -0
  85. data/vendor/Eigen/src/Core/NoAlias.h +109 -0
  86. data/vendor/Eigen/src/Core/NumTraits.h +335 -0
  87. data/vendor/Eigen/src/Core/PartialReduxEvaluator.h +232 -0
  88. data/vendor/Eigen/src/Core/PermutationMatrix.h +605 -0
  89. data/vendor/Eigen/src/Core/PlainObjectBase.h +1128 -0
  90. data/vendor/Eigen/src/Core/Product.h +191 -0
  91. data/vendor/Eigen/src/Core/ProductEvaluators.h +1179 -0
  92. data/vendor/Eigen/src/Core/Random.h +218 -0
  93. data/vendor/Eigen/src/Core/Redux.h +515 -0
  94. data/vendor/Eigen/src/Core/Ref.h +381 -0
  95. data/vendor/Eigen/src/Core/Replicate.h +142 -0
  96. data/vendor/Eigen/src/Core/Reshaped.h +454 -0
  97. data/vendor/Eigen/src/Core/ReturnByValue.h +119 -0
  98. data/vendor/Eigen/src/Core/Reverse.h +217 -0
  99. data/vendor/Eigen/src/Core/Select.h +164 -0
  100. data/vendor/Eigen/src/Core/SelfAdjointView.h +365 -0
  101. data/vendor/Eigen/src/Core/SelfCwiseBinaryOp.h +47 -0
  102. data/vendor/Eigen/src/Core/Solve.h +188 -0
  103. data/vendor/Eigen/src/Core/SolveTriangular.h +235 -0
  104. data/vendor/Eigen/src/Core/SolverBase.h +168 -0
  105. data/vendor/Eigen/src/Core/StableNorm.h +251 -0
  106. data/vendor/Eigen/src/Core/StlIterators.h +463 -0
  107. data/vendor/Eigen/src/Core/Stride.h +116 -0
  108. data/vendor/Eigen/src/Core/Swap.h +68 -0
  109. data/vendor/Eigen/src/Core/Transpose.h +464 -0
  110. data/vendor/Eigen/src/Core/Transpositions.h +386 -0
  111. data/vendor/Eigen/src/Core/TriangularMatrix.h +1001 -0
  112. data/vendor/Eigen/src/Core/VectorBlock.h +96 -0
  113. data/vendor/Eigen/src/Core/VectorwiseOp.h +784 -0
  114. data/vendor/Eigen/src/Core/Visitor.h +381 -0
  115. data/vendor/Eigen/src/Core/arch/AVX/Complex.h +372 -0
  116. data/vendor/Eigen/src/Core/arch/AVX/MathFunctions.h +228 -0
  117. data/vendor/Eigen/src/Core/arch/AVX/PacketMath.h +1574 -0
  118. data/vendor/Eigen/src/Core/arch/AVX/TypeCasting.h +115 -0
  119. data/vendor/Eigen/src/Core/arch/AVX512/Complex.h +422 -0
  120. data/vendor/Eigen/src/Core/arch/AVX512/MathFunctions.h +362 -0
  121. data/vendor/Eigen/src/Core/arch/AVX512/PacketMath.h +2303 -0
  122. data/vendor/Eigen/src/Core/arch/AVX512/TypeCasting.h +89 -0
  123. data/vendor/Eigen/src/Core/arch/AltiVec/Complex.h +417 -0
  124. data/vendor/Eigen/src/Core/arch/AltiVec/MathFunctions.h +90 -0
  125. data/vendor/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2937 -0
  126. data/vendor/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +221 -0
  127. data/vendor/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +629 -0
  128. data/vendor/Eigen/src/Core/arch/AltiVec/PacketMath.h +2711 -0
  129. data/vendor/Eigen/src/Core/arch/CUDA/Complex.h +258 -0
  130. data/vendor/Eigen/src/Core/arch/Default/BFloat16.h +700 -0
  131. data/vendor/Eigen/src/Core/arch/Default/ConjHelper.h +117 -0
  132. data/vendor/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1649 -0
  133. data/vendor/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +110 -0
  134. data/vendor/Eigen/src/Core/arch/Default/Half.h +942 -0
  135. data/vendor/Eigen/src/Core/arch/Default/Settings.h +49 -0
  136. data/vendor/Eigen/src/Core/arch/Default/TypeCasting.h +120 -0
  137. data/vendor/Eigen/src/Core/arch/GPU/MathFunctions.h +103 -0
  138. data/vendor/Eigen/src/Core/arch/GPU/PacketMath.h +1685 -0
  139. data/vendor/Eigen/src/Core/arch/GPU/TypeCasting.h +80 -0
  140. data/vendor/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
  141. data/vendor/Eigen/src/Core/arch/MSA/Complex.h +648 -0
  142. data/vendor/Eigen/src/Core/arch/MSA/MathFunctions.h +387 -0
  143. data/vendor/Eigen/src/Core/arch/MSA/PacketMath.h +1233 -0
  144. data/vendor/Eigen/src/Core/arch/NEON/Complex.h +584 -0
  145. data/vendor/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +183 -0
  146. data/vendor/Eigen/src/Core/arch/NEON/MathFunctions.h +75 -0
  147. data/vendor/Eigen/src/Core/arch/NEON/PacketMath.h +4587 -0
  148. data/vendor/Eigen/src/Core/arch/NEON/TypeCasting.h +1419 -0
  149. data/vendor/Eigen/src/Core/arch/SSE/Complex.h +351 -0
  150. data/vendor/Eigen/src/Core/arch/SSE/MathFunctions.h +199 -0
  151. data/vendor/Eigen/src/Core/arch/SSE/PacketMath.h +1505 -0
  152. data/vendor/Eigen/src/Core/arch/SSE/TypeCasting.h +142 -0
  153. data/vendor/Eigen/src/Core/arch/SVE/MathFunctions.h +44 -0
  154. data/vendor/Eigen/src/Core/arch/SVE/PacketMath.h +752 -0
  155. data/vendor/Eigen/src/Core/arch/SVE/TypeCasting.h +49 -0
  156. data/vendor/Eigen/src/Core/arch/SYCL/InteropHeaders.h +232 -0
  157. data/vendor/Eigen/src/Core/arch/SYCL/MathFunctions.h +301 -0
  158. data/vendor/Eigen/src/Core/arch/SYCL/PacketMath.h +670 -0
  159. data/vendor/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +694 -0
  160. data/vendor/Eigen/src/Core/arch/SYCL/TypeCasting.h +85 -0
  161. data/vendor/Eigen/src/Core/arch/ZVector/Complex.h +426 -0
  162. data/vendor/Eigen/src/Core/arch/ZVector/MathFunctions.h +233 -0
  163. data/vendor/Eigen/src/Core/arch/ZVector/PacketMath.h +1060 -0
  164. data/vendor/Eigen/src/Core/functors/AssignmentFunctors.h +177 -0
  165. data/vendor/Eigen/src/Core/functors/BinaryFunctors.h +541 -0
  166. data/vendor/Eigen/src/Core/functors/NullaryFunctors.h +189 -0
  167. data/vendor/Eigen/src/Core/functors/StlFunctors.h +166 -0
  168. data/vendor/Eigen/src/Core/functors/TernaryFunctors.h +25 -0
  169. data/vendor/Eigen/src/Core/functors/UnaryFunctors.h +1131 -0
  170. data/vendor/Eigen/src/Core/products/GeneralBlockPanelKernel.h +2645 -0
  171. data/vendor/Eigen/src/Core/products/GeneralMatrixMatrix.h +517 -0
  172. data/vendor/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +317 -0
  173. data/vendor/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +145 -0
  174. data/vendor/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +124 -0
  175. data/vendor/Eigen/src/Core/products/GeneralMatrixVector.h +518 -0
  176. data/vendor/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +136 -0
  177. data/vendor/Eigen/src/Core/products/Parallelizer.h +180 -0
  178. data/vendor/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +544 -0
  179. data/vendor/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +295 -0
  180. data/vendor/Eigen/src/Core/products/SelfadjointMatrixVector.h +262 -0
  181. data/vendor/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +118 -0
  182. data/vendor/Eigen/src/Core/products/SelfadjointProduct.h +133 -0
  183. data/vendor/Eigen/src/Core/products/SelfadjointRank2Update.h +94 -0
  184. data/vendor/Eigen/src/Core/products/TriangularMatrixMatrix.h +472 -0
  185. data/vendor/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +317 -0
  186. data/vendor/Eigen/src/Core/products/TriangularMatrixVector.h +350 -0
  187. data/vendor/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +255 -0
  188. data/vendor/Eigen/src/Core/products/TriangularSolverMatrix.h +337 -0
  189. data/vendor/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +167 -0
  190. data/vendor/Eigen/src/Core/products/TriangularSolverVector.h +148 -0
  191. data/vendor/Eigen/src/Core/util/BlasUtil.h +583 -0
  192. data/vendor/Eigen/src/Core/util/ConfigureVectorization.h +512 -0
  193. data/vendor/Eigen/src/Core/util/Constants.h +563 -0
  194. data/vendor/Eigen/src/Core/util/DisableStupidWarnings.h +106 -0
  195. data/vendor/Eigen/src/Core/util/ForwardDeclarations.h +322 -0
  196. data/vendor/Eigen/src/Core/util/IndexedViewHelper.h +186 -0
  197. data/vendor/Eigen/src/Core/util/IntegralConstant.h +272 -0
  198. data/vendor/Eigen/src/Core/util/MKL_support.h +137 -0
  199. data/vendor/Eigen/src/Core/util/Macros.h +1464 -0
  200. data/vendor/Eigen/src/Core/util/Memory.h +1163 -0
  201. data/vendor/Eigen/src/Core/util/Meta.h +812 -0
  202. data/vendor/Eigen/src/Core/util/NonMPL2.h +3 -0
  203. data/vendor/Eigen/src/Core/util/ReenableStupidWarnings.h +31 -0
  204. data/vendor/Eigen/src/Core/util/ReshapedHelper.h +51 -0
  205. data/vendor/Eigen/src/Core/util/StaticAssert.h +221 -0
  206. data/vendor/Eigen/src/Core/util/SymbolicIndex.h +293 -0
  207. data/vendor/Eigen/src/Core/util/XprHelper.h +856 -0
  208. data/vendor/Eigen/src/Eigenvalues/ComplexEigenSolver.h +346 -0
  209. data/vendor/Eigen/src/Eigenvalues/ComplexSchur.h +462 -0
  210. data/vendor/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +91 -0
  211. data/vendor/Eigen/src/Eigenvalues/EigenSolver.h +622 -0
  212. data/vendor/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +418 -0
  213. data/vendor/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +226 -0
  214. data/vendor/Eigen/src/Eigenvalues/HessenbergDecomposition.h +374 -0
  215. data/vendor/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +158 -0
  216. data/vendor/Eigen/src/Eigenvalues/RealQZ.h +657 -0
  217. data/vendor/Eigen/src/Eigenvalues/RealSchur.h +558 -0
  218. data/vendor/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +77 -0
  219. data/vendor/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +904 -0
  220. data/vendor/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +87 -0
  221. data/vendor/Eigen/src/Eigenvalues/Tridiagonalization.h +561 -0
  222. data/vendor/Eigen/src/Geometry/AlignedBox.h +486 -0
  223. data/vendor/Eigen/src/Geometry/AngleAxis.h +247 -0
  224. data/vendor/Eigen/src/Geometry/EulerAngles.h +114 -0
  225. data/vendor/Eigen/src/Geometry/Homogeneous.h +501 -0
  226. data/vendor/Eigen/src/Geometry/Hyperplane.h +282 -0
  227. data/vendor/Eigen/src/Geometry/OrthoMethods.h +235 -0
  228. data/vendor/Eigen/src/Geometry/ParametrizedLine.h +232 -0
  229. data/vendor/Eigen/src/Geometry/Quaternion.h +870 -0
  230. data/vendor/Eigen/src/Geometry/Rotation2D.h +199 -0
  231. data/vendor/Eigen/src/Geometry/RotationBase.h +206 -0
  232. data/vendor/Eigen/src/Geometry/Scaling.h +188 -0
  233. data/vendor/Eigen/src/Geometry/Transform.h +1563 -0
  234. data/vendor/Eigen/src/Geometry/Translation.h +202 -0
  235. data/vendor/Eigen/src/Geometry/Umeyama.h +166 -0
  236. data/vendor/Eigen/src/Geometry/arch/Geometry_SIMD.h +168 -0
  237. data/vendor/Eigen/src/Householder/BlockHouseholder.h +110 -0
  238. data/vendor/Eigen/src/Householder/Householder.h +176 -0
  239. data/vendor/Eigen/src/Householder/HouseholderSequence.h +545 -0
  240. data/vendor/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +226 -0
  241. data/vendor/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +212 -0
  242. data/vendor/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +229 -0
  243. data/vendor/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +394 -0
  244. data/vendor/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +453 -0
  245. data/vendor/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +444 -0
  246. data/vendor/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +198 -0
  247. data/vendor/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +117 -0
  248. data/vendor/Eigen/src/Jacobi/Jacobi.h +483 -0
  249. data/vendor/Eigen/src/KLUSupport/KLUSupport.h +358 -0
  250. data/vendor/Eigen/src/LU/Determinant.h +117 -0
  251. data/vendor/Eigen/src/LU/FullPivLU.h +877 -0
  252. data/vendor/Eigen/src/LU/InverseImpl.h +432 -0
  253. data/vendor/Eigen/src/LU/PartialPivLU.h +624 -0
  254. data/vendor/Eigen/src/LU/PartialPivLU_LAPACKE.h +83 -0
  255. data/vendor/Eigen/src/LU/arch/InverseSize4.h +351 -0
  256. data/vendor/Eigen/src/MetisSupport/MetisSupport.h +137 -0
  257. data/vendor/Eigen/src/OrderingMethods/Amd.h +435 -0
  258. data/vendor/Eigen/src/OrderingMethods/Eigen_Colamd.h +1863 -0
  259. data/vendor/Eigen/src/OrderingMethods/Ordering.h +153 -0
  260. data/vendor/Eigen/src/PaStiXSupport/PaStiXSupport.h +678 -0
  261. data/vendor/Eigen/src/PardisoSupport/PardisoSupport.h +545 -0
  262. data/vendor/Eigen/src/QR/ColPivHouseholderQR.h +674 -0
  263. data/vendor/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +97 -0
  264. data/vendor/Eigen/src/QR/CompleteOrthogonalDecomposition.h +635 -0
  265. data/vendor/Eigen/src/QR/FullPivHouseholderQR.h +713 -0
  266. data/vendor/Eigen/src/QR/HouseholderQR.h +434 -0
  267. data/vendor/Eigen/src/QR/HouseholderQR_LAPACKE.h +68 -0
  268. data/vendor/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +335 -0
  269. data/vendor/Eigen/src/SVD/BDCSVD.h +1366 -0
  270. data/vendor/Eigen/src/SVD/JacobiSVD.h +812 -0
  271. data/vendor/Eigen/src/SVD/JacobiSVD_LAPACKE.h +91 -0
  272. data/vendor/Eigen/src/SVD/SVDBase.h +376 -0
  273. data/vendor/Eigen/src/SVD/UpperBidiagonalization.h +414 -0
  274. data/vendor/Eigen/src/SparseCholesky/SimplicialCholesky.h +697 -0
  275. data/vendor/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +174 -0
  276. data/vendor/Eigen/src/SparseCore/AmbiVector.h +378 -0
  277. data/vendor/Eigen/src/SparseCore/CompressedStorage.h +274 -0
  278. data/vendor/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +352 -0
  279. data/vendor/Eigen/src/SparseCore/MappedSparseMatrix.h +67 -0
  280. data/vendor/Eigen/src/SparseCore/SparseAssign.h +270 -0
  281. data/vendor/Eigen/src/SparseCore/SparseBlock.h +571 -0
  282. data/vendor/Eigen/src/SparseCore/SparseColEtree.h +206 -0
  283. data/vendor/Eigen/src/SparseCore/SparseCompressedBase.h +370 -0
  284. data/vendor/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +722 -0
  285. data/vendor/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +150 -0
  286. data/vendor/Eigen/src/SparseCore/SparseDenseProduct.h +342 -0
  287. data/vendor/Eigen/src/SparseCore/SparseDiagonalProduct.h +138 -0
  288. data/vendor/Eigen/src/SparseCore/SparseDot.h +98 -0
  289. data/vendor/Eigen/src/SparseCore/SparseFuzzy.h +29 -0
  290. data/vendor/Eigen/src/SparseCore/SparseMap.h +305 -0
  291. data/vendor/Eigen/src/SparseCore/SparseMatrix.h +1518 -0
  292. data/vendor/Eigen/src/SparseCore/SparseMatrixBase.h +398 -0
  293. data/vendor/Eigen/src/SparseCore/SparsePermutation.h +178 -0
  294. data/vendor/Eigen/src/SparseCore/SparseProduct.h +181 -0
  295. data/vendor/Eigen/src/SparseCore/SparseRedux.h +49 -0
  296. data/vendor/Eigen/src/SparseCore/SparseRef.h +397 -0
  297. data/vendor/Eigen/src/SparseCore/SparseSelfAdjointView.h +659 -0
  298. data/vendor/Eigen/src/SparseCore/SparseSolverBase.h +124 -0
  299. data/vendor/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +198 -0
  300. data/vendor/Eigen/src/SparseCore/SparseTranspose.h +92 -0
  301. data/vendor/Eigen/src/SparseCore/SparseTriangularView.h +189 -0
  302. data/vendor/Eigen/src/SparseCore/SparseUtil.h +186 -0
  303. data/vendor/Eigen/src/SparseCore/SparseVector.h +478 -0
  304. data/vendor/Eigen/src/SparseCore/SparseView.h +254 -0
  305. data/vendor/Eigen/src/SparseCore/TriangularSolver.h +315 -0
  306. data/vendor/Eigen/src/SparseLU/SparseLU.h +923 -0
  307. data/vendor/Eigen/src/SparseLU/SparseLUImpl.h +66 -0
  308. data/vendor/Eigen/src/SparseLU/SparseLU_Memory.h +226 -0
  309. data/vendor/Eigen/src/SparseLU/SparseLU_Structs.h +110 -0
  310. data/vendor/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +375 -0
  311. data/vendor/Eigen/src/SparseLU/SparseLU_Utils.h +80 -0
  312. data/vendor/Eigen/src/SparseLU/SparseLU_column_bmod.h +181 -0
  313. data/vendor/Eigen/src/SparseLU/SparseLU_column_dfs.h +179 -0
  314. data/vendor/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +107 -0
  315. data/vendor/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +280 -0
  316. data/vendor/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +126 -0
  317. data/vendor/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +130 -0
  318. data/vendor/Eigen/src/SparseLU/SparseLU_panel_bmod.h +223 -0
  319. data/vendor/Eigen/src/SparseLU/SparseLU_panel_dfs.h +258 -0
  320. data/vendor/Eigen/src/SparseLU/SparseLU_pivotL.h +137 -0
  321. data/vendor/Eigen/src/SparseLU/SparseLU_pruneL.h +136 -0
  322. data/vendor/Eigen/src/SparseLU/SparseLU_relax_snode.h +83 -0
  323. data/vendor/Eigen/src/SparseQR/SparseQR.h +758 -0
  324. data/vendor/Eigen/src/StlSupport/StdDeque.h +116 -0
  325. data/vendor/Eigen/src/StlSupport/StdList.h +106 -0
  326. data/vendor/Eigen/src/StlSupport/StdVector.h +131 -0
  327. data/vendor/Eigen/src/StlSupport/details.h +84 -0
  328. data/vendor/Eigen/src/SuperLUSupport/SuperLUSupport.h +1025 -0
  329. data/vendor/Eigen/src/UmfPackSupport/UmfPackSupport.h +642 -0
  330. data/vendor/Eigen/src/misc/Image.h +82 -0
  331. data/vendor/Eigen/src/misc/Kernel.h +79 -0
  332. data/vendor/Eigen/src/misc/RealSvd2x2.h +55 -0
  333. data/vendor/Eigen/src/misc/blas.h +440 -0
  334. data/vendor/Eigen/src/misc/lapack.h +152 -0
  335. data/vendor/Eigen/src/misc/lapacke.h +16292 -0
  336. data/vendor/Eigen/src/misc/lapacke_mangling.h +17 -0
  337. data/vendor/Eigen/src/plugins/ArrayCwiseBinaryOps.h +358 -0
  338. data/vendor/Eigen/src/plugins/ArrayCwiseUnaryOps.h +696 -0
  339. data/vendor/Eigen/src/plugins/BlockMethods.h +1442 -0
  340. data/vendor/Eigen/src/plugins/CommonCwiseBinaryOps.h +115 -0
  341. data/vendor/Eigen/src/plugins/CommonCwiseUnaryOps.h +177 -0
  342. data/vendor/Eigen/src/plugins/IndexedViewMethods.h +262 -0
  343. data/vendor/Eigen/src/plugins/MatrixCwiseBinaryOps.h +152 -0
  344. data/vendor/Eigen/src/plugins/MatrixCwiseUnaryOps.h +95 -0
  345. data/vendor/Eigen/src/plugins/ReshapedMethods.h +149 -0
  346. data/vendor/aarand/aarand.hpp +114 -0
  347. data/vendor/annoy/annoylib.h +1495 -0
  348. data/vendor/annoy/kissrandom.h +120 -0
  349. data/vendor/annoy/mman.h +242 -0
  350. data/vendor/hnswlib/bruteforce.h +152 -0
  351. data/vendor/hnswlib/hnswalg.h +1192 -0
  352. data/vendor/hnswlib/hnswlib.h +108 -0
  353. data/vendor/hnswlib/space_ip.h +282 -0
  354. data/vendor/hnswlib/space_l2.h +281 -0
  355. data/vendor/hnswlib/visited_list_pool.h +79 -0
  356. data/vendor/irlba/irlba.hpp +575 -0
  357. data/vendor/irlba/lanczos.hpp +212 -0
  358. data/vendor/irlba/parallel.hpp +474 -0
  359. data/vendor/irlba/utils.hpp +224 -0
  360. data/vendor/irlba/wrappers.hpp +228 -0
  361. data/vendor/kmeans/Base.hpp +75 -0
  362. data/vendor/kmeans/Details.hpp +79 -0
  363. data/vendor/kmeans/HartiganWong.hpp +492 -0
  364. data/vendor/kmeans/InitializeKmeansPP.hpp +144 -0
  365. data/vendor/kmeans/InitializeNone.hpp +44 -0
  366. data/vendor/kmeans/InitializePCAPartition.hpp +309 -0
  367. data/vendor/kmeans/InitializeRandom.hpp +91 -0
  368. data/vendor/kmeans/Kmeans.hpp +161 -0
  369. data/vendor/kmeans/Lloyd.hpp +134 -0
  370. data/vendor/kmeans/MiniBatch.hpp +269 -0
  371. data/vendor/kmeans/QuickSearch.hpp +179 -0
  372. data/vendor/kmeans/compute_centroids.hpp +32 -0
  373. data/vendor/kmeans/compute_wcss.hpp +27 -0
  374. data/vendor/kmeans/is_edge_case.hpp +42 -0
  375. data/vendor/kmeans/random.hpp +55 -0
  376. data/vendor/knncolle/Annoy/Annoy.hpp +193 -0
  377. data/vendor/knncolle/BruteForce/BruteForce.hpp +120 -0
  378. data/vendor/knncolle/Hnsw/Hnsw.hpp +225 -0
  379. data/vendor/knncolle/Kmknn/Kmknn.hpp +286 -0
  380. data/vendor/knncolle/VpTree/VpTree.hpp +256 -0
  381. data/vendor/knncolle/knncolle.hpp +34 -0
  382. data/vendor/knncolle/utils/Base.hpp +100 -0
  383. data/vendor/knncolle/utils/NeighborQueue.hpp +94 -0
  384. data/vendor/knncolle/utils/distances.hpp +98 -0
  385. data/vendor/knncolle/utils/find_nearest_neighbors.hpp +112 -0
  386. data/vendor/powerit/PowerIterations.hpp +157 -0
  387. data/vendor/umappp/NeighborList.hpp +37 -0
  388. data/vendor/umappp/Umap.hpp +662 -0
  389. data/vendor/umappp/combine_neighbor_sets.hpp +95 -0
  390. data/vendor/umappp/find_ab.hpp +157 -0
  391. data/vendor/umappp/neighbor_similarities.hpp +136 -0
  392. data/vendor/umappp/optimize_layout.hpp +285 -0
  393. data/vendor/umappp/spectral_init.hpp +181 -0
  394. data/vendor/umappp/umappp.hpp +13 -0
  395. metadata +465 -0
@@ -0,0 +1,286 @@
1
+ #ifndef KNNCOLLE_KMKNN_HPP
2
+ #define KNNCOLLE_KMKNN_HPP
3
+
4
+ #include "../utils/distances.hpp"
5
+ #include "../utils/NeighborQueue.hpp"
6
+ #include "../utils/Base.hpp"
7
+ #include "kmeans/Kmeans.hpp"
8
+
9
+ #include <algorithm>
10
+ #include <vector>
11
+ #include <random>
12
+ #include <limits>
13
+ #include <cmath>
14
+
15
+ #ifdef DEBUG
16
+ #include <iostream>
17
+ #endif
18
+
19
+ /**
20
+ * @file Kmknn.hpp
21
+ *
22
+ * @brief Implements the k-means with k-nearest neighbors (KMKNN) algorithm.
23
+ */
24
+
25
+ namespace knncolle {
26
+
27
+ /**
28
+ * @brief Perform a nearest neighbor search based on k-means clustering.
29
+ *
30
+ * In the k-means with k-nearest neighbors (KMKNN) algorithm (Wang, 2012), k-means clustering is first applied to the data points,
31
+ * with the number of cluster centers defined as the square root of the number of points.
32
+ * The cluster assignment and distance to the assigned cluster center for each point represent the KMKNN indexing information,
33
+ * allowing us to speed up the nearest neighbor search by exploiting the triangle inequality between cluster centers, the query point and each point in the cluster to narrow the search space.
34
+ * The advantage of the KMKNN approach is its simplicity and minimal overhead,
35
+ * resulting in performance improvements over conventional tree-based methods for high-dimensional data where most points need to be searched anyway.
36
+ *
37
+ * @tparam DISTANCE Class to compute the distance between vectors, see `distances::Euclidean` for an example.
38
+ * @tparam INDEX_t Integer type for the indices.
39
+ * @tparam DISTANCE_t Floating point type for the distances.
40
+ * @tparam QUERY_t Floating point type for the query data.
41
+ * @tparam INTERNAL_t Floating point type for the data.
42
+ *
43
+ * @see
44
+ * Wang X (2012).
45
+ * A fast exact k-nearest neighbors algorithm for high dimensional search using k-means clustering and triangle inequality.
46
+ * _Proc Int Jt Conf Neural Netw_, 43, 6:2351-2358.
47
+ */
48
+ template<class DISTANCE, typename INDEX_t = int, typename DISTANCE_t = double, typename QUERY_t = DISTANCE_t, typename INTERNAL_t = DISTANCE_t>
49
+ class Kmknn : public Base<INDEX_t, DISTANCE_t, QUERY_t> {
50
+ private:
51
+ INDEX_t num_dim;
52
+ INDEX_t num_obs;
53
+
54
+ public:
55
+ INDEX_t nobs() const { return num_obs; }
56
+
57
+ INDEX_t ndim() const { return num_dim; }
58
+
59
+ private:
60
+ std::vector<INTERNAL_t> data;
61
+
62
+ std::vector<INDEX_t> sizes;
63
+ std::vector<INDEX_t> offsets;
64
+
65
+ std::vector<INTERNAL_t> centers;
66
+
67
+ std::vector<INDEX_t> observation_id, new_location;
68
+ std::vector<DISTANCE_t> dist_to_centroid;
69
+
70
+ public:
71
+ /**
72
+ * @param ndim Number of dimensions.
73
+ * @param nobs Number of observations.
74
+ * @param vals Pointer to an array of length `ndim * nobs`, corresponding to a dimension-by-observation matrix in column-major format,
75
+ * i.e., contiguous elements belong to the same observation.
76
+ * @param power Power of `nobs` to define the number of cluster centers, i.e., `ceil(nobs^power)` centers are requested.
77
+ * By default, a square root is performed.
78
+ *
79
+ * @tparam INPUT_t Floating-point type of the input data.
80
+ */
81
+ template<typename INPUT_t>
82
+ Kmknn(INDEX_t ndim, INDEX_t nobs, const INPUT_t* vals, double power = 0.5) :
83
+ num_dim(ndim),
84
+ num_obs(nobs),
85
+ data(ndim * nobs),
86
+ sizes(std::ceil(std::pow(num_obs, power))),
87
+ offsets(sizes.size()),
88
+ centers(sizes.size() * ndim),
89
+ observation_id(nobs),
90
+ new_location(nobs),
91
+ dist_to_centroid(nobs)
92
+ {
93
+ std::vector<int> clusters(num_obs);
94
+ auto ncenters = sizes.size();
95
+
96
+ // Try to avoid a copy if we're dealing with the same type;
97
+ // otherwise, we just dump it into 'data', given that we
98
+ // won't be rewriting it for a while anyway.
99
+ const INTERNAL_t* host;
100
+ if constexpr(std::is_same<INPUT_t, INTERNAL_t>::value) {
101
+ host = vals;
102
+ } else {
103
+ std::copy(vals, vals + data.size(), data.data());
104
+ host = data.data();
105
+ }
106
+ auto output = kmeans::Kmeans<INTERNAL_t, int>().run(ndim, nobs, host, ncenters, centers.data(), clusters.data());
107
+ std::swap(sizes, output.sizes);
108
+
109
+ // In case there were some duplicate points, we just resize this a bit.
110
+ if (ncenters != sizes.size()) {
111
+ ncenters = sizes.size();
112
+ offsets.resize(ncenters);
113
+ centers.resize(ncenters * ndim);
114
+ }
115
+
116
+ for (INDEX_t i = 1; i < ncenters; ++i) {
117
+ offsets[i] = offsets[i - 1] + sizes[i - 1];
118
+ }
119
+
120
+ // Organize points correctly; firstly, sorting by distance from the assigned center.
121
+ std::vector<std::pair<INTERNAL_t, INDEX_t> > by_distance(nobs);
122
+ {
123
+ auto sofar = offsets;
124
+ for (INDEX_t o = 0; o < nobs; ++o) {
125
+ const auto& clustid = clusters[o];
126
+ auto& counter = sofar[clustid];
127
+ auto& current = by_distance[counter];
128
+ current.first = DISTANCE::normalize(DISTANCE::template raw_distance<INTERNAL_t>(host + o * num_dim, centers.data() + clustid * num_dim, num_dim));
129
+ current.second = o;
130
+ ++counter;
131
+ }
132
+
133
+ for (INDEX_t c = 0; c < ncenters; ++c) {
134
+ auto begin = by_distance.begin() + offsets[c];
135
+ std::sort(begin, begin + sizes[c]);
136
+ }
137
+ }
138
+
139
+ // Now, copying this over.
140
+ {
141
+ auto store = data.data();
142
+ for (INDEX_t o = 0; o < nobs; ++o, store += num_dim) {
143
+ const auto& current = by_distance[o];
144
+ auto source = vals + ndim * current.second; // must use 'vals' here, as 'host' might alias 'data'!
145
+ std::copy(source, source + ndim, store);
146
+ observation_id[o] = current.second;
147
+ new_location[current.second] = o;
148
+ dist_to_centroid[o] = current.first;
149
+ }
150
+ }
151
+
152
+ return;
153
+ }
154
+
155
+ std::vector<std::pair<INDEX_t, DISTANCE_t> > find_nearest_neighbors(INDEX_t index, int k) const {
156
+ NeighborQueue<INDEX_t, INTERNAL_t> nearest(k, new_location[index]);
157
+ search_nn(data.data() + new_location[index] * num_dim, nearest);
158
+ return report(nearest);
159
+ }
160
+
161
+ std::vector<std::pair<INDEX_t, DISTANCE_t> > find_nearest_neighbors(const QUERY_t* query, int k) const {
162
+ NeighborQueue<INDEX_t, INTERNAL_t> nearest(k);
163
+ search_nn(query, nearest);
164
+ return report(nearest);
165
+ }
166
+
167
+ const QUERY_t* observation(INDEX_t index, QUERY_t* buffer) const {
168
+ auto candidate = data.data() + num_dim * new_location[index];
169
+ if constexpr(std::is_same<QUERY_t, INTERNAL_t>::value) {
170
+ return candidate;
171
+ } else {
172
+ std::copy(candidate, candidate + num_dim, buffer);
173
+ return buffer;
174
+ }
175
+ }
176
+
177
+ using Base<INDEX_t, DISTANCE_t, QUERY_t>::observation;
178
+
179
+ private:
180
+ template<typename INPUT_t>
181
+ void search_nn(INPUT_t* target, NeighborQueue<INDEX_t, INTERNAL_t>& nearest) const {
182
+ /* Computing distances to all centers and sorting them. The aim is to
183
+ * go through the nearest centers first, to get the shortest
184
+ * 'threshold' possible through the rest of the search.
185
+ */
186
+ std::vector<std::pair<INTERNAL_t, INDEX_t> > center_order(sizes.size());
187
+ auto clust_ptr = centers.data();
188
+ for (size_t c = 0; c < sizes.size(); ++c, clust_ptr += num_dim) {
189
+ center_order[c].first = DISTANCE::template raw_distance<INTERNAL_t>(target, clust_ptr, num_dim);
190
+ center_order[c].second = c;
191
+ }
192
+ std::sort(center_order.begin(), center_order.end());
193
+ INTERNAL_t threshold_raw = -1;
194
+
195
+ // Computing the distance to each center, and deciding whether to proceed for each cluster.
196
+ for (const auto& curcent : center_order) {
197
+ const INDEX_t center = curcent.second;
198
+ const INTERNAL_t dist2center = DISTANCE::normalize(curcent.first);
199
+
200
+ const auto cur_nobs = sizes[center];
201
+ const DISTANCE_t* dIt = dist_to_centroid.data() + offsets[center];
202
+ const DISTANCE_t maxdist = *(dIt + cur_nobs - 1);
203
+
204
+ INDEX_t firstcell=0;
205
+ #if USE_UPPER
206
+ INTERNAL_t upper_bd = std::numeric_limits<INTERNAL_t>::max();
207
+ #endif
208
+
209
+ if (threshold_raw >= 0) {
210
+ const INTERNAL_t threshold = DISTANCE::normalize(threshold_raw);
211
+
212
+ /* The conditional expression below exploits the triangle inequality; it is equivalent to asking whether:
213
+ * threshold + maxdist < dist2center
214
+ * All points (if any) within this cluster with distances above 'lower_bd' are potentially countable.
215
+ */
216
+ const DISTANCE_t lower_bd = dist2center - threshold;
217
+ if (maxdist < lower_bd) {
218
+ continue;
219
+ }
220
+ firstcell=std::lower_bound(dIt, dIt + cur_nobs, lower_bd) - dIt;
221
+ #if USE_UPPER
222
+ /* This exploits the reverse triangle inequality, to ignore points where:
223
+ * threshold + dist2center < point-to-center distance
224
+ */
225
+ upper_bd = threshold + dist2center;
226
+ #endif
227
+ }
228
+
229
+ const auto cur_start = offsets[center];
230
+ const INTERNAL_t * other_cell = data.data() + num_dim * (cur_start + firstcell);
231
+ for (auto celldex = firstcell; celldex < cur_nobs; ++celldex, other_cell += num_dim) {
232
+ #if USE_UPPER
233
+ if (*(dIt + celldex) > upper_bd) {
234
+ break;
235
+ }
236
+ #endif
237
+ const auto dist2cell_raw = DISTANCE::template raw_distance<INTERNAL_t>(target, other_cell, num_dim);
238
+ nearest.add(cur_start + celldex, dist2cell_raw);
239
+ if (nearest.is_full()) {
240
+ threshold_raw = nearest.limit(); // Shrinking the threshold, if an earlier NN has been found.
241
+ #if USE_UPPER
242
+ upper_bd = DISTANCE::normalize(threshold_raw) + dist2center;
243
+ #endif
244
+ }
245
+ }
246
+ }
247
+ }
248
+
249
+ template<class QUEUE>
250
+ auto report(QUEUE& nearest) const {
251
+ auto output = nearest.template report<DISTANCE_t>();
252
+ for (auto& s : output) {
253
+ s.first = observation_id[s.first];
254
+ s.second = DISTANCE::normalize(s.second);
255
+ }
256
+ return output;
257
+ }
258
+
259
+ #ifdef DEBUG
260
+ template<class V>
261
+ void print_vector(const V& input, const char* msg) const {
262
+ std::cout << msg << ": ";
263
+ for (auto v : input) {
264
+ std::cout << v << " ";
265
+ }
266
+ std::cout << std::endl;
267
+ }
268
+ #endif
269
+ };
270
+
271
+ /**
272
+ * Perform a KMKNN search with Euclidean distances.
273
+ */
274
+ template<typename INDEX_t = int, typename DISTANCE_t = double, typename QUERY_t = DISTANCE_t, typename INTERNAL_t = DISTANCE_t>
275
+ using KmknnEuclidean = Kmknn<distances::Euclidean, INDEX_t, DISTANCE_t, QUERY_t, INTERNAL_t>;
276
+
277
+ /**
278
+ * Perform a KMKNN search with Manhattan distances.
279
+ * Note that k-means clustering may not provide a particularly good indexing structure for Manhattan distances, so your mileage may vary.
280
+ */
281
+ template<typename INDEX_t = int, typename DISTANCE_t = double, typename QUERY_t = DISTANCE_t, typename INTERNAL_t = DISTANCE_t>
282
+ using KmknnManhattan = Kmknn<distances::Manhattan, INDEX_t, DISTANCE_t, QUERY_t, INTERNAL_t>;
283
+
284
+ };
285
+
286
+ #endif
@@ -0,0 +1,256 @@
1
+ #ifndef KNNCOLLE_VPTREE_HPP
2
+ #define KNNCOLLE_VPTREE_HPP
3
+
4
#include "../utils/distances.hpp"
#include "../utils/NeighborQueue.hpp"
#include "../utils/Base.hpp"

#include <algorithm>
#include <cstddef>
#include <limits>
#include <random>
#include <tuple>
#include <type_traits>
#include <utility>
#include <vector>
12
+
13
+ /**
14
+ * @file VpTree.hpp
15
+ *
16
+ * @brief Implements a vantage point (VP) tree to search for nearest neighbors.
17
+ */
18
+
19
+ namespace knncolle {
20
+
21
+ /**
22
+ * @brief Perform a nearest neighbor search based on a vantage point (VP) tree.
23
+ *
24
+ * In a vantage point tree (Yianilos, 1993), each node contains a subset of points that is split into two further partitions.
25
+ * The split is determined by picking an arbitrary point inside that subset as the node center,
26
+ * computing the distance to all other points from the center, and using the median distance as the "radius" of a hypersphere.
27
+ * The left child of this node contains all points within that hypersphere while the right child contains the remaining points.
28
+ * This procedure is applied recursively until all points resolve to individual nodes, thus yielding a VP tree.
29
+ * Upon searching, the algorithm traverses the tree and exploits the triangle inequality between query points and node centers to narrow the search space.
30
+ *
31
+ * The major advantage of VP trees over more conventional KD-trees or ball trees is that VP trees do not need to construct intermediate nodes; instead, the data points themselves serve as the nodes.
32
+ * This reduces the memory usage of the tree and total number of distance calculations for any search.
33
+ * It can also be very useful when the concept of an intermediate is not well-defined (e.g., for non-numeric data), though this is not particularly relevant for **knncolle**.
34
+ *
35
+ * @tparam DISTANCE Class to compute the distance between vectors, see `distances::Euclidean` for an example.
36
+ * @tparam INDEX_t Integer type for the indices.
37
+ * @tparam DISTANCE_t Floating point type for the distances.
38
+ * @tparam QUERY_t Floating point type for the query data.
39
+ * @tparam INTERNAL_t Floating point type for the internal data store.
40
+ *
41
+ * @see
42
+ * Yianilos PN (1993).
43
+ * Data structures and algorithms for nearest neighbor search in general metric spaces.
44
+ * _Proceedings of the Fourth Annual ACM-SIAM Symposium on Discrete Algorithms_, 311-321.
45
+ *
46
+ * @see
47
+ * Hanov S (2011).
48
+ * VP trees: A data structure for finding stuff fast.
49
+ * http://stevehanov.ca/blog/index.php?id=130
50
+ */
51
+ template<class DISTANCE, typename INDEX_t = int, typename DISTANCE_t = double, typename QUERY_t = DISTANCE_t, typename INTERNAL_t = DISTANCE_t>
52
+ class VpTree : public Base<INDEX_t, DISTANCE_t, QUERY_t> {
53
+ /* Adapted from http://stevehanov.ca/blog/index.php?id=130 */
54
+
55
+ private:
56
+ INDEX_t num_dim;
57
+ INDEX_t num_obs;
58
+ public:
59
+ INDEX_t nobs() const { return num_obs; }
60
+
61
+ INDEX_t ndim() const { return num_dim; }
62
+ private:
63
+ typedef int NodeIndex_t;
64
+ static const NodeIndex_t LEAF_MARKER=-1;
65
+
66
+ // Single node of a VP tree (has a point and radius; left children are closer to point than the radius)
67
+ struct Node {
68
+ INTERNAL_t threshold; // radius
69
+ INDEX_t index; // original index of current vantage point
70
+ NodeIndex_t left; // node index of the next vantage point for all children closer than 'threshold' from the current vantage point
71
+ NodeIndex_t right; // node index of the next vantage point for all children further than 'threshold' from the current vantage point
72
+ Node(NodeIndex_t i=0) : threshold(0), index(i), left(LEAF_MARKER), right(LEAF_MARKER) {}
73
+ };
74
+ std::vector<Node> nodes;
75
+
76
+ typedef std::tuple<INDEX_t, const INTERNAL_t*, INTERNAL_t> DataPoint; // internal distances computed using "INTERNAL_t" type, even if output is returned with DISTANCE_t.
77
+
78
+ template<class SAMPLER>
79
+ NodeIndex_t buildFromPoints(NodeIndex_t lower, NodeIndex_t upper, std::vector<DataPoint>& items, SAMPLER& rng) {
80
+ if (upper == lower) { // indicates that we're done here!
81
+ return LEAF_MARKER;
82
+ }
83
+
84
+ NodeIndex_t pos = nodes.size();
85
+ nodes.resize(pos + 1);
86
+ Node& node=nodes.back();
87
+
88
+ int gap = upper - lower;
89
+ if (gap > 1) { // if we did not arrive at leaf yet
90
+
91
+ /* Choose an arbitrary point and move it to the start of the [lower, upper)
92
+ * interval in 'items'; this is our new vantage point.
93
+ *
94
+ * Yes, I know that the modulo method does not provide strictly
95
+ * uniform values but statistical correctness doesn't really matter
96
+ * here... but reproducibility across platforms does matter, and
97
+ * std::uniform_int_distribution is implementation-dependent!
98
+ */
99
+ NodeIndex_t i = static_cast<NodeIndex_t>(rng() % gap + lower);
100
+ std::swap(items[lower], items[i]);
101
+ const auto& vantage = items[lower];
102
+
103
+ // Compute distances to the new vantage point.
104
+ const INTERNAL_t* ref = std::get<1>(vantage);
105
+ for (size_t i = lower + 1; i < upper; ++i) {
106
+ const INTERNAL_t* loc = std::get<1>(items[i]);
107
+ std::get<2>(items[i]) = DISTANCE::template raw_distance<INTERNAL_t>(ref, loc, num_dim);
108
+ }
109
+
110
+ // Partition around the median distance from the vantage point.
111
+ NodeIndex_t median = lower + gap/2;
112
+ std::nth_element(items.begin() + lower + 1, items.begin() + median, items.begin() + upper,
113
+ [&](const DataPoint& left, const DataPoint& right) -> bool {
114
+ return std::get<2>(left) < std::get<2>(right);
115
+ }
116
+ );
117
+
118
+ // Threshold of the new node will be the distance to the median
119
+ node.threshold = DISTANCE::normalize(std::get<2>(items[median]));
120
+
121
+ // Recursively build tree
122
+ node.index = std::get<0>(vantage);
123
+ node.left = buildFromPoints(lower + 1, median, items, rng);
124
+ node.right = buildFromPoints(median, upper, items, rng);
125
+ } else {
126
+ node.index = std::get<0>(items[lower]);
127
+ }
128
+
129
+ return pos;
130
+ }
131
+
132
+ private:
133
+ std::vector<INDEX_t> new_location;
134
+ std::vector<INTERNAL_t> store;
135
+
136
+ public:
137
+ /**
138
+ * @param ndim Number of dimensions.
139
+ * @param nobs Number of observations.
140
+ * @param vals Pointer to an array of length `ndim * nobs`, corresponding to a dimension-by-observation matrix in column-major format,
141
+ * i.e., contiguous elements belong to the same observation.
142
+ *
143
+ * @tparam INPUT_t Floating-point type of the input data.
144
+ */
145
+ template<typename INPUT_t>
146
+ VpTree(INDEX_t ndim, INDEX_t nobs, const INPUT_t* vals) : num_dim(ndim), num_obs(nobs), new_location(nobs), store(ndim * nobs) {
147
+ std::vector<DataPoint> items;
148
+ items.reserve(num_obs);
149
+ for (INDEX_t i = 0; i < num_obs; ++i) {
150
+ items.push_back(DataPoint(i, vals + i * num_dim, 0));
151
+ }
152
+
153
+ nodes.reserve(num_obs);
154
+ std::mt19937_64 rand(1234567890); // seed doesn't really matter, we don't need statistical correctness here.
155
+ buildFromPoints(0, num_obs, items, rand);
156
+
157
+ // Actually populating the store based on the traversal order of the nodes.
158
+ // This should be more cache efficient than an arbitrary input order.
159
+ auto sIt = store.begin();
160
+ for (size_t i = 0; i < num_obs; ++i, sIt += num_dim) {
161
+ const auto& curnode = nodes[i];
162
+ new_location[curnode.index] = i;
163
+ auto start = vals + num_dim * curnode.index;
164
+ std::copy(start, start + num_dim, sIt);
165
+ }
166
+ return;
167
+ }
168
+
169
+ std::vector<std::pair<INDEX_t, DISTANCE_t> > find_nearest_neighbors(INDEX_t index, int k) const {
170
+ NeighborQueue<INDEX_t, INTERNAL_t> nearest(k, index);
171
+ INTERNAL_t tau = std::numeric_limits<INTERNAL_t>::max();
172
+ search_nn(0, store.data() + new_location[index] * num_dim, tau, nearest);
173
+ return nearest.template report<DISTANCE_t>();
174
+ }
175
+
176
+ std::vector<std::pair<INDEX_t, DISTANCE_t> > find_nearest_neighbors(const QUERY_t* query, int k) const {
177
+ NeighborQueue<INDEX_t, INTERNAL_t> nearest(k);
178
+ INTERNAL_t tau = std::numeric_limits<INTERNAL_t>::max();
179
+ search_nn(0, query, tau, nearest);
180
+ return nearest.template report<DISTANCE_t>();
181
+ }
182
+
183
+ const QUERY_t* observation(INDEX_t index, QUERY_t* buffer) const {
184
+ auto candidate = store.data() + num_dim * new_location[index];
185
+ if constexpr(std::is_same<QUERY_t, INTERNAL_t>::value) {
186
+ return candidate;
187
+ } else {
188
+ std::copy(candidate, candidate + num_dim, buffer);
189
+ return buffer;
190
+ }
191
+ }
192
+
193
+ using Base<INDEX_t, DISTANCE_t, QUERY_t>::observation;
194
+
195
+ private:
196
+ template<typename INPUT_t>
197
+ void search_nn(NodeIndex_t curnode_index, const INPUT_t* target, INTERNAL_t& tau, NeighborQueue<INDEX_t, INTERNAL_t>& nearest) const {
198
+ if (curnode_index == LEAF_MARKER) { // indicates that we're done here
199
+ return;
200
+ }
201
+
202
+ // Compute distance between target and current node
203
+ const auto& curnode=nodes[curnode_index];
204
+ INTERNAL_t dist = DISTANCE::normalize(DISTANCE::template raw_distance<INTERNAL_t>(store.data() + curnode_index * num_dim, target, num_dim));
205
+
206
+ // If current node within radius tau
207
+ if (dist < tau) {
208
+ nearest.add(curnode.index, dist);
209
+ if (nearest.is_full()) {
210
+ tau = nearest.limit(); // update value of tau (farthest point in result list)
211
+ }
212
+ }
213
+
214
+ // Return if we arrived at a leaf
215
+ if (curnode.left == LEAF_MARKER && curnode.right == LEAF_MARKER) {
216
+ return;
217
+ }
218
+
219
+ // If the target lies within the radius of ball
220
+ if (dist < curnode.threshold) {
221
+ if (dist - tau <= curnode.threshold) { // if there can still be neighbors inside the ball, recursively search left child first
222
+ search_nn(curnode.left, target, tau, nearest);
223
+ }
224
+
225
+ if (dist + tau >= curnode.threshold) { // if there can still be neighbors outside the ball, recursively search right child
226
+ search_nn(curnode.right, target, tau, nearest);
227
+ }
228
+
229
+ // If the target lies outsize the radius of the ball
230
+ } else {
231
+ if (dist + tau >= curnode.threshold) { // if there can still be neighbors outside the ball, recursively search right child first
232
+ search_nn(curnode.right, target, tau, nearest);
233
+ }
234
+
235
+ if (dist - tau <= curnode.threshold) { // if there can still be neighbors inside the ball, recursively search left child
236
+ search_nn(curnode.left, target, tau, nearest);
237
+ }
238
+ }
239
+ }
240
+ };
241
+
242
+ /**
243
+ * Perform a VP tree search with Euclidean distances.
244
+ */
245
+ template<typename INDEX_t = int, typename DISTANCE_t = double, typename QUERY_t = DISTANCE_t, typename INTERNAL_t = double>
246
+ using VpTreeEuclidean = VpTree<distances::Euclidean, INDEX_t, DISTANCE_t, QUERY_t, INTERNAL_t>;
247
+
248
+ /**
249
+ * Perform a VP tree search with Manhattan distances.
250
+ */
251
+ template<typename INDEX_t = int, typename DISTANCE_t = double, typename QUERY_t = DISTANCE_t, typename INTERNAL_t = double>
252
+ using VpTreeManhattan = VpTree<distances::Manhattan, INDEX_t, DISTANCE_t, QUERY_t, INTERNAL_t>;
253
+
254
+ };
255
+
256
+ #endif
@@ -0,0 +1,34 @@
1
+ #ifndef KNNCOLLE_HPP
2
+ #define KNNCOLLE_HPP
3
+
4
+ #include "BruteForce/BruteForce.hpp"
5
+ #include "VpTree/VpTree.hpp"
6
+
7
+ #ifndef KNNCOLLE_NO_KMKNN
8
+ #include "Kmknn/Kmknn.hpp"
9
+ #endif
10
+
11
+ #ifndef KNNCOLLE_NO_ANNOY
12
+ #include "Annoy/Annoy.hpp"
13
+ #endif
14
+
15
+ #ifndef KNNCOLLE_NO_HNSW
16
+ #include "Hnsw/Hnsw.hpp"
17
+ #endif
18
+
19
+ #include "utils/find_nearest_neighbors.hpp"
20
+
21
+ /**
22
+ * @file knncolle.hpp
23
+ *
24
+ * @brief Umbrella header to include all algorithms.
25
+ *
26
+ * Developers can avoid the inclusion of unnecessary dependencies by setting:
27
+ *
28
+ * - `KNNCOLLE_NO_KMKNN`, to avoid including the `Kmknn.hpp` header (which requires the **kmeans** library).
29
+ * - `KNNCOLLE_NO_ANNOY`, to avoid including the `Annoy.hpp` header (which requires the **Annoy** library).
30
+ * - `KNNCOLLE_NO_HNSW`, to avoid including the `Hnsw.hpp` header (which requires the **Hnsw** library).
31
+ */
32
+
33
+ #endif
34
+
@@ -0,0 +1,100 @@
1
+ #ifndef KNNCOLLE_BASE_HPP
2
+ #define KNNCOLLE_BASE_HPP
3
+
4
+ #include <vector>
5
+
6
+ /**
7
+ * @file Base.hpp
8
+ *
9
+ * @brief Defines the virtual base class for all **knncolle** methods.
10
+ */
11
+
12
+ namespace knncolle {
13
+
14
+ /**
15
+ * @brief Virtual base class defining the **knncolle** interface.
16
+ *
17
+ * Defines the minimum set of methods, to be implemented by all concrete subclasses.
18
+ *
19
+ * @tparam INDEX_t Integer type for the indices.
20
+ * @tparam DISTANCE_t Floating point type for the distances.
21
+ * @tparam QUERY_t Floating point type for the query data.
22
+ */
23
template<typename INDEX_t = int, typename DISTANCE_t = double, typename QUERY_t = DISTANCE_t>
class Base {
public:
    virtual ~Base() = default;

    /**
     * @return Number of observations in the dataset to be searched.
     */
    virtual INDEX_t nobs() const = 0;

    /**
     * @return Number of dimensions.
     */
    virtual INDEX_t ndim() const = 0;

    /**
     * Obtain the coordinates of one observation in the dataset, possibly without copying.
     * Type conversions may be performed if `QUERY_t` differs from the type of the internal data store.
     *
     * Implementations are allowed to leave `buffer` untouched and return a pointer to their own storage instead.
     * Callers can detect this by checking whether the returned address differs from `buffer`.
     *
     * @param index Index of the observation.
     * This should be non-negative and less than the total number of observations in `nobs()`.
     * @param buffer Buffer in which the coordinates may be stored.
     *
     * @return A pointer to an array containing the coordinate vector.
     */
    virtual const QUERY_t* observation(INDEX_t index, QUERY_t* buffer) const = 0;

    /**
     * Obtain the coordinates of one observation in the dataset as a freshly allocated vector.
     * Type conversions may be performed if `QUERY_t` differs from the type of the internal data store.
     *
     * @param index Index of the observation.
     *
     * @return A vector of length `ndim()` holding the coordinates.
     */
    virtual std::vector<QUERY_t> observation(INDEX_t index) const {
        std::vector<QUERY_t> coords(ndim());
        const QUERY_t* located = observation(index, coords.data());
        if (located == coords.data()) {
            // The subclass wrote straight into our vector; nothing left to do.
            return coords;
        }
        // The subclass handed back its own storage, so copy it across.
        std::copy(located, located + coords.size(), coords.data());
        return coords;
    }

    /**
     * Find the nearest neighbors of the `index`-th observation in the dataset.
     *
     * @param index The index of the observation of interest.
     * This should be non-negative and less than the total number of observations in `nobs()`.
     * @param k The number of neighbors to identify.
     *
     * @return A vector of (index, distance) pairs containing the identities of the nearest neighbors in order of increasing distance.
     * Length is at most `k` but may be shorter if the total number of observations is less than `k + 1`.
     */
    virtual std::vector<std::pair<INDEX_t, DISTANCE_t> > find_nearest_neighbors(INDEX_t index, int k) const = 0;

    /**
     * Find the nearest neighbors of a new observation.
     *
     * @param query Pointer to an array of length equal to `ndim()`, containing the coordinates of the query point.
     * @param k The number of neighbors to identify.
     *
     * @return A vector of (index, distance) pairs containing the identities of the nearest neighbors in order of increasing distance.
     * Length is at most `k` but may be shorter if the total number of observations is less than `k`.
     */
    virtual std::vector<std::pair<INDEX_t, DISTANCE_t> > find_nearest_neighbors(const QUERY_t* query, int k) const = 0;
};
97
+
98
+ }
99
+
100
+ #endif