umappp 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (395) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +25 -0
  3. data/README.md +110 -0
  4. data/ext/umappp/extconf.rb +25 -0
  5. data/ext/umappp/numo.hpp +867 -0
  6. data/ext/umappp/umappp.cpp +225 -0
  7. data/lib/umappp/version.rb +5 -0
  8. data/lib/umappp.rb +41 -0
  9. data/vendor/Eigen/Cholesky +45 -0
  10. data/vendor/Eigen/CholmodSupport +48 -0
  11. data/vendor/Eigen/Core +384 -0
  12. data/vendor/Eigen/Dense +7 -0
  13. data/vendor/Eigen/Eigen +2 -0
  14. data/vendor/Eigen/Eigenvalues +60 -0
  15. data/vendor/Eigen/Geometry +59 -0
  16. data/vendor/Eigen/Householder +29 -0
  17. data/vendor/Eigen/IterativeLinearSolvers +48 -0
  18. data/vendor/Eigen/Jacobi +32 -0
  19. data/vendor/Eigen/KLUSupport +41 -0
  20. data/vendor/Eigen/LU +47 -0
  21. data/vendor/Eigen/MetisSupport +35 -0
  22. data/vendor/Eigen/OrderingMethods +70 -0
  23. data/vendor/Eigen/PaStiXSupport +49 -0
  24. data/vendor/Eigen/PardisoSupport +35 -0
  25. data/vendor/Eigen/QR +50 -0
  26. data/vendor/Eigen/QtAlignedMalloc +39 -0
  27. data/vendor/Eigen/SPQRSupport +34 -0
  28. data/vendor/Eigen/SVD +50 -0
  29. data/vendor/Eigen/Sparse +34 -0
  30. data/vendor/Eigen/SparseCholesky +37 -0
  31. data/vendor/Eigen/SparseCore +69 -0
  32. data/vendor/Eigen/SparseLU +50 -0
  33. data/vendor/Eigen/SparseQR +36 -0
  34. data/vendor/Eigen/StdDeque +27 -0
  35. data/vendor/Eigen/StdList +26 -0
  36. data/vendor/Eigen/StdVector +27 -0
  37. data/vendor/Eigen/SuperLUSupport +64 -0
  38. data/vendor/Eigen/UmfPackSupport +40 -0
  39. data/vendor/Eigen/src/Cholesky/LDLT.h +688 -0
  40. data/vendor/Eigen/src/Cholesky/LLT.h +558 -0
  41. data/vendor/Eigen/src/Cholesky/LLT_LAPACKE.h +99 -0
  42. data/vendor/Eigen/src/CholmodSupport/CholmodSupport.h +682 -0
  43. data/vendor/Eigen/src/Core/ArithmeticSequence.h +413 -0
  44. data/vendor/Eigen/src/Core/Array.h +417 -0
  45. data/vendor/Eigen/src/Core/ArrayBase.h +226 -0
  46. data/vendor/Eigen/src/Core/ArrayWrapper.h +209 -0
  47. data/vendor/Eigen/src/Core/Assign.h +90 -0
  48. data/vendor/Eigen/src/Core/AssignEvaluator.h +1010 -0
  49. data/vendor/Eigen/src/Core/Assign_MKL.h +178 -0
  50. data/vendor/Eigen/src/Core/BandMatrix.h +353 -0
  51. data/vendor/Eigen/src/Core/Block.h +448 -0
  52. data/vendor/Eigen/src/Core/BooleanRedux.h +162 -0
  53. data/vendor/Eigen/src/Core/CommaInitializer.h +164 -0
  54. data/vendor/Eigen/src/Core/ConditionEstimator.h +175 -0
  55. data/vendor/Eigen/src/Core/CoreEvaluators.h +1741 -0
  56. data/vendor/Eigen/src/Core/CoreIterators.h +132 -0
  57. data/vendor/Eigen/src/Core/CwiseBinaryOp.h +183 -0
  58. data/vendor/Eigen/src/Core/CwiseNullaryOp.h +1001 -0
  59. data/vendor/Eigen/src/Core/CwiseTernaryOp.h +197 -0
  60. data/vendor/Eigen/src/Core/CwiseUnaryOp.h +103 -0
  61. data/vendor/Eigen/src/Core/CwiseUnaryView.h +132 -0
  62. data/vendor/Eigen/src/Core/DenseBase.h +701 -0
  63. data/vendor/Eigen/src/Core/DenseCoeffsBase.h +685 -0
  64. data/vendor/Eigen/src/Core/DenseStorage.h +652 -0
  65. data/vendor/Eigen/src/Core/Diagonal.h +258 -0
  66. data/vendor/Eigen/src/Core/DiagonalMatrix.h +391 -0
  67. data/vendor/Eigen/src/Core/DiagonalProduct.h +28 -0
  68. data/vendor/Eigen/src/Core/Dot.h +318 -0
  69. data/vendor/Eigen/src/Core/EigenBase.h +160 -0
  70. data/vendor/Eigen/src/Core/ForceAlignedAccess.h +150 -0
  71. data/vendor/Eigen/src/Core/Fuzzy.h +155 -0
  72. data/vendor/Eigen/src/Core/GeneralProduct.h +465 -0
  73. data/vendor/Eigen/src/Core/GenericPacketMath.h +1040 -0
  74. data/vendor/Eigen/src/Core/GlobalFunctions.h +194 -0
  75. data/vendor/Eigen/src/Core/IO.h +258 -0
  76. data/vendor/Eigen/src/Core/IndexedView.h +237 -0
  77. data/vendor/Eigen/src/Core/Inverse.h +117 -0
  78. data/vendor/Eigen/src/Core/Map.h +171 -0
  79. data/vendor/Eigen/src/Core/MapBase.h +310 -0
  80. data/vendor/Eigen/src/Core/MathFunctions.h +2057 -0
  81. data/vendor/Eigen/src/Core/MathFunctionsImpl.h +200 -0
  82. data/vendor/Eigen/src/Core/Matrix.h +565 -0
  83. data/vendor/Eigen/src/Core/MatrixBase.h +547 -0
  84. data/vendor/Eigen/src/Core/NestByValue.h +85 -0
  85. data/vendor/Eigen/src/Core/NoAlias.h +109 -0
  86. data/vendor/Eigen/src/Core/NumTraits.h +335 -0
  87. data/vendor/Eigen/src/Core/PartialReduxEvaluator.h +232 -0
  88. data/vendor/Eigen/src/Core/PermutationMatrix.h +605 -0
  89. data/vendor/Eigen/src/Core/PlainObjectBase.h +1128 -0
  90. data/vendor/Eigen/src/Core/Product.h +191 -0
  91. data/vendor/Eigen/src/Core/ProductEvaluators.h +1179 -0
  92. data/vendor/Eigen/src/Core/Random.h +218 -0
  93. data/vendor/Eigen/src/Core/Redux.h +515 -0
  94. data/vendor/Eigen/src/Core/Ref.h +381 -0
  95. data/vendor/Eigen/src/Core/Replicate.h +142 -0
  96. data/vendor/Eigen/src/Core/Reshaped.h +454 -0
  97. data/vendor/Eigen/src/Core/ReturnByValue.h +119 -0
  98. data/vendor/Eigen/src/Core/Reverse.h +217 -0
  99. data/vendor/Eigen/src/Core/Select.h +164 -0
  100. data/vendor/Eigen/src/Core/SelfAdjointView.h +365 -0
  101. data/vendor/Eigen/src/Core/SelfCwiseBinaryOp.h +47 -0
  102. data/vendor/Eigen/src/Core/Solve.h +188 -0
  103. data/vendor/Eigen/src/Core/SolveTriangular.h +235 -0
  104. data/vendor/Eigen/src/Core/SolverBase.h +168 -0
  105. data/vendor/Eigen/src/Core/StableNorm.h +251 -0
  106. data/vendor/Eigen/src/Core/StlIterators.h +463 -0
  107. data/vendor/Eigen/src/Core/Stride.h +116 -0
  108. data/vendor/Eigen/src/Core/Swap.h +68 -0
  109. data/vendor/Eigen/src/Core/Transpose.h +464 -0
  110. data/vendor/Eigen/src/Core/Transpositions.h +386 -0
  111. data/vendor/Eigen/src/Core/TriangularMatrix.h +1001 -0
  112. data/vendor/Eigen/src/Core/VectorBlock.h +96 -0
  113. data/vendor/Eigen/src/Core/VectorwiseOp.h +784 -0
  114. data/vendor/Eigen/src/Core/Visitor.h +381 -0
  115. data/vendor/Eigen/src/Core/arch/AVX/Complex.h +372 -0
  116. data/vendor/Eigen/src/Core/arch/AVX/MathFunctions.h +228 -0
  117. data/vendor/Eigen/src/Core/arch/AVX/PacketMath.h +1574 -0
  118. data/vendor/Eigen/src/Core/arch/AVX/TypeCasting.h +115 -0
  119. data/vendor/Eigen/src/Core/arch/AVX512/Complex.h +422 -0
  120. data/vendor/Eigen/src/Core/arch/AVX512/MathFunctions.h +362 -0
  121. data/vendor/Eigen/src/Core/arch/AVX512/PacketMath.h +2303 -0
  122. data/vendor/Eigen/src/Core/arch/AVX512/TypeCasting.h +89 -0
  123. data/vendor/Eigen/src/Core/arch/AltiVec/Complex.h +417 -0
  124. data/vendor/Eigen/src/Core/arch/AltiVec/MathFunctions.h +90 -0
  125. data/vendor/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2937 -0
  126. data/vendor/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +221 -0
  127. data/vendor/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +629 -0
  128. data/vendor/Eigen/src/Core/arch/AltiVec/PacketMath.h +2711 -0
  129. data/vendor/Eigen/src/Core/arch/CUDA/Complex.h +258 -0
  130. data/vendor/Eigen/src/Core/arch/Default/BFloat16.h +700 -0
  131. data/vendor/Eigen/src/Core/arch/Default/ConjHelper.h +117 -0
  132. data/vendor/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1649 -0
  133. data/vendor/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +110 -0
  134. data/vendor/Eigen/src/Core/arch/Default/Half.h +942 -0
  135. data/vendor/Eigen/src/Core/arch/Default/Settings.h +49 -0
  136. data/vendor/Eigen/src/Core/arch/Default/TypeCasting.h +120 -0
  137. data/vendor/Eigen/src/Core/arch/GPU/MathFunctions.h +103 -0
  138. data/vendor/Eigen/src/Core/arch/GPU/PacketMath.h +1685 -0
  139. data/vendor/Eigen/src/Core/arch/GPU/TypeCasting.h +80 -0
  140. data/vendor/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
  141. data/vendor/Eigen/src/Core/arch/MSA/Complex.h +648 -0
  142. data/vendor/Eigen/src/Core/arch/MSA/MathFunctions.h +387 -0
  143. data/vendor/Eigen/src/Core/arch/MSA/PacketMath.h +1233 -0
  144. data/vendor/Eigen/src/Core/arch/NEON/Complex.h +584 -0
  145. data/vendor/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +183 -0
  146. data/vendor/Eigen/src/Core/arch/NEON/MathFunctions.h +75 -0
  147. data/vendor/Eigen/src/Core/arch/NEON/PacketMath.h +4587 -0
  148. data/vendor/Eigen/src/Core/arch/NEON/TypeCasting.h +1419 -0
  149. data/vendor/Eigen/src/Core/arch/SSE/Complex.h +351 -0
  150. data/vendor/Eigen/src/Core/arch/SSE/MathFunctions.h +199 -0
  151. data/vendor/Eigen/src/Core/arch/SSE/PacketMath.h +1505 -0
  152. data/vendor/Eigen/src/Core/arch/SSE/TypeCasting.h +142 -0
  153. data/vendor/Eigen/src/Core/arch/SVE/MathFunctions.h +44 -0
  154. data/vendor/Eigen/src/Core/arch/SVE/PacketMath.h +752 -0
  155. data/vendor/Eigen/src/Core/arch/SVE/TypeCasting.h +49 -0
  156. data/vendor/Eigen/src/Core/arch/SYCL/InteropHeaders.h +232 -0
  157. data/vendor/Eigen/src/Core/arch/SYCL/MathFunctions.h +301 -0
  158. data/vendor/Eigen/src/Core/arch/SYCL/PacketMath.h +670 -0
  159. data/vendor/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +694 -0
  160. data/vendor/Eigen/src/Core/arch/SYCL/TypeCasting.h +85 -0
  161. data/vendor/Eigen/src/Core/arch/ZVector/Complex.h +426 -0
  162. data/vendor/Eigen/src/Core/arch/ZVector/MathFunctions.h +233 -0
  163. data/vendor/Eigen/src/Core/arch/ZVector/PacketMath.h +1060 -0
  164. data/vendor/Eigen/src/Core/functors/AssignmentFunctors.h +177 -0
  165. data/vendor/Eigen/src/Core/functors/BinaryFunctors.h +541 -0
  166. data/vendor/Eigen/src/Core/functors/NullaryFunctors.h +189 -0
  167. data/vendor/Eigen/src/Core/functors/StlFunctors.h +166 -0
  168. data/vendor/Eigen/src/Core/functors/TernaryFunctors.h +25 -0
  169. data/vendor/Eigen/src/Core/functors/UnaryFunctors.h +1131 -0
  170. data/vendor/Eigen/src/Core/products/GeneralBlockPanelKernel.h +2645 -0
  171. data/vendor/Eigen/src/Core/products/GeneralMatrixMatrix.h +517 -0
  172. data/vendor/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +317 -0
  173. data/vendor/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +145 -0
  174. data/vendor/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +124 -0
  175. data/vendor/Eigen/src/Core/products/GeneralMatrixVector.h +518 -0
  176. data/vendor/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +136 -0
  177. data/vendor/Eigen/src/Core/products/Parallelizer.h +180 -0
  178. data/vendor/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +544 -0
  179. data/vendor/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +295 -0
  180. data/vendor/Eigen/src/Core/products/SelfadjointMatrixVector.h +262 -0
  181. data/vendor/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +118 -0
  182. data/vendor/Eigen/src/Core/products/SelfadjointProduct.h +133 -0
  183. data/vendor/Eigen/src/Core/products/SelfadjointRank2Update.h +94 -0
  184. data/vendor/Eigen/src/Core/products/TriangularMatrixMatrix.h +472 -0
  185. data/vendor/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +317 -0
  186. data/vendor/Eigen/src/Core/products/TriangularMatrixVector.h +350 -0
  187. data/vendor/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +255 -0
  188. data/vendor/Eigen/src/Core/products/TriangularSolverMatrix.h +337 -0
  189. data/vendor/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +167 -0
  190. data/vendor/Eigen/src/Core/products/TriangularSolverVector.h +148 -0
  191. data/vendor/Eigen/src/Core/util/BlasUtil.h +583 -0
  192. data/vendor/Eigen/src/Core/util/ConfigureVectorization.h +512 -0
  193. data/vendor/Eigen/src/Core/util/Constants.h +563 -0
  194. data/vendor/Eigen/src/Core/util/DisableStupidWarnings.h +106 -0
  195. data/vendor/Eigen/src/Core/util/ForwardDeclarations.h +322 -0
  196. data/vendor/Eigen/src/Core/util/IndexedViewHelper.h +186 -0
  197. data/vendor/Eigen/src/Core/util/IntegralConstant.h +272 -0
  198. data/vendor/Eigen/src/Core/util/MKL_support.h +137 -0
  199. data/vendor/Eigen/src/Core/util/Macros.h +1464 -0
  200. data/vendor/Eigen/src/Core/util/Memory.h +1163 -0
  201. data/vendor/Eigen/src/Core/util/Meta.h +812 -0
  202. data/vendor/Eigen/src/Core/util/NonMPL2.h +3 -0
  203. data/vendor/Eigen/src/Core/util/ReenableStupidWarnings.h +31 -0
  204. data/vendor/Eigen/src/Core/util/ReshapedHelper.h +51 -0
  205. data/vendor/Eigen/src/Core/util/StaticAssert.h +221 -0
  206. data/vendor/Eigen/src/Core/util/SymbolicIndex.h +293 -0
  207. data/vendor/Eigen/src/Core/util/XprHelper.h +856 -0
  208. data/vendor/Eigen/src/Eigenvalues/ComplexEigenSolver.h +346 -0
  209. data/vendor/Eigen/src/Eigenvalues/ComplexSchur.h +462 -0
  210. data/vendor/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +91 -0
  211. data/vendor/Eigen/src/Eigenvalues/EigenSolver.h +622 -0
  212. data/vendor/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +418 -0
  213. data/vendor/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +226 -0
  214. data/vendor/Eigen/src/Eigenvalues/HessenbergDecomposition.h +374 -0
  215. data/vendor/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +158 -0
  216. data/vendor/Eigen/src/Eigenvalues/RealQZ.h +657 -0
  217. data/vendor/Eigen/src/Eigenvalues/RealSchur.h +558 -0
  218. data/vendor/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +77 -0
  219. data/vendor/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +904 -0
  220. data/vendor/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +87 -0
  221. data/vendor/Eigen/src/Eigenvalues/Tridiagonalization.h +561 -0
  222. data/vendor/Eigen/src/Geometry/AlignedBox.h +486 -0
  223. data/vendor/Eigen/src/Geometry/AngleAxis.h +247 -0
  224. data/vendor/Eigen/src/Geometry/EulerAngles.h +114 -0
  225. data/vendor/Eigen/src/Geometry/Homogeneous.h +501 -0
  226. data/vendor/Eigen/src/Geometry/Hyperplane.h +282 -0
  227. data/vendor/Eigen/src/Geometry/OrthoMethods.h +235 -0
  228. data/vendor/Eigen/src/Geometry/ParametrizedLine.h +232 -0
  229. data/vendor/Eigen/src/Geometry/Quaternion.h +870 -0
  230. data/vendor/Eigen/src/Geometry/Rotation2D.h +199 -0
  231. data/vendor/Eigen/src/Geometry/RotationBase.h +206 -0
  232. data/vendor/Eigen/src/Geometry/Scaling.h +188 -0
  233. data/vendor/Eigen/src/Geometry/Transform.h +1563 -0
  234. data/vendor/Eigen/src/Geometry/Translation.h +202 -0
  235. data/vendor/Eigen/src/Geometry/Umeyama.h +166 -0
  236. data/vendor/Eigen/src/Geometry/arch/Geometry_SIMD.h +168 -0
  237. data/vendor/Eigen/src/Householder/BlockHouseholder.h +110 -0
  238. data/vendor/Eigen/src/Householder/Householder.h +176 -0
  239. data/vendor/Eigen/src/Householder/HouseholderSequence.h +545 -0
  240. data/vendor/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +226 -0
  241. data/vendor/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +212 -0
  242. data/vendor/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +229 -0
  243. data/vendor/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +394 -0
  244. data/vendor/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +453 -0
  245. data/vendor/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +444 -0
  246. data/vendor/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +198 -0
  247. data/vendor/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +117 -0
  248. data/vendor/Eigen/src/Jacobi/Jacobi.h +483 -0
  249. data/vendor/Eigen/src/KLUSupport/KLUSupport.h +358 -0
  250. data/vendor/Eigen/src/LU/Determinant.h +117 -0
  251. data/vendor/Eigen/src/LU/FullPivLU.h +877 -0
  252. data/vendor/Eigen/src/LU/InverseImpl.h +432 -0
  253. data/vendor/Eigen/src/LU/PartialPivLU.h +624 -0
  254. data/vendor/Eigen/src/LU/PartialPivLU_LAPACKE.h +83 -0
  255. data/vendor/Eigen/src/LU/arch/InverseSize4.h +351 -0
  256. data/vendor/Eigen/src/MetisSupport/MetisSupport.h +137 -0
  257. data/vendor/Eigen/src/OrderingMethods/Amd.h +435 -0
  258. data/vendor/Eigen/src/OrderingMethods/Eigen_Colamd.h +1863 -0
  259. data/vendor/Eigen/src/OrderingMethods/Ordering.h +153 -0
  260. data/vendor/Eigen/src/PaStiXSupport/PaStiXSupport.h +678 -0
  261. data/vendor/Eigen/src/PardisoSupport/PardisoSupport.h +545 -0
  262. data/vendor/Eigen/src/QR/ColPivHouseholderQR.h +674 -0
  263. data/vendor/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +97 -0
  264. data/vendor/Eigen/src/QR/CompleteOrthogonalDecomposition.h +635 -0
  265. data/vendor/Eigen/src/QR/FullPivHouseholderQR.h +713 -0
  266. data/vendor/Eigen/src/QR/HouseholderQR.h +434 -0
  267. data/vendor/Eigen/src/QR/HouseholderQR_LAPACKE.h +68 -0
  268. data/vendor/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +335 -0
  269. data/vendor/Eigen/src/SVD/BDCSVD.h +1366 -0
  270. data/vendor/Eigen/src/SVD/JacobiSVD.h +812 -0
  271. data/vendor/Eigen/src/SVD/JacobiSVD_LAPACKE.h +91 -0
  272. data/vendor/Eigen/src/SVD/SVDBase.h +376 -0
  273. data/vendor/Eigen/src/SVD/UpperBidiagonalization.h +414 -0
  274. data/vendor/Eigen/src/SparseCholesky/SimplicialCholesky.h +697 -0
  275. data/vendor/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +174 -0
  276. data/vendor/Eigen/src/SparseCore/AmbiVector.h +378 -0
  277. data/vendor/Eigen/src/SparseCore/CompressedStorage.h +274 -0
  278. data/vendor/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +352 -0
  279. data/vendor/Eigen/src/SparseCore/MappedSparseMatrix.h +67 -0
  280. data/vendor/Eigen/src/SparseCore/SparseAssign.h +270 -0
  281. data/vendor/Eigen/src/SparseCore/SparseBlock.h +571 -0
  282. data/vendor/Eigen/src/SparseCore/SparseColEtree.h +206 -0
  283. data/vendor/Eigen/src/SparseCore/SparseCompressedBase.h +370 -0
  284. data/vendor/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +722 -0
  285. data/vendor/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +150 -0
  286. data/vendor/Eigen/src/SparseCore/SparseDenseProduct.h +342 -0
  287. data/vendor/Eigen/src/SparseCore/SparseDiagonalProduct.h +138 -0
  288. data/vendor/Eigen/src/SparseCore/SparseDot.h +98 -0
  289. data/vendor/Eigen/src/SparseCore/SparseFuzzy.h +29 -0
  290. data/vendor/Eigen/src/SparseCore/SparseMap.h +305 -0
  291. data/vendor/Eigen/src/SparseCore/SparseMatrix.h +1518 -0
  292. data/vendor/Eigen/src/SparseCore/SparseMatrixBase.h +398 -0
  293. data/vendor/Eigen/src/SparseCore/SparsePermutation.h +178 -0
  294. data/vendor/Eigen/src/SparseCore/SparseProduct.h +181 -0
  295. data/vendor/Eigen/src/SparseCore/SparseRedux.h +49 -0
  296. data/vendor/Eigen/src/SparseCore/SparseRef.h +397 -0
  297. data/vendor/Eigen/src/SparseCore/SparseSelfAdjointView.h +659 -0
  298. data/vendor/Eigen/src/SparseCore/SparseSolverBase.h +124 -0
  299. data/vendor/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +198 -0
  300. data/vendor/Eigen/src/SparseCore/SparseTranspose.h +92 -0
  301. data/vendor/Eigen/src/SparseCore/SparseTriangularView.h +189 -0
  302. data/vendor/Eigen/src/SparseCore/SparseUtil.h +186 -0
  303. data/vendor/Eigen/src/SparseCore/SparseVector.h +478 -0
  304. data/vendor/Eigen/src/SparseCore/SparseView.h +254 -0
  305. data/vendor/Eigen/src/SparseCore/TriangularSolver.h +315 -0
  306. data/vendor/Eigen/src/SparseLU/SparseLU.h +923 -0
  307. data/vendor/Eigen/src/SparseLU/SparseLUImpl.h +66 -0
  308. data/vendor/Eigen/src/SparseLU/SparseLU_Memory.h +226 -0
  309. data/vendor/Eigen/src/SparseLU/SparseLU_Structs.h +110 -0
  310. data/vendor/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +375 -0
  311. data/vendor/Eigen/src/SparseLU/SparseLU_Utils.h +80 -0
  312. data/vendor/Eigen/src/SparseLU/SparseLU_column_bmod.h +181 -0
  313. data/vendor/Eigen/src/SparseLU/SparseLU_column_dfs.h +179 -0
  314. data/vendor/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +107 -0
  315. data/vendor/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +280 -0
  316. data/vendor/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +126 -0
  317. data/vendor/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +130 -0
  318. data/vendor/Eigen/src/SparseLU/SparseLU_panel_bmod.h +223 -0
  319. data/vendor/Eigen/src/SparseLU/SparseLU_panel_dfs.h +258 -0
  320. data/vendor/Eigen/src/SparseLU/SparseLU_pivotL.h +137 -0
  321. data/vendor/Eigen/src/SparseLU/SparseLU_pruneL.h +136 -0
  322. data/vendor/Eigen/src/SparseLU/SparseLU_relax_snode.h +83 -0
  323. data/vendor/Eigen/src/SparseQR/SparseQR.h +758 -0
  324. data/vendor/Eigen/src/StlSupport/StdDeque.h +116 -0
  325. data/vendor/Eigen/src/StlSupport/StdList.h +106 -0
  326. data/vendor/Eigen/src/StlSupport/StdVector.h +131 -0
  327. data/vendor/Eigen/src/StlSupport/details.h +84 -0
  328. data/vendor/Eigen/src/SuperLUSupport/SuperLUSupport.h +1025 -0
  329. data/vendor/Eigen/src/UmfPackSupport/UmfPackSupport.h +642 -0
  330. data/vendor/Eigen/src/misc/Image.h +82 -0
  331. data/vendor/Eigen/src/misc/Kernel.h +79 -0
  332. data/vendor/Eigen/src/misc/RealSvd2x2.h +55 -0
  333. data/vendor/Eigen/src/misc/blas.h +440 -0
  334. data/vendor/Eigen/src/misc/lapack.h +152 -0
  335. data/vendor/Eigen/src/misc/lapacke.h +16292 -0
  336. data/vendor/Eigen/src/misc/lapacke_mangling.h +17 -0
  337. data/vendor/Eigen/src/plugins/ArrayCwiseBinaryOps.h +358 -0
  338. data/vendor/Eigen/src/plugins/ArrayCwiseUnaryOps.h +696 -0
  339. data/vendor/Eigen/src/plugins/BlockMethods.h +1442 -0
  340. data/vendor/Eigen/src/plugins/CommonCwiseBinaryOps.h +115 -0
  341. data/vendor/Eigen/src/plugins/CommonCwiseUnaryOps.h +177 -0
  342. data/vendor/Eigen/src/plugins/IndexedViewMethods.h +262 -0
  343. data/vendor/Eigen/src/plugins/MatrixCwiseBinaryOps.h +152 -0
  344. data/vendor/Eigen/src/plugins/MatrixCwiseUnaryOps.h +95 -0
  345. data/vendor/Eigen/src/plugins/ReshapedMethods.h +149 -0
  346. data/vendor/aarand/aarand.hpp +114 -0
  347. data/vendor/annoy/annoylib.h +1495 -0
  348. data/vendor/annoy/kissrandom.h +120 -0
  349. data/vendor/annoy/mman.h +242 -0
  350. data/vendor/hnswlib/bruteforce.h +152 -0
  351. data/vendor/hnswlib/hnswalg.h +1192 -0
  352. data/vendor/hnswlib/hnswlib.h +108 -0
  353. data/vendor/hnswlib/space_ip.h +282 -0
  354. data/vendor/hnswlib/space_l2.h +281 -0
  355. data/vendor/hnswlib/visited_list_pool.h +79 -0
  356. data/vendor/irlba/irlba.hpp +575 -0
  357. data/vendor/irlba/lanczos.hpp +212 -0
  358. data/vendor/irlba/parallel.hpp +474 -0
  359. data/vendor/irlba/utils.hpp +224 -0
  360. data/vendor/irlba/wrappers.hpp +228 -0
  361. data/vendor/kmeans/Base.hpp +75 -0
  362. data/vendor/kmeans/Details.hpp +79 -0
  363. data/vendor/kmeans/HartiganWong.hpp +492 -0
  364. data/vendor/kmeans/InitializeKmeansPP.hpp +144 -0
  365. data/vendor/kmeans/InitializeNone.hpp +44 -0
  366. data/vendor/kmeans/InitializePCAPartition.hpp +309 -0
  367. data/vendor/kmeans/InitializeRandom.hpp +91 -0
  368. data/vendor/kmeans/Kmeans.hpp +161 -0
  369. data/vendor/kmeans/Lloyd.hpp +134 -0
  370. data/vendor/kmeans/MiniBatch.hpp +269 -0
  371. data/vendor/kmeans/QuickSearch.hpp +179 -0
  372. data/vendor/kmeans/compute_centroids.hpp +32 -0
  373. data/vendor/kmeans/compute_wcss.hpp +27 -0
  374. data/vendor/kmeans/is_edge_case.hpp +42 -0
  375. data/vendor/kmeans/random.hpp +55 -0
  376. data/vendor/knncolle/Annoy/Annoy.hpp +193 -0
  377. data/vendor/knncolle/BruteForce/BruteForce.hpp +120 -0
  378. data/vendor/knncolle/Hnsw/Hnsw.hpp +225 -0
  379. data/vendor/knncolle/Kmknn/Kmknn.hpp +286 -0
  380. data/vendor/knncolle/VpTree/VpTree.hpp +256 -0
  381. data/vendor/knncolle/knncolle.hpp +34 -0
  382. data/vendor/knncolle/utils/Base.hpp +100 -0
  383. data/vendor/knncolle/utils/NeighborQueue.hpp +94 -0
  384. data/vendor/knncolle/utils/distances.hpp +98 -0
  385. data/vendor/knncolle/utils/find_nearest_neighbors.hpp +112 -0
  386. data/vendor/powerit/PowerIterations.hpp +157 -0
  387. data/vendor/umappp/NeighborList.hpp +37 -0
  388. data/vendor/umappp/Umap.hpp +662 -0
  389. data/vendor/umappp/combine_neighbor_sets.hpp +95 -0
  390. data/vendor/umappp/find_ab.hpp +157 -0
  391. data/vendor/umappp/neighbor_similarities.hpp +136 -0
  392. data/vendor/umappp/optimize_layout.hpp +285 -0
  393. data/vendor/umappp/spectral_init.hpp +181 -0
  394. data/vendor/umappp/umappp.hpp +13 -0
  395. metadata +465 -0
@@ -0,0 +1,700 @@
1
+ /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
14
+ ==============================================================================*/
15
+
16
+ #ifndef EIGEN_BFLOAT16_H
17
+ #define EIGEN_BFLOAT16_H
18
+
19
// Emits an explicit specialization of the packet function METHOD for the
// packet type PACKET_BF16. The generated body converts its argument with
// Bf16ToF32, calls the PACKET_F specialization of METHOD on the result, and
// converts the outcome back with F32ToBf16.
#define BF16_PACKET_FUNCTION(PACKET_F, PACKET_BF16, METHOD)          \
  template <>                                                        \
  EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED   \
  PACKET_BF16 METHOD<PACKET_BF16>(const PACKET_BF16& _packet) {      \
    return F32ToBf16(METHOD<PACKET_F>(Bf16ToF32(_packet)));          \
  }
25
+
26
+ namespace Eigen {
27
+
28
+ struct bfloat16;
29
+
30
+ namespace bfloat16_impl {
31
+
32
+ // Make our own __bfloat16_raw definition.
33
+ struct __bfloat16_raw {
34
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __bfloat16_raw() : value(0) {}
35
+ explicit EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __bfloat16_raw(unsigned short raw) : value(raw) {}
36
+ unsigned short value;
37
+ };
38
+
39
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __bfloat16_raw raw_uint16_to_bfloat16(unsigned short value);
40
+ template <bool AssumeArgumentIsNormalOrInfinityOrZero>
41
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw float_to_bfloat16_rtne(float ff);
42
+ // Forward declarations of template specializations, to avoid Visual C++ 2019 errors, saying:
43
+ // > error C2908: explicit specialization; 'float_to_bfloat16_rtne' has already been instantiated
44
+ template <>
45
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw float_to_bfloat16_rtne<false>(float ff);
46
+ template <>
47
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw float_to_bfloat16_rtne<true>(float ff);
48
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float bfloat16_to_float(__bfloat16_raw h);
49
+
50
+ struct bfloat16_base : public __bfloat16_raw {
51
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bfloat16_base() {}
52
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bfloat16_base(const __bfloat16_raw& h) : __bfloat16_raw(h) {}
53
+ };
54
+
55
+ } // namespace bfloat16_impl
56
+
57
+ // Class definition.
58
+ struct bfloat16 : public bfloat16_impl::bfloat16_base {
59
+
60
+ typedef bfloat16_impl::__bfloat16_raw __bfloat16_raw;
61
+
62
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bfloat16() {}
63
+
64
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bfloat16(const __bfloat16_raw& h) : bfloat16_impl::bfloat16_base(h) {}
65
+
66
+ explicit EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bfloat16(bool b)
67
+ : bfloat16_impl::bfloat16_base(bfloat16_impl::raw_uint16_to_bfloat16(b ? 0x3f80 : 0)) {}
68
+
69
+ template<class T>
70
+ explicit EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bfloat16(T val)
71
+ : bfloat16_impl::bfloat16_base(bfloat16_impl::float_to_bfloat16_rtne<internal::is_integral<T>::value>(static_cast<float>(val))) {}
72
+
73
+ explicit EIGEN_DEVICE_FUNC bfloat16(float f)
74
+ : bfloat16_impl::bfloat16_base(bfloat16_impl::float_to_bfloat16_rtne<false>(f)) {}
75
+
76
+ // Following the convention of numpy, converting between complex and
77
+ // float will lead to loss of imag value.
78
+ template<typename RealScalar>
79
+ explicit EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bfloat16(const std::complex<RealScalar>& val)
80
+ : bfloat16_impl::bfloat16_base(bfloat16_impl::float_to_bfloat16_rtne<false>(static_cast<float>(val.real()))) {}
81
+
82
+ EIGEN_DEVICE_FUNC operator float() const { // NOLINT: Allow implicit conversion to float, because it is lossless.
83
+ return bfloat16_impl::bfloat16_to_float(*this);
84
+ }
85
+ };
86
+ } // namespace Eigen
87
+
88
+ namespace std {
89
+ template<>
90
+ struct numeric_limits<Eigen::bfloat16> {
91
+ static const bool is_specialized = true;
92
+ static const bool is_signed = true;
93
+ static const bool is_integer = false;
94
+ static const bool is_exact = false;
95
+ static const bool has_infinity = true;
96
+ static const bool has_quiet_NaN = true;
97
+ static const bool has_signaling_NaN = true;
98
+ static const float_denorm_style has_denorm = std::denorm_absent;
99
+ static const bool has_denorm_loss = false;
100
+ static const std::float_round_style round_style = numeric_limits<float>::round_style;
101
+ static const bool is_iec559 = false;
102
+ static const bool is_bounded = true;
103
+ static const bool is_modulo = false;
104
+ static const int digits = 8;
105
+ static const int digits10 = 2;
106
+ static const int max_digits10 = 4;
107
+ static const int radix = 2;
108
+ static const int min_exponent = numeric_limits<float>::min_exponent;
109
+ static const int min_exponent10 = numeric_limits<float>::min_exponent10;
110
+ static const int max_exponent = numeric_limits<float>::max_exponent;
111
+ static const int max_exponent10 = numeric_limits<float>::max_exponent10;
112
+ static const bool traps = numeric_limits<float>::traps;
113
+ static const bool tinyness_before = numeric_limits<float>::tinyness_before;
114
+
115
+ static Eigen::bfloat16 (min)() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x0080); }
116
+ static Eigen::bfloat16 lowest() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0xff7f); }
117
+ static Eigen::bfloat16 (max)() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x7f7f); }
118
+ static Eigen::bfloat16 epsilon() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x3c00); }
119
+ static Eigen::bfloat16 round_error() { return Eigen::bfloat16(0x3f00); }
120
+ static Eigen::bfloat16 infinity() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x7f80); }
121
+ static Eigen::bfloat16 quiet_NaN() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x7fc0); }
122
+ static Eigen::bfloat16 signaling_NaN() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x7f81); }
123
+ static Eigen::bfloat16 denorm_min() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x0001); }
124
+ };
125
+
126
+ // If std::numeric_limits<T> is specialized, should also specialize
127
+ // std::numeric_limits<const T>, std::numeric_limits<volatile T>, and
128
+ // std::numeric_limits<const volatile T>
129
+ // https://stackoverflow.com/a/16519653/
130
+ template<>
131
+ struct numeric_limits<const Eigen::bfloat16> : numeric_limits<Eigen::bfloat16> {};
132
+ template<>
133
+ struct numeric_limits<volatile Eigen::bfloat16> : numeric_limits<Eigen::bfloat16> {};
134
+ template<>
135
+ struct numeric_limits<const volatile Eigen::bfloat16> : numeric_limits<Eigen::bfloat16> {};
136
+ } // namespace std
137
+
138
+ namespace Eigen {
139
+
140
+ namespace bfloat16_impl {
141
+
142
+ // We need to distinguish ‘clang as the CUDA compiler’ from ‘clang as the host compiler,
143
+ // invoked by NVCC’ (e.g. on MacOS). The former needs to see both host and device implementation
144
+ // of the functions, while the latter can only deal with one of them.
145
+ #if !defined(EIGEN_HAS_NATIVE_BF16) || (EIGEN_COMP_CLANG && !EIGEN_COMP_NVCC) // Emulate support for bfloat16 floats
146
+
147
+ #if EIGEN_COMP_CLANG && defined(EIGEN_CUDACC)
148
+ // We need to provide emulated *host-side* BF16 operators for clang.
149
+ #pragma push_macro("EIGEN_DEVICE_FUNC")
150
+ #undef EIGEN_DEVICE_FUNC
151
+ #if defined(EIGEN_HAS_CUDA_BF16) && defined(EIGEN_HAS_NATIVE_BF16)
152
+ #define EIGEN_DEVICE_FUNC __host__
153
+ #else // both host and device need emulated ops.
154
+ #define EIGEN_DEVICE_FUNC __host__ __device__
155
+ #endif
156
+ #endif
157
+
158
+ // Definitions for CPUs, mostly working through conversion
159
+ // to/from fp32.
160
+
161
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator + (const bfloat16& a, const bfloat16& b) {
162
+ return bfloat16(float(a) + float(b));
163
+ }
164
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator + (const bfloat16& a, const int& b) {
165
+ return bfloat16(float(a) + static_cast<float>(b));
166
+ }
167
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator + (const int& a, const bfloat16& b) {
168
+ return bfloat16(static_cast<float>(a) + float(b));
169
+ }
170
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator * (const bfloat16& a, const bfloat16& b) {
171
+ return bfloat16(float(a) * float(b));
172
+ }
173
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator - (const bfloat16& a, const bfloat16& b) {
174
+ return bfloat16(float(a) - float(b));
175
+ }
176
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator / (const bfloat16& a, const bfloat16& b) {
177
+ return bfloat16(float(a) / float(b));
178
+ }
179
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator - (const bfloat16& a) {
180
+ bfloat16 result;
181
+ result.value = a.value ^ 0x8000;
182
+ return result;
183
+ }
184
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16& operator += (bfloat16& a, const bfloat16& b) {
185
+ a = bfloat16(float(a) + float(b));
186
+ return a;
187
+ }
188
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16& operator *= (bfloat16& a, const bfloat16& b) {
189
+ a = bfloat16(float(a) * float(b));
190
+ return a;
191
+ }
192
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16& operator -= (bfloat16& a, const bfloat16& b) {
193
+ a = bfloat16(float(a) - float(b));
194
+ return a;
195
+ }
196
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16& operator /= (bfloat16& a, const bfloat16& b) {
197
+ a = bfloat16(float(a) / float(b));
198
+ return a;
199
+ }
200
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator++(bfloat16& a) {
201
+ a += bfloat16(1);
202
+ return a;
203
+ }
204
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator--(bfloat16& a) {
205
+ a -= bfloat16(1);
206
+ return a;
207
+ }
208
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator++(bfloat16& a, int) {
209
+ bfloat16 original_value = a;
210
+ ++a;
211
+ return original_value;
212
+ }
213
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator--(bfloat16& a, int) {
214
+ bfloat16 original_value = a;
215
+ --a;
216
+ return original_value;
217
+ }
218
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator == (const bfloat16& a, const bfloat16& b) {
219
+ return numext::equal_strict(float(a),float(b));
220
+ }
221
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator != (const bfloat16& a, const bfloat16& b) {
222
+ return numext::not_equal_strict(float(a), float(b));
223
+ }
224
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator < (const bfloat16& a, const bfloat16& b) {
225
+ return float(a) < float(b);
226
+ }
227
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator <= (const bfloat16& a, const bfloat16& b) {
228
+ return float(a) <= float(b);
229
+ }
230
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator > (const bfloat16& a, const bfloat16& b) {
231
+ return float(a) > float(b);
232
+ }
233
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator >= (const bfloat16& a, const bfloat16& b) {
234
+ return float(a) >= float(b);
235
+ }
236
+
237
+ #if EIGEN_COMP_CLANG && defined(EIGEN_CUDACC)
238
+ #pragma pop_macro("EIGEN_DEVICE_FUNC")
239
+ #endif
240
+ #endif // Emulate support for bfloat16 floats
241
+
242
+ // Division by an index. Do it in full float precision to avoid accuracy
243
+ // issues in converting the denominator to bfloat16.
244
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator / (const bfloat16& a, Index b) {
245
+ return bfloat16(static_cast<float>(a) / static_cast<float>(b));
246
+ }
247
+
248
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw truncate_to_bfloat16(const float v) {
249
+ __bfloat16_raw output;
250
+ if (Eigen::numext::isnan EIGEN_NOT_A_MACRO(v)) {
251
+ output.value = std::signbit(v) ? 0xFFC0: 0x7FC0;
252
+ return output;
253
+ }
254
+ const uint16_t* p = reinterpret_cast<const uint16_t*>(&v);
255
+ #if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
256
+ output.value = p[0];
257
+ #else
258
+ output.value = p[1];
259
+ #endif
260
+ return output;
261
+ }
262
+
263
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __bfloat16_raw raw_uint16_to_bfloat16(numext::uint16_t value) {
264
+ return __bfloat16_raw(value);
265
+ }
266
+
267
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR numext::uint16_t raw_bfloat16_as_uint16(const __bfloat16_raw& bf) {
268
+ return bf.value;
269
+ }
270
+
271
+ // float_to_bfloat16_rtne template specialization that does not make any
272
+ // assumption about the value of its function argument (ff).
273
+ template <>
274
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw float_to_bfloat16_rtne<false>(float ff) {
275
+ #if (defined(EIGEN_HAS_CUDA_BF16) && defined(EIGEN_HAS_HIP_BF16))
276
+ // Nothing to do here
277
+ #else
278
+ __bfloat16_raw output;
279
+
280
+ if (Eigen::numext::isnan EIGEN_NOT_A_MACRO(ff)) {
281
+ // If the value is a NaN, squash it to a qNaN with msb of fraction set,
282
+ // this makes sure after truncation we don't end up with an inf.
283
+ //
284
+ // qNaN magic: All exponent bits set + most significant bit of fraction
285
+ // set.
286
+ output.value = std::signbit(ff) ? 0xFFC0: 0x7FC0;
287
+ } else {
288
+ // Fast rounding algorithm that rounds a half value to nearest even. This
289
+ // reduces expected error when we convert a large number of floats. Here
290
+ // is how it works:
291
+ //
292
+ // Definitions:
293
+ // To convert a float 32 to bfloat16, a float 32 can be viewed as 32 bits
294
+ // with the following tags:
295
+ //
296
+ // Sign | Exp (8 bits) | Frac (23 bits)
297
+ // S EEEEEEEE FFFFFFLRTTTTTTTTTTTTTTT
298
+ //
299
+ // S: Sign bit.
300
+ // E: Exponent bits.
301
+ // F: First 6 bits of fraction.
302
+ // L: Least significant bit of resulting bfloat16 if we truncate away the
303
+ // rest of the float32. This is also the 7th bit of fraction
304
+ // R: Rounding bit, 8th bit of fraction.
305
+ // T: Sticky bits, rest of fraction, 15 bits.
306
+ //
307
+ // To round half to nearest even, there are 3 cases where we want to round
308
+ // down (simply truncate the result of the bits away, which consists of
309
+ // rounding bit and sticky bits) and two cases where we want to round up
310
+ // (truncate then add one to the result).
311
+ //
312
+ // The fast converting algorithm simply adds lsb (L) to 0x7fff (15 bits of
313
+ // 1s) as the rounding bias, adds the rounding bias to the input, then
314
+ // truncates the last 16 bits away.
315
+ //
316
+ // To understand how it works, we can analyze this algorithm case by case:
317
+ //
318
+ // 1. L = 0, R = 0:
319
+ // Expect: round down, this is less than half value.
320
+ //
321
+ // Algorithm:
322
+ // - Rounding bias: 0x7fff + 0 = 0x7fff
323
+ // - Adding rounding bias to input may create any carry, depending on
324
+ // whether there is any value set to 1 in T bits.
325
+ // - R may be set to 1 if there is a carry.
326
+ // - L remains 0.
327
+ // - Note that this case also handles Inf and -Inf, where all fraction
328
+ // bits, including L, R and Ts are all 0. The output remains Inf after
329
+ // this algorithm.
330
+ //
331
+ // 2. L = 1, R = 0:
332
+ // Expect: round down, this is less than half value.
333
+ //
334
+ // Algorithm:
335
+ // - Rounding bias: 0x7fff + 1 = 0x8000
336
+ // - Adding rounding bias to input doesn't change sticky bits but
337
+ // adds 1 to rounding bit.
338
+ // - L remains 1.
339
+ //
340
+ // 3. L = 0, R = 1, all of T are 0:
341
+ // Expect: round down, this is exactly at half, the result is already
342
+ // even (L=0).
343
+ //
344
+ // Algorithm:
345
+ // - Rounding bias: 0x7fff + 0 = 0x7fff
346
+ // - Adding rounding bias to input sets all sticky bits to 1, but
347
+ // doesn't create a carry.
348
+ // - R remains 1.
349
+ // - L remains 0.
350
+ //
351
+ // 4. L = 1, R = 1:
352
+ // Expect: round up, this is exactly at half, the result needs to be
353
+ // round to the next even number.
354
+ //
355
+ // Algorithm:
356
+ // - Rounding bias: 0x7fff + 1 = 0x8000
357
+ // - Adding rounding bias to input doesn't change sticky bits, but
358
+ // creates a carry from rounding bit.
359
+ // - The carry sets L to 0, creates another carry bit and propagate
360
+ // forward to F bits.
361
+ // - If all the F bits are 1, a carry then propagates to the exponent
362
+ // bits, which then creates the minimum value with the next exponent
363
+ // value. Note that we won't have the case where exponents are all 1,
364
+ // since that's either a NaN (handled in the other if condition) or inf
365
+ // (handled in case 1).
366
+ //
367
+ // 5. L = 0, R = 1, any of T is 1:
368
+ // Expect: round up, this is greater than half.
369
+ //
370
+ // Algorithm:
371
+ // - Rounding bias: 0x7fff + 0 = 0x7fff
372
+ // - Adding rounding bias to input creates a carry from sticky bits,
373
+ // sets rounding bit to 0, then create another carry.
374
+ // - The second carry sets L to 1.
375
+ //
376
+ // Examples:
377
+ //
378
+ // Exact half value that is already even:
379
+ // Input:
380
+ // Sign | Exp (8 bit) | Frac (first 7 bit) | Frac (last 16 bit)
381
+ // S E E E E E E E E F F F F F F L RTTTTTTTTTTTTTTT
382
+ // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1000000000000000
383
+ //
384
+ // This falls into case 3. We truncate the rest of 16 bits and no
385
+ // carry is created into F and L:
386
+ //
387
+ // Output:
388
+ // Sign | Exp (8 bit) | Frac (first 7 bit)
389
+ // S E E E E E E E E F F F F F F L
390
+ // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
391
+ //
392
+ // Exact half value, round to next even number:
393
+ // Input:
394
+ // Sign | Exp (8 bit) | Frac (first 7 bit) | Frac (last 16 bit)
395
+ // S E E E E E E E E F F F F F F L RTTTTTTTTTTTTTTT
396
+ // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1000000000000000
397
+ //
398
+ // This falls into case 4. We create a carry from R and T,
399
+ // which then propagates into L and F:
400
+ //
401
+ // Output:
402
+ // Sign | Exp (8 bit) | Frac (first 7 bit)
403
+ // S E E E E E E E E F F F F F F L
404
+ // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
405
+ //
406
+ //
407
+ // Max denormal value round to min normal value:
408
+ // Input:
409
+ // Sign | Exp (8 bit) | Frac (first 7 bit) | Frac (last 16 bit)
410
+ // S E E E E E E E E F F F F F F L RTTTTTTTTTTTTTTT
411
+ // 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1111111111111111
412
+ //
413
+ // This falls into case 4. We create a carry from R and T,
414
+ // propagate into L and F, which then propagates into exponent
415
+ // bits:
416
+ //
417
+ // Output:
418
+ // Sign | Exp (8 bit) | Frac (first 7 bit)
419
+ // S E E E E E E E E F F F F F F L
420
+ // 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
421
+ //
422
+ // Max normal value round to Inf:
423
+ // Input:
424
+ // Sign | Exp (8 bit) | Frac (first 7 bit) | Frac (last 16 bit)
425
+ // S E E E E E E E E F F F F F F L RTTTTTTTTTTTTTTT
426
+ // 0 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1111111111111111
427
+ //
428
+ // This falls into case 4. We create a carry from R and T,
429
+ // propagate into L and F, which then propagates into exponent
430
+ // bits:
431
+ //
432
+ // Sign | Exp (8 bit) | Frac (first 7 bit)
433
+ // S E E E E E E E E F F F F F F L
434
+ // 0 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0
435
+
436
+ // At this point, ff must be either a normal float, or +/-infinity.
437
+ output = float_to_bfloat16_rtne<true>(ff);
438
+ }
439
+ return output;
440
+ #endif
441
+ }
442
+
443
+ // float_to_bfloat16_rtne template specialization that assumes that its function
444
+ // argument (ff) is either a normal floating point number, or +/-infinity, or
445
+ // zero. Used to improve the runtime performance of conversion from an integer
446
+ // type to bfloat16.
447
+ template <>
448
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw float_to_bfloat16_rtne<true>(float ff) {
449
+ #if (defined(EIGEN_HAS_CUDA_BF16) && defined(EIGEN_HAS_HIP_BF16))
450
+ // Nothing to do here
451
+ #else
452
+ numext::uint32_t input = numext::bit_cast<numext::uint32_t>(ff);
453
+ __bfloat16_raw output;
454
+
455
+ // Least significant bit of resulting bfloat.
456
+ numext::uint32_t lsb = (input >> 16) & 1;
457
+ numext::uint32_t rounding_bias = 0x7fff + lsb;
458
+ input += rounding_bias;
459
+ output.value = static_cast<numext::uint16_t>(input >> 16);
460
+ return output;
461
+ #endif
462
+ }
463
+
464
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float bfloat16_to_float(__bfloat16_raw h) {
465
+ float result = 0;
466
+ unsigned short* q = reinterpret_cast<unsigned short*>(&result);
467
+ #if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
468
+ q[0] = h.value;
469
+ #else
470
+ q[1] = h.value;
471
+ #endif
472
+ return result;
473
+ }
474
+ // --- standard functions ---
475
+
476
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isinf)(const bfloat16& a) {
477
+ EIGEN_USING_STD(isinf);
478
+ return (isinf)(float(a));
479
+ }
480
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isnan)(const bfloat16& a) {
481
+ EIGEN_USING_STD(isnan);
482
+ return (isnan)(float(a));
483
+ }
484
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isfinite)(const bfloat16& a) {
485
+ return !(isinf EIGEN_NOT_A_MACRO (a)) && !(isnan EIGEN_NOT_A_MACRO (a));
486
+ }
487
+
488
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 abs(const bfloat16& a) {
489
+ bfloat16 result;
490
+ result.value = a.value & 0x7FFF;
491
+ return result;
492
+ }
493
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 exp(const bfloat16& a) {
494
+ return bfloat16(::expf(float(a)));
495
+ }
496
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 expm1(const bfloat16& a) {
497
+ return bfloat16(numext::expm1(float(a)));
498
+ }
499
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 log(const bfloat16& a) {
500
+ return bfloat16(::logf(float(a)));
501
+ }
502
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 log1p(const bfloat16& a) {
503
+ return bfloat16(numext::log1p(float(a)));
504
+ }
505
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 log10(const bfloat16& a) {
506
+ return bfloat16(::log10f(float(a)));
507
+ }
508
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 log2(const bfloat16& a) {
509
+ return bfloat16(static_cast<float>(EIGEN_LOG2E) * ::logf(float(a)));
510
+ }
511
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 sqrt(const bfloat16& a) {
512
+ return bfloat16(::sqrtf(float(a)));
513
+ }
514
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 pow(const bfloat16& a, const bfloat16& b) {
515
+ return bfloat16(::powf(float(a), float(b)));
516
+ }
517
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 sin(const bfloat16& a) {
518
+ return bfloat16(::sinf(float(a)));
519
+ }
520
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 cos(const bfloat16& a) {
521
+ return bfloat16(::cosf(float(a)));
522
+ }
523
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 tan(const bfloat16& a) {
524
+ return bfloat16(::tanf(float(a)));
525
+ }
526
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 asin(const bfloat16& a) {
527
+ return bfloat16(::asinf(float(a)));
528
+ }
529
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 acos(const bfloat16& a) {
530
+ return bfloat16(::acosf(float(a)));
531
+ }
532
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 atan(const bfloat16& a) {
533
+ return bfloat16(::atanf(float(a)));
534
+ }
535
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 sinh(const bfloat16& a) {
536
+ return bfloat16(::sinhf(float(a)));
537
+ }
538
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 cosh(const bfloat16& a) {
539
+ return bfloat16(::coshf(float(a)));
540
+ }
541
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 tanh(const bfloat16& a) {
542
+ return bfloat16(::tanhf(float(a)));
543
+ }
544
+ #if EIGEN_HAS_CXX11_MATH
545
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 asinh(const bfloat16& a) {
546
+ return bfloat16(::asinhf(float(a)));
547
+ }
548
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 acosh(const bfloat16& a) {
549
+ return bfloat16(::acoshf(float(a)));
550
+ }
551
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 atanh(const bfloat16& a) {
552
+ return bfloat16(::atanhf(float(a)));
553
+ }
554
+ #endif
555
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 floor(const bfloat16& a) {
556
+ return bfloat16(::floorf(float(a)));
557
+ }
558
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 ceil(const bfloat16& a) {
559
+ return bfloat16(::ceilf(float(a)));
560
+ }
561
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 rint(const bfloat16& a) {
562
+ return bfloat16(::rintf(float(a)));
563
+ }
564
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 round(const bfloat16& a) {
565
+ return bfloat16(::roundf(float(a)));
566
+ }
567
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 fmod(const bfloat16& a, const bfloat16& b) {
568
+ return bfloat16(::fmodf(float(a), float(b)));
569
+ }
570
+
571
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 (min)(const bfloat16& a, const bfloat16& b) {
572
+ const float f1 = static_cast<float>(a);
573
+ const float f2 = static_cast<float>(b);
574
+ return f2 < f1 ? b : a;
575
+ }
576
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 (max)(const bfloat16& a, const bfloat16& b) {
577
+ const float f1 = static_cast<float>(a);
578
+ const float f2 = static_cast<float>(b);
579
+ return f1 < f2 ? b : a;
580
+ }
581
+
582
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 fmin(const bfloat16& a, const bfloat16& b) {
583
+ const float f1 = static_cast<float>(a);
584
+ const float f2 = static_cast<float>(b);
585
+ return bfloat16(::fminf(f1, f2));
586
+ }
587
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 fmax(const bfloat16& a, const bfloat16& b) {
588
+ const float f1 = static_cast<float>(a);
589
+ const float f2 = static_cast<float>(b);
590
+ return bfloat16(::fmaxf(f1, f2));
591
+ }
592
+
593
+ #ifndef EIGEN_NO_IO
594
+ EIGEN_ALWAYS_INLINE std::ostream& operator << (std::ostream& os, const bfloat16& v) {
595
+ os << static_cast<float>(v);
596
+ return os;
597
+ }
598
+ #endif
599
+
600
+ } // namespace bfloat16_impl
601
+
602
+ namespace internal {
603
+
604
+ template<>
605
+ struct random_default_impl<bfloat16, false, false>
606
+ {
607
+ static inline bfloat16 run(const bfloat16& x, const bfloat16& y)
608
+ {
609
+ return x + (y-x) * bfloat16(float(std::rand()) / float(RAND_MAX));
610
+ }
611
+ static inline bfloat16 run()
612
+ {
613
+ return run(bfloat16(-1.f), bfloat16(1.f));
614
+ }
615
+ };
616
+
617
+ template<> struct is_arithmetic<bfloat16> { enum { value = true }; };
618
+
619
+ } // namespace internal
620
+
621
+ template<> struct NumTraits<Eigen::bfloat16>
622
+ : GenericNumTraits<Eigen::bfloat16>
623
+ {
624
+ enum {
625
+ IsSigned = true,
626
+ IsInteger = false,
627
+ IsComplex = false,
628
+ RequireInitialization = false
629
+ };
630
+
631
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::bfloat16 epsilon() {
632
+ return bfloat16_impl::raw_uint16_to_bfloat16(0x3c00);
633
+ }
634
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::bfloat16 dummy_precision() {
635
+ return bfloat16_impl::raw_uint16_to_bfloat16(0x3D4D); // bfloat16(5e-2f);
636
+
637
+ }
638
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::bfloat16 highest() {
639
+ return bfloat16_impl::raw_uint16_to_bfloat16(0x7F7F);
640
+ }
641
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::bfloat16 lowest() {
642
+ return bfloat16_impl::raw_uint16_to_bfloat16(0xFF7F);
643
+ }
644
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::bfloat16 infinity() {
645
+ return bfloat16_impl::raw_uint16_to_bfloat16(0x7f80);
646
+ }
647
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::bfloat16 quiet_NaN() {
648
+ return bfloat16_impl::raw_uint16_to_bfloat16(0x7fc0);
649
+ }
650
+ };
651
+
652
+ } // namespace Eigen
653
+
654
+ namespace Eigen {
655
+ namespace numext {
656
+
657
+ template<>
658
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
659
+ bool (isnan)(const Eigen::bfloat16& h) {
660
+ return (bfloat16_impl::isnan)(h);
661
+ }
662
+
663
+ template<>
664
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
665
+ bool (isinf)(const Eigen::bfloat16& h) {
666
+ return (bfloat16_impl::isinf)(h);
667
+ }
668
+
669
+ template<>
670
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
671
+ bool (isfinite)(const Eigen::bfloat16& h) {
672
+ return (bfloat16_impl::isfinite)(h);
673
+ }
674
+
675
+ template <>
676
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::bfloat16 bit_cast<Eigen::bfloat16, uint16_t>(const uint16_t& src) {
677
+ return Eigen::bfloat16(Eigen::bfloat16_impl::raw_uint16_to_bfloat16(src));
678
+ }
679
+
680
+ template <>
681
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC uint16_t bit_cast<uint16_t, Eigen::bfloat16>(const Eigen::bfloat16& src) {
682
+ return Eigen::bfloat16_impl::raw_bfloat16_as_uint16(src);
683
+ }
684
+
685
+ } // namespace numext
686
+ } // namespace Eigen
687
+
688
+ #if EIGEN_HAS_STD_HASH
689
+ namespace std {
690
+ template <>
691
+ struct hash<Eigen::bfloat16> {
692
+ EIGEN_STRONG_INLINE std::size_t operator()(const Eigen::bfloat16& a) const {
693
+ return static_cast<std::size_t>(Eigen::numext::bit_cast<Eigen::numext::uint16_t>(a));
694
+ }
695
+ };
696
+ } // namespace std
697
+ #endif
698
+
699
+
700
+ #endif // EIGEN_BFLOAT16_H