tomoto 0.2.3 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (347) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +6 -0
  3. data/README.md +8 -10
  4. data/ext/tomoto/extconf.rb +6 -2
  5. data/ext/tomoto/{ext.cpp → tomoto.cpp} +1 -1
  6. data/lib/tomoto/version.rb +1 -1
  7. data/lib/tomoto.rb +5 -1
  8. data/vendor/EigenRand/EigenRand/Core.h +10 -10
  9. data/vendor/EigenRand/EigenRand/Dists/Basic.h +208 -9
  10. data/vendor/EigenRand/EigenRand/Dists/Discrete.h +52 -31
  11. data/vendor/EigenRand/EigenRand/Dists/GammaPoisson.h +9 -8
  12. data/vendor/EigenRand/EigenRand/Dists/NormalExp.h +28 -21
  13. data/vendor/EigenRand/EigenRand/EigenRand +11 -6
  14. data/vendor/EigenRand/EigenRand/Macro.h +13 -7
  15. data/vendor/EigenRand/EigenRand/MorePacketMath.h +348 -740
  16. data/vendor/EigenRand/EigenRand/MvDists/Multinomial.h +5 -3
  17. data/vendor/EigenRand/EigenRand/MvDists/MvNormal.h +9 -3
  18. data/vendor/EigenRand/EigenRand/PacketFilter.h +11 -253
  19. data/vendor/EigenRand/EigenRand/PacketRandomEngine.h +21 -47
  20. data/vendor/EigenRand/EigenRand/RandUtils.h +50 -344
  21. data/vendor/EigenRand/EigenRand/arch/AVX/MorePacketMath.h +619 -0
  22. data/vendor/EigenRand/EigenRand/arch/AVX/PacketFilter.h +149 -0
  23. data/vendor/EigenRand/EigenRand/arch/AVX/RandUtils.h +228 -0
  24. data/vendor/EigenRand/EigenRand/arch/NEON/MorePacketMath.h +473 -0
  25. data/vendor/EigenRand/EigenRand/arch/NEON/PacketFilter.h +142 -0
  26. data/vendor/EigenRand/EigenRand/arch/NEON/RandUtils.h +126 -0
  27. data/vendor/EigenRand/EigenRand/arch/SSE/MorePacketMath.h +501 -0
  28. data/vendor/EigenRand/EigenRand/arch/SSE/PacketFilter.h +133 -0
  29. data/vendor/EigenRand/EigenRand/arch/SSE/RandUtils.h +120 -0
  30. data/vendor/EigenRand/EigenRand/doc.h +24 -12
  31. data/vendor/EigenRand/README.md +57 -4
  32. data/vendor/eigen/COPYING.APACHE +203 -0
  33. data/vendor/eigen/COPYING.BSD +1 -1
  34. data/vendor/eigen/COPYING.MINPACK +51 -52
  35. data/vendor/eigen/Eigen/Cholesky +0 -1
  36. data/vendor/eigen/Eigen/Core +112 -265
  37. data/vendor/eigen/Eigen/Eigenvalues +2 -3
  38. data/vendor/eigen/Eigen/Geometry +5 -8
  39. data/vendor/eigen/Eigen/Householder +0 -1
  40. data/vendor/eigen/Eigen/Jacobi +0 -1
  41. data/vendor/eigen/Eigen/KLUSupport +41 -0
  42. data/vendor/eigen/Eigen/LU +2 -5
  43. data/vendor/eigen/Eigen/OrderingMethods +0 -3
  44. data/vendor/eigen/Eigen/PaStiXSupport +1 -0
  45. data/vendor/eigen/Eigen/PardisoSupport +0 -0
  46. data/vendor/eigen/Eigen/QR +2 -3
  47. data/vendor/eigen/Eigen/QtAlignedMalloc +0 -1
  48. data/vendor/eigen/Eigen/SVD +0 -1
  49. data/vendor/eigen/Eigen/Sparse +0 -2
  50. data/vendor/eigen/Eigen/SparseCholesky +0 -8
  51. data/vendor/eigen/Eigen/SparseLU +4 -0
  52. data/vendor/eigen/Eigen/SparseQR +0 -1
  53. data/vendor/eigen/Eigen/src/Cholesky/LDLT.h +42 -27
  54. data/vendor/eigen/Eigen/src/Cholesky/LLT.h +39 -23
  55. data/vendor/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +90 -47
  56. data/vendor/eigen/Eigen/src/Core/ArithmeticSequence.h +413 -0
  57. data/vendor/eigen/Eigen/src/Core/Array.h +99 -11
  58. data/vendor/eigen/Eigen/src/Core/ArrayBase.h +3 -3
  59. data/vendor/eigen/Eigen/src/Core/ArrayWrapper.h +21 -21
  60. data/vendor/eigen/Eigen/src/Core/Assign.h +1 -1
  61. data/vendor/eigen/Eigen/src/Core/AssignEvaluator.h +125 -50
  62. data/vendor/eigen/Eigen/src/Core/Assign_MKL.h +10 -10
  63. data/vendor/eigen/Eigen/src/Core/BandMatrix.h +16 -16
  64. data/vendor/eigen/Eigen/src/Core/Block.h +56 -60
  65. data/vendor/eigen/Eigen/src/Core/BooleanRedux.h +29 -31
  66. data/vendor/eigen/Eigen/src/Core/CommaInitializer.h +7 -3
  67. data/vendor/eigen/Eigen/src/Core/CoreEvaluators.h +325 -272
  68. data/vendor/eigen/Eigen/src/Core/CoreIterators.h +5 -0
  69. data/vendor/eigen/Eigen/src/Core/CwiseBinaryOp.h +21 -22
  70. data/vendor/eigen/Eigen/src/Core/CwiseNullaryOp.h +153 -18
  71. data/vendor/eigen/Eigen/src/Core/CwiseUnaryOp.h +6 -6
  72. data/vendor/eigen/Eigen/src/Core/CwiseUnaryView.h +14 -10
  73. data/vendor/eigen/Eigen/src/Core/DenseBase.h +132 -42
  74. data/vendor/eigen/Eigen/src/Core/DenseCoeffsBase.h +25 -21
  75. data/vendor/eigen/Eigen/src/Core/DenseStorage.h +153 -71
  76. data/vendor/eigen/Eigen/src/Core/Diagonal.h +21 -23
  77. data/vendor/eigen/Eigen/src/Core/DiagonalMatrix.h +50 -2
  78. data/vendor/eigen/Eigen/src/Core/DiagonalProduct.h +1 -1
  79. data/vendor/eigen/Eigen/src/Core/Dot.h +10 -10
  80. data/vendor/eigen/Eigen/src/Core/EigenBase.h +10 -9
  81. data/vendor/eigen/Eigen/src/Core/ForceAlignedAccess.h +8 -4
  82. data/vendor/eigen/Eigen/src/Core/Fuzzy.h +3 -3
  83. data/vendor/eigen/Eigen/src/Core/GeneralProduct.h +20 -10
  84. data/vendor/eigen/Eigen/src/Core/GenericPacketMath.h +599 -152
  85. data/vendor/eigen/Eigen/src/Core/GlobalFunctions.h +40 -33
  86. data/vendor/eigen/Eigen/src/Core/IO.h +40 -7
  87. data/vendor/eigen/Eigen/src/Core/IndexedView.h +237 -0
  88. data/vendor/eigen/Eigen/src/Core/Inverse.h +9 -10
  89. data/vendor/eigen/Eigen/src/Core/Map.h +7 -7
  90. data/vendor/eigen/Eigen/src/Core/MapBase.h +10 -3
  91. data/vendor/eigen/Eigen/src/Core/MathFunctions.h +767 -125
  92. data/vendor/eigen/Eigen/src/Core/MathFunctionsImpl.h +118 -19
  93. data/vendor/eigen/Eigen/src/Core/Matrix.h +131 -25
  94. data/vendor/eigen/Eigen/src/Core/MatrixBase.h +21 -3
  95. data/vendor/eigen/Eigen/src/Core/NestByValue.h +25 -50
  96. data/vendor/eigen/Eigen/src/Core/NoAlias.h +4 -3
  97. data/vendor/eigen/Eigen/src/Core/NumTraits.h +107 -20
  98. data/vendor/eigen/Eigen/src/Core/PartialReduxEvaluator.h +232 -0
  99. data/vendor/eigen/Eigen/src/Core/PermutationMatrix.h +3 -31
  100. data/vendor/eigen/Eigen/src/Core/PlainObjectBase.h +152 -59
  101. data/vendor/eigen/Eigen/src/Core/Product.h +30 -25
  102. data/vendor/eigen/Eigen/src/Core/ProductEvaluators.h +192 -125
  103. data/vendor/eigen/Eigen/src/Core/Random.h +37 -1
  104. data/vendor/eigen/Eigen/src/Core/Redux.h +180 -170
  105. data/vendor/eigen/Eigen/src/Core/Ref.h +121 -23
  106. data/vendor/eigen/Eigen/src/Core/Replicate.h +8 -8
  107. data/vendor/eigen/Eigen/src/Core/Reshaped.h +454 -0
  108. data/vendor/eigen/Eigen/src/Core/ReturnByValue.h +7 -5
  109. data/vendor/eigen/Eigen/src/Core/Reverse.h +18 -12
  110. data/vendor/eigen/Eigen/src/Core/Select.h +8 -6
  111. data/vendor/eigen/Eigen/src/Core/SelfAdjointView.h +33 -20
  112. data/vendor/eigen/Eigen/src/Core/Solve.h +14 -14
  113. data/vendor/eigen/Eigen/src/Core/SolveTriangular.h +16 -16
  114. data/vendor/eigen/Eigen/src/Core/SolverBase.h +41 -3
  115. data/vendor/eigen/Eigen/src/Core/StableNorm.h +100 -70
  116. data/vendor/eigen/Eigen/src/Core/StlIterators.h +463 -0
  117. data/vendor/eigen/Eigen/src/Core/Stride.h +9 -4
  118. data/vendor/eigen/Eigen/src/Core/Swap.h +5 -4
  119. data/vendor/eigen/Eigen/src/Core/Transpose.h +88 -27
  120. data/vendor/eigen/Eigen/src/Core/Transpositions.h +26 -47
  121. data/vendor/eigen/Eigen/src/Core/TriangularMatrix.h +93 -75
  122. data/vendor/eigen/Eigen/src/Core/VectorBlock.h +5 -5
  123. data/vendor/eigen/Eigen/src/Core/VectorwiseOp.h +159 -70
  124. data/vendor/eigen/Eigen/src/Core/Visitor.h +137 -29
  125. data/vendor/eigen/Eigen/src/Core/arch/AVX/Complex.h +50 -129
  126. data/vendor/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +126 -337
  127. data/vendor/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +1092 -155
  128. data/vendor/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +65 -1
  129. data/vendor/eigen/Eigen/src/Core/arch/AVX512/Complex.h +422 -0
  130. data/vendor/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +207 -236
  131. data/vendor/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1482 -495
  132. data/vendor/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +89 -0
  133. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +152 -165
  134. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +19 -251
  135. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2937 -0
  136. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +221 -0
  137. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +629 -0
  138. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2042 -392
  139. data/vendor/eigen/Eigen/src/Core/arch/CUDA/Complex.h +235 -80
  140. data/vendor/eigen/Eigen/src/Core/arch/Default/BFloat16.h +700 -0
  141. data/vendor/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +102 -14
  142. data/vendor/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1649 -0
  143. data/vendor/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +110 -0
  144. data/vendor/eigen/Eigen/src/Core/arch/Default/Half.h +942 -0
  145. data/vendor/eigen/Eigen/src/Core/arch/Default/Settings.h +1 -1
  146. data/vendor/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +120 -0
  147. data/vendor/eigen/Eigen/src/Core/arch/{CUDA → GPU}/MathFunctions.h +16 -4
  148. data/vendor/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1685 -0
  149. data/vendor/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +80 -0
  150. data/vendor/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
  151. data/vendor/eigen/Eigen/src/Core/arch/MSA/Complex.h +648 -0
  152. data/vendor/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +387 -0
  153. data/vendor/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1233 -0
  154. data/vendor/eigen/Eigen/src/Core/arch/NEON/Complex.h +313 -219
  155. data/vendor/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +183 -0
  156. data/vendor/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +54 -70
  157. data/vendor/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4376 -549
  158. data/vendor/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1419 -0
  159. data/vendor/eigen/Eigen/src/Core/arch/SSE/Complex.h +59 -179
  160. data/vendor/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +65 -428
  161. data/vendor/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +893 -283
  162. data/vendor/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +65 -0
  163. data/vendor/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +44 -0
  164. data/vendor/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +752 -0
  165. data/vendor/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +49 -0
  166. data/vendor/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +232 -0
  167. data/vendor/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +301 -0
  168. data/vendor/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +670 -0
  169. data/vendor/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +694 -0
  170. data/vendor/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +85 -0
  171. data/vendor/eigen/Eigen/src/Core/arch/ZVector/Complex.h +212 -183
  172. data/vendor/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +101 -5
  173. data/vendor/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +510 -395
  174. data/vendor/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +11 -2
  175. data/vendor/eigen/Eigen/src/Core/functors/BinaryFunctors.h +112 -46
  176. data/vendor/eigen/Eigen/src/Core/functors/NullaryFunctors.h +31 -30
  177. data/vendor/eigen/Eigen/src/Core/functors/StlFunctors.h +32 -2
  178. data/vendor/eigen/Eigen/src/Core/functors/UnaryFunctors.h +355 -16
  179. data/vendor/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1075 -586
  180. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +49 -24
  181. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +41 -35
  182. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +6 -6
  183. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +4 -2
  184. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +382 -483
  185. data/vendor/eigen/Eigen/src/Core/products/Parallelizer.h +22 -5
  186. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +53 -30
  187. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +16 -8
  188. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +8 -6
  189. data/vendor/eigen/Eigen/src/Core/products/SelfadjointProduct.h +4 -4
  190. data/vendor/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +5 -4
  191. data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +33 -27
  192. data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +14 -12
  193. data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +36 -34
  194. data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +8 -4
  195. data/vendor/eigen/Eigen/src/Core/products/TriangularSolverVector.h +13 -10
  196. data/vendor/eigen/Eigen/src/Core/util/BlasUtil.h +304 -119
  197. data/vendor/eigen/Eigen/src/Core/util/ConfigureVectorization.h +512 -0
  198. data/vendor/eigen/Eigen/src/Core/util/Constants.h +25 -9
  199. data/vendor/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +26 -3
  200. data/vendor/eigen/Eigen/src/Core/util/ForwardDeclarations.h +29 -9
  201. data/vendor/eigen/Eigen/src/Core/util/IndexedViewHelper.h +186 -0
  202. data/vendor/eigen/Eigen/src/Core/util/IntegralConstant.h +272 -0
  203. data/vendor/eigen/Eigen/src/Core/util/MKL_support.h +8 -1
  204. data/vendor/eigen/Eigen/src/Core/util/Macros.h +709 -246
  205. data/vendor/eigen/Eigen/src/Core/util/Memory.h +222 -52
  206. data/vendor/eigen/Eigen/src/Core/util/Meta.h +355 -77
  207. data/vendor/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +5 -1
  208. data/vendor/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
  209. data/vendor/eigen/Eigen/src/Core/util/StaticAssert.h +8 -5
  210. data/vendor/eigen/Eigen/src/Core/util/SymbolicIndex.h +293 -0
  211. data/vendor/eigen/Eigen/src/Core/util/XprHelper.h +65 -30
  212. data/vendor/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +1 -1
  213. data/vendor/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +7 -4
  214. data/vendor/eigen/Eigen/src/Eigenvalues/EigenSolver.h +2 -2
  215. data/vendor/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +1 -1
  216. data/vendor/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +2 -2
  217. data/vendor/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +2 -2
  218. data/vendor/eigen/Eigen/src/Eigenvalues/RealQZ.h +9 -6
  219. data/vendor/eigen/Eigen/src/Eigenvalues/RealSchur.h +21 -9
  220. data/vendor/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +77 -43
  221. data/vendor/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +20 -15
  222. data/vendor/eigen/Eigen/src/Geometry/AlignedBox.h +99 -5
  223. data/vendor/eigen/Eigen/src/Geometry/AngleAxis.h +4 -4
  224. data/vendor/eigen/Eigen/src/Geometry/EulerAngles.h +3 -3
  225. data/vendor/eigen/Eigen/src/Geometry/Homogeneous.h +15 -11
  226. data/vendor/eigen/Eigen/src/Geometry/Hyperplane.h +1 -1
  227. data/vendor/eigen/Eigen/src/Geometry/OrthoMethods.h +3 -2
  228. data/vendor/eigen/Eigen/src/Geometry/ParametrizedLine.h +39 -2
  229. data/vendor/eigen/Eigen/src/Geometry/Quaternion.h +70 -14
  230. data/vendor/eigen/Eigen/src/Geometry/Rotation2D.h +3 -3
  231. data/vendor/eigen/Eigen/src/Geometry/Scaling.h +23 -5
  232. data/vendor/eigen/Eigen/src/Geometry/Transform.h +88 -67
  233. data/vendor/eigen/Eigen/src/Geometry/Translation.h +6 -12
  234. data/vendor/eigen/Eigen/src/Geometry/Umeyama.h +1 -1
  235. data/vendor/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +168 -0
  236. data/vendor/eigen/Eigen/src/Householder/BlockHouseholder.h +9 -2
  237. data/vendor/eigen/Eigen/src/Householder/Householder.h +8 -4
  238. data/vendor/eigen/Eigen/src/Householder/HouseholderSequence.h +123 -48
  239. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +15 -15
  240. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +7 -23
  241. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +5 -22
  242. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +41 -47
  243. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +51 -60
  244. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +70 -20
  245. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +2 -20
  246. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +11 -9
  247. data/vendor/eigen/Eigen/src/Jacobi/Jacobi.h +31 -10
  248. data/vendor/eigen/Eigen/src/KLUSupport/KLUSupport.h +358 -0
  249. data/vendor/eigen/Eigen/src/LU/Determinant.h +35 -19
  250. data/vendor/eigen/Eigen/src/LU/FullPivLU.h +29 -43
  251. data/vendor/eigen/Eigen/src/LU/InverseImpl.h +25 -8
  252. data/vendor/eigen/Eigen/src/LU/PartialPivLU.h +71 -58
  253. data/vendor/eigen/Eigen/src/LU/arch/InverseSize4.h +351 -0
  254. data/vendor/eigen/Eigen/src/OrderingMethods/Amd.h +7 -17
  255. data/vendor/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +297 -277
  256. data/vendor/eigen/Eigen/src/OrderingMethods/Ordering.h +6 -10
  257. data/vendor/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +1 -1
  258. data/vendor/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +9 -7
  259. data/vendor/eigen/Eigen/src/QR/ColPivHouseholderQR.h +41 -20
  260. data/vendor/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +100 -27
  261. data/vendor/eigen/Eigen/src/QR/FullPivHouseholderQR.h +59 -22
  262. data/vendor/eigen/Eigen/src/QR/HouseholderQR.h +48 -23
  263. data/vendor/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +25 -3
  264. data/vendor/eigen/Eigen/src/SVD/BDCSVD.h +183 -63
  265. data/vendor/eigen/Eigen/src/SVD/JacobiSVD.h +22 -14
  266. data/vendor/eigen/Eigen/src/SVD/SVDBase.h +83 -22
  267. data/vendor/eigen/Eigen/src/SVD/UpperBidiagonalization.h +3 -3
  268. data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +17 -9
  269. data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +12 -37
  270. data/vendor/eigen/Eigen/src/SparseCore/AmbiVector.h +3 -2
  271. data/vendor/eigen/Eigen/src/SparseCore/CompressedStorage.h +16 -0
  272. data/vendor/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +6 -6
  273. data/vendor/eigen/Eigen/src/SparseCore/SparseAssign.h +81 -27
  274. data/vendor/eigen/Eigen/src/SparseCore/SparseBlock.h +25 -57
  275. data/vendor/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +40 -11
  276. data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +11 -15
  277. data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +4 -2
  278. data/vendor/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +30 -8
  279. data/vendor/eigen/Eigen/src/SparseCore/SparseMatrix.h +126 -11
  280. data/vendor/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +5 -12
  281. data/vendor/eigen/Eigen/src/SparseCore/SparseProduct.h +13 -1
  282. data/vendor/eigen/Eigen/src/SparseCore/SparseRef.h +7 -7
  283. data/vendor/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +5 -2
  284. data/vendor/eigen/Eigen/src/SparseCore/SparseUtil.h +8 -0
  285. data/vendor/eigen/Eigen/src/SparseCore/SparseVector.h +1 -1
  286. data/vendor/eigen/Eigen/src/SparseCore/SparseView.h +1 -0
  287. data/vendor/eigen/Eigen/src/SparseLU/SparseLU.h +162 -12
  288. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +1 -1
  289. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +76 -2
  290. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +2 -2
  291. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +1 -1
  292. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +1 -1
  293. data/vendor/eigen/Eigen/src/SparseQR/SparseQR.h +19 -6
  294. data/vendor/eigen/Eigen/src/StlSupport/StdDeque.h +2 -12
  295. data/vendor/eigen/Eigen/src/StlSupport/StdList.h +2 -2
  296. data/vendor/eigen/Eigen/src/StlSupport/StdVector.h +2 -2
  297. data/vendor/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +6 -8
  298. data/vendor/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +175 -39
  299. data/vendor/eigen/Eigen/src/misc/lapacke.h +5 -4
  300. data/vendor/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +28 -2
  301. data/vendor/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +155 -11
  302. data/vendor/eigen/Eigen/src/plugins/BlockMethods.h +626 -242
  303. data/vendor/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +14 -0
  304. data/vendor/eigen/Eigen/src/plugins/IndexedViewMethods.h +262 -0
  305. data/vendor/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +4 -4
  306. data/vendor/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +10 -0
  307. data/vendor/eigen/Eigen/src/plugins/ReshapedMethods.h +149 -0
  308. data/vendor/eigen/README.md +2 -0
  309. data/vendor/eigen/bench/btl/README +1 -1
  310. data/vendor/eigen/bench/tensors/README +6 -7
  311. data/vendor/eigen/ci/README.md +56 -0
  312. data/vendor/eigen/demos/mix_eigen_and_c/README +1 -1
  313. data/vendor/eigen/unsupported/Eigen/CXX11/src/Tensor/README.md +213 -158
  314. data/vendor/eigen/unsupported/README.txt +1 -1
  315. data/vendor/tomotopy/README.kr.rst +21 -0
  316. data/vendor/tomotopy/README.rst +20 -0
  317. data/vendor/tomotopy/src/Labeling/FoRelevance.cpp +2 -2
  318. data/vendor/tomotopy/src/Labeling/Phraser.hpp +1 -1
  319. data/vendor/tomotopy/src/TopicModel/CTModel.hpp +2 -1
  320. data/vendor/tomotopy/src/TopicModel/DMRModel.hpp +2 -1
  321. data/vendor/tomotopy/src/TopicModel/DTModel.hpp +1 -1
  322. data/vendor/tomotopy/src/TopicModel/GDMRModel.hpp +2 -2
  323. data/vendor/tomotopy/src/TopicModel/HDP.h +1 -0
  324. data/vendor/tomotopy/src/TopicModel/HDPModel.hpp +53 -2
  325. data/vendor/tomotopy/src/TopicModel/HLDAModel.hpp +1 -1
  326. data/vendor/tomotopy/src/TopicModel/HPAModel.hpp +1 -0
  327. data/vendor/tomotopy/src/TopicModel/LDACVB0Model.hpp +2 -2
  328. data/vendor/tomotopy/src/TopicModel/LDAModel.hpp +16 -5
  329. data/vendor/tomotopy/src/TopicModel/LLDAModel.hpp +1 -0
  330. data/vendor/tomotopy/src/TopicModel/MGLDAModel.hpp +1 -0
  331. data/vendor/tomotopy/src/TopicModel/PLDAModel.hpp +1 -0
  332. data/vendor/tomotopy/src/TopicModel/PT.h +3 -1
  333. data/vendor/tomotopy/src/TopicModel/PTModel.hpp +31 -1
  334. data/vendor/tomotopy/src/TopicModel/SLDAModel.hpp +2 -2
  335. data/vendor/tomotopy/src/TopicModel/TopicModel.hpp +7 -5
  336. data/vendor/tomotopy/src/Utils/EigenAddonOps.hpp +36 -1
  337. data/vendor/tomotopy/src/Utils/exception.h +6 -0
  338. data/vendor/tomotopy/src/Utils/sample.hpp +14 -12
  339. data/vendor/tomotopy/src/Utils/sse_gamma.h +0 -3
  340. metadata +60 -14
  341. data/vendor/eigen/Eigen/CMakeLists.txt +0 -19
  342. data/vendor/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -674
  343. data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
  344. data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
  345. data/vendor/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
  346. data/vendor/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
  347. data/vendor/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
@@ -0,0 +1,133 @@
1
+ /**
2
+ * @file PacketFilter.h
3
+ * @author bab2min (bab2min@gmail.com)
4
+ * @brief
5
+ * @version 0.4.0
6
+ * @date 2021-09-17
7
+ *
8
+ * @copyright Copyright (c) 2020-2021
9
+ *
10
+ */
11
+
12
+ #ifndef EIGENRAND_PACKET_FILTER_SSE_H
13
+ #define EIGENRAND_PACKET_FILTER_SSE_H
14
+
15
+ #include <xmmintrin.h>
16
+
17
+ namespace Eigen
18
+ {
19
+ namespace Rand
20
+ {
21
+ namespace detail
22
+ {
23
+ template<>
24
+ class CompressMask<16>
25
+ {
26
+ std::array<std::array<uint8_t, 16>, 7> idx;
27
+ std::array<internal::Packet4f, 4> selector;
28
+ std::array<uint8_t, 16> cnt;
29
+
30
+ static uint8_t make_compress(int mask, int offset = 0)
31
+ {
32
+ uint8_t ret = 0;
33
+ int n = offset;
34
+ for (int i = 0; i < 4; ++i)
35
+ {
36
+ int l = mask & 1;
37
+ mask >>= 1;
38
+ if (l)
39
+ {
40
+ if (n >= 0) ret |= (i & 3) << (2 * n);
41
+ if (++n >= 4) break;
42
+ }
43
+ }
44
+ return ret;
45
+ }
46
+
47
+ static uint8_t count(int mask)
48
+ {
49
+ uint8_t ret = 0;
50
+ for (int i = 0; i < 4; ++i)
51
+ {
52
+ ret += mask & 1;
53
+ mask >>= 1;
54
+ }
55
+ return ret;
56
+ }
57
+
58
+ CompressMask()
59
+ {
60
+ for (int i = 0; i < 16; ++i)
61
+ {
62
+ for (int o = 0; o < 7; ++o)
63
+ {
64
+ idx[o][i] = make_compress(i, o < 4 ? o : o - 7);
65
+ }
66
+
67
+ cnt[i] = count(i);
68
+ }
69
+
70
+ selector[0] = _mm_castsi128_ps(_mm_setr_epi32(0, 0, 0, 0));
71
+ selector[1] = _mm_castsi128_ps(_mm_setr_epi32(-1, 0, 0, 0));
72
+ selector[2] = _mm_castsi128_ps(_mm_setr_epi32(-1, -1, 0, 0));
73
+ selector[3] = _mm_castsi128_ps(_mm_setr_epi32(-1, -1, -1, 0));
74
+ }
75
+
76
+ static EIGEN_STRONG_INLINE internal::Packet4f permute(const internal::Packet4f& p, uint8_t i)
77
+ {
78
+ float u[4];
79
+ _mm_storeu_ps(u, p);
80
+ return _mm_setr_ps(u[i & 3], u[(i >> 2) & 3], u[(i >> 4) & 3], u[(i >> 6) & 3]);
81
+ }
82
+
83
+ public:
84
+
85
+ enum { full_size = 4 };
86
+
87
+ static const CompressMask& get_inst()
88
+ {
89
+ static CompressMask cm;
90
+ return cm;
91
+ }
92
+
93
+ template<typename Packet>
94
+ EIGEN_STRONG_INLINE int compress_append(Packet& _value, const Packet& _mask,
95
+ Packet& _rest, int rest_cnt, bool& full) const
96
+ {
97
+ auto& value = reinterpret_cast<internal::Packet4f&>(_value);
98
+ auto& mask = reinterpret_cast<const internal::Packet4f&>(_mask);
99
+ auto& rest = reinterpret_cast<internal::Packet4f&>(_rest);
100
+
101
+ int m = _mm_movemask_ps(mask);
102
+ if (cnt[m] == full_size)
103
+ {
104
+ full = true;
105
+ return rest_cnt;
106
+ }
107
+
108
+ auto p1 = permute(value, idx[rest_cnt][m]);
109
+ p1 = internal::pblendv(selector[rest_cnt], rest, p1);
110
+
111
+ auto new_cnt = rest_cnt + cnt[m];
112
+ if (new_cnt >= full_size)
113
+ {
114
+ if (new_cnt > full_size)
115
+ {
116
+ rest = permute(value, idx[new_cnt - cnt[m] + full_size - 1][m]);
117
+ }
118
+ value = p1;
119
+ full = true;
120
+ return new_cnt - full_size;
121
+ }
122
+ else
123
+ {
124
+ rest = p1;
125
+ full = false;
126
+ return new_cnt;
127
+ }
128
+ }
129
+ };
130
+ }
131
+ }
132
+ }
133
+ #endif
@@ -0,0 +1,120 @@
1
+ /**
2
+ * @file RandUtils.h
3
+ * @author bab2min (bab2min@gmail.com)
4
+ * @brief
5
+ * @version 0.4.0
6
+ * @date 2021-09-17
7
+ *
8
+ * @copyright Copyright (c) 2020-2021
9
+ *
10
+ */
11
+
12
+ #ifndef EIGENRAND_RAND_UTILS_SSE_H
13
+ #define EIGENRAND_RAND_UTILS_SSE_H
14
+
15
+ #include <xmmintrin.h>
16
+
17
+ namespace Eigen
18
+ {
19
+ namespace internal
20
+ {
21
+ template<typename Rng, typename RngResult>
22
+ struct RawbitsMaker<Packet4i, Rng, RngResult, Rand::RandomEngineType::scalar>
23
+ {
24
+ EIGEN_STRONG_INLINE Packet4i rawbits(Rng& rng)
25
+ {
26
+ if (sizeof(RngResult) == 8)
27
+ {
28
+ return _mm_set_epi64x(rng(), rng());
29
+ }
30
+ else
31
+ {
32
+ return _mm_set_epi32(rng(), rng(), rng(), rng());
33
+ }
34
+ }
35
+
36
+ EIGEN_STRONG_INLINE Packet4i rawbits_34(Rng& rng)
37
+ {
38
+ if (sizeof(RngResult) == 8)
39
+ {
40
+ return _mm_set_epi64x(rng(), rng());
41
+ }
42
+ else
43
+ {
44
+ #ifdef EIGEN_VECTORIZE_SSSE3
45
+ Packet4i p = _mm_setr_epi32(rng(), rng(), rng(), 0);
46
+ return _mm_shuffle_epi8(p, _mm_setr_epi8(
47
+ 0, 1, 2, 3,
48
+ 4, 5, 6, 7,
49
+ 8, 9, 10, 11,
50
+ 3, 7, 11, 11));
51
+ #else
52
+ return _mm_set_epi32(rng(), rng(), rng(), rng());
53
+ #endif
54
+ }
55
+ }
56
+
57
+ EIGEN_STRONG_INLINE Packet4i rawbits_half(Rng& rng)
58
+ {
59
+ if (sizeof(decltype(rng())) == 8)
60
+ {
61
+ return _mm_set_epi64x(0, rng());
62
+ }
63
+ else
64
+ {
65
+ return _mm_setr_epi32(rng(), rng(), 0, 0);
66
+ }
67
+ }
68
+ };
69
+
70
+ template<typename Rng>
71
+ struct RawbitsMaker<Packet4i, Rng, Packet4i, Rand::RandomEngineType::packet>
72
+ {
73
+ EIGEN_STRONG_INLINE Packet4i rawbits(Rng& rng)
74
+ {
75
+ return rng();
76
+ }
77
+
78
+ EIGEN_STRONG_INLINE Packet4i rawbits_34(Rng& rng)
79
+ {
80
+ return rng();
81
+ }
82
+
83
+ EIGEN_STRONG_INLINE Packet4i rawbits_half(Rng& rng)
84
+ {
85
+ return rng();
86
+ }
87
+ };
88
+
89
+ template<typename Rng>
90
+ struct UniformRealUtils<Packet4f, Rng> : public RawbitsMaker<Packet4i, Rng>
91
+ {
92
+ EIGEN_STRONG_INLINE Packet4f zero_to_one(Rng& rng)
93
+ {
94
+ return pdiv((Packet4f)_mm_cvtepi32_ps(pand(this->rawbits(rng), pset1<Packet4i>(0x7FFFFFFF))),
95
+ pset1<Packet4f>(0x7FFFFFFF));
96
+ }
97
+
98
+ EIGEN_STRONG_INLINE Packet4f uniform_real(Rng& rng)
99
+ {
100
+ return bit_to_ur_float(this->rawbits_34(rng));
101
+ }
102
+ };
103
+
104
+ template<typename Rng>
105
+ struct UniformRealUtils<Packet2d, Rng> : public RawbitsMaker<Packet4i, Rng>
106
+ {
107
+ EIGEN_STRONG_INLINE Packet2d zero_to_one(Rng& rng)
108
+ {
109
+ return pdiv((Packet2d)_mm_cvtepi32_pd(pand(this->rawbits_half(rng), pset1<Packet4i>(0x7FFFFFFF))),
110
+ pset1<Packet2d>(0x7FFFFFFF));
111
+ }
112
+
113
+ EIGEN_STRONG_INLINE Packet2d uniform_real(Rng& rng)
114
+ {
115
+ return bit_to_ur_double(this->rawbits(rng));
116
+ }
117
+ };
118
+ }
119
+ }
120
+ #endif
@@ -12,7 +12,7 @@
12
12
  You can get 5~10 times speed by just replacing old Eigen's Random
13
13
  or unvectorizable c++11 random number generators with EigenRand.
14
14
 
15
- EigenRand currently supports only x86-64 architecture (SSE, AVX, AVX2).
15
+ EigenRand currently supports only x86-64 architecture (SSE, AVX, AVX2) and ARM64 NEON (experimental).
16
16
 
17
17
  EigenRand is distributed under the MIT License.
18
18
 
@@ -29,7 +29,7 @@
29
29
 
30
30
  You can install EigenRand by just downloading the source codes from [the repository](https://github.com/bab2min/EigenRand/releases).
31
31
  Since EigenRand is a header-only library like Eigen, none of binaries needs to be installed.
32
- All you need is [Eigen 3.3.7](http://eigen.tuxfamily.org/index.php?title=Main_Page) and C++11 compiler.
32
+ All you need is [Eigen 3.3.4 ~ 3.4.0](http://eigen.tuxfamily.org/index.php?title=Main_Page) and C++11 compiler.
33
33
 
34
34
  @section getting_started_2 Simple Random Matrix Generators
35
35
  @code
@@ -43,7 +43,7 @@
43
43
  {
44
44
  // Initialize random number generator with seed=42 for following codes.
45
45
  // Or you can use C++11 RNG such as std::mt19937 or std::ranlux48.
46
- Rand::Vmt19937_64 urng{ 42 };
46
+ Rand::P8_mt19937_64 urng{ 42 };
47
47
 
48
48
  // this will generate 4x4 real matrix with range [-1, 1]
49
49
  MatrixXf mat = Rand::balanced<MatrixXf>(4, 4, urng);
@@ -69,7 +69,7 @@
69
69
 
70
70
  int main()
71
71
  {
72
- Rand::Vmt19937_64 urng{ 42 };
72
+ Rand::P8_mt19937_64 urng{ 42 };
73
73
 
74
74
  MatrixXf mat{ 10, 10 };
75
75
  // this will generate a random matrix in MatrixXf type with the shape (10, 10)
@@ -98,7 +98,7 @@
98
98
 
99
99
  int main()
100
100
  {
101
- Rand::Vmt19937_64 urng{ 42 };
101
+ Rand::P8_mt19937_64 urng{ 42 };
102
102
  // constructs generator for normal distribution with mean=1.0, stdev=2.0
103
103
  Rand::NormalGen<float> norm_gen{ 1.0, 2.0 };
104
104
 
@@ -126,7 +126,7 @@
126
126
 
127
127
  int main()
128
128
  {
129
- Rand::Vmt19937_64 urng{ 42 };
129
+ Rand::P8_mt19937_64 urng{ 42 };
130
130
 
131
131
  Vector4f mean{ 0, 1, 2, 3 };
132
132
  Matrix4f cov;
@@ -182,7 +182,7 @@
182
182
  | `Eigen::Rand::lognormal` | `Eigen::Rand::LognormalGen` | float, double | generates real values on a [lognormal distribution](https://en.wikipedia.org/wiki/Lognormal_distribution). | `std::lognormal_distribution` |
183
183
  | `Eigen::Rand::normal` | `Eigen::Rand::StdNormalGen`, `Eigen::Rand::NormalGen` | float, double | generates real values on a [normal distribution](https://en.wikipedia.org/wiki/Normal_distribution). | `std::normal_distribution` |
184
184
  | `Eigen::Rand::studentT` | `Eigen::Rand::StudentTGen` | float, double | generates real values on the [Student's t distribution](https://en.wikipedia.org/wiki/Student%27s_t-distribution). | `std::student_t_distribution` |
185
- | `Eigen::Rand::uniformReal` | `Eigen::Rand::UniformRealGen` | float, double | generates real values in the `[-1, 0)` range. | `std::generate_canonical` |
185
+ | `Eigen::Rand::uniformReal` | `Eigen::Rand::StdUniformRealGen`, `Eigen::Rand::UniformRealGen` | float, double | generates real values in the `[-1, 0)` range. | `std::generate_canonical` |
186
186
  | `Eigen::Rand::weibull` | `Eigen::Rand::WeibullGen` | float, double | generates real values on the [Weibull distribution](https://en.wikipedia.org/wiki/Weibull_distribution). | `std::weibull_distribution` |
187
187
 
188
188
  @section list_of_supported_distribution_2 Random Distributions for Integer Types
@@ -200,7 +200,7 @@
200
200
  @section list_of_distribution_3 Multivariate Random Distributions
201
201
  | Generator | Description | Equivalent to |
202
202
  |:---:|:---:|:---:|
203
- | `Eigen::Rand::MultinomialGen` | generates real vectors on a [multinomial distribution](https://en.wikipedia.org/wiki/Multinomial_distribution) | [scipy.stats.multinomial in Python](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.multinomial.html#scipy.stats.multinomial) |
203
+ | `Eigen::Rand::MultinomialGen` | generates integer vectors on a [multinomial distribution](https://en.wikipedia.org/wiki/Multinomial_distribution) | [scipy.stats.multinomial in Python](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.multinomial.html#scipy.stats.multinomial) |
204
204
  | `Eigen::Rand::DirichletGen` | generates real vectors on a [Dirichlet distribution](https://en.wikipedia.org/wiki/Dirichlet_distribution) | [scipy.stats.dirichlet in Python](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.dirichlet.html#scipy.stats.dirichlet) |
205
205
  | `Eigen::Rand::MvNormalGen` | generates real vectors on a [multivariate normal distribution](https://en.wikipedia.org/wiki/Multivariate_normal_distribution) | [scipy.stats.multivariate_normal in Python](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.multivariate_normal.html#scipy.stats.multivariate_normal) |
206
206
  | `Eigen::Rand::WishartGen` | generates real matrices on a [Wishart distribution](https://en.wikipedia.org/wiki/Wishart_distribution) | [scipy.stats.wishart in Python](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.wishart.html#scipy.stats.wishart) |
@@ -211,11 +211,13 @@
211
211
  | | Description | Equivalent to |
212
212
  |:---:|:---:|:---:|
213
213
  | `Eigen::Rand::Vmt19937_64` | a vectorized version of Mersenne Twister algorithm. It generates two 64bit random integers simultaneously with SSE2 and four integers with AVX2. | `std::mt19937_64` |
214
-
214
+ | `Eigen::Rand::P8_mt19937_64` | a vectorized version of Mersenne Twister algorithm. Since it generates eight 64bit random integers simultaneously, the random values are the same regardless of architecture. | |
215
215
  *
216
216
  * @page performance Performance
217
217
  * The following charts show the relative speed-up of EigenRand compared to Reference(C++ std or Eigen functions). Detailed results are below the charts.
218
218
 
219
+ @section performance_1 Overview of Results at x86-64 Architecture
220
+
219
221
  \image html perf_no_vect.png
220
222
 
221
223
  \image html perf_sse2.png
@@ -230,7 +232,17 @@
230
232
 
231
233
  * The following result is a measure of the time in seconds it takes to generate 1M random numbers. It shows the average of 20 times.
232
234
 
233
- @section performance_1 Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz (Ubuntu 16.04, gcc7.5)
235
+ @section performance_2 Overview of Results at ARM64 NEON (experimental)
236
+
237
+ \image html perf_neon_v0.3.90.png
238
+
239
+ \image html perf_mv_part1_neon_v0.3.90.png
240
+
241
+ \image html perf_mv_part2_neon_v0.3.90.png
242
+
243
+ * The following result is a measure of the time in seconds it takes to generate 1M random numbers. It shows the average of 20 times.
244
+
245
+ @section performance_3 Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz (Ubuntu 16.04, gcc7.5)
234
246
 
235
247
  | | C++ std (or Eigen) | EigenRand (No Vect.) | EigenRand (SSE2) | EigenRand (SSSE3) | EigenRand (AVX) | EigenRand (AVX2) |
236
248
  |---|---:|---:|---:|---:|---:|---:|
@@ -289,7 +301,7 @@
289
301
  | `Wishart(4)` | 71.19 | 5.28 | 2.70 | 2.93 | 2.04 | 1.94 |
290
302
  | `Wishart(50)` | 1185.26 | 1360.49 | 492.91 | 517.44 | 359.03 | 324.60 |
291
303
 
292
- @section performance_2 AMD Ryzen 7 3700x CPU @ 3.60GHz (Windows 10, MSVC2017)
304
+ @section performance_4 AMD Ryzen 7 3700x CPU @ 3.60GHz (Windows 10, MSVC2017)
293
305
 
294
306
  | | C++ std (or Eigen) | EigenRand (SSE2) | EigenRand (AVX) | EigenRand (AVX2) |
295
307
  |---|---:|---:|---:|---:|
@@ -334,4 +346,4 @@
334
346
  | Mersenne Twister(int64) | 5.1 | 3.9 | 3.9 | 3.3 |
335
347
 
336
348
  *
337
- */
349
+ */
@@ -16,13 +16,36 @@ You can get 5~10 times speed by just replacing old Eigen's Random or unvectoriza
16
16
  * 5~10 times faster than non-vectorized functions
17
17
  * Header-only (like Eigen)
18
18
  * Can be easily integrated with Eigen's expressions
19
- * Currently supports only x86 and x86-64 architecture
19
+ * Currently supports only x86, x86-64(up to AVX2), and ARM64 NEON (experimental) architecture.
20
20
 
21
21
  ## Requirement
22
22
 
23
- * Eigen 3.3.7 or later
23
+ * Eigen 3.3.4 ~ 3.4.0
24
24
  * C++11-compatible compilers
25
25
 
26
+ ## Build for Test & Benchmark
27
+ You can build a test binary to verify if EigenRand is working well.
28
+ First, make sure you have Eigen 3.3.4~3.4.0 installed in your compiler include folder. Also make sure you have cmake 3.9 or higher installed.
29
+ After then, you can build it following:
30
+ ```console
31
+ $ git clone https://github.com/bab2min/EigenRand
32
+ $ cd EigenRand
33
+ $ git clone https://github.com/google/googletest
34
+ $ pushd googletest && git checkout v1.8.x && popd
35
+ $ mkdir build && cd build
36
+ $ cmake -DCMAKE_BUILD_TYPE=Release ..
37
+ $ make
38
+ $ ./test/EigenRand-test # Binary for unit test
39
+ $ ./EigenRand-accuracy # Binary for accuracy test of univariate random distributions
40
+ $ ./EigenRand-benchmark # Binary for performance test of univariate random distributions
41
+ $ ./EigenRand-benchmark-mv # Binary for performance test of multivariate random distributions
42
+ ```
43
+
44
+ You can specify additional compiler arguments including target machine options (e.g. -mavx2, -march) like:
45
+ ```console
46
+ $ cmake -DCMAKE_BUILD_TYPE=Release -DEIGENRAND_CXX_FLAGS="-march=native" ..
47
+ ```
48
+
26
49
  ## Documentation
27
50
 
28
51
  https://bab2min.github.io/eigenrand/
@@ -63,7 +86,7 @@ https://bab2min.github.io/eigenrand/
63
86
 
64
87
  | Generator | Description | Equivalent to |
65
88
  |:---:|:---:|:---:|
66
- | `Eigen::Rand::MultinomialGen` | generates real vectors on a [multinomial distribution](https://en.wikipedia.org/wiki/Multinomial_distribution) | [scipy.stats.multinomial in Python](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.multinomial.html#scipy.stats.multinomial) |
89
+ | `Eigen::Rand::MultinomialGen` | generates integer vectors on a [multinomial distribution](https://en.wikipedia.org/wiki/Multinomial_distribution) | [scipy.stats.multinomial in Python](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.multinomial.html#scipy.stats.multinomial) |
67
90
  | `Eigen::Rand::DirichletGen` | generates real vectors on a [Dirichlet distribution](https://en.wikipedia.org/wiki/Dirichlet_distribution) | [scipy.stats.dirichlet in Python](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.dirichlet.html#scipy.stats.dirichlet) |
68
91
  | `Eigen::Rand::MvNormalGen` | generates real vectors on a [multivariate normal distribution](https://en.wikipedia.org/wiki/Multivariate_normal_distribution) | [scipy.stats.multivariate_normal in Python](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.multivariate_normal.html#scipy.stats.multivariate_normal) |
69
92
  | `Eigen::Rand::WishartGen` | generates real matrices on a [Wishart distribution](https://en.wikipedia.org/wiki/Wishart_distribution) | [scipy.stats.wishart in Python](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.wishart.html#scipy.stats.wishart) |
@@ -74,7 +97,8 @@ https://bab2min.github.io/eigenrand/
74
97
 
75
98
  | | Description | Equivalent to |
76
99
  |:---:|:---:|:---:|
77
- | `Eigen::Rand::Vmt19937_64` | a vectorized version of Mersenne Twister algorithm. It generates two 64bit random integers simultaneously with SSE2 and four integers with AVX2. | `std::mt19937_64` |
100
+ | `Eigen::Rand::Vmt19937_64` | a vectorized version of Mersenne Twister algorithm. It generates two 64bit random integers simultaneously with SSE2 & NEON and four integers with AVX2. | `std::mt19937_64` |
101
+ | `Eigen::Rand::P8_mt19937_64` | a vectorized version of Mersenne Twister algorithm. Since it generates eight 64bit random integers simultaneously, the random values are the same regardless of architecture. | |
78
102
 
79
103
  ## Performance
80
104
  The following charts show the relative speed-up of EigenRand compared to references(equivalent functions of C++ std or Eigen).
@@ -318,6 +342,19 @@ It shows the average of 20 times.
318
342
  | Mersenne Twister(int32) | 5.0 | 3.4 | 3.4 | 3.3 |
319
343
  | Mersenne Twister(int64) | 5.1 | 3.9 | 3.9 | 3.3 |
320
344
 
345
+ ### ARM64 NEON (Cortex-A73)
346
+ Currently, Support for ARM64 NEON is experimental and the result may be sub-optimal.
347
+ Also keep in mind that NEON does not support vectorization of double type.
348
+ So if you use double type generators, they would fallback into scalar computations.
349
+
350
+ ![Perf_no_vect](/doxygen/images/perf_neon_v0.3.90.png)
351
+
352
+ The following charts are about multivariate distributions.
353
+ ![Perf_no_vect](/doxygen/images/perf_mv_part1_neon_v0.3.90.png)
354
+ ![Perf_no_vect](/doxygen/images/perf_mv_part2_neon_v0.3.90.png)
355
+
356
+ Cases filled with orange are generators that are slower than reference functions.
357
+
321
358
  ## Accuracy
322
359
  Since vectorized mathematical functions may have a loss of precision, I measured how well the generated random number fits its actual distribution.
323
360
  32768 samples were generated and Earth Mover's Distance between samples and its actual distribution was calculated for each distribution.
@@ -348,6 +385,22 @@ MIT License
348
385
 
349
386
  ## History
350
387
 
388
+ ### 0.4.0 alpha (2021-09-28)
389
+ * Now EigenRand supports ARM & ARM64 NEON architecture experimentally. Please report issues about ARM & ARM64 NEON.
390
+ * Now EigenRand has compatibility to `Eigen 3.4.0`.
391
+
392
+ ### 0.3.5 (2021-07-16)
393
+ * Now `UniformRealGen` generates accurate double values.
394
+ * Fixed a bug where non-vectorized double-type `NormalGen` would get stuck in an infinite loop.
395
+ * New overloading functions `balanced` and `balancedLike` which generate values over `[a, b]` were added.
396
+
397
+ ### 0.3.4 (2021-04-25)
398
+ * Now Eigen 3.3.4 - 3.3.6 versions are additionally supported.
399
+
400
+ ### 0.3.3 (2021-03-30)
401
+ * A compilation failure with some RNGs in `double` type was fixed.
402
+ * An internal function name `plgamma` conflict with one of `SpecialFunctionsPacketMath.h` was fixed.
403
+
351
404
  ### 0.3.2 (2021-03-26)
352
405
  * A default constructor for `DiscreteGen` was added.
353
406
 
@@ -0,0 +1,203 @@
1
+ /*
2
+ Apache License
3
+ Version 2.0, January 2004
4
+ http://www.apache.org/licenses/
5
+
6
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7
+
8
+ 1. Definitions.
9
+
10
+ "License" shall mean the terms and conditions for use, reproduction,
11
+ and distribution as defined by Sections 1 through 9 of this document.
12
+
13
+ "Licensor" shall mean the copyright owner or entity authorized by
14
+ the copyright owner that is granting the License.
15
+
16
+ "Legal Entity" shall mean the union of the acting entity and all
17
+ other entities that control, are controlled by, or are under common
18
+ control with that entity. For the purposes of this definition,
19
+ "control" means (i) the power, direct or indirect, to cause the
20
+ direction or management of such entity, whether by contract or
21
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
22
+ outstanding shares, or (iii) beneficial ownership of such entity.
23
+
24
+ "You" (or "Your") shall mean an individual or Legal Entity
25
+ exercising permissions granted by this License.
26
+
27
+ "Source" form shall mean the preferred form for making modifications,
28
+ including but not limited to software source code, documentation
29
+ source, and configuration files.
30
+
31
+ "Object" form shall mean any form resulting from mechanical
32
+ transformation or translation of a Source form, including but
33
+ not limited to compiled object code, generated documentation,
34
+ and conversions to other media types.
35
+
36
+ "Work" shall mean the work of authorship, whether in Source or
37
+ Object form, made available under the License, as indicated by a
38
+ copyright notice that is included in or attached to the work
39
+ (an example is provided in the Appendix below).
40
+
41
+ "Derivative Works" shall mean any work, whether in Source or Object
42
+ form, that is based on (or derived from) the Work and for which the
43
+ editorial revisions, annotations, elaborations, or other modifications
44
+ represent, as a whole, an original work of authorship. For the purposes
45
+ of this License, Derivative Works shall not include works that remain
46
+ separable from, or merely link (or bind by name) to the interfaces of,
47
+ the Work and Derivative Works thereof.
48
+
49
+ "Contribution" shall mean any work of authorship, including
50
+ the original version of the Work and any modifications or additions
51
+ to that Work or Derivative Works thereof, that is intentionally
52
+ submitted to Licensor for inclusion in the Work by the copyright owner
53
+ or by an individual or Legal Entity authorized to submit on behalf of
54
+ the copyright owner. For the purposes of this definition, "submitted"
55
+ means any form of electronic, verbal, or written communication sent
56
+ to the Licensor or its representatives, including but not limited to
57
+ communication on electronic mailing lists, source code control systems,
58
+ and issue tracking systems that are managed by, or on behalf of, the
59
+ Licensor for the purpose of discussing and improving the Work, but
60
+ excluding communication that is conspicuously marked or otherwise
61
+ designated in writing by the copyright owner as "Not a Contribution."
62
+
63
+ "Contributor" shall mean Licensor and any individual or Legal Entity
64
+ on behalf of whom a Contribution has been received by Licensor and
65
+ subsequently incorporated within the Work.
66
+
67
+ 2. Grant of Copyright License. Subject to the terms and conditions of
68
+ this License, each Contributor hereby grants to You a perpetual,
69
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
70
+ copyright license to reproduce, prepare Derivative Works of,
71
+ publicly display, publicly perform, sublicense, and distribute the
72
+ Work and such Derivative Works in Source or Object form.
73
+
74
+ 3. Grant of Patent License. Subject to the terms and conditions of
75
+ this License, each Contributor hereby grants to You a perpetual,
76
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
77
+ (except as stated in this section) patent license to make, have made,
78
+ use, offer to sell, sell, import, and otherwise transfer the Work,
79
+ where such license applies only to those patent claims licensable
80
+ by such Contributor that are necessarily infringed by their
81
+ Contribution(s) alone or by combination of their Contribution(s)
82
+ with the Work to which such Contribution(s) was submitted. If You
83
+ institute patent litigation against any entity (including a
84
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
85
+ or a Contribution incorporated within the Work constitutes direct
86
+ or contributory patent infringement, then any patent licenses
87
+ granted to You under this License for that Work shall terminate
88
+ as of the date such litigation is filed.
89
+
90
+ 4. Redistribution. You may reproduce and distribute copies of the
91
+ Work or Derivative Works thereof in any medium, with or without
92
+ modifications, and in Source or Object form, provided that You
93
+ meet the following conditions:
94
+
95
+ (a) You must give any other recipients of the Work or
96
+ Derivative Works a copy of this License; and
97
+
98
+ (b) You must cause any modified files to carry prominent notices
99
+ stating that You changed the files; and
100
+
101
+ (c) You must retain, in the Source form of any Derivative Works
102
+ that You distribute, all copyright, patent, trademark, and
103
+ attribution notices from the Source form of the Work,
104
+ excluding those notices that do not pertain to any part of
105
+ the Derivative Works; and
106
+
107
+ (d) If the Work includes a "NOTICE" text file as part of its
108
+ distribution, then any Derivative Works that You distribute must
109
+ include a readable copy of the attribution notices contained
110
+ within such NOTICE file, excluding those notices that do not
111
+ pertain to any part of the Derivative Works, in at least one
112
+ of the following places: within a NOTICE text file distributed
113
+ as part of the Derivative Works; within the Source form or
114
+ documentation, if provided along with the Derivative Works; or,
115
+ within a display generated by the Derivative Works, if and
116
+ wherever such third-party notices normally appear. The contents
117
+ of the NOTICE file are for informational purposes only and
118
+ do not modify the License. You may add Your own attribution
119
+ notices within Derivative Works that You distribute, alongside
120
+ or as an addendum to the NOTICE text from the Work, provided
121
+ that such additional attribution notices cannot be construed
122
+ as modifying the License.
123
+
124
+ You may add Your own copyright statement to Your modifications and
125
+ may provide additional or different license terms and conditions
126
+ for use, reproduction, or distribution of Your modifications, or
127
+ for any such Derivative Works as a whole, provided Your use,
128
+ reproduction, and distribution of the Work otherwise complies with
129
+ the conditions stated in this License.
130
+
131
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
132
+ any Contribution intentionally submitted for inclusion in the Work
133
+ by You to the Licensor shall be under the terms and conditions of
134
+ this License, without any additional terms or conditions.
135
+ Notwithstanding the above, nothing herein shall supersede or modify
136
+ the terms of any separate license agreement you may have executed
137
+ with Licensor regarding such Contributions.
138
+
139
+ 6. Trademarks. This License does not grant permission to use the trade
140
+ names, trademarks, service marks, or product names of the Licensor,
141
+ except as required for reasonable and customary use in describing the
142
+ origin of the Work and reproducing the content of the NOTICE file.
143
+
144
+ 7. Disclaimer of Warranty. Unless required by applicable law or
145
+ agreed to in writing, Licensor provides the Work (and each
146
+ Contributor provides its Contributions) on an "AS IS" BASIS,
147
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148
+ implied, including, without limitation, any warranties or conditions
149
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150
+ PARTICULAR PURPOSE. You are solely responsible for determining the
151
+ appropriateness of using or redistributing the Work and assume any
152
+ risks associated with Your exercise of permissions under this License.
153
+
154
+ 8. Limitation of Liability. In no event and under no legal theory,
155
+ whether in tort (including negligence), contract, or otherwise,
156
+ unless required by applicable law (such as deliberate and grossly
157
+ negligent acts) or agreed to in writing, shall any Contributor be
158
+ liable to You for damages, including any direct, indirect, special,
159
+ incidental, or consequential damages of any character arising as a
160
+ result of this License or out of the use or inability to use the
161
+ Work (including but not limited to damages for loss of goodwill,
162
+ work stoppage, computer failure or malfunction, or any and all
163
+ other commercial damages or losses), even if such Contributor
164
+ has been advised of the possibility of such damages.
165
+
166
+ 9. Accepting Warranty or Additional Liability. While redistributing
167
+ the Work or Derivative Works thereof, You may choose to offer,
168
+ and charge a fee for, acceptance of support, warranty, indemnity,
169
+ or other liability obligations and/or rights consistent with this
170
+ License. However, in accepting such obligations, You may act only
171
+ on Your own behalf and on Your sole responsibility, not on behalf
172
+ of any other Contributor, and only if You agree to indemnify,
173
+ defend, and hold each Contributor harmless for any liability
174
+ incurred by, or claims asserted against, such Contributor by reason
175
+ of your accepting any such warranty or additional liability.
176
+
177
+ END OF TERMS AND CONDITIONS
178
+
179
+ APPENDIX: How to apply the Apache License to your work.
180
+
181
+ To apply the Apache License to your work, attach the following
182
+ boilerplate notice, with the fields enclosed by brackets "[]"
183
+ replaced with your own identifying information. (Don't include
184
+ the brackets!) The text should be enclosed in the appropriate
185
+ comment syntax for the file format. We also recommend that a
186
+ file or class name and description of purpose be included on the
187
+ same "printed page" as the copyright notice for easier
188
+ identification within third-party archives.
189
+
190
+ Copyright [yyyy] [name of copyright owner]
191
+
192
+ Licensed under the Apache License, Version 2.0 (the "License");
193
+ you may not use this file except in compliance with the License.
194
+ You may obtain a copy of the License at
195
+
196
+ http://www.apache.org/licenses/LICENSE-2.0
197
+
198
+ Unless required by applicable law or agreed to in writing, software
199
+ distributed under the License is distributed on an "AS IS" BASIS,
200
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201
+ See the License for the specific language governing permissions and
202
+ limitations under the License.
203
+ */
@@ -23,4 +23,4 @@
23
23
  ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24
24
  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25
25
  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
- */
26
+ */