tomoto 0.2.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (369) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +10 -0
  3. data/README.md +8 -10
  4. data/ext/tomoto/ct.cpp +11 -11
  5. data/ext/tomoto/dmr.cpp +14 -13
  6. data/ext/tomoto/dt.cpp +14 -14
  7. data/ext/tomoto/extconf.rb +7 -5
  8. data/ext/tomoto/gdmr.cpp +7 -7
  9. data/ext/tomoto/hdp.cpp +9 -9
  10. data/ext/tomoto/hlda.cpp +13 -13
  11. data/ext/tomoto/hpa.cpp +5 -5
  12. data/ext/tomoto/lda.cpp +42 -39
  13. data/ext/tomoto/llda.cpp +6 -6
  14. data/ext/tomoto/mglda.cpp +15 -15
  15. data/ext/tomoto/pa.cpp +6 -6
  16. data/ext/tomoto/plda.cpp +6 -6
  17. data/ext/tomoto/slda.cpp +8 -8
  18. data/ext/tomoto/{ext.cpp → tomoto.cpp} +8 -8
  19. data/ext/tomoto/utils.h +16 -70
  20. data/lib/tomoto/version.rb +1 -1
  21. data/lib/tomoto.rb +5 -1
  22. data/vendor/EigenRand/EigenRand/Core.h +10 -10
  23. data/vendor/EigenRand/EigenRand/Dists/Basic.h +208 -9
  24. data/vendor/EigenRand/EigenRand/Dists/Discrete.h +52 -31
  25. data/vendor/EigenRand/EigenRand/Dists/GammaPoisson.h +9 -8
  26. data/vendor/EigenRand/EigenRand/Dists/NormalExp.h +28 -21
  27. data/vendor/EigenRand/EigenRand/EigenRand +11 -6
  28. data/vendor/EigenRand/EigenRand/Macro.h +13 -7
  29. data/vendor/EigenRand/EigenRand/MorePacketMath.h +348 -740
  30. data/vendor/EigenRand/EigenRand/MvDists/Multinomial.h +5 -3
  31. data/vendor/EigenRand/EigenRand/MvDists/MvNormal.h +9 -3
  32. data/vendor/EigenRand/EigenRand/PacketFilter.h +11 -253
  33. data/vendor/EigenRand/EigenRand/PacketRandomEngine.h +21 -47
  34. data/vendor/EigenRand/EigenRand/RandUtils.h +50 -344
  35. data/vendor/EigenRand/EigenRand/arch/AVX/MorePacketMath.h +619 -0
  36. data/vendor/EigenRand/EigenRand/arch/AVX/PacketFilter.h +149 -0
  37. data/vendor/EigenRand/EigenRand/arch/AVX/RandUtils.h +228 -0
  38. data/vendor/EigenRand/EigenRand/arch/NEON/MorePacketMath.h +473 -0
  39. data/vendor/EigenRand/EigenRand/arch/NEON/PacketFilter.h +142 -0
  40. data/vendor/EigenRand/EigenRand/arch/NEON/RandUtils.h +126 -0
  41. data/vendor/EigenRand/EigenRand/arch/SSE/MorePacketMath.h +501 -0
  42. data/vendor/EigenRand/EigenRand/arch/SSE/PacketFilter.h +133 -0
  43. data/vendor/EigenRand/EigenRand/arch/SSE/RandUtils.h +120 -0
  44. data/vendor/EigenRand/EigenRand/doc.h +24 -12
  45. data/vendor/EigenRand/README.md +57 -4
  46. data/vendor/eigen/COPYING.APACHE +203 -0
  47. data/vendor/eigen/COPYING.BSD +1 -1
  48. data/vendor/eigen/COPYING.MINPACK +51 -52
  49. data/vendor/eigen/Eigen/Cholesky +0 -1
  50. data/vendor/eigen/Eigen/Core +112 -265
  51. data/vendor/eigen/Eigen/Eigenvalues +2 -3
  52. data/vendor/eigen/Eigen/Geometry +5 -8
  53. data/vendor/eigen/Eigen/Householder +0 -1
  54. data/vendor/eigen/Eigen/Jacobi +0 -1
  55. data/vendor/eigen/Eigen/KLUSupport +41 -0
  56. data/vendor/eigen/Eigen/LU +2 -5
  57. data/vendor/eigen/Eigen/OrderingMethods +0 -3
  58. data/vendor/eigen/Eigen/PaStiXSupport +1 -0
  59. data/vendor/eigen/Eigen/PardisoSupport +0 -0
  60. data/vendor/eigen/Eigen/QR +2 -3
  61. data/vendor/eigen/Eigen/QtAlignedMalloc +0 -1
  62. data/vendor/eigen/Eigen/SVD +0 -1
  63. data/vendor/eigen/Eigen/Sparse +0 -2
  64. data/vendor/eigen/Eigen/SparseCholesky +0 -8
  65. data/vendor/eigen/Eigen/SparseLU +4 -0
  66. data/vendor/eigen/Eigen/SparseQR +0 -1
  67. data/vendor/eigen/Eigen/src/Cholesky/LDLT.h +42 -27
  68. data/vendor/eigen/Eigen/src/Cholesky/LLT.h +39 -23
  69. data/vendor/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +90 -47
  70. data/vendor/eigen/Eigen/src/Core/ArithmeticSequence.h +413 -0
  71. data/vendor/eigen/Eigen/src/Core/Array.h +99 -11
  72. data/vendor/eigen/Eigen/src/Core/ArrayBase.h +3 -3
  73. data/vendor/eigen/Eigen/src/Core/ArrayWrapper.h +21 -21
  74. data/vendor/eigen/Eigen/src/Core/Assign.h +1 -1
  75. data/vendor/eigen/Eigen/src/Core/AssignEvaluator.h +125 -50
  76. data/vendor/eigen/Eigen/src/Core/Assign_MKL.h +10 -10
  77. data/vendor/eigen/Eigen/src/Core/BandMatrix.h +16 -16
  78. data/vendor/eigen/Eigen/src/Core/Block.h +56 -60
  79. data/vendor/eigen/Eigen/src/Core/BooleanRedux.h +29 -31
  80. data/vendor/eigen/Eigen/src/Core/CommaInitializer.h +7 -3
  81. data/vendor/eigen/Eigen/src/Core/CoreEvaluators.h +325 -272
  82. data/vendor/eigen/Eigen/src/Core/CoreIterators.h +5 -0
  83. data/vendor/eigen/Eigen/src/Core/CwiseBinaryOp.h +21 -22
  84. data/vendor/eigen/Eigen/src/Core/CwiseNullaryOp.h +153 -18
  85. data/vendor/eigen/Eigen/src/Core/CwiseUnaryOp.h +6 -6
  86. data/vendor/eigen/Eigen/src/Core/CwiseUnaryView.h +14 -10
  87. data/vendor/eigen/Eigen/src/Core/DenseBase.h +132 -42
  88. data/vendor/eigen/Eigen/src/Core/DenseCoeffsBase.h +25 -21
  89. data/vendor/eigen/Eigen/src/Core/DenseStorage.h +153 -71
  90. data/vendor/eigen/Eigen/src/Core/Diagonal.h +21 -23
  91. data/vendor/eigen/Eigen/src/Core/DiagonalMatrix.h +50 -2
  92. data/vendor/eigen/Eigen/src/Core/DiagonalProduct.h +1 -1
  93. data/vendor/eigen/Eigen/src/Core/Dot.h +10 -10
  94. data/vendor/eigen/Eigen/src/Core/EigenBase.h +10 -9
  95. data/vendor/eigen/Eigen/src/Core/ForceAlignedAccess.h +8 -4
  96. data/vendor/eigen/Eigen/src/Core/Fuzzy.h +3 -3
  97. data/vendor/eigen/Eigen/src/Core/GeneralProduct.h +20 -10
  98. data/vendor/eigen/Eigen/src/Core/GenericPacketMath.h +599 -152
  99. data/vendor/eigen/Eigen/src/Core/GlobalFunctions.h +40 -33
  100. data/vendor/eigen/Eigen/src/Core/IO.h +40 -7
  101. data/vendor/eigen/Eigen/src/Core/IndexedView.h +237 -0
  102. data/vendor/eigen/Eigen/src/Core/Inverse.h +9 -10
  103. data/vendor/eigen/Eigen/src/Core/Map.h +7 -7
  104. data/vendor/eigen/Eigen/src/Core/MapBase.h +10 -3
  105. data/vendor/eigen/Eigen/src/Core/MathFunctions.h +767 -125
  106. data/vendor/eigen/Eigen/src/Core/MathFunctionsImpl.h +118 -19
  107. data/vendor/eigen/Eigen/src/Core/Matrix.h +131 -25
  108. data/vendor/eigen/Eigen/src/Core/MatrixBase.h +21 -3
  109. data/vendor/eigen/Eigen/src/Core/NestByValue.h +25 -50
  110. data/vendor/eigen/Eigen/src/Core/NoAlias.h +4 -3
  111. data/vendor/eigen/Eigen/src/Core/NumTraits.h +107 -20
  112. data/vendor/eigen/Eigen/src/Core/PartialReduxEvaluator.h +232 -0
  113. data/vendor/eigen/Eigen/src/Core/PermutationMatrix.h +3 -31
  114. data/vendor/eigen/Eigen/src/Core/PlainObjectBase.h +152 -59
  115. data/vendor/eigen/Eigen/src/Core/Product.h +30 -25
  116. data/vendor/eigen/Eigen/src/Core/ProductEvaluators.h +192 -125
  117. data/vendor/eigen/Eigen/src/Core/Random.h +37 -1
  118. data/vendor/eigen/Eigen/src/Core/Redux.h +180 -170
  119. data/vendor/eigen/Eigen/src/Core/Ref.h +121 -23
  120. data/vendor/eigen/Eigen/src/Core/Replicate.h +8 -8
  121. data/vendor/eigen/Eigen/src/Core/Reshaped.h +454 -0
  122. data/vendor/eigen/Eigen/src/Core/ReturnByValue.h +7 -5
  123. data/vendor/eigen/Eigen/src/Core/Reverse.h +18 -12
  124. data/vendor/eigen/Eigen/src/Core/Select.h +8 -6
  125. data/vendor/eigen/Eigen/src/Core/SelfAdjointView.h +33 -20
  126. data/vendor/eigen/Eigen/src/Core/Solve.h +14 -14
  127. data/vendor/eigen/Eigen/src/Core/SolveTriangular.h +16 -16
  128. data/vendor/eigen/Eigen/src/Core/SolverBase.h +41 -3
  129. data/vendor/eigen/Eigen/src/Core/StableNorm.h +100 -70
  130. data/vendor/eigen/Eigen/src/Core/StlIterators.h +463 -0
  131. data/vendor/eigen/Eigen/src/Core/Stride.h +9 -4
  132. data/vendor/eigen/Eigen/src/Core/Swap.h +5 -4
  133. data/vendor/eigen/Eigen/src/Core/Transpose.h +88 -27
  134. data/vendor/eigen/Eigen/src/Core/Transpositions.h +26 -47
  135. data/vendor/eigen/Eigen/src/Core/TriangularMatrix.h +93 -75
  136. data/vendor/eigen/Eigen/src/Core/VectorBlock.h +5 -5
  137. data/vendor/eigen/Eigen/src/Core/VectorwiseOp.h +159 -70
  138. data/vendor/eigen/Eigen/src/Core/Visitor.h +137 -29
  139. data/vendor/eigen/Eigen/src/Core/arch/AVX/Complex.h +50 -129
  140. data/vendor/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +126 -337
  141. data/vendor/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +1092 -155
  142. data/vendor/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +65 -1
  143. data/vendor/eigen/Eigen/src/Core/arch/AVX512/Complex.h +422 -0
  144. data/vendor/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +207 -236
  145. data/vendor/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1482 -495
  146. data/vendor/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +89 -0
  147. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +152 -165
  148. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +19 -251
  149. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2937 -0
  150. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +221 -0
  151. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +629 -0
  152. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2042 -392
  153. data/vendor/eigen/Eigen/src/Core/arch/CUDA/Complex.h +235 -80
  154. data/vendor/eigen/Eigen/src/Core/arch/Default/BFloat16.h +700 -0
  155. data/vendor/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +102 -14
  156. data/vendor/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1649 -0
  157. data/vendor/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +110 -0
  158. data/vendor/eigen/Eigen/src/Core/arch/Default/Half.h +942 -0
  159. data/vendor/eigen/Eigen/src/Core/arch/Default/Settings.h +1 -1
  160. data/vendor/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +120 -0
  161. data/vendor/eigen/Eigen/src/Core/arch/{CUDA → GPU}/MathFunctions.h +16 -4
  162. data/vendor/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1685 -0
  163. data/vendor/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +80 -0
  164. data/vendor/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
  165. data/vendor/eigen/Eigen/src/Core/arch/MSA/Complex.h +648 -0
  166. data/vendor/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +387 -0
  167. data/vendor/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1233 -0
  168. data/vendor/eigen/Eigen/src/Core/arch/NEON/Complex.h +313 -219
  169. data/vendor/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +183 -0
  170. data/vendor/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +54 -70
  171. data/vendor/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4376 -549
  172. data/vendor/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1419 -0
  173. data/vendor/eigen/Eigen/src/Core/arch/SSE/Complex.h +59 -179
  174. data/vendor/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +65 -428
  175. data/vendor/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +893 -283
  176. data/vendor/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +65 -0
  177. data/vendor/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +44 -0
  178. data/vendor/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +752 -0
  179. data/vendor/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +49 -0
  180. data/vendor/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +232 -0
  181. data/vendor/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +301 -0
  182. data/vendor/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +670 -0
  183. data/vendor/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +694 -0
  184. data/vendor/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +85 -0
  185. data/vendor/eigen/Eigen/src/Core/arch/ZVector/Complex.h +212 -183
  186. data/vendor/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +101 -5
  187. data/vendor/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +510 -395
  188. data/vendor/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +11 -2
  189. data/vendor/eigen/Eigen/src/Core/functors/BinaryFunctors.h +112 -46
  190. data/vendor/eigen/Eigen/src/Core/functors/NullaryFunctors.h +31 -30
  191. data/vendor/eigen/Eigen/src/Core/functors/StlFunctors.h +32 -2
  192. data/vendor/eigen/Eigen/src/Core/functors/UnaryFunctors.h +355 -16
  193. data/vendor/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1075 -586
  194. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +49 -24
  195. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +41 -35
  196. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +6 -6
  197. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +4 -2
  198. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +382 -483
  199. data/vendor/eigen/Eigen/src/Core/products/Parallelizer.h +22 -5
  200. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +53 -30
  201. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +16 -8
  202. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +8 -6
  203. data/vendor/eigen/Eigen/src/Core/products/SelfadjointProduct.h +4 -4
  204. data/vendor/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +5 -4
  205. data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +33 -27
  206. data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +14 -12
  207. data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +36 -34
  208. data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +8 -4
  209. data/vendor/eigen/Eigen/src/Core/products/TriangularSolverVector.h +13 -10
  210. data/vendor/eigen/Eigen/src/Core/util/BlasUtil.h +304 -119
  211. data/vendor/eigen/Eigen/src/Core/util/ConfigureVectorization.h +512 -0
  212. data/vendor/eigen/Eigen/src/Core/util/Constants.h +25 -9
  213. data/vendor/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +26 -3
  214. data/vendor/eigen/Eigen/src/Core/util/ForwardDeclarations.h +29 -9
  215. data/vendor/eigen/Eigen/src/Core/util/IndexedViewHelper.h +186 -0
  216. data/vendor/eigen/Eigen/src/Core/util/IntegralConstant.h +272 -0
  217. data/vendor/eigen/Eigen/src/Core/util/MKL_support.h +8 -1
  218. data/vendor/eigen/Eigen/src/Core/util/Macros.h +709 -246
  219. data/vendor/eigen/Eigen/src/Core/util/Memory.h +222 -52
  220. data/vendor/eigen/Eigen/src/Core/util/Meta.h +355 -77
  221. data/vendor/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +5 -1
  222. data/vendor/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
  223. data/vendor/eigen/Eigen/src/Core/util/StaticAssert.h +8 -5
  224. data/vendor/eigen/Eigen/src/Core/util/SymbolicIndex.h +293 -0
  225. data/vendor/eigen/Eigen/src/Core/util/XprHelper.h +65 -30
  226. data/vendor/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +1 -1
  227. data/vendor/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +7 -4
  228. data/vendor/eigen/Eigen/src/Eigenvalues/EigenSolver.h +2 -2
  229. data/vendor/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +1 -1
  230. data/vendor/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +2 -2
  231. data/vendor/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +2 -2
  232. data/vendor/eigen/Eigen/src/Eigenvalues/RealQZ.h +9 -6
  233. data/vendor/eigen/Eigen/src/Eigenvalues/RealSchur.h +21 -9
  234. data/vendor/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +77 -43
  235. data/vendor/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +20 -15
  236. data/vendor/eigen/Eigen/src/Geometry/AlignedBox.h +99 -5
  237. data/vendor/eigen/Eigen/src/Geometry/AngleAxis.h +4 -4
  238. data/vendor/eigen/Eigen/src/Geometry/EulerAngles.h +3 -3
  239. data/vendor/eigen/Eigen/src/Geometry/Homogeneous.h +15 -11
  240. data/vendor/eigen/Eigen/src/Geometry/Hyperplane.h +1 -1
  241. data/vendor/eigen/Eigen/src/Geometry/OrthoMethods.h +3 -2
  242. data/vendor/eigen/Eigen/src/Geometry/ParametrizedLine.h +39 -2
  243. data/vendor/eigen/Eigen/src/Geometry/Quaternion.h +70 -14
  244. data/vendor/eigen/Eigen/src/Geometry/Rotation2D.h +3 -3
  245. data/vendor/eigen/Eigen/src/Geometry/Scaling.h +23 -5
  246. data/vendor/eigen/Eigen/src/Geometry/Transform.h +88 -67
  247. data/vendor/eigen/Eigen/src/Geometry/Translation.h +6 -12
  248. data/vendor/eigen/Eigen/src/Geometry/Umeyama.h +1 -1
  249. data/vendor/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +168 -0
  250. data/vendor/eigen/Eigen/src/Householder/BlockHouseholder.h +9 -2
  251. data/vendor/eigen/Eigen/src/Householder/Householder.h +8 -4
  252. data/vendor/eigen/Eigen/src/Householder/HouseholderSequence.h +123 -48
  253. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +15 -15
  254. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +7 -23
  255. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +5 -22
  256. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +41 -47
  257. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +51 -60
  258. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +70 -20
  259. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +2 -20
  260. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +11 -9
  261. data/vendor/eigen/Eigen/src/Jacobi/Jacobi.h +31 -10
  262. data/vendor/eigen/Eigen/src/KLUSupport/KLUSupport.h +358 -0
  263. data/vendor/eigen/Eigen/src/LU/Determinant.h +35 -19
  264. data/vendor/eigen/Eigen/src/LU/FullPivLU.h +29 -43
  265. data/vendor/eigen/Eigen/src/LU/InverseImpl.h +25 -8
  266. data/vendor/eigen/Eigen/src/LU/PartialPivLU.h +71 -58
  267. data/vendor/eigen/Eigen/src/LU/arch/InverseSize4.h +351 -0
  268. data/vendor/eigen/Eigen/src/OrderingMethods/Amd.h +7 -17
  269. data/vendor/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +297 -277
  270. data/vendor/eigen/Eigen/src/OrderingMethods/Ordering.h +6 -10
  271. data/vendor/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +1 -1
  272. data/vendor/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +9 -7
  273. data/vendor/eigen/Eigen/src/QR/ColPivHouseholderQR.h +41 -20
  274. data/vendor/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +100 -27
  275. data/vendor/eigen/Eigen/src/QR/FullPivHouseholderQR.h +59 -22
  276. data/vendor/eigen/Eigen/src/QR/HouseholderQR.h +48 -23
  277. data/vendor/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +25 -3
  278. data/vendor/eigen/Eigen/src/SVD/BDCSVD.h +183 -63
  279. data/vendor/eigen/Eigen/src/SVD/JacobiSVD.h +22 -14
  280. data/vendor/eigen/Eigen/src/SVD/SVDBase.h +83 -22
  281. data/vendor/eigen/Eigen/src/SVD/UpperBidiagonalization.h +3 -3
  282. data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +17 -9
  283. data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +12 -37
  284. data/vendor/eigen/Eigen/src/SparseCore/AmbiVector.h +3 -2
  285. data/vendor/eigen/Eigen/src/SparseCore/CompressedStorage.h +16 -0
  286. data/vendor/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +6 -6
  287. data/vendor/eigen/Eigen/src/SparseCore/SparseAssign.h +81 -27
  288. data/vendor/eigen/Eigen/src/SparseCore/SparseBlock.h +25 -57
  289. data/vendor/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +40 -11
  290. data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +11 -15
  291. data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +4 -2
  292. data/vendor/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +30 -8
  293. data/vendor/eigen/Eigen/src/SparseCore/SparseMatrix.h +126 -11
  294. data/vendor/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +5 -12
  295. data/vendor/eigen/Eigen/src/SparseCore/SparseProduct.h +13 -1
  296. data/vendor/eigen/Eigen/src/SparseCore/SparseRef.h +7 -7
  297. data/vendor/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +5 -2
  298. data/vendor/eigen/Eigen/src/SparseCore/SparseUtil.h +8 -0
  299. data/vendor/eigen/Eigen/src/SparseCore/SparseVector.h +1 -1
  300. data/vendor/eigen/Eigen/src/SparseCore/SparseView.h +1 -0
  301. data/vendor/eigen/Eigen/src/SparseLU/SparseLU.h +162 -12
  302. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +1 -1
  303. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +76 -2
  304. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +2 -2
  305. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +1 -1
  306. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +1 -1
  307. data/vendor/eigen/Eigen/src/SparseQR/SparseQR.h +19 -6
  308. data/vendor/eigen/Eigen/src/StlSupport/StdDeque.h +2 -12
  309. data/vendor/eigen/Eigen/src/StlSupport/StdList.h +2 -2
  310. data/vendor/eigen/Eigen/src/StlSupport/StdVector.h +2 -2
  311. data/vendor/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +6 -8
  312. data/vendor/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +175 -39
  313. data/vendor/eigen/Eigen/src/misc/lapacke.h +5 -4
  314. data/vendor/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +28 -2
  315. data/vendor/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +155 -11
  316. data/vendor/eigen/Eigen/src/plugins/BlockMethods.h +626 -242
  317. data/vendor/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +14 -0
  318. data/vendor/eigen/Eigen/src/plugins/IndexedViewMethods.h +262 -0
  319. data/vendor/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +4 -4
  320. data/vendor/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +10 -0
  321. data/vendor/eigen/Eigen/src/plugins/ReshapedMethods.h +149 -0
  322. data/vendor/eigen/README.md +2 -0
  323. data/vendor/eigen/bench/btl/README +1 -1
  324. data/vendor/eigen/bench/tensors/README +6 -7
  325. data/vendor/eigen/ci/README.md +56 -0
  326. data/vendor/eigen/demos/mix_eigen_and_c/README +1 -1
  327. data/vendor/eigen/unsupported/Eigen/CXX11/src/Tensor/README.md +213 -158
  328. data/vendor/eigen/unsupported/README.txt +1 -1
  329. data/vendor/tomotopy/README.kr.rst +78 -0
  330. data/vendor/tomotopy/README.rst +75 -0
  331. data/vendor/tomotopy/src/Labeling/FoRelevance.cpp +2 -2
  332. data/vendor/tomotopy/src/Labeling/Phraser.hpp +4 -4
  333. data/vendor/tomotopy/src/TopicModel/CTModel.hpp +7 -3
  334. data/vendor/tomotopy/src/TopicModel/DMRModel.hpp +7 -3
  335. data/vendor/tomotopy/src/TopicModel/DTModel.hpp +6 -3
  336. data/vendor/tomotopy/src/TopicModel/GDMRModel.hpp +2 -2
  337. data/vendor/tomotopy/src/TopicModel/HDP.h +1 -0
  338. data/vendor/tomotopy/src/TopicModel/HDPModel.hpp +57 -6
  339. data/vendor/tomotopy/src/TopicModel/HLDAModel.hpp +6 -3
  340. data/vendor/tomotopy/src/TopicModel/HPAModel.hpp +3 -2
  341. data/vendor/tomotopy/src/TopicModel/LDA.h +3 -3
  342. data/vendor/tomotopy/src/TopicModel/LDACVB0Model.hpp +5 -5
  343. data/vendor/tomotopy/src/TopicModel/LDAModel.hpp +50 -19
  344. data/vendor/tomotopy/src/TopicModel/LLDAModel.hpp +6 -2
  345. data/vendor/tomotopy/src/TopicModel/MGLDAModel.hpp +3 -2
  346. data/vendor/tomotopy/src/TopicModel/PAModel.hpp +1 -1
  347. data/vendor/tomotopy/src/TopicModel/PLDAModel.hpp +6 -2
  348. data/vendor/tomotopy/src/TopicModel/PT.h +3 -1
  349. data/vendor/tomotopy/src/TopicModel/PTModel.hpp +36 -3
  350. data/vendor/tomotopy/src/TopicModel/SLDAModel.hpp +6 -3
  351. data/vendor/tomotopy/src/TopicModel/TopicModel.hpp +55 -26
  352. data/vendor/tomotopy/src/Utils/AliasMethod.hpp +5 -4
  353. data/vendor/tomotopy/src/Utils/Dictionary.h +2 -2
  354. data/vendor/tomotopy/src/Utils/EigenAddonOps.hpp +36 -1
  355. data/vendor/tomotopy/src/Utils/MultiNormalDistribution.hpp +1 -1
  356. data/vendor/tomotopy/src/Utils/TruncMultiNormal.hpp +1 -1
  357. data/vendor/tomotopy/src/Utils/exception.h +6 -0
  358. data/vendor/tomotopy/src/Utils/math.h +2 -2
  359. data/vendor/tomotopy/src/Utils/sample.hpp +14 -12
  360. data/vendor/tomotopy/src/Utils/serializer.hpp +30 -5
  361. data/vendor/tomotopy/src/Utils/sse_gamma.h +0 -3
  362. metadata +64 -18
  363. data/vendor/eigen/Eigen/CMakeLists.txt +0 -19
  364. data/vendor/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -674
  365. data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
  366. data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
  367. data/vendor/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
  368. data/vendor/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
  369. data/vendor/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
@@ -0,0 +1,387 @@
1
+ // This file is part of Eigen, a lightweight C++ template library
2
+ // for linear algebra.
3
+ //
4
+ // Copyright (C) 2007 Julien Pommier
5
+ // Copyright (C) 2014 Pedro Gonnet (pedro.gonnet@gmail.com)
6
+ // Copyright (C) 2016 Gael Guennebaud <gael.guennebaud@inria.fr>
7
+ //
8
+ // Copyright (C) 2018 Wave Computing, Inc.
9
+ // Written by:
10
+ // Chris Larsen
11
+ // Alexey Frunze (afrunze@wavecomp.com)
12
+ //
13
+ // This Source Code Form is subject to the terms of the Mozilla
14
+ // Public License v. 2.0. If a copy of the MPL was not distributed
15
+ // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
16
+
17
+ /* The sin, cos, exp, and log functions of this file come from
18
+ * Julien Pommier's sse math library: http://gruntthepeon.free.fr/ssemath/
19
+ */
20
+
21
+ /* The tanh function of this file is an adaptation of
22
+ * template<typename T> T generic_fast_tanh_float(const T&)
23
+ * from MathFunctionsImpl.h.
24
+ */
25
+
26
+ #ifndef EIGEN_MATH_FUNCTIONS_MSA_H
27
+ #define EIGEN_MATH_FUNCTIONS_MSA_H
28
+
29
+ namespace Eigen {
30
+
31
+ namespace internal {
32
+
33
+ template <>
34
+ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f
35
+ plog<Packet4f>(const Packet4f& _x) { // Per-lane natural logarithm of _x (MSA specialization): negative lanes yield NAN, zero lanes -INFINITY, +INFINITY lanes +INFINITY.
36
+ static _EIGEN_DECLARE_CONST_Packet4f(cephes_SQRTHF, 0.707106781186547524f); // sqrt(0.5); threshold used below to choose the reduced-argument branch.
37
+ static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p0, 7.0376836292e-2f); // p0..p8: polynomial coefficients consumed by the pmadd chain below.
38
+ static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p1, -1.1514610310e-1f);
39
+ static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p2, 1.1676998740e-1f);
40
+ static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p3, -1.2420140846e-1f);
41
+ static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p4, +1.4249322787e-1f);
42
+ static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p5, -1.6668057665e-1f);
43
+ static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p6, +2.0000714765e-1f);
44
+ static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p7, -2.4999993993e-1f);
45
+ static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p8, +3.3333331174e-1f);
46
+ static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_q1, -2.12194440e-4f); // q1 + q2 ~= ln(2); the two parts are applied to the exponent separately below.
47
+ static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_q2, 0.693359375f);
48
+ static _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
49
+ static _EIGEN_DECLARE_CONST_Packet4f(1, 1.0f);
50
+
51
+ // Convert negative argument into NAN (quiet negative, to be specific).
52
+ Packet4f zero = (Packet4f)__builtin_msa_ldi_w(0);
53
+ Packet4i neg_mask = __builtin_msa_fclt_w(_x, zero); // Mask of lanes where _x < 0.
54
+ Packet4i zero_mask = __builtin_msa_fceq_w(_x, zero); // Mask of lanes where _x == 0; used at the end to force -INFINITY.
55
+ Packet4f non_neg_x_or_nan = padd(_x, (Packet4f)neg_mask); // Add 0.0 or NAN (the mask bits reinterpreted as a float).
56
+ Packet4f x = non_neg_x_or_nan;
57
+
58
+ // Extract exponent from x = mantissa * 2**exponent, where 1.0 <= mantissa < 2.0.
59
+ // N.B. the exponent is one less than what frexpf() would return.
60
+ Packet4i e_int = __builtin_msa_ftint_s_w(__builtin_msa_flog2_w(x));
61
+ // Multiply x by 2**(-exponent-1) to get 0.5 <= x < 1.0 as from frexpf().
62
+ x = __builtin_msa_fexp2_w(x, (Packet4i)__builtin_msa_nori_b((v16u8)e_int, 0)); // NOR with 0 presumably acts as bitwise NOT, i.e. -e_int - 1, matching the comment above.
63
+
64
+ /*
65
+ if (x < SQRTHF) {
66
+ x = x + x - 1.0;
67
+ } else {
68
+ e += 1;
69
+ x = x - 1.0;
70
+ }
71
+ */
72
+ Packet4f xx = padd(x, x);
73
+ Packet4i ge_mask = __builtin_msa_fcle_w(p4f_cephes_SQRTHF, x); // Mask of lanes where SQRTHF <= x (the "else" branch above).
74
+ e_int = psub(e_int, ge_mask); // Presumably set lanes are all-ones (-1), so this performs the "e += 1" of the else branch.
75
+ x = (Packet4f)__builtin_msa_bsel_v((v16u8)ge_mask, (v16u8)xx, (v16u8)x); // Per the pseudo-code above: keep x where ge_mask is set, take xx elsewhere.
76
+ x = psub(x, p4f_1); // The "- 1.0" shared by both branches.
77
+ Packet4f e = __builtin_msa_ffint_s_w(e_int);
78
+
79
+ Packet4f x2 = pmul(x, x);
80
+ Packet4f x3 = pmul(x2, x);
81
+
82
+ Packet4f y, y1, y2; // Three partial polynomials in x, merged through x3 below (assuming pmadd(a, b, c) = a * b + c).
83
+ y = pmadd(p4f_cephes_log_p0, x, p4f_cephes_log_p1);
84
+ y1 = pmadd(p4f_cephes_log_p3, x, p4f_cephes_log_p4);
85
+ y2 = pmadd(p4f_cephes_log_p6, x, p4f_cephes_log_p7);
86
+ y = pmadd(y, x, p4f_cephes_log_p2);
87
+ y1 = pmadd(y1, x, p4f_cephes_log_p5);
88
+ y2 = pmadd(y2, x, p4f_cephes_log_p8);
89
+ y = pmadd(y, x3, y1);
90
+ y = pmadd(y, x3, y2);
91
+ y = pmul(y, x3);
92
+
93
+ y = pmadd(e, p4f_cephes_log_q1, y); // Add the small part (q1) of e * ln(2).
94
+ x = __builtin_msa_fmsub_w(x, x2, p4f_half); // Presumably x - 0.5 * x2, i.e. fmsub(a, b, c) = a - b * c -- TODO confirm against the MSA documentation.
95
+ x = padd(x, y);
96
+ x = pmadd(e, p4f_cephes_log_q2, x); // Add the large part (q2) of e * ln(2).
97
+
98
+ // x is now the logarithm result candidate. We still need to handle the
99
+ // extreme arguments of zero and positive infinity, though.
100
+ // N.B. if the argument is +INFINITY, x is NAN because the polynomial terms
101
+ // contain infinities of both signs (see the coefficients and code above).
102
+ // INFINITY - INFINITY is NAN.
103
+
104
+ // If the argument is +INFINITY, make it the new result candidate.
105
+ // To achieve that we choose the smaller of the result candidate and the
106
+ // argument.
107
+ // This is correct for all finite pairs of values (the logarithm is smaller
108
+ // than the argument).
109
+ // This is also correct in the special case when the argument is +INFINITY
110
+ // and the result candidate is NAN. This is because the fmin.df instruction
111
+ // prefers non-NANs to NANs.
112
+ x = __builtin_msa_fmin_w(x, non_neg_x_or_nan);
113
+
114
+ // If the argument is zero (including -0.0), the result becomes -INFINITY.
115
+ Packet4i neg_infs = __builtin_msa_slli_w(zero_mask, 23); // An all-ones lane shifted left by 23 is 0xFF800000, the bit pattern of -INFINITY.
116
+ x = (Packet4f)__builtin_msa_bsel_v((v16u8)zero_mask, (v16u8)x, (v16u8)neg_infs); // Take neg_infs in the zero lanes, keep x elsewhere.
117
+
118
+ return x;
119
+ }
120
+
121
+ template <>
122
+ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f
123
+ pexp<Packet4f>(const Packet4f& _x) { // Per-lane exponential e**_x (MSA specialization); the argument is clamped to [exp_lo, exp_hi] first.
124
+ // Limiting single-precision pexp's argument to [-128, +128] lets pexp
125
+ // reach 0 and INFINITY naturally.
126
+ static _EIGEN_DECLARE_CONST_Packet4f(exp_lo, -128.0f);
127
+ static _EIGEN_DECLARE_CONST_Packet4f(exp_hi, +128.0f);
128
+ static _EIGEN_DECLARE_CONST_Packet4f(cephes_LOG2EF, 1.44269504088896341f); // log2(e) = 1 / ln(2).
129
+ static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C1, 0.693359375f); // C1 + C2 ~= ln(2); subtracted in two steps during range reduction below.
130
+ static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C2, -2.12194440e-4f);
131
+ static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p0, 1.9875691500e-4f); // p0..p5: polynomial coefficients for the reduced argument.
132
+ static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p1, 1.3981999507e-3f);
133
+ static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p2, 8.3334519073e-3f);
134
+ static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p3, 4.1665795894e-2f);
135
+ static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p4, 1.6666665459e-1f);
136
+ static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p5, 5.0000001201e-1f);
137
+ static _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
138
+ static _EIGEN_DECLARE_CONST_Packet4f(1, 1.0f);
139
+
140
+ Packet4f x = _x;
141
+
142
+ // Clamp x: first from below with exp_lo, then from above with exp_hi.
143
+ x = (Packet4f)__builtin_msa_bsel_v((v16u8)__builtin_msa_fclt_w(x, p4f_exp_lo), (v16u8)x,
144
+ (v16u8)p4f_exp_lo);
145
+ x = (Packet4f)__builtin_msa_bsel_v((v16u8)__builtin_msa_fclt_w(p4f_exp_hi, x), (v16u8)x,
146
+ (v16u8)p4f_exp_hi);
147
+
148
+ // Round to nearest integer by adding 0.5 (with x's sign) and truncating.
149
+ Packet4f x2_add = (Packet4f)__builtin_msa_binsli_w((v4u32)p4f_half, (v4u32)x, 0); // Presumably copies x's sign bit into 0.5 -- confirm binsli semantics against the MSA documentation.
150
+ Packet4f x2 = pmadd(x, p4f_cephes_LOG2EF, x2_add); // x * log2(e) plus the signed 0.5 (assuming pmadd(a, b, c) = a * b + c).
151
+ Packet4i x2_int = __builtin_msa_ftrunc_s_w(x2); // Integer power of two applied to the result at the end.
152
+ Packet4f x2_int_f = __builtin_msa_ffint_s_w(x2_int);
153
+
154
+ x = __builtin_msa_fmsub_w(x, x2_int_f, p4f_cephes_exp_C1); // Range reduction: presumably x -= x2_int_f * C1 (fmsub(a, b, c) = a - b * c -- TODO confirm).
155
+ x = __builtin_msa_fmsub_w(x, x2_int_f, p4f_cephes_exp_C2); // ... then x -= x2_int_f * C2, completing the subtraction of x2_int_f * ln(2).
156
+
157
+ Packet4f z = pmul(x, x);
158
+
159
+ Packet4f y = p4f_cephes_exp_p0; // Polynomial in the reduced x, evaluated one coefficient per step.
160
+ y = pmadd(y, x, p4f_cephes_exp_p1);
161
+ y = pmadd(y, x, p4f_cephes_exp_p2);
162
+ y = pmadd(y, x, p4f_cephes_exp_p3);
163
+ y = pmadd(y, x, p4f_cephes_exp_p4);
164
+ y = pmadd(y, x, p4f_cephes_exp_p5);
165
+ y = pmadd(y, z, x); // y * x**2 + x ...
166
+ y = padd(y, p4f_1); // ... + 1 gives the exponential of the reduced argument.
167
+
168
+ // y *= 2**exponent.
169
+ y = __builtin_msa_fexp2_w(y, x2_int);
170
+
171
+ return y;
172
+ }
173
+
174
+ template <>
175
+ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f
176
+ ptanh<Packet4f>(const Packet4f& _x) {
177
+ static _EIGEN_DECLARE_CONST_Packet4f(tanh_tiny, 1e-4f);
178
+ static _EIGEN_DECLARE_CONST_Packet4f(tanh_hi, 9.0f);
179
+ // The monomial coefficients of the numerator polynomial (odd).
180
+ static _EIGEN_DECLARE_CONST_Packet4f(alpha_1, 4.89352455891786e-3f);
181
+ static _EIGEN_DECLARE_CONST_Packet4f(alpha_3, 6.37261928875436e-4f);
182
+ static _EIGEN_DECLARE_CONST_Packet4f(alpha_5, 1.48572235717979e-5f);
183
+ static _EIGEN_DECLARE_CONST_Packet4f(alpha_7, 5.12229709037114e-8f);
184
+ static _EIGEN_DECLARE_CONST_Packet4f(alpha_9, -8.60467152213735e-11f);
185
+ static _EIGEN_DECLARE_CONST_Packet4f(alpha_11, 2.00018790482477e-13f);
186
+ static _EIGEN_DECLARE_CONST_Packet4f(alpha_13, -2.76076847742355e-16f);
187
+ // The monomial coefficients of the denominator polynomial (even).
188
+ static _EIGEN_DECLARE_CONST_Packet4f(beta_0, 4.89352518554385e-3f);
189
+ static _EIGEN_DECLARE_CONST_Packet4f(beta_2, 2.26843463243900e-3f);
190
+ static _EIGEN_DECLARE_CONST_Packet4f(beta_4, 1.18534705686654e-4f);
191
+ static _EIGEN_DECLARE_CONST_Packet4f(beta_6, 1.19825839466702e-6f);
192
+
193
+ Packet4f x = pabs(_x);
194
+ Packet4i tiny_mask = __builtin_msa_fclt_w(x, p4f_tanh_tiny);
195
+
196
+ // Clamp the inputs to the range [-9, 9] since anything outside
197
+ // this range is -/+1.0f in single-precision.
198
+ x = (Packet4f)__builtin_msa_bsel_v((v16u8)__builtin_msa_fclt_w(p4f_tanh_hi, x), (v16u8)x,
199
+ (v16u8)p4f_tanh_hi);
200
+
201
+ // Since the polynomials are odd/even, we need x**2.
202
+ Packet4f x2 = pmul(x, x);
203
+
204
+ // Evaluate the numerator polynomial p.
205
+ Packet4f p = pmadd(x2, p4f_alpha_13, p4f_alpha_11);
206
+ p = pmadd(x2, p, p4f_alpha_9);
207
+ p = pmadd(x2, p, p4f_alpha_7);
208
+ p = pmadd(x2, p, p4f_alpha_5);
209
+ p = pmadd(x2, p, p4f_alpha_3);
210
+ p = pmadd(x2, p, p4f_alpha_1);
211
+ p = pmul(x, p);
212
+
213
+ // Evaluate the denominator polynomial q.
214
+ Packet4f q = pmadd(x2, p4f_beta_6, p4f_beta_4);
215
+ q = pmadd(x2, q, p4f_beta_2);
216
+ q = pmadd(x2, q, p4f_beta_0);
217
+
218
+ // Divide the numerator by the denominator.
219
+ p = pdiv(p, q);
220
+
221
+ // Reinstate the sign.
222
+ p = (Packet4f)__builtin_msa_binsli_w((v4u32)p, (v4u32)_x, 0);
223
+
224
+ // When the argument is very small in magnitude it's more accurate to just return it.
225
+ p = (Packet4f)__builtin_msa_bsel_v((v16u8)tiny_mask, (v16u8)p, (v16u8)_x);
226
+
227
+ return p;
228
+ }
229
+
230
+ template <bool sine>
231
+ Packet4f psincos_inner_msa_float(const Packet4f& _x) {
232
+ static _EIGEN_DECLARE_CONST_Packet4f(sincos_max_arg, 13176795.0f); // Approx. (2**24) / (4/Pi).
233
+ static _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP1, -0.78515625f);
234
+ static _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP2, -2.4187564849853515625e-4f);
235
+ static _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP3, -3.77489497744594108e-8f);
236
+ static _EIGEN_DECLARE_CONST_Packet4f(sincof_p0, -1.9515295891e-4f);
237
+ static _EIGEN_DECLARE_CONST_Packet4f(sincof_p1, 8.3321608736e-3f);
238
+ static _EIGEN_DECLARE_CONST_Packet4f(sincof_p2, -1.6666654611e-1f);
239
+ static _EIGEN_DECLARE_CONST_Packet4f(coscof_p0, 2.443315711809948e-5f);
240
+ static _EIGEN_DECLARE_CONST_Packet4f(coscof_p1, -1.388731625493765e-3f);
241
+ static _EIGEN_DECLARE_CONST_Packet4f(coscof_p2, 4.166664568298827e-2f);
242
+ static _EIGEN_DECLARE_CONST_Packet4f(cephes_FOPI, 1.27323954473516f); // 4/Pi.
243
+ static _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
244
+ static _EIGEN_DECLARE_CONST_Packet4f(1, 1.0f);
245
+
246
+ Packet4f x = pabs(_x);
247
+
248
+ // Translate infinite arguments into NANs.
249
+ Packet4f zero_or_nan_if_inf = psub(_x, _x);
250
+ x = padd(x, zero_or_nan_if_inf);
251
+ // Prevent sin/cos from generating values larger than 1.0 in magnitude
252
+ // for very large arguments by setting x to 0.0.
253
+ Packet4i small_or_nan_mask = __builtin_msa_fcult_w(x, p4f_sincos_max_arg);
254
+ x = pand(x, (Packet4f)small_or_nan_mask);
255
+
256
+ // Scale x by 4/Pi to find x's octant.
257
+ Packet4f y = pmul(x, p4f_cephes_FOPI);
258
+ // Get the octant. We'll reduce x by this number of octants or by one more than it.
259
+ Packet4i y_int = __builtin_msa_ftrunc_s_w(y);
260
+ // x's from even-numbered octants will translate to octant 0: [0, +Pi/4].
261
+ // x's from odd-numbered octants will translate to octant -1: [-Pi/4, 0].
262
+ // Adjustment for odd-numbered octants: octant = (octant + 1) & (~1).
263
+ Packet4i y_int1 = __builtin_msa_addvi_w(y_int, 1);
264
+ Packet4i y_int2 = (Packet4i)__builtin_msa_bclri_w((Packet4ui)y_int1, 0); // bclri = bit-clear
265
+ y = __builtin_msa_ffint_s_w(y_int2);
266
+
267
+ // Compute the sign to apply to the polynomial.
268
+ Packet4i sign_mask = sine ? pxor(__builtin_msa_slli_w(y_int1, 29), (Packet4i)_x)
269
+ : __builtin_msa_slli_w(__builtin_msa_addvi_w(y_int, 3), 29);
270
+
271
+ // Get the polynomial selection mask.
272
+ // We'll calculate both (sin and cos) polynomials and then select from the two.
273
+ Packet4i poly_mask = __builtin_msa_ceqi_w(__builtin_msa_slli_w(y_int2, 30), 0);
274
+
275
+ // Reduce x by y octants to get: -Pi/4 <= x <= +Pi/4.
276
+ // The magic pass: "Extended precision modular arithmetic"
277
+ // x = ((x - y * DP1) - y * DP2) - y * DP3
278
+ Packet4f tmp1 = pmul(y, p4f_minus_cephes_DP1);
279
+ Packet4f tmp2 = pmul(y, p4f_minus_cephes_DP2);
280
+ Packet4f tmp3 = pmul(y, p4f_minus_cephes_DP3);
281
+ x = padd(x, tmp1);
282
+ x = padd(x, tmp2);
283
+ x = padd(x, tmp3);
284
+
285
+ // Evaluate the cos(x) polynomial.
286
+ y = p4f_coscof_p0;
287
+ Packet4f z = pmul(x, x);
288
+ y = pmadd(y, z, p4f_coscof_p1);
289
+ y = pmadd(y, z, p4f_coscof_p2);
290
+ y = pmul(y, z);
291
+ y = pmul(y, z);
292
+ y = __builtin_msa_fmsub_w(y, z, p4f_half);
293
+ y = padd(y, p4f_1);
294
+
295
+ // Evaluate the sin(x) polynomial.
296
+ Packet4f y2 = p4f_sincof_p0;
297
+ y2 = pmadd(y2, z, p4f_sincof_p1);
298
+ y2 = pmadd(y2, z, p4f_sincof_p2);
299
+ y2 = pmul(y2, z);
300
+ y2 = pmadd(y2, x, x);
301
+
302
+ // Select the correct result from the two polynomials.
303
+ y = sine ? (Packet4f)__builtin_msa_bsel_v((v16u8)poly_mask, (v16u8)y, (v16u8)y2)
304
+ : (Packet4f)__builtin_msa_bsel_v((v16u8)poly_mask, (v16u8)y2, (v16u8)y);
305
+
306
+ // Update the sign.
307
+ sign_mask = pxor(sign_mask, (Packet4i)y);
308
+ y = (Packet4f)__builtin_msa_binsli_w((v4u32)y, (v4u32)sign_mask, 0); // binsli = bit-insert-left
309
+ return y;
310
+ }
311
+
312
+ template <>
313
+ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f
314
+ psin<Packet4f>(const Packet4f& x) {
315
+ return psincos_inner_msa_float</* sine */ true>(x);
316
+ }
317
+
318
+ template <>
319
+ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f
320
+ pcos<Packet4f>(const Packet4f& x) {
321
+ return psincos_inner_msa_float</* sine */ false>(x);
322
+ }
323
+
324
+ template <>
325
+ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet2d
326
+ pexp<Packet2d>(const Packet2d& _x) {
327
+ // Limiting double-precision pexp's argument to [-1024, +1024] lets pexp
328
+ // reach 0 and INFINITY naturally.
329
+ static _EIGEN_DECLARE_CONST_Packet2d(exp_lo, -1024.0);
330
+ static _EIGEN_DECLARE_CONST_Packet2d(exp_hi, +1024.0);
331
+ static _EIGEN_DECLARE_CONST_Packet2d(cephes_LOG2EF, 1.4426950408889634073599);
332
+ static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C1, 0.693145751953125);
333
+ static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C2, 1.42860682030941723212e-6);
334
+ static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p0, 1.26177193074810590878e-4);
335
+ static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p1, 3.02994407707441961300e-2);
336
+ static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p2, 9.99999999999999999910e-1);
337
+ static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q0, 3.00198505138664455042e-6);
338
+ static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q1, 2.52448340349684104192e-3);
339
+ static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q2, 2.27265548208155028766e-1);
340
+ static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q3, 2.00000000000000000009e0);
341
+ static _EIGEN_DECLARE_CONST_Packet2d(half, 0.5);
342
+ static _EIGEN_DECLARE_CONST_Packet2d(1, 1.0);
343
+ static _EIGEN_DECLARE_CONST_Packet2d(2, 2.0);
344
+
345
+ Packet2d x = _x;
346
+
347
+ // Clamp x.
348
+ x = (Packet2d)__builtin_msa_bsel_v((v16u8)__builtin_msa_fclt_d(x, p2d_exp_lo), (v16u8)x,
349
+ (v16u8)p2d_exp_lo);
350
+ x = (Packet2d)__builtin_msa_bsel_v((v16u8)__builtin_msa_fclt_d(p2d_exp_hi, x), (v16u8)x,
351
+ (v16u8)p2d_exp_hi);
352
+
353
+ // Round to nearest integer by adding 0.5 (with x's sign) and truncating.
354
+ Packet2d x2_add = (Packet2d)__builtin_msa_binsli_d((v2u64)p2d_half, (v2u64)x, 0);
355
+ Packet2d x2 = pmadd(x, p2d_cephes_LOG2EF, x2_add);
356
+ Packet2l x2_long = __builtin_msa_ftrunc_s_d(x2);
357
+ Packet2d x2_long_d = __builtin_msa_ffint_s_d(x2_long);
358
+
359
+ x = __builtin_msa_fmsub_d(x, x2_long_d, p2d_cephes_exp_C1);
360
+ x = __builtin_msa_fmsub_d(x, x2_long_d, p2d_cephes_exp_C2);
361
+
362
+ x2 = pmul(x, x);
363
+
364
+ Packet2d px = p2d_cephes_exp_p0;
365
+ px = pmadd(px, x2, p2d_cephes_exp_p1);
366
+ px = pmadd(px, x2, p2d_cephes_exp_p2);
367
+ px = pmul(px, x);
368
+
369
+ Packet2d qx = p2d_cephes_exp_q0;
370
+ qx = pmadd(qx, x2, p2d_cephes_exp_q1);
371
+ qx = pmadd(qx, x2, p2d_cephes_exp_q2);
372
+ qx = pmadd(qx, x2, p2d_cephes_exp_q3);
373
+
374
+ x = pdiv(px, psub(qx, px));
375
+ x = pmadd(p2d_2, x, p2d_1);
376
+
377
+ // x *= 2**exponent.
378
+ x = __builtin_msa_fexp2_d(x, x2_long);
379
+
380
+ return x;
381
+ }
382
+
383
+ } // end namespace internal
384
+
385
+ } // end namespace Eigen
386
+
387
+ #endif // EIGEN_MATH_FUNCTIONS_MSA_H