tomoto 0.2.3 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (347) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +6 -0
  3. data/README.md +8 -10
  4. data/ext/tomoto/extconf.rb +6 -2
  5. data/ext/tomoto/{ext.cpp → tomoto.cpp} +1 -1
  6. data/lib/tomoto/version.rb +1 -1
  7. data/lib/tomoto.rb +5 -1
  8. data/vendor/EigenRand/EigenRand/Core.h +10 -10
  9. data/vendor/EigenRand/EigenRand/Dists/Basic.h +208 -9
  10. data/vendor/EigenRand/EigenRand/Dists/Discrete.h +52 -31
  11. data/vendor/EigenRand/EigenRand/Dists/GammaPoisson.h +9 -8
  12. data/vendor/EigenRand/EigenRand/Dists/NormalExp.h +28 -21
  13. data/vendor/EigenRand/EigenRand/EigenRand +11 -6
  14. data/vendor/EigenRand/EigenRand/Macro.h +13 -7
  15. data/vendor/EigenRand/EigenRand/MorePacketMath.h +348 -740
  16. data/vendor/EigenRand/EigenRand/MvDists/Multinomial.h +5 -3
  17. data/vendor/EigenRand/EigenRand/MvDists/MvNormal.h +9 -3
  18. data/vendor/EigenRand/EigenRand/PacketFilter.h +11 -253
  19. data/vendor/EigenRand/EigenRand/PacketRandomEngine.h +21 -47
  20. data/vendor/EigenRand/EigenRand/RandUtils.h +50 -344
  21. data/vendor/EigenRand/EigenRand/arch/AVX/MorePacketMath.h +619 -0
  22. data/vendor/EigenRand/EigenRand/arch/AVX/PacketFilter.h +149 -0
  23. data/vendor/EigenRand/EigenRand/arch/AVX/RandUtils.h +228 -0
  24. data/vendor/EigenRand/EigenRand/arch/NEON/MorePacketMath.h +473 -0
  25. data/vendor/EigenRand/EigenRand/arch/NEON/PacketFilter.h +142 -0
  26. data/vendor/EigenRand/EigenRand/arch/NEON/RandUtils.h +126 -0
  27. data/vendor/EigenRand/EigenRand/arch/SSE/MorePacketMath.h +501 -0
  28. data/vendor/EigenRand/EigenRand/arch/SSE/PacketFilter.h +133 -0
  29. data/vendor/EigenRand/EigenRand/arch/SSE/RandUtils.h +120 -0
  30. data/vendor/EigenRand/EigenRand/doc.h +24 -12
  31. data/vendor/EigenRand/README.md +57 -4
  32. data/vendor/eigen/COPYING.APACHE +203 -0
  33. data/vendor/eigen/COPYING.BSD +1 -1
  34. data/vendor/eigen/COPYING.MINPACK +51 -52
  35. data/vendor/eigen/Eigen/Cholesky +0 -1
  36. data/vendor/eigen/Eigen/Core +112 -265
  37. data/vendor/eigen/Eigen/Eigenvalues +2 -3
  38. data/vendor/eigen/Eigen/Geometry +5 -8
  39. data/vendor/eigen/Eigen/Householder +0 -1
  40. data/vendor/eigen/Eigen/Jacobi +0 -1
  41. data/vendor/eigen/Eigen/KLUSupport +41 -0
  42. data/vendor/eigen/Eigen/LU +2 -5
  43. data/vendor/eigen/Eigen/OrderingMethods +0 -3
  44. data/vendor/eigen/Eigen/PaStiXSupport +1 -0
  45. data/vendor/eigen/Eigen/PardisoSupport +0 -0
  46. data/vendor/eigen/Eigen/QR +2 -3
  47. data/vendor/eigen/Eigen/QtAlignedMalloc +0 -1
  48. data/vendor/eigen/Eigen/SVD +0 -1
  49. data/vendor/eigen/Eigen/Sparse +0 -2
  50. data/vendor/eigen/Eigen/SparseCholesky +0 -8
  51. data/vendor/eigen/Eigen/SparseLU +4 -0
  52. data/vendor/eigen/Eigen/SparseQR +0 -1
  53. data/vendor/eigen/Eigen/src/Cholesky/LDLT.h +42 -27
  54. data/vendor/eigen/Eigen/src/Cholesky/LLT.h +39 -23
  55. data/vendor/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +90 -47
  56. data/vendor/eigen/Eigen/src/Core/ArithmeticSequence.h +413 -0
  57. data/vendor/eigen/Eigen/src/Core/Array.h +99 -11
  58. data/vendor/eigen/Eigen/src/Core/ArrayBase.h +3 -3
  59. data/vendor/eigen/Eigen/src/Core/ArrayWrapper.h +21 -21
  60. data/vendor/eigen/Eigen/src/Core/Assign.h +1 -1
  61. data/vendor/eigen/Eigen/src/Core/AssignEvaluator.h +125 -50
  62. data/vendor/eigen/Eigen/src/Core/Assign_MKL.h +10 -10
  63. data/vendor/eigen/Eigen/src/Core/BandMatrix.h +16 -16
  64. data/vendor/eigen/Eigen/src/Core/Block.h +56 -60
  65. data/vendor/eigen/Eigen/src/Core/BooleanRedux.h +29 -31
  66. data/vendor/eigen/Eigen/src/Core/CommaInitializer.h +7 -3
  67. data/vendor/eigen/Eigen/src/Core/CoreEvaluators.h +325 -272
  68. data/vendor/eigen/Eigen/src/Core/CoreIterators.h +5 -0
  69. data/vendor/eigen/Eigen/src/Core/CwiseBinaryOp.h +21 -22
  70. data/vendor/eigen/Eigen/src/Core/CwiseNullaryOp.h +153 -18
  71. data/vendor/eigen/Eigen/src/Core/CwiseUnaryOp.h +6 -6
  72. data/vendor/eigen/Eigen/src/Core/CwiseUnaryView.h +14 -10
  73. data/vendor/eigen/Eigen/src/Core/DenseBase.h +132 -42
  74. data/vendor/eigen/Eigen/src/Core/DenseCoeffsBase.h +25 -21
  75. data/vendor/eigen/Eigen/src/Core/DenseStorage.h +153 -71
  76. data/vendor/eigen/Eigen/src/Core/Diagonal.h +21 -23
  77. data/vendor/eigen/Eigen/src/Core/DiagonalMatrix.h +50 -2
  78. data/vendor/eigen/Eigen/src/Core/DiagonalProduct.h +1 -1
  79. data/vendor/eigen/Eigen/src/Core/Dot.h +10 -10
  80. data/vendor/eigen/Eigen/src/Core/EigenBase.h +10 -9
  81. data/vendor/eigen/Eigen/src/Core/ForceAlignedAccess.h +8 -4
  82. data/vendor/eigen/Eigen/src/Core/Fuzzy.h +3 -3
  83. data/vendor/eigen/Eigen/src/Core/GeneralProduct.h +20 -10
  84. data/vendor/eigen/Eigen/src/Core/GenericPacketMath.h +599 -152
  85. data/vendor/eigen/Eigen/src/Core/GlobalFunctions.h +40 -33
  86. data/vendor/eigen/Eigen/src/Core/IO.h +40 -7
  87. data/vendor/eigen/Eigen/src/Core/IndexedView.h +237 -0
  88. data/vendor/eigen/Eigen/src/Core/Inverse.h +9 -10
  89. data/vendor/eigen/Eigen/src/Core/Map.h +7 -7
  90. data/vendor/eigen/Eigen/src/Core/MapBase.h +10 -3
  91. data/vendor/eigen/Eigen/src/Core/MathFunctions.h +767 -125
  92. data/vendor/eigen/Eigen/src/Core/MathFunctionsImpl.h +118 -19
  93. data/vendor/eigen/Eigen/src/Core/Matrix.h +131 -25
  94. data/vendor/eigen/Eigen/src/Core/MatrixBase.h +21 -3
  95. data/vendor/eigen/Eigen/src/Core/NestByValue.h +25 -50
  96. data/vendor/eigen/Eigen/src/Core/NoAlias.h +4 -3
  97. data/vendor/eigen/Eigen/src/Core/NumTraits.h +107 -20
  98. data/vendor/eigen/Eigen/src/Core/PartialReduxEvaluator.h +232 -0
  99. data/vendor/eigen/Eigen/src/Core/PermutationMatrix.h +3 -31
  100. data/vendor/eigen/Eigen/src/Core/PlainObjectBase.h +152 -59
  101. data/vendor/eigen/Eigen/src/Core/Product.h +30 -25
  102. data/vendor/eigen/Eigen/src/Core/ProductEvaluators.h +192 -125
  103. data/vendor/eigen/Eigen/src/Core/Random.h +37 -1
  104. data/vendor/eigen/Eigen/src/Core/Redux.h +180 -170
  105. data/vendor/eigen/Eigen/src/Core/Ref.h +121 -23
  106. data/vendor/eigen/Eigen/src/Core/Replicate.h +8 -8
  107. data/vendor/eigen/Eigen/src/Core/Reshaped.h +454 -0
  108. data/vendor/eigen/Eigen/src/Core/ReturnByValue.h +7 -5
  109. data/vendor/eigen/Eigen/src/Core/Reverse.h +18 -12
  110. data/vendor/eigen/Eigen/src/Core/Select.h +8 -6
  111. data/vendor/eigen/Eigen/src/Core/SelfAdjointView.h +33 -20
  112. data/vendor/eigen/Eigen/src/Core/Solve.h +14 -14
  113. data/vendor/eigen/Eigen/src/Core/SolveTriangular.h +16 -16
  114. data/vendor/eigen/Eigen/src/Core/SolverBase.h +41 -3
  115. data/vendor/eigen/Eigen/src/Core/StableNorm.h +100 -70
  116. data/vendor/eigen/Eigen/src/Core/StlIterators.h +463 -0
  117. data/vendor/eigen/Eigen/src/Core/Stride.h +9 -4
  118. data/vendor/eigen/Eigen/src/Core/Swap.h +5 -4
  119. data/vendor/eigen/Eigen/src/Core/Transpose.h +88 -27
  120. data/vendor/eigen/Eigen/src/Core/Transpositions.h +26 -47
  121. data/vendor/eigen/Eigen/src/Core/TriangularMatrix.h +93 -75
  122. data/vendor/eigen/Eigen/src/Core/VectorBlock.h +5 -5
  123. data/vendor/eigen/Eigen/src/Core/VectorwiseOp.h +159 -70
  124. data/vendor/eigen/Eigen/src/Core/Visitor.h +137 -29
  125. data/vendor/eigen/Eigen/src/Core/arch/AVX/Complex.h +50 -129
  126. data/vendor/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +126 -337
  127. data/vendor/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +1092 -155
  128. data/vendor/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +65 -1
  129. data/vendor/eigen/Eigen/src/Core/arch/AVX512/Complex.h +422 -0
  130. data/vendor/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +207 -236
  131. data/vendor/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1482 -495
  132. data/vendor/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +89 -0
  133. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +152 -165
  134. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +19 -251
  135. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2937 -0
  136. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +221 -0
  137. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +629 -0
  138. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2042 -392
  139. data/vendor/eigen/Eigen/src/Core/arch/CUDA/Complex.h +235 -80
  140. data/vendor/eigen/Eigen/src/Core/arch/Default/BFloat16.h +700 -0
  141. data/vendor/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +102 -14
  142. data/vendor/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1649 -0
  143. data/vendor/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +110 -0
  144. data/vendor/eigen/Eigen/src/Core/arch/Default/Half.h +942 -0
  145. data/vendor/eigen/Eigen/src/Core/arch/Default/Settings.h +1 -1
  146. data/vendor/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +120 -0
  147. data/vendor/eigen/Eigen/src/Core/arch/{CUDA → GPU}/MathFunctions.h +16 -4
  148. data/vendor/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1685 -0
  149. data/vendor/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +80 -0
  150. data/vendor/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
  151. data/vendor/eigen/Eigen/src/Core/arch/MSA/Complex.h +648 -0
  152. data/vendor/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +387 -0
  153. data/vendor/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1233 -0
  154. data/vendor/eigen/Eigen/src/Core/arch/NEON/Complex.h +313 -219
  155. data/vendor/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +183 -0
  156. data/vendor/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +54 -70
  157. data/vendor/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4376 -549
  158. data/vendor/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1419 -0
  159. data/vendor/eigen/Eigen/src/Core/arch/SSE/Complex.h +59 -179
  160. data/vendor/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +65 -428
  161. data/vendor/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +893 -283
  162. data/vendor/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +65 -0
  163. data/vendor/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +44 -0
  164. data/vendor/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +752 -0
  165. data/vendor/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +49 -0
  166. data/vendor/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +232 -0
  167. data/vendor/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +301 -0
  168. data/vendor/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +670 -0
  169. data/vendor/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +694 -0
  170. data/vendor/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +85 -0
  171. data/vendor/eigen/Eigen/src/Core/arch/ZVector/Complex.h +212 -183
  172. data/vendor/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +101 -5
  173. data/vendor/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +510 -395
  174. data/vendor/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +11 -2
  175. data/vendor/eigen/Eigen/src/Core/functors/BinaryFunctors.h +112 -46
  176. data/vendor/eigen/Eigen/src/Core/functors/NullaryFunctors.h +31 -30
  177. data/vendor/eigen/Eigen/src/Core/functors/StlFunctors.h +32 -2
  178. data/vendor/eigen/Eigen/src/Core/functors/UnaryFunctors.h +355 -16
  179. data/vendor/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1075 -586
  180. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +49 -24
  181. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +41 -35
  182. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +6 -6
  183. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +4 -2
  184. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +382 -483
  185. data/vendor/eigen/Eigen/src/Core/products/Parallelizer.h +22 -5
  186. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +53 -30
  187. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +16 -8
  188. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +8 -6
  189. data/vendor/eigen/Eigen/src/Core/products/SelfadjointProduct.h +4 -4
  190. data/vendor/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +5 -4
  191. data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +33 -27
  192. data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +14 -12
  193. data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +36 -34
  194. data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +8 -4
  195. data/vendor/eigen/Eigen/src/Core/products/TriangularSolverVector.h +13 -10
  196. data/vendor/eigen/Eigen/src/Core/util/BlasUtil.h +304 -119
  197. data/vendor/eigen/Eigen/src/Core/util/ConfigureVectorization.h +512 -0
  198. data/vendor/eigen/Eigen/src/Core/util/Constants.h +25 -9
  199. data/vendor/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +26 -3
  200. data/vendor/eigen/Eigen/src/Core/util/ForwardDeclarations.h +29 -9
  201. data/vendor/eigen/Eigen/src/Core/util/IndexedViewHelper.h +186 -0
  202. data/vendor/eigen/Eigen/src/Core/util/IntegralConstant.h +272 -0
  203. data/vendor/eigen/Eigen/src/Core/util/MKL_support.h +8 -1
  204. data/vendor/eigen/Eigen/src/Core/util/Macros.h +709 -246
  205. data/vendor/eigen/Eigen/src/Core/util/Memory.h +222 -52
  206. data/vendor/eigen/Eigen/src/Core/util/Meta.h +355 -77
  207. data/vendor/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +5 -1
  208. data/vendor/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
  209. data/vendor/eigen/Eigen/src/Core/util/StaticAssert.h +8 -5
  210. data/vendor/eigen/Eigen/src/Core/util/SymbolicIndex.h +293 -0
  211. data/vendor/eigen/Eigen/src/Core/util/XprHelper.h +65 -30
  212. data/vendor/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +1 -1
  213. data/vendor/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +7 -4
  214. data/vendor/eigen/Eigen/src/Eigenvalues/EigenSolver.h +2 -2
  215. data/vendor/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +1 -1
  216. data/vendor/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +2 -2
  217. data/vendor/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +2 -2
  218. data/vendor/eigen/Eigen/src/Eigenvalues/RealQZ.h +9 -6
  219. data/vendor/eigen/Eigen/src/Eigenvalues/RealSchur.h +21 -9
  220. data/vendor/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +77 -43
  221. data/vendor/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +20 -15
  222. data/vendor/eigen/Eigen/src/Geometry/AlignedBox.h +99 -5
  223. data/vendor/eigen/Eigen/src/Geometry/AngleAxis.h +4 -4
  224. data/vendor/eigen/Eigen/src/Geometry/EulerAngles.h +3 -3
  225. data/vendor/eigen/Eigen/src/Geometry/Homogeneous.h +15 -11
  226. data/vendor/eigen/Eigen/src/Geometry/Hyperplane.h +1 -1
  227. data/vendor/eigen/Eigen/src/Geometry/OrthoMethods.h +3 -2
  228. data/vendor/eigen/Eigen/src/Geometry/ParametrizedLine.h +39 -2
  229. data/vendor/eigen/Eigen/src/Geometry/Quaternion.h +70 -14
  230. data/vendor/eigen/Eigen/src/Geometry/Rotation2D.h +3 -3
  231. data/vendor/eigen/Eigen/src/Geometry/Scaling.h +23 -5
  232. data/vendor/eigen/Eigen/src/Geometry/Transform.h +88 -67
  233. data/vendor/eigen/Eigen/src/Geometry/Translation.h +6 -12
  234. data/vendor/eigen/Eigen/src/Geometry/Umeyama.h +1 -1
  235. data/vendor/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +168 -0
  236. data/vendor/eigen/Eigen/src/Householder/BlockHouseholder.h +9 -2
  237. data/vendor/eigen/Eigen/src/Householder/Householder.h +8 -4
  238. data/vendor/eigen/Eigen/src/Householder/HouseholderSequence.h +123 -48
  239. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +15 -15
  240. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +7 -23
  241. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +5 -22
  242. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +41 -47
  243. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +51 -60
  244. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +70 -20
  245. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +2 -20
  246. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +11 -9
  247. data/vendor/eigen/Eigen/src/Jacobi/Jacobi.h +31 -10
  248. data/vendor/eigen/Eigen/src/KLUSupport/KLUSupport.h +358 -0
  249. data/vendor/eigen/Eigen/src/LU/Determinant.h +35 -19
  250. data/vendor/eigen/Eigen/src/LU/FullPivLU.h +29 -43
  251. data/vendor/eigen/Eigen/src/LU/InverseImpl.h +25 -8
  252. data/vendor/eigen/Eigen/src/LU/PartialPivLU.h +71 -58
  253. data/vendor/eigen/Eigen/src/LU/arch/InverseSize4.h +351 -0
  254. data/vendor/eigen/Eigen/src/OrderingMethods/Amd.h +7 -17
  255. data/vendor/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +297 -277
  256. data/vendor/eigen/Eigen/src/OrderingMethods/Ordering.h +6 -10
  257. data/vendor/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +1 -1
  258. data/vendor/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +9 -7
  259. data/vendor/eigen/Eigen/src/QR/ColPivHouseholderQR.h +41 -20
  260. data/vendor/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +100 -27
  261. data/vendor/eigen/Eigen/src/QR/FullPivHouseholderQR.h +59 -22
  262. data/vendor/eigen/Eigen/src/QR/HouseholderQR.h +48 -23
  263. data/vendor/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +25 -3
  264. data/vendor/eigen/Eigen/src/SVD/BDCSVD.h +183 -63
  265. data/vendor/eigen/Eigen/src/SVD/JacobiSVD.h +22 -14
  266. data/vendor/eigen/Eigen/src/SVD/SVDBase.h +83 -22
  267. data/vendor/eigen/Eigen/src/SVD/UpperBidiagonalization.h +3 -3
  268. data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +17 -9
  269. data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +12 -37
  270. data/vendor/eigen/Eigen/src/SparseCore/AmbiVector.h +3 -2
  271. data/vendor/eigen/Eigen/src/SparseCore/CompressedStorage.h +16 -0
  272. data/vendor/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +6 -6
  273. data/vendor/eigen/Eigen/src/SparseCore/SparseAssign.h +81 -27
  274. data/vendor/eigen/Eigen/src/SparseCore/SparseBlock.h +25 -57
  275. data/vendor/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +40 -11
  276. data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +11 -15
  277. data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +4 -2
  278. data/vendor/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +30 -8
  279. data/vendor/eigen/Eigen/src/SparseCore/SparseMatrix.h +126 -11
  280. data/vendor/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +5 -12
  281. data/vendor/eigen/Eigen/src/SparseCore/SparseProduct.h +13 -1
  282. data/vendor/eigen/Eigen/src/SparseCore/SparseRef.h +7 -7
  283. data/vendor/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +5 -2
  284. data/vendor/eigen/Eigen/src/SparseCore/SparseUtil.h +8 -0
  285. data/vendor/eigen/Eigen/src/SparseCore/SparseVector.h +1 -1
  286. data/vendor/eigen/Eigen/src/SparseCore/SparseView.h +1 -0
  287. data/vendor/eigen/Eigen/src/SparseLU/SparseLU.h +162 -12
  288. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +1 -1
  289. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +76 -2
  290. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +2 -2
  291. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +1 -1
  292. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +1 -1
  293. data/vendor/eigen/Eigen/src/SparseQR/SparseQR.h +19 -6
  294. data/vendor/eigen/Eigen/src/StlSupport/StdDeque.h +2 -12
  295. data/vendor/eigen/Eigen/src/StlSupport/StdList.h +2 -2
  296. data/vendor/eigen/Eigen/src/StlSupport/StdVector.h +2 -2
  297. data/vendor/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +6 -8
  298. data/vendor/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +175 -39
  299. data/vendor/eigen/Eigen/src/misc/lapacke.h +5 -4
  300. data/vendor/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +28 -2
  301. data/vendor/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +155 -11
  302. data/vendor/eigen/Eigen/src/plugins/BlockMethods.h +626 -242
  303. data/vendor/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +14 -0
  304. data/vendor/eigen/Eigen/src/plugins/IndexedViewMethods.h +262 -0
  305. data/vendor/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +4 -4
  306. data/vendor/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +10 -0
  307. data/vendor/eigen/Eigen/src/plugins/ReshapedMethods.h +149 -0
  308. data/vendor/eigen/README.md +2 -0
  309. data/vendor/eigen/bench/btl/README +1 -1
  310. data/vendor/eigen/bench/tensors/README +6 -7
  311. data/vendor/eigen/ci/README.md +56 -0
  312. data/vendor/eigen/demos/mix_eigen_and_c/README +1 -1
  313. data/vendor/eigen/unsupported/Eigen/CXX11/src/Tensor/README.md +213 -158
  314. data/vendor/eigen/unsupported/README.txt +1 -1
  315. data/vendor/tomotopy/README.kr.rst +21 -0
  316. data/vendor/tomotopy/README.rst +20 -0
  317. data/vendor/tomotopy/src/Labeling/FoRelevance.cpp +2 -2
  318. data/vendor/tomotopy/src/Labeling/Phraser.hpp +1 -1
  319. data/vendor/tomotopy/src/TopicModel/CTModel.hpp +2 -1
  320. data/vendor/tomotopy/src/TopicModel/DMRModel.hpp +2 -1
  321. data/vendor/tomotopy/src/TopicModel/DTModel.hpp +1 -1
  322. data/vendor/tomotopy/src/TopicModel/GDMRModel.hpp +2 -2
  323. data/vendor/tomotopy/src/TopicModel/HDP.h +1 -0
  324. data/vendor/tomotopy/src/TopicModel/HDPModel.hpp +53 -2
  325. data/vendor/tomotopy/src/TopicModel/HLDAModel.hpp +1 -1
  326. data/vendor/tomotopy/src/TopicModel/HPAModel.hpp +1 -0
  327. data/vendor/tomotopy/src/TopicModel/LDACVB0Model.hpp +2 -2
  328. data/vendor/tomotopy/src/TopicModel/LDAModel.hpp +16 -5
  329. data/vendor/tomotopy/src/TopicModel/LLDAModel.hpp +1 -0
  330. data/vendor/tomotopy/src/TopicModel/MGLDAModel.hpp +1 -0
  331. data/vendor/tomotopy/src/TopicModel/PLDAModel.hpp +1 -0
  332. data/vendor/tomotopy/src/TopicModel/PT.h +3 -1
  333. data/vendor/tomotopy/src/TopicModel/PTModel.hpp +31 -1
  334. data/vendor/tomotopy/src/TopicModel/SLDAModel.hpp +2 -2
  335. data/vendor/tomotopy/src/TopicModel/TopicModel.hpp +7 -5
  336. data/vendor/tomotopy/src/Utils/EigenAddonOps.hpp +36 -1
  337. data/vendor/tomotopy/src/Utils/exception.h +6 -0
  338. data/vendor/tomotopy/src/Utils/sample.hpp +14 -12
  339. data/vendor/tomotopy/src/Utils/sse_gamma.h +0 -3
  340. metadata +60 -14
  341. data/vendor/eigen/Eigen/CMakeLists.txt +0 -19
  342. data/vendor/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -674
  343. data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
  344. data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
  345. data/vendor/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
  346. data/vendor/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
  347. data/vendor/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
@@ -0,0 +1,1233 @@
1
+ // This file is part of Eigen, a lightweight C++ template library
2
+ // for linear algebra.
3
+ //
4
+ // Copyright (C) 2018 Wave Computing, Inc.
5
+ // Written by:
6
+ // Chris Larsen
7
+ // Alexey Frunze (afrunze@wavecomp.com)
8
+ //
9
+ // This Source Code Form is subject to the terms of the Mozilla
10
+ // Public License v. 2.0. If a copy of the MPL was not distributed
11
+ // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
12
+
13
+ #ifndef EIGEN_PACKET_MATH_MSA_H
14
+ #define EIGEN_PACKET_MATH_MSA_H
15
+
16
+ #include <iostream>
17
+ #include <string>
18
+
19
+ namespace Eigen {
20
+
21
+ namespace internal {
22
+
23
+ #ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
24
+ #define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
25
+ #endif
26
+
27
+ #ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
28
+ #define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
29
+ #endif
30
+
31
+ #ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
32
+ #define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 32
33
+ #endif
34
+
35
+ #if 0
36
+ #define EIGEN_MSA_DEBUG \
37
+ static bool firstTime = true; \
38
+ do { \
39
+ if (firstTime) { \
40
+ std::cout << __FILE__ << ':' << __LINE__ << ':' << __FUNCTION__ << std::endl; \
41
+ firstTime = false; \
42
+ } \
43
+ } while (0)
44
+ #else
45
+ #define EIGEN_MSA_DEBUG
46
+ #endif
47
+
48
+ #define EIGEN_MSA_SHF_I8(a, b, c, d) (((d) << 6) | ((c) << 4) | ((b) << 2) | (a))
49
+
50
+ typedef v4f32 Packet4f;
51
+ typedef v4i32 Packet4i;
52
+ typedef v4u32 Packet4ui;
53
+
54
+ #define _EIGEN_DECLARE_CONST_Packet4f(NAME, X) const Packet4f p4f_##NAME = { X, X, X, X }
55
+ #define _EIGEN_DECLARE_CONST_Packet4i(NAME, X) const Packet4i p4i_##NAME = { X, X, X, X }
56
+ #define _EIGEN_DECLARE_CONST_Packet4ui(NAME, X) const Packet4ui p4ui_##NAME = { X, X, X, X }
57
+
58
+ inline std::ostream& operator<<(std::ostream& os, const Packet4f& value) {
59
+ os << "[ " << value[0] << ", " << value[1] << ", " << value[2] << ", " << value[3] << " ]";
60
+ return os;
61
+ }
62
+
63
+ inline std::ostream& operator<<(std::ostream& os, const Packet4i& value) {
64
+ os << "[ " << value[0] << ", " << value[1] << ", " << value[2] << ", " << value[3] << " ]";
65
+ return os;
66
+ }
67
+
68
+ inline std::ostream& operator<<(std::ostream& os, const Packet4ui& value) {
69
+ os << "[ " << value[0] << ", " << value[1] << ", " << value[2] << ", " << value[3] << " ]";
70
+ return os;
71
+ }
72
+
73
+ template <>
74
+ struct packet_traits<float> : default_packet_traits {
75
+ typedef Packet4f type;
76
+ typedef Packet4f half; // Packet2f intrinsics not implemented yet
77
+ enum {
78
+ Vectorizable = 1,
79
+ AlignedOnScalar = 1,
80
+ size = 4,
81
+ HasHalfPacket = 0, // Packet2f intrinsics not implemented yet
82
+ // FIXME check the Has*
83
+ HasDiv = 1,
84
+ HasSin = EIGEN_FAST_MATH,
85
+ HasCos = EIGEN_FAST_MATH,
86
+ HasTanh = EIGEN_FAST_MATH,
87
+ HasErf = EIGEN_FAST_MATH,
88
+ HasLog = 1,
89
+ HasExp = 1,
90
+ HasSqrt = 1,
91
+ HasRsqrt = 1,
92
+ HasRound = 1,
93
+ HasFloor = 1,
94
+ HasCeil = 1,
95
+ HasBlend = 1
96
+ };
97
+ };
98
+
99
+ template <>
100
+ struct packet_traits<int32_t> : default_packet_traits {
101
+ typedef Packet4i type;
102
+ typedef Packet4i half; // Packet2i intrinsics not implemented yet
103
+ enum {
104
+ Vectorizable = 1,
105
+ AlignedOnScalar = 1,
106
+ size = 4,
107
+ HasHalfPacket = 0, // Packet2i intrinsics not implemented yet
108
+ // FIXME check the Has*
109
+ HasDiv = 1,
110
+ HasBlend = 1
111
+ };
112
+ };
113
+
114
+ template <>
115
+ struct unpacket_traits<Packet4f> {
116
+ typedef float type;
117
+ enum { size = 4, alignment = Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false };
118
+ typedef Packet4f half;
119
+ };
120
+
121
+ template <>
122
+ struct unpacket_traits<Packet4i> {
123
+ typedef int32_t type;
124
+ enum { size = 4, alignment = Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false };
125
+ typedef Packet4i half;
126
+ };
127
+
128
+ template <>
129
+ EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) {
130
+ EIGEN_MSA_DEBUG;
131
+
132
+ Packet4f v = { from, from, from, from };
133
+ return v;
134
+ }
135
+
136
+ template <>
137
+ EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int32_t& from) {
138
+ EIGEN_MSA_DEBUG;
139
+
140
+ return __builtin_msa_fill_w(from);
141
+ }
142
+
143
+ template <>
144
+ EIGEN_STRONG_INLINE Packet4f pload1<Packet4f>(const float* from) {
145
+ EIGEN_MSA_DEBUG;
146
+
147
+ float f = *from;
148
+ Packet4f v = { f, f, f, f };
149
+ return v;
150
+ }
151
+
152
+ template <>
153
+ EIGEN_STRONG_INLINE Packet4i pload1<Packet4i>(const int32_t* from) {
154
+ EIGEN_MSA_DEBUG;
155
+
156
+ return __builtin_msa_fill_w(*from);
157
+ }
158
+
159
+ template <>
160
+ EIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const Packet4f& b) {
161
+ EIGEN_MSA_DEBUG;
162
+
163
+ return __builtin_msa_fadd_w(a, b);
164
+ }
165
+
166
+ template <>
167
+ EIGEN_STRONG_INLINE Packet4i padd<Packet4i>(const Packet4i& a, const Packet4i& b) {
168
+ EIGEN_MSA_DEBUG;
169
+
170
+ return __builtin_msa_addv_w(a, b);
171
+ }
172
+
173
+ template <>
174
+ EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a) {
175
+ EIGEN_MSA_DEBUG;
176
+
177
+ static const Packet4f countdown = { 0.0f, 1.0f, 2.0f, 3.0f };
178
+ return padd(pset1<Packet4f>(a), countdown);
179
+ }
180
+
181
+ template <>
182
+ EIGEN_STRONG_INLINE Packet4i plset<Packet4i>(const int32_t& a) {
183
+ EIGEN_MSA_DEBUG;
184
+
185
+ static const Packet4i countdown = { 0, 1, 2, 3 };
186
+ return padd(pset1<Packet4i>(a), countdown);
187
+ }
188
+
189
+ template <>
190
+ EIGEN_STRONG_INLINE Packet4f psub<Packet4f>(const Packet4f& a, const Packet4f& b) {
191
+ EIGEN_MSA_DEBUG;
192
+
193
+ return __builtin_msa_fsub_w(a, b);
194
+ }
195
+
196
+ template <>
197
+ EIGEN_STRONG_INLINE Packet4i psub<Packet4i>(const Packet4i& a, const Packet4i& b) {
198
+ EIGEN_MSA_DEBUG;
199
+
200
+ return __builtin_msa_subv_w(a, b);
201
+ }
202
+
203
+ template <>
204
+ EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a) {
205
+ EIGEN_MSA_DEBUG;
206
+
207
+ return (Packet4f)__builtin_msa_bnegi_w((v4u32)a, 31);
208
+ }
209
+
210
+ template <>
211
+ EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) {
212
+ EIGEN_MSA_DEBUG;
213
+
214
+ return __builtin_msa_addvi_w((v4i32)__builtin_msa_nori_b((v16u8)a, 0), 1);
215
+ }
216
+
217
+ template <>
218
+ EIGEN_STRONG_INLINE Packet4f pconj(const Packet4f& a) {
219
+ EIGEN_MSA_DEBUG;
220
+
221
+ return a;
222
+ }
223
+
224
+ template <>
225
+ EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) {
226
+ EIGEN_MSA_DEBUG;
227
+
228
+ return a;
229
+ }
230
+
231
+ template <>
232
+ EIGEN_STRONG_INLINE Packet4f pmul<Packet4f>(const Packet4f& a, const Packet4f& b) {
233
+ EIGEN_MSA_DEBUG;
234
+
235
+ return __builtin_msa_fmul_w(a, b);
236
+ }
237
+
238
+ template <>
239
+ EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const Packet4i& b) {
240
+ EIGEN_MSA_DEBUG;
241
+
242
+ return __builtin_msa_mulv_w(a, b);
243
+ }
244
+
245
+ template <>
246
+ EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b) {
247
+ EIGEN_MSA_DEBUG;
248
+
249
+ return __builtin_msa_fdiv_w(a, b);
250
+ }
251
+
252
+ template <>
253
+ EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& a, const Packet4i& b) {
254
+ EIGEN_MSA_DEBUG;
255
+
256
+ return __builtin_msa_div_s_w(a, b);
257
+ }
258
+
259
+ template <>
260
+ EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) {
261
+ EIGEN_MSA_DEBUG;
262
+
263
+ return __builtin_msa_fmadd_w(c, a, b);
264
+ }
265
+
266
+ template <>
267
+ EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) {
268
+ EIGEN_MSA_DEBUG;
269
+
270
+ // Use "asm" construct to avoid __builtin_msa_maddv_w GNU C bug.
271
+ Packet4i value = c;
272
+ __asm__("maddv.w %w[value], %w[a], %w[b]\n"
273
+ // Outputs
274
+ : [value] "+f"(value)
275
+ // Inputs
276
+ : [a] "f"(a), [b] "f"(b));
277
+ return value;
278
+ }
279
+
280
+ template <>
281
+ EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b) {
282
+ EIGEN_MSA_DEBUG;
283
+
284
+ return (Packet4f)__builtin_msa_and_v((v16u8)a, (v16u8)b);
285
+ }
286
+
287
+ template <>
288
+ EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) {
289
+ EIGEN_MSA_DEBUG;
290
+
291
+ return (Packet4i)__builtin_msa_and_v((v16u8)a, (v16u8)b);
292
+ }
293
+
294
+ template <>
295
+ EIGEN_STRONG_INLINE Packet4f por<Packet4f>(const Packet4f& a, const Packet4f& b) {
296
+ EIGEN_MSA_DEBUG;
297
+
298
+ return (Packet4f)__builtin_msa_or_v((v16u8)a, (v16u8)b);
299
+ }
300
+
301
+ template <>
302
+ EIGEN_STRONG_INLINE Packet4i por<Packet4i>(const Packet4i& a, const Packet4i& b) {
303
+ EIGEN_MSA_DEBUG;
304
+
305
+ return (Packet4i)__builtin_msa_or_v((v16u8)a, (v16u8)b);
306
+ }
307
+
308
+ template <>
309
+ EIGEN_STRONG_INLINE Packet4f pxor<Packet4f>(const Packet4f& a, const Packet4f& b) {
310
+ EIGEN_MSA_DEBUG;
311
+
312
+ return (Packet4f)__builtin_msa_xor_v((v16u8)a, (v16u8)b);
313
+ }
314
+
315
+ template <>
316
+ EIGEN_STRONG_INLINE Packet4i pxor<Packet4i>(const Packet4i& a, const Packet4i& b) {
317
+ EIGEN_MSA_DEBUG;
318
+
319
+ return (Packet4i)__builtin_msa_xor_v((v16u8)a, (v16u8)b);
320
+ }
321
+
322
+ template <>
323
+ EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f& b) {
324
+ EIGEN_MSA_DEBUG;
325
+
326
+ return pand(a, (Packet4f)__builtin_msa_xori_b((v16u8)b, 255));
327
+ }
328
+
329
+ template <>
330
+ EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) {
331
+ EIGEN_MSA_DEBUG;
332
+
333
+ return pand(a, (Packet4i)__builtin_msa_xori_b((v16u8)b, 255));
334
+ }
335
+
336
+ template <>
337
+ EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) {
338
+ EIGEN_MSA_DEBUG;
339
+
340
+ #if EIGEN_FAST_MATH
341
+ // This prefers numbers to NaNs.
342
+ return __builtin_msa_fmin_w(a, b);
343
+ #else
344
+ // This prefers NaNs to numbers.
345
+ Packet4i aNaN = __builtin_msa_fcun_w(a, a);
346
+ Packet4i aMinOrNaN = por(__builtin_msa_fclt_w(a, b), aNaN);
347
+ return (Packet4f)__builtin_msa_bsel_v((v16u8)aMinOrNaN, (v16u8)b, (v16u8)a);
348
+ #endif
349
+ }
350
+
351
+ template <>
352
+ EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b) {
353
+ EIGEN_MSA_DEBUG;
354
+
355
+ return __builtin_msa_min_s_w(a, b);
356
+ }
357
+
358
+ template <>
359
+ EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b) {
360
+ EIGEN_MSA_DEBUG;
361
+
362
+ #if EIGEN_FAST_MATH
363
+ // This prefers numbers to NaNs.
364
+ return __builtin_msa_fmax_w(a, b);
365
+ #else
366
+ // This prefers NaNs to numbers.
367
+ Packet4i aNaN = __builtin_msa_fcun_w(a, a);
368
+ Packet4i aMaxOrNaN = por(__builtin_msa_fclt_w(b, a), aNaN);
369
+ return (Packet4f)__builtin_msa_bsel_v((v16u8)aMaxOrNaN, (v16u8)b, (v16u8)a);
370
+ #endif
371
+ }
372
+
373
+ template <>
374
+ EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b) {
375
+ EIGEN_MSA_DEBUG;
376
+
377
+ return __builtin_msa_max_s_w(a, b);
378
+ }
379
+
380
+ template <>
381
+ EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from) {
382
+ EIGEN_MSA_DEBUG;
383
+
384
+ EIGEN_DEBUG_ALIGNED_LOAD return (Packet4f)__builtin_msa_ld_w(const_cast<float*>(from), 0);
385
+ }
386
+
387
+ template <>
388
+ EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int32_t* from) {
389
+ EIGEN_MSA_DEBUG;
390
+
391
+ EIGEN_DEBUG_ALIGNED_LOAD return __builtin_msa_ld_w(const_cast<int32_t*>(from), 0);
392
+ }
393
+
394
+ template <>
395
+ EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from) {
396
+ EIGEN_MSA_DEBUG;
397
+
398
+ EIGEN_DEBUG_UNALIGNED_LOAD return (Packet4f)__builtin_msa_ld_w(const_cast<float*>(from), 0);
399
+ }
400
+
401
+ template <>
402
+ EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int32_t* from) {
403
+ EIGEN_MSA_DEBUG;
404
+
405
+ EIGEN_DEBUG_UNALIGNED_LOAD return (Packet4i)__builtin_msa_ld_w(const_cast<int32_t*>(from), 0);
406
+ }
407
+
408
+ template <>
409
+ EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from) {
410
+ EIGEN_MSA_DEBUG;
411
+
412
+ float f0 = from[0], f1 = from[1];
413
+ Packet4f v0 = { f0, f0, f0, f0 };
414
+ Packet4f v1 = { f1, f1, f1, f1 };
415
+ return (Packet4f)__builtin_msa_ilvr_d((v2i64)v1, (v2i64)v0);
416
+ }
417
+
418
+ template <>
419
+ EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int32_t* from) {
420
+ EIGEN_MSA_DEBUG;
421
+
422
+ int32_t i0 = from[0], i1 = from[1];
423
+ Packet4i v0 = { i0, i0, i0, i0 };
424
+ Packet4i v1 = { i1, i1, i1, i1 };
425
+ return (Packet4i)__builtin_msa_ilvr_d((v2i64)v1, (v2i64)v0);
426
+ }
427
+
428
+ template <>
429
+ EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from) {
430
+ EIGEN_MSA_DEBUG;
431
+
432
+ EIGEN_DEBUG_ALIGNED_STORE __builtin_msa_st_w((Packet4i)from, to, 0);
433
+ }
434
+
435
+ template <>
436
+ EIGEN_STRONG_INLINE void pstore<int32_t>(int32_t* to, const Packet4i& from) {
437
+ EIGEN_MSA_DEBUG;
438
+
439
+ EIGEN_DEBUG_ALIGNED_STORE __builtin_msa_st_w(from, to, 0);
440
+ }
441
+
442
+ template <>
443
+ EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) {
444
+ EIGEN_MSA_DEBUG;
445
+
446
+ EIGEN_DEBUG_UNALIGNED_STORE __builtin_msa_st_w((Packet4i)from, to, 0);
447
+ }
448
+
449
+ template <>
450
+ EIGEN_STRONG_INLINE void pstoreu<int32_t>(int32_t* to, const Packet4i& from) {
451
+ EIGEN_MSA_DEBUG;
452
+
453
+ EIGEN_DEBUG_UNALIGNED_STORE __builtin_msa_st_w(from, to, 0);
454
+ }
455
+
456
+ template <>
457
+ EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, Index stride) {
458
+ EIGEN_MSA_DEBUG;
459
+
460
+ float f = *from;
461
+ Packet4f v = { f, f, f, f };
462
+ v[1] = from[stride];
463
+ v[2] = from[2 * stride];
464
+ v[3] = from[3 * stride];
465
+ return v;
466
+ }
467
+
468
+ template <>
469
+ EIGEN_DEVICE_FUNC inline Packet4i pgather<int32_t, Packet4i>(const int32_t* from, Index stride) {
470
+ EIGEN_MSA_DEBUG;
471
+
472
+ int32_t i = *from;
473
+ Packet4i v = { i, i, i, i };
474
+ v[1] = from[stride];
475
+ v[2] = from[2 * stride];
476
+ v[3] = from[3 * stride];
477
+ return v;
478
+ }
479
+
480
+ template <>
481
+ EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from,
482
+ Index stride) {
483
+ EIGEN_MSA_DEBUG;
484
+
485
+ *to = from[0];
486
+ to += stride;
487
+ *to = from[1];
488
+ to += stride;
489
+ *to = from[2];
490
+ to += stride;
491
+ *to = from[3];
492
+ }
493
+
494
+ template <>
495
+ EIGEN_DEVICE_FUNC inline void pscatter<int32_t, Packet4i>(int32_t* to, const Packet4i& from,
496
+ Index stride) {
497
+ EIGEN_MSA_DEBUG;
498
+
499
+ *to = from[0];
500
+ to += stride;
501
+ *to = from[1];
502
+ to += stride;
503
+ *to = from[2];
504
+ to += stride;
505
+ *to = from[3];
506
+ }
507
+
508
+ template <>
509
+ EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) {
510
+ EIGEN_MSA_DEBUG;
511
+
512
+ __builtin_prefetch(addr);
513
+ }
514
+
515
+ template <>
516
+ EIGEN_STRONG_INLINE void prefetch<int32_t>(const int32_t* addr) {
517
+ EIGEN_MSA_DEBUG;
518
+
519
+ __builtin_prefetch(addr);
520
+ }
521
+
522
+ template <>
523
+ EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) {
524
+ EIGEN_MSA_DEBUG;
525
+
526
+ return a[0];
527
+ }
528
+
529
+ template <>
530
+ EIGEN_STRONG_INLINE int32_t pfirst<Packet4i>(const Packet4i& a) {
531
+ EIGEN_MSA_DEBUG;
532
+
533
+ return a[0];
534
+ }
535
+
536
+ template <>
537
+ EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a) {
538
+ EIGEN_MSA_DEBUG;
539
+
540
+ return (Packet4f)__builtin_msa_shf_w((v4i32)a, EIGEN_MSA_SHF_I8(3, 2, 1, 0));
541
+ }
542
+
543
+ template <>
544
+ EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a) {
545
+ EIGEN_MSA_DEBUG;
546
+
547
+ return __builtin_msa_shf_w(a, EIGEN_MSA_SHF_I8(3, 2, 1, 0));
548
+ }
549
+
550
+ template <>
551
+ EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a) {
552
+ EIGEN_MSA_DEBUG;
553
+
554
+ return (Packet4f)__builtin_msa_bclri_w((v4u32)a, 31);
555
+ }
556
+
557
+ template <>
558
+ EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) {
559
+ EIGEN_MSA_DEBUG;
560
+
561
+ Packet4i zero = __builtin_msa_ldi_w(0);
562
+ return __builtin_msa_add_a_w(zero, a);
563
+ }
564
+
565
+ template <>
566
+ EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a) {
567
+ EIGEN_MSA_DEBUG;
568
+
569
+ Packet4f s = padd(a, (Packet4f)__builtin_msa_shf_w((v4i32)a, EIGEN_MSA_SHF_I8(2, 3, 0, 1)));
570
+ s = padd(s, (Packet4f)__builtin_msa_shf_w((v4i32)s, EIGEN_MSA_SHF_I8(1, 0, 3, 2)));
571
+ return s[0];
572
+ }
573
+
574
+
575
+ template <>
576
+ EIGEN_STRONG_INLINE int32_t predux<Packet4i>(const Packet4i& a) {
577
+ EIGEN_MSA_DEBUG;
578
+
579
+ Packet4i s = padd(a, __builtin_msa_shf_w(a, EIGEN_MSA_SHF_I8(2, 3, 0, 1)));
580
+ s = padd(s, __builtin_msa_shf_w(s, EIGEN_MSA_SHF_I8(1, 0, 3, 2)));
581
+ return s[0];
582
+ }
583
+
584
+ // Other reduction functions:
585
+ // mul
586
+ template <>
587
+ EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a) {
588
+ EIGEN_MSA_DEBUG;
589
+
590
+ Packet4f p = pmul(a, (Packet4f)__builtin_msa_shf_w((v4i32)a, EIGEN_MSA_SHF_I8(2, 3, 0, 1)));
591
+ p = pmul(p, (Packet4f)__builtin_msa_shf_w((v4i32)p, EIGEN_MSA_SHF_I8(1, 0, 3, 2)));
592
+ return p[0];
593
+ }
594
+
595
+ template <>
596
+ EIGEN_STRONG_INLINE int32_t predux_mul<Packet4i>(const Packet4i& a) {
597
+ EIGEN_MSA_DEBUG;
598
+
599
+ Packet4i p = pmul(a, __builtin_msa_shf_w(a, EIGEN_MSA_SHF_I8(2, 3, 0, 1)));
600
+ p = pmul(p, __builtin_msa_shf_w(p, EIGEN_MSA_SHF_I8(1, 0, 3, 2)));
601
+ return p[0];
602
+ }
603
+
604
+ // min
605
+ template <>
606
+ EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a) {
607
+ EIGEN_MSA_DEBUG;
608
+
609
+ // Swap 64-bit halves of a.
610
+ Packet4f swapped = (Packet4f)__builtin_msa_shf_w((Packet4i)a, EIGEN_MSA_SHF_I8(2, 3, 0, 1));
611
+ #if !EIGEN_FAST_MATH
612
+ // Detect presence of NaNs from pairs a[0]-a[2] and a[1]-a[3] as two 32-bit
613
+ // masks of all zeroes/ones in low 64 bits.
614
+ v16u8 unord = (v16u8)__builtin_msa_fcun_w(a, swapped);
615
+ // Combine the two masks into one: 64 ones if no NaNs, otherwise 64 zeroes.
616
+ unord = (v16u8)__builtin_msa_ceqi_d((v2i64)unord, 0);
617
+ #endif
618
+ // Continue with min computation.
619
+ Packet4f v = __builtin_msa_fmin_w(a, swapped);
620
+ v = __builtin_msa_fmin_w(
621
+ v, (Packet4f)__builtin_msa_shf_w((Packet4i)v, EIGEN_MSA_SHF_I8(1, 0, 3, 2)));
622
+ #if !EIGEN_FAST_MATH
623
+ // Based on the mask select between v and 4 qNaNs.
624
+ v16u8 qnans = (v16u8)__builtin_msa_fill_w(0x7FC00000);
625
+ v = (Packet4f)__builtin_msa_bsel_v(unord, qnans, (v16u8)v);
626
+ #endif
627
+ return v[0];
628
+ }
629
+
630
+ template <>
631
+ EIGEN_STRONG_INLINE int32_t predux_min<Packet4i>(const Packet4i& a) {
632
+ EIGEN_MSA_DEBUG;
633
+
634
+ Packet4i m = pmin(a, __builtin_msa_shf_w(a, EIGEN_MSA_SHF_I8(2, 3, 0, 1)));
635
+ m = pmin(m, __builtin_msa_shf_w(m, EIGEN_MSA_SHF_I8(1, 0, 3, 2)));
636
+ return m[0];
637
+ }
638
+
639
+ // max
640
+ template <>
641
+ EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a) {
642
+ EIGEN_MSA_DEBUG;
643
+
644
+ // Swap 64-bit halves of a.
645
+ Packet4f swapped = (Packet4f)__builtin_msa_shf_w((Packet4i)a, EIGEN_MSA_SHF_I8(2, 3, 0, 1));
646
+ #if !EIGEN_FAST_MATH
647
+ // Detect presence of NaNs from pairs a[0]-a[2] and a[1]-a[3] as two 32-bit
648
+ // masks of all zeroes/ones in low 64 bits.
649
+ v16u8 unord = (v16u8)__builtin_msa_fcun_w(a, swapped);
650
+ // Combine the two masks into one: 64 ones if no NaNs, otherwise 64 zeroes.
651
+ unord = (v16u8)__builtin_msa_ceqi_d((v2i64)unord, 0);
652
+ #endif
653
+ // Continue with max computation.
654
+ Packet4f v = __builtin_msa_fmax_w(a, swapped);
655
+ v = __builtin_msa_fmax_w(
656
+ v, (Packet4f)__builtin_msa_shf_w((Packet4i)v, EIGEN_MSA_SHF_I8(1, 0, 3, 2)));
657
+ #if !EIGEN_FAST_MATH
658
+ // Based on the mask select between v and 4 qNaNs.
659
+ v16u8 qnans = (v16u8)__builtin_msa_fill_w(0x7FC00000);
660
+ v = (Packet4f)__builtin_msa_bsel_v(unord, qnans, (v16u8)v);
661
+ #endif
662
+ return v[0];
663
+ }
664
+
665
+ template <>
666
+ EIGEN_STRONG_INLINE int32_t predux_max<Packet4i>(const Packet4i& a) {
667
+ EIGEN_MSA_DEBUG;
668
+
669
+ Packet4i m = pmax(a, __builtin_msa_shf_w(a, EIGEN_MSA_SHF_I8(2, 3, 0, 1)));
670
+ m = pmax(m, __builtin_msa_shf_w(m, EIGEN_MSA_SHF_I8(1, 0, 3, 2)));
671
+ return m[0];
672
+ }
673
+
674
+ inline std::ostream& operator<<(std::ostream& os, const PacketBlock<Packet4f, 4>& value) {
675
+ os << "[ " << value.packet[0] << "," << std::endl
676
+ << " " << value.packet[1] << "," << std::endl
677
+ << " " << value.packet[2] << "," << std::endl
678
+ << " " << value.packet[3] << " ]";
679
+ return os;
680
+ }
681
+
682
+ EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet4f, 4>& kernel) {
683
+ EIGEN_MSA_DEBUG;
684
+
685
+ v4i32 tmp1, tmp2, tmp3, tmp4;
686
+
687
+ tmp1 = __builtin_msa_ilvr_w((v4i32)kernel.packet[1], (v4i32)kernel.packet[0]);
688
+ tmp2 = __builtin_msa_ilvr_w((v4i32)kernel.packet[3], (v4i32)kernel.packet[2]);
689
+ tmp3 = __builtin_msa_ilvl_w((v4i32)kernel.packet[1], (v4i32)kernel.packet[0]);
690
+ tmp4 = __builtin_msa_ilvl_w((v4i32)kernel.packet[3], (v4i32)kernel.packet[2]);
691
+
692
+ kernel.packet[0] = (Packet4f)__builtin_msa_ilvr_d((v2i64)tmp2, (v2i64)tmp1);
693
+ kernel.packet[1] = (Packet4f)__builtin_msa_ilvod_d((v2i64)tmp2, (v2i64)tmp1);
694
+ kernel.packet[2] = (Packet4f)__builtin_msa_ilvr_d((v2i64)tmp4, (v2i64)tmp3);
695
+ kernel.packet[3] = (Packet4f)__builtin_msa_ilvod_d((v2i64)tmp4, (v2i64)tmp3);
696
+ }
697
+
698
+ inline std::ostream& operator<<(std::ostream& os, const PacketBlock<Packet4i, 4>& value) {
699
+ os << "[ " << value.packet[0] << "," << std::endl
700
+ << " " << value.packet[1] << "," << std::endl
701
+ << " " << value.packet[2] << "," << std::endl
702
+ << " " << value.packet[3] << " ]";
703
+ return os;
704
+ }
705
+
706
+ EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet4i, 4>& kernel) {
707
+ EIGEN_MSA_DEBUG;
708
+
709
+ v4i32 tmp1, tmp2, tmp3, tmp4;
710
+
711
+ tmp1 = __builtin_msa_ilvr_w(kernel.packet[1], kernel.packet[0]);
712
+ tmp2 = __builtin_msa_ilvr_w(kernel.packet[3], kernel.packet[2]);
713
+ tmp3 = __builtin_msa_ilvl_w(kernel.packet[1], kernel.packet[0]);
714
+ tmp4 = __builtin_msa_ilvl_w(kernel.packet[3], kernel.packet[2]);
715
+
716
+ kernel.packet[0] = (Packet4i)__builtin_msa_ilvr_d((v2i64)tmp2, (v2i64)tmp1);
717
+ kernel.packet[1] = (Packet4i)__builtin_msa_ilvod_d((v2i64)tmp2, (v2i64)tmp1);
718
+ kernel.packet[2] = (Packet4i)__builtin_msa_ilvr_d((v2i64)tmp4, (v2i64)tmp3);
719
+ kernel.packet[3] = (Packet4i)__builtin_msa_ilvod_d((v2i64)tmp4, (v2i64)tmp3);
720
+ }
721
+
722
+ template <>
723
+ EIGEN_STRONG_INLINE Packet4f psqrt(const Packet4f& a) {
724
+ EIGEN_MSA_DEBUG;
725
+
726
+ return __builtin_msa_fsqrt_w(a);
727
+ }
728
+
729
+ template <>
730
+ EIGEN_STRONG_INLINE Packet4f prsqrt(const Packet4f& a) {
731
+ EIGEN_MSA_DEBUG;
732
+
733
+ #if EIGEN_FAST_MATH
734
+ return __builtin_msa_frsqrt_w(a);
735
+ #else
736
+ Packet4f ones = __builtin_msa_ffint_s_w(__builtin_msa_ldi_w(1));
737
+ return pdiv(ones, psqrt(a));
738
+ #endif
739
+ }
740
+
741
+ template <>
742
+ EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f>(const Packet4f& a) {
743
+ Packet4f v = a;
744
+ int32_t old_mode, new_mode;
745
+ asm volatile(
746
+ "cfcmsa %[old_mode], $1\n"
747
+ "ori %[new_mode], %[old_mode], 3\n" // 3 = round towards -INFINITY.
748
+ "ctcmsa $1, %[new_mode]\n"
749
+ "frint.w %w[v], %w[v]\n"
750
+ "ctcmsa $1, %[old_mode]\n"
751
+ : // outputs
752
+ [old_mode] "=r"(old_mode), [new_mode] "=r"(new_mode),
753
+ [v] "+f"(v)
754
+ : // inputs
755
+ : // clobbers
756
+ );
757
+ return v;
758
+ }
759
+
760
+ template <>
761
+ EIGEN_STRONG_INLINE Packet4f pceil<Packet4f>(const Packet4f& a) {
762
+ Packet4f v = a;
763
+ int32_t old_mode, new_mode;
764
+ asm volatile(
765
+ "cfcmsa %[old_mode], $1\n"
766
+ "ori %[new_mode], %[old_mode], 3\n"
767
+ "xori %[new_mode], %[new_mode], 1\n" // 2 = round towards +INFINITY.
768
+ "ctcmsa $1, %[new_mode]\n"
769
+ "frint.w %w[v], %w[v]\n"
770
+ "ctcmsa $1, %[old_mode]\n"
771
+ : // outputs
772
+ [old_mode] "=r"(old_mode), [new_mode] "=r"(new_mode),
773
+ [v] "+f"(v)
774
+ : // inputs
775
+ : // clobbers
776
+ );
777
+ return v;
778
+ }
779
+
780
+ template <>
781
+ EIGEN_STRONG_INLINE Packet4f pround<Packet4f>(const Packet4f& a) {
782
+ Packet4f v = a;
783
+ int32_t old_mode, new_mode;
784
+ asm volatile(
785
+ "cfcmsa %[old_mode], $1\n"
786
+ "ori %[new_mode], %[old_mode], 3\n"
787
+ "xori %[new_mode], %[new_mode], 3\n" // 0 = round to nearest, ties to even.
788
+ "ctcmsa $1, %[new_mode]\n"
789
+ "frint.w %w[v], %w[v]\n"
790
+ "ctcmsa $1, %[old_mode]\n"
791
+ : // outputs
792
+ [old_mode] "=r"(old_mode), [new_mode] "=r"(new_mode),
793
+ [v] "+f"(v)
794
+ : // inputs
795
+ : // clobbers
796
+ );
797
+ return v;
798
+ }
799
+
800
+ template <>
801
+ EIGEN_STRONG_INLINE Packet4f pblend(const Selector<4>& ifPacket, const Packet4f& thenPacket,
802
+ const Packet4f& elsePacket) {
803
+ Packet4ui select = { ifPacket.select[0], ifPacket.select[1], ifPacket.select[2],
804
+ ifPacket.select[3] };
805
+ Packet4i mask = __builtin_msa_ceqi_w((Packet4i)select, 0);
806
+ return (Packet4f)__builtin_msa_bsel_v((v16u8)mask, (v16u8)thenPacket, (v16u8)elsePacket);
807
+ }
808
+
809
+ template <>
810
+ EIGEN_STRONG_INLINE Packet4i pblend(const Selector<4>& ifPacket, const Packet4i& thenPacket,
811
+ const Packet4i& elsePacket) {
812
+ Packet4ui select = { ifPacket.select[0], ifPacket.select[1], ifPacket.select[2],
813
+ ifPacket.select[3] };
814
+ Packet4i mask = __builtin_msa_ceqi_w((Packet4i)select, 0);
815
+ return (Packet4i)__builtin_msa_bsel_v((v16u8)mask, (v16u8)thenPacket, (v16u8)elsePacket);
816
+ }
817
+
818
+ //---------- double ----------
819
+
820
+ typedef v2f64 Packet2d;
821
+ typedef v2i64 Packet2l;
822
+ typedef v2u64 Packet2ul;
823
+
824
+ #define _EIGEN_DECLARE_CONST_Packet2d(NAME, X) const Packet2d p2d_##NAME = { X, X }
825
+ #define _EIGEN_DECLARE_CONST_Packet2l(NAME, X) const Packet2l p2l_##NAME = { X, X }
826
+ #define _EIGEN_DECLARE_CONST_Packet2ul(NAME, X) const Packet2ul p2ul_##NAME = { X, X }
827
+
828
+ inline std::ostream& operator<<(std::ostream& os, const Packet2d& value) {
829
+ os << "[ " << value[0] << ", " << value[1] << " ]";
830
+ return os;
831
+ }
832
+
833
+ inline std::ostream& operator<<(std::ostream& os, const Packet2l& value) {
834
+ os << "[ " << value[0] << ", " << value[1] << " ]";
835
+ return os;
836
+ }
837
+
838
+ inline std::ostream& operator<<(std::ostream& os, const Packet2ul& value) {
839
+ os << "[ " << value[0] << ", " << value[1] << " ]";
840
+ return os;
841
+ }
842
+
843
+ template <>
844
+ struct packet_traits<double> : default_packet_traits {
845
+ typedef Packet2d type;
846
+ typedef Packet2d half;
847
+ enum {
848
+ Vectorizable = 1,
849
+ AlignedOnScalar = 1,
850
+ size = 2,
851
+ HasHalfPacket = 0,
852
+ // FIXME check the Has*
853
+ HasDiv = 1,
854
+ HasExp = 1,
855
+ HasSqrt = 1,
856
+ HasRsqrt = 1,
857
+ HasRound = 1,
858
+ HasFloor = 1,
859
+ HasCeil = 1,
860
+ HasBlend = 1
861
+ };
862
+ };
863
+
864
+ template <>
865
+ struct unpacket_traits<Packet2d> {
866
+ typedef double type;
867
+ enum { size = 2, alignment = Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false };
868
+ typedef Packet2d half;
869
+ };
870
+
871
+ template <>
872
+ EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) {
873
+ EIGEN_MSA_DEBUG;
874
+
875
+ Packet2d value = { from, from };
876
+ return value;
877
+ }
878
+
879
+ template <>
880
+ EIGEN_STRONG_INLINE Packet2d padd<Packet2d>(const Packet2d& a, const Packet2d& b) {
881
+ EIGEN_MSA_DEBUG;
882
+
883
+ return __builtin_msa_fadd_d(a, b);
884
+ }
885
+
886
+ template <>
887
+ EIGEN_STRONG_INLINE Packet2d plset<Packet2d>(const double& a) {
888
+ EIGEN_MSA_DEBUG;
889
+
890
+ static const Packet2d countdown = { 0.0, 1.0 };
891
+ return padd(pset1<Packet2d>(a), countdown);
892
+ }
893
+
894
+ template <>
895
+ EIGEN_STRONG_INLINE Packet2d psub<Packet2d>(const Packet2d& a, const Packet2d& b) {
896
+ EIGEN_MSA_DEBUG;
897
+
898
+ return __builtin_msa_fsub_d(a, b);
899
+ }
900
+
901
+ template <>
902
+ EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a) {
903
+ EIGEN_MSA_DEBUG;
904
+
905
+ return (Packet2d)__builtin_msa_bnegi_d((v2u64)a, 63);
906
+ }
907
+
908
+ template <>
909
+ EIGEN_STRONG_INLINE Packet2d pconj(const Packet2d& a) {
910
+ EIGEN_MSA_DEBUG;
911
+
912
+ return a;
913
+ }
914
+
915
+ template <>
916
+ EIGEN_STRONG_INLINE Packet2d pmul<Packet2d>(const Packet2d& a, const Packet2d& b) {
917
+ EIGEN_MSA_DEBUG;
918
+
919
+ return __builtin_msa_fmul_d(a, b);
920
+ }
921
+
922
+ template <>
923
+ EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const Packet2d& b) {
924
+ EIGEN_MSA_DEBUG;
925
+
926
+ return __builtin_msa_fdiv_d(a, b);
927
+ }
928
+
929
+ template <>
930
+ EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) {
931
+ EIGEN_MSA_DEBUG;
932
+
933
+ return __builtin_msa_fmadd_d(c, a, b);
934
+ }
935
+
936
+ // Logical operations are not defined for floating-point vectors, so the operands are
937
+ // reinterpreted as byte vectors and combined with the MSA bitwise intrinsics.
938
+ template <>
939
+ EIGEN_STRONG_INLINE Packet2d pand<Packet2d>(const Packet2d& a, const Packet2d& b) {
940
+ EIGEN_MSA_DEBUG;
941
+
942
+ return (Packet2d)__builtin_msa_and_v((v16u8)a, (v16u8)b);
943
+ }
944
+
945
+ template <>
946
+ EIGEN_STRONG_INLINE Packet2d por<Packet2d>(const Packet2d& a, const Packet2d& b) {
947
+ EIGEN_MSA_DEBUG;
948
+
949
+ return (Packet2d)__builtin_msa_or_v((v16u8)a, (v16u8)b);
950
+ }
951
+
952
+ template <>
953
+ EIGEN_STRONG_INLINE Packet2d pxor<Packet2d>(const Packet2d& a, const Packet2d& b) {
954
+ EIGEN_MSA_DEBUG;
955
+
956
+ return (Packet2d)__builtin_msa_xor_v((v16u8)a, (v16u8)b);
957
+ }
958
+
959
+ template <>
960
+ EIGEN_STRONG_INLINE Packet2d pandnot<Packet2d>(const Packet2d& a, const Packet2d& b) {
961
+ EIGEN_MSA_DEBUG;
962
+
963
+ return pand(a, (Packet2d)__builtin_msa_xori_b((v16u8)b, 255));
964
+ }
965
+
966
+ template <>
967
+ EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* from) {
968
+ EIGEN_MSA_DEBUG;
969
+
970
+ EIGEN_DEBUG_UNALIGNED_LOAD return (Packet2d)__builtin_msa_ld_d(const_cast<double*>(from), 0);
971
+ }
972
+
973
+ template <>
974
+ EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) {
975
+ EIGEN_MSA_DEBUG;
976
+
977
+ #if EIGEN_FAST_MATH
978
+ // This prefers numbers to NaNs.
979
+ return __builtin_msa_fmin_d(a, b);
980
+ #else
981
+ // This prefers NaNs to numbers.
982
+ v2i64 aNaN = __builtin_msa_fcun_d(a, a);
983
+ v2i64 aMinOrNaN = por(__builtin_msa_fclt_d(a, b), aNaN);
984
+ return (Packet2d)__builtin_msa_bsel_v((v16u8)aMinOrNaN, (v16u8)b, (v16u8)a);
985
+ #endif
986
+ }
987
+
988
+ template <>
989
+ EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) {
990
+ EIGEN_MSA_DEBUG;
991
+
992
+ #if EIGEN_FAST_MATH
993
+ // This prefers numbers to NaNs.
994
+ return __builtin_msa_fmax_d(a, b);
995
+ #else
996
+ // This prefers NaNs to numbers.
997
+ v2i64 aNaN = __builtin_msa_fcun_d(a, a);
998
+ v2i64 aMaxOrNaN = por(__builtin_msa_fclt_d(b, a), aNaN);
999
+ return (Packet2d)__builtin_msa_bsel_v((v16u8)aMaxOrNaN, (v16u8)b, (v16u8)a);
1000
+ #endif
1001
+ }
1002
+
1003
+ template <>
1004
+ EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from) {
1005
+ EIGEN_MSA_DEBUG;
1006
+
1007
+ EIGEN_DEBUG_UNALIGNED_LOAD return (Packet2d)__builtin_msa_ld_d(const_cast<double*>(from), 0);
1008
+ }
1009
+
1010
+ template <>
1011
+ EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double* from) {
1012
+ EIGEN_MSA_DEBUG;
1013
+
1014
+ Packet2d value = { *from, *from };
1015
+ return value;
1016
+ }
1017
+
1018
+ template <>
1019
+ EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet2d& from) {
1020
+ EIGEN_MSA_DEBUG;
1021
+
1022
+ EIGEN_DEBUG_ALIGNED_STORE __builtin_msa_st_d((v2i64)from, to, 0);
1023
+ }
1024
+
1025
+ template <>
1026
+ EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d& from) {
1027
+ EIGEN_MSA_DEBUG;
1028
+
1029
+ EIGEN_DEBUG_UNALIGNED_STORE __builtin_msa_st_d((v2i64)from, to, 0);
1030
+ }
1031
+
1032
+ template <>
1033
+ EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, Index stride) {
1034
+ EIGEN_MSA_DEBUG;
1035
+
1036
+ Packet2d value;
1037
+ value[0] = *from;
1038
+ from += stride;
1039
+ value[1] = *from;
1040
+ return value;
1041
+ }
1042
+
1043
+ template <>
1044
+ EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to, const Packet2d& from,
1045
+ Index stride) {
1046
+ EIGEN_MSA_DEBUG;
1047
+
1048
+ *to = from[0];
1049
+ to += stride;
1050
+ *to = from[1];
1051
+ }
1052
+
1053
+ template <>
1054
+ EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) {
1055
+ EIGEN_MSA_DEBUG;
1056
+
1057
+ __builtin_prefetch(addr);
1058
+ }
1059
+
1060
+ template <>
1061
+ EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) {
1062
+ EIGEN_MSA_DEBUG;
1063
+
1064
+ return a[0];
1065
+ }
1066
+
1067
+ template <>
1068
+ EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a) {
1069
+ EIGEN_MSA_DEBUG;
1070
+
1071
+ return (Packet2d)__builtin_msa_shf_w((v4i32)a, EIGEN_MSA_SHF_I8(2, 3, 0, 1));
1072
+ }
1073
+
1074
+ template <>
1075
+ EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a) {
1076
+ EIGEN_MSA_DEBUG;
1077
+
1078
+ return (Packet2d)__builtin_msa_bclri_d((v2u64)a, 63);
1079
+ }
1080
+
1081
+ template <>
1082
+ EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a) {
1083
+ EIGEN_MSA_DEBUG;
1084
+
1085
+ Packet2d s = padd(a, preverse(a));
1086
+ return s[0];
1087
+ }
1088
+
1089
+ // Other reduction functions:
1090
+ // mul
1091
+ template <>
1092
+ EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a) {
1093
+ EIGEN_MSA_DEBUG;
1094
+
1095
+ Packet2d p = pmul(a, preverse(a));
1096
+ return p[0];
1097
+ }
1098
+
1099
+ // min
1100
+ template <>
1101
+ EIGEN_STRONG_INLINE double predux_min<Packet2d>(const Packet2d& a) {
1102
+ EIGEN_MSA_DEBUG;
1103
+
1104
+ #if EIGEN_FAST_MATH
1105
+ Packet2d swapped = (Packet2d)__builtin_msa_shf_w((Packet4i)a, EIGEN_MSA_SHF_I8(2, 3, 0, 1));
1106
+ Packet2d v = __builtin_msa_fmin_d(a, swapped);
1107
+ return v[0];
1108
+ #else
1109
+ double a0 = a[0], a1 = a[1];
1110
+ return ((numext::isnan)(a0) || a0 < a1) ? a0 : a1;
1111
+ #endif
1112
+ }
1113
+
1114
+ // max
1115
+ template <>
1116
+ EIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a) {
1117
+ EIGEN_MSA_DEBUG;
1118
+
1119
+ #if EIGEN_FAST_MATH
1120
+ Packet2d swapped = (Packet2d)__builtin_msa_shf_w((Packet4i)a, EIGEN_MSA_SHF_I8(2, 3, 0, 1));
1121
+ Packet2d v = __builtin_msa_fmax_d(a, swapped);
1122
+ return v[0];
1123
+ #else
1124
+ double a0 = a[0], a1 = a[1];
1125
+ return ((numext::isnan)(a0) || a0 > a1) ? a0 : a1;
1126
+ #endif
1127
+ }
1128
+
1129
+ template <>
1130
+ EIGEN_STRONG_INLINE Packet2d psqrt(const Packet2d& a) {
1131
+ EIGEN_MSA_DEBUG;
1132
+
1133
+ return __builtin_msa_fsqrt_d(a);
1134
+ }
1135
+
1136
+ template <>
1137
+ EIGEN_STRONG_INLINE Packet2d prsqrt(const Packet2d& a) {
1138
+ EIGEN_MSA_DEBUG;
1139
+
1140
+ #if EIGEN_FAST_MATH
1141
+ return __builtin_msa_frsqrt_d(a);
1142
+ #else
1143
+ Packet2d ones = __builtin_msa_ffint_s_d(__builtin_msa_ldi_d(1));
1144
+ return pdiv(ones, psqrt(a));
1145
+ #endif
1146
+ }
1147
+
1148
+ inline std::ostream& operator<<(std::ostream& os, const PacketBlock<Packet2d, 2>& value) {
1149
+ os << "[ " << value.packet[0] << "," << std::endl << " " << value.packet[1] << " ]";
1150
+ return os;
1151
+ }
1152
+
1153
+ EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet2d, 2>& kernel) {
1154
+ EIGEN_MSA_DEBUG;
1155
+
1156
+ Packet2d trn1 = (Packet2d)__builtin_msa_ilvev_d((v2i64)kernel.packet[1], (v2i64)kernel.packet[0]);
1157
+ Packet2d trn2 = (Packet2d)__builtin_msa_ilvod_d((v2i64)kernel.packet[1], (v2i64)kernel.packet[0]);
1158
+ kernel.packet[0] = trn1;
1159
+ kernel.packet[1] = trn2;
1160
+ }
1161
+
1162
+ template <>
1163
+ EIGEN_STRONG_INLINE Packet2d pfloor<Packet2d>(const Packet2d& a) {
1164
+ Packet2d v = a;
1165
+ int32_t old_mode, new_mode;
1166
+ asm volatile(
1167
+ "cfcmsa %[old_mode], $1\n"
1168
+ "ori %[new_mode], %[old_mode], 3\n" // 3 = round towards -INFINITY.
1169
+ "ctcmsa $1, %[new_mode]\n"
1170
+ "frint.d %w[v], %w[v]\n"
1171
+ "ctcmsa $1, %[old_mode]\n"
1172
+ : // outputs
1173
+ [old_mode] "=r"(old_mode), [new_mode] "=r"(new_mode),
1174
+ [v] "+f"(v)
1175
+ : // inputs
1176
+ : // clobbers
1177
+ );
1178
+ return v;
1179
+ }
1180
+
1181
+ template <>
1182
+ EIGEN_STRONG_INLINE Packet2d pceil<Packet2d>(const Packet2d& a) {
1183
+ Packet2d v = a;
1184
+ int32_t old_mode, new_mode;
1185
+ asm volatile(
1186
+ "cfcmsa %[old_mode], $1\n"
1187
+ "ori %[new_mode], %[old_mode], 3\n"
1188
+ "xori %[new_mode], %[new_mode], 1\n" // 2 = round towards +INFINITY.
1189
+ "ctcmsa $1, %[new_mode]\n"
1190
+ "frint.d %w[v], %w[v]\n"
1191
+ "ctcmsa $1, %[old_mode]\n"
1192
+ : // outputs
1193
+ [old_mode] "=r"(old_mode), [new_mode] "=r"(new_mode),
1194
+ [v] "+f"(v)
1195
+ : // inputs
1196
+ : // clobbers
1197
+ );
1198
+ return v;
1199
+ }
1200
+
1201
+ template <>
1202
+ EIGEN_STRONG_INLINE Packet2d pround<Packet2d>(const Packet2d& a) {
1203
+ Packet2d v = a;
1204
+ int32_t old_mode, new_mode;
1205
+ asm volatile(
1206
+ "cfcmsa %[old_mode], $1\n"
1207
+ "ori %[new_mode], %[old_mode], 3\n"
1208
+ "xori %[new_mode], %[new_mode], 3\n" // 0 = round to nearest, ties to even.
1209
+ "ctcmsa $1, %[new_mode]\n"
1210
+ "frint.d %w[v], %w[v]\n"
1211
+ "ctcmsa $1, %[old_mode]\n"
1212
+ : // outputs
1213
+ [old_mode] "=r"(old_mode), [new_mode] "=r"(new_mode),
1214
+ [v] "+f"(v)
1215
+ : // inputs
1216
+ : // clobbers
1217
+ );
1218
+ return v;
1219
+ }
1220
+
1221
+ template <>
1222
+ EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, const Packet2d& thenPacket,
1223
+ const Packet2d& elsePacket) {
1224
+ Packet2ul select = { ifPacket.select[0], ifPacket.select[1] };
1225
+ Packet2l mask = __builtin_msa_ceqi_d((Packet2l)select, 0);
1226
+ return (Packet2d)__builtin_msa_bsel_v((v16u8)mask, (v16u8)thenPacket, (v16u8)elsePacket);
1227
+ }
1228
+
1229
+ } // end namespace internal
1230
+
1231
+ } // end namespace Eigen
1232
+
1233
+ #endif // EIGEN_PACKET_MATH_MSA_H