@smake/eigen 1.0.2 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (435)
  1. package/README.md +1 -1
  2. package/eigen/Eigen/AccelerateSupport +52 -0
  3. package/eigen/Eigen/Cholesky +18 -21
  4. package/eigen/Eigen/CholmodSupport +28 -28
  5. package/eigen/Eigen/Core +235 -326
  6. package/eigen/Eigen/Eigenvalues +16 -14
  7. package/eigen/Eigen/Geometry +21 -24
  8. package/eigen/Eigen/Householder +9 -8
  9. package/eigen/Eigen/IterativeLinearSolvers +8 -4
  10. package/eigen/Eigen/Jacobi +14 -14
  11. package/eigen/Eigen/KLUSupport +43 -0
  12. package/eigen/Eigen/LU +16 -20
  13. package/eigen/Eigen/MetisSupport +12 -12
  14. package/eigen/Eigen/OrderingMethods +54 -54
  15. package/eigen/Eigen/PaStiXSupport +23 -20
  16. package/eigen/Eigen/PardisoSupport +17 -14
  17. package/eigen/Eigen/QR +18 -21
  18. package/eigen/Eigen/QtAlignedMalloc +5 -13
  19. package/eigen/Eigen/SPQRSupport +21 -14
  20. package/eigen/Eigen/SVD +23 -18
  21. package/eigen/Eigen/Sparse +1 -4
  22. package/eigen/Eigen/SparseCholesky +18 -23
  23. package/eigen/Eigen/SparseCore +18 -17
  24. package/eigen/Eigen/SparseLU +12 -8
  25. package/eigen/Eigen/SparseQR +16 -14
  26. package/eigen/Eigen/StdDeque +5 -2
  27. package/eigen/Eigen/StdList +5 -2
  28. package/eigen/Eigen/StdVector +5 -2
  29. package/eigen/Eigen/SuperLUSupport +30 -24
  30. package/eigen/Eigen/ThreadPool +80 -0
  31. package/eigen/Eigen/UmfPackSupport +19 -17
  32. package/eigen/Eigen/Version +14 -0
  33. package/eigen/Eigen/src/AccelerateSupport/AccelerateSupport.h +423 -0
  34. package/eigen/Eigen/src/AccelerateSupport/InternalHeaderCheck.h +3 -0
  35. package/eigen/Eigen/src/Cholesky/InternalHeaderCheck.h +3 -0
  36. package/eigen/Eigen/src/Cholesky/LDLT.h +377 -401
  37. package/eigen/Eigen/src/Cholesky/LLT.h +332 -360
  38. package/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h +81 -56
  39. package/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +620 -521
  40. package/eigen/Eigen/src/CholmodSupport/InternalHeaderCheck.h +3 -0
  41. package/eigen/Eigen/src/Core/ArithmeticSequence.h +239 -0
  42. package/eigen/Eigen/src/Core/Array.h +341 -294
  43. package/eigen/Eigen/src/Core/ArrayBase.h +190 -203
  44. package/eigen/Eigen/src/Core/ArrayWrapper.h +127 -171
  45. package/eigen/Eigen/src/Core/Assign.h +30 -40
  46. package/eigen/Eigen/src/Core/AssignEvaluator.h +711 -589
  47. package/eigen/Eigen/src/Core/Assign_MKL.h +130 -125
  48. package/eigen/Eigen/src/Core/BandMatrix.h +268 -283
  49. package/eigen/Eigen/src/Core/Block.h +375 -398
  50. package/eigen/Eigen/src/Core/CommaInitializer.h +86 -97
  51. package/eigen/Eigen/src/Core/ConditionEstimator.h +51 -53
  52. package/eigen/Eigen/src/Core/CoreEvaluators.h +1356 -1026
  53. package/eigen/Eigen/src/Core/CoreIterators.h +73 -59
  54. package/eigen/Eigen/src/Core/CwiseBinaryOp.h +114 -132
  55. package/eigen/Eigen/src/Core/CwiseNullaryOp.h +726 -617
  56. package/eigen/Eigen/src/Core/CwiseTernaryOp.h +77 -103
  57. package/eigen/Eigen/src/Core/CwiseUnaryOp.h +56 -68
  58. package/eigen/Eigen/src/Core/CwiseUnaryView.h +132 -95
  59. package/eigen/Eigen/src/Core/DenseBase.h +632 -571
  60. package/eigen/Eigen/src/Core/DenseCoeffsBase.h +511 -624
  61. package/eigen/Eigen/src/Core/DenseStorage.h +512 -509
  62. package/eigen/Eigen/src/Core/DeviceWrapper.h +153 -0
  63. package/eigen/Eigen/src/Core/Diagonal.h +169 -210
  64. package/eigen/Eigen/src/Core/DiagonalMatrix.h +351 -274
  65. package/eigen/Eigen/src/Core/DiagonalProduct.h +12 -10
  66. package/eigen/Eigen/src/Core/Dot.h +172 -222
  67. package/eigen/Eigen/src/Core/EigenBase.h +75 -85
  68. package/eigen/Eigen/src/Core/Fill.h +138 -0
  69. package/eigen/Eigen/src/Core/FindCoeff.h +464 -0
  70. package/eigen/Eigen/src/Core/ForceAlignedAccess.h +90 -109
  71. package/eigen/Eigen/src/Core/Fuzzy.h +82 -105
  72. package/eigen/Eigen/src/Core/GeneralProduct.h +327 -263
  73. package/eigen/Eigen/src/Core/GenericPacketMath.h +1472 -360
  74. package/eigen/Eigen/src/Core/GlobalFunctions.h +194 -151
  75. package/eigen/Eigen/src/Core/IO.h +147 -139
  76. package/eigen/Eigen/src/Core/IndexedView.h +321 -0
  77. package/eigen/Eigen/src/Core/InnerProduct.h +260 -0
  78. package/eigen/Eigen/src/Core/InternalHeaderCheck.h +3 -0
  79. package/eigen/Eigen/src/Core/Inverse.h +56 -66
  80. package/eigen/Eigen/src/Core/Map.h +124 -142
  81. package/eigen/Eigen/src/Core/MapBase.h +256 -281
  82. package/eigen/Eigen/src/Core/MathFunctions.h +1620 -938
  83. package/eigen/Eigen/src/Core/MathFunctionsImpl.h +233 -71
  84. package/eigen/Eigen/src/Core/Matrix.h +491 -416
  85. package/eigen/Eigen/src/Core/MatrixBase.h +468 -453
  86. package/eigen/Eigen/src/Core/NestByValue.h +66 -85
  87. package/eigen/Eigen/src/Core/NoAlias.h +79 -85
  88. package/eigen/Eigen/src/Core/NumTraits.h +235 -148
  89. package/eigen/Eigen/src/Core/PartialReduxEvaluator.h +253 -0
  90. package/eigen/Eigen/src/Core/PermutationMatrix.h +461 -511
  91. package/eigen/Eigen/src/Core/PlainObjectBase.h +871 -894
  92. package/eigen/Eigen/src/Core/Product.h +260 -139
  93. package/eigen/Eigen/src/Core/ProductEvaluators.h +863 -714
  94. package/eigen/Eigen/src/Core/Random.h +161 -136
  95. package/eigen/Eigen/src/Core/RandomImpl.h +262 -0
  96. package/eigen/Eigen/src/Core/RealView.h +250 -0
  97. package/eigen/Eigen/src/Core/Redux.h +366 -336
  98. package/eigen/Eigen/src/Core/Ref.h +308 -209
  99. package/eigen/Eigen/src/Core/Replicate.h +94 -106
  100. package/eigen/Eigen/src/Core/Reshaped.h +398 -0
  101. package/eigen/Eigen/src/Core/ReturnByValue.h +49 -55
  102. package/eigen/Eigen/src/Core/Reverse.h +136 -145
  103. package/eigen/Eigen/src/Core/Select.h +70 -140
  104. package/eigen/Eigen/src/Core/SelfAdjointView.h +262 -285
  105. package/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h +23 -20
  106. package/eigen/Eigen/src/Core/SkewSymmetricMatrix3.h +382 -0
  107. package/eigen/Eigen/src/Core/Solve.h +97 -111
  108. package/eigen/Eigen/src/Core/SolveTriangular.h +131 -129
  109. package/eigen/Eigen/src/Core/SolverBase.h +138 -101
  110. package/eigen/Eigen/src/Core/StableNorm.h +156 -160
  111. package/eigen/Eigen/src/Core/StlIterators.h +619 -0
  112. package/eigen/Eigen/src/Core/Stride.h +91 -88
  113. package/eigen/Eigen/src/Core/Swap.h +70 -38
  114. package/eigen/Eigen/src/Core/Transpose.h +295 -273
  115. package/eigen/Eigen/src/Core/Transpositions.h +272 -317
  116. package/eigen/Eigen/src/Core/TriangularMatrix.h +670 -755
  117. package/eigen/Eigen/src/Core/VectorBlock.h +59 -72
  118. package/eigen/Eigen/src/Core/VectorwiseOp.h +668 -630
  119. package/eigen/Eigen/src/Core/Visitor.h +480 -216
  120. package/eigen/Eigen/src/Core/arch/AVX/Complex.h +407 -293
  121. package/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +79 -388
  122. package/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +2935 -491
  123. package/eigen/Eigen/src/Core/arch/AVX/Reductions.h +353 -0
  124. package/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +279 -22
  125. package/eigen/Eigen/src/Core/arch/AVX512/Complex.h +472 -0
  126. package/eigen/Eigen/src/Core/arch/AVX512/GemmKernel.h +1245 -0
  127. package/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +85 -333
  128. package/eigen/Eigen/src/Core/arch/AVX512/MathFunctionsFP16.h +75 -0
  129. package/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +2490 -649
  130. package/eigen/Eigen/src/Core/arch/AVX512/PacketMathFP16.h +1413 -0
  131. package/eigen/Eigen/src/Core/arch/AVX512/Reductions.h +297 -0
  132. package/eigen/Eigen/src/Core/arch/AVX512/TrsmKernel.h +1167 -0
  133. package/eigen/Eigen/src/Core/arch/AVX512/TrsmUnrolls.inc +1219 -0
  134. package/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +277 -0
  135. package/eigen/Eigen/src/Core/arch/AVX512/TypeCastingFP16.h +130 -0
  136. package/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +521 -298
  137. package/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +39 -280
  138. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +3686 -0
  139. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +205 -0
  140. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +901 -0
  141. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMAbfloat16.h +742 -0
  142. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixVectorProduct.inc +2818 -0
  143. package/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +3391 -723
  144. package/eigen/Eigen/src/Core/arch/AltiVec/TypeCasting.h +153 -0
  145. package/eigen/Eigen/src/Core/arch/Default/BFloat16.h +866 -0
  146. package/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +113 -14
  147. package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +2634 -0
  148. package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +227 -0
  149. package/eigen/Eigen/src/Core/arch/Default/Half.h +1091 -0
  150. package/eigen/Eigen/src/Core/arch/Default/Settings.h +11 -13
  151. package/eigen/Eigen/src/Core/arch/GPU/Complex.h +244 -0
  152. package/eigen/Eigen/src/Core/arch/GPU/MathFunctions.h +104 -0
  153. package/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1712 -0
  154. package/eigen/Eigen/src/Core/arch/GPU/Tuple.h +268 -0
  155. package/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +77 -0
  156. package/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
  157. package/eigen/Eigen/src/Core/arch/HVX/PacketMath.h +1088 -0
  158. package/eigen/Eigen/src/Core/arch/LSX/Complex.h +520 -0
  159. package/eigen/Eigen/src/Core/arch/LSX/GeneralBlockPanelKernel.h +23 -0
  160. package/eigen/Eigen/src/Core/arch/LSX/MathFunctions.h +43 -0
  161. package/eigen/Eigen/src/Core/arch/LSX/PacketMath.h +2866 -0
  162. package/eigen/Eigen/src/Core/arch/LSX/TypeCasting.h +526 -0
  163. package/eigen/Eigen/src/Core/arch/MSA/Complex.h +620 -0
  164. package/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +379 -0
  165. package/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1237 -0
  166. package/eigen/Eigen/src/Core/arch/NEON/Complex.h +531 -289
  167. package/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +243 -0
  168. package/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +50 -73
  169. package/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +5915 -579
  170. package/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1642 -0
  171. package/eigen/Eigen/src/Core/arch/NEON/UnaryFunctors.h +57 -0
  172. package/eigen/Eigen/src/Core/arch/SSE/Complex.h +366 -334
  173. package/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +40 -514
  174. package/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +2164 -675
  175. package/eigen/Eigen/src/Core/arch/SSE/Reductions.h +324 -0
  176. package/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +188 -35
  177. package/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +48 -0
  178. package/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +674 -0
  179. package/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +52 -0
  180. package/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +227 -0
  181. package/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +303 -0
  182. package/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +576 -0
  183. package/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +83 -0
  184. package/eigen/Eigen/src/Core/arch/ZVector/Complex.h +434 -261
  185. package/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +160 -53
  186. package/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +1073 -605
  187. package/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +123 -117
  188. package/eigen/Eigen/src/Core/functors/BinaryFunctors.h +594 -322
  189. package/eigen/Eigen/src/Core/functors/NullaryFunctors.h +204 -118
  190. package/eigen/Eigen/src/Core/functors/StlFunctors.h +110 -97
  191. package/eigen/Eigen/src/Core/functors/TernaryFunctors.h +34 -7
  192. package/eigen/Eigen/src/Core/functors/UnaryFunctors.h +1158 -530
  193. package/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +2329 -1333
  194. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +328 -364
  195. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +191 -178
  196. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +85 -82
  197. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +154 -73
  198. package/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +396 -542
  199. package/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +80 -77
  200. package/eigen/Eigen/src/Core/products/Parallelizer.h +208 -92
  201. package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +331 -375
  202. package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +206 -224
  203. package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +139 -146
  204. package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +58 -61
  205. package/eigen/Eigen/src/Core/products/SelfadjointProduct.h +71 -71
  206. package/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +48 -46
  207. package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +294 -369
  208. package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +246 -238
  209. package/eigen/Eigen/src/Core/products/TriangularMatrixVector.h +244 -247
  210. package/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +212 -192
  211. package/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +328 -275
  212. package/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +108 -109
  213. package/eigen/Eigen/src/Core/products/TriangularSolverVector.h +70 -93
  214. package/eigen/Eigen/src/Core/util/Assert.h +158 -0
  215. package/eigen/Eigen/src/Core/util/BlasUtil.h +413 -290
  216. package/eigen/Eigen/src/Core/util/ConfigureVectorization.h +543 -0
  217. package/eigen/Eigen/src/Core/util/Constants.h +314 -263
  218. package/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +130 -78
  219. package/eigen/Eigen/src/Core/util/EmulateArray.h +270 -0
  220. package/eigen/Eigen/src/Core/util/ForwardDeclarations.h +450 -224
  221. package/eigen/Eigen/src/Core/util/GpuHipCudaDefines.inc +101 -0
  222. package/eigen/Eigen/src/Core/util/GpuHipCudaUndefines.inc +45 -0
  223. package/eigen/Eigen/src/Core/util/IndexedViewHelper.h +487 -0
  224. package/eigen/Eigen/src/Core/util/IntegralConstant.h +279 -0
  225. package/eigen/Eigen/src/Core/util/MKL_support.h +39 -30
  226. package/eigen/Eigen/src/Core/util/Macros.h +939 -646
  227. package/eigen/Eigen/src/Core/util/MaxSizeVector.h +139 -0
  228. package/eigen/Eigen/src/Core/util/Memory.h +1042 -650
  229. package/eigen/Eigen/src/Core/util/Meta.h +618 -426
  230. package/eigen/Eigen/src/Core/util/MoreMeta.h +638 -0
  231. package/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +32 -19
  232. package/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
  233. package/eigen/Eigen/src/Core/util/Serializer.h +209 -0
  234. package/eigen/Eigen/src/Core/util/StaticAssert.h +51 -164
  235. package/eigen/Eigen/src/Core/util/SymbolicIndex.h +445 -0
  236. package/eigen/Eigen/src/Core/util/XprHelper.h +793 -538
  237. package/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +246 -277
  238. package/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +299 -319
  239. package/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +52 -48
  240. package/eigen/Eigen/src/Eigenvalues/EigenSolver.h +413 -456
  241. package/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +309 -325
  242. package/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +157 -171
  243. package/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +292 -310
  244. package/eigen/Eigen/src/Eigenvalues/InternalHeaderCheck.h +3 -0
  245. package/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +91 -107
  246. package/eigen/Eigen/src/Eigenvalues/RealQZ.h +539 -606
  247. package/eigen/Eigen/src/Eigenvalues/RealSchur.h +348 -382
  248. package/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +41 -35
  249. package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +579 -600
  250. package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +47 -44
  251. package/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +434 -461
  252. package/eigen/Eigen/src/Geometry/AlignedBox.h +307 -214
  253. package/eigen/Eigen/src/Geometry/AngleAxis.h +135 -137
  254. package/eigen/Eigen/src/Geometry/EulerAngles.h +163 -74
  255. package/eigen/Eigen/src/Geometry/Homogeneous.h +289 -333
  256. package/eigen/Eigen/src/Geometry/Hyperplane.h +152 -161
  257. package/eigen/Eigen/src/Geometry/InternalHeaderCheck.h +3 -0
  258. package/eigen/Eigen/src/Geometry/OrthoMethods.h +168 -145
  259. package/eigen/Eigen/src/Geometry/ParametrizedLine.h +141 -104
  260. package/eigen/Eigen/src/Geometry/Quaternion.h +595 -497
  261. package/eigen/Eigen/src/Geometry/Rotation2D.h +110 -108
  262. package/eigen/Eigen/src/Geometry/RotationBase.h +148 -145
  263. package/eigen/Eigen/src/Geometry/Scaling.h +115 -90
  264. package/eigen/Eigen/src/Geometry/Transform.h +896 -953
  265. package/eigen/Eigen/src/Geometry/Translation.h +100 -98
  266. package/eigen/Eigen/src/Geometry/Umeyama.h +79 -84
  267. package/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +154 -0
  268. package/eigen/Eigen/src/Householder/BlockHouseholder.h +54 -42
  269. package/eigen/Eigen/src/Householder/Householder.h +104 -122
  270. package/eigen/Eigen/src/Householder/HouseholderSequence.h +416 -382
  271. package/eigen/Eigen/src/Householder/InternalHeaderCheck.h +3 -0
  272. package/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +153 -166
  273. package/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +127 -138
  274. package/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +95 -124
  275. package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +269 -267
  276. package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +246 -259
  277. package/eigen/Eigen/src/IterativeLinearSolvers/InternalHeaderCheck.h +3 -0
  278. package/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +218 -217
  279. package/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +80 -103
  280. package/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +59 -63
  281. package/eigen/Eigen/src/Jacobi/InternalHeaderCheck.h +3 -0
  282. package/eigen/Eigen/src/Jacobi/Jacobi.h +256 -291
  283. package/eigen/Eigen/src/KLUSupport/InternalHeaderCheck.h +3 -0
  284. package/eigen/Eigen/src/KLUSupport/KLUSupport.h +339 -0
  285. package/eigen/Eigen/src/LU/Determinant.h +60 -63
  286. package/eigen/Eigen/src/LU/FullPivLU.h +561 -626
  287. package/eigen/Eigen/src/LU/InternalHeaderCheck.h +3 -0
  288. package/eigen/Eigen/src/LU/InverseImpl.h +213 -275
  289. package/eigen/Eigen/src/LU/PartialPivLU.h +407 -435
  290. package/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h +54 -40
  291. package/eigen/Eigen/src/LU/arch/InverseSize4.h +353 -0
  292. package/eigen/Eigen/src/MetisSupport/InternalHeaderCheck.h +3 -0
  293. package/eigen/Eigen/src/MetisSupport/MetisSupport.h +81 -93
  294. package/eigen/Eigen/src/OrderingMethods/Amd.h +250 -282
  295. package/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +950 -1103
  296. package/eigen/Eigen/src/OrderingMethods/InternalHeaderCheck.h +3 -0
  297. package/eigen/Eigen/src/OrderingMethods/Ordering.h +111 -122
  298. package/eigen/Eigen/src/PaStiXSupport/InternalHeaderCheck.h +3 -0
  299. package/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +524 -570
  300. package/eigen/Eigen/src/PardisoSupport/InternalHeaderCheck.h +3 -0
  301. package/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +385 -429
  302. package/eigen/Eigen/src/QR/ColPivHouseholderQR.h +494 -473
  303. package/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +120 -56
  304. package/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +223 -137
  305. package/eigen/Eigen/src/QR/FullPivHouseholderQR.h +517 -460
  306. package/eigen/Eigen/src/QR/HouseholderQR.h +412 -278
  307. package/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h +32 -23
  308. package/eigen/Eigen/src/QR/InternalHeaderCheck.h +3 -0
  309. package/eigen/Eigen/src/SPQRSupport/InternalHeaderCheck.h +3 -0
  310. package/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +263 -261
  311. package/eigen/Eigen/src/SVD/BDCSVD.h +872 -679
  312. package/eigen/Eigen/src/SVD/BDCSVD_LAPACKE.h +174 -0
  313. package/eigen/Eigen/src/SVD/InternalHeaderCheck.h +3 -0
  314. package/eigen/Eigen/src/SVD/JacobiSVD.h +585 -543
  315. package/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +85 -49
  316. package/eigen/Eigen/src/SVD/SVDBase.h +281 -160
  317. package/eigen/Eigen/src/SVD/UpperBidiagonalization.h +202 -237
  318. package/eigen/Eigen/src/SparseCholesky/InternalHeaderCheck.h +3 -0
  319. package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +769 -590
  320. package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +318 -129
  321. package/eigen/Eigen/src/SparseCore/AmbiVector.h +202 -251
  322. package/eigen/Eigen/src/SparseCore/CompressedStorage.h +184 -236
  323. package/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +140 -184
  324. package/eigen/Eigen/src/SparseCore/InternalHeaderCheck.h +3 -0
  325. package/eigen/Eigen/src/SparseCore/SparseAssign.h +174 -111
  326. package/eigen/Eigen/src/SparseCore/SparseBlock.h +408 -477
  327. package/eigen/Eigen/src/SparseCore/SparseColEtree.h +100 -112
  328. package/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +531 -280
  329. package/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +559 -347
  330. package/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +100 -108
  331. package/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +185 -191
  332. package/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +71 -71
  333. package/eigen/Eigen/src/SparseCore/SparseDot.h +49 -47
  334. package/eigen/Eigen/src/SparseCore/SparseFuzzy.h +13 -11
  335. package/eigen/Eigen/src/SparseCore/SparseMap.h +243 -253
  336. package/eigen/Eigen/src/SparseCore/SparseMatrix.h +1614 -1142
  337. package/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +403 -357
  338. package/eigen/Eigen/src/SparseCore/SparsePermutation.h +186 -115
  339. package/eigen/Eigen/src/SparseCore/SparseProduct.h +100 -91
  340. package/eigen/Eigen/src/SparseCore/SparseRedux.h +22 -24
  341. package/eigen/Eigen/src/SparseCore/SparseRef.h +268 -295
  342. package/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +371 -414
  343. package/eigen/Eigen/src/SparseCore/SparseSolverBase.h +78 -87
  344. package/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +81 -95
  345. package/eigen/Eigen/src/SparseCore/SparseTranspose.h +62 -71
  346. package/eigen/Eigen/src/SparseCore/SparseTriangularView.h +132 -144
  347. package/eigen/Eigen/src/SparseCore/SparseUtil.h +146 -115
  348. package/eigen/Eigen/src/SparseCore/SparseVector.h +426 -372
  349. package/eigen/Eigen/src/SparseCore/SparseView.h +164 -193
  350. package/eigen/Eigen/src/SparseCore/TriangularSolver.h +129 -170
  351. package/eigen/Eigen/src/SparseLU/InternalHeaderCheck.h +3 -0
  352. package/eigen/Eigen/src/SparseLU/SparseLU.h +814 -618
  353. package/eigen/Eigen/src/SparseLU/SparseLUImpl.h +61 -48
  354. package/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +102 -118
  355. package/eigen/Eigen/src/SparseLU/SparseLU_Structs.h +38 -35
  356. package/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +273 -255
  357. package/eigen/Eigen/src/SparseLU/SparseLU_Utils.h +44 -49
  358. package/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h +104 -108
  359. package/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +90 -101
  360. package/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +57 -58
  361. package/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +43 -55
  362. package/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +74 -71
  363. package/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +125 -133
  364. package/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h +136 -159
  365. package/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h +51 -52
  366. package/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h +67 -73
  367. package/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h +24 -26
  368. package/eigen/Eigen/src/SparseQR/InternalHeaderCheck.h +3 -0
  369. package/eigen/Eigen/src/SparseQR/SparseQR.h +451 -490
  370. package/eigen/Eigen/src/StlSupport/StdDeque.h +28 -105
  371. package/eigen/Eigen/src/StlSupport/StdList.h +28 -84
  372. package/eigen/Eigen/src/StlSupport/StdVector.h +28 -108
  373. package/eigen/Eigen/src/StlSupport/details.h +48 -50
  374. package/eigen/Eigen/src/SuperLUSupport/InternalHeaderCheck.h +3 -0
  375. package/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +634 -732
  376. package/eigen/Eigen/src/ThreadPool/Barrier.h +70 -0
  377. package/eigen/Eigen/src/ThreadPool/CoreThreadPoolDevice.h +336 -0
  378. package/eigen/Eigen/src/ThreadPool/EventCount.h +241 -0
  379. package/eigen/Eigen/src/ThreadPool/ForkJoin.h +140 -0
  380. package/eigen/Eigen/src/ThreadPool/InternalHeaderCheck.h +4 -0
  381. package/eigen/Eigen/src/ThreadPool/NonBlockingThreadPool.h +587 -0
  382. package/eigen/Eigen/src/ThreadPool/RunQueue.h +230 -0
  383. package/eigen/Eigen/src/ThreadPool/ThreadCancel.h +21 -0
  384. package/eigen/Eigen/src/ThreadPool/ThreadEnvironment.h +43 -0
  385. package/eigen/Eigen/src/ThreadPool/ThreadLocal.h +289 -0
  386. package/eigen/Eigen/src/ThreadPool/ThreadPoolInterface.h +50 -0
  387. package/eigen/Eigen/src/ThreadPool/ThreadYield.h +16 -0
  388. package/eigen/Eigen/src/UmfPackSupport/InternalHeaderCheck.h +3 -0
  389. package/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +480 -380
  390. package/eigen/Eigen/src/misc/Image.h +41 -43
  391. package/eigen/Eigen/src/misc/InternalHeaderCheck.h +3 -0
  392. package/eigen/Eigen/src/misc/Kernel.h +39 -41
  393. package/eigen/Eigen/src/misc/RealSvd2x2.h +19 -21
  394. package/eigen/Eigen/src/misc/blas.h +83 -426
  395. package/eigen/Eigen/src/misc/lapacke.h +9976 -16182
  396. package/eigen/Eigen/src/misc/lapacke_helpers.h +163 -0
  397. package/eigen/Eigen/src/misc/lapacke_mangling.h +4 -5
  398. package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.inc +344 -0
  399. package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.inc +544 -0
  400. package/eigen/Eigen/src/plugins/BlockMethods.inc +1370 -0
  401. package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.inc +116 -0
  402. package/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.inc +167 -0
  403. package/eigen/Eigen/src/plugins/IndexedViewMethods.inc +192 -0
  404. package/eigen/Eigen/src/plugins/InternalHeaderCheck.inc +3 -0
  405. package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.inc +331 -0
  406. package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.inc +118 -0
  407. package/eigen/Eigen/src/plugins/ReshapedMethods.inc +133 -0
  408. package/lib/LibEigen.d.ts +4 -0
  409. package/lib/LibEigen.js +14 -0
  410. package/lib/index.d.ts +1 -1
  411. package/lib/index.js +7 -3
  412. package/package.json +2 -10
  413. package/eigen/Eigen/CMakeLists.txt +0 -19
  414. package/eigen/Eigen/src/Core/BooleanRedux.h +0 -164
  415. package/eigen/Eigen/src/Core/arch/CUDA/Complex.h +0 -103
  416. package/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -675
  417. package/eigen/Eigen/src/Core/arch/CUDA/MathFunctions.h +0 -91
  418. package/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
  419. package/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
  420. package/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
  421. package/eigen/Eigen/src/Core/util/NonMPL2.h +0 -3
  422. package/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
  423. package/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
  424. package/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h +0 -67
  425. package/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +0 -280
  426. package/eigen/Eigen/src/misc/lapack.h +0 -152
  427. package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +0 -332
  428. package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +0 -552
  429. package/eigen/Eigen/src/plugins/BlockMethods.h +0 -1058
  430. package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h +0 -115
  431. package/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +0 -163
  432. package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +0 -152
  433. package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +0 -85
  434. package/lib/eigen.d.ts +0 -2
  435. package/lib/eigen.js +0 -15
@@ -0,0 +1,1237 @@
1
+ // This file is part of Eigen, a lightweight C++ template library
2
+ // for linear algebra.
3
+ //
4
+ // Copyright (C) 2018 Wave Computing, Inc.
5
+ // Written by:
6
+ // Chris Larsen
7
+ // Alexey Frunze (afrunze@wavecomp.com)
8
+ //
9
+ // This Source Code Form is subject to the terms of the Mozilla
10
+ // Public License v. 2.0. If a copy of the MPL was not distributed
11
+ // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
12
+
13
+ #ifndef EIGEN_PACKET_MATH_MSA_H
14
+ #define EIGEN_PACKET_MATH_MSA_H
15
+
16
+ #include <iostream>
17
+ #include <string>
18
+
19
+ // IWYU pragma: private
20
+ #include "../../InternalHeaderCheck.h"
21
+
22
+ namespace Eigen {
23
+
24
+ namespace internal {
25
+
26
// Tunables / feature flags for the MSA backend. Each one is only defined here
// when the including translation unit has not already provided a definition.
#if !defined(EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD)
#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
#endif

#if !defined(EIGEN_HAS_SINGLE_INSTRUCTION_MADD)
#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
#endif

#if !defined(EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS)
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 32
#endif

// Tracing hook placed at the top of the packet primitives. It is compiled out
// by default; switch the `#if 0` to `#if 1` to print file:line:function the
// first time each instrumented function runs.
#if 0
#define EIGEN_MSA_DEBUG                                                             \
  static bool firstTime = true;                                                     \
  do {                                                                              \
    if (firstTime) {                                                                \
      std::cout << __FILE__ << ':' << __LINE__ << ':' << __FUNCTION__ << std::endl; \
      firstTime = false;                                                            \
    }                                                                               \
  } while (0)
#else
#define EIGEN_MSA_DEBUG
#endif

// Packs four 2-bit lane selectors into one 8-bit shuffle immediate:
// `a` occupies bits 0-1, `b` bits 2-3, `c` bits 4-5 and `d` bits 6-7.
#define EIGEN_MSA_SHF_I8(a, b, c, d) ((a) | ((b) << 2) | ((c) << 4) | ((d) << 6))
52
+
53
+ typedef v4f32 Packet4f;
54
+ typedef v4i32 Packet4i;
55
+ typedef v4u32 Packet4ui;
56
+
57
+ #define EIGEN_DECLARE_CONST_Packet4f(NAME, X) const Packet4f p4f_##NAME = {X, X, X, X}
58
+ #define EIGEN_DECLARE_CONST_Packet4i(NAME, X) const Packet4i p4i_##NAME = {X, X, X, X}
59
+ #define EIGEN_DECLARE_CONST_Packet4ui(NAME, X) const Packet4ui p4ui_##NAME = {X, X, X, X}
60
+
61
+ inline std::ostream& operator<<(std::ostream& os, const Packet4f& value) {
62
+ os << "[ " << value[0] << ", " << value[1] << ", " << value[2] << ", " << value[3] << " ]";
63
+ return os;
64
+ }
65
+
66
+ inline std::ostream& operator<<(std::ostream& os, const Packet4i& value) {
67
+ os << "[ " << value[0] << ", " << value[1] << ", " << value[2] << ", " << value[3] << " ]";
68
+ return os;
69
+ }
70
+
71
+ inline std::ostream& operator<<(std::ostream& os, const Packet4ui& value) {
72
+ os << "[ " << value[0] << ", " << value[1] << ", " << value[2] << ", " << value[3] << " ]";
73
+ return os;
74
+ }
75
+
76
+ template <>
77
+ struct packet_traits<float> : default_packet_traits {
78
+ typedef Packet4f type;
79
+ typedef Packet4f half; // Packet2f intrinsics not implemented yet
80
+ enum {
81
+ Vectorizable = 1,
82
+ AlignedOnScalar = 1,
83
+ size = 4,
84
+ // FIXME check the Has*
85
+ HasDiv = 1,
86
+ HasSin = EIGEN_FAST_MATH,
87
+ HasCos = EIGEN_FAST_MATH,
88
+ HasTanh = EIGEN_FAST_MATH,
89
+ HasErf = EIGEN_FAST_MATH,
90
+ HasLog = 1,
91
+ HasExp = 1,
92
+ HasSqrt = 1,
93
+ HasRsqrt = 1,
94
+ HasBlend = 1
95
+ };
96
+ };
97
+
98
+ template <>
99
+ struct packet_traits<int32_t> : default_packet_traits {
100
+ typedef Packet4i type;
101
+ typedef Packet4i half; // Packet2i intrinsics not implemented yet
102
+ enum {
103
+ Vectorizable = 1,
104
+ AlignedOnScalar = 1,
105
+ size = 4,
106
+ // FIXME check the Has*
107
+ HasDiv = 1,
108
+ HasBlend = 1
109
+ };
110
+ };
111
+
112
+ template <>
113
+ struct unpacket_traits<Packet4f> {
114
+ typedef float type;
115
+ enum {
116
+ size = 4,
117
+ alignment = Aligned16,
118
+ vectorizable = true,
119
+ masked_load_available = false,
120
+ masked_store_available = false
121
+ };
122
+ typedef Packet4f half;
123
+ };
124
+
125
+ template <>
126
+ struct unpacket_traits<Packet4i> {
127
+ typedef int32_t type;
128
+ enum {
129
+ size = 4,
130
+ alignment = Aligned16,
131
+ vectorizable = true,
132
+ masked_load_available = false,
133
+ masked_store_available = false
134
+ };
135
+ typedef Packet4i half;
136
+ };
137
+
138
+ template <>
139
+ EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) {
140
+ EIGEN_MSA_DEBUG;
141
+
142
+ Packet4f v = {from, from, from, from};
143
+ return v;
144
+ }
145
+
146
+ template <>
147
+ EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int32_t& from) {
148
+ EIGEN_MSA_DEBUG;
149
+
150
+ return __builtin_msa_fill_w(from);
151
+ }
152
+
153
+ template <>
154
+ EIGEN_STRONG_INLINE Packet4f pload1<Packet4f>(const float* from) {
155
+ EIGEN_MSA_DEBUG;
156
+
157
+ float f = *from;
158
+ Packet4f v = {f, f, f, f};
159
+ return v;
160
+ }
161
+
162
+ template <>
163
+ EIGEN_STRONG_INLINE Packet4i pload1<Packet4i>(const int32_t* from) {
164
+ EIGEN_MSA_DEBUG;
165
+
166
+ return __builtin_msa_fill_w(*from);
167
+ }
168
+
169
+ template <>
170
+ EIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const Packet4f& b) {
171
+ EIGEN_MSA_DEBUG;
172
+
173
+ return __builtin_msa_fadd_w(a, b);
174
+ }
175
+
176
+ template <>
177
+ EIGEN_STRONG_INLINE Packet4i padd<Packet4i>(const Packet4i& a, const Packet4i& b) {
178
+ EIGEN_MSA_DEBUG;
179
+
180
+ return __builtin_msa_addv_w(a, b);
181
+ }
182
+
183
+ template <>
184
+ EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a) {
185
+ EIGEN_MSA_DEBUG;
186
+
187
+ static const Packet4f countdown = {0.0f, 1.0f, 2.0f, 3.0f};
188
+ return padd(pset1<Packet4f>(a), countdown);
189
+ }
190
+
191
+ template <>
192
+ EIGEN_STRONG_INLINE Packet4i plset<Packet4i>(const int32_t& a) {
193
+ EIGEN_MSA_DEBUG;
194
+
195
+ static const Packet4i countdown = {0, 1, 2, 3};
196
+ return padd(pset1<Packet4i>(a), countdown);
197
+ }
198
+
199
+ template <>
200
+ EIGEN_STRONG_INLINE Packet4f psub<Packet4f>(const Packet4f& a, const Packet4f& b) {
201
+ EIGEN_MSA_DEBUG;
202
+
203
+ return __builtin_msa_fsub_w(a, b);
204
+ }
205
+
206
+ template <>
207
+ EIGEN_STRONG_INLINE Packet4i psub<Packet4i>(const Packet4i& a, const Packet4i& b) {
208
+ EIGEN_MSA_DEBUG;
209
+
210
+ return __builtin_msa_subv_w(a, b);
211
+ }
212
+
213
+ template <>
214
+ EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a) {
215
+ EIGEN_MSA_DEBUG;
216
+
217
+ return (Packet4f)__builtin_msa_bnegi_w((v4u32)a, 31);
218
+ }
219
+
220
+ template <>
221
+ EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) {
222
+ EIGEN_MSA_DEBUG;
223
+
224
+ return __builtin_msa_addvi_w((v4i32)__builtin_msa_nori_b((v16u8)a, 0), 1);
225
+ }
226
+
227
+ template <>
228
+ EIGEN_STRONG_INLINE Packet4f pconj(const Packet4f& a) {
229
+ EIGEN_MSA_DEBUG;
230
+
231
+ return a;
232
+ }
233
+
234
+ template <>
235
+ EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) {
236
+ EIGEN_MSA_DEBUG;
237
+
238
+ return a;
239
+ }
240
+
241
+ template <>
242
+ EIGEN_STRONG_INLINE Packet4f pmul<Packet4f>(const Packet4f& a, const Packet4f& b) {
243
+ EIGEN_MSA_DEBUG;
244
+
245
+ return __builtin_msa_fmul_w(a, b);
246
+ }
247
+
248
+ template <>
249
+ EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const Packet4i& b) {
250
+ EIGEN_MSA_DEBUG;
251
+
252
+ return __builtin_msa_mulv_w(a, b);
253
+ }
254
+
255
+ template <>
256
+ EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b) {
257
+ EIGEN_MSA_DEBUG;
258
+
259
+ return __builtin_msa_fdiv_w(a, b);
260
+ }
261
+
262
+ template <>
263
+ EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& a, const Packet4i& b) {
264
+ EIGEN_MSA_DEBUG;
265
+
266
+ return __builtin_msa_div_s_w(a, b);
267
+ }
268
+
269
+ template <>
270
+ EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) {
271
+ EIGEN_MSA_DEBUG;
272
+
273
+ return __builtin_msa_fmadd_w(c, a, b);
274
+ }
275
+
276
+ template <>
277
+ EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) {
278
+ EIGEN_MSA_DEBUG;
279
+
280
+ // Use "asm" construct to avoid __builtin_msa_maddv_w GNU C bug.
281
+ Packet4i value = c;
282
+ __asm__("maddv.w %w[value], %w[a], %w[b]\n"
283
+ // Outputs
284
+ : [value] "+f"(value)
285
+ // Inputs
286
+ : [a] "f"(a), [b] "f"(b));
287
+ return value;
288
+ }
289
+
290
+ template <>
291
+ EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b) {
292
+ EIGEN_MSA_DEBUG;
293
+
294
+ return (Packet4f)__builtin_msa_and_v((v16u8)a, (v16u8)b);
295
+ }
296
+
297
+ template <>
298
+ EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) {
299
+ EIGEN_MSA_DEBUG;
300
+
301
+ return (Packet4i)__builtin_msa_and_v((v16u8)a, (v16u8)b);
302
+ }
303
+
304
+ template <>
305
+ EIGEN_STRONG_INLINE Packet4f por<Packet4f>(const Packet4f& a, const Packet4f& b) {
306
+ EIGEN_MSA_DEBUG;
307
+
308
+ return (Packet4f)__builtin_msa_or_v((v16u8)a, (v16u8)b);
309
+ }
310
+
311
+ template <>
312
+ EIGEN_STRONG_INLINE Packet4i por<Packet4i>(const Packet4i& a, const Packet4i& b) {
313
+ EIGEN_MSA_DEBUG;
314
+
315
+ return (Packet4i)__builtin_msa_or_v((v16u8)a, (v16u8)b);
316
+ }
317
+
318
+ template <>
319
+ EIGEN_STRONG_INLINE Packet4f pxor<Packet4f>(const Packet4f& a, const Packet4f& b) {
320
+ EIGEN_MSA_DEBUG;
321
+
322
+ return (Packet4f)__builtin_msa_xor_v((v16u8)a, (v16u8)b);
323
+ }
324
+
325
+ template <>
326
+ EIGEN_STRONG_INLINE Packet4i pxor<Packet4i>(const Packet4i& a, const Packet4i& b) {
327
+ EIGEN_MSA_DEBUG;
328
+
329
+ return (Packet4i)__builtin_msa_xor_v((v16u8)a, (v16u8)b);
330
+ }
331
+
332
+ template <>
333
+ EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f& b) {
334
+ EIGEN_MSA_DEBUG;
335
+
336
+ return pand(a, (Packet4f)__builtin_msa_xori_b((v16u8)b, 255));
337
+ }
338
+
339
+ template <>
340
+ EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) {
341
+ EIGEN_MSA_DEBUG;
342
+
343
+ return pand(a, (Packet4i)__builtin_msa_xori_b((v16u8)b, 255));
344
+ }
345
+
346
+ template <>
347
+ EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) {
348
+ EIGEN_MSA_DEBUG;
349
+
350
+ #if EIGEN_FAST_MATH
351
+ // This prefers numbers to NaNs.
352
+ return __builtin_msa_fmin_w(a, b);
353
+ #else
354
+ // This prefers NaNs to numbers.
355
+ Packet4i aNaN = __builtin_msa_fcun_w(a, a);
356
+ Packet4i aMinOrNaN = por(__builtin_msa_fclt_w(a, b), aNaN);
357
+ return (Packet4f)__builtin_msa_bsel_v((v16u8)aMinOrNaN, (v16u8)b, (v16u8)a);
358
+ #endif
359
+ }
360
+
361
+ template <>
362
+ EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b) {
363
+ EIGEN_MSA_DEBUG;
364
+
365
+ return __builtin_msa_min_s_w(a, b);
366
+ }
367
+
368
+ template <>
369
+ EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b) {
370
+ EIGEN_MSA_DEBUG;
371
+
372
+ #if EIGEN_FAST_MATH
373
+ // This prefers numbers to NaNs.
374
+ return __builtin_msa_fmax_w(a, b);
375
+ #else
376
+ // This prefers NaNs to numbers.
377
+ Packet4i aNaN = __builtin_msa_fcun_w(a, a);
378
+ Packet4i aMaxOrNaN = por(__builtin_msa_fclt_w(b, a), aNaN);
379
+ return (Packet4f)__builtin_msa_bsel_v((v16u8)aMaxOrNaN, (v16u8)b, (v16u8)a);
380
+ #endif
381
+ }
382
+
383
+ template <>
384
+ EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b) {
385
+ EIGEN_MSA_DEBUG;
386
+
387
+ return __builtin_msa_max_s_w(a, b);
388
+ }
389
+
390
+ template <>
391
+ EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from) {
392
+ EIGEN_MSA_DEBUG;
393
+
394
+ EIGEN_DEBUG_ALIGNED_LOAD return (Packet4f)__builtin_msa_ld_w(const_cast<float*>(from), 0);
395
+ }
396
+
397
+ template <>
398
+ EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int32_t* from) {
399
+ EIGEN_MSA_DEBUG;
400
+
401
+ EIGEN_DEBUG_ALIGNED_LOAD return __builtin_msa_ld_w(const_cast<int32_t*>(from), 0);
402
+ }
403
+
404
+ template <>
405
+ EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from) {
406
+ EIGEN_MSA_DEBUG;
407
+
408
+ EIGEN_DEBUG_UNALIGNED_LOAD return (Packet4f)__builtin_msa_ld_w(const_cast<float*>(from), 0);
409
+ }
410
+
411
+ template <>
412
+ EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int32_t* from) {
413
+ EIGEN_MSA_DEBUG;
414
+
415
+ EIGEN_DEBUG_UNALIGNED_LOAD return (Packet4i)__builtin_msa_ld_w(const_cast<int32_t*>(from), 0);
416
+ }
417
+
418
+ template <>
419
+ EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from) {
420
+ EIGEN_MSA_DEBUG;
421
+
422
+ float f0 = from[0], f1 = from[1];
423
+ Packet4f v0 = {f0, f0, f0, f0};
424
+ Packet4f v1 = {f1, f1, f1, f1};
425
+ return (Packet4f)__builtin_msa_ilvr_d((v2i64)v1, (v2i64)v0);
426
+ }
427
+
428
+ template <>
429
+ EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int32_t* from) {
430
+ EIGEN_MSA_DEBUG;
431
+
432
+ int32_t i0 = from[0], i1 = from[1];
433
+ Packet4i v0 = {i0, i0, i0, i0};
434
+ Packet4i v1 = {i1, i1, i1, i1};
435
+ return (Packet4i)__builtin_msa_ilvr_d((v2i64)v1, (v2i64)v0);
436
+ }
437
+
438
+ template <>
439
+ EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from) {
440
+ EIGEN_MSA_DEBUG;
441
+
442
+ EIGEN_DEBUG_ALIGNED_STORE __builtin_msa_st_w((Packet4i)from, to, 0);
443
+ }
444
+
445
+ template <>
446
+ EIGEN_STRONG_INLINE void pstore<int32_t>(int32_t* to, const Packet4i& from) {
447
+ EIGEN_MSA_DEBUG;
448
+
449
+ EIGEN_DEBUG_ALIGNED_STORE __builtin_msa_st_w(from, to, 0);
450
+ }
451
+
452
+ template <>
453
+ EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) {
454
+ EIGEN_MSA_DEBUG;
455
+
456
+ EIGEN_DEBUG_UNALIGNED_STORE __builtin_msa_st_w((Packet4i)from, to, 0);
457
+ }
458
+
459
+ template <>
460
+ EIGEN_STRONG_INLINE void pstoreu<int32_t>(int32_t* to, const Packet4i& from) {
461
+ EIGEN_MSA_DEBUG;
462
+
463
+ EIGEN_DEBUG_UNALIGNED_STORE __builtin_msa_st_w(from, to, 0);
464
+ }
465
+
466
+ template <>
467
+ EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, Index stride) {
468
+ EIGEN_MSA_DEBUG;
469
+
470
+ float f = *from;
471
+ Packet4f v = {f, f, f, f};
472
+ v[1] = from[stride];
473
+ v[2] = from[2 * stride];
474
+ v[3] = from[3 * stride];
475
+ return v;
476
+ }
477
+
478
+ template <>
479
+ EIGEN_DEVICE_FUNC inline Packet4i pgather<int32_t, Packet4i>(const int32_t* from, Index stride) {
480
+ EIGEN_MSA_DEBUG;
481
+
482
+ int32_t i = *from;
483
+ Packet4i v = {i, i, i, i};
484
+ v[1] = from[stride];
485
+ v[2] = from[2 * stride];
486
+ v[3] = from[3 * stride];
487
+ return v;
488
+ }
489
+
490
+ template <>
491
+ EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, Index stride) {
492
+ EIGEN_MSA_DEBUG;
493
+
494
+ *to = from[0];
495
+ to += stride;
496
+ *to = from[1];
497
+ to += stride;
498
+ *to = from[2];
499
+ to += stride;
500
+ *to = from[3];
501
+ }
502
+
503
+ template <>
504
+ EIGEN_DEVICE_FUNC inline void pscatter<int32_t, Packet4i>(int32_t* to, const Packet4i& from, Index stride) {
505
+ EIGEN_MSA_DEBUG;
506
+
507
+ *to = from[0];
508
+ to += stride;
509
+ *to = from[1];
510
+ to += stride;
511
+ *to = from[2];
512
+ to += stride;
513
+ *to = from[3];
514
+ }
515
+
516
+ template <>
517
+ EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) {
518
+ EIGEN_MSA_DEBUG;
519
+
520
+ __builtin_prefetch(addr);
521
+ }
522
+
523
+ template <>
524
+ EIGEN_STRONG_INLINE void prefetch<int32_t>(const int32_t* addr) {
525
+ EIGEN_MSA_DEBUG;
526
+
527
+ __builtin_prefetch(addr);
528
+ }
529
+
530
+ template <>
531
+ EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) {
532
+ EIGEN_MSA_DEBUG;
533
+
534
+ return a[0];
535
+ }
536
+
537
+ template <>
538
+ EIGEN_STRONG_INLINE int32_t pfirst<Packet4i>(const Packet4i& a) {
539
+ EIGEN_MSA_DEBUG;
540
+
541
+ return a[0];
542
+ }
543
+
544
+ template <>
545
+ EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a) {
546
+ EIGEN_MSA_DEBUG;
547
+
548
+ return (Packet4f)__builtin_msa_shf_w((v4i32)a, EIGEN_MSA_SHF_I8(3, 2, 1, 0));
549
+ }
550
+
551
+ template <>
552
+ EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a) {
553
+ EIGEN_MSA_DEBUG;
554
+
555
+ return __builtin_msa_shf_w(a, EIGEN_MSA_SHF_I8(3, 2, 1, 0));
556
+ }
557
+
558
+ template <>
559
+ EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a) {
560
+ EIGEN_MSA_DEBUG;
561
+
562
+ return (Packet4f)__builtin_msa_bclri_w((v4u32)a, 31);
563
+ }
564
+
565
+ template <>
566
+ EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) {
567
+ EIGEN_MSA_DEBUG;
568
+
569
+ Packet4i zero = __builtin_msa_ldi_w(0);
570
+ return __builtin_msa_add_a_w(zero, a);
571
+ }
572
+
573
+ template <>
574
+ EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a) {
575
+ EIGEN_MSA_DEBUG;
576
+
577
+ Packet4f s = padd(a, (Packet4f)__builtin_msa_shf_w((v4i32)a, EIGEN_MSA_SHF_I8(2, 3, 0, 1)));
578
+ s = padd(s, (Packet4f)__builtin_msa_shf_w((v4i32)s, EIGEN_MSA_SHF_I8(1, 0, 3, 2)));
579
+ return s[0];
580
+ }
581
+
582
+ template <>
583
+ EIGEN_STRONG_INLINE int32_t predux<Packet4i>(const Packet4i& a) {
584
+ EIGEN_MSA_DEBUG;
585
+
586
+ Packet4i s = padd(a, __builtin_msa_shf_w(a, EIGEN_MSA_SHF_I8(2, 3, 0, 1)));
587
+ s = padd(s, __builtin_msa_shf_w(s, EIGEN_MSA_SHF_I8(1, 0, 3, 2)));
588
+ return s[0];
589
+ }
590
+
591
+ // Other reduction functions:
592
+ // mul
593
+ template <>
594
+ EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a) {
595
+ EIGEN_MSA_DEBUG;
596
+
597
+ Packet4f p = pmul(a, (Packet4f)__builtin_msa_shf_w((v4i32)a, EIGEN_MSA_SHF_I8(2, 3, 0, 1)));
598
+ p = pmul(p, (Packet4f)__builtin_msa_shf_w((v4i32)p, EIGEN_MSA_SHF_I8(1, 0, 3, 2)));
599
+ return p[0];
600
+ }
601
+
602
+ template <>
603
+ EIGEN_STRONG_INLINE int32_t predux_mul<Packet4i>(const Packet4i& a) {
604
+ EIGEN_MSA_DEBUG;
605
+
606
+ Packet4i p = pmul(a, __builtin_msa_shf_w(a, EIGEN_MSA_SHF_I8(2, 3, 0, 1)));
607
+ p = pmul(p, __builtin_msa_shf_w(p, EIGEN_MSA_SHF_I8(1, 0, 3, 2)));
608
+ return p[0];
609
+ }
610
+
611
+ // min
612
+ template <>
613
+ EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a) {
614
+ EIGEN_MSA_DEBUG;
615
+
616
+ // Swap 64-bit halves of a.
617
+ Packet4f swapped = (Packet4f)__builtin_msa_shf_w((Packet4i)a, EIGEN_MSA_SHF_I8(2, 3, 0, 1));
618
+ #if !EIGEN_FAST_MATH
619
+ // Detect presence of NaNs from pairs a[0]-a[2] and a[1]-a[3] as two 32-bit
620
+ // masks of all zeroes/ones in low 64 bits.
621
+ v16u8 unord = (v16u8)__builtin_msa_fcun_w(a, swapped);
622
+ // Combine the two masks into one: 64 ones if no NaNs, otherwise 64 zeroes.
623
+ unord = (v16u8)__builtin_msa_ceqi_d((v2i64)unord, 0);
624
+ #endif
625
+ // Continue with min computation.
626
+ Packet4f v = __builtin_msa_fmin_w(a, swapped);
627
+ v = __builtin_msa_fmin_w(v, (Packet4f)__builtin_msa_shf_w((Packet4i)v, EIGEN_MSA_SHF_I8(1, 0, 3, 2)));
628
+ #if !EIGEN_FAST_MATH
629
+ // Based on the mask select between v and 4 qNaNs.
630
+ v16u8 qnans = (v16u8)__builtin_msa_fill_w(0x7FC00000);
631
+ v = (Packet4f)__builtin_msa_bsel_v(unord, qnans, (v16u8)v);
632
+ #endif
633
+ return v[0];
634
+ }
635
+
636
+ template <>
637
+ EIGEN_STRONG_INLINE int32_t predux_min<Packet4i>(const Packet4i& a) {
638
+ EIGEN_MSA_DEBUG;
639
+
640
+ Packet4i m = pmin(a, __builtin_msa_shf_w(a, EIGEN_MSA_SHF_I8(2, 3, 0, 1)));
641
+ m = pmin(m, __builtin_msa_shf_w(m, EIGEN_MSA_SHF_I8(1, 0, 3, 2)));
642
+ return m[0];
643
+ }
644
+
645
+ // max
646
+ template <>
647
+ EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a) {
648
+ EIGEN_MSA_DEBUG;
649
+
650
+ // Swap 64-bit halves of a.
651
+ Packet4f swapped = (Packet4f)__builtin_msa_shf_w((Packet4i)a, EIGEN_MSA_SHF_I8(2, 3, 0, 1));
652
+ #if !EIGEN_FAST_MATH
653
+ // Detect presence of NaNs from pairs a[0]-a[2] and a[1]-a[3] as two 32-bit
654
+ // masks of all zeroes/ones in low 64 bits.
655
+ v16u8 unord = (v16u8)__builtin_msa_fcun_w(a, swapped);
656
+ // Combine the two masks into one: 64 ones if no NaNs, otherwise 64 zeroes.
657
+ unord = (v16u8)__builtin_msa_ceqi_d((v2i64)unord, 0);
658
+ #endif
659
+ // Continue with max computation.
660
+ Packet4f v = __builtin_msa_fmax_w(a, swapped);
661
+ v = __builtin_msa_fmax_w(v, (Packet4f)__builtin_msa_shf_w((Packet4i)v, EIGEN_MSA_SHF_I8(1, 0, 3, 2)));
662
+ #if !EIGEN_FAST_MATH
663
+ // Based on the mask select between v and 4 qNaNs.
664
+ v16u8 qnans = (v16u8)__builtin_msa_fill_w(0x7FC00000);
665
+ v = (Packet4f)__builtin_msa_bsel_v(unord, qnans, (v16u8)v);
666
+ #endif
667
+ return v[0];
668
+ }
669
+
670
+ template <>
671
+ EIGEN_STRONG_INLINE int32_t predux_max<Packet4i>(const Packet4i& a) {
672
+ EIGEN_MSA_DEBUG;
673
+
674
+ Packet4i m = pmax(a, __builtin_msa_shf_w(a, EIGEN_MSA_SHF_I8(2, 3, 0, 1)));
675
+ m = pmax(m, __builtin_msa_shf_w(m, EIGEN_MSA_SHF_I8(1, 0, 3, 2)));
676
+ return m[0];
677
+ }
678
+
679
+ inline std::ostream& operator<<(std::ostream& os, const PacketBlock<Packet4f, 4>& value) {
680
+ os << "[ " << value.packet[0] << "," << std::endl
681
+ << " " << value.packet[1] << "," << std::endl
682
+ << " " << value.packet[2] << "," << std::endl
683
+ << " " << value.packet[3] << " ]";
684
+ return os;
685
+ }
686
+
687
+ EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet4f, 4>& kernel) {
688
+ EIGEN_MSA_DEBUG;
689
+
690
+ v4i32 tmp1, tmp2, tmp3, tmp4;
691
+
692
+ tmp1 = __builtin_msa_ilvr_w((v4i32)kernel.packet[1], (v4i32)kernel.packet[0]);
693
+ tmp2 = __builtin_msa_ilvr_w((v4i32)kernel.packet[3], (v4i32)kernel.packet[2]);
694
+ tmp3 = __builtin_msa_ilvl_w((v4i32)kernel.packet[1], (v4i32)kernel.packet[0]);
695
+ tmp4 = __builtin_msa_ilvl_w((v4i32)kernel.packet[3], (v4i32)kernel.packet[2]);
696
+
697
+ kernel.packet[0] = (Packet4f)__builtin_msa_ilvr_d((v2i64)tmp2, (v2i64)tmp1);
698
+ kernel.packet[1] = (Packet4f)__builtin_msa_ilvod_d((v2i64)tmp2, (v2i64)tmp1);
699
+ kernel.packet[2] = (Packet4f)__builtin_msa_ilvr_d((v2i64)tmp4, (v2i64)tmp3);
700
+ kernel.packet[3] = (Packet4f)__builtin_msa_ilvod_d((v2i64)tmp4, (v2i64)tmp3);
701
+ }
702
+
703
+ inline std::ostream& operator<<(std::ostream& os, const PacketBlock<Packet4i, 4>& value) {
704
+ os << "[ " << value.packet[0] << "," << std::endl
705
+ << " " << value.packet[1] << "," << std::endl
706
+ << " " << value.packet[2] << "," << std::endl
707
+ << " " << value.packet[3] << " ]";
708
+ return os;
709
+ }
710
+
711
+ EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet4i, 4>& kernel) {
712
+ EIGEN_MSA_DEBUG;
713
+
714
+ v4i32 tmp1, tmp2, tmp3, tmp4;
715
+
716
+ tmp1 = __builtin_msa_ilvr_w(kernel.packet[1], kernel.packet[0]);
717
+ tmp2 = __builtin_msa_ilvr_w(kernel.packet[3], kernel.packet[2]);
718
+ tmp3 = __builtin_msa_ilvl_w(kernel.packet[1], kernel.packet[0]);
719
+ tmp4 = __builtin_msa_ilvl_w(kernel.packet[3], kernel.packet[2]);
720
+
721
+ kernel.packet[0] = (Packet4i)__builtin_msa_ilvr_d((v2i64)tmp2, (v2i64)tmp1);
722
+ kernel.packet[1] = (Packet4i)__builtin_msa_ilvod_d((v2i64)tmp2, (v2i64)tmp1);
723
+ kernel.packet[2] = (Packet4i)__builtin_msa_ilvr_d((v2i64)tmp4, (v2i64)tmp3);
724
+ kernel.packet[3] = (Packet4i)__builtin_msa_ilvod_d((v2i64)tmp4, (v2i64)tmp3);
725
+ }
726
+
727
+ template <>
728
+ EIGEN_STRONG_INLINE Packet4f psqrt(const Packet4f& a) {
729
+ EIGEN_MSA_DEBUG;
730
+
731
+ return __builtin_msa_fsqrt_w(a);
732
+ }
733
+
734
+ template <>
735
+ EIGEN_STRONG_INLINE Packet4f prsqrt(const Packet4f& a) {
736
+ EIGEN_MSA_DEBUG;
737
+
738
+ #if EIGEN_FAST_MATH
739
+ return __builtin_msa_frsqrt_w(a);
740
+ #else
741
+ Packet4f ones = __builtin_msa_ffint_s_w(__builtin_msa_ldi_w(1));
742
+ return pdiv(ones, psqrt(a));
743
+ #endif
744
+ }
745
+
746
+ template <>
747
+ EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f>(const Packet4f& a) {
748
+ Packet4f v = a;
749
+ int32_t old_mode, new_mode;
750
+ asm volatile(
751
+ "cfcmsa %[old_mode], $1\n"
752
+ "ori %[new_mode], %[old_mode], 3\n" // 3 = round towards -INFINITY.
753
+ "ctcmsa $1, %[new_mode]\n"
754
+ "frint.w %w[v], %w[v]\n"
755
+ "ctcmsa $1, %[old_mode]\n"
756
+ : // outputs
757
+ [old_mode] "=r"(old_mode), [new_mode] "=r"(new_mode),
758
+ [v] "+f"(v)
759
+ : // inputs
760
+ : // clobbers
761
+ );
762
+ return v;
763
+ }
764
+
765
+ template <>
766
+ EIGEN_STRONG_INLINE Packet4f pceil<Packet4f>(const Packet4f& a) {
767
+ Packet4f v = a;
768
+ int32_t old_mode, new_mode;
769
+ asm volatile(
770
+ "cfcmsa %[old_mode], $1\n"
771
+ "ori %[new_mode], %[old_mode], 3\n"
772
+ "xori %[new_mode], %[new_mode], 1\n" // 2 = round towards +INFINITY.
773
+ "ctcmsa $1, %[new_mode]\n"
774
+ "frint.w %w[v], %w[v]\n"
775
+ "ctcmsa $1, %[old_mode]\n"
776
+ : // outputs
777
+ [old_mode] "=r"(old_mode), [new_mode] "=r"(new_mode),
778
+ [v] "+f"(v)
779
+ : // inputs
780
+ : // clobbers
781
+ );
782
+ return v;
783
+ }
784
+
785
+ template <>
786
+ EIGEN_STRONG_INLINE Packet4f pround<Packet4f>(const Packet4f& a) {
787
+ Packet4f v = a;
788
+ int32_t old_mode, new_mode;
789
+ asm volatile(
790
+ "cfcmsa %[old_mode], $1\n"
791
+ "ori %[new_mode], %[old_mode], 3\n"
792
+ "xori %[new_mode], %[new_mode], 3\n" // 0 = round to nearest, ties to even.
793
+ "ctcmsa $1, %[new_mode]\n"
794
+ "frint.w %w[v], %w[v]\n"
795
+ "ctcmsa $1, %[old_mode]\n"
796
+ : // outputs
797
+ [old_mode] "=r"(old_mode), [new_mode] "=r"(new_mode),
798
+ [v] "+f"(v)
799
+ : // inputs
800
+ : // clobbers
801
+ );
802
+ return v;
803
+ }
804
+
805
+ template <>
806
+ EIGEN_STRONG_INLINE Packet4f pblend(const Selector<4>& ifPacket, const Packet4f& thenPacket,
807
+ const Packet4f& elsePacket) {
808
+ Packet4ui select = {ifPacket.select[0], ifPacket.select[1], ifPacket.select[2], ifPacket.select[3]};
809
+ Packet4i mask = __builtin_msa_ceqi_w((Packet4i)select, 0);
810
+ return (Packet4f)__builtin_msa_bsel_v((v16u8)mask, (v16u8)thenPacket, (v16u8)elsePacket);
811
+ }
812
+
813
+ template <>
814
+ EIGEN_STRONG_INLINE Packet4i pblend(const Selector<4>& ifPacket, const Packet4i& thenPacket,
815
+ const Packet4i& elsePacket) {
816
+ Packet4ui select = {ifPacket.select[0], ifPacket.select[1], ifPacket.select[2], ifPacket.select[3]};
817
+ Packet4i mask = __builtin_msa_ceqi_w((Packet4i)select, 0);
818
+ return (Packet4i)__builtin_msa_bsel_v((v16u8)mask, (v16u8)thenPacket, (v16u8)elsePacket);
819
+ }
820
+
821
+ //---------- double ----------
822
+
823
+ typedef v2f64 Packet2d;
824
+ typedef v2i64 Packet2l;
825
+ typedef v2u64 Packet2ul;
826
+
827
+ #define EIGEN_DECLARE_CONST_Packet2d(NAME, X) const Packet2d p2d_##NAME = {X, X}
828
+ #define EIGEN_DECLARE_CONST_Packet2l(NAME, X) const Packet2l p2l_##NAME = {X, X}
829
+ #define EIGEN_DECLARE_CONST_Packet2ul(NAME, X) const Packet2ul p2ul_##NAME = {X, X}
830
+
831
+ inline std::ostream& operator<<(std::ostream& os, const Packet2d& value) {
832
+ os << "[ " << value[0] << ", " << value[1] << " ]";
833
+ return os;
834
+ }
835
+
836
+ inline std::ostream& operator<<(std::ostream& os, const Packet2l& value) {
837
+ os << "[ " << value[0] << ", " << value[1] << " ]";
838
+ return os;
839
+ }
840
+
841
+ inline std::ostream& operator<<(std::ostream& os, const Packet2ul& value) {
842
+ os << "[ " << value[0] << ", " << value[1] << " ]";
843
+ return os;
844
+ }
845
+
846
+ template <>
847
+ struct packet_traits<double> : default_packet_traits {
848
+ typedef Packet2d type;
849
+ typedef Packet2d half;
850
+ enum {
851
+ Vectorizable = 1,
852
+ AlignedOnScalar = 1,
853
+ size = 2,
854
+ // FIXME check the Has*
855
+ HasDiv = 1,
856
+ HasExp = 1,
857
+ HasSqrt = 1,
858
+ HasRsqrt = 1,
859
+ HasBlend = 1
860
+ };
861
+ };
862
+
863
+ template <>
864
+ struct unpacket_traits<Packet2d> {
865
+ typedef double type;
866
+ enum {
867
+ size = 2,
868
+ alignment = Aligned16,
869
+ vectorizable = true,
870
+ masked_load_available = false,
871
+ masked_store_available = false
872
+ };
873
+ typedef Packet2d half;
874
+ };
875
+
876
+ template <>
877
+ EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) {
878
+ EIGEN_MSA_DEBUG;
879
+
880
+ Packet2d value = {from, from};
881
+ return value;
882
+ }
883
+
884
+ template <>
885
+ EIGEN_STRONG_INLINE Packet2d padd<Packet2d>(const Packet2d& a, const Packet2d& b) {
886
+ EIGEN_MSA_DEBUG;
887
+
888
+ return __builtin_msa_fadd_d(a, b);
889
+ }
890
+
891
+ template <>
892
+ EIGEN_STRONG_INLINE Packet2d plset<Packet2d>(const double& a) {
893
+ EIGEN_MSA_DEBUG;
894
+
895
+ static const Packet2d countdown = {0.0, 1.0};
896
+ return padd(pset1<Packet2d>(a), countdown);
897
+ }
898
+
899
+ template <>
900
+ EIGEN_STRONG_INLINE Packet2d psub<Packet2d>(const Packet2d& a, const Packet2d& b) {
901
+ EIGEN_MSA_DEBUG;
902
+
903
+ return __builtin_msa_fsub_d(a, b);
904
+ }
905
+
906
+ template <>
907
+ EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a) {
908
+ EIGEN_MSA_DEBUG;
909
+
910
+ return (Packet2d)__builtin_msa_bnegi_d((v2u64)a, 63);
911
+ }
912
+
913
+ template <>
914
+ EIGEN_STRONG_INLINE Packet2d pconj(const Packet2d& a) {
915
+ EIGEN_MSA_DEBUG;
916
+
917
+ return a;
918
+ }
919
+
920
+ template <>
921
+ EIGEN_STRONG_INLINE Packet2d pmul<Packet2d>(const Packet2d& a, const Packet2d& b) {
922
+ EIGEN_MSA_DEBUG;
923
+
924
+ return __builtin_msa_fmul_d(a, b);
925
+ }
926
+
927
+ template <>
928
+ EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const Packet2d& b) {
929
+ EIGEN_MSA_DEBUG;
930
+
931
+ return __builtin_msa_fdiv_d(a, b);
932
+ }
933
+
934
+ template <>
935
+ EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) {
936
+ EIGEN_MSA_DEBUG;
937
+
938
+ return __builtin_msa_fmadd_d(c, a, b);
939
+ }
940
+
941
+ // Logical Operations are not supported for float, so we have to reinterpret casts using MSA
942
+ // intrinsics
943
+ template <>
944
+ EIGEN_STRONG_INLINE Packet2d pand<Packet2d>(const Packet2d& a, const Packet2d& b) {
945
+ EIGEN_MSA_DEBUG;
946
+
947
+ return (Packet2d)__builtin_msa_and_v((v16u8)a, (v16u8)b);
948
+ }
949
+
950
+ template <>
951
+ EIGEN_STRONG_INLINE Packet2d por<Packet2d>(const Packet2d& a, const Packet2d& b) {
952
+ EIGEN_MSA_DEBUG;
953
+
954
+ return (Packet2d)__builtin_msa_or_v((v16u8)a, (v16u8)b);
955
+ }
956
+
957
+ template <>
958
+ EIGEN_STRONG_INLINE Packet2d pxor<Packet2d>(const Packet2d& a, const Packet2d& b) {
959
+ EIGEN_MSA_DEBUG;
960
+
961
+ return (Packet2d)__builtin_msa_xor_v((v16u8)a, (v16u8)b);
962
+ }
963
+
964
+ template <>
965
+ EIGEN_STRONG_INLINE Packet2d pandnot<Packet2d>(const Packet2d& a, const Packet2d& b) {
966
+ EIGEN_MSA_DEBUG;
967
+
968
+ return pand(a, (Packet2d)__builtin_msa_xori_b((v16u8)b, 255));
969
+ }
970
+
971
+ template <>
972
+ EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* from) {
973
+ EIGEN_MSA_DEBUG;
974
+
975
+ EIGEN_DEBUG_UNALIGNED_LOAD return (Packet2d)__builtin_msa_ld_d(const_cast<double*>(from), 0);
976
+ }
977
+
978
+ template <>
979
+ EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) {
980
+ EIGEN_MSA_DEBUG;
981
+
982
+ #if EIGEN_FAST_MATH
983
+ // This prefers numbers to NaNs.
984
+ return __builtin_msa_fmin_d(a, b);
985
+ #else
986
+ // This prefers NaNs to numbers.
987
+ v2i64 aNaN = __builtin_msa_fcun_d(a, a);
988
+ v2i64 aMinOrNaN = por(__builtin_msa_fclt_d(a, b), aNaN);
989
+ return (Packet2d)__builtin_msa_bsel_v((v16u8)aMinOrNaN, (v16u8)b, (v16u8)a);
990
+ #endif
991
+ }
992
+
993
+ template <>
994
+ EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) {
995
+ EIGEN_MSA_DEBUG;
996
+
997
+ #if EIGEN_FAST_MATH
998
+ // This prefers numbers to NaNs.
999
+ return __builtin_msa_fmax_d(a, b);
1000
+ #else
1001
+ // This prefers NaNs to numbers.
1002
+ v2i64 aNaN = __builtin_msa_fcun_d(a, a);
1003
+ v2i64 aMaxOrNaN = por(__builtin_msa_fclt_d(b, a), aNaN);
1004
+ return (Packet2d)__builtin_msa_bsel_v((v16u8)aMaxOrNaN, (v16u8)b, (v16u8)a);
1005
+ #endif
1006
+ }
1007
+
1008
+ template <>
1009
+ EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from) {
1010
+ EIGEN_MSA_DEBUG;
1011
+
1012
+ EIGEN_DEBUG_UNALIGNED_LOAD return (Packet2d)__builtin_msa_ld_d(const_cast<double*>(from), 0);
1013
+ }
1014
+
1015
+ template <>
1016
+ EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double* from) {
1017
+ EIGEN_MSA_DEBUG;
1018
+
1019
+ Packet2d value = {*from, *from};
1020
+ return value;
1021
+ }
1022
+
1023
+ template <>
1024
+ EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet2d& from) {
1025
+ EIGEN_MSA_DEBUG;
1026
+
1027
+ EIGEN_DEBUG_ALIGNED_STORE __builtin_msa_st_d((v2i64)from, to, 0);
1028
+ }
1029
+
1030
+ template <>
1031
+ EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d& from) {
1032
+ EIGEN_MSA_DEBUG;
1033
+
1034
+ EIGEN_DEBUG_UNALIGNED_STORE __builtin_msa_st_d((v2i64)from, to, 0);
1035
+ }
1036
+
1037
+ template <>
1038
+ EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, Index stride) {
1039
+ EIGEN_MSA_DEBUG;
1040
+
1041
+ Packet2d value;
1042
+ value[0] = *from;
1043
+ from += stride;
1044
+ value[1] = *from;
1045
+ return value;
1046
+ }
1047
+
1048
+ template <>
1049
+ EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to, const Packet2d& from, Index stride) {
1050
+ EIGEN_MSA_DEBUG;
1051
+
1052
+ *to = from[0];
1053
+ to += stride;
1054
+ *to = from[1];
1055
+ }
1056
+
1057
+ template <>
1058
+ EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) {
1059
+ EIGEN_MSA_DEBUG;
1060
+
1061
+ __builtin_prefetch(addr);
1062
+ }
1063
+
1064
+ template <>
1065
+ EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) {
1066
+ EIGEN_MSA_DEBUG;
1067
+
1068
+ return a[0];
1069
+ }
1070
+
1071
+ template <>
1072
+ EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a) {
1073
+ EIGEN_MSA_DEBUG;
1074
+
1075
+ return (Packet2d)__builtin_msa_shf_w((v4i32)a, EIGEN_MSA_SHF_I8(2, 3, 0, 1));
1076
+ }
1077
+
1078
+ template <>
1079
+ EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a) {
1080
+ EIGEN_MSA_DEBUG;
1081
+
1082
+ return (Packet2d)__builtin_msa_bclri_d((v2u64)a, 63);
1083
+ }
1084
+
1085
+ template <>
1086
+ EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a) {
1087
+ EIGEN_MSA_DEBUG;
1088
+
1089
+ Packet2d s = padd(a, preverse(a));
1090
+ return s[0];
1091
+ }
1092
+
1093
+ // Other reduction functions:
1094
+ // mul
1095
+ template <>
1096
+ EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a) {
1097
+ EIGEN_MSA_DEBUG;
1098
+
1099
+ Packet2d p = pmul(a, preverse(a));
1100
+ return p[0];
1101
+ }
1102
+
1103
+ // min
1104
+ template <>
1105
+ EIGEN_STRONG_INLINE double predux_min<Packet2d>(const Packet2d& a) {
1106
+ EIGEN_MSA_DEBUG;
1107
+
1108
+ #if EIGEN_FAST_MATH
1109
+ Packet2d swapped = (Packet2d)__builtin_msa_shf_w((Packet4i)a, EIGEN_MSA_SHF_I8(2, 3, 0, 1));
1110
+ Packet2d v = __builtin_msa_fmin_d(a, swapped);
1111
+ return v[0];
1112
+ #else
1113
+ double a0 = a[0], a1 = a[1];
1114
+ return ((numext::isnan)(a0) || a0 < a1) ? a0 : a1;
1115
+ #endif
1116
+ }
1117
+
1118
+ // max
1119
+ template <>
1120
+ EIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a) {
1121
+ EIGEN_MSA_DEBUG;
1122
+
1123
+ #if EIGEN_FAST_MATH
1124
+ Packet2d swapped = (Packet2d)__builtin_msa_shf_w((Packet4i)a, EIGEN_MSA_SHF_I8(2, 3, 0, 1));
1125
+ Packet2d v = __builtin_msa_fmax_d(a, swapped);
1126
+ return v[0];
1127
+ #else
1128
+ double a0 = a[0], a1 = a[1];
1129
+ return ((numext::isnan)(a0) || a0 > a1) ? a0 : a1;
1130
+ #endif
1131
+ }
1132
+
1133
+ template <>
1134
+ EIGEN_STRONG_INLINE Packet2d psqrt(const Packet2d& a) {
1135
+ EIGEN_MSA_DEBUG;
1136
+
1137
+ return __builtin_msa_fsqrt_d(a);
1138
+ }
1139
+
1140
+ template <>
1141
+ EIGEN_STRONG_INLINE Packet2d prsqrt(const Packet2d& a) {
1142
+ EIGEN_MSA_DEBUG;
1143
+
1144
+ #if EIGEN_FAST_MATH
1145
+ return __builtin_msa_frsqrt_d(a);
1146
+ #else
1147
+ Packet2d ones = __builtin_msa_ffint_s_d(__builtin_msa_ldi_d(1));
1148
+ return pdiv(ones, psqrt(a));
1149
+ #endif
1150
+ }
1151
+
1152
+ inline std::ostream& operator<<(std::ostream& os, const PacketBlock<Packet2d, 2>& value) {
1153
+ os << "[ " << value.packet[0] << "," << std::endl << " " << value.packet[1] << " ]";
1154
+ return os;
1155
+ }
1156
+
1157
+ EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet2d, 2>& kernel) {
1158
+ EIGEN_MSA_DEBUG;
1159
+
1160
+ Packet2d trn1 = (Packet2d)__builtin_msa_ilvev_d((v2i64)kernel.packet[1], (v2i64)kernel.packet[0]);
1161
+ Packet2d trn2 = (Packet2d)__builtin_msa_ilvod_d((v2i64)kernel.packet[1], (v2i64)kernel.packet[0]);
1162
+ kernel.packet[0] = trn1;
1163
+ kernel.packet[1] = trn2;
1164
+ }
1165
+
1166
+ template <>
1167
+ EIGEN_STRONG_INLINE Packet2d pfloor<Packet2d>(const Packet2d& a) {
1168
+ Packet2d v = a;
1169
+ int32_t old_mode, new_mode;
1170
+ asm volatile(
1171
+ "cfcmsa %[old_mode], $1\n"
1172
+ "ori %[new_mode], %[old_mode], 3\n" // 3 = round towards -INFINITY.
1173
+ "ctcmsa $1, %[new_mode]\n"
1174
+ "frint.d %w[v], %w[v]\n"
1175
+ "ctcmsa $1, %[old_mode]\n"
1176
+ : // outputs
1177
+ [old_mode] "=r"(old_mode), [new_mode] "=r"(new_mode),
1178
+ [v] "+f"(v)
1179
+ : // inputs
1180
+ : // clobbers
1181
+ );
1182
+ return v;
1183
+ }
1184
+
1185
+ template <>
1186
+ EIGEN_STRONG_INLINE Packet2d pceil<Packet2d>(const Packet2d& a) {
1187
+ Packet2d v = a;
1188
+ int32_t old_mode, new_mode;
1189
+ asm volatile(
1190
+ "cfcmsa %[old_mode], $1\n"
1191
+ "ori %[new_mode], %[old_mode], 3\n"
1192
+ "xori %[new_mode], %[new_mode], 1\n" // 2 = round towards +INFINITY.
1193
+ "ctcmsa $1, %[new_mode]\n"
1194
+ "frint.d %w[v], %w[v]\n"
1195
+ "ctcmsa $1, %[old_mode]\n"
1196
+ : // outputs
1197
+ [old_mode] "=r"(old_mode), [new_mode] "=r"(new_mode),
1198
+ [v] "+f"(v)
1199
+ : // inputs
1200
+ : // clobbers
1201
+ );
1202
+ return v;
1203
+ }
1204
+
1205
+ template <>
1206
+ EIGEN_STRONG_INLINE Packet2d pround<Packet2d>(const Packet2d& a) {
1207
+ Packet2d v = a;
1208
+ int32_t old_mode, new_mode;
1209
+ asm volatile(
1210
+ "cfcmsa %[old_mode], $1\n"
1211
+ "ori %[new_mode], %[old_mode], 3\n"
1212
+ "xori %[new_mode], %[new_mode], 3\n" // 0 = round to nearest, ties to even.
1213
+ "ctcmsa $1, %[new_mode]\n"
1214
+ "frint.d %w[v], %w[v]\n"
1215
+ "ctcmsa $1, %[old_mode]\n"
1216
+ : // outputs
1217
+ [old_mode] "=r"(old_mode), [new_mode] "=r"(new_mode),
1218
+ [v] "+f"(v)
1219
+ : // inputs
1220
+ : // clobbers
1221
+ );
1222
+ return v;
1223
+ }
1224
+
1225
+ template <>
1226
+ EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, const Packet2d& thenPacket,
1227
+ const Packet2d& elsePacket) {
1228
+ Packet2ul select = {ifPacket.select[0], ifPacket.select[1]};
1229
+ Packet2l mask = __builtin_msa_ceqi_d((Packet2l)select, 0);
1230
+ return (Packet2d)__builtin_msa_bsel_v((v16u8)mask, (v16u8)thenPacket, (v16u8)elsePacket);
1231
+ }
1232
+
1233
+ } // end namespace internal
1234
+
1235
+ } // end namespace Eigen
1236
+
1237
+ #endif // EIGEN_PACKET_MATH_MSA_H