@smake/eigen 1.1.0 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (431)
  1. package/README.md +1 -1
  2. package/eigen/Eigen/AccelerateSupport +52 -0
  3. package/eigen/Eigen/Cholesky +18 -20
  4. package/eigen/Eigen/CholmodSupport +28 -28
  5. package/eigen/Eigen/Core +187 -120
  6. package/eigen/Eigen/Eigenvalues +16 -13
  7. package/eigen/Eigen/Geometry +18 -18
  8. package/eigen/Eigen/Householder +9 -7
  9. package/eigen/Eigen/IterativeLinearSolvers +8 -4
  10. package/eigen/Eigen/Jacobi +14 -13
  11. package/eigen/Eigen/KLUSupport +23 -21
  12. package/eigen/Eigen/LU +15 -16
  13. package/eigen/Eigen/MetisSupport +12 -12
  14. package/eigen/Eigen/OrderingMethods +54 -51
  15. package/eigen/Eigen/PaStiXSupport +23 -21
  16. package/eigen/Eigen/PardisoSupport +17 -14
  17. package/eigen/Eigen/QR +18 -20
  18. package/eigen/Eigen/QtAlignedMalloc +5 -12
  19. package/eigen/Eigen/SPQRSupport +21 -14
  20. package/eigen/Eigen/SVD +23 -17
  21. package/eigen/Eigen/Sparse +1 -2
  22. package/eigen/Eigen/SparseCholesky +18 -15
  23. package/eigen/Eigen/SparseCore +18 -17
  24. package/eigen/Eigen/SparseLU +9 -9
  25. package/eigen/Eigen/SparseQR +16 -14
  26. package/eigen/Eigen/StdDeque +5 -2
  27. package/eigen/Eigen/StdList +5 -2
  28. package/eigen/Eigen/StdVector +5 -2
  29. package/eigen/Eigen/SuperLUSupport +30 -24
  30. package/eigen/Eigen/ThreadPool +80 -0
  31. package/eigen/Eigen/UmfPackSupport +19 -17
  32. package/eigen/Eigen/Version +14 -0
  33. package/eigen/Eigen/src/AccelerateSupport/AccelerateSupport.h +423 -0
  34. package/eigen/Eigen/src/AccelerateSupport/InternalHeaderCheck.h +3 -0
  35. package/eigen/Eigen/src/Cholesky/InternalHeaderCheck.h +3 -0
  36. package/eigen/Eigen/src/Cholesky/LDLT.h +366 -405
  37. package/eigen/Eigen/src/Cholesky/LLT.h +323 -367
  38. package/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h +81 -56
  39. package/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +585 -529
  40. package/eigen/Eigen/src/CholmodSupport/InternalHeaderCheck.h +3 -0
  41. package/eigen/Eigen/src/Core/ArithmeticSequence.h +143 -317
  42. package/eigen/Eigen/src/Core/Array.h +329 -370
  43. package/eigen/Eigen/src/Core/ArrayBase.h +190 -203
  44. package/eigen/Eigen/src/Core/ArrayWrapper.h +126 -170
  45. package/eigen/Eigen/src/Core/Assign.h +30 -40
  46. package/eigen/Eigen/src/Core/AssignEvaluator.h +651 -604
  47. package/eigen/Eigen/src/Core/Assign_MKL.h +125 -120
  48. package/eigen/Eigen/src/Core/BandMatrix.h +267 -282
  49. package/eigen/Eigen/src/Core/Block.h +371 -390
  50. package/eigen/Eigen/src/Core/CommaInitializer.h +85 -100
  51. package/eigen/Eigen/src/Core/ConditionEstimator.h +51 -53
  52. package/eigen/Eigen/src/Core/CoreEvaluators.h +1214 -937
  53. package/eigen/Eigen/src/Core/CoreIterators.h +72 -63
  54. package/eigen/Eigen/src/Core/CwiseBinaryOp.h +112 -129
  55. package/eigen/Eigen/src/Core/CwiseNullaryOp.h +676 -702
  56. package/eigen/Eigen/src/Core/CwiseTernaryOp.h +77 -103
  57. package/eigen/Eigen/src/Core/CwiseUnaryOp.h +55 -67
  58. package/eigen/Eigen/src/Core/CwiseUnaryView.h +127 -92
  59. package/eigen/Eigen/src/Core/DenseBase.h +630 -658
  60. package/eigen/Eigen/src/Core/DenseCoeffsBase.h +511 -628
  61. package/eigen/Eigen/src/Core/DenseStorage.h +511 -590
  62. package/eigen/Eigen/src/Core/DeviceWrapper.h +153 -0
  63. package/eigen/Eigen/src/Core/Diagonal.h +168 -207
  64. package/eigen/Eigen/src/Core/DiagonalMatrix.h +346 -317
  65. package/eigen/Eigen/src/Core/DiagonalProduct.h +12 -10
  66. package/eigen/Eigen/src/Core/Dot.h +167 -217
  67. package/eigen/Eigen/src/Core/EigenBase.h +74 -85
  68. package/eigen/Eigen/src/Core/Fill.h +138 -0
  69. package/eigen/Eigen/src/Core/FindCoeff.h +464 -0
  70. package/eigen/Eigen/src/Core/ForceAlignedAccess.h +90 -113
  71. package/eigen/Eigen/src/Core/Fuzzy.h +82 -105
  72. package/eigen/Eigen/src/Core/GeneralProduct.h +315 -261
  73. package/eigen/Eigen/src/Core/GenericPacketMath.h +1182 -520
  74. package/eigen/Eigen/src/Core/GlobalFunctions.h +193 -157
  75. package/eigen/Eigen/src/Core/IO.h +131 -156
  76. package/eigen/Eigen/src/Core/IndexedView.h +209 -125
  77. package/eigen/Eigen/src/Core/InnerProduct.h +260 -0
  78. package/eigen/Eigen/src/Core/InternalHeaderCheck.h +3 -0
  79. package/eigen/Eigen/src/Core/Inverse.h +50 -59
  80. package/eigen/Eigen/src/Core/Map.h +123 -141
  81. package/eigen/Eigen/src/Core/MapBase.h +255 -282
  82. package/eigen/Eigen/src/Core/MathFunctions.h +1247 -1201
  83. package/eigen/Eigen/src/Core/MathFunctionsImpl.h +162 -99
  84. package/eigen/Eigen/src/Core/Matrix.h +463 -494
  85. package/eigen/Eigen/src/Core/MatrixBase.h +468 -470
  86. package/eigen/Eigen/src/Core/NestByValue.h +58 -52
  87. package/eigen/Eigen/src/Core/NoAlias.h +79 -86
  88. package/eigen/Eigen/src/Core/NumTraits.h +206 -206
  89. package/eigen/Eigen/src/Core/PartialReduxEvaluator.h +163 -142
  90. package/eigen/Eigen/src/Core/PermutationMatrix.h +461 -511
  91. package/eigen/Eigen/src/Core/PlainObjectBase.h +858 -972
  92. package/eigen/Eigen/src/Core/Product.h +246 -130
  93. package/eigen/Eigen/src/Core/ProductEvaluators.h +779 -671
  94. package/eigen/Eigen/src/Core/Random.h +153 -164
  95. package/eigen/Eigen/src/Core/RandomImpl.h +262 -0
  96. package/eigen/Eigen/src/Core/RealView.h +250 -0
  97. package/eigen/Eigen/src/Core/Redux.h +334 -314
  98. package/eigen/Eigen/src/Core/Ref.h +259 -257
  99. package/eigen/Eigen/src/Core/Replicate.h +92 -104
  100. package/eigen/Eigen/src/Core/Reshaped.h +215 -271
  101. package/eigen/Eigen/src/Core/ReturnByValue.h +47 -55
  102. package/eigen/Eigen/src/Core/Reverse.h +133 -148
  103. package/eigen/Eigen/src/Core/Select.h +68 -140
  104. package/eigen/Eigen/src/Core/SelfAdjointView.h +254 -290
  105. package/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h +23 -20
  106. package/eigen/Eigen/src/Core/SkewSymmetricMatrix3.h +382 -0
  107. package/eigen/Eigen/src/Core/Solve.h +88 -102
  108. package/eigen/Eigen/src/Core/SolveTriangular.h +126 -124
  109. package/eigen/Eigen/src/Core/SolverBase.h +132 -133
  110. package/eigen/Eigen/src/Core/StableNorm.h +113 -147
  111. package/eigen/Eigen/src/Core/StlIterators.h +404 -248
  112. package/eigen/Eigen/src/Core/Stride.h +90 -92
  113. package/eigen/Eigen/src/Core/Swap.h +70 -39
  114. package/eigen/Eigen/src/Core/Transpose.h +258 -295
  115. package/eigen/Eigen/src/Core/Transpositions.h +270 -333
  116. package/eigen/Eigen/src/Core/TriangularMatrix.h +642 -743
  117. package/eigen/Eigen/src/Core/VectorBlock.h +59 -72
  118. package/eigen/Eigen/src/Core/VectorwiseOp.h +653 -704
  119. package/eigen/Eigen/src/Core/Visitor.h +464 -308
  120. package/eigen/Eigen/src/Core/arch/AVX/Complex.h +380 -187
  121. package/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +65 -163
  122. package/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +2145 -638
  123. package/eigen/Eigen/src/Core/arch/AVX/Reductions.h +353 -0
  124. package/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +253 -60
  125. package/eigen/Eigen/src/Core/arch/AVX512/Complex.h +278 -228
  126. package/eigen/Eigen/src/Core/arch/AVX512/GemmKernel.h +1245 -0
  127. package/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +48 -269
  128. package/eigen/Eigen/src/Core/arch/AVX512/MathFunctionsFP16.h +75 -0
  129. package/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1597 -754
  130. package/eigen/Eigen/src/Core/arch/AVX512/PacketMathFP16.h +1413 -0
  131. package/eigen/Eigen/src/Core/arch/AVX512/Reductions.h +297 -0
  132. package/eigen/Eigen/src/Core/arch/AVX512/TrsmKernel.h +1167 -0
  133. package/eigen/Eigen/src/Core/arch/AVX512/TrsmUnrolls.inc +1219 -0
  134. package/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +229 -41
  135. package/eigen/Eigen/src/Core/arch/AVX512/TypeCastingFP16.h +130 -0
  136. package/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +420 -184
  137. package/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +40 -49
  138. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2962 -2213
  139. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +196 -212
  140. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +713 -441
  141. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMAbfloat16.h +742 -0
  142. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixVectorProduct.inc +2818 -0
  143. package/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2380 -1362
  144. package/eigen/Eigen/src/Core/arch/AltiVec/TypeCasting.h +153 -0
  145. package/eigen/Eigen/src/Core/arch/Default/BFloat16.h +390 -224
  146. package/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +78 -67
  147. package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1784 -799
  148. package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +167 -50
  149. package/eigen/Eigen/src/Core/arch/Default/Half.h +528 -379
  150. package/eigen/Eigen/src/Core/arch/Default/Settings.h +10 -12
  151. package/eigen/Eigen/src/Core/arch/GPU/Complex.h +244 -0
  152. package/eigen/Eigen/src/Core/arch/GPU/MathFunctions.h +41 -40
  153. package/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +550 -523
  154. package/eigen/Eigen/src/Core/arch/GPU/Tuple.h +268 -0
  155. package/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +27 -30
  156. package/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +8 -8
  157. package/eigen/Eigen/src/Core/arch/HVX/PacketMath.h +1088 -0
  158. package/eigen/Eigen/src/Core/arch/LSX/Complex.h +520 -0
  159. package/eigen/Eigen/src/Core/arch/LSX/GeneralBlockPanelKernel.h +23 -0
  160. package/eigen/Eigen/src/Core/arch/LSX/MathFunctions.h +43 -0
  161. package/eigen/Eigen/src/Core/arch/LSX/PacketMath.h +2866 -0
  162. package/eigen/Eigen/src/Core/arch/LSX/TypeCasting.h +526 -0
  163. package/eigen/Eigen/src/Core/arch/MSA/Complex.h +54 -82
  164. package/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +84 -92
  165. package/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +51 -47
  166. package/eigen/Eigen/src/Core/arch/NEON/Complex.h +454 -306
  167. package/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +175 -115
  168. package/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +23 -30
  169. package/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4366 -2857
  170. package/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +616 -393
  171. package/eigen/Eigen/src/Core/arch/NEON/UnaryFunctors.h +57 -0
  172. package/eigen/Eigen/src/Core/arch/SSE/Complex.h +350 -198
  173. package/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +38 -149
  174. package/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +1791 -912
  175. package/eigen/Eigen/src/Core/arch/SSE/Reductions.h +324 -0
  176. package/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +128 -40
  177. package/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +10 -6
  178. package/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +156 -234
  179. package/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +6 -3
  180. package/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +27 -32
  181. package/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +119 -117
  182. package/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +325 -419
  183. package/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +15 -17
  184. package/eigen/Eigen/src/Core/arch/ZVector/Complex.h +325 -181
  185. package/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +94 -83
  186. package/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +811 -458
  187. package/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +121 -124
  188. package/eigen/Eigen/src/Core/functors/BinaryFunctors.h +576 -370
  189. package/eigen/Eigen/src/Core/functors/NullaryFunctors.h +194 -109
  190. package/eigen/Eigen/src/Core/functors/StlFunctors.h +95 -112
  191. package/eigen/Eigen/src/Core/functors/TernaryFunctors.h +34 -7
  192. package/eigen/Eigen/src/Core/functors/UnaryFunctors.h +1038 -749
  193. package/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1883 -1375
  194. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +312 -370
  195. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +189 -176
  196. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +84 -81
  197. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +154 -73
  198. package/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +292 -337
  199. package/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +80 -77
  200. package/eigen/Eigen/src/Core/products/Parallelizer.h +207 -105
  201. package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +327 -388
  202. package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +206 -224
  203. package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +138 -147
  204. package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +58 -61
  205. package/eigen/Eigen/src/Core/products/SelfadjointProduct.h +71 -71
  206. package/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +48 -47
  207. package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +294 -369
  208. package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +246 -238
  209. package/eigen/Eigen/src/Core/products/TriangularMatrixVector.h +244 -247
  210. package/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +212 -192
  211. package/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +328 -277
  212. package/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +108 -109
  213. package/eigen/Eigen/src/Core/products/TriangularSolverVector.h +68 -94
  214. package/eigen/Eigen/src/Core/util/Assert.h +158 -0
  215. package/eigen/Eigen/src/Core/util/BlasUtil.h +342 -303
  216. package/eigen/Eigen/src/Core/util/ConfigureVectorization.h +348 -317
  217. package/eigen/Eigen/src/Core/util/Constants.h +297 -262
  218. package/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +130 -90
  219. package/eigen/Eigen/src/Core/util/EmulateArray.h +270 -0
  220. package/eigen/Eigen/src/Core/util/ForwardDeclarations.h +449 -247
  221. package/eigen/Eigen/src/Core/util/GpuHipCudaDefines.inc +101 -0
  222. package/eigen/Eigen/src/Core/util/GpuHipCudaUndefines.inc +45 -0
  223. package/eigen/Eigen/src/Core/util/IndexedViewHelper.h +417 -116
  224. package/eigen/Eigen/src/Core/util/IntegralConstant.h +211 -204
  225. package/eigen/Eigen/src/Core/util/MKL_support.h +39 -37
  226. package/eigen/Eigen/src/Core/util/Macros.h +655 -773
  227. package/eigen/Eigen/src/Core/util/MaxSizeVector.h +139 -0
  228. package/eigen/Eigen/src/Core/util/Memory.h +970 -748
  229. package/eigen/Eigen/src/Core/util/Meta.h +581 -633
  230. package/eigen/Eigen/src/Core/util/MoreMeta.h +638 -0
  231. package/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +32 -19
  232. package/eigen/Eigen/src/Core/util/ReshapedHelper.h +17 -17
  233. package/eigen/Eigen/src/Core/util/Serializer.h +209 -0
  234. package/eigen/Eigen/src/Core/util/StaticAssert.h +50 -166
  235. package/eigen/Eigen/src/Core/util/SymbolicIndex.h +377 -225
  236. package/eigen/Eigen/src/Core/util/XprHelper.h +784 -547
  237. package/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +246 -277
  238. package/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +299 -319
  239. package/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +52 -48
  240. package/eigen/Eigen/src/Eigenvalues/EigenSolver.h +413 -456
  241. package/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +309 -325
  242. package/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +157 -171
  243. package/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +292 -310
  244. package/eigen/Eigen/src/Eigenvalues/InternalHeaderCheck.h +3 -0
  245. package/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +89 -105
  246. package/eigen/Eigen/src/Eigenvalues/RealQZ.h +537 -607
  247. package/eigen/Eigen/src/Eigenvalues/RealSchur.h +342 -381
  248. package/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +41 -35
  249. package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +541 -595
  250. package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +47 -44
  251. package/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +430 -462
  252. package/eigen/Eigen/src/Geometry/AlignedBox.h +226 -227
  253. package/eigen/Eigen/src/Geometry/AngleAxis.h +131 -133
  254. package/eigen/Eigen/src/Geometry/EulerAngles.h +163 -74
  255. package/eigen/Eigen/src/Geometry/Homogeneous.h +285 -333
  256. package/eigen/Eigen/src/Geometry/Hyperplane.h +151 -160
  257. package/eigen/Eigen/src/Geometry/InternalHeaderCheck.h +3 -0
  258. package/eigen/Eigen/src/Geometry/OrthoMethods.h +168 -146
  259. package/eigen/Eigen/src/Geometry/ParametrizedLine.h +127 -127
  260. package/eigen/Eigen/src/Geometry/Quaternion.h +566 -506
  261. package/eigen/Eigen/src/Geometry/Rotation2D.h +107 -105
  262. package/eigen/Eigen/src/Geometry/RotationBase.h +148 -145
  263. package/eigen/Eigen/src/Geometry/Scaling.h +113 -106
  264. package/eigen/Eigen/src/Geometry/Transform.h +858 -936
  265. package/eigen/Eigen/src/Geometry/Translation.h +94 -92
  266. package/eigen/Eigen/src/Geometry/Umeyama.h +79 -84
  267. package/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +90 -104
  268. package/eigen/Eigen/src/Householder/BlockHouseholder.h +51 -46
  269. package/eigen/Eigen/src/Householder/Householder.h +102 -124
  270. package/eigen/Eigen/src/Householder/HouseholderSequence.h +412 -453
  271. package/eigen/Eigen/src/Householder/InternalHeaderCheck.h +3 -0
  272. package/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +149 -162
  273. package/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +124 -119
  274. package/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +92 -104
  275. package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +251 -243
  276. package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +224 -228
  277. package/eigen/Eigen/src/IterativeLinearSolvers/InternalHeaderCheck.h +3 -0
  278. package/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +178 -227
  279. package/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +79 -84
  280. package/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +54 -60
  281. package/eigen/Eigen/src/Jacobi/InternalHeaderCheck.h +3 -0
  282. package/eigen/Eigen/src/Jacobi/Jacobi.h +252 -308
  283. package/eigen/Eigen/src/KLUSupport/InternalHeaderCheck.h +3 -0
  284. package/eigen/Eigen/src/KLUSupport/KLUSupport.h +208 -227
  285. package/eigen/Eigen/src/LU/Determinant.h +50 -69
  286. package/eigen/Eigen/src/LU/FullPivLU.h +545 -596
  287. package/eigen/Eigen/src/LU/InternalHeaderCheck.h +3 -0
  288. package/eigen/Eigen/src/LU/InverseImpl.h +206 -285
  289. package/eigen/Eigen/src/LU/PartialPivLU.h +390 -428
  290. package/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h +54 -40
  291. package/eigen/Eigen/src/LU/arch/InverseSize4.h +72 -70
  292. package/eigen/Eigen/src/MetisSupport/InternalHeaderCheck.h +3 -0
  293. package/eigen/Eigen/src/MetisSupport/MetisSupport.h +81 -93
  294. package/eigen/Eigen/src/OrderingMethods/Amd.h +243 -265
  295. package/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +831 -1004
  296. package/eigen/Eigen/src/OrderingMethods/InternalHeaderCheck.h +3 -0
  297. package/eigen/Eigen/src/OrderingMethods/Ordering.h +112 -119
  298. package/eigen/Eigen/src/PaStiXSupport/InternalHeaderCheck.h +3 -0
  299. package/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +524 -570
  300. package/eigen/Eigen/src/PardisoSupport/InternalHeaderCheck.h +3 -0
  301. package/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +385 -430
  302. package/eigen/Eigen/src/QR/ColPivHouseholderQR.h +479 -479
  303. package/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +120 -56
  304. package/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +166 -153
  305. package/eigen/Eigen/src/QR/FullPivHouseholderQR.h +495 -475
  306. package/eigen/Eigen/src/QR/HouseholderQR.h +394 -285
  307. package/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h +32 -23
  308. package/eigen/Eigen/src/QR/InternalHeaderCheck.h +3 -0
  309. package/eigen/Eigen/src/SPQRSupport/InternalHeaderCheck.h +3 -0
  310. package/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +244 -264
  311. package/eigen/Eigen/src/SVD/BDCSVD.h +817 -713
  312. package/eigen/Eigen/src/SVD/BDCSVD_LAPACKE.h +174 -0
  313. package/eigen/Eigen/src/SVD/InternalHeaderCheck.h +3 -0
  314. package/eigen/Eigen/src/SVD/JacobiSVD.h +577 -543
  315. package/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +85 -49
  316. package/eigen/Eigen/src/SVD/SVDBase.h +242 -182
  317. package/eigen/Eigen/src/SVD/UpperBidiagonalization.h +200 -235
  318. package/eigen/Eigen/src/SparseCholesky/InternalHeaderCheck.h +3 -0
  319. package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +765 -594
  320. package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +308 -94
  321. package/eigen/Eigen/src/SparseCore/AmbiVector.h +202 -251
  322. package/eigen/Eigen/src/SparseCore/CompressedStorage.h +184 -252
  323. package/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +134 -178
  324. package/eigen/Eigen/src/SparseCore/InternalHeaderCheck.h +3 -0
  325. package/eigen/Eigen/src/SparseCore/SparseAssign.h +149 -140
  326. package/eigen/Eigen/src/SparseCore/SparseBlock.h +403 -440
  327. package/eigen/Eigen/src/SparseCore/SparseColEtree.h +100 -112
  328. package/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +525 -303
  329. package/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +555 -339
  330. package/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +100 -108
  331. package/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +169 -197
  332. package/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +71 -71
  333. package/eigen/Eigen/src/SparseCore/SparseDot.h +49 -47
  334. package/eigen/Eigen/src/SparseCore/SparseFuzzy.h +13 -11
  335. package/eigen/Eigen/src/SparseCore/SparseMap.h +243 -253
  336. package/eigen/Eigen/src/SparseCore/SparseMatrix.h +1603 -1245
  337. package/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +403 -350
  338. package/eigen/Eigen/src/SparseCore/SparsePermutation.h +186 -115
  339. package/eigen/Eigen/src/SparseCore/SparseProduct.h +94 -97
  340. package/eigen/Eigen/src/SparseCore/SparseRedux.h +22 -24
  341. package/eigen/Eigen/src/SparseCore/SparseRef.h +268 -295
  342. package/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +370 -416
  343. package/eigen/Eigen/src/SparseCore/SparseSolverBase.h +78 -87
  344. package/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +81 -95
  345. package/eigen/Eigen/src/SparseCore/SparseTranspose.h +62 -71
  346. package/eigen/Eigen/src/SparseCore/SparseTriangularView.h +132 -144
  347. package/eigen/Eigen/src/SparseCore/SparseUtil.h +138 -115
  348. package/eigen/Eigen/src/SparseCore/SparseVector.h +426 -372
  349. package/eigen/Eigen/src/SparseCore/SparseView.h +164 -193
  350. package/eigen/Eigen/src/SparseCore/TriangularSolver.h +129 -170
  351. package/eigen/Eigen/src/SparseLU/InternalHeaderCheck.h +3 -0
  352. package/eigen/Eigen/src/SparseLU/SparseLU.h +756 -710
  353. package/eigen/Eigen/src/SparseLU/SparseLUImpl.h +61 -48
  354. package/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +102 -118
  355. package/eigen/Eigen/src/SparseLU/SparseLU_Structs.h +38 -35
  356. package/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +245 -301
  357. package/eigen/Eigen/src/SparseLU/SparseLU_Utils.h +44 -49
  358. package/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h +104 -108
  359. package/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +89 -100
  360. package/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +57 -58
  361. package/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +43 -55
  362. package/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +74 -71
  363. package/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +124 -132
  364. package/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h +136 -159
  365. package/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h +51 -52
  366. package/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h +67 -73
  367. package/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h +24 -26
  368. package/eigen/Eigen/src/SparseQR/InternalHeaderCheck.h +3 -0
  369. package/eigen/Eigen/src/SparseQR/SparseQR.h +450 -502
  370. package/eigen/Eigen/src/StlSupport/StdDeque.h +28 -93
  371. package/eigen/Eigen/src/StlSupport/StdList.h +28 -84
  372. package/eigen/Eigen/src/StlSupport/StdVector.h +28 -108
  373. package/eigen/Eigen/src/StlSupport/details.h +48 -50
  374. package/eigen/Eigen/src/SuperLUSupport/InternalHeaderCheck.h +3 -0
  375. package/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +634 -730
  376. package/eigen/Eigen/src/ThreadPool/Barrier.h +70 -0
  377. package/eigen/Eigen/src/ThreadPool/CoreThreadPoolDevice.h +336 -0
  378. package/eigen/Eigen/src/ThreadPool/EventCount.h +241 -0
  379. package/eigen/Eigen/src/ThreadPool/ForkJoin.h +140 -0
  380. package/eigen/Eigen/src/ThreadPool/InternalHeaderCheck.h +4 -0
  381. package/eigen/Eigen/src/ThreadPool/NonBlockingThreadPool.h +587 -0
  382. package/eigen/Eigen/src/ThreadPool/RunQueue.h +230 -0
  383. package/eigen/Eigen/src/ThreadPool/ThreadCancel.h +21 -0
  384. package/eigen/Eigen/src/ThreadPool/ThreadEnvironment.h +43 -0
  385. package/eigen/Eigen/src/ThreadPool/ThreadLocal.h +289 -0
  386. package/eigen/Eigen/src/ThreadPool/ThreadPoolInterface.h +50 -0
  387. package/eigen/Eigen/src/ThreadPool/ThreadYield.h +16 -0
  388. package/eigen/Eigen/src/UmfPackSupport/InternalHeaderCheck.h +3 -0
  389. package/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +428 -464
  390. package/eigen/Eigen/src/misc/Image.h +41 -43
  391. package/eigen/Eigen/src/misc/InternalHeaderCheck.h +3 -0
  392. package/eigen/Eigen/src/misc/Kernel.h +39 -41
  393. package/eigen/Eigen/src/misc/RealSvd2x2.h +19 -21
  394. package/eigen/Eigen/src/misc/blas.h +83 -426
  395. package/eigen/Eigen/src/misc/lapacke.h +9972 -16179
  396. package/eigen/Eigen/src/misc/lapacke_helpers.h +163 -0
  397. package/eigen/Eigen/src/misc/lapacke_mangling.h +4 -5
  398. package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.inc +344 -0
  399. package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.inc +544 -0
  400. package/eigen/Eigen/src/plugins/{BlockMethods.h → BlockMethods.inc} +434 -506
  401. package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.inc +116 -0
  402. package/eigen/Eigen/src/plugins/{CommonCwiseUnaryOps.h → CommonCwiseUnaryOps.inc} +58 -68
  403. package/eigen/Eigen/src/plugins/IndexedViewMethods.inc +192 -0
  404. package/eigen/Eigen/src/plugins/InternalHeaderCheck.inc +3 -0
  405. package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.inc +331 -0
  406. package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.inc +118 -0
  407. package/eigen/Eigen/src/plugins/ReshapedMethods.inc +133 -0
  408. package/package.json +1 -1
  409. package/eigen/COPYING.APACHE +0 -203
  410. package/eigen/COPYING.BSD +0 -26
  411. package/eigen/COPYING.GPL +0 -674
  412. package/eigen/COPYING.LGPL +0 -502
  413. package/eigen/COPYING.MINPACK +0 -51
  414. package/eigen/COPYING.MPL2 +0 -373
  415. package/eigen/COPYING.README +0 -18
  416. package/eigen/Eigen/src/Core/BooleanRedux.h +0 -162
  417. package/eigen/Eigen/src/Core/arch/CUDA/Complex.h +0 -258
  418. package/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +0 -120
  419. package/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +0 -694
  420. package/eigen/Eigen/src/Core/util/NonMPL2.h +0 -3
  421. package/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h +0 -67
  422. package/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +0 -280
  423. package/eigen/Eigen/src/misc/lapack.h +0 -152
  424. package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +0 -358
  425. package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +0 -696
  426. package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h +0 -115
  427. package/eigen/Eigen/src/plugins/IndexedViewMethods.h +0 -262
  428. package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +0 -152
  429. package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +0 -95
  430. package/eigen/Eigen/src/plugins/ReshapedMethods.h +0 -149
  431. package/eigen/README.md +0 -5
@@ -10,6 +10,9 @@
10
10
  #ifndef EIGEN_PACKET_MATH_ZVECTOR_H
11
11
  #define EIGEN_PACKET_MATH_ZVECTOR_H
12
12
 
13
+ // IWYU pragma: private
14
+ #include "../../InternalHeaderCheck.h"
15
+
13
16
  namespace Eigen {
14
17
 
15
18
  namespace internal {
@@ -23,147 +26,147 @@ namespace internal {
23
26
  #endif
24
27
 
25
28
  #ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
26
- #define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 32
29
+ #define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 32
27
30
  #endif
28
31
 
29
- typedef __vector int Packet4i;
30
- typedef __vector unsigned int Packet4ui;
31
- typedef __vector __bool int Packet4bi;
32
- typedef __vector short int Packet8i;
33
- typedef __vector unsigned char Packet16uc;
34
- typedef __vector double Packet2d;
35
- typedef __vector unsigned long long Packet2ul;
36
- typedef __vector long long Packet2l;
32
+ typedef __vector int Packet4i;
33
+ typedef __vector unsigned int Packet4ui;
34
+ typedef __vector __bool int Packet4bi;
35
+ typedef __vector short int Packet8i;
36
+ typedef __vector unsigned char Packet16uc;
37
+ typedef __vector double Packet2d;
38
+ typedef __vector unsigned long long Packet2ul;
39
+ typedef __vector long long Packet2l;
37
40
 
38
41
  // Z14 has builtin support for float vectors
39
42
  #if !defined(__ARCH__) || (defined(__ARCH__) && __ARCH__ >= 12)
40
- typedef __vector float Packet4f;
43
+ typedef __vector float Packet4f;
41
44
  #else
42
45
  typedef struct {
43
- Packet2d v4f[2];
46
+ Packet2d v4f[2];
44
47
  } Packet4f;
45
48
  #endif
46
49
 
47
50
  typedef union {
48
- numext::int32_t i[4];
51
+ numext::int32_t i[4];
49
52
  numext::uint32_t ui[4];
50
- numext::int64_t l[2];
53
+ numext::int64_t l[2];
51
54
  numext::uint64_t ul[2];
52
- double d[2];
53
- float f[4];
54
- Packet4i v4i;
55
+ double d[2];
56
+ float f[4];
57
+ Packet4i v4i;
55
58
  Packet4ui v4ui;
56
- Packet2l v2l;
59
+ Packet2l v2l;
57
60
  Packet2ul v2ul;
58
- Packet2d v2d;
61
+ Packet2d v2d;
59
62
  #if !defined(__ARCH__) || (defined(__ARCH__) && __ARCH__ >= 12)
60
- Packet4f v4f;
63
+ Packet4f v4f;
61
64
  #endif
62
65
  } Packet;
63
66
 
64
67
  // We don't want to write the same code all the time, but we need to reuse the constants
65
68
  // and it doesn't really work to declare them global, so we define macros instead
66
69
 
67
- #define _EIGEN_DECLARE_CONST_FAST_Packet4i(NAME,X) \
68
- Packet4i p4i_##NAME = reinterpret_cast<Packet4i>(vec_splat_s32(X))
70
+ #define EIGEN_DECLARE_CONST_FAST_Packet4i(NAME, X) Packet4i p4i_##NAME = reinterpret_cast<Packet4i>(vec_splat_s32(X))
69
71
 
70
- #define _EIGEN_DECLARE_CONST_FAST_Packet2d(NAME,X) \
71
- Packet2d p2d_##NAME = reinterpret_cast<Packet2d>(vec_splat_s64(X))
72
+ #define EIGEN_DECLARE_CONST_FAST_Packet2d(NAME, X) Packet2d p2d_##NAME = reinterpret_cast<Packet2d>(vec_splat_s64(X))
72
73
 
73
- #define _EIGEN_DECLARE_CONST_FAST_Packet2l(NAME,X) \
74
- Packet2l p2l_##NAME = reinterpret_cast<Packet2l>(vec_splat_s64(X))
74
+ #define EIGEN_DECLARE_CONST_FAST_Packet2l(NAME, X) Packet2l p2l_##NAME = reinterpret_cast<Packet2l>(vec_splat_s64(X))
75
75
 
76
- #define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
77
- Packet4i p4i_##NAME = pset1<Packet4i>(X)
76
+ #define EIGEN_DECLARE_CONST_Packet4i(NAME, X) Packet4i p4i_##NAME = pset1<Packet4i>(X)
78
77
 
79
- #define _EIGEN_DECLARE_CONST_Packet2d(NAME,X) \
80
- Packet2d p2d_##NAME = pset1<Packet2d>(X)
78
+ #define EIGEN_DECLARE_CONST_Packet2d(NAME, X) Packet2d p2d_##NAME = pset1<Packet2d>(X)
81
79
 
82
- #define _EIGEN_DECLARE_CONST_Packet2l(NAME,X) \
83
- Packet2l p2l_##NAME = pset1<Packet2l>(X)
80
+ #define EIGEN_DECLARE_CONST_Packet2l(NAME, X) Packet2l p2l_##NAME = pset1<Packet2l>(X)
84
81
 
85
82
  // These constants are endian-agnostic
86
- static _EIGEN_DECLARE_CONST_FAST_Packet4i(ZERO, 0); //{ 0, 0, 0, 0,}
87
- static _EIGEN_DECLARE_CONST_FAST_Packet4i(ONE, 1); //{ 1, 1, 1, 1}
83
+ static EIGEN_DECLARE_CONST_FAST_Packet4i(ZERO, 0); //{ 0, 0, 0, 0,}
84
+ static EIGEN_DECLARE_CONST_FAST_Packet4i(ONE, 1); //{ 1, 1, 1, 1}
88
85
 
89
- static _EIGEN_DECLARE_CONST_FAST_Packet2d(ZERO, 0);
90
- static _EIGEN_DECLARE_CONST_FAST_Packet2l(ZERO, 0);
91
- static _EIGEN_DECLARE_CONST_FAST_Packet2l(ONE, 1);
86
+ static EIGEN_DECLARE_CONST_FAST_Packet2d(ZERO, 0);
87
+ static EIGEN_DECLARE_CONST_FAST_Packet2l(ZERO, 0);
88
+ static EIGEN_DECLARE_CONST_FAST_Packet2l(ONE, 1);
92
89
 
93
- static Packet2d p2d_ONE = { 1.0, 1.0 };
94
- static Packet2d p2d_ZERO_ = { numext::bit_cast<double>0x8000000000000000ull),
95
- numext::bit_cast<double>0x8000000000000000ull) };
90
+ static Packet2d p2d_ONE = {1.0, 1.0};
91
+ static Packet2d p2d_ZERO_ = {numext::bit_cast<double>(0x8000000000000000ull),
92
+ numext::bit_cast<double>(0x8000000000000000ull)};
96
93
 
97
94
  #if !defined(__ARCH__) || (defined(__ARCH__) && __ARCH__ >= 12)
98
- #define _EIGEN_DECLARE_CONST_FAST_Packet4f(NAME,X) \
99
- Packet4f p4f_##NAME = reinterpret_cast<Packet4f>(vec_splat_s32(X))
95
+ #define EIGEN_DECLARE_CONST_FAST_Packet4f(NAME, X) Packet4f p4f_##NAME = reinterpret_cast<Packet4f>(vec_splat_s32(X))
100
96
 
101
- #define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \
102
- Packet4f p4f_##NAME = pset1<Packet4f>(X)
97
+ #define EIGEN_DECLARE_CONST_Packet4f(NAME, X) Packet4f p4f_##NAME = pset1<Packet4f>(X)
103
98
 
104
- #define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \
99
+ #define EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME, X) \
105
100
  const Packet4f p4f_##NAME = reinterpret_cast<Packet4f>(pset1<Packet4i>(X))
106
101
 
107
- static _EIGEN_DECLARE_CONST_FAST_Packet4f(ZERO, 0); //{ 0.0, 0.0, 0.0, 0.0}
108
- static _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS1,-1); //{ -1, -1, -1, -1}
109
- static Packet4f p4f_MZERO = { 0x80000000, 0x80000000, 0x80000000, 0x80000000};
102
+ static EIGEN_DECLARE_CONST_FAST_Packet4f(ZERO, 0); //{ 0.0, 0.0, 0.0, 0.0}
103
+ static EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS1, -1); //{ -1, -1, -1, -1}
104
+ static Packet4f p4f_MZERO = {0x80000000, 0x80000000, 0x80000000, 0x80000000};
110
105
  #endif
111
106
 
112
- static Packet4i p4i_COUNTDOWN = { 0, 1, 2, 3 };
113
- static Packet4f p4f_COUNTDOWN = { 0.0, 1.0, 2.0, 3.0 };
114
- static Packet2d p2d_COUNTDOWN = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet16uc>(p2d_ZERO), reinterpret_cast<Packet16uc>(p2d_ONE), 8));
107
+ static Packet4i p4i_COUNTDOWN = {0, 1, 2, 3};
108
+ static Packet4f p4f_COUNTDOWN = {0.0, 1.0, 2.0, 3.0};
109
+ static Packet2d p2d_COUNTDOWN = reinterpret_cast<Packet2d>(
110
+ vec_sld(reinterpret_cast<Packet16uc>(p2d_ZERO), reinterpret_cast<Packet16uc>(p2d_ONE), 8));
115
111
 
116
- static Packet16uc p16uc_PSET64_HI = { 0,1,2,3, 4,5,6,7, 0,1,2,3, 4,5,6,7 };
117
- static Packet16uc p16uc_DUPLICATE32_HI = { 0,1,2,3, 0,1,2,3, 4,5,6,7, 4,5,6,7 };
112
+ static Packet16uc p16uc_PSET64_HI = {0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7};
113
+ static Packet16uc p16uc_DUPLICATE32_HI = {0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 4, 5, 6, 7};
118
114
 
119
115
  // Mask alignment
120
- #define _EIGEN_MASK_ALIGNMENT 0xfffffffffffffff0
116
+ #define EIGEN_MASK_ALIGNMENT 0xfffffffffffffff0
121
117
 
122
- #define _EIGEN_ALIGNED_PTR(x) ((std::ptrdiff_t)(x) & _EIGEN_MASK_ALIGNMENT)
118
+ #define EIGEN_ALIGNED_PTR(x) ((std::ptrdiff_t)(x) & EIGEN_MASK_ALIGNMENT)
123
119
 
124
120
  // Handle endianness properly while loading constants
125
121
  // Define global static constants:
126
122
 
127
- static Packet16uc p16uc_FORWARD = { 0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15 };
128
- static Packet16uc p16uc_REVERSE32 = { 12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3 };
129
- static Packet16uc p16uc_REVERSE64 = { 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 };
130
-
131
- static Packet16uc p16uc_PSET32_WODD = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 0), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 2), 8);//{ 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 };
132
- static Packet16uc p16uc_PSET32_WEVEN = vec_sld(p16uc_DUPLICATE32_HI, (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 3), 8);//{ 4,5,6,7, 4,5,6,7, 12,13,14,15, 12,13,14,15 };
133
- /*static Packet16uc p16uc_HALF64_0_16 = vec_sld((Packet16uc)p4i_ZERO, vec_splat((Packet16uc) vec_abs(p4i_MINUS16), 3), 8); //{ 0,0,0,0, 0,0,0,0, 16,16,16,16, 16,16,16,16};
134
-
135
- static Packet16uc p16uc_PSET64_HI = (Packet16uc) vec_mergeh((Packet4ui)p16uc_PSET32_WODD, (Packet4ui)p16uc_PSET32_WEVEN); //{ 0,1,2,3, 4,5,6,7, 0,1,2,3, 4,5,6,7 };*/
136
- static Packet16uc p16uc_PSET64_LO = (Packet16uc) vec_mergel((Packet4ui)p16uc_PSET32_WODD, (Packet4ui)p16uc_PSET32_WEVEN); //{ 8,9,10,11, 12,13,14,15, 8,9,10,11, 12,13,14,15 };
137
- /*static Packet16uc p16uc_TRANSPOSE64_HI = vec_add(p16uc_PSET64_HI, p16uc_HALF64_0_16); //{ 0,1,2,3, 4,5,6,7, 16,17,18,19, 20,21,22,23};
138
- static Packet16uc p16uc_TRANSPOSE64_LO = vec_add(p16uc_PSET64_LO, p16uc_HALF64_0_16); //{ 8,9,10,11, 12,13,14,15, 24,25,26,27, 28,29,30,31};*/
139
- static Packet16uc p16uc_TRANSPOSE64_HI = { 0,1,2,3, 4,5,6,7, 16,17,18,19, 20,21,22,23};
140
- static Packet16uc p16uc_TRANSPOSE64_LO = { 8,9,10,11, 12,13,14,15, 24,25,26,27, 28,29,30,31};
141
-
142
- static Packet16uc p16uc_COMPLEX32_REV = vec_sld(p16uc_REVERSE32, p16uc_REVERSE32, 8); //{ 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11 };
143
-
144
- static Packet16uc p16uc_COMPLEX32_REV2 = vec_sld(p16uc_FORWARD, p16uc_FORWARD, 8); //{ 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 };
145
-
123
+ static Packet16uc p16uc_FORWARD = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
124
+ static Packet16uc p16uc_REVERSE32 = {12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3};
125
+ static Packet16uc p16uc_REVERSE64 = {8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7};
126
+
127
+ static Packet16uc p16uc_PSET32_WODD =
128
+ vec_sld((Packet16uc)vec_splat((Packet4ui)p16uc_FORWARD, 0), (Packet16uc)vec_splat((Packet4ui)p16uc_FORWARD, 2),
129
+ 8); //{ 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 };
130
+ static Packet16uc p16uc_PSET32_WEVEN = vec_sld(p16uc_DUPLICATE32_HI, (Packet16uc)vec_splat((Packet4ui)p16uc_FORWARD, 3),
131
+ 8); //{ 4,5,6,7, 4,5,6,7, 12,13,14,15, 12,13,14,15 };
132
+ /*static Packet16uc p16uc_HALF64_0_16 = vec_sld((Packet16uc)p4i_ZERO, vec_splat((Packet16uc) vec_abs(p4i_MINUS16), 3),
133
+ 8); //{ 0,0,0,0, 0,0,0,0, 16,16,16,16, 16,16,16,16};
134
+
135
+ static Packet16uc p16uc_PSET64_HI = (Packet16uc) vec_mergeh((Packet4ui)p16uc_PSET32_WODD,
136
+ (Packet4ui)p16uc_PSET32_WEVEN); //{ 0,1,2,3, 4,5,6,7, 0,1,2,3, 4,5,6,7 };*/
137
+ static Packet16uc p16uc_PSET64_LO = (Packet16uc)vec_mergel(
138
+ (Packet4ui)p16uc_PSET32_WODD, (Packet4ui)p16uc_PSET32_WEVEN); //{ 8,9,10,11, 12,13,14,15, 8,9,10,11, 12,13,14,15 };
139
+ /*static Packet16uc p16uc_TRANSPOSE64_HI = vec_add(p16uc_PSET64_HI, p16uc_HALF64_0_16); //{ 0,1,2,3, 4,5,6,7,
140
+ 16,17,18,19, 20,21,22,23}; static Packet16uc p16uc_TRANSPOSE64_LO = vec_add(p16uc_PSET64_LO, p16uc_HALF64_0_16); //{
141
+ 8,9,10,11, 12,13,14,15, 24,25,26,27, 28,29,30,31};*/
142
+ static Packet16uc p16uc_TRANSPOSE64_HI = {0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23};
143
+ static Packet16uc p16uc_TRANSPOSE64_LO = {8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31};
144
+
145
+ static Packet16uc p16uc_COMPLEX32_REV =
146
+ vec_sld(p16uc_REVERSE32, p16uc_REVERSE32, 8); //{ 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11 };
147
+
148
+ static Packet16uc p16uc_COMPLEX32_REV2 =
149
+ vec_sld(p16uc_FORWARD, p16uc_FORWARD, 8); //{ 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 };
146
150
 
147
151
  #if EIGEN_HAS_BUILTIN(__builtin_prefetch) || EIGEN_COMP_GNUC
148
- #define EIGEN_ZVECTOR_PREFETCH(ADDR) __builtin_prefetch(ADDR);
152
+ #define EIGEN_ZVECTOR_PREFETCH(ADDR) __builtin_prefetch(ADDR);
149
153
  #else
150
- #define EIGEN_ZVECTOR_PREFETCH(ADDR) asm( " pfd [%[addr]]\n" :: [addr] "r" (ADDR) : "cc" );
154
+ #define EIGEN_ZVECTOR_PREFETCH(ADDR) asm(" pfd [%[addr]]\n" ::[addr] "r"(ADDR) : "cc");
151
155
  #endif
152
156
 
153
- template<> struct packet_traits<int> : default_packet_traits
154
- {
157
+ template <>
158
+ struct packet_traits<int> : default_packet_traits {
155
159
  typedef Packet4i type;
156
160
  typedef Packet4i half;
157
161
  enum {
158
162
  Vectorizable = 1,
159
163
  AlignedOnScalar = 1,
160
164
  size = 4,
161
- HasHalfPacket = 0,
162
165
 
163
- HasAdd = 1,
164
- HasSub = 1,
165
- HasMul = 1,
166
- HasDiv = 1,
166
+ HasAdd = 1,
167
+ HasSub = 1,
168
+ HasMul = 1,
169
+ HasDiv = 1,
167
170
  HasBlend = 1
168
171
  };
169
172
  };
@@ -176,8 +179,8 @@ struct packet_traits<float> : default_packet_traits {
176
179
  Vectorizable = 1,
177
180
  AlignedOnScalar = 1,
178
181
  size = 4,
179
- HasHalfPacket = 0,
180
182
 
183
+ HasCmp = 1,
181
184
  HasAdd = 1,
182
185
  HasSub = 1,
183
186
  HasMul = 1,
@@ -193,86 +196,109 @@ struct packet_traits<float> : default_packet_traits {
193
196
  HasRsqrt = 1,
194
197
  HasTanh = 1,
195
198
  HasErf = 1,
196
- HasRound = 1,
197
- HasFloor = 1,
198
- HasCeil = 1,
199
199
  HasNegate = 1,
200
200
  HasBlend = 1
201
201
  };
202
202
  };
203
203
 
204
- template<> struct packet_traits<double> : default_packet_traits
205
- {
204
+ template <>
205
+ struct packet_traits<double> : default_packet_traits {
206
206
  typedef Packet2d type;
207
207
  typedef Packet2d half;
208
208
  enum {
209
209
  Vectorizable = 1,
210
210
  AlignedOnScalar = 1,
211
- size=2,
212
- HasHalfPacket = 1,
213
-
214
- HasAdd = 1,
215
- HasSub = 1,
216
- HasMul = 1,
217
- HasDiv = 1,
218
- HasMin = 1,
219
- HasMax = 1,
220
- HasAbs = 1,
221
- HasSin = 0,
222
- HasCos = 0,
223
- HasLog = 0,
224
- HasExp = 1,
211
+ size = 2,
212
+
213
+ HasAdd = 1,
214
+ HasSub = 1,
215
+ HasMul = 1,
216
+ HasDiv = 1,
217
+ HasMin = 1,
218
+ HasMax = 1,
219
+ HasAbs = 1,
220
+ HasSin = 0,
221
+ HasCos = 0,
222
+ HasLog = 0,
223
+ HasExp = 1,
225
224
  HasSqrt = 1,
226
225
  HasRsqrt = 1,
227
- HasRound = 1,
228
- HasFloor = 1,
229
- HasCeil = 1,
230
226
  HasNegate = 1,
231
227
  HasBlend = 1
232
228
  };
233
229
  };
234
230
 
235
- template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet4i half; };
236
- template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet4f half; };
237
- template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet2d half; };
231
+ template <>
232
+ struct unpacket_traits<Packet4i> {
233
+ typedef int type;
234
+ enum {
235
+ size = 4,
236
+ alignment = Aligned16,
237
+ vectorizable = true,
238
+ masked_load_available = false,
239
+ masked_store_available = false
240
+ };
241
+ typedef Packet4i half;
242
+ };
243
+ template <>
244
+ struct unpacket_traits<Packet4f> {
245
+ typedef float type;
246
+ enum {
247
+ size = 4,
248
+ alignment = Aligned16,
249
+ vectorizable = true,
250
+ masked_load_available = false,
251
+ masked_store_available = false
252
+ };
253
+ typedef Packet4f half;
254
+ typedef Packet4i integer_packet;
255
+ };
256
+ template <>
257
+ struct unpacket_traits<Packet2d> {
258
+ typedef double type;
259
+ enum {
260
+ size = 2,
261
+ alignment = Aligned16,
262
+ vectorizable = true,
263
+ masked_load_available = false,
264
+ masked_store_available = false
265
+ };
266
+ typedef Packet2d half;
267
+ typedef Packet2l integer_packet;
268
+ };
238
269
 
239
270
  /* Forward declaration */
240
- EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet4f,4>& kernel);
241
-
242
- inline std::ostream & operator <<(std::ostream & s, const Packet4i & v)
243
- {
271
+ EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet4f, 4>& kernel);
272
+
273
+ inline std::ostream& operator<<(std::ostream& s, const Packet4i& v) {
244
274
  Packet vt;
245
275
  vt.v4i = v;
246
276
  s << vt.i[0] << ", " << vt.i[1] << ", " << vt.i[2] << ", " << vt.i[3];
247
277
  return s;
248
278
  }
249
279
 
250
- inline std::ostream & operator <<(std::ostream & s, const Packet4ui & v)
251
- {
280
+ inline std::ostream& operator<<(std::ostream& s, const Packet4ui& v) {
252
281
  Packet vt;
253
282
  vt.v4ui = v;
254
283
  s << vt.ui[0] << ", " << vt.ui[1] << ", " << vt.ui[2] << ", " << vt.ui[3];
255
284
  return s;
256
285
  }
257
286
 
258
- inline std::ostream & operator <<(std::ostream & s, const Packet2l & v)
259
- {
287
+ inline std::ostream& operator<<(std::ostream& s, const Packet2l& v) {
260
288
  Packet vt;
261
289
  vt.v2l = v;
262
290
  s << vt.l[0] << ", " << vt.l[1];
263
291
  return s;
264
292
  }
265
293
 
266
- inline std::ostream & operator <<(std::ostream & s, const Packet2ul & v)
267
- {
294
+ inline std::ostream& operator<<(std::ostream& s, const Packet2ul& v) {
268
295
  Packet vt;
269
296
  vt.v2ul = v;
270
- s << vt.ul[0] << ", " << vt.ul[1] ;
297
+ s << vt.ul[0] << ", " << vt.ul[1];
271
298
  return s;
272
299
  }
273
300
 
274
- inline std::ostream & operator <<(std::ostream & s, const Packet2d & v)
275
- {
301
+ inline std::ostream& operator<<(std::ostream& s, const Packet2d& v) {
276
302
  Packet vt;
277
303
  vt.v2d = v;
278
304
  s << vt.d[0] << ", " << vt.d[1];
@@ -280,8 +306,7 @@ inline std::ostream & operator <<(std::ostream & s, const Packet2d & v)
280
306
  }
281
307
 
282
308
  #if !defined(__ARCH__) || (defined(__ARCH__) && __ARCH__ >= 12)
283
- inline std::ostream & operator <<(std::ostream & s, const Packet4f & v)
284
- {
309
+ inline std::ostream& operator<<(std::ostream& s, const Packet4f& v) {
285
310
  Packet vt;
286
311
  vt.v4f = v;
287
312
  s << vt.f[0] << ", " << vt.f[1] << ", " << vt.f[2] << ", " << vt.f[3];
@@ -289,54 +314,51 @@ inline std::ostream & operator <<(std::ostream & s, const Packet4f & v)
289
314
  }
290
315
  #endif
291
316
 
292
- template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from)
293
- {
294
- // FIXME: No intrinsic yet
317
+ template <>
318
+ EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from) {
295
319
  EIGEN_DEBUG_ALIGNED_LOAD
296
- Packet *vfrom;
297
- vfrom = (Packet *) from;
298
- return vfrom->v4i;
320
+ return vec_xl(0, from);
299
321
  }
300
322
 
301
- template<> EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* from)
302
- {
303
- // FIXME: No intrinsic yet
323
+ template <>
324
+ EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* from) {
304
325
  EIGEN_DEBUG_ALIGNED_LOAD
305
- Packet *vfrom;
306
- vfrom = (Packet *) from;
307
- return vfrom->v2d;
326
+ return vec_xl(0, from);
308
327
  }
309
328
 
310
- template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& from)
311
- {
312
- // FIXME: No intrinsic yet
329
+ template <>
330
+ EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& from) {
313
331
  EIGEN_DEBUG_ALIGNED_STORE
314
- Packet *vto;
315
- vto = (Packet *) to;
316
- vto->v4i = from;
332
+ vec_xst(from, 0, to);
317
333
  }
318
334
 
319
- template<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet2d& from)
320
- {
321
- // FIXME: No intrinsic yet
335
+ template <>
336
+ EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet2d& from) {
322
337
  EIGEN_DEBUG_ALIGNED_STORE
323
- Packet *vto;
324
- vto = (Packet *) to;
325
- vto->v2d = from;
338
+ vec_xst(from, 0, to);
339
+ }
340
+
341
+ template <>
342
+ EIGEN_STRONG_INLINE Packet4f pfrexp<Packet4f>(const Packet4f& a, Packet4f& exponent) {
343
+ return pfrexp_generic(a, exponent);
344
+ }
345
+
346
+ template <>
347
+ EIGEN_STRONG_INLINE Packet2d pfrexp<Packet2d>(const Packet2d& a, Packet2d& exponent) {
348
+ return pfrexp_generic(a, exponent);
326
349
  }
327
350
 
328
- template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from)
329
- {
351
+ template <>
352
+ EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) {
330
353
  return vec_splats(from);
331
354
  }
332
- template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) {
355
+ template <>
356
+ EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) {
333
357
  return vec_splats(from);
334
358
  }
335
359
 
336
- template<> EIGEN_STRONG_INLINE void
337
- pbroadcast4<Packet4i>(const int *a,
338
- Packet4i& a0, Packet4i& a1, Packet4i& a2, Packet4i& a3)
339
- {
360
+ template <>
361
+ EIGEN_STRONG_INLINE void pbroadcast4<Packet4i>(const int* a, Packet4i& a0, Packet4i& a1, Packet4i& a2, Packet4i& a3) {
340
362
  a3 = pload<Packet4i>(a);
341
363
  a0 = vec_splat(a3, 0);
342
364
  a1 = vec_splat(a3, 1);
@@ -344,187 +366,356 @@ pbroadcast4<Packet4i>(const int *a,
344
366
  a3 = vec_splat(a3, 3);
345
367
  }
346
368
 
347
- template<> EIGEN_STRONG_INLINE void
348
- pbroadcast4<Packet2d>(const double *a,
349
- Packet2d& a0, Packet2d& a1, Packet2d& a2, Packet2d& a3)
350
- {
369
+ template <>
370
+ EIGEN_STRONG_INLINE void pbroadcast4<Packet2d>(const double* a, Packet2d& a0, Packet2d& a1, Packet2d& a2,
371
+ Packet2d& a3) {
351
372
  a1 = pload<Packet2d>(a);
352
373
  a0 = vec_splat(a1, 0);
353
374
  a1 = vec_splat(a1, 1);
354
- a3 = pload<Packet2d>(a+2);
375
+ a3 = pload<Packet2d>(a + 2);
355
376
  a2 = vec_splat(a3, 0);
356
377
  a3 = vec_splat(a3, 1);
357
378
  }
358
379
 
359
- template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, Index stride)
360
- {
361
- int EIGEN_ALIGN16 ai[4];
362
- ai[0] = from[0*stride];
363
- ai[1] = from[1*stride];
364
- ai[2] = from[2*stride];
365
- ai[3] = from[3*stride];
366
- return pload<Packet4i>(ai);
380
+ template <>
381
+ EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, Index stride) {
382
+ EIGEN_ALIGN16 int ai[4];
383
+ ai[0] = from[0 * stride];
384
+ ai[1] = from[1 * stride];
385
+ ai[2] = from[2 * stride];
386
+ ai[3] = from[3 * stride];
387
+ return pload<Packet4i>(ai);
367
388
  }
368
389
 
369
- template<> EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, Index stride)
370
- {
371
- double EIGEN_ALIGN16 af[2];
372
- af[0] = from[0*stride];
373
- af[1] = from[1*stride];
374
- return pload<Packet2d>(af);
390
+ template <>
391
+ EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, Index stride) {
392
+ EIGEN_ALIGN16 double af[2];
393
+ af[0] = from[0 * stride];
394
+ af[1] = from[1 * stride];
395
+ return pload<Packet2d>(af);
375
396
  }
376
397
 
377
- template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, Index stride)
378
- {
379
- int EIGEN_ALIGN16 ai[4];
380
- pstore<int>((int *)ai, from);
381
- to[0*stride] = ai[0];
382
- to[1*stride] = ai[1];
383
- to[2*stride] = ai[2];
384
- to[3*stride] = ai[3];
398
+ template <>
399
+ EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, Index stride) {
400
+ EIGEN_ALIGN16 int ai[4];
401
+ pstore<int>((int*)ai, from);
402
+ to[0 * stride] = ai[0];
403
+ to[1 * stride] = ai[1];
404
+ to[2 * stride] = ai[2];
405
+ to[3 * stride] = ai[3];
385
406
  }
386
407
 
387
- template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to, const Packet2d& from, Index stride)
388
- {
389
- double EIGEN_ALIGN16 af[2];
408
+ template <>
409
+ EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to, const Packet2d& from, Index stride) {
410
+ EIGEN_ALIGN16 double af[2];
390
411
  pstore<double>(af, from);
391
- to[0*stride] = af[0];
392
- to[1*stride] = af[1];
412
+ to[0 * stride] = af[0];
413
+ to[1 * stride] = af[1];
393
414
  }
394
415
 
395
- template<> EIGEN_STRONG_INLINE Packet4i padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return (a + b); }
396
- template<> EIGEN_STRONG_INLINE Packet2d padd<Packet2d>(const Packet2d& a, const Packet2d& b) { return (a + b); }
397
-
398
- template<> EIGEN_STRONG_INLINE Packet4i psub<Packet4i>(const Packet4i& a, const Packet4i& b) { return (a - b); }
399
- template<> EIGEN_STRONG_INLINE Packet2d psub<Packet2d>(const Packet2d& a, const Packet2d& b) { return (a - b); }
416
+ template <>
417
+ EIGEN_STRONG_INLINE Packet4i padd<Packet4i>(const Packet4i& a, const Packet4i& b) {
418
+ return (a + b);
419
+ }
420
+ template <>
421
+ EIGEN_STRONG_INLINE Packet2d padd<Packet2d>(const Packet2d& a, const Packet2d& b) {
422
+ return (a + b);
423
+ }
400
424
 
401
- template<> EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const Packet4i& b) { return (a * b); }
402
- template<> EIGEN_STRONG_INLINE Packet2d pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return (a * b); }
425
+ template <>
426
+ EIGEN_STRONG_INLINE Packet4i psub<Packet4i>(const Packet4i& a, const Packet4i& b) {
427
+ return (a - b);
428
+ }
429
+ template <>
430
+ EIGEN_STRONG_INLINE Packet2d psub<Packet2d>(const Packet2d& a, const Packet2d& b) {
431
+ return (a - b);
432
+ }
403
433
 
404
- template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& a, const Packet4i& b) { return (a / b); }
405
- template<> EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const Packet2d& b) { return (a / b); }
434
+ template <>
435
+ EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const Packet4i& b) {
436
+ return (a * b);
437
+ }
438
+ template <>
439
+ EIGEN_STRONG_INLINE Packet2d pmul<Packet2d>(const Packet2d& a, const Packet2d& b) {
440
+ return (a * b);
441
+ }
406
442
 
407
- template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) { return (-a); }
408
- template<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a) { return (-a); }
443
+ template <>
444
+ EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& a, const Packet4i& b) {
445
+ return (a / b);
446
+ }
447
+ template <>
448
+ EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const Packet2d& b) {
449
+ return (a / b);
450
+ }
409
451
 
410
- template<> EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) { return a; }
411
- template<> EIGEN_STRONG_INLINE Packet2d pconj(const Packet2d& a) { return a; }
452
+ template <>
453
+ EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) {
454
+ return (-a);
455
+ }
456
+ template <>
457
+ EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a) {
458
+ return (-a);
459
+ }
412
460
 
413
- template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return padd<Packet4i>(pmul<Packet4i>(a, b), c); }
414
- template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return vec_madd(a, b, c); }
461
+ template <>
462
+ EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) {
463
+ return a;
464
+ }
465
+ template <>
466
+ EIGEN_STRONG_INLINE Packet2d pconj(const Packet2d& a) {
467
+ return a;
468
+ }
415
469
 
416
- template<> EIGEN_STRONG_INLINE Packet4i plset<Packet4i>(const int& a) { return padd<Packet4i>(pset1<Packet4i>(a), p4i_COUNTDOWN); }
417
- template<> EIGEN_STRONG_INLINE Packet2d plset<Packet2d>(const double& a) { return padd<Packet2d>(pset1<Packet2d>(a), p2d_COUNTDOWN); }
470
+ template <>
471
+ EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) {
472
+ return padd<Packet4i>(pmul<Packet4i>(a, b), c);
473
+ }
474
+ template <>
475
+ EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) {
476
+ return vec_madd(a, b, c);
477
+ }
418
478
 
419
- template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_min(a, b); }
420
- template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_min(a, b); }
479
+ template <>
480
+ EIGEN_STRONG_INLINE Packet4i plset<Packet4i>(const int& a) {
481
+ return padd<Packet4i>(pset1<Packet4i>(a), p4i_COUNTDOWN);
482
+ }
483
+ template <>
484
+ EIGEN_STRONG_INLINE Packet2d plset<Packet2d>(const double& a) {
485
+ return padd<Packet2d>(pset1<Packet2d>(a), p2d_COUNTDOWN);
486
+ }
421
487
 
422
- template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_max(a, b); }
423
- template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_max(a, b); }
488
+ template <>
489
+ EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b) {
490
+ return vec_min(a, b);
491
+ }
492
+ template <>
493
+ EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) {
494
+ return vec_min(a, b);
495
+ }
424
496
 
425
- template<> EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_and(a, b); }
426
- template<> EIGEN_STRONG_INLINE Packet2d pand<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_and(a, b); }
497
+ template <>
498
+ EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b) {
499
+ return vec_max(a, b);
500
+ }
501
+ template <>
502
+ EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) {
503
+ return vec_max(a, b);
504
+ }
427
505
 
428
- template<> EIGEN_STRONG_INLINE Packet4i por<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_or(a, b); }
429
- template<> EIGEN_STRONG_INLINE Packet2d por<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_or(a, b); }
506
+ template <>
507
+ EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) {
508
+ return vec_and(a, b);
509
+ }
510
+ template <>
511
+ EIGEN_STRONG_INLINE Packet2d pand<Packet2d>(const Packet2d& a, const Packet2d& b) {
512
+ return vec_and(a, b);
513
+ }
430
514
 
431
- template<> EIGEN_STRONG_INLINE Packet4i pxor<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_xor(a, b); }
432
- template<> EIGEN_STRONG_INLINE Packet2d pxor<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_xor(a, b); }
515
+ template <>
516
+ EIGEN_STRONG_INLINE Packet4i por<Packet4i>(const Packet4i& a, const Packet4i& b) {
517
+ return vec_or(a, b);
518
+ }
519
+ template <>
520
+ EIGEN_STRONG_INLINE Packet2d por<Packet2d>(const Packet2d& a, const Packet2d& b) {
521
+ return vec_or(a, b);
522
+ }
433
523
 
434
- template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return pand<Packet4i>(a, vec_nor(b, b)); }
435
- template<> EIGEN_STRONG_INLINE Packet2d pandnot<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_and(a, vec_nor(b, b)); }
524
+ template <>
525
+ EIGEN_STRONG_INLINE Packet4i pxor<Packet4i>(const Packet4i& a, const Packet4i& b) {
526
+ return vec_xor(a, b);
527
+ }
528
+ template <>
529
+ EIGEN_STRONG_INLINE Packet2d pxor<Packet2d>(const Packet2d& a, const Packet2d& b) {
530
+ return vec_xor(a, b);
531
+ }
436
532
 
437
- template<> EIGEN_STRONG_INLINE Packet2d pround<Packet2d>(const Packet2d& a) { return vec_round(a); }
438
- template<> EIGEN_STRONG_INLINE Packet2d pceil<Packet2d>(const Packet2d& a) { return vec_ceil(a); }
439
- template<> EIGEN_STRONG_INLINE Packet2d pfloor<Packet2d>(const Packet2d& a) { return vec_floor(a); }
533
+ template <>
534
+ EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) {
535
+ return pand<Packet4i>(a, vec_nor(b, b));
536
+ }
537
+ template <>
538
+ EIGEN_STRONG_INLINE Packet2d pandnot<Packet2d>(const Packet2d& a, const Packet2d& b) {
539
+ return vec_and(a, vec_nor(b, b));
540
+ }
440
541
 
441
- template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from) { return pload<Packet4i>(from); }
442
- template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from) { return pload<Packet2d>(from); }
542
+ template <>
543
+ EIGEN_STRONG_INLINE Packet2d pround<Packet2d>(const Packet2d& a) {
544
+ /* Uses non-default rounding for vec_round */
545
+ return __builtin_s390_vfidb(a, 0, 1);
546
+ }
547
+ template <>
548
+ EIGEN_STRONG_INLINE Packet2d pceil<Packet2d>(const Packet2d& a) {
549
+ return vec_ceil(a);
550
+ }
551
+ template <>
552
+ EIGEN_STRONG_INLINE Packet2d pfloor<Packet2d>(const Packet2d& a) {
553
+ return vec_floor(a);
554
+ }
443
555
 
556
+ template <>
557
+ EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from) {
558
+ return pload<Packet4i>(from);
559
+ }
560
+ template <>
561
+ EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from) {
562
+ return pload<Packet2d>(from);
563
+ }
444
564
 
445
- template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int* from)
446
- {
565
+ template <>
566
+ EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int* from) {
447
567
  Packet4i p = pload<Packet4i>(from);
448
568
  return vec_perm(p, p, p16uc_DUPLICATE32_HI);
449
569
  }
450
570
 
451
- template<> EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double* from)
452
- {
571
+ template <>
572
+ EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double* from) {
453
573
  Packet2d p = pload<Packet2d>(from);
454
574
  return vec_perm(p, p, p16uc_PSET64_HI);
455
575
  }
456
576
 
457
- template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from) { pstore<int>(to, from); }
458
- template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d& from) { pstore<double>(to, from); }
577
+ template <>
578
+ EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from) {
579
+ pstore<int>(to, from);
580
+ }
581
+ template <>
582
+ EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d& from) {
583
+ pstore<double>(to, from);
584
+ }
459
585
 
460
- template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { EIGEN_ZVECTOR_PREFETCH(addr); }
461
- template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { EIGEN_ZVECTOR_PREFETCH(addr); }
586
+ template <>
587
+ EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) {
588
+ EIGEN_ZVECTOR_PREFETCH(addr);
589
+ }
590
+ template <>
591
+ EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) {
592
+ EIGEN_ZVECTOR_PREFETCH(addr);
593
+ }
462
594
 
463
- template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { int EIGEN_ALIGN16 x[4]; pstore(x, a); return x[0]; }
464
- template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { double EIGEN_ALIGN16 x[2]; pstore(x, a); return x[0]; }
595
+ template <int N>
596
+ EIGEN_STRONG_INLINE Packet2l parithmetic_shift_right(const Packet2l& a) {
597
+ return Packet2l { parithmetic_shift_right<N>(a[0]), parithmetic_shift_right<N>(a[1]) };
598
+ }
599
+ template <int N>
600
+ EIGEN_STRONG_INLINE Packet4i parithmetic_shift_right(const Packet4i& a) {
601
+ return Packet4i {
602
+ parithmetic_shift_right<N>(a[0]),
603
+ parithmetic_shift_right<N>(a[1]),
604
+ parithmetic_shift_right<N>(a[2]),
605
+ parithmetic_shift_right<N>(a[3]) };
606
+ }
465
607
 
466
- template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a)
467
- {
468
- return reinterpret_cast<Packet4i>(vec_perm(reinterpret_cast<Packet16uc>(a), reinterpret_cast<Packet16uc>(a), p16uc_REVERSE32));
608
+ template <int N>
609
+ EIGEN_STRONG_INLINE Packet2l plogical_shift_right(const Packet2l& a) {
610
+ return Packet2l { plogical_shift_right<N>(a[0]), plogical_shift_right<N>(a[1]) };
611
+ }
612
+ template <int N>
613
+ EIGEN_STRONG_INLINE Packet4i plogical_shift_right(const Packet4i& a) {
614
+ return Packet4i {
615
+ plogical_shift_right<N>(a[0]),
616
+ plogical_shift_right<N>(a[1]),
617
+ plogical_shift_right<N>(a[2]),
618
+ plogical_shift_right<N>(a[3]) };
469
619
  }
470
620
 
471
- template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a)
472
- {
473
- return reinterpret_cast<Packet2d>(vec_perm(reinterpret_cast<Packet16uc>(a), reinterpret_cast<Packet16uc>(a), p16uc_REVERSE64));
621
+ template <int N>
622
+ EIGEN_STRONG_INLINE Packet2l plogical_shift_left(const Packet2l& a) {
623
+ return Packet2l { plogical_shift_left<N>(a[0]), plogical_shift_left<N>(a[1]) };
624
+ }
625
+ template <int N>
626
+ EIGEN_STRONG_INLINE Packet4i plogical_shift_left(const Packet4i& a) {
627
+ return Packet4i {
628
+ plogical_shift_left<N>(a[0]),
629
+ plogical_shift_left<N>(a[1]),
630
+ plogical_shift_left<N>(a[2]),
631
+ plogical_shift_left<N>(a[3]) };
474
632
  }
475
633
 
476
- template<> EIGEN_STRONG_INLINE Packet4i pabs<Packet4i>(const Packet4i& a) { return vec_abs(a); }
477
- template<> EIGEN_STRONG_INLINE Packet2d pabs<Packet2d>(const Packet2d& a) { return vec_abs(a); }
634
+ template <>
635
+ EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) {
636
+ EIGEN_ALIGN16 int x[4];
637
+ pstore(x, a);
638
+ return x[0];
639
+ }
640
+ template <>
641
+ EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) {
642
+ EIGEN_ALIGN16 double x[2];
643
+ pstore(x, a);
644
+ return x[0];
645
+ }
646
+
647
+ template <>
648
+ EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a) {
649
+ return reinterpret_cast<Packet4i>(
650
+ vec_perm(reinterpret_cast<Packet16uc>(a), reinterpret_cast<Packet16uc>(a), p16uc_REVERSE32));
651
+ }
478
652
 
479
- template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)
480
- {
653
+ template <>
654
+ EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a) {
655
+ return reinterpret_cast<Packet2d>(
656
+ vec_perm(reinterpret_cast<Packet16uc>(a), reinterpret_cast<Packet16uc>(a), p16uc_REVERSE64));
657
+ }
658
+
659
+ template <>
660
+ EIGEN_STRONG_INLINE Packet4i pabs<Packet4i>(const Packet4i& a) {
661
+ return vec_abs(a);
662
+ }
663
+ template <>
664
+ EIGEN_STRONG_INLINE Packet2d pabs<Packet2d>(const Packet2d& a) {
665
+ return vec_abs(a);
666
+ }
667
+
668
+ template <>
669
+ EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a) {
481
670
  Packet4i b, sum;
482
- b = vec_sld(a, a, 8);
671
+ b = vec_sld(a, a, 8);
483
672
  sum = padd<Packet4i>(a, b);
484
- b = vec_sld(sum, sum, 4);
673
+ b = vec_sld(sum, sum, 4);
485
674
  sum = padd<Packet4i>(sum, b);
486
675
  return pfirst(sum);
487
676
  }
488
677
 
489
- template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a)
490
- {
678
+ template <>
679
+ EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a) {
491
680
  Packet2d b, sum;
492
- b = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(a), reinterpret_cast<Packet4i>(a), 8));
681
+ b = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(a), reinterpret_cast<Packet4i>(a), 8));
493
682
  sum = padd<Packet2d>(a, b);
494
683
  return pfirst(sum);
495
684
  }
496
685
 
497
686
  // Other reduction functions:
498
687
  // mul
499
- template<> EIGEN_STRONG_INLINE int predux_mul<Packet4i>(const Packet4i& a)
500
- {
688
+ template <>
689
+ EIGEN_STRONG_INLINE int predux_mul<Packet4i>(const Packet4i& a) {
501
690
  EIGEN_ALIGN16 int aux[4];
502
691
  pstore(aux, a);
503
692
  return aux[0] * aux[1] * aux[2] * aux[3];
504
693
  }
505
694
 
506
- template<> EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a)
507
- {
508
- return pfirst(pmul(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(a), reinterpret_cast<Packet4i>(a), 8))));
695
+ template <>
696
+ EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a) {
697
+ return pfirst(
698
+ pmul(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(a), reinterpret_cast<Packet4i>(a), 8))));
509
699
  }
510
700
 
511
701
  // min
512
- template<> EIGEN_STRONG_INLINE int predux_min<Packet4i>(const Packet4i& a)
513
- {
702
+ template <>
703
+ EIGEN_STRONG_INLINE int predux_min<Packet4i>(const Packet4i& a) {
514
704
  Packet4i b, res;
515
- b = pmin<Packet4i>(a, vec_sld(a, a, 8));
705
+ b = pmin<Packet4i>(a, vec_sld(a, a, 8));
516
706
  res = pmin<Packet4i>(b, vec_sld(b, b, 4));
517
707
  return pfirst(res);
518
708
  }
519
709
 
520
- template<> EIGEN_STRONG_INLINE double predux_min<Packet2d>(const Packet2d& a)
521
- {
522
- return pfirst(pmin<Packet2d>(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(a), reinterpret_cast<Packet4i>(a), 8))));
710
+ template <>
711
+ EIGEN_STRONG_INLINE double predux_min<Packet2d>(const Packet2d& a) {
712
+ return pfirst(pmin<Packet2d>(
713
+ a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(a), reinterpret_cast<Packet4i>(a), 8))));
523
714
  }
524
715
 
525
716
  // max
526
- template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
527
- {
717
+ template <>
718
+ EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a) {
528
719
  Packet4i b, res;
529
720
  b = pmax<Packet4i>(a, vec_sld(a, a, 8));
530
721
  res = pmax<Packet4i>(b, vec_sld(b, b, 4));
@@ -532,13 +723,13 @@ template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
532
723
  }
533
724
 
534
725
  // max
535
- template<> EIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a)
536
- {
537
- return pfirst(pmax<Packet2d>(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(a), reinterpret_cast<Packet4i>(a), 8))));
726
+ template <>
727
+ EIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a) {
728
+ return pfirst(pmax<Packet2d>(
729
+ a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(a), reinterpret_cast<Packet4i>(a), 8))));
538
730
  }
539
731
 
540
- EIGEN_DEVICE_FUNC inline void
541
- ptranspose(PacketBlock<Packet4i,4>& kernel) {
732
+ EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet4i, 4>& kernel) {
542
733
  Packet4i t0 = vec_mergeh(kernel.packet[0], kernel.packet[2]);
543
734
  Packet4i t1 = vec_mergel(kernel.packet[0], kernel.packet[2]);
544
735
  Packet4i t2 = vec_mergeh(kernel.packet[1], kernel.packet[3]);
@@ -549,23 +740,25 @@ ptranspose(PacketBlock<Packet4i,4>& kernel) {
549
740
  kernel.packet[3] = vec_mergel(t1, t3);
550
741
  }
551
742
 
552
- EIGEN_DEVICE_FUNC inline void
553
- ptranspose(PacketBlock<Packet2d,2>& kernel) {
743
+ EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet2d, 2>& kernel) {
554
744
  Packet2d t0 = vec_perm(kernel.packet[0], kernel.packet[1], p16uc_TRANSPOSE64_HI);
555
745
  Packet2d t1 = vec_perm(kernel.packet[0], kernel.packet[1], p16uc_TRANSPOSE64_LO);
556
746
  kernel.packet[0] = t0;
557
747
  kernel.packet[1] = t1;
558
748
  }
559
749
 
560
- template<> EIGEN_STRONG_INLINE Packet4i pblend(const Selector<4>& ifPacket, const Packet4i& thenPacket, const Packet4i& elsePacket) {
561
- Packet4ui select = { ifPacket.select[0], ifPacket.select[1], ifPacket.select[2], ifPacket.select[3] };
750
+ template <>
751
+ EIGEN_STRONG_INLINE Packet4i pblend(const Selector<4>& ifPacket, const Packet4i& thenPacket,
752
+ const Packet4i& elsePacket) {
753
+ Packet4ui select = {ifPacket.select[0], ifPacket.select[1], ifPacket.select[2], ifPacket.select[3]};
562
754
  Packet4ui mask = vec_cmpeq(select, reinterpret_cast<Packet4ui>(p4i_ONE));
563
755
  return vec_sel(elsePacket, thenPacket, mask);
564
756
  }
565
757
 
566
-
567
- template<> EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, const Packet2d& thenPacket, const Packet2d& elsePacket) {
568
- Packet2ul select = { ifPacket.select[0], ifPacket.select[1] };
758
+ template <>
759
+ EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, const Packet2d& thenPacket,
760
+ const Packet2d& elsePacket) {
761
+ Packet2ul select = {ifPacket.select[0], ifPacket.select[1]};
569
762
  Packet2ul mask = vec_cmpeq(select, reinterpret_cast<Packet2ul>(p2l_ONE));
570
763
  return vec_sel(elsePacket, thenPacket, mask);
571
764
  }
@@ -576,32 +769,32 @@ template<> EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, cons
576
769
  #if !defined(__ARCH__) || (defined(__ARCH__) && __ARCH__ < 12)
577
770
  /* Helper function to simulate a vec_splat_packet4f
578
771
  */
579
- template<int element> EIGEN_STRONG_INLINE Packet4f vec_splat_packet4f(const Packet4f& from)
580
- {
772
+ template <int element>
773
+ EIGEN_STRONG_INLINE Packet4f vec_splat_packet4f(const Packet4f& from) {
581
774
  Packet4f splat;
582
775
  switch (element) {
583
- case 0:
584
- splat.v4f[0] = vec_splat(from.v4f[0], 0);
585
- splat.v4f[1] = splat.v4f[0];
586
- break;
587
- case 1:
588
- splat.v4f[0] = vec_splat(from.v4f[0], 1);
589
- splat.v4f[1] = splat.v4f[0];
590
- break;
591
- case 2:
592
- splat.v4f[0] = vec_splat(from.v4f[1], 0);
593
- splat.v4f[1] = splat.v4f[0];
594
- break;
595
- case 3:
596
- splat.v4f[0] = vec_splat(from.v4f[1], 1);
597
- splat.v4f[1] = splat.v4f[0];
598
- break;
776
+ case 0:
777
+ splat.v4f[0] = vec_splat(from.v4f[0], 0);
778
+ splat.v4f[1] = splat.v4f[0];
779
+ break;
780
+ case 1:
781
+ splat.v4f[0] = vec_splat(from.v4f[0], 1);
782
+ splat.v4f[1] = splat.v4f[0];
783
+ break;
784
+ case 2:
785
+ splat.v4f[0] = vec_splat(from.v4f[1], 0);
786
+ splat.v4f[1] = splat.v4f[0];
787
+ break;
788
+ case 3:
789
+ splat.v4f[0] = vec_splat(from.v4f[1], 1);
790
+ splat.v4f[1] = splat.v4f[0];
791
+ break;
599
792
  }
600
793
  return splat;
601
794
  }
602
795
 
603
- template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from)
604
- {
796
+ template <>
797
+ EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from) {
605
798
  // FIXME: No intrinsic yet
606
799
  EIGEN_DEBUG_ALIGNED_LOAD
607
800
  Packet4f vfrom;
@@ -610,26 +803,24 @@ template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from)
610
803
  return vfrom;
611
804
  }
612
805
 
613
- template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from)
614
- {
806
+ template <>
807
+ EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from) {
615
808
  // FIXME: No intrinsic yet
616
809
  EIGEN_DEBUG_ALIGNED_STORE
617
810
  vec_st2f(from.v4f[0], &to[0]);
618
811
  vec_st2f(from.v4f[1], &to[2]);
619
812
  }
620
813
 
621
- template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from)
622
- {
814
+ template <>
815
+ EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) {
623
816
  Packet4f to;
624
817
  to.v4f[0] = pset1<Packet2d>(static_cast<const double&>(from));
625
818
  to.v4f[1] = to.v4f[0];
626
819
  return to;
627
820
  }
628
821
 
629
- template<> EIGEN_STRONG_INLINE void
630
- pbroadcast4<Packet4f>(const float *a,
631
- Packet4f& a0, Packet4f& a1, Packet4f& a2, Packet4f& a3)
632
- {
822
+ template <>
823
+ EIGEN_STRONG_INLINE void pbroadcast4<Packet4f>(const float* a, Packet4f& a0, Packet4f& a1, Packet4f& a2, Packet4f& a3) {
633
824
  a3 = pload<Packet4f>(a);
634
825
  a0 = vec_splat_packet4f<0>(a3);
635
826
  a1 = vec_splat_packet4f<1>(a3);
@@ -637,207 +828,213 @@ pbroadcast4<Packet4f>(const float *a,
637
828
  a3 = vec_splat_packet4f<3>(a3);
638
829
  }
639
830
 
640
- template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, Index stride)
641
- {
642
- float EIGEN_ALIGN16 ai[4];
643
- ai[0] = from[0*stride];
644
- ai[1] = from[1*stride];
645
- ai[2] = from[2*stride];
646
- ai[3] = from[3*stride];
647
- return pload<Packet4f>(ai);
831
+ template <>
832
+ EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, Index stride) {
833
+ EIGEN_ALIGN16 float ai[4];
834
+ ai[0] = from[0 * stride];
835
+ ai[1] = from[1 * stride];
836
+ ai[2] = from[2 * stride];
837
+ ai[3] = from[3 * stride];
838
+ return pload<Packet4f>(ai);
648
839
  }
649
840
 
650
- template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, Index stride)
651
- {
652
- float EIGEN_ALIGN16 ai[4];
653
- pstore<float>((float *)ai, from);
654
- to[0*stride] = ai[0];
655
- to[1*stride] = ai[1];
656
- to[2*stride] = ai[2];
657
- to[3*stride] = ai[3];
841
+ template <>
842
+ EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, Index stride) {
843
+ EIGEN_ALIGN16 float ai[4];
844
+ pstore<float>((float*)ai, from);
845
+ to[0 * stride] = ai[0];
846
+ to[1 * stride] = ai[1];
847
+ to[2 * stride] = ai[2];
848
+ to[3 * stride] = ai[3];
658
849
  }
659
850
 
660
- template<> EIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const Packet4f& b)
661
- {
851
+ template <>
852
+ EIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const Packet4f& b) {
662
853
  Packet4f c;
663
854
  c.v4f[0] = a.v4f[0] + b.v4f[0];
664
855
  c.v4f[1] = a.v4f[1] + b.v4f[1];
665
856
  return c;
666
857
  }
667
858
 
668
- template<> EIGEN_STRONG_INLINE Packet4f psub<Packet4f>(const Packet4f& a, const Packet4f& b)
669
- {
859
+ template <>
860
+ EIGEN_STRONG_INLINE Packet4f psub<Packet4f>(const Packet4f& a, const Packet4f& b) {
670
861
  Packet4f c;
671
862
  c.v4f[0] = a.v4f[0] - b.v4f[0];
672
863
  c.v4f[1] = a.v4f[1] - b.v4f[1];
673
864
  return c;
674
865
  }
675
866
 
676
- template<> EIGEN_STRONG_INLINE Packet4f pmul<Packet4f>(const Packet4f& a, const Packet4f& b)
677
- {
867
+ template <>
868
+ EIGEN_STRONG_INLINE Packet4f pmul<Packet4f>(const Packet4f& a, const Packet4f& b) {
678
869
  Packet4f c;
679
870
  c.v4f[0] = a.v4f[0] * b.v4f[0];
680
871
  c.v4f[1] = a.v4f[1] * b.v4f[1];
681
872
  return c;
682
873
  }
683
874
 
684
- template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b)
685
- {
875
+ template <>
876
+ EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b) {
686
877
  Packet4f c;
687
878
  c.v4f[0] = a.v4f[0] / b.v4f[0];
688
879
  c.v4f[1] = a.v4f[1] / b.v4f[1];
689
880
  return c;
690
881
  }
691
882
 
692
- template<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a)
693
- {
883
+ template <>
884
+ EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a) {
694
885
  Packet4f c;
695
886
  c.v4f[0] = -a.v4f[0];
696
887
  c.v4f[1] = -a.v4f[1];
697
888
  return c;
698
889
  }
699
890
 
700
- template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c)
701
- {
891
+ template <>
892
+ EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) {
702
893
  Packet4f res;
703
894
  res.v4f[0] = vec_madd(a.v4f[0], b.v4f[0], c.v4f[0]);
704
895
  res.v4f[1] = vec_madd(a.v4f[1], b.v4f[1], c.v4f[1]);
705
896
  return res;
706
897
  }
707
898
 
708
- template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b)
709
- {
899
+ template <>
900
+ EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) {
710
901
  Packet4f res;
711
902
  res.v4f[0] = pmin(a.v4f[0], b.v4f[0]);
712
903
  res.v4f[1] = pmin(a.v4f[1], b.v4f[1]);
713
904
  return res;
714
905
  }
715
906
 
716
- template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b)
717
- {
907
+ template <>
908
+ EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b) {
718
909
  Packet4f res;
719
910
  res.v4f[0] = pmax(a.v4f[0], b.v4f[0]);
720
911
  res.v4f[1] = pmax(a.v4f[1], b.v4f[1]);
721
912
  return res;
722
913
  }
723
914
 
724
- template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b)
725
- {
915
+ template <>
916
+ EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b) {
726
917
  Packet4f res;
727
918
  res.v4f[0] = pand(a.v4f[0], b.v4f[0]);
728
919
  res.v4f[1] = pand(a.v4f[1], b.v4f[1]);
729
920
  return res;
730
921
  }
731
922
 
732
- template<> EIGEN_STRONG_INLINE Packet4f por<Packet4f>(const Packet4f& a, const Packet4f& b)
733
- {
923
+ template <>
924
+ EIGEN_STRONG_INLINE Packet4f por<Packet4f>(const Packet4f& a, const Packet4f& b) {
734
925
  Packet4f res;
735
926
  res.v4f[0] = por(a.v4f[0], b.v4f[0]);
736
927
  res.v4f[1] = por(a.v4f[1], b.v4f[1]);
737
928
  return res;
738
929
  }
739
930
 
740
- template<> EIGEN_STRONG_INLINE Packet4f pxor<Packet4f>(const Packet4f& a, const Packet4f& b)
741
- {
931
+ template <>
932
+ EIGEN_STRONG_INLINE Packet4f pxor<Packet4f>(const Packet4f& a, const Packet4f& b) {
742
933
  Packet4f res;
743
934
  res.v4f[0] = pxor(a.v4f[0], b.v4f[0]);
744
935
  res.v4f[1] = pxor(a.v4f[1], b.v4f[1]);
745
936
  return res;
746
937
  }
747
938
 
748
- template<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f& b)
749
- {
939
+ template <>
940
+ EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f& b) {
750
941
  Packet4f res;
751
942
  res.v4f[0] = pandnot(a.v4f[0], b.v4f[0]);
752
943
  res.v4f[1] = pandnot(a.v4f[1], b.v4f[1]);
753
944
  return res;
754
945
  }
755
946
 
756
- template<> EIGEN_STRONG_INLINE Packet4f pround<Packet4f>(const Packet4f& a)
757
- {
947
+ template <>
948
+ EIGEN_STRONG_INLINE Packet4f pround<Packet4f>(const Packet4f& a) {
758
949
  Packet4f res;
759
- res.v4f[0] = vec_round(a.v4f[0]);
760
- res.v4f[1] = vec_round(a.v4f[1]);
950
+ res.v4f[0] = generic_round(a.v4f[0]);
951
+ res.v4f[1] = generic_round(a.v4f[1]);
761
952
  return res;
762
953
  }
763
954
 
764
- template<> EIGEN_STRONG_INLINE Packet4f pceil<Packet4f>(const Packet4f& a)
765
- {
955
+ template <>
956
+ EIGEN_STRONG_INLINE Packet4f pceil<Packet4f>(const Packet4f& a) {
766
957
  Packet4f res;
767
958
  res.v4f[0] = vec_ceil(a.v4f[0]);
768
959
  res.v4f[1] = vec_ceil(a.v4f[1]);
769
960
  return res;
770
961
  }
771
962
 
772
- template<> EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f>(const Packet4f& a)
773
- {
963
+ template <>
964
+ EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f>(const Packet4f& a) {
774
965
  Packet4f res;
775
966
  res.v4f[0] = vec_floor(a.v4f[0]);
776
967
  res.v4f[1] = vec_floor(a.v4f[1]);
777
968
  return res;
778
969
  }
779
970
 
780
- template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from)
781
- {
971
+ template <>
972
+ EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from) {
782
973
  Packet4f p = pload<Packet4f>(from);
783
974
  p.v4f[1] = vec_splat(p.v4f[0], 1);
784
975
  p.v4f[0] = vec_splat(p.v4f[0], 0);
785
976
  return p;
786
977
  }
787
978
 
788
- template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { float EIGEN_ALIGN16 x[2]; vec_st2f(a.v4f[0], &x[0]); return x[0]; }
979
+ template <>
980
+ EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) {
981
+ EIGEN_ALIGN16 float x[2];
982
+ vec_st2f(a.v4f[0], &x[0]);
983
+ return x[0];
984
+ }
789
985
 
790
- template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a)
791
- {
986
+ template <>
987
+ EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a) {
792
988
  Packet4f rev;
793
989
  rev.v4f[0] = preverse<Packet2d>(a.v4f[1]);
794
990
  rev.v4f[1] = preverse<Packet2d>(a.v4f[0]);
795
991
  return rev;
796
992
  }
797
993
 
798
- template<> EIGEN_STRONG_INLINE Packet4f pabs<Packet4f>(const Packet4f& a)
799
- {
994
+ template <>
995
+ EIGEN_STRONG_INLINE Packet4f pabs<Packet4f>(const Packet4f& a) {
800
996
  Packet4f res;
801
997
  res.v4f[0] = pabs(a.v4f[0]);
802
998
  res.v4f[1] = pabs(a.v4f[1]);
803
999
  return res;
804
1000
  }
805
1001
 
806
- template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
807
- {
1002
+ template <>
1003
+ EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a) {
808
1004
  Packet2d sum;
809
1005
  sum = padd<Packet2d>(a.v4f[0], a.v4f[1]);
810
1006
  double first = predux<Packet2d>(sum);
811
1007
  return static_cast<float>(first);
812
1008
  }
813
1009
 
814
- template<> EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a)
815
- {
1010
+ template <>
1011
+ EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a) {
816
1012
  // Return predux_mul<Packet2d> of the subvectors product
817
1013
  return static_cast<float>(pfirst(predux_mul(pmul(a.v4f[0], a.v4f[1]))));
818
1014
  }
819
1015
 
820
- template<> EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a)
821
- {
1016
+ template <>
1017
+ EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a) {
822
1018
  Packet2d b, res;
823
- b = pmin<Packet2d>(a.v4f[0], a.v4f[1]);
824
- res = pmin<Packet2d>(b, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(b), reinterpret_cast<Packet4i>(b), 8)));
1019
+ b = pmin<Packet2d>(a.v4f[0], a.v4f[1]);
1020
+ res = pmin<Packet2d>(
1021
+ b, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(b), reinterpret_cast<Packet4i>(b), 8)));
825
1022
  return static_cast<float>(pfirst(res));
826
1023
  }
827
1024
 
828
- template<> EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a)
829
- {
1025
+ template <>
1026
+ EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a) {
830
1027
  Packet2d b, res;
831
- b = pmax<Packet2d>(a.v4f[0], a.v4f[1]);
832
- res = pmax<Packet2d>(b, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(b), reinterpret_cast<Packet4i>(b), 8)));
1028
+ b = pmax<Packet2d>(a.v4f[0], a.v4f[1]);
1029
+ res = pmax<Packet2d>(
1030
+ b, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(b), reinterpret_cast<Packet4i>(b), 8)));
833
1031
  return static_cast<float>(pfirst(res));
834
1032
  }
835
1033
 
836
1034
  /* Split the Packet4f PacketBlock into 4 Packet2d PacketBlocks and transpose each one
837
1035
  */
838
- EIGEN_DEVICE_FUNC inline void
839
- ptranspose(PacketBlock<Packet4f,4>& kernel) {
840
- PacketBlock<Packet2d,2> t0,t1,t2,t3;
1036
+ EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet4f, 4>& kernel) {
1037
+ PacketBlock<Packet2d, 2> t0, t1, t2, t3;
841
1038
  // copy top-left 2x2 Packet2d block
842
1039
  t0.packet[0] = kernel.packet[0].v4f[0];
843
1040
  t0.packet[1] = kernel.packet[1].v4f[0];
@@ -871,9 +1068,11 @@ ptranspose(PacketBlock<Packet4f,4>& kernel) {
871
1068
  kernel.packet[3].v4f[1] = t3.packet[1];
872
1069
  }
873
1070
 
874
- template<> EIGEN_STRONG_INLINE Packet4f pblend(const Selector<4>& ifPacket, const Packet4f& thenPacket, const Packet4f& elsePacket) {
875
- Packet2ul select_hi = { ifPacket.select[0], ifPacket.select[1] };
876
- Packet2ul select_lo = { ifPacket.select[2], ifPacket.select[3] };
1071
+ template <>
1072
+ EIGEN_STRONG_INLINE Packet4f pblend(const Selector<4>& ifPacket, const Packet4f& thenPacket,
1073
+ const Packet4f& elsePacket) {
1074
+ Packet2ul select_hi = {ifPacket.select[0], ifPacket.select[1]};
1075
+ Packet2ul select_lo = {ifPacket.select[2], ifPacket.select[3]};
877
1076
  Packet2ul mask_hi = vec_cmpeq(select_hi, reinterpret_cast<Packet2ul>(p2l_ONE));
878
1077
  Packet2ul mask_lo = vec_cmpeq(select_lo, reinterpret_cast<Packet2ul>(p2l_ONE));
879
1078
  Packet4f result;
@@ -882,24 +1081,24 @@ template<> EIGEN_STRONG_INLINE Packet4f pblend(const Selector<4>& ifPacket, cons
882
1081
  return result;
883
1082
  }
884
1083
 
885
- template<> Packet4f EIGEN_STRONG_INLINE pcmp_le<Packet4f>(const Packet4f& a, const Packet4f& b)
886
- {
1084
+ template <>
1085
+ Packet4f EIGEN_STRONG_INLINE pcmp_le<Packet4f>(const Packet4f& a, const Packet4f& b) {
887
1086
  Packet4f res;
888
1087
  res.v4f[0] = pcmp_le(a.v4f[0], b.v4f[0]);
889
1088
  res.v4f[1] = pcmp_le(a.v4f[1], b.v4f[1]);
890
1089
  return res;
891
1090
  }
892
1091
 
893
- template<> Packet4f EIGEN_STRONG_INLINE pcmp_lt<Packet4f>(const Packet4f& a, const Packet4f& b)
894
- {
1092
+ template <>
1093
+ Packet4f EIGEN_STRONG_INLINE pcmp_lt<Packet4f>(const Packet4f& a, const Packet4f& b) {
895
1094
  Packet4f res;
896
1095
  res.v4f[0] = pcmp_lt(a.v4f[0], b.v4f[0]);
897
1096
  res.v4f[1] = pcmp_lt(a.v4f[1], b.v4f[1]);
898
1097
  return res;
899
1098
  }
900
1099
 
901
- template<> Packet4f EIGEN_STRONG_INLINE pcmp_eq<Packet4f>(const Packet4f& a, const Packet4f& b)
902
- {
1100
+ template <>
1101
+ Packet4f EIGEN_STRONG_INLINE pcmp_eq<Packet4f>(const Packet4f& a, const Packet4f& b) {
903
1102
  Packet4f res;
904
1103
  res.v4f[0] = pcmp_eq(a.v4f[0], b.v4f[0]);
905
1104
  res.v4f[1] = pcmp_eq(a.v4f[1], b.v4f[1]);
@@ -907,33 +1106,25 @@ template<> Packet4f EIGEN_STRONG_INLINE pcmp_eq<Packet4f>(const Packet4f& a, con
907
1106
  }
908
1107
 
909
1108
  #else
910
- template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from)
911
- {
912
- // FIXME: No intrinsic yet
1109
+ template <>
1110
+ EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from) {
913
1111
  EIGEN_DEBUG_ALIGNED_LOAD
914
- Packet *vfrom;
915
- vfrom = (Packet *) from;
916
- return vfrom->v4f;
1112
+ return vec_xl(0, from);
917
1113
  }
918
1114
 
919
- template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from)
920
- {
921
- // FIXME: No intrinsic yet
1115
+ template <>
1116
+ EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from) {
922
1117
  EIGEN_DEBUG_ALIGNED_STORE
923
- Packet *vto;
924
- vto = (Packet *) to;
925
- vto->v4f = from;
1118
+ vec_xst(from, 0, to);
926
1119
  }
927
1120
 
928
- template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from)
929
- {
1121
+ template <>
1122
+ EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) {
930
1123
  return vec_splats(from);
931
1124
  }
932
1125
 
933
- template<> EIGEN_STRONG_INLINE void
934
- pbroadcast4<Packet4f>(const float *a,
935
- Packet4f& a0, Packet4f& a1, Packet4f& a2, Packet4f& a3)
936
- {
1126
+ template <>
1127
+ EIGEN_STRONG_INLINE void pbroadcast4<Packet4f>(const float* a, Packet4f& a0, Packet4f& a1, Packet4f& a2, Packet4f& a3) {
937
1128
  a3 = pload<Packet4f>(a);
938
1129
  a0 = vec_splat(a3, 0);
939
1130
  a1 = vec_splat(a3, 1);
@@ -941,95 +1132,152 @@ pbroadcast4<Packet4f>(const float *a,
941
1132
  a3 = vec_splat(a3, 3);
942
1133
  }
943
1134
 
944
- template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, Index stride)
945
- {
946
- float EIGEN_ALIGN16 af[4];
947
- af[0] = from[0*stride];
948
- af[1] = from[1*stride];
949
- af[2] = from[2*stride];
950
- af[3] = from[3*stride];
951
- return pload<Packet4f>(af);
1135
+ template <>
1136
+ EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, Index stride) {
1137
+ EIGEN_ALIGN16 float af[4];
1138
+ af[0] = from[0 * stride];
1139
+ af[1] = from[1 * stride];
1140
+ af[2] = from[2 * stride];
1141
+ af[3] = from[3 * stride];
1142
+ return pload<Packet4f>(af);
952
1143
  }
953
1144
 
954
- template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, Index stride)
955
- {
956
- float EIGEN_ALIGN16 af[4];
1145
+ template <>
1146
+ EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, Index stride) {
1147
+ EIGEN_ALIGN16 float af[4];
957
1148
  pstore<float>((float*)af, from);
958
- to[0*stride] = af[0];
959
- to[1*stride] = af[1];
960
- to[2*stride] = af[2];
961
- to[3*stride] = af[3];
962
- }
963
-
964
- template<> EIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const Packet4f& b) { return (a + b); }
965
- template<> EIGEN_STRONG_INLINE Packet4f psub<Packet4f>(const Packet4f& a, const Packet4f& b) { return (a - b); }
966
- template<> EIGEN_STRONG_INLINE Packet4f pmul<Packet4f>(const Packet4f& a, const Packet4f& b) { return (a * b); }
967
- template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b) { return (a / b); }
968
- template<> EIGEN_STRONG_INLINE Packet4f pnegate<Packet4f>(const Packet4f& a) { return (-a); }
969
- template<> EIGEN_STRONG_INLINE Packet4f pconj<Packet4f> (const Packet4f& a) { return a; }
970
- template<> EIGEN_STRONG_INLINE Packet4f pmadd<Packet4f> (const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vec_madd(a, b, c); }
971
- template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f> (const Packet4f& a, const Packet4f& b) { return vec_min(a, b); }
972
- template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f> (const Packet4f& a, const Packet4f& b) { return vec_max(a, b); }
973
- template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f> (const Packet4f& a, const Packet4f& b) { return vec_and(a, b); }
974
- template<> EIGEN_STRONG_INLINE Packet4f por<Packet4f> (const Packet4f& a, const Packet4f& b) { return vec_or(a, b); }
975
- template<> EIGEN_STRONG_INLINE Packet4f pxor<Packet4f> (const Packet4f& a, const Packet4f& b) { return vec_xor(a, b); }
976
- template<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_and(a, vec_nor(b, b)); }
977
- template<> EIGEN_STRONG_INLINE Packet4f pround<Packet4f> (const Packet4f& a) { return vec_round(a); }
978
- template<> EIGEN_STRONG_INLINE Packet4f pceil<Packet4f> (const Packet4f& a) { return vec_ceil(a); }
979
- template<> EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f> (const Packet4f& a) { return vec_floor(a); }
980
- template<> EIGEN_STRONG_INLINE Packet4f pabs<Packet4f> (const Packet4f& a) { return vec_abs(a); }
981
- template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { float EIGEN_ALIGN16 x[4]; pstore(x, a); return x[0]; }
982
-
983
- template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from)
984
- {
1149
+ to[0 * stride] = af[0];
1150
+ to[1 * stride] = af[1];
1151
+ to[2 * stride] = af[2];
1152
+ to[3 * stride] = af[3];
1153
+ }
1154
+
1155
+ template <>
1156
+ EIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const Packet4f& b) {
1157
+ return (a + b);
1158
+ }
1159
+ template <>
1160
+ EIGEN_STRONG_INLINE Packet4f psub<Packet4f>(const Packet4f& a, const Packet4f& b) {
1161
+ return (a - b);
1162
+ }
1163
+ template <>
1164
+ EIGEN_STRONG_INLINE Packet4f pmul<Packet4f>(const Packet4f& a, const Packet4f& b) {
1165
+ return (a * b);
1166
+ }
1167
+ template <>
1168
+ EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b) {
1169
+ return (a / b);
1170
+ }
1171
+ template <>
1172
+ EIGEN_STRONG_INLINE Packet4f pnegate<Packet4f>(const Packet4f& a) {
1173
+ return (-a);
1174
+ }
1175
+ template <>
1176
+ EIGEN_STRONG_INLINE Packet4f pconj<Packet4f>(const Packet4f& a) {
1177
+ return a;
1178
+ }
1179
+ template <>
1180
+ EIGEN_STRONG_INLINE Packet4f pmadd<Packet4f>(const Packet4f& a, const Packet4f& b, const Packet4f& c) {
1181
+ return vec_madd(a, b, c);
1182
+ }
1183
+ template <>
1184
+ EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) {
1185
+ return vec_min(a, b);
1186
+ }
1187
+ template <>
1188
+ EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b) {
1189
+ return vec_max(a, b);
1190
+ }
1191
+ template <>
1192
+ EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b) {
1193
+ return vec_and(a, b);
1194
+ }
1195
+ template <>
1196
+ EIGEN_STRONG_INLINE Packet4f por<Packet4f>(const Packet4f& a, const Packet4f& b) {
1197
+ return vec_or(a, b);
1198
+ }
1199
+ template <>
1200
+ EIGEN_STRONG_INLINE Packet4f pxor<Packet4f>(const Packet4f& a, const Packet4f& b) {
1201
+ return vec_xor(a, b);
1202
+ }
1203
+ template <>
1204
+ EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f& b) {
1205
+ return vec_and(a, vec_nor(b, b));
1206
+ }
1207
+ template <>
1208
+ EIGEN_STRONG_INLINE Packet4f pround<Packet4f>(const Packet4f& a) {
1209
+ /* Uses non-default rounding for vec_round */
1210
+ return __builtin_s390_vfisb(a, 0, 1);
1211
+ }
1212
+ template <>
1213
+ EIGEN_STRONG_INLINE Packet4f pceil<Packet4f>(const Packet4f& a) {
1214
+ return vec_ceil(a);
1215
+ }
1216
+ template <>
1217
+ EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f>(const Packet4f& a) {
1218
+ return vec_floor(a);
1219
+ }
1220
+ template <>
1221
+ EIGEN_STRONG_INLINE Packet4f pabs<Packet4f>(const Packet4f& a) {
1222
+ return vec_abs(a);
1223
+ }
1224
+ template <>
1225
+ EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) {
1226
+ EIGEN_ALIGN16 float x[4];
1227
+ pstore(x, a);
1228
+ return x[0];
1229
+ }
1230
+
1231
+ template <>
1232
+ EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from) {
985
1233
  Packet4f p = pload<Packet4f>(from);
986
1234
  return vec_perm(p, p, p16uc_DUPLICATE32_HI);
987
1235
  }
988
1236
 
989
- template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a)
990
- {
991
- return reinterpret_cast<Packet4f>(vec_perm(reinterpret_cast<Packet16uc>(a), reinterpret_cast<Packet16uc>(a), p16uc_REVERSE32));
1237
+ template <>
1238
+ EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a) {
1239
+ return reinterpret_cast<Packet4f>(
1240
+ vec_perm(reinterpret_cast<Packet16uc>(a), reinterpret_cast<Packet16uc>(a), p16uc_REVERSE32));
992
1241
  }
993
1242
 
994
- template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
995
- {
1243
+ template <>
1244
+ EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a) {
996
1245
  Packet4f b, sum;
997
- b = vec_sld(a, a, 8);
1246
+ b = vec_sld(a, a, 8);
998
1247
  sum = padd<Packet4f>(a, b);
999
- b = vec_sld(sum, sum, 4);
1248
+ b = vec_sld(sum, sum, 4);
1000
1249
  sum = padd<Packet4f>(sum, b);
1001
1250
  return pfirst(sum);
1002
1251
  }
1003
1252
 
1004
1253
  // Other reduction functions:
1005
1254
  // mul
1006
- template<> EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a)
1007
- {
1255
+ template <>
1256
+ EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a) {
1008
1257
  Packet4f prod;
1009
1258
  prod = pmul(a, vec_sld(a, a, 8));
1010
1259
  return pfirst(pmul(prod, vec_sld(prod, prod, 4)));
1011
1260
  }
1012
1261
 
1013
1262
  // min
1014
- template<> EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a)
1015
- {
1263
+ template <>
1264
+ EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a) {
1016
1265
  Packet4f b, res;
1017
- b = pmin<Packet4f>(a, vec_sld(a, a, 8));
1266
+ b = pmin<Packet4f>(a, vec_sld(a, a, 8));
1018
1267
  res = pmin<Packet4f>(b, vec_sld(b, b, 4));
1019
1268
  return pfirst(res);
1020
1269
  }
1021
1270
 
1022
1271
  // max
1023
- template<> EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a)
1024
- {
1272
+ template <>
1273
+ EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a) {
1025
1274
  Packet4f b, res;
1026
1275
  b = pmax<Packet4f>(a, vec_sld(a, a, 8));
1027
1276
  res = pmax<Packet4f>(b, vec_sld(b, b, 4));
1028
1277
  return pfirst(res);
1029
1278
  }
1030
1279
 
1031
- EIGEN_DEVICE_FUNC inline void
1032
- ptranspose(PacketBlock<Packet4f,4>& kernel) {
1280
+ EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet4f, 4>& kernel) {
1033
1281
  Packet4f t0 = vec_mergeh(kernel.packet[0], kernel.packet[2]);
1034
1282
  Packet4f t1 = vec_mergel(kernel.packet[0], kernel.packet[2]);
1035
1283
  Packet4f t2 = vec_mergeh(kernel.packet[1], kernel.packet[3]);
@@ -1040,21 +1288,126 @@ ptranspose(PacketBlock<Packet4f,4>& kernel) {
1040
1288
  kernel.packet[3] = vec_mergel(t1, t3);
1041
1289
  }
1042
1290
 
1043
- template<> EIGEN_STRONG_INLINE Packet4f pblend(const Selector<4>& ifPacket, const Packet4f& thenPacket, const Packet4f& elsePacket) {
1044
- Packet4ui select = { ifPacket.select[0], ifPacket.select[1], ifPacket.select[2], ifPacket.select[3] };
1291
+ template <>
1292
+ EIGEN_STRONG_INLINE Packet4f pblend(const Selector<4>& ifPacket, const Packet4f& thenPacket,
1293
+ const Packet4f& elsePacket) {
1294
+ Packet4ui select = {ifPacket.select[0], ifPacket.select[1], ifPacket.select[2], ifPacket.select[3]};
1045
1295
  Packet4ui mask = vec_cmpeq(select, reinterpret_cast<Packet4ui>(p4i_ONE));
1046
1296
  return vec_sel(elsePacket, thenPacket, mask);
1047
1297
  }
1048
1298
 
1049
1299
  #endif
1050
1300
 
1051
- template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { EIGEN_ZVECTOR_PREFETCH(addr); }
1052
- template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f> (const float* from) { return pload<Packet4f>(from); }
1053
- template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) { pstore<float>(to, from); }
1054
- template<> EIGEN_STRONG_INLINE Packet4f plset<Packet4f> (const float& a) { return padd<Packet4f>(pset1<Packet4f>(a), p4f_COUNTDOWN); }
1301
+ template <>
1302
+ EIGEN_STRONG_INLINE Packet4f pldexp<Packet4f>(const Packet4f& a, const Packet4f& exponent) {
1303
+ return pldexp_generic(a, exponent);
1304
+ }
1305
+
1306
+ template <>
1307
+ EIGEN_STRONG_INLINE Packet2d pldexp<Packet2d>(const Packet2d& a, const Packet2d& exponent) {
1308
+ // Clamp exponent to [-2099, 2099]
1309
+ const Packet2d max_exponent = pset1<Packet2d>(2099.0);
1310
+ const Packet2l e = pcast<Packet2d, Packet2l>(pmin(pmax(exponent, pnegate(max_exponent)), max_exponent));
1311
+
1312
+ // Split 2^e into four factors and multiply:
1313
+ const Packet2l bias = {1023, 1023};
1314
+ Packet2l b = plogical_shift_right<2>(e); // floor(e/4)
1315
+ Packet2d c = reinterpret_cast<Packet2d>(plogical_shift_left<52>(b + bias));
1316
+ Packet2d out = pmul(pmul(pmul(a, c), c), c); // a * 2^(3b)
1317
+ b = psub(psub(psub(e, b), b), b); // e - 3b
1318
+ c = reinterpret_cast<Packet2d>(plogical_shift_left<52>(b + bias)); // 2^(e - 3b)
1319
+ out = pmul(out, c); // a * 2^e
1320
+ return out;
1321
+ }
1322
+
1323
+ template <>
1324
+ EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) {
1325
+ EIGEN_ZVECTOR_PREFETCH(addr);
1326
+ }
1327
+ template <>
1328
+ EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from) {
1329
+ return pload<Packet4f>(from);
1330
+ }
1331
+ template <>
1332
+ EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) {
1333
+ pstore<float>(to, from);
1334
+ }
1335
+ template <>
1336
+ EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a) {
1337
+ return padd<Packet4f>(pset1<Packet4f>(a), p4f_COUNTDOWN);
1338
+ }
1339
+
1340
+ #if !defined(vec_float) || !defined(__ARCH__) || (defined(__ARCH__) && __ARCH__ < 13)
1341
+ #pragma GCC warning \
1342
+ "float->int and int->float conversion is simulated. compile for z15 for improved performance"
1343
+ template <>
1344
+ struct cast_impl<Packet4i, Packet4f> {
1345
+ EIGEN_DEVICE_FUNC static inline Packet4f run(const Packet4i& a) {
1346
+ return Packet4f{float(a[0]), float(a[1]), float(a[2]), float(a[3]) };
1347
+ }
1348
+ };
1349
+
1350
+ template <>
1351
+ struct cast_impl<Packet4f, Packet4i> {
1352
+ EIGEN_DEVICE_FUNC static inline Packet4i run(const Packet4f& a) {
1353
+ return Packet4i{int(a[0]), int(a[1]), int(a[2]), int(a[3]) };
1354
+ }
1355
+ };
1356
+
1357
+ template <>
1358
+ struct cast_impl<Packet2l, Packet2d> {
1359
+ EIGEN_DEVICE_FUNC static inline Packet2d run(const Packet2l& a) {
1360
+ return Packet2d{double(a[0]), double(a[1]) };
1361
+ }
1362
+ };
1363
+
1364
+ template <>
1365
+ struct cast_impl<Packet2d, Packet2l> {
1366
+ EIGEN_DEVICE_FUNC static inline Packet2l run(const Packet2d& a) {
1367
+ return Packet2l{(long long)(a[0]), (long long)(a[1]) };
1368
+ }
1369
+ };
1370
+ #else
1371
+ template <>
1372
+ struct cast_impl<Packet4i, Packet4f> {
1373
+ EIGEN_DEVICE_FUNC static inline Packet4f run(const Packet4i& a) {
1374
+ return vec_float(a);
1375
+ }
1376
+ };
1377
+
1378
+ template <>
1379
+ struct cast_impl<Packet4f, Packet4i> {
1380
+ EIGEN_DEVICE_FUNC static inline Packet4i run(const Packet4f& a) {
1381
+ return vec_signed(a);
1382
+ }
1383
+ };
1384
+
1385
+ template <>
1386
+ struct cast_impl<Packet2l, Packet2d> {
1387
+ EIGEN_DEVICE_FUNC static inline Packet2d run(const Packet2l& a) {
1388
+ return vec_double(a);
1389
+ }
1390
+ };
1391
+
1392
+ template <>
1393
+ struct cast_impl<Packet2d, Packet2l> {
1394
+ EIGEN_DEVICE_FUNC static inline Packet2l run(const Packet2d& a) {
1395
+ return vec_signed(a);
1396
+ }
1397
+ };
1398
+ #endif
1399
+
1400
+ template <>
1401
+ EIGEN_STRONG_INLINE Packet4f pset1frombits<Packet4f>(uint32_t from) {
1402
+ return pset1<Packet4f>(Eigen::numext::bit_cast<float>(from));
1403
+ }
1404
+ template <>
1405
+ EIGEN_STRONG_INLINE Packet2d pset1frombits<Packet2d>(uint64_t from) {
1406
+ return pset1<Packet2d>(Eigen::numext::bit_cast<double>(from));
1407
+ }
1055
1408
 
1056
- } // end namespace internal
1409
+ } // end namespace internal
1057
1410
 
1058
- } // end namespace Eigen
1411
+ } // end namespace Eigen
1059
1412
 
1060
- #endif // EIGEN_PACKET_MATH_ZVECTOR_H
1413
+ #endif // EIGEN_PACKET_MATH_ZVECTOR_H