@smake/eigen 1.1.0 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (431)
  1. package/README.md +1 -1
  2. package/eigen/Eigen/AccelerateSupport +52 -0
  3. package/eigen/Eigen/Cholesky +18 -20
  4. package/eigen/Eigen/CholmodSupport +28 -28
  5. package/eigen/Eigen/Core +187 -120
  6. package/eigen/Eigen/Eigenvalues +16 -13
  7. package/eigen/Eigen/Geometry +18 -18
  8. package/eigen/Eigen/Householder +9 -7
  9. package/eigen/Eigen/IterativeLinearSolvers +8 -4
  10. package/eigen/Eigen/Jacobi +14 -13
  11. package/eigen/Eigen/KLUSupport +23 -21
  12. package/eigen/Eigen/LU +15 -16
  13. package/eigen/Eigen/MetisSupport +12 -12
  14. package/eigen/Eigen/OrderingMethods +54 -51
  15. package/eigen/Eigen/PaStiXSupport +23 -21
  16. package/eigen/Eigen/PardisoSupport +17 -14
  17. package/eigen/Eigen/QR +18 -20
  18. package/eigen/Eigen/QtAlignedMalloc +5 -12
  19. package/eigen/Eigen/SPQRSupport +21 -14
  20. package/eigen/Eigen/SVD +23 -17
  21. package/eigen/Eigen/Sparse +1 -2
  22. package/eigen/Eigen/SparseCholesky +18 -15
  23. package/eigen/Eigen/SparseCore +18 -17
  24. package/eigen/Eigen/SparseLU +9 -9
  25. package/eigen/Eigen/SparseQR +16 -14
  26. package/eigen/Eigen/StdDeque +5 -2
  27. package/eigen/Eigen/StdList +5 -2
  28. package/eigen/Eigen/StdVector +5 -2
  29. package/eigen/Eigen/SuperLUSupport +30 -24
  30. package/eigen/Eigen/ThreadPool +80 -0
  31. package/eigen/Eigen/UmfPackSupport +19 -17
  32. package/eigen/Eigen/Version +14 -0
  33. package/eigen/Eigen/src/AccelerateSupport/AccelerateSupport.h +423 -0
  34. package/eigen/Eigen/src/AccelerateSupport/InternalHeaderCheck.h +3 -0
  35. package/eigen/Eigen/src/Cholesky/InternalHeaderCheck.h +3 -0
  36. package/eigen/Eigen/src/Cholesky/LDLT.h +366 -405
  37. package/eigen/Eigen/src/Cholesky/LLT.h +323 -367
  38. package/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h +81 -56
  39. package/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +585 -529
  40. package/eigen/Eigen/src/CholmodSupport/InternalHeaderCheck.h +3 -0
  41. package/eigen/Eigen/src/Core/ArithmeticSequence.h +143 -317
  42. package/eigen/Eigen/src/Core/Array.h +329 -370
  43. package/eigen/Eigen/src/Core/ArrayBase.h +190 -203
  44. package/eigen/Eigen/src/Core/ArrayWrapper.h +126 -170
  45. package/eigen/Eigen/src/Core/Assign.h +30 -40
  46. package/eigen/Eigen/src/Core/AssignEvaluator.h +651 -604
  47. package/eigen/Eigen/src/Core/Assign_MKL.h +125 -120
  48. package/eigen/Eigen/src/Core/BandMatrix.h +267 -282
  49. package/eigen/Eigen/src/Core/Block.h +371 -390
  50. package/eigen/Eigen/src/Core/CommaInitializer.h +85 -100
  51. package/eigen/Eigen/src/Core/ConditionEstimator.h +51 -53
  52. package/eigen/Eigen/src/Core/CoreEvaluators.h +1214 -937
  53. package/eigen/Eigen/src/Core/CoreIterators.h +72 -63
  54. package/eigen/Eigen/src/Core/CwiseBinaryOp.h +112 -129
  55. package/eigen/Eigen/src/Core/CwiseNullaryOp.h +676 -702
  56. package/eigen/Eigen/src/Core/CwiseTernaryOp.h +77 -103
  57. package/eigen/Eigen/src/Core/CwiseUnaryOp.h +55 -67
  58. package/eigen/Eigen/src/Core/CwiseUnaryView.h +127 -92
  59. package/eigen/Eigen/src/Core/DenseBase.h +630 -658
  60. package/eigen/Eigen/src/Core/DenseCoeffsBase.h +511 -628
  61. package/eigen/Eigen/src/Core/DenseStorage.h +511 -590
  62. package/eigen/Eigen/src/Core/DeviceWrapper.h +153 -0
  63. package/eigen/Eigen/src/Core/Diagonal.h +168 -207
  64. package/eigen/Eigen/src/Core/DiagonalMatrix.h +346 -317
  65. package/eigen/Eigen/src/Core/DiagonalProduct.h +12 -10
  66. package/eigen/Eigen/src/Core/Dot.h +167 -217
  67. package/eigen/Eigen/src/Core/EigenBase.h +74 -85
  68. package/eigen/Eigen/src/Core/Fill.h +138 -0
  69. package/eigen/Eigen/src/Core/FindCoeff.h +464 -0
  70. package/eigen/Eigen/src/Core/ForceAlignedAccess.h +90 -113
  71. package/eigen/Eigen/src/Core/Fuzzy.h +82 -105
  72. package/eigen/Eigen/src/Core/GeneralProduct.h +315 -261
  73. package/eigen/Eigen/src/Core/GenericPacketMath.h +1182 -520
  74. package/eigen/Eigen/src/Core/GlobalFunctions.h +193 -157
  75. package/eigen/Eigen/src/Core/IO.h +131 -156
  76. package/eigen/Eigen/src/Core/IndexedView.h +209 -125
  77. package/eigen/Eigen/src/Core/InnerProduct.h +260 -0
  78. package/eigen/Eigen/src/Core/InternalHeaderCheck.h +3 -0
  79. package/eigen/Eigen/src/Core/Inverse.h +50 -59
  80. package/eigen/Eigen/src/Core/Map.h +123 -141
  81. package/eigen/Eigen/src/Core/MapBase.h +255 -282
  82. package/eigen/Eigen/src/Core/MathFunctions.h +1247 -1201
  83. package/eigen/Eigen/src/Core/MathFunctionsImpl.h +162 -99
  84. package/eigen/Eigen/src/Core/Matrix.h +463 -494
  85. package/eigen/Eigen/src/Core/MatrixBase.h +468 -470
  86. package/eigen/Eigen/src/Core/NestByValue.h +58 -52
  87. package/eigen/Eigen/src/Core/NoAlias.h +79 -86
  88. package/eigen/Eigen/src/Core/NumTraits.h +206 -206
  89. package/eigen/Eigen/src/Core/PartialReduxEvaluator.h +163 -142
  90. package/eigen/Eigen/src/Core/PermutationMatrix.h +461 -511
  91. package/eigen/Eigen/src/Core/PlainObjectBase.h +858 -972
  92. package/eigen/Eigen/src/Core/Product.h +246 -130
  93. package/eigen/Eigen/src/Core/ProductEvaluators.h +779 -671
  94. package/eigen/Eigen/src/Core/Random.h +153 -164
  95. package/eigen/Eigen/src/Core/RandomImpl.h +262 -0
  96. package/eigen/Eigen/src/Core/RealView.h +250 -0
  97. package/eigen/Eigen/src/Core/Redux.h +334 -314
  98. package/eigen/Eigen/src/Core/Ref.h +259 -257
  99. package/eigen/Eigen/src/Core/Replicate.h +92 -104
  100. package/eigen/Eigen/src/Core/Reshaped.h +215 -271
  101. package/eigen/Eigen/src/Core/ReturnByValue.h +47 -55
  102. package/eigen/Eigen/src/Core/Reverse.h +133 -148
  103. package/eigen/Eigen/src/Core/Select.h +68 -140
  104. package/eigen/Eigen/src/Core/SelfAdjointView.h +254 -290
  105. package/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h +23 -20
  106. package/eigen/Eigen/src/Core/SkewSymmetricMatrix3.h +382 -0
  107. package/eigen/Eigen/src/Core/Solve.h +88 -102
  108. package/eigen/Eigen/src/Core/SolveTriangular.h +126 -124
  109. package/eigen/Eigen/src/Core/SolverBase.h +132 -133
  110. package/eigen/Eigen/src/Core/StableNorm.h +113 -147
  111. package/eigen/Eigen/src/Core/StlIterators.h +404 -248
  112. package/eigen/Eigen/src/Core/Stride.h +90 -92
  113. package/eigen/Eigen/src/Core/Swap.h +70 -39
  114. package/eigen/Eigen/src/Core/Transpose.h +258 -295
  115. package/eigen/Eigen/src/Core/Transpositions.h +270 -333
  116. package/eigen/Eigen/src/Core/TriangularMatrix.h +642 -743
  117. package/eigen/Eigen/src/Core/VectorBlock.h +59 -72
  118. package/eigen/Eigen/src/Core/VectorwiseOp.h +653 -704
  119. package/eigen/Eigen/src/Core/Visitor.h +464 -308
  120. package/eigen/Eigen/src/Core/arch/AVX/Complex.h +380 -187
  121. package/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +65 -163
  122. package/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +2145 -638
  123. package/eigen/Eigen/src/Core/arch/AVX/Reductions.h +353 -0
  124. package/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +253 -60
  125. package/eigen/Eigen/src/Core/arch/AVX512/Complex.h +278 -228
  126. package/eigen/Eigen/src/Core/arch/AVX512/GemmKernel.h +1245 -0
  127. package/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +48 -269
  128. package/eigen/Eigen/src/Core/arch/AVX512/MathFunctionsFP16.h +75 -0
  129. package/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1597 -754
  130. package/eigen/Eigen/src/Core/arch/AVX512/PacketMathFP16.h +1413 -0
  131. package/eigen/Eigen/src/Core/arch/AVX512/Reductions.h +297 -0
  132. package/eigen/Eigen/src/Core/arch/AVX512/TrsmKernel.h +1167 -0
  133. package/eigen/Eigen/src/Core/arch/AVX512/TrsmUnrolls.inc +1219 -0
  134. package/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +229 -41
  135. package/eigen/Eigen/src/Core/arch/AVX512/TypeCastingFP16.h +130 -0
  136. package/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +420 -184
  137. package/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +40 -49
  138. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2962 -2213
  139. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +196 -212
  140. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +713 -441
  141. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMAbfloat16.h +742 -0
  142. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixVectorProduct.inc +2818 -0
  143. package/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2380 -1362
  144. package/eigen/Eigen/src/Core/arch/AltiVec/TypeCasting.h +153 -0
  145. package/eigen/Eigen/src/Core/arch/Default/BFloat16.h +390 -224
  146. package/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +78 -67
  147. package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1784 -799
  148. package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +167 -50
  149. package/eigen/Eigen/src/Core/arch/Default/Half.h +528 -379
  150. package/eigen/Eigen/src/Core/arch/Default/Settings.h +10 -12
  151. package/eigen/Eigen/src/Core/arch/GPU/Complex.h +244 -0
  152. package/eigen/Eigen/src/Core/arch/GPU/MathFunctions.h +41 -40
  153. package/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +550 -523
  154. package/eigen/Eigen/src/Core/arch/GPU/Tuple.h +268 -0
  155. package/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +27 -30
  156. package/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +8 -8
  157. package/eigen/Eigen/src/Core/arch/HVX/PacketMath.h +1088 -0
  158. package/eigen/Eigen/src/Core/arch/LSX/Complex.h +520 -0
  159. package/eigen/Eigen/src/Core/arch/LSX/GeneralBlockPanelKernel.h +23 -0
  160. package/eigen/Eigen/src/Core/arch/LSX/MathFunctions.h +43 -0
  161. package/eigen/Eigen/src/Core/arch/LSX/PacketMath.h +2866 -0
  162. package/eigen/Eigen/src/Core/arch/LSX/TypeCasting.h +526 -0
  163. package/eigen/Eigen/src/Core/arch/MSA/Complex.h +54 -82
  164. package/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +84 -92
  165. package/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +51 -47
  166. package/eigen/Eigen/src/Core/arch/NEON/Complex.h +454 -306
  167. package/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +175 -115
  168. package/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +23 -30
  169. package/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4366 -2857
  170. package/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +616 -393
  171. package/eigen/Eigen/src/Core/arch/NEON/UnaryFunctors.h +57 -0
  172. package/eigen/Eigen/src/Core/arch/SSE/Complex.h +350 -198
  173. package/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +38 -149
  174. package/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +1791 -912
  175. package/eigen/Eigen/src/Core/arch/SSE/Reductions.h +324 -0
  176. package/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +128 -40
  177. package/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +10 -6
  178. package/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +156 -234
  179. package/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +6 -3
  180. package/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +27 -32
  181. package/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +119 -117
  182. package/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +325 -419
  183. package/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +15 -17
  184. package/eigen/Eigen/src/Core/arch/ZVector/Complex.h +325 -181
  185. package/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +94 -83
  186. package/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +811 -458
  187. package/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +121 -124
  188. package/eigen/Eigen/src/Core/functors/BinaryFunctors.h +576 -370
  189. package/eigen/Eigen/src/Core/functors/NullaryFunctors.h +194 -109
  190. package/eigen/Eigen/src/Core/functors/StlFunctors.h +95 -112
  191. package/eigen/Eigen/src/Core/functors/TernaryFunctors.h +34 -7
  192. package/eigen/Eigen/src/Core/functors/UnaryFunctors.h +1038 -749
  193. package/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1883 -1375
  194. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +312 -370
  195. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +189 -176
  196. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +84 -81
  197. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +154 -73
  198. package/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +292 -337
  199. package/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +80 -77
  200. package/eigen/Eigen/src/Core/products/Parallelizer.h +207 -105
  201. package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +327 -388
  202. package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +206 -224
  203. package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +138 -147
  204. package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +58 -61
  205. package/eigen/Eigen/src/Core/products/SelfadjointProduct.h +71 -71
  206. package/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +48 -47
  207. package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +294 -369
  208. package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +246 -238
  209. package/eigen/Eigen/src/Core/products/TriangularMatrixVector.h +244 -247
  210. package/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +212 -192
  211. package/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +328 -277
  212. package/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +108 -109
  213. package/eigen/Eigen/src/Core/products/TriangularSolverVector.h +68 -94
  214. package/eigen/Eigen/src/Core/util/Assert.h +158 -0
  215. package/eigen/Eigen/src/Core/util/BlasUtil.h +342 -303
  216. package/eigen/Eigen/src/Core/util/ConfigureVectorization.h +348 -317
  217. package/eigen/Eigen/src/Core/util/Constants.h +297 -262
  218. package/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +130 -90
  219. package/eigen/Eigen/src/Core/util/EmulateArray.h +270 -0
  220. package/eigen/Eigen/src/Core/util/ForwardDeclarations.h +449 -247
  221. package/eigen/Eigen/src/Core/util/GpuHipCudaDefines.inc +101 -0
  222. package/eigen/Eigen/src/Core/util/GpuHipCudaUndefines.inc +45 -0
  223. package/eigen/Eigen/src/Core/util/IndexedViewHelper.h +417 -116
  224. package/eigen/Eigen/src/Core/util/IntegralConstant.h +211 -204
  225. package/eigen/Eigen/src/Core/util/MKL_support.h +39 -37
  226. package/eigen/Eigen/src/Core/util/Macros.h +655 -773
  227. package/eigen/Eigen/src/Core/util/MaxSizeVector.h +139 -0
  228. package/eigen/Eigen/src/Core/util/Memory.h +970 -748
  229. package/eigen/Eigen/src/Core/util/Meta.h +581 -633
  230. package/eigen/Eigen/src/Core/util/MoreMeta.h +638 -0
  231. package/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +32 -19
  232. package/eigen/Eigen/src/Core/util/ReshapedHelper.h +17 -17
  233. package/eigen/Eigen/src/Core/util/Serializer.h +209 -0
  234. package/eigen/Eigen/src/Core/util/StaticAssert.h +50 -166
  235. package/eigen/Eigen/src/Core/util/SymbolicIndex.h +377 -225
  236. package/eigen/Eigen/src/Core/util/XprHelper.h +784 -547
  237. package/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +246 -277
  238. package/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +299 -319
  239. package/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +52 -48
  240. package/eigen/Eigen/src/Eigenvalues/EigenSolver.h +413 -456
  241. package/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +309 -325
  242. package/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +157 -171
  243. package/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +292 -310
  244. package/eigen/Eigen/src/Eigenvalues/InternalHeaderCheck.h +3 -0
  245. package/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +89 -105
  246. package/eigen/Eigen/src/Eigenvalues/RealQZ.h +537 -607
  247. package/eigen/Eigen/src/Eigenvalues/RealSchur.h +342 -381
  248. package/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +41 -35
  249. package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +541 -595
  250. package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +47 -44
  251. package/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +430 -462
  252. package/eigen/Eigen/src/Geometry/AlignedBox.h +226 -227
  253. package/eigen/Eigen/src/Geometry/AngleAxis.h +131 -133
  254. package/eigen/Eigen/src/Geometry/EulerAngles.h +163 -74
  255. package/eigen/Eigen/src/Geometry/Homogeneous.h +285 -333
  256. package/eigen/Eigen/src/Geometry/Hyperplane.h +151 -160
  257. package/eigen/Eigen/src/Geometry/InternalHeaderCheck.h +3 -0
  258. package/eigen/Eigen/src/Geometry/OrthoMethods.h +168 -146
  259. package/eigen/Eigen/src/Geometry/ParametrizedLine.h +127 -127
  260. package/eigen/Eigen/src/Geometry/Quaternion.h +566 -506
  261. package/eigen/Eigen/src/Geometry/Rotation2D.h +107 -105
  262. package/eigen/Eigen/src/Geometry/RotationBase.h +148 -145
  263. package/eigen/Eigen/src/Geometry/Scaling.h +113 -106
  264. package/eigen/Eigen/src/Geometry/Transform.h +858 -936
  265. package/eigen/Eigen/src/Geometry/Translation.h +94 -92
  266. package/eigen/Eigen/src/Geometry/Umeyama.h +79 -84
  267. package/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +90 -104
  268. package/eigen/Eigen/src/Householder/BlockHouseholder.h +51 -46
  269. package/eigen/Eigen/src/Householder/Householder.h +102 -124
  270. package/eigen/Eigen/src/Householder/HouseholderSequence.h +412 -453
  271. package/eigen/Eigen/src/Householder/InternalHeaderCheck.h +3 -0
  272. package/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +149 -162
  273. package/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +124 -119
  274. package/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +92 -104
  275. package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +251 -243
  276. package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +224 -228
  277. package/eigen/Eigen/src/IterativeLinearSolvers/InternalHeaderCheck.h +3 -0
  278. package/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +178 -227
  279. package/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +79 -84
  280. package/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +54 -60
  281. package/eigen/Eigen/src/Jacobi/InternalHeaderCheck.h +3 -0
  282. package/eigen/Eigen/src/Jacobi/Jacobi.h +252 -308
  283. package/eigen/Eigen/src/KLUSupport/InternalHeaderCheck.h +3 -0
  284. package/eigen/Eigen/src/KLUSupport/KLUSupport.h +208 -227
  285. package/eigen/Eigen/src/LU/Determinant.h +50 -69
  286. package/eigen/Eigen/src/LU/FullPivLU.h +545 -596
  287. package/eigen/Eigen/src/LU/InternalHeaderCheck.h +3 -0
  288. package/eigen/Eigen/src/LU/InverseImpl.h +206 -285
  289. package/eigen/Eigen/src/LU/PartialPivLU.h +390 -428
  290. package/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h +54 -40
  291. package/eigen/Eigen/src/LU/arch/InverseSize4.h +72 -70
  292. package/eigen/Eigen/src/MetisSupport/InternalHeaderCheck.h +3 -0
  293. package/eigen/Eigen/src/MetisSupport/MetisSupport.h +81 -93
  294. package/eigen/Eigen/src/OrderingMethods/Amd.h +243 -265
  295. package/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +831 -1004
  296. package/eigen/Eigen/src/OrderingMethods/InternalHeaderCheck.h +3 -0
  297. package/eigen/Eigen/src/OrderingMethods/Ordering.h +112 -119
  298. package/eigen/Eigen/src/PaStiXSupport/InternalHeaderCheck.h +3 -0
  299. package/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +524 -570
  300. package/eigen/Eigen/src/PardisoSupport/InternalHeaderCheck.h +3 -0
  301. package/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +385 -430
  302. package/eigen/Eigen/src/QR/ColPivHouseholderQR.h +479 -479
  303. package/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +120 -56
  304. package/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +166 -153
  305. package/eigen/Eigen/src/QR/FullPivHouseholderQR.h +495 -475
  306. package/eigen/Eigen/src/QR/HouseholderQR.h +394 -285
  307. package/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h +32 -23
  308. package/eigen/Eigen/src/QR/InternalHeaderCheck.h +3 -0
  309. package/eigen/Eigen/src/SPQRSupport/InternalHeaderCheck.h +3 -0
  310. package/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +244 -264
  311. package/eigen/Eigen/src/SVD/BDCSVD.h +817 -713
  312. package/eigen/Eigen/src/SVD/BDCSVD_LAPACKE.h +174 -0
  313. package/eigen/Eigen/src/SVD/InternalHeaderCheck.h +3 -0
  314. package/eigen/Eigen/src/SVD/JacobiSVD.h +577 -543
  315. package/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +85 -49
  316. package/eigen/Eigen/src/SVD/SVDBase.h +242 -182
  317. package/eigen/Eigen/src/SVD/UpperBidiagonalization.h +200 -235
  318. package/eigen/Eigen/src/SparseCholesky/InternalHeaderCheck.h +3 -0
  319. package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +765 -594
  320. package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +308 -94
  321. package/eigen/Eigen/src/SparseCore/AmbiVector.h +202 -251
  322. package/eigen/Eigen/src/SparseCore/CompressedStorage.h +184 -252
  323. package/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +134 -178
  324. package/eigen/Eigen/src/SparseCore/InternalHeaderCheck.h +3 -0
  325. package/eigen/Eigen/src/SparseCore/SparseAssign.h +149 -140
  326. package/eigen/Eigen/src/SparseCore/SparseBlock.h +403 -440
  327. package/eigen/Eigen/src/SparseCore/SparseColEtree.h +100 -112
  328. package/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +525 -303
  329. package/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +555 -339
  330. package/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +100 -108
  331. package/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +169 -197
  332. package/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +71 -71
  333. package/eigen/Eigen/src/SparseCore/SparseDot.h +49 -47
  334. package/eigen/Eigen/src/SparseCore/SparseFuzzy.h +13 -11
  335. package/eigen/Eigen/src/SparseCore/SparseMap.h +243 -253
  336. package/eigen/Eigen/src/SparseCore/SparseMatrix.h +1603 -1245
  337. package/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +403 -350
  338. package/eigen/Eigen/src/SparseCore/SparsePermutation.h +186 -115
  339. package/eigen/Eigen/src/SparseCore/SparseProduct.h +94 -97
  340. package/eigen/Eigen/src/SparseCore/SparseRedux.h +22 -24
  341. package/eigen/Eigen/src/SparseCore/SparseRef.h +268 -295
  342. package/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +370 -416
  343. package/eigen/Eigen/src/SparseCore/SparseSolverBase.h +78 -87
  344. package/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +81 -95
  345. package/eigen/Eigen/src/SparseCore/SparseTranspose.h +62 -71
  346. package/eigen/Eigen/src/SparseCore/SparseTriangularView.h +132 -144
  347. package/eigen/Eigen/src/SparseCore/SparseUtil.h +138 -115
  348. package/eigen/Eigen/src/SparseCore/SparseVector.h +426 -372
  349. package/eigen/Eigen/src/SparseCore/SparseView.h +164 -193
  350. package/eigen/Eigen/src/SparseCore/TriangularSolver.h +129 -170
  351. package/eigen/Eigen/src/SparseLU/InternalHeaderCheck.h +3 -0
  352. package/eigen/Eigen/src/SparseLU/SparseLU.h +756 -710
  353. package/eigen/Eigen/src/SparseLU/SparseLUImpl.h +61 -48
  354. package/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +102 -118
  355. package/eigen/Eigen/src/SparseLU/SparseLU_Structs.h +38 -35
  356. package/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +245 -301
  357. package/eigen/Eigen/src/SparseLU/SparseLU_Utils.h +44 -49
  358. package/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h +104 -108
  359. package/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +89 -100
  360. package/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +57 -58
  361. package/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +43 -55
  362. package/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +74 -71
  363. package/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +124 -132
  364. package/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h +136 -159
  365. package/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h +51 -52
  366. package/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h +67 -73
  367. package/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h +24 -26
  368. package/eigen/Eigen/src/SparseQR/InternalHeaderCheck.h +3 -0
  369. package/eigen/Eigen/src/SparseQR/SparseQR.h +450 -502
  370. package/eigen/Eigen/src/StlSupport/StdDeque.h +28 -93
  371. package/eigen/Eigen/src/StlSupport/StdList.h +28 -84
  372. package/eigen/Eigen/src/StlSupport/StdVector.h +28 -108
  373. package/eigen/Eigen/src/StlSupport/details.h +48 -50
  374. package/eigen/Eigen/src/SuperLUSupport/InternalHeaderCheck.h +3 -0
  375. package/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +634 -730
  376. package/eigen/Eigen/src/ThreadPool/Barrier.h +70 -0
  377. package/eigen/Eigen/src/ThreadPool/CoreThreadPoolDevice.h +336 -0
  378. package/eigen/Eigen/src/ThreadPool/EventCount.h +241 -0
  379. package/eigen/Eigen/src/ThreadPool/ForkJoin.h +140 -0
  380. package/eigen/Eigen/src/ThreadPool/InternalHeaderCheck.h +4 -0
  381. package/eigen/Eigen/src/ThreadPool/NonBlockingThreadPool.h +587 -0
  382. package/eigen/Eigen/src/ThreadPool/RunQueue.h +230 -0
  383. package/eigen/Eigen/src/ThreadPool/ThreadCancel.h +21 -0
  384. package/eigen/Eigen/src/ThreadPool/ThreadEnvironment.h +43 -0
  385. package/eigen/Eigen/src/ThreadPool/ThreadLocal.h +289 -0
  386. package/eigen/Eigen/src/ThreadPool/ThreadPoolInterface.h +50 -0
  387. package/eigen/Eigen/src/ThreadPool/ThreadYield.h +16 -0
  388. package/eigen/Eigen/src/UmfPackSupport/InternalHeaderCheck.h +3 -0
  389. package/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +428 -464
  390. package/eigen/Eigen/src/misc/Image.h +41 -43
  391. package/eigen/Eigen/src/misc/InternalHeaderCheck.h +3 -0
  392. package/eigen/Eigen/src/misc/Kernel.h +39 -41
  393. package/eigen/Eigen/src/misc/RealSvd2x2.h +19 -21
  394. package/eigen/Eigen/src/misc/blas.h +83 -426
  395. package/eigen/Eigen/src/misc/lapacke.h +9972 -16179
  396. package/eigen/Eigen/src/misc/lapacke_helpers.h +163 -0
  397. package/eigen/Eigen/src/misc/lapacke_mangling.h +4 -5
  398. package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.inc +344 -0
  399. package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.inc +544 -0
  400. package/eigen/Eigen/src/plugins/{BlockMethods.h → BlockMethods.inc} +434 -506
  401. package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.inc +116 -0
  402. package/eigen/Eigen/src/plugins/{CommonCwiseUnaryOps.h → CommonCwiseUnaryOps.inc} +58 -68
  403. package/eigen/Eigen/src/plugins/IndexedViewMethods.inc +192 -0
  404. package/eigen/Eigen/src/plugins/InternalHeaderCheck.inc +3 -0
  405. package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.inc +331 -0
  406. package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.inc +118 -0
  407. package/eigen/Eigen/src/plugins/ReshapedMethods.inc +133 -0
  408. package/package.json +1 -1
  409. package/eigen/COPYING.APACHE +0 -203
  410. package/eigen/COPYING.BSD +0 -26
  411. package/eigen/COPYING.GPL +0 -674
  412. package/eigen/COPYING.LGPL +0 -502
  413. package/eigen/COPYING.MINPACK +0 -51
  414. package/eigen/COPYING.MPL2 +0 -373
  415. package/eigen/COPYING.README +0 -18
  416. package/eigen/Eigen/src/Core/BooleanRedux.h +0 -162
  417. package/eigen/Eigen/src/Core/arch/CUDA/Complex.h +0 -258
  418. package/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +0 -120
  419. package/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +0 -694
  420. package/eigen/Eigen/src/Core/util/NonMPL2.h +0 -3
  421. package/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h +0 -67
  422. package/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +0 -280
  423. package/eigen/Eigen/src/misc/lapack.h +0 -152
  424. package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +0 -358
  425. package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +0 -696
  426. package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h +0 -115
  427. package/eigen/Eigen/src/plugins/IndexedViewMethods.h +0 -262
  428. package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +0 -152
  429. package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +0 -95
  430. package/eigen/Eigen/src/plugins/ReshapedMethods.h +0 -149
  431. package/eigen/README.md +0 -5
@@ -11,17 +11,20 @@
11
11
  #ifndef EIGEN_GENERIC_PACKET_MATH_H
12
12
  #define EIGEN_GENERIC_PACKET_MATH_H
13
13
 
14
+ // IWYU pragma: private
15
+ #include "./InternalHeaderCheck.h"
16
+
14
17
  namespace Eigen {
15
18
 
16
19
  namespace internal {
17
20
 
18
21
  /** \internal
19
- * \file GenericPacketMath.h
20
- *
21
- * Default implementation for types not supported by the vectorization.
22
- * In practice these functions are provided to make easier the writing
23
- * of generic vectorized code.
24
- */
22
+ * \file GenericPacketMath.h
23
+ *
24
+ * Default implementation for types not supported by the vectorization.
25
+ * In practice these functions are provided to make easier the writing
26
+ * of generic vectorized code.
27
+ */
25
28
 
26
29
  #ifndef EIGEN_DEBUG_ALIGNED_LOAD
27
30
  #define EIGEN_DEBUG_ALIGNED_LOAD
@@ -39,48 +42,53 @@ namespace internal {
39
42
  #define EIGEN_DEBUG_UNALIGNED_STORE
40
43
  #endif
41
44
 
42
- struct default_packet_traits
43
- {
45
+ struct default_packet_traits {
44
46
  enum {
45
- HasHalfPacket = 0,
46
-
47
- HasAdd = 1,
48
- HasSub = 1,
49
- HasShift = 1,
50
- HasMul = 1,
51
- HasNegate = 1,
52
- HasAbs = 1,
53
- HasArg = 0,
54
- HasAbs2 = 1,
55
- HasAbsDiff = 0,
56
- HasMin = 1,
57
- HasMax = 1,
58
- HasConj = 1,
47
+ // Ops that are implemented for most types.
48
+ HasAdd = 1,
49
+ HasSub = 1,
50
+ HasShift = 1,
51
+ HasMul = 1,
52
+ HasNegate = 1,
53
+ HasAbs = 1,
54
+ HasAbs2 = 1,
55
+ HasMin = 1,
56
+ HasMax = 1,
57
+ HasConj = 1,
59
58
  HasSetLinear = 1,
60
- HasBlend = 0,
59
+ HasSign = 1,
60
+ // By default, the nearest integer functions (rint, round, floor, ceil, trunc) are enabled for all scalar and packet
61
+ // types
62
+ HasRound = 1,
63
+
64
+ HasArg = 0,
65
+ HasAbsDiff = 0,
66
+ HasBlend = 0,
61
67
  // This flag is used to indicate whether packet comparison is supported.
62
- // pcmp_eq, pcmp_lt and pcmp_le should be defined for it to be true.
63
- HasCmp = 0,
64
-
65
- HasDiv = 0,
66
- HasSqrt = 0,
67
- HasRsqrt = 0,
68
- HasExp = 0,
69
- HasExpm1 = 0,
70
- HasLog = 0,
71
- HasLog1p = 0,
72
- HasLog10 = 0,
73
- HasPow = 0,
74
-
75
- HasSin = 0,
76
- HasCos = 0,
77
- HasTan = 0,
78
- HasASin = 0,
79
- HasACos = 0,
80
- HasATan = 0,
81
- HasSinh = 0,
82
- HasCosh = 0,
83
- HasTanh = 0,
68
+ // pcmp_eq and pcmp_lt should be defined for it to be true.
69
+ HasCmp = 0,
70
+
71
+ HasDiv = 0,
72
+ HasReciprocal = 0,
73
+ HasSqrt = 0,
74
+ HasRsqrt = 0,
75
+ HasCbrt = 0,
76
+ HasExp = 0,
77
+ HasExpm1 = 0,
78
+ HasLog = 0,
79
+ HasLog1p = 0,
80
+ HasLog10 = 0,
81
+ HasPow = 0,
82
+ HasSin = 0,
83
+ HasCos = 0,
84
+ HasTan = 0,
85
+ HasASin = 0,
86
+ HasACos = 0,
87
+ HasATan = 0,
88
+ HasATanh = 0,
89
+ HasSinh = 0,
90
+ HasCosh = 0,
91
+ HasTanh = 0,
84
92
  HasLGamma = 0,
85
93
  HasDiGamma = 0,
86
94
  HasZeta = 0,
@@ -93,76 +101,134 @@ struct default_packet_traits
93
101
  HasIGammaDerA = 0,
94
102
  HasGammaSampleDerAlpha = 0,
95
103
  HasIGammac = 0,
96
- HasBetaInc = 0,
97
-
98
- HasRound = 0,
99
- HasRint = 0,
100
- HasFloor = 0,
101
- HasCeil = 0,
102
- HasSign = 0
104
+ HasBetaInc = 0
103
105
  };
104
106
  };
105
107
 
106
- template<typename T> struct packet_traits : default_packet_traits
107
- {
108
+ template <typename T>
109
+ struct packet_traits : default_packet_traits {
108
110
  typedef T type;
109
111
  typedef T half;
110
112
  enum {
111
113
  Vectorizable = 0,
112
114
  size = 1,
113
115
  AlignedOnScalar = 0,
114
- HasHalfPacket = 0
115
116
  };
116
117
  enum {
117
- HasAdd = 0,
118
- HasSub = 0,
119
- HasMul = 0,
118
+ HasAdd = 0,
119
+ HasSub = 0,
120
+ HasMul = 0,
120
121
  HasNegate = 0,
121
- HasAbs = 0,
122
- HasAbs2 = 0,
123
- HasMin = 0,
124
- HasMax = 0,
125
- HasConj = 0,
122
+ HasAbs = 0,
123
+ HasAbs2 = 0,
124
+ HasMin = 0,
125
+ HasMax = 0,
126
+ HasConj = 0,
126
127
  HasSetLinear = 0
127
128
  };
128
129
  };
129
130
 
130
- template<typename T> struct packet_traits<const T> : packet_traits<T> { };
131
+ template <typename T>
132
+ struct packet_traits<const T> : packet_traits<T> {};
131
133
 
132
- template<typename T> struct unpacket_traits
133
- {
134
+ template <typename T>
135
+ struct unpacket_traits {
134
136
  typedef T type;
135
137
  typedef T half;
136
- enum
137
- {
138
+ typedef typename numext::get_integer_by_size<sizeof(T)>::signed_type integer_packet;
139
+ enum {
138
140
  size = 1,
139
- alignment = 1,
141
+ alignment = alignof(T),
140
142
  vectorizable = false,
141
- masked_load_available=false,
142
- masked_store_available=false
143
+ masked_load_available = false,
144
+ masked_store_available = false
143
145
  };
144
146
  };
145
147
 
146
- template<typename T> struct unpacket_traits<const T> : unpacket_traits<T> { };
148
+ template <typename T>
149
+ struct unpacket_traits<const T> : unpacket_traits<T> {};
150
+
151
+ /** \internal A convenience utility for determining if the type is a scalar.
152
+ * This is used to enable some generic packet implementations.
153
+ */
154
+ template <typename Packet>
155
+ struct is_scalar {
156
+ using Scalar = typename unpacket_traits<Packet>::type;
157
+ enum { value = internal::is_same<Packet, Scalar>::value };
158
+ };
159
+
160
+ // automatically and succinctly define combinations of pcast<SrcPacket,TgtPacket> when
161
+ // 1) the packets are the same type, or
162
+ // 2) the packets differ only in sign.
163
+ // In both of these cases, preinterpret (bit_cast) is equivalent to pcast (static_cast)
164
+ template <typename SrcPacket, typename TgtPacket,
165
+ bool Scalar = is_scalar<SrcPacket>::value && is_scalar<TgtPacket>::value>
166
+ struct is_degenerate_helper : is_same<SrcPacket, TgtPacket> {};
167
+ template <>
168
+ struct is_degenerate_helper<int8_t, uint8_t, true> : std::true_type {};
169
+ template <>
170
+ struct is_degenerate_helper<int16_t, uint16_t, true> : std::true_type {};
171
+ template <>
172
+ struct is_degenerate_helper<int32_t, uint32_t, true> : std::true_type {};
173
+ template <>
174
+ struct is_degenerate_helper<int64_t, uint64_t, true> : std::true_type {};
147
175
 
148
- template <typename Src, typename Tgt> struct type_casting_traits {
176
+ template <typename SrcPacket, typename TgtPacket>
177
+ struct is_degenerate_helper<SrcPacket, TgtPacket, false> {
178
+ using SrcScalar = typename unpacket_traits<SrcPacket>::type;
179
+ static constexpr int SrcSize = unpacket_traits<SrcPacket>::size;
180
+ using TgtScalar = typename unpacket_traits<TgtPacket>::type;
181
+ static constexpr int TgtSize = unpacket_traits<TgtPacket>::size;
182
+ static constexpr bool value = is_degenerate_helper<SrcScalar, TgtScalar, true>::value && (SrcSize == TgtSize);
183
+ };
184
+
185
+ // is_degenerate<T1,T2>::value == is_degenerate<T2,T1>::value
186
+ template <typename SrcPacket, typename TgtPacket>
187
+ struct is_degenerate {
188
+ static constexpr bool value =
189
+ is_degenerate_helper<SrcPacket, TgtPacket>::value || is_degenerate_helper<TgtPacket, SrcPacket>::value;
190
+ };
191
+
192
+ template <typename Packet>
193
+ struct is_half {
194
+ using Scalar = typename unpacket_traits<Packet>::type;
195
+ static constexpr int Size = unpacket_traits<Packet>::size;
196
+ using DefaultPacket = typename packet_traits<Scalar>::type;
197
+ static constexpr int DefaultSize = unpacket_traits<DefaultPacket>::size;
198
+ static constexpr bool value = Size != 1 && Size < DefaultSize;
199
+ };
200
+
201
+ template <typename Src, typename Tgt>
202
+ struct type_casting_traits {
149
203
  enum {
150
- VectorizedCast = 0,
204
+ VectorizedCast =
205
+ is_degenerate<Src, Tgt>::value && packet_traits<Src>::Vectorizable && packet_traits<Tgt>::Vectorizable,
151
206
  SrcCoeffRatio = 1,
152
207
  TgtCoeffRatio = 1
153
208
  };
154
209
  };
155
210
 
211
+ // provides a succinct template to define vectorized casting traits with respect to the largest accessible packet types
212
+ template <typename Src, typename Tgt>
213
+ struct vectorized_type_casting_traits {
214
+ enum : int {
215
+ DefaultSrcPacketSize = packet_traits<Src>::size,
216
+ DefaultTgtPacketSize = packet_traits<Tgt>::size,
217
+ VectorizedCast = 1,
218
+ SrcCoeffRatio = plain_enum_max(DefaultTgtPacketSize / DefaultSrcPacketSize, 1),
219
+ TgtCoeffRatio = plain_enum_max(DefaultSrcPacketSize / DefaultTgtPacketSize, 1)
220
+ };
221
+ };
222
+
156
223
  /** \internal Wrapper to ensure that multiple packet types can map to the same
157
224
  underlying vector type. */
158
- template<typename T, int unique_id = 0>
159
- struct eigen_packet_wrapper
160
- {
225
+ template <typename T, int unique_id = 0>
226
+ struct eigen_packet_wrapper {
161
227
  EIGEN_ALWAYS_INLINE operator T&() { return m_val; }
162
228
  EIGEN_ALWAYS_INLINE operator const T&() const { return m_val; }
163
- EIGEN_ALWAYS_INLINE eigen_packet_wrapper() {}
164
- EIGEN_ALWAYS_INLINE eigen_packet_wrapper(const T &v) : m_val(v) {}
165
- EIGEN_ALWAYS_INLINE eigen_packet_wrapper& operator=(const T &v) {
229
+ EIGEN_ALWAYS_INLINE eigen_packet_wrapper() = default;
230
+ EIGEN_ALWAYS_INLINE eigen_packet_wrapper(const T& v) : m_val(v) {}
231
+ EIGEN_ALWAYS_INLINE eigen_packet_wrapper& operator=(const T& v) {
166
232
  m_val = v;
167
233
  return *this;
168
234
  }
@@ -170,109 +236,181 @@ struct eigen_packet_wrapper
170
236
  T m_val;
171
237
  };
172
238
 
239
+ template <typename Target, typename Packet, bool IsSame = is_same<Target, Packet>::value>
240
+ struct preinterpret_generic;
173
241
 
174
- /** \internal A convenience utility for determining if the type is a scalar.
175
- * This is used to enable some generic packet implementations.
176
- */
177
- template<typename Packet>
178
- struct is_scalar {
179
- typedef typename unpacket_traits<Packet>::type Scalar;
180
- enum {
181
- value = internal::is_same<Packet, Scalar>::value
182
- };
242
+ template <typename Target, typename Packet>
243
+ struct preinterpret_generic<Target, Packet, false> {
244
+ // the packets are not the same, attempt scalar bit_cast
245
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Target run(const Packet& a) {
246
+ return numext::bit_cast<Target, Packet>(a);
247
+ }
248
+ };
249
+
250
+ template <typename Packet>
251
+ struct preinterpret_generic<Packet, Packet, true> {
252
+ // the packets are the same type: do nothing
253
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(const Packet& a) { return a; }
254
+ };
255
+
256
+ template <typename ComplexPacket>
257
+ struct preinterpret_generic<typename unpacket_traits<ComplexPacket>::as_real, ComplexPacket, false> {
258
+ using RealPacket = typename unpacket_traits<ComplexPacket>::as_real;
259
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE RealPacket run(const ComplexPacket& a) { return a.v; }
260
+ };
261
+
262
+ /** \internal \returns reinterpret_cast<Target>(a) */
263
+ template <typename Target, typename Packet>
264
+ EIGEN_DEVICE_FUNC inline Target preinterpret(const Packet& a) {
265
+ return preinterpret_generic<Target, Packet>::run(a);
266
+ }
267
+
268
+ template <typename SrcPacket, typename TgtPacket, bool Degenerate = is_degenerate<SrcPacket, TgtPacket>::value,
269
+ bool TgtIsHalf = is_half<TgtPacket>::value>
270
+ struct pcast_generic;
271
+
272
+ template <typename SrcPacket, typename TgtPacket>
273
+ struct pcast_generic<SrcPacket, TgtPacket, false, false> {
274
+ // the packets are not degenerate: attempt scalar static_cast
275
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket run(const SrcPacket& a) {
276
+ return cast_impl<SrcPacket, TgtPacket>::run(a);
277
+ }
278
+ };
279
+
280
+ template <typename Packet>
281
+ struct pcast_generic<Packet, Packet, true, false> {
282
+ // the packets are the same: do nothing
283
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(const Packet& a) { return a; }
284
+ };
285
+
286
+ template <typename SrcPacket, typename TgtPacket, bool TgtIsHalf>
287
+ struct pcast_generic<SrcPacket, TgtPacket, true, TgtIsHalf> {
288
+ // the packets are degenerate: preinterpret is equivalent to pcast
289
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket run(const SrcPacket& a) { return preinterpret<TgtPacket>(a); }
183
290
  };
184
291
 
185
292
  /** \internal \returns static_cast<TgtType>(a) (coeff-wise) */
186
293
  template <typename SrcPacket, typename TgtPacket>
187
- EIGEN_DEVICE_FUNC inline TgtPacket
188
- pcast(const SrcPacket& a) {
189
- return static_cast<TgtPacket>(a);
294
+ EIGEN_DEVICE_FUNC inline TgtPacket pcast(const SrcPacket& a) {
295
+ return pcast_generic<SrcPacket, TgtPacket>::run(a);
190
296
  }
191
297
  template <typename SrcPacket, typename TgtPacket>
192
- EIGEN_DEVICE_FUNC inline TgtPacket
193
- pcast(const SrcPacket& a, const SrcPacket& /*b*/) {
194
- return static_cast<TgtPacket>(a);
298
+ EIGEN_DEVICE_FUNC inline TgtPacket pcast(const SrcPacket& a, const SrcPacket& b) {
299
+ return pcast_generic<SrcPacket, TgtPacket>::run(a, b);
195
300
  }
196
301
  template <typename SrcPacket, typename TgtPacket>
197
- EIGEN_DEVICE_FUNC inline TgtPacket
198
- pcast(const SrcPacket& a, const SrcPacket& /*b*/, const SrcPacket& /*c*/, const SrcPacket& /*d*/) {
199
- return static_cast<TgtPacket>(a);
302
+ EIGEN_DEVICE_FUNC inline TgtPacket pcast(const SrcPacket& a, const SrcPacket& b, const SrcPacket& c,
303
+ const SrcPacket& d) {
304
+ return pcast_generic<SrcPacket, TgtPacket>::run(a, b, c, d);
200
305
  }
201
306
  template <typename SrcPacket, typename TgtPacket>
202
- EIGEN_DEVICE_FUNC inline TgtPacket
203
- pcast(const SrcPacket& a, const SrcPacket& /*b*/, const SrcPacket& /*c*/, const SrcPacket& /*d*/,
204
- const SrcPacket& /*e*/, const SrcPacket& /*f*/, const SrcPacket& /*g*/, const SrcPacket& /*h*/) {
205
- return static_cast<TgtPacket>(a);
307
+ EIGEN_DEVICE_FUNC inline TgtPacket pcast(const SrcPacket& a, const SrcPacket& b, const SrcPacket& c, const SrcPacket& d,
308
+ const SrcPacket& e, const SrcPacket& f, const SrcPacket& g,
309
+ const SrcPacket& h) {
310
+ return pcast_generic<SrcPacket, TgtPacket>::run(a, b, c, d, e, f, g, h);
206
311
  }
207
312
 
208
- /** \internal \returns reinterpret_cast<Target>(a) */
209
- template <typename Target, typename Packet>
210
- EIGEN_DEVICE_FUNC inline Target
211
- preinterpret(const Packet& a); /* { return reinterpret_cast<const Target&>(a); } */
313
+ template <typename SrcPacket, typename TgtPacket>
314
+ struct pcast_generic<SrcPacket, TgtPacket, false, true> {
315
+ // TgtPacket is a half packet of some other type
316
+ // perform cast and truncate result
317
+ using DefaultTgtPacket = typename is_half<TgtPacket>::DefaultPacket;
318
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket run(const SrcPacket& a) {
319
+ return preinterpret<TgtPacket>(pcast<SrcPacket, DefaultTgtPacket>(a));
320
+ }
321
+ };
212
322
 
213
323
  /** \internal \returns a + b (coeff-wise) */
214
- template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
215
- padd(const Packet& a, const Packet& b) { return a+b; }
324
+ template <typename Packet>
325
+ EIGEN_DEVICE_FUNC inline Packet padd(const Packet& a, const Packet& b) {
326
+ return a + b;
327
+ }
216
328
  // Avoid compiler warning for boolean algebra.
217
- template<> EIGEN_DEVICE_FUNC inline bool
218
- padd(const bool& a, const bool& b) { return a || b; }
329
+ template <>
330
+ EIGEN_DEVICE_FUNC inline bool padd(const bool& a, const bool& b) {
331
+ return a || b;
332
+ }
333
+
334
+ /** \internal \returns a + b computed under the mask \a umask (masked add)
335
+ * There is no generic implementation. We only have implementations for specialized
336
+ * cases. Generic case should not be called.
337
+ */
338
+ template <typename Packet>
339
+ EIGEN_DEVICE_FUNC inline std::enable_if_t<unpacket_traits<Packet>::masked_fpops_available, Packet> padd(
340
+ const Packet& a, const Packet& b, typename unpacket_traits<Packet>::mask_t umask);
219
341
 
220
342
  /** \internal \returns a - b (coeff-wise) */
221
- template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
222
- psub(const Packet& a, const Packet& b) { return a-b; }
343
+ template <typename Packet>
344
+ EIGEN_DEVICE_FUNC inline Packet psub(const Packet& a, const Packet& b) {
345
+ return a - b;
346
+ }
223
347
 
224
348
  /** \internal \returns -a (coeff-wise) */
225
- template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
226
- pnegate(const Packet& a) { return -a; }
227
-
228
- template<> EIGEN_DEVICE_FUNC inline bool
229
- pnegate(const bool& a) { return !a; }
349
+ template <typename Packet>
350
+ EIGEN_DEVICE_FUNC inline Packet pnegate(const Packet& a) {
351
+ EIGEN_STATIC_ASSERT((!is_same<typename unpacket_traits<Packet>::type, bool>::value),
352
+ NEGATE IS NOT DEFINED FOR BOOLEAN TYPES)
353
+ return numext::negate(a);
354
+ }
230
355
 
231
356
  /** \internal \returns conj(a) (coeff-wise) */
232
- template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
233
- pconj(const Packet& a) { return numext::conj(a); }
357
+ template <typename Packet>
358
+ EIGEN_DEVICE_FUNC inline Packet pconj(const Packet& a) {
359
+ return numext::conj(a);
360
+ }
234
361
 
235
362
  /** \internal \returns a * b (coeff-wise) */
236
- template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
237
- pmul(const Packet& a, const Packet& b) { return a*b; }
363
+ template <typename Packet>
364
+ EIGEN_DEVICE_FUNC inline Packet pmul(const Packet& a, const Packet& b) {
365
+ return a * b;
366
+ }
238
367
  // Avoid compiler warning for boolean algebra.
239
- template<> EIGEN_DEVICE_FUNC inline bool
240
- pmul(const bool& a, const bool& b) { return a && b; }
368
+ template <>
369
+ EIGEN_DEVICE_FUNC inline bool pmul(const bool& a, const bool& b) {
370
+ return a && b;
371
+ }
241
372
 
242
373
  /** \internal \returns a / b (coeff-wise) */
243
- template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
244
- pdiv(const Packet& a, const Packet& b) { return a/b; }
374
+ template <typename Packet>
375
+ EIGEN_DEVICE_FUNC inline Packet pdiv(const Packet& a, const Packet& b) {
376
+ return a / b;
377
+ }
378
+ // Avoid compiler warning for boolean algebra.
379
+ template <>
380
+ EIGEN_DEVICE_FUNC inline bool pdiv(const bool& a, const bool& b) {
381
+ return a && b;
382
+ }
245
383
 
246
- // In the generic case, memset to all one bits.
247
- template<typename Packet, typename EnableIf = void>
384
+ // In the generic packet case, memset to all one bits.
385
+ template <typename Packet, typename EnableIf = void>
248
386
  struct ptrue_impl {
249
- static EIGEN_DEVICE_FUNC inline Packet run(const Packet& /*a*/){
387
+ static EIGEN_DEVICE_FUNC inline Packet run(const Packet& /*a*/) {
250
388
  Packet b;
251
389
  memset(static_cast<void*>(&b), 0xff, sizeof(Packet));
252
390
  return b;
253
391
  }
254
392
  };
255
393
 
256
- // For non-trivial scalars, set to Scalar(1) (i.e. a non-zero value).
257
- // Although this is technically not a valid bitmask, the scalar path for pselect
258
- // uses a comparison to zero, so this should still work in most cases. We don't
259
- // have another option, since the scalar type requires initialization.
260
- template<typename T>
261
- struct ptrue_impl<T,
262
- typename internal::enable_if<is_scalar<T>::value && NumTraits<T>::RequireInitialization>::type > {
263
- static EIGEN_DEVICE_FUNC inline T run(const T& /*a*/){
264
- return T(1);
265
- }
394
+ // Use a value of one for scalars.
395
+ template <typename Scalar>
396
+ struct ptrue_impl<Scalar, std::enable_if_t<is_scalar<Scalar>::value>> {
397
+ static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar&) { return Scalar(1); }
398
+ };
399
+
400
+ // For booleans, we can only directly set a valid `bool` value to avoid UB.
401
+ template <>
402
+ struct ptrue_impl<bool, void> {
403
+ static EIGEN_DEVICE_FUNC inline bool run(const bool&) { return true; }
266
404
  };
267
405
 
268
406
  /** \internal \returns one bits. */
269
- template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
270
- ptrue(const Packet& a) {
407
+ template <typename Packet>
408
+ EIGEN_DEVICE_FUNC inline Packet ptrue(const Packet& a) {
271
409
  return ptrue_impl<Packet>::run(a);
272
410
  }
273
411
 
274
- // In the general case, memset to zero.
275
- template<typename Packet, typename EnableIf = void>
412
+ // In the general packet case, memset to zero.
413
+ template <typename Packet, typename EnableIf = void>
276
414
  struct pzero_impl {
277
415
  static EIGEN_DEVICE_FUNC inline Packet run(const Packet& /*a*/) {
278
416
  Packet b;
@@ -283,66 +421,59 @@ struct pzero_impl {
283
421
 
284
422
  // For scalars, explicitly set to Scalar(0), since the underlying representation
285
423
  // for zero may not consist of all-zero bits.
286
- template<typename T>
287
- struct pzero_impl<T,
288
- typename internal::enable_if<is_scalar<T>::value>::type> {
289
- static EIGEN_DEVICE_FUNC inline T run(const T& /*a*/) {
290
- return T(0);
291
- }
424
+ template <typename T>
425
+ struct pzero_impl<T, std::enable_if_t<is_scalar<T>::value>> {
426
+ static EIGEN_DEVICE_FUNC inline T run(const T& /*a*/) { return T(0); }
292
427
  };
293
428
 
294
429
  /** \internal \returns packet of zeros */
295
- template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
296
- pzero(const Packet& a) {
430
+ template <typename Packet>
431
+ EIGEN_DEVICE_FUNC inline Packet pzero(const Packet& a) {
297
432
  return pzero_impl<Packet>::run(a);
298
433
  }
299
434
 
300
- /** \internal \returns a <= b as a bit mask */
301
- template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
302
- pcmp_le(const Packet& a, const Packet& b) { return a<=b ? ptrue(a) : pzero(a); }
303
-
304
- /** \internal \returns a < b as a bit mask */
305
- template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
306
- pcmp_lt(const Packet& a, const Packet& b) { return a<b ? ptrue(a) : pzero(a); }
307
-
308
- /** \internal \returns a == b as a bit mask */
309
- template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
310
- pcmp_eq(const Packet& a, const Packet& b) { return a==b ? ptrue(a) : pzero(a); }
311
-
312
- /** \internal \returns a < b or a==NaN or b==NaN as a bit mask */
313
- template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
314
- pcmp_lt_or_nan(const Packet& a, const Packet& b) { return a>=b ? pzero(a) : ptrue(a); }
315
-
316
- template<typename T>
435
+ template <typename T>
317
436
  struct bit_and {
318
- EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE T operator()(const T& a, const T& b) const {
319
- return a & b;
320
- }
437
+ EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE T operator()(const T& a, const T& b) const { return a & b; }
321
438
  };
322
439
 
323
- template<typename T>
440
+ template <typename T>
324
441
  struct bit_or {
325
- EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE T operator()(const T& a, const T& b) const {
326
- return a | b;
327
- }
442
+ EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE T operator()(const T& a, const T& b) const { return a | b; }
328
443
  };
329
444
 
330
- template<typename T>
445
+ template <typename T>
331
446
  struct bit_xor {
332
- EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE T operator()(const T& a, const T& b) const {
333
- return a ^ b;
334
- }
447
+ EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE T operator()(const T& a, const T& b) const { return a ^ b; }
335
448
  };
336
449
 
337
- template<typename T>
450
+ template <typename T>
338
451
  struct bit_not {
339
- EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE T operator()(const T& a) const {
340
- return ~a;
341
- }
452
+ EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE T operator()(const T& a) const { return ~a; }
453
+ };
454
+
455
+ template <>
456
+ struct bit_and<bool> {
457
+ EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE bool operator()(const bool& a, const bool& b) const { return a && b; }
458
+ };
459
+
460
+ template <>
461
+ struct bit_or<bool> {
462
+ EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE bool operator()(const bool& a, const bool& b) const { return a || b; }
463
+ };
464
+
465
+ template <>
466
+ struct bit_xor<bool> {
467
+ EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE bool operator()(const bool& a, const bool& b) const { return a != b; }
468
+ };
469
+
470
+ template <>
471
+ struct bit_not<bool> {
472
+ EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE bool operator()(const bool& a) const { return !a; }
342
473
  };
343
474
 
344
475
  // Use operators &, |, ^, ~.
345
- template<typename T>
476
+ template <typename T>
346
477
  struct operator_bitwise_helper {
347
478
  EIGEN_DEVICE_FUNC static inline T bitwise_and(const T& a, const T& b) { return bit_and<T>()(a, b); }
348
479
  EIGEN_DEVICE_FUNC static inline T bitwise_or(const T& a, const T& b) { return bit_or<T>()(a, b); }
@@ -351,23 +482,19 @@ struct operator_bitwise_helper {
351
482
  };
352
483
 
353
484
  // Apply binary operations byte-by-byte
354
- template<typename T>
485
+ template <typename T>
355
486
  struct bytewise_bitwise_helper {
356
487
  EIGEN_DEVICE_FUNC static inline T bitwise_and(const T& a, const T& b) {
357
488
  return binary(a, b, bit_and<unsigned char>());
358
489
  }
359
- EIGEN_DEVICE_FUNC static inline T bitwise_or(const T& a, const T& b) {
360
- return binary(a, b, bit_or<unsigned char>());
361
- }
490
+ EIGEN_DEVICE_FUNC static inline T bitwise_or(const T& a, const T& b) { return binary(a, b, bit_or<unsigned char>()); }
362
491
  EIGEN_DEVICE_FUNC static inline T bitwise_xor(const T& a, const T& b) {
363
492
  return binary(a, b, bit_xor<unsigned char>());
364
493
  }
365
- EIGEN_DEVICE_FUNC static inline T bitwise_not(const T& a) {
366
- return unary(a,bit_not<unsigned char>());
367
- }
368
-
494
+ EIGEN_DEVICE_FUNC static inline T bitwise_not(const T& a) { return unary(a, bit_not<unsigned char>()); }
495
+
369
496
  private:
370
- template<typename Op>
497
+ template <typename Op>
371
498
  EIGEN_DEVICE_FUNC static inline T unary(const T& a, Op op) {
372
499
  const unsigned char* a_ptr = reinterpret_cast<const unsigned char*>(&a);
373
500
  T c;
@@ -378,7 +505,7 @@ struct bytewise_bitwise_helper {
378
505
  return c;
379
506
  }
380
507
 
381
- template<typename Op>
508
+ template <typename Op>
382
509
  EIGEN_DEVICE_FUNC static inline T binary(const T& a, const T& b, Op op) {
383
510
  const unsigned char* a_ptr = reinterpret_cast<const unsigned char*>(&a);
384
511
  const unsigned char* b_ptr = reinterpret_cast<const unsigned char*>(&b);
@@ -392,186 +519,215 @@ struct bytewise_bitwise_helper {
392
519
  };
393
520
 
394
521
  // In the general case, use byte-by-byte manipulation.
395
- template<typename T, typename EnableIf = void>
522
+ template <typename T, typename EnableIf = void>
396
523
  struct bitwise_helper : public bytewise_bitwise_helper<T> {};
397
524
 
398
525
  // For integers or non-trivial scalars, use binary operators.
399
- template<typename T>
400
- struct bitwise_helper<T,
401
- typename internal::enable_if<
402
- is_scalar<T>::value && (NumTraits<T>::IsInteger || NumTraits<T>::RequireInitialization)>::type
403
- > : public operator_bitwise_helper<T> {};
526
+ template <typename T>
527
+ struct bitwise_helper<T, typename std::enable_if_t<is_scalar<T>::value &&
528
+ (NumTraits<T>::IsInteger || NumTraits<T>::RequireInitialization)>>
529
+ : public operator_bitwise_helper<T> {};
404
530
 
405
531
  /** \internal \returns the bitwise and of \a a and \a b */
406
- template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
407
- pand(const Packet& a, const Packet& b) {
532
+ template <typename Packet>
533
+ EIGEN_DEVICE_FUNC inline Packet pand(const Packet& a, const Packet& b) {
408
534
  return bitwise_helper<Packet>::bitwise_and(a, b);
409
535
  }
410
536
 
411
537
  /** \internal \returns the bitwise or of \a a and \a b */
412
- template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
413
- por(const Packet& a, const Packet& b) {
538
+ template <typename Packet>
539
+ EIGEN_DEVICE_FUNC inline Packet por(const Packet& a, const Packet& b) {
414
540
  return bitwise_helper<Packet>::bitwise_or(a, b);
415
541
  }
416
542
 
417
543
  /** \internal \returns the bitwise xor of \a a and \a b */
418
- template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
419
- pxor(const Packet& a, const Packet& b) {
544
+ template <typename Packet>
545
+ EIGEN_DEVICE_FUNC inline Packet pxor(const Packet& a, const Packet& b) {
420
546
  return bitwise_helper<Packet>::bitwise_xor(a, b);
421
547
  }
422
548
 
423
549
  /** \internal \returns the bitwise not of \a a */
424
- template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
425
- pnot(const Packet& a) {
550
+ template <typename Packet>
551
+ EIGEN_DEVICE_FUNC inline Packet pnot(const Packet& a) {
426
552
  return bitwise_helper<Packet>::bitwise_not(a);
427
553
  }
428
554
 
429
555
  /** \internal \returns the bitwise and of \a a and not \a b */
430
- template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
431
- pandnot(const Packet& a, const Packet& b) { return pand(a, pnot(b)); }
556
+ template <typename Packet>
557
+ EIGEN_DEVICE_FUNC inline Packet pandnot(const Packet& a, const Packet& b) {
558
+ return pand(a, pnot(b));
559
+ }
560
+
561
+ /** \internal \returns a < b as a bit mask */
562
+ template <typename Packet>
563
+ EIGEN_DEVICE_FUNC inline Packet pcmp_lt(const Packet& a, const Packet& b) {
564
+ return a < b ? ptrue(a) : pzero(a);
565
+ }
566
+
567
+ /** \internal \returns a == b as a bit mask */
568
+ template <typename Packet>
569
+ EIGEN_DEVICE_FUNC inline Packet pcmp_eq(const Packet& a, const Packet& b) {
570
+ return a == b ? ptrue(a) : pzero(a);
571
+ }
572
+
573
+ /** \internal \returns a <= b as a bit mask */
574
+ template <typename Packet>
575
+ EIGEN_DEVICE_FUNC inline Packet pcmp_le(const Packet& a, const Packet& b) {
576
+ return por(pcmp_eq(a, b), pcmp_lt(a, b));
577
+ }
578
+
579
+ /** \internal \returns a < b or a==NaN or b==NaN as a bit mask */
580
+ template <typename Packet>
581
+ EIGEN_DEVICE_FUNC inline Packet pcmp_lt_or_nan(const Packet& a, const Packet& b) {
582
+ return a >= b ? pzero(a) : ptrue(a);
583
+ }
432
584
 
433
585
  // In the general case, use bitwise select.
434
- template<typename Packet, typename EnableIf = void>
586
+ template <typename Packet, bool is_scalar = is_scalar<Packet>::value>
435
587
  struct pselect_impl {
436
588
  static EIGEN_DEVICE_FUNC inline Packet run(const Packet& mask, const Packet& a, const Packet& b) {
437
- return por(pand(a,mask),pandnot(b,mask));
589
+ return por(pand(a, mask), pandnot(b, mask));
438
590
  }
439
591
  };
440
592
 
441
593
  // For scalars, use ternary select.
442
- template<typename Packet>
443
- struct pselect_impl<Packet,
444
- typename internal::enable_if<is_scalar<Packet>::value>::type > {
594
+ template <typename Packet>
595
+ struct pselect_impl<Packet, true> {
445
596
  static EIGEN_DEVICE_FUNC inline Packet run(const Packet& mask, const Packet& a, const Packet& b) {
446
- return numext::equal_strict(mask, Packet(0)) ? b : a;
597
+ return numext::select(mask, a, b);
447
598
  }
448
599
  };
449
600
 
450
601
  /** \internal \returns \a a or \a b for each field in packet according to \a mask */
451
- template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
452
- pselect(const Packet& mask, const Packet& a, const Packet& b) {
602
+ template <typename Packet>
603
+ EIGEN_DEVICE_FUNC inline Packet pselect(const Packet& mask, const Packet& a, const Packet& b) {
453
604
  return pselect_impl<Packet>::run(mask, a, b);
454
605
  }
455
606
 
456
- template<> EIGEN_DEVICE_FUNC inline bool pselect<bool>(
457
- const bool& cond, const bool& a, const bool& b) {
607
+ template <>
608
+ EIGEN_DEVICE_FUNC inline bool pselect<bool>(const bool& cond, const bool& a, const bool& b) {
458
609
  return cond ? a : b;
459
610
  }
460
611
 
461
612
  /** \internal \returns the min or max of \a a and \a b (coeff-wise)
462
613
  If either \a a or \a b are NaN, the result is implementation defined. */
463
- template<int NaNPropagation>
614
+ template <int NaNPropagation, bool IsInteger>
464
615
  struct pminmax_impl {
465
616
  template <typename Packet, typename Op>
466
617
  static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a, const Packet& b, Op op) {
467
- return op(a,b);
618
+ return op(a, b);
468
619
  }
469
620
  };
470
621
 
471
622
  /** \internal \returns the min or max of \a a and \a b (coeff-wise)
472
623
  If either \a a or \a b are NaN, NaN is returned. */
473
- template<>
474
- struct pminmax_impl<PropagateNaN> {
624
+ template <>
625
+ struct pminmax_impl<PropagateNaN, false> {
475
626
  template <typename Packet, typename Op>
476
627
  static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a, const Packet& b, Op op) {
477
- Packet not_nan_mask_a = pcmp_eq(a, a);
478
- Packet not_nan_mask_b = pcmp_eq(b, b);
479
- return pselect(not_nan_mask_a,
480
- pselect(not_nan_mask_b, op(a, b), b),
481
- a);
628
+ Packet not_nan_mask_a = pcmp_eq(a, a);
629
+ Packet not_nan_mask_b = pcmp_eq(b, b);
630
+ return pselect(not_nan_mask_a, pselect(not_nan_mask_b, op(a, b), b), a);
482
631
  }
483
632
  };
484
633
 
485
634
  /** \internal \returns the min or max of \a a and \a b (coeff-wise)
486
635
  If both \a a and \a b are NaN, NaN is returned.
487
636
  Equivalent to std::fmin(a, b) or std::fmax(a, b), depending on the operation. */
488
- template<>
489
- struct pminmax_impl<PropagateNumbers> {
637
+ template <>
638
+ struct pminmax_impl<PropagateNumbers, false> {
490
639
  template <typename Packet, typename Op>
491
640
  static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a, const Packet& b, Op op) {
492
- Packet not_nan_mask_a = pcmp_eq(a, a);
493
- Packet not_nan_mask_b = pcmp_eq(b, b);
494
- return pselect(not_nan_mask_a,
495
- pselect(not_nan_mask_b, op(a, b), a),
496
- b);
641
+ Packet not_nan_mask_a = pcmp_eq(a, a);
642
+ Packet not_nan_mask_b = pcmp_eq(b, b);
643
+ return pselect(not_nan_mask_a, pselect(not_nan_mask_b, op(a, b), a), b);
497
644
  }
498
645
  };
499
646
 
500
-
501
- #ifndef SYCL_DEVICE_ONLY
502
- #define EIGEN_BINARY_OP_NAN_PROPAGATION(Type, Func) Func
503
- #else
504
- #define EIGEN_BINARY_OP_NAN_PROPAGATION(Type, Func) \
505
- [](const Type& a, const Type& b) { \
506
- return Func(a, b);}
507
- #endif
647
+ #define EIGEN_BINARY_OP_NAN_PROPAGATION(Type, Func) [](const Type& aa, const Type& bb) { return Func(aa, bb); }
508
648
 
509
649
  /** \internal \returns the min of \a a and \a b (coeff-wise).
510
650
  If \a a or \a b is NaN, the return value is implementation defined. */
511
- template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
512
- pmin(const Packet& a, const Packet& b) { return numext::mini(a,b); }
651
+ template <typename Packet>
652
+ EIGEN_DEVICE_FUNC inline Packet pmin(const Packet& a, const Packet& b) {
653
+ return numext::mini(a, b);
654
+ }
513
655
 
514
656
  /** \internal \returns the min of \a a and \a b (coeff-wise).
515
657
  NaNPropagation determines the NaN propagation semantics. */
516
658
  template <int NaNPropagation, typename Packet>
517
659
  EIGEN_DEVICE_FUNC inline Packet pmin(const Packet& a, const Packet& b) {
518
- return pminmax_impl<NaNPropagation>::run(a, b, EIGEN_BINARY_OP_NAN_PROPAGATION(Packet, (pmin<Packet>)));
660
+ constexpr bool IsInteger = NumTraits<typename unpacket_traits<Packet>::type>::IsInteger;
661
+ return pminmax_impl<NaNPropagation, IsInteger>::run(a, b, EIGEN_BINARY_OP_NAN_PROPAGATION(Packet, (pmin<Packet>)));
519
662
  }
520
663
 
521
664
  /** \internal \returns the max of \a a and \a b (coeff-wise)
522
665
  If \a a or \a b is NaN, the return value is implementation defined. */
523
- template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
524
- pmax(const Packet& a, const Packet& b) { return numext::maxi(a, b); }
666
+ template <typename Packet>
667
+ EIGEN_DEVICE_FUNC inline Packet pmax(const Packet& a, const Packet& b) {
668
+ return numext::maxi(a, b);
669
+ }
525
670
 
526
671
  /** \internal \returns the max of \a a and \a b (coeff-wise).
527
672
  NaNPropagation determines the NaN propagation semantics. */
528
673
  template <int NaNPropagation, typename Packet>
529
674
  EIGEN_DEVICE_FUNC inline Packet pmax(const Packet& a, const Packet& b) {
530
- return pminmax_impl<NaNPropagation>::run(a, b, EIGEN_BINARY_OP_NAN_PROPAGATION(Packet,(pmax<Packet>)));
675
+ constexpr bool IsInteger = NumTraits<typename unpacket_traits<Packet>::type>::IsInteger;
676
+ return pminmax_impl<NaNPropagation, IsInteger>::run(a, b, EIGEN_BINARY_OP_NAN_PROPAGATION(Packet, (pmax<Packet>)));
531
677
  }
532
678
 
533
679
  /** \internal \returns the absolute value of \a a */
534
- template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
535
- pabs(const Packet& a) { return numext::abs(a); }
536
- template<> EIGEN_DEVICE_FUNC inline unsigned int
537
- pabs(const unsigned int& a) { return a; }
538
- template<> EIGEN_DEVICE_FUNC inline unsigned long
539
- pabs(const unsigned long& a) { return a; }
540
- template<> EIGEN_DEVICE_FUNC inline unsigned long long
541
- pabs(const unsigned long long& a) { return a; }
680
+ template <typename Packet>
681
+ EIGEN_DEVICE_FUNC inline Packet pabs(const Packet& a) {
682
+ return numext::abs(a);
683
+ }
684
+ template <>
685
+ EIGEN_DEVICE_FUNC inline unsigned int pabs(const unsigned int& a) {
686
+ return a;
687
+ }
688
+ template <>
689
+ EIGEN_DEVICE_FUNC inline unsigned long pabs(const unsigned long& a) {
690
+ return a;
691
+ }
692
+ template <>
693
+ EIGEN_DEVICE_FUNC inline unsigned long long pabs(const unsigned long long& a) {
694
+ return a;
695
+ }
542
696
 
543
697
  /** \internal \returns the addsub value of \a a,b */
544
- template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
545
- paddsub(const Packet& a, const Packet& b) {
698
+ template <typename Packet>
699
+ EIGEN_DEVICE_FUNC inline Packet paddsub(const Packet& a, const Packet& b) {
546
700
  return pselect(peven_mask(a), padd(a, b), psub(a, b));
547
- }
701
+ }
548
702
 
549
703
  /** \internal \returns the phase angle of \a a */
550
- template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
551
- parg(const Packet& a) { using numext::arg; return arg(a); }
704
+ template <typename Packet>
705
+ EIGEN_DEVICE_FUNC inline Packet parg(const Packet& a) {
706
+ using numext::arg;
707
+ return arg(a);
708
+ }
552
709
 
710
+ /** \internal \returns \a a arithmetically shifted by N bits to the right */
711
+ template <int N, typename T>
712
+ EIGEN_DEVICE_FUNC inline T parithmetic_shift_right(const T& a) {
713
+ return numext::arithmetic_shift_right(a, N);
714
+ }
553
715
 
554
716
  /** \internal \returns \a a logically shifted by N bits to the right */
555
- template<int N> EIGEN_DEVICE_FUNC inline int
556
- parithmetic_shift_right(const int& a) { return a >> N; }
557
- template<int N> EIGEN_DEVICE_FUNC inline long int
558
- parithmetic_shift_right(const long int& a) { return a >> N; }
559
-
560
- /** \internal \returns \a a arithmetically shifted by N bits to the right */
561
- template<int N> EIGEN_DEVICE_FUNC inline int
562
- plogical_shift_right(const int& a) { return static_cast<int>(static_cast<unsigned int>(a) >> N); }
563
- template<int N> EIGEN_DEVICE_FUNC inline long int
564
- plogical_shift_right(const long int& a) { return static_cast<long>(static_cast<unsigned long>(a) >> N); }
717
+ template <int N, typename T>
718
+ EIGEN_DEVICE_FUNC inline T plogical_shift_right(const T& a) {
719
+ return numext::logical_shift_right(a, N);
720
+ }
565
721
 
566
722
  /** \internal \returns \a a shifted by N bits to the left */
567
- template<int N> EIGEN_DEVICE_FUNC inline int
568
- plogical_shift_left(const int& a) { return a << N; }
569
- template<int N> EIGEN_DEVICE_FUNC inline long int
570
- plogical_shift_left(const long int& a) { return a << N; }
723
+ template <int N, typename T>
724
+ EIGEN_DEVICE_FUNC inline T plogical_shift_left(const T& a) {
725
+ return numext::logical_shift_left(a, N);
726
+ }
571
727
 
572
728
  /** \internal \returns the significand and exponent of the underlying floating point numbers
573
- * See https://en.cppreference.com/w/cpp/numeric/math/frexp
574
- */
729
+ * See https://en.cppreference.com/w/cpp/numeric/math/frexp
730
+ */
575
731
  template <typename Packet>
576
732
  EIGEN_DEVICE_FUNC inline Packet pfrexp(const Packet& a, Packet& exponent) {
577
733
  int exp;
@@ -582,142 +738,250 @@ EIGEN_DEVICE_FUNC inline Packet pfrexp(const Packet& a, Packet& exponent) {
582
738
  }
583
739
 
584
740
  /** \internal \returns a * 2^((int)exponent)
585
- * See https://en.cppreference.com/w/cpp/numeric/math/ldexp
586
- */
587
- template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
588
- pldexp(const Packet &a, const Packet &exponent) {
741
+ * See https://en.cppreference.com/w/cpp/numeric/math/ldexp
742
+ */
743
+ template <typename Packet>
744
+ EIGEN_DEVICE_FUNC inline Packet pldexp(const Packet& a, const Packet& exponent) {
589
745
  EIGEN_USING_STD(ldexp)
590
746
  return static_cast<Packet>(ldexp(a, static_cast<int>(exponent)));
591
747
  }
592
748
 
593
749
  /** \internal \returns the absolute difference of \a a and \a b (coeff-wise) */
594
- template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
595
- pabsdiff(const Packet& a, const Packet& b) { return pselect(pcmp_lt(a, b), psub(b, a), psub(a, b)); }
750
+ template <typename Packet>
751
+ EIGEN_DEVICE_FUNC inline Packet pabsdiff(const Packet& a, const Packet& b) {
752
+ return pselect(pcmp_lt(a, b), psub(b, a), psub(a, b));
753
+ }
596
754
 
597
- /** \internal \returns a packet version of \a *from, from must be 16 bytes aligned */
598
- template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
599
- pload(const typename unpacket_traits<Packet>::type* from) { return *from; }
755
+ /** \internal \returns a packet version of \a *from, from must be properly aligned */
756
+ template <typename Packet>
757
+ EIGEN_DEVICE_FUNC inline Packet pload(const typename unpacket_traits<Packet>::type* from) {
758
+ return *from;
759
+ }
760
+
761
+ /** \internal \returns n elements of a packet version of \a *from, from must be properly aligned
762
+ * offset indicates the starting element in which to load and
763
+ * offset + n <= unpacket_traits::size
764
+ * All elements before offset and after the last element loaded will be initialized with zero */
765
+ template <typename Packet>
766
+ EIGEN_DEVICE_FUNC inline Packet pload_partial(const typename unpacket_traits<Packet>::type* from, const Index n,
767
+ const Index offset = 0) {
768
+ const Index packet_size = unpacket_traits<Packet>::size;
769
+ eigen_assert(n + offset <= packet_size && "number of elements plus offset will read past end of packet");
770
+ typedef typename unpacket_traits<Packet>::type Scalar;
771
+ EIGEN_ALIGN_MAX Scalar elements[packet_size] = {Scalar(0)};
772
+ for (Index i = offset; i < numext::mini(n + offset, packet_size); i++) {
773
+ elements[i] = from[i - offset];
774
+ }
775
+ return pload<Packet>(elements);
776
+ }
600
777
 
601
778
  /** \internal \returns a packet version of \a *from, (un-aligned load) */
602
- template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
603
- ploadu(const typename unpacket_traits<Packet>::type* from) { return *from; }
779
+ template <typename Packet>
780
+ EIGEN_DEVICE_FUNC inline Packet ploadu(const typename unpacket_traits<Packet>::type* from) {
781
+ return *from;
782
+ }
783
+
784
+ /** \internal \returns n elements of a packet version of \a *from, (un-aligned load)
785
+ * All elements after the last element loaded will be initialized with zero */
786
+ template <typename Packet>
787
+ EIGEN_DEVICE_FUNC inline Packet ploadu_partial(const typename unpacket_traits<Packet>::type* from, const Index n,
788
+ const Index offset = 0) {
789
+ const Index packet_size = unpacket_traits<Packet>::size;
790
+ eigen_assert(n + offset <= packet_size && "number of elements plus offset will read past end of packet");
791
+ typedef typename unpacket_traits<Packet>::type Scalar;
792
+ EIGEN_ALIGN_MAX Scalar elements[packet_size] = {Scalar(0)};
793
+ for (Index i = offset; i < numext::mini(n + offset, packet_size); i++) {
794
+ elements[i] = from[i - offset];
795
+ }
796
+ return pload<Packet>(elements);
797
+ }
604
798
 
605
799
  /** \internal \returns a packet version of \a *from, (un-aligned masked load)
606
800
  * There is no generic implementation. We only have implementations for specialized
607
801
  * cases. Generic case should not be called.
608
802
  */
609
- template<typename Packet> EIGEN_DEVICE_FUNC inline
610
- typename enable_if<unpacket_traits<Packet>::masked_load_available, Packet>::type
611
- ploadu(const typename unpacket_traits<Packet>::type* from, typename unpacket_traits<Packet>::mask_t umask);
803
+ template <typename Packet>
804
+ EIGEN_DEVICE_FUNC inline std::enable_if_t<unpacket_traits<Packet>::masked_load_available, Packet> ploadu(
805
+ const typename unpacket_traits<Packet>::type* from, typename unpacket_traits<Packet>::mask_t umask);
612
806
 
613
807
  /** \internal \returns a packet with constant coefficients \a a, e.g.: (a,a,a,a) */
614
- template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
615
- pset1(const typename unpacket_traits<Packet>::type& a) { return a; }
808
+ template <typename Packet>
809
+ EIGEN_DEVICE_FUNC inline Packet pset1(const typename unpacket_traits<Packet>::type& a) {
810
+ return a;
811
+ }
616
812
 
617
813
  /** \internal \returns a packet with constant coefficients set from bits */
618
- template<typename Packet,typename BitsType> EIGEN_DEVICE_FUNC inline Packet
619
- pset1frombits(BitsType a);
814
+ template <typename Packet, typename BitsType>
815
+ EIGEN_DEVICE_FUNC inline Packet pset1frombits(BitsType a);
620
816
 
621
817
  /** \internal \returns a packet with constant coefficients \a a[0], e.g.: (a[0],a[0],a[0],a[0]) */
622
- template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
623
- pload1(const typename unpacket_traits<Packet>::type *a) { return pset1<Packet>(*a); }
818
+ template <typename Packet>
819
+ EIGEN_DEVICE_FUNC inline Packet pload1(const typename unpacket_traits<Packet>::type* a) {
820
+ return pset1<Packet>(*a);
821
+ }
624
822
 
625
823
  /** \internal \returns a packet with elements of \a *from duplicated.
626
- * For instance, for a packet of 8 elements, 4 scalars will be read from \a *from and
627
- * duplicated to form: {from[0],from[0],from[1],from[1],from[2],from[2],from[3],from[3]}
628
- * Currently, this function is only used for scalar * complex products.
629
- */
630
- template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet
631
- ploaddup(const typename unpacket_traits<Packet>::type* from) { return *from; }
824
+ * For instance, for a packet of 8 elements, 4 scalars will be read from \a *from and
825
+ * duplicated to form: {from[0],from[0],from[1],from[1],from[2],from[2],from[3],from[3]}
826
+ * Currently, this function is only used for scalar * complex products.
827
+ */
828
+ template <typename Packet>
829
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet ploaddup(const typename unpacket_traits<Packet>::type* from) {
830
+ return *from;
831
+ }
632
832
 
633
833
  /** \internal \returns a packet with elements of \a *from quadrupled.
634
- * For instance, for a packet of 8 elements, 2 scalars will be read from \a *from and
635
- * replicated to form: {from[0],from[0],from[0],from[0],from[1],from[1],from[1],from[1]}
636
- * Currently, this function is only used in matrix products.
637
- * For packet-size smaller or equal to 4, this function is equivalent to pload1
638
- */
639
- template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
640
- ploadquad(const typename unpacket_traits<Packet>::type* from)
641
- { return pload1<Packet>(from); }
834
+ * For instance, for a packet of 8 elements, 2 scalars will be read from \a *from and
835
+ * replicated to form: {from[0],from[0],from[0],from[0],from[1],from[1],from[1],from[1]}
836
+ * Currently, this function is only used in matrix products.
837
+ * For packet-size smaller or equal to 4, this function is equivalent to pload1
838
+ */
839
+ template <typename Packet>
840
+ EIGEN_DEVICE_FUNC inline Packet ploadquad(const typename unpacket_traits<Packet>::type* from) {
841
+ return pload1<Packet>(from);
842
+ }
642
843
 
643
844
  /** \internal equivalent to
644
- * \code
645
- * a0 = pload1(a+0);
646
- * a1 = pload1(a+1);
647
- * a2 = pload1(a+2);
648
- * a3 = pload1(a+3);
649
- * \endcode
650
- * \sa pset1, pload1, ploaddup, pbroadcast2
651
- */
652
- template<typename Packet> EIGEN_DEVICE_FUNC
653
- inline void pbroadcast4(const typename unpacket_traits<Packet>::type *a,
654
- Packet& a0, Packet& a1, Packet& a2, Packet& a3)
655
- {
656
- a0 = pload1<Packet>(a+0);
657
- a1 = pload1<Packet>(a+1);
658
- a2 = pload1<Packet>(a+2);
659
- a3 = pload1<Packet>(a+3);
845
+ * \code
846
+ * a0 = pload1(a+0);
847
+ * a1 = pload1(a+1);
848
+ * a2 = pload1(a+2);
849
+ * a3 = pload1(a+3);
850
+ * \endcode
851
+ * \sa pset1, pload1, ploaddup, pbroadcast2
852
+ */
853
+ template <typename Packet>
854
+ EIGEN_DEVICE_FUNC inline void pbroadcast4(const typename unpacket_traits<Packet>::type* a, Packet& a0, Packet& a1,
855
+ Packet& a2, Packet& a3) {
856
+ a0 = pload1<Packet>(a + 0);
857
+ a1 = pload1<Packet>(a + 1);
858
+ a2 = pload1<Packet>(a + 2);
859
+ a3 = pload1<Packet>(a + 3);
660
860
  }
661
861
 
662
862
  /** \internal equivalent to
663
- * \code
664
- * a0 = pload1(a+0);
665
- * a1 = pload1(a+1);
666
- * \endcode
667
- * \sa pset1, pload1, ploaddup, pbroadcast4
668
- */
669
- template<typename Packet> EIGEN_DEVICE_FUNC
670
- inline void pbroadcast2(const typename unpacket_traits<Packet>::type *a,
671
- Packet& a0, Packet& a1)
672
- {
673
- a0 = pload1<Packet>(a+0);
674
- a1 = pload1<Packet>(a+1);
863
+ * \code
864
+ * a0 = pload1(a+0);
865
+ * a1 = pload1(a+1);
866
+ * \endcode
867
+ * \sa pset1, pload1, ploaddup, pbroadcast4
868
+ */
869
+ template <typename Packet>
870
+ EIGEN_DEVICE_FUNC inline void pbroadcast2(const typename unpacket_traits<Packet>::type* a, Packet& a0, Packet& a1) {
871
+ a0 = pload1<Packet>(a + 0);
872
+ a1 = pload1<Packet>(a + 1);
675
873
  }
676
874
 
677
875
  /** \internal \brief Returns a packet with coefficients (a,a+1,...,a+packet_size-1). */
678
- template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet
679
- plset(const typename unpacket_traits<Packet>::type& a) { return a; }
876
+ template <typename Packet>
877
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet plset(const typename unpacket_traits<Packet>::type& a) {
878
+ return a;
879
+ }
880
+
881
+ template <typename Packet, typename EnableIf = void>
882
+ struct peven_mask_impl {
883
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(const Packet&) {
884
+ typedef typename unpacket_traits<Packet>::type Scalar;
885
+ const size_t n = unpacket_traits<Packet>::size;
886
+ EIGEN_ALIGN_TO_BOUNDARY(sizeof(Packet)) Scalar elements[n];
887
+ for (size_t i = 0; i < n; ++i) {
888
+ memset(elements + i, ((i & 1) == 0 ? 0xff : 0), sizeof(Scalar));
889
+ }
890
+ return ploadu<Packet>(elements);
891
+ }
892
+ };
893
+
894
+ template <typename Scalar>
895
+ struct peven_mask_impl<Scalar, std::enable_if_t<is_scalar<Scalar>::value>> {
896
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run(const Scalar&) { return Scalar(1); }
897
+ };
680
898
 
681
899
  /** \internal \returns a packet with constant coefficients \a a, e.g.: (x, 0, x, 0),
682
900
  where x is the value of all 1-bits. */
683
- template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
684
- peven_mask(const Packet& /*a*/) {
685
- typedef typename unpacket_traits<Packet>::type Scalar;
686
- const size_t n = unpacket_traits<Packet>::size;
687
- EIGEN_ALIGN_TO_BOUNDARY(sizeof(Packet)) Scalar elements[n];
688
- for(size_t i = 0; i < n; ++i) {
689
- memset(elements+i, ((i & 1) == 0 ? 0xff : 0), sizeof(Scalar));
690
- }
691
- return ploadu<Packet>(elements);
901
+ template <typename Packet>
902
+ EIGEN_DEVICE_FUNC inline Packet peven_mask(const Packet& a) {
903
+ return peven_mask_impl<Packet>::run(a);
692
904
  }
693
905
 
906
+ /** \internal copy the packet \a from to \a *to, \a to must be properly aligned */
907
+ template <typename Scalar, typename Packet>
908
+ EIGEN_DEVICE_FUNC inline void pstore(Scalar* to, const Packet& from) {
909
+ (*to) = from;
910
+ }
694
911
 
695
- /** \internal copy the packet \a from to \a *to, \a to must be 16 bytes aligned */
696
- template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstore(Scalar* to, const Packet& from)
697
- { (*to) = from; }
912
+ /** \internal copy n elements of the packet \a from to \a *to, \a to must be properly aligned
913
+ * offset indicates the starting element in which to store and
914
+ * offset + n <= unpacket_traits::size */
915
+ template <typename Scalar, typename Packet>
916
+ EIGEN_DEVICE_FUNC inline void pstore_partial(Scalar* to, const Packet& from, const Index n, const Index offset = 0) {
917
+ const Index packet_size = unpacket_traits<Packet>::size;
918
+ eigen_assert(n + offset <= packet_size && "number of elements plus offset will write past end of packet");
919
+ EIGEN_ALIGN_MAX Scalar elements[packet_size];
920
+ pstore<Scalar>(elements, from);
921
+ for (Index i = 0; i < numext::mini(n, packet_size - offset); i++) {
922
+ to[i] = elements[i + offset];
923
+ }
924
+ }
698
925
 
699
926
  /** \internal copy the packet \a from to \a *to, (un-aligned store) */
700
- template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstoreu(Scalar* to, const Packet& from)
701
- { (*to) = from; }
927
+ template <typename Scalar, typename Packet>
928
+ EIGEN_DEVICE_FUNC inline void pstoreu(Scalar* to, const Packet& from) {
929
+ (*to) = from;
930
+ }
931
+
932
+ /** \internal copy n elements of the packet \a from to \a *to, (un-aligned store) */
933
+ template <typename Scalar, typename Packet>
934
+ EIGEN_DEVICE_FUNC inline void pstoreu_partial(Scalar* to, const Packet& from, const Index n, const Index offset = 0) {
935
+ const Index packet_size = unpacket_traits<Packet>::size;
936
+ eigen_assert(n + offset <= packet_size && "number of elements plus offset will write past end of packet");
937
+ EIGEN_ALIGN_MAX Scalar elements[packet_size];
938
+ pstore<Scalar>(elements, from);
939
+ for (Index i = 0; i < numext::mini(n, packet_size - offset); i++) {
940
+ to[i] = elements[i + offset];
941
+ }
942
+ }
702
943
 
703
944
  /** \internal copy the packet \a from to \a *to, (un-aligned store with a mask)
704
945
  * There is no generic implementation. We only have implementations for specialized
705
946
  * cases. Generic case should not be called.
706
947
  */
707
- template<typename Scalar, typename Packet>
708
- EIGEN_DEVICE_FUNC inline
709
- typename enable_if<unpacket_traits<Packet>::masked_store_available, void>::type
710
- pstoreu(Scalar* to, const Packet& from, typename unpacket_traits<Packet>::mask_t umask);
948
+ template <typename Scalar, typename Packet>
949
+ EIGEN_DEVICE_FUNC inline std::enable_if_t<unpacket_traits<Packet>::masked_store_available, void> pstoreu(
950
+ Scalar* to, const Packet& from, typename unpacket_traits<Packet>::mask_t umask);
711
951
 
712
- template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline Packet pgather(const Scalar* from, Index /*stride*/)
713
- { return ploadu<Packet>(from); }
952
+ template <typename Scalar, typename Packet>
953
+ EIGEN_DEVICE_FUNC inline Packet pgather(const Scalar* from, Index /*stride*/) {
954
+ return ploadu<Packet>(from);
955
+ }
956
+
957
+ template <typename Scalar, typename Packet>
958
+ EIGEN_DEVICE_FUNC inline Packet pgather_partial(const Scalar* from, Index stride, const Index n) {
959
+ const Index packet_size = unpacket_traits<Packet>::size;
960
+ EIGEN_ALIGN_MAX Scalar elements[packet_size] = {Scalar(0)};
961
+ for (Index i = 0; i < numext::mini(n, packet_size); i++) {
962
+ elements[i] = from[i * stride];
963
+ }
964
+ return pload<Packet>(elements);
965
+ }
714
966
 
715
- template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pscatter(Scalar* to, const Packet& from, Index /*stride*/)
716
- { pstore(to, from); }
967
+ template <typename Scalar, typename Packet>
968
+ EIGEN_DEVICE_FUNC inline void pscatter(Scalar* to, const Packet& from, Index /*stride*/) {
969
+ pstore(to, from);
970
+ }
971
+
972
+ template <typename Scalar, typename Packet>
973
+ EIGEN_DEVICE_FUNC inline void pscatter_partial(Scalar* to, const Packet& from, Index stride, const Index n) {
974
+ const Index packet_size = unpacket_traits<Packet>::size;
975
+ EIGEN_ALIGN_MAX Scalar elements[packet_size];
976
+ pstore<Scalar>(elements, from);
977
+ for (Index i = 0; i < numext::mini(n, packet_size); i++) {
978
+ to[i * stride] = elements[i];
979
+ }
980
+ }
717
981
 
718
982
  /** \internal tries to do cache prefetching of \a addr */
719
- template<typename Scalar> EIGEN_DEVICE_FUNC inline void prefetch(const Scalar* addr)
720
- {
983
+ template <typename Scalar>
984
+ EIGEN_DEVICE_FUNC inline void prefetch(const Scalar* addr) {
721
985
  #if defined(EIGEN_HIP_DEVICE_COMPILE)
722
986
  // do nothing
723
987
  #elif defined(EIGEN_CUDA_ARCH)
@@ -734,135 +998,246 @@ template<typename Scalar> EIGEN_DEVICE_FUNC inline void prefetch(const Scalar* a
734
998
  }
735
999
 
736
1000
  /** \internal \returns the reversed elements of \a a*/
737
- template<typename Packet> EIGEN_DEVICE_FUNC inline Packet preverse(const Packet& a)
738
- { return a; }
1001
+ template <typename Packet>
1002
+ EIGEN_DEVICE_FUNC inline Packet preverse(const Packet& a) {
1003
+ return a;
1004
+ }
739
1005
 
740
1006
  /** \internal \returns \a a with real and imaginary part flipped (for complex type only) */
741
- template<typename Packet> EIGEN_DEVICE_FUNC inline Packet pcplxflip(const Packet& a)
742
- {
743
- return Packet(numext::imag(a),numext::real(a));
1007
+ template <typename Packet>
1008
+ EIGEN_DEVICE_FUNC inline Packet pcplxflip(const Packet& a) {
1009
+ return Packet(numext::imag(a), numext::real(a));
744
1010
  }
745
1011
 
746
1012
  /**************************
747
- * Special math functions
748
- ***************************/
1013
+ * Special math functions
1014
+ ***************************/
1015
+
1016
+ /** \internal \returns isnan(a) */
1017
+ template <typename Packet>
1018
+ EIGEN_DEVICE_FUNC inline Packet pisnan(const Packet& a) {
1019
+ return pandnot(ptrue(a), pcmp_eq(a, a));
1020
+ }
1021
+
1022
+ /** \internal \returns isinf(a) */
1023
+ template <typename Packet>
1024
+ EIGEN_DEVICE_FUNC inline Packet pisinf(const Packet& a) {
1025
+ using Scalar = typename unpacket_traits<Packet>::type;
1026
+ constexpr Scalar inf = NumTraits<Scalar>::infinity();
1027
+ return pcmp_eq(pabs(a), pset1<Packet>(inf));
1028
+ }
749
1029
 
750
1030
  /** \internal \returns the sine of \a a (coeff-wise) */
751
- template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
752
- Packet psin(const Packet& a) { EIGEN_USING_STD(sin); return sin(a); }
1031
+ template <typename Packet>
1032
+ EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet psin(const Packet& a) {
1033
+ EIGEN_USING_STD(sin);
1034
+ return sin(a);
1035
+ }
753
1036
 
754
1037
  /** \internal \returns the cosine of \a a (coeff-wise) */
755
- template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
756
- Packet pcos(const Packet& a) { EIGEN_USING_STD(cos); return cos(a); }
1038
+ template <typename Packet>
1039
+ EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pcos(const Packet& a) {
1040
+ EIGEN_USING_STD(cos);
1041
+ return cos(a);
1042
+ }
757
1043
 
758
1044
  /** \internal \returns the tan of \a a (coeff-wise) */
759
- template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
760
- Packet ptan(const Packet& a) { EIGEN_USING_STD(tan); return tan(a); }
1045
+ template <typename Packet>
1046
+ EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet ptan(const Packet& a) {
1047
+ EIGEN_USING_STD(tan);
1048
+ return tan(a);
1049
+ }
761
1050
 
762
1051
  /** \internal \returns the arc sine of \a a (coeff-wise) */
763
- template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
764
- Packet pasin(const Packet& a) { EIGEN_USING_STD(asin); return asin(a); }
1052
+ template <typename Packet>
1053
+ EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pasin(const Packet& a) {
1054
+ EIGEN_USING_STD(asin);
1055
+ return asin(a);
1056
+ }
765
1057
 
766
1058
  /** \internal \returns the arc cosine of \a a (coeff-wise) */
767
- template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
768
- Packet pacos(const Packet& a) { EIGEN_USING_STD(acos); return acos(a); }
769
-
770
- /** \internal \returns the arc tangent of \a a (coeff-wise) */
771
- template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
772
- Packet patan(const Packet& a) { EIGEN_USING_STD(atan); return atan(a); }
1059
+ template <typename Packet>
1060
+ EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pacos(const Packet& a) {
1061
+ EIGEN_USING_STD(acos);
1062
+ return acos(a);
1063
+ }
773
1064
 
774
1065
  /** \internal \returns the hyperbolic sine of \a a (coeff-wise) */
775
- template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
776
- Packet psinh(const Packet& a) { EIGEN_USING_STD(sinh); return sinh(a); }
1066
+ template <typename Packet>
1067
+ EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet psinh(const Packet& a) {
1068
+ EIGEN_USING_STD(sinh);
1069
+ return sinh(a);
1070
+ }
777
1071
 
778
1072
  /** \internal \returns the hyperbolic cosine of \a a (coeff-wise) */
779
- template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
780
- Packet pcosh(const Packet& a) { EIGEN_USING_STD(cosh); return cosh(a); }
1073
+ template <typename Packet>
1074
+ EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pcosh(const Packet& a) {
1075
+ EIGEN_USING_STD(cosh);
1076
+ return cosh(a);
1077
+ }
1078
+
1079
+ /** \internal \returns the arc tangent of \a a (coeff-wise) */
1080
+ template <typename Packet>
1081
+ EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patan(const Packet& a) {
1082
+ EIGEN_USING_STD(atan);
1083
+ return atan(a);
1084
+ }
781
1085
 
782
1086
  /** \internal \returns the hyperbolic tan of \a a (coeff-wise) */
783
- template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
784
- Packet ptanh(const Packet& a) { EIGEN_USING_STD(tanh); return tanh(a); }
1087
+ template <typename Packet>
1088
+ EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet ptanh(const Packet& a) {
1089
+ EIGEN_USING_STD(tanh);
1090
+ return tanh(a);
1091
+ }
1092
+
1093
+ /** \internal \returns the inverse hyperbolic tangent of \a a (coeff-wise) */
1094
+ template <typename Packet>
1095
+ EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patanh(const Packet& a) {
1096
+ EIGEN_USING_STD(atanh);
1097
+ return atanh(a);
1098
+ }
785
1099
 
786
1100
  /** \internal \returns the exp of \a a (coeff-wise) */
787
- template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
788
- Packet pexp(const Packet& a) { EIGEN_USING_STD(exp); return exp(a); }
1101
+ template <typename Packet>
1102
+ EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexp(const Packet& a) {
1103
+ return numext::exp(a);
1104
+ }
1105
+
1106
+ /** \internal \returns the exp2 of \a a (coeff-wise) */
1107
+ template <typename Packet>
1108
+ EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexp2(const Packet& a) {
1109
+ return numext::exp2(a);
1110
+ }
789
1111
 
790
1112
  /** \internal \returns the expm1 of \a a (coeff-wise) */
791
- template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
792
- Packet pexpm1(const Packet& a) { return numext::expm1(a); }
1113
+ template <typename Packet>
1114
+ EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexpm1(const Packet& a) {
1115
+ return numext::expm1(a);
1116
+ }
793
1117
 
794
1118
  /** \internal \returns the log of \a a (coeff-wise) */
795
- template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
796
- Packet plog(const Packet& a) { EIGEN_USING_STD(log); return log(a); }
1119
+ template <typename Packet>
1120
+ EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog(const Packet& a) {
1121
+ EIGEN_USING_STD(log);
1122
+ return log(a);
1123
+ }
797
1124
 
798
1125
  /** \internal \returns the log1p of \a a (coeff-wise) */
799
- template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
800
- Packet plog1p(const Packet& a) { return numext::log1p(a); }
1126
+ template <typename Packet>
1127
+ EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog1p(const Packet& a) {
1128
+ return numext::log1p(a);
1129
+ }
801
1130
 
802
1131
  /** \internal \returns the log10 of \a a (coeff-wise) */
803
- template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
804
- Packet plog10(const Packet& a) { EIGEN_USING_STD(log10); return log10(a); }
1132
+ template <typename Packet>
1133
+ EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog10(const Packet& a) {
1134
+ EIGEN_USING_STD(log10);
1135
+ return log10(a);
1136
+ }
805
1137
 
806
- /** \internal \returns the log10 of \a a (coeff-wise) */
807
- template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
808
- Packet plog2(const Packet& a) {
809
- typedef typename internal::unpacket_traits<Packet>::type Scalar;
810
- return pmul(pset1<Packet>(Scalar(EIGEN_LOG2E)), plog(a));
1138
+ /** \internal \returns the log2 of \a a (coeff-wise) */
1139
+ template <typename Packet>
1140
+ EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog2(const Packet& a) {
1141
+ using Scalar = typename internal::unpacket_traits<Packet>::type;
1142
+ using RealScalar = typename NumTraits<Scalar>::Real;
1143
+ return pmul(pset1<Packet>(Scalar(RealScalar(EIGEN_LOG2E))), plog(a));
811
1144
  }
812
1145
 
813
1146
  /** \internal \returns the square-root of \a a (coeff-wise) */
814
- template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
815
- Packet psqrt(const Packet& a) { return numext::sqrt(a); }
1147
+ template <typename Packet>
1148
+ EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet psqrt(const Packet& a) {
1149
+ return numext::sqrt(a);
1150
+ }
816
1151
 
817
- /** \internal \returns the reciprocal square-root of \a a (coeff-wise) */
818
- template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
819
- Packet prsqrt(const Packet& a) {
820
- typedef typename internal::unpacket_traits<Packet>::type Scalar;
821
- return pdiv(pset1<Packet>(Scalar(1)), psqrt(a));
1152
+ /** \internal \returns the cube-root of \a a (coeff-wise) */
1153
+ template <typename Packet>
1154
+ EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pcbrt(const Packet& a) {
1155
+ return numext::cbrt(a);
822
1156
  }
823
1157
 
1158
+ template <typename Packet, bool IsScalar = is_scalar<Packet>::value,
1159
+ bool IsInteger = NumTraits<typename unpacket_traits<Packet>::type>::IsInteger>
1160
+ struct nearest_integer_packetop_impl {
1161
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_floor(const Packet& x) { return numext::floor(x); }
1162
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_ceil(const Packet& x) { return numext::ceil(x); }
1163
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_rint(const Packet& x) { return numext::rint(x); }
1164
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_round(const Packet& x) { return numext::round(x); }
1165
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_trunc(const Packet& x) { return numext::trunc(x); }
1166
+ };
1167
+
824
1168
  /** \internal \returns the rounded value of \a a (coeff-wise) */
825
- template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
826
- Packet pround(const Packet& a) { using numext::round; return round(a); }
1169
+ template <typename Packet>
1170
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pround(const Packet& a) {
1171
+ return nearest_integer_packetop_impl<Packet>::run_round(a);
1172
+ }
827
1173
 
828
1174
  /** \internal \returns the floor of \a a (coeff-wise) */
829
- template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
830
- Packet pfloor(const Packet& a) { using numext::floor; return floor(a); }
1175
+ template <typename Packet>
1176
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pfloor(const Packet& a) {
1177
+ return nearest_integer_packetop_impl<Packet>::run_floor(a);
1178
+ }
831
1179
 
832
1180
  /** \internal \returns the rounded value of \a a (coeff-wise) with current
833
1181
  * rounding mode */
834
- template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
835
- Packet print(const Packet& a) { using numext::rint; return rint(a); }
1182
+ template <typename Packet>
1183
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet print(const Packet& a) {
1184
+ return nearest_integer_packetop_impl<Packet>::run_rint(a);
1185
+ }
836
1186
 
837
1187
  /** \internal \returns the ceil of \a a (coeff-wise) */
838
- template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
839
- Packet pceil(const Packet& a) { using numext::ceil; return ceil(a); }
1188
+ template <typename Packet>
1189
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pceil(const Packet& a) {
1190
+ return nearest_integer_packetop_impl<Packet>::run_ceil(a);
1191
+ }
1192
+
1193
+ /** \internal \returns the truncation of \a a (coeff-wise) */
1194
+ template <typename Packet>
1195
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet ptrunc(const Packet& a) {
1196
+ return nearest_integer_packetop_impl<Packet>::run_trunc(a);
1197
+ }
1198
+
1199
+ template <typename Packet, typename EnableIf = void>
1200
+ struct psign_impl {
1201
+ static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a) { return numext::sign(a); }
1202
+ };
1203
+
1204
+ /** \internal \returns the sign of \a a (coeff-wise) */
1205
+ template <typename Packet>
1206
+ EIGEN_DEVICE_FUNC inline Packet psign(const Packet& a) {
1207
+ return psign_impl<Packet>::run(a);
1208
+ }
1209
+
1210
+ template <>
1211
+ EIGEN_DEVICE_FUNC inline bool psign(const bool& a) {
1212
+ return a;
1213
+ }
840
1214
 
841
1215
  /** \internal \returns the first element of a packet */
842
- template<typename Packet>
843
- EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type
844
- pfirst(const Packet& a)
845
- { return a; }
1216
+ template <typename Packet>
1217
+ EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type pfirst(const Packet& a) {
1218
+ return a;
1219
+ }
846
1220
 
847
1221
  /** \internal \returns the sum of the elements of the upper and lower halves of \a a if the size of \a a is larger than 4.
848
- * For a packet {a0, a1, a2, a3, a4, a5, a6, a7}, it returns a half packet {a0+a4, a1+a5, a2+a6, a3+a7}
849
- * For packet-size smaller or equal to 4, this boils down to a noop.
850
- */
851
- template<typename Packet>
852
- EIGEN_DEVICE_FUNC inline typename conditional<(unpacket_traits<Packet>::size%8)==0,typename unpacket_traits<Packet>::half,Packet>::type
853
- predux_half_dowto4(const Packet& a)
854
- { return a; }
1222
+ * For a packet {a0, a1, a2, a3, a4, a5, a6, a7}, it returns a half packet {a0+a4, a1+a5, a2+a6, a3+a7}
1223
+ * For a packet size smaller than or equal to 4, this boils down to a no-op.
1224
+ */
1225
+ template <typename Packet>
1226
+ EIGEN_DEVICE_FUNC inline std::conditional_t<(unpacket_traits<Packet>::size % 8) == 0,
1227
+ typename unpacket_traits<Packet>::half, Packet>
1228
+ predux_half_dowto4(const Packet& a) {
1229
+ return a;
1230
+ }
855
1231
 
856
1232
  // Slow generic implementation of Packet reduction.
857
1233
  template <typename Packet, typename Op>
858
- EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type
859
- predux_helper(const Packet& a, Op op) {
1234
+ EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_helper(const Packet& a, Op op) {
860
1235
  typedef typename unpacket_traits<Packet>::type Scalar;
861
1236
  const size_t n = unpacket_traits<Packet>::size;
862
1237
  EIGEN_ALIGN_TO_BOUNDARY(sizeof(Packet)) Scalar elements[n];
863
1238
  pstoreu<Scalar>(elements, a);
864
- for(size_t k = n / 2; k > 0; k /= 2) {
865
- for(size_t i = 0; i < k; ++i) {
1239
+ for (size_t k = n / 2; k > 0; k /= 2) {
1240
+ for (size_t i = 0; i < k; ++i) {
866
1241
  elements[i] = op(elements[i], elements[i + k]);
867
1242
  }
868
1243
  }
@@ -870,65 +1245,78 @@ predux_helper(const Packet& a, Op op) {
870
1245
  }
871
1246
 
872
1247
  /** \internal \returns the sum of the elements of \a a*/
873
- template<typename Packet>
874
- EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type
875
- predux(const Packet& a)
876
- {
1248
+ template <typename Packet>
1249
+ EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux(const Packet& a) {
877
1250
  return a;
878
1251
  }
879
1252
 
880
1253
  /** \internal \returns the product of the elements of \a a */
881
1254
  template <typename Packet>
882
- EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_mul(
883
- const Packet& a) {
884
- typedef typename unpacket_traits<Packet>::type Scalar;
1255
+ EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_mul(const Packet& a) {
1256
+ typedef typename unpacket_traits<Packet>::type Scalar;
885
1257
  return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmul<Scalar>)));
886
1258
  }
887
1259
 
888
1260
  /** \internal \returns the min of the elements of \a a */
889
1261
  template <typename Packet>
890
- EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_min(
891
- const Packet &a) {
892
- typedef typename unpacket_traits<Packet>::type Scalar;
893
- return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmin<PropagateFast, Scalar>)));
1262
+ EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_min(const Packet& a) {
1263
+ typedef typename unpacket_traits<Packet>::type Scalar;
1264
+ return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmin<Scalar>)));
894
1265
  }
895
1266
 
896
- template <int NaNPropagation, typename Packet>
897
- EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_min(
898
- const Packet& a) {
899
- typedef typename unpacket_traits<Packet>::type Scalar;
900
- return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmin<NaNPropagation, Scalar>)));
1267
+ /** \internal \returns the max of the elements of \a a */
1268
+ template <typename Packet>
1269
+ EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_max(const Packet& a) {
1270
+ typedef typename unpacket_traits<Packet>::type Scalar;
1271
+ return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmax<Scalar>)));
901
1272
  }
902
1273
 
903
- /** \internal \returns the min of the elements of \a a */
904
- template <typename Packet>
905
- EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_max(
906
- const Packet &a) {
907
- typedef typename unpacket_traits<Packet>::type Scalar;
908
- return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmax<PropagateFast, Scalar>)));
1274
+ template <int NaNPropagation, typename Packet>
1275
+ struct predux_min_max_helper_impl {
1276
+ using Scalar = typename unpacket_traits<Packet>::type;
1277
+ static constexpr bool UsePredux_ = NaNPropagation == PropagateFast || NumTraits<Scalar>::IsInteger;
1278
+ template <bool UsePredux = UsePredux_, std::enable_if_t<!UsePredux, bool> = true>
1279
+ static EIGEN_DEVICE_FUNC inline Scalar run_min(const Packet& a) {
1280
+ return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmin<NaNPropagation, Scalar>)));
1281
+ }
1282
+ template <bool UsePredux = UsePredux_, std::enable_if_t<!UsePredux, bool> = true>
1283
+ static EIGEN_DEVICE_FUNC inline Scalar run_max(const Packet& a) {
1284
+ return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmax<NaNPropagation, Scalar>)));
1285
+ }
1286
+ template <bool UsePredux = UsePredux_, std::enable_if_t<UsePredux, bool> = true>
1287
+ static EIGEN_DEVICE_FUNC inline Scalar run_min(const Packet& a) {
1288
+ return predux_min(a);
1289
+ }
1290
+ template <bool UsePredux = UsePredux_, std::enable_if_t<UsePredux, bool> = true>
1291
+ static EIGEN_DEVICE_FUNC inline Scalar run_max(const Packet& a) {
1292
+ return predux_max(a);
1293
+ }
1294
+ };
1295
+
1296
+ template <int NaNPropagation, typename Packet>
1297
+ EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_min(const Packet& a) {
1298
+ return predux_min_max_helper_impl<NaNPropagation, Packet>::run_min(a);
909
1299
  }
910
1300
 
911
1301
  template <int NaNPropagation, typename Packet>
912
- EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_max(
913
- const Packet& a) {
914
- typedef typename unpacket_traits<Packet>::type Scalar;
915
- return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmax<NaNPropagation, Scalar>)));
1302
+ EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_max(const Packet& a) {
1303
+ return predux_min_max_helper_impl<NaNPropagation, Packet>::run_max(a);
916
1304
  }
917
1305
 
918
1306
  #undef EIGEN_BINARY_OP_NAN_PROPAGATION
919
1307
 
920
1308
  /** \internal \returns true if all coeffs of \a a mean "true"
921
- * It is supposed to be called on values returned by pcmp_*.
922
- */
1309
+ * It is supposed to be called on values returned by pcmp_*.
1310
+ */
923
1311
  // not needed yet
924
1312
  // template<typename Packet> EIGEN_DEVICE_FUNC inline bool predux_all(const Packet& a)
925
1313
  // { return bool(a); }
926
1314
 
927
1315
  /** \internal \returns true if any coeff of \a a means "true"
928
- * It is supposed to be called on values returned by pcmp_*.
929
- */
930
- template<typename Packet> EIGEN_DEVICE_FUNC inline bool predux_any(const Packet& a)
931
- {
1316
+ * It is supposed to be called on values returned by pcmp_*.
1317
+ */
1318
+ template <typename Packet>
1319
+ EIGEN_DEVICE_FUNC inline bool predux_any(const Packet& a) {
932
1320
  // Dirty but generic implementation where "true" is assumed to be non-zero and always the same value.
933
1321
  // It is expected that "true" is either:
934
1322
  // - Scalar(1)
@@ -940,101 +1328,375 @@ template<typename Packet> EIGEN_DEVICE_FUNC inline bool predux_any(const Packet&
940
1328
  }
941
1329
 
942
1330
  /***************************************************************************
943
- * The following functions might not have to be overwritten for vectorized types
944
- ***************************************************************************/
945
-
946
- /** \internal copy a packet with constant coefficient \a a (e.g., [a,a,a,a]) to \a *to. \a to must be 16 bytes aligned */
947
- // NOTE: this function must really be templated on the packet type (think about different packet types for the same scalar type)
948
- template<typename Packet>
949
- inline void pstore1(typename unpacket_traits<Packet>::type* to, const typename unpacket_traits<Packet>::type& a)
950
- {
951
- pstore(to, pset1<Packet>(a));
952
- }
1331
+ * The following functions might not have to be overwritten for vectorized types
1332
+ ***************************************************************************/
953
1333
 
1334
+ template <typename Packet, typename EnableIf = void>
1335
+ struct pmadd_impl {
1336
+ static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pmadd(const Packet& a, const Packet& b, const Packet& c) {
1337
+ return padd(pmul(a, b), c);
1338
+ }
1339
+ static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pmsub(const Packet& a, const Packet& b, const Packet& c) {
1340
+ return psub(pmul(a, b), c);
1341
+ }
1342
+ static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pnmadd(const Packet& a, const Packet& b, const Packet& c) {
1343
+ return psub(c, pmul(a, b));
1344
+ }
1345
+ static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pnmsub(const Packet& a, const Packet& b, const Packet& c) {
1346
+ return pnegate(pmadd(a, b, c));
1347
+ }
1348
+ };
1349
+
1350
+ template <typename Scalar>
1351
+ struct pmadd_impl<Scalar, std::enable_if_t<is_scalar<Scalar>::value && NumTraits<Scalar>::IsSigned>> {
1352
+ static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar pmadd(const Scalar& a, const Scalar& b, const Scalar& c) {
1353
+ return numext::madd<Scalar>(a, b, c);
1354
+ }
1355
+ static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar pmsub(const Scalar& a, const Scalar& b, const Scalar& c) {
1356
+ return numext::madd<Scalar>(a, b, Scalar(-c));
1357
+ }
1358
+ static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar pnmadd(const Scalar& a, const Scalar& b, const Scalar& c) {
1359
+ return numext::madd<Scalar>(Scalar(-a), b, c);
1360
+ }
1361
+ static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar pnmsub(const Scalar& a, const Scalar& b, const Scalar& c) {
1362
+ return -Scalar(numext::madd<Scalar>(a, b, c));
1363
+ }
1364
+ };
1365
+
1366
+ // Multiply-add instructions.
954
1367
  /** \internal \returns a * b + c (coeff-wise) */
955
- template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
956
- pmadd(const Packet& a,
957
- const Packet& b,
958
- const Packet& c)
959
- { return padd(pmul(a, b),c); }
1368
+ template <typename Packet>
1369
+ EIGEN_DEVICE_FUNC inline Packet pmadd(const Packet& a, const Packet& b, const Packet& c) {
1370
+ return pmadd_impl<Packet>::pmadd(a, b, c);
1371
+ }
1372
+
1373
+ /** \internal \returns a * b - c (coeff-wise) */
1374
+ template <typename Packet>
1375
+ EIGEN_DEVICE_FUNC inline Packet pmsub(const Packet& a, const Packet& b, const Packet& c) {
1376
+ return pmadd_impl<Packet>::pmsub(a, b, c);
1377
+ }
1378
+
1379
+ /** \internal \returns -(a * b) + c (coeff-wise) */
1380
+ template <typename Packet>
1381
+ EIGEN_DEVICE_FUNC inline Packet pnmadd(const Packet& a, const Packet& b, const Packet& c) {
1382
+ return pmadd_impl<Packet>::pnmadd(a, b, c);
1383
+ }
1384
+
1385
+ /** \internal \returns -(a * b + c) (coeff-wise) */
1386
+ template <typename Packet>
1387
+ EIGEN_DEVICE_FUNC inline Packet pnmsub(const Packet& a, const Packet& b, const Packet& c) {
1388
+ return pmadd_impl<Packet>::pnmsub(a, b, c);
1389
+ }
1390
+
1391
+ /** \internal copy a packet with constant coefficient \a a (e.g., [a,a,a,a]) to \a *to. \a to must be 16 bytes aligned
1392
+ */
1393
+ // NOTE: this function must really be templated on the packet type (think about different packet types for the same
1394
+ // scalar type)
1395
+ template <typename Packet>
1396
+ inline void pstore1(typename unpacket_traits<Packet>::type* to, const typename unpacket_traits<Packet>::type& a) {
1397
+ pstore(to, pset1<Packet>(a));
1398
+ }
960
1399
 
961
1400
  /** \internal \returns a packet version of \a *from.
962
- * The pointer \a from must be aligned on a \a Alignment bytes boundary. */
963
- template<typename Packet, int Alignment>
964
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt(const typename unpacket_traits<Packet>::type* from)
965
- {
966
- if(Alignment >= unpacket_traits<Packet>::alignment)
1401
+ * The pointer \a from must be aligned on a \a Alignment bytes boundary. */
1402
+ template <typename Packet, int Alignment>
1403
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt(const typename unpacket_traits<Packet>::type* from) {
1404
+ if (Alignment >= unpacket_traits<Packet>::alignment)
967
1405
  return pload<Packet>(from);
968
1406
  else
969
1407
  return ploadu<Packet>(from);
970
1408
  }
971
1409
 
1410
+ /** \internal \returns n elements of a packet version of \a *from.
1411
+ * The pointer \a from must be aligned on a \a Alignment bytes boundary. */
1412
+ template <typename Packet, int Alignment>
1413
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt_partial(const typename unpacket_traits<Packet>::type* from,
1414
+ const Index n, const Index offset = 0) {
1415
+ if (Alignment >= unpacket_traits<Packet>::alignment)
1416
+ return pload_partial<Packet>(from, n, offset);
1417
+ else
1418
+ return ploadu_partial<Packet>(from, n, offset);
1419
+ }
1420
+
972
1421
  /** \internal copy the packet \a from to \a *to.
973
- * The pointer \a from must be aligned on a \a Alignment bytes boundary. */
974
- template<typename Scalar, typename Packet, int Alignment>
975
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(Scalar* to, const Packet& from)
976
- {
977
- if(Alignment >= unpacket_traits<Packet>::alignment)
1422
+ * The pointer \a to must be aligned on a \a Alignment bytes boundary. */
1423
+ template <typename Scalar, typename Packet, int Alignment>
1424
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(Scalar* to, const Packet& from) {
1425
+ if (Alignment >= unpacket_traits<Packet>::alignment)
978
1426
  pstore(to, from);
979
1427
  else
980
1428
  pstoreu(to, from);
981
1429
  }
982
1430
 
1431
+ /** \internal copy n elements of the packet \a from to \a *to.
1432
+ * The pointer \a to must be aligned on a \a Alignment bytes boundary. */
1433
+ template <typename Scalar, typename Packet, int Alignment>
1434
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret_partial(Scalar* to, const Packet& from, const Index n,
1435
+ const Index offset = 0) {
1436
+ if (Alignment >= unpacket_traits<Packet>::alignment)
1437
+ pstore_partial(to, from, n, offset);
1438
+ else
1439
+ pstoreu_partial(to, from, n, offset);
1440
+ }
1441
+
983
1442
  /** \internal \returns a packet version of \a *from.
984
- * Unlike ploadt, ploadt_ro takes advantage of the read-only memory path on the
985
- * hardware if available to speedup the loading of data that won't be modified
986
- * by the current computation.
987
- */
988
- template<typename Packet, int LoadMode>
989
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt_ro(const typename unpacket_traits<Packet>::type* from)
990
- {
1443
+ * Unlike ploadt, ploadt_ro takes advantage of the read-only memory path on the
1444
+ * hardware if available to speedup the loading of data that won't be modified
1445
+ * by the current computation.
1446
+ */
1447
+ template <typename Packet, int LoadMode>
1448
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt_ro(const typename unpacket_traits<Packet>::type* from) {
991
1449
  return ploadt<Packet, LoadMode>(from);
992
1450
  }
993
1451
 
994
1452
  /***************************************************************************
995
- * Fast complex products (GCC generates a function call which is very slow)
996
- ***************************************************************************/
1453
+ * Fast complex products (GCC generates a function call which is very slow)
1454
+ ***************************************************************************/
997
1455
 
998
1456
  // Eigen+CUDA does not support complexes.
999
1457
  #if !defined(EIGEN_GPUCC)
1000
1458
 
1001
- template<> inline std::complex<float> pmul(const std::complex<float>& a, const std::complex<float>& b)
1002
- { return std::complex<float>(a.real()*b.real() - a.imag()*b.imag(), a.imag()*b.real() + a.real()*b.imag()); }
1459
+ template <>
1460
+ inline std::complex<float> pmul(const std::complex<float>& a, const std::complex<float>& b) {
1461
+ return std::complex<float>(a.real() * b.real() - a.imag() * b.imag(), a.imag() * b.real() + a.real() * b.imag());
1462
+ }
1003
1463
 
1004
- template<> inline std::complex<double> pmul(const std::complex<double>& a, const std::complex<double>& b)
1005
- { return std::complex<double>(a.real()*b.real() - a.imag()*b.imag(), a.imag()*b.real() + a.real()*b.imag()); }
1464
+ template <>
1465
+ inline std::complex<double> pmul(const std::complex<double>& a, const std::complex<double>& b) {
1466
+ return std::complex<double>(a.real() * b.real() - a.imag() * b.imag(), a.imag() * b.real() + a.real() * b.imag());
1467
+ }
1006
1468
 
1007
1469
  #endif
1008
1470
 
1009
-
1010
1471
  /***************************************************************************
1011
1472
  * PacketBlock, that is a collection of N packets where the number of words
1012
1473
  * in the packet is a multiple of N.
1013
- ***************************************************************************/
1014
- template <typename Packet,int N=unpacket_traits<Packet>::size> struct PacketBlock {
1474
+ ***************************************************************************/
1475
+ template <typename Packet, int N = unpacket_traits<Packet>::size>
1476
+ struct PacketBlock {
1015
1477
  Packet packet[N];
1016
1478
  };
1017
1479
 
1018
- template<typename Packet> EIGEN_DEVICE_FUNC inline void
1019
- ptranspose(PacketBlock<Packet,1>& /*kernel*/) {
1480
+ template <typename Packet>
1481
+ EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet, 1>& /*kernel*/) {
1020
1482
  // Nothing to do in the scalar case, i.e. a 1x1 matrix.
1021
1483
  }
1022
1484
 
1023
1485
  /***************************************************************************
1024
1486
  * Selector, i.e. vector of N boolean values used to select (i.e. blend)
1025
1487
  * words from 2 packets.
1026
- ***************************************************************************/
1027
- template <size_t N> struct Selector {
1488
+ ***************************************************************************/
1489
+ template <size_t N>
1490
+ struct Selector {
1028
1491
  bool select[N];
1029
1492
  };
1030
1493
 
1031
- template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
1032
- pblend(const Selector<unpacket_traits<Packet>::size>& ifPacket, const Packet& thenPacket, const Packet& elsePacket) {
1494
+ template <typename Packet>
1495
+ EIGEN_DEVICE_FUNC inline Packet pblend(const Selector<unpacket_traits<Packet>::size>& ifPacket,
1496
+ const Packet& thenPacket, const Packet& elsePacket) {
1033
1497
  return ifPacket.select[0] ? thenPacket : elsePacket;
1034
1498
  }
1035
1499
 
1036
- } // end namespace internal
1500
+ /** \internal \returns 1 / a (coeff-wise) */
1501
+ template <typename Packet>
1502
+ EIGEN_DEVICE_FUNC inline Packet preciprocal(const Packet& a) {
1503
+ using Scalar = typename unpacket_traits<Packet>::type;
1504
+ return pdiv(pset1<Packet>(Scalar(1)), a);
1505
+ }
1506
+
1507
+ /** \internal \returns the reciprocal square-root of \a a (coeff-wise) */
1508
+ template <typename Packet>
1509
+ EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet prsqrt(const Packet& a) {
1510
+ return preciprocal<Packet>(psqrt(a));
1511
+ }
1512
+
1513
+ template <typename Packet, bool IsScalar = is_scalar<Packet>::value,
1514
+ bool IsInteger = NumTraits<typename unpacket_traits<Packet>::type>::IsInteger>
1515
+ struct psignbit_impl;
1516
+ template <typename Packet, bool IsInteger>
1517
+ struct psignbit_impl<Packet, true, IsInteger> {
1518
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static constexpr Packet run(const Packet& a) { return numext::signbit(a); }
1519
+ };
1520
+ template <typename Packet>
1521
+ struct psignbit_impl<Packet, false, false> {
1522
+ // generic implementation if not specialized in PacketMath.h
1523
+ // slower than arithmetic shift
1524
+ typedef typename unpacket_traits<Packet>::type Scalar;
1525
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static Packet run(const Packet& a) {
1526
+ const Packet cst_pos_one = pset1<Packet>(Scalar(1));
1527
+ const Packet cst_neg_one = pset1<Packet>(Scalar(-1));
1528
+ return pcmp_eq(por(pand(a, cst_neg_one), cst_pos_one), cst_neg_one);
1529
+ }
1530
+ };
1531
+ template <typename Packet>
1532
+ struct psignbit_impl<Packet, false, true> {
1533
+ // generic implementation for integer packets
1534
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static constexpr Packet run(const Packet& a) { return pcmp_lt(a, pzero(a)); }
1535
+ };
1536
+ /** \internal \returns the sign bit of \a a as a bitmask */
1537
+ template <typename Packet>
1538
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE constexpr Packet psignbit(const Packet& a) {
1539
+ return psignbit_impl<Packet>::run(a);
1540
+ }
1541
+
1542
+ /** \internal \returns the 2-argument arc tangent of \a y and \a x (coeff-wise) */
1543
+ template <typename Packet, std::enable_if_t<is_scalar<Packet>::value, int> = 0>
1544
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet patan2(const Packet& y, const Packet& x) {
1545
+ return numext::atan2(y, x);
1546
+ }
1547
+
1548
+ /** \internal \returns the 2-argument arc tangent of \a y and \a x (coeff-wise) */
1549
+ template <typename Packet, std::enable_if_t<!is_scalar<Packet>::value, int> = 0>
1550
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet patan2(const Packet& y, const Packet& x) {
1551
+ typedef typename internal::unpacket_traits<Packet>::type Scalar;
1552
+
1553
+ // See https://en.cppreference.com/w/cpp/numeric/math/atan2
1554
+ // for how corner cases are supposed to be handled according to the
1555
+ // IEEE floating-point standard (IEC 60559).
1556
+ const Packet kSignMask = pset1<Packet>(-Scalar(0));
1557
+ const Packet kZero = pzero(x);
1558
+ const Packet kOne = pset1<Packet>(Scalar(1));
1559
+ const Packet kPi = pset1<Packet>(Scalar(EIGEN_PI));
1560
+
1561
+ const Packet x_has_signbit = psignbit(x);
1562
+ const Packet y_signmask = pand(y, kSignMask);
1563
+ const Packet x_signmask = pand(x, kSignMask);
1564
+ const Packet result_signmask = pxor(y_signmask, x_signmask);
1565
+ const Packet shift = por(pand(x_has_signbit, kPi), y_signmask);
1566
+
1567
+ const Packet x_and_y_are_same = pcmp_eq(pabs(x), pabs(y));
1568
+ const Packet x_and_y_are_zero = pcmp_eq(por(x, y), kZero);
1569
+
1570
+ Packet arg = pdiv(y, x);
1571
+ arg = pselect(x_and_y_are_same, por(kOne, result_signmask), arg);
1572
+ arg = pselect(x_and_y_are_zero, result_signmask, arg);
1573
+
1574
+ Packet result = patan(arg);
1575
+ result = padd(result, shift);
1576
+ return result;
1577
+ }
1578
+
1579
+ /** \internal \returns the argument of \a a as a complex number */
1580
+ template <typename Packet, std::enable_if_t<is_scalar<Packet>::value, int> = 0>
1581
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pcarg(const Packet& a) {
1582
+ return Packet(numext::arg(a));
1583
+ }
1584
+
1585
+ /** \internal \returns the argument of \a a as a complex number */
1586
+ template <typename Packet, std::enable_if_t<!is_scalar<Packet>::value, int> = 0>
1587
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pcarg(const Packet& a) {
1588
+ EIGEN_STATIC_ASSERT(NumTraits<typename unpacket_traits<Packet>::type>::IsComplex,
1589
+ THIS METHOD IS FOR COMPLEX TYPES ONLY)
1590
+ using RealPacket = typename unpacket_traits<Packet>::as_real;
1591
+ // a // r i r i ...
1592
+ RealPacket aflip = pcplxflip(a).v; // i r i r ...
1593
+ RealPacket result = patan2(aflip, a.v); // atan2 crap atan2 crap ...
1594
+ return (Packet)pand(result, peven_mask(result)); // atan2 0 atan2 0 ...
1595
+ }
1596
+
1597
+ /** \internal \returns a packet populated with values in the range [begin, begin + count). Elements
1598
+ * outside this range are not defined. \a *from does not need to be aligned, and can be null if \a count is zero.*/
1599
+ template <typename Packet>
1600
+ EIGEN_DEVICE_FUNC inline Packet ploaduSegment(const typename unpacket_traits<Packet>::type* from, Index begin,
1601
+ Index count) {
1602
+ using Scalar = typename unpacket_traits<Packet>::type;
1603
+ constexpr Index PacketSize = unpacket_traits<Packet>::size;
1604
+ eigen_assert((begin >= 0 && count >= 0 && begin + count <= PacketSize) && "invalid range");
1605
+ Scalar aux[PacketSize] = {};
1606
+ for (Index k = begin; k < begin + count; k++) {
1607
+ aux[k] = from[k];
1608
+ }
1609
+ return ploadu<Packet>(aux);
1610
+ }
1611
+
1612
+ /** \internal \returns a packet populated with values in the range [begin, begin + count). Elements
1613
+ * outside this range are not defined. \a *from must be aligned, and cannot be null.*/
1614
+ template <typename Packet>
1615
+ EIGEN_DEVICE_FUNC inline Packet ploadSegment(const typename unpacket_traits<Packet>::type* from, Index begin,
1616
+ Index count) {
1617
+ return ploaduSegment<Packet>(from, begin, count);
1618
+ }
1619
+
1620
+ /** \internal copy the packet \a from in the range [begin, begin + count) to \a *to.
1621
+ Elements outside of the range [begin, begin + count) are not defined. \a *to does not need to be aligned, and can be
1622
+ null if \a count is zero.*/
1623
+ template <typename Scalar, typename Packet>
1624
+ EIGEN_DEVICE_FUNC inline void pstoreuSegment(Scalar* to, const Packet& from, Index begin, Index count) {
1625
+ constexpr Index PacketSize = unpacket_traits<Packet>::size;
1626
+ eigen_assert((begin >= 0 && count >= 0 && begin + count <= PacketSize) && "invalid range");
1627
+ Scalar aux[PacketSize];
1628
+ pstoreu<Scalar, Packet>(aux, from);
1629
+ for (Index k = begin; k < begin + count; k++) {
1630
+ to[k] = aux[k];
1631
+ }
1632
+ }
1633
+
1634
+ /** \internal copy the packet \a from in the range [begin, begin + count) to \a *to.
1635
+ Elements outside of the range [begin, begin + count) are not defined. \a *to must be aligned, and cannot be
1636
+ null.*/
1637
+ template <typename Scalar, typename Packet>
1638
+ EIGEN_DEVICE_FUNC inline void pstoreSegment(Scalar* to, const Packet& from, Index begin, Index count) {
1639
+ return pstoreuSegment(to, from, begin, count);
1640
+ }
1641
+
1642
+ /** \internal \returns a packet populated with values in the range [begin, begin + count). Elements
1643
+ * outside this range are not defined.*/
1644
+ template <typename Packet, int Alignment>
1645
+ EIGEN_DEVICE_FUNC inline Packet ploadtSegment(const typename unpacket_traits<Packet>::type* from, Index begin,
1646
+ Index count) {
1647
+ constexpr int RequiredAlignment = unpacket_traits<Packet>::alignment;
1648
+ if (Alignment >= RequiredAlignment) {
1649
+ return ploadSegment<Packet>(from, begin, count);
1650
+ } else {
1651
+ return ploaduSegment<Packet>(from, begin, count);
1652
+ }
1653
+ }
1654
+
1655
+ /** \internal copy the packet \a from in the range [begin, begin + count) to \a *to.
1656
+ Elements outside of the range [begin, begin + count) are not defined.*/
1657
+ template <typename Scalar, typename Packet, int Alignment>
1658
+ EIGEN_DEVICE_FUNC inline void pstoretSegment(Scalar* to, const Packet& from, Index begin, Index count) {
1659
+ constexpr int RequiredAlignment = unpacket_traits<Packet>::alignment;
1660
+ if (Alignment >= RequiredAlignment) {
1661
+ pstoreSegment<Scalar, Packet>(to, from, begin, count);
1662
+ } else {
1663
+ pstoreuSegment<Scalar, Packet>(to, from, begin, count);
1664
+ }
1665
+ }
1666
+
1667
+ #ifndef EIGEN_NO_IO
1668
+
1669
+ template <typename Packet>
1670
+ class StreamablePacket {
1671
+ public:
1672
+ using Scalar = typename unpacket_traits<Packet>::type;
1673
+ StreamablePacket(const Packet& packet) { pstoreu(v_, packet); }
1674
+
1675
+ friend std::ostream& operator<<(std::ostream& os, const StreamablePacket& packet) {
1676
+ os << "{" << packet.v_[0];
1677
+ for (int i = 1; i < unpacket_traits<Packet>::size; ++i) {
1678
+ os << "," << packet.v_[i];
1679
+ }
1680
+ os << "}";
1681
+ return os;
1682
+ }
1683
+
1684
+ private:
1685
+ Scalar v_[unpacket_traits<Packet>::size];
1686
+ };
1687
+
1688
+ /**
1689
+ * \internal \returns an intermediary that can be used to ostream packets, e.g. for debugging.
1690
+ */
1691
+ template <typename Packet>
1692
+ StreamablePacket<Packet> postream(const Packet& packet) {
1693
+ return StreamablePacket<Packet>(packet);
1694
+ }
1695
+
1696
+ #endif // EIGEN_NO_IO
1697
+
1698
+ } // end namespace internal
1037
1699
 
1038
- } // end namespace Eigen
1700
+ } // end namespace Eigen
1039
1701
 
1040
- #endif // EIGEN_GENERIC_PACKET_MATH_H
1702
+ #endif // EIGEN_GENERIC_PACKET_MATH_H