@smake/eigen 1.0.2 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (435)
  1. package/README.md +1 -1
  2. package/eigen/Eigen/AccelerateSupport +52 -0
  3. package/eigen/Eigen/Cholesky +18 -21
  4. package/eigen/Eigen/CholmodSupport +28 -28
  5. package/eigen/Eigen/Core +235 -326
  6. package/eigen/Eigen/Eigenvalues +16 -14
  7. package/eigen/Eigen/Geometry +21 -24
  8. package/eigen/Eigen/Householder +9 -8
  9. package/eigen/Eigen/IterativeLinearSolvers +8 -4
  10. package/eigen/Eigen/Jacobi +14 -14
  11. package/eigen/Eigen/KLUSupport +43 -0
  12. package/eigen/Eigen/LU +16 -20
  13. package/eigen/Eigen/MetisSupport +12 -12
  14. package/eigen/Eigen/OrderingMethods +54 -54
  15. package/eigen/Eigen/PaStiXSupport +23 -20
  16. package/eigen/Eigen/PardisoSupport +17 -14
  17. package/eigen/Eigen/QR +18 -21
  18. package/eigen/Eigen/QtAlignedMalloc +5 -13
  19. package/eigen/Eigen/SPQRSupport +21 -14
  20. package/eigen/Eigen/SVD +23 -18
  21. package/eigen/Eigen/Sparse +1 -4
  22. package/eigen/Eigen/SparseCholesky +18 -23
  23. package/eigen/Eigen/SparseCore +18 -17
  24. package/eigen/Eigen/SparseLU +12 -8
  25. package/eigen/Eigen/SparseQR +16 -14
  26. package/eigen/Eigen/StdDeque +5 -2
  27. package/eigen/Eigen/StdList +5 -2
  28. package/eigen/Eigen/StdVector +5 -2
  29. package/eigen/Eigen/SuperLUSupport +30 -24
  30. package/eigen/Eigen/ThreadPool +80 -0
  31. package/eigen/Eigen/UmfPackSupport +19 -17
  32. package/eigen/Eigen/Version +14 -0
  33. package/eigen/Eigen/src/AccelerateSupport/AccelerateSupport.h +423 -0
  34. package/eigen/Eigen/src/AccelerateSupport/InternalHeaderCheck.h +3 -0
  35. package/eigen/Eigen/src/Cholesky/InternalHeaderCheck.h +3 -0
  36. package/eigen/Eigen/src/Cholesky/LDLT.h +377 -401
  37. package/eigen/Eigen/src/Cholesky/LLT.h +332 -360
  38. package/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h +81 -56
  39. package/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +620 -521
  40. package/eigen/Eigen/src/CholmodSupport/InternalHeaderCheck.h +3 -0
  41. package/eigen/Eigen/src/Core/ArithmeticSequence.h +239 -0
  42. package/eigen/Eigen/src/Core/Array.h +341 -294
  43. package/eigen/Eigen/src/Core/ArrayBase.h +190 -203
  44. package/eigen/Eigen/src/Core/ArrayWrapper.h +127 -171
  45. package/eigen/Eigen/src/Core/Assign.h +30 -40
  46. package/eigen/Eigen/src/Core/AssignEvaluator.h +711 -589
  47. package/eigen/Eigen/src/Core/Assign_MKL.h +130 -125
  48. package/eigen/Eigen/src/Core/BandMatrix.h +268 -283
  49. package/eigen/Eigen/src/Core/Block.h +375 -398
  50. package/eigen/Eigen/src/Core/CommaInitializer.h +86 -97
  51. package/eigen/Eigen/src/Core/ConditionEstimator.h +51 -53
  52. package/eigen/Eigen/src/Core/CoreEvaluators.h +1356 -1026
  53. package/eigen/Eigen/src/Core/CoreIterators.h +73 -59
  54. package/eigen/Eigen/src/Core/CwiseBinaryOp.h +114 -132
  55. package/eigen/Eigen/src/Core/CwiseNullaryOp.h +726 -617
  56. package/eigen/Eigen/src/Core/CwiseTernaryOp.h +77 -103
  57. package/eigen/Eigen/src/Core/CwiseUnaryOp.h +56 -68
  58. package/eigen/Eigen/src/Core/CwiseUnaryView.h +132 -95
  59. package/eigen/Eigen/src/Core/DenseBase.h +632 -571
  60. package/eigen/Eigen/src/Core/DenseCoeffsBase.h +511 -624
  61. package/eigen/Eigen/src/Core/DenseStorage.h +512 -509
  62. package/eigen/Eigen/src/Core/DeviceWrapper.h +153 -0
  63. package/eigen/Eigen/src/Core/Diagonal.h +169 -210
  64. package/eigen/Eigen/src/Core/DiagonalMatrix.h +351 -274
  65. package/eigen/Eigen/src/Core/DiagonalProduct.h +12 -10
  66. package/eigen/Eigen/src/Core/Dot.h +172 -222
  67. package/eigen/Eigen/src/Core/EigenBase.h +75 -85
  68. package/eigen/Eigen/src/Core/Fill.h +138 -0
  69. package/eigen/Eigen/src/Core/FindCoeff.h +464 -0
  70. package/eigen/Eigen/src/Core/ForceAlignedAccess.h +90 -109
  71. package/eigen/Eigen/src/Core/Fuzzy.h +82 -105
  72. package/eigen/Eigen/src/Core/GeneralProduct.h +327 -263
  73. package/eigen/Eigen/src/Core/GenericPacketMath.h +1472 -360
  74. package/eigen/Eigen/src/Core/GlobalFunctions.h +194 -151
  75. package/eigen/Eigen/src/Core/IO.h +147 -139
  76. package/eigen/Eigen/src/Core/IndexedView.h +321 -0
  77. package/eigen/Eigen/src/Core/InnerProduct.h +260 -0
  78. package/eigen/Eigen/src/Core/InternalHeaderCheck.h +3 -0
  79. package/eigen/Eigen/src/Core/Inverse.h +56 -66
  80. package/eigen/Eigen/src/Core/Map.h +124 -142
  81. package/eigen/Eigen/src/Core/MapBase.h +256 -281
  82. package/eigen/Eigen/src/Core/MathFunctions.h +1620 -938
  83. package/eigen/Eigen/src/Core/MathFunctionsImpl.h +233 -71
  84. package/eigen/Eigen/src/Core/Matrix.h +491 -416
  85. package/eigen/Eigen/src/Core/MatrixBase.h +468 -453
  86. package/eigen/Eigen/src/Core/NestByValue.h +66 -85
  87. package/eigen/Eigen/src/Core/NoAlias.h +79 -85
  88. package/eigen/Eigen/src/Core/NumTraits.h +235 -148
  89. package/eigen/Eigen/src/Core/PartialReduxEvaluator.h +253 -0
  90. package/eigen/Eigen/src/Core/PermutationMatrix.h +461 -511
  91. package/eigen/Eigen/src/Core/PlainObjectBase.h +871 -894
  92. package/eigen/Eigen/src/Core/Product.h +260 -139
  93. package/eigen/Eigen/src/Core/ProductEvaluators.h +863 -714
  94. package/eigen/Eigen/src/Core/Random.h +161 -136
  95. package/eigen/Eigen/src/Core/RandomImpl.h +262 -0
  96. package/eigen/Eigen/src/Core/RealView.h +250 -0
  97. package/eigen/Eigen/src/Core/Redux.h +366 -336
  98. package/eigen/Eigen/src/Core/Ref.h +308 -209
  99. package/eigen/Eigen/src/Core/Replicate.h +94 -106
  100. package/eigen/Eigen/src/Core/Reshaped.h +398 -0
  101. package/eigen/Eigen/src/Core/ReturnByValue.h +49 -55
  102. package/eigen/Eigen/src/Core/Reverse.h +136 -145
  103. package/eigen/Eigen/src/Core/Select.h +70 -140
  104. package/eigen/Eigen/src/Core/SelfAdjointView.h +262 -285
  105. package/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h +23 -20
  106. package/eigen/Eigen/src/Core/SkewSymmetricMatrix3.h +382 -0
  107. package/eigen/Eigen/src/Core/Solve.h +97 -111
  108. package/eigen/Eigen/src/Core/SolveTriangular.h +131 -129
  109. package/eigen/Eigen/src/Core/SolverBase.h +138 -101
  110. package/eigen/Eigen/src/Core/StableNorm.h +156 -160
  111. package/eigen/Eigen/src/Core/StlIterators.h +619 -0
  112. package/eigen/Eigen/src/Core/Stride.h +91 -88
  113. package/eigen/Eigen/src/Core/Swap.h +70 -38
  114. package/eigen/Eigen/src/Core/Transpose.h +295 -273
  115. package/eigen/Eigen/src/Core/Transpositions.h +272 -317
  116. package/eigen/Eigen/src/Core/TriangularMatrix.h +670 -755
  117. package/eigen/Eigen/src/Core/VectorBlock.h +59 -72
  118. package/eigen/Eigen/src/Core/VectorwiseOp.h +668 -630
  119. package/eigen/Eigen/src/Core/Visitor.h +480 -216
  120. package/eigen/Eigen/src/Core/arch/AVX/Complex.h +407 -293
  121. package/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +79 -388
  122. package/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +2935 -491
  123. package/eigen/Eigen/src/Core/arch/AVX/Reductions.h +353 -0
  124. package/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +279 -22
  125. package/eigen/Eigen/src/Core/arch/AVX512/Complex.h +472 -0
  126. package/eigen/Eigen/src/Core/arch/AVX512/GemmKernel.h +1245 -0
  127. package/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +85 -333
  128. package/eigen/Eigen/src/Core/arch/AVX512/MathFunctionsFP16.h +75 -0
  129. package/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +2490 -649
  130. package/eigen/Eigen/src/Core/arch/AVX512/PacketMathFP16.h +1413 -0
  131. package/eigen/Eigen/src/Core/arch/AVX512/Reductions.h +297 -0
  132. package/eigen/Eigen/src/Core/arch/AVX512/TrsmKernel.h +1167 -0
  133. package/eigen/Eigen/src/Core/arch/AVX512/TrsmUnrolls.inc +1219 -0
  134. package/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +277 -0
  135. package/eigen/Eigen/src/Core/arch/AVX512/TypeCastingFP16.h +130 -0
  136. package/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +521 -298
  137. package/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +39 -280
  138. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +3686 -0
  139. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +205 -0
  140. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +901 -0
  141. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMAbfloat16.h +742 -0
  142. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixVectorProduct.inc +2818 -0
  143. package/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +3391 -723
  144. package/eigen/Eigen/src/Core/arch/AltiVec/TypeCasting.h +153 -0
  145. package/eigen/Eigen/src/Core/arch/Default/BFloat16.h +866 -0
  146. package/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +113 -14
  147. package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +2634 -0
  148. package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +227 -0
  149. package/eigen/Eigen/src/Core/arch/Default/Half.h +1091 -0
  150. package/eigen/Eigen/src/Core/arch/Default/Settings.h +11 -13
  151. package/eigen/Eigen/src/Core/arch/GPU/Complex.h +244 -0
  152. package/eigen/Eigen/src/Core/arch/GPU/MathFunctions.h +104 -0
  153. package/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1712 -0
  154. package/eigen/Eigen/src/Core/arch/GPU/Tuple.h +268 -0
  155. package/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +77 -0
  156. package/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
  157. package/eigen/Eigen/src/Core/arch/HVX/PacketMath.h +1088 -0
  158. package/eigen/Eigen/src/Core/arch/LSX/Complex.h +520 -0
  159. package/eigen/Eigen/src/Core/arch/LSX/GeneralBlockPanelKernel.h +23 -0
  160. package/eigen/Eigen/src/Core/arch/LSX/MathFunctions.h +43 -0
  161. package/eigen/Eigen/src/Core/arch/LSX/PacketMath.h +2866 -0
  162. package/eigen/Eigen/src/Core/arch/LSX/TypeCasting.h +526 -0
  163. package/eigen/Eigen/src/Core/arch/MSA/Complex.h +620 -0
  164. package/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +379 -0
  165. package/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1237 -0
  166. package/eigen/Eigen/src/Core/arch/NEON/Complex.h +531 -289
  167. package/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +243 -0
  168. package/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +50 -73
  169. package/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +5915 -579
  170. package/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1642 -0
  171. package/eigen/Eigen/src/Core/arch/NEON/UnaryFunctors.h +57 -0
  172. package/eigen/Eigen/src/Core/arch/SSE/Complex.h +366 -334
  173. package/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +40 -514
  174. package/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +2164 -675
  175. package/eigen/Eigen/src/Core/arch/SSE/Reductions.h +324 -0
  176. package/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +188 -35
  177. package/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +48 -0
  178. package/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +674 -0
  179. package/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +52 -0
  180. package/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +227 -0
  181. package/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +303 -0
  182. package/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +576 -0
  183. package/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +83 -0
  184. package/eigen/Eigen/src/Core/arch/ZVector/Complex.h +434 -261
  185. package/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +160 -53
  186. package/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +1073 -605
  187. package/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +123 -117
  188. package/eigen/Eigen/src/Core/functors/BinaryFunctors.h +594 -322
  189. package/eigen/Eigen/src/Core/functors/NullaryFunctors.h +204 -118
  190. package/eigen/Eigen/src/Core/functors/StlFunctors.h +110 -97
  191. package/eigen/Eigen/src/Core/functors/TernaryFunctors.h +34 -7
  192. package/eigen/Eigen/src/Core/functors/UnaryFunctors.h +1158 -530
  193. package/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +2329 -1333
  194. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +328 -364
  195. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +191 -178
  196. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +85 -82
  197. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +154 -73
  198. package/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +396 -542
  199. package/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +80 -77
  200. package/eigen/Eigen/src/Core/products/Parallelizer.h +208 -92
  201. package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +331 -375
  202. package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +206 -224
  203. package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +139 -146
  204. package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +58 -61
  205. package/eigen/Eigen/src/Core/products/SelfadjointProduct.h +71 -71
  206. package/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +48 -46
  207. package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +294 -369
  208. package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +246 -238
  209. package/eigen/Eigen/src/Core/products/TriangularMatrixVector.h +244 -247
  210. package/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +212 -192
  211. package/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +328 -275
  212. package/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +108 -109
  213. package/eigen/Eigen/src/Core/products/TriangularSolverVector.h +70 -93
  214. package/eigen/Eigen/src/Core/util/Assert.h +158 -0
  215. package/eigen/Eigen/src/Core/util/BlasUtil.h +413 -290
  216. package/eigen/Eigen/src/Core/util/ConfigureVectorization.h +543 -0
  217. package/eigen/Eigen/src/Core/util/Constants.h +314 -263
  218. package/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +130 -78
  219. package/eigen/Eigen/src/Core/util/EmulateArray.h +270 -0
  220. package/eigen/Eigen/src/Core/util/ForwardDeclarations.h +450 -224
  221. package/eigen/Eigen/src/Core/util/GpuHipCudaDefines.inc +101 -0
  222. package/eigen/Eigen/src/Core/util/GpuHipCudaUndefines.inc +45 -0
  223. package/eigen/Eigen/src/Core/util/IndexedViewHelper.h +487 -0
  224. package/eigen/Eigen/src/Core/util/IntegralConstant.h +279 -0
  225. package/eigen/Eigen/src/Core/util/MKL_support.h +39 -30
  226. package/eigen/Eigen/src/Core/util/Macros.h +939 -646
  227. package/eigen/Eigen/src/Core/util/MaxSizeVector.h +139 -0
  228. package/eigen/Eigen/src/Core/util/Memory.h +1042 -650
  229. package/eigen/Eigen/src/Core/util/Meta.h +618 -426
  230. package/eigen/Eigen/src/Core/util/MoreMeta.h +638 -0
  231. package/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +32 -19
  232. package/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
  233. package/eigen/Eigen/src/Core/util/Serializer.h +209 -0
  234. package/eigen/Eigen/src/Core/util/StaticAssert.h +51 -164
  235. package/eigen/Eigen/src/Core/util/SymbolicIndex.h +445 -0
  236. package/eigen/Eigen/src/Core/util/XprHelper.h +793 -538
  237. package/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +246 -277
  238. package/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +299 -319
  239. package/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +52 -48
  240. package/eigen/Eigen/src/Eigenvalues/EigenSolver.h +413 -456
  241. package/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +309 -325
  242. package/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +157 -171
  243. package/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +292 -310
  244. package/eigen/Eigen/src/Eigenvalues/InternalHeaderCheck.h +3 -0
  245. package/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +91 -107
  246. package/eigen/Eigen/src/Eigenvalues/RealQZ.h +539 -606
  247. package/eigen/Eigen/src/Eigenvalues/RealSchur.h +348 -382
  248. package/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +41 -35
  249. package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +579 -600
  250. package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +47 -44
  251. package/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +434 -461
  252. package/eigen/Eigen/src/Geometry/AlignedBox.h +307 -214
  253. package/eigen/Eigen/src/Geometry/AngleAxis.h +135 -137
  254. package/eigen/Eigen/src/Geometry/EulerAngles.h +163 -74
  255. package/eigen/Eigen/src/Geometry/Homogeneous.h +289 -333
  256. package/eigen/Eigen/src/Geometry/Hyperplane.h +152 -161
  257. package/eigen/Eigen/src/Geometry/InternalHeaderCheck.h +3 -0
  258. package/eigen/Eigen/src/Geometry/OrthoMethods.h +168 -145
  259. package/eigen/Eigen/src/Geometry/ParametrizedLine.h +141 -104
  260. package/eigen/Eigen/src/Geometry/Quaternion.h +595 -497
  261. package/eigen/Eigen/src/Geometry/Rotation2D.h +110 -108
  262. package/eigen/Eigen/src/Geometry/RotationBase.h +148 -145
  263. package/eigen/Eigen/src/Geometry/Scaling.h +115 -90
  264. package/eigen/Eigen/src/Geometry/Transform.h +896 -953
  265. package/eigen/Eigen/src/Geometry/Translation.h +100 -98
  266. package/eigen/Eigen/src/Geometry/Umeyama.h +79 -84
  267. package/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +154 -0
  268. package/eigen/Eigen/src/Householder/BlockHouseholder.h +54 -42
  269. package/eigen/Eigen/src/Householder/Householder.h +104 -122
  270. package/eigen/Eigen/src/Householder/HouseholderSequence.h +416 -382
  271. package/eigen/Eigen/src/Householder/InternalHeaderCheck.h +3 -0
  272. package/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +153 -166
  273. package/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +127 -138
  274. package/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +95 -124
  275. package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +269 -267
  276. package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +246 -259
  277. package/eigen/Eigen/src/IterativeLinearSolvers/InternalHeaderCheck.h +3 -0
  278. package/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +218 -217
  279. package/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +80 -103
  280. package/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +59 -63
  281. package/eigen/Eigen/src/Jacobi/InternalHeaderCheck.h +3 -0
  282. package/eigen/Eigen/src/Jacobi/Jacobi.h +256 -291
  283. package/eigen/Eigen/src/KLUSupport/InternalHeaderCheck.h +3 -0
  284. package/eigen/Eigen/src/KLUSupport/KLUSupport.h +339 -0
  285. package/eigen/Eigen/src/LU/Determinant.h +60 -63
  286. package/eigen/Eigen/src/LU/FullPivLU.h +561 -626
  287. package/eigen/Eigen/src/LU/InternalHeaderCheck.h +3 -0
  288. package/eigen/Eigen/src/LU/InverseImpl.h +213 -275
  289. package/eigen/Eigen/src/LU/PartialPivLU.h +407 -435
  290. package/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h +54 -40
  291. package/eigen/Eigen/src/LU/arch/InverseSize4.h +353 -0
  292. package/eigen/Eigen/src/MetisSupport/InternalHeaderCheck.h +3 -0
  293. package/eigen/Eigen/src/MetisSupport/MetisSupport.h +81 -93
  294. package/eigen/Eigen/src/OrderingMethods/Amd.h +250 -282
  295. package/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +950 -1103
  296. package/eigen/Eigen/src/OrderingMethods/InternalHeaderCheck.h +3 -0
  297. package/eigen/Eigen/src/OrderingMethods/Ordering.h +111 -122
  298. package/eigen/Eigen/src/PaStiXSupport/InternalHeaderCheck.h +3 -0
  299. package/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +524 -570
  300. package/eigen/Eigen/src/PardisoSupport/InternalHeaderCheck.h +3 -0
  301. package/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +385 -429
  302. package/eigen/Eigen/src/QR/ColPivHouseholderQR.h +494 -473
  303. package/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +120 -56
  304. package/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +223 -137
  305. package/eigen/Eigen/src/QR/FullPivHouseholderQR.h +517 -460
  306. package/eigen/Eigen/src/QR/HouseholderQR.h +412 -278
  307. package/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h +32 -23
  308. package/eigen/Eigen/src/QR/InternalHeaderCheck.h +3 -0
  309. package/eigen/Eigen/src/SPQRSupport/InternalHeaderCheck.h +3 -0
  310. package/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +263 -261
  311. package/eigen/Eigen/src/SVD/BDCSVD.h +872 -679
  312. package/eigen/Eigen/src/SVD/BDCSVD_LAPACKE.h +174 -0
  313. package/eigen/Eigen/src/SVD/InternalHeaderCheck.h +3 -0
  314. package/eigen/Eigen/src/SVD/JacobiSVD.h +585 -543
  315. package/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +85 -49
  316. package/eigen/Eigen/src/SVD/SVDBase.h +281 -160
  317. package/eigen/Eigen/src/SVD/UpperBidiagonalization.h +202 -237
  318. package/eigen/Eigen/src/SparseCholesky/InternalHeaderCheck.h +3 -0
  319. package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +769 -590
  320. package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +318 -129
  321. package/eigen/Eigen/src/SparseCore/AmbiVector.h +202 -251
  322. package/eigen/Eigen/src/SparseCore/CompressedStorage.h +184 -236
  323. package/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +140 -184
  324. package/eigen/Eigen/src/SparseCore/InternalHeaderCheck.h +3 -0
  325. package/eigen/Eigen/src/SparseCore/SparseAssign.h +174 -111
  326. package/eigen/Eigen/src/SparseCore/SparseBlock.h +408 -477
  327. package/eigen/Eigen/src/SparseCore/SparseColEtree.h +100 -112
  328. package/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +531 -280
  329. package/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +559 -347
  330. package/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +100 -108
  331. package/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +185 -191
  332. package/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +71 -71
  333. package/eigen/Eigen/src/SparseCore/SparseDot.h +49 -47
  334. package/eigen/Eigen/src/SparseCore/SparseFuzzy.h +13 -11
  335. package/eigen/Eigen/src/SparseCore/SparseMap.h +243 -253
  336. package/eigen/Eigen/src/SparseCore/SparseMatrix.h +1614 -1142
  337. package/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +403 -357
  338. package/eigen/Eigen/src/SparseCore/SparsePermutation.h +186 -115
  339. package/eigen/Eigen/src/SparseCore/SparseProduct.h +100 -91
  340. package/eigen/Eigen/src/SparseCore/SparseRedux.h +22 -24
  341. package/eigen/Eigen/src/SparseCore/SparseRef.h +268 -295
  342. package/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +371 -414
  343. package/eigen/Eigen/src/SparseCore/SparseSolverBase.h +78 -87
  344. package/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +81 -95
  345. package/eigen/Eigen/src/SparseCore/SparseTranspose.h +62 -71
  346. package/eigen/Eigen/src/SparseCore/SparseTriangularView.h +132 -144
  347. package/eigen/Eigen/src/SparseCore/SparseUtil.h +146 -115
  348. package/eigen/Eigen/src/SparseCore/SparseVector.h +426 -372
  349. package/eigen/Eigen/src/SparseCore/SparseView.h +164 -193
  350. package/eigen/Eigen/src/SparseCore/TriangularSolver.h +129 -170
  351. package/eigen/Eigen/src/SparseLU/InternalHeaderCheck.h +3 -0
  352. package/eigen/Eigen/src/SparseLU/SparseLU.h +814 -618
  353. package/eigen/Eigen/src/SparseLU/SparseLUImpl.h +61 -48
  354. package/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +102 -118
  355. package/eigen/Eigen/src/SparseLU/SparseLU_Structs.h +38 -35
  356. package/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +273 -255
  357. package/eigen/Eigen/src/SparseLU/SparseLU_Utils.h +44 -49
  358. package/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h +104 -108
  359. package/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +90 -101
  360. package/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +57 -58
  361. package/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +43 -55
  362. package/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +74 -71
  363. package/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +125 -133
  364. package/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h +136 -159
  365. package/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h +51 -52
  366. package/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h +67 -73
  367. package/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h +24 -26
  368. package/eigen/Eigen/src/SparseQR/InternalHeaderCheck.h +3 -0
  369. package/eigen/Eigen/src/SparseQR/SparseQR.h +451 -490
  370. package/eigen/Eigen/src/StlSupport/StdDeque.h +28 -105
  371. package/eigen/Eigen/src/StlSupport/StdList.h +28 -84
  372. package/eigen/Eigen/src/StlSupport/StdVector.h +28 -108
  373. package/eigen/Eigen/src/StlSupport/details.h +48 -50
  374. package/eigen/Eigen/src/SuperLUSupport/InternalHeaderCheck.h +3 -0
  375. package/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +634 -732
  376. package/eigen/Eigen/src/ThreadPool/Barrier.h +70 -0
  377. package/eigen/Eigen/src/ThreadPool/CoreThreadPoolDevice.h +336 -0
  378. package/eigen/Eigen/src/ThreadPool/EventCount.h +241 -0
  379. package/eigen/Eigen/src/ThreadPool/ForkJoin.h +140 -0
  380. package/eigen/Eigen/src/ThreadPool/InternalHeaderCheck.h +4 -0
  381. package/eigen/Eigen/src/ThreadPool/NonBlockingThreadPool.h +587 -0
  382. package/eigen/Eigen/src/ThreadPool/RunQueue.h +230 -0
  383. package/eigen/Eigen/src/ThreadPool/ThreadCancel.h +21 -0
  384. package/eigen/Eigen/src/ThreadPool/ThreadEnvironment.h +43 -0
  385. package/eigen/Eigen/src/ThreadPool/ThreadLocal.h +289 -0
  386. package/eigen/Eigen/src/ThreadPool/ThreadPoolInterface.h +50 -0
  387. package/eigen/Eigen/src/ThreadPool/ThreadYield.h +16 -0
  388. package/eigen/Eigen/src/UmfPackSupport/InternalHeaderCheck.h +3 -0
  389. package/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +480 -380
  390. package/eigen/Eigen/src/misc/Image.h +41 -43
  391. package/eigen/Eigen/src/misc/InternalHeaderCheck.h +3 -0
  392. package/eigen/Eigen/src/misc/Kernel.h +39 -41
  393. package/eigen/Eigen/src/misc/RealSvd2x2.h +19 -21
  394. package/eigen/Eigen/src/misc/blas.h +83 -426
  395. package/eigen/Eigen/src/misc/lapacke.h +9976 -16182
  396. package/eigen/Eigen/src/misc/lapacke_helpers.h +163 -0
  397. package/eigen/Eigen/src/misc/lapacke_mangling.h +4 -5
  398. package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.inc +344 -0
  399. package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.inc +544 -0
  400. package/eigen/Eigen/src/plugins/BlockMethods.inc +1370 -0
  401. package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.inc +116 -0
  402. package/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.inc +167 -0
  403. package/eigen/Eigen/src/plugins/IndexedViewMethods.inc +192 -0
  404. package/eigen/Eigen/src/plugins/InternalHeaderCheck.inc +3 -0
  405. package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.inc +331 -0
  406. package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.inc +118 -0
  407. package/eigen/Eigen/src/plugins/ReshapedMethods.inc +133 -0
  408. package/lib/LibEigen.d.ts +4 -0
  409. package/lib/LibEigen.js +14 -0
  410. package/lib/index.d.ts +1 -1
  411. package/lib/index.js +7 -3
  412. package/package.json +2 -10
  413. package/eigen/Eigen/CMakeLists.txt +0 -19
  414. package/eigen/Eigen/src/Core/BooleanRedux.h +0 -164
  415. package/eigen/Eigen/src/Core/arch/CUDA/Complex.h +0 -103
  416. package/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -675
  417. package/eigen/Eigen/src/Core/arch/CUDA/MathFunctions.h +0 -91
  418. package/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
  419. package/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
  420. package/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
  421. package/eigen/Eigen/src/Core/util/NonMPL2.h +0 -3
  422. package/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
  423. package/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
  424. package/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h +0 -67
  425. package/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +0 -280
  426. package/eigen/Eigen/src/misc/lapack.h +0 -152
  427. package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +0 -332
  428. package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +0 -552
  429. package/eigen/Eigen/src/plugins/BlockMethods.h +0 -1058
  430. package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h +0 -115
  431. package/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +0 -163
  432. package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +0 -152
  433. package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +0 -85
  434. package/lib/eigen.d.ts +0 -2
  435. package/lib/eigen.js +0 -15
@@ -11,17 +11,20 @@
11
11
  #ifndef EIGEN_GENERIC_PACKET_MATH_H
12
12
  #define EIGEN_GENERIC_PACKET_MATH_H
13
13
 
14
+ // IWYU pragma: private
15
+ #include "./InternalHeaderCheck.h"
16
+
14
17
  namespace Eigen {
15
18
 
16
19
  namespace internal {
17
20
 
18
21
  /** \internal
19
- * \file GenericPacketMath.h
20
- *
21
- * Default implementation for types not supported by the vectorization.
22
- * In practice these functions are provided to make easier the writing
23
- * of generic vectorized code.
24
- */
22
+ * \file GenericPacketMath.h
23
+ *
24
+ * Default implementation for types not supported by the vectorization.
25
+ * In practice these functions are provided to make easier the writing
26
+ * of generic vectorized code.
27
+ */
25
28
 
26
29
  #ifndef EIGEN_DEBUG_ALIGNED_LOAD
27
30
  #define EIGEN_DEBUG_ALIGNED_LOAD
@@ -39,267 +42,950 @@ namespace internal {
39
42
  #define EIGEN_DEBUG_UNALIGNED_STORE
40
43
  #endif
41
44
 
42
- struct default_packet_traits
43
- {
45
+ struct default_packet_traits {
44
46
  enum {
45
- HasHalfPacket = 0,
46
-
47
- HasAdd = 1,
48
- HasSub = 1,
49
- HasMul = 1,
47
+ // Ops that are implemented for most types.
48
+ HasAdd = 1,
49
+ HasSub = 1,
50
+ HasShift = 1,
51
+ HasMul = 1,
50
52
  HasNegate = 1,
51
- HasAbs = 1,
52
- HasArg = 0,
53
- HasAbs2 = 1,
54
- HasMin = 1,
55
- HasMax = 1,
56
- HasConj = 1,
53
+ HasAbs = 1,
54
+ HasAbs2 = 1,
55
+ HasMin = 1,
56
+ HasMax = 1,
57
+ HasConj = 1,
57
58
  HasSetLinear = 1,
58
- HasBlend = 0,
59
-
60
- HasDiv = 0,
61
- HasSqrt = 0,
62
- HasRsqrt = 0,
63
- HasExp = 0,
64
- HasLog = 0,
65
- HasLog1p = 0,
66
- HasLog10 = 0,
67
- HasPow = 0,
68
-
69
- HasSin = 0,
70
- HasCos = 0,
71
- HasTan = 0,
72
- HasASin = 0,
73
- HasACos = 0,
74
- HasATan = 0,
75
- HasSinh = 0,
76
- HasCosh = 0,
77
- HasTanh = 0,
59
+ HasSign = 1,
60
+ // By default, the nearest integer functions (rint, round, floor, ceil, trunc) are enabled for all scalar and packet
61
+ // types
62
+ HasRound = 1,
63
+
64
+ HasArg = 0,
65
+ HasAbsDiff = 0,
66
+ HasBlend = 0,
67
+ // This flag is used to indicate whether packet comparison is supported.
68
+ // pcmp_eq and pcmp_lt should be defined for it to be true.
69
+ HasCmp = 0,
70
+
71
+ HasDiv = 0,
72
+ HasReciprocal = 0,
73
+ HasSqrt = 0,
74
+ HasRsqrt = 0,
75
+ HasCbrt = 0,
76
+ HasExp = 0,
77
+ HasExpm1 = 0,
78
+ HasLog = 0,
79
+ HasLog1p = 0,
80
+ HasLog10 = 0,
81
+ HasPow = 0,
82
+ HasSin = 0,
83
+ HasCos = 0,
84
+ HasTan = 0,
85
+ HasASin = 0,
86
+ HasACos = 0,
87
+ HasATan = 0,
88
+ HasATanh = 0,
89
+ HasSinh = 0,
90
+ HasCosh = 0,
91
+ HasTanh = 0,
78
92
  HasLGamma = 0,
79
93
  HasDiGamma = 0,
80
94
  HasZeta = 0,
81
95
  HasPolygamma = 0,
82
96
  HasErf = 0,
83
97
  HasErfc = 0,
98
+ HasNdtri = 0,
99
+ HasBessel = 0,
84
100
  HasIGamma = 0,
101
+ HasIGammaDerA = 0,
102
+ HasGammaSampleDerAlpha = 0,
85
103
  HasIGammac = 0,
86
- HasBetaInc = 0,
87
-
88
- HasRound = 0,
89
- HasFloor = 0,
90
- HasCeil = 0,
91
-
92
- HasSign = 0
104
+ HasBetaInc = 0
93
105
  };
94
106
  };
95
107
 
96
- template<typename T> struct packet_traits : default_packet_traits
97
- {
108
+ template <typename T>
109
+ struct packet_traits : default_packet_traits {
98
110
  typedef T type;
99
111
  typedef T half;
100
112
  enum {
101
113
  Vectorizable = 0,
102
114
  size = 1,
103
115
  AlignedOnScalar = 0,
104
- HasHalfPacket = 0
105
116
  };
106
117
  enum {
107
- HasAdd = 0,
108
- HasSub = 0,
109
- HasMul = 0,
118
+ HasAdd = 0,
119
+ HasSub = 0,
120
+ HasMul = 0,
110
121
  HasNegate = 0,
111
- HasAbs = 0,
112
- HasAbs2 = 0,
113
- HasMin = 0,
114
- HasMax = 0,
115
- HasConj = 0,
122
+ HasAbs = 0,
123
+ HasAbs2 = 0,
124
+ HasMin = 0,
125
+ HasMax = 0,
126
+ HasConj = 0,
116
127
  HasSetLinear = 0
117
128
  };
118
129
  };
119
130
 
120
- template<typename T> struct packet_traits<const T> : packet_traits<T> { };
131
+ template <typename T>
132
+ struct packet_traits<const T> : packet_traits<T> {};
121
133
 
122
- template <typename Src, typename Tgt> struct type_casting_traits {
134
+ template <typename T>
135
+ struct unpacket_traits {
136
+ typedef T type;
137
+ typedef T half;
138
+ typedef typename numext::get_integer_by_size<sizeof(T)>::signed_type integer_packet;
123
139
  enum {
124
- VectorizedCast = 0,
140
+ size = 1,
141
+ alignment = alignof(T),
142
+ vectorizable = false,
143
+ masked_load_available = false,
144
+ masked_store_available = false
145
+ };
146
+ };
147
+
148
+ template <typename T>
149
+ struct unpacket_traits<const T> : unpacket_traits<T> {};
150
+
151
+ /** \internal A convenience utility for determining if the type is a scalar.
152
+ * This is used to enable some generic packet implementations.
153
+ */
154
+ template <typename Packet>
155
+ struct is_scalar {
156
+ using Scalar = typename unpacket_traits<Packet>::type;
157
+ enum { value = internal::is_same<Packet, Scalar>::value };
158
+ };
159
+
160
+ // automatically and succinctly define combinations of pcast<SrcPacket,TgtPacket> when
161
+ // 1) the packets are the same type, or
162
+ // 2) the packets differ only in sign.
163
+ // In both of these cases, preinterpret (bit_cast) is equivalent to pcast (static_cast)
164
+ template <typename SrcPacket, typename TgtPacket,
165
+ bool Scalar = is_scalar<SrcPacket>::value && is_scalar<TgtPacket>::value>
166
+ struct is_degenerate_helper : is_same<SrcPacket, TgtPacket> {};
167
+ template <>
168
+ struct is_degenerate_helper<int8_t, uint8_t, true> : std::true_type {};
169
+ template <>
170
+ struct is_degenerate_helper<int16_t, uint16_t, true> : std::true_type {};
171
+ template <>
172
+ struct is_degenerate_helper<int32_t, uint32_t, true> : std::true_type {};
173
+ template <>
174
+ struct is_degenerate_helper<int64_t, uint64_t, true> : std::true_type {};
175
+
176
+ template <typename SrcPacket, typename TgtPacket>
177
+ struct is_degenerate_helper<SrcPacket, TgtPacket, false> {
178
+ using SrcScalar = typename unpacket_traits<SrcPacket>::type;
179
+ static constexpr int SrcSize = unpacket_traits<SrcPacket>::size;
180
+ using TgtScalar = typename unpacket_traits<TgtPacket>::type;
181
+ static constexpr int TgtSize = unpacket_traits<TgtPacket>::size;
182
+ static constexpr bool value = is_degenerate_helper<SrcScalar, TgtScalar, true>::value && (SrcSize == TgtSize);
183
+ };
184
+
185
+ // is_degenerate<T1,T2>::value == is_degenerate<T2,T1>::value
186
+ template <typename SrcPacket, typename TgtPacket>
187
+ struct is_degenerate {
188
+ static constexpr bool value =
189
+ is_degenerate_helper<SrcPacket, TgtPacket>::value || is_degenerate_helper<TgtPacket, SrcPacket>::value;
190
+ };
191
+
192
+ template <typename Packet>
193
+ struct is_half {
194
+ using Scalar = typename unpacket_traits<Packet>::type;
195
+ static constexpr int Size = unpacket_traits<Packet>::size;
196
+ using DefaultPacket = typename packet_traits<Scalar>::type;
197
+ static constexpr int DefaultSize = unpacket_traits<DefaultPacket>::size;
198
+ static constexpr bool value = Size != 1 && Size < DefaultSize;
199
+ };
200
+
201
+ template <typename Src, typename Tgt>
202
+ struct type_casting_traits {
203
+ enum {
204
+ VectorizedCast =
205
+ is_degenerate<Src, Tgt>::value && packet_traits<Src>::Vectorizable && packet_traits<Tgt>::Vectorizable,
125
206
  SrcCoeffRatio = 1,
126
207
  TgtCoeffRatio = 1
127
208
  };
128
209
  };
129
210
 
211
+ // provides a succinct template to define vectorized casting traits with respect to the largest accessible packet types
212
+ template <typename Src, typename Tgt>
213
+ struct vectorized_type_casting_traits {
214
+ enum : int {
215
+ DefaultSrcPacketSize = packet_traits<Src>::size,
216
+ DefaultTgtPacketSize = packet_traits<Tgt>::size,
217
+ VectorizedCast = 1,
218
+ SrcCoeffRatio = plain_enum_max(DefaultTgtPacketSize / DefaultSrcPacketSize, 1),
219
+ TgtCoeffRatio = plain_enum_max(DefaultSrcPacketSize / DefaultTgtPacketSize, 1)
220
+ };
221
+ };
222
+
223
+ /** \internal Wrapper to ensure that multiple packet types can map to the same
224
+ underlying vector type. */
225
+ template <typename T, int unique_id = 0>
226
+ struct eigen_packet_wrapper {
227
+ EIGEN_ALWAYS_INLINE operator T&() { return m_val; }
228
+ EIGEN_ALWAYS_INLINE operator const T&() const { return m_val; }
229
+ EIGEN_ALWAYS_INLINE eigen_packet_wrapper() = default;
230
+ EIGEN_ALWAYS_INLINE eigen_packet_wrapper(const T& v) : m_val(v) {}
231
+ EIGEN_ALWAYS_INLINE eigen_packet_wrapper& operator=(const T& v) {
232
+ m_val = v;
233
+ return *this;
234
+ }
235
+
236
+ T m_val;
237
+ };
238
+
239
+ template <typename Target, typename Packet, bool IsSame = is_same<Target, Packet>::value>
240
+ struct preinterpret_generic;
241
+
242
+ template <typename Target, typename Packet>
243
+ struct preinterpret_generic<Target, Packet, false> {
244
+ // the packets are not the same, attempt scalar bit_cast
245
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Target run(const Packet& a) {
246
+ return numext::bit_cast<Target, Packet>(a);
247
+ }
248
+ };
249
+
250
+ template <typename Packet>
251
+ struct preinterpret_generic<Packet, Packet, true> {
252
+ // the packets are the same type: do nothing
253
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(const Packet& a) { return a; }
254
+ };
255
+
256
+ template <typename ComplexPacket>
257
+ struct preinterpret_generic<typename unpacket_traits<ComplexPacket>::as_real, ComplexPacket, false> {
258
+ using RealPacket = typename unpacket_traits<ComplexPacket>::as_real;
259
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE RealPacket run(const ComplexPacket& a) { return a.v; }
260
+ };
261
+
262
+ /** \internal \returns reinterpret_cast<Target>(a) */
263
+ template <typename Target, typename Packet>
264
+ EIGEN_DEVICE_FUNC inline Target preinterpret(const Packet& a) {
265
+ return preinterpret_generic<Target, Packet>::run(a);
266
+ }
267
+
268
+ template <typename SrcPacket, typename TgtPacket, bool Degenerate = is_degenerate<SrcPacket, TgtPacket>::value,
269
+ bool TgtIsHalf = is_half<TgtPacket>::value>
270
+ struct pcast_generic;
271
+
272
+ template <typename SrcPacket, typename TgtPacket>
273
+ struct pcast_generic<SrcPacket, TgtPacket, false, false> {
274
+ // the packets are not degenerate: attempt scalar static_cast
275
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket run(const SrcPacket& a) {
276
+ return cast_impl<SrcPacket, TgtPacket>::run(a);
277
+ }
278
+ };
279
+
280
+ template <typename Packet>
281
+ struct pcast_generic<Packet, Packet, true, false> {
282
+ // the packets are the same: do nothing
283
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(const Packet& a) { return a; }
284
+ };
285
+
286
+ template <typename SrcPacket, typename TgtPacket, bool TgtIsHalf>
287
+ struct pcast_generic<SrcPacket, TgtPacket, true, TgtIsHalf> {
288
+ // the packets are degenerate: preinterpret is equivalent to pcast
289
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket run(const SrcPacket& a) { return preinterpret<TgtPacket>(a); }
290
+ };
130
291
 
131
292
  /** \internal \returns static_cast<TgtType>(a) (coeff-wise) */
132
293
  template <typename SrcPacket, typename TgtPacket>
133
- EIGEN_DEVICE_FUNC inline TgtPacket
134
- pcast(const SrcPacket& a) {
135
- return static_cast<TgtPacket>(a);
294
+ EIGEN_DEVICE_FUNC inline TgtPacket pcast(const SrcPacket& a) {
295
+ return pcast_generic<SrcPacket, TgtPacket>::run(a);
136
296
  }
137
297
  template <typename SrcPacket, typename TgtPacket>
138
- EIGEN_DEVICE_FUNC inline TgtPacket
139
- pcast(const SrcPacket& a, const SrcPacket& /*b*/) {
140
- return static_cast<TgtPacket>(a);
298
+ EIGEN_DEVICE_FUNC inline TgtPacket pcast(const SrcPacket& a, const SrcPacket& b) {
299
+ return pcast_generic<SrcPacket, TgtPacket>::run(a, b);
300
+ }
301
+ template <typename SrcPacket, typename TgtPacket>
302
+ EIGEN_DEVICE_FUNC inline TgtPacket pcast(const SrcPacket& a, const SrcPacket& b, const SrcPacket& c,
303
+ const SrcPacket& d) {
304
+ return pcast_generic<SrcPacket, TgtPacket>::run(a, b, c, d);
141
305
  }
142
-
143
306
  template <typename SrcPacket, typename TgtPacket>
144
- EIGEN_DEVICE_FUNC inline TgtPacket
145
- pcast(const SrcPacket& a, const SrcPacket& /*b*/, const SrcPacket& /*c*/, const SrcPacket& /*d*/) {
146
- return static_cast<TgtPacket>(a);
307
+ EIGEN_DEVICE_FUNC inline TgtPacket pcast(const SrcPacket& a, const SrcPacket& b, const SrcPacket& c, const SrcPacket& d,
308
+ const SrcPacket& e, const SrcPacket& f, const SrcPacket& g,
309
+ const SrcPacket& h) {
310
+ return pcast_generic<SrcPacket, TgtPacket>::run(a, b, c, d, e, f, g, h);
147
311
  }
148
312
 
313
+ template <typename SrcPacket, typename TgtPacket>
314
+ struct pcast_generic<SrcPacket, TgtPacket, false, true> {
315
+ // TgtPacket is a half packet of some other type
316
+ // perform cast and truncate result
317
+ using DefaultTgtPacket = typename is_half<TgtPacket>::DefaultPacket;
318
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket run(const SrcPacket& a) {
319
+ return preinterpret<TgtPacket>(pcast<SrcPacket, DefaultTgtPacket>(a));
320
+ }
321
+ };
322
+
149
323
  /** \internal \returns a + b (coeff-wise) */
150
- template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
151
- padd(const Packet& a,
152
- const Packet& b) { return a+b; }
324
+ template <typename Packet>
325
+ EIGEN_DEVICE_FUNC inline Packet padd(const Packet& a, const Packet& b) {
326
+ return a + b;
327
+ }
328
+ // Avoid compiler warning for boolean algebra.
329
+ template <>
330
+ EIGEN_DEVICE_FUNC inline bool padd(const bool& a, const bool& b) {
331
+ return a || b;
332
+ }
333
+
334
+ /** \internal \returns a + b (coeff-wise), restricted by the mask \a umask (masked add)
335
+ * There is no generic implementation. We only have implementations for specialized
336
+ * cases. Generic case should not be called.
337
+ */
338
+ template <typename Packet>
339
+ EIGEN_DEVICE_FUNC inline std::enable_if_t<unpacket_traits<Packet>::masked_fpops_available, Packet> padd(
340
+ const Packet& a, const Packet& b, typename unpacket_traits<Packet>::mask_t umask);
153
341
 
154
342
  /** \internal \returns a - b (coeff-wise) */
155
- template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
156
- psub(const Packet& a,
157
- const Packet& b) { return a-b; }
343
+ template <typename Packet>
344
+ EIGEN_DEVICE_FUNC inline Packet psub(const Packet& a, const Packet& b) {
345
+ return a - b;
346
+ }
158
347
 
159
348
  /** \internal \returns -a (coeff-wise) */
160
- template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
161
- pnegate(const Packet& a) { return -a; }
349
+ template <typename Packet>
350
+ EIGEN_DEVICE_FUNC inline Packet pnegate(const Packet& a) {
351
+ EIGEN_STATIC_ASSERT((!is_same<typename unpacket_traits<Packet>::type, bool>::value),
352
+ NEGATE IS NOT DEFINED FOR BOOLEAN TYPES)
353
+ return numext::negate(a);
354
+ }
162
355
 
163
356
  /** \internal \returns conj(a) (coeff-wise) */
164
-
165
- template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
166
- pconj(const Packet& a) { return numext::conj(a); }
357
+ template <typename Packet>
358
+ EIGEN_DEVICE_FUNC inline Packet pconj(const Packet& a) {
359
+ return numext::conj(a);
360
+ }
167
361
 
168
362
  /** \internal \returns a * b (coeff-wise) */
169
- template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
170
- pmul(const Packet& a,
171
- const Packet& b) { return a*b; }
363
+ template <typename Packet>
364
+ EIGEN_DEVICE_FUNC inline Packet pmul(const Packet& a, const Packet& b) {
365
+ return a * b;
366
+ }
367
+ // Avoid compiler warning for boolean algebra.
368
+ template <>
369
+ EIGEN_DEVICE_FUNC inline bool pmul(const bool& a, const bool& b) {
370
+ return a && b;
371
+ }
172
372
 
173
373
  /** \internal \returns a / b (coeff-wise) */
174
- template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
175
- pdiv(const Packet& a,
176
- const Packet& b) { return a/b; }
374
+ template <typename Packet>
375
+ EIGEN_DEVICE_FUNC inline Packet pdiv(const Packet& a, const Packet& b) {
376
+ return a / b;
377
+ }
378
+ // Avoid compiler warning for boolean algebra.
379
+ template <>
380
+ EIGEN_DEVICE_FUNC inline bool pdiv(const bool& a, const bool& b) {
381
+ return a && b;
382
+ }
177
383
 
178
- /** \internal \returns the min of \a a and \a b (coeff-wise) */
179
- template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
180
- pmin(const Packet& a,
181
- const Packet& b) { return numext::mini(a, b); }
384
+ // In the generic packet case, memset to all one bits.
385
+ template <typename Packet, typename EnableIf = void>
386
+ struct ptrue_impl {
387
+ static EIGEN_DEVICE_FUNC inline Packet run(const Packet& /*a*/) {
388
+ Packet b;
389
+ memset(static_cast<void*>(&b), 0xff, sizeof(Packet));
390
+ return b;
391
+ }
392
+ };
182
393
 
183
- /** \internal \returns the max of \a a and \a b (coeff-wise) */
184
- template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
185
- pmax(const Packet& a,
186
- const Packet& b) { return numext::maxi(a, b); }
394
+ // Use a value of one for scalars.
395
+ template <typename Scalar>
396
+ struct ptrue_impl<Scalar, std::enable_if_t<is_scalar<Scalar>::value>> {
397
+ static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar&) { return Scalar(1); }
398
+ };
187
399
 
188
- /** \internal \returns the absolute value of \a a */
189
- template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
190
- pabs(const Packet& a) { using std::abs; return abs(a); }
400
+ // For booleans, we can only directly set a valid `bool` value to avoid UB.
401
+ template <>
402
+ struct ptrue_impl<bool, void> {
403
+ static EIGEN_DEVICE_FUNC inline bool run(const bool&) { return true; }
404
+ };
191
405
 
192
- /** \internal \returns the phase angle of \a a */
193
- template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
194
- parg(const Packet& a) { using numext::arg; return arg(a); }
406
+ /** \internal \returns a packet with all bits set to one (for scalars, the value one; for bool, true). */
407
+ template <typename Packet>
408
+ EIGEN_DEVICE_FUNC inline Packet ptrue(const Packet& a) {
409
+ return ptrue_impl<Packet>::run(a);
410
+ }
411
+
412
+ // In the general packet case, memset to zero.
413
+ template <typename Packet, typename EnableIf = void>
414
+ struct pzero_impl {
415
+ static EIGEN_DEVICE_FUNC inline Packet run(const Packet& /*a*/) {
416
+ Packet b;
417
+ memset(static_cast<void*>(&b), 0x00, sizeof(Packet));
418
+ return b;
419
+ }
420
+ };
421
+
422
+ // For scalars, explicitly set to Scalar(0), since the underlying representation
423
+ // for zero may not consist of all-zero bits.
424
+ template <typename T>
425
+ struct pzero_impl<T, std::enable_if_t<is_scalar<T>::value>> {
426
+ static EIGEN_DEVICE_FUNC inline T run(const T& /*a*/) { return T(0); }
427
+ };
428
+
429
+ /** \internal \returns packet of zeros */
430
+ template <typename Packet>
431
+ EIGEN_DEVICE_FUNC inline Packet pzero(const Packet& a) {
432
+ return pzero_impl<Packet>::run(a);
433
+ }
434
+
435
+ template <typename T>
436
+ struct bit_and {
437
+ EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE T operator()(const T& a, const T& b) const { return a & b; }
438
+ };
439
+
440
+ template <typename T>
441
+ struct bit_or {
442
+ EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE T operator()(const T& a, const T& b) const { return a | b; }
443
+ };
444
+
445
+ template <typename T>
446
+ struct bit_xor {
447
+ EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE T operator()(const T& a, const T& b) const { return a ^ b; }
448
+ };
449
+
450
+ template <typename T>
451
+ struct bit_not {
452
+ EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE T operator()(const T& a) const { return ~a; }
453
+ };
454
+
455
+ template <>
456
+ struct bit_and<bool> {
457
+ EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE bool operator()(const bool& a, const bool& b) const { return a && b; }
458
+ };
459
+
460
+ template <>
461
+ struct bit_or<bool> {
462
+ EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE bool operator()(const bool& a, const bool& b) const { return a || b; }
463
+ };
464
+
465
+ template <>
466
+ struct bit_xor<bool> {
467
+ EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE bool operator()(const bool& a, const bool& b) const { return a != b; }
468
+ };
469
+
470
+ template <>
471
+ struct bit_not<bool> {
472
+ EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE bool operator()(const bool& a) const { return !a; }
473
+ };
474
+
475
+ // Use operators &, |, ^, ~.
476
+ template <typename T>
477
+ struct operator_bitwise_helper {
478
+ EIGEN_DEVICE_FUNC static inline T bitwise_and(const T& a, const T& b) { return bit_and<T>()(a, b); }
479
+ EIGEN_DEVICE_FUNC static inline T bitwise_or(const T& a, const T& b) { return bit_or<T>()(a, b); }
480
+ EIGEN_DEVICE_FUNC static inline T bitwise_xor(const T& a, const T& b) { return bit_xor<T>()(a, b); }
481
+ EIGEN_DEVICE_FUNC static inline T bitwise_not(const T& a) { return bit_not<T>()(a); }
482
+ };
483
+
484
+ // Apply binary operations byte-by-byte
485
+ template <typename T>
486
+ struct bytewise_bitwise_helper {
487
+ EIGEN_DEVICE_FUNC static inline T bitwise_and(const T& a, const T& b) {
488
+ return binary(a, b, bit_and<unsigned char>());
489
+ }
490
+ EIGEN_DEVICE_FUNC static inline T bitwise_or(const T& a, const T& b) { return binary(a, b, bit_or<unsigned char>()); }
491
+ EIGEN_DEVICE_FUNC static inline T bitwise_xor(const T& a, const T& b) {
492
+ return binary(a, b, bit_xor<unsigned char>());
493
+ }
494
+ EIGEN_DEVICE_FUNC static inline T bitwise_not(const T& a) { return unary(a, bit_not<unsigned char>()); }
495
+
496
+ private:
497
+ template <typename Op>
498
+ EIGEN_DEVICE_FUNC static inline T unary(const T& a, Op op) {
499
+ const unsigned char* a_ptr = reinterpret_cast<const unsigned char*>(&a);
500
+ T c;
501
+ unsigned char* c_ptr = reinterpret_cast<unsigned char*>(&c);
502
+ for (size_t i = 0; i < sizeof(T); ++i) {
503
+ *c_ptr++ = op(*a_ptr++);
504
+ }
505
+ return c;
506
+ }
507
+
508
+ template <typename Op>
509
+ EIGEN_DEVICE_FUNC static inline T binary(const T& a, const T& b, Op op) {
510
+ const unsigned char* a_ptr = reinterpret_cast<const unsigned char*>(&a);
511
+ const unsigned char* b_ptr = reinterpret_cast<const unsigned char*>(&b);
512
+ T c;
513
+ unsigned char* c_ptr = reinterpret_cast<unsigned char*>(&c);
514
+ for (size_t i = 0; i < sizeof(T); ++i) {
515
+ *c_ptr++ = op(*a_ptr++, *b_ptr++);
516
+ }
517
+ return c;
518
+ }
519
+ };
520
+
521
+ // In the general case, use byte-by-byte manipulation.
522
+ template <typename T, typename EnableIf = void>
523
+ struct bitwise_helper : public bytewise_bitwise_helper<T> {};
524
+
525
+ // For integers or non-trivial scalars, use binary operators.
526
+ template <typename T>
527
+ struct bitwise_helper<T, typename std::enable_if_t<is_scalar<T>::value &&
528
+ (NumTraits<T>::IsInteger || NumTraits<T>::RequireInitialization)>>
529
+ : public operator_bitwise_helper<T> {};
195
530
 
196
531
  /** \internal \returns the bitwise and of \a a and \a b */
197
- template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
198
- pand(const Packet& a, const Packet& b) { return a & b; }
532
+ template <typename Packet>
533
+ EIGEN_DEVICE_FUNC inline Packet pand(const Packet& a, const Packet& b) {
534
+ return bitwise_helper<Packet>::bitwise_and(a, b);
535
+ }
199
536
 
200
537
  /** \internal \returns the bitwise or of \a a and \a b */
201
- template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
202
- por(const Packet& a, const Packet& b) { return a | b; }
538
+ template <typename Packet>
539
+ EIGEN_DEVICE_FUNC inline Packet por(const Packet& a, const Packet& b) {
540
+ return bitwise_helper<Packet>::bitwise_or(a, b);
541
+ }
203
542
 
204
543
  /** \internal \returns the bitwise xor of \a a and \a b */
205
- template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
206
- pxor(const Packet& a, const Packet& b) { return a ^ b; }
544
+ template <typename Packet>
545
+ EIGEN_DEVICE_FUNC inline Packet pxor(const Packet& a, const Packet& b) {
546
+ return bitwise_helper<Packet>::bitwise_xor(a, b);
547
+ }
548
+
549
+ /** \internal \returns the bitwise not of \a a */
550
+ template <typename Packet>
551
+ EIGEN_DEVICE_FUNC inline Packet pnot(const Packet& a) {
552
+ return bitwise_helper<Packet>::bitwise_not(a);
553
+ }
554
+
555
+ /** \internal \returns the bitwise and of \a a and not \a b */
556
+ template <typename Packet>
557
+ EIGEN_DEVICE_FUNC inline Packet pandnot(const Packet& a, const Packet& b) {
558
+ return pand(a, pnot(b));
559
+ }
560
+
561
+ /** \internal \returns a < b as a bit mask */
562
+ template <typename Packet>
563
+ EIGEN_DEVICE_FUNC inline Packet pcmp_lt(const Packet& a, const Packet& b) {
564
+ return a < b ? ptrue(a) : pzero(a);
565
+ }
566
+
567
+ /** \internal \returns a == b as a bit mask */
568
+ template <typename Packet>
569
+ EIGEN_DEVICE_FUNC inline Packet pcmp_eq(const Packet& a, const Packet& b) {
570
+ return a == b ? ptrue(a) : pzero(a);
571
+ }
572
+
573
+ /** \internal \returns a <= b as a bit mask */
574
+ template <typename Packet>
575
+ EIGEN_DEVICE_FUNC inline Packet pcmp_le(const Packet& a, const Packet& b) {
576
+ return por(pcmp_eq(a, b), pcmp_lt(a, b));
577
+ }
578
+
579
+ /** \internal \returns a < b or a==NaN or b==NaN as a bit mask */
580
+ template <typename Packet>
581
+ EIGEN_DEVICE_FUNC inline Packet pcmp_lt_or_nan(const Packet& a, const Packet& b) {
582
+ return a >= b ? pzero(a) : ptrue(a);
583
+ }
584
+
585
+ // In the general case, use bitwise select.
586
+ template <typename Packet, bool is_scalar = is_scalar<Packet>::value>
587
+ struct pselect_impl {
588
+ static EIGEN_DEVICE_FUNC inline Packet run(const Packet& mask, const Packet& a, const Packet& b) {
589
+ return por(pand(a, mask), pandnot(b, mask));
590
+ }
591
+ };
592
+
593
+ // For scalars, use ternary select.
594
+ template <typename Packet>
595
+ struct pselect_impl<Packet, true> {
596
+ static EIGEN_DEVICE_FUNC inline Packet run(const Packet& mask, const Packet& a, const Packet& b) {
597
+ return numext::select(mask, a, b);
598
+ }
599
+ };
600
+
601
+ /** \internal \returns \a a or \a b for each field in the packet according to \a mask */
602
+ template <typename Packet>
603
+ EIGEN_DEVICE_FUNC inline Packet pselect(const Packet& mask, const Packet& a, const Packet& b) {
604
+ return pselect_impl<Packet>::run(mask, a, b);
605
+ }
606
+
607
+ template <>
608
+ EIGEN_DEVICE_FUNC inline bool pselect<bool>(const bool& cond, const bool& a, const bool& b) {
609
+ return cond ? a : b;
610
+ }
611
+
612
+ /** \internal \returns the min or max of \a a and \a b (coeff-wise)
613
+ If either \a a or \a b are NaN, the result is implementation defined. */
614
+ template <int NaNPropagation, bool IsInteger>
615
+ struct pminmax_impl {
616
+ template <typename Packet, typename Op>
617
+ static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a, const Packet& b, Op op) {
618
+ return op(a, b);
619
+ }
620
+ };
207
621
 
208
- /** \internal \returns the bitwise andnot of \a a and \a b */
209
- template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
210
- pandnot(const Packet& a, const Packet& b) { return a & (!b); }
622
+ /** \internal \returns the min or max of \a a and \a b (coeff-wise)
623
+ If either \a a or \a b are NaN, NaN is returned. */
624
+ template <>
625
+ struct pminmax_impl<PropagateNaN, false> {
626
+ template <typename Packet, typename Op>
627
+ static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a, const Packet& b, Op op) {
628
+ Packet not_nan_mask_a = pcmp_eq(a, a);
629
+ Packet not_nan_mask_b = pcmp_eq(b, b);
630
+ return pselect(not_nan_mask_a, pselect(not_nan_mask_b, op(a, b), b), a);
631
+ }
632
+ };
633
+
634
+ /** \internal \returns the min or max of \a a and \a b (coeff-wise)
635
+ If both \a a and \a b are NaN, NaN is returned.
636
+ Equivalent to std::fmin(a, b) or std::fmax(a, b), depending on \a op. */
637
+ template <>
638
+ struct pminmax_impl<PropagateNumbers, false> {
639
+ template <typename Packet, typename Op>
640
+ static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a, const Packet& b, Op op) {
641
+ Packet not_nan_mask_a = pcmp_eq(a, a);
642
+ Packet not_nan_mask_b = pcmp_eq(b, b);
643
+ return pselect(not_nan_mask_a, pselect(not_nan_mask_b, op(a, b), a), b);
644
+ }
645
+ };
646
+
647
+ #define EIGEN_BINARY_OP_NAN_PROPAGATION(Type, Func) [](const Type& aa, const Type& bb) { return Func(aa, bb); }
648
+
649
+ /** \internal \returns the min of \a a and \a b (coeff-wise).
650
+ If \a a or \a b is NaN, the return value is implementation defined. */
651
+ template <typename Packet>
652
+ EIGEN_DEVICE_FUNC inline Packet pmin(const Packet& a, const Packet& b) {
653
+ return numext::mini(a, b);
654
+ }
655
+
656
+ /** \internal \returns the min of \a a and \a b (coeff-wise).
657
+ NaNPropagation determines the NaN propagation semantics. */
658
+ template <int NaNPropagation, typename Packet>
659
+ EIGEN_DEVICE_FUNC inline Packet pmin(const Packet& a, const Packet& b) {
660
+ constexpr bool IsInteger = NumTraits<typename unpacket_traits<Packet>::type>::IsInteger;
661
+ return pminmax_impl<NaNPropagation, IsInteger>::run(a, b, EIGEN_BINARY_OP_NAN_PROPAGATION(Packet, (pmin<Packet>)));
662
+ }
663
+
664
+ /** \internal \returns the max of \a a and \a b (coeff-wise)
665
+ If \a a or \a b is NaN, the return value is implementation defined. */
666
+ template <typename Packet>
667
+ EIGEN_DEVICE_FUNC inline Packet pmax(const Packet& a, const Packet& b) {
668
+ return numext::maxi(a, b);
669
+ }
670
+
671
+ /** \internal \returns the max of \a a and \a b (coeff-wise).
672
+ NaNPropagation determines the NaN propagation semantics. */
673
+ template <int NaNPropagation, typename Packet>
674
+ EIGEN_DEVICE_FUNC inline Packet pmax(const Packet& a, const Packet& b) {
675
+ constexpr bool IsInteger = NumTraits<typename unpacket_traits<Packet>::type>::IsInteger;
676
+ return pminmax_impl<NaNPropagation, IsInteger>::run(a, b, EIGEN_BINARY_OP_NAN_PROPAGATION(Packet, (pmax<Packet>)));
677
+ }
678
+
679
+ /** \internal \returns the absolute value of \a a */
680
+ template <typename Packet>
681
+ EIGEN_DEVICE_FUNC inline Packet pabs(const Packet& a) {
682
+ return numext::abs(a);
683
+ }
684
+ template <>
685
+ EIGEN_DEVICE_FUNC inline unsigned int pabs(const unsigned int& a) {
686
+ return a;
687
+ }
688
+ template <>
689
+ EIGEN_DEVICE_FUNC inline unsigned long pabs(const unsigned long& a) {
690
+ return a;
691
+ }
692
+ template <>
693
+ EIGEN_DEVICE_FUNC inline unsigned long long pabs(const unsigned long long& a) {
694
+ return a;
695
+ }
696
+
697
+ /** \internal \returns padd(a, b) where peven_mask(a) is set and psub(a, b) elsewhere (coeff-wise) */
698
+ template <typename Packet>
699
+ EIGEN_DEVICE_FUNC inline Packet paddsub(const Packet& a, const Packet& b) {
700
+ return pselect(peven_mask(a), padd(a, b), psub(a, b));
701
+ }
211
702
 
212
- /** \internal \returns a packet version of \a *from, from must be 16 bytes aligned */
213
- template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
214
- pload(const typename unpacket_traits<Packet>::type* from) { return *from; }
703
+ /** \internal \returns the phase angle of \a a */
704
+ template <typename Packet>
705
+ EIGEN_DEVICE_FUNC inline Packet parg(const Packet& a) {
706
+ using numext::arg;
707
+ return arg(a);
708
+ }
709
+
710
+ /** \internal \returns \a a arithmetically shifted by N bits to the right */
711
+ template <int N, typename T>
712
+ EIGEN_DEVICE_FUNC inline T parithmetic_shift_right(const T& a) {
713
+ return numext::arithmetic_shift_right(a, N);
714
+ }
715
+
716
+ /** \internal \returns \a a logically shifted by N bits to the right */
717
+ template <int N, typename T>
718
+ EIGEN_DEVICE_FUNC inline T plogical_shift_right(const T& a) {
719
+ return numext::logical_shift_right(a, N);
720
+ }
721
+
722
+ /** \internal \returns \a a shifted by N bits to the left */
723
+ template <int N, typename T>
724
+ EIGEN_DEVICE_FUNC inline T plogical_shift_left(const T& a) {
725
+ return numext::logical_shift_left(a, N);
726
+ }
727
+
728
+ /** \internal \returns the significand and exponent of the underlying floating point numbers
729
+ * See https://en.cppreference.com/w/cpp/numeric/math/frexp
730
+ */
731
+ template <typename Packet>
732
+ EIGEN_DEVICE_FUNC inline Packet pfrexp(const Packet& a, Packet& exponent) {
733
+ int exp;
734
+ EIGEN_USING_STD(frexp);
735
+ Packet result = static_cast<Packet>(frexp(a, &exp));
736
+ exponent = static_cast<Packet>(exp);
737
+ return result;
738
+ }
739
+
740
+ /** \internal \returns a * 2^((int)exponent)
741
+ * See https://en.cppreference.com/w/cpp/numeric/math/ldexp
742
+ */
743
+ template <typename Packet>
744
+ EIGEN_DEVICE_FUNC inline Packet pldexp(const Packet& a, const Packet& exponent) {
745
+ EIGEN_USING_STD(ldexp)
746
+ return static_cast<Packet>(ldexp(a, static_cast<int>(exponent)));
747
+ }
748
+
749
+ /** \internal \returns the absolute difference of \a a and \a b (coeff-wise) */
750
+ template <typename Packet>
751
+ EIGEN_DEVICE_FUNC inline Packet pabsdiff(const Packet& a, const Packet& b) {
752
+ return pselect(pcmp_lt(a, b), psub(b, a), psub(a, b));
753
+ }
754
+
755
+ /** \internal \returns a packet version of \a *from, from must be properly aligned */
756
+ template <typename Packet>
757
+ EIGEN_DEVICE_FUNC inline Packet pload(const typename unpacket_traits<Packet>::type* from) {
758
+ return *from;
759
+ }
760
+
761
+ /** \internal \returns n elements of a packet version of \a *from, from must be properly aligned
762
+ * offset indicates the starting element in which to load and
763
+ * offset + n <= unpacket_traits::size
764
+ * All elements before offset and after the last element loaded will be initialized with zero */
765
+ template <typename Packet>
766
+ EIGEN_DEVICE_FUNC inline Packet pload_partial(const typename unpacket_traits<Packet>::type* from, const Index n,
767
+ const Index offset = 0) {
768
+ const Index packet_size = unpacket_traits<Packet>::size;
769
+ eigen_assert(n + offset <= packet_size && "number of elements plus offset will read past end of packet");
770
+ typedef typename unpacket_traits<Packet>::type Scalar;
771
+ EIGEN_ALIGN_MAX Scalar elements[packet_size] = {Scalar(0)};
772
+ for (Index i = offset; i < numext::mini(n + offset, packet_size); i++) {
773
+ elements[i] = from[i - offset];
774
+ }
775
+ return pload<Packet>(elements);
776
+ }
215
777
 
216
778
  /** \internal \returns a packet version of \a *from, (un-aligned load) */
217
- template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
218
- ploadu(const typename unpacket_traits<Packet>::type* from) { return *from; }
779
+ template <typename Packet>
780
+ EIGEN_DEVICE_FUNC inline Packet ploadu(const typename unpacket_traits<Packet>::type* from) {
781
+ return *from;
782
+ }
783
+
784
+ /** \internal \returns n elements of a packet version of \a *from, (un-aligned load)
785
+ * All elements after the last element loaded will be initialized with zero */
786
+ template <typename Packet>
787
+ EIGEN_DEVICE_FUNC inline Packet ploadu_partial(const typename unpacket_traits<Packet>::type* from, const Index n,
788
+ const Index offset = 0) {
789
+ const Index packet_size = unpacket_traits<Packet>::size;
790
+ eigen_assert(n + offset <= packet_size && "number of elements plus offset will read past end of packet");
791
+ typedef typename unpacket_traits<Packet>::type Scalar;
792
+ EIGEN_ALIGN_MAX Scalar elements[packet_size] = {Scalar(0)};
793
+ for (Index i = offset; i < numext::mini(n + offset, packet_size); i++) {
794
+ elements[i] = from[i - offset];
795
+ }
796
+ return pload<Packet>(elements);
797
+ }
798
+
799
+ /** \internal \returns a packet version of \a *from, (un-aligned masked load)
800
+ * There is no generic implementation. We only have implementations for specialized
801
+ * cases. Generic case should not be called.
802
+ */
803
+ template <typename Packet>
804
+ EIGEN_DEVICE_FUNC inline std::enable_if_t<unpacket_traits<Packet>::masked_load_available, Packet> ploadu(
805
+ const typename unpacket_traits<Packet>::type* from, typename unpacket_traits<Packet>::mask_t umask);
219
806
 
220
807
  /** \internal \returns a packet with constant coefficients \a a, e.g.: (a,a,a,a) */
221
- template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
222
- pset1(const typename unpacket_traits<Packet>::type& a) { return a; }
808
+ template <typename Packet>
809
+ EIGEN_DEVICE_FUNC inline Packet pset1(const typename unpacket_traits<Packet>::type& a) {
810
+ return a;
811
+ }
812
+
813
+ /** \internal \returns a packet with constant coefficients set from bits */
814
+ template <typename Packet, typename BitsType>
815
+ EIGEN_DEVICE_FUNC inline Packet pset1frombits(BitsType a);
223
816
 
224
817
  /** \internal \returns a packet with constant coefficients \a a[0], e.g.: (a[0],a[0],a[0],a[0]) */
225
- template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
226
- pload1(const typename unpacket_traits<Packet>::type *a) { return pset1<Packet>(*a); }
818
+ template <typename Packet>
819
+ EIGEN_DEVICE_FUNC inline Packet pload1(const typename unpacket_traits<Packet>::type* a) {
820
+ return pset1<Packet>(*a);
821
+ }
227
822
 
228
823
  /** \internal \returns a packet with elements of \a *from duplicated.
229
- * For instance, for a packet of 8 elements, 4 scalars will be read from \a *from and
230
- * duplicated to form: {from[0],from[0],from[1],from[1],from[2],from[2],from[3],from[3]}
231
- * Currently, this function is only used for scalar * complex products.
232
- */
233
- template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet
234
- ploaddup(const typename unpacket_traits<Packet>::type* from) { return *from; }
824
+ * For instance, for a packet of 8 elements, 4 scalars will be read from \a *from and
825
+ * duplicated to form: {from[0],from[0],from[1],from[1],from[2],from[2],from[3],from[3]}
826
+ * Currently, this function is only used for scalar * complex products.
827
+ */
828
+ template <typename Packet>
829
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet ploaddup(const typename unpacket_traits<Packet>::type* from) {
830
+ return *from;
831
+ }
235
832
 
236
833
  /** \internal \returns a packet with elements of \a *from quadrupled.
237
- * For instance, for a packet of 8 elements, 2 scalars will be read from \a *from and
238
- * replicated to form: {from[0],from[0],from[0],from[0],from[1],from[1],from[1],from[1]}
239
- * Currently, this function is only used in matrix products.
240
- * For packet-size smaller or equal to 4, this function is equivalent to pload1
241
- */
242
- template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
243
- ploadquad(const typename unpacket_traits<Packet>::type* from)
244
- { return pload1<Packet>(from); }
834
+ * For instance, for a packet of 8 elements, 2 scalars will be read from \a *from and
835
+ * replicated to form: {from[0],from[0],from[0],from[0],from[1],from[1],from[1],from[1]}
836
+ * Currently, this function is only used in matrix products.
837
+ * For packet-size smaller or equal to 4, this function is equivalent to pload1
838
+ */
839
+ template <typename Packet>
840
+ EIGEN_DEVICE_FUNC inline Packet ploadquad(const typename unpacket_traits<Packet>::type* from) {
841
+ return pload1<Packet>(from);
842
+ }
245
843
 
246
844
  /** \internal equivalent to
247
- * \code
248
- * a0 = pload1(a+0);
249
- * a1 = pload1(a+1);
250
- * a2 = pload1(a+2);
251
- * a3 = pload1(a+3);
252
- * \endcode
253
- * \sa pset1, pload1, ploaddup, pbroadcast2
254
- */
255
- template<typename Packet> EIGEN_DEVICE_FUNC
256
- inline void pbroadcast4(const typename unpacket_traits<Packet>::type *a,
257
- Packet& a0, Packet& a1, Packet& a2, Packet& a3)
258
- {
259
- a0 = pload1<Packet>(a+0);
260
- a1 = pload1<Packet>(a+1);
261
- a2 = pload1<Packet>(a+2);
262
- a3 = pload1<Packet>(a+3);
845
+ * \code
846
+ * a0 = pload1(a+0);
847
+ * a1 = pload1(a+1);
848
+ * a2 = pload1(a+2);
849
+ * a3 = pload1(a+3);
850
+ * \endcode
851
+ * \sa pset1, pload1, ploaddup, pbroadcast2
852
+ */
853
+ template <typename Packet>
854
+ EIGEN_DEVICE_FUNC inline void pbroadcast4(const typename unpacket_traits<Packet>::type* a, Packet& a0, Packet& a1,
855
+ Packet& a2, Packet& a3) {
856
+ a0 = pload1<Packet>(a + 0);
857
+ a1 = pload1<Packet>(a + 1);
858
+ a2 = pload1<Packet>(a + 2);
859
+ a3 = pload1<Packet>(a + 3);
263
860
  }
264
861
 
265
862
  /** \internal equivalent to
266
- * \code
267
- * a0 = pload1(a+0);
268
- * a1 = pload1(a+1);
269
- * \endcode
270
- * \sa pset1, pload1, ploaddup, pbroadcast4
271
- */
272
- template<typename Packet> EIGEN_DEVICE_FUNC
273
- inline void pbroadcast2(const typename unpacket_traits<Packet>::type *a,
274
- Packet& a0, Packet& a1)
275
- {
276
- a0 = pload1<Packet>(a+0);
277
- a1 = pload1<Packet>(a+1);
863
+ * \code
864
+ * a0 = pload1(a+0);
865
+ * a1 = pload1(a+1);
866
+ * \endcode
867
+ * \sa pset1, pload1, ploaddup, pbroadcast4
868
+ */
869
+ template <typename Packet>
870
+ EIGEN_DEVICE_FUNC inline void pbroadcast2(const typename unpacket_traits<Packet>::type* a, Packet& a0, Packet& a1) {
871
+ a0 = pload1<Packet>(a + 0);
872
+ a1 = pload1<Packet>(a + 1);
278
873
  }
279
874
 
280
875
  /** \internal \brief Returns a packet with coefficients (a,a+1,...,a+packet_size-1). */
281
- template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet
282
- plset(const typename unpacket_traits<Packet>::type& a) { return a; }
876
+ template <typename Packet>
877
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet plset(const typename unpacket_traits<Packet>::type& a) {
878
+ return a;
879
+ }
283
880
 
284
- /** \internal copy the packet \a from to \a *to, \a to must be 16 bytes aligned */
285
- template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstore(Scalar* to, const Packet& from)
286
- { (*to) = from; }
881
+ template <typename Packet, typename EnableIf = void>
882
+ struct peven_mask_impl {
883
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(const Packet&) {
884
+ typedef typename unpacket_traits<Packet>::type Scalar;
885
+ const size_t n = unpacket_traits<Packet>::size;
886
+ EIGEN_ALIGN_TO_BOUNDARY(sizeof(Packet)) Scalar elements[n];
887
+ for (size_t i = 0; i < n; ++i) {
888
+ memset(elements + i, ((i & 1) == 0 ? 0xff : 0), sizeof(Scalar));
889
+ }
890
+ return ploadu<Packet>(elements);
891
+ }
892
+ };
893
+
894
+ template <typename Scalar>
895
+ struct peven_mask_impl<Scalar, std::enable_if_t<is_scalar<Scalar>::value>> {
896
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run(const Scalar&) { return Scalar(1); }
897
+ };
898
+
899
+ /** \internal \returns a packet with constant coefficients \a a, e.g.: (x, 0, x, 0),
900
+ where x is the value of all 1-bits. */
901
+ template <typename Packet>
902
+ EIGEN_DEVICE_FUNC inline Packet peven_mask(const Packet& a) {
903
+ return peven_mask_impl<Packet>::run(a);
904
+ }
905
+
906
+ /** \internal copy the packet \a from to \a *to, \a to must be properly aligned */
907
+ template <typename Scalar, typename Packet>
908
+ EIGEN_DEVICE_FUNC inline void pstore(Scalar* to, const Packet& from) {
909
+ (*to) = from;
910
+ }
911
+
912
+ /** \internal copy n elements of the packet \a from to \a *to, \a to must be properly aligned
913
+ * offset indicates the starting element in which to store and
914
+ * offset + n <= unpacket_traits::size */
915
+ template <typename Scalar, typename Packet>
916
+ EIGEN_DEVICE_FUNC inline void pstore_partial(Scalar* to, const Packet& from, const Index n, const Index offset = 0) {
917
+ const Index packet_size = unpacket_traits<Packet>::size;
918
+ eigen_assert(n + offset <= packet_size && "number of elements plus offset will write past end of packet");
919
+ EIGEN_ALIGN_MAX Scalar elements[packet_size];
920
+ pstore<Scalar>(elements, from);
921
+ for (Index i = 0; i < numext::mini(n, packet_size - offset); i++) {
922
+ to[i] = elements[i + offset];
923
+ }
924
+ }
287
925
 
288
926
  /** \internal copy the packet \a from to \a *to, (un-aligned store) */
289
- template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstoreu(Scalar* to, const Packet& from)
290
- { (*to) = from; }
927
+ template <typename Scalar, typename Packet>
928
+ EIGEN_DEVICE_FUNC inline void pstoreu(Scalar* to, const Packet& from) {
929
+ (*to) = from;
930
+ }
291
931
 
292
- template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline Packet pgather(const Scalar* from, Index /*stride*/)
293
- { return ploadu<Packet>(from); }
932
+ /** \internal copy n elements of the packet \a from to \a *to, (un-aligned store) */
933
+ template <typename Scalar, typename Packet>
934
+ EIGEN_DEVICE_FUNC inline void pstoreu_partial(Scalar* to, const Packet& from, const Index n, const Index offset = 0) {
935
+ const Index packet_size = unpacket_traits<Packet>::size;
936
+ eigen_assert(n + offset <= packet_size && "number of elements plus offset will write past end of packet");
937
+ EIGEN_ALIGN_MAX Scalar elements[packet_size];
938
+ pstore<Scalar>(elements, from);
939
+ for (Index i = 0; i < numext::mini(n, packet_size - offset); i++) {
940
+ to[i] = elements[i + offset];
941
+ }
942
+ }
294
943
 
295
- template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pscatter(Scalar* to, const Packet& from, Index /*stride*/)
296
- { pstore(to, from); }
944
+ /** \internal copy the packet \a from to \a *to, (un-aligned store with a mask)
945
+ * There is no generic implementation. We only have implementations for specialized
946
+ * cases. Generic case should not be called.
947
+ */
948
+ template <typename Scalar, typename Packet>
949
+ EIGEN_DEVICE_FUNC inline std::enable_if_t<unpacket_traits<Packet>::masked_store_available, void> pstoreu(
950
+ Scalar* to, const Packet& from, typename unpacket_traits<Packet>::mask_t umask);
951
+
952
+ template <typename Scalar, typename Packet>
953
+ EIGEN_DEVICE_FUNC inline Packet pgather(const Scalar* from, Index /*stride*/) {
954
+ return ploadu<Packet>(from);
955
+ }
956
+
957
+ template <typename Scalar, typename Packet>
958
+ EIGEN_DEVICE_FUNC inline Packet pgather_partial(const Scalar* from, Index stride, const Index n) {
959
+ const Index packet_size = unpacket_traits<Packet>::size;
960
+ EIGEN_ALIGN_MAX Scalar elements[packet_size] = {Scalar(0)};
961
+ for (Index i = 0; i < numext::mini(n, packet_size); i++) {
962
+ elements[i] = from[i * stride];
963
+ }
964
+ return pload<Packet>(elements);
965
+ }
966
+
967
+ template <typename Scalar, typename Packet>
968
+ EIGEN_DEVICE_FUNC inline void pscatter(Scalar* to, const Packet& from, Index /*stride*/) {
969
+ pstore(to, from);
970
+ }
971
+
972
+ template <typename Scalar, typename Packet>
973
+ EIGEN_DEVICE_FUNC inline void pscatter_partial(Scalar* to, const Packet& from, Index stride, const Index n) {
974
+ const Index packet_size = unpacket_traits<Packet>::size;
975
+ EIGEN_ALIGN_MAX Scalar elements[packet_size];
976
+ pstore<Scalar>(elements, from);
977
+ for (Index i = 0; i < numext::mini(n, packet_size); i++) {
978
+ to[i * stride] = elements[i];
979
+ }
980
+ }
297
981
 
298
982
  /** \internal tries to do cache prefetching of \a addr */
299
- template<typename Scalar> EIGEN_DEVICE_FUNC inline void prefetch(const Scalar* addr)
300
- {
301
- #ifdef __CUDA_ARCH__
302
- #if defined(__LP64__)
983
+ template <typename Scalar>
984
+ EIGEN_DEVICE_FUNC inline void prefetch(const Scalar* addr) {
985
+ #if defined(EIGEN_HIP_DEVICE_COMPILE)
986
+ // do nothing
987
+ #elif defined(EIGEN_CUDA_ARCH)
988
+ #if defined(__LP64__) || EIGEN_OS_WIN64
303
989
  // 64-bit pointer operand constraint for inlined asm
304
990
  asm(" prefetch.L1 [ %1 ];" : "=l"(addr) : "l"(addr));
305
991
  #else
@@ -311,280 +997,706 @@ template<typename Scalar> EIGEN_DEVICE_FUNC inline void prefetch(const Scalar* a
311
997
  #endif
312
998
  }
313
999
 
314
- /** \internal \returns the first element of a packet */
315
- template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type pfirst(const Packet& a)
316
- { return a; }
317
-
318
- /** \internal \returns a packet where the element i contains the sum of the packet of \a vec[i] */
319
- template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
320
- preduxp(const Packet* vecs) { return vecs[0]; }
321
-
322
- /** \internal \returns the sum of the elements of \a a*/
323
- template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux(const Packet& a)
324
- { return a; }
325
-
326
- /** \internal \returns the sum of the elements of \a a by block of 4 elements.
327
- * For a packet {a0, a1, a2, a3, a4, a5, a6, a7}, it returns a half packet {a0+a4, a1+a5, a2+a6, a3+a7}
328
- * For packet-size smaller or equal to 4, this boils down to a noop.
329
- */
330
- template<typename Packet> EIGEN_DEVICE_FUNC inline
331
- typename conditional<(unpacket_traits<Packet>::size%8)==0,typename unpacket_traits<Packet>::half,Packet>::type
332
- predux_downto4(const Packet& a)
333
- { return a; }
334
-
335
- /** \internal \returns the product of the elements of \a a*/
336
- template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_mul(const Packet& a)
337
- { return a; }
338
-
339
- /** \internal \returns the min of the elements of \a a*/
340
- template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_min(const Packet& a)
341
- { return a; }
342
-
343
- /** \internal \returns the max of the elements of \a a*/
344
- template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_max(const Packet& a)
345
- { return a; }
346
-
347
1000
  /** \internal \returns the reversed elements of \a a*/
348
- template<typename Packet> EIGEN_DEVICE_FUNC inline Packet preverse(const Packet& a)
349
- { return a; }
1001
+ template <typename Packet>
1002
+ EIGEN_DEVICE_FUNC inline Packet preverse(const Packet& a) {
1003
+ return a;
1004
+ }
350
1005
 
351
1006
  /** \internal \returns \a a with real and imaginary part flipped (for complex type only) */
352
- template<typename Packet> EIGEN_DEVICE_FUNC inline Packet pcplxflip(const Packet& a)
353
- {
354
- return Packet(a.imag(),a.real());
1007
+ template <typename Packet>
1008
+ EIGEN_DEVICE_FUNC inline Packet pcplxflip(const Packet& a) {
1009
+ return Packet(numext::imag(a), numext::real(a));
355
1010
  }
356
1011
 
357
1012
  /**************************
358
- * Special math functions
359
- ***************************/
1013
+ * Special math functions
1014
+ ***************************/
1015
+
1016
+ /** \internal \returns isnan(a) */
1017
+ template <typename Packet>
1018
+ EIGEN_DEVICE_FUNC inline Packet pisnan(const Packet& a) {
1019
+ return pandnot(ptrue(a), pcmp_eq(a, a));
1020
+ }
1021
+
1022
+ /** \internal \returns isinf(a) */
1023
+ template <typename Packet>
1024
+ EIGEN_DEVICE_FUNC inline Packet pisinf(const Packet& a) {
1025
+ using Scalar = typename unpacket_traits<Packet>::type;
1026
+ constexpr Scalar inf = NumTraits<Scalar>::infinity();
1027
+ return pcmp_eq(pabs(a), pset1<Packet>(inf));
1028
+ }
360
1029
 
361
1030
  /** \internal \returns the sine of \a a (coeff-wise) */
362
- template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
363
- Packet psin(const Packet& a) { using std::sin; return sin(a); }
1031
+ template <typename Packet>
1032
+ EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet psin(const Packet& a) {
1033
+ EIGEN_USING_STD(sin);
1034
+ return sin(a);
1035
+ }
364
1036
 
365
1037
  /** \internal \returns the cosine of \a a (coeff-wise) */
366
- template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
367
- Packet pcos(const Packet& a) { using std::cos; return cos(a); }
1038
+ template <typename Packet>
1039
+ EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pcos(const Packet& a) {
1040
+ EIGEN_USING_STD(cos);
1041
+ return cos(a);
1042
+ }
368
1043
 
369
1044
  /** \internal \returns the tan of \a a (coeff-wise) */
370
- template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
371
- Packet ptan(const Packet& a) { using std::tan; return tan(a); }
1045
+ template <typename Packet>
1046
+ EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet ptan(const Packet& a) {
1047
+ EIGEN_USING_STD(tan);
1048
+ return tan(a);
1049
+ }
372
1050
 
373
1051
  /** \internal \returns the arc sine of \a a (coeff-wise) */
374
- template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
375
- Packet pasin(const Packet& a) { using std::asin; return asin(a); }
1052
+ template <typename Packet>
1053
+ EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pasin(const Packet& a) {
1054
+ EIGEN_USING_STD(asin);
1055
+ return asin(a);
1056
+ }
376
1057
 
377
1058
  /** \internal \returns the arc cosine of \a a (coeff-wise) */
378
- template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
379
- Packet pacos(const Packet& a) { using std::acos; return acos(a); }
380
-
381
- /** \internal \returns the arc tangent of \a a (coeff-wise) */
382
- template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
383
- Packet patan(const Packet& a) { using std::atan; return atan(a); }
1059
+ template <typename Packet>
1060
+ EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pacos(const Packet& a) {
1061
+ EIGEN_USING_STD(acos);
1062
+ return acos(a);
1063
+ }
384
1064
 
385
1065
  /** \internal \returns the hyperbolic sine of \a a (coeff-wise) */
386
- template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
387
- Packet psinh(const Packet& a) { using std::sinh; return sinh(a); }
1066
+ template <typename Packet>
1067
+ EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet psinh(const Packet& a) {
1068
+ EIGEN_USING_STD(sinh);
1069
+ return sinh(a);
1070
+ }
388
1071
 
389
1072
  /** \internal \returns the hyperbolic cosine of \a a (coeff-wise) */
390
- template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
391
- Packet pcosh(const Packet& a) { using std::cosh; return cosh(a); }
1073
+ template <typename Packet>
1074
+ EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pcosh(const Packet& a) {
1075
+ EIGEN_USING_STD(cosh);
1076
+ return cosh(a);
1077
+ }
1078
+
1079
+ /** \internal \returns the arc tangent of \a a (coeff-wise) */
1080
+ template <typename Packet>
1081
+ EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patan(const Packet& a) {
1082
+ EIGEN_USING_STD(atan);
1083
+ return atan(a);
1084
+ }
392
1085
 
393
1086
  /** \internal \returns the hyperbolic tan of \a a (coeff-wise) */
394
- template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
395
- Packet ptanh(const Packet& a) { using std::tanh; return tanh(a); }
1087
+ template <typename Packet>
1088
+ EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet ptanh(const Packet& a) {
1089
+ EIGEN_USING_STD(tanh);
1090
+ return tanh(a);
1091
+ }
1092
+
1093
+ /** \internal \returns the inverse hyperbolic tangent of \a a (coeff-wise) */
1094
+ template <typename Packet>
1095
+ EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patanh(const Packet& a) {
1096
+ EIGEN_USING_STD(atanh);
1097
+ return atanh(a);
1098
+ }
396
1099
 
397
1100
  /** \internal \returns the exp of \a a (coeff-wise) */
398
- template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
399
- Packet pexp(const Packet& a) { using std::exp; return exp(a); }
1101
+ template <typename Packet>
1102
+ EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexp(const Packet& a) {
1103
+ return numext::exp(a);
1104
+ }
1105
+
1106
+ /** \internal \returns the exp2 of \a a (coeff-wise) */
1107
+ template <typename Packet>
1108
+ EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexp2(const Packet& a) {
1109
+ return numext::exp2(a);
1110
+ }
1111
+
1112
+ /** \internal \returns the expm1 of \a a (coeff-wise) */
1113
+ template <typename Packet>
1114
+ EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexpm1(const Packet& a) {
1115
+ return numext::expm1(a);
1116
+ }
400
1117
 
401
1118
  /** \internal \returns the log of \a a (coeff-wise) */
402
- template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
403
- Packet plog(const Packet& a) { using std::log; return log(a); }
1119
+ template <typename Packet>
1120
+ EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog(const Packet& a) {
1121
+ EIGEN_USING_STD(log);
1122
+ return log(a);
1123
+ }
404
1124
 
405
1125
  /** \internal \returns the log1p of \a a (coeff-wise) */
406
- template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
407
- Packet plog1p(const Packet& a) { return numext::log1p(a); }
1126
+ template <typename Packet>
1127
+ EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog1p(const Packet& a) {
1128
+ return numext::log1p(a);
1129
+ }
408
1130
 
409
1131
  /** \internal \returns the log10 of \a a (coeff-wise) */
410
- template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
411
- Packet plog10(const Packet& a) { using std::log10; return log10(a); }
1132
+ template <typename Packet>
1133
+ EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog10(const Packet& a) {
1134
+ EIGEN_USING_STD(log10);
1135
+ return log10(a);
1136
+ }
1137
+
1138
+ /** \internal \returns the log2 of \a a (coeff-wise) */
1139
+ template <typename Packet>
1140
+ EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog2(const Packet& a) {
1141
+ using Scalar = typename internal::unpacket_traits<Packet>::type;
1142
+ using RealScalar = typename NumTraits<Scalar>::Real;
1143
+ return pmul(pset1<Packet>(Scalar(RealScalar(EIGEN_LOG2E))), plog(a));
1144
+ }
412
1145
 
413
1146
  /** \internal \returns the square-root of \a a (coeff-wise) */
414
- template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
415
- Packet psqrt(const Packet& a) { using std::sqrt; return sqrt(a); }
1147
+ template <typename Packet>
1148
+ EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet psqrt(const Packet& a) {
1149
+ return numext::sqrt(a);
1150
+ }
416
1151
 
417
- /** \internal \returns the reciprocal square-root of \a a (coeff-wise) */
418
- template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
419
- Packet prsqrt(const Packet& a) {
420
- return pdiv(pset1<Packet>(1), psqrt(a));
1152
+ /** \internal \returns the cube-root of \a a (coeff-wise) */
1153
+ template <typename Packet>
1154
+ EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pcbrt(const Packet& a) {
1155
+ return numext::cbrt(a);
421
1156
  }
422
1157
 
1158
+ template <typename Packet, bool IsScalar = is_scalar<Packet>::value,
1159
+ bool IsInteger = NumTraits<typename unpacket_traits<Packet>::type>::IsInteger>
1160
+ struct nearest_integer_packetop_impl {
1161
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_floor(const Packet& x) { return numext::floor(x); }
1162
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_ceil(const Packet& x) { return numext::ceil(x); }
1163
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_rint(const Packet& x) { return numext::rint(x); }
1164
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_round(const Packet& x) { return numext::round(x); }
1165
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_trunc(const Packet& x) { return numext::trunc(x); }
1166
+ };
1167
+
423
1168
  /** \internal \returns the rounded value of \a a (coeff-wise) */
424
- template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
425
- Packet pround(const Packet& a) { using numext::round; return round(a); }
1169
+ template <typename Packet>
1170
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pround(const Packet& a) {
1171
+ return nearest_integer_packetop_impl<Packet>::run_round(a);
1172
+ }
426
1173
 
427
1174
  /** \internal \returns the floor of \a a (coeff-wise) */
428
- template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
429
- Packet pfloor(const Packet& a) { using numext::floor; return floor(a); }
1175
+ template <typename Packet>
1176
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pfloor(const Packet& a) {
1177
+ return nearest_integer_packetop_impl<Packet>::run_floor(a);
1178
+ }
1179
+
1180
+ /** \internal \returns the rounded value of \a a (coeff-wise) with current
1181
+ * rounding mode */
1182
+ template <typename Packet>
1183
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet print(const Packet& a) {
1184
+ return nearest_integer_packetop_impl<Packet>::run_rint(a);
1185
+ }
430
1186
 
431
1187
  /** \internal \returns the ceil of \a a (coeff-wise) */
432
- template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
433
- Packet pceil(const Packet& a) { using numext::ceil; return ceil(a); }
1188
+ template <typename Packet>
1189
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pceil(const Packet& a) {
1190
+ return nearest_integer_packetop_impl<Packet>::run_ceil(a);
1191
+ }
434
1192
 
435
- /***************************************************************************
436
- * The following functions might not have to be overwritten for vectorized types
437
- ***************************************************************************/
438
-
439
- /** \internal copy a packet with constant coeficient \a a (e.g., [a,a,a,a]) to \a *to. \a to must be 16 bytes aligned */
440
- // NOTE: this function must really be templated on the packet type (think about different packet types for the same scalar type)
441
- template<typename Packet>
442
- inline void pstore1(typename unpacket_traits<Packet>::type* to, const typename unpacket_traits<Packet>::type& a)
443
- {
444
- pstore(to, pset1<Packet>(a));
1193
+ /** \internal \returns the truncation of \a a (coeff-wise) */
1194
+ template <typename Packet>
1195
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet ptrunc(const Packet& a) {
1196
+ return nearest_integer_packetop_impl<Packet>::run_trunc(a);
1197
+ }
1198
+
1199
+ template <typename Packet, typename EnableIf = void>
1200
+ struct psign_impl {
1201
+ static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a) { return numext::sign(a); }
1202
+ };
1203
+
1204
+ /** \internal \returns the sign of \a a (coeff-wise) */
1205
+ template <typename Packet>
1206
+ EIGEN_DEVICE_FUNC inline Packet psign(const Packet& a) {
1207
+ return psign_impl<Packet>::run(a);
445
1208
  }
446
1209
 
1210
+ template <>
1211
+ EIGEN_DEVICE_FUNC inline bool psign(const bool& a) {
1212
+ return a;
1213
+ }
1214
+
1215
+ /** \internal \returns the first element of a packet */
1216
+ template <typename Packet>
1217
+ EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type pfirst(const Packet& a) {
1218
+ return a;
1219
+ }
1220
+
1221
+ /** \internal \returns the sum of the elements of upper and lower half of \a a if \a a is larger than 4.
1222
+ * For a packet {a0, a1, a2, a3, a4, a5, a6, a7}, it returns a half packet {a0+a4, a1+a5, a2+a6, a3+a7}
1223
+ * For packet-size smaller or equal to 4, this boils down to a noop.
1224
+ */
1225
+ template <typename Packet>
1226
+ EIGEN_DEVICE_FUNC inline std::conditional_t<(unpacket_traits<Packet>::size % 8) == 0,
1227
+ typename unpacket_traits<Packet>::half, Packet>
1228
+ predux_half_dowto4(const Packet& a) {
1229
+ return a;
1230
+ }
1231
+
1232
+ // Slow generic implementation of Packet reduction.
1233
+ template <typename Packet, typename Op>
1234
+ EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_helper(const Packet& a, Op op) {
1235
+ typedef typename unpacket_traits<Packet>::type Scalar;
1236
+ const size_t n = unpacket_traits<Packet>::size;
1237
+ EIGEN_ALIGN_TO_BOUNDARY(sizeof(Packet)) Scalar elements[n];
1238
+ pstoreu<Scalar>(elements, a);
1239
+ for (size_t k = n / 2; k > 0; k /= 2) {
1240
+ for (size_t i = 0; i < k; ++i) {
1241
+ elements[i] = op(elements[i], elements[i + k]);
1242
+ }
1243
+ }
1244
+ return elements[0];
1245
+ }
1246
+
1247
+ /** \internal \returns the sum of the elements of \a a*/
1248
+ template <typename Packet>
1249
+ EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux(const Packet& a) {
1250
+ return a;
1251
+ }
1252
+
1253
+ /** \internal \returns the product of the elements of \a a */
1254
+ template <typename Packet>
1255
+ EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_mul(const Packet& a) {
1256
+ typedef typename unpacket_traits<Packet>::type Scalar;
1257
+ return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmul<Scalar>)));
1258
+ }
1259
+
1260
+ /** \internal \returns the min of the elements of \a a */
1261
+ template <typename Packet>
1262
+ EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_min(const Packet& a) {
1263
+ typedef typename unpacket_traits<Packet>::type Scalar;
1264
+ return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmin<Scalar>)));
1265
+ }
1266
+
1267
+ /** \internal \returns the max of the elements of \a a */
1268
+ template <typename Packet>
1269
+ EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_max(const Packet& a) {
1270
+ typedef typename unpacket_traits<Packet>::type Scalar;
1271
+ return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmax<Scalar>)));
1272
+ }
1273
+
1274
+ template <int NaNPropagation, typename Packet>
1275
+ struct predux_min_max_helper_impl {
1276
+ using Scalar = typename unpacket_traits<Packet>::type;
1277
+ static constexpr bool UsePredux_ = NaNPropagation == PropagateFast || NumTraits<Scalar>::IsInteger;
1278
+ template <bool UsePredux = UsePredux_, std::enable_if_t<!UsePredux, bool> = true>
1279
+ static EIGEN_DEVICE_FUNC inline Scalar run_min(const Packet& a) {
1280
+ return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmin<NaNPropagation, Scalar>)));
1281
+ }
1282
+ template <bool UsePredux = UsePredux_, std::enable_if_t<!UsePredux, bool> = true>
1283
+ static EIGEN_DEVICE_FUNC inline Scalar run_max(const Packet& a) {
1284
+ return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmax<NaNPropagation, Scalar>)));
1285
+ }
1286
+ template <bool UsePredux = UsePredux_, std::enable_if_t<UsePredux, bool> = true>
1287
+ static EIGEN_DEVICE_FUNC inline Scalar run_min(const Packet& a) {
1288
+ return predux_min(a);
1289
+ }
1290
+ template <bool UsePredux = UsePredux_, std::enable_if_t<UsePredux, bool> = true>
1291
+ static EIGEN_DEVICE_FUNC inline Scalar run_max(const Packet& a) {
1292
+ return predux_max(a);
1293
+ }
1294
+ };
1295
+
1296
+ template <int NaNPropagation, typename Packet>
1297
+ EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_min(const Packet& a) {
1298
+ return predux_min_max_helper_impl<NaNPropagation, Packet>::run_min(a);
1299
+ }
1300
+
1301
+ template <int NaNPropagation, typename Packet>
1302
+ EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_max(const Packet& a) {
1303
+ return predux_min_max_helper_impl<NaNPropagation, Packet>::run_max(a);
1304
+ }
1305
+
1306
+ #undef EIGEN_BINARY_OP_NAN_PROPAGATION
1307
+
1308
+ /** \internal \returns true if all coeffs of \a a mean "true"
1309
+ * It is supposed to be called on values returned by pcmp_*.
1310
+ */
1311
+ // not needed yet
1312
+ // template<typename Packet> EIGEN_DEVICE_FUNC inline bool predux_all(const Packet& a)
1313
+ // { return bool(a); }
1314
+
1315
+ /** \internal \returns true if any coeff of \a a means "true"
1316
+ * It is supposed to be called on values returned by pcmp_*.
1317
+ */
1318
+ template <typename Packet>
1319
+ EIGEN_DEVICE_FUNC inline bool predux_any(const Packet& a) {
1320
+ // Dirty but generic implementation where "true" is assumed to be non-zero and the same for all coefficients.
1321
+ // It is expected that "true" is either:
1322
+ // - Scalar(1)
1323
+ // - bits full of ones (NaN for floats),
1324
+ // - or first bit equal to 1 (1 for ints, smallest denormal for floats).
1325
+ // For all these cases, taking the sum is just fine, and this boils down to a no-op for scalars.
1326
+ typedef typename unpacket_traits<Packet>::type Scalar;
1327
+ return numext::not_equal_strict(predux(a), Scalar(0));
1328
+ }
1329
+
1330
+ /***************************************************************************
1331
+ * The following functions might not have to be overwritten for vectorized types
1332
+ ***************************************************************************/
1333
+
1334
+ template <typename Packet, typename EnableIf = void>
1335
+ struct pmadd_impl {
1336
+ static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pmadd(const Packet& a, const Packet& b, const Packet& c) {
1337
+ return padd(pmul(a, b), c);
1338
+ }
1339
+ static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pmsub(const Packet& a, const Packet& b, const Packet& c) {
1340
+ return psub(pmul(a, b), c);
1341
+ }
1342
+ static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pnmadd(const Packet& a, const Packet& b, const Packet& c) {
1343
+ return psub(c, pmul(a, b));
1344
+ }
1345
+ static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pnmsub(const Packet& a, const Packet& b, const Packet& c) {
1346
+ return pnegate(pmadd(a, b, c));
1347
+ }
1348
+ };
1349
+
1350
+ template <typename Scalar>
1351
+ struct pmadd_impl<Scalar, std::enable_if_t<is_scalar<Scalar>::value && NumTraits<Scalar>::IsSigned>> {
1352
+ static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar pmadd(const Scalar& a, const Scalar& b, const Scalar& c) {
1353
+ return numext::madd<Scalar>(a, b, c);
1354
+ }
1355
+ static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar pmsub(const Scalar& a, const Scalar& b, const Scalar& c) {
1356
+ return numext::madd<Scalar>(a, b, Scalar(-c));
1357
+ }
1358
+ static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar pnmadd(const Scalar& a, const Scalar& b, const Scalar& c) {
1359
+ return numext::madd<Scalar>(Scalar(-a), b, c);
1360
+ }
1361
+ static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar pnmsub(const Scalar& a, const Scalar& b, const Scalar& c) {
1362
+ return -Scalar(numext::madd<Scalar>(a, b, c));
1363
+ }
1364
+ };
1365
+
1366
+ // Multiply-add instructions.
447
1367
  /** \internal \returns a * b + c (coeff-wise) */
448
- template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
449
- pmadd(const Packet& a,
450
- const Packet& b,
451
- const Packet& c)
452
- { return padd(pmul(a, b),c); }
1368
+ template <typename Packet>
1369
+ EIGEN_DEVICE_FUNC inline Packet pmadd(const Packet& a, const Packet& b, const Packet& c) {
1370
+ return pmadd_impl<Packet>::pmadd(a, b, c);
1371
+ }
1372
+
1373
+ /** \internal \returns a * b - c (coeff-wise) */
1374
+ template <typename Packet>
1375
+ EIGEN_DEVICE_FUNC inline Packet pmsub(const Packet& a, const Packet& b, const Packet& c) {
1376
+ return pmadd_impl<Packet>::pmsub(a, b, c);
1377
+ }
1378
+
1379
+ /** \internal \returns -(a * b) + c (coeff-wise) */
1380
+ template <typename Packet>
1381
+ EIGEN_DEVICE_FUNC inline Packet pnmadd(const Packet& a, const Packet& b, const Packet& c) {
1382
+ return pmadd_impl<Packet>::pnmadd(a, b, c);
1383
+ }
1384
+
1385
+ /** \internal \returns -(a * b + c) (coeff-wise) */
1386
+ template <typename Packet>
1387
+ EIGEN_DEVICE_FUNC inline Packet pnmsub(const Packet& a, const Packet& b, const Packet& c) {
1388
+ return pmadd_impl<Packet>::pnmsub(a, b, c);
1389
+ }
1390
+
1391
+ /** \internal copy a packet with constant coefficient \a a (e.g., [a,a,a,a]) to \a *to. \a to must be 16 bytes aligned
1392
+ */
1393
+ // NOTE: this function must really be templated on the packet type (think about different packet types for the same
1394
+ // scalar type)
1395
+ template <typename Packet>
1396
+ inline void pstore1(typename unpacket_traits<Packet>::type* to, const typename unpacket_traits<Packet>::type& a) {
1397
+ pstore(to, pset1<Packet>(a));
1398
+ }
453
1399
 
454
1400
  /** \internal \returns a packet version of \a *from.
455
- * The pointer \a from must be aligned on a \a Alignment bytes boundary. */
456
- template<typename Packet, int Alignment>
457
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt(const typename unpacket_traits<Packet>::type* from)
458
- {
459
- if(Alignment >= unpacket_traits<Packet>::alignment)
1401
+ * The pointer \a from must be aligned on a \a Alignment bytes boundary. */
1402
+ template <typename Packet, int Alignment>
1403
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt(const typename unpacket_traits<Packet>::type* from) {
1404
+ if (Alignment >= unpacket_traits<Packet>::alignment)
460
1405
  return pload<Packet>(from);
461
1406
  else
462
1407
  return ploadu<Packet>(from);
463
1408
  }
464
1409
 
1410
+ /** \internal \returns n elements of a packet version of \a *from.
1411
+ * The pointer \a from must be aligned on a \a Alignment bytes boundary. */
1412
+ template <typename Packet, int Alignment>
1413
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt_partial(const typename unpacket_traits<Packet>::type* from,
1414
+ const Index n, const Index offset = 0) {
1415
+ if (Alignment >= unpacket_traits<Packet>::alignment)
1416
+ return pload_partial<Packet>(from, n, offset);
1417
+ else
1418
+ return ploadu_partial<Packet>(from, n, offset);
1419
+ }
1420
+
465
1421
  /** \internal copy the packet \a from to \a *to.
466
- * The pointer \a from must be aligned on a \a Alignment bytes boundary. */
467
- template<typename Scalar, typename Packet, int Alignment>
468
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(Scalar* to, const Packet& from)
469
- {
470
- if(Alignment >= unpacket_traits<Packet>::alignment)
1422
+ * The pointer \a to must be aligned on a \a Alignment bytes boundary. */
1423
+ template <typename Scalar, typename Packet, int Alignment>
1424
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(Scalar* to, const Packet& from) {
1425
+ if (Alignment >= unpacket_traits<Packet>::alignment)
471
1426
  pstore(to, from);
472
1427
  else
473
1428
  pstoreu(to, from);
474
1429
  }
475
1430
 
476
- /** \internal \returns a packet version of \a *from.
477
- * Unlike ploadt, ploadt_ro takes advantage of the read-only memory path on the
478
- * hardware if available to speedup the loading of data that won't be modified
479
- * by the current computation.
480
- */
481
- template<typename Packet, int LoadMode>
482
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt_ro(const typename unpacket_traits<Packet>::type* from)
483
- {
484
- return ploadt<Packet, LoadMode>(from);
1431
+ /** \internal copy n elements of the packet \a from to \a *to.
1432
+ * The pointer \a to must be aligned on a \a Alignment bytes boundary. */
1433
+ template <typename Scalar, typename Packet, int Alignment>
1434
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret_partial(Scalar* to, const Packet& from, const Index n,
1435
+ const Index offset = 0) {
1436
+ if (Alignment >= unpacket_traits<Packet>::alignment)
1437
+ pstore_partial(to, from, n, offset);
1438
+ else
1439
+ pstoreu_partial(to, from, n, offset);
485
1440
  }
486
1441
 
487
- /** \internal default implementation of palign() allowing partial specialization */
488
- template<int Offset,typename PacketType>
489
- struct palign_impl
490
- {
491
- // by default data are aligned, so there is nothing to be done :)
492
- static inline void run(PacketType&, const PacketType&) {}
493
- };
494
-
495
- /** \internal update \a first using the concatenation of the packet_size minus \a Offset last elements
496
- * of \a first and \a Offset first elements of \a second.
497
- *
498
- * This function is currently only used to optimize matrix-vector products on unligned matrices.
499
- * It takes 2 packets that represent a contiguous memory array, and returns a packet starting
500
- * at the position \a Offset. For instance, for packets of 4 elements, we have:
501
- * Input:
502
- * - first = {f0,f1,f2,f3}
503
- * - second = {s0,s1,s2,s3}
504
- * Output:
505
- * - if Offset==0 then {f0,f1,f2,f3}
506
- * - if Offset==1 then {f1,f2,f3,s0}
507
- * - if Offset==2 then {f2,f3,s0,s1}
508
- * - if Offset==3 then {f3,s0,s1,s3}
509
- */
510
- template<int Offset,typename PacketType>
511
- inline void palign(PacketType& first, const PacketType& second)
512
- {
513
- palign_impl<Offset,PacketType>::run(first,second);
1442
+ /** \internal \returns a packet version of \a *from.
1443
+ * Unlike ploadt, ploadt_ro takes advantage of the read-only memory path on the
1444
+ * hardware if available to speedup the loading of data that won't be modified
1445
+ * by the current computation.
1446
+ */
1447
+ template <typename Packet, int LoadMode>
1448
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt_ro(const typename unpacket_traits<Packet>::type* from) {
1449
+ return ploadt<Packet, LoadMode>(from);
514
1450
  }
515
1451
 
516
1452
  /***************************************************************************
517
- * Fast complex products (GCC generates a function call which is very slow)
518
- ***************************************************************************/
1453
+ * Fast complex products (GCC generates a function call which is very slow)
1454
+ ***************************************************************************/
519
1455
 
520
1456
  // Eigen+CUDA does not support complexes.
521
- #ifndef __CUDACC__
1457
+ #if !defined(EIGEN_GPUCC)
522
1458
 
523
- template<> inline std::complex<float> pmul(const std::complex<float>& a, const std::complex<float>& b)
524
- { return std::complex<float>(a.real()*b.real() - a.imag()*b.imag(), a.imag()*b.real() + a.real()*b.imag()); }
1459
+ template <>
1460
+ inline std::complex<float> pmul(const std::complex<float>& a, const std::complex<float>& b) {
1461
+ return std::complex<float>(a.real() * b.real() - a.imag() * b.imag(), a.imag() * b.real() + a.real() * b.imag());
1462
+ }
525
1463
 
526
- template<> inline std::complex<double> pmul(const std::complex<double>& a, const std::complex<double>& b)
527
- { return std::complex<double>(a.real()*b.real() - a.imag()*b.imag(), a.imag()*b.real() + a.real()*b.imag()); }
1464
+ template <>
1465
+ inline std::complex<double> pmul(const std::complex<double>& a, const std::complex<double>& b) {
1466
+ return std::complex<double>(a.real() * b.real() - a.imag() * b.imag(), a.imag() * b.real() + a.real() * b.imag());
1467
+ }
528
1468
 
529
1469
  #endif
530
1470
 
531
-
532
1471
  /***************************************************************************
533
1472
  * PacketBlock, that is a collection of N packets where the number of words
534
1473
  * in the packet is a multiple of N.
535
- ***************************************************************************/
536
- template <typename Packet,int N=unpacket_traits<Packet>::size> struct PacketBlock {
1474
+ ***************************************************************************/
1475
+ template <typename Packet, int N = unpacket_traits<Packet>::size>
1476
+ struct PacketBlock {
537
1477
  Packet packet[N];
538
1478
  };
539
1479
 
540
- template<typename Packet> EIGEN_DEVICE_FUNC inline void
541
- ptranspose(PacketBlock<Packet,1>& /*kernel*/) {
1480
+ template <typename Packet>
1481
+ EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet, 1>& /*kernel*/) {
542
1482
  // Nothing to do in the scalar case, i.e. a 1x1 matrix.
543
1483
  }
544
1484
 
545
1485
  /***************************************************************************
546
1486
  * Selector, i.e. vector of N boolean values used to select (i.e. blend)
547
1487
  * words from 2 packets.
548
- ***************************************************************************/
549
- template <size_t N> struct Selector {
1488
+ ***************************************************************************/
1489
+ template <size_t N>
1490
+ struct Selector {
550
1491
  bool select[N];
551
1492
  };
552
1493
 
553
- template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
554
- pblend(const Selector<unpacket_traits<Packet>::size>& ifPacket, const Packet& thenPacket, const Packet& elsePacket) {
1494
+ template <typename Packet>
1495
+ EIGEN_DEVICE_FUNC inline Packet pblend(const Selector<unpacket_traits<Packet>::size>& ifPacket,
1496
+ const Packet& thenPacket, const Packet& elsePacket) {
555
1497
  return ifPacket.select[0] ? thenPacket : elsePacket;
556
1498
  }
557
1499
 
558
- /** \internal \returns \a a with the first coefficient replaced by the scalar b */
559
- template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
560
- pinsertfirst(const Packet& a, typename unpacket_traits<Packet>::type b)
561
- {
562
- // Default implementation based on pblend.
563
- // It must be specialized for higher performance.
564
- Selector<unpacket_traits<Packet>::size> mask;
565
- mask.select[0] = true;
566
- // This for loop should be optimized away by the compiler.
567
- for(Index i=1; i<unpacket_traits<Packet>::size; ++i)
568
- mask.select[i] = false;
569
- return pblend(mask, pset1<Packet>(b), a);
1500
+ /** \internal \returns 1 / a (coeff-wise) */
1501
+ template <typename Packet>
1502
+ EIGEN_DEVICE_FUNC inline Packet preciprocal(const Packet& a) {
1503
+ using Scalar = typename unpacket_traits<Packet>::type;
1504
+ return pdiv(pset1<Packet>(Scalar(1)), a);
1505
+ }
1506
+
1507
+ /** \internal \returns the reciprocal square-root of \a a (coeff-wise) */
1508
+ template <typename Packet>
1509
+ EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet prsqrt(const Packet& a) {
1510
+ return preciprocal<Packet>(psqrt(a));
1511
+ }
1512
+
1513
+ template <typename Packet, bool IsScalar = is_scalar<Packet>::value,
1514
+ bool IsInteger = NumTraits<typename unpacket_traits<Packet>::type>::IsInteger>
1515
+ struct psignbit_impl;
1516
+ template <typename Packet, bool IsInteger>
1517
+ struct psignbit_impl<Packet, true, IsInteger> {
1518
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static constexpr Packet run(const Packet& a) { return numext::signbit(a); }
1519
+ };
1520
+ template <typename Packet>
1521
+ struct psignbit_impl<Packet, false, false> {
1522
+ // generic implementation if not specialized in PacketMath.h
1523
+ // slower than arithmetic shift
1524
+ typedef typename unpacket_traits<Packet>::type Scalar;
1525
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static Packet run(const Packet& a) {
1526
+ const Packet cst_pos_one = pset1<Packet>(Scalar(1));
1527
+ const Packet cst_neg_one = pset1<Packet>(Scalar(-1));
1528
+ return pcmp_eq(por(pand(a, cst_neg_one), cst_pos_one), cst_neg_one);
1529
+ }
1530
+ };
1531
+ template <typename Packet>
1532
+ struct psignbit_impl<Packet, false, true> {
1533
+ // generic implementation for integer packets
1534
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static constexpr Packet run(const Packet& a) { return pcmp_lt(a, pzero(a)); }
1535
+ };
1536
+ /** \internal \returns the sign bit of \a a as a bitmask*/
1537
+ template <typename Packet>
1538
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE constexpr Packet psignbit(const Packet& a) {
1539
+ return psignbit_impl<Packet>::run(a);
1540
+ }
1541
+
1542
+ /** \internal \returns the 2-argument arc tangent of \a y and \a x (coeff-wise) */
1543
+ template <typename Packet, std::enable_if_t<is_scalar<Packet>::value, int> = 0>
1544
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet patan2(const Packet& y, const Packet& x) {
1545
+ return numext::atan2(y, x);
1546
+ }
1547
+
1548
+ /** \internal \returns the 2-argument arc tangent of \a y and \a x (coeff-wise) */
1549
+ template <typename Packet, std::enable_if_t<!is_scalar<Packet>::value, int> = 0>
1550
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet patan2(const Packet& y, const Packet& x) {
1551
+ typedef typename internal::unpacket_traits<Packet>::type Scalar;
1552
+
1553
+ // See https://en.cppreference.com/w/cpp/numeric/math/atan2
1554
+ // for how corner cases are supposed to be handled according to the
1555
+ // IEEE floating-point standard (IEC 60559).
1556
+ const Packet kSignMask = pset1<Packet>(-Scalar(0));
1557
+ const Packet kZero = pzero(x);
1558
+ const Packet kOne = pset1<Packet>(Scalar(1));
1559
+ const Packet kPi = pset1<Packet>(Scalar(EIGEN_PI));
1560
+
1561
+ const Packet x_has_signbit = psignbit(x);
1562
+ const Packet y_signmask = pand(y, kSignMask);
1563
+ const Packet x_signmask = pand(x, kSignMask);
1564
+ const Packet result_signmask = pxor(y_signmask, x_signmask);
1565
+ const Packet shift = por(pand(x_has_signbit, kPi), y_signmask);
1566
+
1567
+ const Packet x_and_y_are_same = pcmp_eq(pabs(x), pabs(y));
1568
+ const Packet x_and_y_are_zero = pcmp_eq(por(x, y), kZero);
1569
+
1570
+ Packet arg = pdiv(y, x);
1571
+ arg = pselect(x_and_y_are_same, por(kOne, result_signmask), arg);
1572
+ arg = pselect(x_and_y_are_zero, result_signmask, arg);
1573
+
1574
+ Packet result = patan(arg);
1575
+ result = padd(result, shift);
1576
+ return result;
1577
+ }
1578
+
1579
+ /** \internal \returns the argument of \a a as a complex number */
1580
+ template <typename Packet, std::enable_if_t<is_scalar<Packet>::value, int> = 0>
1581
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pcarg(const Packet& a) {
1582
+ return Packet(numext::arg(a));
1583
+ }
1584
+
1585
+ /** \internal \returns the argument of \a a as a complex number */
1586
+ template <typename Packet, std::enable_if_t<!is_scalar<Packet>::value, int> = 0>
1587
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pcarg(const Packet& a) {
1588
+ EIGEN_STATIC_ASSERT(NumTraits<typename unpacket_traits<Packet>::type>::IsComplex,
1589
+ THIS METHOD IS FOR COMPLEX TYPES ONLY)
1590
+ using RealPacket = typename unpacket_traits<Packet>::as_real;
1591
+ // a // r i r i ...
1592
+ RealPacket aflip = pcplxflip(a).v; // i r i r ...
1593
+ RealPacket result = patan2(aflip, a.v); // atan2 crap atan2 crap ...
1594
+ return (Packet)pand(result, peven_mask(result)); // atan2 0 atan2 0 ...
1595
+ }
1596
+
1597
+ /** \internal \returns a packet populated with values in the range [begin, begin + count). Elements
1598
+ * outside this range are not defined. \a *from does not need to be aligned, and can be null if \a count is zero.*/
1599
+ template <typename Packet>
1600
+ EIGEN_DEVICE_FUNC inline Packet ploaduSegment(const typename unpacket_traits<Packet>::type* from, Index begin,
1601
+ Index count) {
1602
+ using Scalar = typename unpacket_traits<Packet>::type;
1603
+ constexpr Index PacketSize = unpacket_traits<Packet>::size;
1604
+ eigen_assert((begin >= 0 && count >= 0 && begin + count <= PacketSize) && "invalid range");
1605
+ Scalar aux[PacketSize] = {};
1606
+ for (Index k = begin; k < begin + count; k++) {
1607
+ aux[k] = from[k];
1608
+ }
1609
+ return ploadu<Packet>(aux);
1610
+ }
1611
+
1612
+ /** \internal \returns a packet populated with values in the range [begin, begin + count). Elements
1613
+ * outside this range are not defined. \a *from must be aligned, and cannot be null.*/
1614
+ template <typename Packet>
1615
+ EIGEN_DEVICE_FUNC inline Packet ploadSegment(const typename unpacket_traits<Packet>::type* from, Index begin,
1616
+ Index count) {
1617
+ return ploaduSegment<Packet>(from, begin, count);
1618
+ }
1619
+
1620
+ /** \internal copy the packet \a from in the range [begin, begin + count) to \a *to.
1621
+ Elements outside of the range [begin, begin + count) are not defined. \a *to does not need to be aligned, and can be
1622
+ null if \a count is zero.*/
1623
+ template <typename Scalar, typename Packet>
1624
+ EIGEN_DEVICE_FUNC inline void pstoreuSegment(Scalar* to, const Packet& from, Index begin, Index count) {
1625
+ constexpr Index PacketSize = unpacket_traits<Packet>::size;
1626
+ eigen_assert((begin >= 0 && count >= 0 && begin + count <= PacketSize) && "invalid range");
1627
+ Scalar aux[PacketSize];
1628
+ pstoreu<Scalar, Packet>(aux, from);
1629
+ for (Index k = begin; k < begin + count; k++) {
1630
+ to[k] = aux[k];
1631
+ }
1632
+ }
1633
+
1634
+ /** \internal copy the packet \a from in the range [begin, begin + count) to \a *to.
1635
+ Elements outside of the range [begin, begin + count) are not defined. \a *to must be aligned, and cannot be
1636
+ null.*/
1637
+ template <typename Scalar, typename Packet>
1638
+ EIGEN_DEVICE_FUNC inline void pstoreSegment(Scalar* to, const Packet& from, Index begin, Index count) {
1639
+ return pstoreuSegment(to, from, begin, count);
1640
+ }
1641
+
1642
+ /** \internal \returns a packet populated with values in the range [begin, begin + count). Elements
1643
+ * outside this range are not defined.*/
1644
+ template <typename Packet, int Alignment>
1645
+ EIGEN_DEVICE_FUNC inline Packet ploadtSegment(const typename unpacket_traits<Packet>::type* from, Index begin,
1646
+ Index count) {
1647
+ constexpr int RequiredAlignment = unpacket_traits<Packet>::alignment;
1648
+ if (Alignment >= RequiredAlignment) {
1649
+ return ploadSegment<Packet>(from, begin, count);
1650
+ } else {
1651
+ return ploaduSegment<Packet>(from, begin, count);
1652
+ }
570
1653
  }
571
1654
 
572
- /** \internal \returns \a a with the last coefficient replaced by the scalar b */
573
- template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
574
- pinsertlast(const Packet& a, typename unpacket_traits<Packet>::type b)
575
- {
576
- // Default implementation based on pblend.
577
- // It must be specialized for higher performance.
578
- Selector<unpacket_traits<Packet>::size> mask;
579
- // This for loop should be optimized away by the compiler.
580
- for(Index i=0; i<unpacket_traits<Packet>::size-1; ++i)
581
- mask.select[i] = false;
582
- mask.select[unpacket_traits<Packet>::size-1] = true;
583
- return pblend(mask, pset1<Packet>(b), a);
1655
+ /** \internal copy the packet \a from in the range [begin, begin + count) to \a *to.
1656
+ Elements outside of the range [begin, begin + count) are not defined.*/
1657
+ template <typename Scalar, typename Packet, int Alignment>
1658
+ EIGEN_DEVICE_FUNC inline void pstoretSegment(Scalar* to, const Packet& from, Index begin, Index count) {
1659
+ constexpr int RequiredAlignment = unpacket_traits<Packet>::alignment;
1660
+ if (Alignment >= RequiredAlignment) {
1661
+ pstoreSegment<Scalar, Packet>(to, from, begin, count);
1662
+ } else {
1663
+ pstoreuSegment<Scalar, Packet>(to, from, begin, count);
1664
+ }
584
1665
  }
585
1666
 
586
- } // end namespace internal
1667
+ #ifndef EIGEN_NO_IO
1668
+
1669
+ template <typename Packet>
1670
+ class StreamablePacket {
1671
+ public:
1672
+ using Scalar = typename unpacket_traits<Packet>::type;
1673
+ StreamablePacket(const Packet& packet) { pstoreu(v_, packet); }
1674
+
1675
+ friend std::ostream& operator<<(std::ostream& os, const StreamablePacket& packet) {
1676
+ os << "{" << packet.v_[0];
1677
+ for (int i = 1; i < unpacket_traits<Packet>::size; ++i) {
1678
+ os << "," << packet.v_[i];
1679
+ }
1680
+ os << "}";
1681
+ return os;
1682
+ }
1683
+
1684
+ private:
1685
+ Scalar v_[unpacket_traits<Packet>::size];
1686
+ };
1687
+
1688
+ /**
1689
+ * \internal \returns an intermediary that can be used to ostream packets, e.g. for debugging.
1690
+ */
1691
+ template <typename Packet>
1692
+ StreamablePacket<Packet> postream(const Packet& packet) {
1693
+ return StreamablePacket<Packet>(packet);
1694
+ }
1695
+
1696
+ #endif // EIGEN_NO_IO
1697
+
1698
+ } // end namespace internal
587
1699
 
588
- } // end namespace Eigen
1700
+ } // end namespace Eigen
589
1701
 
590
- #endif // EIGEN_GENERIC_PACKET_MATH_H
1702
+ #endif // EIGEN_GENERIC_PACKET_MATH_H