@smake/eigen 1.0.1 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (283) hide show
  1. package/README.md +1 -1
  2. package/eigen/COPYING.APACHE +203 -0
  3. package/eigen/COPYING.BSD +1 -1
  4. package/eigen/COPYING.MINPACK +51 -52
  5. package/eigen/Eigen/Cholesky +0 -1
  6. package/eigen/Eigen/Core +108 -266
  7. package/eigen/Eigen/Eigenvalues +0 -1
  8. package/eigen/Eigen/Geometry +3 -6
  9. package/eigen/Eigen/Householder +0 -1
  10. package/eigen/Eigen/Jacobi +0 -1
  11. package/eigen/Eigen/KLUSupport +41 -0
  12. package/eigen/Eigen/LU +2 -5
  13. package/eigen/Eigen/OrderingMethods +0 -3
  14. package/eigen/Eigen/PaStiXSupport +1 -0
  15. package/eigen/Eigen/PardisoSupport +0 -0
  16. package/eigen/Eigen/QR +0 -1
  17. package/eigen/Eigen/QtAlignedMalloc +0 -1
  18. package/eigen/Eigen/SVD +0 -1
  19. package/eigen/Eigen/Sparse +0 -2
  20. package/eigen/Eigen/SparseCholesky +0 -8
  21. package/eigen/Eigen/SparseLU +4 -0
  22. package/eigen/Eigen/src/Cholesky/LDLT.h +42 -27
  23. package/eigen/Eigen/src/Cholesky/LLT.h +39 -23
  24. package/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +90 -47
  25. package/eigen/Eigen/src/Core/ArithmeticSequence.h +413 -0
  26. package/eigen/Eigen/src/Core/Array.h +99 -11
  27. package/eigen/Eigen/src/Core/ArrayBase.h +1 -1
  28. package/eigen/Eigen/src/Core/ArrayWrapper.h +21 -21
  29. package/eigen/Eigen/src/Core/Assign.h +1 -1
  30. package/eigen/Eigen/src/Core/AssignEvaluator.h +125 -50
  31. package/eigen/Eigen/src/Core/Assign_MKL.h +10 -10
  32. package/eigen/Eigen/src/Core/BandMatrix.h +16 -16
  33. package/eigen/Eigen/src/Core/Block.h +56 -60
  34. package/eigen/Eigen/src/Core/BooleanRedux.h +29 -31
  35. package/eigen/Eigen/src/Core/CommaInitializer.h +7 -3
  36. package/eigen/Eigen/src/Core/CoreEvaluators.h +325 -272
  37. package/eigen/Eigen/src/Core/CoreIterators.h +5 -0
  38. package/eigen/Eigen/src/Core/CwiseBinaryOp.h +21 -22
  39. package/eigen/Eigen/src/Core/CwiseNullaryOp.h +153 -18
  40. package/eigen/Eigen/src/Core/CwiseUnaryOp.h +6 -6
  41. package/eigen/Eigen/src/Core/CwiseUnaryView.h +12 -10
  42. package/eigen/Eigen/src/Core/DenseBase.h +128 -39
  43. package/eigen/Eigen/src/Core/DenseCoeffsBase.h +25 -21
  44. package/eigen/Eigen/src/Core/DenseStorage.h +150 -68
  45. package/eigen/Eigen/src/Core/Diagonal.h +21 -23
  46. package/eigen/Eigen/src/Core/DiagonalMatrix.h +50 -2
  47. package/eigen/Eigen/src/Core/DiagonalProduct.h +1 -1
  48. package/eigen/Eigen/src/Core/Dot.h +10 -10
  49. package/eigen/Eigen/src/Core/EigenBase.h +10 -9
  50. package/eigen/Eigen/src/Core/ForceAlignedAccess.h +8 -4
  51. package/eigen/Eigen/src/Core/Fuzzy.h +3 -3
  52. package/eigen/Eigen/src/Core/GeneralProduct.h +20 -10
  53. package/eigen/Eigen/src/Core/GenericPacketMath.h +597 -147
  54. package/eigen/Eigen/src/Core/GlobalFunctions.h +40 -33
  55. package/eigen/Eigen/src/Core/IO.h +40 -7
  56. package/eigen/Eigen/src/Core/IndexedView.h +237 -0
  57. package/eigen/Eigen/src/Core/Inverse.h +9 -10
  58. package/eigen/Eigen/src/Core/Map.h +7 -7
  59. package/eigen/Eigen/src/Core/MapBase.h +5 -3
  60. package/eigen/Eigen/src/Core/MathFunctions.h +756 -120
  61. package/eigen/Eigen/src/Core/MathFunctionsImpl.h +118 -19
  62. package/eigen/Eigen/src/Core/Matrix.h +131 -25
  63. package/eigen/Eigen/src/Core/MatrixBase.h +19 -2
  64. package/eigen/Eigen/src/Core/NestByValue.h +25 -50
  65. package/eigen/Eigen/src/Core/NoAlias.h +4 -3
  66. package/eigen/Eigen/src/Core/NumTraits.h +107 -20
  67. package/eigen/Eigen/src/Core/PartialReduxEvaluator.h +232 -0
  68. package/eigen/Eigen/src/Core/PermutationMatrix.h +3 -3
  69. package/eigen/Eigen/src/Core/PlainObjectBase.h +145 -54
  70. package/eigen/Eigen/src/Core/Product.h +30 -25
  71. package/eigen/Eigen/src/Core/ProductEvaluators.h +183 -142
  72. package/eigen/Eigen/src/Core/Random.h +37 -1
  73. package/eigen/Eigen/src/Core/Redux.h +180 -170
  74. package/eigen/Eigen/src/Core/Ref.h +118 -21
  75. package/eigen/Eigen/src/Core/Replicate.h +8 -8
  76. package/eigen/Eigen/src/Core/Reshaped.h +454 -0
  77. package/eigen/Eigen/src/Core/ReturnByValue.h +7 -5
  78. package/eigen/Eigen/src/Core/Reverse.h +18 -12
  79. package/eigen/Eigen/src/Core/Select.h +8 -6
  80. package/eigen/Eigen/src/Core/SelfAdjointView.h +33 -20
  81. package/eigen/Eigen/src/Core/Solve.h +14 -14
  82. package/eigen/Eigen/src/Core/SolveTriangular.h +13 -13
  83. package/eigen/Eigen/src/Core/SolverBase.h +41 -3
  84. package/eigen/Eigen/src/Core/StableNorm.h +100 -70
  85. package/eigen/Eigen/src/Core/StlIterators.h +463 -0
  86. package/eigen/Eigen/src/Core/Stride.h +9 -4
  87. package/eigen/Eigen/src/Core/Swap.h +5 -4
  88. package/eigen/Eigen/src/Core/Transpose.h +86 -27
  89. package/eigen/Eigen/src/Core/Transpositions.h +26 -8
  90. package/eigen/Eigen/src/Core/TriangularMatrix.h +88 -72
  91. package/eigen/Eigen/src/Core/VectorBlock.h +5 -5
  92. package/eigen/Eigen/src/Core/VectorwiseOp.h +159 -70
  93. package/eigen/Eigen/src/Core/Visitor.h +137 -29
  94. package/eigen/Eigen/src/Core/arch/AVX/Complex.h +50 -129
  95. package/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +126 -337
  96. package/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +1092 -155
  97. package/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +65 -1
  98. package/eigen/Eigen/src/Core/arch/AVX512/Complex.h +422 -0
  99. package/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +186 -213
  100. package/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1250 -252
  101. package/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +89 -0
  102. package/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +152 -165
  103. package/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +19 -251
  104. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2937 -0
  105. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +221 -0
  106. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +629 -0
  107. package/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2042 -392
  108. package/eigen/Eigen/src/Core/arch/CUDA/Complex.h +235 -80
  109. package/eigen/Eigen/src/Core/arch/Default/BFloat16.h +700 -0
  110. package/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +102 -14
  111. package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1649 -0
  112. package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +110 -0
  113. package/eigen/Eigen/src/Core/arch/Default/Half.h +942 -0
  114. package/eigen/Eigen/src/Core/arch/Default/Settings.h +1 -1
  115. package/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +120 -0
  116. package/eigen/Eigen/src/Core/arch/{CUDA → GPU}/MathFunctions.h +16 -4
  117. package/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1685 -0
  118. package/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +80 -0
  119. package/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
  120. package/eigen/Eigen/src/Core/arch/MSA/Complex.h +648 -0
  121. package/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +387 -0
  122. package/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1233 -0
  123. package/eigen/Eigen/src/Core/arch/NEON/Complex.h +313 -219
  124. package/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +183 -0
  125. package/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +54 -70
  126. package/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4376 -549
  127. package/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1419 -0
  128. package/eigen/Eigen/src/Core/arch/SSE/Complex.h +59 -179
  129. package/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +65 -428
  130. package/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +893 -283
  131. package/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +65 -0
  132. package/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +44 -0
  133. package/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +752 -0
  134. package/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +49 -0
  135. package/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +232 -0
  136. package/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +301 -0
  137. package/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +670 -0
  138. package/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +694 -0
  139. package/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +85 -0
  140. package/eigen/Eigen/src/Core/arch/ZVector/Complex.h +212 -183
  141. package/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +101 -5
  142. package/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +510 -395
  143. package/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +11 -2
  144. package/eigen/Eigen/src/Core/functors/BinaryFunctors.h +112 -46
  145. package/eigen/Eigen/src/Core/functors/NullaryFunctors.h +31 -30
  146. package/eigen/Eigen/src/Core/functors/StlFunctors.h +32 -2
  147. package/eigen/Eigen/src/Core/functors/UnaryFunctors.h +354 -15
  148. package/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1073 -585
  149. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +29 -7
  150. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +4 -4
  151. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +1 -1
  152. package/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +382 -483
  153. package/eigen/Eigen/src/Core/products/Parallelizer.h +23 -9
  154. package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +23 -6
  155. package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +8 -6
  156. package/eigen/Eigen/src/Core/products/SelfadjointProduct.h +2 -2
  157. package/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +5 -4
  158. package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +3 -3
  159. package/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +5 -3
  160. package/eigen/Eigen/src/Core/products/TriangularSolverVector.h +13 -10
  161. package/eigen/Eigen/src/Core/util/BlasUtil.h +208 -124
  162. package/eigen/Eigen/src/Core/util/ConfigureVectorization.h +512 -0
  163. package/eigen/Eigen/src/Core/util/Constants.h +25 -9
  164. package/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +14 -2
  165. package/eigen/Eigen/src/Core/util/ForwardDeclarations.h +28 -4
  166. package/eigen/Eigen/src/Core/util/IndexedViewHelper.h +186 -0
  167. package/eigen/Eigen/src/Core/util/IntegralConstant.h +272 -0
  168. package/eigen/Eigen/src/Core/util/MKL_support.h +8 -1
  169. package/eigen/Eigen/src/Core/util/Macros.h +661 -250
  170. package/eigen/Eigen/src/Core/util/Memory.h +222 -52
  171. package/eigen/Eigen/src/Core/util/Meta.h +349 -105
  172. package/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
  173. package/eigen/Eigen/src/Core/util/StaticAssert.h +8 -5
  174. package/eigen/Eigen/src/Core/util/SymbolicIndex.h +293 -0
  175. package/eigen/Eigen/src/Core/util/XprHelper.h +48 -30
  176. package/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +1 -1
  177. package/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +1 -1
  178. package/eigen/Eigen/src/Eigenvalues/EigenSolver.h +2 -2
  179. package/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +1 -1
  180. package/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +2 -2
  181. package/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +2 -2
  182. package/eigen/Eigen/src/Eigenvalues/RealQZ.h +9 -6
  183. package/eigen/Eigen/src/Eigenvalues/RealSchur.h +10 -5
  184. package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +75 -42
  185. package/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +20 -15
  186. package/eigen/Eigen/src/Geometry/AlignedBox.h +99 -5
  187. package/eigen/Eigen/src/Geometry/AngleAxis.h +4 -4
  188. package/eigen/Eigen/src/Geometry/EulerAngles.h +3 -3
  189. package/eigen/Eigen/src/Geometry/Homogeneous.h +15 -11
  190. package/eigen/Eigen/src/Geometry/Hyperplane.h +1 -1
  191. package/eigen/Eigen/src/Geometry/OrthoMethods.h +3 -2
  192. package/eigen/Eigen/src/Geometry/ParametrizedLine.h +39 -2
  193. package/eigen/Eigen/src/Geometry/Quaternion.h +52 -14
  194. package/eigen/Eigen/src/Geometry/Rotation2D.h +3 -3
  195. package/eigen/Eigen/src/Geometry/Scaling.h +22 -4
  196. package/eigen/Eigen/src/Geometry/Transform.h +86 -65
  197. package/eigen/Eigen/src/Geometry/Translation.h +6 -6
  198. package/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +168 -0
  199. package/eigen/Eigen/src/Householder/BlockHouseholder.h +9 -2
  200. package/eigen/Eigen/src/Householder/Householder.h +8 -4
  201. package/eigen/Eigen/src/Householder/HouseholderSequence.h +123 -48
  202. package/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +15 -15
  203. package/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +7 -23
  204. package/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +5 -22
  205. package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +41 -47
  206. package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +51 -60
  207. package/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +70 -20
  208. package/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +2 -20
  209. package/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +11 -9
  210. package/eigen/Eigen/src/Jacobi/Jacobi.h +31 -10
  211. package/eigen/Eigen/src/KLUSupport/KLUSupport.h +358 -0
  212. package/eigen/Eigen/src/LU/Determinant.h +35 -19
  213. package/eigen/Eigen/src/LU/FullPivLU.h +29 -43
  214. package/eigen/Eigen/src/LU/InverseImpl.h +25 -8
  215. package/eigen/Eigen/src/LU/PartialPivLU.h +67 -57
  216. package/eigen/Eigen/src/LU/arch/InverseSize4.h +351 -0
  217. package/eigen/Eigen/src/OrderingMethods/Amd.h +7 -17
  218. package/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +297 -277
  219. package/eigen/Eigen/src/OrderingMethods/Ordering.h +6 -10
  220. package/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +1 -1
  221. package/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +10 -9
  222. package/eigen/Eigen/src/QR/ColPivHouseholderQR.h +41 -20
  223. package/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +100 -27
  224. package/eigen/Eigen/src/QR/FullPivHouseholderQR.h +59 -22
  225. package/eigen/Eigen/src/QR/HouseholderQR.h +48 -23
  226. package/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +25 -3
  227. package/eigen/Eigen/src/SVD/BDCSVD.h +137 -48
  228. package/eigen/Eigen/src/SVD/JacobiSVD.h +22 -14
  229. package/eigen/Eigen/src/SVD/SVDBase.h +82 -21
  230. package/eigen/Eigen/src/SVD/UpperBidiagonalization.h +3 -3
  231. package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +16 -8
  232. package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +11 -36
  233. package/eigen/Eigen/src/SparseCore/CompressedStorage.h +16 -0
  234. package/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +6 -6
  235. package/eigen/Eigen/src/SparseCore/SparseAssign.h +81 -27
  236. package/eigen/Eigen/src/SparseCore/SparseBlock.h +25 -57
  237. package/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +40 -11
  238. package/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +11 -15
  239. package/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +2 -2
  240. package/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +30 -8
  241. package/eigen/Eigen/src/SparseCore/SparseMatrix.h +124 -10
  242. package/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +5 -12
  243. package/eigen/Eigen/src/SparseCore/SparseProduct.h +13 -1
  244. package/eigen/Eigen/src/SparseCore/SparseRef.h +7 -7
  245. package/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +3 -0
  246. package/eigen/Eigen/src/SparseCore/SparseUtil.h +8 -0
  247. package/eigen/Eigen/src/SparseCore/SparseVector.h +1 -1
  248. package/eigen/Eigen/src/SparseLU/SparseLU.h +160 -10
  249. package/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +1 -1
  250. package/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +76 -2
  251. package/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +2 -2
  252. package/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +1 -1
  253. package/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +1 -1
  254. package/eigen/Eigen/src/SparseQR/SparseQR.h +19 -6
  255. package/eigen/Eigen/src/StlSupport/StdDeque.h +2 -14
  256. package/eigen/Eigen/src/StlSupport/StdList.h +2 -2
  257. package/eigen/Eigen/src/StlSupport/StdVector.h +2 -2
  258. package/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +6 -8
  259. package/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +175 -39
  260. package/eigen/Eigen/src/misc/lapacke.h +5 -4
  261. package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +27 -1
  262. package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +155 -11
  263. package/eigen/Eigen/src/plugins/BlockMethods.h +626 -242
  264. package/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +14 -0
  265. package/eigen/Eigen/src/plugins/IndexedViewMethods.h +262 -0
  266. package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +4 -4
  267. package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +10 -0
  268. package/eigen/Eigen/src/plugins/ReshapedMethods.h +149 -0
  269. package/eigen/README.md +2 -0
  270. package/lib/LibEigen.d.ts +4 -0
  271. package/lib/LibEigen.js +14 -0
  272. package/lib/index.d.ts +1 -1
  273. package/lib/index.js +7 -3
  274. package/package.json +2 -10
  275. package/eigen/Eigen/CMakeLists.txt +0 -19
  276. package/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -675
  277. package/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
  278. package/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
  279. package/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
  280. package/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
  281. package/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
  282. package/lib/eigen.d.ts +0 -2
  283. package/lib/eigen.js +0 -15
@@ -44,23 +44,29 @@ struct default_packet_traits
44
44
  enum {
45
45
  HasHalfPacket = 0,
46
46
 
47
- HasAdd = 1,
48
- HasSub = 1,
49
- HasMul = 1,
50
- HasNegate = 1,
51
- HasAbs = 1,
52
- HasArg = 0,
53
- HasAbs2 = 1,
54
- HasMin = 1,
55
- HasMax = 1,
56
- HasConj = 1,
47
+ HasAdd = 1,
48
+ HasSub = 1,
49
+ HasShift = 1,
50
+ HasMul = 1,
51
+ HasNegate = 1,
52
+ HasAbs = 1,
53
+ HasArg = 0,
54
+ HasAbs2 = 1,
55
+ HasAbsDiff = 0,
56
+ HasMin = 1,
57
+ HasMax = 1,
58
+ HasConj = 1,
57
59
  HasSetLinear = 1,
58
- HasBlend = 0,
60
+ HasBlend = 0,
61
+ // This flag is used to indicate whether packet comparison is supported.
62
+ // pcmp_eq, pcmp_lt and pcmp_le should be defined for it to be true.
63
+ HasCmp = 0,
59
64
 
60
65
  HasDiv = 0,
61
66
  HasSqrt = 0,
62
67
  HasRsqrt = 0,
63
68
  HasExp = 0,
69
+ HasExpm1 = 0,
64
70
  HasLog = 0,
65
71
  HasLog1p = 0,
66
72
  HasLog10 = 0,
@@ -81,14 +87,18 @@ struct default_packet_traits
81
87
  HasPolygamma = 0,
82
88
  HasErf = 0,
83
89
  HasErfc = 0,
90
+ HasNdtri = 0,
91
+ HasBessel = 0,
84
92
  HasIGamma = 0,
93
+ HasIGammaDerA = 0,
94
+ HasGammaSampleDerAlpha = 0,
85
95
  HasIGammac = 0,
86
96
  HasBetaInc = 0,
87
97
 
88
98
  HasRound = 0,
99
+ HasRint = 0,
89
100
  HasFloor = 0,
90
101
  HasCeil = 0,
91
-
92
102
  HasSign = 0
93
103
  };
94
104
  };
@@ -119,6 +129,22 @@ template<typename T> struct packet_traits : default_packet_traits
119
129
 
120
130
  template<typename T> struct packet_traits<const T> : packet_traits<T> { };
121
131
 
132
+ template<typename T> struct unpacket_traits
133
+ {
134
+ typedef T type;
135
+ typedef T half;
136
+ enum
137
+ {
138
+ size = 1,
139
+ alignment = 1,
140
+ vectorizable = false,
141
+ masked_load_available=false,
142
+ masked_store_available=false
143
+ };
144
+ };
145
+
146
+ template<typename T> struct unpacket_traits<const T> : unpacket_traits<T> { };
147
+
122
148
  template <typename Src, typename Tgt> struct type_casting_traits {
123
149
  enum {
124
150
  VectorizedCast = 0,
@@ -127,6 +153,34 @@ template <typename Src, typename Tgt> struct type_casting_traits {
127
153
  };
128
154
  };
129
155
 
156
+ /** \internal Wrapper to ensure that multiple packet types can map to the same
157
+ underlying vector type. */
158
+ template<typename T, int unique_id = 0>
159
+ struct eigen_packet_wrapper
160
+ {
161
+ EIGEN_ALWAYS_INLINE operator T&() { return m_val; }
162
+ EIGEN_ALWAYS_INLINE operator const T&() const { return m_val; }
163
+ EIGEN_ALWAYS_INLINE eigen_packet_wrapper() {}
164
+ EIGEN_ALWAYS_INLINE eigen_packet_wrapper(const T &v) : m_val(v) {}
165
+ EIGEN_ALWAYS_INLINE eigen_packet_wrapper& operator=(const T &v) {
166
+ m_val = v;
167
+ return *this;
168
+ }
169
+
170
+ T m_val;
171
+ };
172
+
173
+
174
+ /** \internal A convenience utility for determining if the type is a scalar.
175
+ * This is used to enable some generic packet implementations.
176
+ */
177
+ template<typename Packet>
178
+ struct is_scalar {
179
+ typedef typename unpacket_traits<Packet>::type Scalar;
180
+ enum {
181
+ value = internal::is_same<Packet, Scalar>::value
182
+ };
183
+ };
130
184
 
131
185
  /** \internal \returns static_cast<TgtType>(a) (coeff-wise) */
132
186
  template <typename SrcPacket, typename TgtPacket>
@@ -139,75 +193,406 @@ EIGEN_DEVICE_FUNC inline TgtPacket
139
193
  pcast(const SrcPacket& a, const SrcPacket& /*b*/) {
140
194
  return static_cast<TgtPacket>(a);
141
195
  }
142
-
143
196
  template <typename SrcPacket, typename TgtPacket>
144
197
  EIGEN_DEVICE_FUNC inline TgtPacket
145
198
  pcast(const SrcPacket& a, const SrcPacket& /*b*/, const SrcPacket& /*c*/, const SrcPacket& /*d*/) {
146
199
  return static_cast<TgtPacket>(a);
147
200
  }
201
+ template <typename SrcPacket, typename TgtPacket>
202
+ EIGEN_DEVICE_FUNC inline TgtPacket
203
+ pcast(const SrcPacket& a, const SrcPacket& /*b*/, const SrcPacket& /*c*/, const SrcPacket& /*d*/,
204
+ const SrcPacket& /*e*/, const SrcPacket& /*f*/, const SrcPacket& /*g*/, const SrcPacket& /*h*/) {
205
+ return static_cast<TgtPacket>(a);
206
+ }
207
+
208
+ /** \internal \returns reinterpret_cast<Target>(a) */
209
+ template <typename Target, typename Packet>
210
+ EIGEN_DEVICE_FUNC inline Target
211
+ preinterpret(const Packet& a); /* { return reinterpret_cast<const Target&>(a); } */
148
212
 
149
213
  /** \internal \returns a + b (coeff-wise) */
150
214
  template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
151
- padd(const Packet& a,
152
- const Packet& b) { return a+b; }
215
+ padd(const Packet& a, const Packet& b) { return a+b; }
216
+ // Avoid compiler warning for boolean algebra.
217
+ template<> EIGEN_DEVICE_FUNC inline bool
218
+ padd(const bool& a, const bool& b) { return a || b; }
153
219
 
154
220
  /** \internal \returns a - b (coeff-wise) */
155
221
  template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
156
- psub(const Packet& a,
157
- const Packet& b) { return a-b; }
222
+ psub(const Packet& a, const Packet& b) { return a-b; }
158
223
 
159
224
  /** \internal \returns -a (coeff-wise) */
160
225
  template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
161
226
  pnegate(const Packet& a) { return -a; }
162
227
 
163
- /** \internal \returns conj(a) (coeff-wise) */
228
+ template<> EIGEN_DEVICE_FUNC inline bool
229
+ pnegate(const bool& a) { return !a; }
164
230
 
231
+ /** \internal \returns conj(a) (coeff-wise) */
165
232
  template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
166
233
  pconj(const Packet& a) { return numext::conj(a); }
167
234
 
168
235
  /** \internal \returns a * b (coeff-wise) */
169
236
  template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
170
- pmul(const Packet& a,
171
- const Packet& b) { return a*b; }
237
+ pmul(const Packet& a, const Packet& b) { return a*b; }
238
+ // Avoid compiler warning for boolean algebra.
239
+ template<> EIGEN_DEVICE_FUNC inline bool
240
+ pmul(const bool& a, const bool& b) { return a && b; }
172
241
 
173
242
  /** \internal \returns a / b (coeff-wise) */
174
243
  template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
175
- pdiv(const Packet& a,
176
- const Packet& b) { return a/b; }
244
+ pdiv(const Packet& a, const Packet& b) { return a/b; }
245
+
246
+ // In the generic case, memset to all one bits.
247
+ template<typename Packet, typename EnableIf = void>
248
+ struct ptrue_impl {
249
+ static EIGEN_DEVICE_FUNC inline Packet run(const Packet& /*a*/){
250
+ Packet b;
251
+ memset(static_cast<void*>(&b), 0xff, sizeof(Packet));
252
+ return b;
253
+ }
254
+ };
177
255
 
178
- /** \internal \returns the min of \a a and \a b (coeff-wise) */
256
+ // For non-trivial scalars, set to Scalar(1) (i.e. a non-zero value).
257
+ // Although this is technically not a valid bitmask, the scalar path for pselect
258
+ // uses a comparison to zero, so this should still work in most cases. We don't
259
+ // have another option, since the scalar type requires initialization.
260
+ template<typename T>
261
+ struct ptrue_impl<T,
262
+ typename internal::enable_if<is_scalar<T>::value && NumTraits<T>::RequireInitialization>::type > {
263
+ static EIGEN_DEVICE_FUNC inline T run(const T& /*a*/){
264
+ return T(1);
265
+ }
266
+ };
267
+
268
+ /** \internal \returns a packet with all bits set to one. */
179
269
  template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
180
- pmin(const Packet& a,
181
- const Packet& b) { return numext::mini(a, b); }
270
+ ptrue(const Packet& a) {
271
+ return ptrue_impl<Packet>::run(a);
272
+ }
273
+
274
+ // In the general case, memset to zero.
275
+ template<typename Packet, typename EnableIf = void>
276
+ struct pzero_impl {
277
+ static EIGEN_DEVICE_FUNC inline Packet run(const Packet& /*a*/) {
278
+ Packet b;
279
+ memset(static_cast<void*>(&b), 0x00, sizeof(Packet));
280
+ return b;
281
+ }
282
+ };
283
+
284
+ // For scalars, explicitly set to Scalar(0), since the underlying representation
285
+ // for zero may not consist of all-zero bits.
286
+ template<typename T>
287
+ struct pzero_impl<T,
288
+ typename internal::enable_if<is_scalar<T>::value>::type> {
289
+ static EIGEN_DEVICE_FUNC inline T run(const T& /*a*/) {
290
+ return T(0);
291
+ }
292
+ };
182
293
 
183
- /** \internal \returns the max of \a a and \a b (coeff-wise) */
294
+ /** \internal \returns packet of zeros */
184
295
  template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
185
- pmax(const Packet& a,
186
- const Packet& b) { return numext::maxi(a, b); }
296
+ pzero(const Packet& a) {
297
+ return pzero_impl<Packet>::run(a);
298
+ }
187
299
 
188
- /** \internal \returns the absolute value of \a a */
300
+ /** \internal \returns a <= b as a bit mask */
189
301
  template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
190
- pabs(const Packet& a) { using std::abs; return abs(a); }
302
+ pcmp_le(const Packet& a, const Packet& b) { return a<=b ? ptrue(a) : pzero(a); }
191
303
 
192
- /** \internal \returns the phase angle of \a a */
304
+ /** \internal \returns a < b as a bit mask */
193
305
  template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
194
- parg(const Packet& a) { using numext::arg; return arg(a); }
306
+ pcmp_lt(const Packet& a, const Packet& b) { return a<b ? ptrue(a) : pzero(a); }
307
+
308
+ /** \internal \returns a == b as a bit mask */
309
+ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
310
+ pcmp_eq(const Packet& a, const Packet& b) { return a==b ? ptrue(a) : pzero(a); }
311
+
312
+ /** \internal \returns a < b or a==NaN or b==NaN as a bit mask */
313
+ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
314
+ pcmp_lt_or_nan(const Packet& a, const Packet& b) { return a>=b ? pzero(a) : ptrue(a); }
315
+
316
+ template<typename T>
317
+ struct bit_and {
318
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE T operator()(const T& a, const T& b) const {
319
+ return a & b;
320
+ }
321
+ };
322
+
323
+ template<typename T>
324
+ struct bit_or {
325
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE T operator()(const T& a, const T& b) const {
326
+ return a | b;
327
+ }
328
+ };
329
+
330
+ template<typename T>
331
+ struct bit_xor {
332
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE T operator()(const T& a, const T& b) const {
333
+ return a ^ b;
334
+ }
335
+ };
336
+
337
+ template<typename T>
338
+ struct bit_not {
339
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE T operator()(const T& a) const {
340
+ return ~a;
341
+ }
342
+ };
343
+
344
+ // Use operators &, |, ^, ~.
345
+ template<typename T>
346
+ struct operator_bitwise_helper {
347
+ EIGEN_DEVICE_FUNC static inline T bitwise_and(const T& a, const T& b) { return bit_and<T>()(a, b); }
348
+ EIGEN_DEVICE_FUNC static inline T bitwise_or(const T& a, const T& b) { return bit_or<T>()(a, b); }
349
+ EIGEN_DEVICE_FUNC static inline T bitwise_xor(const T& a, const T& b) { return bit_xor<T>()(a, b); }
350
+ EIGEN_DEVICE_FUNC static inline T bitwise_not(const T& a) { return bit_not<T>()(a); }
351
+ };
352
+
353
+ // Apply binary operations byte-by-byte
354
+ template<typename T>
355
+ struct bytewise_bitwise_helper {
356
+ EIGEN_DEVICE_FUNC static inline T bitwise_and(const T& a, const T& b) {
357
+ return binary(a, b, bit_and<unsigned char>());
358
+ }
359
+ EIGEN_DEVICE_FUNC static inline T bitwise_or(const T& a, const T& b) {
360
+ return binary(a, b, bit_or<unsigned char>());
361
+ }
362
+ EIGEN_DEVICE_FUNC static inline T bitwise_xor(const T& a, const T& b) {
363
+ return binary(a, b, bit_xor<unsigned char>());
364
+ }
365
+ EIGEN_DEVICE_FUNC static inline T bitwise_not(const T& a) {
366
+ return unary(a,bit_not<unsigned char>());
367
+ }
368
+
369
+ private:
370
+ template<typename Op>
371
+ EIGEN_DEVICE_FUNC static inline T unary(const T& a, Op op) {
372
+ const unsigned char* a_ptr = reinterpret_cast<const unsigned char*>(&a);
373
+ T c;
374
+ unsigned char* c_ptr = reinterpret_cast<unsigned char*>(&c);
375
+ for (size_t i = 0; i < sizeof(T); ++i) {
376
+ *c_ptr++ = op(*a_ptr++);
377
+ }
378
+ return c;
379
+ }
380
+
381
+ template<typename Op>
382
+ EIGEN_DEVICE_FUNC static inline T binary(const T& a, const T& b, Op op) {
383
+ const unsigned char* a_ptr = reinterpret_cast<const unsigned char*>(&a);
384
+ const unsigned char* b_ptr = reinterpret_cast<const unsigned char*>(&b);
385
+ T c;
386
+ unsigned char* c_ptr = reinterpret_cast<unsigned char*>(&c);
387
+ for (size_t i = 0; i < sizeof(T); ++i) {
388
+ *c_ptr++ = op(*a_ptr++, *b_ptr++);
389
+ }
390
+ return c;
391
+ }
392
+ };
393
+
394
+ // In the general case, use byte-by-byte manipulation.
395
+ template<typename T, typename EnableIf = void>
396
+ struct bitwise_helper : public bytewise_bitwise_helper<T> {};
397
+
398
+ // For integers or non-trivial scalars, use binary operators.
399
+ template<typename T>
400
+ struct bitwise_helper<T,
401
+ typename internal::enable_if<
402
+ is_scalar<T>::value && (NumTraits<T>::IsInteger || NumTraits<T>::RequireInitialization)>::type
403
+ > : public operator_bitwise_helper<T> {};
195
404
 
196
405
  /** \internal \returns the bitwise and of \a a and \a b */
197
406
  template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
198
- pand(const Packet& a, const Packet& b) { return a & b; }
407
+ pand(const Packet& a, const Packet& b) {
408
+ return bitwise_helper<Packet>::bitwise_and(a, b);
409
+ }
199
410
 
200
411
  /** \internal \returns the bitwise or of \a a and \a b */
201
412
  template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
202
- por(const Packet& a, const Packet& b) { return a | b; }
413
+ por(const Packet& a, const Packet& b) {
414
+ return bitwise_helper<Packet>::bitwise_or(a, b);
415
+ }
203
416
 
204
417
  /** \internal \returns the bitwise xor of \a a and \a b */
205
418
  template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
206
- pxor(const Packet& a, const Packet& b) { return a ^ b; }
419
+ pxor(const Packet& a, const Packet& b) {
420
+ return bitwise_helper<Packet>::bitwise_xor(a, b);
421
+ }
422
+
423
+ /** \internal \returns the bitwise not of \a a */
424
+ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
425
+ pnot(const Packet& a) {
426
+ return bitwise_helper<Packet>::bitwise_not(a);
427
+ }
428
+
429
+ /** \internal \returns the bitwise and of \a a and not \a b */
430
+ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
431
+ pandnot(const Packet& a, const Packet& b) { return pand(a, pnot(b)); }
432
+
433
+ // In the general case, use bitwise select.
434
+ template<typename Packet, typename EnableIf = void>
435
+ struct pselect_impl {
436
+ static EIGEN_DEVICE_FUNC inline Packet run(const Packet& mask, const Packet& a, const Packet& b) {
437
+ return por(pand(a,mask),pandnot(b,mask));
438
+ }
439
+ };
440
+
441
+ // For scalars, use ternary select.
442
+ template<typename Packet>
443
+ struct pselect_impl<Packet,
444
+ typename internal::enable_if<is_scalar<Packet>::value>::type > {
445
+ static EIGEN_DEVICE_FUNC inline Packet run(const Packet& mask, const Packet& a, const Packet& b) {
446
+ return numext::equal_strict(mask, Packet(0)) ? b : a;
447
+ }
448
+ };
449
+
450
+ /** \internal \returns \a a or \a b for each field in packet according to \a mask */
451
+ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
452
+ pselect(const Packet& mask, const Packet& a, const Packet& b) {
453
+ return pselect_impl<Packet>::run(mask, a, b);
454
+ }
455
+
456
+ template<> EIGEN_DEVICE_FUNC inline bool pselect<bool>(
457
+ const bool& cond, const bool& a, const bool& b) {
458
+ return cond ? a : b;
459
+ }
460
+
461
+ /** \internal \returns the min or max of \a a and \a b (coeff-wise)
462
+ If either \a a or \a b are NaN, the result is implementation defined. */
463
+ template<int NaNPropagation>
464
+ struct pminmax_impl {
465
+ template <typename Packet, typename Op>
466
+ static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a, const Packet& b, Op op) {
467
+ return op(a,b);
468
+ }
469
+ };
470
+
471
+ /** \internal \returns the min or max of \a a and \a b (coeff-wise)
472
+ If either \a a or \a b are NaN, NaN is returned. */
473
+ template<>
474
+ struct pminmax_impl<PropagateNaN> {
475
+ template <typename Packet, typename Op>
476
+ static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a, const Packet& b, Op op) {
477
+ Packet not_nan_mask_a = pcmp_eq(a, a);
478
+ Packet not_nan_mask_b = pcmp_eq(b, b);
479
+ return pselect(not_nan_mask_a,
480
+ pselect(not_nan_mask_b, op(a, b), b),
481
+ a);
482
+ }
483
+ };
484
+
485
+ /** \internal \returns the min or max of \a a and \a b (coeff-wise)
486
+ If both \a a and \a b are NaN, NaN is returned.
487
+ Equivalent to std::fmin(a, b). */
488
+ template<>
489
+ struct pminmax_impl<PropagateNumbers> {
490
+ template <typename Packet, typename Op>
491
+ static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a, const Packet& b, Op op) {
492
+ Packet not_nan_mask_a = pcmp_eq(a, a);
493
+ Packet not_nan_mask_b = pcmp_eq(b, b);
494
+ return pselect(not_nan_mask_a,
495
+ pselect(not_nan_mask_b, op(a, b), a),
496
+ b);
497
+ }
498
+ };
499
+
500
+
501
+ #ifndef SYCL_DEVICE_ONLY
502
+ #define EIGEN_BINARY_OP_NAN_PROPAGATION(Type, Func) Func
503
+ #else
504
+ #define EIGEN_BINARY_OP_NAN_PROPAGATION(Type, Func) \
505
+ [](const Type& a, const Type& b) { \
506
+ return Func(a, b);}
507
+ #endif
508
+
509
+ /** \internal \returns the min of \a a and \a b (coeff-wise).
510
+ If \a a or \a b is NaN, the return value is implementation defined. */
511
+ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
512
+ pmin(const Packet& a, const Packet& b) { return numext::mini(a,b); }
513
+
514
+ /** \internal \returns the min of \a a and \a b (coeff-wise).
515
+ NaNPropagation determines the NaN propagation semantics. */
516
+ template <int NaNPropagation, typename Packet>
517
+ EIGEN_DEVICE_FUNC inline Packet pmin(const Packet& a, const Packet& b) {
518
+ return pminmax_impl<NaNPropagation>::run(a, b, EIGEN_BINARY_OP_NAN_PROPAGATION(Packet, (pmin<Packet>)));
519
+ }
520
+
521
+ /** \internal \returns the max of \a a and \a b (coeff-wise)
522
+ If \a a or \a b is NaN, the return value is implementation defined. */
523
+ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
524
+ pmax(const Packet& a, const Packet& b) { return numext::maxi(a, b); }
525
+
526
+ /** \internal \returns the max of \a a and \a b (coeff-wise).
527
+ NaNPropagation determines the NaN propagation semantics. */
528
+ template <int NaNPropagation, typename Packet>
529
+ EIGEN_DEVICE_FUNC inline Packet pmax(const Packet& a, const Packet& b) {
530
+ return pminmax_impl<NaNPropagation>::run(a, b, EIGEN_BINARY_OP_NAN_PROPAGATION(Packet,(pmax<Packet>)));
531
+ }
532
+
533
+ /** \internal \returns the absolute value of \a a */
534
+ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
535
+ pabs(const Packet& a) { return numext::abs(a); }
536
+ template<> EIGEN_DEVICE_FUNC inline unsigned int
537
+ pabs(const unsigned int& a) { return a; }
538
+ template<> EIGEN_DEVICE_FUNC inline unsigned long
539
+ pabs(const unsigned long& a) { return a; }
540
+ template<> EIGEN_DEVICE_FUNC inline unsigned long long
541
+ pabs(const unsigned long long& a) { return a; }
542
+
543
+ /** \internal \returns the addsub value of \a a,b */
544
+ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
545
+ paddsub(const Packet& a, const Packet& b) {
546
+ return pselect(peven_mask(a), padd(a, b), psub(a, b));
547
+ }
548
+
549
+ /** \internal \returns the phase angle of \a a */
550
+ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
551
+ parg(const Packet& a) { using numext::arg; return arg(a); }
552
+
553
+
554
+ /** \internal \returns \a a arithmetically shifted by N bits to the right */
555
+ template<int N> EIGEN_DEVICE_FUNC inline int
556
+ parithmetic_shift_right(const int& a) { return a >> N; }
557
+ template<int N> EIGEN_DEVICE_FUNC inline long int
558
+ parithmetic_shift_right(const long int& a) { return a >> N; }
559
+
560
+ /** \internal \returns \a a logically shifted by N bits to the right */
561
+ template<int N> EIGEN_DEVICE_FUNC inline int
562
+ plogical_shift_right(const int& a) { return static_cast<int>(static_cast<unsigned int>(a) >> N); }
563
+ template<int N> EIGEN_DEVICE_FUNC inline long int
564
+ plogical_shift_right(const long int& a) { return static_cast<long>(static_cast<unsigned long>(a) >> N); }
207
565
 
208
- /** \internal \returns the bitwise andnot of \a a and \a b */
566
+ /** \internal \returns \a a shifted by N bits to the left */
567
+ template<int N> EIGEN_DEVICE_FUNC inline int
568
+ plogical_shift_left(const int& a) { return a << N; }
569
+ template<int N> EIGEN_DEVICE_FUNC inline long int
570
+ plogical_shift_left(const long int& a) { return a << N; }
571
+
572
+ /** \internal \returns the significand and exponent of the underlying floating point numbers
573
+ * See https://en.cppreference.com/w/cpp/numeric/math/frexp
574
+ */
575
+ template <typename Packet>
576
+ EIGEN_DEVICE_FUNC inline Packet pfrexp(const Packet& a, Packet& exponent) {
577
+ int exp;
578
+ EIGEN_USING_STD(frexp);
579
+ Packet result = static_cast<Packet>(frexp(a, &exp));
580
+ exponent = static_cast<Packet>(exp);
581
+ return result;
582
+ }
583
+
584
+ /** \internal \returns a * 2^((int)exponent)
585
+ * See https://en.cppreference.com/w/cpp/numeric/math/ldexp
586
+ */
587
+ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
588
+ pldexp(const Packet &a, const Packet &exponent) {
589
+ EIGEN_USING_STD(ldexp)
590
+ return static_cast<Packet>(ldexp(a, static_cast<int>(exponent)));
591
+ }
592
+
593
+ /** \internal \returns the absolute difference of \a a and \a b (coeff-wise) */
209
594
  template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
210
- pandnot(const Packet& a, const Packet& b) { return a & (!b); }
595
+ pabsdiff(const Packet& a, const Packet& b) { return pselect(pcmp_lt(a, b), psub(b, a), psub(a, b)); }
211
596
 
212
597
  /** \internal \returns a packet version of \a *from, from must be 16 bytes aligned */
213
598
  template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
@@ -217,10 +602,22 @@ pload(const typename unpacket_traits<Packet>::type* from) { return *from; }
217
602
  template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
218
603
  ploadu(const typename unpacket_traits<Packet>::type* from) { return *from; }
219
604
 
605
+ /** \internal \returns a packet version of \a *from, (un-aligned masked load)
606
+ * There is no generic implementation. We only have implementations for specialized
607
+ * cases. Generic case should not be called.
608
+ */
609
+ template<typename Packet> EIGEN_DEVICE_FUNC inline
610
+ typename enable_if<unpacket_traits<Packet>::masked_load_available, Packet>::type
611
+ ploadu(const typename unpacket_traits<Packet>::type* from, typename unpacket_traits<Packet>::mask_t umask);
612
+
220
613
  /** \internal \returns a packet with constant coefficients \a a, e.g.: (a,a,a,a) */
221
614
  template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
222
615
  pset1(const typename unpacket_traits<Packet>::type& a) { return a; }
223
616
 
617
+ /** \internal \returns a packet with constant coefficients set from bits */
618
+ template<typename Packet,typename BitsType> EIGEN_DEVICE_FUNC inline Packet
619
+ pset1frombits(BitsType a);
620
+
224
621
  /** \internal \returns a packet with constant coefficients \a a[0], e.g.: (a[0],a[0],a[0],a[0]) */
225
622
  template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
226
623
  pload1(const typename unpacket_traits<Packet>::type *a) { return pset1<Packet>(*a); }
@@ -237,7 +634,7 @@ ploaddup(const typename unpacket_traits<Packet>::type* from) { return *from; }
237
634
  * For instance, for a packet of 8 elements, 2 scalars will be read from \a *from and
238
635
  * replicated to form: {from[0],from[0],from[0],from[0],from[1],from[1],from[1],from[1]}
239
636
  * Currently, this function is only used in matrix products.
240
- * For packet-size smaller or equal to 4, this function is equivalent to pload1
637
+ * For packet-size smaller or equal to 4, this function is equivalent to pload1
241
638
  */
242
639
  template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
243
640
  ploadquad(const typename unpacket_traits<Packet>::type* from)
@@ -281,6 +678,20 @@ inline void pbroadcast2(const typename unpacket_traits<Packet>::type *a,
281
678
  template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet
282
679
  plset(const typename unpacket_traits<Packet>::type& a) { return a; }
283
680
 
681
+ /** \internal \returns a mask packet with alternating coefficients, e.g.: (x, 0, x, 0),
682
+ where x is the value of all 1-bits. */
683
+ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
684
+ peven_mask(const Packet& /*a*/) {
685
+ typedef typename unpacket_traits<Packet>::type Scalar;
686
+ const size_t n = unpacket_traits<Packet>::size;
687
+ EIGEN_ALIGN_TO_BOUNDARY(sizeof(Packet)) Scalar elements[n];
688
+ for(size_t i = 0; i < n; ++i) {
689
+ memset(elements+i, ((i & 1) == 0 ? 0xff : 0), sizeof(Scalar));
690
+ }
691
+ return ploadu<Packet>(elements);
692
+ }
693
+
694
+
284
695
  /** \internal copy the packet \a from to \a *to, \a to must be 16 bytes aligned */
285
696
  template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstore(Scalar* to, const Packet& from)
286
697
  { (*to) = from; }
@@ -289,6 +700,15 @@ template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstore(
289
700
  template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstoreu(Scalar* to, const Packet& from)
290
701
  { (*to) = from; }
291
702
 
703
+ /** \internal copy the packet \a from to \a *to, (un-aligned store with a mask)
704
+ * There is no generic implementation. We only have implementations for specialized
705
+ * cases. Generic case should not be called.
706
+ */
707
+ template<typename Scalar, typename Packet>
708
+ EIGEN_DEVICE_FUNC inline
709
+ typename enable_if<unpacket_traits<Packet>::masked_store_available, void>::type
710
+ pstoreu(Scalar* to, const Packet& from, typename unpacket_traits<Packet>::mask_t umask);
711
+
292
712
  template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline Packet pgather(const Scalar* from, Index /*stride*/)
293
713
  { return ploadu<Packet>(from); }
294
714
 
@@ -298,8 +718,10 @@ template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstoreu
298
718
  /** \internal tries to do cache prefetching of \a addr */
299
719
  template<typename Scalar> EIGEN_DEVICE_FUNC inline void prefetch(const Scalar* addr)
300
720
  {
301
- #ifdef __CUDA_ARCH__
302
- #if defined(__LP64__)
721
+ #if defined(EIGEN_HIP_DEVICE_COMPILE)
722
+ // do nothing
723
+ #elif defined(EIGEN_CUDA_ARCH)
724
+ #if defined(__LP64__) || EIGEN_OS_WIN64
303
725
  // 64-bit pointer operand constraint for inlined asm
304
726
  asm(" prefetch.L1 [ %1 ];" : "=l"(addr) : "l"(addr));
305
727
  #else
@@ -311,39 +733,6 @@ template<typename Scalar> EIGEN_DEVICE_FUNC inline void prefetch(const Scalar* a
311
733
  #endif
312
734
  }
313
735
 
314
- /** \internal \returns the first element of a packet */
315
- template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type pfirst(const Packet& a)
316
- { return a; }
317
-
318
- /** \internal \returns a packet where the element i contains the sum of the packet of \a vec[i] */
319
- template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
320
- preduxp(const Packet* vecs) { return vecs[0]; }
321
-
322
- /** \internal \returns the sum of the elements of \a a*/
323
- template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux(const Packet& a)
324
- { return a; }
325
-
326
- /** \internal \returns the sum of the elements of \a a by block of 4 elements.
327
- * For a packet {a0, a1, a2, a3, a4, a5, a6, a7}, it returns a half packet {a0+a4, a1+a5, a2+a6, a3+a7}
328
- * For packet-size smaller or equal to 4, this boils down to a noop.
329
- */
330
- template<typename Packet> EIGEN_DEVICE_FUNC inline
331
- typename conditional<(unpacket_traits<Packet>::size%8)==0,typename unpacket_traits<Packet>::half,Packet>::type
332
- predux_downto4(const Packet& a)
333
- { return a; }
334
-
335
- /** \internal \returns the product of the elements of \a a*/
336
- template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_mul(const Packet& a)
337
- { return a; }
338
-
339
- /** \internal \returns the min of the elements of \a a*/
340
- template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_min(const Packet& a)
341
- { return a; }
342
-
343
- /** \internal \returns the max of the elements of \a a*/
344
- template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_max(const Packet& a)
345
- { return a; }
346
-
347
736
  /** \internal \returns the reversed elements of \a a*/
348
737
  template<typename Packet> EIGEN_DEVICE_FUNC inline Packet preverse(const Packet& a)
349
738
  { return a; }
@@ -351,7 +740,7 @@ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet preverse(const Packet&
351
740
  /** \internal \returns \a a with real and imaginary part flipped (for complex type only) */
352
741
  template<typename Packet> EIGEN_DEVICE_FUNC inline Packet pcplxflip(const Packet& a)
353
742
  {
354
- return Packet(a.imag(),a.real());
743
+ return Packet(numext::imag(a),numext::real(a));
355
744
  }
356
745
 
357
746
  /**************************
@@ -360,47 +749,51 @@ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet pcplxflip(const Packet
360
749
 
361
750
  /** \internal \returns the sine of \a a (coeff-wise) */
362
751
  template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
363
- Packet psin(const Packet& a) { using std::sin; return sin(a); }
752
+ Packet psin(const Packet& a) { EIGEN_USING_STD(sin); return sin(a); }
364
753
 
365
754
  /** \internal \returns the cosine of \a a (coeff-wise) */
366
755
  template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
367
- Packet pcos(const Packet& a) { using std::cos; return cos(a); }
756
+ Packet pcos(const Packet& a) { EIGEN_USING_STD(cos); return cos(a); }
368
757
 
369
758
  /** \internal \returns the tan of \a a (coeff-wise) */
370
759
  template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
371
- Packet ptan(const Packet& a) { using std::tan; return tan(a); }
760
+ Packet ptan(const Packet& a) { EIGEN_USING_STD(tan); return tan(a); }
372
761
 
373
762
  /** \internal \returns the arc sine of \a a (coeff-wise) */
374
763
  template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
375
- Packet pasin(const Packet& a) { using std::asin; return asin(a); }
764
+ Packet pasin(const Packet& a) { EIGEN_USING_STD(asin); return asin(a); }
376
765
 
377
766
  /** \internal \returns the arc cosine of \a a (coeff-wise) */
378
767
  template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
379
- Packet pacos(const Packet& a) { using std::acos; return acos(a); }
768
+ Packet pacos(const Packet& a) { EIGEN_USING_STD(acos); return acos(a); }
380
769
 
381
770
  /** \internal \returns the arc tangent of \a a (coeff-wise) */
382
771
  template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
383
- Packet patan(const Packet& a) { using std::atan; return atan(a); }
772
+ Packet patan(const Packet& a) { EIGEN_USING_STD(atan); return atan(a); }
384
773
 
385
774
  /** \internal \returns the hyperbolic sine of \a a (coeff-wise) */
386
775
  template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
387
- Packet psinh(const Packet& a) { using std::sinh; return sinh(a); }
776
+ Packet psinh(const Packet& a) { EIGEN_USING_STD(sinh); return sinh(a); }
388
777
 
389
778
  /** \internal \returns the hyperbolic cosine of \a a (coeff-wise) */
390
779
  template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
391
- Packet pcosh(const Packet& a) { using std::cosh; return cosh(a); }
780
+ Packet pcosh(const Packet& a) { EIGEN_USING_STD(cosh); return cosh(a); }
392
781
 
393
782
  /** \internal \returns the hyperbolic tan of \a a (coeff-wise) */
394
783
  template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
395
- Packet ptanh(const Packet& a) { using std::tanh; return tanh(a); }
784
+ Packet ptanh(const Packet& a) { EIGEN_USING_STD(tanh); return tanh(a); }
396
785
 
397
786
  /** \internal \returns the exp of \a a (coeff-wise) */
398
787
  template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
399
- Packet pexp(const Packet& a) { using std::exp; return exp(a); }
788
+ Packet pexp(const Packet& a) { EIGEN_USING_STD(exp); return exp(a); }
789
+
790
+ /** \internal \returns the expm1 of \a a (coeff-wise) */
791
+ template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
792
+ Packet pexpm1(const Packet& a) { return numext::expm1(a); }
400
793
 
401
794
  /** \internal \returns the log of \a a (coeff-wise) */
402
795
  template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
403
- Packet plog(const Packet& a) { using std::log; return log(a); }
796
+ Packet plog(const Packet& a) { EIGEN_USING_STD(log); return log(a); }
404
797
 
405
798
  /** \internal \returns the log1p of \a a (coeff-wise) */
406
799
  template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
@@ -408,16 +801,24 @@ Packet plog1p(const Packet& a) { return numext::log1p(a); }
408
801
 
409
802
  /** \internal \returns the log10 of \a a (coeff-wise) */
410
803
  template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
411
- Packet plog10(const Packet& a) { using std::log10; return log10(a); }
804
+ Packet plog10(const Packet& a) { EIGEN_USING_STD(log10); return log10(a); }
805
+
806
+ /** \internal \returns the log2 of \a a (coeff-wise) */
807
+ template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
808
+ Packet plog2(const Packet& a) {
809
+ typedef typename internal::unpacket_traits<Packet>::type Scalar;
810
+ return pmul(pset1<Packet>(Scalar(EIGEN_LOG2E)), plog(a));
811
+ }
412
812
 
413
813
  /** \internal \returns the square-root of \a a (coeff-wise) */
414
814
  template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
415
- Packet psqrt(const Packet& a) { using std::sqrt; return sqrt(a); }
815
+ Packet psqrt(const Packet& a) { return numext::sqrt(a); }
416
816
 
417
817
  /** \internal \returns the reciprocal square-root of \a a (coeff-wise) */
418
818
  template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
419
819
  Packet prsqrt(const Packet& a) {
420
- return pdiv(pset1<Packet>(1), psqrt(a));
820
+ typedef typename internal::unpacket_traits<Packet>::type Scalar;
821
+ return pdiv(pset1<Packet>(Scalar(1)), psqrt(a));
421
822
  }
422
823
 
423
824
  /** \internal \returns the rounded value of \a a (coeff-wise) */
@@ -428,15 +829,121 @@ Packet pround(const Packet& a) { using numext::round; return round(a); }
428
829
  template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
429
830
  Packet pfloor(const Packet& a) { using numext::floor; return floor(a); }
430
831
 
832
+ /** \internal \returns the rounded value of \a a (coeff-wise) with current
833
+ * rounding mode */
834
+ template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
835
+ Packet print(const Packet& a) { using numext::rint; return rint(a); }
836
+
431
837
  /** \internal \returns the ceil of \a a (coeff-wise) */
432
838
  template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
433
839
  Packet pceil(const Packet& a) { using numext::ceil; return ceil(a); }
434
840
 
841
+ /** \internal \returns the first element of a packet */
842
+ template<typename Packet>
843
+ EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type
844
+ pfirst(const Packet& a)
845
+ { return a; }
846
+
847
+ /** \internal \returns the sum of the elements of upper and lower half of \a a if \a a is larger than 4.
848
+ * For a packet {a0, a1, a2, a3, a4, a5, a6, a7}, it returns a half packet {a0+a4, a1+a5, a2+a6, a3+a7}
849
+ * For packet-size smaller or equal to 4, this boils down to a noop.
850
+ */
851
+ template<typename Packet>
852
+ EIGEN_DEVICE_FUNC inline typename conditional<(unpacket_traits<Packet>::size%8)==0,typename unpacket_traits<Packet>::half,Packet>::type
853
+ predux_half_dowto4(const Packet& a)
854
+ { return a; }
855
+
856
+ // Slow generic implementation of Packet reduction.
857
+ template <typename Packet, typename Op>
858
+ EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type
859
+ predux_helper(const Packet& a, Op op) {
860
+ typedef typename unpacket_traits<Packet>::type Scalar;
861
+ const size_t n = unpacket_traits<Packet>::size;
862
+ EIGEN_ALIGN_TO_BOUNDARY(sizeof(Packet)) Scalar elements[n];
863
+ pstoreu<Scalar>(elements, a);
864
+ for(size_t k = n / 2; k > 0; k /= 2) {
865
+ for(size_t i = 0; i < k; ++i) {
866
+ elements[i] = op(elements[i], elements[i + k]);
867
+ }
868
+ }
869
+ return elements[0];
870
+ }
871
+
872
+ /** \internal \returns the sum of the elements of \a a*/
873
+ template<typename Packet>
874
+ EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type
875
+ predux(const Packet& a)
876
+ {
877
+ return a;
878
+ }
879
+
880
+ /** \internal \returns the product of the elements of \a a */
881
+ template <typename Packet>
882
+ EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_mul(
883
+ const Packet& a) {
884
+ typedef typename unpacket_traits<Packet>::type Scalar;
885
+ return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmul<Scalar>)));
886
+ }
887
+
888
+ /** \internal \returns the min of the elements of \a a */
889
+ template <typename Packet>
890
+ EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_min(
891
+ const Packet &a) {
892
+ typedef typename unpacket_traits<Packet>::type Scalar;
893
+ return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmin<PropagateFast, Scalar>)));
894
+ }
895
+
896
+ template <int NaNPropagation, typename Packet>
897
+ EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_min(
898
+ const Packet& a) {
899
+ typedef typename unpacket_traits<Packet>::type Scalar;
900
+ return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmin<NaNPropagation, Scalar>)));
901
+ }
902
+
903
+ /** \internal \returns the max of the elements of \a a */
904
+ template <typename Packet>
905
+ EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_max(
906
+ const Packet &a) {
907
+ typedef typename unpacket_traits<Packet>::type Scalar;
908
+ return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmax<PropagateFast, Scalar>)));
909
+ }
910
+
911
+ template <int NaNPropagation, typename Packet>
912
+ EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_max(
913
+ const Packet& a) {
914
+ typedef typename unpacket_traits<Packet>::type Scalar;
915
+ return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmax<NaNPropagation, Scalar>)));
916
+ }
917
+
918
+ #undef EIGEN_BINARY_OP_NAN_PROPAGATION
919
+
920
+ /** \internal \returns true if all coeffs of \a a mean "true"
921
+ * It is supposed to be called on values returned by pcmp_*.
922
+ */
923
+ // not needed yet
924
+ // template<typename Packet> EIGEN_DEVICE_FUNC inline bool predux_all(const Packet& a)
925
+ // { return bool(a); }
926
+
927
+ /** \internal \returns true if any coeff of \a a means "true"
928
+ * It is supposed to be called on values returned by pcmp_*.
929
+ */
930
+ template<typename Packet> EIGEN_DEVICE_FUNC inline bool predux_any(const Packet& a)
931
+ {
932
+ // Dirty but generic implementation where "true" is assumed to be non 0 and all the same.
933
+ // It is expected that "true" is either:
934
+ // - Scalar(1)
935
+ // - bits full of ones (NaN for floats),
936
+ // - or first bit equals to 1 (1 for ints, smallest denormal for floats).
937
+ // For all these cases, taking the sum is just fine, and this boils down to a no-op for scalars.
938
+ typedef typename unpacket_traits<Packet>::type Scalar;
939
+ return numext::not_equal_strict(predux(a), Scalar(0));
940
+ }
941
+
435
942
  /***************************************************************************
436
943
  * The following functions might not have to be overwritten for vectorized types
437
944
  ***************************************************************************/
438
945
 
439
- /** \internal copy a packet with constant coeficient \a a (e.g., [a,a,a,a]) to \a *to. \a to must be 16 bytes aligned */
946
+ /** \internal copy a packet with constant coefficient \a a (e.g., [a,a,a,a]) to \a *to. \a to must be 16 bytes aligned */
440
947
  // NOTE: this function must really be templated on the packet type (think about different packet types for the same scalar type)
441
948
  template<typename Packet>
442
949
  inline void pstore1(typename unpacket_traits<Packet>::type* to, const typename unpacket_traits<Packet>::type& a)
@@ -484,41 +991,12 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt_ro(const typename unpacket_t
484
991
  return ploadt<Packet, LoadMode>(from);
485
992
  }
486
993
 
487
- /** \internal default implementation of palign() allowing partial specialization */
488
- template<int Offset,typename PacketType>
489
- struct palign_impl
490
- {
491
- // by default data are aligned, so there is nothing to be done :)
492
- static inline void run(PacketType&, const PacketType&) {}
493
- };
494
-
495
- /** \internal update \a first using the concatenation of the packet_size minus \a Offset last elements
496
- * of \a first and \a Offset first elements of \a second.
497
- *
498
- * This function is currently only used to optimize matrix-vector products on unligned matrices.
499
- * It takes 2 packets that represent a contiguous memory array, and returns a packet starting
500
- * at the position \a Offset. For instance, for packets of 4 elements, we have:
501
- * Input:
502
- * - first = {f0,f1,f2,f3}
503
- * - second = {s0,s1,s2,s3}
504
- * Output:
505
- * - if Offset==0 then {f0,f1,f2,f3}
506
- * - if Offset==1 then {f1,f2,f3,s0}
507
- * - if Offset==2 then {f2,f3,s0,s1}
508
- * - if Offset==3 then {f3,s0,s1,s3}
509
- */
510
- template<int Offset,typename PacketType>
511
- inline void palign(PacketType& first, const PacketType& second)
512
- {
513
- palign_impl<Offset,PacketType>::run(first,second);
514
- }
515
-
516
994
  /***************************************************************************
517
995
  * Fast complex products (GCC generates a function call which is very slow)
518
996
  ***************************************************************************/
519
997
 
520
998
  // Eigen+CUDA does not support complexes.
521
- #ifndef __CUDACC__
999
+ #if !defined(EIGEN_GPUCC)
522
1000
 
523
1001
  template<> inline std::complex<float> pmul(const std::complex<float>& a, const std::complex<float>& b)
524
1002
  { return std::complex<float>(a.real()*b.real() - a.imag()*b.imag(), a.imag()*b.real() + a.real()*b.imag()); }
@@ -555,34 +1033,6 @@ pblend(const Selector<unpacket_traits<Packet>::size>& ifPacket, const Packet& th
555
1033
  return ifPacket.select[0] ? thenPacket : elsePacket;
556
1034
  }
557
1035
 
558
- /** \internal \returns \a a with the first coefficient replaced by the scalar b */
559
- template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
560
- pinsertfirst(const Packet& a, typename unpacket_traits<Packet>::type b)
561
- {
562
- // Default implementation based on pblend.
563
- // It must be specialized for higher performance.
564
- Selector<unpacket_traits<Packet>::size> mask;
565
- mask.select[0] = true;
566
- // This for loop should be optimized away by the compiler.
567
- for(Index i=1; i<unpacket_traits<Packet>::size; ++i)
568
- mask.select[i] = false;
569
- return pblend(mask, pset1<Packet>(b), a);
570
- }
571
-
572
- /** \internal \returns \a a with the last coefficient replaced by the scalar b */
573
- template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
574
- pinsertlast(const Packet& a, typename unpacket_traits<Packet>::type b)
575
- {
576
- // Default implementation based on pblend.
577
- // It must be specialized for higher performance.
578
- Selector<unpacket_traits<Packet>::size> mask;
579
- // This for loop should be optimized away by the compiler.
580
- for(Index i=0; i<unpacket_traits<Packet>::size-1; ++i)
581
- mask.select[i] = false;
582
- mask.select[unpacket_traits<Packet>::size-1] = true;
583
- return pblend(mask, pset1<Packet>(b), a);
584
- }
585
-
586
1036
  } // end namespace internal
587
1037
 
588
1038
  } // end namespace Eigen