@smake/eigen 1.0.2 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (435) hide show
  1. package/README.md +1 -1
  2. package/eigen/Eigen/AccelerateSupport +52 -0
  3. package/eigen/Eigen/Cholesky +18 -21
  4. package/eigen/Eigen/CholmodSupport +28 -28
  5. package/eigen/Eigen/Core +235 -326
  6. package/eigen/Eigen/Eigenvalues +16 -14
  7. package/eigen/Eigen/Geometry +21 -24
  8. package/eigen/Eigen/Householder +9 -8
  9. package/eigen/Eigen/IterativeLinearSolvers +8 -4
  10. package/eigen/Eigen/Jacobi +14 -14
  11. package/eigen/Eigen/KLUSupport +43 -0
  12. package/eigen/Eigen/LU +16 -20
  13. package/eigen/Eigen/MetisSupport +12 -12
  14. package/eigen/Eigen/OrderingMethods +54 -54
  15. package/eigen/Eigen/PaStiXSupport +23 -20
  16. package/eigen/Eigen/PardisoSupport +17 -14
  17. package/eigen/Eigen/QR +18 -21
  18. package/eigen/Eigen/QtAlignedMalloc +5 -13
  19. package/eigen/Eigen/SPQRSupport +21 -14
  20. package/eigen/Eigen/SVD +23 -18
  21. package/eigen/Eigen/Sparse +1 -4
  22. package/eigen/Eigen/SparseCholesky +18 -23
  23. package/eigen/Eigen/SparseCore +18 -17
  24. package/eigen/Eigen/SparseLU +12 -8
  25. package/eigen/Eigen/SparseQR +16 -14
  26. package/eigen/Eigen/StdDeque +5 -2
  27. package/eigen/Eigen/StdList +5 -2
  28. package/eigen/Eigen/StdVector +5 -2
  29. package/eigen/Eigen/SuperLUSupport +30 -24
  30. package/eigen/Eigen/ThreadPool +80 -0
  31. package/eigen/Eigen/UmfPackSupport +19 -17
  32. package/eigen/Eigen/Version +14 -0
  33. package/eigen/Eigen/src/AccelerateSupport/AccelerateSupport.h +423 -0
  34. package/eigen/Eigen/src/AccelerateSupport/InternalHeaderCheck.h +3 -0
  35. package/eigen/Eigen/src/Cholesky/InternalHeaderCheck.h +3 -0
  36. package/eigen/Eigen/src/Cholesky/LDLT.h +377 -401
  37. package/eigen/Eigen/src/Cholesky/LLT.h +332 -360
  38. package/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h +81 -56
  39. package/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +620 -521
  40. package/eigen/Eigen/src/CholmodSupport/InternalHeaderCheck.h +3 -0
  41. package/eigen/Eigen/src/Core/ArithmeticSequence.h +239 -0
  42. package/eigen/Eigen/src/Core/Array.h +341 -294
  43. package/eigen/Eigen/src/Core/ArrayBase.h +190 -203
  44. package/eigen/Eigen/src/Core/ArrayWrapper.h +127 -171
  45. package/eigen/Eigen/src/Core/Assign.h +30 -40
  46. package/eigen/Eigen/src/Core/AssignEvaluator.h +711 -589
  47. package/eigen/Eigen/src/Core/Assign_MKL.h +130 -125
  48. package/eigen/Eigen/src/Core/BandMatrix.h +268 -283
  49. package/eigen/Eigen/src/Core/Block.h +375 -398
  50. package/eigen/Eigen/src/Core/CommaInitializer.h +86 -97
  51. package/eigen/Eigen/src/Core/ConditionEstimator.h +51 -53
  52. package/eigen/Eigen/src/Core/CoreEvaluators.h +1356 -1026
  53. package/eigen/Eigen/src/Core/CoreIterators.h +73 -59
  54. package/eigen/Eigen/src/Core/CwiseBinaryOp.h +114 -132
  55. package/eigen/Eigen/src/Core/CwiseNullaryOp.h +726 -617
  56. package/eigen/Eigen/src/Core/CwiseTernaryOp.h +77 -103
  57. package/eigen/Eigen/src/Core/CwiseUnaryOp.h +56 -68
  58. package/eigen/Eigen/src/Core/CwiseUnaryView.h +132 -95
  59. package/eigen/Eigen/src/Core/DenseBase.h +632 -571
  60. package/eigen/Eigen/src/Core/DenseCoeffsBase.h +511 -624
  61. package/eigen/Eigen/src/Core/DenseStorage.h +512 -509
  62. package/eigen/Eigen/src/Core/DeviceWrapper.h +153 -0
  63. package/eigen/Eigen/src/Core/Diagonal.h +169 -210
  64. package/eigen/Eigen/src/Core/DiagonalMatrix.h +351 -274
  65. package/eigen/Eigen/src/Core/DiagonalProduct.h +12 -10
  66. package/eigen/Eigen/src/Core/Dot.h +172 -222
  67. package/eigen/Eigen/src/Core/EigenBase.h +75 -85
  68. package/eigen/Eigen/src/Core/Fill.h +138 -0
  69. package/eigen/Eigen/src/Core/FindCoeff.h +464 -0
  70. package/eigen/Eigen/src/Core/ForceAlignedAccess.h +90 -109
  71. package/eigen/Eigen/src/Core/Fuzzy.h +82 -105
  72. package/eigen/Eigen/src/Core/GeneralProduct.h +327 -263
  73. package/eigen/Eigen/src/Core/GenericPacketMath.h +1472 -360
  74. package/eigen/Eigen/src/Core/GlobalFunctions.h +194 -151
  75. package/eigen/Eigen/src/Core/IO.h +147 -139
  76. package/eigen/Eigen/src/Core/IndexedView.h +321 -0
  77. package/eigen/Eigen/src/Core/InnerProduct.h +260 -0
  78. package/eigen/Eigen/src/Core/InternalHeaderCheck.h +3 -0
  79. package/eigen/Eigen/src/Core/Inverse.h +56 -66
  80. package/eigen/Eigen/src/Core/Map.h +124 -142
  81. package/eigen/Eigen/src/Core/MapBase.h +256 -281
  82. package/eigen/Eigen/src/Core/MathFunctions.h +1620 -938
  83. package/eigen/Eigen/src/Core/MathFunctionsImpl.h +233 -71
  84. package/eigen/Eigen/src/Core/Matrix.h +491 -416
  85. package/eigen/Eigen/src/Core/MatrixBase.h +468 -453
  86. package/eigen/Eigen/src/Core/NestByValue.h +66 -85
  87. package/eigen/Eigen/src/Core/NoAlias.h +79 -85
  88. package/eigen/Eigen/src/Core/NumTraits.h +235 -148
  89. package/eigen/Eigen/src/Core/PartialReduxEvaluator.h +253 -0
  90. package/eigen/Eigen/src/Core/PermutationMatrix.h +461 -511
  91. package/eigen/Eigen/src/Core/PlainObjectBase.h +871 -894
  92. package/eigen/Eigen/src/Core/Product.h +260 -139
  93. package/eigen/Eigen/src/Core/ProductEvaluators.h +863 -714
  94. package/eigen/Eigen/src/Core/Random.h +161 -136
  95. package/eigen/Eigen/src/Core/RandomImpl.h +262 -0
  96. package/eigen/Eigen/src/Core/RealView.h +250 -0
  97. package/eigen/Eigen/src/Core/Redux.h +366 -336
  98. package/eigen/Eigen/src/Core/Ref.h +308 -209
  99. package/eigen/Eigen/src/Core/Replicate.h +94 -106
  100. package/eigen/Eigen/src/Core/Reshaped.h +398 -0
  101. package/eigen/Eigen/src/Core/ReturnByValue.h +49 -55
  102. package/eigen/Eigen/src/Core/Reverse.h +136 -145
  103. package/eigen/Eigen/src/Core/Select.h +70 -140
  104. package/eigen/Eigen/src/Core/SelfAdjointView.h +262 -285
  105. package/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h +23 -20
  106. package/eigen/Eigen/src/Core/SkewSymmetricMatrix3.h +382 -0
  107. package/eigen/Eigen/src/Core/Solve.h +97 -111
  108. package/eigen/Eigen/src/Core/SolveTriangular.h +131 -129
  109. package/eigen/Eigen/src/Core/SolverBase.h +138 -101
  110. package/eigen/Eigen/src/Core/StableNorm.h +156 -160
  111. package/eigen/Eigen/src/Core/StlIterators.h +619 -0
  112. package/eigen/Eigen/src/Core/Stride.h +91 -88
  113. package/eigen/Eigen/src/Core/Swap.h +70 -38
  114. package/eigen/Eigen/src/Core/Transpose.h +295 -273
  115. package/eigen/Eigen/src/Core/Transpositions.h +272 -317
  116. package/eigen/Eigen/src/Core/TriangularMatrix.h +670 -755
  117. package/eigen/Eigen/src/Core/VectorBlock.h +59 -72
  118. package/eigen/Eigen/src/Core/VectorwiseOp.h +668 -630
  119. package/eigen/Eigen/src/Core/Visitor.h +480 -216
  120. package/eigen/Eigen/src/Core/arch/AVX/Complex.h +407 -293
  121. package/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +79 -388
  122. package/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +2935 -491
  123. package/eigen/Eigen/src/Core/arch/AVX/Reductions.h +353 -0
  124. package/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +279 -22
  125. package/eigen/Eigen/src/Core/arch/AVX512/Complex.h +472 -0
  126. package/eigen/Eigen/src/Core/arch/AVX512/GemmKernel.h +1245 -0
  127. package/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +85 -333
  128. package/eigen/Eigen/src/Core/arch/AVX512/MathFunctionsFP16.h +75 -0
  129. package/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +2490 -649
  130. package/eigen/Eigen/src/Core/arch/AVX512/PacketMathFP16.h +1413 -0
  131. package/eigen/Eigen/src/Core/arch/AVX512/Reductions.h +297 -0
  132. package/eigen/Eigen/src/Core/arch/AVX512/TrsmKernel.h +1167 -0
  133. package/eigen/Eigen/src/Core/arch/AVX512/TrsmUnrolls.inc +1219 -0
  134. package/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +277 -0
  135. package/eigen/Eigen/src/Core/arch/AVX512/TypeCastingFP16.h +130 -0
  136. package/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +521 -298
  137. package/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +39 -280
  138. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +3686 -0
  139. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +205 -0
  140. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +901 -0
  141. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMAbfloat16.h +742 -0
  142. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixVectorProduct.inc +2818 -0
  143. package/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +3391 -723
  144. package/eigen/Eigen/src/Core/arch/AltiVec/TypeCasting.h +153 -0
  145. package/eigen/Eigen/src/Core/arch/Default/BFloat16.h +866 -0
  146. package/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +113 -14
  147. package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +2634 -0
  148. package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +227 -0
  149. package/eigen/Eigen/src/Core/arch/Default/Half.h +1091 -0
  150. package/eigen/Eigen/src/Core/arch/Default/Settings.h +11 -13
  151. package/eigen/Eigen/src/Core/arch/GPU/Complex.h +244 -0
  152. package/eigen/Eigen/src/Core/arch/GPU/MathFunctions.h +104 -0
  153. package/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1712 -0
  154. package/eigen/Eigen/src/Core/arch/GPU/Tuple.h +268 -0
  155. package/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +77 -0
  156. package/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
  157. package/eigen/Eigen/src/Core/arch/HVX/PacketMath.h +1088 -0
  158. package/eigen/Eigen/src/Core/arch/LSX/Complex.h +520 -0
  159. package/eigen/Eigen/src/Core/arch/LSX/GeneralBlockPanelKernel.h +23 -0
  160. package/eigen/Eigen/src/Core/arch/LSX/MathFunctions.h +43 -0
  161. package/eigen/Eigen/src/Core/arch/LSX/PacketMath.h +2866 -0
  162. package/eigen/Eigen/src/Core/arch/LSX/TypeCasting.h +526 -0
  163. package/eigen/Eigen/src/Core/arch/MSA/Complex.h +620 -0
  164. package/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +379 -0
  165. package/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1237 -0
  166. package/eigen/Eigen/src/Core/arch/NEON/Complex.h +531 -289
  167. package/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +243 -0
  168. package/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +50 -73
  169. package/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +5915 -579
  170. package/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1642 -0
  171. package/eigen/Eigen/src/Core/arch/NEON/UnaryFunctors.h +57 -0
  172. package/eigen/Eigen/src/Core/arch/SSE/Complex.h +366 -334
  173. package/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +40 -514
  174. package/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +2164 -675
  175. package/eigen/Eigen/src/Core/arch/SSE/Reductions.h +324 -0
  176. package/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +188 -35
  177. package/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +48 -0
  178. package/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +674 -0
  179. package/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +52 -0
  180. package/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +227 -0
  181. package/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +303 -0
  182. package/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +576 -0
  183. package/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +83 -0
  184. package/eigen/Eigen/src/Core/arch/ZVector/Complex.h +434 -261
  185. package/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +160 -53
  186. package/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +1073 -605
  187. package/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +123 -117
  188. package/eigen/Eigen/src/Core/functors/BinaryFunctors.h +594 -322
  189. package/eigen/Eigen/src/Core/functors/NullaryFunctors.h +204 -118
  190. package/eigen/Eigen/src/Core/functors/StlFunctors.h +110 -97
  191. package/eigen/Eigen/src/Core/functors/TernaryFunctors.h +34 -7
  192. package/eigen/Eigen/src/Core/functors/UnaryFunctors.h +1158 -530
  193. package/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +2329 -1333
  194. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +328 -364
  195. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +191 -178
  196. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +85 -82
  197. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +154 -73
  198. package/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +396 -542
  199. package/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +80 -77
  200. package/eigen/Eigen/src/Core/products/Parallelizer.h +208 -92
  201. package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +331 -375
  202. package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +206 -224
  203. package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +139 -146
  204. package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +58 -61
  205. package/eigen/Eigen/src/Core/products/SelfadjointProduct.h +71 -71
  206. package/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +48 -46
  207. package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +294 -369
  208. package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +246 -238
  209. package/eigen/Eigen/src/Core/products/TriangularMatrixVector.h +244 -247
  210. package/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +212 -192
  211. package/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +328 -275
  212. package/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +108 -109
  213. package/eigen/Eigen/src/Core/products/TriangularSolverVector.h +70 -93
  214. package/eigen/Eigen/src/Core/util/Assert.h +158 -0
  215. package/eigen/Eigen/src/Core/util/BlasUtil.h +413 -290
  216. package/eigen/Eigen/src/Core/util/ConfigureVectorization.h +543 -0
  217. package/eigen/Eigen/src/Core/util/Constants.h +314 -263
  218. package/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +130 -78
  219. package/eigen/Eigen/src/Core/util/EmulateArray.h +270 -0
  220. package/eigen/Eigen/src/Core/util/ForwardDeclarations.h +450 -224
  221. package/eigen/Eigen/src/Core/util/GpuHipCudaDefines.inc +101 -0
  222. package/eigen/Eigen/src/Core/util/GpuHipCudaUndefines.inc +45 -0
  223. package/eigen/Eigen/src/Core/util/IndexedViewHelper.h +487 -0
  224. package/eigen/Eigen/src/Core/util/IntegralConstant.h +279 -0
  225. package/eigen/Eigen/src/Core/util/MKL_support.h +39 -30
  226. package/eigen/Eigen/src/Core/util/Macros.h +939 -646
  227. package/eigen/Eigen/src/Core/util/MaxSizeVector.h +139 -0
  228. package/eigen/Eigen/src/Core/util/Memory.h +1042 -650
  229. package/eigen/Eigen/src/Core/util/Meta.h +618 -426
  230. package/eigen/Eigen/src/Core/util/MoreMeta.h +638 -0
  231. package/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +32 -19
  232. package/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
  233. package/eigen/Eigen/src/Core/util/Serializer.h +209 -0
  234. package/eigen/Eigen/src/Core/util/StaticAssert.h +51 -164
  235. package/eigen/Eigen/src/Core/util/SymbolicIndex.h +445 -0
  236. package/eigen/Eigen/src/Core/util/XprHelper.h +793 -538
  237. package/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +246 -277
  238. package/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +299 -319
  239. package/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +52 -48
  240. package/eigen/Eigen/src/Eigenvalues/EigenSolver.h +413 -456
  241. package/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +309 -325
  242. package/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +157 -171
  243. package/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +292 -310
  244. package/eigen/Eigen/src/Eigenvalues/InternalHeaderCheck.h +3 -0
  245. package/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +91 -107
  246. package/eigen/Eigen/src/Eigenvalues/RealQZ.h +539 -606
  247. package/eigen/Eigen/src/Eigenvalues/RealSchur.h +348 -382
  248. package/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +41 -35
  249. package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +579 -600
  250. package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +47 -44
  251. package/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +434 -461
  252. package/eigen/Eigen/src/Geometry/AlignedBox.h +307 -214
  253. package/eigen/Eigen/src/Geometry/AngleAxis.h +135 -137
  254. package/eigen/Eigen/src/Geometry/EulerAngles.h +163 -74
  255. package/eigen/Eigen/src/Geometry/Homogeneous.h +289 -333
  256. package/eigen/Eigen/src/Geometry/Hyperplane.h +152 -161
  257. package/eigen/Eigen/src/Geometry/InternalHeaderCheck.h +3 -0
  258. package/eigen/Eigen/src/Geometry/OrthoMethods.h +168 -145
  259. package/eigen/Eigen/src/Geometry/ParametrizedLine.h +141 -104
  260. package/eigen/Eigen/src/Geometry/Quaternion.h +595 -497
  261. package/eigen/Eigen/src/Geometry/Rotation2D.h +110 -108
  262. package/eigen/Eigen/src/Geometry/RotationBase.h +148 -145
  263. package/eigen/Eigen/src/Geometry/Scaling.h +115 -90
  264. package/eigen/Eigen/src/Geometry/Transform.h +896 -953
  265. package/eigen/Eigen/src/Geometry/Translation.h +100 -98
  266. package/eigen/Eigen/src/Geometry/Umeyama.h +79 -84
  267. package/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +154 -0
  268. package/eigen/Eigen/src/Householder/BlockHouseholder.h +54 -42
  269. package/eigen/Eigen/src/Householder/Householder.h +104 -122
  270. package/eigen/Eigen/src/Householder/HouseholderSequence.h +416 -382
  271. package/eigen/Eigen/src/Householder/InternalHeaderCheck.h +3 -0
  272. package/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +153 -166
  273. package/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +127 -138
  274. package/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +95 -124
  275. package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +269 -267
  276. package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +246 -259
  277. package/eigen/Eigen/src/IterativeLinearSolvers/InternalHeaderCheck.h +3 -0
  278. package/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +218 -217
  279. package/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +80 -103
  280. package/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +59 -63
  281. package/eigen/Eigen/src/Jacobi/InternalHeaderCheck.h +3 -0
  282. package/eigen/Eigen/src/Jacobi/Jacobi.h +256 -291
  283. package/eigen/Eigen/src/KLUSupport/InternalHeaderCheck.h +3 -0
  284. package/eigen/Eigen/src/KLUSupport/KLUSupport.h +339 -0
  285. package/eigen/Eigen/src/LU/Determinant.h +60 -63
  286. package/eigen/Eigen/src/LU/FullPivLU.h +561 -626
  287. package/eigen/Eigen/src/LU/InternalHeaderCheck.h +3 -0
  288. package/eigen/Eigen/src/LU/InverseImpl.h +213 -275
  289. package/eigen/Eigen/src/LU/PartialPivLU.h +407 -435
  290. package/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h +54 -40
  291. package/eigen/Eigen/src/LU/arch/InverseSize4.h +353 -0
  292. package/eigen/Eigen/src/MetisSupport/InternalHeaderCheck.h +3 -0
  293. package/eigen/Eigen/src/MetisSupport/MetisSupport.h +81 -93
  294. package/eigen/Eigen/src/OrderingMethods/Amd.h +250 -282
  295. package/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +950 -1103
  296. package/eigen/Eigen/src/OrderingMethods/InternalHeaderCheck.h +3 -0
  297. package/eigen/Eigen/src/OrderingMethods/Ordering.h +111 -122
  298. package/eigen/Eigen/src/PaStiXSupport/InternalHeaderCheck.h +3 -0
  299. package/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +524 -570
  300. package/eigen/Eigen/src/PardisoSupport/InternalHeaderCheck.h +3 -0
  301. package/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +385 -429
  302. package/eigen/Eigen/src/QR/ColPivHouseholderQR.h +494 -473
  303. package/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +120 -56
  304. package/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +223 -137
  305. package/eigen/Eigen/src/QR/FullPivHouseholderQR.h +517 -460
  306. package/eigen/Eigen/src/QR/HouseholderQR.h +412 -278
  307. package/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h +32 -23
  308. package/eigen/Eigen/src/QR/InternalHeaderCheck.h +3 -0
  309. package/eigen/Eigen/src/SPQRSupport/InternalHeaderCheck.h +3 -0
  310. package/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +263 -261
  311. package/eigen/Eigen/src/SVD/BDCSVD.h +872 -679
  312. package/eigen/Eigen/src/SVD/BDCSVD_LAPACKE.h +174 -0
  313. package/eigen/Eigen/src/SVD/InternalHeaderCheck.h +3 -0
  314. package/eigen/Eigen/src/SVD/JacobiSVD.h +585 -543
  315. package/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +85 -49
  316. package/eigen/Eigen/src/SVD/SVDBase.h +281 -160
  317. package/eigen/Eigen/src/SVD/UpperBidiagonalization.h +202 -237
  318. package/eigen/Eigen/src/SparseCholesky/InternalHeaderCheck.h +3 -0
  319. package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +769 -590
  320. package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +318 -129
  321. package/eigen/Eigen/src/SparseCore/AmbiVector.h +202 -251
  322. package/eigen/Eigen/src/SparseCore/CompressedStorage.h +184 -236
  323. package/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +140 -184
  324. package/eigen/Eigen/src/SparseCore/InternalHeaderCheck.h +3 -0
  325. package/eigen/Eigen/src/SparseCore/SparseAssign.h +174 -111
  326. package/eigen/Eigen/src/SparseCore/SparseBlock.h +408 -477
  327. package/eigen/Eigen/src/SparseCore/SparseColEtree.h +100 -112
  328. package/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +531 -280
  329. package/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +559 -347
  330. package/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +100 -108
  331. package/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +185 -191
  332. package/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +71 -71
  333. package/eigen/Eigen/src/SparseCore/SparseDot.h +49 -47
  334. package/eigen/Eigen/src/SparseCore/SparseFuzzy.h +13 -11
  335. package/eigen/Eigen/src/SparseCore/SparseMap.h +243 -253
  336. package/eigen/Eigen/src/SparseCore/SparseMatrix.h +1614 -1142
  337. package/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +403 -357
  338. package/eigen/Eigen/src/SparseCore/SparsePermutation.h +186 -115
  339. package/eigen/Eigen/src/SparseCore/SparseProduct.h +100 -91
  340. package/eigen/Eigen/src/SparseCore/SparseRedux.h +22 -24
  341. package/eigen/Eigen/src/SparseCore/SparseRef.h +268 -295
  342. package/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +371 -414
  343. package/eigen/Eigen/src/SparseCore/SparseSolverBase.h +78 -87
  344. package/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +81 -95
  345. package/eigen/Eigen/src/SparseCore/SparseTranspose.h +62 -71
  346. package/eigen/Eigen/src/SparseCore/SparseTriangularView.h +132 -144
  347. package/eigen/Eigen/src/SparseCore/SparseUtil.h +146 -115
  348. package/eigen/Eigen/src/SparseCore/SparseVector.h +426 -372
  349. package/eigen/Eigen/src/SparseCore/SparseView.h +164 -193
  350. package/eigen/Eigen/src/SparseCore/TriangularSolver.h +129 -170
  351. package/eigen/Eigen/src/SparseLU/InternalHeaderCheck.h +3 -0
  352. package/eigen/Eigen/src/SparseLU/SparseLU.h +814 -618
  353. package/eigen/Eigen/src/SparseLU/SparseLUImpl.h +61 -48
  354. package/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +102 -118
  355. package/eigen/Eigen/src/SparseLU/SparseLU_Structs.h +38 -35
  356. package/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +273 -255
  357. package/eigen/Eigen/src/SparseLU/SparseLU_Utils.h +44 -49
  358. package/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h +104 -108
  359. package/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +90 -101
  360. package/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +57 -58
  361. package/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +43 -55
  362. package/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +74 -71
  363. package/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +125 -133
  364. package/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h +136 -159
  365. package/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h +51 -52
  366. package/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h +67 -73
  367. package/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h +24 -26
  368. package/eigen/Eigen/src/SparseQR/InternalHeaderCheck.h +3 -0
  369. package/eigen/Eigen/src/SparseQR/SparseQR.h +451 -490
  370. package/eigen/Eigen/src/StlSupport/StdDeque.h +28 -105
  371. package/eigen/Eigen/src/StlSupport/StdList.h +28 -84
  372. package/eigen/Eigen/src/StlSupport/StdVector.h +28 -108
  373. package/eigen/Eigen/src/StlSupport/details.h +48 -50
  374. package/eigen/Eigen/src/SuperLUSupport/InternalHeaderCheck.h +3 -0
  375. package/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +634 -732
  376. package/eigen/Eigen/src/ThreadPool/Barrier.h +70 -0
  377. package/eigen/Eigen/src/ThreadPool/CoreThreadPoolDevice.h +336 -0
  378. package/eigen/Eigen/src/ThreadPool/EventCount.h +241 -0
  379. package/eigen/Eigen/src/ThreadPool/ForkJoin.h +140 -0
  380. package/eigen/Eigen/src/ThreadPool/InternalHeaderCheck.h +4 -0
  381. package/eigen/Eigen/src/ThreadPool/NonBlockingThreadPool.h +587 -0
  382. package/eigen/Eigen/src/ThreadPool/RunQueue.h +230 -0
  383. package/eigen/Eigen/src/ThreadPool/ThreadCancel.h +21 -0
  384. package/eigen/Eigen/src/ThreadPool/ThreadEnvironment.h +43 -0
  385. package/eigen/Eigen/src/ThreadPool/ThreadLocal.h +289 -0
  386. package/eigen/Eigen/src/ThreadPool/ThreadPoolInterface.h +50 -0
  387. package/eigen/Eigen/src/ThreadPool/ThreadYield.h +16 -0
  388. package/eigen/Eigen/src/UmfPackSupport/InternalHeaderCheck.h +3 -0
  389. package/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +480 -380
  390. package/eigen/Eigen/src/misc/Image.h +41 -43
  391. package/eigen/Eigen/src/misc/InternalHeaderCheck.h +3 -0
  392. package/eigen/Eigen/src/misc/Kernel.h +39 -41
  393. package/eigen/Eigen/src/misc/RealSvd2x2.h +19 -21
  394. package/eigen/Eigen/src/misc/blas.h +83 -426
  395. package/eigen/Eigen/src/misc/lapacke.h +9976 -16182
  396. package/eigen/Eigen/src/misc/lapacke_helpers.h +163 -0
  397. package/eigen/Eigen/src/misc/lapacke_mangling.h +4 -5
  398. package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.inc +344 -0
  399. package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.inc +544 -0
  400. package/eigen/Eigen/src/plugins/BlockMethods.inc +1370 -0
  401. package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.inc +116 -0
  402. package/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.inc +167 -0
  403. package/eigen/Eigen/src/plugins/IndexedViewMethods.inc +192 -0
  404. package/eigen/Eigen/src/plugins/InternalHeaderCheck.inc +3 -0
  405. package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.inc +331 -0
  406. package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.inc +118 -0
  407. package/eigen/Eigen/src/plugins/ReshapedMethods.inc +133 -0
  408. package/lib/LibEigen.d.ts +4 -0
  409. package/lib/LibEigen.js +14 -0
  410. package/lib/index.d.ts +1 -1
  411. package/lib/index.js +7 -3
  412. package/package.json +2 -10
  413. package/eigen/Eigen/CMakeLists.txt +0 -19
  414. package/eigen/Eigen/src/Core/BooleanRedux.h +0 -164
  415. package/eigen/Eigen/src/Core/arch/CUDA/Complex.h +0 -103
  416. package/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -675
  417. package/eigen/Eigen/src/Core/arch/CUDA/MathFunctions.h +0 -91
  418. package/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
  419. package/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
  420. package/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
  421. package/eigen/Eigen/src/Core/util/NonMPL2.h +0 -3
  422. package/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
  423. package/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
  424. package/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h +0 -67
  425. package/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +0 -280
  426. package/eigen/Eigen/src/misc/lapack.h +0 -152
  427. package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +0 -332
  428. package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +0 -552
  429. package/eigen/Eigen/src/plugins/BlockMethods.h +0 -1058
  430. package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h +0 -115
  431. package/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +0 -163
  432. package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +0 -152
  433. package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +0 -85
  434. package/lib/eigen.d.ts +0 -2
  435. package/lib/eigen.js +0 -15
@@ -12,155 +12,149 @@
12
12
  #ifndef EIGEN_ASSIGN_EVALUATOR_H
13
13
  #define EIGEN_ASSIGN_EVALUATOR_H
14
14
 
15
+ // IWYU pragma: private
16
+ #include "./InternalHeaderCheck.h"
17
+
15
18
  namespace Eigen {
16
19
 
17
20
  // This implementation is based on Assign.h
18
21
 
19
22
  namespace internal {
20
-
23
+
21
24
  /***************************************************************************
22
- * Part 1 : the logic deciding a strategy for traversal and unrolling *
23
- ***************************************************************************/
25
+ * Part 1 : the logic deciding a strategy for traversal and unrolling *
26
+ ***************************************************************************/
24
27
 
25
28
  // copy_using_evaluator_traits is based on assign_traits
26
29
 
27
- template <typename DstEvaluator, typename SrcEvaluator, typename AssignFunc>
28
- struct copy_using_evaluator_traits
29
- {
30
- typedef typename DstEvaluator::XprType Dst;
31
- typedef typename Dst::Scalar DstScalar;
32
-
33
- enum {
34
- DstFlags = DstEvaluator::Flags,
35
- SrcFlags = SrcEvaluator::Flags
36
- };
37
-
38
- public:
39
- enum {
40
- DstAlignment = DstEvaluator::Alignment,
41
- SrcAlignment = SrcEvaluator::Alignment,
42
- DstHasDirectAccess = (DstFlags & DirectAccessBit) == DirectAccessBit,
43
- JointAlignment = EIGEN_PLAIN_ENUM_MIN(DstAlignment,SrcAlignment)
44
- };
45
-
46
- private:
47
- enum {
48
- InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime)
49
- : int(DstFlags)&RowMajorBit ? int(Dst::ColsAtCompileTime)
50
- : int(Dst::RowsAtCompileTime),
51
- InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime)
52
- : int(DstFlags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime)
53
- : int(Dst::MaxRowsAtCompileTime),
54
- OuterStride = int(outer_stride_at_compile_time<Dst>::ret),
55
- MaxSizeAtCompileTime = Dst::SizeAtCompileTime
56
- };
30
+ template <typename DstEvaluator, typename SrcEvaluator, typename AssignFunc, int MaxPacketSize = Dynamic>
31
+ struct copy_using_evaluator_traits {
32
+ using Src = typename SrcEvaluator::XprType;
33
+ using Dst = typename DstEvaluator::XprType;
34
+ using DstScalar = typename Dst::Scalar;
35
+
36
+ static constexpr int DstFlags = DstEvaluator::Flags;
37
+ static constexpr int SrcFlags = SrcEvaluator::Flags;
38
+
39
+ public:
40
+ static constexpr int DstAlignment = DstEvaluator::Alignment;
41
+ static constexpr int SrcAlignment = SrcEvaluator::Alignment;
42
+ static constexpr int JointAlignment = plain_enum_min(DstAlignment, SrcAlignment);
43
+ static constexpr bool DstHasDirectAccess = bool(DstFlags & DirectAccessBit);
44
+ static constexpr bool SrcIsRowMajor = bool(SrcFlags & RowMajorBit);
45
+ static constexpr bool DstIsRowMajor = bool(DstFlags & RowMajorBit);
46
+ static constexpr bool IsVectorAtCompileTime = Dst::IsVectorAtCompileTime;
47
+ static constexpr int RowsAtCompileTime = size_prefer_fixed(Src::RowsAtCompileTime, Dst::RowsAtCompileTime);
48
+ static constexpr int ColsAtCompileTime = size_prefer_fixed(Src::ColsAtCompileTime, Dst::ColsAtCompileTime);
49
+ static constexpr int SizeAtCompileTime = size_at_compile_time(RowsAtCompileTime, ColsAtCompileTime);
50
+ static constexpr int MaxRowsAtCompileTime =
51
+ min_size_prefer_fixed(Src::MaxRowsAtCompileTime, Dst::MaxRowsAtCompileTime);
52
+ static constexpr int MaxColsAtCompileTime =
53
+ min_size_prefer_fixed(Src::MaxColsAtCompileTime, Dst::MaxColsAtCompileTime);
54
+ static constexpr int MaxSizeAtCompileTime =
55
+ min_size_prefer_fixed(Src::MaxSizeAtCompileTime, Dst::MaxSizeAtCompileTime);
56
+ static constexpr int InnerSizeAtCompileTime = IsVectorAtCompileTime ? SizeAtCompileTime
57
+ : DstIsRowMajor ? ColsAtCompileTime
58
+ : RowsAtCompileTime;
59
+ static constexpr int MaxInnerSizeAtCompileTime = IsVectorAtCompileTime ? MaxSizeAtCompileTime
60
+ : DstIsRowMajor ? MaxColsAtCompileTime
61
+ : MaxRowsAtCompileTime;
62
+ static constexpr int RestrictedInnerSize = min_size_prefer_fixed(MaxInnerSizeAtCompileTime, MaxPacketSize);
63
+ static constexpr int RestrictedLinearSize = min_size_prefer_fixed(MaxSizeAtCompileTime, MaxPacketSize);
64
+ static constexpr int OuterStride = outer_stride_at_compile_time<Dst>::ret;
57
65
 
58
66
  // TODO distinguish between linear traversal and inner-traversals
59
- typedef typename find_best_packet<DstScalar,Dst::SizeAtCompileTime>::type LinearPacketType;
60
- typedef typename find_best_packet<DstScalar,InnerSize>::type InnerPacketType;
61
-
62
- enum {
63
- LinearPacketSize = unpacket_traits<LinearPacketType>::size,
64
- InnerPacketSize = unpacket_traits<InnerPacketType>::size
65
- };
66
-
67
- public:
68
- enum {
69
- LinearRequiredAlignment = unpacket_traits<LinearPacketType>::alignment,
70
- InnerRequiredAlignment = unpacket_traits<InnerPacketType>::alignment
71
- };
72
-
73
- private:
74
- enum {
75
- DstIsRowMajor = DstFlags&RowMajorBit,
76
- SrcIsRowMajor = SrcFlags&RowMajorBit,
77
- StorageOrdersAgree = (int(DstIsRowMajor) == int(SrcIsRowMajor)),
78
- MightVectorize = bool(StorageOrdersAgree)
79
- && (int(DstFlags) & int(SrcFlags) & ActualPacketAccessBit)
80
- && bool(functor_traits<AssignFunc>::PacketAccess),
81
- MayInnerVectorize = MightVectorize
82
- && int(InnerSize)!=Dynamic && int(InnerSize)%int(InnerPacketSize)==0
83
- && int(OuterStride)!=Dynamic && int(OuterStride)%int(InnerPacketSize)==0
84
- && (EIGEN_UNALIGNED_VECTORIZE || int(JointAlignment)>=int(InnerRequiredAlignment)),
85
- MayLinearize = bool(StorageOrdersAgree) && (int(DstFlags) & int(SrcFlags) & LinearAccessBit),
86
- MayLinearVectorize = bool(MightVectorize) && bool(MayLinearize) && bool(DstHasDirectAccess)
87
- && (EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)) || MaxSizeAtCompileTime == Dynamic),
88
- /* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
89
- so it's only good for large enough sizes. */
90
- MaySliceVectorize = bool(MightVectorize) && bool(DstHasDirectAccess)
91
- && (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=(EIGEN_UNALIGNED_VECTORIZE?InnerPacketSize:(3*InnerPacketSize)))
92
- /* slice vectorization can be slow, so we only want it if the slices are big, which is
93
- indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block
94
- in a fixed-size matrix
95
- However, with EIGEN_UNALIGNED_VECTORIZE and unrolling, slice vectorization is still worth it */
96
- };
97
-
98
- public:
99
- enum {
100
- Traversal = int(MayLinearVectorize) && (LinearPacketSize>InnerPacketSize) ? int(LinearVectorizedTraversal)
101
- : int(MayInnerVectorize) ? int(InnerVectorizedTraversal)
102
- : int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
103
- : int(MaySliceVectorize) ? int(SliceVectorizedTraversal)
104
- : int(MayLinearize) ? int(LinearTraversal)
105
- : int(DefaultTraversal),
106
- Vectorized = int(Traversal) == InnerVectorizedTraversal
107
- || int(Traversal) == LinearVectorizedTraversal
108
- || int(Traversal) == SliceVectorizedTraversal
109
- };
110
-
111
- typedef typename conditional<int(Traversal)==LinearVectorizedTraversal, LinearPacketType, InnerPacketType>::type PacketType;
112
-
113
- private:
114
- enum {
115
- ActualPacketSize = int(Traversal)==LinearVectorizedTraversal ? LinearPacketSize
116
- : Vectorized ? InnerPacketSize
117
- : 1,
118
- UnrollingLimit = EIGEN_UNROLLING_LIMIT * ActualPacketSize,
119
- MayUnrollCompletely = int(Dst::SizeAtCompileTime) != Dynamic
120
- && int(Dst::SizeAtCompileTime) * (int(DstEvaluator::CoeffReadCost)+int(SrcEvaluator::CoeffReadCost)) <= int(UnrollingLimit),
121
- MayUnrollInner = int(InnerSize) != Dynamic
122
- && int(InnerSize) * (int(DstEvaluator::CoeffReadCost)+int(SrcEvaluator::CoeffReadCost)) <= int(UnrollingLimit)
123
- };
124
-
125
- public:
126
- enum {
127
- Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal))
128
- ? (
129
- int(MayUnrollCompletely) ? int(CompleteUnrolling)
130
- : int(MayUnrollInner) ? int(InnerUnrolling)
131
- : int(NoUnrolling)
132
- )
133
- : int(Traversal) == int(LinearVectorizedTraversal)
134
- ? ( bool(MayUnrollCompletely) && ( EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)))
135
- ? int(CompleteUnrolling)
136
- : int(NoUnrolling) )
137
- : int(Traversal) == int(LinearTraversal)
138
- ? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling)
139
- : int(NoUnrolling) )
67
+ using LinearPacketType = typename find_best_packet<DstScalar, RestrictedLinearSize>::type;
68
+ using InnerPacketType = typename find_best_packet<DstScalar, RestrictedInnerSize>::type;
69
+
70
+ static constexpr int LinearPacketSize = unpacket_traits<LinearPacketType>::size;
71
+ static constexpr int InnerPacketSize = unpacket_traits<InnerPacketType>::size;
72
+
73
+ public:
74
+ static constexpr int LinearRequiredAlignment = unpacket_traits<LinearPacketType>::alignment;
75
+ static constexpr int InnerRequiredAlignment = unpacket_traits<InnerPacketType>::alignment;
76
+
77
+ private:
78
+ static constexpr bool StorageOrdersAgree = DstIsRowMajor == SrcIsRowMajor;
79
+ static constexpr bool MightVectorize = StorageOrdersAgree && bool(DstFlags & SrcFlags & ActualPacketAccessBit) &&
80
+ bool(functor_traits<AssignFunc>::PacketAccess);
81
+ static constexpr bool MayInnerVectorize = MightVectorize && (InnerSizeAtCompileTime != Dynamic) &&
82
+ (InnerSizeAtCompileTime % InnerPacketSize == 0) &&
83
+ (OuterStride != Dynamic) && (OuterStride % InnerPacketSize == 0) &&
84
+ (EIGEN_UNALIGNED_VECTORIZE || JointAlignment >= InnerRequiredAlignment);
85
+ static constexpr bool MayLinearize = StorageOrdersAgree && (DstFlags & SrcFlags & LinearAccessBit);
86
+ static constexpr bool MayLinearVectorize =
87
+ MightVectorize && MayLinearize && DstHasDirectAccess &&
88
+ (EIGEN_UNALIGNED_VECTORIZE || (DstAlignment >= LinearRequiredAlignment) || MaxSizeAtCompileTime == Dynamic) &&
89
+ (MaxSizeAtCompileTime == Dynamic || MaxSizeAtCompileTime >= LinearPacketSize);
90
+ /* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
91
+ so it's only good for large enough sizes. */
92
+ static constexpr int InnerSizeThreshold = (EIGEN_UNALIGNED_VECTORIZE ? 1 : 3) * InnerPacketSize;
93
+ static constexpr bool MaySliceVectorize =
94
+ MightVectorize && DstHasDirectAccess &&
95
+ (MaxInnerSizeAtCompileTime == Dynamic || MaxInnerSizeAtCompileTime >= InnerSizeThreshold);
96
+ /* slice vectorization can be slow, so we only want it if the slices are big, which is
97
+ indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block
98
+ in a fixed-size matrix
99
+ However, with EIGEN_UNALIGNED_VECTORIZE and unrolling, slice vectorization is still worth it */
100
+
101
+ public:
102
+ static constexpr int Traversal = SizeAtCompileTime == 0 ? AllAtOnceTraversal
103
+ : (MayLinearVectorize && (LinearPacketSize > InnerPacketSize))
104
+ ? LinearVectorizedTraversal
105
+ : MayInnerVectorize ? InnerVectorizedTraversal
106
+ : MayLinearVectorize ? LinearVectorizedTraversal
107
+ : MaySliceVectorize ? SliceVectorizedTraversal
108
+ : MayLinearize ? LinearTraversal
109
+ : DefaultTraversal;
110
+ static constexpr bool Vectorized = Traversal == InnerVectorizedTraversal || Traversal == LinearVectorizedTraversal ||
111
+ Traversal == SliceVectorizedTraversal;
112
+
113
+ using PacketType = std::conditional_t<Traversal == LinearVectorizedTraversal, LinearPacketType, InnerPacketType>;
114
+
115
+ private:
116
+ static constexpr int ActualPacketSize = Vectorized ? unpacket_traits<PacketType>::size : 1;
117
+ static constexpr int UnrollingLimit = EIGEN_UNROLLING_LIMIT * ActualPacketSize;
118
+ static constexpr int CoeffReadCost = int(DstEvaluator::CoeffReadCost) + int(SrcEvaluator::CoeffReadCost);
119
+ static constexpr bool MayUnrollCompletely =
120
+ (SizeAtCompileTime != Dynamic) && (SizeAtCompileTime * CoeffReadCost <= UnrollingLimit);
121
+ static constexpr bool MayUnrollInner =
122
+ (InnerSizeAtCompileTime != Dynamic) && (InnerSizeAtCompileTime * CoeffReadCost <= UnrollingLimit);
123
+
124
+ public:
125
+ static constexpr int Unrolling =
126
+ (Traversal == InnerVectorizedTraversal || Traversal == DefaultTraversal)
127
+ ? (MayUnrollCompletely ? CompleteUnrolling
128
+ : MayUnrollInner ? InnerUnrolling
129
+ : NoUnrolling)
130
+ : Traversal == LinearVectorizedTraversal
131
+ ? (MayUnrollCompletely && (EIGEN_UNALIGNED_VECTORIZE || (DstAlignment >= LinearRequiredAlignment))
132
+ ? CompleteUnrolling
133
+ : NoUnrolling)
134
+ : Traversal == LinearTraversal ? (MayUnrollCompletely ? CompleteUnrolling : NoUnrolling)
140
135
  #if EIGEN_UNALIGNED_VECTORIZE
141
- : int(Traversal) == int(SliceVectorizedTraversal)
142
- ? ( bool(MayUnrollInner) ? int(InnerUnrolling)
143
- : int(NoUnrolling) )
136
+ : Traversal == SliceVectorizedTraversal ? (MayUnrollInner ? InnerUnrolling : NoUnrolling)
144
137
  #endif
145
- : int(NoUnrolling)
146
- };
138
+ : NoUnrolling;
139
+ static constexpr bool UsePacketSegment = has_packet_segment<PacketType>::value;
147
140
 
148
141
  #ifdef EIGEN_DEBUG_ASSIGN
149
- static void debug()
150
- {
142
+ static void debug() {
151
143
  std::cerr << "DstXpr: " << typeid(typename DstEvaluator::XprType).name() << std::endl;
152
144
  std::cerr << "SrcXpr: " << typeid(typename SrcEvaluator::XprType).name() << std::endl;
153
145
  std::cerr.setf(std::ios::hex, std::ios::basefield);
154
- std::cerr << "DstFlags" << " = " << DstFlags << " (" << demangle_flags(DstFlags) << " )" << std::endl;
155
- std::cerr << "SrcFlags" << " = " << SrcFlags << " (" << demangle_flags(SrcFlags) << " )" << std::endl;
146
+ std::cerr << "DstFlags"
147
+ << " = " << DstFlags << " (" << demangle_flags(DstFlags) << " )" << std::endl;
148
+ std::cerr << "SrcFlags"
149
+ << " = " << SrcFlags << " (" << demangle_flags(SrcFlags) << " )" << std::endl;
156
150
  std::cerr.unsetf(std::ios::hex);
157
151
  EIGEN_DEBUG_VAR(DstAlignment)
158
152
  EIGEN_DEBUG_VAR(SrcAlignment)
159
153
  EIGEN_DEBUG_VAR(LinearRequiredAlignment)
160
154
  EIGEN_DEBUG_VAR(InnerRequiredAlignment)
161
155
  EIGEN_DEBUG_VAR(JointAlignment)
162
- EIGEN_DEBUG_VAR(InnerSize)
163
- EIGEN_DEBUG_VAR(InnerMaxSize)
156
+ EIGEN_DEBUG_VAR(InnerSizeAtCompileTime)
157
+ EIGEN_DEBUG_VAR(MaxInnerSizeAtCompileTime)
164
158
  EIGEN_DEBUG_VAR(LinearPacketSize)
165
159
  EIGEN_DEBUG_VAR(InnerPacketSize)
166
160
  EIGEN_DEBUG_VAR(ActualPacketSize)
@@ -170,185 +164,213 @@ public:
170
164
  EIGEN_DEBUG_VAR(MayInnerVectorize)
171
165
  EIGEN_DEBUG_VAR(MayLinearVectorize)
172
166
  EIGEN_DEBUG_VAR(MaySliceVectorize)
173
- std::cerr << "Traversal" << " = " << Traversal << " (" << demangle_traversal(Traversal) << ")" << std::endl;
167
+ std::cerr << "Traversal"
168
+ << " = " << Traversal << " (" << demangle_traversal(Traversal) << ")" << std::endl;
174
169
  EIGEN_DEBUG_VAR(SrcEvaluator::CoeffReadCost)
170
+ EIGEN_DEBUG_VAR(DstEvaluator::CoeffReadCost)
171
+ EIGEN_DEBUG_VAR(Dst::SizeAtCompileTime)
175
172
  EIGEN_DEBUG_VAR(UnrollingLimit)
176
173
  EIGEN_DEBUG_VAR(MayUnrollCompletely)
177
174
  EIGEN_DEBUG_VAR(MayUnrollInner)
178
- std::cerr << "Unrolling" << " = " << Unrolling << " (" << demangle_unrolling(Unrolling) << ")" << std::endl;
175
+ std::cerr << "Unrolling"
176
+ << " = " << Unrolling << " (" << demangle_unrolling(Unrolling) << ")" << std::endl;
179
177
  std::cerr << std::endl;
180
178
  }
181
179
  #endif
182
180
  };
183
181
 
184
182
  /***************************************************************************
185
- * Part 2 : meta-unrollers
186
- ***************************************************************************/
183
+ * Part 2 : meta-unrollers
184
+ ***************************************************************************/
187
185
 
188
186
  /************************
189
187
  *** Default traversal ***
190
188
  ************************/
191
189
 
192
- template<typename Kernel, int Index, int Stop>
193
- struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling
194
- {
195
- // FIXME: this is not very clean, perhaps this information should be provided by the kernel?
196
- typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
197
- typedef typename DstEvaluatorType::XprType DstXprType;
198
-
199
- enum {
200
- outer = Index / DstXprType::InnerSizeAtCompileTime,
201
- inner = Index % DstXprType::InnerSizeAtCompileTime
202
- };
190
+ template <typename Kernel, int Index_, int Stop>
191
+ struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling {
192
+ static constexpr int Outer = Index_ / Kernel::AssignmentTraits::InnerSizeAtCompileTime;
193
+ static constexpr int Inner = Index_ % Kernel::AssignmentTraits::InnerSizeAtCompileTime;
203
194
 
204
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
205
- {
206
- kernel.assignCoeffByOuterInner(outer, inner);
207
- copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Index+1, Stop>::run(kernel);
195
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) {
196
+ kernel.assignCoeffByOuterInner(Outer, Inner);
197
+ copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Index_ + 1, Stop>::run(kernel);
208
198
  }
209
199
  };
210
200
 
211
- template<typename Kernel, int Stop>
212
- struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Stop, Stop>
213
- {
214
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
201
+ template <typename Kernel, int Stop>
202
+ struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Stop, Stop> {
203
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&) {}
215
204
  };
216
205
 
217
- template<typename Kernel, int Index_, int Stop>
218
- struct copy_using_evaluator_DefaultTraversal_InnerUnrolling
219
- {
220
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer)
221
- {
206
+ template <typename Kernel, int Index_, int Stop>
207
+ struct copy_using_evaluator_DefaultTraversal_InnerUnrolling {
208
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel, Index outer) {
222
209
  kernel.assignCoeffByOuterInner(outer, Index_);
223
- copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Index_+1, Stop>::run(kernel, outer);
210
+ copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Index_ + 1, Stop>::run(kernel, outer);
224
211
  }
225
212
  };
226
213
 
227
- template<typename Kernel, int Stop>
228
- struct copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Stop, Stop>
229
- {
230
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, Index) { }
214
+ template <typename Kernel, int Stop>
215
+ struct copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Stop, Stop> {
216
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&, Index) {}
231
217
  };
232
218
 
233
219
  /***********************
234
220
  *** Linear traversal ***
235
221
  ***********************/
236
222
 
237
- template<typename Kernel, int Index, int Stop>
238
- struct copy_using_evaluator_LinearTraversal_CompleteUnrolling
239
- {
240
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel)
241
- {
242
- kernel.assignCoeff(Index);
243
- copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Index+1, Stop>::run(kernel);
223
+ template <typename Kernel, int Index_, int Stop>
224
+ struct copy_using_evaluator_LinearTraversal_CompleteUnrolling {
225
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) {
226
+ kernel.assignCoeff(Index_);
227
+ copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Index_ + 1, Stop>::run(kernel);
244
228
  }
245
229
  };
246
230
 
247
- template<typename Kernel, int Stop>
248
- struct copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Stop, Stop>
249
- {
250
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
231
+ template <typename Kernel, int Stop>
232
+ struct copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Stop, Stop> {
233
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&) {}
251
234
  };
252
235
 
253
236
  /**************************
254
237
  *** Inner vectorization ***
255
238
  **************************/
256
239
 
257
- template<typename Kernel, int Index, int Stop>
258
- struct copy_using_evaluator_innervec_CompleteUnrolling
259
- {
260
- // FIXME: this is not very clean, perhaps this information should be provided by the kernel?
261
- typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
262
- typedef typename DstEvaluatorType::XprType DstXprType;
263
- typedef typename Kernel::PacketType PacketType;
264
-
265
- enum {
266
- outer = Index / DstXprType::InnerSizeAtCompileTime,
267
- inner = Index % DstXprType::InnerSizeAtCompileTime,
268
- SrcAlignment = Kernel::AssignmentTraits::SrcAlignment,
269
- DstAlignment = Kernel::AssignmentTraits::DstAlignment
270
- };
271
-
272
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
273
- {
274
- kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, inner);
275
- enum { NextIndex = Index + unpacket_traits<PacketType>::size };
240
+ template <typename Kernel, int Index_, int Stop>
241
+ struct copy_using_evaluator_innervec_CompleteUnrolling {
242
+ using PacketType = typename Kernel::PacketType;
243
+ static constexpr int Outer = Index_ / Kernel::AssignmentTraits::InnerSizeAtCompileTime;
244
+ static constexpr int Inner = Index_ % Kernel::AssignmentTraits::InnerSizeAtCompileTime;
245
+ static constexpr int NextIndex = Index_ + unpacket_traits<PacketType>::size;
246
+ static constexpr int SrcAlignment = Kernel::AssignmentTraits::SrcAlignment;
247
+ static constexpr int DstAlignment = Kernel::AssignmentTraits::DstAlignment;
248
+
249
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) {
250
+ kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(Outer, Inner);
276
251
  copy_using_evaluator_innervec_CompleteUnrolling<Kernel, NextIndex, Stop>::run(kernel);
277
252
  }
278
253
  };
279
254
 
280
- template<typename Kernel, int Stop>
281
- struct copy_using_evaluator_innervec_CompleteUnrolling<Kernel, Stop, Stop>
282
- {
283
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
255
+ template <typename Kernel, int Stop>
256
+ struct copy_using_evaluator_innervec_CompleteUnrolling<Kernel, Stop, Stop> {
257
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&) {}
284
258
  };
285
259
 
286
- template<typename Kernel, int Index_, int Stop, int SrcAlignment, int DstAlignment>
287
- struct copy_using_evaluator_innervec_InnerUnrolling
288
- {
289
- typedef typename Kernel::PacketType PacketType;
290
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer)
291
- {
260
+ template <typename Kernel, int Index_, int Stop, int SrcAlignment, int DstAlignment>
261
+ struct copy_using_evaluator_innervec_InnerUnrolling {
262
+ using PacketType = typename Kernel::PacketType;
263
+ static constexpr int NextIndex = Index_ + unpacket_traits<PacketType>::size;
264
+
265
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel, Index outer) {
292
266
  kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, Index_);
293
- enum { NextIndex = Index_ + unpacket_traits<PacketType>::size };
294
- copy_using_evaluator_innervec_InnerUnrolling<Kernel, NextIndex, Stop, SrcAlignment, DstAlignment>::run(kernel, outer);
267
+ copy_using_evaluator_innervec_InnerUnrolling<Kernel, NextIndex, Stop, SrcAlignment, DstAlignment>::run(kernel,
268
+ outer);
269
+ }
270
+ };
271
+
272
+ template <typename Kernel, int Stop, int SrcAlignment, int DstAlignment>
273
+ struct copy_using_evaluator_innervec_InnerUnrolling<Kernel, Stop, Stop, SrcAlignment, DstAlignment> {
274
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&, Index) {}
275
+ };
276
+
277
+ template <typename Kernel, int Start, int Stop, int SrcAlignment, int DstAlignment, bool UsePacketSegment>
278
+ struct copy_using_evaluator_innervec_segment {
279
+ using PacketType = typename Kernel::PacketType;
280
+
281
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel, Index outer) {
282
+ kernel.template assignPacketSegmentByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, Start, 0,
283
+ Stop - Start);
295
284
  }
296
285
  };
297
286
 
298
- template<typename Kernel, int Stop, int SrcAlignment, int DstAlignment>
299
- struct copy_using_evaluator_innervec_InnerUnrolling<Kernel, Stop, Stop, SrcAlignment, DstAlignment>
300
- {
301
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &, Index) { }
287
+ template <typename Kernel, int Start, int Stop, int SrcAlignment, int DstAlignment>
288
+ struct copy_using_evaluator_innervec_segment<Kernel, Start, Stop, SrcAlignment, DstAlignment,
289
+ /*UsePacketSegment*/ false>
290
+ : copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Start, Stop> {};
291
+
292
+ template <typename Kernel, int Stop, int SrcAlignment, int DstAlignment>
293
+ struct copy_using_evaluator_innervec_segment<Kernel, Stop, Stop, SrcAlignment, DstAlignment,
294
+ /*UsePacketSegment*/ true> {
295
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&, Index) {}
296
+ };
297
+
298
+ template <typename Kernel, int Stop, int SrcAlignment, int DstAlignment>
299
+ struct copy_using_evaluator_innervec_segment<Kernel, Stop, Stop, SrcAlignment, DstAlignment,
300
+ /*UsePacketSegment*/ false> {
301
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&, Index) {}
302
302
  };
303
303
 
304
304
  /***************************************************************************
305
- * Part 3 : implementation of all cases
306
- ***************************************************************************/
305
+ * Part 3 : implementation of all cases
306
+ ***************************************************************************/
307
307
 
308
308
  // dense_assignment_loop is based on assign_impl
309
309
 
310
- template<typename Kernel,
311
- int Traversal = Kernel::AssignmentTraits::Traversal,
312
- int Unrolling = Kernel::AssignmentTraits::Unrolling>
313
- struct dense_assignment_loop;
310
+ template <typename Kernel, int Traversal = Kernel::AssignmentTraits::Traversal,
311
+ int Unrolling = Kernel::AssignmentTraits::Unrolling>
312
+ struct dense_assignment_loop_impl;
313
+
314
+ template <typename Kernel, int Traversal = Kernel::AssignmentTraits::Traversal,
315
+ int Unrolling = Kernel::AssignmentTraits::Unrolling>
316
+ struct dense_assignment_loop {
317
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) {
318
+ #ifdef __cpp_lib_is_constant_evaluated
319
+ if (internal::is_constant_evaluated())
320
+ dense_assignment_loop_impl<Kernel, Traversal == AllAtOnceTraversal ? AllAtOnceTraversal : DefaultTraversal,
321
+ NoUnrolling>::run(kernel);
322
+ else
323
+ #endif
324
+ dense_assignment_loop_impl<Kernel, Traversal, Unrolling>::run(kernel);
325
+ }
326
+ };
327
+
328
+ /************************
329
+ ***** Special Cases *****
330
+ ************************/
331
+
332
+ // Zero-sized assignment is a no-op.
333
+ template <typename Kernel, int Unrolling>
334
+ struct dense_assignment_loop_impl<Kernel, AllAtOnceTraversal, Unrolling> {
335
+ static constexpr int SizeAtCompileTime = Kernel::AssignmentTraits::SizeAtCompileTime;
336
+
337
+ EIGEN_DEVICE_FUNC static void EIGEN_STRONG_INLINE constexpr run(Kernel& /*kernel*/) {
338
+ EIGEN_STATIC_ASSERT(SizeAtCompileTime == 0, EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT)
339
+ }
340
+ };
314
341
 
315
342
  /************************
316
343
  *** Default traversal ***
317
344
  ************************/
318
345
 
319
- template<typename Kernel>
320
- struct dense_assignment_loop<Kernel, DefaultTraversal, NoUnrolling>
321
- {
322
- EIGEN_DEVICE_FUNC static void EIGEN_STRONG_INLINE run(Kernel &kernel)
323
- {
324
- for(Index outer = 0; outer < kernel.outerSize(); ++outer) {
325
- for(Index inner = 0; inner < kernel.innerSize(); ++inner) {
346
+ template <typename Kernel>
347
+ struct dense_assignment_loop_impl<Kernel, DefaultTraversal, NoUnrolling> {
348
+ EIGEN_DEVICE_FUNC static void EIGEN_STRONG_INLINE constexpr run(Kernel& kernel) {
349
+ for (Index outer = 0; outer < kernel.outerSize(); ++outer) {
350
+ for (Index inner = 0; inner < kernel.innerSize(); ++inner) {
326
351
  kernel.assignCoeffByOuterInner(outer, inner);
327
352
  }
328
353
  }
329
354
  }
330
355
  };
331
356
 
332
- template<typename Kernel>
333
- struct dense_assignment_loop<Kernel, DefaultTraversal, CompleteUnrolling>
334
- {
335
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
336
- {
337
- typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
338
- copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
357
+ template <typename Kernel>
358
+ struct dense_assignment_loop_impl<Kernel, DefaultTraversal, CompleteUnrolling> {
359
+ static constexpr int SizeAtCompileTime = Kernel::AssignmentTraits::SizeAtCompileTime;
360
+
361
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) {
362
+ copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, 0, SizeAtCompileTime>::run(kernel);
339
363
  }
340
364
  };
341
365
 
342
- template<typename Kernel>
343
- struct dense_assignment_loop<Kernel, DefaultTraversal, InnerUnrolling>
344
- {
345
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
346
- {
347
- typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
366
+ template <typename Kernel>
367
+ struct dense_assignment_loop_impl<Kernel, DefaultTraversal, InnerUnrolling> {
368
+ static constexpr int InnerSizeAtCompileTime = Kernel::AssignmentTraits::InnerSizeAtCompileTime;
348
369
 
370
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) {
349
371
  const Index outerSize = kernel.outerSize();
350
- for(Index outer = 0; outer < outerSize; ++outer)
351
- copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime>::run(kernel, outer);
372
+ for (Index outer = 0; outer < outerSize; ++outer)
373
+ copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, 0, InnerSizeAtCompileTime>::run(kernel, outer);
352
374
  }
353
375
  };
354
376
 
@@ -356,83 +378,137 @@ struct dense_assignment_loop<Kernel, DefaultTraversal, InnerUnrolling>
356
378
  *** Linear vectorization ***
357
379
  ***************************/
358
380
 
359
-
360
381
  // The goal of unaligned_dense_assignment_loop is simply to factorize the handling
361
382
  // of the non vectorizable beginning and ending parts
362
383
 
363
- template <bool IsAligned = false>
364
- struct unaligned_dense_assignment_loop
365
- {
366
- // if IsAligned = true, then do nothing
384
+ template <typename PacketType, int DstAlignment, int SrcAlignment, bool UsePacketSegment, bool Skip>
385
+ struct unaligned_dense_assignment_loop {
386
+ // if Skip == true, then do nothing
387
+ template <typename Kernel>
388
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& /*kernel*/, Index /*start*/, Index /*end*/) {}
367
389
  template <typename Kernel>
368
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, Index, Index) {}
390
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& /*kernel*/, Index /*outer*/,
391
+ Index /*innerStart*/, Index /*innerEnd*/) {}
369
392
  };
370
393
 
371
- template <>
372
- struct unaligned_dense_assignment_loop<false>
373
- {
374
- // MSVC must not inline this functions. If it does, it fails to optimize the
375
- // packet access path.
376
- // FIXME check which version exhibits this issue
377
- #if EIGEN_COMP_MSVC
394
+ template <typename PacketType, int DstAlignment, int SrcAlignment>
395
+ struct unaligned_dense_assignment_loop<PacketType, DstAlignment, SrcAlignment, /*UsePacketSegment*/ true,
396
+ /*Skip*/ false> {
378
397
  template <typename Kernel>
379
- static EIGEN_DONT_INLINE void run(Kernel &kernel,
380
- Index start,
381
- Index end)
382
- #else
398
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel, Index start, Index end) {
399
+ Index count = end - start;
400
+ eigen_assert(count <= unpacket_traits<PacketType>::size);
401
+ if (count > 0) kernel.template assignPacketSegment<DstAlignment, SrcAlignment, PacketType>(start, 0, count);
402
+ }
383
403
  template <typename Kernel>
384
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel,
385
- Index start,
386
- Index end)
387
- #endif
388
- {
389
- for (Index index = start; index < end; ++index)
390
- kernel.assignCoeff(index);
404
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel, Index outer, Index start, Index end) {
405
+ Index count = end - start;
406
+ eigen_assert(count <= unpacket_traits<PacketType>::size);
407
+ if (count > 0)
408
+ kernel.template assignPacketSegmentByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, start, 0, count);
391
409
  }
392
410
  };
393
411
 
394
- template<typename Kernel>
395
- struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, NoUnrolling>
396
- {
397
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
398
- {
399
- const Index size = kernel.size();
400
- typedef typename Kernel::Scalar Scalar;
401
- typedef typename Kernel::PacketType PacketType;
402
- enum {
403
- requestedAlignment = Kernel::AssignmentTraits::LinearRequiredAlignment,
404
- packetSize = unpacket_traits<PacketType>::size,
405
- dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment),
406
- dstAlignment = packet_traits<Scalar>::AlignedOnScalar ? int(requestedAlignment)
407
- : int(Kernel::AssignmentTraits::DstAlignment),
408
- srcAlignment = Kernel::AssignmentTraits::JointAlignment
409
- };
410
- const Index alignedStart = dstIsAligned ? 0 : internal::first_aligned<requestedAlignment>(kernel.dstDataPtr(), size);
411
- const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;
412
+ template <typename PacketType, int DstAlignment, int SrcAlignment>
413
+ struct unaligned_dense_assignment_loop<PacketType, DstAlignment, SrcAlignment, /*UsePacketSegment*/ false,
414
+ /*Skip*/ false> {
415
+ template <typename Kernel>
416
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel, Index start, Index end) {
417
+ for (Index index = start; index < end; ++index) kernel.assignCoeff(index);
418
+ }
419
+ template <typename Kernel>
420
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel, Index outer, Index innerStart,
421
+ Index innerEnd) {
422
+ for (Index inner = innerStart; inner < innerEnd; ++inner) kernel.assignCoeffByOuterInner(outer, inner);
423
+ }
424
+ };
412
425
 
413
- unaligned_dense_assignment_loop<dstIsAligned!=0>::run(kernel, 0, alignedStart);
426
+ template <typename Kernel, int Index_, int Stop>
427
+ struct copy_using_evaluator_linearvec_CompleteUnrolling {
428
+ using PacketType = typename Kernel::PacketType;
429
+ static constexpr int SrcAlignment = Kernel::AssignmentTraits::SrcAlignment;
430
+ static constexpr int DstAlignment = Kernel::AssignmentTraits::DstAlignment;
431
+ static constexpr int NextIndex = Index_ + unpacket_traits<PacketType>::size;
414
432
 
415
- for(Index index = alignedStart; index < alignedEnd; index += packetSize)
416
- kernel.template assignPacket<dstAlignment, srcAlignment, PacketType>(index);
433
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) {
434
+ kernel.template assignPacket<DstAlignment, SrcAlignment, PacketType>(Index_);
435
+ copy_using_evaluator_linearvec_CompleteUnrolling<Kernel, NextIndex, Stop>::run(kernel);
436
+ }
437
+ };
438
+
439
+ template <typename Kernel, int Stop>
440
+ struct copy_using_evaluator_linearvec_CompleteUnrolling<Kernel, Stop, Stop> {
441
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&) {}
442
+ };
417
443
 
418
- unaligned_dense_assignment_loop<>::run(kernel, alignedEnd, size);
444
+ template <typename Kernel, int Index_, int Stop, bool UsePacketSegment>
445
+ struct copy_using_evaluator_linearvec_segment {
446
+ using PacketType = typename Kernel::PacketType;
447
+ static constexpr int SrcAlignment = Kernel::AssignmentTraits::SrcAlignment;
448
+ static constexpr int DstAlignment = Kernel::AssignmentTraits::DstAlignment;
449
+
450
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) {
451
+ kernel.template assignPacketSegment<DstAlignment, SrcAlignment, PacketType>(Index_, 0, Stop - Index_);
419
452
  }
420
453
  };
421
454
 
422
- template<typename Kernel>
423
- struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, CompleteUnrolling>
424
- {
425
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
426
- {
427
- typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
428
- typedef typename Kernel::PacketType PacketType;
429
-
430
- enum { size = DstXprType::SizeAtCompileTime,
431
- packetSize =unpacket_traits<PacketType>::size,
432
- alignedSize = (size/packetSize)*packetSize };
455
+ template <typename Kernel, int Index_, int Stop>
456
+ struct copy_using_evaluator_linearvec_segment<Kernel, Index_, Stop, /*UsePacketSegment*/ false>
457
+ : copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Index_, Stop> {};
458
+
459
+ template <typename Kernel, int Stop>
460
+ struct copy_using_evaluator_linearvec_segment<Kernel, Stop, Stop, /*UsePacketSegment*/ true> {
461
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&) {}
462
+ };
463
+
464
+ template <typename Kernel, int Stop>
465
+ struct copy_using_evaluator_linearvec_segment<Kernel, Stop, Stop, /*UsePacketSegment*/ false> {
466
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&) {}
467
+ };
468
+
469
+ template <typename Kernel>
470
+ struct dense_assignment_loop_impl<Kernel, LinearVectorizedTraversal, NoUnrolling> {
471
+ using Scalar = typename Kernel::Scalar;
472
+ using PacketType = typename Kernel::PacketType;
473
+ static constexpr int PacketSize = unpacket_traits<PacketType>::size;
474
+ static constexpr int SrcAlignment = Kernel::AssignmentTraits::JointAlignment;
475
+ static constexpr int DstAlignment = plain_enum_max(Kernel::AssignmentTraits::DstAlignment, alignof(Scalar));
476
+ static constexpr int RequestedAlignment = unpacket_traits<PacketType>::alignment;
477
+ static constexpr bool Alignable =
478
+ (DstAlignment >= RequestedAlignment) || ((RequestedAlignment - DstAlignment) % sizeof(Scalar) == 0);
479
+ static constexpr int Alignment = Alignable ? RequestedAlignment : DstAlignment;
480
+ static constexpr bool DstIsAligned = DstAlignment >= Alignment;
481
+ static constexpr bool UsePacketSegment = Kernel::AssignmentTraits::UsePacketSegment;
482
+
483
+ using head_loop =
484
+ unaligned_dense_assignment_loop<PacketType, DstAlignment, SrcAlignment, UsePacketSegment, DstIsAligned>;
485
+ using tail_loop = unaligned_dense_assignment_loop<PacketType, Alignment, SrcAlignment, UsePacketSegment, false>;
486
+
487
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) {
488
+ const Index size = kernel.size();
489
+ const Index alignedStart = DstIsAligned ? 0 : first_aligned<Alignment>(kernel.dstDataPtr(), size);
490
+ const Index alignedEnd = alignedStart + numext::round_down(size - alignedStart, PacketSize);
491
+
492
+ head_loop::run(kernel, 0, alignedStart);
493
+
494
+ for (Index index = alignedStart; index < alignedEnd; index += PacketSize)
495
+ kernel.template assignPacket<Alignment, SrcAlignment, PacketType>(index);
433
496
 
434
- copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, alignedSize>::run(kernel);
435
- copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, alignedSize, size>::run(kernel);
497
+ tail_loop::run(kernel, alignedEnd, size);
498
+ }
499
+ };
500
+
501
+ template <typename Kernel>
502
+ struct dense_assignment_loop_impl<Kernel, LinearVectorizedTraversal, CompleteUnrolling> {
503
+ using PacketType = typename Kernel::PacketType;
504
+ static constexpr int PacketSize = unpacket_traits<PacketType>::size;
505
+ static constexpr int Size = Kernel::AssignmentTraits::SizeAtCompileTime;
506
+ static constexpr int AlignedSize = numext::round_down(Size, PacketSize);
507
+ static constexpr bool UsePacketSegment = Kernel::AssignmentTraits::UsePacketSegment;
508
+
509
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) {
510
+ copy_using_evaluator_linearvec_CompleteUnrolling<Kernel, 0, AlignedSize>::run(kernel);
511
+ copy_using_evaluator_linearvec_segment<Kernel, AlignedSize, Size, UsePacketSegment>::run(kernel);
436
512
  }
437
513
  };
438
514
 
@@ -440,46 +516,42 @@ struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, CompleteUnrollin
440
516
  *** Inner vectorization ***
441
517
  **************************/
442
518
 
443
- template<typename Kernel>
444
- struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, NoUnrolling>
445
- {
446
- typedef typename Kernel::PacketType PacketType;
447
- enum {
448
- SrcAlignment = Kernel::AssignmentTraits::SrcAlignment,
449
- DstAlignment = Kernel::AssignmentTraits::DstAlignment
450
- };
451
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
452
- {
519
+ template <typename Kernel>
520
+ struct dense_assignment_loop_impl<Kernel, InnerVectorizedTraversal, NoUnrolling> {
521
+ using PacketType = typename Kernel::PacketType;
522
+ static constexpr int PacketSize = unpacket_traits<PacketType>::size;
523
+ static constexpr int SrcAlignment = Kernel::AssignmentTraits::JointAlignment;
524
+ static constexpr int DstAlignment = Kernel::AssignmentTraits::DstAlignment;
525
+
526
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) {
453
527
  const Index innerSize = kernel.innerSize();
454
528
  const Index outerSize = kernel.outerSize();
455
- const Index packetSize = unpacket_traits<PacketType>::size;
456
- for(Index outer = 0; outer < outerSize; ++outer)
457
- for(Index inner = 0; inner < innerSize; inner+=packetSize)
529
+ for (Index outer = 0; outer < outerSize; ++outer)
530
+ for (Index inner = 0; inner < innerSize; inner += PacketSize)
458
531
  kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, inner);
459
532
  }
460
533
  };
461
534
 
462
- template<typename Kernel>
463
- struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, CompleteUnrolling>
464
- {
465
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
466
- {
467
- typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
468
- copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
535
+ template <typename Kernel>
536
+ struct dense_assignment_loop_impl<Kernel, InnerVectorizedTraversal, CompleteUnrolling> {
537
+ static constexpr int SizeAtCompileTime = Kernel::AssignmentTraits::SizeAtCompileTime;
538
+
539
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) {
540
+ copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, SizeAtCompileTime>::run(kernel);
469
541
  }
470
542
  };
471
543
 
472
- template<typename Kernel>
473
- struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, InnerUnrolling>
474
- {
475
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
476
- {
477
- typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
478
- typedef typename Kernel::AssignmentTraits Traits;
544
+ template <typename Kernel>
545
+ struct dense_assignment_loop_impl<Kernel, InnerVectorizedTraversal, InnerUnrolling> {
546
+ static constexpr int InnerSize = Kernel::AssignmentTraits::InnerSizeAtCompileTime;
547
+ static constexpr int SrcAlignment = Kernel::AssignmentTraits::SrcAlignment;
548
+ static constexpr int DstAlignment = Kernel::AssignmentTraits::DstAlignment;
549
+
550
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) {
479
551
  const Index outerSize = kernel.outerSize();
480
- for(Index outer = 0; outer < outerSize; ++outer)
481
- copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime,
482
- Traits::SrcAlignment, Traits::DstAlignment>::run(kernel, outer);
552
+ for (Index outer = 0; outer < outerSize; ++outer)
553
+ copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, InnerSize, SrcAlignment, DstAlignment>::run(kernel,
554
+ outer);
483
555
  }
484
556
  };
485
557
 
@@ -487,24 +559,19 @@ struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, InnerUnrolling>
487
559
  *** Linear traversal ***
488
560
  ***********************/
489
561
 
490
- template<typename Kernel>
491
- struct dense_assignment_loop<Kernel, LinearTraversal, NoUnrolling>
492
- {
493
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
494
- {
562
+ template <typename Kernel>
563
+ struct dense_assignment_loop_impl<Kernel, LinearTraversal, NoUnrolling> {
564
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) {
495
565
  const Index size = kernel.size();
496
- for(Index i = 0; i < size; ++i)
497
- kernel.assignCoeff(i);
566
+ for (Index i = 0; i < size; ++i) kernel.assignCoeff(i);
498
567
  }
499
568
  };
500
569
 
501
- template<typename Kernel>
502
- struct dense_assignment_loop<Kernel, LinearTraversal, CompleteUnrolling>
503
- {
504
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
505
- {
506
- typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
507
- copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
570
+ template <typename Kernel>
571
+ struct dense_assignment_loop_impl<Kernel, LinearTraversal, CompleteUnrolling> {
572
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) {
573
+ copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, 0, Kernel::AssignmentTraits::SizeAtCompileTime>::run(
574
+ kernel);
508
575
  }
509
576
  };
510
577
 
@@ -512,79 +579,71 @@ struct dense_assignment_loop<Kernel, LinearTraversal, CompleteUnrolling>
512
579
  *** Slice vectorization ***
513
580
  ***************************/
514
581
 
515
- template<typename Kernel>
516
- struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling>
517
- {
518
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
519
- {
520
- typedef typename Kernel::Scalar Scalar;
521
- typedef typename Kernel::PacketType PacketType;
522
- enum {
523
- packetSize = unpacket_traits<PacketType>::size,
524
- requestedAlignment = int(Kernel::AssignmentTraits::InnerRequiredAlignment),
525
- alignable = packet_traits<Scalar>::AlignedOnScalar || int(Kernel::AssignmentTraits::DstAlignment)>=sizeof(Scalar),
526
- dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment),
527
- dstAlignment = alignable ? int(requestedAlignment)
528
- : int(Kernel::AssignmentTraits::DstAlignment)
529
- };
530
- const Scalar *dst_ptr = kernel.dstDataPtr();
531
- if((!bool(dstIsAligned)) && (UIntPtr(dst_ptr) % sizeof(Scalar))>0)
532
- {
533
- // the pointer is not aligend-on scalar, so alignment is not possible
534
- return dense_assignment_loop<Kernel,DefaultTraversal,NoUnrolling>::run(kernel);
535
- }
536
- const Index packetAlignedMask = packetSize - 1;
582
+ template <typename Kernel>
583
+ struct dense_assignment_loop_impl<Kernel, SliceVectorizedTraversal, NoUnrolling> {
584
+ using Scalar = typename Kernel::Scalar;
585
+ using PacketType = typename Kernel::PacketType;
586
+ static constexpr int PacketSize = unpacket_traits<PacketType>::size;
587
+ static constexpr int SrcAlignment = Kernel::AssignmentTraits::JointAlignment;
588
+ static constexpr int DstAlignment = plain_enum_max(Kernel::AssignmentTraits::DstAlignment, alignof(Scalar));
589
+ static constexpr int RequestedAlignment = unpacket_traits<PacketType>::alignment;
590
+ static constexpr bool Alignable =
591
+ (DstAlignment >= RequestedAlignment) || ((RequestedAlignment - DstAlignment) % sizeof(Scalar) == 0);
592
+ static constexpr int Alignment = Alignable ? RequestedAlignment : DstAlignment;
593
+ static constexpr bool DstIsAligned = DstAlignment >= Alignment;
594
+ static constexpr bool UsePacketSegment = Kernel::AssignmentTraits::UsePacketSegment;
595
+
596
+ using head_loop = unaligned_dense_assignment_loop<PacketType, DstAlignment, Unaligned, UsePacketSegment, !Alignable>;
597
+ using tail_loop = unaligned_dense_assignment_loop<PacketType, Alignment, Unaligned, UsePacketSegment, false>;
598
+
599
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) {
600
+ const Scalar* dst_ptr = kernel.dstDataPtr();
537
601
  const Index innerSize = kernel.innerSize();
538
602
  const Index outerSize = kernel.outerSize();
539
- const Index alignedStep = alignable ? (packetSize - kernel.outerStride() % packetSize) & packetAlignedMask : 0;
540
- Index alignedStart = ((!alignable) || bool(dstIsAligned)) ? 0 : internal::first_aligned<requestedAlignment>(dst_ptr, innerSize);
541
-
542
- for(Index outer = 0; outer < outerSize; ++outer)
543
- {
544
- const Index alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask);
545
- // do the non-vectorizable part of the assignment
546
- for(Index inner = 0; inner<alignedStart ; ++inner)
547
- kernel.assignCoeffByOuterInner(outer, inner);
603
+ const Index alignedStep = Alignable ? (PacketSize - kernel.outerStride() % PacketSize) % PacketSize : 0;
604
+ Index alignedStart = ((!Alignable) || DstIsAligned) ? 0 : internal::first_aligned<Alignment>(dst_ptr, innerSize);
605
+
606
+ for (Index outer = 0; outer < outerSize; ++outer) {
607
+ const Index alignedEnd = alignedStart + numext::round_down(innerSize - alignedStart, PacketSize);
608
+
609
+ head_loop::run(kernel, outer, 0, alignedStart);
548
610
 
549
611
  // do the vectorizable part of the assignment
550
- for(Index inner = alignedStart; inner<alignedEnd; inner+=packetSize)
551
- kernel.template assignPacketByOuterInner<dstAlignment, Unaligned, PacketType>(outer, inner);
612
+ for (Index inner = alignedStart; inner < alignedEnd; inner += PacketSize)
613
+ kernel.template assignPacketByOuterInner<Alignment, Unaligned, PacketType>(outer, inner);
552
614
 
553
- // do the non-vectorizable part of the assignment
554
- for(Index inner = alignedEnd; inner<innerSize ; ++inner)
555
- kernel.assignCoeffByOuterInner(outer, inner);
615
+ tail_loop::run(kernel, outer, alignedEnd, innerSize);
556
616
 
557
- alignedStart = numext::mini((alignedStart+alignedStep)%packetSize, innerSize);
617
+ alignedStart = numext::mini((alignedStart + alignedStep) % PacketSize, innerSize);
558
618
  }
559
619
  }
560
620
  };
561
621
 
562
622
  #if EIGEN_UNALIGNED_VECTORIZE
563
- template<typename Kernel>
564
- struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, InnerUnrolling>
565
- {
566
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
567
- {
568
- typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
569
- typedef typename Kernel::PacketType PacketType;
570
-
571
- enum { size = DstXprType::InnerSizeAtCompileTime,
572
- packetSize =unpacket_traits<PacketType>::size,
573
- vectorizableSize = (size/packetSize)*packetSize };
574
-
575
- for(Index outer = 0; outer < kernel.outerSize(); ++outer)
576
- {
577
- copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, vectorizableSize, 0, 0>::run(kernel, outer);
578
- copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, vectorizableSize, size>::run(kernel, outer);
623
+ template <typename Kernel>
624
+ struct dense_assignment_loop_impl<Kernel, SliceVectorizedTraversal, InnerUnrolling> {
625
+ using PacketType = typename Kernel::PacketType;
626
+ static constexpr int PacketSize = unpacket_traits<PacketType>::size;
627
+ static constexpr int InnerSize = Kernel::AssignmentTraits::InnerSizeAtCompileTime;
628
+ static constexpr int VectorizableSize = numext::round_down(InnerSize, PacketSize);
629
+ static constexpr bool UsePacketSegment = Kernel::AssignmentTraits::UsePacketSegment;
630
+
631
+ using packet_loop = copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, VectorizableSize, Unaligned, Unaligned>;
632
+ using packet_segment_loop = copy_using_evaluator_innervec_segment<Kernel, VectorizableSize, InnerSize, Unaligned,
633
+ Unaligned, UsePacketSegment>;
634
+
635
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) {
636
+ for (Index outer = 0; outer < kernel.outerSize(); ++outer) {
637
+ packet_loop::run(kernel, outer);
638
+ packet_segment_loop::run(kernel, outer);
579
639
  }
580
640
  }
581
641
  };
582
642
  #endif
583
643
 
584
-
585
644
  /***************************************************************************
586
- * Part 4 : Generic dense assignment kernel
587
- ***************************************************************************/
645
+ * Part 4 : Generic dense assignment kernel
646
+ ***************************************************************************/
588
647
 
589
648
  // This class generalize the assignment of a coefficient (or packet) from one dense evaluator
590
649
  // to another dense writable evaluator.
@@ -592,138 +651,166 @@ struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, InnerUnrolling>
592
651
  // This abstraction level permits to keep the evaluation loops as simple and as generic as possible.
593
652
  // One can customize the assignment using this generic dense_assignment_kernel with different
594
653
  // functors, or by completely overloading it, by-passing a functor.
595
- template<typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor, int Version = Specialized>
596
- class generic_dense_assignment_kernel
597
- {
598
- protected:
654
+ template <typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor, int Version = Specialized>
655
+ class generic_dense_assignment_kernel {
656
+ protected:
599
657
  typedef typename DstEvaluatorTypeT::XprType DstXprType;
600
658
  typedef typename SrcEvaluatorTypeT::XprType SrcXprType;
601
- public:
602
-
659
+
660
+ public:
603
661
  typedef DstEvaluatorTypeT DstEvaluatorType;
604
662
  typedef SrcEvaluatorTypeT SrcEvaluatorType;
605
663
  typedef typename DstEvaluatorType::Scalar Scalar;
606
664
  typedef copy_using_evaluator_traits<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor> AssignmentTraits;
607
665
  typedef typename AssignmentTraits::PacketType PacketType;
608
-
609
-
610
- EIGEN_DEVICE_FUNC generic_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr)
611
- : m_dst(dst), m_src(src), m_functor(func), m_dstExpr(dstExpr)
612
- {
613
- #ifdef EIGEN_DEBUG_ASSIGN
666
+
667
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr generic_dense_assignment_kernel(DstEvaluatorType& dst,
668
+ const SrcEvaluatorType& src,
669
+ const Functor& func,
670
+ DstXprType& dstExpr)
671
+ : m_dst(dst), m_src(src), m_functor(func), m_dstExpr(dstExpr) {
672
+ #ifdef EIGEN_DEBUG_ASSIGN
614
673
  AssignmentTraits::debug();
615
- #endif
616
- }
617
-
618
- EIGEN_DEVICE_FUNC Index size() const { return m_dstExpr.size(); }
619
- EIGEN_DEVICE_FUNC Index innerSize() const { return m_dstExpr.innerSize(); }
620
- EIGEN_DEVICE_FUNC Index outerSize() const { return m_dstExpr.outerSize(); }
621
- EIGEN_DEVICE_FUNC Index rows() const { return m_dstExpr.rows(); }
622
- EIGEN_DEVICE_FUNC Index cols() const { return m_dstExpr.cols(); }
623
- EIGEN_DEVICE_FUNC Index outerStride() const { return m_dstExpr.outerStride(); }
624
-
625
- EIGEN_DEVICE_FUNC DstEvaluatorType& dstEvaluator() { return m_dst; }
626
- EIGEN_DEVICE_FUNC const SrcEvaluatorType& srcEvaluator() const { return m_src; }
627
-
674
+ #endif
675
+ }
676
+
677
+ EIGEN_DEVICE_FUNC constexpr Index size() const noexcept { return m_dstExpr.size(); }
678
+ EIGEN_DEVICE_FUNC constexpr Index innerSize() const noexcept { return m_dstExpr.innerSize(); }
679
+ EIGEN_DEVICE_FUNC constexpr Index outerSize() const noexcept { return m_dstExpr.outerSize(); }
680
+ EIGEN_DEVICE_FUNC constexpr Index rows() const noexcept { return m_dstExpr.rows(); }
681
+ EIGEN_DEVICE_FUNC constexpr Index cols() const noexcept { return m_dstExpr.cols(); }
682
+ EIGEN_DEVICE_FUNC constexpr Index outerStride() const noexcept { return m_dstExpr.outerStride(); }
683
+
684
+ EIGEN_DEVICE_FUNC DstEvaluatorType& dstEvaluator() noexcept { return m_dst; }
685
+ EIGEN_DEVICE_FUNC const SrcEvaluatorType& srcEvaluator() const noexcept { return m_src; }
686
+
628
687
  /// Assign src(row,col) to dst(row,col) through the assignment functor.
629
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index row, Index col)
630
- {
631
- m_functor.assignCoeff(m_dst.coeffRef(row,col), m_src.coeff(row,col));
688
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void assignCoeff(Index row, Index col) {
689
+ m_functor.assignCoeff(m_dst.coeffRef(row, col), m_src.coeff(row, col));
632
690
  }
633
-
691
+
634
692
  /// \sa assignCoeff(Index,Index)
635
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index index)
636
- {
693
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index index) {
637
694
  m_functor.assignCoeff(m_dst.coeffRef(index), m_src.coeff(index));
638
695
  }
639
-
696
+
640
697
  /// \sa assignCoeff(Index,Index)
641
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeffByOuterInner(Index outer, Index inner)
642
- {
643
- Index row = rowIndexByOuterInner(outer, inner);
644
- Index col = colIndexByOuterInner(outer, inner);
698
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void assignCoeffByOuterInner(Index outer, Index inner) {
699
+ Index row = rowIndexByOuterInner(outer, inner);
700
+ Index col = colIndexByOuterInner(outer, inner);
645
701
  assignCoeff(row, col);
646
702
  }
647
-
648
-
649
- template<int StoreMode, int LoadMode, typename PacketType>
650
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index row, Index col)
651
- {
652
- m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(row,col), m_src.template packet<LoadMode,PacketType>(row,col));
653
- }
654
-
655
- template<int StoreMode, int LoadMode, typename PacketType>
656
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index index)
657
- {
658
- m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(index), m_src.template packet<LoadMode,PacketType>(index));
659
- }
660
-
661
- template<int StoreMode, int LoadMode, typename PacketType>
662
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacketByOuterInner(Index outer, Index inner)
663
- {
664
- Index row = rowIndexByOuterInner(outer, inner);
703
+
704
+ template <int StoreMode, int LoadMode, typename Packet>
705
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index row, Index col) {
706
+ m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(row, col),
707
+ m_src.template packet<LoadMode, Packet>(row, col));
708
+ }
709
+
710
+ template <int StoreMode, int LoadMode, typename Packet>
711
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index index) {
712
+ m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(index), m_src.template packet<LoadMode, Packet>(index));
713
+ }
714
+
715
+ template <int StoreMode, int LoadMode, typename Packet>
716
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacketByOuterInner(Index outer, Index inner) {
717
+ Index row = rowIndexByOuterInner(outer, inner);
665
718
  Index col = colIndexByOuterInner(outer, inner);
666
- assignPacket<StoreMode,LoadMode,PacketType>(row, col);
719
+ assignPacket<StoreMode, LoadMode, Packet>(row, col);
667
720
  }
668
-
669
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner)
670
- {
671
- typedef typename DstEvaluatorType::ExpressionTraits Traits;
672
- return int(Traits::RowsAtCompileTime) == 1 ? 0
673
- : int(Traits::ColsAtCompileTime) == 1 ? inner
674
- : int(DstEvaluatorType::Flags)&RowMajorBit ? outer
675
- : inner;
721
+
722
+ template <int StoreMode, int LoadMode, typename Packet>
723
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacketSegment(Index row, Index col, Index begin, Index count) {
724
+ m_functor.template assignPacketSegment<StoreMode>(
725
+ &m_dst.coeffRef(row, col), m_src.template packetSegment<LoadMode, Packet>(row, col, begin, count), begin,
726
+ count);
676
727
  }
677
728
 
678
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index colIndexByOuterInner(Index outer, Index inner)
679
- {
729
+ template <int StoreMode, int LoadMode, typename Packet>
730
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacketSegment(Index index, Index begin, Index count) {
731
+ m_functor.template assignPacketSegment<StoreMode>(
732
+ &m_dst.coeffRef(index), m_src.template packetSegment<LoadMode, Packet>(index, begin, count), begin, count);
733
+ }
734
+
735
+ template <int StoreMode, int LoadMode, typename Packet>
736
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacketSegmentByOuterInner(Index outer, Index inner, Index begin,
737
+ Index count) {
738
+ Index row = rowIndexByOuterInner(outer, inner);
739
+ Index col = colIndexByOuterInner(outer, inner);
740
+ assignPacketSegment<StoreMode, LoadMode, Packet>(row, col, begin, count);
741
+ }
742
+
743
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr Index rowIndexByOuterInner(Index outer, Index inner) {
680
744
  typedef typename DstEvaluatorType::ExpressionTraits Traits;
681
- return int(Traits::ColsAtCompileTime) == 1 ? 0
682
- : int(Traits::RowsAtCompileTime) == 1 ? inner
683
- : int(DstEvaluatorType::Flags)&RowMajorBit ? inner
684
- : outer;
745
+ return int(Traits::RowsAtCompileTime) == 1 ? 0
746
+ : int(Traits::ColsAtCompileTime) == 1 ? inner
747
+ : int(DstEvaluatorType::Flags) & RowMajorBit ? outer
748
+ : inner;
685
749
  }
686
750
 
687
- EIGEN_DEVICE_FUNC const Scalar* dstDataPtr() const
688
- {
689
- return m_dstExpr.data();
751
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr Index colIndexByOuterInner(Index outer, Index inner) {
752
+ typedef typename DstEvaluatorType::ExpressionTraits Traits;
753
+ return int(Traits::ColsAtCompileTime) == 1 ? 0
754
+ : int(Traits::RowsAtCompileTime) == 1 ? inner
755
+ : int(DstEvaluatorType::Flags) & RowMajorBit ? inner
756
+ : outer;
690
757
  }
691
-
692
- protected:
758
+
759
+ EIGEN_DEVICE_FUNC const Scalar* dstDataPtr() const { return m_dstExpr.data(); }
760
+
761
+ protected:
693
762
  DstEvaluatorType& m_dst;
694
763
  const SrcEvaluatorType& m_src;
695
- const Functor &m_functor;
764
+ const Functor& m_functor;
696
765
  // TODO find a way to avoid the needs of the original expression
697
766
  DstXprType& m_dstExpr;
698
767
  };
699
768
 
769
+ // Special kernel used when computing small products whose operands have dynamic dimensions. It ensures that the
770
+ // PacketSize used is no larger than 4, thereby increasing the chance that vectorized instructions will be used
771
+ // when computing the product.
772
+
773
+ template <typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor>
774
+ class restricted_packet_dense_assignment_kernel
775
+ : public generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, BuiltIn> {
776
+ protected:
777
+ typedef generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, BuiltIn> Base;
778
+
779
+ public:
780
+ typedef typename Base::Scalar Scalar;
781
+ typedef typename Base::DstXprType DstXprType;
782
+ typedef copy_using_evaluator_traits<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, 4> AssignmentTraits;
783
+ typedef typename AssignmentTraits::PacketType PacketType;
784
+
785
+ EIGEN_DEVICE_FUNC restricted_packet_dense_assignment_kernel(DstEvaluatorTypeT& dst, const SrcEvaluatorTypeT& src,
786
+ const Functor& func, DstXprType& dstExpr)
787
+ : Base(dst, src, func, dstExpr) {}
788
+ };
789
+
700
790
  /***************************************************************************
701
- * Part 5 : Entry point for dense rectangular assignment
702
- ***************************************************************************/
791
+ * Part 5 : Entry point for dense rectangular assignment
792
+ ***************************************************************************/
703
793
 
704
- template<typename DstXprType,typename SrcXprType, typename Functor>
705
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
706
- void resize_if_allowed(DstXprType &dst, const SrcXprType& src, const Functor &/*func*/)
707
- {
794
+ template <typename DstXprType, typename SrcXprType, typename Functor>
795
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize_if_allowed(DstXprType& dst, const SrcXprType& src,
796
+ const Functor& /*func*/) {
708
797
  EIGEN_ONLY_USED_FOR_DEBUG(dst);
709
798
  EIGEN_ONLY_USED_FOR_DEBUG(src);
710
799
  eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
711
800
  }
712
801
 
713
- template<typename DstXprType,typename SrcXprType, typename T1, typename T2>
714
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
715
- void resize_if_allowed(DstXprType &dst, const SrcXprType& src, const internal::assign_op<T1,T2> &/*func*/)
716
- {
802
+ template <typename DstXprType, typename SrcXprType, typename T1, typename T2>
803
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize_if_allowed(DstXprType& dst, const SrcXprType& src,
804
+ const internal::assign_op<T1, T2>& /*func*/) {
717
805
  Index dstRows = src.rows();
718
806
  Index dstCols = src.cols();
719
- if(((dst.rows()!=dstRows) || (dst.cols()!=dstCols)))
720
- dst.resize(dstRows, dstCols);
807
+ if (((dst.rows() != dstRows) || (dst.cols() != dstCols))) dst.resize(dstRows, dstCols);
721
808
  eigen_assert(dst.rows() == dstRows && dst.cols() == dstCols);
722
809
  }
723
810
 
724
- template<typename DstXprType, typename SrcXprType, typename Functor>
725
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src, const Functor &func)
726
- {
811
+ template <typename DstXprType, typename SrcXprType, typename Functor>
812
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src,
813
+ const Functor& func) {
727
814
  typedef evaluator<DstXprType> DstEvaluatorType;
728
815
  typedef evaluator<SrcXprType> SrcEvaluatorType;
729
816
 
@@ -734,202 +821,237 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType
734
821
  resize_if_allowed(dst, src, func);
735
822
 
736
823
  DstEvaluatorType dstEvaluator(dst);
737
-
738
- typedef generic_dense_assignment_kernel<DstEvaluatorType,SrcEvaluatorType,Functor> Kernel;
824
+
825
+ typedef generic_dense_assignment_kernel<DstEvaluatorType, SrcEvaluatorType, Functor> Kernel;
739
826
  Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived());
740
827
 
741
828
  dense_assignment_loop<Kernel>::run(kernel);
742
829
  }
743
830
 
744
- template<typename DstXprType, typename SrcXprType>
745
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src)
746
- {
747
- call_dense_assignment_loop(dst, src, internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar>());
831
+ template <typename DstXprType, typename SrcXprType>
832
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src) {
833
+ call_dense_assignment_loop(dst, src, internal::assign_op<typename DstXprType::Scalar, typename SrcXprType::Scalar>());
748
834
  }
749
835
 
750
836
  /***************************************************************************
751
- * Part 6 : Generic assignment
752
- ***************************************************************************/
837
+ * Part 6 : Generic assignment
838
+ ***************************************************************************/
753
839
 
754
840
  // Based on the respective shapes of the destination and source,
755
841
  // the class AssignmentKind determine the kind of assignment mechanism.
756
842
  // AssignmentKind must define a Kind typedef.
757
- template<typename DstShape, typename SrcShape> struct AssignmentKind;
843
+ template <typename DstShape, typename SrcShape>
844
+ struct AssignmentKind;
758
845
 
759
- // Assignement kind defined in this file:
846
+ // Assignment kind defined in this file:
760
847
  struct Dense2Dense {};
761
848
  struct EigenBase2EigenBase {};
762
849
 
763
- template<typename,typename> struct AssignmentKind { typedef EigenBase2EigenBase Kind; };
764
- template<> struct AssignmentKind<DenseShape,DenseShape> { typedef Dense2Dense Kind; };
765
-
850
+ template <typename, typename>
851
+ struct AssignmentKind {
852
+ typedef EigenBase2EigenBase Kind;
853
+ };
854
+ template <>
855
+ struct AssignmentKind<DenseShape, DenseShape> {
856
+ typedef Dense2Dense Kind;
857
+ };
858
+
766
859
  // This is the main assignment class
767
- template< typename DstXprType, typename SrcXprType, typename Functor,
768
- typename Kind = typename AssignmentKind< typename evaluator_traits<DstXprType>::Shape , typename evaluator_traits<SrcXprType>::Shape >::Kind,
860
+ template <typename DstXprType, typename SrcXprType, typename Functor,
861
+ typename Kind = typename AssignmentKind<typename evaluator_traits<DstXprType>::Shape,
862
+ typename evaluator_traits<SrcXprType>::Shape>::Kind,
769
863
  typename EnableIf = void>
770
864
  struct Assignment;
771
865
 
866
+ // The only purpose of this call_assignment() function is to deal with noalias() / "assume-aliasing" and automatic
867
+ // transposition. Indeed, I (Gael) think that this concept of "assume-aliasing" was a mistake, and it makes thing quite
868
+ // complicated. So this intermediate function removes everything related to "assume-aliasing" such that Assignment does
869
+ // not has to bother about these annoying details.
772
870
 
773
- // The only purpose of this call_assignment() function is to deal with noalias() / "assume-aliasing" and automatic transposition.
774
- // Indeed, I (Gael) think that this concept of "assume-aliasing" was a mistake, and it makes thing quite complicated.
775
- // So this intermediate function removes everything related to "assume-aliasing" such that Assignment
776
- // does not has to bother about these annoying details.
777
-
778
- template<typename Dst, typename Src>
779
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
780
- void call_assignment(Dst& dst, const Src& src)
781
- {
782
- call_assignment(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
871
+ template <typename Dst, typename Src>
872
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment(Dst& dst, const Src& src) {
873
+ call_assignment(dst, src, internal::assign_op<typename Dst::Scalar, typename Src::Scalar>());
783
874
  }
784
- template<typename Dst, typename Src>
785
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
786
- void call_assignment(const Dst& dst, const Src& src)
787
- {
788
- call_assignment(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
875
+ template <typename Dst, typename Src>
876
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_assignment(const Dst& dst, const Src& src) {
877
+ call_assignment(dst, src, internal::assign_op<typename Dst::Scalar, typename Src::Scalar>());
789
878
  }
790
-
879
+
791
880
  // Deal with "assume-aliasing"
792
- template<typename Dst, typename Src, typename Func>
793
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
794
- void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if< evaluator_assume_aliasing<Src>::value, void*>::type = 0)
795
- {
881
+ template <typename Dst, typename Src, typename Func>
882
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment(
883
+ Dst& dst, const Src& src, const Func& func, std::enable_if_t<evaluator_assume_aliasing<Src>::value, void*> = 0) {
796
884
  typename plain_matrix_type<Src>::type tmp(src);
797
885
  call_assignment_no_alias(dst, tmp, func);
798
886
  }
799
887
 
800
- template<typename Dst, typename Src, typename Func>
801
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
802
- void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if<!evaluator_assume_aliasing<Src>::value, void*>::type = 0)
803
- {
888
+ template <typename Dst, typename Src, typename Func>
889
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment(
890
+ Dst& dst, const Src& src, const Func& func, std::enable_if_t<!evaluator_assume_aliasing<Src>::value, void*> = 0) {
804
891
  call_assignment_no_alias(dst, src, func);
805
892
  }
806
893
 
807
894
  // by-pass "assume-aliasing"
808
895
  // When there is no aliasing, we require that 'dst' has been properly resized
809
- template<typename Dst, template <typename> class StorageBase, typename Src, typename Func>
810
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
811
- void call_assignment(NoAlias<Dst,StorageBase>& dst, const Src& src, const Func& func)
812
- {
896
+ template <typename Dst, template <typename> class StorageBase, typename Src, typename Func>
897
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment(NoAlias<Dst, StorageBase>& dst, const Src& src,
898
+ const Func& func) {
813
899
  call_assignment_no_alias(dst.expression(), src, func);
814
900
  }
815
901
 
816
-
817
- template<typename Dst, typename Src, typename Func>
818
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
819
- void call_assignment_no_alias(Dst& dst, const Src& src, const Func& func)
820
- {
902
+ template <typename Dst, typename Src, typename Func>
903
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment_no_alias(Dst& dst, const Src& src,
904
+ const Func& func) {
821
905
  enum {
822
- NeedToTranspose = ( (int(Dst::RowsAtCompileTime) == 1 && int(Src::ColsAtCompileTime) == 1)
823
- || (int(Dst::ColsAtCompileTime) == 1 && int(Src::RowsAtCompileTime) == 1)
824
- ) && int(Dst::SizeAtCompileTime) != 1
906
+ NeedToTranspose = ((int(Dst::RowsAtCompileTime) == 1 && int(Src::ColsAtCompileTime) == 1) ||
907
+ (int(Dst::ColsAtCompileTime) == 1 && int(Src::RowsAtCompileTime) == 1)) &&
908
+ int(Dst::SizeAtCompileTime) != 1
825
909
  };
826
910
 
827
- typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst>::type ActualDstTypeCleaned;
828
- typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst&>::type ActualDstType;
911
+ typedef std::conditional_t<NeedToTranspose, Transpose<Dst>, Dst> ActualDstTypeCleaned;
912
+ typedef std::conditional_t<NeedToTranspose, Transpose<Dst>, Dst&> ActualDstType;
829
913
  ActualDstType actualDst(dst);
830
-
914
+
831
915
  // TODO check whether this is the right place to perform these checks:
832
916
  EIGEN_STATIC_ASSERT_LVALUE(Dst)
833
- EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(ActualDstTypeCleaned,Src)
834
- EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename ActualDstTypeCleaned::Scalar,typename Src::Scalar);
835
-
836
- Assignment<ActualDstTypeCleaned,Src,Func>::run(actualDst, src, func);
917
+ EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(ActualDstTypeCleaned, Src)
918
+ EIGEN_CHECK_BINARY_COMPATIBILIY(Func, typename ActualDstTypeCleaned::Scalar, typename Src::Scalar);
919
+
920
+ Assignment<ActualDstTypeCleaned, Src, Func>::run(actualDst, src, func);
921
+ }
922
+
923
+ template <typename Dst, typename Src, typename Func>
924
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_restricted_packet_assignment_no_alias(Dst& dst, const Src& src,
925
+ const Func& func) {
926
+ typedef evaluator<Dst> DstEvaluatorType;
927
+ typedef evaluator<Src> SrcEvaluatorType;
928
+ typedef restricted_packet_dense_assignment_kernel<DstEvaluatorType, SrcEvaluatorType, Func> Kernel;
929
+
930
+ EIGEN_STATIC_ASSERT_LVALUE(Dst)
931
+ EIGEN_CHECK_BINARY_COMPATIBILIY(Func, typename Dst::Scalar, typename Src::Scalar);
932
+
933
+ SrcEvaluatorType srcEvaluator(src);
934
+ resize_if_allowed(dst, src, func);
935
+
936
+ DstEvaluatorType dstEvaluator(dst);
937
+ Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived());
938
+
939
+ dense_assignment_loop<Kernel>::run(kernel);
837
940
  }
838
- template<typename Dst, typename Src>
839
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
840
- void call_assignment_no_alias(Dst& dst, const Src& src)
841
- {
842
- call_assignment_no_alias(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
941
+
942
+ template <typename Dst, typename Src>
943
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment_no_alias(Dst& dst, const Src& src) {
944
+ call_assignment_no_alias(dst, src, internal::assign_op<typename Dst::Scalar, typename Src::Scalar>());
843
945
  }
844
946
 
845
- template<typename Dst, typename Src, typename Func>
846
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
847
- void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src, const Func& func)
848
- {
947
+ template <typename Dst, typename Src, typename Func>
948
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src,
949
+ const Func& func) {
849
950
  // TODO check whether this is the right place to perform these checks:
850
951
  EIGEN_STATIC_ASSERT_LVALUE(Dst)
851
- EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Dst,Src)
852
- EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename Dst::Scalar,typename Src::Scalar);
952
+ EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Dst, Src)
953
+ EIGEN_CHECK_BINARY_COMPATIBILIY(Func, typename Dst::Scalar, typename Src::Scalar);
853
954
 
854
- Assignment<Dst,Src,Func>::run(dst, src, func);
955
+ Assignment<Dst, Src, Func>::run(dst, src, func);
855
956
  }
856
- template<typename Dst, typename Src>
857
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
858
- void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src)
859
- {
860
- call_assignment_no_alias_no_transpose(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
957
+ template <typename Dst, typename Src>
958
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src) {
959
+ call_assignment_no_alias_no_transpose(dst, src, internal::assign_op<typename Dst::Scalar, typename Src::Scalar>());
861
960
  }
862
961
 
863
962
  // forward declaration
864
- template<typename Dst, typename Src> void check_for_aliasing(const Dst &dst, const Src &src);
963
+ template <typename Dst, typename Src>
964
+ EIGEN_DEVICE_FUNC void check_for_aliasing(const Dst& dst, const Src& src);
865
965
 
866
966
  // Generic Dense to Dense assignment
867
967
  // Note that the last template argument "Weak" is needed to make it possible to perform
868
968
  // both partial specialization+SFINAE without ambiguous specialization
869
- template< typename DstXprType, typename SrcXprType, typename Functor, typename Weak>
870
- struct Assignment<DstXprType, SrcXprType, Functor, Dense2Dense, Weak>
871
- {
872
- EIGEN_DEVICE_FUNC
873
- static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
874
- {
969
+ template <typename DstXprType, typename SrcXprType, typename Functor, typename Weak>
970
+ struct Assignment<DstXprType, SrcXprType, Functor, Dense2Dense, Weak> {
971
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(DstXprType& dst, const SrcXprType& src,
972
+ const Functor& func) {
875
973
  #ifndef EIGEN_NO_DEBUG
876
- internal::check_for_aliasing(dst, src);
974
+ if (!internal::is_constant_evaluated()) {
975
+ internal::check_for_aliasing(dst, src);
976
+ }
877
977
  #endif
878
-
978
+
879
979
  call_dense_assignment_loop(dst, src, func);
880
980
  }
881
981
  };
882
982
 
983
+ template <typename DstXprType, typename SrcPlainObject, typename Weak>
984
+ struct Assignment<DstXprType, CwiseNullaryOp<scalar_constant_op<typename DstXprType::Scalar>, SrcPlainObject>,
985
+ assign_op<typename DstXprType::Scalar, typename DstXprType::Scalar>, Dense2Dense, Weak> {
986
+ using Scalar = typename DstXprType::Scalar;
987
+ using NullaryOp = scalar_constant_op<Scalar>;
988
+ using SrcXprType = CwiseNullaryOp<NullaryOp, SrcPlainObject>;
989
+ using Functor = assign_op<Scalar, Scalar>;
990
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(DstXprType& dst, const SrcXprType& src,
991
+ const Functor& /*func*/) {
992
+ eigen_fill_impl<DstXprType>::run(dst, src);
993
+ }
994
+ };
995
+
996
+ template <typename DstXprType, typename SrcPlainObject, typename Weak>
997
+ struct Assignment<DstXprType, CwiseNullaryOp<scalar_zero_op<typename DstXprType::Scalar>, SrcPlainObject>,
998
+ assign_op<typename DstXprType::Scalar, typename DstXprType::Scalar>, Dense2Dense, Weak> {
999
+ using Scalar = typename DstXprType::Scalar;
1000
+ using NullaryOp = scalar_zero_op<Scalar>;
1001
+ using SrcXprType = CwiseNullaryOp<NullaryOp, SrcPlainObject>;
1002
+ using Functor = assign_op<Scalar, Scalar>;
1003
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(DstXprType& dst, const SrcXprType& src,
1004
+ const Functor& /*func*/) {
1005
+ eigen_zero_impl<DstXprType>::run(dst, src);
1006
+ }
1007
+ };
1008
+
883
1009
  // Generic assignment through evalTo.
884
1010
  // TODO: not sure we have to keep that one, but it helps porting current code to new evaluator mechanism.
885
1011
  // Note that the last template argument "Weak" is needed to make it possible to perform
886
1012
  // both partial specialization+SFINAE without ambiguous specialization
887
- template< typename DstXprType, typename SrcXprType, typename Functor, typename Weak>
888
- struct Assignment<DstXprType, SrcXprType, Functor, EigenBase2EigenBase, Weak>
889
- {
890
- EIGEN_DEVICE_FUNC
891
- static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
892
- {
1013
+ template <typename DstXprType, typename SrcXprType, typename Functor, typename Weak>
1014
+ struct Assignment<DstXprType, SrcXprType, Functor, EigenBase2EigenBase, Weak> {
1015
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(
1016
+ DstXprType& dst, const SrcXprType& src,
1017
+ const internal::assign_op<typename DstXprType::Scalar, typename SrcXprType::Scalar>& /*func*/) {
893
1018
  Index dstRows = src.rows();
894
1019
  Index dstCols = src.cols();
895
- if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
896
- dst.resize(dstRows, dstCols);
1020
+ if ((dst.rows() != dstRows) || (dst.cols() != dstCols)) dst.resize(dstRows, dstCols);
897
1021
 
898
1022
  eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
899
1023
  src.evalTo(dst);
900
1024
  }
901
1025
 
902
- // NOTE The following two functions are templated to avoid their instanciation if not needed
1026
+ // NOTE The following two functions are templated to avoid their instantiation if not needed
903
1027
  // This is needed because some expressions supports evalTo only and/or have 'void' as scalar type.
904
- template<typename SrcScalarType>
905
- EIGEN_DEVICE_FUNC
906
- static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<typename DstXprType::Scalar,SrcScalarType> &/*func*/)
907
- {
1028
+ template <typename SrcScalarType>
1029
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(
1030
+ DstXprType& dst, const SrcXprType& src,
1031
+ const internal::add_assign_op<typename DstXprType::Scalar, SrcScalarType>& /*func*/) {
908
1032
  Index dstRows = src.rows();
909
1033
  Index dstCols = src.cols();
910
- if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
911
- dst.resize(dstRows, dstCols);
1034
+ if ((dst.rows() != dstRows) || (dst.cols() != dstCols)) dst.resize(dstRows, dstCols);
912
1035
 
913
1036
  eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
914
1037
  src.addTo(dst);
915
1038
  }
916
1039
 
917
- template<typename SrcScalarType>
918
- EIGEN_DEVICE_FUNC
919
- static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<typename DstXprType::Scalar,SrcScalarType> &/*func*/)
920
- {
1040
+ template <typename SrcScalarType>
1041
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(
1042
+ DstXprType& dst, const SrcXprType& src,
1043
+ const internal::sub_assign_op<typename DstXprType::Scalar, SrcScalarType>& /*func*/) {
921
1044
  Index dstRows = src.rows();
922
1045
  Index dstCols = src.cols();
923
- if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
924
- dst.resize(dstRows, dstCols);
1046
+ if ((dst.rows() != dstRows) || (dst.cols() != dstCols)) dst.resize(dstRows, dstCols);
925
1047
 
926
1048
  eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
927
1049
  src.subTo(dst);
928
1050
  }
929
1051
  };
930
1052
 
931
- } // namespace internal
1053
+ } // namespace internal
932
1054
 
933
- } // end namespace Eigen
1055
+ } // end namespace Eigen
934
1056
 
935
- #endif // EIGEN_ASSIGN_EVALUATOR_H
1057
+ #endif // EIGEN_ASSIGN_EVALUATOR_H