@smake/eigen 1.1.0 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (431)
  1. package/README.md +1 -1
  2. package/eigen/Eigen/AccelerateSupport +52 -0
  3. package/eigen/Eigen/Cholesky +18 -20
  4. package/eigen/Eigen/CholmodSupport +28 -28
  5. package/eigen/Eigen/Core +187 -120
  6. package/eigen/Eigen/Eigenvalues +16 -13
  7. package/eigen/Eigen/Geometry +18 -18
  8. package/eigen/Eigen/Householder +9 -7
  9. package/eigen/Eigen/IterativeLinearSolvers +8 -4
  10. package/eigen/Eigen/Jacobi +14 -13
  11. package/eigen/Eigen/KLUSupport +23 -21
  12. package/eigen/Eigen/LU +15 -16
  13. package/eigen/Eigen/MetisSupport +12 -12
  14. package/eigen/Eigen/OrderingMethods +54 -51
  15. package/eigen/Eigen/PaStiXSupport +23 -21
  16. package/eigen/Eigen/PardisoSupport +17 -14
  17. package/eigen/Eigen/QR +18 -20
  18. package/eigen/Eigen/QtAlignedMalloc +5 -12
  19. package/eigen/Eigen/SPQRSupport +21 -14
  20. package/eigen/Eigen/SVD +23 -17
  21. package/eigen/Eigen/Sparse +1 -2
  22. package/eigen/Eigen/SparseCholesky +18 -15
  23. package/eigen/Eigen/SparseCore +18 -17
  24. package/eigen/Eigen/SparseLU +9 -9
  25. package/eigen/Eigen/SparseQR +16 -14
  26. package/eigen/Eigen/StdDeque +5 -2
  27. package/eigen/Eigen/StdList +5 -2
  28. package/eigen/Eigen/StdVector +5 -2
  29. package/eigen/Eigen/SuperLUSupport +30 -24
  30. package/eigen/Eigen/ThreadPool +80 -0
  31. package/eigen/Eigen/UmfPackSupport +19 -17
  32. package/eigen/Eigen/Version +14 -0
  33. package/eigen/Eigen/src/AccelerateSupport/AccelerateSupport.h +423 -0
  34. package/eigen/Eigen/src/AccelerateSupport/InternalHeaderCheck.h +3 -0
  35. package/eigen/Eigen/src/Cholesky/InternalHeaderCheck.h +3 -0
  36. package/eigen/Eigen/src/Cholesky/LDLT.h +366 -405
  37. package/eigen/Eigen/src/Cholesky/LLT.h +323 -367
  38. package/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h +81 -56
  39. package/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +585 -529
  40. package/eigen/Eigen/src/CholmodSupport/InternalHeaderCheck.h +3 -0
  41. package/eigen/Eigen/src/Core/ArithmeticSequence.h +143 -317
  42. package/eigen/Eigen/src/Core/Array.h +329 -370
  43. package/eigen/Eigen/src/Core/ArrayBase.h +190 -203
  44. package/eigen/Eigen/src/Core/ArrayWrapper.h +126 -170
  45. package/eigen/Eigen/src/Core/Assign.h +30 -40
  46. package/eigen/Eigen/src/Core/AssignEvaluator.h +651 -604
  47. package/eigen/Eigen/src/Core/Assign_MKL.h +125 -120
  48. package/eigen/Eigen/src/Core/BandMatrix.h +267 -282
  49. package/eigen/Eigen/src/Core/Block.h +371 -390
  50. package/eigen/Eigen/src/Core/CommaInitializer.h +85 -100
  51. package/eigen/Eigen/src/Core/ConditionEstimator.h +51 -53
  52. package/eigen/Eigen/src/Core/CoreEvaluators.h +1214 -937
  53. package/eigen/Eigen/src/Core/CoreIterators.h +72 -63
  54. package/eigen/Eigen/src/Core/CwiseBinaryOp.h +112 -129
  55. package/eigen/Eigen/src/Core/CwiseNullaryOp.h +676 -702
  56. package/eigen/Eigen/src/Core/CwiseTernaryOp.h +77 -103
  57. package/eigen/Eigen/src/Core/CwiseUnaryOp.h +55 -67
  58. package/eigen/Eigen/src/Core/CwiseUnaryView.h +127 -92
  59. package/eigen/Eigen/src/Core/DenseBase.h +630 -658
  60. package/eigen/Eigen/src/Core/DenseCoeffsBase.h +511 -628
  61. package/eigen/Eigen/src/Core/DenseStorage.h +511 -590
  62. package/eigen/Eigen/src/Core/DeviceWrapper.h +153 -0
  63. package/eigen/Eigen/src/Core/Diagonal.h +168 -207
  64. package/eigen/Eigen/src/Core/DiagonalMatrix.h +346 -317
  65. package/eigen/Eigen/src/Core/DiagonalProduct.h +12 -10
  66. package/eigen/Eigen/src/Core/Dot.h +167 -217
  67. package/eigen/Eigen/src/Core/EigenBase.h +74 -85
  68. package/eigen/Eigen/src/Core/Fill.h +138 -0
  69. package/eigen/Eigen/src/Core/FindCoeff.h +464 -0
  70. package/eigen/Eigen/src/Core/ForceAlignedAccess.h +90 -113
  71. package/eigen/Eigen/src/Core/Fuzzy.h +82 -105
  72. package/eigen/Eigen/src/Core/GeneralProduct.h +315 -261
  73. package/eigen/Eigen/src/Core/GenericPacketMath.h +1182 -520
  74. package/eigen/Eigen/src/Core/GlobalFunctions.h +193 -157
  75. package/eigen/Eigen/src/Core/IO.h +131 -156
  76. package/eigen/Eigen/src/Core/IndexedView.h +209 -125
  77. package/eigen/Eigen/src/Core/InnerProduct.h +260 -0
  78. package/eigen/Eigen/src/Core/InternalHeaderCheck.h +3 -0
  79. package/eigen/Eigen/src/Core/Inverse.h +50 -59
  80. package/eigen/Eigen/src/Core/Map.h +123 -141
  81. package/eigen/Eigen/src/Core/MapBase.h +255 -282
  82. package/eigen/Eigen/src/Core/MathFunctions.h +1247 -1201
  83. package/eigen/Eigen/src/Core/MathFunctionsImpl.h +162 -99
  84. package/eigen/Eigen/src/Core/Matrix.h +463 -494
  85. package/eigen/Eigen/src/Core/MatrixBase.h +468 -470
  86. package/eigen/Eigen/src/Core/NestByValue.h +58 -52
  87. package/eigen/Eigen/src/Core/NoAlias.h +79 -86
  88. package/eigen/Eigen/src/Core/NumTraits.h +206 -206
  89. package/eigen/Eigen/src/Core/PartialReduxEvaluator.h +163 -142
  90. package/eigen/Eigen/src/Core/PermutationMatrix.h +461 -511
  91. package/eigen/Eigen/src/Core/PlainObjectBase.h +858 -972
  92. package/eigen/Eigen/src/Core/Product.h +246 -130
  93. package/eigen/Eigen/src/Core/ProductEvaluators.h +779 -671
  94. package/eigen/Eigen/src/Core/Random.h +153 -164
  95. package/eigen/Eigen/src/Core/RandomImpl.h +262 -0
  96. package/eigen/Eigen/src/Core/RealView.h +250 -0
  97. package/eigen/Eigen/src/Core/Redux.h +334 -314
  98. package/eigen/Eigen/src/Core/Ref.h +259 -257
  99. package/eigen/Eigen/src/Core/Replicate.h +92 -104
  100. package/eigen/Eigen/src/Core/Reshaped.h +215 -271
  101. package/eigen/Eigen/src/Core/ReturnByValue.h +47 -55
  102. package/eigen/Eigen/src/Core/Reverse.h +133 -148
  103. package/eigen/Eigen/src/Core/Select.h +68 -140
  104. package/eigen/Eigen/src/Core/SelfAdjointView.h +254 -290
  105. package/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h +23 -20
  106. package/eigen/Eigen/src/Core/SkewSymmetricMatrix3.h +382 -0
  107. package/eigen/Eigen/src/Core/Solve.h +88 -102
  108. package/eigen/Eigen/src/Core/SolveTriangular.h +126 -124
  109. package/eigen/Eigen/src/Core/SolverBase.h +132 -133
  110. package/eigen/Eigen/src/Core/StableNorm.h +113 -147
  111. package/eigen/Eigen/src/Core/StlIterators.h +404 -248
  112. package/eigen/Eigen/src/Core/Stride.h +90 -92
  113. package/eigen/Eigen/src/Core/Swap.h +70 -39
  114. package/eigen/Eigen/src/Core/Transpose.h +258 -295
  115. package/eigen/Eigen/src/Core/Transpositions.h +270 -333
  116. package/eigen/Eigen/src/Core/TriangularMatrix.h +642 -743
  117. package/eigen/Eigen/src/Core/VectorBlock.h +59 -72
  118. package/eigen/Eigen/src/Core/VectorwiseOp.h +653 -704
  119. package/eigen/Eigen/src/Core/Visitor.h +464 -308
  120. package/eigen/Eigen/src/Core/arch/AVX/Complex.h +380 -187
  121. package/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +65 -163
  122. package/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +2145 -638
  123. package/eigen/Eigen/src/Core/arch/AVX/Reductions.h +353 -0
  124. package/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +253 -60
  125. package/eigen/Eigen/src/Core/arch/AVX512/Complex.h +278 -228
  126. package/eigen/Eigen/src/Core/arch/AVX512/GemmKernel.h +1245 -0
  127. package/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +48 -269
  128. package/eigen/Eigen/src/Core/arch/AVX512/MathFunctionsFP16.h +75 -0
  129. package/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1597 -754
  130. package/eigen/Eigen/src/Core/arch/AVX512/PacketMathFP16.h +1413 -0
  131. package/eigen/Eigen/src/Core/arch/AVX512/Reductions.h +297 -0
  132. package/eigen/Eigen/src/Core/arch/AVX512/TrsmKernel.h +1167 -0
  133. package/eigen/Eigen/src/Core/arch/AVX512/TrsmUnrolls.inc +1219 -0
  134. package/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +229 -41
  135. package/eigen/Eigen/src/Core/arch/AVX512/TypeCastingFP16.h +130 -0
  136. package/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +420 -184
  137. package/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +40 -49
  138. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2962 -2213
  139. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +196 -212
  140. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +713 -441
  141. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMAbfloat16.h +742 -0
  142. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixVectorProduct.inc +2818 -0
  143. package/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2380 -1362
  144. package/eigen/Eigen/src/Core/arch/AltiVec/TypeCasting.h +153 -0
  145. package/eigen/Eigen/src/Core/arch/Default/BFloat16.h +390 -224
  146. package/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +78 -67
  147. package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1784 -799
  148. package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +167 -50
  149. package/eigen/Eigen/src/Core/arch/Default/Half.h +528 -379
  150. package/eigen/Eigen/src/Core/arch/Default/Settings.h +10 -12
  151. package/eigen/Eigen/src/Core/arch/GPU/Complex.h +244 -0
  152. package/eigen/Eigen/src/Core/arch/GPU/MathFunctions.h +41 -40
  153. package/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +550 -523
  154. package/eigen/Eigen/src/Core/arch/GPU/Tuple.h +268 -0
  155. package/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +27 -30
  156. package/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +8 -8
  157. package/eigen/Eigen/src/Core/arch/HVX/PacketMath.h +1088 -0
  158. package/eigen/Eigen/src/Core/arch/LSX/Complex.h +520 -0
  159. package/eigen/Eigen/src/Core/arch/LSX/GeneralBlockPanelKernel.h +23 -0
  160. package/eigen/Eigen/src/Core/arch/LSX/MathFunctions.h +43 -0
  161. package/eigen/Eigen/src/Core/arch/LSX/PacketMath.h +2866 -0
  162. package/eigen/Eigen/src/Core/arch/LSX/TypeCasting.h +526 -0
  163. package/eigen/Eigen/src/Core/arch/MSA/Complex.h +54 -82
  164. package/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +84 -92
  165. package/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +51 -47
  166. package/eigen/Eigen/src/Core/arch/NEON/Complex.h +454 -306
  167. package/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +175 -115
  168. package/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +23 -30
  169. package/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4366 -2857
  170. package/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +616 -393
  171. package/eigen/Eigen/src/Core/arch/NEON/UnaryFunctors.h +57 -0
  172. package/eigen/Eigen/src/Core/arch/SSE/Complex.h +350 -198
  173. package/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +38 -149
  174. package/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +1791 -912
  175. package/eigen/Eigen/src/Core/arch/SSE/Reductions.h +324 -0
  176. package/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +128 -40
  177. package/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +10 -6
  178. package/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +156 -234
  179. package/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +6 -3
  180. package/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +27 -32
  181. package/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +119 -117
  182. package/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +325 -419
  183. package/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +15 -17
  184. package/eigen/Eigen/src/Core/arch/ZVector/Complex.h +325 -181
  185. package/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +94 -83
  186. package/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +811 -458
  187. package/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +121 -124
  188. package/eigen/Eigen/src/Core/functors/BinaryFunctors.h +576 -370
  189. package/eigen/Eigen/src/Core/functors/NullaryFunctors.h +194 -109
  190. package/eigen/Eigen/src/Core/functors/StlFunctors.h +95 -112
  191. package/eigen/Eigen/src/Core/functors/TernaryFunctors.h +34 -7
  192. package/eigen/Eigen/src/Core/functors/UnaryFunctors.h +1038 -749
  193. package/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1883 -1375
  194. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +312 -370
  195. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +189 -176
  196. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +84 -81
  197. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +154 -73
  198. package/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +292 -337
  199. package/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +80 -77
  200. package/eigen/Eigen/src/Core/products/Parallelizer.h +207 -105
  201. package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +327 -388
  202. package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +206 -224
  203. package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +138 -147
  204. package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +58 -61
  205. package/eigen/Eigen/src/Core/products/SelfadjointProduct.h +71 -71
  206. package/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +48 -47
  207. package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +294 -369
  208. package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +246 -238
  209. package/eigen/Eigen/src/Core/products/TriangularMatrixVector.h +244 -247
  210. package/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +212 -192
  211. package/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +328 -277
  212. package/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +108 -109
  213. package/eigen/Eigen/src/Core/products/TriangularSolverVector.h +68 -94
  214. package/eigen/Eigen/src/Core/util/Assert.h +158 -0
  215. package/eigen/Eigen/src/Core/util/BlasUtil.h +342 -303
  216. package/eigen/Eigen/src/Core/util/ConfigureVectorization.h +348 -317
  217. package/eigen/Eigen/src/Core/util/Constants.h +297 -262
  218. package/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +130 -90
  219. package/eigen/Eigen/src/Core/util/EmulateArray.h +270 -0
  220. package/eigen/Eigen/src/Core/util/ForwardDeclarations.h +449 -247
  221. package/eigen/Eigen/src/Core/util/GpuHipCudaDefines.inc +101 -0
  222. package/eigen/Eigen/src/Core/util/GpuHipCudaUndefines.inc +45 -0
  223. package/eigen/Eigen/src/Core/util/IndexedViewHelper.h +417 -116
  224. package/eigen/Eigen/src/Core/util/IntegralConstant.h +211 -204
  225. package/eigen/Eigen/src/Core/util/MKL_support.h +39 -37
  226. package/eigen/Eigen/src/Core/util/Macros.h +655 -773
  227. package/eigen/Eigen/src/Core/util/MaxSizeVector.h +139 -0
  228. package/eigen/Eigen/src/Core/util/Memory.h +970 -748
  229. package/eigen/Eigen/src/Core/util/Meta.h +581 -633
  230. package/eigen/Eigen/src/Core/util/MoreMeta.h +638 -0
  231. package/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +32 -19
  232. package/eigen/Eigen/src/Core/util/ReshapedHelper.h +17 -17
  233. package/eigen/Eigen/src/Core/util/Serializer.h +209 -0
  234. package/eigen/Eigen/src/Core/util/StaticAssert.h +50 -166
  235. package/eigen/Eigen/src/Core/util/SymbolicIndex.h +377 -225
  236. package/eigen/Eigen/src/Core/util/XprHelper.h +784 -547
  237. package/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +246 -277
  238. package/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +299 -319
  239. package/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +52 -48
  240. package/eigen/Eigen/src/Eigenvalues/EigenSolver.h +413 -456
  241. package/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +309 -325
  242. package/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +157 -171
  243. package/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +292 -310
  244. package/eigen/Eigen/src/Eigenvalues/InternalHeaderCheck.h +3 -0
  245. package/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +89 -105
  246. package/eigen/Eigen/src/Eigenvalues/RealQZ.h +537 -607
  247. package/eigen/Eigen/src/Eigenvalues/RealSchur.h +342 -381
  248. package/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +41 -35
  249. package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +541 -595
  250. package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +47 -44
  251. package/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +430 -462
  252. package/eigen/Eigen/src/Geometry/AlignedBox.h +226 -227
  253. package/eigen/Eigen/src/Geometry/AngleAxis.h +131 -133
  254. package/eigen/Eigen/src/Geometry/EulerAngles.h +163 -74
  255. package/eigen/Eigen/src/Geometry/Homogeneous.h +285 -333
  256. package/eigen/Eigen/src/Geometry/Hyperplane.h +151 -160
  257. package/eigen/Eigen/src/Geometry/InternalHeaderCheck.h +3 -0
  258. package/eigen/Eigen/src/Geometry/OrthoMethods.h +168 -146
  259. package/eigen/Eigen/src/Geometry/ParametrizedLine.h +127 -127
  260. package/eigen/Eigen/src/Geometry/Quaternion.h +566 -506
  261. package/eigen/Eigen/src/Geometry/Rotation2D.h +107 -105
  262. package/eigen/Eigen/src/Geometry/RotationBase.h +148 -145
  263. package/eigen/Eigen/src/Geometry/Scaling.h +113 -106
  264. package/eigen/Eigen/src/Geometry/Transform.h +858 -936
  265. package/eigen/Eigen/src/Geometry/Translation.h +94 -92
  266. package/eigen/Eigen/src/Geometry/Umeyama.h +79 -84
  267. package/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +90 -104
  268. package/eigen/Eigen/src/Householder/BlockHouseholder.h +51 -46
  269. package/eigen/Eigen/src/Householder/Householder.h +102 -124
  270. package/eigen/Eigen/src/Householder/HouseholderSequence.h +412 -453
  271. package/eigen/Eigen/src/Householder/InternalHeaderCheck.h +3 -0
  272. package/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +149 -162
  273. package/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +124 -119
  274. package/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +92 -104
  275. package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +251 -243
  276. package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +224 -228
  277. package/eigen/Eigen/src/IterativeLinearSolvers/InternalHeaderCheck.h +3 -0
  278. package/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +178 -227
  279. package/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +79 -84
  280. package/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +54 -60
  281. package/eigen/Eigen/src/Jacobi/InternalHeaderCheck.h +3 -0
  282. package/eigen/Eigen/src/Jacobi/Jacobi.h +252 -308
  283. package/eigen/Eigen/src/KLUSupport/InternalHeaderCheck.h +3 -0
  284. package/eigen/Eigen/src/KLUSupport/KLUSupport.h +208 -227
  285. package/eigen/Eigen/src/LU/Determinant.h +50 -69
  286. package/eigen/Eigen/src/LU/FullPivLU.h +545 -596
  287. package/eigen/Eigen/src/LU/InternalHeaderCheck.h +3 -0
  288. package/eigen/Eigen/src/LU/InverseImpl.h +206 -285
  289. package/eigen/Eigen/src/LU/PartialPivLU.h +390 -428
  290. package/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h +54 -40
  291. package/eigen/Eigen/src/LU/arch/InverseSize4.h +72 -70
  292. package/eigen/Eigen/src/MetisSupport/InternalHeaderCheck.h +3 -0
  293. package/eigen/Eigen/src/MetisSupport/MetisSupport.h +81 -93
  294. package/eigen/Eigen/src/OrderingMethods/Amd.h +243 -265
  295. package/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +831 -1004
  296. package/eigen/Eigen/src/OrderingMethods/InternalHeaderCheck.h +3 -0
  297. package/eigen/Eigen/src/OrderingMethods/Ordering.h +112 -119
  298. package/eigen/Eigen/src/PaStiXSupport/InternalHeaderCheck.h +3 -0
  299. package/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +524 -570
  300. package/eigen/Eigen/src/PardisoSupport/InternalHeaderCheck.h +3 -0
  301. package/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +385 -430
  302. package/eigen/Eigen/src/QR/ColPivHouseholderQR.h +479 -479
  303. package/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +120 -56
  304. package/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +166 -153
  305. package/eigen/Eigen/src/QR/FullPivHouseholderQR.h +495 -475
  306. package/eigen/Eigen/src/QR/HouseholderQR.h +394 -285
  307. package/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h +32 -23
  308. package/eigen/Eigen/src/QR/InternalHeaderCheck.h +3 -0
  309. package/eigen/Eigen/src/SPQRSupport/InternalHeaderCheck.h +3 -0
  310. package/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +244 -264
  311. package/eigen/Eigen/src/SVD/BDCSVD.h +817 -713
  312. package/eigen/Eigen/src/SVD/BDCSVD_LAPACKE.h +174 -0
  313. package/eigen/Eigen/src/SVD/InternalHeaderCheck.h +3 -0
  314. package/eigen/Eigen/src/SVD/JacobiSVD.h +577 -543
  315. package/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +85 -49
  316. package/eigen/Eigen/src/SVD/SVDBase.h +242 -182
  317. package/eigen/Eigen/src/SVD/UpperBidiagonalization.h +200 -235
  318. package/eigen/Eigen/src/SparseCholesky/InternalHeaderCheck.h +3 -0
  319. package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +765 -594
  320. package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +308 -94
  321. package/eigen/Eigen/src/SparseCore/AmbiVector.h +202 -251
  322. package/eigen/Eigen/src/SparseCore/CompressedStorage.h +184 -252
  323. package/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +134 -178
  324. package/eigen/Eigen/src/SparseCore/InternalHeaderCheck.h +3 -0
  325. package/eigen/Eigen/src/SparseCore/SparseAssign.h +149 -140
  326. package/eigen/Eigen/src/SparseCore/SparseBlock.h +403 -440
  327. package/eigen/Eigen/src/SparseCore/SparseColEtree.h +100 -112
  328. package/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +525 -303
  329. package/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +555 -339
  330. package/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +100 -108
  331. package/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +169 -197
  332. package/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +71 -71
  333. package/eigen/Eigen/src/SparseCore/SparseDot.h +49 -47
  334. package/eigen/Eigen/src/SparseCore/SparseFuzzy.h +13 -11
  335. package/eigen/Eigen/src/SparseCore/SparseMap.h +243 -253
  336. package/eigen/Eigen/src/SparseCore/SparseMatrix.h +1603 -1245
  337. package/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +403 -350
  338. package/eigen/Eigen/src/SparseCore/SparsePermutation.h +186 -115
  339. package/eigen/Eigen/src/SparseCore/SparseProduct.h +94 -97
  340. package/eigen/Eigen/src/SparseCore/SparseRedux.h +22 -24
  341. package/eigen/Eigen/src/SparseCore/SparseRef.h +268 -295
  342. package/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +370 -416
  343. package/eigen/Eigen/src/SparseCore/SparseSolverBase.h +78 -87
  344. package/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +81 -95
  345. package/eigen/Eigen/src/SparseCore/SparseTranspose.h +62 -71
  346. package/eigen/Eigen/src/SparseCore/SparseTriangularView.h +132 -144
  347. package/eigen/Eigen/src/SparseCore/SparseUtil.h +138 -115
  348. package/eigen/Eigen/src/SparseCore/SparseVector.h +426 -372
  349. package/eigen/Eigen/src/SparseCore/SparseView.h +164 -193
  350. package/eigen/Eigen/src/SparseCore/TriangularSolver.h +129 -170
  351. package/eigen/Eigen/src/SparseLU/InternalHeaderCheck.h +3 -0
  352. package/eigen/Eigen/src/SparseLU/SparseLU.h +756 -710
  353. package/eigen/Eigen/src/SparseLU/SparseLUImpl.h +61 -48
  354. package/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +102 -118
  355. package/eigen/Eigen/src/SparseLU/SparseLU_Structs.h +38 -35
  356. package/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +245 -301
  357. package/eigen/Eigen/src/SparseLU/SparseLU_Utils.h +44 -49
  358. package/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h +104 -108
  359. package/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +89 -100
  360. package/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +57 -58
  361. package/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +43 -55
  362. package/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +74 -71
  363. package/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +124 -132
  364. package/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h +136 -159
  365. package/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h +51 -52
  366. package/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h +67 -73
  367. package/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h +24 -26
  368. package/eigen/Eigen/src/SparseQR/InternalHeaderCheck.h +3 -0
  369. package/eigen/Eigen/src/SparseQR/SparseQR.h +450 -502
  370. package/eigen/Eigen/src/StlSupport/StdDeque.h +28 -93
  371. package/eigen/Eigen/src/StlSupport/StdList.h +28 -84
  372. package/eigen/Eigen/src/StlSupport/StdVector.h +28 -108
  373. package/eigen/Eigen/src/StlSupport/details.h +48 -50
  374. package/eigen/Eigen/src/SuperLUSupport/InternalHeaderCheck.h +3 -0
  375. package/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +634 -730
  376. package/eigen/Eigen/src/ThreadPool/Barrier.h +70 -0
  377. package/eigen/Eigen/src/ThreadPool/CoreThreadPoolDevice.h +336 -0
  378. package/eigen/Eigen/src/ThreadPool/EventCount.h +241 -0
  379. package/eigen/Eigen/src/ThreadPool/ForkJoin.h +140 -0
  380. package/eigen/Eigen/src/ThreadPool/InternalHeaderCheck.h +4 -0
  381. package/eigen/Eigen/src/ThreadPool/NonBlockingThreadPool.h +587 -0
  382. package/eigen/Eigen/src/ThreadPool/RunQueue.h +230 -0
  383. package/eigen/Eigen/src/ThreadPool/ThreadCancel.h +21 -0
  384. package/eigen/Eigen/src/ThreadPool/ThreadEnvironment.h +43 -0
  385. package/eigen/Eigen/src/ThreadPool/ThreadLocal.h +289 -0
  386. package/eigen/Eigen/src/ThreadPool/ThreadPoolInterface.h +50 -0
  387. package/eigen/Eigen/src/ThreadPool/ThreadYield.h +16 -0
  388. package/eigen/Eigen/src/UmfPackSupport/InternalHeaderCheck.h +3 -0
  389. package/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +428 -464
  390. package/eigen/Eigen/src/misc/Image.h +41 -43
  391. package/eigen/Eigen/src/misc/InternalHeaderCheck.h +3 -0
  392. package/eigen/Eigen/src/misc/Kernel.h +39 -41
  393. package/eigen/Eigen/src/misc/RealSvd2x2.h +19 -21
  394. package/eigen/Eigen/src/misc/blas.h +83 -426
  395. package/eigen/Eigen/src/misc/lapacke.h +9972 -16179
  396. package/eigen/Eigen/src/misc/lapacke_helpers.h +163 -0
  397. package/eigen/Eigen/src/misc/lapacke_mangling.h +4 -5
  398. package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.inc +344 -0
  399. package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.inc +544 -0
  400. package/eigen/Eigen/src/plugins/{BlockMethods.h → BlockMethods.inc} +434 -506
  401. package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.inc +116 -0
  402. package/eigen/Eigen/src/plugins/{CommonCwiseUnaryOps.h → CommonCwiseUnaryOps.inc} +58 -68
  403. package/eigen/Eigen/src/plugins/IndexedViewMethods.inc +192 -0
  404. package/eigen/Eigen/src/plugins/InternalHeaderCheck.inc +3 -0
  405. package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.inc +331 -0
  406. package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.inc +118 -0
  407. package/eigen/Eigen/src/plugins/ReshapedMethods.inc +133 -0
  408. package/package.json +1 -1
  409. package/eigen/COPYING.APACHE +0 -203
  410. package/eigen/COPYING.BSD +0 -26
  411. package/eigen/COPYING.GPL +0 -674
  412. package/eigen/COPYING.LGPL +0 -502
  413. package/eigen/COPYING.MINPACK +0 -51
  414. package/eigen/COPYING.MPL2 +0 -373
  415. package/eigen/COPYING.README +0 -18
  416. package/eigen/Eigen/src/Core/BooleanRedux.h +0 -162
  417. package/eigen/Eigen/src/Core/arch/CUDA/Complex.h +0 -258
  418. package/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +0 -120
  419. package/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +0 -694
  420. package/eigen/Eigen/src/Core/util/NonMPL2.h +0 -3
  421. package/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h +0 -67
  422. package/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +0 -280
  423. package/eigen/Eigen/src/misc/lapack.h +0 -152
  424. package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +0 -358
  425. package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +0 -696
  426. package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h +0 -115
  427. package/eigen/Eigen/src/plugins/IndexedViewMethods.h +0 -262
  428. package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +0 -152
  429. package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +0 -95
  430. package/eigen/Eigen/src/plugins/ReshapedMethods.h +0 -149
  431. package/eigen/README.md +0 -5
@@ -12,6 +12,9 @@
12
12
  #ifndef EIGEN_ASSIGN_EVALUATOR_H
13
13
  #define EIGEN_ASSIGN_EVALUATOR_H
14
14
 
15
+ // IWYU pragma: private
16
+ #include "./InternalHeaderCheck.h"
17
+
15
18
  namespace Eigen {
16
19
 
17
20
  // This implementation is based on Assign.h
@@ -19,151 +22,139 @@ namespace Eigen {
19
22
  namespace internal {
20
23
 
21
24
  /***************************************************************************
22
- * Part 1 : the logic deciding a strategy for traversal and unrolling *
23
- ***************************************************************************/
25
+ * Part 1 : the logic deciding a strategy for traversal and unrolling *
26
+ ***************************************************************************/
24
27
 
25
28
  // copy_using_evaluator_traits is based on assign_traits
26
29
 
27
- template <typename DstEvaluator, typename SrcEvaluator, typename AssignFunc, int MaxPacketSize = -1>
28
- struct copy_using_evaluator_traits
29
- {
30
- typedef typename DstEvaluator::XprType Dst;
31
- typedef typename Dst::Scalar DstScalar;
32
-
33
- enum {
34
- DstFlags = DstEvaluator::Flags,
35
- SrcFlags = SrcEvaluator::Flags
36
- };
30
+ template <typename DstEvaluator, typename SrcEvaluator, typename AssignFunc, int MaxPacketSize = Dynamic>
31
+ struct copy_using_evaluator_traits {
32
+ using Src = typename SrcEvaluator::XprType;
33
+ using Dst = typename DstEvaluator::XprType;
34
+ using DstScalar = typename Dst::Scalar;
37
35
 
38
- public:
39
- enum {
40
- DstAlignment = DstEvaluator::Alignment,
41
- SrcAlignment = SrcEvaluator::Alignment,
42
- DstHasDirectAccess = (DstFlags & DirectAccessBit) == DirectAccessBit,
43
- JointAlignment = EIGEN_PLAIN_ENUM_MIN(DstAlignment,SrcAlignment)
44
- };
36
+ static constexpr int DstFlags = DstEvaluator::Flags;
37
+ static constexpr int SrcFlags = SrcEvaluator::Flags;
45
38
 
46
- private:
47
- enum {
48
- InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime)
49
- : int(DstFlags)&RowMajorBit ? int(Dst::ColsAtCompileTime)
50
- : int(Dst::RowsAtCompileTime),
51
- InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime)
52
- : int(DstFlags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime)
53
- : int(Dst::MaxRowsAtCompileTime),
54
- RestrictedInnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(InnerSize,MaxPacketSize),
55
- RestrictedLinearSize = EIGEN_SIZE_MIN_PREFER_FIXED(Dst::SizeAtCompileTime,MaxPacketSize),
56
- OuterStride = int(outer_stride_at_compile_time<Dst>::ret),
57
- MaxSizeAtCompileTime = Dst::SizeAtCompileTime
58
- };
39
+ public:
40
+ static constexpr int DstAlignment = DstEvaluator::Alignment;
41
+ static constexpr int SrcAlignment = SrcEvaluator::Alignment;
42
+ static constexpr int JointAlignment = plain_enum_min(DstAlignment, SrcAlignment);
43
+ static constexpr bool DstHasDirectAccess = bool(DstFlags & DirectAccessBit);
44
+ static constexpr bool SrcIsRowMajor = bool(SrcFlags & RowMajorBit);
45
+ static constexpr bool DstIsRowMajor = bool(DstFlags & RowMajorBit);
46
+ static constexpr bool IsVectorAtCompileTime = Dst::IsVectorAtCompileTime;
47
+ static constexpr int RowsAtCompileTime = size_prefer_fixed(Src::RowsAtCompileTime, Dst::RowsAtCompileTime);
48
+ static constexpr int ColsAtCompileTime = size_prefer_fixed(Src::ColsAtCompileTime, Dst::ColsAtCompileTime);
49
+ static constexpr int SizeAtCompileTime = size_at_compile_time(RowsAtCompileTime, ColsAtCompileTime);
50
+ static constexpr int MaxRowsAtCompileTime =
51
+ min_size_prefer_fixed(Src::MaxRowsAtCompileTime, Dst::MaxRowsAtCompileTime);
52
+ static constexpr int MaxColsAtCompileTime =
53
+ min_size_prefer_fixed(Src::MaxColsAtCompileTime, Dst::MaxColsAtCompileTime);
54
+ static constexpr int MaxSizeAtCompileTime =
55
+ min_size_prefer_fixed(Src::MaxSizeAtCompileTime, Dst::MaxSizeAtCompileTime);
56
+ static constexpr int InnerSizeAtCompileTime = IsVectorAtCompileTime ? SizeAtCompileTime
57
+ : DstIsRowMajor ? ColsAtCompileTime
58
+ : RowsAtCompileTime;
59
+ static constexpr int MaxInnerSizeAtCompileTime = IsVectorAtCompileTime ? MaxSizeAtCompileTime
60
+ : DstIsRowMajor ? MaxColsAtCompileTime
61
+ : MaxRowsAtCompileTime;
62
+ static constexpr int RestrictedInnerSize = min_size_prefer_fixed(MaxInnerSizeAtCompileTime, MaxPacketSize);
63
+ static constexpr int RestrictedLinearSize = min_size_prefer_fixed(MaxSizeAtCompileTime, MaxPacketSize);
64
+ static constexpr int OuterStride = outer_stride_at_compile_time<Dst>::ret;
59
65
 
60
66
  // TODO distinguish between linear traversal and inner-traversals
61
- typedef typename find_best_packet<DstScalar,RestrictedLinearSize>::type LinearPacketType;
62
- typedef typename find_best_packet<DstScalar,RestrictedInnerSize>::type InnerPacketType;
63
-
64
- enum {
65
- LinearPacketSize = unpacket_traits<LinearPacketType>::size,
66
- InnerPacketSize = unpacket_traits<InnerPacketType>::size
67
- };
67
+ using LinearPacketType = typename find_best_packet<DstScalar, RestrictedLinearSize>::type;
68
+ using InnerPacketType = typename find_best_packet<DstScalar, RestrictedInnerSize>::type;
68
69
 
69
- public:
70
- enum {
71
- LinearRequiredAlignment = unpacket_traits<LinearPacketType>::alignment,
72
- InnerRequiredAlignment = unpacket_traits<InnerPacketType>::alignment
73
- };
70
+ static constexpr int LinearPacketSize = unpacket_traits<LinearPacketType>::size;
71
+ static constexpr int InnerPacketSize = unpacket_traits<InnerPacketType>::size;
74
72
 
75
- private:
76
- enum {
77
- DstIsRowMajor = DstFlags&RowMajorBit,
78
- SrcIsRowMajor = SrcFlags&RowMajorBit,
79
- StorageOrdersAgree = (int(DstIsRowMajor) == int(SrcIsRowMajor)),
80
- MightVectorize = bool(StorageOrdersAgree)
81
- && (int(DstFlags) & int(SrcFlags) & ActualPacketAccessBit)
82
- && bool(functor_traits<AssignFunc>::PacketAccess),
83
- MayInnerVectorize = MightVectorize
84
- && int(InnerSize)!=Dynamic && int(InnerSize)%int(InnerPacketSize)==0
85
- && int(OuterStride)!=Dynamic && int(OuterStride)%int(InnerPacketSize)==0
86
- && (EIGEN_UNALIGNED_VECTORIZE || int(JointAlignment)>=int(InnerRequiredAlignment)),
87
- MayLinearize = bool(StorageOrdersAgree) && (int(DstFlags) & int(SrcFlags) & LinearAccessBit),
88
- MayLinearVectorize = bool(MightVectorize) && bool(MayLinearize) && bool(DstHasDirectAccess)
89
- && (EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)) || MaxSizeAtCompileTime == Dynamic),
90
- /* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
91
- so it's only good for large enough sizes. */
92
- MaySliceVectorize = bool(MightVectorize) && bool(DstHasDirectAccess)
93
- && (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=(EIGEN_UNALIGNED_VECTORIZE?InnerPacketSize:(3*InnerPacketSize)))
94
- /* slice vectorization can be slow, so we only want it if the slices are big, which is
95
- indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block
96
- in a fixed-size matrix
97
- However, with EIGEN_UNALIGNED_VECTORIZE and unrolling, slice vectorization is still worth it */
98
- };
99
-
100
- public:
101
- enum {
102
- Traversal = int(Dst::SizeAtCompileTime) == 0 ? int(AllAtOnceTraversal) // If compile-size is zero, traversing will fail at compile-time.
103
- : (int(MayLinearVectorize) && (LinearPacketSize>InnerPacketSize)) ? int(LinearVectorizedTraversal)
104
- : int(MayInnerVectorize) ? int(InnerVectorizedTraversal)
105
- : int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
106
- : int(MaySliceVectorize) ? int(SliceVectorizedTraversal)
107
- : int(MayLinearize) ? int(LinearTraversal)
108
- : int(DefaultTraversal),
109
- Vectorized = int(Traversal) == InnerVectorizedTraversal
110
- || int(Traversal) == LinearVectorizedTraversal
111
- || int(Traversal) == SliceVectorizedTraversal
112
- };
113
-
114
- typedef typename conditional<int(Traversal)==LinearVectorizedTraversal, LinearPacketType, InnerPacketType>::type PacketType;
73
+ public:
74
+ static constexpr int LinearRequiredAlignment = unpacket_traits<LinearPacketType>::alignment;
75
+ static constexpr int InnerRequiredAlignment = unpacket_traits<InnerPacketType>::alignment;
76
+
77
+ private:
78
+ static constexpr bool StorageOrdersAgree = DstIsRowMajor == SrcIsRowMajor;
79
+ static constexpr bool MightVectorize = StorageOrdersAgree && bool(DstFlags & SrcFlags & ActualPacketAccessBit) &&
80
+ bool(functor_traits<AssignFunc>::PacketAccess);
81
+ static constexpr bool MayInnerVectorize = MightVectorize && (InnerSizeAtCompileTime != Dynamic) &&
82
+ (InnerSizeAtCompileTime % InnerPacketSize == 0) &&
83
+ (OuterStride != Dynamic) && (OuterStride % InnerPacketSize == 0) &&
84
+ (EIGEN_UNALIGNED_VECTORIZE || JointAlignment >= InnerRequiredAlignment);
85
+ static constexpr bool MayLinearize = StorageOrdersAgree && (DstFlags & SrcFlags & LinearAccessBit);
86
+ static constexpr bool MayLinearVectorize =
87
+ MightVectorize && MayLinearize && DstHasDirectAccess &&
88
+ (EIGEN_UNALIGNED_VECTORIZE || (DstAlignment >= LinearRequiredAlignment) || MaxSizeAtCompileTime == Dynamic) &&
89
+ (MaxSizeAtCompileTime == Dynamic || MaxSizeAtCompileTime >= LinearPacketSize);
90
+ /* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
91
+ so it's only good for large enough sizes. */
92
+ static constexpr int InnerSizeThreshold = (EIGEN_UNALIGNED_VECTORIZE ? 1 : 3) * InnerPacketSize;
93
+ static constexpr bool MaySliceVectorize =
94
+ MightVectorize && DstHasDirectAccess &&
95
+ (MaxInnerSizeAtCompileTime == Dynamic || MaxInnerSizeAtCompileTime >= InnerSizeThreshold);
96
+ /* slice vectorization can be slow, so we only want it if the slices are big, which is
97
+ indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block
98
+ in a fixed-size matrix
99
+ However, with EIGEN_UNALIGNED_VECTORIZE and unrolling, slice vectorization is still worth it */
115
100
 
116
- private:
117
- enum {
118
- ActualPacketSize = int(Traversal)==LinearVectorizedTraversal ? LinearPacketSize
119
- : Vectorized ? InnerPacketSize
120
- : 1,
121
- UnrollingLimit = EIGEN_UNROLLING_LIMIT * ActualPacketSize,
122
- MayUnrollCompletely = int(Dst::SizeAtCompileTime) != Dynamic
123
- && int(Dst::SizeAtCompileTime) * (int(DstEvaluator::CoeffReadCost)+int(SrcEvaluator::CoeffReadCost)) <= int(UnrollingLimit),
124
- MayUnrollInner = int(InnerSize) != Dynamic
125
- && int(InnerSize) * (int(DstEvaluator::CoeffReadCost)+int(SrcEvaluator::CoeffReadCost)) <= int(UnrollingLimit)
126
- };
101
+ public:
102
+ static constexpr int Traversal = SizeAtCompileTime == 0 ? AllAtOnceTraversal
103
+ : (MayLinearVectorize && (LinearPacketSize > InnerPacketSize))
104
+ ? LinearVectorizedTraversal
105
+ : MayInnerVectorize ? InnerVectorizedTraversal
106
+ : MayLinearVectorize ? LinearVectorizedTraversal
107
+ : MaySliceVectorize ? SliceVectorizedTraversal
108
+ : MayLinearize ? LinearTraversal
109
+ : DefaultTraversal;
110
+ static constexpr bool Vectorized = Traversal == InnerVectorizedTraversal || Traversal == LinearVectorizedTraversal ||
111
+ Traversal == SliceVectorizedTraversal;
112
+
113
+ using PacketType = std::conditional_t<Traversal == LinearVectorizedTraversal, LinearPacketType, InnerPacketType>;
114
+
115
+ private:
116
+ static constexpr int ActualPacketSize = Vectorized ? unpacket_traits<PacketType>::size : 1;
117
+ static constexpr int UnrollingLimit = EIGEN_UNROLLING_LIMIT * ActualPacketSize;
118
+ static constexpr int CoeffReadCost = int(DstEvaluator::CoeffReadCost) + int(SrcEvaluator::CoeffReadCost);
119
+ static constexpr bool MayUnrollCompletely =
120
+ (SizeAtCompileTime != Dynamic) && (SizeAtCompileTime * CoeffReadCost <= UnrollingLimit);
121
+ static constexpr bool MayUnrollInner =
122
+ (InnerSizeAtCompileTime != Dynamic) && (InnerSizeAtCompileTime * CoeffReadCost <= UnrollingLimit);
127
123
 
128
- public:
129
- enum {
130
- Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal))
131
- ? (
132
- int(MayUnrollCompletely) ? int(CompleteUnrolling)
133
- : int(MayUnrollInner) ? int(InnerUnrolling)
134
- : int(NoUnrolling)
135
- )
136
- : int(Traversal) == int(LinearVectorizedTraversal)
137
- ? ( bool(MayUnrollCompletely) && ( EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)))
138
- ? int(CompleteUnrolling)
139
- : int(NoUnrolling) )
140
- : int(Traversal) == int(LinearTraversal)
141
- ? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling)
142
- : int(NoUnrolling) )
124
+ public:
125
+ static constexpr int Unrolling =
126
+ (Traversal == InnerVectorizedTraversal || Traversal == DefaultTraversal)
127
+ ? (MayUnrollCompletely ? CompleteUnrolling
128
+ : MayUnrollInner ? InnerUnrolling
129
+ : NoUnrolling)
130
+ : Traversal == LinearVectorizedTraversal
131
+ ? (MayUnrollCompletely && (EIGEN_UNALIGNED_VECTORIZE || (DstAlignment >= LinearRequiredAlignment))
132
+ ? CompleteUnrolling
133
+ : NoUnrolling)
134
+ : Traversal == LinearTraversal ? (MayUnrollCompletely ? CompleteUnrolling : NoUnrolling)
143
135
  #if EIGEN_UNALIGNED_VECTORIZE
144
- : int(Traversal) == int(SliceVectorizedTraversal)
145
- ? ( bool(MayUnrollInner) ? int(InnerUnrolling)
146
- : int(NoUnrolling) )
136
+ : Traversal == SliceVectorizedTraversal ? (MayUnrollInner ? InnerUnrolling : NoUnrolling)
147
137
  #endif
148
- : int(NoUnrolling)
149
- };
138
+ : NoUnrolling;
139
+ static constexpr bool UsePacketSegment = has_packet_segment<PacketType>::value;
150
140
 
151
141
  #ifdef EIGEN_DEBUG_ASSIGN
152
- static void debug()
153
- {
142
+ static void debug() {
154
143
  std::cerr << "DstXpr: " << typeid(typename DstEvaluator::XprType).name() << std::endl;
155
144
  std::cerr << "SrcXpr: " << typeid(typename SrcEvaluator::XprType).name() << std::endl;
156
145
  std::cerr.setf(std::ios::hex, std::ios::basefield);
157
- std::cerr << "DstFlags" << " = " << DstFlags << " (" << demangle_flags(DstFlags) << " )" << std::endl;
158
- std::cerr << "SrcFlags" << " = " << SrcFlags << " (" << demangle_flags(SrcFlags) << " )" << std::endl;
146
+ std::cerr << "DstFlags"
147
+ << " = " << DstFlags << " (" << demangle_flags(DstFlags) << " )" << std::endl;
148
+ std::cerr << "SrcFlags"
149
+ << " = " << SrcFlags << " (" << demangle_flags(SrcFlags) << " )" << std::endl;
159
150
  std::cerr.unsetf(std::ios::hex);
160
151
  EIGEN_DEBUG_VAR(DstAlignment)
161
152
  EIGEN_DEBUG_VAR(SrcAlignment)
162
153
  EIGEN_DEBUG_VAR(LinearRequiredAlignment)
163
154
  EIGEN_DEBUG_VAR(InnerRequiredAlignment)
164
155
  EIGEN_DEBUG_VAR(JointAlignment)
165
- EIGEN_DEBUG_VAR(InnerSize)
166
- EIGEN_DEBUG_VAR(InnerMaxSize)
156
+ EIGEN_DEBUG_VAR(InnerSizeAtCompileTime)
157
+ EIGEN_DEBUG_VAR(MaxInnerSizeAtCompileTime)
167
158
  EIGEN_DEBUG_VAR(LinearPacketSize)
168
159
  EIGEN_DEBUG_VAR(InnerPacketSize)
169
160
  EIGEN_DEBUG_VAR(ActualPacketSize)
@@ -173,163 +164,178 @@ public:
173
164
  EIGEN_DEBUG_VAR(MayInnerVectorize)
174
165
  EIGEN_DEBUG_VAR(MayLinearVectorize)
175
166
  EIGEN_DEBUG_VAR(MaySliceVectorize)
176
- std::cerr << "Traversal" << " = " << Traversal << " (" << demangle_traversal(Traversal) << ")" << std::endl;
167
+ std::cerr << "Traversal"
168
+ << " = " << Traversal << " (" << demangle_traversal(Traversal) << ")" << std::endl;
177
169
  EIGEN_DEBUG_VAR(SrcEvaluator::CoeffReadCost)
178
170
  EIGEN_DEBUG_VAR(DstEvaluator::CoeffReadCost)
179
171
  EIGEN_DEBUG_VAR(Dst::SizeAtCompileTime)
180
172
  EIGEN_DEBUG_VAR(UnrollingLimit)
181
173
  EIGEN_DEBUG_VAR(MayUnrollCompletely)
182
174
  EIGEN_DEBUG_VAR(MayUnrollInner)
183
- std::cerr << "Unrolling" << " = " << Unrolling << " (" << demangle_unrolling(Unrolling) << ")" << std::endl;
175
+ std::cerr << "Unrolling"
176
+ << " = " << Unrolling << " (" << demangle_unrolling(Unrolling) << ")" << std::endl;
184
177
  std::cerr << std::endl;
185
178
  }
186
179
  #endif
187
180
  };
188
181
 
189
182
  /***************************************************************************
190
- * Part 2 : meta-unrollers
191
- ***************************************************************************/
183
+ * Part 2 : meta-unrollers
184
+ ***************************************************************************/
192
185
 
193
186
  /************************
194
187
  *** Default traversal ***
195
188
  ************************/
196
189
 
197
- template<typename Kernel, int Index, int Stop>
198
- struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling
199
- {
200
- // FIXME: this is not very clean, perhaps this information should be provided by the kernel?
201
- typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
202
- typedef typename DstEvaluatorType::XprType DstXprType;
190
+ template <typename Kernel, int Index_, int Stop>
191
+ struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling {
192
+ static constexpr int Outer = Index_ / Kernel::AssignmentTraits::InnerSizeAtCompileTime;
193
+ static constexpr int Inner = Index_ % Kernel::AssignmentTraits::InnerSizeAtCompileTime;
203
194
 
204
- enum {
205
- outer = Index / DstXprType::InnerSizeAtCompileTime,
206
- inner = Index % DstXprType::InnerSizeAtCompileTime
207
- };
208
-
209
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
210
- {
211
- kernel.assignCoeffByOuterInner(outer, inner);
212
- copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Index+1, Stop>::run(kernel);
195
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) {
196
+ kernel.assignCoeffByOuterInner(Outer, Inner);
197
+ copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Index_ + 1, Stop>::run(kernel);
213
198
  }
214
199
  };
215
200
 
216
- template<typename Kernel, int Stop>
217
- struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Stop, Stop>
218
- {
219
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
201
+ template <typename Kernel, int Stop>
202
+ struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Stop, Stop> {
203
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&) {}
220
204
  };
221
205
 
222
- template<typename Kernel, int Index_, int Stop>
223
- struct copy_using_evaluator_DefaultTraversal_InnerUnrolling
224
- {
225
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer)
226
- {
206
+ template <typename Kernel, int Index_, int Stop>
207
+ struct copy_using_evaluator_DefaultTraversal_InnerUnrolling {
208
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel, Index outer) {
227
209
  kernel.assignCoeffByOuterInner(outer, Index_);
228
- copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Index_+1, Stop>::run(kernel, outer);
210
+ copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Index_ + 1, Stop>::run(kernel, outer);
229
211
  }
230
212
  };
231
213
 
232
- template<typename Kernel, int Stop>
233
- struct copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Stop, Stop>
234
- {
235
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, Index) { }
214
+ template <typename Kernel, int Stop>
215
+ struct copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Stop, Stop> {
216
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&, Index) {}
236
217
  };
237
218
 
238
219
  /***********************
239
220
  *** Linear traversal ***
240
221
  ***********************/
241
222
 
242
- template<typename Kernel, int Index, int Stop>
243
- struct copy_using_evaluator_LinearTraversal_CompleteUnrolling
244
- {
245
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel)
246
- {
247
- kernel.assignCoeff(Index);
248
- copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Index+1, Stop>::run(kernel);
223
+ template <typename Kernel, int Index_, int Stop>
224
+ struct copy_using_evaluator_LinearTraversal_CompleteUnrolling {
225
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) {
226
+ kernel.assignCoeff(Index_);
227
+ copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Index_ + 1, Stop>::run(kernel);
249
228
  }
250
229
  };
251
230
 
252
- template<typename Kernel, int Stop>
253
- struct copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Stop, Stop>
254
- {
255
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
231
+ template <typename Kernel, int Stop>
232
+ struct copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Stop, Stop> {
233
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&) {}
256
234
  };
257
235
 
258
236
  /**************************
259
237
  *** Inner vectorization ***
260
238
  **************************/
261
239
 
262
- template<typename Kernel, int Index, int Stop>
263
- struct copy_using_evaluator_innervec_CompleteUnrolling
264
- {
265
- // FIXME: this is not very clean, perhaps this information should be provided by the kernel?
266
- typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
267
- typedef typename DstEvaluatorType::XprType DstXprType;
268
- typedef typename Kernel::PacketType PacketType;
269
-
270
- enum {
271
- outer = Index / DstXprType::InnerSizeAtCompileTime,
272
- inner = Index % DstXprType::InnerSizeAtCompileTime,
273
- SrcAlignment = Kernel::AssignmentTraits::SrcAlignment,
274
- DstAlignment = Kernel::AssignmentTraits::DstAlignment
275
- };
276
-
277
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
278
- {
279
- kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, inner);
280
- enum { NextIndex = Index + unpacket_traits<PacketType>::size };
240
+ template <typename Kernel, int Index_, int Stop>
241
+ struct copy_using_evaluator_innervec_CompleteUnrolling {
242
+ using PacketType = typename Kernel::PacketType;
243
+ static constexpr int Outer = Index_ / Kernel::AssignmentTraits::InnerSizeAtCompileTime;
244
+ static constexpr int Inner = Index_ % Kernel::AssignmentTraits::InnerSizeAtCompileTime;
245
+ static constexpr int NextIndex = Index_ + unpacket_traits<PacketType>::size;
246
+ static constexpr int SrcAlignment = Kernel::AssignmentTraits::SrcAlignment;
247
+ static constexpr int DstAlignment = Kernel::AssignmentTraits::DstAlignment;
248
+
249
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) {
250
+ kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(Outer, Inner);
281
251
  copy_using_evaluator_innervec_CompleteUnrolling<Kernel, NextIndex, Stop>::run(kernel);
282
252
  }
283
253
  };
284
254
 
285
- template<typename Kernel, int Stop>
286
- struct copy_using_evaluator_innervec_CompleteUnrolling<Kernel, Stop, Stop>
287
- {
288
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
255
+ template <typename Kernel, int Stop>
256
+ struct copy_using_evaluator_innervec_CompleteUnrolling<Kernel, Stop, Stop> {
257
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&) {}
289
258
  };
290
259
 
291
- template<typename Kernel, int Index_, int Stop, int SrcAlignment, int DstAlignment>
292
- struct copy_using_evaluator_innervec_InnerUnrolling
293
- {
294
- typedef typename Kernel::PacketType PacketType;
295
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer)
296
- {
260
+ template <typename Kernel, int Index_, int Stop, int SrcAlignment, int DstAlignment>
261
+ struct copy_using_evaluator_innervec_InnerUnrolling {
262
+ using PacketType = typename Kernel::PacketType;
263
+ static constexpr int NextIndex = Index_ + unpacket_traits<PacketType>::size;
264
+
265
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel, Index outer) {
297
266
  kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, Index_);
298
- enum { NextIndex = Index_ + unpacket_traits<PacketType>::size };
299
- copy_using_evaluator_innervec_InnerUnrolling<Kernel, NextIndex, Stop, SrcAlignment, DstAlignment>::run(kernel, outer);
267
+ copy_using_evaluator_innervec_InnerUnrolling<Kernel, NextIndex, Stop, SrcAlignment, DstAlignment>::run(kernel,
268
+ outer);
269
+ }
270
+ };
271
+
272
+ template <typename Kernel, int Stop, int SrcAlignment, int DstAlignment>
273
+ struct copy_using_evaluator_innervec_InnerUnrolling<Kernel, Stop, Stop, SrcAlignment, DstAlignment> {
274
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&, Index) {}
275
+ };
276
+
277
+ template <typename Kernel, int Start, int Stop, int SrcAlignment, int DstAlignment, bool UsePacketSegment>
278
+ struct copy_using_evaluator_innervec_segment {
279
+ using PacketType = typename Kernel::PacketType;
280
+
281
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel, Index outer) {
282
+ kernel.template assignPacketSegmentByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, Start, 0,
283
+ Stop - Start);
300
284
  }
301
285
  };
302
286
 
303
- template<typename Kernel, int Stop, int SrcAlignment, int DstAlignment>
304
- struct copy_using_evaluator_innervec_InnerUnrolling<Kernel, Stop, Stop, SrcAlignment, DstAlignment>
305
- {
306
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &, Index) { }
287
+ template <typename Kernel, int Start, int Stop, int SrcAlignment, int DstAlignment>
288
+ struct copy_using_evaluator_innervec_segment<Kernel, Start, Stop, SrcAlignment, DstAlignment,
289
+ /*UsePacketSegment*/ false>
290
+ : copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Start, Stop> {};
291
+
292
+ template <typename Kernel, int Stop, int SrcAlignment, int DstAlignment>
293
+ struct copy_using_evaluator_innervec_segment<Kernel, Stop, Stop, SrcAlignment, DstAlignment,
294
+ /*UsePacketSegment*/ true> {
295
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&, Index) {}
296
+ };
297
+
298
+ template <typename Kernel, int Stop, int SrcAlignment, int DstAlignment>
299
+ struct copy_using_evaluator_innervec_segment<Kernel, Stop, Stop, SrcAlignment, DstAlignment,
300
+ /*UsePacketSegment*/ false> {
301
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&, Index) {}
307
302
  };
308
303
 
309
304
  /***************************************************************************
310
- * Part 3 : implementation of all cases
311
- ***************************************************************************/
305
+ * Part 3 : implementation of all cases
306
+ ***************************************************************************/
312
307
 
313
308
  // dense_assignment_loop is based on assign_impl
314
309
 
315
- template<typename Kernel,
316
- int Traversal = Kernel::AssignmentTraits::Traversal,
317
- int Unrolling = Kernel::AssignmentTraits::Unrolling>
318
- struct dense_assignment_loop;
310
+ template <typename Kernel, int Traversal = Kernel::AssignmentTraits::Traversal,
311
+ int Unrolling = Kernel::AssignmentTraits::Unrolling>
312
+ struct dense_assignment_loop_impl;
313
+
314
+ template <typename Kernel, int Traversal = Kernel::AssignmentTraits::Traversal,
315
+ int Unrolling = Kernel::AssignmentTraits::Unrolling>
316
+ struct dense_assignment_loop {
317
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) {
318
+ #ifdef __cpp_lib_is_constant_evaluated
319
+ if (internal::is_constant_evaluated())
320
+ dense_assignment_loop_impl<Kernel, Traversal == AllAtOnceTraversal ? AllAtOnceTraversal : DefaultTraversal,
321
+ NoUnrolling>::run(kernel);
322
+ else
323
+ #endif
324
+ dense_assignment_loop_impl<Kernel, Traversal, Unrolling>::run(kernel);
325
+ }
326
+ };
319
327
 
320
328
  /************************
321
329
  ***** Special Cases *****
322
330
  ************************/
323
331
 
324
332
  // Zero-sized assignment is a no-op.
325
- template<typename Kernel, int Unrolling>
326
- struct dense_assignment_loop<Kernel, AllAtOnceTraversal, Unrolling>
327
- {
328
- EIGEN_DEVICE_FUNC static void EIGEN_STRONG_INLINE run(Kernel& /*kernel*/)
329
- {
330
- typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
331
- EIGEN_STATIC_ASSERT(int(DstXprType::SizeAtCompileTime) == 0,
332
- EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT)
333
+ template <typename Kernel, int Unrolling>
334
+ struct dense_assignment_loop_impl<Kernel, AllAtOnceTraversal, Unrolling> {
335
+ static constexpr int SizeAtCompileTime = Kernel::AssignmentTraits::SizeAtCompileTime;
336
+
337
+ EIGEN_DEVICE_FUNC static void EIGEN_STRONG_INLINE constexpr run(Kernel& /*kernel*/) {
338
+ EIGEN_STATIC_ASSERT(SizeAtCompileTime == 0, EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT)
333
339
  }
334
340
  };
335
341
 
@@ -337,39 +343,34 @@ struct dense_assignment_loop<Kernel, AllAtOnceTraversal, Unrolling>
337
343
  *** Default traversal ***
338
344
  ************************/
339
345
 
340
- template<typename Kernel>
341
- struct dense_assignment_loop<Kernel, DefaultTraversal, NoUnrolling>
342
- {
343
- EIGEN_DEVICE_FUNC static void EIGEN_STRONG_INLINE run(Kernel &kernel)
344
- {
345
- for(Index outer = 0; outer < kernel.outerSize(); ++outer) {
346
- for(Index inner = 0; inner < kernel.innerSize(); ++inner) {
346
+ template <typename Kernel>
347
+ struct dense_assignment_loop_impl<Kernel, DefaultTraversal, NoUnrolling> {
348
+ EIGEN_DEVICE_FUNC static void EIGEN_STRONG_INLINE constexpr run(Kernel& kernel) {
349
+ for (Index outer = 0; outer < kernel.outerSize(); ++outer) {
350
+ for (Index inner = 0; inner < kernel.innerSize(); ++inner) {
347
351
  kernel.assignCoeffByOuterInner(outer, inner);
348
352
  }
349
353
  }
350
354
  }
351
355
  };
352
356
 
353
- template<typename Kernel>
354
- struct dense_assignment_loop<Kernel, DefaultTraversal, CompleteUnrolling>
355
- {
356
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
357
- {
358
- typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
359
- copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
357
+ template <typename Kernel>
358
+ struct dense_assignment_loop_impl<Kernel, DefaultTraversal, CompleteUnrolling> {
359
+ static constexpr int SizeAtCompileTime = Kernel::AssignmentTraits::SizeAtCompileTime;
360
+
361
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) {
362
+ copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, 0, SizeAtCompileTime>::run(kernel);
360
363
  }
361
364
  };
362
365
 
363
- template<typename Kernel>
364
- struct dense_assignment_loop<Kernel, DefaultTraversal, InnerUnrolling>
365
- {
366
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
367
- {
368
- typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
366
+ template <typename Kernel>
367
+ struct dense_assignment_loop_impl<Kernel, DefaultTraversal, InnerUnrolling> {
368
+ static constexpr int InnerSizeAtCompileTime = Kernel::AssignmentTraits::InnerSizeAtCompileTime;
369
369
 
370
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) {
370
371
  const Index outerSize = kernel.outerSize();
371
- for(Index outer = 0; outer < outerSize; ++outer)
372
- copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime>::run(kernel, outer);
372
+ for (Index outer = 0; outer < outerSize; ++outer)
373
+ copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, 0, InnerSizeAtCompileTime>::run(kernel, outer);
373
374
  }
374
375
  };
375
376
 
@@ -377,83 +378,137 @@ struct dense_assignment_loop<Kernel, DefaultTraversal, InnerUnrolling>
377
378
  *** Linear vectorization ***
378
379
  ***************************/
379
380
 
380
-
381
381
  // The goal of unaligned_dense_assignment_loop is simply to factorize the handling
382
382
  // of the non vectorizable beginning and ending parts
383
383
 
384
- template <bool IsAligned = false>
385
- struct unaligned_dense_assignment_loop
386
- {
387
- // if IsAligned = true, then do nothing
384
+ template <typename PacketType, int DstAlignment, int SrcAlignment, bool UsePacketSegment, bool Skip>
385
+ struct unaligned_dense_assignment_loop {
386
+ // if Skip == true, then do nothing
387
+ template <typename Kernel>
388
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& /*kernel*/, Index /*start*/, Index /*end*/) {}
388
389
  template <typename Kernel>
389
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, Index, Index) {}
390
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& /*kernel*/, Index /*outer*/,
391
+ Index /*innerStart*/, Index /*innerEnd*/) {}
390
392
  };
391
393
 
392
- template <>
393
- struct unaligned_dense_assignment_loop<false>
394
- {
395
- // MSVC must not inline this functions. If it does, it fails to optimize the
396
- // packet access path.
397
- // FIXME check which version exhibits this issue
398
- #if EIGEN_COMP_MSVC
394
+ template <typename PacketType, int DstAlignment, int SrcAlignment>
395
+ struct unaligned_dense_assignment_loop<PacketType, DstAlignment, SrcAlignment, /*UsePacketSegment*/ true,
396
+ /*Skip*/ false> {
399
397
  template <typename Kernel>
400
- static EIGEN_DONT_INLINE void run(Kernel &kernel,
401
- Index start,
402
- Index end)
403
- #else
398
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel, Index start, Index end) {
399
+ Index count = end - start;
400
+ eigen_assert(count <= unpacket_traits<PacketType>::size);
401
+ if (count > 0) kernel.template assignPacketSegment<DstAlignment, SrcAlignment, PacketType>(start, 0, count);
402
+ }
404
403
  template <typename Kernel>
405
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel,
406
- Index start,
407
- Index end)
408
- #endif
409
- {
410
- for (Index index = start; index < end; ++index)
411
- kernel.assignCoeff(index);
404
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel, Index outer, Index start, Index end) {
405
+ Index count = end - start;
406
+ eigen_assert(count <= unpacket_traits<PacketType>::size);
407
+ if (count > 0)
408
+ kernel.template assignPacketSegmentByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, start, 0, count);
412
409
  }
413
410
  };
414
411
 
415
- template<typename Kernel>
416
- struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, NoUnrolling>
417
- {
418
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
419
- {
420
- const Index size = kernel.size();
421
- typedef typename Kernel::Scalar Scalar;
422
- typedef typename Kernel::PacketType PacketType;
423
- enum {
424
- requestedAlignment = Kernel::AssignmentTraits::LinearRequiredAlignment,
425
- packetSize = unpacket_traits<PacketType>::size,
426
- dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment),
427
- dstAlignment = packet_traits<Scalar>::AlignedOnScalar ? int(requestedAlignment)
428
- : int(Kernel::AssignmentTraits::DstAlignment),
429
- srcAlignment = Kernel::AssignmentTraits::JointAlignment
430
- };
431
- const Index alignedStart = dstIsAligned ? 0 : internal::first_aligned<requestedAlignment>(kernel.dstDataPtr(), size);
432
- const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;
412
+ template <typename PacketType, int DstAlignment, int SrcAlignment>
413
+ struct unaligned_dense_assignment_loop<PacketType, DstAlignment, SrcAlignment, /*UsePacketSegment*/ false,
414
+ /*Skip*/ false> {
415
+ template <typename Kernel>
416
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel, Index start, Index end) {
417
+ for (Index index = start; index < end; ++index) kernel.assignCoeff(index);
418
+ }
419
+ template <typename Kernel>
420
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel, Index outer, Index innerStart,
421
+ Index innerEnd) {
422
+ for (Index inner = innerStart; inner < innerEnd; ++inner) kernel.assignCoeffByOuterInner(outer, inner);
423
+ }
424
+ };
425
+
426
+ template <typename Kernel, int Index_, int Stop>
427
+ struct copy_using_evaluator_linearvec_CompleteUnrolling {
428
+ using PacketType = typename Kernel::PacketType;
429
+ static constexpr int SrcAlignment = Kernel::AssignmentTraits::SrcAlignment;
430
+ static constexpr int DstAlignment = Kernel::AssignmentTraits::DstAlignment;
431
+ static constexpr int NextIndex = Index_ + unpacket_traits<PacketType>::size;
432
+
433
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) {
434
+ kernel.template assignPacket<DstAlignment, SrcAlignment, PacketType>(Index_);
435
+ copy_using_evaluator_linearvec_CompleteUnrolling<Kernel, NextIndex, Stop>::run(kernel);
436
+ }
437
+ };
433
438
 
434
- unaligned_dense_assignment_loop<dstIsAligned!=0>::run(kernel, 0, alignedStart);
439
+ template <typename Kernel, int Stop>
440
+ struct copy_using_evaluator_linearvec_CompleteUnrolling<Kernel, Stop, Stop> {
441
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&) {}
442
+ };
435
443
 
436
- for(Index index = alignedStart; index < alignedEnd; index += packetSize)
437
- kernel.template assignPacket<dstAlignment, srcAlignment, PacketType>(index);
444
+ template <typename Kernel, int Index_, int Stop, bool UsePacketSegment>
445
+ struct copy_using_evaluator_linearvec_segment {
446
+ using PacketType = typename Kernel::PacketType;
447
+ static constexpr int SrcAlignment = Kernel::AssignmentTraits::SrcAlignment;
448
+ static constexpr int DstAlignment = Kernel::AssignmentTraits::DstAlignment;
438
449
 
439
- unaligned_dense_assignment_loop<>::run(kernel, alignedEnd, size);
450
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) {
451
+ kernel.template assignPacketSegment<DstAlignment, SrcAlignment, PacketType>(Index_, 0, Stop - Index_);
440
452
  }
441
453
  };
442
454
 
443
- template<typename Kernel>
444
- struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, CompleteUnrolling>
445
- {
446
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
447
- {
448
- typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
449
- typedef typename Kernel::PacketType PacketType;
455
+ template <typename Kernel, int Index_, int Stop>
456
+ struct copy_using_evaluator_linearvec_segment<Kernel, Index_, Stop, /*UsePacketSegment*/ false>
457
+ : copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Index_, Stop> {};
458
+
459
+ template <typename Kernel, int Stop>
460
+ struct copy_using_evaluator_linearvec_segment<Kernel, Stop, Stop, /*UsePacketSegment*/ true> {
461
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&) {}
462
+ };
463
+
464
+ template <typename Kernel, int Stop>
465
+ struct copy_using_evaluator_linearvec_segment<Kernel, Stop, Stop, /*UsePacketSegment*/ false> {
466
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&) {}
467
+ };
468
+
469
+ template <typename Kernel>
470
+ struct dense_assignment_loop_impl<Kernel, LinearVectorizedTraversal, NoUnrolling> {
471
+ using Scalar = typename Kernel::Scalar;
472
+ using PacketType = typename Kernel::PacketType;
473
+ static constexpr int PacketSize = unpacket_traits<PacketType>::size;
474
+ static constexpr int SrcAlignment = Kernel::AssignmentTraits::JointAlignment;
475
+ static constexpr int DstAlignment = plain_enum_max(Kernel::AssignmentTraits::DstAlignment, alignof(Scalar));
476
+ static constexpr int RequestedAlignment = unpacket_traits<PacketType>::alignment;
477
+ static constexpr bool Alignable =
478
+ (DstAlignment >= RequestedAlignment) || ((RequestedAlignment - DstAlignment) % sizeof(Scalar) == 0);
479
+ static constexpr int Alignment = Alignable ? RequestedAlignment : DstAlignment;
480
+ static constexpr bool DstIsAligned = DstAlignment >= Alignment;
481
+ static constexpr bool UsePacketSegment = Kernel::AssignmentTraits::UsePacketSegment;
482
+
483
+ using head_loop =
484
+ unaligned_dense_assignment_loop<PacketType, DstAlignment, SrcAlignment, UsePacketSegment, DstIsAligned>;
485
+ using tail_loop = unaligned_dense_assignment_loop<PacketType, Alignment, SrcAlignment, UsePacketSegment, false>;
486
+
487
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) {
488
+ const Index size = kernel.size();
489
+ const Index alignedStart = DstIsAligned ? 0 : first_aligned<Alignment>(kernel.dstDataPtr(), size);
490
+ const Index alignedEnd = alignedStart + numext::round_down(size - alignedStart, PacketSize);
491
+
492
+ head_loop::run(kernel, 0, alignedStart);
450
493
 
451
- enum { size = DstXprType::SizeAtCompileTime,
452
- packetSize =unpacket_traits<PacketType>::size,
453
- alignedSize = (int(size)/packetSize)*packetSize };
494
+ for (Index index = alignedStart; index < alignedEnd; index += PacketSize)
495
+ kernel.template assignPacket<Alignment, SrcAlignment, PacketType>(index);
496
+
497
+ tail_loop::run(kernel, alignedEnd, size);
498
+ }
499
+ };
454
500
 
455
- copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, alignedSize>::run(kernel);
456
- copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, alignedSize, size>::run(kernel);
501
+ template <typename Kernel>
502
+ struct dense_assignment_loop_impl<Kernel, LinearVectorizedTraversal, CompleteUnrolling> {
503
+ using PacketType = typename Kernel::PacketType;
504
+ static constexpr int PacketSize = unpacket_traits<PacketType>::size;
505
+ static constexpr int Size = Kernel::AssignmentTraits::SizeAtCompileTime;
506
+ static constexpr int AlignedSize = numext::round_down(Size, PacketSize);
507
+ static constexpr bool UsePacketSegment = Kernel::AssignmentTraits::UsePacketSegment;
508
+
509
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) {
510
+ copy_using_evaluator_linearvec_CompleteUnrolling<Kernel, 0, AlignedSize>::run(kernel);
511
+ copy_using_evaluator_linearvec_segment<Kernel, AlignedSize, Size, UsePacketSegment>::run(kernel);
457
512
  }
458
513
  };
459
514
 
@@ -461,46 +516,42 @@ struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, CompleteUnrollin
461
516
  *** Inner vectorization ***
462
517
  **************************/
463
518
 
464
- template<typename Kernel>
465
- struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, NoUnrolling>
466
- {
467
- typedef typename Kernel::PacketType PacketType;
468
- enum {
469
- SrcAlignment = Kernel::AssignmentTraits::SrcAlignment,
470
- DstAlignment = Kernel::AssignmentTraits::DstAlignment
471
- };
472
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
473
- {
519
+ template <typename Kernel>
520
+ struct dense_assignment_loop_impl<Kernel, InnerVectorizedTraversal, NoUnrolling> {
521
+ using PacketType = typename Kernel::PacketType;
522
+ static constexpr int PacketSize = unpacket_traits<PacketType>::size;
523
+ static constexpr int SrcAlignment = Kernel::AssignmentTraits::JointAlignment;
524
+ static constexpr int DstAlignment = Kernel::AssignmentTraits::DstAlignment;
525
+
526
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) {
474
527
  const Index innerSize = kernel.innerSize();
475
528
  const Index outerSize = kernel.outerSize();
476
- const Index packetSize = unpacket_traits<PacketType>::size;
477
- for(Index outer = 0; outer < outerSize; ++outer)
478
- for(Index inner = 0; inner < innerSize; inner+=packetSize)
529
+ for (Index outer = 0; outer < outerSize; ++outer)
530
+ for (Index inner = 0; inner < innerSize; inner += PacketSize)
479
531
  kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, inner);
480
532
  }
481
533
  };
482
534
 
483
- template<typename Kernel>
484
- struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, CompleteUnrolling>
485
- {
486
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
487
- {
488
- typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
489
- copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
535
+ template <typename Kernel>
536
+ struct dense_assignment_loop_impl<Kernel, InnerVectorizedTraversal, CompleteUnrolling> {
537
+ static constexpr int SizeAtCompileTime = Kernel::AssignmentTraits::SizeAtCompileTime;
538
+
539
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) {
540
+ copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, SizeAtCompileTime>::run(kernel);
490
541
  }
491
542
  };
492
543
 
493
- template<typename Kernel>
494
- struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, InnerUnrolling>
495
- {
496
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
497
- {
498
- typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
499
- typedef typename Kernel::AssignmentTraits Traits;
544
+ template <typename Kernel>
545
+ struct dense_assignment_loop_impl<Kernel, InnerVectorizedTraversal, InnerUnrolling> {
546
+ static constexpr int InnerSize = Kernel::AssignmentTraits::InnerSizeAtCompileTime;
547
+ static constexpr int SrcAlignment = Kernel::AssignmentTraits::SrcAlignment;
548
+ static constexpr int DstAlignment = Kernel::AssignmentTraits::DstAlignment;
549
+
550
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) {
500
551
  const Index outerSize = kernel.outerSize();
501
- for(Index outer = 0; outer < outerSize; ++outer)
502
- copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime,
503
- Traits::SrcAlignment, Traits::DstAlignment>::run(kernel, outer);
552
+ for (Index outer = 0; outer < outerSize; ++outer)
553
+ copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, InnerSize, SrcAlignment, DstAlignment>::run(kernel,
554
+ outer);
504
555
  }
505
556
  };
506
557
 
@@ -508,24 +559,19 @@ struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, InnerUnrolling>
508
559
  *** Linear traversal ***
509
560
  ***********************/
510
561
 
511
- template<typename Kernel>
512
- struct dense_assignment_loop<Kernel, LinearTraversal, NoUnrolling>
513
- {
514
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
515
- {
562
+ template <typename Kernel>
563
+ struct dense_assignment_loop_impl<Kernel, LinearTraversal, NoUnrolling> {
564
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) {
516
565
  const Index size = kernel.size();
517
- for(Index i = 0; i < size; ++i)
518
- kernel.assignCoeff(i);
566
+ for (Index i = 0; i < size; ++i) kernel.assignCoeff(i);
519
567
  }
520
568
  };
521
569
 
522
- template<typename Kernel>
523
- struct dense_assignment_loop<Kernel, LinearTraversal, CompleteUnrolling>
524
- {
525
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
526
- {
527
- typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
528
- copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
570
+ template <typename Kernel>
571
+ struct dense_assignment_loop_impl<Kernel, LinearTraversal, CompleteUnrolling> {
572
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) {
573
+ copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, 0, Kernel::AssignmentTraits::SizeAtCompileTime>::run(
574
+ kernel);
529
575
  }
530
576
  };
531
577
 
@@ -533,80 +579,71 @@ struct dense_assignment_loop<Kernel, LinearTraversal, CompleteUnrolling>
533
579
  *** Slice vectorization ***
534
580
  ***************************/
535
581
 
536
- template<typename Kernel>
537
- struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling>
538
- {
539
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
540
- {
541
- typedef typename Kernel::Scalar Scalar;
542
- typedef typename Kernel::PacketType PacketType;
543
- enum {
544
- packetSize = unpacket_traits<PacketType>::size,
545
- requestedAlignment = int(Kernel::AssignmentTraits::InnerRequiredAlignment),
546
- alignable = packet_traits<Scalar>::AlignedOnScalar || int(Kernel::AssignmentTraits::DstAlignment)>=sizeof(Scalar),
547
- dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment),
548
- dstAlignment = alignable ? int(requestedAlignment)
549
- : int(Kernel::AssignmentTraits::DstAlignment)
550
- };
551
- const Scalar *dst_ptr = kernel.dstDataPtr();
552
- if((!bool(dstIsAligned)) && (UIntPtr(dst_ptr) % sizeof(Scalar))>0)
553
- {
554
- // the pointer is not aligned-on scalar, so alignment is not possible
555
- return dense_assignment_loop<Kernel,DefaultTraversal,NoUnrolling>::run(kernel);
556
- }
557
- const Index packetAlignedMask = packetSize - 1;
582
+ template <typename Kernel>
583
+ struct dense_assignment_loop_impl<Kernel, SliceVectorizedTraversal, NoUnrolling> {
584
+ using Scalar = typename Kernel::Scalar;
585
+ using PacketType = typename Kernel::PacketType;
586
+ static constexpr int PacketSize = unpacket_traits<PacketType>::size;
587
+ static constexpr int SrcAlignment = Kernel::AssignmentTraits::JointAlignment;
588
+ static constexpr int DstAlignment = plain_enum_max(Kernel::AssignmentTraits::DstAlignment, alignof(Scalar));
589
+ static constexpr int RequestedAlignment = unpacket_traits<PacketType>::alignment;
590
+ static constexpr bool Alignable =
591
+ (DstAlignment >= RequestedAlignment) || ((RequestedAlignment - DstAlignment) % sizeof(Scalar) == 0);
592
+ static constexpr int Alignment = Alignable ? RequestedAlignment : DstAlignment;
593
+ static constexpr bool DstIsAligned = DstAlignment >= Alignment;
594
+ static constexpr bool UsePacketSegment = Kernel::AssignmentTraits::UsePacketSegment;
595
+
596
+ using head_loop = unaligned_dense_assignment_loop<PacketType, DstAlignment, Unaligned, UsePacketSegment, !Alignable>;
597
+ using tail_loop = unaligned_dense_assignment_loop<PacketType, Alignment, Unaligned, UsePacketSegment, false>;
598
+
599
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) {
600
+ const Scalar* dst_ptr = kernel.dstDataPtr();
558
601
  const Index innerSize = kernel.innerSize();
559
602
  const Index outerSize = kernel.outerSize();
560
- const Index alignedStep = alignable ? (packetSize - kernel.outerStride() % packetSize) & packetAlignedMask : 0;
561
- Index alignedStart = ((!alignable) || bool(dstIsAligned)) ? 0 : internal::first_aligned<requestedAlignment>(dst_ptr, innerSize);
562
-
563
- for(Index outer = 0; outer < outerSize; ++outer)
564
- {
565
- const Index alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask);
566
- // do the non-vectorizable part of the assignment
567
- for(Index inner = 0; inner<alignedStart ; ++inner)
568
- kernel.assignCoeffByOuterInner(outer, inner);
603
+ const Index alignedStep = Alignable ? (PacketSize - kernel.outerStride() % PacketSize) % PacketSize : 0;
604
+ Index alignedStart = ((!Alignable) || DstIsAligned) ? 0 : internal::first_aligned<Alignment>(dst_ptr, innerSize);
605
+
606
+ for (Index outer = 0; outer < outerSize; ++outer) {
607
+ const Index alignedEnd = alignedStart + numext::round_down(innerSize - alignedStart, PacketSize);
608
+
609
+ head_loop::run(kernel, outer, 0, alignedStart);
569
610
 
570
611
  // do the vectorizable part of the assignment
571
- for(Index inner = alignedStart; inner<alignedEnd; inner+=packetSize)
572
- kernel.template assignPacketByOuterInner<dstAlignment, Unaligned, PacketType>(outer, inner);
612
+ for (Index inner = alignedStart; inner < alignedEnd; inner += PacketSize)
613
+ kernel.template assignPacketByOuterInner<Alignment, Unaligned, PacketType>(outer, inner);
573
614
 
574
- // do the non-vectorizable part of the assignment
575
- for(Index inner = alignedEnd; inner<innerSize ; ++inner)
576
- kernel.assignCoeffByOuterInner(outer, inner);
615
+ tail_loop::run(kernel, outer, alignedEnd, innerSize);
577
616
 
578
- alignedStart = numext::mini((alignedStart+alignedStep)%packetSize, innerSize);
617
+ alignedStart = numext::mini((alignedStart + alignedStep) % PacketSize, innerSize);
579
618
  }
580
619
  }
581
620
  };
582
621
 
583
622
  #if EIGEN_UNALIGNED_VECTORIZE
584
- template<typename Kernel>
585
- struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, InnerUnrolling>
586
- {
587
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
588
- {
589
- typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
590
- typedef typename Kernel::PacketType PacketType;
591
-
592
- enum { innerSize = DstXprType::InnerSizeAtCompileTime,
593
- packetSize =unpacket_traits<PacketType>::size,
594
- vectorizableSize = (int(innerSize) / int(packetSize)) * int(packetSize),
595
- size = DstXprType::SizeAtCompileTime };
596
-
597
- for(Index outer = 0; outer < kernel.outerSize(); ++outer)
598
- {
599
- copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, vectorizableSize, 0, 0>::run(kernel, outer);
600
- copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, vectorizableSize, innerSize>::run(kernel, outer);
623
+ template <typename Kernel>
624
+ struct dense_assignment_loop_impl<Kernel, SliceVectorizedTraversal, InnerUnrolling> {
625
+ using PacketType = typename Kernel::PacketType;
626
+ static constexpr int PacketSize = unpacket_traits<PacketType>::size;
627
+ static constexpr int InnerSize = Kernel::AssignmentTraits::InnerSizeAtCompileTime;
628
+ static constexpr int VectorizableSize = numext::round_down(InnerSize, PacketSize);
629
+ static constexpr bool UsePacketSegment = Kernel::AssignmentTraits::UsePacketSegment;
630
+
631
+ using packet_loop = copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, VectorizableSize, Unaligned, Unaligned>;
632
+ using packet_segment_loop = copy_using_evaluator_innervec_segment<Kernel, VectorizableSize, InnerSize, Unaligned,
633
+ Unaligned, UsePacketSegment>;
634
+
635
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) {
636
+ for (Index outer = 0; outer < kernel.outerSize(); ++outer) {
637
+ packet_loop::run(kernel, outer);
638
+ packet_segment_loop::run(kernel, outer);
601
639
  }
602
640
  }
603
641
  };
604
642
  #endif
605
643
 
606
-
607
644
  /***************************************************************************
608
- * Part 4 : Generic dense assignment kernel
609
- ***************************************************************************/
645
+ * Part 4 : Generic dense assignment kernel
646
+ ***************************************************************************/
610
647
 
611
648
  // This class generalize the assignment of a coefficient (or packet) from one dense evaluator
612
649
  // to another dense writable evaluator.
@@ -614,108 +651,117 @@ struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, InnerUnrolling>
614
651
  // This abstraction level permits to keep the evaluation loops as simple and as generic as possible.
615
652
  // One can customize the assignment using this generic dense_assignment_kernel with different
616
653
  // functors, or by completely overloading it, by-passing a functor.
617
- template<typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor, int Version = Specialized>
618
- class generic_dense_assignment_kernel
619
- {
620
- protected:
654
+ template <typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor, int Version = Specialized>
655
+ class generic_dense_assignment_kernel {
656
+ protected:
621
657
  typedef typename DstEvaluatorTypeT::XprType DstXprType;
622
658
  typedef typename SrcEvaluatorTypeT::XprType SrcXprType;
623
- public:
624
659
 
660
+ public:
625
661
  typedef DstEvaluatorTypeT DstEvaluatorType;
626
662
  typedef SrcEvaluatorTypeT SrcEvaluatorType;
627
663
  typedef typename DstEvaluatorType::Scalar Scalar;
628
664
  typedef copy_using_evaluator_traits<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor> AssignmentTraits;
629
665
  typedef typename AssignmentTraits::PacketType PacketType;
630
666
 
631
-
632
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
633
- generic_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr)
634
- : m_dst(dst), m_src(src), m_functor(func), m_dstExpr(dstExpr)
635
- {
636
- #ifdef EIGEN_DEBUG_ASSIGN
667
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr generic_dense_assignment_kernel(DstEvaluatorType& dst,
668
+ const SrcEvaluatorType& src,
669
+ const Functor& func,
670
+ DstXprType& dstExpr)
671
+ : m_dst(dst), m_src(src), m_functor(func), m_dstExpr(dstExpr) {
672
+ #ifdef EIGEN_DEBUG_ASSIGN
637
673
  AssignmentTraits::debug();
638
- #endif
674
+ #endif
639
675
  }
640
676
 
641
- EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index size() const EIGEN_NOEXCEPT { return m_dstExpr.size(); }
642
- EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index innerSize() const EIGEN_NOEXCEPT { return m_dstExpr.innerSize(); }
643
- EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index outerSize() const EIGEN_NOEXCEPT { return m_dstExpr.outerSize(); }
644
- EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index rows() const EIGEN_NOEXCEPT { return m_dstExpr.rows(); }
645
- EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index cols() const EIGEN_NOEXCEPT { return m_dstExpr.cols(); }
646
- EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index outerStride() const EIGEN_NOEXCEPT { return m_dstExpr.outerStride(); }
677
+ EIGEN_DEVICE_FUNC constexpr Index size() const noexcept { return m_dstExpr.size(); }
678
+ EIGEN_DEVICE_FUNC constexpr Index innerSize() const noexcept { return m_dstExpr.innerSize(); }
679
+ EIGEN_DEVICE_FUNC constexpr Index outerSize() const noexcept { return m_dstExpr.outerSize(); }
680
+ EIGEN_DEVICE_FUNC constexpr Index rows() const noexcept { return m_dstExpr.rows(); }
681
+ EIGEN_DEVICE_FUNC constexpr Index cols() const noexcept { return m_dstExpr.cols(); }
682
+ EIGEN_DEVICE_FUNC constexpr Index outerStride() const noexcept { return m_dstExpr.outerStride(); }
647
683
 
648
- EIGEN_DEVICE_FUNC DstEvaluatorType& dstEvaluator() EIGEN_NOEXCEPT { return m_dst; }
649
- EIGEN_DEVICE_FUNC const SrcEvaluatorType& srcEvaluator() const EIGEN_NOEXCEPT { return m_src; }
684
+ EIGEN_DEVICE_FUNC DstEvaluatorType& dstEvaluator() noexcept { return m_dst; }
685
+ EIGEN_DEVICE_FUNC const SrcEvaluatorType& srcEvaluator() const noexcept { return m_src; }
650
686
 
651
687
  /// Assign src(row,col) to dst(row,col) through the assignment functor.
652
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index row, Index col)
653
- {
654
- m_functor.assignCoeff(m_dst.coeffRef(row,col), m_src.coeff(row,col));
688
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void assignCoeff(Index row, Index col) {
689
+ m_functor.assignCoeff(m_dst.coeffRef(row, col), m_src.coeff(row, col));
655
690
  }
656
691
 
657
692
  /// \sa assignCoeff(Index,Index)
658
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index index)
659
- {
693
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index index) {
660
694
  m_functor.assignCoeff(m_dst.coeffRef(index), m_src.coeff(index));
661
695
  }
662
696
 
663
697
  /// \sa assignCoeff(Index,Index)
664
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeffByOuterInner(Index outer, Index inner)
665
- {
698
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void assignCoeffByOuterInner(Index outer, Index inner) {
666
699
  Index row = rowIndexByOuterInner(outer, inner);
667
700
  Index col = colIndexByOuterInner(outer, inner);
668
701
  assignCoeff(row, col);
669
702
  }
670
703
 
704
+ template <int StoreMode, int LoadMode, typename Packet>
705
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index row, Index col) {
706
+ m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(row, col),
707
+ m_src.template packet<LoadMode, Packet>(row, col));
708
+ }
709
+
710
+ template <int StoreMode, int LoadMode, typename Packet>
711
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index index) {
712
+ m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(index), m_src.template packet<LoadMode, Packet>(index));
713
+ }
714
+
715
+ template <int StoreMode, int LoadMode, typename Packet>
716
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacketByOuterInner(Index outer, Index inner) {
717
+ Index row = rowIndexByOuterInner(outer, inner);
718
+ Index col = colIndexByOuterInner(outer, inner);
719
+ assignPacket<StoreMode, LoadMode, Packet>(row, col);
720
+ }
671
721
 
672
- template<int StoreMode, int LoadMode, typename PacketType>
673
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index row, Index col)
674
- {
675
- m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(row,col), m_src.template packet<LoadMode,PacketType>(row,col));
722
+ template <int StoreMode, int LoadMode, typename Packet>
723
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacketSegment(Index row, Index col, Index begin, Index count) {
724
+ m_functor.template assignPacketSegment<StoreMode>(
725
+ &m_dst.coeffRef(row, col), m_src.template packetSegment<LoadMode, Packet>(row, col, begin, count), begin,
726
+ count);
676
727
  }
677
728
 
678
- template<int StoreMode, int LoadMode, typename PacketType>
679
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index index)
680
- {
681
- m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(index), m_src.template packet<LoadMode,PacketType>(index));
729
+ template <int StoreMode, int LoadMode, typename Packet>
730
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacketSegment(Index index, Index begin, Index count) {
731
+ m_functor.template assignPacketSegment<StoreMode>(
732
+ &m_dst.coeffRef(index), m_src.template packetSegment<LoadMode, Packet>(index, begin, count), begin, count);
682
733
  }
683
734
 
684
- template<int StoreMode, int LoadMode, typename PacketType>
685
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacketByOuterInner(Index outer, Index inner)
686
- {
735
+ template <int StoreMode, int LoadMode, typename Packet>
736
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacketSegmentByOuterInner(Index outer, Index inner, Index begin,
737
+ Index count) {
687
738
  Index row = rowIndexByOuterInner(outer, inner);
688
739
  Index col = colIndexByOuterInner(outer, inner);
689
- assignPacket<StoreMode,LoadMode,PacketType>(row, col);
740
+ assignPacketSegment<StoreMode, LoadMode, Packet>(row, col, begin, count);
690
741
  }
691
742
 
692
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner)
693
- {
743
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr Index rowIndexByOuterInner(Index outer, Index inner) {
694
744
  typedef typename DstEvaluatorType::ExpressionTraits Traits;
695
- return int(Traits::RowsAtCompileTime) == 1 ? 0
696
- : int(Traits::ColsAtCompileTime) == 1 ? inner
697
- : int(DstEvaluatorType::Flags)&RowMajorBit ? outer
698
- : inner;
745
+ return int(Traits::RowsAtCompileTime) == 1 ? 0
746
+ : int(Traits::ColsAtCompileTime) == 1 ? inner
747
+ : int(DstEvaluatorType::Flags) & RowMajorBit ? outer
748
+ : inner;
699
749
  }
700
750
 
701
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index colIndexByOuterInner(Index outer, Index inner)
702
- {
751
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr Index colIndexByOuterInner(Index outer, Index inner) {
703
752
  typedef typename DstEvaluatorType::ExpressionTraits Traits;
704
- return int(Traits::ColsAtCompileTime) == 1 ? 0
705
- : int(Traits::RowsAtCompileTime) == 1 ? inner
706
- : int(DstEvaluatorType::Flags)&RowMajorBit ? inner
707
- : outer;
753
+ return int(Traits::ColsAtCompileTime) == 1 ? 0
754
+ : int(Traits::RowsAtCompileTime) == 1 ? inner
755
+ : int(DstEvaluatorType::Flags) & RowMajorBit ? inner
756
+ : outer;
708
757
  }
709
758
 
710
- EIGEN_DEVICE_FUNC const Scalar* dstDataPtr() const
711
- {
712
- return m_dstExpr.data();
713
- }
759
+ EIGEN_DEVICE_FUNC const Scalar* dstDataPtr() const { return m_dstExpr.data(); }
714
760
 
715
- protected:
761
+ protected:
716
762
  DstEvaluatorType& m_dst;
717
763
  const SrcEvaluatorType& m_src;
718
- const Functor &m_functor;
764
+ const Functor& m_functor;
719
765
  // TODO find a way to avoid the needs of the original expression
720
766
  DstXprType& m_dstExpr;
721
767
  };
@@ -724,50 +770,47 @@ protected:
724
770
  // PacketSize used is no larger than 4, thereby increasing the chance that vectorized instructions will be used
725
771
  // when computing the product.
726
772
 
727
- template<typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor>
728
- class restricted_packet_dense_assignment_kernel : public generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, BuiltIn>
729
- {
730
- protected:
773
+ template <typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor>
774
+ class restricted_packet_dense_assignment_kernel
775
+ : public generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, BuiltIn> {
776
+ protected:
731
777
  typedef generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, BuiltIn> Base;
778
+
732
779
  public:
733
- typedef typename Base::Scalar Scalar;
734
- typedef typename Base::DstXprType DstXprType;
735
- typedef copy_using_evaluator_traits<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, 4> AssignmentTraits;
736
- typedef typename AssignmentTraits::PacketType PacketType;
780
+ typedef typename Base::Scalar Scalar;
781
+ typedef typename Base::DstXprType DstXprType;
782
+ typedef copy_using_evaluator_traits<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, 4> AssignmentTraits;
783
+ typedef typename AssignmentTraits::PacketType PacketType;
737
784
 
738
- EIGEN_DEVICE_FUNC restricted_packet_dense_assignment_kernel(DstEvaluatorTypeT &dst, const SrcEvaluatorTypeT &src, const Functor &func, DstXprType& dstExpr)
739
- : Base(dst, src, func, dstExpr)
740
- {
741
- }
742
- };
785
+ EIGEN_DEVICE_FUNC restricted_packet_dense_assignment_kernel(DstEvaluatorTypeT& dst, const SrcEvaluatorTypeT& src,
786
+ const Functor& func, DstXprType& dstExpr)
787
+ : Base(dst, src, func, dstExpr) {}
788
+ };
743
789
 
744
790
  /***************************************************************************
745
- * Part 5 : Entry point for dense rectangular assignment
746
- ***************************************************************************/
791
+ * Part 5 : Entry point for dense rectangular assignment
792
+ ***************************************************************************/
747
793
 
748
- template<typename DstXprType,typename SrcXprType, typename Functor>
749
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
750
- void resize_if_allowed(DstXprType &dst, const SrcXprType& src, const Functor &/*func*/)
751
- {
794
+ template <typename DstXprType, typename SrcXprType, typename Functor>
795
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize_if_allowed(DstXprType& dst, const SrcXprType& src,
796
+ const Functor& /*func*/) {
752
797
  EIGEN_ONLY_USED_FOR_DEBUG(dst);
753
798
  EIGEN_ONLY_USED_FOR_DEBUG(src);
754
799
  eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
755
800
  }
756
801
 
757
- template<typename DstXprType,typename SrcXprType, typename T1, typename T2>
758
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
759
- void resize_if_allowed(DstXprType &dst, const SrcXprType& src, const internal::assign_op<T1,T2> &/*func*/)
760
- {
802
+ template <typename DstXprType, typename SrcXprType, typename T1, typename T2>
803
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize_if_allowed(DstXprType& dst, const SrcXprType& src,
804
+ const internal::assign_op<T1, T2>& /*func*/) {
761
805
  Index dstRows = src.rows();
762
806
  Index dstCols = src.cols();
763
- if(((dst.rows()!=dstRows) || (dst.cols()!=dstCols)))
764
- dst.resize(dstRows, dstCols);
807
+ if (((dst.rows() != dstRows) || (dst.cols() != dstCols))) dst.resize(dstRows, dstCols);
765
808
  eigen_assert(dst.rows() == dstRows && dst.cols() == dstCols);
766
809
  }
767
810
 
768
- template<typename DstXprType, typename SrcXprType, typename Functor>
769
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src, const Functor &func)
770
- {
811
+ template <typename DstXprType, typename SrcXprType, typename Functor>
812
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src,
813
+ const Functor& func) {
771
814
  typedef evaluator<DstXprType> DstEvaluatorType;
772
815
  typedef evaluator<SrcXprType> SrcEvaluatorType;
773
816
 
@@ -779,196 +822,202 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType
779
822
 
780
823
  DstEvaluatorType dstEvaluator(dst);
781
824
 
782
- typedef generic_dense_assignment_kernel<DstEvaluatorType,SrcEvaluatorType,Functor> Kernel;
825
+ typedef generic_dense_assignment_kernel<DstEvaluatorType, SrcEvaluatorType, Functor> Kernel;
783
826
  Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived());
784
827
 
785
828
  dense_assignment_loop<Kernel>::run(kernel);
786
829
  }
787
830
 
788
- // Specialization for filling the destination with a constant value.
789
- #ifndef EIGEN_GPU_COMPILE_PHASE
790
- template<typename DstXprType>
791
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const Eigen::CwiseNullaryOp<Eigen::internal::scalar_constant_op<typename DstXprType::Scalar>, DstXprType>& src, const internal::assign_op<typename DstXprType::Scalar,typename DstXprType::Scalar>& func)
792
- {
793
- resize_if_allowed(dst, src, func);
794
- std::fill_n(dst.data(), dst.size(), src.functor()());
795
- }
796
- #endif
797
-
798
- template<typename DstXprType, typename SrcXprType>
799
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src)
800
- {
801
- call_dense_assignment_loop(dst, src, internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar>());
831
+ template <typename DstXprType, typename SrcXprType>
832
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src) {
833
+ call_dense_assignment_loop(dst, src, internal::assign_op<typename DstXprType::Scalar, typename SrcXprType::Scalar>());
802
834
  }
803
835
 
804
836
  /***************************************************************************
805
- * Part 6 : Generic assignment
806
- ***************************************************************************/
837
+ * Part 6 : Generic assignment
838
+ ***************************************************************************/
807
839
 
808
840
  // Based on the respective shapes of the destination and source,
809
841
  // the class AssignmentKind determine the kind of assignment mechanism.
810
842
  // AssignmentKind must define a Kind typedef.
811
- template<typename DstShape, typename SrcShape> struct AssignmentKind;
843
+ template <typename DstShape, typename SrcShape>
844
+ struct AssignmentKind;
812
845
 
813
846
  // Assignment kind defined in this file:
814
847
  struct Dense2Dense {};
815
848
  struct EigenBase2EigenBase {};
816
849
 
817
- template<typename,typename> struct AssignmentKind { typedef EigenBase2EigenBase Kind; };
818
- template<> struct AssignmentKind<DenseShape,DenseShape> { typedef Dense2Dense Kind; };
850
+ template <typename, typename>
851
+ struct AssignmentKind {
852
+ typedef EigenBase2EigenBase Kind;
853
+ };
854
+ template <>
855
+ struct AssignmentKind<DenseShape, DenseShape> {
856
+ typedef Dense2Dense Kind;
857
+ };
819
858
 
820
859
  // This is the main assignment class
821
- template< typename DstXprType, typename SrcXprType, typename Functor,
822
- typename Kind = typename AssignmentKind< typename evaluator_traits<DstXprType>::Shape , typename evaluator_traits<SrcXprType>::Shape >::Kind,
860
+ template <typename DstXprType, typename SrcXprType, typename Functor,
861
+ typename Kind = typename AssignmentKind<typename evaluator_traits<DstXprType>::Shape,
862
+ typename evaluator_traits<SrcXprType>::Shape>::Kind,
823
863
  typename EnableIf = void>
824
864
  struct Assignment;
825
865
 
866
+ // The only purpose of this call_assignment() function is to deal with noalias() / "assume-aliasing" and automatic
867
+ // transposition. Indeed, I (Gael) think that this concept of "assume-aliasing" was a mistake, and it makes thing quite
868
+ // complicated. So this intermediate function removes everything related to "assume-aliasing" such that Assignment does
869
+ // not has to bother about these annoying details.
826
870
 
827
- // The only purpose of this call_assignment() function is to deal with noalias() / "assume-aliasing" and automatic transposition.
828
- // Indeed, I (Gael) think that this concept of "assume-aliasing" was a mistake, and it makes thing quite complicated.
829
- // So this intermediate function removes everything related to "assume-aliasing" such that Assignment
830
- // does not has to bother about these annoying details.
831
-
832
- template<typename Dst, typename Src>
833
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
834
- void call_assignment(Dst& dst, const Src& src)
835
- {
836
- call_assignment(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
871
+ template <typename Dst, typename Src>
872
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment(Dst& dst, const Src& src) {
873
+ call_assignment(dst, src, internal::assign_op<typename Dst::Scalar, typename Src::Scalar>());
837
874
  }
838
- template<typename Dst, typename Src>
839
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
840
- void call_assignment(const Dst& dst, const Src& src)
841
- {
842
- call_assignment(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
875
+ template <typename Dst, typename Src>
876
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_assignment(const Dst& dst, const Src& src) {
877
+ call_assignment(dst, src, internal::assign_op<typename Dst::Scalar, typename Src::Scalar>());
843
878
  }
844
879
 
845
880
  // Deal with "assume-aliasing"
846
- template<typename Dst, typename Src, typename Func>
847
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
848
- void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if< evaluator_assume_aliasing<Src>::value, void*>::type = 0)
849
- {
881
+ template <typename Dst, typename Src, typename Func>
882
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment(
883
+ Dst& dst, const Src& src, const Func& func, std::enable_if_t<evaluator_assume_aliasing<Src>::value, void*> = 0) {
850
884
  typename plain_matrix_type<Src>::type tmp(src);
851
885
  call_assignment_no_alias(dst, tmp, func);
852
886
  }
853
887
 
854
- template<typename Dst, typename Src, typename Func>
855
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
856
- void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if<!evaluator_assume_aliasing<Src>::value, void*>::type = 0)
857
- {
888
+ template <typename Dst, typename Src, typename Func>
889
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment(
890
+ Dst& dst, const Src& src, const Func& func, std::enable_if_t<!evaluator_assume_aliasing<Src>::value, void*> = 0) {
858
891
  call_assignment_no_alias(dst, src, func);
859
892
  }
860
893
 
861
894
  // by-pass "assume-aliasing"
862
895
  // When there is no aliasing, we require that 'dst' has been properly resized
863
- template<typename Dst, template <typename> class StorageBase, typename Src, typename Func>
864
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
865
- void call_assignment(NoAlias<Dst,StorageBase>& dst, const Src& src, const Func& func)
866
- {
896
+ template <typename Dst, template <typename> class StorageBase, typename Src, typename Func>
897
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment(NoAlias<Dst, StorageBase>& dst, const Src& src,
898
+ const Func& func) {
867
899
  call_assignment_no_alias(dst.expression(), src, func);
868
900
  }
869
901
 
870
-
871
- template<typename Dst, typename Src, typename Func>
872
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
873
- void call_assignment_no_alias(Dst& dst, const Src& src, const Func& func)
874
- {
902
+ template <typename Dst, typename Src, typename Func>
903
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment_no_alias(Dst& dst, const Src& src,
904
+ const Func& func) {
875
905
  enum {
876
- NeedToTranspose = ( (int(Dst::RowsAtCompileTime) == 1 && int(Src::ColsAtCompileTime) == 1)
877
- || (int(Dst::ColsAtCompileTime) == 1 && int(Src::RowsAtCompileTime) == 1)
878
- ) && int(Dst::SizeAtCompileTime) != 1
906
+ NeedToTranspose = ((int(Dst::RowsAtCompileTime) == 1 && int(Src::ColsAtCompileTime) == 1) ||
907
+ (int(Dst::ColsAtCompileTime) == 1 && int(Src::RowsAtCompileTime) == 1)) &&
908
+ int(Dst::SizeAtCompileTime) != 1
879
909
  };
880
910
 
881
- typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst>::type ActualDstTypeCleaned;
882
- typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst&>::type ActualDstType;
911
+ typedef std::conditional_t<NeedToTranspose, Transpose<Dst>, Dst> ActualDstTypeCleaned;
912
+ typedef std::conditional_t<NeedToTranspose, Transpose<Dst>, Dst&> ActualDstType;
883
913
  ActualDstType actualDst(dst);
884
914
 
885
915
  // TODO check whether this is the right place to perform these checks:
886
916
  EIGEN_STATIC_ASSERT_LVALUE(Dst)
887
- EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(ActualDstTypeCleaned,Src)
888
- EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename ActualDstTypeCleaned::Scalar,typename Src::Scalar);
917
+ EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(ActualDstTypeCleaned, Src)
918
+ EIGEN_CHECK_BINARY_COMPATIBILIY(Func, typename ActualDstTypeCleaned::Scalar, typename Src::Scalar);
889
919
 
890
- Assignment<ActualDstTypeCleaned,Src,Func>::run(actualDst, src, func);
920
+ Assignment<ActualDstTypeCleaned, Src, Func>::run(actualDst, src, func);
891
921
  }
892
922
 
893
- template<typename Dst, typename Src, typename Func>
894
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
895
- void call_restricted_packet_assignment_no_alias(Dst& dst, const Src& src, const Func& func)
896
- {
897
- typedef evaluator<Dst> DstEvaluatorType;
898
- typedef evaluator<Src> SrcEvaluatorType;
899
- typedef restricted_packet_dense_assignment_kernel<DstEvaluatorType,SrcEvaluatorType,Func> Kernel;
923
+ template <typename Dst, typename Src, typename Func>
924
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_restricted_packet_assignment_no_alias(Dst& dst, const Src& src,
925
+ const Func& func) {
926
+ typedef evaluator<Dst> DstEvaluatorType;
927
+ typedef evaluator<Src> SrcEvaluatorType;
928
+ typedef restricted_packet_dense_assignment_kernel<DstEvaluatorType, SrcEvaluatorType, Func> Kernel;
900
929
 
901
- EIGEN_STATIC_ASSERT_LVALUE(Dst)
902
- EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename Dst::Scalar,typename Src::Scalar);
930
+ EIGEN_STATIC_ASSERT_LVALUE(Dst)
931
+ EIGEN_CHECK_BINARY_COMPATIBILIY(Func, typename Dst::Scalar, typename Src::Scalar);
903
932
 
904
- SrcEvaluatorType srcEvaluator(src);
905
- resize_if_allowed(dst, src, func);
933
+ SrcEvaluatorType srcEvaluator(src);
934
+ resize_if_allowed(dst, src, func);
906
935
 
907
- DstEvaluatorType dstEvaluator(dst);
908
- Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived());
936
+ DstEvaluatorType dstEvaluator(dst);
937
+ Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived());
909
938
 
910
- dense_assignment_loop<Kernel>::run(kernel);
939
+ dense_assignment_loop<Kernel>::run(kernel);
911
940
  }
912
941
 
913
- template<typename Dst, typename Src>
914
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
915
- void call_assignment_no_alias(Dst& dst, const Src& src)
916
- {
917
- call_assignment_no_alias(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
942
+ template <typename Dst, typename Src>
943
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment_no_alias(Dst& dst, const Src& src) {
944
+ call_assignment_no_alias(dst, src, internal::assign_op<typename Dst::Scalar, typename Src::Scalar>());
918
945
  }
919
946
 
920
- template<typename Dst, typename Src, typename Func>
921
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
922
- void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src, const Func& func)
923
- {
947
+ template <typename Dst, typename Src, typename Func>
948
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src,
949
+ const Func& func) {
924
950
  // TODO check whether this is the right place to perform these checks:
925
951
  EIGEN_STATIC_ASSERT_LVALUE(Dst)
926
- EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Dst,Src)
927
- EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename Dst::Scalar,typename Src::Scalar);
952
+ EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Dst, Src)
953
+ EIGEN_CHECK_BINARY_COMPATIBILIY(Func, typename Dst::Scalar, typename Src::Scalar);
928
954
 
929
- Assignment<Dst,Src,Func>::run(dst, src, func);
955
+ Assignment<Dst, Src, Func>::run(dst, src, func);
930
956
  }
931
- template<typename Dst, typename Src>
932
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
933
- void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src)
934
- {
935
- call_assignment_no_alias_no_transpose(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
957
+ template <typename Dst, typename Src>
958
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src) {
959
+ call_assignment_no_alias_no_transpose(dst, src, internal::assign_op<typename Dst::Scalar, typename Src::Scalar>());
936
960
  }
937
961
 
938
962
  // forward declaration
939
- template<typename Dst, typename Src> void check_for_aliasing(const Dst &dst, const Src &src);
963
+ template <typename Dst, typename Src>
964
+ EIGEN_DEVICE_FUNC void check_for_aliasing(const Dst& dst, const Src& src);
940
965
 
941
966
  // Generic Dense to Dense assignment
942
967
  // Note that the last template argument "Weak" is needed to make it possible to perform
943
968
  // both partial specialization+SFINAE without ambiguous specialization
944
- template< typename DstXprType, typename SrcXprType, typename Functor, typename Weak>
945
- struct Assignment<DstXprType, SrcXprType, Functor, Dense2Dense, Weak>
946
- {
947
- EIGEN_DEVICE_FUNC
948
- static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
949
- {
969
+ template <typename DstXprType, typename SrcXprType, typename Functor, typename Weak>
970
+ struct Assignment<DstXprType, SrcXprType, Functor, Dense2Dense, Weak> {
971
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(DstXprType& dst, const SrcXprType& src,
972
+ const Functor& func) {
950
973
  #ifndef EIGEN_NO_DEBUG
951
- internal::check_for_aliasing(dst, src);
974
+ if (!internal::is_constant_evaluated()) {
975
+ internal::check_for_aliasing(dst, src);
976
+ }
952
977
  #endif
953
978
 
954
979
  call_dense_assignment_loop(dst, src, func);
955
980
  }
956
981
  };
957
982
 
983
+ template <typename DstXprType, typename SrcPlainObject, typename Weak>
984
+ struct Assignment<DstXprType, CwiseNullaryOp<scalar_constant_op<typename DstXprType::Scalar>, SrcPlainObject>,
985
+ assign_op<typename DstXprType::Scalar, typename DstXprType::Scalar>, Dense2Dense, Weak> {
986
+ using Scalar = typename DstXprType::Scalar;
987
+ using NullaryOp = scalar_constant_op<Scalar>;
988
+ using SrcXprType = CwiseNullaryOp<NullaryOp, SrcPlainObject>;
989
+ using Functor = assign_op<Scalar, Scalar>;
990
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(DstXprType& dst, const SrcXprType& src,
991
+ const Functor& /*func*/) {
992
+ eigen_fill_impl<DstXprType>::run(dst, src);
993
+ }
994
+ };
995
+
996
+ template <typename DstXprType, typename SrcPlainObject, typename Weak>
997
+ struct Assignment<DstXprType, CwiseNullaryOp<scalar_zero_op<typename DstXprType::Scalar>, SrcPlainObject>,
998
+ assign_op<typename DstXprType::Scalar, typename DstXprType::Scalar>, Dense2Dense, Weak> {
999
+ using Scalar = typename DstXprType::Scalar;
1000
+ using NullaryOp = scalar_zero_op<Scalar>;
1001
+ using SrcXprType = CwiseNullaryOp<NullaryOp, SrcPlainObject>;
1002
+ using Functor = assign_op<Scalar, Scalar>;
1003
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(DstXprType& dst, const SrcXprType& src,
1004
+ const Functor& /*func*/) {
1005
+ eigen_zero_impl<DstXprType>::run(dst, src);
1006
+ }
1007
+ };
1008
+
958
1009
  // Generic assignment through evalTo.
959
1010
  // TODO: not sure we have to keep that one, but it helps porting current code to new evaluator mechanism.
960
1011
  // Note that the last template argument "Weak" is needed to make it possible to perform
961
1012
  // both partial specialization+SFINAE without ambiguous specialization
962
- template< typename DstXprType, typename SrcXprType, typename Functor, typename Weak>
963
- struct Assignment<DstXprType, SrcXprType, Functor, EigenBase2EigenBase, Weak>
964
- {
965
- EIGEN_DEVICE_FUNC
966
- static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
967
- {
1013
+ template <typename DstXprType, typename SrcXprType, typename Functor, typename Weak>
1014
+ struct Assignment<DstXprType, SrcXprType, Functor, EigenBase2EigenBase, Weak> {
1015
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(
1016
+ DstXprType& dst, const SrcXprType& src,
1017
+ const internal::assign_op<typename DstXprType::Scalar, typename SrcXprType::Scalar>& /*func*/) {
968
1018
  Index dstRows = src.rows();
969
1019
  Index dstCols = src.cols();
970
- if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
971
- dst.resize(dstRows, dstCols);
1020
+ if ((dst.rows() != dstRows) || (dst.cols() != dstCols)) dst.resize(dstRows, dstCols);
972
1021
 
973
1022
  eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
974
1023
  src.evalTo(dst);
@@ -976,35 +1025,33 @@ struct Assignment<DstXprType, SrcXprType, Functor, EigenBase2EigenBase, Weak>
976
1025
 
977
1026
  // NOTE The following two functions are templated to avoid their instantiation if not needed
978
1027
  // This is needed because some expressions supports evalTo only and/or have 'void' as scalar type.
979
- template<typename SrcScalarType>
980
- EIGEN_DEVICE_FUNC
981
- static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<typename DstXprType::Scalar,SrcScalarType> &/*func*/)
982
- {
1028
+ template <typename SrcScalarType>
1029
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(
1030
+ DstXprType& dst, const SrcXprType& src,
1031
+ const internal::add_assign_op<typename DstXprType::Scalar, SrcScalarType>& /*func*/) {
983
1032
  Index dstRows = src.rows();
984
1033
  Index dstCols = src.cols();
985
- if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
986
- dst.resize(dstRows, dstCols);
1034
+ if ((dst.rows() != dstRows) || (dst.cols() != dstCols)) dst.resize(dstRows, dstCols);
987
1035
 
988
1036
  eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
989
1037
  src.addTo(dst);
990
1038
  }
991
1039
 
992
- template<typename SrcScalarType>
993
- EIGEN_DEVICE_FUNC
994
- static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<typename DstXprType::Scalar,SrcScalarType> &/*func*/)
995
- {
1040
+ template <typename SrcScalarType>
1041
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(
1042
+ DstXprType& dst, const SrcXprType& src,
1043
+ const internal::sub_assign_op<typename DstXprType::Scalar, SrcScalarType>& /*func*/) {
996
1044
  Index dstRows = src.rows();
997
1045
  Index dstCols = src.cols();
998
- if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
999
- dst.resize(dstRows, dstCols);
1046
+ if ((dst.rows() != dstRows) || (dst.cols() != dstCols)) dst.resize(dstRows, dstCols);
1000
1047
 
1001
1048
  eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
1002
1049
  src.subTo(dst);
1003
1050
  }
1004
1051
  };
1005
1052
 
1006
- } // namespace internal
1053
+ } // namespace internal
1007
1054
 
1008
- } // end namespace Eigen
1055
+ } // end namespace Eigen
1009
1056
 
1010
- #endif // EIGEN_ASSIGN_EVALUATOR_H
1057
+ #endif // EIGEN_ASSIGN_EVALUATOR_H