@smake/eigen 1.0.2 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (435) hide show
  1. package/README.md +1 -1
  2. package/eigen/Eigen/AccelerateSupport +52 -0
  3. package/eigen/Eigen/Cholesky +18 -21
  4. package/eigen/Eigen/CholmodSupport +28 -28
  5. package/eigen/Eigen/Core +235 -326
  6. package/eigen/Eigen/Eigenvalues +16 -14
  7. package/eigen/Eigen/Geometry +21 -24
  8. package/eigen/Eigen/Householder +9 -8
  9. package/eigen/Eigen/IterativeLinearSolvers +8 -4
  10. package/eigen/Eigen/Jacobi +14 -14
  11. package/eigen/Eigen/KLUSupport +43 -0
  12. package/eigen/Eigen/LU +16 -20
  13. package/eigen/Eigen/MetisSupport +12 -12
  14. package/eigen/Eigen/OrderingMethods +54 -54
  15. package/eigen/Eigen/PaStiXSupport +23 -20
  16. package/eigen/Eigen/PardisoSupport +17 -14
  17. package/eigen/Eigen/QR +18 -21
  18. package/eigen/Eigen/QtAlignedMalloc +5 -13
  19. package/eigen/Eigen/SPQRSupport +21 -14
  20. package/eigen/Eigen/SVD +23 -18
  21. package/eigen/Eigen/Sparse +1 -4
  22. package/eigen/Eigen/SparseCholesky +18 -23
  23. package/eigen/Eigen/SparseCore +18 -17
  24. package/eigen/Eigen/SparseLU +12 -8
  25. package/eigen/Eigen/SparseQR +16 -14
  26. package/eigen/Eigen/StdDeque +5 -2
  27. package/eigen/Eigen/StdList +5 -2
  28. package/eigen/Eigen/StdVector +5 -2
  29. package/eigen/Eigen/SuperLUSupport +30 -24
  30. package/eigen/Eigen/ThreadPool +80 -0
  31. package/eigen/Eigen/UmfPackSupport +19 -17
  32. package/eigen/Eigen/Version +14 -0
  33. package/eigen/Eigen/src/AccelerateSupport/AccelerateSupport.h +423 -0
  34. package/eigen/Eigen/src/AccelerateSupport/InternalHeaderCheck.h +3 -0
  35. package/eigen/Eigen/src/Cholesky/InternalHeaderCheck.h +3 -0
  36. package/eigen/Eigen/src/Cholesky/LDLT.h +377 -401
  37. package/eigen/Eigen/src/Cholesky/LLT.h +332 -360
  38. package/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h +81 -56
  39. package/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +620 -521
  40. package/eigen/Eigen/src/CholmodSupport/InternalHeaderCheck.h +3 -0
  41. package/eigen/Eigen/src/Core/ArithmeticSequence.h +239 -0
  42. package/eigen/Eigen/src/Core/Array.h +341 -294
  43. package/eigen/Eigen/src/Core/ArrayBase.h +190 -203
  44. package/eigen/Eigen/src/Core/ArrayWrapper.h +127 -171
  45. package/eigen/Eigen/src/Core/Assign.h +30 -40
  46. package/eigen/Eigen/src/Core/AssignEvaluator.h +711 -589
  47. package/eigen/Eigen/src/Core/Assign_MKL.h +130 -125
  48. package/eigen/Eigen/src/Core/BandMatrix.h +268 -283
  49. package/eigen/Eigen/src/Core/Block.h +375 -398
  50. package/eigen/Eigen/src/Core/CommaInitializer.h +86 -97
  51. package/eigen/Eigen/src/Core/ConditionEstimator.h +51 -53
  52. package/eigen/Eigen/src/Core/CoreEvaluators.h +1356 -1026
  53. package/eigen/Eigen/src/Core/CoreIterators.h +73 -59
  54. package/eigen/Eigen/src/Core/CwiseBinaryOp.h +114 -132
  55. package/eigen/Eigen/src/Core/CwiseNullaryOp.h +726 -617
  56. package/eigen/Eigen/src/Core/CwiseTernaryOp.h +77 -103
  57. package/eigen/Eigen/src/Core/CwiseUnaryOp.h +56 -68
  58. package/eigen/Eigen/src/Core/CwiseUnaryView.h +132 -95
  59. package/eigen/Eigen/src/Core/DenseBase.h +632 -571
  60. package/eigen/Eigen/src/Core/DenseCoeffsBase.h +511 -624
  61. package/eigen/Eigen/src/Core/DenseStorage.h +512 -509
  62. package/eigen/Eigen/src/Core/DeviceWrapper.h +153 -0
  63. package/eigen/Eigen/src/Core/Diagonal.h +169 -210
  64. package/eigen/Eigen/src/Core/DiagonalMatrix.h +351 -274
  65. package/eigen/Eigen/src/Core/DiagonalProduct.h +12 -10
  66. package/eigen/Eigen/src/Core/Dot.h +172 -222
  67. package/eigen/Eigen/src/Core/EigenBase.h +75 -85
  68. package/eigen/Eigen/src/Core/Fill.h +138 -0
  69. package/eigen/Eigen/src/Core/FindCoeff.h +464 -0
  70. package/eigen/Eigen/src/Core/ForceAlignedAccess.h +90 -109
  71. package/eigen/Eigen/src/Core/Fuzzy.h +82 -105
  72. package/eigen/Eigen/src/Core/GeneralProduct.h +327 -263
  73. package/eigen/Eigen/src/Core/GenericPacketMath.h +1472 -360
  74. package/eigen/Eigen/src/Core/GlobalFunctions.h +194 -151
  75. package/eigen/Eigen/src/Core/IO.h +147 -139
  76. package/eigen/Eigen/src/Core/IndexedView.h +321 -0
  77. package/eigen/Eigen/src/Core/InnerProduct.h +260 -0
  78. package/eigen/Eigen/src/Core/InternalHeaderCheck.h +3 -0
  79. package/eigen/Eigen/src/Core/Inverse.h +56 -66
  80. package/eigen/Eigen/src/Core/Map.h +124 -142
  81. package/eigen/Eigen/src/Core/MapBase.h +256 -281
  82. package/eigen/Eigen/src/Core/MathFunctions.h +1620 -938
  83. package/eigen/Eigen/src/Core/MathFunctionsImpl.h +233 -71
  84. package/eigen/Eigen/src/Core/Matrix.h +491 -416
  85. package/eigen/Eigen/src/Core/MatrixBase.h +468 -453
  86. package/eigen/Eigen/src/Core/NestByValue.h +66 -85
  87. package/eigen/Eigen/src/Core/NoAlias.h +79 -85
  88. package/eigen/Eigen/src/Core/NumTraits.h +235 -148
  89. package/eigen/Eigen/src/Core/PartialReduxEvaluator.h +253 -0
  90. package/eigen/Eigen/src/Core/PermutationMatrix.h +461 -511
  91. package/eigen/Eigen/src/Core/PlainObjectBase.h +871 -894
  92. package/eigen/Eigen/src/Core/Product.h +260 -139
  93. package/eigen/Eigen/src/Core/ProductEvaluators.h +863 -714
  94. package/eigen/Eigen/src/Core/Random.h +161 -136
  95. package/eigen/Eigen/src/Core/RandomImpl.h +262 -0
  96. package/eigen/Eigen/src/Core/RealView.h +250 -0
  97. package/eigen/Eigen/src/Core/Redux.h +366 -336
  98. package/eigen/Eigen/src/Core/Ref.h +308 -209
  99. package/eigen/Eigen/src/Core/Replicate.h +94 -106
  100. package/eigen/Eigen/src/Core/Reshaped.h +398 -0
  101. package/eigen/Eigen/src/Core/ReturnByValue.h +49 -55
  102. package/eigen/Eigen/src/Core/Reverse.h +136 -145
  103. package/eigen/Eigen/src/Core/Select.h +70 -140
  104. package/eigen/Eigen/src/Core/SelfAdjointView.h +262 -285
  105. package/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h +23 -20
  106. package/eigen/Eigen/src/Core/SkewSymmetricMatrix3.h +382 -0
  107. package/eigen/Eigen/src/Core/Solve.h +97 -111
  108. package/eigen/Eigen/src/Core/SolveTriangular.h +131 -129
  109. package/eigen/Eigen/src/Core/SolverBase.h +138 -101
  110. package/eigen/Eigen/src/Core/StableNorm.h +156 -160
  111. package/eigen/Eigen/src/Core/StlIterators.h +619 -0
  112. package/eigen/Eigen/src/Core/Stride.h +91 -88
  113. package/eigen/Eigen/src/Core/Swap.h +70 -38
  114. package/eigen/Eigen/src/Core/Transpose.h +295 -273
  115. package/eigen/Eigen/src/Core/Transpositions.h +272 -317
  116. package/eigen/Eigen/src/Core/TriangularMatrix.h +670 -755
  117. package/eigen/Eigen/src/Core/VectorBlock.h +59 -72
  118. package/eigen/Eigen/src/Core/VectorwiseOp.h +668 -630
  119. package/eigen/Eigen/src/Core/Visitor.h +480 -216
  120. package/eigen/Eigen/src/Core/arch/AVX/Complex.h +407 -293
  121. package/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +79 -388
  122. package/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +2935 -491
  123. package/eigen/Eigen/src/Core/arch/AVX/Reductions.h +353 -0
  124. package/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +279 -22
  125. package/eigen/Eigen/src/Core/arch/AVX512/Complex.h +472 -0
  126. package/eigen/Eigen/src/Core/arch/AVX512/GemmKernel.h +1245 -0
  127. package/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +85 -333
  128. package/eigen/Eigen/src/Core/arch/AVX512/MathFunctionsFP16.h +75 -0
  129. package/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +2490 -649
  130. package/eigen/Eigen/src/Core/arch/AVX512/PacketMathFP16.h +1413 -0
  131. package/eigen/Eigen/src/Core/arch/AVX512/Reductions.h +297 -0
  132. package/eigen/Eigen/src/Core/arch/AVX512/TrsmKernel.h +1167 -0
  133. package/eigen/Eigen/src/Core/arch/AVX512/TrsmUnrolls.inc +1219 -0
  134. package/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +277 -0
  135. package/eigen/Eigen/src/Core/arch/AVX512/TypeCastingFP16.h +130 -0
  136. package/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +521 -298
  137. package/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +39 -280
  138. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +3686 -0
  139. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +205 -0
  140. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +901 -0
  141. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMAbfloat16.h +742 -0
  142. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixVectorProduct.inc +2818 -0
  143. package/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +3391 -723
  144. package/eigen/Eigen/src/Core/arch/AltiVec/TypeCasting.h +153 -0
  145. package/eigen/Eigen/src/Core/arch/Default/BFloat16.h +866 -0
  146. package/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +113 -14
  147. package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +2634 -0
  148. package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +227 -0
  149. package/eigen/Eigen/src/Core/arch/Default/Half.h +1091 -0
  150. package/eigen/Eigen/src/Core/arch/Default/Settings.h +11 -13
  151. package/eigen/Eigen/src/Core/arch/GPU/Complex.h +244 -0
  152. package/eigen/Eigen/src/Core/arch/GPU/MathFunctions.h +104 -0
  153. package/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1712 -0
  154. package/eigen/Eigen/src/Core/arch/GPU/Tuple.h +268 -0
  155. package/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +77 -0
  156. package/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
  157. package/eigen/Eigen/src/Core/arch/HVX/PacketMath.h +1088 -0
  158. package/eigen/Eigen/src/Core/arch/LSX/Complex.h +520 -0
  159. package/eigen/Eigen/src/Core/arch/LSX/GeneralBlockPanelKernel.h +23 -0
  160. package/eigen/Eigen/src/Core/arch/LSX/MathFunctions.h +43 -0
  161. package/eigen/Eigen/src/Core/arch/LSX/PacketMath.h +2866 -0
  162. package/eigen/Eigen/src/Core/arch/LSX/TypeCasting.h +526 -0
  163. package/eigen/Eigen/src/Core/arch/MSA/Complex.h +620 -0
  164. package/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +379 -0
  165. package/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1237 -0
  166. package/eigen/Eigen/src/Core/arch/NEON/Complex.h +531 -289
  167. package/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +243 -0
  168. package/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +50 -73
  169. package/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +5915 -579
  170. package/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1642 -0
  171. package/eigen/Eigen/src/Core/arch/NEON/UnaryFunctors.h +57 -0
  172. package/eigen/Eigen/src/Core/arch/SSE/Complex.h +366 -334
  173. package/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +40 -514
  174. package/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +2164 -675
  175. package/eigen/Eigen/src/Core/arch/SSE/Reductions.h +324 -0
  176. package/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +188 -35
  177. package/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +48 -0
  178. package/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +674 -0
  179. package/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +52 -0
  180. package/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +227 -0
  181. package/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +303 -0
  182. package/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +576 -0
  183. package/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +83 -0
  184. package/eigen/Eigen/src/Core/arch/ZVector/Complex.h +434 -261
  185. package/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +160 -53
  186. package/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +1073 -605
  187. package/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +123 -117
  188. package/eigen/Eigen/src/Core/functors/BinaryFunctors.h +594 -322
  189. package/eigen/Eigen/src/Core/functors/NullaryFunctors.h +204 -118
  190. package/eigen/Eigen/src/Core/functors/StlFunctors.h +110 -97
  191. package/eigen/Eigen/src/Core/functors/TernaryFunctors.h +34 -7
  192. package/eigen/Eigen/src/Core/functors/UnaryFunctors.h +1158 -530
  193. package/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +2329 -1333
  194. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +328 -364
  195. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +191 -178
  196. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +85 -82
  197. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +154 -73
  198. package/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +396 -542
  199. package/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +80 -77
  200. package/eigen/Eigen/src/Core/products/Parallelizer.h +208 -92
  201. package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +331 -375
  202. package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +206 -224
  203. package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +139 -146
  204. package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +58 -61
  205. package/eigen/Eigen/src/Core/products/SelfadjointProduct.h +71 -71
  206. package/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +48 -46
  207. package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +294 -369
  208. package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +246 -238
  209. package/eigen/Eigen/src/Core/products/TriangularMatrixVector.h +244 -247
  210. package/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +212 -192
  211. package/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +328 -275
  212. package/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +108 -109
  213. package/eigen/Eigen/src/Core/products/TriangularSolverVector.h +70 -93
  214. package/eigen/Eigen/src/Core/util/Assert.h +158 -0
  215. package/eigen/Eigen/src/Core/util/BlasUtil.h +413 -290
  216. package/eigen/Eigen/src/Core/util/ConfigureVectorization.h +543 -0
  217. package/eigen/Eigen/src/Core/util/Constants.h +314 -263
  218. package/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +130 -78
  219. package/eigen/Eigen/src/Core/util/EmulateArray.h +270 -0
  220. package/eigen/Eigen/src/Core/util/ForwardDeclarations.h +450 -224
  221. package/eigen/Eigen/src/Core/util/GpuHipCudaDefines.inc +101 -0
  222. package/eigen/Eigen/src/Core/util/GpuHipCudaUndefines.inc +45 -0
  223. package/eigen/Eigen/src/Core/util/IndexedViewHelper.h +487 -0
  224. package/eigen/Eigen/src/Core/util/IntegralConstant.h +279 -0
  225. package/eigen/Eigen/src/Core/util/MKL_support.h +39 -30
  226. package/eigen/Eigen/src/Core/util/Macros.h +939 -646
  227. package/eigen/Eigen/src/Core/util/MaxSizeVector.h +139 -0
  228. package/eigen/Eigen/src/Core/util/Memory.h +1042 -650
  229. package/eigen/Eigen/src/Core/util/Meta.h +618 -426
  230. package/eigen/Eigen/src/Core/util/MoreMeta.h +638 -0
  231. package/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +32 -19
  232. package/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
  233. package/eigen/Eigen/src/Core/util/Serializer.h +209 -0
  234. package/eigen/Eigen/src/Core/util/StaticAssert.h +51 -164
  235. package/eigen/Eigen/src/Core/util/SymbolicIndex.h +445 -0
  236. package/eigen/Eigen/src/Core/util/XprHelper.h +793 -538
  237. package/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +246 -277
  238. package/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +299 -319
  239. package/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +52 -48
  240. package/eigen/Eigen/src/Eigenvalues/EigenSolver.h +413 -456
  241. package/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +309 -325
  242. package/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +157 -171
  243. package/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +292 -310
  244. package/eigen/Eigen/src/Eigenvalues/InternalHeaderCheck.h +3 -0
  245. package/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +91 -107
  246. package/eigen/Eigen/src/Eigenvalues/RealQZ.h +539 -606
  247. package/eigen/Eigen/src/Eigenvalues/RealSchur.h +348 -382
  248. package/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +41 -35
  249. package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +579 -600
  250. package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +47 -44
  251. package/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +434 -461
  252. package/eigen/Eigen/src/Geometry/AlignedBox.h +307 -214
  253. package/eigen/Eigen/src/Geometry/AngleAxis.h +135 -137
  254. package/eigen/Eigen/src/Geometry/EulerAngles.h +163 -74
  255. package/eigen/Eigen/src/Geometry/Homogeneous.h +289 -333
  256. package/eigen/Eigen/src/Geometry/Hyperplane.h +152 -161
  257. package/eigen/Eigen/src/Geometry/InternalHeaderCheck.h +3 -0
  258. package/eigen/Eigen/src/Geometry/OrthoMethods.h +168 -145
  259. package/eigen/Eigen/src/Geometry/ParametrizedLine.h +141 -104
  260. package/eigen/Eigen/src/Geometry/Quaternion.h +595 -497
  261. package/eigen/Eigen/src/Geometry/Rotation2D.h +110 -108
  262. package/eigen/Eigen/src/Geometry/RotationBase.h +148 -145
  263. package/eigen/Eigen/src/Geometry/Scaling.h +115 -90
  264. package/eigen/Eigen/src/Geometry/Transform.h +896 -953
  265. package/eigen/Eigen/src/Geometry/Translation.h +100 -98
  266. package/eigen/Eigen/src/Geometry/Umeyama.h +79 -84
  267. package/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +154 -0
  268. package/eigen/Eigen/src/Householder/BlockHouseholder.h +54 -42
  269. package/eigen/Eigen/src/Householder/Householder.h +104 -122
  270. package/eigen/Eigen/src/Householder/HouseholderSequence.h +416 -382
  271. package/eigen/Eigen/src/Householder/InternalHeaderCheck.h +3 -0
  272. package/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +153 -166
  273. package/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +127 -138
  274. package/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +95 -124
  275. package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +269 -267
  276. package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +246 -259
  277. package/eigen/Eigen/src/IterativeLinearSolvers/InternalHeaderCheck.h +3 -0
  278. package/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +218 -217
  279. package/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +80 -103
  280. package/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +59 -63
  281. package/eigen/Eigen/src/Jacobi/InternalHeaderCheck.h +3 -0
  282. package/eigen/Eigen/src/Jacobi/Jacobi.h +256 -291
  283. package/eigen/Eigen/src/KLUSupport/InternalHeaderCheck.h +3 -0
  284. package/eigen/Eigen/src/KLUSupport/KLUSupport.h +339 -0
  285. package/eigen/Eigen/src/LU/Determinant.h +60 -63
  286. package/eigen/Eigen/src/LU/FullPivLU.h +561 -626
  287. package/eigen/Eigen/src/LU/InternalHeaderCheck.h +3 -0
  288. package/eigen/Eigen/src/LU/InverseImpl.h +213 -275
  289. package/eigen/Eigen/src/LU/PartialPivLU.h +407 -435
  290. package/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h +54 -40
  291. package/eigen/Eigen/src/LU/arch/InverseSize4.h +353 -0
  292. package/eigen/Eigen/src/MetisSupport/InternalHeaderCheck.h +3 -0
  293. package/eigen/Eigen/src/MetisSupport/MetisSupport.h +81 -93
  294. package/eigen/Eigen/src/OrderingMethods/Amd.h +250 -282
  295. package/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +950 -1103
  296. package/eigen/Eigen/src/OrderingMethods/InternalHeaderCheck.h +3 -0
  297. package/eigen/Eigen/src/OrderingMethods/Ordering.h +111 -122
  298. package/eigen/Eigen/src/PaStiXSupport/InternalHeaderCheck.h +3 -0
  299. package/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +524 -570
  300. package/eigen/Eigen/src/PardisoSupport/InternalHeaderCheck.h +3 -0
  301. package/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +385 -429
  302. package/eigen/Eigen/src/QR/ColPivHouseholderQR.h +494 -473
  303. package/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +120 -56
  304. package/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +223 -137
  305. package/eigen/Eigen/src/QR/FullPivHouseholderQR.h +517 -460
  306. package/eigen/Eigen/src/QR/HouseholderQR.h +412 -278
  307. package/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h +32 -23
  308. package/eigen/Eigen/src/QR/InternalHeaderCheck.h +3 -0
  309. package/eigen/Eigen/src/SPQRSupport/InternalHeaderCheck.h +3 -0
  310. package/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +263 -261
  311. package/eigen/Eigen/src/SVD/BDCSVD.h +872 -679
  312. package/eigen/Eigen/src/SVD/BDCSVD_LAPACKE.h +174 -0
  313. package/eigen/Eigen/src/SVD/InternalHeaderCheck.h +3 -0
  314. package/eigen/Eigen/src/SVD/JacobiSVD.h +585 -543
  315. package/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +85 -49
  316. package/eigen/Eigen/src/SVD/SVDBase.h +281 -160
  317. package/eigen/Eigen/src/SVD/UpperBidiagonalization.h +202 -237
  318. package/eigen/Eigen/src/SparseCholesky/InternalHeaderCheck.h +3 -0
  319. package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +769 -590
  320. package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +318 -129
  321. package/eigen/Eigen/src/SparseCore/AmbiVector.h +202 -251
  322. package/eigen/Eigen/src/SparseCore/CompressedStorage.h +184 -236
  323. package/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +140 -184
  324. package/eigen/Eigen/src/SparseCore/InternalHeaderCheck.h +3 -0
  325. package/eigen/Eigen/src/SparseCore/SparseAssign.h +174 -111
  326. package/eigen/Eigen/src/SparseCore/SparseBlock.h +408 -477
  327. package/eigen/Eigen/src/SparseCore/SparseColEtree.h +100 -112
  328. package/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +531 -280
  329. package/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +559 -347
  330. package/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +100 -108
  331. package/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +185 -191
  332. package/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +71 -71
  333. package/eigen/Eigen/src/SparseCore/SparseDot.h +49 -47
  334. package/eigen/Eigen/src/SparseCore/SparseFuzzy.h +13 -11
  335. package/eigen/Eigen/src/SparseCore/SparseMap.h +243 -253
  336. package/eigen/Eigen/src/SparseCore/SparseMatrix.h +1614 -1142
  337. package/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +403 -357
  338. package/eigen/Eigen/src/SparseCore/SparsePermutation.h +186 -115
  339. package/eigen/Eigen/src/SparseCore/SparseProduct.h +100 -91
  340. package/eigen/Eigen/src/SparseCore/SparseRedux.h +22 -24
  341. package/eigen/Eigen/src/SparseCore/SparseRef.h +268 -295
  342. package/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +371 -414
  343. package/eigen/Eigen/src/SparseCore/SparseSolverBase.h +78 -87
  344. package/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +81 -95
  345. package/eigen/Eigen/src/SparseCore/SparseTranspose.h +62 -71
  346. package/eigen/Eigen/src/SparseCore/SparseTriangularView.h +132 -144
  347. package/eigen/Eigen/src/SparseCore/SparseUtil.h +146 -115
  348. package/eigen/Eigen/src/SparseCore/SparseVector.h +426 -372
  349. package/eigen/Eigen/src/SparseCore/SparseView.h +164 -193
  350. package/eigen/Eigen/src/SparseCore/TriangularSolver.h +129 -170
  351. package/eigen/Eigen/src/SparseLU/InternalHeaderCheck.h +3 -0
  352. package/eigen/Eigen/src/SparseLU/SparseLU.h +814 -618
  353. package/eigen/Eigen/src/SparseLU/SparseLUImpl.h +61 -48
  354. package/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +102 -118
  355. package/eigen/Eigen/src/SparseLU/SparseLU_Structs.h +38 -35
  356. package/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +273 -255
  357. package/eigen/Eigen/src/SparseLU/SparseLU_Utils.h +44 -49
  358. package/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h +104 -108
  359. package/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +90 -101
  360. package/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +57 -58
  361. package/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +43 -55
  362. package/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +74 -71
  363. package/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +125 -133
  364. package/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h +136 -159
  365. package/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h +51 -52
  366. package/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h +67 -73
  367. package/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h +24 -26
  368. package/eigen/Eigen/src/SparseQR/InternalHeaderCheck.h +3 -0
  369. package/eigen/Eigen/src/SparseQR/SparseQR.h +451 -490
  370. package/eigen/Eigen/src/StlSupport/StdDeque.h +28 -105
  371. package/eigen/Eigen/src/StlSupport/StdList.h +28 -84
  372. package/eigen/Eigen/src/StlSupport/StdVector.h +28 -108
  373. package/eigen/Eigen/src/StlSupport/details.h +48 -50
  374. package/eigen/Eigen/src/SuperLUSupport/InternalHeaderCheck.h +3 -0
  375. package/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +634 -732
  376. package/eigen/Eigen/src/ThreadPool/Barrier.h +70 -0
  377. package/eigen/Eigen/src/ThreadPool/CoreThreadPoolDevice.h +336 -0
  378. package/eigen/Eigen/src/ThreadPool/EventCount.h +241 -0
  379. package/eigen/Eigen/src/ThreadPool/ForkJoin.h +140 -0
  380. package/eigen/Eigen/src/ThreadPool/InternalHeaderCheck.h +4 -0
  381. package/eigen/Eigen/src/ThreadPool/NonBlockingThreadPool.h +587 -0
  382. package/eigen/Eigen/src/ThreadPool/RunQueue.h +230 -0
  383. package/eigen/Eigen/src/ThreadPool/ThreadCancel.h +21 -0
  384. package/eigen/Eigen/src/ThreadPool/ThreadEnvironment.h +43 -0
  385. package/eigen/Eigen/src/ThreadPool/ThreadLocal.h +289 -0
  386. package/eigen/Eigen/src/ThreadPool/ThreadPoolInterface.h +50 -0
  387. package/eigen/Eigen/src/ThreadPool/ThreadYield.h +16 -0
  388. package/eigen/Eigen/src/UmfPackSupport/InternalHeaderCheck.h +3 -0
  389. package/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +480 -380
  390. package/eigen/Eigen/src/misc/Image.h +41 -43
  391. package/eigen/Eigen/src/misc/InternalHeaderCheck.h +3 -0
  392. package/eigen/Eigen/src/misc/Kernel.h +39 -41
  393. package/eigen/Eigen/src/misc/RealSvd2x2.h +19 -21
  394. package/eigen/Eigen/src/misc/blas.h +83 -426
  395. package/eigen/Eigen/src/misc/lapacke.h +9976 -16182
  396. package/eigen/Eigen/src/misc/lapacke_helpers.h +163 -0
  397. package/eigen/Eigen/src/misc/lapacke_mangling.h +4 -5
  398. package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.inc +344 -0
  399. package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.inc +544 -0
  400. package/eigen/Eigen/src/plugins/BlockMethods.inc +1370 -0
  401. package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.inc +116 -0
  402. package/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.inc +167 -0
  403. package/eigen/Eigen/src/plugins/IndexedViewMethods.inc +192 -0
  404. package/eigen/Eigen/src/plugins/InternalHeaderCheck.inc +3 -0
  405. package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.inc +331 -0
  406. package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.inc +118 -0
  407. package/eigen/Eigen/src/plugins/ReshapedMethods.inc +133 -0
  408. package/lib/LibEigen.d.ts +4 -0
  409. package/lib/LibEigen.js +14 -0
  410. package/lib/index.d.ts +1 -1
  411. package/lib/index.js +7 -3
  412. package/package.json +2 -10
  413. package/eigen/Eigen/CMakeLists.txt +0 -19
  414. package/eigen/Eigen/src/Core/BooleanRedux.h +0 -164
  415. package/eigen/Eigen/src/Core/arch/CUDA/Complex.h +0 -103
  416. package/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -675
  417. package/eigen/Eigen/src/Core/arch/CUDA/MathFunctions.h +0 -91
  418. package/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
  419. package/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
  420. package/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
  421. package/eigen/Eigen/src/Core/util/NonMPL2.h +0 -3
  422. package/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
  423. package/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
  424. package/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h +0 -67
  425. package/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +0 -280
  426. package/eigen/Eigen/src/misc/lapack.h +0 -152
  427. package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +0 -332
  428. package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +0 -552
  429. package/eigen/Eigen/src/plugins/BlockMethods.h +0 -1058
  430. package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h +0 -115
  431. package/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +0 -163
  432. package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +0 -152
  433. package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +0 -85
  434. package/lib/eigen.d.ts +0 -2
  435. package/lib/eigen.js +0 -15
@@ -9,155 +9,138 @@
9
9
  // Public License v. 2.0. If a copy of the MPL was not distributed
10
10
  // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
11
11
 
12
-
13
12
  #ifndef EIGEN_PRODUCTEVALUATORS_H
14
13
  #define EIGEN_PRODUCTEVALUATORS_H
15
14
 
15
+ // IWYU pragma: private
16
+ #include "./InternalHeaderCheck.h"
17
+
16
18
  namespace Eigen {
17
-
19
+
18
20
  namespace internal {
19
21
 
20
22
  /** \internal
21
- * Evaluator of a product expression.
22
- * Since products require special treatments to handle all possible cases,
23
- * we simply deffer the evaluation logic to a product_evaluator class
24
- * which offers more partial specialization possibilities.
25
- *
26
- * \sa class product_evaluator
27
- */
28
- template<typename Lhs, typename Rhs, int Options>
29
- struct evaluator<Product<Lhs, Rhs, Options> >
30
- : public product_evaluator<Product<Lhs, Rhs, Options> >
31
- {
23
+ * Evaluator of a product expression.
24
+ * Since products require special treatments to handle all possible cases,
25
+ * we simply defer the evaluation logic to a product_evaluator class
26
+ * which offers more partial specialization possibilities.
27
+ *
28
+ * \sa class product_evaluator
29
+ */
30
+ template <typename Lhs, typename Rhs, int Options>
31
+ struct evaluator<Product<Lhs, Rhs, Options>> : public product_evaluator<Product<Lhs, Rhs, Options>> {
32
32
  typedef Product<Lhs, Rhs, Options> XprType;
33
33
  typedef product_evaluator<XprType> Base;
34
-
34
+
35
35
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr) : Base(xpr) {}
36
36
  };
37
-
37
+
38
38
  // Catch "scalar * ( A * B )" and transform it to "(A*scalar) * B"
39
39
  // TODO we should apply that rule only if that's really helpful
40
- template<typename Lhs, typename Rhs, typename Scalar1, typename Scalar2, typename Plain1>
41
- struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_product_op<Scalar1,Scalar2>,
40
+ template <typename Lhs, typename Rhs, typename Scalar1, typename Scalar2, typename Plain1>
41
+ struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_product_op<Scalar1, Scalar2>,
42
42
  const CwiseNullaryOp<internal::scalar_constant_op<Scalar1>, Plain1>,
43
- const Product<Lhs, Rhs, DefaultProduct> > >
44
- {
43
+ const Product<Lhs, Rhs, DefaultProduct>>> {
45
44
  static const bool value = true;
46
45
  };
47
- template<typename Lhs, typename Rhs, typename Scalar1, typename Scalar2, typename Plain1>
48
- struct evaluator<CwiseBinaryOp<internal::scalar_product_op<Scalar1,Scalar2>,
46
+ template <typename Lhs, typename Rhs, typename Scalar1, typename Scalar2, typename Plain1>
47
+ struct evaluator<CwiseBinaryOp<internal::scalar_product_op<Scalar1, Scalar2>,
49
48
  const CwiseNullaryOp<internal::scalar_constant_op<Scalar1>, Plain1>,
50
- const Product<Lhs, Rhs, DefaultProduct> > >
51
- : public evaluator<Product<EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar1,Lhs,product), Rhs, DefaultProduct> >
52
- {
53
- typedef CwiseBinaryOp<internal::scalar_product_op<Scalar1,Scalar2>,
54
- const CwiseNullaryOp<internal::scalar_constant_op<Scalar1>, Plain1>,
55
- const Product<Lhs, Rhs, DefaultProduct> > XprType;
56
- typedef evaluator<Product<EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar1,Lhs,product), Rhs, DefaultProduct> > Base;
49
+ const Product<Lhs, Rhs, DefaultProduct>>>
50
+ : public evaluator<Product<EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar1, Lhs, product), Rhs, DefaultProduct>> {
51
+ typedef CwiseBinaryOp<internal::scalar_product_op<Scalar1, Scalar2>,
52
+ const CwiseNullaryOp<internal::scalar_constant_op<Scalar1>, Plain1>,
53
+ const Product<Lhs, Rhs, DefaultProduct>>
54
+ XprType;
55
+ typedef evaluator<Product<EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar1, Lhs, product), Rhs, DefaultProduct>> Base;
57
56
 
58
57
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr)
59
- : Base(xpr.lhs().functor().m_other * xpr.rhs().lhs() * xpr.rhs().rhs())
60
- {}
58
+ : Base(xpr.lhs().functor().m_other * xpr.rhs().lhs() * xpr.rhs().rhs()) {}
61
59
  };
62
60
 
63
-
64
- template<typename Lhs, typename Rhs, int DiagIndex>
65
- struct evaluator<Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> >
66
- : public evaluator<Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex> >
67
- {
61
+ template <typename Lhs, typename Rhs, int DiagIndex>
62
+ struct evaluator<Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex>>
63
+ : public evaluator<Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex>> {
68
64
  typedef Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> XprType;
69
- typedef evaluator<Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex> > Base;
70
-
65
+ typedef evaluator<Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex>> Base;
66
+
71
67
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr)
72
- : Base(Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex>(
73
- Product<Lhs, Rhs, LazyProduct>(xpr.nestedExpression().lhs(), xpr.nestedExpression().rhs()),
74
- xpr.index() ))
75
- {}
68
+ : Base(Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex>(
69
+ Product<Lhs, Rhs, LazyProduct>(xpr.nestedExpression().lhs(), xpr.nestedExpression().rhs()), xpr.index())) {}
76
70
  };
77
71
 
78
-
79
72
  // Helper class to perform a matrix product with the destination at hand.
80
73
  // Depending on the sizes of the factors, there are different evaluation strategies
81
74
  // as controlled by internal::product_type.
82
- template< typename Lhs, typename Rhs,
83
- typename LhsShape = typename evaluator_traits<Lhs>::Shape,
75
+ template <typename Lhs, typename Rhs, typename LhsShape = typename evaluator_traits<Lhs>::Shape,
84
76
  typename RhsShape = typename evaluator_traits<Rhs>::Shape,
85
- int ProductType = internal::product_type<Lhs,Rhs>::value>
77
+ int ProductType = internal::product_type<Lhs, Rhs>::value>
86
78
  struct generic_product_impl;
87
79
 
88
- template<typename Lhs, typename Rhs>
89
- struct evaluator_assume_aliasing<Product<Lhs, Rhs, DefaultProduct> > {
80
+ template <typename Lhs, typename Rhs>
81
+ struct evaluator_assume_aliasing<Product<Lhs, Rhs, DefaultProduct>> {
90
82
  static const bool value = true;
91
83
  };
92
84
 
93
85
  // This is the default evaluator implementation for products:
94
86
  // It creates a temporary and call generic_product_impl
95
- template<typename Lhs, typename Rhs, int Options, int ProductTag, typename LhsShape, typename RhsShape>
87
+ template <typename Lhs, typename Rhs, int Options, int ProductTag, typename LhsShape, typename RhsShape>
96
88
  struct product_evaluator<Product<Lhs, Rhs, Options>, ProductTag, LhsShape, RhsShape>
97
- : public evaluator<typename Product<Lhs, Rhs, Options>::PlainObject>
98
- {
89
+ : public evaluator<typename Product<Lhs, Rhs, Options>::PlainObject> {
99
90
  typedef Product<Lhs, Rhs, Options> XprType;
100
91
  typedef typename XprType::PlainObject PlainObject;
101
92
  typedef evaluator<PlainObject> Base;
102
- enum {
103
- Flags = Base::Flags | EvalBeforeNestingBit
104
- };
105
-
106
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
107
- explicit product_evaluator(const XprType& xpr)
108
- : m_result(xpr.rows(), xpr.cols())
109
- {
110
- ::new (static_cast<Base*>(this)) Base(m_result);
111
-
112
- // FIXME shall we handle nested_eval here?,
113
- // if so, then we must take care at removing the call to nested_eval in the specializations (e.g., in permutation_matrix_product, transposition_matrix_product, etc.)
114
- // typedef typename internal::nested_eval<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
115
- // typedef typename internal::nested_eval<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
116
- // typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned;
117
- // typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned;
118
- //
119
- // const LhsNested lhs(xpr.lhs());
120
- // const RhsNested rhs(xpr.rhs());
121
- //
122
- // generic_product_impl<LhsNestedCleaned, RhsNestedCleaned>::evalTo(m_result, lhs, rhs);
93
+ enum { Flags = Base::Flags | EvalBeforeNestingBit };
94
+
95
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit product_evaluator(const XprType& xpr)
96
+ : m_result(xpr.rows(), xpr.cols()) {
97
+ internal::construct_at<Base>(this, m_result);
98
+
99
+ // FIXME shall we handle nested_eval here?,
100
+ // if so, then we must take care at removing the call to nested_eval in the specializations (e.g., in
101
+ // permutation_matrix_product, transposition_matrix_product, etc.)
102
+ // typedef typename internal::nested_eval<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
103
+ // typedef typename internal::nested_eval<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
104
+ // typedef internal::remove_all_t<LhsNested> LhsNestedCleaned;
105
+ // typedef internal::remove_all_t<RhsNested> RhsNestedCleaned;
106
+ //
107
+ // const LhsNested lhs(xpr.lhs());
108
+ // const RhsNested rhs(xpr.rhs());
109
+ //
110
+ // generic_product_impl<LhsNestedCleaned, RhsNestedCleaned>::evalTo(m_result, lhs, rhs);
123
111
 
124
112
  generic_product_impl<Lhs, Rhs, LhsShape, RhsShape, ProductTag>::evalTo(m_result, xpr.lhs(), xpr.rhs());
125
113
  }
126
-
127
- protected:
114
+
115
+ protected:
128
116
  PlainObject m_result;
129
117
  };
130
118
 
131
- // The following three shortcuts are enabled only if the scalar types match excatly.
119
+ // The following three shortcuts are enabled only if the scalar types match exactly.
132
120
  // TODO: we could enable them for different scalar types when the product is not vectorized.
133
121
 
134
122
  // Dense = Product
135
- template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>
136
- struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::assign_op<Scalar,Scalar>, Dense2Dense,
137
- typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type>
138
- {
139
- typedef Product<Lhs,Rhs,Options> SrcXprType;
140
- static EIGEN_STRONG_INLINE
141
- void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
142
- {
123
+ template <typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>
124
+ struct Assignment<DstXprType, Product<Lhs, Rhs, Options>, internal::assign_op<Scalar, Scalar>, Dense2Dense,
125
+ std::enable_if_t<(Options == DefaultProduct || Options == AliasFreeProduct)>> {
126
+ typedef Product<Lhs, Rhs, Options> SrcXprType;
127
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(DstXprType& dst, const SrcXprType& src,
128
+ const internal::assign_op<Scalar, Scalar>&) {
143
129
  Index dstRows = src.rows();
144
130
  Index dstCols = src.cols();
145
- if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
146
- dst.resize(dstRows, dstCols);
131
+ if ((dst.rows() != dstRows) || (dst.cols() != dstCols)) dst.resize(dstRows, dstCols);
147
132
  // FIXME shall we handle nested_eval here?
148
133
  generic_product_impl<Lhs, Rhs>::evalTo(dst, src.lhs(), src.rhs());
149
134
  }
150
135
  };
151
136
 
152
137
  // Dense += Product
153
- template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>
154
- struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::add_assign_op<Scalar,Scalar>, Dense2Dense,
155
- typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type>
156
- {
157
- typedef Product<Lhs,Rhs,Options> SrcXprType;
158
- static EIGEN_STRONG_INLINE
159
- void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<Scalar,Scalar> &)
160
- {
138
+ template <typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>
139
+ struct Assignment<DstXprType, Product<Lhs, Rhs, Options>, internal::add_assign_op<Scalar, Scalar>, Dense2Dense,
140
+ std::enable_if_t<(Options == DefaultProduct || Options == AliasFreeProduct)>> {
141
+ typedef Product<Lhs, Rhs, Options> SrcXprType;
142
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(DstXprType& dst, const SrcXprType& src,
143
+ const internal::add_assign_op<Scalar, Scalar>&) {
161
144
  eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
162
145
  // FIXME shall we handle nested_eval here?
163
146
  generic_product_impl<Lhs, Rhs>::addTo(dst, src.lhs(), src.rhs());
@@ -165,35 +148,35 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::add_assign_op<
165
148
  };
166
149
 
167
150
  // Dense -= Product
168
- template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>
169
- struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::sub_assign_op<Scalar,Scalar>, Dense2Dense,
170
- typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type>
171
- {
172
- typedef Product<Lhs,Rhs,Options> SrcXprType;
173
- static EIGEN_STRONG_INLINE
174
- void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<Scalar,Scalar> &)
175
- {
151
+ template <typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>
152
+ struct Assignment<DstXprType, Product<Lhs, Rhs, Options>, internal::sub_assign_op<Scalar, Scalar>, Dense2Dense,
153
+ std::enable_if_t<(Options == DefaultProduct || Options == AliasFreeProduct)>> {
154
+ typedef Product<Lhs, Rhs, Options> SrcXprType;
155
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(DstXprType& dst, const SrcXprType& src,
156
+ const internal::sub_assign_op<Scalar, Scalar>&) {
176
157
  eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
177
158
  // FIXME shall we handle nested_eval here?
178
159
  generic_product_impl<Lhs, Rhs>::subTo(dst, src.lhs(), src.rhs());
179
160
  }
180
161
  };
181
162
 
182
-
183
163
  // Dense ?= scalar * Product
184
164
  // TODO we should apply that rule if that's really helpful
185
165
  // for instance, this is not good for inner products
186
- template< typename DstXprType, typename Lhs, typename Rhs, typename AssignFunc, typename Scalar, typename ScalarBis, typename Plain>
187
- struct Assignment<DstXprType, CwiseBinaryOp<internal::scalar_product_op<ScalarBis,Scalar>, const CwiseNullaryOp<internal::scalar_constant_op<ScalarBis>,Plain>,
188
- const Product<Lhs,Rhs,DefaultProduct> >, AssignFunc, Dense2Dense>
189
- {
190
- typedef CwiseBinaryOp<internal::scalar_product_op<ScalarBis,Scalar>,
191
- const CwiseNullaryOp<internal::scalar_constant_op<ScalarBis>,Plain>,
192
- const Product<Lhs,Rhs,DefaultProduct> > SrcXprType;
193
- static EIGEN_STRONG_INLINE
194
- void run(DstXprType &dst, const SrcXprType &src, const AssignFunc& func)
195
- {
196
- call_assignment_no_alias(dst, (src.lhs().functor().m_other * src.rhs().lhs())*src.rhs().rhs(), func);
166
+ template <typename DstXprType, typename Lhs, typename Rhs, typename AssignFunc, typename Scalar, typename ScalarBis,
167
+ typename Plain>
168
+ struct Assignment<DstXprType,
169
+ CwiseBinaryOp<internal::scalar_product_op<ScalarBis, Scalar>,
170
+ const CwiseNullaryOp<internal::scalar_constant_op<ScalarBis>, Plain>,
171
+ const Product<Lhs, Rhs, DefaultProduct>>,
172
+ AssignFunc, Dense2Dense> {
173
+ typedef CwiseBinaryOp<internal::scalar_product_op<ScalarBis, Scalar>,
174
+ const CwiseNullaryOp<internal::scalar_constant_op<ScalarBis>, Plain>,
175
+ const Product<Lhs, Rhs, DefaultProduct>>
176
+ SrcXprType;
177
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(DstXprType& dst, const SrcXprType& src,
178
+ const AssignFunc& func) {
179
+ call_assignment_no_alias(dst, (src.lhs().functor().m_other * src.rhs().lhs()) * src.rhs().rhs(), func);
197
180
  }
198
181
  };
199
182
 
@@ -201,251 +184,291 @@ struct Assignment<DstXprType, CwiseBinaryOp<internal::scalar_product_op<ScalarBi
201
184
  // Catch "Dense ?= xpr + Product<>" expression to save one temporary
202
185
  // FIXME we could probably enable these rules for any product, i.e., not only Dense and DefaultProduct
203
186
 
204
- template<typename OtherXpr, typename Lhs, typename Rhs>
205
- struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_sum_op<typename OtherXpr::Scalar,typename Product<Lhs,Rhs,DefaultProduct>::Scalar>, const OtherXpr,
206
- const Product<Lhs,Rhs,DefaultProduct> >, DenseShape > {
187
+ template <typename OtherXpr, typename Lhs, typename Rhs>
188
+ struct evaluator_assume_aliasing<
189
+ CwiseBinaryOp<
190
+ internal::scalar_sum_op<typename OtherXpr::Scalar, typename Product<Lhs, Rhs, DefaultProduct>::Scalar>,
191
+ const OtherXpr, const Product<Lhs, Rhs, DefaultProduct>>,
192
+ DenseShape> {
207
193
  static const bool value = true;
208
194
  };
209
195
 
210
- template<typename OtherXpr, typename Lhs, typename Rhs>
211
- struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_difference_op<typename OtherXpr::Scalar,typename Product<Lhs,Rhs,DefaultProduct>::Scalar>, const OtherXpr,
212
- const Product<Lhs,Rhs,DefaultProduct> >, DenseShape > {
196
+ template <typename OtherXpr, typename Lhs, typename Rhs>
197
+ struct evaluator_assume_aliasing<
198
+ CwiseBinaryOp<
199
+ internal::scalar_difference_op<typename OtherXpr::Scalar, typename Product<Lhs, Rhs, DefaultProduct>::Scalar>,
200
+ const OtherXpr, const Product<Lhs, Rhs, DefaultProduct>>,
201
+ DenseShape> {
213
202
  static const bool value = true;
214
203
  };
215
204
 
216
- template<typename DstXprType, typename OtherXpr, typename ProductType, typename Func1, typename Func2>
217
- struct assignment_from_xpr_op_product
218
- {
219
- template<typename SrcXprType, typename InitialFunc>
220
- static EIGEN_STRONG_INLINE
221
- void run(DstXprType &dst, const SrcXprType &src, const InitialFunc& /*func*/)
222
- {
205
+ template <typename DstXprType, typename OtherXpr, typename ProductType, typename Func1, typename Func2>
206
+ struct assignment_from_xpr_op_product {
207
+ template <typename SrcXprType, typename InitialFunc>
208
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(DstXprType& dst, const SrcXprType& src,
209
+ const InitialFunc& /*func*/) {
223
210
  call_assignment_no_alias(dst, src.lhs(), Func1());
224
211
  call_assignment_no_alias(dst, src.rhs(), Func2());
225
212
  }
226
213
  };
227
214
 
228
- #define EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(ASSIGN_OP,BINOP,ASSIGN_OP2) \
229
- template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename DstScalar, typename SrcScalar, typename OtherScalar,typename ProdScalar> \
230
- struct Assignment<DstXprType, CwiseBinaryOp<internal::BINOP<OtherScalar,ProdScalar>, const OtherXpr, \
231
- const Product<Lhs,Rhs,DefaultProduct> >, internal::ASSIGN_OP<DstScalar,SrcScalar>, Dense2Dense> \
232
- : assignment_from_xpr_op_product<DstXprType, OtherXpr, Product<Lhs,Rhs,DefaultProduct>, internal::ASSIGN_OP<DstScalar,OtherScalar>, internal::ASSIGN_OP2<DstScalar,ProdScalar> > \
233
- {}
215
+ #define EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(ASSIGN_OP, BINOP, ASSIGN_OP2) \
216
+ template <typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename DstScalar, \
217
+ typename SrcScalar, typename OtherScalar, typename ProdScalar> \
218
+ struct Assignment<DstXprType, \
219
+ CwiseBinaryOp<internal::BINOP<OtherScalar, ProdScalar>, const OtherXpr, \
220
+ const Product<Lhs, Rhs, DefaultProduct>>, \
221
+ internal::ASSIGN_OP<DstScalar, SrcScalar>, Dense2Dense> \
222
+ : assignment_from_xpr_op_product<DstXprType, OtherXpr, Product<Lhs, Rhs, DefaultProduct>, \
223
+ internal::ASSIGN_OP<DstScalar, OtherScalar>, \
224
+ internal::ASSIGN_OP2<DstScalar, ProdScalar>> {}
225
+
226
+ EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(assign_op, scalar_sum_op, add_assign_op);
227
+ EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(add_assign_op, scalar_sum_op, add_assign_op);
228
+ EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(sub_assign_op, scalar_sum_op, sub_assign_op);
229
+
230
+ EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(assign_op, scalar_difference_op, sub_assign_op);
231
+ EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(add_assign_op, scalar_difference_op, sub_assign_op);
232
+ EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(sub_assign_op, scalar_difference_op, add_assign_op);
234
233
 
235
- EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(assign_op, scalar_sum_op,add_assign_op);
236
- EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(add_assign_op,scalar_sum_op,add_assign_op);
237
- EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(sub_assign_op,scalar_sum_op,sub_assign_op);
234
+ //----------------------------------------
238
235
 
239
- EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(assign_op, scalar_difference_op,sub_assign_op);
240
- EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(add_assign_op,scalar_difference_op,sub_assign_op);
241
- EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(sub_assign_op,scalar_difference_op,add_assign_op);
236
+ template <typename Lhs, typename Rhs>
237
+ struct generic_product_impl<Lhs, Rhs, DenseShape, DenseShape, InnerProduct> {
238
+ using impl = default_inner_product_impl<Lhs, Rhs, false>;
239
+ template <typename Dst>
240
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) {
241
+ dst.coeffRef(0, 0) = impl::run(lhs, rhs);
242
+ }
242
243
 
243
- //----------------------------------------
244
+ template <typename Dst>
245
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) {
246
+ dst.coeffRef(0, 0) += impl::run(lhs, rhs);
247
+ }
244
248
 
245
- template<typename Lhs, typename Rhs>
246
- struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,InnerProduct>
247
- {
248
- template<typename Dst>
249
- static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
250
- {
251
- dst.coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum();
252
- }
253
-
254
- template<typename Dst>
255
- static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
256
- {
257
- dst.coeffRef(0,0) += (lhs.transpose().cwiseProduct(rhs)).sum();
258
- }
259
-
260
- template<typename Dst>
261
- static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
262
- { dst.coeffRef(0,0) -= (lhs.transpose().cwiseProduct(rhs)).sum(); }
249
+ template <typename Dst>
250
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) {
251
+ dst.coeffRef(0, 0) -= impl::run(lhs, rhs);
252
+ }
263
253
  };
264
254
 
265
-
266
255
  /***********************************************************************
267
- * Implementation of outer dense * dense vector product
268
- ***********************************************************************/
256
+ * Implementation of outer dense * dense vector product
257
+ ***********************************************************************/
269
258
 
270
259
  // Column major result
271
- template<typename Dst, typename Lhs, typename Rhs, typename Func>
272
- void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const false_type&)
273
- {
260
+ template <typename Dst, typename Lhs, typename Rhs, typename Func>
261
+ void EIGEN_DEVICE_FUNC outer_product_selector_run(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Func& func,
262
+ const false_type&) {
274
263
  evaluator<Rhs> rhsEval(rhs);
275
- typename nested_eval<Lhs,Rhs::SizeAtCompileTime>::type actual_lhs(lhs);
264
+ ei_declare_local_nested_eval(Lhs, lhs, Rhs::SizeAtCompileTime, actual_lhs);
276
265
  // FIXME if cols is large enough, then it might be useful to make sure that lhs is sequentially stored
277
266
  // FIXME not very good if rhs is real and lhs complex while alpha is real too
278
267
  const Index cols = dst.cols();
279
- for (Index j=0; j<cols; ++j)
280
- func(dst.col(j), rhsEval.coeff(Index(0),j) * actual_lhs);
268
+ for (Index j = 0; j < cols; ++j) func(dst.col(j), rhsEval.coeff(Index(0), j) * actual_lhs);
281
269
  }
282
270
 
283
271
  // Row major result
284
- template<typename Dst, typename Lhs, typename Rhs, typename Func>
285
- void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const true_type&)
286
- {
272
+ template <typename Dst, typename Lhs, typename Rhs, typename Func>
273
+ void EIGEN_DEVICE_FUNC outer_product_selector_run(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Func& func,
274
+ const true_type&) {
287
275
  evaluator<Lhs> lhsEval(lhs);
288
- typename nested_eval<Rhs,Lhs::SizeAtCompileTime>::type actual_rhs(rhs);
276
+ ei_declare_local_nested_eval(Rhs, rhs, Lhs::SizeAtCompileTime, actual_rhs);
289
277
  // FIXME if rows is large enough, then it might be useful to make sure that rhs is sequentially stored
290
278
  // FIXME not very good if lhs is real and rhs complex while alpha is real too
291
279
  const Index rows = dst.rows();
292
- for (Index i=0; i<rows; ++i)
293
- func(dst.row(i), lhsEval.coeff(i,Index(0)) * actual_rhs);
280
+ for (Index i = 0; i < rows; ++i) func(dst.row(i), lhsEval.coeff(i, Index(0)) * actual_rhs);
294
281
  }
295
282
 
296
- template<typename Lhs, typename Rhs>
297
- struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,OuterProduct>
298
- {
299
- template<typename T> struct is_row_major : internal::conditional<(int(T::Flags)&RowMajorBit), internal::true_type, internal::false_type>::type {};
300
- typedef typename Product<Lhs,Rhs>::Scalar Scalar;
301
-
283
+ template <typename Lhs, typename Rhs>
284
+ struct generic_product_impl<Lhs, Rhs, DenseShape, DenseShape, OuterProduct> {
285
+ template <typename T>
286
+ struct is_row_major : bool_constant<(int(T::Flags) & RowMajorBit)> {};
287
+ typedef typename Product<Lhs, Rhs>::Scalar Scalar;
288
+
302
289
  // TODO it would be nice to be able to exploit our *_assign_op functors for that purpose
303
- struct set { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() = src; } };
304
- struct add { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() += src; } };
305
- struct sub { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() -= src; } };
290
+ struct set {
291
+ template <typename Dst, typename Src>
292
+ EIGEN_DEVICE_FUNC void operator()(const Dst& dst, const Src& src) const {
293
+ dst.const_cast_derived() = src;
294
+ }
295
+ };
296
+ struct add {
297
+ /** Add to dst. */
298
+ template <typename Dst, typename Src>
299
+ EIGEN_DEVICE_FUNC void operator()(const Dst& dst, const Src& src) const {
300
+ dst.const_cast_derived() += src;
301
+ }
302
+ };
303
+ struct sub {
304
+ template <typename Dst, typename Src>
305
+ EIGEN_DEVICE_FUNC void operator()(const Dst& dst, const Src& src) const {
306
+ dst.const_cast_derived() -= src;
307
+ }
308
+ };
309
+ /** Scaled add. */
306
310
  struct adds {
307
311
  Scalar m_scale;
312
+ /** Constructor */
308
313
  explicit adds(const Scalar& s) : m_scale(s) {}
309
- template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const {
314
+ /** Scaled add to dst. */
315
+ template <typename Dst, typename Src>
316
+ void EIGEN_DEVICE_FUNC operator()(const Dst& dst, const Src& src) const {
310
317
  dst.const_cast_derived() += m_scale * src;
311
318
  }
312
319
  };
313
-
314
- template<typename Dst>
315
- static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
316
- {
320
+
321
+ template <typename Dst>
322
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) {
317
323
  internal::outer_product_selector_run(dst, lhs, rhs, set(), is_row_major<Dst>());
318
324
  }
319
-
320
- template<typename Dst>
321
- static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
322
- {
325
+
326
+ template <typename Dst>
327
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) {
323
328
  internal::outer_product_selector_run(dst, lhs, rhs, add(), is_row_major<Dst>());
324
329
  }
325
-
326
- template<typename Dst>
327
- static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
328
- {
330
+
331
+ template <typename Dst>
332
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) {
329
333
  internal::outer_product_selector_run(dst, lhs, rhs, sub(), is_row_major<Dst>());
330
334
  }
331
-
332
- template<typename Dst>
333
- static EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
334
- {
335
+
336
+ template <typename Dst>
337
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs,
338
+ const Scalar& alpha) {
335
339
  internal::outer_product_selector_run(dst, lhs, rhs, adds(alpha), is_row_major<Dst>());
336
340
  }
337
-
338
341
  };
339
342
 
340
-
341
343
  // This base class provides default implementations for evalTo, addTo, subTo, in terms of scaleAndAddTo
342
- template<typename Lhs, typename Rhs, typename Derived>
343
- struct generic_product_impl_base
344
- {
345
- typedef typename Product<Lhs,Rhs>::Scalar Scalar;
346
-
347
- template<typename Dst>
348
- static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
349
- { dst.setZero(); scaleAndAddTo(dst, lhs, rhs, Scalar(1)); }
350
-
351
- template<typename Dst>
352
- static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
353
- { scaleAndAddTo(dst,lhs, rhs, Scalar(1)); }
354
-
355
- template<typename Dst>
356
- static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
357
- { scaleAndAddTo(dst, lhs, rhs, Scalar(-1)); }
358
-
359
- template<typename Dst>
360
- static EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
361
- { Derived::scaleAndAddTo(dst,lhs,rhs,alpha); }
344
+ template <typename Lhs, typename Rhs, typename Derived>
345
+ struct generic_product_impl_base {
346
+ typedef typename Product<Lhs, Rhs>::Scalar Scalar;
347
+
348
+ template <typename Dst>
349
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) {
350
+ dst.setZero();
351
+ scaleAndAddTo(dst, lhs, rhs, Scalar(1));
352
+ }
353
+
354
+ template <typename Dst>
355
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) {
356
+ scaleAndAddTo(dst, lhs, rhs, Scalar(1));
357
+ }
362
358
 
359
+ template <typename Dst>
360
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) {
361
+ scaleAndAddTo(dst, lhs, rhs, Scalar(-1));
362
+ }
363
+
364
+ template <typename Dst>
365
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs,
366
+ const Scalar& alpha) {
367
+ Derived::scaleAndAddTo(dst, lhs, rhs, alpha);
368
+ }
363
369
  };
364
370
 
365
- template<typename Lhs, typename Rhs>
366
- struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemvProduct>
367
- : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemvProduct> >
368
- {
369
- typedef typename nested_eval<Lhs,1>::type LhsNested;
370
- typedef typename nested_eval<Rhs,1>::type RhsNested;
371
- typedef typename Product<Lhs,Rhs>::Scalar Scalar;
371
+ template <typename Lhs, typename Rhs>
372
+ struct generic_product_impl<Lhs, Rhs, DenseShape, DenseShape, GemvProduct>
373
+ : generic_product_impl_base<Lhs, Rhs, generic_product_impl<Lhs, Rhs, DenseShape, DenseShape, GemvProduct>> {
374
+ typedef typename nested_eval<Lhs, 1>::type LhsNested;
375
+ typedef typename nested_eval<Rhs, 1>::type RhsNested;
376
+ typedef typename Product<Lhs, Rhs>::Scalar Scalar;
372
377
  enum { Side = Lhs::IsVectorAtCompileTime ? OnTheLeft : OnTheRight };
373
- typedef typename internal::remove_all<typename internal::conditional<int(Side)==OnTheRight,LhsNested,RhsNested>::type>::type MatrixType;
374
-
375
- template<typename Dest>
376
- static EIGEN_STRONG_INLINE void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
377
- {
378
+ typedef internal::remove_all_t<std::conditional_t<int(Side) == OnTheRight, LhsNested, RhsNested>> MatrixType;
379
+
380
+ template <typename Dest>
381
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs,
382
+ const Scalar& alpha) {
383
+ // Fallback to inner product if both the lhs and rhs is a runtime vector.
384
+ if (lhs.rows() == 1 && rhs.cols() == 1) {
385
+ dst.coeffRef(0, 0) += alpha * lhs.row(0).conjugate().dot(rhs.col(0));
386
+ return;
387
+ }
378
388
  LhsNested actual_lhs(lhs);
379
389
  RhsNested actual_rhs(rhs);
380
- internal::gemv_dense_selector<Side,
381
- (int(MatrixType::Flags)&RowMajorBit) ? RowMajor : ColMajor,
382
- bool(internal::blas_traits<MatrixType>::HasUsableDirectAccess)
383
- >::run(actual_lhs, actual_rhs, dst, alpha);
390
+ internal::gemv_dense_selector<Side, (int(MatrixType::Flags) & RowMajorBit) ? RowMajor : ColMajor,
391
+ bool(internal::blas_traits<MatrixType>::HasUsableDirectAccess)>::run(actual_lhs,
392
+ actual_rhs, dst,
393
+ alpha);
384
394
  }
385
395
  };
386
396
 
387
- template<typename Lhs, typename Rhs>
388
- struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode>
389
- {
390
- typedef typename Product<Lhs,Rhs>::Scalar Scalar;
391
-
392
- template<typename Dst>
393
- static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
394
- {
397
+ template <typename Lhs, typename Rhs>
398
+ struct generic_product_impl<Lhs, Rhs, DenseShape, DenseShape, CoeffBasedProductMode> {
399
+ typedef typename Product<Lhs, Rhs>::Scalar Scalar;
400
+
401
+ template <typename Dst>
402
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) {
395
403
  // Same as: dst.noalias() = lhs.lazyProduct(rhs);
396
404
  // but easier on the compiler side
397
- call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::assign_op<typename Dst::Scalar,Scalar>());
405
+ call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::assign_op<typename Dst::Scalar, Scalar>());
398
406
  }
399
407
 
400
- template<typename Dst>
401
- static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
402
- {
408
+ template <typename Dst>
409
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) {
403
410
  // dst.noalias() += lhs.lazyProduct(rhs);
404
- call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::add_assign_op<typename Dst::Scalar,Scalar>());
411
+ call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::add_assign_op<typename Dst::Scalar, Scalar>());
405
412
  }
406
-
407
- template<typename Dst>
408
- static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
409
- {
413
+
414
+ template <typename Dst>
415
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) {
410
416
  // dst.noalias() -= lhs.lazyProduct(rhs);
411
- call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::sub_assign_op<typename Dst::Scalar,Scalar>());
412
- }
413
-
414
- // Catch "dst {,+,-}= (s*A)*B" and evaluate it lazily by moving out the scalar factor:
415
- // dst {,+,-}= s * (A.lazyProduct(B))
416
- // This is a huge benefit for heap-allocated matrix types as it save one costly allocation.
417
- // For them, this strategy is also faster than simply by-passing the heap allocation through
418
- // stack allocation.
419
- // For fixed sizes matrices, this is less obvious, it is sometimes x2 faster, but sometimes x3 slower,
420
- // and the behavior depends also a lot on the compiler... so let's be conservative and enable them for dynamic-size only,
421
- // that is when coming from generic_product_impl<...,GemmProduct> in file GeneralMatrixMatrix.h
422
- template<typename Dst, typename Scalar1, typename Scalar2, typename Plain1, typename Xpr2, typename Func>
423
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
424
- void eval_dynamic(Dst& dst, const CwiseBinaryOp<internal::scalar_product_op<Scalar1,Scalar2>,
425
- const CwiseNullaryOp<internal::scalar_constant_op<Scalar1>, Plain1>, Xpr2>& lhs, const Rhs& rhs, const Func &func)
426
- {
427
- call_assignment_no_alias(dst, lhs.lhs().functor().m_other * lhs.rhs().lazyProduct(rhs), func);
428
- }
429
-
430
- // Here, we we always have LhsT==Lhs, but we need to make it a template type to make the above
431
- // overload more specialized.
432
- template<typename Dst, typename LhsT, typename Func>
433
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
434
- void eval_dynamic(Dst& dst, const LhsT& lhs, const Rhs& rhs, const Func &func)
435
- {
436
- call_assignment_no_alias(dst, lhs.lazyProduct(rhs), func);
437
- }
438
-
439
-
440
- // template<typename Dst>
441
- // static inline void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
442
- // { dst.noalias() += alpha * lhs.lazyProduct(rhs); }
417
+ call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::sub_assign_op<typename Dst::Scalar, Scalar>());
418
+ }
419
+
420
+ // This is a special evaluation path called from generic_product_impl<...,GemmProduct> in file GeneralMatrixMatrix.h
421
+ // This variant tries to extract scalar multiples from both the LHS and RHS and factor them out. For instance:
422
+ // dst {,+,-}= (s1*A)*(B*s2)
423
+ // will be rewritten as:
424
+ // dst {,+,-}= (s1*s2) * (A.lazyProduct(B))
425
+ // There are at least four benefits of doing so:
426
+ // 1 - huge performance gain for heap-allocated matrix types as it save costly allocations.
427
+ // 2 - it is faster than simply by-passing the heap allocation through stack allocation.
428
+ // 3 - it makes this fallback consistent with the heavy GEMM routine.
429
+ // 4 - it fully by-passes huge stack allocation attempts when multiplying huge fixed-size matrices.
430
+ // (see https://stackoverflow.com/questions/54738495)
431
+ // For small fixed sizes matrices, however, the gains are less obvious, it is sometimes x2 faster, but sometimes x3
432
+ // slower, and the behavior depends also a lot on the compiler... This is why this re-writing strategy is currently
433
+ // enabled only when falling back from the main GEMM.
434
+ template <typename Dst, typename Func>
435
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void eval_dynamic(Dst& dst, const Lhs& lhs, const Rhs& rhs,
436
+ const Func& func) {
437
+ enum {
438
+ HasScalarFactor = blas_traits<Lhs>::HasScalarFactor || blas_traits<Rhs>::HasScalarFactor,
439
+ ConjLhs = blas_traits<Lhs>::NeedToConjugate,
440
+ ConjRhs = blas_traits<Rhs>::NeedToConjugate
441
+ };
442
+ // FIXME: in c++11 this should be auto, and extractScalarFactor should also return auto
443
+ // this is important for real*complex_mat
444
+ Scalar actualAlpha = combine_scalar_factors<Scalar>(lhs, rhs);
445
+
446
+ eval_dynamic_impl(dst, blas_traits<Lhs>::extract(lhs).template conjugateIf<ConjLhs>(),
447
+ blas_traits<Rhs>::extract(rhs).template conjugateIf<ConjRhs>(), func, actualAlpha,
448
+ bool_constant<HasScalarFactor>());
449
+ }
450
+
451
+ protected:
452
+ template <typename Dst, typename LhsT, typename RhsT, typename Func, typename Scalar>
453
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void eval_dynamic_impl(Dst& dst, const LhsT& lhs, const RhsT& rhs,
454
+ const Func& func, const Scalar& s /* == 1 */,
455
+ false_type) {
456
+ EIGEN_UNUSED_VARIABLE(s);
457
+ eigen_internal_assert(numext::is_exactly_one(s));
458
+ call_restricted_packet_assignment_no_alias(dst, lhs.lazyProduct(rhs), func);
459
+ }
460
+
461
+ template <typename Dst, typename LhsT, typename RhsT, typename Func, typename Scalar>
462
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void eval_dynamic_impl(Dst& dst, const LhsT& lhs, const RhsT& rhs,
463
+ const Func& func, const Scalar& s, true_type) {
464
+ call_restricted_packet_assignment_no_alias(dst, s * lhs.lazyProduct(rhs), func);
465
+ }
443
466
  };
444
467
 
445
468
  // This specialization enforces the use of a coefficient-based evaluation strategy
446
- template<typename Lhs, typename Rhs>
447
- struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,LazyCoeffBasedProductMode>
448
- : generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode> {};
469
+ template <typename Lhs, typename Rhs>
470
+ struct generic_product_impl<Lhs, Rhs, DenseShape, DenseShape, LazyCoeffBasedProductMode>
471
+ : generic_product_impl<Lhs, Rhs, DenseShape, DenseShape, CoeffBasedProductMode> {};
449
472
 
450
473
  // Case 2: Evaluate coeff by coeff
451
474
  //
@@ -453,29 +476,27 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,LazyCoeffBasedProductM
453
476
  // The main difference is that we add an extra argument to the etor_product_*_impl::run() function
454
477
  // for the inner dimension of the product, because evaluator object do not know their size.
455
478
 
456
- template<int Traversal, int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
479
+ template <int Traversal, int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
457
480
  struct etor_product_coeff_impl;
458
481
 
459
- template<int StorageOrder, int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
482
+ template <int StorageOrder, int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
460
483
  struct etor_product_packet_impl;
461
484
 
462
- template<typename Lhs, typename Rhs, int ProductTag>
485
+ template <typename Lhs, typename Rhs, int ProductTag>
463
486
  struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape, DenseShape>
464
- : evaluator_base<Product<Lhs, Rhs, LazyProduct> >
465
- {
487
+ : evaluator_base<Product<Lhs, Rhs, LazyProduct>> {
466
488
  typedef Product<Lhs, Rhs, LazyProduct> XprType;
467
489
  typedef typename XprType::Scalar Scalar;
468
490
  typedef typename XprType::CoeffReturnType CoeffReturnType;
469
491
 
470
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
471
- explicit product_evaluator(const XprType& xpr)
472
- : m_lhs(xpr.lhs()),
473
- m_rhs(xpr.rhs()),
474
- m_lhsImpl(m_lhs), // FIXME the creation of the evaluator objects should result in a no-op, but check that!
475
- m_rhsImpl(m_rhs), // Moreover, they are only useful for the packet path, so we could completely disable them when not needed,
476
- // or perhaps declare them on the fly on the packet method... We have experiment to check what's best.
477
- m_innerDim(xpr.lhs().cols())
478
- {
492
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit product_evaluator(const XprType& xpr)
493
+ : m_lhs(xpr.lhs()),
494
+ m_rhs(xpr.rhs()),
495
+ m_lhsImpl(m_lhs), // FIXME the creation of the evaluator objects should result in a no-op, but check that!
496
+ m_rhsImpl(m_rhs), // Moreover, they are only useful for the packet path, so we could completely disable
497
+ // them when not needed, or perhaps declare them on the fly on the packet method... We
498
+ // have experiment to check what's best.
499
+ m_innerDim(xpr.lhs().cols()) {
479
500
  EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::MulCost);
480
501
  EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::AddCost);
481
502
  EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
@@ -495,11 +516,11 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
495
516
 
496
517
  // Everything below here is taken from CoeffBasedProduct.h
497
518
 
498
- typedef typename internal::nested_eval<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
499
- typedef typename internal::nested_eval<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
500
-
501
- typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned;
502
- typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned;
519
+ typedef typename internal::nested_eval<Lhs, Rhs::ColsAtCompileTime>::type LhsNested;
520
+ typedef typename internal::nested_eval<Rhs, Lhs::RowsAtCompileTime>::type RhsNested;
521
+
522
+ typedef internal::remove_all_t<LhsNested> LhsNestedCleaned;
523
+ typedef internal::remove_all_t<RhsNested> RhsNestedCleaned;
503
524
 
504
525
  typedef evaluator<LhsNestedCleaned> LhsEtorType;
505
526
  typedef evaluator<RhsNestedCleaned> RhsEtorType;
@@ -507,28 +528,29 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
507
528
  enum {
508
529
  RowsAtCompileTime = LhsNestedCleaned::RowsAtCompileTime,
509
530
  ColsAtCompileTime = RhsNestedCleaned::ColsAtCompileTime,
510
- InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(LhsNestedCleaned::ColsAtCompileTime, RhsNestedCleaned::RowsAtCompileTime),
531
+ InnerSize = min_size_prefer_fixed(LhsNestedCleaned::ColsAtCompileTime, RhsNestedCleaned::RowsAtCompileTime),
511
532
  MaxRowsAtCompileTime = LhsNestedCleaned::MaxRowsAtCompileTime,
512
533
  MaxColsAtCompileTime = RhsNestedCleaned::MaxColsAtCompileTime
513
534
  };
514
535
 
515
- typedef typename find_best_packet<Scalar,RowsAtCompileTime>::type LhsVecPacketType;
516
- typedef typename find_best_packet<Scalar,ColsAtCompileTime>::type RhsVecPacketType;
536
+ typedef typename find_best_packet<Scalar, RowsAtCompileTime>::type LhsVecPacketType;
537
+ typedef typename find_best_packet<Scalar, ColsAtCompileTime>::type RhsVecPacketType;
517
538
 
518
539
  enum {
519
-
540
+
520
541
  LhsCoeffReadCost = LhsEtorType::CoeffReadCost,
521
542
  RhsCoeffReadCost = RhsEtorType::CoeffReadCost,
522
- CoeffReadCost = InnerSize==0 ? NumTraits<Scalar>::ReadCost
523
- : InnerSize == Dynamic ? HugeCost
524
- : InnerSize * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost)
525
- + (InnerSize - 1) * NumTraits<Scalar>::AddCost,
543
+ CoeffReadCost = InnerSize == 0 ? NumTraits<Scalar>::ReadCost
544
+ : InnerSize == Dynamic
545
+ ? HugeCost
546
+ : InnerSize * (NumTraits<Scalar>::MulCost + int(LhsCoeffReadCost) + int(RhsCoeffReadCost)) +
547
+ (InnerSize - 1) * NumTraits<Scalar>::AddCost,
526
548
 
527
549
  Unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT,
528
-
550
+
529
551
  LhsFlags = LhsEtorType::Flags,
530
552
  RhsFlags = RhsEtorType::Flags,
531
-
553
+
532
554
  LhsRowMajor = LhsFlags & RowMajorBit,
533
555
  RhsRowMajor = RhsFlags & RowMajorBit,
534
556
 
@@ -536,82 +558,105 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
536
558
  RhsVecPacketSize = unpacket_traits<RhsVecPacketType>::size,
537
559
 
538
560
  // Here, we don't care about alignment larger than the usable packet size.
539
- LhsAlignment = EIGEN_PLAIN_ENUM_MIN(LhsEtorType::Alignment,LhsVecPacketSize*int(sizeof(typename LhsNestedCleaned::Scalar))),
540
- RhsAlignment = EIGEN_PLAIN_ENUM_MIN(RhsEtorType::Alignment,RhsVecPacketSize*int(sizeof(typename RhsNestedCleaned::Scalar))),
541
-
542
- SameType = is_same<typename LhsNestedCleaned::Scalar,typename RhsNestedCleaned::Scalar>::value,
543
-
544
- CanVectorizeRhs = bool(RhsRowMajor) && (RhsFlags & PacketAccessBit) && (ColsAtCompileTime!=1),
545
- CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit) && (RowsAtCompileTime!=1),
546
-
547
- EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1
548
- : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
549
- : (bool(RhsRowMajor) && !CanVectorizeLhs),
550
-
551
- Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit)
552
- | (EvalToRowMajor ? RowMajorBit : 0)
553
- // TODO enable vectorization for mixed types
554
- | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0)
555
- | (XprType::IsVectorAtCompileTime ? LinearAccessBit : 0),
556
-
557
- LhsOuterStrideBytes = int(LhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename LhsNestedCleaned::Scalar)),
558
- RhsOuterStrideBytes = int(RhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename RhsNestedCleaned::Scalar)),
559
-
560
- Alignment = bool(CanVectorizeLhs) ? (LhsOuterStrideBytes<=0 || (int(LhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,LhsAlignment))!=0 ? 0 : LhsAlignment)
561
- : bool(CanVectorizeRhs) ? (RhsOuterStrideBytes<=0 || (int(RhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,RhsAlignment))!=0 ? 0 : RhsAlignment)
562
- : 0,
561
+ LhsAlignment =
562
+ plain_enum_min(LhsEtorType::Alignment, LhsVecPacketSize* int(sizeof(typename LhsNestedCleaned::Scalar))),
563
+ RhsAlignment =
564
+ plain_enum_min(RhsEtorType::Alignment, RhsVecPacketSize* int(sizeof(typename RhsNestedCleaned::Scalar))),
565
+
566
+ SameType = is_same<typename LhsNestedCleaned::Scalar, typename RhsNestedCleaned::Scalar>::value,
567
+
568
+ CanVectorizeRhs = bool(RhsRowMajor) && (RhsFlags & PacketAccessBit) && (ColsAtCompileTime != 1),
569
+ CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit) && (RowsAtCompileTime != 1),
570
+
571
+ EvalToRowMajor = (MaxRowsAtCompileTime == 1 && MaxColsAtCompileTime != 1) ? 1
572
+ : (MaxColsAtCompileTime == 1 && MaxRowsAtCompileTime != 1)
573
+ ? 0
574
+ : (bool(RhsRowMajor) && !CanVectorizeLhs),
575
+
576
+ Flags = ((int(LhsFlags) | int(RhsFlags)) & HereditaryBits & ~RowMajorBit) |
577
+ (EvalToRowMajor ? RowMajorBit : 0)
578
+ // TODO enable vectorization for mixed types
579
+ | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0) |
580
+ (XprType::IsVectorAtCompileTime ? LinearAccessBit : 0),
581
+
582
+ LhsOuterStrideBytes =
583
+ int(LhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename LhsNestedCleaned::Scalar)),
584
+ RhsOuterStrideBytes =
585
+ int(RhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename RhsNestedCleaned::Scalar)),
586
+
587
+ Alignment = bool(CanVectorizeLhs)
588
+ ? (LhsOuterStrideBytes <= 0 || (int(LhsOuterStrideBytes) % plain_enum_max(1, LhsAlignment)) != 0
589
+ ? 0
590
+ : LhsAlignment)
591
+ : bool(CanVectorizeRhs)
592
+ ? (RhsOuterStrideBytes <= 0 || (int(RhsOuterStrideBytes) % plain_enum_max(1, RhsAlignment)) != 0
593
+ ? 0
594
+ : RhsAlignment)
595
+ : 0,
563
596
 
564
597
  /* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside
565
598
  * of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner
566
599
  * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect
567
600
  * the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI.
568
601
  */
569
- CanVectorizeInner = SameType
570
- && LhsRowMajor
571
- && (!RhsRowMajor)
572
- && (LhsFlags & RhsFlags & ActualPacketAccessBit)
573
- && (InnerSize % packet_traits<Scalar>::size == 0)
602
+ CanVectorizeInner = SameType && LhsRowMajor && (!RhsRowMajor) &&
603
+ (int(LhsFlags) & int(RhsFlags) & ActualPacketAccessBit) &&
604
+ (int(InnerSize) % packet_traits<Scalar>::size == 0)
574
605
  };
575
-
576
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index row, Index col) const
577
- {
578
- return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum();
606
+
607
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index row, Index col) const {
608
+ return (m_lhs.row(row).transpose().cwiseProduct(m_rhs.col(col))).sum();
579
609
  }
580
610
 
581
611
  /* Allow index-based non-packet access. It is impossible though to allow index-based packed access,
582
612
  * which is why we don't set the LinearAccessBit.
583
613
  * TODO: this seems possible when the result is a vector
584
614
  */
585
- EIGEN_DEVICE_FUNC const CoeffReturnType coeff(Index index) const
586
- {
587
- const Index row = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? 0 : index;
588
- const Index col = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? index : 0;
589
- return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum();
615
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index index) const {
616
+ const Index row = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime == 1) ? 0 : index;
617
+ const Index col = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime == 1) ? index : 0;
618
+ return (m_lhs.row(row).transpose().cwiseProduct(m_rhs.col(col))).sum();
590
619
  }
591
620
 
592
- template<int LoadMode, typename PacketType>
593
- const PacketType packet(Index row, Index col) const
594
- {
621
+ template <int LoadMode, typename PacketType>
622
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const PacketType packet(Index row, Index col) const {
595
623
  PacketType res;
596
- typedef etor_product_packet_impl<bool(int(Flags)&RowMajorBit) ? RowMajor : ColMajor,
597
- Unroll ? int(InnerSize) : Dynamic,
598
- LhsEtorType, RhsEtorType, PacketType, LoadMode> PacketImpl;
624
+ typedef etor_product_packet_impl<bool(int(Flags) & RowMajorBit) ? RowMajor : ColMajor,
625
+ Unroll ? int(InnerSize) : Dynamic, LhsEtorType, RhsEtorType, PacketType, LoadMode>
626
+ PacketImpl;
599
627
  PacketImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res);
600
628
  return res;
601
629
  }
602
630
 
603
- template<int LoadMode, typename PacketType>
604
- const PacketType packet(Index index) const
605
- {
606
- const Index row = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? 0 : index;
607
- const Index col = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? index : 0;
608
- return packet<LoadMode,PacketType>(row,col);
631
+ template <int LoadMode, typename PacketType>
632
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const PacketType packet(Index index) const {
633
+ const Index row = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime == 1) ? 0 : index;
634
+ const Index col = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime == 1) ? index : 0;
635
+ return packet<LoadMode, PacketType>(row, col);
609
636
  }
610
637
 
611
- protected:
612
- typename internal::add_const_on_value_type<LhsNested>::type m_lhs;
613
- typename internal::add_const_on_value_type<RhsNested>::type m_rhs;
614
-
638
+ template <int LoadMode, typename PacketType>
639
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const PacketType packetSegment(Index row, Index col, Index begin,
640
+ Index count) const {
641
+ PacketType res;
642
+ typedef etor_product_packet_impl<bool(int(Flags) & RowMajorBit) ? RowMajor : ColMajor,
643
+ Unroll ? int(InnerSize) : Dynamic, LhsEtorType, RhsEtorType, PacketType, LoadMode>
644
+ PacketImpl;
645
+ PacketImpl::run_segment(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res, begin, count);
646
+ return res;
647
+ }
648
+
649
+ template <int LoadMode, typename PacketType>
650
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const PacketType packetSegment(Index index, Index begin, Index count) const {
651
+ const Index row = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime == 1) ? 0 : index;
652
+ const Index col = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime == 1) ? index : 0;
653
+ return packetSegment<LoadMode, PacketType>(row, col, begin, count);
654
+ }
655
+
656
+ protected:
657
+ add_const_on_value_type_t<LhsNested> m_lhs;
658
+ add_const_on_value_type_t<RhsNested> m_rhs;
659
+
615
660
  LhsEtorType m_lhsImpl;
616
661
  RhsEtorType m_rhsImpl;
617
662
 
@@ -619,520 +664,624 @@ protected:
619
664
  Index m_innerDim;
620
665
  };
621
666
 
622
- template<typename Lhs, typename Rhs>
667
+ template <typename Lhs, typename Rhs>
623
668
  struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, LazyCoeffBasedProductMode, DenseShape, DenseShape>
624
- : product_evaluator<Product<Lhs, Rhs, LazyProduct>, CoeffBasedProductMode, DenseShape, DenseShape>
625
- {
669
+ : product_evaluator<Product<Lhs, Rhs, LazyProduct>, CoeffBasedProductMode, DenseShape, DenseShape> {
626
670
  typedef Product<Lhs, Rhs, DefaultProduct> XprType;
627
671
  typedef Product<Lhs, Rhs, LazyProduct> BaseProduct;
628
672
  typedef product_evaluator<BaseProduct, CoeffBasedProductMode, DenseShape, DenseShape> Base;
629
- enum {
630
- Flags = Base::Flags | EvalBeforeNestingBit
631
- };
632
- EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr)
633
- : Base(BaseProduct(xpr.lhs(),xpr.rhs()))
634
- {}
673
+ enum { Flags = Base::Flags | EvalBeforeNestingBit };
674
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit product_evaluator(const XprType& xpr)
675
+ : Base(BaseProduct(xpr.lhs(), xpr.rhs())) {}
635
676
  };
636
677
 
637
678
  /****************************************
638
679
  *** Coeff based product, Packet path ***
639
680
  ****************************************/
640
681
 
641
- template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
642
- struct etor_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
643
- {
644
- static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
645
- {
646
- etor_product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
647
- res = pmadd(pset1<Packet>(lhs.coeff(row, Index(UnrollingIndex-1))), rhs.template packet<LoadMode,Packet>(Index(UnrollingIndex-1), col), res);
682
+ template <int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
683
+ struct etor_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode> {
684
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs,
685
+ Index innerDim, Packet& res) {
686
+ etor_product_packet_impl<RowMajor, UnrollingIndex - 1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs,
687
+ innerDim, res);
688
+ res = pmadd(pset1<Packet>(lhs.coeff(row, Index(UnrollingIndex - 1))),
689
+ rhs.template packet<LoadMode, Packet>(Index(UnrollingIndex - 1), col), res);
690
+ }
691
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run_segment(Index row, Index col, const Lhs& lhs, const Rhs& rhs,
692
+ Index innerDim, Packet& res, Index begin, Index count) {
693
+ etor_product_packet_impl<RowMajor, UnrollingIndex - 1, Lhs, Rhs, Packet, LoadMode>::run_segment(
694
+ row, col, lhs, rhs, innerDim, res, begin, count);
695
+ res = pmadd(pset1<Packet>(lhs.coeff(row, Index(UnrollingIndex - 1))),
696
+ rhs.template packetSegment<LoadMode, Packet>(Index(UnrollingIndex - 1), col, begin, count), res);
648
697
  }
649
698
  };
650
699
 
651
- template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
652
- struct etor_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
653
- {
654
- static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
655
- {
656
- etor_product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
657
- res = pmadd(lhs.template packet<LoadMode,Packet>(row, Index(UnrollingIndex-1)), pset1<Packet>(rhs.coeff(Index(UnrollingIndex-1), col)), res);
700
+ template <int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
701
+ struct etor_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode> {
702
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs,
703
+ Index innerDim, Packet& res) {
704
+ etor_product_packet_impl<ColMajor, UnrollingIndex - 1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs,
705
+ innerDim, res);
706
+ res = pmadd(lhs.template packet<LoadMode, Packet>(row, Index(UnrollingIndex - 1)),
707
+ pset1<Packet>(rhs.coeff(Index(UnrollingIndex - 1), col)), res);
708
+ }
709
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run_segment(Index row, Index col, const Lhs& lhs, const Rhs& rhs,
710
+ Index innerDim, Packet& res, Index begin, Index count) {
711
+ etor_product_packet_impl<ColMajor, UnrollingIndex - 1, Lhs, Rhs, Packet, LoadMode>::run_segment(
712
+ row, col, lhs, rhs, innerDim, res, begin, count);
713
+ res = pmadd(lhs.template packetSegment<LoadMode, Packet>(row, Index(UnrollingIndex - 1), begin, count),
714
+ pset1<Packet>(rhs.coeff(Index(UnrollingIndex - 1), col)), res);
658
715
  }
659
716
  };
660
717
 
661
- template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
662
- struct etor_product_packet_impl<RowMajor, 1, Lhs, Rhs, Packet, LoadMode>
663
- {
664
- static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
665
- {
666
- res = pmul(pset1<Packet>(lhs.coeff(row, Index(0))),rhs.template packet<LoadMode,Packet>(Index(0), col));
718
+ template <typename Lhs, typename Rhs, typename Packet, int LoadMode>
719
+ struct etor_product_packet_impl<RowMajor, 1, Lhs, Rhs, Packet, LoadMode> {
720
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs,
721
+ Index /*innerDim*/, Packet& res) {
722
+ res = pmul(pset1<Packet>(lhs.coeff(row, Index(0))), rhs.template packet<LoadMode, Packet>(Index(0), col));
723
+ }
724
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run_segment(Index row, Index col, const Lhs& lhs, const Rhs& rhs,
725
+ Index /*innerDim*/, Packet& res, Index begin,
726
+ Index count) {
727
+ res = pmul(pset1<Packet>(lhs.coeff(row, Index(0))),
728
+ rhs.template packetSegment<LoadMode, Packet>(Index(0), col, begin, count));
667
729
  }
668
730
  };
669
731
 
670
- template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
671
- struct etor_product_packet_impl<ColMajor, 1, Lhs, Rhs, Packet, LoadMode>
672
- {
673
- static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
674
- {
675
- res = pmul(lhs.template packet<LoadMode,Packet>(row, Index(0)), pset1<Packet>(rhs.coeff(Index(0), col)));
732
+ template <typename Lhs, typename Rhs, typename Packet, int LoadMode>
733
+ struct etor_product_packet_impl<ColMajor, 1, Lhs, Rhs, Packet, LoadMode> {
734
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs,
735
+ Index /*innerDim*/, Packet& res) {
736
+ res = pmul(lhs.template packet<LoadMode, Packet>(row, Index(0)), pset1<Packet>(rhs.coeff(Index(0), col)));
737
+ }
738
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run_segment(Index row, Index col, const Lhs& lhs, const Rhs& rhs,
739
+ Index /*innerDim*/, Packet& res, Index begin,
740
+ Index count) {
741
+ res = pmul(lhs.template packetSegment<LoadMode, Packet>(row, Index(0), begin, count),
742
+ pset1<Packet>(rhs.coeff(Index(0), col)));
676
743
  }
677
744
  };
678
745
 
679
- template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
680
- struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
681
- {
682
- static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res)
683
- {
746
+ template <typename Lhs, typename Rhs, typename Packet, int LoadMode>
747
+ struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode> {
748
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/,
749
+ const Rhs& /*rhs*/, Index /*innerDim*/, Packet& res) {
750
+ res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
751
+ }
752
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run_segment(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/,
753
+ const Rhs& /*rhs*/, Index /*innerDim*/, Packet& res,
754
+ Index /*begin*/, Index /*count*/) {
684
755
  res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
685
756
  }
686
757
  };
687
758
 
688
- template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
689
- struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
690
- {
691
- static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res)
692
- {
759
+ template <typename Lhs, typename Rhs, typename Packet, int LoadMode>
760
+ struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode> {
761
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/,
762
+ const Rhs& /*rhs*/, Index /*innerDim*/, Packet& res) {
763
+ res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
764
+ }
765
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run_segment(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/,
766
+ const Rhs& /*rhs*/, Index /*innerDim*/, Packet& res,
767
+ Index /*begin*/, Index /*count*/) {
693
768
  res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
694
769
  }
695
770
  };
696
771
 
697
- template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
698
- struct etor_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
699
- {
700
- static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
701
- {
772
+ template <typename Lhs, typename Rhs, typename Packet, int LoadMode>
773
+ struct etor_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode> {
774
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs,
775
+ Index innerDim, Packet& res) {
702
776
  res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
703
- for(Index i = 0; i < innerDim; ++i)
704
- res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode,Packet>(i, col), res);
777
+ for (Index i = 0; i < innerDim; ++i)
778
+ res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode, Packet>(i, col), res);
779
+ }
780
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run_segment(Index row, Index col, const Lhs& lhs, const Rhs& rhs,
781
+ Index innerDim, Packet& res, Index begin, Index count) {
782
+ res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
783
+ for (Index i = 0; i < innerDim; ++i)
784
+ res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packetSegment<LoadMode, Packet>(i, col, begin, count),
785
+ res);
705
786
  }
706
787
  };
707
788
 
708
- template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
709
- struct etor_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
710
- {
711
- static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
712
- {
789
+ template <typename Lhs, typename Rhs, typename Packet, int LoadMode>
790
+ struct etor_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode> {
791
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs,
792
+ Index innerDim, Packet& res) {
793
+ res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
794
+ for (Index i = 0; i < innerDim; ++i)
795
+ res = pmadd(lhs.template packet<LoadMode, Packet>(row, i), pset1<Packet>(rhs.coeff(i, col)), res);
796
+ }
797
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run_segment(Index row, Index col, const Lhs& lhs, const Rhs& rhs,
798
+ Index innerDim, Packet& res, Index begin, Index count) {
713
799
  res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
714
- for(Index i = 0; i < innerDim; ++i)
715
- res = pmadd(lhs.template packet<LoadMode,Packet>(row, i), pset1<Packet>(rhs.coeff(i, col)), res);
800
+ for (Index i = 0; i < innerDim; ++i)
801
+ res = pmadd(lhs.template packetSegment<LoadMode, Packet>(row, i, begin, count), pset1<Packet>(rhs.coeff(i, col)),
802
+ res);
716
803
  }
717
804
  };
718
805
 
719
-
720
806
  /***************************************************************************
721
- * Triangular products
722
- ***************************************************************************/
723
- template<int Mode, bool LhsIsTriangular,
724
- typename Lhs, bool LhsIsVector,
725
- typename Rhs, bool RhsIsVector>
807
+ * Triangular products
808
+ ***************************************************************************/
809
+ template <int Mode, bool LhsIsTriangular, typename Lhs, bool LhsIsVector, typename Rhs, bool RhsIsVector>
726
810
  struct triangular_product_impl;
727
811
 
728
- template<typename Lhs, typename Rhs, int ProductTag>
729
- struct generic_product_impl<Lhs,Rhs,TriangularShape,DenseShape,ProductTag>
730
- : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,TriangularShape,DenseShape,ProductTag> >
731
- {
732
- typedef typename Product<Lhs,Rhs>::Scalar Scalar;
733
-
734
- template<typename Dest>
735
- static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
736
- {
737
- triangular_product_impl<Lhs::Mode,true,typename Lhs::MatrixType,false,Rhs, Rhs::ColsAtCompileTime==1>
738
- ::run(dst, lhs.nestedExpression(), rhs, alpha);
812
+ template <typename Lhs, typename Rhs, int ProductTag>
813
+ struct generic_product_impl<Lhs, Rhs, TriangularShape, DenseShape, ProductTag>
814
+ : generic_product_impl_base<Lhs, Rhs, generic_product_impl<Lhs, Rhs, TriangularShape, DenseShape, ProductTag>> {
815
+ typedef typename Product<Lhs, Rhs>::Scalar Scalar;
816
+
817
+ template <typename Dest>
818
+ static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) {
819
+ triangular_product_impl<Lhs::Mode, true, typename Lhs::MatrixType, false, Rhs, Rhs::ColsAtCompileTime == 1>::run(
820
+ dst, lhs.nestedExpression(), rhs, alpha);
739
821
  }
740
822
  };
741
823
 
742
- template<typename Lhs, typename Rhs, int ProductTag>
743
- struct generic_product_impl<Lhs,Rhs,DenseShape,TriangularShape,ProductTag>
744
- : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,TriangularShape,ProductTag> >
745
- {
746
- typedef typename Product<Lhs,Rhs>::Scalar Scalar;
747
-
748
- template<typename Dest>
749
- static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
750
- {
751
- triangular_product_impl<Rhs::Mode,false,Lhs,Lhs::RowsAtCompileTime==1, typename Rhs::MatrixType, false>::run(dst, lhs, rhs.nestedExpression(), alpha);
824
+ template <typename Lhs, typename Rhs, int ProductTag>
825
+ struct generic_product_impl<Lhs, Rhs, DenseShape, TriangularShape, ProductTag>
826
+ : generic_product_impl_base<Lhs, Rhs, generic_product_impl<Lhs, Rhs, DenseShape, TriangularShape, ProductTag>> {
827
+ typedef typename Product<Lhs, Rhs>::Scalar Scalar;
828
+
829
+ template <typename Dest>
830
+ static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) {
831
+ triangular_product_impl<Rhs::Mode, false, Lhs, Lhs::RowsAtCompileTime == 1, typename Rhs::MatrixType, false>::run(
832
+ dst, lhs, rhs.nestedExpression(), alpha);
752
833
  }
753
834
  };
754
835
 
755
-
756
836
  /***************************************************************************
757
- * SelfAdjoint products
758
- ***************************************************************************/
759
- template <typename Lhs, int LhsMode, bool LhsIsVector,
760
- typename Rhs, int RhsMode, bool RhsIsVector>
837
+ * SelfAdjoint products
838
+ ***************************************************************************/
839
+ template <typename Lhs, int LhsMode, bool LhsIsVector, typename Rhs, int RhsMode, bool RhsIsVector>
761
840
  struct selfadjoint_product_impl;
762
841
 
763
- template<typename Lhs, typename Rhs, int ProductTag>
764
- struct generic_product_impl<Lhs,Rhs,SelfAdjointShape,DenseShape,ProductTag>
765
- : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,SelfAdjointShape,DenseShape,ProductTag> >
766
- {
767
- typedef typename Product<Lhs,Rhs>::Scalar Scalar;
768
-
769
- template<typename Dest>
770
- static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
771
- {
772
- selfadjoint_product_impl<typename Lhs::MatrixType,Lhs::Mode,false,Rhs,0,Rhs::IsVectorAtCompileTime>::run(dst, lhs.nestedExpression(), rhs, alpha);
842
+ template <typename Lhs, typename Rhs, int ProductTag>
843
+ struct generic_product_impl<Lhs, Rhs, SelfAdjointShape, DenseShape, ProductTag>
844
+ : generic_product_impl_base<Lhs, Rhs, generic_product_impl<Lhs, Rhs, SelfAdjointShape, DenseShape, ProductTag>> {
845
+ typedef typename Product<Lhs, Rhs>::Scalar Scalar;
846
+
847
+ template <typename Dest>
848
+ static EIGEN_DEVICE_FUNC void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) {
849
+ selfadjoint_product_impl<typename Lhs::MatrixType, Lhs::Mode, false, Rhs, 0, Rhs::ColsAtCompileTime == 1>::run(
850
+ dst, lhs.nestedExpression(), rhs, alpha);
773
851
  }
774
852
  };
775
853
 
776
- template<typename Lhs, typename Rhs, int ProductTag>
777
- struct generic_product_impl<Lhs,Rhs,DenseShape,SelfAdjointShape,ProductTag>
778
- : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,SelfAdjointShape,ProductTag> >
779
- {
780
- typedef typename Product<Lhs,Rhs>::Scalar Scalar;
781
-
782
- template<typename Dest>
783
- static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
784
- {
785
- selfadjoint_product_impl<Lhs,0,Lhs::IsVectorAtCompileTime,typename Rhs::MatrixType,Rhs::Mode,false>::run(dst, lhs, rhs.nestedExpression(), alpha);
854
+ template <typename Lhs, typename Rhs, int ProductTag>
855
+ struct generic_product_impl<Lhs, Rhs, DenseShape, SelfAdjointShape, ProductTag>
856
+ : generic_product_impl_base<Lhs, Rhs, generic_product_impl<Lhs, Rhs, DenseShape, SelfAdjointShape, ProductTag>> {
857
+ typedef typename Product<Lhs, Rhs>::Scalar Scalar;
858
+
859
+ template <typename Dest>
860
+ static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) {
861
+ selfadjoint_product_impl<Lhs, 0, Lhs::RowsAtCompileTime == 1, typename Rhs::MatrixType, Rhs::Mode, false>::run(
862
+ dst, lhs, rhs.nestedExpression(), alpha);
786
863
  }
787
864
  };
788
865
 
789
-
790
866
  /***************************************************************************
791
- * Diagonal products
792
- ***************************************************************************/
793
-
794
- template<typename MatrixType, typename DiagonalType, typename Derived, int ProductOrder>
795
- struct diagonal_product_evaluator_base
796
- : evaluator_base<Derived>
797
- {
798
- typedef typename ScalarBinaryOpTraits<typename MatrixType::Scalar, typename DiagonalType::Scalar>::ReturnType Scalar;
799
- public:
867
+ * Diagonal products
868
+ ***************************************************************************/
869
+
870
+ template <typename MatrixType, typename DiagonalType, typename Derived, int ProductOrder>
871
+ struct diagonal_product_evaluator_base : evaluator_base<Derived> {
872
+ typedef typename ScalarBinaryOpTraits<typename MatrixType::Scalar, typename DiagonalType::Scalar>::ReturnType Scalar;
873
+
874
+ public:
800
875
  enum {
801
- CoeffReadCost = NumTraits<Scalar>::MulCost + evaluator<MatrixType>::CoeffReadCost + evaluator<DiagonalType>::CoeffReadCost,
802
-
876
+ CoeffReadCost = int(NumTraits<Scalar>::MulCost) + int(evaluator<MatrixType>::CoeffReadCost) +
877
+ int(evaluator<DiagonalType>::CoeffReadCost),
878
+
803
879
  MatrixFlags = evaluator<MatrixType>::Flags,
804
880
  DiagFlags = evaluator<DiagonalType>::Flags,
805
- _StorageOrder = MatrixFlags & RowMajorBit ? RowMajor : ColMajor,
806
- _ScalarAccessOnDiag = !((int(_StorageOrder) == ColMajor && int(ProductOrder) == OnTheLeft)
807
- ||(int(_StorageOrder) == RowMajor && int(ProductOrder) == OnTheRight)),
808
- _SameTypes = is_same<typename MatrixType::Scalar, typename DiagonalType::Scalar>::value,
881
+
882
+ StorageOrder_ = (Derived::MaxRowsAtCompileTime == 1 && Derived::MaxColsAtCompileTime != 1) ? RowMajor
883
+ : (Derived::MaxColsAtCompileTime == 1 && Derived::MaxRowsAtCompileTime != 1) ? ColMajor
884
+ : MatrixFlags & RowMajorBit ? RowMajor
885
+ : ColMajor,
886
+ SameStorageOrder_ = int(StorageOrder_) == ((MatrixFlags & RowMajorBit) ? RowMajor : ColMajor),
887
+
888
+ ScalarAccessOnDiag_ = !((int(StorageOrder_) == ColMajor && int(ProductOrder) == OnTheLeft) ||
889
+ (int(StorageOrder_) == RowMajor && int(ProductOrder) == OnTheRight)),
890
+ SameTypes_ = is_same<typename MatrixType::Scalar, typename DiagonalType::Scalar>::value,
809
891
  // FIXME currently we need same types, but in the future the next rule should be the one
810
- //_Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && ((!_PacketOnDiag) || (_SameTypes && bool(int(DiagFlags)&PacketAccessBit))),
811
- _Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && _SameTypes && (_ScalarAccessOnDiag || (bool(int(DiagFlags)&PacketAccessBit))),
812
- _LinearAccessMask = (MatrixType::RowsAtCompileTime==1 || MatrixType::ColsAtCompileTime==1) ? LinearAccessBit : 0,
813
- Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixFlags)) | (_Vectorizable ? PacketAccessBit : 0),
892
+ // Vectorizable_ = bool(int(MatrixFlags)&PacketAccessBit) && ((!_PacketOnDiag) || (SameTypes_ &&
893
+ // bool(int(DiagFlags)&PacketAccessBit))),
894
+ Vectorizable_ = bool(int(MatrixFlags) & PacketAccessBit) && SameTypes_ &&
895
+ (SameStorageOrder_ || (MatrixFlags & LinearAccessBit) == LinearAccessBit) &&
896
+ (ScalarAccessOnDiag_ || (bool(int(DiagFlags) & PacketAccessBit))),
897
+ LinearAccessMask_ =
898
+ (MatrixType::RowsAtCompileTime == 1 || MatrixType::ColsAtCompileTime == 1) ? LinearAccessBit : 0,
899
+ Flags =
900
+ ((HereditaryBits | LinearAccessMask_) & (unsigned int)(MatrixFlags)) | (Vectorizable_ ? PacketAccessBit : 0),
814
901
  Alignment = evaluator<MatrixType>::Alignment,
815
902
 
816
- AsScalarProduct = (DiagonalType::SizeAtCompileTime==1)
817
- || (DiagonalType::SizeAtCompileTime==Dynamic && MatrixType::RowsAtCompileTime==1 && ProductOrder==OnTheLeft)
818
- || (DiagonalType::SizeAtCompileTime==Dynamic && MatrixType::ColsAtCompileTime==1 && ProductOrder==OnTheRight)
903
+ AsScalarProduct =
904
+ (DiagonalType::SizeAtCompileTime == 1) ||
905
+ (DiagonalType::SizeAtCompileTime == Dynamic && MatrixType::RowsAtCompileTime == 1 &&
906
+ ProductOrder == OnTheLeft) ||
907
+ (DiagonalType::SizeAtCompileTime == Dynamic && MatrixType::ColsAtCompileTime == 1 && ProductOrder == OnTheRight)
819
908
  };
820
-
821
- diagonal_product_evaluator_base(const MatrixType &mat, const DiagonalType &diag)
822
- : m_diagImpl(diag), m_matImpl(mat)
823
- {
909
+
910
+ EIGEN_DEVICE_FUNC diagonal_product_evaluator_base(const MatrixType& mat, const DiagonalType& diag)
911
+ : m_diagImpl(diag), m_matImpl(mat) {
824
912
  EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::MulCost);
825
913
  EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
826
914
  }
827
-
828
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const
829
- {
830
- if(AsScalarProduct)
915
+
916
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const {
917
+ if (AsScalarProduct)
831
918
  return m_diagImpl.coeff(0) * m_matImpl.coeff(idx);
832
919
  else
833
920
  return m_diagImpl.coeff(idx) * m_matImpl.coeff(idx);
834
921
  }
835
-
836
- protected:
837
- template<int LoadMode,typename PacketType>
838
- EIGEN_STRONG_INLINE PacketType packet_impl(Index row, Index col, Index id, internal::true_type) const
839
- {
840
- return internal::pmul(m_matImpl.template packet<LoadMode,PacketType>(row, col),
922
+
923
+ protected:
924
+ template <int LoadMode, typename PacketType>
925
+ EIGEN_STRONG_INLINE PacketType packet_impl(Index row, Index col, Index id, internal::true_type) const {
926
+ return internal::pmul(m_matImpl.template packet<LoadMode, PacketType>(row, col),
841
927
  internal::pset1<PacketType>(m_diagImpl.coeff(id)));
842
928
  }
843
-
844
- template<int LoadMode,typename PacketType>
845
- EIGEN_STRONG_INLINE PacketType packet_impl(Index row, Index col, Index id, internal::false_type) const
846
- {
929
+
930
+ template <int LoadMode, typename PacketType>
931
+ EIGEN_STRONG_INLINE PacketType packet_impl(Index row, Index col, Index id, internal::false_type) const {
847
932
  enum {
848
933
  InnerSize = (MatrixType::Flags & RowMajorBit) ? MatrixType::ColsAtCompileTime : MatrixType::RowsAtCompileTime,
849
- DiagonalPacketLoadMode = EIGEN_PLAIN_ENUM_MIN(LoadMode,((InnerSize%16) == 0) ? int(Aligned16) : int(evaluator<DiagonalType>::Alignment)) // FIXME hardcoded 16!!
934
+ DiagonalPacketLoadMode = plain_enum_min(
935
+ LoadMode,
936
+ ((InnerSize % 16) == 0) ? int(Aligned16) : int(evaluator<DiagonalType>::Alignment)) // FIXME hardcoded 16!!
850
937
  };
851
- return internal::pmul(m_matImpl.template packet<LoadMode,PacketType>(row, col),
852
- m_diagImpl.template packet<DiagonalPacketLoadMode,PacketType>(id));
938
+ return internal::pmul(m_matImpl.template packet<LoadMode, PacketType>(row, col),
939
+ m_diagImpl.template packet<DiagonalPacketLoadMode, PacketType>(id));
853
940
  }
854
-
941
+
942
+ template <int LoadMode, typename PacketType>
943
+ EIGEN_STRONG_INLINE PacketType packet_segment_impl(Index row, Index col, Index id, Index begin, Index count,
944
+ internal::true_type) const {
945
+ return internal::pmul(m_matImpl.template packetSegment<LoadMode, PacketType>(row, col, begin, count),
946
+ internal::pset1<PacketType>(m_diagImpl.coeff(id)));
947
+ }
948
+
949
+ template <int LoadMode, typename PacketType>
950
+ EIGEN_STRONG_INLINE PacketType packet_segment_impl(Index row, Index col, Index id, Index begin, Index count,
951
+ internal::false_type) const {
952
+ enum {
953
+ InnerSize = (MatrixType::Flags & RowMajorBit) ? MatrixType::ColsAtCompileTime : MatrixType::RowsAtCompileTime,
954
+ DiagonalPacketLoadMode = plain_enum_min(
955
+ LoadMode,
956
+ ((InnerSize % 16) == 0) ? int(Aligned16) : int(evaluator<DiagonalType>::Alignment)) // FIXME hardcoded 16!!
957
+ };
958
+ return internal::pmul(m_matImpl.template packetSegment<LoadMode, PacketType>(row, col, begin, count),
959
+ m_diagImpl.template packetSegment<DiagonalPacketLoadMode, PacketType>(id, begin, count));
960
+ }
961
+
855
962
  evaluator<DiagonalType> m_diagImpl;
856
- evaluator<MatrixType> m_matImpl;
963
+ evaluator<MatrixType> m_matImpl;
857
964
  };
858
965
 
859
966
  // diagonal * dense
860
- template<typename Lhs, typename Rhs, int ProductKind, int ProductTag>
967
+ template <typename Lhs, typename Rhs, int ProductKind, int ProductTag>
861
968
  struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DiagonalShape, DenseShape>
862
- : diagonal_product_evaluator_base<Rhs, typename Lhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheLeft>
863
- {
864
- typedef diagonal_product_evaluator_base<Rhs, typename Lhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheLeft> Base;
969
+ : diagonal_product_evaluator_base<Rhs, typename Lhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>,
970
+ OnTheLeft> {
971
+ typedef diagonal_product_evaluator_base<Rhs, typename Lhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>,
972
+ OnTheLeft>
973
+ Base;
974
+ using Base::coeff;
865
975
  using Base::m_diagImpl;
866
976
  using Base::m_matImpl;
867
- using Base::coeff;
868
977
  typedef typename Base::Scalar Scalar;
869
-
978
+
870
979
  typedef Product<Lhs, Rhs, ProductKind> XprType;
871
980
  typedef typename XprType::PlainObject PlainObject;
872
-
873
- enum {
874
- StorageOrder = int(Rhs::Flags) & RowMajorBit ? RowMajor : ColMajor
875
- };
981
+ typedef typename Lhs::DiagonalVectorType DiagonalType;
876
982
 
877
- EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr)
878
- : Base(xpr.rhs(), xpr.lhs().diagonal())
879
- {
880
- }
881
-
882
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const
883
- {
983
+ static constexpr int StorageOrder = Base::StorageOrder_;
984
+ using IsRowMajor_t = bool_constant<StorageOrder == RowMajor>;
985
+
986
+ EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) : Base(xpr.rhs(), xpr.lhs().diagonal()) {}
987
+
988
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const {
884
989
  return m_diagImpl.coeff(row) * m_matImpl.coeff(row, col);
885
990
  }
886
-
887
- #ifndef __CUDACC__
888
- template<int LoadMode,typename PacketType>
889
- EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const
890
- {
991
+
992
+ #ifndef EIGEN_GPUCC
993
+ template <int LoadMode, typename PacketType>
994
+ EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const {
891
995
  // FIXME: NVCC used to complain about the template keyword, but we have to check whether this is still the case.
892
996
  // See also similar calls below.
893
- return this->template packet_impl<LoadMode,PacketType>(row,col, row,
894
- typename internal::conditional<int(StorageOrder)==RowMajor, internal::true_type, internal::false_type>::type());
997
+ return this->template packet_impl<LoadMode, PacketType>(row, col, row, IsRowMajor_t());
895
998
  }
896
-
897
- template<int LoadMode,typename PacketType>
898
- EIGEN_STRONG_INLINE PacketType packet(Index idx) const
899
- {
900
- return packet<LoadMode,PacketType>(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx);
999
+
1000
+ template <int LoadMode, typename PacketType>
1001
+ EIGEN_STRONG_INLINE PacketType packet(Index idx) const {
1002
+ return packet<LoadMode, PacketType>(int(StorageOrder) == ColMajor ? idx : 0,
1003
+ int(StorageOrder) == ColMajor ? 0 : idx);
1004
+ }
1005
+
1006
+ template <int LoadMode, typename PacketType>
1007
+ EIGEN_STRONG_INLINE PacketType packetSegment(Index row, Index col, Index begin, Index count) const {
1008
+ // FIXME: NVCC used to complain about the template keyword, but we have to check whether this is still the case.
1009
+ // See also similar calls below.
1010
+ return this->template packet_segment_impl<LoadMode, PacketType>(row, col, row, begin, count, IsRowMajor_t());
1011
+ }
1012
+
1013
+ template <int LoadMode, typename PacketType>
1014
+ EIGEN_STRONG_INLINE PacketType packetSegment(Index idx, Index begin, Index count) const {
1015
+ return packetSegment<LoadMode, PacketType>(StorageOrder == ColMajor ? idx : 0, StorageOrder == ColMajor ? 0 : idx,
1016
+ begin, count);
901
1017
  }
902
1018
  #endif
903
1019
  };
904
1020
 
905
1021
  // dense * diagonal
906
- template<typename Lhs, typename Rhs, int ProductKind, int ProductTag>
1022
+ template <typename Lhs, typename Rhs, int ProductKind, int ProductTag>
907
1023
  struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DenseShape, DiagonalShape>
908
- : diagonal_product_evaluator_base<Lhs, typename Rhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheRight>
909
- {
910
- typedef diagonal_product_evaluator_base<Lhs, typename Rhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheRight> Base;
1024
+ : diagonal_product_evaluator_base<Lhs, typename Rhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>,
1025
+ OnTheRight> {
1026
+ typedef diagonal_product_evaluator_base<Lhs, typename Rhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>,
1027
+ OnTheRight>
1028
+ Base;
1029
+ using Base::coeff;
911
1030
  using Base::m_diagImpl;
912
1031
  using Base::m_matImpl;
913
- using Base::coeff;
914
1032
  typedef typename Base::Scalar Scalar;
915
-
1033
+
916
1034
  typedef Product<Lhs, Rhs, ProductKind> XprType;
917
1035
  typedef typename XprType::PlainObject PlainObject;
918
-
919
- enum { StorageOrder = int(Lhs::Flags) & RowMajorBit ? RowMajor : ColMajor };
920
1036
 
921
- EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr)
922
- : Base(xpr.lhs(), xpr.rhs().diagonal())
923
- {
924
- }
925
-
926
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const
927
- {
1037
+ static constexpr int StorageOrder = Base::StorageOrder_;
1038
+ using IsColMajor_t = bool_constant<StorageOrder == ColMajor>;
1039
+
1040
+ EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) : Base(xpr.lhs(), xpr.rhs().diagonal()) {}
1041
+
1042
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const {
928
1043
  return m_matImpl.coeff(row, col) * m_diagImpl.coeff(col);
929
1044
  }
930
-
931
- #ifndef __CUDACC__
932
- template<int LoadMode,typename PacketType>
933
- EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const
934
- {
935
- return this->template packet_impl<LoadMode,PacketType>(row,col, col,
936
- typename internal::conditional<int(StorageOrder)==ColMajor, internal::true_type, internal::false_type>::type());
1045
+
1046
+ #ifndef EIGEN_GPUCC
1047
+ template <int LoadMode, typename PacketType>
1048
+ EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const {
1049
+ return this->template packet_impl<LoadMode, PacketType>(row, col, col, IsColMajor_t());
937
1050
  }
938
-
939
- template<int LoadMode,typename PacketType>
940
- EIGEN_STRONG_INLINE PacketType packet(Index idx) const
941
- {
942
- return packet<LoadMode,PacketType>(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx);
1051
+
1052
+ template <int LoadMode, typename PacketType>
1053
+ EIGEN_STRONG_INLINE PacketType packet(Index idx) const {
1054
+ return packet<LoadMode, PacketType>(StorageOrder == ColMajor ? idx : 0, StorageOrder == ColMajor ? 0 : idx);
1055
+ }
1056
+
1057
+ template <int LoadMode, typename PacketType>
1058
+ EIGEN_STRONG_INLINE PacketType packetSegment(Index row, Index col, Index begin, Index count) const {
1059
+ return this->template packet_segment_impl<LoadMode, PacketType>(row, col, col, begin, count, IsColMajor_t());
1060
+ }
1061
+
1062
+ template <int LoadMode, typename PacketType>
1063
+ EIGEN_STRONG_INLINE PacketType packetSegment(Index idx, Index begin, Index count) const {
1064
+ return packetSegment<LoadMode, PacketType>(StorageOrder == ColMajor ? idx : 0, StorageOrder == ColMajor ? 0 : idx,
1065
+ begin, count);
943
1066
  }
944
1067
  #endif
945
1068
  };
946
1069
 
947
1070
  /***************************************************************************
948
- * Products with permutation matrices
949
- ***************************************************************************/
1071
+ * Products with permutation matrices
1072
+ ***************************************************************************/
950
1073
 
951
1074
  /** \internal
952
- * \class permutation_matrix_product
953
- * Internal helper class implementing the product between a permutation matrix and a matrix.
954
- * This class is specialized for DenseShape below and for SparseShape in SparseCore/SparsePermutation.h
955
- */
956
- template<typename ExpressionType, int Side, bool Transposed, typename ExpressionShape>
1075
+ * \class permutation_matrix_product
1076
+ * Internal helper class implementing the product between a permutation matrix and a matrix.
1077
+ * This class is specialized for DenseShape below and for SparseShape in SparseCore/SparsePermutation.h
1078
+ */
1079
+ template <typename ExpressionType, int Side, bool Transposed, typename ExpressionShape>
957
1080
  struct permutation_matrix_product;
958
1081
 
959
- template<typename ExpressionType, int Side, bool Transposed>
960
- struct permutation_matrix_product<ExpressionType, Side, Transposed, DenseShape>
961
- {
962
- typedef typename nested_eval<ExpressionType, 1>::type MatrixType;
963
- typedef typename remove_all<MatrixType>::type MatrixTypeCleaned;
964
-
965
- template<typename Dest, typename PermutationType>
966
- static inline void run(Dest& dst, const PermutationType& perm, const ExpressionType& xpr)
967
- {
968
- MatrixType mat(xpr);
969
- const Index n = Side==OnTheLeft ? mat.rows() : mat.cols();
970
- // FIXME we need an is_same for expression that is not sensitive to constness. For instance
971
- // is_same_xpr<Block<const Matrix>, Block<Matrix> >::value should be true.
972
- //if(is_same<MatrixTypeCleaned,Dest>::value && extract_data(dst) == extract_data(mat))
973
- if(is_same_dense(dst, mat))
974
- {
975
- // apply the permutation inplace
976
- Matrix<bool,PermutationType::RowsAtCompileTime,1,0,PermutationType::MaxRowsAtCompileTime> mask(perm.size());
977
- mask.fill(false);
978
- Index r = 0;
979
- while(r < perm.size())
980
- {
981
- // search for the next seed
982
- while(r<perm.size() && mask[r]) r++;
983
- if(r>=perm.size())
984
- break;
985
- // we got one, let's follow it until we are back to the seed
986
- Index k0 = r++;
987
- Index kPrev = k0;
988
- mask.coeffRef(k0) = true;
989
- for(Index k=perm.indices().coeff(k0); k!=k0; k=perm.indices().coeff(k))
990
- {
991
- Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>(dst, k)
992
- .swap(Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>
993
- (dst,((Side==OnTheLeft) ^ Transposed) ? k0 : kPrev));
994
-
995
- mask.coeffRef(k) = true;
996
- kPrev = k;
997
- }
998
- }
999
- }
1000
- else
1001
- {
1002
- for(Index i = 0; i < n; ++i)
1003
- {
1004
- Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>
1005
- (dst, ((Side==OnTheLeft) ^ Transposed) ? perm.indices().coeff(i) : i)
1006
-
1007
- =
1082
+ template <typename ExpressionType, int Side, bool Transposed>
1083
+ struct permutation_matrix_product<ExpressionType, Side, Transposed, DenseShape> {
1084
+ typedef typename nested_eval<ExpressionType, 1>::type MatrixType;
1085
+ typedef remove_all_t<MatrixType> MatrixTypeCleaned;
1008
1086
 
1009
- Block<const MatrixTypeCleaned,Side==OnTheLeft ? 1 : MatrixTypeCleaned::RowsAtCompileTime,Side==OnTheRight ? 1 : MatrixTypeCleaned::ColsAtCompileTime>
1010
- (mat, ((Side==OnTheRight) ^ Transposed) ? perm.indices().coeff(i) : i);
1087
+ template <typename Dest, typename PermutationType>
1088
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Dest& dst, const PermutationType& perm,
1089
+ const ExpressionType& xpr) {
1090
+ MatrixType mat(xpr);
1091
+ const Index n = Side == OnTheLeft ? mat.rows() : mat.cols();
1092
+ // FIXME we need an is_same for expression that is not sensitive to constness. For instance
1093
+ // is_same_xpr<Block<const Matrix>, Block<Matrix> >::value should be true.
1094
+ // if(is_same<MatrixTypeCleaned,Dest>::value && extract_data(dst) == extract_data(mat))
1095
+ if (is_same_dense(dst, mat)) {
1096
+ // apply the permutation inplace
1097
+ Matrix<bool, PermutationType::RowsAtCompileTime, 1, 0, PermutationType::MaxRowsAtCompileTime> mask(perm.size());
1098
+ mask.fill(false);
1099
+ Index r = 0;
1100
+ while (r < perm.size()) {
1101
+ // search for the next seed
1102
+ while (r < perm.size() && mask[r]) r++;
1103
+ if (r >= perm.size()) break;
1104
+ // we got one, let's follow it until we are back to the seed
1105
+ Index k0 = r++;
1106
+ Index kPrev = k0;
1107
+ mask.coeffRef(k0) = true;
1108
+ for (Index k = perm.indices().coeff(k0); k != k0; k = perm.indices().coeff(k)) {
1109
+ Block<Dest, Side == OnTheLeft ? 1 : Dest::RowsAtCompileTime,
1110
+ Side == OnTheRight ? 1 : Dest::ColsAtCompileTime>(dst, k)
1111
+ .swap(Block < Dest, Side == OnTheLeft ? 1 : Dest::RowsAtCompileTime,
1112
+ Side == OnTheRight
1113
+ ? 1
1114
+ : Dest::ColsAtCompileTime > (dst, ((Side == OnTheLeft) ^ Transposed) ? k0 : kPrev));
1115
+
1116
+ mask.coeffRef(k) = true;
1117
+ kPrev = k;
1011
1118
  }
1012
1119
  }
1120
+ } else {
1121
+ for (Index i = 0; i < n; ++i) {
1122
+ Block<Dest, Side == OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side == OnTheRight ? 1 : Dest::ColsAtCompileTime>(
1123
+ dst, ((Side == OnTheLeft) ^ Transposed) ? perm.indices().coeff(i) : i)
1124
+
1125
+ =
1126
+
1127
+ Block < const MatrixTypeCleaned,
1128
+ Side == OnTheLeft ? 1 : MatrixTypeCleaned::RowsAtCompileTime,
1129
+ Side == OnTheRight ? 1
1130
+ : MatrixTypeCleaned::ColsAtCompileTime >
1131
+ (mat, ((Side == OnTheRight) ^ Transposed) ? perm.indices().coeff(i) : i);
1132
+ }
1013
1133
  }
1134
+ }
1014
1135
  };
1015
1136
 
1016
- template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
1017
- struct generic_product_impl<Lhs, Rhs, PermutationShape, MatrixShape, ProductTag>
1018
- {
1019
- template<typename Dest>
1020
- static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
1021
- {
1137
+ template <typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
1138
+ struct generic_product_impl<Lhs, Rhs, PermutationShape, MatrixShape, ProductTag> {
1139
+ template <typename Dest>
1140
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) {
1022
1141
  permutation_matrix_product<Rhs, OnTheLeft, false, MatrixShape>::run(dst, lhs, rhs);
1023
1142
  }
1024
1143
  };
1025
1144
 
1026
- template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
1027
- struct generic_product_impl<Lhs, Rhs, MatrixShape, PermutationShape, ProductTag>
1028
- {
1029
- template<typename Dest>
1030
- static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
1031
- {
1145
+ template <typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
1146
+ struct generic_product_impl<Lhs, Rhs, MatrixShape, PermutationShape, ProductTag> {
1147
+ template <typename Dest>
1148
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) {
1032
1149
  permutation_matrix_product<Lhs, OnTheRight, false, MatrixShape>::run(dst, rhs, lhs);
1033
1150
  }
1034
1151
  };
1035
1152
 
1036
- template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
1037
- struct generic_product_impl<Inverse<Lhs>, Rhs, PermutationShape, MatrixShape, ProductTag>
1038
- {
1039
- template<typename Dest>
1040
- static void evalTo(Dest& dst, const Inverse<Lhs>& lhs, const Rhs& rhs)
1041
- {
1153
+ template <typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
1154
+ struct generic_product_impl<Inverse<Lhs>, Rhs, PermutationShape, MatrixShape, ProductTag> {
1155
+ template <typename Dest>
1156
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Inverse<Lhs>& lhs, const Rhs& rhs) {
1042
1157
  permutation_matrix_product<Rhs, OnTheLeft, true, MatrixShape>::run(dst, lhs.nestedExpression(), rhs);
1043
1158
  }
1044
1159
  };
1045
1160
 
1046
- template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
1047
- struct generic_product_impl<Lhs, Inverse<Rhs>, MatrixShape, PermutationShape, ProductTag>
1048
- {
1049
- template<typename Dest>
1050
- static void evalTo(Dest& dst, const Lhs& lhs, const Inverse<Rhs>& rhs)
1051
- {
1161
+ template <typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
1162
+ struct generic_product_impl<Lhs, Inverse<Rhs>, MatrixShape, PermutationShape, ProductTag> {
1163
+ template <typename Dest>
1164
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Inverse<Rhs>& rhs) {
1052
1165
  permutation_matrix_product<Lhs, OnTheRight, true, MatrixShape>::run(dst, rhs.nestedExpression(), lhs);
1053
1166
  }
1054
1167
  };
1055
1168
 
1056
-
1057
1169
  /***************************************************************************
1058
- * Products with transpositions matrices
1059
- ***************************************************************************/
1170
+ * Products with transpositions matrices
1171
+ ***************************************************************************/
1060
1172
 
1061
1173
  // FIXME could we unify Transpositions and Permutation into a single "shape"??
1062
1174
 
1063
1175
  /** \internal
1064
- * \class transposition_matrix_product
1065
- * Internal helper class implementing the product between a permutation matrix and a matrix.
1066
- */
1067
- template<typename ExpressionType, int Side, bool Transposed, typename ExpressionShape>
1068
- struct transposition_matrix_product
1069
- {
1176
+ * \class transposition_matrix_product
1177
+ * Internal helper class implementing the product between a transpositions matrix and a matrix.
1178
+ */
1179
+ template <typename ExpressionType, int Side, bool Transposed, typename ExpressionShape>
1180
+ struct transposition_matrix_product {
1070
1181
  typedef typename nested_eval<ExpressionType, 1>::type MatrixType;
1071
- typedef typename remove_all<MatrixType>::type MatrixTypeCleaned;
1072
-
1073
- template<typename Dest, typename TranspositionType>
1074
- static inline void run(Dest& dst, const TranspositionType& tr, const ExpressionType& xpr)
1075
- {
1182
+ typedef remove_all_t<MatrixType> MatrixTypeCleaned;
1183
+
1184
+ template <typename Dest, typename TranspositionType>
1185
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Dest& dst, const TranspositionType& tr,
1186
+ const ExpressionType& xpr) {
1076
1187
  MatrixType mat(xpr);
1077
1188
  typedef typename TranspositionType::StorageIndex StorageIndex;
1078
1189
  const Index size = tr.size();
1079
1190
  StorageIndex j = 0;
1080
1191
 
1081
- if(!is_same_dense(dst,mat))
1082
- dst = mat;
1192
+ if (!is_same_dense(dst, mat)) dst = mat;
1083
1193
 
1084
- for(Index k=(Transposed?size-1:0) ; Transposed?k>=0:k<size ; Transposed?--k:++k)
1085
- if(Index(j=tr.coeff(k))!=k)
1086
- {
1087
- if(Side==OnTheLeft) dst.row(k).swap(dst.row(j));
1088
- else if(Side==OnTheRight) dst.col(k).swap(dst.col(j));
1194
+ for (Index k = (Transposed ? size - 1 : 0); Transposed ? k >= 0 : k < size; Transposed ? --k : ++k)
1195
+ if (Index(j = tr.coeff(k)) != k) {
1196
+ if (Side == OnTheLeft)
1197
+ dst.row(k).swap(dst.row(j));
1198
+ else if (Side == OnTheRight)
1199
+ dst.col(k).swap(dst.col(j));
1089
1200
  }
1090
1201
  }
1091
1202
  };
1092
1203
 
1093
- template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
1094
- struct generic_product_impl<Lhs, Rhs, TranspositionsShape, MatrixShape, ProductTag>
1095
- {
1096
- template<typename Dest>
1097
- static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
1098
- {
1204
+ template <typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
1205
+ struct generic_product_impl<Lhs, Rhs, TranspositionsShape, MatrixShape, ProductTag> {
1206
+ template <typename Dest>
1207
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) {
1099
1208
  transposition_matrix_product<Rhs, OnTheLeft, false, MatrixShape>::run(dst, lhs, rhs);
1100
1209
  }
1101
1210
  };
1102
1211
 
1103
- template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
1104
- struct generic_product_impl<Lhs, Rhs, MatrixShape, TranspositionsShape, ProductTag>
1105
- {
1106
- template<typename Dest>
1107
- static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
1108
- {
1212
+ template <typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
1213
+ struct generic_product_impl<Lhs, Rhs, MatrixShape, TranspositionsShape, ProductTag> {
1214
+ template <typename Dest>
1215
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) {
1109
1216
  transposition_matrix_product<Lhs, OnTheRight, false, MatrixShape>::run(dst, rhs, lhs);
1110
1217
  }
1111
1218
  };
1112
1219
 
1113
-
1114
- template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
1115
- struct generic_product_impl<Transpose<Lhs>, Rhs, TranspositionsShape, MatrixShape, ProductTag>
1116
- {
1117
- template<typename Dest>
1118
- static void evalTo(Dest& dst, const Transpose<Lhs>& lhs, const Rhs& rhs)
1119
- {
1220
+ template <typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
1221
+ struct generic_product_impl<Transpose<Lhs>, Rhs, TranspositionsShape, MatrixShape, ProductTag> {
1222
+ template <typename Dest>
1223
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Transpose<Lhs>& lhs, const Rhs& rhs) {
1120
1224
  transposition_matrix_product<Rhs, OnTheLeft, true, MatrixShape>::run(dst, lhs.nestedExpression(), rhs);
1121
1225
  }
1122
1226
  };
1123
1227
 
1124
- template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
1125
- struct generic_product_impl<Lhs, Transpose<Rhs>, MatrixShape, TranspositionsShape, ProductTag>
1126
- {
1127
- template<typename Dest>
1128
- static void evalTo(Dest& dst, const Lhs& lhs, const Transpose<Rhs>& rhs)
1129
- {
1228
+ template <typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
1229
+ struct generic_product_impl<Lhs, Transpose<Rhs>, MatrixShape, TranspositionsShape, ProductTag> {
1230
+ template <typename Dest>
1231
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Transpose<Rhs>& rhs) {
1130
1232
  transposition_matrix_product<Lhs, OnTheRight, true, MatrixShape>::run(dst, rhs.nestedExpression(), lhs);
1131
1233
  }
1132
1234
  };
1133
1235
 
1134
- } // end namespace internal
1236
+ /***************************************************************************
1237
+ * skew symmetric products
1238
+ * for now we just call the generic implementation
1239
+ ***************************************************************************/
1240
+ template <typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
1241
+ struct generic_product_impl<Lhs, Rhs, SkewSymmetricShape, MatrixShape, ProductTag> {
1242
+ template <typename Dest>
1243
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) {
1244
+ generic_product_impl<typename Lhs::DenseMatrixType, Rhs, DenseShape, MatrixShape, ProductTag>::evalTo(dst, lhs,
1245
+ rhs);
1246
+ }
1247
+ };
1248
+
1249
+ template <typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
1250
+ struct generic_product_impl<Lhs, Rhs, MatrixShape, SkewSymmetricShape, ProductTag> {
1251
+ template <typename Dest>
1252
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) {
1253
+ generic_product_impl<Lhs, typename Rhs::DenseMatrixType, MatrixShape, DenseShape, ProductTag>::evalTo(dst, lhs,
1254
+ rhs);
1255
+ }
1256
+ };
1257
+
1258
+ template <typename Lhs, typename Rhs, int ProductTag>
1259
+ struct generic_product_impl<Lhs, Rhs, SkewSymmetricShape, SkewSymmetricShape, ProductTag> {
1260
+ template <typename Dest>
1261
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) {
1262
+ generic_product_impl<typename Lhs::DenseMatrixType, typename Rhs::DenseMatrixType, DenseShape, DenseShape,
1263
+ ProductTag>::evalTo(dst, lhs, rhs);
1264
+ }
1265
+ };
1266
+
1267
+ template <typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
1268
+ struct generic_product_impl<Lhs, Rhs, MatrixShape, HomogeneousShape, ProductTag>
1269
+ : generic_product_impl<Lhs, typename Rhs::PlainObject, MatrixShape, DenseShape, ProductTag> {};
1270
+
1271
+ template <typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
1272
+ struct generic_product_impl<Lhs, Rhs, HomogeneousShape, MatrixShape, ProductTag>
1273
+ : generic_product_impl<typename Lhs::PlainObject, Rhs, DenseShape, MatrixShape, ProductTag> {};
1274
+
1275
+ template <typename Lhs, typename Rhs, int ProductTag>
1276
+ struct generic_product_impl<Lhs, Rhs, PermutationShape, HomogeneousShape, ProductTag>
1277
+ : generic_product_impl<Lhs, Rhs, PermutationShape, DenseShape, ProductTag> {};
1278
+
1279
+ template <typename Lhs, typename Rhs, int ProductTag>
1280
+ struct generic_product_impl<Lhs, Rhs, HomogeneousShape, PermutationShape, ProductTag>
1281
+ : generic_product_impl<Lhs, Rhs, DenseShape, PermutationShape, ProductTag> {};
1282
+
1283
+ } // end namespace internal
1135
1284
 
1136
- } // end namespace Eigen
1285
+ } // end namespace Eigen
1137
1286
 
1138
- #endif // EIGEN_PRODUCT_EVALUATORS_H
1287
+ #endif // EIGEN_PRODUCT_EVALUATORS_H