@smake/eigen 1.0.2 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (287) hide show
  1. package/README.md +1 -1
  2. package/eigen/COPYING.APACHE +203 -0
  3. package/eigen/COPYING.BSD +26 -0
  4. package/eigen/COPYING.GPL +674 -0
  5. package/eigen/COPYING.LGPL +502 -0
  6. package/eigen/COPYING.MINPACK +51 -0
  7. package/eigen/COPYING.MPL2 +373 -0
  8. package/eigen/COPYING.README +18 -0
  9. package/eigen/Eigen/Cholesky +0 -1
  10. package/eigen/Eigen/Core +108 -266
  11. package/eigen/Eigen/Eigenvalues +0 -1
  12. package/eigen/Eigen/Geometry +3 -6
  13. package/eigen/Eigen/Householder +0 -1
  14. package/eigen/Eigen/Jacobi +0 -1
  15. package/eigen/Eigen/KLUSupport +41 -0
  16. package/eigen/Eigen/LU +2 -5
  17. package/eigen/Eigen/OrderingMethods +0 -3
  18. package/eigen/Eigen/PaStiXSupport +1 -0
  19. package/eigen/Eigen/PardisoSupport +0 -0
  20. package/eigen/Eigen/QR +0 -1
  21. package/eigen/Eigen/QtAlignedMalloc +0 -1
  22. package/eigen/Eigen/SVD +0 -1
  23. package/eigen/Eigen/Sparse +0 -2
  24. package/eigen/Eigen/SparseCholesky +0 -8
  25. package/eigen/Eigen/SparseLU +4 -0
  26. package/eigen/Eigen/src/Cholesky/LDLT.h +42 -27
  27. package/eigen/Eigen/src/Cholesky/LLT.h +39 -23
  28. package/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +90 -47
  29. package/eigen/Eigen/src/Core/ArithmeticSequence.h +413 -0
  30. package/eigen/Eigen/src/Core/Array.h +99 -11
  31. package/eigen/Eigen/src/Core/ArrayBase.h +1 -1
  32. package/eigen/Eigen/src/Core/ArrayWrapper.h +21 -21
  33. package/eigen/Eigen/src/Core/Assign.h +1 -1
  34. package/eigen/Eigen/src/Core/AssignEvaluator.h +125 -50
  35. package/eigen/Eigen/src/Core/Assign_MKL.h +10 -10
  36. package/eigen/Eigen/src/Core/BandMatrix.h +16 -16
  37. package/eigen/Eigen/src/Core/Block.h +56 -60
  38. package/eigen/Eigen/src/Core/BooleanRedux.h +29 -31
  39. package/eigen/Eigen/src/Core/CommaInitializer.h +7 -3
  40. package/eigen/Eigen/src/Core/CoreEvaluators.h +325 -272
  41. package/eigen/Eigen/src/Core/CoreIterators.h +5 -0
  42. package/eigen/Eigen/src/Core/CwiseBinaryOp.h +21 -22
  43. package/eigen/Eigen/src/Core/CwiseNullaryOp.h +153 -18
  44. package/eigen/Eigen/src/Core/CwiseUnaryOp.h +6 -6
  45. package/eigen/Eigen/src/Core/CwiseUnaryView.h +12 -10
  46. package/eigen/Eigen/src/Core/DenseBase.h +128 -39
  47. package/eigen/Eigen/src/Core/DenseCoeffsBase.h +25 -21
  48. package/eigen/Eigen/src/Core/DenseStorage.h +150 -68
  49. package/eigen/Eigen/src/Core/Diagonal.h +21 -23
  50. package/eigen/Eigen/src/Core/DiagonalMatrix.h +50 -2
  51. package/eigen/Eigen/src/Core/DiagonalProduct.h +1 -1
  52. package/eigen/Eigen/src/Core/Dot.h +10 -10
  53. package/eigen/Eigen/src/Core/EigenBase.h +10 -9
  54. package/eigen/Eigen/src/Core/ForceAlignedAccess.h +8 -4
  55. package/eigen/Eigen/src/Core/Fuzzy.h +3 -3
  56. package/eigen/Eigen/src/Core/GeneralProduct.h +20 -10
  57. package/eigen/Eigen/src/Core/GenericPacketMath.h +597 -147
  58. package/eigen/Eigen/src/Core/GlobalFunctions.h +40 -33
  59. package/eigen/Eigen/src/Core/IO.h +40 -7
  60. package/eigen/Eigen/src/Core/IndexedView.h +237 -0
  61. package/eigen/Eigen/src/Core/Inverse.h +9 -10
  62. package/eigen/Eigen/src/Core/Map.h +7 -7
  63. package/eigen/Eigen/src/Core/MapBase.h +5 -3
  64. package/eigen/Eigen/src/Core/MathFunctions.h +756 -120
  65. package/eigen/Eigen/src/Core/MathFunctionsImpl.h +118 -19
  66. package/eigen/Eigen/src/Core/Matrix.h +131 -25
  67. package/eigen/Eigen/src/Core/MatrixBase.h +19 -2
  68. package/eigen/Eigen/src/Core/NestByValue.h +25 -50
  69. package/eigen/Eigen/src/Core/NoAlias.h +4 -3
  70. package/eigen/Eigen/src/Core/NumTraits.h +107 -20
  71. package/eigen/Eigen/src/Core/PartialReduxEvaluator.h +232 -0
  72. package/eigen/Eigen/src/Core/PermutationMatrix.h +3 -3
  73. package/eigen/Eigen/src/Core/PlainObjectBase.h +145 -54
  74. package/eigen/Eigen/src/Core/Product.h +30 -25
  75. package/eigen/Eigen/src/Core/ProductEvaluators.h +183 -142
  76. package/eigen/Eigen/src/Core/Random.h +37 -1
  77. package/eigen/Eigen/src/Core/Redux.h +180 -170
  78. package/eigen/Eigen/src/Core/Ref.h +118 -21
  79. package/eigen/Eigen/src/Core/Replicate.h +8 -8
  80. package/eigen/Eigen/src/Core/Reshaped.h +454 -0
  81. package/eigen/Eigen/src/Core/ReturnByValue.h +7 -5
  82. package/eigen/Eigen/src/Core/Reverse.h +18 -12
  83. package/eigen/Eigen/src/Core/Select.h +8 -6
  84. package/eigen/Eigen/src/Core/SelfAdjointView.h +33 -20
  85. package/eigen/Eigen/src/Core/Solve.h +14 -14
  86. package/eigen/Eigen/src/Core/SolveTriangular.h +13 -13
  87. package/eigen/Eigen/src/Core/SolverBase.h +41 -3
  88. package/eigen/Eigen/src/Core/StableNorm.h +100 -70
  89. package/eigen/Eigen/src/Core/StlIterators.h +463 -0
  90. package/eigen/Eigen/src/Core/Stride.h +9 -4
  91. package/eigen/Eigen/src/Core/Swap.h +5 -4
  92. package/eigen/Eigen/src/Core/Transpose.h +86 -27
  93. package/eigen/Eigen/src/Core/Transpositions.h +26 -8
  94. package/eigen/Eigen/src/Core/TriangularMatrix.h +88 -72
  95. package/eigen/Eigen/src/Core/VectorBlock.h +5 -5
  96. package/eigen/Eigen/src/Core/VectorwiseOp.h +159 -70
  97. package/eigen/Eigen/src/Core/Visitor.h +137 -29
  98. package/eigen/Eigen/src/Core/arch/AVX/Complex.h +50 -129
  99. package/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +126 -337
  100. package/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +1092 -155
  101. package/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +65 -1
  102. package/eigen/Eigen/src/Core/arch/AVX512/Complex.h +422 -0
  103. package/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +186 -213
  104. package/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1250 -252
  105. package/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +89 -0
  106. package/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +152 -165
  107. package/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +19 -251
  108. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2937 -0
  109. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +221 -0
  110. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +629 -0
  111. package/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2042 -392
  112. package/eigen/Eigen/src/Core/arch/CUDA/Complex.h +235 -80
  113. package/eigen/Eigen/src/Core/arch/Default/BFloat16.h +700 -0
  114. package/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +102 -14
  115. package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1649 -0
  116. package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +110 -0
  117. package/eigen/Eigen/src/Core/arch/Default/Half.h +942 -0
  118. package/eigen/Eigen/src/Core/arch/Default/Settings.h +1 -1
  119. package/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +120 -0
  120. package/eigen/Eigen/src/Core/arch/{CUDA → GPU}/MathFunctions.h +16 -4
  121. package/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1685 -0
  122. package/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +80 -0
  123. package/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
  124. package/eigen/Eigen/src/Core/arch/MSA/Complex.h +648 -0
  125. package/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +387 -0
  126. package/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1233 -0
  127. package/eigen/Eigen/src/Core/arch/NEON/Complex.h +313 -219
  128. package/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +183 -0
  129. package/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +54 -70
  130. package/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4376 -549
  131. package/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1419 -0
  132. package/eigen/Eigen/src/Core/arch/SSE/Complex.h +59 -179
  133. package/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +65 -428
  134. package/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +893 -283
  135. package/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +65 -0
  136. package/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +44 -0
  137. package/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +752 -0
  138. package/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +49 -0
  139. package/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +232 -0
  140. package/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +301 -0
  141. package/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +670 -0
  142. package/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +694 -0
  143. package/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +85 -0
  144. package/eigen/Eigen/src/Core/arch/ZVector/Complex.h +212 -183
  145. package/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +101 -5
  146. package/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +510 -395
  147. package/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +11 -2
  148. package/eigen/Eigen/src/Core/functors/BinaryFunctors.h +112 -46
  149. package/eigen/Eigen/src/Core/functors/NullaryFunctors.h +31 -30
  150. package/eigen/Eigen/src/Core/functors/StlFunctors.h +32 -2
  151. package/eigen/Eigen/src/Core/functors/UnaryFunctors.h +354 -15
  152. package/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1073 -585
  153. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +29 -7
  154. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +4 -4
  155. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +1 -1
  156. package/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +382 -483
  157. package/eigen/Eigen/src/Core/products/Parallelizer.h +23 -9
  158. package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +23 -6
  159. package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +8 -6
  160. package/eigen/Eigen/src/Core/products/SelfadjointProduct.h +2 -2
  161. package/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +5 -4
  162. package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +3 -3
  163. package/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +5 -3
  164. package/eigen/Eigen/src/Core/products/TriangularSolverVector.h +13 -10
  165. package/eigen/Eigen/src/Core/util/BlasUtil.h +208 -124
  166. package/eigen/Eigen/src/Core/util/ConfigureVectorization.h +512 -0
  167. package/eigen/Eigen/src/Core/util/Constants.h +25 -9
  168. package/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +14 -2
  169. package/eigen/Eigen/src/Core/util/ForwardDeclarations.h +28 -4
  170. package/eigen/Eigen/src/Core/util/IndexedViewHelper.h +186 -0
  171. package/eigen/Eigen/src/Core/util/IntegralConstant.h +272 -0
  172. package/eigen/Eigen/src/Core/util/MKL_support.h +8 -1
  173. package/eigen/Eigen/src/Core/util/Macros.h +661 -250
  174. package/eigen/Eigen/src/Core/util/Memory.h +222 -52
  175. package/eigen/Eigen/src/Core/util/Meta.h +349 -105
  176. package/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
  177. package/eigen/Eigen/src/Core/util/StaticAssert.h +8 -5
  178. package/eigen/Eigen/src/Core/util/SymbolicIndex.h +293 -0
  179. package/eigen/Eigen/src/Core/util/XprHelper.h +48 -30
  180. package/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +1 -1
  181. package/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +1 -1
  182. package/eigen/Eigen/src/Eigenvalues/EigenSolver.h +2 -2
  183. package/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +1 -1
  184. package/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +2 -2
  185. package/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +2 -2
  186. package/eigen/Eigen/src/Eigenvalues/RealQZ.h +9 -6
  187. package/eigen/Eigen/src/Eigenvalues/RealSchur.h +10 -5
  188. package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +75 -42
  189. package/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +20 -15
  190. package/eigen/Eigen/src/Geometry/AlignedBox.h +99 -5
  191. package/eigen/Eigen/src/Geometry/AngleAxis.h +4 -4
  192. package/eigen/Eigen/src/Geometry/EulerAngles.h +3 -3
  193. package/eigen/Eigen/src/Geometry/Homogeneous.h +15 -11
  194. package/eigen/Eigen/src/Geometry/Hyperplane.h +1 -1
  195. package/eigen/Eigen/src/Geometry/OrthoMethods.h +3 -2
  196. package/eigen/Eigen/src/Geometry/ParametrizedLine.h +39 -2
  197. package/eigen/Eigen/src/Geometry/Quaternion.h +52 -14
  198. package/eigen/Eigen/src/Geometry/Rotation2D.h +3 -3
  199. package/eigen/Eigen/src/Geometry/Scaling.h +22 -4
  200. package/eigen/Eigen/src/Geometry/Transform.h +86 -65
  201. package/eigen/Eigen/src/Geometry/Translation.h +6 -6
  202. package/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +168 -0
  203. package/eigen/Eigen/src/Householder/BlockHouseholder.h +9 -2
  204. package/eigen/Eigen/src/Householder/Householder.h +8 -4
  205. package/eigen/Eigen/src/Householder/HouseholderSequence.h +123 -48
  206. package/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +15 -15
  207. package/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +7 -23
  208. package/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +5 -22
  209. package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +41 -47
  210. package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +51 -60
  211. package/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +70 -20
  212. package/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +2 -20
  213. package/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +11 -9
  214. package/eigen/Eigen/src/Jacobi/Jacobi.h +31 -10
  215. package/eigen/Eigen/src/KLUSupport/KLUSupport.h +358 -0
  216. package/eigen/Eigen/src/LU/Determinant.h +35 -19
  217. package/eigen/Eigen/src/LU/FullPivLU.h +29 -43
  218. package/eigen/Eigen/src/LU/InverseImpl.h +25 -8
  219. package/eigen/Eigen/src/LU/PartialPivLU.h +67 -57
  220. package/eigen/Eigen/src/LU/arch/InverseSize4.h +351 -0
  221. package/eigen/Eigen/src/OrderingMethods/Amd.h +7 -17
  222. package/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +297 -277
  223. package/eigen/Eigen/src/OrderingMethods/Ordering.h +6 -10
  224. package/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +1 -1
  225. package/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +10 -9
  226. package/eigen/Eigen/src/QR/ColPivHouseholderQR.h +41 -20
  227. package/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +100 -27
  228. package/eigen/Eigen/src/QR/FullPivHouseholderQR.h +59 -22
  229. package/eigen/Eigen/src/QR/HouseholderQR.h +48 -23
  230. package/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +25 -3
  231. package/eigen/Eigen/src/SVD/BDCSVD.h +137 -48
  232. package/eigen/Eigen/src/SVD/JacobiSVD.h +22 -14
  233. package/eigen/Eigen/src/SVD/SVDBase.h +82 -21
  234. package/eigen/Eigen/src/SVD/UpperBidiagonalization.h +3 -3
  235. package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +16 -8
  236. package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +11 -36
  237. package/eigen/Eigen/src/SparseCore/CompressedStorage.h +16 -0
  238. package/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +6 -6
  239. package/eigen/Eigen/src/SparseCore/SparseAssign.h +81 -27
  240. package/eigen/Eigen/src/SparseCore/SparseBlock.h +25 -57
  241. package/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +40 -11
  242. package/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +11 -15
  243. package/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +2 -2
  244. package/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +30 -8
  245. package/eigen/Eigen/src/SparseCore/SparseMatrix.h +124 -10
  246. package/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +5 -12
  247. package/eigen/Eigen/src/SparseCore/SparseProduct.h +13 -1
  248. package/eigen/Eigen/src/SparseCore/SparseRef.h +7 -7
  249. package/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +3 -0
  250. package/eigen/Eigen/src/SparseCore/SparseUtil.h +8 -0
  251. package/eigen/Eigen/src/SparseCore/SparseVector.h +1 -1
  252. package/eigen/Eigen/src/SparseLU/SparseLU.h +160 -10
  253. package/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +1 -1
  254. package/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +76 -2
  255. package/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +2 -2
  256. package/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +1 -1
  257. package/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +1 -1
  258. package/eigen/Eigen/src/SparseQR/SparseQR.h +19 -6
  259. package/eigen/Eigen/src/StlSupport/StdDeque.h +2 -14
  260. package/eigen/Eigen/src/StlSupport/StdList.h +2 -2
  261. package/eigen/Eigen/src/StlSupport/StdVector.h +2 -2
  262. package/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +6 -8
  263. package/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +175 -39
  264. package/eigen/Eigen/src/misc/lapacke.h +5 -4
  265. package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +27 -1
  266. package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +155 -11
  267. package/eigen/Eigen/src/plugins/BlockMethods.h +626 -242
  268. package/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +14 -0
  269. package/eigen/Eigen/src/plugins/IndexedViewMethods.h +262 -0
  270. package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +4 -4
  271. package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +10 -0
  272. package/eigen/Eigen/src/plugins/ReshapedMethods.h +149 -0
  273. package/eigen/README.md +5 -0
  274. package/lib/LibEigen.d.ts +4 -0
  275. package/lib/LibEigen.js +14 -0
  276. package/lib/index.d.ts +1 -1
  277. package/lib/index.js +7 -3
  278. package/package.json +2 -10
  279. package/eigen/Eigen/CMakeLists.txt +0 -19
  280. package/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -675
  281. package/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
  282. package/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
  283. package/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
  284. package/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
  285. package/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
  286. package/lib/eigen.d.ts +0 -2
  287. package/lib/eigen.js +0 -15
@@ -14,27 +14,27 @@
14
14
  #define EIGEN_PRODUCTEVALUATORS_H
15
15
 
16
16
  namespace Eigen {
17
-
17
+
18
18
  namespace internal {
19
19
 
20
20
  /** \internal
21
21
  * Evaluator of a product expression.
22
22
  * Since products require special treatments to handle all possible cases,
23
- * we simply deffer the evaluation logic to a product_evaluator class
23
+ * we simply defer the evaluation logic to a product_evaluator class
24
24
  * which offers more partial specialization possibilities.
25
- *
25
+ *
26
26
  * \sa class product_evaluator
27
27
  */
28
28
  template<typename Lhs, typename Rhs, int Options>
29
- struct evaluator<Product<Lhs, Rhs, Options> >
29
+ struct evaluator<Product<Lhs, Rhs, Options> >
30
30
  : public product_evaluator<Product<Lhs, Rhs, Options> >
31
31
  {
32
32
  typedef Product<Lhs, Rhs, Options> XprType;
33
33
  typedef product_evaluator<XprType> Base;
34
-
34
+
35
35
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr) : Base(xpr) {}
36
36
  };
37
-
37
+
38
38
  // Catch "scalar * ( A * B )" and transform it to "(A*scalar) * B"
39
39
  // TODO we should apply that rule only if that's really helpful
40
40
  template<typename Lhs, typename Rhs, typename Scalar1, typename Scalar2, typename Plain1>
@@ -62,12 +62,12 @@ struct evaluator<CwiseBinaryOp<internal::scalar_product_op<Scalar1,Scalar2>,
62
62
 
63
63
 
64
64
  template<typename Lhs, typename Rhs, int DiagIndex>
65
- struct evaluator<Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> >
65
+ struct evaluator<Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> >
66
66
  : public evaluator<Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex> >
67
67
  {
68
68
  typedef Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> XprType;
69
69
  typedef evaluator<Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex> > Base;
70
-
70
+
71
71
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr)
72
72
  : Base(Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex>(
73
73
  Product<Lhs, Rhs, LazyProduct>(xpr.nestedExpression().lhs(), xpr.nestedExpression().rhs()),
@@ -108,27 +108,27 @@ struct product_evaluator<Product<Lhs, Rhs, Options>, ProductTag, LhsShape, RhsSh
108
108
  : m_result(xpr.rows(), xpr.cols())
109
109
  {
110
110
  ::new (static_cast<Base*>(this)) Base(m_result);
111
-
111
+
112
112
  // FIXME shall we handle nested_eval here?,
113
113
  // if so, then we must take care at removing the call to nested_eval in the specializations (e.g., in permutation_matrix_product, transposition_matrix_product, etc.)
114
114
  // typedef typename internal::nested_eval<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
115
115
  // typedef typename internal::nested_eval<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
116
116
  // typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned;
117
117
  // typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned;
118
- //
118
+ //
119
119
  // const LhsNested lhs(xpr.lhs());
120
120
  // const RhsNested rhs(xpr.rhs());
121
- //
121
+ //
122
122
  // generic_product_impl<LhsNestedCleaned, RhsNestedCleaned>::evalTo(m_result, lhs, rhs);
123
123
 
124
124
  generic_product_impl<Lhs, Rhs, LhsShape, RhsShape, ProductTag>::evalTo(m_result, xpr.lhs(), xpr.rhs());
125
125
  }
126
-
127
- protected:
126
+
127
+ protected:
128
128
  PlainObject m_result;
129
129
  };
130
130
 
131
- // The following three shortcuts are enabled only if the scalar types match excatly.
131
+ // The following three shortcuts are enabled only if the scalar types match exactly.
132
132
  // TODO: we could enable them for different scalar types when the product is not vectorized.
133
133
 
134
134
  // Dense = Product
@@ -137,7 +137,7 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::assign_op<Scal
137
137
  typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type>
138
138
  {
139
139
  typedef Product<Lhs,Rhs,Options> SrcXprType;
140
- static EIGEN_STRONG_INLINE
140
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
141
141
  void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
142
142
  {
143
143
  Index dstRows = src.rows();
@@ -155,7 +155,7 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::add_assign_op<
155
155
  typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type>
156
156
  {
157
157
  typedef Product<Lhs,Rhs,Options> SrcXprType;
158
- static EIGEN_STRONG_INLINE
158
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
159
159
  void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<Scalar,Scalar> &)
160
160
  {
161
161
  eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
@@ -170,7 +170,7 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::sub_assign_op<
170
170
  typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type>
171
171
  {
172
172
  typedef Product<Lhs,Rhs,Options> SrcXprType;
173
- static EIGEN_STRONG_INLINE
173
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
174
174
  void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<Scalar,Scalar> &)
175
175
  {
176
176
  eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
@@ -190,7 +190,7 @@ struct Assignment<DstXprType, CwiseBinaryOp<internal::scalar_product_op<ScalarBi
190
190
  typedef CwiseBinaryOp<internal::scalar_product_op<ScalarBis,Scalar>,
191
191
  const CwiseNullaryOp<internal::scalar_constant_op<ScalarBis>,Plain>,
192
192
  const Product<Lhs,Rhs,DefaultProduct> > SrcXprType;
193
- static EIGEN_STRONG_INLINE
193
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
194
194
  void run(DstXprType &dst, const SrcXprType &src, const AssignFunc& func)
195
195
  {
196
196
  call_assignment_no_alias(dst, (src.lhs().functor().m_other * src.rhs().lhs())*src.rhs().rhs(), func);
@@ -217,7 +217,7 @@ template<typename DstXprType, typename OtherXpr, typename ProductType, typename
217
217
  struct assignment_from_xpr_op_product
218
218
  {
219
219
  template<typename SrcXprType, typename InitialFunc>
220
- static EIGEN_STRONG_INLINE
220
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
221
221
  void run(DstXprType &dst, const SrcXprType &src, const InitialFunc& /*func*/)
222
222
  {
223
223
  call_assignment_no_alias(dst, src.lhs(), Func1());
@@ -246,19 +246,19 @@ template<typename Lhs, typename Rhs>
246
246
  struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,InnerProduct>
247
247
  {
248
248
  template<typename Dst>
249
- static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
249
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
250
250
  {
251
251
  dst.coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum();
252
252
  }
253
-
253
+
254
254
  template<typename Dst>
255
- static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
255
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
256
256
  {
257
257
  dst.coeffRef(0,0) += (lhs.transpose().cwiseProduct(rhs)).sum();
258
258
  }
259
-
259
+
260
260
  template<typename Dst>
261
- static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
261
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
262
262
  { dst.coeffRef(0,0) -= (lhs.transpose().cwiseProduct(rhs)).sum(); }
263
263
  };
264
264
 
@@ -269,10 +269,10 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,InnerProduct>
269
269
 
270
270
  // Column major result
271
271
  template<typename Dst, typename Lhs, typename Rhs, typename Func>
272
- void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const false_type&)
272
+ void EIGEN_DEVICE_FUNC outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const false_type&)
273
273
  {
274
274
  evaluator<Rhs> rhsEval(rhs);
275
- typename nested_eval<Lhs,Rhs::SizeAtCompileTime>::type actual_lhs(lhs);
275
+ ei_declare_local_nested_eval(Lhs,lhs,Rhs::SizeAtCompileTime,actual_lhs);
276
276
  // FIXME if cols is large enough, then it might be useful to make sure that lhs is sequentially stored
277
277
  // FIXME not very good if rhs is real and lhs complex while alpha is real too
278
278
  const Index cols = dst.cols();
@@ -282,10 +282,10 @@ void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const
282
282
 
283
283
  // Row major result
284
284
  template<typename Dst, typename Lhs, typename Rhs, typename Func>
285
- void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const true_type&)
285
+ void EIGEN_DEVICE_FUNC outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const true_type&)
286
286
  {
287
287
  evaluator<Lhs> lhsEval(lhs);
288
- typename nested_eval<Rhs,Lhs::SizeAtCompileTime>::type actual_rhs(rhs);
288
+ ei_declare_local_nested_eval(Rhs,rhs,Lhs::SizeAtCompileTime,actual_rhs);
289
289
  // FIXME if rows is large enough, then it might be useful to make sure that rhs is sequentially stored
290
290
  // FIXME not very good if lhs is real and rhs complex while alpha is real too
291
291
  const Index rows = dst.rows();
@@ -298,43 +298,43 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,OuterProduct>
298
298
  {
299
299
  template<typename T> struct is_row_major : internal::conditional<(int(T::Flags)&RowMajorBit), internal::true_type, internal::false_type>::type {};
300
300
  typedef typename Product<Lhs,Rhs>::Scalar Scalar;
301
-
301
+
302
302
  // TODO it would be nice to be able to exploit our *_assign_op functors for that purpose
303
- struct set { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() = src; } };
304
- struct add { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() += src; } };
305
- struct sub { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() -= src; } };
303
+ struct set { template<typename Dst, typename Src> EIGEN_DEVICE_FUNC void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() = src; } };
304
+ struct add { template<typename Dst, typename Src> EIGEN_DEVICE_FUNC void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() += src; } };
305
+ struct sub { template<typename Dst, typename Src> EIGEN_DEVICE_FUNC void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() -= src; } };
306
306
  struct adds {
307
307
  Scalar m_scale;
308
308
  explicit adds(const Scalar& s) : m_scale(s) {}
309
- template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const {
309
+ template<typename Dst, typename Src> void EIGEN_DEVICE_FUNC operator()(const Dst& dst, const Src& src) const {
310
310
  dst.const_cast_derived() += m_scale * src;
311
311
  }
312
312
  };
313
-
313
+
314
314
  template<typename Dst>
315
- static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
315
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
316
316
  {
317
317
  internal::outer_product_selector_run(dst, lhs, rhs, set(), is_row_major<Dst>());
318
318
  }
319
-
319
+
320
320
  template<typename Dst>
321
- static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
321
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
322
322
  {
323
323
  internal::outer_product_selector_run(dst, lhs, rhs, add(), is_row_major<Dst>());
324
324
  }
325
-
325
+
326
326
  template<typename Dst>
327
- static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
327
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
328
328
  {
329
329
  internal::outer_product_selector_run(dst, lhs, rhs, sub(), is_row_major<Dst>());
330
330
  }
331
-
331
+
332
332
  template<typename Dst>
333
- static EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
333
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
334
334
  {
335
335
  internal::outer_product_selector_run(dst, lhs, rhs, adds(alpha), is_row_major<Dst>());
336
336
  }
337
-
337
+
338
338
  };
339
339
 
340
340
 
@@ -343,21 +343,21 @@ template<typename Lhs, typename Rhs, typename Derived>
343
343
  struct generic_product_impl_base
344
344
  {
345
345
  typedef typename Product<Lhs,Rhs>::Scalar Scalar;
346
-
346
+
347
347
  template<typename Dst>
348
- static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
348
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
349
349
  { dst.setZero(); scaleAndAddTo(dst, lhs, rhs, Scalar(1)); }
350
350
 
351
351
  template<typename Dst>
352
- static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
352
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
353
353
  { scaleAndAddTo(dst,lhs, rhs, Scalar(1)); }
354
354
 
355
355
  template<typename Dst>
356
- static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
356
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
357
357
  { scaleAndAddTo(dst, lhs, rhs, Scalar(-1)); }
358
-
358
+
359
359
  template<typename Dst>
360
- static EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
360
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
361
361
  { Derived::scaleAndAddTo(dst,lhs,rhs,alpha); }
362
362
 
363
363
  };
@@ -373,8 +373,13 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemvProduct>
373
373
  typedef typename internal::remove_all<typename internal::conditional<int(Side)==OnTheRight,LhsNested,RhsNested>::type>::type MatrixType;
374
374
 
375
375
  template<typename Dest>
376
- static EIGEN_STRONG_INLINE void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
376
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
377
377
  {
378
+ // Fall back to an inner product if both the lhs and the rhs are runtime vectors.
379
+ if (lhs.rows() == 1 && rhs.cols() == 1) {
380
+ dst.coeffRef(0,0) += alpha * lhs.row(0).conjugate().dot(rhs.col(0));
381
+ return;
382
+ }
378
383
  LhsNested actual_lhs(lhs);
379
384
  RhsNested actual_rhs(rhs);
380
385
  internal::gemv_dense_selector<Side,
@@ -385,12 +390,12 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemvProduct>
385
390
  };
386
391
 
387
392
  template<typename Lhs, typename Rhs>
388
- struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode>
393
+ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode>
389
394
  {
390
395
  typedef typename Product<Lhs,Rhs>::Scalar Scalar;
391
-
396
+
392
397
  template<typename Dst>
393
- static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
398
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
394
399
  {
395
400
  // Same as: dst.noalias() = lhs.lazyProduct(rhs);
396
401
  // but easier on the compiler side
@@ -398,48 +403,71 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode>
398
403
  }
399
404
 
400
405
  template<typename Dst>
401
- static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
406
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
402
407
  {
403
408
  // dst.noalias() += lhs.lazyProduct(rhs);
404
409
  call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::add_assign_op<typename Dst::Scalar,Scalar>());
405
410
  }
406
-
411
+
407
412
  template<typename Dst>
408
- static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
413
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
409
414
  {
410
415
  // dst.noalias() -= lhs.lazyProduct(rhs);
411
416
  call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::sub_assign_op<typename Dst::Scalar,Scalar>());
412
417
  }
413
418
 
414
- // Catch "dst {,+,-}= (s*A)*B" and evaluate it lazily by moving out the scalar factor:
415
- // dst {,+,-}= s * (A.lazyProduct(B))
416
- // This is a huge benefit for heap-allocated matrix types as it save one costly allocation.
417
- // For them, this strategy is also faster than simply by-passing the heap allocation through
418
- // stack allocation.
419
- // For fixed sizes matrices, this is less obvious, it is sometimes x2 faster, but sometimes x3 slower,
420
- // and the behavior depends also a lot on the compiler... so let's be conservative and enable them for dynamic-size only,
421
- // that is when coming from generic_product_impl<...,GemmProduct> in file GeneralMatrixMatrix.h
422
- template<typename Dst, typename Scalar1, typename Scalar2, typename Plain1, typename Xpr2, typename Func>
419
+ // This is a special evaluation path called from generic_product_impl<...,GemmProduct> in file GeneralMatrixMatrix.h
420
+ // This variant tries to extract scalar multiples from both the LHS and RHS and factor them out. For instance:
421
+ // dst {,+,-}= (s1*A)*(B*s2)
422
+ // will be rewritten as:
423
+ // dst {,+,-}= (s1*s2) * (A.lazyProduct(B))
424
+ // There are at least four benefits of doing so:
425
+ // 1 - huge performance gain for heap-allocated matrix types as it saves costly allocations.
426
+ // 2 - it is faster than simply by-passing the heap allocation through stack allocation.
427
+ // 3 - it makes this fallback consistent with the heavy GEMM routine.
428
+ // 4 - it fully by-passes huge stack allocation attempts when multiplying huge fixed-size matrices.
429
+ // (see https://stackoverflow.com/questions/54738495)
430
+ // For small fixed-size matrices, however, the gains are less obvious: it is sometimes x2 faster, but sometimes x3 slower,
431
+ // and the behavior also depends a lot on the compiler... This is why this rewriting strategy is currently
432
+ // enabled only when falling back from the main GEMM.
433
+ template<typename Dst, typename Func>
423
434
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
424
- void eval_dynamic(Dst& dst, const CwiseBinaryOp<internal::scalar_product_op<Scalar1,Scalar2>,
425
- const CwiseNullaryOp<internal::scalar_constant_op<Scalar1>, Plain1>, Xpr2>& lhs, const Rhs& rhs, const Func &func)
435
+ void eval_dynamic(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Func &func)
426
436
  {
427
- call_assignment_no_alias(dst, lhs.lhs().functor().m_other * lhs.rhs().lazyProduct(rhs), func);
437
+ enum {
438
+ HasScalarFactor = blas_traits<Lhs>::HasScalarFactor || blas_traits<Rhs>::HasScalarFactor,
439
+ ConjLhs = blas_traits<Lhs>::NeedToConjugate,
440
+ ConjRhs = blas_traits<Rhs>::NeedToConjugate
441
+ };
442
+ // FIXME: in c++11 this should be auto, and extractScalarFactor should also return auto
443
+ // this is important for real*complex_mat
444
+ Scalar actualAlpha = combine_scalar_factors<Scalar>(lhs, rhs);
445
+
446
+ eval_dynamic_impl(dst,
447
+ blas_traits<Lhs>::extract(lhs).template conjugateIf<ConjLhs>(),
448
+ blas_traits<Rhs>::extract(rhs).template conjugateIf<ConjRhs>(),
449
+ func,
450
+ actualAlpha,
451
+ typename conditional<HasScalarFactor,true_type,false_type>::type());
428
452
  }
429
453
 
430
- // Here, we we always have LhsT==Lhs, but we need to make it a template type to make the above
431
- // overload more specialized.
432
- template<typename Dst, typename LhsT, typename Func>
454
+ protected:
455
+
456
+ template<typename Dst, typename LhsT, typename RhsT, typename Func, typename Scalar>
457
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
458
+ void eval_dynamic_impl(Dst& dst, const LhsT& lhs, const RhsT& rhs, const Func &func, const Scalar& s /* == 1 */, false_type)
459
+ {
460
+ EIGEN_UNUSED_VARIABLE(s);
461
+ eigen_internal_assert(s==Scalar(1));
462
+ call_restricted_packet_assignment_no_alias(dst, lhs.lazyProduct(rhs), func);
463
+ }
464
+
465
+ template<typename Dst, typename LhsT, typename RhsT, typename Func, typename Scalar>
433
466
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
434
- void eval_dynamic(Dst& dst, const LhsT& lhs, const Rhs& rhs, const Func &func)
467
+ void eval_dynamic_impl(Dst& dst, const LhsT& lhs, const RhsT& rhs, const Func &func, const Scalar& s, true_type)
435
468
  {
436
- call_assignment_no_alias(dst, lhs.lazyProduct(rhs), func);
469
+ call_restricted_packet_assignment_no_alias(dst, s * lhs.lazyProduct(rhs), func);
437
470
  }
438
-
439
-
440
- // template<typename Dst>
441
- // static inline void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
442
- // { dst.noalias() += alpha * lhs.lazyProduct(rhs); }
443
471
  };
444
472
 
445
473
  // This specialization enforces the use of a coefficient-based evaluation strategy
@@ -497,7 +525,7 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
497
525
 
498
526
  typedef typename internal::nested_eval<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
499
527
  typedef typename internal::nested_eval<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
500
-
528
+
501
529
  typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned;
502
530
  typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned;
503
531
 
@@ -516,19 +544,19 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
516
544
  typedef typename find_best_packet<Scalar,ColsAtCompileTime>::type RhsVecPacketType;
517
545
 
518
546
  enum {
519
-
547
+
520
548
  LhsCoeffReadCost = LhsEtorType::CoeffReadCost,
521
549
  RhsCoeffReadCost = RhsEtorType::CoeffReadCost,
522
550
  CoeffReadCost = InnerSize==0 ? NumTraits<Scalar>::ReadCost
523
551
  : InnerSize == Dynamic ? HugeCost
524
- : InnerSize * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost)
552
+ : InnerSize * (NumTraits<Scalar>::MulCost + int(LhsCoeffReadCost) + int(RhsCoeffReadCost))
525
553
  + (InnerSize - 1) * NumTraits<Scalar>::AddCost,
526
554
 
527
555
  Unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT,
528
-
556
+
529
557
  LhsFlags = LhsEtorType::Flags,
530
558
  RhsFlags = RhsEtorType::Flags,
531
-
559
+
532
560
  LhsRowMajor = LhsFlags & RowMajorBit,
533
561
  RhsRowMajor = RhsFlags & RowMajorBit,
534
562
 
@@ -538,7 +566,7 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
538
566
  // Here, we don't care about alignment larger than the usable packet size.
539
567
  LhsAlignment = EIGEN_PLAIN_ENUM_MIN(LhsEtorType::Alignment,LhsVecPacketSize*int(sizeof(typename LhsNestedCleaned::Scalar))),
540
568
  RhsAlignment = EIGEN_PLAIN_ENUM_MIN(RhsEtorType::Alignment,RhsVecPacketSize*int(sizeof(typename RhsNestedCleaned::Scalar))),
541
-
569
+
542
570
  SameType = is_same<typename LhsNestedCleaned::Scalar,typename RhsNestedCleaned::Scalar>::value,
543
571
 
544
572
  CanVectorizeRhs = bool(RhsRowMajor) && (RhsFlags & PacketAccessBit) && (ColsAtCompileTime!=1),
@@ -548,12 +576,12 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
548
576
  : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
549
577
  : (bool(RhsRowMajor) && !CanVectorizeLhs),
550
578
 
551
- Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit)
579
+ Flags = ((int(LhsFlags) | int(RhsFlags)) & HereditaryBits & ~RowMajorBit)
552
580
  | (EvalToRowMajor ? RowMajorBit : 0)
553
581
  // TODO enable vectorization for mixed types
554
582
  | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0)
555
583
  | (XprType::IsVectorAtCompileTime ? LinearAccessBit : 0),
556
-
584
+
557
585
  LhsOuterStrideBytes = int(LhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename LhsNestedCleaned::Scalar)),
558
586
  RhsOuterStrideBytes = int(RhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename RhsNestedCleaned::Scalar)),
559
587
 
@@ -569,10 +597,10 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
569
597
  CanVectorizeInner = SameType
570
598
  && LhsRowMajor
571
599
  && (!RhsRowMajor)
572
- && (LhsFlags & RhsFlags & ActualPacketAccessBit)
573
- && (InnerSize % packet_traits<Scalar>::size == 0)
600
+ && (int(LhsFlags) & int(RhsFlags) & ActualPacketAccessBit)
601
+ && (int(InnerSize) % packet_traits<Scalar>::size == 0)
574
602
  };
575
-
603
+
576
604
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index row, Index col) const
577
605
  {
578
606
  return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum();
@@ -582,7 +610,8 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
582
610
  * which is why we don't set the LinearAccessBit.
583
611
  * TODO: this seems possible when the result is a vector
584
612
  */
585
- EIGEN_DEVICE_FUNC const CoeffReturnType coeff(Index index) const
613
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
614
+ const CoeffReturnType coeff(Index index) const
586
615
  {
587
616
  const Index row = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? 0 : index;
588
617
  const Index col = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? index : 0;
@@ -590,6 +619,7 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
590
619
  }
591
620
 
592
621
  template<int LoadMode, typename PacketType>
622
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
593
623
  const PacketType packet(Index row, Index col) const
594
624
  {
595
625
  PacketType res;
@@ -601,6 +631,7 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
601
631
  }
602
632
 
603
633
  template<int LoadMode, typename PacketType>
634
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
604
635
  const PacketType packet(Index index) const
605
636
  {
606
637
  const Index row = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? 0 : index;
@@ -611,7 +642,7 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
611
642
  protected:
612
643
  typename internal::add_const_on_value_type<LhsNested>::type m_lhs;
613
644
  typename internal::add_const_on_value_type<RhsNested>::type m_rhs;
614
-
645
+
615
646
  LhsEtorType m_lhsImpl;
616
647
  RhsEtorType m_rhsImpl;
617
648
 
@@ -629,7 +660,8 @@ struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, LazyCoeffBasedProduc
629
660
  enum {
630
661
  Flags = Base::Flags | EvalBeforeNestingBit
631
662
  };
632
- EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr)
663
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
664
+ explicit product_evaluator(const XprType& xpr)
633
665
  : Base(BaseProduct(xpr.lhs(),xpr.rhs()))
634
666
  {}
635
667
  };
@@ -641,7 +673,7 @@ struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, LazyCoeffBasedProduc
641
673
  template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
642
674
  struct etor_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
643
675
  {
644
- static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
676
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
645
677
  {
646
678
  etor_product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
647
679
  res = pmadd(pset1<Packet>(lhs.coeff(row, Index(UnrollingIndex-1))), rhs.template packet<LoadMode,Packet>(Index(UnrollingIndex-1), col), res);
@@ -651,7 +683,7 @@ struct etor_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, Load
651
683
  template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
652
684
  struct etor_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
653
685
  {
654
- static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
686
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
655
687
  {
656
688
  etor_product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
657
689
  res = pmadd(lhs.template packet<LoadMode,Packet>(row, Index(UnrollingIndex-1)), pset1<Packet>(rhs.coeff(Index(UnrollingIndex-1), col)), res);
@@ -661,7 +693,7 @@ struct etor_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, Load
661
693
  template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
662
694
  struct etor_product_packet_impl<RowMajor, 1, Lhs, Rhs, Packet, LoadMode>
663
695
  {
664
- static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
696
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
665
697
  {
666
698
  res = pmul(pset1<Packet>(lhs.coeff(row, Index(0))),rhs.template packet<LoadMode,Packet>(Index(0), col));
667
699
  }
@@ -670,7 +702,7 @@ struct etor_product_packet_impl<RowMajor, 1, Lhs, Rhs, Packet, LoadMode>
670
702
  template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
671
703
  struct etor_product_packet_impl<ColMajor, 1, Lhs, Rhs, Packet, LoadMode>
672
704
  {
673
- static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
705
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
674
706
  {
675
707
  res = pmul(lhs.template packet<LoadMode,Packet>(row, Index(0)), pset1<Packet>(rhs.coeff(Index(0), col)));
676
708
  }
@@ -679,7 +711,7 @@ struct etor_product_packet_impl<ColMajor, 1, Lhs, Rhs, Packet, LoadMode>
679
711
  template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
680
712
  struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
681
713
  {
682
- static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res)
714
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res)
683
715
  {
684
716
  res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
685
717
  }
@@ -688,7 +720,7 @@ struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
688
720
  template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
689
721
  struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
690
722
  {
691
- static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res)
723
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res)
692
724
  {
693
725
  res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
694
726
  }
@@ -697,7 +729,7 @@ struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
697
729
  template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
698
730
  struct etor_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
699
731
  {
700
- static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
732
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
701
733
  {
702
734
  res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
703
735
  for(Index i = 0; i < innerDim; ++i)
@@ -708,7 +740,7 @@ struct etor_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
708
740
  template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
709
741
  struct etor_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
710
742
  {
711
- static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
743
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
712
744
  {
713
745
  res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
714
746
  for(Index i = 0; i < innerDim; ++i)
@@ -730,7 +762,7 @@ struct generic_product_impl<Lhs,Rhs,TriangularShape,DenseShape,ProductTag>
730
762
  : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,TriangularShape,DenseShape,ProductTag> >
731
763
  {
732
764
  typedef typename Product<Lhs,Rhs>::Scalar Scalar;
733
-
765
+
734
766
  template<typename Dest>
735
767
  static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
736
768
  {
@@ -744,7 +776,7 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,TriangularShape,ProductTag>
744
776
  : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,TriangularShape,ProductTag> >
745
777
  {
746
778
  typedef typename Product<Lhs,Rhs>::Scalar Scalar;
747
-
779
+
748
780
  template<typename Dest>
749
781
  static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
750
782
  {
@@ -765,9 +797,10 @@ struct generic_product_impl<Lhs,Rhs,SelfAdjointShape,DenseShape,ProductTag>
765
797
  : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,SelfAdjointShape,DenseShape,ProductTag> >
766
798
  {
767
799
  typedef typename Product<Lhs,Rhs>::Scalar Scalar;
768
-
800
+
769
801
  template<typename Dest>
770
- static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
802
+ static EIGEN_DEVICE_FUNC
803
+ void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
771
804
  {
772
805
  selfadjoint_product_impl<typename Lhs::MatrixType,Lhs::Mode,false,Rhs,0,Rhs::IsVectorAtCompileTime>::run(dst, lhs.nestedExpression(), rhs, alpha);
773
806
  }
@@ -778,7 +811,7 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,SelfAdjointShape,ProductTag>
778
811
  : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,SelfAdjointShape,ProductTag> >
779
812
  {
780
813
  typedef typename Product<Lhs,Rhs>::Scalar Scalar;
781
-
814
+
782
815
  template<typename Dest>
783
816
  static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
784
817
  {
@@ -790,7 +823,7 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,SelfAdjointShape,ProductTag>
790
823
  /***************************************************************************
791
824
  * Diagonal products
792
825
  ***************************************************************************/
793
-
826
+
794
827
  template<typename MatrixType, typename DiagonalType, typename Derived, int ProductOrder>
795
828
  struct diagonal_product_evaluator_base
796
829
  : evaluator_base<Derived>
@@ -798,17 +831,25 @@ struct diagonal_product_evaluator_base
798
831
  typedef typename ScalarBinaryOpTraits<typename MatrixType::Scalar, typename DiagonalType::Scalar>::ReturnType Scalar;
799
832
  public:
800
833
  enum {
801
- CoeffReadCost = NumTraits<Scalar>::MulCost + evaluator<MatrixType>::CoeffReadCost + evaluator<DiagonalType>::CoeffReadCost,
802
-
834
+ CoeffReadCost = int(NumTraits<Scalar>::MulCost) + int(evaluator<MatrixType>::CoeffReadCost) + int(evaluator<DiagonalType>::CoeffReadCost),
835
+
803
836
  MatrixFlags = evaluator<MatrixType>::Flags,
804
837
  DiagFlags = evaluator<DiagonalType>::Flags,
805
- _StorageOrder = MatrixFlags & RowMajorBit ? RowMajor : ColMajor,
838
+
839
+ _StorageOrder = (Derived::MaxRowsAtCompileTime==1 && Derived::MaxColsAtCompileTime!=1) ? RowMajor
840
+ : (Derived::MaxColsAtCompileTime==1 && Derived::MaxRowsAtCompileTime!=1) ? ColMajor
841
+ : MatrixFlags & RowMajorBit ? RowMajor : ColMajor,
842
+ _SameStorageOrder = _StorageOrder == (MatrixFlags & RowMajorBit ? RowMajor : ColMajor),
843
+
806
844
  _ScalarAccessOnDiag = !((int(_StorageOrder) == ColMajor && int(ProductOrder) == OnTheLeft)
807
845
  ||(int(_StorageOrder) == RowMajor && int(ProductOrder) == OnTheRight)),
808
846
  _SameTypes = is_same<typename MatrixType::Scalar, typename DiagonalType::Scalar>::value,
809
847
  // FIXME currently we need same types, but in the future the next rule should be the one
810
848
  //_Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && ((!_PacketOnDiag) || (_SameTypes && bool(int(DiagFlags)&PacketAccessBit))),
811
- _Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && _SameTypes && (_ScalarAccessOnDiag || (bool(int(DiagFlags)&PacketAccessBit))),
849
+ _Vectorizable = bool(int(MatrixFlags)&PacketAccessBit)
850
+ && _SameTypes
851
+ && (_SameStorageOrder || (MatrixFlags&LinearAccessBit)==LinearAccessBit)
852
+ && (_ScalarAccessOnDiag || (bool(int(DiagFlags)&PacketAccessBit))),
812
853
  _LinearAccessMask = (MatrixType::RowsAtCompileTime==1 || MatrixType::ColsAtCompileTime==1) ? LinearAccessBit : 0,
813
854
  Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixFlags)) | (_Vectorizable ? PacketAccessBit : 0),
814
855
  Alignment = evaluator<MatrixType>::Alignment,
@@ -817,14 +858,14 @@ public:
817
858
  || (DiagonalType::SizeAtCompileTime==Dynamic && MatrixType::RowsAtCompileTime==1 && ProductOrder==OnTheLeft)
818
859
  || (DiagonalType::SizeAtCompileTime==Dynamic && MatrixType::ColsAtCompileTime==1 && ProductOrder==OnTheRight)
819
860
  };
820
-
821
- diagonal_product_evaluator_base(const MatrixType &mat, const DiagonalType &diag)
861
+
862
+ EIGEN_DEVICE_FUNC diagonal_product_evaluator_base(const MatrixType &mat, const DiagonalType &diag)
822
863
  : m_diagImpl(diag), m_matImpl(mat)
823
864
  {
824
865
  EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::MulCost);
825
866
  EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
826
867
  }
827
-
868
+
828
869
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const
829
870
  {
830
871
  if(AsScalarProduct)
@@ -832,7 +873,7 @@ public:
832
873
  else
833
874
  return m_diagImpl.coeff(idx) * m_matImpl.coeff(idx);
834
875
  }
835
-
876
+
836
877
  protected:
837
878
  template<int LoadMode,typename PacketType>
838
879
  EIGEN_STRONG_INLINE PacketType packet_impl(Index row, Index col, Index id, internal::true_type) const
@@ -840,7 +881,7 @@ protected:
840
881
  return internal::pmul(m_matImpl.template packet<LoadMode,PacketType>(row, col),
841
882
  internal::pset1<PacketType>(m_diagImpl.coeff(id)));
842
883
  }
843
-
884
+
844
885
  template<int LoadMode,typename PacketType>
845
886
  EIGEN_STRONG_INLINE PacketType packet_impl(Index row, Index col, Index id, internal::false_type) const
846
887
  {
@@ -851,7 +892,7 @@ protected:
851
892
  return internal::pmul(m_matImpl.template packet<LoadMode,PacketType>(row, col),
852
893
  m_diagImpl.template packet<DiagonalPacketLoadMode,PacketType>(id));
853
894
  }
854
-
895
+
855
896
  evaluator<DiagonalType> m_diagImpl;
856
897
  evaluator<MatrixType> m_matImpl;
857
898
  };
@@ -866,25 +907,25 @@ struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DiagonalSha
866
907
  using Base::m_matImpl;
867
908
  using Base::coeff;
868
909
  typedef typename Base::Scalar Scalar;
869
-
910
+
870
911
  typedef Product<Lhs, Rhs, ProductKind> XprType;
871
912
  typedef typename XprType::PlainObject PlainObject;
872
-
873
- enum {
874
- StorageOrder = int(Rhs::Flags) & RowMajorBit ? RowMajor : ColMajor
875
- };
913
+ typedef typename Lhs::DiagonalVectorType DiagonalType;
914
+
915
+
916
+ enum { StorageOrder = Base::_StorageOrder };
876
917
 
877
918
  EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr)
878
919
  : Base(xpr.rhs(), xpr.lhs().diagonal())
879
920
  {
880
921
  }
881
-
922
+
882
923
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const
883
924
  {
884
925
  return m_diagImpl.coeff(row) * m_matImpl.coeff(row, col);
885
926
  }
886
-
887
- #ifndef __CUDACC__
927
+
928
+ #ifndef EIGEN_GPUCC
888
929
  template<int LoadMode,typename PacketType>
889
930
  EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const
890
931
  {
@@ -893,7 +934,7 @@ struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DiagonalSha
893
934
  return this->template packet_impl<LoadMode,PacketType>(row,col, row,
894
935
  typename internal::conditional<int(StorageOrder)==RowMajor, internal::true_type, internal::false_type>::type());
895
936
  }
896
-
937
+
897
938
  template<int LoadMode,typename PacketType>
898
939
  EIGEN_STRONG_INLINE PacketType packet(Index idx) const
899
940
  {
@@ -912,30 +953,30 @@ struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DenseShape,
912
953
  using Base::m_matImpl;
913
954
  using Base::coeff;
914
955
  typedef typename Base::Scalar Scalar;
915
-
956
+
916
957
  typedef Product<Lhs, Rhs, ProductKind> XprType;
917
958
  typedef typename XprType::PlainObject PlainObject;
918
-
919
- enum { StorageOrder = int(Lhs::Flags) & RowMajorBit ? RowMajor : ColMajor };
959
+
960
+ enum { StorageOrder = Base::_StorageOrder };
920
961
 
921
962
  EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr)
922
963
  : Base(xpr.lhs(), xpr.rhs().diagonal())
923
964
  {
924
965
  }
925
-
966
+
926
967
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const
927
968
  {
928
969
  return m_matImpl.coeff(row, col) * m_diagImpl.coeff(col);
929
970
  }
930
-
931
- #ifndef __CUDACC__
971
+
972
+ #ifndef EIGEN_GPUCC
932
973
  template<int LoadMode,typename PacketType>
933
974
  EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const
934
975
  {
935
976
  return this->template packet_impl<LoadMode,PacketType>(row,col, col,
936
977
  typename internal::conditional<int(StorageOrder)==ColMajor, internal::true_type, internal::false_type>::type());
937
978
  }
938
-
979
+
939
980
  template<int LoadMode,typename PacketType>
940
981
  EIGEN_STRONG_INLINE PacketType packet(Index idx) const
941
982
  {
@@ -963,7 +1004,7 @@ struct permutation_matrix_product<ExpressionType, Side, Transposed, DenseShape>
963
1004
  typedef typename remove_all<MatrixType>::type MatrixTypeCleaned;
964
1005
 
965
1006
  template<typename Dest, typename PermutationType>
966
- static inline void run(Dest& dst, const PermutationType& perm, const ExpressionType& xpr)
1007
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Dest& dst, const PermutationType& perm, const ExpressionType& xpr)
967
1008
  {
968
1009
  MatrixType mat(xpr);
969
1010
  const Index n = Side==OnTheLeft ? mat.rows() : mat.cols();
@@ -1017,7 +1058,7 @@ template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
1017
1058
  struct generic_product_impl<Lhs, Rhs, PermutationShape, MatrixShape, ProductTag>
1018
1059
  {
1019
1060
  template<typename Dest>
1020
- static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
1061
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
1021
1062
  {
1022
1063
  permutation_matrix_product<Rhs, OnTheLeft, false, MatrixShape>::run(dst, lhs, rhs);
1023
1064
  }
@@ -1027,7 +1068,7 @@ template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
1027
1068
  struct generic_product_impl<Lhs, Rhs, MatrixShape, PermutationShape, ProductTag>
1028
1069
  {
1029
1070
  template<typename Dest>
1030
- static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
1071
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
1031
1072
  {
1032
1073
  permutation_matrix_product<Lhs, OnTheRight, false, MatrixShape>::run(dst, rhs, lhs);
1033
1074
  }
@@ -1037,7 +1078,7 @@ template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
1037
1078
  struct generic_product_impl<Inverse<Lhs>, Rhs, PermutationShape, MatrixShape, ProductTag>
1038
1079
  {
1039
1080
  template<typename Dest>
1040
- static void evalTo(Dest& dst, const Inverse<Lhs>& lhs, const Rhs& rhs)
1081
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Inverse<Lhs>& lhs, const Rhs& rhs)
1041
1082
  {
1042
1083
  permutation_matrix_product<Rhs, OnTheLeft, true, MatrixShape>::run(dst, lhs.nestedExpression(), rhs);
1043
1084
  }
@@ -1047,7 +1088,7 @@ template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
1047
1088
  struct generic_product_impl<Lhs, Inverse<Rhs>, MatrixShape, PermutationShape, ProductTag>
1048
1089
  {
1049
1090
  template<typename Dest>
1050
- static void evalTo(Dest& dst, const Lhs& lhs, const Inverse<Rhs>& rhs)
1091
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Inverse<Rhs>& rhs)
1051
1092
  {
1052
1093
  permutation_matrix_product<Lhs, OnTheRight, true, MatrixShape>::run(dst, rhs.nestedExpression(), lhs);
1053
1094
  }
@@ -1069,9 +1110,9 @@ struct transposition_matrix_product
1069
1110
  {
1070
1111
  typedef typename nested_eval<ExpressionType, 1>::type MatrixType;
1071
1112
  typedef typename remove_all<MatrixType>::type MatrixTypeCleaned;
1072
-
1113
+
1073
1114
  template<typename Dest, typename TranspositionType>
1074
- static inline void run(Dest& dst, const TranspositionType& tr, const ExpressionType& xpr)
1115
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Dest& dst, const TranspositionType& tr, const ExpressionType& xpr)
1075
1116
  {
1076
1117
  MatrixType mat(xpr);
1077
1118
  typedef typename TranspositionType::StorageIndex StorageIndex;
@@ -1094,7 +1135,7 @@ template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
1094
1135
  struct generic_product_impl<Lhs, Rhs, TranspositionsShape, MatrixShape, ProductTag>
1095
1136
  {
1096
1137
  template<typename Dest>
1097
- static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
1138
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
1098
1139
  {
1099
1140
  transposition_matrix_product<Rhs, OnTheLeft, false, MatrixShape>::run(dst, lhs, rhs);
1100
1141
  }
@@ -1104,7 +1145,7 @@ template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
1104
1145
  struct generic_product_impl<Lhs, Rhs, MatrixShape, TranspositionsShape, ProductTag>
1105
1146
  {
1106
1147
  template<typename Dest>
1107
- static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
1148
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
1108
1149
  {
1109
1150
  transposition_matrix_product<Lhs, OnTheRight, false, MatrixShape>::run(dst, rhs, lhs);
1110
1151
  }
@@ -1115,7 +1156,7 @@ template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
1115
1156
  struct generic_product_impl<Transpose<Lhs>, Rhs, TranspositionsShape, MatrixShape, ProductTag>
1116
1157
  {
1117
1158
  template<typename Dest>
1118
- static void evalTo(Dest& dst, const Transpose<Lhs>& lhs, const Rhs& rhs)
1159
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Transpose<Lhs>& lhs, const Rhs& rhs)
1119
1160
  {
1120
1161
  transposition_matrix_product<Rhs, OnTheLeft, true, MatrixShape>::run(dst, lhs.nestedExpression(), rhs);
1121
1162
  }
@@ -1125,7 +1166,7 @@ template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
1125
1166
  struct generic_product_impl<Lhs, Transpose<Rhs>, MatrixShape, TranspositionsShape, ProductTag>
1126
1167
  {
1127
1168
  template<typename Dest>
1128
- static void evalTo(Dest& dst, const Lhs& lhs, const Transpose<Rhs>& rhs)
1169
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Transpose<Rhs>& rhs)
1129
1170
  {
1130
1171
  transposition_matrix_product<Lhs, OnTheRight, true, MatrixShape>::run(dst, rhs.nestedExpression(), lhs);
1131
1172
  }