tomoto 0.2.3 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (347) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +6 -0
  3. data/README.md +8 -10
  4. data/ext/tomoto/extconf.rb +6 -2
  5. data/ext/tomoto/{ext.cpp → tomoto.cpp} +1 -1
  6. data/lib/tomoto/version.rb +1 -1
  7. data/lib/tomoto.rb +5 -1
  8. data/vendor/EigenRand/EigenRand/Core.h +10 -10
  9. data/vendor/EigenRand/EigenRand/Dists/Basic.h +208 -9
  10. data/vendor/EigenRand/EigenRand/Dists/Discrete.h +52 -31
  11. data/vendor/EigenRand/EigenRand/Dists/GammaPoisson.h +9 -8
  12. data/vendor/EigenRand/EigenRand/Dists/NormalExp.h +28 -21
  13. data/vendor/EigenRand/EigenRand/EigenRand +11 -6
  14. data/vendor/EigenRand/EigenRand/Macro.h +13 -7
  15. data/vendor/EigenRand/EigenRand/MorePacketMath.h +348 -740
  16. data/vendor/EigenRand/EigenRand/MvDists/Multinomial.h +5 -3
  17. data/vendor/EigenRand/EigenRand/MvDists/MvNormal.h +9 -3
  18. data/vendor/EigenRand/EigenRand/PacketFilter.h +11 -253
  19. data/vendor/EigenRand/EigenRand/PacketRandomEngine.h +21 -47
  20. data/vendor/EigenRand/EigenRand/RandUtils.h +50 -344
  21. data/vendor/EigenRand/EigenRand/arch/AVX/MorePacketMath.h +619 -0
  22. data/vendor/EigenRand/EigenRand/arch/AVX/PacketFilter.h +149 -0
  23. data/vendor/EigenRand/EigenRand/arch/AVX/RandUtils.h +228 -0
  24. data/vendor/EigenRand/EigenRand/arch/NEON/MorePacketMath.h +473 -0
  25. data/vendor/EigenRand/EigenRand/arch/NEON/PacketFilter.h +142 -0
  26. data/vendor/EigenRand/EigenRand/arch/NEON/RandUtils.h +126 -0
  27. data/vendor/EigenRand/EigenRand/arch/SSE/MorePacketMath.h +501 -0
  28. data/vendor/EigenRand/EigenRand/arch/SSE/PacketFilter.h +133 -0
  29. data/vendor/EigenRand/EigenRand/arch/SSE/RandUtils.h +120 -0
  30. data/vendor/EigenRand/EigenRand/doc.h +24 -12
  31. data/vendor/EigenRand/README.md +57 -4
  32. data/vendor/eigen/COPYING.APACHE +203 -0
  33. data/vendor/eigen/COPYING.BSD +1 -1
  34. data/vendor/eigen/COPYING.MINPACK +51 -52
  35. data/vendor/eigen/Eigen/Cholesky +0 -1
  36. data/vendor/eigen/Eigen/Core +112 -265
  37. data/vendor/eigen/Eigen/Eigenvalues +2 -3
  38. data/vendor/eigen/Eigen/Geometry +5 -8
  39. data/vendor/eigen/Eigen/Householder +0 -1
  40. data/vendor/eigen/Eigen/Jacobi +0 -1
  41. data/vendor/eigen/Eigen/KLUSupport +41 -0
  42. data/vendor/eigen/Eigen/LU +2 -5
  43. data/vendor/eigen/Eigen/OrderingMethods +0 -3
  44. data/vendor/eigen/Eigen/PaStiXSupport +1 -0
  45. data/vendor/eigen/Eigen/PardisoSupport +0 -0
  46. data/vendor/eigen/Eigen/QR +2 -3
  47. data/vendor/eigen/Eigen/QtAlignedMalloc +0 -1
  48. data/vendor/eigen/Eigen/SVD +0 -1
  49. data/vendor/eigen/Eigen/Sparse +0 -2
  50. data/vendor/eigen/Eigen/SparseCholesky +0 -8
  51. data/vendor/eigen/Eigen/SparseLU +4 -0
  52. data/vendor/eigen/Eigen/SparseQR +0 -1
  53. data/vendor/eigen/Eigen/src/Cholesky/LDLT.h +42 -27
  54. data/vendor/eigen/Eigen/src/Cholesky/LLT.h +39 -23
  55. data/vendor/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +90 -47
  56. data/vendor/eigen/Eigen/src/Core/ArithmeticSequence.h +413 -0
  57. data/vendor/eigen/Eigen/src/Core/Array.h +99 -11
  58. data/vendor/eigen/Eigen/src/Core/ArrayBase.h +3 -3
  59. data/vendor/eigen/Eigen/src/Core/ArrayWrapper.h +21 -21
  60. data/vendor/eigen/Eigen/src/Core/Assign.h +1 -1
  61. data/vendor/eigen/Eigen/src/Core/AssignEvaluator.h +125 -50
  62. data/vendor/eigen/Eigen/src/Core/Assign_MKL.h +10 -10
  63. data/vendor/eigen/Eigen/src/Core/BandMatrix.h +16 -16
  64. data/vendor/eigen/Eigen/src/Core/Block.h +56 -60
  65. data/vendor/eigen/Eigen/src/Core/BooleanRedux.h +29 -31
  66. data/vendor/eigen/Eigen/src/Core/CommaInitializer.h +7 -3
  67. data/vendor/eigen/Eigen/src/Core/CoreEvaluators.h +325 -272
  68. data/vendor/eigen/Eigen/src/Core/CoreIterators.h +5 -0
  69. data/vendor/eigen/Eigen/src/Core/CwiseBinaryOp.h +21 -22
  70. data/vendor/eigen/Eigen/src/Core/CwiseNullaryOp.h +153 -18
  71. data/vendor/eigen/Eigen/src/Core/CwiseUnaryOp.h +6 -6
  72. data/vendor/eigen/Eigen/src/Core/CwiseUnaryView.h +14 -10
  73. data/vendor/eigen/Eigen/src/Core/DenseBase.h +132 -42
  74. data/vendor/eigen/Eigen/src/Core/DenseCoeffsBase.h +25 -21
  75. data/vendor/eigen/Eigen/src/Core/DenseStorage.h +153 -71
  76. data/vendor/eigen/Eigen/src/Core/Diagonal.h +21 -23
  77. data/vendor/eigen/Eigen/src/Core/DiagonalMatrix.h +50 -2
  78. data/vendor/eigen/Eigen/src/Core/DiagonalProduct.h +1 -1
  79. data/vendor/eigen/Eigen/src/Core/Dot.h +10 -10
  80. data/vendor/eigen/Eigen/src/Core/EigenBase.h +10 -9
  81. data/vendor/eigen/Eigen/src/Core/ForceAlignedAccess.h +8 -4
  82. data/vendor/eigen/Eigen/src/Core/Fuzzy.h +3 -3
  83. data/vendor/eigen/Eigen/src/Core/GeneralProduct.h +20 -10
  84. data/vendor/eigen/Eigen/src/Core/GenericPacketMath.h +599 -152
  85. data/vendor/eigen/Eigen/src/Core/GlobalFunctions.h +40 -33
  86. data/vendor/eigen/Eigen/src/Core/IO.h +40 -7
  87. data/vendor/eigen/Eigen/src/Core/IndexedView.h +237 -0
  88. data/vendor/eigen/Eigen/src/Core/Inverse.h +9 -10
  89. data/vendor/eigen/Eigen/src/Core/Map.h +7 -7
  90. data/vendor/eigen/Eigen/src/Core/MapBase.h +10 -3
  91. data/vendor/eigen/Eigen/src/Core/MathFunctions.h +767 -125
  92. data/vendor/eigen/Eigen/src/Core/MathFunctionsImpl.h +118 -19
  93. data/vendor/eigen/Eigen/src/Core/Matrix.h +131 -25
  94. data/vendor/eigen/Eigen/src/Core/MatrixBase.h +21 -3
  95. data/vendor/eigen/Eigen/src/Core/NestByValue.h +25 -50
  96. data/vendor/eigen/Eigen/src/Core/NoAlias.h +4 -3
  97. data/vendor/eigen/Eigen/src/Core/NumTraits.h +107 -20
  98. data/vendor/eigen/Eigen/src/Core/PartialReduxEvaluator.h +232 -0
  99. data/vendor/eigen/Eigen/src/Core/PermutationMatrix.h +3 -31
  100. data/vendor/eigen/Eigen/src/Core/PlainObjectBase.h +152 -59
  101. data/vendor/eigen/Eigen/src/Core/Product.h +30 -25
  102. data/vendor/eigen/Eigen/src/Core/ProductEvaluators.h +192 -125
  103. data/vendor/eigen/Eigen/src/Core/Random.h +37 -1
  104. data/vendor/eigen/Eigen/src/Core/Redux.h +180 -170
  105. data/vendor/eigen/Eigen/src/Core/Ref.h +121 -23
  106. data/vendor/eigen/Eigen/src/Core/Replicate.h +8 -8
  107. data/vendor/eigen/Eigen/src/Core/Reshaped.h +454 -0
  108. data/vendor/eigen/Eigen/src/Core/ReturnByValue.h +7 -5
  109. data/vendor/eigen/Eigen/src/Core/Reverse.h +18 -12
  110. data/vendor/eigen/Eigen/src/Core/Select.h +8 -6
  111. data/vendor/eigen/Eigen/src/Core/SelfAdjointView.h +33 -20
  112. data/vendor/eigen/Eigen/src/Core/Solve.h +14 -14
  113. data/vendor/eigen/Eigen/src/Core/SolveTriangular.h +16 -16
  114. data/vendor/eigen/Eigen/src/Core/SolverBase.h +41 -3
  115. data/vendor/eigen/Eigen/src/Core/StableNorm.h +100 -70
  116. data/vendor/eigen/Eigen/src/Core/StlIterators.h +463 -0
  117. data/vendor/eigen/Eigen/src/Core/Stride.h +9 -4
  118. data/vendor/eigen/Eigen/src/Core/Swap.h +5 -4
  119. data/vendor/eigen/Eigen/src/Core/Transpose.h +88 -27
  120. data/vendor/eigen/Eigen/src/Core/Transpositions.h +26 -47
  121. data/vendor/eigen/Eigen/src/Core/TriangularMatrix.h +93 -75
  122. data/vendor/eigen/Eigen/src/Core/VectorBlock.h +5 -5
  123. data/vendor/eigen/Eigen/src/Core/VectorwiseOp.h +159 -70
  124. data/vendor/eigen/Eigen/src/Core/Visitor.h +137 -29
  125. data/vendor/eigen/Eigen/src/Core/arch/AVX/Complex.h +50 -129
  126. data/vendor/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +126 -337
  127. data/vendor/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +1092 -155
  128. data/vendor/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +65 -1
  129. data/vendor/eigen/Eigen/src/Core/arch/AVX512/Complex.h +422 -0
  130. data/vendor/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +207 -236
  131. data/vendor/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1482 -495
  132. data/vendor/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +89 -0
  133. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +152 -165
  134. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +19 -251
  135. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2937 -0
  136. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +221 -0
  137. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +629 -0
  138. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2042 -392
  139. data/vendor/eigen/Eigen/src/Core/arch/CUDA/Complex.h +235 -80
  140. data/vendor/eigen/Eigen/src/Core/arch/Default/BFloat16.h +700 -0
  141. data/vendor/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +102 -14
  142. data/vendor/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1649 -0
  143. data/vendor/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +110 -0
  144. data/vendor/eigen/Eigen/src/Core/arch/Default/Half.h +942 -0
  145. data/vendor/eigen/Eigen/src/Core/arch/Default/Settings.h +1 -1
  146. data/vendor/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +120 -0
  147. data/vendor/eigen/Eigen/src/Core/arch/{CUDA → GPU}/MathFunctions.h +16 -4
  148. data/vendor/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1685 -0
  149. data/vendor/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +80 -0
  150. data/vendor/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
  151. data/vendor/eigen/Eigen/src/Core/arch/MSA/Complex.h +648 -0
  152. data/vendor/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +387 -0
  153. data/vendor/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1233 -0
  154. data/vendor/eigen/Eigen/src/Core/arch/NEON/Complex.h +313 -219
  155. data/vendor/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +183 -0
  156. data/vendor/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +54 -70
  157. data/vendor/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4376 -549
  158. data/vendor/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1419 -0
  159. data/vendor/eigen/Eigen/src/Core/arch/SSE/Complex.h +59 -179
  160. data/vendor/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +65 -428
  161. data/vendor/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +893 -283
  162. data/vendor/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +65 -0
  163. data/vendor/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +44 -0
  164. data/vendor/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +752 -0
  165. data/vendor/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +49 -0
  166. data/vendor/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +232 -0
  167. data/vendor/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +301 -0
  168. data/vendor/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +670 -0
  169. data/vendor/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +694 -0
  170. data/vendor/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +85 -0
  171. data/vendor/eigen/Eigen/src/Core/arch/ZVector/Complex.h +212 -183
  172. data/vendor/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +101 -5
  173. data/vendor/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +510 -395
  174. data/vendor/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +11 -2
  175. data/vendor/eigen/Eigen/src/Core/functors/BinaryFunctors.h +112 -46
  176. data/vendor/eigen/Eigen/src/Core/functors/NullaryFunctors.h +31 -30
  177. data/vendor/eigen/Eigen/src/Core/functors/StlFunctors.h +32 -2
  178. data/vendor/eigen/Eigen/src/Core/functors/UnaryFunctors.h +355 -16
  179. data/vendor/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1075 -586
  180. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +49 -24
  181. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +41 -35
  182. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +6 -6
  183. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +4 -2
  184. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +382 -483
  185. data/vendor/eigen/Eigen/src/Core/products/Parallelizer.h +22 -5
  186. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +53 -30
  187. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +16 -8
  188. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +8 -6
  189. data/vendor/eigen/Eigen/src/Core/products/SelfadjointProduct.h +4 -4
  190. data/vendor/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +5 -4
  191. data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +33 -27
  192. data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +14 -12
  193. data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +36 -34
  194. data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +8 -4
  195. data/vendor/eigen/Eigen/src/Core/products/TriangularSolverVector.h +13 -10
  196. data/vendor/eigen/Eigen/src/Core/util/BlasUtil.h +304 -119
  197. data/vendor/eigen/Eigen/src/Core/util/ConfigureVectorization.h +512 -0
  198. data/vendor/eigen/Eigen/src/Core/util/Constants.h +25 -9
  199. data/vendor/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +26 -3
  200. data/vendor/eigen/Eigen/src/Core/util/ForwardDeclarations.h +29 -9
  201. data/vendor/eigen/Eigen/src/Core/util/IndexedViewHelper.h +186 -0
  202. data/vendor/eigen/Eigen/src/Core/util/IntegralConstant.h +272 -0
  203. data/vendor/eigen/Eigen/src/Core/util/MKL_support.h +8 -1
  204. data/vendor/eigen/Eigen/src/Core/util/Macros.h +709 -246
  205. data/vendor/eigen/Eigen/src/Core/util/Memory.h +222 -52
  206. data/vendor/eigen/Eigen/src/Core/util/Meta.h +355 -77
  207. data/vendor/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +5 -1
  208. data/vendor/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
  209. data/vendor/eigen/Eigen/src/Core/util/StaticAssert.h +8 -5
  210. data/vendor/eigen/Eigen/src/Core/util/SymbolicIndex.h +293 -0
  211. data/vendor/eigen/Eigen/src/Core/util/XprHelper.h +65 -30
  212. data/vendor/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +1 -1
  213. data/vendor/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +7 -4
  214. data/vendor/eigen/Eigen/src/Eigenvalues/EigenSolver.h +2 -2
  215. data/vendor/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +1 -1
  216. data/vendor/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +2 -2
  217. data/vendor/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +2 -2
  218. data/vendor/eigen/Eigen/src/Eigenvalues/RealQZ.h +9 -6
  219. data/vendor/eigen/Eigen/src/Eigenvalues/RealSchur.h +21 -9
  220. data/vendor/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +77 -43
  221. data/vendor/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +20 -15
  222. data/vendor/eigen/Eigen/src/Geometry/AlignedBox.h +99 -5
  223. data/vendor/eigen/Eigen/src/Geometry/AngleAxis.h +4 -4
  224. data/vendor/eigen/Eigen/src/Geometry/EulerAngles.h +3 -3
  225. data/vendor/eigen/Eigen/src/Geometry/Homogeneous.h +15 -11
  226. data/vendor/eigen/Eigen/src/Geometry/Hyperplane.h +1 -1
  227. data/vendor/eigen/Eigen/src/Geometry/OrthoMethods.h +3 -2
  228. data/vendor/eigen/Eigen/src/Geometry/ParametrizedLine.h +39 -2
  229. data/vendor/eigen/Eigen/src/Geometry/Quaternion.h +70 -14
  230. data/vendor/eigen/Eigen/src/Geometry/Rotation2D.h +3 -3
  231. data/vendor/eigen/Eigen/src/Geometry/Scaling.h +23 -5
  232. data/vendor/eigen/Eigen/src/Geometry/Transform.h +88 -67
  233. data/vendor/eigen/Eigen/src/Geometry/Translation.h +6 -12
  234. data/vendor/eigen/Eigen/src/Geometry/Umeyama.h +1 -1
  235. data/vendor/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +168 -0
  236. data/vendor/eigen/Eigen/src/Householder/BlockHouseholder.h +9 -2
  237. data/vendor/eigen/Eigen/src/Householder/Householder.h +8 -4
  238. data/vendor/eigen/Eigen/src/Householder/HouseholderSequence.h +123 -48
  239. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +15 -15
  240. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +7 -23
  241. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +5 -22
  242. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +41 -47
  243. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +51 -60
  244. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +70 -20
  245. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +2 -20
  246. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +11 -9
  247. data/vendor/eigen/Eigen/src/Jacobi/Jacobi.h +31 -10
  248. data/vendor/eigen/Eigen/src/KLUSupport/KLUSupport.h +358 -0
  249. data/vendor/eigen/Eigen/src/LU/Determinant.h +35 -19
  250. data/vendor/eigen/Eigen/src/LU/FullPivLU.h +29 -43
  251. data/vendor/eigen/Eigen/src/LU/InverseImpl.h +25 -8
  252. data/vendor/eigen/Eigen/src/LU/PartialPivLU.h +71 -58
  253. data/vendor/eigen/Eigen/src/LU/arch/InverseSize4.h +351 -0
  254. data/vendor/eigen/Eigen/src/OrderingMethods/Amd.h +7 -17
  255. data/vendor/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +297 -277
  256. data/vendor/eigen/Eigen/src/OrderingMethods/Ordering.h +6 -10
  257. data/vendor/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +1 -1
  258. data/vendor/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +9 -7
  259. data/vendor/eigen/Eigen/src/QR/ColPivHouseholderQR.h +41 -20
  260. data/vendor/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +100 -27
  261. data/vendor/eigen/Eigen/src/QR/FullPivHouseholderQR.h +59 -22
  262. data/vendor/eigen/Eigen/src/QR/HouseholderQR.h +48 -23
  263. data/vendor/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +25 -3
  264. data/vendor/eigen/Eigen/src/SVD/BDCSVD.h +183 -63
  265. data/vendor/eigen/Eigen/src/SVD/JacobiSVD.h +22 -14
  266. data/vendor/eigen/Eigen/src/SVD/SVDBase.h +83 -22
  267. data/vendor/eigen/Eigen/src/SVD/UpperBidiagonalization.h +3 -3
  268. data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +17 -9
  269. data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +12 -37
  270. data/vendor/eigen/Eigen/src/SparseCore/AmbiVector.h +3 -2
  271. data/vendor/eigen/Eigen/src/SparseCore/CompressedStorage.h +16 -0
  272. data/vendor/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +6 -6
  273. data/vendor/eigen/Eigen/src/SparseCore/SparseAssign.h +81 -27
  274. data/vendor/eigen/Eigen/src/SparseCore/SparseBlock.h +25 -57
  275. data/vendor/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +40 -11
  276. data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +11 -15
  277. data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +4 -2
  278. data/vendor/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +30 -8
  279. data/vendor/eigen/Eigen/src/SparseCore/SparseMatrix.h +126 -11
  280. data/vendor/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +5 -12
  281. data/vendor/eigen/Eigen/src/SparseCore/SparseProduct.h +13 -1
  282. data/vendor/eigen/Eigen/src/SparseCore/SparseRef.h +7 -7
  283. data/vendor/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +5 -2
  284. data/vendor/eigen/Eigen/src/SparseCore/SparseUtil.h +8 -0
  285. data/vendor/eigen/Eigen/src/SparseCore/SparseVector.h +1 -1
  286. data/vendor/eigen/Eigen/src/SparseCore/SparseView.h +1 -0
  287. data/vendor/eigen/Eigen/src/SparseLU/SparseLU.h +162 -12
  288. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +1 -1
  289. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +76 -2
  290. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +2 -2
  291. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +1 -1
  292. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +1 -1
  293. data/vendor/eigen/Eigen/src/SparseQR/SparseQR.h +19 -6
  294. data/vendor/eigen/Eigen/src/StlSupport/StdDeque.h +2 -12
  295. data/vendor/eigen/Eigen/src/StlSupport/StdList.h +2 -2
  296. data/vendor/eigen/Eigen/src/StlSupport/StdVector.h +2 -2
  297. data/vendor/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +6 -8
  298. data/vendor/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +175 -39
  299. data/vendor/eigen/Eigen/src/misc/lapacke.h +5 -4
  300. data/vendor/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +28 -2
  301. data/vendor/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +155 -11
  302. data/vendor/eigen/Eigen/src/plugins/BlockMethods.h +626 -242
  303. data/vendor/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +14 -0
  304. data/vendor/eigen/Eigen/src/plugins/IndexedViewMethods.h +262 -0
  305. data/vendor/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +4 -4
  306. data/vendor/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +10 -0
  307. data/vendor/eigen/Eigen/src/plugins/ReshapedMethods.h +149 -0
  308. data/vendor/eigen/README.md +2 -0
  309. data/vendor/eigen/bench/btl/README +1 -1
  310. data/vendor/eigen/bench/tensors/README +6 -7
  311. data/vendor/eigen/ci/README.md +56 -0
  312. data/vendor/eigen/demos/mix_eigen_and_c/README +1 -1
  313. data/vendor/eigen/unsupported/Eigen/CXX11/src/Tensor/README.md +213 -158
  314. data/vendor/eigen/unsupported/README.txt +1 -1
  315. data/vendor/tomotopy/README.kr.rst +21 -0
  316. data/vendor/tomotopy/README.rst +20 -0
  317. data/vendor/tomotopy/src/Labeling/FoRelevance.cpp +2 -2
  318. data/vendor/tomotopy/src/Labeling/Phraser.hpp +1 -1
  319. data/vendor/tomotopy/src/TopicModel/CTModel.hpp +2 -1
  320. data/vendor/tomotopy/src/TopicModel/DMRModel.hpp +2 -1
  321. data/vendor/tomotopy/src/TopicModel/DTModel.hpp +1 -1
  322. data/vendor/tomotopy/src/TopicModel/GDMRModel.hpp +2 -2
  323. data/vendor/tomotopy/src/TopicModel/HDP.h +1 -0
  324. data/vendor/tomotopy/src/TopicModel/HDPModel.hpp +53 -2
  325. data/vendor/tomotopy/src/TopicModel/HLDAModel.hpp +1 -1
  326. data/vendor/tomotopy/src/TopicModel/HPAModel.hpp +1 -0
  327. data/vendor/tomotopy/src/TopicModel/LDACVB0Model.hpp +2 -2
  328. data/vendor/tomotopy/src/TopicModel/LDAModel.hpp +16 -5
  329. data/vendor/tomotopy/src/TopicModel/LLDAModel.hpp +1 -0
  330. data/vendor/tomotopy/src/TopicModel/MGLDAModel.hpp +1 -0
  331. data/vendor/tomotopy/src/TopicModel/PLDAModel.hpp +1 -0
  332. data/vendor/tomotopy/src/TopicModel/PT.h +3 -1
  333. data/vendor/tomotopy/src/TopicModel/PTModel.hpp +31 -1
  334. data/vendor/tomotopy/src/TopicModel/SLDAModel.hpp +2 -2
  335. data/vendor/tomotopy/src/TopicModel/TopicModel.hpp +7 -5
  336. data/vendor/tomotopy/src/Utils/EigenAddonOps.hpp +36 -1
  337. data/vendor/tomotopy/src/Utils/exception.h +6 -0
  338. data/vendor/tomotopy/src/Utils/sample.hpp +14 -12
  339. data/vendor/tomotopy/src/Utils/sse_gamma.h +0 -3
  340. metadata +60 -14
  341. data/vendor/eigen/Eigen/CMakeLists.txt +0 -19
  342. data/vendor/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -674
  343. data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
  344. data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
  345. data/vendor/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
  346. data/vendor/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
  347. data/vendor/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
@@ -14,27 +14,27 @@
14
14
  #define EIGEN_PRODUCTEVALUATORS_H
15
15
 
16
16
  namespace Eigen {
17
-
17
+
18
18
  namespace internal {
19
19
 
20
20
  /** \internal
21
21
  * Evaluator of a product expression.
22
22
  * Since products require special treatments to handle all possible cases,
23
- * we simply deffer the evaluation logic to a product_evaluator class
23
+ * we simply defer the evaluation logic to a product_evaluator class
24
24
  * which offers more partial specialization possibilities.
25
- *
25
+ *
26
26
  * \sa class product_evaluator
27
27
  */
28
28
  template<typename Lhs, typename Rhs, int Options>
29
- struct evaluator<Product<Lhs, Rhs, Options> >
29
+ struct evaluator<Product<Lhs, Rhs, Options> >
30
30
  : public product_evaluator<Product<Lhs, Rhs, Options> >
31
31
  {
32
32
  typedef Product<Lhs, Rhs, Options> XprType;
33
33
  typedef product_evaluator<XprType> Base;
34
-
34
+
35
35
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr) : Base(xpr) {}
36
36
  };
37
-
37
+
38
38
  // Catch "scalar * ( A * B )" and transform it to "(A*scalar) * B"
39
39
  // TODO we should apply that rule only if that's really helpful
40
40
  template<typename Lhs, typename Rhs, typename Scalar1, typename Scalar2, typename Plain1>
@@ -62,12 +62,12 @@ struct evaluator<CwiseBinaryOp<internal::scalar_product_op<Scalar1,Scalar2>,
62
62
 
63
63
 
64
64
  template<typename Lhs, typename Rhs, int DiagIndex>
65
- struct evaluator<Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> >
65
+ struct evaluator<Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> >
66
66
  : public evaluator<Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex> >
67
67
  {
68
68
  typedef Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> XprType;
69
69
  typedef evaluator<Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex> > Base;
70
-
70
+
71
71
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr)
72
72
  : Base(Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex>(
73
73
  Product<Lhs, Rhs, LazyProduct>(xpr.nestedExpression().lhs(), xpr.nestedExpression().rhs()),
@@ -108,27 +108,27 @@ struct product_evaluator<Product<Lhs, Rhs, Options>, ProductTag, LhsShape, RhsSh
108
108
  : m_result(xpr.rows(), xpr.cols())
109
109
  {
110
110
  ::new (static_cast<Base*>(this)) Base(m_result);
111
-
111
+
112
112
  // FIXME shall we handle nested_eval here?,
113
113
  // if so, then we must take care at removing the call to nested_eval in the specializations (e.g., in permutation_matrix_product, transposition_matrix_product, etc.)
114
114
  // typedef typename internal::nested_eval<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
115
115
  // typedef typename internal::nested_eval<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
116
116
  // typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned;
117
117
  // typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned;
118
- //
118
+ //
119
119
  // const LhsNested lhs(xpr.lhs());
120
120
  // const RhsNested rhs(xpr.rhs());
121
- //
121
+ //
122
122
  // generic_product_impl<LhsNestedCleaned, RhsNestedCleaned>::evalTo(m_result, lhs, rhs);
123
123
 
124
124
  generic_product_impl<Lhs, Rhs, LhsShape, RhsShape, ProductTag>::evalTo(m_result, xpr.lhs(), xpr.rhs());
125
125
  }
126
-
127
- protected:
126
+
127
+ protected:
128
128
  PlainObject m_result;
129
129
  };
130
130
 
131
- // The following three shortcuts are enabled only if the scalar types match excatly.
131
+ // The following three shortcuts are enabled only if the scalar types match exactly.
132
132
  // TODO: we could enable them for different scalar types when the product is not vectorized.
133
133
 
134
134
  // Dense = Product
@@ -137,7 +137,7 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::assign_op<Scal
137
137
  typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type>
138
138
  {
139
139
  typedef Product<Lhs,Rhs,Options> SrcXprType;
140
- static EIGEN_STRONG_INLINE
140
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
141
141
  void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
142
142
  {
143
143
  Index dstRows = src.rows();
@@ -155,7 +155,7 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::add_assign_op<
155
155
  typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type>
156
156
  {
157
157
  typedef Product<Lhs,Rhs,Options> SrcXprType;
158
- static EIGEN_STRONG_INLINE
158
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
159
159
  void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<Scalar,Scalar> &)
160
160
  {
161
161
  eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
@@ -170,7 +170,7 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::sub_assign_op<
170
170
  typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type>
171
171
  {
172
172
  typedef Product<Lhs,Rhs,Options> SrcXprType;
173
- static EIGEN_STRONG_INLINE
173
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
174
174
  void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<Scalar,Scalar> &)
175
175
  {
176
176
  eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
@@ -190,7 +190,7 @@ struct Assignment<DstXprType, CwiseBinaryOp<internal::scalar_product_op<ScalarBi
190
190
  typedef CwiseBinaryOp<internal::scalar_product_op<ScalarBis,Scalar>,
191
191
  const CwiseNullaryOp<internal::scalar_constant_op<ScalarBis>,Plain>,
192
192
  const Product<Lhs,Rhs,DefaultProduct> > SrcXprType;
193
- static EIGEN_STRONG_INLINE
193
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
194
194
  void run(DstXprType &dst, const SrcXprType &src, const AssignFunc& func)
195
195
  {
196
196
  call_assignment_no_alias(dst, (src.lhs().functor().m_other * src.rhs().lhs())*src.rhs().rhs(), func);
@@ -217,7 +217,7 @@ template<typename DstXprType, typename OtherXpr, typename ProductType, typename
217
217
  struct assignment_from_xpr_op_product
218
218
  {
219
219
  template<typename SrcXprType, typename InitialFunc>
220
- static EIGEN_STRONG_INLINE
220
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
221
221
  void run(DstXprType &dst, const SrcXprType &src, const InitialFunc& /*func*/)
222
222
  {
223
223
  call_assignment_no_alias(dst, src.lhs(), Func1());
@@ -246,19 +246,19 @@ template<typename Lhs, typename Rhs>
246
246
  struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,InnerProduct>
247
247
  {
248
248
  template<typename Dst>
249
- static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
249
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
250
250
  {
251
251
  dst.coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum();
252
252
  }
253
-
253
+
254
254
  template<typename Dst>
255
- static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
255
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
256
256
  {
257
257
  dst.coeffRef(0,0) += (lhs.transpose().cwiseProduct(rhs)).sum();
258
258
  }
259
-
259
+
260
260
  template<typename Dst>
261
- static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
261
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
262
262
  { dst.coeffRef(0,0) -= (lhs.transpose().cwiseProduct(rhs)).sum(); }
263
263
  };
264
264
 
@@ -269,10 +269,10 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,InnerProduct>
269
269
 
270
270
  // Column major result
271
271
  template<typename Dst, typename Lhs, typename Rhs, typename Func>
272
- void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const false_type&)
272
+ void EIGEN_DEVICE_FUNC outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const false_type&)
273
273
  {
274
274
  evaluator<Rhs> rhsEval(rhs);
275
- typename nested_eval<Lhs,Rhs::SizeAtCompileTime>::type actual_lhs(lhs);
275
+ ei_declare_local_nested_eval(Lhs,lhs,Rhs::SizeAtCompileTime,actual_lhs);
276
276
  // FIXME if cols is large enough, then it might be useful to make sure that lhs is sequentially stored
277
277
  // FIXME not very good if rhs is real and lhs complex while alpha is real too
278
278
  const Index cols = dst.cols();
@@ -282,10 +282,10 @@ void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const
282
282
 
283
283
  // Row major result
284
284
  template<typename Dst, typename Lhs, typename Rhs, typename Func>
285
- void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const true_type&)
285
+ void EIGEN_DEVICE_FUNC outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const true_type&)
286
286
  {
287
287
  evaluator<Lhs> lhsEval(lhs);
288
- typename nested_eval<Rhs,Lhs::SizeAtCompileTime>::type actual_rhs(rhs);
288
+ ei_declare_local_nested_eval(Rhs,rhs,Lhs::SizeAtCompileTime,actual_rhs);
289
289
  // FIXME if rows is large enough, then it might be useful to make sure that rhs is sequentially stored
290
290
  // FIXME not very good if lhs is real and rhs complex while alpha is real too
291
291
  const Index rows = dst.rows();
@@ -298,43 +298,43 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,OuterProduct>
298
298
  {
299
299
  template<typename T> struct is_row_major : internal::conditional<(int(T::Flags)&RowMajorBit), internal::true_type, internal::false_type>::type {};
300
300
  typedef typename Product<Lhs,Rhs>::Scalar Scalar;
301
-
301
+
302
302
  // TODO it would be nice to be able to exploit our *_assign_op functors for that purpose
303
- struct set { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() = src; } };
304
- struct add { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() += src; } };
305
- struct sub { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() -= src; } };
303
+ struct set { template<typename Dst, typename Src> EIGEN_DEVICE_FUNC void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() = src; } };
304
+ struct add { template<typename Dst, typename Src> EIGEN_DEVICE_FUNC void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() += src; } };
305
+ struct sub { template<typename Dst, typename Src> EIGEN_DEVICE_FUNC void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() -= src; } };
306
306
  struct adds {
307
307
  Scalar m_scale;
308
308
  explicit adds(const Scalar& s) : m_scale(s) {}
309
- template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const {
309
+ template<typename Dst, typename Src> void EIGEN_DEVICE_FUNC operator()(const Dst& dst, const Src& src) const {
310
310
  dst.const_cast_derived() += m_scale * src;
311
311
  }
312
312
  };
313
-
313
+
314
314
  template<typename Dst>
315
- static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
315
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
316
316
  {
317
317
  internal::outer_product_selector_run(dst, lhs, rhs, set(), is_row_major<Dst>());
318
318
  }
319
-
319
+
320
320
  template<typename Dst>
321
- static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
321
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
322
322
  {
323
323
  internal::outer_product_selector_run(dst, lhs, rhs, add(), is_row_major<Dst>());
324
324
  }
325
-
325
+
326
326
  template<typename Dst>
327
- static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
327
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
328
328
  {
329
329
  internal::outer_product_selector_run(dst, lhs, rhs, sub(), is_row_major<Dst>());
330
330
  }
331
-
331
+
332
332
  template<typename Dst>
333
- static EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
333
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
334
334
  {
335
335
  internal::outer_product_selector_run(dst, lhs, rhs, adds(alpha), is_row_major<Dst>());
336
336
  }
337
-
337
+
338
338
  };
339
339
 
340
340
 
@@ -343,21 +343,21 @@ template<typename Lhs, typename Rhs, typename Derived>
343
343
  struct generic_product_impl_base
344
344
  {
345
345
  typedef typename Product<Lhs,Rhs>::Scalar Scalar;
346
-
346
+
347
347
  template<typename Dst>
348
- static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
348
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
349
349
  { dst.setZero(); scaleAndAddTo(dst, lhs, rhs, Scalar(1)); }
350
350
 
351
351
  template<typename Dst>
352
- static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
352
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
353
353
  { scaleAndAddTo(dst,lhs, rhs, Scalar(1)); }
354
354
 
355
355
  template<typename Dst>
356
- static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
356
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
357
357
  { scaleAndAddTo(dst, lhs, rhs, Scalar(-1)); }
358
-
358
+
359
359
  template<typename Dst>
360
- static EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
360
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
361
361
  { Derived::scaleAndAddTo(dst,lhs,rhs,alpha); }
362
362
 
363
363
  };
@@ -373,8 +373,13 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemvProduct>
373
373
  typedef typename internal::remove_all<typename internal::conditional<int(Side)==OnTheRight,LhsNested,RhsNested>::type>::type MatrixType;
374
374
 
375
375
  template<typename Dest>
376
- static EIGEN_STRONG_INLINE void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
376
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
377
377
  {
378
+ // Fall back to inner product if both the lhs and rhs are runtime vectors.
379
+ if (lhs.rows() == 1 && rhs.cols() == 1) {
380
+ dst.coeffRef(0,0) += alpha * lhs.row(0).conjugate().dot(rhs.col(0));
381
+ return;
382
+ }
378
383
  LhsNested actual_lhs(lhs);
379
384
  RhsNested actual_rhs(rhs);
380
385
  internal::gemv_dense_selector<Side,
@@ -385,35 +390,84 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemvProduct>
385
390
  };
386
391
 
387
392
  template<typename Lhs, typename Rhs>
388
- struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode>
393
+ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode>
389
394
  {
390
395
  typedef typename Product<Lhs,Rhs>::Scalar Scalar;
391
-
396
+
392
397
  template<typename Dst>
393
- static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
398
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
394
399
  {
395
400
  // Same as: dst.noalias() = lhs.lazyProduct(rhs);
396
401
  // but easier on the compiler side
397
402
  call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::assign_op<typename Dst::Scalar,Scalar>());
398
403
  }
399
-
404
+
400
405
  template<typename Dst>
401
- static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
406
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
402
407
  {
403
408
  // dst.noalias() += lhs.lazyProduct(rhs);
404
409
  call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::add_assign_op<typename Dst::Scalar,Scalar>());
405
410
  }
406
-
411
+
407
412
  template<typename Dst>
408
- static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
413
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
409
414
  {
410
415
  // dst.noalias() -= lhs.lazyProduct(rhs);
411
416
  call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::sub_assign_op<typename Dst::Scalar,Scalar>());
412
417
  }
413
-
414
- // template<typename Dst>
415
- // static inline void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
416
- // { dst.noalias() += alpha * lhs.lazyProduct(rhs); }
418
+
419
+ // This is a special evaluation path called from generic_product_impl<...,GemmProduct> in file GeneralMatrixMatrix.h
420
+ // This variant tries to extract scalar multiples from both the LHS and RHS and factor them out. For instance:
421
+ // dst {,+,-}= (s1*A)*(B*s2)
422
+ // will be rewritten as:
423
+ // dst {,+,-}= (s1*s2) * (A.lazyProduct(B))
424
+ // There are at least four benefits of doing so:
425
+ // 1 - huge performance gain for heap-allocated matrix types as it saves costly allocations.
426
+ // 2 - it is faster than simply by-passing the heap allocation through stack allocation.
427
+ // 3 - it makes this fallback consistent with the heavy GEMM routine.
428
+ // 4 - it fully by-passes huge stack allocation attempts when multiplying huge fixed-size matrices.
429
+ // (see https://stackoverflow.com/questions/54738495)
430
+ // For small fixed-size matrices, however, the gains are less obvious, it is sometimes x2 faster, but sometimes x3 slower,
431
+ // and the behavior also depends a lot on the compiler... This is why this rewriting strategy is currently
432
+ // enabled only when falling back from the main GEMM.
433
+ template<typename Dst, typename Func>
434
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
435
+ void eval_dynamic(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Func &func)
436
+ {
437
+ enum {
438
+ HasScalarFactor = blas_traits<Lhs>::HasScalarFactor || blas_traits<Rhs>::HasScalarFactor,
439
+ ConjLhs = blas_traits<Lhs>::NeedToConjugate,
440
+ ConjRhs = blas_traits<Rhs>::NeedToConjugate
441
+ };
442
+ // FIXME: in c++11 this should be auto, and extractScalarFactor should also return auto
443
+ // this is important for real*complex_mat
444
+ Scalar actualAlpha = combine_scalar_factors<Scalar>(lhs, rhs);
445
+
446
+ eval_dynamic_impl(dst,
447
+ blas_traits<Lhs>::extract(lhs).template conjugateIf<ConjLhs>(),
448
+ blas_traits<Rhs>::extract(rhs).template conjugateIf<ConjRhs>(),
449
+ func,
450
+ actualAlpha,
451
+ typename conditional<HasScalarFactor,true_type,false_type>::type());
452
+ }
453
+
454
+ protected:
455
+
456
+ template<typename Dst, typename LhsT, typename RhsT, typename Func, typename Scalar>
457
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
458
+ void eval_dynamic_impl(Dst& dst, const LhsT& lhs, const RhsT& rhs, const Func &func, const Scalar& s /* == 1 */, false_type)
459
+ {
460
+ EIGEN_UNUSED_VARIABLE(s);
461
+ eigen_internal_assert(s==Scalar(1));
462
+ call_restricted_packet_assignment_no_alias(dst, lhs.lazyProduct(rhs), func);
463
+ }
464
+
465
+ template<typename Dst, typename LhsT, typename RhsT, typename Func, typename Scalar>
466
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
467
+ void eval_dynamic_impl(Dst& dst, const LhsT& lhs, const RhsT& rhs, const Func &func, const Scalar& s, true_type)
468
+ {
469
+ call_restricted_packet_assignment_no_alias(dst, s * lhs.lazyProduct(rhs), func);
470
+ }
417
471
  };
418
472
 
419
473
  // This specialization enforces the use of a coefficient-based evaluation strategy
@@ -471,7 +525,7 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
471
525
 
472
526
  typedef typename internal::nested_eval<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
473
527
  typedef typename internal::nested_eval<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
474
-
528
+
475
529
  typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned;
476
530
  typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned;
477
531
 
@@ -490,19 +544,19 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
490
544
  typedef typename find_best_packet<Scalar,ColsAtCompileTime>::type RhsVecPacketType;
491
545
 
492
546
  enum {
493
-
547
+
494
548
  LhsCoeffReadCost = LhsEtorType::CoeffReadCost,
495
549
  RhsCoeffReadCost = RhsEtorType::CoeffReadCost,
496
550
  CoeffReadCost = InnerSize==0 ? NumTraits<Scalar>::ReadCost
497
551
  : InnerSize == Dynamic ? HugeCost
498
- : InnerSize * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost)
552
+ : InnerSize * (NumTraits<Scalar>::MulCost + int(LhsCoeffReadCost) + int(RhsCoeffReadCost))
499
553
  + (InnerSize - 1) * NumTraits<Scalar>::AddCost,
500
554
 
501
555
  Unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT,
502
-
556
+
503
557
  LhsFlags = LhsEtorType::Flags,
504
558
  RhsFlags = RhsEtorType::Flags,
505
-
559
+
506
560
  LhsRowMajor = LhsFlags & RowMajorBit,
507
561
  RhsRowMajor = RhsFlags & RowMajorBit,
508
562
 
@@ -512,7 +566,7 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
512
566
  // Here, we don't care about alignment larger than the usable packet size.
513
567
  LhsAlignment = EIGEN_PLAIN_ENUM_MIN(LhsEtorType::Alignment,LhsVecPacketSize*int(sizeof(typename LhsNestedCleaned::Scalar))),
514
568
  RhsAlignment = EIGEN_PLAIN_ENUM_MIN(RhsEtorType::Alignment,RhsVecPacketSize*int(sizeof(typename RhsNestedCleaned::Scalar))),
515
-
569
+
516
570
  SameType = is_same<typename LhsNestedCleaned::Scalar,typename RhsNestedCleaned::Scalar>::value,
517
571
 
518
572
  CanVectorizeRhs = bool(RhsRowMajor) && (RhsFlags & PacketAccessBit) && (ColsAtCompileTime!=1),
@@ -522,12 +576,12 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
522
576
  : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
523
577
  : (bool(RhsRowMajor) && !CanVectorizeLhs),
524
578
 
525
- Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit)
579
+ Flags = ((int(LhsFlags) | int(RhsFlags)) & HereditaryBits & ~RowMajorBit)
526
580
  | (EvalToRowMajor ? RowMajorBit : 0)
527
581
  // TODO enable vectorization for mixed types
528
582
  | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0)
529
583
  | (XprType::IsVectorAtCompileTime ? LinearAccessBit : 0),
530
-
584
+
531
585
  LhsOuterStrideBytes = int(LhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename LhsNestedCleaned::Scalar)),
532
586
  RhsOuterStrideBytes = int(RhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename RhsNestedCleaned::Scalar)),
533
587
 
@@ -543,10 +597,10 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
543
597
  CanVectorizeInner = SameType
544
598
  && LhsRowMajor
545
599
  && (!RhsRowMajor)
546
- && (LhsFlags & RhsFlags & ActualPacketAccessBit)
547
- && (InnerSize % packet_traits<Scalar>::size == 0)
600
+ && (int(LhsFlags) & int(RhsFlags) & ActualPacketAccessBit)
601
+ && (int(InnerSize) % packet_traits<Scalar>::size == 0)
548
602
  };
549
-
603
+
550
604
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index row, Index col) const
551
605
  {
552
606
  return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum();
@@ -556,7 +610,8 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
556
610
  * which is why we don't set the LinearAccessBit.
557
611
  * TODO: this seems possible when the result is a vector
558
612
  */
559
- EIGEN_DEVICE_FUNC const CoeffReturnType coeff(Index index) const
613
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
614
+ const CoeffReturnType coeff(Index index) const
560
615
  {
561
616
  const Index row = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? 0 : index;
562
617
  const Index col = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? index : 0;
@@ -564,6 +619,7 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
564
619
  }
565
620
 
566
621
  template<int LoadMode, typename PacketType>
622
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
567
623
  const PacketType packet(Index row, Index col) const
568
624
  {
569
625
  PacketType res;
@@ -575,6 +631,7 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
575
631
  }
576
632
 
577
633
  template<int LoadMode, typename PacketType>
634
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
578
635
  const PacketType packet(Index index) const
579
636
  {
580
637
  const Index row = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? 0 : index;
@@ -585,7 +642,7 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
585
642
  protected:
586
643
  typename internal::add_const_on_value_type<LhsNested>::type m_lhs;
587
644
  typename internal::add_const_on_value_type<RhsNested>::type m_rhs;
588
-
645
+
589
646
  LhsEtorType m_lhsImpl;
590
647
  RhsEtorType m_rhsImpl;
591
648
 
@@ -603,7 +660,8 @@ struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, LazyCoeffBasedProduc
603
660
  enum {
604
661
  Flags = Base::Flags | EvalBeforeNestingBit
605
662
  };
606
- EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr)
663
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
664
+ explicit product_evaluator(const XprType& xpr)
607
665
  : Base(BaseProduct(xpr.lhs(),xpr.rhs()))
608
666
  {}
609
667
  };
@@ -615,7 +673,7 @@ struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, LazyCoeffBasedProduc
615
673
  template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
616
674
  struct etor_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
617
675
  {
618
- static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
676
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
619
677
  {
620
678
  etor_product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
621
679
  res = pmadd(pset1<Packet>(lhs.coeff(row, Index(UnrollingIndex-1))), rhs.template packet<LoadMode,Packet>(Index(UnrollingIndex-1), col), res);
@@ -625,7 +683,7 @@ struct etor_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, Load
625
683
  template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
626
684
  struct etor_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
627
685
  {
628
- static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
686
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
629
687
  {
630
688
  etor_product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
631
689
  res = pmadd(lhs.template packet<LoadMode,Packet>(row, Index(UnrollingIndex-1)), pset1<Packet>(rhs.coeff(Index(UnrollingIndex-1), col)), res);
@@ -635,7 +693,7 @@ struct etor_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, Load
635
693
  template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
636
694
  struct etor_product_packet_impl<RowMajor, 1, Lhs, Rhs, Packet, LoadMode>
637
695
  {
638
- static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
696
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
639
697
  {
640
698
  res = pmul(pset1<Packet>(lhs.coeff(row, Index(0))),rhs.template packet<LoadMode,Packet>(Index(0), col));
641
699
  }
@@ -644,7 +702,7 @@ struct etor_product_packet_impl<RowMajor, 1, Lhs, Rhs, Packet, LoadMode>
644
702
  template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
645
703
  struct etor_product_packet_impl<ColMajor, 1, Lhs, Rhs, Packet, LoadMode>
646
704
  {
647
- static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
705
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
648
706
  {
649
707
  res = pmul(lhs.template packet<LoadMode,Packet>(row, Index(0)), pset1<Packet>(rhs.coeff(Index(0), col)));
650
708
  }
@@ -653,7 +711,7 @@ struct etor_product_packet_impl<ColMajor, 1, Lhs, Rhs, Packet, LoadMode>
653
711
  template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
654
712
  struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
655
713
  {
656
- static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res)
714
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res)
657
715
  {
658
716
  res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
659
717
  }
@@ -662,7 +720,7 @@ struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
662
720
  template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
663
721
  struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
664
722
  {
665
- static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res)
723
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res)
666
724
  {
667
725
  res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
668
726
  }
@@ -671,7 +729,7 @@ struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
671
729
  template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
672
730
  struct etor_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
673
731
  {
674
- static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
732
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
675
733
  {
676
734
  res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
677
735
  for(Index i = 0; i < innerDim; ++i)
@@ -682,7 +740,7 @@ struct etor_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
682
740
  template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
683
741
  struct etor_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
684
742
  {
685
- static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
743
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
686
744
  {
687
745
  res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
688
746
  for(Index i = 0; i < innerDim; ++i)
@@ -704,7 +762,7 @@ struct generic_product_impl<Lhs,Rhs,TriangularShape,DenseShape,ProductTag>
704
762
  : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,TriangularShape,DenseShape,ProductTag> >
705
763
  {
706
764
  typedef typename Product<Lhs,Rhs>::Scalar Scalar;
707
-
765
+
708
766
  template<typename Dest>
709
767
  static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
710
768
  {
@@ -718,7 +776,7 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,TriangularShape,ProductTag>
718
776
  : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,TriangularShape,ProductTag> >
719
777
  {
720
778
  typedef typename Product<Lhs,Rhs>::Scalar Scalar;
721
-
779
+
722
780
  template<typename Dest>
723
781
  static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
724
782
  {
@@ -739,9 +797,10 @@ struct generic_product_impl<Lhs,Rhs,SelfAdjointShape,DenseShape,ProductTag>
739
797
  : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,SelfAdjointShape,DenseShape,ProductTag> >
740
798
  {
741
799
  typedef typename Product<Lhs,Rhs>::Scalar Scalar;
742
-
800
+
743
801
  template<typename Dest>
744
- static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
802
+ static EIGEN_DEVICE_FUNC
803
+ void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
745
804
  {
746
805
  selfadjoint_product_impl<typename Lhs::MatrixType,Lhs::Mode,false,Rhs,0,Rhs::IsVectorAtCompileTime>::run(dst, lhs.nestedExpression(), rhs, alpha);
747
806
  }
@@ -752,7 +811,7 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,SelfAdjointShape,ProductTag>
752
811
  : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,SelfAdjointShape,ProductTag> >
753
812
  {
754
813
  typedef typename Product<Lhs,Rhs>::Scalar Scalar;
755
-
814
+
756
815
  template<typename Dest>
757
816
  static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
758
817
  {
@@ -764,7 +823,7 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,SelfAdjointShape,ProductTag>
764
823
  /***************************************************************************
765
824
  * Diagonal products
766
825
  ***************************************************************************/
767
-
826
+
768
827
  template<typename MatrixType, typename DiagonalType, typename Derived, int ProductOrder>
769
828
  struct diagonal_product_evaluator_base
770
829
  : evaluator_base<Derived>
@@ -772,17 +831,25 @@ struct diagonal_product_evaluator_base
772
831
  typedef typename ScalarBinaryOpTraits<typename MatrixType::Scalar, typename DiagonalType::Scalar>::ReturnType Scalar;
773
832
  public:
774
833
  enum {
775
- CoeffReadCost = NumTraits<Scalar>::MulCost + evaluator<MatrixType>::CoeffReadCost + evaluator<DiagonalType>::CoeffReadCost,
776
-
834
+ CoeffReadCost = int(NumTraits<Scalar>::MulCost) + int(evaluator<MatrixType>::CoeffReadCost) + int(evaluator<DiagonalType>::CoeffReadCost),
835
+
777
836
  MatrixFlags = evaluator<MatrixType>::Flags,
778
837
  DiagFlags = evaluator<DiagonalType>::Flags,
779
- _StorageOrder = MatrixFlags & RowMajorBit ? RowMajor : ColMajor,
838
+
839
+ _StorageOrder = (Derived::MaxRowsAtCompileTime==1 && Derived::MaxColsAtCompileTime!=1) ? RowMajor
840
+ : (Derived::MaxColsAtCompileTime==1 && Derived::MaxRowsAtCompileTime!=1) ? ColMajor
841
+ : MatrixFlags & RowMajorBit ? RowMajor : ColMajor,
842
+ _SameStorageOrder = _StorageOrder == (MatrixFlags & RowMajorBit ? RowMajor : ColMajor),
843
+
780
844
  _ScalarAccessOnDiag = !((int(_StorageOrder) == ColMajor && int(ProductOrder) == OnTheLeft)
781
845
  ||(int(_StorageOrder) == RowMajor && int(ProductOrder) == OnTheRight)),
782
846
  _SameTypes = is_same<typename MatrixType::Scalar, typename DiagonalType::Scalar>::value,
783
847
  // FIXME currently we need same types, but in the future the next rule should be the one
784
848
  //_Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && ((!_PacketOnDiag) || (_SameTypes && bool(int(DiagFlags)&PacketAccessBit))),
785
- _Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && _SameTypes && (_ScalarAccessOnDiag || (bool(int(DiagFlags)&PacketAccessBit))),
849
+ _Vectorizable = bool(int(MatrixFlags)&PacketAccessBit)
850
+ && _SameTypes
851
+ && (_SameStorageOrder || (MatrixFlags&LinearAccessBit)==LinearAccessBit)
852
+ && (_ScalarAccessOnDiag || (bool(int(DiagFlags)&PacketAccessBit))),
786
853
  _LinearAccessMask = (MatrixType::RowsAtCompileTime==1 || MatrixType::ColsAtCompileTime==1) ? LinearAccessBit : 0,
787
854
  Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixFlags)) | (_Vectorizable ? PacketAccessBit : 0),
788
855
  Alignment = evaluator<MatrixType>::Alignment,
@@ -791,14 +858,14 @@ public:
791
858
  || (DiagonalType::SizeAtCompileTime==Dynamic && MatrixType::RowsAtCompileTime==1 && ProductOrder==OnTheLeft)
792
859
  || (DiagonalType::SizeAtCompileTime==Dynamic && MatrixType::ColsAtCompileTime==1 && ProductOrder==OnTheRight)
793
860
  };
794
-
795
- diagonal_product_evaluator_base(const MatrixType &mat, const DiagonalType &diag)
861
+
862
+ EIGEN_DEVICE_FUNC diagonal_product_evaluator_base(const MatrixType &mat, const DiagonalType &diag)
796
863
  : m_diagImpl(diag), m_matImpl(mat)
797
864
  {
798
865
  EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::MulCost);
799
866
  EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
800
867
  }
801
-
868
+
802
869
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const
803
870
  {
804
871
  if(AsScalarProduct)
@@ -806,7 +873,7 @@ public:
806
873
  else
807
874
  return m_diagImpl.coeff(idx) * m_matImpl.coeff(idx);
808
875
  }
809
-
876
+
810
877
  protected:
811
878
  template<int LoadMode,typename PacketType>
812
879
  EIGEN_STRONG_INLINE PacketType packet_impl(Index row, Index col, Index id, internal::true_type) const
@@ -814,7 +881,7 @@ protected:
814
881
  return internal::pmul(m_matImpl.template packet<LoadMode,PacketType>(row, col),
815
882
  internal::pset1<PacketType>(m_diagImpl.coeff(id)));
816
883
  }
817
-
884
+
818
885
  template<int LoadMode,typename PacketType>
819
886
  EIGEN_STRONG_INLINE PacketType packet_impl(Index row, Index col, Index id, internal::false_type) const
820
887
  {
@@ -825,7 +892,7 @@ protected:
825
892
  return internal::pmul(m_matImpl.template packet<LoadMode,PacketType>(row, col),
826
893
  m_diagImpl.template packet<DiagonalPacketLoadMode,PacketType>(id));
827
894
  }
828
-
895
+
829
896
  evaluator<DiagonalType> m_diagImpl;
830
897
  evaluator<MatrixType> m_matImpl;
831
898
  };
@@ -840,25 +907,25 @@ struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DiagonalSha
840
907
  using Base::m_matImpl;
841
908
  using Base::coeff;
842
909
  typedef typename Base::Scalar Scalar;
843
-
910
+
844
911
  typedef Product<Lhs, Rhs, ProductKind> XprType;
845
912
  typedef typename XprType::PlainObject PlainObject;
846
-
847
- enum {
848
- StorageOrder = int(Rhs::Flags) & RowMajorBit ? RowMajor : ColMajor
849
- };
913
+ typedef typename Lhs::DiagonalVectorType DiagonalType;
914
+
915
+
916
+ enum { StorageOrder = Base::_StorageOrder };
850
917
 
851
918
  EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr)
852
919
  : Base(xpr.rhs(), xpr.lhs().diagonal())
853
920
  {
854
921
  }
855
-
922
+
856
923
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const
857
924
  {
858
925
  return m_diagImpl.coeff(row) * m_matImpl.coeff(row, col);
859
926
  }
860
-
861
- #ifndef __CUDACC__
927
+
928
+ #ifndef EIGEN_GPUCC
862
929
  template<int LoadMode,typename PacketType>
863
930
  EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const
864
931
  {
@@ -867,7 +934,7 @@ struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DiagonalSha
867
934
  return this->template packet_impl<LoadMode,PacketType>(row,col, row,
868
935
  typename internal::conditional<int(StorageOrder)==RowMajor, internal::true_type, internal::false_type>::type());
869
936
  }
870
-
937
+
871
938
  template<int LoadMode,typename PacketType>
872
939
  EIGEN_STRONG_INLINE PacketType packet(Index idx) const
873
940
  {
@@ -886,30 +953,30 @@ struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DenseShape,
886
953
  using Base::m_matImpl;
887
954
  using Base::coeff;
888
955
  typedef typename Base::Scalar Scalar;
889
-
956
+
890
957
  typedef Product<Lhs, Rhs, ProductKind> XprType;
891
958
  typedef typename XprType::PlainObject PlainObject;
892
-
893
- enum { StorageOrder = int(Lhs::Flags) & RowMajorBit ? RowMajor : ColMajor };
959
+
960
+ enum { StorageOrder = Base::_StorageOrder };
894
961
 
895
962
  EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr)
896
963
  : Base(xpr.lhs(), xpr.rhs().diagonal())
897
964
  {
898
965
  }
899
-
966
+
900
967
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const
901
968
  {
902
969
  return m_matImpl.coeff(row, col) * m_diagImpl.coeff(col);
903
970
  }
904
-
905
- #ifndef __CUDACC__
971
+
972
+ #ifndef EIGEN_GPUCC
906
973
  template<int LoadMode,typename PacketType>
907
974
  EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const
908
975
  {
909
976
  return this->template packet_impl<LoadMode,PacketType>(row,col, col,
910
977
  typename internal::conditional<int(StorageOrder)==ColMajor, internal::true_type, internal::false_type>::type());
911
978
  }
912
-
979
+
913
980
  template<int LoadMode,typename PacketType>
914
981
  EIGEN_STRONG_INLINE PacketType packet(Index idx) const
915
982
  {
@@ -937,7 +1004,7 @@ struct permutation_matrix_product<ExpressionType, Side, Transposed, DenseShape>
937
1004
  typedef typename remove_all<MatrixType>::type MatrixTypeCleaned;
938
1005
 
939
1006
  template<typename Dest, typename PermutationType>
940
- static inline void run(Dest& dst, const PermutationType& perm, const ExpressionType& xpr)
1007
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Dest& dst, const PermutationType& perm, const ExpressionType& xpr)
941
1008
  {
942
1009
  MatrixType mat(xpr);
943
1010
  const Index n = Side==OnTheLeft ? mat.rows() : mat.cols();
@@ -991,7 +1058,7 @@ template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
991
1058
  struct generic_product_impl<Lhs, Rhs, PermutationShape, MatrixShape, ProductTag>
992
1059
  {
993
1060
  template<typename Dest>
994
- static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
1061
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
995
1062
  {
996
1063
  permutation_matrix_product<Rhs, OnTheLeft, false, MatrixShape>::run(dst, lhs, rhs);
997
1064
  }
@@ -1001,7 +1068,7 @@ template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
1001
1068
  struct generic_product_impl<Lhs, Rhs, MatrixShape, PermutationShape, ProductTag>
1002
1069
  {
1003
1070
  template<typename Dest>
1004
- static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
1071
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
1005
1072
  {
1006
1073
  permutation_matrix_product<Lhs, OnTheRight, false, MatrixShape>::run(dst, rhs, lhs);
1007
1074
  }
@@ -1011,7 +1078,7 @@ template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
1011
1078
  struct generic_product_impl<Inverse<Lhs>, Rhs, PermutationShape, MatrixShape, ProductTag>
1012
1079
  {
1013
1080
  template<typename Dest>
1014
- static void evalTo(Dest& dst, const Inverse<Lhs>& lhs, const Rhs& rhs)
1081
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Inverse<Lhs>& lhs, const Rhs& rhs)
1015
1082
  {
1016
1083
  permutation_matrix_product<Rhs, OnTheLeft, true, MatrixShape>::run(dst, lhs.nestedExpression(), rhs);
1017
1084
  }
@@ -1021,7 +1088,7 @@ template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
1021
1088
  struct generic_product_impl<Lhs, Inverse<Rhs>, MatrixShape, PermutationShape, ProductTag>
1022
1089
  {
1023
1090
  template<typename Dest>
1024
- static void evalTo(Dest& dst, const Lhs& lhs, const Inverse<Rhs>& rhs)
1091
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Inverse<Rhs>& rhs)
1025
1092
  {
1026
1093
  permutation_matrix_product<Lhs, OnTheRight, true, MatrixShape>::run(dst, rhs.nestedExpression(), lhs);
1027
1094
  }
@@ -1043,9 +1110,9 @@ struct transposition_matrix_product
1043
1110
  {
1044
1111
  typedef typename nested_eval<ExpressionType, 1>::type MatrixType;
1045
1112
  typedef typename remove_all<MatrixType>::type MatrixTypeCleaned;
1046
-
1113
+
1047
1114
  template<typename Dest, typename TranspositionType>
1048
- static inline void run(Dest& dst, const TranspositionType& tr, const ExpressionType& xpr)
1115
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Dest& dst, const TranspositionType& tr, const ExpressionType& xpr)
1049
1116
  {
1050
1117
  MatrixType mat(xpr);
1051
1118
  typedef typename TranspositionType::StorageIndex StorageIndex;
@@ -1068,7 +1135,7 @@ template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
1068
1135
  struct generic_product_impl<Lhs, Rhs, TranspositionsShape, MatrixShape, ProductTag>
1069
1136
  {
1070
1137
  template<typename Dest>
1071
- static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
1138
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
1072
1139
  {
1073
1140
  transposition_matrix_product<Rhs, OnTheLeft, false, MatrixShape>::run(dst, lhs, rhs);
1074
1141
  }
@@ -1078,7 +1145,7 @@ template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
1078
1145
  struct generic_product_impl<Lhs, Rhs, MatrixShape, TranspositionsShape, ProductTag>
1079
1146
  {
1080
1147
  template<typename Dest>
1081
- static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
1148
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
1082
1149
  {
1083
1150
  transposition_matrix_product<Lhs, OnTheRight, false, MatrixShape>::run(dst, rhs, lhs);
1084
1151
  }
@@ -1089,7 +1156,7 @@ template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
1089
1156
  struct generic_product_impl<Transpose<Lhs>, Rhs, TranspositionsShape, MatrixShape, ProductTag>
1090
1157
  {
1091
1158
  template<typename Dest>
1092
- static void evalTo(Dest& dst, const Transpose<Lhs>& lhs, const Rhs& rhs)
1159
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Transpose<Lhs>& lhs, const Rhs& rhs)
1093
1160
  {
1094
1161
  transposition_matrix_product<Rhs, OnTheLeft, true, MatrixShape>::run(dst, lhs.nestedExpression(), rhs);
1095
1162
  }
@@ -1099,7 +1166,7 @@ template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
1099
1166
  struct generic_product_impl<Lhs, Transpose<Rhs>, MatrixShape, TranspositionsShape, ProductTag>
1100
1167
  {
1101
1168
  template<typename Dest>
1102
- static void evalTo(Dest& dst, const Lhs& lhs, const Transpose<Rhs>& rhs)
1169
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Transpose<Rhs>& rhs)
1103
1170
  {
1104
1171
  transposition_matrix_product<Lhs, OnTheRight, true, MatrixShape>::run(dst, rhs.nestedExpression(), lhs);
1105
1172
  }