tomoto 0.2.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (369)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +10 -0
  3. data/README.md +8 -10
  4. data/ext/tomoto/ct.cpp +11 -11
  5. data/ext/tomoto/dmr.cpp +14 -13
  6. data/ext/tomoto/dt.cpp +14 -14
  7. data/ext/tomoto/extconf.rb +7 -5
  8. data/ext/tomoto/gdmr.cpp +7 -7
  9. data/ext/tomoto/hdp.cpp +9 -9
  10. data/ext/tomoto/hlda.cpp +13 -13
  11. data/ext/tomoto/hpa.cpp +5 -5
  12. data/ext/tomoto/lda.cpp +42 -39
  13. data/ext/tomoto/llda.cpp +6 -6
  14. data/ext/tomoto/mglda.cpp +15 -15
  15. data/ext/tomoto/pa.cpp +6 -6
  16. data/ext/tomoto/plda.cpp +6 -6
  17. data/ext/tomoto/slda.cpp +8 -8
  18. data/ext/tomoto/{ext.cpp → tomoto.cpp} +8 -8
  19. data/ext/tomoto/utils.h +16 -70
  20. data/lib/tomoto/version.rb +1 -1
  21. data/lib/tomoto.rb +5 -1
  22. data/vendor/EigenRand/EigenRand/Core.h +10 -10
  23. data/vendor/EigenRand/EigenRand/Dists/Basic.h +208 -9
  24. data/vendor/EigenRand/EigenRand/Dists/Discrete.h +52 -31
  25. data/vendor/EigenRand/EigenRand/Dists/GammaPoisson.h +9 -8
  26. data/vendor/EigenRand/EigenRand/Dists/NormalExp.h +28 -21
  27. data/vendor/EigenRand/EigenRand/EigenRand +11 -6
  28. data/vendor/EigenRand/EigenRand/Macro.h +13 -7
  29. data/vendor/EigenRand/EigenRand/MorePacketMath.h +348 -740
  30. data/vendor/EigenRand/EigenRand/MvDists/Multinomial.h +5 -3
  31. data/vendor/EigenRand/EigenRand/MvDists/MvNormal.h +9 -3
  32. data/vendor/EigenRand/EigenRand/PacketFilter.h +11 -253
  33. data/vendor/EigenRand/EigenRand/PacketRandomEngine.h +21 -47
  34. data/vendor/EigenRand/EigenRand/RandUtils.h +50 -344
  35. data/vendor/EigenRand/EigenRand/arch/AVX/MorePacketMath.h +619 -0
  36. data/vendor/EigenRand/EigenRand/arch/AVX/PacketFilter.h +149 -0
  37. data/vendor/EigenRand/EigenRand/arch/AVX/RandUtils.h +228 -0
  38. data/vendor/EigenRand/EigenRand/arch/NEON/MorePacketMath.h +473 -0
  39. data/vendor/EigenRand/EigenRand/arch/NEON/PacketFilter.h +142 -0
  40. data/vendor/EigenRand/EigenRand/arch/NEON/RandUtils.h +126 -0
  41. data/vendor/EigenRand/EigenRand/arch/SSE/MorePacketMath.h +501 -0
  42. data/vendor/EigenRand/EigenRand/arch/SSE/PacketFilter.h +133 -0
  43. data/vendor/EigenRand/EigenRand/arch/SSE/RandUtils.h +120 -0
  44. data/vendor/EigenRand/EigenRand/doc.h +24 -12
  45. data/vendor/EigenRand/README.md +57 -4
  46. data/vendor/eigen/COPYING.APACHE +203 -0
  47. data/vendor/eigen/COPYING.BSD +1 -1
  48. data/vendor/eigen/COPYING.MINPACK +51 -52
  49. data/vendor/eigen/Eigen/Cholesky +0 -1
  50. data/vendor/eigen/Eigen/Core +112 -265
  51. data/vendor/eigen/Eigen/Eigenvalues +2 -3
  52. data/vendor/eigen/Eigen/Geometry +5 -8
  53. data/vendor/eigen/Eigen/Householder +0 -1
  54. data/vendor/eigen/Eigen/Jacobi +0 -1
  55. data/vendor/eigen/Eigen/KLUSupport +41 -0
  56. data/vendor/eigen/Eigen/LU +2 -5
  57. data/vendor/eigen/Eigen/OrderingMethods +0 -3
  58. data/vendor/eigen/Eigen/PaStiXSupport +1 -0
  59. data/vendor/eigen/Eigen/PardisoSupport +0 -0
  60. data/vendor/eigen/Eigen/QR +2 -3
  61. data/vendor/eigen/Eigen/QtAlignedMalloc +0 -1
  62. data/vendor/eigen/Eigen/SVD +0 -1
  63. data/vendor/eigen/Eigen/Sparse +0 -2
  64. data/vendor/eigen/Eigen/SparseCholesky +0 -8
  65. data/vendor/eigen/Eigen/SparseLU +4 -0
  66. data/vendor/eigen/Eigen/SparseQR +0 -1
  67. data/vendor/eigen/Eigen/src/Cholesky/LDLT.h +42 -27
  68. data/vendor/eigen/Eigen/src/Cholesky/LLT.h +39 -23
  69. data/vendor/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +90 -47
  70. data/vendor/eigen/Eigen/src/Core/ArithmeticSequence.h +413 -0
  71. data/vendor/eigen/Eigen/src/Core/Array.h +99 -11
  72. data/vendor/eigen/Eigen/src/Core/ArrayBase.h +3 -3
  73. data/vendor/eigen/Eigen/src/Core/ArrayWrapper.h +21 -21
  74. data/vendor/eigen/Eigen/src/Core/Assign.h +1 -1
  75. data/vendor/eigen/Eigen/src/Core/AssignEvaluator.h +125 -50
  76. data/vendor/eigen/Eigen/src/Core/Assign_MKL.h +10 -10
  77. data/vendor/eigen/Eigen/src/Core/BandMatrix.h +16 -16
  78. data/vendor/eigen/Eigen/src/Core/Block.h +56 -60
  79. data/vendor/eigen/Eigen/src/Core/BooleanRedux.h +29 -31
  80. data/vendor/eigen/Eigen/src/Core/CommaInitializer.h +7 -3
  81. data/vendor/eigen/Eigen/src/Core/CoreEvaluators.h +325 -272
  82. data/vendor/eigen/Eigen/src/Core/CoreIterators.h +5 -0
  83. data/vendor/eigen/Eigen/src/Core/CwiseBinaryOp.h +21 -22
  84. data/vendor/eigen/Eigen/src/Core/CwiseNullaryOp.h +153 -18
  85. data/vendor/eigen/Eigen/src/Core/CwiseUnaryOp.h +6 -6
  86. data/vendor/eigen/Eigen/src/Core/CwiseUnaryView.h +14 -10
  87. data/vendor/eigen/Eigen/src/Core/DenseBase.h +132 -42
  88. data/vendor/eigen/Eigen/src/Core/DenseCoeffsBase.h +25 -21
  89. data/vendor/eigen/Eigen/src/Core/DenseStorage.h +153 -71
  90. data/vendor/eigen/Eigen/src/Core/Diagonal.h +21 -23
  91. data/vendor/eigen/Eigen/src/Core/DiagonalMatrix.h +50 -2
  92. data/vendor/eigen/Eigen/src/Core/DiagonalProduct.h +1 -1
  93. data/vendor/eigen/Eigen/src/Core/Dot.h +10 -10
  94. data/vendor/eigen/Eigen/src/Core/EigenBase.h +10 -9
  95. data/vendor/eigen/Eigen/src/Core/ForceAlignedAccess.h +8 -4
  96. data/vendor/eigen/Eigen/src/Core/Fuzzy.h +3 -3
  97. data/vendor/eigen/Eigen/src/Core/GeneralProduct.h +20 -10
  98. data/vendor/eigen/Eigen/src/Core/GenericPacketMath.h +599 -152
  99. data/vendor/eigen/Eigen/src/Core/GlobalFunctions.h +40 -33
  100. data/vendor/eigen/Eigen/src/Core/IO.h +40 -7
  101. data/vendor/eigen/Eigen/src/Core/IndexedView.h +237 -0
  102. data/vendor/eigen/Eigen/src/Core/Inverse.h +9 -10
  103. data/vendor/eigen/Eigen/src/Core/Map.h +7 -7
  104. data/vendor/eigen/Eigen/src/Core/MapBase.h +10 -3
  105. data/vendor/eigen/Eigen/src/Core/MathFunctions.h +767 -125
  106. data/vendor/eigen/Eigen/src/Core/MathFunctionsImpl.h +118 -19
  107. data/vendor/eigen/Eigen/src/Core/Matrix.h +131 -25
  108. data/vendor/eigen/Eigen/src/Core/MatrixBase.h +21 -3
  109. data/vendor/eigen/Eigen/src/Core/NestByValue.h +25 -50
  110. data/vendor/eigen/Eigen/src/Core/NoAlias.h +4 -3
  111. data/vendor/eigen/Eigen/src/Core/NumTraits.h +107 -20
  112. data/vendor/eigen/Eigen/src/Core/PartialReduxEvaluator.h +232 -0
  113. data/vendor/eigen/Eigen/src/Core/PermutationMatrix.h +3 -31
  114. data/vendor/eigen/Eigen/src/Core/PlainObjectBase.h +152 -59
  115. data/vendor/eigen/Eigen/src/Core/Product.h +30 -25
  116. data/vendor/eigen/Eigen/src/Core/ProductEvaluators.h +192 -125
  117. data/vendor/eigen/Eigen/src/Core/Random.h +37 -1
  118. data/vendor/eigen/Eigen/src/Core/Redux.h +180 -170
  119. data/vendor/eigen/Eigen/src/Core/Ref.h +121 -23
  120. data/vendor/eigen/Eigen/src/Core/Replicate.h +8 -8
  121. data/vendor/eigen/Eigen/src/Core/Reshaped.h +454 -0
  122. data/vendor/eigen/Eigen/src/Core/ReturnByValue.h +7 -5
  123. data/vendor/eigen/Eigen/src/Core/Reverse.h +18 -12
  124. data/vendor/eigen/Eigen/src/Core/Select.h +8 -6
  125. data/vendor/eigen/Eigen/src/Core/SelfAdjointView.h +33 -20
  126. data/vendor/eigen/Eigen/src/Core/Solve.h +14 -14
  127. data/vendor/eigen/Eigen/src/Core/SolveTriangular.h +16 -16
  128. data/vendor/eigen/Eigen/src/Core/SolverBase.h +41 -3
  129. data/vendor/eigen/Eigen/src/Core/StableNorm.h +100 -70
  130. data/vendor/eigen/Eigen/src/Core/StlIterators.h +463 -0
  131. data/vendor/eigen/Eigen/src/Core/Stride.h +9 -4
  132. data/vendor/eigen/Eigen/src/Core/Swap.h +5 -4
  133. data/vendor/eigen/Eigen/src/Core/Transpose.h +88 -27
  134. data/vendor/eigen/Eigen/src/Core/Transpositions.h +26 -47
  135. data/vendor/eigen/Eigen/src/Core/TriangularMatrix.h +93 -75
  136. data/vendor/eigen/Eigen/src/Core/VectorBlock.h +5 -5
  137. data/vendor/eigen/Eigen/src/Core/VectorwiseOp.h +159 -70
  138. data/vendor/eigen/Eigen/src/Core/Visitor.h +137 -29
  139. data/vendor/eigen/Eigen/src/Core/arch/AVX/Complex.h +50 -129
  140. data/vendor/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +126 -337
  141. data/vendor/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +1092 -155
  142. data/vendor/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +65 -1
  143. data/vendor/eigen/Eigen/src/Core/arch/AVX512/Complex.h +422 -0
  144. data/vendor/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +207 -236
  145. data/vendor/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1482 -495
  146. data/vendor/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +89 -0
  147. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +152 -165
  148. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +19 -251
  149. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2937 -0
  150. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +221 -0
  151. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +629 -0
  152. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2042 -392
  153. data/vendor/eigen/Eigen/src/Core/arch/CUDA/Complex.h +235 -80
  154. data/vendor/eigen/Eigen/src/Core/arch/Default/BFloat16.h +700 -0
  155. data/vendor/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +102 -14
  156. data/vendor/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1649 -0
  157. data/vendor/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +110 -0
  158. data/vendor/eigen/Eigen/src/Core/arch/Default/Half.h +942 -0
  159. data/vendor/eigen/Eigen/src/Core/arch/Default/Settings.h +1 -1
  160. data/vendor/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +120 -0
  161. data/vendor/eigen/Eigen/src/Core/arch/{CUDA → GPU}/MathFunctions.h +16 -4
  162. data/vendor/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1685 -0
  163. data/vendor/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +80 -0
  164. data/vendor/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
  165. data/vendor/eigen/Eigen/src/Core/arch/MSA/Complex.h +648 -0
  166. data/vendor/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +387 -0
  167. data/vendor/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1233 -0
  168. data/vendor/eigen/Eigen/src/Core/arch/NEON/Complex.h +313 -219
  169. data/vendor/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +183 -0
  170. data/vendor/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +54 -70
  171. data/vendor/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4376 -549
  172. data/vendor/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1419 -0
  173. data/vendor/eigen/Eigen/src/Core/arch/SSE/Complex.h +59 -179
  174. data/vendor/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +65 -428
  175. data/vendor/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +893 -283
  176. data/vendor/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +65 -0
  177. data/vendor/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +44 -0
  178. data/vendor/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +752 -0
  179. data/vendor/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +49 -0
  180. data/vendor/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +232 -0
  181. data/vendor/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +301 -0
  182. data/vendor/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +670 -0
  183. data/vendor/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +694 -0
  184. data/vendor/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +85 -0
  185. data/vendor/eigen/Eigen/src/Core/arch/ZVector/Complex.h +212 -183
  186. data/vendor/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +101 -5
  187. data/vendor/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +510 -395
  188. data/vendor/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +11 -2
  189. data/vendor/eigen/Eigen/src/Core/functors/BinaryFunctors.h +112 -46
  190. data/vendor/eigen/Eigen/src/Core/functors/NullaryFunctors.h +31 -30
  191. data/vendor/eigen/Eigen/src/Core/functors/StlFunctors.h +32 -2
  192. data/vendor/eigen/Eigen/src/Core/functors/UnaryFunctors.h +355 -16
  193. data/vendor/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1075 -586
  194. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +49 -24
  195. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +41 -35
  196. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +6 -6
  197. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +4 -2
  198. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +382 -483
  199. data/vendor/eigen/Eigen/src/Core/products/Parallelizer.h +22 -5
  200. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +53 -30
  201. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +16 -8
  202. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +8 -6
  203. data/vendor/eigen/Eigen/src/Core/products/SelfadjointProduct.h +4 -4
  204. data/vendor/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +5 -4
  205. data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +33 -27
  206. data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +14 -12
  207. data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +36 -34
  208. data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +8 -4
  209. data/vendor/eigen/Eigen/src/Core/products/TriangularSolverVector.h +13 -10
  210. data/vendor/eigen/Eigen/src/Core/util/BlasUtil.h +304 -119
  211. data/vendor/eigen/Eigen/src/Core/util/ConfigureVectorization.h +512 -0
  212. data/vendor/eigen/Eigen/src/Core/util/Constants.h +25 -9
  213. data/vendor/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +26 -3
  214. data/vendor/eigen/Eigen/src/Core/util/ForwardDeclarations.h +29 -9
  215. data/vendor/eigen/Eigen/src/Core/util/IndexedViewHelper.h +186 -0
  216. data/vendor/eigen/Eigen/src/Core/util/IntegralConstant.h +272 -0
  217. data/vendor/eigen/Eigen/src/Core/util/MKL_support.h +8 -1
  218. data/vendor/eigen/Eigen/src/Core/util/Macros.h +709 -246
  219. data/vendor/eigen/Eigen/src/Core/util/Memory.h +222 -52
  220. data/vendor/eigen/Eigen/src/Core/util/Meta.h +355 -77
  221. data/vendor/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +5 -1
  222. data/vendor/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
  223. data/vendor/eigen/Eigen/src/Core/util/StaticAssert.h +8 -5
  224. data/vendor/eigen/Eigen/src/Core/util/SymbolicIndex.h +293 -0
  225. data/vendor/eigen/Eigen/src/Core/util/XprHelper.h +65 -30
  226. data/vendor/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +1 -1
  227. data/vendor/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +7 -4
  228. data/vendor/eigen/Eigen/src/Eigenvalues/EigenSolver.h +2 -2
  229. data/vendor/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +1 -1
  230. data/vendor/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +2 -2
  231. data/vendor/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +2 -2
  232. data/vendor/eigen/Eigen/src/Eigenvalues/RealQZ.h +9 -6
  233. data/vendor/eigen/Eigen/src/Eigenvalues/RealSchur.h +21 -9
  234. data/vendor/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +77 -43
  235. data/vendor/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +20 -15
  236. data/vendor/eigen/Eigen/src/Geometry/AlignedBox.h +99 -5
  237. data/vendor/eigen/Eigen/src/Geometry/AngleAxis.h +4 -4
  238. data/vendor/eigen/Eigen/src/Geometry/EulerAngles.h +3 -3
  239. data/vendor/eigen/Eigen/src/Geometry/Homogeneous.h +15 -11
  240. data/vendor/eigen/Eigen/src/Geometry/Hyperplane.h +1 -1
  241. data/vendor/eigen/Eigen/src/Geometry/OrthoMethods.h +3 -2
  242. data/vendor/eigen/Eigen/src/Geometry/ParametrizedLine.h +39 -2
  243. data/vendor/eigen/Eigen/src/Geometry/Quaternion.h +70 -14
  244. data/vendor/eigen/Eigen/src/Geometry/Rotation2D.h +3 -3
  245. data/vendor/eigen/Eigen/src/Geometry/Scaling.h +23 -5
  246. data/vendor/eigen/Eigen/src/Geometry/Transform.h +88 -67
  247. data/vendor/eigen/Eigen/src/Geometry/Translation.h +6 -12
  248. data/vendor/eigen/Eigen/src/Geometry/Umeyama.h +1 -1
  249. data/vendor/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +168 -0
  250. data/vendor/eigen/Eigen/src/Householder/BlockHouseholder.h +9 -2
  251. data/vendor/eigen/Eigen/src/Householder/Householder.h +8 -4
  252. data/vendor/eigen/Eigen/src/Householder/HouseholderSequence.h +123 -48
  253. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +15 -15
  254. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +7 -23
  255. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +5 -22
  256. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +41 -47
  257. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +51 -60
  258. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +70 -20
  259. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +2 -20
  260. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +11 -9
  261. data/vendor/eigen/Eigen/src/Jacobi/Jacobi.h +31 -10
  262. data/vendor/eigen/Eigen/src/KLUSupport/KLUSupport.h +358 -0
  263. data/vendor/eigen/Eigen/src/LU/Determinant.h +35 -19
  264. data/vendor/eigen/Eigen/src/LU/FullPivLU.h +29 -43
  265. data/vendor/eigen/Eigen/src/LU/InverseImpl.h +25 -8
  266. data/vendor/eigen/Eigen/src/LU/PartialPivLU.h +71 -58
  267. data/vendor/eigen/Eigen/src/LU/arch/InverseSize4.h +351 -0
  268. data/vendor/eigen/Eigen/src/OrderingMethods/Amd.h +7 -17
  269. data/vendor/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +297 -277
  270. data/vendor/eigen/Eigen/src/OrderingMethods/Ordering.h +6 -10
  271. data/vendor/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +1 -1
  272. data/vendor/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +9 -7
  273. data/vendor/eigen/Eigen/src/QR/ColPivHouseholderQR.h +41 -20
  274. data/vendor/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +100 -27
  275. data/vendor/eigen/Eigen/src/QR/FullPivHouseholderQR.h +59 -22
  276. data/vendor/eigen/Eigen/src/QR/HouseholderQR.h +48 -23
  277. data/vendor/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +25 -3
  278. data/vendor/eigen/Eigen/src/SVD/BDCSVD.h +183 -63
  279. data/vendor/eigen/Eigen/src/SVD/JacobiSVD.h +22 -14
  280. data/vendor/eigen/Eigen/src/SVD/SVDBase.h +83 -22
  281. data/vendor/eigen/Eigen/src/SVD/UpperBidiagonalization.h +3 -3
  282. data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +17 -9
  283. data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +12 -37
  284. data/vendor/eigen/Eigen/src/SparseCore/AmbiVector.h +3 -2
  285. data/vendor/eigen/Eigen/src/SparseCore/CompressedStorage.h +16 -0
  286. data/vendor/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +6 -6
  287. data/vendor/eigen/Eigen/src/SparseCore/SparseAssign.h +81 -27
  288. data/vendor/eigen/Eigen/src/SparseCore/SparseBlock.h +25 -57
  289. data/vendor/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +40 -11
  290. data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +11 -15
  291. data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +4 -2
  292. data/vendor/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +30 -8
  293. data/vendor/eigen/Eigen/src/SparseCore/SparseMatrix.h +126 -11
  294. data/vendor/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +5 -12
  295. data/vendor/eigen/Eigen/src/SparseCore/SparseProduct.h +13 -1
  296. data/vendor/eigen/Eigen/src/SparseCore/SparseRef.h +7 -7
  297. data/vendor/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +5 -2
  298. data/vendor/eigen/Eigen/src/SparseCore/SparseUtil.h +8 -0
  299. data/vendor/eigen/Eigen/src/SparseCore/SparseVector.h +1 -1
  300. data/vendor/eigen/Eigen/src/SparseCore/SparseView.h +1 -0
  301. data/vendor/eigen/Eigen/src/SparseLU/SparseLU.h +162 -12
  302. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +1 -1
  303. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +76 -2
  304. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +2 -2
  305. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +1 -1
  306. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +1 -1
  307. data/vendor/eigen/Eigen/src/SparseQR/SparseQR.h +19 -6
  308. data/vendor/eigen/Eigen/src/StlSupport/StdDeque.h +2 -12
  309. data/vendor/eigen/Eigen/src/StlSupport/StdList.h +2 -2
  310. data/vendor/eigen/Eigen/src/StlSupport/StdVector.h +2 -2
  311. data/vendor/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +6 -8
  312. data/vendor/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +175 -39
  313. data/vendor/eigen/Eigen/src/misc/lapacke.h +5 -4
  314. data/vendor/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +28 -2
  315. data/vendor/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +155 -11
  316. data/vendor/eigen/Eigen/src/plugins/BlockMethods.h +626 -242
  317. data/vendor/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +14 -0
  318. data/vendor/eigen/Eigen/src/plugins/IndexedViewMethods.h +262 -0
  319. data/vendor/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +4 -4
  320. data/vendor/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +10 -0
  321. data/vendor/eigen/Eigen/src/plugins/ReshapedMethods.h +149 -0
  322. data/vendor/eigen/README.md +2 -0
  323. data/vendor/eigen/bench/btl/README +1 -1
  324. data/vendor/eigen/bench/tensors/README +6 -7
  325. data/vendor/eigen/ci/README.md +56 -0
  326. data/vendor/eigen/demos/mix_eigen_and_c/README +1 -1
  327. data/vendor/eigen/unsupported/Eigen/CXX11/src/Tensor/README.md +213 -158
  328. data/vendor/eigen/unsupported/README.txt +1 -1
  329. data/vendor/tomotopy/README.kr.rst +78 -0
  330. data/vendor/tomotopy/README.rst +75 -0
  331. data/vendor/tomotopy/src/Labeling/FoRelevance.cpp +2 -2
  332. data/vendor/tomotopy/src/Labeling/Phraser.hpp +4 -4
  333. data/vendor/tomotopy/src/TopicModel/CTModel.hpp +7 -3
  334. data/vendor/tomotopy/src/TopicModel/DMRModel.hpp +7 -3
  335. data/vendor/tomotopy/src/TopicModel/DTModel.hpp +6 -3
  336. data/vendor/tomotopy/src/TopicModel/GDMRModel.hpp +2 -2
  337. data/vendor/tomotopy/src/TopicModel/HDP.h +1 -0
  338. data/vendor/tomotopy/src/TopicModel/HDPModel.hpp +57 -6
  339. data/vendor/tomotopy/src/TopicModel/HLDAModel.hpp +6 -3
  340. data/vendor/tomotopy/src/TopicModel/HPAModel.hpp +3 -2
  341. data/vendor/tomotopy/src/TopicModel/LDA.h +3 -3
  342. data/vendor/tomotopy/src/TopicModel/LDACVB0Model.hpp +5 -5
  343. data/vendor/tomotopy/src/TopicModel/LDAModel.hpp +50 -19
  344. data/vendor/tomotopy/src/TopicModel/LLDAModel.hpp +6 -2
  345. data/vendor/tomotopy/src/TopicModel/MGLDAModel.hpp +3 -2
  346. data/vendor/tomotopy/src/TopicModel/PAModel.hpp +1 -1
  347. data/vendor/tomotopy/src/TopicModel/PLDAModel.hpp +6 -2
  348. data/vendor/tomotopy/src/TopicModel/PT.h +3 -1
  349. data/vendor/tomotopy/src/TopicModel/PTModel.hpp +36 -3
  350. data/vendor/tomotopy/src/TopicModel/SLDAModel.hpp +6 -3
  351. data/vendor/tomotopy/src/TopicModel/TopicModel.hpp +55 -26
  352. data/vendor/tomotopy/src/Utils/AliasMethod.hpp +5 -4
  353. data/vendor/tomotopy/src/Utils/Dictionary.h +2 -2
  354. data/vendor/tomotopy/src/Utils/EigenAddonOps.hpp +36 -1
  355. data/vendor/tomotopy/src/Utils/MultiNormalDistribution.hpp +1 -1
  356. data/vendor/tomotopy/src/Utils/TruncMultiNormal.hpp +1 -1
  357. data/vendor/tomotopy/src/Utils/exception.h +6 -0
  358. data/vendor/tomotopy/src/Utils/math.h +2 -2
  359. data/vendor/tomotopy/src/Utils/sample.hpp +14 -12
  360. data/vendor/tomotopy/src/Utils/serializer.hpp +30 -5
  361. data/vendor/tomotopy/src/Utils/sse_gamma.h +0 -3
  362. metadata +64 -18
  363. data/vendor/eigen/Eigen/CMakeLists.txt +0 -19
  364. data/vendor/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -674
  365. data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
  366. data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
  367. data/vendor/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
  368. data/vendor/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
  369. data/vendor/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
@@ -14,27 +14,27 @@
14
14
  #define EIGEN_PRODUCTEVALUATORS_H
15
15
 
16
16
  namespace Eigen {
17
-
17
+
18
18
  namespace internal {
19
19
 
20
20
  /** \internal
21
21
  * Evaluator of a product expression.
22
22
  * Since products require special treatments to handle all possible cases,
23
- * we simply deffer the evaluation logic to a product_evaluator class
23
+ * we simply defer the evaluation logic to a product_evaluator class
24
24
  * which offers more partial specialization possibilities.
25
- *
25
+ *
26
26
  * \sa class product_evaluator
27
27
  */
28
28
  template<typename Lhs, typename Rhs, int Options>
29
- struct evaluator<Product<Lhs, Rhs, Options> >
29
+ struct evaluator<Product<Lhs, Rhs, Options> >
30
30
  : public product_evaluator<Product<Lhs, Rhs, Options> >
31
31
  {
32
32
  typedef Product<Lhs, Rhs, Options> XprType;
33
33
  typedef product_evaluator<XprType> Base;
34
-
34
+
35
35
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr) : Base(xpr) {}
36
36
  };
37
-
37
+
38
38
  // Catch "scalar * ( A * B )" and transform it to "(A*scalar) * B"
39
39
  // TODO we should apply that rule only if that's really helpful
40
40
  template<typename Lhs, typename Rhs, typename Scalar1, typename Scalar2, typename Plain1>
@@ -62,12 +62,12 @@ struct evaluator<CwiseBinaryOp<internal::scalar_product_op<Scalar1,Scalar2>,
62
62
 
63
63
 
64
64
  template<typename Lhs, typename Rhs, int DiagIndex>
65
- struct evaluator<Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> >
65
+ struct evaluator<Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> >
66
66
  : public evaluator<Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex> >
67
67
  {
68
68
  typedef Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> XprType;
69
69
  typedef evaluator<Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex> > Base;
70
-
70
+
71
71
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr)
72
72
  : Base(Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex>(
73
73
  Product<Lhs, Rhs, LazyProduct>(xpr.nestedExpression().lhs(), xpr.nestedExpression().rhs()),
@@ -108,27 +108,27 @@ struct product_evaluator<Product<Lhs, Rhs, Options>, ProductTag, LhsShape, RhsSh
108
108
  : m_result(xpr.rows(), xpr.cols())
109
109
  {
110
110
  ::new (static_cast<Base*>(this)) Base(m_result);
111
-
111
+
112
112
  // FIXME shall we handle nested_eval here?,
113
113
  // if so, then we must take care at removing the call to nested_eval in the specializations (e.g., in permutation_matrix_product, transposition_matrix_product, etc.)
114
114
  // typedef typename internal::nested_eval<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
115
115
  // typedef typename internal::nested_eval<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
116
116
  // typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned;
117
117
  // typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned;
118
- //
118
+ //
119
119
  // const LhsNested lhs(xpr.lhs());
120
120
  // const RhsNested rhs(xpr.rhs());
121
- //
121
+ //
122
122
  // generic_product_impl<LhsNestedCleaned, RhsNestedCleaned>::evalTo(m_result, lhs, rhs);
123
123
 
124
124
  generic_product_impl<Lhs, Rhs, LhsShape, RhsShape, ProductTag>::evalTo(m_result, xpr.lhs(), xpr.rhs());
125
125
  }
126
-
127
- protected:
126
+
127
+ protected:
128
128
  PlainObject m_result;
129
129
  };
130
130
 
131
- // The following three shortcuts are enabled only if the scalar types match excatly.
131
+ // The following three shortcuts are enabled only if the scalar types match exactly.
132
132
  // TODO: we could enable them for different scalar types when the product is not vectorized.
133
133
 
134
134
  // Dense = Product
@@ -137,7 +137,7 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::assign_op<Scal
137
137
  typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type>
138
138
  {
139
139
  typedef Product<Lhs,Rhs,Options> SrcXprType;
140
- static EIGEN_STRONG_INLINE
140
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
141
141
  void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
142
142
  {
143
143
  Index dstRows = src.rows();
@@ -155,7 +155,7 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::add_assign_op<
155
155
  typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type>
156
156
  {
157
157
  typedef Product<Lhs,Rhs,Options> SrcXprType;
158
- static EIGEN_STRONG_INLINE
158
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
159
159
  void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<Scalar,Scalar> &)
160
160
  {
161
161
  eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
@@ -170,7 +170,7 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::sub_assign_op<
170
170
  typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type>
171
171
  {
172
172
  typedef Product<Lhs,Rhs,Options> SrcXprType;
173
- static EIGEN_STRONG_INLINE
173
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
174
174
  void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<Scalar,Scalar> &)
175
175
  {
176
176
  eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
@@ -190,7 +190,7 @@ struct Assignment<DstXprType, CwiseBinaryOp<internal::scalar_product_op<ScalarBi
190
190
  typedef CwiseBinaryOp<internal::scalar_product_op<ScalarBis,Scalar>,
191
191
  const CwiseNullaryOp<internal::scalar_constant_op<ScalarBis>,Plain>,
192
192
  const Product<Lhs,Rhs,DefaultProduct> > SrcXprType;
193
- static EIGEN_STRONG_INLINE
193
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
194
194
  void run(DstXprType &dst, const SrcXprType &src, const AssignFunc& func)
195
195
  {
196
196
  call_assignment_no_alias(dst, (src.lhs().functor().m_other * src.rhs().lhs())*src.rhs().rhs(), func);
@@ -217,7 +217,7 @@ template<typename DstXprType, typename OtherXpr, typename ProductType, typename
217
217
  struct assignment_from_xpr_op_product
218
218
  {
219
219
  template<typename SrcXprType, typename InitialFunc>
220
- static EIGEN_STRONG_INLINE
220
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
221
221
  void run(DstXprType &dst, const SrcXprType &src, const InitialFunc& /*func*/)
222
222
  {
223
223
  call_assignment_no_alias(dst, src.lhs(), Func1());
@@ -246,19 +246,19 @@ template<typename Lhs, typename Rhs>
246
246
  struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,InnerProduct>
247
247
  {
248
248
  template<typename Dst>
249
- static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
249
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
250
250
  {
251
251
  dst.coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum();
252
252
  }
253
-
253
+
254
254
  template<typename Dst>
255
- static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
255
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
256
256
  {
257
257
  dst.coeffRef(0,0) += (lhs.transpose().cwiseProduct(rhs)).sum();
258
258
  }
259
-
259
+
260
260
  template<typename Dst>
261
- static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
261
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
262
262
  { dst.coeffRef(0,0) -= (lhs.transpose().cwiseProduct(rhs)).sum(); }
263
263
  };
264
264
 
@@ -269,10 +269,10 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,InnerProduct>
269
269
 
270
270
  // Column major result
271
271
  template<typename Dst, typename Lhs, typename Rhs, typename Func>
272
- void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const false_type&)
272
+ void EIGEN_DEVICE_FUNC outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const false_type&)
273
273
  {
274
274
  evaluator<Rhs> rhsEval(rhs);
275
- typename nested_eval<Lhs,Rhs::SizeAtCompileTime>::type actual_lhs(lhs);
275
+ ei_declare_local_nested_eval(Lhs,lhs,Rhs::SizeAtCompileTime,actual_lhs);
276
276
  // FIXME if cols is large enough, then it might be useful to make sure that lhs is sequentially stored
277
277
  // FIXME not very good if rhs is real and lhs complex while alpha is real too
278
278
  const Index cols = dst.cols();
@@ -282,10 +282,10 @@ void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const
282
282
 
283
283
  // Row major result
284
284
  template<typename Dst, typename Lhs, typename Rhs, typename Func>
285
- void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const true_type&)
285
+ void EIGEN_DEVICE_FUNC outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const true_type&)
286
286
  {
287
287
  evaluator<Lhs> lhsEval(lhs);
288
- typename nested_eval<Rhs,Lhs::SizeAtCompileTime>::type actual_rhs(rhs);
288
+ ei_declare_local_nested_eval(Rhs,rhs,Lhs::SizeAtCompileTime,actual_rhs);
289
289
  // FIXME if rows is large enough, then it might be useful to make sure that rhs is sequentially stored
290
290
  // FIXME not very good if lhs is real and rhs complex while alpha is real too
291
291
  const Index rows = dst.rows();
@@ -298,43 +298,43 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,OuterProduct>
298
298
  {
299
299
  template<typename T> struct is_row_major : internal::conditional<(int(T::Flags)&RowMajorBit), internal::true_type, internal::false_type>::type {};
300
300
  typedef typename Product<Lhs,Rhs>::Scalar Scalar;
301
-
301
+
302
302
  // TODO it would be nice to be able to exploit our *_assign_op functors for that purpose
303
- struct set { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() = src; } };
304
- struct add { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() += src; } };
305
- struct sub { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() -= src; } };
303
+ struct set { template<typename Dst, typename Src> EIGEN_DEVICE_FUNC void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() = src; } };
304
+ struct add { template<typename Dst, typename Src> EIGEN_DEVICE_FUNC void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() += src; } };
305
+ struct sub { template<typename Dst, typename Src> EIGEN_DEVICE_FUNC void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() -= src; } };
306
306
  struct adds {
307
307
  Scalar m_scale;
308
308
  explicit adds(const Scalar& s) : m_scale(s) {}
309
- template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const {
309
+ template<typename Dst, typename Src> void EIGEN_DEVICE_FUNC operator()(const Dst& dst, const Src& src) const {
310
310
  dst.const_cast_derived() += m_scale * src;
311
311
  }
312
312
  };
313
-
313
+
314
314
  template<typename Dst>
315
- static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
315
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
316
316
  {
317
317
  internal::outer_product_selector_run(dst, lhs, rhs, set(), is_row_major<Dst>());
318
318
  }
319
-
319
+
320
320
  template<typename Dst>
321
- static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
321
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
322
322
  {
323
323
  internal::outer_product_selector_run(dst, lhs, rhs, add(), is_row_major<Dst>());
324
324
  }
325
-
325
+
326
326
  template<typename Dst>
327
- static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
327
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
328
328
  {
329
329
  internal::outer_product_selector_run(dst, lhs, rhs, sub(), is_row_major<Dst>());
330
330
  }
331
-
331
+
332
332
  template<typename Dst>
333
- static EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
333
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
334
334
  {
335
335
  internal::outer_product_selector_run(dst, lhs, rhs, adds(alpha), is_row_major<Dst>());
336
336
  }
337
-
337
+
338
338
  };
339
339
 
340
340
 
@@ -343,21 +343,21 @@ template<typename Lhs, typename Rhs, typename Derived>
343
343
  struct generic_product_impl_base
344
344
  {
345
345
  typedef typename Product<Lhs,Rhs>::Scalar Scalar;
346
-
346
+
347
347
  template<typename Dst>
348
- static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
348
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
349
349
  { dst.setZero(); scaleAndAddTo(dst, lhs, rhs, Scalar(1)); }
350
350
 
351
351
  template<typename Dst>
352
- static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
352
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
353
353
  { scaleAndAddTo(dst,lhs, rhs, Scalar(1)); }
354
354
 
355
355
  template<typename Dst>
356
- static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
356
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
357
357
  { scaleAndAddTo(dst, lhs, rhs, Scalar(-1)); }
358
-
358
+
359
359
  template<typename Dst>
360
- static EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
360
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
361
361
  { Derived::scaleAndAddTo(dst,lhs,rhs,alpha); }
362
362
 
363
363
  };
@@ -373,8 +373,13 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemvProduct>
373
373
  typedef typename internal::remove_all<typename internal::conditional<int(Side)==OnTheRight,LhsNested,RhsNested>::type>::type MatrixType;
374
374
 
375
375
  template<typename Dest>
376
- static EIGEN_STRONG_INLINE void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
376
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
377
377
  {
378
+ // Fallback to inner product if both the lhs and rhs is a runtime vector.
379
+ if (lhs.rows() == 1 && rhs.cols() == 1) {
380
+ dst.coeffRef(0,0) += alpha * lhs.row(0).conjugate().dot(rhs.col(0));
381
+ return;
382
+ }
378
383
  LhsNested actual_lhs(lhs);
379
384
  RhsNested actual_rhs(rhs);
380
385
  internal::gemv_dense_selector<Side,
@@ -385,35 +390,84 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemvProduct>
385
390
  };
386
391
 
387
392
  template<typename Lhs, typename Rhs>
388
- struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode>
393
+ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode>
389
394
  {
390
395
  typedef typename Product<Lhs,Rhs>::Scalar Scalar;
391
-
396
+
392
397
  template<typename Dst>
393
- static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
398
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
394
399
  {
395
400
  // Same as: dst.noalias() = lhs.lazyProduct(rhs);
396
401
  // but easier on the compiler side
397
402
  call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::assign_op<typename Dst::Scalar,Scalar>());
398
403
  }
399
-
404
+
400
405
  template<typename Dst>
401
- static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
406
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
402
407
  {
403
408
  // dst.noalias() += lhs.lazyProduct(rhs);
404
409
  call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::add_assign_op<typename Dst::Scalar,Scalar>());
405
410
  }
406
-
411
+
407
412
  template<typename Dst>
408
- static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
413
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
409
414
  {
410
415
  // dst.noalias() -= lhs.lazyProduct(rhs);
411
416
  call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::sub_assign_op<typename Dst::Scalar,Scalar>());
412
417
  }
413
-
414
- // template<typename Dst>
415
- // static inline void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
416
- // { dst.noalias() += alpha * lhs.lazyProduct(rhs); }
418
+
419
+ // This is a special evaluation path called from generic_product_impl<...,GemmProduct> in file GeneralMatrixMatrix.h
420
+ // This variant tries to extract scalar multiples from both the LHS and RHS and factor them out. For instance:
421
+ // dst {,+,-}= (s1*A)*(B*s2)
422
+ // will be rewritten as:
423
+ // dst {,+,-}= (s1*s2) * (A.lazyProduct(B))
424
+ // There are at least four benefits of doing so:
425
+ // 1 - huge performance gain for heap-allocated matrix types as it save costly allocations.
426
+ // 2 - it is faster than simply by-passing the heap allocation through stack allocation.
427
+ // 3 - it makes this fallback consistent with the heavy GEMM routine.
428
+ // 4 - it fully by-passes huge stack allocation attempts when multiplying huge fixed-size matrices.
429
+ // (see https://stackoverflow.com/questions/54738495)
430
+ // For small fixed sizes matrices, howver, the gains are less obvious, it is sometimes x2 faster, but sometimes x3 slower,
431
+ // and the behavior depends also a lot on the compiler... This is why this re-writting strategy is currently
432
+ // enabled only when falling back from the main GEMM.
433
+ template<typename Dst, typename Func>
434
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
435
+ void eval_dynamic(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Func &func)
436
+ {
437
+ enum {
438
+ HasScalarFactor = blas_traits<Lhs>::HasScalarFactor || blas_traits<Rhs>::HasScalarFactor,
439
+ ConjLhs = blas_traits<Lhs>::NeedToConjugate,
440
+ ConjRhs = blas_traits<Rhs>::NeedToConjugate
441
+ };
442
+ // FIXME: in c++11 this should be auto, and extractScalarFactor should also return auto
443
+ // this is important for real*complex_mat
444
+ Scalar actualAlpha = combine_scalar_factors<Scalar>(lhs, rhs);
445
+
446
+ eval_dynamic_impl(dst,
447
+ blas_traits<Lhs>::extract(lhs).template conjugateIf<ConjLhs>(),
448
+ blas_traits<Rhs>::extract(rhs).template conjugateIf<ConjRhs>(),
449
+ func,
450
+ actualAlpha,
451
+ typename conditional<HasScalarFactor,true_type,false_type>::type());
452
+ }
453
+
454
+ protected:
455
+
456
+ template<typename Dst, typename LhsT, typename RhsT, typename Func, typename Scalar>
457
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
458
+ void eval_dynamic_impl(Dst& dst, const LhsT& lhs, const RhsT& rhs, const Func &func, const Scalar& s /* == 1 */, false_type)
459
+ {
460
+ EIGEN_UNUSED_VARIABLE(s);
461
+ eigen_internal_assert(s==Scalar(1));
462
+ call_restricted_packet_assignment_no_alias(dst, lhs.lazyProduct(rhs), func);
463
+ }
464
+
465
+ template<typename Dst, typename LhsT, typename RhsT, typename Func, typename Scalar>
466
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
467
+ void eval_dynamic_impl(Dst& dst, const LhsT& lhs, const RhsT& rhs, const Func &func, const Scalar& s, true_type)
468
+ {
469
+ call_restricted_packet_assignment_no_alias(dst, s * lhs.lazyProduct(rhs), func);
470
+ }
417
471
  };
418
472
 
419
473
  // This specialization enforces the use of a coefficient-based evaluation strategy
@@ -471,7 +525,7 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
471
525
 
472
526
  typedef typename internal::nested_eval<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
473
527
  typedef typename internal::nested_eval<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
474
-
528
+
475
529
  typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned;
476
530
  typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned;
477
531
 
@@ -490,19 +544,19 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
490
544
  typedef typename find_best_packet<Scalar,ColsAtCompileTime>::type RhsVecPacketType;
491
545
 
492
546
  enum {
493
-
547
+
494
548
  LhsCoeffReadCost = LhsEtorType::CoeffReadCost,
495
549
  RhsCoeffReadCost = RhsEtorType::CoeffReadCost,
496
550
  CoeffReadCost = InnerSize==0 ? NumTraits<Scalar>::ReadCost
497
551
  : InnerSize == Dynamic ? HugeCost
498
- : InnerSize * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost)
552
+ : InnerSize * (NumTraits<Scalar>::MulCost + int(LhsCoeffReadCost) + int(RhsCoeffReadCost))
499
553
  + (InnerSize - 1) * NumTraits<Scalar>::AddCost,
500
554
 
501
555
  Unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT,
502
-
556
+
503
557
  LhsFlags = LhsEtorType::Flags,
504
558
  RhsFlags = RhsEtorType::Flags,
505
-
559
+
506
560
  LhsRowMajor = LhsFlags & RowMajorBit,
507
561
  RhsRowMajor = RhsFlags & RowMajorBit,
508
562
 
@@ -512,7 +566,7 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
512
566
  // Here, we don't care about alignment larger than the usable packet size.
513
567
  LhsAlignment = EIGEN_PLAIN_ENUM_MIN(LhsEtorType::Alignment,LhsVecPacketSize*int(sizeof(typename LhsNestedCleaned::Scalar))),
514
568
  RhsAlignment = EIGEN_PLAIN_ENUM_MIN(RhsEtorType::Alignment,RhsVecPacketSize*int(sizeof(typename RhsNestedCleaned::Scalar))),
515
-
569
+
516
570
  SameType = is_same<typename LhsNestedCleaned::Scalar,typename RhsNestedCleaned::Scalar>::value,
517
571
 
518
572
  CanVectorizeRhs = bool(RhsRowMajor) && (RhsFlags & PacketAccessBit) && (ColsAtCompileTime!=1),
@@ -522,12 +576,12 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
522
576
  : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
523
577
  : (bool(RhsRowMajor) && !CanVectorizeLhs),
524
578
 
525
- Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit)
579
+ Flags = ((int(LhsFlags) | int(RhsFlags)) & HereditaryBits & ~RowMajorBit)
526
580
  | (EvalToRowMajor ? RowMajorBit : 0)
527
581
  // TODO enable vectorization for mixed types
528
582
  | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0)
529
583
  | (XprType::IsVectorAtCompileTime ? LinearAccessBit : 0),
530
-
584
+
531
585
  LhsOuterStrideBytes = int(LhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename LhsNestedCleaned::Scalar)),
532
586
  RhsOuterStrideBytes = int(RhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename RhsNestedCleaned::Scalar)),
533
587
 
@@ -543,10 +597,10 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
543
597
  CanVectorizeInner = SameType
544
598
  && LhsRowMajor
545
599
  && (!RhsRowMajor)
546
- && (LhsFlags & RhsFlags & ActualPacketAccessBit)
547
- && (InnerSize % packet_traits<Scalar>::size == 0)
600
+ && (int(LhsFlags) & int(RhsFlags) & ActualPacketAccessBit)
601
+ && (int(InnerSize) % packet_traits<Scalar>::size == 0)
548
602
  };
549
-
603
+
550
604
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index row, Index col) const
551
605
  {
552
606
  return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum();
@@ -556,7 +610,8 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
556
610
  * which is why we don't set the LinearAccessBit.
557
611
  * TODO: this seems possible when the result is a vector
558
612
  */
559
- EIGEN_DEVICE_FUNC const CoeffReturnType coeff(Index index) const
613
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
614
+ const CoeffReturnType coeff(Index index) const
560
615
  {
561
616
  const Index row = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? 0 : index;
562
617
  const Index col = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? index : 0;
@@ -564,6 +619,7 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
564
619
  }
565
620
 
566
621
  template<int LoadMode, typename PacketType>
622
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
567
623
  const PacketType packet(Index row, Index col) const
568
624
  {
569
625
  PacketType res;
@@ -575,6 +631,7 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
575
631
  }
576
632
 
577
633
  template<int LoadMode, typename PacketType>
634
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
578
635
  const PacketType packet(Index index) const
579
636
  {
580
637
  const Index row = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? 0 : index;
@@ -585,7 +642,7 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
585
642
  protected:
586
643
  typename internal::add_const_on_value_type<LhsNested>::type m_lhs;
587
644
  typename internal::add_const_on_value_type<RhsNested>::type m_rhs;
588
-
645
+
589
646
  LhsEtorType m_lhsImpl;
590
647
  RhsEtorType m_rhsImpl;
591
648
 
@@ -603,7 +660,8 @@ struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, LazyCoeffBasedProduc
603
660
  enum {
604
661
  Flags = Base::Flags | EvalBeforeNestingBit
605
662
  };
606
- EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr)
663
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
664
+ explicit product_evaluator(const XprType& xpr)
607
665
  : Base(BaseProduct(xpr.lhs(),xpr.rhs()))
608
666
  {}
609
667
  };
@@ -615,7 +673,7 @@ struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, LazyCoeffBasedProduc
615
673
  template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
616
674
  struct etor_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
617
675
  {
618
- static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
676
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
619
677
  {
620
678
  etor_product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
621
679
  res = pmadd(pset1<Packet>(lhs.coeff(row, Index(UnrollingIndex-1))), rhs.template packet<LoadMode,Packet>(Index(UnrollingIndex-1), col), res);
@@ -625,7 +683,7 @@ struct etor_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, Load
625
683
  template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
626
684
  struct etor_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
627
685
  {
628
- static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
686
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
629
687
  {
630
688
  etor_product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
631
689
  res = pmadd(lhs.template packet<LoadMode,Packet>(row, Index(UnrollingIndex-1)), pset1<Packet>(rhs.coeff(Index(UnrollingIndex-1), col)), res);
@@ -635,7 +693,7 @@ struct etor_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, Load
635
693
  template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
636
694
  struct etor_product_packet_impl<RowMajor, 1, Lhs, Rhs, Packet, LoadMode>
637
695
  {
638
- static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
696
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
639
697
  {
640
698
  res = pmul(pset1<Packet>(lhs.coeff(row, Index(0))),rhs.template packet<LoadMode,Packet>(Index(0), col));
641
699
  }
@@ -644,7 +702,7 @@ struct etor_product_packet_impl<RowMajor, 1, Lhs, Rhs, Packet, LoadMode>
644
702
  template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
645
703
  struct etor_product_packet_impl<ColMajor, 1, Lhs, Rhs, Packet, LoadMode>
646
704
  {
647
- static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
705
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
648
706
  {
649
707
  res = pmul(lhs.template packet<LoadMode,Packet>(row, Index(0)), pset1<Packet>(rhs.coeff(Index(0), col)));
650
708
  }
@@ -653,7 +711,7 @@ struct etor_product_packet_impl<ColMajor, 1, Lhs, Rhs, Packet, LoadMode>
653
711
  template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
654
712
  struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
655
713
  {
656
- static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res)
714
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res)
657
715
  {
658
716
  res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
659
717
  }
@@ -662,7 +720,7 @@ struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
662
720
  template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
663
721
  struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
664
722
  {
665
- static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res)
723
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res)
666
724
  {
667
725
  res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
668
726
  }
@@ -671,7 +729,7 @@ struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
671
729
  template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
672
730
  struct etor_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
673
731
  {
674
- static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
732
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
675
733
  {
676
734
  res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
677
735
  for(Index i = 0; i < innerDim; ++i)
@@ -682,7 +740,7 @@ struct etor_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
682
740
  template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
683
741
  struct etor_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
684
742
  {
685
- static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
743
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
686
744
  {
687
745
  res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
688
746
  for(Index i = 0; i < innerDim; ++i)
@@ -704,7 +762,7 @@ struct generic_product_impl<Lhs,Rhs,TriangularShape,DenseShape,ProductTag>
704
762
  : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,TriangularShape,DenseShape,ProductTag> >
705
763
  {
706
764
  typedef typename Product<Lhs,Rhs>::Scalar Scalar;
707
-
765
+
708
766
  template<typename Dest>
709
767
  static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
710
768
  {
@@ -718,7 +776,7 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,TriangularShape,ProductTag>
718
776
  : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,TriangularShape,ProductTag> >
719
777
  {
720
778
  typedef typename Product<Lhs,Rhs>::Scalar Scalar;
721
-
779
+
722
780
  template<typename Dest>
723
781
  static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
724
782
  {
@@ -739,9 +797,10 @@ struct generic_product_impl<Lhs,Rhs,SelfAdjointShape,DenseShape,ProductTag>
739
797
  : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,SelfAdjointShape,DenseShape,ProductTag> >
740
798
  {
741
799
  typedef typename Product<Lhs,Rhs>::Scalar Scalar;
742
-
800
+
743
801
  template<typename Dest>
744
- static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
802
+ static EIGEN_DEVICE_FUNC
803
+ void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
745
804
  {
746
805
  selfadjoint_product_impl<typename Lhs::MatrixType,Lhs::Mode,false,Rhs,0,Rhs::IsVectorAtCompileTime>::run(dst, lhs.nestedExpression(), rhs, alpha);
747
806
  }
@@ -752,7 +811,7 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,SelfAdjointShape,ProductTag>
752
811
  : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,SelfAdjointShape,ProductTag> >
753
812
  {
754
813
  typedef typename Product<Lhs,Rhs>::Scalar Scalar;
755
-
814
+
756
815
  template<typename Dest>
757
816
  static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
758
817
  {
@@ -764,7 +823,7 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,SelfAdjointShape,ProductTag>
764
823
  /***************************************************************************
765
824
  * Diagonal products
766
825
  ***************************************************************************/
767
-
826
+
768
827
  template<typename MatrixType, typename DiagonalType, typename Derived, int ProductOrder>
769
828
  struct diagonal_product_evaluator_base
770
829
  : evaluator_base<Derived>
@@ -772,17 +831,25 @@ struct diagonal_product_evaluator_base
772
831
  typedef typename ScalarBinaryOpTraits<typename MatrixType::Scalar, typename DiagonalType::Scalar>::ReturnType Scalar;
773
832
  public:
774
833
  enum {
775
- CoeffReadCost = NumTraits<Scalar>::MulCost + evaluator<MatrixType>::CoeffReadCost + evaluator<DiagonalType>::CoeffReadCost,
776
-
834
+ CoeffReadCost = int(NumTraits<Scalar>::MulCost) + int(evaluator<MatrixType>::CoeffReadCost) + int(evaluator<DiagonalType>::CoeffReadCost),
835
+
777
836
  MatrixFlags = evaluator<MatrixType>::Flags,
778
837
  DiagFlags = evaluator<DiagonalType>::Flags,
779
- _StorageOrder = MatrixFlags & RowMajorBit ? RowMajor : ColMajor,
838
+
839
+ _StorageOrder = (Derived::MaxRowsAtCompileTime==1 && Derived::MaxColsAtCompileTime!=1) ? RowMajor
840
+ : (Derived::MaxColsAtCompileTime==1 && Derived::MaxRowsAtCompileTime!=1) ? ColMajor
841
+ : MatrixFlags & RowMajorBit ? RowMajor : ColMajor,
842
+ _SameStorageOrder = _StorageOrder == (MatrixFlags & RowMajorBit ? RowMajor : ColMajor),
843
+
780
844
  _ScalarAccessOnDiag = !((int(_StorageOrder) == ColMajor && int(ProductOrder) == OnTheLeft)
781
845
  ||(int(_StorageOrder) == RowMajor && int(ProductOrder) == OnTheRight)),
782
846
  _SameTypes = is_same<typename MatrixType::Scalar, typename DiagonalType::Scalar>::value,
783
847
  // FIXME currently we need same types, but in the future the next rule should be the one
784
848
  //_Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && ((!_PacketOnDiag) || (_SameTypes && bool(int(DiagFlags)&PacketAccessBit))),
785
- _Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && _SameTypes && (_ScalarAccessOnDiag || (bool(int(DiagFlags)&PacketAccessBit))),
849
+ _Vectorizable = bool(int(MatrixFlags)&PacketAccessBit)
850
+ && _SameTypes
851
+ && (_SameStorageOrder || (MatrixFlags&LinearAccessBit)==LinearAccessBit)
852
+ && (_ScalarAccessOnDiag || (bool(int(DiagFlags)&PacketAccessBit))),
786
853
  _LinearAccessMask = (MatrixType::RowsAtCompileTime==1 || MatrixType::ColsAtCompileTime==1) ? LinearAccessBit : 0,
787
854
  Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixFlags)) | (_Vectorizable ? PacketAccessBit : 0),
788
855
  Alignment = evaluator<MatrixType>::Alignment,
@@ -791,14 +858,14 @@ public:
791
858
  || (DiagonalType::SizeAtCompileTime==Dynamic && MatrixType::RowsAtCompileTime==1 && ProductOrder==OnTheLeft)
792
859
  || (DiagonalType::SizeAtCompileTime==Dynamic && MatrixType::ColsAtCompileTime==1 && ProductOrder==OnTheRight)
793
860
  };
794
-
795
- diagonal_product_evaluator_base(const MatrixType &mat, const DiagonalType &diag)
861
+
862
+ EIGEN_DEVICE_FUNC diagonal_product_evaluator_base(const MatrixType &mat, const DiagonalType &diag)
796
863
  : m_diagImpl(diag), m_matImpl(mat)
797
864
  {
798
865
  EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::MulCost);
799
866
  EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
800
867
  }
801
-
868
+
802
869
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const
803
870
  {
804
871
  if(AsScalarProduct)
@@ -806,7 +873,7 @@ public:
806
873
  else
807
874
  return m_diagImpl.coeff(idx) * m_matImpl.coeff(idx);
808
875
  }
809
-
876
+
810
877
  protected:
811
878
  template<int LoadMode,typename PacketType>
812
879
  EIGEN_STRONG_INLINE PacketType packet_impl(Index row, Index col, Index id, internal::true_type) const
@@ -814,7 +881,7 @@ protected:
814
881
  return internal::pmul(m_matImpl.template packet<LoadMode,PacketType>(row, col),
815
882
  internal::pset1<PacketType>(m_diagImpl.coeff(id)));
816
883
  }
817
-
884
+
818
885
  template<int LoadMode,typename PacketType>
819
886
  EIGEN_STRONG_INLINE PacketType packet_impl(Index row, Index col, Index id, internal::false_type) const
820
887
  {
@@ -825,7 +892,7 @@ protected:
825
892
  return internal::pmul(m_matImpl.template packet<LoadMode,PacketType>(row, col),
826
893
  m_diagImpl.template packet<DiagonalPacketLoadMode,PacketType>(id));
827
894
  }
828
-
895
+
829
896
  evaluator<DiagonalType> m_diagImpl;
830
897
  evaluator<MatrixType> m_matImpl;
831
898
  };
@@ -840,25 +907,25 @@ struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DiagonalSha
840
907
  using Base::m_matImpl;
841
908
  using Base::coeff;
842
909
  typedef typename Base::Scalar Scalar;
843
-
910
+
844
911
  typedef Product<Lhs, Rhs, ProductKind> XprType;
845
912
  typedef typename XprType::PlainObject PlainObject;
846
-
847
- enum {
848
- StorageOrder = int(Rhs::Flags) & RowMajorBit ? RowMajor : ColMajor
849
- };
913
+ typedef typename Lhs::DiagonalVectorType DiagonalType;
914
+
915
+
916
+ enum { StorageOrder = Base::_StorageOrder };
850
917
 
851
918
  EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr)
852
919
  : Base(xpr.rhs(), xpr.lhs().diagonal())
853
920
  {
854
921
  }
855
-
922
+
856
923
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const
857
924
  {
858
925
  return m_diagImpl.coeff(row) * m_matImpl.coeff(row, col);
859
926
  }
860
-
861
- #ifndef __CUDACC__
927
+
928
+ #ifndef EIGEN_GPUCC
862
929
  template<int LoadMode,typename PacketType>
863
930
  EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const
864
931
  {
@@ -867,7 +934,7 @@ struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DiagonalSha
867
934
  return this->template packet_impl<LoadMode,PacketType>(row,col, row,
868
935
  typename internal::conditional<int(StorageOrder)==RowMajor, internal::true_type, internal::false_type>::type());
869
936
  }
870
-
937
+
871
938
  template<int LoadMode,typename PacketType>
872
939
  EIGEN_STRONG_INLINE PacketType packet(Index idx) const
873
940
  {
@@ -886,30 +953,30 @@ struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DenseShape,
886
953
  using Base::m_matImpl;
887
954
  using Base::coeff;
888
955
  typedef typename Base::Scalar Scalar;
889
-
956
+
890
957
  typedef Product<Lhs, Rhs, ProductKind> XprType;
891
958
  typedef typename XprType::PlainObject PlainObject;
892
-
893
- enum { StorageOrder = int(Lhs::Flags) & RowMajorBit ? RowMajor : ColMajor };
959
+
960
+ enum { StorageOrder = Base::_StorageOrder };
894
961
 
895
962
  EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr)
896
963
  : Base(xpr.lhs(), xpr.rhs().diagonal())
897
964
  {
898
965
  }
899
-
966
+
900
967
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const
901
968
  {
902
969
  return m_matImpl.coeff(row, col) * m_diagImpl.coeff(col);
903
970
  }
904
-
905
- #ifndef __CUDACC__
971
+
972
+ #ifndef EIGEN_GPUCC
906
973
  template<int LoadMode,typename PacketType>
907
974
  EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const
908
975
  {
909
976
  return this->template packet_impl<LoadMode,PacketType>(row,col, col,
910
977
  typename internal::conditional<int(StorageOrder)==ColMajor, internal::true_type, internal::false_type>::type());
911
978
  }
912
-
979
+
913
980
  template<int LoadMode,typename PacketType>
914
981
  EIGEN_STRONG_INLINE PacketType packet(Index idx) const
915
982
  {
@@ -937,7 +1004,7 @@ struct permutation_matrix_product<ExpressionType, Side, Transposed, DenseShape>
937
1004
  typedef typename remove_all<MatrixType>::type MatrixTypeCleaned;
938
1005
 
939
1006
  template<typename Dest, typename PermutationType>
940
- static inline void run(Dest& dst, const PermutationType& perm, const ExpressionType& xpr)
1007
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Dest& dst, const PermutationType& perm, const ExpressionType& xpr)
941
1008
  {
942
1009
  MatrixType mat(xpr);
943
1010
  const Index n = Side==OnTheLeft ? mat.rows() : mat.cols();
@@ -991,7 +1058,7 @@ template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
991
1058
  struct generic_product_impl<Lhs, Rhs, PermutationShape, MatrixShape, ProductTag>
992
1059
  {
993
1060
  template<typename Dest>
994
- static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
1061
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
995
1062
  {
996
1063
  permutation_matrix_product<Rhs, OnTheLeft, false, MatrixShape>::run(dst, lhs, rhs);
997
1064
  }
@@ -1001,7 +1068,7 @@ template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
1001
1068
  struct generic_product_impl<Lhs, Rhs, MatrixShape, PermutationShape, ProductTag>
1002
1069
  {
1003
1070
  template<typename Dest>
1004
- static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
1071
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
1005
1072
  {
1006
1073
  permutation_matrix_product<Lhs, OnTheRight, false, MatrixShape>::run(dst, rhs, lhs);
1007
1074
  }
@@ -1011,7 +1078,7 @@ template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
1011
1078
  struct generic_product_impl<Inverse<Lhs>, Rhs, PermutationShape, MatrixShape, ProductTag>
1012
1079
  {
1013
1080
  template<typename Dest>
1014
- static void evalTo(Dest& dst, const Inverse<Lhs>& lhs, const Rhs& rhs)
1081
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Inverse<Lhs>& lhs, const Rhs& rhs)
1015
1082
  {
1016
1083
  permutation_matrix_product<Rhs, OnTheLeft, true, MatrixShape>::run(dst, lhs.nestedExpression(), rhs);
1017
1084
  }
@@ -1021,7 +1088,7 @@ template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
1021
1088
  struct generic_product_impl<Lhs, Inverse<Rhs>, MatrixShape, PermutationShape, ProductTag>
1022
1089
  {
1023
1090
  template<typename Dest>
1024
- static void evalTo(Dest& dst, const Lhs& lhs, const Inverse<Rhs>& rhs)
1091
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Inverse<Rhs>& rhs)
1025
1092
  {
1026
1093
  permutation_matrix_product<Lhs, OnTheRight, true, MatrixShape>::run(dst, rhs.nestedExpression(), lhs);
1027
1094
  }
@@ -1043,9 +1110,9 @@ struct transposition_matrix_product
1043
1110
  {
1044
1111
  typedef typename nested_eval<ExpressionType, 1>::type MatrixType;
1045
1112
  typedef typename remove_all<MatrixType>::type MatrixTypeCleaned;
1046
-
1113
+
1047
1114
  template<typename Dest, typename TranspositionType>
1048
- static inline void run(Dest& dst, const TranspositionType& tr, const ExpressionType& xpr)
1115
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Dest& dst, const TranspositionType& tr, const ExpressionType& xpr)
1049
1116
  {
1050
1117
  MatrixType mat(xpr);
1051
1118
  typedef typename TranspositionType::StorageIndex StorageIndex;
@@ -1068,7 +1135,7 @@ template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
1068
1135
  struct generic_product_impl<Lhs, Rhs, TranspositionsShape, MatrixShape, ProductTag>
1069
1136
  {
1070
1137
  template<typename Dest>
1071
- static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
1138
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
1072
1139
  {
1073
1140
  transposition_matrix_product<Rhs, OnTheLeft, false, MatrixShape>::run(dst, lhs, rhs);
1074
1141
  }
@@ -1078,7 +1145,7 @@ template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
1078
1145
  struct generic_product_impl<Lhs, Rhs, MatrixShape, TranspositionsShape, ProductTag>
1079
1146
  {
1080
1147
  template<typename Dest>
1081
- static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
1148
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
1082
1149
  {
1083
1150
  transposition_matrix_product<Lhs, OnTheRight, false, MatrixShape>::run(dst, rhs, lhs);
1084
1151
  }
@@ -1089,7 +1156,7 @@ template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
1089
1156
  struct generic_product_impl<Transpose<Lhs>, Rhs, TranspositionsShape, MatrixShape, ProductTag>
1090
1157
  {
1091
1158
  template<typename Dest>
1092
- static void evalTo(Dest& dst, const Transpose<Lhs>& lhs, const Rhs& rhs)
1159
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Transpose<Lhs>& lhs, const Rhs& rhs)
1093
1160
  {
1094
1161
  transposition_matrix_product<Rhs, OnTheLeft, true, MatrixShape>::run(dst, lhs.nestedExpression(), rhs);
1095
1162
  }
@@ -1099,7 +1166,7 @@ template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
1099
1166
  struct generic_product_impl<Lhs, Transpose<Rhs>, MatrixShape, TranspositionsShape, ProductTag>
1100
1167
  {
1101
1168
  template<typename Dest>
1102
- static void evalTo(Dest& dst, const Lhs& lhs, const Transpose<Rhs>& rhs)
1169
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Transpose<Rhs>& rhs)
1103
1170
  {
1104
1171
  transposition_matrix_product<Lhs, OnTheRight, true, MatrixShape>::run(dst, rhs.nestedExpression(), lhs);
1105
1172
  }