tomoto 0.2.3 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (347)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +6 -0
  3. data/README.md +8 -10
  4. data/ext/tomoto/extconf.rb +6 -2
  5. data/ext/tomoto/{ext.cpp → tomoto.cpp} +1 -1
  6. data/lib/tomoto/version.rb +1 -1
  7. data/lib/tomoto.rb +5 -1
  8. data/vendor/EigenRand/EigenRand/Core.h +10 -10
  9. data/vendor/EigenRand/EigenRand/Dists/Basic.h +208 -9
  10. data/vendor/EigenRand/EigenRand/Dists/Discrete.h +52 -31
  11. data/vendor/EigenRand/EigenRand/Dists/GammaPoisson.h +9 -8
  12. data/vendor/EigenRand/EigenRand/Dists/NormalExp.h +28 -21
  13. data/vendor/EigenRand/EigenRand/EigenRand +11 -6
  14. data/vendor/EigenRand/EigenRand/Macro.h +13 -7
  15. data/vendor/EigenRand/EigenRand/MorePacketMath.h +348 -740
  16. data/vendor/EigenRand/EigenRand/MvDists/Multinomial.h +5 -3
  17. data/vendor/EigenRand/EigenRand/MvDists/MvNormal.h +9 -3
  18. data/vendor/EigenRand/EigenRand/PacketFilter.h +11 -253
  19. data/vendor/EigenRand/EigenRand/PacketRandomEngine.h +21 -47
  20. data/vendor/EigenRand/EigenRand/RandUtils.h +50 -344
  21. data/vendor/EigenRand/EigenRand/arch/AVX/MorePacketMath.h +619 -0
  22. data/vendor/EigenRand/EigenRand/arch/AVX/PacketFilter.h +149 -0
  23. data/vendor/EigenRand/EigenRand/arch/AVX/RandUtils.h +228 -0
  24. data/vendor/EigenRand/EigenRand/arch/NEON/MorePacketMath.h +473 -0
  25. data/vendor/EigenRand/EigenRand/arch/NEON/PacketFilter.h +142 -0
  26. data/vendor/EigenRand/EigenRand/arch/NEON/RandUtils.h +126 -0
  27. data/vendor/EigenRand/EigenRand/arch/SSE/MorePacketMath.h +501 -0
  28. data/vendor/EigenRand/EigenRand/arch/SSE/PacketFilter.h +133 -0
  29. data/vendor/EigenRand/EigenRand/arch/SSE/RandUtils.h +120 -0
  30. data/vendor/EigenRand/EigenRand/doc.h +24 -12
  31. data/vendor/EigenRand/README.md +57 -4
  32. data/vendor/eigen/COPYING.APACHE +203 -0
  33. data/vendor/eigen/COPYING.BSD +1 -1
  34. data/vendor/eigen/COPYING.MINPACK +51 -52
  35. data/vendor/eigen/Eigen/Cholesky +0 -1
  36. data/vendor/eigen/Eigen/Core +112 -265
  37. data/vendor/eigen/Eigen/Eigenvalues +2 -3
  38. data/vendor/eigen/Eigen/Geometry +5 -8
  39. data/vendor/eigen/Eigen/Householder +0 -1
  40. data/vendor/eigen/Eigen/Jacobi +0 -1
  41. data/vendor/eigen/Eigen/KLUSupport +41 -0
  42. data/vendor/eigen/Eigen/LU +2 -5
  43. data/vendor/eigen/Eigen/OrderingMethods +0 -3
  44. data/vendor/eigen/Eigen/PaStiXSupport +1 -0
  45. data/vendor/eigen/Eigen/PardisoSupport +0 -0
  46. data/vendor/eigen/Eigen/QR +2 -3
  47. data/vendor/eigen/Eigen/QtAlignedMalloc +0 -1
  48. data/vendor/eigen/Eigen/SVD +0 -1
  49. data/vendor/eigen/Eigen/Sparse +0 -2
  50. data/vendor/eigen/Eigen/SparseCholesky +0 -8
  51. data/vendor/eigen/Eigen/SparseLU +4 -0
  52. data/vendor/eigen/Eigen/SparseQR +0 -1
  53. data/vendor/eigen/Eigen/src/Cholesky/LDLT.h +42 -27
  54. data/vendor/eigen/Eigen/src/Cholesky/LLT.h +39 -23
  55. data/vendor/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +90 -47
  56. data/vendor/eigen/Eigen/src/Core/ArithmeticSequence.h +413 -0
  57. data/vendor/eigen/Eigen/src/Core/Array.h +99 -11
  58. data/vendor/eigen/Eigen/src/Core/ArrayBase.h +3 -3
  59. data/vendor/eigen/Eigen/src/Core/ArrayWrapper.h +21 -21
  60. data/vendor/eigen/Eigen/src/Core/Assign.h +1 -1
  61. data/vendor/eigen/Eigen/src/Core/AssignEvaluator.h +125 -50
  62. data/vendor/eigen/Eigen/src/Core/Assign_MKL.h +10 -10
  63. data/vendor/eigen/Eigen/src/Core/BandMatrix.h +16 -16
  64. data/vendor/eigen/Eigen/src/Core/Block.h +56 -60
  65. data/vendor/eigen/Eigen/src/Core/BooleanRedux.h +29 -31
  66. data/vendor/eigen/Eigen/src/Core/CommaInitializer.h +7 -3
  67. data/vendor/eigen/Eigen/src/Core/CoreEvaluators.h +325 -272
  68. data/vendor/eigen/Eigen/src/Core/CoreIterators.h +5 -0
  69. data/vendor/eigen/Eigen/src/Core/CwiseBinaryOp.h +21 -22
  70. data/vendor/eigen/Eigen/src/Core/CwiseNullaryOp.h +153 -18
  71. data/vendor/eigen/Eigen/src/Core/CwiseUnaryOp.h +6 -6
  72. data/vendor/eigen/Eigen/src/Core/CwiseUnaryView.h +14 -10
  73. data/vendor/eigen/Eigen/src/Core/DenseBase.h +132 -42
  74. data/vendor/eigen/Eigen/src/Core/DenseCoeffsBase.h +25 -21
  75. data/vendor/eigen/Eigen/src/Core/DenseStorage.h +153 -71
  76. data/vendor/eigen/Eigen/src/Core/Diagonal.h +21 -23
  77. data/vendor/eigen/Eigen/src/Core/DiagonalMatrix.h +50 -2
  78. data/vendor/eigen/Eigen/src/Core/DiagonalProduct.h +1 -1
  79. data/vendor/eigen/Eigen/src/Core/Dot.h +10 -10
  80. data/vendor/eigen/Eigen/src/Core/EigenBase.h +10 -9
  81. data/vendor/eigen/Eigen/src/Core/ForceAlignedAccess.h +8 -4
  82. data/vendor/eigen/Eigen/src/Core/Fuzzy.h +3 -3
  83. data/vendor/eigen/Eigen/src/Core/GeneralProduct.h +20 -10
  84. data/vendor/eigen/Eigen/src/Core/GenericPacketMath.h +599 -152
  85. data/vendor/eigen/Eigen/src/Core/GlobalFunctions.h +40 -33
  86. data/vendor/eigen/Eigen/src/Core/IO.h +40 -7
  87. data/vendor/eigen/Eigen/src/Core/IndexedView.h +237 -0
  88. data/vendor/eigen/Eigen/src/Core/Inverse.h +9 -10
  89. data/vendor/eigen/Eigen/src/Core/Map.h +7 -7
  90. data/vendor/eigen/Eigen/src/Core/MapBase.h +10 -3
  91. data/vendor/eigen/Eigen/src/Core/MathFunctions.h +767 -125
  92. data/vendor/eigen/Eigen/src/Core/MathFunctionsImpl.h +118 -19
  93. data/vendor/eigen/Eigen/src/Core/Matrix.h +131 -25
  94. data/vendor/eigen/Eigen/src/Core/MatrixBase.h +21 -3
  95. data/vendor/eigen/Eigen/src/Core/NestByValue.h +25 -50
  96. data/vendor/eigen/Eigen/src/Core/NoAlias.h +4 -3
  97. data/vendor/eigen/Eigen/src/Core/NumTraits.h +107 -20
  98. data/vendor/eigen/Eigen/src/Core/PartialReduxEvaluator.h +232 -0
  99. data/vendor/eigen/Eigen/src/Core/PermutationMatrix.h +3 -31
  100. data/vendor/eigen/Eigen/src/Core/PlainObjectBase.h +152 -59
  101. data/vendor/eigen/Eigen/src/Core/Product.h +30 -25
  102. data/vendor/eigen/Eigen/src/Core/ProductEvaluators.h +192 -125
  103. data/vendor/eigen/Eigen/src/Core/Random.h +37 -1
  104. data/vendor/eigen/Eigen/src/Core/Redux.h +180 -170
  105. data/vendor/eigen/Eigen/src/Core/Ref.h +121 -23
  106. data/vendor/eigen/Eigen/src/Core/Replicate.h +8 -8
  107. data/vendor/eigen/Eigen/src/Core/Reshaped.h +454 -0
  108. data/vendor/eigen/Eigen/src/Core/ReturnByValue.h +7 -5
  109. data/vendor/eigen/Eigen/src/Core/Reverse.h +18 -12
  110. data/vendor/eigen/Eigen/src/Core/Select.h +8 -6
  111. data/vendor/eigen/Eigen/src/Core/SelfAdjointView.h +33 -20
  112. data/vendor/eigen/Eigen/src/Core/Solve.h +14 -14
  113. data/vendor/eigen/Eigen/src/Core/SolveTriangular.h +16 -16
  114. data/vendor/eigen/Eigen/src/Core/SolverBase.h +41 -3
  115. data/vendor/eigen/Eigen/src/Core/StableNorm.h +100 -70
  116. data/vendor/eigen/Eigen/src/Core/StlIterators.h +463 -0
  117. data/vendor/eigen/Eigen/src/Core/Stride.h +9 -4
  118. data/vendor/eigen/Eigen/src/Core/Swap.h +5 -4
  119. data/vendor/eigen/Eigen/src/Core/Transpose.h +88 -27
  120. data/vendor/eigen/Eigen/src/Core/Transpositions.h +26 -47
  121. data/vendor/eigen/Eigen/src/Core/TriangularMatrix.h +93 -75
  122. data/vendor/eigen/Eigen/src/Core/VectorBlock.h +5 -5
  123. data/vendor/eigen/Eigen/src/Core/VectorwiseOp.h +159 -70
  124. data/vendor/eigen/Eigen/src/Core/Visitor.h +137 -29
  125. data/vendor/eigen/Eigen/src/Core/arch/AVX/Complex.h +50 -129
  126. data/vendor/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +126 -337
  127. data/vendor/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +1092 -155
  128. data/vendor/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +65 -1
  129. data/vendor/eigen/Eigen/src/Core/arch/AVX512/Complex.h +422 -0
  130. data/vendor/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +207 -236
  131. data/vendor/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1482 -495
  132. data/vendor/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +89 -0
  133. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +152 -165
  134. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +19 -251
  135. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2937 -0
  136. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +221 -0
  137. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +629 -0
  138. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2042 -392
  139. data/vendor/eigen/Eigen/src/Core/arch/CUDA/Complex.h +235 -80
  140. data/vendor/eigen/Eigen/src/Core/arch/Default/BFloat16.h +700 -0
  141. data/vendor/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +102 -14
  142. data/vendor/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1649 -0
  143. data/vendor/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +110 -0
  144. data/vendor/eigen/Eigen/src/Core/arch/Default/Half.h +942 -0
  145. data/vendor/eigen/Eigen/src/Core/arch/Default/Settings.h +1 -1
  146. data/vendor/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +120 -0
  147. data/vendor/eigen/Eigen/src/Core/arch/{CUDA → GPU}/MathFunctions.h +16 -4
  148. data/vendor/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1685 -0
  149. data/vendor/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +80 -0
  150. data/vendor/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
  151. data/vendor/eigen/Eigen/src/Core/arch/MSA/Complex.h +648 -0
  152. data/vendor/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +387 -0
  153. data/vendor/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1233 -0
  154. data/vendor/eigen/Eigen/src/Core/arch/NEON/Complex.h +313 -219
  155. data/vendor/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +183 -0
  156. data/vendor/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +54 -70
  157. data/vendor/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4376 -549
  158. data/vendor/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1419 -0
  159. data/vendor/eigen/Eigen/src/Core/arch/SSE/Complex.h +59 -179
  160. data/vendor/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +65 -428
  161. data/vendor/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +893 -283
  162. data/vendor/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +65 -0
  163. data/vendor/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +44 -0
  164. data/vendor/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +752 -0
  165. data/vendor/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +49 -0
  166. data/vendor/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +232 -0
  167. data/vendor/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +301 -0
  168. data/vendor/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +670 -0
  169. data/vendor/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +694 -0
  170. data/vendor/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +85 -0
  171. data/vendor/eigen/Eigen/src/Core/arch/ZVector/Complex.h +212 -183
  172. data/vendor/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +101 -5
  173. data/vendor/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +510 -395
  174. data/vendor/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +11 -2
  175. data/vendor/eigen/Eigen/src/Core/functors/BinaryFunctors.h +112 -46
  176. data/vendor/eigen/Eigen/src/Core/functors/NullaryFunctors.h +31 -30
  177. data/vendor/eigen/Eigen/src/Core/functors/StlFunctors.h +32 -2
  178. data/vendor/eigen/Eigen/src/Core/functors/UnaryFunctors.h +355 -16
  179. data/vendor/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1075 -586
  180. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +49 -24
  181. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +41 -35
  182. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +6 -6
  183. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +4 -2
  184. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +382 -483
  185. data/vendor/eigen/Eigen/src/Core/products/Parallelizer.h +22 -5
  186. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +53 -30
  187. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +16 -8
  188. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +8 -6
  189. data/vendor/eigen/Eigen/src/Core/products/SelfadjointProduct.h +4 -4
  190. data/vendor/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +5 -4
  191. data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +33 -27
  192. data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +14 -12
  193. data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +36 -34
  194. data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +8 -4
  195. data/vendor/eigen/Eigen/src/Core/products/TriangularSolverVector.h +13 -10
  196. data/vendor/eigen/Eigen/src/Core/util/BlasUtil.h +304 -119
  197. data/vendor/eigen/Eigen/src/Core/util/ConfigureVectorization.h +512 -0
  198. data/vendor/eigen/Eigen/src/Core/util/Constants.h +25 -9
  199. data/vendor/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +26 -3
  200. data/vendor/eigen/Eigen/src/Core/util/ForwardDeclarations.h +29 -9
  201. data/vendor/eigen/Eigen/src/Core/util/IndexedViewHelper.h +186 -0
  202. data/vendor/eigen/Eigen/src/Core/util/IntegralConstant.h +272 -0
  203. data/vendor/eigen/Eigen/src/Core/util/MKL_support.h +8 -1
  204. data/vendor/eigen/Eigen/src/Core/util/Macros.h +709 -246
  205. data/vendor/eigen/Eigen/src/Core/util/Memory.h +222 -52
  206. data/vendor/eigen/Eigen/src/Core/util/Meta.h +355 -77
  207. data/vendor/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +5 -1
  208. data/vendor/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
  209. data/vendor/eigen/Eigen/src/Core/util/StaticAssert.h +8 -5
  210. data/vendor/eigen/Eigen/src/Core/util/SymbolicIndex.h +293 -0
  211. data/vendor/eigen/Eigen/src/Core/util/XprHelper.h +65 -30
  212. data/vendor/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +1 -1
  213. data/vendor/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +7 -4
  214. data/vendor/eigen/Eigen/src/Eigenvalues/EigenSolver.h +2 -2
  215. data/vendor/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +1 -1
  216. data/vendor/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +2 -2
  217. data/vendor/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +2 -2
  218. data/vendor/eigen/Eigen/src/Eigenvalues/RealQZ.h +9 -6
  219. data/vendor/eigen/Eigen/src/Eigenvalues/RealSchur.h +21 -9
  220. data/vendor/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +77 -43
  221. data/vendor/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +20 -15
  222. data/vendor/eigen/Eigen/src/Geometry/AlignedBox.h +99 -5
  223. data/vendor/eigen/Eigen/src/Geometry/AngleAxis.h +4 -4
  224. data/vendor/eigen/Eigen/src/Geometry/EulerAngles.h +3 -3
  225. data/vendor/eigen/Eigen/src/Geometry/Homogeneous.h +15 -11
  226. data/vendor/eigen/Eigen/src/Geometry/Hyperplane.h +1 -1
  227. data/vendor/eigen/Eigen/src/Geometry/OrthoMethods.h +3 -2
  228. data/vendor/eigen/Eigen/src/Geometry/ParametrizedLine.h +39 -2
  229. data/vendor/eigen/Eigen/src/Geometry/Quaternion.h +70 -14
  230. data/vendor/eigen/Eigen/src/Geometry/Rotation2D.h +3 -3
  231. data/vendor/eigen/Eigen/src/Geometry/Scaling.h +23 -5
  232. data/vendor/eigen/Eigen/src/Geometry/Transform.h +88 -67
  233. data/vendor/eigen/Eigen/src/Geometry/Translation.h +6 -12
  234. data/vendor/eigen/Eigen/src/Geometry/Umeyama.h +1 -1
  235. data/vendor/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +168 -0
  236. data/vendor/eigen/Eigen/src/Householder/BlockHouseholder.h +9 -2
  237. data/vendor/eigen/Eigen/src/Householder/Householder.h +8 -4
  238. data/vendor/eigen/Eigen/src/Householder/HouseholderSequence.h +123 -48
  239. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +15 -15
  240. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +7 -23
  241. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +5 -22
  242. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +41 -47
  243. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +51 -60
  244. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +70 -20
  245. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +2 -20
  246. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +11 -9
  247. data/vendor/eigen/Eigen/src/Jacobi/Jacobi.h +31 -10
  248. data/vendor/eigen/Eigen/src/KLUSupport/KLUSupport.h +358 -0
  249. data/vendor/eigen/Eigen/src/LU/Determinant.h +35 -19
  250. data/vendor/eigen/Eigen/src/LU/FullPivLU.h +29 -43
  251. data/vendor/eigen/Eigen/src/LU/InverseImpl.h +25 -8
  252. data/vendor/eigen/Eigen/src/LU/PartialPivLU.h +71 -58
  253. data/vendor/eigen/Eigen/src/LU/arch/InverseSize4.h +351 -0
  254. data/vendor/eigen/Eigen/src/OrderingMethods/Amd.h +7 -17
  255. data/vendor/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +297 -277
  256. data/vendor/eigen/Eigen/src/OrderingMethods/Ordering.h +6 -10
  257. data/vendor/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +1 -1
  258. data/vendor/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +9 -7
  259. data/vendor/eigen/Eigen/src/QR/ColPivHouseholderQR.h +41 -20
  260. data/vendor/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +100 -27
  261. data/vendor/eigen/Eigen/src/QR/FullPivHouseholderQR.h +59 -22
  262. data/vendor/eigen/Eigen/src/QR/HouseholderQR.h +48 -23
  263. data/vendor/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +25 -3
  264. data/vendor/eigen/Eigen/src/SVD/BDCSVD.h +183 -63
  265. data/vendor/eigen/Eigen/src/SVD/JacobiSVD.h +22 -14
  266. data/vendor/eigen/Eigen/src/SVD/SVDBase.h +83 -22
  267. data/vendor/eigen/Eigen/src/SVD/UpperBidiagonalization.h +3 -3
  268. data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +17 -9
  269. data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +12 -37
  270. data/vendor/eigen/Eigen/src/SparseCore/AmbiVector.h +3 -2
  271. data/vendor/eigen/Eigen/src/SparseCore/CompressedStorage.h +16 -0
  272. data/vendor/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +6 -6
  273. data/vendor/eigen/Eigen/src/SparseCore/SparseAssign.h +81 -27
  274. data/vendor/eigen/Eigen/src/SparseCore/SparseBlock.h +25 -57
  275. data/vendor/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +40 -11
  276. data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +11 -15
  277. data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +4 -2
  278. data/vendor/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +30 -8
  279. data/vendor/eigen/Eigen/src/SparseCore/SparseMatrix.h +126 -11
  280. data/vendor/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +5 -12
  281. data/vendor/eigen/Eigen/src/SparseCore/SparseProduct.h +13 -1
  282. data/vendor/eigen/Eigen/src/SparseCore/SparseRef.h +7 -7
  283. data/vendor/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +5 -2
  284. data/vendor/eigen/Eigen/src/SparseCore/SparseUtil.h +8 -0
  285. data/vendor/eigen/Eigen/src/SparseCore/SparseVector.h +1 -1
  286. data/vendor/eigen/Eigen/src/SparseCore/SparseView.h +1 -0
  287. data/vendor/eigen/Eigen/src/SparseLU/SparseLU.h +162 -12
  288. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +1 -1
  289. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +76 -2
  290. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +2 -2
  291. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +1 -1
  292. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +1 -1
  293. data/vendor/eigen/Eigen/src/SparseQR/SparseQR.h +19 -6
  294. data/vendor/eigen/Eigen/src/StlSupport/StdDeque.h +2 -12
  295. data/vendor/eigen/Eigen/src/StlSupport/StdList.h +2 -2
  296. data/vendor/eigen/Eigen/src/StlSupport/StdVector.h +2 -2
  297. data/vendor/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +6 -8
  298. data/vendor/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +175 -39
  299. data/vendor/eigen/Eigen/src/misc/lapacke.h +5 -4
  300. data/vendor/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +28 -2
  301. data/vendor/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +155 -11
  302. data/vendor/eigen/Eigen/src/plugins/BlockMethods.h +626 -242
  303. data/vendor/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +14 -0
  304. data/vendor/eigen/Eigen/src/plugins/IndexedViewMethods.h +262 -0
  305. data/vendor/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +4 -4
  306. data/vendor/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +10 -0
  307. data/vendor/eigen/Eigen/src/plugins/ReshapedMethods.h +149 -0
  308. data/vendor/eigen/README.md +2 -0
  309. data/vendor/eigen/bench/btl/README +1 -1
  310. data/vendor/eigen/bench/tensors/README +6 -7
  311. data/vendor/eigen/ci/README.md +56 -0
  312. data/vendor/eigen/demos/mix_eigen_and_c/README +1 -1
  313. data/vendor/eigen/unsupported/Eigen/CXX11/src/Tensor/README.md +213 -158
  314. data/vendor/eigen/unsupported/README.txt +1 -1
  315. data/vendor/tomotopy/README.kr.rst +21 -0
  316. data/vendor/tomotopy/README.rst +20 -0
  317. data/vendor/tomotopy/src/Labeling/FoRelevance.cpp +2 -2
  318. data/vendor/tomotopy/src/Labeling/Phraser.hpp +1 -1
  319. data/vendor/tomotopy/src/TopicModel/CTModel.hpp +2 -1
  320. data/vendor/tomotopy/src/TopicModel/DMRModel.hpp +2 -1
  321. data/vendor/tomotopy/src/TopicModel/DTModel.hpp +1 -1
  322. data/vendor/tomotopy/src/TopicModel/GDMRModel.hpp +2 -2
  323. data/vendor/tomotopy/src/TopicModel/HDP.h +1 -0
  324. data/vendor/tomotopy/src/TopicModel/HDPModel.hpp +53 -2
  325. data/vendor/tomotopy/src/TopicModel/HLDAModel.hpp +1 -1
  326. data/vendor/tomotopy/src/TopicModel/HPAModel.hpp +1 -0
  327. data/vendor/tomotopy/src/TopicModel/LDACVB0Model.hpp +2 -2
  328. data/vendor/tomotopy/src/TopicModel/LDAModel.hpp +16 -5
  329. data/vendor/tomotopy/src/TopicModel/LLDAModel.hpp +1 -0
  330. data/vendor/tomotopy/src/TopicModel/MGLDAModel.hpp +1 -0
  331. data/vendor/tomotopy/src/TopicModel/PLDAModel.hpp +1 -0
  332. data/vendor/tomotopy/src/TopicModel/PT.h +3 -1
  333. data/vendor/tomotopy/src/TopicModel/PTModel.hpp +31 -1
  334. data/vendor/tomotopy/src/TopicModel/SLDAModel.hpp +2 -2
  335. data/vendor/tomotopy/src/TopicModel/TopicModel.hpp +7 -5
  336. data/vendor/tomotopy/src/Utils/EigenAddonOps.hpp +36 -1
  337. data/vendor/tomotopy/src/Utils/exception.h +6 -0
  338. data/vendor/tomotopy/src/Utils/sample.hpp +14 -12
  339. data/vendor/tomotopy/src/Utils/sse_gamma.h +0 -3
  340. metadata +60 -14
  341. data/vendor/eigen/Eigen/CMakeLists.txt +0 -19
  342. data/vendor/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -674
  343. data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
  344. data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
  345. data/vendor/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
  346. data/vendor/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
  347. data/vendor/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
@@ -10,6 +10,10 @@
10
10
  #ifndef EIGEN_PARALLELIZER_H
11
11
  #define EIGEN_PARALLELIZER_H
12
12
 
13
+ #if EIGEN_HAS_CXX11_ATOMIC
14
+ #include <atomic>
15
+ #endif
16
+
13
17
  namespace Eigen {
14
18
 
15
19
  namespace internal {
@@ -17,7 +21,8 @@ namespace internal {
17
21
  /** \internal */
18
22
  inline void manage_multi_threading(Action action, int* v)
19
23
  {
20
- static EIGEN_UNUSED int m_maxThreads = -1;
24
+ static int m_maxThreads = -1;
25
+ EIGEN_UNUSED_VARIABLE(m_maxThreads)
21
26
 
22
27
  if(action==SetAction)
23
28
  {
@@ -75,8 +80,17 @@ template<typename Index> struct GemmParallelInfo
75
80
  {
76
81
  GemmParallelInfo() : sync(-1), users(0), lhs_start(0), lhs_length(0) {}
77
82
 
83
+ // volatile is not enough on all architectures (see bug 1572)
84
+ // to guarantee that when thread A says to thread B that it is
85
+ // done with packing a block, then all writes have been really
86
+ // carried out... C++11 memory model+atomic guarantees this.
87
+ #if EIGEN_HAS_CXX11_ATOMIC
88
+ std::atomic<Index> sync;
89
+ std::atomic<int> users;
90
+ #else
78
91
  Index volatile sync;
79
92
  int volatile users;
93
+ #endif
80
94
 
81
95
  Index lhs_start;
82
96
  Index lhs_length;
@@ -87,11 +101,14 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, Index depth,
87
101
  {
88
102
  // TODO when EIGEN_USE_BLAS is defined,
89
103
  // we should still enable OMP for other scalar types
90
- #if !(defined (EIGEN_HAS_OPENMP)) || defined (EIGEN_USE_BLAS)
104
+ // Without C++11, we have to disable GEMM's parallelization on
105
+ // non x86 architectures because there volatile is not enough for our purpose.
106
+ // See bug 1572.
107
+ #if (! defined(EIGEN_HAS_OPENMP)) || defined(EIGEN_USE_BLAS) || ((!EIGEN_HAS_CXX11_ATOMIC) && !(EIGEN_ARCH_i386_OR_x86_64))
91
108
  // FIXME the transpose variable is only needed to properly split
92
109
  // the matrix product when multithreading is enabled. This is a temporary
93
110
  // fix to support row-major destination matrices. This whole
94
- // parallelizer mechanism has to be redisigned anyway.
111
+ // parallelizer mechanism has to be redesigned anyway.
95
112
  EIGEN_UNUSED_VARIABLE(depth);
96
113
  EIGEN_UNUSED_VARIABLE(transpose);
97
114
  func(0,rows, 0,cols);
@@ -112,12 +129,12 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, Index depth,
112
129
  double work = static_cast<double>(rows) * static_cast<double>(cols) *
113
130
  static_cast<double>(depth);
114
131
  double kMinTaskSize = 50000; // FIXME improve this heuristic.
115
- pb_max_threads = std::max<Index>(1, std::min<Index>(pb_max_threads, work / kMinTaskSize));
132
+ pb_max_threads = std::max<Index>(1, std::min<Index>(pb_max_threads, static_cast<Index>( work / kMinTaskSize ) ));
116
133
 
117
134
  // compute the number of threads we are going to use
118
135
  Index threads = std::min<Index>(nbThreads(), pb_max_threads);
119
136
 
120
- // if multi-threading is explicitely disabled, not useful, or if we already are in a parallel session,
137
+ // if multi-threading is explicitly disabled, not useful, or if we already are in a parallel session,
121
138
  // then abort multi-threading
122
139
  // FIXME omp_get_num_threads()>1 only works for openmp, what if the user does not use openmp?
123
140
  if((!Condition) || (threads==1) || (omp_get_num_threads()>1))
@@ -45,14 +45,23 @@ struct symm_pack_lhs
45
45
  }
46
46
  void operator()(Scalar* blockA, const Scalar* _lhs, Index lhsStride, Index cols, Index rows)
47
47
  {
48
- enum { PacketSize = packet_traits<Scalar>::size };
48
+ typedef typename unpacket_traits<typename packet_traits<Scalar>::type>::half HalfPacket;
49
+ typedef typename unpacket_traits<typename unpacket_traits<typename packet_traits<Scalar>::type>::half>::half QuarterPacket;
50
+ enum { PacketSize = packet_traits<Scalar>::size,
51
+ HalfPacketSize = unpacket_traits<HalfPacket>::size,
52
+ QuarterPacketSize = unpacket_traits<QuarterPacket>::size,
53
+ HasHalf = (int)HalfPacketSize < (int)PacketSize,
54
+ HasQuarter = (int)QuarterPacketSize < (int)HalfPacketSize};
55
+
49
56
  const_blas_data_mapper<Scalar,Index,StorageOrder> lhs(_lhs,lhsStride);
50
57
  Index count = 0;
51
58
  //Index peeled_mc3 = (rows/Pack1)*Pack1;
52
59
 
53
60
  const Index peeled_mc3 = Pack1>=3*PacketSize ? (rows/(3*PacketSize))*(3*PacketSize) : 0;
54
61
  const Index peeled_mc2 = Pack1>=2*PacketSize ? peeled_mc3+((rows-peeled_mc3)/(2*PacketSize))*(2*PacketSize) : 0;
55
- const Index peeled_mc1 = Pack1>=1*PacketSize ? (rows/(1*PacketSize))*(1*PacketSize) : 0;
62
+ const Index peeled_mc1 = Pack1>=1*PacketSize ? peeled_mc2+((rows-peeled_mc2)/(1*PacketSize))*(1*PacketSize) : 0;
63
+ const Index peeled_mc_half = Pack1>=HalfPacketSize ? peeled_mc1+((rows-peeled_mc1)/(HalfPacketSize))*(HalfPacketSize) : 0;
64
+ const Index peeled_mc_quarter = Pack1>=QuarterPacketSize ? peeled_mc_half+((rows-peeled_mc_half)/(QuarterPacketSize))*(QuarterPacketSize) : 0;
56
65
 
57
66
  if(Pack1>=3*PacketSize)
58
67
  for(Index i=0; i<peeled_mc3; i+=3*PacketSize)
@@ -66,8 +75,16 @@ struct symm_pack_lhs
66
75
  for(Index i=peeled_mc2; i<peeled_mc1; i+=1*PacketSize)
67
76
  pack<1*PacketSize>(blockA, lhs, cols, i, count);
68
77
 
78
+ if(HasHalf && Pack1>=HalfPacketSize)
79
+ for(Index i=peeled_mc1; i<peeled_mc_half; i+=HalfPacketSize)
80
+ pack<HalfPacketSize>(blockA, lhs, cols, i, count);
81
+
82
+ if(HasQuarter && Pack1>=QuarterPacketSize)
83
+ for(Index i=peeled_mc_half; i<peeled_mc_quarter; i+=QuarterPacketSize)
84
+ pack<QuarterPacketSize>(blockA, lhs, cols, i, count);
85
+
69
86
  // do the same with mr==1
70
- for(Index i=peeled_mc1; i<rows; i++)
87
+ for(Index i=peeled_mc_quarter; i<rows; i++)
71
88
  {
72
89
  for(Index k=0; k<i; k++)
73
90
  blockA[count++] = lhs(i, k); // normal
@@ -277,20 +294,21 @@ struct symm_pack_rhs
277
294
  template <typename Scalar, typename Index,
278
295
  int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs,
279
296
  int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs,
280
- int ResStorageOrder>
297
+ int ResStorageOrder, int ResInnerStride>
281
298
  struct product_selfadjoint_matrix;
282
299
 
283
300
  template <typename Scalar, typename Index,
284
301
  int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs,
285
- int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs>
286
- struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,ConjugateLhs, RhsStorageOrder,RhsSelfAdjoint,ConjugateRhs,RowMajor>
302
+ int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs,
303
+ int ResInnerStride>
304
+ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,ConjugateLhs, RhsStorageOrder,RhsSelfAdjoint,ConjugateRhs,RowMajor,ResInnerStride>
287
305
  {
288
306
 
289
307
  static EIGEN_STRONG_INLINE void run(
290
308
  Index rows, Index cols,
291
309
  const Scalar* lhs, Index lhsStride,
292
310
  const Scalar* rhs, Index rhsStride,
293
- Scalar* res, Index resStride,
311
+ Scalar* res, Index resIncr, Index resStride,
294
312
  const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
295
313
  {
296
314
  product_selfadjoint_matrix<Scalar, Index,
@@ -298,33 +316,35 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,Co
298
316
  RhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsSelfAdjoint,ConjugateRhs),
299
317
  EIGEN_LOGICAL_XOR(LhsSelfAdjoint,LhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
300
318
  LhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsSelfAdjoint,ConjugateLhs),
301
- ColMajor>
302
- ::run(cols, rows, rhs, rhsStride, lhs, lhsStride, res, resStride, alpha, blocking);
319
+ ColMajor,ResInnerStride>
320
+ ::run(cols, rows, rhs, rhsStride, lhs, lhsStride, res, resIncr, resStride, alpha, blocking);
303
321
  }
304
322
  };
305
323
 
306
324
  template <typename Scalar, typename Index,
307
325
  int LhsStorageOrder, bool ConjugateLhs,
308
- int RhsStorageOrder, bool ConjugateRhs>
309
- struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor>
326
+ int RhsStorageOrder, bool ConjugateRhs,
327
+ int ResInnerStride>
328
+ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor,ResInnerStride>
310
329
  {
311
330
 
312
331
  static EIGEN_DONT_INLINE void run(
313
332
  Index rows, Index cols,
314
333
  const Scalar* _lhs, Index lhsStride,
315
334
  const Scalar* _rhs, Index rhsStride,
316
- Scalar* res, Index resStride,
335
+ Scalar* res, Index resIncr, Index resStride,
317
336
  const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
318
337
  };
319
338
 
320
339
  template <typename Scalar, typename Index,
321
340
  int LhsStorageOrder, bool ConjugateLhs,
322
- int RhsStorageOrder, bool ConjugateRhs>
323
- EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor>::run(
341
+ int RhsStorageOrder, bool ConjugateRhs,
342
+ int ResInnerStride>
343
+ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor,ResInnerStride>::run(
324
344
  Index rows, Index cols,
325
345
  const Scalar* _lhs, Index lhsStride,
326
346
  const Scalar* _rhs, Index rhsStride,
327
- Scalar* _res, Index resStride,
347
+ Scalar* _res, Index resIncr, Index resStride,
328
348
  const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
329
349
  {
330
350
  Index size = rows;
@@ -334,11 +354,11 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,t
334
354
  typedef const_blas_data_mapper<Scalar, Index, LhsStorageOrder> LhsMapper;
335
355
  typedef const_blas_data_mapper<Scalar, Index, (LhsStorageOrder == RowMajor) ? ColMajor : RowMajor> LhsTransposeMapper;
336
356
  typedef const_blas_data_mapper<Scalar, Index, RhsStorageOrder> RhsMapper;
337
- typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor> ResMapper;
357
+ typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor, Unaligned, ResInnerStride> ResMapper;
338
358
  LhsMapper lhs(_lhs,lhsStride);
339
359
  LhsTransposeMapper lhs_transpose(_lhs,lhsStride);
340
360
  RhsMapper rhs(_rhs,rhsStride);
341
- ResMapper res(_res, resStride);
361
+ ResMapper res(_res, resStride, resIncr);
342
362
 
343
363
  Index kc = blocking.kc(); // cache block size along the K direction
344
364
  Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
@@ -352,7 +372,7 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,t
352
372
  gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
353
373
  symm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
354
374
  gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr,RhsStorageOrder> pack_rhs;
355
- gemm_pack_lhs<Scalar, Index, LhsTransposeMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder==RowMajor?ColMajor:RowMajor, true> pack_lhs_transposed;
375
+ gemm_pack_lhs<Scalar, Index, LhsTransposeMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket4Packing, LhsStorageOrder==RowMajor?ColMajor:RowMajor, true> pack_lhs_transposed;
356
376
 
357
377
  for(Index k2=0; k2<size; k2+=kc)
358
378
  {
@@ -387,7 +407,7 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,t
387
407
  for(Index i2=k2+kc; i2<size; i2+=mc)
388
408
  {
389
409
  const Index actual_mc = (std::min)(i2+mc,size)-i2;
390
- gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder,false>()
410
+ gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket4Packing, LhsStorageOrder,false>()
391
411
  (blockA, lhs.getSubMapper(i2, k2), actual_kc, actual_mc);
392
412
 
393
413
  gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha);
@@ -398,26 +418,28 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,t
398
418
  // matrix * selfadjoint product
399
419
  template <typename Scalar, typename Index,
400
420
  int LhsStorageOrder, bool ConjugateLhs,
401
- int RhsStorageOrder, bool ConjugateRhs>
402
- struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor>
421
+ int RhsStorageOrder, bool ConjugateRhs,
422
+ int ResInnerStride>
423
+ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor,ResInnerStride>
403
424
  {
404
425
 
405
426
  static EIGEN_DONT_INLINE void run(
406
427
  Index rows, Index cols,
407
428
  const Scalar* _lhs, Index lhsStride,
408
429
  const Scalar* _rhs, Index rhsStride,
409
- Scalar* res, Index resStride,
430
+ Scalar* res, Index resIncr, Index resStride,
410
431
  const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
411
432
  };
412
433
 
413
434
  template <typename Scalar, typename Index,
414
435
  int LhsStorageOrder, bool ConjugateLhs,
415
- int RhsStorageOrder, bool ConjugateRhs>
416
- EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor>::run(
436
+ int RhsStorageOrder, bool ConjugateRhs,
437
+ int ResInnerStride>
438
+ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor,ResInnerStride>::run(
417
439
  Index rows, Index cols,
418
440
  const Scalar* _lhs, Index lhsStride,
419
441
  const Scalar* _rhs, Index rhsStride,
420
- Scalar* _res, Index resStride,
442
+ Scalar* _res, Index resIncr, Index resStride,
421
443
  const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
422
444
  {
423
445
  Index size = cols;
@@ -425,9 +447,9 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,f
425
447
  typedef gebp_traits<Scalar,Scalar> Traits;
426
448
 
427
449
  typedef const_blas_data_mapper<Scalar, Index, LhsStorageOrder> LhsMapper;
428
- typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor> ResMapper;
450
+ typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor, Unaligned, ResInnerStride> ResMapper;
429
451
  LhsMapper lhs(_lhs,lhsStride);
430
- ResMapper res(_res,resStride);
452
+ ResMapper res(_res,resStride, resIncr);
431
453
 
432
454
  Index kc = blocking.kc(); // cache block size along the K direction
433
455
  Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
@@ -437,7 +459,7 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,f
437
459
  ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
438
460
 
439
461
  gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
440
- gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
462
+ gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket4Packing, LhsStorageOrder> pack_lhs;
441
463
  symm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;
442
464
 
443
465
  for(Index k2=0; k2<size; k2+=kc)
@@ -503,12 +525,13 @@ struct selfadjoint_product_impl<Lhs,LhsMode,false,Rhs,RhsMode,false>
503
525
  NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsIsUpper,bool(LhsBlasTraits::NeedToConjugate)),
504
526
  EIGEN_LOGICAL_XOR(RhsIsUpper,internal::traits<Rhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, RhsIsSelfAdjoint,
505
527
  NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsIsUpper,bool(RhsBlasTraits::NeedToConjugate)),
506
- internal::traits<Dest>::Flags&RowMajorBit ? RowMajor : ColMajor>
528
+ internal::traits<Dest>::Flags&RowMajorBit ? RowMajor : ColMajor,
529
+ Dest::InnerStrideAtCompileTime>
507
530
  ::run(
508
531
  lhs.rows(), rhs.cols(), // sizes
509
532
  &lhs.coeffRef(0,0), lhs.outerStride(), // lhs info
510
533
  &rhs.coeffRef(0,0), rhs.outerStride(), // rhs info
511
- &dst.coeffRef(0,0), dst.outerStride(), // result info
534
+ &dst.coeffRef(0,0), dst.innerStride(), dst.outerStride(), // result info
512
535
  actualAlpha, blocking // alpha
513
536
  );
514
537
  }
@@ -44,16 +44,18 @@ namespace internal {
44
44
  template <typename Index, \
45
45
  int LhsStorageOrder, bool ConjugateLhs, \
46
46
  int RhsStorageOrder, bool ConjugateRhs> \
47
- struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLhs,RhsStorageOrder,false,ConjugateRhs,ColMajor> \
47
+ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLhs,RhsStorageOrder,false,ConjugateRhs,ColMajor,1> \
48
48
  {\
49
49
  \
50
50
  static void run( \
51
51
  Index rows, Index cols, \
52
52
  const EIGTYPE* _lhs, Index lhsStride, \
53
53
  const EIGTYPE* _rhs, Index rhsStride, \
54
- EIGTYPE* res, Index resStride, \
54
+ EIGTYPE* res, Index resIncr, Index resStride, \
55
55
  EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \
56
56
  { \
57
+ EIGEN_ONLY_USED_FOR_DEBUG(resIncr); \
58
+ eigen_assert(resIncr == 1); \
57
59
  char side='L', uplo='L'; \
58
60
  BlasIndex m, n, lda, ldb, ldc; \
59
61
  const EIGTYPE *a, *b; \
@@ -91,15 +93,17 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLh
91
93
  template <typename Index, \
92
94
  int LhsStorageOrder, bool ConjugateLhs, \
93
95
  int RhsStorageOrder, bool ConjugateRhs> \
94
- struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLhs,RhsStorageOrder,false,ConjugateRhs,ColMajor> \
96
+ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLhs,RhsStorageOrder,false,ConjugateRhs,ColMajor,1> \
95
97
  {\
96
98
  static void run( \
97
99
  Index rows, Index cols, \
98
100
  const EIGTYPE* _lhs, Index lhsStride, \
99
101
  const EIGTYPE* _rhs, Index rhsStride, \
100
- EIGTYPE* res, Index resStride, \
102
+ EIGTYPE* res, Index resIncr, Index resStride, \
101
103
  EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \
102
104
  { \
105
+ EIGEN_ONLY_USED_FOR_DEBUG(resIncr); \
106
+ eigen_assert(resIncr == 1); \
103
107
  char side='L', uplo='L'; \
104
108
  BlasIndex m, n, lda, ldb, ldc; \
105
109
  const EIGTYPE *a, *b; \
@@ -167,16 +171,18 @@ EIGEN_BLAS_HEMM_L(scomplex, float, cf, chemm_)
167
171
  template <typename Index, \
168
172
  int LhsStorageOrder, bool ConjugateLhs, \
169
173
  int RhsStorageOrder, bool ConjugateRhs> \
170
- struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateLhs,RhsStorageOrder,true,ConjugateRhs,ColMajor> \
174
+ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateLhs,RhsStorageOrder,true,ConjugateRhs,ColMajor,1> \
171
175
  {\
172
176
  \
173
177
  static void run( \
174
178
  Index rows, Index cols, \
175
179
  const EIGTYPE* _lhs, Index lhsStride, \
176
180
  const EIGTYPE* _rhs, Index rhsStride, \
177
- EIGTYPE* res, Index resStride, \
181
+ EIGTYPE* res, Index resIncr, Index resStride, \
178
182
  EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \
179
183
  { \
184
+ EIGEN_ONLY_USED_FOR_DEBUG(resIncr); \
185
+ eigen_assert(resIncr == 1); \
180
186
  char side='R', uplo='L'; \
181
187
  BlasIndex m, n, lda, ldb, ldc; \
182
188
  const EIGTYPE *a, *b; \
@@ -213,15 +219,17 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateL
213
219
  template <typename Index, \
214
220
  int LhsStorageOrder, bool ConjugateLhs, \
215
221
  int RhsStorageOrder, bool ConjugateRhs> \
216
- struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateLhs,RhsStorageOrder,true,ConjugateRhs,ColMajor> \
222
+ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateLhs,RhsStorageOrder,true,ConjugateRhs,ColMajor,1> \
217
223
  {\
218
224
  static void run( \
219
225
  Index rows, Index cols, \
220
226
  const EIGTYPE* _lhs, Index lhsStride, \
221
227
  const EIGTYPE* _rhs, Index rhsStride, \
222
- EIGTYPE* res, Index resStride, \
228
+ EIGTYPE* res, Index resIncr, Index resStride, \
223
229
  EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \
224
230
  { \
231
+ EIGEN_ONLY_USED_FOR_DEBUG(resIncr); \
232
+ eigen_assert(resIncr == 1); \
225
233
  char side='R', uplo='L'; \
226
234
  BlasIndex m, n, lda, ldb, ldc; \
227
235
  const EIGTYPE *a, *b; \
@@ -15,7 +15,7 @@ namespace Eigen {
15
15
  namespace internal {
16
16
 
17
17
  /* Optimized selfadjoint matrix * vector product:
18
- * This algorithm processes 2 columns at onces that allows to both reduce
18
+ * This algorithm processes 2 columns at once that allows to both reduce
19
19
  * the number of load/stores of the result by a factor 2 and to reduce
20
20
  * the instruction dependency.
21
21
  */
@@ -27,7 +27,8 @@ template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool Conju
27
27
  struct selfadjoint_matrix_vector_product
28
28
 
29
29
  {
30
- static EIGEN_DONT_INLINE void run(
30
+ static EIGEN_DONT_INLINE EIGEN_DEVICE_FUNC
31
+ void run(
31
32
  Index size,
32
33
  const Scalar* lhs, Index lhsStride,
33
34
  const Scalar* rhs,
@@ -36,7 +37,8 @@ static EIGEN_DONT_INLINE void run(
36
37
  };
37
38
 
38
39
  template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs, int Version>
39
- EIGEN_DONT_INLINE void selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,Version>::run(
40
+ EIGEN_DONT_INLINE EIGEN_DEVICE_FUNC
41
+ void selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,Version>::run(
40
42
  Index size,
41
43
  const Scalar* lhs, Index lhsStride,
42
44
  const Scalar* rhs,
@@ -62,8 +64,7 @@ EIGEN_DONT_INLINE void selfadjoint_matrix_vector_product<Scalar,Index,StorageOrd
62
64
 
63
65
  Scalar cjAlpha = ConjugateRhs ? numext::conj(alpha) : alpha;
64
66
 
65
-
66
- Index bound = (std::max)(Index(0),size-8) & 0xfffffffe;
67
+ Index bound = numext::maxi(Index(0), size-8) & 0xfffffffe;
67
68
  if (FirstTriangular)
68
69
  bound = size - bound;
69
70
 
@@ -175,7 +176,8 @@ struct selfadjoint_product_impl<Lhs,LhsMode,false,Rhs,0,true>
175
176
  enum { LhsUpLo = LhsMode&(Upper|Lower) };
176
177
 
177
178
  template<typename Dest>
178
- static void run(Dest& dest, const Lhs &a_lhs, const Rhs &a_rhs, const Scalar& alpha)
179
+ static EIGEN_DEVICE_FUNC
180
+ void run(Dest& dest, const Lhs &a_lhs, const Rhs &a_rhs, const Scalar& alpha)
179
181
  {
180
182
  typedef typename Dest::Scalar ResScalar;
181
183
  typedef typename Rhs::Scalar RhsScalar;
@@ -109,10 +109,10 @@ struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,false>
109
109
  internal::general_matrix_matrix_triangular_product<Index,
110
110
  Scalar, OtherIsRowMajor ? RowMajor : ColMajor, OtherBlasTraits::NeedToConjugate && NumTraits<Scalar>::IsComplex,
111
111
  Scalar, OtherIsRowMajor ? ColMajor : RowMajor, (!OtherBlasTraits::NeedToConjugate) && NumTraits<Scalar>::IsComplex,
112
- IsRowMajor ? RowMajor : ColMajor, UpLo>
112
+ IsRowMajor ? RowMajor : ColMajor, MatrixType::InnerStrideAtCompileTime, UpLo>
113
113
  ::run(size, depth,
114
- &actualOther.coeffRef(0,0), actualOther.outerStride(), &actualOther.coeffRef(0,0), actualOther.outerStride(),
115
- mat.data(), mat.outerStride(), actualAlpha, blocking);
114
+ actualOther.data(), actualOther.outerStride(), actualOther.data(), actualOther.outerStride(),
115
+ mat.data(), mat.innerStride(), mat.outerStride(), actualAlpha, blocking);
116
116
  }
117
117
  };
118
118
 
@@ -120,7 +120,7 @@ struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,false>
120
120
 
121
121
  template<typename MatrixType, unsigned int UpLo>
122
122
  template<typename DerivedU>
123
- SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo>
123
+ EIGEN_DEVICE_FUNC SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo>
124
124
  ::rankUpdate(const MatrixBase<DerivedU>& u, const Scalar& alpha)
125
125
  {
126
126
  selfadjoint_product_selector<MatrixType,DerivedU,UpLo>::run(_expression().const_cast_derived(), u.derived(), alpha);
@@ -24,7 +24,8 @@ struct selfadjoint_rank2_update_selector;
24
24
  template<typename Scalar, typename Index, typename UType, typename VType>
25
25
  struct selfadjoint_rank2_update_selector<Scalar,Index,UType,VType,Lower>
26
26
  {
27
- static void run(Scalar* mat, Index stride, const UType& u, const VType& v, const Scalar& alpha)
27
+ static EIGEN_DEVICE_FUNC
28
+ void run(Scalar* mat, Index stride, const UType& u, const VType& v, const Scalar& alpha)
28
29
  {
29
30
  const Index size = u.size();
30
31
  for (Index i=0; i<size; ++i)
@@ -57,7 +58,7 @@ template<bool Cond, typename T> struct conj_expr_if
57
58
 
58
59
  template<typename MatrixType, unsigned int UpLo>
59
60
  template<typename DerivedU, typename DerivedV>
60
- SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo>
61
+ EIGEN_DEVICE_FUNC SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo>
61
62
  ::rankUpdate(const MatrixBase<DerivedU>& u, const MatrixBase<DerivedV>& v, const Scalar& alpha)
62
63
  {
63
64
  typedef internal::blas_traits<DerivedU> UBlasTraits;
@@ -79,8 +80,8 @@ SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo>
79
80
  if (IsRowMajor)
80
81
  actualAlpha = numext::conj(actualAlpha);
81
82
 
82
- typedef typename internal::remove_all<typename internal::conj_expr_if<IsRowMajor ^ UBlasTraits::NeedToConjugate,_ActualUType>::type>::type UType;
83
- typedef typename internal::remove_all<typename internal::conj_expr_if<IsRowMajor ^ VBlasTraits::NeedToConjugate,_ActualVType>::type>::type VType;
83
+ typedef typename internal::remove_all<typename internal::conj_expr_if<int(IsRowMajor) ^ int(UBlasTraits::NeedToConjugate), _ActualUType>::type>::type UType;
84
+ typedef typename internal::remove_all<typename internal::conj_expr_if<int(IsRowMajor) ^ int(VBlasTraits::NeedToConjugate), _ActualVType>::type>::type VType;
84
85
  internal::selfadjoint_rank2_update_selector<Scalar, Index, UType, VType,
85
86
  (IsRowMajor ? int(UpLo==Upper ? Lower : Upper) : UpLo)>
86
87
  ::run(_expression().const_cast_derived().data(),_expression().outerStride(),UType(actualU),VType(actualV),actualAlpha);