tomoto 0.2.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (369) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +10 -0
  3. data/README.md +8 -10
  4. data/ext/tomoto/ct.cpp +11 -11
  5. data/ext/tomoto/dmr.cpp +14 -13
  6. data/ext/tomoto/dt.cpp +14 -14
  7. data/ext/tomoto/extconf.rb +7 -5
  8. data/ext/tomoto/gdmr.cpp +7 -7
  9. data/ext/tomoto/hdp.cpp +9 -9
  10. data/ext/tomoto/hlda.cpp +13 -13
  11. data/ext/tomoto/hpa.cpp +5 -5
  12. data/ext/tomoto/lda.cpp +42 -39
  13. data/ext/tomoto/llda.cpp +6 -6
  14. data/ext/tomoto/mglda.cpp +15 -15
  15. data/ext/tomoto/pa.cpp +6 -6
  16. data/ext/tomoto/plda.cpp +6 -6
  17. data/ext/tomoto/slda.cpp +8 -8
  18. data/ext/tomoto/{ext.cpp → tomoto.cpp} +8 -8
  19. data/ext/tomoto/utils.h +16 -70
  20. data/lib/tomoto/version.rb +1 -1
  21. data/lib/tomoto.rb +5 -1
  22. data/vendor/EigenRand/EigenRand/Core.h +10 -10
  23. data/vendor/EigenRand/EigenRand/Dists/Basic.h +208 -9
  24. data/vendor/EigenRand/EigenRand/Dists/Discrete.h +52 -31
  25. data/vendor/EigenRand/EigenRand/Dists/GammaPoisson.h +9 -8
  26. data/vendor/EigenRand/EigenRand/Dists/NormalExp.h +28 -21
  27. data/vendor/EigenRand/EigenRand/EigenRand +11 -6
  28. data/vendor/EigenRand/EigenRand/Macro.h +13 -7
  29. data/vendor/EigenRand/EigenRand/MorePacketMath.h +348 -740
  30. data/vendor/EigenRand/EigenRand/MvDists/Multinomial.h +5 -3
  31. data/vendor/EigenRand/EigenRand/MvDists/MvNormal.h +9 -3
  32. data/vendor/EigenRand/EigenRand/PacketFilter.h +11 -253
  33. data/vendor/EigenRand/EigenRand/PacketRandomEngine.h +21 -47
  34. data/vendor/EigenRand/EigenRand/RandUtils.h +50 -344
  35. data/vendor/EigenRand/EigenRand/arch/AVX/MorePacketMath.h +619 -0
  36. data/vendor/EigenRand/EigenRand/arch/AVX/PacketFilter.h +149 -0
  37. data/vendor/EigenRand/EigenRand/arch/AVX/RandUtils.h +228 -0
  38. data/vendor/EigenRand/EigenRand/arch/NEON/MorePacketMath.h +473 -0
  39. data/vendor/EigenRand/EigenRand/arch/NEON/PacketFilter.h +142 -0
  40. data/vendor/EigenRand/EigenRand/arch/NEON/RandUtils.h +126 -0
  41. data/vendor/EigenRand/EigenRand/arch/SSE/MorePacketMath.h +501 -0
  42. data/vendor/EigenRand/EigenRand/arch/SSE/PacketFilter.h +133 -0
  43. data/vendor/EigenRand/EigenRand/arch/SSE/RandUtils.h +120 -0
  44. data/vendor/EigenRand/EigenRand/doc.h +24 -12
  45. data/vendor/EigenRand/README.md +57 -4
  46. data/vendor/eigen/COPYING.APACHE +203 -0
  47. data/vendor/eigen/COPYING.BSD +1 -1
  48. data/vendor/eigen/COPYING.MINPACK +51 -52
  49. data/vendor/eigen/Eigen/Cholesky +0 -1
  50. data/vendor/eigen/Eigen/Core +112 -265
  51. data/vendor/eigen/Eigen/Eigenvalues +2 -3
  52. data/vendor/eigen/Eigen/Geometry +5 -8
  53. data/vendor/eigen/Eigen/Householder +0 -1
  54. data/vendor/eigen/Eigen/Jacobi +0 -1
  55. data/vendor/eigen/Eigen/KLUSupport +41 -0
  56. data/vendor/eigen/Eigen/LU +2 -5
  57. data/vendor/eigen/Eigen/OrderingMethods +0 -3
  58. data/vendor/eigen/Eigen/PaStiXSupport +1 -0
  59. data/vendor/eigen/Eigen/PardisoSupport +0 -0
  60. data/vendor/eigen/Eigen/QR +2 -3
  61. data/vendor/eigen/Eigen/QtAlignedMalloc +0 -1
  62. data/vendor/eigen/Eigen/SVD +0 -1
  63. data/vendor/eigen/Eigen/Sparse +0 -2
  64. data/vendor/eigen/Eigen/SparseCholesky +0 -8
  65. data/vendor/eigen/Eigen/SparseLU +4 -0
  66. data/vendor/eigen/Eigen/SparseQR +0 -1
  67. data/vendor/eigen/Eigen/src/Cholesky/LDLT.h +42 -27
  68. data/vendor/eigen/Eigen/src/Cholesky/LLT.h +39 -23
  69. data/vendor/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +90 -47
  70. data/vendor/eigen/Eigen/src/Core/ArithmeticSequence.h +413 -0
  71. data/vendor/eigen/Eigen/src/Core/Array.h +99 -11
  72. data/vendor/eigen/Eigen/src/Core/ArrayBase.h +3 -3
  73. data/vendor/eigen/Eigen/src/Core/ArrayWrapper.h +21 -21
  74. data/vendor/eigen/Eigen/src/Core/Assign.h +1 -1
  75. data/vendor/eigen/Eigen/src/Core/AssignEvaluator.h +125 -50
  76. data/vendor/eigen/Eigen/src/Core/Assign_MKL.h +10 -10
  77. data/vendor/eigen/Eigen/src/Core/BandMatrix.h +16 -16
  78. data/vendor/eigen/Eigen/src/Core/Block.h +56 -60
  79. data/vendor/eigen/Eigen/src/Core/BooleanRedux.h +29 -31
  80. data/vendor/eigen/Eigen/src/Core/CommaInitializer.h +7 -3
  81. data/vendor/eigen/Eigen/src/Core/CoreEvaluators.h +325 -272
  82. data/vendor/eigen/Eigen/src/Core/CoreIterators.h +5 -0
  83. data/vendor/eigen/Eigen/src/Core/CwiseBinaryOp.h +21 -22
  84. data/vendor/eigen/Eigen/src/Core/CwiseNullaryOp.h +153 -18
  85. data/vendor/eigen/Eigen/src/Core/CwiseUnaryOp.h +6 -6
  86. data/vendor/eigen/Eigen/src/Core/CwiseUnaryView.h +14 -10
  87. data/vendor/eigen/Eigen/src/Core/DenseBase.h +132 -42
  88. data/vendor/eigen/Eigen/src/Core/DenseCoeffsBase.h +25 -21
  89. data/vendor/eigen/Eigen/src/Core/DenseStorage.h +153 -71
  90. data/vendor/eigen/Eigen/src/Core/Diagonal.h +21 -23
  91. data/vendor/eigen/Eigen/src/Core/DiagonalMatrix.h +50 -2
  92. data/vendor/eigen/Eigen/src/Core/DiagonalProduct.h +1 -1
  93. data/vendor/eigen/Eigen/src/Core/Dot.h +10 -10
  94. data/vendor/eigen/Eigen/src/Core/EigenBase.h +10 -9
  95. data/vendor/eigen/Eigen/src/Core/ForceAlignedAccess.h +8 -4
  96. data/vendor/eigen/Eigen/src/Core/Fuzzy.h +3 -3
  97. data/vendor/eigen/Eigen/src/Core/GeneralProduct.h +20 -10
  98. data/vendor/eigen/Eigen/src/Core/GenericPacketMath.h +599 -152
  99. data/vendor/eigen/Eigen/src/Core/GlobalFunctions.h +40 -33
  100. data/vendor/eigen/Eigen/src/Core/IO.h +40 -7
  101. data/vendor/eigen/Eigen/src/Core/IndexedView.h +237 -0
  102. data/vendor/eigen/Eigen/src/Core/Inverse.h +9 -10
  103. data/vendor/eigen/Eigen/src/Core/Map.h +7 -7
  104. data/vendor/eigen/Eigen/src/Core/MapBase.h +10 -3
  105. data/vendor/eigen/Eigen/src/Core/MathFunctions.h +767 -125
  106. data/vendor/eigen/Eigen/src/Core/MathFunctionsImpl.h +118 -19
  107. data/vendor/eigen/Eigen/src/Core/Matrix.h +131 -25
  108. data/vendor/eigen/Eigen/src/Core/MatrixBase.h +21 -3
  109. data/vendor/eigen/Eigen/src/Core/NestByValue.h +25 -50
  110. data/vendor/eigen/Eigen/src/Core/NoAlias.h +4 -3
  111. data/vendor/eigen/Eigen/src/Core/NumTraits.h +107 -20
  112. data/vendor/eigen/Eigen/src/Core/PartialReduxEvaluator.h +232 -0
  113. data/vendor/eigen/Eigen/src/Core/PermutationMatrix.h +3 -31
  114. data/vendor/eigen/Eigen/src/Core/PlainObjectBase.h +152 -59
  115. data/vendor/eigen/Eigen/src/Core/Product.h +30 -25
  116. data/vendor/eigen/Eigen/src/Core/ProductEvaluators.h +192 -125
  117. data/vendor/eigen/Eigen/src/Core/Random.h +37 -1
  118. data/vendor/eigen/Eigen/src/Core/Redux.h +180 -170
  119. data/vendor/eigen/Eigen/src/Core/Ref.h +121 -23
  120. data/vendor/eigen/Eigen/src/Core/Replicate.h +8 -8
  121. data/vendor/eigen/Eigen/src/Core/Reshaped.h +454 -0
  122. data/vendor/eigen/Eigen/src/Core/ReturnByValue.h +7 -5
  123. data/vendor/eigen/Eigen/src/Core/Reverse.h +18 -12
  124. data/vendor/eigen/Eigen/src/Core/Select.h +8 -6
  125. data/vendor/eigen/Eigen/src/Core/SelfAdjointView.h +33 -20
  126. data/vendor/eigen/Eigen/src/Core/Solve.h +14 -14
  127. data/vendor/eigen/Eigen/src/Core/SolveTriangular.h +16 -16
  128. data/vendor/eigen/Eigen/src/Core/SolverBase.h +41 -3
  129. data/vendor/eigen/Eigen/src/Core/StableNorm.h +100 -70
  130. data/vendor/eigen/Eigen/src/Core/StlIterators.h +463 -0
  131. data/vendor/eigen/Eigen/src/Core/Stride.h +9 -4
  132. data/vendor/eigen/Eigen/src/Core/Swap.h +5 -4
  133. data/vendor/eigen/Eigen/src/Core/Transpose.h +88 -27
  134. data/vendor/eigen/Eigen/src/Core/Transpositions.h +26 -47
  135. data/vendor/eigen/Eigen/src/Core/TriangularMatrix.h +93 -75
  136. data/vendor/eigen/Eigen/src/Core/VectorBlock.h +5 -5
  137. data/vendor/eigen/Eigen/src/Core/VectorwiseOp.h +159 -70
  138. data/vendor/eigen/Eigen/src/Core/Visitor.h +137 -29
  139. data/vendor/eigen/Eigen/src/Core/arch/AVX/Complex.h +50 -129
  140. data/vendor/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +126 -337
  141. data/vendor/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +1092 -155
  142. data/vendor/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +65 -1
  143. data/vendor/eigen/Eigen/src/Core/arch/AVX512/Complex.h +422 -0
  144. data/vendor/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +207 -236
  145. data/vendor/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1482 -495
  146. data/vendor/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +89 -0
  147. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +152 -165
  148. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +19 -251
  149. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2937 -0
  150. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +221 -0
  151. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +629 -0
  152. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2042 -392
  153. data/vendor/eigen/Eigen/src/Core/arch/CUDA/Complex.h +235 -80
  154. data/vendor/eigen/Eigen/src/Core/arch/Default/BFloat16.h +700 -0
  155. data/vendor/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +102 -14
  156. data/vendor/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1649 -0
  157. data/vendor/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +110 -0
  158. data/vendor/eigen/Eigen/src/Core/arch/Default/Half.h +942 -0
  159. data/vendor/eigen/Eigen/src/Core/arch/Default/Settings.h +1 -1
  160. data/vendor/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +120 -0
  161. data/vendor/eigen/Eigen/src/Core/arch/{CUDA → GPU}/MathFunctions.h +16 -4
  162. data/vendor/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1685 -0
  163. data/vendor/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +80 -0
  164. data/vendor/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
  165. data/vendor/eigen/Eigen/src/Core/arch/MSA/Complex.h +648 -0
  166. data/vendor/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +387 -0
  167. data/vendor/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1233 -0
  168. data/vendor/eigen/Eigen/src/Core/arch/NEON/Complex.h +313 -219
  169. data/vendor/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +183 -0
  170. data/vendor/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +54 -70
  171. data/vendor/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4376 -549
  172. data/vendor/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1419 -0
  173. data/vendor/eigen/Eigen/src/Core/arch/SSE/Complex.h +59 -179
  174. data/vendor/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +65 -428
  175. data/vendor/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +893 -283
  176. data/vendor/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +65 -0
  177. data/vendor/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +44 -0
  178. data/vendor/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +752 -0
  179. data/vendor/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +49 -0
  180. data/vendor/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +232 -0
  181. data/vendor/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +301 -0
  182. data/vendor/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +670 -0
  183. data/vendor/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +694 -0
  184. data/vendor/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +85 -0
  185. data/vendor/eigen/Eigen/src/Core/arch/ZVector/Complex.h +212 -183
  186. data/vendor/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +101 -5
  187. data/vendor/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +510 -395
  188. data/vendor/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +11 -2
  189. data/vendor/eigen/Eigen/src/Core/functors/BinaryFunctors.h +112 -46
  190. data/vendor/eigen/Eigen/src/Core/functors/NullaryFunctors.h +31 -30
  191. data/vendor/eigen/Eigen/src/Core/functors/StlFunctors.h +32 -2
  192. data/vendor/eigen/Eigen/src/Core/functors/UnaryFunctors.h +355 -16
  193. data/vendor/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1075 -586
  194. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +49 -24
  195. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +41 -35
  196. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +6 -6
  197. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +4 -2
  198. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +382 -483
  199. data/vendor/eigen/Eigen/src/Core/products/Parallelizer.h +22 -5
  200. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +53 -30
  201. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +16 -8
  202. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +8 -6
  203. data/vendor/eigen/Eigen/src/Core/products/SelfadjointProduct.h +4 -4
  204. data/vendor/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +5 -4
  205. data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +33 -27
  206. data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +14 -12
  207. data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +36 -34
  208. data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +8 -4
  209. data/vendor/eigen/Eigen/src/Core/products/TriangularSolverVector.h +13 -10
  210. data/vendor/eigen/Eigen/src/Core/util/BlasUtil.h +304 -119
  211. data/vendor/eigen/Eigen/src/Core/util/ConfigureVectorization.h +512 -0
  212. data/vendor/eigen/Eigen/src/Core/util/Constants.h +25 -9
  213. data/vendor/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +26 -3
  214. data/vendor/eigen/Eigen/src/Core/util/ForwardDeclarations.h +29 -9
  215. data/vendor/eigen/Eigen/src/Core/util/IndexedViewHelper.h +186 -0
  216. data/vendor/eigen/Eigen/src/Core/util/IntegralConstant.h +272 -0
  217. data/vendor/eigen/Eigen/src/Core/util/MKL_support.h +8 -1
  218. data/vendor/eigen/Eigen/src/Core/util/Macros.h +709 -246
  219. data/vendor/eigen/Eigen/src/Core/util/Memory.h +222 -52
  220. data/vendor/eigen/Eigen/src/Core/util/Meta.h +355 -77
  221. data/vendor/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +5 -1
  222. data/vendor/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
  223. data/vendor/eigen/Eigen/src/Core/util/StaticAssert.h +8 -5
  224. data/vendor/eigen/Eigen/src/Core/util/SymbolicIndex.h +293 -0
  225. data/vendor/eigen/Eigen/src/Core/util/XprHelper.h +65 -30
  226. data/vendor/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +1 -1
  227. data/vendor/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +7 -4
  228. data/vendor/eigen/Eigen/src/Eigenvalues/EigenSolver.h +2 -2
  229. data/vendor/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +1 -1
  230. data/vendor/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +2 -2
  231. data/vendor/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +2 -2
  232. data/vendor/eigen/Eigen/src/Eigenvalues/RealQZ.h +9 -6
  233. data/vendor/eigen/Eigen/src/Eigenvalues/RealSchur.h +21 -9
  234. data/vendor/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +77 -43
  235. data/vendor/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +20 -15
  236. data/vendor/eigen/Eigen/src/Geometry/AlignedBox.h +99 -5
  237. data/vendor/eigen/Eigen/src/Geometry/AngleAxis.h +4 -4
  238. data/vendor/eigen/Eigen/src/Geometry/EulerAngles.h +3 -3
  239. data/vendor/eigen/Eigen/src/Geometry/Homogeneous.h +15 -11
  240. data/vendor/eigen/Eigen/src/Geometry/Hyperplane.h +1 -1
  241. data/vendor/eigen/Eigen/src/Geometry/OrthoMethods.h +3 -2
  242. data/vendor/eigen/Eigen/src/Geometry/ParametrizedLine.h +39 -2
  243. data/vendor/eigen/Eigen/src/Geometry/Quaternion.h +70 -14
  244. data/vendor/eigen/Eigen/src/Geometry/Rotation2D.h +3 -3
  245. data/vendor/eigen/Eigen/src/Geometry/Scaling.h +23 -5
  246. data/vendor/eigen/Eigen/src/Geometry/Transform.h +88 -67
  247. data/vendor/eigen/Eigen/src/Geometry/Translation.h +6 -12
  248. data/vendor/eigen/Eigen/src/Geometry/Umeyama.h +1 -1
  249. data/vendor/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +168 -0
  250. data/vendor/eigen/Eigen/src/Householder/BlockHouseholder.h +9 -2
  251. data/vendor/eigen/Eigen/src/Householder/Householder.h +8 -4
  252. data/vendor/eigen/Eigen/src/Householder/HouseholderSequence.h +123 -48
  253. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +15 -15
  254. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +7 -23
  255. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +5 -22
  256. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +41 -47
  257. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +51 -60
  258. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +70 -20
  259. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +2 -20
  260. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +11 -9
  261. data/vendor/eigen/Eigen/src/Jacobi/Jacobi.h +31 -10
  262. data/vendor/eigen/Eigen/src/KLUSupport/KLUSupport.h +358 -0
  263. data/vendor/eigen/Eigen/src/LU/Determinant.h +35 -19
  264. data/vendor/eigen/Eigen/src/LU/FullPivLU.h +29 -43
  265. data/vendor/eigen/Eigen/src/LU/InverseImpl.h +25 -8
  266. data/vendor/eigen/Eigen/src/LU/PartialPivLU.h +71 -58
  267. data/vendor/eigen/Eigen/src/LU/arch/InverseSize4.h +351 -0
  268. data/vendor/eigen/Eigen/src/OrderingMethods/Amd.h +7 -17
  269. data/vendor/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +297 -277
  270. data/vendor/eigen/Eigen/src/OrderingMethods/Ordering.h +6 -10
  271. data/vendor/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +1 -1
  272. data/vendor/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +9 -7
  273. data/vendor/eigen/Eigen/src/QR/ColPivHouseholderQR.h +41 -20
  274. data/vendor/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +100 -27
  275. data/vendor/eigen/Eigen/src/QR/FullPivHouseholderQR.h +59 -22
  276. data/vendor/eigen/Eigen/src/QR/HouseholderQR.h +48 -23
  277. data/vendor/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +25 -3
  278. data/vendor/eigen/Eigen/src/SVD/BDCSVD.h +183 -63
  279. data/vendor/eigen/Eigen/src/SVD/JacobiSVD.h +22 -14
  280. data/vendor/eigen/Eigen/src/SVD/SVDBase.h +83 -22
  281. data/vendor/eigen/Eigen/src/SVD/UpperBidiagonalization.h +3 -3
  282. data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +17 -9
  283. data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +12 -37
  284. data/vendor/eigen/Eigen/src/SparseCore/AmbiVector.h +3 -2
  285. data/vendor/eigen/Eigen/src/SparseCore/CompressedStorage.h +16 -0
  286. data/vendor/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +6 -6
  287. data/vendor/eigen/Eigen/src/SparseCore/SparseAssign.h +81 -27
  288. data/vendor/eigen/Eigen/src/SparseCore/SparseBlock.h +25 -57
  289. data/vendor/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +40 -11
  290. data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +11 -15
  291. data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +4 -2
  292. data/vendor/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +30 -8
  293. data/vendor/eigen/Eigen/src/SparseCore/SparseMatrix.h +126 -11
  294. data/vendor/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +5 -12
  295. data/vendor/eigen/Eigen/src/SparseCore/SparseProduct.h +13 -1
  296. data/vendor/eigen/Eigen/src/SparseCore/SparseRef.h +7 -7
  297. data/vendor/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +5 -2
  298. data/vendor/eigen/Eigen/src/SparseCore/SparseUtil.h +8 -0
  299. data/vendor/eigen/Eigen/src/SparseCore/SparseVector.h +1 -1
  300. data/vendor/eigen/Eigen/src/SparseCore/SparseView.h +1 -0
  301. data/vendor/eigen/Eigen/src/SparseLU/SparseLU.h +162 -12
  302. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +1 -1
  303. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +76 -2
  304. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +2 -2
  305. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +1 -1
  306. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +1 -1
  307. data/vendor/eigen/Eigen/src/SparseQR/SparseQR.h +19 -6
  308. data/vendor/eigen/Eigen/src/StlSupport/StdDeque.h +2 -12
  309. data/vendor/eigen/Eigen/src/StlSupport/StdList.h +2 -2
  310. data/vendor/eigen/Eigen/src/StlSupport/StdVector.h +2 -2
  311. data/vendor/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +6 -8
  312. data/vendor/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +175 -39
  313. data/vendor/eigen/Eigen/src/misc/lapacke.h +5 -4
  314. data/vendor/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +28 -2
  315. data/vendor/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +155 -11
  316. data/vendor/eigen/Eigen/src/plugins/BlockMethods.h +626 -242
  317. data/vendor/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +14 -0
  318. data/vendor/eigen/Eigen/src/plugins/IndexedViewMethods.h +262 -0
  319. data/vendor/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +4 -4
  320. data/vendor/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +10 -0
  321. data/vendor/eigen/Eigen/src/plugins/ReshapedMethods.h +149 -0
  322. data/vendor/eigen/README.md +2 -0
  323. data/vendor/eigen/bench/btl/README +1 -1
  324. data/vendor/eigen/bench/tensors/README +6 -7
  325. data/vendor/eigen/ci/README.md +56 -0
  326. data/vendor/eigen/demos/mix_eigen_and_c/README +1 -1
  327. data/vendor/eigen/unsupported/Eigen/CXX11/src/Tensor/README.md +213 -158
  328. data/vendor/eigen/unsupported/README.txt +1 -1
  329. data/vendor/tomotopy/README.kr.rst +78 -0
  330. data/vendor/tomotopy/README.rst +75 -0
  331. data/vendor/tomotopy/src/Labeling/FoRelevance.cpp +2 -2
  332. data/vendor/tomotopy/src/Labeling/Phraser.hpp +4 -4
  333. data/vendor/tomotopy/src/TopicModel/CTModel.hpp +7 -3
  334. data/vendor/tomotopy/src/TopicModel/DMRModel.hpp +7 -3
  335. data/vendor/tomotopy/src/TopicModel/DTModel.hpp +6 -3
  336. data/vendor/tomotopy/src/TopicModel/GDMRModel.hpp +2 -2
  337. data/vendor/tomotopy/src/TopicModel/HDP.h +1 -0
  338. data/vendor/tomotopy/src/TopicModel/HDPModel.hpp +57 -6
  339. data/vendor/tomotopy/src/TopicModel/HLDAModel.hpp +6 -3
  340. data/vendor/tomotopy/src/TopicModel/HPAModel.hpp +3 -2
  341. data/vendor/tomotopy/src/TopicModel/LDA.h +3 -3
  342. data/vendor/tomotopy/src/TopicModel/LDACVB0Model.hpp +5 -5
  343. data/vendor/tomotopy/src/TopicModel/LDAModel.hpp +50 -19
  344. data/vendor/tomotopy/src/TopicModel/LLDAModel.hpp +6 -2
  345. data/vendor/tomotopy/src/TopicModel/MGLDAModel.hpp +3 -2
  346. data/vendor/tomotopy/src/TopicModel/PAModel.hpp +1 -1
  347. data/vendor/tomotopy/src/TopicModel/PLDAModel.hpp +6 -2
  348. data/vendor/tomotopy/src/TopicModel/PT.h +3 -1
  349. data/vendor/tomotopy/src/TopicModel/PTModel.hpp +36 -3
  350. data/vendor/tomotopy/src/TopicModel/SLDAModel.hpp +6 -3
  351. data/vendor/tomotopy/src/TopicModel/TopicModel.hpp +55 -26
  352. data/vendor/tomotopy/src/Utils/AliasMethod.hpp +5 -4
  353. data/vendor/tomotopy/src/Utils/Dictionary.h +2 -2
  354. data/vendor/tomotopy/src/Utils/EigenAddonOps.hpp +36 -1
  355. data/vendor/tomotopy/src/Utils/MultiNormalDistribution.hpp +1 -1
  356. data/vendor/tomotopy/src/Utils/TruncMultiNormal.hpp +1 -1
  357. data/vendor/tomotopy/src/Utils/exception.h +6 -0
  358. data/vendor/tomotopy/src/Utils/math.h +2 -2
  359. data/vendor/tomotopy/src/Utils/sample.hpp +14 -12
  360. data/vendor/tomotopy/src/Utils/serializer.hpp +30 -5
  361. data/vendor/tomotopy/src/Utils/sse_gamma.h +0 -3
  362. metadata +64 -18
  363. data/vendor/eigen/Eigen/CMakeLists.txt +0 -19
  364. data/vendor/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -674
  365. data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
  366. data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
  367. data/vendor/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
  368. data/vendor/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
  369. data/vendor/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
@@ -10,6 +10,10 @@
10
10
  #ifndef EIGEN_PARALLELIZER_H
11
11
  #define EIGEN_PARALLELIZER_H
12
12
 
13
+ #if EIGEN_HAS_CXX11_ATOMIC
14
+ #include <atomic>
15
+ #endif
16
+
13
17
  namespace Eigen {
14
18
 
15
19
  namespace internal {
@@ -17,7 +21,8 @@ namespace internal {
17
21
  /** \internal */
18
22
  inline void manage_multi_threading(Action action, int* v)
19
23
  {
20
- static EIGEN_UNUSED int m_maxThreads = -1;
24
+ static int m_maxThreads = -1;
25
+ EIGEN_UNUSED_VARIABLE(m_maxThreads)
21
26
 
22
27
  if(action==SetAction)
23
28
  {
@@ -75,8 +80,17 @@ template<typename Index> struct GemmParallelInfo
75
80
  {
76
81
  GemmParallelInfo() : sync(-1), users(0), lhs_start(0), lhs_length(0) {}
77
82
 
83
+ // volatile is not enough on all architectures (see bug 1572)
84
+ // to guarantee that when thread A says to thread B that it is
85
+ // done with packing a block, then all writes have been really
86
+ // carried out... C++11 memory model+atomic guarantees this.
87
+ #if EIGEN_HAS_CXX11_ATOMIC
88
+ std::atomic<Index> sync;
89
+ std::atomic<int> users;
90
+ #else
78
91
  Index volatile sync;
79
92
  int volatile users;
93
+ #endif
80
94
 
81
95
  Index lhs_start;
82
96
  Index lhs_length;
@@ -87,11 +101,14 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, Index depth,
87
101
  {
88
102
  // TODO when EIGEN_USE_BLAS is defined,
89
103
  // we should still enable OMP for other scalar types
90
- #if !(defined (EIGEN_HAS_OPENMP)) || defined (EIGEN_USE_BLAS)
104
+ // Without C++11, we have to disable GEMM's parallelization on
105
+ // non x86 architectures because there volatile is not enough for our purpose.
106
+ // See bug 1572.
107
+ #if (! defined(EIGEN_HAS_OPENMP)) || defined(EIGEN_USE_BLAS) || ((!EIGEN_HAS_CXX11_ATOMIC) && !(EIGEN_ARCH_i386_OR_x86_64))
91
108
  // FIXME the transpose variable is only needed to properly split
92
109
  // the matrix product when multithreading is enabled. This is a temporary
93
110
  // fix to support row-major destination matrices. This whole
94
- // parallelizer mechanism has to be redisigned anyway.
111
+ // parallelizer mechanism has to be redesigned anyway.
95
112
  EIGEN_UNUSED_VARIABLE(depth);
96
113
  EIGEN_UNUSED_VARIABLE(transpose);
97
114
  func(0,rows, 0,cols);
@@ -112,12 +129,12 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, Index depth,
112
129
  double work = static_cast<double>(rows) * static_cast<double>(cols) *
113
130
  static_cast<double>(depth);
114
131
  double kMinTaskSize = 50000; // FIXME improve this heuristic.
115
- pb_max_threads = std::max<Index>(1, std::min<Index>(pb_max_threads, work / kMinTaskSize));
132
+ pb_max_threads = std::max<Index>(1, std::min<Index>(pb_max_threads, static_cast<Index>( work / kMinTaskSize ) ));
116
133
 
117
134
  // compute the number of threads we are going to use
118
135
  Index threads = std::min<Index>(nbThreads(), pb_max_threads);
119
136
 
120
- // if multi-threading is explicitely disabled, not useful, or if we already are in a parallel session,
137
+ // if multi-threading is explicitly disabled, not useful, or if we already are in a parallel session,
121
138
  // then abort multi-threading
122
139
  // FIXME omp_get_num_threads()>1 only works for openmp, what if the user does not use openmp?
123
140
  if((!Condition) || (threads==1) || (omp_get_num_threads()>1))
@@ -45,14 +45,23 @@ struct symm_pack_lhs
45
45
  }
46
46
  void operator()(Scalar* blockA, const Scalar* _lhs, Index lhsStride, Index cols, Index rows)
47
47
  {
48
- enum { PacketSize = packet_traits<Scalar>::size };
48
+ typedef typename unpacket_traits<typename packet_traits<Scalar>::type>::half HalfPacket;
49
+ typedef typename unpacket_traits<typename unpacket_traits<typename packet_traits<Scalar>::type>::half>::half QuarterPacket;
50
+ enum { PacketSize = packet_traits<Scalar>::size,
51
+ HalfPacketSize = unpacket_traits<HalfPacket>::size,
52
+ QuarterPacketSize = unpacket_traits<QuarterPacket>::size,
53
+ HasHalf = (int)HalfPacketSize < (int)PacketSize,
54
+ HasQuarter = (int)QuarterPacketSize < (int)HalfPacketSize};
55
+
49
56
  const_blas_data_mapper<Scalar,Index,StorageOrder> lhs(_lhs,lhsStride);
50
57
  Index count = 0;
51
58
  //Index peeled_mc3 = (rows/Pack1)*Pack1;
52
59
 
53
60
  const Index peeled_mc3 = Pack1>=3*PacketSize ? (rows/(3*PacketSize))*(3*PacketSize) : 0;
54
61
  const Index peeled_mc2 = Pack1>=2*PacketSize ? peeled_mc3+((rows-peeled_mc3)/(2*PacketSize))*(2*PacketSize) : 0;
55
- const Index peeled_mc1 = Pack1>=1*PacketSize ? (rows/(1*PacketSize))*(1*PacketSize) : 0;
62
+ const Index peeled_mc1 = Pack1>=1*PacketSize ? peeled_mc2+((rows-peeled_mc2)/(1*PacketSize))*(1*PacketSize) : 0;
63
+ const Index peeled_mc_half = Pack1>=HalfPacketSize ? peeled_mc1+((rows-peeled_mc1)/(HalfPacketSize))*(HalfPacketSize) : 0;
64
+ const Index peeled_mc_quarter = Pack1>=QuarterPacketSize ? peeled_mc_half+((rows-peeled_mc_half)/(QuarterPacketSize))*(QuarterPacketSize) : 0;
56
65
 
57
66
  if(Pack1>=3*PacketSize)
58
67
  for(Index i=0; i<peeled_mc3; i+=3*PacketSize)
@@ -66,8 +75,16 @@ struct symm_pack_lhs
66
75
  for(Index i=peeled_mc2; i<peeled_mc1; i+=1*PacketSize)
67
76
  pack<1*PacketSize>(blockA, lhs, cols, i, count);
68
77
 
78
+ if(HasHalf && Pack1>=HalfPacketSize)
79
+ for(Index i=peeled_mc1; i<peeled_mc_half; i+=HalfPacketSize)
80
+ pack<HalfPacketSize>(blockA, lhs, cols, i, count);
81
+
82
+ if(HasQuarter && Pack1>=QuarterPacketSize)
83
+ for(Index i=peeled_mc_half; i<peeled_mc_quarter; i+=QuarterPacketSize)
84
+ pack<QuarterPacketSize>(blockA, lhs, cols, i, count);
85
+
69
86
  // do the same with mr==1
70
- for(Index i=peeled_mc1; i<rows; i++)
87
+ for(Index i=peeled_mc_quarter; i<rows; i++)
71
88
  {
72
89
  for(Index k=0; k<i; k++)
73
90
  blockA[count++] = lhs(i, k); // normal
@@ -277,20 +294,21 @@ struct symm_pack_rhs
277
294
  template <typename Scalar, typename Index,
278
295
  int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs,
279
296
  int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs,
280
- int ResStorageOrder>
297
+ int ResStorageOrder, int ResInnerStride>
281
298
  struct product_selfadjoint_matrix;
282
299
 
283
300
  template <typename Scalar, typename Index,
284
301
  int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs,
285
- int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs>
286
- struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,ConjugateLhs, RhsStorageOrder,RhsSelfAdjoint,ConjugateRhs,RowMajor>
302
+ int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs,
303
+ int ResInnerStride>
304
+ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,ConjugateLhs, RhsStorageOrder,RhsSelfAdjoint,ConjugateRhs,RowMajor,ResInnerStride>
287
305
  {
288
306
 
289
307
  static EIGEN_STRONG_INLINE void run(
290
308
  Index rows, Index cols,
291
309
  const Scalar* lhs, Index lhsStride,
292
310
  const Scalar* rhs, Index rhsStride,
293
- Scalar* res, Index resStride,
311
+ Scalar* res, Index resIncr, Index resStride,
294
312
  const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
295
313
  {
296
314
  product_selfadjoint_matrix<Scalar, Index,
@@ -298,33 +316,35 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,Co
298
316
  RhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsSelfAdjoint,ConjugateRhs),
299
317
  EIGEN_LOGICAL_XOR(LhsSelfAdjoint,LhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
300
318
  LhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsSelfAdjoint,ConjugateLhs),
301
- ColMajor>
302
- ::run(cols, rows, rhs, rhsStride, lhs, lhsStride, res, resStride, alpha, blocking);
319
+ ColMajor,ResInnerStride>
320
+ ::run(cols, rows, rhs, rhsStride, lhs, lhsStride, res, resIncr, resStride, alpha, blocking);
303
321
  }
304
322
  };
305
323
 
306
324
  template <typename Scalar, typename Index,
307
325
  int LhsStorageOrder, bool ConjugateLhs,
308
- int RhsStorageOrder, bool ConjugateRhs>
309
- struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor>
326
+ int RhsStorageOrder, bool ConjugateRhs,
327
+ int ResInnerStride>
328
+ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor,ResInnerStride>
310
329
  {
311
330
 
312
331
  static EIGEN_DONT_INLINE void run(
313
332
  Index rows, Index cols,
314
333
  const Scalar* _lhs, Index lhsStride,
315
334
  const Scalar* _rhs, Index rhsStride,
316
- Scalar* res, Index resStride,
335
+ Scalar* res, Index resIncr, Index resStride,
317
336
  const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
318
337
  };
319
338
 
320
339
  template <typename Scalar, typename Index,
321
340
  int LhsStorageOrder, bool ConjugateLhs,
322
- int RhsStorageOrder, bool ConjugateRhs>
323
- EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor>::run(
341
+ int RhsStorageOrder, bool ConjugateRhs,
342
+ int ResInnerStride>
343
+ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor,ResInnerStride>::run(
324
344
  Index rows, Index cols,
325
345
  const Scalar* _lhs, Index lhsStride,
326
346
  const Scalar* _rhs, Index rhsStride,
327
- Scalar* _res, Index resStride,
347
+ Scalar* _res, Index resIncr, Index resStride,
328
348
  const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
329
349
  {
330
350
  Index size = rows;
@@ -334,11 +354,11 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,t
334
354
  typedef const_blas_data_mapper<Scalar, Index, LhsStorageOrder> LhsMapper;
335
355
  typedef const_blas_data_mapper<Scalar, Index, (LhsStorageOrder == RowMajor) ? ColMajor : RowMajor> LhsTransposeMapper;
336
356
  typedef const_blas_data_mapper<Scalar, Index, RhsStorageOrder> RhsMapper;
337
- typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor> ResMapper;
357
+ typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor, Unaligned, ResInnerStride> ResMapper;
338
358
  LhsMapper lhs(_lhs,lhsStride);
339
359
  LhsTransposeMapper lhs_transpose(_lhs,lhsStride);
340
360
  RhsMapper rhs(_rhs,rhsStride);
341
- ResMapper res(_res, resStride);
361
+ ResMapper res(_res, resStride, resIncr);
342
362
 
343
363
  Index kc = blocking.kc(); // cache block size along the K direction
344
364
  Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
@@ -352,7 +372,7 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,t
352
372
  gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
353
373
  symm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
354
374
  gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr,RhsStorageOrder> pack_rhs;
355
- gemm_pack_lhs<Scalar, Index, LhsTransposeMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder==RowMajor?ColMajor:RowMajor, true> pack_lhs_transposed;
375
+ gemm_pack_lhs<Scalar, Index, LhsTransposeMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket4Packing, LhsStorageOrder==RowMajor?ColMajor:RowMajor, true> pack_lhs_transposed;
356
376
 
357
377
  for(Index k2=0; k2<size; k2+=kc)
358
378
  {
@@ -387,7 +407,7 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,t
387
407
  for(Index i2=k2+kc; i2<size; i2+=mc)
388
408
  {
389
409
  const Index actual_mc = (std::min)(i2+mc,size)-i2;
390
- gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder,false>()
410
+ gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket4Packing, LhsStorageOrder,false>()
391
411
  (blockA, lhs.getSubMapper(i2, k2), actual_kc, actual_mc);
392
412
 
393
413
  gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha);
@@ -398,26 +418,28 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,t
398
418
  // matrix * selfadjoint product
399
419
  template <typename Scalar, typename Index,
400
420
  int LhsStorageOrder, bool ConjugateLhs,
401
- int RhsStorageOrder, bool ConjugateRhs>
402
- struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor>
421
+ int RhsStorageOrder, bool ConjugateRhs,
422
+ int ResInnerStride>
423
+ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor,ResInnerStride>
403
424
  {
404
425
 
405
426
  static EIGEN_DONT_INLINE void run(
406
427
  Index rows, Index cols,
407
428
  const Scalar* _lhs, Index lhsStride,
408
429
  const Scalar* _rhs, Index rhsStride,
409
- Scalar* res, Index resStride,
430
+ Scalar* res, Index resIncr, Index resStride,
410
431
  const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
411
432
  };
412
433
 
413
434
  template <typename Scalar, typename Index,
414
435
  int LhsStorageOrder, bool ConjugateLhs,
415
- int RhsStorageOrder, bool ConjugateRhs>
416
- EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor>::run(
436
+ int RhsStorageOrder, bool ConjugateRhs,
437
+ int ResInnerStride>
438
+ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor,ResInnerStride>::run(
417
439
  Index rows, Index cols,
418
440
  const Scalar* _lhs, Index lhsStride,
419
441
  const Scalar* _rhs, Index rhsStride,
420
- Scalar* _res, Index resStride,
442
+ Scalar* _res, Index resIncr, Index resStride,
421
443
  const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
422
444
  {
423
445
  Index size = cols;
@@ -425,9 +447,9 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,f
425
447
  typedef gebp_traits<Scalar,Scalar> Traits;
426
448
 
427
449
  typedef const_blas_data_mapper<Scalar, Index, LhsStorageOrder> LhsMapper;
428
- typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor> ResMapper;
450
+ typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor, Unaligned, ResInnerStride> ResMapper;
429
451
  LhsMapper lhs(_lhs,lhsStride);
430
- ResMapper res(_res,resStride);
452
+ ResMapper res(_res,resStride, resIncr);
431
453
 
432
454
  Index kc = blocking.kc(); // cache block size along the K direction
433
455
  Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
@@ -437,7 +459,7 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,f
437
459
  ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
438
460
 
439
461
  gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
440
- gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
462
+ gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket4Packing, LhsStorageOrder> pack_lhs;
441
463
  symm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;
442
464
 
443
465
  for(Index k2=0; k2<size; k2+=kc)
@@ -503,12 +525,13 @@ struct selfadjoint_product_impl<Lhs,LhsMode,false,Rhs,RhsMode,false>
503
525
  NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsIsUpper,bool(LhsBlasTraits::NeedToConjugate)),
504
526
  EIGEN_LOGICAL_XOR(RhsIsUpper,internal::traits<Rhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, RhsIsSelfAdjoint,
505
527
  NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsIsUpper,bool(RhsBlasTraits::NeedToConjugate)),
506
- internal::traits<Dest>::Flags&RowMajorBit ? RowMajor : ColMajor>
528
+ internal::traits<Dest>::Flags&RowMajorBit ? RowMajor : ColMajor,
529
+ Dest::InnerStrideAtCompileTime>
507
530
  ::run(
508
531
  lhs.rows(), rhs.cols(), // sizes
509
532
  &lhs.coeffRef(0,0), lhs.outerStride(), // lhs info
510
533
  &rhs.coeffRef(0,0), rhs.outerStride(), // rhs info
511
- &dst.coeffRef(0,0), dst.outerStride(), // result info
534
+ &dst.coeffRef(0,0), dst.innerStride(), dst.outerStride(), // result info
512
535
  actualAlpha, blocking // alpha
513
536
  );
514
537
  }
@@ -44,16 +44,18 @@ namespace internal {
44
44
  template <typename Index, \
45
45
  int LhsStorageOrder, bool ConjugateLhs, \
46
46
  int RhsStorageOrder, bool ConjugateRhs> \
47
- struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLhs,RhsStorageOrder,false,ConjugateRhs,ColMajor> \
47
+ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLhs,RhsStorageOrder,false,ConjugateRhs,ColMajor,1> \
48
48
  {\
49
49
  \
50
50
  static void run( \
51
51
  Index rows, Index cols, \
52
52
  const EIGTYPE* _lhs, Index lhsStride, \
53
53
  const EIGTYPE* _rhs, Index rhsStride, \
54
- EIGTYPE* res, Index resStride, \
54
+ EIGTYPE* res, Index resIncr, Index resStride, \
55
55
  EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \
56
56
  { \
57
+ EIGEN_ONLY_USED_FOR_DEBUG(resIncr); \
58
+ eigen_assert(resIncr == 1); \
57
59
  char side='L', uplo='L'; \
58
60
  BlasIndex m, n, lda, ldb, ldc; \
59
61
  const EIGTYPE *a, *b; \
@@ -91,15 +93,17 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLh
91
93
  template <typename Index, \
92
94
  int LhsStorageOrder, bool ConjugateLhs, \
93
95
  int RhsStorageOrder, bool ConjugateRhs> \
94
- struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLhs,RhsStorageOrder,false,ConjugateRhs,ColMajor> \
96
+ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLhs,RhsStorageOrder,false,ConjugateRhs,ColMajor,1> \
95
97
  {\
96
98
  static void run( \
97
99
  Index rows, Index cols, \
98
100
  const EIGTYPE* _lhs, Index lhsStride, \
99
101
  const EIGTYPE* _rhs, Index rhsStride, \
100
- EIGTYPE* res, Index resStride, \
102
+ EIGTYPE* res, Index resIncr, Index resStride, \
101
103
  EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \
102
104
  { \
105
+ EIGEN_ONLY_USED_FOR_DEBUG(resIncr); \
106
+ eigen_assert(resIncr == 1); \
103
107
  char side='L', uplo='L'; \
104
108
  BlasIndex m, n, lda, ldb, ldc; \
105
109
  const EIGTYPE *a, *b; \
@@ -167,16 +171,18 @@ EIGEN_BLAS_HEMM_L(scomplex, float, cf, chemm_)
167
171
  template <typename Index, \
168
172
  int LhsStorageOrder, bool ConjugateLhs, \
169
173
  int RhsStorageOrder, bool ConjugateRhs> \
170
- struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateLhs,RhsStorageOrder,true,ConjugateRhs,ColMajor> \
174
+ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateLhs,RhsStorageOrder,true,ConjugateRhs,ColMajor,1> \
171
175
  {\
172
176
  \
173
177
  static void run( \
174
178
  Index rows, Index cols, \
175
179
  const EIGTYPE* _lhs, Index lhsStride, \
176
180
  const EIGTYPE* _rhs, Index rhsStride, \
177
- EIGTYPE* res, Index resStride, \
181
+ EIGTYPE* res, Index resIncr, Index resStride, \
178
182
  EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \
179
183
  { \
184
+ EIGEN_ONLY_USED_FOR_DEBUG(resIncr); \
185
+ eigen_assert(resIncr == 1); \
180
186
  char side='R', uplo='L'; \
181
187
  BlasIndex m, n, lda, ldb, ldc; \
182
188
  const EIGTYPE *a, *b; \
@@ -213,15 +219,17 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateL
213
219
  template <typename Index, \
214
220
  int LhsStorageOrder, bool ConjugateLhs, \
215
221
  int RhsStorageOrder, bool ConjugateRhs> \
216
- struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateLhs,RhsStorageOrder,true,ConjugateRhs,ColMajor> \
222
+ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateLhs,RhsStorageOrder,true,ConjugateRhs,ColMajor,1> \
217
223
  {\
218
224
  static void run( \
219
225
  Index rows, Index cols, \
220
226
  const EIGTYPE* _lhs, Index lhsStride, \
221
227
  const EIGTYPE* _rhs, Index rhsStride, \
222
- EIGTYPE* res, Index resStride, \
228
+ EIGTYPE* res, Index resIncr, Index resStride, \
223
229
  EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \
224
230
  { \
231
+ EIGEN_ONLY_USED_FOR_DEBUG(resIncr); \
232
+ eigen_assert(resIncr == 1); \
225
233
  char side='R', uplo='L'; \
226
234
  BlasIndex m, n, lda, ldb, ldc; \
227
235
  const EIGTYPE *a, *b; \
@@ -15,7 +15,7 @@ namespace Eigen {
15
15
  namespace internal {
16
16
 
17
17
  /* Optimized selfadjoint matrix * vector product:
18
- * This algorithm processes 2 columns at onces that allows to both reduce
18
+ * This algorithm processes 2 columns at once that allows to both reduce
19
19
  * the number of load/stores of the result by a factor 2 and to reduce
20
20
  * the instruction dependency.
21
21
  */
@@ -27,7 +27,8 @@ template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool Conju
27
27
  struct selfadjoint_matrix_vector_product
28
28
 
29
29
  {
30
- static EIGEN_DONT_INLINE void run(
30
+ static EIGEN_DONT_INLINE EIGEN_DEVICE_FUNC
31
+ void run(
31
32
  Index size,
32
33
  const Scalar* lhs, Index lhsStride,
33
34
  const Scalar* rhs,
@@ -36,7 +37,8 @@ static EIGEN_DONT_INLINE void run(
36
37
  };
37
38
 
38
39
  template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs, int Version>
39
- EIGEN_DONT_INLINE void selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,Version>::run(
40
+ EIGEN_DONT_INLINE EIGEN_DEVICE_FUNC
41
+ void selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,Version>::run(
40
42
  Index size,
41
43
  const Scalar* lhs, Index lhsStride,
42
44
  const Scalar* rhs,
@@ -62,8 +64,7 @@ EIGEN_DONT_INLINE void selfadjoint_matrix_vector_product<Scalar,Index,StorageOrd
62
64
 
63
65
  Scalar cjAlpha = ConjugateRhs ? numext::conj(alpha) : alpha;
64
66
 
65
-
66
- Index bound = (std::max)(Index(0),size-8) & 0xfffffffe;
67
+ Index bound = numext::maxi(Index(0), size-8) & 0xfffffffe;
67
68
  if (FirstTriangular)
68
69
  bound = size - bound;
69
70
 
@@ -175,7 +176,8 @@ struct selfadjoint_product_impl<Lhs,LhsMode,false,Rhs,0,true>
175
176
  enum { LhsUpLo = LhsMode&(Upper|Lower) };
176
177
 
177
178
  template<typename Dest>
178
- static void run(Dest& dest, const Lhs &a_lhs, const Rhs &a_rhs, const Scalar& alpha)
179
+ static EIGEN_DEVICE_FUNC
180
+ void run(Dest& dest, const Lhs &a_lhs, const Rhs &a_rhs, const Scalar& alpha)
179
181
  {
180
182
  typedef typename Dest::Scalar ResScalar;
181
183
  typedef typename Rhs::Scalar RhsScalar;
@@ -109,10 +109,10 @@ struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,false>
109
109
  internal::general_matrix_matrix_triangular_product<Index,
110
110
  Scalar, OtherIsRowMajor ? RowMajor : ColMajor, OtherBlasTraits::NeedToConjugate && NumTraits<Scalar>::IsComplex,
111
111
  Scalar, OtherIsRowMajor ? ColMajor : RowMajor, (!OtherBlasTraits::NeedToConjugate) && NumTraits<Scalar>::IsComplex,
112
- IsRowMajor ? RowMajor : ColMajor, UpLo>
112
+ IsRowMajor ? RowMajor : ColMajor, MatrixType::InnerStrideAtCompileTime, UpLo>
113
113
  ::run(size, depth,
114
- &actualOther.coeffRef(0,0), actualOther.outerStride(), &actualOther.coeffRef(0,0), actualOther.outerStride(),
115
- mat.data(), mat.outerStride(), actualAlpha, blocking);
114
+ actualOther.data(), actualOther.outerStride(), actualOther.data(), actualOther.outerStride(),
115
+ mat.data(), mat.innerStride(), mat.outerStride(), actualAlpha, blocking);
116
116
  }
117
117
  };
118
118
 
@@ -120,7 +120,7 @@ struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,false>
120
120
 
121
121
  template<typename MatrixType, unsigned int UpLo>
122
122
  template<typename DerivedU>
123
- SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo>
123
+ EIGEN_DEVICE_FUNC SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo>
124
124
  ::rankUpdate(const MatrixBase<DerivedU>& u, const Scalar& alpha)
125
125
  {
126
126
  selfadjoint_product_selector<MatrixType,DerivedU,UpLo>::run(_expression().const_cast_derived(), u.derived(), alpha);
@@ -24,7 +24,8 @@ struct selfadjoint_rank2_update_selector;
24
24
  template<typename Scalar, typename Index, typename UType, typename VType>
25
25
  struct selfadjoint_rank2_update_selector<Scalar,Index,UType,VType,Lower>
26
26
  {
27
- static void run(Scalar* mat, Index stride, const UType& u, const VType& v, const Scalar& alpha)
27
+ static EIGEN_DEVICE_FUNC
28
+ void run(Scalar* mat, Index stride, const UType& u, const VType& v, const Scalar& alpha)
28
29
  {
29
30
  const Index size = u.size();
30
31
  for (Index i=0; i<size; ++i)
@@ -57,7 +58,7 @@ template<bool Cond, typename T> struct conj_expr_if
57
58
 
58
59
  template<typename MatrixType, unsigned int UpLo>
59
60
  template<typename DerivedU, typename DerivedV>
60
- SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo>
61
+ EIGEN_DEVICE_FUNC SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo>
61
62
  ::rankUpdate(const MatrixBase<DerivedU>& u, const MatrixBase<DerivedV>& v, const Scalar& alpha)
62
63
  {
63
64
  typedef internal::blas_traits<DerivedU> UBlasTraits;
@@ -79,8 +80,8 @@ SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo>
79
80
  if (IsRowMajor)
80
81
  actualAlpha = numext::conj(actualAlpha);
81
82
 
82
- typedef typename internal::remove_all<typename internal::conj_expr_if<IsRowMajor ^ UBlasTraits::NeedToConjugate,_ActualUType>::type>::type UType;
83
- typedef typename internal::remove_all<typename internal::conj_expr_if<IsRowMajor ^ VBlasTraits::NeedToConjugate,_ActualVType>::type>::type VType;
83
+ typedef typename internal::remove_all<typename internal::conj_expr_if<int(IsRowMajor) ^ int(UBlasTraits::NeedToConjugate), _ActualUType>::type>::type UType;
84
+ typedef typename internal::remove_all<typename internal::conj_expr_if<int(IsRowMajor) ^ int(VBlasTraits::NeedToConjugate), _ActualVType>::type>::type VType;
84
85
  internal::selfadjoint_rank2_update_selector<Scalar, Index, UType, VType,
85
86
  (IsRowMajor ? int(UpLo==Upper ? Lower : Upper) : UpLo)>
86
87
  ::run(_expression().const_cast_derived().data(),_expression().outerStride(),UType(actualU),VType(actualV),actualAlpha);