tomoto 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (420) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +3 -0
  3. data/LICENSE.txt +22 -0
  4. data/README.md +123 -0
  5. data/ext/tomoto/ext.cpp +245 -0
  6. data/ext/tomoto/extconf.rb +28 -0
  7. data/lib/tomoto.rb +12 -0
  8. data/lib/tomoto/ct.rb +11 -0
  9. data/lib/tomoto/hdp.rb +11 -0
  10. data/lib/tomoto/lda.rb +67 -0
  11. data/lib/tomoto/version.rb +3 -0
  12. data/vendor/EigenRand/EigenRand/Core.h +1139 -0
  13. data/vendor/EigenRand/EigenRand/Dists/Basic.h +111 -0
  14. data/vendor/EigenRand/EigenRand/Dists/Discrete.h +877 -0
  15. data/vendor/EigenRand/EigenRand/Dists/GammaPoisson.h +108 -0
  16. data/vendor/EigenRand/EigenRand/Dists/NormalExp.h +626 -0
  17. data/vendor/EigenRand/EigenRand/EigenRand +19 -0
  18. data/vendor/EigenRand/EigenRand/Macro.h +24 -0
  19. data/vendor/EigenRand/EigenRand/MorePacketMath.h +978 -0
  20. data/vendor/EigenRand/EigenRand/PacketFilter.h +286 -0
  21. data/vendor/EigenRand/EigenRand/PacketRandomEngine.h +624 -0
  22. data/vendor/EigenRand/EigenRand/RandUtils.h +413 -0
  23. data/vendor/EigenRand/EigenRand/doc.h +220 -0
  24. data/vendor/EigenRand/LICENSE +21 -0
  25. data/vendor/EigenRand/README.md +288 -0
  26. data/vendor/eigen/COPYING.BSD +26 -0
  27. data/vendor/eigen/COPYING.GPL +674 -0
  28. data/vendor/eigen/COPYING.LGPL +502 -0
  29. data/vendor/eigen/COPYING.MINPACK +52 -0
  30. data/vendor/eigen/COPYING.MPL2 +373 -0
  31. data/vendor/eigen/COPYING.README +18 -0
  32. data/vendor/eigen/Eigen/CMakeLists.txt +19 -0
  33. data/vendor/eigen/Eigen/Cholesky +46 -0
  34. data/vendor/eigen/Eigen/CholmodSupport +48 -0
  35. data/vendor/eigen/Eigen/Core +537 -0
  36. data/vendor/eigen/Eigen/Dense +7 -0
  37. data/vendor/eigen/Eigen/Eigen +2 -0
  38. data/vendor/eigen/Eigen/Eigenvalues +61 -0
  39. data/vendor/eigen/Eigen/Geometry +62 -0
  40. data/vendor/eigen/Eigen/Householder +30 -0
  41. data/vendor/eigen/Eigen/IterativeLinearSolvers +48 -0
  42. data/vendor/eigen/Eigen/Jacobi +33 -0
  43. data/vendor/eigen/Eigen/LU +50 -0
  44. data/vendor/eigen/Eigen/MetisSupport +35 -0
  45. data/vendor/eigen/Eigen/OrderingMethods +73 -0
  46. data/vendor/eigen/Eigen/PaStiXSupport +48 -0
  47. data/vendor/eigen/Eigen/PardisoSupport +35 -0
  48. data/vendor/eigen/Eigen/QR +51 -0
  49. data/vendor/eigen/Eigen/QtAlignedMalloc +40 -0
  50. data/vendor/eigen/Eigen/SPQRSupport +34 -0
  51. data/vendor/eigen/Eigen/SVD +51 -0
  52. data/vendor/eigen/Eigen/Sparse +36 -0
  53. data/vendor/eigen/Eigen/SparseCholesky +45 -0
  54. data/vendor/eigen/Eigen/SparseCore +69 -0
  55. data/vendor/eigen/Eigen/SparseLU +46 -0
  56. data/vendor/eigen/Eigen/SparseQR +37 -0
  57. data/vendor/eigen/Eigen/StdDeque +27 -0
  58. data/vendor/eigen/Eigen/StdList +26 -0
  59. data/vendor/eigen/Eigen/StdVector +27 -0
  60. data/vendor/eigen/Eigen/SuperLUSupport +64 -0
  61. data/vendor/eigen/Eigen/UmfPackSupport +40 -0
  62. data/vendor/eigen/Eigen/src/Cholesky/LDLT.h +673 -0
  63. data/vendor/eigen/Eigen/src/Cholesky/LLT.h +542 -0
  64. data/vendor/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h +99 -0
  65. data/vendor/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +639 -0
  66. data/vendor/eigen/Eigen/src/Core/Array.h +329 -0
  67. data/vendor/eigen/Eigen/src/Core/ArrayBase.h +226 -0
  68. data/vendor/eigen/Eigen/src/Core/ArrayWrapper.h +209 -0
  69. data/vendor/eigen/Eigen/src/Core/Assign.h +90 -0
  70. data/vendor/eigen/Eigen/src/Core/AssignEvaluator.h +935 -0
  71. data/vendor/eigen/Eigen/src/Core/Assign_MKL.h +178 -0
  72. data/vendor/eigen/Eigen/src/Core/BandMatrix.h +353 -0
  73. data/vendor/eigen/Eigen/src/Core/Block.h +452 -0
  74. data/vendor/eigen/Eigen/src/Core/BooleanRedux.h +164 -0
  75. data/vendor/eigen/Eigen/src/Core/CommaInitializer.h +160 -0
  76. data/vendor/eigen/Eigen/src/Core/ConditionEstimator.h +175 -0
  77. data/vendor/eigen/Eigen/src/Core/CoreEvaluators.h +1688 -0
  78. data/vendor/eigen/Eigen/src/Core/CoreIterators.h +127 -0
  79. data/vendor/eigen/Eigen/src/Core/CwiseBinaryOp.h +184 -0
  80. data/vendor/eigen/Eigen/src/Core/CwiseNullaryOp.h +866 -0
  81. data/vendor/eigen/Eigen/src/Core/CwiseTernaryOp.h +197 -0
  82. data/vendor/eigen/Eigen/src/Core/CwiseUnaryOp.h +103 -0
  83. data/vendor/eigen/Eigen/src/Core/CwiseUnaryView.h +128 -0
  84. data/vendor/eigen/Eigen/src/Core/DenseBase.h +611 -0
  85. data/vendor/eigen/Eigen/src/Core/DenseCoeffsBase.h +681 -0
  86. data/vendor/eigen/Eigen/src/Core/DenseStorage.h +570 -0
  87. data/vendor/eigen/Eigen/src/Core/Diagonal.h +260 -0
  88. data/vendor/eigen/Eigen/src/Core/DiagonalMatrix.h +343 -0
  89. data/vendor/eigen/Eigen/src/Core/DiagonalProduct.h +28 -0
  90. data/vendor/eigen/Eigen/src/Core/Dot.h +318 -0
  91. data/vendor/eigen/Eigen/src/Core/EigenBase.h +159 -0
  92. data/vendor/eigen/Eigen/src/Core/ForceAlignedAccess.h +146 -0
  93. data/vendor/eigen/Eigen/src/Core/Fuzzy.h +155 -0
  94. data/vendor/eigen/Eigen/src/Core/GeneralProduct.h +455 -0
  95. data/vendor/eigen/Eigen/src/Core/GenericPacketMath.h +593 -0
  96. data/vendor/eigen/Eigen/src/Core/GlobalFunctions.h +187 -0
  97. data/vendor/eigen/Eigen/src/Core/IO.h +225 -0
  98. data/vendor/eigen/Eigen/src/Core/Inverse.h +118 -0
  99. data/vendor/eigen/Eigen/src/Core/Map.h +171 -0
  100. data/vendor/eigen/Eigen/src/Core/MapBase.h +303 -0
  101. data/vendor/eigen/Eigen/src/Core/MathFunctions.h +1415 -0
  102. data/vendor/eigen/Eigen/src/Core/MathFunctionsImpl.h +101 -0
  103. data/vendor/eigen/Eigen/src/Core/Matrix.h +459 -0
  104. data/vendor/eigen/Eigen/src/Core/MatrixBase.h +529 -0
  105. data/vendor/eigen/Eigen/src/Core/NestByValue.h +110 -0
  106. data/vendor/eigen/Eigen/src/Core/NoAlias.h +108 -0
  107. data/vendor/eigen/Eigen/src/Core/NumTraits.h +248 -0
  108. data/vendor/eigen/Eigen/src/Core/PermutationMatrix.h +633 -0
  109. data/vendor/eigen/Eigen/src/Core/PlainObjectBase.h +1035 -0
  110. data/vendor/eigen/Eigen/src/Core/Product.h +186 -0
  111. data/vendor/eigen/Eigen/src/Core/ProductEvaluators.h +1112 -0
  112. data/vendor/eigen/Eigen/src/Core/Random.h +182 -0
  113. data/vendor/eigen/Eigen/src/Core/Redux.h +505 -0
  114. data/vendor/eigen/Eigen/src/Core/Ref.h +283 -0
  115. data/vendor/eigen/Eigen/src/Core/Replicate.h +142 -0
  116. data/vendor/eigen/Eigen/src/Core/ReturnByValue.h +117 -0
  117. data/vendor/eigen/Eigen/src/Core/Reverse.h +211 -0
  118. data/vendor/eigen/Eigen/src/Core/Select.h +162 -0
  119. data/vendor/eigen/Eigen/src/Core/SelfAdjointView.h +352 -0
  120. data/vendor/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h +47 -0
  121. data/vendor/eigen/Eigen/src/Core/Solve.h +188 -0
  122. data/vendor/eigen/Eigen/src/Core/SolveTriangular.h +235 -0
  123. data/vendor/eigen/Eigen/src/Core/SolverBase.h +130 -0
  124. data/vendor/eigen/Eigen/src/Core/StableNorm.h +221 -0
  125. data/vendor/eigen/Eigen/src/Core/Stride.h +111 -0
  126. data/vendor/eigen/Eigen/src/Core/Swap.h +67 -0
  127. data/vendor/eigen/Eigen/src/Core/Transpose.h +403 -0
  128. data/vendor/eigen/Eigen/src/Core/Transpositions.h +407 -0
  129. data/vendor/eigen/Eigen/src/Core/TriangularMatrix.h +983 -0
  130. data/vendor/eigen/Eigen/src/Core/VectorBlock.h +96 -0
  131. data/vendor/eigen/Eigen/src/Core/VectorwiseOp.h +695 -0
  132. data/vendor/eigen/Eigen/src/Core/Visitor.h +273 -0
  133. data/vendor/eigen/Eigen/src/Core/arch/AVX/Complex.h +451 -0
  134. data/vendor/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +439 -0
  135. data/vendor/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +637 -0
  136. data/vendor/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +51 -0
  137. data/vendor/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +391 -0
  138. data/vendor/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1316 -0
  139. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +430 -0
  140. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +322 -0
  141. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +1061 -0
  142. data/vendor/eigen/Eigen/src/Core/arch/CUDA/Complex.h +103 -0
  143. data/vendor/eigen/Eigen/src/Core/arch/CUDA/Half.h +674 -0
  144. data/vendor/eigen/Eigen/src/Core/arch/CUDA/MathFunctions.h +91 -0
  145. data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +333 -0
  146. data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +1124 -0
  147. data/vendor/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +212 -0
  148. data/vendor/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +29 -0
  149. data/vendor/eigen/Eigen/src/Core/arch/Default/Settings.h +49 -0
  150. data/vendor/eigen/Eigen/src/Core/arch/NEON/Complex.h +490 -0
  151. data/vendor/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +91 -0
  152. data/vendor/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +760 -0
  153. data/vendor/eigen/Eigen/src/Core/arch/SSE/Complex.h +471 -0
  154. data/vendor/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +562 -0
  155. data/vendor/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +895 -0
  156. data/vendor/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +77 -0
  157. data/vendor/eigen/Eigen/src/Core/arch/ZVector/Complex.h +397 -0
  158. data/vendor/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +137 -0
  159. data/vendor/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +945 -0
  160. data/vendor/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +168 -0
  161. data/vendor/eigen/Eigen/src/Core/functors/BinaryFunctors.h +475 -0
  162. data/vendor/eigen/Eigen/src/Core/functors/NullaryFunctors.h +188 -0
  163. data/vendor/eigen/Eigen/src/Core/functors/StlFunctors.h +136 -0
  164. data/vendor/eigen/Eigen/src/Core/functors/TernaryFunctors.h +25 -0
  165. data/vendor/eigen/Eigen/src/Core/functors/UnaryFunctors.h +792 -0
  166. data/vendor/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +2156 -0
  167. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +492 -0
  168. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +311 -0
  169. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +145 -0
  170. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +122 -0
  171. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +619 -0
  172. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +136 -0
  173. data/vendor/eigen/Eigen/src/Core/products/Parallelizer.h +163 -0
  174. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +521 -0
  175. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +287 -0
  176. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +260 -0
  177. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +118 -0
  178. data/vendor/eigen/Eigen/src/Core/products/SelfadjointProduct.h +133 -0
  179. data/vendor/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +93 -0
  180. data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +466 -0
  181. data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +315 -0
  182. data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixVector.h +350 -0
  183. data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +255 -0
  184. data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +335 -0
  185. data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +163 -0
  186. data/vendor/eigen/Eigen/src/Core/products/TriangularSolverVector.h +145 -0
  187. data/vendor/eigen/Eigen/src/Core/util/BlasUtil.h +398 -0
  188. data/vendor/eigen/Eigen/src/Core/util/Constants.h +547 -0
  189. data/vendor/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +83 -0
  190. data/vendor/eigen/Eigen/src/Core/util/ForwardDeclarations.h +302 -0
  191. data/vendor/eigen/Eigen/src/Core/util/MKL_support.h +130 -0
  192. data/vendor/eigen/Eigen/src/Core/util/Macros.h +1001 -0
  193. data/vendor/eigen/Eigen/src/Core/util/Memory.h +993 -0
  194. data/vendor/eigen/Eigen/src/Core/util/Meta.h +534 -0
  195. data/vendor/eigen/Eigen/src/Core/util/NonMPL2.h +3 -0
  196. data/vendor/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +27 -0
  197. data/vendor/eigen/Eigen/src/Core/util/StaticAssert.h +218 -0
  198. data/vendor/eigen/Eigen/src/Core/util/XprHelper.h +821 -0
  199. data/vendor/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +346 -0
  200. data/vendor/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +459 -0
  201. data/vendor/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +91 -0
  202. data/vendor/eigen/Eigen/src/Eigenvalues/EigenSolver.h +622 -0
  203. data/vendor/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +418 -0
  204. data/vendor/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +226 -0
  205. data/vendor/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +374 -0
  206. data/vendor/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +158 -0
  207. data/vendor/eigen/Eigen/src/Eigenvalues/RealQZ.h +654 -0
  208. data/vendor/eigen/Eigen/src/Eigenvalues/RealSchur.h +546 -0
  209. data/vendor/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +77 -0
  210. data/vendor/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +870 -0
  211. data/vendor/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +87 -0
  212. data/vendor/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +556 -0
  213. data/vendor/eigen/Eigen/src/Geometry/AlignedBox.h +392 -0
  214. data/vendor/eigen/Eigen/src/Geometry/AngleAxis.h +247 -0
  215. data/vendor/eigen/Eigen/src/Geometry/EulerAngles.h +114 -0
  216. data/vendor/eigen/Eigen/src/Geometry/Homogeneous.h +497 -0
  217. data/vendor/eigen/Eigen/src/Geometry/Hyperplane.h +282 -0
  218. data/vendor/eigen/Eigen/src/Geometry/OrthoMethods.h +234 -0
  219. data/vendor/eigen/Eigen/src/Geometry/ParametrizedLine.h +195 -0
  220. data/vendor/eigen/Eigen/src/Geometry/Quaternion.h +814 -0
  221. data/vendor/eigen/Eigen/src/Geometry/Rotation2D.h +199 -0
  222. data/vendor/eigen/Eigen/src/Geometry/RotationBase.h +206 -0
  223. data/vendor/eigen/Eigen/src/Geometry/Scaling.h +170 -0
  224. data/vendor/eigen/Eigen/src/Geometry/Transform.h +1542 -0
  225. data/vendor/eigen/Eigen/src/Geometry/Translation.h +208 -0
  226. data/vendor/eigen/Eigen/src/Geometry/Umeyama.h +166 -0
  227. data/vendor/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +161 -0
  228. data/vendor/eigen/Eigen/src/Householder/BlockHouseholder.h +103 -0
  229. data/vendor/eigen/Eigen/src/Householder/Householder.h +172 -0
  230. data/vendor/eigen/Eigen/src/Householder/HouseholderSequence.h +470 -0
  231. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +226 -0
  232. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +228 -0
  233. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +246 -0
  234. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +400 -0
  235. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +462 -0
  236. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +394 -0
  237. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +216 -0
  238. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +115 -0
  239. data/vendor/eigen/Eigen/src/Jacobi/Jacobi.h +462 -0
  240. data/vendor/eigen/Eigen/src/LU/Determinant.h +101 -0
  241. data/vendor/eigen/Eigen/src/LU/FullPivLU.h +891 -0
  242. data/vendor/eigen/Eigen/src/LU/InverseImpl.h +415 -0
  243. data/vendor/eigen/Eigen/src/LU/PartialPivLU.h +611 -0
  244. data/vendor/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h +83 -0
  245. data/vendor/eigen/Eigen/src/LU/arch/Inverse_SSE.h +338 -0
  246. data/vendor/eigen/Eigen/src/MetisSupport/MetisSupport.h +137 -0
  247. data/vendor/eigen/Eigen/src/OrderingMethods/Amd.h +445 -0
  248. data/vendor/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +1843 -0
  249. data/vendor/eigen/Eigen/src/OrderingMethods/Ordering.h +157 -0
  250. data/vendor/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +678 -0
  251. data/vendor/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +543 -0
  252. data/vendor/eigen/Eigen/src/QR/ColPivHouseholderQR.h +653 -0
  253. data/vendor/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +97 -0
  254. data/vendor/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +562 -0
  255. data/vendor/eigen/Eigen/src/QR/FullPivHouseholderQR.h +676 -0
  256. data/vendor/eigen/Eigen/src/QR/HouseholderQR.h +409 -0
  257. data/vendor/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h +68 -0
  258. data/vendor/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +313 -0
  259. data/vendor/eigen/Eigen/src/SVD/BDCSVD.h +1246 -0
  260. data/vendor/eigen/Eigen/src/SVD/JacobiSVD.h +804 -0
  261. data/vendor/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +91 -0
  262. data/vendor/eigen/Eigen/src/SVD/SVDBase.h +315 -0
  263. data/vendor/eigen/Eigen/src/SVD/UpperBidiagonalization.h +414 -0
  264. data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +689 -0
  265. data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +199 -0
  266. data/vendor/eigen/Eigen/src/SparseCore/AmbiVector.h +377 -0
  267. data/vendor/eigen/Eigen/src/SparseCore/CompressedStorage.h +258 -0
  268. data/vendor/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +352 -0
  269. data/vendor/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h +67 -0
  270. data/vendor/eigen/Eigen/src/SparseCore/SparseAssign.h +216 -0
  271. data/vendor/eigen/Eigen/src/SparseCore/SparseBlock.h +603 -0
  272. data/vendor/eigen/Eigen/src/SparseCore/SparseColEtree.h +206 -0
  273. data/vendor/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +341 -0
  274. data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +726 -0
  275. data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +148 -0
  276. data/vendor/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +320 -0
  277. data/vendor/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +138 -0
  278. data/vendor/eigen/Eigen/src/SparseCore/SparseDot.h +98 -0
  279. data/vendor/eigen/Eigen/src/SparseCore/SparseFuzzy.h +29 -0
  280. data/vendor/eigen/Eigen/src/SparseCore/SparseMap.h +305 -0
  281. data/vendor/eigen/Eigen/src/SparseCore/SparseMatrix.h +1403 -0
  282. data/vendor/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +405 -0
  283. data/vendor/eigen/Eigen/src/SparseCore/SparsePermutation.h +178 -0
  284. data/vendor/eigen/Eigen/src/SparseCore/SparseProduct.h +169 -0
  285. data/vendor/eigen/Eigen/src/SparseCore/SparseRedux.h +49 -0
  286. data/vendor/eigen/Eigen/src/SparseCore/SparseRef.h +397 -0
  287. data/vendor/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +656 -0
  288. data/vendor/eigen/Eigen/src/SparseCore/SparseSolverBase.h +124 -0
  289. data/vendor/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +198 -0
  290. data/vendor/eigen/Eigen/src/SparseCore/SparseTranspose.h +92 -0
  291. data/vendor/eigen/Eigen/src/SparseCore/SparseTriangularView.h +189 -0
  292. data/vendor/eigen/Eigen/src/SparseCore/SparseUtil.h +178 -0
  293. data/vendor/eigen/Eigen/src/SparseCore/SparseVector.h +478 -0
  294. data/vendor/eigen/Eigen/src/SparseCore/SparseView.h +253 -0
  295. data/vendor/eigen/Eigen/src/SparseCore/TriangularSolver.h +315 -0
  296. data/vendor/eigen/Eigen/src/SparseLU/SparseLU.h +773 -0
  297. data/vendor/eigen/Eigen/src/SparseLU/SparseLUImpl.h +66 -0
  298. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +226 -0
  299. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_Structs.h +110 -0
  300. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +301 -0
  301. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_Utils.h +80 -0
  302. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h +181 -0
  303. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +179 -0
  304. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +107 -0
  305. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +280 -0
  306. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +126 -0
  307. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +130 -0
  308. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +223 -0
  309. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h +258 -0
  310. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h +137 -0
  311. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h +136 -0
  312. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h +83 -0
  313. data/vendor/eigen/Eigen/src/SparseQR/SparseQR.h +745 -0
  314. data/vendor/eigen/Eigen/src/StlSupport/StdDeque.h +126 -0
  315. data/vendor/eigen/Eigen/src/StlSupport/StdList.h +106 -0
  316. data/vendor/eigen/Eigen/src/StlSupport/StdVector.h +131 -0
  317. data/vendor/eigen/Eigen/src/StlSupport/details.h +84 -0
  318. data/vendor/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +1027 -0
  319. data/vendor/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +506 -0
  320. data/vendor/eigen/Eigen/src/misc/Image.h +82 -0
  321. data/vendor/eigen/Eigen/src/misc/Kernel.h +79 -0
  322. data/vendor/eigen/Eigen/src/misc/RealSvd2x2.h +55 -0
  323. data/vendor/eigen/Eigen/src/misc/blas.h +440 -0
  324. data/vendor/eigen/Eigen/src/misc/lapack.h +152 -0
  325. data/vendor/eigen/Eigen/src/misc/lapacke.h +16291 -0
  326. data/vendor/eigen/Eigen/src/misc/lapacke_mangling.h +17 -0
  327. data/vendor/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +332 -0
  328. data/vendor/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +552 -0
  329. data/vendor/eigen/Eigen/src/plugins/BlockMethods.h +1058 -0
  330. data/vendor/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h +115 -0
  331. data/vendor/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +163 -0
  332. data/vendor/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +152 -0
  333. data/vendor/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +85 -0
  334. data/vendor/eigen/README.md +3 -0
  335. data/vendor/eigen/bench/README.txt +55 -0
  336. data/vendor/eigen/bench/btl/COPYING +340 -0
  337. data/vendor/eigen/bench/btl/README +154 -0
  338. data/vendor/eigen/bench/tensors/README +21 -0
  339. data/vendor/eigen/blas/README.txt +6 -0
  340. data/vendor/eigen/demos/mandelbrot/README +10 -0
  341. data/vendor/eigen/demos/mix_eigen_and_c/README +9 -0
  342. data/vendor/eigen/demos/opengl/README +13 -0
  343. data/vendor/eigen/unsupported/Eigen/CXX11/src/Tensor/README.md +1760 -0
  344. data/vendor/eigen/unsupported/README.txt +50 -0
  345. data/vendor/tomotopy/LICENSE +21 -0
  346. data/vendor/tomotopy/README.kr.rst +375 -0
  347. data/vendor/tomotopy/README.rst +382 -0
  348. data/vendor/tomotopy/src/Labeling/FoRelevance.cpp +362 -0
  349. data/vendor/tomotopy/src/Labeling/FoRelevance.h +88 -0
  350. data/vendor/tomotopy/src/Labeling/Labeler.h +50 -0
  351. data/vendor/tomotopy/src/TopicModel/CT.h +37 -0
  352. data/vendor/tomotopy/src/TopicModel/CTModel.cpp +13 -0
  353. data/vendor/tomotopy/src/TopicModel/CTModel.hpp +293 -0
  354. data/vendor/tomotopy/src/TopicModel/DMR.h +51 -0
  355. data/vendor/tomotopy/src/TopicModel/DMRModel.cpp +13 -0
  356. data/vendor/tomotopy/src/TopicModel/DMRModel.hpp +374 -0
  357. data/vendor/tomotopy/src/TopicModel/DT.h +65 -0
  358. data/vendor/tomotopy/src/TopicModel/DTM.h +22 -0
  359. data/vendor/tomotopy/src/TopicModel/DTModel.cpp +15 -0
  360. data/vendor/tomotopy/src/TopicModel/DTModel.hpp +572 -0
  361. data/vendor/tomotopy/src/TopicModel/GDMR.h +37 -0
  362. data/vendor/tomotopy/src/TopicModel/GDMRModel.cpp +14 -0
  363. data/vendor/tomotopy/src/TopicModel/GDMRModel.hpp +485 -0
  364. data/vendor/tomotopy/src/TopicModel/HDP.h +74 -0
  365. data/vendor/tomotopy/src/TopicModel/HDPModel.cpp +13 -0
  366. data/vendor/tomotopy/src/TopicModel/HDPModel.hpp +592 -0
  367. data/vendor/tomotopy/src/TopicModel/HLDA.h +40 -0
  368. data/vendor/tomotopy/src/TopicModel/HLDAModel.cpp +13 -0
  369. data/vendor/tomotopy/src/TopicModel/HLDAModel.hpp +681 -0
  370. data/vendor/tomotopy/src/TopicModel/HPA.h +27 -0
  371. data/vendor/tomotopy/src/TopicModel/HPAModel.cpp +21 -0
  372. data/vendor/tomotopy/src/TopicModel/HPAModel.hpp +588 -0
  373. data/vendor/tomotopy/src/TopicModel/LDA.h +144 -0
  374. data/vendor/tomotopy/src/TopicModel/LDACVB0Model.hpp +442 -0
  375. data/vendor/tomotopy/src/TopicModel/LDAModel.cpp +13 -0
  376. data/vendor/tomotopy/src/TopicModel/LDAModel.hpp +1058 -0
  377. data/vendor/tomotopy/src/TopicModel/LLDA.h +45 -0
  378. data/vendor/tomotopy/src/TopicModel/LLDAModel.cpp +13 -0
  379. data/vendor/tomotopy/src/TopicModel/LLDAModel.hpp +203 -0
  380. data/vendor/tomotopy/src/TopicModel/MGLDA.h +63 -0
  381. data/vendor/tomotopy/src/TopicModel/MGLDAModel.cpp +17 -0
  382. data/vendor/tomotopy/src/TopicModel/MGLDAModel.hpp +558 -0
  383. data/vendor/tomotopy/src/TopicModel/PA.h +43 -0
  384. data/vendor/tomotopy/src/TopicModel/PAModel.cpp +13 -0
  385. data/vendor/tomotopy/src/TopicModel/PAModel.hpp +467 -0
  386. data/vendor/tomotopy/src/TopicModel/PLDA.h +17 -0
  387. data/vendor/tomotopy/src/TopicModel/PLDAModel.cpp +13 -0
  388. data/vendor/tomotopy/src/TopicModel/PLDAModel.hpp +214 -0
  389. data/vendor/tomotopy/src/TopicModel/SLDA.h +54 -0
  390. data/vendor/tomotopy/src/TopicModel/SLDAModel.cpp +17 -0
  391. data/vendor/tomotopy/src/TopicModel/SLDAModel.hpp +456 -0
  392. data/vendor/tomotopy/src/TopicModel/TopicModel.hpp +692 -0
  393. data/vendor/tomotopy/src/Utils/AliasMethod.hpp +169 -0
  394. data/vendor/tomotopy/src/Utils/Dictionary.h +80 -0
  395. data/vendor/tomotopy/src/Utils/EigenAddonOps.hpp +181 -0
  396. data/vendor/tomotopy/src/Utils/LBFGS.h +202 -0
  397. data/vendor/tomotopy/src/Utils/LBFGS/LineSearchBacktracking.h +120 -0
  398. data/vendor/tomotopy/src/Utils/LBFGS/LineSearchBracketing.h +122 -0
  399. data/vendor/tomotopy/src/Utils/LBFGS/Param.h +213 -0
  400. data/vendor/tomotopy/src/Utils/LUT.hpp +82 -0
  401. data/vendor/tomotopy/src/Utils/MultiNormalDistribution.hpp +69 -0
  402. data/vendor/tomotopy/src/Utils/PolyaGamma.hpp +200 -0
  403. data/vendor/tomotopy/src/Utils/PolyaGammaHybrid.hpp +672 -0
  404. data/vendor/tomotopy/src/Utils/ThreadPool.hpp +150 -0
  405. data/vendor/tomotopy/src/Utils/Trie.hpp +220 -0
  406. data/vendor/tomotopy/src/Utils/TruncMultiNormal.hpp +94 -0
  407. data/vendor/tomotopy/src/Utils/Utils.hpp +337 -0
  408. data/vendor/tomotopy/src/Utils/avx_gamma.h +46 -0
  409. data/vendor/tomotopy/src/Utils/avx_mathfun.h +736 -0
  410. data/vendor/tomotopy/src/Utils/exception.h +28 -0
  411. data/vendor/tomotopy/src/Utils/math.h +281 -0
  412. data/vendor/tomotopy/src/Utils/rtnorm.hpp +2690 -0
  413. data/vendor/tomotopy/src/Utils/sample.hpp +192 -0
  414. data/vendor/tomotopy/src/Utils/serializer.hpp +695 -0
  415. data/vendor/tomotopy/src/Utils/slp.hpp +131 -0
  416. data/vendor/tomotopy/src/Utils/sse_gamma.h +48 -0
  417. data/vendor/tomotopy/src/Utils/sse_mathfun.h +710 -0
  418. data/vendor/tomotopy/src/Utils/text.hpp +49 -0
  419. data/vendor/tomotopy/src/Utils/tvector.hpp +543 -0
  420. metadata +531 -0
@@ -0,0 +1,118 @@
1
+ /*
2
+ Copyright (c) 2011, Intel Corporation. All rights reserved.
3
+
4
+ Redistribution and use in source and binary forms, with or without modification,
5
+ are permitted provided that the following conditions are met:
6
+
7
+ * Redistributions of source code must retain the above copyright notice, this
8
+ list of conditions and the following disclaimer.
9
+ * Redistributions in binary form must reproduce the above copyright notice,
10
+ this list of conditions and the following disclaimer in the documentation
11
+ and/or other materials provided with the distribution.
12
+ * Neither the name of Intel Corporation nor the names of its contributors may
13
+ be used to endorse or promote products derived from this software without
14
+ specific prior written permission.
15
+
16
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20
+ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
23
+ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
+
27
+ ********************************************************************************
28
+ * Content : Eigen bindings to BLAS F77
29
+ * Selfadjoint matrix-vector product functionality based on ?SYMV/HEMV.
30
+ ********************************************************************************
31
+ */
32
+
33
+ #ifndef EIGEN_SELFADJOINT_MATRIX_VECTOR_BLAS_H
34
+ #define EIGEN_SELFADJOINT_MATRIX_VECTOR_BLAS_H
35
+
36
+ namespace Eigen {
37
+
38
+ namespace internal {
39
+
40
+ /**********************************************************************
41
+ * This file implements selfadjoint matrix-vector multiplication using BLAS
42
+ **********************************************************************/
43
+
44
+ // symv/hemv specialization
45
+
46
+ template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs> // Primary template of the symv/hemv dispatcher.
47
+ struct selfadjoint_matrix_vector_product_symv : // Empty body: it only inherits from the BuiltIn product named on the next line,
48
+ selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,BuiltIn> {}; // so any scalar type without its own specialization of this struct uses that base's members.
49
+
50
+ #define EIGEN_BLAS_SYMV_SPECIALIZE(Scalar) /* Emits the Specialized product for one scalar type; its run() only chooses which implementation to forward to. */ \
51
+ template<typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs> \
52
+ struct selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,Specialized> { \
53
+ static void run( \
54
+ Index size, const Scalar* lhs, Index lhsStride, \
55
+ const Scalar* _rhs, Scalar* res, Scalar alpha) { \
56
+ enum {\
57
+ IsColMajor = StorageOrder==ColMajor /* compile-time flag compared against ConjugateLhs below */ \
58
+ }; \
59
+ if (IsColMajor == ConjugateLhs) {\
60
+ selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,BuiltIn>::run( /* col-major with conjugated lhs, or row-major without: forward to the BuiltIn kernel (presumably the cases the BLAS call cannot express; confirm) */ \
61
+ size, lhs, lhsStride, _rhs, res, alpha); \
62
+ } else {\
63
+ selfadjoint_matrix_vector_product_symv<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs>::run( /* every other combination: forward to the symv/hemv dispatcher with the same arguments */ \
64
+ size, lhs, lhsStride, _rhs, res, alpha); \
65
+ }\
66
+ } \
67
+ }; \
68
+
69
+ EIGEN_BLAS_SYMV_SPECIALIZE(double) // Generate the dispatching specialization for each of the four scalar types handled here.
70
+ EIGEN_BLAS_SYMV_SPECIALIZE(float)
71
+ EIGEN_BLAS_SYMV_SPECIALIZE(dcomplex)
72
+ EIGEN_BLAS_SYMV_SPECIALIZE(scomplex)
73
+
74
+ #define EIGEN_BLAS_SYMV_SPECIALIZATION(EIGTYPE,BLASTYPE,BLASFUNC) /* Emits the symv dispatcher specialization for EIGTYPE; its run() calls BLASFUNC with pointers cast to BLASTYPE. */ \
75
+ template<typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs> \
76
+ struct selfadjoint_matrix_vector_product_symv<EIGTYPE,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs> \
77
+ { \
78
+ typedef Matrix<EIGTYPE,Dynamic,1,ColMajor> SYMVVector;\
79
+ \
80
+ static void run( \
81
+ Index size, const EIGTYPE* lhs, Index lhsStride, \
82
+ const EIGTYPE* _rhs, EIGTYPE* res, EIGTYPE alpha) \
83
+ { \
84
+ enum {\
85
+ IsRowMajor = StorageOrder==RowMajor ? 1 : 0, \
86
+ IsLower = UpLo == Lower ? 1 : 0 \
87
+ }; \
88
+ BlasIndex n=convert_index<BlasIndex>(size), lda=convert_index<BlasIndex>(lhsStride), incx=1, incy=1; /* sizes converted to BlasIndex; both vector increments are 1 */ \
89
+ EIGTYPE beta(1); /* handed to BLASFUNC as its beta argument; with y := alpha*A*x + beta*y a value of 1 accumulates into res */ \
90
+ const EIGTYPE *x_ptr; \
91
+ char uplo=(IsRowMajor) ? (IsLower ? 'U' : 'L') : (IsLower ? 'L' : 'U'); /* the triangle flag is swapped when the storage is row-major */ \
92
+ SYMVVector x_tmp; \
93
+ if (ConjugateRhs) { /* rhs has to be conjugated: build a conjugated copy and pass that copy instead */ \
94
+ Map<const SYMVVector, 0 > map_x(_rhs,size,1); \
95
+ x_tmp=map_x.conjugate(); \
96
+ x_ptr=x_tmp.data(); \
97
+ } else x_ptr=_rhs; /* no conjugation requested: the caller's buffer is passed through unchanged */ \
98
+ BLASFUNC(&uplo, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)lhs, &lda, (const BLASTYPE*)x_ptr, &incx, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &incy); \
99
+ }\
100
+ };
101
+
102
+ #ifdef EIGEN_USE_MKL // MKL build: routine names without a trailing underscore, complex data typed as MKL_Complex16 / MKL_Complex8.
103
+ EIGEN_BLAS_SYMV_SPECIALIZATION(double, double, dsymv)
104
+ EIGEN_BLAS_SYMV_SPECIALIZATION(float, float, ssymv)
105
+ EIGEN_BLAS_SYMV_SPECIALIZATION(dcomplex, MKL_Complex16, zhemv)
106
+ EIGEN_BLAS_SYMV_SPECIALIZATION(scomplex, MKL_Complex8, chemv)
107
+ #else // Any other BLAS: underscore-suffixed routine names, complex data passed through double / float pointers.
108
+ EIGEN_BLAS_SYMV_SPECIALIZATION(double, double, dsymv_)
109
+ EIGEN_BLAS_SYMV_SPECIALIZATION(float, float, ssymv_)
110
+ EIGEN_BLAS_SYMV_SPECIALIZATION(dcomplex, double, zhemv_)
111
+ EIGEN_BLAS_SYMV_SPECIALIZATION(scomplex, float, chemv_)
112
+ #endif
113
+
114
+ } // end namespace internal
115
+
116
+ } // end namespace Eigen
117
+
118
+ #endif // EIGEN_SELFADJOINT_MATRIX_VECTOR_BLAS_H
@@ -0,0 +1,133 @@
1
+ // This file is part of Eigen, a lightweight C++ template library
2
+ // for linear algebra.
3
+ //
4
+ // Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
5
+ //
6
+ // This Source Code Form is subject to the terms of the Mozilla
7
+ // Public License v. 2.0. If a copy of the MPL was not distributed
8
+ // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9
+
10
+ #ifndef EIGEN_SELFADJOINT_PRODUCT_H
11
+ #define EIGEN_SELFADJOINT_PRODUCT_H
12
+
13
+ /**********************************************************************
14
+ * This file implements a self adjoint product: C += A A^T updating only
15
+ * half of the selfadjoint matrix C.
16
+ * It corresponds to the level 3 SYRK and level 2 SYR Blas routines.
17
+ **********************************************************************/
18
+
19
+ namespace Eigen {
20
+
21
+
22
+ template<typename Scalar, typename Index, int UpLo, bool ConjLhs, bool ConjRhs> // Column-major rank-1 update kernel; writes only the UpLo triangle of mat.
23
+ struct selfadjoint_rank1_update<Scalar,Index,ColMajor,UpLo,ConjLhs,ConjRhs>
24
+ {
25
+ static void run(Index size, Scalar* mat, Index stride, const Scalar* vecX, const Scalar* vecY, const Scalar& alpha) // column i of mat starts at mat+stride*i; vecX and vecY are read at indices 0..size-1
26
+ {
27
+ internal::conj_if<ConjRhs> cj; // applied to vecY[i] below; presumably conjugates only when ConjRhs is true (conj_if is defined elsewhere)
28
+ typedef Map<const Matrix<Scalar,Dynamic,1> > OtherMap;
29
+ typedef typename internal::conditional<ConjLhs,typename OtherMap::ConjugateReturnType,const OtherMap&>::type ConjLhsType; // conjugate-expression type when ConjLhs, plain const reference otherwise
30
+ for (Index i=0; i<size; ++i) // one column of the stored triangle per iteration
31
+ {
32
+ Map<Matrix<Scalar,Dynamic,1> >(mat+stride*i+(UpLo==Lower ? i : 0), (UpLo==Lower ? size-i : (i+1))) // destination: rows i..size-1 of column i (Lower) or rows 0..i (Upper)
33
+ += (alpha * cj(vecY[i])) * ConjLhsType(OtherMap(vecX+(UpLo==Lower ? i : 0),UpLo==Lower ? size-i : (i+1))); // matching segment of vecX scaled by alpha * cj(vecY[i])
34
+ }
35
+ }
36
+ };
37
+
38
+ template<typename Scalar, typename Index, int UpLo, bool ConjLhs, bool ConjRhs> // Row-major rank-1 update: implemented by delegating to the column-major kernel.
39
+ struct selfadjoint_rank1_update<Scalar,Index,RowMajor,UpLo,ConjLhs,ConjRhs>
40
+ {
41
+ static void run(Index size, Scalar* mat, Index stride, const Scalar* vecX, const Scalar* vecY, const Scalar& alpha)
42
+ {
43
+ selfadjoint_rank1_update<Scalar,Index,ColMajor,UpLo==Lower?Upper:Lower,ConjRhs,ConjLhs>::run(size,mat,stride,vecY,vecX,alpha); // triangle flipped, conjugation flags exchanged, vecX and vecY swapped
44
+ }
45
+ };
46
+
47
+ template<typename MatrixType, typename OtherType, int UpLo, bool OtherIsVector = OtherType::IsVectorAtCompileTime> // declaration only; the last parameter defaults to whether OtherType is a compile-time vector
48
+ struct selfadjoint_product_selector;
49
+
50
+ template<typename MatrixType, typename OtherType, int UpLo> // Vector operand (OtherIsVector == true): the update is carried out by selfadjoint_rank1_update.
51
+ struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,true>
52
+ {
53
+ static void run(MatrixType& mat, const OtherType& other, const typename MatrixType::Scalar& alpha) // mat is written through mat.data(); other and alpha are read only
54
+ {
55
+ typedef typename MatrixType::Scalar Scalar;
56
+ typedef internal::blas_traits<OtherType> OtherBlasTraits;
57
+ typedef typename OtherBlasTraits::DirectLinearAccessType ActualOtherType;
58
+ typedef typename internal::remove_all<ActualOtherType>::type _ActualOtherType;
59
+ typename internal::add_const_on_value_type<ActualOtherType>::type actualOther = OtherBlasTraits::extract(other.derived()); // NOTE(review): presumably the operand with scalar factor / conjugation wrappers peeled off — confirm against blas_traits
60
+ // fold the scalar factor extracted from the operand into alpha
61
+ Scalar actualAlpha = alpha * OtherBlasTraits::extractScalarFactor(other.derived());
62
+ // compile-time facts about the destination layout and the operand's inner stride
63
+ enum {
64
+ StorageOrder = (internal::traits<MatrixType>::Flags&RowMajorBit) ? RowMajor : ColMajor,
65
+ UseOtherDirectly = _ActualOtherType::InnerStrideAtCompileTime==1 // operand data can be used in place only with unit inner stride
66
+ };
67
+ internal::gemv_static_vector_if<Scalar,OtherType::SizeAtCompileTime,OtherType::MaxSizeAtCompileTime,!UseOtherDirectly> static_other; // last argument enables this scratch object only when the operand cannot be used in place
68
+ // actualOtherPtr: the operand's own data when usable directly, otherwise static_other.data(); NOTE(review): the macro presumably allocates other.size() scalars if that pointer is null — confirm
69
+ ei_declare_aligned_stack_constructed_variable(Scalar, actualOtherPtr, other.size(),
70
+ (UseOtherDirectly ? const_cast<Scalar*>(actualOther.data()) : static_other.data()));
71
+ // strided operand: copy it into the contiguous buffer first
72
+ if(!UseOtherDirectly)
73
+ Map<typename _ActualOtherType::PlainObject>(actualOtherPtr, actualOther.size()) = actualOther;
74
+ // the same buffer serves as both vecX and vecY; for complex scalars exactly one of the two conjugation flags is set
75
+ selfadjoint_rank1_update<Scalar,Index,StorageOrder,UpLo,
76
+ OtherBlasTraits::NeedToConjugate && NumTraits<Scalar>::IsComplex,
77
+ (!OtherBlasTraits::NeedToConjugate) && NumTraits<Scalar>::IsComplex>
78
+ ::run(other.size(), mat.data(), mat.outerStride(), actualOtherPtr, actualOtherPtr, actualAlpha);
79
+ }
80
+ };
81
+
82
+ template<typename MatrixType, typename OtherType, int UpLo> // Matrix operand (OtherIsVector == false): the update is carried out by general_matrix_matrix_triangular_product.
83
+ struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,false>
84
+ {
85
+ static void run(MatrixType& mat, const OtherType& other, const typename MatrixType::Scalar& alpha) // mat is written through mat.data(); other and alpha are read only
86
+ {
87
+ typedef typename MatrixType::Scalar Scalar;
88
+ typedef internal::blas_traits<OtherType> OtherBlasTraits;
89
+ typedef typename OtherBlasTraits::DirectLinearAccessType ActualOtherType;
90
+ typedef typename internal::remove_all<ActualOtherType>::type _ActualOtherType;
91
+ typename internal::add_const_on_value_type<ActualOtherType>::type actualOther = OtherBlasTraits::extract(other.derived()); // NOTE(review): presumably the operand with scalar factor / conjugation wrappers peeled off — confirm against blas_traits
92
+ // fold the scalar factor extracted from the operand into alpha
93
+ Scalar actualAlpha = alpha * OtherBlasTraits::extractScalarFactor(other.derived());
94
+ // storage orders of the destination and of the extracted operand
95
+ enum {
96
+ IsRowMajor = (internal::traits<MatrixType>::Flags&RowMajorBit) ? 1 : 0,
97
+ OtherIsRowMajor = _ActualOtherType::Flags&RowMajorBit ? 1 : 0
98
+ };
99
+ // problem sizes: the result is size x size, the contraction runs over depth
100
+ Index size = mat.cols();
101
+ Index depth = actualOther.cols();
102
+ // blocking space typed from the compile-time maximum sizes
103
+ typedef internal::gemm_blocking_space<IsRowMajor ? RowMajor : ColMajor,Scalar,Scalar,
104
+ MatrixType::MaxColsAtCompileTime, MatrixType::MaxColsAtCompileTime, _ActualOtherType::MaxColsAtCompileTime> BlockingType;
105
+
106
+ BlockingType blocking(size, size, depth, 1, false);
107
+
108
+ // the operand buffer is passed as both factors; the second factor is declared with the opposite storage order and the complementary conjugation flag
109
+ internal::general_matrix_matrix_triangular_product<Index,
110
+ Scalar, OtherIsRowMajor ? RowMajor : ColMajor, OtherBlasTraits::NeedToConjugate && NumTraits<Scalar>::IsComplex,
111
+ Scalar, OtherIsRowMajor ? ColMajor : RowMajor, (!OtherBlasTraits::NeedToConjugate) && NumTraits<Scalar>::IsComplex,
112
+ IsRowMajor ? RowMajor : ColMajor, UpLo>
113
+ ::run(size, depth,
114
+ &actualOther.coeffRef(0,0), actualOther.outerStride(), &actualOther.coeffRef(0,0), actualOther.outerStride(),
115
+ mat.data(), mat.outerStride(), actualAlpha, blocking);
116
+ }
117
+ };
118
+
119
+ // high level API
120
+
121
+ template<typename MatrixType, unsigned int UpLo> // Single-operand rank update: forwards u and alpha to selfadjoint_product_selector and returns *this.
122
+ template<typename DerivedU>
123
+ SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo>
124
+ ::rankUpdate(const MatrixBase<DerivedU>& u, const Scalar& alpha)
125
+ {
126
+ selfadjoint_product_selector<MatrixType,DerivedU,UpLo>::run(_expression().const_cast_derived(), u.derived(), alpha); // the selector's default 4th argument picks the vector or matrix path; the viewed expression is passed as a mutable reference
127
+
128
+ return *this;
129
+ }
130
+
131
+ } // end namespace Eigen
132
+
133
+ #endif // EIGEN_SELFADJOINT_PRODUCT_H
@@ -0,0 +1,93 @@
1
+ // This file is part of Eigen, a lightweight C++ template library
2
+ // for linear algebra.
3
+ //
4
+ // Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
5
+ //
6
+ // This Source Code Form is subject to the terms of the Mozilla
7
+ // Public License v. 2.0. If a copy of the MPL was not distributed
8
+ // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9
+
10
+ #ifndef EIGEN_SELFADJOINTRANK2UPTADE_H
11
+ #define EIGEN_SELFADJOINTRANK2UPTADE_H
12
+
13
+ namespace Eigen {
14
+
15
+ namespace internal {
16
+
17
+ /* Optimized selfadjoint matrix += alpha * uv' + conj(alpha)*vu'
18
+ * It corresponds to the Level2 syr2 BLAS routine
19
+ */
20
+
21
+ template<typename Scalar, typename Index, typename UType, typename VType, int UpLo> // declaration only; selected on UpLo
22
+ struct selfadjoint_rank2_update_selector;
23
+
24
+ template<typename Scalar, typename Index, typename UType, typename VType> // Lower-triangle rank-2 update: column i receives contributions on rows i..size-1 only.
25
+ struct selfadjoint_rank2_update_selector<Scalar,Index,UType,VType,Lower>
26
+ {
27
+ static void run(Scalar* mat, Index stride, const UType& u, const VType& v, const Scalar& alpha) // column i of mat starts at mat+stride*i; the loop bound is taken from u.size()
28
+ {
29
+ const Index size = u.size();
30
+ for (Index i=0; i<size; ++i)
31
+ {
32
+ Map<Matrix<Scalar,Dynamic,1> >(mat+stride*i+i, size-i) += // destination: the size-i entries of column i starting at the diagonal
33
+ (numext::conj(alpha) * numext::conj(u.coeff(i))) * v.tail(size-i) // conj(alpha) * conj(u[i]) times the last size-i entries of v
34
+ + (alpha * numext::conj(v.coeff(i))) * u.tail(size-i); // alpha * conj(v[i]) times the last size-i entries of u
35
+ }
36
+ }
37
+ };
38
+
39
+ template<typename Scalar, typename Index, typename UType, typename VType> // Upper-triangle rank-2 update: column i receives contributions on rows 0..i only.
40
+ struct selfadjoint_rank2_update_selector<Scalar,Index,UType,VType,Upper>
41
+ {
42
+ static void run(Scalar* mat, Index stride, const UType& u, const VType& v, const Scalar& alpha) // column i of mat starts at mat+stride*i; the loop bound is taken from u.size()
43
+ {
44
+ const Index size = u.size();
45
+ for (Index i=0; i<size; ++i)
46
+ Map<Matrix<Scalar,Dynamic,1> >(mat+stride*i, i+1) += // destination: the first i+1 entries of column i
47
+ (numext::conj(alpha) * numext::conj(u.coeff(i))) * v.head(i+1) // conj(alpha) * conj(u[i]) times the first i+1 entries of v
48
+ + (alpha * numext::conj(v.coeff(i))) * u.head(i+1); // alpha * conj(v[i]) times the first i+1 entries of u
49
+ }
50
+ };
51
+
52
+ template<bool Cond, typename T> struct conj_expr_if // ::type is `const T&` when Cond is false and a scalar_conjugate_op CwiseUnaryOp over T when Cond is true
53
+ : conditional<!Cond, const T&,
54
+ CwiseUnaryOp<scalar_conjugate_op<typename traits<T>::Scalar>,T> > {};
55
+
56
+ } // end namespace internal
57
+
58
+ template<typename MatrixType, unsigned int UpLo> // Two-operand rank update: prepares u, v and alpha, runs selfadjoint_rank2_update_selector on the viewed expression and returns *this.
59
+ template<typename DerivedU, typename DerivedV>
60
+ SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo>
61
+ ::rankUpdate(const MatrixBase<DerivedU>& u, const MatrixBase<DerivedV>& v, const Scalar& alpha)
62
+ {
63
+ typedef internal::blas_traits<DerivedU> UBlasTraits;
64
+ typedef typename UBlasTraits::DirectLinearAccessType ActualUType;
65
+ typedef typename internal::remove_all<ActualUType>::type _ActualUType;
66
+ typename internal::add_const_on_value_type<ActualUType>::type actualU = UBlasTraits::extract(u.derived()); // NOTE(review): presumably u with scalar factor / conjugation wrappers peeled off — confirm against blas_traits
67
+
68
+ typedef internal::blas_traits<DerivedV> VBlasTraits;
69
+ typedef typename VBlasTraits::DirectLinearAccessType ActualVType;
70
+ typedef typename internal::remove_all<ActualVType>::type _ActualVType;
71
+ typename internal::add_const_on_value_type<ActualVType>::type actualV = VBlasTraits::extract(v.derived()); // same extraction for v
72
+
73
+ // If MatrixType is row major, then we use the routine for lower triangular in the upper triangular case and
74
+ // vice versa, and take the complex conjugate of all coefficients and vector entries.
75
+
76
+ enum { IsRowMajor = (internal::traits<MatrixType>::Flags&RowMajorBit) ? 1 : 0 };
77
+ Scalar actualAlpha = alpha * UBlasTraits::extractScalarFactor(u.derived()) // alpha times u's extracted factor ...
78
+ * numext::conj(VBlasTraits::extractScalarFactor(v.derived())); // ... times the conjugate of v's extracted factor
79
+ if (IsRowMajor)
80
+ actualAlpha = numext::conj(actualAlpha);
81
+ // each operand gets the conjugating wrapper when exactly one of IsRowMajor / NeedToConjugate holds (XOR)
82
+ typedef typename internal::remove_all<typename internal::conj_expr_if<IsRowMajor ^ UBlasTraits::NeedToConjugate,_ActualUType>::type>::type UType;
83
+ typedef typename internal::remove_all<typename internal::conj_expr_if<IsRowMajor ^ VBlasTraits::NeedToConjugate,_ActualVType>::type>::type VType;
84
+ internal::selfadjoint_rank2_update_selector<Scalar, Index, UType, VType,
85
+ (IsRowMajor ? int(UpLo==Upper ? Lower : Upper) : UpLo)> // row-major storage selects the opposite-triangle kernel
86
+ ::run(_expression().const_cast_derived().data(),_expression().outerStride(),UType(actualU),VType(actualV),actualAlpha);
87
+
88
+ return *this;
89
+ }
90
+
91
+ } // end namespace Eigen
92
+
93
+ #endif // EIGEN_SELFADJOINTRANK2UPTADE_H
@@ -0,0 +1,466 @@
1
+ // This file is part of Eigen, a lightweight C++ template library
2
+ // for linear algebra.
3
+ //
4
+ // Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
5
+ //
6
+ // This Source Code Form is subject to the terms of the Mozilla
7
+ // Public License v. 2.0. If a copy of the MPL was not distributed
8
+ // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9
+
10
+ #ifndef EIGEN_TRIANGULAR_MATRIX_MATRIX_H
11
+ #define EIGEN_TRIANGULAR_MATRIX_MATRIX_H
12
+
13
+ namespace Eigen {
14
+
15
+ namespace internal {
16
+
17
+ // template<typename Scalar, int mr, int StorageOrder, bool Conjugate, int Mode>
18
+ // struct gemm_pack_lhs_triangular
19
+ // {
20
+ // Matrix<Scalar,mr,mr,
21
+ // void operator()(Scalar* blockA, const EIGEN_RESTRICT Scalar* _lhs, int lhsStride, int depth, int rows)
22
+ // {
23
+ // conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
24
+ // const_blas_data_mapper<Scalar, StorageOrder> lhs(_lhs,lhsStride);
25
+ // int count = 0;
26
+ // const int peeled_mc = (rows/mr)*mr;
27
+ // for(int i=0; i<peeled_mc; i+=mr)
28
+ // {
29
+ // for(int k=0; k<depth; k++)
30
+ // for(int w=0; w<mr; w++)
31
+ // blockA[count++] = cj(lhs(i+w, k));
32
+ // }
33
+ // for(int i=peeled_mc; i<rows; i++)
34
+ // {
35
+ // for(int k=0; k<depth; k++)
36
+ // blockA[count++] = cj(lhs(i, k));
37
+ // }
38
+ // }
39
+ // };
40
+
41
+ /* Optimized triangular matrix * matrix (_TRMM++) product built on top of
42
+ * the general matrix matrix product.
43
+ */
44
+ template <typename Scalar, typename Index, // declaration only
45
+ int Mode, bool LhsIsTriangular, // Mode: triangular mode bits; LhsIsTriangular: which operand is the triangular one
46
+ int LhsStorageOrder, bool ConjugateLhs,
47
+ int RhsStorageOrder, bool ConjugateRhs,
48
+ int ResStorageOrder, int Version = Specialized> // Version defaults to Specialized
49
+ struct product_triangular_matrix_matrix;
50
+
51
+ template <typename Scalar, typename Index, // Row-major result: rewritten as a column-major product with the two operands exchanged.
52
+ int Mode, bool LhsIsTriangular,
53
+ int LhsStorageOrder, bool ConjugateLhs,
54
+ int RhsStorageOrder, bool ConjugateRhs, int Version>
55
+ struct product_triangular_matrix_matrix<Scalar,Index,Mode,LhsIsTriangular,
56
+ LhsStorageOrder,ConjugateLhs,
57
+ RhsStorageOrder,ConjugateRhs,RowMajor,Version>
58
+ {
59
+ static EIGEN_STRONG_INLINE void run(
60
+ Index rows, Index cols, Index depth,
61
+ const Scalar* lhs, Index lhsStride,
62
+ const Scalar* rhs, Index rhsStride,
63
+ Scalar* res, Index resStride,
64
+ const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
65
+ {
66
+ product_triangular_matrix_matrix<Scalar, Index,
67
+ (Mode&(UnitDiag|ZeroDiag)) | ((Mode&Upper) ? Lower : Upper), // keep the UnitDiag/ZeroDiag bits, flip Upper <-> Lower
68
+ (!LhsIsTriangular), // the triangular operand changes side
69
+ RhsStorageOrder==RowMajor ? ColMajor : RowMajor, // new lhs = old rhs with its storage order flipped
70
+ ConjugateRhs,
71
+ LhsStorageOrder==RowMajor ? ColMajor : RowMajor, // new rhs = old lhs with its storage order flipped
72
+ ConjugateLhs,
73
+ ColMajor>
74
+ ::run(cols, rows, depth, rhs, rhsStride, lhs, lhsStride, res, resStride, alpha, blocking); // rows/cols and lhs/rhs exchanged; res, alpha and blocking passed through
75
+ }
76
+ };
77
+
78
+ // implements col-major += alpha * op(triangular) * op(general)
79
+ template <typename Scalar, typename Index, int Mode, // Specialization for LhsIsTriangular == true with a column-major result; run() is only declared here.
80
+ int LhsStorageOrder, bool ConjugateLhs,
81
+ int RhsStorageOrder, bool ConjugateRhs, int Version>
82
+ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
83
+ LhsStorageOrder,ConjugateLhs,
84
+ RhsStorageOrder,ConjugateRhs,ColMajor,Version>
85
+ {
86
+
87
+ typedef gebp_traits<Scalar,Scalar> Traits;
88
+ enum {
89
+ SmallPanelWidth = 2 * EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr), // side length of the small triangular scratch block: twice the larger of mr and nr
90
+ IsLower = (Mode&Lower) == Lower,
91
+ SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1 // 1 only when neither ZeroDiag nor UnitDiag is set
92
+ };
93
+
94
+ static EIGEN_DONT_INLINE void run(
95
+ Index _rows, Index _cols, Index _depth,
96
+ const Scalar* _lhs, Index lhsStride,
97
+ const Scalar* _rhs, Index rhsStride,
98
+ Scalar* res, Index resStride,
99
+ const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
100
+ };
101
+
102
+ template <typename Scalar, typename Index, int Mode, // Definition of run() for the triangular-lhs, column-major-result specialization.
103
+ int LhsStorageOrder, bool ConjugateLhs,
104
+ int RhsStorageOrder, bool ConjugateRhs, int Version>
105
+ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true,
106
+ LhsStorageOrder,ConjugateLhs,
107
+ RhsStorageOrder,ConjugateRhs,ColMajor,Version>::run(
108
+ Index _rows, Index _cols, Index _depth,
109
+ const Scalar* _lhs, Index lhsStride,
110
+ const Scalar* _rhs, Index rhsStride,
111
+ Scalar* _res, Index resStride,
112
+ const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
113
+ {
114
+ // strip zeros: Lower keeps every row but caps depth at diagSize; Upper caps rows at diagSize and keeps the full depth
115
+ Index diagSize = (std::min)(_rows,_depth);
116
+ Index rows = IsLower ? _rows : diagSize;
117
+ Index depth = IsLower ? diagSize : _depth;
118
+ Index cols = _cols;
119
+ // mappers wrap the raw pointers together with their strides; two-index access such as lhs(i,j) is used below
120
+ typedef const_blas_data_mapper<Scalar, Index, LhsStorageOrder> LhsMapper;
121
+ typedef const_blas_data_mapper<Scalar, Index, RhsStorageOrder> RhsMapper;
122
+ typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor> ResMapper;
123
+ LhsMapper lhs(_lhs,lhsStride);
124
+ RhsMapper rhs(_rhs,rhsStride);
125
+ ResMapper res(_res, resStride);
126
+
127
+ Index kc = blocking.kc(); // cache block size along the K direction
128
+ Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
129
+ // The small panel size must not be larger than blocking size.
130
+ // Usually this should never be the case because SmallPanelWidth^2 is very small
131
+ // compared to L2 cache size, but let's be safe:
132
+ Index panelWidth = (std::min)(Index(SmallPanelWidth),(std::min)(kc,mc));
133
+
134
+ std::size_t sizeA = kc*mc; // scalars requested for the packed lhs block
135
+ std::size_t sizeB = kc*cols; // scalars requested for the packed rhs block
136
+
137
+ ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
138
+ ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
139
+
140
+ // To work around an "error: member reference base type 'Matrix<...>
141
+ // (Eigen::internal::constructor_without_unaligned_array_assert (*)())' is
142
+ // not a structure or union" compilation error in nvcc (tested V8.0.61),
143
+ // create a dummy internal::constructor_without_unaligned_array_assert
144
+ // object to pass to the Matrix constructor.
145
+ internal::constructor_without_unaligned_array_assert a;
146
+ Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,LhsStorageOrder> triangularBuffer(a);
147
+ triangularBuffer.setZero(); // scratch starts all-zero; only one triangle (and the diagonal) is written below
148
+ if((Mode&ZeroDiag)==ZeroDiag)
149
+ triangularBuffer.diagonal().setZero();
150
+ else
151
+ triangularBuffer.diagonal().setOnes(); // preset to 1; overwritten with the actual lhs diagonal below when SetDiag
152
+
153
+ gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
154
+ gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
155
+ gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr,RhsStorageOrder> pack_rhs;
156
+
157
+ for(Index k2=IsLower ? depth : 0; // Lower walks the depth blocks from the end down to 0, Upper from 0 upward
158
+ IsLower ? k2>0 : k2<depth;
159
+ IsLower ? k2-=kc : k2+=kc)
160
+ {
161
+ Index actual_kc = (std::min)(IsLower ? k2 : depth-k2, kc); // depth of this block, at most kc
162
+ Index actual_k2 = IsLower ? k2-actual_kc : k2; // first depth index covered by this block
163
+
164
+ // align blocks with the end of the triangular part for trapezoidal lhs
165
+ if((!IsLower)&&(k2<rows)&&(k2+actual_kc>rows))
166
+ {
167
+ actual_kc = rows-k2;
168
+ k2 = k2+actual_kc-kc; // pre-compensates the loop's k2+=kc so the next block starts right after this shortened one
169
+ }
170
+
171
+ pack_rhs(blockB, rhs.getSubMapper(actual_k2,0), actual_kc, cols); // pack the rhs sub-block starting at (actual_k2, 0): depth actual_kc, cols columns
172
+
173
+ // the selected lhs's panel has to be split in three different parts:
174
+ // 1 - the part which is zero => skip it
175
+ // 2 - the diagonal block => special kernel
176
+ // 3 - the dense panel below (lower case) or above (upper case) the diagonal block => GEPP
177
+
178
+ // the block diagonal, if any:
179
+ if(IsLower || actual_k2<rows)
180
+ {
181
+ // for each small vertical panel of lhs
182
+ for (Index k1=0; k1<actual_kc; k1+=panelWidth)
183
+ {
184
+ Index actualPanelWidth = std::min<Index>(actual_kc-k1, panelWidth);
185
+ Index lengthTarget = IsLower ? actual_kc-k1-actualPanelWidth : k1; // extent of the dense part inside this block: after the micro block (Lower) or before it (Upper)
186
+ Index startBlock = actual_k2+k1; // row/column index where the micro triangular block starts
187
+ Index blockBOffset = k1;
188
+
189
+ // => GEBP with the micro triangular block
190
+ // The trick is to pack this micro block while filling the opposite triangular part with zeros.
191
+ // To this end we do an extra triangular copy to a small temporary buffer
192
+ for (Index k=0;k<actualPanelWidth;++k)
193
+ {
194
+ if (SetDiag)
195
+ triangularBuffer.coeffRef(k,k) = lhs(startBlock+k,startBlock+k);
196
+ for (Index i=IsLower ? k+1 : 0; IsLower ? i<actualPanelWidth : i<k; ++i) // strictly-lower entries of column k (Lower) or strictly-upper ones (Upper)
197
+ triangularBuffer.coeffRef(i,k) = lhs(startBlock+i,startBlock+k);
198
+ }
199
+ pack_lhs(blockA, LhsMapper(triangularBuffer.data(), triangularBuffer.outerStride()), actualPanelWidth, actualPanelWidth);
200
+
201
+ gebp_kernel(res.getSubMapper(startBlock, 0), blockA, blockB,
202
+ actualPanelWidth, actualPanelWidth, cols, alpha,
203
+ actualPanelWidth, actual_kc, 0, blockBOffset);
204
+
205
+ // GEBP with remaining micro panel
206
+ if (lengthTarget>0)
207
+ {
208
+ Index startTarget = IsLower ? actual_k2+k1+actualPanelWidth : actual_k2; // first row of the dense part: just past the micro block (Lower) or the top of the block (Upper)
209
+
210
+ pack_lhs(blockA, lhs.getSubMapper(startTarget,startBlock), actualPanelWidth, lengthTarget);
211
+
212
+ gebp_kernel(res.getSubMapper(startTarget, 0), blockA, blockB,
213
+ lengthTarget, actualPanelWidth, cols, alpha,
214
+ actualPanelWidth, actual_kc, 0, blockBOffset);
215
+ }
216
+ }
217
+ }
218
+ // the part below (lower case) or above (upper case) the diagonal => GEPP
219
+ {
220
+ Index start = IsLower ? k2 : 0;
221
+ Index end = IsLower ? rows : (std::min)(actual_k2,rows);
222
+ for(Index i2=start; i2<end; i2+=mc)
223
+ {
224
+ const Index actual_mc = (std::min)(i2+mc,end)-i2; // rows in this chunk, at most mc
225
+ gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr,Traits::LhsProgress, LhsStorageOrder,false>()
226
+ (blockA, lhs.getSubMapper(i2, actual_k2), actual_kc, actual_mc);
227
+
228
+ gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc,
229
+ actual_kc, cols, alpha, -1, -1, 0, 0); // NOTE(review): -1 strides presumably mean "use the defaults" — confirm against gebp_kernel
230
+ }
231
+ }
232
+ }
233
+ }
234
+
235
+ // implements col-major += alpha * op(general) * op(triangular)
236
+ template <typename Scalar, typename Index, int Mode, // Specialization for LhsIsTriangular == false with a column-major result; run() is only declared here.
237
+ int LhsStorageOrder, bool ConjugateLhs,
238
+ int RhsStorageOrder, bool ConjugateRhs, int Version>
239
+ struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
240
+ LhsStorageOrder,ConjugateLhs,
241
+ RhsStorageOrder,ConjugateRhs,ColMajor,Version>
242
+ {
243
+ typedef gebp_traits<Scalar,Scalar> Traits;
244
+ enum {
245
+ SmallPanelWidth = EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr), // side length of the small triangular scratch block: the larger of mr and nr
246
+ IsLower = (Mode&Lower) == Lower,
247
+ SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1 // 1 only when neither ZeroDiag nor UnitDiag is set
248
+ };
249
+
250
+ static EIGEN_DONT_INLINE void run(
251
+ Index _rows, Index _cols, Index _depth,
252
+ const Scalar* _lhs, Index lhsStride,
253
+ const Scalar* _rhs, Index rhsStride,
254
+ Scalar* res, Index resStride,
255
+ const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
256
+ };
257
+
258
+ template <typename Scalar, typename Index, int Mode, // Definition of run() for the triangular-rhs, column-major-result specialization.
259
+ int LhsStorageOrder, bool ConjugateLhs,
260
+ int RhsStorageOrder, bool ConjugateRhs, int Version>
261
+ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,false,
262
+ LhsStorageOrder,ConjugateLhs,
263
+ RhsStorageOrder,ConjugateRhs,ColMajor,Version>::run(
264
+ Index _rows, Index _cols, Index _depth,
265
+ const Scalar* _lhs, Index lhsStride,
266
+ const Scalar* _rhs, Index rhsStride,
267
+ Scalar* _res, Index resStride,
268
+ const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
269
+ {
270
+ const Index PacketBytes = packet_traits<Scalar>::size*sizeof(Scalar); // used below to align the start of the general packed rhs block
271
+ // strip zeros: Lower keeps the full depth and caps cols at diagSize; Upper caps depth at diagSize and keeps every column
272
+ Index diagSize = (std::min)(_cols,_depth);
273
+ Index rows = _rows;
274
+ Index depth = IsLower ? _depth : diagSize;
275
+ Index cols = IsLower ? diagSize : _cols;
276
+ // mappers wrap the raw pointers together with their strides; two-index access such as rhs(i,j) is used below
277
+ typedef const_blas_data_mapper<Scalar, Index, LhsStorageOrder> LhsMapper;
278
+ typedef const_blas_data_mapper<Scalar, Index, RhsStorageOrder> RhsMapper;
279
+ typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor> ResMapper;
280
+ LhsMapper lhs(_lhs,lhsStride);
281
+ RhsMapper rhs(_rhs,rhsStride);
282
+ ResMapper res(_res, resStride);
283
+
284
+ Index kc = blocking.kc(); // cache block size along the K direction
285
+ Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
286
+
287
+ std::size_t sizeA = kc*mc; // scalars requested for the packed lhs block
288
+ std::size_t sizeB = kc*cols+EIGEN_MAX_ALIGN_BYTES/sizeof(Scalar); // NOTE(review): the extra scalars presumably leave room for the alignment shift applied to geb below — confirm
289
+
290
+ ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
291
+ ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
292
+
293
+ internal::constructor_without_unaligned_array_assert a;
294
+ Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,RhsStorageOrder> triangularBuffer(a);
295
+ triangularBuffer.setZero(); // scratch starts all-zero; only one triangle (and the diagonal) is written below
296
+ if((Mode&ZeroDiag)==ZeroDiag)
297
+ triangularBuffer.diagonal().setZero();
298
+ else
299
+ triangularBuffer.diagonal().setOnes(); // preset to 1; overwritten with the actual rhs diagonal below when SetDiag
300
+
301
+ gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
302
+ gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
303
+ gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr,RhsStorageOrder> pack_rhs;
304
+ gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr,RhsStorageOrder,false,true> pack_rhs_panel; // variant called below with two extra trailing arguments (a stride and an offset)
305
+
306
+ for(Index k2=IsLower ? 0 : depth; // Lower walks the depth blocks from 0 upward, Upper from the end down to 0
307
+ IsLower ? k2<depth : k2>0;
308
+ IsLower ? k2+=kc : k2-=kc)
309
+ {
310
+ Index actual_kc = (std::min)(IsLower ? depth-k2 : k2, kc); // depth of this block, at most kc
311
+ Index actual_k2 = IsLower ? k2 : k2-actual_kc; // first depth index covered by this block
312
+
313
+ // align blocks with the end of the triangular part for trapezoidal rhs
314
+ if(IsLower && (k2<cols) && (actual_k2+actual_kc>cols))
315
+ {
316
+ actual_kc = cols-k2;
317
+ k2 = actual_k2 + actual_kc - kc; // pre-compensates the loop's k2+=kc so the next block starts right after this shortened one
318
+ }
319
+
320
+ // remaining size
321
+ Index rs = IsLower ? (std::min)(cols,actual_k2) : cols - k2;
322
+ // size of the triangular part
323
+ Index ts = (IsLower && actual_k2>=cols) ? 0 : actual_kc;
324
+
325
+ Scalar* geb = blockB+ts*ts; // the general packed part is placed past the first ts*ts scalars of blockB ...
326
+ geb = geb + internal::first_aligned<PacketBytes>(geb,PacketBytes/sizeof(Scalar)); // ... then advanced by the offset first_aligned returns
327
+
328
+ pack_rhs(geb, rhs.getSubMapper(actual_k2,IsLower ? 0 : k2), actual_kc, rs); // pack the general rhs sub-block: depth actual_kc, rs columns
329
+
330
+ // pack the triangular part of the rhs padding the unrolled blocks with zeros
331
+ if(ts>0)
332
+ {
333
+ for (Index j2=0; j2<actual_kc; j2+=SmallPanelWidth)
334
+ {
335
+ Index actualPanelWidth = std::min<Index>(actual_kc-j2, SmallPanelWidth);
336
+ Index actual_j2 = actual_k2 + j2;
337
+ Index panelOffset = IsLower ? j2+actualPanelWidth : 0; // dense part starts just past the micro block (Lower) or at the top of the block (Upper)
338
+ Index panelLength = IsLower ? actual_kc-j2-actualPanelWidth : j2; // its length: what follows the micro block (Lower) or what precedes it (Upper)
339
+ // general part
340
+ pack_rhs_panel(blockB+j2*actual_kc,
341
+ rhs.getSubMapper(actual_k2+panelOffset, actual_j2),
342
+ panelLength, actualPanelWidth,
343
+ actual_kc, panelOffset);
344
+
345
+ // append the triangular part via a temporary buffer
346
+ for (Index j=0;j<actualPanelWidth;++j)
347
+ {
348
+ if (SetDiag)
349
+ triangularBuffer.coeffRef(j,j) = rhs(actual_j2+j,actual_j2+j);
350
+ for (Index k=IsLower ? j+1 : 0; IsLower ? k<actualPanelWidth : k<j; ++k) // strictly-lower entries of column j (Lower) or strictly-upper ones (Upper)
351
+ triangularBuffer.coeffRef(k,j) = rhs(actual_j2+k,actual_j2+j);
352
+ }
353
+
354
+ pack_rhs_panel(blockB+j2*actual_kc,
355
+ RhsMapper(triangularBuffer.data(), triangularBuffer.outerStride()),
356
+ actualPanelWidth, actualPanelWidth,
357
+ actual_kc, j2);
358
+ }
359
+ }
360
+
361
+ for (Index i2=0; i2<rows; i2+=mc)
362
+ {
363
+ const Index actual_mc = (std::min)(mc,rows-i2); // rows in this chunk, at most mc
364
+ pack_lhs(blockA, lhs.getSubMapper(i2, actual_k2), actual_kc, actual_mc);
365
+
366
+ // triangular kernel
367
+ if(ts>0)
368
+ {
369
+ for (Index j2=0; j2<actual_kc; j2+=SmallPanelWidth)
370
+ {
371
+ Index actualPanelWidth = std::min<Index>(actual_kc-j2, SmallPanelWidth);
372
+ Index panelLength = IsLower ? actual_kc-j2 : j2+actualPanelWidth; // depth used for this panel: micro block plus its dense part
373
+ Index blockOffset = IsLower ? j2 : 0;
374
+
375
+ gebp_kernel(res.getSubMapper(i2, actual_k2 + j2),
376
+ blockA, blockB+j2*actual_kc,
377
+ actual_mc, panelLength, actualPanelWidth,
378
+ alpha,
379
+ actual_kc, actual_kc, // strides
380
+ blockOffset, blockOffset);// offsets
381
+ }
382
+ }
383
+ gebp_kernel(res.getSubMapper(i2, IsLower ? 0 : k2), // general part: the rs columns packed at geb
384
+ blockA, geb, actual_mc, actual_kc, rs,
385
+ alpha,
386
+ -1, -1, 0, 0);
387
+ }
388
+ }
389
+ }
390
+
391
+ /***************************************************************************
392
+ * Wrapper to product_triangular_matrix_matrix
393
+ ***************************************************************************/
394
+
395
+ } // end namespace internal
396
+
397
+ namespace internal {
398
+ template<int Mode, bool LhsIsTriangular, typename Lhs, typename Rhs> // Wrapper that prepares operands, sizes and blocking, then calls product_triangular_matrix_matrix. NOTE(review): the two `false` arguments presumably mean "neither operand is a vector" — confirm against the primary template.
399
+ struct triangular_product_impl<Mode,LhsIsTriangular,Lhs,false,Rhs,false>
400
+ {
401
+ template<typename Dest> static void run(Dest& dst, const Lhs &a_lhs, const Rhs &a_rhs, const typename Dest::Scalar& alpha) // dst is written through &dst.coeffRef(0,0); a_lhs, a_rhs and alpha are read only
402
+ {
403
+ typedef typename Lhs::Scalar LhsScalar;
404
+ typedef typename Rhs::Scalar RhsScalar;
405
+ typedef typename Dest::Scalar Scalar;
406
+
407
+ typedef internal::blas_traits<Lhs> LhsBlasTraits;
408
+ typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
409
+ typedef typename internal::remove_all<ActualLhsType>::type ActualLhsTypeCleaned;
410
+ typedef internal::blas_traits<Rhs> RhsBlasTraits;
411
+ typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
412
+ typedef typename internal::remove_all<ActualRhsType>::type ActualRhsTypeCleaned;
413
+ // NOTE(review): extract() presumably peels scalar factor / conjugation wrappers off the operands — confirm against blas_traits
414
+ typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(a_lhs);
415
+ typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(a_rhs);
416
+ // the per-operand factors are kept separately because the unit-diagonal correction below needs them
417
+ LhsScalar lhs_alpha = LhsBlasTraits::extractScalarFactor(a_lhs);
418
+ RhsScalar rhs_alpha = RhsBlasTraits::extractScalarFactor(a_rhs);
419
+ Scalar actualAlpha = alpha * lhs_alpha * rhs_alpha;
420
+
421
+ typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,Scalar,Scalar,
422
+ Lhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime, Lhs::MaxColsAtCompileTime,4> BlockingType;
423
+
424
+ enum { IsLower = (Mode&Lower) == Lower };
425
+ Index stripedRows = ((!LhsIsTriangular) || (IsLower)) ? lhs.rows() : (std::min)(lhs.rows(),lhs.cols()); // rows are capped to the diagonal extent only for an upper-triangular lhs
426
+ Index stripedCols = ((LhsIsTriangular) || (!IsLower)) ? rhs.cols() : (std::min)(rhs.cols(),rhs.rows()); // columns are capped only for a lower-triangular rhs
427
+ Index stripedDepth = LhsIsTriangular ? ((!IsLower) ? lhs.cols() : (std::min)(lhs.cols(),lhs.rows())) // depth is capped for a lower-triangular lhs ...
428
+ : ((IsLower) ? rhs.rows() : (std::min)(rhs.rows(),rhs.cols())); // ... or for an upper-triangular rhs
429
+
430
+ BlockingType blocking(stripedRows, stripedCols, stripedDepth, 1, false);
431
+ // storage orders are read from the extracted operands and from Dest; conjugation flags come from blas_traits
432
+ internal::product_triangular_matrix_matrix<Scalar, Index,
433
+ Mode, LhsIsTriangular,
434
+ (internal::traits<ActualLhsTypeCleaned>::Flags&RowMajorBit) ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate,
435
+ (internal::traits<ActualRhsTypeCleaned>::Flags&RowMajorBit) ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate,
436
+ (internal::traits<Dest >::Flags&RowMajorBit) ? RowMajor : ColMajor>
437
+ ::run(
438
+ stripedRows, stripedCols, stripedDepth, // sizes
439
+ &lhs.coeffRef(0,0), lhs.outerStride(), // lhs info
440
+ &rhs.coeffRef(0,0), rhs.outerStride(), // rhs info
441
+ &dst.coeffRef(0,0), dst.outerStride(), // result info
442
+ actualAlpha, blocking
443
+ );
444
+
445
+ // Apply correction if the diagonal is unit and a scalar factor was nested:
446
+ if ((Mode&UnitDiag)==UnitDiag)
447
+ {
448
+ if (LhsIsTriangular && lhs_alpha!=LhsScalar(1))
449
+ {
450
+ Index diagSize = (std::min)(lhs.rows(),lhs.cols());
451
+ dst.topRows(diagSize) -= ((lhs_alpha-LhsScalar(1))*a_rhs).topRows(diagSize); // subtract (lhs_alpha - 1) times the top diagSize rows of a_rhs
452
+ }
453
+ else if ((!LhsIsTriangular) && rhs_alpha!=RhsScalar(1))
454
+ {
455
+ Index diagSize = (std::min)(rhs.rows(),rhs.cols());
456
+ dst.leftCols(diagSize) -= (rhs_alpha-RhsScalar(1))*a_lhs.leftCols(diagSize); // subtract (rhs_alpha - 1) times the left diagSize columns of a_lhs
457
+ }
458
+ }
459
+ }
460
+ };
461
+
462
+ } // end namespace internal
463
+
464
+ } // end namespace Eigen
465
+
466
+ #endif // EIGEN_TRIANGULAR_MATRIX_MATRIX_H