tomoto 0.2.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (369) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +10 -0
  3. data/README.md +8 -10
  4. data/ext/tomoto/ct.cpp +11 -11
  5. data/ext/tomoto/dmr.cpp +14 -13
  6. data/ext/tomoto/dt.cpp +14 -14
  7. data/ext/tomoto/extconf.rb +7 -5
  8. data/ext/tomoto/gdmr.cpp +7 -7
  9. data/ext/tomoto/hdp.cpp +9 -9
  10. data/ext/tomoto/hlda.cpp +13 -13
  11. data/ext/tomoto/hpa.cpp +5 -5
  12. data/ext/tomoto/lda.cpp +42 -39
  13. data/ext/tomoto/llda.cpp +6 -6
  14. data/ext/tomoto/mglda.cpp +15 -15
  15. data/ext/tomoto/pa.cpp +6 -6
  16. data/ext/tomoto/plda.cpp +6 -6
  17. data/ext/tomoto/slda.cpp +8 -8
  18. data/ext/tomoto/{ext.cpp → tomoto.cpp} +8 -8
  19. data/ext/tomoto/utils.h +16 -70
  20. data/lib/tomoto/version.rb +1 -1
  21. data/lib/tomoto.rb +5 -1
  22. data/vendor/EigenRand/EigenRand/Core.h +10 -10
  23. data/vendor/EigenRand/EigenRand/Dists/Basic.h +208 -9
  24. data/vendor/EigenRand/EigenRand/Dists/Discrete.h +52 -31
  25. data/vendor/EigenRand/EigenRand/Dists/GammaPoisson.h +9 -8
  26. data/vendor/EigenRand/EigenRand/Dists/NormalExp.h +28 -21
  27. data/vendor/EigenRand/EigenRand/EigenRand +11 -6
  28. data/vendor/EigenRand/EigenRand/Macro.h +13 -7
  29. data/vendor/EigenRand/EigenRand/MorePacketMath.h +348 -740
  30. data/vendor/EigenRand/EigenRand/MvDists/Multinomial.h +5 -3
  31. data/vendor/EigenRand/EigenRand/MvDists/MvNormal.h +9 -3
  32. data/vendor/EigenRand/EigenRand/PacketFilter.h +11 -253
  33. data/vendor/EigenRand/EigenRand/PacketRandomEngine.h +21 -47
  34. data/vendor/EigenRand/EigenRand/RandUtils.h +50 -344
  35. data/vendor/EigenRand/EigenRand/arch/AVX/MorePacketMath.h +619 -0
  36. data/vendor/EigenRand/EigenRand/arch/AVX/PacketFilter.h +149 -0
  37. data/vendor/EigenRand/EigenRand/arch/AVX/RandUtils.h +228 -0
  38. data/vendor/EigenRand/EigenRand/arch/NEON/MorePacketMath.h +473 -0
  39. data/vendor/EigenRand/EigenRand/arch/NEON/PacketFilter.h +142 -0
  40. data/vendor/EigenRand/EigenRand/arch/NEON/RandUtils.h +126 -0
  41. data/vendor/EigenRand/EigenRand/arch/SSE/MorePacketMath.h +501 -0
  42. data/vendor/EigenRand/EigenRand/arch/SSE/PacketFilter.h +133 -0
  43. data/vendor/EigenRand/EigenRand/arch/SSE/RandUtils.h +120 -0
  44. data/vendor/EigenRand/EigenRand/doc.h +24 -12
  45. data/vendor/EigenRand/README.md +57 -4
  46. data/vendor/eigen/COPYING.APACHE +203 -0
  47. data/vendor/eigen/COPYING.BSD +1 -1
  48. data/vendor/eigen/COPYING.MINPACK +51 -52
  49. data/vendor/eigen/Eigen/Cholesky +0 -1
  50. data/vendor/eigen/Eigen/Core +112 -265
  51. data/vendor/eigen/Eigen/Eigenvalues +2 -3
  52. data/vendor/eigen/Eigen/Geometry +5 -8
  53. data/vendor/eigen/Eigen/Householder +0 -1
  54. data/vendor/eigen/Eigen/Jacobi +0 -1
  55. data/vendor/eigen/Eigen/KLUSupport +41 -0
  56. data/vendor/eigen/Eigen/LU +2 -5
  57. data/vendor/eigen/Eigen/OrderingMethods +0 -3
  58. data/vendor/eigen/Eigen/PaStiXSupport +1 -0
  59. data/vendor/eigen/Eigen/PardisoSupport +0 -0
  60. data/vendor/eigen/Eigen/QR +2 -3
  61. data/vendor/eigen/Eigen/QtAlignedMalloc +0 -1
  62. data/vendor/eigen/Eigen/SVD +0 -1
  63. data/vendor/eigen/Eigen/Sparse +0 -2
  64. data/vendor/eigen/Eigen/SparseCholesky +0 -8
  65. data/vendor/eigen/Eigen/SparseLU +4 -0
  66. data/vendor/eigen/Eigen/SparseQR +0 -1
  67. data/vendor/eigen/Eigen/src/Cholesky/LDLT.h +42 -27
  68. data/vendor/eigen/Eigen/src/Cholesky/LLT.h +39 -23
  69. data/vendor/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +90 -47
  70. data/vendor/eigen/Eigen/src/Core/ArithmeticSequence.h +413 -0
  71. data/vendor/eigen/Eigen/src/Core/Array.h +99 -11
  72. data/vendor/eigen/Eigen/src/Core/ArrayBase.h +3 -3
  73. data/vendor/eigen/Eigen/src/Core/ArrayWrapper.h +21 -21
  74. data/vendor/eigen/Eigen/src/Core/Assign.h +1 -1
  75. data/vendor/eigen/Eigen/src/Core/AssignEvaluator.h +125 -50
  76. data/vendor/eigen/Eigen/src/Core/Assign_MKL.h +10 -10
  77. data/vendor/eigen/Eigen/src/Core/BandMatrix.h +16 -16
  78. data/vendor/eigen/Eigen/src/Core/Block.h +56 -60
  79. data/vendor/eigen/Eigen/src/Core/BooleanRedux.h +29 -31
  80. data/vendor/eigen/Eigen/src/Core/CommaInitializer.h +7 -3
  81. data/vendor/eigen/Eigen/src/Core/CoreEvaluators.h +325 -272
  82. data/vendor/eigen/Eigen/src/Core/CoreIterators.h +5 -0
  83. data/vendor/eigen/Eigen/src/Core/CwiseBinaryOp.h +21 -22
  84. data/vendor/eigen/Eigen/src/Core/CwiseNullaryOp.h +153 -18
  85. data/vendor/eigen/Eigen/src/Core/CwiseUnaryOp.h +6 -6
  86. data/vendor/eigen/Eigen/src/Core/CwiseUnaryView.h +14 -10
  87. data/vendor/eigen/Eigen/src/Core/DenseBase.h +132 -42
  88. data/vendor/eigen/Eigen/src/Core/DenseCoeffsBase.h +25 -21
  89. data/vendor/eigen/Eigen/src/Core/DenseStorage.h +153 -71
  90. data/vendor/eigen/Eigen/src/Core/Diagonal.h +21 -23
  91. data/vendor/eigen/Eigen/src/Core/DiagonalMatrix.h +50 -2
  92. data/vendor/eigen/Eigen/src/Core/DiagonalProduct.h +1 -1
  93. data/vendor/eigen/Eigen/src/Core/Dot.h +10 -10
  94. data/vendor/eigen/Eigen/src/Core/EigenBase.h +10 -9
  95. data/vendor/eigen/Eigen/src/Core/ForceAlignedAccess.h +8 -4
  96. data/vendor/eigen/Eigen/src/Core/Fuzzy.h +3 -3
  97. data/vendor/eigen/Eigen/src/Core/GeneralProduct.h +20 -10
  98. data/vendor/eigen/Eigen/src/Core/GenericPacketMath.h +599 -152
  99. data/vendor/eigen/Eigen/src/Core/GlobalFunctions.h +40 -33
  100. data/vendor/eigen/Eigen/src/Core/IO.h +40 -7
  101. data/vendor/eigen/Eigen/src/Core/IndexedView.h +237 -0
  102. data/vendor/eigen/Eigen/src/Core/Inverse.h +9 -10
  103. data/vendor/eigen/Eigen/src/Core/Map.h +7 -7
  104. data/vendor/eigen/Eigen/src/Core/MapBase.h +10 -3
  105. data/vendor/eigen/Eigen/src/Core/MathFunctions.h +767 -125
  106. data/vendor/eigen/Eigen/src/Core/MathFunctionsImpl.h +118 -19
  107. data/vendor/eigen/Eigen/src/Core/Matrix.h +131 -25
  108. data/vendor/eigen/Eigen/src/Core/MatrixBase.h +21 -3
  109. data/vendor/eigen/Eigen/src/Core/NestByValue.h +25 -50
  110. data/vendor/eigen/Eigen/src/Core/NoAlias.h +4 -3
  111. data/vendor/eigen/Eigen/src/Core/NumTraits.h +107 -20
  112. data/vendor/eigen/Eigen/src/Core/PartialReduxEvaluator.h +232 -0
  113. data/vendor/eigen/Eigen/src/Core/PermutationMatrix.h +3 -31
  114. data/vendor/eigen/Eigen/src/Core/PlainObjectBase.h +152 -59
  115. data/vendor/eigen/Eigen/src/Core/Product.h +30 -25
  116. data/vendor/eigen/Eigen/src/Core/ProductEvaluators.h +192 -125
  117. data/vendor/eigen/Eigen/src/Core/Random.h +37 -1
  118. data/vendor/eigen/Eigen/src/Core/Redux.h +180 -170
  119. data/vendor/eigen/Eigen/src/Core/Ref.h +121 -23
  120. data/vendor/eigen/Eigen/src/Core/Replicate.h +8 -8
  121. data/vendor/eigen/Eigen/src/Core/Reshaped.h +454 -0
  122. data/vendor/eigen/Eigen/src/Core/ReturnByValue.h +7 -5
  123. data/vendor/eigen/Eigen/src/Core/Reverse.h +18 -12
  124. data/vendor/eigen/Eigen/src/Core/Select.h +8 -6
  125. data/vendor/eigen/Eigen/src/Core/SelfAdjointView.h +33 -20
  126. data/vendor/eigen/Eigen/src/Core/Solve.h +14 -14
  127. data/vendor/eigen/Eigen/src/Core/SolveTriangular.h +16 -16
  128. data/vendor/eigen/Eigen/src/Core/SolverBase.h +41 -3
  129. data/vendor/eigen/Eigen/src/Core/StableNorm.h +100 -70
  130. data/vendor/eigen/Eigen/src/Core/StlIterators.h +463 -0
  131. data/vendor/eigen/Eigen/src/Core/Stride.h +9 -4
  132. data/vendor/eigen/Eigen/src/Core/Swap.h +5 -4
  133. data/vendor/eigen/Eigen/src/Core/Transpose.h +88 -27
  134. data/vendor/eigen/Eigen/src/Core/Transpositions.h +26 -47
  135. data/vendor/eigen/Eigen/src/Core/TriangularMatrix.h +93 -75
  136. data/vendor/eigen/Eigen/src/Core/VectorBlock.h +5 -5
  137. data/vendor/eigen/Eigen/src/Core/VectorwiseOp.h +159 -70
  138. data/vendor/eigen/Eigen/src/Core/Visitor.h +137 -29
  139. data/vendor/eigen/Eigen/src/Core/arch/AVX/Complex.h +50 -129
  140. data/vendor/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +126 -337
  141. data/vendor/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +1092 -155
  142. data/vendor/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +65 -1
  143. data/vendor/eigen/Eigen/src/Core/arch/AVX512/Complex.h +422 -0
  144. data/vendor/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +207 -236
  145. data/vendor/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1482 -495
  146. data/vendor/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +89 -0
  147. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +152 -165
  148. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +19 -251
  149. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2937 -0
  150. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +221 -0
  151. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +629 -0
  152. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2042 -392
  153. data/vendor/eigen/Eigen/src/Core/arch/CUDA/Complex.h +235 -80
  154. data/vendor/eigen/Eigen/src/Core/arch/Default/BFloat16.h +700 -0
  155. data/vendor/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +102 -14
  156. data/vendor/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1649 -0
  157. data/vendor/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +110 -0
  158. data/vendor/eigen/Eigen/src/Core/arch/Default/Half.h +942 -0
  159. data/vendor/eigen/Eigen/src/Core/arch/Default/Settings.h +1 -1
  160. data/vendor/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +120 -0
  161. data/vendor/eigen/Eigen/src/Core/arch/{CUDA → GPU}/MathFunctions.h +16 -4
  162. data/vendor/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1685 -0
  163. data/vendor/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +80 -0
  164. data/vendor/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
  165. data/vendor/eigen/Eigen/src/Core/arch/MSA/Complex.h +648 -0
  166. data/vendor/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +387 -0
  167. data/vendor/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1233 -0
  168. data/vendor/eigen/Eigen/src/Core/arch/NEON/Complex.h +313 -219
  169. data/vendor/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +183 -0
  170. data/vendor/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +54 -70
  171. data/vendor/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4376 -549
  172. data/vendor/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1419 -0
  173. data/vendor/eigen/Eigen/src/Core/arch/SSE/Complex.h +59 -179
  174. data/vendor/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +65 -428
  175. data/vendor/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +893 -283
  176. data/vendor/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +65 -0
  177. data/vendor/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +44 -0
  178. data/vendor/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +752 -0
  179. data/vendor/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +49 -0
  180. data/vendor/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +232 -0
  181. data/vendor/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +301 -0
  182. data/vendor/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +670 -0
  183. data/vendor/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +694 -0
  184. data/vendor/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +85 -0
  185. data/vendor/eigen/Eigen/src/Core/arch/ZVector/Complex.h +212 -183
  186. data/vendor/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +101 -5
  187. data/vendor/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +510 -395
  188. data/vendor/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +11 -2
  189. data/vendor/eigen/Eigen/src/Core/functors/BinaryFunctors.h +112 -46
  190. data/vendor/eigen/Eigen/src/Core/functors/NullaryFunctors.h +31 -30
  191. data/vendor/eigen/Eigen/src/Core/functors/StlFunctors.h +32 -2
  192. data/vendor/eigen/Eigen/src/Core/functors/UnaryFunctors.h +355 -16
  193. data/vendor/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1075 -586
  194. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +49 -24
  195. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +41 -35
  196. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +6 -6
  197. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +4 -2
  198. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +382 -483
  199. data/vendor/eigen/Eigen/src/Core/products/Parallelizer.h +22 -5
  200. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +53 -30
  201. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +16 -8
  202. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +8 -6
  203. data/vendor/eigen/Eigen/src/Core/products/SelfadjointProduct.h +4 -4
  204. data/vendor/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +5 -4
  205. data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +33 -27
  206. data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +14 -12
  207. data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +36 -34
  208. data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +8 -4
  209. data/vendor/eigen/Eigen/src/Core/products/TriangularSolverVector.h +13 -10
  210. data/vendor/eigen/Eigen/src/Core/util/BlasUtil.h +304 -119
  211. data/vendor/eigen/Eigen/src/Core/util/ConfigureVectorization.h +512 -0
  212. data/vendor/eigen/Eigen/src/Core/util/Constants.h +25 -9
  213. data/vendor/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +26 -3
  214. data/vendor/eigen/Eigen/src/Core/util/ForwardDeclarations.h +29 -9
  215. data/vendor/eigen/Eigen/src/Core/util/IndexedViewHelper.h +186 -0
  216. data/vendor/eigen/Eigen/src/Core/util/IntegralConstant.h +272 -0
  217. data/vendor/eigen/Eigen/src/Core/util/MKL_support.h +8 -1
  218. data/vendor/eigen/Eigen/src/Core/util/Macros.h +709 -246
  219. data/vendor/eigen/Eigen/src/Core/util/Memory.h +222 -52
  220. data/vendor/eigen/Eigen/src/Core/util/Meta.h +355 -77
  221. data/vendor/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +5 -1
  222. data/vendor/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
  223. data/vendor/eigen/Eigen/src/Core/util/StaticAssert.h +8 -5
  224. data/vendor/eigen/Eigen/src/Core/util/SymbolicIndex.h +293 -0
  225. data/vendor/eigen/Eigen/src/Core/util/XprHelper.h +65 -30
  226. data/vendor/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +1 -1
  227. data/vendor/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +7 -4
  228. data/vendor/eigen/Eigen/src/Eigenvalues/EigenSolver.h +2 -2
  229. data/vendor/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +1 -1
  230. data/vendor/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +2 -2
  231. data/vendor/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +2 -2
  232. data/vendor/eigen/Eigen/src/Eigenvalues/RealQZ.h +9 -6
  233. data/vendor/eigen/Eigen/src/Eigenvalues/RealSchur.h +21 -9
  234. data/vendor/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +77 -43
  235. data/vendor/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +20 -15
  236. data/vendor/eigen/Eigen/src/Geometry/AlignedBox.h +99 -5
  237. data/vendor/eigen/Eigen/src/Geometry/AngleAxis.h +4 -4
  238. data/vendor/eigen/Eigen/src/Geometry/EulerAngles.h +3 -3
  239. data/vendor/eigen/Eigen/src/Geometry/Homogeneous.h +15 -11
  240. data/vendor/eigen/Eigen/src/Geometry/Hyperplane.h +1 -1
  241. data/vendor/eigen/Eigen/src/Geometry/OrthoMethods.h +3 -2
  242. data/vendor/eigen/Eigen/src/Geometry/ParametrizedLine.h +39 -2
  243. data/vendor/eigen/Eigen/src/Geometry/Quaternion.h +70 -14
  244. data/vendor/eigen/Eigen/src/Geometry/Rotation2D.h +3 -3
  245. data/vendor/eigen/Eigen/src/Geometry/Scaling.h +23 -5
  246. data/vendor/eigen/Eigen/src/Geometry/Transform.h +88 -67
  247. data/vendor/eigen/Eigen/src/Geometry/Translation.h +6 -12
  248. data/vendor/eigen/Eigen/src/Geometry/Umeyama.h +1 -1
  249. data/vendor/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +168 -0
  250. data/vendor/eigen/Eigen/src/Householder/BlockHouseholder.h +9 -2
  251. data/vendor/eigen/Eigen/src/Householder/Householder.h +8 -4
  252. data/vendor/eigen/Eigen/src/Householder/HouseholderSequence.h +123 -48
  253. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +15 -15
  254. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +7 -23
  255. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +5 -22
  256. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +41 -47
  257. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +51 -60
  258. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +70 -20
  259. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +2 -20
  260. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +11 -9
  261. data/vendor/eigen/Eigen/src/Jacobi/Jacobi.h +31 -10
  262. data/vendor/eigen/Eigen/src/KLUSupport/KLUSupport.h +358 -0
  263. data/vendor/eigen/Eigen/src/LU/Determinant.h +35 -19
  264. data/vendor/eigen/Eigen/src/LU/FullPivLU.h +29 -43
  265. data/vendor/eigen/Eigen/src/LU/InverseImpl.h +25 -8
  266. data/vendor/eigen/Eigen/src/LU/PartialPivLU.h +71 -58
  267. data/vendor/eigen/Eigen/src/LU/arch/InverseSize4.h +351 -0
  268. data/vendor/eigen/Eigen/src/OrderingMethods/Amd.h +7 -17
  269. data/vendor/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +297 -277
  270. data/vendor/eigen/Eigen/src/OrderingMethods/Ordering.h +6 -10
  271. data/vendor/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +1 -1
  272. data/vendor/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +9 -7
  273. data/vendor/eigen/Eigen/src/QR/ColPivHouseholderQR.h +41 -20
  274. data/vendor/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +100 -27
  275. data/vendor/eigen/Eigen/src/QR/FullPivHouseholderQR.h +59 -22
  276. data/vendor/eigen/Eigen/src/QR/HouseholderQR.h +48 -23
  277. data/vendor/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +25 -3
  278. data/vendor/eigen/Eigen/src/SVD/BDCSVD.h +183 -63
  279. data/vendor/eigen/Eigen/src/SVD/JacobiSVD.h +22 -14
  280. data/vendor/eigen/Eigen/src/SVD/SVDBase.h +83 -22
  281. data/vendor/eigen/Eigen/src/SVD/UpperBidiagonalization.h +3 -3
  282. data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +17 -9
  283. data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +12 -37
  284. data/vendor/eigen/Eigen/src/SparseCore/AmbiVector.h +3 -2
  285. data/vendor/eigen/Eigen/src/SparseCore/CompressedStorage.h +16 -0
  286. data/vendor/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +6 -6
  287. data/vendor/eigen/Eigen/src/SparseCore/SparseAssign.h +81 -27
  288. data/vendor/eigen/Eigen/src/SparseCore/SparseBlock.h +25 -57
  289. data/vendor/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +40 -11
  290. data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +11 -15
  291. data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +4 -2
  292. data/vendor/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +30 -8
  293. data/vendor/eigen/Eigen/src/SparseCore/SparseMatrix.h +126 -11
  294. data/vendor/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +5 -12
  295. data/vendor/eigen/Eigen/src/SparseCore/SparseProduct.h +13 -1
  296. data/vendor/eigen/Eigen/src/SparseCore/SparseRef.h +7 -7
  297. data/vendor/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +5 -2
  298. data/vendor/eigen/Eigen/src/SparseCore/SparseUtil.h +8 -0
  299. data/vendor/eigen/Eigen/src/SparseCore/SparseVector.h +1 -1
  300. data/vendor/eigen/Eigen/src/SparseCore/SparseView.h +1 -0
  301. data/vendor/eigen/Eigen/src/SparseLU/SparseLU.h +162 -12
  302. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +1 -1
  303. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +76 -2
  304. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +2 -2
  305. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +1 -1
  306. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +1 -1
  307. data/vendor/eigen/Eigen/src/SparseQR/SparseQR.h +19 -6
  308. data/vendor/eigen/Eigen/src/StlSupport/StdDeque.h +2 -12
  309. data/vendor/eigen/Eigen/src/StlSupport/StdList.h +2 -2
  310. data/vendor/eigen/Eigen/src/StlSupport/StdVector.h +2 -2
  311. data/vendor/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +6 -8
  312. data/vendor/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +175 -39
  313. data/vendor/eigen/Eigen/src/misc/lapacke.h +5 -4
  314. data/vendor/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +28 -2
  315. data/vendor/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +155 -11
  316. data/vendor/eigen/Eigen/src/plugins/BlockMethods.h +626 -242
  317. data/vendor/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +14 -0
  318. data/vendor/eigen/Eigen/src/plugins/IndexedViewMethods.h +262 -0
  319. data/vendor/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +4 -4
  320. data/vendor/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +10 -0
  321. data/vendor/eigen/Eigen/src/plugins/ReshapedMethods.h +149 -0
  322. data/vendor/eigen/README.md +2 -0
  323. data/vendor/eigen/bench/btl/README +1 -1
  324. data/vendor/eigen/bench/tensors/README +6 -7
  325. data/vendor/eigen/ci/README.md +56 -0
  326. data/vendor/eigen/demos/mix_eigen_and_c/README +1 -1
  327. data/vendor/eigen/unsupported/Eigen/CXX11/src/Tensor/README.md +213 -158
  328. data/vendor/eigen/unsupported/README.txt +1 -1
  329. data/vendor/tomotopy/README.kr.rst +78 -0
  330. data/vendor/tomotopy/README.rst +75 -0
  331. data/vendor/tomotopy/src/Labeling/FoRelevance.cpp +2 -2
  332. data/vendor/tomotopy/src/Labeling/Phraser.hpp +4 -4
  333. data/vendor/tomotopy/src/TopicModel/CTModel.hpp +7 -3
  334. data/vendor/tomotopy/src/TopicModel/DMRModel.hpp +7 -3
  335. data/vendor/tomotopy/src/TopicModel/DTModel.hpp +6 -3
  336. data/vendor/tomotopy/src/TopicModel/GDMRModel.hpp +2 -2
  337. data/vendor/tomotopy/src/TopicModel/HDP.h +1 -0
  338. data/vendor/tomotopy/src/TopicModel/HDPModel.hpp +57 -6
  339. data/vendor/tomotopy/src/TopicModel/HLDAModel.hpp +6 -3
  340. data/vendor/tomotopy/src/TopicModel/HPAModel.hpp +3 -2
  341. data/vendor/tomotopy/src/TopicModel/LDA.h +3 -3
  342. data/vendor/tomotopy/src/TopicModel/LDACVB0Model.hpp +5 -5
  343. data/vendor/tomotopy/src/TopicModel/LDAModel.hpp +50 -19
  344. data/vendor/tomotopy/src/TopicModel/LLDAModel.hpp +6 -2
  345. data/vendor/tomotopy/src/TopicModel/MGLDAModel.hpp +3 -2
  346. data/vendor/tomotopy/src/TopicModel/PAModel.hpp +1 -1
  347. data/vendor/tomotopy/src/TopicModel/PLDAModel.hpp +6 -2
  348. data/vendor/tomotopy/src/TopicModel/PT.h +3 -1
  349. data/vendor/tomotopy/src/TopicModel/PTModel.hpp +36 -3
  350. data/vendor/tomotopy/src/TopicModel/SLDAModel.hpp +6 -3
  351. data/vendor/tomotopy/src/TopicModel/TopicModel.hpp +55 -26
  352. data/vendor/tomotopy/src/Utils/AliasMethod.hpp +5 -4
  353. data/vendor/tomotopy/src/Utils/Dictionary.h +2 -2
  354. data/vendor/tomotopy/src/Utils/EigenAddonOps.hpp +36 -1
  355. data/vendor/tomotopy/src/Utils/MultiNormalDistribution.hpp +1 -1
  356. data/vendor/tomotopy/src/Utils/TruncMultiNormal.hpp +1 -1
  357. data/vendor/tomotopy/src/Utils/exception.h +6 -0
  358. data/vendor/tomotopy/src/Utils/math.h +2 -2
  359. data/vendor/tomotopy/src/Utils/sample.hpp +14 -12
  360. data/vendor/tomotopy/src/Utils/serializer.hpp +30 -5
  361. data/vendor/tomotopy/src/Utils/sse_gamma.h +0 -3
  362. metadata +64 -18
  363. data/vendor/eigen/Eigen/CMakeLists.txt +0 -19
  364. data/vendor/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -674
  365. data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
  366. data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
  367. data/vendor/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
  368. data/vendor/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
  369. data/vendor/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
@@ -24,14 +24,14 @@ struct gebp_kernel;
24
24
  template<typename Scalar, typename Index, typename DataMapper, int nr, int StorageOrder, bool Conjugate = false, bool PanelMode=false>
25
25
  struct gemm_pack_rhs;
26
26
 
27
- template<typename Scalar, typename Index, typename DataMapper, int Pack1, int Pack2, int StorageOrder, bool Conjugate = false, bool PanelMode = false>
27
+ template<typename Scalar, typename Index, typename DataMapper, int Pack1, int Pack2, typename Packet, int StorageOrder, bool Conjugate = false, bool PanelMode = false>
28
28
  struct gemm_pack_lhs;
29
29
 
30
30
  template<
31
31
  typename Index,
32
32
  typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
33
33
  typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs,
34
- int ResStorageOrder>
34
+ int ResStorageOrder, int ResInnerStride>
35
35
  struct general_matrix_matrix_product;
36
36
 
37
37
  template<typename Index,
@@ -39,90 +39,6 @@ template<typename Index,
39
39
  typename RhsScalar, typename RhsMapper, bool ConjugateRhs, int Version=Specialized>
40
40
  struct general_matrix_vector_product;
41
41
 
42
-
43
- template<bool Conjugate> struct conj_if;
44
-
45
- template<> struct conj_if<true> {
46
- template<typename T>
47
- inline T operator()(const T& x) const { return numext::conj(x); }
48
- template<typename T>
49
- inline T pconj(const T& x) const { return internal::pconj(x); }
50
- };
51
-
52
- template<> struct conj_if<false> {
53
- template<typename T>
54
- inline const T& operator()(const T& x) const { return x; }
55
- template<typename T>
56
- inline const T& pconj(const T& x) const { return x; }
57
- };
58
-
59
- // Generic implementation for custom complex types.
60
- template<typename LhsScalar, typename RhsScalar, bool ConjLhs, bool ConjRhs>
61
- struct conj_helper
62
- {
63
- typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar>::ReturnType Scalar;
64
-
65
- EIGEN_STRONG_INLINE Scalar pmadd(const LhsScalar& x, const RhsScalar& y, const Scalar& c) const
66
- { return padd(c, pmul(x,y)); }
67
-
68
- EIGEN_STRONG_INLINE Scalar pmul(const LhsScalar& x, const RhsScalar& y) const
69
- { return conj_if<ConjLhs>()(x) * conj_if<ConjRhs>()(y); }
70
- };
71
-
72
- template<typename Scalar> struct conj_helper<Scalar,Scalar,false,false>
73
- {
74
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const Scalar& y, const Scalar& c) const { return internal::pmadd(x,y,c); }
75
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const Scalar& y) const { return internal::pmul(x,y); }
76
- };
77
-
78
- template<typename RealScalar> struct conj_helper<std::complex<RealScalar>, std::complex<RealScalar>, false,true>
79
- {
80
- typedef std::complex<RealScalar> Scalar;
81
- EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const Scalar& y, const Scalar& c) const
82
- { return c + pmul(x,y); }
83
-
84
- EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const Scalar& y) const
85
- { return Scalar(numext::real(x)*numext::real(y) + numext::imag(x)*numext::imag(y), numext::imag(x)*numext::real(y) - numext::real(x)*numext::imag(y)); }
86
- };
87
-
88
- template<typename RealScalar> struct conj_helper<std::complex<RealScalar>, std::complex<RealScalar>, true,false>
89
- {
90
- typedef std::complex<RealScalar> Scalar;
91
- EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const Scalar& y, const Scalar& c) const
92
- { return c + pmul(x,y); }
93
-
94
- EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const Scalar& y) const
95
- { return Scalar(numext::real(x)*numext::real(y) + numext::imag(x)*numext::imag(y), numext::real(x)*numext::imag(y) - numext::imag(x)*numext::real(y)); }
96
- };
97
-
98
- template<typename RealScalar> struct conj_helper<std::complex<RealScalar>, std::complex<RealScalar>, true,true>
99
- {
100
- typedef std::complex<RealScalar> Scalar;
101
- EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const Scalar& y, const Scalar& c) const
102
- { return c + pmul(x,y); }
103
-
104
- EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const Scalar& y) const
105
- { return Scalar(numext::real(x)*numext::real(y) - numext::imag(x)*numext::imag(y), - numext::real(x)*numext::imag(y) - numext::imag(x)*numext::real(y)); }
106
- };
107
-
108
- template<typename RealScalar,bool Conj> struct conj_helper<std::complex<RealScalar>, RealScalar, Conj,false>
109
- {
110
- typedef std::complex<RealScalar> Scalar;
111
- EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const RealScalar& y, const Scalar& c) const
112
- { return padd(c, pmul(x,y)); }
113
- EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const RealScalar& y) const
114
- { return conj_if<Conj>()(x)*y; }
115
- };
116
-
117
- template<typename RealScalar,bool Conj> struct conj_helper<RealScalar, std::complex<RealScalar>, false,Conj>
118
- {
119
- typedef std::complex<RealScalar> Scalar;
120
- EIGEN_STRONG_INLINE Scalar pmadd(const RealScalar& x, const Scalar& y, const Scalar& c) const
121
- { return padd(c, pmul(x,y)); }
122
- EIGEN_STRONG_INLINE Scalar pmul(const RealScalar& x, const Scalar& y) const
123
- { return x*conj_if<Conj>()(y); }
124
- };
125
-
126
42
  template<typename From,typename To> struct get_factor {
127
43
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE To run(const From& x) { return To(x); }
128
44
  };
@@ -155,13 +71,19 @@ class BlasVectorMapper {
155
71
  Scalar* m_data;
156
72
  };
157
73
 
158
- template<typename Scalar, typename Index, int AlignmentType>
159
- class BlasLinearMapper {
160
- public:
161
- typedef typename packet_traits<Scalar>::type Packet;
162
- typedef typename packet_traits<Scalar>::half HalfPacket;
74
+ template<typename Scalar, typename Index, int AlignmentType, int Incr=1>
75
+ class BlasLinearMapper;
163
76
 
164
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE BlasLinearMapper(Scalar *data) : m_data(data) {}
77
+ template<typename Scalar, typename Index, int AlignmentType>
78
+ class BlasLinearMapper<Scalar,Index,AlignmentType>
79
+ {
80
+ public:
81
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE BlasLinearMapper(Scalar *data, Index incr=1)
82
+ : m_data(data)
83
+ {
84
+ EIGEN_ONLY_USED_FOR_DEBUG(incr);
85
+ eigen_assert(incr==1);
86
+ }
165
87
 
166
88
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void prefetch(int i) const {
167
89
  internal::prefetch(&operator()(i));
@@ -171,33 +93,86 @@ class BlasLinearMapper {
171
93
  return m_data[i];
172
94
  }
173
95
 
174
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i) const {
175
- return ploadt<Packet, AlignmentType>(m_data + i);
176
- }
177
-
178
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i) const {
179
- return ploadt<HalfPacket, AlignmentType>(m_data + i);
96
+ template<typename PacketType>
97
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketType loadPacket(Index i) const {
98
+ return ploadt<PacketType, AlignmentType>(m_data + i);
180
99
  }
181
100
 
182
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacket(Index i, const Packet &p) const {
183
- pstoret<Scalar, Packet, AlignmentType>(m_data + i, p);
101
+ template<typename PacketType>
102
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacket(Index i, const PacketType &p) const {
103
+ pstoret<Scalar, PacketType, AlignmentType>(m_data + i, p);
184
104
  }
185
105
 
186
- protected:
106
+ protected:
187
107
  Scalar *m_data;
188
108
  };
189
109
 
190
110
  // Lightweight helper class to access matrix coefficients.
191
- template<typename Scalar, typename Index, int StorageOrder, int AlignmentType = Unaligned>
192
- class blas_data_mapper {
193
- public:
194
- typedef typename packet_traits<Scalar>::type Packet;
195
- typedef typename packet_traits<Scalar>::half HalfPacket;
111
+ template<typename Scalar, typename Index, int StorageOrder, int AlignmentType = Unaligned, int Incr = 1>
112
+ class blas_data_mapper;
113
+
114
+ // TMP to help PacketBlock store implementation.
115
+ // There's currently no known use case for PacketBlock load.
116
+ // The default implementation assumes ColMajor order.
117
+ // It always store each packet sequentially one `stride` apart.
118
+ template<typename Index, typename Scalar, typename Packet, int n, int idx, int StorageOrder>
119
+ struct PacketBlockManagement
120
+ {
121
+ PacketBlockManagement<Index, Scalar, Packet, n, idx - 1, StorageOrder> pbm;
122
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(Scalar *to, const Index stride, Index i, Index j, const PacketBlock<Packet, n> &block) const {
123
+ pbm.store(to, stride, i, j, block);
124
+ pstoreu<Scalar>(to + i + (j + idx)*stride, block.packet[idx]);
125
+ }
126
+ };
127
+
128
+ // PacketBlockManagement specialization to take care of RowMajor order without ifs.
129
+ template<typename Index, typename Scalar, typename Packet, int n, int idx>
130
+ struct PacketBlockManagement<Index, Scalar, Packet, n, idx, RowMajor>
131
+ {
132
+ PacketBlockManagement<Index, Scalar, Packet, n, idx - 1, RowMajor> pbm;
133
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(Scalar *to, const Index stride, Index i, Index j, const PacketBlock<Packet, n> &block) const {
134
+ pbm.store(to, stride, i, j, block);
135
+ pstoreu<Scalar>(to + j + (i + idx)*stride, block.packet[idx]);
136
+ }
137
+ };
138
+
139
+ template<typename Index, typename Scalar, typename Packet, int n, int StorageOrder>
140
+ struct PacketBlockManagement<Index, Scalar, Packet, n, -1, StorageOrder>
141
+ {
142
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(Scalar *to, const Index stride, Index i, Index j, const PacketBlock<Packet, n> &block) const {
143
+ EIGEN_UNUSED_VARIABLE(to);
144
+ EIGEN_UNUSED_VARIABLE(stride);
145
+ EIGEN_UNUSED_VARIABLE(i);
146
+ EIGEN_UNUSED_VARIABLE(j);
147
+ EIGEN_UNUSED_VARIABLE(block);
148
+ }
149
+ };
150
+
151
+ template<typename Index, typename Scalar, typename Packet, int n>
152
+ struct PacketBlockManagement<Index, Scalar, Packet, n, -1, RowMajor>
153
+ {
154
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(Scalar *to, const Index stride, Index i, Index j, const PacketBlock<Packet, n> &block) const {
155
+ EIGEN_UNUSED_VARIABLE(to);
156
+ EIGEN_UNUSED_VARIABLE(stride);
157
+ EIGEN_UNUSED_VARIABLE(i);
158
+ EIGEN_UNUSED_VARIABLE(j);
159
+ EIGEN_UNUSED_VARIABLE(block);
160
+ }
161
+ };
196
162
 
163
+ template<typename Scalar, typename Index, int StorageOrder, int AlignmentType>
164
+ class blas_data_mapper<Scalar,Index,StorageOrder,AlignmentType,1>
165
+ {
166
+ public:
197
167
  typedef BlasLinearMapper<Scalar, Index, AlignmentType> LinearMapper;
198
168
  typedef BlasVectorMapper<Scalar, Index> VectorMapper;
199
169
 
200
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE blas_data_mapper(Scalar* data, Index stride) : m_data(data), m_stride(stride) {}
170
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE blas_data_mapper(Scalar* data, Index stride, Index incr=1)
171
+ : m_data(data), m_stride(stride)
172
+ {
173
+ EIGEN_ONLY_USED_FOR_DEBUG(incr);
174
+ eigen_assert(incr==1);
175
+ }
201
176
 
202
177
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType>
203
178
  getSubMapper(Index i, Index j) const {
@@ -218,12 +193,14 @@ class blas_data_mapper {
218
193
  return m_data[StorageOrder==RowMajor ? j + i*m_stride : i + j*m_stride];
219
194
  }
220
195
 
221
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i, Index j) const {
222
- return ploadt<Packet, AlignmentType>(&operator()(i, j));
196
+ template<typename PacketType>
197
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketType loadPacket(Index i, Index j) const {
198
+ return ploadt<PacketType, AlignmentType>(&operator()(i, j));
223
199
  }
224
200
 
225
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i, Index j) const {
226
- return ploadt<HalfPacket, AlignmentType>(&operator()(i, j));
201
+ template <typename PacketT, int AlignmentT>
202
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketT load(Index i, Index j) const {
203
+ return ploadt<PacketT, AlignmentT>(&operator()(i, j));
227
204
  }
228
205
 
229
206
  template<typename SubPacket>
@@ -246,11 +223,167 @@ class blas_data_mapper {
246
223
  return internal::first_default_aligned(m_data, size);
247
224
  }
248
225
 
249
- protected:
226
+ template<typename SubPacket, int n>
227
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacketBlock(Index i, Index j, const PacketBlock<SubPacket, n> &block) const {
228
+ PacketBlockManagement<Index, Scalar, SubPacket, n, n-1, StorageOrder> pbm;
229
+ pbm.store(m_data, m_stride, i, j, block);
230
+ }
231
+ protected:
250
232
  Scalar* EIGEN_RESTRICT m_data;
251
233
  const Index m_stride;
252
234
  };
253
235
 
236
+ // Implementation of non-natural increment (i.e. inner-stride != 1)
237
+ // The exposed API is not complete yet compared to the Incr==1 case
238
+ // because some features makes less sense in this case.
239
+ template<typename Scalar, typename Index, int AlignmentType, int Incr>
240
+ class BlasLinearMapper
241
+ {
242
+ public:
243
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE BlasLinearMapper(Scalar *data,Index incr) : m_data(data), m_incr(incr) {}
244
+
245
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void prefetch(int i) const {
246
+ internal::prefetch(&operator()(i));
247
+ }
248
+
249
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar& operator()(Index i) const {
250
+ return m_data[i*m_incr.value()];
251
+ }
252
+
253
+ template<typename PacketType>
254
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketType loadPacket(Index i) const {
255
+ return pgather<Scalar,PacketType>(m_data + i*m_incr.value(), m_incr.value());
256
+ }
257
+
258
+ template<typename PacketType>
259
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacket(Index i, const PacketType &p) const {
260
+ pscatter<Scalar, PacketType>(m_data + i*m_incr.value(), p, m_incr.value());
261
+ }
262
+
263
+ protected:
264
+ Scalar *m_data;
265
+ const internal::variable_if_dynamic<Index,Incr> m_incr;
266
+ };
267
+
268
+ template<typename Scalar, typename Index, int StorageOrder, int AlignmentType,int Incr>
269
+ class blas_data_mapper
270
+ {
271
+ public:
272
+ typedef BlasLinearMapper<Scalar, Index, AlignmentType,Incr> LinearMapper;
273
+
274
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE blas_data_mapper(Scalar* data, Index stride, Index incr) : m_data(data), m_stride(stride), m_incr(incr) {}
275
+
276
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE blas_data_mapper
277
+ getSubMapper(Index i, Index j) const {
278
+ return blas_data_mapper(&operator()(i, j), m_stride, m_incr.value());
279
+ }
280
+
281
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE LinearMapper getLinearMapper(Index i, Index j) const {
282
+ return LinearMapper(&operator()(i, j), m_incr.value());
283
+ }
284
+
285
+ EIGEN_DEVICE_FUNC
286
+ EIGEN_ALWAYS_INLINE Scalar& operator()(Index i, Index j) const {
287
+ return m_data[StorageOrder==RowMajor ? j*m_incr.value() + i*m_stride : i*m_incr.value() + j*m_stride];
288
+ }
289
+
290
+ template<typename PacketType>
291
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketType loadPacket(Index i, Index j) const {
292
+ return pgather<Scalar,PacketType>(&operator()(i, j),m_incr.value());
293
+ }
294
+
295
+ template <typename PacketT, int AlignmentT>
296
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketT load(Index i, Index j) const {
297
+ return pgather<Scalar,PacketT>(&operator()(i, j),m_incr.value());
298
+ }
299
+
300
+ template<typename SubPacket>
301
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void scatterPacket(Index i, Index j, const SubPacket &p) const {
302
+ pscatter<Scalar, SubPacket>(&operator()(i, j), p, m_stride);
303
+ }
304
+
305
+ template<typename SubPacket>
306
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE SubPacket gatherPacket(Index i, Index j) const {
307
+ return pgather<Scalar, SubPacket>(&operator()(i, j), m_stride);
308
+ }
309
+
310
+ // storePacketBlock_helper defines a way to access values inside the PacketBlock, this is essentially required by the Complex types.
311
+ template<typename SubPacket, typename ScalarT, int n, int idx>
312
+ struct storePacketBlock_helper
313
+ {
314
+ storePacketBlock_helper<SubPacket, ScalarT, n, idx-1> spbh;
315
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(const blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType, Incr>* sup, Index i, Index j, const PacketBlock<SubPacket, n>& block) const {
316
+ spbh.store(sup, i,j,block);
317
+ for(int l = 0; l < unpacket_traits<SubPacket>::size; l++)
318
+ {
319
+ ScalarT *v = &sup->operator()(i+l, j+idx);
320
+ *v = block.packet[idx][l];
321
+ }
322
+ }
323
+ };
324
+
325
+ template<typename SubPacket, int n, int idx>
326
+ struct storePacketBlock_helper<SubPacket, std::complex<float>, n, idx>
327
+ {
328
+ storePacketBlock_helper<SubPacket, std::complex<float>, n, idx-1> spbh;
329
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(const blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType, Incr>* sup, Index i, Index j, const PacketBlock<SubPacket, n>& block) const {
330
+ spbh.store(sup,i,j,block);
331
+ for(int l = 0; l < unpacket_traits<SubPacket>::size; l++)
332
+ {
333
+ std::complex<float> *v = &sup->operator()(i+l, j+idx);
334
+ v->real(block.packet[idx].v[2*l+0]);
335
+ v->imag(block.packet[idx].v[2*l+1]);
336
+ }
337
+ }
338
+ };
339
+
340
+ template<typename SubPacket, int n, int idx>
341
+ struct storePacketBlock_helper<SubPacket, std::complex<double>, n, idx>
342
+ {
343
+ storePacketBlock_helper<SubPacket, std::complex<double>, n, idx-1> spbh;
344
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(const blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType, Incr>* sup, Index i, Index j, const PacketBlock<SubPacket, n>& block) const {
345
+ spbh.store(sup,i,j,block);
346
+ for(int l = 0; l < unpacket_traits<SubPacket>::size; l++)
347
+ {
348
+ std::complex<double> *v = &sup->operator()(i+l, j+idx);
349
+ v->real(block.packet[idx].v[2*l+0]);
350
+ v->imag(block.packet[idx].v[2*l+1]);
351
+ }
352
+ }
353
+ };
354
+
355
+ template<typename SubPacket, typename ScalarT, int n>
356
+ struct storePacketBlock_helper<SubPacket, ScalarT, n, -1>
357
+ {
358
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(const blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType, Incr>*, Index, Index, const PacketBlock<SubPacket, n>& ) const {
359
+ }
360
+ };
361
+
362
+ template<typename SubPacket, int n>
363
+ struct storePacketBlock_helper<SubPacket, std::complex<float>, n, -1>
364
+ {
365
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(const blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType, Incr>*, Index, Index, const PacketBlock<SubPacket, n>& ) const {
366
+ }
367
+ };
368
+
369
+ template<typename SubPacket, int n>
370
+ struct storePacketBlock_helper<SubPacket, std::complex<double>, n, -1>
371
+ {
372
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(const blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType, Incr>*, Index, Index, const PacketBlock<SubPacket, n>& ) const {
373
+ }
374
+ };
375
+ // This function stores a PacketBlock on m_data, this approach is really quite slow compare to Incr=1 and should be avoided when possible.
376
+ template<typename SubPacket, int n>
377
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacketBlock(Index i, Index j, const PacketBlock<SubPacket, n>&block) const {
378
+ storePacketBlock_helper<SubPacket, Scalar, n, n-1> spb;
379
+ spb.store(this, i,j,block);
380
+ }
381
+ protected:
382
+ Scalar* EIGEN_RESTRICT m_data;
383
+ const Index m_stride;
384
+ const internal::variable_if_dynamic<Index,Incr> m_incr;
385
+ };
386
+
254
387
  // lightweight helper class to access matrix coefficients (const version)
255
388
  template<typename Scalar, typename Index, int StorageOrder>
256
389
  class const_blas_data_mapper : public blas_data_mapper<const Scalar, Index, StorageOrder> {
@@ -278,14 +411,15 @@ template<typename XprType> struct blas_traits
278
411
  HasUsableDirectAccess = ( (int(XprType::Flags)&DirectAccessBit)
279
412
  && ( bool(XprType::IsVectorAtCompileTime)
280
413
  || int(inner_stride_at_compile_time<XprType>::ret) == 1)
281
- ) ? 1 : 0
414
+ ) ? 1 : 0,
415
+ HasScalarFactor = false
282
416
  };
283
417
  typedef typename conditional<bool(HasUsableDirectAccess),
284
418
  ExtractType,
285
419
  typename _ExtractType::PlainObject
286
420
  >::type DirectLinearAccessType;
287
- static inline ExtractType extract(const XprType& x) { return x; }
288
- static inline const Scalar extractScalarFactor(const XprType&) { return Scalar(1); }
421
+ static inline EIGEN_DEVICE_FUNC ExtractType extract(const XprType& x) { return x; }
422
+ static inline EIGEN_DEVICE_FUNC const Scalar extractScalarFactor(const XprType&) { return Scalar(1); }
289
423
  };
290
424
 
291
425
  // pop conjugate
@@ -310,17 +444,23 @@ template<typename Scalar, typename NestedXpr, typename Plain>
310
444
  struct blas_traits<CwiseBinaryOp<scalar_product_op<Scalar>, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain>, NestedXpr> >
311
445
  : blas_traits<NestedXpr>
312
446
  {
447
+ enum {
448
+ HasScalarFactor = true
449
+ };
313
450
  typedef blas_traits<NestedXpr> Base;
314
451
  typedef CwiseBinaryOp<scalar_product_op<Scalar>, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain>, NestedXpr> XprType;
315
452
  typedef typename Base::ExtractType ExtractType;
316
- static inline ExtractType extract(const XprType& x) { return Base::extract(x.rhs()); }
317
- static inline Scalar extractScalarFactor(const XprType& x)
453
+ static inline EIGEN_DEVICE_FUNC ExtractType extract(const XprType& x) { return Base::extract(x.rhs()); }
454
+ static inline EIGEN_DEVICE_FUNC Scalar extractScalarFactor(const XprType& x)
318
455
  { return x.lhs().functor().m_other * Base::extractScalarFactor(x.rhs()); }
319
456
  };
320
457
  template<typename Scalar, typename NestedXpr, typename Plain>
321
458
  struct blas_traits<CwiseBinaryOp<scalar_product_op<Scalar>, NestedXpr, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain> > >
322
459
  : blas_traits<NestedXpr>
323
460
  {
461
+ enum {
462
+ HasScalarFactor = true
463
+ };
324
464
  typedef blas_traits<NestedXpr> Base;
325
465
  typedef CwiseBinaryOp<scalar_product_op<Scalar>, NestedXpr, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain> > XprType;
326
466
  typedef typename Base::ExtractType ExtractType;
@@ -339,6 +479,9 @@ template<typename Scalar, typename NestedXpr>
339
479
  struct blas_traits<CwiseUnaryOp<scalar_opposite_op<Scalar>, NestedXpr> >
340
480
  : blas_traits<NestedXpr>
341
481
  {
482
+ enum {
483
+ HasScalarFactor = true
484
+ };
342
485
  typedef blas_traits<NestedXpr> Base;
343
486
  typedef CwiseUnaryOp<scalar_opposite_op<Scalar>, NestedXpr> XprType;
344
487
  typedef typename Base::ExtractType ExtractType;
@@ -375,7 +518,7 @@ struct blas_traits<const T>
375
518
 
376
519
  template<typename T, bool HasUsableDirectAccess=blas_traits<T>::HasUsableDirectAccess>
377
520
  struct extract_data_selector {
378
- static const typename T::Scalar* run(const T& m)
521
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static const typename T::Scalar* run(const T& m)
379
522
  {
380
523
  return blas_traits<T>::extract(m).data();
381
524
  }
@@ -386,11 +529,53 @@ struct extract_data_selector<T,false> {
386
529
  static typename T::Scalar* run(const T&) { return 0; }
387
530
  };
388
531
 
389
- template<typename T> const typename T::Scalar* extract_data(const T& m)
532
+ template<typename T>
533
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE const typename T::Scalar* extract_data(const T& m)
390
534
  {
391
535
  return extract_data_selector<T>::run(m);
392
536
  }
393
537
 
538
+ /**
539
+ * \c combine_scalar_factors extracts and multiplies factors from GEMM and GEMV products.
540
+ * There is a specialization for booleans
541
+ */
542
+ template<typename ResScalar, typename Lhs, typename Rhs>
543
+ struct combine_scalar_factors_impl
544
+ {
545
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static ResScalar run(const Lhs& lhs, const Rhs& rhs)
546
+ {
547
+ return blas_traits<Lhs>::extractScalarFactor(lhs) * blas_traits<Rhs>::extractScalarFactor(rhs);
548
+ }
549
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static ResScalar run(const ResScalar& alpha, const Lhs& lhs, const Rhs& rhs)
550
+ {
551
+ return alpha * blas_traits<Lhs>::extractScalarFactor(lhs) * blas_traits<Rhs>::extractScalarFactor(rhs);
552
+ }
553
+ };
554
+ template<typename Lhs, typename Rhs>
555
+ struct combine_scalar_factors_impl<bool, Lhs, Rhs>
556
+ {
557
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static bool run(const Lhs& lhs, const Rhs& rhs)
558
+ {
559
+ return blas_traits<Lhs>::extractScalarFactor(lhs) && blas_traits<Rhs>::extractScalarFactor(rhs);
560
+ }
561
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static bool run(const bool& alpha, const Lhs& lhs, const Rhs& rhs)
562
+ {
563
+ return alpha && blas_traits<Lhs>::extractScalarFactor(lhs) && blas_traits<Rhs>::extractScalarFactor(rhs);
564
+ }
565
+ };
566
+
567
+ template<typename ResScalar, typename Lhs, typename Rhs>
568
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE ResScalar combine_scalar_factors(const ResScalar& alpha, const Lhs& lhs, const Rhs& rhs)
569
+ {
570
+ return combine_scalar_factors_impl<ResScalar,Lhs,Rhs>::run(alpha, lhs, rhs);
571
+ }
572
+ template<typename ResScalar, typename Lhs, typename Rhs>
573
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE ResScalar combine_scalar_factors(const Lhs& lhs, const Rhs& rhs)
574
+ {
575
+ return combine_scalar_factors_impl<ResScalar,Lhs,Rhs>::run(lhs, rhs);
576
+ }
577
+
578
+
394
579
  } // end namespace internal
395
580
 
396
581
  } // end namespace Eigen