tomoto 0.2.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (369)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +10 -0
  3. data/README.md +8 -10
  4. data/ext/tomoto/ct.cpp +11 -11
  5. data/ext/tomoto/dmr.cpp +14 -13
  6. data/ext/tomoto/dt.cpp +14 -14
  7. data/ext/tomoto/extconf.rb +7 -5
  8. data/ext/tomoto/gdmr.cpp +7 -7
  9. data/ext/tomoto/hdp.cpp +9 -9
  10. data/ext/tomoto/hlda.cpp +13 -13
  11. data/ext/tomoto/hpa.cpp +5 -5
  12. data/ext/tomoto/lda.cpp +42 -39
  13. data/ext/tomoto/llda.cpp +6 -6
  14. data/ext/tomoto/mglda.cpp +15 -15
  15. data/ext/tomoto/pa.cpp +6 -6
  16. data/ext/tomoto/plda.cpp +6 -6
  17. data/ext/tomoto/slda.cpp +8 -8
  18. data/ext/tomoto/{ext.cpp → tomoto.cpp} +8 -8
  19. data/ext/tomoto/utils.h +16 -70
  20. data/lib/tomoto/version.rb +1 -1
  21. data/lib/tomoto.rb +5 -1
  22. data/vendor/EigenRand/EigenRand/Core.h +10 -10
  23. data/vendor/EigenRand/EigenRand/Dists/Basic.h +208 -9
  24. data/vendor/EigenRand/EigenRand/Dists/Discrete.h +52 -31
  25. data/vendor/EigenRand/EigenRand/Dists/GammaPoisson.h +9 -8
  26. data/vendor/EigenRand/EigenRand/Dists/NormalExp.h +28 -21
  27. data/vendor/EigenRand/EigenRand/EigenRand +11 -6
  28. data/vendor/EigenRand/EigenRand/Macro.h +13 -7
  29. data/vendor/EigenRand/EigenRand/MorePacketMath.h +348 -740
  30. data/vendor/EigenRand/EigenRand/MvDists/Multinomial.h +5 -3
  31. data/vendor/EigenRand/EigenRand/MvDists/MvNormal.h +9 -3
  32. data/vendor/EigenRand/EigenRand/PacketFilter.h +11 -253
  33. data/vendor/EigenRand/EigenRand/PacketRandomEngine.h +21 -47
  34. data/vendor/EigenRand/EigenRand/RandUtils.h +50 -344
  35. data/vendor/EigenRand/EigenRand/arch/AVX/MorePacketMath.h +619 -0
  36. data/vendor/EigenRand/EigenRand/arch/AVX/PacketFilter.h +149 -0
  37. data/vendor/EigenRand/EigenRand/arch/AVX/RandUtils.h +228 -0
  38. data/vendor/EigenRand/EigenRand/arch/NEON/MorePacketMath.h +473 -0
  39. data/vendor/EigenRand/EigenRand/arch/NEON/PacketFilter.h +142 -0
  40. data/vendor/EigenRand/EigenRand/arch/NEON/RandUtils.h +126 -0
  41. data/vendor/EigenRand/EigenRand/arch/SSE/MorePacketMath.h +501 -0
  42. data/vendor/EigenRand/EigenRand/arch/SSE/PacketFilter.h +133 -0
  43. data/vendor/EigenRand/EigenRand/arch/SSE/RandUtils.h +120 -0
  44. data/vendor/EigenRand/EigenRand/doc.h +24 -12
  45. data/vendor/EigenRand/README.md +57 -4
  46. data/vendor/eigen/COPYING.APACHE +203 -0
  47. data/vendor/eigen/COPYING.BSD +1 -1
  48. data/vendor/eigen/COPYING.MINPACK +51 -52
  49. data/vendor/eigen/Eigen/Cholesky +0 -1
  50. data/vendor/eigen/Eigen/Core +112 -265
  51. data/vendor/eigen/Eigen/Eigenvalues +2 -3
  52. data/vendor/eigen/Eigen/Geometry +5 -8
  53. data/vendor/eigen/Eigen/Householder +0 -1
  54. data/vendor/eigen/Eigen/Jacobi +0 -1
  55. data/vendor/eigen/Eigen/KLUSupport +41 -0
  56. data/vendor/eigen/Eigen/LU +2 -5
  57. data/vendor/eigen/Eigen/OrderingMethods +0 -3
  58. data/vendor/eigen/Eigen/PaStiXSupport +1 -0
  59. data/vendor/eigen/Eigen/PardisoSupport +0 -0
  60. data/vendor/eigen/Eigen/QR +2 -3
  61. data/vendor/eigen/Eigen/QtAlignedMalloc +0 -1
  62. data/vendor/eigen/Eigen/SVD +0 -1
  63. data/vendor/eigen/Eigen/Sparse +0 -2
  64. data/vendor/eigen/Eigen/SparseCholesky +0 -8
  65. data/vendor/eigen/Eigen/SparseLU +4 -0
  66. data/vendor/eigen/Eigen/SparseQR +0 -1
  67. data/vendor/eigen/Eigen/src/Cholesky/LDLT.h +42 -27
  68. data/vendor/eigen/Eigen/src/Cholesky/LLT.h +39 -23
  69. data/vendor/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +90 -47
  70. data/vendor/eigen/Eigen/src/Core/ArithmeticSequence.h +413 -0
  71. data/vendor/eigen/Eigen/src/Core/Array.h +99 -11
  72. data/vendor/eigen/Eigen/src/Core/ArrayBase.h +3 -3
  73. data/vendor/eigen/Eigen/src/Core/ArrayWrapper.h +21 -21
  74. data/vendor/eigen/Eigen/src/Core/Assign.h +1 -1
  75. data/vendor/eigen/Eigen/src/Core/AssignEvaluator.h +125 -50
  76. data/vendor/eigen/Eigen/src/Core/Assign_MKL.h +10 -10
  77. data/vendor/eigen/Eigen/src/Core/BandMatrix.h +16 -16
  78. data/vendor/eigen/Eigen/src/Core/Block.h +56 -60
  79. data/vendor/eigen/Eigen/src/Core/BooleanRedux.h +29 -31
  80. data/vendor/eigen/Eigen/src/Core/CommaInitializer.h +7 -3
  81. data/vendor/eigen/Eigen/src/Core/CoreEvaluators.h +325 -272
  82. data/vendor/eigen/Eigen/src/Core/CoreIterators.h +5 -0
  83. data/vendor/eigen/Eigen/src/Core/CwiseBinaryOp.h +21 -22
  84. data/vendor/eigen/Eigen/src/Core/CwiseNullaryOp.h +153 -18
  85. data/vendor/eigen/Eigen/src/Core/CwiseUnaryOp.h +6 -6
  86. data/vendor/eigen/Eigen/src/Core/CwiseUnaryView.h +14 -10
  87. data/vendor/eigen/Eigen/src/Core/DenseBase.h +132 -42
  88. data/vendor/eigen/Eigen/src/Core/DenseCoeffsBase.h +25 -21
  89. data/vendor/eigen/Eigen/src/Core/DenseStorage.h +153 -71
  90. data/vendor/eigen/Eigen/src/Core/Diagonal.h +21 -23
  91. data/vendor/eigen/Eigen/src/Core/DiagonalMatrix.h +50 -2
  92. data/vendor/eigen/Eigen/src/Core/DiagonalProduct.h +1 -1
  93. data/vendor/eigen/Eigen/src/Core/Dot.h +10 -10
  94. data/vendor/eigen/Eigen/src/Core/EigenBase.h +10 -9
  95. data/vendor/eigen/Eigen/src/Core/ForceAlignedAccess.h +8 -4
  96. data/vendor/eigen/Eigen/src/Core/Fuzzy.h +3 -3
  97. data/vendor/eigen/Eigen/src/Core/GeneralProduct.h +20 -10
  98. data/vendor/eigen/Eigen/src/Core/GenericPacketMath.h +599 -152
  99. data/vendor/eigen/Eigen/src/Core/GlobalFunctions.h +40 -33
  100. data/vendor/eigen/Eigen/src/Core/IO.h +40 -7
  101. data/vendor/eigen/Eigen/src/Core/IndexedView.h +237 -0
  102. data/vendor/eigen/Eigen/src/Core/Inverse.h +9 -10
  103. data/vendor/eigen/Eigen/src/Core/Map.h +7 -7
  104. data/vendor/eigen/Eigen/src/Core/MapBase.h +10 -3
  105. data/vendor/eigen/Eigen/src/Core/MathFunctions.h +767 -125
  106. data/vendor/eigen/Eigen/src/Core/MathFunctionsImpl.h +118 -19
  107. data/vendor/eigen/Eigen/src/Core/Matrix.h +131 -25
  108. data/vendor/eigen/Eigen/src/Core/MatrixBase.h +21 -3
  109. data/vendor/eigen/Eigen/src/Core/NestByValue.h +25 -50
  110. data/vendor/eigen/Eigen/src/Core/NoAlias.h +4 -3
  111. data/vendor/eigen/Eigen/src/Core/NumTraits.h +107 -20
  112. data/vendor/eigen/Eigen/src/Core/PartialReduxEvaluator.h +232 -0
  113. data/vendor/eigen/Eigen/src/Core/PermutationMatrix.h +3 -31
  114. data/vendor/eigen/Eigen/src/Core/PlainObjectBase.h +152 -59
  115. data/vendor/eigen/Eigen/src/Core/Product.h +30 -25
  116. data/vendor/eigen/Eigen/src/Core/ProductEvaluators.h +192 -125
  117. data/vendor/eigen/Eigen/src/Core/Random.h +37 -1
  118. data/vendor/eigen/Eigen/src/Core/Redux.h +180 -170
  119. data/vendor/eigen/Eigen/src/Core/Ref.h +121 -23
  120. data/vendor/eigen/Eigen/src/Core/Replicate.h +8 -8
  121. data/vendor/eigen/Eigen/src/Core/Reshaped.h +454 -0
  122. data/vendor/eigen/Eigen/src/Core/ReturnByValue.h +7 -5
  123. data/vendor/eigen/Eigen/src/Core/Reverse.h +18 -12
  124. data/vendor/eigen/Eigen/src/Core/Select.h +8 -6
  125. data/vendor/eigen/Eigen/src/Core/SelfAdjointView.h +33 -20
  126. data/vendor/eigen/Eigen/src/Core/Solve.h +14 -14
  127. data/vendor/eigen/Eigen/src/Core/SolveTriangular.h +16 -16
  128. data/vendor/eigen/Eigen/src/Core/SolverBase.h +41 -3
  129. data/vendor/eigen/Eigen/src/Core/StableNorm.h +100 -70
  130. data/vendor/eigen/Eigen/src/Core/StlIterators.h +463 -0
  131. data/vendor/eigen/Eigen/src/Core/Stride.h +9 -4
  132. data/vendor/eigen/Eigen/src/Core/Swap.h +5 -4
  133. data/vendor/eigen/Eigen/src/Core/Transpose.h +88 -27
  134. data/vendor/eigen/Eigen/src/Core/Transpositions.h +26 -47
  135. data/vendor/eigen/Eigen/src/Core/TriangularMatrix.h +93 -75
  136. data/vendor/eigen/Eigen/src/Core/VectorBlock.h +5 -5
  137. data/vendor/eigen/Eigen/src/Core/VectorwiseOp.h +159 -70
  138. data/vendor/eigen/Eigen/src/Core/Visitor.h +137 -29
  139. data/vendor/eigen/Eigen/src/Core/arch/AVX/Complex.h +50 -129
  140. data/vendor/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +126 -337
  141. data/vendor/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +1092 -155
  142. data/vendor/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +65 -1
  143. data/vendor/eigen/Eigen/src/Core/arch/AVX512/Complex.h +422 -0
  144. data/vendor/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +207 -236
  145. data/vendor/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1482 -495
  146. data/vendor/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +89 -0
  147. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +152 -165
  148. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +19 -251
  149. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2937 -0
  150. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +221 -0
  151. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +629 -0
  152. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2042 -392
  153. data/vendor/eigen/Eigen/src/Core/arch/CUDA/Complex.h +235 -80
  154. data/vendor/eigen/Eigen/src/Core/arch/Default/BFloat16.h +700 -0
  155. data/vendor/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +102 -14
  156. data/vendor/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1649 -0
  157. data/vendor/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +110 -0
  158. data/vendor/eigen/Eigen/src/Core/arch/Default/Half.h +942 -0
  159. data/vendor/eigen/Eigen/src/Core/arch/Default/Settings.h +1 -1
  160. data/vendor/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +120 -0
  161. data/vendor/eigen/Eigen/src/Core/arch/{CUDA → GPU}/MathFunctions.h +16 -4
  162. data/vendor/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1685 -0
  163. data/vendor/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +80 -0
  164. data/vendor/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
  165. data/vendor/eigen/Eigen/src/Core/arch/MSA/Complex.h +648 -0
  166. data/vendor/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +387 -0
  167. data/vendor/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1233 -0
  168. data/vendor/eigen/Eigen/src/Core/arch/NEON/Complex.h +313 -219
  169. data/vendor/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +183 -0
  170. data/vendor/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +54 -70
  171. data/vendor/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4376 -549
  172. data/vendor/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1419 -0
  173. data/vendor/eigen/Eigen/src/Core/arch/SSE/Complex.h +59 -179
  174. data/vendor/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +65 -428
  175. data/vendor/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +893 -283
  176. data/vendor/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +65 -0
  177. data/vendor/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +44 -0
  178. data/vendor/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +752 -0
  179. data/vendor/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +49 -0
  180. data/vendor/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +232 -0
  181. data/vendor/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +301 -0
  182. data/vendor/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +670 -0
  183. data/vendor/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +694 -0
  184. data/vendor/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +85 -0
  185. data/vendor/eigen/Eigen/src/Core/arch/ZVector/Complex.h +212 -183
  186. data/vendor/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +101 -5
  187. data/vendor/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +510 -395
  188. data/vendor/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +11 -2
  189. data/vendor/eigen/Eigen/src/Core/functors/BinaryFunctors.h +112 -46
  190. data/vendor/eigen/Eigen/src/Core/functors/NullaryFunctors.h +31 -30
  191. data/vendor/eigen/Eigen/src/Core/functors/StlFunctors.h +32 -2
  192. data/vendor/eigen/Eigen/src/Core/functors/UnaryFunctors.h +355 -16
  193. data/vendor/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1075 -586
  194. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +49 -24
  195. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +41 -35
  196. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +6 -6
  197. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +4 -2
  198. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +382 -483
  199. data/vendor/eigen/Eigen/src/Core/products/Parallelizer.h +22 -5
  200. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +53 -30
  201. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +16 -8
  202. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +8 -6
  203. data/vendor/eigen/Eigen/src/Core/products/SelfadjointProduct.h +4 -4
  204. data/vendor/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +5 -4
  205. data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +33 -27
  206. data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +14 -12
  207. data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +36 -34
  208. data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +8 -4
  209. data/vendor/eigen/Eigen/src/Core/products/TriangularSolverVector.h +13 -10
  210. data/vendor/eigen/Eigen/src/Core/util/BlasUtil.h +304 -119
  211. data/vendor/eigen/Eigen/src/Core/util/ConfigureVectorization.h +512 -0
  212. data/vendor/eigen/Eigen/src/Core/util/Constants.h +25 -9
  213. data/vendor/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +26 -3
  214. data/vendor/eigen/Eigen/src/Core/util/ForwardDeclarations.h +29 -9
  215. data/vendor/eigen/Eigen/src/Core/util/IndexedViewHelper.h +186 -0
  216. data/vendor/eigen/Eigen/src/Core/util/IntegralConstant.h +272 -0
  217. data/vendor/eigen/Eigen/src/Core/util/MKL_support.h +8 -1
  218. data/vendor/eigen/Eigen/src/Core/util/Macros.h +709 -246
  219. data/vendor/eigen/Eigen/src/Core/util/Memory.h +222 -52
  220. data/vendor/eigen/Eigen/src/Core/util/Meta.h +355 -77
  221. data/vendor/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +5 -1
  222. data/vendor/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
  223. data/vendor/eigen/Eigen/src/Core/util/StaticAssert.h +8 -5
  224. data/vendor/eigen/Eigen/src/Core/util/SymbolicIndex.h +293 -0
  225. data/vendor/eigen/Eigen/src/Core/util/XprHelper.h +65 -30
  226. data/vendor/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +1 -1
  227. data/vendor/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +7 -4
  228. data/vendor/eigen/Eigen/src/Eigenvalues/EigenSolver.h +2 -2
  229. data/vendor/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +1 -1
  230. data/vendor/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +2 -2
  231. data/vendor/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +2 -2
  232. data/vendor/eigen/Eigen/src/Eigenvalues/RealQZ.h +9 -6
  233. data/vendor/eigen/Eigen/src/Eigenvalues/RealSchur.h +21 -9
  234. data/vendor/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +77 -43
  235. data/vendor/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +20 -15
  236. data/vendor/eigen/Eigen/src/Geometry/AlignedBox.h +99 -5
  237. data/vendor/eigen/Eigen/src/Geometry/AngleAxis.h +4 -4
  238. data/vendor/eigen/Eigen/src/Geometry/EulerAngles.h +3 -3
  239. data/vendor/eigen/Eigen/src/Geometry/Homogeneous.h +15 -11
  240. data/vendor/eigen/Eigen/src/Geometry/Hyperplane.h +1 -1
  241. data/vendor/eigen/Eigen/src/Geometry/OrthoMethods.h +3 -2
  242. data/vendor/eigen/Eigen/src/Geometry/ParametrizedLine.h +39 -2
  243. data/vendor/eigen/Eigen/src/Geometry/Quaternion.h +70 -14
  244. data/vendor/eigen/Eigen/src/Geometry/Rotation2D.h +3 -3
  245. data/vendor/eigen/Eigen/src/Geometry/Scaling.h +23 -5
  246. data/vendor/eigen/Eigen/src/Geometry/Transform.h +88 -67
  247. data/vendor/eigen/Eigen/src/Geometry/Translation.h +6 -12
  248. data/vendor/eigen/Eigen/src/Geometry/Umeyama.h +1 -1
  249. data/vendor/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +168 -0
  250. data/vendor/eigen/Eigen/src/Householder/BlockHouseholder.h +9 -2
  251. data/vendor/eigen/Eigen/src/Householder/Householder.h +8 -4
  252. data/vendor/eigen/Eigen/src/Householder/HouseholderSequence.h +123 -48
  253. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +15 -15
  254. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +7 -23
  255. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +5 -22
  256. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +41 -47
  257. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +51 -60
  258. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +70 -20
  259. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +2 -20
  260. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +11 -9
  261. data/vendor/eigen/Eigen/src/Jacobi/Jacobi.h +31 -10
  262. data/vendor/eigen/Eigen/src/KLUSupport/KLUSupport.h +358 -0
  263. data/vendor/eigen/Eigen/src/LU/Determinant.h +35 -19
  264. data/vendor/eigen/Eigen/src/LU/FullPivLU.h +29 -43
  265. data/vendor/eigen/Eigen/src/LU/InverseImpl.h +25 -8
  266. data/vendor/eigen/Eigen/src/LU/PartialPivLU.h +71 -58
  267. data/vendor/eigen/Eigen/src/LU/arch/InverseSize4.h +351 -0
  268. data/vendor/eigen/Eigen/src/OrderingMethods/Amd.h +7 -17
  269. data/vendor/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +297 -277
  270. data/vendor/eigen/Eigen/src/OrderingMethods/Ordering.h +6 -10
  271. data/vendor/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +1 -1
  272. data/vendor/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +9 -7
  273. data/vendor/eigen/Eigen/src/QR/ColPivHouseholderQR.h +41 -20
  274. data/vendor/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +100 -27
  275. data/vendor/eigen/Eigen/src/QR/FullPivHouseholderQR.h +59 -22
  276. data/vendor/eigen/Eigen/src/QR/HouseholderQR.h +48 -23
  277. data/vendor/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +25 -3
  278. data/vendor/eigen/Eigen/src/SVD/BDCSVD.h +183 -63
  279. data/vendor/eigen/Eigen/src/SVD/JacobiSVD.h +22 -14
  280. data/vendor/eigen/Eigen/src/SVD/SVDBase.h +83 -22
  281. data/vendor/eigen/Eigen/src/SVD/UpperBidiagonalization.h +3 -3
  282. data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +17 -9
  283. data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +12 -37
  284. data/vendor/eigen/Eigen/src/SparseCore/AmbiVector.h +3 -2
  285. data/vendor/eigen/Eigen/src/SparseCore/CompressedStorage.h +16 -0
  286. data/vendor/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +6 -6
  287. data/vendor/eigen/Eigen/src/SparseCore/SparseAssign.h +81 -27
  288. data/vendor/eigen/Eigen/src/SparseCore/SparseBlock.h +25 -57
  289. data/vendor/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +40 -11
  290. data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +11 -15
  291. data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +4 -2
  292. data/vendor/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +30 -8
  293. data/vendor/eigen/Eigen/src/SparseCore/SparseMatrix.h +126 -11
  294. data/vendor/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +5 -12
  295. data/vendor/eigen/Eigen/src/SparseCore/SparseProduct.h +13 -1
  296. data/vendor/eigen/Eigen/src/SparseCore/SparseRef.h +7 -7
  297. data/vendor/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +5 -2
  298. data/vendor/eigen/Eigen/src/SparseCore/SparseUtil.h +8 -0
  299. data/vendor/eigen/Eigen/src/SparseCore/SparseVector.h +1 -1
  300. data/vendor/eigen/Eigen/src/SparseCore/SparseView.h +1 -0
  301. data/vendor/eigen/Eigen/src/SparseLU/SparseLU.h +162 -12
  302. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +1 -1
  303. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +76 -2
  304. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +2 -2
  305. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +1 -1
  306. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +1 -1
  307. data/vendor/eigen/Eigen/src/SparseQR/SparseQR.h +19 -6
  308. data/vendor/eigen/Eigen/src/StlSupport/StdDeque.h +2 -12
  309. data/vendor/eigen/Eigen/src/StlSupport/StdList.h +2 -2
  310. data/vendor/eigen/Eigen/src/StlSupport/StdVector.h +2 -2
  311. data/vendor/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +6 -8
  312. data/vendor/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +175 -39
  313. data/vendor/eigen/Eigen/src/misc/lapacke.h +5 -4
  314. data/vendor/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +28 -2
  315. data/vendor/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +155 -11
  316. data/vendor/eigen/Eigen/src/plugins/BlockMethods.h +626 -242
  317. data/vendor/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +14 -0
  318. data/vendor/eigen/Eigen/src/plugins/IndexedViewMethods.h +262 -0
  319. data/vendor/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +4 -4
  320. data/vendor/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +10 -0
  321. data/vendor/eigen/Eigen/src/plugins/ReshapedMethods.h +149 -0
  322. data/vendor/eigen/README.md +2 -0
  323. data/vendor/eigen/bench/btl/README +1 -1
  324. data/vendor/eigen/bench/tensors/README +6 -7
  325. data/vendor/eigen/ci/README.md +56 -0
  326. data/vendor/eigen/demos/mix_eigen_and_c/README +1 -1
  327. data/vendor/eigen/unsupported/Eigen/CXX11/src/Tensor/README.md +213 -158
  328. data/vendor/eigen/unsupported/README.txt +1 -1
  329. data/vendor/tomotopy/README.kr.rst +78 -0
  330. data/vendor/tomotopy/README.rst +75 -0
  331. data/vendor/tomotopy/src/Labeling/FoRelevance.cpp +2 -2
  332. data/vendor/tomotopy/src/Labeling/Phraser.hpp +4 -4
  333. data/vendor/tomotopy/src/TopicModel/CTModel.hpp +7 -3
  334. data/vendor/tomotopy/src/TopicModel/DMRModel.hpp +7 -3
  335. data/vendor/tomotopy/src/TopicModel/DTModel.hpp +6 -3
  336. data/vendor/tomotopy/src/TopicModel/GDMRModel.hpp +2 -2
  337. data/vendor/tomotopy/src/TopicModel/HDP.h +1 -0
  338. data/vendor/tomotopy/src/TopicModel/HDPModel.hpp +57 -6
  339. data/vendor/tomotopy/src/TopicModel/HLDAModel.hpp +6 -3
  340. data/vendor/tomotopy/src/TopicModel/HPAModel.hpp +3 -2
  341. data/vendor/tomotopy/src/TopicModel/LDA.h +3 -3
  342. data/vendor/tomotopy/src/TopicModel/LDACVB0Model.hpp +5 -5
  343. data/vendor/tomotopy/src/TopicModel/LDAModel.hpp +50 -19
  344. data/vendor/tomotopy/src/TopicModel/LLDAModel.hpp +6 -2
  345. data/vendor/tomotopy/src/TopicModel/MGLDAModel.hpp +3 -2
  346. data/vendor/tomotopy/src/TopicModel/PAModel.hpp +1 -1
  347. data/vendor/tomotopy/src/TopicModel/PLDAModel.hpp +6 -2
  348. data/vendor/tomotopy/src/TopicModel/PT.h +3 -1
  349. data/vendor/tomotopy/src/TopicModel/PTModel.hpp +36 -3
  350. data/vendor/tomotopy/src/TopicModel/SLDAModel.hpp +6 -3
  351. data/vendor/tomotopy/src/TopicModel/TopicModel.hpp +55 -26
  352. data/vendor/tomotopy/src/Utils/AliasMethod.hpp +5 -4
  353. data/vendor/tomotopy/src/Utils/Dictionary.h +2 -2
  354. data/vendor/tomotopy/src/Utils/EigenAddonOps.hpp +36 -1
  355. data/vendor/tomotopy/src/Utils/MultiNormalDistribution.hpp +1 -1
  356. data/vendor/tomotopy/src/Utils/TruncMultiNormal.hpp +1 -1
  357. data/vendor/tomotopy/src/Utils/exception.h +6 -0
  358. data/vendor/tomotopy/src/Utils/math.h +2 -2
  359. data/vendor/tomotopy/src/Utils/sample.hpp +14 -12
  360. data/vendor/tomotopy/src/Utils/serializer.hpp +30 -5
  361. data/vendor/tomotopy/src/Utils/sse_gamma.h +0 -3
  362. metadata +64 -18
  363. data/vendor/eigen/Eigen/CMakeLists.txt +0 -19
  364. data/vendor/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -674
  365. data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
  366. data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
  367. data/vendor/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
  368. data/vendor/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
  369. data/vendor/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
@@ -63,14 +63,28 @@ namespace Eigen {
63
63
 
64
64
  namespace internal {
65
65
 
66
- EIGEN_DEVICE_FUNC
66
+ EIGEN_DEVICE_FUNC
67
67
  inline void throw_std_bad_alloc()
68
68
  {
69
69
  #ifdef EIGEN_EXCEPTIONS
70
70
  throw std::bad_alloc();
71
71
  #else
72
72
  std::size_t huge = static_cast<std::size_t>(-1);
73
- ::operator new(huge);
73
+ #if defined(EIGEN_HIPCC)
74
+ //
75
+ // calls to "::operator new" are to be treated as opaque function calls (i.e no inlining),
76
+ // and as a consequence the code in the #else block triggers the hipcc warning :
77
+ // "no overloaded function has restriction specifiers that are compatible with the ambient context"
78
+ //
79
+ // "throw_std_bad_alloc" has the EIGEN_DEVICE_FUNC attribute, so it seems that hipcc expects
80
+ // the same on "operator new"
81
+ // Reverting code back to the old version in this #if block for the hipcc compiler
82
+ //
83
+ new int[huge];
84
+ #else
85
+ void* unused = ::operator new(huge);
86
+ EIGEN_UNUSED_VARIABLE(unused);
87
+ #endif
74
88
  #endif
75
89
  }
76
90
 
@@ -83,19 +97,26 @@ inline void throw_std_bad_alloc()
83
97
  /** \internal Like malloc, but the returned pointer is guaranteed to be 16-byte aligned.
84
98
  * Fast, but wastes 16 additional bytes of memory. Does not throw any exception.
85
99
  */
86
- inline void* handmade_aligned_malloc(std::size_t size)
100
+ EIGEN_DEVICE_FUNC inline void* handmade_aligned_malloc(std::size_t size, std::size_t alignment = EIGEN_DEFAULT_ALIGN_BYTES)
87
101
  {
88
- void *original = std::malloc(size+EIGEN_DEFAULT_ALIGN_BYTES);
102
+ eigen_assert(alignment >= sizeof(void*) && (alignment & (alignment-1)) == 0 && "Alignment must be at least sizeof(void*) and a power of 2");
103
+
104
+ EIGEN_USING_STD(malloc)
105
+ void *original = malloc(size+alignment);
106
+
89
107
  if (original == 0) return 0;
90
- void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1))) + EIGEN_DEFAULT_ALIGN_BYTES);
108
+ void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(alignment-1))) + alignment);
91
109
  *(reinterpret_cast<void**>(aligned) - 1) = original;
92
110
  return aligned;
93
111
  }
94
112
 
95
113
  /** \internal Frees memory allocated with handmade_aligned_malloc */
96
- inline void handmade_aligned_free(void *ptr)
114
+ EIGEN_DEVICE_FUNC inline void handmade_aligned_free(void *ptr)
97
115
  {
98
- if (ptr) std::free(*(reinterpret_cast<void**>(ptr) - 1));
116
+ if (ptr) {
117
+ EIGEN_USING_STD(free)
118
+ free(*(reinterpret_cast<void**>(ptr) - 1));
119
+ }
99
120
  }
100
121
 
101
122
  /** \internal
@@ -114,7 +135,7 @@ inline void* handmade_aligned_realloc(void* ptr, std::size_t size, std::size_t =
114
135
  void *previous_aligned = static_cast<char *>(original)+previous_offset;
115
136
  if(aligned!=previous_aligned)
116
137
  std::memmove(aligned, previous_aligned, size);
117
-
138
+
118
139
  *(reinterpret_cast<void**>(aligned) - 1) = original;
119
140
  return aligned;
120
141
  }
@@ -142,7 +163,7 @@ EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed()
142
163
  {
143
164
  eigen_assert(is_malloc_allowed() && "heap allocation is forbidden (EIGEN_RUNTIME_NO_MALLOC is defined and g_is_malloc_allowed is false)");
144
165
  }
145
- #else
166
+ #else
146
167
  EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed()
147
168
  {}
148
169
  #endif
@@ -156,9 +177,12 @@ EIGEN_DEVICE_FUNC inline void* aligned_malloc(std::size_t size)
156
177
 
157
178
  void *result;
158
179
  #if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED
159
- result = std::malloc(size);
180
+
181
+ EIGEN_USING_STD(malloc)
182
+ result = malloc(size);
183
+
160
184
  #if EIGEN_DEFAULT_ALIGN_BYTES==16
161
- eigen_assert((size<16 || (std::size_t(result)%16)==0) && "System's malloc returned an unaligned pointer. Compile with EIGEN_MALLOC_ALREADY_ALIGNED=0 to fallback to handmade alignd memory allocator.");
185
+ eigen_assert((size<16 || (std::size_t(result)%16)==0) && "System's malloc returned an unaligned pointer. Compile with EIGEN_MALLOC_ALREADY_ALIGNED=0 to fallback to handmade aligned memory allocator.");
162
186
  #endif
163
187
  #else
164
188
  result = handmade_aligned_malloc(size);
@@ -174,7 +198,10 @@ EIGEN_DEVICE_FUNC inline void* aligned_malloc(std::size_t size)
174
198
  EIGEN_DEVICE_FUNC inline void aligned_free(void *ptr)
175
199
  {
176
200
  #if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED
177
- std::free(ptr);
201
+
202
+ EIGEN_USING_STD(free)
203
+ free(ptr);
204
+
178
205
  #else
179
206
  handmade_aligned_free(ptr);
180
207
  #endif
@@ -187,7 +214,7 @@ EIGEN_DEVICE_FUNC inline void aligned_free(void *ptr)
187
214
  */
188
215
  inline void* aligned_realloc(void *ptr, std::size_t new_size, std::size_t old_size)
189
216
  {
190
- EIGEN_UNUSED_VARIABLE(old_size);
217
+ EIGEN_UNUSED_VARIABLE(old_size)
191
218
 
192
219
  void *result;
193
220
  #if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED
@@ -218,7 +245,9 @@ template<> EIGEN_DEVICE_FUNC inline void* conditional_aligned_malloc<false>(std:
218
245
  {
219
246
  check_that_malloc_is_allowed();
220
247
 
221
- void *result = std::malloc(size);
248
+ EIGEN_USING_STD(malloc)
249
+ void *result = malloc(size);
250
+
222
251
  if(!result && size)
223
252
  throw_std_bad_alloc();
224
253
  return result;
@@ -232,7 +261,8 @@ template<bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_free(void
232
261
 
233
262
  template<> EIGEN_DEVICE_FUNC inline void conditional_aligned_free<false>(void *ptr)
234
263
  {
235
- std::free(ptr);
264
+ EIGEN_USING_STD(free)
265
+ free(ptr);
236
266
  }
237
267
 
238
268
  template<bool Align> inline void* conditional_aligned_realloc(void* ptr, std::size_t new_size, std::size_t old_size)
@@ -331,7 +361,7 @@ template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned
331
361
  template<typename T> EIGEN_DEVICE_FUNC inline void aligned_delete(T *ptr, std::size_t size)
332
362
  {
333
363
  destruct_elements_of_array<T>(ptr, size);
334
- aligned_free(ptr);
364
+ Eigen::internal::aligned_free(ptr);
335
365
  }
336
366
 
337
367
  /** \internal Deletes objects constructed with conditional_aligned_new
@@ -471,8 +501,8 @@ EIGEN_DEVICE_FUNC inline Index first_default_aligned(const Scalar* array, Index
471
501
  }
472
502
 
473
503
  /** \internal Returns the smallest integer multiple of \a base and greater or equal to \a size
474
- */
475
- template<typename Index>
504
+ */
505
+ template<typename Index>
476
506
  inline Index first_multiple(Index size, Index base)
477
507
  {
478
508
  return ((size+base-1)/base)*base;
@@ -493,7 +523,8 @@ template<typename T> struct smart_copy_helper<T,true> {
493
523
  IntPtr size = IntPtr(end)-IntPtr(start);
494
524
  if(size==0) return;
495
525
  eigen_internal_assert(start!=0 && end!=0 && target!=0);
496
- std::memcpy(target, start, size);
526
+ EIGEN_USING_STD(memcpy)
527
+ memcpy(target, start, size);
497
528
  }
498
529
  };
499
530
 
@@ -502,7 +533,7 @@ template<typename T> struct smart_copy_helper<T,false> {
502
533
  { std::copy(start, end, target); }
503
534
  };
504
535
 
505
- // intelligent memmove. falls back to std::memmove for POD types, uses std::copy otherwise.
536
+ // intelligent memmove. falls back to std::memmove for POD types, uses std::copy otherwise.
506
537
  template<typename T, bool UseMemmove> struct smart_memmove_helper;
507
538
 
508
539
  template<typename T> void smart_memmove(const T* start, const T* end, T* target)
@@ -522,19 +553,30 @@ template<typename T> struct smart_memmove_helper<T,true> {
522
553
 
523
554
  template<typename T> struct smart_memmove_helper<T,false> {
524
555
  static inline void run(const T* start, const T* end, T* target)
525
- {
556
+ {
526
557
  if (UIntPtr(target) < UIntPtr(start))
527
558
  {
528
559
  std::copy(start, end, target);
529
560
  }
530
- else
561
+ else
531
562
  {
532
563
  std::ptrdiff_t count = (std::ptrdiff_t(end)-std::ptrdiff_t(start)) / sizeof(T);
533
- std::copy_backward(start, end, target + count);
564
+ std::copy_backward(start, end, target + count);
534
565
  }
535
566
  }
536
567
  };
537
568
 
569
+ #if EIGEN_HAS_RVALUE_REFERENCES
570
+ template<typename T> EIGEN_DEVICE_FUNC T* smart_move(T* start, T* end, T* target)
571
+ {
572
+ return std::move(start, end, target);
573
+ }
574
+ #else
575
+ template<typename T> EIGEN_DEVICE_FUNC T* smart_move(T* start, T* end, T* target)
576
+ {
577
+ return std::copy(start, end, target);
578
+ }
579
+ #endif
538
580
 
539
581
  /*****************************************************************************
540
582
  *** Implementation of runtime stack allocation (falling back to malloc) ***
@@ -542,7 +584,7 @@ template<typename T> struct smart_memmove_helper<T,false> {
542
584
 
543
585
  // you can overwrite Eigen's default behavior regarding alloca by defining EIGEN_ALLOCA
544
586
  // to the appropriate stack allocation function
545
- #ifndef EIGEN_ALLOCA
587
+ #if ! defined EIGEN_ALLOCA && ! defined EIGEN_GPU_COMPILE_PHASE
546
588
  #if EIGEN_OS_LINUX || EIGEN_OS_MAC || (defined alloca)
547
589
  #define EIGEN_ALLOCA alloca
548
590
  #elif EIGEN_COMP_MSVC
@@ -550,6 +592,15 @@ template<typename T> struct smart_memmove_helper<T,false> {
550
592
  #endif
551
593
  #endif
552
594
 
595
+ // With clang -Oz -mthumb, alloca changes the stack pointer in a way that is
596
+ // not allowed in Thumb2. -DEIGEN_STACK_ALLOCATION_LIMIT=0 doesn't work because
597
+ // the compiler still emits bad code because stack allocation checks use "<=".
598
+ // TODO: Eliminate after https://bugs.llvm.org/show_bug.cgi?id=23772
599
+ // is fixed.
600
+ #if defined(__clang__) && defined(__thumb__)
601
+ #undef EIGEN_ALLOCA
602
+ #endif
603
+
553
604
  // This helper class constructs the allocated memory, and takes care of destructing and freeing the handled data
554
605
  // at destruction time. In practice this helper class is mainly useful to avoid memory leak in case of exceptions.
555
606
  template<typename T> class aligned_stack_memory_handler : noncopyable
@@ -561,12 +612,14 @@ template<typename T> class aligned_stack_memory_handler : noncopyable
561
612
  * In this case, the buffer elements will also be destructed when this handler will be destructed.
562
613
  * Finally, if \a dealloc is true, then the pointer \a ptr is freed.
563
614
  **/
615
+ EIGEN_DEVICE_FUNC
564
616
  aligned_stack_memory_handler(T* ptr, std::size_t size, bool dealloc)
565
617
  : m_ptr(ptr), m_size(size), m_deallocate(dealloc)
566
618
  {
567
619
  if(NumTraits<T>::RequireInitialization && m_ptr)
568
620
  Eigen::internal::construct_elements_of_array(m_ptr, size);
569
621
  }
622
+ EIGEN_DEVICE_FUNC
570
623
  ~aligned_stack_memory_handler()
571
624
  {
572
625
  if(NumTraits<T>::RequireInitialization && m_ptr)
@@ -580,6 +633,60 @@ template<typename T> class aligned_stack_memory_handler : noncopyable
580
633
  bool m_deallocate;
581
634
  };
582
635
 
636
+ #ifdef EIGEN_ALLOCA
637
+
638
+ template<typename Xpr, int NbEvaluations,
639
+ bool MapExternalBuffer = nested_eval<Xpr,NbEvaluations>::Evaluate && Xpr::MaxSizeAtCompileTime==Dynamic
640
+ >
641
+ struct local_nested_eval_wrapper
642
+ {
643
+ static const bool NeedExternalBuffer = false;
644
+ typedef typename Xpr::Scalar Scalar;
645
+ typedef typename nested_eval<Xpr,NbEvaluations>::type ObjectType;
646
+ ObjectType object;
647
+
648
+ EIGEN_DEVICE_FUNC
649
+ local_nested_eval_wrapper(const Xpr& xpr, Scalar* ptr) : object(xpr)
650
+ {
651
+ EIGEN_UNUSED_VARIABLE(ptr);
652
+ eigen_internal_assert(ptr==0);
653
+ }
654
+ };
655
+
656
+ template<typename Xpr, int NbEvaluations>
657
+ struct local_nested_eval_wrapper<Xpr,NbEvaluations,true>
658
+ {
659
+ static const bool NeedExternalBuffer = true;
660
+ typedef typename Xpr::Scalar Scalar;
661
+ typedef typename plain_object_eval<Xpr>::type PlainObject;
662
+ typedef Map<PlainObject,EIGEN_DEFAULT_ALIGN_BYTES> ObjectType;
663
+ ObjectType object;
664
+
665
+ EIGEN_DEVICE_FUNC
666
+ local_nested_eval_wrapper(const Xpr& xpr, Scalar* ptr)
667
+ : object(ptr==0 ? reinterpret_cast<Scalar*>(Eigen::internal::aligned_malloc(sizeof(Scalar)*xpr.size())) : ptr, xpr.rows(), xpr.cols()),
668
+ m_deallocate(ptr==0)
669
+ {
670
+ if(NumTraits<Scalar>::RequireInitialization && object.data())
671
+ Eigen::internal::construct_elements_of_array(object.data(), object.size());
672
+ object = xpr;
673
+ }
674
+
675
+ EIGEN_DEVICE_FUNC
676
+ ~local_nested_eval_wrapper()
677
+ {
678
+ if(NumTraits<Scalar>::RequireInitialization && object.data())
679
+ Eigen::internal::destruct_elements_of_array(object.data(), object.size());
680
+ if(m_deallocate)
681
+ Eigen::internal::aligned_free(object.data());
682
+ }
683
+
684
+ private:
685
+ bool m_deallocate;
686
+ };
687
+
688
+ #endif // EIGEN_ALLOCA
689
+
583
690
  template<typename T> class scoped_array : noncopyable
584
691
  {
585
692
  T* m_ptr;
@@ -603,13 +710,15 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b)
603
710
  {
604
711
  std::swap(a.ptr(),b.ptr());
605
712
  }
606
-
713
+
607
714
  } // end namespace internal
608
715
 
609
716
  /** \internal
610
- * Declares, allocates and construct an aligned buffer named NAME of SIZE elements of type TYPE on the stack
611
- * if SIZE is smaller than EIGEN_STACK_ALLOCATION_LIMIT, and if stack allocation is supported by the platform
612
- * (currently, this is Linux and Visual Studio only). Otherwise the memory is allocated on the heap.
717
+ *
718
+ * The macro ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) declares, allocates,
719
+ * and constructs an aligned buffer named NAME of SIZE elements of type TYPE on the stack
720
+ * if the size in bytes is smaller than EIGEN_STACK_ALLOCATION_LIMIT, and if stack allocation is supported by the platform
721
+ * (currently, this is Linux, OSX and Visual Studio only). Otherwise the memory is allocated on the heap.
613
722
  * The allocated buffer is automatically deleted when exiting the scope of this declaration.
614
723
  * If BUFFER is non null, then the declared variable is simply an alias for BUFFER, and no allocation/deletion occurs.
615
724
  * Here is an example:
@@ -620,9 +729,17 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b)
620
729
  * }
621
730
  * \endcode
622
731
  * The underlying stack allocation function can be controlled with the EIGEN_ALLOCA preprocessor token.
732
+ *
733
+ * The macro ei_declare_local_nested_eval(XPR_T,XPR,N,NAME) is analogous to
734
+ * \code
735
+ * typename internal::nested_eval<XPR_T,N>::type NAME(XPR);
736
+ * \endcode
737
+ * with the advantage of using aligned stack allocation even if the maximal size of XPR at compile time is unknown.
738
+ * This is accomplished through alloca if the latter is supported and if the required number of bytes
739
+ * is below EIGEN_STACK_ALLOCATION_LIMIT.
623
740
  */
624
741
  #ifdef EIGEN_ALLOCA
625
-
742
+
626
743
  #if EIGEN_DEFAULT_ALIGN_BYTES>0
627
744
  // We always manually re-align the result of EIGEN_ALLOCA.
628
745
  // If alloca is already aligned, the compiler should be smart enough to optimize away the re-alignment.
@@ -639,13 +756,23 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b)
639
756
  : Eigen::internal::aligned_malloc(sizeof(TYPE)*SIZE) ); \
640
757
  Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME,_stack_memory_destructor)((BUFFER)==0 ? NAME : 0,SIZE,sizeof(TYPE)*SIZE>EIGEN_STACK_ALLOCATION_LIMIT)
641
758
 
759
+
760
+ #define ei_declare_local_nested_eval(XPR_T,XPR,N,NAME) \
761
+ Eigen::internal::local_nested_eval_wrapper<XPR_T,N> EIGEN_CAT(NAME,_wrapper)(XPR, reinterpret_cast<typename XPR_T::Scalar*>( \
762
+ ( (Eigen::internal::local_nested_eval_wrapper<XPR_T,N>::NeedExternalBuffer) && ((sizeof(typename XPR_T::Scalar)*XPR.size())<=EIGEN_STACK_ALLOCATION_LIMIT) ) \
763
+ ? EIGEN_ALIGNED_ALLOCA( sizeof(typename XPR_T::Scalar)*XPR.size() ) : 0 ) ) ; \
764
+ typename Eigen::internal::local_nested_eval_wrapper<XPR_T,N>::ObjectType NAME(EIGEN_CAT(NAME,_wrapper).object)
765
+
642
766
  #else
643
767
 
644
768
  #define ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) \
645
769
  Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \
646
770
  TYPE* NAME = (BUFFER)!=0 ? BUFFER : reinterpret_cast<TYPE*>(Eigen::internal::aligned_malloc(sizeof(TYPE)*SIZE)); \
647
771
  Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME,_stack_memory_destructor)((BUFFER)==0 ? NAME : 0,SIZE,true)
648
-
772
+
773
+
774
+ #define ei_declare_local_nested_eval(XPR_T,XPR,N,NAME) typename Eigen::internal::nested_eval<XPR_T,N>::type NAME(XPR)
775
+
649
776
  #endif
650
777
 
651
778
 
@@ -653,32 +780,56 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b)
653
780
  *** Implementation of EIGEN_MAKE_ALIGNED_OPERATOR_NEW [_IF] ***
654
781
  *****************************************************************************/
655
782
 
656
- #if EIGEN_MAX_ALIGN_BYTES!=0
783
+ #if EIGEN_HAS_CXX17_OVERALIGN
784
+
785
+ // C++17 -> no need to bother about alignment anymore :)
786
+
787
+ #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign)
788
+ #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)
789
+ #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW
790
+ #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar,Size)
791
+
792
+ #else
793
+
794
+ // HIP does not support new/delete on device.
795
+ #if EIGEN_MAX_ALIGN_BYTES!=0 && !defined(EIGEN_HIP_DEVICE_COMPILE)
657
796
  #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
797
+ EIGEN_DEVICE_FUNC \
658
798
  void* operator new(std::size_t size, const std::nothrow_t&) EIGEN_NO_THROW { \
659
799
  EIGEN_TRY { return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); } \
660
800
  EIGEN_CATCH (...) { return 0; } \
661
801
  }
662
802
  #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign) \
803
+ EIGEN_DEVICE_FUNC \
663
804
  void *operator new(std::size_t size) { \
664
805
  return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \
665
806
  } \
807
+ EIGEN_DEVICE_FUNC \
666
808
  void *operator new[](std::size_t size) { \
667
809
  return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \
668
810
  } \
811
+ EIGEN_DEVICE_FUNC \
669
812
  void operator delete(void * ptr) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
813
+ EIGEN_DEVICE_FUNC \
670
814
  void operator delete[](void * ptr) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
815
+ EIGEN_DEVICE_FUNC \
671
816
  void operator delete(void * ptr, std::size_t /* sz */) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
817
+ EIGEN_DEVICE_FUNC \
672
818
  void operator delete[](void * ptr, std::size_t /* sz */) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
673
819
  /* in-place new and delete. since (at least afaik) there is no actual */ \
674
820
  /* memory allocated we can safely let the default implementation handle */ \
675
821
  /* this particular case. */ \
822
+ EIGEN_DEVICE_FUNC \
676
823
  static void *operator new(std::size_t size, void *ptr) { return ::operator new(size,ptr); } \
824
+ EIGEN_DEVICE_FUNC \
677
825
  static void *operator new[](std::size_t size, void* ptr) { return ::operator new[](size,ptr); } \
826
+ EIGEN_DEVICE_FUNC \
678
827
  void operator delete(void * memory, void *ptr) EIGEN_NO_THROW { return ::operator delete(memory,ptr); } \
828
+ EIGEN_DEVICE_FUNC \
679
829
  void operator delete[](void * memory, void *ptr) EIGEN_NO_THROW { return ::operator delete[](memory,ptr); } \
680
830
  /* nothrow-new (returns zero instead of std::bad_alloc) */ \
681
831
  EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
832
+ EIGEN_DEVICE_FUNC \
682
833
  void operator delete(void *ptr, const std::nothrow_t&) EIGEN_NO_THROW { \
683
834
  Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); \
684
835
  } \
@@ -688,8 +839,14 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b)
688
839
  #endif
689
840
 
690
841
  #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(true)
691
- #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar,Size) \
692
- EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(bool(((Size)!=Eigen::Dynamic) && ((sizeof(Scalar)*(Size))%EIGEN_MAX_ALIGN_BYTES==0)))
842
+ #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar,Size) \
843
+ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(bool( \
844
+ ((Size)!=Eigen::Dynamic) && \
845
+ (((EIGEN_MAX_ALIGN_BYTES>=16) && ((sizeof(Scalar)*(Size))%(EIGEN_MAX_ALIGN_BYTES )==0)) || \
846
+ ((EIGEN_MAX_ALIGN_BYTES>=32) && ((sizeof(Scalar)*(Size))%(EIGEN_MAX_ALIGN_BYTES/2)==0)) || \
847
+ ((EIGEN_MAX_ALIGN_BYTES>=64) && ((sizeof(Scalar)*(Size))%(EIGEN_MAX_ALIGN_BYTES/4)==0)) )))
848
+
849
+ #endif
693
850
 
694
851
  /****************************************************************************/
695
852
 
@@ -703,13 +860,13 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b)
703
860
  * - 32 bytes alignment if AVX is enabled.
704
861
  * - 64 bytes alignment if AVX512 is enabled.
705
862
  *
706
- * This can be controled using the \c EIGEN_MAX_ALIGN_BYTES macro as documented
863
+ * This can be controlled using the \c EIGEN_MAX_ALIGN_BYTES macro as documented
707
864
  * \link TopicPreprocessorDirectivesPerformance there \endlink.
708
865
  *
709
866
  * Example:
710
867
  * \code
711
868
  * // Matrix4f requires 16 bytes alignment:
712
- * std::map< int, Matrix4f, std::less<int>,
869
+ * std::map< int, Matrix4f, std::less<int>,
713
870
  * aligned_allocator<std::pair<const int, Matrix4f> > > my_map_mat4;
714
871
  * // Vector3f does not require 16 bytes alignment, no need to use Eigen's allocator:
715
872
  * std::map< int, Vector3f > my_map_vec3;
@@ -744,18 +901,19 @@ public:
744
901
 
745
902
  ~aligned_allocator() {}
746
903
 
904
+ #if EIGEN_COMP_GNUC_STRICT && EIGEN_GNUC_AT_LEAST(7,0)
905
+ // In gcc std::allocator::max_size() is bugged, making gcc trigger a warning:
906
+ // eigen/Eigen/src/Core/util/Memory.h:189:12: warning: argument 1 value '18446744073709551612' exceeds maximum object size 9223372036854775807
907
+ // See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=87544
908
+ size_type max_size() const {
909
+ return (std::numeric_limits<std::ptrdiff_t>::max)()/sizeof(T);
910
+ }
911
+ #endif
912
+
747
913
  pointer allocate(size_type num, const void* /*hint*/ = 0)
748
914
  {
749
915
  internal::check_size_for_overflow<T>(num);
750
- size_type size = num * sizeof(T);
751
- #if EIGEN_COMP_GNUC_STRICT && EIGEN_GNUC_AT_LEAST(7,0)
752
- // workaround gcc bug https://gcc.gnu.org/bugzilla/show_bug.cgi?id=87544
753
- // It triggered eigen/Eigen/src/Core/util/Memory.h:189:12: warning: argument 1 value '18446744073709551612' exceeds maximum object size 9223372036854775807
754
- if(size>=std::size_t((std::numeric_limits<std::ptrdiff_t>::max)()))
755
- return 0;
756
- else
757
- #endif
758
- return static_cast<pointer>( internal::aligned_malloc(size) );
916
+ return static_cast<pointer>( internal::aligned_malloc(num * sizeof(T)) );
759
917
  }
760
918
 
761
919
  void deallocate(pointer p, size_type /*num*/)
@@ -914,20 +1072,32 @@ inline void queryCacheSizes_intel(int& l1, int& l2, int& l3, int max_std_funcs)
914
1072
  {
915
1073
  if(max_std_funcs>=4)
916
1074
  queryCacheSizes_intel_direct(l1,l2,l3);
917
- else
1075
+ else if(max_std_funcs>=2)
918
1076
  queryCacheSizes_intel_codes(l1,l2,l3);
1077
+ else
1078
+ l1 = l2 = l3 = 0;
919
1079
  }
920
1080
 
921
1081
  inline void queryCacheSizes_amd(int& l1, int& l2, int& l3)
922
1082
  {
923
1083
  int abcd[4];
924
1084
  abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
925
- EIGEN_CPUID(abcd,0x80000005,0);
926
- l1 = (abcd[2] >> 24) * 1024; // C[31:24] = L1 size in KB
927
- abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
928
- EIGEN_CPUID(abcd,0x80000006,0);
929
- l2 = (abcd[2] >> 16) * 1024; // C[31;16] = l2 cache size in KB
930
- l3 = ((abcd[3] & 0xFFFC000) >> 18) * 512 * 1024; // D[31;18] = l3 cache size in 512KB
1085
+
1086
+ // First query the max supported function.
1087
+ EIGEN_CPUID(abcd,0x80000000,0);
1088
+ if(static_cast<numext::uint32_t>(abcd[0]) >= static_cast<numext::uint32_t>(0x80000006))
1089
+ {
1090
+ EIGEN_CPUID(abcd,0x80000005,0);
1091
+ l1 = (abcd[2] >> 24) * 1024; // C[31:24] = L1 size in KB
1092
+ abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
1093
+ EIGEN_CPUID(abcd,0x80000006,0);
1094
+ l2 = (abcd[2] >> 16) * 1024; // C[31;16] = l2 cache size in KB
1095
+ l3 = ((abcd[3] & 0xFFFC000) >> 18) * 512 * 1024; // D[31;18] = l3 cache size in 512KB
1096
+ }
1097
+ else
1098
+ {
1099
+ l1 = l2 = l3 = 0;
1100
+ }
931
1101
  }
932
1102
  #endif
933
1103
 
@@ -943,7 +1113,7 @@ inline void queryCacheSizes(int& l1, int& l2, int& l3)
943
1113
 
944
1114
  // identify the CPU vendor
945
1115
  EIGEN_CPUID(abcd,0x0,0);
946
- int max_std_funcs = abcd[1];
1116
+ int max_std_funcs = abcd[0];
947
1117
  if(cpuid_is_vendor(abcd,GenuineIntel))
948
1118
  queryCacheSizes_intel(l1,l2,l3,max_std_funcs);
949
1119
  else if(cpuid_is_vendor(abcd,AuthenticAMD) || cpuid_is_vendor(abcd,AMDisbetter_))