tomoto 0.2.2 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (369) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +10 -0
  3. data/README.md +8 -10
  4. data/ext/tomoto/ct.cpp +11 -11
  5. data/ext/tomoto/dmr.cpp +14 -13
  6. data/ext/tomoto/dt.cpp +14 -14
  7. data/ext/tomoto/extconf.rb +7 -5
  8. data/ext/tomoto/gdmr.cpp +7 -7
  9. data/ext/tomoto/hdp.cpp +9 -9
  10. data/ext/tomoto/hlda.cpp +13 -13
  11. data/ext/tomoto/hpa.cpp +5 -5
  12. data/ext/tomoto/lda.cpp +42 -39
  13. data/ext/tomoto/llda.cpp +6 -6
  14. data/ext/tomoto/mglda.cpp +15 -15
  15. data/ext/tomoto/pa.cpp +6 -6
  16. data/ext/tomoto/plda.cpp +6 -6
  17. data/ext/tomoto/slda.cpp +8 -8
  18. data/ext/tomoto/{ext.cpp → tomoto.cpp} +8 -8
  19. data/ext/tomoto/utils.h +16 -70
  20. data/lib/tomoto/version.rb +1 -1
  21. data/lib/tomoto.rb +5 -1
  22. data/vendor/EigenRand/EigenRand/Core.h +10 -10
  23. data/vendor/EigenRand/EigenRand/Dists/Basic.h +208 -9
  24. data/vendor/EigenRand/EigenRand/Dists/Discrete.h +52 -31
  25. data/vendor/EigenRand/EigenRand/Dists/GammaPoisson.h +9 -8
  26. data/vendor/EigenRand/EigenRand/Dists/NormalExp.h +28 -21
  27. data/vendor/EigenRand/EigenRand/EigenRand +11 -6
  28. data/vendor/EigenRand/EigenRand/Macro.h +13 -7
  29. data/vendor/EigenRand/EigenRand/MorePacketMath.h +348 -740
  30. data/vendor/EigenRand/EigenRand/MvDists/Multinomial.h +5 -3
  31. data/vendor/EigenRand/EigenRand/MvDists/MvNormal.h +9 -3
  32. data/vendor/EigenRand/EigenRand/PacketFilter.h +11 -253
  33. data/vendor/EigenRand/EigenRand/PacketRandomEngine.h +21 -47
  34. data/vendor/EigenRand/EigenRand/RandUtils.h +50 -344
  35. data/vendor/EigenRand/EigenRand/arch/AVX/MorePacketMath.h +619 -0
  36. data/vendor/EigenRand/EigenRand/arch/AVX/PacketFilter.h +149 -0
  37. data/vendor/EigenRand/EigenRand/arch/AVX/RandUtils.h +228 -0
  38. data/vendor/EigenRand/EigenRand/arch/NEON/MorePacketMath.h +473 -0
  39. data/vendor/EigenRand/EigenRand/arch/NEON/PacketFilter.h +142 -0
  40. data/vendor/EigenRand/EigenRand/arch/NEON/RandUtils.h +126 -0
  41. data/vendor/EigenRand/EigenRand/arch/SSE/MorePacketMath.h +501 -0
  42. data/vendor/EigenRand/EigenRand/arch/SSE/PacketFilter.h +133 -0
  43. data/vendor/EigenRand/EigenRand/arch/SSE/RandUtils.h +120 -0
  44. data/vendor/EigenRand/EigenRand/doc.h +24 -12
  45. data/vendor/EigenRand/README.md +57 -4
  46. data/vendor/eigen/COPYING.APACHE +203 -0
  47. data/vendor/eigen/COPYING.BSD +1 -1
  48. data/vendor/eigen/COPYING.MINPACK +51 -52
  49. data/vendor/eigen/Eigen/Cholesky +0 -1
  50. data/vendor/eigen/Eigen/Core +112 -265
  51. data/vendor/eigen/Eigen/Eigenvalues +2 -3
  52. data/vendor/eigen/Eigen/Geometry +5 -8
  53. data/vendor/eigen/Eigen/Householder +0 -1
  54. data/vendor/eigen/Eigen/Jacobi +0 -1
  55. data/vendor/eigen/Eigen/KLUSupport +41 -0
  56. data/vendor/eigen/Eigen/LU +2 -5
  57. data/vendor/eigen/Eigen/OrderingMethods +0 -3
  58. data/vendor/eigen/Eigen/PaStiXSupport +1 -0
  59. data/vendor/eigen/Eigen/PardisoSupport +0 -0
  60. data/vendor/eigen/Eigen/QR +2 -3
  61. data/vendor/eigen/Eigen/QtAlignedMalloc +0 -1
  62. data/vendor/eigen/Eigen/SVD +0 -1
  63. data/vendor/eigen/Eigen/Sparse +0 -2
  64. data/vendor/eigen/Eigen/SparseCholesky +0 -8
  65. data/vendor/eigen/Eigen/SparseLU +4 -0
  66. data/vendor/eigen/Eigen/SparseQR +0 -1
  67. data/vendor/eigen/Eigen/src/Cholesky/LDLT.h +42 -27
  68. data/vendor/eigen/Eigen/src/Cholesky/LLT.h +39 -23
  69. data/vendor/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +90 -47
  70. data/vendor/eigen/Eigen/src/Core/ArithmeticSequence.h +413 -0
  71. data/vendor/eigen/Eigen/src/Core/Array.h +99 -11
  72. data/vendor/eigen/Eigen/src/Core/ArrayBase.h +3 -3
  73. data/vendor/eigen/Eigen/src/Core/ArrayWrapper.h +21 -21
  74. data/vendor/eigen/Eigen/src/Core/Assign.h +1 -1
  75. data/vendor/eigen/Eigen/src/Core/AssignEvaluator.h +125 -50
  76. data/vendor/eigen/Eigen/src/Core/Assign_MKL.h +10 -10
  77. data/vendor/eigen/Eigen/src/Core/BandMatrix.h +16 -16
  78. data/vendor/eigen/Eigen/src/Core/Block.h +56 -60
  79. data/vendor/eigen/Eigen/src/Core/BooleanRedux.h +29 -31
  80. data/vendor/eigen/Eigen/src/Core/CommaInitializer.h +7 -3
  81. data/vendor/eigen/Eigen/src/Core/CoreEvaluators.h +325 -272
  82. data/vendor/eigen/Eigen/src/Core/CoreIterators.h +5 -0
  83. data/vendor/eigen/Eigen/src/Core/CwiseBinaryOp.h +21 -22
  84. data/vendor/eigen/Eigen/src/Core/CwiseNullaryOp.h +153 -18
  85. data/vendor/eigen/Eigen/src/Core/CwiseUnaryOp.h +6 -6
  86. data/vendor/eigen/Eigen/src/Core/CwiseUnaryView.h +14 -10
  87. data/vendor/eigen/Eigen/src/Core/DenseBase.h +132 -42
  88. data/vendor/eigen/Eigen/src/Core/DenseCoeffsBase.h +25 -21
  89. data/vendor/eigen/Eigen/src/Core/DenseStorage.h +153 -71
  90. data/vendor/eigen/Eigen/src/Core/Diagonal.h +21 -23
  91. data/vendor/eigen/Eigen/src/Core/DiagonalMatrix.h +50 -2
  92. data/vendor/eigen/Eigen/src/Core/DiagonalProduct.h +1 -1
  93. data/vendor/eigen/Eigen/src/Core/Dot.h +10 -10
  94. data/vendor/eigen/Eigen/src/Core/EigenBase.h +10 -9
  95. data/vendor/eigen/Eigen/src/Core/ForceAlignedAccess.h +8 -4
  96. data/vendor/eigen/Eigen/src/Core/Fuzzy.h +3 -3
  97. data/vendor/eigen/Eigen/src/Core/GeneralProduct.h +20 -10
  98. data/vendor/eigen/Eigen/src/Core/GenericPacketMath.h +599 -152
  99. data/vendor/eigen/Eigen/src/Core/GlobalFunctions.h +40 -33
  100. data/vendor/eigen/Eigen/src/Core/IO.h +40 -7
  101. data/vendor/eigen/Eigen/src/Core/IndexedView.h +237 -0
  102. data/vendor/eigen/Eigen/src/Core/Inverse.h +9 -10
  103. data/vendor/eigen/Eigen/src/Core/Map.h +7 -7
  104. data/vendor/eigen/Eigen/src/Core/MapBase.h +10 -3
  105. data/vendor/eigen/Eigen/src/Core/MathFunctions.h +767 -125
  106. data/vendor/eigen/Eigen/src/Core/MathFunctionsImpl.h +118 -19
  107. data/vendor/eigen/Eigen/src/Core/Matrix.h +131 -25
  108. data/vendor/eigen/Eigen/src/Core/MatrixBase.h +21 -3
  109. data/vendor/eigen/Eigen/src/Core/NestByValue.h +25 -50
  110. data/vendor/eigen/Eigen/src/Core/NoAlias.h +4 -3
  111. data/vendor/eigen/Eigen/src/Core/NumTraits.h +107 -20
  112. data/vendor/eigen/Eigen/src/Core/PartialReduxEvaluator.h +232 -0
  113. data/vendor/eigen/Eigen/src/Core/PermutationMatrix.h +3 -31
  114. data/vendor/eigen/Eigen/src/Core/PlainObjectBase.h +152 -59
  115. data/vendor/eigen/Eigen/src/Core/Product.h +30 -25
  116. data/vendor/eigen/Eigen/src/Core/ProductEvaluators.h +192 -125
  117. data/vendor/eigen/Eigen/src/Core/Random.h +37 -1
  118. data/vendor/eigen/Eigen/src/Core/Redux.h +180 -170
  119. data/vendor/eigen/Eigen/src/Core/Ref.h +121 -23
  120. data/vendor/eigen/Eigen/src/Core/Replicate.h +8 -8
  121. data/vendor/eigen/Eigen/src/Core/Reshaped.h +454 -0
  122. data/vendor/eigen/Eigen/src/Core/ReturnByValue.h +7 -5
  123. data/vendor/eigen/Eigen/src/Core/Reverse.h +18 -12
  124. data/vendor/eigen/Eigen/src/Core/Select.h +8 -6
  125. data/vendor/eigen/Eigen/src/Core/SelfAdjointView.h +33 -20
  126. data/vendor/eigen/Eigen/src/Core/Solve.h +14 -14
  127. data/vendor/eigen/Eigen/src/Core/SolveTriangular.h +16 -16
  128. data/vendor/eigen/Eigen/src/Core/SolverBase.h +41 -3
  129. data/vendor/eigen/Eigen/src/Core/StableNorm.h +100 -70
  130. data/vendor/eigen/Eigen/src/Core/StlIterators.h +463 -0
  131. data/vendor/eigen/Eigen/src/Core/Stride.h +9 -4
  132. data/vendor/eigen/Eigen/src/Core/Swap.h +5 -4
  133. data/vendor/eigen/Eigen/src/Core/Transpose.h +88 -27
  134. data/vendor/eigen/Eigen/src/Core/Transpositions.h +26 -47
  135. data/vendor/eigen/Eigen/src/Core/TriangularMatrix.h +93 -75
  136. data/vendor/eigen/Eigen/src/Core/VectorBlock.h +5 -5
  137. data/vendor/eigen/Eigen/src/Core/VectorwiseOp.h +159 -70
  138. data/vendor/eigen/Eigen/src/Core/Visitor.h +137 -29
  139. data/vendor/eigen/Eigen/src/Core/arch/AVX/Complex.h +50 -129
  140. data/vendor/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +126 -337
  141. data/vendor/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +1092 -155
  142. data/vendor/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +65 -1
  143. data/vendor/eigen/Eigen/src/Core/arch/AVX512/Complex.h +422 -0
  144. data/vendor/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +207 -236
  145. data/vendor/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1482 -495
  146. data/vendor/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +89 -0
  147. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +152 -165
  148. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +19 -251
  149. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2937 -0
  150. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +221 -0
  151. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +629 -0
  152. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2042 -392
  153. data/vendor/eigen/Eigen/src/Core/arch/CUDA/Complex.h +235 -80
  154. data/vendor/eigen/Eigen/src/Core/arch/Default/BFloat16.h +700 -0
  155. data/vendor/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +102 -14
  156. data/vendor/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1649 -0
  157. data/vendor/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +110 -0
  158. data/vendor/eigen/Eigen/src/Core/arch/Default/Half.h +942 -0
  159. data/vendor/eigen/Eigen/src/Core/arch/Default/Settings.h +1 -1
  160. data/vendor/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +120 -0
  161. data/vendor/eigen/Eigen/src/Core/arch/{CUDA → GPU}/MathFunctions.h +16 -4
  162. data/vendor/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1685 -0
  163. data/vendor/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +80 -0
  164. data/vendor/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
  165. data/vendor/eigen/Eigen/src/Core/arch/MSA/Complex.h +648 -0
  166. data/vendor/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +387 -0
  167. data/vendor/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1233 -0
  168. data/vendor/eigen/Eigen/src/Core/arch/NEON/Complex.h +313 -219
  169. data/vendor/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +183 -0
  170. data/vendor/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +54 -70
  171. data/vendor/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4376 -549
  172. data/vendor/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1419 -0
  173. data/vendor/eigen/Eigen/src/Core/arch/SSE/Complex.h +59 -179
  174. data/vendor/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +65 -428
  175. data/vendor/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +893 -283
  176. data/vendor/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +65 -0
  177. data/vendor/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +44 -0
  178. data/vendor/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +752 -0
  179. data/vendor/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +49 -0
  180. data/vendor/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +232 -0
  181. data/vendor/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +301 -0
  182. data/vendor/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +670 -0
  183. data/vendor/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +694 -0
  184. data/vendor/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +85 -0
  185. data/vendor/eigen/Eigen/src/Core/arch/ZVector/Complex.h +212 -183
  186. data/vendor/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +101 -5
  187. data/vendor/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +510 -395
  188. data/vendor/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +11 -2
  189. data/vendor/eigen/Eigen/src/Core/functors/BinaryFunctors.h +112 -46
  190. data/vendor/eigen/Eigen/src/Core/functors/NullaryFunctors.h +31 -30
  191. data/vendor/eigen/Eigen/src/Core/functors/StlFunctors.h +32 -2
  192. data/vendor/eigen/Eigen/src/Core/functors/UnaryFunctors.h +355 -16
  193. data/vendor/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1075 -586
  194. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +49 -24
  195. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +41 -35
  196. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +6 -6
  197. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +4 -2
  198. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +382 -483
  199. data/vendor/eigen/Eigen/src/Core/products/Parallelizer.h +22 -5
  200. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +53 -30
  201. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +16 -8
  202. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +8 -6
  203. data/vendor/eigen/Eigen/src/Core/products/SelfadjointProduct.h +4 -4
  204. data/vendor/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +5 -4
  205. data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +33 -27
  206. data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +14 -12
  207. data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +36 -34
  208. data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +8 -4
  209. data/vendor/eigen/Eigen/src/Core/products/TriangularSolverVector.h +13 -10
  210. data/vendor/eigen/Eigen/src/Core/util/BlasUtil.h +304 -119
  211. data/vendor/eigen/Eigen/src/Core/util/ConfigureVectorization.h +512 -0
  212. data/vendor/eigen/Eigen/src/Core/util/Constants.h +25 -9
  213. data/vendor/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +26 -3
  214. data/vendor/eigen/Eigen/src/Core/util/ForwardDeclarations.h +29 -9
  215. data/vendor/eigen/Eigen/src/Core/util/IndexedViewHelper.h +186 -0
  216. data/vendor/eigen/Eigen/src/Core/util/IntegralConstant.h +272 -0
  217. data/vendor/eigen/Eigen/src/Core/util/MKL_support.h +8 -1
  218. data/vendor/eigen/Eigen/src/Core/util/Macros.h +709 -246
  219. data/vendor/eigen/Eigen/src/Core/util/Memory.h +222 -52
  220. data/vendor/eigen/Eigen/src/Core/util/Meta.h +355 -77
  221. data/vendor/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +5 -1
  222. data/vendor/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
  223. data/vendor/eigen/Eigen/src/Core/util/StaticAssert.h +8 -5
  224. data/vendor/eigen/Eigen/src/Core/util/SymbolicIndex.h +293 -0
  225. data/vendor/eigen/Eigen/src/Core/util/XprHelper.h +65 -30
  226. data/vendor/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +1 -1
  227. data/vendor/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +7 -4
  228. data/vendor/eigen/Eigen/src/Eigenvalues/EigenSolver.h +2 -2
  229. data/vendor/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +1 -1
  230. data/vendor/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +2 -2
  231. data/vendor/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +2 -2
  232. data/vendor/eigen/Eigen/src/Eigenvalues/RealQZ.h +9 -6
  233. data/vendor/eigen/Eigen/src/Eigenvalues/RealSchur.h +21 -9
  234. data/vendor/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +77 -43
  235. data/vendor/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +20 -15
  236. data/vendor/eigen/Eigen/src/Geometry/AlignedBox.h +99 -5
  237. data/vendor/eigen/Eigen/src/Geometry/AngleAxis.h +4 -4
  238. data/vendor/eigen/Eigen/src/Geometry/EulerAngles.h +3 -3
  239. data/vendor/eigen/Eigen/src/Geometry/Homogeneous.h +15 -11
  240. data/vendor/eigen/Eigen/src/Geometry/Hyperplane.h +1 -1
  241. data/vendor/eigen/Eigen/src/Geometry/OrthoMethods.h +3 -2
  242. data/vendor/eigen/Eigen/src/Geometry/ParametrizedLine.h +39 -2
  243. data/vendor/eigen/Eigen/src/Geometry/Quaternion.h +70 -14
  244. data/vendor/eigen/Eigen/src/Geometry/Rotation2D.h +3 -3
  245. data/vendor/eigen/Eigen/src/Geometry/Scaling.h +23 -5
  246. data/vendor/eigen/Eigen/src/Geometry/Transform.h +88 -67
  247. data/vendor/eigen/Eigen/src/Geometry/Translation.h +6 -12
  248. data/vendor/eigen/Eigen/src/Geometry/Umeyama.h +1 -1
  249. data/vendor/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +168 -0
  250. data/vendor/eigen/Eigen/src/Householder/BlockHouseholder.h +9 -2
  251. data/vendor/eigen/Eigen/src/Householder/Householder.h +8 -4
  252. data/vendor/eigen/Eigen/src/Householder/HouseholderSequence.h +123 -48
  253. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +15 -15
  254. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +7 -23
  255. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +5 -22
  256. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +41 -47
  257. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +51 -60
  258. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +70 -20
  259. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +2 -20
  260. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +11 -9
  261. data/vendor/eigen/Eigen/src/Jacobi/Jacobi.h +31 -10
  262. data/vendor/eigen/Eigen/src/KLUSupport/KLUSupport.h +358 -0
  263. data/vendor/eigen/Eigen/src/LU/Determinant.h +35 -19
  264. data/vendor/eigen/Eigen/src/LU/FullPivLU.h +29 -43
  265. data/vendor/eigen/Eigen/src/LU/InverseImpl.h +25 -8
  266. data/vendor/eigen/Eigen/src/LU/PartialPivLU.h +71 -58
  267. data/vendor/eigen/Eigen/src/LU/arch/InverseSize4.h +351 -0
  268. data/vendor/eigen/Eigen/src/OrderingMethods/Amd.h +7 -17
  269. data/vendor/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +297 -277
  270. data/vendor/eigen/Eigen/src/OrderingMethods/Ordering.h +6 -10
  271. data/vendor/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +1 -1
  272. data/vendor/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +9 -7
  273. data/vendor/eigen/Eigen/src/QR/ColPivHouseholderQR.h +41 -20
  274. data/vendor/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +100 -27
  275. data/vendor/eigen/Eigen/src/QR/FullPivHouseholderQR.h +59 -22
  276. data/vendor/eigen/Eigen/src/QR/HouseholderQR.h +48 -23
  277. data/vendor/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +25 -3
  278. data/vendor/eigen/Eigen/src/SVD/BDCSVD.h +183 -63
  279. data/vendor/eigen/Eigen/src/SVD/JacobiSVD.h +22 -14
  280. data/vendor/eigen/Eigen/src/SVD/SVDBase.h +83 -22
  281. data/vendor/eigen/Eigen/src/SVD/UpperBidiagonalization.h +3 -3
  282. data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +17 -9
  283. data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +12 -37
  284. data/vendor/eigen/Eigen/src/SparseCore/AmbiVector.h +3 -2
  285. data/vendor/eigen/Eigen/src/SparseCore/CompressedStorage.h +16 -0
  286. data/vendor/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +6 -6
  287. data/vendor/eigen/Eigen/src/SparseCore/SparseAssign.h +81 -27
  288. data/vendor/eigen/Eigen/src/SparseCore/SparseBlock.h +25 -57
  289. data/vendor/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +40 -11
  290. data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +11 -15
  291. data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +4 -2
  292. data/vendor/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +30 -8
  293. data/vendor/eigen/Eigen/src/SparseCore/SparseMatrix.h +126 -11
  294. data/vendor/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +5 -12
  295. data/vendor/eigen/Eigen/src/SparseCore/SparseProduct.h +13 -1
  296. data/vendor/eigen/Eigen/src/SparseCore/SparseRef.h +7 -7
  297. data/vendor/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +5 -2
  298. data/vendor/eigen/Eigen/src/SparseCore/SparseUtil.h +8 -0
  299. data/vendor/eigen/Eigen/src/SparseCore/SparseVector.h +1 -1
  300. data/vendor/eigen/Eigen/src/SparseCore/SparseView.h +1 -0
  301. data/vendor/eigen/Eigen/src/SparseLU/SparseLU.h +162 -12
  302. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +1 -1
  303. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +76 -2
  304. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +2 -2
  305. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +1 -1
  306. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +1 -1
  307. data/vendor/eigen/Eigen/src/SparseQR/SparseQR.h +19 -6
  308. data/vendor/eigen/Eigen/src/StlSupport/StdDeque.h +2 -12
  309. data/vendor/eigen/Eigen/src/StlSupport/StdList.h +2 -2
  310. data/vendor/eigen/Eigen/src/StlSupport/StdVector.h +2 -2
  311. data/vendor/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +6 -8
  312. data/vendor/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +175 -39
  313. data/vendor/eigen/Eigen/src/misc/lapacke.h +5 -4
  314. data/vendor/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +28 -2
  315. data/vendor/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +155 -11
  316. data/vendor/eigen/Eigen/src/plugins/BlockMethods.h +626 -242
  317. data/vendor/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +14 -0
  318. data/vendor/eigen/Eigen/src/plugins/IndexedViewMethods.h +262 -0
  319. data/vendor/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +4 -4
  320. data/vendor/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +10 -0
  321. data/vendor/eigen/Eigen/src/plugins/ReshapedMethods.h +149 -0
  322. data/vendor/eigen/README.md +2 -0
  323. data/vendor/eigen/bench/btl/README +1 -1
  324. data/vendor/eigen/bench/tensors/README +6 -7
  325. data/vendor/eigen/ci/README.md +56 -0
  326. data/vendor/eigen/demos/mix_eigen_and_c/README +1 -1
  327. data/vendor/eigen/unsupported/Eigen/CXX11/src/Tensor/README.md +213 -158
  328. data/vendor/eigen/unsupported/README.txt +1 -1
  329. data/vendor/tomotopy/README.kr.rst +78 -0
  330. data/vendor/tomotopy/README.rst +75 -0
  331. data/vendor/tomotopy/src/Labeling/FoRelevance.cpp +2 -2
  332. data/vendor/tomotopy/src/Labeling/Phraser.hpp +4 -4
  333. data/vendor/tomotopy/src/TopicModel/CTModel.hpp +7 -3
  334. data/vendor/tomotopy/src/TopicModel/DMRModel.hpp +7 -3
  335. data/vendor/tomotopy/src/TopicModel/DTModel.hpp +6 -3
  336. data/vendor/tomotopy/src/TopicModel/GDMRModel.hpp +2 -2
  337. data/vendor/tomotopy/src/TopicModel/HDP.h +1 -0
  338. data/vendor/tomotopy/src/TopicModel/HDPModel.hpp +57 -6
  339. data/vendor/tomotopy/src/TopicModel/HLDAModel.hpp +6 -3
  340. data/vendor/tomotopy/src/TopicModel/HPAModel.hpp +3 -2
  341. data/vendor/tomotopy/src/TopicModel/LDA.h +3 -3
  342. data/vendor/tomotopy/src/TopicModel/LDACVB0Model.hpp +5 -5
  343. data/vendor/tomotopy/src/TopicModel/LDAModel.hpp +50 -19
  344. data/vendor/tomotopy/src/TopicModel/LLDAModel.hpp +6 -2
  345. data/vendor/tomotopy/src/TopicModel/MGLDAModel.hpp +3 -2
  346. data/vendor/tomotopy/src/TopicModel/PAModel.hpp +1 -1
  347. data/vendor/tomotopy/src/TopicModel/PLDAModel.hpp +6 -2
  348. data/vendor/tomotopy/src/TopicModel/PT.h +3 -1
  349. data/vendor/tomotopy/src/TopicModel/PTModel.hpp +36 -3
  350. data/vendor/tomotopy/src/TopicModel/SLDAModel.hpp +6 -3
  351. data/vendor/tomotopy/src/TopicModel/TopicModel.hpp +55 -26
  352. data/vendor/tomotopy/src/Utils/AliasMethod.hpp +5 -4
  353. data/vendor/tomotopy/src/Utils/Dictionary.h +2 -2
  354. data/vendor/tomotopy/src/Utils/EigenAddonOps.hpp +36 -1
  355. data/vendor/tomotopy/src/Utils/MultiNormalDistribution.hpp +1 -1
  356. data/vendor/tomotopy/src/Utils/TruncMultiNormal.hpp +1 -1
  357. data/vendor/tomotopy/src/Utils/exception.h +6 -0
  358. data/vendor/tomotopy/src/Utils/math.h +2 -2
  359. data/vendor/tomotopy/src/Utils/sample.hpp +14 -12
  360. data/vendor/tomotopy/src/Utils/serializer.hpp +30 -5
  361. data/vendor/tomotopy/src/Utils/sse_gamma.h +0 -3
  362. metadata +64 -18
  363. data/vendor/eigen/Eigen/CMakeLists.txt +0 -19
  364. data/vendor/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -674
  365. data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
  366. data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
  367. data/vendor/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
  368. data/vendor/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
  369. data/vendor/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
@@ -44,23 +44,29 @@ struct default_packet_traits
44
44
  enum {
45
45
  HasHalfPacket = 0,
46
46
 
47
- HasAdd = 1,
48
- HasSub = 1,
49
- HasMul = 1,
50
- HasNegate = 1,
51
- HasAbs = 1,
52
- HasArg = 0,
53
- HasAbs2 = 1,
54
- HasMin = 1,
55
- HasMax = 1,
56
- HasConj = 1,
47
+ HasAdd = 1,
48
+ HasSub = 1,
49
+ HasShift = 1,
50
+ HasMul = 1,
51
+ HasNegate = 1,
52
+ HasAbs = 1,
53
+ HasArg = 0,
54
+ HasAbs2 = 1,
55
+ HasAbsDiff = 0,
56
+ HasMin = 1,
57
+ HasMax = 1,
58
+ HasConj = 1,
57
59
  HasSetLinear = 1,
58
- HasBlend = 0,
60
+ HasBlend = 0,
61
+ // This flag is used to indicate whether packet comparison is supported.
62
+ // pcmp_eq, pcmp_lt and pcmp_le should be defined for it to be true.
63
+ HasCmp = 0,
59
64
 
60
65
  HasDiv = 0,
61
66
  HasSqrt = 0,
62
67
  HasRsqrt = 0,
63
68
  HasExp = 0,
69
+ HasExpm1 = 0,
64
70
  HasLog = 0,
65
71
  HasLog1p = 0,
66
72
  HasLog10 = 0,
@@ -81,14 +87,18 @@ struct default_packet_traits
81
87
  HasPolygamma = 0,
82
88
  HasErf = 0,
83
89
  HasErfc = 0,
90
+ HasNdtri = 0,
91
+ HasBessel = 0,
84
92
  HasIGamma = 0,
93
+ HasIGammaDerA = 0,
94
+ HasGammaSampleDerAlpha = 0,
85
95
  HasIGammac = 0,
86
96
  HasBetaInc = 0,
87
97
 
88
98
  HasRound = 0,
99
+ HasRint = 0,
89
100
  HasFloor = 0,
90
101
  HasCeil = 0,
91
-
92
102
  HasSign = 0
93
103
  };
94
104
  };
@@ -119,6 +129,22 @@ template<typename T> struct packet_traits : default_packet_traits
119
129
 
120
130
  template<typename T> struct packet_traits<const T> : packet_traits<T> { };
121
131
 
132
+ template<typename T> struct unpacket_traits
133
+ {
134
+ typedef T type;
135
+ typedef T half;
136
+ enum
137
+ {
138
+ size = 1,
139
+ alignment = 1,
140
+ vectorizable = false,
141
+ masked_load_available=false,
142
+ masked_store_available=false
143
+ };
144
+ };
145
+
146
+ template<typename T> struct unpacket_traits<const T> : unpacket_traits<T> { };
147
+
122
148
  template <typename Src, typename Tgt> struct type_casting_traits {
123
149
  enum {
124
150
  VectorizedCast = 0,
@@ -127,6 +153,34 @@ template <typename Src, typename Tgt> struct type_casting_traits {
127
153
  };
128
154
  };
129
155
 
156
+ /** \internal Wrapper to ensure that multiple packet types can map to the same
157
+ underlying vector type. */
158
+ template<typename T, int unique_id = 0>
159
+ struct eigen_packet_wrapper
160
+ {
161
+ EIGEN_ALWAYS_INLINE operator T&() { return m_val; }
162
+ EIGEN_ALWAYS_INLINE operator const T&() const { return m_val; }
163
+ EIGEN_ALWAYS_INLINE eigen_packet_wrapper() {}
164
+ EIGEN_ALWAYS_INLINE eigen_packet_wrapper(const T &v) : m_val(v) {}
165
+ EIGEN_ALWAYS_INLINE eigen_packet_wrapper& operator=(const T &v) {
166
+ m_val = v;
167
+ return *this;
168
+ }
169
+
170
+ T m_val;
171
+ };
172
+
173
+
174
+ /** \internal A convenience utility for determining if the type is a scalar.
175
+ * This is used to enable some generic packet implementations.
176
+ */
177
+ template<typename Packet>
178
+ struct is_scalar {
179
+ typedef typename unpacket_traits<Packet>::type Scalar;
180
+ enum {
181
+ value = internal::is_same<Packet, Scalar>::value
182
+ };
183
+ };
130
184
 
131
185
  /** \internal \returns static_cast<TgtType>(a) (coeff-wise) */
132
186
  template <typename SrcPacket, typename TgtPacket>
@@ -139,75 +193,406 @@ EIGEN_DEVICE_FUNC inline TgtPacket
139
193
  pcast(const SrcPacket& a, const SrcPacket& /*b*/) {
140
194
  return static_cast<TgtPacket>(a);
141
195
  }
142
-
143
196
  template <typename SrcPacket, typename TgtPacket>
144
197
  EIGEN_DEVICE_FUNC inline TgtPacket
145
198
  pcast(const SrcPacket& a, const SrcPacket& /*b*/, const SrcPacket& /*c*/, const SrcPacket& /*d*/) {
146
199
  return static_cast<TgtPacket>(a);
147
200
  }
201
+ template <typename SrcPacket, typename TgtPacket>
202
+ EIGEN_DEVICE_FUNC inline TgtPacket
203
+ pcast(const SrcPacket& a, const SrcPacket& /*b*/, const SrcPacket& /*c*/, const SrcPacket& /*d*/,
204
+ const SrcPacket& /*e*/, const SrcPacket& /*f*/, const SrcPacket& /*g*/, const SrcPacket& /*h*/) {
205
+ return static_cast<TgtPacket>(a);
206
+ }
207
+
208
+ /** \internal \returns reinterpret_cast<Target>(a) */
209
+ template <typename Target, typename Packet>
210
+ EIGEN_DEVICE_FUNC inline Target
211
+ preinterpret(const Packet& a); /* { return reinterpret_cast<const Target&>(a); } */
148
212
 
149
213
  /** \internal \returns a + b (coeff-wise) */
150
214
  template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
151
- padd(const Packet& a,
152
- const Packet& b) { return a+b; }
215
+ padd(const Packet& a, const Packet& b) { return a+b; }
216
+ // Avoid compiler warning for boolean algebra.
217
+ template<> EIGEN_DEVICE_FUNC inline bool
218
+ padd(const bool& a, const bool& b) { return a || b; }
153
219
 
154
220
  /** \internal \returns a - b (coeff-wise) */
155
221
  template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
156
- psub(const Packet& a,
157
- const Packet& b) { return a-b; }
222
+ psub(const Packet& a, const Packet& b) { return a-b; }
158
223
 
159
224
  /** \internal \returns -a (coeff-wise) */
160
225
  template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
161
226
  pnegate(const Packet& a) { return -a; }
162
227
 
163
- /** \internal \returns conj(a) (coeff-wise) */
228
+ template<> EIGEN_DEVICE_FUNC inline bool
229
+ pnegate(const bool& a) { return !a; }
164
230
 
231
+ /** \internal \returns conj(a) (coeff-wise) */
165
232
  template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
166
233
  pconj(const Packet& a) { return numext::conj(a); }
167
234
 
168
235
  /** \internal \returns a * b (coeff-wise) */
169
236
  template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
170
- pmul(const Packet& a,
171
- const Packet& b) { return a*b; }
237
+ pmul(const Packet& a, const Packet& b) { return a*b; }
238
+ // Avoid compiler warning for boolean algebra.
239
+ template<> EIGEN_DEVICE_FUNC inline bool
240
+ pmul(const bool& a, const bool& b) { return a && b; }
172
241
 
173
242
  /** \internal \returns a / b (coeff-wise) */
174
243
  template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
175
- pdiv(const Packet& a,
176
- const Packet& b) { return a/b; }
244
+ pdiv(const Packet& a, const Packet& b) { return a/b; }
245
+
246
+ // In the generic case, memset to all one bits.
247
+ template<typename Packet, typename EnableIf = void>
248
+ struct ptrue_impl {
249
+ static EIGEN_DEVICE_FUNC inline Packet run(const Packet& /*a*/){
250
+ Packet b;
251
+ memset(static_cast<void*>(&b), 0xff, sizeof(Packet));
252
+ return b;
253
+ }
254
+ };
177
255
 
178
- /** \internal \returns the min of \a a and \a b (coeff-wise) */
256
+ // For non-trivial scalars, set to Scalar(1) (i.e. a non-zero value).
257
+ // Although this is technically not a valid bitmask, the scalar path for pselect
258
+ // uses a comparison to zero, so this should still work in most cases. We don't
259
+ // have another option, since the scalar type requires initialization.
260
+ template<typename T>
261
+ struct ptrue_impl<T,
262
+ typename internal::enable_if<is_scalar<T>::value && NumTraits<T>::RequireInitialization>::type > {
263
+ static EIGEN_DEVICE_FUNC inline T run(const T& /*a*/){
264
+ return T(1);
265
+ }
266
+ };
267
+
268
+ /** \internal \returns one bits. */
179
269
  template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
180
- pmin(const Packet& a,
181
- const Packet& b) { return numext::mini(a, b); }
270
+ ptrue(const Packet& a) {
271
+ return ptrue_impl<Packet>::run(a);
272
+ }
273
+
274
+ // In the general case, memset to zero.
275
+ template<typename Packet, typename EnableIf = void>
276
+ struct pzero_impl {
277
+ static EIGEN_DEVICE_FUNC inline Packet run(const Packet& /*a*/) {
278
+ Packet b;
279
+ memset(static_cast<void*>(&b), 0x00, sizeof(Packet));
280
+ return b;
281
+ }
282
+ };
283
+
284
+ // For scalars, explicitly set to Scalar(0), since the underlying representation
285
+ // for zero may not consist of all-zero bits.
286
+ template<typename T>
287
+ struct pzero_impl<T,
288
+ typename internal::enable_if<is_scalar<T>::value>::type> {
289
+ static EIGEN_DEVICE_FUNC inline T run(const T& /*a*/) {
290
+ return T(0);
291
+ }
292
+ };
182
293
 
183
- /** \internal \returns the max of \a a and \a b (coeff-wise) */
294
+ /** \internal \returns packet of zeros */
184
295
  template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
185
- pmax(const Packet& a,
186
- const Packet& b) { return numext::maxi(a, b); }
296
+ pzero(const Packet& a) {
297
+ return pzero_impl<Packet>::run(a);
298
+ }
187
299
 
188
- /** \internal \returns the absolute value of \a a */
300
+ /** \internal \returns a <= b as a bit mask */
189
301
  template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
190
- pabs(const Packet& a) { using std::abs; return abs(a); }
302
+ pcmp_le(const Packet& a, const Packet& b) { return a<=b ? ptrue(a) : pzero(a); }
191
303
 
192
- /** \internal \returns the phase angle of \a a */
304
+ /** \internal \returns a < b as a bit mask */
193
305
  template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
194
- parg(const Packet& a) { using numext::arg; return arg(a); }
306
+ pcmp_lt(const Packet& a, const Packet& b) { return a<b ? ptrue(a) : pzero(a); }
307
+
308
+ /** \internal \returns a == b as a bit mask */
309
+ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
310
+ pcmp_eq(const Packet& a, const Packet& b) { return a==b ? ptrue(a) : pzero(a); }
311
+
312
+ /** \internal \returns a < b or a==NaN or b==NaN as a bit mask */
313
+ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
314
+ pcmp_lt_or_nan(const Packet& a, const Packet& b) { return a>=b ? pzero(a) : ptrue(a); }
315
+
316
+ template<typename T>
317
+ struct bit_and {
318
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE T operator()(const T& a, const T& b) const {
319
+ return a & b;
320
+ }
321
+ };
322
+
323
+ template<typename T>
324
+ struct bit_or {
325
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE T operator()(const T& a, const T& b) const {
326
+ return a | b;
327
+ }
328
+ };
329
+
330
+ template<typename T>
331
+ struct bit_xor {
332
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE T operator()(const T& a, const T& b) const {
333
+ return a ^ b;
334
+ }
335
+ };
336
+
337
+ template<typename T>
338
+ struct bit_not {
339
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE T operator()(const T& a) const {
340
+ return ~a;
341
+ }
342
+ };
343
+
344
+ // Use operators &, |, ^, ~.
345
+ template<typename T>
346
+ struct operator_bitwise_helper {
347
+ EIGEN_DEVICE_FUNC static inline T bitwise_and(const T& a, const T& b) { return bit_and<T>()(a, b); }
348
+ EIGEN_DEVICE_FUNC static inline T bitwise_or(const T& a, const T& b) { return bit_or<T>()(a, b); }
349
+ EIGEN_DEVICE_FUNC static inline T bitwise_xor(const T& a, const T& b) { return bit_xor<T>()(a, b); }
350
+ EIGEN_DEVICE_FUNC static inline T bitwise_not(const T& a) { return bit_not<T>()(a); }
351
+ };
352
+
353
+ // Apply binary operations byte-by-byte
354
+ template<typename T>
355
+ struct bytewise_bitwise_helper {
356
+ EIGEN_DEVICE_FUNC static inline T bitwise_and(const T& a, const T& b) {
357
+ return binary(a, b, bit_and<unsigned char>());
358
+ }
359
+ EIGEN_DEVICE_FUNC static inline T bitwise_or(const T& a, const T& b) {
360
+ return binary(a, b, bit_or<unsigned char>());
361
+ }
362
+ EIGEN_DEVICE_FUNC static inline T bitwise_xor(const T& a, const T& b) {
363
+ return binary(a, b, bit_xor<unsigned char>());
364
+ }
365
+ EIGEN_DEVICE_FUNC static inline T bitwise_not(const T& a) {
366
+ return unary(a,bit_not<unsigned char>());
367
+ }
368
+
369
+ private:
370
+ template<typename Op>
371
+ EIGEN_DEVICE_FUNC static inline T unary(const T& a, Op op) {
372
+ const unsigned char* a_ptr = reinterpret_cast<const unsigned char*>(&a);
373
+ T c;
374
+ unsigned char* c_ptr = reinterpret_cast<unsigned char*>(&c);
375
+ for (size_t i = 0; i < sizeof(T); ++i) {
376
+ *c_ptr++ = op(*a_ptr++);
377
+ }
378
+ return c;
379
+ }
380
+
381
+ template<typename Op>
382
+ EIGEN_DEVICE_FUNC static inline T binary(const T& a, const T& b, Op op) {
383
+ const unsigned char* a_ptr = reinterpret_cast<const unsigned char*>(&a);
384
+ const unsigned char* b_ptr = reinterpret_cast<const unsigned char*>(&b);
385
+ T c;
386
+ unsigned char* c_ptr = reinterpret_cast<unsigned char*>(&c);
387
+ for (size_t i = 0; i < sizeof(T); ++i) {
388
+ *c_ptr++ = op(*a_ptr++, *b_ptr++);
389
+ }
390
+ return c;
391
+ }
392
+ };
393
+
394
+ // In the general case, use byte-by-byte manipulation.
395
+ template<typename T, typename EnableIf = void>
396
+ struct bitwise_helper : public bytewise_bitwise_helper<T> {};
397
+
398
+ // For integers or non-trivial scalars, use binary operators.
399
+ template<typename T>
400
+ struct bitwise_helper<T,
401
+ typename internal::enable_if<
402
+ is_scalar<T>::value && (NumTraits<T>::IsInteger || NumTraits<T>::RequireInitialization)>::type
403
+ > : public operator_bitwise_helper<T> {};
195
404
 
196
405
  /** \internal \returns the bitwise and of \a a and \a b */
197
406
  template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
198
- pand(const Packet& a, const Packet& b) { return a & b; }
407
+ pand(const Packet& a, const Packet& b) {
408
+ return bitwise_helper<Packet>::bitwise_and(a, b);
409
+ }
199
410
 
200
411
  /** \internal \returns the bitwise or of \a a and \a b */
201
412
  template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
202
- por(const Packet& a, const Packet& b) { return a | b; }
413
+ por(const Packet& a, const Packet& b) {
414
+ return bitwise_helper<Packet>::bitwise_or(a, b);
415
+ }
203
416
 
204
417
  /** \internal \returns the bitwise xor of \a a and \a b */
205
418
  template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
206
- pxor(const Packet& a, const Packet& b) { return a ^ b; }
419
+ pxor(const Packet& a, const Packet& b) {
420
+ return bitwise_helper<Packet>::bitwise_xor(a, b);
421
+ }
422
+
423
+ /** \internal \returns the bitwise not of \a a */
424
+ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
425
+ pnot(const Packet& a) {
426
+ return bitwise_helper<Packet>::bitwise_not(a);
427
+ }
428
+
429
+ /** \internal \returns the bitwise and of \a a and not \a b */
430
+ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
431
+ pandnot(const Packet& a, const Packet& b) { return pand(a, pnot(b)); }
432
+
433
+ // In the general case, use bitwise select.
434
+ template<typename Packet, typename EnableIf = void>
435
+ struct pselect_impl {
436
+ static EIGEN_DEVICE_FUNC inline Packet run(const Packet& mask, const Packet& a, const Packet& b) {
437
+ return por(pand(a,mask),pandnot(b,mask));
438
+ }
439
+ };
440
+
441
+ // For scalars, use ternary select.
442
+ template<typename Packet>
443
+ struct pselect_impl<Packet,
444
+ typename internal::enable_if<is_scalar<Packet>::value>::type > {
445
+ static EIGEN_DEVICE_FUNC inline Packet run(const Packet& mask, const Packet& a, const Packet& b) {
446
+ return numext::equal_strict(mask, Packet(0)) ? b : a;
447
+ }
448
+ };
449
+
450
+ /** \internal \returns \a a or \a b for each field in packet according to \a mask */
451
+ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
452
+ pselect(const Packet& mask, const Packet& a, const Packet& b) {
453
+ return pselect_impl<Packet>::run(mask, a, b);
454
+ }
455
+
456
+ template<> EIGEN_DEVICE_FUNC inline bool pselect<bool>(
457
+ const bool& cond, const bool& a, const bool& b) {
458
+ return cond ? a : b;
459
+ }
460
+
461
+ /** \internal \returns the min or max of \a a and \a b (coeff-wise)
462
+ If either \a a or \a b are NaN, the result is implementation defined. */
463
+ template<int NaNPropagation>
464
+ struct pminmax_impl {
465
+ template <typename Packet, typename Op>
466
+ static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a, const Packet& b, Op op) {
467
+ return op(a,b);
468
+ }
469
+ };
470
+
471
+ /** \internal \returns the min or max of \a a and \a b (coeff-wise)
472
+ If either \a a or \a b are NaN, NaN is returned. */
473
+ template<>
474
+ struct pminmax_impl<PropagateNaN> {
475
+ template <typename Packet, typename Op>
476
+ static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a, const Packet& b, Op op) {
477
+ Packet not_nan_mask_a = pcmp_eq(a, a);
478
+ Packet not_nan_mask_b = pcmp_eq(b, b);
479
+ return pselect(not_nan_mask_a,
480
+ pselect(not_nan_mask_b, op(a, b), b),
481
+ a);
482
+ }
483
+ };
484
+
485
+ /** \internal \returns the min or max of \a a and \a b (coeff-wise)
486
+ If both \a a and \a b are NaN, NaN is returned.
487
+ Equivalent to std::fmin(a, b) or std::fmax(a, b), depending on the operation. */
488
+ template<>
489
+ struct pminmax_impl<PropagateNumbers> {
490
+ template <typename Packet, typename Op>
491
+ static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a, const Packet& b, Op op) {
492
+ Packet not_nan_mask_a = pcmp_eq(a, a);
493
+ Packet not_nan_mask_b = pcmp_eq(b, b);
494
+ return pselect(not_nan_mask_a,
495
+ pselect(not_nan_mask_b, op(a, b), a),
496
+ b);
497
+ }
498
+ };
499
+
500
+
501
+ #ifndef SYCL_DEVICE_ONLY
502
+ #define EIGEN_BINARY_OP_NAN_PROPAGATION(Type, Func) Func
503
+ #else
504
+ #define EIGEN_BINARY_OP_NAN_PROPAGATION(Type, Func) \
505
+ [](const Type& a, const Type& b) { \
506
+ return Func(a, b);}
507
+ #endif
508
+
509
+ /** \internal \returns the min of \a a and \a b (coeff-wise).
510
+ If \a a or \a b is NaN, the return value is implementation defined. */
511
+ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
512
+ pmin(const Packet& a, const Packet& b) { return numext::mini(a,b); }
513
+
514
+ /** \internal \returns the min of \a a and \a b (coeff-wise).
515
+ NaNPropagation determines the NaN propagation semantics. */
516
+ template <int NaNPropagation, typename Packet>
517
+ EIGEN_DEVICE_FUNC inline Packet pmin(const Packet& a, const Packet& b) {
518
+ return pminmax_impl<NaNPropagation>::run(a, b, EIGEN_BINARY_OP_NAN_PROPAGATION(Packet, (pmin<Packet>)));
519
+ }
520
+
521
+ /** \internal \returns the max of \a a and \a b (coeff-wise)
522
+ If \a a or \a b is NaN, the return value is implementation defined. */
523
+ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
524
+ pmax(const Packet& a, const Packet& b) { return numext::maxi(a, b); }
525
+
526
+ /** \internal \returns the max of \a a and \a b (coeff-wise).
527
+ NaNPropagation determines the NaN propagation semantics. */
528
+ template <int NaNPropagation, typename Packet>
529
+ EIGEN_DEVICE_FUNC inline Packet pmax(const Packet& a, const Packet& b) {
530
+ return pminmax_impl<NaNPropagation>::run(a, b, EIGEN_BINARY_OP_NAN_PROPAGATION(Packet,(pmax<Packet>)));
531
+ }
532
+
533
+ /** \internal \returns the absolute value of \a a */
534
+ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
535
+ pabs(const Packet& a) { return numext::abs(a); }
536
+ template<> EIGEN_DEVICE_FUNC inline unsigned int
537
+ pabs(const unsigned int& a) { return a; }
538
+ template<> EIGEN_DEVICE_FUNC inline unsigned long
539
+ pabs(const unsigned long& a) { return a; }
540
+ template<> EIGEN_DEVICE_FUNC inline unsigned long long
541
+ pabs(const unsigned long long& a) { return a; }
542
+
543
+ /** \internal \returns \a a + \a b on the coefficients selected by peven_mask and \a a - \a b on the others */
544
+ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
545
+ paddsub(const Packet& a, const Packet& b) {
546
+ return pselect(peven_mask(a), padd(a, b), psub(a, b));
547
+ }
548
+
549
+ /** \internal \returns the phase angle of \a a */
550
+ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
551
+ parg(const Packet& a) { using numext::arg; return arg(a); }
552
+
553
+
554
+ /** \internal \returns \a a arithmetically shifted by N bits to the right */
555
+ template<int N> EIGEN_DEVICE_FUNC inline int
556
+ parithmetic_shift_right(const int& a) { return a >> N; }
557
+ template<int N> EIGEN_DEVICE_FUNC inline long int
558
+ parithmetic_shift_right(const long int& a) { return a >> N; }
559
+
560
+ /** \internal \returns \a a logically shifted by N bits to the right */
561
+ template<int N> EIGEN_DEVICE_FUNC inline int
562
+ plogical_shift_right(const int& a) { return static_cast<int>(static_cast<unsigned int>(a) >> N); }
563
+ template<int N> EIGEN_DEVICE_FUNC inline long int
564
+ plogical_shift_right(const long int& a) { return static_cast<long>(static_cast<unsigned long>(a) >> N); }
207
565
 
208
- /** \internal \returns the bitwise andnot of \a a and \a b */
566
+ /** \internal \returns \a a shifted by N bits to the left */
567
+ template<int N> EIGEN_DEVICE_FUNC inline int
568
+ plogical_shift_left(const int& a) { return a << N; }
569
+ template<int N> EIGEN_DEVICE_FUNC inline long int
570
+ plogical_shift_left(const long int& a) { return a << N; }
571
+
572
+ /** \internal \returns the significand and exponent of the underlying floating point numbers
573
+ * See https://en.cppreference.com/w/cpp/numeric/math/frexp
574
+ */
575
+ template <typename Packet>
576
+ EIGEN_DEVICE_FUNC inline Packet pfrexp(const Packet& a, Packet& exponent) {
577
+ int exp;
578
+ EIGEN_USING_STD(frexp);
579
+ Packet result = static_cast<Packet>(frexp(a, &exp));
580
+ exponent = static_cast<Packet>(exp);
581
+ return result;
582
+ }
583
+
584
+ /** \internal \returns a * 2^((int)exponent)
585
+ * See https://en.cppreference.com/w/cpp/numeric/math/ldexp
586
+ */
587
+ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
588
+ pldexp(const Packet &a, const Packet &exponent) {
589
+ EIGEN_USING_STD(ldexp)
590
+ return static_cast<Packet>(ldexp(a, static_cast<int>(exponent)));
591
+ }
592
+
593
+ /** \internal \returns the absolute difference of \a a and \a b (coeff-wise) */
209
594
  template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
210
- pandnot(const Packet& a, const Packet& b) { return a & (!b); }
595
+ pabsdiff(const Packet& a, const Packet& b) { return pselect(pcmp_lt(a, b), psub(b, a), psub(a, b)); }
211
596
 
212
597
  /** \internal \returns a packet version of \a *from, from must be 16 bytes aligned */
213
598
  template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
@@ -217,10 +602,22 @@ pload(const typename unpacket_traits<Packet>::type* from) { return *from; }
217
602
  template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
218
603
  ploadu(const typename unpacket_traits<Packet>::type* from) { return *from; }
219
604
 
605
+ /** \internal \returns a packet version of \a *from, (un-aligned masked load)
606
+ * There is no generic implementation. We only have implementations for specialized
607
+ * cases. Generic case should not be called.
608
+ */
609
+ template<typename Packet> EIGEN_DEVICE_FUNC inline
610
+ typename enable_if<unpacket_traits<Packet>::masked_load_available, Packet>::type
611
+ ploadu(const typename unpacket_traits<Packet>::type* from, typename unpacket_traits<Packet>::mask_t umask);
612
+
220
613
  /** \internal \returns a packet with constant coefficients \a a, e.g.: (a,a,a,a) */
221
614
  template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
222
615
  pset1(const typename unpacket_traits<Packet>::type& a) { return a; }
223
616
 
617
+ /** \internal \returns a packet with constant coefficients set from bits */
618
+ template<typename Packet,typename BitsType> EIGEN_DEVICE_FUNC inline Packet
619
+ pset1frombits(BitsType a);
620
+
224
621
  /** \internal \returns a packet with constant coefficients \a a[0], e.g.: (a[0],a[0],a[0],a[0]) */
225
622
  template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
226
623
  pload1(const typename unpacket_traits<Packet>::type *a) { return pset1<Packet>(*a); }
@@ -237,7 +634,7 @@ ploaddup(const typename unpacket_traits<Packet>::type* from) { return *from; }
237
634
  * For instance, for a packet of 8 elements, 2 scalars will be read from \a *from and
238
635
  * replicated to form: {from[0],from[0],from[0],from[0],from[1],from[1],from[1],from[1]}
239
636
  * Currently, this function is only used in matrix products.
240
- * For packet-size smaller or equal to 4, this function is equivalent to pload1
637
+ * For packet-size smaller or equal to 4, this function is equivalent to pload1
241
638
  */
242
639
  template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
243
640
  ploadquad(const typename unpacket_traits<Packet>::type* from)
@@ -281,6 +678,20 @@ inline void pbroadcast2(const typename unpacket_traits<Packet>::type *a,
281
678
  template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet
282
679
  plset(const typename unpacket_traits<Packet>::type& a) { return a; }
283
680
 
681
+ /** \internal \returns a packet with alternating coefficients, e.g.: (x, 0, x, 0),
682
+ where x is the value of all 1-bits. */
683
+ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
684
+ peven_mask(const Packet& /*a*/) {
685
+ typedef typename unpacket_traits<Packet>::type Scalar;
686
+ const size_t n = unpacket_traits<Packet>::size;
687
+ EIGEN_ALIGN_TO_BOUNDARY(sizeof(Packet)) Scalar elements[n];
688
+ for(size_t i = 0; i < n; ++i) {
689
+ memset(elements+i, ((i & 1) == 0 ? 0xff : 0), sizeof(Scalar));
690
+ }
691
+ return ploadu<Packet>(elements);
692
+ }
693
+
694
+
284
695
  /** \internal copy the packet \a from to \a *to, \a to must be 16 bytes aligned */
285
696
  template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstore(Scalar* to, const Packet& from)
286
697
  { (*to) = from; }
@@ -289,6 +700,15 @@ template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstore(
289
700
  template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstoreu(Scalar* to, const Packet& from)
290
701
  { (*to) = from; }
291
702
 
703
+ /** \internal copy the packet \a from to \a *to, (un-aligned store with a mask)
704
+ * There is no generic implementation. We only have implementations for specialized
705
+ * cases. Generic case should not be called.
706
+ */
707
+ template<typename Scalar, typename Packet>
708
+ EIGEN_DEVICE_FUNC inline
709
+ typename enable_if<unpacket_traits<Packet>::masked_store_available, void>::type
710
+ pstoreu(Scalar* to, const Packet& from, typename unpacket_traits<Packet>::mask_t umask);
711
+
292
712
  template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline Packet pgather(const Scalar* from, Index /*stride*/)
293
713
  { return ploadu<Packet>(from); }
294
714
 
@@ -298,8 +718,10 @@ template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstoreu
298
718
  /** \internal tries to do cache prefetching of \a addr */
299
719
  template<typename Scalar> EIGEN_DEVICE_FUNC inline void prefetch(const Scalar* addr)
300
720
  {
301
- #ifdef __CUDA_ARCH__
302
- #if defined(__LP64__)
721
+ #if defined(EIGEN_HIP_DEVICE_COMPILE)
722
+ // do nothing
723
+ #elif defined(EIGEN_CUDA_ARCH)
724
+ #if defined(__LP64__) || EIGEN_OS_WIN64
303
725
  // 64-bit pointer operand constraint for inlined asm
304
726
  asm(" prefetch.L1 [ %1 ];" : "=l"(addr) : "l"(addr));
305
727
  #else
@@ -311,39 +733,6 @@ template<typename Scalar> EIGEN_DEVICE_FUNC inline void prefetch(const Scalar* a
311
733
  #endif
312
734
  }
313
735
 
314
- /** \internal \returns the first element of a packet */
315
- template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type pfirst(const Packet& a)
316
- { return a; }
317
-
318
- /** \internal \returns a packet where the element i contains the sum of the packet of \a vec[i] */
319
- template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
320
- preduxp(const Packet* vecs) { return vecs[0]; }
321
-
322
- /** \internal \returns the sum of the elements of \a a*/
323
- template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux(const Packet& a)
324
- { return a; }
325
-
326
- /** \internal \returns the sum of the elements of \a a by block of 4 elements.
327
- * For a packet {a0, a1, a2, a3, a4, a5, a6, a7}, it returns a half packet {a0+a4, a1+a5, a2+a6, a3+a7}
328
- * For packet-size smaller or equal to 4, this boils down to a noop.
329
- */
330
- template<typename Packet> EIGEN_DEVICE_FUNC inline
331
- typename conditional<(unpacket_traits<Packet>::size%8)==0,typename unpacket_traits<Packet>::half,Packet>::type
332
- predux_downto4(const Packet& a)
333
- { return a; }
334
-
335
- /** \internal \returns the product of the elements of \a a*/
336
- template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_mul(const Packet& a)
337
- { return a; }
338
-
339
- /** \internal \returns the min of the elements of \a a*/
340
- template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_min(const Packet& a)
341
- { return a; }
342
-
343
- /** \internal \returns the max of the elements of \a a*/
344
- template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_max(const Packet& a)
345
- { return a; }
346
-
347
736
  /** \internal \returns the reversed elements of \a a*/
348
737
  template<typename Packet> EIGEN_DEVICE_FUNC inline Packet preverse(const Packet& a)
349
738
  { return a; }
@@ -351,10 +740,7 @@ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet preverse(const Packet&
351
740
  /** \internal \returns \a a with real and imaginary part flipped (for complex type only) */
352
741
  template<typename Packet> EIGEN_DEVICE_FUNC inline Packet pcplxflip(const Packet& a)
353
742
  {
354
- // FIXME: uncomment the following in case we drop the internal imag and real functions.
355
- // using std::imag;
356
- // using std::real;
357
- return Packet(imag(a),real(a));
743
+ return Packet(numext::imag(a),numext::real(a));
358
744
  }
359
745
 
360
746
  /**************************
@@ -363,47 +749,51 @@ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet pcplxflip(const Packet
363
749
 
364
750
  /** \internal \returns the sine of \a a (coeff-wise) */
365
751
  template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
366
- Packet psin(const Packet& a) { using std::sin; return sin(a); }
752
+ Packet psin(const Packet& a) { EIGEN_USING_STD(sin); return sin(a); }
367
753
 
368
754
  /** \internal \returns the cosine of \a a (coeff-wise) */
369
755
  template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
370
- Packet pcos(const Packet& a) { using std::cos; return cos(a); }
756
+ Packet pcos(const Packet& a) { EIGEN_USING_STD(cos); return cos(a); }
371
757
 
372
758
  /** \internal \returns the tan of \a a (coeff-wise) */
373
759
  template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
374
- Packet ptan(const Packet& a) { using std::tan; return tan(a); }
760
+ Packet ptan(const Packet& a) { EIGEN_USING_STD(tan); return tan(a); }
375
761
 
376
762
  /** \internal \returns the arc sine of \a a (coeff-wise) */
377
763
  template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
378
- Packet pasin(const Packet& a) { using std::asin; return asin(a); }
764
+ Packet pasin(const Packet& a) { EIGEN_USING_STD(asin); return asin(a); }
379
765
 
380
766
  /** \internal \returns the arc cosine of \a a (coeff-wise) */
381
767
  template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
382
- Packet pacos(const Packet& a) { using std::acos; return acos(a); }
768
+ Packet pacos(const Packet& a) { EIGEN_USING_STD(acos); return acos(a); }
383
769
 
384
770
  /** \internal \returns the arc tangent of \a a (coeff-wise) */
385
771
  template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
386
- Packet patan(const Packet& a) { using std::atan; return atan(a); }
772
+ Packet patan(const Packet& a) { EIGEN_USING_STD(atan); return atan(a); }
387
773
 
388
774
  /** \internal \returns the hyperbolic sine of \a a (coeff-wise) */
389
775
  template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
390
- Packet psinh(const Packet& a) { using std::sinh; return sinh(a); }
776
+ Packet psinh(const Packet& a) { EIGEN_USING_STD(sinh); return sinh(a); }
391
777
 
392
778
  /** \internal \returns the hyperbolic cosine of \a a (coeff-wise) */
393
779
  template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
394
- Packet pcosh(const Packet& a) { using std::cosh; return cosh(a); }
780
+ Packet pcosh(const Packet& a) { EIGEN_USING_STD(cosh); return cosh(a); }
395
781
 
396
782
  /** \internal \returns the hyperbolic tan of \a a (coeff-wise) */
397
783
  template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
398
- Packet ptanh(const Packet& a) { using std::tanh; return tanh(a); }
784
+ Packet ptanh(const Packet& a) { EIGEN_USING_STD(tanh); return tanh(a); }
399
785
 
400
786
  /** \internal \returns the exp of \a a (coeff-wise) */
401
787
  template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
402
- Packet pexp(const Packet& a) { using std::exp; return exp(a); }
788
+ Packet pexp(const Packet& a) { EIGEN_USING_STD(exp); return exp(a); }
789
+
790
+ /** \internal \returns the expm1 of \a a (coeff-wise) */
791
+ template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
792
+ Packet pexpm1(const Packet& a) { return numext::expm1(a); }
403
793
 
404
794
  /** \internal \returns the log of \a a (coeff-wise) */
405
795
  template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
406
- Packet plog(const Packet& a) { using std::log; return log(a); }
796
+ Packet plog(const Packet& a) { EIGEN_USING_STD(log); return log(a); }
407
797
 
408
798
  /** \internal \returns the log1p of \a a (coeff-wise) */
409
799
  template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
@@ -411,16 +801,24 @@ Packet plog1p(const Packet& a) { return numext::log1p(a); }
411
801
 
412
802
  /** \internal \returns the log10 of \a a (coeff-wise) */
413
803
  template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
414
- Packet plog10(const Packet& a) { using std::log10; return log10(a); }
804
+ Packet plog10(const Packet& a) { EIGEN_USING_STD(log10); return log10(a); }
805
+
806
+ /** \internal \returns the log2 of \a a (coeff-wise) */
807
+ template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
808
+ Packet plog2(const Packet& a) {
809
+ typedef typename internal::unpacket_traits<Packet>::type Scalar;
810
+ return pmul(pset1<Packet>(Scalar(EIGEN_LOG2E)), plog(a));
811
+ }
415
812
 
416
813
  /** \internal \returns the square-root of \a a (coeff-wise) */
417
814
  template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
418
- Packet psqrt(const Packet& a) { using std::sqrt; return sqrt(a); }
815
+ Packet psqrt(const Packet& a) { return numext::sqrt(a); }
419
816
 
420
817
  /** \internal \returns the reciprocal square-root of \a a (coeff-wise) */
421
818
  template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
422
819
  Packet prsqrt(const Packet& a) {
423
- return pdiv(pset1<Packet>(1), psqrt(a));
820
+ typedef typename internal::unpacket_traits<Packet>::type Scalar;
821
+ return pdiv(pset1<Packet>(Scalar(1)), psqrt(a));
424
822
  }
425
823
 
426
824
  /** \internal \returns the rounded value of \a a (coeff-wise) */
@@ -431,15 +829,121 @@ Packet pround(const Packet& a) { using numext::round; return round(a); }
431
829
  template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
432
830
  Packet pfloor(const Packet& a) { using numext::floor; return floor(a); }
433
831
 
832
+ /** \internal \returns the rounded value of \a a (coeff-wise) with current
833
+ * rounding mode */
834
+ template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
835
+ Packet print(const Packet& a) { using numext::rint; return rint(a); }
836
+
434
837
  /** \internal \returns the ceil of \a a (coeff-wise) */
435
838
  template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
436
839
  Packet pceil(const Packet& a) { using numext::ceil; return ceil(a); }
437
840
 
841
+ /** \internal \returns the first element of a packet */
842
+ template<typename Packet>
843
+ EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type
844
+ pfirst(const Packet& a)
845
+ { return a; }
846
+
847
+ /** \internal \returns the sum of the elements of upper and lower half of \a a if \a a is larger than 4.
848
+ * For a packet {a0, a1, a2, a3, a4, a5, a6, a7}, it returns a half packet {a0+a4, a1+a5, a2+a6, a3+a7}
849
+ * For packet-size smaller or equal to 4, this boils down to a noop.
850
+ */
851
+ template<typename Packet>
852
+ EIGEN_DEVICE_FUNC inline typename conditional<(unpacket_traits<Packet>::size%8)==0,typename unpacket_traits<Packet>::half,Packet>::type
853
+ predux_half_dowto4(const Packet& a)
854
+ { return a; }
855
+
856
+ // Slow generic implementation of Packet reduction.
857
+ template <typename Packet, typename Op>
858
+ EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type
859
+ predux_helper(const Packet& a, Op op) {
860
+ typedef typename unpacket_traits<Packet>::type Scalar;
861
+ const size_t n = unpacket_traits<Packet>::size;
862
+ EIGEN_ALIGN_TO_BOUNDARY(sizeof(Packet)) Scalar elements[n];
863
+ pstoreu<Scalar>(elements, a);
864
+ for(size_t k = n / 2; k > 0; k /= 2) {
865
+ for(size_t i = 0; i < k; ++i) {
866
+ elements[i] = op(elements[i], elements[i + k]);
867
+ }
868
+ }
869
+ return elements[0];
870
+ }
871
+
872
+ /** \internal \returns the sum of the elements of \a a*/
873
+ template<typename Packet>
874
+ EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type
875
+ predux(const Packet& a)
876
+ {
877
+ return a;
878
+ }
879
+
880
+ /** \internal \returns the product of the elements of \a a */
881
+ template <typename Packet>
882
+ EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_mul(
883
+ const Packet& a) {
884
+ typedef typename unpacket_traits<Packet>::type Scalar;
885
+ return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmul<Scalar>)));
886
+ }
887
+
888
+ /** \internal \returns the min of the elements of \a a */
889
+ template <typename Packet>
890
+ EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_min(
891
+ const Packet &a) {
892
+ typedef typename unpacket_traits<Packet>::type Scalar;
893
+ return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmin<PropagateFast, Scalar>)));
894
+ }
895
+
896
+ template <int NaNPropagation, typename Packet>
897
+ EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_min(
898
+ const Packet& a) {
899
+ typedef typename unpacket_traits<Packet>::type Scalar;
900
+ return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmin<NaNPropagation, Scalar>)));
901
+ }
902
+
903
+ /** \internal \returns the max of the elements of \a a */
904
+ template <typename Packet>
905
+ EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_max(
906
+ const Packet &a) {
907
+ typedef typename unpacket_traits<Packet>::type Scalar;
908
+ return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmax<PropagateFast, Scalar>)));
909
+ }
910
+
911
+ template <int NaNPropagation, typename Packet>
912
+ EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_max(
913
+ const Packet& a) {
914
+ typedef typename unpacket_traits<Packet>::type Scalar;
915
+ return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmax<NaNPropagation, Scalar>)));
916
+ }
917
+
918
+ #undef EIGEN_BINARY_OP_NAN_PROPAGATION
919
+
920
+ /** \internal \returns true if all coeffs of \a a mean "true"
921
+ * It is supposed to be called on values returned by pcmp_*.
922
+ */
923
+ // not needed yet
924
+ // template<typename Packet> EIGEN_DEVICE_FUNC inline bool predux_all(const Packet& a)
925
+ // { return bool(a); }
926
+
927
+ /** \internal \returns true if any coeff of \a a means "true"
928
+ * It is supposed to be called on values returned by pcmp_*.
929
+ */
930
+ template<typename Packet> EIGEN_DEVICE_FUNC inline bool predux_any(const Packet& a)
931
+ {
932
+ // Dirty but generic implementation where "true" is assumed to be non-zero and the same for all coefficients.
933
+ // It is expected that "true" is either:
934
+ // - Scalar(1)
935
+ // - bits full of ones (NaN for floats),
936
+ // - or first bit equals to 1 (1 for ints, smallest denormal for floats).
937
+ // For all these cases, taking the sum is just fine, and this boils down to a no-op for scalars.
938
+ typedef typename unpacket_traits<Packet>::type Scalar;
939
+ return numext::not_equal_strict(predux(a), Scalar(0));
940
+ }
941
+
438
942
  /***************************************************************************
439
943
  * The following functions might not have to be overwritten for vectorized types
440
944
  ***************************************************************************/
441
945
 
442
- /** \internal copy a packet with constant coeficient \a a (e.g., [a,a,a,a]) to \a *to. \a to must be 16 bytes aligned */
946
+ /** \internal copy a packet with constant coefficient \a a (e.g., [a,a,a,a]) to \a *to. \a to must be 16 bytes aligned */
443
947
  // NOTE: this function must really be templated on the packet type (think about different packet types for the same scalar type)
444
948
  template<typename Packet>
445
949
  inline void pstore1(typename unpacket_traits<Packet>::type* to, const typename unpacket_traits<Packet>::type& a)
@@ -487,47 +991,18 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt_ro(const typename unpacket_t
487
991
  return ploadt<Packet, LoadMode>(from);
488
992
  }
489
993
 
490
- /** \internal default implementation of palign() allowing partial specialization */
491
- template<int Offset,typename PacketType>
492
- struct palign_impl
493
- {
494
- // by default data are aligned, so there is nothing to be done :)
495
- static inline void run(PacketType&, const PacketType&) {}
496
- };
497
-
498
- /** \internal update \a first using the concatenation of the packet_size minus \a Offset last elements
499
- * of \a first and \a Offset first elements of \a second.
500
- *
501
- * This function is currently only used to optimize matrix-vector products on unligned matrices.
502
- * It takes 2 packets that represent a contiguous memory array, and returns a packet starting
503
- * at the position \a Offset. For instance, for packets of 4 elements, we have:
504
- * Input:
505
- * - first = {f0,f1,f2,f3}
506
- * - second = {s0,s1,s2,s3}
507
- * Output:
508
- * - if Offset==0 then {f0,f1,f2,f3}
509
- * - if Offset==1 then {f1,f2,f3,s0}
510
- * - if Offset==2 then {f2,f3,s0,s1}
511
- * - if Offset==3 then {f3,s0,s1,s3}
512
- */
513
- template<int Offset,typename PacketType>
514
- inline void palign(PacketType& first, const PacketType& second)
515
- {
516
- palign_impl<Offset,PacketType>::run(first,second);
517
- }
518
-
519
994
  /***************************************************************************
520
995
  * Fast complex products (GCC generates a function call which is very slow)
521
996
  ***************************************************************************/
522
997
 
523
998
  // Eigen+CUDA does not support complexes.
524
- #ifndef __CUDACC__
999
+ #if !defined(EIGEN_GPUCC)
525
1000
 
526
1001
  template<> inline std::complex<float> pmul(const std::complex<float>& a, const std::complex<float>& b)
527
- { return std::complex<float>(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); }
1002
+ { return std::complex<float>(a.real()*b.real() - a.imag()*b.imag(), a.imag()*b.real() + a.real()*b.imag()); }
528
1003
 
529
1004
  template<> inline std::complex<double> pmul(const std::complex<double>& a, const std::complex<double>& b)
530
- { return std::complex<double>(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); }
1005
+ { return std::complex<double>(a.real()*b.real() - a.imag()*b.imag(), a.imag()*b.real() + a.real()*b.imag()); }
531
1006
 
532
1007
  #endif
533
1008
 
@@ -558,34 +1033,6 @@ pblend(const Selector<unpacket_traits<Packet>::size>& ifPacket, const Packet& th
558
1033
  return ifPacket.select[0] ? thenPacket : elsePacket;
559
1034
  }
560
1035
 
561
- /** \internal \returns \a a with the first coefficient replaced by the scalar b */
562
- template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
563
- pinsertfirst(const Packet& a, typename unpacket_traits<Packet>::type b)
564
- {
565
- // Default implementation based on pblend.
566
- // It must be specialized for higher performance.
567
- Selector<unpacket_traits<Packet>::size> mask;
568
- mask.select[0] = true;
569
- // This for loop should be optimized away by the compiler.
570
- for(Index i=1; i<unpacket_traits<Packet>::size; ++i)
571
- mask.select[i] = false;
572
- return pblend(mask, pset1<Packet>(b), a);
573
- }
574
-
575
- /** \internal \returns \a a with the last coefficient replaced by the scalar b */
576
- template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
577
- pinsertlast(const Packet& a, typename unpacket_traits<Packet>::type b)
578
- {
579
- // Default implementation based on pblend.
580
- // It must be specialized for higher performance.
581
- Selector<unpacket_traits<Packet>::size> mask;
582
- // This for loop should be optimized away by the compiler.
583
- for(Index i=0; i<unpacket_traits<Packet>::size-1; ++i)
584
- mask.select[i] = false;
585
- mask.select[unpacket_traits<Packet>::size-1] = true;
586
- return pblend(mask, pset1<Packet>(b), a);
587
- }
588
-
589
1036
  } // end namespace internal
590
1037
 
591
1038
  } // end namespace Eigen