tomoto 0.2.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (369) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +10 -0
  3. data/README.md +8 -10
  4. data/ext/tomoto/ct.cpp +11 -11
  5. data/ext/tomoto/dmr.cpp +14 -13
  6. data/ext/tomoto/dt.cpp +14 -14
  7. data/ext/tomoto/extconf.rb +7 -5
  8. data/ext/tomoto/gdmr.cpp +7 -7
  9. data/ext/tomoto/hdp.cpp +9 -9
  10. data/ext/tomoto/hlda.cpp +13 -13
  11. data/ext/tomoto/hpa.cpp +5 -5
  12. data/ext/tomoto/lda.cpp +42 -39
  13. data/ext/tomoto/llda.cpp +6 -6
  14. data/ext/tomoto/mglda.cpp +15 -15
  15. data/ext/tomoto/pa.cpp +6 -6
  16. data/ext/tomoto/plda.cpp +6 -6
  17. data/ext/tomoto/slda.cpp +8 -8
  18. data/ext/tomoto/{ext.cpp → tomoto.cpp} +8 -8
  19. data/ext/tomoto/utils.h +16 -70
  20. data/lib/tomoto/version.rb +1 -1
  21. data/lib/tomoto.rb +5 -1
  22. data/vendor/EigenRand/EigenRand/Core.h +10 -10
  23. data/vendor/EigenRand/EigenRand/Dists/Basic.h +208 -9
  24. data/vendor/EigenRand/EigenRand/Dists/Discrete.h +52 -31
  25. data/vendor/EigenRand/EigenRand/Dists/GammaPoisson.h +9 -8
  26. data/vendor/EigenRand/EigenRand/Dists/NormalExp.h +28 -21
  27. data/vendor/EigenRand/EigenRand/EigenRand +11 -6
  28. data/vendor/EigenRand/EigenRand/Macro.h +13 -7
  29. data/vendor/EigenRand/EigenRand/MorePacketMath.h +348 -740
  30. data/vendor/EigenRand/EigenRand/MvDists/Multinomial.h +5 -3
  31. data/vendor/EigenRand/EigenRand/MvDists/MvNormal.h +9 -3
  32. data/vendor/EigenRand/EigenRand/PacketFilter.h +11 -253
  33. data/vendor/EigenRand/EigenRand/PacketRandomEngine.h +21 -47
  34. data/vendor/EigenRand/EigenRand/RandUtils.h +50 -344
  35. data/vendor/EigenRand/EigenRand/arch/AVX/MorePacketMath.h +619 -0
  36. data/vendor/EigenRand/EigenRand/arch/AVX/PacketFilter.h +149 -0
  37. data/vendor/EigenRand/EigenRand/arch/AVX/RandUtils.h +228 -0
  38. data/vendor/EigenRand/EigenRand/arch/NEON/MorePacketMath.h +473 -0
  39. data/vendor/EigenRand/EigenRand/arch/NEON/PacketFilter.h +142 -0
  40. data/vendor/EigenRand/EigenRand/arch/NEON/RandUtils.h +126 -0
  41. data/vendor/EigenRand/EigenRand/arch/SSE/MorePacketMath.h +501 -0
  42. data/vendor/EigenRand/EigenRand/arch/SSE/PacketFilter.h +133 -0
  43. data/vendor/EigenRand/EigenRand/arch/SSE/RandUtils.h +120 -0
  44. data/vendor/EigenRand/EigenRand/doc.h +24 -12
  45. data/vendor/EigenRand/README.md +57 -4
  46. data/vendor/eigen/COPYING.APACHE +203 -0
  47. data/vendor/eigen/COPYING.BSD +1 -1
  48. data/vendor/eigen/COPYING.MINPACK +51 -52
  49. data/vendor/eigen/Eigen/Cholesky +0 -1
  50. data/vendor/eigen/Eigen/Core +112 -265
  51. data/vendor/eigen/Eigen/Eigenvalues +2 -3
  52. data/vendor/eigen/Eigen/Geometry +5 -8
  53. data/vendor/eigen/Eigen/Householder +0 -1
  54. data/vendor/eigen/Eigen/Jacobi +0 -1
  55. data/vendor/eigen/Eigen/KLUSupport +41 -0
  56. data/vendor/eigen/Eigen/LU +2 -5
  57. data/vendor/eigen/Eigen/OrderingMethods +0 -3
  58. data/vendor/eigen/Eigen/PaStiXSupport +1 -0
  59. data/vendor/eigen/Eigen/PardisoSupport +0 -0
  60. data/vendor/eigen/Eigen/QR +2 -3
  61. data/vendor/eigen/Eigen/QtAlignedMalloc +0 -1
  62. data/vendor/eigen/Eigen/SVD +0 -1
  63. data/vendor/eigen/Eigen/Sparse +0 -2
  64. data/vendor/eigen/Eigen/SparseCholesky +0 -8
  65. data/vendor/eigen/Eigen/SparseLU +4 -0
  66. data/vendor/eigen/Eigen/SparseQR +0 -1
  67. data/vendor/eigen/Eigen/src/Cholesky/LDLT.h +42 -27
  68. data/vendor/eigen/Eigen/src/Cholesky/LLT.h +39 -23
  69. data/vendor/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +90 -47
  70. data/vendor/eigen/Eigen/src/Core/ArithmeticSequence.h +413 -0
  71. data/vendor/eigen/Eigen/src/Core/Array.h +99 -11
  72. data/vendor/eigen/Eigen/src/Core/ArrayBase.h +3 -3
  73. data/vendor/eigen/Eigen/src/Core/ArrayWrapper.h +21 -21
  74. data/vendor/eigen/Eigen/src/Core/Assign.h +1 -1
  75. data/vendor/eigen/Eigen/src/Core/AssignEvaluator.h +125 -50
  76. data/vendor/eigen/Eigen/src/Core/Assign_MKL.h +10 -10
  77. data/vendor/eigen/Eigen/src/Core/BandMatrix.h +16 -16
  78. data/vendor/eigen/Eigen/src/Core/Block.h +56 -60
  79. data/vendor/eigen/Eigen/src/Core/BooleanRedux.h +29 -31
  80. data/vendor/eigen/Eigen/src/Core/CommaInitializer.h +7 -3
  81. data/vendor/eigen/Eigen/src/Core/CoreEvaluators.h +325 -272
  82. data/vendor/eigen/Eigen/src/Core/CoreIterators.h +5 -0
  83. data/vendor/eigen/Eigen/src/Core/CwiseBinaryOp.h +21 -22
  84. data/vendor/eigen/Eigen/src/Core/CwiseNullaryOp.h +153 -18
  85. data/vendor/eigen/Eigen/src/Core/CwiseUnaryOp.h +6 -6
  86. data/vendor/eigen/Eigen/src/Core/CwiseUnaryView.h +14 -10
  87. data/vendor/eigen/Eigen/src/Core/DenseBase.h +132 -42
  88. data/vendor/eigen/Eigen/src/Core/DenseCoeffsBase.h +25 -21
  89. data/vendor/eigen/Eigen/src/Core/DenseStorage.h +153 -71
  90. data/vendor/eigen/Eigen/src/Core/Diagonal.h +21 -23
  91. data/vendor/eigen/Eigen/src/Core/DiagonalMatrix.h +50 -2
  92. data/vendor/eigen/Eigen/src/Core/DiagonalProduct.h +1 -1
  93. data/vendor/eigen/Eigen/src/Core/Dot.h +10 -10
  94. data/vendor/eigen/Eigen/src/Core/EigenBase.h +10 -9
  95. data/vendor/eigen/Eigen/src/Core/ForceAlignedAccess.h +8 -4
  96. data/vendor/eigen/Eigen/src/Core/Fuzzy.h +3 -3
  97. data/vendor/eigen/Eigen/src/Core/GeneralProduct.h +20 -10
  98. data/vendor/eigen/Eigen/src/Core/GenericPacketMath.h +599 -152
  99. data/vendor/eigen/Eigen/src/Core/GlobalFunctions.h +40 -33
  100. data/vendor/eigen/Eigen/src/Core/IO.h +40 -7
  101. data/vendor/eigen/Eigen/src/Core/IndexedView.h +237 -0
  102. data/vendor/eigen/Eigen/src/Core/Inverse.h +9 -10
  103. data/vendor/eigen/Eigen/src/Core/Map.h +7 -7
  104. data/vendor/eigen/Eigen/src/Core/MapBase.h +10 -3
  105. data/vendor/eigen/Eigen/src/Core/MathFunctions.h +767 -125
  106. data/vendor/eigen/Eigen/src/Core/MathFunctionsImpl.h +118 -19
  107. data/vendor/eigen/Eigen/src/Core/Matrix.h +131 -25
  108. data/vendor/eigen/Eigen/src/Core/MatrixBase.h +21 -3
  109. data/vendor/eigen/Eigen/src/Core/NestByValue.h +25 -50
  110. data/vendor/eigen/Eigen/src/Core/NoAlias.h +4 -3
  111. data/vendor/eigen/Eigen/src/Core/NumTraits.h +107 -20
  112. data/vendor/eigen/Eigen/src/Core/PartialReduxEvaluator.h +232 -0
  113. data/vendor/eigen/Eigen/src/Core/PermutationMatrix.h +3 -31
  114. data/vendor/eigen/Eigen/src/Core/PlainObjectBase.h +152 -59
  115. data/vendor/eigen/Eigen/src/Core/Product.h +30 -25
  116. data/vendor/eigen/Eigen/src/Core/ProductEvaluators.h +192 -125
  117. data/vendor/eigen/Eigen/src/Core/Random.h +37 -1
  118. data/vendor/eigen/Eigen/src/Core/Redux.h +180 -170
  119. data/vendor/eigen/Eigen/src/Core/Ref.h +121 -23
  120. data/vendor/eigen/Eigen/src/Core/Replicate.h +8 -8
  121. data/vendor/eigen/Eigen/src/Core/Reshaped.h +454 -0
  122. data/vendor/eigen/Eigen/src/Core/ReturnByValue.h +7 -5
  123. data/vendor/eigen/Eigen/src/Core/Reverse.h +18 -12
  124. data/vendor/eigen/Eigen/src/Core/Select.h +8 -6
  125. data/vendor/eigen/Eigen/src/Core/SelfAdjointView.h +33 -20
  126. data/vendor/eigen/Eigen/src/Core/Solve.h +14 -14
  127. data/vendor/eigen/Eigen/src/Core/SolveTriangular.h +16 -16
  128. data/vendor/eigen/Eigen/src/Core/SolverBase.h +41 -3
  129. data/vendor/eigen/Eigen/src/Core/StableNorm.h +100 -70
  130. data/vendor/eigen/Eigen/src/Core/StlIterators.h +463 -0
  131. data/vendor/eigen/Eigen/src/Core/Stride.h +9 -4
  132. data/vendor/eigen/Eigen/src/Core/Swap.h +5 -4
  133. data/vendor/eigen/Eigen/src/Core/Transpose.h +88 -27
  134. data/vendor/eigen/Eigen/src/Core/Transpositions.h +26 -47
  135. data/vendor/eigen/Eigen/src/Core/TriangularMatrix.h +93 -75
  136. data/vendor/eigen/Eigen/src/Core/VectorBlock.h +5 -5
  137. data/vendor/eigen/Eigen/src/Core/VectorwiseOp.h +159 -70
  138. data/vendor/eigen/Eigen/src/Core/Visitor.h +137 -29
  139. data/vendor/eigen/Eigen/src/Core/arch/AVX/Complex.h +50 -129
  140. data/vendor/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +126 -337
  141. data/vendor/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +1092 -155
  142. data/vendor/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +65 -1
  143. data/vendor/eigen/Eigen/src/Core/arch/AVX512/Complex.h +422 -0
  144. data/vendor/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +207 -236
  145. data/vendor/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1482 -495
  146. data/vendor/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +89 -0
  147. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +152 -165
  148. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +19 -251
  149. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2937 -0
  150. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +221 -0
  151. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +629 -0
  152. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2042 -392
  153. data/vendor/eigen/Eigen/src/Core/arch/CUDA/Complex.h +235 -80
  154. data/vendor/eigen/Eigen/src/Core/arch/Default/BFloat16.h +700 -0
  155. data/vendor/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +102 -14
  156. data/vendor/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1649 -0
  157. data/vendor/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +110 -0
  158. data/vendor/eigen/Eigen/src/Core/arch/Default/Half.h +942 -0
  159. data/vendor/eigen/Eigen/src/Core/arch/Default/Settings.h +1 -1
  160. data/vendor/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +120 -0
  161. data/vendor/eigen/Eigen/src/Core/arch/{CUDA → GPU}/MathFunctions.h +16 -4
  162. data/vendor/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1685 -0
  163. data/vendor/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +80 -0
  164. data/vendor/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
  165. data/vendor/eigen/Eigen/src/Core/arch/MSA/Complex.h +648 -0
  166. data/vendor/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +387 -0
  167. data/vendor/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1233 -0
  168. data/vendor/eigen/Eigen/src/Core/arch/NEON/Complex.h +313 -219
  169. data/vendor/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +183 -0
  170. data/vendor/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +54 -70
  171. data/vendor/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4376 -549
  172. data/vendor/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1419 -0
  173. data/vendor/eigen/Eigen/src/Core/arch/SSE/Complex.h +59 -179
  174. data/vendor/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +65 -428
  175. data/vendor/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +893 -283
  176. data/vendor/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +65 -0
  177. data/vendor/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +44 -0
  178. data/vendor/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +752 -0
  179. data/vendor/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +49 -0
  180. data/vendor/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +232 -0
  181. data/vendor/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +301 -0
  182. data/vendor/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +670 -0
  183. data/vendor/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +694 -0
  184. data/vendor/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +85 -0
  185. data/vendor/eigen/Eigen/src/Core/arch/ZVector/Complex.h +212 -183
  186. data/vendor/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +101 -5
  187. data/vendor/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +510 -395
  188. data/vendor/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +11 -2
  189. data/vendor/eigen/Eigen/src/Core/functors/BinaryFunctors.h +112 -46
  190. data/vendor/eigen/Eigen/src/Core/functors/NullaryFunctors.h +31 -30
  191. data/vendor/eigen/Eigen/src/Core/functors/StlFunctors.h +32 -2
  192. data/vendor/eigen/Eigen/src/Core/functors/UnaryFunctors.h +355 -16
  193. data/vendor/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1075 -586
  194. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +49 -24
  195. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +41 -35
  196. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +6 -6
  197. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +4 -2
  198. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +382 -483
  199. data/vendor/eigen/Eigen/src/Core/products/Parallelizer.h +22 -5
  200. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +53 -30
  201. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +16 -8
  202. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +8 -6
  203. data/vendor/eigen/Eigen/src/Core/products/SelfadjointProduct.h +4 -4
  204. data/vendor/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +5 -4
  205. data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +33 -27
  206. data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +14 -12
  207. data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +36 -34
  208. data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +8 -4
  209. data/vendor/eigen/Eigen/src/Core/products/TriangularSolverVector.h +13 -10
  210. data/vendor/eigen/Eigen/src/Core/util/BlasUtil.h +304 -119
  211. data/vendor/eigen/Eigen/src/Core/util/ConfigureVectorization.h +512 -0
  212. data/vendor/eigen/Eigen/src/Core/util/Constants.h +25 -9
  213. data/vendor/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +26 -3
  214. data/vendor/eigen/Eigen/src/Core/util/ForwardDeclarations.h +29 -9
  215. data/vendor/eigen/Eigen/src/Core/util/IndexedViewHelper.h +186 -0
  216. data/vendor/eigen/Eigen/src/Core/util/IntegralConstant.h +272 -0
  217. data/vendor/eigen/Eigen/src/Core/util/MKL_support.h +8 -1
  218. data/vendor/eigen/Eigen/src/Core/util/Macros.h +709 -246
  219. data/vendor/eigen/Eigen/src/Core/util/Memory.h +222 -52
  220. data/vendor/eigen/Eigen/src/Core/util/Meta.h +355 -77
  221. data/vendor/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +5 -1
  222. data/vendor/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
  223. data/vendor/eigen/Eigen/src/Core/util/StaticAssert.h +8 -5
  224. data/vendor/eigen/Eigen/src/Core/util/SymbolicIndex.h +293 -0
  225. data/vendor/eigen/Eigen/src/Core/util/XprHelper.h +65 -30
  226. data/vendor/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +1 -1
  227. data/vendor/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +7 -4
  228. data/vendor/eigen/Eigen/src/Eigenvalues/EigenSolver.h +2 -2
  229. data/vendor/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +1 -1
  230. data/vendor/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +2 -2
  231. data/vendor/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +2 -2
  232. data/vendor/eigen/Eigen/src/Eigenvalues/RealQZ.h +9 -6
  233. data/vendor/eigen/Eigen/src/Eigenvalues/RealSchur.h +21 -9
  234. data/vendor/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +77 -43
  235. data/vendor/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +20 -15
  236. data/vendor/eigen/Eigen/src/Geometry/AlignedBox.h +99 -5
  237. data/vendor/eigen/Eigen/src/Geometry/AngleAxis.h +4 -4
  238. data/vendor/eigen/Eigen/src/Geometry/EulerAngles.h +3 -3
  239. data/vendor/eigen/Eigen/src/Geometry/Homogeneous.h +15 -11
  240. data/vendor/eigen/Eigen/src/Geometry/Hyperplane.h +1 -1
  241. data/vendor/eigen/Eigen/src/Geometry/OrthoMethods.h +3 -2
  242. data/vendor/eigen/Eigen/src/Geometry/ParametrizedLine.h +39 -2
  243. data/vendor/eigen/Eigen/src/Geometry/Quaternion.h +70 -14
  244. data/vendor/eigen/Eigen/src/Geometry/Rotation2D.h +3 -3
  245. data/vendor/eigen/Eigen/src/Geometry/Scaling.h +23 -5
  246. data/vendor/eigen/Eigen/src/Geometry/Transform.h +88 -67
  247. data/vendor/eigen/Eigen/src/Geometry/Translation.h +6 -12
  248. data/vendor/eigen/Eigen/src/Geometry/Umeyama.h +1 -1
  249. data/vendor/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +168 -0
  250. data/vendor/eigen/Eigen/src/Householder/BlockHouseholder.h +9 -2
  251. data/vendor/eigen/Eigen/src/Householder/Householder.h +8 -4
  252. data/vendor/eigen/Eigen/src/Householder/HouseholderSequence.h +123 -48
  253. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +15 -15
  254. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +7 -23
  255. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +5 -22
  256. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +41 -47
  257. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +51 -60
  258. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +70 -20
  259. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +2 -20
  260. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +11 -9
  261. data/vendor/eigen/Eigen/src/Jacobi/Jacobi.h +31 -10
  262. data/vendor/eigen/Eigen/src/KLUSupport/KLUSupport.h +358 -0
  263. data/vendor/eigen/Eigen/src/LU/Determinant.h +35 -19
  264. data/vendor/eigen/Eigen/src/LU/FullPivLU.h +29 -43
  265. data/vendor/eigen/Eigen/src/LU/InverseImpl.h +25 -8
  266. data/vendor/eigen/Eigen/src/LU/PartialPivLU.h +71 -58
  267. data/vendor/eigen/Eigen/src/LU/arch/InverseSize4.h +351 -0
  268. data/vendor/eigen/Eigen/src/OrderingMethods/Amd.h +7 -17
  269. data/vendor/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +297 -277
  270. data/vendor/eigen/Eigen/src/OrderingMethods/Ordering.h +6 -10
  271. data/vendor/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +1 -1
  272. data/vendor/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +9 -7
  273. data/vendor/eigen/Eigen/src/QR/ColPivHouseholderQR.h +41 -20
  274. data/vendor/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +100 -27
  275. data/vendor/eigen/Eigen/src/QR/FullPivHouseholderQR.h +59 -22
  276. data/vendor/eigen/Eigen/src/QR/HouseholderQR.h +48 -23
  277. data/vendor/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +25 -3
  278. data/vendor/eigen/Eigen/src/SVD/BDCSVD.h +183 -63
  279. data/vendor/eigen/Eigen/src/SVD/JacobiSVD.h +22 -14
  280. data/vendor/eigen/Eigen/src/SVD/SVDBase.h +83 -22
  281. data/vendor/eigen/Eigen/src/SVD/UpperBidiagonalization.h +3 -3
  282. data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +17 -9
  283. data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +12 -37
  284. data/vendor/eigen/Eigen/src/SparseCore/AmbiVector.h +3 -2
  285. data/vendor/eigen/Eigen/src/SparseCore/CompressedStorage.h +16 -0
  286. data/vendor/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +6 -6
  287. data/vendor/eigen/Eigen/src/SparseCore/SparseAssign.h +81 -27
  288. data/vendor/eigen/Eigen/src/SparseCore/SparseBlock.h +25 -57
  289. data/vendor/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +40 -11
  290. data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +11 -15
  291. data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +4 -2
  292. data/vendor/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +30 -8
  293. data/vendor/eigen/Eigen/src/SparseCore/SparseMatrix.h +126 -11
  294. data/vendor/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +5 -12
  295. data/vendor/eigen/Eigen/src/SparseCore/SparseProduct.h +13 -1
  296. data/vendor/eigen/Eigen/src/SparseCore/SparseRef.h +7 -7
  297. data/vendor/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +5 -2
  298. data/vendor/eigen/Eigen/src/SparseCore/SparseUtil.h +8 -0
  299. data/vendor/eigen/Eigen/src/SparseCore/SparseVector.h +1 -1
  300. data/vendor/eigen/Eigen/src/SparseCore/SparseView.h +1 -0
  301. data/vendor/eigen/Eigen/src/SparseLU/SparseLU.h +162 -12
  302. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +1 -1
  303. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +76 -2
  304. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +2 -2
  305. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +1 -1
  306. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +1 -1
  307. data/vendor/eigen/Eigen/src/SparseQR/SparseQR.h +19 -6
  308. data/vendor/eigen/Eigen/src/StlSupport/StdDeque.h +2 -12
  309. data/vendor/eigen/Eigen/src/StlSupport/StdList.h +2 -2
  310. data/vendor/eigen/Eigen/src/StlSupport/StdVector.h +2 -2
  311. data/vendor/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +6 -8
  312. data/vendor/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +175 -39
  313. data/vendor/eigen/Eigen/src/misc/lapacke.h +5 -4
  314. data/vendor/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +28 -2
  315. data/vendor/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +155 -11
  316. data/vendor/eigen/Eigen/src/plugins/BlockMethods.h +626 -242
  317. data/vendor/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +14 -0
  318. data/vendor/eigen/Eigen/src/plugins/IndexedViewMethods.h +262 -0
  319. data/vendor/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +4 -4
  320. data/vendor/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +10 -0
  321. data/vendor/eigen/Eigen/src/plugins/ReshapedMethods.h +149 -0
  322. data/vendor/eigen/README.md +2 -0
  323. data/vendor/eigen/bench/btl/README +1 -1
  324. data/vendor/eigen/bench/tensors/README +6 -7
  325. data/vendor/eigen/ci/README.md +56 -0
  326. data/vendor/eigen/demos/mix_eigen_and_c/README +1 -1
  327. data/vendor/eigen/unsupported/Eigen/CXX11/src/Tensor/README.md +213 -158
  328. data/vendor/eigen/unsupported/README.txt +1 -1
  329. data/vendor/tomotopy/README.kr.rst +78 -0
  330. data/vendor/tomotopy/README.rst +75 -0
  331. data/vendor/tomotopy/src/Labeling/FoRelevance.cpp +2 -2
  332. data/vendor/tomotopy/src/Labeling/Phraser.hpp +4 -4
  333. data/vendor/tomotopy/src/TopicModel/CTModel.hpp +7 -3
  334. data/vendor/tomotopy/src/TopicModel/DMRModel.hpp +7 -3
  335. data/vendor/tomotopy/src/TopicModel/DTModel.hpp +6 -3
  336. data/vendor/tomotopy/src/TopicModel/GDMRModel.hpp +2 -2
  337. data/vendor/tomotopy/src/TopicModel/HDP.h +1 -0
  338. data/vendor/tomotopy/src/TopicModel/HDPModel.hpp +57 -6
  339. data/vendor/tomotopy/src/TopicModel/HLDAModel.hpp +6 -3
  340. data/vendor/tomotopy/src/TopicModel/HPAModel.hpp +3 -2
  341. data/vendor/tomotopy/src/TopicModel/LDA.h +3 -3
  342. data/vendor/tomotopy/src/TopicModel/LDACVB0Model.hpp +5 -5
  343. data/vendor/tomotopy/src/TopicModel/LDAModel.hpp +50 -19
  344. data/vendor/tomotopy/src/TopicModel/LLDAModel.hpp +6 -2
  345. data/vendor/tomotopy/src/TopicModel/MGLDAModel.hpp +3 -2
  346. data/vendor/tomotopy/src/TopicModel/PAModel.hpp +1 -1
  347. data/vendor/tomotopy/src/TopicModel/PLDAModel.hpp +6 -2
  348. data/vendor/tomotopy/src/TopicModel/PT.h +3 -1
  349. data/vendor/tomotopy/src/TopicModel/PTModel.hpp +36 -3
  350. data/vendor/tomotopy/src/TopicModel/SLDAModel.hpp +6 -3
  351. data/vendor/tomotopy/src/TopicModel/TopicModel.hpp +55 -26
  352. data/vendor/tomotopy/src/Utils/AliasMethod.hpp +5 -4
  353. data/vendor/tomotopy/src/Utils/Dictionary.h +2 -2
  354. data/vendor/tomotopy/src/Utils/EigenAddonOps.hpp +36 -1
  355. data/vendor/tomotopy/src/Utils/MultiNormalDistribution.hpp +1 -1
  356. data/vendor/tomotopy/src/Utils/TruncMultiNormal.hpp +1 -1
  357. data/vendor/tomotopy/src/Utils/exception.h +6 -0
  358. data/vendor/tomotopy/src/Utils/math.h +2 -2
  359. data/vendor/tomotopy/src/Utils/sample.hpp +14 -12
  360. data/vendor/tomotopy/src/Utils/serializer.hpp +30 -5
  361. data/vendor/tomotopy/src/Utils/sse_gamma.h +0 -3
  362. metadata +64 -18
  363. data/vendor/eigen/Eigen/CMakeLists.txt +0 -19
  364. data/vendor/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -674
  365. data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
  366. data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
  367. data/vendor/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
  368. data/vendor/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
  369. data/vendor/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
@@ -44,23 +44,29 @@ struct default_packet_traits
44
44
  enum {
45
45
  HasHalfPacket = 0,
46
46
 
47
- HasAdd = 1,
48
- HasSub = 1,
49
- HasMul = 1,
50
- HasNegate = 1,
51
- HasAbs = 1,
52
- HasArg = 0,
53
- HasAbs2 = 1,
54
- HasMin = 1,
55
- HasMax = 1,
56
- HasConj = 1,
47
+ HasAdd = 1,
48
+ HasSub = 1,
49
+ HasShift = 1,
50
+ HasMul = 1,
51
+ HasNegate = 1,
52
+ HasAbs = 1,
53
+ HasArg = 0,
54
+ HasAbs2 = 1,
55
+ HasAbsDiff = 0,
56
+ HasMin = 1,
57
+ HasMax = 1,
58
+ HasConj = 1,
57
59
  HasSetLinear = 1,
58
- HasBlend = 0,
60
+ HasBlend = 0,
61
+ // This flag is used to indicate whether packet comparison is supported.
62
+ // pcmp_eq, pcmp_lt and pcmp_le should be defined for it to be true.
63
+ HasCmp = 0,
59
64
 
60
65
  HasDiv = 0,
61
66
  HasSqrt = 0,
62
67
  HasRsqrt = 0,
63
68
  HasExp = 0,
69
+ HasExpm1 = 0,
64
70
  HasLog = 0,
65
71
  HasLog1p = 0,
66
72
  HasLog10 = 0,
@@ -81,14 +87,18 @@ struct default_packet_traits
81
87
  HasPolygamma = 0,
82
88
  HasErf = 0,
83
89
  HasErfc = 0,
90
+ HasNdtri = 0,
91
+ HasBessel = 0,
84
92
  HasIGamma = 0,
93
+ HasIGammaDerA = 0,
94
+ HasGammaSampleDerAlpha = 0,
85
95
  HasIGammac = 0,
86
96
  HasBetaInc = 0,
87
97
 
88
98
  HasRound = 0,
99
+ HasRint = 0,
89
100
  HasFloor = 0,
90
101
  HasCeil = 0,
91
-
92
102
  HasSign = 0
93
103
  };
94
104
  };
@@ -119,6 +129,22 @@ template<typename T> struct packet_traits : default_packet_traits
119
129
 
120
130
  template<typename T> struct packet_traits<const T> : packet_traits<T> { };
121
131
 
132
+ template<typename T> struct unpacket_traits
133
+ {
134
+ typedef T type;
135
+ typedef T half;
136
+ enum
137
+ {
138
+ size = 1,
139
+ alignment = 1,
140
+ vectorizable = false,
141
+ masked_load_available=false,
142
+ masked_store_available=false
143
+ };
144
+ };
145
+
146
+ template<typename T> struct unpacket_traits<const T> : unpacket_traits<T> { };
147
+
122
148
  template <typename Src, typename Tgt> struct type_casting_traits {
123
149
  enum {
124
150
  VectorizedCast = 0,
@@ -127,6 +153,34 @@ template <typename Src, typename Tgt> struct type_casting_traits {
127
153
  };
128
154
  };
129
155
 
156
+ /** \internal Wrapper to ensure that multiple packet types can map to the same
157
+ same underlying vector type. */
158
+ template<typename T, int unique_id = 0>
159
+ struct eigen_packet_wrapper
160
+ {
161
+ EIGEN_ALWAYS_INLINE operator T&() { return m_val; }
162
+ EIGEN_ALWAYS_INLINE operator const T&() const { return m_val; }
163
+ EIGEN_ALWAYS_INLINE eigen_packet_wrapper() {}
164
+ EIGEN_ALWAYS_INLINE eigen_packet_wrapper(const T &v) : m_val(v) {}
165
+ EIGEN_ALWAYS_INLINE eigen_packet_wrapper& operator=(const T &v) {
166
+ m_val = v;
167
+ return *this;
168
+ }
169
+
170
+ T m_val;
171
+ };
172
+
173
+
174
+ /** \internal A convenience utility for determining if the type is a scalar.
175
+ * This is used to enable some generic packet implementations.
176
+ */
177
+ template<typename Packet>
178
+ struct is_scalar {
179
+ typedef typename unpacket_traits<Packet>::type Scalar;
180
+ enum {
181
+ value = internal::is_same<Packet, Scalar>::value
182
+ };
183
+ };
130
184
 
131
185
  /** \internal \returns static_cast<TgtType>(a) (coeff-wise) */
132
186
  template <typename SrcPacket, typename TgtPacket>
@@ -139,75 +193,406 @@ EIGEN_DEVICE_FUNC inline TgtPacket
139
193
  pcast(const SrcPacket& a, const SrcPacket& /*b*/) {
140
194
  return static_cast<TgtPacket>(a);
141
195
  }
142
-
143
196
  template <typename SrcPacket, typename TgtPacket>
144
197
  EIGEN_DEVICE_FUNC inline TgtPacket
145
198
  pcast(const SrcPacket& a, const SrcPacket& /*b*/, const SrcPacket& /*c*/, const SrcPacket& /*d*/) {
146
199
  return static_cast<TgtPacket>(a);
147
200
  }
201
+ template <typename SrcPacket, typename TgtPacket>
202
+ EIGEN_DEVICE_FUNC inline TgtPacket
203
+ pcast(const SrcPacket& a, const SrcPacket& /*b*/, const SrcPacket& /*c*/, const SrcPacket& /*d*/,
204
+ const SrcPacket& /*e*/, const SrcPacket& /*f*/, const SrcPacket& /*g*/, const SrcPacket& /*h*/) {
205
+ return static_cast<TgtPacket>(a);
206
+ }
207
+
208
+ /** \internal \returns reinterpret_cast<Target>(a) */
209
+ template <typename Target, typename Packet>
210
+ EIGEN_DEVICE_FUNC inline Target
211
+ preinterpret(const Packet& a); /* { return reinterpret_cast<const Target&>(a); } */
148
212
 
149
213
  /** \internal \returns a + b (coeff-wise) */
150
214
  template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
151
- padd(const Packet& a,
152
- const Packet& b) { return a+b; }
215
+ padd(const Packet& a, const Packet& b) { return a+b; }
216
+ // Avoid compiler warning for boolean algebra.
217
+ template<> EIGEN_DEVICE_FUNC inline bool
218
+ padd(const bool& a, const bool& b) { return a || b; }
153
219
 
154
220
  /** \internal \returns a - b (coeff-wise) */
155
221
  template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
156
- psub(const Packet& a,
157
- const Packet& b) { return a-b; }
222
+ psub(const Packet& a, const Packet& b) { return a-b; }
158
223
 
159
224
  /** \internal \returns -a (coeff-wise) */
160
225
  template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
161
226
  pnegate(const Packet& a) { return -a; }
162
227
 
163
- /** \internal \returns conj(a) (coeff-wise) */
228
+ template<> EIGEN_DEVICE_FUNC inline bool
229
+ pnegate(const bool& a) { return !a; }
164
230
 
231
+ /** \internal \returns conj(a) (coeff-wise) */
165
232
  template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
166
233
  pconj(const Packet& a) { return numext::conj(a); }
167
234
 
168
235
  /** \internal \returns a * b (coeff-wise) */
169
236
  template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
170
- pmul(const Packet& a,
171
- const Packet& b) { return a*b; }
237
+ pmul(const Packet& a, const Packet& b) { return a*b; }
238
+ // Avoid compiler warning for boolean algebra.
239
+ template<> EIGEN_DEVICE_FUNC inline bool
240
+ pmul(const bool& a, const bool& b) { return a && b; }
172
241
 
173
242
  /** \internal \returns a / b (coeff-wise) */
174
243
  template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
175
- pdiv(const Packet& a,
176
- const Packet& b) { return a/b; }
244
+ pdiv(const Packet& a, const Packet& b) { return a/b; }
245
+
246
+ // In the generic case, memset to all one bits.
247
+ template<typename Packet, typename EnableIf = void>
248
+ struct ptrue_impl {
249
+ static EIGEN_DEVICE_FUNC inline Packet run(const Packet& /*a*/){
250
+ Packet b;
251
+ memset(static_cast<void*>(&b), 0xff, sizeof(Packet));
252
+ return b;
253
+ }
254
+ };
177
255
 
178
- /** \internal \returns the min of \a a and \a b (coeff-wise) */
256
+ // For non-trivial scalars, set to Scalar(1) (i.e. a non-zero value).
257
+ // Although this is technically not a valid bitmask, the scalar path for pselect
258
+ // uses a comparison to zero, so this should still work in most cases. We don't
259
+ // have another option, since the scalar type requires initialization.
260
+ template<typename T>
261
+ struct ptrue_impl<T,
262
+ typename internal::enable_if<is_scalar<T>::value && NumTraits<T>::RequireInitialization>::type > {
263
+ static EIGEN_DEVICE_FUNC inline T run(const T& /*a*/){
264
+ return T(1);
265
+ }
266
+ };
267
+
268
+ /** \internal \returns one bits. */
179
269
  template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
180
- pmin(const Packet& a,
181
- const Packet& b) { return numext::mini(a, b); }
270
+ ptrue(const Packet& a) {
271
+ return ptrue_impl<Packet>::run(a);
272
+ }
273
+
274
+ // In the general case, memset to zero.
275
+ template<typename Packet, typename EnableIf = void>
276
+ struct pzero_impl {
277
+ static EIGEN_DEVICE_FUNC inline Packet run(const Packet& /*a*/) {
278
+ Packet b;
279
+ memset(static_cast<void*>(&b), 0x00, sizeof(Packet));
280
+ return b;
281
+ }
282
+ };
283
+
284
+ // For scalars, explicitly set to Scalar(0), since the underlying representation
285
+ // for zero may not consist of all-zero bits.
286
+ template<typename T>
287
+ struct pzero_impl<T,
288
+ typename internal::enable_if<is_scalar<T>::value>::type> {
289
+ static EIGEN_DEVICE_FUNC inline T run(const T& /*a*/) {
290
+ return T(0);
291
+ }
292
+ };
182
293
 
183
- /** \internal \returns the max of \a a and \a b (coeff-wise) */
294
+ /** \internal \returns packet of zeros */
184
295
  template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
185
- pmax(const Packet& a,
186
- const Packet& b) { return numext::maxi(a, b); }
296
+ pzero(const Packet& a) {
297
+ return pzero_impl<Packet>::run(a);
298
+ }
187
299
 
188
- /** \internal \returns the absolute value of \a a */
300
+ /** \internal \returns a <= b as a bit mask */
189
301
  template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
190
- pabs(const Packet& a) { using std::abs; return abs(a); }
302
+ pcmp_le(const Packet& a, const Packet& b) { return a<=b ? ptrue(a) : pzero(a); }
191
303
 
192
- /** \internal \returns the phase angle of \a a */
304
+ /** \internal \returns a < b as a bit mask */
193
305
  template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
194
- parg(const Packet& a) { using numext::arg; return arg(a); }
306
+ pcmp_lt(const Packet& a, const Packet& b) { return a<b ? ptrue(a) : pzero(a); }
307
+
308
+ /** \internal \returns a == b as a bit mask */
309
+ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
310
+ pcmp_eq(const Packet& a, const Packet& b) { return a==b ? ptrue(a) : pzero(a); }
311
+
312
+ /** \internal \returns a < b or a==NaN or b==NaN as a bit mask */
313
+ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
314
+ pcmp_lt_or_nan(const Packet& a, const Packet& b) { return a>=b ? pzero(a) : ptrue(a); }
315
+
316
+ template<typename T>
317
+ struct bit_and {
318
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE T operator()(const T& a, const T& b) const {
319
+ return a & b;
320
+ }
321
+ };
322
+
323
+ template<typename T>
324
+ struct bit_or {
325
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE T operator()(const T& a, const T& b) const {
326
+ return a | b;
327
+ }
328
+ };
329
+
330
+ template<typename T>
331
+ struct bit_xor {
332
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE T operator()(const T& a, const T& b) const {
333
+ return a ^ b;
334
+ }
335
+ };
336
+
337
+ template<typename T>
338
+ struct bit_not {
339
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE T operator()(const T& a) const {
340
+ return ~a;
341
+ }
342
+ };
343
+
344
+ // Use operators &, |, ^, ~.
345
+ template<typename T>
346
+ struct operator_bitwise_helper {
347
+ EIGEN_DEVICE_FUNC static inline T bitwise_and(const T& a, const T& b) { return bit_and<T>()(a, b); }
348
+ EIGEN_DEVICE_FUNC static inline T bitwise_or(const T& a, const T& b) { return bit_or<T>()(a, b); }
349
+ EIGEN_DEVICE_FUNC static inline T bitwise_xor(const T& a, const T& b) { return bit_xor<T>()(a, b); }
350
+ EIGEN_DEVICE_FUNC static inline T bitwise_not(const T& a) { return bit_not<T>()(a); }
351
+ };
352
+
353
+ // Apply binary operations byte-by-byte
354
+ template<typename T>
355
+ struct bytewise_bitwise_helper {
356
+ EIGEN_DEVICE_FUNC static inline T bitwise_and(const T& a, const T& b) {
357
+ return binary(a, b, bit_and<unsigned char>());
358
+ }
359
+ EIGEN_DEVICE_FUNC static inline T bitwise_or(const T& a, const T& b) {
360
+ return binary(a, b, bit_or<unsigned char>());
361
+ }
362
+ EIGEN_DEVICE_FUNC static inline T bitwise_xor(const T& a, const T& b) {
363
+ return binary(a, b, bit_xor<unsigned char>());
364
+ }
365
+ EIGEN_DEVICE_FUNC static inline T bitwise_not(const T& a) {
366
+ return unary(a,bit_not<unsigned char>());
367
+ }
368
+
369
+ private:
370
+ template<typename Op>
371
+ EIGEN_DEVICE_FUNC static inline T unary(const T& a, Op op) {
372
+ const unsigned char* a_ptr = reinterpret_cast<const unsigned char*>(&a);
373
+ T c;
374
+ unsigned char* c_ptr = reinterpret_cast<unsigned char*>(&c);
375
+ for (size_t i = 0; i < sizeof(T); ++i) {
376
+ *c_ptr++ = op(*a_ptr++);
377
+ }
378
+ return c;
379
+ }
380
+
381
+ template<typename Op>
382
+ EIGEN_DEVICE_FUNC static inline T binary(const T& a, const T& b, Op op) {
383
+ const unsigned char* a_ptr = reinterpret_cast<const unsigned char*>(&a);
384
+ const unsigned char* b_ptr = reinterpret_cast<const unsigned char*>(&b);
385
+ T c;
386
+ unsigned char* c_ptr = reinterpret_cast<unsigned char*>(&c);
387
+ for (size_t i = 0; i < sizeof(T); ++i) {
388
+ *c_ptr++ = op(*a_ptr++, *b_ptr++);
389
+ }
390
+ return c;
391
+ }
392
+ };
393
+
394
+ // In the general case, use byte-by-byte manipulation.
395
+ template<typename T, typename EnableIf = void>
396
+ struct bitwise_helper : public bytewise_bitwise_helper<T> {};
397
+
398
+ // For integers or non-trivial scalars, use binary operators.
399
+ template<typename T>
400
+ struct bitwise_helper<T,
401
+ typename internal::enable_if<
402
+ is_scalar<T>::value && (NumTraits<T>::IsInteger || NumTraits<T>::RequireInitialization)>::type
403
+ > : public operator_bitwise_helper<T> {};
195
404
 
196
405
  /** \internal \returns the bitwise and of \a a and \a b */
197
406
  template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
198
- pand(const Packet& a, const Packet& b) { return a & b; }
407
+ pand(const Packet& a, const Packet& b) {
408
+ return bitwise_helper<Packet>::bitwise_and(a, b);
409
+ }
199
410
 
200
411
  /** \internal \returns the bitwise or of \a a and \a b */
201
412
  template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
202
- por(const Packet& a, const Packet& b) { return a | b; }
413
+ por(const Packet& a, const Packet& b) {
414
+ return bitwise_helper<Packet>::bitwise_or(a, b);
415
+ }
203
416
 
204
417
  /** \internal \returns the bitwise xor of \a a and \a b */
205
418
  template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
206
- pxor(const Packet& a, const Packet& b) { return a ^ b; }
419
+ pxor(const Packet& a, const Packet& b) {
420
+ return bitwise_helper<Packet>::bitwise_xor(a, b);
421
+ }
422
+
423
+ /** \internal \returns the bitwise not of \a a */
424
+ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
425
+ pnot(const Packet& a) {
426
+ return bitwise_helper<Packet>::bitwise_not(a);
427
+ }
428
+
429
+ /** \internal \returns the bitwise and of \a a and not \a b */
430
+ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
431
+ pandnot(const Packet& a, const Packet& b) { return pand(a, pnot(b)); }
432
+
433
+ // In the general case, use bitwise select.
434
+ template<typename Packet, typename EnableIf = void>
435
+ struct pselect_impl {
436
+ static EIGEN_DEVICE_FUNC inline Packet run(const Packet& mask, const Packet& a, const Packet& b) {
437
+ return por(pand(a,mask),pandnot(b,mask));
438
+ }
439
+ };
440
+
441
+ // For scalars, use ternary select.
442
+ template<typename Packet>
443
+ struct pselect_impl<Packet,
444
+ typename internal::enable_if<is_scalar<Packet>::value>::type > {
445
+ static EIGEN_DEVICE_FUNC inline Packet run(const Packet& mask, const Packet& a, const Packet& b) {
446
+ return numext::equal_strict(mask, Packet(0)) ? b : a;
447
+ }
448
+ };
449
+
450
+ /** \internal \returns \a a or \a b for each field in packet according to \a mask */
451
+ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
452
+ pselect(const Packet& mask, const Packet& a, const Packet& b) {
453
+ return pselect_impl<Packet>::run(mask, a, b);
454
+ }
455
+
456
+ template<> EIGEN_DEVICE_FUNC inline bool pselect<bool>(
457
+ const bool& cond, const bool& a, const bool& b) {
458
+ return cond ? a : b;
459
+ }
460
+
461
+ /** \internal \returns the min or max of \a a and \a b (coeff-wise)
462
+ If either \a a or \a b are NaN, the result is implementation defined. */
463
+ template<int NaNPropagation>
464
+ struct pminmax_impl {
465
+ template <typename Packet, typename Op>
466
+ static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a, const Packet& b, Op op) {
467
+ return op(a,b);
468
+ }
469
+ };
470
+
471
+ /** \internal \returns the min or max of \a a and \a b (coeff-wise)
472
+ If either \a a or \a b are NaN, NaN is returned. */
473
+ template<>
474
+ struct pminmax_impl<PropagateNaN> {
475
+ template <typename Packet, typename Op>
476
+ static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a, const Packet& b, Op op) {
477
+ Packet not_nan_mask_a = pcmp_eq(a, a);
478
+ Packet not_nan_mask_b = pcmp_eq(b, b);
479
+ return pselect(not_nan_mask_a,
480
+ pselect(not_nan_mask_b, op(a, b), b),
481
+ a);
482
+ }
483
+ };
484
+
485
+ /** \internal \returns the min or max of \a a and \a b (coeff-wise)
486
+ If both \a a and \a b are NaN, NaN is returned.
487
+ Equivalent to std::fmin(a, b). */
488
+ template<>
489
+ struct pminmax_impl<PropagateNumbers> {
490
+ template <typename Packet, typename Op>
491
+ static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a, const Packet& b, Op op) {
492
+ Packet not_nan_mask_a = pcmp_eq(a, a);
493
+ Packet not_nan_mask_b = pcmp_eq(b, b);
494
+ return pselect(not_nan_mask_a,
495
+ pselect(not_nan_mask_b, op(a, b), a),
496
+ b);
497
+ }
498
+ };
499
+
500
+
501
+ #ifndef SYCL_DEVICE_ONLY
502
+ #define EIGEN_BINARY_OP_NAN_PROPAGATION(Type, Func) Func
503
+ #else
504
+ #define EIGEN_BINARY_OP_NAN_PROPAGATION(Type, Func) \
505
+ [](const Type& a, const Type& b) { \
506
+ return Func(a, b);}
507
+ #endif
508
+
509
+ /** \internal \returns the min of \a a and \a b (coeff-wise).
510
+ If \a a or \a b is NaN, the return value is implementation defined. */
511
+ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
512
+ pmin(const Packet& a, const Packet& b) { return numext::mini(a,b); }
513
+
514
+ /** \internal \returns the min of \a a and \a b (coeff-wise).
515
+ NaNPropagation determines the NaN propagation semantics. */
516
+ template <int NaNPropagation, typename Packet>
517
+ EIGEN_DEVICE_FUNC inline Packet pmin(const Packet& a, const Packet& b) {
518
+ return pminmax_impl<NaNPropagation>::run(a, b, EIGEN_BINARY_OP_NAN_PROPAGATION(Packet, (pmin<Packet>)));
519
+ }
520
+
521
+ /** \internal \returns the max of \a a and \a b (coeff-wise)
522
+ If \a a or \a b is NaN, the return value is implementation defined. */
523
+ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
524
+ pmax(const Packet& a, const Packet& b) { return numext::maxi(a, b); }
525
+
526
+ /** \internal \returns the max of \a a and \a b (coeff-wise).
527
+ NaNPropagation determines the NaN propagation semantics. */
528
+ template <int NaNPropagation, typename Packet>
529
+ EIGEN_DEVICE_FUNC inline Packet pmax(const Packet& a, const Packet& b) {
530
+ return pminmax_impl<NaNPropagation>::run(a, b, EIGEN_BINARY_OP_NAN_PROPAGATION(Packet,(pmax<Packet>)));
531
+ }
532
+
533
+ /** \internal \returns the absolute value of \a a */
534
+ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
535
+ pabs(const Packet& a) { return numext::abs(a); }
536
+ template<> EIGEN_DEVICE_FUNC inline unsigned int
537
+ pabs(const unsigned int& a) { return a; }
538
+ template<> EIGEN_DEVICE_FUNC inline unsigned long
539
+ pabs(const unsigned long& a) { return a; }
540
+ template<> EIGEN_DEVICE_FUNC inline unsigned long long
541
+ pabs(const unsigned long long& a) { return a; }
542
+
543
+ /** \internal \returns the addsub value of \a a,b */
544
+ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
545
+ paddsub(const Packet& a, const Packet& b) {
546
+ return pselect(peven_mask(a), padd(a, b), psub(a, b));
547
+ }
548
+
549
+ /** \internal \returns the phase angle of \a a */
550
+ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
551
+ parg(const Packet& a) { using numext::arg; return arg(a); }
552
+
553
+
554
+ /** \internal \returns \a a arithmetically shifted by N bits to the right */
555
+ template<int N> EIGEN_DEVICE_FUNC inline int
556
+ parithmetic_shift_right(const int& a) { return a >> N; }
557
+ template<int N> EIGEN_DEVICE_FUNC inline long int
558
+ parithmetic_shift_right(const long int& a) { return a >> N; }
559
+
560
+ /** \internal \returns \a a logically shifted by N bits to the right */
561
+ template<int N> EIGEN_DEVICE_FUNC inline int
562
+ plogical_shift_right(const int& a) { return static_cast<int>(static_cast<unsigned int>(a) >> N); }
563
+ template<int N> EIGEN_DEVICE_FUNC inline long int
564
+ plogical_shift_right(const long int& a) { return static_cast<long>(static_cast<unsigned long>(a) >> N); }
207
565
 
208
- /** \internal \returns the bitwise andnot of \a a and \a b */
566
+ /** \internal \returns \a a shifted by N bits to the left */
567
+ template<int N> EIGEN_DEVICE_FUNC inline int
568
+ plogical_shift_left(const int& a) { return a << N; }
569
+ template<int N> EIGEN_DEVICE_FUNC inline long int
570
+ plogical_shift_left(const long int& a) { return a << N; }
571
+
572
+ /** \internal \returns the significand and exponent of the underlying floating point numbers
573
+ * See https://en.cppreference.com/w/cpp/numeric/math/frexp
574
+ */
575
+ template <typename Packet>
576
+ EIGEN_DEVICE_FUNC inline Packet pfrexp(const Packet& a, Packet& exponent) {
577
+ int exp;
578
+ EIGEN_USING_STD(frexp);
579
+ Packet result = static_cast<Packet>(frexp(a, &exp));
580
+ exponent = static_cast<Packet>(exp);
581
+ return result;
582
+ }
583
+
584
+ /** \internal \returns a * 2^((int)exponent)
585
+ * See https://en.cppreference.com/w/cpp/numeric/math/ldexp
586
+ */
587
+ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
588
+ pldexp(const Packet &a, const Packet &exponent) {
589
+ EIGEN_USING_STD(ldexp)
590
+ return static_cast<Packet>(ldexp(a, static_cast<int>(exponent)));
591
+ }
592
+
593
+ /** \internal \returns the absolute difference of \a a and \a b (coeff-wise) */
209
594
  template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
210
- pandnot(const Packet& a, const Packet& b) { return a & (!b); }
595
+ pabsdiff(const Packet& a, const Packet& b) { return pselect(pcmp_lt(a, b), psub(b, a), psub(a, b)); }
211
596
 
212
597
  /** \internal \returns a packet version of \a *from, from must be 16 bytes aligned */
213
598
  template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
@@ -217,10 +602,22 @@ pload(const typename unpacket_traits<Packet>::type* from) { return *from; }
217
602
  template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
218
603
  ploadu(const typename unpacket_traits<Packet>::type* from) { return *from; }
219
604
 
605
+ /** \internal \returns a packet version of \a *from, (un-aligned masked load)
606
+ * There is no generic implementation. We only have implementations for specialized
607
+ * cases. Generic case should not be called.
608
+ */
609
+ template<typename Packet> EIGEN_DEVICE_FUNC inline
610
+ typename enable_if<unpacket_traits<Packet>::masked_load_available, Packet>::type
611
+ ploadu(const typename unpacket_traits<Packet>::type* from, typename unpacket_traits<Packet>::mask_t umask);
612
+
220
613
  /** \internal \returns a packet with constant coefficients \a a, e.g.: (a,a,a,a) */
221
614
  template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
222
615
  pset1(const typename unpacket_traits<Packet>::type& a) { return a; }
223
616
 
617
+ /** \internal \returns a packet with constant coefficients set from bits */
618
+ template<typename Packet,typename BitsType> EIGEN_DEVICE_FUNC inline Packet
619
+ pset1frombits(BitsType a);
620
+
224
621
  /** \internal \returns a packet with constant coefficients \a a[0], e.g.: (a[0],a[0],a[0],a[0]) */
225
622
  template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
226
623
  pload1(const typename unpacket_traits<Packet>::type *a) { return pset1<Packet>(*a); }
@@ -237,7 +634,7 @@ ploaddup(const typename unpacket_traits<Packet>::type* from) { return *from; }
237
634
  * For instance, for a packet of 8 elements, 2 scalars will be read from \a *from and
238
635
  * replicated to form: {from[0],from[0],from[0],from[0],from[1],from[1],from[1],from[1]}
239
636
  * Currently, this function is only used in matrix products.
240
- * For packet-size smaller or equal to 4, this function is equivalent to pload1
637
+ * For packet-size smaller or equal to 4, this function is equivalent to pload1
241
638
  */
242
639
  template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
243
640
  ploadquad(const typename unpacket_traits<Packet>::type* from)
@@ -281,6 +678,20 @@ inline void pbroadcast2(const typename unpacket_traits<Packet>::type *a,
281
678
  template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet
282
679
  plset(const typename unpacket_traits<Packet>::type& a) { return a; }
283
680
 
681
+ /** \internal \returns a packet whose even-indexed coefficients are x and whose odd-indexed coefficients are 0, e.g.: (x, 0, x, 0),
682
+ where x is the value of all 1-bits. */
683
+ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
684
+ peven_mask(const Packet& /*a*/) {
685
+ typedef typename unpacket_traits<Packet>::type Scalar;
686
+ const size_t n = unpacket_traits<Packet>::size;
687
+ EIGEN_ALIGN_TO_BOUNDARY(sizeof(Packet)) Scalar elements[n];
688
+ for(size_t i = 0; i < n; ++i) {
689
+ memset(elements+i, ((i & 1) == 0 ? 0xff : 0), sizeof(Scalar));
690
+ }
691
+ return ploadu<Packet>(elements);
692
+ }
693
+
694
+
284
695
  /** \internal copy the packet \a from to \a *to, \a to must be 16 bytes aligned */
285
696
  template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstore(Scalar* to, const Packet& from)
286
697
  { (*to) = from; }
@@ -289,6 +700,15 @@ template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstore(
289
700
  template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstoreu(Scalar* to, const Packet& from)
290
701
  { (*to) = from; }
291
702
 
703
+ /** \internal copy the packet \a from to \a *to, (un-aligned store with a mask)
704
+ * There is no generic implementation. We only have implementations for specialized
705
+ * cases. Generic case should not be called.
706
+ */
707
+ template<typename Scalar, typename Packet>
708
+ EIGEN_DEVICE_FUNC inline
709
+ typename enable_if<unpacket_traits<Packet>::masked_store_available, void>::type
710
+ pstoreu(Scalar* to, const Packet& from, typename unpacket_traits<Packet>::mask_t umask);
711
+
292
712
  template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline Packet pgather(const Scalar* from, Index /*stride*/)
293
713
  { return ploadu<Packet>(from); }
294
714
 
@@ -298,8 +718,10 @@ template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstoreu
298
718
  /** \internal tries to do cache prefetching of \a addr */
299
719
  template<typename Scalar> EIGEN_DEVICE_FUNC inline void prefetch(const Scalar* addr)
300
720
  {
301
- #ifdef __CUDA_ARCH__
302
- #if defined(__LP64__)
721
+ #if defined(EIGEN_HIP_DEVICE_COMPILE)
722
+ // do nothing
723
+ #elif defined(EIGEN_CUDA_ARCH)
724
+ #if defined(__LP64__) || EIGEN_OS_WIN64
303
725
  // 64-bit pointer operand constraint for inlined asm
304
726
  asm(" prefetch.L1 [ %1 ];" : "=l"(addr) : "l"(addr));
305
727
  #else
@@ -311,39 +733,6 @@ template<typename Scalar> EIGEN_DEVICE_FUNC inline void prefetch(const Scalar* a
311
733
  #endif
312
734
  }
313
735
 
314
- /** \internal \returns the first element of a packet */
315
- template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type pfirst(const Packet& a)
316
- { return a; }
317
-
318
- /** \internal \returns a packet where the element i contains the sum of the packet of \a vec[i] */
319
- template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
320
- preduxp(const Packet* vecs) { return vecs[0]; }
321
-
322
- /** \internal \returns the sum of the elements of \a a*/
323
- template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux(const Packet& a)
324
- { return a; }
325
-
326
- /** \internal \returns the sum of the elements of \a a by block of 4 elements.
327
- * For a packet {a0, a1, a2, a3, a4, a5, a6, a7}, it returns a half packet {a0+a4, a1+a5, a2+a6, a3+a7}
328
- * For packet-size smaller or equal to 4, this boils down to a noop.
329
- */
330
- template<typename Packet> EIGEN_DEVICE_FUNC inline
331
- typename conditional<(unpacket_traits<Packet>::size%8)==0,typename unpacket_traits<Packet>::half,Packet>::type
332
- predux_downto4(const Packet& a)
333
- { return a; }
334
-
335
- /** \internal \returns the product of the elements of \a a*/
336
- template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_mul(const Packet& a)
337
- { return a; }
338
-
339
- /** \internal \returns the min of the elements of \a a*/
340
- template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_min(const Packet& a)
341
- { return a; }
342
-
343
- /** \internal \returns the max of the elements of \a a*/
344
- template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_max(const Packet& a)
345
- { return a; }
346
-
347
736
  /** \internal \returns the reversed elements of \a a*/
348
737
  template<typename Packet> EIGEN_DEVICE_FUNC inline Packet preverse(const Packet& a)
349
738
  { return a; }
@@ -351,10 +740,7 @@ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet preverse(const Packet&
351
740
  /** \internal \returns \a a with real and imaginary part flipped (for complex type only) */
352
741
  template<typename Packet> EIGEN_DEVICE_FUNC inline Packet pcplxflip(const Packet& a)
353
742
  {
354
- // FIXME: uncomment the following in case we drop the internal imag and real functions.
355
- // using std::imag;
356
- // using std::real;
357
- return Packet(imag(a),real(a));
743
+ return Packet(numext::imag(a),numext::real(a));
358
744
  }
359
745
 
360
746
  /**************************
@@ -363,47 +749,51 @@ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet pcplxflip(const Packet
363
749
 
364
750
  /** \internal \returns the sine of \a a (coeff-wise) */
365
751
  template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
366
- Packet psin(const Packet& a) { using std::sin; return sin(a); }
752
+ Packet psin(const Packet& a) { EIGEN_USING_STD(sin); return sin(a); }
367
753
 
368
754
  /** \internal \returns the cosine of \a a (coeff-wise) */
369
755
  template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
370
- Packet pcos(const Packet& a) { using std::cos; return cos(a); }
756
+ Packet pcos(const Packet& a) { EIGEN_USING_STD(cos); return cos(a); }
371
757
 
372
758
  /** \internal \returns the tan of \a a (coeff-wise) */
373
759
  template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
374
- Packet ptan(const Packet& a) { using std::tan; return tan(a); }
760
+ Packet ptan(const Packet& a) { EIGEN_USING_STD(tan); return tan(a); }
375
761
 
376
762
  /** \internal \returns the arc sine of \a a (coeff-wise) */
377
763
  template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
378
- Packet pasin(const Packet& a) { using std::asin; return asin(a); }
764
+ Packet pasin(const Packet& a) { EIGEN_USING_STD(asin); return asin(a); }
379
765
 
380
766
  /** \internal \returns the arc cosine of \a a (coeff-wise) */
381
767
  template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
382
- Packet pacos(const Packet& a) { using std::acos; return acos(a); }
768
+ Packet pacos(const Packet& a) { EIGEN_USING_STD(acos); return acos(a); }
383
769
 
384
770
  /** \internal \returns the arc tangent of \a a (coeff-wise) */
385
771
  template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
386
- Packet patan(const Packet& a) { using std::atan; return atan(a); }
772
+ Packet patan(const Packet& a) { EIGEN_USING_STD(atan); return atan(a); }
387
773
 
388
774
  /** \internal \returns the hyperbolic sine of \a a (coeff-wise) */
389
775
  template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
390
- Packet psinh(const Packet& a) { using std::sinh; return sinh(a); }
776
+ Packet psinh(const Packet& a) { EIGEN_USING_STD(sinh); return sinh(a); }
391
777
 
392
778
  /** \internal \returns the hyperbolic cosine of \a a (coeff-wise) */
393
779
  template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
394
- Packet pcosh(const Packet& a) { using std::cosh; return cosh(a); }
780
+ Packet pcosh(const Packet& a) { EIGEN_USING_STD(cosh); return cosh(a); }
395
781
 
396
782
  /** \internal \returns the hyperbolic tan of \a a (coeff-wise) */
397
783
  template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
398
- Packet ptanh(const Packet& a) { using std::tanh; return tanh(a); }
784
+ Packet ptanh(const Packet& a) { EIGEN_USING_STD(tanh); return tanh(a); }
399
785
 
400
786
  /** \internal \returns the exp of \a a (coeff-wise) */
401
787
  template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
402
- Packet pexp(const Packet& a) { using std::exp; return exp(a); }
788
+ Packet pexp(const Packet& a) { EIGEN_USING_STD(exp); return exp(a); }
789
+
790
+ /** \internal \returns the expm1 of \a a (coeff-wise) */
791
+ template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
792
+ Packet pexpm1(const Packet& a) { return numext::expm1(a); }
403
793
 
404
794
  /** \internal \returns the log of \a a (coeff-wise) */
405
795
  template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
406
- Packet plog(const Packet& a) { using std::log; return log(a); }
796
+ Packet plog(const Packet& a) { EIGEN_USING_STD(log); return log(a); }
407
797
 
408
798
  /** \internal \returns the log1p of \a a (coeff-wise) */
409
799
  template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
@@ -411,16 +801,24 @@ Packet plog1p(const Packet& a) { return numext::log1p(a); }
411
801
 
412
802
  /** \internal \returns the log10 of \a a (coeff-wise) */
413
803
  template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
414
- Packet plog10(const Packet& a) { using std::log10; return log10(a); }
804
+ Packet plog10(const Packet& a) { EIGEN_USING_STD(log10); return log10(a); }
805
+
806
+ /** \internal \returns the log2 of \a a (coeff-wise) */
807
+ template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
808
+ Packet plog2(const Packet& a) {
809
+ typedef typename internal::unpacket_traits<Packet>::type Scalar;
810
+ return pmul(pset1<Packet>(Scalar(EIGEN_LOG2E)), plog(a));
811
+ }
415
812
 
416
813
  /** \internal \returns the square-root of \a a (coeff-wise) */
417
814
  template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
418
- Packet psqrt(const Packet& a) { using std::sqrt; return sqrt(a); }
815
+ Packet psqrt(const Packet& a) { return numext::sqrt(a); }
419
816
 
420
817
  /** \internal \returns the reciprocal square-root of \a a (coeff-wise) */
421
818
  template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
422
819
  Packet prsqrt(const Packet& a) {
423
- return pdiv(pset1<Packet>(1), psqrt(a));
820
+ typedef typename internal::unpacket_traits<Packet>::type Scalar;
821
+ return pdiv(pset1<Packet>(Scalar(1)), psqrt(a));
424
822
  }
425
823
 
426
824
  /** \internal \returns the rounded value of \a a (coeff-wise) */
@@ -431,15 +829,121 @@ Packet pround(const Packet& a) { using numext::round; return round(a); }
431
829
  template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
432
830
  Packet pfloor(const Packet& a) { using numext::floor; return floor(a); }
433
831
 
832
+ /** \internal \returns the rounded value of \a a (coeff-wise) with current
833
+ * rounding mode */
834
+ template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
835
+ Packet print(const Packet& a) { using numext::rint; return rint(a); }
836
+
434
837
  /** \internal \returns the ceil of \a a (coeff-wise) */
435
838
  template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
436
839
  Packet pceil(const Packet& a) { using numext::ceil; return ceil(a); }
437
840
 
841
+ /** \internal \returns the first element of a packet */
842
+ template<typename Packet>
843
+ EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type
844
+ pfirst(const Packet& a)
845
+ { return a; }
846
+
847
+ /** \internal \returns the sum of the elements of upper and lower half of \a a if \a a is larger than 4.
848
+ * For a packet {a0, a1, a2, a3, a4, a5, a6, a7}, it returns a half packet {a0+a4, a1+a5, a2+a6, a3+a7}
849
+ * For packet-size smaller or equal to 4, this boils down to a noop.
850
+ */
851
+ template<typename Packet>
852
+ EIGEN_DEVICE_FUNC inline typename conditional<(unpacket_traits<Packet>::size%8)==0,typename unpacket_traits<Packet>::half,Packet>::type
853
+ predux_half_dowto4(const Packet& a)
854
+ { return a; }
855
+
856
+ // Slow generic implementation of Packet reduction.
857
+ template <typename Packet, typename Op>
858
+ EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type
859
+ predux_helper(const Packet& a, Op op) {
860
+ typedef typename unpacket_traits<Packet>::type Scalar;
861
+ const size_t n = unpacket_traits<Packet>::size;
862
+ EIGEN_ALIGN_TO_BOUNDARY(sizeof(Packet)) Scalar elements[n];
863
+ pstoreu<Scalar>(elements, a);
864
+ for(size_t k = n / 2; k > 0; k /= 2) {
865
+ for(size_t i = 0; i < k; ++i) {
866
+ elements[i] = op(elements[i], elements[i + k]);
867
+ }
868
+ }
869
+ return elements[0];
870
+ }
871
+
872
+ /** \internal \returns the sum of the elements of \a a*/
873
+ template<typename Packet>
874
+ EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type
875
+ predux(const Packet& a)
876
+ {
877
+ return a;
878
+ }
879
+
880
+ /** \internal \returns the product of the elements of \a a */
881
+ template <typename Packet>
882
+ EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_mul(
883
+ const Packet& a) {
884
+ typedef typename unpacket_traits<Packet>::type Scalar;
885
+ return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmul<Scalar>)));
886
+ }
887
+
888
+ /** \internal \returns the min of the elements of \a a */
889
+ template <typename Packet>
890
+ EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_min(
891
+ const Packet &a) {
892
+ typedef typename unpacket_traits<Packet>::type Scalar;
893
+ return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmin<PropagateFast, Scalar>)));
894
+ }
895
+
896
+ template <int NaNPropagation, typename Packet>
897
+ EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_min(
898
+ const Packet& a) {
899
+ typedef typename unpacket_traits<Packet>::type Scalar;
900
+ return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmin<NaNPropagation, Scalar>)));
901
+ }
902
+
903
+ /** \internal \returns the max of the elements of \a a */
904
+ template <typename Packet>
905
+ EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_max(
906
+ const Packet &a) {
907
+ typedef typename unpacket_traits<Packet>::type Scalar;
908
+ return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmax<PropagateFast, Scalar>)));
909
+ }
910
+
911
+ template <int NaNPropagation, typename Packet>
912
+ EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_max(
913
+ const Packet& a) {
914
+ typedef typename unpacket_traits<Packet>::type Scalar;
915
+ return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmax<NaNPropagation, Scalar>)));
916
+ }
917
+
918
+ #undef EIGEN_BINARY_OP_NAN_PROPAGATION
919
+
920
+ /** \internal \returns true if all coeffs of \a a means "true"
921
+ * It is supposed to be called on values returned by pcmp_*.
922
+ */
923
+ // not needed yet
924
+ // template<typename Packet> EIGEN_DEVICE_FUNC inline bool predux_all(const Packet& a)
925
+ // { return bool(a); }
926
+
927
+ /** \internal \returns true if any coeff of \a a means "true"
928
+ * It is supposed to be called on values returned by pcmp_*.
929
+ */
930
+ template<typename Packet> EIGEN_DEVICE_FUNC inline bool predux_any(const Packet& a)
931
+ {
932
+ // Dirty but generic implementation where "true" is assumed to be non 0 and all the same.
933
+ // It is expected that "true" is either:
934
+ // - Scalar(1)
935
+ // - bits full of ones (NaN for floats),
936
+ // - or first bit equals to 1 (1 for ints, smallest denormal for floats).
937
+ // For all these cases, taking the sum is just fine, and this boils down to a no-op for scalars.
938
+ typedef typename unpacket_traits<Packet>::type Scalar;
939
+ return numext::not_equal_strict(predux(a), Scalar(0));
940
+ }
941
+
438
942
  /***************************************************************************
439
943
  * The following functions might not have to be overwritten for vectorized types
440
944
  ***************************************************************************/
441
945
 
442
- /** \internal copy a packet with constant coeficient \a a (e.g., [a,a,a,a]) to \a *to. \a to must be 16 bytes aligned */
946
+ /** \internal copy a packet with constant coefficient \a a (e.g., [a,a,a,a]) to \a *to. \a to must be 16 bytes aligned */
443
947
  // NOTE: this function must really be templated on the packet type (think about different packet types for the same scalar type)
444
948
  template<typename Packet>
445
949
  inline void pstore1(typename unpacket_traits<Packet>::type* to, const typename unpacket_traits<Packet>::type& a)
@@ -487,47 +991,18 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt_ro(const typename unpacket_t
487
991
  return ploadt<Packet, LoadMode>(from);
488
992
  }
489
993
 
490
- /** \internal default implementation of palign() allowing partial specialization */
491
- template<int Offset,typename PacketType>
492
- struct palign_impl
493
- {
494
- // by default data are aligned, so there is nothing to be done :)
495
- static inline void run(PacketType&, const PacketType&) {}
496
- };
497
-
498
- /** \internal update \a first using the concatenation of the packet_size minus \a Offset last elements
499
- * of \a first and \a Offset first elements of \a second.
500
- *
501
- * This function is currently only used to optimize matrix-vector products on unligned matrices.
502
- * It takes 2 packets that represent a contiguous memory array, and returns a packet starting
503
- * at the position \a Offset. For instance, for packets of 4 elements, we have:
504
- * Input:
505
- * - first = {f0,f1,f2,f3}
506
- * - second = {s0,s1,s2,s3}
507
- * Output:
508
- * - if Offset==0 then {f0,f1,f2,f3}
509
- * - if Offset==1 then {f1,f2,f3,s0}
510
- * - if Offset==2 then {f2,f3,s0,s1}
511
- * - if Offset==3 then {f3,s0,s1,s3}
512
- */
513
- template<int Offset,typename PacketType>
514
- inline void palign(PacketType& first, const PacketType& second)
515
- {
516
- palign_impl<Offset,PacketType>::run(first,second);
517
- }
518
-
519
994
  /***************************************************************************
520
995
  * Fast complex products (GCC generates a function call which is very slow)
521
996
  ***************************************************************************/
522
997
 
523
998
  // Eigen+CUDA does not support complexes.
524
- #ifndef __CUDACC__
999
+ #if !defined(EIGEN_GPUCC)
525
1000
 
526
1001
  template<> inline std::complex<float> pmul(const std::complex<float>& a, const std::complex<float>& b)
527
- { return std::complex<float>(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); }
1002
+ { return std::complex<float>(a.real()*b.real() - a.imag()*b.imag(), a.imag()*b.real() + a.real()*b.imag()); }
528
1003
 
529
1004
  template<> inline std::complex<double> pmul(const std::complex<double>& a, const std::complex<double>& b)
530
- { return std::complex<double>(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); }
1005
+ { return std::complex<double>(a.real()*b.real() - a.imag()*b.imag(), a.imag()*b.real() + a.real()*b.imag()); }
531
1006
 
532
1007
  #endif
533
1008
 
@@ -558,34 +1033,6 @@ pblend(const Selector<unpacket_traits<Packet>::size>& ifPacket, const Packet& th
558
1033
  return ifPacket.select[0] ? thenPacket : elsePacket;
559
1034
  }
560
1035
 
561
- /** \internal \returns \a a with the first coefficient replaced by the scalar b */
562
- template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
563
- pinsertfirst(const Packet& a, typename unpacket_traits<Packet>::type b)
564
- {
565
- // Default implementation based on pblend.
566
- // It must be specialized for higher performance.
567
- Selector<unpacket_traits<Packet>::size> mask;
568
- mask.select[0] = true;
569
- // This for loop should be optimized away by the compiler.
570
- for(Index i=1; i<unpacket_traits<Packet>::size; ++i)
571
- mask.select[i] = false;
572
- return pblend(mask, pset1<Packet>(b), a);
573
- }
574
-
575
- /** \internal \returns \a a with the last coefficient replaced by the scalar b */
576
- template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
577
- pinsertlast(const Packet& a, typename unpacket_traits<Packet>::type b)
578
- {
579
- // Default implementation based on pblend.
580
- // It must be specialized for higher performance.
581
- Selector<unpacket_traits<Packet>::size> mask;
582
- // This for loop should be optimized away by the compiler.
583
- for(Index i=0; i<unpacket_traits<Packet>::size-1; ++i)
584
- mask.select[i] = false;
585
- mask.select[unpacket_traits<Packet>::size-1] = true;
586
- return pblend(mask, pset1<Packet>(b), a);
587
- }
588
-
589
1036
  } // end namespace internal
590
1037
 
591
1038
  } // end namespace Eigen