tomoto 0.2.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (369) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +10 -0
  3. data/README.md +8 -10
  4. data/ext/tomoto/ct.cpp +11 -11
  5. data/ext/tomoto/dmr.cpp +14 -13
  6. data/ext/tomoto/dt.cpp +14 -14
  7. data/ext/tomoto/extconf.rb +7 -5
  8. data/ext/tomoto/gdmr.cpp +7 -7
  9. data/ext/tomoto/hdp.cpp +9 -9
  10. data/ext/tomoto/hlda.cpp +13 -13
  11. data/ext/tomoto/hpa.cpp +5 -5
  12. data/ext/tomoto/lda.cpp +42 -39
  13. data/ext/tomoto/llda.cpp +6 -6
  14. data/ext/tomoto/mglda.cpp +15 -15
  15. data/ext/tomoto/pa.cpp +6 -6
  16. data/ext/tomoto/plda.cpp +6 -6
  17. data/ext/tomoto/slda.cpp +8 -8
  18. data/ext/tomoto/{ext.cpp → tomoto.cpp} +8 -8
  19. data/ext/tomoto/utils.h +16 -70
  20. data/lib/tomoto/version.rb +1 -1
  21. data/lib/tomoto.rb +5 -1
  22. data/vendor/EigenRand/EigenRand/Core.h +10 -10
  23. data/vendor/EigenRand/EigenRand/Dists/Basic.h +208 -9
  24. data/vendor/EigenRand/EigenRand/Dists/Discrete.h +52 -31
  25. data/vendor/EigenRand/EigenRand/Dists/GammaPoisson.h +9 -8
  26. data/vendor/EigenRand/EigenRand/Dists/NormalExp.h +28 -21
  27. data/vendor/EigenRand/EigenRand/EigenRand +11 -6
  28. data/vendor/EigenRand/EigenRand/Macro.h +13 -7
  29. data/vendor/EigenRand/EigenRand/MorePacketMath.h +348 -740
  30. data/vendor/EigenRand/EigenRand/MvDists/Multinomial.h +5 -3
  31. data/vendor/EigenRand/EigenRand/MvDists/MvNormal.h +9 -3
  32. data/vendor/EigenRand/EigenRand/PacketFilter.h +11 -253
  33. data/vendor/EigenRand/EigenRand/PacketRandomEngine.h +21 -47
  34. data/vendor/EigenRand/EigenRand/RandUtils.h +50 -344
  35. data/vendor/EigenRand/EigenRand/arch/AVX/MorePacketMath.h +619 -0
  36. data/vendor/EigenRand/EigenRand/arch/AVX/PacketFilter.h +149 -0
  37. data/vendor/EigenRand/EigenRand/arch/AVX/RandUtils.h +228 -0
  38. data/vendor/EigenRand/EigenRand/arch/NEON/MorePacketMath.h +473 -0
  39. data/vendor/EigenRand/EigenRand/arch/NEON/PacketFilter.h +142 -0
  40. data/vendor/EigenRand/EigenRand/arch/NEON/RandUtils.h +126 -0
  41. data/vendor/EigenRand/EigenRand/arch/SSE/MorePacketMath.h +501 -0
  42. data/vendor/EigenRand/EigenRand/arch/SSE/PacketFilter.h +133 -0
  43. data/vendor/EigenRand/EigenRand/arch/SSE/RandUtils.h +120 -0
  44. data/vendor/EigenRand/EigenRand/doc.h +24 -12
  45. data/vendor/EigenRand/README.md +57 -4
  46. data/vendor/eigen/COPYING.APACHE +203 -0
  47. data/vendor/eigen/COPYING.BSD +1 -1
  48. data/vendor/eigen/COPYING.MINPACK +51 -52
  49. data/vendor/eigen/Eigen/Cholesky +0 -1
  50. data/vendor/eigen/Eigen/Core +112 -265
  51. data/vendor/eigen/Eigen/Eigenvalues +2 -3
  52. data/vendor/eigen/Eigen/Geometry +5 -8
  53. data/vendor/eigen/Eigen/Householder +0 -1
  54. data/vendor/eigen/Eigen/Jacobi +0 -1
  55. data/vendor/eigen/Eigen/KLUSupport +41 -0
  56. data/vendor/eigen/Eigen/LU +2 -5
  57. data/vendor/eigen/Eigen/OrderingMethods +0 -3
  58. data/vendor/eigen/Eigen/PaStiXSupport +1 -0
  59. data/vendor/eigen/Eigen/PardisoSupport +0 -0
  60. data/vendor/eigen/Eigen/QR +2 -3
  61. data/vendor/eigen/Eigen/QtAlignedMalloc +0 -1
  62. data/vendor/eigen/Eigen/SVD +0 -1
  63. data/vendor/eigen/Eigen/Sparse +0 -2
  64. data/vendor/eigen/Eigen/SparseCholesky +0 -8
  65. data/vendor/eigen/Eigen/SparseLU +4 -0
  66. data/vendor/eigen/Eigen/SparseQR +0 -1
  67. data/vendor/eigen/Eigen/src/Cholesky/LDLT.h +42 -27
  68. data/vendor/eigen/Eigen/src/Cholesky/LLT.h +39 -23
  69. data/vendor/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +90 -47
  70. data/vendor/eigen/Eigen/src/Core/ArithmeticSequence.h +413 -0
  71. data/vendor/eigen/Eigen/src/Core/Array.h +99 -11
  72. data/vendor/eigen/Eigen/src/Core/ArrayBase.h +3 -3
  73. data/vendor/eigen/Eigen/src/Core/ArrayWrapper.h +21 -21
  74. data/vendor/eigen/Eigen/src/Core/Assign.h +1 -1
  75. data/vendor/eigen/Eigen/src/Core/AssignEvaluator.h +125 -50
  76. data/vendor/eigen/Eigen/src/Core/Assign_MKL.h +10 -10
  77. data/vendor/eigen/Eigen/src/Core/BandMatrix.h +16 -16
  78. data/vendor/eigen/Eigen/src/Core/Block.h +56 -60
  79. data/vendor/eigen/Eigen/src/Core/BooleanRedux.h +29 -31
  80. data/vendor/eigen/Eigen/src/Core/CommaInitializer.h +7 -3
  81. data/vendor/eigen/Eigen/src/Core/CoreEvaluators.h +325 -272
  82. data/vendor/eigen/Eigen/src/Core/CoreIterators.h +5 -0
  83. data/vendor/eigen/Eigen/src/Core/CwiseBinaryOp.h +21 -22
  84. data/vendor/eigen/Eigen/src/Core/CwiseNullaryOp.h +153 -18
  85. data/vendor/eigen/Eigen/src/Core/CwiseUnaryOp.h +6 -6
  86. data/vendor/eigen/Eigen/src/Core/CwiseUnaryView.h +14 -10
  87. data/vendor/eigen/Eigen/src/Core/DenseBase.h +132 -42
  88. data/vendor/eigen/Eigen/src/Core/DenseCoeffsBase.h +25 -21
  89. data/vendor/eigen/Eigen/src/Core/DenseStorage.h +153 -71
  90. data/vendor/eigen/Eigen/src/Core/Diagonal.h +21 -23
  91. data/vendor/eigen/Eigen/src/Core/DiagonalMatrix.h +50 -2
  92. data/vendor/eigen/Eigen/src/Core/DiagonalProduct.h +1 -1
  93. data/vendor/eigen/Eigen/src/Core/Dot.h +10 -10
  94. data/vendor/eigen/Eigen/src/Core/EigenBase.h +10 -9
  95. data/vendor/eigen/Eigen/src/Core/ForceAlignedAccess.h +8 -4
  96. data/vendor/eigen/Eigen/src/Core/Fuzzy.h +3 -3
  97. data/vendor/eigen/Eigen/src/Core/GeneralProduct.h +20 -10
  98. data/vendor/eigen/Eigen/src/Core/GenericPacketMath.h +599 -152
  99. data/vendor/eigen/Eigen/src/Core/GlobalFunctions.h +40 -33
  100. data/vendor/eigen/Eigen/src/Core/IO.h +40 -7
  101. data/vendor/eigen/Eigen/src/Core/IndexedView.h +237 -0
  102. data/vendor/eigen/Eigen/src/Core/Inverse.h +9 -10
  103. data/vendor/eigen/Eigen/src/Core/Map.h +7 -7
  104. data/vendor/eigen/Eigen/src/Core/MapBase.h +10 -3
  105. data/vendor/eigen/Eigen/src/Core/MathFunctions.h +767 -125
  106. data/vendor/eigen/Eigen/src/Core/MathFunctionsImpl.h +118 -19
  107. data/vendor/eigen/Eigen/src/Core/Matrix.h +131 -25
  108. data/vendor/eigen/Eigen/src/Core/MatrixBase.h +21 -3
  109. data/vendor/eigen/Eigen/src/Core/NestByValue.h +25 -50
  110. data/vendor/eigen/Eigen/src/Core/NoAlias.h +4 -3
  111. data/vendor/eigen/Eigen/src/Core/NumTraits.h +107 -20
  112. data/vendor/eigen/Eigen/src/Core/PartialReduxEvaluator.h +232 -0
  113. data/vendor/eigen/Eigen/src/Core/PermutationMatrix.h +3 -31
  114. data/vendor/eigen/Eigen/src/Core/PlainObjectBase.h +152 -59
  115. data/vendor/eigen/Eigen/src/Core/Product.h +30 -25
  116. data/vendor/eigen/Eigen/src/Core/ProductEvaluators.h +192 -125
  117. data/vendor/eigen/Eigen/src/Core/Random.h +37 -1
  118. data/vendor/eigen/Eigen/src/Core/Redux.h +180 -170
  119. data/vendor/eigen/Eigen/src/Core/Ref.h +121 -23
  120. data/vendor/eigen/Eigen/src/Core/Replicate.h +8 -8
  121. data/vendor/eigen/Eigen/src/Core/Reshaped.h +454 -0
  122. data/vendor/eigen/Eigen/src/Core/ReturnByValue.h +7 -5
  123. data/vendor/eigen/Eigen/src/Core/Reverse.h +18 -12
  124. data/vendor/eigen/Eigen/src/Core/Select.h +8 -6
  125. data/vendor/eigen/Eigen/src/Core/SelfAdjointView.h +33 -20
  126. data/vendor/eigen/Eigen/src/Core/Solve.h +14 -14
  127. data/vendor/eigen/Eigen/src/Core/SolveTriangular.h +16 -16
  128. data/vendor/eigen/Eigen/src/Core/SolverBase.h +41 -3
  129. data/vendor/eigen/Eigen/src/Core/StableNorm.h +100 -70
  130. data/vendor/eigen/Eigen/src/Core/StlIterators.h +463 -0
  131. data/vendor/eigen/Eigen/src/Core/Stride.h +9 -4
  132. data/vendor/eigen/Eigen/src/Core/Swap.h +5 -4
  133. data/vendor/eigen/Eigen/src/Core/Transpose.h +88 -27
  134. data/vendor/eigen/Eigen/src/Core/Transpositions.h +26 -47
  135. data/vendor/eigen/Eigen/src/Core/TriangularMatrix.h +93 -75
  136. data/vendor/eigen/Eigen/src/Core/VectorBlock.h +5 -5
  137. data/vendor/eigen/Eigen/src/Core/VectorwiseOp.h +159 -70
  138. data/vendor/eigen/Eigen/src/Core/Visitor.h +137 -29
  139. data/vendor/eigen/Eigen/src/Core/arch/AVX/Complex.h +50 -129
  140. data/vendor/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +126 -337
  141. data/vendor/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +1092 -155
  142. data/vendor/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +65 -1
  143. data/vendor/eigen/Eigen/src/Core/arch/AVX512/Complex.h +422 -0
  144. data/vendor/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +207 -236
  145. data/vendor/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1482 -495
  146. data/vendor/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +89 -0
  147. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +152 -165
  148. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +19 -251
  149. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2937 -0
  150. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +221 -0
  151. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +629 -0
  152. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2042 -392
  153. data/vendor/eigen/Eigen/src/Core/arch/CUDA/Complex.h +235 -80
  154. data/vendor/eigen/Eigen/src/Core/arch/Default/BFloat16.h +700 -0
  155. data/vendor/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +102 -14
  156. data/vendor/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1649 -0
  157. data/vendor/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +110 -0
  158. data/vendor/eigen/Eigen/src/Core/arch/Default/Half.h +942 -0
  159. data/vendor/eigen/Eigen/src/Core/arch/Default/Settings.h +1 -1
  160. data/vendor/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +120 -0
  161. data/vendor/eigen/Eigen/src/Core/arch/{CUDA → GPU}/MathFunctions.h +16 -4
  162. data/vendor/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1685 -0
  163. data/vendor/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +80 -0
  164. data/vendor/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
  165. data/vendor/eigen/Eigen/src/Core/arch/MSA/Complex.h +648 -0
  166. data/vendor/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +387 -0
  167. data/vendor/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1233 -0
  168. data/vendor/eigen/Eigen/src/Core/arch/NEON/Complex.h +313 -219
  169. data/vendor/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +183 -0
  170. data/vendor/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +54 -70
  171. data/vendor/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4376 -549
  172. data/vendor/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1419 -0
  173. data/vendor/eigen/Eigen/src/Core/arch/SSE/Complex.h +59 -179
  174. data/vendor/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +65 -428
  175. data/vendor/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +893 -283
  176. data/vendor/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +65 -0
  177. data/vendor/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +44 -0
  178. data/vendor/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +752 -0
  179. data/vendor/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +49 -0
  180. data/vendor/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +232 -0
  181. data/vendor/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +301 -0
  182. data/vendor/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +670 -0
  183. data/vendor/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +694 -0
  184. data/vendor/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +85 -0
  185. data/vendor/eigen/Eigen/src/Core/arch/ZVector/Complex.h +212 -183
  186. data/vendor/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +101 -5
  187. data/vendor/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +510 -395
  188. data/vendor/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +11 -2
  189. data/vendor/eigen/Eigen/src/Core/functors/BinaryFunctors.h +112 -46
  190. data/vendor/eigen/Eigen/src/Core/functors/NullaryFunctors.h +31 -30
  191. data/vendor/eigen/Eigen/src/Core/functors/StlFunctors.h +32 -2
  192. data/vendor/eigen/Eigen/src/Core/functors/UnaryFunctors.h +355 -16
  193. data/vendor/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1075 -586
  194. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +49 -24
  195. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +41 -35
  196. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +6 -6
  197. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +4 -2
  198. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +382 -483
  199. data/vendor/eigen/Eigen/src/Core/products/Parallelizer.h +22 -5
  200. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +53 -30
  201. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +16 -8
  202. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +8 -6
  203. data/vendor/eigen/Eigen/src/Core/products/SelfadjointProduct.h +4 -4
  204. data/vendor/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +5 -4
  205. data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +33 -27
  206. data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +14 -12
  207. data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +36 -34
  208. data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +8 -4
  209. data/vendor/eigen/Eigen/src/Core/products/TriangularSolverVector.h +13 -10
  210. data/vendor/eigen/Eigen/src/Core/util/BlasUtil.h +304 -119
  211. data/vendor/eigen/Eigen/src/Core/util/ConfigureVectorization.h +512 -0
  212. data/vendor/eigen/Eigen/src/Core/util/Constants.h +25 -9
  213. data/vendor/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +26 -3
  214. data/vendor/eigen/Eigen/src/Core/util/ForwardDeclarations.h +29 -9
  215. data/vendor/eigen/Eigen/src/Core/util/IndexedViewHelper.h +186 -0
  216. data/vendor/eigen/Eigen/src/Core/util/IntegralConstant.h +272 -0
  217. data/vendor/eigen/Eigen/src/Core/util/MKL_support.h +8 -1
  218. data/vendor/eigen/Eigen/src/Core/util/Macros.h +709 -246
  219. data/vendor/eigen/Eigen/src/Core/util/Memory.h +222 -52
  220. data/vendor/eigen/Eigen/src/Core/util/Meta.h +355 -77
  221. data/vendor/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +5 -1
  222. data/vendor/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
  223. data/vendor/eigen/Eigen/src/Core/util/StaticAssert.h +8 -5
  224. data/vendor/eigen/Eigen/src/Core/util/SymbolicIndex.h +293 -0
  225. data/vendor/eigen/Eigen/src/Core/util/XprHelper.h +65 -30
  226. data/vendor/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +1 -1
  227. data/vendor/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +7 -4
  228. data/vendor/eigen/Eigen/src/Eigenvalues/EigenSolver.h +2 -2
  229. data/vendor/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +1 -1
  230. data/vendor/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +2 -2
  231. data/vendor/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +2 -2
  232. data/vendor/eigen/Eigen/src/Eigenvalues/RealQZ.h +9 -6
  233. data/vendor/eigen/Eigen/src/Eigenvalues/RealSchur.h +21 -9
  234. data/vendor/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +77 -43
  235. data/vendor/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +20 -15
  236. data/vendor/eigen/Eigen/src/Geometry/AlignedBox.h +99 -5
  237. data/vendor/eigen/Eigen/src/Geometry/AngleAxis.h +4 -4
  238. data/vendor/eigen/Eigen/src/Geometry/EulerAngles.h +3 -3
  239. data/vendor/eigen/Eigen/src/Geometry/Homogeneous.h +15 -11
  240. data/vendor/eigen/Eigen/src/Geometry/Hyperplane.h +1 -1
  241. data/vendor/eigen/Eigen/src/Geometry/OrthoMethods.h +3 -2
  242. data/vendor/eigen/Eigen/src/Geometry/ParametrizedLine.h +39 -2
  243. data/vendor/eigen/Eigen/src/Geometry/Quaternion.h +70 -14
  244. data/vendor/eigen/Eigen/src/Geometry/Rotation2D.h +3 -3
  245. data/vendor/eigen/Eigen/src/Geometry/Scaling.h +23 -5
  246. data/vendor/eigen/Eigen/src/Geometry/Transform.h +88 -67
  247. data/vendor/eigen/Eigen/src/Geometry/Translation.h +6 -12
  248. data/vendor/eigen/Eigen/src/Geometry/Umeyama.h +1 -1
  249. data/vendor/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +168 -0
  250. data/vendor/eigen/Eigen/src/Householder/BlockHouseholder.h +9 -2
  251. data/vendor/eigen/Eigen/src/Householder/Householder.h +8 -4
  252. data/vendor/eigen/Eigen/src/Householder/HouseholderSequence.h +123 -48
  253. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +15 -15
  254. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +7 -23
  255. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +5 -22
  256. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +41 -47
  257. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +51 -60
  258. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +70 -20
  259. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +2 -20
  260. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +11 -9
  261. data/vendor/eigen/Eigen/src/Jacobi/Jacobi.h +31 -10
  262. data/vendor/eigen/Eigen/src/KLUSupport/KLUSupport.h +358 -0
  263. data/vendor/eigen/Eigen/src/LU/Determinant.h +35 -19
  264. data/vendor/eigen/Eigen/src/LU/FullPivLU.h +29 -43
  265. data/vendor/eigen/Eigen/src/LU/InverseImpl.h +25 -8
  266. data/vendor/eigen/Eigen/src/LU/PartialPivLU.h +71 -58
  267. data/vendor/eigen/Eigen/src/LU/arch/InverseSize4.h +351 -0
  268. data/vendor/eigen/Eigen/src/OrderingMethods/Amd.h +7 -17
  269. data/vendor/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +297 -277
  270. data/vendor/eigen/Eigen/src/OrderingMethods/Ordering.h +6 -10
  271. data/vendor/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +1 -1
  272. data/vendor/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +9 -7
  273. data/vendor/eigen/Eigen/src/QR/ColPivHouseholderQR.h +41 -20
  274. data/vendor/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +100 -27
  275. data/vendor/eigen/Eigen/src/QR/FullPivHouseholderQR.h +59 -22
  276. data/vendor/eigen/Eigen/src/QR/HouseholderQR.h +48 -23
  277. data/vendor/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +25 -3
  278. data/vendor/eigen/Eigen/src/SVD/BDCSVD.h +183 -63
  279. data/vendor/eigen/Eigen/src/SVD/JacobiSVD.h +22 -14
  280. data/vendor/eigen/Eigen/src/SVD/SVDBase.h +83 -22
  281. data/vendor/eigen/Eigen/src/SVD/UpperBidiagonalization.h +3 -3
  282. data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +17 -9
  283. data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +12 -37
  284. data/vendor/eigen/Eigen/src/SparseCore/AmbiVector.h +3 -2
  285. data/vendor/eigen/Eigen/src/SparseCore/CompressedStorage.h +16 -0
  286. data/vendor/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +6 -6
  287. data/vendor/eigen/Eigen/src/SparseCore/SparseAssign.h +81 -27
  288. data/vendor/eigen/Eigen/src/SparseCore/SparseBlock.h +25 -57
  289. data/vendor/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +40 -11
  290. data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +11 -15
  291. data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +4 -2
  292. data/vendor/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +30 -8
  293. data/vendor/eigen/Eigen/src/SparseCore/SparseMatrix.h +126 -11
  294. data/vendor/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +5 -12
  295. data/vendor/eigen/Eigen/src/SparseCore/SparseProduct.h +13 -1
  296. data/vendor/eigen/Eigen/src/SparseCore/SparseRef.h +7 -7
  297. data/vendor/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +5 -2
  298. data/vendor/eigen/Eigen/src/SparseCore/SparseUtil.h +8 -0
  299. data/vendor/eigen/Eigen/src/SparseCore/SparseVector.h +1 -1
  300. data/vendor/eigen/Eigen/src/SparseCore/SparseView.h +1 -0
  301. data/vendor/eigen/Eigen/src/SparseLU/SparseLU.h +162 -12
  302. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +1 -1
  303. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +76 -2
  304. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +2 -2
  305. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +1 -1
  306. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +1 -1
  307. data/vendor/eigen/Eigen/src/SparseQR/SparseQR.h +19 -6
  308. data/vendor/eigen/Eigen/src/StlSupport/StdDeque.h +2 -12
  309. data/vendor/eigen/Eigen/src/StlSupport/StdList.h +2 -2
  310. data/vendor/eigen/Eigen/src/StlSupport/StdVector.h +2 -2
  311. data/vendor/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +6 -8
  312. data/vendor/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +175 -39
  313. data/vendor/eigen/Eigen/src/misc/lapacke.h +5 -4
  314. data/vendor/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +28 -2
  315. data/vendor/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +155 -11
  316. data/vendor/eigen/Eigen/src/plugins/BlockMethods.h +626 -242
  317. data/vendor/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +14 -0
  318. data/vendor/eigen/Eigen/src/plugins/IndexedViewMethods.h +262 -0
  319. data/vendor/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +4 -4
  320. data/vendor/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +10 -0
  321. data/vendor/eigen/Eigen/src/plugins/ReshapedMethods.h +149 -0
  322. data/vendor/eigen/README.md +2 -0
  323. data/vendor/eigen/bench/btl/README +1 -1
  324. data/vendor/eigen/bench/tensors/README +6 -7
  325. data/vendor/eigen/ci/README.md +56 -0
  326. data/vendor/eigen/demos/mix_eigen_and_c/README +1 -1
  327. data/vendor/eigen/unsupported/Eigen/CXX11/src/Tensor/README.md +213 -158
  328. data/vendor/eigen/unsupported/README.txt +1 -1
  329. data/vendor/tomotopy/README.kr.rst +78 -0
  330. data/vendor/tomotopy/README.rst +75 -0
  331. data/vendor/tomotopy/src/Labeling/FoRelevance.cpp +2 -2
  332. data/vendor/tomotopy/src/Labeling/Phraser.hpp +4 -4
  333. data/vendor/tomotopy/src/TopicModel/CTModel.hpp +7 -3
  334. data/vendor/tomotopy/src/TopicModel/DMRModel.hpp +7 -3
  335. data/vendor/tomotopy/src/TopicModel/DTModel.hpp +6 -3
  336. data/vendor/tomotopy/src/TopicModel/GDMRModel.hpp +2 -2
  337. data/vendor/tomotopy/src/TopicModel/HDP.h +1 -0
  338. data/vendor/tomotopy/src/TopicModel/HDPModel.hpp +57 -6
  339. data/vendor/tomotopy/src/TopicModel/HLDAModel.hpp +6 -3
  340. data/vendor/tomotopy/src/TopicModel/HPAModel.hpp +3 -2
  341. data/vendor/tomotopy/src/TopicModel/LDA.h +3 -3
  342. data/vendor/tomotopy/src/TopicModel/LDACVB0Model.hpp +5 -5
  343. data/vendor/tomotopy/src/TopicModel/LDAModel.hpp +50 -19
  344. data/vendor/tomotopy/src/TopicModel/LLDAModel.hpp +6 -2
  345. data/vendor/tomotopy/src/TopicModel/MGLDAModel.hpp +3 -2
  346. data/vendor/tomotopy/src/TopicModel/PAModel.hpp +1 -1
  347. data/vendor/tomotopy/src/TopicModel/PLDAModel.hpp +6 -2
  348. data/vendor/tomotopy/src/TopicModel/PT.h +3 -1
  349. data/vendor/tomotopy/src/TopicModel/PTModel.hpp +36 -3
  350. data/vendor/tomotopy/src/TopicModel/SLDAModel.hpp +6 -3
  351. data/vendor/tomotopy/src/TopicModel/TopicModel.hpp +55 -26
  352. data/vendor/tomotopy/src/Utils/AliasMethod.hpp +5 -4
  353. data/vendor/tomotopy/src/Utils/Dictionary.h +2 -2
  354. data/vendor/tomotopy/src/Utils/EigenAddonOps.hpp +36 -1
  355. data/vendor/tomotopy/src/Utils/MultiNormalDistribution.hpp +1 -1
  356. data/vendor/tomotopy/src/Utils/TruncMultiNormal.hpp +1 -1
  357. data/vendor/tomotopy/src/Utils/exception.h +6 -0
  358. data/vendor/tomotopy/src/Utils/math.h +2 -2
  359. data/vendor/tomotopy/src/Utils/sample.hpp +14 -12
  360. data/vendor/tomotopy/src/Utils/serializer.hpp +30 -5
  361. data/vendor/tomotopy/src/Utils/sse_gamma.h +0 -3
  362. metadata +64 -18
  363. data/vendor/eigen/Eigen/CMakeLists.txt +0 -19
  364. data/vendor/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -674
  365. data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
  366. data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
  367. data/vendor/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
  368. data/vendor/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
  369. data/vendor/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
@@ -0,0 +1,619 @@
1
+ /**
2
+ * @file MorePacketMath.h
3
+ * @author bab2min (bab2min@gmail.com)
4
+ * @brief AVX (256-bit) specializations of EigenRand's additional packet-math helpers
5
+ * @version 0.3.5
6
+ * @date 2021-07-16
7
+ *
8
+ * @copyright Copyright (c) 2020-2021
9
+ *
10
+ */
11
+
12
+ #ifndef EIGENRAND_MORE_PACKET_MATH_AVX_H
13
+ #define EIGENRAND_MORE_PACKET_MATH_AVX_H
14
+
15
+ #include <immintrin.h>
16
+
17
+ namespace Eigen
18
+ {
19
+ namespace internal
20
+ {
21
+ template<> // Trait specialization: flags Packet8i as an integer packet (derives from std::true_type).
22
+ struct IsIntPacket<Packet8i> : std::true_type {};
23
+
24
+ template<> // Trait specialization: names the half-width packet type paired with Packet8i.
25
+ struct HalfPacket<Packet8i>
26
+ {
27
+ using type = Packet4i; // Packet8i's half packet is Packet4i
28
+ };
29
+
30
+ template<> // Trait specialization: names the half-width packet type paired with Packet8f.
31
+ struct HalfPacket<Packet8f>
32
+ {
33
+ using type = Packet4f; // Packet8f's half packet is Packet4f
34
+ };
35
+
36
+ template<> // Trait specialization: flags Packet8f as a float packet (derives from std::true_type).
37
+ struct IsFloatPacket<Packet8f> : std::true_type {};
38
+
39
+ template<> // Trait specialization: flags Packet4d as a double packet (derives from std::true_type).
40
+ struct IsDoublePacket<Packet4d> : std::true_type {};
41
+
42
+ template<> // Bit-level reinterpretation of a Packet8i as each of the three 256-bit packet types.
43
+ struct reinterpreter<Packet8i>
44
+ {
45
+ EIGEN_STRONG_INLINE Packet8f to_float(const Packet8i& x) // returns x viewed as Packet8f
46
+ {
47
+ return _mm256_castsi256_ps(x); // cast intrinsic: changes the type only, no numeric int->float conversion
48
+ }
49
+
50
+ EIGEN_STRONG_INLINE Packet4d to_double(const Packet8i& x) // returns x viewed as Packet4d
51
+ {
52
+ return _mm256_castsi256_pd(x); // cast intrinsic: changes the type only, no numeric conversion
53
+ }
54
+
55
+ EIGEN_STRONG_INLINE Packet8i to_int(const Packet8i& x) // identity: x is already an integer packet
56
+ {
57
+ return x;
58
+ }
59
+ };
60
+
61
+ template<> // Bit-level reinterpretation of a Packet8f as each of the three 256-bit packet types.
62
+ struct reinterpreter<Packet8f>
63
+ {
64
+ EIGEN_STRONG_INLINE Packet8f to_float(const Packet8f& x) // identity: x is already a float packet
65
+ {
66
+ return x;
67
+ }
68
+
69
+ EIGEN_STRONG_INLINE Packet4d to_double(const Packet8f& x) // returns x viewed as Packet4d
70
+ {
71
+ return _mm256_castps_pd(x); // cast intrinsic: changes the type only, no float->double conversion
72
+ }
73
+
74
+ EIGEN_STRONG_INLINE Packet8i to_int(const Packet8f& x) // returns x viewed as Packet8i
75
+ {
76
+ return _mm256_castps_si256(x); // cast intrinsic: changes the type only, no float->int rounding
77
+ }
78
+ };
79
+
80
+ template<> // Bit-level reinterpretation of a Packet4d as each of the three 256-bit packet types.
81
+ struct reinterpreter<Packet4d>
82
+ {
83
+ EIGEN_STRONG_INLINE Packet8f to_float(const Packet4d& x) // returns x viewed as Packet8f
84
+ {
85
+ return _mm256_castpd_ps(x); // cast intrinsic: changes the type only, no double->float conversion
86
+ }
87
+
88
+ EIGEN_STRONG_INLINE Packet4d to_double(const Packet4d& x) // identity: x is already a double packet
89
+ {
90
+ return x;
91
+ }
92
+
93
+ EIGEN_STRONG_INLINE Packet8i to_int(const Packet4d& x) // returns x viewed as Packet8i
94
+ {
95
+ return _mm256_castpd_si256(x); // cast intrinsic: changes the type only, no double->int rounding
96
+ }
97
+ };
98
+
99
+ template<> // Splits a 256-bit integer packet into its two 128-bit halves, written to the out-params a and b.
100
+ EIGEN_STRONG_INLINE void split_two<Packet8i>(const Packet8i& x, Packet4i& a, Packet4i& b)
101
+ {
102
+ a = _mm256_extractf128_si256(x, 0); // index 0: lower 128 bits of x
103
+ b = _mm256_extractf128_si256(x, 1); // index 1: upper 128 bits of x
104
+ }
105
+
106
+ EIGEN_STRONG_INLINE Packet8i combine_two(const Packet4i& a, const Packet4i& b) // Builds a Packet8i from two Packet4i halves: a is the lower half, b the upper.
107
+ {
108
+ return _mm256_insertf128_si256(_mm256_castsi128_si256(a), b, 1); // widen a to 256 bits, then insert b at 128-bit position 1 (upper half)
109
+ }
110
+
111
+ template<> // Splits a 256-bit float packet into its two 128-bit halves, written to the out-params a and b.
112
+ EIGEN_STRONG_INLINE void split_two<Packet8f>(const Packet8f& x, Packet4f& a, Packet4f& b)
113
+ {
114
+ a = _mm256_extractf128_ps(x, 0); // index 0: lower four floats of x
115
+ b = _mm256_extractf128_ps(x, 1); // index 1: upper four floats of x
116
+ }
117
+
118
+ EIGEN_STRONG_INLINE Packet8f combine_two(const Packet4f& a, const Packet4f& b) // Builds a Packet8f from two Packet4f halves: a is the lower half, b the upper.
119
+ {
120
+ return _mm256_insertf128_ps(_mm256_castps128_ps256(a), b, 1); // widen a to 256 bits, then insert b at 128-bit position 1 (upper half)
121
+ }
122
+
123
+
124
+ EIGEN_STRONG_INLINE Packet4i combine_low32(const Packet8i& a) // Returns the even-indexed 32-bit elements of a (indices 0, 2, 4, 6) packed into a Packet4i.
125
+ {
126
+ #ifdef EIGEN_VECTORIZE_AVX2
127
+ return _mm256_castsi256_si128(_mm256_permutevar8x32_epi32(a, _mm256_setr_epi32(0, 2, 4, 6, 1, 3, 5, 7))); // cross-lane permute moves elements 0,2,4,6 into the lower 128 bits, which are then kept
128
+ #else
129
+ auto sc = _mm256_permutevar_ps(_mm256_castsi256_ps(a), _mm256_setr_epi32(0, 2, 1, 3, 1, 3, 0, 2)); // per-128-bit-lane permute: lower lane -> [0,2,1,3], upper lane -> [1,3,0,2] (lane-relative indices)
130
+ return _mm_castps_si128(_mm_blend_ps(_mm256_extractf128_ps(sc, 0), _mm256_extractf128_ps(sc, 1), 0b1100)); // mask 0b1100: elements 0-1 from the lower lane, elements 2-3 from the upper lane
131
+ #endif
132
+ }
133
+
134
+ template<> // Broadcasts one 64-bit value into every 64-bit element of a Packet8i.
135
+ EIGEN_STRONG_INLINE Packet8i pseti64<Packet8i>(uint64_t a)
136
+ {
137
+ return _mm256_set1_epi64x(a); // a is passed to an intrinsic taking a signed 64-bit argument; the bit pattern is what gets replicated
138
+ }
139
+
140
+ template<> // Element-wise addition of a and b treated as packed 64-bit integers.
141
+ EIGEN_STRONG_INLINE Packet8i padd64<Packet8i>(const Packet8i& a, const Packet8i& b)
142
+ {
143
+ #ifdef EIGEN_VECTORIZE_AVX2
144
+ return _mm256_add_epi64(a, b); // single 256-bit integer add when AVX2 is enabled
145
+ #else
146
+ Packet4i a1, a2, b1, b2; // fallback without AVX2: operate on 128-bit halves
147
+ split_two(a, a1, a2);
148
+ split_two(b, b1, b2);
149
+ return combine_two((Packet4i)_mm_add_epi64(a1, b1), (Packet4i)_mm_add_epi64(a2, b2)); // add each half with the 128-bit intrinsic, then rejoin lower/upper
150
+ #endif
151
+ }
152
+
153
+ template<> // Element-wise subtraction (a - b) of packets treated as packed 64-bit integers.
154
+ EIGEN_STRONG_INLINE Packet8i psub64<Packet8i>(const Packet8i& a, const Packet8i& b)
155
+ {
156
+ #ifdef EIGEN_VECTORIZE_AVX2
157
+ return _mm256_sub_epi64(a, b); // single 256-bit integer subtract when AVX2 is enabled
158
+ #else
159
+ Packet4i a1, a2, b1, b2; // fallback without AVX2: operate on 128-bit halves
160
+ split_two(a, a1, a2);
161
+ split_two(b, b1, b2);
162
+ return combine_two((Packet4i)_mm_sub_epi64(a1, b1), (Packet4i)_mm_sub_epi64(a2, b2)); // subtract each half with the 128-bit intrinsic, then rejoin lower/upper
163
+ #endif
164
+ }
165
+
166
+ template<> // Element-wise equality test on packed 32-bit integers; the result is a per-element mask.
167
+ EIGEN_STRONG_INLINE Packet8i pcmpeq<Packet8i>(const Packet8i& a, const Packet8i& b)
168
+ {
169
+ #ifdef EIGEN_VECTORIZE_AVX2
170
+ return _mm256_cmpeq_epi32(a, b); // mask element is all-ones where a == b, zero otherwise
171
+ #else
172
+ Packet4i a1, a2, b1, b2; // fallback without AVX2: compare 128-bit halves separately
173
+ split_two(a, a1, a2);
174
+ split_two(b, b1, b2);
175
+ return combine_two((Packet4i)_mm_cmpeq_epi32(a1, b1), (Packet4i)_mm_cmpeq_epi32(a2, b2)); // rejoin the two half-masks as lower/upper
176
+ #endif
177
+ }
178
+
179
+ template<> // Shift helpers for Packet8i: 32-bit and 64-bit element shifts, with 128-bit fallbacks when AVX2 is not enabled.
180
+ struct BitShifter<Packet8i>
181
+ {
182
+ template<int b>
183
+ EIGEN_STRONG_INLINE Packet8i sll(const Packet8i& a) // left shift of each 32-bit element by b bits
184
+ {
185
+ #ifdef EIGEN_VECTORIZE_AVX2
186
+ return _mm256_slli_epi32(a, b);
187
+ #else
188
+ Packet4i a1, a2; // fallback: shift each 128-bit half, then rejoin
189
+ split_two(a, a1, a2);
190
+ return combine_two((Packet4i)_mm_slli_epi32(a1, b), (Packet4i)_mm_slli_epi32(a2, b));
191
+ #endif
192
+ }
193
+
194
+ template<int b>
195
+ EIGEN_STRONG_INLINE Packet8i srl(const Packet8i& a, int _b = b) // logical right shift of each 32-bit element; _b is used only when b is negative
196
+ {
197
+ #ifdef EIGEN_VECTORIZE_AVX2
198
+ if (b >= 0) // non-negative template argument: use it as the shift count
199
+ {
200
+ return _mm256_srli_epi32(a, b);
201
+ }
202
+ else // negative template argument: take the shift count from the runtime parameter instead
203
+ {
204
+ return _mm256_srli_epi32(a, _b);
205
+ }
206
+ #else
207
+ Packet4i a1, a2; // fallback: shift each 128-bit half, then rejoin
208
+ split_two(a, a1, a2);
209
+ if (b >= 0) // same count selection as the AVX2 path
210
+ {
211
+ return combine_two((Packet4i)_mm_srli_epi32(a1, b), (Packet4i)_mm_srli_epi32(a2, b));
212
+ }
213
+ else
214
+ {
215
+ return combine_two((Packet4i)_mm_srli_epi32(a1, _b), (Packet4i)_mm_srli_epi32(a2, _b));
216
+ }
217
+ #endif
218
+ }
219
+
220
+ template<int b>
221
+ EIGEN_STRONG_INLINE Packet8i sll64(const Packet8i& a) // left shift of each 64-bit element by b bits
222
+ {
223
+ #ifdef EIGEN_VECTORIZE_AVX2
224
+ return _mm256_slli_epi64(a, b);
225
+ #else
226
+ Packet4i a1, a2; // fallback: shift each 128-bit half, then rejoin
227
+ split_two(a, a1, a2);
228
+ return combine_two((Packet4i)_mm_slli_epi64(a1, b), (Packet4i)_mm_slli_epi64(a2, b));
229
+ #endif
230
+ }
231
+
232
+ template<int b>
233
+ EIGEN_STRONG_INLINE Packet8i srl64(const Packet8i& a) // logical right shift of each 64-bit element by b bits
234
+ {
235
+ #ifdef EIGEN_VECTORIZE_AVX2
236
+ return _mm256_srli_epi64(a, b);
237
+ #else
238
+ Packet4i a1, a2; // fallback: shift each 128-bit half, then rejoin
239
+ split_two(a, a1, a2);
240
+ return combine_two((Packet4i)_mm_srli_epi64(a1, b), (Packet4i)_mm_srli_epi64(a2, b));
241
+ #endif
242
+ }
243
+ };
244
+ #ifdef EIGENRAND_EIGEN_33_MODE
245
+ template<> EIGEN_STRONG_INLINE Packet8i padd<Packet8i>(const Packet8i& a, const Packet8i& b) // Element-wise 32-bit integer addition; compiled only when EIGENRAND_EIGEN_33_MODE is defined.
246
+ {
247
+ #ifdef EIGEN_VECTORIZE_AVX2
248
+ return _mm256_add_epi32(a, b);
249
+ #else
250
+ Packet4i a1, a2, b1, b2; // fallback without AVX2: add 128-bit halves separately
251
+ split_two(a, a1, a2);
252
+ split_two(b, b1, b2);
253
+ return combine_two((Packet4i)_mm_add_epi32(a1, b1), (Packet4i)_mm_add_epi32(a2, b2)); // rejoin as lower/upper halves
254
+ #endif
255
+ }
256
+
257
+ template<> EIGEN_STRONG_INLINE Packet8i psub<Packet8i>(const Packet8i& a, const Packet8i& b) // Element-wise 32-bit integer subtraction (a - b); compiled only when EIGENRAND_EIGEN_33_MODE is defined.
258
+ {
259
+ #ifdef EIGEN_VECTORIZE_AVX2
260
+ return _mm256_sub_epi32(a, b);
261
+ #else
262
+ Packet4i a1, a2, b1, b2; // fallback without AVX2: subtract 128-bit halves separately
263
+ split_two(a, a1, a2);
264
+ split_two(b, b1, b2);
265
+ return combine_two((Packet4i)_mm_sub_epi32(a1, b1), (Packet4i)_mm_sub_epi32(a2, b2)); // rejoin as lower/upper halves
266
+ #endif
267
+ }
268
+
269
+ template<> EIGEN_STRONG_INLINE Packet8i pand<Packet8i>(const Packet8i& a, const Packet8i& b) // Bitwise AND of two integer packets; compiled only when EIGENRAND_EIGEN_33_MODE is defined.
270
+ {
271
+ #ifdef EIGEN_VECTORIZE_AVX2
272
+ return _mm256_and_si256(a, b);
273
+ #else
274
+ return reinterpret_to_int((Packet8f)_mm256_and_ps(reinterpret_to_float(a), reinterpret_to_float(b))); // fallback: AND via the float-typed intrinsic; reinterpret_to_* helpers are defined outside this view
275
+ #endif
276
+ }
277
+
278
+ template<> EIGEN_STRONG_INLINE Packet8i pandnot<Packet8i>(const Packet8i& a, const Packet8i& b) // Bitwise and-not of two integer packets; compiled only when EIGENRAND_EIGEN_33_MODE is defined.
279
+ {
280
+ #ifdef EIGEN_VECTORIZE_AVX2
281
+ return _mm256_andnot_si256(a, b); // the intrinsic computes (~a) & b. NOTE(review): confirm this operand order matches the pandnot contract of the Eigen version in use
282
+ #else
283
+ return reinterpret_to_int((Packet8f)_mm256_andnot_ps(reinterpret_to_float(a), reinterpret_to_float(b))); // fallback: same operand order via the float-typed intrinsic; reinterpret_to_* helpers are defined outside this view
284
+ #endif
285
+ }
286
+
287
+ template<> EIGEN_STRONG_INLINE Packet8i por<Packet8i>(const Packet8i& a, const Packet8i& b) // Bitwise OR of two integer packets; compiled only when EIGENRAND_EIGEN_33_MODE is defined.
288
+ {
289
+ #ifdef EIGEN_VECTORIZE_AVX2
290
+ return _mm256_or_si256(a, b);
291
+ #else
292
+ return reinterpret_to_int((Packet8f)_mm256_or_ps(reinterpret_to_float(a), reinterpret_to_float(b))); // fallback: OR via the float-typed intrinsic; reinterpret_to_* helpers are defined outside this view
293
+ #endif
294
+ }
295
+
296
+ template<> EIGEN_STRONG_INLINE Packet8i pxor<Packet8i>(const Packet8i& a, const Packet8i& b)
297
+ {
298
+ #ifdef EIGEN_VECTORIZE_AVX2
299
+ return _mm256_xor_si256(a, b);
300
+ #else
301
+ return reinterpret_to_int((Packet8f)_mm256_xor_ps(reinterpret_to_float(a), reinterpret_to_float(b)));
302
+ #endif
303
+ }
304
+ #endif
305
+ template<>
306
+ EIGEN_STRONG_INLINE Packet8i pcmplt<Packet8i>(const Packet8i& a, const Packet8i& b)
307
+ {
308
+ #ifdef EIGEN_VECTORIZE_AVX2
309
+ return _mm256_cmpgt_epi32(b, a);
310
+ #else
311
+ Packet4i a1, a2, b1, b2;
312
+ split_two(a, a1, a2);
313
+ split_two(b, b1, b2);
314
+ return combine_two((Packet4i)_mm_cmpgt_epi32(b1, a1), (Packet4i)_mm_cmpgt_epi32(b2, a2));
315
+ #endif
316
+ }
317
+
318
+ template<>
319
+ EIGEN_STRONG_INLINE Packet8i pcmplt64<Packet8i>(const Packet8i& a, const Packet8i& b)
320
+ {
321
+ #ifdef EIGEN_VECTORIZE_AVX2
322
+ return _mm256_cmpgt_epi64(b, a);
323
+ #else
324
+ Packet4i a1, a2, b1, b2;
325
+ split_two(a, a1, a2);
326
+ split_two(b, b1, b2);
327
+ return combine_two((Packet4i)_mm_cmpgt_epi64(b1, a1), (Packet4i)_mm_cmpgt_epi64(b2, a2));
328
+ #endif
329
+ }
330
+
331
+ template<>
332
+ EIGEN_STRONG_INLINE Packet8f pcmplt<Packet8f>(const Packet8f& a, const Packet8f& b)
333
+ {
334
+ return _mm256_cmp_ps(a, b, _CMP_LT_OQ);
335
+ }
336
+
337
+ template<>
338
+ EIGEN_STRONG_INLINE Packet8f pcmple<Packet8f>(const Packet8f& a, const Packet8f& b)
339
+ {
340
+ return _mm256_cmp_ps(a, b, _CMP_LE_OQ);
341
+ }
342
+
343
+ template<>
344
+ EIGEN_STRONG_INLINE Packet4d pcmplt<Packet4d>(const Packet4d& a, const Packet4d& b)
345
+ {
346
+ return _mm256_cmp_pd(a, b, _CMP_LT_OQ);
347
+ }
348
+
349
+ template<>
350
+ EIGEN_STRONG_INLINE Packet4d pcmple<Packet4d>(const Packet4d& a, const Packet4d& b)
351
+ {
352
+ return _mm256_cmp_pd(a, b, _CMP_LE_OQ);
353
+ }
354
+
355
+ template<>
356
+ EIGEN_STRONG_INLINE Packet8f pblendv(const Packet8f& ifPacket, const Packet8f& thenPacket, const Packet8f& elsePacket)
357
+ {
358
+ return _mm256_blendv_ps(elsePacket, thenPacket, ifPacket);
359
+ }
360
+
361
+ template<>
362
+ EIGEN_STRONG_INLINE Packet8f pblendv(const Packet8i& ifPacket, const Packet8f& thenPacket, const Packet8f& elsePacket)
363
+ {
364
+ return pblendv(_mm256_castsi256_ps(ifPacket), thenPacket, elsePacket);
365
+ }
366
+
367
+ template<>
368
+ EIGEN_STRONG_INLINE Packet8i pblendv(const Packet8i& ifPacket, const Packet8i& thenPacket, const Packet8i& elsePacket)
369
+ {
370
+ return _mm256_castps_si256(_mm256_blendv_ps(
371
+ _mm256_castsi256_ps(elsePacket),
372
+ _mm256_castsi256_ps(thenPacket),
373
+ _mm256_castsi256_ps(ifPacket)
374
+ ));
375
+ }
376
+
377
+ template<>
378
+ EIGEN_STRONG_INLINE Packet4d pblendv(const Packet4d& ifPacket, const Packet4d& thenPacket, const Packet4d& elsePacket)
379
+ {
380
+ return _mm256_blendv_pd(elsePacket, thenPacket, ifPacket);
381
+ }
382
+
383
+ template<>
384
+ EIGEN_STRONG_INLINE Packet4d pblendv(const Packet8i& ifPacket, const Packet4d& thenPacket, const Packet4d& elsePacket)
385
+ {
386
+ return pblendv(_mm256_castsi256_pd(ifPacket), thenPacket, elsePacket);
387
+ }
388
+
389
+ template<>
390
+ EIGEN_STRONG_INLINE Packet8i pgather<Packet8i>(const int* addr, const Packet8i& index)
391
+ {
392
+ #ifdef EIGEN_VECTORIZE_AVX2
393
+ return _mm256_i32gather_epi32(addr, index, 4);
394
+ #else
395
+ uint32_t u[8];
396
+ _mm256_storeu_si256((Packet8i*)u, index);
397
+ return _mm256_setr_epi32(addr[u[0]], addr[u[1]], addr[u[2]], addr[u[3]],
398
+ addr[u[4]], addr[u[5]], addr[u[6]], addr[u[7]]);
399
+ #endif
400
+ }
401
+
402
+ template<>
403
+ EIGEN_STRONG_INLINE Packet8f pgather<Packet8i>(const float* addr, const Packet8i& index)
404
+ {
405
+ #ifdef EIGEN_VECTORIZE_AVX2
406
+ return _mm256_i32gather_ps(addr, index, 4);
407
+ #else
408
+ uint32_t u[8];
409
+ _mm256_storeu_si256((Packet8i*)u, index);
410
+ return _mm256_setr_ps(addr[u[0]], addr[u[1]], addr[u[2]], addr[u[3]],
411
+ addr[u[4]], addr[u[5]], addr[u[6]], addr[u[7]]);
412
+ #endif
413
+ }
414
+
415
+ template<>
416
+ EIGEN_STRONG_INLINE Packet4d pgather<Packet8i>(const double* addr, const Packet8i& index, bool upperhalf)
417
+ {
418
+ #ifdef EIGEN_VECTORIZE_AVX2
419
+ return _mm256_i32gather_pd(addr, _mm256_castsi256_si128(index), 8);
420
+ #else
421
+ uint32_t u[8];
422
+ _mm256_storeu_si256((Packet8i*)u, index);
423
+ if (upperhalf)
424
+ {
425
+ return _mm256_setr_pd(addr[u[4]], addr[u[5]], addr[u[6]], addr[u[7]]);
426
+ }
427
+ else
428
+ {
429
+ return _mm256_setr_pd(addr[u[0]], addr[u[1]], addr[u[2]], addr[u[3]]);
430
+ }
431
+ #endif
432
+ }
433
+
434
+ template<>
435
+ EIGEN_STRONG_INLINE int pmovemask<Packet8f>(const Packet8f& a)
436
+ {
437
+ return _mm256_movemask_ps(a);
438
+ }
439
+
440
+ template<>
441
+ EIGEN_STRONG_INLINE int pmovemask<Packet4d>(const Packet4d& a)
442
+ {
443
+ return _mm256_movemask_pd(a);
444
+ }
445
+
446
+ template<>
447
+ EIGEN_STRONG_INLINE int pmovemask<Packet8i>(const Packet8i& a)
448
+ {
449
+ return pmovemask(_mm256_castsi256_ps(a));
450
+ }
451
+
452
+ template<>
453
+ EIGEN_STRONG_INLINE Packet8f ptruncate<Packet8f>(const Packet8f& a)
454
+ {
455
+ return _mm256_round_ps(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
456
+ }
457
+
458
+ template<>
459
+ EIGEN_STRONG_INLINE Packet4d ptruncate<Packet4d>(const Packet4d& a)
460
+ {
461
+ return _mm256_round_pd(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
462
+ }
463
+
464
+ template<>
465
+ EIGEN_STRONG_INLINE Packet8i pcmpeq64<Packet8i>(const Packet8i& a, const Packet8i& b)
466
+ {
467
+ #ifdef EIGEN_VECTORIZE_AVX2
468
+ return _mm256_cmpeq_epi64(a, b);
469
+ #else
470
+ Packet4i a1, a2, b1, b2;
471
+ split_two(a, a1, a2);
472
+ split_two(b, b1, b2);
473
+ return combine_two((Packet4i)_mm_cmpeq_epi64(a1, b1), (Packet4i)_mm_cmpeq_epi64(a2, b2));
474
+ #endif
475
+ }
476
+
477
+ template<>
478
+ EIGEN_STRONG_INLINE Packet8i pmuluadd64<Packet8i>(const Packet8i& a, uint64_t b, uint64_t c)
479
+ {
480
+ uint64_t u[4];
481
+ _mm256_storeu_si256((__m256i*)u, a);
482
+ u[0] = u[0] * b + c;
483
+ u[1] = u[1] * b + c;
484
+ u[2] = u[2] * b + c;
485
+ u[3] = u[3] * b + c;
486
+ return _mm256_loadu_si256((__m256i*)u);
487
+ }
488
+
489
+ EIGEN_STRONG_INLINE __m256d uint64_to_double(__m256i x) {
490
+ auto y = _mm256_or_pd(_mm256_castsi256_pd(x), _mm256_set1_pd(0x0010000000000000));
491
+ return _mm256_sub_pd(y, _mm256_set1_pd(0x0010000000000000));
492
+ }
493
+
494
+ EIGEN_STRONG_INLINE __m256d int64_to_double(__m256i x) {
495
+ x = padd64(x, _mm256_castpd_si256(_mm256_set1_pd(0x0018000000000000)));
496
+ return _mm256_sub_pd(_mm256_castsi256_pd(x), _mm256_set1_pd(0x0018000000000000));
497
+ }
498
+
499
+ EIGEN_STRONG_INLINE __m256i double_to_int64(__m256d x) {
500
+ x = _mm256_add_pd(_mm256_floor_pd(x), _mm256_set1_pd(0x0018000000000000));
501
+ return psub64(
502
+ _mm256_castpd_si256(x),
503
+ _mm256_castpd_si256(_mm256_set1_pd(0x0018000000000000))
504
+ );
505
+ }
506
+
507
+ template<>
508
+ EIGEN_STRONG_INLINE Packet8i pcast64<Packet4d, Packet8i>(const Packet4d& a)
509
+ {
510
+ return double_to_int64(a);
511
+ }
512
+
513
+ template<>
514
+ EIGEN_STRONG_INLINE Packet4d pcast64<Packet8i, Packet4d>(const Packet8i& a)
515
+ {
516
+ return int64_to_double(a);
517
+ }
518
+
519
+ template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
520
+ Packet4d psin<Packet4d>(const Packet4d& x)
521
+ {
522
+ return _psin(x);
523
+ }
524
+
525
+ #ifdef EIGENRAND_EIGEN_33_MODE
526
+ template <>
527
+ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4d
528
+ plog<Packet4d>(const Packet4d& _x) {
529
+ Packet4d x = _x;
530
+ _EIGEN_DECLARE_CONST_Packet4d(1, 1.0);
531
+ _EIGEN_DECLARE_CONST_Packet4d(half, 0.5);
532
+
533
+ auto inv_mant_mask = _mm256_castsi256_pd(pseti64<Packet8i>(~0x7ff0000000000000));
534
+ auto min_norm_pos = _mm256_castsi256_pd(pseti64<Packet8i>(0x10000000000000));
535
+ auto minus_inf = _mm256_castsi256_pd(pseti64<Packet8i>(0xfff0000000000000));
536
+
537
+ // Polynomial coefficients.
538
+ _EIGEN_DECLARE_CONST_Packet4d(cephes_SQRTHF, 0.707106781186547524);
539
+ _EIGEN_DECLARE_CONST_Packet4d(cephes_log_p0, 7.0376836292E-2);
540
+ _EIGEN_DECLARE_CONST_Packet4d(cephes_log_p1, -1.1514610310E-1);
541
+ _EIGEN_DECLARE_CONST_Packet4d(cephes_log_p2, 1.1676998740E-1);
542
+ _EIGEN_DECLARE_CONST_Packet4d(cephes_log_p3, -1.2420140846E-1);
543
+ _EIGEN_DECLARE_CONST_Packet4d(cephes_log_p4, +1.4249322787E-1);
544
+ _EIGEN_DECLARE_CONST_Packet4d(cephes_log_p5, -1.6668057665E-1);
545
+ _EIGEN_DECLARE_CONST_Packet4d(cephes_log_p6, +2.0000714765E-1);
546
+ _EIGEN_DECLARE_CONST_Packet4d(cephes_log_p7, -2.4999993993E-1);
547
+ _EIGEN_DECLARE_CONST_Packet4d(cephes_log_p8, +3.3333331174E-1);
548
+ _EIGEN_DECLARE_CONST_Packet4d(cephes_log_q1, -2.12194440e-4);
549
+ _EIGEN_DECLARE_CONST_Packet4d(cephes_log_q2, 0.693359375);
550
+
551
+ Packet4d invalid_mask = _mm256_cmp_pd(x, _mm256_setzero_pd(), _CMP_NGE_UQ); // not greater equal is true if x is NaN
552
+ Packet4d iszero_mask = _mm256_cmp_pd(x, _mm256_setzero_pd(), _CMP_EQ_OQ);
553
+
554
+ // Truncate input values to the minimum positive normal.
555
+ x = pmax(x, min_norm_pos);
556
+
557
+ Packet4d emm0 = uint64_to_double(psrl64<52>(_mm256_castpd_si256(x)));
558
+ Packet4d e = psub(emm0, pset1<Packet4d>(1022));
559
+
560
+ // Set the exponents to -1, i.e. x are in the range [0.5,1).
561
+ x = _mm256_and_pd(x, inv_mant_mask);
562
+ x = _mm256_or_pd(x, p4d_half);
563
+
564
+ // part2: Shift the inputs from the range [0.5,1) to [sqrt(1/2),sqrt(2))
565
+ // and shift by -1. The values are then centered around 0, which improves
566
+ // the stability of the polynomial evaluation.
567
+ // if( x < SQRTHF ) {
568
+ // e -= 1;
569
+ // x = x + x - 1.0;
570
+ // } else { x = x - 1.0; }
571
+ Packet4d mask = _mm256_cmp_pd(x, p4d_cephes_SQRTHF, _CMP_LT_OQ);
572
+ Packet4d tmp = _mm256_and_pd(x, mask);
573
+ x = psub(x, p4d_1);
574
+ e = psub(e, _mm256_and_pd(p4d_1, mask));
575
+ x = padd(x, tmp);
576
+
577
+ Packet4d x2 = pmul(x, x);
578
+ Packet4d x3 = pmul(x2, x);
579
+
580
+ // Evaluate the polynomial approximant of degree 8 in three parts, probably
581
+ // to improve instruction-level parallelism.
582
+ Packet4d y, y1, y2;
583
+ y = pmadd(p4d_cephes_log_p0, x, p4d_cephes_log_p1);
584
+ y1 = pmadd(p4d_cephes_log_p3, x, p4d_cephes_log_p4);
585
+ y2 = pmadd(p4d_cephes_log_p6, x, p4d_cephes_log_p7);
586
+ y = pmadd(y, x, p4d_cephes_log_p2);
587
+ y1 = pmadd(y1, x, p4d_cephes_log_p5);
588
+ y2 = pmadd(y2, x, p4d_cephes_log_p8);
589
+ y = pmadd(y, x3, y1);
590
+ y = pmadd(y, x3, y2);
591
+ y = pmul(y, x3);
592
+
593
+ // Add the logarithm of the exponent back to the result of the interpolation.
594
+ y1 = pmul(e, p4d_cephes_log_q1);
595
+ tmp = pmul(x2, p4d_half);
596
+ y = padd(y, y1);
597
+ x = psub(x, tmp);
598
+ y2 = pmul(e, p4d_cephes_log_q2);
599
+ x = padd(x, y);
600
+ x = padd(x, y2);
601
+
602
+ // Filter out invalid inputs, i.e. negative arg will be NAN, 0 will be -INF.
603
+ return pblendv(iszero_mask, minus_inf, _mm256_or_pd(x, invalid_mask));
604
+ }
605
+ #endif
606
+
607
+ #if !(EIGEN_VERSION_AT_LEAST(3,3,5))
608
+ template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet4i, Packet4f>(const Packet4i& a) {
609
+ return _mm_cvtepi32_ps(a);
610
+ }
611
+
612
+ template<> EIGEN_STRONG_INLINE Packet4i pcast<Packet4f, Packet4i>(const Packet4f& a) {
613
+ return _mm_cvttps_epi32(a);
614
+ }
615
+ #endif
616
+ }
617
+ }
618
+
619
+ #endif