tomoto 0.2.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (369)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +10 -0
  3. data/README.md +8 -10
  4. data/ext/tomoto/ct.cpp +11 -11
  5. data/ext/tomoto/dmr.cpp +14 -13
  6. data/ext/tomoto/dt.cpp +14 -14
  7. data/ext/tomoto/extconf.rb +7 -5
  8. data/ext/tomoto/gdmr.cpp +7 -7
  9. data/ext/tomoto/hdp.cpp +9 -9
  10. data/ext/tomoto/hlda.cpp +13 -13
  11. data/ext/tomoto/hpa.cpp +5 -5
  12. data/ext/tomoto/lda.cpp +42 -39
  13. data/ext/tomoto/llda.cpp +6 -6
  14. data/ext/tomoto/mglda.cpp +15 -15
  15. data/ext/tomoto/pa.cpp +6 -6
  16. data/ext/tomoto/plda.cpp +6 -6
  17. data/ext/tomoto/slda.cpp +8 -8
  18. data/ext/tomoto/{ext.cpp → tomoto.cpp} +8 -8
  19. data/ext/tomoto/utils.h +16 -70
  20. data/lib/tomoto/version.rb +1 -1
  21. data/lib/tomoto.rb +5 -1
  22. data/vendor/EigenRand/EigenRand/Core.h +10 -10
  23. data/vendor/EigenRand/EigenRand/Dists/Basic.h +208 -9
  24. data/vendor/EigenRand/EigenRand/Dists/Discrete.h +52 -31
  25. data/vendor/EigenRand/EigenRand/Dists/GammaPoisson.h +9 -8
  26. data/vendor/EigenRand/EigenRand/Dists/NormalExp.h +28 -21
  27. data/vendor/EigenRand/EigenRand/EigenRand +11 -6
  28. data/vendor/EigenRand/EigenRand/Macro.h +13 -7
  29. data/vendor/EigenRand/EigenRand/MorePacketMath.h +348 -740
  30. data/vendor/EigenRand/EigenRand/MvDists/Multinomial.h +5 -3
  31. data/vendor/EigenRand/EigenRand/MvDists/MvNormal.h +9 -3
  32. data/vendor/EigenRand/EigenRand/PacketFilter.h +11 -253
  33. data/vendor/EigenRand/EigenRand/PacketRandomEngine.h +21 -47
  34. data/vendor/EigenRand/EigenRand/RandUtils.h +50 -344
  35. data/vendor/EigenRand/EigenRand/arch/AVX/MorePacketMath.h +619 -0
  36. data/vendor/EigenRand/EigenRand/arch/AVX/PacketFilter.h +149 -0
  37. data/vendor/EigenRand/EigenRand/arch/AVX/RandUtils.h +228 -0
  38. data/vendor/EigenRand/EigenRand/arch/NEON/MorePacketMath.h +473 -0
  39. data/vendor/EigenRand/EigenRand/arch/NEON/PacketFilter.h +142 -0
  40. data/vendor/EigenRand/EigenRand/arch/NEON/RandUtils.h +126 -0
  41. data/vendor/EigenRand/EigenRand/arch/SSE/MorePacketMath.h +501 -0
  42. data/vendor/EigenRand/EigenRand/arch/SSE/PacketFilter.h +133 -0
  43. data/vendor/EigenRand/EigenRand/arch/SSE/RandUtils.h +120 -0
  44. data/vendor/EigenRand/EigenRand/doc.h +24 -12
  45. data/vendor/EigenRand/README.md +57 -4
  46. data/vendor/eigen/COPYING.APACHE +203 -0
  47. data/vendor/eigen/COPYING.BSD +1 -1
  48. data/vendor/eigen/COPYING.MINPACK +51 -52
  49. data/vendor/eigen/Eigen/Cholesky +0 -1
  50. data/vendor/eigen/Eigen/Core +112 -265
  51. data/vendor/eigen/Eigen/Eigenvalues +2 -3
  52. data/vendor/eigen/Eigen/Geometry +5 -8
  53. data/vendor/eigen/Eigen/Householder +0 -1
  54. data/vendor/eigen/Eigen/Jacobi +0 -1
  55. data/vendor/eigen/Eigen/KLUSupport +41 -0
  56. data/vendor/eigen/Eigen/LU +2 -5
  57. data/vendor/eigen/Eigen/OrderingMethods +0 -3
  58. data/vendor/eigen/Eigen/PaStiXSupport +1 -0
  59. data/vendor/eigen/Eigen/PardisoSupport +0 -0
  60. data/vendor/eigen/Eigen/QR +2 -3
  61. data/vendor/eigen/Eigen/QtAlignedMalloc +0 -1
  62. data/vendor/eigen/Eigen/SVD +0 -1
  63. data/vendor/eigen/Eigen/Sparse +0 -2
  64. data/vendor/eigen/Eigen/SparseCholesky +0 -8
  65. data/vendor/eigen/Eigen/SparseLU +4 -0
  66. data/vendor/eigen/Eigen/SparseQR +0 -1
  67. data/vendor/eigen/Eigen/src/Cholesky/LDLT.h +42 -27
  68. data/vendor/eigen/Eigen/src/Cholesky/LLT.h +39 -23
  69. data/vendor/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +90 -47
  70. data/vendor/eigen/Eigen/src/Core/ArithmeticSequence.h +413 -0
  71. data/vendor/eigen/Eigen/src/Core/Array.h +99 -11
  72. data/vendor/eigen/Eigen/src/Core/ArrayBase.h +3 -3
  73. data/vendor/eigen/Eigen/src/Core/ArrayWrapper.h +21 -21
  74. data/vendor/eigen/Eigen/src/Core/Assign.h +1 -1
  75. data/vendor/eigen/Eigen/src/Core/AssignEvaluator.h +125 -50
  76. data/vendor/eigen/Eigen/src/Core/Assign_MKL.h +10 -10
  77. data/vendor/eigen/Eigen/src/Core/BandMatrix.h +16 -16
  78. data/vendor/eigen/Eigen/src/Core/Block.h +56 -60
  79. data/vendor/eigen/Eigen/src/Core/BooleanRedux.h +29 -31
  80. data/vendor/eigen/Eigen/src/Core/CommaInitializer.h +7 -3
  81. data/vendor/eigen/Eigen/src/Core/CoreEvaluators.h +325 -272
  82. data/vendor/eigen/Eigen/src/Core/CoreIterators.h +5 -0
  83. data/vendor/eigen/Eigen/src/Core/CwiseBinaryOp.h +21 -22
  84. data/vendor/eigen/Eigen/src/Core/CwiseNullaryOp.h +153 -18
  85. data/vendor/eigen/Eigen/src/Core/CwiseUnaryOp.h +6 -6
  86. data/vendor/eigen/Eigen/src/Core/CwiseUnaryView.h +14 -10
  87. data/vendor/eigen/Eigen/src/Core/DenseBase.h +132 -42
  88. data/vendor/eigen/Eigen/src/Core/DenseCoeffsBase.h +25 -21
  89. data/vendor/eigen/Eigen/src/Core/DenseStorage.h +153 -71
  90. data/vendor/eigen/Eigen/src/Core/Diagonal.h +21 -23
  91. data/vendor/eigen/Eigen/src/Core/DiagonalMatrix.h +50 -2
  92. data/vendor/eigen/Eigen/src/Core/DiagonalProduct.h +1 -1
  93. data/vendor/eigen/Eigen/src/Core/Dot.h +10 -10
  94. data/vendor/eigen/Eigen/src/Core/EigenBase.h +10 -9
  95. data/vendor/eigen/Eigen/src/Core/ForceAlignedAccess.h +8 -4
  96. data/vendor/eigen/Eigen/src/Core/Fuzzy.h +3 -3
  97. data/vendor/eigen/Eigen/src/Core/GeneralProduct.h +20 -10
  98. data/vendor/eigen/Eigen/src/Core/GenericPacketMath.h +599 -152
  99. data/vendor/eigen/Eigen/src/Core/GlobalFunctions.h +40 -33
  100. data/vendor/eigen/Eigen/src/Core/IO.h +40 -7
  101. data/vendor/eigen/Eigen/src/Core/IndexedView.h +237 -0
  102. data/vendor/eigen/Eigen/src/Core/Inverse.h +9 -10
  103. data/vendor/eigen/Eigen/src/Core/Map.h +7 -7
  104. data/vendor/eigen/Eigen/src/Core/MapBase.h +10 -3
  105. data/vendor/eigen/Eigen/src/Core/MathFunctions.h +767 -125
  106. data/vendor/eigen/Eigen/src/Core/MathFunctionsImpl.h +118 -19
  107. data/vendor/eigen/Eigen/src/Core/Matrix.h +131 -25
  108. data/vendor/eigen/Eigen/src/Core/MatrixBase.h +21 -3
  109. data/vendor/eigen/Eigen/src/Core/NestByValue.h +25 -50
  110. data/vendor/eigen/Eigen/src/Core/NoAlias.h +4 -3
  111. data/vendor/eigen/Eigen/src/Core/NumTraits.h +107 -20
  112. data/vendor/eigen/Eigen/src/Core/PartialReduxEvaluator.h +232 -0
  113. data/vendor/eigen/Eigen/src/Core/PermutationMatrix.h +3 -31
  114. data/vendor/eigen/Eigen/src/Core/PlainObjectBase.h +152 -59
  115. data/vendor/eigen/Eigen/src/Core/Product.h +30 -25
  116. data/vendor/eigen/Eigen/src/Core/ProductEvaluators.h +192 -125
  117. data/vendor/eigen/Eigen/src/Core/Random.h +37 -1
  118. data/vendor/eigen/Eigen/src/Core/Redux.h +180 -170
  119. data/vendor/eigen/Eigen/src/Core/Ref.h +121 -23
  120. data/vendor/eigen/Eigen/src/Core/Replicate.h +8 -8
  121. data/vendor/eigen/Eigen/src/Core/Reshaped.h +454 -0
  122. data/vendor/eigen/Eigen/src/Core/ReturnByValue.h +7 -5
  123. data/vendor/eigen/Eigen/src/Core/Reverse.h +18 -12
  124. data/vendor/eigen/Eigen/src/Core/Select.h +8 -6
  125. data/vendor/eigen/Eigen/src/Core/SelfAdjointView.h +33 -20
  126. data/vendor/eigen/Eigen/src/Core/Solve.h +14 -14
  127. data/vendor/eigen/Eigen/src/Core/SolveTriangular.h +16 -16
  128. data/vendor/eigen/Eigen/src/Core/SolverBase.h +41 -3
  129. data/vendor/eigen/Eigen/src/Core/StableNorm.h +100 -70
  130. data/vendor/eigen/Eigen/src/Core/StlIterators.h +463 -0
  131. data/vendor/eigen/Eigen/src/Core/Stride.h +9 -4
  132. data/vendor/eigen/Eigen/src/Core/Swap.h +5 -4
  133. data/vendor/eigen/Eigen/src/Core/Transpose.h +88 -27
  134. data/vendor/eigen/Eigen/src/Core/Transpositions.h +26 -47
  135. data/vendor/eigen/Eigen/src/Core/TriangularMatrix.h +93 -75
  136. data/vendor/eigen/Eigen/src/Core/VectorBlock.h +5 -5
  137. data/vendor/eigen/Eigen/src/Core/VectorwiseOp.h +159 -70
  138. data/vendor/eigen/Eigen/src/Core/Visitor.h +137 -29
  139. data/vendor/eigen/Eigen/src/Core/arch/AVX/Complex.h +50 -129
  140. data/vendor/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +126 -337
  141. data/vendor/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +1092 -155
  142. data/vendor/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +65 -1
  143. data/vendor/eigen/Eigen/src/Core/arch/AVX512/Complex.h +422 -0
  144. data/vendor/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +207 -236
  145. data/vendor/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1482 -495
  146. data/vendor/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +89 -0
  147. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +152 -165
  148. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +19 -251
  149. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2937 -0
  150. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +221 -0
  151. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +629 -0
  152. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2042 -392
  153. data/vendor/eigen/Eigen/src/Core/arch/CUDA/Complex.h +235 -80
  154. data/vendor/eigen/Eigen/src/Core/arch/Default/BFloat16.h +700 -0
  155. data/vendor/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +102 -14
  156. data/vendor/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1649 -0
  157. data/vendor/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +110 -0
  158. data/vendor/eigen/Eigen/src/Core/arch/Default/Half.h +942 -0
  159. data/vendor/eigen/Eigen/src/Core/arch/Default/Settings.h +1 -1
  160. data/vendor/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +120 -0
  161. data/vendor/eigen/Eigen/src/Core/arch/{CUDA → GPU}/MathFunctions.h +16 -4
  162. data/vendor/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1685 -0
  163. data/vendor/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +80 -0
  164. data/vendor/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
  165. data/vendor/eigen/Eigen/src/Core/arch/MSA/Complex.h +648 -0
  166. data/vendor/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +387 -0
  167. data/vendor/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1233 -0
  168. data/vendor/eigen/Eigen/src/Core/arch/NEON/Complex.h +313 -219
  169. data/vendor/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +183 -0
  170. data/vendor/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +54 -70
  171. data/vendor/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4376 -549
  172. data/vendor/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1419 -0
  173. data/vendor/eigen/Eigen/src/Core/arch/SSE/Complex.h +59 -179
  174. data/vendor/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +65 -428
  175. data/vendor/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +893 -283
  176. data/vendor/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +65 -0
  177. data/vendor/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +44 -0
  178. data/vendor/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +752 -0
  179. data/vendor/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +49 -0
  180. data/vendor/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +232 -0
  181. data/vendor/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +301 -0
  182. data/vendor/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +670 -0
  183. data/vendor/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +694 -0
  184. data/vendor/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +85 -0
  185. data/vendor/eigen/Eigen/src/Core/arch/ZVector/Complex.h +212 -183
  186. data/vendor/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +101 -5
  187. data/vendor/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +510 -395
  188. data/vendor/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +11 -2
  189. data/vendor/eigen/Eigen/src/Core/functors/BinaryFunctors.h +112 -46
  190. data/vendor/eigen/Eigen/src/Core/functors/NullaryFunctors.h +31 -30
  191. data/vendor/eigen/Eigen/src/Core/functors/StlFunctors.h +32 -2
  192. data/vendor/eigen/Eigen/src/Core/functors/UnaryFunctors.h +355 -16
  193. data/vendor/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1075 -586
  194. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +49 -24
  195. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +41 -35
  196. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +6 -6
  197. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +4 -2
  198. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +382 -483
  199. data/vendor/eigen/Eigen/src/Core/products/Parallelizer.h +22 -5
  200. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +53 -30
  201. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +16 -8
  202. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +8 -6
  203. data/vendor/eigen/Eigen/src/Core/products/SelfadjointProduct.h +4 -4
  204. data/vendor/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +5 -4
  205. data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +33 -27
  206. data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +14 -12
  207. data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +36 -34
  208. data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +8 -4
  209. data/vendor/eigen/Eigen/src/Core/products/TriangularSolverVector.h +13 -10
  210. data/vendor/eigen/Eigen/src/Core/util/BlasUtil.h +304 -119
  211. data/vendor/eigen/Eigen/src/Core/util/ConfigureVectorization.h +512 -0
  212. data/vendor/eigen/Eigen/src/Core/util/Constants.h +25 -9
  213. data/vendor/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +26 -3
  214. data/vendor/eigen/Eigen/src/Core/util/ForwardDeclarations.h +29 -9
  215. data/vendor/eigen/Eigen/src/Core/util/IndexedViewHelper.h +186 -0
  216. data/vendor/eigen/Eigen/src/Core/util/IntegralConstant.h +272 -0
  217. data/vendor/eigen/Eigen/src/Core/util/MKL_support.h +8 -1
  218. data/vendor/eigen/Eigen/src/Core/util/Macros.h +709 -246
  219. data/vendor/eigen/Eigen/src/Core/util/Memory.h +222 -52
  220. data/vendor/eigen/Eigen/src/Core/util/Meta.h +355 -77
  221. data/vendor/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +5 -1
  222. data/vendor/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
  223. data/vendor/eigen/Eigen/src/Core/util/StaticAssert.h +8 -5
  224. data/vendor/eigen/Eigen/src/Core/util/SymbolicIndex.h +293 -0
  225. data/vendor/eigen/Eigen/src/Core/util/XprHelper.h +65 -30
  226. data/vendor/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +1 -1
  227. data/vendor/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +7 -4
  228. data/vendor/eigen/Eigen/src/Eigenvalues/EigenSolver.h +2 -2
  229. data/vendor/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +1 -1
  230. data/vendor/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +2 -2
  231. data/vendor/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +2 -2
  232. data/vendor/eigen/Eigen/src/Eigenvalues/RealQZ.h +9 -6
  233. data/vendor/eigen/Eigen/src/Eigenvalues/RealSchur.h +21 -9
  234. data/vendor/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +77 -43
  235. data/vendor/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +20 -15
  236. data/vendor/eigen/Eigen/src/Geometry/AlignedBox.h +99 -5
  237. data/vendor/eigen/Eigen/src/Geometry/AngleAxis.h +4 -4
  238. data/vendor/eigen/Eigen/src/Geometry/EulerAngles.h +3 -3
  239. data/vendor/eigen/Eigen/src/Geometry/Homogeneous.h +15 -11
  240. data/vendor/eigen/Eigen/src/Geometry/Hyperplane.h +1 -1
  241. data/vendor/eigen/Eigen/src/Geometry/OrthoMethods.h +3 -2
  242. data/vendor/eigen/Eigen/src/Geometry/ParametrizedLine.h +39 -2
  243. data/vendor/eigen/Eigen/src/Geometry/Quaternion.h +70 -14
  244. data/vendor/eigen/Eigen/src/Geometry/Rotation2D.h +3 -3
  245. data/vendor/eigen/Eigen/src/Geometry/Scaling.h +23 -5
  246. data/vendor/eigen/Eigen/src/Geometry/Transform.h +88 -67
  247. data/vendor/eigen/Eigen/src/Geometry/Translation.h +6 -12
  248. data/vendor/eigen/Eigen/src/Geometry/Umeyama.h +1 -1
  249. data/vendor/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +168 -0
  250. data/vendor/eigen/Eigen/src/Householder/BlockHouseholder.h +9 -2
  251. data/vendor/eigen/Eigen/src/Householder/Householder.h +8 -4
  252. data/vendor/eigen/Eigen/src/Householder/HouseholderSequence.h +123 -48
  253. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +15 -15
  254. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +7 -23
  255. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +5 -22
  256. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +41 -47
  257. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +51 -60
  258. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +70 -20
  259. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +2 -20
  260. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +11 -9
  261. data/vendor/eigen/Eigen/src/Jacobi/Jacobi.h +31 -10
  262. data/vendor/eigen/Eigen/src/KLUSupport/KLUSupport.h +358 -0
  263. data/vendor/eigen/Eigen/src/LU/Determinant.h +35 -19
  264. data/vendor/eigen/Eigen/src/LU/FullPivLU.h +29 -43
  265. data/vendor/eigen/Eigen/src/LU/InverseImpl.h +25 -8
  266. data/vendor/eigen/Eigen/src/LU/PartialPivLU.h +71 -58
  267. data/vendor/eigen/Eigen/src/LU/arch/InverseSize4.h +351 -0
  268. data/vendor/eigen/Eigen/src/OrderingMethods/Amd.h +7 -17
  269. data/vendor/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +297 -277
  270. data/vendor/eigen/Eigen/src/OrderingMethods/Ordering.h +6 -10
  271. data/vendor/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +1 -1
  272. data/vendor/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +9 -7
  273. data/vendor/eigen/Eigen/src/QR/ColPivHouseholderQR.h +41 -20
  274. data/vendor/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +100 -27
  275. data/vendor/eigen/Eigen/src/QR/FullPivHouseholderQR.h +59 -22
  276. data/vendor/eigen/Eigen/src/QR/HouseholderQR.h +48 -23
  277. data/vendor/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +25 -3
  278. data/vendor/eigen/Eigen/src/SVD/BDCSVD.h +183 -63
  279. data/vendor/eigen/Eigen/src/SVD/JacobiSVD.h +22 -14
  280. data/vendor/eigen/Eigen/src/SVD/SVDBase.h +83 -22
  281. data/vendor/eigen/Eigen/src/SVD/UpperBidiagonalization.h +3 -3
  282. data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +17 -9
  283. data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +12 -37
  284. data/vendor/eigen/Eigen/src/SparseCore/AmbiVector.h +3 -2
  285. data/vendor/eigen/Eigen/src/SparseCore/CompressedStorage.h +16 -0
  286. data/vendor/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +6 -6
  287. data/vendor/eigen/Eigen/src/SparseCore/SparseAssign.h +81 -27
  288. data/vendor/eigen/Eigen/src/SparseCore/SparseBlock.h +25 -57
  289. data/vendor/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +40 -11
  290. data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +11 -15
  291. data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +4 -2
  292. data/vendor/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +30 -8
  293. data/vendor/eigen/Eigen/src/SparseCore/SparseMatrix.h +126 -11
  294. data/vendor/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +5 -12
  295. data/vendor/eigen/Eigen/src/SparseCore/SparseProduct.h +13 -1
  296. data/vendor/eigen/Eigen/src/SparseCore/SparseRef.h +7 -7
  297. data/vendor/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +5 -2
  298. data/vendor/eigen/Eigen/src/SparseCore/SparseUtil.h +8 -0
  299. data/vendor/eigen/Eigen/src/SparseCore/SparseVector.h +1 -1
  300. data/vendor/eigen/Eigen/src/SparseCore/SparseView.h +1 -0
  301. data/vendor/eigen/Eigen/src/SparseLU/SparseLU.h +162 -12
  302. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +1 -1
  303. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +76 -2
  304. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +2 -2
  305. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +1 -1
  306. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +1 -1
  307. data/vendor/eigen/Eigen/src/SparseQR/SparseQR.h +19 -6
  308. data/vendor/eigen/Eigen/src/StlSupport/StdDeque.h +2 -12
  309. data/vendor/eigen/Eigen/src/StlSupport/StdList.h +2 -2
  310. data/vendor/eigen/Eigen/src/StlSupport/StdVector.h +2 -2
  311. data/vendor/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +6 -8
  312. data/vendor/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +175 -39
  313. data/vendor/eigen/Eigen/src/misc/lapacke.h +5 -4
  314. data/vendor/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +28 -2
  315. data/vendor/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +155 -11
  316. data/vendor/eigen/Eigen/src/plugins/BlockMethods.h +626 -242
  317. data/vendor/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +14 -0
  318. data/vendor/eigen/Eigen/src/plugins/IndexedViewMethods.h +262 -0
  319. data/vendor/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +4 -4
  320. data/vendor/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +10 -0
  321. data/vendor/eigen/Eigen/src/plugins/ReshapedMethods.h +149 -0
  322. data/vendor/eigen/README.md +2 -0
  323. data/vendor/eigen/bench/btl/README +1 -1
  324. data/vendor/eigen/bench/tensors/README +6 -7
  325. data/vendor/eigen/ci/README.md +56 -0
  326. data/vendor/eigen/demos/mix_eigen_and_c/README +1 -1
  327. data/vendor/eigen/unsupported/Eigen/CXX11/src/Tensor/README.md +213 -158
  328. data/vendor/eigen/unsupported/README.txt +1 -1
  329. data/vendor/tomotopy/README.kr.rst +78 -0
  330. data/vendor/tomotopy/README.rst +75 -0
  331. data/vendor/tomotopy/src/Labeling/FoRelevance.cpp +2 -2
  332. data/vendor/tomotopy/src/Labeling/Phraser.hpp +4 -4
  333. data/vendor/tomotopy/src/TopicModel/CTModel.hpp +7 -3
  334. data/vendor/tomotopy/src/TopicModel/DMRModel.hpp +7 -3
  335. data/vendor/tomotopy/src/TopicModel/DTModel.hpp +6 -3
  336. data/vendor/tomotopy/src/TopicModel/GDMRModel.hpp +2 -2
  337. data/vendor/tomotopy/src/TopicModel/HDP.h +1 -0
  338. data/vendor/tomotopy/src/TopicModel/HDPModel.hpp +57 -6
  339. data/vendor/tomotopy/src/TopicModel/HLDAModel.hpp +6 -3
  340. data/vendor/tomotopy/src/TopicModel/HPAModel.hpp +3 -2
  341. data/vendor/tomotopy/src/TopicModel/LDA.h +3 -3
  342. data/vendor/tomotopy/src/TopicModel/LDACVB0Model.hpp +5 -5
  343. data/vendor/tomotopy/src/TopicModel/LDAModel.hpp +50 -19
  344. data/vendor/tomotopy/src/TopicModel/LLDAModel.hpp +6 -2
  345. data/vendor/tomotopy/src/TopicModel/MGLDAModel.hpp +3 -2
  346. data/vendor/tomotopy/src/TopicModel/PAModel.hpp +1 -1
  347. data/vendor/tomotopy/src/TopicModel/PLDAModel.hpp +6 -2
  348. data/vendor/tomotopy/src/TopicModel/PT.h +3 -1
  349. data/vendor/tomotopy/src/TopicModel/PTModel.hpp +36 -3
  350. data/vendor/tomotopy/src/TopicModel/SLDAModel.hpp +6 -3
  351. data/vendor/tomotopy/src/TopicModel/TopicModel.hpp +55 -26
  352. data/vendor/tomotopy/src/Utils/AliasMethod.hpp +5 -4
  353. data/vendor/tomotopy/src/Utils/Dictionary.h +2 -2
  354. data/vendor/tomotopy/src/Utils/EigenAddonOps.hpp +36 -1
  355. data/vendor/tomotopy/src/Utils/MultiNormalDistribution.hpp +1 -1
  356. data/vendor/tomotopy/src/Utils/TruncMultiNormal.hpp +1 -1
  357. data/vendor/tomotopy/src/Utils/exception.h +6 -0
  358. data/vendor/tomotopy/src/Utils/math.h +2 -2
  359. data/vendor/tomotopy/src/Utils/sample.hpp +14 -12
  360. data/vendor/tomotopy/src/Utils/serializer.hpp +30 -5
  361. data/vendor/tomotopy/src/Utils/sse_gamma.h +0 -3
  362. metadata +64 -18
  363. data/vendor/eigen/Eigen/CMakeLists.txt +0 -19
  364. data/vendor/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -674
  365. data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
  366. data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
  367. data/vendor/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
  368. data/vendor/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
  369. data/vendor/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
@@ -0,0 +1,126 @@
1
+ /**
2
+ * @file RandUtils.h
3
+ * @author bab2min (bab2min@gmail.com)
4
+ * @brief
5
+ * @version 0.4.0
6
+ * @date 2021-09-17
7
+ *
8
+ * @copyright Copyright (c) 2020-2021
9
+ *
10
+ */
11
+
12
+ #ifndef EIGENRAND_RAND_UTILS_NEON_H
13
+ #define EIGENRAND_RAND_UTILS_NEON_H
14
+
15
+ #include <arm_neon.h>
16
+
17
+ namespace Eigen
18
+ {
19
+ namespace internal
20
+ {
21
+ template<typename Rng, typename RngResult> // Scalar-engine specialization: assembles a Packet4i from repeated rng() calls.
22
+ struct RawbitsMaker<Packet4i, Rng, RngResult, Rand::RandomEngineType::scalar>
23
+ {
24
+ EIGEN_STRONG_INLINE Packet4i rawbits(Rng& rng) // Returns a packet whose 128 bits all come from rng().
25
+ {
26
+ if (sizeof(RngResult) == 8) // 8-byte generator result: two draws fill the packet
27
+ {
28
+ uint64_t v[2];
29
+ v[0] = rng();
30
+ v[1] = rng();
31
+ return vreinterpretq_s32_u64(vld1q_u64(v)); // load with the array's own element type, then reinterpret; avoids reading uint64_t storage through an int32_t pointer
32
+ }
33
+ else
34
+ {
35
+ uint32_t v[4]; // each rng() result is stored as 32 bits, four draws fill the packet
36
+ v[0] = rng();
37
+ v[1] = rng();
38
+ v[2] = rng();
39
+ v[3] = rng();
40
+ return vld1q_s32((int32_t*)v);
41
+ }
42
+ }
43
+
44
+ EIGEN_STRONG_INLINE Packet4i rawbits_34(Rng& rng) // NOTE(review): body is identical to rawbits() in this specialization -- confirm that is intended
45
+ {
46
+ if (sizeof(RngResult) == 8)
47
+ {
48
+ uint64_t v[2];
49
+ v[0] = rng();
50
+ v[1] = rng();
51
+ return vreinterpretq_s32_u64(vld1q_u64(v)); // same aliasing-safe load as in rawbits()
52
+ }
53
+ else
54
+ {
55
+ uint32_t v[4];
56
+ v[0] = rng();
57
+ v[1] = rng();
58
+ v[2] = rng();
59
+ v[3] = rng();
60
+ return vld1q_s32((int32_t*)v);
61
+ }
62
+ }
63
+
64
+ EIGEN_STRONG_INLINE Packet4i rawbits_half(Rng& rng) // Fills only the first 64 bits of the buffer with rng() output; the rest is zero.
65
+ {
66
+ if (sizeof(decltype(rng())) == 8) // NOTE(review): the sibling methods test sizeof(RngResult) instead -- presumably the same type, confirm
67
+ {
68
+ uint64_t v[2];
69
+ v[0] = rng();
70
+ v[1] = 0;
71
+ return vreinterpretq_s32_u64(vld1q_u64(v)); // same aliasing-safe load as in rawbits()
72
+ }
73
+ else
74
+ {
75
+ uint32_t v[4];
76
+ v[0] = rng();
77
+ v[1] = rng();
78
+ v[2] = 0;
79
+ v[3] = 0;
80
+ return vld1q_s32((int32_t*)v);
81
+ }
82
+ }
83
+ };
84
+
85
+ template<typename Rng> // Packet-engine specialization: the generator's result is already a Packet4i.
86
+ struct RawbitsMaker<Packet4i, Rng, Packet4i, Rand::RandomEngineType::packet>
87
+ {
88
+ EIGEN_STRONG_INLINE Packet4i rawbits(Rng& rng) // Forwards one rng() result unchanged.
89
+ {
90
+ return rng();
91
+ }
92
+
93
+ EIGEN_STRONG_INLINE Packet4i rawbits_34(Rng& rng) // Same as rawbits(): one full rng() result.
94
+ {
95
+ return rng();
96
+ }
97
+
98
+ EIGEN_STRONG_INLINE Packet4i rawbits_half(Rng& rng) // Also returns a full rng() result; nothing is zeroed here, unlike the scalar-engine specialization.
99
+ {
100
+ return rng();
101
+ }
102
+ };
103
+
104
+ template<typename Rng> // Packet4f helpers that turn raw bits from the Packet4i RawbitsMaker into floats.
105
+ struct UniformRealUtils<Packet4f, Rng> : public RawbitsMaker<Packet4i, Rng>
106
+ {
107
+ EIGEN_STRONG_INLINE Packet4f zero_to_one(Rng& rng) // Clears each lane's sign bit, converts the integer to float, then divides by 0x7FFFFFFF.
108
+ {
109
+ return pdiv((Packet4f)vcvtq_f32_s32(pand(this->rawbits(rng), pset1<Packet4i>(0x7FFFFFFF))),
110
+ pset1<Packet4f>(0x7FFFFFFF)); // NOTE(review): 0x7FFFFFFF is not exactly representable as float, so lanes near the maximum may divide to exactly 1.0 -- confirm the intended upper bound
111
+ }
112
+
113
+ EIGEN_STRONG_INLINE Packet4f uniform_real(Rng& rng) // Passes rawbits_34() output to bit_to_ur_float (defined outside this view).
114
+ {
115
+ return bit_to_ur_float(this->rawbits_34(rng));
116
+ }
117
+ };
118
+
119
+ template<typename Gen, typename Urng, bool _mutable> // Traits for scalar_rng_adaptor producing double, as declared in this NEON header.
120
+ struct functor_traits<scalar_rng_adaptor<Gen, double, Urng, _mutable> >
121
+ {
122
+ enum { Cost = HugeCost, PacketAccess = 0, IsRepeatable = false }; // PacketAccess = 0 turns off packet evaluation for the double adaptor
123
+ };
124
+ }
125
+ }
126
+ #endif
@@ -0,0 +1,501 @@
1
+ /**
2
+ * @file MorePacketMath.h
3
+ * @author bab2min (bab2min@gmail.com)
4
+ * @brief
5
+ * @version 0.3.5
6
+ * @date 2021-07-16
7
+ *
8
+ * @copyright Copyright (c) 2020-2021
9
+ *
10
+ */
11
+
12
+ #ifndef EIGENRAND_MORE_PACKET_MATH_SSE_H
13
+ #define EIGENRAND_MORE_PACKET_MATH_SSE_H
14
+
15
+ #include <xmmintrin.h>
16
+
17
+ namespace Eigen
18
+ {
19
+ namespace internal
20
+ {
21
+ template<> // Trait: marks Packet4i as an integer packet.
22
+ struct IsIntPacket<Packet4i> : std::true_type {};
23
+
24
+ template<> // Trait: marks Packet4f as a float packet.
25
+ struct IsFloatPacket<Packet4f> : std::true_type {};
26
+
27
+ template<> // Trait: marks Packet2d as a double packet.
28
+ struct IsDoublePacket<Packet2d> : std::true_type {};
29
+
30
+ template<> // The "half" of a 128-bit Packet4i is represented as one 64-bit integer.
31
+ struct HalfPacket<Packet4i>
32
+ {
33
+ using type = uint64_t;
34
+ };
35
+
36
+ #ifdef EIGEN_VECTORIZE_AVX // nothing is declared here when AVX is enabled
37
+ #else
38
+ template<> // Without AVX: HalfPacket<Packet4f> is specialized but exposes no `type` member (the alias below is commented out).
39
+ struct HalfPacket<Packet4f>
40
+ {
41
+ //using type = Packet2f;
42
+ };
43
+ #endif
44
+ template<> // Bit-level casts from Packet4i to the other 128-bit packet types.
45
+ struct reinterpreter<Packet4i>
46
+ {
47
+ EIGEN_STRONG_INLINE Packet4f to_float(const Packet4i& x) // reinterpret the bits as four floats (cast intrinsic, no value conversion)
48
+ {
49
+ return _mm_castsi128_ps(x);
50
+ }
51
+
52
+ EIGEN_STRONG_INLINE Packet2d to_double(const Packet4i& x) // reinterpret the bits as two doubles
53
+ {
54
+ return _mm_castsi128_pd(x);
55
+ }
56
+
57
+ EIGEN_STRONG_INLINE Packet4i to_int(const Packet4i& x) // identity: already an integer packet
58
+ {
59
+ return x;
60
+ }
61
+ };
62
+
63
+ template<> // Bit-level casts from Packet4f to the other 128-bit packet types.
64
+ struct reinterpreter<Packet4f>
65
+ {
66
+ EIGEN_STRONG_INLINE Packet4f to_float(const Packet4f& x) // identity: already a float packet
67
+ {
68
+ return x;
69
+ }
70
+
71
+ EIGEN_STRONG_INLINE Packet2d to_double(const Packet4f& x) // reinterpret the bits as two doubles
72
+ {
73
+ return _mm_castps_pd(x);
74
+ }
75
+
76
+ EIGEN_STRONG_INLINE Packet4i to_int(const Packet4f& x) // reinterpret the bits as integers (cast intrinsic, no value conversion)
77
+ {
78
+ return _mm_castps_si128(x);
79
+ }
80
+ };
81
+
82
+ template<> // Bit-level casts from Packet2d to the other 128-bit packet types.
83
+ struct reinterpreter<Packet2d>
84
+ {
85
+ EIGEN_STRONG_INLINE Packet4f to_float(const Packet2d& x) // reinterpret the bits as four floats
86
+ {
87
+ return _mm_castpd_ps(x);
88
+ }
89
+
90
+ EIGEN_STRONG_INLINE Packet2d to_double(const Packet2d& x) // identity: already a double packet
91
+ {
92
+ return x;
93
+ }
94
+
95
+ EIGEN_STRONG_INLINE Packet4i to_int(const Packet2d& x) // reinterpret the bits as integers (cast intrinsic, no value conversion)
96
+ {
97
+ return _mm_castpd_si128(x);
98
+ }
99
+ };
100
+
101
+ template<> // Splits a 128-bit packet into its two 64-bit halves: a receives the first half, b the second.
102
+ EIGEN_STRONG_INLINE void split_two<Packet4i>(const Packet4i& x, uint64_t& a, uint64_t& b)
103
+ {
104
+ #if defined(EIGEN_VECTORIZE_SSE4_1) && (defined(__x86_64__) || defined(_M_X64)) // _mm_extract_epi64 exists only on 64-bit targets; 32-bit builds use the store path below
105
+ a = _mm_extract_epi64(x, 0);
106
+ b = _mm_extract_epi64(x, 1);
107
+ #else
108
+ uint64_t u[2]; // portable path: spill the packet to memory and read the halves back
109
+ _mm_storeu_si128((__m128i*)u, x);
110
+ a = u[0];
111
+ b = u[1];
112
+ #endif
113
+ }
114
+
115
+ EIGEN_STRONG_INLINE Packet4i combine_low32(const Packet4i& a, const Packet4i& b) // Returns the 32-bit lanes {a[0], a[2], b[0], b[2]}.
116
+ {
117
+ auto sa = _mm_shuffle_epi32(a, _MM_SHUFFLE(3, 1, 2, 0)); // a reordered to {a0, a2, a1, a3}
118
+ auto sb = _mm_shuffle_epi32(b, _MM_SHUFFLE(2, 0, 3, 1)); // b reordered to {b1, b3, b0, b2}
119
+ sa = _mm_and_si128(sa, _mm_setr_epi32(-1, -1, 0, 0)); // keep lanes 0 and 1 of sa
120
+ sb = _mm_and_si128(sb, _mm_setr_epi32(0, 0, -1, -1)); // keep lanes 2 and 3 of sb
121
+ return _mm_or_si128(sa, sb);
122
+ }
123
+
124
+ template<> // Broadcasts the 64-bit value a into both 64-bit lanes of the packet.
125
+ EIGEN_STRONG_INLINE Packet4i pseti64<Packet4i>(uint64_t a)
126
+ {
127
+ return _mm_set1_epi64x(a);
128
+ }
129
+
130
+ template<> // Adds a and b as two 64-bit integer lanes.
131
+ EIGEN_STRONG_INLINE Packet4i padd64<Packet4i>(const Packet4i& a, const Packet4i& b)
132
+ {
133
+ return _mm_add_epi64(a, b);
134
+ }
135
+
136
+ template<> // Subtracts b from a as two 64-bit integer lanes.
137
+ EIGEN_STRONG_INLINE Packet4i psub64<Packet4i>(const Packet4i& a, const Packet4i& b)
138
+ {
139
+ return _mm_sub_epi64(a, b);
140
+ }
141
+
142
+ template<> // Per-32-bit-lane equality compare of a and b; the result is a mask packet.
143
+ EIGEN_STRONG_INLINE Packet4i pcmpeq<Packet4i>(const Packet4i& a, const Packet4i& b)
144
+ {
145
+ return _mm_cmpeq_epi32(a, b);
146
+ }
147
+
148
+ template<> // Logical shift helpers for Packet4i, in 32-bit-lane and 64-bit-lane variants.
149
+ struct BitShifter<Packet4i>
150
+ {
151
+ template<int b>
152
+ EIGEN_STRONG_INLINE Packet4i sll(const Packet4i& a) // shift each 32-bit lane left by the compile-time count b
153
+ {
154
+ return _mm_slli_epi32(a, b);
155
+ }
156
+
157
+ template<int b>
158
+ EIGEN_STRONG_INLINE Packet4i srl(const Packet4i& a, int _b = b) // shift each 32-bit lane right; _b defaults to b
159
+ {
160
+ if (b >= 0) // non-negative template count: use it and ignore _b
161
+ {
162
+ return _mm_srli_epi32(a, b);
163
+ }
164
+ else // negative template count: the shift amount comes from the runtime argument _b
165
+ {
166
+ return _mm_srli_epi32(a, _b);
167
+ }
168
+ }
169
+
170
+ template<int b>
171
+ EIGEN_STRONG_INLINE Packet4i sll64(const Packet4i& a) // shift each 64-bit lane left by b
172
+ {
173
+ return _mm_slli_epi64(a, b);
174
+ }
175
+
176
+ template<int b>
177
+ EIGEN_STRONG_INLINE Packet4i srl64(const Packet4i& a) // shift each 64-bit lane right by b
178
+ {
179
+ return _mm_srli_epi64(a, b);
180
+ }
181
+ };
182
+
183
+ template<> // Per-32-bit-lane signed a < b compare; the result is a mask packet.
184
+ EIGEN_STRONG_INLINE Packet4i pcmplt<Packet4i>(const Packet4i& a, const Packet4i& b)
185
+ {
186
+ return _mm_cmplt_epi32(a, b);
187
+ }
188
+
189
+ template<> // Signed a < b compare on the two 64-bit lanes; the result is a mask packet.
190
+ EIGEN_STRONG_INLINE Packet4i pcmplt64<Packet4i>(const Packet4i& a, const Packet4i& b)
191
+ {
192
+ #ifdef EIGEN_VECTORIZE_SSE4_2
193
+ return _mm_cmpgt_epi64(b, a); // a < b expressed as b > a, the only 64-bit compare used here
194
+ #else
195
+ int64_t u[2], v[2]; // fallback without SSE4.2: spill both packets and compare the lanes as scalars
196
+ _mm_storeu_si128((__m128i*)u, a);
197
+ _mm_storeu_si128((__m128i*)v, b);
198
+ return _mm_set_epi64x(u[1] < v[1] ? -1 : 0, u[0] < v[0] ? -1 : 0); // -1 sets every bit of a lane; the upper lane is the first argument
199
+ #endif
200
+ }
201
+
202
+ template<> // Per-lane a < b compare on four floats; the result is a mask packet.
203
+ EIGEN_STRONG_INLINE Packet4f pcmplt<Packet4f>(const Packet4f& a, const Packet4f& b)
204
+ {
205
+ return _mm_cmplt_ps(a, b);
206
+ }
207
+
208
+ template<> // Per-lane a <= b compare on four floats; the result is a mask packet.
209
+ EIGEN_STRONG_INLINE Packet4f pcmple<Packet4f>(const Packet4f& a, const Packet4f& b)
210
+ {
211
+ return _mm_cmple_ps(a, b);
212
+ }
213
+
214
+ template<> // Per-lane a < b compare on two doubles; the result is a mask packet.
215
+ EIGEN_STRONG_INLINE Packet2d pcmplt<Packet2d>(const Packet2d& a, const Packet2d& b)
216
+ {
217
+ return _mm_cmplt_pd(a, b);
218
+ }
219
+
220
+ template<>
221
+ EIGEN_STRONG_INLINE Packet2d pcmple<Packet2d>(const Packet2d& a, const Packet2d& b)
222
+ {
223
+ return _mm_cmple_pd(a, b);
224
+ }
225
+
226
+ template<>
227
+ EIGEN_STRONG_INLINE Packet4f pblendv(const Packet4f& ifPacket, const Packet4f& thenPacket, const Packet4f& elsePacket)
228
+ {
229
+ #ifdef EIGEN_VECTORIZE_SSE4_1
230
+ return _mm_blendv_ps(elsePacket, thenPacket, ifPacket);
231
+ #else
232
+ return _mm_or_ps(_mm_and_ps(ifPacket, thenPacket), _mm_andnot_ps(ifPacket, elsePacket));
233
+ #endif
234
+ }
235
+
236
+ template<>
237
+ EIGEN_STRONG_INLINE Packet4f pblendv(const Packet4i& ifPacket, const Packet4f& thenPacket, const Packet4f& elsePacket)
238
+ {
239
+ return pblendv(_mm_castsi128_ps(ifPacket), thenPacket, elsePacket);
240
+ }
241
+
242
+ template<>
243
+ EIGEN_STRONG_INLINE Packet4i pblendv(const Packet4i& ifPacket, const Packet4i& thenPacket, const Packet4i& elsePacket)
244
+ {
245
+ #ifdef EIGEN_VECTORIZE_SSE4_1
246
+ return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(elsePacket), _mm_castsi128_ps(thenPacket), _mm_castsi128_ps(ifPacket)));
247
+ #else
248
+ return _mm_or_si128(_mm_and_si128(ifPacket, thenPacket), _mm_andnot_si128(ifPacket, elsePacket));
249
+ #endif
250
+ }
251
+
252
+ template<>
253
+ EIGEN_STRONG_INLINE Packet2d pblendv(const Packet2d& ifPacket, const Packet2d& thenPacket, const Packet2d& elsePacket)
254
+ {
255
+ #ifdef EIGEN_VECTORIZE_SSE4_1
256
+ return _mm_blendv_pd(elsePacket, thenPacket, ifPacket);
257
+ #else
258
+ return _mm_or_pd(_mm_and_pd(ifPacket, thenPacket), _mm_andnot_pd(ifPacket, elsePacket));
259
+ #endif
260
+ }
261
+
262
+
263
+ template<>
264
+ EIGEN_STRONG_INLINE Packet2d pblendv(const Packet4i& ifPacket, const Packet2d& thenPacket, const Packet2d& elsePacket)
265
+ {
266
+ return pblendv(_mm_castsi128_pd(ifPacket), thenPacket, elsePacket);
267
+ }
268
+
269
+ template<>
270
+ EIGEN_STRONG_INLINE Packet4i pgather<Packet4i>(const int* addr, const Packet4i& index)
271
+ {
272
+ #ifdef EIGEN_VECTORIZE_AVX2
273
+ return _mm_i32gather_epi32(addr, index, 4);
274
+ #else
275
+ uint32_t u[4];
276
+ _mm_storeu_si128((__m128i*)u, index);
277
+ return _mm_setr_epi32(addr[u[0]], addr[u[1]], addr[u[2]], addr[u[3]]);
278
+ #endif
279
+ }
280
+
281
+ template<>
282
+ EIGEN_STRONG_INLINE Packet4f pgather<Packet4i>(const float* addr, const Packet4i& index)
283
+ {
284
+ #ifdef EIGEN_VECTORIZE_AVX2
285
+ return _mm_i32gather_ps(addr, index, 4);
286
+ #else
287
+ uint32_t u[4];
288
+ _mm_storeu_si128((__m128i*)u, index);
289
+ return _mm_setr_ps(addr[u[0]], addr[u[1]], addr[u[2]], addr[u[3]]);
290
+ #endif
291
+ }
292
+
293
+ template<>
294
+ EIGEN_STRONG_INLINE Packet2d pgather<Packet4i>(const double* addr, const Packet4i& index, bool upperhalf)
295
+ {
296
+ #ifdef EIGEN_VECTORIZE_AVX2
297
+ return _mm_i32gather_pd(addr, index, 8);
298
+ #else
299
+ uint32_t u[4];
300
+ _mm_storeu_si128((__m128i*)u, index);
301
+ if (upperhalf)
302
+ {
303
+ return _mm_setr_pd(addr[u[2]], addr[u[3]]);
304
+ }
305
+ else
306
+ {
307
+ return _mm_setr_pd(addr[u[0]], addr[u[1]]);
308
+ }
309
+ #endif
310
+ }
311
+
312
+ template<>
313
+ EIGEN_STRONG_INLINE int pmovemask<Packet4f>(const Packet4f& a)
314
+ {
315
+ return _mm_movemask_ps(a);
316
+ }
317
+
318
+ template<>
319
+ EIGEN_STRONG_INLINE int pmovemask<Packet2d>(const Packet2d& a)
320
+ {
321
+ return _mm_movemask_pd(a);
322
+ }
323
+
324
+ template<>
325
+ EIGEN_STRONG_INLINE int pmovemask<Packet4i>(const Packet4i& a)
326
+ {
327
+ return pmovemask((Packet4f)_mm_castsi128_ps(a));
328
+ }
329
+
330
+ template<>
331
+ EIGEN_STRONG_INLINE Packet4f ptruncate<Packet4f>(const Packet4f& a)
332
+ {
333
+ #ifdef EIGEN_VECTORIZE_SSE4_1
334
+ return _mm_round_ps(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
335
+ #else
336
+ auto round = _MM_GET_ROUNDING_MODE();
337
+ _MM_SET_ROUNDING_MODE(_MM_ROUND_TOWARD_ZERO);
338
+ auto ret = _mm_cvtepi32_ps(_mm_cvtps_epi32(a));
339
+ _MM_SET_ROUNDING_MODE(round);
340
+ return ret;
341
+ #endif
342
+ }
343
+
344
+ template<>
345
+ EIGEN_STRONG_INLINE Packet2d ptruncate<Packet2d>(const Packet2d& a)
346
+ {
347
+ #ifdef EIGEN_VECTORIZE_SSE4_1
348
+ return _mm_round_pd(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
349
+ #else
350
+ auto round = _MM_GET_ROUNDING_MODE();
351
+ _MM_SET_ROUNDING_MODE(_MM_ROUND_TOWARD_ZERO);
352
+ auto ret = _mm_cvtepi32_pd(_mm_cvtpd_epi32(a));
353
+ _MM_SET_ROUNDING_MODE(round);
354
+ return ret;
355
+ #endif
356
+ }
357
+
358
+ template<>
359
+ EIGEN_STRONG_INLINE Packet4i pcmpeq64<Packet4i>(const Packet4i& a, const Packet4i& b)
360
+ {
361
+ #ifdef EIGEN_VECTORIZE_SSE4_1
362
+ return _mm_cmpeq_epi64(a, b);
363
+ #else
364
+ Packet4i c = _mm_cmpeq_epi32(a, b);
365
+ return pand(c, (Packet4i)_mm_shuffle_epi32(c, _MM_SHUFFLE(2, 3, 0, 1)));
366
+ #endif
367
+ }
368
+
369
+ template<>
370
+ EIGEN_STRONG_INLINE Packet4i pmuluadd64<Packet4i>(const Packet4i& a, uint64_t b, uint64_t c)
371
+ {
372
+ uint64_t u[2];
373
+ _mm_storeu_si128((__m128i*)u, a);
374
+ u[0] = u[0] * b + c;
375
+ u[1] = u[1] * b + c;
376
+ return _mm_loadu_si128((__m128i*)u);
377
+ }
378
+
379
+ EIGEN_STRONG_INLINE __m128d uint64_to_double(__m128i x) {
380
+ x = _mm_or_si128(x, _mm_castpd_si128(_mm_set1_pd(0x0010000000000000)));
381
+ return _mm_sub_pd(_mm_castsi128_pd(x), _mm_set1_pd(0x0010000000000000));
382
+ }
383
+
384
+ EIGEN_STRONG_INLINE __m128d int64_to_double(__m128i x) {
385
+ x = _mm_add_epi64(x, _mm_castpd_si128(_mm_set1_pd(0x0018000000000000)));
386
+ return _mm_sub_pd(_mm_castsi128_pd(x), _mm_set1_pd(0x0018000000000000));
387
+ }
388
+
389
+ EIGEN_STRONG_INLINE __m128i double_to_int64(__m128d x) {
390
+ int _mm_rounding = _MM_GET_ROUNDING_MODE();
391
+ _MM_SET_ROUNDING_MODE(_MM_ROUND_DOWN);
392
+ x = _mm_add_pd(x, _mm_set1_pd(0x0018000000000000));
393
+ _MM_SET_ROUNDING_MODE(_mm_rounding);
394
+ return _mm_sub_epi64(
395
+ _mm_castpd_si128(x),
396
+ _mm_castpd_si128(_mm_set1_pd(0x0018000000000000))
397
+ );
398
+ }
399
+
400
+ template<>
401
+ EIGEN_STRONG_INLINE Packet4i pcast64<Packet2d, Packet4i>(const Packet2d& a)
402
+ {
403
+ return double_to_int64(a);
404
+ }
405
+
406
+ template<>
407
+ EIGEN_STRONG_INLINE Packet2d pcast64<Packet4i, Packet2d>(const Packet4i& a)
408
+ {
409
+ return int64_to_double(a);
410
+ }
411
+
412
+ template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
413
+ Packet2d psin<Packet2d>(const Packet2d& x)
414
+ {
415
+ return _psin(x);
416
+ }
417
+ #ifdef EIGENRAND_EIGEN_33_MODE
418
+ template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
419
+ Packet2d plog<Packet2d>(const Packet2d& _x)
420
+ {
421
+ Packet2d x = _x;
422
+ _EIGEN_DECLARE_CONST_Packet2d(1, 1.0f);
423
+ _EIGEN_DECLARE_CONST_Packet2d(half, 0.5f);
424
+ _EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f);
425
+
426
+ auto inv_mant_mask = _mm_castsi128_pd(pseti64<Packet4i>(~0x7ff0000000000000));
427
+ auto min_norm_pos = _mm_castsi128_pd(pseti64<Packet4i>(0x10000000000000));
428
+ auto minus_inf = _mm_castsi128_pd(pseti64<Packet4i>(0xfff0000000000000));
429
+
430
+ /* natural logarithm computed for 4 simultaneous float
431
+ return NaN for x <= 0
432
+ */
433
+ _EIGEN_DECLARE_CONST_Packet2d(cephes_SQRTHF, 0.707106781186547524);
434
+ _EIGEN_DECLARE_CONST_Packet2d(cephes_log_p0, 7.0376836292E-2);
435
+ _EIGEN_DECLARE_CONST_Packet2d(cephes_log_p1, -1.1514610310E-1);
436
+ _EIGEN_DECLARE_CONST_Packet2d(cephes_log_p2, 1.1676998740E-1);
437
+ _EIGEN_DECLARE_CONST_Packet2d(cephes_log_p3, -1.2420140846E-1);
438
+ _EIGEN_DECLARE_CONST_Packet2d(cephes_log_p4, +1.4249322787E-1);
439
+ _EIGEN_DECLARE_CONST_Packet2d(cephes_log_p5, -1.6668057665E-1);
440
+ _EIGEN_DECLARE_CONST_Packet2d(cephes_log_p6, +2.0000714765E-1);
441
+ _EIGEN_DECLARE_CONST_Packet2d(cephes_log_p7, -2.4999993993E-1);
442
+ _EIGEN_DECLARE_CONST_Packet2d(cephes_log_p8, +3.3333331174E-1);
443
+ _EIGEN_DECLARE_CONST_Packet2d(cephes_log_q1, -2.12194440e-4);
444
+ _EIGEN_DECLARE_CONST_Packet2d(cephes_log_q2, 0.693359375);
445
+
446
+
447
+ Packet4i emm0;
448
+
449
+ Packet2d invalid_mask = _mm_cmpnge_pd(x, _mm_setzero_pd()); // not greater equal is true if x is NaN
450
+ Packet2d iszero_mask = _mm_cmpeq_pd(x, _mm_setzero_pd());
451
+
452
+ x = pmax(x, min_norm_pos); /* cut off denormalized stuff */
453
+ emm0 = _mm_srli_epi64(_mm_castpd_si128(x), 52);
454
+
455
+ /* keep only the fractional part */
456
+ x = _mm_and_pd(x, inv_mant_mask);
457
+ x = _mm_or_pd(x, p2d_half);
458
+
459
+ Packet2d e = _mm_sub_pd(uint64_to_double(emm0), pset1<Packet2d>(1022));
460
+
461
+ /* part2:
462
+ if( x < SQRTHF ) {
463
+ e -= 1;
464
+ x = x + x - 1.0;
465
+ } else { x = x - 1.0; }
466
+ */
467
+ Packet2d mask = _mm_cmplt_pd(x, p2d_cephes_SQRTHF);
468
+ Packet2d tmp = pand(x, mask);
469
+ x = psub(x, p2d_1);
470
+ e = psub(e, pand(p2d_1, mask));
471
+ x = padd(x, tmp);
472
+
473
+ Packet2d x2 = pmul(x, x);
474
+ Packet2d x3 = pmul(x2, x);
475
+
476
+ Packet2d y, y1, y2;
477
+ y = pmadd(p2d_cephes_log_p0, x, p2d_cephes_log_p1);
478
+ y1 = pmadd(p2d_cephes_log_p3, x, p2d_cephes_log_p4);
479
+ y2 = pmadd(p2d_cephes_log_p6, x, p2d_cephes_log_p7);
480
+ y = pmadd(y, x, p2d_cephes_log_p2);
481
+ y1 = pmadd(y1, x, p2d_cephes_log_p5);
482
+ y2 = pmadd(y2, x, p2d_cephes_log_p8);
483
+ y = pmadd(y, x3, y1);
484
+ y = pmadd(y, x3, y2);
485
+ y = pmul(y, x3);
486
+
487
+ y1 = pmul(e, p2d_cephes_log_q1);
488
+ tmp = pmul(x2, p2d_half);
489
+ y = padd(y, y1);
490
+ x = psub(x, tmp);
491
+ y2 = pmul(e, p2d_cephes_log_q2);
492
+ x = padd(x, y);
493
+ x = padd(x, y2);
494
+ // negative arg will be NAN, 0 will be -INF
495
+ return pblendv(iszero_mask, minus_inf, _mm_or_pd(x, invalid_mask));
496
+ }
497
+ #endif
498
+ }
499
+ }
500
+
501
+ #endif