tomoto 0.2.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (369) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +10 -0
  3. data/README.md +8 -10
  4. data/ext/tomoto/ct.cpp +11 -11
  5. data/ext/tomoto/dmr.cpp +14 -13
  6. data/ext/tomoto/dt.cpp +14 -14
  7. data/ext/tomoto/extconf.rb +7 -5
  8. data/ext/tomoto/gdmr.cpp +7 -7
  9. data/ext/tomoto/hdp.cpp +9 -9
  10. data/ext/tomoto/hlda.cpp +13 -13
  11. data/ext/tomoto/hpa.cpp +5 -5
  12. data/ext/tomoto/lda.cpp +42 -39
  13. data/ext/tomoto/llda.cpp +6 -6
  14. data/ext/tomoto/mglda.cpp +15 -15
  15. data/ext/tomoto/pa.cpp +6 -6
  16. data/ext/tomoto/plda.cpp +6 -6
  17. data/ext/tomoto/slda.cpp +8 -8
  18. data/ext/tomoto/{ext.cpp → tomoto.cpp} +8 -8
  19. data/ext/tomoto/utils.h +16 -70
  20. data/lib/tomoto/version.rb +1 -1
  21. data/lib/tomoto.rb +5 -1
  22. data/vendor/EigenRand/EigenRand/Core.h +10 -10
  23. data/vendor/EigenRand/EigenRand/Dists/Basic.h +208 -9
  24. data/vendor/EigenRand/EigenRand/Dists/Discrete.h +52 -31
  25. data/vendor/EigenRand/EigenRand/Dists/GammaPoisson.h +9 -8
  26. data/vendor/EigenRand/EigenRand/Dists/NormalExp.h +28 -21
  27. data/vendor/EigenRand/EigenRand/EigenRand +11 -6
  28. data/vendor/EigenRand/EigenRand/Macro.h +13 -7
  29. data/vendor/EigenRand/EigenRand/MorePacketMath.h +348 -740
  30. data/vendor/EigenRand/EigenRand/MvDists/Multinomial.h +5 -3
  31. data/vendor/EigenRand/EigenRand/MvDists/MvNormal.h +9 -3
  32. data/vendor/EigenRand/EigenRand/PacketFilter.h +11 -253
  33. data/vendor/EigenRand/EigenRand/PacketRandomEngine.h +21 -47
  34. data/vendor/EigenRand/EigenRand/RandUtils.h +50 -344
  35. data/vendor/EigenRand/EigenRand/arch/AVX/MorePacketMath.h +619 -0
  36. data/vendor/EigenRand/EigenRand/arch/AVX/PacketFilter.h +149 -0
  37. data/vendor/EigenRand/EigenRand/arch/AVX/RandUtils.h +228 -0
  38. data/vendor/EigenRand/EigenRand/arch/NEON/MorePacketMath.h +473 -0
  39. data/vendor/EigenRand/EigenRand/arch/NEON/PacketFilter.h +142 -0
  40. data/vendor/EigenRand/EigenRand/arch/NEON/RandUtils.h +126 -0
  41. data/vendor/EigenRand/EigenRand/arch/SSE/MorePacketMath.h +501 -0
  42. data/vendor/EigenRand/EigenRand/arch/SSE/PacketFilter.h +133 -0
  43. data/vendor/EigenRand/EigenRand/arch/SSE/RandUtils.h +120 -0
  44. data/vendor/EigenRand/EigenRand/doc.h +24 -12
  45. data/vendor/EigenRand/README.md +57 -4
  46. data/vendor/eigen/COPYING.APACHE +203 -0
  47. data/vendor/eigen/COPYING.BSD +1 -1
  48. data/vendor/eigen/COPYING.MINPACK +51 -52
  49. data/vendor/eigen/Eigen/Cholesky +0 -1
  50. data/vendor/eigen/Eigen/Core +112 -265
  51. data/vendor/eigen/Eigen/Eigenvalues +2 -3
  52. data/vendor/eigen/Eigen/Geometry +5 -8
  53. data/vendor/eigen/Eigen/Householder +0 -1
  54. data/vendor/eigen/Eigen/Jacobi +0 -1
  55. data/vendor/eigen/Eigen/KLUSupport +41 -0
  56. data/vendor/eigen/Eigen/LU +2 -5
  57. data/vendor/eigen/Eigen/OrderingMethods +0 -3
  58. data/vendor/eigen/Eigen/PaStiXSupport +1 -0
  59. data/vendor/eigen/Eigen/PardisoSupport +0 -0
  60. data/vendor/eigen/Eigen/QR +2 -3
  61. data/vendor/eigen/Eigen/QtAlignedMalloc +0 -1
  62. data/vendor/eigen/Eigen/SVD +0 -1
  63. data/vendor/eigen/Eigen/Sparse +0 -2
  64. data/vendor/eigen/Eigen/SparseCholesky +0 -8
  65. data/vendor/eigen/Eigen/SparseLU +4 -0
  66. data/vendor/eigen/Eigen/SparseQR +0 -1
  67. data/vendor/eigen/Eigen/src/Cholesky/LDLT.h +42 -27
  68. data/vendor/eigen/Eigen/src/Cholesky/LLT.h +39 -23
  69. data/vendor/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +90 -47
  70. data/vendor/eigen/Eigen/src/Core/ArithmeticSequence.h +413 -0
  71. data/vendor/eigen/Eigen/src/Core/Array.h +99 -11
  72. data/vendor/eigen/Eigen/src/Core/ArrayBase.h +3 -3
  73. data/vendor/eigen/Eigen/src/Core/ArrayWrapper.h +21 -21
  74. data/vendor/eigen/Eigen/src/Core/Assign.h +1 -1
  75. data/vendor/eigen/Eigen/src/Core/AssignEvaluator.h +125 -50
  76. data/vendor/eigen/Eigen/src/Core/Assign_MKL.h +10 -10
  77. data/vendor/eigen/Eigen/src/Core/BandMatrix.h +16 -16
  78. data/vendor/eigen/Eigen/src/Core/Block.h +56 -60
  79. data/vendor/eigen/Eigen/src/Core/BooleanRedux.h +29 -31
  80. data/vendor/eigen/Eigen/src/Core/CommaInitializer.h +7 -3
  81. data/vendor/eigen/Eigen/src/Core/CoreEvaluators.h +325 -272
  82. data/vendor/eigen/Eigen/src/Core/CoreIterators.h +5 -0
  83. data/vendor/eigen/Eigen/src/Core/CwiseBinaryOp.h +21 -22
  84. data/vendor/eigen/Eigen/src/Core/CwiseNullaryOp.h +153 -18
  85. data/vendor/eigen/Eigen/src/Core/CwiseUnaryOp.h +6 -6
  86. data/vendor/eigen/Eigen/src/Core/CwiseUnaryView.h +14 -10
  87. data/vendor/eigen/Eigen/src/Core/DenseBase.h +132 -42
  88. data/vendor/eigen/Eigen/src/Core/DenseCoeffsBase.h +25 -21
  89. data/vendor/eigen/Eigen/src/Core/DenseStorage.h +153 -71
  90. data/vendor/eigen/Eigen/src/Core/Diagonal.h +21 -23
  91. data/vendor/eigen/Eigen/src/Core/DiagonalMatrix.h +50 -2
  92. data/vendor/eigen/Eigen/src/Core/DiagonalProduct.h +1 -1
  93. data/vendor/eigen/Eigen/src/Core/Dot.h +10 -10
  94. data/vendor/eigen/Eigen/src/Core/EigenBase.h +10 -9
  95. data/vendor/eigen/Eigen/src/Core/ForceAlignedAccess.h +8 -4
  96. data/vendor/eigen/Eigen/src/Core/Fuzzy.h +3 -3
  97. data/vendor/eigen/Eigen/src/Core/GeneralProduct.h +20 -10
  98. data/vendor/eigen/Eigen/src/Core/GenericPacketMath.h +599 -152
  99. data/vendor/eigen/Eigen/src/Core/GlobalFunctions.h +40 -33
  100. data/vendor/eigen/Eigen/src/Core/IO.h +40 -7
  101. data/vendor/eigen/Eigen/src/Core/IndexedView.h +237 -0
  102. data/vendor/eigen/Eigen/src/Core/Inverse.h +9 -10
  103. data/vendor/eigen/Eigen/src/Core/Map.h +7 -7
  104. data/vendor/eigen/Eigen/src/Core/MapBase.h +10 -3
  105. data/vendor/eigen/Eigen/src/Core/MathFunctions.h +767 -125
  106. data/vendor/eigen/Eigen/src/Core/MathFunctionsImpl.h +118 -19
  107. data/vendor/eigen/Eigen/src/Core/Matrix.h +131 -25
  108. data/vendor/eigen/Eigen/src/Core/MatrixBase.h +21 -3
  109. data/vendor/eigen/Eigen/src/Core/NestByValue.h +25 -50
  110. data/vendor/eigen/Eigen/src/Core/NoAlias.h +4 -3
  111. data/vendor/eigen/Eigen/src/Core/NumTraits.h +107 -20
  112. data/vendor/eigen/Eigen/src/Core/PartialReduxEvaluator.h +232 -0
  113. data/vendor/eigen/Eigen/src/Core/PermutationMatrix.h +3 -31
  114. data/vendor/eigen/Eigen/src/Core/PlainObjectBase.h +152 -59
  115. data/vendor/eigen/Eigen/src/Core/Product.h +30 -25
  116. data/vendor/eigen/Eigen/src/Core/ProductEvaluators.h +192 -125
  117. data/vendor/eigen/Eigen/src/Core/Random.h +37 -1
  118. data/vendor/eigen/Eigen/src/Core/Redux.h +180 -170
  119. data/vendor/eigen/Eigen/src/Core/Ref.h +121 -23
  120. data/vendor/eigen/Eigen/src/Core/Replicate.h +8 -8
  121. data/vendor/eigen/Eigen/src/Core/Reshaped.h +454 -0
  122. data/vendor/eigen/Eigen/src/Core/ReturnByValue.h +7 -5
  123. data/vendor/eigen/Eigen/src/Core/Reverse.h +18 -12
  124. data/vendor/eigen/Eigen/src/Core/Select.h +8 -6
  125. data/vendor/eigen/Eigen/src/Core/SelfAdjointView.h +33 -20
  126. data/vendor/eigen/Eigen/src/Core/Solve.h +14 -14
  127. data/vendor/eigen/Eigen/src/Core/SolveTriangular.h +16 -16
  128. data/vendor/eigen/Eigen/src/Core/SolverBase.h +41 -3
  129. data/vendor/eigen/Eigen/src/Core/StableNorm.h +100 -70
  130. data/vendor/eigen/Eigen/src/Core/StlIterators.h +463 -0
  131. data/vendor/eigen/Eigen/src/Core/Stride.h +9 -4
  132. data/vendor/eigen/Eigen/src/Core/Swap.h +5 -4
  133. data/vendor/eigen/Eigen/src/Core/Transpose.h +88 -27
  134. data/vendor/eigen/Eigen/src/Core/Transpositions.h +26 -47
  135. data/vendor/eigen/Eigen/src/Core/TriangularMatrix.h +93 -75
  136. data/vendor/eigen/Eigen/src/Core/VectorBlock.h +5 -5
  137. data/vendor/eigen/Eigen/src/Core/VectorwiseOp.h +159 -70
  138. data/vendor/eigen/Eigen/src/Core/Visitor.h +137 -29
  139. data/vendor/eigen/Eigen/src/Core/arch/AVX/Complex.h +50 -129
  140. data/vendor/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +126 -337
  141. data/vendor/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +1092 -155
  142. data/vendor/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +65 -1
  143. data/vendor/eigen/Eigen/src/Core/arch/AVX512/Complex.h +422 -0
  144. data/vendor/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +207 -236
  145. data/vendor/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1482 -495
  146. data/vendor/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +89 -0
  147. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +152 -165
  148. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +19 -251
  149. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2937 -0
  150. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +221 -0
  151. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +629 -0
  152. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2042 -392
  153. data/vendor/eigen/Eigen/src/Core/arch/CUDA/Complex.h +235 -80
  154. data/vendor/eigen/Eigen/src/Core/arch/Default/BFloat16.h +700 -0
  155. data/vendor/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +102 -14
  156. data/vendor/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1649 -0
  157. data/vendor/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +110 -0
  158. data/vendor/eigen/Eigen/src/Core/arch/Default/Half.h +942 -0
  159. data/vendor/eigen/Eigen/src/Core/arch/Default/Settings.h +1 -1
  160. data/vendor/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +120 -0
  161. data/vendor/eigen/Eigen/src/Core/arch/{CUDA → GPU}/MathFunctions.h +16 -4
  162. data/vendor/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1685 -0
  163. data/vendor/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +80 -0
  164. data/vendor/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
  165. data/vendor/eigen/Eigen/src/Core/arch/MSA/Complex.h +648 -0
  166. data/vendor/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +387 -0
  167. data/vendor/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1233 -0
  168. data/vendor/eigen/Eigen/src/Core/arch/NEON/Complex.h +313 -219
  169. data/vendor/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +183 -0
  170. data/vendor/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +54 -70
  171. data/vendor/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4376 -549
  172. data/vendor/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1419 -0
  173. data/vendor/eigen/Eigen/src/Core/arch/SSE/Complex.h +59 -179
  174. data/vendor/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +65 -428
  175. data/vendor/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +893 -283
  176. data/vendor/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +65 -0
  177. data/vendor/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +44 -0
  178. data/vendor/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +752 -0
  179. data/vendor/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +49 -0
  180. data/vendor/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +232 -0
  181. data/vendor/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +301 -0
  182. data/vendor/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +670 -0
  183. data/vendor/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +694 -0
  184. data/vendor/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +85 -0
  185. data/vendor/eigen/Eigen/src/Core/arch/ZVector/Complex.h +212 -183
  186. data/vendor/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +101 -5
  187. data/vendor/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +510 -395
  188. data/vendor/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +11 -2
  189. data/vendor/eigen/Eigen/src/Core/functors/BinaryFunctors.h +112 -46
  190. data/vendor/eigen/Eigen/src/Core/functors/NullaryFunctors.h +31 -30
  191. data/vendor/eigen/Eigen/src/Core/functors/StlFunctors.h +32 -2
  192. data/vendor/eigen/Eigen/src/Core/functors/UnaryFunctors.h +355 -16
  193. data/vendor/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1075 -586
  194. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +49 -24
  195. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +41 -35
  196. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +6 -6
  197. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +4 -2
  198. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +382 -483
  199. data/vendor/eigen/Eigen/src/Core/products/Parallelizer.h +22 -5
  200. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +53 -30
  201. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +16 -8
  202. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +8 -6
  203. data/vendor/eigen/Eigen/src/Core/products/SelfadjointProduct.h +4 -4
  204. data/vendor/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +5 -4
  205. data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +33 -27
  206. data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +14 -12
  207. data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +36 -34
  208. data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +8 -4
  209. data/vendor/eigen/Eigen/src/Core/products/TriangularSolverVector.h +13 -10
  210. data/vendor/eigen/Eigen/src/Core/util/BlasUtil.h +304 -119
  211. data/vendor/eigen/Eigen/src/Core/util/ConfigureVectorization.h +512 -0
  212. data/vendor/eigen/Eigen/src/Core/util/Constants.h +25 -9
  213. data/vendor/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +26 -3
  214. data/vendor/eigen/Eigen/src/Core/util/ForwardDeclarations.h +29 -9
  215. data/vendor/eigen/Eigen/src/Core/util/IndexedViewHelper.h +186 -0
  216. data/vendor/eigen/Eigen/src/Core/util/IntegralConstant.h +272 -0
  217. data/vendor/eigen/Eigen/src/Core/util/MKL_support.h +8 -1
  218. data/vendor/eigen/Eigen/src/Core/util/Macros.h +709 -246
  219. data/vendor/eigen/Eigen/src/Core/util/Memory.h +222 -52
  220. data/vendor/eigen/Eigen/src/Core/util/Meta.h +355 -77
  221. data/vendor/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +5 -1
  222. data/vendor/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
  223. data/vendor/eigen/Eigen/src/Core/util/StaticAssert.h +8 -5
  224. data/vendor/eigen/Eigen/src/Core/util/SymbolicIndex.h +293 -0
  225. data/vendor/eigen/Eigen/src/Core/util/XprHelper.h +65 -30
  226. data/vendor/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +1 -1
  227. data/vendor/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +7 -4
  228. data/vendor/eigen/Eigen/src/Eigenvalues/EigenSolver.h +2 -2
  229. data/vendor/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +1 -1
  230. data/vendor/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +2 -2
  231. data/vendor/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +2 -2
  232. data/vendor/eigen/Eigen/src/Eigenvalues/RealQZ.h +9 -6
  233. data/vendor/eigen/Eigen/src/Eigenvalues/RealSchur.h +21 -9
  234. data/vendor/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +77 -43
  235. data/vendor/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +20 -15
  236. data/vendor/eigen/Eigen/src/Geometry/AlignedBox.h +99 -5
  237. data/vendor/eigen/Eigen/src/Geometry/AngleAxis.h +4 -4
  238. data/vendor/eigen/Eigen/src/Geometry/EulerAngles.h +3 -3
  239. data/vendor/eigen/Eigen/src/Geometry/Homogeneous.h +15 -11
  240. data/vendor/eigen/Eigen/src/Geometry/Hyperplane.h +1 -1
  241. data/vendor/eigen/Eigen/src/Geometry/OrthoMethods.h +3 -2
  242. data/vendor/eigen/Eigen/src/Geometry/ParametrizedLine.h +39 -2
  243. data/vendor/eigen/Eigen/src/Geometry/Quaternion.h +70 -14
  244. data/vendor/eigen/Eigen/src/Geometry/Rotation2D.h +3 -3
  245. data/vendor/eigen/Eigen/src/Geometry/Scaling.h +23 -5
  246. data/vendor/eigen/Eigen/src/Geometry/Transform.h +88 -67
  247. data/vendor/eigen/Eigen/src/Geometry/Translation.h +6 -12
  248. data/vendor/eigen/Eigen/src/Geometry/Umeyama.h +1 -1
  249. data/vendor/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +168 -0
  250. data/vendor/eigen/Eigen/src/Householder/BlockHouseholder.h +9 -2
  251. data/vendor/eigen/Eigen/src/Householder/Householder.h +8 -4
  252. data/vendor/eigen/Eigen/src/Householder/HouseholderSequence.h +123 -48
  253. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +15 -15
  254. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +7 -23
  255. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +5 -22
  256. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +41 -47
  257. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +51 -60
  258. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +70 -20
  259. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +2 -20
  260. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +11 -9
  261. data/vendor/eigen/Eigen/src/Jacobi/Jacobi.h +31 -10
  262. data/vendor/eigen/Eigen/src/KLUSupport/KLUSupport.h +358 -0
  263. data/vendor/eigen/Eigen/src/LU/Determinant.h +35 -19
  264. data/vendor/eigen/Eigen/src/LU/FullPivLU.h +29 -43
  265. data/vendor/eigen/Eigen/src/LU/InverseImpl.h +25 -8
  266. data/vendor/eigen/Eigen/src/LU/PartialPivLU.h +71 -58
  267. data/vendor/eigen/Eigen/src/LU/arch/InverseSize4.h +351 -0
  268. data/vendor/eigen/Eigen/src/OrderingMethods/Amd.h +7 -17
  269. data/vendor/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +297 -277
  270. data/vendor/eigen/Eigen/src/OrderingMethods/Ordering.h +6 -10
  271. data/vendor/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +1 -1
  272. data/vendor/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +9 -7
  273. data/vendor/eigen/Eigen/src/QR/ColPivHouseholderQR.h +41 -20
  274. data/vendor/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +100 -27
  275. data/vendor/eigen/Eigen/src/QR/FullPivHouseholderQR.h +59 -22
  276. data/vendor/eigen/Eigen/src/QR/HouseholderQR.h +48 -23
  277. data/vendor/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +25 -3
  278. data/vendor/eigen/Eigen/src/SVD/BDCSVD.h +183 -63
  279. data/vendor/eigen/Eigen/src/SVD/JacobiSVD.h +22 -14
  280. data/vendor/eigen/Eigen/src/SVD/SVDBase.h +83 -22
  281. data/vendor/eigen/Eigen/src/SVD/UpperBidiagonalization.h +3 -3
  282. data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +17 -9
  283. data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +12 -37
  284. data/vendor/eigen/Eigen/src/SparseCore/AmbiVector.h +3 -2
  285. data/vendor/eigen/Eigen/src/SparseCore/CompressedStorage.h +16 -0
  286. data/vendor/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +6 -6
  287. data/vendor/eigen/Eigen/src/SparseCore/SparseAssign.h +81 -27
  288. data/vendor/eigen/Eigen/src/SparseCore/SparseBlock.h +25 -57
  289. data/vendor/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +40 -11
  290. data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +11 -15
  291. data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +4 -2
  292. data/vendor/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +30 -8
  293. data/vendor/eigen/Eigen/src/SparseCore/SparseMatrix.h +126 -11
  294. data/vendor/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +5 -12
  295. data/vendor/eigen/Eigen/src/SparseCore/SparseProduct.h +13 -1
  296. data/vendor/eigen/Eigen/src/SparseCore/SparseRef.h +7 -7
  297. data/vendor/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +5 -2
  298. data/vendor/eigen/Eigen/src/SparseCore/SparseUtil.h +8 -0
  299. data/vendor/eigen/Eigen/src/SparseCore/SparseVector.h +1 -1
  300. data/vendor/eigen/Eigen/src/SparseCore/SparseView.h +1 -0
  301. data/vendor/eigen/Eigen/src/SparseLU/SparseLU.h +162 -12
  302. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +1 -1
  303. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +76 -2
  304. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +2 -2
  305. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +1 -1
  306. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +1 -1
  307. data/vendor/eigen/Eigen/src/SparseQR/SparseQR.h +19 -6
  308. data/vendor/eigen/Eigen/src/StlSupport/StdDeque.h +2 -12
  309. data/vendor/eigen/Eigen/src/StlSupport/StdList.h +2 -2
  310. data/vendor/eigen/Eigen/src/StlSupport/StdVector.h +2 -2
  311. data/vendor/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +6 -8
  312. data/vendor/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +175 -39
  313. data/vendor/eigen/Eigen/src/misc/lapacke.h +5 -4
  314. data/vendor/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +28 -2
  315. data/vendor/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +155 -11
  316. data/vendor/eigen/Eigen/src/plugins/BlockMethods.h +626 -242
  317. data/vendor/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +14 -0
  318. data/vendor/eigen/Eigen/src/plugins/IndexedViewMethods.h +262 -0
  319. data/vendor/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +4 -4
  320. data/vendor/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +10 -0
  321. data/vendor/eigen/Eigen/src/plugins/ReshapedMethods.h +149 -0
  322. data/vendor/eigen/README.md +2 -0
  323. data/vendor/eigen/bench/btl/README +1 -1
  324. data/vendor/eigen/bench/tensors/README +6 -7
  325. data/vendor/eigen/ci/README.md +56 -0
  326. data/vendor/eigen/demos/mix_eigen_and_c/README +1 -1
  327. data/vendor/eigen/unsupported/Eigen/CXX11/src/Tensor/README.md +213 -158
  328. data/vendor/eigen/unsupported/README.txt +1 -1
  329. data/vendor/tomotopy/README.kr.rst +78 -0
  330. data/vendor/tomotopy/README.rst +75 -0
  331. data/vendor/tomotopy/src/Labeling/FoRelevance.cpp +2 -2
  332. data/vendor/tomotopy/src/Labeling/Phraser.hpp +4 -4
  333. data/vendor/tomotopy/src/TopicModel/CTModel.hpp +7 -3
  334. data/vendor/tomotopy/src/TopicModel/DMRModel.hpp +7 -3
  335. data/vendor/tomotopy/src/TopicModel/DTModel.hpp +6 -3
  336. data/vendor/tomotopy/src/TopicModel/GDMRModel.hpp +2 -2
  337. data/vendor/tomotopy/src/TopicModel/HDP.h +1 -0
  338. data/vendor/tomotopy/src/TopicModel/HDPModel.hpp +57 -6
  339. data/vendor/tomotopy/src/TopicModel/HLDAModel.hpp +6 -3
  340. data/vendor/tomotopy/src/TopicModel/HPAModel.hpp +3 -2
  341. data/vendor/tomotopy/src/TopicModel/LDA.h +3 -3
  342. data/vendor/tomotopy/src/TopicModel/LDACVB0Model.hpp +5 -5
  343. data/vendor/tomotopy/src/TopicModel/LDAModel.hpp +50 -19
  344. data/vendor/tomotopy/src/TopicModel/LLDAModel.hpp +6 -2
  345. data/vendor/tomotopy/src/TopicModel/MGLDAModel.hpp +3 -2
  346. data/vendor/tomotopy/src/TopicModel/PAModel.hpp +1 -1
  347. data/vendor/tomotopy/src/TopicModel/PLDAModel.hpp +6 -2
  348. data/vendor/tomotopy/src/TopicModel/PT.h +3 -1
  349. data/vendor/tomotopy/src/TopicModel/PTModel.hpp +36 -3
  350. data/vendor/tomotopy/src/TopicModel/SLDAModel.hpp +6 -3
  351. data/vendor/tomotopy/src/TopicModel/TopicModel.hpp +55 -26
  352. data/vendor/tomotopy/src/Utils/AliasMethod.hpp +5 -4
  353. data/vendor/tomotopy/src/Utils/Dictionary.h +2 -2
  354. data/vendor/tomotopy/src/Utils/EigenAddonOps.hpp +36 -1
  355. data/vendor/tomotopy/src/Utils/MultiNormalDistribution.hpp +1 -1
  356. data/vendor/tomotopy/src/Utils/TruncMultiNormal.hpp +1 -1
  357. data/vendor/tomotopy/src/Utils/exception.h +6 -0
  358. data/vendor/tomotopy/src/Utils/math.h +2 -2
  359. data/vendor/tomotopy/src/Utils/sample.hpp +14 -12
  360. data/vendor/tomotopy/src/Utils/serializer.hpp +30 -5
  361. data/vendor/tomotopy/src/Utils/sse_gamma.h +0 -3
  362. metadata +64 -18
  363. data/vendor/eigen/Eigen/CMakeLists.txt +0 -19
  364. data/vendor/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -674
  365. data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
  366. data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
  367. data/vendor/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
  368. data/vendor/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
  369. data/vendor/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
@@ -0,0 +1,473 @@
1
+ /**
2
+ * @file MorePacketMath.h
3
+ * @author bab2min (bab2min@gmail.com)
4
+ * @brief NEON (ARM) implementations of the additional packet math primitives used by EigenRand
5
+ * @version 0.4.0
6
+ * @date 2021-04-26
7
+ *
8
+ * @copyright Copyright (c) 2020-2021
9
+ *
10
+ */
11
+
12
+ #ifndef EIGENRAND_MORE_PACKET_MATH_NEON_H
13
+ #define EIGENRAND_MORE_PACKET_MATH_NEON_H
14
+
15
+ #include <arm_neon.h>
16
+
17
+ // device func of casting for Eigen ~3.3.9
18
+ #ifdef EIGENRAND_EIGEN_33_MODE
19
namespace Eigen
{
	namespace internal
	{
		/**
		 * Lane-wise conversion of a packet of four 32-bit signed integers
		 * into a packet of four single-precision floats (vcvtq_f32_s32).
		 */
		template<>
		EIGEN_DEVICE_FUNC inline Packet4f pcast<Packet4i, Packet4f>(const Packet4i& a)
		{
			const Packet4f as_float = vcvtq_f32_s32(a);
			return as_float;
		}

		/**
		 * Lane-wise conversion of a packet of four single-precision floats
		 * into a packet of four 32-bit signed integers (vcvtq_s32_f32).
		 */
		template<>
		EIGEN_DEVICE_FUNC inline Packet4i pcast<Packet4f, Packet4i>(const Packet4f& a)
		{
			const Packet4i as_int = vcvtq_s32_f32(a);
			return as_int;
		}

	} // namespace internal
} // namespace Eigen
37
+ #endif
38
+
39
+ namespace Eigen
40
+ {
41
+ namespace internal
42
+ {
43
/** Trait: Packet4i is an integer packet. */
template<>
struct IsIntPacket<Packet4i>
	: std::true_type
{
};

/** Trait: Packet4f is a floating-point packet. */
template<>
struct IsFloatPacket<Packet4f>
	: std::true_type
{
};

/**
 * Trait: the type associated with "half" of a Packet4i is a plain
 * 64-bit unsigned integer.
 */
template<>
struct HalfPacket<Packet4i>
{
	using type = uint64_t;
};
54
+
55
/**
 * Bit-level reinterpretation helpers for an integer packet.
 * No numeric conversion is performed; only the packet type changes.
 */
template<>
struct reinterpreter<Packet4i>
{
	/** Views the 128 bits of `x` as four floats. */
	EIGEN_STRONG_INLINE Packet4f to_float(const Packet4i& x)
	{
		const Packet4f same_bits = vreinterpretq_f32_s32(x);
		return same_bits;
	}

	/** Identity: `x` is already an integer packet. */
	EIGEN_STRONG_INLINE Packet4i to_int(const Packet4i& x)
	{
		return x;
	}
};
68
+
69
/**
 * Bit-level reinterpretation helpers for a float packet.
 * No numeric conversion is performed; only the packet type changes.
 */
template<>
struct reinterpreter<Packet4f>
{
	/** Identity: `x` is already a float packet. */
	EIGEN_STRONG_INLINE Packet4f to_float(const Packet4f& x)
	{
		return x;
	}

	/** Views the 128 bits of `x` as four 32-bit signed integers. */
	EIGEN_STRONG_INLINE Packet4i to_int(const Packet4f& x)
	{
		const Packet4i same_bits = vreinterpretq_s32_f32(x);
		return same_bits;
	}
};
82
+
83
/**
 * Lane-wise equality test of two integer packets.
 * The comparison mask produced by vceqq_s32 is returned reinterpreted as Packet4i.
 */
template<>
EIGEN_STRONG_INLINE Packet4i pcmpeq<Packet4i>(const Packet4i& a, const Packet4i& b)
{
	const auto equal_mask = vceqq_s32(a, b);
	return vreinterpretq_s32_u32(equal_mask);
}
88
+
89
/**
 * Lane-wise equality test of two float packets.
 * The comparison mask produced by vceqq_f32 is returned reinterpreted as Packet4f.
 */
template<>
EIGEN_STRONG_INLINE Packet4f pcmpeq<Packet4f>(const Packet4f& a, const Packet4f& b)
{
	const auto equal_mask = vceqq_f32(a, b);
	return vreinterpretq_f32_u32(equal_mask);
}
94
+
95
/** Bitwise complement of every lane of an integer packet (vmvnq_s32). */
template<>
EIGEN_STRONG_INLINE Packet4i pbitnot<Packet4i>(const Packet4i& a)
{
	const Packet4i inverted = vmvnq_s32(a);
	return inverted;
}
100
+
101
/**
 * Bitwise complement of a float packet.
 * The bits are viewed as integers, inverted with vmvnq_s32, and viewed as floats again.
 */
template<>
EIGEN_STRONG_INLINE Packet4f pbitnot<Packet4f>(const Packet4f& a)
{
	const Packet4i raw_bits = vreinterpretq_s32_f32(a);
	const Packet4i inverted = vmvnq_s32(raw_bits);
	return vreinterpretq_f32_s32(inverted);
}
106
+
107
/**
 * Logical (unsigned) bit-shift helpers for a packet of four 32-bit integers,
 * plus variants that treat the packet as two 64-bit lanes.
 */
template<>
struct BitShifter<Packet4i>
{
	/** Shifts every 32-bit lane left by the compile-time count `b`. */
	template<int b>
	EIGEN_STRONG_INLINE Packet4i sll(const Packet4i& a)
	{
		return vreinterpretq_s32_u32(vshlq_n_u32(vreinterpretq_u32_s32(a), b));
	}

	/**
	 * Shifts every 32-bit lane right (zero-filling).
	 *
	 * When `b > 0` the compile-time count `b` is used and `_b` is ignored.
	 * Otherwise the run-time count `_b` is used: 0 returns `a` unchanged,
	 * 1..31 shift by that amount, and any other value yields an all-zero packet.
	 */
	template<int b>
	EIGEN_STRONG_INLINE Packet4i srl(const Packet4i& a, int _b = b)
	{
		if (b > 0)
		{
			// `b > 0 ? b : 1` only keeps the immediate operand valid when this
			// (then dead) branch is instantiated with b <= 0.
			return vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_s32(a), b > 0 ? b : 1));
		}

		if (_b < 0 || _b > 31)
		{
			return vdupq_n_s32(0);
		}

		// Register-form shift: a negative per-lane count performs a logical
		// right shift on unsigned lanes, so one instruction covers 0..31.
		return vreinterpretq_s32_u32(vshlq_u32(vreinterpretq_u32_s32(a), vdupq_n_s32(-_b)));
	}

	/** Shifts each of the two 64-bit lanes left by the compile-time count `b`. */
	template<int b>
	EIGEN_STRONG_INLINE Packet4i sll64(const Packet4i& a)
	{
		return vreinterpretq_s32_u64(vshlq_n_u64(vreinterpretq_u64_s32(a), b));
	}

	/** Shifts each of the two 64-bit lanes right (zero-filling) by the compile-time count `b`. */
	template<int b>
	EIGEN_STRONG_INLINE Packet4i srl64(const Packet4i& a)
	{
		return vreinterpretq_s32_u64(vshrq_n_u64(vreinterpretq_u64_s32(a), b));
	}
};
176
+
177
/**
 * Lane-wise signed `a < b` test of two integer packets.
 * The comparison mask produced by vcltq_s32 is returned reinterpreted as Packet4i.
 */
template<>
EIGEN_STRONG_INLINE Packet4i pcmplt<Packet4i>(const Packet4i& a, const Packet4i& b)
{
	const auto less_mask = vcltq_s32(a, b);
	return vreinterpretq_s32_u32(less_mask);
}
182
+
183
/**
 * Lane-wise `a < b` test of two float packets.
 * The comparison mask produced by vcltq_f32 is returned reinterpreted as Packet4f.
 */
template<>
EIGEN_STRONG_INLINE Packet4f pcmplt<Packet4f>(const Packet4f& a, const Packet4f& b)
{
	const auto less_mask = vcltq_f32(a, b);
	return vreinterpretq_f32_u32(less_mask);
}
188
+
189
/**
 * Lane-wise `a <= b` test of two float packets.
 * The comparison mask produced by vcleq_f32 is returned reinterpreted as Packet4f.
 */
template<>
EIGEN_STRONG_INLINE Packet4f pcmple<Packet4f>(const Packet4f& a, const Packet4f& b)
{
	const auto less_equal_mask = vcleq_f32(a, b);
	return vreinterpretq_f32_u32(less_equal_mask);
}
194
+
195
/**
 * Bitwise select between two float packets, driven by a float-typed mask.
 * The bits of `ifPacket` are used as the selector passed to vbslq_f32:
 * `thenPacket` is the first data operand, `elsePacket` the second.
 */
template<>
EIGEN_STRONG_INLINE Packet4f pblendv(const Packet4f& ifPacket, const Packet4f& thenPacket, const Packet4f& elsePacket)
{
	const auto selector = vreinterpretq_u32_f32(ifPacket);
	return vbslq_f32(selector, thenPacket, elsePacket);
}
200
+
201
/**
 * Bitwise select between two float packets, driven by an integer-typed mask.
 * The bits of `ifPacket` are used as the selector passed to vbslq_f32:
 * `thenPacket` is the first data operand, `elsePacket` the second.
 */
template<>
EIGEN_STRONG_INLINE Packet4f pblendv(const Packet4i& ifPacket, const Packet4f& thenPacket, const Packet4f& elsePacket)
{
	const auto selector = vreinterpretq_u32_s32(ifPacket);
	return vbslq_f32(selector, thenPacket, elsePacket);
}
206
+
207
/**
 * Bitwise select between two integer packets, driven by an integer-typed mask.
 * The bits of `ifPacket` are used as the selector passed to vbslq_s32:
 * `thenPacket` is the first data operand, `elsePacket` the second.
 */
template<>
EIGEN_STRONG_INLINE Packet4i pblendv(const Packet4i& ifPacket, const Packet4i& thenPacket, const Packet4i& elsePacket)
{
	const auto selector = vreinterpretq_u32_s32(ifPacket);
	return vbslq_s32(selector, thenPacket, elsePacket);
}
212
+
213
/**
 * Gathers four ints from `addr`, one per lane, at the offsets held in `index`.
 * Lane k of the result is `addr[index[k]]`. The gather is done in scalar
 * code through small stack buffers.
 */
template<>
EIGEN_STRONG_INLINE Packet4i pgather<Packet4i>(const int* addr, const Packet4i& index)
{
	int32_t lane_index[4];
	int32_t gathered[4];
	vst1q_s32(lane_index, index);
	for (int lane = 0; lane < 4; ++lane)
	{
		gathered[lane] = addr[lane_index[lane]];
	}
	return vld1q_s32(gathered);
}
225
+
226
/**
 * Gathers four floats from `addr`, one per lane, at the offsets held in `index`.
 * Lane k of the result is `addr[index[k]]`. The gather is done in scalar
 * code through small stack buffers.
 */
template<>
EIGEN_STRONG_INLINE Packet4f pgather<Packet4i>(const float* addr, const Packet4i& index)
{
	int32_t lane_index[4];
	float gathered[4];
	vst1q_s32(lane_index, index);
	for (int lane = 0; lane < 4; ++lane)
	{
		gathered[lane] = addr[lane_index[lane]];
	}
	return vld1q_f32(gathered);
}
238
+
239
+ template<>
240
+ EIGEN_STRONG_INLINE int pmovemask<Packet4f>(const Packet4f& a)
241
+ {
242
+ int32_t bits[4] = { 1, 2, 4, 8 };
243
+ auto r = vbslq_s32(vreinterpretq_u32_f32(a), vld1q_s32(bits), vdupq_n_s32(0));
244
+ auto s = vadd_s32(vget_low_s32(r), vget_high_s32(r));
245
+ return vget_lane_s32(vpadd_s32(s, s), 0);
246
+ }
247
+
248
+ template<>
249
+ EIGEN_STRONG_INLINE int pmovemask<Packet4i>(const Packet4i& a)
250
+ {
251
+ return pmovemask((Packet4f)vreinterpretq_f32_s32(a));
252
+ }
253
+
254
+ template<>
255
+ EIGEN_STRONG_INLINE Packet4f ptruncate<Packet4f>(const Packet4f& a)
256
+ {
257
+ return vrndq_f32(a);
258
+ }
259
+
260
+ template<>
261
+ EIGEN_STRONG_INLINE Packet4i pseti64<Packet4i>(uint64_t a)
262
+ {
263
+ return vreinterpretq_s32_u64(vdupq_n_u64(a));
264
+ }
265
+
266
+ template<>
267
+ EIGEN_STRONG_INLINE Packet4i pcmpeq64<Packet4i>(const Packet4i& a, const Packet4i& b)
268
+ {
269
+ return vreinterpretq_s32_u64(vceqq_s64(vreinterpretq_s64_s32(a), vreinterpretq_s64_s32(b)));
270
+ }
271
+
272
+ template<>
273
+ EIGEN_STRONG_INLINE Packet4i pmuluadd64<Packet4i>(const Packet4i& a, uint64_t b, uint64_t c)
274
+ {
275
+ uint64_t u[2];
276
+ vst1q_u64(u, vreinterpretq_u64_s32(a));
277
+ u[0] = u[0] * b + c;
278
+ u[1] = u[1] * b + c;
279
+ return vreinterpretq_s32_u64(vld1q_u64(u));
280
+ }
281
+
282
+ #ifdef EIGENRAND_EIGEN_33_MODE
283
+ template<>
284
+ EIGEN_STRONG_INLINE Packet4f plog<Packet4f>(const Packet4f& _x)
285
+ {
286
+ Packet4f x = _x;
287
+ _EIGEN_DECLARE_CONST_Packet4f(1, 1.0f);
288
+ _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
289
+ _EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f);
290
+
291
+ const Packet4f p4f_inv_mant_mask = (Packet4f)vreinterpretq_f32_s32(pset1<Packet4i>(~0x7f800000));
292
+
293
+ /* the smallest non denormalized float number */
294
+ const Packet4f p4f_min_norm_pos = (Packet4f)vreinterpretq_f32_s32(pset1<Packet4i>(0x00800000));
295
+ const Packet4f p4f_minus_inf = (Packet4f)vreinterpretq_f32_s32(pset1<Packet4i>(0xff800000));
296
+
297
+ /* natural logarithm computed for 4 simultaneous float
298
+ return NaN for x <= 0
299
+ */
300
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_SQRTHF, 0.707106781186547524f);
301
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p0, 7.0376836292E-2f);
302
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p1, -1.1514610310E-1f);
303
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p2, 1.1676998740E-1f);
304
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p3, -1.2420140846E-1f);
305
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p4, +1.4249322787E-1f);
306
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p5, -1.6668057665E-1f);
307
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p6, +2.0000714765E-1f);
308
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p7, -2.4999993993E-1f);
309
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p8, +3.3333331174E-1f);
310
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_log_q1, -2.12194440e-4f);
311
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_log_q2, 0.693359375f);
312
+
313
+
314
+ Packet4i emm0;
315
+
316
+ Packet4f invalid_mask = pbitnot(pcmple(pset1<Packet4f>(0), x)); // not greater equal is true if x is NaN
317
+ Packet4f iszero_mask = pcmpeq(x, pset1<Packet4f>(0));
318
+
319
+ x = pmax(x, p4f_min_norm_pos); /* cut off denormalized stuff */
320
+ emm0 = BitShifter<Packet4i>{}.template srl<23>((Packet4i)vreinterpretq_s32_f32(x));
321
+
322
+ /* keep only the fractional part */
323
+ x = pand(x, p4f_inv_mant_mask);
324
+ x = por(x, p4f_half);
325
+
326
+ emm0 = psub(emm0, p4i_0x7f);
327
+ Packet4f e = padd(Packet4f(vcvtq_f32_s32(emm0)), p4f_1);
328
+
329
+ /* part2:
330
+ if( x < SQRTHF ) {
331
+ e -= 1;
332
+ x = x + x - 1.0;
333
+ } else { x = x - 1.0; }
334
+ */
335
+ Packet4f mask = pcmplt(x, p4f_cephes_SQRTHF);
336
+ Packet4f tmp = pand(x, mask);
337
+ x = psub(x, p4f_1);
338
+ e = psub(e, pand(p4f_1, mask));
339
+ x = padd(x, tmp);
340
+
341
+ Packet4f x2 = pmul(x, x);
342
+ Packet4f x3 = pmul(x2, x);
343
+
344
+ Packet4f y, y1, y2;
345
+ y = pmadd(p4f_cephes_log_p0, x, p4f_cephes_log_p1);
346
+ y1 = pmadd(p4f_cephes_log_p3, x, p4f_cephes_log_p4);
347
+ y2 = pmadd(p4f_cephes_log_p6, x, p4f_cephes_log_p7);
348
+ y = pmadd(y, x, p4f_cephes_log_p2);
349
+ y1 = pmadd(y1, x, p4f_cephes_log_p5);
350
+ y2 = pmadd(y2, x, p4f_cephes_log_p8);
351
+ y = pmadd(y, x3, y1);
352
+ y = pmadd(y, x3, y2);
353
+ y = pmul(y, x3);
354
+
355
+ y1 = pmul(e, p4f_cephes_log_q1);
356
+ tmp = pmul(x2, p4f_half);
357
+ y = padd(y, y1);
358
+ x = psub(x, tmp);
359
+ y2 = pmul(e, p4f_cephes_log_q2);
360
+ x = padd(x, y);
361
+ x = padd(x, y2);
362
+ // negative arg will be NAN, 0 will be -INF
363
+ return pblendv(iszero_mask, p4f_minus_inf, por(x, invalid_mask));
364
+ }
365
+
366
+ template<>
367
+ EIGEN_STRONG_INLINE Packet4f psqrt<Packet4f>(const Packet4f& x)
368
+ {
369
+ return vsqrtq_f32(x);
370
+ }
371
+
372
+ template<>
373
+ EIGEN_STRONG_INLINE Packet4f psin<Packet4f>(const Packet4f& _x)
374
+ {
375
+ Packet4f x = _x;
376
+ _EIGEN_DECLARE_CONST_Packet4f(1, 1.0f);
377
+ _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
378
+
379
+ _EIGEN_DECLARE_CONST_Packet4i(1, 1);
380
+ _EIGEN_DECLARE_CONST_Packet4i(not1, ~1);
381
+ _EIGEN_DECLARE_CONST_Packet4i(2, 2);
382
+ _EIGEN_DECLARE_CONST_Packet4i(4, 4);
383
+
384
+ const Packet4f p4f_sign_mask = (Packet4f)vreinterpretq_f32_s32(pset1<Packet4i>(0x80000000));
385
+
386
+ _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP1, -0.78515625f);
387
+ _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP2, -2.4187564849853515625e-4f);
388
+ _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP3, -3.77489497744594108e-8f);
389
+ _EIGEN_DECLARE_CONST_Packet4f(sincof_p0, -1.9515295891E-4f);
390
+ _EIGEN_DECLARE_CONST_Packet4f(sincof_p1, 8.3321608736E-3f);
391
+ _EIGEN_DECLARE_CONST_Packet4f(sincof_p2, -1.6666654611E-1f);
392
+ _EIGEN_DECLARE_CONST_Packet4f(coscof_p0, 2.443315711809948E-005f);
393
+ _EIGEN_DECLARE_CONST_Packet4f(coscof_p1, -1.388731625493765E-003f);
394
+ _EIGEN_DECLARE_CONST_Packet4f(coscof_p2, 4.166664568298827E-002f);
395
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_FOPI, 1.27323954473516f); // 4 / M_PI
396
+
397
+ Packet4f xmm1, xmm2, xmm3, sign_bit, y;
398
+
399
+ Packet4i emm0, emm2;
400
+ sign_bit = x;
401
+ /* take the absolute value */
402
+ x = pabs(x);
403
+
404
+ /* take the modulo */
405
+
406
+ /* extract the sign bit (upper one) */
407
+ sign_bit = pand(sign_bit, p4f_sign_mask);
408
+
409
+ /* scale by 4/Pi */
410
+ y = pmul(x, p4f_cephes_FOPI);
411
+
412
+ /* store the integer part of y in mm0 */
413
+ emm2 = vcvtq_s32_f32(y);
414
+ /* j=(j+1) & (~1) (see the cephes sources) */
415
+ emm2 = padd(emm2, p4i_1);
416
+ emm2 = pand(emm2, p4i_not1);
417
+ y = vcvtq_f32_s32(emm2);
418
+ /* get the swap sign flag */
419
+ emm0 = pand(emm2, p4i_4);
420
+ emm0 = BitShifter<Packet4i>{}.template sll<29>(emm0);
421
+ /* get the polynom selection mask
422
+ there is one polynom for 0 <= x <= Pi/4
423
+ and another one for Pi/4<x<=Pi/2
424
+
425
+ Both branches will be computed.
426
+ */
427
+ emm2 = pand(emm2, p4i_2);
428
+ emm2 = pcmpeq(emm2, pset1<Packet4i>(0));
429
+
430
+ Packet4f swap_sign_bit = (Packet4f)vreinterpretq_f32_s32(emm0);
431
+ Packet4f poly_mask = (Packet4f)vreinterpretq_f32_s32(emm2);
432
+ sign_bit = pxor(sign_bit, swap_sign_bit);
433
+
434
+ /* The magic pass: "Extended precision modular arithmetic"
435
+ x = ((x - y * DP1) - y * DP2) - y * DP3; */
436
+ xmm1 = pmul(y, p4f_minus_cephes_DP1);
437
+ xmm2 = pmul(y, p4f_minus_cephes_DP2);
438
+ xmm3 = pmul(y, p4f_minus_cephes_DP3);
439
+ x = padd(x, xmm1);
440
+ x = padd(x, xmm2);
441
+ x = padd(x, xmm3);
442
+
443
+ /* Evaluate the first polynom (0 <= x <= Pi/4) */
444
+ y = p4f_coscof_p0;
445
+ Packet4f z = pmul(x, x);
446
+
447
+ y = pmadd(y, z, p4f_coscof_p1);
448
+ y = pmadd(y, z, p4f_coscof_p2);
449
+ y = pmul(y, z);
450
+ y = pmul(y, z);
451
+ Packet4f tmp = pmul(z, p4f_half);
452
+ y = psub(y, tmp);
453
+ y = padd(y, p4f_1);
454
+
455
+ /* Evaluate the second polynom (Pi/4 <= x <= 0) */
456
+
457
+ Packet4f y2 = p4f_sincof_p0;
458
+ y2 = pmadd(y2, z, p4f_sincof_p1);
459
+ y2 = pmadd(y2, z, p4f_sincof_p2);
460
+ y2 = pmul(y2, z);
461
+ y2 = pmul(y2, x);
462
+ y2 = padd(y2, x);
463
+
464
+ /* select the correct result from the two polynoms */
465
+ y = pblendv(poly_mask, y2, y);
466
+ /* update the sign */
467
+ return pxor(y, sign_bit);
468
+ }
469
+ #endif
470
+ }
471
+ }
472
+
473
+ #endif
@@ -0,0 +1,142 @@
1
+ /**
2
+ * @file PacketFilter.h
3
+ * @author bab2min (bab2min@gmail.com)
4
+ * @brief NEON specialization of CompressMask (CompressMask<16>) for four-lane packets
5
+ * @version 0.4.0
6
+ * @date 2021-09-17
7
+ *
8
+ * @copyright Copyright (c) 2020-2021
9
+ *
10
+ */
11
+
12
+ #ifndef EIGENRAND_PACKET_FILTER_NEON_H
13
+ #define EIGENRAND_PACKET_FILTER_NEON_H
14
+
15
+ #include <arm_neon.h>
16
+
17
+ namespace Eigen
18
+ {
19
+ namespace Rand
20
+ {
21
+ namespace detail
22
+ {
23
+ template<>
24
+ class CompressMask<16>
25
+ {
26
+ std::array<std::array<uint8_t, 16>, 7> idx;
27
+ std::array<internal::Packet4f, 4> selector;
28
+ std::array<uint8_t, 16> cnt;
29
+
30
+ static uint8_t make_compress(int mask, int offset = 0)
31
+ {
32
+ uint8_t ret = 0;
33
+ int n = offset;
34
+ for (int i = 0; i < 4; ++i)
35
+ {
36
+ int l = mask & 1;
37
+ mask >>= 1;
38
+ if (l)
39
+ {
40
+ if (n >= 0) ret |= (i & 3) << (2 * n);
41
+ if (++n >= 4) break;
42
+ }
43
+ }
44
+ return ret;
45
+ }
46
+
47
+ static uint8_t count(int mask)
48
+ {
49
+ uint8_t ret = 0;
50
+ for (int i = 0; i < 4; ++i)
51
+ {
52
+ ret += mask & 1;
53
+ mask >>= 1;
54
+ }
55
+ return ret;
56
+ }
57
+
58
+ CompressMask()
59
+ {
60
+ for (int i = 0; i < 16; ++i)
61
+ {
62
+ for (int o = 0; o < 7; ++o)
63
+ {
64
+ idx[o][i] = make_compress(i, o < 4 ? o : o - 7);
65
+ }
66
+
67
+ cnt[i] = count(i);
68
+ }
69
+
70
+ uint32_t v[4] = { 0, };
71
+
72
+ selector[0] = (internal::Packet4f)vreinterpretq_f32_u32(vld1q_u32(v));
73
+ v[0] = -1;
74
+ selector[1] = (internal::Packet4f)vreinterpretq_f32_u32(vld1q_u32(v));
75
+ v[1] = -1;
76
+ selector[2] = (internal::Packet4f)vreinterpretq_f32_u32(vld1q_u32(v));
77
+ v[2] = -1;
78
+ selector[3] = (internal::Packet4f)vreinterpretq_f32_u32(vld1q_u32(v));
79
+ }
80
+
81
+ static EIGEN_STRONG_INLINE internal::Packet4f permute(const internal::Packet4f& p, uint8_t i)
82
+ {
83
+ float u[4];
84
+ vst1q_f32(u, p);
85
+ float t[4];
86
+ t[0] = u[i & 3];
87
+ t[1] = u[(i >> 2) & 3];
88
+ t[2] = u[(i >> 4) & 3];
89
+ t[3] = u[(i >> 6) & 3];
90
+ return vld1q_f32(t);
91
+ }
92
+
93
+ public:
94
+
95
+ enum { full_size = 4 };
96
+
97
+ static const CompressMask& get_inst()
98
+ {
99
+ static CompressMask cm;
100
+ return cm;
101
+ }
102
+
103
+ template<typename Packet>
104
+ EIGEN_STRONG_INLINE int compress_append(Packet& _value, const Packet& _mask,
105
+ Packet& _rest, int rest_cnt, bool& full) const
106
+ {
107
+ auto& value = reinterpret_cast<internal::Packet4f&>(_value);
108
+ auto& mask = reinterpret_cast<const internal::Packet4f&>(_mask);
109
+ auto& rest = reinterpret_cast<internal::Packet4f&>(_rest);
110
+
111
+ int m = internal::pmovemask(mask);
112
+ if (cnt[m] == full_size)
113
+ {
114
+ full = true;
115
+ return rest_cnt;
116
+ }
117
+ auto p1 = permute(value, idx[rest_cnt][m]);
118
+ p1 = internal::pblendv(selector[rest_cnt], rest, p1);
119
+
120
+ auto new_cnt = rest_cnt + cnt[m];
121
+ if (new_cnt >= full_size)
122
+ {
123
+ if (new_cnt > full_size)
124
+ {
125
+ rest = permute(value, idx[new_cnt - cnt[m] + full_size - 1][m]);
126
+ }
127
+ value = p1;
128
+ full = true;
129
+ return new_cnt - full_size;
130
+ }
131
+ else
132
+ {
133
+ rest = p1;
134
+ full = false;
135
+ return new_cnt;
136
+ }
137
+ }
138
+ };
139
+ }
140
+ }
141
+ }
142
+ #endif