tomoto 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/LICENSE.txt +22 -0
- data/README.md +123 -0
- data/ext/tomoto/ext.cpp +245 -0
- data/ext/tomoto/extconf.rb +28 -0
- data/lib/tomoto.rb +12 -0
- data/lib/tomoto/ct.rb +11 -0
- data/lib/tomoto/hdp.rb +11 -0
- data/lib/tomoto/lda.rb +67 -0
- data/lib/tomoto/version.rb +3 -0
- data/vendor/EigenRand/EigenRand/Core.h +1139 -0
- data/vendor/EigenRand/EigenRand/Dists/Basic.h +111 -0
- data/vendor/EigenRand/EigenRand/Dists/Discrete.h +877 -0
- data/vendor/EigenRand/EigenRand/Dists/GammaPoisson.h +108 -0
- data/vendor/EigenRand/EigenRand/Dists/NormalExp.h +626 -0
- data/vendor/EigenRand/EigenRand/EigenRand +19 -0
- data/vendor/EigenRand/EigenRand/Macro.h +24 -0
- data/vendor/EigenRand/EigenRand/MorePacketMath.h +978 -0
- data/vendor/EigenRand/EigenRand/PacketFilter.h +286 -0
- data/vendor/EigenRand/EigenRand/PacketRandomEngine.h +624 -0
- data/vendor/EigenRand/EigenRand/RandUtils.h +413 -0
- data/vendor/EigenRand/EigenRand/doc.h +220 -0
- data/vendor/EigenRand/LICENSE +21 -0
- data/vendor/EigenRand/README.md +288 -0
- data/vendor/eigen/COPYING.BSD +26 -0
- data/vendor/eigen/COPYING.GPL +674 -0
- data/vendor/eigen/COPYING.LGPL +502 -0
- data/vendor/eigen/COPYING.MINPACK +52 -0
- data/vendor/eigen/COPYING.MPL2 +373 -0
- data/vendor/eigen/COPYING.README +18 -0
- data/vendor/eigen/Eigen/CMakeLists.txt +19 -0
- data/vendor/eigen/Eigen/Cholesky +46 -0
- data/vendor/eigen/Eigen/CholmodSupport +48 -0
- data/vendor/eigen/Eigen/Core +537 -0
- data/vendor/eigen/Eigen/Dense +7 -0
- data/vendor/eigen/Eigen/Eigen +2 -0
- data/vendor/eigen/Eigen/Eigenvalues +61 -0
- data/vendor/eigen/Eigen/Geometry +62 -0
- data/vendor/eigen/Eigen/Householder +30 -0
- data/vendor/eigen/Eigen/IterativeLinearSolvers +48 -0
- data/vendor/eigen/Eigen/Jacobi +33 -0
- data/vendor/eigen/Eigen/LU +50 -0
- data/vendor/eigen/Eigen/MetisSupport +35 -0
- data/vendor/eigen/Eigen/OrderingMethods +73 -0
- data/vendor/eigen/Eigen/PaStiXSupport +48 -0
- data/vendor/eigen/Eigen/PardisoSupport +35 -0
- data/vendor/eigen/Eigen/QR +51 -0
- data/vendor/eigen/Eigen/QtAlignedMalloc +40 -0
- data/vendor/eigen/Eigen/SPQRSupport +34 -0
- data/vendor/eigen/Eigen/SVD +51 -0
- data/vendor/eigen/Eigen/Sparse +36 -0
- data/vendor/eigen/Eigen/SparseCholesky +45 -0
- data/vendor/eigen/Eigen/SparseCore +69 -0
- data/vendor/eigen/Eigen/SparseLU +46 -0
- data/vendor/eigen/Eigen/SparseQR +37 -0
- data/vendor/eigen/Eigen/StdDeque +27 -0
- data/vendor/eigen/Eigen/StdList +26 -0
- data/vendor/eigen/Eigen/StdVector +27 -0
- data/vendor/eigen/Eigen/SuperLUSupport +64 -0
- data/vendor/eigen/Eigen/UmfPackSupport +40 -0
- data/vendor/eigen/Eigen/src/Cholesky/LDLT.h +673 -0
- data/vendor/eigen/Eigen/src/Cholesky/LLT.h +542 -0
- data/vendor/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h +99 -0
- data/vendor/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +639 -0
- data/vendor/eigen/Eigen/src/Core/Array.h +329 -0
- data/vendor/eigen/Eigen/src/Core/ArrayBase.h +226 -0
- data/vendor/eigen/Eigen/src/Core/ArrayWrapper.h +209 -0
- data/vendor/eigen/Eigen/src/Core/Assign.h +90 -0
- data/vendor/eigen/Eigen/src/Core/AssignEvaluator.h +935 -0
- data/vendor/eigen/Eigen/src/Core/Assign_MKL.h +178 -0
- data/vendor/eigen/Eigen/src/Core/BandMatrix.h +353 -0
- data/vendor/eigen/Eigen/src/Core/Block.h +452 -0
- data/vendor/eigen/Eigen/src/Core/BooleanRedux.h +164 -0
- data/vendor/eigen/Eigen/src/Core/CommaInitializer.h +160 -0
- data/vendor/eigen/Eigen/src/Core/ConditionEstimator.h +175 -0
- data/vendor/eigen/Eigen/src/Core/CoreEvaluators.h +1688 -0
- data/vendor/eigen/Eigen/src/Core/CoreIterators.h +127 -0
- data/vendor/eigen/Eigen/src/Core/CwiseBinaryOp.h +184 -0
- data/vendor/eigen/Eigen/src/Core/CwiseNullaryOp.h +866 -0
- data/vendor/eigen/Eigen/src/Core/CwiseTernaryOp.h +197 -0
- data/vendor/eigen/Eigen/src/Core/CwiseUnaryOp.h +103 -0
- data/vendor/eigen/Eigen/src/Core/CwiseUnaryView.h +128 -0
- data/vendor/eigen/Eigen/src/Core/DenseBase.h +611 -0
- data/vendor/eigen/Eigen/src/Core/DenseCoeffsBase.h +681 -0
- data/vendor/eigen/Eigen/src/Core/DenseStorage.h +570 -0
- data/vendor/eigen/Eigen/src/Core/Diagonal.h +260 -0
- data/vendor/eigen/Eigen/src/Core/DiagonalMatrix.h +343 -0
- data/vendor/eigen/Eigen/src/Core/DiagonalProduct.h +28 -0
- data/vendor/eigen/Eigen/src/Core/Dot.h +318 -0
- data/vendor/eigen/Eigen/src/Core/EigenBase.h +159 -0
- data/vendor/eigen/Eigen/src/Core/ForceAlignedAccess.h +146 -0
- data/vendor/eigen/Eigen/src/Core/Fuzzy.h +155 -0
- data/vendor/eigen/Eigen/src/Core/GeneralProduct.h +455 -0
- data/vendor/eigen/Eigen/src/Core/GenericPacketMath.h +593 -0
- data/vendor/eigen/Eigen/src/Core/GlobalFunctions.h +187 -0
- data/vendor/eigen/Eigen/src/Core/IO.h +225 -0
- data/vendor/eigen/Eigen/src/Core/Inverse.h +118 -0
- data/vendor/eigen/Eigen/src/Core/Map.h +171 -0
- data/vendor/eigen/Eigen/src/Core/MapBase.h +303 -0
- data/vendor/eigen/Eigen/src/Core/MathFunctions.h +1415 -0
- data/vendor/eigen/Eigen/src/Core/MathFunctionsImpl.h +101 -0
- data/vendor/eigen/Eigen/src/Core/Matrix.h +459 -0
- data/vendor/eigen/Eigen/src/Core/MatrixBase.h +529 -0
- data/vendor/eigen/Eigen/src/Core/NestByValue.h +110 -0
- data/vendor/eigen/Eigen/src/Core/NoAlias.h +108 -0
- data/vendor/eigen/Eigen/src/Core/NumTraits.h +248 -0
- data/vendor/eigen/Eigen/src/Core/PermutationMatrix.h +633 -0
- data/vendor/eigen/Eigen/src/Core/PlainObjectBase.h +1035 -0
- data/vendor/eigen/Eigen/src/Core/Product.h +186 -0
- data/vendor/eigen/Eigen/src/Core/ProductEvaluators.h +1112 -0
- data/vendor/eigen/Eigen/src/Core/Random.h +182 -0
- data/vendor/eigen/Eigen/src/Core/Redux.h +505 -0
- data/vendor/eigen/Eigen/src/Core/Ref.h +283 -0
- data/vendor/eigen/Eigen/src/Core/Replicate.h +142 -0
- data/vendor/eigen/Eigen/src/Core/ReturnByValue.h +117 -0
- data/vendor/eigen/Eigen/src/Core/Reverse.h +211 -0
- data/vendor/eigen/Eigen/src/Core/Select.h +162 -0
- data/vendor/eigen/Eigen/src/Core/SelfAdjointView.h +352 -0
- data/vendor/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h +47 -0
- data/vendor/eigen/Eigen/src/Core/Solve.h +188 -0
- data/vendor/eigen/Eigen/src/Core/SolveTriangular.h +235 -0
- data/vendor/eigen/Eigen/src/Core/SolverBase.h +130 -0
- data/vendor/eigen/Eigen/src/Core/StableNorm.h +221 -0
- data/vendor/eigen/Eigen/src/Core/Stride.h +111 -0
- data/vendor/eigen/Eigen/src/Core/Swap.h +67 -0
- data/vendor/eigen/Eigen/src/Core/Transpose.h +403 -0
- data/vendor/eigen/Eigen/src/Core/Transpositions.h +407 -0
- data/vendor/eigen/Eigen/src/Core/TriangularMatrix.h +983 -0
- data/vendor/eigen/Eigen/src/Core/VectorBlock.h +96 -0
- data/vendor/eigen/Eigen/src/Core/VectorwiseOp.h +695 -0
- data/vendor/eigen/Eigen/src/Core/Visitor.h +273 -0
- data/vendor/eigen/Eigen/src/Core/arch/AVX/Complex.h +451 -0
- data/vendor/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +439 -0
- data/vendor/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +637 -0
- data/vendor/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +51 -0
- data/vendor/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +391 -0
- data/vendor/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1316 -0
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +430 -0
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +322 -0
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +1061 -0
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/Complex.h +103 -0
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/Half.h +674 -0
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/MathFunctions.h +91 -0
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +333 -0
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +1124 -0
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +212 -0
- data/vendor/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +29 -0
- data/vendor/eigen/Eigen/src/Core/arch/Default/Settings.h +49 -0
- data/vendor/eigen/Eigen/src/Core/arch/NEON/Complex.h +490 -0
- data/vendor/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +91 -0
- data/vendor/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +760 -0
- data/vendor/eigen/Eigen/src/Core/arch/SSE/Complex.h +471 -0
- data/vendor/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +562 -0
- data/vendor/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +895 -0
- data/vendor/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +77 -0
- data/vendor/eigen/Eigen/src/Core/arch/ZVector/Complex.h +397 -0
- data/vendor/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +137 -0
- data/vendor/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +945 -0
- data/vendor/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +168 -0
- data/vendor/eigen/Eigen/src/Core/functors/BinaryFunctors.h +475 -0
- data/vendor/eigen/Eigen/src/Core/functors/NullaryFunctors.h +188 -0
- data/vendor/eigen/Eigen/src/Core/functors/StlFunctors.h +136 -0
- data/vendor/eigen/Eigen/src/Core/functors/TernaryFunctors.h +25 -0
- data/vendor/eigen/Eigen/src/Core/functors/UnaryFunctors.h +792 -0
- data/vendor/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +2156 -0
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +492 -0
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +311 -0
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +145 -0
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +122 -0
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +619 -0
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +136 -0
- data/vendor/eigen/Eigen/src/Core/products/Parallelizer.h +163 -0
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +521 -0
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +287 -0
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +260 -0
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +118 -0
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointProduct.h +133 -0
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +93 -0
- data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +466 -0
- data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +315 -0
- data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixVector.h +350 -0
- data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +255 -0
- data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +335 -0
- data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +163 -0
- data/vendor/eigen/Eigen/src/Core/products/TriangularSolverVector.h +145 -0
- data/vendor/eigen/Eigen/src/Core/util/BlasUtil.h +398 -0
- data/vendor/eigen/Eigen/src/Core/util/Constants.h +547 -0
- data/vendor/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +83 -0
- data/vendor/eigen/Eigen/src/Core/util/ForwardDeclarations.h +302 -0
- data/vendor/eigen/Eigen/src/Core/util/MKL_support.h +130 -0
- data/vendor/eigen/Eigen/src/Core/util/Macros.h +1001 -0
- data/vendor/eigen/Eigen/src/Core/util/Memory.h +993 -0
- data/vendor/eigen/Eigen/src/Core/util/Meta.h +534 -0
- data/vendor/eigen/Eigen/src/Core/util/NonMPL2.h +3 -0
- data/vendor/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +27 -0
- data/vendor/eigen/Eigen/src/Core/util/StaticAssert.h +218 -0
- data/vendor/eigen/Eigen/src/Core/util/XprHelper.h +821 -0
- data/vendor/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +346 -0
- data/vendor/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +459 -0
- data/vendor/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +91 -0
- data/vendor/eigen/Eigen/src/Eigenvalues/EigenSolver.h +622 -0
- data/vendor/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +418 -0
- data/vendor/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +226 -0
- data/vendor/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +374 -0
- data/vendor/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +158 -0
- data/vendor/eigen/Eigen/src/Eigenvalues/RealQZ.h +654 -0
- data/vendor/eigen/Eigen/src/Eigenvalues/RealSchur.h +546 -0
- data/vendor/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +77 -0
- data/vendor/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +870 -0
- data/vendor/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +87 -0
- data/vendor/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +556 -0
- data/vendor/eigen/Eigen/src/Geometry/AlignedBox.h +392 -0
- data/vendor/eigen/Eigen/src/Geometry/AngleAxis.h +247 -0
- data/vendor/eigen/Eigen/src/Geometry/EulerAngles.h +114 -0
- data/vendor/eigen/Eigen/src/Geometry/Homogeneous.h +497 -0
- data/vendor/eigen/Eigen/src/Geometry/Hyperplane.h +282 -0
- data/vendor/eigen/Eigen/src/Geometry/OrthoMethods.h +234 -0
- data/vendor/eigen/Eigen/src/Geometry/ParametrizedLine.h +195 -0
- data/vendor/eigen/Eigen/src/Geometry/Quaternion.h +814 -0
- data/vendor/eigen/Eigen/src/Geometry/Rotation2D.h +199 -0
- data/vendor/eigen/Eigen/src/Geometry/RotationBase.h +206 -0
- data/vendor/eigen/Eigen/src/Geometry/Scaling.h +170 -0
- data/vendor/eigen/Eigen/src/Geometry/Transform.h +1542 -0
- data/vendor/eigen/Eigen/src/Geometry/Translation.h +208 -0
- data/vendor/eigen/Eigen/src/Geometry/Umeyama.h +166 -0
- data/vendor/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +161 -0
- data/vendor/eigen/Eigen/src/Householder/BlockHouseholder.h +103 -0
- data/vendor/eigen/Eigen/src/Householder/Householder.h +172 -0
- data/vendor/eigen/Eigen/src/Householder/HouseholderSequence.h +470 -0
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +226 -0
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +228 -0
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +246 -0
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +400 -0
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +462 -0
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +394 -0
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +216 -0
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +115 -0
- data/vendor/eigen/Eigen/src/Jacobi/Jacobi.h +462 -0
- data/vendor/eigen/Eigen/src/LU/Determinant.h +101 -0
- data/vendor/eigen/Eigen/src/LU/FullPivLU.h +891 -0
- data/vendor/eigen/Eigen/src/LU/InverseImpl.h +415 -0
- data/vendor/eigen/Eigen/src/LU/PartialPivLU.h +611 -0
- data/vendor/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h +83 -0
- data/vendor/eigen/Eigen/src/LU/arch/Inverse_SSE.h +338 -0
- data/vendor/eigen/Eigen/src/MetisSupport/MetisSupport.h +137 -0
- data/vendor/eigen/Eigen/src/OrderingMethods/Amd.h +445 -0
- data/vendor/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +1843 -0
- data/vendor/eigen/Eigen/src/OrderingMethods/Ordering.h +157 -0
- data/vendor/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +678 -0
- data/vendor/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +543 -0
- data/vendor/eigen/Eigen/src/QR/ColPivHouseholderQR.h +653 -0
- data/vendor/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +97 -0
- data/vendor/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +562 -0
- data/vendor/eigen/Eigen/src/QR/FullPivHouseholderQR.h +676 -0
- data/vendor/eigen/Eigen/src/QR/HouseholderQR.h +409 -0
- data/vendor/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h +68 -0
- data/vendor/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +313 -0
- data/vendor/eigen/Eigen/src/SVD/BDCSVD.h +1246 -0
- data/vendor/eigen/Eigen/src/SVD/JacobiSVD.h +804 -0
- data/vendor/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +91 -0
- data/vendor/eigen/Eigen/src/SVD/SVDBase.h +315 -0
- data/vendor/eigen/Eigen/src/SVD/UpperBidiagonalization.h +414 -0
- data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +689 -0
- data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +199 -0
- data/vendor/eigen/Eigen/src/SparseCore/AmbiVector.h +377 -0
- data/vendor/eigen/Eigen/src/SparseCore/CompressedStorage.h +258 -0
- data/vendor/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +352 -0
- data/vendor/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h +67 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseAssign.h +216 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseBlock.h +603 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseColEtree.h +206 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +341 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +726 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +148 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +320 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +138 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseDot.h +98 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseFuzzy.h +29 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseMap.h +305 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseMatrix.h +1403 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +405 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparsePermutation.h +178 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseProduct.h +169 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseRedux.h +49 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseRef.h +397 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +656 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseSolverBase.h +124 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +198 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseTranspose.h +92 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseTriangularView.h +189 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseUtil.h +178 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseVector.h +478 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseView.h +253 -0
- data/vendor/eigen/Eigen/src/SparseCore/TriangularSolver.h +315 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU.h +773 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLUImpl.h +66 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +226 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_Structs.h +110 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +301 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_Utils.h +80 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h +181 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +179 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +107 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +280 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +126 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +130 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +223 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h +258 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h +137 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h +136 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h +83 -0
- data/vendor/eigen/Eigen/src/SparseQR/SparseQR.h +745 -0
- data/vendor/eigen/Eigen/src/StlSupport/StdDeque.h +126 -0
- data/vendor/eigen/Eigen/src/StlSupport/StdList.h +106 -0
- data/vendor/eigen/Eigen/src/StlSupport/StdVector.h +131 -0
- data/vendor/eigen/Eigen/src/StlSupport/details.h +84 -0
- data/vendor/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +1027 -0
- data/vendor/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +506 -0
- data/vendor/eigen/Eigen/src/misc/Image.h +82 -0
- data/vendor/eigen/Eigen/src/misc/Kernel.h +79 -0
- data/vendor/eigen/Eigen/src/misc/RealSvd2x2.h +55 -0
- data/vendor/eigen/Eigen/src/misc/blas.h +440 -0
- data/vendor/eigen/Eigen/src/misc/lapack.h +152 -0
- data/vendor/eigen/Eigen/src/misc/lapacke.h +16291 -0
- data/vendor/eigen/Eigen/src/misc/lapacke_mangling.h +17 -0
- data/vendor/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +332 -0
- data/vendor/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +552 -0
- data/vendor/eigen/Eigen/src/plugins/BlockMethods.h +1058 -0
- data/vendor/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h +115 -0
- data/vendor/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +163 -0
- data/vendor/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +152 -0
- data/vendor/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +85 -0
- data/vendor/eigen/README.md +3 -0
- data/vendor/eigen/bench/README.txt +55 -0
- data/vendor/eigen/bench/btl/COPYING +340 -0
- data/vendor/eigen/bench/btl/README +154 -0
- data/vendor/eigen/bench/tensors/README +21 -0
- data/vendor/eigen/blas/README.txt +6 -0
- data/vendor/eigen/demos/mandelbrot/README +10 -0
- data/vendor/eigen/demos/mix_eigen_and_c/README +9 -0
- data/vendor/eigen/demos/opengl/README +13 -0
- data/vendor/eigen/unsupported/Eigen/CXX11/src/Tensor/README.md +1760 -0
- data/vendor/eigen/unsupported/README.txt +50 -0
- data/vendor/tomotopy/LICENSE +21 -0
- data/vendor/tomotopy/README.kr.rst +375 -0
- data/vendor/tomotopy/README.rst +382 -0
- data/vendor/tomotopy/src/Labeling/FoRelevance.cpp +362 -0
- data/vendor/tomotopy/src/Labeling/FoRelevance.h +88 -0
- data/vendor/tomotopy/src/Labeling/Labeler.h +50 -0
- data/vendor/tomotopy/src/TopicModel/CT.h +37 -0
- data/vendor/tomotopy/src/TopicModel/CTModel.cpp +13 -0
- data/vendor/tomotopy/src/TopicModel/CTModel.hpp +293 -0
- data/vendor/tomotopy/src/TopicModel/DMR.h +51 -0
- data/vendor/tomotopy/src/TopicModel/DMRModel.cpp +13 -0
- data/vendor/tomotopy/src/TopicModel/DMRModel.hpp +374 -0
- data/vendor/tomotopy/src/TopicModel/DT.h +65 -0
- data/vendor/tomotopy/src/TopicModel/DTM.h +22 -0
- data/vendor/tomotopy/src/TopicModel/DTModel.cpp +15 -0
- data/vendor/tomotopy/src/TopicModel/DTModel.hpp +572 -0
- data/vendor/tomotopy/src/TopicModel/GDMR.h +37 -0
- data/vendor/tomotopy/src/TopicModel/GDMRModel.cpp +14 -0
- data/vendor/tomotopy/src/TopicModel/GDMRModel.hpp +485 -0
- data/vendor/tomotopy/src/TopicModel/HDP.h +74 -0
- data/vendor/tomotopy/src/TopicModel/HDPModel.cpp +13 -0
- data/vendor/tomotopy/src/TopicModel/HDPModel.hpp +592 -0
- data/vendor/tomotopy/src/TopicModel/HLDA.h +40 -0
- data/vendor/tomotopy/src/TopicModel/HLDAModel.cpp +13 -0
- data/vendor/tomotopy/src/TopicModel/HLDAModel.hpp +681 -0
- data/vendor/tomotopy/src/TopicModel/HPA.h +27 -0
- data/vendor/tomotopy/src/TopicModel/HPAModel.cpp +21 -0
- data/vendor/tomotopy/src/TopicModel/HPAModel.hpp +588 -0
- data/vendor/tomotopy/src/TopicModel/LDA.h +144 -0
- data/vendor/tomotopy/src/TopicModel/LDACVB0Model.hpp +442 -0
- data/vendor/tomotopy/src/TopicModel/LDAModel.cpp +13 -0
- data/vendor/tomotopy/src/TopicModel/LDAModel.hpp +1058 -0
- data/vendor/tomotopy/src/TopicModel/LLDA.h +45 -0
- data/vendor/tomotopy/src/TopicModel/LLDAModel.cpp +13 -0
- data/vendor/tomotopy/src/TopicModel/LLDAModel.hpp +203 -0
- data/vendor/tomotopy/src/TopicModel/MGLDA.h +63 -0
- data/vendor/tomotopy/src/TopicModel/MGLDAModel.cpp +17 -0
- data/vendor/tomotopy/src/TopicModel/MGLDAModel.hpp +558 -0
- data/vendor/tomotopy/src/TopicModel/PA.h +43 -0
- data/vendor/tomotopy/src/TopicModel/PAModel.cpp +13 -0
- data/vendor/tomotopy/src/TopicModel/PAModel.hpp +467 -0
- data/vendor/tomotopy/src/TopicModel/PLDA.h +17 -0
- data/vendor/tomotopy/src/TopicModel/PLDAModel.cpp +13 -0
- data/vendor/tomotopy/src/TopicModel/PLDAModel.hpp +214 -0
- data/vendor/tomotopy/src/TopicModel/SLDA.h +54 -0
- data/vendor/tomotopy/src/TopicModel/SLDAModel.cpp +17 -0
- data/vendor/tomotopy/src/TopicModel/SLDAModel.hpp +456 -0
- data/vendor/tomotopy/src/TopicModel/TopicModel.hpp +692 -0
- data/vendor/tomotopy/src/Utils/AliasMethod.hpp +169 -0
- data/vendor/tomotopy/src/Utils/Dictionary.h +80 -0
- data/vendor/tomotopy/src/Utils/EigenAddonOps.hpp +181 -0
- data/vendor/tomotopy/src/Utils/LBFGS.h +202 -0
- data/vendor/tomotopy/src/Utils/LBFGS/LineSearchBacktracking.h +120 -0
- data/vendor/tomotopy/src/Utils/LBFGS/LineSearchBracketing.h +122 -0
- data/vendor/tomotopy/src/Utils/LBFGS/Param.h +213 -0
- data/vendor/tomotopy/src/Utils/LUT.hpp +82 -0
- data/vendor/tomotopy/src/Utils/MultiNormalDistribution.hpp +69 -0
- data/vendor/tomotopy/src/Utils/PolyaGamma.hpp +200 -0
- data/vendor/tomotopy/src/Utils/PolyaGammaHybrid.hpp +672 -0
- data/vendor/tomotopy/src/Utils/ThreadPool.hpp +150 -0
- data/vendor/tomotopy/src/Utils/Trie.hpp +220 -0
- data/vendor/tomotopy/src/Utils/TruncMultiNormal.hpp +94 -0
- data/vendor/tomotopy/src/Utils/Utils.hpp +337 -0
- data/vendor/tomotopy/src/Utils/avx_gamma.h +46 -0
- data/vendor/tomotopy/src/Utils/avx_mathfun.h +736 -0
- data/vendor/tomotopy/src/Utils/exception.h +28 -0
- data/vendor/tomotopy/src/Utils/math.h +281 -0
- data/vendor/tomotopy/src/Utils/rtnorm.hpp +2690 -0
- data/vendor/tomotopy/src/Utils/sample.hpp +192 -0
- data/vendor/tomotopy/src/Utils/serializer.hpp +695 -0
- data/vendor/tomotopy/src/Utils/slp.hpp +131 -0
- data/vendor/tomotopy/src/Utils/sse_gamma.h +48 -0
- data/vendor/tomotopy/src/Utils/sse_mathfun.h +710 -0
- data/vendor/tomotopy/src/Utils/text.hpp +49 -0
- data/vendor/tomotopy/src/Utils/tvector.hpp +543 -0
- metadata +531 -0
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file EigenRand
|
|
3
|
+
* @author bab2min (bab2min@gmail.com)
|
|
4
|
+
* @brief
|
|
5
|
+
* @version 0.2.0
|
|
6
|
+
* @date 2020-06-22
|
|
7
|
+
*
|
|
8
|
+
* @copyright Copyright (c) 2020
|
|
9
|
+
*
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
#ifndef EIGENRAND_EIGENRAND_H
|
|
13
|
+
#define EIGENRAND_EIGENRAND_H
|
|
14
|
+
|
|
15
|
+
#include <Eigen/Dense>
|
|
16
|
+
#include <EigenRand/Macro.h>
|
|
17
|
+
#include <EigenRand/Core.h>
|
|
18
|
+
|
|
19
|
+
#endif
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file Macro.h
|
|
3
|
+
* @author bab2min (bab2min@gmail.com)
|
|
4
|
+
* @brief
|
|
5
|
+
* @version 0.2.1
|
|
6
|
+
* @date 2020-07-11
|
|
7
|
+
*
|
|
8
|
+
* @copyright Copyright (c) 2020
|
|
9
|
+
*
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
#ifndef EIGENRAND_MACRO_H
|
|
13
|
+
#define EIGENRAND_MACRO_H
|
|
14
|
+
|
|
15
|
+
#define EIGENRAND_WORLD_VERSION 0
|
|
16
|
+
#define EIGENRAND_MAJOR_VERSION 2
|
|
17
|
+
#define EIGENRAND_MINOR_VERSION 0
|
|
18
|
+
|
|
19
|
+
#if EIGEN_VERSION_AT_LEAST(3,3,7)
|
|
20
|
+
#else
|
|
21
|
+
#error At least Eigen 3.3.7 is required.
|
|
22
|
+
#endif
|
|
23
|
+
|
|
24
|
+
#endif
|
|
@@ -0,0 +1,978 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file MorePacketMath.h
|
|
3
|
+
* @author bab2min (bab2min@gmail.com)
|
|
4
|
+
* @brief
|
|
5
|
+
* @version 0.2.0
|
|
6
|
+
* @date 2020-06-22
|
|
7
|
+
*
|
|
8
|
+
* @copyright Copyright (c) 2020
|
|
9
|
+
*
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
#ifndef EIGENRAND_MORE_PACKET_MATH_H
|
|
13
|
+
#define EIGENRAND_MORE_PACKET_MATH_H
|
|
14
|
+
|
|
15
|
+
#include <Eigen/Dense>
|
|
16
|
+
|
|
17
|
+
namespace Eigen
|
|
18
|
+
{
|
|
19
|
+
namespace internal
|
|
20
|
+
{
|
|
21
|
+
template<typename Packet>
|
|
22
|
+
struct reinterpreter
|
|
23
|
+
{
|
|
24
|
+
};
|
|
25
|
+
|
|
26
|
+
template<typename Packet>
|
|
27
|
+
inline auto reinterpret_to_float(const Packet& x)
|
|
28
|
+
-> decltype(reinterpreter<Packet>{}.to_float(x))
|
|
29
|
+
{
|
|
30
|
+
return reinterpreter<Packet>{}.to_float(x);
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
template<typename Packet>
|
|
34
|
+
inline auto reinterpret_to_double(const Packet& x)
|
|
35
|
+
-> decltype(reinterpreter<Packet>{}.to_double(x))
|
|
36
|
+
{
|
|
37
|
+
return reinterpreter<Packet>{}.to_double(x);
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
template<typename Packet>
|
|
41
|
+
inline auto reinterpret_to_int(const Packet& x)
|
|
42
|
+
-> decltype(reinterpreter<Packet>{}.to_int(x))
|
|
43
|
+
{
|
|
44
|
+
return reinterpreter<Packet>{}.to_int(x);
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
template<typename Packet>
|
|
48
|
+
EIGEN_STRONG_INLINE Packet pseti64(uint64_t a);
|
|
49
|
+
|
|
50
|
+
template<typename Packet>
|
|
51
|
+
EIGEN_STRONG_INLINE Packet pcmpeq(const Packet& a, const Packet& b);
|
|
52
|
+
|
|
53
|
+
template<typename Packet>
|
|
54
|
+
EIGEN_STRONG_INLINE Packet psll(const Packet& a, int b);
|
|
55
|
+
|
|
56
|
+
template<typename Packet>
|
|
57
|
+
EIGEN_STRONG_INLINE Packet psrl(const Packet& a, int b);
|
|
58
|
+
|
|
59
|
+
template<typename Packet>
|
|
60
|
+
EIGEN_STRONG_INLINE Packet psll64(const Packet& a, int b);
|
|
61
|
+
|
|
62
|
+
template<typename Packet>
|
|
63
|
+
EIGEN_STRONG_INLINE Packet psrl64(const Packet& a, int b);
|
|
64
|
+
|
|
65
|
+
template<typename Packet>
|
|
66
|
+
EIGEN_STRONG_INLINE int pmovemask(const Packet& a);
|
|
67
|
+
|
|
68
|
+
template<>
|
|
69
|
+
EIGEN_STRONG_INLINE uint64_t psll64<uint64_t>(const uint64_t& a, int b)
|
|
70
|
+
{
|
|
71
|
+
return a << b;
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
template<>
|
|
75
|
+
EIGEN_STRONG_INLINE uint64_t psrl64<uint64_t>(const uint64_t& a, int b)
|
|
76
|
+
{
|
|
77
|
+
return a >> b;
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
template<typename Packet>
|
|
81
|
+
EIGEN_STRONG_INLINE void psincos(Packet x, Packet &s, Packet &c)
|
|
82
|
+
{
|
|
83
|
+
Packet xmm1, xmm2, xmm3 = pset1<Packet>(0), sign_bit_sin, y;
|
|
84
|
+
using IntPacket = decltype(reinterpret_to_int(x));
|
|
85
|
+
IntPacket emm0, emm2, emm4;
|
|
86
|
+
|
|
87
|
+
sign_bit_sin = x;
|
|
88
|
+
/* take the absolute value */
|
|
89
|
+
x = pabs(x);
|
|
90
|
+
/* extract the sign bit (upper one) */
|
|
91
|
+
sign_bit_sin = reinterpret_to_float(
|
|
92
|
+
pand(reinterpret_to_int(sign_bit_sin), pset1<IntPacket>(0x80000000))
|
|
93
|
+
);
|
|
94
|
+
|
|
95
|
+
/* scale by 4/Pi */
|
|
96
|
+
y = pmul(x, pset1<Packet>(1.27323954473516));
|
|
97
|
+
|
|
98
|
+
/* store the integer part of y in emm2 */
|
|
99
|
+
emm2 = pcast<Packet, IntPacket>(y);
|
|
100
|
+
|
|
101
|
+
/* j=(j+1) & (~1) (see the cephes sources) */
|
|
102
|
+
emm2 = padd(emm2, pset1<IntPacket>(1));
|
|
103
|
+
emm2 = pand(emm2, pset1<IntPacket>(~1));
|
|
104
|
+
y = pcast<IntPacket, Packet>(emm2);
|
|
105
|
+
|
|
106
|
+
emm4 = emm2;
|
|
107
|
+
|
|
108
|
+
/* get the swap sign flag for the sine */
|
|
109
|
+
emm0 = pand(emm2, pset1<IntPacket>(4));
|
|
110
|
+
emm0 = psll(emm0, 29);
|
|
111
|
+
Packet swap_sign_bit_sin = reinterpret_to_float(emm0);
|
|
112
|
+
|
|
113
|
+
/* get the polynom selection mask for the sine*/
|
|
114
|
+
emm2 = pand(emm2, pset1<IntPacket>(2));
|
|
115
|
+
|
|
116
|
+
emm2 = pcmpeq(emm2, pset1<IntPacket>(0));
|
|
117
|
+
Packet poly_mask = reinterpret_to_float(emm2);
|
|
118
|
+
|
|
119
|
+
/* The magic pass: "Extended precision modular arithmetic"
|
|
120
|
+
x = ((x - y * DP1) - y * DP2) - y * DP3; */
|
|
121
|
+
xmm1 = pset1<Packet>(-0.78515625);
|
|
122
|
+
xmm2 = pset1<Packet>(-2.4187564849853515625e-4);
|
|
123
|
+
xmm3 = pset1<Packet>(-3.77489497744594108e-8);
|
|
124
|
+
xmm1 = pmul(y, xmm1);
|
|
125
|
+
xmm2 = pmul(y, xmm2);
|
|
126
|
+
xmm3 = pmul(y, xmm3);
|
|
127
|
+
x = padd(x, xmm1);
|
|
128
|
+
x = padd(x, xmm2);
|
|
129
|
+
x = padd(x, xmm3);
|
|
130
|
+
|
|
131
|
+
emm4 = psub(emm4, pset1<IntPacket>(2));
|
|
132
|
+
emm4 = pandnot(emm4, pset1<IntPacket>(4));
|
|
133
|
+
emm4 = psll(emm4, 29);
|
|
134
|
+
Packet sign_bit_cos = reinterpret_to_float(emm4);
|
|
135
|
+
sign_bit_sin = pxor(sign_bit_sin, swap_sign_bit_sin);
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
/* Evaluate the first polynom (0 <= x <= Pi/4) */
|
|
139
|
+
Packet z = pmul(x, x);
|
|
140
|
+
y = pset1<Packet>(2.443315711809948E-005);
|
|
141
|
+
|
|
142
|
+
y = pmul(y, z);
|
|
143
|
+
y = padd(y, pset1<Packet>(-1.388731625493765E-003));
|
|
144
|
+
y = pmul(y, z);
|
|
145
|
+
y = padd(y, pset1<Packet>(4.166664568298827E-002));
|
|
146
|
+
y = pmul(y, z);
|
|
147
|
+
y = pmul(y, z);
|
|
148
|
+
Packet tmp = pmul(z, pset1<Packet>(0.5));
|
|
149
|
+
y = psub(y, tmp);
|
|
150
|
+
y = padd(y, pset1<Packet>(1));
|
|
151
|
+
|
|
152
|
+
/* Evaluate the second polynom (Pi/4 <= x <= 0) */
|
|
153
|
+
|
|
154
|
+
Packet y2 = pset1<Packet>(-1.9515295891E-4);
|
|
155
|
+
y2 = pmul(y2, z);
|
|
156
|
+
y2 = padd(y2, pset1<Packet>(8.3321608736E-3));
|
|
157
|
+
y2 = pmul(y2, z);
|
|
158
|
+
y2 = padd(y2, pset1<Packet>(-1.6666654611E-1));
|
|
159
|
+
y2 = pmul(y2, z);
|
|
160
|
+
y2 = pmul(y2, x);
|
|
161
|
+
y2 = padd(y2, x);
|
|
162
|
+
|
|
163
|
+
/* select the correct result from the two polynoms */
|
|
164
|
+
xmm3 = poly_mask;
|
|
165
|
+
Packet ysin2 = pand(xmm3, y2);
|
|
166
|
+
Packet ysin1 = pandnot(xmm3, y);
|
|
167
|
+
y2 = psub(y2, ysin2);
|
|
168
|
+
y = psub(y, ysin1);
|
|
169
|
+
|
|
170
|
+
xmm1 = padd(ysin1, ysin2);
|
|
171
|
+
xmm2 = padd(y, y2);
|
|
172
|
+
|
|
173
|
+
/* update the sign */
|
|
174
|
+
s = pxor(xmm1, sign_bit_sin);
|
|
175
|
+
c = pxor(xmm2, sign_bit_cos);
|
|
176
|
+
}
|
|
177
|
+
|
|
178
|
+
// approximation : lgamma(z) ~= (z+2.5)ln(z+3) - z - 3 + 0.5 ln (2pi) + 1/12/(z + 3) - ln (z(z+1)(z+2))
|
|
179
|
+
template<typename Packet>
|
|
180
|
+
EIGEN_STRONG_INLINE Packet plgamma(const Packet& x)
|
|
181
|
+
{
|
|
182
|
+
auto x_3 = padd(x, pset1<Packet>(3));
|
|
183
|
+
auto ret = pmul(padd(x_3, pset1<Packet>(-0.5)), plog(x_3));
|
|
184
|
+
ret = psub(ret, x_3);
|
|
185
|
+
ret = padd(ret, pset1<Packet>(0.9189385332046727));
|
|
186
|
+
ret = padd(ret, pdiv(pset1<Packet>(1 / 12.), x_3));
|
|
187
|
+
ret = psub(ret, plog(pmul(
|
|
188
|
+
pmul(psub(x_3, pset1<Packet>(1)), psub(x_3, pset1<Packet>(2))), x)));
|
|
189
|
+
return ret;
|
|
190
|
+
}
|
|
191
|
+
|
|
192
|
+
template<typename Packet>
|
|
193
|
+
EIGEN_STRONG_INLINE Packet pcmplt(const Packet& a, const Packet& b);
|
|
194
|
+
|
|
195
|
+
template<typename Packet>
|
|
196
|
+
EIGEN_STRONG_INLINE Packet pcmple(const Packet& a, const Packet& b);
|
|
197
|
+
|
|
198
|
+
template<typename Packet>
|
|
199
|
+
EIGEN_STRONG_INLINE Packet pblendv(const Packet& ifPacket, const Packet& thenPacket, const Packet& elsePacket);
|
|
200
|
+
|
|
201
|
+
template<typename Packet>
|
|
202
|
+
EIGEN_STRONG_INLINE Packet pgather(const int* addr, const Packet& index);
|
|
203
|
+
|
|
204
|
+
template<typename Packet>
|
|
205
|
+
EIGEN_STRONG_INLINE auto pgather(const float* addr, const Packet& index) -> decltype(reinterpret_to_float(std::declval<Packet>()));
|
|
206
|
+
|
|
207
|
+
template<typename Packet>
|
|
208
|
+
EIGEN_STRONG_INLINE auto pgather(const double* addr, const Packet& index, bool upperhalf = false) -> decltype(reinterpret_to_double(std::declval<Packet>()));
|
|
209
|
+
|
|
210
|
+
template<typename Packet>
|
|
211
|
+
EIGEN_STRONG_INLINE Packet ptruncate(const Packet& a);
|
|
212
|
+
|
|
213
|
+
template<typename Packet>
|
|
214
|
+
EIGEN_STRONG_INLINE Packet pcmpeq64(const Packet& a, const Packet& b);
|
|
215
|
+
|
|
216
|
+
template<typename Packet>
|
|
217
|
+
EIGEN_STRONG_INLINE Packet pmuluadd64(const Packet& a, uint64_t b, uint64_t c);
|
|
218
|
+
|
|
219
|
+
template<typename IntPacket>
|
|
220
|
+
EIGEN_STRONG_INLINE auto bit_to_ur_float(const IntPacket& x) -> decltype(reinterpret_to_float(x))
|
|
221
|
+
{
|
|
222
|
+
using FloatPacket = decltype(reinterpret_to_float(x));
|
|
223
|
+
|
|
224
|
+
const IntPacket lower = pset1<IntPacket>(0x7FFFFF),
|
|
225
|
+
upper = pset1<IntPacket>(127 << 23);
|
|
226
|
+
const FloatPacket one = pset1<FloatPacket>(1);
|
|
227
|
+
|
|
228
|
+
return psub(reinterpret_to_float(por(pand(x, lower), upper)), one);
|
|
229
|
+
}
|
|
230
|
+
|
|
231
|
+
template<typename IntPacket>
|
|
232
|
+
EIGEN_STRONG_INLINE auto bit_to_ur_double(const IntPacket& x) -> decltype(reinterpret_to_double(x))
|
|
233
|
+
{
|
|
234
|
+
using DoublePacket = decltype(reinterpret_to_double(x));
|
|
235
|
+
|
|
236
|
+
const IntPacket lower = pseti64<IntPacket>(0xFFFFFFFFFFFFFull),
|
|
237
|
+
upper = pseti64<IntPacket>(1023ull << 52);
|
|
238
|
+
const DoublePacket one = pset1<DoublePacket>(1);
|
|
239
|
+
|
|
240
|
+
return psub(reinterpret_to_double(por(pand(x, lower), upper)), one);
|
|
241
|
+
}
|
|
242
|
+
|
|
243
|
+
template<typename Scalar>
|
|
244
|
+
struct bit_scalar;
|
|
245
|
+
|
|
246
|
+
template<>
|
|
247
|
+
struct bit_scalar<float>
|
|
248
|
+
{
|
|
249
|
+
float to_ur(uint32_t x)
|
|
250
|
+
{
|
|
251
|
+
union
|
|
252
|
+
{
|
|
253
|
+
uint32_t u;
|
|
254
|
+
float f;
|
|
255
|
+
};
|
|
256
|
+
u = (x & 0x7FFFFF) | (127 << 23);
|
|
257
|
+
return f - 1.f;
|
|
258
|
+
}
|
|
259
|
+
|
|
260
|
+
float to_nzur(uint32_t x)
|
|
261
|
+
{
|
|
262
|
+
return to_ur(x) + std::numeric_limits<float>::epsilon() / 8;
|
|
263
|
+
}
|
|
264
|
+
};
|
|
265
|
+
|
|
266
|
+
template<>
|
|
267
|
+
struct bit_scalar<double>
|
|
268
|
+
{
|
|
269
|
+
double to_ur(uint64_t x)
|
|
270
|
+
{
|
|
271
|
+
union
|
|
272
|
+
{
|
|
273
|
+
uint64_t u;
|
|
274
|
+
double f;
|
|
275
|
+
};
|
|
276
|
+
u = (x & 0xFFFFFFFFFFFFFull) | (1023ull << 52);
|
|
277
|
+
return f - 1.;
|
|
278
|
+
}
|
|
279
|
+
|
|
280
|
+
double to_nzur(uint64_t x)
|
|
281
|
+
{
|
|
282
|
+
return to_ur(x) + std::numeric_limits<double>::epsilon() / 8;
|
|
283
|
+
}
|
|
284
|
+
};
|
|
285
|
+
|
|
286
|
+
|
|
287
|
+
struct float2
|
|
288
|
+
{
|
|
289
|
+
float f[2];
|
|
290
|
+
};
|
|
291
|
+
|
|
292
|
+
EIGEN_STRONG_INLINE float2 bit_to_ur_float(uint64_t x)
|
|
293
|
+
{
|
|
294
|
+
bit_scalar<float> bs;
|
|
295
|
+
float2 ret;
|
|
296
|
+
ret.f[0] = bs.to_ur(x & 0xFFFFFFFF);
|
|
297
|
+
ret.f[1] = bs.to_ur(x >> 32);
|
|
298
|
+
return ret;
|
|
299
|
+
}
|
|
300
|
+
}
|
|
301
|
+
}
|
|
302
|
+
|
|
303
|
+
#ifdef EIGEN_VECTORIZE_AVX
|
|
304
|
+
#include <immintrin.h>
|
|
305
|
+
|
|
306
|
+
namespace Eigen
|
|
307
|
+
{
|
|
308
|
+
namespace internal
|
|
309
|
+
{
|
|
310
|
+
template<>
|
|
311
|
+
struct reinterpreter<Packet8i>
|
|
312
|
+
{
|
|
313
|
+
EIGEN_STRONG_INLINE Packet8f to_float(const Packet8i& x)
|
|
314
|
+
{
|
|
315
|
+
return _mm256_castsi256_ps(x);
|
|
316
|
+
}
|
|
317
|
+
|
|
318
|
+
EIGEN_STRONG_INLINE Packet4d to_double(const Packet8i& x)
|
|
319
|
+
{
|
|
320
|
+
return _mm256_castsi256_pd(x);
|
|
321
|
+
}
|
|
322
|
+
|
|
323
|
+
EIGEN_STRONG_INLINE Packet8i to_int(const Packet8i& x)
|
|
324
|
+
{
|
|
325
|
+
return x;
|
|
326
|
+
}
|
|
327
|
+
};
|
|
328
|
+
|
|
329
|
+
template<>
|
|
330
|
+
struct reinterpreter<Packet8f>
|
|
331
|
+
{
|
|
332
|
+
EIGEN_STRONG_INLINE Packet8f to_float(const Packet8f& x)
|
|
333
|
+
{
|
|
334
|
+
return x;
|
|
335
|
+
}
|
|
336
|
+
|
|
337
|
+
EIGEN_STRONG_INLINE Packet4d to_double(const Packet8f& x)
|
|
338
|
+
{
|
|
339
|
+
return _mm256_castps_pd(x);
|
|
340
|
+
}
|
|
341
|
+
|
|
342
|
+
EIGEN_STRONG_INLINE Packet8i to_int(const Packet8f& x)
|
|
343
|
+
{
|
|
344
|
+
return _mm256_castps_si256(x);
|
|
345
|
+
}
|
|
346
|
+
};
|
|
347
|
+
|
|
348
|
+
template<>
|
|
349
|
+
struct reinterpreter<Packet4d>
|
|
350
|
+
{
|
|
351
|
+
EIGEN_STRONG_INLINE Packet8f to_float(const Packet4d& x)
|
|
352
|
+
{
|
|
353
|
+
return _mm256_castpd_ps(x);
|
|
354
|
+
}
|
|
355
|
+
|
|
356
|
+
EIGEN_STRONG_INLINE Packet4d to_double(const Packet4d& x)
|
|
357
|
+
{
|
|
358
|
+
return x;
|
|
359
|
+
}
|
|
360
|
+
|
|
361
|
+
EIGEN_STRONG_INLINE Packet8i to_int(const Packet4d& x)
|
|
362
|
+
{
|
|
363
|
+
return _mm256_castpd_si256(x);
|
|
364
|
+
}
|
|
365
|
+
};
|
|
366
|
+
|
|
367
|
+
EIGEN_STRONG_INLINE void split_two(const Packet8i& x, Packet4i& a, Packet4i& b)
|
|
368
|
+
{
|
|
369
|
+
a = _mm256_extractf128_si256(x, 0);
|
|
370
|
+
b = _mm256_extractf128_si256(x, 1);
|
|
371
|
+
}
|
|
372
|
+
|
|
373
|
+
EIGEN_STRONG_INLINE Packet8i combine_two(const Packet4i& a, const Packet4i& b)
|
|
374
|
+
{
|
|
375
|
+
return _mm256_insertf128_si256(_mm256_castsi128_si256(a), b, 1);
|
|
376
|
+
}
|
|
377
|
+
|
|
378
|
+
EIGEN_STRONG_INLINE void split_two(const Packet8f& x, Packet4f& a, Packet4f& b)
|
|
379
|
+
{
|
|
380
|
+
a = _mm256_extractf128_ps(x, 0);
|
|
381
|
+
b = _mm256_extractf128_ps(x, 1);
|
|
382
|
+
}
|
|
383
|
+
|
|
384
|
+
EIGEN_STRONG_INLINE Packet8f combine_two(const Packet4f& a, const Packet4f& b)
|
|
385
|
+
{
|
|
386
|
+
return _mm256_insertf128_ps(_mm256_castps128_ps256(a), b, 1);
|
|
387
|
+
}
|
|
388
|
+
|
|
389
|
+
|
|
390
|
+
EIGEN_STRONG_INLINE Packet4i combine_low32(const Packet8i& a)
|
|
391
|
+
{
|
|
392
|
+
#ifdef EIGEN_VECTORIZE_AVX2
|
|
393
|
+
return _mm256_castsi256_si128(_mm256_permutevar8x32_epi32(a, _mm256_setr_epi32(0, 2, 4, 6, 1, 3, 5, 7)));
|
|
394
|
+
#else
|
|
395
|
+
auto sc = _mm256_permutevar_ps(_mm256_castsi256_ps(a), _mm256_setr_epi32(0, 2, 1, 3, 1, 3, 0, 2));
|
|
396
|
+
return _mm_castps_si128(_mm_blend_ps(_mm256_extractf128_ps(sc, 0), _mm256_extractf128_ps(sc, 1), 0b1100));
|
|
397
|
+
#endif
|
|
398
|
+
}
|
|
399
|
+
|
|
400
|
+
template<>
|
|
401
|
+
EIGEN_STRONG_INLINE Packet8i pseti64<Packet8i>(uint64_t a)
|
|
402
|
+
{
|
|
403
|
+
return _mm256_set1_epi64x(a);
|
|
404
|
+
}
|
|
405
|
+
|
|
406
|
+
template<>
|
|
407
|
+
EIGEN_STRONG_INLINE Packet8i pcmpeq<Packet8i>(const Packet8i& a, const Packet8i& b)
|
|
408
|
+
{
|
|
409
|
+
#ifdef EIGEN_VECTORIZE_AVX2
|
|
410
|
+
return _mm256_cmpeq_epi32(a, b);
|
|
411
|
+
#else
|
|
412
|
+
Packet4i a1, a2, b1, b2;
|
|
413
|
+
split_two(a, a1, a2);
|
|
414
|
+
split_two(b, b1, b2);
|
|
415
|
+
return combine_two(_mm_cmpeq_epi32(a1, b1), _mm_cmpeq_epi32(a2, b2));
|
|
416
|
+
#endif
|
|
417
|
+
}
|
|
418
|
+
|
|
419
|
+
template<>
|
|
420
|
+
EIGEN_STRONG_INLINE Packet8i psll<Packet8i>(const Packet8i& a, int b)
|
|
421
|
+
{
|
|
422
|
+
#ifdef EIGEN_VECTORIZE_AVX2
|
|
423
|
+
return _mm256_slli_epi32(a, b);
|
|
424
|
+
#else
|
|
425
|
+
Packet4i a1, a2;
|
|
426
|
+
split_two(a, a1, a2);
|
|
427
|
+
return combine_two(_mm_slli_epi32(a1, b), _mm_slli_epi32(a2, b));
|
|
428
|
+
#endif
|
|
429
|
+
}
|
|
430
|
+
|
|
431
|
+
template<>
|
|
432
|
+
EIGEN_STRONG_INLINE Packet8i psrl<Packet8i>(const Packet8i& a, int b)
|
|
433
|
+
{
|
|
434
|
+
#ifdef EIGEN_VECTORIZE_AVX2
|
|
435
|
+
return _mm256_srli_epi32(a, b);
|
|
436
|
+
#else
|
|
437
|
+
Packet4i a1, a2;
|
|
438
|
+
split_two(a, a1, a2);
|
|
439
|
+
return combine_two(_mm_srli_epi32(a1, b), _mm_srli_epi32(a2, b));
|
|
440
|
+
#endif
|
|
441
|
+
}
|
|
442
|
+
|
|
443
|
+
template<>
|
|
444
|
+
EIGEN_STRONG_INLINE Packet8i psll64<Packet8i>(const Packet8i& a, int b)
|
|
445
|
+
{
|
|
446
|
+
#ifdef EIGEN_VECTORIZE_AVX2
|
|
447
|
+
return _mm256_slli_epi64(a, b);
|
|
448
|
+
#else
|
|
449
|
+
Packet4i a1, a2;
|
|
450
|
+
split_two(a, a1, a2);
|
|
451
|
+
return combine_two(_mm_slli_epi64(a1, b), _mm_slli_epi64(a2, b));
|
|
452
|
+
#endif
|
|
453
|
+
}
|
|
454
|
+
|
|
455
|
+
template<>
|
|
456
|
+
EIGEN_STRONG_INLINE Packet8i psrl64<Packet8i>(const Packet8i& a, int b)
|
|
457
|
+
{
|
|
458
|
+
#ifdef EIGEN_VECTORIZE_AVX2
|
|
459
|
+
return _mm256_srli_epi64(a, b);
|
|
460
|
+
#else
|
|
461
|
+
Packet4i a1, a2;
|
|
462
|
+
split_two(a, a1, a2);
|
|
463
|
+
return combine_two(_mm_srli_epi64(a1, b), _mm_srli_epi64(a2, b));
|
|
464
|
+
#endif
|
|
465
|
+
}
|
|
466
|
+
|
|
467
|
+
template<> EIGEN_STRONG_INLINE Packet8i padd<Packet8i>(const Packet8i& a, const Packet8i& b)
|
|
468
|
+
{
|
|
469
|
+
#ifdef EIGEN_VECTORIZE_AVX2
|
|
470
|
+
return _mm256_add_epi32(a, b);
|
|
471
|
+
#else
|
|
472
|
+
Packet4i a1, a2, b1, b2;
|
|
473
|
+
split_two(a, a1, a2);
|
|
474
|
+
split_two(b, b1, b2);
|
|
475
|
+
return combine_two(_mm_add_epi32(a1, b1), _mm_add_epi32(a2, b2));
|
|
476
|
+
#endif
|
|
477
|
+
}
|
|
478
|
+
|
|
479
|
+
template<> EIGEN_STRONG_INLINE Packet8i psub<Packet8i>(const Packet8i& a, const Packet8i& b)
|
|
480
|
+
{
|
|
481
|
+
#ifdef EIGEN_VECTORIZE_AVX2
|
|
482
|
+
return _mm256_sub_epi32(a, b);
|
|
483
|
+
#else
|
|
484
|
+
Packet4i a1, a2, b1, b2;
|
|
485
|
+
split_two(a, a1, a2);
|
|
486
|
+
split_two(b, b1, b2);
|
|
487
|
+
return combine_two(_mm_sub_epi32(a1, b1), _mm_sub_epi32(a2, b2));
|
|
488
|
+
#endif
|
|
489
|
+
}
|
|
490
|
+
|
|
491
|
+
template<> EIGEN_STRONG_INLINE Packet8i pand<Packet8i>(const Packet8i& a, const Packet8i& b)
|
|
492
|
+
{
|
|
493
|
+
#ifdef EIGEN_VECTORIZE_AVX2
|
|
494
|
+
return _mm256_and_si256(a, b);
|
|
495
|
+
#else
|
|
496
|
+
return reinterpret_to_int(_mm256_and_ps(reinterpret_to_float(a), reinterpret_to_float(b)));
|
|
497
|
+
#endif
|
|
498
|
+
}
|
|
499
|
+
|
|
500
|
+
template<> EIGEN_STRONG_INLINE Packet8i pandnot<Packet8i>(const Packet8i& a, const Packet8i& b)
|
|
501
|
+
{
|
|
502
|
+
#ifdef EIGEN_VECTORIZE_AVX2
|
|
503
|
+
return _mm256_andnot_si256(a, b);
|
|
504
|
+
#else
|
|
505
|
+
return reinterpret_to_int(_mm256_andnot_ps(reinterpret_to_float(a), reinterpret_to_float(b)));
|
|
506
|
+
#endif
|
|
507
|
+
}
|
|
508
|
+
|
|
509
|
+
template<> EIGEN_STRONG_INLINE Packet8i por<Packet8i>(const Packet8i& a, const Packet8i& b)
|
|
510
|
+
{
|
|
511
|
+
#ifdef EIGEN_VECTORIZE_AVX2
|
|
512
|
+
return _mm256_or_si256(a, b);
|
|
513
|
+
#else
|
|
514
|
+
return reinterpret_to_int(_mm256_or_ps(reinterpret_to_float(a), reinterpret_to_float(b)));
|
|
515
|
+
#endif
|
|
516
|
+
}
|
|
517
|
+
|
|
518
|
+
template<> EIGEN_STRONG_INLINE Packet8i pxor<Packet8i>(const Packet8i& a, const Packet8i& b)
|
|
519
|
+
{
|
|
520
|
+
#ifdef EIGEN_VECTORIZE_AVX2
|
|
521
|
+
return _mm256_xor_si256(a, b);
|
|
522
|
+
#else
|
|
523
|
+
return reinterpret_to_int(_mm256_xor_ps(reinterpret_to_float(a), reinterpret_to_float(b)));
|
|
524
|
+
#endif
|
|
525
|
+
}
|
|
526
|
+
|
|
527
|
+
template<>
|
|
528
|
+
EIGEN_STRONG_INLINE Packet8i pcmplt<Packet8i>(const Packet8i& a, const Packet8i& b)
|
|
529
|
+
{
|
|
530
|
+
return _mm256_cmpgt_epi32(b, a);
|
|
531
|
+
}
|
|
532
|
+
|
|
533
|
+
template<>
|
|
534
|
+
EIGEN_STRONG_INLINE Packet8f pcmplt<Packet8f>(const Packet8f& a, const Packet8f& b)
|
|
535
|
+
{
|
|
536
|
+
return _mm256_cmp_ps(a, b, _CMP_LT_OQ);
|
|
537
|
+
}
|
|
538
|
+
|
|
539
|
+
template<>
|
|
540
|
+
EIGEN_STRONG_INLINE Packet8f pcmple<Packet8f>(const Packet8f& a, const Packet8f& b)
|
|
541
|
+
{
|
|
542
|
+
return _mm256_cmp_ps(a, b, _CMP_LE_OQ);
|
|
543
|
+
}
|
|
544
|
+
|
|
545
|
+
template<>
|
|
546
|
+
EIGEN_STRONG_INLINE Packet4d pcmplt<Packet4d>(const Packet4d& a, const Packet4d& b)
|
|
547
|
+
{
|
|
548
|
+
return _mm256_cmp_pd(a, b, _CMP_LT_OQ);
|
|
549
|
+
}
|
|
550
|
+
|
|
551
|
+
template<>
|
|
552
|
+
EIGEN_STRONG_INLINE Packet4d pcmple<Packet4d>(const Packet4d& a, const Packet4d& b)
|
|
553
|
+
{
|
|
554
|
+
return _mm256_cmp_pd(a, b, _CMP_LE_OQ);
|
|
555
|
+
}
|
|
556
|
+
|
|
557
|
+
template<>
|
|
558
|
+
EIGEN_STRONG_INLINE Packet8f pblendv(const Packet8f& ifPacket, const Packet8f& thenPacket, const Packet8f& elsePacket)
|
|
559
|
+
{
|
|
560
|
+
return _mm256_blendv_ps(elsePacket, thenPacket, ifPacket);
|
|
561
|
+
}
|
|
562
|
+
|
|
563
|
+
template<>
|
|
564
|
+
EIGEN_STRONG_INLINE Packet8i pblendv(const Packet8i& ifPacket, const Packet8i& thenPacket, const Packet8i& elsePacket)
|
|
565
|
+
{
|
|
566
|
+
return _mm256_castps_si256(_mm256_blendv_ps(
|
|
567
|
+
_mm256_castsi256_ps(elsePacket),
|
|
568
|
+
_mm256_castsi256_ps(thenPacket),
|
|
569
|
+
_mm256_castsi256_ps(ifPacket)
|
|
570
|
+
));
|
|
571
|
+
}
|
|
572
|
+
|
|
573
|
+
template<>
|
|
574
|
+
EIGEN_STRONG_INLINE Packet4d pblendv(const Packet4d& ifPacket, const Packet4d& thenPacket, const Packet4d& elsePacket)
|
|
575
|
+
{
|
|
576
|
+
return _mm256_blendv_pd(elsePacket, thenPacket, ifPacket);
|
|
577
|
+
}
|
|
578
|
+
|
|
579
|
+
template<>
|
|
580
|
+
EIGEN_STRONG_INLINE Packet8i pgather<Packet8i>(const int* addr, const Packet8i& index)
|
|
581
|
+
{
|
|
582
|
+
#ifdef EIGEN_VECTORIZE_AVX2
|
|
583
|
+
return _mm256_i32gather_epi32(addr, index, 4);
|
|
584
|
+
#else
|
|
585
|
+
uint32_t u[8];
|
|
586
|
+
_mm256_storeu_si256((Packet8i*)u, index);
|
|
587
|
+
return _mm256_setr_epi32(addr[u[0]], addr[u[1]], addr[u[2]], addr[u[3]],
|
|
588
|
+
addr[u[4]], addr[u[5]], addr[u[6]], addr[u[7]]);
|
|
589
|
+
#endif
|
|
590
|
+
}
|
|
591
|
+
|
|
592
|
+
template<>
|
|
593
|
+
EIGEN_STRONG_INLINE Packet8f pgather<Packet8i>(const float *addr, const Packet8i& index)
|
|
594
|
+
{
|
|
595
|
+
#ifdef EIGEN_VECTORIZE_AVX2
|
|
596
|
+
return _mm256_i32gather_ps(addr, index, 4);
|
|
597
|
+
#else
|
|
598
|
+
uint32_t u[8];
|
|
599
|
+
_mm256_storeu_si256((Packet8i*)u, index);
|
|
600
|
+
return _mm256_setr_ps(addr[u[0]], addr[u[1]], addr[u[2]], addr[u[3]],
|
|
601
|
+
addr[u[4]], addr[u[5]], addr[u[6]], addr[u[7]]);
|
|
602
|
+
#endif
|
|
603
|
+
}
|
|
604
|
+
|
|
605
|
+
template<>
|
|
606
|
+
EIGEN_STRONG_INLINE Packet4d pgather<Packet8i>(const double *addr, const Packet8i& index, bool upperhalf)
|
|
607
|
+
{
|
|
608
|
+
#ifdef EIGEN_VECTORIZE_AVX2
|
|
609
|
+
return _mm256_i32gather_pd(addr, _mm256_castsi256_si128(index), 8);
|
|
610
|
+
#else
|
|
611
|
+
uint32_t u[8];
|
|
612
|
+
_mm256_storeu_si256((Packet8i*)u, index);
|
|
613
|
+
if (upperhalf)
|
|
614
|
+
{
|
|
615
|
+
return _mm256_setr_pd(addr[u[4]], addr[u[5]], addr[u[6]], addr[u[7]]);
|
|
616
|
+
}
|
|
617
|
+
else
|
|
618
|
+
{
|
|
619
|
+
return _mm256_setr_pd(addr[u[0]], addr[u[1]], addr[u[2]], addr[u[3]]);
|
|
620
|
+
}
|
|
621
|
+
#endif
|
|
622
|
+
}
|
|
623
|
+
|
|
624
|
+
template<>
|
|
625
|
+
EIGEN_STRONG_INLINE int pmovemask<Packet8f>(const Packet8f& a)
|
|
626
|
+
{
|
|
627
|
+
return _mm256_movemask_ps(a);
|
|
628
|
+
}
|
|
629
|
+
|
|
630
|
+
template<>
|
|
631
|
+
EIGEN_STRONG_INLINE int pmovemask<Packet4d>(const Packet4d& a)
|
|
632
|
+
{
|
|
633
|
+
return _mm256_movemask_pd(a);
|
|
634
|
+
}
|
|
635
|
+
|
|
636
|
+
template<>
|
|
637
|
+
EIGEN_STRONG_INLINE int pmovemask<Packet8i>(const Packet8i& a)
|
|
638
|
+
{
|
|
639
|
+
return pmovemask(_mm256_castsi256_ps(a));
|
|
640
|
+
}
|
|
641
|
+
|
|
642
|
+
template<>
|
|
643
|
+
EIGEN_STRONG_INLINE Packet8f ptruncate<Packet8f>(const Packet8f& a)
|
|
644
|
+
{
|
|
645
|
+
return _mm256_round_ps(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
|
|
646
|
+
}
|
|
647
|
+
|
|
648
|
+
template<>
|
|
649
|
+
EIGEN_STRONG_INLINE Packet4d ptruncate<Packet4d>(const Packet4d& a)
|
|
650
|
+
{
|
|
651
|
+
return _mm256_round_pd(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
|
|
652
|
+
}
|
|
653
|
+
|
|
654
|
+
template<>
|
|
655
|
+
EIGEN_STRONG_INLINE Packet8i pcmpeq64<Packet8i>(const Packet8i& a, const Packet8i& b)
|
|
656
|
+
{
|
|
657
|
+
#ifdef EIGEN_VECTORIZE_AVX2
|
|
658
|
+
return _mm256_cmpeq_epi64(a, b);
|
|
659
|
+
#else
|
|
660
|
+
Packet4i a1, a2, b1, b2;
|
|
661
|
+
split_two(a, a1, a2);
|
|
662
|
+
split_two(b, b1, b2);
|
|
663
|
+
return combine_two(_mm_cmpeq_epi64(a1, b1), _mm_cmpeq_epi64(a2, b2));
|
|
664
|
+
#endif
|
|
665
|
+
}
|
|
666
|
+
|
|
667
|
+
template<>
|
|
668
|
+
EIGEN_STRONG_INLINE Packet8i pmuluadd64<Packet8i>(const Packet8i& a, uint64_t b, uint64_t c)
|
|
669
|
+
{
|
|
670
|
+
uint64_t u[4];
|
|
671
|
+
_mm256_storeu_si256((__m256i*)u, a);
|
|
672
|
+
u[0] = u[0] * b + c;
|
|
673
|
+
u[1] = u[1] * b + c;
|
|
674
|
+
u[2] = u[2] * b + c;
|
|
675
|
+
u[3] = u[3] * b + c;
|
|
676
|
+
return _mm256_loadu_si256((__m256i*)u);
|
|
677
|
+
}
|
|
678
|
+
}
|
|
679
|
+
}
|
|
680
|
+
#endif
|
|
681
|
+
|
|
682
|
+
#ifdef EIGEN_VECTORIZE_SSE2
|
|
683
|
+
#include <xmmintrin.h>
|
|
684
|
+
|
|
685
|
+
namespace Eigen
|
|
686
|
+
{
|
|
687
|
+
namespace internal
|
|
688
|
+
{
|
|
689
|
+
template<>
|
|
690
|
+
struct reinterpreter<Packet4i>
|
|
691
|
+
{
|
|
692
|
+
EIGEN_STRONG_INLINE Packet4f to_float(const Packet4i& x)
|
|
693
|
+
{
|
|
694
|
+
return _mm_castsi128_ps(x);
|
|
695
|
+
}
|
|
696
|
+
|
|
697
|
+
EIGEN_STRONG_INLINE Packet2d to_double(const Packet4i& x)
|
|
698
|
+
{
|
|
699
|
+
return _mm_castsi128_pd(x);
|
|
700
|
+
}
|
|
701
|
+
|
|
702
|
+
EIGEN_STRONG_INLINE Packet4i to_int(const Packet4i& x)
|
|
703
|
+
{
|
|
704
|
+
return x;
|
|
705
|
+
}
|
|
706
|
+
};
|
|
707
|
+
|
|
708
|
+
template<>
|
|
709
|
+
struct reinterpreter<Packet4f>
|
|
710
|
+
{
|
|
711
|
+
EIGEN_STRONG_INLINE Packet4f to_float(const Packet4f& x)
|
|
712
|
+
{
|
|
713
|
+
return x;
|
|
714
|
+
}
|
|
715
|
+
|
|
716
|
+
EIGEN_STRONG_INLINE Packet2d to_double(const Packet4f& x)
|
|
717
|
+
{
|
|
718
|
+
return _mm_castps_pd(x);
|
|
719
|
+
}
|
|
720
|
+
|
|
721
|
+
EIGEN_STRONG_INLINE Packet4i to_int(const Packet4f& x)
|
|
722
|
+
{
|
|
723
|
+
return _mm_castps_si128(x);
|
|
724
|
+
}
|
|
725
|
+
};
|
|
726
|
+
|
|
727
|
+
template<>
|
|
728
|
+
struct reinterpreter<Packet2d>
|
|
729
|
+
{
|
|
730
|
+
EIGEN_STRONG_INLINE Packet4f to_float(const Packet2d& x)
|
|
731
|
+
{
|
|
732
|
+
return _mm_castpd_ps(x);
|
|
733
|
+
}
|
|
734
|
+
|
|
735
|
+
EIGEN_STRONG_INLINE Packet2d to_double(const Packet2d& x)
|
|
736
|
+
{
|
|
737
|
+
return x;
|
|
738
|
+
}
|
|
739
|
+
|
|
740
|
+
EIGEN_STRONG_INLINE Packet4i to_int(const Packet2d& x)
|
|
741
|
+
{
|
|
742
|
+
return _mm_castpd_si128(x);
|
|
743
|
+
}
|
|
744
|
+
};
|
|
745
|
+
|
|
746
|
+
EIGEN_STRONG_INLINE void split_two(const Packet4i& x, uint64_t& a, uint64_t& b)
|
|
747
|
+
{
|
|
748
|
+
#ifdef EIGEN_VECTORIZE_SSE4_1
|
|
749
|
+
a = _mm_extract_epi64(x, 0);
|
|
750
|
+
b = _mm_extract_epi64(x, 1);
|
|
751
|
+
#else
|
|
752
|
+
uint64_t u[2];
|
|
753
|
+
_mm_storeu_si128((__m128i*)u, x);
|
|
754
|
+
a = u[0];
|
|
755
|
+
b = u[1];
|
|
756
|
+
#endif
|
|
757
|
+
}
|
|
758
|
+
|
|
759
|
+
EIGEN_STRONG_INLINE Packet4i combine_low32(const Packet4i& a, const Packet4i& b)
|
|
760
|
+
{
|
|
761
|
+
auto sa = _mm_shuffle_epi32(a, _MM_SHUFFLE(3, 1, 2, 0));
|
|
762
|
+
auto sb = _mm_shuffle_epi32(b, _MM_SHUFFLE(2, 0, 3, 1));
|
|
763
|
+
sa = _mm_and_si128(sa, _mm_setr_epi32(-1, -1, 0, 0));
|
|
764
|
+
sb = _mm_and_si128(sb, _mm_setr_epi32(0, 0, -1, -1));
|
|
765
|
+
return _mm_or_si128(sa, sb);
|
|
766
|
+
}
|
|
767
|
+
|
|
768
|
+
template<>
|
|
769
|
+
EIGEN_STRONG_INLINE Packet4i pseti64<Packet4i>(uint64_t a)
|
|
770
|
+
{
|
|
771
|
+
return _mm_set1_epi64x(a);
|
|
772
|
+
}
|
|
773
|
+
|
|
774
|
+
template<>
|
|
775
|
+
EIGEN_STRONG_INLINE Packet4i pcmpeq<Packet4i>(const Packet4i& a, const Packet4i& b)
|
|
776
|
+
{
|
|
777
|
+
return _mm_cmpeq_epi32(a, b);
|
|
778
|
+
}
|
|
779
|
+
|
|
780
|
+
template<>
|
|
781
|
+
EIGEN_STRONG_INLINE Packet4i psll<Packet4i>(const Packet4i& a, int b)
|
|
782
|
+
{
|
|
783
|
+
return _mm_slli_epi32(a, b);
|
|
784
|
+
}
|
|
785
|
+
|
|
786
|
+
template<>
|
|
787
|
+
EIGEN_STRONG_INLINE Packet4i psrl<Packet4i>(const Packet4i& a, int b)
|
|
788
|
+
{
|
|
789
|
+
return _mm_srli_epi32(a, b);
|
|
790
|
+
}
|
|
791
|
+
|
|
792
|
+
|
|
793
|
+
template<>
|
|
794
|
+
EIGEN_STRONG_INLINE Packet4i psll64<Packet4i>(const Packet4i& a, int b)
|
|
795
|
+
{
|
|
796
|
+
return _mm_slli_epi64(a, b);
|
|
797
|
+
}
|
|
798
|
+
|
|
799
|
+
template<>
|
|
800
|
+
EIGEN_STRONG_INLINE Packet4i psrl64<Packet4i>(const Packet4i& a, int b)
|
|
801
|
+
{
|
|
802
|
+
return _mm_srli_epi64(a, b);
|
|
803
|
+
}
|
|
804
|
+
|
|
805
|
+
template<>
|
|
806
|
+
EIGEN_STRONG_INLINE Packet4i pcmplt<Packet4i>(const Packet4i& a, const Packet4i& b)
|
|
807
|
+
{
|
|
808
|
+
return _mm_cmplt_epi32(a, b);
|
|
809
|
+
}
|
|
810
|
+
|
|
811
|
+
template<>
|
|
812
|
+
EIGEN_STRONG_INLINE Packet4f pcmplt<Packet4f>(const Packet4f& a, const Packet4f& b)
|
|
813
|
+
{
|
|
814
|
+
return _mm_cmplt_ps(a, b);
|
|
815
|
+
}
|
|
816
|
+
|
|
817
|
+
template<>
|
|
818
|
+
EIGEN_STRONG_INLINE Packet4f pcmple<Packet4f>(const Packet4f& a, const Packet4f& b)
|
|
819
|
+
{
|
|
820
|
+
return _mm_cmple_ps(a, b);
|
|
821
|
+
}
|
|
822
|
+
|
|
823
|
+
template<>
|
|
824
|
+
EIGEN_STRONG_INLINE Packet2d pcmplt<Packet2d>(const Packet2d& a, const Packet2d& b)
|
|
825
|
+
{
|
|
826
|
+
return _mm_cmplt_pd(a, b);
|
|
827
|
+
}
|
|
828
|
+
|
|
829
|
+
template<>
|
|
830
|
+
EIGEN_STRONG_INLINE Packet2d pcmple<Packet2d>(const Packet2d& a, const Packet2d& b)
|
|
831
|
+
{
|
|
832
|
+
return _mm_cmple_pd(a, b);
|
|
833
|
+
}
|
|
834
|
+
|
|
835
|
+
template<>
|
|
836
|
+
EIGEN_STRONG_INLINE Packet4f pblendv(const Packet4f& ifPacket, const Packet4f& thenPacket, const Packet4f& elsePacket)
|
|
837
|
+
{
|
|
838
|
+
#ifdef EIGEN_VECTORIZE_SSE4_1
|
|
839
|
+
return _mm_blendv_ps(elsePacket, thenPacket, ifPacket);
|
|
840
|
+
#else
|
|
841
|
+
return _mm_or_ps(_mm_and_ps(ifPacket, thenPacket), _mm_andnot_ps(ifPacket, elsePacket));
|
|
842
|
+
#endif
|
|
843
|
+
}
|
|
844
|
+
|
|
845
|
+
template<>
|
|
846
|
+
EIGEN_STRONG_INLINE Packet4i pblendv(const Packet4i& ifPacket, const Packet4i& thenPacket, const Packet4i& elsePacket)
|
|
847
|
+
{
|
|
848
|
+
#ifdef EIGEN_VECTORIZE_SSE4_1
|
|
849
|
+
return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(elsePacket), _mm_castsi128_ps(thenPacket), _mm_castsi128_ps(ifPacket)));
|
|
850
|
+
#else
|
|
851
|
+
return _mm_or_si128(_mm_and_si128(ifPacket, thenPacket), _mm_andnot_si128(ifPacket, elsePacket));
|
|
852
|
+
#endif
|
|
853
|
+
}
|
|
854
|
+
|
|
855
|
+
template<>
|
|
856
|
+
EIGEN_STRONG_INLINE Packet2d pblendv(const Packet2d& ifPacket, const Packet2d& thenPacket, const Packet2d& elsePacket)
|
|
857
|
+
{
|
|
858
|
+
#ifdef EIGEN_VECTORIZE_SSE4_1
|
|
859
|
+
return _mm_blendv_pd(elsePacket, thenPacket, ifPacket);
|
|
860
|
+
#else
|
|
861
|
+
return _mm_or_pd(_mm_and_pd(ifPacket, thenPacket), _mm_andnot_pd(ifPacket, elsePacket));
|
|
862
|
+
#endif
|
|
863
|
+
}
|
|
864
|
+
|
|
865
|
+
template<>
|
|
866
|
+
EIGEN_STRONG_INLINE Packet4i pgather<Packet4i>(const int* addr, const Packet4i& index)
|
|
867
|
+
{
|
|
868
|
+
#ifdef EIGEN_VECTORIZE_AVX2
|
|
869
|
+
return _mm_i32gather_epi32(addr, index, 4);
|
|
870
|
+
#else
|
|
871
|
+
uint32_t u[4];
|
|
872
|
+
_mm_storeu_si128((Packet4i*)u, index);
|
|
873
|
+
return _mm_setr_epi32(addr[u[0]], addr[u[1]], addr[u[2]], addr[u[3]]);
|
|
874
|
+
#endif
|
|
875
|
+
}
|
|
876
|
+
|
|
877
|
+
template<>
|
|
878
|
+
EIGEN_STRONG_INLINE Packet4f pgather<Packet4i>(const float* addr, const Packet4i& index)
|
|
879
|
+
{
|
|
880
|
+
#ifdef EIGEN_VECTORIZE_AVX2
|
|
881
|
+
return _mm_i32gather_ps(addr, index, 4);
|
|
882
|
+
#else
|
|
883
|
+
uint32_t u[4];
|
|
884
|
+
_mm_storeu_si128((Packet4i*)u, index);
|
|
885
|
+
return _mm_setr_ps(addr[u[0]], addr[u[1]], addr[u[2]], addr[u[3]]);
|
|
886
|
+
#endif
|
|
887
|
+
}
|
|
888
|
+
|
|
889
|
+
template<>
|
|
890
|
+
EIGEN_STRONG_INLINE Packet2d pgather<Packet4i>(const double* addr, const Packet4i& index, bool upperhalf)
|
|
891
|
+
{
|
|
892
|
+
#ifdef EIGEN_VECTORIZE_AVX2
|
|
893
|
+
return _mm_i32gather_pd(addr, index, 8);
|
|
894
|
+
#else
|
|
895
|
+
uint32_t u[4];
|
|
896
|
+
_mm_storeu_si128((Packet4i*)u, index);
|
|
897
|
+
if (upperhalf)
|
|
898
|
+
{
|
|
899
|
+
return _mm_setr_pd(addr[u[2]], addr[u[3]]);
|
|
900
|
+
}
|
|
901
|
+
else
|
|
902
|
+
{
|
|
903
|
+
return _mm_setr_pd(addr[u[0]], addr[u[1]]);
|
|
904
|
+
}
|
|
905
|
+
#endif
|
|
906
|
+
}
|
|
907
|
+
|
|
908
|
+
template<>
|
|
909
|
+
EIGEN_STRONG_INLINE int pmovemask<Packet4f>(const Packet4f& a)
|
|
910
|
+
{
|
|
911
|
+
return _mm_movemask_ps(a);
|
|
912
|
+
}
|
|
913
|
+
|
|
914
|
+
template<>
|
|
915
|
+
EIGEN_STRONG_INLINE int pmovemask<Packet2d>(const Packet2d& a)
|
|
916
|
+
{
|
|
917
|
+
return _mm_movemask_pd(a);
|
|
918
|
+
}
|
|
919
|
+
|
|
920
|
+
template<>
|
|
921
|
+
EIGEN_STRONG_INLINE int pmovemask<Packet4i>(const Packet4i& a)
|
|
922
|
+
{
|
|
923
|
+
return pmovemask(_mm_castsi128_ps(a));
|
|
924
|
+
}
|
|
925
|
+
|
|
926
|
+
template<>
|
|
927
|
+
EIGEN_STRONG_INLINE Packet4f ptruncate<Packet4f>(const Packet4f& a)
|
|
928
|
+
{
|
|
929
|
+
#ifdef EIGEN_VECTORIZE_SSE4_1
|
|
930
|
+
return _mm_round_ps(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
|
|
931
|
+
#else
|
|
932
|
+
auto round = _MM_GET_ROUNDING_MODE();
|
|
933
|
+
_MM_SET_ROUNDING_MODE(_MM_ROUND_TOWARD_ZERO);
|
|
934
|
+
auto ret = _mm_cvtepi32_ps(_mm_cvtps_epi32(a));
|
|
935
|
+
_MM_SET_ROUNDING_MODE(round);
|
|
936
|
+
return ret;
|
|
937
|
+
#endif
|
|
938
|
+
}
|
|
939
|
+
|
|
940
|
+
template<>
|
|
941
|
+
EIGEN_STRONG_INLINE Packet2d ptruncate<Packet2d>(const Packet2d& a)
|
|
942
|
+
{
|
|
943
|
+
#ifdef EIGEN_VECTORIZE_SSE4_1
|
|
944
|
+
return _mm_round_pd(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
|
|
945
|
+
#else
|
|
946
|
+
auto round = _MM_GET_ROUNDING_MODE();
|
|
947
|
+
_MM_SET_ROUNDING_MODE(_MM_ROUND_TOWARD_ZERO);
|
|
948
|
+
auto ret = _mm_cvtepi32_pd(_mm_cvtpd_epi32(a));
|
|
949
|
+
_MM_SET_ROUNDING_MODE(round);
|
|
950
|
+
return ret;
|
|
951
|
+
#endif
|
|
952
|
+
}
|
|
953
|
+
|
|
954
|
+
template<>
|
|
955
|
+
EIGEN_STRONG_INLINE Packet4i pcmpeq64<Packet4i>(const Packet4i& a, const Packet4i& b)
|
|
956
|
+
{
|
|
957
|
+
#ifdef EIGEN_VECTORIZE_SSE4_1
|
|
958
|
+
return _mm_cmpeq_epi64(a, b);
|
|
959
|
+
#else
|
|
960
|
+
Packet4i c = _mm_cmpeq_epi32(a, b);
|
|
961
|
+
return pand(c, _mm_shuffle_epi32(c, _MM_SHUFFLE(2, 3, 0, 1)));
|
|
962
|
+
#endif
|
|
963
|
+
}
|
|
964
|
+
|
|
965
|
+
template<>
|
|
966
|
+
EIGEN_STRONG_INLINE Packet4i pmuluadd64<Packet4i>(const Packet4i& a, uint64_t b, uint64_t c)
|
|
967
|
+
{
|
|
968
|
+
uint64_t u[2];
|
|
969
|
+
_mm_storeu_si128((__m128i*)u, a);
|
|
970
|
+
u[0] = u[0] * b + c;
|
|
971
|
+
u[1] = u[1] * b + c;
|
|
972
|
+
return _mm_loadu_si128((__m128i*)u);
|
|
973
|
+
}
|
|
974
|
+
}
|
|
975
|
+
}
|
|
976
|
+
#endif
|
|
977
|
+
|
|
978
|
+
#endif
|