tomoto 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/LICENSE.txt +22 -0
- data/README.md +123 -0
- data/ext/tomoto/ext.cpp +245 -0
- data/ext/tomoto/extconf.rb +28 -0
- data/lib/tomoto.rb +12 -0
- data/lib/tomoto/ct.rb +11 -0
- data/lib/tomoto/hdp.rb +11 -0
- data/lib/tomoto/lda.rb +67 -0
- data/lib/tomoto/version.rb +3 -0
- data/vendor/EigenRand/EigenRand/Core.h +1139 -0
- data/vendor/EigenRand/EigenRand/Dists/Basic.h +111 -0
- data/vendor/EigenRand/EigenRand/Dists/Discrete.h +877 -0
- data/vendor/EigenRand/EigenRand/Dists/GammaPoisson.h +108 -0
- data/vendor/EigenRand/EigenRand/Dists/NormalExp.h +626 -0
- data/vendor/EigenRand/EigenRand/EigenRand +19 -0
- data/vendor/EigenRand/EigenRand/Macro.h +24 -0
- data/vendor/EigenRand/EigenRand/MorePacketMath.h +978 -0
- data/vendor/EigenRand/EigenRand/PacketFilter.h +286 -0
- data/vendor/EigenRand/EigenRand/PacketRandomEngine.h +624 -0
- data/vendor/EigenRand/EigenRand/RandUtils.h +413 -0
- data/vendor/EigenRand/EigenRand/doc.h +220 -0
- data/vendor/EigenRand/LICENSE +21 -0
- data/vendor/EigenRand/README.md +288 -0
- data/vendor/eigen/COPYING.BSD +26 -0
- data/vendor/eigen/COPYING.GPL +674 -0
- data/vendor/eigen/COPYING.LGPL +502 -0
- data/vendor/eigen/COPYING.MINPACK +52 -0
- data/vendor/eigen/COPYING.MPL2 +373 -0
- data/vendor/eigen/COPYING.README +18 -0
- data/vendor/eigen/Eigen/CMakeLists.txt +19 -0
- data/vendor/eigen/Eigen/Cholesky +46 -0
- data/vendor/eigen/Eigen/CholmodSupport +48 -0
- data/vendor/eigen/Eigen/Core +537 -0
- data/vendor/eigen/Eigen/Dense +7 -0
- data/vendor/eigen/Eigen/Eigen +2 -0
- data/vendor/eigen/Eigen/Eigenvalues +61 -0
- data/vendor/eigen/Eigen/Geometry +62 -0
- data/vendor/eigen/Eigen/Householder +30 -0
- data/vendor/eigen/Eigen/IterativeLinearSolvers +48 -0
- data/vendor/eigen/Eigen/Jacobi +33 -0
- data/vendor/eigen/Eigen/LU +50 -0
- data/vendor/eigen/Eigen/MetisSupport +35 -0
- data/vendor/eigen/Eigen/OrderingMethods +73 -0
- data/vendor/eigen/Eigen/PaStiXSupport +48 -0
- data/vendor/eigen/Eigen/PardisoSupport +35 -0
- data/vendor/eigen/Eigen/QR +51 -0
- data/vendor/eigen/Eigen/QtAlignedMalloc +40 -0
- data/vendor/eigen/Eigen/SPQRSupport +34 -0
- data/vendor/eigen/Eigen/SVD +51 -0
- data/vendor/eigen/Eigen/Sparse +36 -0
- data/vendor/eigen/Eigen/SparseCholesky +45 -0
- data/vendor/eigen/Eigen/SparseCore +69 -0
- data/vendor/eigen/Eigen/SparseLU +46 -0
- data/vendor/eigen/Eigen/SparseQR +37 -0
- data/vendor/eigen/Eigen/StdDeque +27 -0
- data/vendor/eigen/Eigen/StdList +26 -0
- data/vendor/eigen/Eigen/StdVector +27 -0
- data/vendor/eigen/Eigen/SuperLUSupport +64 -0
- data/vendor/eigen/Eigen/UmfPackSupport +40 -0
- data/vendor/eigen/Eigen/src/Cholesky/LDLT.h +673 -0
- data/vendor/eigen/Eigen/src/Cholesky/LLT.h +542 -0
- data/vendor/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h +99 -0
- data/vendor/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +639 -0
- data/vendor/eigen/Eigen/src/Core/Array.h +329 -0
- data/vendor/eigen/Eigen/src/Core/ArrayBase.h +226 -0
- data/vendor/eigen/Eigen/src/Core/ArrayWrapper.h +209 -0
- data/vendor/eigen/Eigen/src/Core/Assign.h +90 -0
- data/vendor/eigen/Eigen/src/Core/AssignEvaluator.h +935 -0
- data/vendor/eigen/Eigen/src/Core/Assign_MKL.h +178 -0
- data/vendor/eigen/Eigen/src/Core/BandMatrix.h +353 -0
- data/vendor/eigen/Eigen/src/Core/Block.h +452 -0
- data/vendor/eigen/Eigen/src/Core/BooleanRedux.h +164 -0
- data/vendor/eigen/Eigen/src/Core/CommaInitializer.h +160 -0
- data/vendor/eigen/Eigen/src/Core/ConditionEstimator.h +175 -0
- data/vendor/eigen/Eigen/src/Core/CoreEvaluators.h +1688 -0
- data/vendor/eigen/Eigen/src/Core/CoreIterators.h +127 -0
- data/vendor/eigen/Eigen/src/Core/CwiseBinaryOp.h +184 -0
- data/vendor/eigen/Eigen/src/Core/CwiseNullaryOp.h +866 -0
- data/vendor/eigen/Eigen/src/Core/CwiseTernaryOp.h +197 -0
- data/vendor/eigen/Eigen/src/Core/CwiseUnaryOp.h +103 -0
- data/vendor/eigen/Eigen/src/Core/CwiseUnaryView.h +128 -0
- data/vendor/eigen/Eigen/src/Core/DenseBase.h +611 -0
- data/vendor/eigen/Eigen/src/Core/DenseCoeffsBase.h +681 -0
- data/vendor/eigen/Eigen/src/Core/DenseStorage.h +570 -0
- data/vendor/eigen/Eigen/src/Core/Diagonal.h +260 -0
- data/vendor/eigen/Eigen/src/Core/DiagonalMatrix.h +343 -0
- data/vendor/eigen/Eigen/src/Core/DiagonalProduct.h +28 -0
- data/vendor/eigen/Eigen/src/Core/Dot.h +318 -0
- data/vendor/eigen/Eigen/src/Core/EigenBase.h +159 -0
- data/vendor/eigen/Eigen/src/Core/ForceAlignedAccess.h +146 -0
- data/vendor/eigen/Eigen/src/Core/Fuzzy.h +155 -0
- data/vendor/eigen/Eigen/src/Core/GeneralProduct.h +455 -0
- data/vendor/eigen/Eigen/src/Core/GenericPacketMath.h +593 -0
- data/vendor/eigen/Eigen/src/Core/GlobalFunctions.h +187 -0
- data/vendor/eigen/Eigen/src/Core/IO.h +225 -0
- data/vendor/eigen/Eigen/src/Core/Inverse.h +118 -0
- data/vendor/eigen/Eigen/src/Core/Map.h +171 -0
- data/vendor/eigen/Eigen/src/Core/MapBase.h +303 -0
- data/vendor/eigen/Eigen/src/Core/MathFunctions.h +1415 -0
- data/vendor/eigen/Eigen/src/Core/MathFunctionsImpl.h +101 -0
- data/vendor/eigen/Eigen/src/Core/Matrix.h +459 -0
- data/vendor/eigen/Eigen/src/Core/MatrixBase.h +529 -0
- data/vendor/eigen/Eigen/src/Core/NestByValue.h +110 -0
- data/vendor/eigen/Eigen/src/Core/NoAlias.h +108 -0
- data/vendor/eigen/Eigen/src/Core/NumTraits.h +248 -0
- data/vendor/eigen/Eigen/src/Core/PermutationMatrix.h +633 -0
- data/vendor/eigen/Eigen/src/Core/PlainObjectBase.h +1035 -0
- data/vendor/eigen/Eigen/src/Core/Product.h +186 -0
- data/vendor/eigen/Eigen/src/Core/ProductEvaluators.h +1112 -0
- data/vendor/eigen/Eigen/src/Core/Random.h +182 -0
- data/vendor/eigen/Eigen/src/Core/Redux.h +505 -0
- data/vendor/eigen/Eigen/src/Core/Ref.h +283 -0
- data/vendor/eigen/Eigen/src/Core/Replicate.h +142 -0
- data/vendor/eigen/Eigen/src/Core/ReturnByValue.h +117 -0
- data/vendor/eigen/Eigen/src/Core/Reverse.h +211 -0
- data/vendor/eigen/Eigen/src/Core/Select.h +162 -0
- data/vendor/eigen/Eigen/src/Core/SelfAdjointView.h +352 -0
- data/vendor/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h +47 -0
- data/vendor/eigen/Eigen/src/Core/Solve.h +188 -0
- data/vendor/eigen/Eigen/src/Core/SolveTriangular.h +235 -0
- data/vendor/eigen/Eigen/src/Core/SolverBase.h +130 -0
- data/vendor/eigen/Eigen/src/Core/StableNorm.h +221 -0
- data/vendor/eigen/Eigen/src/Core/Stride.h +111 -0
- data/vendor/eigen/Eigen/src/Core/Swap.h +67 -0
- data/vendor/eigen/Eigen/src/Core/Transpose.h +403 -0
- data/vendor/eigen/Eigen/src/Core/Transpositions.h +407 -0
- data/vendor/eigen/Eigen/src/Core/TriangularMatrix.h +983 -0
- data/vendor/eigen/Eigen/src/Core/VectorBlock.h +96 -0
- data/vendor/eigen/Eigen/src/Core/VectorwiseOp.h +695 -0
- data/vendor/eigen/Eigen/src/Core/Visitor.h +273 -0
- data/vendor/eigen/Eigen/src/Core/arch/AVX/Complex.h +451 -0
- data/vendor/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +439 -0
- data/vendor/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +637 -0
- data/vendor/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +51 -0
- data/vendor/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +391 -0
- data/vendor/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1316 -0
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +430 -0
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +322 -0
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +1061 -0
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/Complex.h +103 -0
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/Half.h +674 -0
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/MathFunctions.h +91 -0
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +333 -0
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +1124 -0
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +212 -0
- data/vendor/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +29 -0
- data/vendor/eigen/Eigen/src/Core/arch/Default/Settings.h +49 -0
- data/vendor/eigen/Eigen/src/Core/arch/NEON/Complex.h +490 -0
- data/vendor/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +91 -0
- data/vendor/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +760 -0
- data/vendor/eigen/Eigen/src/Core/arch/SSE/Complex.h +471 -0
- data/vendor/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +562 -0
- data/vendor/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +895 -0
- data/vendor/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +77 -0
- data/vendor/eigen/Eigen/src/Core/arch/ZVector/Complex.h +397 -0
- data/vendor/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +137 -0
- data/vendor/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +945 -0
- data/vendor/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +168 -0
- data/vendor/eigen/Eigen/src/Core/functors/BinaryFunctors.h +475 -0
- data/vendor/eigen/Eigen/src/Core/functors/NullaryFunctors.h +188 -0
- data/vendor/eigen/Eigen/src/Core/functors/StlFunctors.h +136 -0
- data/vendor/eigen/Eigen/src/Core/functors/TernaryFunctors.h +25 -0
- data/vendor/eigen/Eigen/src/Core/functors/UnaryFunctors.h +792 -0
- data/vendor/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +2156 -0
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +492 -0
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +311 -0
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +145 -0
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +122 -0
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +619 -0
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +136 -0
- data/vendor/eigen/Eigen/src/Core/products/Parallelizer.h +163 -0
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +521 -0
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +287 -0
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +260 -0
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +118 -0
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointProduct.h +133 -0
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +93 -0
- data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +466 -0
- data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +315 -0
- data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixVector.h +350 -0
- data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +255 -0
- data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +335 -0
- data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +163 -0
- data/vendor/eigen/Eigen/src/Core/products/TriangularSolverVector.h +145 -0
- data/vendor/eigen/Eigen/src/Core/util/BlasUtil.h +398 -0
- data/vendor/eigen/Eigen/src/Core/util/Constants.h +547 -0
- data/vendor/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +83 -0
- data/vendor/eigen/Eigen/src/Core/util/ForwardDeclarations.h +302 -0
- data/vendor/eigen/Eigen/src/Core/util/MKL_support.h +130 -0
- data/vendor/eigen/Eigen/src/Core/util/Macros.h +1001 -0
- data/vendor/eigen/Eigen/src/Core/util/Memory.h +993 -0
- data/vendor/eigen/Eigen/src/Core/util/Meta.h +534 -0
- data/vendor/eigen/Eigen/src/Core/util/NonMPL2.h +3 -0
- data/vendor/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +27 -0
- data/vendor/eigen/Eigen/src/Core/util/StaticAssert.h +218 -0
- data/vendor/eigen/Eigen/src/Core/util/XprHelper.h +821 -0
- data/vendor/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +346 -0
- data/vendor/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +459 -0
- data/vendor/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +91 -0
- data/vendor/eigen/Eigen/src/Eigenvalues/EigenSolver.h +622 -0
- data/vendor/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +418 -0
- data/vendor/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +226 -0
- data/vendor/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +374 -0
- data/vendor/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +158 -0
- data/vendor/eigen/Eigen/src/Eigenvalues/RealQZ.h +654 -0
- data/vendor/eigen/Eigen/src/Eigenvalues/RealSchur.h +546 -0
- data/vendor/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +77 -0
- data/vendor/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +870 -0
- data/vendor/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +87 -0
- data/vendor/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +556 -0
- data/vendor/eigen/Eigen/src/Geometry/AlignedBox.h +392 -0
- data/vendor/eigen/Eigen/src/Geometry/AngleAxis.h +247 -0
- data/vendor/eigen/Eigen/src/Geometry/EulerAngles.h +114 -0
- data/vendor/eigen/Eigen/src/Geometry/Homogeneous.h +497 -0
- data/vendor/eigen/Eigen/src/Geometry/Hyperplane.h +282 -0
- data/vendor/eigen/Eigen/src/Geometry/OrthoMethods.h +234 -0
- data/vendor/eigen/Eigen/src/Geometry/ParametrizedLine.h +195 -0
- data/vendor/eigen/Eigen/src/Geometry/Quaternion.h +814 -0
- data/vendor/eigen/Eigen/src/Geometry/Rotation2D.h +199 -0
- data/vendor/eigen/Eigen/src/Geometry/RotationBase.h +206 -0
- data/vendor/eigen/Eigen/src/Geometry/Scaling.h +170 -0
- data/vendor/eigen/Eigen/src/Geometry/Transform.h +1542 -0
- data/vendor/eigen/Eigen/src/Geometry/Translation.h +208 -0
- data/vendor/eigen/Eigen/src/Geometry/Umeyama.h +166 -0
- data/vendor/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +161 -0
- data/vendor/eigen/Eigen/src/Householder/BlockHouseholder.h +103 -0
- data/vendor/eigen/Eigen/src/Householder/Householder.h +172 -0
- data/vendor/eigen/Eigen/src/Householder/HouseholderSequence.h +470 -0
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +226 -0
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +228 -0
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +246 -0
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +400 -0
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +462 -0
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +394 -0
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +216 -0
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +115 -0
- data/vendor/eigen/Eigen/src/Jacobi/Jacobi.h +462 -0
- data/vendor/eigen/Eigen/src/LU/Determinant.h +101 -0
- data/vendor/eigen/Eigen/src/LU/FullPivLU.h +891 -0
- data/vendor/eigen/Eigen/src/LU/InverseImpl.h +415 -0
- data/vendor/eigen/Eigen/src/LU/PartialPivLU.h +611 -0
- data/vendor/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h +83 -0
- data/vendor/eigen/Eigen/src/LU/arch/Inverse_SSE.h +338 -0
- data/vendor/eigen/Eigen/src/MetisSupport/MetisSupport.h +137 -0
- data/vendor/eigen/Eigen/src/OrderingMethods/Amd.h +445 -0
- data/vendor/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +1843 -0
- data/vendor/eigen/Eigen/src/OrderingMethods/Ordering.h +157 -0
- data/vendor/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +678 -0
- data/vendor/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +543 -0
- data/vendor/eigen/Eigen/src/QR/ColPivHouseholderQR.h +653 -0
- data/vendor/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +97 -0
- data/vendor/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +562 -0
- data/vendor/eigen/Eigen/src/QR/FullPivHouseholderQR.h +676 -0
- data/vendor/eigen/Eigen/src/QR/HouseholderQR.h +409 -0
- data/vendor/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h +68 -0
- data/vendor/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +313 -0
- data/vendor/eigen/Eigen/src/SVD/BDCSVD.h +1246 -0
- data/vendor/eigen/Eigen/src/SVD/JacobiSVD.h +804 -0
- data/vendor/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +91 -0
- data/vendor/eigen/Eigen/src/SVD/SVDBase.h +315 -0
- data/vendor/eigen/Eigen/src/SVD/UpperBidiagonalization.h +414 -0
- data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +689 -0
- data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +199 -0
- data/vendor/eigen/Eigen/src/SparseCore/AmbiVector.h +377 -0
- data/vendor/eigen/Eigen/src/SparseCore/CompressedStorage.h +258 -0
- data/vendor/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +352 -0
- data/vendor/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h +67 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseAssign.h +216 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseBlock.h +603 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseColEtree.h +206 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +341 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +726 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +148 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +320 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +138 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseDot.h +98 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseFuzzy.h +29 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseMap.h +305 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseMatrix.h +1403 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +405 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparsePermutation.h +178 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseProduct.h +169 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseRedux.h +49 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseRef.h +397 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +656 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseSolverBase.h +124 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +198 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseTranspose.h +92 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseTriangularView.h +189 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseUtil.h +178 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseVector.h +478 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseView.h +253 -0
- data/vendor/eigen/Eigen/src/SparseCore/TriangularSolver.h +315 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU.h +773 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLUImpl.h +66 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +226 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_Structs.h +110 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +301 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_Utils.h +80 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h +181 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +179 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +107 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +280 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +126 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +130 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +223 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h +258 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h +137 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h +136 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h +83 -0
- data/vendor/eigen/Eigen/src/SparseQR/SparseQR.h +745 -0
- data/vendor/eigen/Eigen/src/StlSupport/StdDeque.h +126 -0
- data/vendor/eigen/Eigen/src/StlSupport/StdList.h +106 -0
- data/vendor/eigen/Eigen/src/StlSupport/StdVector.h +131 -0
- data/vendor/eigen/Eigen/src/StlSupport/details.h +84 -0
- data/vendor/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +1027 -0
- data/vendor/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +506 -0
- data/vendor/eigen/Eigen/src/misc/Image.h +82 -0
- data/vendor/eigen/Eigen/src/misc/Kernel.h +79 -0
- data/vendor/eigen/Eigen/src/misc/RealSvd2x2.h +55 -0
- data/vendor/eigen/Eigen/src/misc/blas.h +440 -0
- data/vendor/eigen/Eigen/src/misc/lapack.h +152 -0
- data/vendor/eigen/Eigen/src/misc/lapacke.h +16291 -0
- data/vendor/eigen/Eigen/src/misc/lapacke_mangling.h +17 -0
- data/vendor/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +332 -0
- data/vendor/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +552 -0
- data/vendor/eigen/Eigen/src/plugins/BlockMethods.h +1058 -0
- data/vendor/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h +115 -0
- data/vendor/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +163 -0
- data/vendor/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +152 -0
- data/vendor/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +85 -0
- data/vendor/eigen/README.md +3 -0
- data/vendor/eigen/bench/README.txt +55 -0
- data/vendor/eigen/bench/btl/COPYING +340 -0
- data/vendor/eigen/bench/btl/README +154 -0
- data/vendor/eigen/bench/tensors/README +21 -0
- data/vendor/eigen/blas/README.txt +6 -0
- data/vendor/eigen/demos/mandelbrot/README +10 -0
- data/vendor/eigen/demos/mix_eigen_and_c/README +9 -0
- data/vendor/eigen/demos/opengl/README +13 -0
- data/vendor/eigen/unsupported/Eigen/CXX11/src/Tensor/README.md +1760 -0
- data/vendor/eigen/unsupported/README.txt +50 -0
- data/vendor/tomotopy/LICENSE +21 -0
- data/vendor/tomotopy/README.kr.rst +375 -0
- data/vendor/tomotopy/README.rst +382 -0
- data/vendor/tomotopy/src/Labeling/FoRelevance.cpp +362 -0
- data/vendor/tomotopy/src/Labeling/FoRelevance.h +88 -0
- data/vendor/tomotopy/src/Labeling/Labeler.h +50 -0
- data/vendor/tomotopy/src/TopicModel/CT.h +37 -0
- data/vendor/tomotopy/src/TopicModel/CTModel.cpp +13 -0
- data/vendor/tomotopy/src/TopicModel/CTModel.hpp +293 -0
- data/vendor/tomotopy/src/TopicModel/DMR.h +51 -0
- data/vendor/tomotopy/src/TopicModel/DMRModel.cpp +13 -0
- data/vendor/tomotopy/src/TopicModel/DMRModel.hpp +374 -0
- data/vendor/tomotopy/src/TopicModel/DT.h +65 -0
- data/vendor/tomotopy/src/TopicModel/DTM.h +22 -0
- data/vendor/tomotopy/src/TopicModel/DTModel.cpp +15 -0
- data/vendor/tomotopy/src/TopicModel/DTModel.hpp +572 -0
- data/vendor/tomotopy/src/TopicModel/GDMR.h +37 -0
- data/vendor/tomotopy/src/TopicModel/GDMRModel.cpp +14 -0
- data/vendor/tomotopy/src/TopicModel/GDMRModel.hpp +485 -0
- data/vendor/tomotopy/src/TopicModel/HDP.h +74 -0
- data/vendor/tomotopy/src/TopicModel/HDPModel.cpp +13 -0
- data/vendor/tomotopy/src/TopicModel/HDPModel.hpp +592 -0
- data/vendor/tomotopy/src/TopicModel/HLDA.h +40 -0
- data/vendor/tomotopy/src/TopicModel/HLDAModel.cpp +13 -0
- data/vendor/tomotopy/src/TopicModel/HLDAModel.hpp +681 -0
- data/vendor/tomotopy/src/TopicModel/HPA.h +27 -0
- data/vendor/tomotopy/src/TopicModel/HPAModel.cpp +21 -0
- data/vendor/tomotopy/src/TopicModel/HPAModel.hpp +588 -0
- data/vendor/tomotopy/src/TopicModel/LDA.h +144 -0
- data/vendor/tomotopy/src/TopicModel/LDACVB0Model.hpp +442 -0
- data/vendor/tomotopy/src/TopicModel/LDAModel.cpp +13 -0
- data/vendor/tomotopy/src/TopicModel/LDAModel.hpp +1058 -0
- data/vendor/tomotopy/src/TopicModel/LLDA.h +45 -0
- data/vendor/tomotopy/src/TopicModel/LLDAModel.cpp +13 -0
- data/vendor/tomotopy/src/TopicModel/LLDAModel.hpp +203 -0
- data/vendor/tomotopy/src/TopicModel/MGLDA.h +63 -0
- data/vendor/tomotopy/src/TopicModel/MGLDAModel.cpp +17 -0
- data/vendor/tomotopy/src/TopicModel/MGLDAModel.hpp +558 -0
- data/vendor/tomotopy/src/TopicModel/PA.h +43 -0
- data/vendor/tomotopy/src/TopicModel/PAModel.cpp +13 -0
- data/vendor/tomotopy/src/TopicModel/PAModel.hpp +467 -0
- data/vendor/tomotopy/src/TopicModel/PLDA.h +17 -0
- data/vendor/tomotopy/src/TopicModel/PLDAModel.cpp +13 -0
- data/vendor/tomotopy/src/TopicModel/PLDAModel.hpp +214 -0
- data/vendor/tomotopy/src/TopicModel/SLDA.h +54 -0
- data/vendor/tomotopy/src/TopicModel/SLDAModel.cpp +17 -0
- data/vendor/tomotopy/src/TopicModel/SLDAModel.hpp +456 -0
- data/vendor/tomotopy/src/TopicModel/TopicModel.hpp +692 -0
- data/vendor/tomotopy/src/Utils/AliasMethod.hpp +169 -0
- data/vendor/tomotopy/src/Utils/Dictionary.h +80 -0
- data/vendor/tomotopy/src/Utils/EigenAddonOps.hpp +181 -0
- data/vendor/tomotopy/src/Utils/LBFGS.h +202 -0
- data/vendor/tomotopy/src/Utils/LBFGS/LineSearchBacktracking.h +120 -0
- data/vendor/tomotopy/src/Utils/LBFGS/LineSearchBracketing.h +122 -0
- data/vendor/tomotopy/src/Utils/LBFGS/Param.h +213 -0
- data/vendor/tomotopy/src/Utils/LUT.hpp +82 -0
- data/vendor/tomotopy/src/Utils/MultiNormalDistribution.hpp +69 -0
- data/vendor/tomotopy/src/Utils/PolyaGamma.hpp +200 -0
- data/vendor/tomotopy/src/Utils/PolyaGammaHybrid.hpp +672 -0
- data/vendor/tomotopy/src/Utils/ThreadPool.hpp +150 -0
- data/vendor/tomotopy/src/Utils/Trie.hpp +220 -0
- data/vendor/tomotopy/src/Utils/TruncMultiNormal.hpp +94 -0
- data/vendor/tomotopy/src/Utils/Utils.hpp +337 -0
- data/vendor/tomotopy/src/Utils/avx_gamma.h +46 -0
- data/vendor/tomotopy/src/Utils/avx_mathfun.h +736 -0
- data/vendor/tomotopy/src/Utils/exception.h +28 -0
- data/vendor/tomotopy/src/Utils/math.h +281 -0
- data/vendor/tomotopy/src/Utils/rtnorm.hpp +2690 -0
- data/vendor/tomotopy/src/Utils/sample.hpp +192 -0
- data/vendor/tomotopy/src/Utils/serializer.hpp +695 -0
- data/vendor/tomotopy/src/Utils/slp.hpp +131 -0
- data/vendor/tomotopy/src/Utils/sse_gamma.h +48 -0
- data/vendor/tomotopy/src/Utils/sse_mathfun.h +710 -0
- data/vendor/tomotopy/src/Utils/text.hpp +49 -0
- data/vendor/tomotopy/src/Utils/tvector.hpp +543 -0
- metadata +531 -0
|
@@ -0,0 +1,286 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file PacketFilter.h
|
|
3
|
+
* @author bab2min (bab2min@gmail.com)
|
|
4
|
+
* @brief Lookup-table helpers (CompressMask) that pack the mask-selected lanes of a SIMD packet together
|
|
5
|
+
* @version 0.2.0
|
|
6
|
+
* @date 2020-06-22
|
|
7
|
+
*
|
|
8
|
+
* @copyright Copyright (c) 2020
|
|
9
|
+
*
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
#ifndef EIGENRAND_PACKET_FILTER_H
|
|
13
|
+
#define EIGENRAND_PACKET_FILTER_H
|
|
14
|
+
|
|
15
|
+
#include <array>
|
|
16
|
+
#include <EigenRand/MorePacketMath.h>
|
|
17
|
+
|
|
18
|
+
namespace Eigen
|
|
19
|
+
{
|
|
20
|
+
namespace Rand
|
|
21
|
+
{
|
|
22
|
+
namespace detail
|
|
23
|
+
{
|
|
24
|
+
// Primary template, declared only: every usable CompressMask is an explicit
// specialisation. This file provides CompressMask<32> (when EIGEN_VECTORIZE_AVX
// is defined, eight 32-bit lanes) and CompressMask<16> (when EIGEN_VECTORIZE_SSE2
// is defined, four 32-bit lanes), so PacketSize appears to be the packet width
// in bytes.
template<size_t PacketSize> class CompressMask;
|
|
26
|
+
}
|
|
27
|
+
}
|
|
28
|
+
}
|
|
29
|
+
#ifdef EIGEN_VECTORIZE_AVX
|
|
30
|
+
#include <immintrin.h>
|
|
31
|
+
namespace Eigen
|
|
32
|
+
{
|
|
33
|
+
namespace Rand
|
|
34
|
+
{
|
|
35
|
+
namespace detail
|
|
36
|
+
{
|
|
37
|
+
template<>
|
|
38
|
+
class CompressMask<32>
|
|
39
|
+
{
|
|
40
|
+
std::array<std::array<internal::Packet8i, 256>, 15> idx;
|
|
41
|
+
std::array<internal::Packet8f, 8> selector;
|
|
42
|
+
std::array<uint8_t, 256> cnt;
|
|
43
|
+
|
|
44
|
+
static internal::Packet8i make_compress(int mask, int offset = 0)
|
|
45
|
+
{
|
|
46
|
+
int32_t ret[8] = { 0, };
|
|
47
|
+
int n = offset;
|
|
48
|
+
for (int i = 0; i < 8; ++i)
|
|
49
|
+
{
|
|
50
|
+
int l = mask & 1;
|
|
51
|
+
mask >>= 1;
|
|
52
|
+
if (l)
|
|
53
|
+
{
|
|
54
|
+
if (n >= 0) ret[n] = i;
|
|
55
|
+
if (++n >= 8) break;
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
return _mm256_loadu_si256((internal::Packet8i*)ret);
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
static uint8_t count(int mask)
|
|
62
|
+
{
|
|
63
|
+
uint8_t ret = 0;
|
|
64
|
+
for (int i = 0; i < 8; ++i)
|
|
65
|
+
{
|
|
66
|
+
ret += mask & 1;
|
|
67
|
+
mask >>= 1;
|
|
68
|
+
}
|
|
69
|
+
return ret;
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
CompressMask()
|
|
73
|
+
{
|
|
74
|
+
for (int i = 0; i < 256; ++i)
|
|
75
|
+
{
|
|
76
|
+
for (int o = 0; o < 15; ++o)
|
|
77
|
+
{
|
|
78
|
+
idx[o][i] = make_compress(i, o < 8 ? o : o - 15);
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
cnt[i] = count(i);
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
selector[0] = _mm256_castsi256_ps(_mm256_setr_epi32(0, 0, 0, 0, 0, 0, 0, 0));
|
|
85
|
+
selector[1] = _mm256_castsi256_ps(_mm256_setr_epi32(-1, 0, 0, 0, 0, 0, 0, 0));
|
|
86
|
+
selector[2] = _mm256_castsi256_ps(_mm256_setr_epi32(-1, -1, 0, 0, 0, 0, 0, 0));
|
|
87
|
+
selector[3] = _mm256_castsi256_ps(_mm256_setr_epi32(-1, -1, -1, 0, 0, 0, 0, 0));
|
|
88
|
+
selector[4] = _mm256_castsi256_ps(_mm256_setr_epi32(-1, -1, -1, -1, 0, 0, 0, 0));
|
|
89
|
+
selector[5] = _mm256_castsi256_ps(_mm256_setr_epi32(-1, -1, -1, -1, -1, 0, 0, 0));
|
|
90
|
+
selector[6] = _mm256_castsi256_ps(_mm256_setr_epi32(-1, -1, -1, -1, -1, -1, 0, 0));
|
|
91
|
+
selector[7] = _mm256_castsi256_ps(_mm256_setr_epi32(-1, -1, -1, -1, -1, -1, -1, 0));
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
static EIGEN_STRONG_INLINE internal::Packet8f permute(const internal::Packet8f& p, const internal::Packet8i& i)
|
|
95
|
+
{
|
|
96
|
+
#ifdef EIGEN_VECTORIZE_AVX2
|
|
97
|
+
return _mm256_permutevar8x32_ps(p, i);
|
|
98
|
+
#else
|
|
99
|
+
auto l = _mm256_permutevar_ps(p, i);
|
|
100
|
+
auto h = _mm256_permutevar_ps(_mm256_permute2f128_ps(p, p, 0x01), i);
|
|
101
|
+
internal::Packet4i i1, i2;
|
|
102
|
+
internal::split_two(i, i1, i2);
|
|
103
|
+
i1 = _mm_slli_epi32(i1, 29);
|
|
104
|
+
i2 = _mm_slli_epi32(i2, 29);
|
|
105
|
+
auto c = _mm256_castsi256_ps(
|
|
106
|
+
internal::combine_two(
|
|
107
|
+
_mm_cmplt_epi32(i1, internal::pset1<internal::Packet4i>(0)),
|
|
108
|
+
_mm_cmplt_epi32(internal::pset1<internal::Packet4i>(-1), i2)
|
|
109
|
+
)
|
|
110
|
+
);
|
|
111
|
+
return internal::pblendv(c, h, l);
|
|
112
|
+
#endif
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
public:
|
|
116
|
+
enum { full_size = 8 };
|
|
117
|
+
static const CompressMask& get_inst()
|
|
118
|
+
{
|
|
119
|
+
static CompressMask cm;
|
|
120
|
+
return cm;
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
template<typename Packet>
|
|
124
|
+
EIGEN_STRONG_INLINE int compress_append(Packet& _value, const Packet& _mask,
|
|
125
|
+
Packet& _rest, int rest_cnt, bool& full) const
|
|
126
|
+
{
|
|
127
|
+
auto& value = reinterpret_cast<internal::Packet8f&>(_value);
|
|
128
|
+
auto& mask = reinterpret_cast<const internal::Packet8f&>(_mask);
|
|
129
|
+
auto& rest = reinterpret_cast<internal::Packet8f&>(_rest);
|
|
130
|
+
|
|
131
|
+
int m = _mm256_movemask_ps(mask);
|
|
132
|
+
if (cnt[m] == full_size)
|
|
133
|
+
{
|
|
134
|
+
full = true;
|
|
135
|
+
return rest_cnt;
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
auto p1 = permute(value, idx[rest_cnt][m]);
|
|
139
|
+
p1 = internal::pblendv(selector[rest_cnt], rest, p1);
|
|
140
|
+
|
|
141
|
+
auto new_cnt = rest_cnt + cnt[m];
|
|
142
|
+
if (new_cnt >= full_size)
|
|
143
|
+
{
|
|
144
|
+
if (new_cnt > full_size)
|
|
145
|
+
{
|
|
146
|
+
rest = permute(value, idx[new_cnt - cnt[m] + full_size - 1][m]);
|
|
147
|
+
}
|
|
148
|
+
value = p1;
|
|
149
|
+
full = true;
|
|
150
|
+
return new_cnt - full_size;
|
|
151
|
+
}
|
|
152
|
+
else
|
|
153
|
+
{
|
|
154
|
+
rest = p1;
|
|
155
|
+
full = false;
|
|
156
|
+
return new_cnt;
|
|
157
|
+
}
|
|
158
|
+
}
|
|
159
|
+
};
|
|
160
|
+
}
|
|
161
|
+
}
|
|
162
|
+
}
|
|
163
|
+
#endif
|
|
164
|
+
|
|
165
|
+
#ifdef EIGEN_VECTORIZE_SSE2
|
|
166
|
+
#include <xmmintrin.h>
|
|
167
|
+
|
|
168
|
+
namespace Eigen
|
|
169
|
+
{
|
|
170
|
+
namespace Rand
|
|
171
|
+
{
|
|
172
|
+
namespace detail
|
|
173
|
+
{
|
|
174
|
+
template<>
|
|
175
|
+
class CompressMask<16>
|
|
176
|
+
{
|
|
177
|
+
std::array<std::array<uint8_t, 16>, 7> idx;
|
|
178
|
+
std::array<internal::Packet4f, 4> selector;
|
|
179
|
+
std::array<uint8_t, 64> cnt;
|
|
180
|
+
|
|
181
|
+
static uint8_t make_compress(int mask, int offset = 0)
|
|
182
|
+
{
|
|
183
|
+
uint8_t ret = 0;
|
|
184
|
+
int n = offset;
|
|
185
|
+
for (int i = 0; i < 4; ++i)
|
|
186
|
+
{
|
|
187
|
+
int l = mask & 1;
|
|
188
|
+
mask >>= 1;
|
|
189
|
+
if (l)
|
|
190
|
+
{
|
|
191
|
+
if (n >= 0) ret |= (i & 3) << (2 * n);
|
|
192
|
+
if (++n >= 4) break;
|
|
193
|
+
}
|
|
194
|
+
}
|
|
195
|
+
return ret;
|
|
196
|
+
}
|
|
197
|
+
|
|
198
|
+
static uint8_t count(int mask)
|
|
199
|
+
{
|
|
200
|
+
uint8_t ret = 0;
|
|
201
|
+
for (int i = 0; i < 4; ++i)
|
|
202
|
+
{
|
|
203
|
+
ret += mask & 1;
|
|
204
|
+
mask >>= 1;
|
|
205
|
+
}
|
|
206
|
+
return ret;
|
|
207
|
+
}
|
|
208
|
+
|
|
209
|
+
CompressMask()
|
|
210
|
+
{
|
|
211
|
+
for (int i = 0; i < 16; ++i)
|
|
212
|
+
{
|
|
213
|
+
for (int o = 0; o < 7; ++o)
|
|
214
|
+
{
|
|
215
|
+
idx[o][i] = make_compress(i, o < 4 ? o : o - 7);
|
|
216
|
+
}
|
|
217
|
+
|
|
218
|
+
cnt[i] = count(i);
|
|
219
|
+
}
|
|
220
|
+
|
|
221
|
+
selector[0] = _mm_castsi128_ps(_mm_setr_epi32(0, 0, 0, 0));
|
|
222
|
+
selector[1] = _mm_castsi128_ps(_mm_setr_epi32(-1, 0, 0, 0));
|
|
223
|
+
selector[2] = _mm_castsi128_ps(_mm_setr_epi32(-1, -1, 0, 0));
|
|
224
|
+
selector[3] = _mm_castsi128_ps(_mm_setr_epi32(-1, -1, -1, 0));
|
|
225
|
+
}
|
|
226
|
+
|
|
227
|
+
static EIGEN_STRONG_INLINE internal::Packet4f permute(const internal::Packet4f& p, uint8_t i)
|
|
228
|
+
{
|
|
229
|
+
float u[4];
|
|
230
|
+
_mm_storeu_ps(u, p);
|
|
231
|
+
return _mm_setr_ps(u[i & 3], u[(i >> 2) & 3], u[(i >> 4) & 3], u[(i >> 6) & 3]);
|
|
232
|
+
}
|
|
233
|
+
|
|
234
|
+
public:
|
|
235
|
+
|
|
236
|
+
enum { full_size = 4 };
|
|
237
|
+
|
|
238
|
+
static const CompressMask& get_inst()
|
|
239
|
+
{
|
|
240
|
+
static CompressMask cm;
|
|
241
|
+
return cm;
|
|
242
|
+
}
|
|
243
|
+
|
|
244
|
+
template<typename Packet>
|
|
245
|
+
EIGEN_STRONG_INLINE int compress_append(Packet& _value, const Packet& _mask,
|
|
246
|
+
Packet& _rest, int rest_cnt, bool& full) const
|
|
247
|
+
{
|
|
248
|
+
auto& value = reinterpret_cast<internal::Packet4f&>(_value);
|
|
249
|
+
auto& mask = reinterpret_cast<const internal::Packet4f&>(_mask);
|
|
250
|
+
auto& rest = reinterpret_cast<internal::Packet4f&>(_rest);
|
|
251
|
+
|
|
252
|
+
int m = _mm_movemask_ps(mask);
|
|
253
|
+
if (cnt[m] == full_size)
|
|
254
|
+
{
|
|
255
|
+
full = true;
|
|
256
|
+
return rest_cnt;
|
|
257
|
+
}
|
|
258
|
+
|
|
259
|
+
auto p1 = permute(value, idx[rest_cnt][m]);
|
|
260
|
+
p1 = internal::pblendv(selector[rest_cnt], rest, p1);
|
|
261
|
+
|
|
262
|
+
auto new_cnt = rest_cnt + cnt[m];
|
|
263
|
+
if (new_cnt >= full_size)
|
|
264
|
+
{
|
|
265
|
+
if (new_cnt > full_size)
|
|
266
|
+
{
|
|
267
|
+
rest = permute(value, idx[new_cnt - cnt[m] + full_size - 1][m]);
|
|
268
|
+
}
|
|
269
|
+
value = p1;
|
|
270
|
+
full = true;
|
|
271
|
+
return new_cnt - full_size;
|
|
272
|
+
}
|
|
273
|
+
else
|
|
274
|
+
{
|
|
275
|
+
rest = p1;
|
|
276
|
+
full = false;
|
|
277
|
+
return new_cnt;
|
|
278
|
+
}
|
|
279
|
+
}
|
|
280
|
+
};
|
|
281
|
+
}
|
|
282
|
+
}
|
|
283
|
+
}
|
|
284
|
+
#endif
|
|
285
|
+
|
|
286
|
+
#endif
|
|
@@ -0,0 +1,624 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file PacketRandomEngine.h
|
|
3
|
+
* @author bab2min (bab2min@gmail.com)
|
|
4
|
+
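* @brief Vectorized (packet-wise) Mersenne Twister engine, plus adaptors that expose packet or scalar random engines through a common scalar interface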
|
|
5
|
+
* @version 0.2.1
|
|
6
|
+
* @date 2020-07-11
|
|
7
|
+
*
|
|
8
|
+
* @copyright Copyright (c) 2020
|
|
9
|
+
*
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
#ifndef EIGENRAND_PACKET_RANDOM_ENGINE_H
|
|
13
|
+
#define EIGENRAND_PACKET_RANDOM_ENGINE_H
|
|
14
|
+
|
|
15
|
+
#include <array>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <fstream>
#include <limits>
#include <new>
#include <random>
#include <type_traits>
#include <utility>
#include <EigenRand/MorePacketMath.h>
|
|
20
|
+
|
|
21
|
+
namespace Eigen
|
|
22
|
+
{
|
|
23
|
+
namespace internal
{
    /**
     * Trait: true for the integer packet types this header knows about.
     * The primary template answers false; specializations below opt in.
     */
    template<typename Packet>
    struct IsIntPacket : std::integral_constant<bool, false>
    {
    };

    /**
     * Maps an integer packet type to the type used for one half of it
     * (member `type`). Only declared here; defined per packet type below.
     */
    template<typename Packet>
    struct HalfPacket;

#ifdef EIGEN_VECTORIZE_AVX2
    /** Packet8i is an integer packet. */
    template<>
    struct IsIntPacket<Packet8i> : std::integral_constant<bool, true>
    {
    };

    /** Half of a Packet8i is a Packet4i. */
    template<>
    struct HalfPacket<Packet8i>
    {
        using type = Packet4i;
    };
#endif
#ifdef EIGEN_VECTORIZE_SSE2
    /** Packet4i is an integer packet. */
    template<>
    struct IsIntPacket<Packet4i> : std::integral_constant<bool, true>
    {
    };

    /** Half of a Packet4i is a single 64-bit unsigned integer. */
    template<>
    struct HalfPacket<Packet4i>
    {
        using type = uint64_t;
    };
#endif
}
|
|
52
|
+
|
|
53
|
+
namespace Rand
|
|
54
|
+
{
|
|
55
|
+
namespace detail
|
|
56
|
+
{
|
|
57
|
+
/**
 * Detection helper (declaration only, used inside decltype).
 * Chosen when `T::result_type` names a type; the returned constant tells
 * whether that type is integral.
 */
template<typename T>
std::integral_constant<bool, std::is_integral<typename T::result_type>::value>
test_integral_result_type(int);

/** Fallback chosen when `T` has no usable `result_type`: always false. */
template<typename T>
std::false_type test_integral_result_type(...);
|
|
62
|
+
|
|
63
|
+
template<typename T>
|
|
64
|
+
auto test_intpacket_result_type(int)->std::integral_constant<bool, internal::IsIntPacket<typename T::result_type>::value>;
|
|
65
|
+
|
|
66
|
+
template<typename T>
|
|
67
|
+
auto test_intpacket_result_type(...)->std::false_type;
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
template<typename Ty>
|
|
71
|
+
struct IsScalarRandomEngine : decltype(detail::test_integral_result_type<Ty>(0))
|
|
72
|
+
{
|
|
73
|
+
};
|
|
74
|
+
|
|
75
|
+
template<typename Ty>
|
|
76
|
+
struct IsPacketRandomEngine : decltype(detail::test_intpacket_result_type<Ty>(0))
|
|
77
|
+
{
|
|
78
|
+
};
|
|
79
|
+
|
|
80
|
+
/** Classification of a type as reported by GetRandomEngineType. */
enum class RandomEngineType
{
    none = 0,   ///< neither IsScalarRandomEngine nor IsPacketRandomEngine holds
    scalar = 1, ///< result_type is an integral scalar
    packet = 2  ///< result_type is an integer packet
};
|
|
84
|
+
|
|
85
|
+
template<typename Ty>
|
|
86
|
+
struct GetRandomEngineType : std::integral_constant <
|
|
87
|
+
RandomEngineType,
|
|
88
|
+
IsPacketRandomEngine<Ty>::value ? RandomEngineType::packet :
|
|
89
|
+
(IsScalarRandomEngine<Ty>::value ? RandomEngineType::scalar : RandomEngineType::none)
|
|
90
|
+
>
|
|
91
|
+
{
|
|
92
|
+
};
|
|
93
|
+
|
|
94
|
+
/**
 * @brief Fixed-length array of `length` elements whose first element sits on
 * an `alignment`-byte boundary.
 *
 * Storage comes from std::malloc, over-allocated by `alignment` bytes; the
 * element pointer is rounded up inside that buffer with a bit mask, so
 * `alignment` is expected to be a power of two.
 *
 * A moved-from array owns no storage. It may be destroyed, assigned to, or
 * copied (the copy is empty as well); indexing it is not valid.
 *
 * @tparam Ty        element type
 * @tparam length    number of elements
 * @tparam alignment required alignment of the first element, in bytes
 */
template<typename Ty, size_t length, size_t alignment = 64>
class AlignedArray
{
public:
    /** Allocates the storage and value-initializes every element. */
    AlignedArray()
    {
        allocate();
        size_t built = 0;
        try
        {
            for (; built < length; ++built)
            {
                new (static_cast<void*>(aligned + built)) Ty();
            }
        }
        catch (...)
        {
            // Undo the partial construction so nothing leaks.
            destroy_and_free(built);
            throw;
        }
    }

    /** Allocates fresh storage and copy-constructs every element from `o`. */
    AlignedArray(const AlignedArray& o)
    {
        if (o.aligned) construct_copy(o);
    }

    /** Takes over the storage of `o`, leaving `o` without storage. */
    AlignedArray(AlignedArray&& o) noexcept
    {
        std::swap(memory, o.memory);
        std::swap(aligned, o.aligned);
    }

    /** Element-wise copy; (re)allocates when this array currently owns no storage. */
    AlignedArray& operator=(const AlignedArray& o)
    {
        if (this == &o) return *this;

        if (!o.aligned)
        {
            // Source was moved from: mirror its empty state.
            deallocate();
        }
        else if (aligned)
        {
            for (size_t i = 0; i < length; ++i)
            {
                aligned[i] = o[i];
            }
        }
        else
        {
            // This array was moved from: its elements do not exist yet, so
            // they have to be constructed rather than assigned.
            construct_copy(o);
        }
        return *this;
    }

    /** Exchanges storage with `o`; the previous contents are released by `o`. */
    AlignedArray& operator=(AlignedArray&& o) noexcept
    {
        std::swap(memory, o.memory);
        std::swap(aligned, o.aligned);
        return *this;
    }

    ~AlignedArray()
    {
        deallocate();
    }

    /** Unchecked element access. */
    Ty& operator[](size_t i)
    {
        return aligned[i];
    }

    /** Unchecked element access. */
    const Ty& operator[](size_t i) const
    {
        return aligned[i];
    }

    /** Number of elements (always `length`). */
    size_t size() const
    {
        return length;
    }

    /** Pointer to the first (aligned) element, or nullptr after a move. */
    Ty* data()
    {
        return aligned;
    }

    /** Pointer to the first (aligned) element, or nullptr after a move. */
    const Ty* data() const
    {
        return aligned;
    }

private:
    /**
     * Obtains the raw buffer and computes the aligned element pointer.
     * @throws std::bad_alloc when std::malloc fails
     */
    void allocate()
    {
        memory = std::malloc(sizeof(Ty) * length + alignment);
        if (!memory) throw std::bad_alloc();
        // Round up to the next multiple of `alignment` strictly above `memory`;
        // the extra `alignment` bytes requested above make room for this.
        aligned = reinterpret_cast<Ty*>(
            (reinterpret_cast<size_t>(memory) + alignment) & ~(alignment - 1));
    }

    /** Allocates storage and copy-constructs all elements from `o` (which must own storage). */
    void construct_copy(const AlignedArray& o)
    {
        allocate();
        size_t built = 0;
        try
        {
            for (; built < length; ++built)
            {
                new (static_cast<void*>(aligned + built)) Ty(o[built]);
            }
        }
        catch (...)
        {
            destroy_and_free(built);
            throw;
        }
    }

    /** Destroys the first `count` elements, frees the buffer and resets both pointers. */
    void destroy_and_free(size_t count)
    {
        while (count > 0)
        {
            aligned[--count].~Ty();
        }
        std::free(memory);
        memory = nullptr;
        aligned = nullptr;
    }

    /** Releases everything this array owns; does nothing when it owns no storage. */
    void deallocate()
    {
        if (memory)
        {
            destroy_and_free(length);
        }
    }

    void* memory = nullptr;  // pointer returned by std::malloc
    Ty* aligned = nullptr;   // first element, inside `memory`
};
|
|
192
|
+
|
|
193
|
+
#ifndef EIGEN_DONT_VECTORIZE
|
|
194
|
+
/**
|
|
195
|
+
* @brief A vectorized version of Mersenne Twister Engine
|
|
196
|
+
*
|
|
197
|
+
* @tparam Packet a type of integer packet being generated from this engine
|
|
198
|
+
* @tparam _Nx
|
|
199
|
+
* @tparam _Mx
|
|
200
|
+
* @tparam _Rx
|
|
201
|
+
* @tparam _Px
|
|
202
|
+
* @tparam _Ux
|
|
203
|
+
* @tparam _Dx
|
|
204
|
+
* @tparam _Sx
|
|
205
|
+
* @tparam _Bx
|
|
206
|
+
* @tparam _Tx
|
|
207
|
+
* @tparam _Cx
|
|
208
|
+
* @tparam _Lx
|
|
209
|
+
* @tparam _Fx
|
|
210
|
+
*
|
|
211
|
+
* @note It is recommended to use the alias, Eigen::Rand::Vmt19937_64 rather than using raw MersenneTwister template class
|
|
212
|
+
* because the definition of Eigen::Rand::Vmt19937_64 is changed to use the appropriate PacketType depending on compile options and the architecture of machines.
|
|
213
|
+
*/
|
|
214
|
+
template<typename Packet,
|
|
215
|
+
int _Nx, int _Mx,
|
|
216
|
+
int _Rx, uint64_t _Px,
|
|
217
|
+
int _Ux, uint64_t _Dx,
|
|
218
|
+
int _Sx, uint64_t _Bx,
|
|
219
|
+
int _Tx, uint64_t _Cx,
|
|
220
|
+
int _Lx, uint64_t _Fx>
|
|
221
|
+
class MersenneTwister
|
|
222
|
+
{
|
|
223
|
+
public:
|
|
224
|
+
using result_type = Packet;
|
|
225
|
+
|
|
226
|
+
static constexpr int word_size = 64;
|
|
227
|
+
static constexpr int state_size = _Nx;
|
|
228
|
+
static constexpr int shift_size = _Mx;
|
|
229
|
+
static constexpr int mask_bits = _Rx;
|
|
230
|
+
static constexpr uint64_t parameter_a = _Px;
|
|
231
|
+
static constexpr int output_u = _Ux;
|
|
232
|
+
static constexpr int output_s = _Sx;
|
|
233
|
+
static constexpr uint64_t output_b = _Bx;
|
|
234
|
+
static constexpr int output_t = _Tx;
|
|
235
|
+
static constexpr uint64_t output_c = _Cx;
|
|
236
|
+
static constexpr int output_l = _Lx;
|
|
237
|
+
|
|
238
|
+
static constexpr uint64_t default_seed = 5489U;
|
|
239
|
+
|
|
240
|
+
/**
|
|
241
|
+
* @brief Construct a new Mersenne Twister engine with a scalar seed
|
|
242
|
+
*
|
|
243
|
+
* @param x0 scalar seed for the engine
|
|
244
|
+
*
|
|
245
|
+
* @note The seed for the first element of packet is initialized to `x0`,
|
|
246
|
+
* for the second element to `x0 + 1`, and the n-th element to is `x0 + n - 1`.
|
|
247
|
+
*/
|
|
248
|
+
MersenneTwister(uint64_t x0 = default_seed)
|
|
249
|
+
{
|
|
250
|
+
using namespace Eigen::internal;
|
|
251
|
+
std::array<uint64_t, unpacket_traits<Packet>::size / 2> seeds;
|
|
252
|
+
for (uint64_t i = 0; i < seeds.size(); ++i)
|
|
253
|
+
{
|
|
254
|
+
seeds[i] = x0 + i;
|
|
255
|
+
}
|
|
256
|
+
seed(ploadu<Packet>((int*)seeds.data()));
|
|
257
|
+
}
|
|
258
|
+
|
|
259
|
+
/**
|
|
260
|
+
* @brief Construct a new Mersenne Twister engine with a packet seed
|
|
261
|
+
*
|
|
262
|
+
* @param x0 packet seed for the engine
|
|
263
|
+
*/
|
|
264
|
+
MersenneTwister(Packet x0)
|
|
265
|
+
{
|
|
266
|
+
seed(x0);
|
|
267
|
+
}
|
|
268
|
+
|
|
269
|
+
/**
|
|
270
|
+
* @brief initialize the engine with a given seed
|
|
271
|
+
*
|
|
272
|
+
* @param x0 packet seed for the engine
|
|
273
|
+
*/
|
|
274
|
+
void seed(Packet x0)
|
|
275
|
+
{
|
|
276
|
+
using namespace Eigen::internal;
|
|
277
|
+
Packet prev = state[0] = x0;
|
|
278
|
+
for (int i = 1; i < _Nx; ++i)
|
|
279
|
+
{
|
|
280
|
+
prev = state[i] = pmuluadd64(pxor(prev, psrl64(prev, word_size - 2)), _Fx, i);
|
|
281
|
+
}
|
|
282
|
+
stateIdx = _Nx;
|
|
283
|
+
}
|
|
284
|
+
|
|
285
|
+
/**
|
|
286
|
+
* @brief minimum value of the result
|
|
287
|
+
*
|
|
288
|
+
* @return uint64_t
|
|
289
|
+
*/
|
|
290
|
+
uint64_t min() const
|
|
291
|
+
{
|
|
292
|
+
return 0;
|
|
293
|
+
}
|
|
294
|
+
|
|
295
|
+
/**
|
|
296
|
+
* @brief maximum value of the result
|
|
297
|
+
*
|
|
298
|
+
* @return uint64_t
|
|
299
|
+
*/
|
|
300
|
+
uint64_t max() const
|
|
301
|
+
{
|
|
302
|
+
return _wMask;
|
|
303
|
+
}
|
|
304
|
+
|
|
305
|
+
/**
|
|
306
|
+
* @brief Generates one random packet and advance the internal state.
|
|
307
|
+
*
|
|
308
|
+
* @return result_type
|
|
309
|
+
*
|
|
310
|
+
* @note A value generated from this engine is not scalar, but packet type.
|
|
311
|
+
* If you need to extract scalar values, use Eigen::Rand::makeScalarRng or Eigen::Rand::PacketRandomEngineAdaptor.
|
|
312
|
+
*/
|
|
313
|
+
result_type operator()()
|
|
314
|
+
{
|
|
315
|
+
if (stateIdx == _Nx)
|
|
316
|
+
refill_upper();
|
|
317
|
+
else if (2 * _Nx <= stateIdx)
|
|
318
|
+
refill_lower();
|
|
319
|
+
|
|
320
|
+
using namespace Eigen::internal;
|
|
321
|
+
|
|
322
|
+
Packet res = state[stateIdx++];
|
|
323
|
+
res = pxor(res, pand(psrl64(res, _Ux), pseti64<Packet>(_Dx)));
|
|
324
|
+
res = pxor(res, pand(psll64(res, _Sx), pseti64<Packet>(_Bx)));
|
|
325
|
+
res = pxor(res, pand(psll64(res, _Tx), pseti64<Packet>(_Cx)));
|
|
326
|
+
res = pxor(res, psrl64(res, _Lx));
|
|
327
|
+
return res;
|
|
328
|
+
}
|
|
329
|
+
|
|
330
|
+
/**
|
|
331
|
+
* @brief Discards `num` items being generated
|
|
332
|
+
*
|
|
333
|
+
* @param num the number of items being discarded
|
|
334
|
+
*/
|
|
335
|
+
void discard(unsigned long long num)
|
|
336
|
+
{
|
|
337
|
+
for (; 0 < num; --num)
|
|
338
|
+
{
|
|
339
|
+
operator()();
|
|
340
|
+
}
|
|
341
|
+
}
|
|
342
|
+
|
|
343
|
+
typename internal::HalfPacket<Packet>::type half()
|
|
344
|
+
{
|
|
345
|
+
if (valid)
|
|
346
|
+
{
|
|
347
|
+
valid = false;
|
|
348
|
+
return cache;
|
|
349
|
+
}
|
|
350
|
+
typename internal::HalfPacket<Packet>::type a;
|
|
351
|
+
internal::split_two(operator()(), a, cache);
|
|
352
|
+
valid = true;
|
|
353
|
+
return a;
|
|
354
|
+
}
|
|
355
|
+
|
|
356
|
+
protected:
|
|
357
|
+
|
|
358
|
+
void refill_lower()
|
|
359
|
+
{
|
|
360
|
+
using namespace Eigen::internal;
|
|
361
|
+
|
|
362
|
+
auto hmask = pseti64<Packet>(_hMask),
|
|
363
|
+
lmask = pseti64<Packet>(_lMask),
|
|
364
|
+
px = pseti64<Packet>(_Px),
|
|
365
|
+
one = pseti64<Packet>(1);
|
|
366
|
+
|
|
367
|
+
int i;
|
|
368
|
+
for (i = 0; i < _Nx - _Mx; ++i)
|
|
369
|
+
{
|
|
370
|
+
Packet tmp = por(pand(state[i + _Nx], hmask),
|
|
371
|
+
pand(state[i + _Nx + 1], lmask));
|
|
372
|
+
|
|
373
|
+
state[i] = pxor(pxor(
|
|
374
|
+
psrl64(tmp, 1),
|
|
375
|
+
pand(pcmpeq64(pand(tmp, one), one), px)),
|
|
376
|
+
state[i + _Nx + _Mx]
|
|
377
|
+
);
|
|
378
|
+
}
|
|
379
|
+
|
|
380
|
+
for (; i < _Nx - 1; ++i)
|
|
381
|
+
{
|
|
382
|
+
Packet tmp = por(pand(state[i + _Nx], hmask),
|
|
383
|
+
pand(state[i + _Nx + 1], lmask));
|
|
384
|
+
|
|
385
|
+
state[i] = pxor(pxor(
|
|
386
|
+
psrl64(tmp, 1),
|
|
387
|
+
pand(pcmpeq64(pand(tmp, one), one), px)),
|
|
388
|
+
state[i - _Nx + _Mx]
|
|
389
|
+
);
|
|
390
|
+
}
|
|
391
|
+
|
|
392
|
+
Packet tmp = por(pand(state[i + _Nx], hmask),
|
|
393
|
+
pand(state[0], lmask));
|
|
394
|
+
state[i] = pxor(pxor(
|
|
395
|
+
psrl64(tmp, 1),
|
|
396
|
+
pand(pcmpeq64(pand(tmp, one), one), px)),
|
|
397
|
+
state[_Mx - 1]
|
|
398
|
+
);
|
|
399
|
+
stateIdx = 0;
|
|
400
|
+
}
|
|
401
|
+
|
|
402
|
+
void refill_upper()
|
|
403
|
+
{
|
|
404
|
+
using namespace Eigen::internal;
|
|
405
|
+
|
|
406
|
+
auto hmask = pseti64<Packet>(_hMask),
|
|
407
|
+
lmask = pseti64<Packet>(_lMask),
|
|
408
|
+
px = pseti64<Packet>(_Px),
|
|
409
|
+
one = pseti64<Packet>(1);
|
|
410
|
+
|
|
411
|
+
for (int i = _Nx; i < 2 * _Nx; ++i)
|
|
412
|
+
{
|
|
413
|
+
Packet tmp = por(pand(state[i - _Nx], hmask),
|
|
414
|
+
pand(state[i - _Nx + 1], lmask));
|
|
415
|
+
|
|
416
|
+
state[i] = pxor(pxor(
|
|
417
|
+
psrl64(tmp, 1),
|
|
418
|
+
pand(pcmpeq64(pand(tmp, one), one), px)),
|
|
419
|
+
state[i - _Nx + _Mx]
|
|
420
|
+
);
|
|
421
|
+
}
|
|
422
|
+
}
|
|
423
|
+
|
|
424
|
+
AlignedArray<Packet, _Nx * 2> state;
|
|
425
|
+
size_t stateIdx = 0;
|
|
426
|
+
typename internal::HalfPacket<Packet>::type cache;
|
|
427
|
+
bool valid = false;
|
|
428
|
+
|
|
429
|
+
static constexpr uint64_t _wMask = (uint64_t)-1;
|
|
430
|
+
static constexpr uint64_t _hMask = (_wMask << _Rx) & _wMask;
|
|
431
|
+
static constexpr uint64_t _lMask = ~_hMask & _wMask;
|
|
432
|
+
};
|
|
433
|
+
|
|
434
|
+
/**
|
|
435
|
+
* @brief Alias of Eigen::Rand::MersenneTwister, equivalent to std::mt19937_64
|
|
436
|
+
*
|
|
437
|
+
* @tparam Packet
|
|
438
|
+
*/
|
|
439
|
+
template<typename Packet>
|
|
440
|
+
using Pmt19937_64 = MersenneTwister<Packet, 312, 156, 31,
|
|
441
|
+
0xb5026f5aa96619e9, 29,
|
|
442
|
+
0x5555555555555555, 17,
|
|
443
|
+
0x71d67fffeda60000, 37,
|
|
444
|
+
0xfff7eee000000000, 43, 6364136223846793005>;
|
|
445
|
+
#endif
|
|
446
|
+
|
|
447
|
+
template<typename UIntType, typename BaseRng, int numU64>
|
|
448
|
+
class ParallelRandomEngineAdaptor
|
|
449
|
+
{
|
|
450
|
+
static_assert(GetRandomEngineType<BaseRng>::value != RandomEngineType::none, "BaseRng must be a kind of Random Engine.");
|
|
451
|
+
public:
|
|
452
|
+
using result_type = UIntType;
|
|
453
|
+
|
|
454
|
+
ParallelRandomEngineAdaptor(size_t seed = BaseRng::default_seed)
|
|
455
|
+
{
|
|
456
|
+
for (int i = 0; i < num_parallel; ++i)
|
|
457
|
+
{
|
|
458
|
+
rngs[i].~BaseRng();
|
|
459
|
+
new (&rngs[i]) BaseRng{ seed + i * u64_stride };
|
|
460
|
+
}
|
|
461
|
+
}
|
|
462
|
+
|
|
463
|
+
ParallelRandomEngineAdaptor(const BaseRng& o)
|
|
464
|
+
{
|
|
465
|
+
for (int i = 0; i < num_parallel; ++i)
|
|
466
|
+
{
|
|
467
|
+
rngs[i].~BaseRng();
|
|
468
|
+
new (&rngs[i]) BaseRng{ o };
|
|
469
|
+
}
|
|
470
|
+
}
|
|
471
|
+
|
|
472
|
+
ParallelRandomEngineAdaptor(const ParallelRandomEngineAdaptor&) = default;
|
|
473
|
+
ParallelRandomEngineAdaptor(ParallelRandomEngineAdaptor&&) = default;
|
|
474
|
+
|
|
475
|
+
static constexpr result_type min()
|
|
476
|
+
{
|
|
477
|
+
return std::numeric_limits<result_type>::min();
|
|
478
|
+
}
|
|
479
|
+
|
|
480
|
+
static constexpr result_type max()
|
|
481
|
+
{
|
|
482
|
+
return std::numeric_limits<result_type>::max();
|
|
483
|
+
}
|
|
484
|
+
|
|
485
|
+
result_type operator()()
|
|
486
|
+
{
|
|
487
|
+
if (cnt >= buf_size)
|
|
488
|
+
{
|
|
489
|
+
refill_buffer();
|
|
490
|
+
}
|
|
491
|
+
return buf[cnt++];
|
|
492
|
+
}
|
|
493
|
+
|
|
494
|
+
float uniform_real()
|
|
495
|
+
{
|
|
496
|
+
if (fcnt >= fbuf_size)
|
|
497
|
+
{
|
|
498
|
+
refill_fbuffer();
|
|
499
|
+
}
|
|
500
|
+
return fbuf[fcnt++];
|
|
501
|
+
}
|
|
502
|
+
private:
|
|
503
|
+
|
|
504
|
+
void refill_buffer()
|
|
505
|
+
{
|
|
506
|
+
cnt = 0;
|
|
507
|
+
for (size_t i = 0; i < num_parallel; ++i)
|
|
508
|
+
{
|
|
509
|
+
reinterpret_cast<typename BaseRng::result_type&>(buf[i * result_type_stride]) = rngs[i]();
|
|
510
|
+
}
|
|
511
|
+
}
|
|
512
|
+
|
|
513
|
+
void refill_fbuffer()
|
|
514
|
+
{
|
|
515
|
+
fcnt = 0;
|
|
516
|
+
for (size_t i = 0; i < num_parallel; ++i)
|
|
517
|
+
{
|
|
518
|
+
auto urf = internal::bit_to_ur_float(rngs[i]());
|
|
519
|
+
reinterpret_cast<decltype(urf)&>(fbuf[i * u64_stride * 2]) = urf;
|
|
520
|
+
}
|
|
521
|
+
}
|
|
522
|
+
|
|
523
|
+
static constexpr int u64_stride = sizeof(typename BaseRng::result_type) / sizeof(uint64_t);
|
|
524
|
+
static constexpr int result_type_stride = sizeof(typename BaseRng::result_type) / sizeof(result_type);
|
|
525
|
+
static constexpr int num_parallel = numU64 / u64_stride;
|
|
526
|
+
static constexpr int byte_size = sizeof(uint64_t) * numU64;
|
|
527
|
+
static constexpr size_t buf_size = byte_size / sizeof(result_type);
|
|
528
|
+
static constexpr size_t fbuf_size = byte_size / sizeof(float);
|
|
529
|
+
|
|
530
|
+
std::array<BaseRng, num_parallel> rngs;
|
|
531
|
+
AlignedArray<result_type, buf_size> buf;
|
|
532
|
+
AlignedArray<float, fbuf_size> fbuf;
|
|
533
|
+
size_t cnt = buf_size, fcnt = fbuf_size;
|
|
534
|
+
};
|
|
535
|
+
|
|
536
|
+
/**
|
|
537
|
+
* @brief Scalar adaptor for random engines which generates packet
|
|
538
|
+
*
|
|
539
|
+
* @tparam UIntType scalar integer type for `result_type` of an adapted random number engine
|
|
540
|
+
* @tparam BaseRng
|
|
541
|
+
*/
|
|
542
|
+
template<typename UIntType, typename BaseRng>
|
|
543
|
+
using PacketRandomEngineAdaptor = ParallelRandomEngineAdaptor<UIntType, BaseRng,
|
|
544
|
+
sizeof(typename BaseRng::result_type) / sizeof(uint64_t)>;
|
|
545
|
+
|
|
546
|
+
template<typename BaseRng>
|
|
547
|
+
class RandomEngineWrapper : public BaseRng
|
|
548
|
+
{
|
|
549
|
+
public:
|
|
550
|
+
using BaseRng::BaseRng;
|
|
551
|
+
|
|
552
|
+
RandomEngineWrapper(const BaseRng& o) : BaseRng{ o }
|
|
553
|
+
{
|
|
554
|
+
}
|
|
555
|
+
|
|
556
|
+
RandomEngineWrapper(BaseRng&& o) : BaseRng{ o }
|
|
557
|
+
{
|
|
558
|
+
}
|
|
559
|
+
|
|
560
|
+
RandomEngineWrapper(size_t seed) : BaseRng{ seed }
|
|
561
|
+
{
|
|
562
|
+
}
|
|
563
|
+
|
|
564
|
+
RandomEngineWrapper() = default;
|
|
565
|
+
RandomEngineWrapper(const RandomEngineWrapper&) = default;
|
|
566
|
+
RandomEngineWrapper(RandomEngineWrapper&&) = default;
|
|
567
|
+
|
|
568
|
+
float uniform_real()
|
|
569
|
+
{
|
|
570
|
+
internal::bit_scalar<float> bs;
|
|
571
|
+
return bs.to_ur(this->operator()());
|
|
572
|
+
}
|
|
573
|
+
};
|
|
574
|
+
|
|
575
|
+
template<typename UIntType, typename Rng>
|
|
576
|
+
using UniversalRandomEngine = typename std::conditional<
|
|
577
|
+
IsPacketRandomEngine<typename std::remove_reference<Rng>::type>::value,
|
|
578
|
+
PacketRandomEngineAdaptor<UIntType, typename std::remove_reference<Rng>::type>,
|
|
579
|
+
typename std::conditional<
|
|
580
|
+
IsScalarRandomEngine<typename std::remove_reference<Rng>::type>::value,
|
|
581
|
+
RandomEngineWrapper<typename std::remove_reference<Rng>::type>,
|
|
582
|
+
void
|
|
583
|
+
>::type
|
|
584
|
+
>::type;
|
|
585
|
+
|
|
586
|
+
/**
|
|
587
|
+
* @brief Helper function for making a UniversalRandomEngine
|
|
588
|
+
*
|
|
589
|
+
* @tparam UIntType
|
|
590
|
+
* @tparam Rng
|
|
591
|
+
* @param rng any random number engine for either packet or scalar type
|
|
592
|
+
* @return an instance of PacketRandomEngineAdaptor for UIntType
|
|
593
|
+
*/
|
|
594
|
+
template<typename UIntType, typename Rng>
|
|
595
|
+
UniversalRandomEngine<UIntType, Rng> makeUniversalRng(Rng&& rng)
|
|
596
|
+
{
|
|
597
|
+
return { std::forward<Rng>(rng) };
|
|
598
|
+
}
|
|
599
|
+
|
|
600
|
+
/**
 * @brief Default 64-bit Mersenne Twister engine for the current build:
 * Pmt19937_64<internal::Packet8i> when AVX2 is enabled,
 * Pmt19937_64<internal::Packet4i> when AVX or SSE2 is enabled,
 * and std::mt19937_64 otherwise.
 *
 * @note It yields the same random sequence only within the same seed and the same SIMD ISA.
 * If you want to keep the same random sequence across different SIMD ISAs, use P8_mt19937_64.
 */
#if defined(EIGEN_VECTORIZE_AVX2)
    using Vmt19937_64 = Pmt19937_64<internal::Packet8i>;
#elif defined(EIGEN_VECTORIZE_AVX) || defined(EIGEN_VECTORIZE_SSE2)
    using Vmt19937_64 = Pmt19937_64<internal::Packet4i>;
#else
    using Vmt19937_64 = std::mt19937_64;
#endif
|
|
615
|
+
/**
|
|
616
|
+
* @brief a vectorized mt19937_64 which generates 8 integers of 64bit simultaneously.
|
|
617
|
+
* It always yields the same value regardless of SIMD ISA.
|
|
618
|
+
*/
|
|
619
|
+
template<typename UIntType = uint64_t>
|
|
620
|
+
using P8_mt19937_64 = ParallelRandomEngineAdaptor<UIntType, Vmt19937_64, 8>;
|
|
621
|
+
}
|
|
622
|
+
}
|
|
623
|
+
|
|
624
|
+
#endif
|