tomoto 0.2.2 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/README.md +8 -10
- data/ext/tomoto/ct.cpp +11 -11
- data/ext/tomoto/dmr.cpp +14 -13
- data/ext/tomoto/dt.cpp +14 -14
- data/ext/tomoto/extconf.rb +7 -5
- data/ext/tomoto/gdmr.cpp +7 -7
- data/ext/tomoto/hdp.cpp +9 -9
- data/ext/tomoto/hlda.cpp +13 -13
- data/ext/tomoto/hpa.cpp +5 -5
- data/ext/tomoto/lda.cpp +42 -39
- data/ext/tomoto/llda.cpp +6 -6
- data/ext/tomoto/mglda.cpp +15 -15
- data/ext/tomoto/pa.cpp +6 -6
- data/ext/tomoto/plda.cpp +6 -6
- data/ext/tomoto/slda.cpp +8 -8
- data/ext/tomoto/{ext.cpp → tomoto.cpp} +8 -8
- data/ext/tomoto/utils.h +16 -70
- data/lib/tomoto/version.rb +1 -1
- data/lib/tomoto.rb +5 -1
- data/vendor/EigenRand/EigenRand/Core.h +10 -10
- data/vendor/EigenRand/EigenRand/Dists/Basic.h +208 -9
- data/vendor/EigenRand/EigenRand/Dists/Discrete.h +52 -31
- data/vendor/EigenRand/EigenRand/Dists/GammaPoisson.h +9 -8
- data/vendor/EigenRand/EigenRand/Dists/NormalExp.h +28 -21
- data/vendor/EigenRand/EigenRand/EigenRand +11 -6
- data/vendor/EigenRand/EigenRand/Macro.h +13 -7
- data/vendor/EigenRand/EigenRand/MorePacketMath.h +348 -740
- data/vendor/EigenRand/EigenRand/MvDists/Multinomial.h +5 -3
- data/vendor/EigenRand/EigenRand/MvDists/MvNormal.h +9 -3
- data/vendor/EigenRand/EigenRand/PacketFilter.h +11 -253
- data/vendor/EigenRand/EigenRand/PacketRandomEngine.h +21 -47
- data/vendor/EigenRand/EigenRand/RandUtils.h +50 -344
- data/vendor/EigenRand/EigenRand/arch/AVX/MorePacketMath.h +619 -0
- data/vendor/EigenRand/EigenRand/arch/AVX/PacketFilter.h +149 -0
- data/vendor/EigenRand/EigenRand/arch/AVX/RandUtils.h +228 -0
- data/vendor/EigenRand/EigenRand/arch/NEON/MorePacketMath.h +473 -0
- data/vendor/EigenRand/EigenRand/arch/NEON/PacketFilter.h +142 -0
- data/vendor/EigenRand/EigenRand/arch/NEON/RandUtils.h +126 -0
- data/vendor/EigenRand/EigenRand/arch/SSE/MorePacketMath.h +501 -0
- data/vendor/EigenRand/EigenRand/arch/SSE/PacketFilter.h +133 -0
- data/vendor/EigenRand/EigenRand/arch/SSE/RandUtils.h +120 -0
- data/vendor/EigenRand/EigenRand/doc.h +24 -12
- data/vendor/EigenRand/README.md +57 -4
- data/vendor/eigen/COPYING.APACHE +203 -0
- data/vendor/eigen/COPYING.BSD +1 -1
- data/vendor/eigen/COPYING.MINPACK +51 -52
- data/vendor/eigen/Eigen/Cholesky +0 -1
- data/vendor/eigen/Eigen/Core +112 -265
- data/vendor/eigen/Eigen/Eigenvalues +2 -3
- data/vendor/eigen/Eigen/Geometry +5 -8
- data/vendor/eigen/Eigen/Householder +0 -1
- data/vendor/eigen/Eigen/Jacobi +0 -1
- data/vendor/eigen/Eigen/KLUSupport +41 -0
- data/vendor/eigen/Eigen/LU +2 -5
- data/vendor/eigen/Eigen/OrderingMethods +0 -3
- data/vendor/eigen/Eigen/PaStiXSupport +1 -0
- data/vendor/eigen/Eigen/PardisoSupport +0 -0
- data/vendor/eigen/Eigen/QR +2 -3
- data/vendor/eigen/Eigen/QtAlignedMalloc +0 -1
- data/vendor/eigen/Eigen/SVD +0 -1
- data/vendor/eigen/Eigen/Sparse +0 -2
- data/vendor/eigen/Eigen/SparseCholesky +0 -8
- data/vendor/eigen/Eigen/SparseLU +4 -0
- data/vendor/eigen/Eigen/SparseQR +0 -1
- data/vendor/eigen/Eigen/src/Cholesky/LDLT.h +42 -27
- data/vendor/eigen/Eigen/src/Cholesky/LLT.h +39 -23
- data/vendor/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +90 -47
- data/vendor/eigen/Eigen/src/Core/ArithmeticSequence.h +413 -0
- data/vendor/eigen/Eigen/src/Core/Array.h +99 -11
- data/vendor/eigen/Eigen/src/Core/ArrayBase.h +3 -3
- data/vendor/eigen/Eigen/src/Core/ArrayWrapper.h +21 -21
- data/vendor/eigen/Eigen/src/Core/Assign.h +1 -1
- data/vendor/eigen/Eigen/src/Core/AssignEvaluator.h +125 -50
- data/vendor/eigen/Eigen/src/Core/Assign_MKL.h +10 -10
- data/vendor/eigen/Eigen/src/Core/BandMatrix.h +16 -16
- data/vendor/eigen/Eigen/src/Core/Block.h +56 -60
- data/vendor/eigen/Eigen/src/Core/BooleanRedux.h +29 -31
- data/vendor/eigen/Eigen/src/Core/CommaInitializer.h +7 -3
- data/vendor/eigen/Eigen/src/Core/CoreEvaluators.h +325 -272
- data/vendor/eigen/Eigen/src/Core/CoreIterators.h +5 -0
- data/vendor/eigen/Eigen/src/Core/CwiseBinaryOp.h +21 -22
- data/vendor/eigen/Eigen/src/Core/CwiseNullaryOp.h +153 -18
- data/vendor/eigen/Eigen/src/Core/CwiseUnaryOp.h +6 -6
- data/vendor/eigen/Eigen/src/Core/CwiseUnaryView.h +14 -10
- data/vendor/eigen/Eigen/src/Core/DenseBase.h +132 -42
- data/vendor/eigen/Eigen/src/Core/DenseCoeffsBase.h +25 -21
- data/vendor/eigen/Eigen/src/Core/DenseStorage.h +153 -71
- data/vendor/eigen/Eigen/src/Core/Diagonal.h +21 -23
- data/vendor/eigen/Eigen/src/Core/DiagonalMatrix.h +50 -2
- data/vendor/eigen/Eigen/src/Core/DiagonalProduct.h +1 -1
- data/vendor/eigen/Eigen/src/Core/Dot.h +10 -10
- data/vendor/eigen/Eigen/src/Core/EigenBase.h +10 -9
- data/vendor/eigen/Eigen/src/Core/ForceAlignedAccess.h +8 -4
- data/vendor/eigen/Eigen/src/Core/Fuzzy.h +3 -3
- data/vendor/eigen/Eigen/src/Core/GeneralProduct.h +20 -10
- data/vendor/eigen/Eigen/src/Core/GenericPacketMath.h +599 -152
- data/vendor/eigen/Eigen/src/Core/GlobalFunctions.h +40 -33
- data/vendor/eigen/Eigen/src/Core/IO.h +40 -7
- data/vendor/eigen/Eigen/src/Core/IndexedView.h +237 -0
- data/vendor/eigen/Eigen/src/Core/Inverse.h +9 -10
- data/vendor/eigen/Eigen/src/Core/Map.h +7 -7
- data/vendor/eigen/Eigen/src/Core/MapBase.h +10 -3
- data/vendor/eigen/Eigen/src/Core/MathFunctions.h +767 -125
- data/vendor/eigen/Eigen/src/Core/MathFunctionsImpl.h +118 -19
- data/vendor/eigen/Eigen/src/Core/Matrix.h +131 -25
- data/vendor/eigen/Eigen/src/Core/MatrixBase.h +21 -3
- data/vendor/eigen/Eigen/src/Core/NestByValue.h +25 -50
- data/vendor/eigen/Eigen/src/Core/NoAlias.h +4 -3
- data/vendor/eigen/Eigen/src/Core/NumTraits.h +107 -20
- data/vendor/eigen/Eigen/src/Core/PartialReduxEvaluator.h +232 -0
- data/vendor/eigen/Eigen/src/Core/PermutationMatrix.h +3 -31
- data/vendor/eigen/Eigen/src/Core/PlainObjectBase.h +152 -59
- data/vendor/eigen/Eigen/src/Core/Product.h +30 -25
- data/vendor/eigen/Eigen/src/Core/ProductEvaluators.h +192 -125
- data/vendor/eigen/Eigen/src/Core/Random.h +37 -1
- data/vendor/eigen/Eigen/src/Core/Redux.h +180 -170
- data/vendor/eigen/Eigen/src/Core/Ref.h +121 -23
- data/vendor/eigen/Eigen/src/Core/Replicate.h +8 -8
- data/vendor/eigen/Eigen/src/Core/Reshaped.h +454 -0
- data/vendor/eigen/Eigen/src/Core/ReturnByValue.h +7 -5
- data/vendor/eigen/Eigen/src/Core/Reverse.h +18 -12
- data/vendor/eigen/Eigen/src/Core/Select.h +8 -6
- data/vendor/eigen/Eigen/src/Core/SelfAdjointView.h +33 -20
- data/vendor/eigen/Eigen/src/Core/Solve.h +14 -14
- data/vendor/eigen/Eigen/src/Core/SolveTriangular.h +16 -16
- data/vendor/eigen/Eigen/src/Core/SolverBase.h +41 -3
- data/vendor/eigen/Eigen/src/Core/StableNorm.h +100 -70
- data/vendor/eigen/Eigen/src/Core/StlIterators.h +463 -0
- data/vendor/eigen/Eigen/src/Core/Stride.h +9 -4
- data/vendor/eigen/Eigen/src/Core/Swap.h +5 -4
- data/vendor/eigen/Eigen/src/Core/Transpose.h +88 -27
- data/vendor/eigen/Eigen/src/Core/Transpositions.h +26 -47
- data/vendor/eigen/Eigen/src/Core/TriangularMatrix.h +93 -75
- data/vendor/eigen/Eigen/src/Core/VectorBlock.h +5 -5
- data/vendor/eigen/Eigen/src/Core/VectorwiseOp.h +159 -70
- data/vendor/eigen/Eigen/src/Core/Visitor.h +137 -29
- data/vendor/eigen/Eigen/src/Core/arch/AVX/Complex.h +50 -129
- data/vendor/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +126 -337
- data/vendor/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +1092 -155
- data/vendor/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +65 -1
- data/vendor/eigen/Eigen/src/Core/arch/AVX512/Complex.h +422 -0
- data/vendor/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +207 -236
- data/vendor/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1482 -495
- data/vendor/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +89 -0
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +152 -165
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +19 -251
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2937 -0
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +221 -0
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +629 -0
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2042 -392
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/Complex.h +235 -80
- data/vendor/eigen/Eigen/src/Core/arch/Default/BFloat16.h +700 -0
- data/vendor/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +102 -14
- data/vendor/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1649 -0
- data/vendor/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +110 -0
- data/vendor/eigen/Eigen/src/Core/arch/Default/Half.h +942 -0
- data/vendor/eigen/Eigen/src/Core/arch/Default/Settings.h +1 -1
- data/vendor/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +120 -0
- data/vendor/eigen/Eigen/src/Core/arch/{CUDA → GPU}/MathFunctions.h +16 -4
- data/vendor/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1685 -0
- data/vendor/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +80 -0
- data/vendor/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
- data/vendor/eigen/Eigen/src/Core/arch/MSA/Complex.h +648 -0
- data/vendor/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +387 -0
- data/vendor/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1233 -0
- data/vendor/eigen/Eigen/src/Core/arch/NEON/Complex.h +313 -219
- data/vendor/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +183 -0
- data/vendor/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +54 -70
- data/vendor/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4376 -549
- data/vendor/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1419 -0
- data/vendor/eigen/Eigen/src/Core/arch/SSE/Complex.h +59 -179
- data/vendor/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +65 -428
- data/vendor/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +893 -283
- data/vendor/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +65 -0
- data/vendor/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +44 -0
- data/vendor/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +752 -0
- data/vendor/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +49 -0
- data/vendor/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +232 -0
- data/vendor/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +301 -0
- data/vendor/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +670 -0
- data/vendor/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +694 -0
- data/vendor/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +85 -0
- data/vendor/eigen/Eigen/src/Core/arch/ZVector/Complex.h +212 -183
- data/vendor/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +101 -5
- data/vendor/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +510 -395
- data/vendor/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +11 -2
- data/vendor/eigen/Eigen/src/Core/functors/BinaryFunctors.h +112 -46
- data/vendor/eigen/Eigen/src/Core/functors/NullaryFunctors.h +31 -30
- data/vendor/eigen/Eigen/src/Core/functors/StlFunctors.h +32 -2
- data/vendor/eigen/Eigen/src/Core/functors/UnaryFunctors.h +355 -16
- data/vendor/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1075 -586
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +49 -24
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +41 -35
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +6 -6
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +4 -2
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +382 -483
- data/vendor/eigen/Eigen/src/Core/products/Parallelizer.h +22 -5
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +53 -30
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +16 -8
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +8 -6
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointProduct.h +4 -4
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +5 -4
- data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +33 -27
- data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +14 -12
- data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +36 -34
- data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +8 -4
- data/vendor/eigen/Eigen/src/Core/products/TriangularSolverVector.h +13 -10
- data/vendor/eigen/Eigen/src/Core/util/BlasUtil.h +304 -119
- data/vendor/eigen/Eigen/src/Core/util/ConfigureVectorization.h +512 -0
- data/vendor/eigen/Eigen/src/Core/util/Constants.h +25 -9
- data/vendor/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +26 -3
- data/vendor/eigen/Eigen/src/Core/util/ForwardDeclarations.h +29 -9
- data/vendor/eigen/Eigen/src/Core/util/IndexedViewHelper.h +186 -0
- data/vendor/eigen/Eigen/src/Core/util/IntegralConstant.h +272 -0
- data/vendor/eigen/Eigen/src/Core/util/MKL_support.h +8 -1
- data/vendor/eigen/Eigen/src/Core/util/Macros.h +709 -246
- data/vendor/eigen/Eigen/src/Core/util/Memory.h +222 -52
- data/vendor/eigen/Eigen/src/Core/util/Meta.h +355 -77
- data/vendor/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +5 -1
- data/vendor/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
- data/vendor/eigen/Eigen/src/Core/util/StaticAssert.h +8 -5
- data/vendor/eigen/Eigen/src/Core/util/SymbolicIndex.h +293 -0
- data/vendor/eigen/Eigen/src/Core/util/XprHelper.h +65 -30
- data/vendor/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +1 -1
- data/vendor/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +7 -4
- data/vendor/eigen/Eigen/src/Eigenvalues/EigenSolver.h +2 -2
- data/vendor/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +1 -1
- data/vendor/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +2 -2
- data/vendor/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +2 -2
- data/vendor/eigen/Eigen/src/Eigenvalues/RealQZ.h +9 -6
- data/vendor/eigen/Eigen/src/Eigenvalues/RealSchur.h +21 -9
- data/vendor/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +77 -43
- data/vendor/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +20 -15
- data/vendor/eigen/Eigen/src/Geometry/AlignedBox.h +99 -5
- data/vendor/eigen/Eigen/src/Geometry/AngleAxis.h +4 -4
- data/vendor/eigen/Eigen/src/Geometry/EulerAngles.h +3 -3
- data/vendor/eigen/Eigen/src/Geometry/Homogeneous.h +15 -11
- data/vendor/eigen/Eigen/src/Geometry/Hyperplane.h +1 -1
- data/vendor/eigen/Eigen/src/Geometry/OrthoMethods.h +3 -2
- data/vendor/eigen/Eigen/src/Geometry/ParametrizedLine.h +39 -2
- data/vendor/eigen/Eigen/src/Geometry/Quaternion.h +70 -14
- data/vendor/eigen/Eigen/src/Geometry/Rotation2D.h +3 -3
- data/vendor/eigen/Eigen/src/Geometry/Scaling.h +23 -5
- data/vendor/eigen/Eigen/src/Geometry/Transform.h +88 -67
- data/vendor/eigen/Eigen/src/Geometry/Translation.h +6 -12
- data/vendor/eigen/Eigen/src/Geometry/Umeyama.h +1 -1
- data/vendor/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +168 -0
- data/vendor/eigen/Eigen/src/Householder/BlockHouseholder.h +9 -2
- data/vendor/eigen/Eigen/src/Householder/Householder.h +8 -4
- data/vendor/eigen/Eigen/src/Householder/HouseholderSequence.h +123 -48
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +15 -15
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +7 -23
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +5 -22
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +41 -47
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +51 -60
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +70 -20
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +2 -20
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +11 -9
- data/vendor/eigen/Eigen/src/Jacobi/Jacobi.h +31 -10
- data/vendor/eigen/Eigen/src/KLUSupport/KLUSupport.h +358 -0
- data/vendor/eigen/Eigen/src/LU/Determinant.h +35 -19
- data/vendor/eigen/Eigen/src/LU/FullPivLU.h +29 -43
- data/vendor/eigen/Eigen/src/LU/InverseImpl.h +25 -8
- data/vendor/eigen/Eigen/src/LU/PartialPivLU.h +71 -58
- data/vendor/eigen/Eigen/src/LU/arch/InverseSize4.h +351 -0
- data/vendor/eigen/Eigen/src/OrderingMethods/Amd.h +7 -17
- data/vendor/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +297 -277
- data/vendor/eigen/Eigen/src/OrderingMethods/Ordering.h +6 -10
- data/vendor/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +1 -1
- data/vendor/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +9 -7
- data/vendor/eigen/Eigen/src/QR/ColPivHouseholderQR.h +41 -20
- data/vendor/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +100 -27
- data/vendor/eigen/Eigen/src/QR/FullPivHouseholderQR.h +59 -22
- data/vendor/eigen/Eigen/src/QR/HouseholderQR.h +48 -23
- data/vendor/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +25 -3
- data/vendor/eigen/Eigen/src/SVD/BDCSVD.h +183 -63
- data/vendor/eigen/Eigen/src/SVD/JacobiSVD.h +22 -14
- data/vendor/eigen/Eigen/src/SVD/SVDBase.h +83 -22
- data/vendor/eigen/Eigen/src/SVD/UpperBidiagonalization.h +3 -3
- data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +17 -9
- data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +12 -37
- data/vendor/eigen/Eigen/src/SparseCore/AmbiVector.h +3 -2
- data/vendor/eigen/Eigen/src/SparseCore/CompressedStorage.h +16 -0
- data/vendor/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +6 -6
- data/vendor/eigen/Eigen/src/SparseCore/SparseAssign.h +81 -27
- data/vendor/eigen/Eigen/src/SparseCore/SparseBlock.h +25 -57
- data/vendor/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +40 -11
- data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +11 -15
- data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +4 -2
- data/vendor/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +30 -8
- data/vendor/eigen/Eigen/src/SparseCore/SparseMatrix.h +126 -11
- data/vendor/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +5 -12
- data/vendor/eigen/Eigen/src/SparseCore/SparseProduct.h +13 -1
- data/vendor/eigen/Eigen/src/SparseCore/SparseRef.h +7 -7
- data/vendor/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +5 -2
- data/vendor/eigen/Eigen/src/SparseCore/SparseUtil.h +8 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseVector.h +1 -1
- data/vendor/eigen/Eigen/src/SparseCore/SparseView.h +1 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU.h +162 -12
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +1 -1
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +76 -2
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +2 -2
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +1 -1
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +1 -1
- data/vendor/eigen/Eigen/src/SparseQR/SparseQR.h +19 -6
- data/vendor/eigen/Eigen/src/StlSupport/StdDeque.h +2 -12
- data/vendor/eigen/Eigen/src/StlSupport/StdList.h +2 -2
- data/vendor/eigen/Eigen/src/StlSupport/StdVector.h +2 -2
- data/vendor/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +6 -8
- data/vendor/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +175 -39
- data/vendor/eigen/Eigen/src/misc/lapacke.h +5 -4
- data/vendor/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +28 -2
- data/vendor/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +155 -11
- data/vendor/eigen/Eigen/src/plugins/BlockMethods.h +626 -242
- data/vendor/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +14 -0
- data/vendor/eigen/Eigen/src/plugins/IndexedViewMethods.h +262 -0
- data/vendor/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +4 -4
- data/vendor/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +10 -0
- data/vendor/eigen/Eigen/src/plugins/ReshapedMethods.h +149 -0
- data/vendor/eigen/README.md +2 -0
- data/vendor/eigen/bench/btl/README +1 -1
- data/vendor/eigen/bench/tensors/README +6 -7
- data/vendor/eigen/ci/README.md +56 -0
- data/vendor/eigen/demos/mix_eigen_and_c/README +1 -1
- data/vendor/eigen/unsupported/Eigen/CXX11/src/Tensor/README.md +213 -158
- data/vendor/eigen/unsupported/README.txt +1 -1
- data/vendor/tomotopy/README.kr.rst +78 -0
- data/vendor/tomotopy/README.rst +75 -0
- data/vendor/tomotopy/src/Labeling/FoRelevance.cpp +2 -2
- data/vendor/tomotopy/src/Labeling/Phraser.hpp +4 -4
- data/vendor/tomotopy/src/TopicModel/CTModel.hpp +7 -3
- data/vendor/tomotopy/src/TopicModel/DMRModel.hpp +7 -3
- data/vendor/tomotopy/src/TopicModel/DTModel.hpp +6 -3
- data/vendor/tomotopy/src/TopicModel/GDMRModel.hpp +2 -2
- data/vendor/tomotopy/src/TopicModel/HDP.h +1 -0
- data/vendor/tomotopy/src/TopicModel/HDPModel.hpp +57 -6
- data/vendor/tomotopy/src/TopicModel/HLDAModel.hpp +6 -3
- data/vendor/tomotopy/src/TopicModel/HPAModel.hpp +3 -2
- data/vendor/tomotopy/src/TopicModel/LDA.h +3 -3
- data/vendor/tomotopy/src/TopicModel/LDACVB0Model.hpp +5 -5
- data/vendor/tomotopy/src/TopicModel/LDAModel.hpp +50 -19
- data/vendor/tomotopy/src/TopicModel/LLDAModel.hpp +6 -2
- data/vendor/tomotopy/src/TopicModel/MGLDAModel.hpp +3 -2
- data/vendor/tomotopy/src/TopicModel/PAModel.hpp +1 -1
- data/vendor/tomotopy/src/TopicModel/PLDAModel.hpp +6 -2
- data/vendor/tomotopy/src/TopicModel/PT.h +3 -1
- data/vendor/tomotopy/src/TopicModel/PTModel.hpp +36 -3
- data/vendor/tomotopy/src/TopicModel/SLDAModel.hpp +6 -3
- data/vendor/tomotopy/src/TopicModel/TopicModel.hpp +55 -26
- data/vendor/tomotopy/src/Utils/AliasMethod.hpp +5 -4
- data/vendor/tomotopy/src/Utils/Dictionary.h +2 -2
- data/vendor/tomotopy/src/Utils/EigenAddonOps.hpp +36 -1
- data/vendor/tomotopy/src/Utils/MultiNormalDistribution.hpp +1 -1
- data/vendor/tomotopy/src/Utils/TruncMultiNormal.hpp +1 -1
- data/vendor/tomotopy/src/Utils/exception.h +6 -0
- data/vendor/tomotopy/src/Utils/math.h +2 -2
- data/vendor/tomotopy/src/Utils/sample.hpp +14 -12
- data/vendor/tomotopy/src/Utils/serializer.hpp +30 -5
- data/vendor/tomotopy/src/Utils/sse_gamma.h +0 -3
- metadata +64 -18
- data/vendor/eigen/Eigen/CMakeLists.txt +0 -19
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -674
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
- data/vendor/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
- data/vendor/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
@@ -128,7 +128,7 @@ DenseBase<Derived>::Random()
|
|
128
128
|
* \sa class CwiseNullaryOp, setRandom(Index), setRandom(Index,Index)
|
129
129
|
*/
|
130
130
|
template<typename Derived>
|
131
|
-
inline Derived& DenseBase<Derived>::setRandom()
|
131
|
+
EIGEN_DEVICE_FUNC inline Derived& DenseBase<Derived>::setRandom()
|
132
132
|
{
|
133
133
|
return *this = Random(rows(), cols());
|
134
134
|
}
|
@@ -177,6 +177,42 @@ PlainObjectBase<Derived>::setRandom(Index rows, Index cols)
|
|
177
177
|
return setRandom();
|
178
178
|
}
|
179
179
|
|
180
|
+
/** Resizes to the given size, changing only the number of columns, and sets all
|
181
|
+
* coefficients in this expression to random values. For the parameter of type
|
182
|
+
* NoChange_t, just pass the special value \c NoChange.
|
183
|
+
*
|
184
|
+
* Numbers are uniformly spread through their whole definition range for integer types,
|
185
|
+
* and in the [-1:1] range for floating point scalar types.
|
186
|
+
*
|
187
|
+
* \not_reentrant
|
188
|
+
*
|
189
|
+
* \sa DenseBase::setRandom(), setRandom(Index), setRandom(Index, NoChange_t), class CwiseNullaryOp, DenseBase::Random()
|
190
|
+
*/
|
191
|
+
template<typename Derived>
|
192
|
+
EIGEN_STRONG_INLINE Derived&
|
193
|
+
PlainObjectBase<Derived>::setRandom(NoChange_t, Index cols)
|
194
|
+
{
|
195
|
+
return setRandom(rows(), cols);
|
196
|
+
}
|
197
|
+
|
198
|
+
/** Resizes to the given size, changing only the number of rows, and sets all
|
199
|
+
* coefficients in this expression to random values. For the parameter of type
|
200
|
+
* NoChange_t, just pass the special value \c NoChange.
|
201
|
+
*
|
202
|
+
* Numbers are uniformly spread through their whole definition range for integer types,
|
203
|
+
* and in the [-1:1] range for floating point scalar types.
|
204
|
+
*
|
205
|
+
* \not_reentrant
|
206
|
+
*
|
207
|
+
* \sa DenseBase::setRandom(), setRandom(Index), setRandom(NoChange_t, Index), class CwiseNullaryOp, DenseBase::Random()
|
208
|
+
*/
|
209
|
+
template<typename Derived>
|
210
|
+
EIGEN_STRONG_INLINE Derived&
|
211
|
+
PlainObjectBase<Derived>::setRandom(Index rows, NoChange_t)
|
212
|
+
{
|
213
|
+
return setRandom(rows, cols());
|
214
|
+
}
|
215
|
+
|
180
216
|
} // end namespace Eigen
|
181
217
|
|
182
218
|
#endif // EIGEN_RANDOM_H
|
@@ -23,23 +23,29 @@ namespace internal {
|
|
23
23
|
* Part 1 : the logic deciding a strategy for vectorization and unrolling
|
24
24
|
***************************************************************************/
|
25
25
|
|
26
|
-
template<typename Func, typename
|
26
|
+
template<typename Func, typename Evaluator>
|
27
27
|
struct redux_traits
|
28
28
|
{
|
29
29
|
public:
|
30
|
-
typedef typename find_best_packet<typename
|
30
|
+
typedef typename find_best_packet<typename Evaluator::Scalar,Evaluator::SizeAtCompileTime>::type PacketType;
|
31
31
|
enum {
|
32
32
|
PacketSize = unpacket_traits<PacketType>::size,
|
33
|
-
InnerMaxSize = int(
|
34
|
-
?
|
35
|
-
:
|
33
|
+
InnerMaxSize = int(Evaluator::IsRowMajor)
|
34
|
+
? Evaluator::MaxColsAtCompileTime
|
35
|
+
: Evaluator::MaxRowsAtCompileTime,
|
36
|
+
OuterMaxSize = int(Evaluator::IsRowMajor)
|
37
|
+
? Evaluator::MaxRowsAtCompileTime
|
38
|
+
: Evaluator::MaxColsAtCompileTime,
|
39
|
+
SliceVectorizedWork = int(InnerMaxSize)==Dynamic ? Dynamic
|
40
|
+
: int(OuterMaxSize)==Dynamic ? (int(InnerMaxSize)>=int(PacketSize) ? Dynamic : 0)
|
41
|
+
: (int(InnerMaxSize)/int(PacketSize)) * int(OuterMaxSize)
|
36
42
|
};
|
37
43
|
|
38
44
|
enum {
|
39
|
-
MightVectorize = (int(
|
45
|
+
MightVectorize = (int(Evaluator::Flags)&ActualPacketAccessBit)
|
40
46
|
&& (functor_traits<Func>::PacketAccess),
|
41
|
-
MayLinearVectorize = bool(MightVectorize) && (int(
|
42
|
-
MaySliceVectorize = bool(MightVectorize) && int(
|
47
|
+
MayLinearVectorize = bool(MightVectorize) && (int(Evaluator::Flags)&LinearAccessBit),
|
48
|
+
MaySliceVectorize = bool(MightVectorize) && (int(SliceVectorizedWork)==Dynamic || int(SliceVectorizedWork)>=3)
|
43
49
|
};
|
44
50
|
|
45
51
|
public:
|
@@ -51,8 +57,8 @@ public:
|
|
51
57
|
|
52
58
|
public:
|
53
59
|
enum {
|
54
|
-
Cost =
|
55
|
-
:
|
60
|
+
Cost = Evaluator::SizeAtCompileTime == Dynamic ? HugeCost
|
61
|
+
: int(Evaluator::SizeAtCompileTime) * int(Evaluator::CoeffReadCost) + (Evaluator::SizeAtCompileTime-1) * functor_traits<Func>::Cost,
|
56
62
|
UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize))
|
57
63
|
};
|
58
64
|
|
@@ -64,18 +70,20 @@ public:
|
|
64
70
|
#ifdef EIGEN_DEBUG_ASSIGN
|
65
71
|
static void debug()
|
66
72
|
{
|
67
|
-
std::cerr << "Xpr: " << typeid(typename
|
73
|
+
std::cerr << "Xpr: " << typeid(typename Evaluator::XprType).name() << std::endl;
|
68
74
|
std::cerr.setf(std::ios::hex, std::ios::basefield);
|
69
|
-
EIGEN_DEBUG_VAR(
|
75
|
+
EIGEN_DEBUG_VAR(Evaluator::Flags)
|
70
76
|
std::cerr.unsetf(std::ios::hex);
|
71
77
|
EIGEN_DEBUG_VAR(InnerMaxSize)
|
78
|
+
EIGEN_DEBUG_VAR(OuterMaxSize)
|
79
|
+
EIGEN_DEBUG_VAR(SliceVectorizedWork)
|
72
80
|
EIGEN_DEBUG_VAR(PacketSize)
|
73
81
|
EIGEN_DEBUG_VAR(MightVectorize)
|
74
82
|
EIGEN_DEBUG_VAR(MayLinearVectorize)
|
75
83
|
EIGEN_DEBUG_VAR(MaySliceVectorize)
|
76
|
-
|
84
|
+
std::cerr << "Traversal" << " = " << Traversal << " (" << demangle_traversal(Traversal) << ")" << std::endl;
|
77
85
|
EIGEN_DEBUG_VAR(UnrollingLimit)
|
78
|
-
|
86
|
+
std::cerr << "Unrolling" << " = " << Unrolling << " (" << demangle_unrolling(Unrolling) << ")" << std::endl;
|
79
87
|
std::cerr << std::endl;
|
80
88
|
}
|
81
89
|
#endif
|
@@ -87,88 +95,86 @@ public:
|
|
87
95
|
|
88
96
|
/*** no vectorization ***/
|
89
97
|
|
90
|
-
template<typename Func, typename
|
98
|
+
template<typename Func, typename Evaluator, int Start, int Length>
|
91
99
|
struct redux_novec_unroller
|
92
100
|
{
|
93
101
|
enum {
|
94
102
|
HalfLength = Length/2
|
95
103
|
};
|
96
104
|
|
97
|
-
typedef typename
|
105
|
+
typedef typename Evaluator::Scalar Scalar;
|
98
106
|
|
99
107
|
EIGEN_DEVICE_FUNC
|
100
|
-
static EIGEN_STRONG_INLINE Scalar run(const
|
108
|
+
static EIGEN_STRONG_INLINE Scalar run(const Evaluator &eval, const Func& func)
|
101
109
|
{
|
102
|
-
return func(redux_novec_unroller<Func,
|
103
|
-
redux_novec_unroller<Func,
|
110
|
+
return func(redux_novec_unroller<Func, Evaluator, Start, HalfLength>::run(eval,func),
|
111
|
+
redux_novec_unroller<Func, Evaluator, Start+HalfLength, Length-HalfLength>::run(eval,func));
|
104
112
|
}
|
105
113
|
};
|
106
114
|
|
107
|
-
template<typename Func, typename
|
108
|
-
struct redux_novec_unroller<Func,
|
115
|
+
template<typename Func, typename Evaluator, int Start>
|
116
|
+
struct redux_novec_unroller<Func, Evaluator, Start, 1>
|
109
117
|
{
|
110
118
|
enum {
|
111
|
-
outer = Start /
|
112
|
-
inner = Start %
|
119
|
+
outer = Start / Evaluator::InnerSizeAtCompileTime,
|
120
|
+
inner = Start % Evaluator::InnerSizeAtCompileTime
|
113
121
|
};
|
114
122
|
|
115
|
-
typedef typename
|
123
|
+
typedef typename Evaluator::Scalar Scalar;
|
116
124
|
|
117
125
|
EIGEN_DEVICE_FUNC
|
118
|
-
static EIGEN_STRONG_INLINE Scalar run(const
|
126
|
+
static EIGEN_STRONG_INLINE Scalar run(const Evaluator &eval, const Func&)
|
119
127
|
{
|
120
|
-
return
|
128
|
+
return eval.coeffByOuterInner(outer, inner);
|
121
129
|
}
|
122
130
|
};
|
123
131
|
|
124
132
|
// This is actually dead code and will never be called. It is required
|
125
133
|
// to prevent false warnings regarding failed inlining though
|
126
134
|
// for 0 length run() will never be called at all.
|
127
|
-
template<typename Func, typename
|
128
|
-
struct redux_novec_unroller<Func,
|
135
|
+
template<typename Func, typename Evaluator, int Start>
|
136
|
+
struct redux_novec_unroller<Func, Evaluator, Start, 0>
|
129
137
|
{
|
130
|
-
typedef typename
|
138
|
+
typedef typename Evaluator::Scalar Scalar;
|
131
139
|
EIGEN_DEVICE_FUNC
|
132
|
-
static EIGEN_STRONG_INLINE Scalar run(const
|
140
|
+
static EIGEN_STRONG_INLINE Scalar run(const Evaluator&, const Func&) { return Scalar(); }
|
133
141
|
};
|
134
142
|
|
135
143
|
/*** vectorization ***/
|
136
144
|
|
137
|
-
template<typename Func, typename
|
145
|
+
template<typename Func, typename Evaluator, int Start, int Length>
|
138
146
|
struct redux_vec_unroller
|
139
147
|
{
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
};
|
144
|
-
|
145
|
-
typedef typename Derived::Scalar Scalar;
|
146
|
-
typedef typename redux_traits<Func, Derived>::PacketType PacketScalar;
|
147
|
-
|
148
|
-
static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func& func)
|
148
|
+
template<typename PacketType>
|
149
|
+
EIGEN_DEVICE_FUNC
|
150
|
+
static EIGEN_STRONG_INLINE PacketType run(const Evaluator &eval, const Func& func)
|
149
151
|
{
|
152
|
+
enum {
|
153
|
+
PacketSize = unpacket_traits<PacketType>::size,
|
154
|
+
HalfLength = Length/2
|
155
|
+
};
|
156
|
+
|
150
157
|
return func.packetOp(
|
151
|
-
redux_vec_unroller<Func,
|
152
|
-
redux_vec_unroller<Func,
|
158
|
+
redux_vec_unroller<Func, Evaluator, Start, HalfLength>::template run<PacketType>(eval,func),
|
159
|
+
redux_vec_unroller<Func, Evaluator, Start+HalfLength, Length-HalfLength>::template run<PacketType>(eval,func) );
|
153
160
|
}
|
154
161
|
};
|
155
162
|
|
156
|
-
template<typename Func, typename
|
157
|
-
struct redux_vec_unroller<Func,
|
163
|
+
template<typename Func, typename Evaluator, int Start>
|
164
|
+
struct redux_vec_unroller<Func, Evaluator, Start, 1>
|
158
165
|
{
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
inner = index % int(Derived::InnerSizeAtCompileTime),
|
163
|
-
alignment = Derived::Alignment
|
164
|
-
};
|
165
|
-
|
166
|
-
typedef typename Derived::Scalar Scalar;
|
167
|
-
typedef typename redux_traits<Func, Derived>::PacketType PacketScalar;
|
168
|
-
|
169
|
-
static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func&)
|
166
|
+
template<typename PacketType>
|
167
|
+
EIGEN_DEVICE_FUNC
|
168
|
+
static EIGEN_STRONG_INLINE PacketType run(const Evaluator &eval, const Func&)
|
170
169
|
{
|
171
|
-
|
170
|
+
enum {
|
171
|
+
PacketSize = unpacket_traits<PacketType>::size,
|
172
|
+
index = Start * PacketSize,
|
173
|
+
outer = index / int(Evaluator::InnerSizeAtCompileTime),
|
174
|
+
inner = index % int(Evaluator::InnerSizeAtCompileTime),
|
175
|
+
alignment = Evaluator::Alignment
|
176
|
+
};
|
177
|
+
return eval.template packetByOuterInner<alignment,PacketType>(outer, inner);
|
172
178
|
}
|
173
179
|
};
|
174
180
|
|
@@ -176,53 +182,65 @@ struct redux_vec_unroller<Func, Derived, Start, 1>
|
|
176
182
|
* Part 3 : implementation of all cases
|
177
183
|
***************************************************************************/
|
178
184
|
|
179
|
-
template<typename Func, typename
|
180
|
-
int Traversal = redux_traits<Func,
|
181
|
-
int Unrolling = redux_traits<Func,
|
185
|
+
template<typename Func, typename Evaluator,
|
186
|
+
int Traversal = redux_traits<Func, Evaluator>::Traversal,
|
187
|
+
int Unrolling = redux_traits<Func, Evaluator>::Unrolling
|
182
188
|
>
|
183
189
|
struct redux_impl;
|
184
190
|
|
185
|
-
template<typename Func, typename
|
186
|
-
struct redux_impl<Func,
|
191
|
+
template<typename Func, typename Evaluator>
|
192
|
+
struct redux_impl<Func, Evaluator, DefaultTraversal, NoUnrolling>
|
187
193
|
{
|
188
|
-
typedef typename
|
189
|
-
|
190
|
-
|
194
|
+
typedef typename Evaluator::Scalar Scalar;
|
195
|
+
|
196
|
+
template<typename XprType>
|
197
|
+
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE
|
198
|
+
Scalar run(const Evaluator &eval, const Func& func, const XprType& xpr)
|
191
199
|
{
|
192
|
-
eigen_assert(
|
200
|
+
eigen_assert(xpr.rows()>0 && xpr.cols()>0 && "you are using an empty matrix");
|
193
201
|
Scalar res;
|
194
|
-
res =
|
195
|
-
for(Index i = 1; i <
|
196
|
-
res = func(res,
|
197
|
-
for(Index i = 1; i <
|
198
|
-
for(Index j = 0; j <
|
199
|
-
res = func(res,
|
202
|
+
res = eval.coeffByOuterInner(0, 0);
|
203
|
+
for(Index i = 1; i < xpr.innerSize(); ++i)
|
204
|
+
res = func(res, eval.coeffByOuterInner(0, i));
|
205
|
+
for(Index i = 1; i < xpr.outerSize(); ++i)
|
206
|
+
for(Index j = 0; j < xpr.innerSize(); ++j)
|
207
|
+
res = func(res, eval.coeffByOuterInner(i, j));
|
200
208
|
return res;
|
201
209
|
}
|
202
210
|
};
|
203
211
|
|
204
|
-
template<typename Func, typename
|
205
|
-
struct redux_impl<Func,
|
206
|
-
:
|
207
|
-
{
|
212
|
+
template<typename Func, typename Evaluator>
|
213
|
+
struct redux_impl<Func,Evaluator, DefaultTraversal, CompleteUnrolling>
|
214
|
+
: redux_novec_unroller<Func,Evaluator, 0, Evaluator::SizeAtCompileTime>
|
215
|
+
{
|
216
|
+
typedef redux_novec_unroller<Func,Evaluator, 0, Evaluator::SizeAtCompileTime> Base;
|
217
|
+
typedef typename Evaluator::Scalar Scalar;
|
218
|
+
template<typename XprType>
|
219
|
+
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE
|
220
|
+
Scalar run(const Evaluator &eval, const Func& func, const XprType& /*xpr*/)
|
221
|
+
{
|
222
|
+
return Base::run(eval,func);
|
223
|
+
}
|
224
|
+
};
|
208
225
|
|
209
|
-
template<typename Func, typename
|
210
|
-
struct redux_impl<Func,
|
226
|
+
template<typename Func, typename Evaluator>
|
227
|
+
struct redux_impl<Func, Evaluator, LinearVectorizedTraversal, NoUnrolling>
|
211
228
|
{
|
212
|
-
typedef typename
|
213
|
-
typedef typename redux_traits<Func,
|
229
|
+
typedef typename Evaluator::Scalar Scalar;
|
230
|
+
typedef typename redux_traits<Func, Evaluator>::PacketType PacketScalar;
|
214
231
|
|
215
|
-
|
232
|
+
template<typename XprType>
|
233
|
+
static Scalar run(const Evaluator &eval, const Func& func, const XprType& xpr)
|
216
234
|
{
|
217
|
-
const Index size =
|
235
|
+
const Index size = xpr.size();
|
218
236
|
|
219
|
-
const Index packetSize = redux_traits<Func,
|
237
|
+
const Index packetSize = redux_traits<Func, Evaluator>::PacketSize;
|
220
238
|
const int packetAlignment = unpacket_traits<PacketScalar>::alignment;
|
221
239
|
enum {
|
222
|
-
alignment0 = (bool(
|
223
|
-
alignment = EIGEN_PLAIN_ENUM_MAX(alignment0,
|
240
|
+
alignment0 = (bool(Evaluator::Flags & DirectAccessBit) && bool(packet_traits<Scalar>::AlignedOnScalar)) ? int(packetAlignment) : int(Unaligned),
|
241
|
+
alignment = EIGEN_PLAIN_ENUM_MAX(alignment0, Evaluator::Alignment)
|
224
242
|
};
|
225
|
-
const Index alignedStart = internal::first_default_aligned(
|
243
|
+
const Index alignedStart = internal::first_default_aligned(xpr);
|
226
244
|
const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize);
|
227
245
|
const Index alignedSize = ((size-alignedStart)/(packetSize))*(packetSize);
|
228
246
|
const Index alignedEnd2 = alignedStart + alignedSize2;
|
@@ -230,34 +248,34 @@ struct redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
|
|
230
248
|
Scalar res;
|
231
249
|
if(alignedSize)
|
232
250
|
{
|
233
|
-
PacketScalar packet_res0 =
|
251
|
+
PacketScalar packet_res0 = eval.template packet<alignment,PacketScalar>(alignedStart);
|
234
252
|
if(alignedSize>packetSize) // we have at least two packets to partly unroll the loop
|
235
253
|
{
|
236
|
-
PacketScalar packet_res1 =
|
254
|
+
PacketScalar packet_res1 = eval.template packet<alignment,PacketScalar>(alignedStart+packetSize);
|
237
255
|
for(Index index = alignedStart + 2*packetSize; index < alignedEnd2; index += 2*packetSize)
|
238
256
|
{
|
239
|
-
packet_res0 = func.packetOp(packet_res0,
|
240
|
-
packet_res1 = func.packetOp(packet_res1,
|
257
|
+
packet_res0 = func.packetOp(packet_res0, eval.template packet<alignment,PacketScalar>(index));
|
258
|
+
packet_res1 = func.packetOp(packet_res1, eval.template packet<alignment,PacketScalar>(index+packetSize));
|
241
259
|
}
|
242
260
|
|
243
261
|
packet_res0 = func.packetOp(packet_res0,packet_res1);
|
244
262
|
if(alignedEnd>alignedEnd2)
|
245
|
-
packet_res0 = func.packetOp(packet_res0,
|
263
|
+
packet_res0 = func.packetOp(packet_res0, eval.template packet<alignment,PacketScalar>(alignedEnd2));
|
246
264
|
}
|
247
265
|
res = func.predux(packet_res0);
|
248
266
|
|
249
267
|
for(Index index = 0; index < alignedStart; ++index)
|
250
|
-
res = func(res,
|
268
|
+
res = func(res,eval.coeff(index));
|
251
269
|
|
252
270
|
for(Index index = alignedEnd; index < size; ++index)
|
253
|
-
res = func(res,
|
271
|
+
res = func(res,eval.coeff(index));
|
254
272
|
}
|
255
273
|
else // too small to vectorize anything.
|
256
274
|
// since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
|
257
275
|
{
|
258
|
-
res =
|
276
|
+
res = eval.coeff(0);
|
259
277
|
for(Index index = 1; index < size; ++index)
|
260
|
-
res = func(res,
|
278
|
+
res = func(res,eval.coeff(index));
|
261
279
|
}
|
262
280
|
|
263
281
|
return res;
|
@@ -265,130 +283,108 @@ struct redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
|
|
265
283
|
};
|
266
284
|
|
267
285
|
// NOTE: for SliceVectorizedTraversal we simply bypass unrolling
|
268
|
-
template<typename Func, typename
|
269
|
-
struct redux_impl<Func,
|
286
|
+
template<typename Func, typename Evaluator, int Unrolling>
|
287
|
+
struct redux_impl<Func, Evaluator, SliceVectorizedTraversal, Unrolling>
|
270
288
|
{
|
271
|
-
typedef typename
|
272
|
-
typedef typename redux_traits<Func,
|
289
|
+
typedef typename Evaluator::Scalar Scalar;
|
290
|
+
typedef typename redux_traits<Func, Evaluator>::PacketType PacketType;
|
273
291
|
|
274
|
-
|
292
|
+
template<typename XprType>
|
293
|
+
EIGEN_DEVICE_FUNC static Scalar run(const Evaluator &eval, const Func& func, const XprType& xpr)
|
275
294
|
{
|
276
|
-
eigen_assert(
|
277
|
-
const Index innerSize =
|
278
|
-
const Index outerSize =
|
295
|
+
eigen_assert(xpr.rows()>0 && xpr.cols()>0 && "you are using an empty matrix");
|
296
|
+
const Index innerSize = xpr.innerSize();
|
297
|
+
const Index outerSize = xpr.outerSize();
|
279
298
|
enum {
|
280
|
-
packetSize = redux_traits<Func,
|
299
|
+
packetSize = redux_traits<Func, Evaluator>::PacketSize
|
281
300
|
};
|
282
301
|
const Index packetedInnerSize = ((innerSize)/packetSize)*packetSize;
|
283
302
|
Scalar res;
|
284
303
|
if(packetedInnerSize)
|
285
304
|
{
|
286
|
-
PacketType packet_res =
|
305
|
+
PacketType packet_res = eval.template packet<Unaligned,PacketType>(0,0);
|
287
306
|
for(Index j=0; j<outerSize; ++j)
|
288
307
|
for(Index i=(j==0?packetSize:0); i<packetedInnerSize; i+=Index(packetSize))
|
289
|
-
packet_res = func.packetOp(packet_res,
|
308
|
+
packet_res = func.packetOp(packet_res, eval.template packetByOuterInner<Unaligned,PacketType>(j,i));
|
290
309
|
|
291
310
|
res = func.predux(packet_res);
|
292
311
|
for(Index j=0; j<outerSize; ++j)
|
293
312
|
for(Index i=packetedInnerSize; i<innerSize; ++i)
|
294
|
-
res = func(res,
|
313
|
+
res = func(res, eval.coeffByOuterInner(j,i));
|
295
314
|
}
|
296
315
|
else // too small to vectorize anything.
|
297
316
|
// since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
|
298
317
|
{
|
299
|
-
res = redux_impl<Func,
|
318
|
+
res = redux_impl<Func, Evaluator, DefaultTraversal, NoUnrolling>::run(eval, func, xpr);
|
300
319
|
}
|
301
320
|
|
302
321
|
return res;
|
303
322
|
}
|
304
323
|
};
|
305
324
|
|
306
|
-
template<typename Func, typename
|
307
|
-
struct redux_impl<Func,
|
325
|
+
template<typename Func, typename Evaluator>
|
326
|
+
struct redux_impl<Func, Evaluator, LinearVectorizedTraversal, CompleteUnrolling>
|
308
327
|
{
|
309
|
-
typedef typename
|
328
|
+
typedef typename Evaluator::Scalar Scalar;
|
310
329
|
|
311
|
-
typedef typename redux_traits<Func,
|
330
|
+
typedef typename redux_traits<Func, Evaluator>::PacketType PacketType;
|
312
331
|
enum {
|
313
|
-
PacketSize = redux_traits<Func,
|
314
|
-
Size =
|
315
|
-
VectorizedSize = (Size / PacketSize) * PacketSize
|
332
|
+
PacketSize = redux_traits<Func, Evaluator>::PacketSize,
|
333
|
+
Size = Evaluator::SizeAtCompileTime,
|
334
|
+
VectorizedSize = (int(Size) / int(PacketSize)) * int(PacketSize)
|
316
335
|
};
|
317
|
-
|
336
|
+
|
337
|
+
template<typename XprType>
|
338
|
+
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE
|
339
|
+
Scalar run(const Evaluator &eval, const Func& func, const XprType &xpr)
|
318
340
|
{
|
319
|
-
|
341
|
+
EIGEN_ONLY_USED_FOR_DEBUG(xpr)
|
342
|
+
eigen_assert(xpr.rows()>0 && xpr.cols()>0 && "you are using an empty matrix");
|
320
343
|
if (VectorizedSize > 0) {
|
321
|
-
Scalar res = func.predux(redux_vec_unroller<Func,
|
344
|
+
Scalar res = func.predux(redux_vec_unroller<Func, Evaluator, 0, Size / PacketSize>::template run<PacketType>(eval,func));
|
322
345
|
if (VectorizedSize != Size)
|
323
|
-
res = func(res,redux_novec_unroller<Func,
|
346
|
+
res = func(res,redux_novec_unroller<Func, Evaluator, VectorizedSize, Size-VectorizedSize>::run(eval,func));
|
324
347
|
return res;
|
325
348
|
}
|
326
349
|
else {
|
327
|
-
return redux_novec_unroller<Func,
|
350
|
+
return redux_novec_unroller<Func, Evaluator, 0, Size>::run(eval,func);
|
328
351
|
}
|
329
352
|
}
|
330
353
|
};
|
331
354
|
|
332
355
|
// evaluator adaptor
|
333
356
|
template<typename _XprType>
|
334
|
-
class redux_evaluator
|
357
|
+
class redux_evaluator : public internal::evaluator<_XprType>
|
335
358
|
{
|
359
|
+
typedef internal::evaluator<_XprType> Base;
|
336
360
|
public:
|
337
361
|
typedef _XprType XprType;
|
338
|
-
EIGEN_DEVICE_FUNC
|
362
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
363
|
+
explicit redux_evaluator(const XprType &xpr) : Base(xpr) {}
|
339
364
|
|
340
365
|
typedef typename XprType::Scalar Scalar;
|
341
366
|
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
342
367
|
typedef typename XprType::PacketScalar PacketScalar;
|
343
|
-
typedef typename XprType::PacketReturnType PacketReturnType;
|
344
368
|
|
345
369
|
enum {
|
346
370
|
MaxRowsAtCompileTime = XprType::MaxRowsAtCompileTime,
|
347
371
|
MaxColsAtCompileTime = XprType::MaxColsAtCompileTime,
|
348
372
|
// TODO we should not remove DirectAccessBit and rather find an elegant way to query the alignment offset at runtime from the evaluator
|
349
|
-
Flags =
|
373
|
+
Flags = Base::Flags & ~DirectAccessBit,
|
350
374
|
IsRowMajor = XprType::IsRowMajor,
|
351
375
|
SizeAtCompileTime = XprType::SizeAtCompileTime,
|
352
|
-
InnerSizeAtCompileTime = XprType::InnerSizeAtCompileTime
|
353
|
-
CoeffReadCost = evaluator<XprType>::CoeffReadCost,
|
354
|
-
Alignment = evaluator<XprType>::Alignment
|
376
|
+
InnerSizeAtCompileTime = XprType::InnerSizeAtCompileTime
|
355
377
|
};
|
356
378
|
|
357
|
-
EIGEN_DEVICE_FUNC
|
358
|
-
EIGEN_DEVICE_FUNC Index cols() const { return m_xpr.cols(); }
|
359
|
-
EIGEN_DEVICE_FUNC Index size() const { return m_xpr.size(); }
|
360
|
-
EIGEN_DEVICE_FUNC Index innerSize() const { return m_xpr.innerSize(); }
|
361
|
-
EIGEN_DEVICE_FUNC Index outerSize() const { return m_xpr.outerSize(); }
|
362
|
-
|
363
|
-
EIGEN_DEVICE_FUNC
|
364
|
-
CoeffReturnType coeff(Index row, Index col) const
|
365
|
-
{ return m_evaluator.coeff(row, col); }
|
366
|
-
|
367
|
-
EIGEN_DEVICE_FUNC
|
368
|
-
CoeffReturnType coeff(Index index) const
|
369
|
-
{ return m_evaluator.coeff(index); }
|
370
|
-
|
371
|
-
template<int LoadMode, typename PacketType>
|
372
|
-
PacketType packet(Index row, Index col) const
|
373
|
-
{ return m_evaluator.template packet<LoadMode,PacketType>(row, col); }
|
374
|
-
|
375
|
-
template<int LoadMode, typename PacketType>
|
376
|
-
PacketType packet(Index index) const
|
377
|
-
{ return m_evaluator.template packet<LoadMode,PacketType>(index); }
|
378
|
-
|
379
|
-
EIGEN_DEVICE_FUNC
|
379
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
380
380
|
CoeffReturnType coeffByOuterInner(Index outer, Index inner) const
|
381
|
-
{ return
|
381
|
+
{ return Base::coeff(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }
|
382
382
|
|
383
383
|
template<int LoadMode, typename PacketType>
|
384
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
384
385
|
PacketType packetByOuterInner(Index outer, Index inner) const
|
385
|
-
{ return
|
386
|
+
{ return Base::template packet<LoadMode,PacketType>(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }
|
386
387
|
|
387
|
-
const XprType & nestedExpression() const { return m_xpr; }
|
388
|
-
|
389
|
-
protected:
|
390
|
-
internal::evaluator<XprType> m_evaluator;
|
391
|
-
const XprType &m_xpr;
|
392
388
|
};
|
393
389
|
|
394
390
|
} // end namespace internal
|
@@ -403,39 +399,53 @@ protected:
|
|
403
399
|
* The template parameter \a BinaryOp is the type of the functor \a func which must be
|
404
400
|
* an associative operator. Both current C++98 and C++11 functor styles are handled.
|
405
401
|
*
|
402
|
+
* \warning the matrix must be not empty, otherwise an assertion is triggered.
|
403
|
+
*
|
406
404
|
* \sa DenseBase::sum(), DenseBase::minCoeff(), DenseBase::maxCoeff(), MatrixBase::colwise(), MatrixBase::rowwise()
|
407
405
|
*/
|
408
406
|
template<typename Derived>
|
409
407
|
template<typename Func>
|
410
|
-
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
|
408
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
|
411
409
|
DenseBase<Derived>::redux(const Func& func) const
|
412
410
|
{
|
413
411
|
eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix");
|
414
412
|
|
415
413
|
typedef typename internal::redux_evaluator<Derived> ThisEvaluator;
|
416
414
|
ThisEvaluator thisEval(derived());
|
417
|
-
|
418
|
-
|
415
|
+
|
416
|
+
// The initial expression is passed to the reducer as an additional argument instead of
|
417
|
+
// passing it as a member of redux_evaluator to help
|
418
|
+
return internal::redux_impl<Func, ThisEvaluator>::run(thisEval, func, derived());
|
419
419
|
}
|
420
420
|
|
421
421
|
/** \returns the minimum of all coefficients of \c *this.
|
422
|
-
*
|
422
|
+
* In case \c *this contains NaN, NaNPropagation determines the behavior:
|
423
|
+
* NaNPropagation == PropagateFast : undefined
|
424
|
+
* NaNPropagation == PropagateNaN : result is NaN
|
425
|
+
* NaNPropagation == PropagateNumbers : result is minimum of elements that are not NaN
|
426
|
+
* \warning the matrix must be not empty, otherwise an assertion is triggered.
|
423
427
|
*/
|
424
428
|
template<typename Derived>
|
425
|
-
|
429
|
+
template<int NaNPropagation>
|
430
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
|
426
431
|
DenseBase<Derived>::minCoeff() const
|
427
432
|
{
|
428
|
-
return derived().redux(Eigen::internal::scalar_min_op<Scalar,Scalar>());
|
433
|
+
return derived().redux(Eigen::internal::scalar_min_op<Scalar,Scalar, NaNPropagation>());
|
429
434
|
}
|
430
435
|
|
431
|
-
/** \returns the maximum of all coefficients of \c *this.
|
432
|
-
*
|
436
|
+
/** \returns the maximum of all coefficients of \c *this.
|
437
|
+
* In case \c *this contains NaN, NaNPropagation determines the behavior:
|
438
|
+
* NaNPropagation == PropagateFast : undefined
|
439
|
+
* NaNPropagation == PropagateNaN : result is NaN
|
440
|
+
* NaNPropagation == PropagateNumbers : result is maximum of elements that are not NaN
|
441
|
+
* \warning the matrix must be not empty, otherwise an assertion is triggered.
|
433
442
|
*/
|
434
443
|
template<typename Derived>
|
435
|
-
|
444
|
+
template<int NaNPropagation>
|
445
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
|
436
446
|
DenseBase<Derived>::maxCoeff() const
|
437
447
|
{
|
438
|
-
return derived().redux(Eigen::internal::scalar_max_op<Scalar,Scalar>());
|
448
|
+
return derived().redux(Eigen::internal::scalar_max_op<Scalar,Scalar, NaNPropagation>());
|
439
449
|
}
|
440
450
|
|
441
451
|
/** \returns the sum of all coefficients of \c *this
|
@@ -445,7 +455,7 @@ DenseBase<Derived>::maxCoeff() const
|
|
445
455
|
* \sa trace(), prod(), mean()
|
446
456
|
*/
|
447
457
|
template<typename Derived>
|
448
|
-
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
|
458
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
|
449
459
|
DenseBase<Derived>::sum() const
|
450
460
|
{
|
451
461
|
if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
|
@@ -458,7 +468,7 @@ DenseBase<Derived>::sum() const
|
|
458
468
|
* \sa trace(), prod(), sum()
|
459
469
|
*/
|
460
470
|
template<typename Derived>
|
461
|
-
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
|
471
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
|
462
472
|
DenseBase<Derived>::mean() const
|
463
473
|
{
|
464
474
|
#ifdef __INTEL_COMPILER
|
@@ -479,7 +489,7 @@ DenseBase<Derived>::mean() const
|
|
479
489
|
* \sa sum(), mean(), trace()
|
480
490
|
*/
|
481
491
|
template<typename Derived>
|
482
|
-
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
|
492
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
|
483
493
|
DenseBase<Derived>::prod() const
|
484
494
|
{
|
485
495
|
if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
|
@@ -494,7 +504,7 @@ DenseBase<Derived>::prod() const
|
|
494
504
|
* \sa diagonal(), sum()
|
495
505
|
*/
|
496
506
|
template<typename Derived>
|
497
|
-
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
|
507
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
|
498
508
|
MatrixBase<Derived>::trace() const
|
499
509
|
{
|
500
510
|
return derived().diagonal().sum();
|