tomoto 0.2.2 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/README.md +8 -10
- data/ext/tomoto/ct.cpp +11 -11
- data/ext/tomoto/dmr.cpp +14 -13
- data/ext/tomoto/dt.cpp +14 -14
- data/ext/tomoto/extconf.rb +7 -5
- data/ext/tomoto/gdmr.cpp +7 -7
- data/ext/tomoto/hdp.cpp +9 -9
- data/ext/tomoto/hlda.cpp +13 -13
- data/ext/tomoto/hpa.cpp +5 -5
- data/ext/tomoto/lda.cpp +42 -39
- data/ext/tomoto/llda.cpp +6 -6
- data/ext/tomoto/mglda.cpp +15 -15
- data/ext/tomoto/pa.cpp +6 -6
- data/ext/tomoto/plda.cpp +6 -6
- data/ext/tomoto/slda.cpp +8 -8
- data/ext/tomoto/{ext.cpp → tomoto.cpp} +8 -8
- data/ext/tomoto/utils.h +16 -70
- data/lib/tomoto/version.rb +1 -1
- data/lib/tomoto.rb +5 -1
- data/vendor/EigenRand/EigenRand/Core.h +10 -10
- data/vendor/EigenRand/EigenRand/Dists/Basic.h +208 -9
- data/vendor/EigenRand/EigenRand/Dists/Discrete.h +52 -31
- data/vendor/EigenRand/EigenRand/Dists/GammaPoisson.h +9 -8
- data/vendor/EigenRand/EigenRand/Dists/NormalExp.h +28 -21
- data/vendor/EigenRand/EigenRand/EigenRand +11 -6
- data/vendor/EigenRand/EigenRand/Macro.h +13 -7
- data/vendor/EigenRand/EigenRand/MorePacketMath.h +348 -740
- data/vendor/EigenRand/EigenRand/MvDists/Multinomial.h +5 -3
- data/vendor/EigenRand/EigenRand/MvDists/MvNormal.h +9 -3
- data/vendor/EigenRand/EigenRand/PacketFilter.h +11 -253
- data/vendor/EigenRand/EigenRand/PacketRandomEngine.h +21 -47
- data/vendor/EigenRand/EigenRand/RandUtils.h +50 -344
- data/vendor/EigenRand/EigenRand/arch/AVX/MorePacketMath.h +619 -0
- data/vendor/EigenRand/EigenRand/arch/AVX/PacketFilter.h +149 -0
- data/vendor/EigenRand/EigenRand/arch/AVX/RandUtils.h +228 -0
- data/vendor/EigenRand/EigenRand/arch/NEON/MorePacketMath.h +473 -0
- data/vendor/EigenRand/EigenRand/arch/NEON/PacketFilter.h +142 -0
- data/vendor/EigenRand/EigenRand/arch/NEON/RandUtils.h +126 -0
- data/vendor/EigenRand/EigenRand/arch/SSE/MorePacketMath.h +501 -0
- data/vendor/EigenRand/EigenRand/arch/SSE/PacketFilter.h +133 -0
- data/vendor/EigenRand/EigenRand/arch/SSE/RandUtils.h +120 -0
- data/vendor/EigenRand/EigenRand/doc.h +24 -12
- data/vendor/EigenRand/README.md +57 -4
- data/vendor/eigen/COPYING.APACHE +203 -0
- data/vendor/eigen/COPYING.BSD +1 -1
- data/vendor/eigen/COPYING.MINPACK +51 -52
- data/vendor/eigen/Eigen/Cholesky +0 -1
- data/vendor/eigen/Eigen/Core +112 -265
- data/vendor/eigen/Eigen/Eigenvalues +2 -3
- data/vendor/eigen/Eigen/Geometry +5 -8
- data/vendor/eigen/Eigen/Householder +0 -1
- data/vendor/eigen/Eigen/Jacobi +0 -1
- data/vendor/eigen/Eigen/KLUSupport +41 -0
- data/vendor/eigen/Eigen/LU +2 -5
- data/vendor/eigen/Eigen/OrderingMethods +0 -3
- data/vendor/eigen/Eigen/PaStiXSupport +1 -0
- data/vendor/eigen/Eigen/PardisoSupport +0 -0
- data/vendor/eigen/Eigen/QR +2 -3
- data/vendor/eigen/Eigen/QtAlignedMalloc +0 -1
- data/vendor/eigen/Eigen/SVD +0 -1
- data/vendor/eigen/Eigen/Sparse +0 -2
- data/vendor/eigen/Eigen/SparseCholesky +0 -8
- data/vendor/eigen/Eigen/SparseLU +4 -0
- data/vendor/eigen/Eigen/SparseQR +0 -1
- data/vendor/eigen/Eigen/src/Cholesky/LDLT.h +42 -27
- data/vendor/eigen/Eigen/src/Cholesky/LLT.h +39 -23
- data/vendor/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +90 -47
- data/vendor/eigen/Eigen/src/Core/ArithmeticSequence.h +413 -0
- data/vendor/eigen/Eigen/src/Core/Array.h +99 -11
- data/vendor/eigen/Eigen/src/Core/ArrayBase.h +3 -3
- data/vendor/eigen/Eigen/src/Core/ArrayWrapper.h +21 -21
- data/vendor/eigen/Eigen/src/Core/Assign.h +1 -1
- data/vendor/eigen/Eigen/src/Core/AssignEvaluator.h +125 -50
- data/vendor/eigen/Eigen/src/Core/Assign_MKL.h +10 -10
- data/vendor/eigen/Eigen/src/Core/BandMatrix.h +16 -16
- data/vendor/eigen/Eigen/src/Core/Block.h +56 -60
- data/vendor/eigen/Eigen/src/Core/BooleanRedux.h +29 -31
- data/vendor/eigen/Eigen/src/Core/CommaInitializer.h +7 -3
- data/vendor/eigen/Eigen/src/Core/CoreEvaluators.h +325 -272
- data/vendor/eigen/Eigen/src/Core/CoreIterators.h +5 -0
- data/vendor/eigen/Eigen/src/Core/CwiseBinaryOp.h +21 -22
- data/vendor/eigen/Eigen/src/Core/CwiseNullaryOp.h +153 -18
- data/vendor/eigen/Eigen/src/Core/CwiseUnaryOp.h +6 -6
- data/vendor/eigen/Eigen/src/Core/CwiseUnaryView.h +14 -10
- data/vendor/eigen/Eigen/src/Core/DenseBase.h +132 -42
- data/vendor/eigen/Eigen/src/Core/DenseCoeffsBase.h +25 -21
- data/vendor/eigen/Eigen/src/Core/DenseStorage.h +153 -71
- data/vendor/eigen/Eigen/src/Core/Diagonal.h +21 -23
- data/vendor/eigen/Eigen/src/Core/DiagonalMatrix.h +50 -2
- data/vendor/eigen/Eigen/src/Core/DiagonalProduct.h +1 -1
- data/vendor/eigen/Eigen/src/Core/Dot.h +10 -10
- data/vendor/eigen/Eigen/src/Core/EigenBase.h +10 -9
- data/vendor/eigen/Eigen/src/Core/ForceAlignedAccess.h +8 -4
- data/vendor/eigen/Eigen/src/Core/Fuzzy.h +3 -3
- data/vendor/eigen/Eigen/src/Core/GeneralProduct.h +20 -10
- data/vendor/eigen/Eigen/src/Core/GenericPacketMath.h +599 -152
- data/vendor/eigen/Eigen/src/Core/GlobalFunctions.h +40 -33
- data/vendor/eigen/Eigen/src/Core/IO.h +40 -7
- data/vendor/eigen/Eigen/src/Core/IndexedView.h +237 -0
- data/vendor/eigen/Eigen/src/Core/Inverse.h +9 -10
- data/vendor/eigen/Eigen/src/Core/Map.h +7 -7
- data/vendor/eigen/Eigen/src/Core/MapBase.h +10 -3
- data/vendor/eigen/Eigen/src/Core/MathFunctions.h +767 -125
- data/vendor/eigen/Eigen/src/Core/MathFunctionsImpl.h +118 -19
- data/vendor/eigen/Eigen/src/Core/Matrix.h +131 -25
- data/vendor/eigen/Eigen/src/Core/MatrixBase.h +21 -3
- data/vendor/eigen/Eigen/src/Core/NestByValue.h +25 -50
- data/vendor/eigen/Eigen/src/Core/NoAlias.h +4 -3
- data/vendor/eigen/Eigen/src/Core/NumTraits.h +107 -20
- data/vendor/eigen/Eigen/src/Core/PartialReduxEvaluator.h +232 -0
- data/vendor/eigen/Eigen/src/Core/PermutationMatrix.h +3 -31
- data/vendor/eigen/Eigen/src/Core/PlainObjectBase.h +152 -59
- data/vendor/eigen/Eigen/src/Core/Product.h +30 -25
- data/vendor/eigen/Eigen/src/Core/ProductEvaluators.h +192 -125
- data/vendor/eigen/Eigen/src/Core/Random.h +37 -1
- data/vendor/eigen/Eigen/src/Core/Redux.h +180 -170
- data/vendor/eigen/Eigen/src/Core/Ref.h +121 -23
- data/vendor/eigen/Eigen/src/Core/Replicate.h +8 -8
- data/vendor/eigen/Eigen/src/Core/Reshaped.h +454 -0
- data/vendor/eigen/Eigen/src/Core/ReturnByValue.h +7 -5
- data/vendor/eigen/Eigen/src/Core/Reverse.h +18 -12
- data/vendor/eigen/Eigen/src/Core/Select.h +8 -6
- data/vendor/eigen/Eigen/src/Core/SelfAdjointView.h +33 -20
- data/vendor/eigen/Eigen/src/Core/Solve.h +14 -14
- data/vendor/eigen/Eigen/src/Core/SolveTriangular.h +16 -16
- data/vendor/eigen/Eigen/src/Core/SolverBase.h +41 -3
- data/vendor/eigen/Eigen/src/Core/StableNorm.h +100 -70
- data/vendor/eigen/Eigen/src/Core/StlIterators.h +463 -0
- data/vendor/eigen/Eigen/src/Core/Stride.h +9 -4
- data/vendor/eigen/Eigen/src/Core/Swap.h +5 -4
- data/vendor/eigen/Eigen/src/Core/Transpose.h +88 -27
- data/vendor/eigen/Eigen/src/Core/Transpositions.h +26 -47
- data/vendor/eigen/Eigen/src/Core/TriangularMatrix.h +93 -75
- data/vendor/eigen/Eigen/src/Core/VectorBlock.h +5 -5
- data/vendor/eigen/Eigen/src/Core/VectorwiseOp.h +159 -70
- data/vendor/eigen/Eigen/src/Core/Visitor.h +137 -29
- data/vendor/eigen/Eigen/src/Core/arch/AVX/Complex.h +50 -129
- data/vendor/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +126 -337
- data/vendor/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +1092 -155
- data/vendor/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +65 -1
- data/vendor/eigen/Eigen/src/Core/arch/AVX512/Complex.h +422 -0
- data/vendor/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +207 -236
- data/vendor/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1482 -495
- data/vendor/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +89 -0
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +152 -165
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +19 -251
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2937 -0
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +221 -0
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +629 -0
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2042 -392
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/Complex.h +235 -80
- data/vendor/eigen/Eigen/src/Core/arch/Default/BFloat16.h +700 -0
- data/vendor/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +102 -14
- data/vendor/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1649 -0
- data/vendor/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +110 -0
- data/vendor/eigen/Eigen/src/Core/arch/Default/Half.h +942 -0
- data/vendor/eigen/Eigen/src/Core/arch/Default/Settings.h +1 -1
- data/vendor/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +120 -0
- data/vendor/eigen/Eigen/src/Core/arch/{CUDA → GPU}/MathFunctions.h +16 -4
- data/vendor/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1685 -0
- data/vendor/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +80 -0
- data/vendor/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
- data/vendor/eigen/Eigen/src/Core/arch/MSA/Complex.h +648 -0
- data/vendor/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +387 -0
- data/vendor/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1233 -0
- data/vendor/eigen/Eigen/src/Core/arch/NEON/Complex.h +313 -219
- data/vendor/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +183 -0
- data/vendor/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +54 -70
- data/vendor/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4376 -549
- data/vendor/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1419 -0
- data/vendor/eigen/Eigen/src/Core/arch/SSE/Complex.h +59 -179
- data/vendor/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +65 -428
- data/vendor/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +893 -283
- data/vendor/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +65 -0
- data/vendor/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +44 -0
- data/vendor/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +752 -0
- data/vendor/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +49 -0
- data/vendor/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +232 -0
- data/vendor/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +301 -0
- data/vendor/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +670 -0
- data/vendor/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +694 -0
- data/vendor/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +85 -0
- data/vendor/eigen/Eigen/src/Core/arch/ZVector/Complex.h +212 -183
- data/vendor/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +101 -5
- data/vendor/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +510 -395
- data/vendor/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +11 -2
- data/vendor/eigen/Eigen/src/Core/functors/BinaryFunctors.h +112 -46
- data/vendor/eigen/Eigen/src/Core/functors/NullaryFunctors.h +31 -30
- data/vendor/eigen/Eigen/src/Core/functors/StlFunctors.h +32 -2
- data/vendor/eigen/Eigen/src/Core/functors/UnaryFunctors.h +355 -16
- data/vendor/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1075 -586
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +49 -24
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +41 -35
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +6 -6
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +4 -2
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +382 -483
- data/vendor/eigen/Eigen/src/Core/products/Parallelizer.h +22 -5
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +53 -30
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +16 -8
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +8 -6
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointProduct.h +4 -4
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +5 -4
- data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +33 -27
- data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +14 -12
- data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +36 -34
- data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +8 -4
- data/vendor/eigen/Eigen/src/Core/products/TriangularSolverVector.h +13 -10
- data/vendor/eigen/Eigen/src/Core/util/BlasUtil.h +304 -119
- data/vendor/eigen/Eigen/src/Core/util/ConfigureVectorization.h +512 -0
- data/vendor/eigen/Eigen/src/Core/util/Constants.h +25 -9
- data/vendor/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +26 -3
- data/vendor/eigen/Eigen/src/Core/util/ForwardDeclarations.h +29 -9
- data/vendor/eigen/Eigen/src/Core/util/IndexedViewHelper.h +186 -0
- data/vendor/eigen/Eigen/src/Core/util/IntegralConstant.h +272 -0
- data/vendor/eigen/Eigen/src/Core/util/MKL_support.h +8 -1
- data/vendor/eigen/Eigen/src/Core/util/Macros.h +709 -246
- data/vendor/eigen/Eigen/src/Core/util/Memory.h +222 -52
- data/vendor/eigen/Eigen/src/Core/util/Meta.h +355 -77
- data/vendor/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +5 -1
- data/vendor/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
- data/vendor/eigen/Eigen/src/Core/util/StaticAssert.h +8 -5
- data/vendor/eigen/Eigen/src/Core/util/SymbolicIndex.h +293 -0
- data/vendor/eigen/Eigen/src/Core/util/XprHelper.h +65 -30
- data/vendor/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +1 -1
- data/vendor/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +7 -4
- data/vendor/eigen/Eigen/src/Eigenvalues/EigenSolver.h +2 -2
- data/vendor/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +1 -1
- data/vendor/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +2 -2
- data/vendor/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +2 -2
- data/vendor/eigen/Eigen/src/Eigenvalues/RealQZ.h +9 -6
- data/vendor/eigen/Eigen/src/Eigenvalues/RealSchur.h +21 -9
- data/vendor/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +77 -43
- data/vendor/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +20 -15
- data/vendor/eigen/Eigen/src/Geometry/AlignedBox.h +99 -5
- data/vendor/eigen/Eigen/src/Geometry/AngleAxis.h +4 -4
- data/vendor/eigen/Eigen/src/Geometry/EulerAngles.h +3 -3
- data/vendor/eigen/Eigen/src/Geometry/Homogeneous.h +15 -11
- data/vendor/eigen/Eigen/src/Geometry/Hyperplane.h +1 -1
- data/vendor/eigen/Eigen/src/Geometry/OrthoMethods.h +3 -2
- data/vendor/eigen/Eigen/src/Geometry/ParametrizedLine.h +39 -2
- data/vendor/eigen/Eigen/src/Geometry/Quaternion.h +70 -14
- data/vendor/eigen/Eigen/src/Geometry/Rotation2D.h +3 -3
- data/vendor/eigen/Eigen/src/Geometry/Scaling.h +23 -5
- data/vendor/eigen/Eigen/src/Geometry/Transform.h +88 -67
- data/vendor/eigen/Eigen/src/Geometry/Translation.h +6 -12
- data/vendor/eigen/Eigen/src/Geometry/Umeyama.h +1 -1
- data/vendor/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +168 -0
- data/vendor/eigen/Eigen/src/Householder/BlockHouseholder.h +9 -2
- data/vendor/eigen/Eigen/src/Householder/Householder.h +8 -4
- data/vendor/eigen/Eigen/src/Householder/HouseholderSequence.h +123 -48
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +15 -15
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +7 -23
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +5 -22
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +41 -47
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +51 -60
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +70 -20
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +2 -20
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +11 -9
- data/vendor/eigen/Eigen/src/Jacobi/Jacobi.h +31 -10
- data/vendor/eigen/Eigen/src/KLUSupport/KLUSupport.h +358 -0
- data/vendor/eigen/Eigen/src/LU/Determinant.h +35 -19
- data/vendor/eigen/Eigen/src/LU/FullPivLU.h +29 -43
- data/vendor/eigen/Eigen/src/LU/InverseImpl.h +25 -8
- data/vendor/eigen/Eigen/src/LU/PartialPivLU.h +71 -58
- data/vendor/eigen/Eigen/src/LU/arch/InverseSize4.h +351 -0
- data/vendor/eigen/Eigen/src/OrderingMethods/Amd.h +7 -17
- data/vendor/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +297 -277
- data/vendor/eigen/Eigen/src/OrderingMethods/Ordering.h +6 -10
- data/vendor/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +1 -1
- data/vendor/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +9 -7
- data/vendor/eigen/Eigen/src/QR/ColPivHouseholderQR.h +41 -20
- data/vendor/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +100 -27
- data/vendor/eigen/Eigen/src/QR/FullPivHouseholderQR.h +59 -22
- data/vendor/eigen/Eigen/src/QR/HouseholderQR.h +48 -23
- data/vendor/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +25 -3
- data/vendor/eigen/Eigen/src/SVD/BDCSVD.h +183 -63
- data/vendor/eigen/Eigen/src/SVD/JacobiSVD.h +22 -14
- data/vendor/eigen/Eigen/src/SVD/SVDBase.h +83 -22
- data/vendor/eigen/Eigen/src/SVD/UpperBidiagonalization.h +3 -3
- data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +17 -9
- data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +12 -37
- data/vendor/eigen/Eigen/src/SparseCore/AmbiVector.h +3 -2
- data/vendor/eigen/Eigen/src/SparseCore/CompressedStorage.h +16 -0
- data/vendor/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +6 -6
- data/vendor/eigen/Eigen/src/SparseCore/SparseAssign.h +81 -27
- data/vendor/eigen/Eigen/src/SparseCore/SparseBlock.h +25 -57
- data/vendor/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +40 -11
- data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +11 -15
- data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +4 -2
- data/vendor/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +30 -8
- data/vendor/eigen/Eigen/src/SparseCore/SparseMatrix.h +126 -11
- data/vendor/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +5 -12
- data/vendor/eigen/Eigen/src/SparseCore/SparseProduct.h +13 -1
- data/vendor/eigen/Eigen/src/SparseCore/SparseRef.h +7 -7
- data/vendor/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +5 -2
- data/vendor/eigen/Eigen/src/SparseCore/SparseUtil.h +8 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseVector.h +1 -1
- data/vendor/eigen/Eigen/src/SparseCore/SparseView.h +1 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU.h +162 -12
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +1 -1
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +76 -2
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +2 -2
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +1 -1
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +1 -1
- data/vendor/eigen/Eigen/src/SparseQR/SparseQR.h +19 -6
- data/vendor/eigen/Eigen/src/StlSupport/StdDeque.h +2 -12
- data/vendor/eigen/Eigen/src/StlSupport/StdList.h +2 -2
- data/vendor/eigen/Eigen/src/StlSupport/StdVector.h +2 -2
- data/vendor/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +6 -8
- data/vendor/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +175 -39
- data/vendor/eigen/Eigen/src/misc/lapacke.h +5 -4
- data/vendor/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +28 -2
- data/vendor/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +155 -11
- data/vendor/eigen/Eigen/src/plugins/BlockMethods.h +626 -242
- data/vendor/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +14 -0
- data/vendor/eigen/Eigen/src/plugins/IndexedViewMethods.h +262 -0
- data/vendor/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +4 -4
- data/vendor/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +10 -0
- data/vendor/eigen/Eigen/src/plugins/ReshapedMethods.h +149 -0
- data/vendor/eigen/README.md +2 -0
- data/vendor/eigen/bench/btl/README +1 -1
- data/vendor/eigen/bench/tensors/README +6 -7
- data/vendor/eigen/ci/README.md +56 -0
- data/vendor/eigen/demos/mix_eigen_and_c/README +1 -1
- data/vendor/eigen/unsupported/Eigen/CXX11/src/Tensor/README.md +213 -158
- data/vendor/eigen/unsupported/README.txt +1 -1
- data/vendor/tomotopy/README.kr.rst +78 -0
- data/vendor/tomotopy/README.rst +75 -0
- data/vendor/tomotopy/src/Labeling/FoRelevance.cpp +2 -2
- data/vendor/tomotopy/src/Labeling/Phraser.hpp +4 -4
- data/vendor/tomotopy/src/TopicModel/CTModel.hpp +7 -3
- data/vendor/tomotopy/src/TopicModel/DMRModel.hpp +7 -3
- data/vendor/tomotopy/src/TopicModel/DTModel.hpp +6 -3
- data/vendor/tomotopy/src/TopicModel/GDMRModel.hpp +2 -2
- data/vendor/tomotopy/src/TopicModel/HDP.h +1 -0
- data/vendor/tomotopy/src/TopicModel/HDPModel.hpp +57 -6
- data/vendor/tomotopy/src/TopicModel/HLDAModel.hpp +6 -3
- data/vendor/tomotopy/src/TopicModel/HPAModel.hpp +3 -2
- data/vendor/tomotopy/src/TopicModel/LDA.h +3 -3
- data/vendor/tomotopy/src/TopicModel/LDACVB0Model.hpp +5 -5
- data/vendor/tomotopy/src/TopicModel/LDAModel.hpp +50 -19
- data/vendor/tomotopy/src/TopicModel/LLDAModel.hpp +6 -2
- data/vendor/tomotopy/src/TopicModel/MGLDAModel.hpp +3 -2
- data/vendor/tomotopy/src/TopicModel/PAModel.hpp +1 -1
- data/vendor/tomotopy/src/TopicModel/PLDAModel.hpp +6 -2
- data/vendor/tomotopy/src/TopicModel/PT.h +3 -1
- data/vendor/tomotopy/src/TopicModel/PTModel.hpp +36 -3
- data/vendor/tomotopy/src/TopicModel/SLDAModel.hpp +6 -3
- data/vendor/tomotopy/src/TopicModel/TopicModel.hpp +55 -26
- data/vendor/tomotopy/src/Utils/AliasMethod.hpp +5 -4
- data/vendor/tomotopy/src/Utils/Dictionary.h +2 -2
- data/vendor/tomotopy/src/Utils/EigenAddonOps.hpp +36 -1
- data/vendor/tomotopy/src/Utils/MultiNormalDistribution.hpp +1 -1
- data/vendor/tomotopy/src/Utils/TruncMultiNormal.hpp +1 -1
- data/vendor/tomotopy/src/Utils/exception.h +6 -0
- data/vendor/tomotopy/src/Utils/math.h +2 -2
- data/vendor/tomotopy/src/Utils/sample.hpp +14 -12
- data/vendor/tomotopy/src/Utils/serializer.hpp +30 -5
- data/vendor/tomotopy/src/Utils/sse_gamma.h +0 -3
- metadata +64 -18
- data/vendor/eigen/Eigen/CMakeLists.txt +0 -19
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -674
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
- data/vendor/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
- data/vendor/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
@@ -63,14 +63,28 @@ namespace Eigen {
|
|
63
63
|
|
64
64
|
namespace internal {
|
65
65
|
|
66
|
-
EIGEN_DEVICE_FUNC
|
66
|
+
EIGEN_DEVICE_FUNC
|
67
67
|
inline void throw_std_bad_alloc()
|
68
68
|
{
|
69
69
|
#ifdef EIGEN_EXCEPTIONS
|
70
70
|
throw std::bad_alloc();
|
71
71
|
#else
|
72
72
|
std::size_t huge = static_cast<std::size_t>(-1);
|
73
|
-
|
73
|
+
#if defined(EIGEN_HIPCC)
|
74
|
+
//
|
75
|
+
// calls to "::operator new" are to be treated as opaque function calls (i.e no inlining),
|
76
|
+
// and as a consequence the code in the #else block triggers the hipcc warning :
|
77
|
+
// "no overloaded function has restriction specifiers that are compatible with the ambient context"
|
78
|
+
//
|
79
|
+
// "throw_std_bad_alloc" has the EIGEN_DEVICE_FUNC attribute, so it seems that hipcc expects
|
80
|
+
// the same on "operator new"
|
81
|
+
// Reverting code back to the old version in this #if block for the hipcc compiler
|
82
|
+
//
|
83
|
+
new int[huge];
|
84
|
+
#else
|
85
|
+
void* unused = ::operator new(huge);
|
86
|
+
EIGEN_UNUSED_VARIABLE(unused);
|
87
|
+
#endif
|
74
88
|
#endif
|
75
89
|
}
|
76
90
|
|
@@ -83,19 +97,26 @@ inline void throw_std_bad_alloc()
|
|
83
97
|
/** \internal Like malloc, but the returned pointer is guaranteed to be 16-byte aligned.
|
84
98
|
* Fast, but wastes 16 additional bytes of memory. Does not throw any exception.
|
85
99
|
*/
|
86
|
-
inline void* handmade_aligned_malloc(std::size_t size)
|
100
|
+
EIGEN_DEVICE_FUNC inline void* handmade_aligned_malloc(std::size_t size, std::size_t alignment = EIGEN_DEFAULT_ALIGN_BYTES)
|
87
101
|
{
|
88
|
-
void
|
102
|
+
eigen_assert(alignment >= sizeof(void*) && (alignment & (alignment-1)) == 0 && "Alignment must be at least sizeof(void*) and a power of 2");
|
103
|
+
|
104
|
+
EIGEN_USING_STD(malloc)
|
105
|
+
void *original = malloc(size+alignment);
|
106
|
+
|
89
107
|
if (original == 0) return 0;
|
90
|
-
void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(
|
108
|
+
void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(alignment-1))) + alignment);
|
91
109
|
*(reinterpret_cast<void**>(aligned) - 1) = original;
|
92
110
|
return aligned;
|
93
111
|
}
|
94
112
|
|
95
113
|
/** \internal Frees memory allocated with handmade_aligned_malloc */
|
96
|
-
inline void handmade_aligned_free(void *ptr)
|
114
|
+
EIGEN_DEVICE_FUNC inline void handmade_aligned_free(void *ptr)
|
97
115
|
{
|
98
|
-
if (ptr)
|
116
|
+
if (ptr) {
|
117
|
+
EIGEN_USING_STD(free)
|
118
|
+
free(*(reinterpret_cast<void**>(ptr) - 1));
|
119
|
+
}
|
99
120
|
}
|
100
121
|
|
101
122
|
/** \internal
|
@@ -114,7 +135,7 @@ inline void* handmade_aligned_realloc(void* ptr, std::size_t size, std::size_t =
|
|
114
135
|
void *previous_aligned = static_cast<char *>(original)+previous_offset;
|
115
136
|
if(aligned!=previous_aligned)
|
116
137
|
std::memmove(aligned, previous_aligned, size);
|
117
|
-
|
138
|
+
|
118
139
|
*(reinterpret_cast<void**>(aligned) - 1) = original;
|
119
140
|
return aligned;
|
120
141
|
}
|
@@ -142,7 +163,7 @@ EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed()
|
|
142
163
|
{
|
143
164
|
eigen_assert(is_malloc_allowed() && "heap allocation is forbidden (EIGEN_RUNTIME_NO_MALLOC is defined and g_is_malloc_allowed is false)");
|
144
165
|
}
|
145
|
-
#else
|
166
|
+
#else
|
146
167
|
EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed()
|
147
168
|
{}
|
148
169
|
#endif
|
@@ -156,9 +177,12 @@ EIGEN_DEVICE_FUNC inline void* aligned_malloc(std::size_t size)
|
|
156
177
|
|
157
178
|
void *result;
|
158
179
|
#if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED
|
159
|
-
|
180
|
+
|
181
|
+
EIGEN_USING_STD(malloc)
|
182
|
+
result = malloc(size);
|
183
|
+
|
160
184
|
#if EIGEN_DEFAULT_ALIGN_BYTES==16
|
161
|
-
eigen_assert((size<16 || (std::size_t(result)%16)==0) && "System's malloc returned an unaligned pointer. Compile with EIGEN_MALLOC_ALREADY_ALIGNED=0 to fallback to handmade
|
185
|
+
eigen_assert((size<16 || (std::size_t(result)%16)==0) && "System's malloc returned an unaligned pointer. Compile with EIGEN_MALLOC_ALREADY_ALIGNED=0 to fallback to handmade aligned memory allocator.");
|
162
186
|
#endif
|
163
187
|
#else
|
164
188
|
result = handmade_aligned_malloc(size);
|
@@ -174,7 +198,10 @@ EIGEN_DEVICE_FUNC inline void* aligned_malloc(std::size_t size)
|
|
174
198
|
EIGEN_DEVICE_FUNC inline void aligned_free(void *ptr)
|
175
199
|
{
|
176
200
|
#if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED
|
177
|
-
|
201
|
+
|
202
|
+
EIGEN_USING_STD(free)
|
203
|
+
free(ptr);
|
204
|
+
|
178
205
|
#else
|
179
206
|
handmade_aligned_free(ptr);
|
180
207
|
#endif
|
@@ -187,7 +214,7 @@ EIGEN_DEVICE_FUNC inline void aligned_free(void *ptr)
|
|
187
214
|
*/
|
188
215
|
inline void* aligned_realloc(void *ptr, std::size_t new_size, std::size_t old_size)
|
189
216
|
{
|
190
|
-
EIGEN_UNUSED_VARIABLE(old_size)
|
217
|
+
EIGEN_UNUSED_VARIABLE(old_size)
|
191
218
|
|
192
219
|
void *result;
|
193
220
|
#if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED
|
@@ -218,7 +245,9 @@ template<> EIGEN_DEVICE_FUNC inline void* conditional_aligned_malloc<false>(std:
|
|
218
245
|
{
|
219
246
|
check_that_malloc_is_allowed();
|
220
247
|
|
221
|
-
|
248
|
+
EIGEN_USING_STD(malloc)
|
249
|
+
void *result = malloc(size);
|
250
|
+
|
222
251
|
if(!result && size)
|
223
252
|
throw_std_bad_alloc();
|
224
253
|
return result;
|
@@ -232,7 +261,8 @@ template<bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_free(void
|
|
232
261
|
|
233
262
|
template<> EIGEN_DEVICE_FUNC inline void conditional_aligned_free<false>(void *ptr)
|
234
263
|
{
|
235
|
-
|
264
|
+
EIGEN_USING_STD(free)
|
265
|
+
free(ptr);
|
236
266
|
}
|
237
267
|
|
238
268
|
template<bool Align> inline void* conditional_aligned_realloc(void* ptr, std::size_t new_size, std::size_t old_size)
|
@@ -331,7 +361,7 @@ template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned
|
|
331
361
|
template<typename T> EIGEN_DEVICE_FUNC inline void aligned_delete(T *ptr, std::size_t size)
|
332
362
|
{
|
333
363
|
destruct_elements_of_array<T>(ptr, size);
|
334
|
-
aligned_free(ptr);
|
364
|
+
Eigen::internal::aligned_free(ptr);
|
335
365
|
}
|
336
366
|
|
337
367
|
/** \internal Deletes objects constructed with conditional_aligned_new
|
@@ -471,8 +501,8 @@ EIGEN_DEVICE_FUNC inline Index first_default_aligned(const Scalar* array, Index
|
|
471
501
|
}
|
472
502
|
|
473
503
|
/** \internal Returns the smallest integer multiple of \a base and greater or equal to \a size
|
474
|
-
*/
|
475
|
-
template<typename Index>
|
504
|
+
*/
|
505
|
+
template<typename Index>
|
476
506
|
inline Index first_multiple(Index size, Index base)
|
477
507
|
{
|
478
508
|
return ((size+base-1)/base)*base;
|
@@ -493,7 +523,8 @@ template<typename T> struct smart_copy_helper<T,true> {
|
|
493
523
|
IntPtr size = IntPtr(end)-IntPtr(start);
|
494
524
|
if(size==0) return;
|
495
525
|
eigen_internal_assert(start!=0 && end!=0 && target!=0);
|
496
|
-
|
526
|
+
EIGEN_USING_STD(memcpy)
|
527
|
+
memcpy(target, start, size);
|
497
528
|
}
|
498
529
|
};
|
499
530
|
|
@@ -502,7 +533,7 @@ template<typename T> struct smart_copy_helper<T,false> {
|
|
502
533
|
{ std::copy(start, end, target); }
|
503
534
|
};
|
504
535
|
|
505
|
-
// intelligent memmove. falls back to std::memmove for POD types, uses std::copy otherwise.
|
536
|
+
// intelligent memmove. falls back to std::memmove for POD types, uses std::copy otherwise.
|
506
537
|
template<typename T, bool UseMemmove> struct smart_memmove_helper;
|
507
538
|
|
508
539
|
template<typename T> void smart_memmove(const T* start, const T* end, T* target)
|
@@ -522,19 +553,30 @@ template<typename T> struct smart_memmove_helper<T,true> {
|
|
522
553
|
|
523
554
|
template<typename T> struct smart_memmove_helper<T,false> {
|
524
555
|
static inline void run(const T* start, const T* end, T* target)
|
525
|
-
{
|
556
|
+
{
|
526
557
|
if (UIntPtr(target) < UIntPtr(start))
|
527
558
|
{
|
528
559
|
std::copy(start, end, target);
|
529
560
|
}
|
530
|
-
else
|
561
|
+
else
|
531
562
|
{
|
532
563
|
std::ptrdiff_t count = (std::ptrdiff_t(end)-std::ptrdiff_t(start)) / sizeof(T);
|
533
|
-
std::copy_backward(start, end, target + count);
|
564
|
+
std::copy_backward(start, end, target + count);
|
534
565
|
}
|
535
566
|
}
|
536
567
|
};
|
537
568
|
|
569
|
+
#if EIGEN_HAS_RVALUE_REFERENCES
|
570
|
+
template<typename T> EIGEN_DEVICE_FUNC T* smart_move(T* start, T* end, T* target)
|
571
|
+
{
|
572
|
+
return std::move(start, end, target);
|
573
|
+
}
|
574
|
+
#else
|
575
|
+
template<typename T> EIGEN_DEVICE_FUNC T* smart_move(T* start, T* end, T* target)
|
576
|
+
{
|
577
|
+
return std::copy(start, end, target);
|
578
|
+
}
|
579
|
+
#endif
|
538
580
|
|
539
581
|
/*****************************************************************************
|
540
582
|
*** Implementation of runtime stack allocation (falling back to malloc) ***
|
@@ -542,7 +584,7 @@ template<typename T> struct smart_memmove_helper<T,false> {
|
|
542
584
|
|
543
585
|
// you can overwrite Eigen's default behavior regarding alloca by defining EIGEN_ALLOCA
|
544
586
|
// to the appropriate stack allocation function
|
545
|
-
#
|
587
|
+
#if ! defined EIGEN_ALLOCA && ! defined EIGEN_GPU_COMPILE_PHASE
|
546
588
|
#if EIGEN_OS_LINUX || EIGEN_OS_MAC || (defined alloca)
|
547
589
|
#define EIGEN_ALLOCA alloca
|
548
590
|
#elif EIGEN_COMP_MSVC
|
@@ -550,6 +592,15 @@ template<typename T> struct smart_memmove_helper<T,false> {
|
|
550
592
|
#endif
|
551
593
|
#endif
|
552
594
|
|
595
|
+
// With clang -Oz -mthumb, alloca changes the stack pointer in a way that is
|
596
|
+
// not allowed in Thumb2. -DEIGEN_STACK_ALLOCATION_LIMIT=0 doesn't work because
|
597
|
+
// the compiler still emits bad code because stack allocation checks use "<=".
|
598
|
+
// TODO: Eliminate after https://bugs.llvm.org/show_bug.cgi?id=23772
|
599
|
+
// is fixed.
|
600
|
+
#if defined(__clang__) && defined(__thumb__)
|
601
|
+
#undef EIGEN_ALLOCA
|
602
|
+
#endif
|
603
|
+
|
553
604
|
// This helper class constructs the allocated memory, and takes care of destructing and freeing the handled data
|
554
605
|
// at destruction time. In practice this helper class is mainly useful to avoid memory leak in case of exceptions.
|
555
606
|
template<typename T> class aligned_stack_memory_handler : noncopyable
|
@@ -561,12 +612,14 @@ template<typename T> class aligned_stack_memory_handler : noncopyable
|
|
561
612
|
* In this case, the buffer elements will also be destructed when this handler is destructed.
|
562
613
|
* Finally, if \a dealloc is true, then the pointer \a ptr is freed.
|
563
614
|
**/
|
615
|
+
EIGEN_DEVICE_FUNC
|
564
616
|
aligned_stack_memory_handler(T* ptr, std::size_t size, bool dealloc)
|
565
617
|
: m_ptr(ptr), m_size(size), m_deallocate(dealloc)
|
566
618
|
{
|
567
619
|
if(NumTraits<T>::RequireInitialization && m_ptr)
|
568
620
|
Eigen::internal::construct_elements_of_array(m_ptr, size);
|
569
621
|
}
|
622
|
+
EIGEN_DEVICE_FUNC
|
570
623
|
~aligned_stack_memory_handler()
|
571
624
|
{
|
572
625
|
if(NumTraits<T>::RequireInitialization && m_ptr)
|
@@ -580,6 +633,60 @@ template<typename T> class aligned_stack_memory_handler : noncopyable
|
|
580
633
|
bool m_deallocate;
|
581
634
|
};
|
582
635
|
|
636
|
+
#ifdef EIGEN_ALLOCA
|
637
|
+
|
638
|
+
template<typename Xpr, int NbEvaluations,
|
639
|
+
bool MapExternalBuffer = nested_eval<Xpr,NbEvaluations>::Evaluate && Xpr::MaxSizeAtCompileTime==Dynamic
|
640
|
+
>
|
641
|
+
struct local_nested_eval_wrapper
|
642
|
+
{
|
643
|
+
static const bool NeedExternalBuffer = false;
|
644
|
+
typedef typename Xpr::Scalar Scalar;
|
645
|
+
typedef typename nested_eval<Xpr,NbEvaluations>::type ObjectType;
|
646
|
+
ObjectType object;
|
647
|
+
|
648
|
+
EIGEN_DEVICE_FUNC
|
649
|
+
local_nested_eval_wrapper(const Xpr& xpr, Scalar* ptr) : object(xpr)
|
650
|
+
{
|
651
|
+
EIGEN_UNUSED_VARIABLE(ptr);
|
652
|
+
eigen_internal_assert(ptr==0);
|
653
|
+
}
|
654
|
+
};
|
655
|
+
|
656
|
+
template<typename Xpr, int NbEvaluations>
|
657
|
+
struct local_nested_eval_wrapper<Xpr,NbEvaluations,true>
|
658
|
+
{
|
659
|
+
static const bool NeedExternalBuffer = true;
|
660
|
+
typedef typename Xpr::Scalar Scalar;
|
661
|
+
typedef typename plain_object_eval<Xpr>::type PlainObject;
|
662
|
+
typedef Map<PlainObject,EIGEN_DEFAULT_ALIGN_BYTES> ObjectType;
|
663
|
+
ObjectType object;
|
664
|
+
|
665
|
+
EIGEN_DEVICE_FUNC
|
666
|
+
local_nested_eval_wrapper(const Xpr& xpr, Scalar* ptr)
|
667
|
+
: object(ptr==0 ? reinterpret_cast<Scalar*>(Eigen::internal::aligned_malloc(sizeof(Scalar)*xpr.size())) : ptr, xpr.rows(), xpr.cols()),
|
668
|
+
m_deallocate(ptr==0)
|
669
|
+
{
|
670
|
+
if(NumTraits<Scalar>::RequireInitialization && object.data())
|
671
|
+
Eigen::internal::construct_elements_of_array(object.data(), object.size());
|
672
|
+
object = xpr;
|
673
|
+
}
|
674
|
+
|
675
|
+
EIGEN_DEVICE_FUNC
|
676
|
+
~local_nested_eval_wrapper()
|
677
|
+
{
|
678
|
+
if(NumTraits<Scalar>::RequireInitialization && object.data())
|
679
|
+
Eigen::internal::destruct_elements_of_array(object.data(), object.size());
|
680
|
+
if(m_deallocate)
|
681
|
+
Eigen::internal::aligned_free(object.data());
|
682
|
+
}
|
683
|
+
|
684
|
+
private:
|
685
|
+
bool m_deallocate;
|
686
|
+
};
|
687
|
+
|
688
|
+
#endif // EIGEN_ALLOCA
|
689
|
+
|
583
690
|
template<typename T> class scoped_array : noncopyable
|
584
691
|
{
|
585
692
|
T* m_ptr;
|
@@ -603,13 +710,15 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b)
|
|
603
710
|
{
|
604
711
|
std::swap(a.ptr(),b.ptr());
|
605
712
|
}
|
606
|
-
|
713
|
+
|
607
714
|
} // end namespace internal
|
608
715
|
|
609
716
|
/** \internal
|
610
|
-
*
|
611
|
-
*
|
612
|
-
*
|
717
|
+
*
|
718
|
+
* The macro ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) declares, allocates,
|
719
|
+
* and constructs an aligned buffer named NAME of SIZE elements of type TYPE on the stack
|
720
|
+
* if the size in bytes is smaller than EIGEN_STACK_ALLOCATION_LIMIT, and if stack allocation is supported by the platform
|
721
|
+
* (currently, this is Linux, OSX and Visual Studio only). Otherwise the memory is allocated on the heap.
|
613
722
|
* The allocated buffer is automatically deleted when exiting the scope of this declaration.
|
614
723
|
* If BUFFER is non null, then the declared variable is simply an alias for BUFFER, and no allocation/deletion occurs.
|
615
724
|
* Here is an example:
|
@@ -620,9 +729,17 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b)
|
|
620
729
|
* }
|
621
730
|
* \endcode
|
622
731
|
* The underlying stack allocation function can be controlled with the EIGEN_ALLOCA preprocessor token.
|
732
|
+
*
|
733
|
+
* The macro ei_declare_local_nested_eval(XPR_T,XPR,N,NAME) is analogous to
|
734
|
+
* \code
|
735
|
+
* typename internal::nested_eval<XPR_T,N>::type NAME(XPR);
|
736
|
+
* \endcode
|
737
|
+
* with the advantage of using aligned stack allocation even if the maximal size of XPR at compile time is unknown.
|
738
|
+
* This is accomplished through alloca if the latter is supported and if the required number of bytes
|
739
|
+
* is below EIGEN_STACK_ALLOCATION_LIMIT.
|
623
740
|
*/
|
624
741
|
#ifdef EIGEN_ALLOCA
|
625
|
-
|
742
|
+
|
626
743
|
#if EIGEN_DEFAULT_ALIGN_BYTES>0
|
627
744
|
// We always manually re-align the result of EIGEN_ALLOCA.
|
628
745
|
// If alloca is already aligned, the compiler should be smart enough to optimize away the re-alignment.
|
@@ -639,13 +756,23 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b)
|
|
639
756
|
: Eigen::internal::aligned_malloc(sizeof(TYPE)*SIZE) ); \
|
640
757
|
Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME,_stack_memory_destructor)((BUFFER)==0 ? NAME : 0,SIZE,sizeof(TYPE)*SIZE>EIGEN_STACK_ALLOCATION_LIMIT)
|
641
758
|
|
759
|
+
|
760
|
+
#define ei_declare_local_nested_eval(XPR_T,XPR,N,NAME) \
|
761
|
+
Eigen::internal::local_nested_eval_wrapper<XPR_T,N> EIGEN_CAT(NAME,_wrapper)(XPR, reinterpret_cast<typename XPR_T::Scalar*>( \
|
762
|
+
( (Eigen::internal::local_nested_eval_wrapper<XPR_T,N>::NeedExternalBuffer) && ((sizeof(typename XPR_T::Scalar)*XPR.size())<=EIGEN_STACK_ALLOCATION_LIMIT) ) \
|
763
|
+
? EIGEN_ALIGNED_ALLOCA( sizeof(typename XPR_T::Scalar)*XPR.size() ) : 0 ) ) ; \
|
764
|
+
typename Eigen::internal::local_nested_eval_wrapper<XPR_T,N>::ObjectType NAME(EIGEN_CAT(NAME,_wrapper).object)
|
765
|
+
|
642
766
|
#else
|
643
767
|
|
644
768
|
#define ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) \
|
645
769
|
Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \
|
646
770
|
TYPE* NAME = (BUFFER)!=0 ? BUFFER : reinterpret_cast<TYPE*>(Eigen::internal::aligned_malloc(sizeof(TYPE)*SIZE)); \
|
647
771
|
Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME,_stack_memory_destructor)((BUFFER)==0 ? NAME : 0,SIZE,true)
|
648
|
-
|
772
|
+
|
773
|
+
|
774
|
+
#define ei_declare_local_nested_eval(XPR_T,XPR,N,NAME) typename Eigen::internal::nested_eval<XPR_T,N>::type NAME(XPR)
|
775
|
+
|
649
776
|
#endif
|
650
777
|
|
651
778
|
|
@@ -653,32 +780,56 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b)
|
|
653
780
|
*** Implementation of EIGEN_MAKE_ALIGNED_OPERATOR_NEW [_IF] ***
|
654
781
|
*****************************************************************************/
|
655
782
|
|
656
|
-
#if
|
783
|
+
#if EIGEN_HAS_CXX17_OVERALIGN
|
784
|
+
|
785
|
+
// C++17 -> no need to bother about alignment anymore :)
|
786
|
+
|
787
|
+
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign)
|
788
|
+
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)
|
789
|
+
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW
|
790
|
+
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar,Size)
|
791
|
+
|
792
|
+
#else
|
793
|
+
|
794
|
+
// HIP does not support new/delete on device.
|
795
|
+
#if EIGEN_MAX_ALIGN_BYTES!=0 && !defined(EIGEN_HIP_DEVICE_COMPILE)
|
657
796
|
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
|
797
|
+
EIGEN_DEVICE_FUNC \
|
658
798
|
void* operator new(std::size_t size, const std::nothrow_t&) EIGEN_NO_THROW { \
|
659
799
|
EIGEN_TRY { return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); } \
|
660
800
|
EIGEN_CATCH (...) { return 0; } \
|
661
801
|
}
|
662
802
|
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign) \
|
803
|
+
EIGEN_DEVICE_FUNC \
|
663
804
|
void *operator new(std::size_t size) { \
|
664
805
|
return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \
|
665
806
|
} \
|
807
|
+
EIGEN_DEVICE_FUNC \
|
666
808
|
void *operator new[](std::size_t size) { \
|
667
809
|
return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \
|
668
810
|
} \
|
811
|
+
EIGEN_DEVICE_FUNC \
|
669
812
|
void operator delete(void * ptr) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
|
813
|
+
EIGEN_DEVICE_FUNC \
|
670
814
|
void operator delete[](void * ptr) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
|
815
|
+
EIGEN_DEVICE_FUNC \
|
671
816
|
void operator delete(void * ptr, std::size_t /* sz */) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
|
817
|
+
EIGEN_DEVICE_FUNC \
|
672
818
|
void operator delete[](void * ptr, std::size_t /* sz */) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
|
673
819
|
/* in-place new and delete. since (at least afaik) there is no actual */ \
|
674
820
|
/* memory allocated we can safely let the default implementation handle */ \
|
675
821
|
/* this particular case. */ \
|
822
|
+
EIGEN_DEVICE_FUNC \
|
676
823
|
static void *operator new(std::size_t size, void *ptr) { return ::operator new(size,ptr); } \
|
824
|
+
EIGEN_DEVICE_FUNC \
|
677
825
|
static void *operator new[](std::size_t size, void* ptr) { return ::operator new[](size,ptr); } \
|
826
|
+
EIGEN_DEVICE_FUNC \
|
678
827
|
void operator delete(void * memory, void *ptr) EIGEN_NO_THROW { return ::operator delete(memory,ptr); } \
|
828
|
+
EIGEN_DEVICE_FUNC \
|
679
829
|
void operator delete[](void * memory, void *ptr) EIGEN_NO_THROW { return ::operator delete[](memory,ptr); } \
|
680
830
|
/* nothrow-new (returns zero instead of std::bad_alloc) */ \
|
681
831
|
EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
|
832
|
+
EIGEN_DEVICE_FUNC \
|
682
833
|
void operator delete(void *ptr, const std::nothrow_t&) EIGEN_NO_THROW { \
|
683
834
|
Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); \
|
684
835
|
} \
|
@@ -688,8 +839,14 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b)
|
|
688
839
|
#endif
|
689
840
|
|
690
841
|
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(true)
|
691
|
-
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar,Size)
|
692
|
-
EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(bool(
|
842
|
+
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar,Size) \
|
843
|
+
EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(bool( \
|
844
|
+
((Size)!=Eigen::Dynamic) && \
|
845
|
+
(((EIGEN_MAX_ALIGN_BYTES>=16) && ((sizeof(Scalar)*(Size))%(EIGEN_MAX_ALIGN_BYTES )==0)) || \
|
846
|
+
((EIGEN_MAX_ALIGN_BYTES>=32) && ((sizeof(Scalar)*(Size))%(EIGEN_MAX_ALIGN_BYTES/2)==0)) || \
|
847
|
+
((EIGEN_MAX_ALIGN_BYTES>=64) && ((sizeof(Scalar)*(Size))%(EIGEN_MAX_ALIGN_BYTES/4)==0)) )))
|
848
|
+
|
849
|
+
#endif
|
693
850
|
|
694
851
|
/****************************************************************************/
|
695
852
|
|
@@ -703,13 +860,13 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b)
|
|
703
860
|
* - 32 bytes alignment if AVX is enabled.
|
704
861
|
* - 64 bytes alignment if AVX512 is enabled.
|
705
862
|
*
|
706
|
-
* This can be
|
863
|
+
* This can be controlled using the \c EIGEN_MAX_ALIGN_BYTES macro as documented
|
707
864
|
* \link TopicPreprocessorDirectivesPerformance there \endlink.
|
708
865
|
*
|
709
866
|
* Example:
|
710
867
|
* \code
|
711
868
|
* // Matrix4f requires 16 bytes alignment:
|
712
|
-
* std::map< int, Matrix4f, std::less<int>,
|
869
|
+
* std::map< int, Matrix4f, std::less<int>,
|
713
870
|
* aligned_allocator<std::pair<const int, Matrix4f> > > my_map_mat4;
|
714
871
|
* // Vector3f does not require 16 bytes alignment, no need to use Eigen's allocator:
|
715
872
|
* std::map< int, Vector3f > my_map_vec3;
|
@@ -744,18 +901,19 @@ public:
|
|
744
901
|
|
745
902
|
~aligned_allocator() {}
|
746
903
|
|
904
|
+
#if EIGEN_COMP_GNUC_STRICT && EIGEN_GNUC_AT_LEAST(7,0)
|
905
|
+
// In gcc, std::allocator::max_size() is buggy, which makes gcc trigger a warning:
|
906
|
+
// eigen/Eigen/src/Core/util/Memory.h:189:12: warning: argument 1 value '18446744073709551612' exceeds maximum object size 9223372036854775807
|
907
|
+
// See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=87544
|
908
|
+
size_type max_size() const {
|
909
|
+
return (std::numeric_limits<std::ptrdiff_t>::max)()/sizeof(T);
|
910
|
+
}
|
911
|
+
#endif
|
912
|
+
|
747
913
|
pointer allocate(size_type num, const void* /*hint*/ = 0)
|
748
914
|
{
|
749
915
|
internal::check_size_for_overflow<T>(num);
|
750
|
-
|
751
|
-
#if EIGEN_COMP_GNUC_STRICT && EIGEN_GNUC_AT_LEAST(7,0)
|
752
|
-
// workaround gcc bug https://gcc.gnu.org/bugzilla/show_bug.cgi?id=87544
|
753
|
-
// It triggered eigen/Eigen/src/Core/util/Memory.h:189:12: warning: argument 1 value '18446744073709551612' exceeds maximum object size 9223372036854775807
|
754
|
-
if(size>=std::size_t((std::numeric_limits<std::ptrdiff_t>::max)()))
|
755
|
-
return 0;
|
756
|
-
else
|
757
|
-
#endif
|
758
|
-
return static_cast<pointer>( internal::aligned_malloc(size) );
|
916
|
+
return static_cast<pointer>( internal::aligned_malloc(num * sizeof(T)) );
|
759
917
|
}
|
760
918
|
|
761
919
|
void deallocate(pointer p, size_type /*num*/)
|
@@ -914,20 +1072,32 @@ inline void queryCacheSizes_intel(int& l1, int& l2, int& l3, int max_std_funcs)
|
|
914
1072
|
{
|
915
1073
|
if(max_std_funcs>=4)
|
916
1074
|
queryCacheSizes_intel_direct(l1,l2,l3);
|
917
|
-
else
|
1075
|
+
else if(max_std_funcs>=2)
|
918
1076
|
queryCacheSizes_intel_codes(l1,l2,l3);
|
1077
|
+
else
|
1078
|
+
l1 = l2 = l3 = 0;
|
919
1079
|
}
|
920
1080
|
|
921
1081
|
inline void queryCacheSizes_amd(int& l1, int& l2, int& l3)
|
922
1082
|
{
|
923
1083
|
int abcd[4];
|
924
1084
|
abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
|
925
|
-
|
926
|
-
|
927
|
-
abcd
|
928
|
-
|
929
|
-
|
930
|
-
|
1085
|
+
|
1086
|
+
// First query the max supported function.
|
1087
|
+
EIGEN_CPUID(abcd,0x80000000,0);
|
1088
|
+
if(static_cast<numext::uint32_t>(abcd[0]) >= static_cast<numext::uint32_t>(0x80000006))
|
1089
|
+
{
|
1090
|
+
EIGEN_CPUID(abcd,0x80000005,0);
|
1091
|
+
l1 = (abcd[2] >> 24) * 1024; // C[31:24] = L1 size in KB
|
1092
|
+
abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
|
1093
|
+
EIGEN_CPUID(abcd,0x80000006,0);
|
1094
|
+
l2 = (abcd[2] >> 16) * 1024; // C[31;16] = l2 cache size in KB
|
1095
|
+
l3 = ((abcd[3] & 0xFFFC000) >> 18) * 512 * 1024; // D[31;18] = l3 cache size in 512KB
|
1096
|
+
}
|
1097
|
+
else
|
1098
|
+
{
|
1099
|
+
l1 = l2 = l3 = 0;
|
1100
|
+
}
|
931
1101
|
}
|
932
1102
|
#endif
|
933
1103
|
|
@@ -943,7 +1113,7 @@ inline void queryCacheSizes(int& l1, int& l2, int& l3)
|
|
943
1113
|
|
944
1114
|
// identify the CPU vendor
|
945
1115
|
EIGEN_CPUID(abcd,0x0,0);
|
946
|
-
int max_std_funcs = abcd[
|
1116
|
+
int max_std_funcs = abcd[0];
|
947
1117
|
if(cpuid_is_vendor(abcd,GenuineIntel))
|
948
1118
|
queryCacheSizes_intel(l1,l2,l3,max_std_funcs);
|
949
1119
|
else if(cpuid_is_vendor(abcd,AuthenticAMD) || cpuid_is_vendor(abcd,AMDisbetter_))
|