RubyGems - tomoto - Versions diffs - 0.2.3 → 0.3.0 - Mend

tomoto 0.2.3 → 0.3.0

Files changed (347) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +6 -0
data/README.md +8 -10
data/ext/tomoto/extconf.rb +6 -2
data/ext/tomoto/{ext.cpp → tomoto.cpp} +1 -1
data/lib/tomoto/version.rb +1 -1
data/lib/tomoto.rb +5 -1
data/vendor/EigenRand/EigenRand/Core.h +10 -10
data/vendor/EigenRand/EigenRand/Dists/Basic.h +208 -9
data/vendor/EigenRand/EigenRand/Dists/Discrete.h +52 -31
data/vendor/EigenRand/EigenRand/Dists/GammaPoisson.h +9 -8
data/vendor/EigenRand/EigenRand/Dists/NormalExp.h +28 -21
data/vendor/EigenRand/EigenRand/EigenRand +11 -6
data/vendor/EigenRand/EigenRand/Macro.h +13 -7
data/vendor/EigenRand/EigenRand/MorePacketMath.h +348 -740
data/vendor/EigenRand/EigenRand/MvDists/Multinomial.h +5 -3
data/vendor/EigenRand/EigenRand/MvDists/MvNormal.h +9 -3
data/vendor/EigenRand/EigenRand/PacketFilter.h +11 -253
data/vendor/EigenRand/EigenRand/PacketRandomEngine.h +21 -47
data/vendor/EigenRand/EigenRand/RandUtils.h +50 -344
data/vendor/EigenRand/EigenRand/arch/AVX/MorePacketMath.h +619 -0
data/vendor/EigenRand/EigenRand/arch/AVX/PacketFilter.h +149 -0
data/vendor/EigenRand/EigenRand/arch/AVX/RandUtils.h +228 -0
data/vendor/EigenRand/EigenRand/arch/NEON/MorePacketMath.h +473 -0
data/vendor/EigenRand/EigenRand/arch/NEON/PacketFilter.h +142 -0
data/vendor/EigenRand/EigenRand/arch/NEON/RandUtils.h +126 -0
data/vendor/EigenRand/EigenRand/arch/SSE/MorePacketMath.h +501 -0
data/vendor/EigenRand/EigenRand/arch/SSE/PacketFilter.h +133 -0
data/vendor/EigenRand/EigenRand/arch/SSE/RandUtils.h +120 -0
data/vendor/EigenRand/EigenRand/doc.h +24 -12
data/vendor/EigenRand/README.md +57 -4
data/vendor/eigen/COPYING.APACHE +203 -0
data/vendor/eigen/COPYING.BSD +1 -1
data/vendor/eigen/COPYING.MINPACK +51 -52
data/vendor/eigen/Eigen/Cholesky +0 -1
data/vendor/eigen/Eigen/Core +112 -265
data/vendor/eigen/Eigen/Eigenvalues +2 -3
data/vendor/eigen/Eigen/Geometry +5 -8
data/vendor/eigen/Eigen/Householder +0 -1
data/vendor/eigen/Eigen/Jacobi +0 -1
data/vendor/eigen/Eigen/KLUSupport +41 -0
data/vendor/eigen/Eigen/LU +2 -5
data/vendor/eigen/Eigen/OrderingMethods +0 -3
data/vendor/eigen/Eigen/PaStiXSupport +1 -0
data/vendor/eigen/Eigen/PardisoSupport +0 -0
data/vendor/eigen/Eigen/QR +2 -3
data/vendor/eigen/Eigen/QtAlignedMalloc +0 -1
data/vendor/eigen/Eigen/SVD +0 -1
data/vendor/eigen/Eigen/Sparse +0 -2
data/vendor/eigen/Eigen/SparseCholesky +0 -8
data/vendor/eigen/Eigen/SparseLU +4 -0
data/vendor/eigen/Eigen/SparseQR +0 -1
data/vendor/eigen/Eigen/src/Cholesky/LDLT.h +42 -27
data/vendor/eigen/Eigen/src/Cholesky/LLT.h +39 -23
data/vendor/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +90 -47
data/vendor/eigen/Eigen/src/Core/ArithmeticSequence.h +413 -0
data/vendor/eigen/Eigen/src/Core/Array.h +99 -11
data/vendor/eigen/Eigen/src/Core/ArrayBase.h +3 -3
data/vendor/eigen/Eigen/src/Core/ArrayWrapper.h +21 -21
data/vendor/eigen/Eigen/src/Core/Assign.h +1 -1
data/vendor/eigen/Eigen/src/Core/AssignEvaluator.h +125 -50
data/vendor/eigen/Eigen/src/Core/Assign_MKL.h +10 -10
data/vendor/eigen/Eigen/src/Core/BandMatrix.h +16 -16
data/vendor/eigen/Eigen/src/Core/Block.h +56 -60
data/vendor/eigen/Eigen/src/Core/BooleanRedux.h +29 -31
data/vendor/eigen/Eigen/src/Core/CommaInitializer.h +7 -3
data/vendor/eigen/Eigen/src/Core/CoreEvaluators.h +325 -272
data/vendor/eigen/Eigen/src/Core/CoreIterators.h +5 -0
data/vendor/eigen/Eigen/src/Core/CwiseBinaryOp.h +21 -22
data/vendor/eigen/Eigen/src/Core/CwiseNullaryOp.h +153 -18
data/vendor/eigen/Eigen/src/Core/CwiseUnaryOp.h +6 -6
data/vendor/eigen/Eigen/src/Core/CwiseUnaryView.h +14 -10
data/vendor/eigen/Eigen/src/Core/DenseBase.h +132 -42
data/vendor/eigen/Eigen/src/Core/DenseCoeffsBase.h +25 -21
data/vendor/eigen/Eigen/src/Core/DenseStorage.h +153 -71
data/vendor/eigen/Eigen/src/Core/Diagonal.h +21 -23
data/vendor/eigen/Eigen/src/Core/DiagonalMatrix.h +50 -2
data/vendor/eigen/Eigen/src/Core/DiagonalProduct.h +1 -1
data/vendor/eigen/Eigen/src/Core/Dot.h +10 -10
data/vendor/eigen/Eigen/src/Core/EigenBase.h +10 -9
data/vendor/eigen/Eigen/src/Core/ForceAlignedAccess.h +8 -4
data/vendor/eigen/Eigen/src/Core/Fuzzy.h +3 -3
data/vendor/eigen/Eigen/src/Core/GeneralProduct.h +20 -10
data/vendor/eigen/Eigen/src/Core/GenericPacketMath.h +599 -152
data/vendor/eigen/Eigen/src/Core/GlobalFunctions.h +40 -33
data/vendor/eigen/Eigen/src/Core/IO.h +40 -7
data/vendor/eigen/Eigen/src/Core/IndexedView.h +237 -0
data/vendor/eigen/Eigen/src/Core/Inverse.h +9 -10
data/vendor/eigen/Eigen/src/Core/Map.h +7 -7
data/vendor/eigen/Eigen/src/Core/MapBase.h +10 -3
data/vendor/eigen/Eigen/src/Core/MathFunctions.h +767 -125
data/vendor/eigen/Eigen/src/Core/MathFunctionsImpl.h +118 -19
data/vendor/eigen/Eigen/src/Core/Matrix.h +131 -25
data/vendor/eigen/Eigen/src/Core/MatrixBase.h +21 -3
data/vendor/eigen/Eigen/src/Core/NestByValue.h +25 -50
data/vendor/eigen/Eigen/src/Core/NoAlias.h +4 -3
data/vendor/eigen/Eigen/src/Core/NumTraits.h +107 -20
data/vendor/eigen/Eigen/src/Core/PartialReduxEvaluator.h +232 -0
data/vendor/eigen/Eigen/src/Core/PermutationMatrix.h +3 -31
data/vendor/eigen/Eigen/src/Core/PlainObjectBase.h +152 -59
data/vendor/eigen/Eigen/src/Core/Product.h +30 -25
data/vendor/eigen/Eigen/src/Core/ProductEvaluators.h +192 -125
data/vendor/eigen/Eigen/src/Core/Random.h +37 -1
data/vendor/eigen/Eigen/src/Core/Redux.h +180 -170
data/vendor/eigen/Eigen/src/Core/Ref.h +121 -23
data/vendor/eigen/Eigen/src/Core/Replicate.h +8 -8
data/vendor/eigen/Eigen/src/Core/Reshaped.h +454 -0
data/vendor/eigen/Eigen/src/Core/ReturnByValue.h +7 -5
data/vendor/eigen/Eigen/src/Core/Reverse.h +18 -12
data/vendor/eigen/Eigen/src/Core/Select.h +8 -6
data/vendor/eigen/Eigen/src/Core/SelfAdjointView.h +33 -20
data/vendor/eigen/Eigen/src/Core/Solve.h +14 -14
data/vendor/eigen/Eigen/src/Core/SolveTriangular.h +16 -16
data/vendor/eigen/Eigen/src/Core/SolverBase.h +41 -3
data/vendor/eigen/Eigen/src/Core/StableNorm.h +100 -70
data/vendor/eigen/Eigen/src/Core/StlIterators.h +463 -0
data/vendor/eigen/Eigen/src/Core/Stride.h +9 -4
data/vendor/eigen/Eigen/src/Core/Swap.h +5 -4
data/vendor/eigen/Eigen/src/Core/Transpose.h +88 -27
data/vendor/eigen/Eigen/src/Core/Transpositions.h +26 -47
data/vendor/eigen/Eigen/src/Core/TriangularMatrix.h +93 -75
data/vendor/eigen/Eigen/src/Core/VectorBlock.h +5 -5
data/vendor/eigen/Eigen/src/Core/VectorwiseOp.h +159 -70
data/vendor/eigen/Eigen/src/Core/Visitor.h +137 -29
data/vendor/eigen/Eigen/src/Core/arch/AVX/Complex.h +50 -129
data/vendor/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +126 -337
data/vendor/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +1092 -155
data/vendor/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +65 -1
data/vendor/eigen/Eigen/src/Core/arch/AVX512/Complex.h +422 -0
data/vendor/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +207 -236
data/vendor/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1482 -495
data/vendor/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +89 -0
data/vendor/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +152 -165
data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +19 -251
data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2937 -0
data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +221 -0
data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +629 -0
data/vendor/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2042 -392
data/vendor/eigen/Eigen/src/Core/arch/CUDA/Complex.h +235 -80
data/vendor/eigen/Eigen/src/Core/arch/Default/BFloat16.h +700 -0
data/vendor/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +102 -14
data/vendor/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1649 -0
data/vendor/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +110 -0
data/vendor/eigen/Eigen/src/Core/arch/Default/Half.h +942 -0
data/vendor/eigen/Eigen/src/Core/arch/Default/Settings.h +1 -1
data/vendor/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +120 -0
data/vendor/eigen/Eigen/src/Core/arch/{CUDA → GPU}/MathFunctions.h +16 -4
data/vendor/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1685 -0
data/vendor/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +80 -0
data/vendor/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
data/vendor/eigen/Eigen/src/Core/arch/MSA/Complex.h +648 -0
data/vendor/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +387 -0
data/vendor/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1233 -0
data/vendor/eigen/Eigen/src/Core/arch/NEON/Complex.h +313 -219
data/vendor/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +183 -0
data/vendor/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +54 -70
data/vendor/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4376 -549
data/vendor/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1419 -0
data/vendor/eigen/Eigen/src/Core/arch/SSE/Complex.h +59 -179
data/vendor/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +65 -428
data/vendor/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +893 -283
data/vendor/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +65 -0
data/vendor/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +44 -0
data/vendor/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +752 -0
data/vendor/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +49 -0
data/vendor/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +232 -0
data/vendor/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +301 -0
data/vendor/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +670 -0
data/vendor/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +694 -0
data/vendor/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +85 -0
data/vendor/eigen/Eigen/src/Core/arch/ZVector/Complex.h +212 -183
data/vendor/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +101 -5
data/vendor/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +510 -395
data/vendor/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +11 -2
data/vendor/eigen/Eigen/src/Core/functors/BinaryFunctors.h +112 -46
data/vendor/eigen/Eigen/src/Core/functors/NullaryFunctors.h +31 -30
data/vendor/eigen/Eigen/src/Core/functors/StlFunctors.h +32 -2
data/vendor/eigen/Eigen/src/Core/functors/UnaryFunctors.h +355 -16
data/vendor/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1075 -586
data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +49 -24
data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +41 -35
data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +6 -6
data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +4 -2
data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +382 -483
data/vendor/eigen/Eigen/src/Core/products/Parallelizer.h +22 -5
data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +53 -30
data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +16 -8
data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +8 -6
data/vendor/eigen/Eigen/src/Core/products/SelfadjointProduct.h +4 -4
data/vendor/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +5 -4
data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +33 -27
data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +14 -12
data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +36 -34
data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +8 -4
data/vendor/eigen/Eigen/src/Core/products/TriangularSolverVector.h +13 -10
data/vendor/eigen/Eigen/src/Core/util/BlasUtil.h +304 -119
data/vendor/eigen/Eigen/src/Core/util/ConfigureVectorization.h +512 -0
data/vendor/eigen/Eigen/src/Core/util/Constants.h +25 -9
data/vendor/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +26 -3
data/vendor/eigen/Eigen/src/Core/util/ForwardDeclarations.h +29 -9
data/vendor/eigen/Eigen/src/Core/util/IndexedViewHelper.h +186 -0
data/vendor/eigen/Eigen/src/Core/util/IntegralConstant.h +272 -0
data/vendor/eigen/Eigen/src/Core/util/MKL_support.h +8 -1
data/vendor/eigen/Eigen/src/Core/util/Macros.h +709 -246
data/vendor/eigen/Eigen/src/Core/util/Memory.h +222 -52
data/vendor/eigen/Eigen/src/Core/util/Meta.h +355 -77
data/vendor/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +5 -1
data/vendor/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
data/vendor/eigen/Eigen/src/Core/util/StaticAssert.h +8 -5
data/vendor/eigen/Eigen/src/Core/util/SymbolicIndex.h +293 -0
data/vendor/eigen/Eigen/src/Core/util/XprHelper.h +65 -30
data/vendor/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +1 -1
data/vendor/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +7 -4
data/vendor/eigen/Eigen/src/Eigenvalues/EigenSolver.h +2 -2
data/vendor/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +1 -1
data/vendor/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +2 -2
data/vendor/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +2 -2
data/vendor/eigen/Eigen/src/Eigenvalues/RealQZ.h +9 -6
data/vendor/eigen/Eigen/src/Eigenvalues/RealSchur.h +21 -9
data/vendor/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +77 -43
data/vendor/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +20 -15
data/vendor/eigen/Eigen/src/Geometry/AlignedBox.h +99 -5
data/vendor/eigen/Eigen/src/Geometry/AngleAxis.h +4 -4
data/vendor/eigen/Eigen/src/Geometry/EulerAngles.h +3 -3
data/vendor/eigen/Eigen/src/Geometry/Homogeneous.h +15 -11
data/vendor/eigen/Eigen/src/Geometry/Hyperplane.h +1 -1
data/vendor/eigen/Eigen/src/Geometry/OrthoMethods.h +3 -2
data/vendor/eigen/Eigen/src/Geometry/ParametrizedLine.h +39 -2
data/vendor/eigen/Eigen/src/Geometry/Quaternion.h +70 -14
data/vendor/eigen/Eigen/src/Geometry/Rotation2D.h +3 -3
data/vendor/eigen/Eigen/src/Geometry/Scaling.h +23 -5
data/vendor/eigen/Eigen/src/Geometry/Transform.h +88 -67
data/vendor/eigen/Eigen/src/Geometry/Translation.h +6 -12
data/vendor/eigen/Eigen/src/Geometry/Umeyama.h +1 -1
data/vendor/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +168 -0
data/vendor/eigen/Eigen/src/Householder/BlockHouseholder.h +9 -2
data/vendor/eigen/Eigen/src/Householder/Householder.h +8 -4
data/vendor/eigen/Eigen/src/Householder/HouseholderSequence.h +123 -48
data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +15 -15
data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +7 -23
data/vendor/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +5 -22
data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +41 -47
data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +51 -60
data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +70 -20
data/vendor/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +2 -20
data/vendor/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +11 -9
data/vendor/eigen/Eigen/src/Jacobi/Jacobi.h +31 -10
data/vendor/eigen/Eigen/src/KLUSupport/KLUSupport.h +358 -0
data/vendor/eigen/Eigen/src/LU/Determinant.h +35 -19
data/vendor/eigen/Eigen/src/LU/FullPivLU.h +29 -43
data/vendor/eigen/Eigen/src/LU/InverseImpl.h +25 -8
data/vendor/eigen/Eigen/src/LU/PartialPivLU.h +71 -58
data/vendor/eigen/Eigen/src/LU/arch/InverseSize4.h +351 -0
data/vendor/eigen/Eigen/src/OrderingMethods/Amd.h +7 -17
data/vendor/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +297 -277
data/vendor/eigen/Eigen/src/OrderingMethods/Ordering.h +6 -10
data/vendor/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +1 -1
data/vendor/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +9 -7
data/vendor/eigen/Eigen/src/QR/ColPivHouseholderQR.h +41 -20
data/vendor/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +100 -27
data/vendor/eigen/Eigen/src/QR/FullPivHouseholderQR.h +59 -22
data/vendor/eigen/Eigen/src/QR/HouseholderQR.h +48 -23
data/vendor/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +25 -3
data/vendor/eigen/Eigen/src/SVD/BDCSVD.h +183 -63
data/vendor/eigen/Eigen/src/SVD/JacobiSVD.h +22 -14
data/vendor/eigen/Eigen/src/SVD/SVDBase.h +83 -22
data/vendor/eigen/Eigen/src/SVD/UpperBidiagonalization.h +3 -3
data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +17 -9
data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +12 -37
data/vendor/eigen/Eigen/src/SparseCore/AmbiVector.h +3 -2
data/vendor/eigen/Eigen/src/SparseCore/CompressedStorage.h +16 -0
data/vendor/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +6 -6
data/vendor/eigen/Eigen/src/SparseCore/SparseAssign.h +81 -27
data/vendor/eigen/Eigen/src/SparseCore/SparseBlock.h +25 -57
data/vendor/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +40 -11
data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +11 -15
data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +4 -2
data/vendor/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +30 -8
data/vendor/eigen/Eigen/src/SparseCore/SparseMatrix.h +126 -11
data/vendor/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +5 -12
data/vendor/eigen/Eigen/src/SparseCore/SparseProduct.h +13 -1
data/vendor/eigen/Eigen/src/SparseCore/SparseRef.h +7 -7
data/vendor/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +5 -2
data/vendor/eigen/Eigen/src/SparseCore/SparseUtil.h +8 -0
data/vendor/eigen/Eigen/src/SparseCore/SparseVector.h +1 -1
data/vendor/eigen/Eigen/src/SparseCore/SparseView.h +1 -0
data/vendor/eigen/Eigen/src/SparseLU/SparseLU.h +162 -12
data/vendor/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +1 -1
data/vendor/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +76 -2
data/vendor/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +2 -2
data/vendor/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +1 -1
data/vendor/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +1 -1
data/vendor/eigen/Eigen/src/SparseQR/SparseQR.h +19 -6
data/vendor/eigen/Eigen/src/StlSupport/StdDeque.h +2 -12
data/vendor/eigen/Eigen/src/StlSupport/StdList.h +2 -2
data/vendor/eigen/Eigen/src/StlSupport/StdVector.h +2 -2
data/vendor/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +6 -8
data/vendor/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +175 -39
data/vendor/eigen/Eigen/src/misc/lapacke.h +5 -4
data/vendor/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +28 -2
data/vendor/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +155 -11
data/vendor/eigen/Eigen/src/plugins/BlockMethods.h +626 -242
data/vendor/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +14 -0
data/vendor/eigen/Eigen/src/plugins/IndexedViewMethods.h +262 -0
data/vendor/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +4 -4
data/vendor/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +10 -0
data/vendor/eigen/Eigen/src/plugins/ReshapedMethods.h +149 -0
data/vendor/eigen/README.md +2 -0
data/vendor/eigen/bench/btl/README +1 -1
data/vendor/eigen/bench/tensors/README +6 -7
data/vendor/eigen/ci/README.md +56 -0
data/vendor/eigen/demos/mix_eigen_and_c/README +1 -1
data/vendor/eigen/unsupported/Eigen/CXX11/src/Tensor/README.md +213 -158
data/vendor/eigen/unsupported/README.txt +1 -1
data/vendor/tomotopy/README.kr.rst +21 -0
data/vendor/tomotopy/README.rst +20 -0
data/vendor/tomotopy/src/Labeling/FoRelevance.cpp +2 -2
data/vendor/tomotopy/src/Labeling/Phraser.hpp +1 -1
data/vendor/tomotopy/src/TopicModel/CTModel.hpp +2 -1
data/vendor/tomotopy/src/TopicModel/DMRModel.hpp +2 -1
data/vendor/tomotopy/src/TopicModel/DTModel.hpp +1 -1
data/vendor/tomotopy/src/TopicModel/GDMRModel.hpp +2 -2
data/vendor/tomotopy/src/TopicModel/HDP.h +1 -0
data/vendor/tomotopy/src/TopicModel/HDPModel.hpp +53 -2
data/vendor/tomotopy/src/TopicModel/HLDAModel.hpp +1 -1
data/vendor/tomotopy/src/TopicModel/HPAModel.hpp +1 -0
data/vendor/tomotopy/src/TopicModel/LDACVB0Model.hpp +2 -2
data/vendor/tomotopy/src/TopicModel/LDAModel.hpp +16 -5
data/vendor/tomotopy/src/TopicModel/LLDAModel.hpp +1 -0
data/vendor/tomotopy/src/TopicModel/MGLDAModel.hpp +1 -0
data/vendor/tomotopy/src/TopicModel/PLDAModel.hpp +1 -0
data/vendor/tomotopy/src/TopicModel/PT.h +3 -1
data/vendor/tomotopy/src/TopicModel/PTModel.hpp +31 -1
data/vendor/tomotopy/src/TopicModel/SLDAModel.hpp +2 -2
data/vendor/tomotopy/src/TopicModel/TopicModel.hpp +7 -5
data/vendor/tomotopy/src/Utils/EigenAddonOps.hpp +36 -1
data/vendor/tomotopy/src/Utils/exception.h +6 -0
data/vendor/tomotopy/src/Utils/sample.hpp +14 -12
data/vendor/tomotopy/src/Utils/sse_gamma.h +0 -3
metadata +60 -14
data/vendor/eigen/Eigen/CMakeLists.txt +0 -19
data/vendor/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -674
data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
data/vendor/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
data/vendor/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
data/vendor/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338

data/vendor/eigen/Eigen/src/Core/CoreEvaluators.h CHANGED Viewed

@@ -14,7 +14,7 @@
 #define EIGEN_COREEVALUATORS_H
 namespace Eigen {
 namespace internal {
 // This class returns the evaluator kind from the expression storage kind.
@@ -63,8 +63,8 @@ template< typename T,
 template< typename T,
           typename Kind   = typename evaluator_traits<typename T::NestedExpression>::Kind,
           typename Scalar = typename T::Scalar> struct unary_evaluator;
-// evaluator_traits<T> contains traits for evaluator<T>
+// evaluator_traits<T> contains traits for evaluator<T>
 template<typename T>
 struct evaluator_traits_base
@@ -90,7 +90,8 @@ template<typename T>
 struct evaluator : public unary_evaluator<T>
 {
   typedef unary_evaluator<T> Base;
-  EIGEN_DEVICE_FUNC explicit evaluator(const T& xpr) : Base(xpr) {}
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  explicit evaluator(const T& xpr) : Base(xpr) {}
 };
@@ -99,21 +100,29 @@ template<typename T>
 struct evaluator<const T>
   : evaluator<T>
 {
-  EIGEN_DEVICE_FUNC
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   explicit evaluator(const T& xpr) : evaluator<T>(xpr) {}
 };
 // ---------- base class for all evaluators ----------
 template<typename ExpressionType>
-struct evaluator_base : public noncopyable
+struct evaluator_base
 {
   // TODO that's not very nice to have to propagate all these traits. They are currently only needed to handle outer,inner indices.
   typedef traits<ExpressionType> ExpressionTraits;
   enum {
     Alignment = 0
   };
+  // noncopyable:
+  // Don't make this class inherit noncopyable as this kills EBO (Empty Base Optimization)
+  // and makes complex evaluators much larger than they should be.
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE evaluator_base() {}
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ~evaluator_base() {}
+private:
+  EIGEN_DEVICE_FUNC evaluator_base(const evaluator_base&);
+  EIGEN_DEVICE_FUNC const evaluator_base& operator=(const evaluator_base&);
 };
 // -------------------- Matrix and Array --------------------
@@ -123,6 +132,33 @@ struct evaluator_base : public noncopyable
 // Here we directly specialize evaluator. This is not really a unary expression, and it is, by definition, dense,
 // so no need for more sophisticated dispatching.
+// This helper makes it possible to completely eliminate m_outerStride if it is known at compile time.
+template<typename Scalar,int OuterStride> class plainobjectbase_evaluator_data {
+public:
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  plainobjectbase_evaluator_data(const Scalar* ptr, Index outerStride) : data(ptr)
+  {
+#ifndef EIGEN_INTERNAL_DEBUGGING
+    EIGEN_UNUSED_VARIABLE(outerStride);
+#endif
+    eigen_internal_assert(outerStride==OuterStride);
+  }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+  Index outerStride() const EIGEN_NOEXCEPT { return OuterStride; }
+  const Scalar *data;
+};
+template<typename Scalar> class plainobjectbase_evaluator_data<Scalar,Dynamic> {
+public:
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  plainobjectbase_evaluator_data(const Scalar* ptr, Index outerStride) : data(ptr), m_outerStride(outerStride) {}
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  Index outerStride() const { return m_outerStride; }
+  const Scalar *data;
+protected:
+  Index m_outerStride;
+};
 template<typename Derived>
 struct evaluator<PlainObjectBase<Derived> >
   : evaluator_base<Derived>
@@ -136,23 +172,28 @@ struct evaluator<PlainObjectBase<Derived> >
     IsVectorAtCompileTime = PlainObjectType::IsVectorAtCompileTime,
     RowsAtCompileTime = PlainObjectType::RowsAtCompileTime,
     ColsAtCompileTime = PlainObjectType::ColsAtCompileTime,
     CoeffReadCost = NumTraits<Scalar>::ReadCost,
     Flags = traits<Derived>::EvaluatorFlags,
     Alignment = traits<Derived>::Alignment
   };
-  EIGEN_DEVICE_FUNC evaluator()
-    : m_data(0),
-      m_outerStride(IsVectorAtCompileTime  ? 0
-                                           : int(IsRowMajor) ? ColsAtCompileTime
-                                           : RowsAtCompileTime)
+  enum {
+    // We do not need to know the outer stride for vectors
+    OuterStrideAtCompileTime = IsVectorAtCompileTime  ? 0
+                                                      : int(IsRowMajor) ? ColsAtCompileTime
+                                                                        : RowsAtCompileTime
+  };
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  evaluator()
+    : m_d(0,OuterStrideAtCompileTime)
   {
     EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
   }
-  EIGEN_DEVICE_FUNC explicit evaluator(const PlainObjectType& m)
-    : m_data(m.data()), m_outerStride(IsVectorAtCompileTime ? 0 : m.outerStride())
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  explicit evaluator(const PlainObjectType& m)
+    : m_d(m.data(),IsVectorAtCompileTime ? 0 : m.outerStride())
   {
     EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
   }
@@ -161,30 +202,30 @@ struct evaluator<PlainObjectBase<Derived> >
   CoeffReturnType coeff(Index row, Index col) const
   {
     if (IsRowMajor)
-      return m_data[row * m_outerStride.value() + col];
+      return m_d.data[row * m_d.outerStride() + col];
     else
-      return m_data[row + col * m_outerStride.value()];
+      return m_d.data[row + col * m_d.outerStride()];
   }
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   CoeffReturnType coeff(Index index) const
   {
-    return m_data[index];
+    return m_d.data[index];
   }
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   Scalar& coeffRef(Index row, Index col)
   {
     if (IsRowMajor)
-      return const_cast<Scalar*>(m_data)[row * m_outerStride.value() + col];
+      return const_cast<Scalar*>(m_d.data)[row * m_d.outerStride() + col];
     else
-      return const_cast<Scalar*>(m_data)[row + col * m_outerStride.value()];
+      return const_cast<Scalar*>(m_d.data)[row + col * m_d.outerStride()];
   }
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   Scalar& coeffRef(Index index)
   {
-    return const_cast<Scalar*>(m_data)[index];
+    return const_cast<Scalar*>(m_d.data)[index];
   }
   template<int LoadMode, typename PacketType>
@@ -192,16 +233,16 @@ struct evaluator<PlainObjectBase<Derived> >
   PacketType packet(Index row, Index col) const
   {
     if (IsRowMajor)
-      return ploadt<PacketType, LoadMode>(m_data + row * m_outerStride.value() + col);
+      return ploadt<PacketType, LoadMode>(m_d.data + row * m_d.outerStride() + col);
     else
-      return ploadt<PacketType, LoadMode>(m_data + row + col * m_outerStride.value());
+      return ploadt<PacketType, LoadMode>(m_d.data + row + col * m_d.outerStride());
   }
   template<int LoadMode, typename PacketType>
   EIGEN_STRONG_INLINE
   PacketType packet(Index index) const
   {
-    return ploadt<PacketType, LoadMode>(m_data + index);
+    return ploadt<PacketType, LoadMode>(m_d.data + index);
   }
   template<int StoreMode,typename PacketType>
@@ -210,26 +251,22 @@ struct evaluator<PlainObjectBase<Derived> >
   {
     if (IsRowMajor)
       return pstoret<Scalar, PacketType, StoreMode>
-	            (const_cast<Scalar*>(m_data) + row * m_outerStride.value() + col, x);
+	            (const_cast<Scalar*>(m_d.data) + row * m_d.outerStride() + col, x);
     else
       return pstoret<Scalar, PacketType, StoreMode>
-                    (const_cast<Scalar*>(m_data) + row + col * m_outerStride.value(), x);
+                    (const_cast<Scalar*>(m_d.data) + row + col * m_d.outerStride(), x);
   }
   template<int StoreMode, typename PacketType>
   EIGEN_STRONG_INLINE
   void writePacket(Index index, const PacketType& x)
   {
-    return pstoret<Scalar, PacketType, StoreMode>(const_cast<Scalar*>(m_data) + index, x);
+    return pstoret<Scalar, PacketType, StoreMode>(const_cast<Scalar*>(m_d.data) + index, x);
   }
 protected:
-  const Scalar *m_data;
-  // We do not need to know the outer stride for vectors
-  variable_if_dynamic<Index, IsVectorAtCompileTime  ? 0
-                                                    : int(IsRowMajor) ? ColsAtCompileTime
-                                                    : RowsAtCompileTime> m_outerStride;
+  plainobjectbase_evaluator_data<Scalar,OuterStrideAtCompileTime> m_d;
 };
 template<typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>
@@ -237,11 +274,13 @@ struct evaluator<Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols> >
   : evaluator<PlainObjectBase<Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols> > >
 {
   typedef Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols> XprType;
-  EIGEN_DEVICE_FUNC evaluator() {}
-  EIGEN_DEVICE_FUNC explicit evaluator(const XprType& m)
-    : evaluator<PlainObjectBase<XprType> >(m)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  evaluator() {}
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  explicit evaluator(const XprType& m)
+    : evaluator<PlainObjectBase<XprType> >(m)
   { }
 };
@@ -251,10 +290,12 @@ struct evaluator<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> >
 {
   typedef Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> XprType;
-  EIGEN_DEVICE_FUNC evaluator() {}
-  EIGEN_DEVICE_FUNC explicit evaluator(const XprType& m)
-    : evaluator<PlainObjectBase<XprType> >(m)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  evaluator() {}
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  explicit evaluator(const XprType& m)
+    : evaluator<PlainObjectBase<XprType> >(m)
   { }
 };
@@ -265,14 +306,15 @@ struct unary_evaluator<Transpose<ArgType>, IndexBased>
   : evaluator_base<Transpose<ArgType> >
 {
   typedef Transpose<ArgType> XprType;
   enum {
-    CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
+    CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
     Flags = evaluator<ArgType>::Flags ^ RowMajorBit,
     Alignment = evaluator<ArgType>::Alignment
   };
-  EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& t) : m_argImpl(t.nestedExpression()) {}
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  explicit unary_evaluator(const XprType& t) : m_argImpl(t.nestedExpression()) {}
   typedef typename XprType::Scalar Scalar;
   typedef typename XprType::CoeffReturnType CoeffReturnType;
@@ -457,10 +499,10 @@ struct evaluator<CwiseNullaryOp<NullaryOp,PlainObjectType> >
 {
   typedef CwiseNullaryOp<NullaryOp,PlainObjectType> XprType;
   typedef typename internal::remove_all<PlainObjectType>::type PlainObjectTypeCleaned;
   enum {
     CoeffReadCost = internal::functor_traits<NullaryOp>::Cost,
     Flags = (evaluator<PlainObjectTypeCleaned>::Flags
           &  (  HereditaryBits
               | (functor_has_linear_access<NullaryOp>::ret  ? LinearAccessBit : 0)
@@ -517,19 +559,17 @@ struct unary_evaluator<CwiseUnaryOp<UnaryOp, ArgType>, IndexBased >
   : evaluator_base<CwiseUnaryOp<UnaryOp, ArgType> >
 {
   typedef CwiseUnaryOp<UnaryOp, ArgType> XprType;
   enum {
-    CoeffReadCost = evaluator<ArgType>::CoeffReadCost + functor_traits<UnaryOp>::Cost,
+    CoeffReadCost = int(evaluator<ArgType>::CoeffReadCost) + int(functor_traits<UnaryOp>::Cost),
     Flags = evaluator<ArgType>::Flags
           & (HereditaryBits | LinearAccessBit | (functor_traits<UnaryOp>::PacketAccess ? PacketAccessBit : 0)),
     Alignment = evaluator<ArgType>::Alignment
   };
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  explicit unary_evaluator(const XprType& op)
-    : m_functor(op.functor()),
-      m_argImpl(op.nestedExpression())
+  explicit unary_evaluator(const XprType& op) : m_d(op)
   {
     EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits<UnaryOp>::Cost);
     EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
@@ -540,32 +580,43 @@ struct unary_evaluator<CwiseUnaryOp<UnaryOp, ArgType>, IndexBased >
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   CoeffReturnType coeff(Index row, Index col) const
   {
-    return m_functor(m_argImpl.coeff(row, col));
+    return m_d.func()(m_d.argImpl.coeff(row, col));
   }
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   CoeffReturnType coeff(Index index) const
   {
-    return m_functor(m_argImpl.coeff(index));
+    return m_d.func()(m_d.argImpl.coeff(index));
   }
   template<int LoadMode, typename PacketType>
   EIGEN_STRONG_INLINE
   PacketType packet(Index row, Index col) const
   {
-    return m_functor.packetOp(m_argImpl.template packet<LoadMode, PacketType>(row, col));
+    return m_d.func().packetOp(m_d.argImpl.template packet<LoadMode, PacketType>(row, col));
   }
   template<int LoadMode, typename PacketType>
   EIGEN_STRONG_INLINE
   PacketType packet(Index index) const
   {
-    return m_functor.packetOp(m_argImpl.template packet<LoadMode, PacketType>(index));
+    return m_d.func().packetOp(m_d.argImpl.template packet<LoadMode, PacketType>(index));
   }
 protected:
-  const UnaryOp m_functor;
-  evaluator<ArgType> m_argImpl;
+  // this helper permits to completely eliminate the functor if it is empty
+  struct Data
+  {
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+    Data(const XprType& xpr) : op(xpr.functor()), argImpl(xpr.nestedExpression()) {}
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+    const UnaryOp& func() const { return op; }
+    UnaryOp op;
+    evaluator<ArgType> argImpl;
+  };
+  Data m_d;
 };
 // -------------------- CwiseTernaryOp --------------------
@@ -577,7 +628,7 @@ struct evaluator<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> >
 {
   typedef CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> XprType;
   typedef ternary_evaluator<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> > Base;
   EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {}
 };
@@ -586,10 +637,10 @@ struct ternary_evaluator<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3>, IndexBased
   : evaluator_base<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> >
 {
   typedef CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> XprType;
   enum {
-    CoeffReadCost = evaluator<Arg1>::CoeffReadCost + evaluator<Arg2>::CoeffReadCost + evaluator<Arg3>::CoeffReadCost + functor_traits<TernaryOp>::Cost,
+    CoeffReadCost = int(evaluator<Arg1>::CoeffReadCost) + int(evaluator<Arg2>::CoeffReadCost) + int(evaluator<Arg3>::CoeffReadCost) + int(functor_traits<TernaryOp>::Cost),
     Arg1Flags = evaluator<Arg1>::Flags,
     Arg2Flags = evaluator<Arg2>::Flags,
     Arg3Flags = evaluator<Arg3>::Flags,
@@ -609,11 +660,7 @@ struct ternary_evaluator<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3>, IndexBased
         evaluator<Arg3>::Alignment)
   };
-  EIGEN_DEVICE_FUNC explicit ternary_evaluator(const XprType& xpr)
-    : m_functor(xpr.functor()),
-      m_arg1Impl(xpr.arg1()),
-      m_arg2Impl(xpr.arg2()),
-      m_arg3Impl(xpr.arg3())
+  EIGEN_DEVICE_FUNC explicit ternary_evaluator(const XprType& xpr) : m_d(xpr)
   {
     EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits<TernaryOp>::Cost);
     EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
@@ -624,38 +671,48 @@ struct ternary_evaluator<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3>, IndexBased
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   CoeffReturnType coeff(Index row, Index col) const
   {
-    return m_functor(m_arg1Impl.coeff(row, col), m_arg2Impl.coeff(row, col), m_arg3Impl.coeff(row, col));
+    return m_d.func()(m_d.arg1Impl.coeff(row, col), m_d.arg2Impl.coeff(row, col), m_d.arg3Impl.coeff(row, col));
   }
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   CoeffReturnType coeff(Index index) const
   {
-    return m_functor(m_arg1Impl.coeff(index), m_arg2Impl.coeff(index), m_arg3Impl.coeff(index));
+    return m_d.func()(m_d.arg1Impl.coeff(index), m_d.arg2Impl.coeff(index), m_d.arg3Impl.coeff(index));
   }
   template<int LoadMode, typename PacketType>
   EIGEN_STRONG_INLINE
   PacketType packet(Index row, Index col) const
   {
-    return m_functor.packetOp(m_arg1Impl.template packet<LoadMode,PacketType>(row, col),
-                              m_arg2Impl.template packet<LoadMode,PacketType>(row, col),
-                              m_arg3Impl.template packet<LoadMode,PacketType>(row, col));
+    return m_d.func().packetOp(m_d.arg1Impl.template packet<LoadMode,PacketType>(row, col),
+                               m_d.arg2Impl.template packet<LoadMode,PacketType>(row, col),
+                               m_d.arg3Impl.template packet<LoadMode,PacketType>(row, col));
   }
   template<int LoadMode, typename PacketType>
   EIGEN_STRONG_INLINE
   PacketType packet(Index index) const
   {
-    return m_functor.packetOp(m_arg1Impl.template packet<LoadMode,PacketType>(index),
-                              m_arg2Impl.template packet<LoadMode,PacketType>(index),
-                              m_arg3Impl.template packet<LoadMode,PacketType>(index));
+    return m_d.func().packetOp(m_d.arg1Impl.template packet<LoadMode,PacketType>(index),
+                               m_d.arg2Impl.template packet<LoadMode,PacketType>(index),
+                               m_d.arg3Impl.template packet<LoadMode,PacketType>(index));
   }
 protected:
-  const TernaryOp m_functor;
-  evaluator<Arg1> m_arg1Impl;
-  evaluator<Arg2> m_arg2Impl;
-  evaluator<Arg3> m_arg3Impl;
+  // this helper permits to completely eliminate the functor if it is empty
+  struct Data
+  {
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+    Data(const XprType& xpr) : op(xpr.functor()), arg1Impl(xpr.arg1()), arg2Impl(xpr.arg2()), arg3Impl(xpr.arg3()) {}
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+    const TernaryOp& func() const { return op; }
+    TernaryOp op;
+    evaluator<Arg1> arg1Impl;
+    evaluator<Arg2> arg2Impl;
+    evaluator<Arg3> arg3Impl;
+  };
+  Data m_d;
 };
 // -------------------- CwiseBinaryOp --------------------
@@ -667,8 +724,9 @@ struct evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
 {
   typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> XprType;
   typedef binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs> > Base;
-  EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {}
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  explicit evaluator(const XprType& xpr) : Base(xpr) {}
 };
 template<typename BinaryOp, typename Lhs, typename Rhs>
@@ -676,10 +734,10 @@ struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IndexBased, IndexBase
   : evaluator_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
 {
   typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> XprType;
   enum {
-    CoeffReadCost = evaluator<Lhs>::CoeffReadCost + evaluator<Rhs>::CoeffReadCost + functor_traits<BinaryOp>::Cost,
+    CoeffReadCost = int(evaluator<Lhs>::CoeffReadCost) + int(evaluator<Rhs>::CoeffReadCost) + int(functor_traits<BinaryOp>::Cost),
     LhsFlags = evaluator<Lhs>::Flags,
     RhsFlags = evaluator<Rhs>::Flags,
     SameType = is_same<typename Lhs::Scalar,typename Rhs::Scalar>::value,
@@ -696,10 +754,8 @@ struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IndexBased, IndexBase
     Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator<Lhs>::Alignment,evaluator<Rhs>::Alignment)
   };
-  EIGEN_DEVICE_FUNC explicit binary_evaluator(const XprType& xpr)
-    : m_functor(xpr.functor()),
-      m_lhsImpl(xpr.lhs()),
-      m_rhsImpl(xpr.rhs())
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  explicit binary_evaluator(const XprType& xpr) : m_d(xpr)
   {
     EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits<BinaryOp>::Cost);
     EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
@@ -710,35 +766,46 @@ struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IndexBased, IndexBase
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   CoeffReturnType coeff(Index row, Index col) const
   {
-    return m_functor(m_lhsImpl.coeff(row, col), m_rhsImpl.coeff(row, col));
+    return m_d.func()(m_d.lhsImpl.coeff(row, col), m_d.rhsImpl.coeff(row, col));
   }
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   CoeffReturnType coeff(Index index) const
   {
-    return m_functor(m_lhsImpl.coeff(index), m_rhsImpl.coeff(index));
+    return m_d.func()(m_d.lhsImpl.coeff(index), m_d.rhsImpl.coeff(index));
   }
   template<int LoadMode, typename PacketType>
   EIGEN_STRONG_INLINE
   PacketType packet(Index row, Index col) const
   {
-    return m_functor.packetOp(m_lhsImpl.template packet<LoadMode,PacketType>(row, col),
-                              m_rhsImpl.template packet<LoadMode,PacketType>(row, col));
+    return m_d.func().packetOp(m_d.lhsImpl.template packet<LoadMode,PacketType>(row, col),
+                               m_d.rhsImpl.template packet<LoadMode,PacketType>(row, col));
   }
   template<int LoadMode, typename PacketType>
   EIGEN_STRONG_INLINE
   PacketType packet(Index index) const
   {
-    return m_functor.packetOp(m_lhsImpl.template packet<LoadMode,PacketType>(index),
-                              m_rhsImpl.template packet<LoadMode,PacketType>(index));
+    return m_d.func().packetOp(m_d.lhsImpl.template packet<LoadMode,PacketType>(index),
+                               m_d.rhsImpl.template packet<LoadMode,PacketType>(index));
   }
 protected:
-  const BinaryOp m_functor;
-  evaluator<Lhs> m_lhsImpl;
-  evaluator<Rhs> m_rhsImpl;
+  // this helper permits to completely eliminate the functor if it is empty
+  struct Data
+  {
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+    Data(const XprType& xpr) : op(xpr.functor()), lhsImpl(xpr.lhs()), rhsImpl(xpr.rhs()) {}
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+    const BinaryOp& func() const { return op; }
+    BinaryOp op;
+    evaluator<Lhs> lhsImpl;
+    evaluator<Rhs> rhsImpl;
+  };
+  Data m_d;
 };
 // -------------------- CwiseUnaryView --------------------
@@ -748,18 +815,16 @@ struct unary_evaluator<CwiseUnaryView<UnaryOp, ArgType>, IndexBased>
   : evaluator_base<CwiseUnaryView<UnaryOp, ArgType> >
 {
   typedef CwiseUnaryView<UnaryOp, ArgType> XprType;
   enum {
-    CoeffReadCost = evaluator<ArgType>::CoeffReadCost + functor_traits<UnaryOp>::Cost,
+    CoeffReadCost = int(evaluator<ArgType>::CoeffReadCost) + int(functor_traits<UnaryOp>::Cost),
     Flags = (evaluator<ArgType>::Flags & (HereditaryBits | LinearAccessBit | DirectAccessBit)),
     Alignment = 0 // FIXME it is not very clear why alignment is necessarily lost...
   };
-  EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op)
-    : m_unaryOp(op.functor()),
-      m_argImpl(op.nestedExpression())
+  EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op) : m_d(op)
   {
     EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits<UnaryOp>::Cost);
     EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
@@ -771,30 +836,41 @@ struct unary_evaluator<CwiseUnaryView<UnaryOp, ArgType>, IndexBased>
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   CoeffReturnType coeff(Index row, Index col) const
   {
-    return m_unaryOp(m_argImpl.coeff(row, col));
+    return m_d.func()(m_d.argImpl.coeff(row, col));
   }
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   CoeffReturnType coeff(Index index) const
   {
-    return m_unaryOp(m_argImpl.coeff(index));
+    return m_d.func()(m_d.argImpl.coeff(index));
   }
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   Scalar& coeffRef(Index row, Index col)
   {
-    return m_unaryOp(m_argImpl.coeffRef(row, col));
+    return m_d.func()(m_d.argImpl.coeffRef(row, col));
   }
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   Scalar& coeffRef(Index index)
   {
-    return m_unaryOp(m_argImpl.coeffRef(index));
+    return m_d.func()(m_d.argImpl.coeffRef(index));
   }
 protected:
-  const UnaryOp m_unaryOp;
-  evaluator<ArgType> m_argImpl;
+  // this helper permits to completely eliminate the functor if it is empty
+  struct Data
+  {
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+    Data(const XprType& xpr) : op(xpr.functor()), argImpl(xpr.nestedExpression()) {}
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+    const UnaryOp& func() const { return op; }
+    UnaryOp op;
+    evaluator<ArgType> argImpl;
+  };
+  Data m_d;
 };
 // -------------------- Map --------------------
@@ -811,14 +887,15 @@ struct mapbase_evaluator : evaluator_base<Derived>
   typedef typename XprType::PointerType PointerType;
   typedef typename XprType::Scalar Scalar;
   typedef typename XprType::CoeffReturnType CoeffReturnType;
   enum {
     IsRowMajor = XprType::RowsAtCompileTime,
     ColsAtCompileTime = XprType::ColsAtCompileTime,
     CoeffReadCost = NumTraits<Scalar>::ReadCost
   };
-  EIGEN_DEVICE_FUNC explicit mapbase_evaluator(const XprType& map)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  explicit mapbase_evaluator(const XprType& map)
     : m_data(const_cast<PointerType>(map.data())),
       m_innerStride(map.innerStride()),
       m_outerStride(map.outerStride())
@@ -882,17 +959,21 @@ struct mapbase_evaluator : evaluator_base<Derived>
     internal::pstoret<Scalar, PacketType, StoreMode>(m_data + index * m_innerStride.value(), x);
   }
 protected:
-  EIGEN_DEVICE_FUNC
-  inline Index rowStride() const { return XprType::IsRowMajor ? m_outerStride.value() : m_innerStride.value(); }
-  EIGEN_DEVICE_FUNC
-  inline Index colStride() const { return XprType::IsRowMajor ? m_innerStride.value() : m_outerStride.value(); }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+  Index rowStride() const EIGEN_NOEXCEPT {
+    return XprType::IsRowMajor ? m_outerStride.value() : m_innerStride.value();
+  }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+  Index colStride() const EIGEN_NOEXCEPT {
+     return XprType::IsRowMajor ? m_innerStride.value() : m_outerStride.value();
+  }
   PointerType m_data;
   const internal::variable_if_dynamic<Index, XprType::InnerStrideAtCompileTime> m_innerStride;
   const internal::variable_if_dynamic<Index, XprType::OuterStrideAtCompileTime> m_outerStride;
 };
-template<typename PlainObjectType, int MapOptions, typename StrideType>
+template<typename PlainObjectType, int MapOptions, typename StrideType>
 struct evaluator<Map<PlainObjectType, MapOptions, StrideType> >
   : public mapbase_evaluator<Map<PlainObjectType, MapOptions, StrideType>, PlainObjectType>
 {
@@ -900,7 +981,7 @@ struct evaluator<Map<PlainObjectType, MapOptions, StrideType> >
   typedef typename XprType::Scalar Scalar;
   // TODO: should check for smaller packet types once we can handle multi-sized packet types
   typedef typename packet_traits<Scalar>::type PacketScalar;
   enum {
     InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0
                              ? int(PlainObjectType::InnerStrideAtCompileTime)
@@ -912,34 +993,35 @@ struct evaluator<Map<PlainObjectType, MapOptions, StrideType> >
     HasNoOuterStride = StrideType::OuterStrideAtCompileTime == 0,
     HasNoStride = HasNoInnerStride && HasNoOuterStride,
     IsDynamicSize = PlainObjectType::SizeAtCompileTime==Dynamic,
     PacketAccessMask = bool(HasNoInnerStride) ? ~int(0) : ~int(PacketAccessBit),
     LinearAccessMask = bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime) ? ~int(0) : ~int(LinearAccessBit),
     Flags = int( evaluator<PlainObjectType>::Flags) & (LinearAccessMask&PacketAccessMask),
     Alignment = int(MapOptions)&int(AlignedMask)
   };
   EIGEN_DEVICE_FUNC explicit evaluator(const XprType& map)
-    : mapbase_evaluator<XprType, PlainObjectType>(map)
+    : mapbase_evaluator<XprType, PlainObjectType>(map)
   { }
 };
 // -------------------- Ref --------------------
-template<typename PlainObjectType, int RefOptions, typename StrideType>
+template<typename PlainObjectType, int RefOptions, typename StrideType>
 struct evaluator<Ref<PlainObjectType, RefOptions, StrideType> >
   : public mapbase_evaluator<Ref<PlainObjectType, RefOptions, StrideType>, PlainObjectType>
 {
   typedef Ref<PlainObjectType, RefOptions, StrideType> XprType;
   enum {
     Flags = evaluator<Map<PlainObjectType, RefOptions, StrideType> >::Flags,
     Alignment = evaluator<Map<PlainObjectType, RefOptions, StrideType> >::Alignment
   };
-  EIGEN_DEVICE_FUNC explicit evaluator(const XprType& ref)
-    : mapbase_evaluator<XprType, PlainObjectType>(ref)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  explicit evaluator(const XprType& ref)
+    : mapbase_evaluator<XprType, PlainObjectType>(ref)
   { }
 };
@@ -947,8 +1029,8 @@ struct evaluator<Ref<PlainObjectType, RefOptions, StrideType> >
 template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel,
          bool HasDirectAccess = internal::has_direct_access<ArgType>::ret> struct block_evaluator;
-template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel>
+template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel>
 struct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> >
   : block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel>
 {
@@ -956,15 +1038,15 @@ struct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> >
   typedef typename XprType::Scalar Scalar;
   // TODO: should check for smaller packet types once we can handle multi-sized packet types
   typedef typename packet_traits<Scalar>::type PacketScalar;
   enum {
     CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
     RowsAtCompileTime = traits<XprType>::RowsAtCompileTime,
     ColsAtCompileTime = traits<XprType>::ColsAtCompileTime,
     MaxRowsAtCompileTime = traits<XprType>::MaxRowsAtCompileTime,
     MaxColsAtCompileTime = traits<XprType>::MaxColsAtCompileTime,
     ArgTypeIsRowMajor = (int(evaluator<ArgType>::Flags)&RowMajorBit) != 0,
     IsRowMajor = (MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1) ? 1
                : (MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1) ? 0
@@ -978,14 +1060,14 @@ struct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> >
                              ? int(outer_stride_at_compile_time<ArgType>::ret)
                              : int(inner_stride_at_compile_time<ArgType>::ret),
     MaskPacketAccessBit = (InnerStrideAtCompileTime == 1 || HasSameStorageOrderAsArgType) ? PacketAccessBit : 0,
-    FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1 || (InnerPanel && (evaluator<ArgType>::Flags&LinearAccessBit))) ? LinearAccessBit : 0,
+    FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1 || (InnerPanel && (evaluator<ArgType>::Flags&LinearAccessBit))) ? LinearAccessBit : 0,
     FlagsRowMajorBit = XprType::Flags&RowMajorBit,
     Flags0 = evaluator<ArgType>::Flags & ( (HereditaryBits & ~RowMajorBit) |
                                            DirectAccessBit |
                                            MaskPacketAccessBit),
     Flags = Flags0 | FlagsLinearAccessBit | FlagsRowMajorBit,
     PacketAlignment = unpacket_traits<PacketScalar>::alignment,
     Alignment0 = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic)
                              && (OuterStrideAtCompileTime!=0)
@@ -993,7 +1075,8 @@ struct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> >
     Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator<ArgType>::Alignment, Alignment0)
   };
   typedef block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel> block_evaluator_type;
-  EIGEN_DEVICE_FUNC explicit evaluator(const XprType& block) : block_evaluator_type(block)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  explicit evaluator(const XprType& block) : block_evaluator_type(block)
   {
     EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
   }
@@ -1006,8 +1089,9 @@ struct block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel, /*HasDirectAcc
 {
   typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType;
-  EIGEN_DEVICE_FUNC explicit block_evaluator(const XprType& block)
-    : unary_evaluator<XprType>(block)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  explicit block_evaluator(const XprType& block)
+    : unary_evaluator<XprType>(block)
   {}
 };
@@ -1017,79 +1101,74 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa
 {
   typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType;
-  EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& block)
-    : m_argImpl(block.nestedExpression()),
-      m_startRow(block.startRow()),
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  explicit unary_evaluator(const XprType& block)
+    : m_argImpl(block.nestedExpression()),
+      m_startRow(block.startRow()),
       m_startCol(block.startCol()),
-      m_linear_offset(InnerPanel?(XprType::IsRowMajor ? block.startRow()*block.cols() : block.startCol()*block.rows()):0)
+      m_linear_offset(ForwardLinearAccess?(ArgType::IsRowMajor ? block.startRow()*block.nestedExpression().cols() + block.startCol() : block.startCol()*block.nestedExpression().rows() + block.startRow()):0)
   { }
   typedef typename XprType::Scalar Scalar;
   typedef typename XprType::CoeffReturnType CoeffReturnType;
   enum {
     RowsAtCompileTime = XprType::RowsAtCompileTime,
-    ForwardLinearAccess = InnerPanel && bool(evaluator<ArgType>::Flags&LinearAccessBit)
+    ForwardLinearAccess = (InnerPanel || int(XprType::IsRowMajor)==int(ArgType::IsRowMajor)) && bool(evaluator<ArgType>::Flags&LinearAccessBit)
   };
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   CoeffReturnType coeff(Index row, Index col) const
-  {
-    return m_argImpl.coeff(m_startRow.value() + row, m_startCol.value() + col);
+  {
+    return m_argImpl.coeff(m_startRow.value() + row, m_startCol.value() + col);
   }
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   CoeffReturnType coeff(Index index) const
-  {
-    if (ForwardLinearAccess)
-      return m_argImpl.coeff(m_linear_offset.value() + index);
-    else
-      return coeff(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0);
+  {
+    return linear_coeff_impl(index, bool_constant<ForwardLinearAccess>());
   }
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   Scalar& coeffRef(Index row, Index col)
-  {
-    return m_argImpl.coeffRef(m_startRow.value() + row, m_startCol.value() + col);
+  {
+    return m_argImpl.coeffRef(m_startRow.value() + row, m_startCol.value() + col);
   }
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   Scalar& coeffRef(Index index)
-  {
-    if (ForwardLinearAccess)
-      return m_argImpl.coeffRef(m_linear_offset.value() + index);
-    else
-      return coeffRef(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0);
+  {
+    return linear_coeffRef_impl(index, bool_constant<ForwardLinearAccess>());
   }
   template<int LoadMode, typename PacketType>
   EIGEN_STRONG_INLINE
-  PacketType packet(Index row, Index col) const
-  {
-    return m_argImpl.template packet<LoadMode,PacketType>(m_startRow.value() + row, m_startCol.value() + col);
+  PacketType packet(Index row, Index col) const
+  {
+    return m_argImpl.template packet<LoadMode,PacketType>(m_startRow.value() + row, m_startCol.value() + col);
   }
   template<int LoadMode, typename PacketType>
   EIGEN_STRONG_INLINE
-  PacketType packet(Index index) const
-  {
+  PacketType packet(Index index) const
+  {
     if (ForwardLinearAccess)
       return m_argImpl.template packet<LoadMode,PacketType>(m_linear_offset.value() + index);
     else
       return packet<LoadMode,PacketType>(RowsAtCompileTime == 1 ? 0 : index,
                                          RowsAtCompileTime == 1 ? index : 0);
   }
   template<int StoreMode, typename PacketType>
   EIGEN_STRONG_INLINE
-  void writePacket(Index row, Index col, const PacketType& x)
+  void writePacket(Index row, Index col, const PacketType& x)
   {
-    return m_argImpl.template writePacket<StoreMode,PacketType>(m_startRow.value() + row, m_startCol.value() + col, x);
+    return m_argImpl.template writePacket<StoreMode,PacketType>(m_startRow.value() + row, m_startCol.value() + col, x);
   }
   template<int StoreMode, typename PacketType>
   EIGEN_STRONG_INLINE
-  void writePacket(Index index, const PacketType& x)
+  void writePacket(Index index, const PacketType& x)
   {
     if (ForwardLinearAccess)
       return m_argImpl.template writePacket<StoreMode,PacketType>(m_linear_offset.value() + index, x);
@@ -1098,18 +1177,40 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa
                                               RowsAtCompileTime == 1 ? index : 0,
                                               x);
   }
 protected:
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  CoeffReturnType linear_coeff_impl(Index index, internal::true_type /* ForwardLinearAccess */) const
+  {
+    return m_argImpl.coeff(m_linear_offset.value() + index);
+  }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  CoeffReturnType linear_coeff_impl(Index index, internal::false_type /* not ForwardLinearAccess */) const
+  {
+    return coeff(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0);
+  }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  Scalar& linear_coeffRef_impl(Index index, internal::true_type /* ForwardLinearAccess */)
+  {
+    return m_argImpl.coeffRef(m_linear_offset.value() + index);
+  }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  Scalar& linear_coeffRef_impl(Index index, internal::false_type /* not ForwardLinearAccess */)
+  {
+    return coeffRef(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0);
+  }
   evaluator<ArgType> m_argImpl;
   const variable_if_dynamic<Index, (ArgType::RowsAtCompileTime == 1 && BlockRows==1) ? 0 : Dynamic> m_startRow;
   const variable_if_dynamic<Index, (ArgType::ColsAtCompileTime == 1 && BlockCols==1) ? 0 : Dynamic> m_startCol;
-  const variable_if_dynamic<Index, InnerPanel ? Dynamic : 0> m_linear_offset;
+  const variable_if_dynamic<Index, ForwardLinearAccess ? Dynamic : 0> m_linear_offset;
 };
-// TODO: This evaluator does not actually use the child evaluator;
+// TODO: This evaluator does not actually use the child evaluator;
 // all action is via the data() as returned by the Block expression.
-template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel>
+template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel>
 struct block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel, /* HasDirectAccess */ true>
   : mapbase_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>,
                       typename Block<ArgType, BlockRows, BlockCols, InnerPanel>::PlainObject>
@@ -1117,8 +1218,9 @@ struct block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel, /* HasDirectAc
   typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType;
   typedef typename XprType::Scalar Scalar;
-  EIGEN_DEVICE_FUNC explicit block_evaluator(const XprType& block)
-    : mapbase_evaluator<XprType, typename XprType::PlainObject>(block)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  explicit block_evaluator(const XprType& block)
+    : mapbase_evaluator<XprType, typename XprType::PlainObject>(block)
   {
     // TODO: for the 3.3 release, this should be turned to an internal assertion, but let's keep it as is for the beta lifetime
     eigen_assert(((internal::UIntPtr(block.data()) % EIGEN_PLAIN_ENUM_MAX(1,evaluator<XprType>::Alignment)) == 0) && "data is not aligned");
@@ -1141,18 +1243,19 @@ struct evaluator<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >
                                          evaluator<ElseMatrixType>::CoeffReadCost),
     Flags = (unsigned int)evaluator<ThenMatrixType>::Flags & evaluator<ElseMatrixType>::Flags & HereditaryBits,
     Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator<ThenMatrixType>::Alignment, evaluator<ElseMatrixType>::Alignment)
   };
-  EIGEN_DEVICE_FUNC explicit evaluator(const XprType& select)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  explicit evaluator(const XprType& select)
     : m_conditionImpl(select.conditionMatrix()),
       m_thenImpl(select.thenMatrix()),
       m_elseImpl(select.elseMatrix())
   {
     EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
   }
   typedef typename XprType::CoeffReturnType CoeffReturnType;
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
@@ -1172,7 +1275,7 @@ struct evaluator<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >
     else
       return m_elseImpl.coeff(index);
   }
 protected:
   evaluator<ConditionMatrixType> m_conditionImpl;
   evaluator<ThenMatrixType> m_thenImpl;
@@ -1182,7 +1285,7 @@ protected:
 // -------------------- Replicate --------------------
-template<typename ArgType, int RowFactor, int ColFactor>
+template<typename ArgType, int RowFactor, int ColFactor>
 struct unary_evaluator<Replicate<ArgType, RowFactor, ColFactor> >
   : evaluator_base<Replicate<ArgType, RowFactor, ColFactor> >
 {
@@ -1193,22 +1296,23 @@ struct unary_evaluator<Replicate<ArgType, RowFactor, ColFactor> >
   };
   typedef typename internal::nested_eval<ArgType,Factor>::type ArgTypeNested;
   typedef typename internal::remove_all<ArgTypeNested>::type ArgTypeNestedCleaned;
   enum {
     CoeffReadCost = evaluator<ArgTypeNestedCleaned>::CoeffReadCost,
     LinearAccessMask = XprType::IsVectorAtCompileTime ? LinearAccessBit : 0,
     Flags = (evaluator<ArgTypeNestedCleaned>::Flags & (HereditaryBits|LinearAccessMask) & ~RowMajorBit) | (traits<XprType>::Flags & RowMajorBit),
     Alignment = evaluator<ArgTypeNestedCleaned>::Alignment
   };
-  EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& replicate)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  explicit unary_evaluator(const XprType& replicate)
     : m_arg(replicate.nestedExpression()),
       m_argImpl(m_arg),
       m_rows(replicate.nestedExpression().rows()),
       m_cols(replicate.nestedExpression().cols())
   {}
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   CoeffReturnType coeff(Index row, Index col) const
   {
@@ -1219,10 +1323,10 @@ struct unary_evaluator<Replicate<ArgType, RowFactor, ColFactor> >
     const Index actual_col = internal::traits<XprType>::ColsAtCompileTime==1 ? 0
                            : ColFactor==1 ? col
                            : col % m_cols.value();
     return m_argImpl.coeff(actual_row, actual_col);
   }
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   CoeffReturnType coeff(Index index) const
   {
@@ -1230,7 +1334,7 @@ struct unary_evaluator<Replicate<ArgType, RowFactor, ColFactor> >
     const Index actual_index = internal::traits<XprType>::RowsAtCompileTime==1
                                   ? (ColFactor==1 ?  index : index%m_cols.value())
                                   : (RowFactor==1 ?  index : index%m_rows.value());
     return m_argImpl.coeff(actual_index);
   }
@@ -1247,7 +1351,7 @@ struct unary_evaluator<Replicate<ArgType, RowFactor, ColFactor> >
     return m_argImpl.template packet<LoadMode,PacketType>(actual_row, actual_col);
   }
   template<int LoadMode, typename PacketType>
   EIGEN_STRONG_INLINE
   PacketType packet(Index index) const
@@ -1258,7 +1362,7 @@ struct unary_evaluator<Replicate<ArgType, RowFactor, ColFactor> >
     return m_argImpl.template packet<LoadMode,PacketType>(actual_index);
   }
 protected:
   const ArgTypeNested m_arg;
   evaluator<ArgTypeNestedCleaned> m_argImpl;
@@ -1266,64 +1370,6 @@ protected:
   const variable_if_dynamic<Index, ArgType::ColsAtCompileTime> m_cols;
 };
-// -------------------- PartialReduxExpr --------------------
-template< typename ArgType, typename MemberOp, int Direction>
-struct evaluator<PartialReduxExpr<ArgType, MemberOp, Direction> >
-  : evaluator_base<PartialReduxExpr<ArgType, MemberOp, Direction> >
-{
-  typedef PartialReduxExpr<ArgType, MemberOp, Direction> XprType;
-  typedef typename internal::nested_eval<ArgType,1>::type ArgTypeNested;
-  typedef typename internal::remove_all<ArgTypeNested>::type ArgTypeNestedCleaned;
-  typedef typename ArgType::Scalar InputScalar;
-  typedef typename XprType::Scalar Scalar;
-  enum {
-    TraversalSize = Direction==int(Vertical) ? int(ArgType::RowsAtCompileTime) :  int(ArgType::ColsAtCompileTime)
-  };
-  typedef typename MemberOp::template Cost<InputScalar,int(TraversalSize)> CostOpType;
-  enum {
-    CoeffReadCost = TraversalSize==Dynamic ? HugeCost
-                  : TraversalSize * evaluator<ArgType>::CoeffReadCost + int(CostOpType::value),
-    Flags = (traits<XprType>::Flags&RowMajorBit) | (evaluator<ArgType>::Flags&(HereditaryBits&(~RowMajorBit))) | LinearAccessBit,
-    Alignment = 0 // FIXME this will need to be improved once PartialReduxExpr is vectorized
-  };
-  EIGEN_DEVICE_FUNC explicit evaluator(const XprType xpr)
-    : m_arg(xpr.nestedExpression()), m_functor(xpr.functor())
-  {
-    EIGEN_INTERNAL_CHECK_COST_VALUE(TraversalSize==Dynamic ? HugeCost : int(CostOpType::value));
-    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
-  }
-  typedef typename XprType::CoeffReturnType CoeffReturnType;
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  const Scalar coeff(Index i, Index j) const
-  {
-    if (Direction==Vertical)
-      return m_functor(m_arg.col(j));
-    else
-      return m_functor(m_arg.row(i));
-  }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  const Scalar coeff(Index index) const
-  {
-    if (Direction==Vertical)
-      return m_functor(m_arg.col(index));
-    else
-      return m_functor(m_arg.row(index));
-  }
-protected:
-  typename internal::add_const_on_value_type<ArgTypeNested>::type m_arg;
-  const MemberOp m_functor;
-};
 // -------------------- MatrixWrapper and ArrayWrapper --------------------
 //
 // evaluator_wrapper_base<T> is a common base class for the
@@ -1340,7 +1386,8 @@ struct evaluator_wrapper_base
     Alignment = evaluator<ArgType>::Alignment
   };
-  EIGEN_DEVICE_FUNC explicit evaluator_wrapper_base(const ArgType& arg) : m_argImpl(arg) {}
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  explicit evaluator_wrapper_base(const ArgType& arg) : m_argImpl(arg) {}
   typedef typename ArgType::Scalar Scalar;
   typedef typename ArgType::CoeffReturnType CoeffReturnType;
@@ -1407,7 +1454,8 @@ struct unary_evaluator<MatrixWrapper<TArgType> >
 {
   typedef MatrixWrapper<TArgType> XprType;
-  EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& wrapper)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  explicit unary_evaluator(const XprType& wrapper)
     : evaluator_wrapper_base<MatrixWrapper<TArgType> >(wrapper.nestedExpression())
   { }
 };
@@ -1418,7 +1466,8 @@ struct unary_evaluator<ArrayWrapper<TArgType> >
 {
   typedef ArrayWrapper<TArgType> XprType;
-  EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& wrapper)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  explicit unary_evaluator(const XprType& wrapper)
     : evaluator_wrapper_base<ArrayWrapper<TArgType> >(wrapper.nestedExpression())
   { }
 };
@@ -1445,9 +1494,9 @@ struct unary_evaluator<Reverse<ArgType, Direction> >
     ReversePacket = (Direction == BothDirections)
                     || ((Direction == Vertical)   && IsColMajor)
                     || ((Direction == Horizontal) && IsRowMajor),
     CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
     // let's enable LinearAccess only with vectorization because of the product overhead
     // FIXME enable DirectAccess with negative strides?
     Flags0 = evaluator<ArgType>::Flags,
@@ -1456,16 +1505,17 @@ struct unary_evaluator<Reverse<ArgType, Direction> >
                  ? LinearAccessBit : 0,
     Flags = int(Flags0) & (HereditaryBits | PacketAccessBit | LinearAccess),
     Alignment = 0 // FIXME in some rare cases, Alignment could be preserved, like a Vector4f.
   };
-  EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& reverse)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  explicit unary_evaluator(const XprType& reverse)
     : m_argImpl(reverse.nestedExpression()),
       m_rows(ReverseRow ? reverse.nestedExpression().rows() : 1),
       m_cols(ReverseCol ? reverse.nestedExpression().cols() : 1)
   { }
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   CoeffReturnType coeff(Index row, Index col) const
   {
@@ -1540,7 +1590,7 @@ struct unary_evaluator<Reverse<ArgType, Direction> >
     m_argImpl.template writePacket<LoadMode>
       (m_rows.value() * m_cols.value() - index - PacketSize, preverse(x));
   }
 protected:
   evaluator<ArgType> m_argImpl;
@@ -1558,20 +1608,21 @@ struct evaluator<Diagonal<ArgType, DiagIndex> >
   : evaluator_base<Diagonal<ArgType, DiagIndex> >
 {
   typedef Diagonal<ArgType, DiagIndex> XprType;
   enum {
     CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
     Flags = (unsigned int)(evaluator<ArgType>::Flags & (HereditaryBits | DirectAccessBit) & ~RowMajorBit) | LinearAccessBit,
     Alignment = 0
   };
-  EIGEN_DEVICE_FUNC explicit evaluator(const XprType& diagonal)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  explicit evaluator(const XprType& diagonal)
     : m_argImpl(diagonal.nestedExpression()),
       m_index(diagonal.index())
   { }
   typedef typename XprType::Scalar Scalar;
   typedef typename XprType::CoeffReturnType CoeffReturnType;
@@ -1604,8 +1655,10 @@ protected:
   const internal::variable_if_dynamicindex<Index, XprType::DiagIndex> m_index;
 private:
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rowOffset() const { return m_index.value() > 0 ? 0 : -m_index.value(); }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index colOffset() const { return m_index.value() > 0 ? m_index.value() : 0; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+  Index rowOffset() const { return m_index.value() > 0 ? 0 : -m_index.value(); }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+  Index colOffset() const { return m_index.value() > 0 ? m_index.value() : 0; }
 };
@@ -1629,25 +1682,25 @@ class EvalToTemp
   : public dense_xpr_base<EvalToTemp<ArgType> >::type
 {
  public:
   typedef typename dense_xpr_base<EvalToTemp>::type Base;
   EIGEN_GENERIC_PUBLIC_INTERFACE(EvalToTemp)
   explicit EvalToTemp(const ArgType& arg)
     : m_arg(arg)
   { }
   const ArgType& arg() const
   {
     return m_arg;
   }
-  Index rows() const
+  EIGEN_CONSTEXPR Index rows() const EIGEN_NOEXCEPT
   {
     return m_arg.rows();
   }
-  Index cols() const
+  EIGEN_CONSTEXPR Index cols() const EIGEN_NOEXCEPT
   {
     return m_arg.cols();
   }
@@ -1655,7 +1708,7 @@ class EvalToTemp
  private:
   const ArgType& m_arg;
 };
 template<typename ArgType>
 struct evaluator<EvalToTemp<ArgType> >
   : public evaluator<typename ArgType::PlainObject>
@@ -1663,7 +1716,7 @@ struct evaluator<EvalToTemp<ArgType> >
   typedef EvalToTemp<ArgType>                   XprType;
   typedef typename ArgType::PlainObject         PlainObject;
   typedef evaluator<PlainObject> Base;
   EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr)
     : m_result(xpr.arg())
   {