@smake/eigen 1.0.2 → 1.1.1
This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/eigen/Eigen/AccelerateSupport +52 -0
- package/eigen/Eigen/Cholesky +18 -21
- package/eigen/Eigen/CholmodSupport +28 -28
- package/eigen/Eigen/Core +235 -326
- package/eigen/Eigen/Eigenvalues +16 -14
- package/eigen/Eigen/Geometry +21 -24
- package/eigen/Eigen/Householder +9 -8
- package/eigen/Eigen/IterativeLinearSolvers +8 -4
- package/eigen/Eigen/Jacobi +14 -14
- package/eigen/Eigen/KLUSupport +43 -0
- package/eigen/Eigen/LU +16 -20
- package/eigen/Eigen/MetisSupport +12 -12
- package/eigen/Eigen/OrderingMethods +54 -54
- package/eigen/Eigen/PaStiXSupport +23 -20
- package/eigen/Eigen/PardisoSupport +17 -14
- package/eigen/Eigen/QR +18 -21
- package/eigen/Eigen/QtAlignedMalloc +5 -13
- package/eigen/Eigen/SPQRSupport +21 -14
- package/eigen/Eigen/SVD +23 -18
- package/eigen/Eigen/Sparse +1 -4
- package/eigen/Eigen/SparseCholesky +18 -23
- package/eigen/Eigen/SparseCore +18 -17
- package/eigen/Eigen/SparseLU +12 -8
- package/eigen/Eigen/SparseQR +16 -14
- package/eigen/Eigen/StdDeque +5 -2
- package/eigen/Eigen/StdList +5 -2
- package/eigen/Eigen/StdVector +5 -2
- package/eigen/Eigen/SuperLUSupport +30 -24
- package/eigen/Eigen/ThreadPool +80 -0
- package/eigen/Eigen/UmfPackSupport +19 -17
- package/eigen/Eigen/Version +14 -0
- package/eigen/Eigen/src/AccelerateSupport/AccelerateSupport.h +423 -0
- package/eigen/Eigen/src/AccelerateSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Cholesky/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Cholesky/LDLT.h +377 -401
- package/eigen/Eigen/src/Cholesky/LLT.h +332 -360
- package/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h +81 -56
- package/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +620 -521
- package/eigen/Eigen/src/CholmodSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Core/ArithmeticSequence.h +239 -0
- package/eigen/Eigen/src/Core/Array.h +341 -294
- package/eigen/Eigen/src/Core/ArrayBase.h +190 -203
- package/eigen/Eigen/src/Core/ArrayWrapper.h +127 -171
- package/eigen/Eigen/src/Core/Assign.h +30 -40
- package/eigen/Eigen/src/Core/AssignEvaluator.h +711 -589
- package/eigen/Eigen/src/Core/Assign_MKL.h +130 -125
- package/eigen/Eigen/src/Core/BandMatrix.h +268 -283
- package/eigen/Eigen/src/Core/Block.h +375 -398
- package/eigen/Eigen/src/Core/CommaInitializer.h +86 -97
- package/eigen/Eigen/src/Core/ConditionEstimator.h +51 -53
- package/eigen/Eigen/src/Core/CoreEvaluators.h +1356 -1026
- package/eigen/Eigen/src/Core/CoreIterators.h +73 -59
- package/eigen/Eigen/src/Core/CwiseBinaryOp.h +114 -132
- package/eigen/Eigen/src/Core/CwiseNullaryOp.h +726 -617
- package/eigen/Eigen/src/Core/CwiseTernaryOp.h +77 -103
- package/eigen/Eigen/src/Core/CwiseUnaryOp.h +56 -68
- package/eigen/Eigen/src/Core/CwiseUnaryView.h +132 -95
- package/eigen/Eigen/src/Core/DenseBase.h +632 -571
- package/eigen/Eigen/src/Core/DenseCoeffsBase.h +511 -624
- package/eigen/Eigen/src/Core/DenseStorage.h +512 -509
- package/eigen/Eigen/src/Core/DeviceWrapper.h +153 -0
- package/eigen/Eigen/src/Core/Diagonal.h +169 -210
- package/eigen/Eigen/src/Core/DiagonalMatrix.h +351 -274
- package/eigen/Eigen/src/Core/DiagonalProduct.h +12 -10
- package/eigen/Eigen/src/Core/Dot.h +172 -222
- package/eigen/Eigen/src/Core/EigenBase.h +75 -85
- package/eigen/Eigen/src/Core/Fill.h +138 -0
- package/eigen/Eigen/src/Core/FindCoeff.h +464 -0
- package/eigen/Eigen/src/Core/ForceAlignedAccess.h +90 -109
- package/eigen/Eigen/src/Core/Fuzzy.h +82 -105
- package/eigen/Eigen/src/Core/GeneralProduct.h +327 -263
- package/eigen/Eigen/src/Core/GenericPacketMath.h +1472 -360
- package/eigen/Eigen/src/Core/GlobalFunctions.h +194 -151
- package/eigen/Eigen/src/Core/IO.h +147 -139
- package/eigen/Eigen/src/Core/IndexedView.h +321 -0
- package/eigen/Eigen/src/Core/InnerProduct.h +260 -0
- package/eigen/Eigen/src/Core/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Core/Inverse.h +56 -66
- package/eigen/Eigen/src/Core/Map.h +124 -142
- package/eigen/Eigen/src/Core/MapBase.h +256 -281
- package/eigen/Eigen/src/Core/MathFunctions.h +1620 -938
- package/eigen/Eigen/src/Core/MathFunctionsImpl.h +233 -71
- package/eigen/Eigen/src/Core/Matrix.h +491 -416
- package/eigen/Eigen/src/Core/MatrixBase.h +468 -453
- package/eigen/Eigen/src/Core/NestByValue.h +66 -85
- package/eigen/Eigen/src/Core/NoAlias.h +79 -85
- package/eigen/Eigen/src/Core/NumTraits.h +235 -148
- package/eigen/Eigen/src/Core/PartialReduxEvaluator.h +253 -0
- package/eigen/Eigen/src/Core/PermutationMatrix.h +461 -511
- package/eigen/Eigen/src/Core/PlainObjectBase.h +871 -894
- package/eigen/Eigen/src/Core/Product.h +260 -139
- package/eigen/Eigen/src/Core/ProductEvaluators.h +863 -714
- package/eigen/Eigen/src/Core/Random.h +161 -136
- package/eigen/Eigen/src/Core/RandomImpl.h +262 -0
- package/eigen/Eigen/src/Core/RealView.h +250 -0
- package/eigen/Eigen/src/Core/Redux.h +366 -336
- package/eigen/Eigen/src/Core/Ref.h +308 -209
- package/eigen/Eigen/src/Core/Replicate.h +94 -106
- package/eigen/Eigen/src/Core/Reshaped.h +398 -0
- package/eigen/Eigen/src/Core/ReturnByValue.h +49 -55
- package/eigen/Eigen/src/Core/Reverse.h +136 -145
- package/eigen/Eigen/src/Core/Select.h +70 -140
- package/eigen/Eigen/src/Core/SelfAdjointView.h +262 -285
- package/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h +23 -20
- package/eigen/Eigen/src/Core/SkewSymmetricMatrix3.h +382 -0
- package/eigen/Eigen/src/Core/Solve.h +97 -111
- package/eigen/Eigen/src/Core/SolveTriangular.h +131 -129
- package/eigen/Eigen/src/Core/SolverBase.h +138 -101
- package/eigen/Eigen/src/Core/StableNorm.h +156 -160
- package/eigen/Eigen/src/Core/StlIterators.h +619 -0
- package/eigen/Eigen/src/Core/Stride.h +91 -88
- package/eigen/Eigen/src/Core/Swap.h +70 -38
- package/eigen/Eigen/src/Core/Transpose.h +295 -273
- package/eigen/Eigen/src/Core/Transpositions.h +272 -317
- package/eigen/Eigen/src/Core/TriangularMatrix.h +670 -755
- package/eigen/Eigen/src/Core/VectorBlock.h +59 -72
- package/eigen/Eigen/src/Core/VectorwiseOp.h +668 -630
- package/eigen/Eigen/src/Core/Visitor.h +480 -216
- package/eigen/Eigen/src/Core/arch/AVX/Complex.h +407 -293
- package/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +79 -388
- package/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +2935 -491
- package/eigen/Eigen/src/Core/arch/AVX/Reductions.h +353 -0
- package/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +279 -22
- package/eigen/Eigen/src/Core/arch/AVX512/Complex.h +472 -0
- package/eigen/Eigen/src/Core/arch/AVX512/GemmKernel.h +1245 -0
- package/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +85 -333
- package/eigen/Eigen/src/Core/arch/AVX512/MathFunctionsFP16.h +75 -0
- package/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +2490 -649
- package/eigen/Eigen/src/Core/arch/AVX512/PacketMathFP16.h +1413 -0
- package/eigen/Eigen/src/Core/arch/AVX512/Reductions.h +297 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TrsmKernel.h +1167 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TrsmUnrolls.inc +1219 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +277 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TypeCastingFP16.h +130 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +521 -298
- package/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +39 -280
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +3686 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +205 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +901 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMAbfloat16.h +742 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixVectorProduct.inc +2818 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +3391 -723
- package/eigen/Eigen/src/Core/arch/AltiVec/TypeCasting.h +153 -0
- package/eigen/Eigen/src/Core/arch/Default/BFloat16.h +866 -0
- package/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +113 -14
- package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +2634 -0
- package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +227 -0
- package/eigen/Eigen/src/Core/arch/Default/Half.h +1091 -0
- package/eigen/Eigen/src/Core/arch/Default/Settings.h +11 -13
- package/eigen/Eigen/src/Core/arch/GPU/Complex.h +244 -0
- package/eigen/Eigen/src/Core/arch/GPU/MathFunctions.h +104 -0
- package/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1712 -0
- package/eigen/Eigen/src/Core/arch/GPU/Tuple.h +268 -0
- package/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +77 -0
- package/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
- package/eigen/Eigen/src/Core/arch/HVX/PacketMath.h +1088 -0
- package/eigen/Eigen/src/Core/arch/LSX/Complex.h +520 -0
- package/eigen/Eigen/src/Core/arch/LSX/GeneralBlockPanelKernel.h +23 -0
- package/eigen/Eigen/src/Core/arch/LSX/MathFunctions.h +43 -0
- package/eigen/Eigen/src/Core/arch/LSX/PacketMath.h +2866 -0
- package/eigen/Eigen/src/Core/arch/LSX/TypeCasting.h +526 -0
- package/eigen/Eigen/src/Core/arch/MSA/Complex.h +620 -0
- package/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +379 -0
- package/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1237 -0
- package/eigen/Eigen/src/Core/arch/NEON/Complex.h +531 -289
- package/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +243 -0
- package/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +50 -73
- package/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +5915 -579
- package/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1642 -0
- package/eigen/Eigen/src/Core/arch/NEON/UnaryFunctors.h +57 -0
- package/eigen/Eigen/src/Core/arch/SSE/Complex.h +366 -334
- package/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +40 -514
- package/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +2164 -675
- package/eigen/Eigen/src/Core/arch/SSE/Reductions.h +324 -0
- package/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +188 -35
- package/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +48 -0
- package/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +674 -0
- package/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +52 -0
- package/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +227 -0
- package/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +303 -0
- package/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +576 -0
- package/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +83 -0
- package/eigen/Eigen/src/Core/arch/ZVector/Complex.h +434 -261
- package/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +160 -53
- package/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +1073 -605
- package/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +123 -117
- package/eigen/Eigen/src/Core/functors/BinaryFunctors.h +594 -322
- package/eigen/Eigen/src/Core/functors/NullaryFunctors.h +204 -118
- package/eigen/Eigen/src/Core/functors/StlFunctors.h +110 -97
- package/eigen/Eigen/src/Core/functors/TernaryFunctors.h +34 -7
- package/eigen/Eigen/src/Core/functors/UnaryFunctors.h +1158 -530
- package/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +2329 -1333
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +328 -364
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +191 -178
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +85 -82
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +154 -73
- package/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +396 -542
- package/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +80 -77
- package/eigen/Eigen/src/Core/products/Parallelizer.h +208 -92
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +331 -375
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +206 -224
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +139 -146
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +58 -61
- package/eigen/Eigen/src/Core/products/SelfadjointProduct.h +71 -71
- package/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +48 -46
- package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +294 -369
- package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +246 -238
- package/eigen/Eigen/src/Core/products/TriangularMatrixVector.h +244 -247
- package/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +212 -192
- package/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +328 -275
- package/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +108 -109
- package/eigen/Eigen/src/Core/products/TriangularSolverVector.h +70 -93
- package/eigen/Eigen/src/Core/util/Assert.h +158 -0
- package/eigen/Eigen/src/Core/util/BlasUtil.h +413 -290
- package/eigen/Eigen/src/Core/util/ConfigureVectorization.h +543 -0
- package/eigen/Eigen/src/Core/util/Constants.h +314 -263
- package/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +130 -78
- package/eigen/Eigen/src/Core/util/EmulateArray.h +270 -0
- package/eigen/Eigen/src/Core/util/ForwardDeclarations.h +450 -224
- package/eigen/Eigen/src/Core/util/GpuHipCudaDefines.inc +101 -0
- package/eigen/Eigen/src/Core/util/GpuHipCudaUndefines.inc +45 -0
- package/eigen/Eigen/src/Core/util/IndexedViewHelper.h +487 -0
- package/eigen/Eigen/src/Core/util/IntegralConstant.h +279 -0
- package/eigen/Eigen/src/Core/util/MKL_support.h +39 -30
- package/eigen/Eigen/src/Core/util/Macros.h +939 -646
- package/eigen/Eigen/src/Core/util/MaxSizeVector.h +139 -0
- package/eigen/Eigen/src/Core/util/Memory.h +1042 -650
- package/eigen/Eigen/src/Core/util/Meta.h +618 -426
- package/eigen/Eigen/src/Core/util/MoreMeta.h +638 -0
- package/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +32 -19
- package/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
- package/eigen/Eigen/src/Core/util/Serializer.h +209 -0
- package/eigen/Eigen/src/Core/util/StaticAssert.h +51 -164
- package/eigen/Eigen/src/Core/util/SymbolicIndex.h +445 -0
- package/eigen/Eigen/src/Core/util/XprHelper.h +793 -538
- package/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +246 -277
- package/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +299 -319
- package/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +52 -48
- package/eigen/Eigen/src/Eigenvalues/EigenSolver.h +413 -456
- package/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +309 -325
- package/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +157 -171
- package/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +292 -310
- package/eigen/Eigen/src/Eigenvalues/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +91 -107
- package/eigen/Eigen/src/Eigenvalues/RealQZ.h +539 -606
- package/eigen/Eigen/src/Eigenvalues/RealSchur.h +348 -382
- package/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +41 -35
- package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +579 -600
- package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +47 -44
- package/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +434 -461
- package/eigen/Eigen/src/Geometry/AlignedBox.h +307 -214
- package/eigen/Eigen/src/Geometry/AngleAxis.h +135 -137
- package/eigen/Eigen/src/Geometry/EulerAngles.h +163 -74
- package/eigen/Eigen/src/Geometry/Homogeneous.h +289 -333
- package/eigen/Eigen/src/Geometry/Hyperplane.h +152 -161
- package/eigen/Eigen/src/Geometry/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Geometry/OrthoMethods.h +168 -145
- package/eigen/Eigen/src/Geometry/ParametrizedLine.h +141 -104
- package/eigen/Eigen/src/Geometry/Quaternion.h +595 -497
- package/eigen/Eigen/src/Geometry/Rotation2D.h +110 -108
- package/eigen/Eigen/src/Geometry/RotationBase.h +148 -145
- package/eigen/Eigen/src/Geometry/Scaling.h +115 -90
- package/eigen/Eigen/src/Geometry/Transform.h +896 -953
- package/eigen/Eigen/src/Geometry/Translation.h +100 -98
- package/eigen/Eigen/src/Geometry/Umeyama.h +79 -84
- package/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +154 -0
- package/eigen/Eigen/src/Householder/BlockHouseholder.h +54 -42
- package/eigen/Eigen/src/Householder/Householder.h +104 -122
- package/eigen/Eigen/src/Householder/HouseholderSequence.h +416 -382
- package/eigen/Eigen/src/Householder/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +153 -166
- package/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +127 -138
- package/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +95 -124
- package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +269 -267
- package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +246 -259
- package/eigen/Eigen/src/IterativeLinearSolvers/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +218 -217
- package/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +80 -103
- package/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +59 -63
- package/eigen/Eigen/src/Jacobi/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Jacobi/Jacobi.h +256 -291
- package/eigen/Eigen/src/KLUSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/KLUSupport/KLUSupport.h +339 -0
- package/eigen/Eigen/src/LU/Determinant.h +60 -63
- package/eigen/Eigen/src/LU/FullPivLU.h +561 -626
- package/eigen/Eigen/src/LU/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/LU/InverseImpl.h +213 -275
- package/eigen/Eigen/src/LU/PartialPivLU.h +407 -435
- package/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h +54 -40
- package/eigen/Eigen/src/LU/arch/InverseSize4.h +353 -0
- package/eigen/Eigen/src/MetisSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/MetisSupport/MetisSupport.h +81 -93
- package/eigen/Eigen/src/OrderingMethods/Amd.h +250 -282
- package/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +950 -1103
- package/eigen/Eigen/src/OrderingMethods/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/OrderingMethods/Ordering.h +111 -122
- package/eigen/Eigen/src/PaStiXSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +524 -570
- package/eigen/Eigen/src/PardisoSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +385 -429
- package/eigen/Eigen/src/QR/ColPivHouseholderQR.h +494 -473
- package/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +120 -56
- package/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +223 -137
- package/eigen/Eigen/src/QR/FullPivHouseholderQR.h +517 -460
- package/eigen/Eigen/src/QR/HouseholderQR.h +412 -278
- package/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h +32 -23
- package/eigen/Eigen/src/QR/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SPQRSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +263 -261
- package/eigen/Eigen/src/SVD/BDCSVD.h +872 -679
- package/eigen/Eigen/src/SVD/BDCSVD_LAPACKE.h +174 -0
- package/eigen/Eigen/src/SVD/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SVD/JacobiSVD.h +585 -543
- package/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +85 -49
- package/eigen/Eigen/src/SVD/SVDBase.h +281 -160
- package/eigen/Eigen/src/SVD/UpperBidiagonalization.h +202 -237
- package/eigen/Eigen/src/SparseCholesky/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +769 -590
- package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +318 -129
- package/eigen/Eigen/src/SparseCore/AmbiVector.h +202 -251
- package/eigen/Eigen/src/SparseCore/CompressedStorage.h +184 -236
- package/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +140 -184
- package/eigen/Eigen/src/SparseCore/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseCore/SparseAssign.h +174 -111
- package/eigen/Eigen/src/SparseCore/SparseBlock.h +408 -477
- package/eigen/Eigen/src/SparseCore/SparseColEtree.h +100 -112
- package/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +531 -280
- package/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +559 -347
- package/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +100 -108
- package/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +185 -191
- package/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +71 -71
- package/eigen/Eigen/src/SparseCore/SparseDot.h +49 -47
- package/eigen/Eigen/src/SparseCore/SparseFuzzy.h +13 -11
- package/eigen/Eigen/src/SparseCore/SparseMap.h +243 -253
- package/eigen/Eigen/src/SparseCore/SparseMatrix.h +1614 -1142
- package/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +403 -357
- package/eigen/Eigen/src/SparseCore/SparsePermutation.h +186 -115
- package/eigen/Eigen/src/SparseCore/SparseProduct.h +100 -91
- package/eigen/Eigen/src/SparseCore/SparseRedux.h +22 -24
- package/eigen/Eigen/src/SparseCore/SparseRef.h +268 -295
- package/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +371 -414
- package/eigen/Eigen/src/SparseCore/SparseSolverBase.h +78 -87
- package/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +81 -95
- package/eigen/Eigen/src/SparseCore/SparseTranspose.h +62 -71
- package/eigen/Eigen/src/SparseCore/SparseTriangularView.h +132 -144
- package/eigen/Eigen/src/SparseCore/SparseUtil.h +146 -115
- package/eigen/Eigen/src/SparseCore/SparseVector.h +426 -372
- package/eigen/Eigen/src/SparseCore/SparseView.h +164 -193
- package/eigen/Eigen/src/SparseCore/TriangularSolver.h +129 -170
- package/eigen/Eigen/src/SparseLU/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseLU/SparseLU.h +814 -618
- package/eigen/Eigen/src/SparseLU/SparseLUImpl.h +61 -48
- package/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +102 -118
- package/eigen/Eigen/src/SparseLU/SparseLU_Structs.h +38 -35
- package/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +273 -255
- package/eigen/Eigen/src/SparseLU/SparseLU_Utils.h +44 -49
- package/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h +104 -108
- package/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +90 -101
- package/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +57 -58
- package/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +43 -55
- package/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +74 -71
- package/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +125 -133
- package/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h +136 -159
- package/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h +51 -52
- package/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h +67 -73
- package/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h +24 -26
- package/eigen/Eigen/src/SparseQR/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseQR/SparseQR.h +451 -490
- package/eigen/Eigen/src/StlSupport/StdDeque.h +28 -105
- package/eigen/Eigen/src/StlSupport/StdList.h +28 -84
- package/eigen/Eigen/src/StlSupport/StdVector.h +28 -108
- package/eigen/Eigen/src/StlSupport/details.h +48 -50
- package/eigen/Eigen/src/SuperLUSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +634 -732
- package/eigen/Eigen/src/ThreadPool/Barrier.h +70 -0
- package/eigen/Eigen/src/ThreadPool/CoreThreadPoolDevice.h +336 -0
- package/eigen/Eigen/src/ThreadPool/EventCount.h +241 -0
- package/eigen/Eigen/src/ThreadPool/ForkJoin.h +140 -0
- package/eigen/Eigen/src/ThreadPool/InternalHeaderCheck.h +4 -0
- package/eigen/Eigen/src/ThreadPool/NonBlockingThreadPool.h +587 -0
- package/eigen/Eigen/src/ThreadPool/RunQueue.h +230 -0
- package/eigen/Eigen/src/ThreadPool/ThreadCancel.h +21 -0
- package/eigen/Eigen/src/ThreadPool/ThreadEnvironment.h +43 -0
- package/eigen/Eigen/src/ThreadPool/ThreadLocal.h +289 -0
- package/eigen/Eigen/src/ThreadPool/ThreadPoolInterface.h +50 -0
- package/eigen/Eigen/src/ThreadPool/ThreadYield.h +16 -0
- package/eigen/Eigen/src/UmfPackSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +480 -380
- package/eigen/Eigen/src/misc/Image.h +41 -43
- package/eigen/Eigen/src/misc/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/misc/Kernel.h +39 -41
- package/eigen/Eigen/src/misc/RealSvd2x2.h +19 -21
- package/eigen/Eigen/src/misc/blas.h +83 -426
- package/eigen/Eigen/src/misc/lapacke.h +9976 -16182
- package/eigen/Eigen/src/misc/lapacke_helpers.h +163 -0
- package/eigen/Eigen/src/misc/lapacke_mangling.h +4 -5
- package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.inc +344 -0
- package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.inc +544 -0
- package/eigen/Eigen/src/plugins/BlockMethods.inc +1370 -0
- package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.inc +116 -0
- package/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.inc +167 -0
- package/eigen/Eigen/src/plugins/IndexedViewMethods.inc +192 -0
- package/eigen/Eigen/src/plugins/InternalHeaderCheck.inc +3 -0
- package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.inc +331 -0
- package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.inc +118 -0
- package/eigen/Eigen/src/plugins/ReshapedMethods.inc +133 -0
- package/lib/LibEigen.d.ts +4 -0
- package/lib/LibEigen.js +14 -0
- package/lib/index.d.ts +1 -1
- package/lib/index.js +7 -3
- package/package.json +2 -10
- package/eigen/Eigen/CMakeLists.txt +0 -19
- package/eigen/Eigen/src/Core/BooleanRedux.h +0 -164
- package/eigen/Eigen/src/Core/arch/CUDA/Complex.h +0 -103
- package/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -675
- package/eigen/Eigen/src/Core/arch/CUDA/MathFunctions.h +0 -91
- package/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
- package/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
- package/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
- package/eigen/Eigen/src/Core/util/NonMPL2.h +0 -3
- package/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
- package/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
- package/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h +0 -67
- package/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +0 -280
- package/eigen/Eigen/src/misc/lapack.h +0 -152
- package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +0 -332
- package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +0 -552
- package/eigen/Eigen/src/plugins/BlockMethods.h +0 -1058
- package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h +0 -115
- package/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +0 -163
- package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +0 -152
- package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +0 -85
- package/lib/eigen.d.ts +0 -2
- package/lib/eigen.js +0 -15
|
@@ -12,155 +12,149 @@
|
|
|
12
12
|
#ifndef EIGEN_ASSIGN_EVALUATOR_H
|
|
13
13
|
#define EIGEN_ASSIGN_EVALUATOR_H
|
|
14
14
|
|
|
15
|
+
// IWYU pragma: private
|
|
16
|
+
#include "./InternalHeaderCheck.h"
|
|
17
|
+
|
|
15
18
|
namespace Eigen {
|
|
16
19
|
|
|
17
20
|
// This implementation is based on Assign.h
|
|
18
21
|
|
|
19
22
|
namespace internal {
|
|
20
|
-
|
|
23
|
+
|
|
21
24
|
/***************************************************************************
|
|
22
|
-
* Part 1 : the logic deciding a strategy for traversal and unrolling *
|
|
23
|
-
***************************************************************************/
|
|
25
|
+
* Part 1 : the logic deciding a strategy for traversal and unrolling *
|
|
26
|
+
***************************************************************************/
|
|
24
27
|
|
|
25
28
|
// copy_using_evaluator_traits is based on assign_traits
|
|
26
29
|
|
|
27
|
-
template <typename DstEvaluator, typename SrcEvaluator, typename AssignFunc>
|
|
28
|
-
struct copy_using_evaluator_traits
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
30
|
+
template <typename DstEvaluator, typename SrcEvaluator, typename AssignFunc, int MaxPacketSize = Dynamic>
|
|
31
|
+
struct copy_using_evaluator_traits {
|
|
32
|
+
using Src = typename SrcEvaluator::XprType;
|
|
33
|
+
using Dst = typename DstEvaluator::XprType;
|
|
34
|
+
using DstScalar = typename Dst::Scalar;
|
|
35
|
+
|
|
36
|
+
static constexpr int DstFlags = DstEvaluator::Flags;
|
|
37
|
+
static constexpr int SrcFlags = SrcEvaluator::Flags;
|
|
38
|
+
|
|
39
|
+
public:
|
|
40
|
+
static constexpr int DstAlignment = DstEvaluator::Alignment;
|
|
41
|
+
static constexpr int SrcAlignment = SrcEvaluator::Alignment;
|
|
42
|
+
static constexpr int JointAlignment = plain_enum_min(DstAlignment, SrcAlignment);
|
|
43
|
+
static constexpr bool DstHasDirectAccess = bool(DstFlags & DirectAccessBit);
|
|
44
|
+
static constexpr bool SrcIsRowMajor = bool(SrcFlags & RowMajorBit);
|
|
45
|
+
static constexpr bool DstIsRowMajor = bool(DstFlags & RowMajorBit);
|
|
46
|
+
static constexpr bool IsVectorAtCompileTime = Dst::IsVectorAtCompileTime;
|
|
47
|
+
static constexpr int RowsAtCompileTime = size_prefer_fixed(Src::RowsAtCompileTime, Dst::RowsAtCompileTime);
|
|
48
|
+
static constexpr int ColsAtCompileTime = size_prefer_fixed(Src::ColsAtCompileTime, Dst::ColsAtCompileTime);
|
|
49
|
+
static constexpr int SizeAtCompileTime = size_at_compile_time(RowsAtCompileTime, ColsAtCompileTime);
|
|
50
|
+
static constexpr int MaxRowsAtCompileTime =
|
|
51
|
+
min_size_prefer_fixed(Src::MaxRowsAtCompileTime, Dst::MaxRowsAtCompileTime);
|
|
52
|
+
static constexpr int MaxColsAtCompileTime =
|
|
53
|
+
min_size_prefer_fixed(Src::MaxColsAtCompileTime, Dst::MaxColsAtCompileTime);
|
|
54
|
+
static constexpr int MaxSizeAtCompileTime =
|
|
55
|
+
min_size_prefer_fixed(Src::MaxSizeAtCompileTime, Dst::MaxSizeAtCompileTime);
|
|
56
|
+
static constexpr int InnerSizeAtCompileTime = IsVectorAtCompileTime ? SizeAtCompileTime
|
|
57
|
+
: DstIsRowMajor ? ColsAtCompileTime
|
|
58
|
+
: RowsAtCompileTime;
|
|
59
|
+
static constexpr int MaxInnerSizeAtCompileTime = IsVectorAtCompileTime ? MaxSizeAtCompileTime
|
|
60
|
+
: DstIsRowMajor ? MaxColsAtCompileTime
|
|
61
|
+
: MaxRowsAtCompileTime;
|
|
62
|
+
static constexpr int RestrictedInnerSize = min_size_prefer_fixed(MaxInnerSizeAtCompileTime, MaxPacketSize);
|
|
63
|
+
static constexpr int RestrictedLinearSize = min_size_prefer_fixed(MaxSizeAtCompileTime, MaxPacketSize);
|
|
64
|
+
static constexpr int OuterStride = outer_stride_at_compile_time<Dst>::ret;
|
|
57
65
|
|
|
58
66
|
// TODO distinguish between linear traversal and inner-traversals
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal))
|
|
128
|
-
? (
|
|
129
|
-
int(MayUnrollCompletely) ? int(CompleteUnrolling)
|
|
130
|
-
: int(MayUnrollInner) ? int(InnerUnrolling)
|
|
131
|
-
: int(NoUnrolling)
|
|
132
|
-
)
|
|
133
|
-
: int(Traversal) == int(LinearVectorizedTraversal)
|
|
134
|
-
? ( bool(MayUnrollCompletely) && ( EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)))
|
|
135
|
-
? int(CompleteUnrolling)
|
|
136
|
-
: int(NoUnrolling) )
|
|
137
|
-
: int(Traversal) == int(LinearTraversal)
|
|
138
|
-
? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling)
|
|
139
|
-
: int(NoUnrolling) )
|
|
67
|
+
using LinearPacketType = typename find_best_packet<DstScalar, RestrictedLinearSize>::type;
|
|
68
|
+
using InnerPacketType = typename find_best_packet<DstScalar, RestrictedInnerSize>::type;
|
|
69
|
+
|
|
70
|
+
static constexpr int LinearPacketSize = unpacket_traits<LinearPacketType>::size;
|
|
71
|
+
static constexpr int InnerPacketSize = unpacket_traits<InnerPacketType>::size;
|
|
72
|
+
|
|
73
|
+
public:
|
|
74
|
+
static constexpr int LinearRequiredAlignment = unpacket_traits<LinearPacketType>::alignment;
|
|
75
|
+
static constexpr int InnerRequiredAlignment = unpacket_traits<InnerPacketType>::alignment;
|
|
76
|
+
|
|
77
|
+
private:
|
|
78
|
+
static constexpr bool StorageOrdersAgree = DstIsRowMajor == SrcIsRowMajor;
|
|
79
|
+
static constexpr bool MightVectorize = StorageOrdersAgree && bool(DstFlags & SrcFlags & ActualPacketAccessBit) &&
|
|
80
|
+
bool(functor_traits<AssignFunc>::PacketAccess);
|
|
81
|
+
static constexpr bool MayInnerVectorize = MightVectorize && (InnerSizeAtCompileTime != Dynamic) &&
|
|
82
|
+
(InnerSizeAtCompileTime % InnerPacketSize == 0) &&
|
|
83
|
+
(OuterStride != Dynamic) && (OuterStride % InnerPacketSize == 0) &&
|
|
84
|
+
(EIGEN_UNALIGNED_VECTORIZE || JointAlignment >= InnerRequiredAlignment);
|
|
85
|
+
static constexpr bool MayLinearize = StorageOrdersAgree && (DstFlags & SrcFlags & LinearAccessBit);
|
|
86
|
+
static constexpr bool MayLinearVectorize =
|
|
87
|
+
MightVectorize && MayLinearize && DstHasDirectAccess &&
|
|
88
|
+
(EIGEN_UNALIGNED_VECTORIZE || (DstAlignment >= LinearRequiredAlignment) || MaxSizeAtCompileTime == Dynamic) &&
|
|
89
|
+
(MaxSizeAtCompileTime == Dynamic || MaxSizeAtCompileTime >= LinearPacketSize);
|
|
90
|
+
/* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
|
|
91
|
+
so it's only good for large enough sizes. */
|
|
92
|
+
static constexpr int InnerSizeThreshold = (EIGEN_UNALIGNED_VECTORIZE ? 1 : 3) * InnerPacketSize;
|
|
93
|
+
static constexpr bool MaySliceVectorize =
|
|
94
|
+
MightVectorize && DstHasDirectAccess &&
|
|
95
|
+
(MaxInnerSizeAtCompileTime == Dynamic || MaxInnerSizeAtCompileTime >= InnerSizeThreshold);
|
|
96
|
+
/* slice vectorization can be slow, so we only want it if the slices are big, which is
|
|
97
|
+
indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block
|
|
98
|
+
in a fixed-size matrix
|
|
99
|
+
However, with EIGEN_UNALIGNED_VECTORIZE and unrolling, slice vectorization is still worth it */
|
|
100
|
+
|
|
101
|
+
public:
|
|
102
|
+
static constexpr int Traversal = SizeAtCompileTime == 0 ? AllAtOnceTraversal
|
|
103
|
+
: (MayLinearVectorize && (LinearPacketSize > InnerPacketSize))
|
|
104
|
+
? LinearVectorizedTraversal
|
|
105
|
+
: MayInnerVectorize ? InnerVectorizedTraversal
|
|
106
|
+
: MayLinearVectorize ? LinearVectorizedTraversal
|
|
107
|
+
: MaySliceVectorize ? SliceVectorizedTraversal
|
|
108
|
+
: MayLinearize ? LinearTraversal
|
|
109
|
+
: DefaultTraversal;
|
|
110
|
+
static constexpr bool Vectorized = Traversal == InnerVectorizedTraversal || Traversal == LinearVectorizedTraversal ||
|
|
111
|
+
Traversal == SliceVectorizedTraversal;
|
|
112
|
+
|
|
113
|
+
using PacketType = std::conditional_t<Traversal == LinearVectorizedTraversal, LinearPacketType, InnerPacketType>;
|
|
114
|
+
|
|
115
|
+
private:
|
|
116
|
+
static constexpr int ActualPacketSize = Vectorized ? unpacket_traits<PacketType>::size : 1;
|
|
117
|
+
static constexpr int UnrollingLimit = EIGEN_UNROLLING_LIMIT * ActualPacketSize;
|
|
118
|
+
static constexpr int CoeffReadCost = int(DstEvaluator::CoeffReadCost) + int(SrcEvaluator::CoeffReadCost);
|
|
119
|
+
static constexpr bool MayUnrollCompletely =
|
|
120
|
+
(SizeAtCompileTime != Dynamic) && (SizeAtCompileTime * CoeffReadCost <= UnrollingLimit);
|
|
121
|
+
static constexpr bool MayUnrollInner =
|
|
122
|
+
(InnerSizeAtCompileTime != Dynamic) && (InnerSizeAtCompileTime * CoeffReadCost <= UnrollingLimit);
|
|
123
|
+
|
|
124
|
+
public:
|
|
125
|
+
static constexpr int Unrolling =
|
|
126
|
+
(Traversal == InnerVectorizedTraversal || Traversal == DefaultTraversal)
|
|
127
|
+
? (MayUnrollCompletely ? CompleteUnrolling
|
|
128
|
+
: MayUnrollInner ? InnerUnrolling
|
|
129
|
+
: NoUnrolling)
|
|
130
|
+
: Traversal == LinearVectorizedTraversal
|
|
131
|
+
? (MayUnrollCompletely && (EIGEN_UNALIGNED_VECTORIZE || (DstAlignment >= LinearRequiredAlignment))
|
|
132
|
+
? CompleteUnrolling
|
|
133
|
+
: NoUnrolling)
|
|
134
|
+
: Traversal == LinearTraversal ? (MayUnrollCompletely ? CompleteUnrolling : NoUnrolling)
|
|
140
135
|
#if EIGEN_UNALIGNED_VECTORIZE
|
|
141
|
-
|
|
142
|
-
? ( bool(MayUnrollInner) ? int(InnerUnrolling)
|
|
143
|
-
: int(NoUnrolling) )
|
|
136
|
+
: Traversal == SliceVectorizedTraversal ? (MayUnrollInner ? InnerUnrolling : NoUnrolling)
|
|
144
137
|
#endif
|
|
145
|
-
|
|
146
|
-
|
|
138
|
+
: NoUnrolling;
|
|
139
|
+
static constexpr bool UsePacketSegment = has_packet_segment<PacketType>::value;
|
|
147
140
|
|
|
148
141
|
#ifdef EIGEN_DEBUG_ASSIGN
|
|
149
|
-
static void debug()
|
|
150
|
-
{
|
|
142
|
+
static void debug() {
|
|
151
143
|
std::cerr << "DstXpr: " << typeid(typename DstEvaluator::XprType).name() << std::endl;
|
|
152
144
|
std::cerr << "SrcXpr: " << typeid(typename SrcEvaluator::XprType).name() << std::endl;
|
|
153
145
|
std::cerr.setf(std::ios::hex, std::ios::basefield);
|
|
154
|
-
std::cerr << "DstFlags"
|
|
155
|
-
|
|
146
|
+
std::cerr << "DstFlags"
|
|
147
|
+
<< " = " << DstFlags << " (" << demangle_flags(DstFlags) << " )" << std::endl;
|
|
148
|
+
std::cerr << "SrcFlags"
|
|
149
|
+
<< " = " << SrcFlags << " (" << demangle_flags(SrcFlags) << " )" << std::endl;
|
|
156
150
|
std::cerr.unsetf(std::ios::hex);
|
|
157
151
|
EIGEN_DEBUG_VAR(DstAlignment)
|
|
158
152
|
EIGEN_DEBUG_VAR(SrcAlignment)
|
|
159
153
|
EIGEN_DEBUG_VAR(LinearRequiredAlignment)
|
|
160
154
|
EIGEN_DEBUG_VAR(InnerRequiredAlignment)
|
|
161
155
|
EIGEN_DEBUG_VAR(JointAlignment)
|
|
162
|
-
EIGEN_DEBUG_VAR(
|
|
163
|
-
EIGEN_DEBUG_VAR(
|
|
156
|
+
EIGEN_DEBUG_VAR(InnerSizeAtCompileTime)
|
|
157
|
+
EIGEN_DEBUG_VAR(MaxInnerSizeAtCompileTime)
|
|
164
158
|
EIGEN_DEBUG_VAR(LinearPacketSize)
|
|
165
159
|
EIGEN_DEBUG_VAR(InnerPacketSize)
|
|
166
160
|
EIGEN_DEBUG_VAR(ActualPacketSize)
|
|
@@ -170,185 +164,213 @@ public:
|
|
|
170
164
|
EIGEN_DEBUG_VAR(MayInnerVectorize)
|
|
171
165
|
EIGEN_DEBUG_VAR(MayLinearVectorize)
|
|
172
166
|
EIGEN_DEBUG_VAR(MaySliceVectorize)
|
|
173
|
-
std::cerr << "Traversal"
|
|
167
|
+
std::cerr << "Traversal"
|
|
168
|
+
<< " = " << Traversal << " (" << demangle_traversal(Traversal) << ")" << std::endl;
|
|
174
169
|
EIGEN_DEBUG_VAR(SrcEvaluator::CoeffReadCost)
|
|
170
|
+
EIGEN_DEBUG_VAR(DstEvaluator::CoeffReadCost)
|
|
171
|
+
EIGEN_DEBUG_VAR(Dst::SizeAtCompileTime)
|
|
175
172
|
EIGEN_DEBUG_VAR(UnrollingLimit)
|
|
176
173
|
EIGEN_DEBUG_VAR(MayUnrollCompletely)
|
|
177
174
|
EIGEN_DEBUG_VAR(MayUnrollInner)
|
|
178
|
-
std::cerr << "Unrolling"
|
|
175
|
+
std::cerr << "Unrolling"
|
|
176
|
+
<< " = " << Unrolling << " (" << demangle_unrolling(Unrolling) << ")" << std::endl;
|
|
179
177
|
std::cerr << std::endl;
|
|
180
178
|
}
|
|
181
179
|
#endif
|
|
182
180
|
};
|
|
183
181
|
|
|
184
182
|
/***************************************************************************
|
|
185
|
-
* Part 2 : meta-unrollers
|
|
186
|
-
***************************************************************************/
|
|
183
|
+
* Part 2 : meta-unrollers
|
|
184
|
+
***************************************************************************/
|
|
187
185
|
|
|
188
186
|
/************************
|
|
189
187
|
*** Default traversal ***
|
|
190
188
|
************************/
|
|
191
189
|
|
|
192
|
-
template<typename Kernel, int
|
|
193
|
-
struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
|
|
197
|
-
typedef typename DstEvaluatorType::XprType DstXprType;
|
|
198
|
-
|
|
199
|
-
enum {
|
|
200
|
-
outer = Index / DstXprType::InnerSizeAtCompileTime,
|
|
201
|
-
inner = Index % DstXprType::InnerSizeAtCompileTime
|
|
202
|
-
};
|
|
190
|
+
template <typename Kernel, int Index_, int Stop>
|
|
191
|
+
struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling {
|
|
192
|
+
static constexpr int Outer = Index_ / Kernel::AssignmentTraits::InnerSizeAtCompileTime;
|
|
193
|
+
static constexpr int Inner = Index_ % Kernel::AssignmentTraits::InnerSizeAtCompileTime;
|
|
203
194
|
|
|
204
|
-
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Index+1, Stop>::run(kernel);
|
|
195
|
+
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) {
|
|
196
|
+
kernel.assignCoeffByOuterInner(Outer, Inner);
|
|
197
|
+
copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Index_ + 1, Stop>::run(kernel);
|
|
208
198
|
}
|
|
209
199
|
};
|
|
210
200
|
|
|
211
|
-
template<typename Kernel, int Stop>
|
|
212
|
-
struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Stop, Stop>
|
|
213
|
-
{
|
|
214
|
-
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
|
|
201
|
+
template <typename Kernel, int Stop>
|
|
202
|
+
struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Stop, Stop> {
|
|
203
|
+
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&) {}
|
|
215
204
|
};
|
|
216
205
|
|
|
217
|
-
template<typename Kernel, int Index_, int Stop>
|
|
218
|
-
struct copy_using_evaluator_DefaultTraversal_InnerUnrolling
|
|
219
|
-
{
|
|
220
|
-
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer)
|
|
221
|
-
{
|
|
206
|
+
template <typename Kernel, int Index_, int Stop>
|
|
207
|
+
struct copy_using_evaluator_DefaultTraversal_InnerUnrolling {
|
|
208
|
+
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel, Index outer) {
|
|
222
209
|
kernel.assignCoeffByOuterInner(outer, Index_);
|
|
223
|
-
copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Index_+1, Stop>::run(kernel, outer);
|
|
210
|
+
copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Index_ + 1, Stop>::run(kernel, outer);
|
|
224
211
|
}
|
|
225
212
|
};
|
|
226
213
|
|
|
227
|
-
template<typename Kernel, int Stop>
|
|
228
|
-
struct copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Stop, Stop>
|
|
229
|
-
{
|
|
230
|
-
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, Index) { }
|
|
214
|
+
template <typename Kernel, int Stop>
|
|
215
|
+
struct copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Stop, Stop> {
|
|
216
|
+
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&, Index) {}
|
|
231
217
|
};
|
|
232
218
|
|
|
233
219
|
/***********************
|
|
234
220
|
*** Linear traversal ***
|
|
235
221
|
***********************/
|
|
236
222
|
|
|
237
|
-
template<typename Kernel, int
|
|
238
|
-
struct copy_using_evaluator_LinearTraversal_CompleteUnrolling
|
|
239
|
-
{
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
kernel.assignCoeff(Index);
|
|
243
|
-
copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Index+1, Stop>::run(kernel);
|
|
223
|
+
template <typename Kernel, int Index_, int Stop>
|
|
224
|
+
struct copy_using_evaluator_LinearTraversal_CompleteUnrolling {
|
|
225
|
+
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) {
|
|
226
|
+
kernel.assignCoeff(Index_);
|
|
227
|
+
copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Index_ + 1, Stop>::run(kernel);
|
|
244
228
|
}
|
|
245
229
|
};
|
|
246
230
|
|
|
247
|
-
template<typename Kernel, int Stop>
|
|
248
|
-
struct copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Stop, Stop>
|
|
249
|
-
{
|
|
250
|
-
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
|
|
231
|
+
template <typename Kernel, int Stop>
|
|
232
|
+
struct copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Stop, Stop> {
|
|
233
|
+
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&) {}
|
|
251
234
|
};
|
|
252
235
|
|
|
253
236
|
/**************************
|
|
254
237
|
*** Inner vectorization ***
|
|
255
238
|
**************************/
|
|
256
239
|
|
|
257
|
-
template<typename Kernel, int
|
|
258
|
-
struct copy_using_evaluator_innervec_CompleteUnrolling
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
SrcAlignment = Kernel::AssignmentTraits::SrcAlignment,
|
|
269
|
-
DstAlignment = Kernel::AssignmentTraits::DstAlignment
|
|
270
|
-
};
|
|
271
|
-
|
|
272
|
-
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
|
|
273
|
-
{
|
|
274
|
-
kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, inner);
|
|
275
|
-
enum { NextIndex = Index + unpacket_traits<PacketType>::size };
|
|
240
|
+
template <typename Kernel, int Index_, int Stop>
|
|
241
|
+
struct copy_using_evaluator_innervec_CompleteUnrolling {
|
|
242
|
+
using PacketType = typename Kernel::PacketType;
|
|
243
|
+
static constexpr int Outer = Index_ / Kernel::AssignmentTraits::InnerSizeAtCompileTime;
|
|
244
|
+
static constexpr int Inner = Index_ % Kernel::AssignmentTraits::InnerSizeAtCompileTime;
|
|
245
|
+
static constexpr int NextIndex = Index_ + unpacket_traits<PacketType>::size;
|
|
246
|
+
static constexpr int SrcAlignment = Kernel::AssignmentTraits::SrcAlignment;
|
|
247
|
+
static constexpr int DstAlignment = Kernel::AssignmentTraits::DstAlignment;
|
|
248
|
+
|
|
249
|
+
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) {
|
|
250
|
+
kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(Outer, Inner);
|
|
276
251
|
copy_using_evaluator_innervec_CompleteUnrolling<Kernel, NextIndex, Stop>::run(kernel);
|
|
277
252
|
}
|
|
278
253
|
};
|
|
279
254
|
|
|
280
|
-
template<typename Kernel, int Stop>
|
|
281
|
-
struct copy_using_evaluator_innervec_CompleteUnrolling<Kernel, Stop, Stop>
|
|
282
|
-
{
|
|
283
|
-
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
|
|
255
|
+
template <typename Kernel, int Stop>
|
|
256
|
+
struct copy_using_evaluator_innervec_CompleteUnrolling<Kernel, Stop, Stop> {
|
|
257
|
+
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&) {}
|
|
284
258
|
};
|
|
285
259
|
|
|
286
|
-
template<typename Kernel, int Index_, int Stop, int SrcAlignment, int DstAlignment>
|
|
287
|
-
struct copy_using_evaluator_innervec_InnerUnrolling
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
{
|
|
260
|
+
template <typename Kernel, int Index_, int Stop, int SrcAlignment, int DstAlignment>
|
|
261
|
+
struct copy_using_evaluator_innervec_InnerUnrolling {
|
|
262
|
+
using PacketType = typename Kernel::PacketType;
|
|
263
|
+
static constexpr int NextIndex = Index_ + unpacket_traits<PacketType>::size;
|
|
264
|
+
|
|
265
|
+
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel, Index outer) {
|
|
292
266
|
kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, Index_);
|
|
293
|
-
|
|
294
|
-
|
|
267
|
+
copy_using_evaluator_innervec_InnerUnrolling<Kernel, NextIndex, Stop, SrcAlignment, DstAlignment>::run(kernel,
|
|
268
|
+
outer);
|
|
269
|
+
}
|
|
270
|
+
};
|
|
271
|
+
|
|
272
|
+
template <typename Kernel, int Stop, int SrcAlignment, int DstAlignment>
|
|
273
|
+
struct copy_using_evaluator_innervec_InnerUnrolling<Kernel, Stop, Stop, SrcAlignment, DstAlignment> {
|
|
274
|
+
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&, Index) {}
|
|
275
|
+
};
|
|
276
|
+
|
|
277
|
+
template <typename Kernel, int Start, int Stop, int SrcAlignment, int DstAlignment, bool UsePacketSegment>
|
|
278
|
+
struct copy_using_evaluator_innervec_segment {
|
|
279
|
+
using PacketType = typename Kernel::PacketType;
|
|
280
|
+
|
|
281
|
+
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel, Index outer) {
|
|
282
|
+
kernel.template assignPacketSegmentByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, Start, 0,
|
|
283
|
+
Stop - Start);
|
|
295
284
|
}
|
|
296
285
|
};
|
|
297
286
|
|
|
298
|
-
template<typename Kernel, int Stop, int SrcAlignment, int DstAlignment>
|
|
299
|
-
struct
|
|
300
|
-
|
|
301
|
-
|
|
287
|
+
template <typename Kernel, int Start, int Stop, int SrcAlignment, int DstAlignment>
|
|
288
|
+
struct copy_using_evaluator_innervec_segment<Kernel, Start, Stop, SrcAlignment, DstAlignment,
|
|
289
|
+
/*UsePacketSegment*/ false>
|
|
290
|
+
: copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Start, Stop> {};
|
|
291
|
+
|
|
292
|
+
template <typename Kernel, int Stop, int SrcAlignment, int DstAlignment>
|
|
293
|
+
struct copy_using_evaluator_innervec_segment<Kernel, Stop, Stop, SrcAlignment, DstAlignment,
|
|
294
|
+
/*UsePacketSegment*/ true> {
|
|
295
|
+
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&, Index) {}
|
|
296
|
+
};
|
|
297
|
+
|
|
298
|
+
template <typename Kernel, int Stop, int SrcAlignment, int DstAlignment>
|
|
299
|
+
struct copy_using_evaluator_innervec_segment<Kernel, Stop, Stop, SrcAlignment, DstAlignment,
|
|
300
|
+
/*UsePacketSegment*/ false> {
|
|
301
|
+
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&, Index) {}
|
|
302
302
|
};
|
|
303
303
|
|
|
304
304
|
/***************************************************************************
|
|
305
|
-
* Part 3 : implementation of all cases
|
|
306
|
-
***************************************************************************/
|
|
305
|
+
* Part 3 : implementation of all cases
|
|
306
|
+
***************************************************************************/
|
|
307
307
|
|
|
308
308
|
// dense_assignment_loop is based on assign_impl
|
|
309
309
|
|
|
310
|
-
template<typename Kernel,
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
310
|
+
template <typename Kernel, int Traversal = Kernel::AssignmentTraits::Traversal,
|
|
311
|
+
int Unrolling = Kernel::AssignmentTraits::Unrolling>
|
|
312
|
+
struct dense_assignment_loop_impl;
|
|
313
|
+
|
|
314
|
+
template <typename Kernel, int Traversal = Kernel::AssignmentTraits::Traversal,
|
|
315
|
+
int Unrolling = Kernel::AssignmentTraits::Unrolling>
|
|
316
|
+
struct dense_assignment_loop {
|
|
317
|
+
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) {
|
|
318
|
+
#ifdef __cpp_lib_is_constant_evaluated
|
|
319
|
+
if (internal::is_constant_evaluated())
|
|
320
|
+
dense_assignment_loop_impl<Kernel, Traversal == AllAtOnceTraversal ? AllAtOnceTraversal : DefaultTraversal,
|
|
321
|
+
NoUnrolling>::run(kernel);
|
|
322
|
+
else
|
|
323
|
+
#endif
|
|
324
|
+
dense_assignment_loop_impl<Kernel, Traversal, Unrolling>::run(kernel);
|
|
325
|
+
}
|
|
326
|
+
};
|
|
327
|
+
|
|
328
|
+
/************************
|
|
329
|
+
***** Special Cases *****
|
|
330
|
+
************************/
|
|
331
|
+
|
|
332
|
+
// Zero-sized assignment is a no-op.
|
|
333
|
+
template <typename Kernel, int Unrolling>
|
|
334
|
+
struct dense_assignment_loop_impl<Kernel, AllAtOnceTraversal, Unrolling> {
|
|
335
|
+
static constexpr int SizeAtCompileTime = Kernel::AssignmentTraits::SizeAtCompileTime;
|
|
336
|
+
|
|
337
|
+
EIGEN_DEVICE_FUNC static void EIGEN_STRONG_INLINE constexpr run(Kernel& /*kernel*/) {
|
|
338
|
+
EIGEN_STATIC_ASSERT(SizeAtCompileTime == 0, EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT)
|
|
339
|
+
}
|
|
340
|
+
};
|
|
314
341
|
|
|
315
342
|
/************************
|
|
316
343
|
*** Default traversal ***
|
|
317
344
|
************************/
|
|
318
345
|
|
|
319
|
-
template<typename Kernel>
|
|
320
|
-
struct
|
|
321
|
-
{
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
for(Index outer = 0; outer < kernel.outerSize(); ++outer) {
|
|
325
|
-
for(Index inner = 0; inner < kernel.innerSize(); ++inner) {
|
|
346
|
+
template <typename Kernel>
|
|
347
|
+
struct dense_assignment_loop_impl<Kernel, DefaultTraversal, NoUnrolling> {
|
|
348
|
+
EIGEN_DEVICE_FUNC static void EIGEN_STRONG_INLINE constexpr run(Kernel& kernel) {
|
|
349
|
+
for (Index outer = 0; outer < kernel.outerSize(); ++outer) {
|
|
350
|
+
for (Index inner = 0; inner < kernel.innerSize(); ++inner) {
|
|
326
351
|
kernel.assignCoeffByOuterInner(outer, inner);
|
|
327
352
|
}
|
|
328
353
|
}
|
|
329
354
|
}
|
|
330
355
|
};
|
|
331
356
|
|
|
332
|
-
template<typename Kernel>
|
|
333
|
-
struct
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
{
|
|
337
|
-
|
|
338
|
-
copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
|
|
357
|
+
template <typename Kernel>
|
|
358
|
+
struct dense_assignment_loop_impl<Kernel, DefaultTraversal, CompleteUnrolling> {
|
|
359
|
+
static constexpr int SizeAtCompileTime = Kernel::AssignmentTraits::SizeAtCompileTime;
|
|
360
|
+
|
|
361
|
+
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) {
|
|
362
|
+
copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, 0, SizeAtCompileTime>::run(kernel);
|
|
339
363
|
}
|
|
340
364
|
};
|
|
341
365
|
|
|
342
|
-
template<typename Kernel>
|
|
343
|
-
struct
|
|
344
|
-
|
|
345
|
-
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
|
|
346
|
-
{
|
|
347
|
-
typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
|
|
366
|
+
template <typename Kernel>
|
|
367
|
+
struct dense_assignment_loop_impl<Kernel, DefaultTraversal, InnerUnrolling> {
|
|
368
|
+
static constexpr int InnerSizeAtCompileTime = Kernel::AssignmentTraits::InnerSizeAtCompileTime;
|
|
348
369
|
|
|
370
|
+
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) {
|
|
349
371
|
const Index outerSize = kernel.outerSize();
|
|
350
|
-
for(Index outer = 0; outer < outerSize; ++outer)
|
|
351
|
-
copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, 0,
|
|
372
|
+
for (Index outer = 0; outer < outerSize; ++outer)
|
|
373
|
+
copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, 0, InnerSizeAtCompileTime>::run(kernel, outer);
|
|
352
374
|
}
|
|
353
375
|
};
|
|
354
376
|
|
|
@@ -356,83 +378,137 @@ struct dense_assignment_loop<Kernel, DefaultTraversal, InnerUnrolling>
|
|
|
356
378
|
*** Linear vectorization ***
|
|
357
379
|
***************************/
|
|
358
380
|
|
|
359
|
-
|
|
360
381
|
// The goal of unaligned_dense_assignment_loop is simply to factorize the handling
|
|
361
382
|
// of the non vectorizable beginning and ending parts
|
|
362
383
|
|
|
363
|
-
template <bool
|
|
364
|
-
struct unaligned_dense_assignment_loop
|
|
365
|
-
|
|
366
|
-
|
|
384
|
+
template <typename PacketType, int DstAlignment, int SrcAlignment, bool UsePacketSegment, bool Skip>
|
|
385
|
+
struct unaligned_dense_assignment_loop {
|
|
386
|
+
// if Skip == true, then do nothing
|
|
387
|
+
template <typename Kernel>
|
|
388
|
+
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& /*kernel*/, Index /*start*/, Index /*end*/) {}
|
|
367
389
|
template <typename Kernel>
|
|
368
|
-
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel
|
|
390
|
+
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& /*kernel*/, Index /*outer*/,
|
|
391
|
+
Index /*innerStart*/, Index /*innerEnd*/) {}
|
|
369
392
|
};
|
|
370
393
|
|
|
371
|
-
template
|
|
372
|
-
struct unaligned_dense_assignment_loop<
|
|
373
|
-
{
|
|
374
|
-
// MSVC must not inline this functions. If it does, it fails to optimize the
|
|
375
|
-
// packet access path.
|
|
376
|
-
// FIXME check which version exhibits this issue
|
|
377
|
-
#if EIGEN_COMP_MSVC
|
|
394
|
+
template <typename PacketType, int DstAlignment, int SrcAlignment>
|
|
395
|
+
struct unaligned_dense_assignment_loop<PacketType, DstAlignment, SrcAlignment, /*UsePacketSegment*/ true,
|
|
396
|
+
/*Skip*/ false> {
|
|
378
397
|
template <typename Kernel>
|
|
379
|
-
static
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
398
|
+
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel, Index start, Index end) {
|
|
399
|
+
Index count = end - start;
|
|
400
|
+
eigen_assert(count <= unpacket_traits<PacketType>::size);
|
|
401
|
+
if (count > 0) kernel.template assignPacketSegment<DstAlignment, SrcAlignment, PacketType>(start, 0, count);
|
|
402
|
+
}
|
|
383
403
|
template <typename Kernel>
|
|
384
|
-
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
for (Index index = start; index < end; ++index)
|
|
390
|
-
kernel.assignCoeff(index);
|
|
404
|
+
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel, Index outer, Index start, Index end) {
|
|
405
|
+
Index count = end - start;
|
|
406
|
+
eigen_assert(count <= unpacket_traits<PacketType>::size);
|
|
407
|
+
if (count > 0)
|
|
408
|
+
kernel.template assignPacketSegmentByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, start, 0, count);
|
|
391
409
|
}
|
|
392
410
|
};
|
|
393
411
|
|
|
394
|
-
template<typename
|
|
395
|
-
struct
|
|
396
|
-
{
|
|
397
|
-
|
|
398
|
-
{
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
: int(Kernel::AssignmentTraits::DstAlignment),
|
|
408
|
-
srcAlignment = Kernel::AssignmentTraits::JointAlignment
|
|
409
|
-
};
|
|
410
|
-
const Index alignedStart = dstIsAligned ? 0 : internal::first_aligned<requestedAlignment>(kernel.dstDataPtr(), size);
|
|
411
|
-
const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;
|
|
412
|
+
template <typename PacketType, int DstAlignment, int SrcAlignment>
|
|
413
|
+
struct unaligned_dense_assignment_loop<PacketType, DstAlignment, SrcAlignment, /*UsePacketSegment*/ false,
|
|
414
|
+
/*Skip*/ false> {
|
|
415
|
+
template <typename Kernel>
|
|
416
|
+
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel, Index start, Index end) {
|
|
417
|
+
for (Index index = start; index < end; ++index) kernel.assignCoeff(index);
|
|
418
|
+
}
|
|
419
|
+
template <typename Kernel>
|
|
420
|
+
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel, Index outer, Index innerStart,
|
|
421
|
+
Index innerEnd) {
|
|
422
|
+
for (Index inner = innerStart; inner < innerEnd; ++inner) kernel.assignCoeffByOuterInner(outer, inner);
|
|
423
|
+
}
|
|
424
|
+
};
|
|
412
425
|
|
|
413
|
-
|
|
426
|
+
template <typename Kernel, int Index_, int Stop>
|
|
427
|
+
struct copy_using_evaluator_linearvec_CompleteUnrolling {
|
|
428
|
+
using PacketType = typename Kernel::PacketType;
|
|
429
|
+
static constexpr int SrcAlignment = Kernel::AssignmentTraits::SrcAlignment;
|
|
430
|
+
static constexpr int DstAlignment = Kernel::AssignmentTraits::DstAlignment;
|
|
431
|
+
static constexpr int NextIndex = Index_ + unpacket_traits<PacketType>::size;
|
|
414
432
|
|
|
415
|
-
|
|
416
|
-
|
|
433
|
+
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) {
|
|
434
|
+
kernel.template assignPacket<DstAlignment, SrcAlignment, PacketType>(Index_);
|
|
435
|
+
copy_using_evaluator_linearvec_CompleteUnrolling<Kernel, NextIndex, Stop>::run(kernel);
|
|
436
|
+
}
|
|
437
|
+
};
|
|
438
|
+
|
|
439
|
+
template <typename Kernel, int Stop>
|
|
440
|
+
struct copy_using_evaluator_linearvec_CompleteUnrolling<Kernel, Stop, Stop> {
|
|
441
|
+
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&) {}
|
|
442
|
+
};
|
|
417
443
|
|
|
418
|
-
|
|
444
|
+
template <typename Kernel, int Index_, int Stop, bool UsePacketSegment>
|
|
445
|
+
struct copy_using_evaluator_linearvec_segment {
|
|
446
|
+
using PacketType = typename Kernel::PacketType;
|
|
447
|
+
static constexpr int SrcAlignment = Kernel::AssignmentTraits::SrcAlignment;
|
|
448
|
+
static constexpr int DstAlignment = Kernel::AssignmentTraits::DstAlignment;
|
|
449
|
+
|
|
450
|
+
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) {
|
|
451
|
+
kernel.template assignPacketSegment<DstAlignment, SrcAlignment, PacketType>(Index_, 0, Stop - Index_);
|
|
419
452
|
}
|
|
420
453
|
};
|
|
421
454
|
|
|
422
|
-
template<typename Kernel>
|
|
423
|
-
struct
|
|
424
|
-
{
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
455
|
+
template <typename Kernel, int Index_, int Stop>
|
|
456
|
+
struct copy_using_evaluator_linearvec_segment<Kernel, Index_, Stop, /*UsePacketSegment*/ false>
|
|
457
|
+
: copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Index_, Stop> {};
|
|
458
|
+
|
|
459
|
+
template <typename Kernel, int Stop>
|
|
460
|
+
struct copy_using_evaluator_linearvec_segment<Kernel, Stop, Stop, /*UsePacketSegment*/ true> {
|
|
461
|
+
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&) {}
|
|
462
|
+
};
|
|
463
|
+
|
|
464
|
+
template <typename Kernel, int Stop>
|
|
465
|
+
struct copy_using_evaluator_linearvec_segment<Kernel, Stop, Stop, /*UsePacketSegment*/ false> {
|
|
466
|
+
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&) {}
|
|
467
|
+
};
|
|
468
|
+
|
|
469
|
+
template <typename Kernel>
|
|
470
|
+
struct dense_assignment_loop_impl<Kernel, LinearVectorizedTraversal, NoUnrolling> {
|
|
471
|
+
using Scalar = typename Kernel::Scalar;
|
|
472
|
+
using PacketType = typename Kernel::PacketType;
|
|
473
|
+
static constexpr int PacketSize = unpacket_traits<PacketType>::size;
|
|
474
|
+
static constexpr int SrcAlignment = Kernel::AssignmentTraits::JointAlignment;
|
|
475
|
+
static constexpr int DstAlignment = plain_enum_max(Kernel::AssignmentTraits::DstAlignment, alignof(Scalar));
|
|
476
|
+
static constexpr int RequestedAlignment = unpacket_traits<PacketType>::alignment;
|
|
477
|
+
static constexpr bool Alignable =
|
|
478
|
+
(DstAlignment >= RequestedAlignment) || ((RequestedAlignment - DstAlignment) % sizeof(Scalar) == 0);
|
|
479
|
+
static constexpr int Alignment = Alignable ? RequestedAlignment : DstAlignment;
|
|
480
|
+
static constexpr bool DstIsAligned = DstAlignment >= Alignment;
|
|
481
|
+
static constexpr bool UsePacketSegment = Kernel::AssignmentTraits::UsePacketSegment;
|
|
482
|
+
|
|
483
|
+
using head_loop =
|
|
484
|
+
unaligned_dense_assignment_loop<PacketType, DstAlignment, SrcAlignment, UsePacketSegment, DstIsAligned>;
|
|
485
|
+
using tail_loop = unaligned_dense_assignment_loop<PacketType, Alignment, SrcAlignment, UsePacketSegment, false>;
|
|
486
|
+
|
|
487
|
+
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) {
|
|
488
|
+
const Index size = kernel.size();
|
|
489
|
+
const Index alignedStart = DstIsAligned ? 0 : first_aligned<Alignment>(kernel.dstDataPtr(), size);
|
|
490
|
+
const Index alignedEnd = alignedStart + numext::round_down(size - alignedStart, PacketSize);
|
|
491
|
+
|
|
492
|
+
head_loop::run(kernel, 0, alignedStart);
|
|
493
|
+
|
|
494
|
+
for (Index index = alignedStart; index < alignedEnd; index += PacketSize)
|
|
495
|
+
kernel.template assignPacket<Alignment, SrcAlignment, PacketType>(index);
|
|
433
496
|
|
|
434
|
-
|
|
435
|
-
|
|
497
|
+
tail_loop::run(kernel, alignedEnd, size);
|
|
498
|
+
}
|
|
499
|
+
};
|
|
500
|
+
|
|
501
|
+
template <typename Kernel>
|
|
502
|
+
struct dense_assignment_loop_impl<Kernel, LinearVectorizedTraversal, CompleteUnrolling> {
|
|
503
|
+
using PacketType = typename Kernel::PacketType;
|
|
504
|
+
static constexpr int PacketSize = unpacket_traits<PacketType>::size;
|
|
505
|
+
static constexpr int Size = Kernel::AssignmentTraits::SizeAtCompileTime;
|
|
506
|
+
static constexpr int AlignedSize = numext::round_down(Size, PacketSize);
|
|
507
|
+
static constexpr bool UsePacketSegment = Kernel::AssignmentTraits::UsePacketSegment;
|
|
508
|
+
|
|
509
|
+
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) {
|
|
510
|
+
copy_using_evaluator_linearvec_CompleteUnrolling<Kernel, 0, AlignedSize>::run(kernel);
|
|
511
|
+
copy_using_evaluator_linearvec_segment<Kernel, AlignedSize, Size, UsePacketSegment>::run(kernel);
|
|
436
512
|
}
|
|
437
513
|
};
|
|
438
514
|
|
|
@@ -440,46 +516,42 @@ struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, CompleteUnrollin
|
|
|
440
516
|
*** Inner vectorization ***
|
|
441
517
|
**************************/
|
|
442
518
|
|
|
443
|
-
template<typename Kernel>
|
|
444
|
-
struct
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
|
|
452
|
-
{
|
|
519
|
+
template <typename Kernel>
|
|
520
|
+
struct dense_assignment_loop_impl<Kernel, InnerVectorizedTraversal, NoUnrolling> {
|
|
521
|
+
using PacketType = typename Kernel::PacketType;
|
|
522
|
+
static constexpr int PacketSize = unpacket_traits<PacketType>::size;
|
|
523
|
+
static constexpr int SrcAlignment = Kernel::AssignmentTraits::JointAlignment;
|
|
524
|
+
static constexpr int DstAlignment = Kernel::AssignmentTraits::DstAlignment;
|
|
525
|
+
|
|
526
|
+
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) {
|
|
453
527
|
const Index innerSize = kernel.innerSize();
|
|
454
528
|
const Index outerSize = kernel.outerSize();
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
for(Index inner = 0; inner < innerSize; inner+=packetSize)
|
|
529
|
+
for (Index outer = 0; outer < outerSize; ++outer)
|
|
530
|
+
for (Index inner = 0; inner < innerSize; inner += PacketSize)
|
|
458
531
|
kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, inner);
|
|
459
532
|
}
|
|
460
533
|
};
|
|
461
534
|
|
|
462
|
-
template<typename Kernel>
|
|
463
|
-
struct
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
{
|
|
467
|
-
|
|
468
|
-
copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
|
|
535
|
+
template <typename Kernel>
|
|
536
|
+
struct dense_assignment_loop_impl<Kernel, InnerVectorizedTraversal, CompleteUnrolling> {
|
|
537
|
+
static constexpr int SizeAtCompileTime = Kernel::AssignmentTraits::SizeAtCompileTime;
|
|
538
|
+
|
|
539
|
+
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) {
|
|
540
|
+
copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, SizeAtCompileTime>::run(kernel);
|
|
469
541
|
}
|
|
470
542
|
};
|
|
471
543
|
|
|
472
|
-
template<typename Kernel>
|
|
473
|
-
struct
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
544
|
+
template <typename Kernel>
|
|
545
|
+
struct dense_assignment_loop_impl<Kernel, InnerVectorizedTraversal, InnerUnrolling> {
|
|
546
|
+
static constexpr int InnerSize = Kernel::AssignmentTraits::InnerSizeAtCompileTime;
|
|
547
|
+
static constexpr int SrcAlignment = Kernel::AssignmentTraits::SrcAlignment;
|
|
548
|
+
static constexpr int DstAlignment = Kernel::AssignmentTraits::DstAlignment;
|
|
549
|
+
|
|
550
|
+
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) {
|
|
479
551
|
const Index outerSize = kernel.outerSize();
|
|
480
|
-
for(Index outer = 0; outer < outerSize; ++outer)
|
|
481
|
-
copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0,
|
|
482
|
-
|
|
552
|
+
for (Index outer = 0; outer < outerSize; ++outer)
|
|
553
|
+
copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, InnerSize, SrcAlignment, DstAlignment>::run(kernel,
|
|
554
|
+
outer);
|
|
483
555
|
}
|
|
484
556
|
};
|
|
485
557
|
|
|
@@ -487,24 +559,19 @@ struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, InnerUnrolling>
|
|
|
487
559
|
*** Linear traversal ***
|
|
488
560
|
***********************/
|
|
489
561
|
|
|
490
|
-
template<typename Kernel>
|
|
491
|
-
struct
|
|
492
|
-
{
|
|
493
|
-
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
|
|
494
|
-
{
|
|
562
|
+
template <typename Kernel>
|
|
563
|
+
struct dense_assignment_loop_impl<Kernel, LinearTraversal, NoUnrolling> {
|
|
564
|
+
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) {
|
|
495
565
|
const Index size = kernel.size();
|
|
496
|
-
for(Index i = 0; i < size; ++i)
|
|
497
|
-
kernel.assignCoeff(i);
|
|
566
|
+
for (Index i = 0; i < size; ++i) kernel.assignCoeff(i);
|
|
498
567
|
}
|
|
499
568
|
};
|
|
500
569
|
|
|
501
|
-
template<typename Kernel>
|
|
502
|
-
struct
|
|
503
|
-
{
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
|
|
507
|
-
copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
|
|
570
|
+
template <typename Kernel>
|
|
571
|
+
struct dense_assignment_loop_impl<Kernel, LinearTraversal, CompleteUnrolling> {
|
|
572
|
+
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) {
|
|
573
|
+
copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, 0, Kernel::AssignmentTraits::SizeAtCompileTime>::run(
|
|
574
|
+
kernel);
|
|
508
575
|
}
|
|
509
576
|
};
|
|
510
577
|
|
|
@@ -512,79 +579,71 @@ struct dense_assignment_loop<Kernel, LinearTraversal, CompleteUnrolling>
|
|
|
512
579
|
*** Slice vectorization ***
|
|
513
580
|
***************************/
|
|
514
581
|
|
|
515
|
-
template<typename Kernel>
|
|
516
|
-
struct
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
return dense_assignment_loop<Kernel,DefaultTraversal,NoUnrolling>::run(kernel);
|
|
535
|
-
}
|
|
536
|
-
const Index packetAlignedMask = packetSize - 1;
|
|
582
|
+
template <typename Kernel>
|
|
583
|
+
struct dense_assignment_loop_impl<Kernel, SliceVectorizedTraversal, NoUnrolling> {
|
|
584
|
+
using Scalar = typename Kernel::Scalar;
|
|
585
|
+
using PacketType = typename Kernel::PacketType;
|
|
586
|
+
static constexpr int PacketSize = unpacket_traits<PacketType>::size;
|
|
587
|
+
static constexpr int SrcAlignment = Kernel::AssignmentTraits::JointAlignment;
|
|
588
|
+
static constexpr int DstAlignment = plain_enum_max(Kernel::AssignmentTraits::DstAlignment, alignof(Scalar));
|
|
589
|
+
static constexpr int RequestedAlignment = unpacket_traits<PacketType>::alignment;
|
|
590
|
+
static constexpr bool Alignable =
|
|
591
|
+
(DstAlignment >= RequestedAlignment) || ((RequestedAlignment - DstAlignment) % sizeof(Scalar) == 0);
|
|
592
|
+
static constexpr int Alignment = Alignable ? RequestedAlignment : DstAlignment;
|
|
593
|
+
static constexpr bool DstIsAligned = DstAlignment >= Alignment;
|
|
594
|
+
static constexpr bool UsePacketSegment = Kernel::AssignmentTraits::UsePacketSegment;
|
|
595
|
+
|
|
596
|
+
using head_loop = unaligned_dense_assignment_loop<PacketType, DstAlignment, Unaligned, UsePacketSegment, !Alignable>;
|
|
597
|
+
using tail_loop = unaligned_dense_assignment_loop<PacketType, Alignment, Unaligned, UsePacketSegment, false>;
|
|
598
|
+
|
|
599
|
+
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) {
|
|
600
|
+
const Scalar* dst_ptr = kernel.dstDataPtr();
|
|
537
601
|
const Index innerSize = kernel.innerSize();
|
|
538
602
|
const Index outerSize = kernel.outerSize();
|
|
539
|
-
const Index alignedStep =
|
|
540
|
-
Index alignedStart = ((!
|
|
541
|
-
|
|
542
|
-
for(Index outer = 0; outer < outerSize; ++outer)
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
for(Index inner = 0; inner<alignedStart ; ++inner)
|
|
547
|
-
kernel.assignCoeffByOuterInner(outer, inner);
|
|
603
|
+
const Index alignedStep = Alignable ? (PacketSize - kernel.outerStride() % PacketSize) % PacketSize : 0;
|
|
604
|
+
Index alignedStart = ((!Alignable) || DstIsAligned) ? 0 : internal::first_aligned<Alignment>(dst_ptr, innerSize);
|
|
605
|
+
|
|
606
|
+
for (Index outer = 0; outer < outerSize; ++outer) {
|
|
607
|
+
const Index alignedEnd = alignedStart + numext::round_down(innerSize - alignedStart, PacketSize);
|
|
608
|
+
|
|
609
|
+
head_loop::run(kernel, outer, 0, alignedStart);
|
|
548
610
|
|
|
549
611
|
// do the vectorizable part of the assignment
|
|
550
|
-
for(Index inner = alignedStart; inner<alignedEnd; inner+=
|
|
551
|
-
kernel.template assignPacketByOuterInner<
|
|
612
|
+
for (Index inner = alignedStart; inner < alignedEnd; inner += PacketSize)
|
|
613
|
+
kernel.template assignPacketByOuterInner<Alignment, Unaligned, PacketType>(outer, inner);
|
|
552
614
|
|
|
553
|
-
|
|
554
|
-
for(Index inner = alignedEnd; inner<innerSize ; ++inner)
|
|
555
|
-
kernel.assignCoeffByOuterInner(outer, inner);
|
|
615
|
+
tail_loop::run(kernel, outer, alignedEnd, innerSize);
|
|
556
616
|
|
|
557
|
-
alignedStart = numext::mini((alignedStart+alignedStep)%
|
|
617
|
+
alignedStart = numext::mini((alignedStart + alignedStep) % PacketSize, innerSize);
|
|
558
618
|
}
|
|
559
619
|
}
|
|
560
620
|
};
|
|
561
621
|
|
|
562
622
|
#if EIGEN_UNALIGNED_VECTORIZE
|
|
563
|
-
template<typename Kernel>
|
|
564
|
-
struct
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
{
|
|
577
|
-
|
|
578
|
-
|
|
623
|
+
template <typename Kernel>
|
|
624
|
+
struct dense_assignment_loop_impl<Kernel, SliceVectorizedTraversal, InnerUnrolling> {
|
|
625
|
+
using PacketType = typename Kernel::PacketType;
|
|
626
|
+
static constexpr int PacketSize = unpacket_traits<PacketType>::size;
|
|
627
|
+
static constexpr int InnerSize = Kernel::AssignmentTraits::InnerSizeAtCompileTime;
|
|
628
|
+
static constexpr int VectorizableSize = numext::round_down(InnerSize, PacketSize);
|
|
629
|
+
static constexpr bool UsePacketSegment = Kernel::AssignmentTraits::UsePacketSegment;
|
|
630
|
+
|
|
631
|
+
using packet_loop = copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, VectorizableSize, Unaligned, Unaligned>;
|
|
632
|
+
using packet_segment_loop = copy_using_evaluator_innervec_segment<Kernel, VectorizableSize, InnerSize, Unaligned,
|
|
633
|
+
Unaligned, UsePacketSegment>;
|
|
634
|
+
|
|
635
|
+
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) {
|
|
636
|
+
for (Index outer = 0; outer < kernel.outerSize(); ++outer) {
|
|
637
|
+
packet_loop::run(kernel, outer);
|
|
638
|
+
packet_segment_loop::run(kernel, outer);
|
|
579
639
|
}
|
|
580
640
|
}
|
|
581
641
|
};
|
|
582
642
|
#endif
|
|
583
643
|
|
|
584
|
-
|
|
585
644
|
/***************************************************************************
|
|
586
|
-
* Part 4 : Generic dense assignment kernel
|
|
587
|
-
***************************************************************************/
|
|
645
|
+
* Part 4 : Generic dense assignment kernel
|
|
646
|
+
***************************************************************************/
|
|
588
647
|
|
|
589
648
|
// This class generalizes the assignment of a coefficient (or packet) from one dense evaluator
|
|
590
649
|
// to another dense writable evaluator.
|
|
@@ -592,138 +651,166 @@ struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, InnerUnrolling>
|
|
|
592
651
|
// This abstraction level keeps the evaluation loops as simple and as generic as possible.
|
|
593
652
|
// One can customize the assignment using this generic dense_assignment_kernel with different
|
|
594
653
|
// functors, or by completely overloading it, by-passing a functor.
|
|
595
|
-
template<typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor, int Version = Specialized>
|
|
596
|
-
class generic_dense_assignment_kernel
|
|
597
|
-
|
|
598
|
-
protected:
|
|
654
|
+
template <typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor, int Version = Specialized>
|
|
655
|
+
class generic_dense_assignment_kernel {
|
|
656
|
+
protected:
|
|
599
657
|
typedef typename DstEvaluatorTypeT::XprType DstXprType;
|
|
600
658
|
typedef typename SrcEvaluatorTypeT::XprType SrcXprType;
|
|
601
|
-
|
|
602
|
-
|
|
659
|
+
|
|
660
|
+
public:
|
|
603
661
|
typedef DstEvaluatorTypeT DstEvaluatorType;
|
|
604
662
|
typedef SrcEvaluatorTypeT SrcEvaluatorType;
|
|
605
663
|
typedef typename DstEvaluatorType::Scalar Scalar;
|
|
606
664
|
typedef copy_using_evaluator_traits<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor> AssignmentTraits;
|
|
607
665
|
typedef typename AssignmentTraits::PacketType PacketType;
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
666
|
+
|
|
667
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr generic_dense_assignment_kernel(DstEvaluatorType& dst,
|
|
668
|
+
const SrcEvaluatorType& src,
|
|
669
|
+
const Functor& func,
|
|
670
|
+
DstXprType& dstExpr)
|
|
671
|
+
: m_dst(dst), m_src(src), m_functor(func), m_dstExpr(dstExpr) {
|
|
672
|
+
#ifdef EIGEN_DEBUG_ASSIGN
|
|
614
673
|
AssignmentTraits::debug();
|
|
615
|
-
|
|
616
|
-
}
|
|
617
|
-
|
|
618
|
-
EIGEN_DEVICE_FUNC Index size() const
|
|
619
|
-
EIGEN_DEVICE_FUNC Index innerSize() const
|
|
620
|
-
EIGEN_DEVICE_FUNC Index outerSize() const
|
|
621
|
-
EIGEN_DEVICE_FUNC Index rows() const
|
|
622
|
-
EIGEN_DEVICE_FUNC Index cols() const
|
|
623
|
-
EIGEN_DEVICE_FUNC Index outerStride() const { return m_dstExpr.outerStride(); }
|
|
624
|
-
|
|
625
|
-
EIGEN_DEVICE_FUNC DstEvaluatorType& dstEvaluator() { return m_dst; }
|
|
626
|
-
EIGEN_DEVICE_FUNC const SrcEvaluatorType& srcEvaluator() const { return m_src; }
|
|
627
|
-
|
|
674
|
+
#endif
|
|
675
|
+
}
|
|
676
|
+
|
|
677
|
+
EIGEN_DEVICE_FUNC constexpr Index size() const noexcept { return m_dstExpr.size(); }
|
|
678
|
+
EIGEN_DEVICE_FUNC constexpr Index innerSize() const noexcept { return m_dstExpr.innerSize(); }
|
|
679
|
+
EIGEN_DEVICE_FUNC constexpr Index outerSize() const noexcept { return m_dstExpr.outerSize(); }
|
|
680
|
+
EIGEN_DEVICE_FUNC constexpr Index rows() const noexcept { return m_dstExpr.rows(); }
|
|
681
|
+
EIGEN_DEVICE_FUNC constexpr Index cols() const noexcept { return m_dstExpr.cols(); }
|
|
682
|
+
EIGEN_DEVICE_FUNC constexpr Index outerStride() const noexcept { return m_dstExpr.outerStride(); }
|
|
683
|
+
|
|
684
|
+
EIGEN_DEVICE_FUNC DstEvaluatorType& dstEvaluator() noexcept { return m_dst; }
|
|
685
|
+
EIGEN_DEVICE_FUNC const SrcEvaluatorType& srcEvaluator() const noexcept { return m_src; }
|
|
686
|
+
|
|
628
687
|
/// Assign src(row,col) to dst(row,col) through the assignment functor.
|
|
629
|
-
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index row, Index col)
|
|
630
|
-
|
|
631
|
-
m_functor.assignCoeff(m_dst.coeffRef(row,col), m_src.coeff(row,col));
|
|
688
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void assignCoeff(Index row, Index col) {
|
|
689
|
+
m_functor.assignCoeff(m_dst.coeffRef(row, col), m_src.coeff(row, col));
|
|
632
690
|
}
|
|
633
|
-
|
|
691
|
+
|
|
634
692
|
/// \sa assignCoeff(Index,Index)
|
|
635
|
-
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index index)
|
|
636
|
-
{
|
|
693
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index index) {
|
|
637
694
|
m_functor.assignCoeff(m_dst.coeffRef(index), m_src.coeff(index));
|
|
638
695
|
}
|
|
639
|
-
|
|
696
|
+
|
|
640
697
|
/// \sa assignCoeff(Index,Index)
|
|
641
|
-
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeffByOuterInner(Index outer, Index inner)
|
|
642
|
-
|
|
643
|
-
Index
|
|
644
|
-
Index col = colIndexByOuterInner(outer, inner);
|
|
698
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void assignCoeffByOuterInner(Index outer, Index inner) {
|
|
699
|
+
Index row = rowIndexByOuterInner(outer, inner);
|
|
700
|
+
Index col = colIndexByOuterInner(outer, inner);
|
|
645
701
|
assignCoeff(row, col);
|
|
646
702
|
}
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacketByOuterInner(Index outer, Index inner)
|
|
663
|
-
{
|
|
664
|
-
Index row = rowIndexByOuterInner(outer, inner);
|
|
703
|
+
|
|
704
|
+
template <int StoreMode, int LoadMode, typename Packet>
|
|
705
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index row, Index col) {
|
|
706
|
+
m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(row, col),
|
|
707
|
+
m_src.template packet<LoadMode, Packet>(row, col));
|
|
708
|
+
}
|
|
709
|
+
|
|
710
|
+
template <int StoreMode, int LoadMode, typename Packet>
|
|
711
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index index) {
|
|
712
|
+
m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(index), m_src.template packet<LoadMode, Packet>(index));
|
|
713
|
+
}
|
|
714
|
+
|
|
715
|
+
template <int StoreMode, int LoadMode, typename Packet>
|
|
716
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacketByOuterInner(Index outer, Index inner) {
|
|
717
|
+
Index row = rowIndexByOuterInner(outer, inner);
|
|
665
718
|
Index col = colIndexByOuterInner(outer, inner);
|
|
666
|
-
assignPacket<StoreMode,LoadMode,
|
|
719
|
+
assignPacket<StoreMode, LoadMode, Packet>(row, col);
|
|
667
720
|
}
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
{
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
: int(DstEvaluatorType::Flags)&RowMajorBit ? outer
|
|
675
|
-
: inner;
|
|
721
|
+
|
|
722
|
+
template <int StoreMode, int LoadMode, typename Packet>
|
|
723
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacketSegment(Index row, Index col, Index begin, Index count) {
|
|
724
|
+
m_functor.template assignPacketSegment<StoreMode>(
|
|
725
|
+
&m_dst.coeffRef(row, col), m_src.template packetSegment<LoadMode, Packet>(row, col, begin, count), begin,
|
|
726
|
+
count);
|
|
676
727
|
}
|
|
677
728
|
|
|
678
|
-
|
|
679
|
-
{
|
|
729
|
+
template <int StoreMode, int LoadMode, typename Packet>
|
|
730
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacketSegment(Index index, Index begin, Index count) {
|
|
731
|
+
m_functor.template assignPacketSegment<StoreMode>(
|
|
732
|
+
&m_dst.coeffRef(index), m_src.template packetSegment<LoadMode, Packet>(index, begin, count), begin, count);
|
|
733
|
+
}
|
|
734
|
+
|
|
735
|
+
template <int StoreMode, int LoadMode, typename Packet>
|
|
736
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacketSegmentByOuterInner(Index outer, Index inner, Index begin,
|
|
737
|
+
Index count) {
|
|
738
|
+
Index row = rowIndexByOuterInner(outer, inner);
|
|
739
|
+
Index col = colIndexByOuterInner(outer, inner);
|
|
740
|
+
assignPacketSegment<StoreMode, LoadMode, Packet>(row, col, begin, count);
|
|
741
|
+
}
|
|
742
|
+
|
|
743
|
+
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr Index rowIndexByOuterInner(Index outer, Index inner) {
|
|
680
744
|
typedef typename DstEvaluatorType::ExpressionTraits Traits;
|
|
681
|
-
return int(Traits::
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
745
|
+
return int(Traits::RowsAtCompileTime) == 1 ? 0
|
|
746
|
+
: int(Traits::ColsAtCompileTime) == 1 ? inner
|
|
747
|
+
: int(DstEvaluatorType::Flags) & RowMajorBit ? outer
|
|
748
|
+
: inner;
|
|
685
749
|
}
|
|
686
750
|
|
|
687
|
-
EIGEN_DEVICE_FUNC
|
|
688
|
-
|
|
689
|
-
return
|
|
751
|
+
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr Index colIndexByOuterInner(Index outer, Index inner) {
|
|
752
|
+
typedef typename DstEvaluatorType::ExpressionTraits Traits;
|
|
753
|
+
return int(Traits::ColsAtCompileTime) == 1 ? 0
|
|
754
|
+
: int(Traits::RowsAtCompileTime) == 1 ? inner
|
|
755
|
+
: int(DstEvaluatorType::Flags) & RowMajorBit ? inner
|
|
756
|
+
: outer;
|
|
690
757
|
}
|
|
691
|
-
|
|
692
|
-
|
|
758
|
+
|
|
759
|
+
EIGEN_DEVICE_FUNC const Scalar* dstDataPtr() const { return m_dstExpr.data(); }
|
|
760
|
+
|
|
761
|
+
protected:
|
|
693
762
|
DstEvaluatorType& m_dst;
|
|
694
763
|
const SrcEvaluatorType& m_src;
|
|
695
|
-
const Functor
|
|
764
|
+
const Functor& m_functor;
|
|
696
765
|
// TODO find a way to avoid the need for the original expression
|
|
697
766
|
DstXprType& m_dstExpr;
|
|
698
767
|
};
|
|
699
768
|
|
|
769
|
+
// Special kernel used when computing small products whose operands have dynamic dimensions. It ensures that the
|
|
770
|
+
// PacketSize used is no larger than 4, thereby increasing the chance that vectorized instructions will be used
|
|
771
|
+
// when computing the product.
|
|
772
|
+
|
|
773
|
+
template <typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor>
|
|
774
|
+
class restricted_packet_dense_assignment_kernel
|
|
775
|
+
: public generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, BuiltIn> {
|
|
776
|
+
protected:
|
|
777
|
+
typedef generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, BuiltIn> Base;
|
|
778
|
+
|
|
779
|
+
public:
|
|
780
|
+
typedef typename Base::Scalar Scalar;
|
|
781
|
+
typedef typename Base::DstXprType DstXprType;
|
|
782
|
+
typedef copy_using_evaluator_traits<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, 4> AssignmentTraits;
|
|
783
|
+
typedef typename AssignmentTraits::PacketType PacketType;
|
|
784
|
+
|
|
785
|
+
EIGEN_DEVICE_FUNC restricted_packet_dense_assignment_kernel(DstEvaluatorTypeT& dst, const SrcEvaluatorTypeT& src,
|
|
786
|
+
const Functor& func, DstXprType& dstExpr)
|
|
787
|
+
: Base(dst, src, func, dstExpr) {}
|
|
788
|
+
};
|
|
789
|
+
|
|
700
790
|
/***************************************************************************
|
|
701
|
-
* Part 5 : Entry point for dense rectangular assignment
|
|
702
|
-
***************************************************************************/
|
|
791
|
+
* Part 5 : Entry point for dense rectangular assignment
|
|
792
|
+
***************************************************************************/
|
|
703
793
|
|
|
704
|
-
template<typename DstXprType,typename SrcXprType, typename Functor>
|
|
705
|
-
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
|
706
|
-
|
|
707
|
-
{
|
|
794
|
+
template <typename DstXprType, typename SrcXprType, typename Functor>
|
|
795
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize_if_allowed(DstXprType& dst, const SrcXprType& src,
|
|
796
|
+
const Functor& /*func*/) {
|
|
708
797
|
EIGEN_ONLY_USED_FOR_DEBUG(dst);
|
|
709
798
|
EIGEN_ONLY_USED_FOR_DEBUG(src);
|
|
710
799
|
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
|
|
711
800
|
}
|
|
712
801
|
|
|
713
|
-
template<typename DstXprType,typename SrcXprType, typename T1, typename T2>
|
|
714
|
-
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
|
715
|
-
|
|
716
|
-
{
|
|
802
|
+
template <typename DstXprType, typename SrcXprType, typename T1, typename T2>
|
|
803
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize_if_allowed(DstXprType& dst, const SrcXprType& src,
|
|
804
|
+
const internal::assign_op<T1, T2>& /*func*/) {
|
|
717
805
|
Index dstRows = src.rows();
|
|
718
806
|
Index dstCols = src.cols();
|
|
719
|
-
if(((dst.rows()!=dstRows) || (dst.cols()!=dstCols)))
|
|
720
|
-
dst.resize(dstRows, dstCols);
|
|
807
|
+
if (((dst.rows() != dstRows) || (dst.cols() != dstCols))) dst.resize(dstRows, dstCols);
|
|
721
808
|
eigen_assert(dst.rows() == dstRows && dst.cols() == dstCols);
|
|
722
809
|
}
|
|
723
810
|
|
|
724
|
-
template<typename DstXprType, typename SrcXprType, typename Functor>
|
|
725
|
-
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src,
|
|
726
|
-
{
|
|
811
|
+
template <typename DstXprType, typename SrcXprType, typename Functor>
|
|
812
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src,
|
|
813
|
+
const Functor& func) {
|
|
727
814
|
typedef evaluator<DstXprType> DstEvaluatorType;
|
|
728
815
|
typedef evaluator<SrcXprType> SrcEvaluatorType;
|
|
729
816
|
|
|
@@ -734,202 +821,237 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType
|
|
|
734
821
|
resize_if_allowed(dst, src, func);
|
|
735
822
|
|
|
736
823
|
DstEvaluatorType dstEvaluator(dst);
|
|
737
|
-
|
|
738
|
-
typedef generic_dense_assignment_kernel<DstEvaluatorType,SrcEvaluatorType,Functor> Kernel;
|
|
824
|
+
|
|
825
|
+
typedef generic_dense_assignment_kernel<DstEvaluatorType, SrcEvaluatorType, Functor> Kernel;
|
|
739
826
|
Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived());
|
|
740
827
|
|
|
741
828
|
dense_assignment_loop<Kernel>::run(kernel);
|
|
742
829
|
}
|
|
743
830
|
|
|
744
|
-
template<typename DstXprType, typename SrcXprType>
|
|
745
|
-
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src)
|
|
746
|
-
|
|
747
|
-
call_dense_assignment_loop(dst, src, internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar>());
|
|
831
|
+
template <typename DstXprType, typename SrcXprType>
|
|
832
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src) {
|
|
833
|
+
call_dense_assignment_loop(dst, src, internal::assign_op<typename DstXprType::Scalar, typename SrcXprType::Scalar>());
|
|
748
834
|
}
|
|
749
835
|
|
|
750
836
|
/***************************************************************************
|
|
751
|
-
* Part 6 : Generic assignment
|
|
752
|
-
***************************************************************************/
|
|
837
|
+
* Part 6 : Generic assignment
|
|
838
|
+
***************************************************************************/
|
|
753
839
|
|
|
754
840
|
// Based on the respective shapes of the destination and source,
|
|
755
841
|
// the class AssignmentKind determines the kind of assignment mechanism.
|
|
756
842
|
// AssignmentKind must define a Kind typedef.
|
|
757
|
-
template<typename DstShape, typename SrcShape>
|
|
843
|
+
template <typename DstShape, typename SrcShape>
|
|
844
|
+
struct AssignmentKind;
|
|
758
845
|
|
|
759
|
-
//
|
|
846
|
+
// Assignment kind defined in this file:
|
|
760
847
|
struct Dense2Dense {};
|
|
761
848
|
struct EigenBase2EigenBase {};
|
|
762
849
|
|
|
763
|
-
template<typename,typename>
|
|
764
|
-
|
|
765
|
-
|
|
850
|
+
template <typename, typename>
|
|
851
|
+
struct AssignmentKind {
|
|
852
|
+
typedef EigenBase2EigenBase Kind;
|
|
853
|
+
};
|
|
854
|
+
template <>
|
|
855
|
+
struct AssignmentKind<DenseShape, DenseShape> {
|
|
856
|
+
typedef Dense2Dense Kind;
|
|
857
|
+
};
|
|
858
|
+
|
|
766
859
|
// This is the main assignment class
|
|
767
|
-
template<
|
|
768
|
-
typename Kind = typename AssignmentKind<
|
|
860
|
+
template <typename DstXprType, typename SrcXprType, typename Functor,
|
|
861
|
+
typename Kind = typename AssignmentKind<typename evaluator_traits<DstXprType>::Shape,
|
|
862
|
+
typename evaluator_traits<SrcXprType>::Shape>::Kind,
|
|
769
863
|
typename EnableIf = void>
|
|
770
864
|
struct Assignment;
|
|
771
865
|
|
|
866
|
+
// The only purpose of this call_assignment() function is to deal with noalias() / "assume-aliasing" and automatic
|
|
867
|
+
// transposition. Indeed, I (Gael) think that this concept of "assume-aliasing" was a mistake, and it makes things quite
|
|
868
|
+
// complicated. So this intermediate function removes everything related to "assume-aliasing" such that Assignment does
|
|
869
|
+
// not have to bother about these annoying details.
|
|
772
870
|
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
|
|
776
|
-
// does not has to bother about these annoying details.
|
|
777
|
-
|
|
778
|
-
template<typename Dst, typename Src>
|
|
779
|
-
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
|
780
|
-
void call_assignment(Dst& dst, const Src& src)
|
|
781
|
-
{
|
|
782
|
-
call_assignment(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
|
|
871
|
+
template <typename Dst, typename Src>
|
|
872
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment(Dst& dst, const Src& src) {
|
|
873
|
+
call_assignment(dst, src, internal::assign_op<typename Dst::Scalar, typename Src::Scalar>());
|
|
783
874
|
}
|
|
784
|
-
template<typename Dst, typename Src>
|
|
785
|
-
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
|
786
|
-
|
|
787
|
-
{
|
|
788
|
-
call_assignment(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
|
|
875
|
+
template <typename Dst, typename Src>
|
|
876
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_assignment(const Dst& dst, const Src& src) {
|
|
877
|
+
call_assignment(dst, src, internal::assign_op<typename Dst::Scalar, typename Src::Scalar>());
|
|
789
878
|
}
|
|
790
|
-
|
|
879
|
+
|
|
791
880
|
// Deal with "assume-aliasing"
|
|
792
|
-
template<typename Dst, typename Src, typename Func>
|
|
793
|
-
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
|
794
|
-
|
|
795
|
-
{
|
|
881
|
+
template <typename Dst, typename Src, typename Func>
|
|
882
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment(
|
|
883
|
+
Dst& dst, const Src& src, const Func& func, std::enable_if_t<evaluator_assume_aliasing<Src>::value, void*> = 0) {
|
|
796
884
|
typename plain_matrix_type<Src>::type tmp(src);
|
|
797
885
|
call_assignment_no_alias(dst, tmp, func);
|
|
798
886
|
}
|
|
799
887
|
|
|
800
|
-
template<typename Dst, typename Src, typename Func>
|
|
801
|
-
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
|
802
|
-
|
|
803
|
-
{
|
|
888
|
+
template <typename Dst, typename Src, typename Func>
|
|
889
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment(
|
|
890
|
+
Dst& dst, const Src& src, const Func& func, std::enable_if_t<!evaluator_assume_aliasing<Src>::value, void*> = 0) {
|
|
804
891
|
call_assignment_no_alias(dst, src, func);
|
|
805
892
|
}
|
|
806
893
|
|
|
807
894
|
// by-pass "assume-aliasing"
|
|
808
895
|
// When there is no aliasing, we require that 'dst' has been properly resized
|
|
809
|
-
template<typename Dst, template <typename> class StorageBase, typename Src, typename Func>
|
|
810
|
-
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
|
811
|
-
|
|
812
|
-
{
|
|
896
|
+
template <typename Dst, template <typename> class StorageBase, typename Src, typename Func>
|
|
897
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment(NoAlias<Dst, StorageBase>& dst, const Src& src,
|
|
898
|
+
const Func& func) {
|
|
813
899
|
call_assignment_no_alias(dst.expression(), src, func);
|
|
814
900
|
}
|
|
815
901
|
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
void call_assignment_no_alias(Dst& dst, const Src& src, const Func& func)
|
|
820
|
-
{
|
|
902
|
+
template <typename Dst, typename Src, typename Func>
|
|
903
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment_no_alias(Dst& dst, const Src& src,
|
|
904
|
+
const Func& func) {
|
|
821
905
|
enum {
|
|
822
|
-
NeedToTranspose = (
|
|
823
|
-
|
|
824
|
-
|
|
906
|
+
NeedToTranspose = ((int(Dst::RowsAtCompileTime) == 1 && int(Src::ColsAtCompileTime) == 1) ||
|
|
907
|
+
(int(Dst::ColsAtCompileTime) == 1 && int(Src::RowsAtCompileTime) == 1)) &&
|
|
908
|
+
int(Dst::SizeAtCompileTime) != 1
|
|
825
909
|
};
|
|
826
910
|
|
|
827
|
-
typedef
|
|
828
|
-
typedef
|
|
911
|
+
typedef std::conditional_t<NeedToTranspose, Transpose<Dst>, Dst> ActualDstTypeCleaned;
|
|
912
|
+
typedef std::conditional_t<NeedToTranspose, Transpose<Dst>, Dst&> ActualDstType;
|
|
829
913
|
ActualDstType actualDst(dst);
|
|
830
|
-
|
|
914
|
+
|
|
831
915
|
// TODO check whether this is the right place to perform these checks:
|
|
832
916
|
EIGEN_STATIC_ASSERT_LVALUE(Dst)
|
|
833
|
-
EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(ActualDstTypeCleaned,Src)
|
|
834
|
-
EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename ActualDstTypeCleaned::Scalar,typename Src::Scalar);
|
|
835
|
-
|
|
836
|
-
Assignment<ActualDstTypeCleaned,Src,Func>::run(actualDst, src, func);
|
|
917
|
+
EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(ActualDstTypeCleaned, Src)
|
|
918
|
+
EIGEN_CHECK_BINARY_COMPATIBILIY(Func, typename ActualDstTypeCleaned::Scalar, typename Src::Scalar);
|
|
919
|
+
|
|
920
|
+
Assignment<ActualDstTypeCleaned, Src, Func>::run(actualDst, src, func);
|
|
921
|
+
}
|
|
922
|
+
|
|
923
|
+
template <typename Dst, typename Src, typename Func>
|
|
924
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_restricted_packet_assignment_no_alias(Dst& dst, const Src& src,
|
|
925
|
+
const Func& func) {
|
|
926
|
+
typedef evaluator<Dst> DstEvaluatorType;
|
|
927
|
+
typedef evaluator<Src> SrcEvaluatorType;
|
|
928
|
+
typedef restricted_packet_dense_assignment_kernel<DstEvaluatorType, SrcEvaluatorType, Func> Kernel;
|
|
929
|
+
|
|
930
|
+
EIGEN_STATIC_ASSERT_LVALUE(Dst)
|
|
931
|
+
EIGEN_CHECK_BINARY_COMPATIBILIY(Func, typename Dst::Scalar, typename Src::Scalar);
|
|
932
|
+
|
|
933
|
+
SrcEvaluatorType srcEvaluator(src);
|
|
934
|
+
resize_if_allowed(dst, src, func);
|
|
935
|
+
|
|
936
|
+
DstEvaluatorType dstEvaluator(dst);
|
|
937
|
+
Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived());
|
|
938
|
+
|
|
939
|
+
dense_assignment_loop<Kernel>::run(kernel);
|
|
837
940
|
}
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
void call_assignment_no_alias(Dst& dst, const Src& src)
|
|
841
|
-
|
|
842
|
-
call_assignment_no_alias(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
|
|
941
|
+
|
|
942
|
+
template <typename Dst, typename Src>
|
|
943
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment_no_alias(Dst& dst, const Src& src) {
|
|
944
|
+
call_assignment_no_alias(dst, src, internal::assign_op<typename Dst::Scalar, typename Src::Scalar>());
|
|
843
945
|
}
|
|
844
946
|
|
|
845
|
-
template<typename Dst, typename Src, typename Func>
|
|
846
|
-
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
|
847
|
-
|
|
848
|
-
{
|
|
947
|
+
template <typename Dst, typename Src, typename Func>
|
|
948
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src,
|
|
949
|
+
const Func& func) {
|
|
849
950
|
// TODO check whether this is the right place to perform these checks:
|
|
850
951
|
EIGEN_STATIC_ASSERT_LVALUE(Dst)
|
|
851
|
-
EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Dst,Src)
|
|
852
|
-
EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename Dst::Scalar,typename Src::Scalar);
|
|
952
|
+
EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Dst, Src)
|
|
953
|
+
EIGEN_CHECK_BINARY_COMPATIBILIY(Func, typename Dst::Scalar, typename Src::Scalar);
|
|
853
954
|
|
|
854
|
-
Assignment<Dst,Src,Func>::run(dst, src, func);
|
|
955
|
+
Assignment<Dst, Src, Func>::run(dst, src, func);
|
|
855
956
|
}
|
|
856
|
-
template<typename Dst, typename Src>
|
|
857
|
-
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
|
858
|
-
|
|
859
|
-
{
|
|
860
|
-
call_assignment_no_alias_no_transpose(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
|
|
957
|
+
template <typename Dst, typename Src>
|
|
958
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src) {
|
|
959
|
+
call_assignment_no_alias_no_transpose(dst, src, internal::assign_op<typename Dst::Scalar, typename Src::Scalar>());
|
|
861
960
|
}
|
|
862
961
|
|
|
863
962
|
// forward declaration
|
|
864
|
-
template<typename Dst, typename Src>
|
|
963
|
+
template <typename Dst, typename Src>
|
|
964
|
+
EIGEN_DEVICE_FUNC void check_for_aliasing(const Dst& dst, const Src& src);
|
|
865
965
|
|
|
866
966
|
// Generic Dense to Dense assignment
|
|
867
967
|
// Note that the last template argument "Weak" is needed to make it possible to perform
|
|
868
968
|
// both partial specialization+SFINAE without ambiguous specialization
|
|
869
|
-
template<
|
|
870
|
-
struct Assignment<DstXprType, SrcXprType, Functor, Dense2Dense, Weak>
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
|
|
874
|
-
{
|
|
969
|
+
template <typename DstXprType, typename SrcXprType, typename Functor, typename Weak>
|
|
970
|
+
struct Assignment<DstXprType, SrcXprType, Functor, Dense2Dense, Weak> {
|
|
971
|
+
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(DstXprType& dst, const SrcXprType& src,
|
|
972
|
+
const Functor& func) {
|
|
875
973
|
#ifndef EIGEN_NO_DEBUG
|
|
876
|
-
internal::
|
|
974
|
+
if (!internal::is_constant_evaluated()) {
|
|
975
|
+
internal::check_for_aliasing(dst, src);
|
|
976
|
+
}
|
|
877
977
|
#endif
|
|
878
|
-
|
|
978
|
+
|
|
879
979
|
call_dense_assignment_loop(dst, src, func);
|
|
880
980
|
}
|
|
881
981
|
};
|
|
882
982
|
|
|
983
|
+
template <typename DstXprType, typename SrcPlainObject, typename Weak>
|
|
984
|
+
struct Assignment<DstXprType, CwiseNullaryOp<scalar_constant_op<typename DstXprType::Scalar>, SrcPlainObject>,
|
|
985
|
+
assign_op<typename DstXprType::Scalar, typename DstXprType::Scalar>, Dense2Dense, Weak> {
|
|
986
|
+
using Scalar = typename DstXprType::Scalar;
|
|
987
|
+
using NullaryOp = scalar_constant_op<Scalar>;
|
|
988
|
+
using SrcXprType = CwiseNullaryOp<NullaryOp, SrcPlainObject>;
|
|
989
|
+
using Functor = assign_op<Scalar, Scalar>;
|
|
990
|
+
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(DstXprType& dst, const SrcXprType& src,
|
|
991
|
+
const Functor& /*func*/) {
|
|
992
|
+
eigen_fill_impl<DstXprType>::run(dst, src);
|
|
993
|
+
}
|
|
994
|
+
};
|
|
995
|
+
|
|
996
|
+
template <typename DstXprType, typename SrcPlainObject, typename Weak>
|
|
997
|
+
struct Assignment<DstXprType, CwiseNullaryOp<scalar_zero_op<typename DstXprType::Scalar>, SrcPlainObject>,
|
|
998
|
+
assign_op<typename DstXprType::Scalar, typename DstXprType::Scalar>, Dense2Dense, Weak> {
|
|
999
|
+
using Scalar = typename DstXprType::Scalar;
|
|
1000
|
+
using NullaryOp = scalar_zero_op<Scalar>;
|
|
1001
|
+
using SrcXprType = CwiseNullaryOp<NullaryOp, SrcPlainObject>;
|
|
1002
|
+
using Functor = assign_op<Scalar, Scalar>;
|
|
1003
|
+
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(DstXprType& dst, const SrcXprType& src,
|
|
1004
|
+
const Functor& /*func*/) {
|
|
1005
|
+
eigen_zero_impl<DstXprType>::run(dst, src);
|
|
1006
|
+
}
|
|
1007
|
+
};
|
|
1008
|
+
|
|
883
1009
|
// Generic assignment through evalTo.
|
|
884
1010
|
// TODO: not sure we have to keep that one, but it helps porting current code to new evaluator mechanism.
|
|
885
1011
|
// Note that the last template argument "Weak" is needed to make it possible to perform
|
|
886
1012
|
// both partial specialization+SFINAE without ambiguous specialization
|
|
887
|
-
template<
|
|
888
|
-
struct Assignment<DstXprType, SrcXprType, Functor, EigenBase2EigenBase, Weak>
|
|
889
|
-
|
|
890
|
-
|
|
891
|
-
|
|
892
|
-
{
|
|
1013
|
+
template <typename DstXprType, typename SrcXprType, typename Functor, typename Weak>
|
|
1014
|
+
struct Assignment<DstXprType, SrcXprType, Functor, EigenBase2EigenBase, Weak> {
|
|
1015
|
+
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(
|
|
1016
|
+
DstXprType& dst, const SrcXprType& src,
|
|
1017
|
+
const internal::assign_op<typename DstXprType::Scalar, typename SrcXprType::Scalar>& /*func*/) {
|
|
893
1018
|
Index dstRows = src.rows();
|
|
894
1019
|
Index dstCols = src.cols();
|
|
895
|
-
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
|
|
896
|
-
dst.resize(dstRows, dstCols);
|
|
1020
|
+
if ((dst.rows() != dstRows) || (dst.cols() != dstCols)) dst.resize(dstRows, dstCols);
|
|
897
1021
|
|
|
898
1022
|
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
|
|
899
1023
|
src.evalTo(dst);
|
|
900
1024
|
}
|
|
901
1025
|
|
|
902
|
-
// NOTE The following two functions are templated to avoid their
|
|
1026
|
+
// NOTE The following two functions are templated to avoid their instantiation if not needed
|
|
903
1027
|
// This is needed because some expressions supports evalTo only and/or have 'void' as scalar type.
|
|
904
|
-
template<typename SrcScalarType>
|
|
905
|
-
EIGEN_DEVICE_FUNC
|
|
906
|
-
|
|
907
|
-
|
|
1028
|
+
template <typename SrcScalarType>
|
|
1029
|
+
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(
|
|
1030
|
+
DstXprType& dst, const SrcXprType& src,
|
|
1031
|
+
const internal::add_assign_op<typename DstXprType::Scalar, SrcScalarType>& /*func*/) {
|
|
908
1032
|
Index dstRows = src.rows();
|
|
909
1033
|
Index dstCols = src.cols();
|
|
910
|
-
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
|
|
911
|
-
dst.resize(dstRows, dstCols);
|
|
1034
|
+
if ((dst.rows() != dstRows) || (dst.cols() != dstCols)) dst.resize(dstRows, dstCols);
|
|
912
1035
|
|
|
913
1036
|
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
|
|
914
1037
|
src.addTo(dst);
|
|
915
1038
|
}
|
|
916
1039
|
|
|
917
|
-
template<typename SrcScalarType>
|
|
918
|
-
EIGEN_DEVICE_FUNC
|
|
919
|
-
|
|
920
|
-
|
|
1040
|
+
template <typename SrcScalarType>
|
|
1041
|
+
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(
|
|
1042
|
+
DstXprType& dst, const SrcXprType& src,
|
|
1043
|
+
const internal::sub_assign_op<typename DstXprType::Scalar, SrcScalarType>& /*func*/) {
|
|
921
1044
|
Index dstRows = src.rows();
|
|
922
1045
|
Index dstCols = src.cols();
|
|
923
|
-
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
|
|
924
|
-
dst.resize(dstRows, dstCols);
|
|
1046
|
+
if ((dst.rows() != dstRows) || (dst.cols() != dstCols)) dst.resize(dstRows, dstCols);
|
|
925
1047
|
|
|
926
1048
|
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
|
|
927
1049
|
src.subTo(dst);
|
|
928
1050
|
}
|
|
929
1051
|
};
|
|
930
1052
|
|
|
931
|
-
}
|
|
1053
|
+
} // namespace internal
|
|
932
1054
|
|
|
933
|
-
}
|
|
1055
|
+
} // end namespace Eigen
|
|
934
1056
|
|
|
935
|
-
#endif
|
|
1057
|
+
#endif // EIGEN_ASSIGN_EVALUATOR_H
|