pylibsparseir 0.1.0__cp310-cp310-macosx_15_0_arm64.whl → 0.6.0__cp310-cp310-macosx_15_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pylibsparseir might be problematic. Click here for more details.
- include/eigen3/Eigen/Cholesky +45 -0
- include/eigen3/Eigen/CholmodSupport +48 -0
- include/eigen3/Eigen/Core +384 -0
- include/eigen3/Eigen/Dense +7 -0
- include/eigen3/Eigen/Eigen +2 -0
- include/eigen3/Eigen/Eigenvalues +60 -0
- include/eigen3/Eigen/Geometry +59 -0
- include/eigen3/Eigen/Householder +29 -0
- include/eigen3/Eigen/IterativeLinearSolvers +48 -0
- include/eigen3/Eigen/Jacobi +32 -0
- include/eigen3/Eigen/KLUSupport +41 -0
- include/eigen3/Eigen/LU +47 -0
- include/eigen3/Eigen/MetisSupport +35 -0
- include/eigen3/Eigen/OrderingMethods +70 -0
- include/eigen3/Eigen/PaStiXSupport +49 -0
- include/eigen3/Eigen/PardisoSupport +35 -0
- include/eigen3/Eigen/QR +50 -0
- include/eigen3/Eigen/QtAlignedMalloc +39 -0
- include/eigen3/Eigen/SPQRSupport +34 -0
- include/eigen3/Eigen/SVD +50 -0
- include/eigen3/Eigen/Sparse +34 -0
- include/eigen3/Eigen/SparseCholesky +37 -0
- include/eigen3/Eigen/SparseCore +69 -0
- include/eigen3/Eigen/SparseLU +50 -0
- include/eigen3/Eigen/SparseQR +36 -0
- include/eigen3/Eigen/StdDeque +27 -0
- include/eigen3/Eigen/StdList +26 -0
- include/eigen3/Eigen/StdVector +27 -0
- include/eigen3/Eigen/SuperLUSupport +64 -0
- include/eigen3/Eigen/UmfPackSupport +40 -0
- include/eigen3/Eigen/src/Cholesky/LDLT.h +688 -0
- include/eigen3/Eigen/src/Cholesky/LLT.h +558 -0
- include/eigen3/Eigen/src/Cholesky/LLT_LAPACKE.h +99 -0
- include/eigen3/Eigen/src/CholmodSupport/CholmodSupport.h +682 -0
- include/eigen3/Eigen/src/Core/ArithmeticSequence.h +413 -0
- include/eigen3/Eigen/src/Core/Array.h +417 -0
- include/eigen3/Eigen/src/Core/ArrayBase.h +226 -0
- include/eigen3/Eigen/src/Core/ArrayWrapper.h +209 -0
- include/eigen3/Eigen/src/Core/Assign.h +90 -0
- include/eigen3/Eigen/src/Core/AssignEvaluator.h +1010 -0
- include/eigen3/Eigen/src/Core/Assign_MKL.h +178 -0
- include/eigen3/Eigen/src/Core/BandMatrix.h +353 -0
- include/eigen3/Eigen/src/Core/Block.h +448 -0
- include/eigen3/Eigen/src/Core/BooleanRedux.h +162 -0
- include/eigen3/Eigen/src/Core/CommaInitializer.h +164 -0
- include/eigen3/Eigen/src/Core/ConditionEstimator.h +175 -0
- include/eigen3/Eigen/src/Core/CoreEvaluators.h +1741 -0
- include/eigen3/Eigen/src/Core/CoreIterators.h +132 -0
- include/eigen3/Eigen/src/Core/CwiseBinaryOp.h +183 -0
- include/eigen3/Eigen/src/Core/CwiseNullaryOp.h +1001 -0
- include/eigen3/Eigen/src/Core/CwiseTernaryOp.h +197 -0
- include/eigen3/Eigen/src/Core/CwiseUnaryOp.h +103 -0
- include/eigen3/Eigen/src/Core/CwiseUnaryView.h +132 -0
- include/eigen3/Eigen/src/Core/DenseBase.h +701 -0
- include/eigen3/Eigen/src/Core/DenseCoeffsBase.h +685 -0
- include/eigen3/Eigen/src/Core/DenseStorage.h +652 -0
- include/eigen3/Eigen/src/Core/Diagonal.h +258 -0
- include/eigen3/Eigen/src/Core/DiagonalMatrix.h +391 -0
- include/eigen3/Eigen/src/Core/DiagonalProduct.h +28 -0
- include/eigen3/Eigen/src/Core/Dot.h +318 -0
- include/eigen3/Eigen/src/Core/EigenBase.h +160 -0
- include/eigen3/Eigen/src/Core/ForceAlignedAccess.h +150 -0
- include/eigen3/Eigen/src/Core/Fuzzy.h +155 -0
- include/eigen3/Eigen/src/Core/GeneralProduct.h +465 -0
- include/eigen3/Eigen/src/Core/GenericPacketMath.h +1040 -0
- include/eigen3/Eigen/src/Core/GlobalFunctions.h +194 -0
- include/eigen3/Eigen/src/Core/IO.h +258 -0
- include/eigen3/Eigen/src/Core/IndexedView.h +237 -0
- include/eigen3/Eigen/src/Core/Inverse.h +117 -0
- include/eigen3/Eigen/src/Core/Map.h +171 -0
- include/eigen3/Eigen/src/Core/MapBase.h +310 -0
- include/eigen3/Eigen/src/Core/MathFunctions.h +2057 -0
- include/eigen3/Eigen/src/Core/MathFunctionsImpl.h +200 -0
- include/eigen3/Eigen/src/Core/Matrix.h +565 -0
- include/eigen3/Eigen/src/Core/MatrixBase.h +547 -0
- include/eigen3/Eigen/src/Core/NestByValue.h +85 -0
- include/eigen3/Eigen/src/Core/NoAlias.h +109 -0
- include/eigen3/Eigen/src/Core/NumTraits.h +335 -0
- include/eigen3/Eigen/src/Core/PartialReduxEvaluator.h +232 -0
- include/eigen3/Eigen/src/Core/PermutationMatrix.h +605 -0
- include/eigen3/Eigen/src/Core/PlainObjectBase.h +1128 -0
- include/eigen3/Eigen/src/Core/Product.h +191 -0
- include/eigen3/Eigen/src/Core/ProductEvaluators.h +1179 -0
- include/eigen3/Eigen/src/Core/Random.h +218 -0
- include/eigen3/Eigen/src/Core/Redux.h +515 -0
- include/eigen3/Eigen/src/Core/Ref.h +381 -0
- include/eigen3/Eigen/src/Core/Replicate.h +142 -0
- include/eigen3/Eigen/src/Core/Reshaped.h +454 -0
- include/eigen3/Eigen/src/Core/ReturnByValue.h +119 -0
- include/eigen3/Eigen/src/Core/Reverse.h +217 -0
- include/eigen3/Eigen/src/Core/Select.h +164 -0
- include/eigen3/Eigen/src/Core/SelfAdjointView.h +365 -0
- include/eigen3/Eigen/src/Core/SelfCwiseBinaryOp.h +47 -0
- include/eigen3/Eigen/src/Core/Solve.h +188 -0
- include/eigen3/Eigen/src/Core/SolveTriangular.h +235 -0
- include/eigen3/Eigen/src/Core/SolverBase.h +168 -0
- include/eigen3/Eigen/src/Core/StableNorm.h +251 -0
- include/eigen3/Eigen/src/Core/StlIterators.h +463 -0
- include/eigen3/Eigen/src/Core/Stride.h +116 -0
- include/eigen3/Eigen/src/Core/Swap.h +68 -0
- include/eigen3/Eigen/src/Core/Transpose.h +464 -0
- include/eigen3/Eigen/src/Core/Transpositions.h +386 -0
- include/eigen3/Eigen/src/Core/TriangularMatrix.h +1001 -0
- include/eigen3/Eigen/src/Core/VectorBlock.h +96 -0
- include/eigen3/Eigen/src/Core/VectorwiseOp.h +784 -0
- include/eigen3/Eigen/src/Core/Visitor.h +381 -0
- include/eigen3/Eigen/src/Core/arch/AVX/Complex.h +372 -0
- include/eigen3/Eigen/src/Core/arch/AVX/MathFunctions.h +228 -0
- include/eigen3/Eigen/src/Core/arch/AVX/PacketMath.h +1574 -0
- include/eigen3/Eigen/src/Core/arch/AVX/TypeCasting.h +115 -0
- include/eigen3/Eigen/src/Core/arch/AVX512/Complex.h +422 -0
- include/eigen3/Eigen/src/Core/arch/AVX512/MathFunctions.h +362 -0
- include/eigen3/Eigen/src/Core/arch/AVX512/PacketMath.h +2303 -0
- include/eigen3/Eigen/src/Core/arch/AVX512/TypeCasting.h +89 -0
- include/eigen3/Eigen/src/Core/arch/AltiVec/Complex.h +417 -0
- include/eigen3/Eigen/src/Core/arch/AltiVec/MathFunctions.h +90 -0
- include/eigen3/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2937 -0
- include/eigen3/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +221 -0
- include/eigen3/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +629 -0
- include/eigen3/Eigen/src/Core/arch/AltiVec/PacketMath.h +2711 -0
- include/eigen3/Eigen/src/Core/arch/CUDA/Complex.h +258 -0
- include/eigen3/Eigen/src/Core/arch/Default/BFloat16.h +700 -0
- include/eigen3/Eigen/src/Core/arch/Default/ConjHelper.h +117 -0
- include/eigen3/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1649 -0
- include/eigen3/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +110 -0
- include/eigen3/Eigen/src/Core/arch/Default/Half.h +942 -0
- include/eigen3/Eigen/src/Core/arch/Default/Settings.h +49 -0
- include/eigen3/Eigen/src/Core/arch/Default/TypeCasting.h +120 -0
- include/eigen3/Eigen/src/Core/arch/GPU/MathFunctions.h +103 -0
- include/eigen3/Eigen/src/Core/arch/GPU/PacketMath.h +1685 -0
- include/eigen3/Eigen/src/Core/arch/GPU/TypeCasting.h +80 -0
- include/eigen3/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
- include/eigen3/Eigen/src/Core/arch/MSA/Complex.h +648 -0
- include/eigen3/Eigen/src/Core/arch/MSA/MathFunctions.h +387 -0
- include/eigen3/Eigen/src/Core/arch/MSA/PacketMath.h +1233 -0
- include/eigen3/Eigen/src/Core/arch/NEON/Complex.h +584 -0
- include/eigen3/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +183 -0
- include/eigen3/Eigen/src/Core/arch/NEON/MathFunctions.h +75 -0
- include/eigen3/Eigen/src/Core/arch/NEON/PacketMath.h +4587 -0
- include/eigen3/Eigen/src/Core/arch/NEON/TypeCasting.h +1419 -0
- include/eigen3/Eigen/src/Core/arch/SSE/Complex.h +351 -0
- include/eigen3/Eigen/src/Core/arch/SSE/MathFunctions.h +199 -0
- include/eigen3/Eigen/src/Core/arch/SSE/PacketMath.h +1505 -0
- include/eigen3/Eigen/src/Core/arch/SSE/TypeCasting.h +142 -0
- include/eigen3/Eigen/src/Core/arch/SVE/MathFunctions.h +44 -0
- include/eigen3/Eigen/src/Core/arch/SVE/PacketMath.h +752 -0
- include/eigen3/Eigen/src/Core/arch/SVE/TypeCasting.h +49 -0
- include/eigen3/Eigen/src/Core/arch/SYCL/InteropHeaders.h +232 -0
- include/eigen3/Eigen/src/Core/arch/SYCL/MathFunctions.h +301 -0
- include/eigen3/Eigen/src/Core/arch/SYCL/PacketMath.h +670 -0
- include/eigen3/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +694 -0
- include/eigen3/Eigen/src/Core/arch/SYCL/TypeCasting.h +85 -0
- include/eigen3/Eigen/src/Core/arch/ZVector/Complex.h +426 -0
- include/eigen3/Eigen/src/Core/arch/ZVector/MathFunctions.h +233 -0
- include/eigen3/Eigen/src/Core/arch/ZVector/PacketMath.h +1060 -0
- include/eigen3/Eigen/src/Core/functors/AssignmentFunctors.h +177 -0
- include/eigen3/Eigen/src/Core/functors/BinaryFunctors.h +541 -0
- include/eigen3/Eigen/src/Core/functors/NullaryFunctors.h +189 -0
- include/eigen3/Eigen/src/Core/functors/StlFunctors.h +166 -0
- include/eigen3/Eigen/src/Core/functors/TernaryFunctors.h +25 -0
- include/eigen3/Eigen/src/Core/functors/UnaryFunctors.h +1131 -0
- include/eigen3/Eigen/src/Core/products/GeneralBlockPanelKernel.h +2645 -0
- include/eigen3/Eigen/src/Core/products/GeneralMatrixMatrix.h +517 -0
- include/eigen3/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +317 -0
- include/eigen3/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +145 -0
- include/eigen3/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +124 -0
- include/eigen3/Eigen/src/Core/products/GeneralMatrixVector.h +518 -0
- include/eigen3/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +136 -0
- include/eigen3/Eigen/src/Core/products/Parallelizer.h +180 -0
- include/eigen3/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +544 -0
- include/eigen3/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +295 -0
- include/eigen3/Eigen/src/Core/products/SelfadjointMatrixVector.h +262 -0
- include/eigen3/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +118 -0
- include/eigen3/Eigen/src/Core/products/SelfadjointProduct.h +133 -0
- include/eigen3/Eigen/src/Core/products/SelfadjointRank2Update.h +94 -0
- include/eigen3/Eigen/src/Core/products/TriangularMatrixMatrix.h +472 -0
- include/eigen3/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +317 -0
- include/eigen3/Eigen/src/Core/products/TriangularMatrixVector.h +350 -0
- include/eigen3/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +255 -0
- include/eigen3/Eigen/src/Core/products/TriangularSolverMatrix.h +337 -0
- include/eigen3/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +167 -0
- include/eigen3/Eigen/src/Core/products/TriangularSolverVector.h +148 -0
- include/eigen3/Eigen/src/Core/util/BlasUtil.h +583 -0
- include/eigen3/Eigen/src/Core/util/ConfigureVectorization.h +512 -0
- include/eigen3/Eigen/src/Core/util/Constants.h +563 -0
- include/eigen3/Eigen/src/Core/util/DisableStupidWarnings.h +106 -0
- include/eigen3/Eigen/src/Core/util/ForwardDeclarations.h +322 -0
- include/eigen3/Eigen/src/Core/util/IndexedViewHelper.h +186 -0
- include/eigen3/Eigen/src/Core/util/IntegralConstant.h +272 -0
- include/eigen3/Eigen/src/Core/util/MKL_support.h +137 -0
- include/eigen3/Eigen/src/Core/util/Macros.h +1464 -0
- include/eigen3/Eigen/src/Core/util/Memory.h +1163 -0
- include/eigen3/Eigen/src/Core/util/Meta.h +812 -0
- include/eigen3/Eigen/src/Core/util/NonMPL2.h +3 -0
- include/eigen3/Eigen/src/Core/util/ReenableStupidWarnings.h +31 -0
- include/eigen3/Eigen/src/Core/util/ReshapedHelper.h +51 -0
- include/eigen3/Eigen/src/Core/util/StaticAssert.h +221 -0
- include/eigen3/Eigen/src/Core/util/SymbolicIndex.h +293 -0
- include/eigen3/Eigen/src/Core/util/XprHelper.h +856 -0
- include/eigen3/Eigen/src/Eigenvalues/ComplexEigenSolver.h +346 -0
- include/eigen3/Eigen/src/Eigenvalues/ComplexSchur.h +462 -0
- include/eigen3/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +91 -0
- include/eigen3/Eigen/src/Eigenvalues/EigenSolver.h +622 -0
- include/eigen3/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +418 -0
- include/eigen3/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +226 -0
- include/eigen3/Eigen/src/Eigenvalues/HessenbergDecomposition.h +374 -0
- include/eigen3/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +158 -0
- include/eigen3/Eigen/src/Eigenvalues/RealQZ.h +657 -0
- include/eigen3/Eigen/src/Eigenvalues/RealSchur.h +558 -0
- include/eigen3/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +77 -0
- include/eigen3/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +904 -0
- include/eigen3/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +87 -0
- include/eigen3/Eigen/src/Eigenvalues/Tridiagonalization.h +561 -0
- include/eigen3/Eigen/src/Geometry/AlignedBox.h +486 -0
- include/eigen3/Eigen/src/Geometry/AngleAxis.h +247 -0
- include/eigen3/Eigen/src/Geometry/EulerAngles.h +114 -0
- include/eigen3/Eigen/src/Geometry/Homogeneous.h +501 -0
- include/eigen3/Eigen/src/Geometry/Hyperplane.h +282 -0
- include/eigen3/Eigen/src/Geometry/OrthoMethods.h +235 -0
- include/eigen3/Eigen/src/Geometry/ParametrizedLine.h +232 -0
- include/eigen3/Eigen/src/Geometry/Quaternion.h +870 -0
- include/eigen3/Eigen/src/Geometry/Rotation2D.h +199 -0
- include/eigen3/Eigen/src/Geometry/RotationBase.h +206 -0
- include/eigen3/Eigen/src/Geometry/Scaling.h +188 -0
- include/eigen3/Eigen/src/Geometry/Transform.h +1563 -0
- include/eigen3/Eigen/src/Geometry/Translation.h +202 -0
- include/eigen3/Eigen/src/Geometry/Umeyama.h +166 -0
- include/eigen3/Eigen/src/Geometry/arch/Geometry_SIMD.h +168 -0
- include/eigen3/Eigen/src/Householder/BlockHouseholder.h +110 -0
- include/eigen3/Eigen/src/Householder/Householder.h +176 -0
- include/eigen3/Eigen/src/Householder/HouseholderSequence.h +545 -0
- include/eigen3/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +226 -0
- include/eigen3/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +212 -0
- include/eigen3/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +229 -0
- include/eigen3/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +394 -0
- include/eigen3/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +453 -0
- include/eigen3/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +444 -0
- include/eigen3/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +198 -0
- include/eigen3/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +117 -0
- include/eigen3/Eigen/src/Jacobi/Jacobi.h +483 -0
- include/eigen3/Eigen/src/KLUSupport/KLUSupport.h +358 -0
- include/eigen3/Eigen/src/LU/Determinant.h +117 -0
- include/eigen3/Eigen/src/LU/FullPivLU.h +877 -0
- include/eigen3/Eigen/src/LU/InverseImpl.h +432 -0
- include/eigen3/Eigen/src/LU/PartialPivLU.h +624 -0
- include/eigen3/Eigen/src/LU/PartialPivLU_LAPACKE.h +83 -0
- include/eigen3/Eigen/src/LU/arch/InverseSize4.h +351 -0
- include/eigen3/Eigen/src/MetisSupport/MetisSupport.h +137 -0
- include/eigen3/Eigen/src/OrderingMethods/Amd.h +435 -0
- include/eigen3/Eigen/src/OrderingMethods/Eigen_Colamd.h +1863 -0
- include/eigen3/Eigen/src/OrderingMethods/Ordering.h +153 -0
- include/eigen3/Eigen/src/PaStiXSupport/PaStiXSupport.h +678 -0
- include/eigen3/Eigen/src/PardisoSupport/PardisoSupport.h +545 -0
- include/eigen3/Eigen/src/QR/ColPivHouseholderQR.h +674 -0
- include/eigen3/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +97 -0
- include/eigen3/Eigen/src/QR/CompleteOrthogonalDecomposition.h +635 -0
- include/eigen3/Eigen/src/QR/FullPivHouseholderQR.h +713 -0
- include/eigen3/Eigen/src/QR/HouseholderQR.h +434 -0
- include/eigen3/Eigen/src/QR/HouseholderQR_LAPACKE.h +68 -0
- include/eigen3/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +335 -0
- include/eigen3/Eigen/src/SVD/BDCSVD.h +1366 -0
- include/eigen3/Eigen/src/SVD/JacobiSVD.h +812 -0
- include/eigen3/Eigen/src/SVD/JacobiSVD_LAPACKE.h +91 -0
- include/eigen3/Eigen/src/SVD/SVDBase.h +376 -0
- include/eigen3/Eigen/src/SVD/UpperBidiagonalization.h +414 -0
- include/eigen3/Eigen/src/SparseCholesky/SimplicialCholesky.h +697 -0
- include/eigen3/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +174 -0
- include/eigen3/Eigen/src/SparseCore/AmbiVector.h +378 -0
- include/eigen3/Eigen/src/SparseCore/CompressedStorage.h +274 -0
- include/eigen3/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +352 -0
- include/eigen3/Eigen/src/SparseCore/MappedSparseMatrix.h +67 -0
- include/eigen3/Eigen/src/SparseCore/SparseAssign.h +270 -0
- include/eigen3/Eigen/src/SparseCore/SparseBlock.h +571 -0
- include/eigen3/Eigen/src/SparseCore/SparseColEtree.h +206 -0
- include/eigen3/Eigen/src/SparseCore/SparseCompressedBase.h +370 -0
- include/eigen3/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +722 -0
- include/eigen3/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +150 -0
- include/eigen3/Eigen/src/SparseCore/SparseDenseProduct.h +342 -0
- include/eigen3/Eigen/src/SparseCore/SparseDiagonalProduct.h +138 -0
- include/eigen3/Eigen/src/SparseCore/SparseDot.h +98 -0
- include/eigen3/Eigen/src/SparseCore/SparseFuzzy.h +29 -0
- include/eigen3/Eigen/src/SparseCore/SparseMap.h +305 -0
- include/eigen3/Eigen/src/SparseCore/SparseMatrix.h +1518 -0
- include/eigen3/Eigen/src/SparseCore/SparseMatrixBase.h +398 -0
- include/eigen3/Eigen/src/SparseCore/SparsePermutation.h +178 -0
- include/eigen3/Eigen/src/SparseCore/SparseProduct.h +181 -0
- include/eigen3/Eigen/src/SparseCore/SparseRedux.h +49 -0
- include/eigen3/Eigen/src/SparseCore/SparseRef.h +397 -0
- include/eigen3/Eigen/src/SparseCore/SparseSelfAdjointView.h +659 -0
- include/eigen3/Eigen/src/SparseCore/SparseSolverBase.h +124 -0
- include/eigen3/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +198 -0
- include/eigen3/Eigen/src/SparseCore/SparseTranspose.h +92 -0
- include/eigen3/Eigen/src/SparseCore/SparseTriangularView.h +189 -0
- include/eigen3/Eigen/src/SparseCore/SparseUtil.h +186 -0
- include/eigen3/Eigen/src/SparseCore/SparseVector.h +478 -0
- include/eigen3/Eigen/src/SparseCore/SparseView.h +254 -0
- include/eigen3/Eigen/src/SparseCore/TriangularSolver.h +315 -0
- include/eigen3/Eigen/src/SparseLU/SparseLU.h +923 -0
- include/eigen3/Eigen/src/SparseLU/SparseLUImpl.h +66 -0
- include/eigen3/Eigen/src/SparseLU/SparseLU_Memory.h +226 -0
- include/eigen3/Eigen/src/SparseLU/SparseLU_Structs.h +110 -0
- include/eigen3/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +375 -0
- include/eigen3/Eigen/src/SparseLU/SparseLU_Utils.h +80 -0
- include/eigen3/Eigen/src/SparseLU/SparseLU_column_bmod.h +181 -0
- include/eigen3/Eigen/src/SparseLU/SparseLU_column_dfs.h +179 -0
- include/eigen3/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +107 -0
- include/eigen3/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +280 -0
- include/eigen3/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +126 -0
- include/eigen3/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +130 -0
- include/eigen3/Eigen/src/SparseLU/SparseLU_panel_bmod.h +223 -0
- include/eigen3/Eigen/src/SparseLU/SparseLU_panel_dfs.h +258 -0
- include/eigen3/Eigen/src/SparseLU/SparseLU_pivotL.h +137 -0
- include/eigen3/Eigen/src/SparseLU/SparseLU_pruneL.h +136 -0
- include/eigen3/Eigen/src/SparseLU/SparseLU_relax_snode.h +83 -0
- include/eigen3/Eigen/src/SparseQR/SparseQR.h +758 -0
- include/eigen3/Eigen/src/StlSupport/StdDeque.h +116 -0
- include/eigen3/Eigen/src/StlSupport/StdList.h +106 -0
- include/eigen3/Eigen/src/StlSupport/StdVector.h +131 -0
- include/eigen3/Eigen/src/StlSupport/details.h +84 -0
- include/eigen3/Eigen/src/SuperLUSupport/SuperLUSupport.h +1025 -0
- include/eigen3/Eigen/src/UmfPackSupport/UmfPackSupport.h +642 -0
- include/eigen3/Eigen/src/misc/Image.h +82 -0
- include/eigen3/Eigen/src/misc/Kernel.h +79 -0
- include/eigen3/Eigen/src/misc/RealSvd2x2.h +55 -0
- include/eigen3/Eigen/src/misc/blas.h +440 -0
- include/eigen3/Eigen/src/misc/lapack.h +152 -0
- include/eigen3/Eigen/src/misc/lapacke.h +16292 -0
- include/eigen3/Eigen/src/misc/lapacke_mangling.h +17 -0
- include/eigen3/Eigen/src/plugins/ArrayCwiseBinaryOps.h +358 -0
- include/eigen3/Eigen/src/plugins/ArrayCwiseUnaryOps.h +696 -0
- include/eigen3/Eigen/src/plugins/BlockMethods.h +1442 -0
- include/eigen3/Eigen/src/plugins/CommonCwiseBinaryOps.h +115 -0
- include/eigen3/Eigen/src/plugins/CommonCwiseUnaryOps.h +177 -0
- include/eigen3/Eigen/src/plugins/IndexedViewMethods.h +262 -0
- include/eigen3/Eigen/src/plugins/MatrixCwiseBinaryOps.h +152 -0
- include/eigen3/Eigen/src/plugins/MatrixCwiseUnaryOps.h +95 -0
- include/eigen3/Eigen/src/plugins/ReshapedMethods.h +149 -0
- include/eigen3/signature_of_eigen3_matrix_library +1 -0
- include/eigen3/unsupported/Eigen/AdolcForward +159 -0
- include/eigen3/unsupported/Eigen/AlignedVector3 +234 -0
- include/eigen3/unsupported/Eigen/ArpackSupport +30 -0
- include/eigen3/unsupported/Eigen/AutoDiff +46 -0
- include/eigen3/unsupported/Eigen/BVH +95 -0
- include/eigen3/unsupported/Eigen/CXX11/Tensor +137 -0
- include/eigen3/unsupported/Eigen/CXX11/TensorSymmetry +42 -0
- include/eigen3/unsupported/Eigen/CXX11/ThreadPool +74 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/Tensor.h +554 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h +329 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h +247 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +1176 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h +1559 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h +1093 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h +518 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h +377 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h +1023 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h +73 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h +6 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h +1413 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h +575 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionSycl.h +1650 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h +1679 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h +456 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h +1132 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorConvolutionSycl.h +544 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h +214 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h +347 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h +137 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h +6 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h +104 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceGpu.h +389 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h +1048 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h +409 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h +236 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h +490 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h +236 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h +983 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h +703 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h +388 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h +669 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h +379 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h +237 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h +191 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h +488 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h +302 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorGlobalFunctions.h +33 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h +99 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaUndefines.h +44 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h +79 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h +603 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h +738 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h +247 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorInitializer.h +82 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h +263 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h +216 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMacros.h +98 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h +327 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h +311 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h +1102 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h +708 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h +291 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h +322 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h +998 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h +6 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h +966 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReductionSycl.h +582 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h +454 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h +465 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h +528 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorScanSycl.h +513 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h +471 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h +161 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h +346 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorTrace.h +303 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h +264 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h +249 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h +629 -0
- include/eigen3/unsupported/Eigen/CXX11/src/TensorSymmetry/DynamicSymmetry.h +293 -0
- include/eigen3/unsupported/Eigen/CXX11/src/TensorSymmetry/StaticSymmetry.h +236 -0
- include/eigen3/unsupported/Eigen/CXX11/src/TensorSymmetry/Symmetry.h +338 -0
- include/eigen3/unsupported/Eigen/CXX11/src/TensorSymmetry/util/TemplateGroupTheory.h +669 -0
- include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/Barrier.h +67 -0
- include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/EventCount.h +249 -0
- include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h +486 -0
- include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/RunQueue.h +236 -0
- include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/ThreadCancel.h +23 -0
- include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/ThreadEnvironment.h +40 -0
- include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/ThreadLocal.h +301 -0
- include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/ThreadPoolInterface.h +48 -0
- include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/ThreadYield.h +20 -0
- include/eigen3/unsupported/Eigen/CXX11/src/util/CXX11Meta.h +537 -0
- include/eigen3/unsupported/Eigen/CXX11/src/util/CXX11Workarounds.h +88 -0
- include/eigen3/unsupported/Eigen/CXX11/src/util/EmulateArray.h +261 -0
- include/eigen3/unsupported/Eigen/CXX11/src/util/MaxSizeVector.h +158 -0
- include/eigen3/unsupported/Eigen/EulerAngles +43 -0
- include/eigen3/unsupported/Eigen/FFT +419 -0
- include/eigen3/unsupported/Eigen/IterativeSolvers +51 -0
- include/eigen3/unsupported/Eigen/KroneckerProduct +36 -0
- include/eigen3/unsupported/Eigen/LevenbergMarquardt +49 -0
- include/eigen3/unsupported/Eigen/MPRealSupport +213 -0
- include/eigen3/unsupported/Eigen/MatrixFunctions +504 -0
- include/eigen3/unsupported/Eigen/MoreVectorization +24 -0
- include/eigen3/unsupported/Eigen/NonLinearOptimization +140 -0
- include/eigen3/unsupported/Eigen/NumericalDiff +56 -0
- include/eigen3/unsupported/Eigen/OpenGLSupport +322 -0
- include/eigen3/unsupported/Eigen/Polynomials +137 -0
- include/eigen3/unsupported/Eigen/Skyline +39 -0
- include/eigen3/unsupported/Eigen/SparseExtra +54 -0
- include/eigen3/unsupported/Eigen/SpecialFunctions +103 -0
- include/eigen3/unsupported/Eigen/Splines +35 -0
- include/eigen3/unsupported/Eigen/src/AutoDiff/AutoDiffJacobian.h +108 -0
- include/eigen3/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h +730 -0
- include/eigen3/unsupported/Eigen/src/AutoDiff/AutoDiffVector.h +220 -0
- include/eigen3/unsupported/Eigen/src/BVH/BVAlgorithms.h +293 -0
- include/eigen3/unsupported/Eigen/src/BVH/KdBVH.h +223 -0
- include/eigen3/unsupported/Eigen/src/Eigenvalues/ArpackSelfAdjointEigenSolver.h +790 -0
- include/eigen3/unsupported/Eigen/src/EulerAngles/EulerAngles.h +355 -0
- include/eigen3/unsupported/Eigen/src/EulerAngles/EulerSystem.h +305 -0
- include/eigen3/unsupported/Eigen/src/FFT/ei_fftw_impl.h +261 -0
- include/eigen3/unsupported/Eigen/src/FFT/ei_kissfft_impl.h +449 -0
- include/eigen3/unsupported/Eigen/src/IterativeSolvers/ConstrainedConjGrad.h +187 -0
- include/eigen3/unsupported/Eigen/src/IterativeSolvers/DGMRES.h +511 -0
- include/eigen3/unsupported/Eigen/src/IterativeSolvers/GMRES.h +335 -0
- include/eigen3/unsupported/Eigen/src/IterativeSolvers/IDRS.h +436 -0
- include/eigen3/unsupported/Eigen/src/IterativeSolvers/IncompleteLU.h +90 -0
- include/eigen3/unsupported/Eigen/src/IterativeSolvers/IterationController.h +154 -0
- include/eigen3/unsupported/Eigen/src/IterativeSolvers/MINRES.h +267 -0
- include/eigen3/unsupported/Eigen/src/IterativeSolvers/Scaling.h +193 -0
- include/eigen3/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h +305 -0
- include/eigen3/unsupported/Eigen/src/LevenbergMarquardt/LMcovar.h +84 -0
- include/eigen3/unsupported/Eigen/src/LevenbergMarquardt/LMonestep.h +202 -0
- include/eigen3/unsupported/Eigen/src/LevenbergMarquardt/LMpar.h +160 -0
- include/eigen3/unsupported/Eigen/src/LevenbergMarquardt/LMqrsolv.h +188 -0
- include/eigen3/unsupported/Eigen/src/LevenbergMarquardt/LevenbergMarquardt.h +396 -0
- include/eigen3/unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h +441 -0
- include/eigen3/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h +569 -0
- include/eigen3/unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h +373 -0
- include/eigen3/unsupported/Eigen/src/MatrixFunctions/MatrixPower.h +705 -0
- include/eigen3/unsupported/Eigen/src/MatrixFunctions/MatrixSquareRoot.h +368 -0
- include/eigen3/unsupported/Eigen/src/MatrixFunctions/StemFunction.h +117 -0
- include/eigen3/unsupported/Eigen/src/MoreVectorization/MathFunctions.h +95 -0
- include/eigen3/unsupported/Eigen/src/NonLinearOptimization/HybridNonLinearSolver.h +601 -0
- include/eigen3/unsupported/Eigen/src/NonLinearOptimization/LevenbergMarquardt.h +657 -0
- include/eigen3/unsupported/Eigen/src/NonLinearOptimization/chkder.h +66 -0
- include/eigen3/unsupported/Eigen/src/NonLinearOptimization/covar.h +70 -0
- include/eigen3/unsupported/Eigen/src/NonLinearOptimization/dogleg.h +107 -0
- include/eigen3/unsupported/Eigen/src/NonLinearOptimization/fdjac1.h +79 -0
- include/eigen3/unsupported/Eigen/src/NonLinearOptimization/lmpar.h +298 -0
- include/eigen3/unsupported/Eigen/src/NonLinearOptimization/qrsolv.h +91 -0
- include/eigen3/unsupported/Eigen/src/NonLinearOptimization/r1mpyq.h +30 -0
- include/eigen3/unsupported/Eigen/src/NonLinearOptimization/r1updt.h +99 -0
- include/eigen3/unsupported/Eigen/src/NonLinearOptimization/rwupdt.h +49 -0
- include/eigen3/unsupported/Eigen/src/NumericalDiff/NumericalDiff.h +130 -0
- include/eigen3/unsupported/Eigen/src/Polynomials/Companion.h +280 -0
- include/eigen3/unsupported/Eigen/src/Polynomials/PolynomialSolver.h +428 -0
- include/eigen3/unsupported/Eigen/src/Polynomials/PolynomialUtils.h +143 -0
- include/eigen3/unsupported/Eigen/src/Skyline/SkylineInplaceLU.h +352 -0
- include/eigen3/unsupported/Eigen/src/Skyline/SkylineMatrix.h +862 -0
- include/eigen3/unsupported/Eigen/src/Skyline/SkylineMatrixBase.h +212 -0
- include/eigen3/unsupported/Eigen/src/Skyline/SkylineProduct.h +295 -0
- include/eigen3/unsupported/Eigen/src/Skyline/SkylineStorage.h +259 -0
- include/eigen3/unsupported/Eigen/src/Skyline/SkylineUtil.h +89 -0
- include/eigen3/unsupported/Eigen/src/SparseExtra/BlockOfDynamicSparseMatrix.h +122 -0
- include/eigen3/unsupported/Eigen/src/SparseExtra/BlockSparseMatrix.h +1079 -0
- include/eigen3/unsupported/Eigen/src/SparseExtra/DynamicSparseMatrix.h +404 -0
- include/eigen3/unsupported/Eigen/src/SparseExtra/MarketIO.h +282 -0
- include/eigen3/unsupported/Eigen/src/SparseExtra/MatrixMarketIterator.h +247 -0
- include/eigen3/unsupported/Eigen/src/SparseExtra/RandomSetter.h +349 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsArrayAPI.h +286 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsBFloat16.h +68 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsFunctors.h +357 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsHalf.h +66 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsImpl.h +1959 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsPacketMath.h +118 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/HipVectorCompatibility.h +67 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsArrayAPI.h +167 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsBFloat16.h +58 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h +330 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h +58 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h +2045 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h +79 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/arch/AVX/BesselFunctions.h +46 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/arch/AVX/SpecialFunctions.h +16 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/arch/AVX512/BesselFunctions.h +46 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/arch/AVX512/SpecialFunctions.h +16 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/arch/GPU/SpecialFunctions.h +369 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/arch/NEON/BesselFunctions.h +54 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/arch/NEON/SpecialFunctions.h +34 -0
- include/eigen3/unsupported/Eigen/src/Splines/Spline.h +507 -0
- include/eigen3/unsupported/Eigen/src/Splines/SplineFitting.h +431 -0
- include/eigen3/unsupported/Eigen/src/Splines/SplineFwd.h +93 -0
- pylibsparseir/__init__.py +31 -0
- pylibsparseir/clean_build_artifacts.py +0 -1
- pylibsparseir/core.py +178 -62
- pylibsparseir/libsparseir.dylib +0 -0
- pylibsparseir-0.6.0.dist-info/METADATA +213 -0
- pylibsparseir-0.6.0.dist-info/RECORD +545 -0
- {pylibsparseir-0.1.0.dist-info → pylibsparseir-0.6.0.dist-info}/WHEEL +1 -1
- pylibsparseir-0.6.0.dist-info/licenses/LICENSE +21 -0
- share/eigen3/cmake/Eigen3Config.cmake +37 -0
- share/eigen3/cmake/Eigen3ConfigVersion.cmake +65 -0
- share/eigen3/cmake/Eigen3Targets.cmake +106 -0
- share/eigen3/cmake/UseEigen3.cmake +6 -0
- share/pkgconfig/eigen3.pc +9 -0
- pylibsparseir/libsparseir.0.4.2.dylib +0 -0
- pylibsparseir/libsparseir.0.dylib +0 -0
- pylibsparseir-0.1.0.dist-info/METADATA +0 -130
- pylibsparseir-0.1.0.dist-info/RECORD +0 -12
- pylibsparseir-0.1.0.dist-info/entry_points.txt +0 -2
- pylibsparseir-0.1.0.dist-info/top_level.txt +0 -1
|
@@ -0,0 +1,1093 @@
|
|
|
1
|
+
// This file is part of Eigen, a lightweight C++ template library
|
|
2
|
+
// for linear algebra.
|
|
3
|
+
//
|
|
4
|
+
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
|
|
5
|
+
//
|
|
6
|
+
// This Source Code Form is subject to the terms of the Mozilla
|
|
7
|
+
// Public License v. 2.0. If a copy of the MPL was not distributed
|
|
8
|
+
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
9
|
+
|
|
10
|
+
#ifndef EIGEN_CXX11_TENSOR_TENSOR_BROADCASTING_H
|
|
11
|
+
#define EIGEN_CXX11_TENSOR_TENSOR_BROADCASTING_H
|
|
12
|
+
|
|
13
|
+
namespace Eigen {
|
|
14
|
+
|
|
15
|
+
/** \class TensorBroadcasting
|
|
16
|
+
* \ingroup CXX11_Tensor_Module
|
|
17
|
+
*
|
|
18
|
+
* \brief Tensor broadcasting class.
|
|
19
|
+
*
|
|
20
|
+
*
|
|
21
|
+
*/
|
|
22
|
+
namespace internal {
|
|
23
|
+
template<typename Broadcast, typename XprType>
|
|
24
|
+
struct traits<TensorBroadcastingOp<Broadcast, XprType> > : public traits<XprType>
|
|
25
|
+
{
|
|
26
|
+
typedef typename XprType::Scalar Scalar;
|
|
27
|
+
typedef traits<XprType> XprTraits;
|
|
28
|
+
typedef typename XprTraits::StorageKind StorageKind;
|
|
29
|
+
typedef typename XprTraits::Index Index;
|
|
30
|
+
typedef typename XprType::Nested Nested;
|
|
31
|
+
typedef typename remove_reference<Nested>::type _Nested;
|
|
32
|
+
static const int NumDimensions = XprTraits::NumDimensions;
|
|
33
|
+
static const int Layout = XprTraits::Layout;
|
|
34
|
+
typedef typename XprTraits::PointerType PointerType;
|
|
35
|
+
};
|
|
36
|
+
|
|
37
|
+
template<typename Broadcast, typename XprType>
|
|
38
|
+
struct eval<TensorBroadcastingOp<Broadcast, XprType>, Eigen::Dense>
|
|
39
|
+
{
|
|
40
|
+
typedef const TensorBroadcastingOp<Broadcast, XprType> EIGEN_DEVICE_REF type;
|
|
41
|
+
};
|
|
42
|
+
|
|
43
|
+
template<typename Broadcast, typename XprType>
|
|
44
|
+
struct nested<TensorBroadcastingOp<Broadcast, XprType>, 1, typename eval<TensorBroadcastingOp<Broadcast, XprType> >::type>
|
|
45
|
+
{
|
|
46
|
+
typedef TensorBroadcastingOp<Broadcast, XprType> type;
|
|
47
|
+
};
|
|
48
|
+
|
|
49
|
+
template <typename Dims>
|
|
50
|
+
struct is_input_scalar {
|
|
51
|
+
static const bool value = false;
|
|
52
|
+
};
|
|
53
|
+
template <>
|
|
54
|
+
struct is_input_scalar<Sizes<> > {
|
|
55
|
+
static const bool value = true;
|
|
56
|
+
};
|
|
57
|
+
#ifndef EIGEN_EMULATE_CXX11_META_H
|
|
58
|
+
template <typename std::ptrdiff_t... Indices>
|
|
59
|
+
struct is_input_scalar<Sizes<Indices...> > {
|
|
60
|
+
static const bool value = (Sizes<Indices...>::total_size == 1);
|
|
61
|
+
};
|
|
62
|
+
#endif
|
|
63
|
+
|
|
64
|
+
} // end namespace internal
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
template<typename Broadcast, typename XprType>
|
|
69
|
+
class TensorBroadcastingOp : public TensorBase<TensorBroadcastingOp<Broadcast, XprType>, ReadOnlyAccessors>
|
|
70
|
+
{
|
|
71
|
+
public:
|
|
72
|
+
typedef typename Eigen::internal::traits<TensorBroadcastingOp>::Scalar Scalar;
|
|
73
|
+
typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
|
|
74
|
+
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
|
75
|
+
typedef typename Eigen::internal::nested<TensorBroadcastingOp>::type Nested;
|
|
76
|
+
typedef typename Eigen::internal::traits<TensorBroadcastingOp>::StorageKind StorageKind;
|
|
77
|
+
typedef typename Eigen::internal::traits<TensorBroadcastingOp>::Index Index;
|
|
78
|
+
|
|
79
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBroadcastingOp(const XprType& expr, const Broadcast& broadcast)
|
|
80
|
+
: m_xpr(expr), m_broadcast(broadcast) {}
|
|
81
|
+
|
|
82
|
+
EIGEN_DEVICE_FUNC
|
|
83
|
+
const Broadcast& broadcast() const { return m_broadcast; }
|
|
84
|
+
|
|
85
|
+
EIGEN_DEVICE_FUNC
|
|
86
|
+
const typename internal::remove_all<typename XprType::Nested>::type&
|
|
87
|
+
expression() const { return m_xpr; }
|
|
88
|
+
|
|
89
|
+
protected:
|
|
90
|
+
typename XprType::Nested m_xpr;
|
|
91
|
+
const Broadcast m_broadcast;
|
|
92
|
+
};
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
// Eval as rvalue
|
|
96
|
+
template<typename Broadcast, typename ArgType, typename Device>
|
|
97
|
+
struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
|
|
98
|
+
{
|
|
99
|
+
typedef TensorBroadcastingOp<Broadcast, ArgType> XprType;
|
|
100
|
+
typedef typename XprType::Index Index;
|
|
101
|
+
static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
|
|
102
|
+
typedef DSizes<Index, NumDims> Dimensions;
|
|
103
|
+
typedef typename XprType::Scalar Scalar;
|
|
104
|
+
typedef typename TensorEvaluator<ArgType, Device>::Dimensions InputDimensions;
|
|
105
|
+
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
|
106
|
+
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
|
|
107
|
+
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
|
108
|
+
protected: // all the non-static fields must have the same access control, otherwise the TensorEvaluator wont be standard layout;
|
|
109
|
+
bool isCopy, nByOne, oneByN;
|
|
110
|
+
public:
|
|
111
|
+
typedef StorageMemory<CoeffReturnType, Device> Storage;
|
|
112
|
+
typedef typename Storage::Type EvaluatorPointerType;
|
|
113
|
+
|
|
114
|
+
enum {
|
|
115
|
+
IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
|
|
116
|
+
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
|
|
117
|
+
BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess,
|
|
118
|
+
PreferBlockAccess = true,
|
|
119
|
+
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
|
120
|
+
RawAccess = false
|
|
121
|
+
};
|
|
122
|
+
|
|
123
|
+
typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
|
|
124
|
+
|
|
125
|
+
// We do block based broadcasting using a trick with 2x tensor rank and 0
|
|
126
|
+
// strides. See block method implementation for details.
|
|
127
|
+
typedef DSizes<Index, 2 * NumDims> BroadcastDimensions;
|
|
128
|
+
|
|
129
|
+
//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
|
|
130
|
+
typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
|
|
131
|
+
typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
|
|
132
|
+
|
|
133
|
+
typedef typename TensorEvaluator<const ArgType, Device>::TensorBlock
|
|
134
|
+
ArgTensorBlock;
|
|
135
|
+
|
|
136
|
+
typedef typename internal::TensorMaterializedBlock<ScalarNoConst, NumDims,
|
|
137
|
+
Layout, Index>
|
|
138
|
+
TensorBlock;
|
|
139
|
+
//===--------------------------------------------------------------------===//
|
|
140
|
+
|
|
141
|
+
EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
|
|
142
|
+
: isCopy(false), nByOne(false), oneByN(false),
|
|
143
|
+
m_device(device), m_broadcast(op.broadcast()), m_impl(op.expression(), device)
|
|
144
|
+
{
|
|
145
|
+
|
|
146
|
+
// The broadcasting op doesn't change the rank of the tensor. One can't broadcast a scalar
|
|
147
|
+
// and store the result in a scalar. Instead one should reshape the scalar into a a N-D
|
|
148
|
+
// tensor with N >= 1 of 1 element first and then broadcast.
|
|
149
|
+
EIGEN_STATIC_ASSERT((NumDims > 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
|
|
150
|
+
const InputDimensions& input_dims = m_impl.dimensions();
|
|
151
|
+
isCopy = true;
|
|
152
|
+
for (int i = 0; i < NumDims; ++i) {
|
|
153
|
+
eigen_assert(input_dims[i] > 0);
|
|
154
|
+
m_dimensions[i] = input_dims[i] * m_broadcast[i];
|
|
155
|
+
if (m_broadcast[i] != 1) {
|
|
156
|
+
isCopy = false;
|
|
157
|
+
}
|
|
158
|
+
}
|
|
159
|
+
|
|
160
|
+
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
|
|
161
|
+
m_inputStrides[0] = 1;
|
|
162
|
+
m_outputStrides[0] = 1;
|
|
163
|
+
for (int i = 1; i < NumDims; ++i) {
|
|
164
|
+
m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1];
|
|
165
|
+
m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1];
|
|
166
|
+
}
|
|
167
|
+
} else {
|
|
168
|
+
m_inputStrides[NumDims-1] = 1;
|
|
169
|
+
m_outputStrides[NumDims-1] = 1;
|
|
170
|
+
for (int i = NumDims-2; i >= 0; --i) {
|
|
171
|
+
m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1];
|
|
172
|
+
m_outputStrides[i] = m_outputStrides[i+1] * m_dimensions[i+1];
|
|
173
|
+
}
|
|
174
|
+
}
|
|
175
|
+
|
|
176
|
+
if (input_dims[0] == 1) {
|
|
177
|
+
oneByN = true;
|
|
178
|
+
for (int i = 1; i < NumDims; ++i) {
|
|
179
|
+
if (m_broadcast[i] != 1) {
|
|
180
|
+
oneByN = false;
|
|
181
|
+
break;
|
|
182
|
+
}
|
|
183
|
+
}
|
|
184
|
+
} else if (input_dims[NumDims-1] == 1) {
|
|
185
|
+
nByOne = true;
|
|
186
|
+
for (int i = 0; i < NumDims-1; ++i) {
|
|
187
|
+
if (m_broadcast[i] != 1) {
|
|
188
|
+
nByOne = false;
|
|
189
|
+
break;
|
|
190
|
+
}
|
|
191
|
+
}
|
|
192
|
+
}
|
|
193
|
+
|
|
194
|
+
// Handle special format like NCHW, its input shape is '[1, N..., 1]' and
|
|
195
|
+
// broadcast shape is '[N, 1..., N]'
|
|
196
|
+
if (!oneByN && !nByOne) {
|
|
197
|
+
if (input_dims[0] == 1 && input_dims[NumDims-1] == 1 && NumDims > 2) {
|
|
198
|
+
nByOne = true;
|
|
199
|
+
oneByN = true;
|
|
200
|
+
for (int i = 1; i < NumDims-1; ++i) {
|
|
201
|
+
if (m_broadcast[i] != 1) {
|
|
202
|
+
nByOne = false;
|
|
203
|
+
oneByN = false;
|
|
204
|
+
break;
|
|
205
|
+
}
|
|
206
|
+
}
|
|
207
|
+
}
|
|
208
|
+
}
|
|
209
|
+
}
|
|
210
|
+
|
|
211
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
|
|
212
|
+
|
|
213
|
+
EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
|
|
214
|
+
m_impl.evalSubExprsIfNeeded(NULL);
|
|
215
|
+
return true;
|
|
216
|
+
}
|
|
217
|
+
|
|
218
|
+
#ifdef EIGEN_USE_THREADS
|
|
219
|
+
template <typename EvalSubExprsCallback>
|
|
220
|
+
EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
|
|
221
|
+
EvaluatorPointerType, EvalSubExprsCallback done) {
|
|
222
|
+
m_impl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
|
|
223
|
+
}
|
|
224
|
+
#endif // EIGEN_USE_THREADS
|
|
225
|
+
|
|
226
|
+
EIGEN_STRONG_INLINE void cleanup() {
|
|
227
|
+
m_impl.cleanup();
|
|
228
|
+
}
|
|
229
|
+
|
|
230
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE CoeffReturnType coeff(Index index) const
|
|
231
|
+
{
|
|
232
|
+
if (internal::is_input_scalar<typename internal::remove_all<InputDimensions>::type>::value) {
|
|
233
|
+
return m_impl.coeff(0);
|
|
234
|
+
}
|
|
235
|
+
|
|
236
|
+
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
|
|
237
|
+
if (isCopy) {
|
|
238
|
+
return m_impl.coeff(index);
|
|
239
|
+
} else {
|
|
240
|
+
return coeffColMajor(index);
|
|
241
|
+
}
|
|
242
|
+
} else {
|
|
243
|
+
if (isCopy) {
|
|
244
|
+
return m_impl.coeff(index);
|
|
245
|
+
} else {
|
|
246
|
+
return coeffRowMajor(index);
|
|
247
|
+
}
|
|
248
|
+
}
|
|
249
|
+
}
|
|
250
|
+
|
|
251
|
+
// TODO: attempt to speed this up. The integer divisions and modulo are slow
|
|
252
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index indexColMajor(Index index) const {
|
|
253
|
+
Index inputIndex = 0;
|
|
254
|
+
EIGEN_UNROLL_LOOP
|
|
255
|
+
for (int i = NumDims - 1; i > 0; --i) {
|
|
256
|
+
const Index idx = index / m_outputStrides[i];
|
|
257
|
+
if (internal::index_statically_eq<Broadcast>(i, 1)) {
|
|
258
|
+
eigen_assert(idx < m_impl.dimensions()[i]);
|
|
259
|
+
inputIndex += idx * m_inputStrides[i];
|
|
260
|
+
} else {
|
|
261
|
+
if (internal::index_statically_eq<InputDimensions>(i, 1)) {
|
|
262
|
+
eigen_assert(idx % m_impl.dimensions()[i] == 0);
|
|
263
|
+
} else {
|
|
264
|
+
inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i];
|
|
265
|
+
}
|
|
266
|
+
}
|
|
267
|
+
index -= idx * m_outputStrides[i];
|
|
268
|
+
}
|
|
269
|
+
if (internal::index_statically_eq<Broadcast>(0, 1)) {
|
|
270
|
+
eigen_assert(index < m_impl.dimensions()[0]);
|
|
271
|
+
inputIndex += index;
|
|
272
|
+
} else {
|
|
273
|
+
if (internal::index_statically_eq<InputDimensions>(0, 1)) {
|
|
274
|
+
eigen_assert(index % m_impl.dimensions()[0] == 0);
|
|
275
|
+
} else {
|
|
276
|
+
inputIndex += (index % m_impl.dimensions()[0]);
|
|
277
|
+
}
|
|
278
|
+
}
|
|
279
|
+
return inputIndex;
|
|
280
|
+
}
|
|
281
|
+
|
|
282
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeffColMajor(Index index) const
|
|
283
|
+
{
|
|
284
|
+
return m_impl.coeff(indexColMajor(index));
|
|
285
|
+
}
|
|
286
|
+
|
|
287
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index indexRowMajor(Index index) const {
|
|
288
|
+
Index inputIndex = 0;
|
|
289
|
+
EIGEN_UNROLL_LOOP
|
|
290
|
+
for (int i = 0; i < NumDims - 1; ++i) {
|
|
291
|
+
const Index idx = index / m_outputStrides[i];
|
|
292
|
+
if (internal::index_statically_eq<Broadcast>(i, 1)) {
|
|
293
|
+
eigen_assert(idx < m_impl.dimensions()[i]);
|
|
294
|
+
inputIndex += idx * m_inputStrides[i];
|
|
295
|
+
} else {
|
|
296
|
+
if (internal::index_statically_eq<InputDimensions>(i, 1)) {
|
|
297
|
+
eigen_assert(idx % m_impl.dimensions()[i] == 0);
|
|
298
|
+
} else {
|
|
299
|
+
inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i];
|
|
300
|
+
}
|
|
301
|
+
}
|
|
302
|
+
index -= idx * m_outputStrides[i];
|
|
303
|
+
}
|
|
304
|
+
if (internal::index_statically_eq<Broadcast>(NumDims - 1, 1)) {
|
|
305
|
+
eigen_assert(index < m_impl.dimensions()[NumDims - 1]);
|
|
306
|
+
inputIndex += index;
|
|
307
|
+
} else {
|
|
308
|
+
if (internal::index_statically_eq<InputDimensions>(NumDims - 1, 1)) {
|
|
309
|
+
eigen_assert(index % m_impl.dimensions()[NumDims - 1] == 0);
|
|
310
|
+
} else {
|
|
311
|
+
inputIndex += (index % m_impl.dimensions()[NumDims - 1]);
|
|
312
|
+
}
|
|
313
|
+
}
|
|
314
|
+
return inputIndex;
|
|
315
|
+
}
|
|
316
|
+
|
|
317
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeffRowMajor(Index index) const
|
|
318
|
+
{
|
|
319
|
+
return m_impl.coeff(indexRowMajor(index));
|
|
320
|
+
}
|
|
321
|
+
|
|
322
|
+
template<int LoadMode>
|
|
323
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketReturnType packet(Index index) const
|
|
324
|
+
{
|
|
325
|
+
if (internal::is_input_scalar<typename internal::remove_all<InputDimensions>::type>::value) {
|
|
326
|
+
return internal::pset1<PacketReturnType>(m_impl.coeff(0));
|
|
327
|
+
}
|
|
328
|
+
|
|
329
|
+
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
|
|
330
|
+
if (isCopy) {
|
|
331
|
+
#ifdef EIGEN_GPU_COMPILE_PHASE
|
|
332
|
+
// See PR 437: on NVIDIA P100 and K20m we observed a x3-4 speed up by enforcing
|
|
333
|
+
// unaligned loads here. The reason is unclear though.
|
|
334
|
+
return m_impl.template packet<Unaligned>(index);
|
|
335
|
+
#else
|
|
336
|
+
return m_impl.template packet<LoadMode>(index);
|
|
337
|
+
#endif
|
|
338
|
+
} else if (oneByN && !nByOne) {
|
|
339
|
+
return packetNByOne<LoadMode>(index);
|
|
340
|
+
} else if (!oneByN && nByOne) {
|
|
341
|
+
return packetOneByN<LoadMode>(index);
|
|
342
|
+
} else if (oneByN && nByOne) {
|
|
343
|
+
return packetOneByNByOne<LoadMode>(index);
|
|
344
|
+
} else {
|
|
345
|
+
return packetColMajor<LoadMode>(index);
|
|
346
|
+
}
|
|
347
|
+
} else {
|
|
348
|
+
if (isCopy) {
|
|
349
|
+
#ifdef EIGEN_GPU_COMPILE_PHASE
|
|
350
|
+
// See above.
|
|
351
|
+
return m_impl.template packet<Unaligned>(index);
|
|
352
|
+
#else
|
|
353
|
+
return m_impl.template packet<LoadMode>(index);
|
|
354
|
+
#endif
|
|
355
|
+
} else if (oneByN && !nByOne) {
|
|
356
|
+
return packetOneByN<LoadMode>(index);
|
|
357
|
+
} else if (!oneByN && nByOne) {
|
|
358
|
+
return packetNByOne<LoadMode>(index);
|
|
359
|
+
} else if (oneByN && nByOne) {
|
|
360
|
+
return packetOneByNByOne<LoadMode>(index);
|
|
361
|
+
} else {
|
|
362
|
+
return packetRowMajor<LoadMode>(index);
|
|
363
|
+
}
|
|
364
|
+
}
|
|
365
|
+
}
|
|
366
|
+
|
|
367
|
+
template<int LoadMode>
|
|
368
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetOneByNByOne
|
|
369
|
+
(Index index) const
|
|
370
|
+
{
|
|
371
|
+
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
|
|
372
|
+
eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
|
|
373
|
+
|
|
374
|
+
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
|
|
375
|
+
Index startDim, endDim;
|
|
376
|
+
Index inputIndex, outputOffset, batchedIndex;
|
|
377
|
+
|
|
378
|
+
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
|
|
379
|
+
startDim = NumDims - 1;
|
|
380
|
+
endDim = 1;
|
|
381
|
+
} else {
|
|
382
|
+
startDim = 0;
|
|
383
|
+
endDim = NumDims - 2;
|
|
384
|
+
}
|
|
385
|
+
|
|
386
|
+
batchedIndex = index % m_outputStrides[startDim];
|
|
387
|
+
inputIndex = batchedIndex / m_outputStrides[endDim];
|
|
388
|
+
outputOffset = batchedIndex % m_outputStrides[endDim];
|
|
389
|
+
|
|
390
|
+
if (outputOffset + PacketSize <= m_outputStrides[endDim]) {
|
|
391
|
+
values[0] = m_impl.coeff(inputIndex);
|
|
392
|
+
return internal::pload1<PacketReturnType>(values);
|
|
393
|
+
} else {
|
|
394
|
+
EIGEN_UNROLL_LOOP
|
|
395
|
+
for (int i = 0, cur = 0; i < PacketSize; ++i, ++cur) {
|
|
396
|
+
if (outputOffset + cur < m_outputStrides[endDim]) {
|
|
397
|
+
values[i] = m_impl.coeff(inputIndex);
|
|
398
|
+
} else {
|
|
399
|
+
++inputIndex;
|
|
400
|
+
inputIndex = (inputIndex == m_inputStrides[startDim] ? 0 : inputIndex);
|
|
401
|
+
values[i] = m_impl.coeff(inputIndex);
|
|
402
|
+
outputOffset = 0;
|
|
403
|
+
cur = 0;
|
|
404
|
+
}
|
|
405
|
+
}
|
|
406
|
+
return internal::pload<PacketReturnType>(values);
|
|
407
|
+
}
|
|
408
|
+
}
|
|
409
|
+
|
|
410
|
+
template<int LoadMode>
|
|
411
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetOneByN(Index index) const
|
|
412
|
+
{
|
|
413
|
+
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
|
|
414
|
+
eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
|
|
415
|
+
|
|
416
|
+
Index dim, inputIndex;
|
|
417
|
+
|
|
418
|
+
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
|
|
419
|
+
dim = NumDims - 1;
|
|
420
|
+
} else {
|
|
421
|
+
dim = 0;
|
|
422
|
+
}
|
|
423
|
+
|
|
424
|
+
inputIndex = index % m_inputStrides[dim];
|
|
425
|
+
if (inputIndex + PacketSize <= m_inputStrides[dim]) {
|
|
426
|
+
return m_impl.template packet<Unaligned>(inputIndex);
|
|
427
|
+
} else {
|
|
428
|
+
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
|
|
429
|
+
EIGEN_UNROLL_LOOP
|
|
430
|
+
for (int i = 0; i < PacketSize; ++i) {
|
|
431
|
+
if (inputIndex > m_inputStrides[dim]-1) {
|
|
432
|
+
inputIndex = 0;
|
|
433
|
+
}
|
|
434
|
+
values[i] = m_impl.coeff(inputIndex++);
|
|
435
|
+
}
|
|
436
|
+
return internal::pload<PacketReturnType>(values);
|
|
437
|
+
}
|
|
438
|
+
}
|
|
439
|
+
|
|
440
|
+
template<int LoadMode>
|
|
441
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetNByOne(Index index) const
|
|
442
|
+
{
|
|
443
|
+
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
|
|
444
|
+
eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
|
|
445
|
+
|
|
446
|
+
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
|
|
447
|
+
Index dim, inputIndex, outputOffset;
|
|
448
|
+
|
|
449
|
+
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
|
|
450
|
+
dim = 1;
|
|
451
|
+
} else {
|
|
452
|
+
dim = NumDims - 2;
|
|
453
|
+
}
|
|
454
|
+
|
|
455
|
+
inputIndex = index / m_outputStrides[dim];
|
|
456
|
+
outputOffset = index % m_outputStrides[dim];
|
|
457
|
+
if (outputOffset + PacketSize <= m_outputStrides[dim]) {
|
|
458
|
+
values[0] = m_impl.coeff(inputIndex);
|
|
459
|
+
return internal::pload1<PacketReturnType>(values);
|
|
460
|
+
} else {
|
|
461
|
+
EIGEN_UNROLL_LOOP
|
|
462
|
+
for (int i = 0, cur = 0; i < PacketSize; ++i, ++cur) {
|
|
463
|
+
if (outputOffset + cur < m_outputStrides[dim]) {
|
|
464
|
+
values[i] = m_impl.coeff(inputIndex);
|
|
465
|
+
} else {
|
|
466
|
+
values[i] = m_impl.coeff(++inputIndex);
|
|
467
|
+
outputOffset = 0;
|
|
468
|
+
cur = 0;
|
|
469
|
+
}
|
|
470
|
+
}
|
|
471
|
+
return internal::pload<PacketReturnType>(values);
|
|
472
|
+
}
|
|
473
|
+
}
|
|
474
|
+
|
|
475
|
+
// Ignore the LoadMode and always use unaligned loads since we can't guarantee
|
|
476
|
+
// the alignment at compile time.
|
|
477
|
+
template<int LoadMode>
|
|
478
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetColMajor(Index index) const
|
|
479
|
+
{
|
|
480
|
+
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
|
|
481
|
+
eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
|
|
482
|
+
|
|
483
|
+
const Index originalIndex = index;
|
|
484
|
+
|
|
485
|
+
Index inputIndex = 0;
|
|
486
|
+
EIGEN_UNROLL_LOOP
|
|
487
|
+
for (int i = NumDims - 1; i > 0; --i) {
|
|
488
|
+
const Index idx = index / m_outputStrides[i];
|
|
489
|
+
if (internal::index_statically_eq<Broadcast>(i, 1)) {
|
|
490
|
+
eigen_assert(idx < m_impl.dimensions()[i]);
|
|
491
|
+
inputIndex += idx * m_inputStrides[i];
|
|
492
|
+
} else {
|
|
493
|
+
if (internal::index_statically_eq<InputDimensions>(i, 1)) {
|
|
494
|
+
eigen_assert(idx % m_impl.dimensions()[i] == 0);
|
|
495
|
+
} else {
|
|
496
|
+
inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i];
|
|
497
|
+
}
|
|
498
|
+
}
|
|
499
|
+
index -= idx * m_outputStrides[i];
|
|
500
|
+
}
|
|
501
|
+
Index innermostLoc;
|
|
502
|
+
if (internal::index_statically_eq<Broadcast>(0, 1)) {
|
|
503
|
+
eigen_assert(index < m_impl.dimensions()[0]);
|
|
504
|
+
innermostLoc = index;
|
|
505
|
+
} else {
|
|
506
|
+
if (internal::index_statically_eq<InputDimensions>(0, 1)) {
|
|
507
|
+
eigen_assert(index % m_impl.dimensions()[0] == 0);
|
|
508
|
+
innermostLoc = 0;
|
|
509
|
+
} else {
|
|
510
|
+
innermostLoc = index % m_impl.dimensions()[0];
|
|
511
|
+
}
|
|
512
|
+
}
|
|
513
|
+
inputIndex += innermostLoc;
|
|
514
|
+
|
|
515
|
+
// Todo: this could be extended to the second dimension if we're not
|
|
516
|
+
// broadcasting alongside the first dimension, and so on.
|
|
517
|
+
if (innermostLoc + PacketSize <= m_impl.dimensions()[0]) {
|
|
518
|
+
return m_impl.template packet<Unaligned>(inputIndex);
|
|
519
|
+
} else {
|
|
520
|
+
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
|
|
521
|
+
values[0] = m_impl.coeff(inputIndex);
|
|
522
|
+
EIGEN_UNROLL_LOOP
|
|
523
|
+
for (int i = 1; i < PacketSize; ++i) {
|
|
524
|
+
if (innermostLoc + i < m_impl.dimensions()[0]) {
|
|
525
|
+
values[i] = m_impl.coeff(inputIndex+i);
|
|
526
|
+
} else {
|
|
527
|
+
values[i] = coeffColMajor(originalIndex+i);
|
|
528
|
+
}
|
|
529
|
+
}
|
|
530
|
+
PacketReturnType rslt = internal::pload<PacketReturnType>(values);
|
|
531
|
+
return rslt;
|
|
532
|
+
}
|
|
533
|
+
}
|
|
534
|
+
|
|
535
|
+
template<int LoadMode>
|
|
536
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetRowMajor(Index index) const
|
|
537
|
+
{
|
|
538
|
+
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
|
|
539
|
+
eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
|
|
540
|
+
|
|
541
|
+
const Index originalIndex = index;
|
|
542
|
+
|
|
543
|
+
Index inputIndex = 0;
|
|
544
|
+
EIGEN_UNROLL_LOOP
|
|
545
|
+
for (int i = 0; i < NumDims - 1; ++i) {
|
|
546
|
+
const Index idx = index / m_outputStrides[i];
|
|
547
|
+
if (internal::index_statically_eq<Broadcast>(i, 1)) {
|
|
548
|
+
eigen_assert(idx < m_impl.dimensions()[i]);
|
|
549
|
+
inputIndex += idx * m_inputStrides[i];
|
|
550
|
+
} else {
|
|
551
|
+
if (internal::index_statically_eq<InputDimensions>(i, 1)) {
|
|
552
|
+
eigen_assert(idx % m_impl.dimensions()[i] == 0);
|
|
553
|
+
} else {
|
|
554
|
+
inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i];
|
|
555
|
+
}
|
|
556
|
+
}
|
|
557
|
+
index -= idx * m_outputStrides[i];
|
|
558
|
+
}
|
|
559
|
+
Index innermostLoc;
|
|
560
|
+
if (internal::index_statically_eq<Broadcast>(NumDims-1, 1)) {
|
|
561
|
+
eigen_assert(index < m_impl.dimensions()[NumDims-1]);
|
|
562
|
+
innermostLoc = index;
|
|
563
|
+
} else {
|
|
564
|
+
if (internal::index_statically_eq<InputDimensions>(NumDims-1, 1)) {
|
|
565
|
+
eigen_assert(index % m_impl.dimensions()[NumDims-1] == 0);
|
|
566
|
+
innermostLoc = 0;
|
|
567
|
+
} else {
|
|
568
|
+
innermostLoc = index % m_impl.dimensions()[NumDims-1];
|
|
569
|
+
}
|
|
570
|
+
}
|
|
571
|
+
inputIndex += innermostLoc;
|
|
572
|
+
|
|
573
|
+
// Todo: this could be extended to the second dimension if we're not
|
|
574
|
+
// broadcasting alongside the first dimension, and so on.
|
|
575
|
+
if (innermostLoc + PacketSize <= m_impl.dimensions()[NumDims-1]) {
|
|
576
|
+
return m_impl.template packet<Unaligned>(inputIndex);
|
|
577
|
+
} else {
|
|
578
|
+
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
|
|
579
|
+
values[0] = m_impl.coeff(inputIndex);
|
|
580
|
+
EIGEN_UNROLL_LOOP
|
|
581
|
+
for (int i = 1; i < PacketSize; ++i) {
|
|
582
|
+
if (innermostLoc + i < m_impl.dimensions()[NumDims-1]) {
|
|
583
|
+
values[i] = m_impl.coeff(inputIndex+i);
|
|
584
|
+
} else {
|
|
585
|
+
values[i] = coeffRowMajor(originalIndex+i);
|
|
586
|
+
}
|
|
587
|
+
}
|
|
588
|
+
PacketReturnType rslt = internal::pload<PacketReturnType>(values);
|
|
589
|
+
return rslt;
|
|
590
|
+
}
|
|
591
|
+
}
|
|
592
|
+
|
|
593
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
|
|
594
|
+
costPerCoeff(bool vectorized) const {
|
|
595
|
+
double compute_cost = TensorOpCost::AddCost<Index>();
|
|
596
|
+
if (!isCopy && NumDims > 0) {
|
|
597
|
+
EIGEN_UNROLL_LOOP
|
|
598
|
+
for (int i = NumDims - 1; i > 0; --i) {
|
|
599
|
+
compute_cost += TensorOpCost::DivCost<Index>();
|
|
600
|
+
if (internal::index_statically_eq<Broadcast>(i, 1)) {
|
|
601
|
+
compute_cost +=
|
|
602
|
+
TensorOpCost::MulCost<Index>() + TensorOpCost::AddCost<Index>();
|
|
603
|
+
} else {
|
|
604
|
+
if (!internal::index_statically_eq<InputDimensions>(i, 1)) {
|
|
605
|
+
compute_cost += TensorOpCost::MulCost<Index>() +
|
|
606
|
+
TensorOpCost::ModCost<Index>() +
|
|
607
|
+
TensorOpCost::AddCost<Index>();
|
|
608
|
+
}
|
|
609
|
+
}
|
|
610
|
+
compute_cost +=
|
|
611
|
+
TensorOpCost::MulCost<Index>() + TensorOpCost::AddCost<Index>();
|
|
612
|
+
}
|
|
613
|
+
}
|
|
614
|
+
return m_impl.costPerCoeff(vectorized) +
|
|
615
|
+
TensorOpCost(0, 0, compute_cost, vectorized, PacketSize);
|
|
616
|
+
}
|
|
617
|
+
|
|
618
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
|
619
|
+
internal::TensorBlockResourceRequirements getResourceRequirements() const {
|
|
620
|
+
// TODO(wuke): Targeting L1 size is 30% faster than targeting L{-1} on large
|
|
621
|
+
// tensors. But this might need further tuning.
|
|
622
|
+
const size_t target_size = m_device.firstLevelCacheSize();
|
|
623
|
+
return internal::TensorBlockResourceRequirements::merge(
|
|
624
|
+
m_impl.getResourceRequirements(),
|
|
625
|
+
internal::TensorBlockResourceRequirements::skewed<Scalar>(target_size));
|
|
626
|
+
}
|
|
627
|
+
|
|
628
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
|
|
629
|
+
block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
|
|
630
|
+
bool /*root_of_expr_ast*/ = false) const {
|
|
631
|
+
BlockBroadcastingParams params = blockBroadcastingParams(desc);
|
|
632
|
+
|
|
633
|
+
if (params.inner_dim_size == 0 || params.bcast_dim_size == 0) {
|
|
634
|
+
return emptyBlock();
|
|
635
|
+
}
|
|
636
|
+
|
|
637
|
+
// Prepare storage for the materialized broadcasting result.
|
|
638
|
+
const typename TensorBlock::Storage block_storage =
|
|
639
|
+
TensorBlock::prepareStorage(desc, scratch);
|
|
640
|
+
ScalarNoConst* materialized_output = block_storage.data();
|
|
641
|
+
|
|
642
|
+
// We potentially will need to materialize input blocks.
|
|
643
|
+
size_t materialized_input_size = 0;
|
|
644
|
+
ScalarNoConst* materialized_input = NULL;
|
|
645
|
+
|
|
646
|
+
// Initialize block broadcating iterator state for outer dimensions (outer
|
|
647
|
+
// with regard to bcast dimension). Dimension in this array are always in
|
|
648
|
+
// inner_most -> outer_most order (col major layout).
|
|
649
|
+
array<BlockBroadcastingIteratorState, NumDims> it;
|
|
650
|
+
int idx = 0;
|
|
651
|
+
|
|
652
|
+
for (int i = params.inner_dim_count + 1; i < NumDims; ++i) {
|
|
653
|
+
const Index dim = IsColMajor ? i : NumDims - 1 - i;
|
|
654
|
+
it[idx].size = params.output_dims[dim];
|
|
655
|
+
it[idx].count = 0;
|
|
656
|
+
it[idx].output_stride = m_outputStrides[dim];
|
|
657
|
+
it[idx].output_span = it[idx].output_stride * (it[idx].size - 1);
|
|
658
|
+
idx++;
|
|
659
|
+
}
|
|
660
|
+
|
|
661
|
+
// Write output into the beginning of `materialized_output`.
|
|
662
|
+
Index output_offset = 0;
|
|
663
|
+
|
|
664
|
+
// We will fill output block by broadcasting along the bcast dim, and
|
|
665
|
+
// iterating over outer dimension.
|
|
666
|
+
const Index output_size = NumDims == 0 ? 1 : params.output_dims.TotalSize();
|
|
667
|
+
|
|
668
|
+
for (Index num_output_coeffs = 0; num_output_coeffs < output_size;) {
|
|
669
|
+
ScalarNoConst* bcast_output = materialized_output + num_output_coeffs;
|
|
670
|
+
Index bcast_offset = desc.offset() + output_offset;
|
|
671
|
+
|
|
672
|
+
// Broadcast along the bcast dimension.
|
|
673
|
+
num_output_coeffs += BroadcastBlockAlongBcastDim(
|
|
674
|
+
params, bcast_offset, scratch, bcast_output, &materialized_input,
|
|
675
|
+
&materialized_input_size);
|
|
676
|
+
|
|
677
|
+
// Switch to the next outer dimension.
|
|
678
|
+
for (int j = 0; j < idx; ++j) {
|
|
679
|
+
if (++it[j].count < it[j].size) {
|
|
680
|
+
output_offset += it[j].output_stride;
|
|
681
|
+
break;
|
|
682
|
+
}
|
|
683
|
+
it[j].count = 0;
|
|
684
|
+
output_offset -= it[j].output_span;
|
|
685
|
+
}
|
|
686
|
+
}
|
|
687
|
+
|
|
688
|
+
return block_storage.AsTensorMaterializedBlock();
|
|
689
|
+
}
|
|
690
|
+
|
|
691
|
+
EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }
|
|
692
|
+
|
|
693
|
+
const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }
|
|
694
|
+
|
|
695
|
+
Broadcast functor() const { return m_broadcast; }
|
|
696
|
+
#ifdef EIGEN_USE_SYCL
|
|
697
|
+
// binding placeholder accessors to a command group handler for SYCL
|
|
698
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(
|
|
699
|
+
cl::sycl::handler& cgh) const {
|
|
700
|
+
m_impl.bind(cgh);
|
|
701
|
+
}
|
|
702
|
+
#endif
|
|
703
|
+
private:
|
|
704
|
+
static const bool IsColMajor =
|
|
705
|
+
static_cast<int>(Layout) == static_cast<int>(ColMajor);
|
|
706
|
+
|
|
707
|
+
// We will build a general case block broadcasting on top of broadcasting
|
|
708
|
+
// primitive that will do broadcasting only for the inner dimension(s) along
|
|
709
|
+
// the first dimension smaller than the input size (it's called `bcast_dim`).
|
|
710
|
+
//
|
|
711
|
+
// Example:
|
|
712
|
+
// dim: 0 1 2 (ColMajor)
|
|
713
|
+
// input size: [9, 3, 6]
|
|
714
|
+
// block size: [9, 2, 6]
|
|
715
|
+
//
|
|
716
|
+
// We will compute broadcasted block by iterating over the outer dimensions
|
|
717
|
+
// before `bcast_dim` (only dimension `2` in this example) and computing
|
|
718
|
+
// broadcasts along the `bcast_dim` (dimension `1` in this example).
|
|
719
|
+
|
|
720
|
+
// BlockBroadcastingParams holds precomputed parameters for broadcasting a
|
|
721
|
+
// single block along the broadcasting dimension. Sizes and strides along the
|
|
722
|
+
// `bcast_dim` might be invalid, they will be adjusted later in
|
|
723
|
+
// `BroadcastBlockAlongBcastDim`.
|
|
724
|
+
struct BlockBroadcastingParams {
|
|
725
|
+
Dimensions input_dims; // input expression dimensions
|
|
726
|
+
Dimensions output_dims; // output block sizes
|
|
727
|
+
Dimensions output_strides; // output block strides
|
|
728
|
+
|
|
729
|
+
int inner_dim_count; // count inner dimensions matching in size
|
|
730
|
+
int bcast_dim; // broadcasting dimension index
|
|
731
|
+
Index bcast_dim_size; // broadcasting dimension size
|
|
732
|
+
Index inner_dim_size; // inner dimensions size
|
|
733
|
+
|
|
734
|
+
// Block sizes and strides for the input block where all dimensions before
|
|
735
|
+
// `bcast_dim` are equal to `1`.
|
|
736
|
+
Dimensions input_block_sizes;
|
|
737
|
+
Dimensions input_block_strides;
|
|
738
|
+
|
|
739
|
+
// Block sizes and strides for blocks with extra dimensions and strides `0`.
|
|
740
|
+
BroadcastDimensions bcast_block_sizes;
|
|
741
|
+
BroadcastDimensions bcast_block_strides;
|
|
742
|
+
BroadcastDimensions bcast_input_strides;
|
|
743
|
+
};
|
|
744
|
+
|
|
745
|
+
struct BlockBroadcastingIteratorState {
|
|
746
|
+
Index size;
|
|
747
|
+
Index count;
|
|
748
|
+
Index output_stride;
|
|
749
|
+
Index output_span;
|
|
750
|
+
};
|
|
751
|
+
|
|
752
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE BlockBroadcastingParams
|
|
753
|
+
blockBroadcastingParams(TensorBlockDesc& desc) const {
|
|
754
|
+
BlockBroadcastingParams params;
|
|
755
|
+
|
|
756
|
+
params.input_dims = Dimensions(m_impl.dimensions());
|
|
757
|
+
|
|
758
|
+
// Output block sizes and strides.
|
|
759
|
+
params.output_dims = desc.dimensions();
|
|
760
|
+
params.output_strides = internal::strides<Layout>(params.output_dims);
|
|
761
|
+
|
|
762
|
+
// Find the broadcasting dimension (first dimension with output size smaller
|
|
763
|
+
// that the input size).
|
|
764
|
+
params.bcast_dim = 0;
|
|
765
|
+
params.bcast_dim_size = 1;
|
|
766
|
+
params.inner_dim_size = 1;
|
|
767
|
+
|
|
768
|
+
// Count the number of inner dimensions that have the same size in the block
|
|
769
|
+
// and in the broadcast expression.
|
|
770
|
+
params.inner_dim_count = 0;
|
|
771
|
+
|
|
772
|
+
for (int i = 0; i < NumDims; ++i) {
|
|
773
|
+
const int dim = IsColMajor ? i : NumDims - i - 1;
|
|
774
|
+
|
|
775
|
+
if (params.output_dims[dim] == m_dimensions[dim]) {
|
|
776
|
+
params.inner_dim_size *= params.output_dims[dim];
|
|
777
|
+
++params.inner_dim_count;
|
|
778
|
+
continue;
|
|
779
|
+
}
|
|
780
|
+
|
|
781
|
+
// First non-matching dimension is the broadcasting dimension.
|
|
782
|
+
eigen_assert(params.output_dims[dim] < m_dimensions[dim]);
|
|
783
|
+
params.bcast_dim = dim;
|
|
784
|
+
params.bcast_dim_size = params.output_dims[dim];
|
|
785
|
+
break;
|
|
786
|
+
}
|
|
787
|
+
|
|
788
|
+
// Calculate the input block size for looking into the input.
|
|
789
|
+
for (int i = 0; i < params.inner_dim_count; ++i) {
|
|
790
|
+
const int dim = IsColMajor ? i : NumDims - i - 1;
|
|
791
|
+
params.input_block_sizes[dim] = params.input_dims[dim];
|
|
792
|
+
}
|
|
793
|
+
for (int i = params.inner_dim_count; i < NumDims; ++i) {
|
|
794
|
+
const int dim = IsColMajor ? i : NumDims - i - 1;
|
|
795
|
+
params.input_block_sizes[dim] = 1;
|
|
796
|
+
}
|
|
797
|
+
params.input_block_strides =
|
|
798
|
+
internal::strides<Layout>(params.input_block_sizes);
|
|
799
|
+
|
|
800
|
+
// Broadcast with the 0-stride trick: Create 1 extra dim for each
|
|
801
|
+
// broadcast, set the input stride to 0.
|
|
802
|
+
//
|
|
803
|
+
// When ColMajor:
|
|
804
|
+
//
|
|
805
|
+
// - bcast_block_sizes:
|
|
806
|
+
// [d_0, b_0, d_1, b_1, ...]
|
|
807
|
+
//
|
|
808
|
+
// - bcast_block_strides:
|
|
809
|
+
// [output_block_strides[0], output_block_strides[0] * d_0,
|
|
810
|
+
// output_block_strides[1], output_block_strides[1] * d_1,
|
|
811
|
+
// ...]
|
|
812
|
+
//
|
|
813
|
+
// - bcast_input_strides:
|
|
814
|
+
// [input_block_strides[0], 0,
|
|
815
|
+
// input_block_strides[1], 0,
|
|
816
|
+
// ...].
|
|
817
|
+
//
|
|
818
|
+
for (int i = 0; i < params.inner_dim_count; ++i) {
|
|
819
|
+
const int dim = IsColMajor ? i : NumDims - i - 1;
|
|
820
|
+
|
|
821
|
+
const int copy_dim = IsColMajor ? 2 * i : 2 * NumDims - 2 * i - 1;
|
|
822
|
+
const int broadcast_dim = IsColMajor ? copy_dim + 1 : copy_dim - 1;
|
|
823
|
+
|
|
824
|
+
params.bcast_block_sizes[copy_dim] = params.input_dims[dim];
|
|
825
|
+
params.bcast_block_sizes[broadcast_dim] = m_broadcast[dim];
|
|
826
|
+
params.bcast_block_strides[copy_dim] = params.output_strides[dim];
|
|
827
|
+
params.bcast_block_strides[broadcast_dim] =
|
|
828
|
+
params.output_strides[dim] * params.input_dims[dim];
|
|
829
|
+
params.bcast_input_strides[copy_dim] = params.input_block_strides[dim];
|
|
830
|
+
params.bcast_input_strides[broadcast_dim] = 0;
|
|
831
|
+
}
|
|
832
|
+
|
|
833
|
+
for (int i = 2 * params.inner_dim_count; i < 2 * NumDims; ++i) {
|
|
834
|
+
const int dim = IsColMajor ? i : 2 * NumDims - i - 1;
|
|
835
|
+
params.bcast_block_sizes[dim] = 1;
|
|
836
|
+
params.bcast_block_strides[dim] = 0;
|
|
837
|
+
params.bcast_input_strides[dim] = 0;
|
|
838
|
+
}
|
|
839
|
+
|
|
840
|
+
return params;
|
|
841
|
+
}
|
|
842
|
+
|
|
843
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock emptyBlock() const {
|
|
844
|
+
DSizes<Index, NumDims> dimensions;
|
|
845
|
+
for (int i = 0; i < NumDims; ++i) dimensions[i] = 0;
|
|
846
|
+
return TensorBlock(internal::TensorBlockKind::kView, NULL, dimensions);
|
|
847
|
+
}
|
|
848
|
+
|
|
849
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index BroadcastBlockAlongBcastDim(
|
|
850
|
+
BlockBroadcastingParams params, Index bcast_offset,
|
|
851
|
+
TensorBlockScratch& scratch, ScalarNoConst* materialized_output,
|
|
852
|
+
ScalarNoConst** materialized_input,
|
|
853
|
+
size_t* materialized_input_size) const {
|
|
854
|
+
if (params.bcast_dim_size == 1) {
|
|
855
|
+
// We just need one block read using the ready-set values above.
|
|
856
|
+
return BroadcastBlock(
|
|
857
|
+
params.input_block_sizes, params.input_block_strides,
|
|
858
|
+
params.bcast_block_sizes, params.bcast_block_strides,
|
|
859
|
+
params.bcast_input_strides, bcast_offset, 0, scratch,
|
|
860
|
+
materialized_output, materialized_input, materialized_input_size);
|
|
861
|
+
|
|
862
|
+
} else if (params.input_dims[params.bcast_dim] == 1) {
|
|
863
|
+
// Broadcast bcast dimension (< NumDims) by bcast_dim_size.
|
|
864
|
+
const int broadcast_bcast_dim =
|
|
865
|
+
IsColMajor ? 2 * params.inner_dim_count + 1
|
|
866
|
+
: 2 * NumDims - 2 * params.inner_dim_count - 2;
|
|
867
|
+
|
|
868
|
+
params.bcast_block_sizes[broadcast_bcast_dim] = params.bcast_dim_size;
|
|
869
|
+
params.bcast_input_strides[broadcast_bcast_dim] = 0;
|
|
870
|
+
params.bcast_block_strides[broadcast_bcast_dim] =
|
|
871
|
+
params.output_strides[params.bcast_dim];
|
|
872
|
+
|
|
873
|
+
return BroadcastBlock(
|
|
874
|
+
params.input_block_sizes, params.input_block_strides,
|
|
875
|
+
params.bcast_block_sizes, params.bcast_block_strides,
|
|
876
|
+
params.bcast_input_strides, bcast_offset, 0, scratch,
|
|
877
|
+
materialized_output, materialized_input, materialized_input_size);
|
|
878
|
+
|
|
879
|
+
} else {
|
|
880
|
+
// Keep track of the total number of the coefficients written to the
|
|
881
|
+
// output block.
|
|
882
|
+
Index num_output_coeffs = 0;
|
|
883
|
+
|
|
884
|
+
// The general case. Let's denote the output block as
|
|
885
|
+
//
|
|
886
|
+
// x[..., a:a+bcast_dim_size, :, ..., :]
|
|
887
|
+
//
|
|
888
|
+
// where a:a+bcast_dim_size is a slice on the bcast_dim dimension
|
|
889
|
+
// (< NumDims). We need to split the a:a+bcast_dim_size into possibly 3
|
|
890
|
+
// sub-blocks:
|
|
891
|
+
//
|
|
892
|
+
// (1) a:b, where b is the smallest multiple of
|
|
893
|
+
// input_dims[bcast_dim_start] in [a, a+bcast_dim_size].
|
|
894
|
+
//
|
|
895
|
+
// (2) b:c, where c is the largest multiple of input_dims[bcast_dim_start]
|
|
896
|
+
// in [a, a+bcast_dim_size].
|
|
897
|
+
//
|
|
898
|
+
// (3) c:a+bcast_dim_size .
|
|
899
|
+
//
|
|
900
|
+
// Or, when b and c do not exist, we just need to process the whole block
|
|
901
|
+
// together.
|
|
902
|
+
|
|
903
|
+
// Find a.
|
|
904
|
+
const Index bcast_dim_left_index =
|
|
905
|
+
bcast_offset / m_outputStrides[params.bcast_dim];
|
|
906
|
+
|
|
907
|
+
// Find b and c.
|
|
908
|
+
const Index input_bcast_dim_size = params.input_dims[params.bcast_dim];
|
|
909
|
+
|
|
910
|
+
// First multiple after a. This is b when <= bcast_dim_left_index +
|
|
911
|
+
// bcast_dim_size.
|
|
912
|
+
const Index first_multiple =
|
|
913
|
+
divup<Index>(bcast_dim_left_index, input_bcast_dim_size) *
|
|
914
|
+
input_bcast_dim_size;
|
|
915
|
+
|
|
916
|
+
if (first_multiple <= bcast_dim_left_index + params.bcast_dim_size) {
|
|
917
|
+
// b exists, so does c. Find it.
|
|
918
|
+
const Index last_multiple =
|
|
919
|
+
(bcast_dim_left_index + params.bcast_dim_size) /
|
|
920
|
+
input_bcast_dim_size * input_bcast_dim_size;
|
|
921
|
+
const int copy_bcast_dim =
|
|
922
|
+
IsColMajor ? 2 * params.inner_dim_count
|
|
923
|
+
: 2 * NumDims - 2 * params.inner_dim_count - 1;
|
|
924
|
+
const int broadcast_bcast_dim =
|
|
925
|
+
IsColMajor ? 2 * params.inner_dim_count + 1
|
|
926
|
+
: 2 * NumDims - 2 * params.inner_dim_count - 2;
|
|
927
|
+
|
|
928
|
+
if (first_multiple > bcast_dim_left_index) {
|
|
929
|
+
const Index head_size = first_multiple - bcast_dim_left_index;
|
|
930
|
+
params.input_block_sizes[params.bcast_dim] = head_size;
|
|
931
|
+
params.bcast_block_sizes[copy_bcast_dim] = head_size;
|
|
932
|
+
params.bcast_input_strides[copy_bcast_dim] =
|
|
933
|
+
params.input_block_strides[params.bcast_dim];
|
|
934
|
+
params.bcast_block_strides[copy_bcast_dim] =
|
|
935
|
+
params.output_strides[params.bcast_dim];
|
|
936
|
+
params.bcast_block_sizes[broadcast_bcast_dim] = 1;
|
|
937
|
+
params.bcast_input_strides[broadcast_bcast_dim] = 0;
|
|
938
|
+
params.bcast_block_strides[broadcast_bcast_dim] =
|
|
939
|
+
params.output_strides[params.bcast_dim] *
|
|
940
|
+
params.input_dims[params.bcast_dim];
|
|
941
|
+
|
|
942
|
+
num_output_coeffs += BroadcastBlock(
|
|
943
|
+
params.input_block_sizes, params.input_block_strides,
|
|
944
|
+
params.bcast_block_sizes, params.bcast_block_strides,
|
|
945
|
+
params.bcast_input_strides, bcast_offset, 0, scratch,
|
|
946
|
+
materialized_output, materialized_input, materialized_input_size);
|
|
947
|
+
}
|
|
948
|
+
if (first_multiple < last_multiple) {
|
|
949
|
+
params.input_block_sizes[params.bcast_dim] = input_bcast_dim_size;
|
|
950
|
+
params.bcast_block_sizes[copy_bcast_dim] = input_bcast_dim_size;
|
|
951
|
+
params.bcast_input_strides[copy_bcast_dim] =
|
|
952
|
+
params.input_block_strides[params.bcast_dim];
|
|
953
|
+
params.bcast_block_strides[copy_bcast_dim] =
|
|
954
|
+
params.output_strides[params.bcast_dim];
|
|
955
|
+
params.bcast_block_sizes[broadcast_bcast_dim] =
|
|
956
|
+
(last_multiple - first_multiple) / input_bcast_dim_size;
|
|
957
|
+
params.bcast_input_strides[broadcast_bcast_dim] = 0;
|
|
958
|
+
params.bcast_block_strides[broadcast_bcast_dim] =
|
|
959
|
+
params.output_strides[params.bcast_dim] *
|
|
960
|
+
params.input_dims[params.bcast_dim];
|
|
961
|
+
const Index offset = (first_multiple - bcast_dim_left_index) *
|
|
962
|
+
m_outputStrides[params.bcast_dim];
|
|
963
|
+
|
|
964
|
+
num_output_coeffs += BroadcastBlock(
|
|
965
|
+
params.input_block_sizes, params.input_block_strides,
|
|
966
|
+
params.bcast_block_sizes, params.bcast_block_strides,
|
|
967
|
+
params.bcast_input_strides, bcast_offset, offset, scratch,
|
|
968
|
+
materialized_output, materialized_input, materialized_input_size);
|
|
969
|
+
}
|
|
970
|
+
if (last_multiple < bcast_dim_left_index + params.bcast_dim_size) {
|
|
971
|
+
const Index tail_size =
|
|
972
|
+
bcast_dim_left_index + params.bcast_dim_size - last_multiple;
|
|
973
|
+
params.input_block_sizes[params.bcast_dim] = tail_size;
|
|
974
|
+
params.bcast_block_sizes[copy_bcast_dim] = tail_size;
|
|
975
|
+
params.bcast_input_strides[copy_bcast_dim] =
|
|
976
|
+
params.input_block_strides[params.bcast_dim];
|
|
977
|
+
params.bcast_block_strides[copy_bcast_dim] =
|
|
978
|
+
params.output_strides[params.bcast_dim];
|
|
979
|
+
params.bcast_block_sizes[broadcast_bcast_dim] = 1;
|
|
980
|
+
params.bcast_input_strides[broadcast_bcast_dim] = 0;
|
|
981
|
+
params.bcast_block_strides[broadcast_bcast_dim] =
|
|
982
|
+
params.output_strides[params.bcast_dim] *
|
|
983
|
+
params.input_dims[params.bcast_dim];
|
|
984
|
+
const Index offset = (last_multiple - bcast_dim_left_index) *
|
|
985
|
+
m_outputStrides[params.bcast_dim];
|
|
986
|
+
|
|
987
|
+
num_output_coeffs += BroadcastBlock(
|
|
988
|
+
params.input_block_sizes, params.input_block_strides,
|
|
989
|
+
params.bcast_block_sizes, params.bcast_block_strides,
|
|
990
|
+
params.bcast_input_strides, bcast_offset, offset, scratch,
|
|
991
|
+
materialized_output, materialized_input, materialized_input_size);
|
|
992
|
+
}
|
|
993
|
+
} else {
|
|
994
|
+
// b and c do not exist.
|
|
995
|
+
const int copy_bcast_dim =
|
|
996
|
+
IsColMajor ? 2 * params.inner_dim_count
|
|
997
|
+
: 2 * NumDims - 2 * params.inner_dim_count - 1;
|
|
998
|
+
params.input_block_sizes[params.bcast_dim] = params.bcast_dim_size;
|
|
999
|
+
params.bcast_block_sizes[copy_bcast_dim] = params.bcast_dim_size;
|
|
1000
|
+
params.bcast_input_strides[copy_bcast_dim] =
|
|
1001
|
+
params.input_block_strides[params.bcast_dim];
|
|
1002
|
+
params.bcast_block_strides[copy_bcast_dim] =
|
|
1003
|
+
params.output_strides[params.bcast_dim];
|
|
1004
|
+
|
|
1005
|
+
num_output_coeffs += BroadcastBlock(
|
|
1006
|
+
params.input_block_sizes, params.input_block_strides,
|
|
1007
|
+
params.bcast_block_sizes, params.bcast_block_strides,
|
|
1008
|
+
params.bcast_input_strides, bcast_offset, 0, scratch,
|
|
1009
|
+
materialized_output, materialized_input, materialized_input_size);
|
|
1010
|
+
}
|
|
1011
|
+
|
|
1012
|
+
return num_output_coeffs;
|
|
1013
|
+
}
|
|
1014
|
+
}
|
|
1015
|
+
|
|
1016
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index BroadcastBlock(
|
|
1017
|
+
const Dimensions& input_block_sizes,
|
|
1018
|
+
const Dimensions& input_block_strides,
|
|
1019
|
+
const BroadcastDimensions& bcast_block_sizes,
|
|
1020
|
+
const BroadcastDimensions& bcast_block_strides,
|
|
1021
|
+
const BroadcastDimensions& bcast_input_strides, Index bcast_offset,
|
|
1022
|
+
Index offset, TensorBlockScratch& scratch,
|
|
1023
|
+
ScalarNoConst* materialized_output, ScalarNoConst** materialized_input,
|
|
1024
|
+
size_t* materialized_input_size) const {
|
|
1025
|
+
// ---------------------------------------------------------------------- //
|
|
1026
|
+
// Tensor block descriptor for reading block from the input.
|
|
1027
|
+
const Index input_offset = bcast_offset + offset;
|
|
1028
|
+
TensorBlockDesc input_desc(
|
|
1029
|
+
IsColMajor ? indexColMajor(input_offset) : indexRowMajor(input_offset),
|
|
1030
|
+
input_block_sizes);
|
|
1031
|
+
|
|
1032
|
+
ArgTensorBlock input_block = m_impl.block(input_desc, scratch);
|
|
1033
|
+
|
|
1034
|
+
// ---------------------------------------------------------------------- //
|
|
1035
|
+
// Materialize input block into a temporary memory buffer only if it's not
|
|
1036
|
+
// already available in the arg block.
|
|
1037
|
+
const ScalarNoConst* input_buffer = NULL;
|
|
1038
|
+
|
|
1039
|
+
if (input_block.data() != NULL) {
|
|
1040
|
+
// Input block already has raw data, there is no need to materialize it.
|
|
1041
|
+
input_buffer = input_block.data();
|
|
1042
|
+
|
|
1043
|
+
} else {
|
|
1044
|
+
// Otherwise we have to do block assignment into a temporary buffer.
|
|
1045
|
+
|
|
1046
|
+
// Maybe reuse previously allocated buffer, or allocate a new one with a
|
|
1047
|
+
// scratch allocator.
|
|
1048
|
+
const size_t input_total_size = input_block_sizes.TotalSize();
|
|
1049
|
+
if (*materialized_input == NULL ||
|
|
1050
|
+
*materialized_input_size < input_total_size) {
|
|
1051
|
+
*materialized_input_size = input_total_size;
|
|
1052
|
+
void* mem = scratch.allocate(*materialized_input_size * sizeof(Scalar));
|
|
1053
|
+
*materialized_input = static_cast<ScalarNoConst*>(mem);
|
|
1054
|
+
}
|
|
1055
|
+
|
|
1056
|
+
typedef internal::TensorBlockAssignment<
|
|
1057
|
+
ScalarNoConst, NumDims, typename ArgTensorBlock::XprType, Index>
|
|
1058
|
+
TensorBlockAssignment;
|
|
1059
|
+
|
|
1060
|
+
TensorBlockAssignment::Run(
|
|
1061
|
+
TensorBlockAssignment::target(input_block_sizes, input_block_strides,
|
|
1062
|
+
*materialized_input),
|
|
1063
|
+
input_block.expr());
|
|
1064
|
+
|
|
1065
|
+
input_buffer = *materialized_input;
|
|
1066
|
+
}
|
|
1067
|
+
|
|
1068
|
+
// ---------------------------------------------------------------------- //
|
|
1069
|
+
// Copy data from materialized input block to the materialized output, using
|
|
1070
|
+
// given broadcast strides (strides with zeroes).
|
|
1071
|
+
typedef internal::TensorBlockIO<ScalarNoConst, Index, 2 * NumDims, Layout>
|
|
1072
|
+
TensorBlockIO;
|
|
1073
|
+
|
|
1074
|
+
typename TensorBlockIO::Src src(bcast_input_strides, input_buffer);
|
|
1075
|
+
typename TensorBlockIO::Dst dst(bcast_block_sizes, bcast_block_strides,
|
|
1076
|
+
materialized_output + offset);
|
|
1077
|
+
|
|
1078
|
+
return TensorBlockIO::Copy(dst, src);
|
|
1079
|
+
}
|
|
1080
|
+
|
|
1081
|
+
protected:
|
|
1082
|
+
const Device EIGEN_DEVICE_REF m_device;
|
|
1083
|
+
const typename internal::remove_reference<Broadcast>::type m_broadcast;
|
|
1084
|
+
Dimensions m_dimensions;
|
|
1085
|
+
array<Index, NumDims> m_outputStrides;
|
|
1086
|
+
array<Index, NumDims> m_inputStrides;
|
|
1087
|
+
TensorEvaluator<ArgType, Device> m_impl;
|
|
1088
|
+
};
|
|
1089
|
+
|
|
1090
|
+
|
|
1091
|
+
} // end namespace Eigen
|
|
1092
|
+
|
|
1093
|
+
#endif // EIGEN_CXX11_TENSOR_TENSOR_BROADCASTING_H
|