pylibsparseir 0.1.0__cp310-cp310-macosx_15_0_arm64.whl → 0.6.0__cp310-cp310-macosx_15_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pylibsparseir might be problematic. Click here for more details.
- include/eigen3/Eigen/Cholesky +45 -0
- include/eigen3/Eigen/CholmodSupport +48 -0
- include/eigen3/Eigen/Core +384 -0
- include/eigen3/Eigen/Dense +7 -0
- include/eigen3/Eigen/Eigen +2 -0
- include/eigen3/Eigen/Eigenvalues +60 -0
- include/eigen3/Eigen/Geometry +59 -0
- include/eigen3/Eigen/Householder +29 -0
- include/eigen3/Eigen/IterativeLinearSolvers +48 -0
- include/eigen3/Eigen/Jacobi +32 -0
- include/eigen3/Eigen/KLUSupport +41 -0
- include/eigen3/Eigen/LU +47 -0
- include/eigen3/Eigen/MetisSupport +35 -0
- include/eigen3/Eigen/OrderingMethods +70 -0
- include/eigen3/Eigen/PaStiXSupport +49 -0
- include/eigen3/Eigen/PardisoSupport +35 -0
- include/eigen3/Eigen/QR +50 -0
- include/eigen3/Eigen/QtAlignedMalloc +39 -0
- include/eigen3/Eigen/SPQRSupport +34 -0
- include/eigen3/Eigen/SVD +50 -0
- include/eigen3/Eigen/Sparse +34 -0
- include/eigen3/Eigen/SparseCholesky +37 -0
- include/eigen3/Eigen/SparseCore +69 -0
- include/eigen3/Eigen/SparseLU +50 -0
- include/eigen3/Eigen/SparseQR +36 -0
- include/eigen3/Eigen/StdDeque +27 -0
- include/eigen3/Eigen/StdList +26 -0
- include/eigen3/Eigen/StdVector +27 -0
- include/eigen3/Eigen/SuperLUSupport +64 -0
- include/eigen3/Eigen/UmfPackSupport +40 -0
- include/eigen3/Eigen/src/Cholesky/LDLT.h +688 -0
- include/eigen3/Eigen/src/Cholesky/LLT.h +558 -0
- include/eigen3/Eigen/src/Cholesky/LLT_LAPACKE.h +99 -0
- include/eigen3/Eigen/src/CholmodSupport/CholmodSupport.h +682 -0
- include/eigen3/Eigen/src/Core/ArithmeticSequence.h +413 -0
- include/eigen3/Eigen/src/Core/Array.h +417 -0
- include/eigen3/Eigen/src/Core/ArrayBase.h +226 -0
- include/eigen3/Eigen/src/Core/ArrayWrapper.h +209 -0
- include/eigen3/Eigen/src/Core/Assign.h +90 -0
- include/eigen3/Eigen/src/Core/AssignEvaluator.h +1010 -0
- include/eigen3/Eigen/src/Core/Assign_MKL.h +178 -0
- include/eigen3/Eigen/src/Core/BandMatrix.h +353 -0
- include/eigen3/Eigen/src/Core/Block.h +448 -0
- include/eigen3/Eigen/src/Core/BooleanRedux.h +162 -0
- include/eigen3/Eigen/src/Core/CommaInitializer.h +164 -0
- include/eigen3/Eigen/src/Core/ConditionEstimator.h +175 -0
- include/eigen3/Eigen/src/Core/CoreEvaluators.h +1741 -0
- include/eigen3/Eigen/src/Core/CoreIterators.h +132 -0
- include/eigen3/Eigen/src/Core/CwiseBinaryOp.h +183 -0
- include/eigen3/Eigen/src/Core/CwiseNullaryOp.h +1001 -0
- include/eigen3/Eigen/src/Core/CwiseTernaryOp.h +197 -0
- include/eigen3/Eigen/src/Core/CwiseUnaryOp.h +103 -0
- include/eigen3/Eigen/src/Core/CwiseUnaryView.h +132 -0
- include/eigen3/Eigen/src/Core/DenseBase.h +701 -0
- include/eigen3/Eigen/src/Core/DenseCoeffsBase.h +685 -0
- include/eigen3/Eigen/src/Core/DenseStorage.h +652 -0
- include/eigen3/Eigen/src/Core/Diagonal.h +258 -0
- include/eigen3/Eigen/src/Core/DiagonalMatrix.h +391 -0
- include/eigen3/Eigen/src/Core/DiagonalProduct.h +28 -0
- include/eigen3/Eigen/src/Core/Dot.h +318 -0
- include/eigen3/Eigen/src/Core/EigenBase.h +160 -0
- include/eigen3/Eigen/src/Core/ForceAlignedAccess.h +150 -0
- include/eigen3/Eigen/src/Core/Fuzzy.h +155 -0
- include/eigen3/Eigen/src/Core/GeneralProduct.h +465 -0
- include/eigen3/Eigen/src/Core/GenericPacketMath.h +1040 -0
- include/eigen3/Eigen/src/Core/GlobalFunctions.h +194 -0
- include/eigen3/Eigen/src/Core/IO.h +258 -0
- include/eigen3/Eigen/src/Core/IndexedView.h +237 -0
- include/eigen3/Eigen/src/Core/Inverse.h +117 -0
- include/eigen3/Eigen/src/Core/Map.h +171 -0
- include/eigen3/Eigen/src/Core/MapBase.h +310 -0
- include/eigen3/Eigen/src/Core/MathFunctions.h +2057 -0
- include/eigen3/Eigen/src/Core/MathFunctionsImpl.h +200 -0
- include/eigen3/Eigen/src/Core/Matrix.h +565 -0
- include/eigen3/Eigen/src/Core/MatrixBase.h +547 -0
- include/eigen3/Eigen/src/Core/NestByValue.h +85 -0
- include/eigen3/Eigen/src/Core/NoAlias.h +109 -0
- include/eigen3/Eigen/src/Core/NumTraits.h +335 -0
- include/eigen3/Eigen/src/Core/PartialReduxEvaluator.h +232 -0
- include/eigen3/Eigen/src/Core/PermutationMatrix.h +605 -0
- include/eigen3/Eigen/src/Core/PlainObjectBase.h +1128 -0
- include/eigen3/Eigen/src/Core/Product.h +191 -0
- include/eigen3/Eigen/src/Core/ProductEvaluators.h +1179 -0
- include/eigen3/Eigen/src/Core/Random.h +218 -0
- include/eigen3/Eigen/src/Core/Redux.h +515 -0
- include/eigen3/Eigen/src/Core/Ref.h +381 -0
- include/eigen3/Eigen/src/Core/Replicate.h +142 -0
- include/eigen3/Eigen/src/Core/Reshaped.h +454 -0
- include/eigen3/Eigen/src/Core/ReturnByValue.h +119 -0
- include/eigen3/Eigen/src/Core/Reverse.h +217 -0
- include/eigen3/Eigen/src/Core/Select.h +164 -0
- include/eigen3/Eigen/src/Core/SelfAdjointView.h +365 -0
- include/eigen3/Eigen/src/Core/SelfCwiseBinaryOp.h +47 -0
- include/eigen3/Eigen/src/Core/Solve.h +188 -0
- include/eigen3/Eigen/src/Core/SolveTriangular.h +235 -0
- include/eigen3/Eigen/src/Core/SolverBase.h +168 -0
- include/eigen3/Eigen/src/Core/StableNorm.h +251 -0
- include/eigen3/Eigen/src/Core/StlIterators.h +463 -0
- include/eigen3/Eigen/src/Core/Stride.h +116 -0
- include/eigen3/Eigen/src/Core/Swap.h +68 -0
- include/eigen3/Eigen/src/Core/Transpose.h +464 -0
- include/eigen3/Eigen/src/Core/Transpositions.h +386 -0
- include/eigen3/Eigen/src/Core/TriangularMatrix.h +1001 -0
- include/eigen3/Eigen/src/Core/VectorBlock.h +96 -0
- include/eigen3/Eigen/src/Core/VectorwiseOp.h +784 -0
- include/eigen3/Eigen/src/Core/Visitor.h +381 -0
- include/eigen3/Eigen/src/Core/arch/AVX/Complex.h +372 -0
- include/eigen3/Eigen/src/Core/arch/AVX/MathFunctions.h +228 -0
- include/eigen3/Eigen/src/Core/arch/AVX/PacketMath.h +1574 -0
- include/eigen3/Eigen/src/Core/arch/AVX/TypeCasting.h +115 -0
- include/eigen3/Eigen/src/Core/arch/AVX512/Complex.h +422 -0
- include/eigen3/Eigen/src/Core/arch/AVX512/MathFunctions.h +362 -0
- include/eigen3/Eigen/src/Core/arch/AVX512/PacketMath.h +2303 -0
- include/eigen3/Eigen/src/Core/arch/AVX512/TypeCasting.h +89 -0
- include/eigen3/Eigen/src/Core/arch/AltiVec/Complex.h +417 -0
- include/eigen3/Eigen/src/Core/arch/AltiVec/MathFunctions.h +90 -0
- include/eigen3/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2937 -0
- include/eigen3/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +221 -0
- include/eigen3/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +629 -0
- include/eigen3/Eigen/src/Core/arch/AltiVec/PacketMath.h +2711 -0
- include/eigen3/Eigen/src/Core/arch/CUDA/Complex.h +258 -0
- include/eigen3/Eigen/src/Core/arch/Default/BFloat16.h +700 -0
- include/eigen3/Eigen/src/Core/arch/Default/ConjHelper.h +117 -0
- include/eigen3/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1649 -0
- include/eigen3/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +110 -0
- include/eigen3/Eigen/src/Core/arch/Default/Half.h +942 -0
- include/eigen3/Eigen/src/Core/arch/Default/Settings.h +49 -0
- include/eigen3/Eigen/src/Core/arch/Default/TypeCasting.h +120 -0
- include/eigen3/Eigen/src/Core/arch/GPU/MathFunctions.h +103 -0
- include/eigen3/Eigen/src/Core/arch/GPU/PacketMath.h +1685 -0
- include/eigen3/Eigen/src/Core/arch/GPU/TypeCasting.h +80 -0
- include/eigen3/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
- include/eigen3/Eigen/src/Core/arch/MSA/Complex.h +648 -0
- include/eigen3/Eigen/src/Core/arch/MSA/MathFunctions.h +387 -0
- include/eigen3/Eigen/src/Core/arch/MSA/PacketMath.h +1233 -0
- include/eigen3/Eigen/src/Core/arch/NEON/Complex.h +584 -0
- include/eigen3/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +183 -0
- include/eigen3/Eigen/src/Core/arch/NEON/MathFunctions.h +75 -0
- include/eigen3/Eigen/src/Core/arch/NEON/PacketMath.h +4587 -0
- include/eigen3/Eigen/src/Core/arch/NEON/TypeCasting.h +1419 -0
- include/eigen3/Eigen/src/Core/arch/SSE/Complex.h +351 -0
- include/eigen3/Eigen/src/Core/arch/SSE/MathFunctions.h +199 -0
- include/eigen3/Eigen/src/Core/arch/SSE/PacketMath.h +1505 -0
- include/eigen3/Eigen/src/Core/arch/SSE/TypeCasting.h +142 -0
- include/eigen3/Eigen/src/Core/arch/SVE/MathFunctions.h +44 -0
- include/eigen3/Eigen/src/Core/arch/SVE/PacketMath.h +752 -0
- include/eigen3/Eigen/src/Core/arch/SVE/TypeCasting.h +49 -0
- include/eigen3/Eigen/src/Core/arch/SYCL/InteropHeaders.h +232 -0
- include/eigen3/Eigen/src/Core/arch/SYCL/MathFunctions.h +301 -0
- include/eigen3/Eigen/src/Core/arch/SYCL/PacketMath.h +670 -0
- include/eigen3/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +694 -0
- include/eigen3/Eigen/src/Core/arch/SYCL/TypeCasting.h +85 -0
- include/eigen3/Eigen/src/Core/arch/ZVector/Complex.h +426 -0
- include/eigen3/Eigen/src/Core/arch/ZVector/MathFunctions.h +233 -0
- include/eigen3/Eigen/src/Core/arch/ZVector/PacketMath.h +1060 -0
- include/eigen3/Eigen/src/Core/functors/AssignmentFunctors.h +177 -0
- include/eigen3/Eigen/src/Core/functors/BinaryFunctors.h +541 -0
- include/eigen3/Eigen/src/Core/functors/NullaryFunctors.h +189 -0
- include/eigen3/Eigen/src/Core/functors/StlFunctors.h +166 -0
- include/eigen3/Eigen/src/Core/functors/TernaryFunctors.h +25 -0
- include/eigen3/Eigen/src/Core/functors/UnaryFunctors.h +1131 -0
- include/eigen3/Eigen/src/Core/products/GeneralBlockPanelKernel.h +2645 -0
- include/eigen3/Eigen/src/Core/products/GeneralMatrixMatrix.h +517 -0
- include/eigen3/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +317 -0
- include/eigen3/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +145 -0
- include/eigen3/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +124 -0
- include/eigen3/Eigen/src/Core/products/GeneralMatrixVector.h +518 -0
- include/eigen3/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +136 -0
- include/eigen3/Eigen/src/Core/products/Parallelizer.h +180 -0
- include/eigen3/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +544 -0
- include/eigen3/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +295 -0
- include/eigen3/Eigen/src/Core/products/SelfadjointMatrixVector.h +262 -0
- include/eigen3/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +118 -0
- include/eigen3/Eigen/src/Core/products/SelfadjointProduct.h +133 -0
- include/eigen3/Eigen/src/Core/products/SelfadjointRank2Update.h +94 -0
- include/eigen3/Eigen/src/Core/products/TriangularMatrixMatrix.h +472 -0
- include/eigen3/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +317 -0
- include/eigen3/Eigen/src/Core/products/TriangularMatrixVector.h +350 -0
- include/eigen3/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +255 -0
- include/eigen3/Eigen/src/Core/products/TriangularSolverMatrix.h +337 -0
- include/eigen3/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +167 -0
- include/eigen3/Eigen/src/Core/products/TriangularSolverVector.h +148 -0
- include/eigen3/Eigen/src/Core/util/BlasUtil.h +583 -0
- include/eigen3/Eigen/src/Core/util/ConfigureVectorization.h +512 -0
- include/eigen3/Eigen/src/Core/util/Constants.h +563 -0
- include/eigen3/Eigen/src/Core/util/DisableStupidWarnings.h +106 -0
- include/eigen3/Eigen/src/Core/util/ForwardDeclarations.h +322 -0
- include/eigen3/Eigen/src/Core/util/IndexedViewHelper.h +186 -0
- include/eigen3/Eigen/src/Core/util/IntegralConstant.h +272 -0
- include/eigen3/Eigen/src/Core/util/MKL_support.h +137 -0
- include/eigen3/Eigen/src/Core/util/Macros.h +1464 -0
- include/eigen3/Eigen/src/Core/util/Memory.h +1163 -0
- include/eigen3/Eigen/src/Core/util/Meta.h +812 -0
- include/eigen3/Eigen/src/Core/util/NonMPL2.h +3 -0
- include/eigen3/Eigen/src/Core/util/ReenableStupidWarnings.h +31 -0
- include/eigen3/Eigen/src/Core/util/ReshapedHelper.h +51 -0
- include/eigen3/Eigen/src/Core/util/StaticAssert.h +221 -0
- include/eigen3/Eigen/src/Core/util/SymbolicIndex.h +293 -0
- include/eigen3/Eigen/src/Core/util/XprHelper.h +856 -0
- include/eigen3/Eigen/src/Eigenvalues/ComplexEigenSolver.h +346 -0
- include/eigen3/Eigen/src/Eigenvalues/ComplexSchur.h +462 -0
- include/eigen3/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +91 -0
- include/eigen3/Eigen/src/Eigenvalues/EigenSolver.h +622 -0
- include/eigen3/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +418 -0
- include/eigen3/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +226 -0
- include/eigen3/Eigen/src/Eigenvalues/HessenbergDecomposition.h +374 -0
- include/eigen3/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +158 -0
- include/eigen3/Eigen/src/Eigenvalues/RealQZ.h +657 -0
- include/eigen3/Eigen/src/Eigenvalues/RealSchur.h +558 -0
- include/eigen3/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +77 -0
- include/eigen3/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +904 -0
- include/eigen3/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +87 -0
- include/eigen3/Eigen/src/Eigenvalues/Tridiagonalization.h +561 -0
- include/eigen3/Eigen/src/Geometry/AlignedBox.h +486 -0
- include/eigen3/Eigen/src/Geometry/AngleAxis.h +247 -0
- include/eigen3/Eigen/src/Geometry/EulerAngles.h +114 -0
- include/eigen3/Eigen/src/Geometry/Homogeneous.h +501 -0
- include/eigen3/Eigen/src/Geometry/Hyperplane.h +282 -0
- include/eigen3/Eigen/src/Geometry/OrthoMethods.h +235 -0
- include/eigen3/Eigen/src/Geometry/ParametrizedLine.h +232 -0
- include/eigen3/Eigen/src/Geometry/Quaternion.h +870 -0
- include/eigen3/Eigen/src/Geometry/Rotation2D.h +199 -0
- include/eigen3/Eigen/src/Geometry/RotationBase.h +206 -0
- include/eigen3/Eigen/src/Geometry/Scaling.h +188 -0
- include/eigen3/Eigen/src/Geometry/Transform.h +1563 -0
- include/eigen3/Eigen/src/Geometry/Translation.h +202 -0
- include/eigen3/Eigen/src/Geometry/Umeyama.h +166 -0
- include/eigen3/Eigen/src/Geometry/arch/Geometry_SIMD.h +168 -0
- include/eigen3/Eigen/src/Householder/BlockHouseholder.h +110 -0
- include/eigen3/Eigen/src/Householder/Householder.h +176 -0
- include/eigen3/Eigen/src/Householder/HouseholderSequence.h +545 -0
- include/eigen3/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +226 -0
- include/eigen3/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +212 -0
- include/eigen3/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +229 -0
- include/eigen3/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +394 -0
- include/eigen3/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +453 -0
- include/eigen3/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +444 -0
- include/eigen3/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +198 -0
- include/eigen3/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +117 -0
- include/eigen3/Eigen/src/Jacobi/Jacobi.h +483 -0
- include/eigen3/Eigen/src/KLUSupport/KLUSupport.h +358 -0
- include/eigen3/Eigen/src/LU/Determinant.h +117 -0
- include/eigen3/Eigen/src/LU/FullPivLU.h +877 -0
- include/eigen3/Eigen/src/LU/InverseImpl.h +432 -0
- include/eigen3/Eigen/src/LU/PartialPivLU.h +624 -0
- include/eigen3/Eigen/src/LU/PartialPivLU_LAPACKE.h +83 -0
- include/eigen3/Eigen/src/LU/arch/InverseSize4.h +351 -0
- include/eigen3/Eigen/src/MetisSupport/MetisSupport.h +137 -0
- include/eigen3/Eigen/src/OrderingMethods/Amd.h +435 -0
- include/eigen3/Eigen/src/OrderingMethods/Eigen_Colamd.h +1863 -0
- include/eigen3/Eigen/src/OrderingMethods/Ordering.h +153 -0
- include/eigen3/Eigen/src/PaStiXSupport/PaStiXSupport.h +678 -0
- include/eigen3/Eigen/src/PardisoSupport/PardisoSupport.h +545 -0
- include/eigen3/Eigen/src/QR/ColPivHouseholderQR.h +674 -0
- include/eigen3/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +97 -0
- include/eigen3/Eigen/src/QR/CompleteOrthogonalDecomposition.h +635 -0
- include/eigen3/Eigen/src/QR/FullPivHouseholderQR.h +713 -0
- include/eigen3/Eigen/src/QR/HouseholderQR.h +434 -0
- include/eigen3/Eigen/src/QR/HouseholderQR_LAPACKE.h +68 -0
- include/eigen3/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +335 -0
- include/eigen3/Eigen/src/SVD/BDCSVD.h +1366 -0
- include/eigen3/Eigen/src/SVD/JacobiSVD.h +812 -0
- include/eigen3/Eigen/src/SVD/JacobiSVD_LAPACKE.h +91 -0
- include/eigen3/Eigen/src/SVD/SVDBase.h +376 -0
- include/eigen3/Eigen/src/SVD/UpperBidiagonalization.h +414 -0
- include/eigen3/Eigen/src/SparseCholesky/SimplicialCholesky.h +697 -0
- include/eigen3/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +174 -0
- include/eigen3/Eigen/src/SparseCore/AmbiVector.h +378 -0
- include/eigen3/Eigen/src/SparseCore/CompressedStorage.h +274 -0
- include/eigen3/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +352 -0
- include/eigen3/Eigen/src/SparseCore/MappedSparseMatrix.h +67 -0
- include/eigen3/Eigen/src/SparseCore/SparseAssign.h +270 -0
- include/eigen3/Eigen/src/SparseCore/SparseBlock.h +571 -0
- include/eigen3/Eigen/src/SparseCore/SparseColEtree.h +206 -0
- include/eigen3/Eigen/src/SparseCore/SparseCompressedBase.h +370 -0
- include/eigen3/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +722 -0
- include/eigen3/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +150 -0
- include/eigen3/Eigen/src/SparseCore/SparseDenseProduct.h +342 -0
- include/eigen3/Eigen/src/SparseCore/SparseDiagonalProduct.h +138 -0
- include/eigen3/Eigen/src/SparseCore/SparseDot.h +98 -0
- include/eigen3/Eigen/src/SparseCore/SparseFuzzy.h +29 -0
- include/eigen3/Eigen/src/SparseCore/SparseMap.h +305 -0
- include/eigen3/Eigen/src/SparseCore/SparseMatrix.h +1518 -0
- include/eigen3/Eigen/src/SparseCore/SparseMatrixBase.h +398 -0
- include/eigen3/Eigen/src/SparseCore/SparsePermutation.h +178 -0
- include/eigen3/Eigen/src/SparseCore/SparseProduct.h +181 -0
- include/eigen3/Eigen/src/SparseCore/SparseRedux.h +49 -0
- include/eigen3/Eigen/src/SparseCore/SparseRef.h +397 -0
- include/eigen3/Eigen/src/SparseCore/SparseSelfAdjointView.h +659 -0
- include/eigen3/Eigen/src/SparseCore/SparseSolverBase.h +124 -0
- include/eigen3/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +198 -0
- include/eigen3/Eigen/src/SparseCore/SparseTranspose.h +92 -0
- include/eigen3/Eigen/src/SparseCore/SparseTriangularView.h +189 -0
- include/eigen3/Eigen/src/SparseCore/SparseUtil.h +186 -0
- include/eigen3/Eigen/src/SparseCore/SparseVector.h +478 -0
- include/eigen3/Eigen/src/SparseCore/SparseView.h +254 -0
- include/eigen3/Eigen/src/SparseCore/TriangularSolver.h +315 -0
- include/eigen3/Eigen/src/SparseLU/SparseLU.h +923 -0
- include/eigen3/Eigen/src/SparseLU/SparseLUImpl.h +66 -0
- include/eigen3/Eigen/src/SparseLU/SparseLU_Memory.h +226 -0
- include/eigen3/Eigen/src/SparseLU/SparseLU_Structs.h +110 -0
- include/eigen3/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +375 -0
- include/eigen3/Eigen/src/SparseLU/SparseLU_Utils.h +80 -0
- include/eigen3/Eigen/src/SparseLU/SparseLU_column_bmod.h +181 -0
- include/eigen3/Eigen/src/SparseLU/SparseLU_column_dfs.h +179 -0
- include/eigen3/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +107 -0
- include/eigen3/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +280 -0
- include/eigen3/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +126 -0
- include/eigen3/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +130 -0
- include/eigen3/Eigen/src/SparseLU/SparseLU_panel_bmod.h +223 -0
- include/eigen3/Eigen/src/SparseLU/SparseLU_panel_dfs.h +258 -0
- include/eigen3/Eigen/src/SparseLU/SparseLU_pivotL.h +137 -0
- include/eigen3/Eigen/src/SparseLU/SparseLU_pruneL.h +136 -0
- include/eigen3/Eigen/src/SparseLU/SparseLU_relax_snode.h +83 -0
- include/eigen3/Eigen/src/SparseQR/SparseQR.h +758 -0
- include/eigen3/Eigen/src/StlSupport/StdDeque.h +116 -0
- include/eigen3/Eigen/src/StlSupport/StdList.h +106 -0
- include/eigen3/Eigen/src/StlSupport/StdVector.h +131 -0
- include/eigen3/Eigen/src/StlSupport/details.h +84 -0
- include/eigen3/Eigen/src/SuperLUSupport/SuperLUSupport.h +1025 -0
- include/eigen3/Eigen/src/UmfPackSupport/UmfPackSupport.h +642 -0
- include/eigen3/Eigen/src/misc/Image.h +82 -0
- include/eigen3/Eigen/src/misc/Kernel.h +79 -0
- include/eigen3/Eigen/src/misc/RealSvd2x2.h +55 -0
- include/eigen3/Eigen/src/misc/blas.h +440 -0
- include/eigen3/Eigen/src/misc/lapack.h +152 -0
- include/eigen3/Eigen/src/misc/lapacke.h +16292 -0
- include/eigen3/Eigen/src/misc/lapacke_mangling.h +17 -0
- include/eigen3/Eigen/src/plugins/ArrayCwiseBinaryOps.h +358 -0
- include/eigen3/Eigen/src/plugins/ArrayCwiseUnaryOps.h +696 -0
- include/eigen3/Eigen/src/plugins/BlockMethods.h +1442 -0
- include/eigen3/Eigen/src/plugins/CommonCwiseBinaryOps.h +115 -0
- include/eigen3/Eigen/src/plugins/CommonCwiseUnaryOps.h +177 -0
- include/eigen3/Eigen/src/plugins/IndexedViewMethods.h +262 -0
- include/eigen3/Eigen/src/plugins/MatrixCwiseBinaryOps.h +152 -0
- include/eigen3/Eigen/src/plugins/MatrixCwiseUnaryOps.h +95 -0
- include/eigen3/Eigen/src/plugins/ReshapedMethods.h +149 -0
- include/eigen3/signature_of_eigen3_matrix_library +1 -0
- include/eigen3/unsupported/Eigen/AdolcForward +159 -0
- include/eigen3/unsupported/Eigen/AlignedVector3 +234 -0
- include/eigen3/unsupported/Eigen/ArpackSupport +30 -0
- include/eigen3/unsupported/Eigen/AutoDiff +46 -0
- include/eigen3/unsupported/Eigen/BVH +95 -0
- include/eigen3/unsupported/Eigen/CXX11/Tensor +137 -0
- include/eigen3/unsupported/Eigen/CXX11/TensorSymmetry +42 -0
- include/eigen3/unsupported/Eigen/CXX11/ThreadPool +74 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/Tensor.h +554 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h +329 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h +247 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +1176 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h +1559 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h +1093 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h +518 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h +377 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h +1023 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h +73 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h +6 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h +1413 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h +575 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionSycl.h +1650 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h +1679 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h +456 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h +1132 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorConvolutionSycl.h +544 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h +214 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h +347 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h +137 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h +6 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h +104 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceGpu.h +389 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h +1048 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h +409 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h +236 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h +490 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h +236 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h +983 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h +703 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h +388 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h +669 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h +379 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h +237 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h +191 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h +488 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h +302 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorGlobalFunctions.h +33 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h +99 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaUndefines.h +44 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h +79 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h +603 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h +738 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h +247 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorInitializer.h +82 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h +263 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h +216 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMacros.h +98 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h +327 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h +311 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h +1102 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h +708 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h +291 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h +322 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h +998 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h +6 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h +966 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReductionSycl.h +582 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h +454 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h +465 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h +528 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorScanSycl.h +513 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h +471 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h +161 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h +346 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorTrace.h +303 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h +264 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h +249 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h +629 -0
- include/eigen3/unsupported/Eigen/CXX11/src/TensorSymmetry/DynamicSymmetry.h +293 -0
- include/eigen3/unsupported/Eigen/CXX11/src/TensorSymmetry/StaticSymmetry.h +236 -0
- include/eigen3/unsupported/Eigen/CXX11/src/TensorSymmetry/Symmetry.h +338 -0
- include/eigen3/unsupported/Eigen/CXX11/src/TensorSymmetry/util/TemplateGroupTheory.h +669 -0
- include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/Barrier.h +67 -0
- include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/EventCount.h +249 -0
- include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h +486 -0
- include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/RunQueue.h +236 -0
- include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/ThreadCancel.h +23 -0
- include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/ThreadEnvironment.h +40 -0
- include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/ThreadLocal.h +301 -0
- include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/ThreadPoolInterface.h +48 -0
- include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/ThreadYield.h +20 -0
- include/eigen3/unsupported/Eigen/CXX11/src/util/CXX11Meta.h +537 -0
- include/eigen3/unsupported/Eigen/CXX11/src/util/CXX11Workarounds.h +88 -0
- include/eigen3/unsupported/Eigen/CXX11/src/util/EmulateArray.h +261 -0
- include/eigen3/unsupported/Eigen/CXX11/src/util/MaxSizeVector.h +158 -0
- include/eigen3/unsupported/Eigen/EulerAngles +43 -0
- include/eigen3/unsupported/Eigen/FFT +419 -0
- include/eigen3/unsupported/Eigen/IterativeSolvers +51 -0
- include/eigen3/unsupported/Eigen/KroneckerProduct +36 -0
- include/eigen3/unsupported/Eigen/LevenbergMarquardt +49 -0
- include/eigen3/unsupported/Eigen/MPRealSupport +213 -0
- include/eigen3/unsupported/Eigen/MatrixFunctions +504 -0
- include/eigen3/unsupported/Eigen/MoreVectorization +24 -0
- include/eigen3/unsupported/Eigen/NonLinearOptimization +140 -0
- include/eigen3/unsupported/Eigen/NumericalDiff +56 -0
- include/eigen3/unsupported/Eigen/OpenGLSupport +322 -0
- include/eigen3/unsupported/Eigen/Polynomials +137 -0
- include/eigen3/unsupported/Eigen/Skyline +39 -0
- include/eigen3/unsupported/Eigen/SparseExtra +54 -0
- include/eigen3/unsupported/Eigen/SpecialFunctions +103 -0
- include/eigen3/unsupported/Eigen/Splines +35 -0
- include/eigen3/unsupported/Eigen/src/AutoDiff/AutoDiffJacobian.h +108 -0
- include/eigen3/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h +730 -0
- include/eigen3/unsupported/Eigen/src/AutoDiff/AutoDiffVector.h +220 -0
- include/eigen3/unsupported/Eigen/src/BVH/BVAlgorithms.h +293 -0
- include/eigen3/unsupported/Eigen/src/BVH/KdBVH.h +223 -0
- include/eigen3/unsupported/Eigen/src/Eigenvalues/ArpackSelfAdjointEigenSolver.h +790 -0
- include/eigen3/unsupported/Eigen/src/EulerAngles/EulerAngles.h +355 -0
- include/eigen3/unsupported/Eigen/src/EulerAngles/EulerSystem.h +305 -0
- include/eigen3/unsupported/Eigen/src/FFT/ei_fftw_impl.h +261 -0
- include/eigen3/unsupported/Eigen/src/FFT/ei_kissfft_impl.h +449 -0
- include/eigen3/unsupported/Eigen/src/IterativeSolvers/ConstrainedConjGrad.h +187 -0
- include/eigen3/unsupported/Eigen/src/IterativeSolvers/DGMRES.h +511 -0
- include/eigen3/unsupported/Eigen/src/IterativeSolvers/GMRES.h +335 -0
- include/eigen3/unsupported/Eigen/src/IterativeSolvers/IDRS.h +436 -0
- include/eigen3/unsupported/Eigen/src/IterativeSolvers/IncompleteLU.h +90 -0
- include/eigen3/unsupported/Eigen/src/IterativeSolvers/IterationController.h +154 -0
- include/eigen3/unsupported/Eigen/src/IterativeSolvers/MINRES.h +267 -0
- include/eigen3/unsupported/Eigen/src/IterativeSolvers/Scaling.h +193 -0
- include/eigen3/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h +305 -0
- include/eigen3/unsupported/Eigen/src/LevenbergMarquardt/LMcovar.h +84 -0
- include/eigen3/unsupported/Eigen/src/LevenbergMarquardt/LMonestep.h +202 -0
- include/eigen3/unsupported/Eigen/src/LevenbergMarquardt/LMpar.h +160 -0
- include/eigen3/unsupported/Eigen/src/LevenbergMarquardt/LMqrsolv.h +188 -0
- include/eigen3/unsupported/Eigen/src/LevenbergMarquardt/LevenbergMarquardt.h +396 -0
- include/eigen3/unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h +441 -0
- include/eigen3/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h +569 -0
- include/eigen3/unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h +373 -0
- include/eigen3/unsupported/Eigen/src/MatrixFunctions/MatrixPower.h +705 -0
- include/eigen3/unsupported/Eigen/src/MatrixFunctions/MatrixSquareRoot.h +368 -0
- include/eigen3/unsupported/Eigen/src/MatrixFunctions/StemFunction.h +117 -0
- include/eigen3/unsupported/Eigen/src/MoreVectorization/MathFunctions.h +95 -0
- include/eigen3/unsupported/Eigen/src/NonLinearOptimization/HybridNonLinearSolver.h +601 -0
- include/eigen3/unsupported/Eigen/src/NonLinearOptimization/LevenbergMarquardt.h +657 -0
- include/eigen3/unsupported/Eigen/src/NonLinearOptimization/chkder.h +66 -0
- include/eigen3/unsupported/Eigen/src/NonLinearOptimization/covar.h +70 -0
- include/eigen3/unsupported/Eigen/src/NonLinearOptimization/dogleg.h +107 -0
- include/eigen3/unsupported/Eigen/src/NonLinearOptimization/fdjac1.h +79 -0
- include/eigen3/unsupported/Eigen/src/NonLinearOptimization/lmpar.h +298 -0
- include/eigen3/unsupported/Eigen/src/NonLinearOptimization/qrsolv.h +91 -0
- include/eigen3/unsupported/Eigen/src/NonLinearOptimization/r1mpyq.h +30 -0
- include/eigen3/unsupported/Eigen/src/NonLinearOptimization/r1updt.h +99 -0
- include/eigen3/unsupported/Eigen/src/NonLinearOptimization/rwupdt.h +49 -0
- include/eigen3/unsupported/Eigen/src/NumericalDiff/NumericalDiff.h +130 -0
- include/eigen3/unsupported/Eigen/src/Polynomials/Companion.h +280 -0
- include/eigen3/unsupported/Eigen/src/Polynomials/PolynomialSolver.h +428 -0
- include/eigen3/unsupported/Eigen/src/Polynomials/PolynomialUtils.h +143 -0
- include/eigen3/unsupported/Eigen/src/Skyline/SkylineInplaceLU.h +352 -0
- include/eigen3/unsupported/Eigen/src/Skyline/SkylineMatrix.h +862 -0
- include/eigen3/unsupported/Eigen/src/Skyline/SkylineMatrixBase.h +212 -0
- include/eigen3/unsupported/Eigen/src/Skyline/SkylineProduct.h +295 -0
- include/eigen3/unsupported/Eigen/src/Skyline/SkylineStorage.h +259 -0
- include/eigen3/unsupported/Eigen/src/Skyline/SkylineUtil.h +89 -0
- include/eigen3/unsupported/Eigen/src/SparseExtra/BlockOfDynamicSparseMatrix.h +122 -0
- include/eigen3/unsupported/Eigen/src/SparseExtra/BlockSparseMatrix.h +1079 -0
- include/eigen3/unsupported/Eigen/src/SparseExtra/DynamicSparseMatrix.h +404 -0
- include/eigen3/unsupported/Eigen/src/SparseExtra/MarketIO.h +282 -0
- include/eigen3/unsupported/Eigen/src/SparseExtra/MatrixMarketIterator.h +247 -0
- include/eigen3/unsupported/Eigen/src/SparseExtra/RandomSetter.h +349 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsArrayAPI.h +286 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsBFloat16.h +68 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsFunctors.h +357 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsHalf.h +66 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsImpl.h +1959 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsPacketMath.h +118 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/HipVectorCompatibility.h +67 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsArrayAPI.h +167 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsBFloat16.h +58 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h +330 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h +58 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h +2045 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h +79 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/arch/AVX/BesselFunctions.h +46 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/arch/AVX/SpecialFunctions.h +16 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/arch/AVX512/BesselFunctions.h +46 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/arch/AVX512/SpecialFunctions.h +16 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/arch/GPU/SpecialFunctions.h +369 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/arch/NEON/BesselFunctions.h +54 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/arch/NEON/SpecialFunctions.h +34 -0
- include/eigen3/unsupported/Eigen/src/Splines/Spline.h +507 -0
- include/eigen3/unsupported/Eigen/src/Splines/SplineFitting.h +431 -0
- include/eigen3/unsupported/Eigen/src/Splines/SplineFwd.h +93 -0
- pylibsparseir/__init__.py +31 -0
- pylibsparseir/clean_build_artifacts.py +0 -1
- pylibsparseir/core.py +178 -62
- pylibsparseir/libsparseir.dylib +0 -0
- pylibsparseir-0.6.0.dist-info/METADATA +213 -0
- pylibsparseir-0.6.0.dist-info/RECORD +545 -0
- {pylibsparseir-0.1.0.dist-info → pylibsparseir-0.6.0.dist-info}/WHEEL +1 -1
- pylibsparseir-0.6.0.dist-info/licenses/LICENSE +21 -0
- share/eigen3/cmake/Eigen3Config.cmake +37 -0
- share/eigen3/cmake/Eigen3ConfigVersion.cmake +65 -0
- share/eigen3/cmake/Eigen3Targets.cmake +106 -0
- share/eigen3/cmake/UseEigen3.cmake +6 -0
- share/pkgconfig/eigen3.pc +9 -0
- pylibsparseir/libsparseir.0.4.2.dylib +0 -0
- pylibsparseir/libsparseir.0.dylib +0 -0
- pylibsparseir-0.1.0.dist-info/METADATA +0 -130
- pylibsparseir-0.1.0.dist-info/RECORD +0 -12
- pylibsparseir-0.1.0.dist-info/entry_points.txt +0 -2
- pylibsparseir-0.1.0.dist-info/top_level.txt +0 -1
|
@@ -0,0 +1,582 @@
|
|
|
1
|
+
// This file is part of Eigen, a lightweight C++ template library
|
|
2
|
+
// for linear algebra.
|
|
3
|
+
//
|
|
4
|
+
// Mehdi Goli Codeplay Software Ltd.
|
|
5
|
+
// Ralph Potter Codeplay Software Ltd.
|
|
6
|
+
// Luke Iwanski Codeplay Software Ltd.
|
|
7
|
+
// Contact: <eigen@codeplay.com>
|
|
8
|
+
//
|
|
9
|
+
// This Source Code Form is subject to the terms of the Mozilla
|
|
10
|
+
// Public License v. 2.0. If a copy of the MPL was not distributed
|
|
11
|
+
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
12
|
+
|
|
13
|
+
/*****************************************************************
|
|
14
|
+
* TensorReductionSycl.h
|
|
15
|
+
*
|
|
16
|
+
* \brief:
|
|
17
|
+
* This is the specialization of the reduction operation. Two phase reduction approach
|
|
18
|
+
* is used since the GPU does not have Global Synchronization for global memory among
|
|
19
|
+
* different work-group/thread block. To solve the problem, we need to create two kernels
|
|
20
|
+
* to reduce the data, where the first kernel reduces the data locally and each local
|
|
21
|
+
* workgroup/thread-block saves its locally reduced data into global memory. In the second phase (global reduction)
|
|
22
|
+
* one work-group/thread-block reduces the intermediate data into one single element.
|
|
23
|
+
* Here is an NVIDIA presentation explaining the optimized two phase reduction algorithm on GPU:
|
|
24
|
+
* https://developer.download.nvidia.com/assets/cuda/files/reduction.pdf
|
|
25
|
+
*
|
|
26
|
+
*****************************************************************/
|
|
27
|
+
|
|
28
|
+
#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSOR_REDUCTION_SYCL_HPP
|
|
29
|
+
#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSOR_REDUCTION_SYCL_HPP
|
|
30
|
+
namespace Eigen {
|
|
31
|
+
namespace TensorSycl {
|
|
32
|
+
namespace internal {
|
|
33
|
+
|
|
34
|
+
template <typename Op, typename CoeffReturnType, typename Index, bool Vectorizable>
|
|
35
|
+
struct OpDefiner {
|
|
36
|
+
typedef typename Vectorise<CoeffReturnType, Eigen::SyclDevice, Vectorizable>::PacketReturnType PacketReturnType;
|
|
37
|
+
typedef Op type;
|
|
38
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE type get_op(Op &op) { return op; }
|
|
39
|
+
|
|
40
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType finalise_op(const PacketReturnType &accumulator,
|
|
41
|
+
const Index &) {
|
|
42
|
+
return accumulator;
|
|
43
|
+
}
|
|
44
|
+
};
|
|
45
|
+
|
|
46
|
+
template <typename CoeffReturnType, typename Index>
|
|
47
|
+
struct OpDefiner<Eigen::internal::MeanReducer<CoeffReturnType>, CoeffReturnType, Index, false> {
|
|
48
|
+
typedef Eigen::internal::SumReducer<CoeffReturnType> type;
|
|
49
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE type get_op(Eigen::internal::MeanReducer<CoeffReturnType> &) {
|
|
50
|
+
return type();
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType finalise_op(const CoeffReturnType &accumulator,
|
|
54
|
+
const Index &scale) {
|
|
55
|
+
::Eigen::internal::scalar_quotient_op<CoeffReturnType> quotient_op;
|
|
56
|
+
return quotient_op(accumulator, CoeffReturnType(scale));
|
|
57
|
+
}
|
|
58
|
+
};
|
|
59
|
+
|
|
60
|
+
template <typename CoeffReturnType, typename Index>
|
|
61
|
+
struct OpDefiner<Eigen::internal::MeanReducer<CoeffReturnType>, CoeffReturnType, Index, true> {
|
|
62
|
+
typedef typename Vectorise<CoeffReturnType, Eigen::SyclDevice, true>::PacketReturnType PacketReturnType;
|
|
63
|
+
typedef Eigen::internal::SumReducer<CoeffReturnType> type;
|
|
64
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE type get_op(Eigen::internal::MeanReducer<CoeffReturnType> &) {
|
|
65
|
+
return type();
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType finalise_op(const PacketReturnType &accumulator,
|
|
69
|
+
const Index &scale) {
|
|
70
|
+
return ::Eigen::internal::pdiv(accumulator, ::Eigen::internal::pset1<PacketReturnType>(CoeffReturnType(scale)));
|
|
71
|
+
}
|
|
72
|
+
};
|
|
73
|
+
|
|
74
|
+
template <typename CoeffReturnType, typename OpType, typename InputAccessor, typename OutputAccessor, typename Index,
|
|
75
|
+
Index local_range>
|
|
76
|
+
struct SecondStepFullReducer {
|
|
77
|
+
typedef cl::sycl::accessor<CoeffReturnType, 1, cl::sycl::access::mode::read_write, cl::sycl::access::target::local>
|
|
78
|
+
LocalAccessor;
|
|
79
|
+
typedef OpDefiner<OpType, CoeffReturnType, Index, true> OpDef;
|
|
80
|
+
typedef typename OpDef::type Op;
|
|
81
|
+
LocalAccessor scratch;
|
|
82
|
+
InputAccessor aI;
|
|
83
|
+
OutputAccessor outAcc;
|
|
84
|
+
Op op;
|
|
85
|
+
SecondStepFullReducer(LocalAccessor scratch_, InputAccessor aI_, OutputAccessor outAcc_, OpType op_)
|
|
86
|
+
: scratch(scratch_), aI(aI_), outAcc(outAcc_), op(OpDef::get_op(op_)) {}
|
|
87
|
+
|
|
88
|
+
void operator()(cl::sycl::nd_item<1> itemID) {
|
|
89
|
+
// Our empirical research shows that the best performance will be achieved
|
|
90
|
+
// when there is only one element per thread to reduce in the second step.
|
|
91
|
+
// In that case the second-step reduction time is almost negligible.
|
|
92
|
+
// Hence, in the second step of reduction the input size is fixed to the
|
|
93
|
+
// local size, thus, there is only one element read per thread. The
|
|
94
|
+
// algorithm must be changed if the number of reduce per thread in the
|
|
95
|
+
// second step is greater than 1. Otherwise, the result will be wrong.
|
|
96
|
+
const Index localid = itemID.get_local_id(0);
|
|
97
|
+
auto aInPtr = aI.get_pointer() + localid;
|
|
98
|
+
auto aOutPtr = outAcc.get_pointer();
|
|
99
|
+
CoeffReturnType *scratchptr = scratch.get_pointer();
|
|
100
|
+
CoeffReturnType accumulator = *aInPtr;
|
|
101
|
+
|
|
102
|
+
scratchptr[localid] = op.finalize(accumulator);
|
|
103
|
+
for (Index offset = itemID.get_local_range(0) / 2; offset > 0; offset /= 2) {
|
|
104
|
+
itemID.barrier(cl::sycl::access::fence_space::local_space);
|
|
105
|
+
if (localid < offset) {
|
|
106
|
+
op.reduce(scratchptr[localid + offset], &accumulator);
|
|
107
|
+
scratchptr[localid] = op.finalize(accumulator);
|
|
108
|
+
}
|
|
109
|
+
}
|
|
110
|
+
if (localid == 0) *aOutPtr = op.finalize(accumulator);
|
|
111
|
+
}
|
|
112
|
+
};
|
|
113
|
+
|
|
114
|
+
// Full reduction first phase. In this version the vectorization is true and the reduction accepts
|
|
115
|
+
// any generic reducerOp, e.g. (max, min, sum, mean, iamax, iamin, etc.).
|
|
116
|
+
template <typename Evaluator, typename OpType, typename Evaluator::Index local_range>
|
|
117
|
+
class FullReductionKernelFunctor {
|
|
118
|
+
public:
|
|
119
|
+
typedef typename Evaluator::CoeffReturnType CoeffReturnType;
|
|
120
|
+
typedef typename Evaluator::Index Index;
|
|
121
|
+
typedef OpDefiner<OpType, typename Evaluator::CoeffReturnType, Index,
|
|
122
|
+
(Evaluator::ReducerTraits::PacketAccess & Evaluator::InputPacketAccess)>
|
|
123
|
+
OpDef;
|
|
124
|
+
|
|
125
|
+
typedef typename OpDef::type Op;
|
|
126
|
+
typedef typename Evaluator::EvaluatorPointerType EvaluatorPointerType;
|
|
127
|
+
typedef typename Evaluator::PacketReturnType PacketReturnType;
|
|
128
|
+
typedef
|
|
129
|
+
typename ::Eigen::internal::conditional<(Evaluator::ReducerTraits::PacketAccess & Evaluator::InputPacketAccess),
|
|
130
|
+
PacketReturnType, CoeffReturnType>::type OutType;
|
|
131
|
+
typedef cl::sycl::accessor<OutType, 1, cl::sycl::access::mode::read_write, cl::sycl::access::target::local>
|
|
132
|
+
LocalAccessor;
|
|
133
|
+
LocalAccessor scratch;
|
|
134
|
+
Evaluator evaluator;
|
|
135
|
+
EvaluatorPointerType final_output;
|
|
136
|
+
Index rng;
|
|
137
|
+
Op op;
|
|
138
|
+
|
|
139
|
+
FullReductionKernelFunctor(LocalAccessor scratch_, Evaluator evaluator_, EvaluatorPointerType final_output_,
|
|
140
|
+
Index rng_, OpType op_)
|
|
141
|
+
: scratch(scratch_), evaluator(evaluator_), final_output(final_output_), rng(rng_), op(OpDef::get_op(op_)) {}
|
|
142
|
+
|
|
143
|
+
void operator()(cl::sycl::nd_item<1> itemID) { compute_reduction(itemID); }
|
|
144
|
+
|
|
145
|
+
template <bool Vect = (Evaluator::ReducerTraits::PacketAccess & Evaluator::InputPacketAccess)>
|
|
146
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename ::Eigen::internal::enable_if<Vect>::type compute_reduction(
|
|
147
|
+
const cl::sycl::nd_item<1> &itemID) {
|
|
148
|
+
auto output_ptr = final_output.get_pointer();
|
|
149
|
+
Index VectorizedRange = (rng / Evaluator::PacketSize) * Evaluator::PacketSize;
|
|
150
|
+
Index globalid = itemID.get_global_id(0);
|
|
151
|
+
Index localid = itemID.get_local_id(0);
|
|
152
|
+
Index step = Evaluator::PacketSize * itemID.get_global_range(0);
|
|
153
|
+
Index start = Evaluator::PacketSize * globalid;
|
|
154
|
+
// vectorizable parts
|
|
155
|
+
PacketReturnType packetAccumulator = op.template initializePacket<PacketReturnType>();
|
|
156
|
+
for (Index i = start; i < VectorizedRange; i += step) {
|
|
157
|
+
op.template reducePacket<PacketReturnType>(evaluator.impl().template packet<Unaligned>(i), &packetAccumulator);
|
|
158
|
+
}
|
|
159
|
+
globalid += VectorizedRange;
|
|
160
|
+
// non vectorizable parts
|
|
161
|
+
for (Index i = globalid; i < rng; i += itemID.get_global_range(0)) {
|
|
162
|
+
op.template reducePacket<PacketReturnType>(
|
|
163
|
+
::Eigen::TensorSycl::internal::PacketWrapper<PacketReturnType, Evaluator::PacketSize>::convert_to_packet_type(
|
|
164
|
+
evaluator.impl().coeff(i), op.initialize()),
|
|
165
|
+
&packetAccumulator);
|
|
166
|
+
}
|
|
167
|
+
scratch[localid] = packetAccumulator =
|
|
168
|
+
OpDef::finalise_op(op.template finalizePacket<PacketReturnType>(packetAccumulator), rng);
|
|
169
|
+
// reduction parts. The local size is always a power of 2
|
|
170
|
+
EIGEN_UNROLL_LOOP
|
|
171
|
+
for (Index offset = local_range / 2; offset > 0; offset /= 2) {
|
|
172
|
+
itemID.barrier(cl::sycl::access::fence_space::local_space);
|
|
173
|
+
if (localid < offset) {
|
|
174
|
+
op.template reducePacket<PacketReturnType>(scratch[localid + offset], &packetAccumulator);
|
|
175
|
+
scratch[localid] = op.template finalizePacket<PacketReturnType>(packetAccumulator);
|
|
176
|
+
}
|
|
177
|
+
}
|
|
178
|
+
if (localid == 0) {
|
|
179
|
+
output_ptr[itemID.get_group(0)] =
|
|
180
|
+
op.finalizeBoth(op.initialize(), op.template finalizePacket<PacketReturnType>(packetAccumulator));
|
|
181
|
+
}
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
template <bool Vect = (Evaluator::ReducerTraits::PacketAccess & Evaluator::InputPacketAccess)>
|
|
185
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename ::Eigen::internal::enable_if<!Vect>::type compute_reduction(
|
|
186
|
+
const cl::sycl::nd_item<1> &itemID) {
|
|
187
|
+
auto output_ptr = final_output.get_pointer();
|
|
188
|
+
Index globalid = itemID.get_global_id(0);
|
|
189
|
+
Index localid = itemID.get_local_id(0);
|
|
190
|
+
// scalar accumulator: this overload has no vectorizable part
|
|
191
|
+
CoeffReturnType accumulator = op.initialize();
|
|
192
|
+
// non vectorizable parts
|
|
193
|
+
for (Index i = globalid; i < rng; i += itemID.get_global_range(0)) {
|
|
194
|
+
op.reduce(evaluator.impl().coeff(i), &accumulator);
|
|
195
|
+
}
|
|
196
|
+
scratch[localid] = accumulator = OpDef::finalise_op(op.finalize(accumulator), rng);
|
|
197
|
+
|
|
198
|
+
// reduction parts. The local size is always a power of 2
|
|
199
|
+
EIGEN_UNROLL_LOOP
|
|
200
|
+
for (Index offset = local_range / 2; offset > 0; offset /= 2) {
|
|
201
|
+
itemID.barrier(cl::sycl::access::fence_space::local_space);
|
|
202
|
+
if (localid < offset) {
|
|
203
|
+
op.reduce(scratch[localid + offset], &accumulator);
|
|
204
|
+
scratch[localid] = op.finalize(accumulator);
|
|
205
|
+
}
|
|
206
|
+
}
|
|
207
|
+
if (localid == 0) {
|
|
208
|
+
output_ptr[itemID.get_group(0)] = op.finalize(accumulator);
|
|
209
|
+
}
|
|
210
|
+
}
|
|
211
|
+
};
|
|
212
|
+
|
|
213
|
+
template <typename Evaluator, typename OpType>
|
|
214
|
+
class GenericNondeterministicReducer {
|
|
215
|
+
public:
|
|
216
|
+
typedef typename Evaluator::CoeffReturnType CoeffReturnType;
|
|
217
|
+
typedef typename Evaluator::EvaluatorPointerType EvaluatorPointerType;
|
|
218
|
+
typedef typename Evaluator::Index Index;
|
|
219
|
+
typedef OpDefiner<OpType, CoeffReturnType, Index, false> OpDef;
|
|
220
|
+
typedef typename OpDef::type Op;
|
|
221
|
+
template <typename Scratch>
|
|
222
|
+
GenericNondeterministicReducer(Scratch, Evaluator evaluator_, EvaluatorPointerType output_accessor_, OpType functor_,
|
|
223
|
+
Index range_, Index num_values_to_reduce_)
|
|
224
|
+
: evaluator(evaluator_),
|
|
225
|
+
output_accessor(output_accessor_),
|
|
226
|
+
functor(OpDef::get_op(functor_)),
|
|
227
|
+
range(range_),
|
|
228
|
+
num_values_to_reduce(num_values_to_reduce_) {}
|
|
229
|
+
|
|
230
|
+
void operator()(cl::sycl::nd_item<1> itemID) {
|
|
231
|
+
auto output_accessor_ptr = output_accessor.get_pointer();
|
|
232
|
+
/// const cast added as a naive solution to solve the qualifier drop error
|
|
233
|
+
Index globalid = static_cast<Index>(itemID.get_global_linear_id());
|
|
234
|
+
if (globalid < range) {
|
|
235
|
+
CoeffReturnType accum = functor.initialize();
|
|
236
|
+
Eigen::internal::GenericDimReducer<Evaluator::NumReducedDims - 1, Evaluator, Op>::reduce(
|
|
237
|
+
evaluator, evaluator.firstInput(globalid), functor, &accum);
|
|
238
|
+
output_accessor_ptr[globalid] = OpDef::finalise_op(functor.finalize(accum), num_values_to_reduce);
|
|
239
|
+
}
|
|
240
|
+
}
|
|
241
|
+
|
|
242
|
+
private:
|
|
243
|
+
Evaluator evaluator;
|
|
244
|
+
EvaluatorPointerType output_accessor;
|
|
245
|
+
Op functor;
|
|
246
|
+
Index range;
|
|
247
|
+
Index num_values_to_reduce;
|
|
248
|
+
};
|
|
249
|
+
|
|
250
|
+
enum class reduction_dim { inner_most, outer_most };
|
|
251
|
+
// default is preserver
|
|
252
|
+
template <typename Evaluator, typename OpType, typename PannelParameters, reduction_dim rt>
|
|
253
|
+
struct PartialReductionKernel {
|
|
254
|
+
typedef typename Evaluator::CoeffReturnType CoeffReturnType;
|
|
255
|
+
typedef typename Evaluator::EvaluatorPointerType EvaluatorPointerType;
|
|
256
|
+
typedef typename Evaluator::Index Index;
|
|
257
|
+
typedef OpDefiner<OpType, CoeffReturnType, Index, false> OpDef;
|
|
258
|
+
typedef typename OpDef::type Op;
|
|
259
|
+
typedef cl::sycl::accessor<CoeffReturnType, 1, cl::sycl::access::mode::read_write, cl::sycl::access::target::local>
|
|
260
|
+
ScratchAcc;
|
|
261
|
+
ScratchAcc scratch;
|
|
262
|
+
Evaluator evaluator;
|
|
263
|
+
EvaluatorPointerType output_accessor;
|
|
264
|
+
Op op;
|
|
265
|
+
const Index preserve_elements_num_groups;
|
|
266
|
+
const Index reduce_elements_num_groups;
|
|
267
|
+
const Index num_coeffs_to_preserve;
|
|
268
|
+
const Index num_coeffs_to_reduce;
|
|
269
|
+
|
|
270
|
+
PartialReductionKernel(ScratchAcc scratch_, Evaluator evaluator_, EvaluatorPointerType output_accessor_, OpType op_,
|
|
271
|
+
const Index preserve_elements_num_groups_, const Index reduce_elements_num_groups_,
|
|
272
|
+
const Index num_coeffs_to_preserve_, const Index num_coeffs_to_reduce_)
|
|
273
|
+
: scratch(scratch_),
|
|
274
|
+
evaluator(evaluator_),
|
|
275
|
+
output_accessor(output_accessor_),
|
|
276
|
+
op(OpDef::get_op(op_)),
|
|
277
|
+
preserve_elements_num_groups(preserve_elements_num_groups_),
|
|
278
|
+
reduce_elements_num_groups(reduce_elements_num_groups_),
|
|
279
|
+
num_coeffs_to_preserve(num_coeffs_to_preserve_),
|
|
280
|
+
num_coeffs_to_reduce(num_coeffs_to_reduce_) {}
|
|
281
|
+
|
|
282
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void element_wise_reduce(Index globalRId, Index globalPId,
|
|
283
|
+
CoeffReturnType &accumulator) {
|
|
284
|
+
if (globalPId >= num_coeffs_to_preserve) {
|
|
285
|
+
return;
|
|
286
|
+
}
|
|
287
|
+
Index global_offset = rt == reduction_dim::outer_most ? globalPId + (globalRId * num_coeffs_to_preserve)
|
|
288
|
+
: globalRId + (globalPId * num_coeffs_to_reduce);
|
|
289
|
+
Index localOffset = globalRId;
|
|
290
|
+
|
|
291
|
+
const Index per_thread_local_stride = PannelParameters::LocalThreadSizeR * reduce_elements_num_groups;
|
|
292
|
+
const Index per_thread_global_stride =
|
|
293
|
+
rt == reduction_dim::outer_most ? num_coeffs_to_preserve * per_thread_local_stride : per_thread_local_stride;
|
|
294
|
+
for (Index i = globalRId; i < num_coeffs_to_reduce; i += per_thread_local_stride) {
|
|
295
|
+
op.reduce(evaluator.impl().coeff(global_offset), &accumulator);
|
|
296
|
+
localOffset += per_thread_local_stride;
|
|
297
|
+
global_offset += per_thread_global_stride;
|
|
298
|
+
}
|
|
299
|
+
}
|
|
300
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator()(cl::sycl::nd_item<1> itemID) {
|
|
301
|
+
const Index linearLocalThreadId = itemID.get_local_id(0);
|
|
302
|
+
Index pLocalThreadId = rt == reduction_dim::outer_most ? linearLocalThreadId % PannelParameters::LocalThreadSizeP
|
|
303
|
+
: linearLocalThreadId / PannelParameters::LocalThreadSizeR;
|
|
304
|
+
Index rLocalThreadId = rt == reduction_dim::outer_most ? linearLocalThreadId / PannelParameters::LocalThreadSizeP
|
|
305
|
+
: linearLocalThreadId % PannelParameters::LocalThreadSizeR;
|
|
306
|
+
const Index pGroupId = rt == reduction_dim::outer_most ? itemID.get_group(0) % preserve_elements_num_groups
|
|
307
|
+
: itemID.get_group(0) / reduce_elements_num_groups;
|
|
308
|
+
const Index rGroupId = rt == reduction_dim::outer_most ? itemID.get_group(0) / preserve_elements_num_groups
|
|
309
|
+
: itemID.get_group(0) % reduce_elements_num_groups;
|
|
310
|
+
|
|
311
|
+
Index globalPId = pGroupId * PannelParameters::LocalThreadSizeP + pLocalThreadId;
|
|
312
|
+
const Index globalRId = rGroupId * PannelParameters::LocalThreadSizeR + rLocalThreadId;
|
|
313
|
+
auto scratchPtr = scratch.get_pointer().get();
|
|
314
|
+
auto outPtr =
|
|
315
|
+
output_accessor.get_pointer() + (reduce_elements_num_groups > 1 ? rGroupId * num_coeffs_to_preserve : 0);
|
|
316
|
+
CoeffReturnType accumulator = op.initialize();
|
|
317
|
+
|
|
318
|
+
element_wise_reduce(globalRId, globalPId, accumulator);
|
|
319
|
+
|
|
320
|
+
accumulator = OpDef::finalise_op(op.finalize(accumulator), num_coeffs_to_reduce);
|
|
321
|
+
scratchPtr[pLocalThreadId + rLocalThreadId * (PannelParameters::LocalThreadSizeP + PannelParameters::BC)] =
|
|
322
|
+
accumulator;
|
|
323
|
+
if (rt == reduction_dim::inner_most) {
|
|
324
|
+
pLocalThreadId = linearLocalThreadId % PannelParameters::LocalThreadSizeP;
|
|
325
|
+
rLocalThreadId = linearLocalThreadId / PannelParameters::LocalThreadSizeP;
|
|
326
|
+
globalPId = pGroupId * PannelParameters::LocalThreadSizeP + pLocalThreadId;
|
|
327
|
+
}
|
|
328
|
+
|
|
329
|
+
/* Apply the reduction operation between the current local
|
|
330
|
+
* id and the one on the other half of the vector. */
|
|
331
|
+
auto out_scratch_ptr =
|
|
332
|
+
scratchPtr + (pLocalThreadId + (rLocalThreadId * (PannelParameters::LocalThreadSizeP + PannelParameters::BC)));
|
|
333
|
+
itemID.barrier(cl::sycl::access::fence_space::local_space);
|
|
334
|
+
if (rt == reduction_dim::inner_most) {
|
|
335
|
+
accumulator = *out_scratch_ptr;
|
|
336
|
+
}
|
|
337
|
+
// LocalThreadSizeR is always a power of 2
|
|
338
|
+
EIGEN_UNROLL_LOOP
|
|
339
|
+
for (Index offset = PannelParameters::LocalThreadSizeR >> 1; offset > 0; offset >>= 1) {
|
|
340
|
+
if (rLocalThreadId < offset) {
|
|
341
|
+
op.reduce(out_scratch_ptr[(PannelParameters::LocalThreadSizeP + PannelParameters::BC) * offset], &accumulator);
|
|
342
|
+
// The result has already been divided for mean reducer in the
|
|
343
|
+
// previous reduction so no need to divide furthermore
|
|
344
|
+
*out_scratch_ptr = op.finalize(accumulator);
|
|
345
|
+
}
|
|
346
|
+
/* All threads collectively read from global memory into local.
|
|
347
|
+
* The barrier ensures all threads' IO is resolved before
|
|
348
|
+
* execution continues (strictly speaking, all threads within
|
|
349
|
+
* a single work-group - there is no co-ordination between
|
|
350
|
+
* work-groups, only work-items). */
|
|
351
|
+
itemID.barrier(cl::sycl::access::fence_space::local_space);
|
|
352
|
+
}
|
|
353
|
+
|
|
354
|
+
if (rLocalThreadId == 0 && (globalPId < num_coeffs_to_preserve)) {
|
|
355
|
+
outPtr[globalPId] = op.finalize(accumulator);
|
|
356
|
+
}
|
|
357
|
+
}
|
|
358
|
+
};
|
|
359
|
+
|
|
360
|
+
template <typename OutScalar, typename Index, typename InputAccessor, typename OutputAccessor, typename OpType>
|
|
361
|
+
struct SecondStepPartialReduction {
|
|
362
|
+
typedef OpDefiner<OpType, OutScalar, Index, false> OpDef;
|
|
363
|
+
typedef typename OpDef::type Op;
|
|
364
|
+
typedef cl::sycl::accessor<OutScalar, 1, cl::sycl::access::mode::read_write, cl::sycl::access::target::local>
|
|
365
|
+
ScratchAccessor;
|
|
366
|
+
InputAccessor input_accessor;
|
|
367
|
+
OutputAccessor output_accessor;
|
|
368
|
+
Op op;
|
|
369
|
+
const Index num_coeffs_to_preserve;
|
|
370
|
+
const Index num_coeffs_to_reduce;
|
|
371
|
+
|
|
372
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE SecondStepPartialReduction(ScratchAccessor, InputAccessor input_accessor_,
|
|
373
|
+
OutputAccessor output_accessor_, OpType op_,
|
|
374
|
+
const Index num_coeffs_to_preserve_,
|
|
375
|
+
const Index num_coeffs_to_reduce_)
|
|
376
|
+
: input_accessor(input_accessor_),
|
|
377
|
+
output_accessor(output_accessor_),
|
|
378
|
+
op(OpDef::get_op(op_)),
|
|
379
|
+
num_coeffs_to_preserve(num_coeffs_to_preserve_),
|
|
380
|
+
num_coeffs_to_reduce(num_coeffs_to_reduce_) {}
|
|
381
|
+
|
|
382
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator()(cl::sycl::nd_item<1> itemID) {
|
|
383
|
+
const Index globalId = itemID.get_global_id(0);
|
|
384
|
+
|
|
385
|
+
if (globalId >= num_coeffs_to_preserve) return;
|
|
386
|
+
|
|
387
|
+
auto in_ptr = input_accessor.get_pointer() + globalId;
|
|
388
|
+
|
|
389
|
+
OutScalar accumulator = op.initialize();
|
|
390
|
+
// num_coeffs_to_reduce is not bigger than 256
|
|
391
|
+
for (Index i = 0; i < num_coeffs_to_reduce; i++) {
|
|
392
|
+
op.reduce(*in_ptr, &accumulator);
|
|
393
|
+
in_ptr += num_coeffs_to_preserve;
|
|
394
|
+
}
|
|
395
|
+
output_accessor.get_pointer()[globalId] = op.finalize(accumulator);
|
|
396
|
+
}
|
|
397
|
+
}; // struct SecondStepPartialReduction
|
|
398
|
+
|
|
399
|
+
template <typename Index, Index LTP, Index LTR, bool BC_>
|
|
400
|
+
struct ReductionPannel {
|
|
401
|
+
static EIGEN_CONSTEXPR Index LocalThreadSizeP = LTP;
|
|
402
|
+
static EIGEN_CONSTEXPR Index LocalThreadSizeR = LTR;
|
|
403
|
+
static EIGEN_CONSTEXPR bool BC = BC_;
|
|
404
|
+
};
|
|
405
|
+
|
|
406
|
+
template <typename Self, typename Op, TensorSycl::internal::reduction_dim rt>
|
|
407
|
+
struct PartialReducerLauncher {
|
|
408
|
+
typedef typename Self::EvaluatorPointerType EvaluatorPointerType;
|
|
409
|
+
typedef typename Self::CoeffReturnType CoeffReturnType;
|
|
410
|
+
typedef typename Self::Storage Storage;
|
|
411
|
+
typedef typename Self::Index Index;
|
|
412
|
+
typedef ReductionPannel<typename Self::Index, EIGEN_SYCL_LOCAL_THREAD_DIM0, EIGEN_SYCL_LOCAL_THREAD_DIM1, true>
|
|
413
|
+
PannelParameters;
|
|
414
|
+
|
|
415
|
+
typedef PartialReductionKernel<Self, Op, PannelParameters, rt> SyclReducerKerneType;
|
|
416
|
+
|
|
417
|
+
static bool run(const Self &self, const Op &reducer, const Eigen::SyclDevice &dev, EvaluatorPointerType output,
|
|
418
|
+
Index num_coeffs_to_reduce, Index num_coeffs_to_preserve) {
|
|
419
|
+
Index roundUpP = roundUp(num_coeffs_to_preserve, PannelParameters::LocalThreadSizeP);
|
|
420
|
+
|
|
421
|
+
// getPowerOfTwo makes sure local range is power of 2 and <=
|
|
422
|
+
// maxSyclThreadPerBlock this will help us to avoid extra check on the
|
|
423
|
+
// kernel
|
|
424
|
+
static_assert(!((PannelParameters::LocalThreadSizeP * PannelParameters::LocalThreadSizeR) &
|
|
425
|
+
(PannelParameters::LocalThreadSizeP * PannelParameters::LocalThreadSizeR - 1)),
|
|
426
|
+
"The Local thread size must be a power of 2 for the reduction "
|
|
427
|
+
"operation");
|
|
428
|
+
|
|
429
|
+
EIGEN_CONSTEXPR Index localRange = PannelParameters::LocalThreadSizeP * PannelParameters::LocalThreadSizeR;
|
|
430
|
+
// In this step, we force the code not to be more than 2-step reduction:
|
|
431
|
+
// Our empirical research shows that if each thread reduces at least 64
|
|
432
|
+
// elements individually, we get better performance. However, this can change
|
|
433
|
+
// on different platforms. In this step we force the code not to be
|
|
434
|
+
// more than a 2-step reduction: Our empirical research shows that for inner_most
|
|
435
|
+
// dim reducer, it is better to have 8 group in a reduce dimension for sizes
|
|
436
|
+
// > 1024 to achieve the best performance.
|
|
437
|
+
const Index reductionPerThread = 64;
|
|
438
|
+
Index cu = dev.getPowerOfTwo(dev.getNumSyclMultiProcessors(), true);
|
|
439
|
+
const Index pNumGroups = roundUpP / PannelParameters::LocalThreadSizeP;
|
|
440
|
+
Index rGroups = (cu + pNumGroups - 1) / pNumGroups;
|
|
441
|
+
const Index rNumGroups = num_coeffs_to_reduce > reductionPerThread * localRange ? std::min(rGroups, localRange) : 1;
|
|
442
|
+
const Index globalRange = pNumGroups * rNumGroups * localRange;
|
|
443
|
+
|
|
444
|
+
EIGEN_CONSTEXPR Index scratchSize =
|
|
445
|
+
PannelParameters::LocalThreadSizeR * (PannelParameters::LocalThreadSizeP + PannelParameters::BC);
|
|
446
|
+
auto thread_range = cl::sycl::nd_range<1>(cl::sycl::range<1>(globalRange), cl::sycl::range<1>(localRange));
|
|
447
|
+
if (rNumGroups > 1) {
|
|
448
|
+
CoeffReturnType *temp_pointer = static_cast<CoeffReturnType *>(
|
|
449
|
+
dev.allocate_temp(num_coeffs_to_preserve * rNumGroups * sizeof(CoeffReturnType)));
|
|
450
|
+
EvaluatorPointerType temp_accessor = dev.get(temp_pointer);
|
|
451
|
+
dev.template unary_kernel_launcher<CoeffReturnType, SyclReducerKerneType>(
|
|
452
|
+
self, temp_accessor, thread_range, scratchSize, reducer, pNumGroups, rNumGroups, num_coeffs_to_preserve,
|
|
453
|
+
num_coeffs_to_reduce);
|
|
454
|
+
|
|
455
|
+
typedef SecondStepPartialReduction<CoeffReturnType, Index, EvaluatorPointerType, EvaluatorPointerType, Op>
|
|
456
|
+
SecondStepPartialReductionKernel;
|
|
457
|
+
|
|
458
|
+
dev.template unary_kernel_launcher<CoeffReturnType, SecondStepPartialReductionKernel>(
|
|
459
|
+
temp_accessor, output,
|
|
460
|
+
cl::sycl::nd_range<1>(cl::sycl::range<1>(pNumGroups * localRange), cl::sycl::range<1>(localRange)), Index(1),
|
|
461
|
+
reducer, num_coeffs_to_preserve, rNumGroups);
|
|
462
|
+
|
|
463
|
+
self.device().deallocate_temp(temp_pointer);
|
|
464
|
+
} else {
|
|
465
|
+
dev.template unary_kernel_launcher<CoeffReturnType, SyclReducerKerneType>(
|
|
466
|
+
self, output, thread_range, scratchSize, reducer, pNumGroups, rNumGroups, num_coeffs_to_preserve,
|
|
467
|
+
num_coeffs_to_reduce);
|
|
468
|
+
}
|
|
469
|
+
return false;
|
|
470
|
+
}
|
|
471
|
+
};
|
|
472
|
+
} // namespace internal
|
|
473
|
+
} // namespace TensorSycl
|
|
474
|
+
|
|
475
|
+
namespace internal {
|
|
476
|
+
|
|
477
|
+
template <typename Self, typename Op, bool Vectorizable>
|
|
478
|
+
struct FullReducer<Self, Op, Eigen::SyclDevice, Vectorizable> {
|
|
479
|
+
typedef typename Self::CoeffReturnType CoeffReturnType;
|
|
480
|
+
typedef typename Self::EvaluatorPointerType EvaluatorPointerType;
|
|
481
|
+
static EIGEN_CONSTEXPR bool HasOptimizedImplementation = true;
|
|
482
|
+
static EIGEN_CONSTEXPR int PacketSize = Self::PacketAccess ? Self::PacketSize : 1;
|
|
483
|
+
static void run(const Self &self, Op &reducer, const Eigen::SyclDevice &dev, EvaluatorPointerType data) {
|
|
484
|
+
typedef typename conditional<Self::PacketAccess, typename Self::PacketReturnType, CoeffReturnType>::type OutType;
|
|
485
|
+
static_assert(!((EIGEN_SYCL_LOCAL_THREAD_DIM0 * EIGEN_SYCL_LOCAL_THREAD_DIM1) &
|
|
486
|
+
(EIGEN_SYCL_LOCAL_THREAD_DIM0 * EIGEN_SYCL_LOCAL_THREAD_DIM1 - 1)),
|
|
487
|
+
"The Local thread size must be a power of 2 for the reduction "
|
|
488
|
+
"operation");
|
|
489
|
+
EIGEN_CONSTEXPR Index local_range = EIGEN_SYCL_LOCAL_THREAD_DIM0 * EIGEN_SYCL_LOCAL_THREAD_DIM1;
|
|
490
|
+
|
|
491
|
+
typename Self::Index inputSize = self.impl().dimensions().TotalSize();
|
|
492
|
+
// In this step we force the code not to be more than 2-step reduction:
|
|
493
|
+
// Our empirical research shows that if each thread reduces at least 512
|
|
494
|
+
// elemnts individually, we get better performance.
|
|
495
|
+
const Index reductionPerThread = 2048;
|
|
496
|
+
// const Index num_work_group =
|
|
497
|
+
Index reductionGroup = dev.getPowerOfTwo(
|
|
498
|
+
(inputSize + (reductionPerThread * local_range - 1)) / (reductionPerThread * local_range), true);
|
|
499
|
+
const Index num_work_group = std::min(reductionGroup, local_range);
|
|
500
|
+
// 1
|
|
501
|
+
// ? local_range
|
|
502
|
+
// : 1);
|
|
503
|
+
const Index global_range = num_work_group * local_range;
|
|
504
|
+
|
|
505
|
+
auto thread_range = cl::sycl::nd_range<1>(cl::sycl::range<1>(global_range), cl::sycl::range<1>(local_range));
|
|
506
|
+
typedef TensorSycl::internal::FullReductionKernelFunctor<Self, Op, local_range> reduction_kernel_t;
|
|
507
|
+
if (num_work_group > 1) {
|
|
508
|
+
CoeffReturnType *temp_pointer =
|
|
509
|
+
static_cast<CoeffReturnType *>(dev.allocate_temp(num_work_group * sizeof(CoeffReturnType)));
|
|
510
|
+
typename Self::EvaluatorPointerType tmp_global_accessor = dev.get(temp_pointer);
|
|
511
|
+
dev.template unary_kernel_launcher<OutType, reduction_kernel_t>(self, tmp_global_accessor, thread_range,
|
|
512
|
+
local_range, inputSize, reducer);
|
|
513
|
+
|
|
514
|
+
typedef TensorSycl::internal::SecondStepFullReducer<CoeffReturnType, Op, EvaluatorPointerType,
|
|
515
|
+
EvaluatorPointerType, Index, local_range>
|
|
516
|
+
GenericRKernel;
|
|
517
|
+
dev.template unary_kernel_launcher<CoeffReturnType, GenericRKernel>(
|
|
518
|
+
tmp_global_accessor, data,
|
|
519
|
+
cl::sycl::nd_range<1>(cl::sycl::range<1>(num_work_group), cl::sycl::range<1>(num_work_group)), num_work_group,
|
|
520
|
+
reducer);
|
|
521
|
+
|
|
522
|
+
dev.deallocate_temp(temp_pointer);
|
|
523
|
+
} else {
|
|
524
|
+
dev.template unary_kernel_launcher<OutType, reduction_kernel_t>(self, data, thread_range, local_range, inputSize,
|
|
525
|
+
reducer);
|
|
526
|
+
}
|
|
527
|
+
}
|
|
528
|
+
};
|
|
529
|
+
// vectorizable inner-most dim preserver
|
|
530
|
+
// col reduction
|
|
531
|
+
template <typename Self, typename Op>
|
|
532
|
+
struct OuterReducer<Self, Op, Eigen::SyclDevice> {
|
|
533
|
+
static EIGEN_CONSTEXPR bool HasOptimizedImplementation = true;
|
|
534
|
+
|
|
535
|
+
static bool run(const Self &self, const Op &reducer, const Eigen::SyclDevice &dev,
|
|
536
|
+
typename Self::EvaluatorPointerType output, typename Self::Index num_coeffs_to_reduce,
|
|
537
|
+
typename Self::Index num_coeffs_to_preserve) {
|
|
538
|
+
return ::Eigen::TensorSycl::internal::PartialReducerLauncher<
|
|
539
|
+
Self, Op, ::Eigen::TensorSycl::internal::reduction_dim::outer_most>::run(self, reducer, dev, output,
|
|
540
|
+
num_coeffs_to_reduce,
|
|
541
|
+
num_coeffs_to_preserve);
|
|
542
|
+
}
|
|
543
|
+
};
|
|
544
|
+
// row reduction
|
|
545
|
+
template <typename Self, typename Op>
|
|
546
|
+
struct InnerReducer<Self, Op, Eigen::SyclDevice> {
|
|
547
|
+
static EIGEN_CONSTEXPR bool HasOptimizedImplementation = true;
|
|
548
|
+
|
|
549
|
+
static bool run(const Self &self, const Op &reducer, const Eigen::SyclDevice &dev,
|
|
550
|
+
typename Self::EvaluatorPointerType output, typename Self::Index num_coeffs_to_reduce,
|
|
551
|
+
typename Self::Index num_coeffs_to_preserve) {
|
|
552
|
+
return ::Eigen::TensorSycl::internal::PartialReducerLauncher<
|
|
553
|
+
Self, Op, ::Eigen::TensorSycl::internal::reduction_dim::inner_most>::run(self, reducer, dev, output,
|
|
554
|
+
num_coeffs_to_reduce,
|
|
555
|
+
num_coeffs_to_preserve);
|
|
556
|
+
}
|
|
557
|
+
};
|
|
558
|
+
|
|
559
|
+
// ArgMax uses this kernel for partial reduction
|
|
560
|
+
// TODO(@mehdi.goli) come up with a better kernel
|
|
561
|
+
// generic partial reduction
|
|
562
|
+
template <typename Self, typename Op>
|
|
563
|
+
struct GenericReducer<Self, Op, Eigen::SyclDevice> {
|
|
564
|
+
static EIGEN_CONSTEXPR bool HasOptimizedImplementation = false;
|
|
565
|
+
static bool run(const Self &self, const Op &reducer, const Eigen::SyclDevice &dev,
|
|
566
|
+
typename Self::EvaluatorPointerType output, typename Self::Index num_values_to_reduce,
|
|
567
|
+
typename Self::Index num_coeffs_to_preserve) {
|
|
568
|
+
typename Self::Index range, GRange, tileSize;
|
|
569
|
+
dev.parallel_for_setup(num_coeffs_to_preserve, tileSize, range, GRange);
|
|
570
|
+
|
|
571
|
+
dev.template unary_kernel_launcher<typename Self::CoeffReturnType,
|
|
572
|
+
TensorSycl::internal::GenericNondeterministicReducer<Self, Op>>(
|
|
573
|
+
self, output, cl::sycl::nd_range<1>(cl::sycl::range<1>(GRange), cl::sycl::range<1>(tileSize)), Index(1),
|
|
574
|
+
reducer, range, (num_values_to_reduce != 0) ? num_values_to_reduce : static_cast<Index>(1));
|
|
575
|
+
return false;
|
|
576
|
+
}
|
|
577
|
+
};
|
|
578
|
+
|
|
579
|
+
} // namespace internal
|
|
580
|
+
} // namespace Eigen
|
|
581
|
+
|
|
582
|
+
#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSOR_REDUCTION_SYCL_HPP
|