pylibsparseir 0.1.0__cp313-cp313-macosx_15_0_arm64.whl → 0.5.2__cp313-cp313-macosx_15_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pylibsparseir might be problematic. Click here for more details.
- include/eigen3/Eigen/Cholesky +45 -0
- include/eigen3/Eigen/CholmodSupport +48 -0
- include/eigen3/Eigen/Core +384 -0
- include/eigen3/Eigen/Dense +7 -0
- include/eigen3/Eigen/Eigen +2 -0
- include/eigen3/Eigen/Eigenvalues +60 -0
- include/eigen3/Eigen/Geometry +59 -0
- include/eigen3/Eigen/Householder +29 -0
- include/eigen3/Eigen/IterativeLinearSolvers +48 -0
- include/eigen3/Eigen/Jacobi +32 -0
- include/eigen3/Eigen/KLUSupport +41 -0
- include/eigen3/Eigen/LU +47 -0
- include/eigen3/Eigen/MetisSupport +35 -0
- include/eigen3/Eigen/OrderingMethods +70 -0
- include/eigen3/Eigen/PaStiXSupport +49 -0
- include/eigen3/Eigen/PardisoSupport +35 -0
- include/eigen3/Eigen/QR +50 -0
- include/eigen3/Eigen/QtAlignedMalloc +39 -0
- include/eigen3/Eigen/SPQRSupport +34 -0
- include/eigen3/Eigen/SVD +50 -0
- include/eigen3/Eigen/Sparse +34 -0
- include/eigen3/Eigen/SparseCholesky +37 -0
- include/eigen3/Eigen/SparseCore +69 -0
- include/eigen3/Eigen/SparseLU +50 -0
- include/eigen3/Eigen/SparseQR +36 -0
- include/eigen3/Eigen/StdDeque +27 -0
- include/eigen3/Eigen/StdList +26 -0
- include/eigen3/Eigen/StdVector +27 -0
- include/eigen3/Eigen/SuperLUSupport +64 -0
- include/eigen3/Eigen/UmfPackSupport +40 -0
- include/eigen3/Eigen/src/Cholesky/LDLT.h +688 -0
- include/eigen3/Eigen/src/Cholesky/LLT.h +558 -0
- include/eigen3/Eigen/src/Cholesky/LLT_LAPACKE.h +99 -0
- include/eigen3/Eigen/src/CholmodSupport/CholmodSupport.h +682 -0
- include/eigen3/Eigen/src/Core/ArithmeticSequence.h +413 -0
- include/eigen3/Eigen/src/Core/Array.h +417 -0
- include/eigen3/Eigen/src/Core/ArrayBase.h +226 -0
- include/eigen3/Eigen/src/Core/ArrayWrapper.h +209 -0
- include/eigen3/Eigen/src/Core/Assign.h +90 -0
- include/eigen3/Eigen/src/Core/AssignEvaluator.h +1010 -0
- include/eigen3/Eigen/src/Core/Assign_MKL.h +178 -0
- include/eigen3/Eigen/src/Core/BandMatrix.h +353 -0
- include/eigen3/Eigen/src/Core/Block.h +448 -0
- include/eigen3/Eigen/src/Core/BooleanRedux.h +162 -0
- include/eigen3/Eigen/src/Core/CommaInitializer.h +164 -0
- include/eigen3/Eigen/src/Core/ConditionEstimator.h +175 -0
- include/eigen3/Eigen/src/Core/CoreEvaluators.h +1741 -0
- include/eigen3/Eigen/src/Core/CoreIterators.h +132 -0
- include/eigen3/Eigen/src/Core/CwiseBinaryOp.h +183 -0
- include/eigen3/Eigen/src/Core/CwiseNullaryOp.h +1001 -0
- include/eigen3/Eigen/src/Core/CwiseTernaryOp.h +197 -0
- include/eigen3/Eigen/src/Core/CwiseUnaryOp.h +103 -0
- include/eigen3/Eigen/src/Core/CwiseUnaryView.h +132 -0
- include/eigen3/Eigen/src/Core/DenseBase.h +701 -0
- include/eigen3/Eigen/src/Core/DenseCoeffsBase.h +685 -0
- include/eigen3/Eigen/src/Core/DenseStorage.h +652 -0
- include/eigen3/Eigen/src/Core/Diagonal.h +258 -0
- include/eigen3/Eigen/src/Core/DiagonalMatrix.h +391 -0
- include/eigen3/Eigen/src/Core/DiagonalProduct.h +28 -0
- include/eigen3/Eigen/src/Core/Dot.h +318 -0
- include/eigen3/Eigen/src/Core/EigenBase.h +160 -0
- include/eigen3/Eigen/src/Core/ForceAlignedAccess.h +150 -0
- include/eigen3/Eigen/src/Core/Fuzzy.h +155 -0
- include/eigen3/Eigen/src/Core/GeneralProduct.h +465 -0
- include/eigen3/Eigen/src/Core/GenericPacketMath.h +1040 -0
- include/eigen3/Eigen/src/Core/GlobalFunctions.h +194 -0
- include/eigen3/Eigen/src/Core/IO.h +258 -0
- include/eigen3/Eigen/src/Core/IndexedView.h +237 -0
- include/eigen3/Eigen/src/Core/Inverse.h +117 -0
- include/eigen3/Eigen/src/Core/Map.h +171 -0
- include/eigen3/Eigen/src/Core/MapBase.h +310 -0
- include/eigen3/Eigen/src/Core/MathFunctions.h +2057 -0
- include/eigen3/Eigen/src/Core/MathFunctionsImpl.h +200 -0
- include/eigen3/Eigen/src/Core/Matrix.h +565 -0
- include/eigen3/Eigen/src/Core/MatrixBase.h +547 -0
- include/eigen3/Eigen/src/Core/NestByValue.h +85 -0
- include/eigen3/Eigen/src/Core/NoAlias.h +109 -0
- include/eigen3/Eigen/src/Core/NumTraits.h +335 -0
- include/eigen3/Eigen/src/Core/PartialReduxEvaluator.h +232 -0
- include/eigen3/Eigen/src/Core/PermutationMatrix.h +605 -0
- include/eigen3/Eigen/src/Core/PlainObjectBase.h +1128 -0
- include/eigen3/Eigen/src/Core/Product.h +191 -0
- include/eigen3/Eigen/src/Core/ProductEvaluators.h +1179 -0
- include/eigen3/Eigen/src/Core/Random.h +218 -0
- include/eigen3/Eigen/src/Core/Redux.h +515 -0
- include/eigen3/Eigen/src/Core/Ref.h +381 -0
- include/eigen3/Eigen/src/Core/Replicate.h +142 -0
- include/eigen3/Eigen/src/Core/Reshaped.h +454 -0
- include/eigen3/Eigen/src/Core/ReturnByValue.h +119 -0
- include/eigen3/Eigen/src/Core/Reverse.h +217 -0
- include/eigen3/Eigen/src/Core/Select.h +164 -0
- include/eigen3/Eigen/src/Core/SelfAdjointView.h +365 -0
- include/eigen3/Eigen/src/Core/SelfCwiseBinaryOp.h +47 -0
- include/eigen3/Eigen/src/Core/Solve.h +188 -0
- include/eigen3/Eigen/src/Core/SolveTriangular.h +235 -0
- include/eigen3/Eigen/src/Core/SolverBase.h +168 -0
- include/eigen3/Eigen/src/Core/StableNorm.h +251 -0
- include/eigen3/Eigen/src/Core/StlIterators.h +463 -0
- include/eigen3/Eigen/src/Core/Stride.h +116 -0
- include/eigen3/Eigen/src/Core/Swap.h +68 -0
- include/eigen3/Eigen/src/Core/Transpose.h +464 -0
- include/eigen3/Eigen/src/Core/Transpositions.h +386 -0
- include/eigen3/Eigen/src/Core/TriangularMatrix.h +1001 -0
- include/eigen3/Eigen/src/Core/VectorBlock.h +96 -0
- include/eigen3/Eigen/src/Core/VectorwiseOp.h +784 -0
- include/eigen3/Eigen/src/Core/Visitor.h +381 -0
- include/eigen3/Eigen/src/Core/arch/AVX/Complex.h +372 -0
- include/eigen3/Eigen/src/Core/arch/AVX/MathFunctions.h +228 -0
- include/eigen3/Eigen/src/Core/arch/AVX/PacketMath.h +1574 -0
- include/eigen3/Eigen/src/Core/arch/AVX/TypeCasting.h +115 -0
- include/eigen3/Eigen/src/Core/arch/AVX512/Complex.h +422 -0
- include/eigen3/Eigen/src/Core/arch/AVX512/MathFunctions.h +362 -0
- include/eigen3/Eigen/src/Core/arch/AVX512/PacketMath.h +2303 -0
- include/eigen3/Eigen/src/Core/arch/AVX512/TypeCasting.h +89 -0
- include/eigen3/Eigen/src/Core/arch/AltiVec/Complex.h +417 -0
- include/eigen3/Eigen/src/Core/arch/AltiVec/MathFunctions.h +90 -0
- include/eigen3/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2937 -0
- include/eigen3/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +221 -0
- include/eigen3/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +629 -0
- include/eigen3/Eigen/src/Core/arch/AltiVec/PacketMath.h +2711 -0
- include/eigen3/Eigen/src/Core/arch/CUDA/Complex.h +258 -0
- include/eigen3/Eigen/src/Core/arch/Default/BFloat16.h +700 -0
- include/eigen3/Eigen/src/Core/arch/Default/ConjHelper.h +117 -0
- include/eigen3/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1649 -0
- include/eigen3/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +110 -0
- include/eigen3/Eigen/src/Core/arch/Default/Half.h +942 -0
- include/eigen3/Eigen/src/Core/arch/Default/Settings.h +49 -0
- include/eigen3/Eigen/src/Core/arch/Default/TypeCasting.h +120 -0
- include/eigen3/Eigen/src/Core/arch/GPU/MathFunctions.h +103 -0
- include/eigen3/Eigen/src/Core/arch/GPU/PacketMath.h +1685 -0
- include/eigen3/Eigen/src/Core/arch/GPU/TypeCasting.h +80 -0
- include/eigen3/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
- include/eigen3/Eigen/src/Core/arch/MSA/Complex.h +648 -0
- include/eigen3/Eigen/src/Core/arch/MSA/MathFunctions.h +387 -0
- include/eigen3/Eigen/src/Core/arch/MSA/PacketMath.h +1233 -0
- include/eigen3/Eigen/src/Core/arch/NEON/Complex.h +584 -0
- include/eigen3/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +183 -0
- include/eigen3/Eigen/src/Core/arch/NEON/MathFunctions.h +75 -0
- include/eigen3/Eigen/src/Core/arch/NEON/PacketMath.h +4587 -0
- include/eigen3/Eigen/src/Core/arch/NEON/TypeCasting.h +1419 -0
- include/eigen3/Eigen/src/Core/arch/SSE/Complex.h +351 -0
- include/eigen3/Eigen/src/Core/arch/SSE/MathFunctions.h +199 -0
- include/eigen3/Eigen/src/Core/arch/SSE/PacketMath.h +1505 -0
- include/eigen3/Eigen/src/Core/arch/SSE/TypeCasting.h +142 -0
- include/eigen3/Eigen/src/Core/arch/SVE/MathFunctions.h +44 -0
- include/eigen3/Eigen/src/Core/arch/SVE/PacketMath.h +752 -0
- include/eigen3/Eigen/src/Core/arch/SVE/TypeCasting.h +49 -0
- include/eigen3/Eigen/src/Core/arch/SYCL/InteropHeaders.h +232 -0
- include/eigen3/Eigen/src/Core/arch/SYCL/MathFunctions.h +301 -0
- include/eigen3/Eigen/src/Core/arch/SYCL/PacketMath.h +670 -0
- include/eigen3/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +694 -0
- include/eigen3/Eigen/src/Core/arch/SYCL/TypeCasting.h +85 -0
- include/eigen3/Eigen/src/Core/arch/ZVector/Complex.h +426 -0
- include/eigen3/Eigen/src/Core/arch/ZVector/MathFunctions.h +233 -0
- include/eigen3/Eigen/src/Core/arch/ZVector/PacketMath.h +1060 -0
- include/eigen3/Eigen/src/Core/functors/AssignmentFunctors.h +177 -0
- include/eigen3/Eigen/src/Core/functors/BinaryFunctors.h +541 -0
- include/eigen3/Eigen/src/Core/functors/NullaryFunctors.h +189 -0
- include/eigen3/Eigen/src/Core/functors/StlFunctors.h +166 -0
- include/eigen3/Eigen/src/Core/functors/TernaryFunctors.h +25 -0
- include/eigen3/Eigen/src/Core/functors/UnaryFunctors.h +1131 -0
- include/eigen3/Eigen/src/Core/products/GeneralBlockPanelKernel.h +2645 -0
- include/eigen3/Eigen/src/Core/products/GeneralMatrixMatrix.h +517 -0
- include/eigen3/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +317 -0
- include/eigen3/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +145 -0
- include/eigen3/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +124 -0
- include/eigen3/Eigen/src/Core/products/GeneralMatrixVector.h +518 -0
- include/eigen3/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +136 -0
- include/eigen3/Eigen/src/Core/products/Parallelizer.h +180 -0
- include/eigen3/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +544 -0
- include/eigen3/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +295 -0
- include/eigen3/Eigen/src/Core/products/SelfadjointMatrixVector.h +262 -0
- include/eigen3/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +118 -0
- include/eigen3/Eigen/src/Core/products/SelfadjointProduct.h +133 -0
- include/eigen3/Eigen/src/Core/products/SelfadjointRank2Update.h +94 -0
- include/eigen3/Eigen/src/Core/products/TriangularMatrixMatrix.h +472 -0
- include/eigen3/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +317 -0
- include/eigen3/Eigen/src/Core/products/TriangularMatrixVector.h +350 -0
- include/eigen3/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +255 -0
- include/eigen3/Eigen/src/Core/products/TriangularSolverMatrix.h +337 -0
- include/eigen3/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +167 -0
- include/eigen3/Eigen/src/Core/products/TriangularSolverVector.h +148 -0
- include/eigen3/Eigen/src/Core/util/BlasUtil.h +583 -0
- include/eigen3/Eigen/src/Core/util/ConfigureVectorization.h +512 -0
- include/eigen3/Eigen/src/Core/util/Constants.h +563 -0
- include/eigen3/Eigen/src/Core/util/DisableStupidWarnings.h +106 -0
- include/eigen3/Eigen/src/Core/util/ForwardDeclarations.h +322 -0
- include/eigen3/Eigen/src/Core/util/IndexedViewHelper.h +186 -0
- include/eigen3/Eigen/src/Core/util/IntegralConstant.h +272 -0
- include/eigen3/Eigen/src/Core/util/MKL_support.h +137 -0
- include/eigen3/Eigen/src/Core/util/Macros.h +1464 -0
- include/eigen3/Eigen/src/Core/util/Memory.h +1163 -0
- include/eigen3/Eigen/src/Core/util/Meta.h +812 -0
- include/eigen3/Eigen/src/Core/util/NonMPL2.h +3 -0
- include/eigen3/Eigen/src/Core/util/ReenableStupidWarnings.h +31 -0
- include/eigen3/Eigen/src/Core/util/ReshapedHelper.h +51 -0
- include/eigen3/Eigen/src/Core/util/StaticAssert.h +221 -0
- include/eigen3/Eigen/src/Core/util/SymbolicIndex.h +293 -0
- include/eigen3/Eigen/src/Core/util/XprHelper.h +856 -0
- include/eigen3/Eigen/src/Eigenvalues/ComplexEigenSolver.h +346 -0
- include/eigen3/Eigen/src/Eigenvalues/ComplexSchur.h +462 -0
- include/eigen3/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +91 -0
- include/eigen3/Eigen/src/Eigenvalues/EigenSolver.h +622 -0
- include/eigen3/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +418 -0
- include/eigen3/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +226 -0
- include/eigen3/Eigen/src/Eigenvalues/HessenbergDecomposition.h +374 -0
- include/eigen3/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +158 -0
- include/eigen3/Eigen/src/Eigenvalues/RealQZ.h +657 -0
- include/eigen3/Eigen/src/Eigenvalues/RealSchur.h +558 -0
- include/eigen3/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +77 -0
- include/eigen3/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +904 -0
- include/eigen3/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +87 -0
- include/eigen3/Eigen/src/Eigenvalues/Tridiagonalization.h +561 -0
- include/eigen3/Eigen/src/Geometry/AlignedBox.h +486 -0
- include/eigen3/Eigen/src/Geometry/AngleAxis.h +247 -0
- include/eigen3/Eigen/src/Geometry/EulerAngles.h +114 -0
- include/eigen3/Eigen/src/Geometry/Homogeneous.h +501 -0
- include/eigen3/Eigen/src/Geometry/Hyperplane.h +282 -0
- include/eigen3/Eigen/src/Geometry/OrthoMethods.h +235 -0
- include/eigen3/Eigen/src/Geometry/ParametrizedLine.h +232 -0
- include/eigen3/Eigen/src/Geometry/Quaternion.h +870 -0
- include/eigen3/Eigen/src/Geometry/Rotation2D.h +199 -0
- include/eigen3/Eigen/src/Geometry/RotationBase.h +206 -0
- include/eigen3/Eigen/src/Geometry/Scaling.h +188 -0
- include/eigen3/Eigen/src/Geometry/Transform.h +1563 -0
- include/eigen3/Eigen/src/Geometry/Translation.h +202 -0
- include/eigen3/Eigen/src/Geometry/Umeyama.h +166 -0
- include/eigen3/Eigen/src/Geometry/arch/Geometry_SIMD.h +168 -0
- include/eigen3/Eigen/src/Householder/BlockHouseholder.h +110 -0
- include/eigen3/Eigen/src/Householder/Householder.h +176 -0
- include/eigen3/Eigen/src/Householder/HouseholderSequence.h +545 -0
- include/eigen3/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +226 -0
- include/eigen3/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +212 -0
- include/eigen3/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +229 -0
- include/eigen3/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +394 -0
- include/eigen3/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +453 -0
- include/eigen3/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +444 -0
- include/eigen3/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +198 -0
- include/eigen3/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +117 -0
- include/eigen3/Eigen/src/Jacobi/Jacobi.h +483 -0
- include/eigen3/Eigen/src/KLUSupport/KLUSupport.h +358 -0
- include/eigen3/Eigen/src/LU/Determinant.h +117 -0
- include/eigen3/Eigen/src/LU/FullPivLU.h +877 -0
- include/eigen3/Eigen/src/LU/InverseImpl.h +432 -0
- include/eigen3/Eigen/src/LU/PartialPivLU.h +624 -0
- include/eigen3/Eigen/src/LU/PartialPivLU_LAPACKE.h +83 -0
- include/eigen3/Eigen/src/LU/arch/InverseSize4.h +351 -0
- include/eigen3/Eigen/src/MetisSupport/MetisSupport.h +137 -0
- include/eigen3/Eigen/src/OrderingMethods/Amd.h +435 -0
- include/eigen3/Eigen/src/OrderingMethods/Eigen_Colamd.h +1863 -0
- include/eigen3/Eigen/src/OrderingMethods/Ordering.h +153 -0
- include/eigen3/Eigen/src/PaStiXSupport/PaStiXSupport.h +678 -0
- include/eigen3/Eigen/src/PardisoSupport/PardisoSupport.h +545 -0
- include/eigen3/Eigen/src/QR/ColPivHouseholderQR.h +674 -0
- include/eigen3/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +97 -0
- include/eigen3/Eigen/src/QR/CompleteOrthogonalDecomposition.h +635 -0
- include/eigen3/Eigen/src/QR/FullPivHouseholderQR.h +713 -0
- include/eigen3/Eigen/src/QR/HouseholderQR.h +434 -0
- include/eigen3/Eigen/src/QR/HouseholderQR_LAPACKE.h +68 -0
- include/eigen3/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +335 -0
- include/eigen3/Eigen/src/SVD/BDCSVD.h +1366 -0
- include/eigen3/Eigen/src/SVD/JacobiSVD.h +812 -0
- include/eigen3/Eigen/src/SVD/JacobiSVD_LAPACKE.h +91 -0
- include/eigen3/Eigen/src/SVD/SVDBase.h +376 -0
- include/eigen3/Eigen/src/SVD/UpperBidiagonalization.h +414 -0
- include/eigen3/Eigen/src/SparseCholesky/SimplicialCholesky.h +697 -0
- include/eigen3/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +174 -0
- include/eigen3/Eigen/src/SparseCore/AmbiVector.h +378 -0
- include/eigen3/Eigen/src/SparseCore/CompressedStorage.h +274 -0
- include/eigen3/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +352 -0
- include/eigen3/Eigen/src/SparseCore/MappedSparseMatrix.h +67 -0
- include/eigen3/Eigen/src/SparseCore/SparseAssign.h +270 -0
- include/eigen3/Eigen/src/SparseCore/SparseBlock.h +571 -0
- include/eigen3/Eigen/src/SparseCore/SparseColEtree.h +206 -0
- include/eigen3/Eigen/src/SparseCore/SparseCompressedBase.h +370 -0
- include/eigen3/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +722 -0
- include/eigen3/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +150 -0
- include/eigen3/Eigen/src/SparseCore/SparseDenseProduct.h +342 -0
- include/eigen3/Eigen/src/SparseCore/SparseDiagonalProduct.h +138 -0
- include/eigen3/Eigen/src/SparseCore/SparseDot.h +98 -0
- include/eigen3/Eigen/src/SparseCore/SparseFuzzy.h +29 -0
- include/eigen3/Eigen/src/SparseCore/SparseMap.h +305 -0
- include/eigen3/Eigen/src/SparseCore/SparseMatrix.h +1518 -0
- include/eigen3/Eigen/src/SparseCore/SparseMatrixBase.h +398 -0
- include/eigen3/Eigen/src/SparseCore/SparsePermutation.h +178 -0
- include/eigen3/Eigen/src/SparseCore/SparseProduct.h +181 -0
- include/eigen3/Eigen/src/SparseCore/SparseRedux.h +49 -0
- include/eigen3/Eigen/src/SparseCore/SparseRef.h +397 -0
- include/eigen3/Eigen/src/SparseCore/SparseSelfAdjointView.h +659 -0
- include/eigen3/Eigen/src/SparseCore/SparseSolverBase.h +124 -0
- include/eigen3/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +198 -0
- include/eigen3/Eigen/src/SparseCore/SparseTranspose.h +92 -0
- include/eigen3/Eigen/src/SparseCore/SparseTriangularView.h +189 -0
- include/eigen3/Eigen/src/SparseCore/SparseUtil.h +186 -0
- include/eigen3/Eigen/src/SparseCore/SparseVector.h +478 -0
- include/eigen3/Eigen/src/SparseCore/SparseView.h +254 -0
- include/eigen3/Eigen/src/SparseCore/TriangularSolver.h +315 -0
- include/eigen3/Eigen/src/SparseLU/SparseLU.h +923 -0
- include/eigen3/Eigen/src/SparseLU/SparseLUImpl.h +66 -0
- include/eigen3/Eigen/src/SparseLU/SparseLU_Memory.h +226 -0
- include/eigen3/Eigen/src/SparseLU/SparseLU_Structs.h +110 -0
- include/eigen3/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +375 -0
- include/eigen3/Eigen/src/SparseLU/SparseLU_Utils.h +80 -0
- include/eigen3/Eigen/src/SparseLU/SparseLU_column_bmod.h +181 -0
- include/eigen3/Eigen/src/SparseLU/SparseLU_column_dfs.h +179 -0
- include/eigen3/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +107 -0
- include/eigen3/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +280 -0
- include/eigen3/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +126 -0
- include/eigen3/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +130 -0
- include/eigen3/Eigen/src/SparseLU/SparseLU_panel_bmod.h +223 -0
- include/eigen3/Eigen/src/SparseLU/SparseLU_panel_dfs.h +258 -0
- include/eigen3/Eigen/src/SparseLU/SparseLU_pivotL.h +137 -0
- include/eigen3/Eigen/src/SparseLU/SparseLU_pruneL.h +136 -0
- include/eigen3/Eigen/src/SparseLU/SparseLU_relax_snode.h +83 -0
- include/eigen3/Eigen/src/SparseQR/SparseQR.h +758 -0
- include/eigen3/Eigen/src/StlSupport/StdDeque.h +116 -0
- include/eigen3/Eigen/src/StlSupport/StdList.h +106 -0
- include/eigen3/Eigen/src/StlSupport/StdVector.h +131 -0
- include/eigen3/Eigen/src/StlSupport/details.h +84 -0
- include/eigen3/Eigen/src/SuperLUSupport/SuperLUSupport.h +1025 -0
- include/eigen3/Eigen/src/UmfPackSupport/UmfPackSupport.h +642 -0
- include/eigen3/Eigen/src/misc/Image.h +82 -0
- include/eigen3/Eigen/src/misc/Kernel.h +79 -0
- include/eigen3/Eigen/src/misc/RealSvd2x2.h +55 -0
- include/eigen3/Eigen/src/misc/blas.h +440 -0
- include/eigen3/Eigen/src/misc/lapack.h +152 -0
- include/eigen3/Eigen/src/misc/lapacke.h +16292 -0
- include/eigen3/Eigen/src/misc/lapacke_mangling.h +17 -0
- include/eigen3/Eigen/src/plugins/ArrayCwiseBinaryOps.h +358 -0
- include/eigen3/Eigen/src/plugins/ArrayCwiseUnaryOps.h +696 -0
- include/eigen3/Eigen/src/plugins/BlockMethods.h +1442 -0
- include/eigen3/Eigen/src/plugins/CommonCwiseBinaryOps.h +115 -0
- include/eigen3/Eigen/src/plugins/CommonCwiseUnaryOps.h +177 -0
- include/eigen3/Eigen/src/plugins/IndexedViewMethods.h +262 -0
- include/eigen3/Eigen/src/plugins/MatrixCwiseBinaryOps.h +152 -0
- include/eigen3/Eigen/src/plugins/MatrixCwiseUnaryOps.h +95 -0
- include/eigen3/Eigen/src/plugins/ReshapedMethods.h +149 -0
- include/eigen3/signature_of_eigen3_matrix_library +1 -0
- include/eigen3/unsupported/Eigen/AdolcForward +159 -0
- include/eigen3/unsupported/Eigen/AlignedVector3 +234 -0
- include/eigen3/unsupported/Eigen/ArpackSupport +30 -0
- include/eigen3/unsupported/Eigen/AutoDiff +46 -0
- include/eigen3/unsupported/Eigen/BVH +95 -0
- include/eigen3/unsupported/Eigen/CXX11/Tensor +137 -0
- include/eigen3/unsupported/Eigen/CXX11/TensorSymmetry +42 -0
- include/eigen3/unsupported/Eigen/CXX11/ThreadPool +74 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/Tensor.h +554 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h +329 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h +247 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +1176 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h +1559 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h +1093 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h +518 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h +377 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h +1023 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h +73 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h +6 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h +1413 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h +575 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionSycl.h +1650 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h +1679 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h +456 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h +1132 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorConvolutionSycl.h +544 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h +214 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h +347 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h +137 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h +6 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h +104 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceGpu.h +389 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h +1048 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h +409 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h +236 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h +490 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h +236 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h +983 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h +703 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h +388 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h +669 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h +379 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h +237 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h +191 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h +488 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h +302 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorGlobalFunctions.h +33 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h +99 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaUndefines.h +44 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h +79 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h +603 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h +738 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h +247 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorInitializer.h +82 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h +263 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h +216 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMacros.h +98 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h +327 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h +311 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h +1102 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h +708 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h +291 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h +322 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h +998 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h +6 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h +966 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReductionSycl.h +582 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h +454 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h +465 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h +528 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorScanSycl.h +513 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h +471 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h +161 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h +346 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorTrace.h +303 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h +264 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h +249 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h +629 -0
- include/eigen3/unsupported/Eigen/CXX11/src/TensorSymmetry/DynamicSymmetry.h +293 -0
- include/eigen3/unsupported/Eigen/CXX11/src/TensorSymmetry/StaticSymmetry.h +236 -0
- include/eigen3/unsupported/Eigen/CXX11/src/TensorSymmetry/Symmetry.h +338 -0
- include/eigen3/unsupported/Eigen/CXX11/src/TensorSymmetry/util/TemplateGroupTheory.h +669 -0
- include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/Barrier.h +67 -0
- include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/EventCount.h +249 -0
- include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h +486 -0
- include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/RunQueue.h +236 -0
- include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/ThreadCancel.h +23 -0
- include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/ThreadEnvironment.h +40 -0
- include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/ThreadLocal.h +301 -0
- include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/ThreadPoolInterface.h +48 -0
- include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/ThreadYield.h +20 -0
- include/eigen3/unsupported/Eigen/CXX11/src/util/CXX11Meta.h +537 -0
- include/eigen3/unsupported/Eigen/CXX11/src/util/CXX11Workarounds.h +88 -0
- include/eigen3/unsupported/Eigen/CXX11/src/util/EmulateArray.h +261 -0
- include/eigen3/unsupported/Eigen/CXX11/src/util/MaxSizeVector.h +158 -0
- include/eigen3/unsupported/Eigen/EulerAngles +43 -0
- include/eigen3/unsupported/Eigen/FFT +419 -0
- include/eigen3/unsupported/Eigen/IterativeSolvers +51 -0
- include/eigen3/unsupported/Eigen/KroneckerProduct +36 -0
- include/eigen3/unsupported/Eigen/LevenbergMarquardt +49 -0
- include/eigen3/unsupported/Eigen/MPRealSupport +213 -0
- include/eigen3/unsupported/Eigen/MatrixFunctions +504 -0
- include/eigen3/unsupported/Eigen/MoreVectorization +24 -0
- include/eigen3/unsupported/Eigen/NonLinearOptimization +140 -0
- include/eigen3/unsupported/Eigen/NumericalDiff +56 -0
- include/eigen3/unsupported/Eigen/OpenGLSupport +322 -0
- include/eigen3/unsupported/Eigen/Polynomials +137 -0
- include/eigen3/unsupported/Eigen/Skyline +39 -0
- include/eigen3/unsupported/Eigen/SparseExtra +54 -0
- include/eigen3/unsupported/Eigen/SpecialFunctions +103 -0
- include/eigen3/unsupported/Eigen/Splines +35 -0
- include/eigen3/unsupported/Eigen/src/AutoDiff/AutoDiffJacobian.h +108 -0
- include/eigen3/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h +730 -0
- include/eigen3/unsupported/Eigen/src/AutoDiff/AutoDiffVector.h +220 -0
- include/eigen3/unsupported/Eigen/src/BVH/BVAlgorithms.h +293 -0
- include/eigen3/unsupported/Eigen/src/BVH/KdBVH.h +223 -0
- include/eigen3/unsupported/Eigen/src/Eigenvalues/ArpackSelfAdjointEigenSolver.h +790 -0
- include/eigen3/unsupported/Eigen/src/EulerAngles/EulerAngles.h +355 -0
- include/eigen3/unsupported/Eigen/src/EulerAngles/EulerSystem.h +305 -0
- include/eigen3/unsupported/Eigen/src/FFT/ei_fftw_impl.h +261 -0
- include/eigen3/unsupported/Eigen/src/FFT/ei_kissfft_impl.h +449 -0
- include/eigen3/unsupported/Eigen/src/IterativeSolvers/ConstrainedConjGrad.h +187 -0
- include/eigen3/unsupported/Eigen/src/IterativeSolvers/DGMRES.h +511 -0
- include/eigen3/unsupported/Eigen/src/IterativeSolvers/GMRES.h +335 -0
- include/eigen3/unsupported/Eigen/src/IterativeSolvers/IDRS.h +436 -0
- include/eigen3/unsupported/Eigen/src/IterativeSolvers/IncompleteLU.h +90 -0
- include/eigen3/unsupported/Eigen/src/IterativeSolvers/IterationController.h +154 -0
- include/eigen3/unsupported/Eigen/src/IterativeSolvers/MINRES.h +267 -0
- include/eigen3/unsupported/Eigen/src/IterativeSolvers/Scaling.h +193 -0
- include/eigen3/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h +305 -0
- include/eigen3/unsupported/Eigen/src/LevenbergMarquardt/LMcovar.h +84 -0
- include/eigen3/unsupported/Eigen/src/LevenbergMarquardt/LMonestep.h +202 -0
- include/eigen3/unsupported/Eigen/src/LevenbergMarquardt/LMpar.h +160 -0
- include/eigen3/unsupported/Eigen/src/LevenbergMarquardt/LMqrsolv.h +188 -0
- include/eigen3/unsupported/Eigen/src/LevenbergMarquardt/LevenbergMarquardt.h +396 -0
- include/eigen3/unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h +441 -0
- include/eigen3/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h +569 -0
- include/eigen3/unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h +373 -0
- include/eigen3/unsupported/Eigen/src/MatrixFunctions/MatrixPower.h +705 -0
- include/eigen3/unsupported/Eigen/src/MatrixFunctions/MatrixSquareRoot.h +368 -0
- include/eigen3/unsupported/Eigen/src/MatrixFunctions/StemFunction.h +117 -0
- include/eigen3/unsupported/Eigen/src/MoreVectorization/MathFunctions.h +95 -0
- include/eigen3/unsupported/Eigen/src/NonLinearOptimization/HybridNonLinearSolver.h +601 -0
- include/eigen3/unsupported/Eigen/src/NonLinearOptimization/LevenbergMarquardt.h +657 -0
- include/eigen3/unsupported/Eigen/src/NonLinearOptimization/chkder.h +66 -0
- include/eigen3/unsupported/Eigen/src/NonLinearOptimization/covar.h +70 -0
- include/eigen3/unsupported/Eigen/src/NonLinearOptimization/dogleg.h +107 -0
- include/eigen3/unsupported/Eigen/src/NonLinearOptimization/fdjac1.h +79 -0
- include/eigen3/unsupported/Eigen/src/NonLinearOptimization/lmpar.h +298 -0
- include/eigen3/unsupported/Eigen/src/NonLinearOptimization/qrsolv.h +91 -0
- include/eigen3/unsupported/Eigen/src/NonLinearOptimization/r1mpyq.h +30 -0
- include/eigen3/unsupported/Eigen/src/NonLinearOptimization/r1updt.h +99 -0
- include/eigen3/unsupported/Eigen/src/NonLinearOptimization/rwupdt.h +49 -0
- include/eigen3/unsupported/Eigen/src/NumericalDiff/NumericalDiff.h +130 -0
- include/eigen3/unsupported/Eigen/src/Polynomials/Companion.h +280 -0
- include/eigen3/unsupported/Eigen/src/Polynomials/PolynomialSolver.h +428 -0
- include/eigen3/unsupported/Eigen/src/Polynomials/PolynomialUtils.h +143 -0
- include/eigen3/unsupported/Eigen/src/Skyline/SkylineInplaceLU.h +352 -0
- include/eigen3/unsupported/Eigen/src/Skyline/SkylineMatrix.h +862 -0
- include/eigen3/unsupported/Eigen/src/Skyline/SkylineMatrixBase.h +212 -0
- include/eigen3/unsupported/Eigen/src/Skyline/SkylineProduct.h +295 -0
- include/eigen3/unsupported/Eigen/src/Skyline/SkylineStorage.h +259 -0
- include/eigen3/unsupported/Eigen/src/Skyline/SkylineUtil.h +89 -0
- include/eigen3/unsupported/Eigen/src/SparseExtra/BlockOfDynamicSparseMatrix.h +122 -0
- include/eigen3/unsupported/Eigen/src/SparseExtra/BlockSparseMatrix.h +1079 -0
- include/eigen3/unsupported/Eigen/src/SparseExtra/DynamicSparseMatrix.h +404 -0
- include/eigen3/unsupported/Eigen/src/SparseExtra/MarketIO.h +282 -0
- include/eigen3/unsupported/Eigen/src/SparseExtra/MatrixMarketIterator.h +247 -0
- include/eigen3/unsupported/Eigen/src/SparseExtra/RandomSetter.h +349 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsArrayAPI.h +286 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsBFloat16.h +68 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsFunctors.h +357 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsHalf.h +66 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsImpl.h +1959 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsPacketMath.h +118 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/HipVectorCompatibility.h +67 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsArrayAPI.h +167 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsBFloat16.h +58 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h +330 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h +58 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h +2045 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h +79 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/arch/AVX/BesselFunctions.h +46 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/arch/AVX/SpecialFunctions.h +16 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/arch/AVX512/BesselFunctions.h +46 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/arch/AVX512/SpecialFunctions.h +16 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/arch/GPU/SpecialFunctions.h +369 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/arch/NEON/BesselFunctions.h +54 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/arch/NEON/SpecialFunctions.h +34 -0
- include/eigen3/unsupported/Eigen/src/Splines/Spline.h +507 -0
- include/eigen3/unsupported/Eigen/src/Splines/SplineFitting.h +431 -0
- include/eigen3/unsupported/Eigen/src/Splines/SplineFwd.h +93 -0
- pylibsparseir/__init__.py +31 -0
- pylibsparseir/clean_build_artifacts.py +0 -1
- pylibsparseir/core.py +160 -45
- pylibsparseir/libsparseir.dylib +0 -0
- pylibsparseir-0.5.2.dist-info/METADATA +215 -0
- pylibsparseir-0.5.2.dist-info/RECORD +545 -0
- {pylibsparseir-0.1.0.dist-info → pylibsparseir-0.5.2.dist-info}/WHEEL +1 -1
- pylibsparseir-0.5.2.dist-info/licenses/LICENSE +21 -0
- share/eigen3/cmake/Eigen3Config.cmake +37 -0
- share/eigen3/cmake/Eigen3ConfigVersion.cmake +65 -0
- share/eigen3/cmake/Eigen3Targets.cmake +106 -0
- share/eigen3/cmake/UseEigen3.cmake +6 -0
- share/pkgconfig/eigen3.pc +9 -0
- pylibsparseir/libsparseir.0.4.2.dylib +0 -0
- pylibsparseir/libsparseir.0.dylib +0 -0
- pylibsparseir-0.1.0.dist-info/METADATA +0 -130
- pylibsparseir-0.1.0.dist-info/RECORD +0 -12
- pylibsparseir-0.1.0.dist-info/entry_points.txt +0 -2
- pylibsparseir-0.1.0.dist-info/top_level.txt +0 -1
|
@@ -0,0 +1,513 @@
|
|
|
1
|
+
// This file is part of Eigen, a lightweight C++ template library
|
|
2
|
+
// for linear algebra.
|
|
3
|
+
//
|
|
4
|
+
// Mehdi Goli Codeplay Software Ltd.
|
|
5
|
+
// Ralph Potter Codeplay Software Ltd.
|
|
6
|
+
// Luke Iwanski Codeplay Software Ltd.
|
|
7
|
+
// Contact: <eigen@codeplay.com>
|
|
8
|
+
//
|
|
9
|
+
// This Source Code Form is subject to the terms of the Mozilla
|
|
10
|
+
// Public License v. 2.0. If a copy of the MPL was not distributed
|
|
11
|
+
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
12
|
+
|
|
13
|
+
/*****************************************************************
|
|
14
|
+
* TensorScanSycl.h
|
|
15
|
+
*
|
|
16
|
+
* \brief:
|
|
17
|
+
* Tensor Scan Sycl implements the extended version of
|
|
18
|
+
* "Efficient parallel scan algorithms for GPUs" for Tensor operations.
|
|
19
|
+
* The algorithm requires up to 3 stages (consequently 3 kernels) depending on
|
|
20
|
+
* the size of the tensor. In the first kernel (ScanKernelFunctor), each
|
|
21
|
+
* thread within the work-group individually reduces the allocated elements per
|
|
22
|
+
* thread in order to reduce the total number of blocks. In the next step all
|
|
23
|
+
* threads within the work-group will reduce the associated blocks into the
|
|
24
|
+
* temporary buffers. In the next kernel(ScanBlockKernelFunctor), the temporary
|
|
25
|
+
* buffer is given as an input and all the threads within a work-group scan and
|
|
26
|
+
* reduce the boundaries between the blocks (generated from the previous
|
|
27
|
+
* kernel) and write the data to the temporary buffer. If the second kernel is
|
|
28
|
+
* required, the third and final kernel (ScanAdjustmentKernelFunctor) will
|
|
29
|
+
* adjust the final result into the output buffer.
|
|
30
|
+
* The original algorithm for the parallel prefix sum can be found here:
|
|
31
|
+
*
|
|
32
|
+
* Sengupta, Shubhabrata, Mark Harris, and Michael Garland. "Efficient parallel
|
|
33
|
+
* scan algorithms for GPUs." NVIDIA, Santa Clara, CA, Tech. Rep. NVR-2008-003
|
|
34
|
+
*1, no. 1 (2008): 1-17.
|
|
35
|
+
*****************************************************************/
|
|
36
|
+
|
|
37
|
+
#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSOR_SYCL_SYCL_HPP
|
|
38
|
+
#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSOR_SYCL_SYCL_HPP
|
|
39
|
+
|
|
40
|
+
namespace Eigen {
|
|
41
|
+
namespace TensorSycl {
|
|
42
|
+
namespace internal {
|
|
43
|
+
|
|
44
|
+
#ifndef EIGEN_SYCL_MAX_GLOBAL_RANGE
|
|
45
|
+
#define EIGEN_SYCL_MAX_GLOBAL_RANGE (EIGEN_SYCL_LOCAL_THREAD_DIM0 * EIGEN_SYCL_LOCAL_THREAD_DIM1 * 4)
|
|
46
|
+
#endif
|
|
47
|
+
|
|
48
|
+
template <typename index_t>
|
|
49
|
+
struct ScanParameters {
|
|
50
|
+
// must be power of 2
|
|
51
|
+
static EIGEN_CONSTEXPR index_t ScanPerThread = 8;
|
|
52
|
+
const index_t total_size;
|
|
53
|
+
const index_t non_scan_size;
|
|
54
|
+
const index_t scan_size;
|
|
55
|
+
const index_t non_scan_stride;
|
|
56
|
+
const index_t scan_stride;
|
|
57
|
+
const index_t panel_threads;
|
|
58
|
+
const index_t group_threads;
|
|
59
|
+
const index_t block_threads;
|
|
60
|
+
const index_t elements_per_group;
|
|
61
|
+
const index_t elements_per_block;
|
|
62
|
+
const index_t loop_range;
|
|
63
|
+
|
|
64
|
+
ScanParameters(index_t total_size_, index_t non_scan_size_, index_t scan_size_, index_t non_scan_stride_,
|
|
65
|
+
index_t scan_stride_, index_t panel_threads_, index_t group_threads_, index_t block_threads_,
|
|
66
|
+
index_t elements_per_group_, index_t elements_per_block_, index_t loop_range_)
|
|
67
|
+
: total_size(total_size_),
|
|
68
|
+
non_scan_size(non_scan_size_),
|
|
69
|
+
scan_size(scan_size_),
|
|
70
|
+
non_scan_stride(non_scan_stride_),
|
|
71
|
+
scan_stride(scan_stride_),
|
|
72
|
+
panel_threads(panel_threads_),
|
|
73
|
+
group_threads(group_threads_),
|
|
74
|
+
block_threads(block_threads_),
|
|
75
|
+
elements_per_group(elements_per_group_),
|
|
76
|
+
elements_per_block(elements_per_block_),
|
|
77
|
+
loop_range(loop_range_) {}
|
|
78
|
+
};
|
|
79
|
+
|
|
80
|
+
// Selects which pass of the multi-kernel scan a ScanKernelFunctor runs:
// `first` reads its input through `coeff()` and applies the inclusive-scan
// fix-up; any other value reads its input with `operator[]` and skips it.
enum class scan_step { first, second };
|
|
81
|
+
template <typename Evaluator, typename CoeffReturnType, typename OutAccessor, typename Op, typename Index,
|
|
82
|
+
scan_step stp>
|
|
83
|
+
struct ScanKernelFunctor {
|
|
84
|
+
typedef cl::sycl::accessor<CoeffReturnType, 1, cl::sycl::access::mode::read_write, cl::sycl::access::target::local>
|
|
85
|
+
LocalAccessor;
|
|
86
|
+
static EIGEN_CONSTEXPR int PacketSize = ScanParameters<Index>::ScanPerThread / 2;
|
|
87
|
+
|
|
88
|
+
LocalAccessor scratch;
|
|
89
|
+
Evaluator dev_eval;
|
|
90
|
+
OutAccessor out_accessor;
|
|
91
|
+
OutAccessor temp_accessor;
|
|
92
|
+
const ScanParameters<Index> scanParameters;
|
|
93
|
+
Op accumulator;
|
|
94
|
+
const bool inclusive;
|
|
95
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ScanKernelFunctor(LocalAccessor scratch_, const Evaluator dev_eval_,
|
|
96
|
+
OutAccessor out_accessor_, OutAccessor temp_accessor_,
|
|
97
|
+
const ScanParameters<Index> scanParameters_, Op accumulator_,
|
|
98
|
+
const bool inclusive_)
|
|
99
|
+
: scratch(scratch_),
|
|
100
|
+
dev_eval(dev_eval_),
|
|
101
|
+
out_accessor(out_accessor_),
|
|
102
|
+
temp_accessor(temp_accessor_),
|
|
103
|
+
scanParameters(scanParameters_),
|
|
104
|
+
accumulator(accumulator_),
|
|
105
|
+
inclusive(inclusive_) {}
|
|
106
|
+
|
|
107
|
+
template <scan_step sst = stp, typename Input>
|
|
108
|
+
typename ::Eigen::internal::enable_if<sst == scan_step::first, CoeffReturnType>::type EIGEN_DEVICE_FUNC
|
|
109
|
+
EIGEN_STRONG_INLINE
|
|
110
|
+
read(const Input &inpt, Index global_id) {
|
|
111
|
+
return inpt.coeff(global_id);
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
template <scan_step sst = stp, typename Input>
|
|
115
|
+
typename ::Eigen::internal::enable_if<sst != scan_step::first, CoeffReturnType>::type EIGEN_DEVICE_FUNC
|
|
116
|
+
EIGEN_STRONG_INLINE
|
|
117
|
+
read(const Input &inpt, Index global_id) {
|
|
118
|
+
return inpt[global_id];
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
template <scan_step sst = stp, typename InclusiveOp>
|
|
122
|
+
typename ::Eigen::internal::enable_if<sst == scan_step::first>::type EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
|
123
|
+
first_step_inclusive_Operation(InclusiveOp inclusive_op) {
|
|
124
|
+
inclusive_op();
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
template <scan_step sst = stp, typename InclusiveOp>
|
|
128
|
+
typename ::Eigen::internal::enable_if<sst != scan_step::first>::type EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
|
129
|
+
first_step_inclusive_Operation(InclusiveOp) {}
|
|
130
|
+
|
|
131
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator()(cl::sycl::nd_item<1> itemID) {
|
|
132
|
+
auto out_ptr = out_accessor.get_pointer();
|
|
133
|
+
auto tmp_ptr = temp_accessor.get_pointer();
|
|
134
|
+
auto scratch_ptr = scratch.get_pointer().get();
|
|
135
|
+
|
|
136
|
+
for (Index loop_offset = 0; loop_offset < scanParameters.loop_range; loop_offset++) {
|
|
137
|
+
Index data_offset = (itemID.get_global_id(0) + (itemID.get_global_range(0) * loop_offset));
|
|
138
|
+
Index tmp = data_offset % scanParameters.panel_threads;
|
|
139
|
+
const Index panel_id = data_offset / scanParameters.panel_threads;
|
|
140
|
+
const Index group_id = tmp / scanParameters.group_threads;
|
|
141
|
+
tmp = tmp % scanParameters.group_threads;
|
|
142
|
+
const Index block_id = tmp / scanParameters.block_threads;
|
|
143
|
+
const Index local_id = tmp % scanParameters.block_threads;
|
|
144
|
+
// we put one element per packet in scratch_mem
|
|
145
|
+
const Index scratch_stride = scanParameters.elements_per_block / PacketSize;
|
|
146
|
+
const Index scratch_offset = (itemID.get_local_id(0) / scanParameters.block_threads) * scratch_stride;
|
|
147
|
+
CoeffReturnType private_scan[ScanParameters<Index>::ScanPerThread];
|
|
148
|
+
CoeffReturnType inclusive_scan;
|
|
149
|
+
// the actual panel size is scan_size * non_scan_size.
|
|
150
|
+
// elements_per_panel is roundup to power of 2 for binary tree
|
|
151
|
+
const Index panel_offset = panel_id * scanParameters.scan_size * scanParameters.non_scan_size;
|
|
152
|
+
const Index group_offset = group_id * scanParameters.non_scan_stride;
|
|
153
|
+
// This will be effective when the size is bigger than elements_per_block
|
|
154
|
+
const Index block_offset = block_id * scanParameters.elements_per_block * scanParameters.scan_stride;
|
|
155
|
+
const Index thread_offset = (ScanParameters<Index>::ScanPerThread * local_id * scanParameters.scan_stride);
|
|
156
|
+
const Index global_offset = panel_offset + group_offset + block_offset + thread_offset;
|
|
157
|
+
Index next_elements = 0;
|
|
158
|
+
EIGEN_UNROLL_LOOP
|
|
159
|
+
for (int i = 0; i < ScanParameters<Index>::ScanPerThread; i++) {
|
|
160
|
+
Index global_id = global_offset + next_elements;
|
|
161
|
+
private_scan[i] = ((((block_id * scanParameters.elements_per_block) +
|
|
162
|
+
(ScanParameters<Index>::ScanPerThread * local_id) + i) < scanParameters.scan_size) &&
|
|
163
|
+
(global_id < scanParameters.total_size))
|
|
164
|
+
? read(dev_eval, global_id)
|
|
165
|
+
: accumulator.initialize();
|
|
166
|
+
next_elements += scanParameters.scan_stride;
|
|
167
|
+
}
|
|
168
|
+
first_step_inclusive_Operation([&]() EIGEN_DEVICE_FUNC {
|
|
169
|
+
if (inclusive) {
|
|
170
|
+
inclusive_scan = private_scan[ScanParameters<Index>::ScanPerThread - 1];
|
|
171
|
+
}
|
|
172
|
+
});
|
|
173
|
+
// This for loop must be 2
|
|
174
|
+
EIGEN_UNROLL_LOOP
|
|
175
|
+
for (int packetIndex = 0; packetIndex < ScanParameters<Index>::ScanPerThread; packetIndex += PacketSize) {
|
|
176
|
+
Index private_offset = 1;
|
|
177
|
+
// build sum in place up the tree
|
|
178
|
+
EIGEN_UNROLL_LOOP
|
|
179
|
+
for (Index d = PacketSize >> 1; d > 0; d >>= 1) {
|
|
180
|
+
EIGEN_UNROLL_LOOP
|
|
181
|
+
for (Index l = 0; l < d; l++) {
|
|
182
|
+
Index ai = private_offset * (2 * l + 1) - 1 + packetIndex;
|
|
183
|
+
Index bi = private_offset * (2 * l + 2) - 1 + packetIndex;
|
|
184
|
+
CoeffReturnType accum = accumulator.initialize();
|
|
185
|
+
accumulator.reduce(private_scan[ai], &accum);
|
|
186
|
+
accumulator.reduce(private_scan[bi], &accum);
|
|
187
|
+
private_scan[bi] = accumulator.finalize(accum);
|
|
188
|
+
}
|
|
189
|
+
private_offset *= 2;
|
|
190
|
+
}
|
|
191
|
+
scratch_ptr[2 * local_id + (packetIndex / PacketSize) + scratch_offset] =
|
|
192
|
+
private_scan[PacketSize - 1 + packetIndex];
|
|
193
|
+
private_scan[PacketSize - 1 + packetIndex] = accumulator.initialize();
|
|
194
|
+
// traverse down tree & build scan
|
|
195
|
+
EIGEN_UNROLL_LOOP
|
|
196
|
+
for (Index d = 1; d < PacketSize; d *= 2) {
|
|
197
|
+
private_offset >>= 1;
|
|
198
|
+
EIGEN_UNROLL_LOOP
|
|
199
|
+
for (Index l = 0; l < d; l++) {
|
|
200
|
+
Index ai = private_offset * (2 * l + 1) - 1 + packetIndex;
|
|
201
|
+
Index bi = private_offset * (2 * l + 2) - 1 + packetIndex;
|
|
202
|
+
CoeffReturnType accum = accumulator.initialize();
|
|
203
|
+
accumulator.reduce(private_scan[ai], &accum);
|
|
204
|
+
accumulator.reduce(private_scan[bi], &accum);
|
|
205
|
+
private_scan[ai] = private_scan[bi];
|
|
206
|
+
private_scan[bi] = accumulator.finalize(accum);
|
|
207
|
+
}
|
|
208
|
+
}
|
|
209
|
+
}
|
|
210
|
+
|
|
211
|
+
Index offset = 1;
|
|
212
|
+
// build sum in place up the tree
|
|
213
|
+
for (Index d = scratch_stride >> 1; d > 0; d >>= 1) {
|
|
214
|
+
// Synchronise
|
|
215
|
+
itemID.barrier(cl::sycl::access::fence_space::local_space);
|
|
216
|
+
if (local_id < d) {
|
|
217
|
+
Index ai = offset * (2 * local_id + 1) - 1 + scratch_offset;
|
|
218
|
+
Index bi = offset * (2 * local_id + 2) - 1 + scratch_offset;
|
|
219
|
+
CoeffReturnType accum = accumulator.initialize();
|
|
220
|
+
accumulator.reduce(scratch_ptr[ai], &accum);
|
|
221
|
+
accumulator.reduce(scratch_ptr[bi], &accum);
|
|
222
|
+
scratch_ptr[bi] = accumulator.finalize(accum);
|
|
223
|
+
}
|
|
224
|
+
offset *= 2;
|
|
225
|
+
}
|
|
226
|
+
// Synchronise
|
|
227
|
+
itemID.barrier(cl::sycl::access::fence_space::local_space);
|
|
228
|
+
// next step optimisation
|
|
229
|
+
if (local_id == 0) {
|
|
230
|
+
if (((scanParameters.elements_per_group / scanParameters.elements_per_block) > 1)) {
|
|
231
|
+
const Index temp_id = panel_id * (scanParameters.elements_per_group / scanParameters.elements_per_block) *
|
|
232
|
+
scanParameters.non_scan_size +
|
|
233
|
+
group_id * (scanParameters.elements_per_group / scanParameters.elements_per_block) +
|
|
234
|
+
block_id;
|
|
235
|
+
tmp_ptr[temp_id] = scratch_ptr[scratch_stride - 1 + scratch_offset];
|
|
236
|
+
}
|
|
237
|
+
// clear the last element
|
|
238
|
+
scratch_ptr[scratch_stride - 1 + scratch_offset] = accumulator.initialize();
|
|
239
|
+
}
|
|
240
|
+
// traverse down tree & build scan
|
|
241
|
+
for (Index d = 1; d < scratch_stride; d *= 2) {
|
|
242
|
+
offset >>= 1;
|
|
243
|
+
// Synchronise
|
|
244
|
+
itemID.barrier(cl::sycl::access::fence_space::local_space);
|
|
245
|
+
if (local_id < d) {
|
|
246
|
+
Index ai = offset * (2 * local_id + 1) - 1 + scratch_offset;
|
|
247
|
+
Index bi = offset * (2 * local_id + 2) - 1 + scratch_offset;
|
|
248
|
+
CoeffReturnType accum = accumulator.initialize();
|
|
249
|
+
accumulator.reduce(scratch_ptr[ai], &accum);
|
|
250
|
+
accumulator.reduce(scratch_ptr[bi], &accum);
|
|
251
|
+
scratch_ptr[ai] = scratch_ptr[bi];
|
|
252
|
+
scratch_ptr[bi] = accumulator.finalize(accum);
|
|
253
|
+
}
|
|
254
|
+
}
|
|
255
|
+
// Synchronise
|
|
256
|
+
itemID.barrier(cl::sycl::access::fence_space::local_space);
|
|
257
|
+
// This for loop must be 2
|
|
258
|
+
EIGEN_UNROLL_LOOP
|
|
259
|
+
for (int packetIndex = 0; packetIndex < ScanParameters<Index>::ScanPerThread; packetIndex += PacketSize) {
|
|
260
|
+
EIGEN_UNROLL_LOOP
|
|
261
|
+
for (Index i = 0; i < PacketSize; i++) {
|
|
262
|
+
CoeffReturnType accum = private_scan[packetIndex + i];
|
|
263
|
+
accumulator.reduce(scratch_ptr[2 * local_id + (packetIndex / PacketSize) + scratch_offset], &accum);
|
|
264
|
+
private_scan[packetIndex + i] = accumulator.finalize(accum);
|
|
265
|
+
}
|
|
266
|
+
}
|
|
267
|
+
first_step_inclusive_Operation([&]() EIGEN_DEVICE_FUNC {
|
|
268
|
+
if (inclusive) {
|
|
269
|
+
accumulator.reduce(private_scan[ScanParameters<Index>::ScanPerThread - 1], &inclusive_scan);
|
|
270
|
+
private_scan[0] = accumulator.finalize(inclusive_scan);
|
|
271
|
+
}
|
|
272
|
+
});
|
|
273
|
+
next_elements = 0;
|
|
274
|
+
// right the first set of private param
|
|
275
|
+
EIGEN_UNROLL_LOOP
|
|
276
|
+
for (Index i = 0; i < ScanParameters<Index>::ScanPerThread; i++) {
|
|
277
|
+
Index global_id = global_offset + next_elements;
|
|
278
|
+
if ((((block_id * scanParameters.elements_per_block) + (ScanParameters<Index>::ScanPerThread * local_id) + i) <
|
|
279
|
+
scanParameters.scan_size) &&
|
|
280
|
+
(global_id < scanParameters.total_size)) {
|
|
281
|
+
Index private_id = (i * !inclusive) + (((i + 1) % ScanParameters<Index>::ScanPerThread) * (inclusive));
|
|
282
|
+
out_ptr[global_id] = private_scan[private_id];
|
|
283
|
+
}
|
|
284
|
+
next_elements += scanParameters.scan_stride;
|
|
285
|
+
}
|
|
286
|
+
} // end for loop
|
|
287
|
+
}
|
|
288
|
+
};
|
|
289
|
+
|
|
290
|
+
template <typename CoeffReturnType, typename InAccessor, typename OutAccessor, typename Op, typename Index>
|
|
291
|
+
struct ScanAdjustmentKernelFunctor {
|
|
292
|
+
typedef cl::sycl::accessor<CoeffReturnType, 1, cl::sycl::access::mode::read_write, cl::sycl::access::target::local>
|
|
293
|
+
LocalAccessor;
|
|
294
|
+
static EIGEN_CONSTEXPR int PacketSize = ScanParameters<Index>::ScanPerThread / 2;
|
|
295
|
+
InAccessor in_accessor;
|
|
296
|
+
OutAccessor out_accessor;
|
|
297
|
+
const ScanParameters<Index> scanParameters;
|
|
298
|
+
Op accumulator;
|
|
299
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ScanAdjustmentKernelFunctor(LocalAccessor, InAccessor in_accessor_,
|
|
300
|
+
OutAccessor out_accessor_,
|
|
301
|
+
const ScanParameters<Index> scanParameters_,
|
|
302
|
+
Op accumulator_)
|
|
303
|
+
: in_accessor(in_accessor_),
|
|
304
|
+
out_accessor(out_accessor_),
|
|
305
|
+
scanParameters(scanParameters_),
|
|
306
|
+
accumulator(accumulator_) {}
|
|
307
|
+
|
|
308
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator()(cl::sycl::nd_item<1> itemID) {
|
|
309
|
+
auto in_ptr = in_accessor.get_pointer();
|
|
310
|
+
auto out_ptr = out_accessor.get_pointer();
|
|
311
|
+
|
|
312
|
+
for (Index loop_offset = 0; loop_offset < scanParameters.loop_range; loop_offset++) {
|
|
313
|
+
Index data_offset = (itemID.get_global_id(0) + (itemID.get_global_range(0) * loop_offset));
|
|
314
|
+
Index tmp = data_offset % scanParameters.panel_threads;
|
|
315
|
+
const Index panel_id = data_offset / scanParameters.panel_threads;
|
|
316
|
+
const Index group_id = tmp / scanParameters.group_threads;
|
|
317
|
+
tmp = tmp % scanParameters.group_threads;
|
|
318
|
+
const Index block_id = tmp / scanParameters.block_threads;
|
|
319
|
+
const Index local_id = tmp % scanParameters.block_threads;
|
|
320
|
+
|
|
321
|
+
// the actual panel size is scan_size * non_scan_size.
|
|
322
|
+
// elements_per_panel is roundup to power of 2 for binary tree
|
|
323
|
+
const Index panel_offset = panel_id * scanParameters.scan_size * scanParameters.non_scan_size;
|
|
324
|
+
const Index group_offset = group_id * scanParameters.non_scan_stride;
|
|
325
|
+
// This will be effective when the size is bigger than elements_per_block
|
|
326
|
+
const Index block_offset = block_id * scanParameters.elements_per_block * scanParameters.scan_stride;
|
|
327
|
+
const Index thread_offset = ScanParameters<Index>::ScanPerThread * local_id * scanParameters.scan_stride;
|
|
328
|
+
|
|
329
|
+
const Index global_offset = panel_offset + group_offset + block_offset + thread_offset;
|
|
330
|
+
const Index block_size = scanParameters.elements_per_group / scanParameters.elements_per_block;
|
|
331
|
+
const Index in_id = (panel_id * block_size * scanParameters.non_scan_size) + (group_id * block_size) + block_id;
|
|
332
|
+
CoeffReturnType adjust_val = in_ptr[in_id];
|
|
333
|
+
|
|
334
|
+
Index next_elements = 0;
|
|
335
|
+
EIGEN_UNROLL_LOOP
|
|
336
|
+
for (Index i = 0; i < ScanParameters<Index>::ScanPerThread; i++) {
|
|
337
|
+
Index global_id = global_offset + next_elements;
|
|
338
|
+
if ((((block_id * scanParameters.elements_per_block) + (ScanParameters<Index>::ScanPerThread * local_id) + i) <
|
|
339
|
+
scanParameters.scan_size) &&
|
|
340
|
+
(global_id < scanParameters.total_size)) {
|
|
341
|
+
CoeffReturnType accum = adjust_val;
|
|
342
|
+
accumulator.reduce(out_ptr[global_id], &accum);
|
|
343
|
+
out_ptr[global_id] = accumulator.finalize(accum);
|
|
344
|
+
}
|
|
345
|
+
next_elements += scanParameters.scan_stride;
|
|
346
|
+
}
|
|
347
|
+
}
|
|
348
|
+
}
|
|
349
|
+
};
|
|
350
|
+
|
|
351
|
+
template <typename Index>
|
|
352
|
+
struct ScanInfo {
|
|
353
|
+
const Index &total_size;
|
|
354
|
+
const Index &scan_size;
|
|
355
|
+
const Index &panel_size;
|
|
356
|
+
const Index &non_scan_size;
|
|
357
|
+
const Index &scan_stride;
|
|
358
|
+
const Index &non_scan_stride;
|
|
359
|
+
|
|
360
|
+
Index max_elements_per_block;
|
|
361
|
+
Index block_size;
|
|
362
|
+
Index panel_threads;
|
|
363
|
+
Index group_threads;
|
|
364
|
+
Index block_threads;
|
|
365
|
+
Index elements_per_group;
|
|
366
|
+
Index elements_per_block;
|
|
367
|
+
Index loop_range;
|
|
368
|
+
Index global_range;
|
|
369
|
+
Index local_range;
|
|
370
|
+
const Eigen::SyclDevice &dev;
|
|
371
|
+
EIGEN_STRONG_INLINE ScanInfo(const Index &total_size_, const Index &scan_size_, const Index &panel_size_,
|
|
372
|
+
const Index &non_scan_size_, const Index &scan_stride_, const Index &non_scan_stride_,
|
|
373
|
+
const Eigen::SyclDevice &dev_)
|
|
374
|
+
: total_size(total_size_),
|
|
375
|
+
scan_size(scan_size_),
|
|
376
|
+
panel_size(panel_size_),
|
|
377
|
+
non_scan_size(non_scan_size_),
|
|
378
|
+
scan_stride(scan_stride_),
|
|
379
|
+
non_scan_stride(non_scan_stride_),
|
|
380
|
+
dev(dev_) {
|
|
381
|
+
// must be power of 2
|
|
382
|
+
local_range = std::min(Index(dev.getNearestPowerOfTwoWorkGroupSize()),
|
|
383
|
+
Index(EIGEN_SYCL_LOCAL_THREAD_DIM0 * EIGEN_SYCL_LOCAL_THREAD_DIM1));
|
|
384
|
+
|
|
385
|
+
max_elements_per_block = local_range * ScanParameters<Index>::ScanPerThread;
|
|
386
|
+
|
|
387
|
+
elements_per_group =
|
|
388
|
+
dev.getPowerOfTwo(Index(roundUp(Index(scan_size), ScanParameters<Index>::ScanPerThread)), true);
|
|
389
|
+
const Index elements_per_panel = elements_per_group * non_scan_size;
|
|
390
|
+
elements_per_block = std::min(Index(elements_per_group), Index(max_elements_per_block));
|
|
391
|
+
panel_threads = elements_per_panel / ScanParameters<Index>::ScanPerThread;
|
|
392
|
+
group_threads = elements_per_group / ScanParameters<Index>::ScanPerThread;
|
|
393
|
+
block_threads = elements_per_block / ScanParameters<Index>::ScanPerThread;
|
|
394
|
+
block_size = elements_per_group / elements_per_block;
|
|
395
|
+
#ifdef EIGEN_SYCL_MAX_GLOBAL_RANGE
|
|
396
|
+
const Index max_threads = std::min(Index(panel_threads * panel_size), Index(EIGEN_SYCL_MAX_GLOBAL_RANGE));
|
|
397
|
+
#else
|
|
398
|
+
const Index max_threads = panel_threads * panel_size;
|
|
399
|
+
#endif
|
|
400
|
+
global_range = roundUp(max_threads, local_range);
|
|
401
|
+
loop_range = Index(
|
|
402
|
+
std::ceil(double(elements_per_panel * panel_size) / (global_range * ScanParameters<Index>::ScanPerThread)));
|
|
403
|
+
}
|
|
404
|
+
inline ScanParameters<Index> get_scan_parameter() {
|
|
405
|
+
return ScanParameters<Index>(total_size, non_scan_size, scan_size, non_scan_stride, scan_stride, panel_threads,
|
|
406
|
+
group_threads, block_threads, elements_per_group, elements_per_block, loop_range);
|
|
407
|
+
}
|
|
408
|
+
inline cl::sycl::nd_range<1> get_thread_range() {
|
|
409
|
+
return cl::sycl::nd_range<1>(cl::sycl::range<1>(global_range), cl::sycl::range<1>(local_range));
|
|
410
|
+
}
|
|
411
|
+
};
|
|
412
|
+
|
|
413
|
+
template <typename EvaluatorPointerType, typename CoeffReturnType, typename Reducer, typename Index>
|
|
414
|
+
struct SYCLAdjustBlockOffset {
|
|
415
|
+
EIGEN_STRONG_INLINE static void adjust_scan_block_offset(EvaluatorPointerType in_ptr, EvaluatorPointerType out_ptr,
|
|
416
|
+
Reducer &accumulator, const Index total_size,
|
|
417
|
+
const Index scan_size, const Index panel_size,
|
|
418
|
+
const Index non_scan_size, const Index scan_stride,
|
|
419
|
+
const Index non_scan_stride, const Eigen::SyclDevice &dev) {
|
|
420
|
+
auto scan_info =
|
|
421
|
+
ScanInfo<Index>(total_size, scan_size, panel_size, non_scan_size, scan_stride, non_scan_stride, dev);
|
|
422
|
+
|
|
423
|
+
typedef ScanAdjustmentKernelFunctor<CoeffReturnType, EvaluatorPointerType, EvaluatorPointerType, Reducer, Index>
|
|
424
|
+
AdjustFuctor;
|
|
425
|
+
dev.template unary_kernel_launcher<CoeffReturnType, AdjustFuctor>(in_ptr, out_ptr, scan_info.get_thread_range(),
|
|
426
|
+
scan_info.max_elements_per_block,
|
|
427
|
+
scan_info.get_scan_parameter(), accumulator);
|
|
428
|
+
}
|
|
429
|
+
};
|
|
430
|
+
|
|
431
|
+
template <typename CoeffReturnType, scan_step stp>
|
|
432
|
+
struct ScanLauncher_impl {
|
|
433
|
+
template <typename Input, typename EvaluatorPointerType, typename Reducer, typename Index>
|
|
434
|
+
EIGEN_STRONG_INLINE static void scan_block(Input in_ptr, EvaluatorPointerType out_ptr, Reducer &accumulator,
|
|
435
|
+
const Index total_size, const Index scan_size, const Index panel_size,
|
|
436
|
+
const Index non_scan_size, const Index scan_stride,
|
|
437
|
+
const Index non_scan_stride, const bool inclusive,
|
|
438
|
+
const Eigen::SyclDevice &dev) {
|
|
439
|
+
auto scan_info =
|
|
440
|
+
ScanInfo<Index>(total_size, scan_size, panel_size, non_scan_size, scan_stride, non_scan_stride, dev);
|
|
441
|
+
const Index temp_pointer_size = scan_info.block_size * non_scan_size * panel_size;
|
|
442
|
+
const Index scratch_size = scan_info.max_elements_per_block / (ScanParameters<Index>::ScanPerThread / 2);
|
|
443
|
+
CoeffReturnType *temp_pointer =
|
|
444
|
+
static_cast<CoeffReturnType *>(dev.allocate_temp(temp_pointer_size * sizeof(CoeffReturnType)));
|
|
445
|
+
EvaluatorPointerType tmp_global_accessor = dev.get(temp_pointer);
|
|
446
|
+
|
|
447
|
+
typedef ScanKernelFunctor<Input, CoeffReturnType, EvaluatorPointerType, Reducer, Index, stp> ScanFunctor;
|
|
448
|
+
dev.template binary_kernel_launcher<CoeffReturnType, ScanFunctor>(
|
|
449
|
+
in_ptr, out_ptr, tmp_global_accessor, scan_info.get_thread_range(), scratch_size,
|
|
450
|
+
scan_info.get_scan_parameter(), accumulator, inclusive);
|
|
451
|
+
|
|
452
|
+
if (scan_info.block_size > 1) {
|
|
453
|
+
ScanLauncher_impl<CoeffReturnType, scan_step::second>::scan_block(
|
|
454
|
+
tmp_global_accessor, tmp_global_accessor, accumulator, temp_pointer_size, scan_info.block_size, panel_size,
|
|
455
|
+
non_scan_size, Index(1), scan_info.block_size, false, dev);
|
|
456
|
+
|
|
457
|
+
SYCLAdjustBlockOffset<EvaluatorPointerType, CoeffReturnType, Reducer, Index>::adjust_scan_block_offset(
|
|
458
|
+
tmp_global_accessor, out_ptr, accumulator, total_size, scan_size, panel_size, non_scan_size, scan_stride,
|
|
459
|
+
non_scan_stride, dev);
|
|
460
|
+
}
|
|
461
|
+
dev.deallocate_temp(temp_pointer);
|
|
462
|
+
}
|
|
463
|
+
};
|
|
464
|
+
|
|
465
|
+
} // namespace internal
|
|
466
|
+
} // namespace TensorSycl
|
|
467
|
+
namespace internal {
|
|
468
|
+
template <typename Self, typename Reducer, bool vectorize>
|
|
469
|
+
struct ScanLauncher<Self, Reducer, Eigen::SyclDevice, vectorize> {
|
|
470
|
+
typedef typename Self::Index Index;
|
|
471
|
+
typedef typename Self::CoeffReturnType CoeffReturnType;
|
|
472
|
+
typedef typename Self::Storage Storage;
|
|
473
|
+
typedef typename Self::EvaluatorPointerType EvaluatorPointerType;
|
|
474
|
+
void operator()(Self &self, EvaluatorPointerType data) {
|
|
475
|
+
const Index total_size = internal::array_prod(self.dimensions());
|
|
476
|
+
const Index scan_size = self.size();
|
|
477
|
+
const Index scan_stride = self.stride();
|
|
478
|
+
// this is the scan op (can be sum or ...)
|
|
479
|
+
auto accumulator = self.accumulator();
|
|
480
|
+
auto inclusive = !self.exclusive();
|
|
481
|
+
auto consume_dim = self.consume_dim();
|
|
482
|
+
auto dev = self.device();
|
|
483
|
+
|
|
484
|
+
auto dims = self.inner().dimensions();
|
|
485
|
+
|
|
486
|
+
Index non_scan_size = 1;
|
|
487
|
+
Index panel_size = 1;
|
|
488
|
+
if (static_cast<int>(Self::Layout) == static_cast<int>(ColMajor)) {
|
|
489
|
+
for (int i = 0; i < consume_dim; i++) {
|
|
490
|
+
non_scan_size *= dims[i];
|
|
491
|
+
}
|
|
492
|
+
for (int i = consume_dim + 1; i < Self::NumDims; i++) {
|
|
493
|
+
panel_size *= dims[i];
|
|
494
|
+
}
|
|
495
|
+
} else {
|
|
496
|
+
for (int i = Self::NumDims - 1; i > consume_dim; i--) {
|
|
497
|
+
non_scan_size *= dims[i];
|
|
498
|
+
}
|
|
499
|
+
for (int i = consume_dim - 1; i >= 0; i--) {
|
|
500
|
+
panel_size *= dims[i];
|
|
501
|
+
}
|
|
502
|
+
}
|
|
503
|
+
const Index non_scan_stride = (scan_stride > 1) ? 1 : scan_size;
|
|
504
|
+
auto eval_impl = self.inner();
|
|
505
|
+
TensorSycl::internal::ScanLauncher_impl<CoeffReturnType, TensorSycl::internal::scan_step::first>::scan_block(
|
|
506
|
+
eval_impl, data, accumulator, total_size, scan_size, panel_size, non_scan_size, scan_stride, non_scan_stride,
|
|
507
|
+
inclusive, dev);
|
|
508
|
+
}
|
|
509
|
+
};
|
|
510
|
+
} // namespace internal
|
|
511
|
+
} // namespace Eigen
|
|
512
|
+
|
|
513
|
+
#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSOR_SYCL_SYCL_HPP
|