pyvale 2026.1.1__cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- include/eigen3/Eigen/Cholesky +45 -0
- include/eigen3/Eigen/CholmodSupport +48 -0
- include/eigen3/Eigen/Core +384 -0
- include/eigen3/Eigen/Dense +7 -0
- include/eigen3/Eigen/Eigen +2 -0
- include/eigen3/Eigen/Eigenvalues +60 -0
- include/eigen3/Eigen/Geometry +59 -0
- include/eigen3/Eigen/Householder +29 -0
- include/eigen3/Eigen/IterativeLinearSolvers +48 -0
- include/eigen3/Eigen/Jacobi +32 -0
- include/eigen3/Eigen/KLUSupport +41 -0
- include/eigen3/Eigen/LU +47 -0
- include/eigen3/Eigen/MetisSupport +35 -0
- include/eigen3/Eigen/OrderingMethods +70 -0
- include/eigen3/Eigen/PaStiXSupport +49 -0
- include/eigen3/Eigen/PardisoSupport +35 -0
- include/eigen3/Eigen/QR +50 -0
- include/eigen3/Eigen/QtAlignedMalloc +39 -0
- include/eigen3/Eigen/SPQRSupport +34 -0
- include/eigen3/Eigen/SVD +50 -0
- include/eigen3/Eigen/Sparse +34 -0
- include/eigen3/Eigen/SparseCholesky +37 -0
- include/eigen3/Eigen/SparseCore +69 -0
- include/eigen3/Eigen/SparseLU +50 -0
- include/eigen3/Eigen/SparseQR +36 -0
- include/eigen3/Eigen/StdDeque +27 -0
- include/eigen3/Eigen/StdList +26 -0
- include/eigen3/Eigen/StdVector +27 -0
- include/eigen3/Eigen/SuperLUSupport +64 -0
- include/eigen3/Eigen/UmfPackSupport +40 -0
- include/eigen3/Eigen/src/Cholesky/LDLT.h +688 -0
- include/eigen3/Eigen/src/Cholesky/LLT.h +558 -0
- include/eigen3/Eigen/src/Cholesky/LLT_LAPACKE.h +99 -0
- include/eigen3/Eigen/src/CholmodSupport/CholmodSupport.h +682 -0
- include/eigen3/Eigen/src/Core/ArithmeticSequence.h +413 -0
- include/eigen3/Eigen/src/Core/Array.h +417 -0
- include/eigen3/Eigen/src/Core/ArrayBase.h +226 -0
- include/eigen3/Eigen/src/Core/ArrayWrapper.h +209 -0
- include/eigen3/Eigen/src/Core/Assign.h +90 -0
- include/eigen3/Eigen/src/Core/AssignEvaluator.h +1010 -0
- include/eigen3/Eigen/src/Core/Assign_MKL.h +178 -0
- include/eigen3/Eigen/src/Core/BandMatrix.h +353 -0
- include/eigen3/Eigen/src/Core/Block.h +448 -0
- include/eigen3/Eigen/src/Core/BooleanRedux.h +162 -0
- include/eigen3/Eigen/src/Core/CommaInitializer.h +164 -0
- include/eigen3/Eigen/src/Core/ConditionEstimator.h +175 -0
- include/eigen3/Eigen/src/Core/CoreEvaluators.h +1741 -0
- include/eigen3/Eigen/src/Core/CoreIterators.h +132 -0
- include/eigen3/Eigen/src/Core/CwiseBinaryOp.h +183 -0
- include/eigen3/Eigen/src/Core/CwiseNullaryOp.h +1001 -0
- include/eigen3/Eigen/src/Core/CwiseTernaryOp.h +197 -0
- include/eigen3/Eigen/src/Core/CwiseUnaryOp.h +103 -0
- include/eigen3/Eigen/src/Core/CwiseUnaryView.h +132 -0
- include/eigen3/Eigen/src/Core/DenseBase.h +701 -0
- include/eigen3/Eigen/src/Core/DenseCoeffsBase.h +685 -0
- include/eigen3/Eigen/src/Core/DenseStorage.h +652 -0
- include/eigen3/Eigen/src/Core/Diagonal.h +258 -0
- include/eigen3/Eigen/src/Core/DiagonalMatrix.h +391 -0
- include/eigen3/Eigen/src/Core/DiagonalProduct.h +28 -0
- include/eigen3/Eigen/src/Core/Dot.h +318 -0
- include/eigen3/Eigen/src/Core/EigenBase.h +160 -0
- include/eigen3/Eigen/src/Core/ForceAlignedAccess.h +150 -0
- include/eigen3/Eigen/src/Core/Fuzzy.h +155 -0
- include/eigen3/Eigen/src/Core/GeneralProduct.h +465 -0
- include/eigen3/Eigen/src/Core/GenericPacketMath.h +1040 -0
- include/eigen3/Eigen/src/Core/GlobalFunctions.h +194 -0
- include/eigen3/Eigen/src/Core/IO.h +258 -0
- include/eigen3/Eigen/src/Core/IndexedView.h +237 -0
- include/eigen3/Eigen/src/Core/Inverse.h +117 -0
- include/eigen3/Eigen/src/Core/Map.h +171 -0
- include/eigen3/Eigen/src/Core/MapBase.h +310 -0
- include/eigen3/Eigen/src/Core/MathFunctions.h +2057 -0
- include/eigen3/Eigen/src/Core/MathFunctionsImpl.h +200 -0
- include/eigen3/Eigen/src/Core/Matrix.h +565 -0
- include/eigen3/Eigen/src/Core/MatrixBase.h +547 -0
- include/eigen3/Eigen/src/Core/NestByValue.h +85 -0
- include/eigen3/Eigen/src/Core/NoAlias.h +109 -0
- include/eigen3/Eigen/src/Core/NumTraits.h +335 -0
- include/eigen3/Eigen/src/Core/PartialReduxEvaluator.h +232 -0
- include/eigen3/Eigen/src/Core/PermutationMatrix.h +605 -0
- include/eigen3/Eigen/src/Core/PlainObjectBase.h +1128 -0
- include/eigen3/Eigen/src/Core/Product.h +191 -0
- include/eigen3/Eigen/src/Core/ProductEvaluators.h +1179 -0
- include/eigen3/Eigen/src/Core/Random.h +218 -0
- include/eigen3/Eigen/src/Core/Redux.h +515 -0
- include/eigen3/Eigen/src/Core/Ref.h +381 -0
- include/eigen3/Eigen/src/Core/Replicate.h +142 -0
- include/eigen3/Eigen/src/Core/Reshaped.h +454 -0
- include/eigen3/Eigen/src/Core/ReturnByValue.h +119 -0
- include/eigen3/Eigen/src/Core/Reverse.h +217 -0
- include/eigen3/Eigen/src/Core/Select.h +164 -0
- include/eigen3/Eigen/src/Core/SelfAdjointView.h +365 -0
- include/eigen3/Eigen/src/Core/SelfCwiseBinaryOp.h +47 -0
- include/eigen3/Eigen/src/Core/Solve.h +188 -0
- include/eigen3/Eigen/src/Core/SolveTriangular.h +235 -0
- include/eigen3/Eigen/src/Core/SolverBase.h +168 -0
- include/eigen3/Eigen/src/Core/StableNorm.h +251 -0
- include/eigen3/Eigen/src/Core/StlIterators.h +463 -0
- include/eigen3/Eigen/src/Core/Stride.h +116 -0
- include/eigen3/Eigen/src/Core/Swap.h +68 -0
- include/eigen3/Eigen/src/Core/Transpose.h +464 -0
- include/eigen3/Eigen/src/Core/Transpositions.h +386 -0
- include/eigen3/Eigen/src/Core/TriangularMatrix.h +1001 -0
- include/eigen3/Eigen/src/Core/VectorBlock.h +96 -0
- include/eigen3/Eigen/src/Core/VectorwiseOp.h +784 -0
- include/eigen3/Eigen/src/Core/Visitor.h +381 -0
- include/eigen3/Eigen/src/Core/arch/AVX/Complex.h +372 -0
- include/eigen3/Eigen/src/Core/arch/AVX/MathFunctions.h +228 -0
- include/eigen3/Eigen/src/Core/arch/AVX/PacketMath.h +1574 -0
- include/eigen3/Eigen/src/Core/arch/AVX/TypeCasting.h +115 -0
- include/eigen3/Eigen/src/Core/arch/AVX512/Complex.h +422 -0
- include/eigen3/Eigen/src/Core/arch/AVX512/MathFunctions.h +362 -0
- include/eigen3/Eigen/src/Core/arch/AVX512/PacketMath.h +2303 -0
- include/eigen3/Eigen/src/Core/arch/AVX512/TypeCasting.h +89 -0
- include/eigen3/Eigen/src/Core/arch/AltiVec/Complex.h +417 -0
- include/eigen3/Eigen/src/Core/arch/AltiVec/MathFunctions.h +90 -0
- include/eigen3/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2937 -0
- include/eigen3/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +221 -0
- include/eigen3/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +629 -0
- include/eigen3/Eigen/src/Core/arch/AltiVec/PacketMath.h +2711 -0
- include/eigen3/Eigen/src/Core/arch/CUDA/Complex.h +258 -0
- include/eigen3/Eigen/src/Core/arch/Default/BFloat16.h +700 -0
- include/eigen3/Eigen/src/Core/arch/Default/ConjHelper.h +117 -0
- include/eigen3/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1649 -0
- include/eigen3/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +110 -0
- include/eigen3/Eigen/src/Core/arch/Default/Half.h +942 -0
- include/eigen3/Eigen/src/Core/arch/Default/Settings.h +49 -0
- include/eigen3/Eigen/src/Core/arch/Default/TypeCasting.h +120 -0
- include/eigen3/Eigen/src/Core/arch/GPU/MathFunctions.h +103 -0
- include/eigen3/Eigen/src/Core/arch/GPU/PacketMath.h +1685 -0
- include/eigen3/Eigen/src/Core/arch/GPU/TypeCasting.h +80 -0
- include/eigen3/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
- include/eigen3/Eigen/src/Core/arch/MSA/Complex.h +648 -0
- include/eigen3/Eigen/src/Core/arch/MSA/MathFunctions.h +387 -0
- include/eigen3/Eigen/src/Core/arch/MSA/PacketMath.h +1233 -0
- include/eigen3/Eigen/src/Core/arch/NEON/Complex.h +584 -0
- include/eigen3/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +183 -0
- include/eigen3/Eigen/src/Core/arch/NEON/MathFunctions.h +75 -0
- include/eigen3/Eigen/src/Core/arch/NEON/PacketMath.h +4587 -0
- include/eigen3/Eigen/src/Core/arch/NEON/TypeCasting.h +1419 -0
- include/eigen3/Eigen/src/Core/arch/SSE/Complex.h +351 -0
- include/eigen3/Eigen/src/Core/arch/SSE/MathFunctions.h +199 -0
- include/eigen3/Eigen/src/Core/arch/SSE/PacketMath.h +1505 -0
- include/eigen3/Eigen/src/Core/arch/SSE/TypeCasting.h +142 -0
- include/eigen3/Eigen/src/Core/arch/SVE/MathFunctions.h +44 -0
- include/eigen3/Eigen/src/Core/arch/SVE/PacketMath.h +752 -0
- include/eigen3/Eigen/src/Core/arch/SVE/TypeCasting.h +49 -0
- include/eigen3/Eigen/src/Core/arch/SYCL/InteropHeaders.h +232 -0
- include/eigen3/Eigen/src/Core/arch/SYCL/MathFunctions.h +301 -0
- include/eigen3/Eigen/src/Core/arch/SYCL/PacketMath.h +670 -0
- include/eigen3/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +694 -0
- include/eigen3/Eigen/src/Core/arch/SYCL/TypeCasting.h +85 -0
- include/eigen3/Eigen/src/Core/arch/ZVector/Complex.h +426 -0
- include/eigen3/Eigen/src/Core/arch/ZVector/MathFunctions.h +233 -0
- include/eigen3/Eigen/src/Core/arch/ZVector/PacketMath.h +1060 -0
- include/eigen3/Eigen/src/Core/functors/AssignmentFunctors.h +177 -0
- include/eigen3/Eigen/src/Core/functors/BinaryFunctors.h +541 -0
- include/eigen3/Eigen/src/Core/functors/NullaryFunctors.h +189 -0
- include/eigen3/Eigen/src/Core/functors/StlFunctors.h +166 -0
- include/eigen3/Eigen/src/Core/functors/TernaryFunctors.h +25 -0
- include/eigen3/Eigen/src/Core/functors/UnaryFunctors.h +1131 -0
- include/eigen3/Eigen/src/Core/products/GeneralBlockPanelKernel.h +2645 -0
- include/eigen3/Eigen/src/Core/products/GeneralMatrixMatrix.h +517 -0
- include/eigen3/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +317 -0
- include/eigen3/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +145 -0
- include/eigen3/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +124 -0
- include/eigen3/Eigen/src/Core/products/GeneralMatrixVector.h +518 -0
- include/eigen3/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +136 -0
- include/eigen3/Eigen/src/Core/products/Parallelizer.h +180 -0
- include/eigen3/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +544 -0
- include/eigen3/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +295 -0
- include/eigen3/Eigen/src/Core/products/SelfadjointMatrixVector.h +262 -0
- include/eigen3/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +118 -0
- include/eigen3/Eigen/src/Core/products/SelfadjointProduct.h +133 -0
- include/eigen3/Eigen/src/Core/products/SelfadjointRank2Update.h +94 -0
- include/eigen3/Eigen/src/Core/products/TriangularMatrixMatrix.h +472 -0
- include/eigen3/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +317 -0
- include/eigen3/Eigen/src/Core/products/TriangularMatrixVector.h +350 -0
- include/eigen3/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +255 -0
- include/eigen3/Eigen/src/Core/products/TriangularSolverMatrix.h +337 -0
- include/eigen3/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +167 -0
- include/eigen3/Eigen/src/Core/products/TriangularSolverVector.h +148 -0
- include/eigen3/Eigen/src/Core/util/BlasUtil.h +583 -0
- include/eigen3/Eigen/src/Core/util/ConfigureVectorization.h +512 -0
- include/eigen3/Eigen/src/Core/util/Constants.h +563 -0
- include/eigen3/Eigen/src/Core/util/DisableStupidWarnings.h +106 -0
- include/eigen3/Eigen/src/Core/util/ForwardDeclarations.h +322 -0
- include/eigen3/Eigen/src/Core/util/IndexedViewHelper.h +186 -0
- include/eigen3/Eigen/src/Core/util/IntegralConstant.h +272 -0
- include/eigen3/Eigen/src/Core/util/MKL_support.h +137 -0
- include/eigen3/Eigen/src/Core/util/Macros.h +1464 -0
- include/eigen3/Eigen/src/Core/util/Memory.h +1163 -0
- include/eigen3/Eigen/src/Core/util/Meta.h +812 -0
- include/eigen3/Eigen/src/Core/util/NonMPL2.h +3 -0
- include/eigen3/Eigen/src/Core/util/ReenableStupidWarnings.h +31 -0
- include/eigen3/Eigen/src/Core/util/ReshapedHelper.h +51 -0
- include/eigen3/Eigen/src/Core/util/StaticAssert.h +221 -0
- include/eigen3/Eigen/src/Core/util/SymbolicIndex.h +293 -0
- include/eigen3/Eigen/src/Core/util/XprHelper.h +856 -0
- include/eigen3/Eigen/src/Eigenvalues/ComplexEigenSolver.h +346 -0
- include/eigen3/Eigen/src/Eigenvalues/ComplexSchur.h +462 -0
- include/eigen3/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +91 -0
- include/eigen3/Eigen/src/Eigenvalues/EigenSolver.h +622 -0
- include/eigen3/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +418 -0
- include/eigen3/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +226 -0
- include/eigen3/Eigen/src/Eigenvalues/HessenbergDecomposition.h +374 -0
- include/eigen3/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +158 -0
- include/eigen3/Eigen/src/Eigenvalues/RealQZ.h +657 -0
- include/eigen3/Eigen/src/Eigenvalues/RealSchur.h +558 -0
- include/eigen3/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +77 -0
- include/eigen3/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +904 -0
- include/eigen3/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +87 -0
- include/eigen3/Eigen/src/Eigenvalues/Tridiagonalization.h +561 -0
- include/eigen3/Eigen/src/Geometry/AlignedBox.h +486 -0
- include/eigen3/Eigen/src/Geometry/AngleAxis.h +247 -0
- include/eigen3/Eigen/src/Geometry/EulerAngles.h +114 -0
- include/eigen3/Eigen/src/Geometry/Homogeneous.h +501 -0
- include/eigen3/Eigen/src/Geometry/Hyperplane.h +282 -0
- include/eigen3/Eigen/src/Geometry/OrthoMethods.h +235 -0
- include/eigen3/Eigen/src/Geometry/ParametrizedLine.h +232 -0
- include/eigen3/Eigen/src/Geometry/Quaternion.h +870 -0
- include/eigen3/Eigen/src/Geometry/Rotation2D.h +199 -0
- include/eigen3/Eigen/src/Geometry/RotationBase.h +206 -0
- include/eigen3/Eigen/src/Geometry/Scaling.h +188 -0
- include/eigen3/Eigen/src/Geometry/Transform.h +1563 -0
- include/eigen3/Eigen/src/Geometry/Translation.h +202 -0
- include/eigen3/Eigen/src/Geometry/Umeyama.h +166 -0
- include/eigen3/Eigen/src/Geometry/arch/Geometry_SIMD.h +168 -0
- include/eigen3/Eigen/src/Householder/BlockHouseholder.h +110 -0
- include/eigen3/Eigen/src/Householder/Householder.h +176 -0
- include/eigen3/Eigen/src/Householder/HouseholderSequence.h +545 -0
- include/eigen3/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +226 -0
- include/eigen3/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +212 -0
- include/eigen3/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +229 -0
- include/eigen3/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +394 -0
- include/eigen3/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +453 -0
- include/eigen3/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +444 -0
- include/eigen3/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +198 -0
- include/eigen3/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +117 -0
- include/eigen3/Eigen/src/Jacobi/Jacobi.h +483 -0
- include/eigen3/Eigen/src/KLUSupport/KLUSupport.h +358 -0
- include/eigen3/Eigen/src/LU/Determinant.h +117 -0
- include/eigen3/Eigen/src/LU/FullPivLU.h +877 -0
- include/eigen3/Eigen/src/LU/InverseImpl.h +432 -0
- include/eigen3/Eigen/src/LU/PartialPivLU.h +624 -0
- include/eigen3/Eigen/src/LU/PartialPivLU_LAPACKE.h +83 -0
- include/eigen3/Eigen/src/LU/arch/InverseSize4.h +351 -0
- include/eigen3/Eigen/src/MetisSupport/MetisSupport.h +137 -0
- include/eigen3/Eigen/src/OrderingMethods/Amd.h +435 -0
- include/eigen3/Eigen/src/OrderingMethods/Eigen_Colamd.h +1863 -0
- include/eigen3/Eigen/src/OrderingMethods/Ordering.h +153 -0
- include/eigen3/Eigen/src/PaStiXSupport/PaStiXSupport.h +678 -0
- include/eigen3/Eigen/src/PardisoSupport/PardisoSupport.h +545 -0
- include/eigen3/Eigen/src/QR/ColPivHouseholderQR.h +674 -0
- include/eigen3/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +97 -0
- include/eigen3/Eigen/src/QR/CompleteOrthogonalDecomposition.h +635 -0
- include/eigen3/Eigen/src/QR/FullPivHouseholderQR.h +713 -0
- include/eigen3/Eigen/src/QR/HouseholderQR.h +434 -0
- include/eigen3/Eigen/src/QR/HouseholderQR_LAPACKE.h +68 -0
- include/eigen3/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +335 -0
- include/eigen3/Eigen/src/SVD/BDCSVD.h +1366 -0
- include/eigen3/Eigen/src/SVD/JacobiSVD.h +812 -0
- include/eigen3/Eigen/src/SVD/JacobiSVD_LAPACKE.h +91 -0
- include/eigen3/Eigen/src/SVD/SVDBase.h +376 -0
- include/eigen3/Eigen/src/SVD/UpperBidiagonalization.h +414 -0
- include/eigen3/Eigen/src/SparseCholesky/SimplicialCholesky.h +697 -0
- include/eigen3/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +174 -0
- include/eigen3/Eigen/src/SparseCore/AmbiVector.h +378 -0
- include/eigen3/Eigen/src/SparseCore/CompressedStorage.h +274 -0
- include/eigen3/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +352 -0
- include/eigen3/Eigen/src/SparseCore/MappedSparseMatrix.h +67 -0
- include/eigen3/Eigen/src/SparseCore/SparseAssign.h +270 -0
- include/eigen3/Eigen/src/SparseCore/SparseBlock.h +571 -0
- include/eigen3/Eigen/src/SparseCore/SparseColEtree.h +206 -0
- include/eigen3/Eigen/src/SparseCore/SparseCompressedBase.h +370 -0
- include/eigen3/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +722 -0
- include/eigen3/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +150 -0
- include/eigen3/Eigen/src/SparseCore/SparseDenseProduct.h +342 -0
- include/eigen3/Eigen/src/SparseCore/SparseDiagonalProduct.h +138 -0
- include/eigen3/Eigen/src/SparseCore/SparseDot.h +98 -0
- include/eigen3/Eigen/src/SparseCore/SparseFuzzy.h +29 -0
- include/eigen3/Eigen/src/SparseCore/SparseMap.h +305 -0
- include/eigen3/Eigen/src/SparseCore/SparseMatrix.h +1518 -0
- include/eigen3/Eigen/src/SparseCore/SparseMatrixBase.h +398 -0
- include/eigen3/Eigen/src/SparseCore/SparsePermutation.h +178 -0
- include/eigen3/Eigen/src/SparseCore/SparseProduct.h +181 -0
- include/eigen3/Eigen/src/SparseCore/SparseRedux.h +49 -0
- include/eigen3/Eigen/src/SparseCore/SparseRef.h +397 -0
- include/eigen3/Eigen/src/SparseCore/SparseSelfAdjointView.h +659 -0
- include/eigen3/Eigen/src/SparseCore/SparseSolverBase.h +124 -0
- include/eigen3/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +198 -0
- include/eigen3/Eigen/src/SparseCore/SparseTranspose.h +92 -0
- include/eigen3/Eigen/src/SparseCore/SparseTriangularView.h +189 -0
- include/eigen3/Eigen/src/SparseCore/SparseUtil.h +186 -0
- include/eigen3/Eigen/src/SparseCore/SparseVector.h +478 -0
- include/eigen3/Eigen/src/SparseCore/SparseView.h +254 -0
- include/eigen3/Eigen/src/SparseCore/TriangularSolver.h +315 -0
- include/eigen3/Eigen/src/SparseLU/SparseLU.h +923 -0
- include/eigen3/Eigen/src/SparseLU/SparseLUImpl.h +66 -0
- include/eigen3/Eigen/src/SparseLU/SparseLU_Memory.h +226 -0
- include/eigen3/Eigen/src/SparseLU/SparseLU_Structs.h +110 -0
- include/eigen3/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +375 -0
- include/eigen3/Eigen/src/SparseLU/SparseLU_Utils.h +80 -0
- include/eigen3/Eigen/src/SparseLU/SparseLU_column_bmod.h +181 -0
- include/eigen3/Eigen/src/SparseLU/SparseLU_column_dfs.h +179 -0
- include/eigen3/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +107 -0
- include/eigen3/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +280 -0
- include/eigen3/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +126 -0
- include/eigen3/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +130 -0
- include/eigen3/Eigen/src/SparseLU/SparseLU_panel_bmod.h +223 -0
- include/eigen3/Eigen/src/SparseLU/SparseLU_panel_dfs.h +258 -0
- include/eigen3/Eigen/src/SparseLU/SparseLU_pivotL.h +137 -0
- include/eigen3/Eigen/src/SparseLU/SparseLU_pruneL.h +136 -0
- include/eigen3/Eigen/src/SparseLU/SparseLU_relax_snode.h +83 -0
- include/eigen3/Eigen/src/SparseQR/SparseQR.h +758 -0
- include/eigen3/Eigen/src/StlSupport/StdDeque.h +116 -0
- include/eigen3/Eigen/src/StlSupport/StdList.h +106 -0
- include/eigen3/Eigen/src/StlSupport/StdVector.h +131 -0
- include/eigen3/Eigen/src/StlSupport/details.h +84 -0
- include/eigen3/Eigen/src/SuperLUSupport/SuperLUSupport.h +1025 -0
- include/eigen3/Eigen/src/UmfPackSupport/UmfPackSupport.h +642 -0
- include/eigen3/Eigen/src/misc/Image.h +82 -0
- include/eigen3/Eigen/src/misc/Kernel.h +79 -0
- include/eigen3/Eigen/src/misc/RealSvd2x2.h +55 -0
- include/eigen3/Eigen/src/misc/blas.h +440 -0
- include/eigen3/Eigen/src/misc/lapack.h +152 -0
- include/eigen3/Eigen/src/misc/lapacke.h +16292 -0
- include/eigen3/Eigen/src/misc/lapacke_mangling.h +17 -0
- include/eigen3/Eigen/src/plugins/ArrayCwiseBinaryOps.h +358 -0
- include/eigen3/Eigen/src/plugins/ArrayCwiseUnaryOps.h +696 -0
- include/eigen3/Eigen/src/plugins/BlockMethods.h +1442 -0
- include/eigen3/Eigen/src/plugins/CommonCwiseBinaryOps.h +115 -0
- include/eigen3/Eigen/src/plugins/CommonCwiseUnaryOps.h +177 -0
- include/eigen3/Eigen/src/plugins/IndexedViewMethods.h +262 -0
- include/eigen3/Eigen/src/plugins/MatrixCwiseBinaryOps.h +152 -0
- include/eigen3/Eigen/src/plugins/MatrixCwiseUnaryOps.h +95 -0
- include/eigen3/Eigen/src/plugins/ReshapedMethods.h +149 -0
- include/eigen3/signature_of_eigen3_matrix_library +1 -0
- include/eigen3/unsupported/Eigen/AdolcForward +159 -0
- include/eigen3/unsupported/Eigen/AlignedVector3 +234 -0
- include/eigen3/unsupported/Eigen/ArpackSupport +30 -0
- include/eigen3/unsupported/Eigen/AutoDiff +46 -0
- include/eigen3/unsupported/Eigen/BVH +95 -0
- include/eigen3/unsupported/Eigen/CXX11/Tensor +137 -0
- include/eigen3/unsupported/Eigen/CXX11/TensorSymmetry +42 -0
- include/eigen3/unsupported/Eigen/CXX11/ThreadPool +74 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/Tensor.h +554 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h +329 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h +247 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +1176 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h +1559 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h +1093 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h +518 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h +377 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h +1023 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h +73 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h +6 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h +1413 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h +575 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionSycl.h +1650 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h +1679 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h +456 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h +1132 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorConvolutionSycl.h +544 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h +214 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h +347 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h +137 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h +6 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h +104 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceGpu.h +389 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h +1048 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h +409 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h +236 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h +490 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h +236 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h +983 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h +703 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h +388 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h +669 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h +379 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h +237 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h +191 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h +488 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h +302 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorGlobalFunctions.h +33 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h +99 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaUndefines.h +44 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h +79 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h +603 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h +738 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h +247 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorInitializer.h +82 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h +263 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h +216 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMacros.h +98 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h +327 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h +311 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h +1102 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h +708 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h +291 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h +322 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h +998 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h +6 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h +966 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReductionSycl.h +582 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h +454 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h +465 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h +528 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorScanSycl.h +513 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h +471 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h +161 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h +346 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorTrace.h +303 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h +264 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h +249 -0
- include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h +629 -0
- include/eigen3/unsupported/Eigen/CXX11/src/TensorSymmetry/DynamicSymmetry.h +293 -0
- include/eigen3/unsupported/Eigen/CXX11/src/TensorSymmetry/StaticSymmetry.h +236 -0
- include/eigen3/unsupported/Eigen/CXX11/src/TensorSymmetry/Symmetry.h +338 -0
- include/eigen3/unsupported/Eigen/CXX11/src/TensorSymmetry/util/TemplateGroupTheory.h +669 -0
- include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/Barrier.h +67 -0
- include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/EventCount.h +249 -0
- include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h +486 -0
- include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/RunQueue.h +236 -0
- include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/ThreadCancel.h +23 -0
- include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/ThreadEnvironment.h +40 -0
- include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/ThreadLocal.h +301 -0
- include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/ThreadPoolInterface.h +48 -0
- include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/ThreadYield.h +20 -0
- include/eigen3/unsupported/Eigen/CXX11/src/util/CXX11Meta.h +537 -0
- include/eigen3/unsupported/Eigen/CXX11/src/util/CXX11Workarounds.h +88 -0
- include/eigen3/unsupported/Eigen/CXX11/src/util/EmulateArray.h +261 -0
- include/eigen3/unsupported/Eigen/CXX11/src/util/MaxSizeVector.h +158 -0
- include/eigen3/unsupported/Eigen/EulerAngles +43 -0
- include/eigen3/unsupported/Eigen/FFT +419 -0
- include/eigen3/unsupported/Eigen/IterativeSolvers +51 -0
- include/eigen3/unsupported/Eigen/KroneckerProduct +36 -0
- include/eigen3/unsupported/Eigen/LevenbergMarquardt +49 -0
- include/eigen3/unsupported/Eigen/MPRealSupport +213 -0
- include/eigen3/unsupported/Eigen/MatrixFunctions +504 -0
- include/eigen3/unsupported/Eigen/MoreVectorization +24 -0
- include/eigen3/unsupported/Eigen/NonLinearOptimization +140 -0
- include/eigen3/unsupported/Eigen/NumericalDiff +56 -0
- include/eigen3/unsupported/Eigen/OpenGLSupport +322 -0
- include/eigen3/unsupported/Eigen/Polynomials +137 -0
- include/eigen3/unsupported/Eigen/Skyline +39 -0
- include/eigen3/unsupported/Eigen/SparseExtra +54 -0
- include/eigen3/unsupported/Eigen/SpecialFunctions +103 -0
- include/eigen3/unsupported/Eigen/Splines +35 -0
- include/eigen3/unsupported/Eigen/src/AutoDiff/AutoDiffJacobian.h +108 -0
- include/eigen3/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h +730 -0
- include/eigen3/unsupported/Eigen/src/AutoDiff/AutoDiffVector.h +220 -0
- include/eigen3/unsupported/Eigen/src/BVH/BVAlgorithms.h +293 -0
- include/eigen3/unsupported/Eigen/src/BVH/KdBVH.h +223 -0
- include/eigen3/unsupported/Eigen/src/Eigenvalues/ArpackSelfAdjointEigenSolver.h +790 -0
- include/eigen3/unsupported/Eigen/src/EulerAngles/EulerAngles.h +355 -0
- include/eigen3/unsupported/Eigen/src/EulerAngles/EulerSystem.h +305 -0
- include/eigen3/unsupported/Eigen/src/FFT/ei_fftw_impl.h +261 -0
- include/eigen3/unsupported/Eigen/src/FFT/ei_kissfft_impl.h +449 -0
- include/eigen3/unsupported/Eigen/src/IterativeSolvers/ConstrainedConjGrad.h +187 -0
- include/eigen3/unsupported/Eigen/src/IterativeSolvers/DGMRES.h +511 -0
- include/eigen3/unsupported/Eigen/src/IterativeSolvers/GMRES.h +335 -0
- include/eigen3/unsupported/Eigen/src/IterativeSolvers/IDRS.h +436 -0
- include/eigen3/unsupported/Eigen/src/IterativeSolvers/IncompleteLU.h +90 -0
- include/eigen3/unsupported/Eigen/src/IterativeSolvers/IterationController.h +154 -0
- include/eigen3/unsupported/Eigen/src/IterativeSolvers/MINRES.h +267 -0
- include/eigen3/unsupported/Eigen/src/IterativeSolvers/Scaling.h +193 -0
- include/eigen3/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h +305 -0
- include/eigen3/unsupported/Eigen/src/LevenbergMarquardt/LMcovar.h +84 -0
- include/eigen3/unsupported/Eigen/src/LevenbergMarquardt/LMonestep.h +202 -0
- include/eigen3/unsupported/Eigen/src/LevenbergMarquardt/LMpar.h +160 -0
- include/eigen3/unsupported/Eigen/src/LevenbergMarquardt/LMqrsolv.h +188 -0
- include/eigen3/unsupported/Eigen/src/LevenbergMarquardt/LevenbergMarquardt.h +396 -0
- include/eigen3/unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h +441 -0
- include/eigen3/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h +569 -0
- include/eigen3/unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h +373 -0
- include/eigen3/unsupported/Eigen/src/MatrixFunctions/MatrixPower.h +705 -0
- include/eigen3/unsupported/Eigen/src/MatrixFunctions/MatrixSquareRoot.h +368 -0
- include/eigen3/unsupported/Eigen/src/MatrixFunctions/StemFunction.h +117 -0
- include/eigen3/unsupported/Eigen/src/MoreVectorization/MathFunctions.h +95 -0
- include/eigen3/unsupported/Eigen/src/NonLinearOptimization/HybridNonLinearSolver.h +601 -0
- include/eigen3/unsupported/Eigen/src/NonLinearOptimization/LevenbergMarquardt.h +657 -0
- include/eigen3/unsupported/Eigen/src/NonLinearOptimization/chkder.h +66 -0
- include/eigen3/unsupported/Eigen/src/NonLinearOptimization/covar.h +70 -0
- include/eigen3/unsupported/Eigen/src/NonLinearOptimization/dogleg.h +107 -0
- include/eigen3/unsupported/Eigen/src/NonLinearOptimization/fdjac1.h +79 -0
- include/eigen3/unsupported/Eigen/src/NonLinearOptimization/lmpar.h +298 -0
- include/eigen3/unsupported/Eigen/src/NonLinearOptimization/qrsolv.h +91 -0
- include/eigen3/unsupported/Eigen/src/NonLinearOptimization/r1mpyq.h +30 -0
- include/eigen3/unsupported/Eigen/src/NonLinearOptimization/r1updt.h +99 -0
- include/eigen3/unsupported/Eigen/src/NonLinearOptimization/rwupdt.h +49 -0
- include/eigen3/unsupported/Eigen/src/NumericalDiff/NumericalDiff.h +130 -0
- include/eigen3/unsupported/Eigen/src/Polynomials/Companion.h +280 -0
- include/eigen3/unsupported/Eigen/src/Polynomials/PolynomialSolver.h +428 -0
- include/eigen3/unsupported/Eigen/src/Polynomials/PolynomialUtils.h +143 -0
- include/eigen3/unsupported/Eigen/src/Skyline/SkylineInplaceLU.h +352 -0
- include/eigen3/unsupported/Eigen/src/Skyline/SkylineMatrix.h +862 -0
- include/eigen3/unsupported/Eigen/src/Skyline/SkylineMatrixBase.h +212 -0
- include/eigen3/unsupported/Eigen/src/Skyline/SkylineProduct.h +295 -0
- include/eigen3/unsupported/Eigen/src/Skyline/SkylineStorage.h +259 -0
- include/eigen3/unsupported/Eigen/src/Skyline/SkylineUtil.h +89 -0
- include/eigen3/unsupported/Eigen/src/SparseExtra/BlockOfDynamicSparseMatrix.h +122 -0
- include/eigen3/unsupported/Eigen/src/SparseExtra/BlockSparseMatrix.h +1079 -0
- include/eigen3/unsupported/Eigen/src/SparseExtra/DynamicSparseMatrix.h +404 -0
- include/eigen3/unsupported/Eigen/src/SparseExtra/MarketIO.h +282 -0
- include/eigen3/unsupported/Eigen/src/SparseExtra/MatrixMarketIterator.h +247 -0
- include/eigen3/unsupported/Eigen/src/SparseExtra/RandomSetter.h +349 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsArrayAPI.h +286 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsBFloat16.h +68 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsFunctors.h +357 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsHalf.h +66 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsImpl.h +1959 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsPacketMath.h +118 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/HipVectorCompatibility.h +67 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsArrayAPI.h +167 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsBFloat16.h +58 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h +330 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h +58 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h +2045 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h +79 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/arch/AVX/BesselFunctions.h +46 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/arch/AVX/SpecialFunctions.h +16 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/arch/AVX512/BesselFunctions.h +46 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/arch/AVX512/SpecialFunctions.h +16 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/arch/GPU/SpecialFunctions.h +369 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/arch/NEON/BesselFunctions.h +54 -0
- include/eigen3/unsupported/Eigen/src/SpecialFunctions/arch/NEON/SpecialFunctions.h +34 -0
- include/eigen3/unsupported/Eigen/src/Splines/Spline.h +507 -0
- include/eigen3/unsupported/Eigen/src/Splines/SplineFitting.h +431 -0
- include/eigen3/unsupported/Eigen/src/Splines/SplineFwd.h +93 -0
- pyvale/__init__.py +23 -0
- pyvale/blender/__init__.py +23 -0
- pyvale/blender/blendercalibrationdata.py +17 -0
- pyvale/blender/blenderexceptions.py +8 -0
- pyvale/blender/blenderlightdata.py +26 -0
- pyvale/blender/blendermaterialdata.py +15 -0
- pyvale/blender/blenderrenderdata.py +35 -0
- pyvale/blender/blenderscene.py +493 -0
- pyvale/blender/blendertools.py +449 -0
- pyvale/calib/__init__.py +11 -0
- pyvale/calib/calibcpp.cpython-311-i386-linux-gnu.so +0 -0
- pyvale/calib/calibdotdetect.py +510 -0
- pyvale/calib/calibparams.py +47 -0
- pyvale/calib/calibstereo.py +441 -0
- pyvale/calib/cpp/bindings.cpp +22 -0
- pyvale/calib/cpp/calibdotdetect.cpp +16 -0
- pyvale/calib/cpp/calibdotdetect.hpp +20 -0
- pyvale/calib/cpp/calibopt.cpp +347 -0
- pyvale/calib/cpp/calibopt.hpp +84 -0
- pyvale/calib/cpp/calibstereo.cpp +95 -0
- pyvale/calib/cpp/calibstereo.hpp +27 -0
- pyvale/common_cpp/__init__.py +5 -0
- pyvale/common_cpp/bindings.cpp +33 -0
- pyvale/common_cpp/common_cpp.cpython-311-i386-linux-gnu.so +0 -0
- pyvale/common_cpp/defines.hpp +39 -0
- pyvale/common_cpp/dicsignalhandler.cpp +16 -0
- pyvale/common_cpp/dicsignalhandler.hpp +11 -0
- pyvale/common_cpp/pocketfft_hdronly.h +3744 -0
- pyvale/common_cpp/progressbar.hpp +107 -0
- pyvale/common_cpp/util.cpp +19 -0
- pyvale/common_cpp/util.hpp +72 -0
- pyvale/common_py/util.py +63 -0
- pyvale/data/DIC_Challenge_Star_Noise_Def.tiff +0 -0
- pyvale/data/DIC_Challenge_Star_Noise_Ref.tiff +0 -0
- pyvale/data/__init__.py +5 -0
- pyvale/data/cal_target.tiff +0 -0
- pyvale/data/calib.caldat +26 -0
- pyvale/data/case00_HEX20_out.e +0 -0
- pyvale/data/case00_HEX27_out.e +0 -0
- pyvale/data/case00_HEX8_out.e +0 -0
- pyvale/data/case00_TET10_out.e +0 -0
- pyvale/data/case00_TET14_out.e +0 -0
- pyvale/data/case00_TET4_out.e +0 -0
- pyvale/data/case16_d_out.e +0 -0
- pyvale/data/case16_out.e +0 -0
- pyvale/data/case17_out.e +0 -0
- pyvale/data/case18_d_out.e +0 -0
- pyvale/data/case18_out.e +0 -0
- pyvale/data/case26_out.e +0 -0
- pyvale/data/optspeckle_2464x2056px_spec5px_8bit_gblur1px.tiff +0 -0
- pyvale/data/plate_hole_def0000.tiff +0 -0
- pyvale/data/plate_hole_def0001.tiff +0 -0
- pyvale/data/plate_hole_ref0000.tiff +0 -0
- pyvale/data/plate_rigid_def0000.tiff +0 -0
- pyvale/data/plate_rigid_def0001.tiff +0 -0
- pyvale/data/plate_rigid_def_25px.tiff +0 -0
- pyvale/data/plate_rigid_def_50px.tiff +0 -0
- pyvale/data/plate_rigid_ref0000.tiff +0 -0
- pyvale/dataset/__init__.py +7 -0
- pyvale/dataset/dataset.py +483 -0
- pyvale/dic/__init__.py +15 -0
- pyvale/dic/cpp/bindings.cpp +52 -0
- pyvale/dic/cpp/dicfourier.cpp +705 -0
- pyvale/dic/cpp/dicfourier.hpp +410 -0
- pyvale/dic/cpp/dicinterpolator.cpp +633 -0
- pyvale/dic/cpp/dicinterpolator.hpp +162 -0
- pyvale/dic/cpp/dicmain.cpp +214 -0
- pyvale/dic/cpp/dicmain.hpp +61 -0
- pyvale/dic/cpp/dicoptimizer.cpp +564 -0
- pyvale/dic/cpp/dicoptimizer.hpp +279 -0
- pyvale/dic/cpp/dicresults.cpp +239 -0
- pyvale/dic/cpp/dicresults.hpp +64 -0
- pyvale/dic/cpp/dicrg.cpp +55 -0
- pyvale/dic/cpp/dicrg.hpp +52 -0
- pyvale/dic/cpp/dicscanmethod.cpp +819 -0
- pyvale/dic/cpp/dicscanmethod.hpp +119 -0
- pyvale/dic/cpp/dicshapefunc.cpp +117 -0
- pyvale/dic/cpp/dicshapefunc.hpp +40 -0
- pyvale/dic/cpp/dicsubset.cpp +325 -0
- pyvale/dic/cpp/dicsubset.hpp +122 -0
- pyvale/dic/cpp/dicutil.cpp +108 -0
- pyvale/dic/cpp/dicutil.hpp +96 -0
- pyvale/dic/cuda/malloc.cu +99 -0
- pyvale/dic/cuda/malloc.hpp +17 -0
- pyvale/dic/dic2d.py +190 -0
- pyvale/dic/dic2dconv.py +6 -0
- pyvale/dic/dic2dcpp.cpython-311-i386-linux-gnu.so +0 -0
- pyvale/dic/dicchecks.py +455 -0
- pyvale/dic/dicdataimport.py +402 -0
- pyvale/dic/dicregionofinterest.py +1163 -0
- pyvale/dic/dicresults.py +58 -0
- pyvale/examples/__init__.py +5 -0
- pyvale/examples/basicsensorsim/README.md +2 -0
- pyvale/examples/basicsensorsim/ex0_quickstart.py +139 -0
- pyvale/examples/basicsensorsim/ex1_scalar_sensors.py +240 -0
- pyvale/examples/basicsensorsim/ex2_vector_tensor_sensors.py +280 -0
- pyvale/examples/basicsensorsim/ex3_experiment_simulator.py +397 -0
- pyvale/examples/blenderimagedef/README.md +2 -0
- pyvale/examples/blenderimagedef/ex1_blender_scene2d.py +176 -0
- pyvale/examples/blenderimagedef/ex2_blender_imagedef2d.py +177 -0
- pyvale/examples/blenderimagedef/ex3_blender_scenestereo.py +205 -0
- pyvale/examples/blenderimagedef/ex4_blender_imagedefstereo.py +213 -0
- pyvale/examples/blenderimagedef/ex5_blender_calibstereo.py +190 -0
- pyvale/examples/dic/README.md +2 -0
- pyvale/examples/dic/ex1_region_of_interest.py +101 -0
- pyvale/examples/dic/ex2_plate_with_hole.py +155 -0
- pyvale/examples/dic/ex3_plate_with_hole_strain.py +99 -0
- pyvale/examples/dic/ex4_dic_blender.py +97 -0
- pyvale/examples/dic/ex5_dic_challenge.py +107 -0
- pyvale/examples/extsensorsim/README.md +2 -0
- pyvale/examples/extsensorsim/ex1_byosimdata.py +211 -0
- pyvale/examples/extsensorsim/ex2_meshfreesensors.py +174 -0
- pyvale/examples/extsensorsim/ex3a_scal2d.py +151 -0
- pyvale/examples/extsensorsim/ex3b_scal3d.py +150 -0
- pyvale/examples/extsensorsim/ex3c_vec2d.py +163 -0
- pyvale/examples/extsensorsim/ex3d_vec3d.py +169 -0
- pyvale/examples/extsensorsim/ex3e_tens2d.py +170 -0
- pyvale/examples/extsensorsim/ex3f_tens3d.py +198 -0
- pyvale/examples/extsensorsim/ex4a_basicerrs_scal2d.py +201 -0
- pyvale/examples/extsensorsim/ex4b_fielderrs_scal3d.py +197 -0
- pyvale/examples/extsensorsim/ex4c_angleerrs_vec2d.py +215 -0
- pyvale/examples/extsensorsim/ex4d_fieldlockerrs_vec3d.py +184 -0
- pyvale/examples/extsensorsim/ex4e_chainfielderrs_vec2d.py +233 -0
- pyvale/examples/extsensorsim/ex4f_caliberrs_scal2d.py +167 -0
- pyvale/examples/extsensorsim/ex4g_spatavgerrs_scal2d.py +146 -0
- pyvale/examples/extsensorsim/ex5a_expsim_thermmech2d.py +350 -0
- pyvale/examples/extsensorsim/ex5b_expsim_thermmech3d.py +358 -0
- pyvale/examples/genanalyticdata/ex1_1_scalarvisualisation.py +41 -0
- pyvale/examples/genanalyticdata/ex1_2_scalarcasebuild.py +43 -0
- pyvale/examples/genanalyticdata/ex2_1_analyticsensors.py +86 -0
- pyvale/examples/genanalyticdata/ex2_2_analyticsensors_nomesh.py +89 -0
- pyvale/examples/imagedef2d/ex_imagedef2d_todisk.py +84 -0
- pyvale/examples/mooseherder/README.md +2 -0
- pyvale/examples/mooseherder/ex0_create_moose_config.py +65 -0
- pyvale/examples/mooseherder/ex1a_modify_moose_input.py +71 -0
- pyvale/examples/mooseherder/ex1b_modify_gmsh_input.py +69 -0
- pyvale/examples/mooseherder/ex2a_run_moose_once.py +80 -0
- pyvale/examples/mooseherder/ex2b_run_gmsh_once.py +64 -0
- pyvale/examples/mooseherder/ex2c_run_both_once.py +114 -0
- pyvale/examples/mooseherder/ex3_run_moose_seq_para.py +157 -0
- pyvale/examples/mooseherder/ex4_run_gmsh-moose_seq_para.py +176 -0
- pyvale/examples/mooseherder/ex5_run_moose_paramulti.py +136 -0
- pyvale/examples/mooseherder/ex6_read_moose_exodus.py +163 -0
- pyvale/examples/mooseherder/ex7a_read_moose_herd_results.py +153 -0
- pyvale/examples/mooseherder/ex7b_read_multi_herd_results.py +116 -0
- pyvale/examples/mooseherder/ex7c_read_multi_gmshmoose_results.py +127 -0
- pyvale/examples/mooseherder/ex7d_readconfig_multi_gmshmoose_results.py +143 -0
- pyvale/examples/mooseherder/ex8_read_existing_sweep_output.py +72 -0
- pyvale/examples/rasterimagedef/ex_rastenp.py +194 -0
- pyvale/examples/rasterimagedef/ex_rastercyth_oneframe.py +206 -0
- pyvale/examples/rasterimagedef/ex_rastercyth_static_cypara.py +189 -0
- pyvale/examples/rasterimagedef/ex_rastercyth_static_pypara.py +219 -0
- pyvale/examples/visualisation/ex1_visualisation_options.py +111 -0
- pyvale/mooseherder/__init__.py +55 -0
- pyvale/mooseherder/directorymanager.py +408 -0
- pyvale/mooseherder/exceptions.py +10 -0
- pyvale/mooseherder/exodusloader.py +762 -0
- pyvale/mooseherder/gmshrunner.py +158 -0
- pyvale/mooseherder/inputmodifier.py +240 -0
- pyvale/mooseherder/mooseconfig.py +212 -0
- pyvale/mooseherder/mooseherd.py +539 -0
- pyvale/mooseherder/mooserunner.py +307 -0
- pyvale/mooseherder/outputloader.py +17 -0
- pyvale/mooseherder/simdata.py +93 -0
- pyvale/mooseherder/simloaderbyfield.py +211 -0
- pyvale/mooseherder/simloaderbytime.py +193 -0
- pyvale/mooseherder/simloadopts.py +55 -0
- pyvale/mooseherder/simloadtools.py +465 -0
- pyvale/mooseherder/simrunner.py +31 -0
- pyvale/mooseherder/simsaver.py +401 -0
- pyvale/mooseherder/sweeploader.py +358 -0
- pyvale/mooseherder/sweeptools.py +76 -0
- pyvale/sensorsim/__init__.py +86 -0
- pyvale/sensorsim/camera.py +147 -0
- pyvale/sensorsim/cameradata.py +72 -0
- pyvale/sensorsim/cameradata2d.py +84 -0
- pyvale/sensorsim/camerasensor.py +147 -0
- pyvale/sensorsim/camerastereo.py +217 -0
- pyvale/sensorsim/cameratools.py +484 -0
- pyvale/sensorsim/cython/rastercyth.c +32404 -0
- pyvale/sensorsim/cython/rastercyth.html +3392 -0
- pyvale/sensorsim/cython/rastercyth.py +684 -0
- pyvale/sensorsim/enums.py +16 -0
- pyvale/sensorsim/errordriftcalc.py +104 -0
- pyvale/sensorsim/errorintegrator.py +359 -0
- pyvale/sensorsim/errorrand.py +105 -0
- pyvale/sensorsim/errorsimulator.py +137 -0
- pyvale/sensorsim/errorsyscalib.py +93 -0
- pyvale/sensorsim/errorsysdep.py +197 -0
- pyvale/sensorsim/errorsysfield.py +383 -0
- pyvale/sensorsim/errorsysindep.py +209 -0
- pyvale/sensorsim/exceptions.py +14 -0
- pyvale/sensorsim/experimentsimio.py +94 -0
- pyvale/sensorsim/experimentsimulator.py +615 -0
- pyvale/sensorsim/experimentstats.py +115 -0
- pyvale/sensorsim/field.py +127 -0
- pyvale/sensorsim/fieldconverter.py +378 -0
- pyvale/sensorsim/fieldinterp.py +89 -0
- pyvale/sensorsim/fieldinterpmesh.py +119 -0
- pyvale/sensorsim/fieldinterppoints.py +93 -0
- pyvale/sensorsim/fieldsampler.py +110 -0
- pyvale/sensorsim/fieldscalar.py +94 -0
- pyvale/sensorsim/fieldtensor.py +150 -0
- pyvale/sensorsim/fieldtransform.py +388 -0
- pyvale/sensorsim/fieldvector.py +136 -0
- pyvale/sensorsim/generatorsrandom.py +420 -0
- pyvale/sensorsim/imagedef2d.py +577 -0
- pyvale/sensorsim/imagetools.py +137 -0
- pyvale/sensorsim/integratorfactory.py +240 -0
- pyvale/sensorsim/integratorquadrature.py +217 -0
- pyvale/sensorsim/integratorrectangle.py +165 -0
- pyvale/sensorsim/integratorspatial.py +89 -0
- pyvale/sensorsim/integratortype.py +43 -0
- pyvale/sensorsim/logger.py +23 -0
- pyvale/sensorsim/plotting_logs.py +22 -0
- pyvale/sensorsim/raster.py +31 -0
- pyvale/sensorsim/rastercy.py +107 -0
- pyvale/sensorsim/rasternp.py +627 -0
- pyvale/sensorsim/rasteropts.py +58 -0
- pyvale/sensorsim/renderer.py +47 -0
- pyvale/sensorsim/rendermesh.py +137 -0
- pyvale/sensorsim/renderscene.py +51 -0
- pyvale/sensorsim/sensorarray.py +178 -0
- pyvale/sensorsim/sensordata.py +74 -0
- pyvale/sensorsim/sensordescriptor.py +275 -0
- pyvale/sensorsim/sensorfactory.py +179 -0
- pyvale/sensorsim/sensorspoint.py +308 -0
- pyvale/sensorsim/sensortools.py +113 -0
- pyvale/sensorsim/simtools.py +300 -0
- pyvale/sensorsim/visualexpplotter.py +201 -0
- pyvale/sensorsim/visualimagedef.py +74 -0
- pyvale/sensorsim/visualimages.py +76 -0
- pyvale/sensorsim/visualopts.py +507 -0
- pyvale/sensorsim/visualsimanimator.py +111 -0
- pyvale/sensorsim/visualsimplotter.py +180 -0
- pyvale/sensorsim/visualsimsensors.py +343 -0
- pyvale/sensorsim/visualtools.py +136 -0
- pyvale/sensorsim/visualtraceanimator.py +77 -0
- pyvale/sensorsim/visualtraceplotter.py +296 -0
- pyvale/simcases/case00_HEX20.i +242 -0
- pyvale/simcases/case00_HEX27.i +242 -0
- pyvale/simcases/case00_HEX8.i +242 -0
- pyvale/simcases/case00_TET10.i +242 -0
- pyvale/simcases/case00_TET14.i +242 -0
- pyvale/simcases/case00_TET4.i +242 -0
- pyvale/simcases/case01.i +101 -0
- pyvale/simcases/case02.i +156 -0
- pyvale/simcases/case03.i +136 -0
- pyvale/simcases/case04.i +181 -0
- pyvale/simcases/case05.i +234 -0
- pyvale/simcases/case06.i +305 -0
- pyvale/simcases/case07.geo +135 -0
- pyvale/simcases/case07.i +87 -0
- pyvale/simcases/case08.geo +144 -0
- pyvale/simcases/case08.i +153 -0
- pyvale/simcases/case09.geo +204 -0
- pyvale/simcases/case09.i +87 -0
- pyvale/simcases/case10.geo +204 -0
- pyvale/simcases/case10.i +257 -0
- pyvale/simcases/case11.geo +337 -0
- pyvale/simcases/case11.i +147 -0
- pyvale/simcases/case12.geo +388 -0
- pyvale/simcases/case12.i +329 -0
- pyvale/simcases/case13.i +140 -0
- pyvale/simcases/case14.i +159 -0
- pyvale/simcases/case15.geo +337 -0
- pyvale/simcases/case15.i +150 -0
- pyvale/simcases/case16.geo +391 -0
- pyvale/simcases/case16.i +357 -0
- pyvale/simcases/case16_d.i +360 -0
- pyvale/simcases/case16_u.i +360 -0
- pyvale/simcases/case17.geo +138 -0
- pyvale/simcases/case17.i +144 -0
- pyvale/simcases/case18.i +271 -0
- pyvale/simcases/case18_d.i +271 -0
- pyvale/simcases/case18_u.i +271 -0
- pyvale/simcases/case19.geo +252 -0
- pyvale/simcases/case19.i +99 -0
- pyvale/simcases/case20.geo +252 -0
- pyvale/simcases/case20.i +250 -0
- pyvale/simcases/case21.geo +74 -0
- pyvale/simcases/case21.i +155 -0
- pyvale/simcases/case22.geo +82 -0
- pyvale/simcases/case22.i +140 -0
- pyvale/simcases/case23.geo +164 -0
- pyvale/simcases/case23.i +140 -0
- pyvale/simcases/case24.geo +79 -0
- pyvale/simcases/case24.i +123 -0
- pyvale/simcases/case25.geo +82 -0
- pyvale/simcases/case25.i +140 -0
- pyvale/simcases/case26.geo +166 -0
- pyvale/simcases/case26.i +140 -0
- pyvale/simcases/cases_dictionary.yaml +336 -0
- pyvale/simcases/run_1case.py +60 -0
- pyvale/simcases/run_all_cases.py +69 -0
- pyvale/simcases/run_build_case.py +64 -0
- pyvale/simcases/run_example_cases.py +69 -0
- pyvale/strain/__init__.py +13 -0
- pyvale/strain/cpp/bindings.cpp +25 -0
- pyvale/strain/cpp/smooth.cpp +140 -0
- pyvale/strain/cpp/smooth.hpp +53 -0
- pyvale/strain/cpp/strain.cpp +390 -0
- pyvale/strain/cpp/strain.hpp +177 -0
- pyvale/strain/strain.py +117 -0
- pyvale/strain/strain_cpp.cpython-311-i386-linux-gnu.so +0 -0
- pyvale/strain/strainchecks.py +47 -0
- pyvale/strain/strainimport.py +303 -0
- pyvale/strain/strainresults.py +55 -0
- pyvale/verif/__init__.py +15 -0
- pyvale/verif/analyticmeshgen.py +102 -0
- pyvale/verif/analyticsimdatafactory.py +125 -0
- pyvale/verif/analyticsimdatagenerator.py +368 -0
- pyvale/verif/matchsimdata.py +113 -0
- pyvale/verif/pointsens.py +120 -0
- pyvale/verif/pointsensconst.py +19 -0
- pyvale/verif/pointsensmech.py +270 -0
- pyvale/verif/pointsensmultiphys.py +184 -0
- pyvale/verif/pointsensscalar.py +383 -0
- pyvale/verif/pointsenstensor.py +159 -0
- pyvale/verif/pointsensvector.py +157 -0
- pyvale-2026.1.1.dist-info/METADATA +98 -0
- pyvale-2026.1.1.dist-info/RECORD +860 -0
- pyvale-2026.1.1.dist-info/WHEEL +6 -0
- pyvale-2026.1.1.dist-info/licenses/LICENSE +21 -0
- pyvale.libs/libgomp-65f46eca.so.1.0.0 +0 -0
- share/eigen3/cmake/Eigen3Config.cmake +37 -0
- share/eigen3/cmake/Eigen3ConfigVersion.cmake +65 -0
- share/eigen3/cmake/Eigen3Targets.cmake +106 -0
- share/eigen3/cmake/UseEigen3.cmake +6 -0
- share/pkgconfig/eigen3.pc +9 -0
|
@@ -0,0 +1,544 @@
|
|
|
1
|
+
// This file is part of Eigen, a lightweight C++ template library
|
|
2
|
+
// for linear algebra.
|
|
3
|
+
//
|
|
4
|
+
// Mehdi Goli Codeplay Software Ltd.
|
|
5
|
+
// Ralph Potter Codeplay Software Ltd.
|
|
6
|
+
// Luke Iwanski Codeplay Software Ltd.
|
|
7
|
+
// Contact: <eigen@codeplay.com>
|
|
8
|
+
// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
|
|
9
|
+
|
|
10
|
+
//
|
|
11
|
+
// This Source Code Form is subject to the terms of the Mozilla
|
|
12
|
+
// Public License v. 2.0. If a copy of the MPL was not distributed
|
|
13
|
+
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
14
|
+
|
|
15
|
+
#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONVOLUTION_SYCL_H
|
|
16
|
+
#define EIGEN_CXX11_TENSOR_TENSOR_CONVOLUTION_SYCL_H
|
|
17
|
+
|
|
18
|
+
namespace Eigen {
|
|
19
|
+
|
|
20
|
+
/** \class TensorConvolution
|
|
21
|
+
* \ingroup CXX11_Tensor_Module
|
|
22
|
+
*
|
|
23
|
+
* \brief Tensor convolution class.
|
|
24
|
+
*
|
|
25
|
+
*
|
|
26
|
+
*/
|
|
27
|
+
|
|
28
|
+
enum class convolution_type { CONV1D, CONV2D, CONV3D };
|
|
29
|
+
template <typename Evaluator, typename CoeffReturnType, typename KernelType, typename Index, typename InputDims,
|
|
30
|
+
typename Kernel_accessor, typename Buffer_accessor, convolution_type Conv_Dim>
|
|
31
|
+
struct EigenConvolutionKernel;
|
|
32
|
+
template <typename Evaluator, typename CoeffReturnType, typename KernelType, typename Index, typename InputDims,
|
|
33
|
+
typename Kernel_accessor, typename Buffer_accessor>
|
|
34
|
+
struct EigenConvolutionKernel<Evaluator, CoeffReturnType, KernelType, Index, InputDims, Kernel_accessor,
|
|
35
|
+
Buffer_accessor, convolution_type::CONV1D> {
|
|
36
|
+
typedef cl::sycl::accessor<CoeffReturnType, 1, cl::sycl::access::mode::read_write, cl::sycl::access::target::local>
|
|
37
|
+
Local_accessor;
|
|
38
|
+
Local_accessor local_acc;
|
|
39
|
+
Evaluator device_evaluator;
|
|
40
|
+
Kernel_accessor kernel_filter;
|
|
41
|
+
Buffer_accessor buffer_acc;
|
|
42
|
+
internal::IndexMapper<Index, InputDims, 1, Evaluator::Layout> indexMapper;
|
|
43
|
+
const size_t kernelSize;
|
|
44
|
+
const cl::sycl::range<2> input_range;
|
|
45
|
+
EigenConvolutionKernel(Local_accessor local_acc_, Evaluator device_evaluator_, Kernel_accessor kernel_filter_,
|
|
46
|
+
Buffer_accessor buffer_acc_,
|
|
47
|
+
internal::IndexMapper<Index, InputDims, 1, Evaluator::Layout> indexMapper_,
|
|
48
|
+
const size_t kernelSize_, const cl::sycl::range<2> input_range_)
|
|
49
|
+
: local_acc(local_acc_),
|
|
50
|
+
device_evaluator(device_evaluator_),
|
|
51
|
+
kernel_filter(kernel_filter_),
|
|
52
|
+
buffer_acc(buffer_acc_),
|
|
53
|
+
indexMapper(indexMapper_),
|
|
54
|
+
kernelSize(kernelSize_),
|
|
55
|
+
input_range(input_range_) {}
|
|
56
|
+
|
|
57
|
+
template <typename BooleanDim2>
|
|
58
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool boundary_check(const BooleanDim2 boolean_check) {
|
|
59
|
+
return (boolean_check[0] && boolean_check[1]);
|
|
60
|
+
}
|
|
61
|
+
void operator()(cl::sycl::nd_item<2> itemID) {
|
|
62
|
+
auto buffer_ptr = buffer_acc.get_pointer();
|
|
63
|
+
auto kernel_ptr = kernel_filter.get_pointer();
|
|
64
|
+
// the required row to be calculated for the for each plane in shered memory
|
|
65
|
+
const size_t num_input = (itemID.get_local_range()[0] + kernelSize - 1);
|
|
66
|
+
const size_t plane_kernel_offset = itemID.get_local_id(1) * num_input;
|
|
67
|
+
const size_t input_offset = itemID.get_group(0) * itemID.get_local_range()[0];
|
|
68
|
+
const size_t plane_tensor_offset = indexMapper.mapGpuInputPlaneToTensorInputOffset(itemID.get_global_id(1));
|
|
69
|
+
/// fill the shared memory
|
|
70
|
+
for (size_t i = itemID.get_local_id(0); i < num_input; i += itemID.get_local_range()[0]) {
|
|
71
|
+
const size_t local_index = i + plane_kernel_offset;
|
|
72
|
+
const size_t tensor_index =
|
|
73
|
+
plane_tensor_offset + indexMapper.mapGpuInputKernelToTensorInputOffset(i + input_offset);
|
|
74
|
+
|
|
75
|
+
local_acc[local_index] =
|
|
76
|
+
(((i + input_offset) < (input_range[0] + kernelSize - 1)) && itemID.get_global_id(1) < input_range[1])
|
|
77
|
+
? device_evaluator.coeff(tensor_index)
|
|
78
|
+
: CoeffReturnType(0);
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
itemID.barrier(cl::sycl::access::fence_space::local_space);
|
|
82
|
+
|
|
83
|
+
// calculate the convolution // output start x
|
|
84
|
+
const size_t first_output_start = itemID.get_group(0) * (itemID.get_local_range()[0]);
|
|
85
|
+
if (boundary_check(itemID.get_global_id() < input_range)) {
|
|
86
|
+
CoeffReturnType result = static_cast<CoeffReturnType>(0);
|
|
87
|
+
const size_t index = plane_kernel_offset + itemID.get_local_id(0);
|
|
88
|
+
for (size_t k = 0; k < kernelSize; ++k) {
|
|
89
|
+
result += (local_acc[k + index] * kernel_ptr[k]);
|
|
90
|
+
}
|
|
91
|
+
const size_t tensor_index =
|
|
92
|
+
indexMapper.mapGpuOutputPlaneToTensorOutputOffset(itemID.get_global_id(1)) +
|
|
93
|
+
indexMapper.mapGpuOutputKernelToTensorOutputOffset(itemID.get_local_id(0) + first_output_start);
|
|
94
|
+
buffer_ptr[tensor_index] = result;
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
};
|
|
98
|
+
|
|
99
|
+
template <typename Evaluator, typename CoeffReturnType, typename KernelType, typename Index, typename InputDims,
|
|
100
|
+
typename Kernel_accessor, typename Buffer_accessor>
|
|
101
|
+
struct EigenConvolutionKernel<Evaluator, CoeffReturnType, KernelType, Index, InputDims, Kernel_accessor,
|
|
102
|
+
Buffer_accessor, convolution_type::CONV2D> {
|
|
103
|
+
typedef cl::sycl::accessor<CoeffReturnType, 1, cl::sycl::access::mode::read_write, cl::sycl::access::target::local>
|
|
104
|
+
Local_accessor;
|
|
105
|
+
Local_accessor local_acc;
|
|
106
|
+
Evaluator device_evaluator;
|
|
107
|
+
Kernel_accessor kernel_filter;
|
|
108
|
+
Buffer_accessor buffer_acc;
|
|
109
|
+
internal::IndexMapper<Index, InputDims, 2, Evaluator::Layout> indexMapper;
|
|
110
|
+
const cl::sycl::range<2> kernel_size;
|
|
111
|
+
const cl::sycl::range<3> input_range;
|
|
112
|
+
EigenConvolutionKernel(Local_accessor local_acc_, Evaluator device_evaluator_, Kernel_accessor kernel_filter_,
|
|
113
|
+
Buffer_accessor buffer_acc_,
|
|
114
|
+
internal::IndexMapper<Index, InputDims, 2, Evaluator::Layout> indexMapper_,
|
|
115
|
+
const cl::sycl::range<2> kernel_size_, const cl::sycl::range<3> input_range_)
|
|
116
|
+
: local_acc(local_acc_),
|
|
117
|
+
device_evaluator(device_evaluator_),
|
|
118
|
+
kernel_filter(kernel_filter_),
|
|
119
|
+
buffer_acc(buffer_acc_),
|
|
120
|
+
indexMapper(indexMapper_),
|
|
121
|
+
kernel_size(kernel_size_),
|
|
122
|
+
input_range(input_range_) {}
|
|
123
|
+
template <typename BooleanDim3>
|
|
124
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool boundary_check(const BooleanDim3 boolean_check) {
|
|
125
|
+
return (boolean_check[0] && boolean_check[1] && boolean_check[2]);
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
void operator()(cl::sycl::nd_item<3> itemID) {
|
|
129
|
+
auto buffer_ptr = buffer_acc.get_pointer();
|
|
130
|
+
auto kernel_ptr = kernel_filter.get_pointer();
|
|
131
|
+
// the required row to be calculated for the for each plane in shered memory
|
|
132
|
+
const auto num_input = cl::sycl::range<2>{
|
|
133
|
+
(cl::sycl::range<2>(itemID.get_local_range()[0], itemID.get_local_range()[1]) + kernel_size - 1)};
|
|
134
|
+
|
|
135
|
+
const size_t plane_input_offset = indexMapper.mapGpuInputPlaneToTensorInputOffset(itemID.get_global_id(2));
|
|
136
|
+
const size_t plane_kernel_offset = itemID.get_local_id(2) * num_input[1];
|
|
137
|
+
|
|
138
|
+
const auto input_offset = cl::sycl::range<2>{itemID.get_group(0) * itemID.get_local_range()[0],
|
|
139
|
+
itemID.get_group(1) * itemID.get_local_range()[1]};
|
|
140
|
+
|
|
141
|
+
// fill the local memory
|
|
142
|
+
bool in_range_dim2 = itemID.get_global_id(2) < input_range[2];
|
|
143
|
+
for (size_t j = itemID.get_local_id(1); j < num_input[1]; j += itemID.get_local_range()[1]) {
|
|
144
|
+
const size_t local_input_offset = num_input[0] * (j + plane_kernel_offset);
|
|
145
|
+
bool in_range_dim1 = ((j + input_offset[1]) < (input_range[1] + kernel_size[1] - 1));
|
|
146
|
+
for (size_t i = itemID.get_local_id(0); i < num_input[0]; i += itemID.get_local_range()[0]) {
|
|
147
|
+
const size_t local_index = i + local_input_offset;
|
|
148
|
+
const size_t tensor_index = plane_input_offset + indexMapper.mapGpuInputKernelToTensorInputOffset(
|
|
149
|
+
i + input_offset[0], j + input_offset[1]);
|
|
150
|
+
local_acc[local_index] = (((i + input_offset[0]) < (input_range[0] + kernel_size[0] - 1)) &&
|
|
151
|
+
in_range_dim1 && in_range_dim2)
|
|
152
|
+
? device_evaluator.coeff(tensor_index)
|
|
153
|
+
: CoeffReturnType(0);
|
|
154
|
+
}
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
itemID.barrier(cl::sycl::access::fence_space::local_space);
|
|
158
|
+
|
|
159
|
+
// output offset start for each thread
|
|
160
|
+
const auto output_offset = cl::sycl::range<2>{itemID.get_group(0) * itemID.get_local_range()[0],
|
|
161
|
+
itemID.get_group(1) * itemID.get_local_range()[1]};
|
|
162
|
+
|
|
163
|
+
if (boundary_check(itemID.get_global_id() < input_range)) {
|
|
164
|
+
CoeffReturnType result = static_cast<CoeffReturnType>(0);
|
|
165
|
+
|
|
166
|
+
for (size_t j = 0; j < kernel_size[1]; j++) {
|
|
167
|
+
size_t kernel_offset = kernel_size[0] * j;
|
|
168
|
+
const size_t index =
|
|
169
|
+
(num_input[0] * (plane_kernel_offset + j + itemID.get_local_id(1))) + itemID.get_local_id(0);
|
|
170
|
+
for (size_t i = 0; i < kernel_size[0]; i++) {
|
|
171
|
+
result += (local_acc[i + index] * kernel_ptr[i + kernel_offset]);
|
|
172
|
+
}
|
|
173
|
+
}
|
|
174
|
+
const size_t tensor_index =
|
|
175
|
+
indexMapper.mapGpuOutputPlaneToTensorOutputOffset(itemID.get_global_id(2)) +
|
|
176
|
+
indexMapper.mapGpuOutputKernelToTensorOutputOffset(itemID.get_local_id(0) + output_offset[0],
|
|
177
|
+
itemID.get_local_id(1) + output_offset[1]);
|
|
178
|
+
|
|
179
|
+
buffer_ptr[tensor_index] = result;
|
|
180
|
+
}
|
|
181
|
+
}
|
|
182
|
+
};
|
|
183
|
+
|
|
184
|
+
template <typename Evaluator, typename CoeffReturnType, typename KernelType, typename Index, typename InputDims,
|
|
185
|
+
typename Kernel_accessor, typename Buffer_accessor>
|
|
186
|
+
struct EigenConvolutionKernel<Evaluator, CoeffReturnType, KernelType, Index, InputDims, Kernel_accessor,
|
|
187
|
+
Buffer_accessor, convolution_type::CONV3D> {
|
|
188
|
+
typedef cl::sycl::accessor<CoeffReturnType, 1, cl::sycl::access::mode::read_write, cl::sycl::access::target::local>
|
|
189
|
+
Local_accessor;
|
|
190
|
+
Local_accessor local_acc;
|
|
191
|
+
Evaluator device_evaluator;
|
|
192
|
+
Kernel_accessor kernel_filter;
|
|
193
|
+
Buffer_accessor buffer_acc;
|
|
194
|
+
internal::IndexMapper<Index, InputDims, 3, Evaluator::Layout> indexMapper;
|
|
195
|
+
const cl::sycl::range<3> kernel_size;
|
|
196
|
+
const cl::sycl::range<3> input_range;
|
|
197
|
+
const size_t numP;
|
|
198
|
+
|
|
199
|
+
EigenConvolutionKernel(Local_accessor local_acc_, Evaluator device_evaluator_, Kernel_accessor kernel_filter_,
|
|
200
|
+
Buffer_accessor buffer_acc_,
|
|
201
|
+
internal::IndexMapper<Index, InputDims, 3, Evaluator::Layout> indexMapper_,
|
|
202
|
+
const cl::sycl::range<3> kernel_size_, const cl::sycl::range<3> input_range_,
|
|
203
|
+
const size_t numP_)
|
|
204
|
+
: local_acc(local_acc_),
|
|
205
|
+
device_evaluator(device_evaluator_),
|
|
206
|
+
kernel_filter(kernel_filter_),
|
|
207
|
+
buffer_acc(buffer_acc_),
|
|
208
|
+
indexMapper(indexMapper_),
|
|
209
|
+
kernel_size(kernel_size_),
|
|
210
|
+
input_range(input_range_),
|
|
211
|
+
numP(numP_) {}
|
|
212
|
+
template <typename BooleanDim3>
|
|
213
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool boundary_check(const BooleanDim3 boolean_check) {
|
|
214
|
+
return (boolean_check[0] && boolean_check[1] && boolean_check[2]);
|
|
215
|
+
}
|
|
216
|
+
void operator()(cl::sycl::nd_item<3> itemID) {
|
|
217
|
+
auto buffer_ptr = buffer_acc.get_pointer();
|
|
218
|
+
auto kernel_ptr = kernel_filter.get_pointer();
|
|
219
|
+
const auto num_input = cl::sycl::range<3>{itemID.get_local_range() + kernel_size - 1};
|
|
220
|
+
|
|
221
|
+
const auto input_offset = cl::sycl::range<3>{itemID.get_group().get_id() * itemID.get_local_range()};
|
|
222
|
+
|
|
223
|
+
const auto output_offset =
|
|
224
|
+
cl::sycl::range<3>{itemID.get_group().get_id() * itemID.get_local_range() + itemID.get_local_id()};
|
|
225
|
+
|
|
226
|
+
for (size_t p = 0; p < numP; p++) {
|
|
227
|
+
/// fill the shared memory
|
|
228
|
+
const size_t plane_input_offset = indexMapper.mapGpuInputPlaneToTensorInputOffset(p);
|
|
229
|
+
for (size_t k = itemID.get_local_id(2); k < num_input[2]; k += itemID.get_local_range()[2]) {
|
|
230
|
+
size_t local_index_dim2 = num_input[0] * num_input[1] * k;
|
|
231
|
+
bool cond_k_dim = (k + input_offset[2] < (input_range[2] + kernel_size[2] - 1));
|
|
232
|
+
for (size_t j = itemID.get_local_id(1); j < num_input[1]; j += itemID.get_local_range()[1]) {
|
|
233
|
+
bool cond_j_dim = cond_k_dim && (j + input_offset[1] < (input_range[1] + kernel_size[1] - 1));
|
|
234
|
+
size_t local_index_dim1 = (num_input[0] * j) + local_index_dim2;
|
|
235
|
+
for (size_t i = itemID.get_local_id(0); i < num_input[0]; i += itemID.get_local_range()[0]) {
|
|
236
|
+
bool conds = cond_j_dim && (i + input_offset[0] < (input_range[0] + kernel_size[0] - 1));
|
|
237
|
+
const size_t local_index = local_index_dim1 + i;
|
|
238
|
+
const size_t tensor_index =
|
|
239
|
+
plane_input_offset + indexMapper.mapGpuInputKernelToTensorInputOffset(
|
|
240
|
+
i + input_offset[0], j + input_offset[1], k + input_offset[2]);
|
|
241
|
+
local_acc[local_index] = conds ? device_evaluator.coeff(tensor_index) : CoeffReturnType(0);
|
|
242
|
+
}
|
|
243
|
+
}
|
|
244
|
+
}
|
|
245
|
+
itemID.barrier(cl::sycl::access::fence_space::local_space);
|
|
246
|
+
|
|
247
|
+
// calculate the convolution
|
|
248
|
+
|
|
249
|
+
if (boundary_check(itemID.get_global_id() < input_range)) {
|
|
250
|
+
CoeffReturnType result = static_cast<CoeffReturnType>(0);
|
|
251
|
+
for (size_t k = 0; k < kernel_size[2]; k++) {
|
|
252
|
+
for (size_t j = 0; j < kernel_size[1]; j++) {
|
|
253
|
+
for (size_t i = 0; i < kernel_size[0]; i++) {
|
|
254
|
+
const size_t kernel_index = i + kernel_size[0] * (j + kernel_size[1] * k);
|
|
255
|
+
const size_t local_index =
|
|
256
|
+
((i + itemID.get_local_id(0)) +
|
|
257
|
+
num_input[0] * ((j + itemID.get_local_id(1)) + num_input[1] * (k + itemID.get_local_id(2))));
|
|
258
|
+
|
|
259
|
+
result += (local_acc[local_index] * kernel_ptr[kernel_index]);
|
|
260
|
+
}
|
|
261
|
+
}
|
|
262
|
+
}
|
|
263
|
+
const size_t tensor_index =
|
|
264
|
+
indexMapper.mapGpuOutputPlaneToTensorOutputOffset(p) +
|
|
265
|
+
indexMapper.mapGpuOutputKernelToTensorOutputOffset(output_offset[0], output_offset[1], output_offset[2]);
|
|
266
|
+
buffer_ptr[tensor_index] = result;
|
|
267
|
+
}
|
|
268
|
+
|
|
269
|
+
itemID.barrier(cl::sycl::access::fence_space::local_space);
|
|
270
|
+
}
|
|
271
|
+
}
|
|
272
|
+
};
|
|
273
|
+
|
|
274
|
+
template <typename Indices, typename InputArgType, typename KernelArgType>
|
|
275
|
+
struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelArgType>, Eigen::SyclDevice> {
|
|
276
|
+
typedef TensorConvolutionOp<Indices, InputArgType, KernelArgType> XprType;
|
|
277
|
+
|
|
278
|
+
static const int NumDims =
|
|
279
|
+
internal::array_size<typename TensorEvaluator<InputArgType, Eigen::SyclDevice>::Dimensions>::value;
|
|
280
|
+
static const int NumKernelDims = internal::array_size<Indices>::value;
|
|
281
|
+
typedef typename XprType::Index Index;
|
|
282
|
+
typedef DSizes<Index, NumDims> Dimensions;
|
|
283
|
+
typedef typename TensorEvaluator<KernelArgType, Eigen::SyclDevice>::Dimensions KernelDimensions;
|
|
284
|
+
typedef const Eigen::SyclDevice Device;
|
|
285
|
+
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
|
286
|
+
typedef typename PacketType<CoeffReturnType, Eigen::SyclDevice>::type PacketReturnType;
|
|
287
|
+
typedef typename InputArgType::Scalar Scalar;
|
|
288
|
+
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
|
289
|
+
typedef StorageMemory<CoeffReturnType, Eigen::SyclDevice> Storage;
|
|
290
|
+
typedef typename Storage::Type EvaluatorPointerType;
|
|
291
|
+
typedef StorageMemory<const CoeffReturnType, Eigen::SyclDevice> KernelStorage;
|
|
292
|
+
|
|
293
|
+
enum {
|
|
294
|
+
IsAligned = TensorEvaluator<InputArgType, Eigen::SyclDevice>::IsAligned &
|
|
295
|
+
TensorEvaluator<KernelArgType, Eigen::SyclDevice>::IsAligned,
|
|
296
|
+
PacketAccess = false,
|
|
297
|
+
BlockAccess = false,
|
|
298
|
+
PreferBlockAccess = false,
|
|
299
|
+
Layout = TensorEvaluator<InputArgType, Eigen::SyclDevice>::Layout,
|
|
300
|
+
CoordAccess = false, // to be implemented
|
|
301
|
+
RawAccess = false
|
|
302
|
+
};
|
|
303
|
+
|
|
304
|
+
//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
|
|
305
|
+
typedef internal::TensorBlockNotImplemented TensorBlock;
|
|
306
|
+
//===--------------------------------------------------------------------===//
|
|
307
|
+
|
|
308
|
+
TensorEvaluator(const XprType &op, const Eigen::SyclDevice &device)
|
|
309
|
+
: m_inputImpl(op.inputExpression(), device),
|
|
310
|
+
m_kernelArg(op.kernelExpression()),
|
|
311
|
+
m_kernelImpl(op.kernelExpression(), device),
|
|
312
|
+
m_indices(op.indices()),
|
|
313
|
+
m_buf(NULL),
|
|
314
|
+
m_kernel(NULL),
|
|
315
|
+
m_local_kernel(false),
|
|
316
|
+
m_device(device) {
|
|
317
|
+
EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<InputArgType, Eigen::SyclDevice>::Layout) ==
|
|
318
|
+
static_cast<int>(TensorEvaluator<KernelArgType, Eigen::SyclDevice>::Layout)),
|
|
319
|
+
YOU_MADE_A_PROGRAMMING_MISTAKE);
|
|
320
|
+
|
|
321
|
+
const typename TensorEvaluator<InputArgType, Eigen::SyclDevice>::Dimensions &input_dims = m_inputImpl.dimensions();
|
|
322
|
+
const typename TensorEvaluator<KernelArgType, Eigen::SyclDevice>::Dimensions &kernel_dims =
|
|
323
|
+
m_kernelImpl.dimensions();
|
|
324
|
+
|
|
325
|
+
m_dimensions = m_inputImpl.dimensions();
|
|
326
|
+
for (int i = 0; i < NumKernelDims; ++i) {
|
|
327
|
+
const Index index = op.indices()[i];
|
|
328
|
+
const Index input_dim = input_dims[index];
|
|
329
|
+
const Index kernel_dim = kernel_dims[i];
|
|
330
|
+
const Index result_dim = input_dim - kernel_dim + 1;
|
|
331
|
+
m_dimensions[index] = result_dim;
|
|
332
|
+
}
|
|
333
|
+
}
|
|
334
|
+
|
|
335
|
+
EIGEN_DEVICE_FUNC const Dimensions &dimensions() const { return m_dimensions; }
|
|
336
|
+
|
|
337
|
+
EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
|
|
338
|
+
preloadKernel();
|
|
339
|
+
m_inputImpl.evalSubExprsIfNeeded(NULL);
|
|
340
|
+
if (data) {
|
|
341
|
+
executeEval(data);
|
|
342
|
+
return false;
|
|
343
|
+
} else {
|
|
344
|
+
m_buf = (EvaluatorPointerType)m_device.get(
|
|
345
|
+
(Scalar *)m_device.allocate_temp(dimensions().TotalSize() * sizeof(Scalar)));
|
|
346
|
+
executeEval(m_buf);
|
|
347
|
+
return true;
|
|
348
|
+
}
|
|
349
|
+
}
|
|
350
|
+
|
|
351
|
+
EIGEN_STRONG_INLINE void cleanup() {
|
|
352
|
+
m_inputImpl.cleanup();
|
|
353
|
+
if (m_buf) {
|
|
354
|
+
m_device.deallocate_temp(m_buf);
|
|
355
|
+
m_buf = NULL;
|
|
356
|
+
}
|
|
357
|
+
if (m_local_kernel) {
|
|
358
|
+
m_device.deallocate_temp(m_kernel);
|
|
359
|
+
m_local_kernel = false;
|
|
360
|
+
}
|
|
361
|
+
m_kernel = NULL;
|
|
362
|
+
}
|
|
363
|
+
/// used by sycl in order to build the sycl buffer
|
|
364
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Device &device() const { return m_device; }
|
|
365
|
+
/// used by sycl in order to build the sycl buffer
|
|
366
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EvaluatorPointerType data() const { return m_buf; }
|
|
367
|
+
|
|
368
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void preloadKernel() {
|
|
369
|
+
// Don't make a local copy of the kernel unless we have to (i.e. it's an
|
|
370
|
+
// expression that needs to be evaluated)
|
|
371
|
+
typename KernelStorage::Type in_place = m_kernelImpl.data();
|
|
372
|
+
if (in_place) {
|
|
373
|
+
m_kernel = in_place;
|
|
374
|
+
m_local_kernel = false;
|
|
375
|
+
} else {
|
|
376
|
+
ptrdiff_t kernel_sz = m_kernelImpl.dimensions().TotalSize() * sizeof(Scalar);
|
|
377
|
+
EvaluatorPointerType local = (EvaluatorPointerType)m_device.get((Scalar *)m_device.allocate_temp(kernel_sz));
|
|
378
|
+
typedef TensorEvalToOp<const KernelArgType> EvalTo;
|
|
379
|
+
EvalTo evalToTmp(m_device.get(local), m_kernelArg);
|
|
380
|
+
const bool PacketAccess = internal::IsVectorizable<Eigen::SyclDevice, KernelArgType>::value;
|
|
381
|
+
internal::TensorExecutor<const EvalTo, Eigen::SyclDevice, PacketAccess>::run(evalToTmp, m_device);
|
|
382
|
+
m_kernel = local;
|
|
383
|
+
m_local_kernel = true;
|
|
384
|
+
}
|
|
385
|
+
}
|
|
386
|
+
|
|
387
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void executeEval(EvaluatorPointerType data) const {
|
|
388
|
+
typedef TensorEvaluator<InputArgType, Eigen::SyclDevice> InputEvaluator;
|
|
389
|
+
typedef typename InputEvaluator::Dimensions InputDims;
|
|
390
|
+
switch (NumKernelDims) {
|
|
391
|
+
case 1: {
|
|
392
|
+
const size_t numX = dimensions()[m_indices[0]];
|
|
393
|
+
const size_t numP = dimensions().TotalSize() / numX;
|
|
394
|
+
const auto input_dim = std::array<size_t, 2>{numX, numP};
|
|
395
|
+
auto global_range = cl::sycl::range<2>{};
|
|
396
|
+
auto local_range = cl::sycl::range<2>{};
|
|
397
|
+
const size_t kernel_size = m_kernelImpl.dimensions().TotalSize();
|
|
398
|
+
|
|
399
|
+
m_device.parallel_for_setup(input_dim, global_range, local_range);
|
|
400
|
+
const size_t local_memory_size = (local_range[0] + kernel_size - 1) * (local_range[1]);
|
|
401
|
+
gpu_assert(static_cast<unsigned long>(local_memory_size) <= m_device.sharedMemPerBlock());
|
|
402
|
+
const array<Index, 1> indices{{m_indices[0]}};
|
|
403
|
+
const array<Index, 1> kernel_dims{{m_kernelImpl.dimensions()[0]}};
|
|
404
|
+
internal::IndexMapper<Index, InputDims, 1, Layout> indexMapper(m_inputImpl.dimensions(), kernel_dims, indices);
|
|
405
|
+
|
|
406
|
+
typedef EigenConvolutionKernel<InputEvaluator, CoeffReturnType, Scalar, Index, InputDims,
|
|
407
|
+
typename KernelStorage::Type, EvaluatorPointerType, convolution_type::CONV1D>
|
|
408
|
+
ConvKernel;
|
|
409
|
+
|
|
410
|
+
m_device.template binary_kernel_launcher<CoeffReturnType, ConvKernel>(
|
|
411
|
+
m_inputImpl, m_kernel, data, cl::sycl::nd_range<2>(global_range, local_range), local_memory_size,
|
|
412
|
+
indexMapper, kernel_size, cl::sycl::range<2>(input_dim[0], input_dim[1]));
|
|
413
|
+
break;
|
|
414
|
+
}
|
|
415
|
+
|
|
416
|
+
case 2: {
|
|
417
|
+
auto kernel_index = std::array<size_t, 2>{static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 0 : 1,
|
|
418
|
+
static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 1 : 0};
|
|
419
|
+
auto kernel_size = cl::sycl::range<2>{(size_t)m_kernelImpl.dimensions()[kernel_index[0]],
|
|
420
|
+
(size_t)m_kernelImpl.dimensions()[kernel_index[1]]};
|
|
421
|
+
const size_t numX = dimensions()[m_indices[kernel_index[0]]];
|
|
422
|
+
const size_t numY = dimensions()[m_indices[kernel_index[1]]];
|
|
423
|
+
const size_t numP = dimensions().TotalSize() / (numX * numY);
|
|
424
|
+
auto input_dim = std::array<size_t, 3>{numX, numY, numP};
|
|
425
|
+
|
|
426
|
+
auto global_range = cl::sycl::range<3>{};
|
|
427
|
+
auto local_range = cl::sycl::range<3>{};
|
|
428
|
+
|
|
429
|
+
m_device.parallel_for_setup(input_dim, global_range, local_range);
|
|
430
|
+
|
|
431
|
+
const size_t local_memory_size =
|
|
432
|
+
(local_range[0] + kernel_size[0] - 1) * (local_range[1] + kernel_size[1] - 1) * local_range[2];
|
|
433
|
+
gpu_assert(static_cast<unsigned long>(local_memory_size) <= m_device.sharedMemPerBlock());
|
|
434
|
+
const array<Index, 2> indices{{m_indices[kernel_index[0]], m_indices[kernel_index[1]]}};
|
|
435
|
+
const array<Index, 2> kernel_dims{
|
|
436
|
+
{m_kernelImpl.dimensions()[kernel_index[0]], m_kernelImpl.dimensions()[kernel_index[1]]}};
|
|
437
|
+
internal::IndexMapper<Index, InputDims, 2, Layout> indexMapper(m_inputImpl.dimensions(), kernel_dims, indices);
|
|
438
|
+
typedef EigenConvolutionKernel<InputEvaluator, CoeffReturnType, Scalar, Index, InputDims,
|
|
439
|
+
typename KernelStorage::Type, EvaluatorPointerType, convolution_type::CONV2D>
|
|
440
|
+
ConvKernel;
|
|
441
|
+
m_device.template binary_kernel_launcher<CoeffReturnType, ConvKernel>(
|
|
442
|
+
m_inputImpl, m_kernel, data, cl::sycl::nd_range<3>(global_range, local_range), local_memory_size,
|
|
443
|
+
indexMapper, kernel_size, cl::sycl::range<3>{input_dim[0], input_dim[1], input_dim[2]});
|
|
444
|
+
break;
|
|
445
|
+
}
|
|
446
|
+
|
|
447
|
+
case 3: {
|
|
448
|
+
auto kernel_index = std::array<size_t, 3>{static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 0 : 2,
|
|
449
|
+
static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 1 : 1,
|
|
450
|
+
static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 2 : 0};
|
|
451
|
+
|
|
452
|
+
auto kernel_size = cl::sycl::range<3>{(size_t)m_kernelImpl.dimensions()[kernel_index[0]],
|
|
453
|
+
(size_t)m_kernelImpl.dimensions()[kernel_index[1]],
|
|
454
|
+
(size_t)m_kernelImpl.dimensions()[kernel_index[2]]};
|
|
455
|
+
|
|
456
|
+
const size_t numX = dimensions()[m_indices[kernel_index[0]]];
|
|
457
|
+
const size_t numY = dimensions()[m_indices[kernel_index[1]]];
|
|
458
|
+
const size_t numZ = dimensions()[m_indices[kernel_index[2]]];
|
|
459
|
+
auto input_dim = std::array<size_t, 3>{numX, numY, numZ};
|
|
460
|
+
const size_t numP = dimensions().TotalSize() / (numX * numY * numZ);
|
|
461
|
+
|
|
462
|
+
const array<Index, 3> indices{
|
|
463
|
+
{m_indices[kernel_index[0]], m_indices[kernel_index[1]], m_indices[kernel_index[2]]}};
|
|
464
|
+
const array<Index, 3> kernel_dims{{m_kernelImpl.dimensions()[kernel_index[0]],
|
|
465
|
+
m_kernelImpl.dimensions()[kernel_index[1]],
|
|
466
|
+
m_kernelImpl.dimensions()[kernel_index[2]]}};
|
|
467
|
+
|
|
468
|
+
internal::IndexMapper<Index, InputDims, 3, Layout> indexMapper(m_inputImpl.dimensions(), kernel_dims, indices);
|
|
469
|
+
|
|
470
|
+
auto global_range = cl::sycl::range<3>{};
|
|
471
|
+
auto local_range = cl::sycl::range<3>{};
|
|
472
|
+
|
|
473
|
+
m_device.parallel_for_setup(input_dim, global_range, local_range);
|
|
474
|
+
auto local_memory_range = (local_range + kernel_size - 1);
|
|
475
|
+
const size_t local_memory_size = local_memory_range[0] * local_memory_range[1] * local_memory_range[2];
|
|
476
|
+
|
|
477
|
+
gpu_assert(static_cast<unsigned long>(local_memory_size) <= m_device.sharedMemPerBlock());
|
|
478
|
+
typedef EigenConvolutionKernel<InputEvaluator, CoeffReturnType, Scalar, Index, InputDims,
|
|
479
|
+
typename KernelStorage::Type, EvaluatorPointerType, convolution_type::CONV3D>
|
|
480
|
+
ConvKernel;
|
|
481
|
+
m_device.template binary_kernel_launcher<CoeffReturnType, ConvKernel>(
|
|
482
|
+
m_inputImpl, m_kernel, data, cl::sycl::nd_range<3>(global_range, local_range), local_memory_size,
|
|
483
|
+
indexMapper, kernel_size, cl::sycl::range<3>(input_dim[0], input_dim[1], input_dim[2]), numP);
|
|
484
|
+
break;
|
|
485
|
+
}
|
|
486
|
+
|
|
487
|
+
default: {
|
|
488
|
+
EIGEN_STATIC_ASSERT((NumKernelDims >= 1 && NumKernelDims <= 3),
|
|
489
|
+
THIS_METHOD_IS_ONLY_FOR_OBJECTS_OF_A_SPECIFIC_SIZE);
|
|
490
|
+
}
|
|
491
|
+
}
|
|
492
|
+
}
|
|
493
|
+
|
|
494
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
|
|
495
|
+
eigen_assert(m_buf != NULL);
|
|
496
|
+
eigen_assert(index < m_dimensions.TotalSize());
|
|
497
|
+
return m_buf[index];
|
|
498
|
+
}
|
|
499
|
+
|
|
500
|
+
template <int LoadMode>
|
|
501
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(const Index index) const {
|
|
502
|
+
eigen_assert(m_buf != NULL);
|
|
503
|
+
eigen_assert(index < m_dimensions.TotalSize());
|
|
504
|
+
return internal::ploadt<PacketReturnType, LoadMode>(m_buf + index);
|
|
505
|
+
}
|
|
506
|
+
|
|
507
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
|
|
508
|
+
// TODO(rmlarsen): FIXME: For now, this is just a copy of the CPU cost
|
|
509
|
+
// model.
|
|
510
|
+
const double kernel_size = m_kernelImpl.dimensions().TotalSize();
|
|
511
|
+
// We ignore the use of fused multiply-add.
|
|
512
|
+
const double convolve_compute_cost = TensorOpCost::AddCost<Scalar>() + TensorOpCost::MulCost<Scalar>();
|
|
513
|
+
const double firstIndex_compute_cost =
|
|
514
|
+
NumDims *
|
|
515
|
+
(2 * TensorOpCost::AddCost<Index>() + 2 * TensorOpCost::MulCost<Index>() + TensorOpCost::DivCost<Index>());
|
|
516
|
+
return TensorOpCost(0, 0, firstIndex_compute_cost, vectorized, PacketSize) +
|
|
517
|
+
kernel_size * (m_inputImpl.costPerCoeff(vectorized) + m_kernelImpl.costPerCoeff(vectorized) +
|
|
518
|
+
TensorOpCost(0, 0, convolve_compute_cost, vectorized, PacketSize));
|
|
519
|
+
}
|
|
520
|
+
// binding placeholder accessors to a command group handler for SYCL
|
|
521
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
|
|
522
|
+
m_kernelImpl.bind(cgh);
|
|
523
|
+
m_inputImpl.bind(cgh);
|
|
524
|
+
m_buf.bind(cgh);
|
|
525
|
+
m_kernel.bind(cgh);
|
|
526
|
+
}
|
|
527
|
+
|
|
528
|
+
private:
|
|
529
|
+
// No assignment (copies are needed by the kernels)
|
|
530
|
+
TensorEvaluator &operator=(const TensorEvaluator &);
|
|
531
|
+
TensorEvaluator<InputArgType, Eigen::SyclDevice> m_inputImpl;
|
|
532
|
+
KernelArgType m_kernelArg;
|
|
533
|
+
TensorEvaluator<KernelArgType, Eigen::SyclDevice> m_kernelImpl;
|
|
534
|
+
Indices m_indices;
|
|
535
|
+
Dimensions m_dimensions;
|
|
536
|
+
EvaluatorPointerType m_buf;
|
|
537
|
+
typename KernelStorage::Type m_kernel;
|
|
538
|
+
bool m_local_kernel;
|
|
539
|
+
const Eigen::SyclDevice EIGEN_DEVICE_REF m_device;
|
|
540
|
+
}; // namespace Eigen
|
|
541
|
+
|
|
542
|
+
} // end namespace Eigen
|
|
543
|
+
|
|
544
|
+
#endif // EIGEN_CXX11_TENSOR_TENSOR_CONVOLUTION_H
|