@smake/eigen 1.0.2 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/eigen/Eigen/AccelerateSupport +52 -0
- package/eigen/Eigen/Cholesky +18 -21
- package/eigen/Eigen/CholmodSupport +28 -28
- package/eigen/Eigen/Core +235 -326
- package/eigen/Eigen/Eigenvalues +16 -14
- package/eigen/Eigen/Geometry +21 -24
- package/eigen/Eigen/Householder +9 -8
- package/eigen/Eigen/IterativeLinearSolvers +8 -4
- package/eigen/Eigen/Jacobi +14 -14
- package/eigen/Eigen/KLUSupport +43 -0
- package/eigen/Eigen/LU +16 -20
- package/eigen/Eigen/MetisSupport +12 -12
- package/eigen/Eigen/OrderingMethods +54 -54
- package/eigen/Eigen/PaStiXSupport +23 -20
- package/eigen/Eigen/PardisoSupport +17 -14
- package/eigen/Eigen/QR +18 -21
- package/eigen/Eigen/QtAlignedMalloc +5 -13
- package/eigen/Eigen/SPQRSupport +21 -14
- package/eigen/Eigen/SVD +23 -18
- package/eigen/Eigen/Sparse +1 -4
- package/eigen/Eigen/SparseCholesky +18 -23
- package/eigen/Eigen/SparseCore +18 -17
- package/eigen/Eigen/SparseLU +12 -8
- package/eigen/Eigen/SparseQR +16 -14
- package/eigen/Eigen/StdDeque +5 -2
- package/eigen/Eigen/StdList +5 -2
- package/eigen/Eigen/StdVector +5 -2
- package/eigen/Eigen/SuperLUSupport +30 -24
- package/eigen/Eigen/ThreadPool +80 -0
- package/eigen/Eigen/UmfPackSupport +19 -17
- package/eigen/Eigen/Version +14 -0
- package/eigen/Eigen/src/AccelerateSupport/AccelerateSupport.h +423 -0
- package/eigen/Eigen/src/AccelerateSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Cholesky/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Cholesky/LDLT.h +377 -401
- package/eigen/Eigen/src/Cholesky/LLT.h +332 -360
- package/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h +81 -56
- package/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +620 -521
- package/eigen/Eigen/src/CholmodSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Core/ArithmeticSequence.h +239 -0
- package/eigen/Eigen/src/Core/Array.h +341 -294
- package/eigen/Eigen/src/Core/ArrayBase.h +190 -203
- package/eigen/Eigen/src/Core/ArrayWrapper.h +127 -171
- package/eigen/Eigen/src/Core/Assign.h +30 -40
- package/eigen/Eigen/src/Core/AssignEvaluator.h +711 -589
- package/eigen/Eigen/src/Core/Assign_MKL.h +130 -125
- package/eigen/Eigen/src/Core/BandMatrix.h +268 -283
- package/eigen/Eigen/src/Core/Block.h +375 -398
- package/eigen/Eigen/src/Core/CommaInitializer.h +86 -97
- package/eigen/Eigen/src/Core/ConditionEstimator.h +51 -53
- package/eigen/Eigen/src/Core/CoreEvaluators.h +1356 -1026
- package/eigen/Eigen/src/Core/CoreIterators.h +73 -59
- package/eigen/Eigen/src/Core/CwiseBinaryOp.h +114 -132
- package/eigen/Eigen/src/Core/CwiseNullaryOp.h +726 -617
- package/eigen/Eigen/src/Core/CwiseTernaryOp.h +77 -103
- package/eigen/Eigen/src/Core/CwiseUnaryOp.h +56 -68
- package/eigen/Eigen/src/Core/CwiseUnaryView.h +132 -95
- package/eigen/Eigen/src/Core/DenseBase.h +632 -571
- package/eigen/Eigen/src/Core/DenseCoeffsBase.h +511 -624
- package/eigen/Eigen/src/Core/DenseStorage.h +512 -509
- package/eigen/Eigen/src/Core/DeviceWrapper.h +153 -0
- package/eigen/Eigen/src/Core/Diagonal.h +169 -210
- package/eigen/Eigen/src/Core/DiagonalMatrix.h +351 -274
- package/eigen/Eigen/src/Core/DiagonalProduct.h +12 -10
- package/eigen/Eigen/src/Core/Dot.h +172 -222
- package/eigen/Eigen/src/Core/EigenBase.h +75 -85
- package/eigen/Eigen/src/Core/Fill.h +138 -0
- package/eigen/Eigen/src/Core/FindCoeff.h +464 -0
- package/eigen/Eigen/src/Core/ForceAlignedAccess.h +90 -109
- package/eigen/Eigen/src/Core/Fuzzy.h +82 -105
- package/eigen/Eigen/src/Core/GeneralProduct.h +327 -263
- package/eigen/Eigen/src/Core/GenericPacketMath.h +1472 -360
- package/eigen/Eigen/src/Core/GlobalFunctions.h +194 -151
- package/eigen/Eigen/src/Core/IO.h +147 -139
- package/eigen/Eigen/src/Core/IndexedView.h +321 -0
- package/eigen/Eigen/src/Core/InnerProduct.h +260 -0
- package/eigen/Eigen/src/Core/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Core/Inverse.h +56 -66
- package/eigen/Eigen/src/Core/Map.h +124 -142
- package/eigen/Eigen/src/Core/MapBase.h +256 -281
- package/eigen/Eigen/src/Core/MathFunctions.h +1620 -938
- package/eigen/Eigen/src/Core/MathFunctionsImpl.h +233 -71
- package/eigen/Eigen/src/Core/Matrix.h +491 -416
- package/eigen/Eigen/src/Core/MatrixBase.h +468 -453
- package/eigen/Eigen/src/Core/NestByValue.h +66 -85
- package/eigen/Eigen/src/Core/NoAlias.h +79 -85
- package/eigen/Eigen/src/Core/NumTraits.h +235 -148
- package/eigen/Eigen/src/Core/PartialReduxEvaluator.h +253 -0
- package/eigen/Eigen/src/Core/PermutationMatrix.h +461 -511
- package/eigen/Eigen/src/Core/PlainObjectBase.h +871 -894
- package/eigen/Eigen/src/Core/Product.h +260 -139
- package/eigen/Eigen/src/Core/ProductEvaluators.h +863 -714
- package/eigen/Eigen/src/Core/Random.h +161 -136
- package/eigen/Eigen/src/Core/RandomImpl.h +262 -0
- package/eigen/Eigen/src/Core/RealView.h +250 -0
- package/eigen/Eigen/src/Core/Redux.h +366 -336
- package/eigen/Eigen/src/Core/Ref.h +308 -209
- package/eigen/Eigen/src/Core/Replicate.h +94 -106
- package/eigen/Eigen/src/Core/Reshaped.h +398 -0
- package/eigen/Eigen/src/Core/ReturnByValue.h +49 -55
- package/eigen/Eigen/src/Core/Reverse.h +136 -145
- package/eigen/Eigen/src/Core/Select.h +70 -140
- package/eigen/Eigen/src/Core/SelfAdjointView.h +262 -285
- package/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h +23 -20
- package/eigen/Eigen/src/Core/SkewSymmetricMatrix3.h +382 -0
- package/eigen/Eigen/src/Core/Solve.h +97 -111
- package/eigen/Eigen/src/Core/SolveTriangular.h +131 -129
- package/eigen/Eigen/src/Core/SolverBase.h +138 -101
- package/eigen/Eigen/src/Core/StableNorm.h +156 -160
- package/eigen/Eigen/src/Core/StlIterators.h +619 -0
- package/eigen/Eigen/src/Core/Stride.h +91 -88
- package/eigen/Eigen/src/Core/Swap.h +70 -38
- package/eigen/Eigen/src/Core/Transpose.h +295 -273
- package/eigen/Eigen/src/Core/Transpositions.h +272 -317
- package/eigen/Eigen/src/Core/TriangularMatrix.h +670 -755
- package/eigen/Eigen/src/Core/VectorBlock.h +59 -72
- package/eigen/Eigen/src/Core/VectorwiseOp.h +668 -630
- package/eigen/Eigen/src/Core/Visitor.h +480 -216
- package/eigen/Eigen/src/Core/arch/AVX/Complex.h +407 -293
- package/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +79 -388
- package/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +2935 -491
- package/eigen/Eigen/src/Core/arch/AVX/Reductions.h +353 -0
- package/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +279 -22
- package/eigen/Eigen/src/Core/arch/AVX512/Complex.h +472 -0
- package/eigen/Eigen/src/Core/arch/AVX512/GemmKernel.h +1245 -0
- package/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +85 -333
- package/eigen/Eigen/src/Core/arch/AVX512/MathFunctionsFP16.h +75 -0
- package/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +2490 -649
- package/eigen/Eigen/src/Core/arch/AVX512/PacketMathFP16.h +1413 -0
- package/eigen/Eigen/src/Core/arch/AVX512/Reductions.h +297 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TrsmKernel.h +1167 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TrsmUnrolls.inc +1219 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +277 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TypeCastingFP16.h +130 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +521 -298
- package/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +39 -280
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +3686 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +205 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +901 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMAbfloat16.h +742 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixVectorProduct.inc +2818 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +3391 -723
- package/eigen/Eigen/src/Core/arch/AltiVec/TypeCasting.h +153 -0
- package/eigen/Eigen/src/Core/arch/Default/BFloat16.h +866 -0
- package/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +113 -14
- package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +2634 -0
- package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +227 -0
- package/eigen/Eigen/src/Core/arch/Default/Half.h +1091 -0
- package/eigen/Eigen/src/Core/arch/Default/Settings.h +11 -13
- package/eigen/Eigen/src/Core/arch/GPU/Complex.h +244 -0
- package/eigen/Eigen/src/Core/arch/GPU/MathFunctions.h +104 -0
- package/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1712 -0
- package/eigen/Eigen/src/Core/arch/GPU/Tuple.h +268 -0
- package/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +77 -0
- package/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
- package/eigen/Eigen/src/Core/arch/HVX/PacketMath.h +1088 -0
- package/eigen/Eigen/src/Core/arch/LSX/Complex.h +520 -0
- package/eigen/Eigen/src/Core/arch/LSX/GeneralBlockPanelKernel.h +23 -0
- package/eigen/Eigen/src/Core/arch/LSX/MathFunctions.h +43 -0
- package/eigen/Eigen/src/Core/arch/LSX/PacketMath.h +2866 -0
- package/eigen/Eigen/src/Core/arch/LSX/TypeCasting.h +526 -0
- package/eigen/Eigen/src/Core/arch/MSA/Complex.h +620 -0
- package/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +379 -0
- package/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1237 -0
- package/eigen/Eigen/src/Core/arch/NEON/Complex.h +531 -289
- package/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +243 -0
- package/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +50 -73
- package/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +5915 -579
- package/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1642 -0
- package/eigen/Eigen/src/Core/arch/NEON/UnaryFunctors.h +57 -0
- package/eigen/Eigen/src/Core/arch/SSE/Complex.h +366 -334
- package/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +40 -514
- package/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +2164 -675
- package/eigen/Eigen/src/Core/arch/SSE/Reductions.h +324 -0
- package/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +188 -35
- package/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +48 -0
- package/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +674 -0
- package/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +52 -0
- package/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +227 -0
- package/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +303 -0
- package/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +576 -0
- package/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +83 -0
- package/eigen/Eigen/src/Core/arch/ZVector/Complex.h +434 -261
- package/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +160 -53
- package/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +1073 -605
- package/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +123 -117
- package/eigen/Eigen/src/Core/functors/BinaryFunctors.h +594 -322
- package/eigen/Eigen/src/Core/functors/NullaryFunctors.h +204 -118
- package/eigen/Eigen/src/Core/functors/StlFunctors.h +110 -97
- package/eigen/Eigen/src/Core/functors/TernaryFunctors.h +34 -7
- package/eigen/Eigen/src/Core/functors/UnaryFunctors.h +1158 -530
- package/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +2329 -1333
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +328 -364
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +191 -178
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +85 -82
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +154 -73
- package/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +396 -542
- package/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +80 -77
- package/eigen/Eigen/src/Core/products/Parallelizer.h +208 -92
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +331 -375
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +206 -224
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +139 -146
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +58 -61
- package/eigen/Eigen/src/Core/products/SelfadjointProduct.h +71 -71
- package/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +48 -46
- package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +294 -369
- package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +246 -238
- package/eigen/Eigen/src/Core/products/TriangularMatrixVector.h +244 -247
- package/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +212 -192
- package/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +328 -275
- package/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +108 -109
- package/eigen/Eigen/src/Core/products/TriangularSolverVector.h +70 -93
- package/eigen/Eigen/src/Core/util/Assert.h +158 -0
- package/eigen/Eigen/src/Core/util/BlasUtil.h +413 -290
- package/eigen/Eigen/src/Core/util/ConfigureVectorization.h +543 -0
- package/eigen/Eigen/src/Core/util/Constants.h +314 -263
- package/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +130 -78
- package/eigen/Eigen/src/Core/util/EmulateArray.h +270 -0
- package/eigen/Eigen/src/Core/util/ForwardDeclarations.h +450 -224
- package/eigen/Eigen/src/Core/util/GpuHipCudaDefines.inc +101 -0
- package/eigen/Eigen/src/Core/util/GpuHipCudaUndefines.inc +45 -0
- package/eigen/Eigen/src/Core/util/IndexedViewHelper.h +487 -0
- package/eigen/Eigen/src/Core/util/IntegralConstant.h +279 -0
- package/eigen/Eigen/src/Core/util/MKL_support.h +39 -30
- package/eigen/Eigen/src/Core/util/Macros.h +939 -646
- package/eigen/Eigen/src/Core/util/MaxSizeVector.h +139 -0
- package/eigen/Eigen/src/Core/util/Memory.h +1042 -650
- package/eigen/Eigen/src/Core/util/Meta.h +618 -426
- package/eigen/Eigen/src/Core/util/MoreMeta.h +638 -0
- package/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +32 -19
- package/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
- package/eigen/Eigen/src/Core/util/Serializer.h +209 -0
- package/eigen/Eigen/src/Core/util/StaticAssert.h +51 -164
- package/eigen/Eigen/src/Core/util/SymbolicIndex.h +445 -0
- package/eigen/Eigen/src/Core/util/XprHelper.h +793 -538
- package/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +246 -277
- package/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +299 -319
- package/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +52 -48
- package/eigen/Eigen/src/Eigenvalues/EigenSolver.h +413 -456
- package/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +309 -325
- package/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +157 -171
- package/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +292 -310
- package/eigen/Eigen/src/Eigenvalues/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +91 -107
- package/eigen/Eigen/src/Eigenvalues/RealQZ.h +539 -606
- package/eigen/Eigen/src/Eigenvalues/RealSchur.h +348 -382
- package/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +41 -35
- package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +579 -600
- package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +47 -44
- package/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +434 -461
- package/eigen/Eigen/src/Geometry/AlignedBox.h +307 -214
- package/eigen/Eigen/src/Geometry/AngleAxis.h +135 -137
- package/eigen/Eigen/src/Geometry/EulerAngles.h +163 -74
- package/eigen/Eigen/src/Geometry/Homogeneous.h +289 -333
- package/eigen/Eigen/src/Geometry/Hyperplane.h +152 -161
- package/eigen/Eigen/src/Geometry/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Geometry/OrthoMethods.h +168 -145
- package/eigen/Eigen/src/Geometry/ParametrizedLine.h +141 -104
- package/eigen/Eigen/src/Geometry/Quaternion.h +595 -497
- package/eigen/Eigen/src/Geometry/Rotation2D.h +110 -108
- package/eigen/Eigen/src/Geometry/RotationBase.h +148 -145
- package/eigen/Eigen/src/Geometry/Scaling.h +115 -90
- package/eigen/Eigen/src/Geometry/Transform.h +896 -953
- package/eigen/Eigen/src/Geometry/Translation.h +100 -98
- package/eigen/Eigen/src/Geometry/Umeyama.h +79 -84
- package/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +154 -0
- package/eigen/Eigen/src/Householder/BlockHouseholder.h +54 -42
- package/eigen/Eigen/src/Householder/Householder.h +104 -122
- package/eigen/Eigen/src/Householder/HouseholderSequence.h +416 -382
- package/eigen/Eigen/src/Householder/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +153 -166
- package/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +127 -138
- package/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +95 -124
- package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +269 -267
- package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +246 -259
- package/eigen/Eigen/src/IterativeLinearSolvers/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +218 -217
- package/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +80 -103
- package/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +59 -63
- package/eigen/Eigen/src/Jacobi/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Jacobi/Jacobi.h +256 -291
- package/eigen/Eigen/src/KLUSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/KLUSupport/KLUSupport.h +339 -0
- package/eigen/Eigen/src/LU/Determinant.h +60 -63
- package/eigen/Eigen/src/LU/FullPivLU.h +561 -626
- package/eigen/Eigen/src/LU/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/LU/InverseImpl.h +213 -275
- package/eigen/Eigen/src/LU/PartialPivLU.h +407 -435
- package/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h +54 -40
- package/eigen/Eigen/src/LU/arch/InverseSize4.h +353 -0
- package/eigen/Eigen/src/MetisSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/MetisSupport/MetisSupport.h +81 -93
- package/eigen/Eigen/src/OrderingMethods/Amd.h +250 -282
- package/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +950 -1103
- package/eigen/Eigen/src/OrderingMethods/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/OrderingMethods/Ordering.h +111 -122
- package/eigen/Eigen/src/PaStiXSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +524 -570
- package/eigen/Eigen/src/PardisoSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +385 -429
- package/eigen/Eigen/src/QR/ColPivHouseholderQR.h +494 -473
- package/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +120 -56
- package/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +223 -137
- package/eigen/Eigen/src/QR/FullPivHouseholderQR.h +517 -460
- package/eigen/Eigen/src/QR/HouseholderQR.h +412 -278
- package/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h +32 -23
- package/eigen/Eigen/src/QR/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SPQRSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +263 -261
- package/eigen/Eigen/src/SVD/BDCSVD.h +872 -679
- package/eigen/Eigen/src/SVD/BDCSVD_LAPACKE.h +174 -0
- package/eigen/Eigen/src/SVD/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SVD/JacobiSVD.h +585 -543
- package/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +85 -49
- package/eigen/Eigen/src/SVD/SVDBase.h +281 -160
- package/eigen/Eigen/src/SVD/UpperBidiagonalization.h +202 -237
- package/eigen/Eigen/src/SparseCholesky/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +769 -590
- package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +318 -129
- package/eigen/Eigen/src/SparseCore/AmbiVector.h +202 -251
- package/eigen/Eigen/src/SparseCore/CompressedStorage.h +184 -236
- package/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +140 -184
- package/eigen/Eigen/src/SparseCore/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseCore/SparseAssign.h +174 -111
- package/eigen/Eigen/src/SparseCore/SparseBlock.h +408 -477
- package/eigen/Eigen/src/SparseCore/SparseColEtree.h +100 -112
- package/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +531 -280
- package/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +559 -347
- package/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +100 -108
- package/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +185 -191
- package/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +71 -71
- package/eigen/Eigen/src/SparseCore/SparseDot.h +49 -47
- package/eigen/Eigen/src/SparseCore/SparseFuzzy.h +13 -11
- package/eigen/Eigen/src/SparseCore/SparseMap.h +243 -253
- package/eigen/Eigen/src/SparseCore/SparseMatrix.h +1614 -1142
- package/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +403 -357
- package/eigen/Eigen/src/SparseCore/SparsePermutation.h +186 -115
- package/eigen/Eigen/src/SparseCore/SparseProduct.h +100 -91
- package/eigen/Eigen/src/SparseCore/SparseRedux.h +22 -24
- package/eigen/Eigen/src/SparseCore/SparseRef.h +268 -295
- package/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +371 -414
- package/eigen/Eigen/src/SparseCore/SparseSolverBase.h +78 -87
- package/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +81 -95
- package/eigen/Eigen/src/SparseCore/SparseTranspose.h +62 -71
- package/eigen/Eigen/src/SparseCore/SparseTriangularView.h +132 -144
- package/eigen/Eigen/src/SparseCore/SparseUtil.h +146 -115
- package/eigen/Eigen/src/SparseCore/SparseVector.h +426 -372
- package/eigen/Eigen/src/SparseCore/SparseView.h +164 -193
- package/eigen/Eigen/src/SparseCore/TriangularSolver.h +129 -170
- package/eigen/Eigen/src/SparseLU/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseLU/SparseLU.h +814 -618
- package/eigen/Eigen/src/SparseLU/SparseLUImpl.h +61 -48
- package/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +102 -118
- package/eigen/Eigen/src/SparseLU/SparseLU_Structs.h +38 -35
- package/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +273 -255
- package/eigen/Eigen/src/SparseLU/SparseLU_Utils.h +44 -49
- package/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h +104 -108
- package/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +90 -101
- package/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +57 -58
- package/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +43 -55
- package/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +74 -71
- package/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +125 -133
- package/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h +136 -159
- package/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h +51 -52
- package/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h +67 -73
- package/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h +24 -26
- package/eigen/Eigen/src/SparseQR/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseQR/SparseQR.h +451 -490
- package/eigen/Eigen/src/StlSupport/StdDeque.h +28 -105
- package/eigen/Eigen/src/StlSupport/StdList.h +28 -84
- package/eigen/Eigen/src/StlSupport/StdVector.h +28 -108
- package/eigen/Eigen/src/StlSupport/details.h +48 -50
- package/eigen/Eigen/src/SuperLUSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +634 -732
- package/eigen/Eigen/src/ThreadPool/Barrier.h +70 -0
- package/eigen/Eigen/src/ThreadPool/CoreThreadPoolDevice.h +336 -0
- package/eigen/Eigen/src/ThreadPool/EventCount.h +241 -0
- package/eigen/Eigen/src/ThreadPool/ForkJoin.h +140 -0
- package/eigen/Eigen/src/ThreadPool/InternalHeaderCheck.h +4 -0
- package/eigen/Eigen/src/ThreadPool/NonBlockingThreadPool.h +587 -0
- package/eigen/Eigen/src/ThreadPool/RunQueue.h +230 -0
- package/eigen/Eigen/src/ThreadPool/ThreadCancel.h +21 -0
- package/eigen/Eigen/src/ThreadPool/ThreadEnvironment.h +43 -0
- package/eigen/Eigen/src/ThreadPool/ThreadLocal.h +289 -0
- package/eigen/Eigen/src/ThreadPool/ThreadPoolInterface.h +50 -0
- package/eigen/Eigen/src/ThreadPool/ThreadYield.h +16 -0
- package/eigen/Eigen/src/UmfPackSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +480 -380
- package/eigen/Eigen/src/misc/Image.h +41 -43
- package/eigen/Eigen/src/misc/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/misc/Kernel.h +39 -41
- package/eigen/Eigen/src/misc/RealSvd2x2.h +19 -21
- package/eigen/Eigen/src/misc/blas.h +83 -426
- package/eigen/Eigen/src/misc/lapacke.h +9976 -16182
- package/eigen/Eigen/src/misc/lapacke_helpers.h +163 -0
- package/eigen/Eigen/src/misc/lapacke_mangling.h +4 -5
- package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.inc +344 -0
- package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.inc +544 -0
- package/eigen/Eigen/src/plugins/BlockMethods.inc +1370 -0
- package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.inc +116 -0
- package/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.inc +167 -0
- package/eigen/Eigen/src/plugins/IndexedViewMethods.inc +192 -0
- package/eigen/Eigen/src/plugins/InternalHeaderCheck.inc +3 -0
- package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.inc +331 -0
- package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.inc +118 -0
- package/eigen/Eigen/src/plugins/ReshapedMethods.inc +133 -0
- package/lib/LibEigen.d.ts +4 -0
- package/lib/LibEigen.js +14 -0
- package/lib/index.d.ts +1 -1
- package/lib/index.js +7 -3
- package/package.json +2 -10
- package/eigen/Eigen/CMakeLists.txt +0 -19
- package/eigen/Eigen/src/Core/BooleanRedux.h +0 -164
- package/eigen/Eigen/src/Core/arch/CUDA/Complex.h +0 -103
- package/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -675
- package/eigen/Eigen/src/Core/arch/CUDA/MathFunctions.h +0 -91
- package/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
- package/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
- package/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
- package/eigen/Eigen/src/Core/util/NonMPL2.h +0 -3
- package/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
- package/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
- package/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h +0 -67
- package/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +0 -280
- package/eigen/Eigen/src/misc/lapack.h +0 -152
- package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +0 -332
- package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +0 -552
- package/eigen/Eigen/src/plugins/BlockMethods.h +0 -1058
- package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h +0 -115
- package/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +0 -163
- package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +0 -152
- package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +0 -85
- package/lib/eigen.d.ts +0 -2
- package/lib/eigen.js +0 -15
|
@@ -0,0 +1,576 @@
|
|
|
1
|
+
// This file is part of Eigen, a lightweight C++ template library
|
|
2
|
+
// for linear algebra.
|
|
3
|
+
//
|
|
4
|
+
// Mehdi Goli Codeplay Software Ltd.
|
|
5
|
+
// Ralph Potter Codeplay Software Ltd.
|
|
6
|
+
// Luke Iwanski Codeplay Software Ltd.
|
|
7
|
+
// Contact: <eigen@codeplay.com>
|
|
8
|
+
//
|
|
9
|
+
// This Source Code Form is subject to the terms of the Mozilla
|
|
10
|
+
// Public License v. 2.0. If a copy of the MPL was not distributed
|
|
11
|
+
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
12
|
+
|
|
13
|
+
/*****************************************************************
|
|
14
|
+
* PacketMath.h
|
|
15
|
+
*
|
|
16
|
+
* \brief:
|
|
17
|
+
* PacketMath
|
|
18
|
+
*
|
|
19
|
+
*****************************************************************/
|
|
20
|
+
|
|
21
|
+
#ifndef EIGEN_PACKET_MATH_SYCL_H
|
|
22
|
+
#define EIGEN_PACKET_MATH_SYCL_H
|
|
23
|
+
#include <type_traits>
|
|
24
|
+
|
|
25
|
+
// IWYU pragma: private
|
|
26
|
+
#include "../../InternalHeaderCheck.h"
|
|
27
|
+
|
|
28
|
+
namespace Eigen {
|
|
29
|
+
|
|
30
|
+
namespace internal {
|
|
31
|
+
#ifdef SYCL_DEVICE_ONLY
|
|
32
|
+
#define SYCL_PLOAD(packet_type, AlignedType) \
|
|
33
|
+
template <> \
|
|
34
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pload##AlignedType<packet_type>( \
|
|
35
|
+
const typename unpacket_traits<packet_type>::type* from) { \
|
|
36
|
+
auto ptr = \
|
|
37
|
+
cl::sycl::address_space_cast<cl::sycl::access::address_space::generic_space, cl::sycl::access::decorated::no>( \
|
|
38
|
+
from); \
|
|
39
|
+
packet_type res{}; \
|
|
40
|
+
res.load(0, ptr); \
|
|
41
|
+
return res; \
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
SYCL_PLOAD(cl::sycl::cl_float4, u)
|
|
45
|
+
SYCL_PLOAD(cl::sycl::cl_float4, )
|
|
46
|
+
SYCL_PLOAD(cl::sycl::cl_double2, u)
|
|
47
|
+
SYCL_PLOAD(cl::sycl::cl_double2, )
|
|
48
|
+
#undef SYCL_PLOAD
|
|
49
|
+
|
|
50
|
+
template <>
|
|
51
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_half8 pload<cl::sycl::cl_half8>(
|
|
52
|
+
const typename unpacket_traits<cl::sycl::cl_half8>::type* from) {
|
|
53
|
+
auto ptr =
|
|
54
|
+
cl::sycl::address_space_cast<cl::sycl::access::address_space::generic_space, cl::sycl::access::decorated::no>(
|
|
55
|
+
reinterpret_cast<const cl::sycl::cl_half*>(from));
|
|
56
|
+
cl::sycl::cl_half8 res{};
|
|
57
|
+
res.load(0, ptr);
|
|
58
|
+
return res;
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
template <>
|
|
62
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_half8 ploadu<cl::sycl::cl_half8>(
|
|
63
|
+
const typename unpacket_traits<cl::sycl::cl_half8>::type* from) {
|
|
64
|
+
auto ptr =
|
|
65
|
+
cl::sycl::address_space_cast<cl::sycl::access::address_space::generic_space, cl::sycl::access::decorated::no>(
|
|
66
|
+
reinterpret_cast<const cl::sycl::cl_half*>(from));
|
|
67
|
+
cl::sycl::cl_half8 res{};
|
|
68
|
+
res.load(0, ptr);
|
|
69
|
+
return res;
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
#define SYCL_PSTORE(scalar, packet_type, alignment) \
|
|
73
|
+
template <> \
|
|
74
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstore##alignment(scalar* to, const packet_type& from) { \
|
|
75
|
+
auto ptr = \
|
|
76
|
+
cl::sycl::address_space_cast<cl::sycl::access::address_space::generic_space, cl::sycl::access::decorated::no>( \
|
|
77
|
+
to); \
|
|
78
|
+
from.store(0, ptr); \
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
SYCL_PSTORE(float, cl::sycl::cl_float4, )
|
|
82
|
+
SYCL_PSTORE(float, cl::sycl::cl_float4, u)
|
|
83
|
+
SYCL_PSTORE(double, cl::sycl::cl_double2, )
|
|
84
|
+
SYCL_PSTORE(double, cl::sycl::cl_double2, u)
|
|
85
|
+
#undef SYCL_PSTORE
|
|
86
|
+
|
|
87
|
+
template <>
|
|
88
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoreu(Eigen::half* to, const cl::sycl::cl_half8& from) {
|
|
89
|
+
auto ptr =
|
|
90
|
+
cl::sycl::address_space_cast<cl::sycl::access::address_space::generic_space, cl::sycl::access::decorated::no>(
|
|
91
|
+
reinterpret_cast<cl::sycl::cl_half*>(to));
|
|
92
|
+
from.store(0, ptr);
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
template <>
|
|
96
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstore(Eigen::half* to, const cl::sycl::cl_half8& from) {
|
|
97
|
+
auto ptr =
|
|
98
|
+
cl::sycl::address_space_cast<cl::sycl::access::address_space::generic_space, cl::sycl::access::decorated::no>(
|
|
99
|
+
reinterpret_cast<cl::sycl::cl_half*>(to));
|
|
100
|
+
from.store(0, ptr);
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
#define SYCL_PSET1(packet_type) \
|
|
104
|
+
template <> \
|
|
105
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pset1<packet_type>( \
|
|
106
|
+
const typename unpacket_traits<packet_type>::type& from) { \
|
|
107
|
+
return packet_type(from); \
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
// global space
|
|
111
|
+
SYCL_PSET1(cl::sycl::cl_half8)
|
|
112
|
+
SYCL_PSET1(cl::sycl::cl_float4)
|
|
113
|
+
SYCL_PSET1(cl::sycl::cl_double2)
|
|
114
|
+
|
|
115
|
+
#undef SYCL_PSET1
|
|
116
|
+
|
|
117
|
+
template <typename packet_type>
|
|
118
|
+
struct get_base_packet {
|
|
119
|
+
template <typename sycl_multi_pointer>
|
|
120
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type get_ploaddup(sycl_multi_pointer) {}
|
|
121
|
+
|
|
122
|
+
template <typename sycl_multi_pointer>
|
|
123
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type get_pgather(sycl_multi_pointer, Index) {}
|
|
124
|
+
};
|
|
125
|
+
|
|
126
|
+
template <>
|
|
127
|
+
struct get_base_packet<cl::sycl::cl_half8> {
|
|
128
|
+
template <typename sycl_multi_pointer>
|
|
129
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_half8 get_ploaddup(sycl_multi_pointer from) {
|
|
130
|
+
return cl::sycl::cl_half8(static_cast<cl::sycl::half>(from[0]), static_cast<cl::sycl::half>(from[0]),
|
|
131
|
+
static_cast<cl::sycl::half>(from[1]), static_cast<cl::sycl::half>(from[1]),
|
|
132
|
+
static_cast<cl::sycl::half>(from[2]), static_cast<cl::sycl::half>(from[2]),
|
|
133
|
+
static_cast<cl::sycl::half>(from[3]), static_cast<cl::sycl::half>(from[3]));
|
|
134
|
+
}
|
|
135
|
+
template <typename sycl_multi_pointer>
|
|
136
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_half8 get_pgather(sycl_multi_pointer from, Index stride) {
|
|
137
|
+
return cl::sycl::cl_half8(
|
|
138
|
+
static_cast<cl::sycl::half>(from[0 * stride]), static_cast<cl::sycl::half>(from[1 * stride]),
|
|
139
|
+
static_cast<cl::sycl::half>(from[2 * stride]), static_cast<cl::sycl::half>(from[3 * stride]),
|
|
140
|
+
static_cast<cl::sycl::half>(from[4 * stride]), static_cast<cl::sycl::half>(from[5 * stride]),
|
|
141
|
+
static_cast<cl::sycl::half>(from[6 * stride]), static_cast<cl::sycl::half>(from[7 * stride]));
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
template <typename sycl_multi_pointer>
|
|
145
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void set_pscatter(sycl_multi_pointer to, const cl::sycl::cl_half8& from,
|
|
146
|
+
Index stride) {
|
|
147
|
+
auto tmp = stride;
|
|
148
|
+
to[0] = Eigen::half(from.s0());
|
|
149
|
+
to[tmp] = Eigen::half(from.s1());
|
|
150
|
+
to[tmp += stride] = Eigen::half(from.s2());
|
|
151
|
+
to[tmp += stride] = Eigen::half(from.s3());
|
|
152
|
+
to[tmp += stride] = Eigen::half(from.s4());
|
|
153
|
+
to[tmp += stride] = Eigen::half(from.s5());
|
|
154
|
+
to[tmp += stride] = Eigen::half(from.s6());
|
|
155
|
+
to[tmp += stride] = Eigen::half(from.s7());
|
|
156
|
+
}
|
|
157
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_half8 set_plset(const cl::sycl::half& a) {
|
|
158
|
+
return cl::sycl::cl_half8(static_cast<cl::sycl::half>(a), static_cast<cl::sycl::half>(a + 1),
|
|
159
|
+
static_cast<cl::sycl::half>(a + 2), static_cast<cl::sycl::half>(a + 3),
|
|
160
|
+
static_cast<cl::sycl::half>(a + 4), static_cast<cl::sycl::half>(a + 5),
|
|
161
|
+
static_cast<cl::sycl::half>(a + 6), static_cast<cl::sycl::half>(a + 7));
|
|
162
|
+
}
|
|
163
|
+
};
|
|
164
|
+
|
|
165
|
+
template <>
|
|
166
|
+
struct get_base_packet<cl::sycl::cl_float4> {
|
|
167
|
+
template <typename sycl_multi_pointer>
|
|
168
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 get_ploaddup(sycl_multi_pointer from) {
|
|
169
|
+
return cl::sycl::cl_float4(from[0], from[0], from[1], from[1]);
|
|
170
|
+
}
|
|
171
|
+
template <typename sycl_multi_pointer>
|
|
172
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 get_pgather(sycl_multi_pointer from, Index stride) {
|
|
173
|
+
return cl::sycl::cl_float4(from[0 * stride], from[1 * stride], from[2 * stride], from[3 * stride]);
|
|
174
|
+
}
|
|
175
|
+
|
|
176
|
+
template <typename sycl_multi_pointer>
|
|
177
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void set_pscatter(sycl_multi_pointer to, const cl::sycl::cl_float4& from,
|
|
178
|
+
Index stride) {
|
|
179
|
+
auto tmp = stride;
|
|
180
|
+
to[0] = from.x();
|
|
181
|
+
to[tmp] = from.y();
|
|
182
|
+
to[tmp += stride] = from.z();
|
|
183
|
+
to[tmp += stride] = from.w();
|
|
184
|
+
}
|
|
185
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 set_plset(const float& a) {
|
|
186
|
+
return cl::sycl::cl_float4(static_cast<float>(a), static_cast<float>(a + 1), static_cast<float>(a + 2),
|
|
187
|
+
static_cast<float>(a + 3));
|
|
188
|
+
}
|
|
189
|
+
};
|
|
190
|
+
|
|
191
|
+
template <>
|
|
192
|
+
struct get_base_packet<cl::sycl::cl_double2> {
|
|
193
|
+
template <typename sycl_multi_pointer>
|
|
194
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2 get_ploaddup(const sycl_multi_pointer from) {
|
|
195
|
+
return cl::sycl::cl_double2(from[0], from[0]);
|
|
196
|
+
}
|
|
197
|
+
|
|
198
|
+
template <typename sycl_multi_pointer, typename Index>
|
|
199
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2 get_pgather(const sycl_multi_pointer from,
|
|
200
|
+
Index stride) {
|
|
201
|
+
return cl::sycl::cl_double2(from[0 * stride], from[1 * stride]);
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
template <typename sycl_multi_pointer>
|
|
205
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void set_pscatter(sycl_multi_pointer to,
|
|
206
|
+
const cl::sycl::cl_double2& from, Index stride) {
|
|
207
|
+
to[0] = from.x();
|
|
208
|
+
to[stride] = from.y();
|
|
209
|
+
}
|
|
210
|
+
|
|
211
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2 set_plset(const double& a) {
|
|
212
|
+
return cl::sycl::cl_double2(static_cast<double>(a), static_cast<double>(a + 1));
|
|
213
|
+
}
|
|
214
|
+
};
|
|
215
|
+
|
|
216
|
+
#define SYCL_PLOAD_DUP_SPECILIZE(packet_type) \
|
|
217
|
+
template <> \
|
|
218
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type ploaddup<packet_type>( \
|
|
219
|
+
const typename unpacket_traits<packet_type>::type* from) { \
|
|
220
|
+
return get_base_packet<packet_type>::get_ploaddup(from); \
|
|
221
|
+
}
|
|
222
|
+
|
|
223
|
+
SYCL_PLOAD_DUP_SPECILIZE(cl::sycl::cl_half8)
|
|
224
|
+
SYCL_PLOAD_DUP_SPECILIZE(cl::sycl::cl_float4)
|
|
225
|
+
SYCL_PLOAD_DUP_SPECILIZE(cl::sycl::cl_double2)
|
|
226
|
+
|
|
227
|
+
#undef SYCL_PLOAD_DUP_SPECILIZE
|
|
228
|
+
|
|
229
|
+
#define SYCL_PLSET(packet_type) \
|
|
230
|
+
template <> \
|
|
231
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type plset<packet_type>( \
|
|
232
|
+
const typename unpacket_traits<packet_type>::type& a) { \
|
|
233
|
+
return get_base_packet<packet_type>::set_plset(a); \
|
|
234
|
+
}
|
|
235
|
+
SYCL_PLSET(cl::sycl::cl_float4)
|
|
236
|
+
SYCL_PLSET(cl::sycl::cl_double2)
|
|
237
|
+
#undef SYCL_PLSET
|
|
238
|
+
|
|
239
|
+
template <>
|
|
240
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_half8 plset<cl::sycl::cl_half8>(
|
|
241
|
+
const typename unpacket_traits<cl::sycl::cl_half8>::type& a) {
|
|
242
|
+
return get_base_packet<cl::sycl::cl_half8>::set_plset((const cl::sycl::half&)a);
|
|
243
|
+
}
|
|
244
|
+
|
|
245
|
+
#define SYCL_PGATHER_SPECILIZE(scalar, packet_type) \
|
|
246
|
+
template <> \
|
|
247
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pgather<scalar, packet_type>( \
|
|
248
|
+
const typename unpacket_traits<packet_type>::type* from, Index stride) { \
|
|
249
|
+
return get_base_packet<packet_type>::get_pgather(from, stride); \
|
|
250
|
+
}
|
|
251
|
+
|
|
252
|
+
SYCL_PGATHER_SPECILIZE(Eigen::half, cl::sycl::cl_half8)
|
|
253
|
+
SYCL_PGATHER_SPECILIZE(float, cl::sycl::cl_float4)
|
|
254
|
+
SYCL_PGATHER_SPECILIZE(double, cl::sycl::cl_double2)
|
|
255
|
+
#undef SYCL_PGATHER_SPECILIZE
|
|
256
|
+
|
|
257
|
+
#define SYCL_PSCATTER_SPECILIZE(scalar, packet_type) \
|
|
258
|
+
template <> \
|
|
259
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter<scalar, packet_type>( \
|
|
260
|
+
typename unpacket_traits<packet_type>::type * to, const packet_type& from, Index stride) { \
|
|
261
|
+
get_base_packet<packet_type>::set_pscatter(to, from, stride); \
|
|
262
|
+
}
|
|
263
|
+
|
|
264
|
+
SYCL_PSCATTER_SPECILIZE(Eigen::half, cl::sycl::cl_half8)
|
|
265
|
+
SYCL_PSCATTER_SPECILIZE(float, cl::sycl::cl_float4)
|
|
266
|
+
SYCL_PSCATTER_SPECILIZE(double, cl::sycl::cl_double2)
|
|
267
|
+
|
|
268
|
+
#undef SYCL_PSCATTER_SPECILIZE
|
|
269
|
+
|
|
270
|
+
#define SYCL_PMAD(packet_type) \
|
|
271
|
+
template <> \
|
|
272
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pmadd(const packet_type& a, const packet_type& b, \
|
|
273
|
+
const packet_type& c) { \
|
|
274
|
+
return cl::sycl::mad(a, b, c); \
|
|
275
|
+
}
|
|
276
|
+
|
|
277
|
+
SYCL_PMAD(cl::sycl::cl_half8)
|
|
278
|
+
SYCL_PMAD(cl::sycl::cl_float4)
|
|
279
|
+
SYCL_PMAD(cl::sycl::cl_double2)
|
|
280
|
+
#undef SYCL_PMAD
|
|
281
|
+
|
|
282
|
+
template <>
|
|
283
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Eigen::half pfirst<cl::sycl::cl_half8>(const cl::sycl::cl_half8& a) {
|
|
284
|
+
return Eigen::half(a.s0());
|
|
285
|
+
}
|
|
286
|
+
template <>
|
|
287
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float pfirst<cl::sycl::cl_float4>(const cl::sycl::cl_float4& a) {
|
|
288
|
+
return a.x();
|
|
289
|
+
}
|
|
290
|
+
template <>
|
|
291
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double pfirst<cl::sycl::cl_double2>(const cl::sycl::cl_double2& a) {
|
|
292
|
+
return a.x();
|
|
293
|
+
}
|
|
294
|
+
|
|
295
|
+
template <>
|
|
296
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Eigen::half predux<cl::sycl::cl_half8>(const cl::sycl::cl_half8& a) {
|
|
297
|
+
return Eigen::half(a.s0() + a.s1() + a.s2() + a.s3() + a.s4() + a.s5() + a.s6() + a.s7());
|
|
298
|
+
}
|
|
299
|
+
|
|
300
|
+
template <>
|
|
301
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux<cl::sycl::cl_float4>(const cl::sycl::cl_float4& a) {
|
|
302
|
+
return a.x() + a.y() + a.z() + a.w();
|
|
303
|
+
}
|
|
304
|
+
|
|
305
|
+
template <>
|
|
306
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux<cl::sycl::cl_double2>(const cl::sycl::cl_double2& a) {
|
|
307
|
+
return a.x() + a.y();
|
|
308
|
+
}
|
|
309
|
+
|
|
310
|
+
template <>
|
|
311
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Eigen::half predux_max<cl::sycl::cl_half8>(const cl::sycl::cl_half8& a) {
|
|
312
|
+
return Eigen::half(cl::sycl::fmax(cl::sycl::fmax(cl::sycl::fmax(a.s0(), a.s1()), cl::sycl::fmax(a.s2(), a.s3())),
|
|
313
|
+
cl::sycl::fmax(cl::sycl::fmax(a.s4(), a.s5()), cl::sycl::fmax(a.s6(), a.s7()))));
|
|
314
|
+
}
|
|
315
|
+
template <>
|
|
316
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux_max<cl::sycl::cl_float4>(const cl::sycl::cl_float4& a) {
|
|
317
|
+
return cl::sycl::fmax(cl::sycl::fmax(a.x(), a.y()), cl::sycl::fmax(a.z(), a.w()));
|
|
318
|
+
}
|
|
319
|
+
template <>
|
|
320
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_max<cl::sycl::cl_double2>(const cl::sycl::cl_double2& a) {
|
|
321
|
+
return cl::sycl::fmax(a.x(), a.y());
|
|
322
|
+
}
|
|
323
|
+
|
|
324
|
+
template <>
|
|
325
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Eigen::half predux_min<cl::sycl::cl_half8>(const cl::sycl::cl_half8& a) {
|
|
326
|
+
return Eigen::half(cl::sycl::fmin(cl::sycl::fmin(cl::sycl::fmin(a.s0(), a.s1()), cl::sycl::fmin(a.s2(), a.s3())),
|
|
327
|
+
cl::sycl::fmin(cl::sycl::fmin(a.s4(), a.s5()), cl::sycl::fmin(a.s6(), a.s7()))));
|
|
328
|
+
}
|
|
329
|
+
template <>
|
|
330
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux_min<cl::sycl::cl_float4>(const cl::sycl::cl_float4& a) {
|
|
331
|
+
return cl::sycl::fmin(cl::sycl::fmin(a.x(), a.y()), cl::sycl::fmin(a.z(), a.w()));
|
|
332
|
+
}
|
|
333
|
+
template <>
|
|
334
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_min<cl::sycl::cl_double2>(const cl::sycl::cl_double2& a) {
|
|
335
|
+
return cl::sycl::fmin(a.x(), a.y());
|
|
336
|
+
}
|
|
337
|
+
|
|
338
|
+
template <>
|
|
339
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Eigen::half predux_mul<cl::sycl::cl_half8>(const cl::sycl::cl_half8& a) {
|
|
340
|
+
return Eigen::half(a.s0() * a.s1() * a.s2() * a.s3() * a.s4() * a.s5() * a.s6() * a.s7());
|
|
341
|
+
}
|
|
342
|
+
template <>
|
|
343
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux_mul<cl::sycl::cl_float4>(const cl::sycl::cl_float4& a) {
|
|
344
|
+
return a.x() * a.y() * a.z() * a.w();
|
|
345
|
+
}
|
|
346
|
+
template <>
|
|
347
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_mul<cl::sycl::cl_double2>(const cl::sycl::cl_double2& a) {
|
|
348
|
+
return a.x() * a.y();
|
|
349
|
+
}
|
|
350
|
+
|
|
351
|
+
template <>
|
|
352
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_half8 pabs<cl::sycl::cl_half8>(const cl::sycl::cl_half8& a) {
|
|
353
|
+
return cl::sycl::cl_half8(cl::sycl::fabs(a.s0()), cl::sycl::fabs(a.s1()), cl::sycl::fabs(a.s2()),
|
|
354
|
+
cl::sycl::fabs(a.s3()), cl::sycl::fabs(a.s4()), cl::sycl::fabs(a.s5()),
|
|
355
|
+
cl::sycl::fabs(a.s6()), cl::sycl::fabs(a.s7()));
|
|
356
|
+
}
|
|
357
|
+
template <>
|
|
358
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4 pabs<cl::sycl::cl_float4>(const cl::sycl::cl_float4& a) {
|
|
359
|
+
return cl::sycl::cl_float4(cl::sycl::fabs(a.x()), cl::sycl::fabs(a.y()), cl::sycl::fabs(a.z()),
|
|
360
|
+
cl::sycl::fabs(a.w()));
|
|
361
|
+
}
|
|
362
|
+
template <>
|
|
363
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_double2 pabs<cl::sycl::cl_double2>(const cl::sycl::cl_double2& a) {
|
|
364
|
+
return cl::sycl::cl_double2(cl::sycl::fabs(a.x()), cl::sycl::fabs(a.y()));
|
|
365
|
+
}
|
|
366
|
+
|
|
367
|
+
template <typename Packet>
|
|
368
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet sycl_pcmp_le(const Packet& a, const Packet& b) {
|
|
369
|
+
return (a <= b).template as<Packet>();
|
|
370
|
+
}
|
|
371
|
+
|
|
372
|
+
template <typename Packet>
|
|
373
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet sycl_pcmp_lt(const Packet& a, const Packet& b) {
|
|
374
|
+
return (a < b).template as<Packet>();
|
|
375
|
+
}
|
|
376
|
+
|
|
377
|
+
template <typename Packet>
|
|
378
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet sycl_pcmp_eq(const Packet& a, const Packet& b) {
|
|
379
|
+
return (a == b).template as<Packet>();
|
|
380
|
+
}
|
|
381
|
+
|
|
382
|
+
#define SYCL_PCMP(OP, TYPE) \
|
|
383
|
+
template <> \
|
|
384
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE TYPE pcmp_##OP<TYPE>(const TYPE& a, const TYPE& b) { \
|
|
385
|
+
return sycl_pcmp_##OP<TYPE>(a, b); \
|
|
386
|
+
}
|
|
387
|
+
|
|
388
|
+
SYCL_PCMP(le, cl::sycl::cl_half8)
|
|
389
|
+
SYCL_PCMP(lt, cl::sycl::cl_half8)
|
|
390
|
+
SYCL_PCMP(eq, cl::sycl::cl_half8)
|
|
391
|
+
SYCL_PCMP(le, cl::sycl::cl_float4)
|
|
392
|
+
SYCL_PCMP(lt, cl::sycl::cl_float4)
|
|
393
|
+
SYCL_PCMP(eq, cl::sycl::cl_float4)
|
|
394
|
+
SYCL_PCMP(le, cl::sycl::cl_double2)
|
|
395
|
+
SYCL_PCMP(lt, cl::sycl::cl_double2)
|
|
396
|
+
SYCL_PCMP(eq, cl::sycl::cl_double2)
|
|
397
|
+
#undef SYCL_PCMP
|
|
398
|
+
|
|
399
|
+
// Exchanges lane `col` of packet `row` with lane `row` of packet `col`.
// `row` and `col` must be literal digits 0..7 because they are pasted into
// the swizzle accessor names (s0 .. s7).
#define EIGEN_SYCL_HALF8_SWAP(row, col)                           \
  do {                                                            \
    cl::sycl::cl_half held = kernel.packet[row].s##col();         \
    kernel.packet[row].s##col() = kernel.packet[col].s##row();    \
    kernel.packet[col].s##row() = held;                           \
  } while (0)

// In-place transpose of an 8x8 block held as eight cl_half8 packets: every
// element above the diagonal is swapped with its mirror below the diagonal
// (28 swaps in total, visited row by row).
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void ptranspose(PacketBlock<cl::sycl::cl_half8, 8>& kernel) {
  // Row 0 against columns 1..7.
  EIGEN_SYCL_HALF8_SWAP(0, 1);
  EIGEN_SYCL_HALF8_SWAP(0, 2);
  EIGEN_SYCL_HALF8_SWAP(0, 3);
  EIGEN_SYCL_HALF8_SWAP(0, 4);
  EIGEN_SYCL_HALF8_SWAP(0, 5);
  EIGEN_SYCL_HALF8_SWAP(0, 6);
  EIGEN_SYCL_HALF8_SWAP(0, 7);

  // Row 1 against columns 2..7.
  EIGEN_SYCL_HALF8_SWAP(1, 2);
  EIGEN_SYCL_HALF8_SWAP(1, 3);
  EIGEN_SYCL_HALF8_SWAP(1, 4);
  EIGEN_SYCL_HALF8_SWAP(1, 5);
  EIGEN_SYCL_HALF8_SWAP(1, 6);
  EIGEN_SYCL_HALF8_SWAP(1, 7);

  // Row 2 against columns 3..7.
  EIGEN_SYCL_HALF8_SWAP(2, 3);
  EIGEN_SYCL_HALF8_SWAP(2, 4);
  EIGEN_SYCL_HALF8_SWAP(2, 5);
  EIGEN_SYCL_HALF8_SWAP(2, 6);
  EIGEN_SYCL_HALF8_SWAP(2, 7);

  // Row 3 against columns 4..7.
  EIGEN_SYCL_HALF8_SWAP(3, 4);
  EIGEN_SYCL_HALF8_SWAP(3, 5);
  EIGEN_SYCL_HALF8_SWAP(3, 6);
  EIGEN_SYCL_HALF8_SWAP(3, 7);

  // Row 4 against columns 5..7.
  EIGEN_SYCL_HALF8_SWAP(4, 5);
  EIGEN_SYCL_HALF8_SWAP(4, 6);
  EIGEN_SYCL_HALF8_SWAP(4, 7);

  // Row 5 against columns 6..7.
  EIGEN_SYCL_HALF8_SWAP(5, 6);
  EIGEN_SYCL_HALF8_SWAP(5, 7);

  // Row 6 against column 7.
  EIGEN_SYCL_HALF8_SWAP(6, 7);
}

#undef EIGEN_SYCL_HALF8_SWAP
|
|
512
|
+
|
|
513
|
+
// Exchanges lane `colLane` of packet `row` with lane `rowLane` of packet `col`,
// where `rowLane`/`colLane` are the swizzle names (x, y, z, w) matching the
// packet indices `row`/`col`.
#define EIGEN_SYCL_FLOAT4_SWAP(row, rowLane, col, colLane)      \
  do {                                                          \
    float held = kernel.packet[row].colLane();                  \
    kernel.packet[row].colLane() = kernel.packet[col].rowLane(); \
    kernel.packet[col].rowLane() = held;                        \
  } while (0)

// In-place transpose of a 4x4 block held as four cl_float4 packets: each
// element above the diagonal is swapped with its mirror below it (6 swaps).
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void ptranspose(PacketBlock<cl::sycl::cl_float4, 4>& kernel) {
  // Row 0 against columns 1..3.
  EIGEN_SYCL_FLOAT4_SWAP(0, x, 1, y);
  EIGEN_SYCL_FLOAT4_SWAP(0, x, 2, z);
  EIGEN_SYCL_FLOAT4_SWAP(0, x, 3, w);

  // Row 1 against columns 2..3.
  EIGEN_SYCL_FLOAT4_SWAP(1, y, 2, z);
  EIGEN_SYCL_FLOAT4_SWAP(1, y, 3, w);

  // Row 2 against column 3.
  EIGEN_SYCL_FLOAT4_SWAP(2, z, 3, w);
}

#undef EIGEN_SYCL_FLOAT4_SWAP
|
|
538
|
+
|
|
539
|
+
// In-place transpose of a 2x2 block held as two cl_double2 packets: the only
// off-diagonal pair, packet[0].y and packet[1].x, is swapped.
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void ptranspose(PacketBlock<cl::sycl::cl_double2, 2>& kernel) {
  double upper_right = kernel.packet[0].y();
  kernel.packet[0].y() = kernel.packet[1].x();
  kernel.packet[1].x() = upper_right;
}
|
|
544
|
+
|
|
545
|
+
template <>
|
|
546
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_half8 pblend(
|
|
547
|
+
const Selector<unpacket_traits<cl::sycl::cl_half8>::size>& ifPacket, const cl::sycl::cl_half8& thenPacket,
|
|
548
|
+
const cl::sycl::cl_half8& elsePacket) {
|
|
549
|
+
cl::sycl::cl_short8 condition(ifPacket.select[0] ? 0 : -1, ifPacket.select[1] ? 0 : -1, ifPacket.select[2] ? 0 : -1,
|
|
550
|
+
ifPacket.select[3] ? 0 : -1, ifPacket.select[4] ? 0 : -1, ifPacket.select[5] ? 0 : -1,
|
|
551
|
+
ifPacket.select[6] ? 0 : -1, ifPacket.select[7] ? 0 : -1);
|
|
552
|
+
return cl::sycl::select(thenPacket, elsePacket, condition);
|
|
553
|
+
}
|
|
554
|
+
|
|
555
|
+
template <>
|
|
556
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4 pblend(
|
|
557
|
+
const Selector<unpacket_traits<cl::sycl::cl_float4>::size>& ifPacket, const cl::sycl::cl_float4& thenPacket,
|
|
558
|
+
const cl::sycl::cl_float4& elsePacket) {
|
|
559
|
+
cl::sycl::cl_int4 condition(ifPacket.select[0] ? 0 : -1, ifPacket.select[1] ? 0 : -1, ifPacket.select[2] ? 0 : -1,
|
|
560
|
+
ifPacket.select[3] ? 0 : -1);
|
|
561
|
+
return cl::sycl::select(thenPacket, elsePacket, condition);
|
|
562
|
+
}
|
|
563
|
+
|
|
564
|
+
template <>
|
|
565
|
+
inline cl::sycl::cl_double2 pblend(const Selector<unpacket_traits<cl::sycl::cl_double2>::size>& ifPacket,
|
|
566
|
+
const cl::sycl::cl_double2& thenPacket, const cl::sycl::cl_double2& elsePacket) {
|
|
567
|
+
cl::sycl::cl_long2 condition(ifPacket.select[0] ? 0 : -1, ifPacket.select[1] ? 0 : -1);
|
|
568
|
+
return cl::sycl::select(thenPacket, elsePacket, condition);
|
|
569
|
+
}
|
|
570
|
+
#endif // SYCL_DEVICE_ONLY
|
|
571
|
+
|
|
572
|
+
} // end namespace internal
|
|
573
|
+
|
|
574
|
+
} // end namespace Eigen
|
|
575
|
+
|
|
576
|
+
#endif // EIGEN_PACKET_MATH_SYCL_H
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
// This file is part of Eigen, a lightweight C++ template library
|
|
2
|
+
// for linear algebra.
|
|
3
|
+
//
|
|
4
|
+
// Mehdi Goli Codeplay Software Ltd.
|
|
5
|
+
// Ralph Potter Codeplay Software Ltd.
|
|
6
|
+
// Luke Iwanski Codeplay Software Ltd.
|
|
7
|
+
// Contact: <eigen@codeplay.com>
|
|
8
|
+
//
|
|
9
|
+
// This Source Code Form is subject to the terms of the Mozilla
|
|
10
|
+
// Public License v. 2.0. If a copy of the MPL was not distributed
|
|
11
|
+
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
12
|
+
|
|
13
|
+
/*****************************************************************
|
|
14
|
+
* TypeCasting.h
|
|
15
|
+
*
|
|
16
|
+
* \brief:
|
|
17
|
+
* TypeCasting
|
|
18
|
+
*
|
|
19
|
+
*****************************************************************/
|
|
20
|
+
|
|
21
|
+
#ifndef EIGEN_TYPE_CASTING_SYCL_H
|
|
22
|
+
#define EIGEN_TYPE_CASTING_SYCL_H
|
|
23
|
+
|
|
24
|
+
// IWYU pragma: private
|
|
25
|
+
#include "../../InternalHeaderCheck.h"
|
|
26
|
+
|
|
27
|
+
namespace Eigen {
|
|
28
|
+
|
|
29
|
+
namespace internal {
|
|
30
|
+
#ifdef SYCL_DEVICE_ONLY
|
|
31
|
+
template <>
|
|
32
|
+
struct type_casting_traits<float, int> {
|
|
33
|
+
enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
|
|
34
|
+
};
|
|
35
|
+
|
|
36
|
+
template <>
|
|
37
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_int4 pcast<cl::sycl::cl_float4, cl::sycl::cl_int4>(
|
|
38
|
+
const cl::sycl::cl_float4& a) {
|
|
39
|
+
return a.template convert<cl::sycl::cl_int, cl::sycl::rounding_mode::automatic>();
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
template <>
|
|
43
|
+
struct type_casting_traits<int, float> {
|
|
44
|
+
enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
|
|
45
|
+
};
|
|
46
|
+
|
|
47
|
+
template <>
|
|
48
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4 pcast<cl::sycl::cl_int4, cl::sycl::cl_float4>(
|
|
49
|
+
const cl::sycl::cl_int4& a) {
|
|
50
|
+
return a.template convert<cl::sycl::cl_float, cl::sycl::rounding_mode::automatic>();
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
template <>
|
|
54
|
+
struct type_casting_traits<double, float> {
|
|
55
|
+
enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 };
|
|
56
|
+
};
|
|
57
|
+
|
|
58
|
+
template <>
|
|
59
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4 pcast<cl::sycl::cl_double2, cl::sycl::cl_float4>(
|
|
60
|
+
const cl::sycl::cl_double2& a, const cl::sycl::cl_double2& b) {
|
|
61
|
+
auto a1 = a.template convert<cl::sycl::cl_float, cl::sycl::rounding_mode::automatic>();
|
|
62
|
+
auto b1 = b.template convert<cl::sycl::cl_float, cl::sycl::rounding_mode::automatic>();
|
|
63
|
+
return cl::sycl::cl_float4(a1.x(), a1.y(), b1.x(), b1.y());
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
template <>
|
|
67
|
+
struct type_casting_traits<float, double> {
|
|
68
|
+
enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 };
|
|
69
|
+
};
|
|
70
|
+
|
|
71
|
+
template <>
|
|
72
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_double2 pcast<cl::sycl::cl_float4, cl::sycl::cl_double2>(
|
|
73
|
+
const cl::sycl::cl_float4& a) {
|
|
74
|
+
// Simply discard the second half of the input
|
|
75
|
+
return cl::sycl::cl_double2(a.x(), a.y());
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
#endif
|
|
79
|
+
} // end namespace internal
|
|
80
|
+
|
|
81
|
+
} // end namespace Eigen
|
|
82
|
+
|
|
83
|
+
#endif // EIGEN_TYPE_CASTING_SYCL_H
|