@smake/eigen 1.1.0 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/eigen/Eigen/AccelerateSupport +52 -0
- package/eigen/Eigen/Cholesky +18 -20
- package/eigen/Eigen/CholmodSupport +28 -28
- package/eigen/Eigen/Core +187 -120
- package/eigen/Eigen/Eigenvalues +16 -13
- package/eigen/Eigen/Geometry +18 -18
- package/eigen/Eigen/Householder +9 -7
- package/eigen/Eigen/IterativeLinearSolvers +8 -4
- package/eigen/Eigen/Jacobi +14 -13
- package/eigen/Eigen/KLUSupport +23 -21
- package/eigen/Eigen/LU +15 -16
- package/eigen/Eigen/MetisSupport +12 -12
- package/eigen/Eigen/OrderingMethods +54 -51
- package/eigen/Eigen/PaStiXSupport +23 -21
- package/eigen/Eigen/PardisoSupport +17 -14
- package/eigen/Eigen/QR +18 -20
- package/eigen/Eigen/QtAlignedMalloc +5 -12
- package/eigen/Eigen/SPQRSupport +21 -14
- package/eigen/Eigen/SVD +23 -17
- package/eigen/Eigen/Sparse +1 -2
- package/eigen/Eigen/SparseCholesky +18 -15
- package/eigen/Eigen/SparseCore +18 -17
- package/eigen/Eigen/SparseLU +9 -9
- package/eigen/Eigen/SparseQR +16 -14
- package/eigen/Eigen/StdDeque +5 -2
- package/eigen/Eigen/StdList +5 -2
- package/eigen/Eigen/StdVector +5 -2
- package/eigen/Eigen/SuperLUSupport +30 -24
- package/eigen/Eigen/ThreadPool +80 -0
- package/eigen/Eigen/UmfPackSupport +19 -17
- package/eigen/Eigen/Version +14 -0
- package/eigen/Eigen/src/AccelerateSupport/AccelerateSupport.h +423 -0
- package/eigen/Eigen/src/AccelerateSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Cholesky/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Cholesky/LDLT.h +366 -405
- package/eigen/Eigen/src/Cholesky/LLT.h +323 -367
- package/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h +81 -56
- package/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +585 -529
- package/eigen/Eigen/src/CholmodSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Core/ArithmeticSequence.h +143 -317
- package/eigen/Eigen/src/Core/Array.h +329 -370
- package/eigen/Eigen/src/Core/ArrayBase.h +190 -203
- package/eigen/Eigen/src/Core/ArrayWrapper.h +126 -170
- package/eigen/Eigen/src/Core/Assign.h +30 -40
- package/eigen/Eigen/src/Core/AssignEvaluator.h +651 -604
- package/eigen/Eigen/src/Core/Assign_MKL.h +125 -120
- package/eigen/Eigen/src/Core/BandMatrix.h +267 -282
- package/eigen/Eigen/src/Core/Block.h +371 -390
- package/eigen/Eigen/src/Core/CommaInitializer.h +85 -100
- package/eigen/Eigen/src/Core/ConditionEstimator.h +51 -53
- package/eigen/Eigen/src/Core/CoreEvaluators.h +1214 -937
- package/eigen/Eigen/src/Core/CoreIterators.h +72 -63
- package/eigen/Eigen/src/Core/CwiseBinaryOp.h +112 -129
- package/eigen/Eigen/src/Core/CwiseNullaryOp.h +676 -702
- package/eigen/Eigen/src/Core/CwiseTernaryOp.h +77 -103
- package/eigen/Eigen/src/Core/CwiseUnaryOp.h +55 -67
- package/eigen/Eigen/src/Core/CwiseUnaryView.h +127 -92
- package/eigen/Eigen/src/Core/DenseBase.h +630 -658
- package/eigen/Eigen/src/Core/DenseCoeffsBase.h +511 -628
- package/eigen/Eigen/src/Core/DenseStorage.h +511 -590
- package/eigen/Eigen/src/Core/DeviceWrapper.h +153 -0
- package/eigen/Eigen/src/Core/Diagonal.h +168 -207
- package/eigen/Eigen/src/Core/DiagonalMatrix.h +346 -317
- package/eigen/Eigen/src/Core/DiagonalProduct.h +12 -10
- package/eigen/Eigen/src/Core/Dot.h +167 -217
- package/eigen/Eigen/src/Core/EigenBase.h +74 -85
- package/eigen/Eigen/src/Core/Fill.h +138 -0
- package/eigen/Eigen/src/Core/FindCoeff.h +464 -0
- package/eigen/Eigen/src/Core/ForceAlignedAccess.h +90 -113
- package/eigen/Eigen/src/Core/Fuzzy.h +82 -105
- package/eigen/Eigen/src/Core/GeneralProduct.h +315 -261
- package/eigen/Eigen/src/Core/GenericPacketMath.h +1182 -520
- package/eigen/Eigen/src/Core/GlobalFunctions.h +193 -157
- package/eigen/Eigen/src/Core/IO.h +131 -156
- package/eigen/Eigen/src/Core/IndexedView.h +209 -125
- package/eigen/Eigen/src/Core/InnerProduct.h +260 -0
- package/eigen/Eigen/src/Core/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Core/Inverse.h +50 -59
- package/eigen/Eigen/src/Core/Map.h +123 -141
- package/eigen/Eigen/src/Core/MapBase.h +255 -282
- package/eigen/Eigen/src/Core/MathFunctions.h +1247 -1201
- package/eigen/Eigen/src/Core/MathFunctionsImpl.h +162 -99
- package/eigen/Eigen/src/Core/Matrix.h +463 -494
- package/eigen/Eigen/src/Core/MatrixBase.h +468 -470
- package/eigen/Eigen/src/Core/NestByValue.h +58 -52
- package/eigen/Eigen/src/Core/NoAlias.h +79 -86
- package/eigen/Eigen/src/Core/NumTraits.h +206 -206
- package/eigen/Eigen/src/Core/PartialReduxEvaluator.h +163 -142
- package/eigen/Eigen/src/Core/PermutationMatrix.h +461 -511
- package/eigen/Eigen/src/Core/PlainObjectBase.h +858 -972
- package/eigen/Eigen/src/Core/Product.h +246 -130
- package/eigen/Eigen/src/Core/ProductEvaluators.h +779 -671
- package/eigen/Eigen/src/Core/Random.h +153 -164
- package/eigen/Eigen/src/Core/RandomImpl.h +262 -0
- package/eigen/Eigen/src/Core/RealView.h +250 -0
- package/eigen/Eigen/src/Core/Redux.h +334 -314
- package/eigen/Eigen/src/Core/Ref.h +259 -257
- package/eigen/Eigen/src/Core/Replicate.h +92 -104
- package/eigen/Eigen/src/Core/Reshaped.h +215 -271
- package/eigen/Eigen/src/Core/ReturnByValue.h +47 -55
- package/eigen/Eigen/src/Core/Reverse.h +133 -148
- package/eigen/Eigen/src/Core/Select.h +68 -140
- package/eigen/Eigen/src/Core/SelfAdjointView.h +254 -290
- package/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h +23 -20
- package/eigen/Eigen/src/Core/SkewSymmetricMatrix3.h +382 -0
- package/eigen/Eigen/src/Core/Solve.h +88 -102
- package/eigen/Eigen/src/Core/SolveTriangular.h +126 -124
- package/eigen/Eigen/src/Core/SolverBase.h +132 -133
- package/eigen/Eigen/src/Core/StableNorm.h +113 -147
- package/eigen/Eigen/src/Core/StlIterators.h +404 -248
- package/eigen/Eigen/src/Core/Stride.h +90 -92
- package/eigen/Eigen/src/Core/Swap.h +70 -39
- package/eigen/Eigen/src/Core/Transpose.h +258 -295
- package/eigen/Eigen/src/Core/Transpositions.h +270 -333
- package/eigen/Eigen/src/Core/TriangularMatrix.h +642 -743
- package/eigen/Eigen/src/Core/VectorBlock.h +59 -72
- package/eigen/Eigen/src/Core/VectorwiseOp.h +653 -704
- package/eigen/Eigen/src/Core/Visitor.h +464 -308
- package/eigen/Eigen/src/Core/arch/AVX/Complex.h +380 -187
- package/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +65 -163
- package/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +2145 -638
- package/eigen/Eigen/src/Core/arch/AVX/Reductions.h +353 -0
- package/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +253 -60
- package/eigen/Eigen/src/Core/arch/AVX512/Complex.h +278 -228
- package/eigen/Eigen/src/Core/arch/AVX512/GemmKernel.h +1245 -0
- package/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +48 -269
- package/eigen/Eigen/src/Core/arch/AVX512/MathFunctionsFP16.h +75 -0
- package/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1597 -754
- package/eigen/Eigen/src/Core/arch/AVX512/PacketMathFP16.h +1413 -0
- package/eigen/Eigen/src/Core/arch/AVX512/Reductions.h +297 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TrsmKernel.h +1167 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TrsmUnrolls.inc +1219 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +229 -41
- package/eigen/Eigen/src/Core/arch/AVX512/TypeCastingFP16.h +130 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +420 -184
- package/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +40 -49
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2962 -2213
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +196 -212
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +713 -441
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMAbfloat16.h +742 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixVectorProduct.inc +2818 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2380 -1362
- package/eigen/Eigen/src/Core/arch/AltiVec/TypeCasting.h +153 -0
- package/eigen/Eigen/src/Core/arch/Default/BFloat16.h +390 -224
- package/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +78 -67
- package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1784 -799
- package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +167 -50
- package/eigen/Eigen/src/Core/arch/Default/Half.h +528 -379
- package/eigen/Eigen/src/Core/arch/Default/Settings.h +10 -12
- package/eigen/Eigen/src/Core/arch/GPU/Complex.h +244 -0
- package/eigen/Eigen/src/Core/arch/GPU/MathFunctions.h +41 -40
- package/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +550 -523
- package/eigen/Eigen/src/Core/arch/GPU/Tuple.h +268 -0
- package/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +27 -30
- package/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +8 -8
- package/eigen/Eigen/src/Core/arch/HVX/PacketMath.h +1088 -0
- package/eigen/Eigen/src/Core/arch/LSX/Complex.h +520 -0
- package/eigen/Eigen/src/Core/arch/LSX/GeneralBlockPanelKernel.h +23 -0
- package/eigen/Eigen/src/Core/arch/LSX/MathFunctions.h +43 -0
- package/eigen/Eigen/src/Core/arch/LSX/PacketMath.h +2866 -0
- package/eigen/Eigen/src/Core/arch/LSX/TypeCasting.h +526 -0
- package/eigen/Eigen/src/Core/arch/MSA/Complex.h +54 -82
- package/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +84 -92
- package/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +51 -47
- package/eigen/Eigen/src/Core/arch/NEON/Complex.h +454 -306
- package/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +175 -115
- package/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +23 -30
- package/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4366 -2857
- package/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +616 -393
- package/eigen/Eigen/src/Core/arch/NEON/UnaryFunctors.h +57 -0
- package/eigen/Eigen/src/Core/arch/SSE/Complex.h +350 -198
- package/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +38 -149
- package/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +1791 -912
- package/eigen/Eigen/src/Core/arch/SSE/Reductions.h +324 -0
- package/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +128 -40
- package/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +10 -6
- package/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +156 -234
- package/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +6 -3
- package/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +27 -32
- package/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +119 -117
- package/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +325 -419
- package/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +15 -17
- package/eigen/Eigen/src/Core/arch/ZVector/Complex.h +325 -181
- package/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +94 -83
- package/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +811 -458
- package/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +121 -124
- package/eigen/Eigen/src/Core/functors/BinaryFunctors.h +576 -370
- package/eigen/Eigen/src/Core/functors/NullaryFunctors.h +194 -109
- package/eigen/Eigen/src/Core/functors/StlFunctors.h +95 -112
- package/eigen/Eigen/src/Core/functors/TernaryFunctors.h +34 -7
- package/eigen/Eigen/src/Core/functors/UnaryFunctors.h +1038 -749
- package/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1883 -1375
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +312 -370
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +189 -176
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +84 -81
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +154 -73
- package/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +292 -337
- package/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +80 -77
- package/eigen/Eigen/src/Core/products/Parallelizer.h +207 -105
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +327 -388
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +206 -224
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +138 -147
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +58 -61
- package/eigen/Eigen/src/Core/products/SelfadjointProduct.h +71 -71
- package/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +48 -47
- package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +294 -369
- package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +246 -238
- package/eigen/Eigen/src/Core/products/TriangularMatrixVector.h +244 -247
- package/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +212 -192
- package/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +328 -277
- package/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +108 -109
- package/eigen/Eigen/src/Core/products/TriangularSolverVector.h +68 -94
- package/eigen/Eigen/src/Core/util/Assert.h +158 -0
- package/eigen/Eigen/src/Core/util/BlasUtil.h +342 -303
- package/eigen/Eigen/src/Core/util/ConfigureVectorization.h +348 -317
- package/eigen/Eigen/src/Core/util/Constants.h +297 -262
- package/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +130 -90
- package/eigen/Eigen/src/Core/util/EmulateArray.h +270 -0
- package/eigen/Eigen/src/Core/util/ForwardDeclarations.h +449 -247
- package/eigen/Eigen/src/Core/util/GpuHipCudaDefines.inc +101 -0
- package/eigen/Eigen/src/Core/util/GpuHipCudaUndefines.inc +45 -0
- package/eigen/Eigen/src/Core/util/IndexedViewHelper.h +417 -116
- package/eigen/Eigen/src/Core/util/IntegralConstant.h +211 -204
- package/eigen/Eigen/src/Core/util/MKL_support.h +39 -37
- package/eigen/Eigen/src/Core/util/Macros.h +655 -773
- package/eigen/Eigen/src/Core/util/MaxSizeVector.h +139 -0
- package/eigen/Eigen/src/Core/util/Memory.h +970 -748
- package/eigen/Eigen/src/Core/util/Meta.h +581 -633
- package/eigen/Eigen/src/Core/util/MoreMeta.h +638 -0
- package/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +32 -19
- package/eigen/Eigen/src/Core/util/ReshapedHelper.h +17 -17
- package/eigen/Eigen/src/Core/util/Serializer.h +209 -0
- package/eigen/Eigen/src/Core/util/StaticAssert.h +50 -166
- package/eigen/Eigen/src/Core/util/SymbolicIndex.h +377 -225
- package/eigen/Eigen/src/Core/util/XprHelper.h +784 -547
- package/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +246 -277
- package/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +299 -319
- package/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +52 -48
- package/eigen/Eigen/src/Eigenvalues/EigenSolver.h +413 -456
- package/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +309 -325
- package/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +157 -171
- package/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +292 -310
- package/eigen/Eigen/src/Eigenvalues/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +89 -105
- package/eigen/Eigen/src/Eigenvalues/RealQZ.h +537 -607
- package/eigen/Eigen/src/Eigenvalues/RealSchur.h +342 -381
- package/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +41 -35
- package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +541 -595
- package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +47 -44
- package/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +430 -462
- package/eigen/Eigen/src/Geometry/AlignedBox.h +226 -227
- package/eigen/Eigen/src/Geometry/AngleAxis.h +131 -133
- package/eigen/Eigen/src/Geometry/EulerAngles.h +163 -74
- package/eigen/Eigen/src/Geometry/Homogeneous.h +285 -333
- package/eigen/Eigen/src/Geometry/Hyperplane.h +151 -160
- package/eigen/Eigen/src/Geometry/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Geometry/OrthoMethods.h +168 -146
- package/eigen/Eigen/src/Geometry/ParametrizedLine.h +127 -127
- package/eigen/Eigen/src/Geometry/Quaternion.h +566 -506
- package/eigen/Eigen/src/Geometry/Rotation2D.h +107 -105
- package/eigen/Eigen/src/Geometry/RotationBase.h +148 -145
- package/eigen/Eigen/src/Geometry/Scaling.h +113 -106
- package/eigen/Eigen/src/Geometry/Transform.h +858 -936
- package/eigen/Eigen/src/Geometry/Translation.h +94 -92
- package/eigen/Eigen/src/Geometry/Umeyama.h +79 -84
- package/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +90 -104
- package/eigen/Eigen/src/Householder/BlockHouseholder.h +51 -46
- package/eigen/Eigen/src/Householder/Householder.h +102 -124
- package/eigen/Eigen/src/Householder/HouseholderSequence.h +412 -453
- package/eigen/Eigen/src/Householder/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +149 -162
- package/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +124 -119
- package/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +92 -104
- package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +251 -243
- package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +224 -228
- package/eigen/Eigen/src/IterativeLinearSolvers/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +178 -227
- package/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +79 -84
- package/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +54 -60
- package/eigen/Eigen/src/Jacobi/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Jacobi/Jacobi.h +252 -308
- package/eigen/Eigen/src/KLUSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/KLUSupport/KLUSupport.h +208 -227
- package/eigen/Eigen/src/LU/Determinant.h +50 -69
- package/eigen/Eigen/src/LU/FullPivLU.h +545 -596
- package/eigen/Eigen/src/LU/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/LU/InverseImpl.h +206 -285
- package/eigen/Eigen/src/LU/PartialPivLU.h +390 -428
- package/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h +54 -40
- package/eigen/Eigen/src/LU/arch/InverseSize4.h +72 -70
- package/eigen/Eigen/src/MetisSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/MetisSupport/MetisSupport.h +81 -93
- package/eigen/Eigen/src/OrderingMethods/Amd.h +243 -265
- package/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +831 -1004
- package/eigen/Eigen/src/OrderingMethods/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/OrderingMethods/Ordering.h +112 -119
- package/eigen/Eigen/src/PaStiXSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +524 -570
- package/eigen/Eigen/src/PardisoSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +385 -430
- package/eigen/Eigen/src/QR/ColPivHouseholderQR.h +479 -479
- package/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +120 -56
- package/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +166 -153
- package/eigen/Eigen/src/QR/FullPivHouseholderQR.h +495 -475
- package/eigen/Eigen/src/QR/HouseholderQR.h +394 -285
- package/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h +32 -23
- package/eigen/Eigen/src/QR/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SPQRSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +244 -264
- package/eigen/Eigen/src/SVD/BDCSVD.h +817 -713
- package/eigen/Eigen/src/SVD/BDCSVD_LAPACKE.h +174 -0
- package/eigen/Eigen/src/SVD/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SVD/JacobiSVD.h +577 -543
- package/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +85 -49
- package/eigen/Eigen/src/SVD/SVDBase.h +242 -182
- package/eigen/Eigen/src/SVD/UpperBidiagonalization.h +200 -235
- package/eigen/Eigen/src/SparseCholesky/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +765 -594
- package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +308 -94
- package/eigen/Eigen/src/SparseCore/AmbiVector.h +202 -251
- package/eigen/Eigen/src/SparseCore/CompressedStorage.h +184 -252
- package/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +134 -178
- package/eigen/Eigen/src/SparseCore/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseCore/SparseAssign.h +149 -140
- package/eigen/Eigen/src/SparseCore/SparseBlock.h +403 -440
- package/eigen/Eigen/src/SparseCore/SparseColEtree.h +100 -112
- package/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +525 -303
- package/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +555 -339
- package/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +100 -108
- package/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +169 -197
- package/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +71 -71
- package/eigen/Eigen/src/SparseCore/SparseDot.h +49 -47
- package/eigen/Eigen/src/SparseCore/SparseFuzzy.h +13 -11
- package/eigen/Eigen/src/SparseCore/SparseMap.h +243 -253
- package/eigen/Eigen/src/SparseCore/SparseMatrix.h +1603 -1245
- package/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +403 -350
- package/eigen/Eigen/src/SparseCore/SparsePermutation.h +186 -115
- package/eigen/Eigen/src/SparseCore/SparseProduct.h +94 -97
- package/eigen/Eigen/src/SparseCore/SparseRedux.h +22 -24
- package/eigen/Eigen/src/SparseCore/SparseRef.h +268 -295
- package/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +370 -416
- package/eigen/Eigen/src/SparseCore/SparseSolverBase.h +78 -87
- package/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +81 -95
- package/eigen/Eigen/src/SparseCore/SparseTranspose.h +62 -71
- package/eigen/Eigen/src/SparseCore/SparseTriangularView.h +132 -144
- package/eigen/Eigen/src/SparseCore/SparseUtil.h +138 -115
- package/eigen/Eigen/src/SparseCore/SparseVector.h +426 -372
- package/eigen/Eigen/src/SparseCore/SparseView.h +164 -193
- package/eigen/Eigen/src/SparseCore/TriangularSolver.h +129 -170
- package/eigen/Eigen/src/SparseLU/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseLU/SparseLU.h +756 -710
- package/eigen/Eigen/src/SparseLU/SparseLUImpl.h +61 -48
- package/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +102 -118
- package/eigen/Eigen/src/SparseLU/SparseLU_Structs.h +38 -35
- package/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +245 -301
- package/eigen/Eigen/src/SparseLU/SparseLU_Utils.h +44 -49
- package/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h +104 -108
- package/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +89 -100
- package/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +57 -58
- package/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +43 -55
- package/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +74 -71
- package/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +124 -132
- package/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h +136 -159
- package/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h +51 -52
- package/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h +67 -73
- package/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h +24 -26
- package/eigen/Eigen/src/SparseQR/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseQR/SparseQR.h +450 -502
- package/eigen/Eigen/src/StlSupport/StdDeque.h +28 -93
- package/eigen/Eigen/src/StlSupport/StdList.h +28 -84
- package/eigen/Eigen/src/StlSupport/StdVector.h +28 -108
- package/eigen/Eigen/src/StlSupport/details.h +48 -50
- package/eigen/Eigen/src/SuperLUSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +634 -730
- package/eigen/Eigen/src/ThreadPool/Barrier.h +70 -0
- package/eigen/Eigen/src/ThreadPool/CoreThreadPoolDevice.h +336 -0
- package/eigen/Eigen/src/ThreadPool/EventCount.h +241 -0
- package/eigen/Eigen/src/ThreadPool/ForkJoin.h +140 -0
- package/eigen/Eigen/src/ThreadPool/InternalHeaderCheck.h +4 -0
- package/eigen/Eigen/src/ThreadPool/NonBlockingThreadPool.h +587 -0
- package/eigen/Eigen/src/ThreadPool/RunQueue.h +230 -0
- package/eigen/Eigen/src/ThreadPool/ThreadCancel.h +21 -0
- package/eigen/Eigen/src/ThreadPool/ThreadEnvironment.h +43 -0
- package/eigen/Eigen/src/ThreadPool/ThreadLocal.h +289 -0
- package/eigen/Eigen/src/ThreadPool/ThreadPoolInterface.h +50 -0
- package/eigen/Eigen/src/ThreadPool/ThreadYield.h +16 -0
- package/eigen/Eigen/src/UmfPackSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +428 -464
- package/eigen/Eigen/src/misc/Image.h +41 -43
- package/eigen/Eigen/src/misc/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/misc/Kernel.h +39 -41
- package/eigen/Eigen/src/misc/RealSvd2x2.h +19 -21
- package/eigen/Eigen/src/misc/blas.h +83 -426
- package/eigen/Eigen/src/misc/lapacke.h +9972 -16179
- package/eigen/Eigen/src/misc/lapacke_helpers.h +163 -0
- package/eigen/Eigen/src/misc/lapacke_mangling.h +4 -5
- package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.inc +344 -0
- package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.inc +544 -0
- package/eigen/Eigen/src/plugins/{BlockMethods.h → BlockMethods.inc} +434 -506
- package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.inc +116 -0
- package/eigen/Eigen/src/plugins/{CommonCwiseUnaryOps.h → CommonCwiseUnaryOps.inc} +58 -68
- package/eigen/Eigen/src/plugins/IndexedViewMethods.inc +192 -0
- package/eigen/Eigen/src/plugins/InternalHeaderCheck.inc +3 -0
- package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.inc +331 -0
- package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.inc +118 -0
- package/eigen/Eigen/src/plugins/ReshapedMethods.inc +133 -0
- package/package.json +1 -1
- package/eigen/COPYING.APACHE +0 -203
- package/eigen/COPYING.BSD +0 -26
- package/eigen/COPYING.GPL +0 -674
- package/eigen/COPYING.LGPL +0 -502
- package/eigen/COPYING.MINPACK +0 -51
- package/eigen/COPYING.MPL2 +0 -373
- package/eigen/COPYING.README +0 -18
- package/eigen/Eigen/src/Core/BooleanRedux.h +0 -162
- package/eigen/Eigen/src/Core/arch/CUDA/Complex.h +0 -258
- package/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +0 -120
- package/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +0 -694
- package/eigen/Eigen/src/Core/util/NonMPL2.h +0 -3
- package/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h +0 -67
- package/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +0 -280
- package/eigen/Eigen/src/misc/lapack.h +0 -152
- package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +0 -358
- package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +0 -696
- package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h +0 -115
- package/eigen/Eigen/src/plugins/IndexedViewMethods.h +0 -262
- package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +0 -152
- package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +0 -95
- package/eigen/Eigen/src/plugins/ReshapedMethods.h +0 -149
- package/eigen/README.md +0 -5
|
@@ -21,190 +21,84 @@
|
|
|
21
21
|
#ifndef EIGEN_PACKET_MATH_SYCL_H
|
|
22
22
|
#define EIGEN_PACKET_MATH_SYCL_H
|
|
23
23
|
#include <type_traits>
|
|
24
|
-
namespace Eigen {
|
|
25
|
-
|
|
26
|
-
namespace internal {
|
|
27
|
-
#ifdef SYCL_DEVICE_ONLY
|
|
28
24
|
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type ploadt_ro( \
|
|
32
|
-
typename cl::sycl::multi_ptr< \
|
|
33
|
-
const typename unpacket_traits<packet_type>::type, \
|
|
34
|
-
cl::sycl::access::address_space::address_space_target>::pointer_t \
|
|
35
|
-
from) { \
|
|
36
|
-
typedef typename unpacket_traits<packet_type>::type scalar; \
|
|
37
|
-
typedef cl::sycl::multi_ptr< \
|
|
38
|
-
scalar, cl::sycl::access::address_space::address_space_target> \
|
|
39
|
-
multi_ptr; \
|
|
40
|
-
auto res = packet_type( \
|
|
41
|
-
static_cast<typename unpacket_traits<packet_type>::type>(0)); \
|
|
42
|
-
res.load(0, multi_ptr(const_cast<typename multi_ptr::pointer_t>(from))); \
|
|
43
|
-
return res; \
|
|
44
|
-
}
|
|
45
|
-
|
|
46
|
-
SYCL_PLOADT_RO(global_space)
|
|
47
|
-
SYCL_PLOADT_RO(local_space)
|
|
48
|
-
#undef SYCL_PLOADT_RO
|
|
49
|
-
#endif
|
|
25
|
+
// IWYU pragma: private
|
|
26
|
+
#include "../../InternalHeaderCheck.h"
|
|
50
27
|
|
|
51
|
-
|
|
52
|
-
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type
|
|
53
|
-
ploadt_ro(const Eigen::TensorSycl::internal::RangeAccess<
|
|
54
|
-
cl::sycl::access::mode::read_write, T>& from) {
|
|
55
|
-
return ploadt_ro<packet_type, Alignment>(from.get_pointer());
|
|
56
|
-
}
|
|
28
|
+
namespace Eigen {
|
|
57
29
|
|
|
30
|
+
namespace internal {
|
|
58
31
|
#ifdef SYCL_DEVICE_ONLY
|
|
59
|
-
#define SYCL_PLOAD(
|
|
60
|
-
template
|
|
61
|
-
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pload##AlignedType(
|
|
62
|
-
typename
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
32
|
+
#define SYCL_PLOAD(packet_type, AlignedType) \
|
|
33
|
+
template <> \
|
|
34
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pload##AlignedType<packet_type>( \
|
|
35
|
+
const typename unpacket_traits<packet_type>::type* from) { \
|
|
36
|
+
auto ptr = \
|
|
37
|
+
cl::sycl::address_space_cast<cl::sycl::access::address_space::generic_space, cl::sycl::access::decorated::no>( \
|
|
38
|
+
from); \
|
|
39
|
+
packet_type res{}; \
|
|
40
|
+
res.load(0, ptr); \
|
|
41
|
+
return res; \
|
|
67
42
|
}
|
|
68
43
|
|
|
69
|
-
|
|
70
|
-
SYCL_PLOAD(
|
|
71
|
-
SYCL_PLOAD(
|
|
72
|
-
|
|
73
|
-
SYCL_PLOAD(local_space, Unaligned, u)
|
|
74
|
-
SYCL_PLOAD(local_space, Aligned, )
|
|
75
|
-
|
|
76
|
-
#undef SYCL_PLOAD
|
|
77
|
-
#endif
|
|
78
|
-
|
|
79
|
-
#define SYCL_PLOAD(Alignment, AlignedType) \
|
|
80
|
-
template <typename packet_type> \
|
|
81
|
-
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pload##AlignedType( \
|
|
82
|
-
const Eigen::TensorSycl::internal::RangeAccess< \
|
|
83
|
-
cl::sycl::access::mode::read_write, \
|
|
84
|
-
typename unpacket_traits<packet_type>::type> \
|
|
85
|
-
from) { \
|
|
86
|
-
return ploadt_ro<packet_type, Alignment>(from); \
|
|
87
|
-
}
|
|
88
|
-
SYCL_PLOAD(Unaligned, u)
|
|
89
|
-
SYCL_PLOAD(Aligned, )
|
|
44
|
+
SYCL_PLOAD(cl::sycl::cl_float4, u)
|
|
45
|
+
SYCL_PLOAD(cl::sycl::cl_float4, )
|
|
46
|
+
SYCL_PLOAD(cl::sycl::cl_double2, u)
|
|
47
|
+
SYCL_PLOAD(cl::sycl::cl_double2, )
|
|
90
48
|
#undef SYCL_PLOAD
|
|
91
49
|
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
*
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
from) { \
|
|
102
|
-
if (Alignment >= unpacket_traits<packet_type>::alignment) \
|
|
103
|
-
return pload<packet_type>(from); \
|
|
104
|
-
else \
|
|
105
|
-
return ploadu<packet_type>(from); \
|
|
106
|
-
}
|
|
107
|
-
|
|
108
|
-
// global space
|
|
109
|
-
SYCL_PLOADT(global_space)
|
|
110
|
-
// local space
|
|
111
|
-
SYCL_PLOADT(local_space)
|
|
112
|
-
#undef SYCL_PLOADT
|
|
113
|
-
#endif
|
|
114
|
-
|
|
115
|
-
template <typename packet_type, int Alignment>
|
|
116
|
-
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type
|
|
117
|
-
ploadt(const Eigen::TensorSycl::internal::RangeAccess<
|
|
118
|
-
cl::sycl::access::mode::read_write,
|
|
119
|
-
typename unpacket_traits<packet_type>::type>& from) {
|
|
120
|
-
return ploadt<packet_type, Alignment>(from.get_pointer());
|
|
50
|
+
template <>
|
|
51
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_half8 pload<cl::sycl::cl_half8>(
|
|
52
|
+
const typename unpacket_traits<cl::sycl::cl_half8>::type* from) {
|
|
53
|
+
auto ptr =
|
|
54
|
+
cl::sycl::address_space_cast<cl::sycl::access::address_space::generic_space, cl::sycl::access::decorated::no>(
|
|
55
|
+
reinterpret_cast<const cl::sycl::cl_half*>(from));
|
|
56
|
+
cl::sycl::cl_half8 res{};
|
|
57
|
+
res.load(0, ptr);
|
|
58
|
+
return res;
|
|
121
59
|
}
|
|
122
|
-
#ifdef SYCL_DEVICE_ONLY
|
|
123
60
|
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
return res; \
|
|
135
|
-
}
|
|
136
|
-
|
|
137
|
-
SYCL_PLOADT_RO_SPECIAL(cl::sycl::cl_float4, Aligned)
|
|
138
|
-
SYCL_PLOADT_RO_SPECIAL(cl::sycl::cl_double2, Aligned)
|
|
139
|
-
SYCL_PLOADT_RO_SPECIAL(cl::sycl::cl_float4, Unaligned)
|
|
140
|
-
SYCL_PLOADT_RO_SPECIAL(cl::sycl::cl_double2, Unaligned)
|
|
61
|
+
template <>
|
|
62
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_half8 ploadu<cl::sycl::cl_half8>(
|
|
63
|
+
const typename unpacket_traits<cl::sycl::cl_half8>::type* from) {
|
|
64
|
+
auto ptr =
|
|
65
|
+
cl::sycl::address_space_cast<cl::sycl::access::address_space::generic_space, cl::sycl::access::decorated::no>(
|
|
66
|
+
reinterpret_cast<const cl::sycl::cl_half*>(from));
|
|
67
|
+
cl::sycl::cl_half8 res{};
|
|
68
|
+
res.load(0, ptr);
|
|
69
|
+
return res;
|
|
70
|
+
}
|
|
141
71
|
|
|
142
|
-
#define
|
|
143
|
-
template <>
|
|
144
|
-
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
0, const_cast<scalar*>(from)); \
|
|
150
|
-
return res; \
|
|
151
|
-
}
|
|
152
|
-
SYCL_PLOAD_SPECIAL(cl::sycl::cl_float4, )
|
|
153
|
-
SYCL_PLOAD_SPECIAL(cl::sycl::cl_double2, )
|
|
154
|
-
SYCL_PLOAD_SPECIAL(cl::sycl::cl_float4, u)
|
|
155
|
-
SYCL_PLOAD_SPECIAL(cl::sycl::cl_double2, u)
|
|
156
|
-
|
|
157
|
-
#undef SYCL_PLOAD_SPECIAL
|
|
158
|
-
|
|
159
|
-
#define SYCL_PSTORE(scalar, packet_type, address_space_target, alignment) \
|
|
160
|
-
template <> \
|
|
161
|
-
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstore##alignment( \
|
|
162
|
-
typename cl::sycl::multi_ptr< \
|
|
163
|
-
scalar, \
|
|
164
|
-
cl::sycl::access::address_space::address_space_target>::pointer_t \
|
|
165
|
-
to, \
|
|
166
|
-
const packet_type& from) { \
|
|
167
|
-
typedef cl::sycl::multi_ptr< \
|
|
168
|
-
scalar, cl::sycl::access::address_space::address_space_target> \
|
|
169
|
-
multi_ptr; \
|
|
170
|
-
from.store(0, multi_ptr(to)); \
|
|
72
|
+
#define SYCL_PSTORE(scalar, packet_type, alignment) \
|
|
73
|
+
template <> \
|
|
74
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstore##alignment(scalar* to, const packet_type& from) { \
|
|
75
|
+
auto ptr = \
|
|
76
|
+
cl::sycl::address_space_cast<cl::sycl::access::address_space::generic_space, cl::sycl::access::decorated::no>( \
|
|
77
|
+
to); \
|
|
78
|
+
from.store(0, ptr); \
|
|
171
79
|
}
|
|
172
80
|
|
|
173
|
-
|
|
174
|
-
SYCL_PSTORE(float, cl::sycl::cl_float4,
|
|
175
|
-
SYCL_PSTORE(
|
|
176
|
-
SYCL_PSTORE(double, cl::sycl::cl_double2,
|
|
177
|
-
SYCL_PSTORE(double, cl::sycl::cl_double2, global_space, u)
|
|
178
|
-
SYCL_PSTORE(float, cl::sycl::cl_float4, local_space, )
|
|
179
|
-
SYCL_PSTORE(float, cl::sycl::cl_float4, local_space, u)
|
|
180
|
-
SYCL_PSTORE(double, cl::sycl::cl_double2, local_space, )
|
|
181
|
-
SYCL_PSTORE(double, cl::sycl::cl_double2, local_space, u)
|
|
182
|
-
|
|
183
|
-
SYCL_PSTORE(float, cl::sycl::cl_float4, private_space, )
|
|
184
|
-
SYCL_PSTORE(float, cl::sycl::cl_float4, private_space, u)
|
|
185
|
-
SYCL_PSTORE(double, cl::sycl::cl_double2, private_space, )
|
|
186
|
-
SYCL_PSTORE(double, cl::sycl::cl_double2, private_space, u)
|
|
81
|
+
SYCL_PSTORE(float, cl::sycl::cl_float4, )
|
|
82
|
+
SYCL_PSTORE(float, cl::sycl::cl_float4, u)
|
|
83
|
+
SYCL_PSTORE(double, cl::sycl::cl_double2, )
|
|
84
|
+
SYCL_PSTORE(double, cl::sycl::cl_double2, u)
|
|
187
85
|
#undef SYCL_PSTORE
|
|
188
86
|
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
const packet_type& from) { \
|
|
197
|
-
if (Alignment) \
|
|
198
|
-
pstore(to, from); \
|
|
199
|
-
else \
|
|
200
|
-
pstoreu(to, from); \
|
|
201
|
-
}
|
|
202
|
-
|
|
203
|
-
SYCL_PSTORE_T(global_space)
|
|
204
|
-
|
|
205
|
-
SYCL_PSTORE_T(local_space)
|
|
87
|
+
template <>
|
|
88
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoreu(Eigen::half* to, const cl::sycl::cl_half8& from) {
|
|
89
|
+
auto ptr =
|
|
90
|
+
cl::sycl::address_space_cast<cl::sycl::access::address_space::generic_space, cl::sycl::access::decorated::no>(
|
|
91
|
+
reinterpret_cast<cl::sycl::cl_half*>(to));
|
|
92
|
+
from.store(0, ptr);
|
|
93
|
+
}
|
|
206
94
|
|
|
207
|
-
|
|
95
|
+
template <>
|
|
96
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstore(Eigen::half* to, const cl::sycl::cl_half8& from) {
|
|
97
|
+
auto ptr =
|
|
98
|
+
cl::sycl::address_space_cast<cl::sycl::access::address_space::generic_space, cl::sycl::access::decorated::no>(
|
|
99
|
+
reinterpret_cast<cl::sycl::cl_half*>(to));
|
|
100
|
+
from.store(0, ptr);
|
|
101
|
+
}
|
|
208
102
|
|
|
209
103
|
#define SYCL_PSET1(packet_type) \
|
|
210
104
|
template <> \
|
|
@@ -214,6 +108,7 @@ SYCL_PSTORE_T(local_space)
|
|
|
214
108
|
}
|
|
215
109
|
|
|
216
110
|
// global space
|
|
111
|
+
SYCL_PSET1(cl::sycl::cl_half8)
|
|
217
112
|
SYCL_PSET1(cl::sycl::cl_float4)
|
|
218
113
|
SYCL_PSET1(cl::sycl::cl_double2)
|
|
219
114
|
|
|
@@ -222,41 +117,73 @@ SYCL_PSET1(cl::sycl::cl_double2)
|
|
|
222
117
|
template <typename packet_type>
|
|
223
118
|
struct get_base_packet {
|
|
224
119
|
template <typename sycl_multi_pointer>
|
|
225
|
-
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type
|
|
226
|
-
|
|
120
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type get_ploaddup(sycl_multi_pointer) {}
|
|
121
|
+
|
|
122
|
+
template <typename sycl_multi_pointer>
|
|
123
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type get_pgather(sycl_multi_pointer, Index) {}
|
|
124
|
+
};
|
|
125
|
+
|
|
126
|
+
template <>
|
|
127
|
+
struct get_base_packet<cl::sycl::cl_half8> {
|
|
128
|
+
template <typename sycl_multi_pointer>
|
|
129
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_half8 get_ploaddup(sycl_multi_pointer from) {
|
|
130
|
+
return cl::sycl::cl_half8(static_cast<cl::sycl::half>(from[0]), static_cast<cl::sycl::half>(from[0]),
|
|
131
|
+
static_cast<cl::sycl::half>(from[1]), static_cast<cl::sycl::half>(from[1]),
|
|
132
|
+
static_cast<cl::sycl::half>(from[2]), static_cast<cl::sycl::half>(from[2]),
|
|
133
|
+
static_cast<cl::sycl::half>(from[3]), static_cast<cl::sycl::half>(from[3]));
|
|
134
|
+
}
|
|
135
|
+
template <typename sycl_multi_pointer>
|
|
136
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_half8 get_pgather(sycl_multi_pointer from, Index stride) {
|
|
137
|
+
return cl::sycl::cl_half8(
|
|
138
|
+
static_cast<cl::sycl::half>(from[0 * stride]), static_cast<cl::sycl::half>(from[1 * stride]),
|
|
139
|
+
static_cast<cl::sycl::half>(from[2 * stride]), static_cast<cl::sycl::half>(from[3 * stride]),
|
|
140
|
+
static_cast<cl::sycl::half>(from[4 * stride]), static_cast<cl::sycl::half>(from[5 * stride]),
|
|
141
|
+
static_cast<cl::sycl::half>(from[6 * stride]), static_cast<cl::sycl::half>(from[7 * stride]));
|
|
142
|
+
}
|
|
227
143
|
|
|
228
144
|
template <typename sycl_multi_pointer>
|
|
229
|
-
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
|
230
|
-
|
|
145
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void set_pscatter(sycl_multi_pointer to, const cl::sycl::cl_half8& from,
|
|
146
|
+
Index stride) {
|
|
147
|
+
auto tmp = stride;
|
|
148
|
+
to[0] = Eigen::half(from.s0());
|
|
149
|
+
to[tmp] = Eigen::half(from.s1());
|
|
150
|
+
to[tmp += stride] = Eigen::half(from.s2());
|
|
151
|
+
to[tmp += stride] = Eigen::half(from.s3());
|
|
152
|
+
to[tmp += stride] = Eigen::half(from.s4());
|
|
153
|
+
to[tmp += stride] = Eigen::half(from.s5());
|
|
154
|
+
to[tmp += stride] = Eigen::half(from.s6());
|
|
155
|
+
to[tmp += stride] = Eigen::half(from.s7());
|
|
156
|
+
}
|
|
157
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_half8 set_plset(const cl::sycl::half& a) {
|
|
158
|
+
return cl::sycl::cl_half8(static_cast<cl::sycl::half>(a), static_cast<cl::sycl::half>(a + 1),
|
|
159
|
+
static_cast<cl::sycl::half>(a + 2), static_cast<cl::sycl::half>(a + 3),
|
|
160
|
+
static_cast<cl::sycl::half>(a + 4), static_cast<cl::sycl::half>(a + 5),
|
|
161
|
+
static_cast<cl::sycl::half>(a + 6), static_cast<cl::sycl::half>(a + 7));
|
|
162
|
+
}
|
|
231
163
|
};
|
|
232
164
|
|
|
233
165
|
template <>
|
|
234
166
|
struct get_base_packet<cl::sycl::cl_float4> {
|
|
235
167
|
template <typename sycl_multi_pointer>
|
|
236
|
-
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 get_ploaddup(
|
|
237
|
-
sycl_multi_pointer from) {
|
|
168
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 get_ploaddup(sycl_multi_pointer from) {
|
|
238
169
|
return cl::sycl::cl_float4(from[0], from[0], from[1], from[1]);
|
|
239
170
|
}
|
|
240
171
|
template <typename sycl_multi_pointer>
|
|
241
|
-
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 get_pgather(
|
|
242
|
-
|
|
243
|
-
return cl::sycl::cl_float4(from[0 * stride], from[1 * stride],
|
|
244
|
-
from[2 * stride], from[3 * stride]);
|
|
172
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 get_pgather(sycl_multi_pointer from, Index stride) {
|
|
173
|
+
return cl::sycl::cl_float4(from[0 * stride], from[1 * stride], from[2 * stride], from[3 * stride]);
|
|
245
174
|
}
|
|
246
175
|
|
|
247
176
|
template <typename sycl_multi_pointer>
|
|
248
|
-
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void set_pscatter(
|
|
249
|
-
|
|
177
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void set_pscatter(sycl_multi_pointer to, const cl::sycl::cl_float4& from,
|
|
178
|
+
Index stride) {
|
|
250
179
|
auto tmp = stride;
|
|
251
180
|
to[0] = from.x();
|
|
252
181
|
to[tmp] = from.y();
|
|
253
182
|
to[tmp += stride] = from.z();
|
|
254
183
|
to[tmp += stride] = from.w();
|
|
255
184
|
}
|
|
256
|
-
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 set_plset(
|
|
257
|
-
|
|
258
|
-
return cl::sycl::cl_float4(static_cast<float>(a), static_cast<float>(a + 1),
|
|
259
|
-
static_cast<float>(a + 2),
|
|
185
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 set_plset(const float& a) {
|
|
186
|
+
return cl::sycl::cl_float4(static_cast<float>(a), static_cast<float>(a + 1), static_cast<float>(a + 2),
|
|
260
187
|
static_cast<float>(a + 3));
|
|
261
188
|
}
|
|
262
189
|
};
|
|
@@ -264,47 +191,28 @@ struct get_base_packet<cl::sycl::cl_float4> {
|
|
|
264
191
|
template <>
|
|
265
192
|
struct get_base_packet<cl::sycl::cl_double2> {
|
|
266
193
|
template <typename sycl_multi_pointer>
|
|
267
|
-
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2
|
|
268
|
-
get_ploaddup(const sycl_multi_pointer from) {
|
|
194
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2 get_ploaddup(const sycl_multi_pointer from) {
|
|
269
195
|
return cl::sycl::cl_double2(from[0], from[0]);
|
|
270
196
|
}
|
|
271
197
|
|
|
272
198
|
template <typename sycl_multi_pointer, typename Index>
|
|
273
|
-
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2 get_pgather(
|
|
274
|
-
|
|
199
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2 get_pgather(const sycl_multi_pointer from,
|
|
200
|
+
Index stride) {
|
|
275
201
|
return cl::sycl::cl_double2(from[0 * stride], from[1 * stride]);
|
|
276
202
|
}
|
|
277
203
|
|
|
278
204
|
template <typename sycl_multi_pointer>
|
|
279
|
-
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void set_pscatter(
|
|
280
|
-
|
|
205
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void set_pscatter(sycl_multi_pointer to,
|
|
206
|
+
const cl::sycl::cl_double2& from, Index stride) {
|
|
281
207
|
to[0] = from.x();
|
|
282
208
|
to[stride] = from.y();
|
|
283
209
|
}
|
|
284
210
|
|
|
285
|
-
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2 set_plset(
|
|
286
|
-
|
|
287
|
-
return cl::sycl::cl_double2(static_cast<double>(a),
|
|
288
|
-
static_cast<double>(a + 1));
|
|
211
|
+
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2 set_plset(const double& a) {
|
|
212
|
+
return cl::sycl::cl_double2(static_cast<double>(a), static_cast<double>(a + 1));
|
|
289
213
|
}
|
|
290
214
|
};
|
|
291
215
|
|
|
292
|
-
#define SYCL_PLOAD_DUP(address_space_target) \
|
|
293
|
-
template <typename packet_type> \
|
|
294
|
-
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type ploaddup( \
|
|
295
|
-
typename cl::sycl::multi_ptr< \
|
|
296
|
-
const typename unpacket_traits<packet_type>::type, \
|
|
297
|
-
cl::sycl::access::address_space::address_space_target>::pointer_t \
|
|
298
|
-
from) { \
|
|
299
|
-
return get_base_packet<packet_type>::get_ploaddup(from); \
|
|
300
|
-
}
|
|
301
|
-
|
|
302
|
-
// global space
|
|
303
|
-
SYCL_PLOAD_DUP(global_space)
|
|
304
|
-
// local_space
|
|
305
|
-
SYCL_PLOAD_DUP(local_space)
|
|
306
|
-
#undef SYCL_PLOAD_DUP
|
|
307
|
-
|
|
308
216
|
#define SYCL_PLOAD_DUP_SPECILIZE(packet_type) \
|
|
309
217
|
template <> \
|
|
310
218
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type ploaddup<packet_type>( \
|
|
@@ -312,6 +220,7 @@ SYCL_PLOAD_DUP(local_space)
|
|
|
312
220
|
return get_base_packet<packet_type>::get_ploaddup(from); \
|
|
313
221
|
}
|
|
314
222
|
|
|
223
|
+
SYCL_PLOAD_DUP_SPECILIZE(cl::sycl::cl_half8)
|
|
315
224
|
SYCL_PLOAD_DUP_SPECILIZE(cl::sycl::cl_float4)
|
|
316
225
|
SYCL_PLOAD_DUP_SPECILIZE(cl::sycl::cl_double2)
|
|
317
226
|
|
|
@@ -323,186 +232,162 @@ SYCL_PLOAD_DUP_SPECILIZE(cl::sycl::cl_double2)
|
|
|
323
232
|
const typename unpacket_traits<packet_type>::type& a) { \
|
|
324
233
|
return get_base_packet<packet_type>::set_plset(a); \
|
|
325
234
|
}
|
|
326
|
-
|
|
327
235
|
SYCL_PLSET(cl::sycl::cl_float4)
|
|
328
236
|
SYCL_PLSET(cl::sycl::cl_double2)
|
|
329
|
-
|
|
330
237
|
#undef SYCL_PLSET
|
|
331
238
|
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
cl::sycl::access::address_space::address_space_target>::pointer_t \
|
|
338
|
-
from, \
|
|
339
|
-
Index stride) { \
|
|
340
|
-
return get_base_packet<packet_type>::get_pgather(from, stride); \
|
|
341
|
-
}
|
|
239
|
+
template <>
|
|
240
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_half8 plset<cl::sycl::cl_half8>(
|
|
241
|
+
const typename unpacket_traits<cl::sycl::cl_half8>::type& a) {
|
|
242
|
+
return get_base_packet<cl::sycl::cl_half8>::set_plset((const cl::sycl::half&)a);
|
|
243
|
+
}
|
|
342
244
|
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
#undef SYCL_PGATHER
|
|
349
|
-
|
|
350
|
-
#define SYCL_PGATHER_SPECILIZE(scalar, packet_type) \
|
|
351
|
-
template <> \
|
|
352
|
-
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type \
|
|
353
|
-
pgather<scalar, packet_type>( \
|
|
354
|
-
const typename unpacket_traits<packet_type>::type* from, Index stride) { \
|
|
355
|
-
return get_base_packet<packet_type>::get_pgather(from, stride); \
|
|
245
|
+
#define SYCL_PGATHER_SPECILIZE(scalar, packet_type) \
|
|
246
|
+
template <> \
|
|
247
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pgather<scalar, packet_type>( \
|
|
248
|
+
const typename unpacket_traits<packet_type>::type* from, Index stride) { \
|
|
249
|
+
return get_base_packet<packet_type>::get_pgather(from, stride); \
|
|
356
250
|
}
|
|
357
251
|
|
|
252
|
+
SYCL_PGATHER_SPECILIZE(Eigen::half, cl::sycl::cl_half8)
|
|
358
253
|
SYCL_PGATHER_SPECILIZE(float, cl::sycl::cl_float4)
|
|
359
254
|
SYCL_PGATHER_SPECILIZE(double, cl::sycl::cl_double2)
|
|
360
|
-
|
|
361
255
|
#undef SYCL_PGATHER_SPECILIZE
|
|
362
256
|
|
|
363
|
-
#define
|
|
364
|
-
template
|
|
365
|
-
EIGEN_DEVICE_FUNC
|
|
366
|
-
typename
|
|
367
|
-
|
|
368
|
-
cl::sycl::access::address_space::address_space_target>::pointer_t \
|
|
369
|
-
to, \
|
|
370
|
-
const packet_type& from, Index stride) { \
|
|
371
|
-
get_base_packet<packet_type>::set_pscatter(to, from, stride); \
|
|
372
|
-
}
|
|
373
|
-
|
|
374
|
-
// global space
|
|
375
|
-
SYCL_PSCATTER(global_space)
|
|
376
|
-
// local space
|
|
377
|
-
SYCL_PSCATTER(local_space)
|
|
378
|
-
|
|
379
|
-
#undef SYCL_PSCATTER
|
|
380
|
-
|
|
381
|
-
#define SYCL_PSCATTER_SPECILIZE(scalar, packet_type) \
|
|
382
|
-
template <> \
|
|
383
|
-
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter<scalar, packet_type>( \
|
|
384
|
-
typename unpacket_traits<packet_type>::type * to, \
|
|
385
|
-
const packet_type& from, Index stride) { \
|
|
386
|
-
get_base_packet<packet_type>::set_pscatter(to, from, stride); \
|
|
257
|
+
#define SYCL_PSCATTER_SPECILIZE(scalar, packet_type) \
|
|
258
|
+
template <> \
|
|
259
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter<scalar, packet_type>( \
|
|
260
|
+
typename unpacket_traits<packet_type>::type * to, const packet_type& from, Index stride) { \
|
|
261
|
+
get_base_packet<packet_type>::set_pscatter(to, from, stride); \
|
|
387
262
|
}
|
|
388
263
|
|
|
264
|
+
SYCL_PSCATTER_SPECILIZE(Eigen::half, cl::sycl::cl_half8)
|
|
389
265
|
SYCL_PSCATTER_SPECILIZE(float, cl::sycl::cl_float4)
|
|
390
266
|
SYCL_PSCATTER_SPECILIZE(double, cl::sycl::cl_double2)
|
|
391
267
|
|
|
392
268
|
#undef SYCL_PSCATTER_SPECILIZE
|
|
393
269
|
|
|
394
|
-
#define SYCL_PMAD(packet_type)
|
|
395
|
-
template <>
|
|
396
|
-
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pmadd(
|
|
397
|
-
|
|
398
|
-
return cl::sycl::mad(a, b, c);
|
|
270
|
+
#define SYCL_PMAD(packet_type) \
|
|
271
|
+
template <> \
|
|
272
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pmadd(const packet_type& a, const packet_type& b, \
|
|
273
|
+
const packet_type& c) { \
|
|
274
|
+
return cl::sycl::mad(a, b, c); \
|
|
399
275
|
}
|
|
400
276
|
|
|
277
|
+
SYCL_PMAD(cl::sycl::cl_half8)
|
|
401
278
|
SYCL_PMAD(cl::sycl::cl_float4)
|
|
402
279
|
SYCL_PMAD(cl::sycl::cl_double2)
|
|
403
280
|
#undef SYCL_PMAD
|
|
404
281
|
|
|
405
282
|
template <>
|
|
406
|
-
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
|
407
|
-
|
|
283
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Eigen::half pfirst<cl::sycl::cl_half8>(const cl::sycl::cl_half8& a) {
|
|
284
|
+
return Eigen::half(a.s0());
|
|
285
|
+
}
|
|
286
|
+
template <>
|
|
287
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float pfirst<cl::sycl::cl_float4>(const cl::sycl::cl_float4& a) {
|
|
408
288
|
return a.x();
|
|
409
289
|
}
|
|
410
290
|
template <>
|
|
411
|
-
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double pfirst<cl::sycl::cl_double2>(
|
|
412
|
-
const cl::sycl::cl_double2& a) {
|
|
291
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double pfirst<cl::sycl::cl_double2>(const cl::sycl::cl_double2& a) {
|
|
413
292
|
return a.x();
|
|
414
293
|
}
|
|
415
294
|
|
|
416
295
|
template <>
|
|
417
|
-
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
|
418
|
-
|
|
296
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Eigen::half predux<cl::sycl::cl_half8>(const cl::sycl::cl_half8& a) {
|
|
297
|
+
return Eigen::half(a.s0() + a.s1() + a.s2() + a.s3() + a.s4() + a.s5() + a.s6() + a.s7());
|
|
298
|
+
}
|
|
299
|
+
|
|
300
|
+
template <>
|
|
301
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux<cl::sycl::cl_float4>(const cl::sycl::cl_float4& a) {
|
|
419
302
|
return a.x() + a.y() + a.z() + a.w();
|
|
420
303
|
}
|
|
421
304
|
|
|
422
305
|
template <>
|
|
423
|
-
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux<cl::sycl::cl_double2>(
|
|
424
|
-
const cl::sycl::cl_double2& a) {
|
|
306
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux<cl::sycl::cl_double2>(const cl::sycl::cl_double2& a) {
|
|
425
307
|
return a.x() + a.y();
|
|
426
308
|
}
|
|
427
309
|
|
|
428
310
|
template <>
|
|
429
|
-
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
cl::sycl::fmax(a.z(), a.w()));
|
|
311
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Eigen::half predux_max<cl::sycl::cl_half8>(const cl::sycl::cl_half8& a) {
|
|
312
|
+
return Eigen::half(cl::sycl::fmax(cl::sycl::fmax(cl::sycl::fmax(a.s0(), a.s1()), cl::sycl::fmax(a.s2(), a.s3())),
|
|
313
|
+
cl::sycl::fmax(cl::sycl::fmax(a.s4(), a.s5()), cl::sycl::fmax(a.s6(), a.s7()))));
|
|
433
314
|
}
|
|
434
315
|
template <>
|
|
435
|
-
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
|
436
|
-
|
|
316
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux_max<cl::sycl::cl_float4>(const cl::sycl::cl_float4& a) {
|
|
317
|
+
return cl::sycl::fmax(cl::sycl::fmax(a.x(), a.y()), cl::sycl::fmax(a.z(), a.w()));
|
|
318
|
+
}
|
|
319
|
+
template <>
|
|
320
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_max<cl::sycl::cl_double2>(const cl::sycl::cl_double2& a) {
|
|
437
321
|
return cl::sycl::fmax(a.x(), a.y());
|
|
438
322
|
}
|
|
439
323
|
|
|
440
324
|
template <>
|
|
441
|
-
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
325
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Eigen::half predux_min<cl::sycl::cl_half8>(const cl::sycl::cl_half8& a) {
|
|
326
|
+
return Eigen::half(cl::sycl::fmin(cl::sycl::fmin(cl::sycl::fmin(a.s0(), a.s1()), cl::sycl::fmin(a.s2(), a.s3())),
|
|
327
|
+
cl::sycl::fmin(cl::sycl::fmin(a.s4(), a.s5()), cl::sycl::fmin(a.s6(), a.s7()))));
|
|
328
|
+
}
|
|
329
|
+
template <>
|
|
330
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux_min<cl::sycl::cl_float4>(const cl::sycl::cl_float4& a) {
|
|
331
|
+
return cl::sycl::fmin(cl::sycl::fmin(a.x(), a.y()), cl::sycl::fmin(a.z(), a.w()));
|
|
445
332
|
}
|
|
446
333
|
template <>
|
|
447
|
-
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_min<cl::sycl::cl_double2>(
|
|
448
|
-
const cl::sycl::cl_double2& a) {
|
|
334
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_min<cl::sycl::cl_double2>(const cl::sycl::cl_double2& a) {
|
|
449
335
|
return cl::sycl::fmin(a.x(), a.y());
|
|
450
336
|
}
|
|
451
337
|
|
|
452
338
|
template <>
|
|
453
|
-
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
|
454
|
-
|
|
339
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Eigen::half predux_mul<cl::sycl::cl_half8>(const cl::sycl::cl_half8& a) {
|
|
340
|
+
return Eigen::half(a.s0() * a.s1() * a.s2() * a.s3() * a.s4() * a.s5() * a.s6() * a.s7());
|
|
341
|
+
}
|
|
342
|
+
template <>
|
|
343
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux_mul<cl::sycl::cl_float4>(const cl::sycl::cl_float4& a) {
|
|
455
344
|
return a.x() * a.y() * a.z() * a.w();
|
|
456
345
|
}
|
|
457
346
|
template <>
|
|
458
|
-
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_mul<cl::sycl::cl_double2>(
|
|
459
|
-
const cl::sycl::cl_double2& a) {
|
|
347
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_mul<cl::sycl::cl_double2>(const cl::sycl::cl_double2& a) {
|
|
460
348
|
return a.x() * a.y();
|
|
461
349
|
}
|
|
462
350
|
|
|
463
351
|
template <>
|
|
464
|
-
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
352
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_half8 pabs<cl::sycl::cl_half8>(const cl::sycl::cl_half8& a) {
|
|
353
|
+
return cl::sycl::cl_half8(cl::sycl::fabs(a.s0()), cl::sycl::fabs(a.s1()), cl::sycl::fabs(a.s2()),
|
|
354
|
+
cl::sycl::fabs(a.s3()), cl::sycl::fabs(a.s4()), cl::sycl::fabs(a.s5()),
|
|
355
|
+
cl::sycl::fabs(a.s6()), cl::sycl::fabs(a.s7()));
|
|
468
356
|
}
|
|
469
357
|
template <>
|
|
470
|
-
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::
|
|
471
|
-
|
|
358
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4 pabs<cl::sycl::cl_float4>(const cl::sycl::cl_float4& a) {
|
|
359
|
+
return cl::sycl::cl_float4(cl::sycl::fabs(a.x()), cl::sycl::fabs(a.y()), cl::sycl::fabs(a.z()),
|
|
360
|
+
cl::sycl::fabs(a.w()));
|
|
361
|
+
}
|
|
362
|
+
template <>
|
|
363
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_double2 pabs<cl::sycl::cl_double2>(const cl::sycl::cl_double2& a) {
|
|
472
364
|
return cl::sycl::cl_double2(cl::sycl::fabs(a.x()), cl::sycl::fabs(a.y()));
|
|
473
365
|
}
|
|
474
366
|
|
|
475
367
|
template <typename Packet>
|
|
476
|
-
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet sycl_pcmp_le(const Packet
|
|
477
|
-
|
|
478
|
-
return ((a <= b)
|
|
479
|
-
.template convert<typename unpacket_traits<Packet>::type,
|
|
480
|
-
cl::sycl::rounding_mode::automatic>());
|
|
368
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet sycl_pcmp_le(const Packet& a, const Packet& b) {
|
|
369
|
+
return (a <= b).template as<Packet>();
|
|
481
370
|
}
|
|
482
371
|
|
|
483
372
|
template <typename Packet>
|
|
484
|
-
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet sycl_pcmp_lt(const Packet
|
|
485
|
-
|
|
486
|
-
return ((a < b)
|
|
487
|
-
.template convert<typename unpacket_traits<Packet>::type,
|
|
488
|
-
cl::sycl::rounding_mode::automatic>());
|
|
373
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet sycl_pcmp_lt(const Packet& a, const Packet& b) {
|
|
374
|
+
return (a < b).template as<Packet>();
|
|
489
375
|
}
|
|
490
376
|
|
|
491
377
|
template <typename Packet>
|
|
492
|
-
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet sycl_pcmp_eq(const Packet
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
template <> \
|
|
501
|
-
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE TYPE pcmp_##OP<TYPE>(const TYPE &a, \
|
|
502
|
-
const TYPE &b) { \
|
|
503
|
-
return sycl_pcmp_##OP<TYPE>(a, b); \
|
|
378
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet sycl_pcmp_eq(const Packet& a, const Packet& b) {
|
|
379
|
+
return (a == b).template as<Packet>();
|
|
380
|
+
}
|
|
381
|
+
|
|
382
|
+
#define SYCL_PCMP(OP, TYPE) \
|
|
383
|
+
template <> \
|
|
384
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE TYPE pcmp_##OP<TYPE>(const TYPE& a, const TYPE& b) { \
|
|
385
|
+
return sycl_pcmp_##OP<TYPE>(a, b); \
|
|
504
386
|
}
|
|
505
387
|
|
|
388
|
+
SYCL_PCMP(le, cl::sycl::cl_half8)
|
|
389
|
+
SYCL_PCMP(lt, cl::sycl::cl_half8)
|
|
390
|
+
SYCL_PCMP(eq, cl::sycl::cl_half8)
|
|
506
391
|
SYCL_PCMP(le, cl::sycl::cl_float4)
|
|
507
392
|
SYCL_PCMP(lt, cl::sycl::cl_float4)
|
|
508
393
|
SYCL_PCMP(eq, cl::sycl::cl_float4)
|
|
@@ -511,78 +396,121 @@ SYCL_PCMP(lt, cl::sycl::cl_double2)
|
|
|
511
396
|
SYCL_PCMP(eq, cl::sycl::cl_double2)
|
|
512
397
|
#undef SYCL_PCMP
|
|
513
398
|
|
|
514
|
-
|
|
399
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void ptranspose(PacketBlock<cl::sycl::cl_half8, 8>& kernel) {
|
|
400
|
+
cl::sycl::cl_half tmp = kernel.packet[0].s1();
|
|
401
|
+
kernel.packet[0].s1() = kernel.packet[1].s0();
|
|
402
|
+
kernel.packet[1].s0() = tmp;
|
|
515
403
|
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
};
|
|
520
|
-
template <> struct convert_to_integer<double> {
|
|
521
|
-
using type = std::int64_t;
|
|
522
|
-
using packet_type = cl::sycl::cl_long2;
|
|
523
|
-
};
|
|
404
|
+
tmp = kernel.packet[0].s2();
|
|
405
|
+
kernel.packet[0].s2() = kernel.packet[2].s0();
|
|
406
|
+
kernel.packet[2].s0() = tmp;
|
|
524
407
|
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
vector_as_int(const PacketIn &p) {
|
|
529
|
-
return (
|
|
530
|
-
p.template convert<typename convert_to_integer<
|
|
531
|
-
typename unpacket_traits<PacketIn>::type>::type,
|
|
532
|
-
cl::sycl::rounding_mode::automatic>());
|
|
533
|
-
}
|
|
408
|
+
tmp = kernel.packet[0].s3();
|
|
409
|
+
kernel.packet[0].s3() = kernel.packet[3].s0();
|
|
410
|
+
kernel.packet[3].s0() = tmp;
|
|
534
411
|
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
return (p.template convert<typename unpacket_traits<packetOut>::type,
|
|
539
|
-
cl::sycl::rounding_mode::automatic>());
|
|
540
|
-
}
|
|
412
|
+
tmp = kernel.packet[0].s4();
|
|
413
|
+
kernel.packet[0].s4() = kernel.packet[4].s0();
|
|
414
|
+
kernel.packet[4].s0() = tmp;
|
|
541
415
|
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
const TYPE &b) { \
|
|
546
|
-
return convert_vector<TYPE>(vector_as_int(a) & vector_as_int(b)); \
|
|
547
|
-
}
|
|
548
|
-
SYCL_PAND(cl::sycl::cl_float4)
|
|
549
|
-
SYCL_PAND(cl::sycl::cl_double2)
|
|
550
|
-
#undef SYCL_PAND
|
|
551
|
-
|
|
552
|
-
#define SYCL_POR(TYPE) \
|
|
553
|
-
template <> \
|
|
554
|
-
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TYPE por<TYPE>(const TYPE &a, \
|
|
555
|
-
const TYPE &b) { \
|
|
556
|
-
return convert_vector<TYPE>(vector_as_int(a) | vector_as_int(b)); \
|
|
557
|
-
}
|
|
416
|
+
tmp = kernel.packet[0].s5();
|
|
417
|
+
kernel.packet[0].s5() = kernel.packet[5].s0();
|
|
418
|
+
kernel.packet[5].s0() = tmp;
|
|
558
419
|
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
420
|
+
tmp = kernel.packet[0].s6();
|
|
421
|
+
kernel.packet[0].s6() = kernel.packet[6].s0();
|
|
422
|
+
kernel.packet[6].s0() = tmp;
|
|
562
423
|
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
const TYPE &b) { \
|
|
567
|
-
return convert_vector<TYPE>(vector_as_int(a) ^ vector_as_int(b)); \
|
|
568
|
-
}
|
|
424
|
+
tmp = kernel.packet[0].s7();
|
|
425
|
+
kernel.packet[0].s7() = kernel.packet[7].s0();
|
|
426
|
+
kernel.packet[7].s0() = tmp;
|
|
569
427
|
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
428
|
+
tmp = kernel.packet[1].s2();
|
|
429
|
+
kernel.packet[1].s2() = kernel.packet[2].s1();
|
|
430
|
+
kernel.packet[2].s1() = tmp;
|
|
573
431
|
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
432
|
+
tmp = kernel.packet[1].s3();
|
|
433
|
+
kernel.packet[1].s3() = kernel.packet[3].s1();
|
|
434
|
+
kernel.packet[3].s1() = tmp;
|
|
435
|
+
|
|
436
|
+
tmp = kernel.packet[1].s4();
|
|
437
|
+
kernel.packet[1].s4() = kernel.packet[4].s1();
|
|
438
|
+
kernel.packet[4].s1() = tmp;
|
|
439
|
+
|
|
440
|
+
tmp = kernel.packet[1].s5();
|
|
441
|
+
kernel.packet[1].s5() = kernel.packet[5].s1();
|
|
442
|
+
kernel.packet[5].s1() = tmp;
|
|
443
|
+
|
|
444
|
+
tmp = kernel.packet[1].s6();
|
|
445
|
+
kernel.packet[1].s6() = kernel.packet[6].s1();
|
|
446
|
+
kernel.packet[6].s1() = tmp;
|
|
583
447
|
|
|
584
|
-
|
|
585
|
-
|
|
448
|
+
tmp = kernel.packet[1].s7();
|
|
449
|
+
kernel.packet[1].s7() = kernel.packet[7].s1();
|
|
450
|
+
kernel.packet[7].s1() = tmp;
|
|
451
|
+
|
|
452
|
+
tmp = kernel.packet[2].s3();
|
|
453
|
+
kernel.packet[2].s3() = kernel.packet[3].s2();
|
|
454
|
+
kernel.packet[3].s2() = tmp;
|
|
455
|
+
|
|
456
|
+
tmp = kernel.packet[2].s4();
|
|
457
|
+
kernel.packet[2].s4() = kernel.packet[4].s2();
|
|
458
|
+
kernel.packet[4].s2() = tmp;
|
|
459
|
+
|
|
460
|
+
tmp = kernel.packet[2].s5();
|
|
461
|
+
kernel.packet[2].s5() = kernel.packet[5].s2();
|
|
462
|
+
kernel.packet[5].s2() = tmp;
|
|
463
|
+
|
|
464
|
+
tmp = kernel.packet[2].s6();
|
|
465
|
+
kernel.packet[2].s6() = kernel.packet[6].s2();
|
|
466
|
+
kernel.packet[6].s2() = tmp;
|
|
467
|
+
|
|
468
|
+
tmp = kernel.packet[2].s7();
|
|
469
|
+
kernel.packet[2].s7() = kernel.packet[7].s2();
|
|
470
|
+
kernel.packet[7].s2() = tmp;
|
|
471
|
+
|
|
472
|
+
tmp = kernel.packet[3].s4();
|
|
473
|
+
kernel.packet[3].s4() = kernel.packet[4].s3();
|
|
474
|
+
kernel.packet[4].s3() = tmp;
|
|
475
|
+
|
|
476
|
+
tmp = kernel.packet[3].s5();
|
|
477
|
+
kernel.packet[3].s5() = kernel.packet[5].s3();
|
|
478
|
+
kernel.packet[5].s3() = tmp;
|
|
479
|
+
|
|
480
|
+
tmp = kernel.packet[3].s6();
|
|
481
|
+
kernel.packet[3].s6() = kernel.packet[6].s3();
|
|
482
|
+
kernel.packet[6].s3() = tmp;
|
|
483
|
+
|
|
484
|
+
tmp = kernel.packet[3].s7();
|
|
485
|
+
kernel.packet[3].s7() = kernel.packet[7].s3();
|
|
486
|
+
kernel.packet[7].s3() = tmp;
|
|
487
|
+
|
|
488
|
+
tmp = kernel.packet[4].s5();
|
|
489
|
+
kernel.packet[4].s5() = kernel.packet[5].s4();
|
|
490
|
+
kernel.packet[5].s4() = tmp;
|
|
491
|
+
|
|
492
|
+
tmp = kernel.packet[4].s6();
|
|
493
|
+
kernel.packet[4].s6() = kernel.packet[6].s4();
|
|
494
|
+
kernel.packet[6].s4() = tmp;
|
|
495
|
+
|
|
496
|
+
tmp = kernel.packet[4].s7();
|
|
497
|
+
kernel.packet[4].s7() = kernel.packet[7].s4();
|
|
498
|
+
kernel.packet[7].s4() = tmp;
|
|
499
|
+
|
|
500
|
+
tmp = kernel.packet[5].s6();
|
|
501
|
+
kernel.packet[5].s6() = kernel.packet[6].s5();
|
|
502
|
+
kernel.packet[6].s5() = tmp;
|
|
503
|
+
|
|
504
|
+
tmp = kernel.packet[5].s7();
|
|
505
|
+
kernel.packet[5].s7() = kernel.packet[7].s5();
|
|
506
|
+
kernel.packet[7].s5() = tmp;
|
|
507
|
+
|
|
508
|
+
tmp = kernel.packet[6].s7();
|
|
509
|
+
kernel.packet[6].s7() = kernel.packet[7].s6();
|
|
510
|
+
kernel.packet[7].s6() = tmp;
|
|
511
|
+
}
|
|
512
|
+
|
|
513
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void ptranspose(PacketBlock<cl::sycl::cl_float4, 4>& kernel) {
|
|
586
514
|
float tmp = kernel.packet[0].y();
|
|
587
515
|
kernel.packet[0].y() = kernel.packet[1].x();
|
|
588
516
|
kernel.packet[1].x() = tmp;
|
|
@@ -608,61 +536,39 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void ptranspose(
|
|
|
608
536
|
kernel.packet[3].z() = tmp;
|
|
609
537
|
}
|
|
610
538
|
|
|
611
|
-
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void ptranspose(
|
|
612
|
-
PacketBlock<cl::sycl::cl_double2, 2>& kernel) {
|
|
539
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void ptranspose(PacketBlock<cl::sycl::cl_double2, 2>& kernel) {
|
|
613
540
|
double tmp = kernel.packet[0].y();
|
|
614
541
|
kernel.packet[0].y() = kernel.packet[1].x();
|
|
615
542
|
kernel.packet[1].x() = tmp;
|
|
616
543
|
}
|
|
617
544
|
|
|
545
|
+
template <>
|
|
546
|
+
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_half8 pblend(
|
|
547
|
+
const Selector<unpacket_traits<cl::sycl::cl_half8>::size>& ifPacket, const cl::sycl::cl_half8& thenPacket,
|
|
548
|
+
const cl::sycl::cl_half8& elsePacket) {
|
|
549
|
+
cl::sycl::cl_short8 condition(ifPacket.select[0] ? 0 : -1, ifPacket.select[1] ? 0 : -1, ifPacket.select[2] ? 0 : -1,
|
|
550
|
+
ifPacket.select[3] ? 0 : -1, ifPacket.select[4] ? 0 : -1, ifPacket.select[5] ? 0 : -1,
|
|
551
|
+
ifPacket.select[6] ? 0 : -1, ifPacket.select[7] ? 0 : -1);
|
|
552
|
+
return cl::sycl::select(thenPacket, elsePacket, condition);
|
|
553
|
+
}
|
|
554
|
+
|
|
618
555
|
template <>
|
|
619
556
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4 pblend(
|
|
620
|
-
const Selector<unpacket_traits<cl::sycl::cl_float4>::size>& ifPacket,
|
|
621
|
-
const cl::sycl::cl_float4& thenPacket,
|
|
557
|
+
const Selector<unpacket_traits<cl::sycl::cl_float4>::size>& ifPacket, const cl::sycl::cl_float4& thenPacket,
|
|
622
558
|
const cl::sycl::cl_float4& elsePacket) {
|
|
623
|
-
cl::sycl::cl_int4 condition(
|
|
624
|
-
|
|
625
|
-
ifPacket.select[2] ? 0 : -1, ifPacket.select[3] ? 0 : -1);
|
|
559
|
+
cl::sycl::cl_int4 condition(ifPacket.select[0] ? 0 : -1, ifPacket.select[1] ? 0 : -1, ifPacket.select[2] ? 0 : -1,
|
|
560
|
+
ifPacket.select[3] ? 0 : -1);
|
|
626
561
|
return cl::sycl::select(thenPacket, elsePacket, condition);
|
|
627
562
|
}
|
|
628
563
|
|
|
629
564
|
template <>
|
|
630
|
-
inline cl::sycl::cl_double2 pblend(
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
const cl::sycl::cl_double2& elsePacket) {
|
|
634
|
-
cl::sycl::cl_long2 condition(ifPacket.select[0] ? 0 : -1,
|
|
635
|
-
ifPacket.select[1] ? 0 : -1);
|
|
565
|
+
inline cl::sycl::cl_double2 pblend(const Selector<unpacket_traits<cl::sycl::cl_double2>::size>& ifPacket,
|
|
566
|
+
const cl::sycl::cl_double2& thenPacket, const cl::sycl::cl_double2& elsePacket) {
|
|
567
|
+
cl::sycl::cl_long2 condition(ifPacket.select[0] ? 0 : -1, ifPacket.select[1] ? 0 : -1);
|
|
636
568
|
return cl::sycl::select(thenPacket, elsePacket, condition);
|
|
637
569
|
}
|
|
638
570
|
#endif // SYCL_DEVICE_ONLY
|
|
639
571
|
|
|
640
|
-
#define SYCL_PSTORE(alignment) \
|
|
641
|
-
template <typename packet_type> \
|
|
642
|
-
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstore##alignment( \
|
|
643
|
-
const Eigen::TensorSycl::internal::RangeAccess< \
|
|
644
|
-
cl::sycl::access::mode::read_write, \
|
|
645
|
-
typename unpacket_traits<packet_type>::type>& to, \
|
|
646
|
-
const packet_type& from) { \
|
|
647
|
-
pstore##alignment(to.get_pointer(), from); \
|
|
648
|
-
}
|
|
649
|
-
|
|
650
|
-
// global space
|
|
651
|
-
SYCL_PSTORE()
|
|
652
|
-
SYCL_PSTORE(u)
|
|
653
|
-
|
|
654
|
-
#undef SYCL_PSTORE
|
|
655
|
-
|
|
656
|
-
template <typename scalar, typename packet_type, int Alignment>
|
|
657
|
-
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(
|
|
658
|
-
Eigen::TensorSycl::internal::RangeAccess<
|
|
659
|
-
cl::sycl::access::mode::read_write,
|
|
660
|
-
typename unpacket_traits<packet_type>::type>
|
|
661
|
-
to,
|
|
662
|
-
const packet_type& from) {
|
|
663
|
-
pstoret<scalar, packet_type, Alignment>(to.get_pointer(), from);
|
|
664
|
-
}
|
|
665
|
-
|
|
666
572
|
} // end namespace internal
|
|
667
573
|
|
|
668
574
|
} // end namespace Eigen
|