@smake/eigen 1.1.0 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (431)
  1. package/README.md +1 -1
  2. package/eigen/Eigen/AccelerateSupport +52 -0
  3. package/eigen/Eigen/Cholesky +18 -20
  4. package/eigen/Eigen/CholmodSupport +28 -28
  5. package/eigen/Eigen/Core +187 -120
  6. package/eigen/Eigen/Eigenvalues +16 -13
  7. package/eigen/Eigen/Geometry +18 -18
  8. package/eigen/Eigen/Householder +9 -7
  9. package/eigen/Eigen/IterativeLinearSolvers +8 -4
  10. package/eigen/Eigen/Jacobi +14 -13
  11. package/eigen/Eigen/KLUSupport +23 -21
  12. package/eigen/Eigen/LU +15 -16
  13. package/eigen/Eigen/MetisSupport +12 -12
  14. package/eigen/Eigen/OrderingMethods +54 -51
  15. package/eigen/Eigen/PaStiXSupport +23 -21
  16. package/eigen/Eigen/PardisoSupport +17 -14
  17. package/eigen/Eigen/QR +18 -20
  18. package/eigen/Eigen/QtAlignedMalloc +5 -12
  19. package/eigen/Eigen/SPQRSupport +21 -14
  20. package/eigen/Eigen/SVD +23 -17
  21. package/eigen/Eigen/Sparse +1 -2
  22. package/eigen/Eigen/SparseCholesky +18 -15
  23. package/eigen/Eigen/SparseCore +18 -17
  24. package/eigen/Eigen/SparseLU +9 -9
  25. package/eigen/Eigen/SparseQR +16 -14
  26. package/eigen/Eigen/StdDeque +5 -2
  27. package/eigen/Eigen/StdList +5 -2
  28. package/eigen/Eigen/StdVector +5 -2
  29. package/eigen/Eigen/SuperLUSupport +30 -24
  30. package/eigen/Eigen/ThreadPool +80 -0
  31. package/eigen/Eigen/UmfPackSupport +19 -17
  32. package/eigen/Eigen/Version +14 -0
  33. package/eigen/Eigen/src/AccelerateSupport/AccelerateSupport.h +423 -0
  34. package/eigen/Eigen/src/AccelerateSupport/InternalHeaderCheck.h +3 -0
  35. package/eigen/Eigen/src/Cholesky/InternalHeaderCheck.h +3 -0
  36. package/eigen/Eigen/src/Cholesky/LDLT.h +366 -405
  37. package/eigen/Eigen/src/Cholesky/LLT.h +323 -367
  38. package/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h +81 -56
  39. package/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +585 -529
  40. package/eigen/Eigen/src/CholmodSupport/InternalHeaderCheck.h +3 -0
  41. package/eigen/Eigen/src/Core/ArithmeticSequence.h +143 -317
  42. package/eigen/Eigen/src/Core/Array.h +329 -370
  43. package/eigen/Eigen/src/Core/ArrayBase.h +190 -203
  44. package/eigen/Eigen/src/Core/ArrayWrapper.h +126 -170
  45. package/eigen/Eigen/src/Core/Assign.h +30 -40
  46. package/eigen/Eigen/src/Core/AssignEvaluator.h +651 -604
  47. package/eigen/Eigen/src/Core/Assign_MKL.h +125 -120
  48. package/eigen/Eigen/src/Core/BandMatrix.h +267 -282
  49. package/eigen/Eigen/src/Core/Block.h +371 -390
  50. package/eigen/Eigen/src/Core/CommaInitializer.h +85 -100
  51. package/eigen/Eigen/src/Core/ConditionEstimator.h +51 -53
  52. package/eigen/Eigen/src/Core/CoreEvaluators.h +1214 -937
  53. package/eigen/Eigen/src/Core/CoreIterators.h +72 -63
  54. package/eigen/Eigen/src/Core/CwiseBinaryOp.h +112 -129
  55. package/eigen/Eigen/src/Core/CwiseNullaryOp.h +676 -702
  56. package/eigen/Eigen/src/Core/CwiseTernaryOp.h +77 -103
  57. package/eigen/Eigen/src/Core/CwiseUnaryOp.h +55 -67
  58. package/eigen/Eigen/src/Core/CwiseUnaryView.h +127 -92
  59. package/eigen/Eigen/src/Core/DenseBase.h +630 -658
  60. package/eigen/Eigen/src/Core/DenseCoeffsBase.h +511 -628
  61. package/eigen/Eigen/src/Core/DenseStorage.h +511 -590
  62. package/eigen/Eigen/src/Core/DeviceWrapper.h +153 -0
  63. package/eigen/Eigen/src/Core/Diagonal.h +168 -207
  64. package/eigen/Eigen/src/Core/DiagonalMatrix.h +346 -317
  65. package/eigen/Eigen/src/Core/DiagonalProduct.h +12 -10
  66. package/eigen/Eigen/src/Core/Dot.h +167 -217
  67. package/eigen/Eigen/src/Core/EigenBase.h +74 -85
  68. package/eigen/Eigen/src/Core/Fill.h +138 -0
  69. package/eigen/Eigen/src/Core/FindCoeff.h +464 -0
  70. package/eigen/Eigen/src/Core/ForceAlignedAccess.h +90 -113
  71. package/eigen/Eigen/src/Core/Fuzzy.h +82 -105
  72. package/eigen/Eigen/src/Core/GeneralProduct.h +315 -261
  73. package/eigen/Eigen/src/Core/GenericPacketMath.h +1182 -520
  74. package/eigen/Eigen/src/Core/GlobalFunctions.h +193 -157
  75. package/eigen/Eigen/src/Core/IO.h +131 -156
  76. package/eigen/Eigen/src/Core/IndexedView.h +209 -125
  77. package/eigen/Eigen/src/Core/InnerProduct.h +260 -0
  78. package/eigen/Eigen/src/Core/InternalHeaderCheck.h +3 -0
  79. package/eigen/Eigen/src/Core/Inverse.h +50 -59
  80. package/eigen/Eigen/src/Core/Map.h +123 -141
  81. package/eigen/Eigen/src/Core/MapBase.h +255 -282
  82. package/eigen/Eigen/src/Core/MathFunctions.h +1247 -1201
  83. package/eigen/Eigen/src/Core/MathFunctionsImpl.h +162 -99
  84. package/eigen/Eigen/src/Core/Matrix.h +463 -494
  85. package/eigen/Eigen/src/Core/MatrixBase.h +468 -470
  86. package/eigen/Eigen/src/Core/NestByValue.h +58 -52
  87. package/eigen/Eigen/src/Core/NoAlias.h +79 -86
  88. package/eigen/Eigen/src/Core/NumTraits.h +206 -206
  89. package/eigen/Eigen/src/Core/PartialReduxEvaluator.h +163 -142
  90. package/eigen/Eigen/src/Core/PermutationMatrix.h +461 -511
  91. package/eigen/Eigen/src/Core/PlainObjectBase.h +858 -972
  92. package/eigen/Eigen/src/Core/Product.h +246 -130
  93. package/eigen/Eigen/src/Core/ProductEvaluators.h +779 -671
  94. package/eigen/Eigen/src/Core/Random.h +153 -164
  95. package/eigen/Eigen/src/Core/RandomImpl.h +262 -0
  96. package/eigen/Eigen/src/Core/RealView.h +250 -0
  97. package/eigen/Eigen/src/Core/Redux.h +334 -314
  98. package/eigen/Eigen/src/Core/Ref.h +259 -257
  99. package/eigen/Eigen/src/Core/Replicate.h +92 -104
  100. package/eigen/Eigen/src/Core/Reshaped.h +215 -271
  101. package/eigen/Eigen/src/Core/ReturnByValue.h +47 -55
  102. package/eigen/Eigen/src/Core/Reverse.h +133 -148
  103. package/eigen/Eigen/src/Core/Select.h +68 -140
  104. package/eigen/Eigen/src/Core/SelfAdjointView.h +254 -290
  105. package/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h +23 -20
  106. package/eigen/Eigen/src/Core/SkewSymmetricMatrix3.h +382 -0
  107. package/eigen/Eigen/src/Core/Solve.h +88 -102
  108. package/eigen/Eigen/src/Core/SolveTriangular.h +126 -124
  109. package/eigen/Eigen/src/Core/SolverBase.h +132 -133
  110. package/eigen/Eigen/src/Core/StableNorm.h +113 -147
  111. package/eigen/Eigen/src/Core/StlIterators.h +404 -248
  112. package/eigen/Eigen/src/Core/Stride.h +90 -92
  113. package/eigen/Eigen/src/Core/Swap.h +70 -39
  114. package/eigen/Eigen/src/Core/Transpose.h +258 -295
  115. package/eigen/Eigen/src/Core/Transpositions.h +270 -333
  116. package/eigen/Eigen/src/Core/TriangularMatrix.h +642 -743
  117. package/eigen/Eigen/src/Core/VectorBlock.h +59 -72
  118. package/eigen/Eigen/src/Core/VectorwiseOp.h +653 -704
  119. package/eigen/Eigen/src/Core/Visitor.h +464 -308
  120. package/eigen/Eigen/src/Core/arch/AVX/Complex.h +380 -187
  121. package/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +65 -163
  122. package/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +2145 -638
  123. package/eigen/Eigen/src/Core/arch/AVX/Reductions.h +353 -0
  124. package/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +253 -60
  125. package/eigen/Eigen/src/Core/arch/AVX512/Complex.h +278 -228
  126. package/eigen/Eigen/src/Core/arch/AVX512/GemmKernel.h +1245 -0
  127. package/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +48 -269
  128. package/eigen/Eigen/src/Core/arch/AVX512/MathFunctionsFP16.h +75 -0
  129. package/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1597 -754
  130. package/eigen/Eigen/src/Core/arch/AVX512/PacketMathFP16.h +1413 -0
  131. package/eigen/Eigen/src/Core/arch/AVX512/Reductions.h +297 -0
  132. package/eigen/Eigen/src/Core/arch/AVX512/TrsmKernel.h +1167 -0
  133. package/eigen/Eigen/src/Core/arch/AVX512/TrsmUnrolls.inc +1219 -0
  134. package/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +229 -41
  135. package/eigen/Eigen/src/Core/arch/AVX512/TypeCastingFP16.h +130 -0
  136. package/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +420 -184
  137. package/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +40 -49
  138. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2962 -2213
  139. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +196 -212
  140. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +713 -441
  141. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMAbfloat16.h +742 -0
  142. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixVectorProduct.inc +2818 -0
  143. package/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2380 -1362
  144. package/eigen/Eigen/src/Core/arch/AltiVec/TypeCasting.h +153 -0
  145. package/eigen/Eigen/src/Core/arch/Default/BFloat16.h +390 -224
  146. package/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +78 -67
  147. package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1784 -799
  148. package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +167 -50
  149. package/eigen/Eigen/src/Core/arch/Default/Half.h +528 -379
  150. package/eigen/Eigen/src/Core/arch/Default/Settings.h +10 -12
  151. package/eigen/Eigen/src/Core/arch/GPU/Complex.h +244 -0
  152. package/eigen/Eigen/src/Core/arch/GPU/MathFunctions.h +41 -40
  153. package/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +550 -523
  154. package/eigen/Eigen/src/Core/arch/GPU/Tuple.h +268 -0
  155. package/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +27 -30
  156. package/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +8 -8
  157. package/eigen/Eigen/src/Core/arch/HVX/PacketMath.h +1088 -0
  158. package/eigen/Eigen/src/Core/arch/LSX/Complex.h +520 -0
  159. package/eigen/Eigen/src/Core/arch/LSX/GeneralBlockPanelKernel.h +23 -0
  160. package/eigen/Eigen/src/Core/arch/LSX/MathFunctions.h +43 -0
  161. package/eigen/Eigen/src/Core/arch/LSX/PacketMath.h +2866 -0
  162. package/eigen/Eigen/src/Core/arch/LSX/TypeCasting.h +526 -0
  163. package/eigen/Eigen/src/Core/arch/MSA/Complex.h +54 -82
  164. package/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +84 -92
  165. package/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +51 -47
  166. package/eigen/Eigen/src/Core/arch/NEON/Complex.h +454 -306
  167. package/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +175 -115
  168. package/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +23 -30
  169. package/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4366 -2857
  170. package/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +616 -393
  171. package/eigen/Eigen/src/Core/arch/NEON/UnaryFunctors.h +57 -0
  172. package/eigen/Eigen/src/Core/arch/SSE/Complex.h +350 -198
  173. package/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +38 -149
  174. package/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +1791 -912
  175. package/eigen/Eigen/src/Core/arch/SSE/Reductions.h +324 -0
  176. package/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +128 -40
  177. package/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +10 -6
  178. package/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +156 -234
  179. package/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +6 -3
  180. package/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +27 -32
  181. package/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +119 -117
  182. package/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +325 -419
  183. package/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +15 -17
  184. package/eigen/Eigen/src/Core/arch/ZVector/Complex.h +325 -181
  185. package/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +94 -83
  186. package/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +811 -458
  187. package/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +121 -124
  188. package/eigen/Eigen/src/Core/functors/BinaryFunctors.h +576 -370
  189. package/eigen/Eigen/src/Core/functors/NullaryFunctors.h +194 -109
  190. package/eigen/Eigen/src/Core/functors/StlFunctors.h +95 -112
  191. package/eigen/Eigen/src/Core/functors/TernaryFunctors.h +34 -7
  192. package/eigen/Eigen/src/Core/functors/UnaryFunctors.h +1038 -749
  193. package/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1883 -1375
  194. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +312 -370
  195. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +189 -176
  196. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +84 -81
  197. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +154 -73
  198. package/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +292 -337
  199. package/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +80 -77
  200. package/eigen/Eigen/src/Core/products/Parallelizer.h +207 -105
  201. package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +327 -388
  202. package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +206 -224
  203. package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +138 -147
  204. package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +58 -61
  205. package/eigen/Eigen/src/Core/products/SelfadjointProduct.h +71 -71
  206. package/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +48 -47
  207. package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +294 -369
  208. package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +246 -238
  209. package/eigen/Eigen/src/Core/products/TriangularMatrixVector.h +244 -247
  210. package/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +212 -192
  211. package/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +328 -277
  212. package/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +108 -109
  213. package/eigen/Eigen/src/Core/products/TriangularSolverVector.h +68 -94
  214. package/eigen/Eigen/src/Core/util/Assert.h +158 -0
  215. package/eigen/Eigen/src/Core/util/BlasUtil.h +342 -303
  216. package/eigen/Eigen/src/Core/util/ConfigureVectorization.h +348 -317
  217. package/eigen/Eigen/src/Core/util/Constants.h +297 -262
  218. package/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +130 -90
  219. package/eigen/Eigen/src/Core/util/EmulateArray.h +270 -0
  220. package/eigen/Eigen/src/Core/util/ForwardDeclarations.h +449 -247
  221. package/eigen/Eigen/src/Core/util/GpuHipCudaDefines.inc +101 -0
  222. package/eigen/Eigen/src/Core/util/GpuHipCudaUndefines.inc +45 -0
  223. package/eigen/Eigen/src/Core/util/IndexedViewHelper.h +417 -116
  224. package/eigen/Eigen/src/Core/util/IntegralConstant.h +211 -204
  225. package/eigen/Eigen/src/Core/util/MKL_support.h +39 -37
  226. package/eigen/Eigen/src/Core/util/Macros.h +655 -773
  227. package/eigen/Eigen/src/Core/util/MaxSizeVector.h +139 -0
  228. package/eigen/Eigen/src/Core/util/Memory.h +970 -748
  229. package/eigen/Eigen/src/Core/util/Meta.h +581 -633
  230. package/eigen/Eigen/src/Core/util/MoreMeta.h +638 -0
  231. package/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +32 -19
  232. package/eigen/Eigen/src/Core/util/ReshapedHelper.h +17 -17
  233. package/eigen/Eigen/src/Core/util/Serializer.h +209 -0
  234. package/eigen/Eigen/src/Core/util/StaticAssert.h +50 -166
  235. package/eigen/Eigen/src/Core/util/SymbolicIndex.h +377 -225
  236. package/eigen/Eigen/src/Core/util/XprHelper.h +784 -547
  237. package/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +246 -277
  238. package/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +299 -319
  239. package/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +52 -48
  240. package/eigen/Eigen/src/Eigenvalues/EigenSolver.h +413 -456
  241. package/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +309 -325
  242. package/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +157 -171
  243. package/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +292 -310
  244. package/eigen/Eigen/src/Eigenvalues/InternalHeaderCheck.h +3 -0
  245. package/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +89 -105
  246. package/eigen/Eigen/src/Eigenvalues/RealQZ.h +537 -607
  247. package/eigen/Eigen/src/Eigenvalues/RealSchur.h +342 -381
  248. package/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +41 -35
  249. package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +541 -595
  250. package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +47 -44
  251. package/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +430 -462
  252. package/eigen/Eigen/src/Geometry/AlignedBox.h +226 -227
  253. package/eigen/Eigen/src/Geometry/AngleAxis.h +131 -133
  254. package/eigen/Eigen/src/Geometry/EulerAngles.h +163 -74
  255. package/eigen/Eigen/src/Geometry/Homogeneous.h +285 -333
  256. package/eigen/Eigen/src/Geometry/Hyperplane.h +151 -160
  257. package/eigen/Eigen/src/Geometry/InternalHeaderCheck.h +3 -0
  258. package/eigen/Eigen/src/Geometry/OrthoMethods.h +168 -146
  259. package/eigen/Eigen/src/Geometry/ParametrizedLine.h +127 -127
  260. package/eigen/Eigen/src/Geometry/Quaternion.h +566 -506
  261. package/eigen/Eigen/src/Geometry/Rotation2D.h +107 -105
  262. package/eigen/Eigen/src/Geometry/RotationBase.h +148 -145
  263. package/eigen/Eigen/src/Geometry/Scaling.h +113 -106
  264. package/eigen/Eigen/src/Geometry/Transform.h +858 -936
  265. package/eigen/Eigen/src/Geometry/Translation.h +94 -92
  266. package/eigen/Eigen/src/Geometry/Umeyama.h +79 -84
  267. package/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +90 -104
  268. package/eigen/Eigen/src/Householder/BlockHouseholder.h +51 -46
  269. package/eigen/Eigen/src/Householder/Householder.h +102 -124
  270. package/eigen/Eigen/src/Householder/HouseholderSequence.h +412 -453
  271. package/eigen/Eigen/src/Householder/InternalHeaderCheck.h +3 -0
  272. package/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +149 -162
  273. package/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +124 -119
  274. package/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +92 -104
  275. package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +251 -243
  276. package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +224 -228
  277. package/eigen/Eigen/src/IterativeLinearSolvers/InternalHeaderCheck.h +3 -0
  278. package/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +178 -227
  279. package/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +79 -84
  280. package/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +54 -60
  281. package/eigen/Eigen/src/Jacobi/InternalHeaderCheck.h +3 -0
  282. package/eigen/Eigen/src/Jacobi/Jacobi.h +252 -308
  283. package/eigen/Eigen/src/KLUSupport/InternalHeaderCheck.h +3 -0
  284. package/eigen/Eigen/src/KLUSupport/KLUSupport.h +208 -227
  285. package/eigen/Eigen/src/LU/Determinant.h +50 -69
  286. package/eigen/Eigen/src/LU/FullPivLU.h +545 -596
  287. package/eigen/Eigen/src/LU/InternalHeaderCheck.h +3 -0
  288. package/eigen/Eigen/src/LU/InverseImpl.h +206 -285
  289. package/eigen/Eigen/src/LU/PartialPivLU.h +390 -428
  290. package/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h +54 -40
  291. package/eigen/Eigen/src/LU/arch/InverseSize4.h +72 -70
  292. package/eigen/Eigen/src/MetisSupport/InternalHeaderCheck.h +3 -0
  293. package/eigen/Eigen/src/MetisSupport/MetisSupport.h +81 -93
  294. package/eigen/Eigen/src/OrderingMethods/Amd.h +243 -265
  295. package/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +831 -1004
  296. package/eigen/Eigen/src/OrderingMethods/InternalHeaderCheck.h +3 -0
  297. package/eigen/Eigen/src/OrderingMethods/Ordering.h +112 -119
  298. package/eigen/Eigen/src/PaStiXSupport/InternalHeaderCheck.h +3 -0
  299. package/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +524 -570
  300. package/eigen/Eigen/src/PardisoSupport/InternalHeaderCheck.h +3 -0
  301. package/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +385 -430
  302. package/eigen/Eigen/src/QR/ColPivHouseholderQR.h +479 -479
  303. package/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +120 -56
  304. package/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +166 -153
  305. package/eigen/Eigen/src/QR/FullPivHouseholderQR.h +495 -475
  306. package/eigen/Eigen/src/QR/HouseholderQR.h +394 -285
  307. package/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h +32 -23
  308. package/eigen/Eigen/src/QR/InternalHeaderCheck.h +3 -0
  309. package/eigen/Eigen/src/SPQRSupport/InternalHeaderCheck.h +3 -0
  310. package/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +244 -264
  311. package/eigen/Eigen/src/SVD/BDCSVD.h +817 -713
  312. package/eigen/Eigen/src/SVD/BDCSVD_LAPACKE.h +174 -0
  313. package/eigen/Eigen/src/SVD/InternalHeaderCheck.h +3 -0
  314. package/eigen/Eigen/src/SVD/JacobiSVD.h +577 -543
  315. package/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +85 -49
  316. package/eigen/Eigen/src/SVD/SVDBase.h +242 -182
  317. package/eigen/Eigen/src/SVD/UpperBidiagonalization.h +200 -235
  318. package/eigen/Eigen/src/SparseCholesky/InternalHeaderCheck.h +3 -0
  319. package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +765 -594
  320. package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +308 -94
  321. package/eigen/Eigen/src/SparseCore/AmbiVector.h +202 -251
  322. package/eigen/Eigen/src/SparseCore/CompressedStorage.h +184 -252
  323. package/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +134 -178
  324. package/eigen/Eigen/src/SparseCore/InternalHeaderCheck.h +3 -0
  325. package/eigen/Eigen/src/SparseCore/SparseAssign.h +149 -140
  326. package/eigen/Eigen/src/SparseCore/SparseBlock.h +403 -440
  327. package/eigen/Eigen/src/SparseCore/SparseColEtree.h +100 -112
  328. package/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +525 -303
  329. package/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +555 -339
  330. package/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +100 -108
  331. package/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +169 -197
  332. package/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +71 -71
  333. package/eigen/Eigen/src/SparseCore/SparseDot.h +49 -47
  334. package/eigen/Eigen/src/SparseCore/SparseFuzzy.h +13 -11
  335. package/eigen/Eigen/src/SparseCore/SparseMap.h +243 -253
  336. package/eigen/Eigen/src/SparseCore/SparseMatrix.h +1603 -1245
  337. package/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +403 -350
  338. package/eigen/Eigen/src/SparseCore/SparsePermutation.h +186 -115
  339. package/eigen/Eigen/src/SparseCore/SparseProduct.h +94 -97
  340. package/eigen/Eigen/src/SparseCore/SparseRedux.h +22 -24
  341. package/eigen/Eigen/src/SparseCore/SparseRef.h +268 -295
  342. package/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +370 -416
  343. package/eigen/Eigen/src/SparseCore/SparseSolverBase.h +78 -87
  344. package/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +81 -95
  345. package/eigen/Eigen/src/SparseCore/SparseTranspose.h +62 -71
  346. package/eigen/Eigen/src/SparseCore/SparseTriangularView.h +132 -144
  347. package/eigen/Eigen/src/SparseCore/SparseUtil.h +138 -115
  348. package/eigen/Eigen/src/SparseCore/SparseVector.h +426 -372
  349. package/eigen/Eigen/src/SparseCore/SparseView.h +164 -193
  350. package/eigen/Eigen/src/SparseCore/TriangularSolver.h +129 -170
  351. package/eigen/Eigen/src/SparseLU/InternalHeaderCheck.h +3 -0
  352. package/eigen/Eigen/src/SparseLU/SparseLU.h +756 -710
  353. package/eigen/Eigen/src/SparseLU/SparseLUImpl.h +61 -48
  354. package/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +102 -118
  355. package/eigen/Eigen/src/SparseLU/SparseLU_Structs.h +38 -35
  356. package/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +245 -301
  357. package/eigen/Eigen/src/SparseLU/SparseLU_Utils.h +44 -49
  358. package/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h +104 -108
  359. package/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +89 -100
  360. package/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +57 -58
  361. package/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +43 -55
  362. package/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +74 -71
  363. package/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +124 -132
  364. package/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h +136 -159
  365. package/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h +51 -52
  366. package/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h +67 -73
  367. package/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h +24 -26
  368. package/eigen/Eigen/src/SparseQR/InternalHeaderCheck.h +3 -0
  369. package/eigen/Eigen/src/SparseQR/SparseQR.h +450 -502
  370. package/eigen/Eigen/src/StlSupport/StdDeque.h +28 -93
  371. package/eigen/Eigen/src/StlSupport/StdList.h +28 -84
  372. package/eigen/Eigen/src/StlSupport/StdVector.h +28 -108
  373. package/eigen/Eigen/src/StlSupport/details.h +48 -50
  374. package/eigen/Eigen/src/SuperLUSupport/InternalHeaderCheck.h +3 -0
  375. package/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +634 -730
  376. package/eigen/Eigen/src/ThreadPool/Barrier.h +70 -0
  377. package/eigen/Eigen/src/ThreadPool/CoreThreadPoolDevice.h +336 -0
  378. package/eigen/Eigen/src/ThreadPool/EventCount.h +241 -0
  379. package/eigen/Eigen/src/ThreadPool/ForkJoin.h +140 -0
  380. package/eigen/Eigen/src/ThreadPool/InternalHeaderCheck.h +4 -0
  381. package/eigen/Eigen/src/ThreadPool/NonBlockingThreadPool.h +587 -0
  382. package/eigen/Eigen/src/ThreadPool/RunQueue.h +230 -0
  383. package/eigen/Eigen/src/ThreadPool/ThreadCancel.h +21 -0
  384. package/eigen/Eigen/src/ThreadPool/ThreadEnvironment.h +43 -0
  385. package/eigen/Eigen/src/ThreadPool/ThreadLocal.h +289 -0
  386. package/eigen/Eigen/src/ThreadPool/ThreadPoolInterface.h +50 -0
  387. package/eigen/Eigen/src/ThreadPool/ThreadYield.h +16 -0
  388. package/eigen/Eigen/src/UmfPackSupport/InternalHeaderCheck.h +3 -0
  389. package/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +428 -464
  390. package/eigen/Eigen/src/misc/Image.h +41 -43
  391. package/eigen/Eigen/src/misc/InternalHeaderCheck.h +3 -0
  392. package/eigen/Eigen/src/misc/Kernel.h +39 -41
  393. package/eigen/Eigen/src/misc/RealSvd2x2.h +19 -21
  394. package/eigen/Eigen/src/misc/blas.h +83 -426
  395. package/eigen/Eigen/src/misc/lapacke.h +9972 -16179
  396. package/eigen/Eigen/src/misc/lapacke_helpers.h +163 -0
  397. package/eigen/Eigen/src/misc/lapacke_mangling.h +4 -5
  398. package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.inc +344 -0
  399. package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.inc +544 -0
  400. package/eigen/Eigen/src/plugins/{BlockMethods.h → BlockMethods.inc} +434 -506
  401. package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.inc +116 -0
  402. package/eigen/Eigen/src/plugins/{CommonCwiseUnaryOps.h → CommonCwiseUnaryOps.inc} +58 -68
  403. package/eigen/Eigen/src/plugins/IndexedViewMethods.inc +192 -0
  404. package/eigen/Eigen/src/plugins/InternalHeaderCheck.inc +3 -0
  405. package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.inc +331 -0
  406. package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.inc +118 -0
  407. package/eigen/Eigen/src/plugins/ReshapedMethods.inc +133 -0
  408. package/package.json +1 -1
  409. package/eigen/COPYING.APACHE +0 -203
  410. package/eigen/COPYING.BSD +0 -26
  411. package/eigen/COPYING.GPL +0 -674
  412. package/eigen/COPYING.LGPL +0 -502
  413. package/eigen/COPYING.MINPACK +0 -51
  414. package/eigen/COPYING.MPL2 +0 -373
  415. package/eigen/COPYING.README +0 -18
  416. package/eigen/Eigen/src/Core/BooleanRedux.h +0 -162
  417. package/eigen/Eigen/src/Core/arch/CUDA/Complex.h +0 -258
  418. package/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +0 -120
  419. package/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +0 -694
  420. package/eigen/Eigen/src/Core/util/NonMPL2.h +0 -3
  421. package/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h +0 -67
  422. package/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +0 -280
  423. package/eigen/Eigen/src/misc/lapack.h +0 -152
  424. package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +0 -358
  425. package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +0 -696
  426. package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h +0 -115
  427. package/eigen/Eigen/src/plugins/IndexedViewMethods.h +0 -262
  428. package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +0 -152
  429. package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +0 -95
  430. package/eigen/Eigen/src/plugins/ReshapedMethods.h +0 -149
  431. package/eigen/README.md +0 -5
@@ -21,190 +21,84 @@
21
21
  #ifndef EIGEN_PACKET_MATH_SYCL_H
22
22
  #define EIGEN_PACKET_MATH_SYCL_H
23
23
  #include <type_traits>
24
- namespace Eigen {
25
-
26
- namespace internal {
27
- #ifdef SYCL_DEVICE_ONLY
28
24
 
29
- #define SYCL_PLOADT_RO(address_space_target) \
30
- template <typename packet_type, int Alignment> \
31
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type ploadt_ro( \
32
- typename cl::sycl::multi_ptr< \
33
- const typename unpacket_traits<packet_type>::type, \
34
- cl::sycl::access::address_space::address_space_target>::pointer_t \
35
- from) { \
36
- typedef typename unpacket_traits<packet_type>::type scalar; \
37
- typedef cl::sycl::multi_ptr< \
38
- scalar, cl::sycl::access::address_space::address_space_target> \
39
- multi_ptr; \
40
- auto res = packet_type( \
41
- static_cast<typename unpacket_traits<packet_type>::type>(0)); \
42
- res.load(0, multi_ptr(const_cast<typename multi_ptr::pointer_t>(from))); \
43
- return res; \
44
- }
45
-
46
- SYCL_PLOADT_RO(global_space)
47
- SYCL_PLOADT_RO(local_space)
48
- #undef SYCL_PLOADT_RO
49
- #endif
25
+ // IWYU pragma: private
26
+ #include "../../InternalHeaderCheck.h"
50
27
 
51
- template <typename packet_type, int Alignment, typename T>
52
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type
53
- ploadt_ro(const Eigen::TensorSycl::internal::RangeAccess<
54
- cl::sycl::access::mode::read_write, T>& from) {
55
- return ploadt_ro<packet_type, Alignment>(from.get_pointer());
56
- }
28
+ namespace Eigen {
57
29
 
30
+ namespace internal {
58
31
  #ifdef SYCL_DEVICE_ONLY
59
- #define SYCL_PLOAD(address_space_target, Alignment, AlignedType) \
60
- template <typename packet_type> \
61
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pload##AlignedType( \
62
- typename cl::sycl::multi_ptr< \
63
- const typename unpacket_traits<packet_type>::type, \
64
- cl::sycl::access::address_space::address_space_target>::pointer_t \
65
- from) { \
66
- return ploadt_ro<packet_type, Alignment>(from); \
32
+ #define SYCL_PLOAD(packet_type, AlignedType) \
33
+ template <> \
34
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pload##AlignedType<packet_type>( \
35
+ const typename unpacket_traits<packet_type>::type* from) { \
36
+ auto ptr = \
37
+ cl::sycl::address_space_cast<cl::sycl::access::address_space::generic_space, cl::sycl::access::decorated::no>( \
38
+ from); \
39
+ packet_type res{}; \
40
+ res.load(0, ptr); \
41
+ return res; \
67
42
  }
68
43
 
69
- // global space
70
- SYCL_PLOAD(global_space, Unaligned, u)
71
- SYCL_PLOAD(global_space, Aligned, )
72
- // local space
73
- SYCL_PLOAD(local_space, Unaligned, u)
74
- SYCL_PLOAD(local_space, Aligned, )
75
-
76
- #undef SYCL_PLOAD
77
- #endif
78
-
79
- #define SYCL_PLOAD(Alignment, AlignedType) \
80
- template <typename packet_type> \
81
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pload##AlignedType( \
82
- const Eigen::TensorSycl::internal::RangeAccess< \
83
- cl::sycl::access::mode::read_write, \
84
- typename unpacket_traits<packet_type>::type> \
85
- from) { \
86
- return ploadt_ro<packet_type, Alignment>(from); \
87
- }
88
- SYCL_PLOAD(Unaligned, u)
89
- SYCL_PLOAD(Aligned, )
44
+ SYCL_PLOAD(cl::sycl::cl_float4, u)
45
+ SYCL_PLOAD(cl::sycl::cl_float4, )
46
+ SYCL_PLOAD(cl::sycl::cl_double2, u)
47
+ SYCL_PLOAD(cl::sycl::cl_double2, )
90
48
  #undef SYCL_PLOAD
91
49
 
92
- #ifdef SYCL_DEVICE_ONLY
93
- /** \internal \returns a packet version of \a *from.
94
- * The pointer \a from must be aligned on a \a Alignment bytes boundary. */
95
- #define SYCL_PLOADT(address_space_target) \
96
- template <typename packet_type, int Alignment> \
97
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type ploadt( \
98
- typename cl::sycl::multi_ptr< \
99
- const typename unpacket_traits<packet_type>::type, \
100
- cl::sycl::access::address_space::address_space_target>::pointer_t \
101
- from) { \
102
- if (Alignment >= unpacket_traits<packet_type>::alignment) \
103
- return pload<packet_type>(from); \
104
- else \
105
- return ploadu<packet_type>(from); \
106
- }
107
-
108
- // global space
109
- SYCL_PLOADT(global_space)
110
- // local space
111
- SYCL_PLOADT(local_space)
112
- #undef SYCL_PLOADT
113
- #endif
114
-
115
- template <typename packet_type, int Alignment>
116
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type
117
- ploadt(const Eigen::TensorSycl::internal::RangeAccess<
118
- cl::sycl::access::mode::read_write,
119
- typename unpacket_traits<packet_type>::type>& from) {
120
- return ploadt<packet_type, Alignment>(from.get_pointer());
50
+ template <>
51
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_half8 pload<cl::sycl::cl_half8>(
52
+ const typename unpacket_traits<cl::sycl::cl_half8>::type* from) {
53
+ auto ptr =
54
+ cl::sycl::address_space_cast<cl::sycl::access::address_space::generic_space, cl::sycl::access::decorated::no>(
55
+ reinterpret_cast<const cl::sycl::cl_half*>(from));
56
+ cl::sycl::cl_half8 res{};
57
+ res.load(0, ptr);
58
+ return res;
121
59
  }
122
- #ifdef SYCL_DEVICE_ONLY
123
60
 
124
- // private_space
125
- #define SYCL_PLOADT_RO_SPECIAL(packet_type, Alignment) \
126
- template <> \
127
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type \
128
- ploadt_ro<packet_type, Alignment>( \
129
- const typename unpacket_traits<packet_type>::type* from) { \
130
- typedef typename unpacket_traits<packet_type>::type scalar; \
131
- auto res = packet_type(static_cast<scalar>(0)); \
132
- res.template load<cl::sycl::access::address_space::private_space>( \
133
- 0, const_cast<scalar*>(from)); \
134
- return res; \
135
- }
136
-
137
- SYCL_PLOADT_RO_SPECIAL(cl::sycl::cl_float4, Aligned)
138
- SYCL_PLOADT_RO_SPECIAL(cl::sycl::cl_double2, Aligned)
139
- SYCL_PLOADT_RO_SPECIAL(cl::sycl::cl_float4, Unaligned)
140
- SYCL_PLOADT_RO_SPECIAL(cl::sycl::cl_double2, Unaligned)
61
+ template <>
62
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_half8 ploadu<cl::sycl::cl_half8>(
63
+ const typename unpacket_traits<cl::sycl::cl_half8>::type* from) {
64
+ auto ptr =
65
+ cl::sycl::address_space_cast<cl::sycl::access::address_space::generic_space, cl::sycl::access::decorated::no>(
66
+ reinterpret_cast<const cl::sycl::cl_half*>(from));
67
+ cl::sycl::cl_half8 res{};
68
+ res.load(0, ptr);
69
+ return res;
70
+ }
141
71
 
142
- #define SYCL_PLOAD_SPECIAL(packet_type, alignment_type) \
143
- template <> \
144
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pload##alignment_type( \
145
- const typename unpacket_traits<packet_type>::type* from) { \
146
- typedef typename unpacket_traits<packet_type>::type scalar; \
147
- auto res = packet_type(static_cast<scalar>(0)); \
148
- res.template load<cl::sycl::access::address_space::private_space>( \
149
- 0, const_cast<scalar*>(from)); \
150
- return res; \
151
- }
152
- SYCL_PLOAD_SPECIAL(cl::sycl::cl_float4, )
153
- SYCL_PLOAD_SPECIAL(cl::sycl::cl_double2, )
154
- SYCL_PLOAD_SPECIAL(cl::sycl::cl_float4, u)
155
- SYCL_PLOAD_SPECIAL(cl::sycl::cl_double2, u)
156
-
157
- #undef SYCL_PLOAD_SPECIAL
158
-
159
- #define SYCL_PSTORE(scalar, packet_type, address_space_target, alignment) \
160
- template <> \
161
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstore##alignment( \
162
- typename cl::sycl::multi_ptr< \
163
- scalar, \
164
- cl::sycl::access::address_space::address_space_target>::pointer_t \
165
- to, \
166
- const packet_type& from) { \
167
- typedef cl::sycl::multi_ptr< \
168
- scalar, cl::sycl::access::address_space::address_space_target> \
169
- multi_ptr; \
170
- from.store(0, multi_ptr(to)); \
72
+ #define SYCL_PSTORE(scalar, packet_type, alignment) \
73
+ template <> \
74
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstore##alignment(scalar* to, const packet_type& from) { \
75
+ auto ptr = \
76
+ cl::sycl::address_space_cast<cl::sycl::access::address_space::generic_space, cl::sycl::access::decorated::no>( \
77
+ to); \
78
+ from.store(0, ptr); \
171
79
  }
172
80
 
173
- // global space
174
- SYCL_PSTORE(float, cl::sycl::cl_float4, global_space, )
175
- SYCL_PSTORE(float, cl::sycl::cl_float4, global_space, u)
176
- SYCL_PSTORE(double, cl::sycl::cl_double2, global_space, )
177
- SYCL_PSTORE(double, cl::sycl::cl_double2, global_space, u)
178
- SYCL_PSTORE(float, cl::sycl::cl_float4, local_space, )
179
- SYCL_PSTORE(float, cl::sycl::cl_float4, local_space, u)
180
- SYCL_PSTORE(double, cl::sycl::cl_double2, local_space, )
181
- SYCL_PSTORE(double, cl::sycl::cl_double2, local_space, u)
182
-
183
- SYCL_PSTORE(float, cl::sycl::cl_float4, private_space, )
184
- SYCL_PSTORE(float, cl::sycl::cl_float4, private_space, u)
185
- SYCL_PSTORE(double, cl::sycl::cl_double2, private_space, )
186
- SYCL_PSTORE(double, cl::sycl::cl_double2, private_space, u)
81
+ SYCL_PSTORE(float, cl::sycl::cl_float4, )
82
+ SYCL_PSTORE(float, cl::sycl::cl_float4, u)
83
+ SYCL_PSTORE(double, cl::sycl::cl_double2, )
84
+ SYCL_PSTORE(double, cl::sycl::cl_double2, u)
187
85
  #undef SYCL_PSTORE
188
86
 
189
- #define SYCL_PSTORE_T(address_space_target) \
190
- template <typename scalar, typename packet_type, int Alignment> \
191
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret( \
192
- typename cl::sycl::multi_ptr< \
193
- scalar, \
194
- cl::sycl::access::address_space::address_space_target>::pointer_t \
195
- to, \
196
- const packet_type& from) { \
197
- if (Alignment) \
198
- pstore(to, from); \
199
- else \
200
- pstoreu(to, from); \
201
- }
202
-
203
- SYCL_PSTORE_T(global_space)
204
-
205
- SYCL_PSTORE_T(local_space)
87
+ template <>
88
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoreu(Eigen::half* to, const cl::sycl::cl_half8& from) {
89
+ auto ptr =
90
+ cl::sycl::address_space_cast<cl::sycl::access::address_space::generic_space, cl::sycl::access::decorated::no>(
91
+ reinterpret_cast<cl::sycl::cl_half*>(to));
92
+ from.store(0, ptr);
93
+ }
206
94
 
207
- #undef SYCL_PSTORE_T
95
+ template <>
96
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstore(Eigen::half* to, const cl::sycl::cl_half8& from) {
97
+ auto ptr =
98
+ cl::sycl::address_space_cast<cl::sycl::access::address_space::generic_space, cl::sycl::access::decorated::no>(
99
+ reinterpret_cast<cl::sycl::cl_half*>(to));
100
+ from.store(0, ptr);
101
+ }
208
102
 
209
103
  #define SYCL_PSET1(packet_type) \
210
104
  template <> \
@@ -214,6 +108,7 @@ SYCL_PSTORE_T(local_space)
214
108
  }
215
109
 
216
110
  // global space
111
+ SYCL_PSET1(cl::sycl::cl_half8)
217
112
  SYCL_PSET1(cl::sycl::cl_float4)
218
113
  SYCL_PSET1(cl::sycl::cl_double2)
219
114
 
@@ -222,41 +117,73 @@ SYCL_PSET1(cl::sycl::cl_double2)
222
117
  template <typename packet_type>
223
118
  struct get_base_packet {
224
119
  template <typename sycl_multi_pointer>
225
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type
226
- get_ploaddup(sycl_multi_pointer) {}
120
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type get_ploaddup(sycl_multi_pointer) {}
121
+
122
+ template <typename sycl_multi_pointer>
123
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type get_pgather(sycl_multi_pointer, Index) {}
124
+ };
125
+
126
+ template <>
127
+ struct get_base_packet<cl::sycl::cl_half8> {
128
+ template <typename sycl_multi_pointer>
129
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_half8 get_ploaddup(sycl_multi_pointer from) {
130
+ return cl::sycl::cl_half8(static_cast<cl::sycl::half>(from[0]), static_cast<cl::sycl::half>(from[0]),
131
+ static_cast<cl::sycl::half>(from[1]), static_cast<cl::sycl::half>(from[1]),
132
+ static_cast<cl::sycl::half>(from[2]), static_cast<cl::sycl::half>(from[2]),
133
+ static_cast<cl::sycl::half>(from[3]), static_cast<cl::sycl::half>(from[3]));
134
+ }
135
+ template <typename sycl_multi_pointer>
136
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_half8 get_pgather(sycl_multi_pointer from, Index stride) {
137
+ return cl::sycl::cl_half8(
138
+ static_cast<cl::sycl::half>(from[0 * stride]), static_cast<cl::sycl::half>(from[1 * stride]),
139
+ static_cast<cl::sycl::half>(from[2 * stride]), static_cast<cl::sycl::half>(from[3 * stride]),
140
+ static_cast<cl::sycl::half>(from[4 * stride]), static_cast<cl::sycl::half>(from[5 * stride]),
141
+ static_cast<cl::sycl::half>(from[6 * stride]), static_cast<cl::sycl::half>(from[7 * stride]));
142
+ }
227
143
 
228
144
  template <typename sycl_multi_pointer>
229
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type
230
- get_pgather(sycl_multi_pointer, Index) {}
145
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void set_pscatter(sycl_multi_pointer to, const cl::sycl::cl_half8& from,
146
+ Index stride) {
147
+ auto tmp = stride;
148
+ to[0] = Eigen::half(from.s0());
149
+ to[tmp] = Eigen::half(from.s1());
150
+ to[tmp += stride] = Eigen::half(from.s2());
151
+ to[tmp += stride] = Eigen::half(from.s3());
152
+ to[tmp += stride] = Eigen::half(from.s4());
153
+ to[tmp += stride] = Eigen::half(from.s5());
154
+ to[tmp += stride] = Eigen::half(from.s6());
155
+ to[tmp += stride] = Eigen::half(from.s7());
156
+ }
157
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_half8 set_plset(const cl::sycl::half& a) {
158
+ return cl::sycl::cl_half8(static_cast<cl::sycl::half>(a), static_cast<cl::sycl::half>(a + 1),
159
+ static_cast<cl::sycl::half>(a + 2), static_cast<cl::sycl::half>(a + 3),
160
+ static_cast<cl::sycl::half>(a + 4), static_cast<cl::sycl::half>(a + 5),
161
+ static_cast<cl::sycl::half>(a + 6), static_cast<cl::sycl::half>(a + 7));
162
+ }
231
163
  };
232
164
 
233
165
  template <>
234
166
  struct get_base_packet<cl::sycl::cl_float4> {
235
167
  template <typename sycl_multi_pointer>
236
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 get_ploaddup(
237
- sycl_multi_pointer from) {
168
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 get_ploaddup(sycl_multi_pointer from) {
238
169
  return cl::sycl::cl_float4(from[0], from[0], from[1], from[1]);
239
170
  }
240
171
  template <typename sycl_multi_pointer>
241
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 get_pgather(
242
- sycl_multi_pointer from, Index stride) {
243
- return cl::sycl::cl_float4(from[0 * stride], from[1 * stride],
244
- from[2 * stride], from[3 * stride]);
172
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 get_pgather(sycl_multi_pointer from, Index stride) {
173
+ return cl::sycl::cl_float4(from[0 * stride], from[1 * stride], from[2 * stride], from[3 * stride]);
245
174
  }
246
175
 
247
176
  template <typename sycl_multi_pointer>
248
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void set_pscatter(
249
- sycl_multi_pointer to, const cl::sycl::cl_float4& from, Index stride) {
177
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void set_pscatter(sycl_multi_pointer to, const cl::sycl::cl_float4& from,
178
+ Index stride) {
250
179
  auto tmp = stride;
251
180
  to[0] = from.x();
252
181
  to[tmp] = from.y();
253
182
  to[tmp += stride] = from.z();
254
183
  to[tmp += stride] = from.w();
255
184
  }
256
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 set_plset(
257
- const float& a) {
258
- return cl::sycl::cl_float4(static_cast<float>(a), static_cast<float>(a + 1),
259
- static_cast<float>(a + 2),
185
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 set_plset(const float& a) {
186
+ return cl::sycl::cl_float4(static_cast<float>(a), static_cast<float>(a + 1), static_cast<float>(a + 2),
260
187
  static_cast<float>(a + 3));
261
188
  }
262
189
  };
@@ -264,47 +191,28 @@ struct get_base_packet<cl::sycl::cl_float4> {
264
191
  template <>
265
192
  struct get_base_packet<cl::sycl::cl_double2> {
266
193
  template <typename sycl_multi_pointer>
267
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2
268
- get_ploaddup(const sycl_multi_pointer from) {
194
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2 get_ploaddup(const sycl_multi_pointer from) {
269
195
  return cl::sycl::cl_double2(from[0], from[0]);
270
196
  }
271
197
 
272
198
  template <typename sycl_multi_pointer, typename Index>
273
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2 get_pgather(
274
- const sycl_multi_pointer from, Index stride) {
199
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2 get_pgather(const sycl_multi_pointer from,
200
+ Index stride) {
275
201
  return cl::sycl::cl_double2(from[0 * stride], from[1 * stride]);
276
202
  }
277
203
 
278
204
  template <typename sycl_multi_pointer>
279
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void set_pscatter(
280
- sycl_multi_pointer to, const cl::sycl::cl_double2& from, Index stride) {
205
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void set_pscatter(sycl_multi_pointer to,
206
+ const cl::sycl::cl_double2& from, Index stride) {
281
207
  to[0] = from.x();
282
208
  to[stride] = from.y();
283
209
  }
284
210
 
285
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2 set_plset(
286
- const double& a) {
287
- return cl::sycl::cl_double2(static_cast<double>(a),
288
- static_cast<double>(a + 1));
211
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2 set_plset(const double& a) {
212
+ return cl::sycl::cl_double2(static_cast<double>(a), static_cast<double>(a + 1));
289
213
  }
290
214
  };
291
215
 
292
- #define SYCL_PLOAD_DUP(address_space_target) \
293
- template <typename packet_type> \
294
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type ploaddup( \
295
- typename cl::sycl::multi_ptr< \
296
- const typename unpacket_traits<packet_type>::type, \
297
- cl::sycl::access::address_space::address_space_target>::pointer_t \
298
- from) { \
299
- return get_base_packet<packet_type>::get_ploaddup(from); \
300
- }
301
-
302
- // global space
303
- SYCL_PLOAD_DUP(global_space)
304
- // local_space
305
- SYCL_PLOAD_DUP(local_space)
306
- #undef SYCL_PLOAD_DUP
307
-
308
216
  #define SYCL_PLOAD_DUP_SPECILIZE(packet_type) \
309
217
  template <> \
310
218
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type ploaddup<packet_type>( \
@@ -312,6 +220,7 @@ SYCL_PLOAD_DUP(local_space)
312
220
  return get_base_packet<packet_type>::get_ploaddup(from); \
313
221
  }
314
222
 
223
+ SYCL_PLOAD_DUP_SPECILIZE(cl::sycl::cl_half8)
315
224
  SYCL_PLOAD_DUP_SPECILIZE(cl::sycl::cl_float4)
316
225
  SYCL_PLOAD_DUP_SPECILIZE(cl::sycl::cl_double2)
317
226
 
@@ -323,186 +232,162 @@ SYCL_PLOAD_DUP_SPECILIZE(cl::sycl::cl_double2)
323
232
  const typename unpacket_traits<packet_type>::type& a) { \
324
233
  return get_base_packet<packet_type>::set_plset(a); \
325
234
  }
326
-
327
235
  SYCL_PLSET(cl::sycl::cl_float4)
328
236
  SYCL_PLSET(cl::sycl::cl_double2)
329
-
330
237
  #undef SYCL_PLSET
331
238
 
332
- #define SYCL_PGATHER(address_space_target) \
333
- template <typename Scalar, typename packet_type> \
334
- EIGEN_DEVICE_FUNC inline packet_type pgather( \
335
- typename cl::sycl::multi_ptr< \
336
- const typename unpacket_traits<packet_type>::type, \
337
- cl::sycl::access::address_space::address_space_target>::pointer_t \
338
- from, \
339
- Index stride) { \
340
- return get_base_packet<packet_type>::get_pgather(from, stride); \
341
- }
239
+ template <>
240
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_half8 plset<cl::sycl::cl_half8>(
241
+ const typename unpacket_traits<cl::sycl::cl_half8>::type& a) {
242
+ return get_base_packet<cl::sycl::cl_half8>::set_plset((const cl::sycl::half&)a);
243
+ }
342
244
 
343
- // global space
344
- SYCL_PGATHER(global_space)
345
- // local space
346
- SYCL_PGATHER(local_space)
347
-
348
- #undef SYCL_PGATHER
349
-
350
- #define SYCL_PGATHER_SPECILIZE(scalar, packet_type) \
351
- template <> \
352
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type \
353
- pgather<scalar, packet_type>( \
354
- const typename unpacket_traits<packet_type>::type* from, Index stride) { \
355
- return get_base_packet<packet_type>::get_pgather(from, stride); \
245
+ #define SYCL_PGATHER_SPECILIZE(scalar, packet_type) \
246
+ template <> \
247
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pgather<scalar, packet_type>( \
248
+ const typename unpacket_traits<packet_type>::type* from, Index stride) { \
249
+ return get_base_packet<packet_type>::get_pgather(from, stride); \
356
250
  }
357
251
 
252
+ SYCL_PGATHER_SPECILIZE(Eigen::half, cl::sycl::cl_half8)
358
253
  SYCL_PGATHER_SPECILIZE(float, cl::sycl::cl_float4)
359
254
  SYCL_PGATHER_SPECILIZE(double, cl::sycl::cl_double2)
360
-
361
255
  #undef SYCL_PGATHER_SPECILIZE
362
256
 
363
- #define SYCL_PSCATTER(address_space_target) \
364
- template <typename Scalar, typename packet_type> \
365
- EIGEN_DEVICE_FUNC inline void pscatter( \
366
- typename cl::sycl::multi_ptr< \
367
- typename unpacket_traits<packet_type>::type, \
368
- cl::sycl::access::address_space::address_space_target>::pointer_t \
369
- to, \
370
- const packet_type& from, Index stride) { \
371
- get_base_packet<packet_type>::set_pscatter(to, from, stride); \
372
- }
373
-
374
- // global space
375
- SYCL_PSCATTER(global_space)
376
- // local space
377
- SYCL_PSCATTER(local_space)
378
-
379
- #undef SYCL_PSCATTER
380
-
381
- #define SYCL_PSCATTER_SPECILIZE(scalar, packet_type) \
382
- template <> \
383
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter<scalar, packet_type>( \
384
- typename unpacket_traits<packet_type>::type * to, \
385
- const packet_type& from, Index stride) { \
386
- get_base_packet<packet_type>::set_pscatter(to, from, stride); \
257
+ #define SYCL_PSCATTER_SPECILIZE(scalar, packet_type) \
258
+ template <> \
259
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter<scalar, packet_type>( \
260
+ typename unpacket_traits<packet_type>::type * to, const packet_type& from, Index stride) { \
261
+ get_base_packet<packet_type>::set_pscatter(to, from, stride); \
387
262
  }
388
263
 
264
+ SYCL_PSCATTER_SPECILIZE(Eigen::half, cl::sycl::cl_half8)
389
265
  SYCL_PSCATTER_SPECILIZE(float, cl::sycl::cl_float4)
390
266
  SYCL_PSCATTER_SPECILIZE(double, cl::sycl::cl_double2)
391
267
 
392
268
  #undef SYCL_PSCATTER_SPECILIZE
393
269
 
394
- #define SYCL_PMAD(packet_type) \
395
- template <> \
396
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pmadd( \
397
- const packet_type& a, const packet_type& b, const packet_type& c) { \
398
- return cl::sycl::mad(a, b, c); \
270
+ #define SYCL_PMAD(packet_type) \
271
+ template <> \
272
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pmadd(const packet_type& a, const packet_type& b, \
273
+ const packet_type& c) { \
274
+ return cl::sycl::mad(a, b, c); \
399
275
  }
400
276
 
277
+ SYCL_PMAD(cl::sycl::cl_half8)
401
278
  SYCL_PMAD(cl::sycl::cl_float4)
402
279
  SYCL_PMAD(cl::sycl::cl_double2)
403
280
  #undef SYCL_PMAD
404
281
 
405
282
  template <>
406
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float pfirst<cl::sycl::cl_float4>(
407
- const cl::sycl::cl_float4& a) {
283
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Eigen::half pfirst<cl::sycl::cl_half8>(const cl::sycl::cl_half8& a) {
284
+ return Eigen::half(a.s0());
285
+ }
286
+ template <>
287
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float pfirst<cl::sycl::cl_float4>(const cl::sycl::cl_float4& a) {
408
288
  return a.x();
409
289
  }
410
290
  template <>
411
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double pfirst<cl::sycl::cl_double2>(
412
- const cl::sycl::cl_double2& a) {
291
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double pfirst<cl::sycl::cl_double2>(const cl::sycl::cl_double2& a) {
413
292
  return a.x();
414
293
  }
415
294
 
416
295
  template <>
417
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux<cl::sycl::cl_float4>(
418
- const cl::sycl::cl_float4& a) {
296
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Eigen::half predux<cl::sycl::cl_half8>(const cl::sycl::cl_half8& a) {
297
+ return Eigen::half(a.s0() + a.s1() + a.s2() + a.s3() + a.s4() + a.s5() + a.s6() + a.s7());
298
+ }
299
+
300
+ template <>
301
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux<cl::sycl::cl_float4>(const cl::sycl::cl_float4& a) {
419
302
  return a.x() + a.y() + a.z() + a.w();
420
303
  }
421
304
 
422
305
  template <>
423
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux<cl::sycl::cl_double2>(
424
- const cl::sycl::cl_double2& a) {
306
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux<cl::sycl::cl_double2>(const cl::sycl::cl_double2& a) {
425
307
  return a.x() + a.y();
426
308
  }
427
309
 
428
310
  template <>
429
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux_max<cl::sycl::cl_float4>(
430
- const cl::sycl::cl_float4& a) {
431
- return cl::sycl::fmax(cl::sycl::fmax(a.x(), a.y()),
432
- cl::sycl::fmax(a.z(), a.w()));
311
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Eigen::half predux_max<cl::sycl::cl_half8>(const cl::sycl::cl_half8& a) {
312
+ return Eigen::half(cl::sycl::fmax(cl::sycl::fmax(cl::sycl::fmax(a.s0(), a.s1()), cl::sycl::fmax(a.s2(), a.s3())),
313
+ cl::sycl::fmax(cl::sycl::fmax(a.s4(), a.s5()), cl::sycl::fmax(a.s6(), a.s7()))));
433
314
  }
434
315
  template <>
435
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_max<cl::sycl::cl_double2>(
436
- const cl::sycl::cl_double2& a) {
316
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux_max<cl::sycl::cl_float4>(const cl::sycl::cl_float4& a) {
317
+ return cl::sycl::fmax(cl::sycl::fmax(a.x(), a.y()), cl::sycl::fmax(a.z(), a.w()));
318
+ }
319
+ template <>
320
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_max<cl::sycl::cl_double2>(const cl::sycl::cl_double2& a) {
437
321
  return cl::sycl::fmax(a.x(), a.y());
438
322
  }
439
323
 
440
324
  template <>
441
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux_min<cl::sycl::cl_float4>(
442
- const cl::sycl::cl_float4& a) {
443
- return cl::sycl::fmin(cl::sycl::fmin(a.x(), a.y()),
444
- cl::sycl::fmin(a.z(), a.w()));
325
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Eigen::half predux_min<cl::sycl::cl_half8>(const cl::sycl::cl_half8& a) {
326
+ return Eigen::half(cl::sycl::fmin(cl::sycl::fmin(cl::sycl::fmin(a.s0(), a.s1()), cl::sycl::fmin(a.s2(), a.s3())),
327
+ cl::sycl::fmin(cl::sycl::fmin(a.s4(), a.s5()), cl::sycl::fmin(a.s6(), a.s7()))));
328
+ }
329
+ template <>
330
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux_min<cl::sycl::cl_float4>(const cl::sycl::cl_float4& a) {
331
+ return cl::sycl::fmin(cl::sycl::fmin(a.x(), a.y()), cl::sycl::fmin(a.z(), a.w()));
445
332
  }
446
333
  template <>
447
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_min<cl::sycl::cl_double2>(
448
- const cl::sycl::cl_double2& a) {
334
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_min<cl::sycl::cl_double2>(const cl::sycl::cl_double2& a) {
449
335
  return cl::sycl::fmin(a.x(), a.y());
450
336
  }
451
337
 
452
338
  template <>
453
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux_mul<cl::sycl::cl_float4>(
454
- const cl::sycl::cl_float4& a) {
339
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Eigen::half predux_mul<cl::sycl::cl_half8>(const cl::sycl::cl_half8& a) {
340
+ return Eigen::half(a.s0() * a.s1() * a.s2() * a.s3() * a.s4() * a.s5() * a.s6() * a.s7());
341
+ }
342
+ template <>
343
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux_mul<cl::sycl::cl_float4>(const cl::sycl::cl_float4& a) {
455
344
  return a.x() * a.y() * a.z() * a.w();
456
345
  }
457
346
  template <>
458
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_mul<cl::sycl::cl_double2>(
459
- const cl::sycl::cl_double2& a) {
347
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_mul<cl::sycl::cl_double2>(const cl::sycl::cl_double2& a) {
460
348
  return a.x() * a.y();
461
349
  }
462
350
 
463
351
  template <>
464
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4
465
- pabs<cl::sycl::cl_float4>(const cl::sycl::cl_float4& a) {
466
- return cl::sycl::cl_float4(cl::sycl::fabs(a.x()), cl::sycl::fabs(a.y()),
467
- cl::sycl::fabs(a.z()), cl::sycl::fabs(a.w()));
352
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_half8 pabs<cl::sycl::cl_half8>(const cl::sycl::cl_half8& a) {
353
+ return cl::sycl::cl_half8(cl::sycl::fabs(a.s0()), cl::sycl::fabs(a.s1()), cl::sycl::fabs(a.s2()),
354
+ cl::sycl::fabs(a.s3()), cl::sycl::fabs(a.s4()), cl::sycl::fabs(a.s5()),
355
+ cl::sycl::fabs(a.s6()), cl::sycl::fabs(a.s7()));
468
356
  }
469
357
  template <>
470
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_double2
471
- pabs<cl::sycl::cl_double2>(const cl::sycl::cl_double2& a) {
358
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4 pabs<cl::sycl::cl_float4>(const cl::sycl::cl_float4& a) {
359
+ return cl::sycl::cl_float4(cl::sycl::fabs(a.x()), cl::sycl::fabs(a.y()), cl::sycl::fabs(a.z()),
360
+ cl::sycl::fabs(a.w()));
361
+ }
362
+ template <>
363
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_double2 pabs<cl::sycl::cl_double2>(const cl::sycl::cl_double2& a) {
472
364
  return cl::sycl::cl_double2(cl::sycl::fabs(a.x()), cl::sycl::fabs(a.y()));
473
365
  }
474
366
 
475
367
  template <typename Packet>
476
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet sycl_pcmp_le(const Packet &a,
477
- const Packet &b) {
478
- return ((a <= b)
479
- .template convert<typename unpacket_traits<Packet>::type,
480
- cl::sycl::rounding_mode::automatic>());
368
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet sycl_pcmp_le(const Packet& a, const Packet& b) {
369
+ return (a <= b).template as<Packet>();
481
370
  }
482
371
 
483
372
  template <typename Packet>
484
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet sycl_pcmp_lt(const Packet &a,
485
- const Packet &b) {
486
- return ((a < b)
487
- .template convert<typename unpacket_traits<Packet>::type,
488
- cl::sycl::rounding_mode::automatic>());
373
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet sycl_pcmp_lt(const Packet& a, const Packet& b) {
374
+ return (a < b).template as<Packet>();
489
375
  }
490
376
 
491
377
  template <typename Packet>
492
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet sycl_pcmp_eq(const Packet &a,
493
- const Packet &b) {
494
- return ((a == b)
495
- .template convert<typename unpacket_traits<Packet>::type,
496
- cl::sycl::rounding_mode::automatic>());
497
- }
498
-
499
- #define SYCL_PCMP(OP, TYPE) \
500
- template <> \
501
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE TYPE pcmp_##OP<TYPE>(const TYPE &a, \
502
- const TYPE &b) { \
503
- return sycl_pcmp_##OP<TYPE>(a, b); \
378
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet sycl_pcmp_eq(const Packet& a, const Packet& b) {
379
+ return (a == b).template as<Packet>();
380
+ }
381
+
382
+ #define SYCL_PCMP(OP, TYPE) \
383
+ template <> \
384
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE TYPE pcmp_##OP<TYPE>(const TYPE& a, const TYPE& b) { \
385
+ return sycl_pcmp_##OP<TYPE>(a, b); \
504
386
  }
505
387
 
388
+ SYCL_PCMP(le, cl::sycl::cl_half8)
389
+ SYCL_PCMP(lt, cl::sycl::cl_half8)
390
+ SYCL_PCMP(eq, cl::sycl::cl_half8)
506
391
  SYCL_PCMP(le, cl::sycl::cl_float4)
507
392
  SYCL_PCMP(lt, cl::sycl::cl_float4)
508
393
  SYCL_PCMP(eq, cl::sycl::cl_float4)
@@ -511,78 +396,121 @@ SYCL_PCMP(lt, cl::sycl::cl_double2)
511
396
  SYCL_PCMP(eq, cl::sycl::cl_double2)
512
397
  #undef SYCL_PCMP
513
398
 
514
- template <typename T> struct convert_to_integer;
399
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void ptranspose(PacketBlock<cl::sycl::cl_half8, 8>& kernel) {
400
+ cl::sycl::cl_half tmp = kernel.packet[0].s1();
401
+ kernel.packet[0].s1() = kernel.packet[1].s0();
402
+ kernel.packet[1].s0() = tmp;
515
403
 
516
- template <> struct convert_to_integer<float> {
517
- using type = std::int32_t;
518
- using packet_type = cl::sycl::cl_int4;
519
- };
520
- template <> struct convert_to_integer<double> {
521
- using type = std::int64_t;
522
- using packet_type = cl::sycl::cl_long2;
523
- };
404
+ tmp = kernel.packet[0].s2();
405
+ kernel.packet[0].s2() = kernel.packet[2].s0();
406
+ kernel.packet[2].s0() = tmp;
524
407
 
525
- template <typename PacketIn>
526
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename convert_to_integer<
527
- typename unpacket_traits<PacketIn>::type>::packet_type
528
- vector_as_int(const PacketIn &p) {
529
- return (
530
- p.template convert<typename convert_to_integer<
531
- typename unpacket_traits<PacketIn>::type>::type,
532
- cl::sycl::rounding_mode::automatic>());
533
- }
408
+ tmp = kernel.packet[0].s3();
409
+ kernel.packet[0].s3() = kernel.packet[3].s0();
410
+ kernel.packet[3].s0() = tmp;
534
411
 
535
- template <typename packetOut, typename PacketIn>
536
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packetOut
537
- convert_vector(const PacketIn &p) {
538
- return (p.template convert<typename unpacket_traits<packetOut>::type,
539
- cl::sycl::rounding_mode::automatic>());
540
- }
412
+ tmp = kernel.packet[0].s4();
413
+ kernel.packet[0].s4() = kernel.packet[4].s0();
414
+ kernel.packet[4].s0() = tmp;
541
415
 
542
- #define SYCL_PAND(TYPE) \
543
- template <> \
544
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TYPE pand<TYPE>(const TYPE &a, \
545
- const TYPE &b) { \
546
- return convert_vector<TYPE>(vector_as_int(a) & vector_as_int(b)); \
547
- }
548
- SYCL_PAND(cl::sycl::cl_float4)
549
- SYCL_PAND(cl::sycl::cl_double2)
550
- #undef SYCL_PAND
551
-
552
- #define SYCL_POR(TYPE) \
553
- template <> \
554
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TYPE por<TYPE>(const TYPE &a, \
555
- const TYPE &b) { \
556
- return convert_vector<TYPE>(vector_as_int(a) | vector_as_int(b)); \
557
- }
416
+ tmp = kernel.packet[0].s5();
417
+ kernel.packet[0].s5() = kernel.packet[5].s0();
418
+ kernel.packet[5].s0() = tmp;
558
419
 
559
- SYCL_POR(cl::sycl::cl_float4)
560
- SYCL_POR(cl::sycl::cl_double2)
561
- #undef SYCL_POR
420
+ tmp = kernel.packet[0].s6();
421
+ kernel.packet[0].s6() = kernel.packet[6].s0();
422
+ kernel.packet[6].s0() = tmp;
562
423
 
563
- #define SYCL_PXOR(TYPE) \
564
- template <> \
565
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TYPE pxor<TYPE>(const TYPE &a, \
566
- const TYPE &b) { \
567
- return convert_vector<TYPE>(vector_as_int(a) ^ vector_as_int(b)); \
568
- }
424
+ tmp = kernel.packet[0].s7();
425
+ kernel.packet[0].s7() = kernel.packet[7].s0();
426
+ kernel.packet[7].s0() = tmp;
569
427
 
570
- SYCL_PXOR(cl::sycl::cl_float4)
571
- SYCL_PXOR(cl::sycl::cl_double2)
572
- #undef SYCL_PXOR
428
+ tmp = kernel.packet[1].s2();
429
+ kernel.packet[1].s2() = kernel.packet[2].s1();
430
+ kernel.packet[2].s1() = tmp;
573
431
 
574
- #define SYCL_PANDNOT(TYPE) \
575
- template <> \
576
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TYPE pandnot<TYPE>(const TYPE &a, \
577
- const TYPE &b) { \
578
- return convert_vector<TYPE>(vector_as_int(a) & (~vector_as_int(b))); \
579
- }
580
- SYCL_PANDNOT(cl::sycl::cl_float4)
581
- SYCL_PANDNOT(cl::sycl::cl_double2)
582
- #undef SYCL_PANDNOT
432
+ tmp = kernel.packet[1].s3();
433
+ kernel.packet[1].s3() = kernel.packet[3].s1();
434
+ kernel.packet[3].s1() = tmp;
435
+
436
+ tmp = kernel.packet[1].s4();
437
+ kernel.packet[1].s4() = kernel.packet[4].s1();
438
+ kernel.packet[4].s1() = tmp;
439
+
440
+ tmp = kernel.packet[1].s5();
441
+ kernel.packet[1].s5() = kernel.packet[5].s1();
442
+ kernel.packet[5].s1() = tmp;
443
+
444
+ tmp = kernel.packet[1].s6();
445
+ kernel.packet[1].s6() = kernel.packet[6].s1();
446
+ kernel.packet[6].s1() = tmp;
583
447
 
584
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void ptranspose(
585
- PacketBlock<cl::sycl::cl_float4, 4>& kernel) {
448
+ tmp = kernel.packet[1].s7();
449
+ kernel.packet[1].s7() = kernel.packet[7].s1();
450
+ kernel.packet[7].s1() = tmp;
451
+
452
+ tmp = kernel.packet[2].s3();
453
+ kernel.packet[2].s3() = kernel.packet[3].s2();
454
+ kernel.packet[3].s2() = tmp;
455
+
456
+ tmp = kernel.packet[2].s4();
457
+ kernel.packet[2].s4() = kernel.packet[4].s2();
458
+ kernel.packet[4].s2() = tmp;
459
+
460
+ tmp = kernel.packet[2].s5();
461
+ kernel.packet[2].s5() = kernel.packet[5].s2();
462
+ kernel.packet[5].s2() = tmp;
463
+
464
+ tmp = kernel.packet[2].s6();
465
+ kernel.packet[2].s6() = kernel.packet[6].s2();
466
+ kernel.packet[6].s2() = tmp;
467
+
468
+ tmp = kernel.packet[2].s7();
469
+ kernel.packet[2].s7() = kernel.packet[7].s2();
470
+ kernel.packet[7].s2() = tmp;
471
+
472
+ tmp = kernel.packet[3].s4();
473
+ kernel.packet[3].s4() = kernel.packet[4].s3();
474
+ kernel.packet[4].s3() = tmp;
475
+
476
+ tmp = kernel.packet[3].s5();
477
+ kernel.packet[3].s5() = kernel.packet[5].s3();
478
+ kernel.packet[5].s3() = tmp;
479
+
480
+ tmp = kernel.packet[3].s6();
481
+ kernel.packet[3].s6() = kernel.packet[6].s3();
482
+ kernel.packet[6].s3() = tmp;
483
+
484
+ tmp = kernel.packet[3].s7();
485
+ kernel.packet[3].s7() = kernel.packet[7].s3();
486
+ kernel.packet[7].s3() = tmp;
487
+
488
+ tmp = kernel.packet[4].s5();
489
+ kernel.packet[4].s5() = kernel.packet[5].s4();
490
+ kernel.packet[5].s4() = tmp;
491
+
492
+ tmp = kernel.packet[4].s6();
493
+ kernel.packet[4].s6() = kernel.packet[6].s4();
494
+ kernel.packet[6].s4() = tmp;
495
+
496
+ tmp = kernel.packet[4].s7();
497
+ kernel.packet[4].s7() = kernel.packet[7].s4();
498
+ kernel.packet[7].s4() = tmp;
499
+
500
+ tmp = kernel.packet[5].s6();
501
+ kernel.packet[5].s6() = kernel.packet[6].s5();
502
+ kernel.packet[6].s5() = tmp;
503
+
504
+ tmp = kernel.packet[5].s7();
505
+ kernel.packet[5].s7() = kernel.packet[7].s5();
506
+ kernel.packet[7].s5() = tmp;
507
+
508
+ tmp = kernel.packet[6].s7();
509
+ kernel.packet[6].s7() = kernel.packet[7].s6();
510
+ kernel.packet[7].s6() = tmp;
511
+ }
512
+
513
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void ptranspose(PacketBlock<cl::sycl::cl_float4, 4>& kernel) {
586
514
  float tmp = kernel.packet[0].y();
587
515
  kernel.packet[0].y() = kernel.packet[1].x();
588
516
  kernel.packet[1].x() = tmp;
@@ -608,61 +536,39 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void ptranspose(
608
536
  kernel.packet[3].z() = tmp;
609
537
  }
610
538
 
611
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void ptranspose(
612
- PacketBlock<cl::sycl::cl_double2, 2>& kernel) {
539
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void ptranspose(PacketBlock<cl::sycl::cl_double2, 2>& kernel) {
613
540
  double tmp = kernel.packet[0].y();
614
541
  kernel.packet[0].y() = kernel.packet[1].x();
615
542
  kernel.packet[1].x() = tmp;
616
543
  }
617
544
 
545
+ template <>
546
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_half8 pblend(
547
+ const Selector<unpacket_traits<cl::sycl::cl_half8>::size>& ifPacket, const cl::sycl::cl_half8& thenPacket,
548
+ const cl::sycl::cl_half8& elsePacket) {
549
+ cl::sycl::cl_short8 condition(ifPacket.select[0] ? 0 : -1, ifPacket.select[1] ? 0 : -1, ifPacket.select[2] ? 0 : -1,
550
+ ifPacket.select[3] ? 0 : -1, ifPacket.select[4] ? 0 : -1, ifPacket.select[5] ? 0 : -1,
551
+ ifPacket.select[6] ? 0 : -1, ifPacket.select[7] ? 0 : -1);
552
+ return cl::sycl::select(thenPacket, elsePacket, condition);
553
+ }
554
+
618
555
  template <>
619
556
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4 pblend(
620
- const Selector<unpacket_traits<cl::sycl::cl_float4>::size>& ifPacket,
621
- const cl::sycl::cl_float4& thenPacket,
557
+ const Selector<unpacket_traits<cl::sycl::cl_float4>::size>& ifPacket, const cl::sycl::cl_float4& thenPacket,
622
558
  const cl::sycl::cl_float4& elsePacket) {
623
- cl::sycl::cl_int4 condition(
624
- ifPacket.select[0] ? 0 : -1, ifPacket.select[1] ? 0 : -1,
625
- ifPacket.select[2] ? 0 : -1, ifPacket.select[3] ? 0 : -1);
559
+ cl::sycl::cl_int4 condition(ifPacket.select[0] ? 0 : -1, ifPacket.select[1] ? 0 : -1, ifPacket.select[2] ? 0 : -1,
560
+ ifPacket.select[3] ? 0 : -1);
626
561
  return cl::sycl::select(thenPacket, elsePacket, condition);
627
562
  }
628
563
 
629
564
  template <>
630
- inline cl::sycl::cl_double2 pblend(
631
- const Selector<unpacket_traits<cl::sycl::cl_double2>::size>& ifPacket,
632
- const cl::sycl::cl_double2& thenPacket,
633
- const cl::sycl::cl_double2& elsePacket) {
634
- cl::sycl::cl_long2 condition(ifPacket.select[0] ? 0 : -1,
635
- ifPacket.select[1] ? 0 : -1);
565
+ inline cl::sycl::cl_double2 pblend(const Selector<unpacket_traits<cl::sycl::cl_double2>::size>& ifPacket,
566
+ const cl::sycl::cl_double2& thenPacket, const cl::sycl::cl_double2& elsePacket) {
567
+ cl::sycl::cl_long2 condition(ifPacket.select[0] ? 0 : -1, ifPacket.select[1] ? 0 : -1);
636
568
  return cl::sycl::select(thenPacket, elsePacket, condition);
637
569
  }
638
570
  #endif // SYCL_DEVICE_ONLY
639
571
 
640
- #define SYCL_PSTORE(alignment) \
641
- template <typename packet_type> \
642
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstore##alignment( \
643
- const Eigen::TensorSycl::internal::RangeAccess< \
644
- cl::sycl::access::mode::read_write, \
645
- typename unpacket_traits<packet_type>::type>& to, \
646
- const packet_type& from) { \
647
- pstore##alignment(to.get_pointer(), from); \
648
- }
649
-
650
- // global space
651
- SYCL_PSTORE()
652
- SYCL_PSTORE(u)
653
-
654
- #undef SYCL_PSTORE
655
-
656
- template <typename scalar, typename packet_type, int Alignment>
657
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(
658
- Eigen::TensorSycl::internal::RangeAccess<
659
- cl::sycl::access::mode::read_write,
660
- typename unpacket_traits<packet_type>::type>
661
- to,
662
- const packet_type& from) {
663
- pstoret<scalar, packet_type, Alignment>(to.get_pointer(), from);
664
- }
665
-
666
572
  } // end namespace internal
667
573
 
668
574
  } // end namespace Eigen