forgeo-gmlib 0.6.2__cp39-cp39-musllinux_1_2_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (554) hide show
  1. forgeo/gmlib/GeologicalModel3D.py +758 -0
  2. forgeo/gmlib/__init__.py +9 -0
  3. forgeo/gmlib/_version.py +34 -0
  4. forgeo/gmlib/architecture/__init__.py +1 -0
  5. forgeo/gmlib/architecture/core.py +130 -0
  6. forgeo/gmlib/common.cpython-39-x86_64-linux-gnu.so +0 -0
  7. forgeo/gmlib/fault_network.py +171 -0
  8. forgeo/gmlib/geomodeller_data.py +101 -0
  9. forgeo/gmlib/geomodeller_project.py +396 -0
  10. forgeo/gmlib/myxmltools.py +30 -0
  11. forgeo/gmlib/pypotential2D.cpython-39-x86_64-linux-gnu.so +0 -0
  12. forgeo/gmlib/pypotential3D.cpython-39-x86_64-linux-gnu.so +0 -0
  13. forgeo/gmlib/tesselate.py +236 -0
  14. forgeo/gmlib/tesselate_deprecated.py +249 -0
  15. forgeo/gmlib/topography_reader.py +198 -0
  16. forgeo/gmlib/utils/__init__.py +0 -0
  17. forgeo/gmlib/utils/append_data.py +508 -0
  18. forgeo/gmlib/utils/export.py +45 -0
  19. forgeo/gmlib/utils/normalized_gradient.py +40 -0
  20. forgeo/gmlib/utils/tools.py +35 -0
  21. forgeo_gmlib-0.6.2.dist-info/METADATA +23 -0
  22. forgeo_gmlib-0.6.2.dist-info/RECORD +554 -0
  23. forgeo_gmlib-0.6.2.dist-info/WHEEL +5 -0
  24. forgeo_gmlib-0.6.2.dist-info/licenses/LICENSE +661 -0
  25. include/eigen3/Eigen/Cholesky +45 -0
  26. include/eigen3/Eigen/CholmodSupport +48 -0
  27. include/eigen3/Eigen/Core +385 -0
  28. include/eigen3/Eigen/Dense +7 -0
  29. include/eigen3/Eigen/Eigen +2 -0
  30. include/eigen3/Eigen/Eigenvalues +60 -0
  31. include/eigen3/Eigen/Geometry +59 -0
  32. include/eigen3/Eigen/Householder +29 -0
  33. include/eigen3/Eigen/IterativeLinearSolvers +48 -0
  34. include/eigen3/Eigen/Jacobi +32 -0
  35. include/eigen3/Eigen/KLUSupport +41 -0
  36. include/eigen3/Eigen/LU +47 -0
  37. include/eigen3/Eigen/MetisSupport +35 -0
  38. include/eigen3/Eigen/OrderingMethods +70 -0
  39. include/eigen3/Eigen/PaStiXSupport +49 -0
  40. include/eigen3/Eigen/PardisoSupport +35 -0
  41. include/eigen3/Eigen/QR +50 -0
  42. include/eigen3/Eigen/QtAlignedMalloc +39 -0
  43. include/eigen3/Eigen/SPQRSupport +34 -0
  44. include/eigen3/Eigen/SVD +50 -0
  45. include/eigen3/Eigen/Sparse +34 -0
  46. include/eigen3/Eigen/SparseCholesky +37 -0
  47. include/eigen3/Eigen/SparseCore +69 -0
  48. include/eigen3/Eigen/SparseLU +48 -0
  49. include/eigen3/Eigen/SparseQR +36 -0
  50. include/eigen3/Eigen/StdDeque +27 -0
  51. include/eigen3/Eigen/StdList +26 -0
  52. include/eigen3/Eigen/StdVector +27 -0
  53. include/eigen3/Eigen/SuperLUSupport +64 -0
  54. include/eigen3/Eigen/UmfPackSupport +40 -0
  55. include/eigen3/Eigen/src/Cholesky/LDLT.h +688 -0
  56. include/eigen3/Eigen/src/Cholesky/LLT.h +558 -0
  57. include/eigen3/Eigen/src/Cholesky/LLT_LAPACKE.h +99 -0
  58. include/eigen3/Eigen/src/CholmodSupport/CholmodSupport.h +682 -0
  59. include/eigen3/Eigen/src/Core/ArithmeticSequence.h +406 -0
  60. include/eigen3/Eigen/src/Core/Array.h +425 -0
  61. include/eigen3/Eigen/src/Core/ArrayBase.h +226 -0
  62. include/eigen3/Eigen/src/Core/ArrayWrapper.h +209 -0
  63. include/eigen3/Eigen/src/Core/Assign.h +90 -0
  64. include/eigen3/Eigen/src/Core/AssignEvaluator.h +1010 -0
  65. include/eigen3/Eigen/src/Core/Assign_MKL.h +178 -0
  66. include/eigen3/Eigen/src/Core/BandMatrix.h +353 -0
  67. include/eigen3/Eigen/src/Core/Block.h +463 -0
  68. include/eigen3/Eigen/src/Core/BooleanRedux.h +164 -0
  69. include/eigen3/Eigen/src/Core/CommaInitializer.h +164 -0
  70. include/eigen3/Eigen/src/Core/ConditionEstimator.h +175 -0
  71. include/eigen3/Eigen/src/Core/CoreEvaluators.h +1741 -0
  72. include/eigen3/Eigen/src/Core/CoreIterators.h +132 -0
  73. include/eigen3/Eigen/src/Core/CwiseBinaryOp.h +183 -0
  74. include/eigen3/Eigen/src/Core/CwiseNullaryOp.h +1001 -0
  75. include/eigen3/Eigen/src/Core/CwiseTernaryOp.h +197 -0
  76. include/eigen3/Eigen/src/Core/CwiseUnaryOp.h +103 -0
  77. include/eigen3/Eigen/src/Core/CwiseUnaryView.h +132 -0
  78. include/eigen3/Eigen/src/Core/DenseBase.h +701 -0
  79. include/eigen3/Eigen/src/Core/DenseCoeffsBase.h +685 -0
  80. include/eigen3/Eigen/src/Core/DenseStorage.h +652 -0
  81. include/eigen3/Eigen/src/Core/Diagonal.h +259 -0
  82. include/eigen3/Eigen/src/Core/DiagonalMatrix.h +391 -0
  83. include/eigen3/Eigen/src/Core/DiagonalProduct.h +28 -0
  84. include/eigen3/Eigen/src/Core/Dot.h +313 -0
  85. include/eigen3/Eigen/src/Core/EigenBase.h +160 -0
  86. include/eigen3/Eigen/src/Core/ForceAlignedAccess.h +150 -0
  87. include/eigen3/Eigen/src/Core/Fuzzy.h +155 -0
  88. include/eigen3/Eigen/src/Core/GeneralProduct.h +465 -0
  89. include/eigen3/Eigen/src/Core/GenericPacketMath.h +1040 -0
  90. include/eigen3/Eigen/src/Core/GlobalFunctions.h +194 -0
  91. include/eigen3/Eigen/src/Core/IO.h +258 -0
  92. include/eigen3/Eigen/src/Core/IndexedView.h +247 -0
  93. include/eigen3/Eigen/src/Core/Inverse.h +117 -0
  94. include/eigen3/Eigen/src/Core/Map.h +171 -0
  95. include/eigen3/Eigen/src/Core/MapBase.h +310 -0
  96. include/eigen3/Eigen/src/Core/MathFunctions.h +2212 -0
  97. include/eigen3/Eigen/src/Core/MathFunctionsImpl.h +200 -0
  98. include/eigen3/Eigen/src/Core/Matrix.h +578 -0
  99. include/eigen3/Eigen/src/Core/MatrixBase.h +541 -0
  100. include/eigen3/Eigen/src/Core/NestByValue.h +85 -0
  101. include/eigen3/Eigen/src/Core/NoAlias.h +109 -0
  102. include/eigen3/Eigen/src/Core/NumTraits.h +351 -0
  103. include/eigen3/Eigen/src/Core/PartialReduxEvaluator.h +237 -0
  104. include/eigen3/Eigen/src/Core/PermutationMatrix.h +605 -0
  105. include/eigen3/Eigen/src/Core/PlainObjectBase.h +1128 -0
  106. include/eigen3/Eigen/src/Core/Product.h +191 -0
  107. include/eigen3/Eigen/src/Core/ProductEvaluators.h +1179 -0
  108. include/eigen3/Eigen/src/Core/Random.h +218 -0
  109. include/eigen3/Eigen/src/Core/Redux.h +515 -0
  110. include/eigen3/Eigen/src/Core/Ref.h +381 -0
  111. include/eigen3/Eigen/src/Core/Replicate.h +142 -0
  112. include/eigen3/Eigen/src/Core/Reshaped.h +454 -0
  113. include/eigen3/Eigen/src/Core/ReturnByValue.h +119 -0
  114. include/eigen3/Eigen/src/Core/Reverse.h +217 -0
  115. include/eigen3/Eigen/src/Core/Select.h +164 -0
  116. include/eigen3/Eigen/src/Core/SelfAdjointView.h +365 -0
  117. include/eigen3/Eigen/src/Core/SelfCwiseBinaryOp.h +47 -0
  118. include/eigen3/Eigen/src/Core/Solve.h +188 -0
  119. include/eigen3/Eigen/src/Core/SolveTriangular.h +235 -0
  120. include/eigen3/Eigen/src/Core/SolverBase.h +168 -0
  121. include/eigen3/Eigen/src/Core/StableNorm.h +251 -0
  122. include/eigen3/Eigen/src/Core/StlIterators.h +463 -0
  123. include/eigen3/Eigen/src/Core/Stride.h +120 -0
  124. include/eigen3/Eigen/src/Core/Swap.h +68 -0
  125. include/eigen3/Eigen/src/Core/Transpose.h +464 -0
  126. include/eigen3/Eigen/src/Core/Transpositions.h +386 -0
  127. include/eigen3/Eigen/src/Core/TriangularMatrix.h +994 -0
  128. include/eigen3/Eigen/src/Core/VectorBlock.h +96 -0
  129. include/eigen3/Eigen/src/Core/VectorwiseOp.h +784 -0
  130. include/eigen3/Eigen/src/Core/Visitor.h +381 -0
  131. include/eigen3/Eigen/src/Core/arch/AVX/Complex.h +368 -0
  132. include/eigen3/Eigen/src/Core/arch/AVX/MathFunctions.h +228 -0
  133. include/eigen3/Eigen/src/Core/arch/AVX/PacketMath.h +1588 -0
  134. include/eigen3/Eigen/src/Core/arch/AVX/TypeCasting.h +115 -0
  135. include/eigen3/Eigen/src/Core/arch/AVX512/Complex.h +384 -0
  136. include/eigen3/Eigen/src/Core/arch/AVX512/MathFunctions.h +361 -0
  137. include/eigen3/Eigen/src/Core/arch/AVX512/PacketMath.h +2270 -0
  138. include/eigen3/Eigen/src/Core/arch/AVX512/TypeCasting.h +89 -0
  139. include/eigen3/Eigen/src/Core/arch/AltiVec/Complex.h +415 -0
  140. include/eigen3/Eigen/src/Core/arch/AltiVec/MathFunctions.h +119 -0
  141. include/eigen3/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2776 -0
  142. include/eigen3/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +159 -0
  143. include/eigen3/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +627 -0
  144. include/eigen3/Eigen/src/Core/arch/AltiVec/MatrixVectorProduct.h +2400 -0
  145. include/eigen3/Eigen/src/Core/arch/AltiVec/PacketMath.h +2743 -0
  146. include/eigen3/Eigen/src/Core/arch/CUDA/Complex.h +269 -0
  147. include/eigen3/Eigen/src/Core/arch/Default/BFloat16.h +688 -0
  148. include/eigen3/Eigen/src/Core/arch/Default/ConjHelper.h +117 -0
  149. include/eigen3/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1662 -0
  150. include/eigen3/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +116 -0
  151. include/eigen3/Eigen/src/Core/arch/Default/Half.h +950 -0
  152. include/eigen3/Eigen/src/Core/arch/Default/Settings.h +49 -0
  153. include/eigen3/Eigen/src/Core/arch/Default/TypeCasting.h +120 -0
  154. include/eigen3/Eigen/src/Core/arch/GPU/MathFunctions.h +103 -0
  155. include/eigen3/Eigen/src/Core/arch/GPU/PacketMath.h +1646 -0
  156. include/eigen3/Eigen/src/Core/arch/GPU/TypeCasting.h +79 -0
  157. include/eigen3/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
  158. include/eigen3/Eigen/src/Core/arch/MSA/Complex.h +645 -0
  159. include/eigen3/Eigen/src/Core/arch/MSA/MathFunctions.h +387 -0
  160. include/eigen3/Eigen/src/Core/arch/MSA/PacketMath.h +1233 -0
  161. include/eigen3/Eigen/src/Core/arch/NEON/Complex.h +560 -0
  162. include/eigen3/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +183 -0
  163. include/eigen3/Eigen/src/Core/arch/NEON/MathFunctions.h +75 -0
  164. include/eigen3/Eigen/src/Core/arch/NEON/PacketMath.h +4653 -0
  165. include/eigen3/Eigen/src/Core/arch/NEON/TypeCasting.h +1424 -0
  166. include/eigen3/Eigen/src/Core/arch/SSE/Complex.h +338 -0
  167. include/eigen3/Eigen/src/Core/arch/SSE/MathFunctions.h +199 -0
  168. include/eigen3/Eigen/src/Core/arch/SSE/PacketMath.h +1505 -0
  169. include/eigen3/Eigen/src/Core/arch/SSE/TypeCasting.h +142 -0
  170. include/eigen3/Eigen/src/Core/arch/SVE/MathFunctions.h +44 -0
  171. include/eigen3/Eigen/src/Core/arch/SVE/PacketMath.h +752 -0
  172. include/eigen3/Eigen/src/Core/arch/SVE/TypeCasting.h +49 -0
  173. include/eigen3/Eigen/src/Core/arch/SYCL/InteropHeaders.h +232 -0
  174. include/eigen3/Eigen/src/Core/arch/SYCL/MathFunctions.h +301 -0
  175. include/eigen3/Eigen/src/Core/arch/SYCL/PacketMath.h +670 -0
  176. include/eigen3/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +694 -0
  177. include/eigen3/Eigen/src/Core/arch/SYCL/TypeCasting.h +85 -0
  178. include/eigen3/Eigen/src/Core/arch/ZVector/Complex.h +428 -0
  179. include/eigen3/Eigen/src/Core/arch/ZVector/MathFunctions.h +233 -0
  180. include/eigen3/Eigen/src/Core/arch/ZVector/PacketMath.h +1060 -0
  181. include/eigen3/Eigen/src/Core/functors/AssignmentFunctors.h +177 -0
  182. include/eigen3/Eigen/src/Core/functors/BinaryFunctors.h +541 -0
  183. include/eigen3/Eigen/src/Core/functors/NullaryFunctors.h +189 -0
  184. include/eigen3/Eigen/src/Core/functors/StlFunctors.h +166 -0
  185. include/eigen3/Eigen/src/Core/functors/TernaryFunctors.h +25 -0
  186. include/eigen3/Eigen/src/Core/functors/UnaryFunctors.h +1131 -0
  187. include/eigen3/Eigen/src/Core/products/GeneralBlockPanelKernel.h +2645 -0
  188. include/eigen3/Eigen/src/Core/products/GeneralMatrixMatrix.h +517 -0
  189. include/eigen3/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +322 -0
  190. include/eigen3/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +145 -0
  191. include/eigen3/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +124 -0
  192. include/eigen3/Eigen/src/Core/products/GeneralMatrixVector.h +523 -0
  193. include/eigen3/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +136 -0
  194. include/eigen3/Eigen/src/Core/products/Parallelizer.h +180 -0
  195. include/eigen3/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +544 -0
  196. include/eigen3/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +295 -0
  197. include/eigen3/Eigen/src/Core/products/SelfadjointMatrixVector.h +262 -0
  198. include/eigen3/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +118 -0
  199. include/eigen3/Eigen/src/Core/products/SelfadjointProduct.h +133 -0
  200. include/eigen3/Eigen/src/Core/products/SelfadjointRank2Update.h +94 -0
  201. include/eigen3/Eigen/src/Core/products/TriangularMatrixMatrix.h +472 -0
  202. include/eigen3/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +317 -0
  203. include/eigen3/Eigen/src/Core/products/TriangularMatrixVector.h +350 -0
  204. include/eigen3/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +255 -0
  205. include/eigen3/Eigen/src/Core/products/TriangularSolverMatrix.h +337 -0
  206. include/eigen3/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +167 -0
  207. include/eigen3/Eigen/src/Core/products/TriangularSolverVector.h +148 -0
  208. include/eigen3/Eigen/src/Core/util/BlasUtil.h +583 -0
  209. include/eigen3/Eigen/src/Core/util/ConfigureVectorization.h +521 -0
  210. include/eigen3/Eigen/src/Core/util/Constants.h +563 -0
  211. include/eigen3/Eigen/src/Core/util/DisableStupidWarnings.h +138 -0
  212. include/eigen3/Eigen/src/Core/util/ForwardDeclarations.h +322 -0
  213. include/eigen3/Eigen/src/Core/util/IndexedViewHelper.h +186 -0
  214. include/eigen3/Eigen/src/Core/util/IntegralConstant.h +272 -0
  215. include/eigen3/Eigen/src/Core/util/MKL_support.h +137 -0
  216. include/eigen3/Eigen/src/Core/util/Macros.h +1511 -0
  217. include/eigen3/Eigen/src/Core/util/Memory.h +1202 -0
  218. include/eigen3/Eigen/src/Core/util/Meta.h +812 -0
  219. include/eigen3/Eigen/src/Core/util/NonMPL2.h +3 -0
  220. include/eigen3/Eigen/src/Core/util/ReenableStupidWarnings.h +31 -0
  221. include/eigen3/Eigen/src/Core/util/ReshapedHelper.h +51 -0
  222. include/eigen3/Eigen/src/Core/util/StaticAssert.h +221 -0
  223. include/eigen3/Eigen/src/Core/util/SymbolicIndex.h +293 -0
  224. include/eigen3/Eigen/src/Core/util/XprHelper.h +856 -0
  225. include/eigen3/Eigen/src/Eigenvalues/ComplexEigenSolver.h +345 -0
  226. include/eigen3/Eigen/src/Eigenvalues/ComplexSchur.h +462 -0
  227. include/eigen3/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +91 -0
  228. include/eigen3/Eigen/src/Eigenvalues/EigenSolver.h +622 -0
  229. include/eigen3/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +417 -0
  230. include/eigen3/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +226 -0
  231. include/eigen3/Eigen/src/Eigenvalues/HessenbergDecomposition.h +374 -0
  232. include/eigen3/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +158 -0
  233. include/eigen3/Eigen/src/Eigenvalues/RealQZ.h +657 -0
  234. include/eigen3/Eigen/src/Eigenvalues/RealSchur.h +557 -0
  235. include/eigen3/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +77 -0
  236. include/eigen3/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +904 -0
  237. include/eigen3/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +87 -0
  238. include/eigen3/Eigen/src/Eigenvalues/Tridiagonalization.h +560 -0
  239. include/eigen3/Eigen/src/Geometry/AlignedBox.h +486 -0
  240. include/eigen3/Eigen/src/Geometry/AngleAxis.h +247 -0
  241. include/eigen3/Eigen/src/Geometry/EulerAngles.h +114 -0
  242. include/eigen3/Eigen/src/Geometry/Homogeneous.h +501 -0
  243. include/eigen3/Eigen/src/Geometry/Hyperplane.h +282 -0
  244. include/eigen3/Eigen/src/Geometry/OrthoMethods.h +235 -0
  245. include/eigen3/Eigen/src/Geometry/ParametrizedLine.h +232 -0
  246. include/eigen3/Eigen/src/Geometry/Quaternion.h +870 -0
  247. include/eigen3/Eigen/src/Geometry/Rotation2D.h +199 -0
  248. include/eigen3/Eigen/src/Geometry/RotationBase.h +206 -0
  249. include/eigen3/Eigen/src/Geometry/Scaling.h +188 -0
  250. include/eigen3/Eigen/src/Geometry/Transform.h +1566 -0
  251. include/eigen3/Eigen/src/Geometry/Translation.h +202 -0
  252. include/eigen3/Eigen/src/Geometry/Umeyama.h +168 -0
  253. include/eigen3/Eigen/src/Geometry/arch/Geometry_SIMD.h +168 -0
  254. include/eigen3/Eigen/src/Householder/BlockHouseholder.h +110 -0
  255. include/eigen3/Eigen/src/Householder/Householder.h +176 -0
  256. include/eigen3/Eigen/src/Householder/HouseholderSequence.h +553 -0
  257. include/eigen3/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +226 -0
  258. include/eigen3/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +212 -0
  259. include/eigen3/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +227 -0
  260. include/eigen3/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +394 -0
  261. include/eigen3/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +453 -0
  262. include/eigen3/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +444 -0
  263. include/eigen3/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +198 -0
  264. include/eigen3/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +117 -0
  265. include/eigen3/Eigen/src/Jacobi/Jacobi.h +483 -0
  266. include/eigen3/Eigen/src/KLUSupport/KLUSupport.h +358 -0
  267. include/eigen3/Eigen/src/LU/Determinant.h +117 -0
  268. include/eigen3/Eigen/src/LU/FullPivLU.h +877 -0
  269. include/eigen3/Eigen/src/LU/InverseImpl.h +432 -0
  270. include/eigen3/Eigen/src/LU/PartialPivLU.h +624 -0
  271. include/eigen3/Eigen/src/LU/PartialPivLU_LAPACKE.h +83 -0
  272. include/eigen3/Eigen/src/LU/arch/InverseSize4.h +363 -0
  273. include/eigen3/Eigen/src/MetisSupport/MetisSupport.h +137 -0
  274. include/eigen3/Eigen/src/OrderingMethods/Amd.h +435 -0
  275. include/eigen3/Eigen/src/OrderingMethods/Eigen_Colamd.h +1863 -0
  276. include/eigen3/Eigen/src/OrderingMethods/Ordering.h +153 -0
  277. include/eigen3/Eigen/src/PaStiXSupport/PaStiXSupport.h +678 -0
  278. include/eigen3/Eigen/src/PardisoSupport/PardisoSupport.h +545 -0
  279. include/eigen3/Eigen/src/QR/ColPivHouseholderQR.h +674 -0
  280. include/eigen3/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +97 -0
  281. include/eigen3/Eigen/src/QR/CompleteOrthogonalDecomposition.h +635 -0
  282. include/eigen3/Eigen/src/QR/FullPivHouseholderQR.h +713 -0
  283. include/eigen3/Eigen/src/QR/HouseholderQR.h +434 -0
  284. include/eigen3/Eigen/src/QR/HouseholderQR_LAPACKE.h +68 -0
  285. include/eigen3/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +335 -0
  286. include/eigen3/Eigen/src/SVD/BDCSVD.h +1377 -0
  287. include/eigen3/Eigen/src/SVD/JacobiSVD.h +813 -0
  288. include/eigen3/Eigen/src/SVD/JacobiSVD_LAPACKE.h +91 -0
  289. include/eigen3/Eigen/src/SVD/SVDBase.h +376 -0
  290. include/eigen3/Eigen/src/SVD/UpperBidiagonalization.h +415 -0
  291. include/eigen3/Eigen/src/SparseCholesky/SimplicialCholesky.h +697 -0
  292. include/eigen3/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +174 -0
  293. include/eigen3/Eigen/src/SparseCore/AmbiVector.h +378 -0
  294. include/eigen3/Eigen/src/SparseCore/CompressedStorage.h +274 -0
  295. include/eigen3/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +352 -0
  296. include/eigen3/Eigen/src/SparseCore/MappedSparseMatrix.h +67 -0
  297. include/eigen3/Eigen/src/SparseCore/SparseAssign.h +270 -0
  298. include/eigen3/Eigen/src/SparseCore/SparseBlock.h +566 -0
  299. include/eigen3/Eigen/src/SparseCore/SparseColEtree.h +206 -0
  300. include/eigen3/Eigen/src/SparseCore/SparseCompressedBase.h +370 -0
  301. include/eigen3/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +722 -0
  302. include/eigen3/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +150 -0
  303. include/eigen3/Eigen/src/SparseCore/SparseDenseProduct.h +342 -0
  304. include/eigen3/Eigen/src/SparseCore/SparseDiagonalProduct.h +138 -0
  305. include/eigen3/Eigen/src/SparseCore/SparseDot.h +98 -0
  306. include/eigen3/Eigen/src/SparseCore/SparseFuzzy.h +29 -0
  307. include/eigen3/Eigen/src/SparseCore/SparseMap.h +306 -0
  308. include/eigen3/Eigen/src/SparseCore/SparseMatrix.h +1518 -0
  309. include/eigen3/Eigen/src/SparseCore/SparseMatrixBase.h +399 -0
  310. include/eigen3/Eigen/src/SparseCore/SparsePermutation.h +178 -0
  311. include/eigen3/Eigen/src/SparseCore/SparseProduct.h +182 -0
  312. include/eigen3/Eigen/src/SparseCore/SparseRedux.h +49 -0
  313. include/eigen3/Eigen/src/SparseCore/SparseRef.h +397 -0
  314. include/eigen3/Eigen/src/SparseCore/SparseSelfAdjointView.h +659 -0
  315. include/eigen3/Eigen/src/SparseCore/SparseSolverBase.h +124 -0
  316. include/eigen3/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +198 -0
  317. include/eigen3/Eigen/src/SparseCore/SparseTranspose.h +92 -0
  318. include/eigen3/Eigen/src/SparseCore/SparseTriangularView.h +189 -0
  319. include/eigen3/Eigen/src/SparseCore/SparseUtil.h +186 -0
  320. include/eigen3/Eigen/src/SparseCore/SparseVector.h +480 -0
  321. include/eigen3/Eigen/src/SparseCore/SparseView.h +254 -0
  322. include/eigen3/Eigen/src/SparseCore/TriangularSolver.h +315 -0
  323. include/eigen3/Eigen/src/SparseLU/SparseLU.h +925 -0
  324. include/eigen3/Eigen/src/SparseLU/SparseLUImpl.h +66 -0
  325. include/eigen3/Eigen/src/SparseLU/SparseLU_Memory.h +226 -0
  326. include/eigen3/Eigen/src/SparseLU/SparseLU_Structs.h +110 -0
  327. include/eigen3/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +374 -0
  328. include/eigen3/Eigen/src/SparseLU/SparseLU_Utils.h +80 -0
  329. include/eigen3/Eigen/src/SparseLU/SparseLU_column_bmod.h +181 -0
  330. include/eigen3/Eigen/src/SparseLU/SparseLU_column_dfs.h +179 -0
  331. include/eigen3/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +107 -0
  332. include/eigen3/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +121 -0
  333. include/eigen3/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +129 -0
  334. include/eigen3/Eigen/src/SparseLU/SparseLU_panel_bmod.h +222 -0
  335. include/eigen3/Eigen/src/SparseLU/SparseLU_panel_dfs.h +258 -0
  336. include/eigen3/Eigen/src/SparseLU/SparseLU_pivotL.h +137 -0
  337. include/eigen3/Eigen/src/SparseLU/SparseLU_pruneL.h +136 -0
  338. include/eigen3/Eigen/src/SparseLU/SparseLU_relax_snode.h +83 -0
  339. include/eigen3/Eigen/src/SparseQR/SparseQR.h +758 -0
  340. include/eigen3/Eigen/src/StlSupport/StdDeque.h +116 -0
  341. include/eigen3/Eigen/src/StlSupport/StdList.h +106 -0
  342. include/eigen3/Eigen/src/StlSupport/StdVector.h +131 -0
  343. include/eigen3/Eigen/src/StlSupport/details.h +84 -0
  344. include/eigen3/Eigen/src/SuperLUSupport/SuperLUSupport.h +1025 -0
  345. include/eigen3/Eigen/src/UmfPackSupport/UmfPackSupport.h +642 -0
  346. include/eigen3/Eigen/src/misc/Image.h +82 -0
  347. include/eigen3/Eigen/src/misc/Kernel.h +79 -0
  348. include/eigen3/Eigen/src/misc/RealSvd2x2.h +55 -0
  349. include/eigen3/Eigen/src/misc/blas.h +440 -0
  350. include/eigen3/Eigen/src/misc/lapack.h +152 -0
  351. include/eigen3/Eigen/src/misc/lapacke.h +16292 -0
  352. include/eigen3/Eigen/src/misc/lapacke_mangling.h +17 -0
  353. include/eigen3/Eigen/src/plugins/ArrayCwiseBinaryOps.h +431 -0
  354. include/eigen3/Eigen/src/plugins/ArrayCwiseUnaryOps.h +696 -0
  355. include/eigen3/Eigen/src/plugins/BlockMethods.h +1442 -0
  356. include/eigen3/Eigen/src/plugins/CommonCwiseBinaryOps.h +115 -0
  357. include/eigen3/Eigen/src/plugins/CommonCwiseUnaryOps.h +177 -0
  358. include/eigen3/Eigen/src/plugins/IndexedViewMethods.h +262 -0
  359. include/eigen3/Eigen/src/plugins/MatrixCwiseBinaryOps.h +184 -0
  360. include/eigen3/Eigen/src/plugins/MatrixCwiseUnaryOps.h +95 -0
  361. include/eigen3/Eigen/src/plugins/ReshapedMethods.h +149 -0
  362. include/eigen3/signature_of_eigen3_matrix_library +1 -0
  363. include/eigen3/unsupported/Eigen/AdolcForward +159 -0
  364. include/eigen3/unsupported/Eigen/AlignedVector3 +234 -0
  365. include/eigen3/unsupported/Eigen/ArpackSupport +30 -0
  366. include/eigen3/unsupported/Eigen/AutoDiff +48 -0
  367. include/eigen3/unsupported/Eigen/BVH +95 -0
  368. include/eigen3/unsupported/Eigen/CXX11/Tensor +137 -0
  369. include/eigen3/unsupported/Eigen/CXX11/TensorSymmetry +42 -0
  370. include/eigen3/unsupported/Eigen/CXX11/ThreadPool +74 -0
  371. include/eigen3/unsupported/Eigen/CXX11/src/Tensor/Tensor.h +554 -0
  372. include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h +327 -0
  373. include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h +242 -0
  374. include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +1176 -0
  375. include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h +1559 -0
  376. include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h +1083 -0
  377. include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h +510 -0
  378. include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h +373 -0
  379. include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h +1019 -0
  380. include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h +73 -0
  381. include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h +6 -0
  382. include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h +1413 -0
  383. include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h +575 -0
  384. include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionSycl.h +1650 -0
  385. include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h +1679 -0
  386. include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h +455 -0
  387. include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h +1126 -0
  388. include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorConvolutionSycl.h +536 -0
  389. include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h +213 -0
  390. include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h +342 -0
  391. include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h +137 -0
  392. include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h +6 -0
  393. include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h +104 -0
  394. include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceGpu.h +389 -0
  395. include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h +1048 -0
  396. include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h +409 -0
  397. include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h +234 -0
  398. include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h +493 -0
  399. include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h +229 -0
  400. include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h +980 -0
  401. include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h +701 -0
  402. include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h +389 -0
  403. include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h +669 -0
  404. include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h +377 -0
  405. include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h +232 -0
  406. include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h +191 -0
  407. include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h +488 -0
  408. include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h +297 -0
  409. include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorGlobalFunctions.h +33 -0
  410. include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h +99 -0
  411. include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaUndefines.h +44 -0
  412. include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h +79 -0
  413. include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h +602 -0
  414. include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h +735 -0
  415. include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h +244 -0
  416. include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorInitializer.h +82 -0
  417. include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h +257 -0
  418. include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h +213 -0
  419. include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMacros.h +98 -0
  420. include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h +327 -0
  421. include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h +311 -0
  422. include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h +1098 -0
  423. include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h +705 -0
  424. include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h +286 -0
  425. include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h +317 -0
  426. include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h +1000 -0
  427. include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h +6 -0
  428. include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h +973 -0
  429. include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReductionSycl.h +582 -0
  430. include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h +454 -0
  431. include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h +462 -0
  432. include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h +528 -0
  433. include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorScanSycl.h +513 -0
  434. include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h +466 -0
  435. include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h +157 -0
  436. include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h +341 -0
  437. include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorTrace.h +299 -0
  438. include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h +264 -0
  439. include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h +249 -0
  440. include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h +628 -0
  441. include/eigen3/unsupported/Eigen/CXX11/src/TensorSymmetry/DynamicSymmetry.h +293 -0
  442. include/eigen3/unsupported/Eigen/CXX11/src/TensorSymmetry/StaticSymmetry.h +236 -0
  443. include/eigen3/unsupported/Eigen/CXX11/src/TensorSymmetry/Symmetry.h +338 -0
  444. include/eigen3/unsupported/Eigen/CXX11/src/TensorSymmetry/util/TemplateGroupTheory.h +669 -0
  445. include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/Barrier.h +67 -0
  446. include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/EventCount.h +249 -0
  447. include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h +486 -0
  448. include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/RunQueue.h +236 -0
  449. include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/ThreadCancel.h +23 -0
  450. include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/ThreadEnvironment.h +40 -0
  451. include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/ThreadLocal.h +301 -0
  452. include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/ThreadPoolInterface.h +48 -0
  453. include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/ThreadYield.h +20 -0
  454. include/eigen3/unsupported/Eigen/CXX11/src/util/CXX11Meta.h +538 -0
  455. include/eigen3/unsupported/Eigen/CXX11/src/util/CXX11Workarounds.h +88 -0
  456. include/eigen3/unsupported/Eigen/CXX11/src/util/EmulateArray.h +261 -0
  457. include/eigen3/unsupported/Eigen/CXX11/src/util/MaxSizeVector.h +158 -0
  458. include/eigen3/unsupported/Eigen/EulerAngles +43 -0
  459. include/eigen3/unsupported/Eigen/FFT +420 -0
  460. include/eigen3/unsupported/Eigen/IterativeSolvers +51 -0
  461. include/eigen3/unsupported/Eigen/KroneckerProduct +36 -0
  462. include/eigen3/unsupported/Eigen/LevenbergMarquardt +49 -0
  463. include/eigen3/unsupported/Eigen/MPRealSupport +213 -0
  464. include/eigen3/unsupported/Eigen/MatrixFunctions +504 -0
  465. include/eigen3/unsupported/Eigen/MoreVectorization +24 -0
  466. include/eigen3/unsupported/Eigen/NonLinearOptimization +140 -0
  467. include/eigen3/unsupported/Eigen/NumericalDiff +56 -0
  468. include/eigen3/unsupported/Eigen/OpenGLSupport +322 -0
  469. include/eigen3/unsupported/Eigen/Polynomials +137 -0
  470. include/eigen3/unsupported/Eigen/Skyline +39 -0
  471. include/eigen3/unsupported/Eigen/SparseExtra +54 -0
  472. include/eigen3/unsupported/Eigen/SpecialFunctions +103 -0
  473. include/eigen3/unsupported/Eigen/Splines +35 -0
  474. include/eigen3/unsupported/Eigen/src/AutoDiff/AutoDiffJacobian.h +108 -0
  475. include/eigen3/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h +730 -0
  476. include/eigen3/unsupported/Eigen/src/AutoDiff/AutoDiffVector.h +220 -0
  477. include/eigen3/unsupported/Eigen/src/BVH/BVAlgorithms.h +293 -0
  478. include/eigen3/unsupported/Eigen/src/BVH/KdBVH.h +223 -0
  479. include/eigen3/unsupported/Eigen/src/Eigenvalues/ArpackSelfAdjointEigenSolver.h +790 -0
  480. include/eigen3/unsupported/Eigen/src/EulerAngles/EulerAngles.h +356 -0
  481. include/eigen3/unsupported/Eigen/src/EulerAngles/EulerSystem.h +306 -0
  482. include/eigen3/unsupported/Eigen/src/FFT/ei_fftw_impl.h +261 -0
  483. include/eigen3/unsupported/Eigen/src/FFT/ei_kissfft_impl.h +449 -0
  484. include/eigen3/unsupported/Eigen/src/IterativeSolvers/ConstrainedConjGrad.h +187 -0
  485. include/eigen3/unsupported/Eigen/src/IterativeSolvers/DGMRES.h +511 -0
  486. include/eigen3/unsupported/Eigen/src/IterativeSolvers/GMRES.h +335 -0
  487. include/eigen3/unsupported/Eigen/src/IterativeSolvers/IDRS.h +436 -0
  488. include/eigen3/unsupported/Eigen/src/IterativeSolvers/IncompleteLU.h +90 -0
  489. include/eigen3/unsupported/Eigen/src/IterativeSolvers/IterationController.h +154 -0
  490. include/eigen3/unsupported/Eigen/src/IterativeSolvers/MINRES.h +267 -0
  491. include/eigen3/unsupported/Eigen/src/IterativeSolvers/Scaling.h +193 -0
  492. include/eigen3/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h +305 -0
  493. include/eigen3/unsupported/Eigen/src/LevenbergMarquardt/LMcovar.h +84 -0
  494. include/eigen3/unsupported/Eigen/src/LevenbergMarquardt/LMonestep.h +202 -0
  495. include/eigen3/unsupported/Eigen/src/LevenbergMarquardt/LMpar.h +160 -0
  496. include/eigen3/unsupported/Eigen/src/LevenbergMarquardt/LMqrsolv.h +188 -0
  497. include/eigen3/unsupported/Eigen/src/LevenbergMarquardt/LevenbergMarquardt.h +396 -0
  498. include/eigen3/unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h +441 -0
  499. include/eigen3/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h +569 -0
  500. include/eigen3/unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h +373 -0
  501. include/eigen3/unsupported/Eigen/src/MatrixFunctions/MatrixPower.h +705 -0
  502. include/eigen3/unsupported/Eigen/src/MatrixFunctions/MatrixSquareRoot.h +368 -0
  503. include/eigen3/unsupported/Eigen/src/MatrixFunctions/StemFunction.h +117 -0
  504. include/eigen3/unsupported/Eigen/src/MoreVectorization/MathFunctions.h +95 -0
  505. include/eigen3/unsupported/Eigen/src/NonLinearOptimization/HybridNonLinearSolver.h +601 -0
  506. include/eigen3/unsupported/Eigen/src/NonLinearOptimization/LevenbergMarquardt.h +657 -0
  507. include/eigen3/unsupported/Eigen/src/NonLinearOptimization/chkder.h +66 -0
  508. include/eigen3/unsupported/Eigen/src/NonLinearOptimization/covar.h +70 -0
  509. include/eigen3/unsupported/Eigen/src/NonLinearOptimization/dogleg.h +107 -0
  510. include/eigen3/unsupported/Eigen/src/NonLinearOptimization/fdjac1.h +79 -0
  511. include/eigen3/unsupported/Eigen/src/NonLinearOptimization/lmpar.h +298 -0
  512. include/eigen3/unsupported/Eigen/src/NonLinearOptimization/qrsolv.h +91 -0
  513. include/eigen3/unsupported/Eigen/src/NonLinearOptimization/r1mpyq.h +30 -0
  514. include/eigen3/unsupported/Eigen/src/NonLinearOptimization/r1updt.h +99 -0
  515. include/eigen3/unsupported/Eigen/src/NonLinearOptimization/rwupdt.h +49 -0
  516. include/eigen3/unsupported/Eigen/src/NumericalDiff/NumericalDiff.h +130 -0
  517. include/eigen3/unsupported/Eigen/src/Polynomials/Companion.h +280 -0
  518. include/eigen3/unsupported/Eigen/src/Polynomials/PolynomialSolver.h +429 -0
  519. include/eigen3/unsupported/Eigen/src/Polynomials/PolynomialUtils.h +143 -0
  520. include/eigen3/unsupported/Eigen/src/Skyline/SkylineInplaceLU.h +352 -0
  521. include/eigen3/unsupported/Eigen/src/Skyline/SkylineMatrix.h +862 -0
  522. include/eigen3/unsupported/Eigen/src/Skyline/SkylineMatrixBase.h +212 -0
  523. include/eigen3/unsupported/Eigen/src/Skyline/SkylineProduct.h +295 -0
  524. include/eigen3/unsupported/Eigen/src/Skyline/SkylineStorage.h +259 -0
  525. include/eigen3/unsupported/Eigen/src/Skyline/SkylineUtil.h +89 -0
  526. include/eigen3/unsupported/Eigen/src/SparseExtra/BlockOfDynamicSparseMatrix.h +122 -0
  527. include/eigen3/unsupported/Eigen/src/SparseExtra/BlockSparseMatrix.h +1079 -0
  528. include/eigen3/unsupported/Eigen/src/SparseExtra/DynamicSparseMatrix.h +404 -0
  529. include/eigen3/unsupported/Eigen/src/SparseExtra/MarketIO.h +282 -0
  530. include/eigen3/unsupported/Eigen/src/SparseExtra/MatrixMarketIterator.h +247 -0
  531. include/eigen3/unsupported/Eigen/src/SparseExtra/RandomSetter.h +349 -0
  532. include/eigen3/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsArrayAPI.h +286 -0
  533. include/eigen3/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsBFloat16.h +68 -0
  534. include/eigen3/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsFunctors.h +357 -0
  535. include/eigen3/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsHalf.h +66 -0
  536. include/eigen3/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsImpl.h +1959 -0
  537. include/eigen3/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsPacketMath.h +118 -0
  538. include/eigen3/unsupported/Eigen/src/SpecialFunctions/HipVectorCompatibility.h +67 -0
  539. include/eigen3/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsArrayAPI.h +167 -0
  540. include/eigen3/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsBFloat16.h +58 -0
  541. include/eigen3/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h +330 -0
  542. include/eigen3/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h +58 -0
  543. include/eigen3/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h +2051 -0
  544. include/eigen3/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h +79 -0
  545. include/eigen3/unsupported/Eigen/src/SpecialFunctions/arch/AVX/BesselFunctions.h +46 -0
  546. include/eigen3/unsupported/Eigen/src/SpecialFunctions/arch/AVX/SpecialFunctions.h +16 -0
  547. include/eigen3/unsupported/Eigen/src/SpecialFunctions/arch/AVX512/BesselFunctions.h +51 -0
  548. include/eigen3/unsupported/Eigen/src/SpecialFunctions/arch/AVX512/SpecialFunctions.h +16 -0
  549. include/eigen3/unsupported/Eigen/src/SpecialFunctions/arch/GPU/SpecialFunctions.h +369 -0
  550. include/eigen3/unsupported/Eigen/src/SpecialFunctions/arch/NEON/BesselFunctions.h +54 -0
  551. include/eigen3/unsupported/Eigen/src/SpecialFunctions/arch/NEON/SpecialFunctions.h +34 -0
  552. include/eigen3/unsupported/Eigen/src/Splines/Spline.h +507 -0
  553. include/eigen3/unsupported/Eigen/src/Splines/SplineFitting.h +431 -0
  554. include/eigen3/unsupported/Eigen/src/Splines/SplineFwd.h +93 -0
@@ -0,0 +1,1646 @@
1
+ // This file is part of Eigen, a lightweight C++ template library
2
+ // for linear algebra.
3
+ //
4
+ // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
5
+ //
6
+ // This Source Code Form is subject to the terms of the Mozilla
7
+ // Public License v. 2.0. If a copy of the MPL was not distributed
8
+ // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9
+
10
+ #ifndef EIGEN_PACKET_MATH_GPU_H
11
+ #define EIGEN_PACKET_MATH_GPU_H
12
+
13
+ namespace Eigen {
14
+
15
+ namespace internal {
16
+
17
+ // Read-only data cached load available.
18
+ #if defined(EIGEN_HIP_DEVICE_COMPILE) || (defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 350)
19
+ #define EIGEN_GPU_HAS_LDG 1
20
+ #endif
21
+
22
+ // FP16 math available.
23
+ #if (defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530)
24
+ #define EIGEN_CUDA_HAS_FP16_ARITHMETIC 1
25
+ #endif
26
+
27
+ #if defined(EIGEN_HIP_DEVICE_COMPILE) || defined(EIGEN_CUDA_HAS_FP16_ARITHMETIC)
28
+ #define EIGEN_GPU_HAS_FP16_ARITHMETIC 1
29
+ #endif
30
+
31
+ // Make sure this is only available when targeting a GPU: we don't want to
32
+ // introduce conflicts between these packet_traits definitions and the ones
33
+ // we'll use on the host side (SSE, AVX, ...)
34
+ #if defined(EIGEN_GPUCC) && defined(EIGEN_USE_GPU)
35
+
36
+ template<> struct is_arithmetic<float4> { enum { value = true }; };
37
+ template<> struct is_arithmetic<double2> { enum { value = true }; };
38
+
39
+ template<> struct packet_traits<float> : default_packet_traits
40
+ {
41
+ typedef float4 type;
42
+ typedef float4 half;
43
+ enum {
44
+ Vectorizable = 1,
45
+ AlignedOnScalar = 1,
46
+ size=4,
47
+ HasHalfPacket = 0,
48
+
49
+ HasDiv = 1,
50
+ HasSin = 0,
51
+ HasCos = 0,
52
+ HasLog = 1,
53
+ HasExp = 1,
54
+ HasSqrt = 1,
55
+ HasRsqrt = 1,
56
+ HasLGamma = 1,
57
+ HasDiGamma = 1,
58
+ HasZeta = 1,
59
+ HasPolygamma = 1,
60
+ HasErf = 1,
61
+ HasErfc = 1,
62
+ HasNdtri = 1,
63
+ HasBessel = 1,
64
+ HasIGamma = 1,
65
+ HasIGammaDerA = 1,
66
+ HasGammaSampleDerAlpha = 1,
67
+ HasIGammac = 1,
68
+ HasBetaInc = 1,
69
+
70
+ HasBlend = 0,
71
+ HasFloor = 1,
72
+ };
73
+ };
74
+
75
+ template<> struct packet_traits<double> : default_packet_traits
76
+ {
77
+ typedef double2 type;
78
+ typedef double2 half;
79
+ enum {
80
+ Vectorizable = 1,
81
+ AlignedOnScalar = 1,
82
+ size=2,
83
+ HasHalfPacket = 0,
84
+
85
+ HasDiv = 1,
86
+ HasLog = 1,
87
+ HasExp = 1,
88
+ HasSqrt = 1,
89
+ HasRsqrt = 1,
90
+ HasLGamma = 1,
91
+ HasDiGamma = 1,
92
+ HasZeta = 1,
93
+ HasPolygamma = 1,
94
+ HasErf = 1,
95
+ HasErfc = 1,
96
+ HasNdtri = 1,
97
+ HasBessel = 1,
98
+ HasIGamma = 1,
99
+ HasIGammaDerA = 1,
100
+ HasGammaSampleDerAlpha = 1,
101
+ HasIGammac = 1,
102
+ HasBetaInc = 1,
103
+
104
+ HasBlend = 0,
105
+ HasFloor = 1,
106
+ };
107
+ };
108
+
109
+
110
+ template<> struct unpacket_traits<float4> { typedef float type; enum {size=4, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef float4 half; };
111
+ template<> struct unpacket_traits<double2> { typedef double type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef double2 half; };
112
+
113
+ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pset1<float4>(const float& from) {
114
+ return make_float4(from, from, from, from);
115
+ }
116
+ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pset1<double2>(const double& from) {
117
+ return make_double2(from, from);
118
+ }
119
+
120
+ // We need to distinguish ‘clang as the CUDA compiler’ from ‘clang as the host compiler,
121
+ // invoked by NVCC’ (e.g. on MacOS). The former needs to see both host and device implementation
122
+ // of the functions, while the latter can only deal with one of them.
123
+ #if defined(EIGEN_CUDA_ARCH) || defined(EIGEN_HIPCC) || (defined(EIGEN_CUDACC) && EIGEN_COMP_CLANG && !EIGEN_COMP_NVCC)
124
+
125
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float bitwise_and(const float& a,
126
+ const float& b) {
127
+ return __int_as_float(__float_as_int(a) & __float_as_int(b));
128
+ }
129
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double bitwise_and(const double& a,
130
+ const double& b) {
131
+ return __longlong_as_double(__double_as_longlong(a) &
132
+ __double_as_longlong(b));
133
+ }
134
+
135
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float bitwise_or(const float& a,
136
+ const float& b) {
137
+ return __int_as_float(__float_as_int(a) | __float_as_int(b));
138
+ }
139
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double bitwise_or(const double& a,
140
+ const double& b) {
141
+ return __longlong_as_double(__double_as_longlong(a) |
142
+ __double_as_longlong(b));
143
+ }
144
+
145
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float bitwise_xor(const float& a,
146
+ const float& b) {
147
+ return __int_as_float(__float_as_int(a) ^ __float_as_int(b));
148
+ }
149
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double bitwise_xor(const double& a,
150
+ const double& b) {
151
+ return __longlong_as_double(__double_as_longlong(a) ^
152
+ __double_as_longlong(b));
153
+ }
154
+
155
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float bitwise_andnot(const float& a,
156
+ const float& b) {
157
+ return __int_as_float(__float_as_int(a) & ~__float_as_int(b));
158
+ }
159
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double bitwise_andnot(const double& a,
160
+ const double& b) {
161
+ return __longlong_as_double(__double_as_longlong(a) &
162
+ ~__double_as_longlong(b));
163
+ }
164
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float eq_mask(const float& a,
165
+ const float& b) {
166
+ return __int_as_float(a == b ? 0xffffffffu : 0u);
167
+ }
168
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double eq_mask(const double& a,
169
+ const double& b) {
170
+ return __longlong_as_double(a == b ? 0xffffffffffffffffull : 0ull);
171
+ }
172
+
173
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float lt_mask(const float& a,
174
+ const float& b) {
175
+ return __int_as_float(a < b ? 0xffffffffu : 0u);
176
+ }
177
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double lt_mask(const double& a,
178
+ const double& b) {
179
+ return __longlong_as_double(a < b ? 0xffffffffffffffffull : 0ull);
180
+ }
181
+
182
+ template <>
183
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pand<float4>(const float4& a,
184
+ const float4& b) {
185
+ return make_float4(bitwise_and(a.x, b.x), bitwise_and(a.y, b.y),
186
+ bitwise_and(a.z, b.z), bitwise_and(a.w, b.w));
187
+ }
188
+ template <>
189
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pand<double2>(const double2& a,
190
+ const double2& b) {
191
+ return make_double2(bitwise_and(a.x, b.x), bitwise_and(a.y, b.y));
192
+ }
193
+
194
+ template <>
195
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 por<float4>(const float4& a,
196
+ const float4& b) {
197
+ return make_float4(bitwise_or(a.x, b.x), bitwise_or(a.y, b.y),
198
+ bitwise_or(a.z, b.z), bitwise_or(a.w, b.w));
199
+ }
200
+ template <>
201
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 por<double2>(const double2& a,
202
+ const double2& b) {
203
+ return make_double2(bitwise_or(a.x, b.x), bitwise_or(a.y, b.y));
204
+ }
205
+
206
+ template <>
207
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pxor<float4>(const float4& a,
208
+ const float4& b) {
209
+ return make_float4(bitwise_xor(a.x, b.x), bitwise_xor(a.y, b.y),
210
+ bitwise_xor(a.z, b.z), bitwise_xor(a.w, b.w));
211
+ }
212
+ template <>
213
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pxor<double2>(const double2& a,
214
+ const double2& b) {
215
+ return make_double2(bitwise_xor(a.x, b.x), bitwise_xor(a.y, b.y));
216
+ }
217
+
218
+ template <>
219
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pandnot<float4>(const float4& a,
220
+ const float4& b) {
221
+ return make_float4(bitwise_andnot(a.x, b.x), bitwise_andnot(a.y, b.y),
222
+ bitwise_andnot(a.z, b.z), bitwise_andnot(a.w, b.w));
223
+ }
224
+ template <>
225
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2
226
+ pandnot<double2>(const double2& a, const double2& b) {
227
+ return make_double2(bitwise_andnot(a.x, b.x), bitwise_andnot(a.y, b.y));
228
+ }
229
+
230
+ template <>
231
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pcmp_eq<float4>(const float4& a,
232
+ const float4& b) {
233
+ return make_float4(eq_mask(a.x, b.x), eq_mask(a.y, b.y), eq_mask(a.z, b.z),
234
+ eq_mask(a.w, b.w));
235
+ }
236
+ template <>
237
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pcmp_lt<float4>(const float4& a,
238
+ const float4& b) {
239
+ return make_float4(lt_mask(a.x, b.x), lt_mask(a.y, b.y), lt_mask(a.z, b.z),
240
+ lt_mask(a.w, b.w));
241
+ }
242
+ template <>
243
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2
244
+ pcmp_eq<double2>(const double2& a, const double2& b) {
245
+ return make_double2(eq_mask(a.x, b.x), eq_mask(a.y, b.y));
246
+ }
247
+ template <>
248
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2
249
+ pcmp_lt<double2>(const double2& a, const double2& b) {
250
+ return make_double2(lt_mask(a.x, b.x), lt_mask(a.y, b.y));
251
+ }
252
+ #endif // defined(EIGEN_CUDA_ARCH) || defined(EIGEN_HIPCC) || (defined(EIGEN_CUDACC) && EIGEN_COMP_CLANG && !EIGEN_COMP_NVCC)
253
+
254
+ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 plset<float4>(const float& a) {
255
+ return make_float4(a, a+1, a+2, a+3);
256
+ }
257
+ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 plset<double2>(const double& a) {
258
+ return make_double2(a, a+1);
259
+ }
260
+
261
+ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 padd<float4>(const float4& a, const float4& b) {
262
+ return make_float4(a.x+b.x, a.y+b.y, a.z+b.z, a.w+b.w);
263
+ }
264
+ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 padd<double2>(const double2& a, const double2& b) {
265
+ return make_double2(a.x+b.x, a.y+b.y);
266
+ }
267
+
268
+ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 psub<float4>(const float4& a, const float4& b) {
269
+ return make_float4(a.x-b.x, a.y-b.y, a.z-b.z, a.w-b.w);
270
+ }
271
+ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 psub<double2>(const double2& a, const double2& b) {
272
+ return make_double2(a.x-b.x, a.y-b.y);
273
+ }
274
+
275
+ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pnegate(const float4& a) {
276
+ return make_float4(-a.x, -a.y, -a.z, -a.w);
277
+ }
278
+ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pnegate(const double2& a) {
279
+ return make_double2(-a.x, -a.y);
280
+ }
281
+
282
+ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pconj(const float4& a) { return a; }
283
+ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pconj(const double2& a) { return a; }
284
+
285
+ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pmul<float4>(const float4& a, const float4& b) {
286
+ return make_float4(a.x*b.x, a.y*b.y, a.z*b.z, a.w*b.w);
287
+ }
288
+ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pmul<double2>(const double2& a, const double2& b) {
289
+ return make_double2(a.x*b.x, a.y*b.y);
290
+ }
291
+
292
+ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pdiv<float4>(const float4& a, const float4& b) {
293
+ return make_float4(a.x/b.x, a.y/b.y, a.z/b.z, a.w/b.w);
294
+ }
295
+ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pdiv<double2>(const double2& a, const double2& b) {
296
+ return make_double2(a.x/b.x, a.y/b.y);
297
+ }
298
+
299
+ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pmin<float4>(const float4& a, const float4& b) {
300
+ return make_float4(fminf(a.x, b.x), fminf(a.y, b.y), fminf(a.z, b.z), fminf(a.w, b.w));
301
+ }
302
+ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pmin<double2>(const double2& a, const double2& b) {
303
+ return make_double2(fmin(a.x, b.x), fmin(a.y, b.y));
304
+ }
305
+
306
+ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pmax<float4>(const float4& a, const float4& b) {
307
+ return make_float4(fmaxf(a.x, b.x), fmaxf(a.y, b.y), fmaxf(a.z, b.z), fmaxf(a.w, b.w));
308
+ }
309
+ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pmax<double2>(const double2& a, const double2& b) {
310
+ return make_double2(fmax(a.x, b.x), fmax(a.y, b.y));
311
+ }
312
+
313
+ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pload<float4>(const float* from) {
314
+ return *reinterpret_cast<const float4*>(from);
315
+ }
316
+
317
+ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pload<double2>(const double* from) {
318
+ return *reinterpret_cast<const double2*>(from);
319
+ }
320
+
321
+ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 ploadu<float4>(const float* from) {
322
+ return make_float4(from[0], from[1], from[2], from[3]);
323
+ }
324
+ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 ploadu<double2>(const double* from) {
325
+ return make_double2(from[0], from[1]);
326
+ }
327
+
328
+ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 ploaddup<float4>(const float* from) {
329
+ return make_float4(from[0], from[0], from[1], from[1]);
330
+ }
331
+ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 ploaddup<double2>(const double* from) {
332
+ return make_double2(from[0], from[0]);
333
+ }
334
+
335
+ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstore<float>(float* to, const float4& from) {
336
+ *reinterpret_cast<float4*>(to) = from;
337
+ }
338
+
339
+ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstore<double>(double* to, const double2& from) {
340
+ *reinterpret_cast<double2*>(to) = from;
341
+ }
342
+
343
+ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const float4& from) {
344
+ to[0] = from.x;
345
+ to[1] = from.y;
346
+ to[2] = from.z;
347
+ to[3] = from.w;
348
+ }
349
+
350
+ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const double2& from) {
351
+ to[0] = from.x;
352
+ to[1] = from.y;
353
+ }
354
+
355
+ template<>
356
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float4 ploadt_ro<float4, Aligned>(const float* from) {
357
+ #if defined(EIGEN_GPU_HAS_LDG)
358
+ return __ldg((const float4*)from);
359
+ #else
360
+ return make_float4(from[0], from[1], from[2], from[3]);
361
+ #endif
362
+ }
363
+ template<>
364
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double2 ploadt_ro<double2, Aligned>(const double* from) {
365
+ #if defined(EIGEN_GPU_HAS_LDG)
366
+ return __ldg((const double2*)from);
367
+ #else
368
+ return make_double2(from[0], from[1]);
369
+ #endif
370
+ }
371
+
372
+ template<>
373
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float4 ploadt_ro<float4, Unaligned>(const float* from) {
374
+ #if defined(EIGEN_GPU_HAS_LDG)
375
+ return make_float4(__ldg(from+0), __ldg(from+1), __ldg(from+2), __ldg(from+3));
376
+ #else
377
+ return make_float4(from[0], from[1], from[2], from[3]);
378
+ #endif
379
+ }
380
+ template<>
381
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double2 ploadt_ro<double2, Unaligned>(const double* from) {
382
+ #if defined(EIGEN_GPU_HAS_LDG)
383
+ return make_double2(__ldg(from+0), __ldg(from+1));
384
+ #else
385
+ return make_double2(from[0], from[1]);
386
+ #endif
387
+ }
388
+
389
+ template<> EIGEN_DEVICE_FUNC inline float4 pgather<float, float4>(const float* from, Index stride) {
390
+ return make_float4(from[0*stride], from[1*stride], from[2*stride], from[3*stride]);
391
+ }
392
+
393
+ template<> EIGEN_DEVICE_FUNC inline double2 pgather<double, double2>(const double* from, Index stride) {
394
+ return make_double2(from[0*stride], from[1*stride]);
395
+ }
396
+
397
+ template<> EIGEN_DEVICE_FUNC inline void pscatter<float, float4>(float* to, const float4& from, Index stride) {
398
+ to[stride*0] = from.x;
399
+ to[stride*1] = from.y;
400
+ to[stride*2] = from.z;
401
+ to[stride*3] = from.w;
402
+ }
403
+ template<> EIGEN_DEVICE_FUNC inline void pscatter<double, double2>(double* to, const double2& from, Index stride) {
404
+ to[stride*0] = from.x;
405
+ to[stride*1] = from.y;
406
+ }
407
+
408
+ template<> EIGEN_DEVICE_FUNC inline float pfirst<float4>(const float4& a) {
409
+ return a.x;
410
+ }
411
+ template<> EIGEN_DEVICE_FUNC inline double pfirst<double2>(const double2& a) {
412
+ return a.x;
413
+ }
414
+
415
+ template<> EIGEN_DEVICE_FUNC inline float predux<float4>(const float4& a) {
416
+ return a.x + a.y + a.z + a.w;
417
+ }
418
+ template<> EIGEN_DEVICE_FUNC inline double predux<double2>(const double2& a) {
419
+ return a.x + a.y;
420
+ }
421
+
422
+ template<> EIGEN_DEVICE_FUNC inline float predux_max<float4>(const float4& a) {
423
+ return fmaxf(fmaxf(a.x, a.y), fmaxf(a.z, a.w));
424
+ }
425
+ template<> EIGEN_DEVICE_FUNC inline double predux_max<double2>(const double2& a) {
426
+ return fmax(a.x, a.y);
427
+ }
428
+
429
+ template<> EIGEN_DEVICE_FUNC inline float predux_min<float4>(const float4& a) {
430
+ return fminf(fminf(a.x, a.y), fminf(a.z, a.w));
431
+ }
432
+ template<> EIGEN_DEVICE_FUNC inline double predux_min<double2>(const double2& a) {
433
+ return fmin(a.x, a.y);
434
+ }
435
+
436
+ template<> EIGEN_DEVICE_FUNC inline float predux_mul<float4>(const float4& a) {
437
+ return a.x * a.y * a.z * a.w;
438
+ }
439
+ template<> EIGEN_DEVICE_FUNC inline double predux_mul<double2>(const double2& a) {
440
+ return a.x * a.y;
441
+ }
442
+
443
+ template<> EIGEN_DEVICE_FUNC inline float4 pabs<float4>(const float4& a) {
444
+ return make_float4(fabsf(a.x), fabsf(a.y), fabsf(a.z), fabsf(a.w));
445
+ }
446
+ template<> EIGEN_DEVICE_FUNC inline double2 pabs<double2>(const double2& a) {
447
+ return make_double2(fabs(a.x), fabs(a.y));
448
+ }
449
+
450
+ template<> EIGEN_DEVICE_FUNC inline float4 pfloor<float4>(const float4& a) {
451
+ return make_float4(floorf(a.x), floorf(a.y), floorf(a.z), floorf(a.w));
452
+ }
453
+ template<> EIGEN_DEVICE_FUNC inline double2 pfloor<double2>(const double2& a) {
454
+ return make_double2(floor(a.x), floor(a.y));
455
+ }
456
+
457
+ EIGEN_DEVICE_FUNC inline void
458
+ ptranspose(PacketBlock<float4,4>& kernel) {
459
+ float tmp = kernel.packet[0].y;
460
+ kernel.packet[0].y = kernel.packet[1].x;
461
+ kernel.packet[1].x = tmp;
462
+
463
+ tmp = kernel.packet[0].z;
464
+ kernel.packet[0].z = kernel.packet[2].x;
465
+ kernel.packet[2].x = tmp;
466
+
467
+ tmp = kernel.packet[0].w;
468
+ kernel.packet[0].w = kernel.packet[3].x;
469
+ kernel.packet[3].x = tmp;
470
+
471
+ tmp = kernel.packet[1].z;
472
+ kernel.packet[1].z = kernel.packet[2].y;
473
+ kernel.packet[2].y = tmp;
474
+
475
+ tmp = kernel.packet[1].w;
476
+ kernel.packet[1].w = kernel.packet[3].y;
477
+ kernel.packet[3].y = tmp;
478
+
479
+ tmp = kernel.packet[2].w;
480
+ kernel.packet[2].w = kernel.packet[3].z;
481
+ kernel.packet[3].z = tmp;
482
+ }
483
+
484
+ EIGEN_DEVICE_FUNC inline void
485
+ ptranspose(PacketBlock<double2,2>& kernel) {
486
+ double tmp = kernel.packet[0].y;
487
+ kernel.packet[0].y = kernel.packet[1].x;
488
+ kernel.packet[1].x = tmp;
489
+ }
490
+
491
+ #endif // defined(EIGEN_GPUCC) && defined(EIGEN_USE_GPU)
492
+
493
+ // Half-packet functions are not available on the host for CUDA 9.0-9.2, only
494
+ // on device. There is no benefit to using them on the host anyways, since they are
495
+ // emulated.
496
+ #if (defined(EIGEN_HAS_CUDA_FP16) || defined(EIGEN_HAS_HIP_FP16)) && defined(EIGEN_GPU_COMPILE_PHASE)
497
+
498
+ typedef ulonglong2 Packet4h2;
499
+ template<> struct unpacket_traits<Packet4h2> { typedef Eigen::half type; enum {size=8, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet4h2 half; };
500
+ template<> struct is_arithmetic<Packet4h2> { enum { value = true }; };
501
+
502
+ template<> struct unpacket_traits<half2> { typedef Eigen::half type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef half2 half; };
503
+ template<> struct is_arithmetic<half2> { enum { value = true }; };
504
+
505
+ template<> struct packet_traits<Eigen::half> : default_packet_traits
506
+ {
507
+ typedef Packet4h2 type;
508
+ typedef Packet4h2 half;
509
+ enum {
510
+ Vectorizable = 1,
511
+ AlignedOnScalar = 1,
512
+ size=8,
513
+ HasHalfPacket = 0,
514
+ HasAdd = 1,
515
+ HasSub = 1,
516
+ HasMul = 1,
517
+ HasDiv = 1,
518
+ HasSqrt = 1,
519
+ HasRsqrt = 1,
520
+ HasExp = 1,
521
+ HasExpm1 = 1,
522
+ HasLog = 1,
523
+ HasLog1p = 1
524
+ };
525
+ };
526
+
527
+ template<>
528
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pset1<half2>(const Eigen::half& from) {
529
+ return __half2half2(from);
530
+ }
531
+
532
+ template <>
533
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2
534
+ pset1<Packet4h2>(const Eigen::half& from) {
535
+ Packet4h2 r;
536
+ half2* p_alias = reinterpret_cast<half2*>(&r);
537
+ p_alias[0] = pset1<half2>(from);
538
+ p_alias[1] = pset1<half2>(from);
539
+ p_alias[2] = pset1<half2>(from);
540
+ p_alias[3] = pset1<half2>(from);
541
+ return r;
542
+ }
543
+
544
+ namespace {
545
+
546
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pload(const Eigen::half* from) {
547
+ return *reinterpret_cast<const half2*>(from);
548
+ }
549
+
550
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 ploadu(const Eigen::half* from) {
551
+ return __halves2half2(from[0], from[1]);
552
+ }
553
+
554
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 ploaddup(const Eigen::half* from) {
555
+ return __halves2half2(from[0], from[0]);
556
+ }
557
+
558
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstore(Eigen::half* to,
559
+ const half2& from) {
560
+ *reinterpret_cast<half2*>(to) = from;
561
+ }
562
+
563
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu(Eigen::half* to,
564
+ const half2& from) {
565
+ to[0] = __low2half(from);
566
+ to[1] = __high2half(from);
567
+ }
568
+
569
+
570
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE half2 ploadt_ro_aligned(
571
+ const Eigen::half* from) {
572
+ #if defined(EIGEN_GPU_HAS_LDG)
573
+ // Input is guaranteed to be properly aligned.
574
+ return __ldg(reinterpret_cast<const half2*>(from));
575
+ #else
576
+ return __halves2half2(*(from+0), *(from+1));
577
+ #endif
578
+ }
579
+
580
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE half2 ploadt_ro_unaligned(
581
+ const Eigen::half* from) {
582
+ #if defined(EIGEN_GPU_HAS_LDG)
583
+ return __halves2half2(__ldg(from+0), __ldg(from+1));
584
+ #else
585
+ return __halves2half2(*(from+0), *(from+1));
586
+ #endif
587
+ }
588
+
589
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pgather(const Eigen::half* from,
590
+ Index stride) {
591
+ return __halves2half2(from[0*stride], from[1*stride]);
592
+ }
593
+
594
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter(
595
+ Eigen::half* to, const half2& from, Index stride) {
596
+ to[stride*0] = __low2half(from);
597
+ to[stride*1] = __high2half(from);
598
+ }
599
+
600
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half pfirst(const half2& a) {
601
+ return __low2half(a);
602
+ }
603
+
604
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pabs(const half2& a) {
605
+ half a1 = __low2half(a);
606
+ half a2 = __high2half(a);
607
+ half result1 = half_impl::raw_uint16_to_half(a1.x & 0x7FFF);
608
+ half result2 = half_impl::raw_uint16_to_half(a2.x & 0x7FFF);
609
+ return __halves2half2(result1, result2);
610
+ }
611
+
612
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 ptrue(const half2& /*a*/) {
613
+ half true_half = half_impl::raw_uint16_to_half(0xffffu);
614
+ return pset1<half2>(true_half);
615
+ }
616
+
617
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pzero(const half2& /*a*/) {
618
+ half false_half = half_impl::raw_uint16_to_half(0x0000u);
619
+ return pset1<half2>(false_half);
620
+ }
621
+
622
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void
623
+ ptranspose(PacketBlock<half2,2>& kernel) {
624
+ __half a1 = __low2half(kernel.packet[0]);
625
+ __half a2 = __high2half(kernel.packet[0]);
626
+ __half b1 = __low2half(kernel.packet[1]);
627
+ __half b2 = __high2half(kernel.packet[1]);
628
+ kernel.packet[0] = __halves2half2(a1, b1);
629
+ kernel.packet[1] = __halves2half2(a2, b2);
630
+ }
631
+
632
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 plset(const Eigen::half& a) {
633
+ #if defined(EIGEN_GPU_HAS_FP16_ARITHMETIC)
634
+ return __halves2half2(a, __hadd(a, __float2half(1.0f)));
635
+ #else
636
+ float f = __half2float(a) + 1.0f;
637
+ return __halves2half2(a, __float2half(f));
638
+ #endif
639
+ }
640
+
641
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pselect(const half2& mask,
642
+ const half2& a,
643
+ const half2& b) {
644
+ half mask_low = __low2half(mask);
645
+ half mask_high = __high2half(mask);
646
+ half result_low = mask_low == half(0) ? __low2half(b) : __low2half(a);
647
+ half result_high = mask_high == half(0) ? __high2half(b) : __high2half(a);
648
+ return __halves2half2(result_low, result_high);
649
+ }
650
+
651
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pcmp_eq(const half2& a,
652
+ const half2& b) {
653
+ half true_half = half_impl::raw_uint16_to_half(0xffffu);
654
+ half false_half = half_impl::raw_uint16_to_half(0x0000u);
655
+ half a1 = __low2half(a);
656
+ half a2 = __high2half(a);
657
+ half b1 = __low2half(b);
658
+ half b2 = __high2half(b);
659
+ half eq1 = __half2float(a1) == __half2float(b1) ? true_half : false_half;
660
+ half eq2 = __half2float(a2) == __half2float(b2) ? true_half : false_half;
661
+ return __halves2half2(eq1, eq2);
662
+ }
663
+
664
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pcmp_lt(const half2& a,
665
+ const half2& b) {
666
+ half true_half = half_impl::raw_uint16_to_half(0xffffu);
667
+ half false_half = half_impl::raw_uint16_to_half(0x0000u);
668
+ half a1 = __low2half(a);
669
+ half a2 = __high2half(a);
670
+ half b1 = __low2half(b);
671
+ half b2 = __high2half(b);
672
+ half eq1 = __half2float(a1) < __half2float(b1) ? true_half : false_half;
673
+ half eq2 = __half2float(a2) < __half2float(b2) ? true_half : false_half;
674
+ return __halves2half2(eq1, eq2);
675
+ }
676
+
677
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pand(const half2& a,
678
+ const half2& b) {
679
+ half a1 = __low2half(a);
680
+ half a2 = __high2half(a);
681
+ half b1 = __low2half(b);
682
+ half b2 = __high2half(b);
683
+ half result1 = half_impl::raw_uint16_to_half(a1.x & b1.x);
684
+ half result2 = half_impl::raw_uint16_to_half(a2.x & b2.x);
685
+ return __halves2half2(result1, result2);
686
+ }
687
+
688
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 por(const half2& a,
689
+ const half2& b) {
690
+ half a1 = __low2half(a);
691
+ half a2 = __high2half(a);
692
+ half b1 = __low2half(b);
693
+ half b2 = __high2half(b);
694
+ half result1 = half_impl::raw_uint16_to_half(a1.x | b1.x);
695
+ half result2 = half_impl::raw_uint16_to_half(a2.x | b2.x);
696
+ return __halves2half2(result1, result2);
697
+ }
698
+
699
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pxor(const half2& a,
700
+ const half2& b) {
701
+ half a1 = __low2half(a);
702
+ half a2 = __high2half(a);
703
+ half b1 = __low2half(b);
704
+ half b2 = __high2half(b);
705
+ half result1 = half_impl::raw_uint16_to_half(a1.x ^ b1.x);
706
+ half result2 = half_impl::raw_uint16_to_half(a2.x ^ b2.x);
707
+ return __halves2half2(result1, result2);
708
+ }
709
+
710
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pandnot(const half2& a,
711
+ const half2& b) {
712
+ half a1 = __low2half(a);
713
+ half a2 = __high2half(a);
714
+ half b1 = __low2half(b);
715
+ half b2 = __high2half(b);
716
+ half result1 = half_impl::raw_uint16_to_half(a1.x & ~b1.x);
717
+ half result2 = half_impl::raw_uint16_to_half(a2.x & ~b2.x);
718
+ return __halves2half2(result1, result2);
719
+ }
720
+
721
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 padd(const half2& a,
722
+ const half2& b) {
723
+ #if defined(EIGEN_GPU_HAS_FP16_ARITHMETIC)
724
+ return __hadd2(a, b);
725
+ #else
726
+ float a1 = __low2float(a);
727
+ float a2 = __high2float(a);
728
+ float b1 = __low2float(b);
729
+ float b2 = __high2float(b);
730
+ float r1 = a1 + b1;
731
+ float r2 = a2 + b2;
732
+ return __floats2half2_rn(r1, r2);
733
+ #endif
734
+ }
735
+
736
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 psub(const half2& a,
737
+ const half2& b) {
738
+ #if defined(EIGEN_GPU_HAS_FP16_ARITHMETIC)
739
+ return __hsub2(a, b);
740
+ #else
741
+ float a1 = __low2float(a);
742
+ float a2 = __high2float(a);
743
+ float b1 = __low2float(b);
744
+ float b2 = __high2float(b);
745
+ float r1 = a1 - b1;
746
+ float r2 = a2 - b2;
747
+ return __floats2half2_rn(r1, r2);
748
+ #endif
749
+ }
750
+
751
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pnegate(const half2& a) {
752
+ #if defined(EIGEN_GPU_HAS_FP16_ARITHMETIC)
753
+ return __hneg2(a);
754
+ #else
755
+ float a1 = __low2float(a);
756
+ float a2 = __high2float(a);
757
+ return __floats2half2_rn(-a1, -a2);
758
+ #endif
759
+ }
760
+
761
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pconj(const half2& a) { return a; }
762
+
763
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmul(const half2& a,
764
+ const half2& b) {
765
+ #if defined(EIGEN_GPU_HAS_FP16_ARITHMETIC)
766
+ return __hmul2(a, b);
767
+ #else
768
+ float a1 = __low2float(a);
769
+ float a2 = __high2float(a);
770
+ float b1 = __low2float(b);
771
+ float b2 = __high2float(b);
772
+ float r1 = a1 * b1;
773
+ float r2 = a2 * b2;
774
+ return __floats2half2_rn(r1, r2);
775
+ #endif
776
+ }
777
+
778
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmadd(const half2& a,
779
+ const half2& b,
780
+ const half2& c) {
781
+ #if defined(EIGEN_GPU_HAS_FP16_ARITHMETIC)
782
+ return __hfma2(a, b, c);
783
+ #else
784
+ float a1 = __low2float(a);
785
+ float a2 = __high2float(a);
786
+ float b1 = __low2float(b);
787
+ float b2 = __high2float(b);
788
+ float c1 = __low2float(c);
789
+ float c2 = __high2float(c);
790
+ float r1 = a1 * b1 + c1;
791
+ float r2 = a2 * b2 + c2;
792
+ return __floats2half2_rn(r1, r2);
793
+ #endif
794
+ }
795
+
796
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pdiv(const half2& a,
797
+ const half2& b) {
798
+ #if defined(EIGEN_GPU_HAS_FP16_ARITHMETIC)
799
+ return __h2div(a, b);
800
+ #else
801
+ float a1 = __low2float(a);
802
+ float a2 = __high2float(a);
803
+ float b1 = __low2float(b);
804
+ float b2 = __high2float(b);
805
+ float r1 = a1 / b1;
806
+ float r2 = a2 / b2;
807
+ return __floats2half2_rn(r1, r2);
808
+ #endif
809
+ }
810
+
811
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmin(const half2& a,
812
+ const half2& b) {
813
+ float a1 = __low2float(a);
814
+ float a2 = __high2float(a);
815
+ float b1 = __low2float(b);
816
+ float b2 = __high2float(b);
817
+ __half r1 = a1 < b1 ? __low2half(a) : __low2half(b);
818
+ __half r2 = a2 < b2 ? __high2half(a) : __high2half(b);
819
+ return __halves2half2(r1, r2);
820
+ }
821
+
822
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmax(const half2& a,
823
+ const half2& b) {
824
+ float a1 = __low2float(a);
825
+ float a2 = __high2float(a);
826
+ float b1 = __low2float(b);
827
+ float b2 = __high2float(b);
828
+ __half r1 = a1 > b1 ? __low2half(a) : __low2half(b);
829
+ __half r2 = a2 > b2 ? __high2half(a) : __high2half(b);
830
+ return __halves2half2(r1, r2);
831
+ }
832
+
833
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux(const half2& a) {
834
+ #if defined(EIGEN_GPU_HAS_FP16_ARITHMETIC)
835
+ return __hadd(__low2half(a), __high2half(a));
836
+ #else
837
+ float a1 = __low2float(a);
838
+ float a2 = __high2float(a);
839
+ return Eigen::half(__float2half(a1 + a2));
840
+ #endif
841
+ }
842
+
843
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux_max(const half2& a) {
844
+ #if defined(EIGEN_GPU_HAS_FP16_ARITHMETIC)
845
+ __half first = __low2half(a);
846
+ __half second = __high2half(a);
847
+ return __hgt(first, second) ? first : second;
848
+ #else
849
+ float a1 = __low2float(a);
850
+ float a2 = __high2float(a);
851
+ return a1 > a2 ? __low2half(a) : __high2half(a);
852
+ #endif
853
+ }
854
+
855
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux_min(const half2& a) {
856
+ #if defined(EIGEN_GPU_HAS_FP16_ARITHMETIC)
857
+ __half first = __low2half(a);
858
+ __half second = __high2half(a);
859
+ return __hlt(first, second) ? first : second;
860
+ #else
861
+ float a1 = __low2float(a);
862
+ float a2 = __high2float(a);
863
+ return a1 < a2 ? __low2half(a) : __high2half(a);
864
+ #endif
865
+ }
866
+
867
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux_mul(const half2& a) {
868
+ #if defined(EIGEN_GPU_HAS_FP16_ARITHMETIC)
869
+ return __hmul(__low2half(a), __high2half(a));
870
+ #else
871
+ float a1 = __low2float(a);
872
+ float a2 = __high2float(a);
873
+ return Eigen::half(__float2half(a1 * a2));
874
+ #endif
875
+ }
876
+
877
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 plog1p(const half2& a) {
878
+ float a1 = __low2float(a);
879
+ float a2 = __high2float(a);
880
+ float r1 = log1pf(a1);
881
+ float r2 = log1pf(a2);
882
+ return __floats2half2_rn(r1, r2);
883
+ }
884
+
885
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pexpm1(const half2& a) {
886
+ float a1 = __low2float(a);
887
+ float a2 = __high2float(a);
888
+ float r1 = expm1f(a1);
889
+ float r2 = expm1f(a2);
890
+ return __floats2half2_rn(r1, r2);
891
+ }
892
+
893
+ #if (EIGEN_CUDA_SDK_VER >= 80000 && defined(EIGEN_CUDA_HAS_FP16_ARITHMETIC)) || \
894
+ defined(EIGEN_HIP_DEVICE_COMPILE)
895
+
896
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
897
+ half2 plog(const half2& a) {
898
+ return h2log(a);
899
+ }
900
+
901
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
902
+ half2 pexp(const half2& a) {
903
+ return h2exp(a);
904
+ }
905
+
906
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
907
+ half2 psqrt(const half2& a) {
908
+ return h2sqrt(a);
909
+ }
910
+
911
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
912
+ half2 prsqrt(const half2& a) {
913
+ return h2rsqrt(a);
914
+ }
915
+
916
+ #else
917
+
918
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 plog(const half2& a) {
919
+ float a1 = __low2float(a);
920
+ float a2 = __high2float(a);
921
+ float r1 = logf(a1);
922
+ float r2 = logf(a2);
923
+ return __floats2half2_rn(r1, r2);
924
+ }
925
+
926
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pexp(const half2& a) {
927
+ float a1 = __low2float(a);
928
+ float a2 = __high2float(a);
929
+ float r1 = expf(a1);
930
+ float r2 = expf(a2);
931
+ return __floats2half2_rn(r1, r2);
932
+ }
933
+
934
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 psqrt(const half2& a) {
935
+ float a1 = __low2float(a);
936
+ float a2 = __high2float(a);
937
+ float r1 = sqrtf(a1);
938
+ float r2 = sqrtf(a2);
939
+ return __floats2half2_rn(r1, r2);
940
+ }
941
+
942
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 prsqrt(const half2& a) {
943
+ float a1 = __low2float(a);
944
+ float a2 = __high2float(a);
945
+ float r1 = rsqrtf(a1);
946
+ float r2 = rsqrtf(a2);
947
+ return __floats2half2_rn(r1, r2);
948
+ }
949
+ #endif
950
+ } // namespace
951
+
952
+ template <>
953
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2
954
+ pload<Packet4h2>(const Eigen::half* from) {
955
+ return *reinterpret_cast<const Packet4h2*>(from);
956
+ }
957
+
958
+ // unaligned load;
959
+ template <>
960
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2
961
+ ploadu<Packet4h2>(const Eigen::half* from) {
962
+ Packet4h2 r;
963
+ half2* p_alias = reinterpret_cast<half2*>(&r);
964
+ p_alias[0] = ploadu(from + 0);
965
+ p_alias[1] = ploadu(from + 2);
966
+ p_alias[2] = ploadu(from + 4);
967
+ p_alias[3] = ploadu(from + 6);
968
+ return r;
969
+ }
970
+
971
+ template <>
972
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2
973
+ ploaddup<Packet4h2>(const Eigen::half* from) {
974
+ Packet4h2 r;
975
+ half2* p_alias = reinterpret_cast<half2*>(&r);
976
+ p_alias[0] = ploaddup(from + 0);
977
+ p_alias[1] = ploaddup(from + 1);
978
+ p_alias[2] = ploaddup(from + 2);
979
+ p_alias[3] = ploaddup(from + 3);
980
+ return r;
981
+ }
982
+
983
+ template <>
984
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstore<Eigen::half>(
985
+ Eigen::half* to, const Packet4h2& from) {
986
+ *reinterpret_cast<Packet4h2*>(to) = from;
987
+ }
988
+
989
+ template <>
990
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu<Eigen::half>(
991
+ Eigen::half* to, const Packet4h2& from) {
992
+ const half2* from_alias = reinterpret_cast<const half2*>(&from);
993
+ pstoreu(to + 0,from_alias[0]);
994
+ pstoreu(to + 2,from_alias[1]);
995
+ pstoreu(to + 4,from_alias[2]);
996
+ pstoreu(to + 6,from_alias[3]);
997
+ }
998
+
999
+ template <>
1000
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet4h2
1001
+ ploadt_ro<Packet4h2, Aligned>(const Eigen::half* from) {
1002
+ #if defined(EIGEN_GPU_HAS_LDG)
1003
+ Packet4h2 r;
1004
+ r = __ldg(reinterpret_cast<const Packet4h2*>(from));
1005
+ return r;
1006
+ #else
1007
+ Packet4h2 r;
1008
+ half2* r_alias = reinterpret_cast<half2*>(&r);
1009
+ r_alias[0] = ploadt_ro_aligned(from + 0);
1010
+ r_alias[1] = ploadt_ro_aligned(from + 2);
1011
+ r_alias[2] = ploadt_ro_aligned(from + 4);
1012
+ r_alias[3] = ploadt_ro_aligned(from + 6);
1013
+ return r;
1014
+ #endif
1015
+ }
1016
+
1017
+ template <>
1018
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet4h2
1019
+ ploadt_ro<Packet4h2, Unaligned>(const Eigen::half* from) {
1020
+ Packet4h2 r;
1021
+ half2* r_alias = reinterpret_cast<half2*>(&r);
1022
+ r_alias[0] = ploadt_ro_unaligned(from + 0);
1023
+ r_alias[1] = ploadt_ro_unaligned(from + 2);
1024
+ r_alias[2] = ploadt_ro_unaligned(from + 4);
1025
+ r_alias[3] = ploadt_ro_unaligned(from + 6);
1026
+ return r;
1027
+ }
1028
+
1029
+ template <>
1030
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2
1031
+ pgather<Eigen::half, Packet4h2>(const Eigen::half* from, Index stride) {
1032
+ Packet4h2 r;
1033
+ half2* p_alias = reinterpret_cast<half2*>(&r);
1034
+ p_alias[0] = __halves2half2(from[0 * stride], from[1 * stride]);
1035
+ p_alias[1] = __halves2half2(from[2 * stride], from[3 * stride]);
1036
+ p_alias[2] = __halves2half2(from[4 * stride], from[5 * stride]);
1037
+ p_alias[3] = __halves2half2(from[6 * stride], from[7 * stride]);
1038
+ return r;
1039
+ }
1040
+
1041
+ template <>
1042
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter<Eigen::half, Packet4h2>(
1043
+ Eigen::half* to, const Packet4h2& from, Index stride) {
1044
+ const half2* from_alias = reinterpret_cast<const half2*>(&from);
1045
+ pscatter(to + stride * 0, from_alias[0], stride);
1046
+ pscatter(to + stride * 2, from_alias[1], stride);
1047
+ pscatter(to + stride * 4, from_alias[2], stride);
1048
+ pscatter(to + stride * 6, from_alias[3], stride);
1049
+ }
1050
+
1051
+ template <>
1052
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half pfirst<Packet4h2>(
1053
+ const Packet4h2& a) {
1054
+ return pfirst(*(reinterpret_cast<const half2*>(&a)));
1055
+ }
1056
+
1057
+ template <>
1058
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pabs<Packet4h2>(
1059
+ const Packet4h2& a) {
1060
+ Packet4h2 r;
1061
+ half2* p_alias = reinterpret_cast<half2*>(&r);
1062
+ const half2* a_alias = reinterpret_cast<const half2*>(&a);
1063
+ p_alias[0] = pabs(a_alias[0]);
1064
+ p_alias[1] = pabs(a_alias[1]);
1065
+ p_alias[2] = pabs(a_alias[2]);
1066
+ p_alias[3] = pabs(a_alias[3]);
1067
+ return r;
1068
+ }
1069
+
1070
+ template <>
1071
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 ptrue<Packet4h2>(
1072
+ const Packet4h2& /*a*/) {
1073
+ half true_half = half_impl::raw_uint16_to_half(0xffffu);
1074
+ return pset1<Packet4h2>(true_half);
1075
+ }
1076
+
1077
+ template <>
1078
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pzero<Packet4h2>(const Packet4h2& /*a*/) {
1079
+ half false_half = half_impl::raw_uint16_to_half(0x0000u);
1080
+ return pset1<Packet4h2>(false_half);
1081
+ }
1082
+
1083
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose_double(
1084
+ double* d_row0, double* d_row1, double* d_row2, double* d_row3,
1085
+ double* d_row4, double* d_row5, double* d_row6, double* d_row7) {
1086
+ double d_tmp;
1087
+ d_tmp = d_row0[1];
1088
+ d_row0[1] = d_row4[0];
1089
+ d_row4[0] = d_tmp;
1090
+
1091
+ d_tmp = d_row1[1];
1092
+ d_row1[1] = d_row5[0];
1093
+ d_row5[0] = d_tmp;
1094
+
1095
+ d_tmp = d_row2[1];
1096
+ d_row2[1] = d_row6[0];
1097
+ d_row6[0] = d_tmp;
1098
+
1099
+ d_tmp = d_row3[1];
1100
+ d_row3[1] = d_row7[0];
1101
+ d_row7[0] = d_tmp;
1102
+ }
1103
+
1104
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose_half2(
1105
+ half2* f_row0, half2* f_row1, half2* f_row2, half2* f_row3) {
1106
+ half2 f_tmp;
1107
+ f_tmp = f_row0[1];
1108
+ f_row0[1] = f_row2[0];
1109
+ f_row2[0] = f_tmp;
1110
+
1111
+ f_tmp = f_row1[1];
1112
+ f_row1[1] = f_row3[0];
1113
+ f_row3[0] = f_tmp;
1114
+ }
1115
+
1116
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void
1117
+ ptranspose_half(half2& f0, half2& f1) {
1118
+ __half a1 = __low2half(f0);
1119
+ __half a2 = __high2half(f0);
1120
+ __half b1 = __low2half(f1);
1121
+ __half b2 = __high2half(f1);
1122
+ f0 = __halves2half2(a1, b1);
1123
+ f1 = __halves2half2(a2, b2);
1124
+ }
1125
+
1126
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void
1127
+ ptranspose(PacketBlock<Packet4h2,8>& kernel) {
1128
+ double* d_row0 = reinterpret_cast<double*>(&kernel.packet[0]);
1129
+ double* d_row1 = reinterpret_cast<double*>(&kernel.packet[1]);
1130
+ double* d_row2 = reinterpret_cast<double*>(&kernel.packet[2]);
1131
+ double* d_row3 = reinterpret_cast<double*>(&kernel.packet[3]);
1132
+ double* d_row4 = reinterpret_cast<double*>(&kernel.packet[4]);
1133
+ double* d_row5 = reinterpret_cast<double*>(&kernel.packet[5]);
1134
+ double* d_row6 = reinterpret_cast<double*>(&kernel.packet[6]);
1135
+ double* d_row7 = reinterpret_cast<double*>(&kernel.packet[7]);
1136
+ ptranspose_double(d_row0, d_row1, d_row2, d_row3,
1137
+ d_row4, d_row5, d_row6, d_row7);
1138
+
1139
+
1140
+ half2* f_row0 = reinterpret_cast<half2*>(d_row0);
1141
+ half2* f_row1 = reinterpret_cast<half2*>(d_row1);
1142
+ half2* f_row2 = reinterpret_cast<half2*>(d_row2);
1143
+ half2* f_row3 = reinterpret_cast<half2*>(d_row3);
1144
+ ptranspose_half2(f_row0, f_row1, f_row2, f_row3);
1145
+ ptranspose_half(f_row0[0], f_row1[0]);
1146
+ ptranspose_half(f_row0[1], f_row1[1]);
1147
+ ptranspose_half(f_row2[0], f_row3[0]);
1148
+ ptranspose_half(f_row2[1], f_row3[1]);
1149
+
1150
+ f_row0 = reinterpret_cast<half2*>(d_row0 + 1);
1151
+ f_row1 = reinterpret_cast<half2*>(d_row1 + 1);
1152
+ f_row2 = reinterpret_cast<half2*>(d_row2 + 1);
1153
+ f_row3 = reinterpret_cast<half2*>(d_row3 + 1);
1154
+ ptranspose_half2(f_row0, f_row1, f_row2, f_row3);
1155
+ ptranspose_half(f_row0[0], f_row1[0]);
1156
+ ptranspose_half(f_row0[1], f_row1[1]);
1157
+ ptranspose_half(f_row2[0], f_row3[0]);
1158
+ ptranspose_half(f_row2[1], f_row3[1]);
1159
+
1160
+ f_row0 = reinterpret_cast<half2*>(d_row4);
1161
+ f_row1 = reinterpret_cast<half2*>(d_row5);
1162
+ f_row2 = reinterpret_cast<half2*>(d_row6);
1163
+ f_row3 = reinterpret_cast<half2*>(d_row7);
1164
+ ptranspose_half2(f_row0, f_row1, f_row2, f_row3);
1165
+ ptranspose_half(f_row0[0], f_row1[0]);
1166
+ ptranspose_half(f_row0[1], f_row1[1]);
1167
+ ptranspose_half(f_row2[0], f_row3[0]);
1168
+ ptranspose_half(f_row2[1], f_row3[1]);
1169
+
1170
+ f_row0 = reinterpret_cast<half2*>(d_row4 + 1);
1171
+ f_row1 = reinterpret_cast<half2*>(d_row5 + 1);
1172
+ f_row2 = reinterpret_cast<half2*>(d_row6 + 1);
1173
+ f_row3 = reinterpret_cast<half2*>(d_row7 + 1);
1174
+ ptranspose_half2(f_row0, f_row1, f_row2, f_row3);
1175
+ ptranspose_half(f_row0[0], f_row1[0]);
1176
+ ptranspose_half(f_row0[1], f_row1[1]);
1177
+ ptranspose_half(f_row2[0], f_row3[0]);
1178
+ ptranspose_half(f_row2[1], f_row3[1]);
1179
+
1180
+ }
1181
+
1182
+ template <>
1183
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2
1184
+ plset<Packet4h2>(const Eigen::half& a) {
1185
+ #if defined(EIGEN_HIP_DEVICE_COMPILE)
1186
+
1187
+ Packet4h2 r;
1188
+ half2* p_alias = reinterpret_cast<half2*>(&r);
1189
+ p_alias[0] = __halves2half2(a, __hadd(a, __float2half(1.0f)));
1190
+ p_alias[1] = __halves2half2(__hadd(a, __float2half(2.0f)),
1191
+ __hadd(a, __float2half(3.0f)));
1192
+ p_alias[2] = __halves2half2(__hadd(a, __float2half(4.0f)),
1193
+ __hadd(a, __float2half(5.0f)));
1194
+ p_alias[3] = __halves2half2(__hadd(a, __float2half(6.0f)),
1195
+ __hadd(a, __float2half(7.0f)));
1196
+ return r;
1197
+ #elif defined(EIGEN_CUDA_HAS_FP16_ARITHMETIC)
1198
+ Packet4h2 r;
1199
+ half2* r_alias = reinterpret_cast<half2*>(&r);
1200
+
1201
+ half2 b = pset1<half2>(a);
1202
+ half2 c;
1203
+ half2 half_offset0 = __halves2half2(__float2half(0.0f),__float2half(2.0f));
1204
+ half2 half_offset1 = __halves2half2(__float2half(4.0f),__float2half(6.0f));
1205
+
1206
+ c = __hadd2(b, half_offset0);
1207
+ r_alias[0] = plset(__low2half(c));
1208
+ r_alias[1] = plset(__high2half(c));
1209
+
1210
+ c = __hadd2(b, half_offset1);
1211
+ r_alias[2] = plset(__low2half(c));
1212
+ r_alias[3] = plset(__high2half(c));
1213
+
1214
+ return r;
1215
+
1216
+ #else
1217
+ float f = __half2float(a);
1218
+ Packet4h2 r;
1219
+ half2* p_alias = reinterpret_cast<half2*>(&r);
1220
+ p_alias[0] = __halves2half2(a, __float2half(f + 1.0f));
1221
+ p_alias[1] = __halves2half2(__float2half(f + 2.0f), __float2half(f + 3.0f));
1222
+ p_alias[2] = __halves2half2(__float2half(f + 4.0f), __float2half(f + 5.0f));
1223
+ p_alias[3] = __halves2half2(__float2half(f + 6.0f), __float2half(f + 7.0f));
1224
+ return r;
1225
+ #endif
1226
+ }
1227
+
1228
+ template <>
1229
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2
1230
+ pselect<Packet4h2>(const Packet4h2& mask, const Packet4h2& a,
1231
+ const Packet4h2& b) {
1232
+ Packet4h2 r;
1233
+ half2* r_alias = reinterpret_cast<half2*>(&r);
1234
+ const half2* mask_alias = reinterpret_cast<const half2*>(&mask);
1235
+ const half2* a_alias = reinterpret_cast<const half2*>(&a);
1236
+ const half2* b_alias = reinterpret_cast<const half2*>(&b);
1237
+ r_alias[0] = pselect(mask_alias[0], a_alias[0], b_alias[0]);
1238
+ r_alias[1] = pselect(mask_alias[1], a_alias[1], b_alias[1]);
1239
+ r_alias[2] = pselect(mask_alias[2], a_alias[2], b_alias[2]);
1240
+ r_alias[3] = pselect(mask_alias[3], a_alias[3], b_alias[3]);
1241
+ return r;
1242
+ }
1243
+
1244
+ template <>
1245
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2
1246
+ pcmp_eq<Packet4h2>(const Packet4h2& a, const Packet4h2& b) {
1247
+ Packet4h2 r;
1248
+ half2* r_alias = reinterpret_cast<half2*>(&r);
1249
+ const half2* a_alias = reinterpret_cast<const half2*>(&a);
1250
+ const half2* b_alias = reinterpret_cast<const half2*>(&b);
1251
+ r_alias[0] = pcmp_eq(a_alias[0], b_alias[0]);
1252
+ r_alias[1] = pcmp_eq(a_alias[1], b_alias[1]);
1253
+ r_alias[2] = pcmp_eq(a_alias[2], b_alias[2]);
1254
+ r_alias[3] = pcmp_eq(a_alias[3], b_alias[3]);
1255
+ return r;
1256
+ }
1257
+
1258
+ template <>
1259
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pand<Packet4h2>(
1260
+ const Packet4h2& a, const Packet4h2& b) {
1261
+ Packet4h2 r;
1262
+ half2* r_alias = reinterpret_cast<half2*>(&r);
1263
+ const half2* a_alias = reinterpret_cast<const half2*>(&a);
1264
+ const half2* b_alias = reinterpret_cast<const half2*>(&b);
1265
+ r_alias[0] = pand(a_alias[0], b_alias[0]);
1266
+ r_alias[1] = pand(a_alias[1], b_alias[1]);
1267
+ r_alias[2] = pand(a_alias[2], b_alias[2]);
1268
+ r_alias[3] = pand(a_alias[3], b_alias[3]);
1269
+ return r;
1270
+ }
1271
+
1272
+ template <>
1273
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 por<Packet4h2>(
1274
+ const Packet4h2& a, const Packet4h2& b) {
1275
+ Packet4h2 r;
1276
+ half2* r_alias = reinterpret_cast<half2*>(&r);
1277
+ const half2* a_alias = reinterpret_cast<const half2*>(&a);
1278
+ const half2* b_alias = reinterpret_cast<const half2*>(&b);
1279
+ r_alias[0] = por(a_alias[0], b_alias[0]);
1280
+ r_alias[1] = por(a_alias[1], b_alias[1]);
1281
+ r_alias[2] = por(a_alias[2], b_alias[2]);
1282
+ r_alias[3] = por(a_alias[3], b_alias[3]);
1283
+ return r;
1284
+ }
1285
+
1286
+ template <>
1287
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pxor<Packet4h2>(
1288
+ const Packet4h2& a, const Packet4h2& b) {
1289
+ Packet4h2 r;
1290
+ half2* r_alias = reinterpret_cast<half2*>(&r);
1291
+ const half2* a_alias = reinterpret_cast<const half2*>(&a);
1292
+ const half2* b_alias = reinterpret_cast<const half2*>(&b);
1293
+ r_alias[0] = pxor(a_alias[0], b_alias[0]);
1294
+ r_alias[1] = pxor(a_alias[1], b_alias[1]);
1295
+ r_alias[2] = pxor(a_alias[2], b_alias[2]);
1296
+ r_alias[3] = pxor(a_alias[3], b_alias[3]);
1297
+ return r;
1298
+ }
1299
+
1300
+ template <>
1301
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2
1302
+ pandnot<Packet4h2>(const Packet4h2& a, const Packet4h2& b) {
1303
+ Packet4h2 r;
1304
+ half2* r_alias = reinterpret_cast<half2*>(&r);
1305
+ const half2* a_alias = reinterpret_cast<const half2*>(&a);
1306
+ const half2* b_alias = reinterpret_cast<const half2*>(&b);
1307
+ r_alias[0] = pandnot(a_alias[0], b_alias[0]);
1308
+ r_alias[1] = pandnot(a_alias[1], b_alias[1]);
1309
+ r_alias[2] = pandnot(a_alias[2], b_alias[2]);
1310
+ r_alias[3] = pandnot(a_alias[3], b_alias[3]);
1311
+ return r;
1312
+ }
1313
+
1314
+ template <>
1315
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 padd<Packet4h2>(
1316
+ const Packet4h2& a, const Packet4h2& b) {
1317
+ Packet4h2 r;
1318
+ half2* r_alias = reinterpret_cast<half2*>(&r);
1319
+ const half2* a_alias = reinterpret_cast<const half2*>(&a);
1320
+ const half2* b_alias = reinterpret_cast<const half2*>(&b);
1321
+ r_alias[0] = padd(a_alias[0], b_alias[0]);
1322
+ r_alias[1] = padd(a_alias[1], b_alias[1]);
1323
+ r_alias[2] = padd(a_alias[2], b_alias[2]);
1324
+ r_alias[3] = padd(a_alias[3], b_alias[3]);
1325
+ return r;
1326
+ }
1327
+
1328
+ template <>
1329
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 psub<Packet4h2>(
1330
+ const Packet4h2& a, const Packet4h2& b) {
1331
+ Packet4h2 r;
1332
+ half2* r_alias = reinterpret_cast<half2*>(&r);
1333
+ const half2* a_alias = reinterpret_cast<const half2*>(&a);
1334
+ const half2* b_alias = reinterpret_cast<const half2*>(&b);
1335
+ r_alias[0] = psub(a_alias[0], b_alias[0]);
1336
+ r_alias[1] = psub(a_alias[1], b_alias[1]);
1337
+ r_alias[2] = psub(a_alias[2], b_alias[2]);
1338
+ r_alias[3] = psub(a_alias[3], b_alias[3]);
1339
+ return r;
1340
+ }
1341
+
1342
+ template <>
1343
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pnegate(const Packet4h2& a) {
1344
+ Packet4h2 r;
1345
+ half2* r_alias = reinterpret_cast<half2*>(&r);
1346
+ const half2* a_alias = reinterpret_cast<const half2*>(&a);
1347
+ r_alias[0] = pnegate(a_alias[0]);
1348
+ r_alias[1] = pnegate(a_alias[1]);
1349
+ r_alias[2] = pnegate(a_alias[2]);
1350
+ r_alias[3] = pnegate(a_alias[3]);
1351
+ return r;
1352
+ }
1353
+
1354
+ template <>
1355
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pconj(const Packet4h2& a) {
1356
+ return a;
1357
+ }
1358
+
1359
+ template <>
1360
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pmul<Packet4h2>(
1361
+ const Packet4h2& a, const Packet4h2& b) {
1362
+ Packet4h2 r;
1363
+ half2* r_alias = reinterpret_cast<half2*>(&r);
1364
+ const half2* a_alias = reinterpret_cast<const half2*>(&a);
1365
+ const half2* b_alias = reinterpret_cast<const half2*>(&b);
1366
+ r_alias[0] = pmul(a_alias[0], b_alias[0]);
1367
+ r_alias[1] = pmul(a_alias[1], b_alias[1]);
1368
+ r_alias[2] = pmul(a_alias[2], b_alias[2]);
1369
+ r_alias[3] = pmul(a_alias[3], b_alias[3]);
1370
+ return r;
1371
+ }
1372
+
1373
+ template <>
1374
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pmadd<Packet4h2>(
1375
+ const Packet4h2& a, const Packet4h2& b, const Packet4h2& c) {
1376
+ Packet4h2 r;
1377
+ half2* r_alias = reinterpret_cast<half2*>(&r);
1378
+ const half2* a_alias = reinterpret_cast<const half2*>(&a);
1379
+ const half2* b_alias = reinterpret_cast<const half2*>(&b);
1380
+ const half2* c_alias = reinterpret_cast<const half2*>(&c);
1381
+ r_alias[0] = pmadd(a_alias[0], b_alias[0], c_alias[0]);
1382
+ r_alias[1] = pmadd(a_alias[1], b_alias[1], c_alias[1]);
1383
+ r_alias[2] = pmadd(a_alias[2], b_alias[2], c_alias[2]);
1384
+ r_alias[3] = pmadd(a_alias[3], b_alias[3], c_alias[3]);
1385
+ return r;
1386
+ }
1387
+
1388
+ template <>
1389
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pdiv<Packet4h2>(
1390
+ const Packet4h2& a, const Packet4h2& b) {
1391
+ Packet4h2 r;
1392
+ half2* r_alias = reinterpret_cast<half2*>(&r);
1393
+ const half2* a_alias = reinterpret_cast<const half2*>(&a);
1394
+ const half2* b_alias = reinterpret_cast<const half2*>(&b);
1395
+ r_alias[0] = pdiv(a_alias[0], b_alias[0]);
1396
+ r_alias[1] = pdiv(a_alias[1], b_alias[1]);
1397
+ r_alias[2] = pdiv(a_alias[2], b_alias[2]);
1398
+ r_alias[3] = pdiv(a_alias[3], b_alias[3]);
1399
+ return r;
1400
+ }
1401
+
1402
+ template <>
1403
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pmin<Packet4h2>(
1404
+ const Packet4h2& a, const Packet4h2& b) {
1405
+ Packet4h2 r;
1406
+ half2* r_alias = reinterpret_cast<half2*>(&r);
1407
+ const half2* a_alias = reinterpret_cast<const half2*>(&a);
1408
+ const half2* b_alias = reinterpret_cast<const half2*>(&b);
1409
+ r_alias[0] = pmin(a_alias[0], b_alias[0]);
1410
+ r_alias[1] = pmin(a_alias[1], b_alias[1]);
1411
+ r_alias[2] = pmin(a_alias[2], b_alias[2]);
1412
+ r_alias[3] = pmin(a_alias[3], b_alias[3]);
1413
+ return r;
1414
+ }
1415
+
1416
+ template <>
1417
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pmax<Packet4h2>(
1418
+ const Packet4h2& a, const Packet4h2& b) {
1419
+ Packet4h2 r;
1420
+ half2* r_alias = reinterpret_cast<half2*>(&r);
1421
+ const half2* a_alias = reinterpret_cast<const half2*>(&a);
1422
+ const half2* b_alias = reinterpret_cast<const half2*>(&b);
1423
+ r_alias[0] = pmax(a_alias[0], b_alias[0]);
1424
+ r_alias[1] = pmax(a_alias[1], b_alias[1]);
1425
+ r_alias[2] = pmax(a_alias[2], b_alias[2]);
1426
+ r_alias[3] = pmax(a_alias[3], b_alias[3]);
1427
+ return r;
1428
+ }
1429
+
1430
+ template <>
1431
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux<Packet4h2>(
1432
+ const Packet4h2& a) {
1433
+ const half2* a_alias = reinterpret_cast<const half2*>(&a);
1434
+
1435
+ return predux(a_alias[0]) + predux(a_alias[1]) +
1436
+ predux(a_alias[2]) + predux(a_alias[3]);
1437
+ }
1438
+
1439
+ template <>
1440
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux_max<Packet4h2>(
1441
+ const Packet4h2& a) {
1442
+ const half2* a_alias = reinterpret_cast<const half2*>(&a);
1443
+ half2 m0 = __halves2half2(predux_max(a_alias[0]),
1444
+ predux_max(a_alias[1]));
1445
+ half2 m1 = __halves2half2(predux_max(a_alias[2]),
1446
+ predux_max(a_alias[3]));
1447
+ __half first = predux_max(m0);
1448
+ __half second = predux_max(m1);
1449
+ #if defined(EIGEN_CUDA_HAS_FP16_ARITHMETIC)
1450
+ return (__hgt(first, second) ? first : second);
1451
+ #else
1452
+ float ffirst = __half2float(first);
1453
+ float fsecond = __half2float(second);
1454
+ return (ffirst > fsecond)? first: second;
1455
+ #endif
1456
+ }
1457
+
1458
+ template <>
1459
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux_min<Packet4h2>(
1460
+ const Packet4h2& a) {
1461
+ const half2* a_alias = reinterpret_cast<const half2*>(&a);
1462
+ half2 m0 = __halves2half2(predux_min(a_alias[0]),
1463
+ predux_min(a_alias[1]));
1464
+ half2 m1 = __halves2half2(predux_min(a_alias[2]),
1465
+ predux_min(a_alias[3]));
1466
+ __half first = predux_min(m0);
1467
+ __half second = predux_min(m1);
1468
+ #if defined(EIGEN_CUDA_HAS_FP16_ARITHMETIC)
1469
+ return (__hlt(first, second) ? first : second);
1470
+ #else
1471
+ float ffirst = __half2float(first);
1472
+ float fsecond = __half2float(second);
1473
+ return (ffirst < fsecond)? first: second;
1474
+ #endif
1475
+ }
1476
+
1477
+ // likely overflow/underflow
1478
+ template <>
1479
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux_mul<Packet4h2>(
1480
+ const Packet4h2& a) {
1481
+ const half2* a_alias = reinterpret_cast<const half2*>(&a);
1482
+ return predux_mul(pmul(pmul(a_alias[0], a_alias[1]),
1483
+ pmul(a_alias[2], a_alias[3])));
1484
+ }
1485
+
1486
+ template <>
1487
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2
1488
+ plog1p<Packet4h2>(const Packet4h2& a) {
1489
+ Packet4h2 r;
1490
+ half2* r_alias = reinterpret_cast<half2*>(&r);
1491
+ const half2* a_alias = reinterpret_cast<const half2*>(&a);
1492
+ r_alias[0] = plog1p(a_alias[0]);
1493
+ r_alias[1] = plog1p(a_alias[1]);
1494
+ r_alias[2] = plog1p(a_alias[2]);
1495
+ r_alias[3] = plog1p(a_alias[3]);
1496
+ return r;
1497
+ }
1498
+
1499
+ template <>
1500
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2
1501
+ pexpm1<Packet4h2>(const Packet4h2& a) {
1502
+ Packet4h2 r;
1503
+ half2* r_alias = reinterpret_cast<half2*>(&r);
1504
+ const half2* a_alias = reinterpret_cast<const half2*>(&a);
1505
+ r_alias[0] = pexpm1(a_alias[0]);
1506
+ r_alias[1] = pexpm1(a_alias[1]);
1507
+ r_alias[2] = pexpm1(a_alias[2]);
1508
+ r_alias[3] = pexpm1(a_alias[3]);
1509
+ return r;
1510
+ }
1511
+
1512
+ template <>
1513
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 plog<Packet4h2>(const Packet4h2& a) {
1514
+ Packet4h2 r;
1515
+ half2* r_alias = reinterpret_cast<half2*>(&r);
1516
+ const half2* a_alias = reinterpret_cast<const half2*>(&a);
1517
+ r_alias[0] = plog(a_alias[0]);
1518
+ r_alias[1] = plog(a_alias[1]);
1519
+ r_alias[2] = plog(a_alias[2]);
1520
+ r_alias[3] = plog(a_alias[3]);
1521
+ return r;
1522
+ }
1523
+
1524
+ template <>
1525
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pexp<Packet4h2>(const Packet4h2& a) {
1526
+ Packet4h2 r;
1527
+ half2* r_alias = reinterpret_cast<half2*>(&r);
1528
+ const half2* a_alias = reinterpret_cast<const half2*>(&a);
1529
+ r_alias[0] = pexp(a_alias[0]);
1530
+ r_alias[1] = pexp(a_alias[1]);
1531
+ r_alias[2] = pexp(a_alias[2]);
1532
+ r_alias[3] = pexp(a_alias[3]);
1533
+ return r;
1534
+ }
1535
+
1536
+ template <>
1537
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 psqrt<Packet4h2>(const Packet4h2& a) {
1538
+ Packet4h2 r;
1539
+ half2* r_alias = reinterpret_cast<half2*>(&r);
1540
+ const half2* a_alias = reinterpret_cast<const half2*>(&a);
1541
+ r_alias[0] = psqrt(a_alias[0]);
1542
+ r_alias[1] = psqrt(a_alias[1]);
1543
+ r_alias[2] = psqrt(a_alias[2]);
1544
+ r_alias[3] = psqrt(a_alias[3]);
1545
+ return r;
1546
+ }
1547
+
1548
+ template <>
1549
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2
1550
+ prsqrt<Packet4h2>(const Packet4h2& a) {
1551
+ Packet4h2 r;
1552
+ half2* r_alias = reinterpret_cast<half2*>(&r);
1553
+ const half2* a_alias = reinterpret_cast<const half2*>(&a);
1554
+ r_alias[0] = prsqrt(a_alias[0]);
1555
+ r_alias[1] = prsqrt(a_alias[1]);
1556
+ r_alias[2] = prsqrt(a_alias[2]);
1557
+ r_alias[3] = prsqrt(a_alias[3]);
1558
+ return r;
1559
+ }
1560
+
1561
+ // The following specialized padd, pmul, pdiv, pmin, pmax, pset1 are needed for
1562
+ // the implementation of GPU half reduction.
1563
+ template<>
1564
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 padd<half2>(const half2& a,
1565
+ const half2& b) {
1566
+ #if defined(EIGEN_GPU_HAS_FP16_ARITHMETIC)
1567
+ return __hadd2(a, b);
1568
+ #else
1569
+ float a1 = __low2float(a);
1570
+ float a2 = __high2float(a);
1571
+ float b1 = __low2float(b);
1572
+ float b2 = __high2float(b);
1573
+ float r1 = a1 + b1;
1574
+ float r2 = a2 + b2;
1575
+ return __floats2half2_rn(r1, r2);
1576
+ #endif
1577
+ }
1578
+
1579
+ template<>
1580
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmul<half2>(const half2& a,
1581
+ const half2& b) {
1582
+ #if defined(EIGEN_GPU_HAS_FP16_ARITHMETIC)
1583
+ return __hmul2(a, b);
1584
+ #else
1585
+ float a1 = __low2float(a);
1586
+ float a2 = __high2float(a);
1587
+ float b1 = __low2float(b);
1588
+ float b2 = __high2float(b);
1589
+ float r1 = a1 * b1;
1590
+ float r2 = a2 * b2;
1591
+ return __floats2half2_rn(r1, r2);
1592
+ #endif
1593
+ }
1594
+
1595
+ template<>
1596
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pdiv<half2>(const half2& a,
1597
+ const half2& b) {
1598
+ #if defined(EIGEN_GPU_HAS_FP16_ARITHMETIC)
1599
+ return __h2div(a, b);
1600
+ #else
1601
+ float a1 = __low2float(a);
1602
+ float a2 = __high2float(a);
1603
+ float b1 = __low2float(b);
1604
+ float b2 = __high2float(b);
1605
+ float r1 = a1 / b1;
1606
+ float r2 = a2 / b2;
1607
+ return __floats2half2_rn(r1, r2);
1608
+ #endif
1609
+ }
1610
+
1611
+ template<>
1612
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmin<half2>(const half2& a,
1613
+ const half2& b) {
1614
+ float a1 = __low2float(a);
1615
+ float a2 = __high2float(a);
1616
+ float b1 = __low2float(b);
1617
+ float b2 = __high2float(b);
1618
+ __half r1 = a1 < b1 ? __low2half(a) : __low2half(b);
1619
+ __half r2 = a2 < b2 ? __high2half(a) : __high2half(b);
1620
+ return __halves2half2(r1, r2);
1621
+ }
1622
+
1623
+ template<>
1624
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmax<half2>(const half2& a,
1625
+ const half2& b) {
1626
+ float a1 = __low2float(a);
1627
+ float a2 = __high2float(a);
1628
+ float b1 = __low2float(b);
1629
+ float b2 = __high2float(b);
1630
+ __half r1 = a1 > b1 ? __low2half(a) : __low2half(b);
1631
+ __half r2 = a2 > b2 ? __high2half(a) : __high2half(b);
1632
+ return __halves2half2(r1, r2);
1633
+ }
1634
+
1635
+ #endif // (defined(EIGEN_HAS_CUDA_FP16) || defined(EIGEN_HAS_HIP_FP16)) && defined(EIGEN_GPU_COMPILE_PHASE)
1636
+
1637
+ #undef EIGEN_GPU_HAS_LDG
1638
+ #undef EIGEN_CUDA_HAS_FP16_ARITHMETIC
1639
+ #undef EIGEN_GPU_HAS_FP16_ARITHMETIC
1640
+
1641
+ } // end namespace internal
1642
+
1643
+ } // end namespace Eigen
1644
+
1645
+
1646
+ #endif // EIGEN_PACKET_MATH_GPU_H