@smake/eigen 1.0.2 → 1.1.1

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (435) hide show
  1. package/README.md +1 -1
  2. package/eigen/Eigen/AccelerateSupport +52 -0
  3. package/eigen/Eigen/Cholesky +18 -21
  4. package/eigen/Eigen/CholmodSupport +28 -28
  5. package/eigen/Eigen/Core +235 -326
  6. package/eigen/Eigen/Eigenvalues +16 -14
  7. package/eigen/Eigen/Geometry +21 -24
  8. package/eigen/Eigen/Householder +9 -8
  9. package/eigen/Eigen/IterativeLinearSolvers +8 -4
  10. package/eigen/Eigen/Jacobi +14 -14
  11. package/eigen/Eigen/KLUSupport +43 -0
  12. package/eigen/Eigen/LU +16 -20
  13. package/eigen/Eigen/MetisSupport +12 -12
  14. package/eigen/Eigen/OrderingMethods +54 -54
  15. package/eigen/Eigen/PaStiXSupport +23 -20
  16. package/eigen/Eigen/PardisoSupport +17 -14
  17. package/eigen/Eigen/QR +18 -21
  18. package/eigen/Eigen/QtAlignedMalloc +5 -13
  19. package/eigen/Eigen/SPQRSupport +21 -14
  20. package/eigen/Eigen/SVD +23 -18
  21. package/eigen/Eigen/Sparse +1 -4
  22. package/eigen/Eigen/SparseCholesky +18 -23
  23. package/eigen/Eigen/SparseCore +18 -17
  24. package/eigen/Eigen/SparseLU +12 -8
  25. package/eigen/Eigen/SparseQR +16 -14
  26. package/eigen/Eigen/StdDeque +5 -2
  27. package/eigen/Eigen/StdList +5 -2
  28. package/eigen/Eigen/StdVector +5 -2
  29. package/eigen/Eigen/SuperLUSupport +30 -24
  30. package/eigen/Eigen/ThreadPool +80 -0
  31. package/eigen/Eigen/UmfPackSupport +19 -17
  32. package/eigen/Eigen/Version +14 -0
  33. package/eigen/Eigen/src/AccelerateSupport/AccelerateSupport.h +423 -0
  34. package/eigen/Eigen/src/AccelerateSupport/InternalHeaderCheck.h +3 -0
  35. package/eigen/Eigen/src/Cholesky/InternalHeaderCheck.h +3 -0
  36. package/eigen/Eigen/src/Cholesky/LDLT.h +377 -401
  37. package/eigen/Eigen/src/Cholesky/LLT.h +332 -360
  38. package/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h +81 -56
  39. package/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +620 -521
  40. package/eigen/Eigen/src/CholmodSupport/InternalHeaderCheck.h +3 -0
  41. package/eigen/Eigen/src/Core/ArithmeticSequence.h +239 -0
  42. package/eigen/Eigen/src/Core/Array.h +341 -294
  43. package/eigen/Eigen/src/Core/ArrayBase.h +190 -203
  44. package/eigen/Eigen/src/Core/ArrayWrapper.h +127 -171
  45. package/eigen/Eigen/src/Core/Assign.h +30 -40
  46. package/eigen/Eigen/src/Core/AssignEvaluator.h +711 -589
  47. package/eigen/Eigen/src/Core/Assign_MKL.h +130 -125
  48. package/eigen/Eigen/src/Core/BandMatrix.h +268 -283
  49. package/eigen/Eigen/src/Core/Block.h +375 -398
  50. package/eigen/Eigen/src/Core/CommaInitializer.h +86 -97
  51. package/eigen/Eigen/src/Core/ConditionEstimator.h +51 -53
  52. package/eigen/Eigen/src/Core/CoreEvaluators.h +1356 -1026
  53. package/eigen/Eigen/src/Core/CoreIterators.h +73 -59
  54. package/eigen/Eigen/src/Core/CwiseBinaryOp.h +114 -132
  55. package/eigen/Eigen/src/Core/CwiseNullaryOp.h +726 -617
  56. package/eigen/Eigen/src/Core/CwiseTernaryOp.h +77 -103
  57. package/eigen/Eigen/src/Core/CwiseUnaryOp.h +56 -68
  58. package/eigen/Eigen/src/Core/CwiseUnaryView.h +132 -95
  59. package/eigen/Eigen/src/Core/DenseBase.h +632 -571
  60. package/eigen/Eigen/src/Core/DenseCoeffsBase.h +511 -624
  61. package/eigen/Eigen/src/Core/DenseStorage.h +512 -509
  62. package/eigen/Eigen/src/Core/DeviceWrapper.h +153 -0
  63. package/eigen/Eigen/src/Core/Diagonal.h +169 -210
  64. package/eigen/Eigen/src/Core/DiagonalMatrix.h +351 -274
  65. package/eigen/Eigen/src/Core/DiagonalProduct.h +12 -10
  66. package/eigen/Eigen/src/Core/Dot.h +172 -222
  67. package/eigen/Eigen/src/Core/EigenBase.h +75 -85
  68. package/eigen/Eigen/src/Core/Fill.h +138 -0
  69. package/eigen/Eigen/src/Core/FindCoeff.h +464 -0
  70. package/eigen/Eigen/src/Core/ForceAlignedAccess.h +90 -109
  71. package/eigen/Eigen/src/Core/Fuzzy.h +82 -105
  72. package/eigen/Eigen/src/Core/GeneralProduct.h +327 -263
  73. package/eigen/Eigen/src/Core/GenericPacketMath.h +1472 -360
  74. package/eigen/Eigen/src/Core/GlobalFunctions.h +194 -151
  75. package/eigen/Eigen/src/Core/IO.h +147 -139
  76. package/eigen/Eigen/src/Core/IndexedView.h +321 -0
  77. package/eigen/Eigen/src/Core/InnerProduct.h +260 -0
  78. package/eigen/Eigen/src/Core/InternalHeaderCheck.h +3 -0
  79. package/eigen/Eigen/src/Core/Inverse.h +56 -66
  80. package/eigen/Eigen/src/Core/Map.h +124 -142
  81. package/eigen/Eigen/src/Core/MapBase.h +256 -281
  82. package/eigen/Eigen/src/Core/MathFunctions.h +1620 -938
  83. package/eigen/Eigen/src/Core/MathFunctionsImpl.h +233 -71
  84. package/eigen/Eigen/src/Core/Matrix.h +491 -416
  85. package/eigen/Eigen/src/Core/MatrixBase.h +468 -453
  86. package/eigen/Eigen/src/Core/NestByValue.h +66 -85
  87. package/eigen/Eigen/src/Core/NoAlias.h +79 -85
  88. package/eigen/Eigen/src/Core/NumTraits.h +235 -148
  89. package/eigen/Eigen/src/Core/PartialReduxEvaluator.h +253 -0
  90. package/eigen/Eigen/src/Core/PermutationMatrix.h +461 -511
  91. package/eigen/Eigen/src/Core/PlainObjectBase.h +871 -894
  92. package/eigen/Eigen/src/Core/Product.h +260 -139
  93. package/eigen/Eigen/src/Core/ProductEvaluators.h +863 -714
  94. package/eigen/Eigen/src/Core/Random.h +161 -136
  95. package/eigen/Eigen/src/Core/RandomImpl.h +262 -0
  96. package/eigen/Eigen/src/Core/RealView.h +250 -0
  97. package/eigen/Eigen/src/Core/Redux.h +366 -336
  98. package/eigen/Eigen/src/Core/Ref.h +308 -209
  99. package/eigen/Eigen/src/Core/Replicate.h +94 -106
  100. package/eigen/Eigen/src/Core/Reshaped.h +398 -0
  101. package/eigen/Eigen/src/Core/ReturnByValue.h +49 -55
  102. package/eigen/Eigen/src/Core/Reverse.h +136 -145
  103. package/eigen/Eigen/src/Core/Select.h +70 -140
  104. package/eigen/Eigen/src/Core/SelfAdjointView.h +262 -285
  105. package/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h +23 -20
  106. package/eigen/Eigen/src/Core/SkewSymmetricMatrix3.h +382 -0
  107. package/eigen/Eigen/src/Core/Solve.h +97 -111
  108. package/eigen/Eigen/src/Core/SolveTriangular.h +131 -129
  109. package/eigen/Eigen/src/Core/SolverBase.h +138 -101
  110. package/eigen/Eigen/src/Core/StableNorm.h +156 -160
  111. package/eigen/Eigen/src/Core/StlIterators.h +619 -0
  112. package/eigen/Eigen/src/Core/Stride.h +91 -88
  113. package/eigen/Eigen/src/Core/Swap.h +70 -38
  114. package/eigen/Eigen/src/Core/Transpose.h +295 -273
  115. package/eigen/Eigen/src/Core/Transpositions.h +272 -317
  116. package/eigen/Eigen/src/Core/TriangularMatrix.h +670 -755
  117. package/eigen/Eigen/src/Core/VectorBlock.h +59 -72
  118. package/eigen/Eigen/src/Core/VectorwiseOp.h +668 -630
  119. package/eigen/Eigen/src/Core/Visitor.h +480 -216
  120. package/eigen/Eigen/src/Core/arch/AVX/Complex.h +407 -293
  121. package/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +79 -388
  122. package/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +2935 -491
  123. package/eigen/Eigen/src/Core/arch/AVX/Reductions.h +353 -0
  124. package/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +279 -22
  125. package/eigen/Eigen/src/Core/arch/AVX512/Complex.h +472 -0
  126. package/eigen/Eigen/src/Core/arch/AVX512/GemmKernel.h +1245 -0
  127. package/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +85 -333
  128. package/eigen/Eigen/src/Core/arch/AVX512/MathFunctionsFP16.h +75 -0
  129. package/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +2490 -649
  130. package/eigen/Eigen/src/Core/arch/AVX512/PacketMathFP16.h +1413 -0
  131. package/eigen/Eigen/src/Core/arch/AVX512/Reductions.h +297 -0
  132. package/eigen/Eigen/src/Core/arch/AVX512/TrsmKernel.h +1167 -0
  133. package/eigen/Eigen/src/Core/arch/AVX512/TrsmUnrolls.inc +1219 -0
  134. package/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +277 -0
  135. package/eigen/Eigen/src/Core/arch/AVX512/TypeCastingFP16.h +130 -0
  136. package/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +521 -298
  137. package/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +39 -280
  138. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +3686 -0
  139. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +205 -0
  140. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +901 -0
  141. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMAbfloat16.h +742 -0
  142. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixVectorProduct.inc +2818 -0
  143. package/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +3391 -723
  144. package/eigen/Eigen/src/Core/arch/AltiVec/TypeCasting.h +153 -0
  145. package/eigen/Eigen/src/Core/arch/Default/BFloat16.h +866 -0
  146. package/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +113 -14
  147. package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +2634 -0
  148. package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +227 -0
  149. package/eigen/Eigen/src/Core/arch/Default/Half.h +1091 -0
  150. package/eigen/Eigen/src/Core/arch/Default/Settings.h +11 -13
  151. package/eigen/Eigen/src/Core/arch/GPU/Complex.h +244 -0
  152. package/eigen/Eigen/src/Core/arch/GPU/MathFunctions.h +104 -0
  153. package/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1712 -0
  154. package/eigen/Eigen/src/Core/arch/GPU/Tuple.h +268 -0
  155. package/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +77 -0
  156. package/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
  157. package/eigen/Eigen/src/Core/arch/HVX/PacketMath.h +1088 -0
  158. package/eigen/Eigen/src/Core/arch/LSX/Complex.h +520 -0
  159. package/eigen/Eigen/src/Core/arch/LSX/GeneralBlockPanelKernel.h +23 -0
  160. package/eigen/Eigen/src/Core/arch/LSX/MathFunctions.h +43 -0
  161. package/eigen/Eigen/src/Core/arch/LSX/PacketMath.h +2866 -0
  162. package/eigen/Eigen/src/Core/arch/LSX/TypeCasting.h +526 -0
  163. package/eigen/Eigen/src/Core/arch/MSA/Complex.h +620 -0
  164. package/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +379 -0
  165. package/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1237 -0
  166. package/eigen/Eigen/src/Core/arch/NEON/Complex.h +531 -289
  167. package/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +243 -0
  168. package/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +50 -73
  169. package/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +5915 -579
  170. package/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1642 -0
  171. package/eigen/Eigen/src/Core/arch/NEON/UnaryFunctors.h +57 -0
  172. package/eigen/Eigen/src/Core/arch/SSE/Complex.h +366 -334
  173. package/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +40 -514
  174. package/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +2164 -675
  175. package/eigen/Eigen/src/Core/arch/SSE/Reductions.h +324 -0
  176. package/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +188 -35
  177. package/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +48 -0
  178. package/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +674 -0
  179. package/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +52 -0
  180. package/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +227 -0
  181. package/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +303 -0
  182. package/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +576 -0
  183. package/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +83 -0
  184. package/eigen/Eigen/src/Core/arch/ZVector/Complex.h +434 -261
  185. package/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +160 -53
  186. package/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +1073 -605
  187. package/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +123 -117
  188. package/eigen/Eigen/src/Core/functors/BinaryFunctors.h +594 -322
  189. package/eigen/Eigen/src/Core/functors/NullaryFunctors.h +204 -118
  190. package/eigen/Eigen/src/Core/functors/StlFunctors.h +110 -97
  191. package/eigen/Eigen/src/Core/functors/TernaryFunctors.h +34 -7
  192. package/eigen/Eigen/src/Core/functors/UnaryFunctors.h +1158 -530
  193. package/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +2329 -1333
  194. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +328 -364
  195. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +191 -178
  196. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +85 -82
  197. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +154 -73
  198. package/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +396 -542
  199. package/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +80 -77
  200. package/eigen/Eigen/src/Core/products/Parallelizer.h +208 -92
  201. package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +331 -375
  202. package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +206 -224
  203. package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +139 -146
  204. package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +58 -61
  205. package/eigen/Eigen/src/Core/products/SelfadjointProduct.h +71 -71
  206. package/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +48 -46
  207. package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +294 -369
  208. package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +246 -238
  209. package/eigen/Eigen/src/Core/products/TriangularMatrixVector.h +244 -247
  210. package/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +212 -192
  211. package/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +328 -275
  212. package/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +108 -109
  213. package/eigen/Eigen/src/Core/products/TriangularSolverVector.h +70 -93
  214. package/eigen/Eigen/src/Core/util/Assert.h +158 -0
  215. package/eigen/Eigen/src/Core/util/BlasUtil.h +413 -290
  216. package/eigen/Eigen/src/Core/util/ConfigureVectorization.h +543 -0
  217. package/eigen/Eigen/src/Core/util/Constants.h +314 -263
  218. package/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +130 -78
  219. package/eigen/Eigen/src/Core/util/EmulateArray.h +270 -0
  220. package/eigen/Eigen/src/Core/util/ForwardDeclarations.h +450 -224
  221. package/eigen/Eigen/src/Core/util/GpuHipCudaDefines.inc +101 -0
  222. package/eigen/Eigen/src/Core/util/GpuHipCudaUndefines.inc +45 -0
  223. package/eigen/Eigen/src/Core/util/IndexedViewHelper.h +487 -0
  224. package/eigen/Eigen/src/Core/util/IntegralConstant.h +279 -0
  225. package/eigen/Eigen/src/Core/util/MKL_support.h +39 -30
  226. package/eigen/Eigen/src/Core/util/Macros.h +939 -646
  227. package/eigen/Eigen/src/Core/util/MaxSizeVector.h +139 -0
  228. package/eigen/Eigen/src/Core/util/Memory.h +1042 -650
  229. package/eigen/Eigen/src/Core/util/Meta.h +618 -426
  230. package/eigen/Eigen/src/Core/util/MoreMeta.h +638 -0
  231. package/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +32 -19
  232. package/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
  233. package/eigen/Eigen/src/Core/util/Serializer.h +209 -0
  234. package/eigen/Eigen/src/Core/util/StaticAssert.h +51 -164
  235. package/eigen/Eigen/src/Core/util/SymbolicIndex.h +445 -0
  236. package/eigen/Eigen/src/Core/util/XprHelper.h +793 -538
  237. package/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +246 -277
  238. package/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +299 -319
  239. package/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +52 -48
  240. package/eigen/Eigen/src/Eigenvalues/EigenSolver.h +413 -456
  241. package/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +309 -325
  242. package/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +157 -171
  243. package/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +292 -310
  244. package/eigen/Eigen/src/Eigenvalues/InternalHeaderCheck.h +3 -0
  245. package/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +91 -107
  246. package/eigen/Eigen/src/Eigenvalues/RealQZ.h +539 -606
  247. package/eigen/Eigen/src/Eigenvalues/RealSchur.h +348 -382
  248. package/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +41 -35
  249. package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +579 -600
  250. package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +47 -44
  251. package/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +434 -461
  252. package/eigen/Eigen/src/Geometry/AlignedBox.h +307 -214
  253. package/eigen/Eigen/src/Geometry/AngleAxis.h +135 -137
  254. package/eigen/Eigen/src/Geometry/EulerAngles.h +163 -74
  255. package/eigen/Eigen/src/Geometry/Homogeneous.h +289 -333
  256. package/eigen/Eigen/src/Geometry/Hyperplane.h +152 -161
  257. package/eigen/Eigen/src/Geometry/InternalHeaderCheck.h +3 -0
  258. package/eigen/Eigen/src/Geometry/OrthoMethods.h +168 -145
  259. package/eigen/Eigen/src/Geometry/ParametrizedLine.h +141 -104
  260. package/eigen/Eigen/src/Geometry/Quaternion.h +595 -497
  261. package/eigen/Eigen/src/Geometry/Rotation2D.h +110 -108
  262. package/eigen/Eigen/src/Geometry/RotationBase.h +148 -145
  263. package/eigen/Eigen/src/Geometry/Scaling.h +115 -90
  264. package/eigen/Eigen/src/Geometry/Transform.h +896 -953
  265. package/eigen/Eigen/src/Geometry/Translation.h +100 -98
  266. package/eigen/Eigen/src/Geometry/Umeyama.h +79 -84
  267. package/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +154 -0
  268. package/eigen/Eigen/src/Householder/BlockHouseholder.h +54 -42
  269. package/eigen/Eigen/src/Householder/Householder.h +104 -122
  270. package/eigen/Eigen/src/Householder/HouseholderSequence.h +416 -382
  271. package/eigen/Eigen/src/Householder/InternalHeaderCheck.h +3 -0
  272. package/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +153 -166
  273. package/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +127 -138
  274. package/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +95 -124
  275. package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +269 -267
  276. package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +246 -259
  277. package/eigen/Eigen/src/IterativeLinearSolvers/InternalHeaderCheck.h +3 -0
  278. package/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +218 -217
  279. package/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +80 -103
  280. package/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +59 -63
  281. package/eigen/Eigen/src/Jacobi/InternalHeaderCheck.h +3 -0
  282. package/eigen/Eigen/src/Jacobi/Jacobi.h +256 -291
  283. package/eigen/Eigen/src/KLUSupport/InternalHeaderCheck.h +3 -0
  284. package/eigen/Eigen/src/KLUSupport/KLUSupport.h +339 -0
  285. package/eigen/Eigen/src/LU/Determinant.h +60 -63
  286. package/eigen/Eigen/src/LU/FullPivLU.h +561 -626
  287. package/eigen/Eigen/src/LU/InternalHeaderCheck.h +3 -0
  288. package/eigen/Eigen/src/LU/InverseImpl.h +213 -275
  289. package/eigen/Eigen/src/LU/PartialPivLU.h +407 -435
  290. package/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h +54 -40
  291. package/eigen/Eigen/src/LU/arch/InverseSize4.h +353 -0
  292. package/eigen/Eigen/src/MetisSupport/InternalHeaderCheck.h +3 -0
  293. package/eigen/Eigen/src/MetisSupport/MetisSupport.h +81 -93
  294. package/eigen/Eigen/src/OrderingMethods/Amd.h +250 -282
  295. package/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +950 -1103
  296. package/eigen/Eigen/src/OrderingMethods/InternalHeaderCheck.h +3 -0
  297. package/eigen/Eigen/src/OrderingMethods/Ordering.h +111 -122
  298. package/eigen/Eigen/src/PaStiXSupport/InternalHeaderCheck.h +3 -0
  299. package/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +524 -570
  300. package/eigen/Eigen/src/PardisoSupport/InternalHeaderCheck.h +3 -0
  301. package/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +385 -429
  302. package/eigen/Eigen/src/QR/ColPivHouseholderQR.h +494 -473
  303. package/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +120 -56
  304. package/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +223 -137
  305. package/eigen/Eigen/src/QR/FullPivHouseholderQR.h +517 -460
  306. package/eigen/Eigen/src/QR/HouseholderQR.h +412 -278
  307. package/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h +32 -23
  308. package/eigen/Eigen/src/QR/InternalHeaderCheck.h +3 -0
  309. package/eigen/Eigen/src/SPQRSupport/InternalHeaderCheck.h +3 -0
  310. package/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +263 -261
  311. package/eigen/Eigen/src/SVD/BDCSVD.h +872 -679
  312. package/eigen/Eigen/src/SVD/BDCSVD_LAPACKE.h +174 -0
  313. package/eigen/Eigen/src/SVD/InternalHeaderCheck.h +3 -0
  314. package/eigen/Eigen/src/SVD/JacobiSVD.h +585 -543
  315. package/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +85 -49
  316. package/eigen/Eigen/src/SVD/SVDBase.h +281 -160
  317. package/eigen/Eigen/src/SVD/UpperBidiagonalization.h +202 -237
  318. package/eigen/Eigen/src/SparseCholesky/InternalHeaderCheck.h +3 -0
  319. package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +769 -590
  320. package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +318 -129
  321. package/eigen/Eigen/src/SparseCore/AmbiVector.h +202 -251
  322. package/eigen/Eigen/src/SparseCore/CompressedStorage.h +184 -236
  323. package/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +140 -184
  324. package/eigen/Eigen/src/SparseCore/InternalHeaderCheck.h +3 -0
  325. package/eigen/Eigen/src/SparseCore/SparseAssign.h +174 -111
  326. package/eigen/Eigen/src/SparseCore/SparseBlock.h +408 -477
  327. package/eigen/Eigen/src/SparseCore/SparseColEtree.h +100 -112
  328. package/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +531 -280
  329. package/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +559 -347
  330. package/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +100 -108
  331. package/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +185 -191
  332. package/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +71 -71
  333. package/eigen/Eigen/src/SparseCore/SparseDot.h +49 -47
  334. package/eigen/Eigen/src/SparseCore/SparseFuzzy.h +13 -11
  335. package/eigen/Eigen/src/SparseCore/SparseMap.h +243 -253
  336. package/eigen/Eigen/src/SparseCore/SparseMatrix.h +1614 -1142
  337. package/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +403 -357
  338. package/eigen/Eigen/src/SparseCore/SparsePermutation.h +186 -115
  339. package/eigen/Eigen/src/SparseCore/SparseProduct.h +100 -91
  340. package/eigen/Eigen/src/SparseCore/SparseRedux.h +22 -24
  341. package/eigen/Eigen/src/SparseCore/SparseRef.h +268 -295
  342. package/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +371 -414
  343. package/eigen/Eigen/src/SparseCore/SparseSolverBase.h +78 -87
  344. package/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +81 -95
  345. package/eigen/Eigen/src/SparseCore/SparseTranspose.h +62 -71
  346. package/eigen/Eigen/src/SparseCore/SparseTriangularView.h +132 -144
  347. package/eigen/Eigen/src/SparseCore/SparseUtil.h +146 -115
  348. package/eigen/Eigen/src/SparseCore/SparseVector.h +426 -372
  349. package/eigen/Eigen/src/SparseCore/SparseView.h +164 -193
  350. package/eigen/Eigen/src/SparseCore/TriangularSolver.h +129 -170
  351. package/eigen/Eigen/src/SparseLU/InternalHeaderCheck.h +3 -0
  352. package/eigen/Eigen/src/SparseLU/SparseLU.h +814 -618
  353. package/eigen/Eigen/src/SparseLU/SparseLUImpl.h +61 -48
  354. package/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +102 -118
  355. package/eigen/Eigen/src/SparseLU/SparseLU_Structs.h +38 -35
  356. package/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +273 -255
  357. package/eigen/Eigen/src/SparseLU/SparseLU_Utils.h +44 -49
  358. package/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h +104 -108
  359. package/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +90 -101
  360. package/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +57 -58
  361. package/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +43 -55
  362. package/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +74 -71
  363. package/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +125 -133
  364. package/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h +136 -159
  365. package/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h +51 -52
  366. package/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h +67 -73
  367. package/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h +24 -26
  368. package/eigen/Eigen/src/SparseQR/InternalHeaderCheck.h +3 -0
  369. package/eigen/Eigen/src/SparseQR/SparseQR.h +451 -490
  370. package/eigen/Eigen/src/StlSupport/StdDeque.h +28 -105
  371. package/eigen/Eigen/src/StlSupport/StdList.h +28 -84
  372. package/eigen/Eigen/src/StlSupport/StdVector.h +28 -108
  373. package/eigen/Eigen/src/StlSupport/details.h +48 -50
  374. package/eigen/Eigen/src/SuperLUSupport/InternalHeaderCheck.h +3 -0
  375. package/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +634 -732
  376. package/eigen/Eigen/src/ThreadPool/Barrier.h +70 -0
  377. package/eigen/Eigen/src/ThreadPool/CoreThreadPoolDevice.h +336 -0
  378. package/eigen/Eigen/src/ThreadPool/EventCount.h +241 -0
  379. package/eigen/Eigen/src/ThreadPool/ForkJoin.h +140 -0
  380. package/eigen/Eigen/src/ThreadPool/InternalHeaderCheck.h +4 -0
  381. package/eigen/Eigen/src/ThreadPool/NonBlockingThreadPool.h +587 -0
  382. package/eigen/Eigen/src/ThreadPool/RunQueue.h +230 -0
  383. package/eigen/Eigen/src/ThreadPool/ThreadCancel.h +21 -0
  384. package/eigen/Eigen/src/ThreadPool/ThreadEnvironment.h +43 -0
  385. package/eigen/Eigen/src/ThreadPool/ThreadLocal.h +289 -0
  386. package/eigen/Eigen/src/ThreadPool/ThreadPoolInterface.h +50 -0
  387. package/eigen/Eigen/src/ThreadPool/ThreadYield.h +16 -0
  388. package/eigen/Eigen/src/UmfPackSupport/InternalHeaderCheck.h +3 -0
  389. package/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +480 -380
  390. package/eigen/Eigen/src/misc/Image.h +41 -43
  391. package/eigen/Eigen/src/misc/InternalHeaderCheck.h +3 -0
  392. package/eigen/Eigen/src/misc/Kernel.h +39 -41
  393. package/eigen/Eigen/src/misc/RealSvd2x2.h +19 -21
  394. package/eigen/Eigen/src/misc/blas.h +83 -426
  395. package/eigen/Eigen/src/misc/lapacke.h +9976 -16182
  396. package/eigen/Eigen/src/misc/lapacke_helpers.h +163 -0
  397. package/eigen/Eigen/src/misc/lapacke_mangling.h +4 -5
  398. package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.inc +344 -0
  399. package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.inc +544 -0
  400. package/eigen/Eigen/src/plugins/BlockMethods.inc +1370 -0
  401. package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.inc +116 -0
  402. package/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.inc +167 -0
  403. package/eigen/Eigen/src/plugins/IndexedViewMethods.inc +192 -0
  404. package/eigen/Eigen/src/plugins/InternalHeaderCheck.inc +3 -0
  405. package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.inc +331 -0
  406. package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.inc +118 -0
  407. package/eigen/Eigen/src/plugins/ReshapedMethods.inc +133 -0
  408. package/lib/LibEigen.d.ts +4 -0
  409. package/lib/LibEigen.js +14 -0
  410. package/lib/index.d.ts +1 -1
  411. package/lib/index.js +7 -3
  412. package/package.json +2 -10
  413. package/eigen/Eigen/CMakeLists.txt +0 -19
  414. package/eigen/Eigen/src/Core/BooleanRedux.h +0 -164
  415. package/eigen/Eigen/src/Core/arch/CUDA/Complex.h +0 -103
  416. package/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -675
  417. package/eigen/Eigen/src/Core/arch/CUDA/MathFunctions.h +0 -91
  418. package/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
  419. package/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
  420. package/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
  421. package/eigen/Eigen/src/Core/util/NonMPL2.h +0 -3
  422. package/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
  423. package/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
  424. package/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h +0 -67
  425. package/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +0 -280
  426. package/eigen/Eigen/src/misc/lapack.h +0 -152
  427. package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +0 -332
  428. package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +0 -552
  429. package/eigen/Eigen/src/plugins/BlockMethods.h +0 -1058
  430. package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h +0 -115
  431. package/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +0 -163
  432. package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +0 -152
  433. package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +0 -85
  434. package/lib/eigen.d.ts +0 -2
  435. package/lib/eigen.js +0 -15
@@ -0,0 +1,1642 @@
1
+ // This file is part of Eigen, a lightweight C++ template library
2
+ // for linear algebra.
3
+ //
4
+ // Copyright (C) 2018 Rasmus Munk Larsen <rmlarsen@google.com>
5
+ // Copyright (C) 2020 Antonio Sanchez <cantonios@google.com>
6
+ //
7
+ // This Source Code Form is subject to the terms of the Mozilla
8
+ // Public License v. 2.0. If a copy of the MPL was not distributed
9
+ // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
10
+
11
+ #ifndef EIGEN_TYPE_CASTING_NEON_H
12
+ #define EIGEN_TYPE_CASTING_NEON_H
13
+
14
+ // IWYU pragma: private
15
+ #include "../../InternalHeaderCheck.h"
16
+
17
+ namespace Eigen {
18
+
19
+ namespace internal {
20
+
21
+ //==============================================================================
22
+ // preinterpret (truncation operations)
23
+ //==============================================================================
24
+
25
+ template <>
26
+ EIGEN_STRONG_INLINE Packet8c preinterpret<Packet8c, Packet16c>(const Packet16c& a) {
27
+ return Packet8c(vget_low_s8(a));
28
+ }
29
+ template <>
30
+ EIGEN_STRONG_INLINE Packet4c preinterpret<Packet4c, Packet8c>(const Packet8c& a) {
31
+ return Packet4c(vget_lane_s32(vreinterpret_s32_s8(a), 0));
32
+ }
33
+ template <>
34
+ EIGEN_STRONG_INLINE Packet4c preinterpret<Packet4c, Packet16c>(const Packet16c& a) {
35
+ return preinterpret<Packet4c>(preinterpret<Packet8c>(a));
36
+ }
37
+
38
+ template <>
39
+ EIGEN_STRONG_INLINE Packet8uc preinterpret<Packet8uc, Packet16uc>(const Packet16uc& a) {
40
+ return Packet8uc(vget_low_u8(a));
41
+ }
42
+ template <>
43
+ EIGEN_STRONG_INLINE Packet4uc preinterpret<Packet4uc, Packet8uc>(const Packet8uc& a) {
44
+ return Packet4uc(vget_lane_u32(vreinterpret_u32_u8(a), 0));
45
+ }
46
+ template <>
47
+ EIGEN_STRONG_INLINE Packet4uc preinterpret<Packet4uc, Packet16uc>(const Packet16uc& a) {
48
+ return preinterpret<Packet4uc>(preinterpret<Packet8uc>(a));
49
+ }
50
+
51
+ template <>
52
+ EIGEN_STRONG_INLINE Packet4s preinterpret<Packet4s, Packet8s>(const Packet8s& a) {
53
+ return Packet4s(vget_low_s16(a));
54
+ }
55
+
56
+ template <>
57
+ EIGEN_STRONG_INLINE Packet4us preinterpret<Packet4us, Packet8us>(const Packet8us& a) {
58
+ return Packet4us(vget_low_u16(a));
59
+ }
60
+
61
+ template <>
62
+ EIGEN_STRONG_INLINE Packet2i preinterpret<Packet2i, Packet4i>(const Packet4i& a) {
63
+ return Packet2i(vget_low_s32(a));
64
+ }
65
+ template <>
66
+ EIGEN_STRONG_INLINE Packet2ui preinterpret<Packet2ui, Packet4ui>(const Packet4ui& a) {
67
+ return Packet2ui(vget_low_u32(a));
68
+ }
69
+
70
+ template <>
71
+ EIGEN_STRONG_INLINE Packet2f preinterpret<Packet2f, Packet4f>(const Packet4f& a) {
72
+ return Packet2f(vget_low_f32(a));
73
+ }
74
+
75
+ //==============================================================================
76
+ // preinterpret
77
+ //==============================================================================
78
+ template <>
79
+ EIGEN_STRONG_INLINE Packet2f preinterpret<Packet2f, Packet2i>(const Packet2i& a) {
80
+ return Packet2f(vreinterpret_f32_s32(a));
81
+ }
82
+ template <>
83
+ EIGEN_STRONG_INLINE Packet2f preinterpret<Packet2f, Packet2ui>(const Packet2ui& a) {
84
+ return Packet2f(vreinterpret_f32_u32(a));
85
+ }
86
+ template <>
87
+ EIGEN_STRONG_INLINE Packet4f preinterpret<Packet4f, Packet4i>(const Packet4i& a) {
88
+ return Packet4f(vreinterpretq_f32_s32(a));
89
+ }
90
+ template <>
91
+ EIGEN_STRONG_INLINE Packet4f preinterpret<Packet4f, Packet4ui>(const Packet4ui& a) {
92
+ return Packet4f(vreinterpretq_f32_u32(a));
93
+ }
94
+
95
+ template <>
96
+ EIGEN_STRONG_INLINE Packet4c preinterpret<Packet4c, Packet4uc>(const Packet4uc& a) {
97
+ return static_cast<Packet4c>(a);
98
+ }
99
+ template <>
100
+ EIGEN_STRONG_INLINE Packet8c preinterpret<Packet8c, Packet8uc>(const Packet8uc& a) {
101
+ return Packet8c(vreinterpret_s8_u8(a));
102
+ }
103
+ template <>
104
+ EIGEN_STRONG_INLINE Packet16c preinterpret<Packet16c, Packet16uc>(const Packet16uc& a) {
105
+ return Packet16c(vreinterpretq_s8_u8(a));
106
+ }
107
+
108
+ template <>
109
+ EIGEN_STRONG_INLINE Packet4uc preinterpret<Packet4uc, Packet4c>(const Packet4c& a) {
110
+ return static_cast<Packet4uc>(a);
111
+ }
112
+ template <>
113
+ EIGEN_STRONG_INLINE Packet8uc preinterpret<Packet8uc, Packet8c>(const Packet8c& a) {
114
+ return Packet8uc(vreinterpret_u8_s8(a));
115
+ }
116
+ template <>
117
+ EIGEN_STRONG_INLINE Packet16uc preinterpret<Packet16uc, Packet16c>(const Packet16c& a) {
118
+ return Packet16uc(vreinterpretq_u8_s8(a));
119
+ }
120
+
121
+ template <>
122
+ EIGEN_STRONG_INLINE Packet4s preinterpret<Packet4s, Packet4us>(const Packet4us& a) {
123
+ return Packet4s(vreinterpret_s16_u16(a));
124
+ }
125
+ template <>
126
+ EIGEN_STRONG_INLINE Packet8s preinterpret<Packet8s, Packet8us>(const Packet8us& a) {
127
+ return Packet8s(vreinterpretq_s16_u16(a));
128
+ }
129
+ template <>
130
+ EIGEN_STRONG_INLINE Packet4us preinterpret<Packet4us, Packet4s>(const Packet4s& a) {
131
+ return Packet4us(vreinterpret_u16_s16(a));
132
+ }
133
+ template <>
134
+ EIGEN_STRONG_INLINE Packet8us preinterpret<Packet8us, Packet8s>(const Packet8s& a) {
135
+ return Packet8us(vreinterpretq_u16_s16(a));
136
+ }
137
+
138
+ template <>
139
+ EIGEN_STRONG_INLINE Packet2i preinterpret<Packet2i, Packet2f>(const Packet2f& a) {
140
+ return Packet2i(vreinterpret_s32_f32(a));
141
+ }
142
+ template <>
143
+ EIGEN_STRONG_INLINE Packet2i preinterpret<Packet2i, Packet2ui>(const Packet2ui& a) {
144
+ return Packet2i(vreinterpret_s32_u32(a));
145
+ }
146
+ template <>
147
+ EIGEN_STRONG_INLINE Packet4i preinterpret<Packet4i, Packet4f>(const Packet4f& a) {
148
+ return Packet4i(vreinterpretq_s32_f32(a));
149
+ }
150
+ template <>
151
+ EIGEN_STRONG_INLINE Packet4i preinterpret<Packet4i, Packet4ui>(const Packet4ui& a) {
152
+ return Packet4i(vreinterpretq_s32_u32(a));
153
+ }
154
+
155
+ template <>
156
+ EIGEN_STRONG_INLINE Packet2ui preinterpret<Packet2ui, Packet2f>(const Packet2f& a) {
157
+ return Packet2ui(vreinterpret_u32_f32(a));
158
+ }
159
+ template <>
160
+ EIGEN_STRONG_INLINE Packet2ui preinterpret<Packet2ui, Packet2i>(const Packet2i& a) {
161
+ return Packet2ui(vreinterpret_u32_s32(a));
162
+ }
163
+ template <>
164
+ EIGEN_STRONG_INLINE Packet4ui preinterpret<Packet4ui, Packet4f>(const Packet4f& a) {
165
+ return Packet4ui(vreinterpretq_u32_f32(a));
166
+ }
167
+ template <>
168
+ EIGEN_STRONG_INLINE Packet4ui preinterpret<Packet4ui, Packet4i>(const Packet4i& a) {
169
+ return Packet4ui(vreinterpretq_u32_s32(a));
170
+ }
171
+
172
+ template <>
173
+ EIGEN_STRONG_INLINE Packet2l preinterpret<Packet2l, Packet2ul>(const Packet2ul& a) {
174
+ return Packet2l(vreinterpretq_s64_u64(a));
175
+ }
176
+ template <>
177
+ EIGEN_STRONG_INLINE Packet2ul preinterpret<Packet2ul, Packet2l>(const Packet2l& a) {
178
+ return Packet2ul(vreinterpretq_u64_s64(a));
179
+ }
180
+
181
+ //==============================================================================
182
+ // pcast, SrcType = float
183
+ //==============================================================================
184
+
185
+ template <>
186
+ struct type_casting_traits<float, numext::int64_t> {
187
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 };
188
+ };
189
+ template <>
190
+ struct type_casting_traits<float, numext::uint64_t> {
191
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 };
192
+ };
193
+ // If float64 exists, first convert to that to keep as much precision as possible.
194
+ #if EIGEN_ARCH_ARM64
195
+ template <>
196
+ EIGEN_STRONG_INLINE Packet2l pcast<Packet4f, Packet2l>(const Packet4f& a) {
197
+ // Discard second half of input.
198
+ return vcvtq_s64_f64(vcvt_f64_f32(vget_low_f32(a)));
199
+ }
200
+ template <>
201
+ EIGEN_STRONG_INLINE Packet2l pcast<Packet2f, Packet2l>(const Packet2f& a) {
202
+ return vcvtq_s64_f64(vcvt_f64_f32(a));
203
+ }
204
+ template <>
205
+ EIGEN_STRONG_INLINE Packet2ul pcast<Packet4f, Packet2ul>(const Packet4f& a) {
206
+ // Discard second half of input.
207
+ return vcvtq_u64_f64(vcvt_f64_f32(vget_low_f32(a)));
208
+ }
209
+ template <>
210
+ EIGEN_STRONG_INLINE Packet2ul pcast<Packet2f, Packet2ul>(const Packet2f& a) {
211
+ return vcvtq_u64_f64(vcvt_f64_f32(a));
212
+ }
213
+ #else
214
+ template <>
215
+ EIGEN_STRONG_INLINE Packet2l pcast<Packet4f, Packet2l>(const Packet4f& a) {
216
+ // Discard second half of input.
217
+ return vmovl_s32(vget_low_s32(vcvtq_s32_f32(a)));
218
+ }
219
+ template <>
220
+ EIGEN_STRONG_INLINE Packet2l pcast<Packet2f, Packet2l>(const Packet2f& a) {
221
+ return vmovl_s32(vcvt_s32_f32(a));
222
+ }
223
+ template <>
224
+ EIGEN_STRONG_INLINE Packet2ul pcast<Packet4f, Packet2ul>(const Packet4f& a) {
225
+ // Discard second half of input.
226
+ return vmovl_u32(vget_low_u32(vcvtq_u32_f32(a)));
227
+ }
228
+ template <>
229
+ EIGEN_STRONG_INLINE Packet2ul pcast<Packet2f, Packet2ul>(const Packet2f& a) {
230
+ // Discard second half of input.
231
+ return vmovl_u32(vcvt_u32_f32(a));
232
+ }
233
+ #endif // EIGEN_ARCH_ARM64
234
+
235
+ template <>
236
+ struct type_casting_traits<float, numext::int32_t> {
237
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
238
+ };
239
+ template <>
240
+ EIGEN_STRONG_INLINE Packet4i pcast<Packet4f, Packet4i>(const Packet4f& a) {
241
+ return vcvtq_s32_f32(a);
242
+ }
243
+ template <>
244
+ EIGEN_STRONG_INLINE Packet2i pcast<Packet2f, Packet2i>(const Packet2f& a) {
245
+ return vcvt_s32_f32(a);
246
+ }
247
+
248
+ template <>
249
+ struct type_casting_traits<float, numext::uint32_t> {
250
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
251
+ };
252
+ template <>
253
+ EIGEN_STRONG_INLINE Packet4ui pcast<Packet4f, Packet4ui>(const Packet4f& a) {
254
+ return vcvtq_u32_f32(a);
255
+ }
256
+ template <>
257
+ EIGEN_STRONG_INLINE Packet2ui pcast<Packet2f, Packet2ui>(const Packet2f& a) {
258
+ return vcvt_u32_f32(a);
259
+ }
260
+
261
+ template <>
262
+ struct type_casting_traits<float, numext::int16_t> {
263
+ enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 };
264
+ };
265
+ template <>
266
+ EIGEN_STRONG_INLINE Packet8s pcast<Packet4f, Packet8s>(const Packet4f& a, const Packet4f& b) {
267
+ return vcombine_s16(vmovn_s32(vcvtq_s32_f32(a)), vmovn_s32(vcvtq_s32_f32(b)));
268
+ }
269
+ template <>
270
+ EIGEN_STRONG_INLINE Packet4s pcast<Packet4f, Packet4s>(const Packet4f& a) {
271
+ return vmovn_s32(vcvtq_s32_f32(a));
272
+ }
273
+ template <>
274
+ EIGEN_STRONG_INLINE Packet4s pcast<Packet2f, Packet4s>(const Packet2f& a, const Packet2f& b) {
275
+ return vmovn_s32(vcombine_s32(vcvt_s32_f32(a), vcvt_s32_f32(b)));
276
+ }
277
+
278
+ template <>
279
+ struct type_casting_traits<float, numext::uint16_t> {
280
+ enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 };
281
+ };
282
+ template <>
283
+ EIGEN_STRONG_INLINE Packet8us pcast<Packet4f, Packet8us>(const Packet4f& a, const Packet4f& b) {
284
+ return vcombine_u16(vmovn_u32(vcvtq_u32_f32(a)), vmovn_u32(vcvtq_u32_f32(b)));
285
+ }
286
+ template <>
287
+ EIGEN_STRONG_INLINE Packet4us pcast<Packet4f, Packet4us>(const Packet4f& a) {
288
+ return vmovn_u32(vcvtq_u32_f32(a));
289
+ }
290
+ template <>
291
+ EIGEN_STRONG_INLINE Packet4us pcast<Packet2f, Packet4us>(const Packet2f& a, const Packet2f& b) {
292
+ return vmovn_u32(vcombine_u32(vcvt_u32_f32(a), vcvt_u32_f32(b)));
293
+ }
294
+
295
+ template <>
296
+ struct type_casting_traits<float, numext::int8_t> {
297
+ enum { VectorizedCast = 1, SrcCoeffRatio = 4, TgtCoeffRatio = 1 };
298
+ };
299
+ template <>
300
+ EIGEN_STRONG_INLINE Packet16c pcast<Packet4f, Packet16c>(const Packet4f& a, const Packet4f& b, const Packet4f& c,
301
+ const Packet4f& d) {
302
+ const int16x8_t ab_s16 = pcast<Packet4f, Packet8s>(a, b);
303
+ const int16x8_t cd_s16 = pcast<Packet4f, Packet8s>(c, d);
304
+ return vcombine_s8(vmovn_s16(ab_s16), vmovn_s16(cd_s16));
305
+ }
306
+ template <>
307
+ EIGEN_STRONG_INLINE Packet8c pcast<Packet4f, Packet8c>(const Packet4f& a, const Packet4f& b) {
308
+ const int16x8_t ab_s16 = pcast<Packet4f, Packet8s>(a, b);
309
+ return vmovn_s16(ab_s16);
310
+ }
311
+ template <>
312
+ EIGEN_STRONG_INLINE Packet8c pcast<Packet2f, Packet8c>(const Packet2f& a, const Packet2f& b, const Packet2f& c,
313
+ const Packet2f& d) {
314
+ const int16x4_t ab_s16 = pcast<Packet2f, Packet4s>(a, b);
315
+ const int16x4_t cd_s16 = pcast<Packet2f, Packet4s>(c, d);
316
+ return vmovn_s16(vcombine_s16(ab_s16, cd_s16));
317
+ }
318
+ template <>
319
+ EIGEN_STRONG_INLINE Packet4c pcast<Packet4f, Packet4c>(const Packet4f& a) {
320
+ const int32x4_t a_s32x4 = vcvtq_s32_f32(a);
321
+ const int16x4_t a_s16x4 = vmovn_s32(a_s32x4);
322
+ const int16x8_t aa_s16x8 = vcombine_s16(a_s16x4, a_s16x4);
323
+ const int8x8_t aa_s8x8 = vmovn_s16(aa_s16x8);
324
+ return vget_lane_s32(vreinterpret_s32_s8(aa_s8x8), 0);
325
+ }
326
+
327
+ template <>
328
+ struct type_casting_traits<float, numext::uint8_t> {
329
+ enum { VectorizedCast = 1, SrcCoeffRatio = 4, TgtCoeffRatio = 1 };
330
+ };
331
+ template <>
332
+ EIGEN_STRONG_INLINE Packet16uc pcast<Packet4f, Packet16uc>(const Packet4f& a, const Packet4f& b, const Packet4f& c,
333
+ const Packet4f& d) {
334
+ return preinterpret<Packet16uc>(pcast<Packet4f, Packet16c>(a, b, c, d));
335
+ }
336
+ template <>
337
+ EIGEN_STRONG_INLINE Packet8uc pcast<Packet4f, Packet8uc>(const Packet4f& a, const Packet4f& b) {
338
+ return preinterpret<Packet8uc>(pcast<Packet4f, Packet8c>(a, b));
339
+ }
340
+ template <>
341
+ EIGEN_STRONG_INLINE Packet8uc pcast<Packet2f, Packet8uc>(const Packet2f& a, const Packet2f& b, const Packet2f& c,
342
+ const Packet2f& d) {
343
+ return preinterpret<Packet8uc>(pcast<Packet2f, Packet8c>(a, b, c, d));
344
+ }
345
+ template <>
346
+ EIGEN_STRONG_INLINE Packet4uc pcast<Packet4f, Packet4uc>(const Packet4f& a) {
347
+ return static_cast<Packet4uc>(pcast<Packet4f, Packet4c>(a));
348
+ }
349
+
350
+ //==============================================================================
351
+ // pcast, SrcType = int8_t
352
+ //==============================================================================
353
+ template <>
354
+ struct type_casting_traits<numext::int8_t, float> {
355
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 4 };
356
+ };
357
+ template <>
358
+ EIGEN_STRONG_INLINE Packet4f pcast<Packet16c, Packet4f>(const Packet16c& a) {
359
+ // Discard all but first 4 bytes.
360
+ return vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8(vget_low_s8(a)))));
361
+ }
362
+ template <>
363
+ EIGEN_STRONG_INLINE Packet4f pcast<Packet4c, Packet4f>(const Packet4c& a) {
364
+ return vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8(vreinterpret_s8_s32(vdup_n_s32(a))))));
365
+ }
366
+ template <>
367
+ EIGEN_STRONG_INLINE Packet2f pcast<Packet8c, Packet2f>(const Packet8c& a) {
368
+ // Discard all but first 2 bytes.
369
+ return vcvt_f32_s32(vget_low_s32(vmovl_s16(vget_low_s16(vmovl_s8(a)))));
370
+ }
371
+
372
+ template <>
373
+ struct type_casting_traits<numext::int8_t, numext::int64_t> {
374
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 8 };
375
+ };
376
+ template <>
377
+ EIGEN_STRONG_INLINE Packet2l pcast<Packet16c, Packet2l>(const Packet16c& a) {
378
+ // Discard all but first two bytes.
379
+ return vmovl_s32(vget_low_s32(vmovl_s16(vget_low_s16(vmovl_s8(vget_low_s8(a))))));
380
+ }
381
+
382
+ template <>
383
+ struct type_casting_traits<numext::int8_t, numext::uint64_t> {
384
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 8 };
385
+ };
386
+ template <>
387
+ EIGEN_STRONG_INLINE Packet2ul pcast<Packet16c, Packet2ul>(const Packet16c& a) {
388
+ return preinterpret<Packet2ul>(pcast<Packet16c, Packet2l>(a));
389
+ }
390
+
391
+ template <>
392
+ struct type_casting_traits<numext::int8_t, numext::int32_t> {
393
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 4 };
394
+ };
395
+ template <>
396
+ EIGEN_STRONG_INLINE Packet4i pcast<Packet16c, Packet4i>(const Packet16c& a) {
397
+ // Discard all but first 4 bytes.
398
+ return vmovl_s16(vget_low_s16(vmovl_s8(vget_low_s8(a))));
399
+ }
400
+ template <>
401
+ EIGEN_STRONG_INLINE Packet4i pcast<Packet8c, Packet4i>(const Packet8c& a) {
402
+ return vmovl_s16(vget_low_s16(vmovl_s8(a)));
403
+ }
404
+ template <>
405
+ EIGEN_STRONG_INLINE Packet4i pcast<Packet4c, Packet4i>(const Packet4c& a) {
406
+ return pcast<Packet8c, Packet4i>(vreinterpret_s8_s32(vdup_n_s32(a)));
407
+ }
408
+ template <>
409
+ EIGEN_STRONG_INLINE Packet2i pcast<Packet8c, Packet2i>(const Packet8c& a) {
410
+ // Discard all but first 2 bytes.
411
+ return vget_low_s32(vmovl_s16(vget_low_s16(vmovl_s8(a))));
412
+ }
413
+
414
+ template <>
415
+ struct type_casting_traits<numext::int8_t, numext::uint32_t> {
416
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 4 };
417
+ };
418
+ template <>
419
+ EIGEN_STRONG_INLINE Packet4ui pcast<Packet16c, Packet4ui>(const Packet16c& a) {
420
+ return preinterpret<Packet4ui>(pcast<Packet16c, Packet4i>(a));
421
+ }
422
+ template <>
423
+ EIGEN_STRONG_INLINE Packet2ui pcast<Packet8c, Packet2ui>(const Packet8c& a) {
424
+ return preinterpret<Packet2ui>(pcast<Packet8c, Packet2i>(a));
425
+ }
426
+ template <>
427
+ EIGEN_STRONG_INLINE Packet4ui pcast<Packet4c, Packet4ui>(const Packet4c& a) {
428
+ return preinterpret<Packet4ui>(pcast<Packet4c, Packet4i>(a));
429
+ }
430
+
431
+ template <>
432
+ struct type_casting_traits<numext::int8_t, numext::int16_t> {
433
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 };
434
+ };
435
+ template <>
436
+ EIGEN_STRONG_INLINE Packet8s pcast<Packet16c, Packet8s>(const Packet16c& a) {
437
+ // Discard second half of input.
438
+ return vmovl_s8(vget_low_s8(a));
439
+ }
440
+ template <>
441
+ EIGEN_STRONG_INLINE Packet8s pcast<Packet8c, Packet8s>(const Packet8c& a) {
442
+ return vmovl_s8(a);
443
+ }
444
+ template <>
445
+ EIGEN_STRONG_INLINE Packet4s pcast<Packet8c, Packet4s>(const Packet8c& a) {
446
+ // Discard second half of input.
447
+ return vget_low_s16(vmovl_s8(a));
448
+ }
449
+ template <>
450
+ EIGEN_STRONG_INLINE Packet4s pcast<Packet4c, Packet4s>(const Packet4c& a) {
451
+ return pcast<Packet8c, Packet4s>(vreinterpret_s8_s32(vdup_n_s32(a)));
452
+ }
453
+
454
+ template <>
455
+ struct type_casting_traits<numext::int8_t, numext::uint16_t> {
456
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 };
457
+ };
458
+ template <>
459
+ EIGEN_STRONG_INLINE Packet8us pcast<Packet16c, Packet8us>(const Packet16c& a) {
460
+ return preinterpret<Packet8us>(pcast<Packet16c, Packet8s>(a));
461
+ }
462
+ template <>
463
+ EIGEN_STRONG_INLINE Packet8us pcast<Packet8c, Packet8us>(const Packet8c& a) {
464
+ return preinterpret<Packet8us>(pcast<Packet8c, Packet8s>(a));
465
+ }
466
+ template <>
467
+ EIGEN_STRONG_INLINE Packet4us pcast<Packet8c, Packet4us>(const Packet8c& a) {
468
+ return preinterpret<Packet4us>(pcast<Packet8c, Packet4s>(a));
469
+ }
470
+ template <>
471
+ EIGEN_STRONG_INLINE Packet4us pcast<Packet4c, Packet4us>(const Packet4c& a) {
472
+ return preinterpret<Packet4us>(pcast<Packet4c, Packet4s>(a));
473
+ }
474
+
475
+ //==============================================================================
476
+ // pcast, SrcType = uint8_t
477
+ //==============================================================================
478
+ template <>
479
+ struct type_casting_traits<numext::uint8_t, float> {
480
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 4 };
481
+ };
482
+ template <>
483
+ EIGEN_STRONG_INLINE Packet4f pcast<Packet16uc, Packet4f>(const Packet16uc& a) {
484
+ // Discard all but first 4 bytes.
485
+ return vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8(vget_low_u8(a)))));
486
+ }
487
+ template <>
488
+ EIGEN_STRONG_INLINE Packet4f pcast<Packet4uc, Packet4f>(const Packet4uc& a) {
489
+ return vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8(vreinterpret_u8_u32(vdup_n_u32(a))))));
490
+ }
491
+ template <>
492
+ EIGEN_STRONG_INLINE Packet2f pcast<Packet8uc, Packet2f>(const Packet8uc& a) {
493
+ // Discard all but first 2 bytes.
494
+ return vcvt_f32_u32(vget_low_u32(vmovl_u16(vget_low_u16(vmovl_u8(a)))));
495
+ }
496
+
497
+ template <>
498
+ struct type_casting_traits<numext::uint8_t, numext::uint64_t> {
499
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 8 };
500
+ };
501
+ template <>
502
+ EIGEN_STRONG_INLINE Packet2ul pcast<Packet16uc, Packet2ul>(const Packet16uc& a) {
503
+ // Discard all but first two bytes.
504
+ return vmovl_u32(vget_low_u32(vmovl_u16(vget_low_u16(vmovl_u8(vget_low_u8(a))))));
505
+ }
506
+
507
+ template <>
508
+ struct type_casting_traits<numext::uint8_t, numext::int64_t> {
509
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 8 };
510
+ };
511
+ template <>
512
+ EIGEN_STRONG_INLINE Packet2l pcast<Packet16uc, Packet2l>(const Packet16uc& a) {
513
+ return preinterpret<Packet2l>(pcast<Packet16uc, Packet2ul>(a));
514
+ }
515
+
516
+ template <>
517
+ struct type_casting_traits<numext::uint8_t, numext::uint32_t> {
518
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 4 };
519
+ };
520
+ template <>
521
+ EIGEN_STRONG_INLINE Packet4ui pcast<Packet16uc, Packet4ui>(const Packet16uc& a) {
522
+ // Discard all but first 4 bytes.
523
+ return vmovl_u16(vget_low_u16(vmovl_u8(vget_low_u8(a))));
524
+ }
525
+ template <>
526
+ EIGEN_STRONG_INLINE Packet4ui pcast<Packet8uc, Packet4ui>(const Packet8uc& a) {
527
+ return vmovl_u16(vget_low_u16(vmovl_u8(a)));
528
+ }
529
+ template <>
530
+ EIGEN_STRONG_INLINE Packet2ui pcast<Packet8uc, Packet2ui>(const Packet8uc& a) {
531
+ // Discard all but first 2 bytes.
532
+ return vget_low_u32(vmovl_u16(vget_low_u16(vmovl_u8(a))));
533
+ }
534
+ template <>
535
+ EIGEN_STRONG_INLINE Packet4ui pcast<Packet4uc, Packet4ui>(const Packet4uc& a) {
536
+ return pcast<Packet8uc, Packet4ui>(vreinterpret_u8_u32(vdup_n_u32(a)));
537
+ }
538
+
539
+ template <>
540
+ struct type_casting_traits<numext::uint8_t, numext::int32_t> {
541
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 4 };
542
+ };
543
+ template <>
544
+ EIGEN_STRONG_INLINE Packet4i pcast<Packet16uc, Packet4i>(const Packet16uc& a) {
545
+ return preinterpret<Packet4i>(pcast<Packet16uc, Packet4ui>(a));
546
+ }
547
+ template <>
548
+ EIGEN_STRONG_INLINE Packet2i pcast<Packet8uc, Packet2i>(const Packet8uc& a) {
549
+ return preinterpret<Packet2i>(pcast<Packet8uc, Packet2ui>(a));
550
+ }
551
+ template <>
552
+ EIGEN_STRONG_INLINE Packet4i pcast<Packet4uc, Packet4i>(const Packet4uc& a) {
553
+ return preinterpret<Packet4i>(pcast<Packet4uc, Packet4ui>(a));
554
+ }
555
+
556
+ template <>
557
+ struct type_casting_traits<numext::uint8_t, numext::uint16_t> {
558
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 };
559
+ };
560
+ template <>
561
+ EIGEN_STRONG_INLINE Packet8us pcast<Packet16uc, Packet8us>(const Packet16uc& a) {
562
+ // Discard second half of input.
563
+ return vmovl_u8(vget_low_u8(a));
564
+ }
565
+ template <>
566
+ EIGEN_STRONG_INLINE Packet8us pcast<Packet8uc, Packet8us>(const Packet8uc& a) {
567
+ return vmovl_u8(a);
568
+ }
569
+ template <>
570
+ EIGEN_STRONG_INLINE Packet4us pcast<Packet4uc, Packet4us>(const Packet4uc& a) {
571
+ return vget_low_u16(vmovl_u8(vreinterpret_u8_u32(vdup_n_u32(a))));
572
+ }
573
+
574
+ template <>
575
+ struct type_casting_traits<numext::uint8_t, numext::int16_t> {
576
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 };
577
+ };
578
+ template <>
579
+ EIGEN_STRONG_INLINE Packet8s pcast<Packet16uc, Packet8s>(const Packet16uc& a) {
580
+ return preinterpret<Packet8s>(pcast<Packet16uc, Packet8us>(a));
581
+ }
582
+ template <>
583
+ EIGEN_STRONG_INLINE Packet8s pcast<Packet8uc, Packet8s>(const Packet8uc& a) {
584
+ return preinterpret<Packet8s>(pcast<Packet8uc, Packet8us>(a));
585
+ }
586
+ template <>
587
+ EIGEN_STRONG_INLINE Packet4s pcast<Packet4uc, Packet4s>(const Packet4uc& a) {
588
+ return preinterpret<Packet4s>(pcast<Packet4uc, Packet4us>(a));
589
+ }
590
+
591
+ //==============================================================================
592
+ // pcast, SrcType = int16_t
593
+ //==============================================================================
594
+ template <>
595
+ struct type_casting_traits<numext::int16_t, float> {
596
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 };
597
+ };
598
+ template <>
599
+ EIGEN_STRONG_INLINE Packet4f pcast<Packet8s, Packet4f>(const Packet8s& a) {
600
+ // Discard second half of input.
601
+ return vcvtq_f32_s32(vmovl_s16(vget_low_s16(a)));
602
+ }
603
+ template <>
604
+ EIGEN_STRONG_INLINE Packet4f pcast<Packet4s, Packet4f>(const Packet4s& a) {
605
+ return vcvtq_f32_s32(vmovl_s16(a));
606
+ }
607
+ template <>
608
+ EIGEN_STRONG_INLINE Packet2f pcast<Packet4s, Packet2f>(const Packet4s& a) {
609
+ // Discard second half of input.
610
+ return vcvt_f32_s32(vget_low_s32(vmovl_s16(a)));
611
+ }
612
+
613
+ template <>
614
+ struct type_casting_traits<numext::int16_t, numext::int64_t> {
615
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 4 };
616
+ };
617
+ template <>
618
+ EIGEN_STRONG_INLINE Packet2l pcast<Packet8s, Packet2l>(const Packet8s& a) {
619
+ // Discard all but first two values.
620
+ return vmovl_s32(vget_low_s32(vmovl_s16(vget_low_s16(a))));
621
+ }
622
+
623
+ template <>
624
+ struct type_casting_traits<numext::int16_t, numext::uint64_t> {
625
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 4 };
626
+ };
627
+ template <>
628
+ EIGEN_STRONG_INLINE Packet2ul pcast<Packet8s, Packet2ul>(const Packet8s& a) {
629
+ return preinterpret<Packet2ul>(pcast<Packet8s, Packet2l>(a));
630
+ }
631
+
632
+ template <>
633
+ struct type_casting_traits<numext::int16_t, numext::int32_t> {
634
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 };
635
+ };
636
+ template <>
637
+ EIGEN_STRONG_INLINE Packet4i pcast<Packet8s, Packet4i>(const Packet8s& a) {
638
+ // Discard second half of input.
639
+ return vmovl_s16(vget_low_s16(a));
640
+ }
641
+ template <>
642
+ EIGEN_STRONG_INLINE Packet4i pcast<Packet4s, Packet4i>(const Packet4s& a) {
643
+ return vmovl_s16(a);
644
+ }
645
+ template <>
646
+ EIGEN_STRONG_INLINE Packet2i pcast<Packet4s, Packet2i>(const Packet4s& a) {
647
+ // Discard second half of input.
648
+ return vget_low_s32(vmovl_s16(a));
649
+ }
650
+
651
+ template <>
652
+ struct type_casting_traits<numext::int16_t, numext::uint32_t> {
653
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 };
654
+ };
655
+ template <>
656
+ EIGEN_STRONG_INLINE Packet4ui pcast<Packet8s, Packet4ui>(const Packet8s& a) {
657
+ return preinterpret<Packet4ui>(pcast<Packet8s, Packet4i>(a));
658
+ }
659
+ template <>
660
+ EIGEN_STRONG_INLINE Packet4ui pcast<Packet4s, Packet4ui>(const Packet4s& a) {
661
+ return preinterpret<Packet4ui>(pcast<Packet4s, Packet4i>(a));
662
+ }
663
+ template <>
664
+ EIGEN_STRONG_INLINE Packet2ui pcast<Packet4s, Packet2ui>(const Packet4s& a) {
665
+ return preinterpret<Packet2ui>(pcast<Packet4s, Packet2i>(a));
666
+ }
667
+
668
+ template <>
669
+ struct type_casting_traits<numext::int16_t, numext::int8_t> {
670
+ enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 };
671
+ };
672
+ template <>
673
+ EIGEN_STRONG_INLINE Packet16c pcast<Packet8s, Packet16c>(const Packet8s& a, const Packet8s& b) {
674
+ return vcombine_s8(vmovn_s16(a), vmovn_s16(b));
675
+ }
676
+ template <>
677
+ EIGEN_STRONG_INLINE Packet8c pcast<Packet8s, Packet8c>(const Packet8s& a) {
678
+ return vmovn_s16(a);
679
+ }
680
+ template <>
681
+ EIGEN_STRONG_INLINE Packet8c pcast<Packet4s, Packet8c>(const Packet4s& a, const Packet4s& b) {
682
+ return vmovn_s16(vcombine_s16(a, b));
683
+ }
684
+ template <>
685
+ EIGEN_STRONG_INLINE Packet4c pcast<Packet4s, Packet4c>(const Packet4s& a) {
686
+ const int8x8_t aa_s8x8 = pcast<Packet4s, Packet8c>(a, a);
687
+ return vget_lane_s32(vreinterpret_s32_s8(aa_s8x8), 0);
688
+ }
689
+
690
+ template <>
691
+ struct type_casting_traits<numext::int16_t, numext::uint8_t> {
692
+ enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 };
693
+ };
694
+ template <>
695
+ EIGEN_STRONG_INLINE Packet16uc pcast<Packet8s, Packet16uc>(const Packet8s& a, const Packet8s& b) {
696
+ return preinterpret<Packet16uc>(pcast<Packet8s, Packet16c>(a, b));
697
+ }
698
+ template <>
699
+ EIGEN_STRONG_INLINE Packet8uc pcast<Packet8s, Packet8uc>(const Packet8s& a) {
700
+ return preinterpret<Packet8uc>(pcast<Packet8s, Packet8c>(a));
701
+ }
702
+ template <>
703
+ EIGEN_STRONG_INLINE Packet8uc pcast<Packet4s, Packet8uc>(const Packet4s& a, const Packet4s& b) {
704
+ return preinterpret<Packet8uc>(pcast<Packet4s, Packet8c>(a, b));
705
+ }
706
+ template <>
707
+ EIGEN_STRONG_INLINE Packet4uc pcast<Packet4s, Packet4uc>(const Packet4s& a) {
708
+ return static_cast<Packet4uc>(pcast<Packet4s, Packet4c>(a));
709
+ }
710
+
711
+ //==============================================================================
712
+ // pcast, SrcType = uint16_t
713
+ //==============================================================================
714
+ template <>
715
+ struct type_casting_traits<numext::uint16_t, float> {
716
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 };
717
+ };
718
+ template <>
719
+ EIGEN_STRONG_INLINE Packet4f pcast<Packet8us, Packet4f>(const Packet8us& a) {
720
+ // Discard second half of input.
721
+ return vcvtq_f32_u32(vmovl_u16(vget_low_u16(a)));
722
+ }
723
+ template <>
724
+ EIGEN_STRONG_INLINE Packet4f pcast<Packet4us, Packet4f>(const Packet4us& a) {
725
+ return vcvtq_f32_u32(vmovl_u16(a));
726
+ }
727
+ template <>
728
+ EIGEN_STRONG_INLINE Packet2f pcast<Packet4us, Packet2f>(const Packet4us& a) {
729
+ // Discard second half of input.
730
+ return vcvt_f32_u32(vget_low_u32(vmovl_u16(a)));
731
+ }
732
+
733
+ template <>
734
+ struct type_casting_traits<numext::uint16_t, numext::uint64_t> {
735
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 4 };
736
+ };
737
+ template <>
738
+ EIGEN_STRONG_INLINE Packet2ul pcast<Packet8us, Packet2ul>(const Packet8us& a) {
739
+ // Discard all but first two values.
740
+ return vmovl_u32(vget_low_u32(vmovl_u16(vget_low_u16(a))));
741
+ }
742
+
743
+ template <>
744
+ struct type_casting_traits<numext::uint16_t, numext::int64_t> {
745
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 4 };
746
+ };
747
+ template <>
748
+ EIGEN_STRONG_INLINE Packet2l pcast<Packet8us, Packet2l>(const Packet8us& a) {
749
+ return preinterpret<Packet2l>(pcast<Packet8us, Packet2ul>(a));
750
+ }
751
+
752
+ template <>
753
+ struct type_casting_traits<numext::uint16_t, numext::uint32_t> {
754
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 };
755
+ };
756
+ template <>
757
+ EIGEN_STRONG_INLINE Packet4ui pcast<Packet8us, Packet4ui>(const Packet8us& a) {
758
+ // Discard second half of input.
759
+ return vmovl_u16(vget_low_u16(a));
760
+ }
761
+ template <>
762
+ EIGEN_STRONG_INLINE Packet4ui pcast<Packet4us, Packet4ui>(const Packet4us& a) {
763
+ return vmovl_u16(a);
764
+ }
765
+ template <>
766
+ EIGEN_STRONG_INLINE Packet2ui pcast<Packet4us, Packet2ui>(const Packet4us& a) {
767
+ // Discard second half of input.
768
+ return vget_low_u32(vmovl_u16(a));
769
+ }
770
+
771
+ template <>
772
+ struct type_casting_traits<numext::uint16_t, numext::int32_t> {
773
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 };
774
+ };
775
+ template <>
776
+ EIGEN_STRONG_INLINE Packet4i pcast<Packet8us, Packet4i>(const Packet8us& a) {
777
+ return preinterpret<Packet4i>(pcast<Packet8us, Packet4ui>(a));
778
+ }
779
+ template <>
780
+ EIGEN_STRONG_INLINE Packet4i pcast<Packet4us, Packet4i>(const Packet4us& a) {
781
+ return preinterpret<Packet4i>(pcast<Packet4us, Packet4ui>(a));
782
+ }
783
+ template <>
784
+ EIGEN_STRONG_INLINE Packet2i pcast<Packet4us, Packet2i>(const Packet4us& a) {
785
+ return preinterpret<Packet2i>(pcast<Packet4us, Packet2ui>(a));
786
+ }
787
+
788
+ template <>
789
+ struct type_casting_traits<numext::uint16_t, numext::uint8_t> {
790
+ enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 };
791
+ };
792
+ template <>
793
+ EIGEN_STRONG_INLINE Packet16uc pcast<Packet8us, Packet16uc>(const Packet8us& a, const Packet8us& b) {
794
+ return vcombine_u8(vmovn_u16(a), vmovn_u16(b));
795
+ }
796
+ template <>
797
+ EIGEN_STRONG_INLINE Packet8uc pcast<Packet8us, Packet8uc>(const Packet8us& a) {
798
+ return vmovn_u16(a);
799
+ }
800
+ template <>
801
+ EIGEN_STRONG_INLINE Packet8uc pcast<Packet4us, Packet8uc>(const Packet4us& a, const Packet4us& b) {
802
+ return vmovn_u16(vcombine_u16(a, b));
803
+ }
804
+ template <>
805
+ EIGEN_STRONG_INLINE Packet4uc pcast<Packet4us, Packet4uc>(const Packet4us& a) {
806
+ uint8x8_t aa_u8x8 = pcast<Packet4us, Packet8uc>(a, a);
807
+ return vget_lane_u32(vreinterpret_u32_u8(aa_u8x8), 0);
808
+ }
809
+
810
+ template <>
811
+ struct type_casting_traits<numext::uint16_t, numext::int8_t> {
812
+ enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 };
813
+ };
814
+ template <>
815
+ EIGEN_STRONG_INLINE Packet16c pcast<Packet8us, Packet16c>(const Packet8us& a, const Packet8us& b) {
816
+ return preinterpret<Packet16c>(pcast<Packet8us, Packet16uc>(a, b));
817
+ }
818
+ template <>
819
+ EIGEN_STRONG_INLINE Packet8c pcast<Packet8us, Packet8c>(const Packet8us& a) {
820
+ return preinterpret<Packet8c>(pcast<Packet8us, Packet8uc>(a));
821
+ }
822
+ template <>
823
+ EIGEN_STRONG_INLINE Packet8c pcast<Packet4us, Packet8c>(const Packet4us& a, const Packet4us& b) {
824
+ return preinterpret<Packet8c>(pcast<Packet4us, Packet8uc>(a, b));
825
+ }
826
+ template <>
827
+ EIGEN_STRONG_INLINE Packet4c pcast<Packet4us, Packet4c>(const Packet4us& a) {
828
+ return static_cast<Packet4c>(pcast<Packet4us, Packet4uc>(a));
829
+ }
830
+
831
+ //==============================================================================
832
+ // pcast, SrcType = int32_t
833
+ //==============================================================================
834
+ template <>
835
+ struct type_casting_traits<numext::int32_t, float> {
836
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
837
+ };
838
+ template <>
839
+ EIGEN_STRONG_INLINE Packet4f pcast<Packet4i, Packet4f>(const Packet4i& a) {
840
+ return vcvtq_f32_s32(a);
841
+ }
842
+ template <>
843
+ EIGEN_STRONG_INLINE Packet2f pcast<Packet2i, Packet2f>(const Packet2i& a) {
844
+ return vcvt_f32_s32(a);
845
+ }
846
+
847
+ template <>
848
+ struct type_casting_traits<numext::int32_t, numext::int64_t> {
849
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 };
850
+ };
851
+ template <>
852
+ EIGEN_STRONG_INLINE Packet2l pcast<Packet4i, Packet2l>(const Packet4i& a) {
853
+ // Discard second half of input.
854
+ return vmovl_s32(vget_low_s32(a));
855
+ }
856
+ template <>
857
+ EIGEN_STRONG_INLINE Packet2l pcast<Packet2i, Packet2l>(const Packet2i& a) {
858
+ return vmovl_s32(a);
859
+ }
860
+
861
+ template <>
862
+ struct type_casting_traits<numext::int32_t, numext::uint64_t> {
863
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 };
864
+ };
865
+ template <>
866
+ EIGEN_STRONG_INLINE Packet2ul pcast<Packet4i, Packet2ul>(const Packet4i& a) {
867
+ return preinterpret<Packet2ul>(pcast<Packet4i, Packet2l>(a));
868
+ }
869
+ template <>
870
+ EIGEN_STRONG_INLINE Packet2ul pcast<Packet2i, Packet2ul>(const Packet2i& a) {
871
+ return preinterpret<Packet2ul>(pcast<Packet2i, Packet2l>(a));
872
+ }
873
+
874
+ template <>
875
+ struct type_casting_traits<numext::int32_t, numext::int16_t> {
876
+ enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 };
877
+ };
878
+ template <>
879
+ EIGEN_STRONG_INLINE Packet8s pcast<Packet4i, Packet8s>(const Packet4i& a, const Packet4i& b) {
880
+ return vcombine_s16(vmovn_s32(a), vmovn_s32(b));
881
+ }
882
+ template <>
883
+ EIGEN_STRONG_INLINE Packet4s pcast<Packet4i, Packet4s>(const Packet4i& a) {
884
+ return vmovn_s32(a);
885
+ }
886
+ template <>
887
+ EIGEN_STRONG_INLINE Packet4s pcast<Packet2i, Packet4s>(const Packet2i& a, const Packet2i& b) {
888
+ return vmovn_s32(vcombine_s32(a, b));
889
+ }
890
+
891
+ template <>
892
+ struct type_casting_traits<numext::int32_t, numext::uint16_t> {
893
+ enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 };
894
+ };
895
+ template <>
896
+ EIGEN_STRONG_INLINE Packet8us pcast<Packet4i, Packet8us>(const Packet4i& a, const Packet4i& b) {
897
+ return vcombine_u16(vmovn_u32(vreinterpretq_u32_s32(a)), vmovn_u32(vreinterpretq_u32_s32(b)));
898
+ }
899
+ template <>
900
+ EIGEN_STRONG_INLINE Packet4us pcast<Packet4i, Packet4us>(const Packet4i& a) {
901
+ return vmovn_u32(vreinterpretq_u32_s32(a));
902
+ }
903
+ template <>
904
+ EIGEN_STRONG_INLINE Packet4us pcast<Packet2i, Packet4us>(const Packet2i& a, const Packet2i& b) {
905
+ return vmovn_u32(vreinterpretq_u32_s32(vcombine_s32(a, b)));
906
+ }
907
+
908
+ template <>
909
+ struct type_casting_traits<numext::int32_t, numext::int8_t> {
910
+ enum { VectorizedCast = 1, SrcCoeffRatio = 4, TgtCoeffRatio = 1 };
911
+ };
912
+ template <>
913
+ EIGEN_STRONG_INLINE Packet16c pcast<Packet4i, Packet16c>(const Packet4i& a, const Packet4i& b, const Packet4i& c,
914
+ const Packet4i& d) {
915
+ const int16x8_t ab_s16 = pcast<Packet4i, Packet8s>(a, b);
916
+ const int16x8_t cd_s16 = pcast<Packet4i, Packet8s>(c, d);
917
+ return vcombine_s8(vmovn_s16(ab_s16), vmovn_s16(cd_s16));
918
+ }
919
+ template <>
920
+ EIGEN_STRONG_INLINE Packet8c pcast<Packet4i, Packet8c>(const Packet4i& a, const Packet4i& b) {
921
+ const int16x8_t ab_s16 = pcast<Packet4i, Packet8s>(a, b);
922
+ return vmovn_s16(ab_s16);
923
+ }
924
+ template <>
925
+ EIGEN_STRONG_INLINE Packet8c pcast<Packet2i, Packet8c>(const Packet2i& a, const Packet2i& b, const Packet2i& c,
926
+ const Packet2i& d) {
927
+ const int16x4_t ab_s16 = vmovn_s32(vcombine_s32(a, b));
928
+ const int16x4_t cd_s16 = vmovn_s32(vcombine_s32(c, d));
929
+ return vmovn_s16(vcombine_s16(ab_s16, cd_s16));
930
+ }
931
+ template <>
932
+ EIGEN_STRONG_INLINE Packet4c pcast<Packet4i, Packet4c>(const Packet4i& a) {
933
+ const int16x4_t a_s16x4 = vmovn_s32(a);
934
+ const int16x8_t aa_s16x8 = vcombine_s16(a_s16x4, a_s16x4);
935
+ const int8x8_t aa_s8x8 = vmovn_s16(aa_s16x8);
936
+ return vget_lane_s32(vreinterpret_s32_s8(aa_s8x8), 0);
937
+ }
938
+
939
+ template <>
940
+ struct type_casting_traits<numext::int32_t, numext::uint8_t> {
941
+ enum { VectorizedCast = 1, SrcCoeffRatio = 4, TgtCoeffRatio = 1 };
942
+ };
943
+ template <>
944
+ EIGEN_STRONG_INLINE Packet16uc pcast<Packet4i, Packet16uc>(const Packet4i& a, const Packet4i& b, const Packet4i& c,
945
+ const Packet4i& d) {
946
+ return preinterpret<Packet16uc>(pcast<Packet4i, Packet16c>(a, b, c, d));
947
+ }
948
+ template <>
949
+ EIGEN_STRONG_INLINE Packet8uc pcast<Packet4i, Packet8uc>(const Packet4i& a, const Packet4i& b) {
950
+ return preinterpret<Packet8uc>(pcast<Packet4i, Packet8c>(a, b));
951
+ }
952
+ template <>
953
+ EIGEN_STRONG_INLINE Packet8uc pcast<Packet2i, Packet8uc>(const Packet2i& a, const Packet2i& b, const Packet2i& c,
954
+ const Packet2i& d) {
955
+ return preinterpret<Packet8uc>(pcast<Packet2i, Packet8c>(a, b, c, d));
956
+ }
957
+ template <>
958
+ EIGEN_STRONG_INLINE Packet4uc pcast<Packet4i, Packet4uc>(const Packet4i& a) {
959
+ return static_cast<Packet4uc>(pcast<Packet4i, Packet4c>(a));
960
+ }
961
+
962
+ //==============================================================================
963
+ // pcast, SrcType = uint32_t
964
+ //==============================================================================
965
+ template <>
966
+ struct type_casting_traits<numext::uint32_t, float> {
967
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
968
+ };
969
+ template <>
970
+ EIGEN_STRONG_INLINE Packet4f pcast<Packet4ui, Packet4f>(const Packet4ui& a) {
971
+ return vcvtq_f32_u32(a);
972
+ }
973
+ template <>
974
+ EIGEN_STRONG_INLINE Packet2f pcast<Packet2ui, Packet2f>(const Packet2ui& a) {
975
+ return vcvt_f32_u32(a);
976
+ }
977
+
978
+ template <>
979
+ struct type_casting_traits<numext::uint32_t, numext::uint64_t> {
980
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 };
981
+ };
982
+ template <>
983
+ EIGEN_STRONG_INLINE Packet2ul pcast<Packet4ui, Packet2ul>(const Packet4ui& a) {
984
+ // Discard second half of input.
985
+ return vmovl_u32(vget_low_u32(a));
986
+ }
987
+ template <>
988
+ EIGEN_STRONG_INLINE Packet2ul pcast<Packet2ui, Packet2ul>(const Packet2ui& a) {
989
+ return vmovl_u32(a);
990
+ }
991
+
992
+ template <>
993
+ struct type_casting_traits<numext::uint32_t, numext::int64_t> {
994
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 };
995
+ };
996
+ template <>
997
+ EIGEN_STRONG_INLINE Packet2l pcast<Packet4ui, Packet2l>(const Packet4ui& a) {
998
+ return preinterpret<Packet2l>(pcast<Packet4ui, Packet2ul>(a));
999
+ }
1000
+ template <>
1001
+ EIGEN_STRONG_INLINE Packet2l pcast<Packet2ui, Packet2l>(const Packet2ui& a) {
1002
+ return preinterpret<Packet2l>(pcast<Packet2ui, Packet2ul>(a));
1003
+ }
1004
+
1005
+ template <>
1006
+ struct type_casting_traits<numext::uint32_t, numext::uint16_t> {
1007
+ enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 };
1008
+ };
1009
+ template <>
1010
+ EIGEN_STRONG_INLINE Packet8us pcast<Packet4ui, Packet8us>(const Packet4ui& a, const Packet4ui& b) {
1011
+ return vcombine_u16(vmovn_u32(a), vmovn_u32(b));
1012
+ }
1013
+ template <>
1014
+ EIGEN_STRONG_INLINE Packet4us pcast<Packet2ui, Packet4us>(const Packet2ui& a, const Packet2ui& b) {
1015
+ return vmovn_u32(vcombine_u32(a, b));
1016
+ }
1017
+ template <>
1018
+ EIGEN_STRONG_INLINE Packet4us pcast<Packet4ui, Packet4us>(const Packet4ui& a) {
1019
+ return vmovn_u32(a);
1020
+ }
1021
+
1022
+ template <>
1023
+ struct type_casting_traits<numext::uint32_t, numext::int16_t> {
1024
+ enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 };
1025
+ };
1026
+ template <>
1027
+ EIGEN_STRONG_INLINE Packet8s pcast<Packet4ui, Packet8s>(const Packet4ui& a, const Packet4ui& b) {
1028
+ return preinterpret<Packet8s>(pcast<Packet4ui, Packet8us>(a, b));
1029
+ }
1030
+ template <>
1031
+ EIGEN_STRONG_INLINE Packet4s pcast<Packet2ui, Packet4s>(const Packet2ui& a, const Packet2ui& b) {
1032
+ return preinterpret<Packet4s>(pcast<Packet2ui, Packet4us>(a, b));
1033
+ }
1034
+ template <>
1035
+ EIGEN_STRONG_INLINE Packet4s pcast<Packet4ui, Packet4s>(const Packet4ui& a) {
1036
+ return preinterpret<Packet4s>(pcast<Packet4ui, Packet4us>(a));
1037
+ }
1038
+
1039
+ template <>
1040
+ struct type_casting_traits<numext::uint32_t, numext::uint8_t> {
1041
+ enum { VectorizedCast = 1, SrcCoeffRatio = 4, TgtCoeffRatio = 1 };
1042
+ };
1043
+ template <>
1044
+ EIGEN_STRONG_INLINE Packet16uc pcast<Packet4ui, Packet16uc>(const Packet4ui& a, const Packet4ui& b, const Packet4ui& c,
1045
+ const Packet4ui& d) {
1046
+ const uint16x8_t ab_u16 = vcombine_u16(vmovn_u32(a), vmovn_u32(b));
1047
+ const uint16x8_t cd_u16 = vcombine_u16(vmovn_u32(c), vmovn_u32(d));
1048
+ return vcombine_u8(vmovn_u16(ab_u16), vmovn_u16(cd_u16));
1049
+ }
1050
+ template <>
1051
+ EIGEN_STRONG_INLINE Packet8uc pcast<Packet4ui, Packet8uc>(const Packet4ui& a, const Packet4ui& b) {
1052
+ const uint16x8_t ab_u16 = vcombine_u16(vmovn_u32(a), vmovn_u32(b));
1053
+ return vmovn_u16(ab_u16);
1054
+ }
1055
+ template <>
1056
+ EIGEN_STRONG_INLINE Packet8uc pcast<Packet2ui, Packet8uc>(const Packet2ui& a, const Packet2ui& b, const Packet2ui& c,
1057
+ const Packet2ui& d) {
1058
+ const uint16x4_t ab_u16 = vmovn_u32(vcombine_u32(a, b));
1059
+ const uint16x4_t cd_u16 = vmovn_u32(vcombine_u32(c, d));
1060
+ return vmovn_u16(vcombine_u16(ab_u16, cd_u16));
1061
+ }
1062
+ template <>
1063
+ EIGEN_STRONG_INLINE Packet4uc pcast<Packet4ui, Packet4uc>(const Packet4ui& a) {
1064
+ const uint16x4_t a_u16x4 = vmovn_u32(a);
1065
+ const uint16x8_t aa_u16x8 = vcombine_u16(a_u16x4, a_u16x4);
1066
+ const uint8x8_t aa_u8x8 = vmovn_u16(aa_u16x8);
1067
+ return vget_lane_u32(vreinterpret_u32_u8(aa_u8x8), 0);
1068
+ }
1069
+
1070
+ template <>
1071
+ struct type_casting_traits<numext::uint32_t, numext::int8_t> {
1072
+ enum { VectorizedCast = 1, SrcCoeffRatio = 4, TgtCoeffRatio = 1 };
1073
+ };
1074
+ template <>
1075
+ EIGEN_STRONG_INLINE Packet16c pcast<Packet4ui, Packet16c>(const Packet4ui& a, const Packet4ui& b, const Packet4ui& c,
1076
+ const Packet4ui& d) {
1077
+ return preinterpret<Packet16c>(pcast<Packet4ui, Packet16uc>(a, b, c, d));
1078
+ }
1079
+ template <>
1080
+ EIGEN_STRONG_INLINE Packet8c pcast<Packet4ui, Packet8c>(const Packet4ui& a, const Packet4ui& b) {
1081
+ return preinterpret<Packet8c>(pcast<Packet4ui, Packet8uc>(a, b));
1082
+ }
1083
+ template <>
1084
+ EIGEN_STRONG_INLINE Packet8c pcast<Packet2ui, Packet8c>(const Packet2ui& a, const Packet2ui& b, const Packet2ui& c,
1085
+ const Packet2ui& d) {
1086
+ return preinterpret<Packet8c>(pcast<Packet2ui, Packet8uc>(a, b, c, d));
1087
+ }
1088
+ template <>
1089
+ EIGEN_STRONG_INLINE Packet4c pcast<Packet4ui, Packet4c>(const Packet4ui& a) {
1090
+ return static_cast<Packet4c>(pcast<Packet4ui, Packet4uc>(a));
1091
+ }
1092
+
1093
+ //==============================================================================
1094
+ // pcast, SrcType = int64_t
1095
+ //==============================================================================
1096
+ template <>
1097
+ struct type_casting_traits<numext::int64_t, float> {
1098
+ enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 };
1099
+ };
1100
+
1101
+ template <>
1102
+ EIGEN_STRONG_INLINE Packet4f pcast<Packet2l, Packet4f>(const Packet2l& a, const Packet2l& b) {
1103
+ #if EIGEN_ARCH_ARM64
1104
+ return vcombine_f32(vcvt_f32_f64(vcvtq_f64_s64(a)), vcvt_f32_f64(vcvtq_f64_s64(b)));
1105
+ #else
1106
+ EIGEN_ALIGN_MAX int64_t lvals[4];
1107
+ pstore(lvals, a);
1108
+ pstore(lvals + 2, b);
1109
+ EIGEN_ALIGN_MAX float fvals[4] = {static_cast<float>(lvals[0]), static_cast<float>(lvals[1]),
1110
+ static_cast<float>(lvals[2]), static_cast<float>(lvals[3])};
1111
+ return pload<Packet4f>(fvals);
1112
+ #endif
1113
+ }
1114
+
1115
+ template <>
1116
+ EIGEN_STRONG_INLINE Packet2f pcast<Packet2l, Packet2f>(const Packet2l& a) {
1117
+ #if EIGEN_ARCH_ARM64
1118
+ return vcvt_f32_f64(vcvtq_f64_s64(a));
1119
+ #else
1120
+ EIGEN_ALIGN_MAX int64_t lvals[2];
1121
+ pstore(lvals, a);
1122
+ EIGEN_ALIGN_MAX float fvals[2] = {static_cast<float>(lvals[0]), static_cast<float>(lvals[1])};
1123
+ return pload<Packet2f>(fvals);
1124
+ #endif
1125
+ }
1126
+
1127
+ template <>
1128
+ struct type_casting_traits<numext::int64_t, numext::int32_t> {
1129
+ enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 };
1130
+ };
1131
+ template <>
1132
+ EIGEN_STRONG_INLINE Packet4i pcast<Packet2l, Packet4i>(const Packet2l& a, const Packet2l& b) {
1133
+ return vcombine_s32(vmovn_s64(a), vmovn_s64(b));
1134
+ }
1135
+ template <>
1136
+ EIGEN_STRONG_INLINE Packet2i pcast<Packet2l, Packet2i>(const Packet2l& a) {
1137
+ return vmovn_s64(a);
1138
+ }
1139
+
1140
+ template <>
1141
+ struct type_casting_traits<numext::int64_t, numext::uint32_t> {
1142
+ enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 };
1143
+ };
1144
+ template <>
1145
+ EIGEN_STRONG_INLINE Packet4ui pcast<Packet2l, Packet4ui>(const Packet2l& a, const Packet2l& b) {
1146
+ return vcombine_u32(vmovn_u64(vreinterpretq_u64_s64(a)), vmovn_u64(vreinterpretq_u64_s64(b)));
1147
+ }
1148
+ template <>
1149
+ EIGEN_STRONG_INLINE Packet2ui pcast<Packet2l, Packet2ui>(const Packet2l& a) {
1150
+ return vmovn_u64(vreinterpretq_u64_s64(a));
1151
+ }
1152
+
1153
+ template <>
1154
+ struct type_casting_traits<numext::int64_t, numext::int16_t> {
1155
+ enum { VectorizedCast = 1, SrcCoeffRatio = 4, TgtCoeffRatio = 1 };
1156
+ };
1157
+ template <>
1158
+ EIGEN_STRONG_INLINE Packet8s pcast<Packet2l, Packet8s>(const Packet2l& a, const Packet2l& b, const Packet2l& c,
1159
+ const Packet2l& d) {
1160
+ const int32x4_t ab_s32 = pcast<Packet2l, Packet4i>(a, b);
1161
+ const int32x4_t cd_s32 = pcast<Packet2l, Packet4i>(c, d);
1162
+ return vcombine_s16(vmovn_s32(ab_s32), vmovn_s32(cd_s32));
1163
+ }
1164
+ template <>
1165
+ EIGEN_STRONG_INLINE Packet4s pcast<Packet2l, Packet4s>(const Packet2l& a, const Packet2l& b) {
1166
+ const int32x4_t ab_s32 = pcast<Packet2l, Packet4i>(a, b);
1167
+ return vmovn_s32(ab_s32);
1168
+ }
1169
+
1170
+ template <>
1171
+ struct type_casting_traits<numext::int64_t, numext::uint16_t> {
1172
+ enum { VectorizedCast = 1, SrcCoeffRatio = 4, TgtCoeffRatio = 1 };
1173
+ };
1174
+ template <>
1175
+ EIGEN_STRONG_INLINE Packet8us pcast<Packet2l, Packet8us>(const Packet2l& a, const Packet2l& b, const Packet2l& c,
1176
+ const Packet2l& d) {
1177
+ return preinterpret<Packet8us>(pcast<Packet2l, Packet8s>(a, b, c, d));
1178
+ }
1179
+ template <>
1180
+ EIGEN_STRONG_INLINE Packet4us pcast<Packet2l, Packet4us>(const Packet2l& a, const Packet2l& b) {
1181
+ return preinterpret<Packet4us>(pcast<Packet2l, Packet4s>(a, b));
1182
+ }
1183
+
1184
+ template <>
1185
+ struct type_casting_traits<numext::int64_t, numext::int8_t> {
1186
+ enum { VectorizedCast = 1, SrcCoeffRatio = 8, TgtCoeffRatio = 1 };
1187
+ };
1188
+ template <>
1189
+ EIGEN_STRONG_INLINE Packet16c pcast<Packet2l, Packet16c>(const Packet2l& a, const Packet2l& b, const Packet2l& c,
1190
+ const Packet2l& d, const Packet2l& e, const Packet2l& f,
1191
+ const Packet2l& g, const Packet2l& h) {
1192
+ const int16x8_t abcd_s16 = pcast<Packet2l, Packet8s>(a, b, c, d);
1193
+ const int16x8_t efgh_s16 = pcast<Packet2l, Packet8s>(e, f, g, h);
1194
+ return vcombine_s8(vmovn_s16(abcd_s16), vmovn_s16(efgh_s16));
1195
+ }
1196
+ template <>
1197
+ EIGEN_STRONG_INLINE Packet8c pcast<Packet2l, Packet8c>(const Packet2l& a, const Packet2l& b, const Packet2l& c,
1198
+ const Packet2l& d) {
1199
+ const int16x8_t abcd_s16 = pcast<Packet2l, Packet8s>(a, b, c, d);
1200
+ return vmovn_s16(abcd_s16);
1201
+ }
1202
+ template <>
1203
+ EIGEN_STRONG_INLINE Packet4c pcast<Packet2l, Packet4c>(const Packet2l& a, const Packet2l& b) {
1204
+ const int16x4_t ab_s16 = pcast<Packet2l, Packet4s>(a, b);
1205
+ const int16x8_t abab_s16 = vcombine_s16(ab_s16, ab_s16);
1206
+ const int8x8_t abab_s8 = vmovn_s16(abab_s16);
1207
+ return vget_lane_s32(vreinterpret_s32_s8(abab_s8), 0);
1208
+ }
1209
+
1210
+ template <>
1211
+ struct type_casting_traits<numext::int64_t, numext::uint8_t> {
1212
+ enum { VectorizedCast = 1, SrcCoeffRatio = 8, TgtCoeffRatio = 1 };
1213
+ };
1214
+ template <>
1215
+ EIGEN_STRONG_INLINE Packet16uc pcast<Packet2l, Packet16uc>(const Packet2l& a, const Packet2l& b, const Packet2l& c,
1216
+ const Packet2l& d, const Packet2l& e, const Packet2l& f,
1217
+ const Packet2l& g, const Packet2l& h) {
1218
+ const uint16x8_t abcd_u16 = pcast<Packet2l, Packet8us>(a, b, c, d);
1219
+ const uint16x8_t efgh_u16 = pcast<Packet2l, Packet8us>(e, f, g, h);
1220
+ return vcombine_u8(vmovn_u16(abcd_u16), vmovn_u16(efgh_u16));
1221
+ }
1222
+ template <>
1223
+ EIGEN_STRONG_INLINE Packet8uc pcast<Packet2l, Packet8uc>(const Packet2l& a, const Packet2l& b, const Packet2l& c,
1224
+ const Packet2l& d) {
1225
+ return preinterpret<Packet8uc>(pcast<Packet2l, Packet8c>(a, b, c, d));
1226
+ }
1227
+ template <>
1228
+ EIGEN_STRONG_INLINE Packet4uc pcast<Packet2l, Packet4uc>(const Packet2l& a, const Packet2l& b) {
1229
+ return static_cast<Packet4uc>(pcast<Packet2l, Packet4c>(a, b));
1230
+ }
1231
+
1232
+ //==============================================================================
1233
+ // pcast, SrcType = uint64_t
1234
+ //==============================================================================
1235
+ template <>
1236
+ struct type_casting_traits<numext::uint64_t, float> {
1237
+ enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 };
1238
+ };
1239
+ template <>
1240
+ EIGEN_STRONG_INLINE Packet4f pcast<Packet2ul, Packet4f>(const Packet2ul& a, const Packet2ul& b) {
1241
+ #if EIGEN_ARCH_ARM64
1242
+ return vcombine_f32(vcvt_f32_f64(vcvtq_f64_u64(a)), vcvt_f32_f64(vcvtq_f64_u64(b)));
1243
+ #else
1244
+ EIGEN_ALIGN_MAX uint64_t uvals[4];
1245
+ pstore(uvals, a);
1246
+ pstore(uvals + 2, b);
1247
+ EIGEN_ALIGN_MAX float fvals[4] = {static_cast<float>(uvals[0]), static_cast<float>(uvals[1]),
1248
+ static_cast<float>(uvals[2]), static_cast<float>(uvals[3])};
1249
+ return pload<Packet4f>(fvals);
1250
+ #endif
1251
+ }
1252
+ template <>
1253
+ EIGEN_STRONG_INLINE Packet2f pcast<Packet2ul, Packet2f>(const Packet2ul& a) {
1254
+ #if EIGEN_ARCH_ARM64
1255
+ return vcvt_f32_f64(vcvtq_f64_u64(a));
1256
+ #else
1257
+ EIGEN_ALIGN_MAX uint64_t uvals[2];
1258
+ pstore(uvals, a);
1259
+ EIGEN_ALIGN_MAX float fvals[2] = {static_cast<float>(uvals[0]), static_cast<float>(uvals[1])};
1260
+ return pload<Packet2f>(fvals);
1261
+ #endif
1262
+ }
1263
+
1264
+ template <>
1265
+ struct type_casting_traits<numext::uint64_t, numext::uint32_t> {
1266
+ enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 };
1267
+ };
1268
+ template <>
1269
+ EIGEN_STRONG_INLINE Packet4ui pcast<Packet2ul, Packet4ui>(const Packet2ul& a, const Packet2ul& b) {
1270
+ return vcombine_u32(vmovn_u64(a), vmovn_u64(b));
1271
+ }
1272
+ template <>
1273
+ EIGEN_STRONG_INLINE Packet2ui pcast<Packet2ul, Packet2ui>(const Packet2ul& a) {
1274
+ return vmovn_u64(a);
1275
+ }
1276
+
1277
+ template <>
1278
+ struct type_casting_traits<numext::uint64_t, numext::int32_t> {
1279
+ enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 };
1280
+ };
1281
+ template <>
1282
+ EIGEN_STRONG_INLINE Packet4i pcast<Packet2ul, Packet4i>(const Packet2ul& a, const Packet2ul& b) {
1283
+ return preinterpret<Packet4i>(pcast<Packet2ul, Packet4ui>(a, b));
1284
+ }
1285
+ template <>
1286
+ EIGEN_STRONG_INLINE Packet2i pcast<Packet2ul, Packet2i>(const Packet2ul& a) {
1287
+ return preinterpret<Packet2i>(pcast<Packet2ul, Packet2ui>(a));
1288
+ }
1289
+
1290
+ template <>
1291
+ struct type_casting_traits<numext::uint64_t, numext::uint16_t> {
1292
+ enum { VectorizedCast = 1, SrcCoeffRatio = 4, TgtCoeffRatio = 1 };
1293
+ };
1294
+ template <>
1295
+ EIGEN_STRONG_INLINE Packet8us pcast<Packet2ul, Packet8us>(const Packet2ul& a, const Packet2ul& b, const Packet2ul& c,
1296
+ const Packet2ul& d) {
1297
+ const uint16x4_t ab_u16 = vmovn_u32(vcombine_u32(vmovn_u64(a), vmovn_u64(b)));
1298
+ const uint16x4_t cd_u16 = vmovn_u32(vcombine_u32(vmovn_u64(c), vmovn_u64(d)));
1299
+ return vcombine_u16(ab_u16, cd_u16);
1300
+ }
1301
+ template <>
1302
+ EIGEN_STRONG_INLINE Packet4us pcast<Packet2ul, Packet4us>(const Packet2ul& a, const Packet2ul& b) {
1303
+ return vmovn_u32(vcombine_u32(vmovn_u64(a), vmovn_u64(b)));
1304
+ }
1305
+
1306
+ template <>
1307
+ struct type_casting_traits<numext::uint64_t, numext::int16_t> {
1308
+ enum { VectorizedCast = 1, SrcCoeffRatio = 4, TgtCoeffRatio = 1 };
1309
+ };
1310
+ template <>
1311
+ EIGEN_STRONG_INLINE Packet8s pcast<Packet2ul, Packet8s>(const Packet2ul& a, const Packet2ul& b, const Packet2ul& c,
1312
+ const Packet2ul& d) {
1313
+ return preinterpret<Packet8s>(pcast<Packet2ul, Packet8us>(a, b, c, d));
1314
+ }
1315
+ template <>
1316
+ EIGEN_STRONG_INLINE Packet4s pcast<Packet2ul, Packet4s>(const Packet2ul& a, const Packet2ul& b) {
1317
+ return preinterpret<Packet4s>(pcast<Packet2ul, Packet4us>(a, b));
1318
+ }
1319
+
1320
+ template <>
1321
+ struct type_casting_traits<numext::uint64_t, numext::uint8_t> {
1322
+ enum { VectorizedCast = 1, SrcCoeffRatio = 8, TgtCoeffRatio = 1 };
1323
+ };
1324
+ template <>
1325
+ EIGEN_STRONG_INLINE Packet16uc pcast<Packet2ul, Packet16uc>(const Packet2ul& a, const Packet2ul& b, const Packet2ul& c,
1326
+ const Packet2ul& d, const Packet2ul& e, const Packet2ul& f,
1327
+ const Packet2ul& g, const Packet2ul& h) {
1328
+ const uint16x8_t abcd_u16 = pcast<Packet2ul, Packet8us>(a, b, c, d);
1329
+ const uint16x8_t efgh_u16 = pcast<Packet2ul, Packet8us>(e, f, g, h);
1330
+ return vcombine_u8(vmovn_u16(abcd_u16), vmovn_u16(efgh_u16));
1331
+ }
1332
+ template <>
1333
+ EIGEN_STRONG_INLINE Packet8uc pcast<Packet2ul, Packet8uc>(const Packet2ul& a, const Packet2ul& b, const Packet2ul& c,
1334
+ const Packet2ul& d) {
1335
+ const uint16x8_t abcd_u16 = pcast<Packet2ul, Packet8us>(a, b, c, d);
1336
+ return vmovn_u16(abcd_u16);
1337
+ }
1338
+ template <>
1339
+ EIGEN_STRONG_INLINE Packet4uc pcast<Packet2ul, Packet4uc>(const Packet2ul& a, const Packet2ul& b) {
1340
+ const uint16x4_t ab_u16 = pcast<Packet2ul, Packet4us>(a, b);
1341
+ const uint16x8_t abab_u16 = vcombine_u16(ab_u16, ab_u16);
1342
+ const uint8x8_t abab_u8 = vmovn_u16(abab_u16);
1343
+ return vget_lane_u32(vreinterpret_u32_u8(abab_u8), 0);
1344
+ }
1345
+
1346
+ template <>
1347
+ struct type_casting_traits<numext::uint64_t, numext::int8_t> {
1348
+ enum { VectorizedCast = 1, SrcCoeffRatio = 8, TgtCoeffRatio = 1 };
1349
+ };
1350
+ template <>
1351
+ EIGEN_STRONG_INLINE Packet16c pcast<Packet2ul, Packet16c>(const Packet2ul& a, const Packet2ul& b, const Packet2ul& c,
1352
+ const Packet2ul& d, const Packet2ul& e, const Packet2ul& f,
1353
+ const Packet2ul& g, const Packet2ul& h) {
1354
+ return preinterpret<Packet16c>(pcast<Packet2ul, Packet16uc>(a, b, c, d, e, f, g, h));
1355
+ }
1356
+ template <>
1357
+ EIGEN_STRONG_INLINE Packet8c pcast<Packet2ul, Packet8c>(const Packet2ul& a, const Packet2ul& b, const Packet2ul& c,
1358
+ const Packet2ul& d) {
1359
+ return preinterpret<Packet8c>(pcast<Packet2ul, Packet8uc>(a, b, c, d));
1360
+ }
1361
+ template <>
1362
+ EIGEN_STRONG_INLINE Packet4c pcast<Packet2ul, Packet4c>(const Packet2ul& a, const Packet2ul& b) {
1363
+ return static_cast<Packet4c>(pcast<Packet2ul, Packet4uc>(a, b));
1364
+ }
1365
+
1366
+ #if EIGEN_ARCH_ARM64
1367
+
1368
+ //==============================================================================
1369
+ // pcast/preinterpret, Double
1370
+ //==============================================================================
1371
+
1372
+ template <>
1373
+ EIGEN_STRONG_INLINE Packet2d preinterpret<Packet2d, Packet2l>(const Packet2l& a) {
1374
+ return Packet2d(vreinterpretq_f64_s64(a));
1375
+ }
1376
+ template <>
1377
+ EIGEN_STRONG_INLINE Packet2d preinterpret<Packet2d, Packet2ul>(const Packet2ul& a) {
1378
+ return Packet2d(vreinterpretq_f64_u64(a));
1379
+ }
1380
+ template <>
1381
+ EIGEN_STRONG_INLINE Packet2l preinterpret<Packet2l, Packet2d>(const Packet2d& a) {
1382
+ return Packet2l(vreinterpretq_s64_f64(a));
1383
+ }
1384
+ template <>
1385
+ EIGEN_STRONG_INLINE Packet2ul preinterpret<Packet2ul, Packet2d>(const Packet2d& a) {
1386
+ return Packet2ul(vreinterpretq_u64_f64(a));
1387
+ }
1388
+ template <>
1389
+ EIGEN_STRONG_INLINE Packet2d preinterpret<Packet2d, Packet4i>(const Packet4i& a) {
1390
+ return Packet2d(vreinterpretq_f64_s32(a));
1391
+ }
1392
+ template <>
1393
+ EIGEN_STRONG_INLINE Packet4i preinterpret<Packet4i, Packet2d>(const Packet2d& a) {
1394
+ return Packet4i(vreinterpretq_s32_f64(a));
1395
+ }
1396
+
1397
+ template <>
1398
+ struct type_casting_traits<double, float> {
1399
+ enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 };
1400
+ };
1401
+ template <>
1402
+ EIGEN_STRONG_INLINE Packet4f pcast<Packet2d, Packet4f>(const Packet2d& a, const Packet2d& b) {
1403
+ return vcombine_f32(vcvt_f32_f64(a), vcvt_f32_f64(b));
1404
+ }
1405
+ template <>
1406
+ EIGEN_STRONG_INLINE Packet2f pcast<Packet2d, Packet2f>(const Packet2d& a) {
1407
+ return vcvt_f32_f64(a);
1408
+ }
1409
+
1410
+ template <>
1411
+ struct type_casting_traits<double, numext::int64_t> {
1412
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
1413
+ };
1414
+ template <>
1415
+ EIGEN_STRONG_INLINE Packet2l pcast<Packet2d, Packet2l>(const Packet2d& a) {
1416
+ return vcvtq_s64_f64(a);
1417
+ }
1418
+
1419
+ template <>
1420
+ struct type_casting_traits<double, numext::uint64_t> {
1421
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
1422
+ };
1423
+ template <>
1424
+ EIGEN_STRONG_INLINE Packet2ul pcast<Packet2d, Packet2ul>(const Packet2d& a) {
1425
+ return vcvtq_u64_f64(a);
1426
+ }
1427
+
1428
+ template <>
1429
+ struct type_casting_traits<double, numext::int32_t> {
1430
+ enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 };
1431
+ };
1432
+ template <>
1433
+ EIGEN_STRONG_INLINE Packet4i pcast<Packet2d, Packet4i>(const Packet2d& a, const Packet2d& b) {
1434
+ return vcombine_s32(vmovn_s64(vcvtq_s64_f64(a)), vmovn_s64(vcvtq_s64_f64(b)));
1435
+ }
1436
+ template <>
1437
+ EIGEN_STRONG_INLINE Packet2i pcast<Packet2d, Packet2i>(const Packet2d& a) {
1438
+ return vmovn_s64(vcvtq_s64_f64(a));
1439
+ }
1440
+
1441
+ template <>
1442
+ struct type_casting_traits<double, numext::uint32_t> {
1443
+ enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 };
1444
+ };
1445
+ template <>
1446
+ EIGEN_STRONG_INLINE Packet4ui pcast<Packet2d, Packet4ui>(const Packet2d& a, const Packet2d& b) {
1447
+ return vcombine_u32(vmovn_u64(vcvtq_u64_f64(a)), vmovn_u64(vcvtq_u64_f64(b)));
1448
+ }
1449
+ template <>
1450
+ EIGEN_STRONG_INLINE Packet2ui pcast<Packet2d, Packet2ui>(const Packet2d& a) {
1451
+ return vmovn_u64(vcvtq_u64_f64(a));
1452
+ }
1453
+
1454
+ template <>
1455
+ struct type_casting_traits<double, numext::int16_t> {
1456
+ enum { VectorizedCast = 1, SrcCoeffRatio = 4, TgtCoeffRatio = 1 };
1457
+ };
1458
+ template <>
1459
+ EIGEN_STRONG_INLINE Packet8s pcast<Packet2d, Packet8s>(const Packet2d& a, const Packet2d& b, const Packet2d& c,
1460
+ const Packet2d& d) {
1461
+ const int32x4_t ab_s32 = pcast<Packet2d, Packet4i>(a, b);
1462
+ const int32x4_t cd_s32 = pcast<Packet2d, Packet4i>(c, d);
1463
+ return vcombine_s16(vmovn_s32(ab_s32), vmovn_s32(cd_s32));
1464
+ }
1465
+ template <>
1466
+ EIGEN_STRONG_INLINE Packet4s pcast<Packet2d, Packet4s>(const Packet2d& a, const Packet2d& b) {
1467
+ const int32x4_t ab_s32 = pcast<Packet2d, Packet4i>(a, b);
1468
+ return vmovn_s32(ab_s32);
1469
+ }
1470
+
1471
+ template <>
1472
+ struct type_casting_traits<double, numext::uint16_t> {
1473
+ enum { VectorizedCast = 1, SrcCoeffRatio = 4, TgtCoeffRatio = 1 };
1474
+ };
1475
+ template <>
1476
+ EIGEN_STRONG_INLINE Packet8us pcast<Packet2d, Packet8us>(const Packet2d& a, const Packet2d& b, const Packet2d& c,
1477
+ const Packet2d& d) {
1478
+ return preinterpret<Packet8us>(pcast<Packet2d, Packet8s>(a, b, c, d));
1479
+ }
1480
+ template <>
1481
+ EIGEN_STRONG_INLINE Packet4us pcast<Packet2d, Packet4us>(const Packet2d& a, const Packet2d& b) {
1482
+ return preinterpret<Packet4us>(pcast<Packet2d, Packet4s>(a, b));
1483
+ }
1484
+
1485
+ template <>
1486
+ struct type_casting_traits<double, numext::int8_t> {
1487
+ enum { VectorizedCast = 1, SrcCoeffRatio = 8, TgtCoeffRatio = 1 };
1488
+ };
1489
+ template <>
1490
+ EIGEN_STRONG_INLINE Packet16c pcast<Packet2d, Packet16c>(const Packet2d& a, const Packet2d& b, const Packet2d& c,
1491
+ const Packet2d& d, const Packet2d& e, const Packet2d& f,
1492
+ const Packet2d& g, const Packet2d& h) {
1493
+ const int16x8_t abcd_s16 = pcast<Packet2d, Packet8s>(a, b, c, d);
1494
+ const int16x8_t efgh_s16 = pcast<Packet2d, Packet8s>(e, f, g, h);
1495
+ return vcombine_s8(vmovn_s16(abcd_s16), vmovn_s16(efgh_s16));
1496
+ }
1497
+ template <>
1498
+ EIGEN_STRONG_INLINE Packet8c pcast<Packet2d, Packet8c>(const Packet2d& a, const Packet2d& b, const Packet2d& c,
1499
+ const Packet2d& d) {
1500
+ const int16x8_t abcd_s16 = pcast<Packet2d, Packet8s>(a, b, c, d);
1501
+ return vmovn_s16(abcd_s16);
1502
+ }
1503
+ template <>
1504
+ EIGEN_STRONG_INLINE Packet4c pcast<Packet2d, Packet4c>(const Packet2d& a, const Packet2d& b) {
1505
+ const int32x4_t ab_s32 = pcast<Packet2d, Packet4i>(a, b);
1506
+ return pcast<Packet4i, Packet4c>(ab_s32);
1507
+ }
1508
+
1509
+ template <>
1510
+ struct type_casting_traits<double, numext::uint8_t> {
1511
+ enum { VectorizedCast = 1, SrcCoeffRatio = 8, TgtCoeffRatio = 1 };
1512
+ };
1513
+ template <>
1514
+ EIGEN_STRONG_INLINE Packet16uc pcast<Packet2d, Packet16uc>(const Packet2d& a, const Packet2d& b, const Packet2d& c,
1515
+ const Packet2d& d, const Packet2d& e, const Packet2d& f,
1516
+ const Packet2d& g, const Packet2d& h) {
1517
+ const uint16x8_t abcd_u16 = pcast<Packet2d, Packet8us>(a, b, c, d);
1518
+ const uint16x8_t efgh_u16 = pcast<Packet2d, Packet8us>(e, f, g, h);
1519
+ return vcombine_u8(vmovn_u16(abcd_u16), vmovn_u16(efgh_u16));
1520
+ }
1521
+ template <>
1522
+ EIGEN_STRONG_INLINE Packet8uc pcast<Packet2d, Packet8uc>(const Packet2d& a, const Packet2d& b, const Packet2d& c,
1523
+ const Packet2d& d) {
1524
+ return preinterpret<Packet8uc>(pcast<Packet2d, Packet8c>(a, b, c, d));
1525
+ }
1526
+ template <>
1527
+ EIGEN_STRONG_INLINE Packet4uc pcast<Packet2d, Packet4uc>(const Packet2d& a, const Packet2d& b) {
1528
+ return static_cast<Packet4uc>(pcast<Packet2d, Packet4c>(a, b));
1529
+ }
1530
+
1531
+ template <>
1532
+ struct type_casting_traits<float, double> {
1533
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 };
1534
+ };
1535
+ template <>
1536
+ EIGEN_STRONG_INLINE Packet2d pcast<Packet4f, Packet2d>(const Packet4f& a) {
1537
+ // Discard second-half of input.
1538
+ return vcvt_f64_f32(vget_low_f32(a));
1539
+ }
1540
+ template <>
1541
+ EIGEN_STRONG_INLINE Packet2d pcast<Packet2f, Packet2d>(const Packet2f& a) {
1542
+ return vcvt_f64_f32(a);
1543
+ }
1544
+
1545
+ template <>
1546
+ struct type_casting_traits<numext::int8_t, double> {
1547
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 8 };
1548
+ };
1549
+ template <>
1550
+ EIGEN_STRONG_INLINE Packet2d pcast<Packet16c, Packet2d>(const Packet16c& a) {
1551
+ // Discard all but first two values.
1552
+ // MSVC defines most intrinsics as macros, so we need to do this in two lines for portability.
1553
+ Packet2f tmp = pcast<Packet8c, Packet2f>(vget_low_s8(a));
1554
+ return vcvt_f64_f32(tmp);
1555
+ }
1556
+
1557
+ template <>
1558
+ struct type_casting_traits<numext::uint8_t, double> {
1559
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 8 };
1560
+ };
1561
+ template <>
1562
+ EIGEN_STRONG_INLINE Packet2d pcast<Packet16uc, Packet2d>(const Packet16uc& a) {
1563
+ // Discard all but first two values.
1564
+ Packet2f tmp = pcast<Packet8uc, Packet2f>(vget_low_u8(a));
1565
+ return vcvt_f64_f32(tmp);
1566
+ }
1567
+
1568
+ template <>
1569
+ struct type_casting_traits<numext::int16_t, double> {
1570
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 4 };
1571
+ };
1572
+ template <>
1573
+ EIGEN_STRONG_INLINE Packet2d pcast<Packet8s, Packet2d>(const Packet8s& a) {
1574
+ // Discard all but first two values.
1575
+ Packet2f tmp = pcast<Packet4s, Packet2f>(vget_low_s16(a));
1576
+ return vcvt_f64_f32(tmp);
1577
+ }
1578
+
1579
+ template <>
1580
+ struct type_casting_traits<numext::uint16_t, double> {
1581
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 4 };
1582
+ };
1583
+ template <>
1584
+ EIGEN_STRONG_INLINE Packet2d pcast<Packet8us, Packet2d>(const Packet8us& a) {
1585
+ // Discard all but first two values.
1586
+ Packet2f tmp = pcast<Packet4us, Packet2f>(vget_low_u16(a));
1587
+ return vcvt_f64_f32(tmp);
1588
+ }
1589
+
1590
+ template <>
1591
+ struct type_casting_traits<numext::int32_t, double> {
1592
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 };
1593
+ };
1594
+ template <>
1595
+ EIGEN_STRONG_INLINE Packet2d pcast<Packet4i, Packet2d>(const Packet4i& a) {
1596
+ // Discard second half of input.
1597
+ return vcvtq_f64_s64(vmovl_s32(vget_low_s32(a)));
1598
+ }
1599
+ template <>
1600
+ EIGEN_STRONG_INLINE Packet2d pcast<Packet2i, Packet2d>(const Packet2i& a) {
1601
+ return vcvtq_f64_s64(vmovl_s32(a));
1602
+ }
1603
+
1604
+ template <>
1605
+ struct type_casting_traits<numext::uint32_t, double> {
1606
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 };
1607
+ };
1608
+ template <>
1609
+ EIGEN_STRONG_INLINE Packet2d pcast<Packet4ui, Packet2d>(const Packet4ui& a) {
1610
+ // Discard second half of input.
1611
+ return vcvtq_f64_u64(vmovl_u32(vget_low_u32(a)));
1612
+ }
1613
+ template <>
1614
+ EIGEN_STRONG_INLINE Packet2d pcast<Packet2ui, Packet2d>(const Packet2ui& a) {
1615
+ return vcvtq_f64_u64(vmovl_u32(a));
1616
+ }
1617
+
1618
+ template <>
1619
+ struct type_casting_traits<numext::int64_t, double> {
1620
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
1621
+ };
1622
+ template <>
1623
+ EIGEN_STRONG_INLINE Packet2d pcast<Packet2l, Packet2d>(const Packet2l& a) {
1624
+ return vcvtq_f64_s64(a);
1625
+ }
1626
+
1627
+ template <>
1628
+ struct type_casting_traits<numext::uint64_t, double> {
1629
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
1630
+ };
1631
+ template <>
1632
+ EIGEN_STRONG_INLINE Packet2d pcast<Packet2ul, Packet2d>(const Packet2ul& a) {
1633
+ return vcvtq_f64_u64(a);
1634
+ }
1635
+
1636
+ #endif // EIGEN_ARCH_ARM64
1637
+
1638
+ } // end namespace internal
1639
+
1640
+ } // end namespace Eigen
1641
+
1642
+ #endif // EIGEN_TYPE_CASTING_NEON_H