@smake/eigen 1.1.0 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (431)
  1. package/README.md +1 -1
  2. package/eigen/Eigen/AccelerateSupport +52 -0
  3. package/eigen/Eigen/Cholesky +18 -20
  4. package/eigen/Eigen/CholmodSupport +28 -28
  5. package/eigen/Eigen/Core +187 -120
  6. package/eigen/Eigen/Eigenvalues +16 -13
  7. package/eigen/Eigen/Geometry +18 -18
  8. package/eigen/Eigen/Householder +9 -7
  9. package/eigen/Eigen/IterativeLinearSolvers +8 -4
  10. package/eigen/Eigen/Jacobi +14 -13
  11. package/eigen/Eigen/KLUSupport +23 -21
  12. package/eigen/Eigen/LU +15 -16
  13. package/eigen/Eigen/MetisSupport +12 -12
  14. package/eigen/Eigen/OrderingMethods +54 -51
  15. package/eigen/Eigen/PaStiXSupport +23 -21
  16. package/eigen/Eigen/PardisoSupport +17 -14
  17. package/eigen/Eigen/QR +18 -20
  18. package/eigen/Eigen/QtAlignedMalloc +5 -12
  19. package/eigen/Eigen/SPQRSupport +21 -14
  20. package/eigen/Eigen/SVD +23 -17
  21. package/eigen/Eigen/Sparse +1 -2
  22. package/eigen/Eigen/SparseCholesky +18 -15
  23. package/eigen/Eigen/SparseCore +18 -17
  24. package/eigen/Eigen/SparseLU +9 -9
  25. package/eigen/Eigen/SparseQR +16 -14
  26. package/eigen/Eigen/StdDeque +5 -2
  27. package/eigen/Eigen/StdList +5 -2
  28. package/eigen/Eigen/StdVector +5 -2
  29. package/eigen/Eigen/SuperLUSupport +30 -24
  30. package/eigen/Eigen/ThreadPool +80 -0
  31. package/eigen/Eigen/UmfPackSupport +19 -17
  32. package/eigen/Eigen/Version +14 -0
  33. package/eigen/Eigen/src/AccelerateSupport/AccelerateSupport.h +423 -0
  34. package/eigen/Eigen/src/AccelerateSupport/InternalHeaderCheck.h +3 -0
  35. package/eigen/Eigen/src/Cholesky/InternalHeaderCheck.h +3 -0
  36. package/eigen/Eigen/src/Cholesky/LDLT.h +366 -405
  37. package/eigen/Eigen/src/Cholesky/LLT.h +323 -367
  38. package/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h +81 -56
  39. package/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +585 -529
  40. package/eigen/Eigen/src/CholmodSupport/InternalHeaderCheck.h +3 -0
  41. package/eigen/Eigen/src/Core/ArithmeticSequence.h +143 -317
  42. package/eigen/Eigen/src/Core/Array.h +329 -370
  43. package/eigen/Eigen/src/Core/ArrayBase.h +190 -203
  44. package/eigen/Eigen/src/Core/ArrayWrapper.h +126 -170
  45. package/eigen/Eigen/src/Core/Assign.h +30 -40
  46. package/eigen/Eigen/src/Core/AssignEvaluator.h +651 -604
  47. package/eigen/Eigen/src/Core/Assign_MKL.h +125 -120
  48. package/eigen/Eigen/src/Core/BandMatrix.h +267 -282
  49. package/eigen/Eigen/src/Core/Block.h +371 -390
  50. package/eigen/Eigen/src/Core/CommaInitializer.h +85 -100
  51. package/eigen/Eigen/src/Core/ConditionEstimator.h +51 -53
  52. package/eigen/Eigen/src/Core/CoreEvaluators.h +1214 -937
  53. package/eigen/Eigen/src/Core/CoreIterators.h +72 -63
  54. package/eigen/Eigen/src/Core/CwiseBinaryOp.h +112 -129
  55. package/eigen/Eigen/src/Core/CwiseNullaryOp.h +676 -702
  56. package/eigen/Eigen/src/Core/CwiseTernaryOp.h +77 -103
  57. package/eigen/Eigen/src/Core/CwiseUnaryOp.h +55 -67
  58. package/eigen/Eigen/src/Core/CwiseUnaryView.h +127 -92
  59. package/eigen/Eigen/src/Core/DenseBase.h +630 -658
  60. package/eigen/Eigen/src/Core/DenseCoeffsBase.h +511 -628
  61. package/eigen/Eigen/src/Core/DenseStorage.h +511 -590
  62. package/eigen/Eigen/src/Core/DeviceWrapper.h +153 -0
  63. package/eigen/Eigen/src/Core/Diagonal.h +168 -207
  64. package/eigen/Eigen/src/Core/DiagonalMatrix.h +346 -317
  65. package/eigen/Eigen/src/Core/DiagonalProduct.h +12 -10
  66. package/eigen/Eigen/src/Core/Dot.h +167 -217
  67. package/eigen/Eigen/src/Core/EigenBase.h +74 -85
  68. package/eigen/Eigen/src/Core/Fill.h +138 -0
  69. package/eigen/Eigen/src/Core/FindCoeff.h +464 -0
  70. package/eigen/Eigen/src/Core/ForceAlignedAccess.h +90 -113
  71. package/eigen/Eigen/src/Core/Fuzzy.h +82 -105
  72. package/eigen/Eigen/src/Core/GeneralProduct.h +315 -261
  73. package/eigen/Eigen/src/Core/GenericPacketMath.h +1182 -520
  74. package/eigen/Eigen/src/Core/GlobalFunctions.h +193 -157
  75. package/eigen/Eigen/src/Core/IO.h +131 -156
  76. package/eigen/Eigen/src/Core/IndexedView.h +209 -125
  77. package/eigen/Eigen/src/Core/InnerProduct.h +260 -0
  78. package/eigen/Eigen/src/Core/InternalHeaderCheck.h +3 -0
  79. package/eigen/Eigen/src/Core/Inverse.h +50 -59
  80. package/eigen/Eigen/src/Core/Map.h +123 -141
  81. package/eigen/Eigen/src/Core/MapBase.h +255 -282
  82. package/eigen/Eigen/src/Core/MathFunctions.h +1247 -1201
  83. package/eigen/Eigen/src/Core/MathFunctionsImpl.h +162 -99
  84. package/eigen/Eigen/src/Core/Matrix.h +463 -494
  85. package/eigen/Eigen/src/Core/MatrixBase.h +468 -470
  86. package/eigen/Eigen/src/Core/NestByValue.h +58 -52
  87. package/eigen/Eigen/src/Core/NoAlias.h +79 -86
  88. package/eigen/Eigen/src/Core/NumTraits.h +206 -206
  89. package/eigen/Eigen/src/Core/PartialReduxEvaluator.h +163 -142
  90. package/eigen/Eigen/src/Core/PermutationMatrix.h +461 -511
  91. package/eigen/Eigen/src/Core/PlainObjectBase.h +858 -972
  92. package/eigen/Eigen/src/Core/Product.h +246 -130
  93. package/eigen/Eigen/src/Core/ProductEvaluators.h +779 -671
  94. package/eigen/Eigen/src/Core/Random.h +153 -164
  95. package/eigen/Eigen/src/Core/RandomImpl.h +262 -0
  96. package/eigen/Eigen/src/Core/RealView.h +250 -0
  97. package/eigen/Eigen/src/Core/Redux.h +334 -314
  98. package/eigen/Eigen/src/Core/Ref.h +259 -257
  99. package/eigen/Eigen/src/Core/Replicate.h +92 -104
  100. package/eigen/Eigen/src/Core/Reshaped.h +215 -271
  101. package/eigen/Eigen/src/Core/ReturnByValue.h +47 -55
  102. package/eigen/Eigen/src/Core/Reverse.h +133 -148
  103. package/eigen/Eigen/src/Core/Select.h +68 -140
  104. package/eigen/Eigen/src/Core/SelfAdjointView.h +254 -290
  105. package/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h +23 -20
  106. package/eigen/Eigen/src/Core/SkewSymmetricMatrix3.h +382 -0
  107. package/eigen/Eigen/src/Core/Solve.h +88 -102
  108. package/eigen/Eigen/src/Core/SolveTriangular.h +126 -124
  109. package/eigen/Eigen/src/Core/SolverBase.h +132 -133
  110. package/eigen/Eigen/src/Core/StableNorm.h +113 -147
  111. package/eigen/Eigen/src/Core/StlIterators.h +404 -248
  112. package/eigen/Eigen/src/Core/Stride.h +90 -92
  113. package/eigen/Eigen/src/Core/Swap.h +70 -39
  114. package/eigen/Eigen/src/Core/Transpose.h +258 -295
  115. package/eigen/Eigen/src/Core/Transpositions.h +270 -333
  116. package/eigen/Eigen/src/Core/TriangularMatrix.h +642 -743
  117. package/eigen/Eigen/src/Core/VectorBlock.h +59 -72
  118. package/eigen/Eigen/src/Core/VectorwiseOp.h +653 -704
  119. package/eigen/Eigen/src/Core/Visitor.h +464 -308
  120. package/eigen/Eigen/src/Core/arch/AVX/Complex.h +380 -187
  121. package/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +65 -163
  122. package/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +2145 -638
  123. package/eigen/Eigen/src/Core/arch/AVX/Reductions.h +353 -0
  124. package/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +253 -60
  125. package/eigen/Eigen/src/Core/arch/AVX512/Complex.h +278 -228
  126. package/eigen/Eigen/src/Core/arch/AVX512/GemmKernel.h +1245 -0
  127. package/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +48 -269
  128. package/eigen/Eigen/src/Core/arch/AVX512/MathFunctionsFP16.h +75 -0
  129. package/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1597 -754
  130. package/eigen/Eigen/src/Core/arch/AVX512/PacketMathFP16.h +1413 -0
  131. package/eigen/Eigen/src/Core/arch/AVX512/Reductions.h +297 -0
  132. package/eigen/Eigen/src/Core/arch/AVX512/TrsmKernel.h +1167 -0
  133. package/eigen/Eigen/src/Core/arch/AVX512/TrsmUnrolls.inc +1219 -0
  134. package/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +229 -41
  135. package/eigen/Eigen/src/Core/arch/AVX512/TypeCastingFP16.h +130 -0
  136. package/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +420 -184
  137. package/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +40 -49
  138. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2962 -2213
  139. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +196 -212
  140. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +713 -441
  141. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMAbfloat16.h +742 -0
  142. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixVectorProduct.inc +2818 -0
  143. package/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2380 -1362
  144. package/eigen/Eigen/src/Core/arch/AltiVec/TypeCasting.h +153 -0
  145. package/eigen/Eigen/src/Core/arch/Default/BFloat16.h +390 -224
  146. package/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +78 -67
  147. package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1784 -799
  148. package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +167 -50
  149. package/eigen/Eigen/src/Core/arch/Default/Half.h +528 -379
  150. package/eigen/Eigen/src/Core/arch/Default/Settings.h +10 -12
  151. package/eigen/Eigen/src/Core/arch/GPU/Complex.h +244 -0
  152. package/eigen/Eigen/src/Core/arch/GPU/MathFunctions.h +41 -40
  153. package/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +550 -523
  154. package/eigen/Eigen/src/Core/arch/GPU/Tuple.h +268 -0
  155. package/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +27 -30
  156. package/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +8 -8
  157. package/eigen/Eigen/src/Core/arch/HVX/PacketMath.h +1088 -0
  158. package/eigen/Eigen/src/Core/arch/LSX/Complex.h +520 -0
  159. package/eigen/Eigen/src/Core/arch/LSX/GeneralBlockPanelKernel.h +23 -0
  160. package/eigen/Eigen/src/Core/arch/LSX/MathFunctions.h +43 -0
  161. package/eigen/Eigen/src/Core/arch/LSX/PacketMath.h +2866 -0
  162. package/eigen/Eigen/src/Core/arch/LSX/TypeCasting.h +526 -0
  163. package/eigen/Eigen/src/Core/arch/MSA/Complex.h +54 -82
  164. package/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +84 -92
  165. package/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +51 -47
  166. package/eigen/Eigen/src/Core/arch/NEON/Complex.h +454 -306
  167. package/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +175 -115
  168. package/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +23 -30
  169. package/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4366 -2857
  170. package/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +616 -393
  171. package/eigen/Eigen/src/Core/arch/NEON/UnaryFunctors.h +57 -0
  172. package/eigen/Eigen/src/Core/arch/SSE/Complex.h +350 -198
  173. package/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +38 -149
  174. package/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +1791 -912
  175. package/eigen/Eigen/src/Core/arch/SSE/Reductions.h +324 -0
  176. package/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +128 -40
  177. package/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +10 -6
  178. package/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +156 -234
  179. package/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +6 -3
  180. package/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +27 -32
  181. package/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +119 -117
  182. package/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +325 -419
  183. package/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +15 -17
  184. package/eigen/Eigen/src/Core/arch/ZVector/Complex.h +325 -181
  185. package/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +94 -83
  186. package/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +811 -458
  187. package/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +121 -124
  188. package/eigen/Eigen/src/Core/functors/BinaryFunctors.h +576 -370
  189. package/eigen/Eigen/src/Core/functors/NullaryFunctors.h +194 -109
  190. package/eigen/Eigen/src/Core/functors/StlFunctors.h +95 -112
  191. package/eigen/Eigen/src/Core/functors/TernaryFunctors.h +34 -7
  192. package/eigen/Eigen/src/Core/functors/UnaryFunctors.h +1038 -749
  193. package/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1883 -1375
  194. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +312 -370
  195. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +189 -176
  196. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +84 -81
  197. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +154 -73
  198. package/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +292 -337
  199. package/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +80 -77
  200. package/eigen/Eigen/src/Core/products/Parallelizer.h +207 -105
  201. package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +327 -388
  202. package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +206 -224
  203. package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +138 -147
  204. package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +58 -61
  205. package/eigen/Eigen/src/Core/products/SelfadjointProduct.h +71 -71
  206. package/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +48 -47
  207. package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +294 -369
  208. package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +246 -238
  209. package/eigen/Eigen/src/Core/products/TriangularMatrixVector.h +244 -247
  210. package/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +212 -192
  211. package/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +328 -277
  212. package/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +108 -109
  213. package/eigen/Eigen/src/Core/products/TriangularSolverVector.h +68 -94
  214. package/eigen/Eigen/src/Core/util/Assert.h +158 -0
  215. package/eigen/Eigen/src/Core/util/BlasUtil.h +342 -303
  216. package/eigen/Eigen/src/Core/util/ConfigureVectorization.h +348 -317
  217. package/eigen/Eigen/src/Core/util/Constants.h +297 -262
  218. package/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +130 -90
  219. package/eigen/Eigen/src/Core/util/EmulateArray.h +270 -0
  220. package/eigen/Eigen/src/Core/util/ForwardDeclarations.h +449 -247
  221. package/eigen/Eigen/src/Core/util/GpuHipCudaDefines.inc +101 -0
  222. package/eigen/Eigen/src/Core/util/GpuHipCudaUndefines.inc +45 -0
  223. package/eigen/Eigen/src/Core/util/IndexedViewHelper.h +417 -116
  224. package/eigen/Eigen/src/Core/util/IntegralConstant.h +211 -204
  225. package/eigen/Eigen/src/Core/util/MKL_support.h +39 -37
  226. package/eigen/Eigen/src/Core/util/Macros.h +655 -773
  227. package/eigen/Eigen/src/Core/util/MaxSizeVector.h +139 -0
  228. package/eigen/Eigen/src/Core/util/Memory.h +970 -748
  229. package/eigen/Eigen/src/Core/util/Meta.h +581 -633
  230. package/eigen/Eigen/src/Core/util/MoreMeta.h +638 -0
  231. package/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +32 -19
  232. package/eigen/Eigen/src/Core/util/ReshapedHelper.h +17 -17
  233. package/eigen/Eigen/src/Core/util/Serializer.h +209 -0
  234. package/eigen/Eigen/src/Core/util/StaticAssert.h +50 -166
  235. package/eigen/Eigen/src/Core/util/SymbolicIndex.h +377 -225
  236. package/eigen/Eigen/src/Core/util/XprHelper.h +784 -547
  237. package/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +246 -277
  238. package/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +299 -319
  239. package/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +52 -48
  240. package/eigen/Eigen/src/Eigenvalues/EigenSolver.h +413 -456
  241. package/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +309 -325
  242. package/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +157 -171
  243. package/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +292 -310
  244. package/eigen/Eigen/src/Eigenvalues/InternalHeaderCheck.h +3 -0
  245. package/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +89 -105
  246. package/eigen/Eigen/src/Eigenvalues/RealQZ.h +537 -607
  247. package/eigen/Eigen/src/Eigenvalues/RealSchur.h +342 -381
  248. package/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +41 -35
  249. package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +541 -595
  250. package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +47 -44
  251. package/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +430 -462
  252. package/eigen/Eigen/src/Geometry/AlignedBox.h +226 -227
  253. package/eigen/Eigen/src/Geometry/AngleAxis.h +131 -133
  254. package/eigen/Eigen/src/Geometry/EulerAngles.h +163 -74
  255. package/eigen/Eigen/src/Geometry/Homogeneous.h +285 -333
  256. package/eigen/Eigen/src/Geometry/Hyperplane.h +151 -160
  257. package/eigen/Eigen/src/Geometry/InternalHeaderCheck.h +3 -0
  258. package/eigen/Eigen/src/Geometry/OrthoMethods.h +168 -146
  259. package/eigen/Eigen/src/Geometry/ParametrizedLine.h +127 -127
  260. package/eigen/Eigen/src/Geometry/Quaternion.h +566 -506
  261. package/eigen/Eigen/src/Geometry/Rotation2D.h +107 -105
  262. package/eigen/Eigen/src/Geometry/RotationBase.h +148 -145
  263. package/eigen/Eigen/src/Geometry/Scaling.h +113 -106
  264. package/eigen/Eigen/src/Geometry/Transform.h +858 -936
  265. package/eigen/Eigen/src/Geometry/Translation.h +94 -92
  266. package/eigen/Eigen/src/Geometry/Umeyama.h +79 -84
  267. package/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +90 -104
  268. package/eigen/Eigen/src/Householder/BlockHouseholder.h +51 -46
  269. package/eigen/Eigen/src/Householder/Householder.h +102 -124
  270. package/eigen/Eigen/src/Householder/HouseholderSequence.h +412 -453
  271. package/eigen/Eigen/src/Householder/InternalHeaderCheck.h +3 -0
  272. package/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +149 -162
  273. package/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +124 -119
  274. package/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +92 -104
  275. package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +251 -243
  276. package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +224 -228
  277. package/eigen/Eigen/src/IterativeLinearSolvers/InternalHeaderCheck.h +3 -0
  278. package/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +178 -227
  279. package/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +79 -84
  280. package/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +54 -60
  281. package/eigen/Eigen/src/Jacobi/InternalHeaderCheck.h +3 -0
  282. package/eigen/Eigen/src/Jacobi/Jacobi.h +252 -308
  283. package/eigen/Eigen/src/KLUSupport/InternalHeaderCheck.h +3 -0
  284. package/eigen/Eigen/src/KLUSupport/KLUSupport.h +208 -227
  285. package/eigen/Eigen/src/LU/Determinant.h +50 -69
  286. package/eigen/Eigen/src/LU/FullPivLU.h +545 -596
  287. package/eigen/Eigen/src/LU/InternalHeaderCheck.h +3 -0
  288. package/eigen/Eigen/src/LU/InverseImpl.h +206 -285
  289. package/eigen/Eigen/src/LU/PartialPivLU.h +390 -428
  290. package/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h +54 -40
  291. package/eigen/Eigen/src/LU/arch/InverseSize4.h +72 -70
  292. package/eigen/Eigen/src/MetisSupport/InternalHeaderCheck.h +3 -0
  293. package/eigen/Eigen/src/MetisSupport/MetisSupport.h +81 -93
  294. package/eigen/Eigen/src/OrderingMethods/Amd.h +243 -265
  295. package/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +831 -1004
  296. package/eigen/Eigen/src/OrderingMethods/InternalHeaderCheck.h +3 -0
  297. package/eigen/Eigen/src/OrderingMethods/Ordering.h +112 -119
  298. package/eigen/Eigen/src/PaStiXSupport/InternalHeaderCheck.h +3 -0
  299. package/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +524 -570
  300. package/eigen/Eigen/src/PardisoSupport/InternalHeaderCheck.h +3 -0
  301. package/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +385 -430
  302. package/eigen/Eigen/src/QR/ColPivHouseholderQR.h +479 -479
  303. package/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +120 -56
  304. package/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +166 -153
  305. package/eigen/Eigen/src/QR/FullPivHouseholderQR.h +495 -475
  306. package/eigen/Eigen/src/QR/HouseholderQR.h +394 -285
  307. package/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h +32 -23
  308. package/eigen/Eigen/src/QR/InternalHeaderCheck.h +3 -0
  309. package/eigen/Eigen/src/SPQRSupport/InternalHeaderCheck.h +3 -0
  310. package/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +244 -264
  311. package/eigen/Eigen/src/SVD/BDCSVD.h +817 -713
  312. package/eigen/Eigen/src/SVD/BDCSVD_LAPACKE.h +174 -0
  313. package/eigen/Eigen/src/SVD/InternalHeaderCheck.h +3 -0
  314. package/eigen/Eigen/src/SVD/JacobiSVD.h +577 -543
  315. package/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +85 -49
  316. package/eigen/Eigen/src/SVD/SVDBase.h +242 -182
  317. package/eigen/Eigen/src/SVD/UpperBidiagonalization.h +200 -235
  318. package/eigen/Eigen/src/SparseCholesky/InternalHeaderCheck.h +3 -0
  319. package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +765 -594
  320. package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +308 -94
  321. package/eigen/Eigen/src/SparseCore/AmbiVector.h +202 -251
  322. package/eigen/Eigen/src/SparseCore/CompressedStorage.h +184 -252
  323. package/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +134 -178
  324. package/eigen/Eigen/src/SparseCore/InternalHeaderCheck.h +3 -0
  325. package/eigen/Eigen/src/SparseCore/SparseAssign.h +149 -140
  326. package/eigen/Eigen/src/SparseCore/SparseBlock.h +403 -440
  327. package/eigen/Eigen/src/SparseCore/SparseColEtree.h +100 -112
  328. package/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +525 -303
  329. package/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +555 -339
  330. package/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +100 -108
  331. package/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +169 -197
  332. package/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +71 -71
  333. package/eigen/Eigen/src/SparseCore/SparseDot.h +49 -47
  334. package/eigen/Eigen/src/SparseCore/SparseFuzzy.h +13 -11
  335. package/eigen/Eigen/src/SparseCore/SparseMap.h +243 -253
  336. package/eigen/Eigen/src/SparseCore/SparseMatrix.h +1603 -1245
  337. package/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +403 -350
  338. package/eigen/Eigen/src/SparseCore/SparsePermutation.h +186 -115
  339. package/eigen/Eigen/src/SparseCore/SparseProduct.h +94 -97
  340. package/eigen/Eigen/src/SparseCore/SparseRedux.h +22 -24
  341. package/eigen/Eigen/src/SparseCore/SparseRef.h +268 -295
  342. package/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +370 -416
  343. package/eigen/Eigen/src/SparseCore/SparseSolverBase.h +78 -87
  344. package/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +81 -95
  345. package/eigen/Eigen/src/SparseCore/SparseTranspose.h +62 -71
  346. package/eigen/Eigen/src/SparseCore/SparseTriangularView.h +132 -144
  347. package/eigen/Eigen/src/SparseCore/SparseUtil.h +138 -115
  348. package/eigen/Eigen/src/SparseCore/SparseVector.h +426 -372
  349. package/eigen/Eigen/src/SparseCore/SparseView.h +164 -193
  350. package/eigen/Eigen/src/SparseCore/TriangularSolver.h +129 -170
  351. package/eigen/Eigen/src/SparseLU/InternalHeaderCheck.h +3 -0
  352. package/eigen/Eigen/src/SparseLU/SparseLU.h +756 -710
  353. package/eigen/Eigen/src/SparseLU/SparseLUImpl.h +61 -48
  354. package/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +102 -118
  355. package/eigen/Eigen/src/SparseLU/SparseLU_Structs.h +38 -35
  356. package/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +245 -301
  357. package/eigen/Eigen/src/SparseLU/SparseLU_Utils.h +44 -49
  358. package/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h +104 -108
  359. package/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +89 -100
  360. package/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +57 -58
  361. package/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +43 -55
  362. package/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +74 -71
  363. package/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +124 -132
  364. package/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h +136 -159
  365. package/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h +51 -52
  366. package/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h +67 -73
  367. package/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h +24 -26
  368. package/eigen/Eigen/src/SparseQR/InternalHeaderCheck.h +3 -0
  369. package/eigen/Eigen/src/SparseQR/SparseQR.h +450 -502
  370. package/eigen/Eigen/src/StlSupport/StdDeque.h +28 -93
  371. package/eigen/Eigen/src/StlSupport/StdList.h +28 -84
  372. package/eigen/Eigen/src/StlSupport/StdVector.h +28 -108
  373. package/eigen/Eigen/src/StlSupport/details.h +48 -50
  374. package/eigen/Eigen/src/SuperLUSupport/InternalHeaderCheck.h +3 -0
  375. package/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +634 -730
  376. package/eigen/Eigen/src/ThreadPool/Barrier.h +70 -0
  377. package/eigen/Eigen/src/ThreadPool/CoreThreadPoolDevice.h +336 -0
  378. package/eigen/Eigen/src/ThreadPool/EventCount.h +241 -0
  379. package/eigen/Eigen/src/ThreadPool/ForkJoin.h +140 -0
  380. package/eigen/Eigen/src/ThreadPool/InternalHeaderCheck.h +4 -0
  381. package/eigen/Eigen/src/ThreadPool/NonBlockingThreadPool.h +587 -0
  382. package/eigen/Eigen/src/ThreadPool/RunQueue.h +230 -0
  383. package/eigen/Eigen/src/ThreadPool/ThreadCancel.h +21 -0
  384. package/eigen/Eigen/src/ThreadPool/ThreadEnvironment.h +43 -0
  385. package/eigen/Eigen/src/ThreadPool/ThreadLocal.h +289 -0
  386. package/eigen/Eigen/src/ThreadPool/ThreadPoolInterface.h +50 -0
  387. package/eigen/Eigen/src/ThreadPool/ThreadYield.h +16 -0
  388. package/eigen/Eigen/src/UmfPackSupport/InternalHeaderCheck.h +3 -0
  389. package/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +428 -464
  390. package/eigen/Eigen/src/misc/Image.h +41 -43
  391. package/eigen/Eigen/src/misc/InternalHeaderCheck.h +3 -0
  392. package/eigen/Eigen/src/misc/Kernel.h +39 -41
  393. package/eigen/Eigen/src/misc/RealSvd2x2.h +19 -21
  394. package/eigen/Eigen/src/misc/blas.h +83 -426
  395. package/eigen/Eigen/src/misc/lapacke.h +9972 -16179
  396. package/eigen/Eigen/src/misc/lapacke_helpers.h +163 -0
  397. package/eigen/Eigen/src/misc/lapacke_mangling.h +4 -5
  398. package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.inc +344 -0
  399. package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.inc +544 -0
  400. package/eigen/Eigen/src/plugins/{BlockMethods.h → BlockMethods.inc} +434 -506
  401. package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.inc +116 -0
  402. package/eigen/Eigen/src/plugins/{CommonCwiseUnaryOps.h → CommonCwiseUnaryOps.inc} +58 -68
  403. package/eigen/Eigen/src/plugins/IndexedViewMethods.inc +192 -0
  404. package/eigen/Eigen/src/plugins/InternalHeaderCheck.inc +3 -0
  405. package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.inc +331 -0
  406. package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.inc +118 -0
  407. package/eigen/Eigen/src/plugins/ReshapedMethods.inc +133 -0
  408. package/package.json +1 -1
  409. package/eigen/COPYING.APACHE +0 -203
  410. package/eigen/COPYING.BSD +0 -26
  411. package/eigen/COPYING.GPL +0 -674
  412. package/eigen/COPYING.LGPL +0 -502
  413. package/eigen/COPYING.MINPACK +0 -51
  414. package/eigen/COPYING.MPL2 +0 -373
  415. package/eigen/COPYING.README +0 -18
  416. package/eigen/Eigen/src/Core/BooleanRedux.h +0 -162
  417. package/eigen/Eigen/src/Core/arch/CUDA/Complex.h +0 -258
  418. package/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +0 -120
  419. package/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +0 -694
  420. package/eigen/Eigen/src/Core/util/NonMPL2.h +0 -3
  421. package/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h +0 -67
  422. package/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +0 -280
  423. package/eigen/Eigen/src/misc/lapack.h +0 -152
  424. package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +0 -358
  425. package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +0 -696
  426. package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h +0 -115
  427. package/eigen/Eigen/src/plugins/IndexedViewMethods.h +0 -262
  428. package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +0 -152
  429. package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +0 -95
  430. package/eigen/Eigen/src/plugins/ReshapedMethods.h +0 -149
  431. package/eigen/README.md +0 -5
@@ -11,26 +11,177 @@
11
11
  #ifndef EIGEN_TYPE_CASTING_NEON_H
12
12
  #define EIGEN_TYPE_CASTING_NEON_H
13
13
 
14
+ // IWYU pragma: private
15
+ #include "../../InternalHeaderCheck.h"
16
+
14
17
  namespace Eigen {
15
18
 
16
19
  namespace internal {
17
20
 
18
21
  //==============================================================================
19
- // pcast, SrcType = float
22
+ // preinterpret (truncation operations)
20
23
  //==============================================================================
24
+
21
25
  template <>
22
- struct type_casting_traits<float, float> {
23
- enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
24
- };
26
+ EIGEN_STRONG_INLINE Packet8c preinterpret<Packet8c, Packet16c>(const Packet16c& a) {
27
+ return Packet8c(vget_low_s8(a));
28
+ }
29
+ template <>
30
+ EIGEN_STRONG_INLINE Packet4c preinterpret<Packet4c, Packet8c>(const Packet8c& a) {
31
+ return Packet4c(vget_lane_s32(vreinterpret_s32_s8(a), 0));
32
+ }
33
+ template <>
34
+ EIGEN_STRONG_INLINE Packet4c preinterpret<Packet4c, Packet16c>(const Packet16c& a) {
35
+ return preinterpret<Packet4c>(preinterpret<Packet8c>(a));
36
+ }
37
+
38
+ template <>
39
+ EIGEN_STRONG_INLINE Packet8uc preinterpret<Packet8uc, Packet16uc>(const Packet16uc& a) {
40
+ return Packet8uc(vget_low_u8(a));
41
+ }
42
+ template <>
43
+ EIGEN_STRONG_INLINE Packet4uc preinterpret<Packet4uc, Packet8uc>(const Packet8uc& a) {
44
+ return Packet4uc(vget_lane_u32(vreinterpret_u32_u8(a), 0));
45
+ }
46
+ template <>
47
+ EIGEN_STRONG_INLINE Packet4uc preinterpret<Packet4uc, Packet16uc>(const Packet16uc& a) {
48
+ return preinterpret<Packet4uc>(preinterpret<Packet8uc>(a));
49
+ }
50
+
51
+ template <>
52
+ EIGEN_STRONG_INLINE Packet4s preinterpret<Packet4s, Packet8s>(const Packet8s& a) {
53
+ return Packet4s(vget_low_s16(a));
54
+ }
55
+
56
+ template <>
57
+ EIGEN_STRONG_INLINE Packet4us preinterpret<Packet4us, Packet8us>(const Packet8us& a) {
58
+ return Packet4us(vget_low_u16(a));
59
+ }
60
+
61
+ template <>
62
+ EIGEN_STRONG_INLINE Packet2i preinterpret<Packet2i, Packet4i>(const Packet4i& a) {
63
+ return Packet2i(vget_low_s32(a));
64
+ }
65
+ template <>
66
+ EIGEN_STRONG_INLINE Packet2ui preinterpret<Packet2ui, Packet4ui>(const Packet4ui& a) {
67
+ return Packet2ui(vget_low_u32(a));
68
+ }
69
+
70
+ template <>
71
+ EIGEN_STRONG_INLINE Packet2f preinterpret<Packet2f, Packet4f>(const Packet4f& a) {
72
+ return Packet2f(vget_low_f32(a));
73
+ }
74
+
75
+ //==============================================================================
76
+ // preinterpret (same-size bit reinterpretation)
77
+ //==============================================================================
78
+ template <>
79
+ EIGEN_STRONG_INLINE Packet2f preinterpret<Packet2f, Packet2i>(const Packet2i& a) {
80
+ return Packet2f(vreinterpret_f32_s32(a));
81
+ }
82
+ template <>
83
+ EIGEN_STRONG_INLINE Packet2f preinterpret<Packet2f, Packet2ui>(const Packet2ui& a) {
84
+ return Packet2f(vreinterpret_f32_u32(a));
85
+ }
86
+ template <>
87
+ EIGEN_STRONG_INLINE Packet4f preinterpret<Packet4f, Packet4i>(const Packet4i& a) {
88
+ return Packet4f(vreinterpretq_f32_s32(a));
89
+ }
90
+ template <>
91
+ EIGEN_STRONG_INLINE Packet4f preinterpret<Packet4f, Packet4ui>(const Packet4ui& a) {
92
+ return Packet4f(vreinterpretq_f32_u32(a));
93
+ }
94
+
95
+ template <>
96
+ EIGEN_STRONG_INLINE Packet4c preinterpret<Packet4c, Packet4uc>(const Packet4uc& a) {
97
+ return static_cast<Packet4c>(a);
98
+ }
99
+ template <>
100
+ EIGEN_STRONG_INLINE Packet8c preinterpret<Packet8c, Packet8uc>(const Packet8uc& a) {
101
+ return Packet8c(vreinterpret_s8_u8(a));
102
+ }
103
+ template <>
104
+ EIGEN_STRONG_INLINE Packet16c preinterpret<Packet16c, Packet16uc>(const Packet16uc& a) {
105
+ return Packet16c(vreinterpretq_s8_u8(a));
106
+ }
107
+
108
+ template <>
109
+ EIGEN_STRONG_INLINE Packet4uc preinterpret<Packet4uc, Packet4c>(const Packet4c& a) {
110
+ return static_cast<Packet4uc>(a);
111
+ }
112
+ template <>
113
+ EIGEN_STRONG_INLINE Packet8uc preinterpret<Packet8uc, Packet8c>(const Packet8c& a) {
114
+ return Packet8uc(vreinterpret_u8_s8(a));
115
+ }
116
+ template <>
117
+ EIGEN_STRONG_INLINE Packet16uc preinterpret<Packet16uc, Packet16c>(const Packet16c& a) {
118
+ return Packet16uc(vreinterpretq_u8_s8(a));
119
+ }
120
+
121
+ template <>
122
+ EIGEN_STRONG_INLINE Packet4s preinterpret<Packet4s, Packet4us>(const Packet4us& a) {
123
+ return Packet4s(vreinterpret_s16_u16(a));
124
+ }
125
+ template <>
126
+ EIGEN_STRONG_INLINE Packet8s preinterpret<Packet8s, Packet8us>(const Packet8us& a) {
127
+ return Packet8s(vreinterpretq_s16_u16(a));
128
+ }
129
+ template <>
130
+ EIGEN_STRONG_INLINE Packet4us preinterpret<Packet4us, Packet4s>(const Packet4s& a) {
131
+ return Packet4us(vreinterpret_u16_s16(a));
132
+ }
133
+ template <>
134
+ EIGEN_STRONG_INLINE Packet8us preinterpret<Packet8us, Packet8s>(const Packet8s& a) {
135
+ return Packet8us(vreinterpretq_u16_s16(a));
136
+ }
137
+
25
138
  template <>
26
- EIGEN_STRONG_INLINE Packet4f pcast<Packet4f, Packet4f>(const Packet4f& a) {
27
- return a;
139
+ EIGEN_STRONG_INLINE Packet2i preinterpret<Packet2i, Packet2f>(const Packet2f& a) {
140
+ return Packet2i(vreinterpret_s32_f32(a));
141
+ }
142
+ template <>
143
+ EIGEN_STRONG_INLINE Packet2i preinterpret<Packet2i, Packet2ui>(const Packet2ui& a) {
144
+ return Packet2i(vreinterpret_s32_u32(a));
28
145
  }
29
146
  template <>
30
- EIGEN_STRONG_INLINE Packet2f pcast<Packet2f, Packet2f>(const Packet2f& a) {
31
- return a;
147
+ EIGEN_STRONG_INLINE Packet4i preinterpret<Packet4i, Packet4f>(const Packet4f& a) {
148
+ return Packet4i(vreinterpretq_s32_f32(a));
149
+ }
150
+ template <>
151
+ EIGEN_STRONG_INLINE Packet4i preinterpret<Packet4i, Packet4ui>(const Packet4ui& a) {
152
+ return Packet4i(vreinterpretq_s32_u32(a));
153
+ }
154
+
155
+ template <>
156
+ EIGEN_STRONG_INLINE Packet2ui preinterpret<Packet2ui, Packet2f>(const Packet2f& a) {
157
+ return Packet2ui(vreinterpret_u32_f32(a));
158
+ }
159
+ template <>
160
+ EIGEN_STRONG_INLINE Packet2ui preinterpret<Packet2ui, Packet2i>(const Packet2i& a) {
161
+ return Packet2ui(vreinterpret_u32_s32(a));
162
+ }
163
+ template <>
164
+ EIGEN_STRONG_INLINE Packet4ui preinterpret<Packet4ui, Packet4f>(const Packet4f& a) {
165
+ return Packet4ui(vreinterpretq_u32_f32(a));
166
+ }
167
+ template <>
168
+ EIGEN_STRONG_INLINE Packet4ui preinterpret<Packet4ui, Packet4i>(const Packet4i& a) {
169
+ return Packet4ui(vreinterpretq_u32_s32(a));
32
170
  }
33
171
 
172
+ template <>
173
+ EIGEN_STRONG_INLINE Packet2l preinterpret<Packet2l, Packet2ul>(const Packet2ul& a) {
174
+ return Packet2l(vreinterpretq_s64_u64(a));
175
+ }
176
+ template <>
177
+ EIGEN_STRONG_INLINE Packet2ul preinterpret<Packet2ul, Packet2l>(const Packet2l& a) {
178
+ return Packet2ul(vreinterpretq_u64_s64(a));
179
+ }
180
+
181
+ //==============================================================================
182
+ // pcast, SrcType = float
183
+ //==============================================================================
184
+
34
185
  template <>
35
186
  struct type_casting_traits<float, numext::int64_t> {
36
187
  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 };
@@ -47,10 +198,18 @@ EIGEN_STRONG_INLINE Packet2l pcast<Packet4f, Packet2l>(const Packet4f& a) {
47
198
  return vcvtq_s64_f64(vcvt_f64_f32(vget_low_f32(a)));
48
199
  }
49
200
  template <>
201
+ EIGEN_STRONG_INLINE Packet2l pcast<Packet2f, Packet2l>(const Packet2f& a) {
202
+ return vcvtq_s64_f64(vcvt_f64_f32(a));
203
+ }
204
+ template <>
50
205
  EIGEN_STRONG_INLINE Packet2ul pcast<Packet4f, Packet2ul>(const Packet4f& a) {
51
206
  // Discard second half of input.
52
207
  return vcvtq_u64_f64(vcvt_f64_f32(vget_low_f32(a)));
53
208
  }
209
+ template <>
210
+ EIGEN_STRONG_INLINE Packet2ul pcast<Packet2f, Packet2ul>(const Packet2f& a) {
211
+ return vcvtq_u64_f64(vcvt_f64_f32(a));
212
+ }
54
213
  #else
55
214
  template <>
56
215
  EIGEN_STRONG_INLINE Packet2l pcast<Packet4f, Packet2l>(const Packet4f& a) {
@@ -58,10 +217,19 @@ EIGEN_STRONG_INLINE Packet2l pcast<Packet4f, Packet2l>(const Packet4f& a) {
58
217
  return vmovl_s32(vget_low_s32(vcvtq_s32_f32(a)));
59
218
  }
60
219
  template <>
220
+ EIGEN_STRONG_INLINE Packet2l pcast<Packet2f, Packet2l>(const Packet2f& a) {
221
+ return vmovl_s32(vcvt_s32_f32(a));
222
+ }
223
+ template <>
61
224
  EIGEN_STRONG_INLINE Packet2ul pcast<Packet4f, Packet2ul>(const Packet4f& a) {
62
225
  // Discard second half of input.
63
226
  return vmovl_u32(vget_low_u32(vcvtq_u32_f32(a)));
64
227
  }
228
+ template <>
229
+ EIGEN_STRONG_INLINE Packet2ul pcast<Packet2f, Packet2ul>(const Packet2f& a) {
230
+ // Discard second half of input.
231
+ return vmovl_u32(vcvt_u32_f32(a));
232
+ }
65
233
  #endif // EIGEN_ARCH_ARM64
66
234
 
67
235
  template <>
@@ -99,6 +267,10 @@ EIGEN_STRONG_INLINE Packet8s pcast<Packet4f, Packet8s>(const Packet4f& a, const
99
267
  return vcombine_s16(vmovn_s32(vcvtq_s32_f32(a)), vmovn_s32(vcvtq_s32_f32(b)));
100
268
  }
101
269
  template <>
270
+ EIGEN_STRONG_INLINE Packet4s pcast<Packet4f, Packet4s>(const Packet4f& a) {
271
+ return vmovn_s32(vcvtq_s32_f32(a));
272
+ }
273
+ template <>
102
274
  EIGEN_STRONG_INLINE Packet4s pcast<Packet2f, Packet4s>(const Packet2f& a, const Packet2f& b) {
103
275
  return vmovn_s32(vcombine_s32(vcvt_s32_f32(a), vcvt_s32_f32(b)));
104
276
  }
@@ -112,6 +284,10 @@ EIGEN_STRONG_INLINE Packet8us pcast<Packet4f, Packet8us>(const Packet4f& a, cons
112
284
  return vcombine_u16(vmovn_u32(vcvtq_u32_f32(a)), vmovn_u32(vcvtq_u32_f32(b)));
113
285
  }
114
286
  template <>
287
+ EIGEN_STRONG_INLINE Packet4us pcast<Packet4f, Packet4us>(const Packet4f& a) {
288
+ return vmovn_u32(vcvtq_u32_f32(a));
289
+ }
290
+ template <>
115
291
  EIGEN_STRONG_INLINE Packet4us pcast<Packet2f, Packet4us>(const Packet2f& a, const Packet2f& b) {
116
292
  return vmovn_u32(vcombine_u32(vcvt_u32_f32(a), vcvt_u32_f32(b)));
117
293
  }
@@ -128,12 +304,25 @@ EIGEN_STRONG_INLINE Packet16c pcast<Packet4f, Packet16c>(const Packet4f& a, cons
128
304
  return vcombine_s8(vmovn_s16(ab_s16), vmovn_s16(cd_s16));
129
305
  }
130
306
  template <>
307
+ EIGEN_STRONG_INLINE Packet8c pcast<Packet4f, Packet8c>(const Packet4f& a, const Packet4f& b) {
308
+ const int16x8_t ab_s16 = pcast<Packet4f, Packet8s>(a, b);
309
+ return vmovn_s16(ab_s16);
310
+ }
311
+ template <>
131
312
  EIGEN_STRONG_INLINE Packet8c pcast<Packet2f, Packet8c>(const Packet2f& a, const Packet2f& b, const Packet2f& c,
132
313
  const Packet2f& d) {
133
314
  const int16x4_t ab_s16 = pcast<Packet2f, Packet4s>(a, b);
134
315
  const int16x4_t cd_s16 = pcast<Packet2f, Packet4s>(c, d);
135
316
  return vmovn_s16(vcombine_s16(ab_s16, cd_s16));
136
317
  }
318
+ template <>
319
+ EIGEN_STRONG_INLINE Packet4c pcast<Packet4f, Packet4c>(const Packet4f& a) {
320
+ const int32x4_t a_s32x4 = vcvtq_s32_f32(a);
321
+ const int16x4_t a_s16x4 = vmovn_s32(a_s32x4);
322
+ const int16x8_t aa_s16x8 = vcombine_s16(a_s16x4, a_s16x4);
323
+ const int8x8_t aa_s8x8 = vmovn_s16(aa_s16x8);
324
+ return vget_lane_s32(vreinterpret_s32_s8(aa_s8x8), 0);
325
+ }
137
326
 
138
327
  template <>
139
328
  struct type_casting_traits<float, numext::uint8_t> {
@@ -142,16 +331,20 @@ struct type_casting_traits<float, numext::uint8_t> {
142
331
  template <>
143
332
  EIGEN_STRONG_INLINE Packet16uc pcast<Packet4f, Packet16uc>(const Packet4f& a, const Packet4f& b, const Packet4f& c,
144
333
  const Packet4f& d) {
145
- const uint16x8_t ab_u16 = pcast<Packet4f, Packet8us>(a, b);
146
- const uint16x8_t cd_u16 = pcast<Packet4f, Packet8us>(c, d);
147
- return vcombine_u8(vmovn_u16(ab_u16), vmovn_u16(cd_u16));
334
+ return preinterpret<Packet16uc>(pcast<Packet4f, Packet16c>(a, b, c, d));
335
+ }
336
+ template <>
337
+ EIGEN_STRONG_INLINE Packet8uc pcast<Packet4f, Packet8uc>(const Packet4f& a, const Packet4f& b) {
338
+ return preinterpret<Packet8uc>(pcast<Packet4f, Packet8c>(a, b));
148
339
  }
149
340
  template <>
150
341
  EIGEN_STRONG_INLINE Packet8uc pcast<Packet2f, Packet8uc>(const Packet2f& a, const Packet2f& b, const Packet2f& c,
151
342
  const Packet2f& d) {
152
- const uint16x4_t ab_u16 = pcast<Packet2f, Packet4us>(a, b);
153
- const uint16x4_t cd_u16 = pcast<Packet2f, Packet4us>(c, d);
154
- return vmovn_u16(vcombine_u16(ab_u16, cd_u16));
343
+ return preinterpret<Packet8uc>(pcast<Packet2f, Packet8c>(a, b, c, d));
344
+ }
345
+ template <>
346
+ EIGEN_STRONG_INLINE Packet4uc pcast<Packet4f, Packet4uc>(const Packet4f& a) {
347
+ return static_cast<Packet4uc>(pcast<Packet4f, Packet4c>(a));
155
348
  }
156
349
 
157
350
  //==============================================================================
@@ -167,6 +360,10 @@ EIGEN_STRONG_INLINE Packet4f pcast<Packet16c, Packet4f>(const Packet16c& a) {
167
360
  return vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8(vget_low_s8(a)))));
168
361
  }
169
362
  template <>
363
+ EIGEN_STRONG_INLINE Packet4f pcast<Packet4c, Packet4f>(const Packet4c& a) {
364
+ return vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8(vreinterpret_s8_s32(vdup_n_s32(a))))));
365
+ }
366
+ template <>
170
367
  EIGEN_STRONG_INLINE Packet2f pcast<Packet8c, Packet2f>(const Packet8c& a) {
171
368
  // Discard all but first 2 bytes.
172
369
  return vcvt_f32_s32(vget_low_s32(vmovl_s16(vget_low_s16(vmovl_s8(a)))));
@@ -188,7 +385,7 @@ struct type_casting_traits<numext::int8_t, numext::uint64_t> {
188
385
  };
189
386
  template <>
190
387
  EIGEN_STRONG_INLINE Packet2ul pcast<Packet16c, Packet2ul>(const Packet16c& a) {
191
- return vreinterpretq_u64_s64(pcast<Packet16c, Packet2l>(a));
388
+ return preinterpret<Packet2ul>(pcast<Packet16c, Packet2l>(a));
192
389
  }
193
390
 
194
391
  template <>
@@ -201,6 +398,14 @@ EIGEN_STRONG_INLINE Packet4i pcast<Packet16c, Packet4i>(const Packet16c& a) {
201
398
  return vmovl_s16(vget_low_s16(vmovl_s8(vget_low_s8(a))));
202
399
  }
203
400
  template <>
401
+ EIGEN_STRONG_INLINE Packet4i pcast<Packet8c, Packet4i>(const Packet8c& a) {
402
+ return vmovl_s16(vget_low_s16(vmovl_s8(a)));
403
+ }
404
+ template <>
405
+ EIGEN_STRONG_INLINE Packet4i pcast<Packet4c, Packet4i>(const Packet4c& a) {
406
+ return pcast<Packet8c, Packet4i>(vreinterpret_s8_s32(vdup_n_s32(a)));
407
+ }
408
+ template <>
204
409
  EIGEN_STRONG_INLINE Packet2i pcast<Packet8c, Packet2i>(const Packet8c& a) {
205
410
  // Discard all but first 2 bytes.
206
411
  return vget_low_s32(vmovl_s16(vget_low_s16(vmovl_s8(a))));
@@ -212,11 +417,15 @@ struct type_casting_traits<numext::int8_t, numext::uint32_t> {
212
417
  };
213
418
  template <>
214
419
  EIGEN_STRONG_INLINE Packet4ui pcast<Packet16c, Packet4ui>(const Packet16c& a) {
215
- return vreinterpretq_u32_s32(pcast<Packet16c, Packet4i>(a));
420
+ return preinterpret<Packet4ui>(pcast<Packet16c, Packet4i>(a));
216
421
  }
217
422
  template <>
218
423
  EIGEN_STRONG_INLINE Packet2ui pcast<Packet8c, Packet2ui>(const Packet8c& a) {
219
- return vreinterpret_u32_s32(pcast<Packet8c, Packet2i>(a));
424
+ return preinterpret<Packet2ui>(pcast<Packet8c, Packet2i>(a));
425
+ }
426
+ template <>
427
+ EIGEN_STRONG_INLINE Packet4ui pcast<Packet4c, Packet4ui>(const Packet4c& a) {
428
+ return preinterpret<Packet4ui>(pcast<Packet4c, Packet4i>(a));
220
429
  }
221
430
 
222
431
  template <>
@@ -229,10 +438,18 @@ EIGEN_STRONG_INLINE Packet8s pcast<Packet16c, Packet8s>(const Packet16c& a) {
229
438
  return vmovl_s8(vget_low_s8(a));
230
439
  }
231
440
  template <>
441
+ EIGEN_STRONG_INLINE Packet8s pcast<Packet8c, Packet8s>(const Packet8c& a) {
442
+ return vmovl_s8(a);
443
+ }
444
+ template <>
232
445
  EIGEN_STRONG_INLINE Packet4s pcast<Packet8c, Packet4s>(const Packet8c& a) {
233
446
  // Discard second half of input.
234
447
  return vget_low_s16(vmovl_s8(a));
235
448
  }
449
+ template <>
450
+ EIGEN_STRONG_INLINE Packet4s pcast<Packet4c, Packet4s>(const Packet4c& a) {
451
+ return pcast<Packet8c, Packet4s>(vreinterpret_s8_s32(vdup_n_s32(a)));
452
+ }
236
453
 
237
454
  template <>
238
455
  struct type_casting_traits<numext::int8_t, numext::uint16_t> {
@@ -240,45 +457,19 @@ struct type_casting_traits<numext::int8_t, numext::uint16_t> {
240
457
  };
241
458
  template <>
242
459
  EIGEN_STRONG_INLINE Packet8us pcast<Packet16c, Packet8us>(const Packet16c& a) {
243
- return vreinterpretq_u16_s16(pcast<Packet16c, Packet8s>(a));
460
+ return preinterpret<Packet8us>(pcast<Packet16c, Packet8s>(a));
244
461
  }
245
462
  template <>
246
- EIGEN_STRONG_INLINE Packet4us pcast<Packet8c, Packet4us>(const Packet8c& a) {
247
- return vreinterpret_u16_s16(pcast<Packet8c, Packet4s>(a));
463
+ EIGEN_STRONG_INLINE Packet8us pcast<Packet8c, Packet8us>(const Packet8c& a) {
464
+ return preinterpret<Packet8us>(pcast<Packet8c, Packet8s>(a));
248
465
  }
249
-
250
- template <>
251
- struct type_casting_traits<numext::int8_t, numext::int8_t> {
252
- enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
253
- };
254
466
  template <>
255
- EIGEN_STRONG_INLINE Packet16c pcast<Packet16c, Packet16c>(const Packet16c& a) {
256
- return a;
257
- }
258
- template <>
259
- EIGEN_STRONG_INLINE Packet8c pcast<Packet8c, Packet8c>(const Packet8c& a) {
260
- return a;
261
- }
262
- template <>
263
- EIGEN_STRONG_INLINE Packet4c pcast<Packet4c, Packet4c>(const Packet4c& a) {
264
- return a;
265
- }
266
-
267
- template <>
268
- struct type_casting_traits<numext::int8_t, numext::uint8_t> {
269
- enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
270
- };
271
- template <>
272
- EIGEN_STRONG_INLINE Packet16uc pcast<Packet16c, Packet16uc>(const Packet16c& a) {
273
- return vreinterpretq_u8_s8(a);
274
- }
275
- template <>
276
- EIGEN_STRONG_INLINE Packet8uc pcast<Packet8c, Packet8uc>(const Packet8c& a) {
277
- return vreinterpret_u8_s8(a);
467
+ EIGEN_STRONG_INLINE Packet4us pcast<Packet8c, Packet4us>(const Packet8c& a) {
468
+ return preinterpret<Packet4us>(pcast<Packet8c, Packet4s>(a));
278
469
  }
279
470
  template <>
280
- EIGEN_STRONG_INLINE Packet4uc pcast<Packet4c, Packet4uc>(const Packet4c& a) {
281
- return static_cast<Packet4uc>(a);
471
+ EIGEN_STRONG_INLINE Packet4us pcast<Packet4c, Packet4us>(const Packet4c& a) {
472
+ return preinterpret<Packet4us>(pcast<Packet4c, Packet4s>(a));
282
473
  }
283
474
 
284
475
  //==============================================================================
@@ -294,6 +485,10 @@ EIGEN_STRONG_INLINE Packet4f pcast<Packet16uc, Packet4f>(const Packet16uc& a) {
294
485
  return vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8(vget_low_u8(a)))));
295
486
  }
296
487
  template <>
488
+ EIGEN_STRONG_INLINE Packet4f pcast<Packet4uc, Packet4f>(const Packet4uc& a) {
489
+ return vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8(vreinterpret_u8_u32(vdup_n_u32(a))))));
490
+ }
491
+ template <>
297
492
  EIGEN_STRONG_INLINE Packet2f pcast<Packet8uc, Packet2f>(const Packet8uc& a) {
298
493
  // Discard all but first 2 bytes.
299
494
  return vcvt_f32_u32(vget_low_u32(vmovl_u16(vget_low_u16(vmovl_u8(a)))));
@@ -315,7 +510,7 @@ struct type_casting_traits<numext::uint8_t, numext::int64_t> {
315
510
  };
316
511
  template <>
317
512
  EIGEN_STRONG_INLINE Packet2l pcast<Packet16uc, Packet2l>(const Packet16uc& a) {
318
- return vreinterpretq_s64_u64(pcast<Packet16uc, Packet2ul>(a));
513
+ return preinterpret<Packet2l>(pcast<Packet16uc, Packet2ul>(a));
319
514
  }
320
515
 
321
516
  template <>
@@ -328,10 +523,18 @@ EIGEN_STRONG_INLINE Packet4ui pcast<Packet16uc, Packet4ui>(const Packet16uc& a)
328
523
  return vmovl_u16(vget_low_u16(vmovl_u8(vget_low_u8(a))));
329
524
  }
330
525
  template <>
526
+ EIGEN_STRONG_INLINE Packet4ui pcast<Packet8uc, Packet4ui>(const Packet8uc& a) {
527
+ return vmovl_u16(vget_low_u16(vmovl_u8(a)));
528
+ }
529
+ template <>
331
530
  EIGEN_STRONG_INLINE Packet2ui pcast<Packet8uc, Packet2ui>(const Packet8uc& a) {
332
531
  // Discard all but first 2 bytes.
333
532
  return vget_low_u32(vmovl_u16(vget_low_u16(vmovl_u8(a))));
334
533
  }
534
+ template <>
535
+ EIGEN_STRONG_INLINE Packet4ui pcast<Packet4uc, Packet4ui>(const Packet4uc& a) {
536
+ return pcast<Packet8uc, Packet4ui>(vreinterpret_u8_u32(vdup_n_u32(a)));
537
+ }
335
538
 
336
539
  template <>
337
540
  struct type_casting_traits<numext::uint8_t, numext::int32_t> {
@@ -339,11 +542,15 @@ struct type_casting_traits<numext::uint8_t, numext::int32_t> {
339
542
  };
340
543
  template <>
341
544
  EIGEN_STRONG_INLINE Packet4i pcast<Packet16uc, Packet4i>(const Packet16uc& a) {
342
- return vreinterpretq_s32_u32(pcast<Packet16uc, Packet4ui>(a));
545
+ return preinterpret<Packet4i>(pcast<Packet16uc, Packet4ui>(a));
343
546
  }
344
547
  template <>
345
548
  EIGEN_STRONG_INLINE Packet2i pcast<Packet8uc, Packet2i>(const Packet8uc& a) {
346
- return vreinterpret_s32_u32(pcast<Packet8uc, Packet2ui>(a));
549
+ return preinterpret<Packet2i>(pcast<Packet8uc, Packet2ui>(a));
550
+ }
551
+ template <>
552
+ EIGEN_STRONG_INLINE Packet4i pcast<Packet4uc, Packet4i>(const Packet4uc& a) {
553
+ return preinterpret<Packet4i>(pcast<Packet4uc, Packet4ui>(a));
347
554
  }
348
555
 
349
556
  template <>
@@ -356,9 +563,12 @@ EIGEN_STRONG_INLINE Packet8us pcast<Packet16uc, Packet8us>(const Packet16uc& a)
356
563
  return vmovl_u8(vget_low_u8(a));
357
564
  }
358
565
  template <>
359
- EIGEN_STRONG_INLINE Packet4us pcast<Packet8uc, Packet4us>(const Packet8uc& a) {
360
- // Discard second half of input.
361
- return vget_low_u16(vmovl_u8(a));
566
+ EIGEN_STRONG_INLINE Packet8us pcast<Packet8uc, Packet8us>(const Packet8uc& a) {
567
+ return vmovl_u8(a);
568
+ }
569
+ template <>
570
+ EIGEN_STRONG_INLINE Packet4us pcast<Packet4uc, Packet4us>(const Packet4uc& a) {
571
+ return vget_low_u16(vmovl_u8(vreinterpret_u8_u32(vdup_n_u32(a))));
362
572
  }
363
573
 
364
574
  template <>
@@ -367,45 +577,15 @@ struct type_casting_traits<numext::uint8_t, numext::int16_t> {
367
577
  };
368
578
  template <>
369
579
  EIGEN_STRONG_INLINE Packet8s pcast<Packet16uc, Packet8s>(const Packet16uc& a) {
370
- return vreinterpretq_s16_u16(pcast<Packet16uc, Packet8us>(a));
371
- }
372
- template <>
373
- EIGEN_STRONG_INLINE Packet4s pcast<Packet8uc, Packet4s>(const Packet8uc& a) {
374
- return vreinterpret_s16_u16(pcast<Packet8uc, Packet4us>(a));
375
- }
376
-
377
- template <>
378
- struct type_casting_traits<numext::uint8_t, numext::uint8_t> {
379
- enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
380
- };
381
- template <>
382
- EIGEN_STRONG_INLINE Packet16uc pcast<Packet16uc, Packet16uc>(const Packet16uc& a) {
383
- return a;
384
- }
385
- template <>
386
- EIGEN_STRONG_INLINE Packet8uc pcast<Packet8uc, Packet8uc>(const Packet8uc& a) {
387
- return a;
580
+ return preinterpret<Packet8s>(pcast<Packet16uc, Packet8us>(a));
388
581
  }
389
582
  template <>
390
- EIGEN_STRONG_INLINE Packet4uc pcast<Packet4uc, Packet4uc>(const Packet4uc& a) {
391
- return a;
583
+ EIGEN_STRONG_INLINE Packet8s pcast<Packet8uc, Packet8s>(const Packet8uc& a) {
584
+ return preinterpret<Packet8s>(pcast<Packet8uc, Packet8us>(a));
392
585
  }
393
-
394
- template <>
395
- struct type_casting_traits<numext::uint8_t, numext::int8_t> {
396
- enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
397
- };
398
586
  template <>
399
- EIGEN_STRONG_INLINE Packet16c pcast<Packet16uc, Packet16c>(const Packet16uc& a) {
400
- return vreinterpretq_s8_u8(a);
401
- }
402
- template <>
403
- EIGEN_STRONG_INLINE Packet8c pcast<Packet8uc, Packet8c>(const Packet8uc& a) {
404
- return vreinterpret_s8_u8(a);
405
- }
406
- template <>
407
- EIGEN_STRONG_INLINE Packet4c pcast<Packet4uc, Packet4c>(const Packet4uc& a) {
408
- return static_cast<Packet4c>(a);
587
+ EIGEN_STRONG_INLINE Packet4s pcast<Packet4uc, Packet4s>(const Packet4uc& a) {
588
+ return preinterpret<Packet4s>(pcast<Packet4uc, Packet4us>(a));
409
589
  }
410
590
 
411
591
  //==============================================================================
@@ -421,6 +601,10 @@ EIGEN_STRONG_INLINE Packet4f pcast<Packet8s, Packet4f>(const Packet8s& a) {
421
601
  return vcvtq_f32_s32(vmovl_s16(vget_low_s16(a)));
422
602
  }
423
603
  template <>
604
+ EIGEN_STRONG_INLINE Packet4f pcast<Packet4s, Packet4f>(const Packet4s& a) {
605
+ return vcvtq_f32_s32(vmovl_s16(a));
606
+ }
607
+ template <>
424
608
  EIGEN_STRONG_INLINE Packet2f pcast<Packet4s, Packet2f>(const Packet4s& a) {
425
609
  // Discard second half of input.
426
610
  return vcvt_f32_s32(vget_low_s32(vmovl_s16(a)));
@@ -442,7 +626,7 @@ struct type_casting_traits<numext::int16_t, numext::uint64_t> {
442
626
  };
443
627
  template <>
444
628
  EIGEN_STRONG_INLINE Packet2ul pcast<Packet8s, Packet2ul>(const Packet8s& a) {
445
- return vreinterpretq_u64_s64(pcast<Packet8s, Packet2l>(a));
629
+ return preinterpret<Packet2ul>(pcast<Packet8s, Packet2l>(a));
446
630
  }
447
631
 
448
632
  template <>
@@ -455,6 +639,10 @@ EIGEN_STRONG_INLINE Packet4i pcast<Packet8s, Packet4i>(const Packet8s& a) {
455
639
  return vmovl_s16(vget_low_s16(a));
456
640
  }
457
641
  template <>
642
+ EIGEN_STRONG_INLINE Packet4i pcast<Packet4s, Packet4i>(const Packet4s& a) {
643
+ return vmovl_s16(a);
644
+ }
645
+ template <>
458
646
  EIGEN_STRONG_INLINE Packet2i pcast<Packet4s, Packet2i>(const Packet4s& a) {
459
647
  // Discard second half of input.
460
648
  return vget_low_s32(vmovl_s16(a));
@@ -466,37 +654,15 @@ struct type_casting_traits<numext::int16_t, numext::uint32_t> {
466
654
  };
467
655
  template <>
468
656
  EIGEN_STRONG_INLINE Packet4ui pcast<Packet8s, Packet4ui>(const Packet8s& a) {
469
- return vreinterpretq_u32_s32(pcast<Packet8s, Packet4i>(a));
657
+ return preinterpret<Packet4ui>(pcast<Packet8s, Packet4i>(a));
470
658
  }
471
659
  template <>
472
- EIGEN_STRONG_INLINE Packet2ui pcast<Packet4s, Packet2ui>(const Packet4s& a) {
473
- return vreinterpret_u32_s32(pcast<Packet4s, Packet2i>(a));
660
+ EIGEN_STRONG_INLINE Packet4ui pcast<Packet4s, Packet4ui>(const Packet4s& a) {
661
+ return preinterpret<Packet4ui>(pcast<Packet4s, Packet4i>(a));
474
662
  }
475
-
476
663
  template <>
477
- struct type_casting_traits<numext::int16_t, numext::int16_t> {
478
- enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
479
- };
480
- template <>
481
- EIGEN_STRONG_INLINE Packet8s pcast<Packet8s, Packet8s>(const Packet8s& a) {
482
- return a;
483
- }
484
- template <>
485
- EIGEN_STRONG_INLINE Packet4s pcast<Packet4s, Packet4s>(const Packet4s& a) {
486
- return a;
487
- }
488
-
489
- template <>
490
- struct type_casting_traits<numext::int16_t, numext::uint16_t> {
491
- enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
492
- };
493
- template <>
494
- EIGEN_STRONG_INLINE Packet8us pcast<Packet8s, Packet8us>(const Packet8s& a) {
495
- return vreinterpretq_u16_s16(a);
496
- }
497
- template <>
498
- EIGEN_STRONG_INLINE Packet4us pcast<Packet4s, Packet4us>(const Packet4s& a) {
499
- return vreinterpret_u16_s16(a);
664
+ EIGEN_STRONG_INLINE Packet2ui pcast<Packet4s, Packet2ui>(const Packet4s& a) {
665
+ return preinterpret<Packet2ui>(pcast<Packet4s, Packet2i>(a));
500
666
  }
501
667
 
502
668
  template <>
@@ -508,9 +674,18 @@ EIGEN_STRONG_INLINE Packet16c pcast<Packet8s, Packet16c>(const Packet8s& a, cons
508
674
  return vcombine_s8(vmovn_s16(a), vmovn_s16(b));
509
675
  }
510
676
  template <>
677
+ EIGEN_STRONG_INLINE Packet8c pcast<Packet8s, Packet8c>(const Packet8s& a) {
678
+ return vmovn_s16(a);
679
+ }
680
+ template <>
511
681
  EIGEN_STRONG_INLINE Packet8c pcast<Packet4s, Packet8c>(const Packet4s& a, const Packet4s& b) {
512
682
  return vmovn_s16(vcombine_s16(a, b));
513
683
  }
684
+ template <>
685
+ EIGEN_STRONG_INLINE Packet4c pcast<Packet4s, Packet4c>(const Packet4s& a) {
686
+ const int8x8_t aa_s8x8 = pcast<Packet4s, Packet8c>(a, a);
687
+ return vget_lane_s32(vreinterpret_s32_s8(aa_s8x8), 0);
688
+ }
514
689
 
515
690
  template <>
516
691
  struct type_casting_traits<numext::int16_t, numext::uint8_t> {
@@ -518,11 +693,19 @@ struct type_casting_traits<numext::int16_t, numext::uint8_t> {
518
693
  };
519
694
  template <>
520
695
  EIGEN_STRONG_INLINE Packet16uc pcast<Packet8s, Packet16uc>(const Packet8s& a, const Packet8s& b) {
521
- return vcombine_u8(vmovn_u16(vreinterpretq_u16_s16(a)), vmovn_u16(vreinterpretq_u16_s16(b)));
696
+ return preinterpret<Packet16uc>(pcast<Packet8s, Packet16c>(a, b));
697
+ }
698
+ template <>
699
+ EIGEN_STRONG_INLINE Packet8uc pcast<Packet8s, Packet8uc>(const Packet8s& a) {
700
+ return preinterpret<Packet8uc>(pcast<Packet8s, Packet8c>(a));
522
701
  }
523
702
  template <>
524
703
  EIGEN_STRONG_INLINE Packet8uc pcast<Packet4s, Packet8uc>(const Packet4s& a, const Packet4s& b) {
525
- return vmovn_u16(vcombine_u16(vreinterpret_u16_s16(a), vreinterpret_u16_s16(b)));
704
+ return preinterpret<Packet8uc>(pcast<Packet4s, Packet8c>(a, b));
705
+ }
706
+ template <>
707
+ EIGEN_STRONG_INLINE Packet4uc pcast<Packet4s, Packet4uc>(const Packet4s& a) {
708
+ return static_cast<Packet4uc>(pcast<Packet4s, Packet4c>(a));
526
709
  }
527
710
 
528
711
  //==============================================================================
@@ -538,6 +721,10 @@ EIGEN_STRONG_INLINE Packet4f pcast<Packet8us, Packet4f>(const Packet8us& a) {
538
721
  return vcvtq_f32_u32(vmovl_u16(vget_low_u16(a)));
539
722
  }
540
723
  template <>
724
+ EIGEN_STRONG_INLINE Packet4f pcast<Packet4us, Packet4f>(const Packet4us& a) {
725
+ return vcvtq_f32_u32(vmovl_u16(a));
726
+ }
727
+ template <>
541
728
  EIGEN_STRONG_INLINE Packet2f pcast<Packet4us, Packet2f>(const Packet4us& a) {
542
729
  // Discard second half of input.
543
730
  return vcvt_f32_u32(vget_low_u32(vmovl_u16(a)));
@@ -559,7 +746,7 @@ struct type_casting_traits<numext::uint16_t, numext::int64_t> {
559
746
  };
560
747
  template <>
561
748
  EIGEN_STRONG_INLINE Packet2l pcast<Packet8us, Packet2l>(const Packet8us& a) {
562
- return vreinterpretq_s64_u64(pcast<Packet8us, Packet2ul>(a));
749
+ return preinterpret<Packet2l>(pcast<Packet8us, Packet2ul>(a));
563
750
  }
564
751
 
565
752
  template <>
@@ -572,6 +759,10 @@ EIGEN_STRONG_INLINE Packet4ui pcast<Packet8us, Packet4ui>(const Packet8us& a) {
572
759
  return vmovl_u16(vget_low_u16(a));
573
760
  }
574
761
  template <>
762
+ EIGEN_STRONG_INLINE Packet4ui pcast<Packet4us, Packet4ui>(const Packet4us& a) {
763
+ return vmovl_u16(a);
764
+ }
765
+ template <>
575
766
  EIGEN_STRONG_INLINE Packet2ui pcast<Packet4us, Packet2ui>(const Packet4us& a) {
576
767
  // Discard second half of input.
577
768
  return vget_low_u32(vmovl_u16(a));
@@ -583,37 +774,15 @@ struct type_casting_traits<numext::uint16_t, numext::int32_t> {
583
774
  };
584
775
  template <>
585
776
  EIGEN_STRONG_INLINE Packet4i pcast<Packet8us, Packet4i>(const Packet8us& a) {
586
- return vreinterpretq_s32_u32(pcast<Packet8us, Packet4ui>(a));
587
- }
588
- template <>
589
- EIGEN_STRONG_INLINE Packet2i pcast<Packet4us, Packet2i>(const Packet4us& a) {
590
- return vreinterpret_s32_u32(pcast<Packet4us, Packet2ui>(a));
591
- }
592
-
593
- template <>
594
- struct type_casting_traits<numext::uint16_t, numext::uint16_t> {
595
- enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
596
- };
597
- template <>
598
- EIGEN_STRONG_INLINE Packet8us pcast<Packet8us, Packet8us>(const Packet8us& a) {
599
- return a;
777
+ return preinterpret<Packet4i>(pcast<Packet8us, Packet4ui>(a));
600
778
  }
601
779
  template <>
602
- EIGEN_STRONG_INLINE Packet4us pcast<Packet4us, Packet4us>(const Packet4us& a) {
603
- return a;
780
+ EIGEN_STRONG_INLINE Packet4i pcast<Packet4us, Packet4i>(const Packet4us& a) {
781
+ return preinterpret<Packet4i>(pcast<Packet4us, Packet4ui>(a));
604
782
  }
605
-
606
783
  template <>
607
- struct type_casting_traits<numext::uint16_t, numext::int16_t> {
608
- enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
609
- };
610
- template <>
611
- EIGEN_STRONG_INLINE Packet8s pcast<Packet8us, Packet8s>(const Packet8us& a) {
612
- return vreinterpretq_s16_u16(a);
613
- }
614
- template <>
615
- EIGEN_STRONG_INLINE Packet4s pcast<Packet4us, Packet4s>(const Packet4us& a) {
616
- return vreinterpret_s16_u16(a);
784
+ EIGEN_STRONG_INLINE Packet2i pcast<Packet4us, Packet2i>(const Packet4us& a) {
785
+ return preinterpret<Packet2i>(pcast<Packet4us, Packet2ui>(a));
617
786
  }
618
787
 
619
788
  template <>
@@ -625,9 +794,18 @@ EIGEN_STRONG_INLINE Packet16uc pcast<Packet8us, Packet16uc>(const Packet8us& a,
625
794
  return vcombine_u8(vmovn_u16(a), vmovn_u16(b));
626
795
  }
627
796
  template <>
797
+ EIGEN_STRONG_INLINE Packet8uc pcast<Packet8us, Packet8uc>(const Packet8us& a) {
798
+ return vmovn_u16(a);
799
+ }
800
+ template <>
628
801
  EIGEN_STRONG_INLINE Packet8uc pcast<Packet4us, Packet8uc>(const Packet4us& a, const Packet4us& b) {
629
802
  return vmovn_u16(vcombine_u16(a, b));
630
803
  }
804
+ template <>
805
+ EIGEN_STRONG_INLINE Packet4uc pcast<Packet4us, Packet4uc>(const Packet4us& a) {
806
+ uint8x8_t aa_u8x8 = pcast<Packet4us, Packet8uc>(a, a);
807
+ return vget_lane_u32(vreinterpret_u32_u8(aa_u8x8), 0);
808
+ }
631
809
 
632
810
  template <>
633
811
  struct type_casting_traits<numext::uint16_t, numext::int8_t> {
@@ -635,11 +813,19 @@ struct type_casting_traits<numext::uint16_t, numext::int8_t> {
635
813
  };
636
814
  template <>
637
815
  EIGEN_STRONG_INLINE Packet16c pcast<Packet8us, Packet16c>(const Packet8us& a, const Packet8us& b) {
638
- return vreinterpretq_s8_u8(pcast<Packet8us, Packet16uc>(a, b));
816
+ return preinterpret<Packet16c>(pcast<Packet8us, Packet16uc>(a, b));
817
+ }
818
+ template <>
819
+ EIGEN_STRONG_INLINE Packet8c pcast<Packet8us, Packet8c>(const Packet8us& a) {
820
+ return preinterpret<Packet8c>(pcast<Packet8us, Packet8uc>(a));
639
821
  }
640
822
  template <>
641
823
  EIGEN_STRONG_INLINE Packet8c pcast<Packet4us, Packet8c>(const Packet4us& a, const Packet4us& b) {
642
- return vreinterpret_s8_u8(pcast<Packet4us, Packet8uc>(a, b));
824
+ return preinterpret<Packet8c>(pcast<Packet4us, Packet8uc>(a, b));
825
+ }
826
+ template <>
827
+ EIGEN_STRONG_INLINE Packet4c pcast<Packet4us, Packet4c>(const Packet4us& a) {
828
+ return static_cast<Packet4c>(pcast<Packet4us, Packet4uc>(a));
643
829
  }
644
830
 
645
831
  //==============================================================================
@@ -654,53 +840,35 @@ EIGEN_STRONG_INLINE Packet4f pcast<Packet4i, Packet4f>(const Packet4i& a) {
654
840
  return vcvtq_f32_s32(a);
655
841
  }
656
842
  template <>
657
- EIGEN_STRONG_INLINE Packet2f pcast<Packet2i, Packet2f>(const Packet2i& a) {
658
- return vcvt_f32_s32(a);
659
- }
660
-
661
- template <>
662
- struct type_casting_traits<numext::int32_t, numext::int64_t> {
663
- enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 };
664
- };
665
- template <>
666
- EIGEN_STRONG_INLINE Packet2l pcast<Packet4i, Packet2l>(const Packet4i& a) {
667
- // Discard second half of input.
668
- return vmovl_s32(vget_low_s32(a));
669
- }
670
-
671
- template <>
672
- struct type_casting_traits<numext::int32_t, numext::uint64_t> {
673
- enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 };
674
- };
675
- template <>
676
- EIGEN_STRONG_INLINE Packet2ul pcast<Packet4i, Packet2ul>(const Packet4i& a) {
677
- return vreinterpretq_u64_s64(pcast<Packet4i, Packet2l>(a));
843
+ EIGEN_STRONG_INLINE Packet2f pcast<Packet2i, Packet2f>(const Packet2i& a) {
844
+ return vcvt_f32_s32(a);
678
845
  }
679
846
 
680
847
  template <>
681
- struct type_casting_traits<numext::int32_t, numext::int32_t> {
682
- enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
848
+ struct type_casting_traits<numext::int32_t, numext::int64_t> {
849
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 };
683
850
  };
684
851
  template <>
685
- EIGEN_STRONG_INLINE Packet4i pcast<Packet4i, Packet4i>(const Packet4i& a) {
686
- return a;
852
+ EIGEN_STRONG_INLINE Packet2l pcast<Packet4i, Packet2l>(const Packet4i& a) {
853
+ // Discard second half of input.
854
+ return vmovl_s32(vget_low_s32(a));
687
855
  }
688
856
  template <>
689
- EIGEN_STRONG_INLINE Packet2i pcast<Packet2i, Packet2i>(const Packet2i& a) {
690
- return a;
857
+ EIGEN_STRONG_INLINE Packet2l pcast<Packet2i, Packet2l>(const Packet2i& a) {
858
+ return vmovl_s32(a);
691
859
  }
692
860
 
693
861
  template <>
694
- struct type_casting_traits<numext::int32_t, numext::uint32_t> {
695
- enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
862
+ struct type_casting_traits<numext::int32_t, numext::uint64_t> {
863
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 };
696
864
  };
697
865
  template <>
698
- EIGEN_STRONG_INLINE Packet4ui pcast<Packet4i, Packet4ui>(const Packet4i& a) {
699
- return vreinterpretq_u32_s32(a);
866
+ EIGEN_STRONG_INLINE Packet2ul pcast<Packet4i, Packet2ul>(const Packet4i& a) {
867
+ return preinterpret<Packet2ul>(pcast<Packet4i, Packet2l>(a));
700
868
  }
701
869
  template <>
702
- EIGEN_STRONG_INLINE Packet2ui pcast<Packet2i, Packet2ui>(const Packet2i& a) {
703
- return vreinterpret_u32_s32(a);
870
+ EIGEN_STRONG_INLINE Packet2ul pcast<Packet2i, Packet2ul>(const Packet2i& a) {
871
+ return preinterpret<Packet2ul>(pcast<Packet2i, Packet2l>(a));
704
872
  }
705
873
 
706
874
  template <>
@@ -712,6 +880,10 @@ EIGEN_STRONG_INLINE Packet8s pcast<Packet4i, Packet8s>(const Packet4i& a, const
712
880
  return vcombine_s16(vmovn_s32(a), vmovn_s32(b));
713
881
  }
714
882
  template <>
883
+ EIGEN_STRONG_INLINE Packet4s pcast<Packet4i, Packet4s>(const Packet4i& a) {
884
+ return vmovn_s32(a);
885
+ }
886
+ template <>
715
887
  EIGEN_STRONG_INLINE Packet4s pcast<Packet2i, Packet4s>(const Packet2i& a, const Packet2i& b) {
716
888
  return vmovn_s32(vcombine_s32(a, b));
717
889
  }
@@ -725,6 +897,10 @@ EIGEN_STRONG_INLINE Packet8us pcast<Packet4i, Packet8us>(const Packet4i& a, cons
725
897
  return vcombine_u16(vmovn_u32(vreinterpretq_u32_s32(a)), vmovn_u32(vreinterpretq_u32_s32(b)));
726
898
  }
727
899
  template <>
900
+ EIGEN_STRONG_INLINE Packet4us pcast<Packet4i, Packet4us>(const Packet4i& a) {
901
+ return vmovn_u32(vreinterpretq_u32_s32(a));
902
+ }
903
+ template <>
728
904
  EIGEN_STRONG_INLINE Packet4us pcast<Packet2i, Packet4us>(const Packet2i& a, const Packet2i& b) {
729
905
  return vmovn_u32(vreinterpretq_u32_s32(vcombine_s32(a, b)));
730
906
  }
@@ -741,12 +917,24 @@ EIGEN_STRONG_INLINE Packet16c pcast<Packet4i, Packet16c>(const Packet4i& a, cons
741
917
  return vcombine_s8(vmovn_s16(ab_s16), vmovn_s16(cd_s16));
742
918
  }
743
919
  template <>
920
+ EIGEN_STRONG_INLINE Packet8c pcast<Packet4i, Packet8c>(const Packet4i& a, const Packet4i& b) {
921
+ const int16x8_t ab_s16 = pcast<Packet4i, Packet8s>(a, b);
922
+ return vmovn_s16(ab_s16);
923
+ }
924
+ template <>
744
925
  EIGEN_STRONG_INLINE Packet8c pcast<Packet2i, Packet8c>(const Packet2i& a, const Packet2i& b, const Packet2i& c,
745
926
  const Packet2i& d) {
746
927
  const int16x4_t ab_s16 = vmovn_s32(vcombine_s32(a, b));
747
928
  const int16x4_t cd_s16 = vmovn_s32(vcombine_s32(c, d));
748
929
  return vmovn_s16(vcombine_s16(ab_s16, cd_s16));
749
930
  }
931
+ template <>
932
+ EIGEN_STRONG_INLINE Packet4c pcast<Packet4i, Packet4c>(const Packet4i& a) {
933
+ const int16x4_t a_s16x4 = vmovn_s32(a);
934
+ const int16x8_t aa_s16x8 = vcombine_s16(a_s16x4, a_s16x4);
935
+ const int8x8_t aa_s8x8 = vmovn_s16(aa_s16x8);
936
+ return vget_lane_s32(vreinterpret_s32_s8(aa_s8x8), 0);
937
+ }
750
938
 
751
939
  template <>
752
940
  struct type_casting_traits<numext::int32_t, numext::uint8_t> {
@@ -755,16 +943,20 @@ struct type_casting_traits<numext::int32_t, numext::uint8_t> {
755
943
  template <>
756
944
  EIGEN_STRONG_INLINE Packet16uc pcast<Packet4i, Packet16uc>(const Packet4i& a, const Packet4i& b, const Packet4i& c,
757
945
  const Packet4i& d) {
758
- const uint16x8_t ab_u16 = pcast<Packet4i, Packet8us>(a, b);
759
- const uint16x8_t cd_u16 = pcast<Packet4i, Packet8us>(c, d);
760
- return vcombine_u8(vmovn_u16(ab_u16), vmovn_u16(cd_u16));
946
+ return preinterpret<Packet16uc>(pcast<Packet4i, Packet16c>(a, b, c, d));
947
+ }
948
+ template <>
949
+ EIGEN_STRONG_INLINE Packet8uc pcast<Packet4i, Packet8uc>(const Packet4i& a, const Packet4i& b) {
950
+ return preinterpret<Packet8uc>(pcast<Packet4i, Packet8c>(a, b));
761
951
  }
762
952
  template <>
763
953
  EIGEN_STRONG_INLINE Packet8uc pcast<Packet2i, Packet8uc>(const Packet2i& a, const Packet2i& b, const Packet2i& c,
764
954
  const Packet2i& d) {
765
- const uint16x4_t ab_u16 = pcast<Packet2i, Packet4us>(a, b);
766
- const uint16x4_t cd_u16 = pcast<Packet2i, Packet4us>(c, d);
767
- return vmovn_u16(vcombine_u16(ab_u16, cd_u16));
955
+ return preinterpret<Packet8uc>(pcast<Packet2i, Packet8c>(a, b, c, d));
956
+ }
957
+ template <>
958
+ EIGEN_STRONG_INLINE Packet4uc pcast<Packet4i, Packet4uc>(const Packet4i& a) {
959
+ return static_cast<Packet4uc>(pcast<Packet4i, Packet4c>(a));
768
960
  }
769
961
 
770
962
  //==============================================================================
@@ -792,6 +984,10 @@ EIGEN_STRONG_INLINE Packet2ul pcast<Packet4ui, Packet2ul>(const Packet4ui& a) {
792
984
  // Discard second half of input.
793
985
  return vmovl_u32(vget_low_u32(a));
794
986
  }
987
+ template <>
988
+ EIGEN_STRONG_INLINE Packet2ul pcast<Packet2ui, Packet2ul>(const Packet2ui& a) {
989
+ return vmovl_u32(a);
990
+ }
795
991
 
796
992
  template <>
797
993
  struct type_casting_traits<numext::uint32_t, numext::int64_t> {
@@ -799,33 +995,11 @@ struct type_casting_traits<numext::uint32_t, numext::int64_t> {
799
995
  };
800
996
  template <>
801
997
  EIGEN_STRONG_INLINE Packet2l pcast<Packet4ui, Packet2l>(const Packet4ui& a) {
802
- return vreinterpretq_s64_u64(pcast<Packet4ui, Packet2ul>(a));
803
- }
804
-
805
- template <>
806
- struct type_casting_traits<numext::uint32_t, numext::uint32_t> {
807
- enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
808
- };
809
- template <>
810
- EIGEN_STRONG_INLINE Packet4ui pcast<Packet4ui, Packet4ui>(const Packet4ui& a) {
811
- return a;
812
- }
813
- template <>
814
- EIGEN_STRONG_INLINE Packet2ui pcast<Packet2ui, Packet2ui>(const Packet2ui& a) {
815
- return a;
816
- }
817
-
818
- template <>
819
- struct type_casting_traits<numext::uint32_t, numext::int32_t> {
820
- enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
821
- };
822
- template <>
823
- EIGEN_STRONG_INLINE Packet4i pcast<Packet4ui, Packet4i>(const Packet4ui& a) {
824
- return vreinterpretq_s32_u32(a);
998
+ return preinterpret<Packet2l>(pcast<Packet4ui, Packet2ul>(a));
825
999
  }
826
1000
  template <>
827
- EIGEN_STRONG_INLINE Packet2i pcast<Packet2ui, Packet2i>(const Packet2ui& a) {
828
- return vreinterpret_s32_u32(a);
1001
+ EIGEN_STRONG_INLINE Packet2l pcast<Packet2ui, Packet2l>(const Packet2ui& a) {
1002
+ return preinterpret<Packet2l>(pcast<Packet2ui, Packet2ul>(a));
829
1003
  }
830
1004
 
831
1005
  template <>
@@ -840,6 +1014,10 @@ template <>
840
1014
  EIGEN_STRONG_INLINE Packet4us pcast<Packet2ui, Packet4us>(const Packet2ui& a, const Packet2ui& b) {
841
1015
  return vmovn_u32(vcombine_u32(a, b));
842
1016
  }
1017
+ template <>
1018
+ EIGEN_STRONG_INLINE Packet4us pcast<Packet4ui, Packet4us>(const Packet4ui& a) {
1019
+ return vmovn_u32(a);
1020
+ }
843
1021
 
844
1022
  template <>
845
1023
  struct type_casting_traits<numext::uint32_t, numext::int16_t> {
@@ -847,11 +1025,15 @@ struct type_casting_traits<numext::uint32_t, numext::int16_t> {
847
1025
  };
848
1026
  template <>
849
1027
  EIGEN_STRONG_INLINE Packet8s pcast<Packet4ui, Packet8s>(const Packet4ui& a, const Packet4ui& b) {
850
- return vreinterpretq_s16_u16(pcast<Packet4ui, Packet8us>(a, b));
1028
+ return preinterpret<Packet8s>(pcast<Packet4ui, Packet8us>(a, b));
851
1029
  }
852
1030
  template <>
853
1031
  EIGEN_STRONG_INLINE Packet4s pcast<Packet2ui, Packet4s>(const Packet2ui& a, const Packet2ui& b) {
854
- return vreinterpret_s16_u16(pcast<Packet2ui, Packet4us>(a, b));
1032
+ return preinterpret<Packet4s>(pcast<Packet2ui, Packet4us>(a, b));
1033
+ }
1034
+ template <>
1035
+ EIGEN_STRONG_INLINE Packet4s pcast<Packet4ui, Packet4s>(const Packet4ui& a) {
1036
+ return preinterpret<Packet4s>(pcast<Packet4ui, Packet4us>(a));
855
1037
  }
856
1038
 
857
1039
  template <>
@@ -866,12 +1048,24 @@ EIGEN_STRONG_INLINE Packet16uc pcast<Packet4ui, Packet16uc>(const Packet4ui& a,
866
1048
  return vcombine_u8(vmovn_u16(ab_u16), vmovn_u16(cd_u16));
867
1049
  }
868
1050
  template <>
1051
+ EIGEN_STRONG_INLINE Packet8uc pcast<Packet4ui, Packet8uc>(const Packet4ui& a, const Packet4ui& b) {
1052
+ const uint16x8_t ab_u16 = vcombine_u16(vmovn_u32(a), vmovn_u32(b));
1053
+ return vmovn_u16(ab_u16);
1054
+ }
1055
+ template <>
869
1056
  EIGEN_STRONG_INLINE Packet8uc pcast<Packet2ui, Packet8uc>(const Packet2ui& a, const Packet2ui& b, const Packet2ui& c,
870
1057
  const Packet2ui& d) {
871
1058
  const uint16x4_t ab_u16 = vmovn_u32(vcombine_u32(a, b));
872
1059
  const uint16x4_t cd_u16 = vmovn_u32(vcombine_u32(c, d));
873
1060
  return vmovn_u16(vcombine_u16(ab_u16, cd_u16));
874
1061
  }
1062
+ template <>
1063
+ EIGEN_STRONG_INLINE Packet4uc pcast<Packet4ui, Packet4uc>(const Packet4ui& a) {
1064
+ const uint16x4_t a_u16x4 = vmovn_u32(a);
1065
+ const uint16x8_t aa_u16x8 = vcombine_u16(a_u16x4, a_u16x4);
1066
+ const uint8x8_t aa_u8x8 = vmovn_u16(aa_u16x8);
1067
+ return vget_lane_u32(vreinterpret_u32_u8(aa_u8x8), 0);
1068
+ }
875
1069
 
876
1070
  template <>
877
1071
  struct type_casting_traits<numext::uint32_t, numext::int8_t> {
@@ -880,12 +1074,20 @@ struct type_casting_traits<numext::uint32_t, numext::int8_t> {
880
1074
  template <>
881
1075
  EIGEN_STRONG_INLINE Packet16c pcast<Packet4ui, Packet16c>(const Packet4ui& a, const Packet4ui& b, const Packet4ui& c,
882
1076
  const Packet4ui& d) {
883
- return vreinterpretq_s8_u8(pcast<Packet4ui, Packet16uc>(a, b, c, d));
1077
+ return preinterpret<Packet16c>(pcast<Packet4ui, Packet16uc>(a, b, c, d));
1078
+ }
1079
+ template <>
1080
+ EIGEN_STRONG_INLINE Packet8c pcast<Packet4ui, Packet8c>(const Packet4ui& a, const Packet4ui& b) {
1081
+ return preinterpret<Packet8c>(pcast<Packet4ui, Packet8uc>(a, b));
884
1082
  }
885
1083
  template <>
886
1084
  EIGEN_STRONG_INLINE Packet8c pcast<Packet2ui, Packet8c>(const Packet2ui& a, const Packet2ui& b, const Packet2ui& c,
887
1085
  const Packet2ui& d) {
888
- return vreinterpret_s8_u8(pcast<Packet2ui, Packet8uc>(a, b, c, d));
1086
+ return preinterpret<Packet8c>(pcast<Packet2ui, Packet8uc>(a, b, c, d));
1087
+ }
1088
+ template <>
1089
+ EIGEN_STRONG_INLINE Packet4c pcast<Packet4ui, Packet4c>(const Packet4ui& a) {
1090
+ return static_cast<Packet4c>(pcast<Packet4ui, Packet4uc>(a));
889
1091
  }
890
1092
 
891
1093
  //==============================================================================
@@ -895,27 +1097,31 @@ template <>
895
1097
  struct type_casting_traits<numext::int64_t, float> {
896
1098
  enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 };
897
1099
  };
898
- template <>
899
- EIGEN_STRONG_INLINE Packet4f pcast<Packet2l, Packet4f>(const Packet2l& a, const Packet2l& b) {
900
- return vcvtq_f32_s32(vcombine_s32(vmovn_s64(a), vmovn_s64(b)));
901
- }
902
1100
 
903
1101
  template <>
904
- struct type_casting_traits<numext::int64_t, numext::int64_t> {
905
- enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
906
- };
907
- template <>
908
- EIGEN_STRONG_INLINE Packet2l pcast<Packet2l, Packet2l>(const Packet2l& a) {
909
- return a;
1102
+ EIGEN_STRONG_INLINE Packet4f pcast<Packet2l, Packet4f>(const Packet2l& a, const Packet2l& b) {
1103
+ #if EIGEN_ARCH_ARM64
1104
+ return vcombine_f32(vcvt_f32_f64(vcvtq_f64_s64(a)), vcvt_f32_f64(vcvtq_f64_s64(b)));
1105
+ #else
1106
+ EIGEN_ALIGN_MAX int64_t lvals[4];
1107
+ pstore(lvals, a);
1108
+ pstore(lvals + 2, b);
1109
+ EIGEN_ALIGN_MAX float fvals[4] = {static_cast<float>(lvals[0]), static_cast<float>(lvals[1]),
1110
+ static_cast<float>(lvals[2]), static_cast<float>(lvals[3])};
1111
+ return pload<Packet4f>(fvals);
1112
+ #endif
910
1113
  }
911
1114
 
912
1115
  template <>
913
- struct type_casting_traits<numext::int64_t, numext::uint64_t> {
914
- enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
915
- };
916
- template <>
917
- EIGEN_STRONG_INLINE Packet2ul pcast<Packet2l, Packet2ul>(const Packet2l& a) {
918
- return vreinterpretq_u64_s64(a);
1116
+ EIGEN_STRONG_INLINE Packet2f pcast<Packet2l, Packet2f>(const Packet2l& a) {
1117
+ #if EIGEN_ARCH_ARM64
1118
+ return vcvt_f32_f64(vcvtq_f64_s64(a));
1119
+ #else
1120
+ EIGEN_ALIGN_MAX int64_t lvals[2];
1121
+ pstore(lvals, a);
1122
+ EIGEN_ALIGN_MAX float fvals[2] = {static_cast<float>(lvals[0]), static_cast<float>(lvals[1])};
1123
+ return pload<Packet2f>(fvals);
1124
+ #endif
919
1125
  }
920
1126
 
921
1127
  template <>
@@ -926,6 +1132,10 @@ template <>
926
1132
  EIGEN_STRONG_INLINE Packet4i pcast<Packet2l, Packet4i>(const Packet2l& a, const Packet2l& b) {
927
1133
  return vcombine_s32(vmovn_s64(a), vmovn_s64(b));
928
1134
  }
1135
+ template <>
1136
+ EIGEN_STRONG_INLINE Packet2i pcast<Packet2l, Packet2i>(const Packet2l& a) {
1137
+ return vmovn_s64(a);
1138
+ }
929
1139
 
930
1140
  template <>
931
1141
  struct type_casting_traits<numext::int64_t, numext::uint32_t> {
@@ -935,6 +1145,10 @@ template <>
935
1145
  EIGEN_STRONG_INLINE Packet4ui pcast<Packet2l, Packet4ui>(const Packet2l& a, const Packet2l& b) {
936
1146
  return vcombine_u32(vmovn_u64(vreinterpretq_u64_s64(a)), vmovn_u64(vreinterpretq_u64_s64(b)));
937
1147
  }
1148
+ template <>
1149
+ EIGEN_STRONG_INLINE Packet2ui pcast<Packet2l, Packet2ui>(const Packet2l& a) {
1150
+ return vmovn_u64(vreinterpretq_u64_s64(a));
1151
+ }
938
1152
 
939
1153
  template <>
940
1154
  struct type_casting_traits<numext::int64_t, numext::int16_t> {
@@ -947,6 +1161,11 @@ EIGEN_STRONG_INLINE Packet8s pcast<Packet2l, Packet8s>(const Packet2l& a, const
947
1161
  const int32x4_t cd_s32 = pcast<Packet2l, Packet4i>(c, d);
948
1162
  return vcombine_s16(vmovn_s32(ab_s32), vmovn_s32(cd_s32));
949
1163
  }
1164
+ template <>
1165
+ EIGEN_STRONG_INLINE Packet4s pcast<Packet2l, Packet4s>(const Packet2l& a, const Packet2l& b) {
1166
+ const int32x4_t ab_s32 = pcast<Packet2l, Packet4i>(a, b);
1167
+ return vmovn_s32(ab_s32);
1168
+ }
950
1169
 
951
1170
  template <>
952
1171
  struct type_casting_traits<numext::int64_t, numext::uint16_t> {
@@ -955,9 +1174,11 @@ struct type_casting_traits<numext::int64_t, numext::uint16_t> {
955
1174
  template <>
956
1175
  EIGEN_STRONG_INLINE Packet8us pcast<Packet2l, Packet8us>(const Packet2l& a, const Packet2l& b, const Packet2l& c,
957
1176
  const Packet2l& d) {
958
- const uint32x4_t ab_u32 = pcast<Packet2l, Packet4ui>(a, b);
959
- const uint32x4_t cd_u32 = pcast<Packet2l, Packet4ui>(c, d);
960
- return vcombine_u16(vmovn_u32(ab_u32), vmovn_u32(cd_u32));
1177
+ return preinterpret<Packet8us>(pcast<Packet2l, Packet8s>(a, b, c, d));
1178
+ }
1179
+ template <>
1180
+ EIGEN_STRONG_INLINE Packet4us pcast<Packet2l, Packet4us>(const Packet2l& a, const Packet2l& b) {
1181
+ return preinterpret<Packet4us>(pcast<Packet2l, Packet4s>(a, b));
961
1182
  }
962
1183
 
963
1184
  template <>
@@ -972,6 +1193,19 @@ EIGEN_STRONG_INLINE Packet16c pcast<Packet2l, Packet16c>(const Packet2l& a, cons
972
1193
  const int16x8_t efgh_s16 = pcast<Packet2l, Packet8s>(e, f, g, h);
973
1194
  return vcombine_s8(vmovn_s16(abcd_s16), vmovn_s16(efgh_s16));
974
1195
  }
1196
+ template <>
1197
+ EIGEN_STRONG_INLINE Packet8c pcast<Packet2l, Packet8c>(const Packet2l& a, const Packet2l& b, const Packet2l& c,
1198
+ const Packet2l& d) {
1199
+ const int16x8_t abcd_s16 = pcast<Packet2l, Packet8s>(a, b, c, d);
1200
+ return vmovn_s16(abcd_s16);
1201
+ }
1202
+ template <>
1203
+ EIGEN_STRONG_INLINE Packet4c pcast<Packet2l, Packet4c>(const Packet2l& a, const Packet2l& b) {
1204
+ const int16x4_t ab_s16 = pcast<Packet2l, Packet4s>(a, b);
1205
+ const int16x8_t abab_s16 = vcombine_s16(ab_s16, ab_s16);
1206
+ const int8x8_t abab_s8 = vmovn_s16(abab_s16);
1207
+ return vget_lane_s32(vreinterpret_s32_s8(abab_s8), 0);
1208
+ }
975
1209
 
976
1210
  template <>
977
1211
  struct type_casting_traits<numext::int64_t, numext::uint8_t> {
@@ -985,6 +1219,15 @@ EIGEN_STRONG_INLINE Packet16uc pcast<Packet2l, Packet16uc>(const Packet2l& a, co
985
1219
  const uint16x8_t efgh_u16 = pcast<Packet2l, Packet8us>(e, f, g, h);
986
1220
  return vcombine_u8(vmovn_u16(abcd_u16), vmovn_u16(efgh_u16));
987
1221
  }
1222
+ template <>
1223
+ EIGEN_STRONG_INLINE Packet8uc pcast<Packet2l, Packet8uc>(const Packet2l& a, const Packet2l& b, const Packet2l& c,
1224
+ const Packet2l& d) {
1225
+ return preinterpret<Packet8uc>(pcast<Packet2l, Packet8c>(a, b, c, d));
1226
+ }
1227
+ template <>
1228
+ EIGEN_STRONG_INLINE Packet4uc pcast<Packet2l, Packet4uc>(const Packet2l& a, const Packet2l& b) {
1229
+ return static_cast<Packet4uc>(pcast<Packet2l, Packet4c>(a, b));
1230
+ }
988
1231
 
989
1232
  //==============================================================================
990
1233
  // pcast, SrcType = uint64_t
@@ -995,25 +1238,27 @@ struct type_casting_traits<numext::uint64_t, float> {
995
1238
  };
996
1239
  template <>
997
1240
  EIGEN_STRONG_INLINE Packet4f pcast<Packet2ul, Packet4f>(const Packet2ul& a, const Packet2ul& b) {
998
- return vcvtq_f32_u32(vcombine_u32(vmovn_u64(a), vmovn_u64(b)));
999
- }
1000
-
1001
- template <>
1002
- struct type_casting_traits<numext::uint64_t, numext::uint64_t> {
1003
- enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
1004
- };
1005
- template <>
1006
- EIGEN_STRONG_INLINE Packet2ul pcast<Packet2ul, Packet2ul>(const Packet2ul& a) {
1007
- return a;
1241
+ #if EIGEN_ARCH_ARM64
1242
+ return vcombine_f32(vcvt_f32_f64(vcvtq_f64_u64(a)), vcvt_f32_f64(vcvtq_f64_u64(b)));
1243
+ #else
1244
+ EIGEN_ALIGN_MAX uint64_t uvals[4];
1245
+ pstore(uvals, a);
1246
+ pstore(uvals + 2, b);
1247
+ EIGEN_ALIGN_MAX float fvals[4] = {static_cast<float>(uvals[0]), static_cast<float>(uvals[1]),
1248
+ static_cast<float>(uvals[2]), static_cast<float>(uvals[3])};
1249
+ return pload<Packet4f>(fvals);
1250
+ #endif
1008
1251
  }
1009
-
1010
1252
  template <>
1011
- struct type_casting_traits<numext::uint64_t, numext::int64_t> {
1012
- enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
1013
- };
1014
- template <>
1015
- EIGEN_STRONG_INLINE Packet2l pcast<Packet2ul, Packet2l>(const Packet2ul& a) {
1016
- return vreinterpretq_s64_u64(a);
1253
+ EIGEN_STRONG_INLINE Packet2f pcast<Packet2ul, Packet2f>(const Packet2ul& a) {
1254
+ #if EIGEN_ARCH_ARM64
1255
+ return vcvt_f32_f64(vcvtq_f64_u64(a));
1256
+ #else
1257
+ EIGEN_ALIGN_MAX uint64_t uvals[2];
1258
+ pstore(uvals, a);
1259
+ EIGEN_ALIGN_MAX float fvals[2] = {static_cast<float>(uvals[0]), static_cast<float>(uvals[1])};
1260
+ return pload<Packet2f>(fvals);
1261
+ #endif
1017
1262
  }
1018
1263
 
1019
1264
  template <>
@@ -1024,6 +1269,10 @@ template <>
1024
1269
  EIGEN_STRONG_INLINE Packet4ui pcast<Packet2ul, Packet4ui>(const Packet2ul& a, const Packet2ul& b) {
1025
1270
  return vcombine_u32(vmovn_u64(a), vmovn_u64(b));
1026
1271
  }
1272
+ template <>
1273
+ EIGEN_STRONG_INLINE Packet2ui pcast<Packet2ul, Packet2ui>(const Packet2ul& a) {
1274
+ return vmovn_u64(a);
1275
+ }
1027
1276
 
1028
1277
  template <>
1029
1278
  struct type_casting_traits<numext::uint64_t, numext::int32_t> {
@@ -1031,7 +1280,11 @@ struct type_casting_traits<numext::uint64_t, numext::int32_t> {
1031
1280
  };
1032
1281
  template <>
1033
1282
  EIGEN_STRONG_INLINE Packet4i pcast<Packet2ul, Packet4i>(const Packet2ul& a, const Packet2ul& b) {
1034
- return vreinterpretq_s32_u32(pcast<Packet2ul, Packet4ui>(a, b));
1283
+ return preinterpret<Packet4i>(pcast<Packet2ul, Packet4ui>(a, b));
1284
+ }
1285
+ template <>
1286
+ EIGEN_STRONG_INLINE Packet2i pcast<Packet2ul, Packet2i>(const Packet2ul& a) {
1287
+ return preinterpret<Packet2i>(pcast<Packet2ul, Packet2ui>(a));
1035
1288
  }
1036
1289
 
1037
1290
  template <>
@@ -1045,6 +1298,10 @@ EIGEN_STRONG_INLINE Packet8us pcast<Packet2ul, Packet8us>(const Packet2ul& a, co
1045
1298
  const uint16x4_t cd_u16 = vmovn_u32(vcombine_u32(vmovn_u64(c), vmovn_u64(d)));
1046
1299
  return vcombine_u16(ab_u16, cd_u16);
1047
1300
  }
1301
+ template <>
1302
+ EIGEN_STRONG_INLINE Packet4us pcast<Packet2ul, Packet4us>(const Packet2ul& a, const Packet2ul& b) {
1303
+ return vmovn_u32(vcombine_u32(vmovn_u64(a), vmovn_u64(b)));
1304
+ }
1048
1305
 
1049
1306
  template <>
1050
1307
  struct type_casting_traits<numext::uint64_t, numext::int16_t> {
@@ -1053,7 +1310,11 @@ struct type_casting_traits<numext::uint64_t, numext::int16_t> {
1053
1310
  template <>
1054
1311
  EIGEN_STRONG_INLINE Packet8s pcast<Packet2ul, Packet8s>(const Packet2ul& a, const Packet2ul& b, const Packet2ul& c,
1055
1312
  const Packet2ul& d) {
1056
- return vreinterpretq_s16_u16(pcast<Packet2ul, Packet8us>(a, b, c, d));
1313
+ return preinterpret<Packet8s>(pcast<Packet2ul, Packet8us>(a, b, c, d));
1314
+ }
1315
+ template <>
1316
+ EIGEN_STRONG_INLINE Packet4s pcast<Packet2ul, Packet4s>(const Packet2ul& a, const Packet2ul& b) {
1317
+ return preinterpret<Packet4s>(pcast<Packet2ul, Packet4us>(a, b));
1057
1318
  }
1058
1319
 
1059
1320
  template <>
@@ -1068,6 +1329,19 @@ EIGEN_STRONG_INLINE Packet16uc pcast<Packet2ul, Packet16uc>(const Packet2ul& a,
1068
1329
  const uint16x8_t efgh_u16 = pcast<Packet2ul, Packet8us>(e, f, g, h);
1069
1330
  return vcombine_u8(vmovn_u16(abcd_u16), vmovn_u16(efgh_u16));
1070
1331
  }
1332
+ template <>
1333
+ EIGEN_STRONG_INLINE Packet8uc pcast<Packet2ul, Packet8uc>(const Packet2ul& a, const Packet2ul& b, const Packet2ul& c,
1334
+ const Packet2ul& d) {
1335
+ const uint16x8_t abcd_u16 = pcast<Packet2ul, Packet8us>(a, b, c, d);
1336
+ return vmovn_u16(abcd_u16);
1337
+ }
1338
+ template <>
1339
+ EIGEN_STRONG_INLINE Packet4uc pcast<Packet2ul, Packet4uc>(const Packet2ul& a, const Packet2ul& b) {
1340
+ const uint16x4_t ab_u16 = pcast<Packet2ul, Packet4us>(a, b);
1341
+ const uint16x8_t abab_u16 = vcombine_u16(ab_u16, ab_u16);
1342
+ const uint8x8_t abab_u8 = vmovn_u16(abab_u16);
1343
+ return vget_lane_u32(vreinterpret_u32_u8(abab_u8), 0);
1344
+ }
1071
1345
 
1072
1346
  template <>
1073
1347
  struct type_casting_traits<numext::uint64_t, numext::int8_t> {
@@ -1077,129 +1351,47 @@ template <>
1077
1351
  EIGEN_STRONG_INLINE Packet16c pcast<Packet2ul, Packet16c>(const Packet2ul& a, const Packet2ul& b, const Packet2ul& c,
1078
1352
  const Packet2ul& d, const Packet2ul& e, const Packet2ul& f,
1079
1353
  const Packet2ul& g, const Packet2ul& h) {
1080
- return vreinterpretq_s8_u8(pcast<Packet2ul, Packet16uc>(a, b, c, d, e, f, g, h));
1081
- }
1082
-
1083
- //==============================================================================
1084
- // preinterpret
1085
- //==============================================================================
1086
- template <>
1087
- EIGEN_STRONG_INLINE Packet2f preinterpret<Packet2f, Packet2i>(const Packet2i& a) {
1088
- return vreinterpret_f32_s32(a);
1089
- }
1090
- template <>
1091
- EIGEN_STRONG_INLINE Packet2f preinterpret<Packet2f, Packet2ui>(const Packet2ui& a) {
1092
- return vreinterpret_f32_u32(a);
1093
- }
1094
- template <>
1095
- EIGEN_STRONG_INLINE Packet4f preinterpret<Packet4f, Packet4i>(const Packet4i& a) {
1096
- return vreinterpretq_f32_s32(a);
1097
- }
1098
- template <>
1099
- EIGEN_STRONG_INLINE Packet4f preinterpret<Packet4f, Packet4ui>(const Packet4ui& a) {
1100
- return vreinterpretq_f32_u32(a);
1101
- }
1102
-
1103
- template <>
1104
- EIGEN_STRONG_INLINE Packet4c preinterpret<Packet4c, Packet4uc>(const Packet4uc& a) {
1105
- return static_cast<Packet4c>(a);
1106
- }
1107
- template <>
1108
- EIGEN_STRONG_INLINE Packet8c preinterpret<Packet8c, Packet8uc>(const Packet8uc& a) {
1109
- return vreinterpret_s8_u8(a);
1110
- }
1111
- template <>
1112
- EIGEN_STRONG_INLINE Packet16c preinterpret<Packet16c, Packet16uc>(const Packet16uc& a) {
1113
- return vreinterpretq_s8_u8(a);
1114
- }
1115
-
1116
- template <>
1117
- EIGEN_STRONG_INLINE Packet4uc preinterpret<Packet4uc, Packet4c>(const Packet4c& a) {
1118
- return static_cast<Packet4uc>(a);
1119
- }
1120
- template <>
1121
- EIGEN_STRONG_INLINE Packet8uc preinterpret<Packet8uc, Packet8c>(const Packet8c& a) {
1122
- return vreinterpret_u8_s8(a);
1123
- }
1124
- template <>
1125
- EIGEN_STRONG_INLINE Packet16uc preinterpret<Packet16uc, Packet16c>(const Packet16c& a) {
1126
- return vreinterpretq_u8_s8(a);
1354
+ return preinterpret<Packet16c>(pcast<Packet2ul, Packet16uc>(a, b, c, d, e, f, g, h));
1127
1355
  }
1128
-
1129
1356
  template <>
1130
- EIGEN_STRONG_INLINE Packet4s preinterpret<Packet4s, Packet4us>(const Packet4us& a) {
1131
- return vreinterpret_s16_u16(a);
1357
+ EIGEN_STRONG_INLINE Packet8c pcast<Packet2ul, Packet8c>(const Packet2ul& a, const Packet2ul& b, const Packet2ul& c,
1358
+ const Packet2ul& d) {
1359
+ return preinterpret<Packet8c>(pcast<Packet2ul, Packet8uc>(a, b, c, d));
1132
1360
  }
1133
1361
  template <>
1134
- EIGEN_STRONG_INLINE Packet8s preinterpret<Packet8s, Packet8us>(const Packet8us& a) {
1135
- return vreinterpretq_s16_u16(a);
1362
+ EIGEN_STRONG_INLINE Packet4c pcast<Packet2ul, Packet4c>(const Packet2ul& a, const Packet2ul& b) {
1363
+ return static_cast<Packet4c>(pcast<Packet2ul, Packet4uc>(a, b));
1136
1364
  }
1137
1365
 
1138
- template <>
1139
- EIGEN_STRONG_INLINE Packet4us preinterpret<Packet4us, Packet4s>(const Packet4s& a) {
1140
- return vreinterpret_u16_s16(a);
1141
- }
1142
- template <>
1143
- EIGEN_STRONG_INLINE Packet8us preinterpret<Packet8us, Packet8s>(const Packet8s& a) {
1144
- return vreinterpretq_u16_s16(a);
1145
- }
1366
+ #if EIGEN_ARCH_ARM64
1146
1367
 
1147
- template <>
1148
- EIGEN_STRONG_INLINE Packet2i preinterpret<Packet2i, Packet2f>(const Packet2f& a) {
1149
- return vreinterpret_s32_f32(a);
1150
- }
1151
- template <>
1152
- EIGEN_STRONG_INLINE Packet2i preinterpret<Packet2i, Packet2ui>(const Packet2ui& a) {
1153
- return vreinterpret_s32_u32(a);
1154
- }
1155
- template <>
1156
- EIGEN_STRONG_INLINE Packet4i preinterpret<Packet4i, Packet4f>(const Packet4f& a) {
1157
- return vreinterpretq_s32_f32(a);
1158
- }
1159
- template <>
1160
- EIGEN_STRONG_INLINE Packet4i preinterpret<Packet4i, Packet4ui>(const Packet4ui& a) {
1161
- return vreinterpretq_s32_u32(a);
1162
- }
1368
+ //==============================================================================
1369
+ // pcast/preinterpret, Double
1370
+ //==============================================================================
1163
1371
 
1164
1372
  template <>
1165
- EIGEN_STRONG_INLINE Packet2ui preinterpret<Packet2ui, Packet2f>(const Packet2f& a) {
1166
- return vreinterpret_u32_f32(a);
1167
- }
1168
- template <>
1169
- EIGEN_STRONG_INLINE Packet2ui preinterpret<Packet2ui, Packet2i>(const Packet2i& a) {
1170
- return vreinterpret_u32_s32(a);
1373
+ EIGEN_STRONG_INLINE Packet2d preinterpret<Packet2d, Packet2l>(const Packet2l& a) {
1374
+ return Packet2d(vreinterpretq_f64_s64(a));
1171
1375
  }
1172
1376
  template <>
1173
- EIGEN_STRONG_INLINE Packet4ui preinterpret<Packet4ui, Packet4f>(const Packet4f& a) {
1174
- return vreinterpretq_u32_f32(a);
1377
+ EIGEN_STRONG_INLINE Packet2d preinterpret<Packet2d, Packet2ul>(const Packet2ul& a) {
1378
+ return Packet2d(vreinterpretq_f64_u64(a));
1175
1379
  }
1176
1380
  template <>
1177
- EIGEN_STRONG_INLINE Packet4ui preinterpret<Packet4ui, Packet4i>(const Packet4i& a) {
1178
- return vreinterpretq_u32_s32(a);
1381
+ EIGEN_STRONG_INLINE Packet2l preinterpret<Packet2l, Packet2d>(const Packet2d& a) {
1382
+ return Packet2l(vreinterpretq_s64_f64(a));
1179
1383
  }
1180
-
1181
1384
  template <>
1182
- EIGEN_STRONG_INLINE Packet2l preinterpret<Packet2l, Packet2ul>(const Packet2ul& a) {
1183
- return vreinterpretq_s64_u64(a);
1385
+ EIGEN_STRONG_INLINE Packet2ul preinterpret<Packet2ul, Packet2d>(const Packet2d& a) {
1386
+ return Packet2ul(vreinterpretq_u64_f64(a));
1184
1387
  }
1185
1388
  template <>
1186
- EIGEN_STRONG_INLINE Packet2ul preinterpret<Packet2ul, Packet2l>(const Packet2l& a) {
1187
- return vreinterpretq_u64_s64(a);
1389
+ EIGEN_STRONG_INLINE Packet2d preinterpret<Packet2d, Packet4i>(const Packet4i& a) {
1390
+ return Packet2d(vreinterpretq_f64_s32(a));
1188
1391
  }
1189
-
1190
- #if EIGEN_ARCH_ARM64
1191
-
1192
- //==============================================================================
1193
- // pcast/preinterpret, Double
1194
- //==============================================================================
1195
-
1196
1392
  template <>
1197
- struct type_casting_traits<double, double> {
1198
- enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
1199
- };
1200
- template <>
1201
- EIGEN_STRONG_INLINE Packet2d pcast<Packet2d, Packet2d>(const Packet2d& a) {
1202
- return a;
1393
+ EIGEN_STRONG_INLINE Packet4i preinterpret<Packet4i, Packet2d>(const Packet2d& a) {
1394
+ return Packet4i(vreinterpretq_s32_f64(a));
1203
1395
  }
1204
1396
 
1205
1397
  template <>
@@ -1210,6 +1402,10 @@ template <>
1210
1402
  EIGEN_STRONG_INLINE Packet4f pcast<Packet2d, Packet4f>(const Packet2d& a, const Packet2d& b) {
1211
1403
  return vcombine_f32(vcvt_f32_f64(a), vcvt_f32_f64(b));
1212
1404
  }
1405
+ template <>
1406
+ EIGEN_STRONG_INLINE Packet2f pcast<Packet2d, Packet2f>(const Packet2d& a) {
1407
+ return vcvt_f32_f64(a);
1408
+ }
1213
1409
 
1214
1410
  template <>
1215
1411
  struct type_casting_traits<double, numext::int64_t> {
@@ -1237,6 +1433,10 @@ template <>
1237
1433
  EIGEN_STRONG_INLINE Packet4i pcast<Packet2d, Packet4i>(const Packet2d& a, const Packet2d& b) {
1238
1434
  return vcombine_s32(vmovn_s64(vcvtq_s64_f64(a)), vmovn_s64(vcvtq_s64_f64(b)));
1239
1435
  }
1436
+ template <>
1437
+ EIGEN_STRONG_INLINE Packet2i pcast<Packet2d, Packet2i>(const Packet2d& a) {
1438
+ return vmovn_s64(vcvtq_s64_f64(a));
1439
+ }
1240
1440
 
1241
1441
  template <>
1242
1442
  struct type_casting_traits<double, numext::uint32_t> {
@@ -1246,6 +1446,10 @@ template <>
1246
1446
  EIGEN_STRONG_INLINE Packet4ui pcast<Packet2d, Packet4ui>(const Packet2d& a, const Packet2d& b) {
1247
1447
  return vcombine_u32(vmovn_u64(vcvtq_u64_f64(a)), vmovn_u64(vcvtq_u64_f64(b)));
1248
1448
  }
1449
+ template <>
1450
+ EIGEN_STRONG_INLINE Packet2ui pcast<Packet2d, Packet2ui>(const Packet2d& a) {
1451
+ return vmovn_u64(vcvtq_u64_f64(a));
1452
+ }
1249
1453
 
1250
1454
  template <>
1251
1455
  struct type_casting_traits<double, numext::int16_t> {
@@ -1258,6 +1462,11 @@ EIGEN_STRONG_INLINE Packet8s pcast<Packet2d, Packet8s>(const Packet2d& a, const
1258
1462
  const int32x4_t cd_s32 = pcast<Packet2d, Packet4i>(c, d);
1259
1463
  return vcombine_s16(vmovn_s32(ab_s32), vmovn_s32(cd_s32));
1260
1464
  }
1465
+ template <>
1466
+ EIGEN_STRONG_INLINE Packet4s pcast<Packet2d, Packet4s>(const Packet2d& a, const Packet2d& b) {
1467
+ const int32x4_t ab_s32 = pcast<Packet2d, Packet4i>(a, b);
1468
+ return vmovn_s32(ab_s32);
1469
+ }
1261
1470
 
1262
1471
  template <>
1263
1472
  struct type_casting_traits<double, numext::uint16_t> {
@@ -1266,9 +1475,11 @@ struct type_casting_traits<double, numext::uint16_t> {
1266
1475
  template <>
1267
1476
  EIGEN_STRONG_INLINE Packet8us pcast<Packet2d, Packet8us>(const Packet2d& a, const Packet2d& b, const Packet2d& c,
1268
1477
  const Packet2d& d) {
1269
- const uint32x4_t ab_u32 = pcast<Packet2d, Packet4ui>(a, b);
1270
- const uint32x4_t cd_u32 = pcast<Packet2d, Packet4ui>(c, d);
1271
- return vcombine_u16(vmovn_u32(ab_u32), vmovn_u32(cd_u32));
1478
+ return preinterpret<Packet8us>(pcast<Packet2d, Packet8s>(a, b, c, d));
1479
+ }
1480
+ template <>
1481
+ EIGEN_STRONG_INLINE Packet4us pcast<Packet2d, Packet4us>(const Packet2d& a, const Packet2d& b) {
1482
+ return preinterpret<Packet4us>(pcast<Packet2d, Packet4s>(a, b));
1272
1483
  }
1273
1484
 
1274
1485
  template <>
@@ -1283,6 +1494,17 @@ EIGEN_STRONG_INLINE Packet16c pcast<Packet2d, Packet16c>(const Packet2d& a, cons
1283
1494
  const int16x8_t efgh_s16 = pcast<Packet2d, Packet8s>(e, f, g, h);
1284
1495
  return vcombine_s8(vmovn_s16(abcd_s16), vmovn_s16(efgh_s16));
1285
1496
  }
1497
+ template <>
1498
+ EIGEN_STRONG_INLINE Packet8c pcast<Packet2d, Packet8c>(const Packet2d& a, const Packet2d& b, const Packet2d& c,
1499
+ const Packet2d& d) {
1500
+ const int16x8_t abcd_s16 = pcast<Packet2d, Packet8s>(a, b, c, d);
1501
+ return vmovn_s16(abcd_s16);
1502
+ }
1503
+ template <>
1504
+ EIGEN_STRONG_INLINE Packet4c pcast<Packet2d, Packet4c>(const Packet2d& a, const Packet2d& b) {
1505
+ const int32x4_t ab_s32 = pcast<Packet2d, Packet4i>(a, b);
1506
+ return pcast<Packet4i, Packet4c>(ab_s32);
1507
+ }
1286
1508
 
1287
1509
  template <>
1288
1510
  struct type_casting_traits<double, numext::uint8_t> {
@@ -1296,6 +1518,15 @@ EIGEN_STRONG_INLINE Packet16uc pcast<Packet2d, Packet16uc>(const Packet2d& a, co
1296
1518
  const uint16x8_t efgh_u16 = pcast<Packet2d, Packet8us>(e, f, g, h);
1297
1519
  return vcombine_u8(vmovn_u16(abcd_u16), vmovn_u16(efgh_u16));
1298
1520
  }
1521
+ template <>
1522
+ EIGEN_STRONG_INLINE Packet8uc pcast<Packet2d, Packet8uc>(const Packet2d& a, const Packet2d& b, const Packet2d& c,
1523
+ const Packet2d& d) {
1524
+ return preinterpret<Packet8uc>(pcast<Packet2d, Packet8c>(a, b, c, d));
1525
+ }
1526
+ template <>
1527
+ EIGEN_STRONG_INLINE Packet4uc pcast<Packet2d, Packet4uc>(const Packet2d& a, const Packet2d& b) {
1528
+ return static_cast<Packet4uc>(pcast<Packet2d, Packet4c>(a, b));
1529
+ }
1299
1530
 
1300
1531
  template <>
1301
1532
  struct type_casting_traits<float, double> {
@@ -1306,6 +1537,10 @@ EIGEN_STRONG_INLINE Packet2d pcast<Packet4f, Packet2d>(const Packet4f& a) {
1306
1537
  // Discard second-half of input.
1307
1538
  return vcvt_f64_f32(vget_low_f32(a));
1308
1539
  }
1540
+ template <>
1541
+ EIGEN_STRONG_INLINE Packet2d pcast<Packet2f, Packet2d>(const Packet2f& a) {
1542
+ return vcvt_f64_f32(a);
1543
+ }
1309
1544
 
1310
1545
  template <>
1311
1546
  struct type_casting_traits<numext::int8_t, double> {
@@ -1314,7 +1549,9 @@ struct type_casting_traits<numext::int8_t, double> {
1314
1549
  template <>
1315
1550
  EIGEN_STRONG_INLINE Packet2d pcast<Packet16c, Packet2d>(const Packet16c& a) {
1316
1551
  // Discard all but first two values.
1317
- return vcvt_f64_f32(pcast<Packet8c, Packet2f>(vget_low_s8(a)));
1552
+ // MSVC defines most intrinsics as macros, so we need to do this in two lines for portability.
1553
+ Packet2f tmp = pcast<Packet8c, Packet2f>(vget_low_s8(a));
1554
+ return vcvt_f64_f32(tmp);
1318
1555
  }
1319
1556
 
1320
1557
  template <>
@@ -1324,7 +1561,8 @@ struct type_casting_traits<numext::uint8_t, double> {
1324
1561
  template <>
1325
1562
  EIGEN_STRONG_INLINE Packet2d pcast<Packet16uc, Packet2d>(const Packet16uc& a) {
1326
1563
  // Discard all but first two values.
1327
- return vcvt_f64_f32(pcast<Packet8uc, Packet2f>(vget_low_u8(a)));
1564
+ Packet2f tmp = pcast<Packet8uc, Packet2f>(vget_low_u8(a));
1565
+ return vcvt_f64_f32(tmp);
1328
1566
  }
1329
1567
 
1330
1568
  template <>
@@ -1334,7 +1572,8 @@ struct type_casting_traits<numext::int16_t, double> {
1334
1572
  template <>
1335
1573
  EIGEN_STRONG_INLINE Packet2d pcast<Packet8s, Packet2d>(const Packet8s& a) {
1336
1574
  // Discard all but first two values.
1337
- return vcvt_f64_f32(pcast<Packet4s, Packet2f>(vget_low_s16(a)));
1575
+ Packet2f tmp = pcast<Packet4s, Packet2f>(vget_low_s16(a));
1576
+ return vcvt_f64_f32(tmp);
1338
1577
  }
1339
1578
 
1340
1579
  template <>
@@ -1344,7 +1583,8 @@ struct type_casting_traits<numext::uint16_t, double> {
1344
1583
  template <>
1345
1584
  EIGEN_STRONG_INLINE Packet2d pcast<Packet8us, Packet2d>(const Packet8us& a) {
1346
1585
  // Discard all but first two values.
1347
- return vcvt_f64_f32(pcast<Packet4us, Packet2f>(vget_low_u16(a)));
1586
+ Packet2f tmp = pcast<Packet4us, Packet2f>(vget_low_u16(a));
1587
+ return vcvt_f64_f32(tmp);
1348
1588
  }
1349
1589
 
1350
1590
  template <>
@@ -1356,6 +1596,10 @@ EIGEN_STRONG_INLINE Packet2d pcast<Packet4i, Packet2d>(const Packet4i& a) {
1356
1596
  // Discard second half of input.
1357
1597
  return vcvtq_f64_s64(vmovl_s32(vget_low_s32(a)));
1358
1598
  }
1599
+ template <>
1600
+ EIGEN_STRONG_INLINE Packet2d pcast<Packet2i, Packet2d>(const Packet2i& a) {
1601
+ return vcvtq_f64_s64(vmovl_s32(a));
1602
+ }
1359
1603
 
1360
1604
  template <>
1361
1605
  struct type_casting_traits<numext::uint32_t, double> {
@@ -1366,6 +1610,10 @@ EIGEN_STRONG_INLINE Packet2d pcast<Packet4ui, Packet2d>(const Packet4ui& a) {
1366
1610
  // Discard second half of input.
1367
1611
  return vcvtq_f64_u64(vmovl_u32(vget_low_u32(a)));
1368
1612
  }
1613
+ template <>
1614
+ EIGEN_STRONG_INLINE Packet2d pcast<Packet2ui, Packet2d>(const Packet2ui& a) {
1615
+ return vcvtq_f64_u64(vmovl_u32(a));
1616
+ }
1369
1617
 
1370
1618
  template <>
1371
1619
  struct type_casting_traits<numext::int64_t, double> {
@@ -1385,31 +1633,6 @@ EIGEN_STRONG_INLINE Packet2d pcast<Packet2ul, Packet2d>(const Packet2ul& a) {
1385
1633
  return vcvtq_f64_u64(a);
1386
1634
  }
1387
1635
 
1388
- template <>
1389
- EIGEN_STRONG_INLINE Packet2d preinterpret<Packet2d, Packet2l>(const Packet2l& a) {
1390
- return vreinterpretq_f64_s64(a);
1391
- }
1392
- template <>
1393
- EIGEN_STRONG_INLINE Packet2d preinterpret<Packet2d, Packet2ul>(const Packet2ul& a) {
1394
- return vreinterpretq_f64_u64(a);
1395
- }
1396
- template <>
1397
- EIGEN_STRONG_INLINE Packet2l preinterpret<Packet2l, Packet2d>(const Packet2d& a) {
1398
- return vreinterpretq_s64_f64(a);
1399
- }
1400
- template <>
1401
- EIGEN_STRONG_INLINE Packet2ul preinterpret<Packet2ul, Packet2d>(const Packet2d& a) {
1402
- return vreinterpretq_u64_f64(a);
1403
- }
1404
- template <>
1405
- EIGEN_STRONG_INLINE Packet2d preinterpret<Packet2d, Packet4i>(const Packet4i& a) {
1406
- return vreinterpretq_f64_s32(a);
1407
- }
1408
- template <>
1409
- EIGEN_STRONG_INLINE Packet4i preinterpret<Packet4i, Packet2d>(const Packet2d& a) {
1410
- return vreinterpretq_s32_f64(a);
1411
- }
1412
-
1413
1636
  #endif // EIGEN_ARCH_ARM64
1414
1637
 
1415
1638
  } // end namespace internal