@smake/eigen 1.0.2 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (287)
  1. package/README.md +1 -1
  2. package/eigen/COPYING.APACHE +203 -0
  3. package/eigen/COPYING.BSD +26 -0
  4. package/eigen/COPYING.GPL +674 -0
  5. package/eigen/COPYING.LGPL +502 -0
  6. package/eigen/COPYING.MINPACK +51 -0
  7. package/eigen/COPYING.MPL2 +373 -0
  8. package/eigen/COPYING.README +18 -0
  9. package/eigen/Eigen/Cholesky +0 -1
  10. package/eigen/Eigen/Core +108 -266
  11. package/eigen/Eigen/Eigenvalues +0 -1
  12. package/eigen/Eigen/Geometry +3 -6
  13. package/eigen/Eigen/Householder +0 -1
  14. package/eigen/Eigen/Jacobi +0 -1
  15. package/eigen/Eigen/KLUSupport +41 -0
  16. package/eigen/Eigen/LU +2 -5
  17. package/eigen/Eigen/OrderingMethods +0 -3
  18. package/eigen/Eigen/PaStiXSupport +1 -0
  19. package/eigen/Eigen/PardisoSupport +0 -0
  20. package/eigen/Eigen/QR +0 -1
  21. package/eigen/Eigen/QtAlignedMalloc +0 -1
  22. package/eigen/Eigen/SVD +0 -1
  23. package/eigen/Eigen/Sparse +0 -2
  24. package/eigen/Eigen/SparseCholesky +0 -8
  25. package/eigen/Eigen/SparseLU +4 -0
  26. package/eigen/Eigen/src/Cholesky/LDLT.h +42 -27
  27. package/eigen/Eigen/src/Cholesky/LLT.h +39 -23
  28. package/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +90 -47
  29. package/eigen/Eigen/src/Core/ArithmeticSequence.h +413 -0
  30. package/eigen/Eigen/src/Core/Array.h +99 -11
  31. package/eigen/Eigen/src/Core/ArrayBase.h +1 -1
  32. package/eigen/Eigen/src/Core/ArrayWrapper.h +21 -21
  33. package/eigen/Eigen/src/Core/Assign.h +1 -1
  34. package/eigen/Eigen/src/Core/AssignEvaluator.h +125 -50
  35. package/eigen/Eigen/src/Core/Assign_MKL.h +10 -10
  36. package/eigen/Eigen/src/Core/BandMatrix.h +16 -16
  37. package/eigen/Eigen/src/Core/Block.h +56 -60
  38. package/eigen/Eigen/src/Core/BooleanRedux.h +29 -31
  39. package/eigen/Eigen/src/Core/CommaInitializer.h +7 -3
  40. package/eigen/Eigen/src/Core/CoreEvaluators.h +325 -272
  41. package/eigen/Eigen/src/Core/CoreIterators.h +5 -0
  42. package/eigen/Eigen/src/Core/CwiseBinaryOp.h +21 -22
  43. package/eigen/Eigen/src/Core/CwiseNullaryOp.h +153 -18
  44. package/eigen/Eigen/src/Core/CwiseUnaryOp.h +6 -6
  45. package/eigen/Eigen/src/Core/CwiseUnaryView.h +12 -10
  46. package/eigen/Eigen/src/Core/DenseBase.h +128 -39
  47. package/eigen/Eigen/src/Core/DenseCoeffsBase.h +25 -21
  48. package/eigen/Eigen/src/Core/DenseStorage.h +150 -68
  49. package/eigen/Eigen/src/Core/Diagonal.h +21 -23
  50. package/eigen/Eigen/src/Core/DiagonalMatrix.h +50 -2
  51. package/eigen/Eigen/src/Core/DiagonalProduct.h +1 -1
  52. package/eigen/Eigen/src/Core/Dot.h +10 -10
  53. package/eigen/Eigen/src/Core/EigenBase.h +10 -9
  54. package/eigen/Eigen/src/Core/ForceAlignedAccess.h +8 -4
  55. package/eigen/Eigen/src/Core/Fuzzy.h +3 -3
  56. package/eigen/Eigen/src/Core/GeneralProduct.h +20 -10
  57. package/eigen/Eigen/src/Core/GenericPacketMath.h +597 -147
  58. package/eigen/Eigen/src/Core/GlobalFunctions.h +40 -33
  59. package/eigen/Eigen/src/Core/IO.h +40 -7
  60. package/eigen/Eigen/src/Core/IndexedView.h +237 -0
  61. package/eigen/Eigen/src/Core/Inverse.h +9 -10
  62. package/eigen/Eigen/src/Core/Map.h +7 -7
  63. package/eigen/Eigen/src/Core/MapBase.h +5 -3
  64. package/eigen/Eigen/src/Core/MathFunctions.h +756 -120
  65. package/eigen/Eigen/src/Core/MathFunctionsImpl.h +118 -19
  66. package/eigen/Eigen/src/Core/Matrix.h +131 -25
  67. package/eigen/Eigen/src/Core/MatrixBase.h +19 -2
  68. package/eigen/Eigen/src/Core/NestByValue.h +25 -50
  69. package/eigen/Eigen/src/Core/NoAlias.h +4 -3
  70. package/eigen/Eigen/src/Core/NumTraits.h +107 -20
  71. package/eigen/Eigen/src/Core/PartialReduxEvaluator.h +232 -0
  72. package/eigen/Eigen/src/Core/PermutationMatrix.h +3 -3
  73. package/eigen/Eigen/src/Core/PlainObjectBase.h +145 -54
  74. package/eigen/Eigen/src/Core/Product.h +30 -25
  75. package/eigen/Eigen/src/Core/ProductEvaluators.h +183 -142
  76. package/eigen/Eigen/src/Core/Random.h +37 -1
  77. package/eigen/Eigen/src/Core/Redux.h +180 -170
  78. package/eigen/Eigen/src/Core/Ref.h +118 -21
  79. package/eigen/Eigen/src/Core/Replicate.h +8 -8
  80. package/eigen/Eigen/src/Core/Reshaped.h +454 -0
  81. package/eigen/Eigen/src/Core/ReturnByValue.h +7 -5
  82. package/eigen/Eigen/src/Core/Reverse.h +18 -12
  83. package/eigen/Eigen/src/Core/Select.h +8 -6
  84. package/eigen/Eigen/src/Core/SelfAdjointView.h +33 -20
  85. package/eigen/Eigen/src/Core/Solve.h +14 -14
  86. package/eigen/Eigen/src/Core/SolveTriangular.h +13 -13
  87. package/eigen/Eigen/src/Core/SolverBase.h +41 -3
  88. package/eigen/Eigen/src/Core/StableNorm.h +100 -70
  89. package/eigen/Eigen/src/Core/StlIterators.h +463 -0
  90. package/eigen/Eigen/src/Core/Stride.h +9 -4
  91. package/eigen/Eigen/src/Core/Swap.h +5 -4
  92. package/eigen/Eigen/src/Core/Transpose.h +86 -27
  93. package/eigen/Eigen/src/Core/Transpositions.h +26 -8
  94. package/eigen/Eigen/src/Core/TriangularMatrix.h +88 -72
  95. package/eigen/Eigen/src/Core/VectorBlock.h +5 -5
  96. package/eigen/Eigen/src/Core/VectorwiseOp.h +159 -70
  97. package/eigen/Eigen/src/Core/Visitor.h +137 -29
  98. package/eigen/Eigen/src/Core/arch/AVX/Complex.h +50 -129
  99. package/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +126 -337
  100. package/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +1092 -155
  101. package/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +65 -1
  102. package/eigen/Eigen/src/Core/arch/AVX512/Complex.h +422 -0
  103. package/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +186 -213
  104. package/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1250 -252
  105. package/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +89 -0
  106. package/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +152 -165
  107. package/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +19 -251
  108. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2937 -0
  109. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +221 -0
  110. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +629 -0
  111. package/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2042 -392
  112. package/eigen/Eigen/src/Core/arch/CUDA/Complex.h +235 -80
  113. package/eigen/Eigen/src/Core/arch/Default/BFloat16.h +700 -0
  114. package/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +102 -14
  115. package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1649 -0
  116. package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +110 -0
  117. package/eigen/Eigen/src/Core/arch/Default/Half.h +942 -0
  118. package/eigen/Eigen/src/Core/arch/Default/Settings.h +1 -1
  119. package/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +120 -0
  120. package/eigen/Eigen/src/Core/arch/{CUDA → GPU}/MathFunctions.h +16 -4
  121. package/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1685 -0
  122. package/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +80 -0
  123. package/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
  124. package/eigen/Eigen/src/Core/arch/MSA/Complex.h +648 -0
  125. package/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +387 -0
  126. package/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1233 -0
  127. package/eigen/Eigen/src/Core/arch/NEON/Complex.h +313 -219
  128. package/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +183 -0
  129. package/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +54 -70
  130. package/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4376 -549
  131. package/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1419 -0
  132. package/eigen/Eigen/src/Core/arch/SSE/Complex.h +59 -179
  133. package/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +65 -428
  134. package/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +893 -283
  135. package/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +65 -0
  136. package/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +44 -0
  137. package/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +752 -0
  138. package/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +49 -0
  139. package/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +232 -0
  140. package/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +301 -0
  141. package/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +670 -0
  142. package/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +694 -0
  143. package/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +85 -0
  144. package/eigen/Eigen/src/Core/arch/ZVector/Complex.h +212 -183
  145. package/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +101 -5
  146. package/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +510 -395
  147. package/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +11 -2
  148. package/eigen/Eigen/src/Core/functors/BinaryFunctors.h +112 -46
  149. package/eigen/Eigen/src/Core/functors/NullaryFunctors.h +31 -30
  150. package/eigen/Eigen/src/Core/functors/StlFunctors.h +32 -2
  151. package/eigen/Eigen/src/Core/functors/UnaryFunctors.h +354 -15
  152. package/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1073 -585
  153. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +29 -7
  154. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +4 -4
  155. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +1 -1
  156. package/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +382 -483
  157. package/eigen/Eigen/src/Core/products/Parallelizer.h +23 -9
  158. package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +23 -6
  159. package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +8 -6
  160. package/eigen/Eigen/src/Core/products/SelfadjointProduct.h +2 -2
  161. package/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +5 -4
  162. package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +3 -3
  163. package/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +5 -3
  164. package/eigen/Eigen/src/Core/products/TriangularSolverVector.h +13 -10
  165. package/eigen/Eigen/src/Core/util/BlasUtil.h +208 -124
  166. package/eigen/Eigen/src/Core/util/ConfigureVectorization.h +512 -0
  167. package/eigen/Eigen/src/Core/util/Constants.h +25 -9
  168. package/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +14 -2
  169. package/eigen/Eigen/src/Core/util/ForwardDeclarations.h +28 -4
  170. package/eigen/Eigen/src/Core/util/IndexedViewHelper.h +186 -0
  171. package/eigen/Eigen/src/Core/util/IntegralConstant.h +272 -0
  172. package/eigen/Eigen/src/Core/util/MKL_support.h +8 -1
  173. package/eigen/Eigen/src/Core/util/Macros.h +661 -250
  174. package/eigen/Eigen/src/Core/util/Memory.h +222 -52
  175. package/eigen/Eigen/src/Core/util/Meta.h +349 -105
  176. package/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
  177. package/eigen/Eigen/src/Core/util/StaticAssert.h +8 -5
  178. package/eigen/Eigen/src/Core/util/SymbolicIndex.h +293 -0
  179. package/eigen/Eigen/src/Core/util/XprHelper.h +48 -30
  180. package/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +1 -1
  181. package/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +1 -1
  182. package/eigen/Eigen/src/Eigenvalues/EigenSolver.h +2 -2
  183. package/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +1 -1
  184. package/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +2 -2
  185. package/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +2 -2
  186. package/eigen/Eigen/src/Eigenvalues/RealQZ.h +9 -6
  187. package/eigen/Eigen/src/Eigenvalues/RealSchur.h +10 -5
  188. package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +75 -42
  189. package/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +20 -15
  190. package/eigen/Eigen/src/Geometry/AlignedBox.h +99 -5
  191. package/eigen/Eigen/src/Geometry/AngleAxis.h +4 -4
  192. package/eigen/Eigen/src/Geometry/EulerAngles.h +3 -3
  193. package/eigen/Eigen/src/Geometry/Homogeneous.h +15 -11
  194. package/eigen/Eigen/src/Geometry/Hyperplane.h +1 -1
  195. package/eigen/Eigen/src/Geometry/OrthoMethods.h +3 -2
  196. package/eigen/Eigen/src/Geometry/ParametrizedLine.h +39 -2
  197. package/eigen/Eigen/src/Geometry/Quaternion.h +52 -14
  198. package/eigen/Eigen/src/Geometry/Rotation2D.h +3 -3
  199. package/eigen/Eigen/src/Geometry/Scaling.h +22 -4
  200. package/eigen/Eigen/src/Geometry/Transform.h +86 -65
  201. package/eigen/Eigen/src/Geometry/Translation.h +6 -6
  202. package/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +168 -0
  203. package/eigen/Eigen/src/Householder/BlockHouseholder.h +9 -2
  204. package/eigen/Eigen/src/Householder/Householder.h +8 -4
  205. package/eigen/Eigen/src/Householder/HouseholderSequence.h +123 -48
  206. package/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +15 -15
  207. package/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +7 -23
  208. package/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +5 -22
  209. package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +41 -47
  210. package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +51 -60
  211. package/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +70 -20
  212. package/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +2 -20
  213. package/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +11 -9
  214. package/eigen/Eigen/src/Jacobi/Jacobi.h +31 -10
  215. package/eigen/Eigen/src/KLUSupport/KLUSupport.h +358 -0
  216. package/eigen/Eigen/src/LU/Determinant.h +35 -19
  217. package/eigen/Eigen/src/LU/FullPivLU.h +29 -43
  218. package/eigen/Eigen/src/LU/InverseImpl.h +25 -8
  219. package/eigen/Eigen/src/LU/PartialPivLU.h +67 -57
  220. package/eigen/Eigen/src/LU/arch/InverseSize4.h +351 -0
  221. package/eigen/Eigen/src/OrderingMethods/Amd.h +7 -17
  222. package/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +297 -277
  223. package/eigen/Eigen/src/OrderingMethods/Ordering.h +6 -10
  224. package/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +1 -1
  225. package/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +10 -9
  226. package/eigen/Eigen/src/QR/ColPivHouseholderQR.h +41 -20
  227. package/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +100 -27
  228. package/eigen/Eigen/src/QR/FullPivHouseholderQR.h +59 -22
  229. package/eigen/Eigen/src/QR/HouseholderQR.h +48 -23
  230. package/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +25 -3
  231. package/eigen/Eigen/src/SVD/BDCSVD.h +137 -48
  232. package/eigen/Eigen/src/SVD/JacobiSVD.h +22 -14
  233. package/eigen/Eigen/src/SVD/SVDBase.h +82 -21
  234. package/eigen/Eigen/src/SVD/UpperBidiagonalization.h +3 -3
  235. package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +16 -8
  236. package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +11 -36
  237. package/eigen/Eigen/src/SparseCore/CompressedStorage.h +16 -0
  238. package/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +6 -6
  239. package/eigen/Eigen/src/SparseCore/SparseAssign.h +81 -27
  240. package/eigen/Eigen/src/SparseCore/SparseBlock.h +25 -57
  241. package/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +40 -11
  242. package/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +11 -15
  243. package/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +2 -2
  244. package/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +30 -8
  245. package/eigen/Eigen/src/SparseCore/SparseMatrix.h +124 -10
  246. package/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +5 -12
  247. package/eigen/Eigen/src/SparseCore/SparseProduct.h +13 -1
  248. package/eigen/Eigen/src/SparseCore/SparseRef.h +7 -7
  249. package/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +3 -0
  250. package/eigen/Eigen/src/SparseCore/SparseUtil.h +8 -0
  251. package/eigen/Eigen/src/SparseCore/SparseVector.h +1 -1
  252. package/eigen/Eigen/src/SparseLU/SparseLU.h +160 -10
  253. package/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +1 -1
  254. package/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +76 -2
  255. package/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +2 -2
  256. package/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +1 -1
  257. package/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +1 -1
  258. package/eigen/Eigen/src/SparseQR/SparseQR.h +19 -6
  259. package/eigen/Eigen/src/StlSupport/StdDeque.h +2 -14
  260. package/eigen/Eigen/src/StlSupport/StdList.h +2 -2
  261. package/eigen/Eigen/src/StlSupport/StdVector.h +2 -2
  262. package/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +6 -8
  263. package/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +175 -39
  264. package/eigen/Eigen/src/misc/lapacke.h +5 -4
  265. package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +27 -1
  266. package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +155 -11
  267. package/eigen/Eigen/src/plugins/BlockMethods.h +626 -242
  268. package/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +14 -0
  269. package/eigen/Eigen/src/plugins/IndexedViewMethods.h +262 -0
  270. package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +4 -4
  271. package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +10 -0
  272. package/eigen/Eigen/src/plugins/ReshapedMethods.h +149 -0
  273. package/eigen/README.md +5 -0
  274. package/lib/LibEigen.d.ts +4 -0
  275. package/lib/LibEigen.js +14 -0
  276. package/lib/index.d.ts +1 -1
  277. package/lib/index.js +7 -3
  278. package/package.json +2 -10
  279. package/eigen/Eigen/CMakeLists.txt +0 -19
  280. package/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -675
  281. package/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
  282. package/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
  283. package/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
  284. package/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
  285. package/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
  286. package/lib/eigen.d.ts +0 -2
  287. package/lib/eigen.js +0 -15
@@ -15,9 +15,10 @@ namespace Eigen {
15
15
 
16
16
  namespace internal {
17
17
 
18
- inline uint32x4_t p4ui_CONJ_XOR() {
18
+ inline uint32x4_t p4ui_CONJ_XOR()
19
+ {
19
20
  // See bug 1325, clang fails to call vld1q_u64.
20
- #if EIGEN_COMP_CLANG
21
+ #if EIGEN_COMP_CLANG || EIGEN_COMP_CASTXML
21
22
  uint32x4_t ret = { 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
22
23
  return ret;
23
24
  #else
@@ -26,61 +27,136 @@ inline uint32x4_t p4ui_CONJ_XOR() {
26
27
  #endif
27
28
  }
28
29
 
29
- inline uint32x2_t p2ui_CONJ_XOR() {
30
+ inline uint32x2_t p2ui_CONJ_XOR()
31
+ {
30
32
  static const uint32_t conj_XOR_DATA[] = { 0x00000000, 0x80000000 };
31
33
  return vld1_u32( conj_XOR_DATA );
32
34
  }
33
35
 
34
36
  //---------- float ----------
37
+
38
+ struct Packet1cf
39
+ {
40
+ EIGEN_STRONG_INLINE Packet1cf() {}
41
+ EIGEN_STRONG_INLINE explicit Packet1cf(const Packet2f& a) : v(a) {}
42
+ Packet2f v;
43
+ };
35
44
  struct Packet2cf
36
45
  {
37
46
  EIGEN_STRONG_INLINE Packet2cf() {}
38
47
  EIGEN_STRONG_INLINE explicit Packet2cf(const Packet4f& a) : v(a) {}
39
- Packet4f v;
48
+ Packet4f v;
40
49
  };
41
50
 
42
- template<> struct packet_traits<std::complex<float> > : default_packet_traits
51
+ template<> struct packet_traits<std::complex<float> > : default_packet_traits
43
52
  {
44
53
  typedef Packet2cf type;
45
- typedef Packet2cf half;
46
- enum {
54
+ typedef Packet1cf half;
55
+ enum
56
+ {
47
57
  Vectorizable = 1,
48
58
  AlignedOnScalar = 1,
49
59
  size = 2,
50
- HasHalfPacket = 0,
51
-
52
- HasAdd = 1,
53
- HasSub = 1,
54
- HasMul = 1,
55
- HasDiv = 1,
56
- HasNegate = 1,
57
- HasAbs = 0,
58
- HasAbs2 = 0,
59
- HasMin = 0,
60
- HasMax = 0,
60
+ HasHalfPacket = 1,
61
+
62
+ HasAdd = 1,
63
+ HasSub = 1,
64
+ HasMul = 1,
65
+ HasDiv = 1,
66
+ HasNegate = 1,
67
+ HasAbs = 0,
68
+ HasAbs2 = 0,
69
+ HasMin = 0,
70
+ HasMax = 0,
61
71
  HasSetLinear = 0
62
72
  };
63
73
  };
64
74
 
65
- template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16}; typedef Packet2cf half; };
66
-
67
- template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
75
+ template<> struct unpacket_traits<Packet1cf>
68
76
  {
69
- float32x2_t r64;
70
- r64 = vld1_f32((const float *)&from);
77
+ typedef std::complex<float> type;
78
+ typedef Packet1cf half;
79
+ typedef Packet2f as_real;
80
+ enum
81
+ {
82
+ size = 1,
83
+ alignment = Aligned16,
84
+ vectorizable = true,
85
+ masked_load_available = false,
86
+ masked_store_available = false
87
+ };
88
+ };
89
+ template<> struct unpacket_traits<Packet2cf>
90
+ {
91
+ typedef std::complex<float> type;
92
+ typedef Packet1cf half;
93
+ typedef Packet4f as_real;
94
+ enum
95
+ {
96
+ size = 2,
97
+ alignment = Aligned16,
98
+ vectorizable = true,
99
+ masked_load_available = false,
100
+ masked_store_available = false
101
+ };
102
+ };
103
+
104
+ template<> EIGEN_STRONG_INLINE Packet1cf pcast<float,Packet1cf>(const float& a)
105
+ { return Packet1cf(vset_lane_f32(a, vdup_n_f32(0.f), 0)); }
106
+ template<> EIGEN_STRONG_INLINE Packet2cf pcast<Packet2f,Packet2cf>(const Packet2f& a)
107
+ { return Packet2cf(vreinterpretq_f32_u64(vmovl_u32(vreinterpret_u32_f32(a)))); }
71
108
 
109
+ template<> EIGEN_STRONG_INLINE Packet1cf pset1<Packet1cf>(const std::complex<float>& from)
110
+ { return Packet1cf(vld1_f32(reinterpret_cast<const float*>(&from))); }
111
+ template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
112
+ {
113
+ const float32x2_t r64 = vld1_f32(reinterpret_cast<const float*>(&from));
72
114
  return Packet2cf(vcombine_f32(r64, r64));
73
115
  }
74
116
 
75
- template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(padd<Packet4f>(a.v,b.v)); }
76
- template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(psub<Packet4f>(a.v,b.v)); }
117
+ template<> EIGEN_STRONG_INLINE Packet1cf padd<Packet1cf>(const Packet1cf& a, const Packet1cf& b)
118
+ { return Packet1cf(padd<Packet2f>(a.v, b.v)); }
119
+ template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
120
+ { return Packet2cf(padd<Packet4f>(a.v, b.v)); }
121
+
122
+ template<> EIGEN_STRONG_INLINE Packet1cf psub<Packet1cf>(const Packet1cf& a, const Packet1cf& b)
123
+ { return Packet1cf(psub<Packet2f>(a.v, b.v)); }
124
+ template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
125
+ { return Packet2cf(psub<Packet4f>(a.v, b.v)); }
126
+
127
+ template<> EIGEN_STRONG_INLINE Packet1cf pnegate(const Packet1cf& a) { return Packet1cf(pnegate<Packet2f>(a.v)); }
77
128
  template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) { return Packet2cf(pnegate<Packet4f>(a.v)); }
129
+
130
+ template<> EIGEN_STRONG_INLINE Packet1cf pconj(const Packet1cf& a)
131
+ {
132
+ const Packet2ui b = vreinterpret_u32_f32(a.v);
133
+ return Packet1cf(vreinterpret_f32_u32(veor_u32(b, p2ui_CONJ_XOR())));
134
+ }
78
135
  template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a)
79
136
  {
80
- Packet4ui b = vreinterpretq_u32_f32(a.v);
137
+ const Packet4ui b = vreinterpretq_u32_f32(a.v);
81
138
  return Packet2cf(vreinterpretq_f32_u32(veorq_u32(b, p4ui_CONJ_XOR())));
82
139
  }
83
140
 
141
+ template<> EIGEN_STRONG_INLINE Packet1cf pmul<Packet1cf>(const Packet1cf& a, const Packet1cf& b)
142
+ {
143
+ Packet2f v1, v2;
144
+
145
+ // Get the real values of a | a1_re | a1_re |
146
+ v1 = vdup_lane_f32(a.v, 0);
147
+ // Get the imag values of a | a1_im | a1_im |
148
+ v2 = vdup_lane_f32(a.v, 1);
149
+ // Multiply the real a with b
150
+ v1 = vmul_f32(v1, b.v);
151
+ // Multiply the imag a with b
152
+ v2 = vmul_f32(v2, b.v);
153
+ // Conjugate v2
154
+ v2 = vreinterpret_f32_u32(veor_u32(vreinterpret_u32_f32(v2), p2ui_CONJ_XOR()));
155
+ // Swap real/imag elements in v2.
156
+ v2 = vrev64_f32(v2);
157
+ // Add and return the result
158
+ return Packet1cf(vadd_f32(v1, v2));
159
+ }
84
160
  template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
85
161
  {
86
162
  Packet4f v1, v2;
@@ -93,7 +169,7 @@ template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, con
93
169
  v1 = vmulq_f32(v1, b.v);
94
170
  // Multiply the imag a with b
95
171
  v2 = vmulq_f32(v2, b.v);
96
- // Conjugate v2
172
+ // Conjugate v2
97
173
  v2 = vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(v2), p4ui_CONJ_XOR()));
98
174
  // Swap real/imag elements in v2.
99
175
  v2 = vrev64q_f32(v2);
@@ -101,98 +177,144 @@ template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, con
101
177
  return Packet2cf(vaddq_f32(v1, v2));
102
178
  }
103
179
 
104
- template<> EIGEN_STRONG_INLINE Packet2cf pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b)
180
+ template<> EIGEN_STRONG_INLINE Packet1cf pcmp_eq(const Packet1cf& a, const Packet1cf& b)
105
181
  {
106
- return Packet2cf(vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v))));
182
+ // Compare real and imaginary parts of a and b to get the mask vector:
183
+ // [re(a[0])==re(b[0]), im(a[0])==im(b[0])]
184
+ Packet2f eq = pcmp_eq<Packet2f>(a.v, b.v);
185
+ // Swap real/imag elements in the mask in to get:
186
+ // [im(a[0])==im(b[0]), re(a[0])==re(b[0])]
187
+ Packet2f eq_swapped = vrev64_f32(eq);
188
+ // Return re(a)==re(b) && im(a)==im(b) by computing bitwise AND of eq and eq_swapped
189
+ return Packet1cf(pand<Packet2f>(eq, eq_swapped));
107
190
  }
108
- template<> EIGEN_STRONG_INLINE Packet2cf por <Packet2cf>(const Packet2cf& a, const Packet2cf& b)
109
- {
110
- return Packet2cf(vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v))));
111
- }
112
- template<> EIGEN_STRONG_INLINE Packet2cf pxor <Packet2cf>(const Packet2cf& a, const Packet2cf& b)
113
- {
114
- return Packet2cf(vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v))));
115
- }
116
- template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
117
- {
118
- return Packet2cf(vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v))));
191
+ template<> EIGEN_STRONG_INLINE Packet2cf pcmp_eq(const Packet2cf& a, const Packet2cf& b)
192
+ {
193
+ // Compare real and imaginary parts of a and b to get the mask vector:
194
+ // [re(a[0])==re(b[0]), im(a[0])==im(b[0]), re(a[1])==re(b[1]), im(a[1])==im(b[1])]
195
+ Packet4f eq = pcmp_eq<Packet4f>(a.v, b.v);
196
+ // Swap real/imag elements in the mask in to get:
197
+ // [im(a[0])==im(b[0]), re(a[0])==re(b[0]), im(a[1])==im(b[1]), re(a[1])==re(b[1])]
198
+ Packet4f eq_swapped = vrev64q_f32(eq);
199
+ // Return re(a)==re(b) && im(a)==im(b) by computing bitwise AND of eq and eq_swapped
200
+ return Packet2cf(pand<Packet4f>(eq, eq_swapped));
119
201
  }
120
202
 
121
- template<> EIGEN_STRONG_INLINE Packet2cf pload<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload<Packet4f>((const float*)from)); }
122
- template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu<Packet4f>((const float*)from)); }
203
+ template<> EIGEN_STRONG_INLINE Packet1cf pand<Packet1cf>(const Packet1cf& a, const Packet1cf& b)
204
+ { return Packet1cf(vreinterpret_f32_u32(vand_u32(vreinterpret_u32_f32(a.v), vreinterpret_u32_f32(b.v)))); }
205
+ template<> EIGEN_STRONG_INLINE Packet2cf pand<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
206
+ { return Packet2cf(vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(a.v), vreinterpretq_u32_f32(b.v)))); }
123
207
 
124
- template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from) { return pset1<Packet2cf>(*from); }
208
+ template<> EIGEN_STRONG_INLINE Packet1cf por<Packet1cf>(const Packet1cf& a, const Packet1cf& b)
209
+ { return Packet1cf(vreinterpret_f32_u32(vorr_u32(vreinterpret_u32_f32(a.v), vreinterpret_u32_f32(b.v)))); }
210
+ template<> EIGEN_STRONG_INLINE Packet2cf por<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
211
+ { return Packet2cf(vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(a.v), vreinterpretq_u32_f32(b.v)))); }
125
212
 
126
- template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((float*)to, from.v); }
127
- template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((float*)to, from.v); }
213
+ template<> EIGEN_STRONG_INLINE Packet1cf pxor<Packet1cf>(const Packet1cf& a, const Packet1cf& b)
214
+ { return Packet1cf(vreinterpret_f32_u32(veor_u32(vreinterpret_u32_f32(a.v), vreinterpret_u32_f32(b.v)))); }
215
+ template<> EIGEN_STRONG_INLINE Packet2cf pxor<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
216
+ { return Packet2cf(vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(a.v), vreinterpretq_u32_f32(b.v)))); }
128
217
 
129
- template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, Index stride)
218
+ template<> EIGEN_STRONG_INLINE Packet1cf pandnot<Packet1cf>(const Packet1cf& a, const Packet1cf& b)
219
+ { return Packet1cf(vreinterpret_f32_u32(vbic_u32(vreinterpret_u32_f32(a.v), vreinterpret_u32_f32(b.v)))); }
220
+ template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
221
+ { return Packet2cf(vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(a.v), vreinterpretq_u32_f32(b.v)))); }
222
+
223
+ template<> EIGEN_STRONG_INLINE Packet1cf pload<Packet1cf>(const std::complex<float>* from)
224
+ { EIGEN_DEBUG_ALIGNED_LOAD return Packet1cf(pload<Packet2f>((const float*)from)); }
225
+ template<> EIGEN_STRONG_INLINE Packet2cf pload<Packet2cf>(const std::complex<float>* from)
226
+ { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload<Packet4f>(reinterpret_cast<const float*>(from))); }
227
+
228
+ template<> EIGEN_STRONG_INLINE Packet1cf ploadu<Packet1cf>(const std::complex<float>* from)
229
+ { EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cf(ploadu<Packet2f>((const float*)from)); }
230
+ template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from)
231
+ { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu<Packet4f>(reinterpret_cast<const float*>(from))); }
232
+
233
+ template<> EIGEN_STRONG_INLINE Packet1cf ploaddup<Packet1cf>(const std::complex<float>* from)
234
+ { return pset1<Packet1cf>(*from); }
235
+ template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from)
236
+ { return pset1<Packet2cf>(*from); }
237
+
238
+ template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> *to, const Packet1cf& from)
239
+ { EIGEN_DEBUG_ALIGNED_STORE pstore((float*)to, from.v); }
240
+ template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> *to, const Packet2cf& from)
241
+ { EIGEN_DEBUG_ALIGNED_STORE pstore(reinterpret_cast<float*>(to), from.v); }
242
+
243
+ template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> *to, const Packet1cf& from)
244
+ { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((float*)to, from.v); }
245
+ template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> *to, const Packet2cf& from)
246
+ { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(reinterpret_cast<float*>(to), from.v); }
247
+
248
+ template<> EIGEN_DEVICE_FUNC inline Packet1cf pgather<std::complex<float>, Packet1cf>(
249
+ const std::complex<float>* from, Index stride)
250
+ {
251
+ const Packet2f tmp = vdup_n_f32(std::real(from[0*stride]));
252
+ return Packet1cf(vset_lane_f32(std::imag(from[0*stride]), tmp, 1));
253
+ }
254
+ template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(
255
+ const std::complex<float>* from, Index stride)
130
256
  {
131
- Packet4f res = pset1<Packet4f>(0.f);
132
- res = vsetq_lane_f32(std::real(from[0*stride]), res, 0);
257
+ Packet4f res = vdupq_n_f32(std::real(from[0*stride]));
133
258
  res = vsetq_lane_f32(std::imag(from[0*stride]), res, 1);
134
259
  res = vsetq_lane_f32(std::real(from[1*stride]), res, 2);
135
260
  res = vsetq_lane_f32(std::imag(from[1*stride]), res, 3);
136
261
  return Packet2cf(res);
137
262
  }
138
263
 
139
- template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, Index stride)
264
+ template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet1cf>(
265
+ std::complex<float>* to, const Packet1cf& from, Index stride)
266
+ { to[stride*0] = std::complex<float>(vget_lane_f32(from.v, 0), vget_lane_f32(from.v, 1)); }
267
+ template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(
268
+ std::complex<float>* to, const Packet2cf& from, Index stride)
140
269
  {
141
270
  to[stride*0] = std::complex<float>(vgetq_lane_f32(from.v, 0), vgetq_lane_f32(from.v, 1));
142
271
  to[stride*1] = std::complex<float>(vgetq_lane_f32(from.v, 2), vgetq_lane_f32(from.v, 3));
143
272
  }
144
273
 
145
- template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { EIGEN_ARM_PREFETCH((const float *)addr); }
274
+ template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> *addr)
275
+ { EIGEN_ARM_PREFETCH(reinterpret_cast<const float*>(addr)); }
146
276
 
147
- template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
277
+ template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet1cf>(const Packet1cf& a)
148
278
  {
149
- std::complex<float> EIGEN_ALIGN16 x[2];
150
- vst1q_f32((float *)x, a.v);
279
+ EIGEN_ALIGN16 std::complex<float> x;
280
+ vst1_f32(reinterpret_cast<float*>(&x), a.v);
281
+ return x;
282
+ }
283
+ template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
284
+ {
285
+ EIGEN_ALIGN16 std::complex<float> x[2];
286
+ vst1q_f32(reinterpret_cast<float*>(x), a.v);
151
287
  return x[0];
152
288
  }
153
289
 
290
+ template<> EIGEN_STRONG_INLINE Packet1cf preverse(const Packet1cf& a) { return a; }
154
291
  template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a)
155
- {
156
- float32x2_t a_lo, a_hi;
157
- Packet4f a_r128;
158
-
159
- a_lo = vget_low_f32(a.v);
160
- a_hi = vget_high_f32(a.v);
161
- a_r128 = vcombine_f32(a_hi, a_lo);
162
-
163
- return Packet2cf(a_r128);
164
- }
292
+ { return Packet2cf(vcombine_f32(vget_high_f32(a.v), vget_low_f32(a.v))); }
165
293
 
294
+ template<> EIGEN_STRONG_INLINE Packet1cf pcplxflip<Packet1cf>(const Packet1cf& a)
295
+ { return Packet1cf(vrev64_f32(a.v)); }
166
296
  template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(const Packet2cf& a)
297
+ { return Packet2cf(vrev64q_f32(a.v)); }
298
+
299
+ template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet1cf>(const Packet1cf& a)
167
300
  {
168
- return Packet2cf(vrev64q_f32(a.v));
301
+ std::complex<float> s;
302
+ vst1_f32((float *)&s, a.v);
303
+ return s;
169
304
  }
170
-
171
305
  template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a)
172
306
  {
173
- float32x2_t a1, a2;
174
307
  std::complex<float> s;
175
-
176
- a1 = vget_low_f32(a.v);
177
- a2 = vget_high_f32(a.v);
178
- a2 = vadd_f32(a1, a2);
179
- vst1_f32((float *)&s, a2);
180
-
308
+ vst1_f32(reinterpret_cast<float*>(&s), vadd_f32(vget_low_f32(a.v), vget_high_f32(a.v)));
181
309
  return s;
182
310
  }
183
311
 
184
- template<> EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(const Packet2cf* vecs)
312
+ template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet1cf>(const Packet1cf& a)
185
313
  {
186
- Packet4f sum1, sum2, sum;
187
-
188
- // Add the first two 64-bit float32x2_t of vecs[0]
189
- sum1 = vcombine_f32(vget_low_f32(vecs[0].v), vget_low_f32(vecs[1].v));
190
- sum2 = vcombine_f32(vget_high_f32(vecs[0].v), vget_high_f32(vecs[1].v));
191
- sum = vaddq_f32(sum1, sum2);
192
-
193
- return Packet2cf(sum);
314
+ std::complex<float> s;
315
+ vst1_f32((float *)&s, a.v);
316
+ return s;
194
317
  }
195
-
196
318
  template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a)
197
319
  {
198
320
  float32x2_t a1, a2, v1, v2, prod;
@@ -208,90 +330,67 @@ template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const P
208
330
  v1 = vmul_f32(v1, a2);
209
331
  // Multiply the imag a with b
210
332
  v2 = vmul_f32(v2, a2);
211
- // Conjugate v2
333
+ // Conjugate v2
212
334
  v2 = vreinterpret_f32_u32(veor_u32(vreinterpret_u32_f32(v2), p2ui_CONJ_XOR()));
213
335
  // Swap real/imag elements in v2.
214
336
  v2 = vrev64_f32(v2);
215
337
  // Add v1, v2
216
338
  prod = vadd_f32(v1, v2);
217
339
 
218
- vst1_f32((float *)&s, prod);
340
+ vst1_f32(reinterpret_cast<float*>(&s), prod);
219
341
 
220
342
  return s;
221
343
  }
222
344
 
223
- template<int Offset>
224
- struct palign_impl<Offset,Packet2cf>
225
- {
226
- EIGEN_STRONG_INLINE static void run(Packet2cf& first, const Packet2cf& second)
227
- {
228
- if (Offset==1)
229
- {
230
- first.v = vextq_f32(first.v, second.v, 2);
231
- }
232
- }
233
- };
234
-
235
- template<> struct conj_helper<Packet2cf, Packet2cf, false,true>
236
- {
237
- EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
238
- { return padd(pmul(x,y),c); }
239
-
240
- EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
241
- {
242
- return internal::pmul(a, pconj(b));
243
- }
244
- };
245
-
246
- template<> struct conj_helper<Packet2cf, Packet2cf, true,false>
247
- {
248
- EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
249
- { return padd(pmul(x,y),c); }
250
-
251
- EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
252
- {
253
- return internal::pmul(pconj(a), b);
254
- }
255
- };
345
+ EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cf,Packet2f)
346
+ EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)
256
347
 
257
- template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
348
+ template<> EIGEN_STRONG_INLINE Packet1cf pdiv<Packet1cf>(const Packet1cf& a, const Packet1cf& b)
258
349
  {
259
- EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
260
- { return padd(pmul(x,y),c); }
261
-
262
- EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
263
- {
264
- return pconj(internal::pmul(a, b));
265
- }
266
- };
350
+ // TODO optimize it for NEON
351
+ Packet1cf res = pmul(a, pconj(b));
352
+ Packet2f s, rev_s;
267
353
 
268
- EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)
354
+ // this computes the norm
355
+ s = vmul_f32(b.v, b.v);
356
+ rev_s = vrev64_f32(s);
269
357
 
358
+ return Packet1cf(pdiv<Packet2f>(res.v, vadd_f32(s, rev_s)));
359
+ }
270
360
  template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
271
361
  {
272
362
  // TODO optimize it for NEON
273
- Packet2cf res = conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a,b);
363
+ Packet2cf res = pmul(a,pconj(b));
274
364
  Packet4f s, rev_s;
275
365
 
276
366
  // this computes the norm
277
367
  s = vmulq_f32(b.v, b.v);
278
368
  rev_s = vrev64q_f32(s);
279
369
 
280
- return Packet2cf(pdiv<Packet4f>(res.v, vaddq_f32(s,rev_s)));
370
+ return Packet2cf(pdiv<Packet4f>(res.v, vaddq_f32(s, rev_s)));
281
371
  }
282
372
 
283
- EIGEN_DEVICE_FUNC inline void
284
- ptranspose(PacketBlock<Packet2cf,2>& kernel) {
373
+ EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet1cf, 1>& /*kernel*/) {}
374
+ EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet2cf, 2>& kernel)
375
+ {
285
376
  Packet4f tmp = vcombine_f32(vget_high_f32(kernel.packet[0].v), vget_high_f32(kernel.packet[1].v));
286
377
  kernel.packet[0].v = vcombine_f32(vget_low_f32(kernel.packet[0].v), vget_low_f32(kernel.packet[1].v));
287
378
  kernel.packet[1].v = tmp;
288
379
  }
289
380
 
381
+ template<> EIGEN_STRONG_INLINE Packet1cf psqrt<Packet1cf>(const Packet1cf& a) {
382
+ return psqrt_complex<Packet1cf>(a);
383
+ }
384
+
385
+ template<> EIGEN_STRONG_INLINE Packet2cf psqrt<Packet2cf>(const Packet2cf& a) {
386
+ return psqrt_complex<Packet2cf>(a);
387
+ }
388
+
290
389
  //---------- double ----------
291
390
  #if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG
292
391
 
293
392
  // See bug 1325, clang fails to call vld1q_u64.
294
- #if EIGEN_COMP_CLANG
393
+ #if EIGEN_COMP_CLANG || EIGEN_COMP_CASTXML
295
394
  static uint64x2_t p2ul_CONJ_XOR = {0x0, 0x8000000000000000};
296
395
  #else
297
396
  const uint64_t p2ul_conj_XOR_DATA[] = { 0x0, 0x8000000000000000 };
@@ -309,7 +408,8 @@ template<> struct packet_traits<std::complex<double> > : default_packet_traits
309
408
  {
310
409
  typedef Packet1cd type;
311
410
  typedef Packet1cd half;
312
- enum {
411
+ enum
412
+ {
313
413
  Vectorizable = 1,
314
414
  AlignedOnScalar = 0,
315
415
  size = 1,
@@ -328,24 +428,50 @@ template<> struct packet_traits<std::complex<double> > : default_packet_traits
328
428
  };
329
429
  };
330
430
 
331
- template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16}; typedef Packet1cd half; };
431
+ template<> struct unpacket_traits<Packet1cd>
432
+ {
433
+ typedef std::complex<double> type;
434
+ typedef Packet1cd half;
435
+ typedef Packet2d as_real;
436
+ enum
437
+ {
438
+ size=1,
439
+ alignment=Aligned16,
440
+ vectorizable=true,
441
+ masked_load_available=false,
442
+ masked_store_available=false
443
+ };
444
+ };
445
+
446
+ template<> EIGEN_STRONG_INLINE Packet1cd pload<Packet1cd>(const std::complex<double>* from)
447
+ { EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>(reinterpret_cast<const double*>(from))); }
448
+
449
+ template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from)
450
+ { EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>(reinterpret_cast<const double*>(from))); }
451
+
452
+ template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from)
453
+ {
454
+ /* here we really have to use unaligned loads :( */
455
+ return ploadu<Packet1cd>(&from);
456
+ }
332
457
 
333
- template<> EIGEN_STRONG_INLINE Packet1cd pload<Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>((const double*)from)); }
334
- template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>((const double*)from)); }
458
+ template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
459
+ { return Packet1cd(padd<Packet2d>(a.v, b.v)); }
335
460
 
336
- template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from)
337
- { /* here we really have to use unaligned loads :( */ return ploadu<Packet1cd>(&from); }
461
+ template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
462
+ { return Packet1cd(psub<Packet2d>(a.v, b.v)); }
338
463
 
339
- template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(padd<Packet2d>(a.v,b.v)); }
340
- template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(psub<Packet2d>(a.v,b.v)); }
341
- template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate<Packet2d>(a.v)); }
342
- template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) { return Packet1cd(vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(a.v), p2ul_CONJ_XOR))); }
464
+ template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a)
465
+ { return Packet1cd(pnegate<Packet2d>(a.v)); }
466
+
467
+ template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a)
468
+ { return Packet1cd(vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(a.v), p2ul_CONJ_XOR))); }
343
469
 
344
470
  template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
345
471
  {
346
472
  Packet2d v1, v2;
347
473
 
348
- // Get the real values of a
474
+ // Get the real values of a
349
475
  v1 = vdupq_lane_f64(vget_low_f64(a.v), 0);
350
476
  // Get the imag values of a
351
477
  v2 = vdupq_lane_f64(vget_high_f64(a.v), 0);
@@ -353,7 +479,7 @@ template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, con
353
479
  v1 = vmulq_f64(v1, b.v);
354
480
  // Multiply the imag a with b
355
481
  v2 = vmulq_f64(v2, b.v);
356
- // Conjugate v2
482
+ // Conjugate v2
357
483
  v2 = vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(v2), p2ul_CONJ_XOR));
358
484
  // Swap real/imag elements in v2.
359
485
  v2 = preverse<Packet2d>(v2);
@@ -361,31 +487,44 @@ template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, con
361
487
  return Packet1cd(vaddq_f64(v1, v2));
362
488
  }
363
489
 
364
- template<> EIGEN_STRONG_INLINE Packet1cd pand <Packet1cd>(const Packet1cd& a, const Packet1cd& b)
365
- {
366
- return Packet1cd(vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v))));
367
- }
368
- template<> EIGEN_STRONG_INLINE Packet1cd por <Packet1cd>(const Packet1cd& a, const Packet1cd& b)
369
- {
370
- return Packet1cd(vreinterpretq_f64_u64(vorrq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v))));
371
- }
372
- template<> EIGEN_STRONG_INLINE Packet1cd pxor <Packet1cd>(const Packet1cd& a, const Packet1cd& b)
490
+ template<> EIGEN_STRONG_INLINE Packet1cd pcmp_eq(const Packet1cd& a, const Packet1cd& b)
373
491
  {
374
- return Packet1cd(vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v))));
492
+ // Compare real and imaginary parts of a and b to get the mask vector:
493
+ // [re(a)==re(b), im(a)==im(b)]
494
+ Packet2d eq = pcmp_eq<Packet2d>(a.v, b.v);
495
+ // Swap real/imag elements in the mask in to get:
496
+ // [im(a)==im(b), re(a)==re(b)]
497
+ Packet2d eq_swapped = vreinterpretq_f64_u32(vrev64q_u32(vreinterpretq_u32_f64(eq)));
498
+ // Return re(a)==re(b) & im(a)==im(b) by computing bitwise AND of eq and eq_swapped
499
+ return Packet1cd(pand<Packet2d>(eq, eq_swapped));
375
500
  }
501
+
502
+ template<> EIGEN_STRONG_INLINE Packet1cd pand<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
503
+ { return Packet1cd(vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v)))); }
504
+
505
+ template<> EIGEN_STRONG_INLINE Packet1cd por<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
506
+ { return Packet1cd(vreinterpretq_f64_u64(vorrq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v)))); }
507
+
508
+ template<> EIGEN_STRONG_INLINE Packet1cd pxor<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
509
+ { return Packet1cd(vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v)))); }
510
+
376
511
  template<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
377
- {
378
- return Packet1cd(vreinterpretq_f64_u64(vbicq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v))));
379
- }
512
+ { return Packet1cd(vreinterpretq_f64_u64(vbicq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v)))); }
513
+
514
+ template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from)
515
+ { return pset1<Packet1cd>(*from); }
380
516
 
381
- template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from) { return pset1<Packet1cd>(*from); }
517
+ template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> *to, const Packet1cd& from)
518
+ { EIGEN_DEBUG_ALIGNED_STORE pstore(reinterpret_cast<double*>(to), from.v); }
382
519
 
383
- template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); }
384
- template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); }
520
+ template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> *to, const Packet1cd& from)
521
+ { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(reinterpret_cast<double*>(to), from.v); }
385
522
 
386
- template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { EIGEN_ARM_PREFETCH((const double *)addr); }
523
+ template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> *addr)
524
+ { EIGEN_ARM_PREFETCH(reinterpret_cast<const double*>(addr)); }
387
525
 
388
- template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(const std::complex<double>* from, Index stride)
526
+ template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(
527
+ const std::complex<double>* from, Index stride)
389
528
  {
390
529
  Packet2d res = pset1<Packet2d>(0.0);
391
530
  res = vsetq_lane_f64(std::real(from[0*stride]), res, 0);
@@ -393,17 +532,14 @@ template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Pack
393
532
  return Packet1cd(res);
394
533
  }
395
534
 
396
- template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet1cd>(std::complex<double>* to, const Packet1cd& from, Index stride)
397
- {
398
- to[stride*0] = std::complex<double>(vgetq_lane_f64(from.v, 0), vgetq_lane_f64(from.v, 1));
399
- }
535
+ template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet1cd>(
536
+ std::complex<double>* to, const Packet1cd& from, Index stride)
537
+ { to[stride*0] = std::complex<double>(vgetq_lane_f64(from.v, 0), vgetq_lane_f64(from.v, 1)); }
400
538
 
401
-
402
- template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a)
539
+ template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a)
403
540
  {
404
- std::complex<double> EIGEN_ALIGN16 res;
541
+ EIGEN_ALIGN16 std::complex<double> res;
405
542
  pstore<std::complex<double> >(&res, a);
406
-
407
543
  return res;
408
544
  }
409
545
 
@@ -411,59 +547,14 @@ template<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) { return a
411
547
 
412
548
  template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a) { return pfirst(a); }
413
549
 
414
- template<> EIGEN_STRONG_INLINE Packet1cd preduxp<Packet1cd>(const Packet1cd* vecs) { return vecs[0]; }
415
-
416
550
  template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a) { return pfirst(a); }
417
551
 
418
- template<int Offset>
419
- struct palign_impl<Offset,Packet1cd>
420
- {
421
- static EIGEN_STRONG_INLINE void run(Packet1cd& /*first*/, const Packet1cd& /*second*/)
422
- {
423
- // FIXME is it sure we never have to align a Packet1cd?
424
- // Even though a std::complex<double> has 16 bytes, it is not necessarily aligned on a 16 bytes boundary...
425
- }
426
- };
427
-
428
- template<> struct conj_helper<Packet1cd, Packet1cd, false,true>
429
- {
430
- EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
431
- { return padd(pmul(x,y),c); }
432
-
433
- EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
434
- {
435
- return internal::pmul(a, pconj(b));
436
- }
437
- };
438
-
439
- template<> struct conj_helper<Packet1cd, Packet1cd, true,false>
440
- {
441
- EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
442
- { return padd(pmul(x,y),c); }
443
-
444
- EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
445
- {
446
- return internal::pmul(pconj(a), b);
447
- }
448
- };
449
-
450
- template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
451
- {
452
- EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
453
- { return padd(pmul(x,y),c); }
454
-
455
- EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
456
- {
457
- return pconj(internal::pmul(a, b));
458
- }
459
- };
460
-
461
552
  EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d)
462
553
 
463
554
  template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
464
555
  {
465
556
  // TODO optimize it for NEON
466
- Packet1cd res = conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b);
557
+ Packet1cd res = pmul(a,pconj(b));
467
558
  Packet2d s = pmul<Packet2d>(b.v, b.v);
468
559
  Packet2d rev_s = preverse<Packet2d>(s);
469
560
 
@@ -471,9 +562,7 @@ template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, con
471
562
  }
472
563
 
473
564
  EIGEN_STRONG_INLINE Packet1cd pcplxflip/*<Packet1cd>*/(const Packet1cd& x)
474
- {
475
- return Packet1cd(preverse(Packet2d(x.v)));
476
- }
565
+ { return Packet1cd(preverse(Packet2d(x.v))); }
477
566
 
478
567
  EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet1cd,2>& kernel)
479
568
  {
@@ -481,6 +570,11 @@ EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet1cd,2>& kernel)
481
570
  kernel.packet[0].v = vcombine_f64(vget_low_f64(kernel.packet[0].v), vget_low_f64(kernel.packet[1].v));
482
571
  kernel.packet[1].v = tmp;
483
572
  }
573
+
574
+ template<> EIGEN_STRONG_INLINE Packet1cd psqrt<Packet1cd>(const Packet1cd& a) {
575
+ return psqrt_complex<Packet1cd>(a);
576
+ }
577
+
484
578
  #endif // EIGEN_ARCH_ARM64
485
579
 
486
580
  } // end namespace internal