@smake/eigen 1.0.2 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (287) hide show
  1. package/README.md +1 -1
  2. package/eigen/COPYING.APACHE +203 -0
  3. package/eigen/COPYING.BSD +26 -0
  4. package/eigen/COPYING.GPL +674 -0
  5. package/eigen/COPYING.LGPL +502 -0
  6. package/eigen/COPYING.MINPACK +51 -0
  7. package/eigen/COPYING.MPL2 +373 -0
  8. package/eigen/COPYING.README +18 -0
  9. package/eigen/Eigen/Cholesky +0 -1
  10. package/eigen/Eigen/Core +108 -266
  11. package/eigen/Eigen/Eigenvalues +0 -1
  12. package/eigen/Eigen/Geometry +3 -6
  13. package/eigen/Eigen/Householder +0 -1
  14. package/eigen/Eigen/Jacobi +0 -1
  15. package/eigen/Eigen/KLUSupport +41 -0
  16. package/eigen/Eigen/LU +2 -5
  17. package/eigen/Eigen/OrderingMethods +0 -3
  18. package/eigen/Eigen/PaStiXSupport +1 -0
  19. package/eigen/Eigen/PardisoSupport +0 -0
  20. package/eigen/Eigen/QR +0 -1
  21. package/eigen/Eigen/QtAlignedMalloc +0 -1
  22. package/eigen/Eigen/SVD +0 -1
  23. package/eigen/Eigen/Sparse +0 -2
  24. package/eigen/Eigen/SparseCholesky +0 -8
  25. package/eigen/Eigen/SparseLU +4 -0
  26. package/eigen/Eigen/src/Cholesky/LDLT.h +42 -27
  27. package/eigen/Eigen/src/Cholesky/LLT.h +39 -23
  28. package/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +90 -47
  29. package/eigen/Eigen/src/Core/ArithmeticSequence.h +413 -0
  30. package/eigen/Eigen/src/Core/Array.h +99 -11
  31. package/eigen/Eigen/src/Core/ArrayBase.h +1 -1
  32. package/eigen/Eigen/src/Core/ArrayWrapper.h +21 -21
  33. package/eigen/Eigen/src/Core/Assign.h +1 -1
  34. package/eigen/Eigen/src/Core/AssignEvaluator.h +125 -50
  35. package/eigen/Eigen/src/Core/Assign_MKL.h +10 -10
  36. package/eigen/Eigen/src/Core/BandMatrix.h +16 -16
  37. package/eigen/Eigen/src/Core/Block.h +56 -60
  38. package/eigen/Eigen/src/Core/BooleanRedux.h +29 -31
  39. package/eigen/Eigen/src/Core/CommaInitializer.h +7 -3
  40. package/eigen/Eigen/src/Core/CoreEvaluators.h +325 -272
  41. package/eigen/Eigen/src/Core/CoreIterators.h +5 -0
  42. package/eigen/Eigen/src/Core/CwiseBinaryOp.h +21 -22
  43. package/eigen/Eigen/src/Core/CwiseNullaryOp.h +153 -18
  44. package/eigen/Eigen/src/Core/CwiseUnaryOp.h +6 -6
  45. package/eigen/Eigen/src/Core/CwiseUnaryView.h +12 -10
  46. package/eigen/Eigen/src/Core/DenseBase.h +128 -39
  47. package/eigen/Eigen/src/Core/DenseCoeffsBase.h +25 -21
  48. package/eigen/Eigen/src/Core/DenseStorage.h +150 -68
  49. package/eigen/Eigen/src/Core/Diagonal.h +21 -23
  50. package/eigen/Eigen/src/Core/DiagonalMatrix.h +50 -2
  51. package/eigen/Eigen/src/Core/DiagonalProduct.h +1 -1
  52. package/eigen/Eigen/src/Core/Dot.h +10 -10
  53. package/eigen/Eigen/src/Core/EigenBase.h +10 -9
  54. package/eigen/Eigen/src/Core/ForceAlignedAccess.h +8 -4
  55. package/eigen/Eigen/src/Core/Fuzzy.h +3 -3
  56. package/eigen/Eigen/src/Core/GeneralProduct.h +20 -10
  57. package/eigen/Eigen/src/Core/GenericPacketMath.h +597 -147
  58. package/eigen/Eigen/src/Core/GlobalFunctions.h +40 -33
  59. package/eigen/Eigen/src/Core/IO.h +40 -7
  60. package/eigen/Eigen/src/Core/IndexedView.h +237 -0
  61. package/eigen/Eigen/src/Core/Inverse.h +9 -10
  62. package/eigen/Eigen/src/Core/Map.h +7 -7
  63. package/eigen/Eigen/src/Core/MapBase.h +5 -3
  64. package/eigen/Eigen/src/Core/MathFunctions.h +756 -120
  65. package/eigen/Eigen/src/Core/MathFunctionsImpl.h +118 -19
  66. package/eigen/Eigen/src/Core/Matrix.h +131 -25
  67. package/eigen/Eigen/src/Core/MatrixBase.h +19 -2
  68. package/eigen/Eigen/src/Core/NestByValue.h +25 -50
  69. package/eigen/Eigen/src/Core/NoAlias.h +4 -3
  70. package/eigen/Eigen/src/Core/NumTraits.h +107 -20
  71. package/eigen/Eigen/src/Core/PartialReduxEvaluator.h +232 -0
  72. package/eigen/Eigen/src/Core/PermutationMatrix.h +3 -3
  73. package/eigen/Eigen/src/Core/PlainObjectBase.h +145 -54
  74. package/eigen/Eigen/src/Core/Product.h +30 -25
  75. package/eigen/Eigen/src/Core/ProductEvaluators.h +183 -142
  76. package/eigen/Eigen/src/Core/Random.h +37 -1
  77. package/eigen/Eigen/src/Core/Redux.h +180 -170
  78. package/eigen/Eigen/src/Core/Ref.h +118 -21
  79. package/eigen/Eigen/src/Core/Replicate.h +8 -8
  80. package/eigen/Eigen/src/Core/Reshaped.h +454 -0
  81. package/eigen/Eigen/src/Core/ReturnByValue.h +7 -5
  82. package/eigen/Eigen/src/Core/Reverse.h +18 -12
  83. package/eigen/Eigen/src/Core/Select.h +8 -6
  84. package/eigen/Eigen/src/Core/SelfAdjointView.h +33 -20
  85. package/eigen/Eigen/src/Core/Solve.h +14 -14
  86. package/eigen/Eigen/src/Core/SolveTriangular.h +13 -13
  87. package/eigen/Eigen/src/Core/SolverBase.h +41 -3
  88. package/eigen/Eigen/src/Core/StableNorm.h +100 -70
  89. package/eigen/Eigen/src/Core/StlIterators.h +463 -0
  90. package/eigen/Eigen/src/Core/Stride.h +9 -4
  91. package/eigen/Eigen/src/Core/Swap.h +5 -4
  92. package/eigen/Eigen/src/Core/Transpose.h +86 -27
  93. package/eigen/Eigen/src/Core/Transpositions.h +26 -8
  94. package/eigen/Eigen/src/Core/TriangularMatrix.h +88 -72
  95. package/eigen/Eigen/src/Core/VectorBlock.h +5 -5
  96. package/eigen/Eigen/src/Core/VectorwiseOp.h +159 -70
  97. package/eigen/Eigen/src/Core/Visitor.h +137 -29
  98. package/eigen/Eigen/src/Core/arch/AVX/Complex.h +50 -129
  99. package/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +126 -337
  100. package/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +1092 -155
  101. package/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +65 -1
  102. package/eigen/Eigen/src/Core/arch/AVX512/Complex.h +422 -0
  103. package/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +186 -213
  104. package/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1250 -252
  105. package/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +89 -0
  106. package/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +152 -165
  107. package/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +19 -251
  108. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2937 -0
  109. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +221 -0
  110. package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +629 -0
  111. package/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2042 -392
  112. package/eigen/Eigen/src/Core/arch/CUDA/Complex.h +235 -80
  113. package/eigen/Eigen/src/Core/arch/Default/BFloat16.h +700 -0
  114. package/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +102 -14
  115. package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1649 -0
  116. package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +110 -0
  117. package/eigen/Eigen/src/Core/arch/Default/Half.h +942 -0
  118. package/eigen/Eigen/src/Core/arch/Default/Settings.h +1 -1
  119. package/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +120 -0
  120. package/eigen/Eigen/src/Core/arch/{CUDA → GPU}/MathFunctions.h +16 -4
  121. package/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1685 -0
  122. package/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +80 -0
  123. package/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
  124. package/eigen/Eigen/src/Core/arch/MSA/Complex.h +648 -0
  125. package/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +387 -0
  126. package/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1233 -0
  127. package/eigen/Eigen/src/Core/arch/NEON/Complex.h +313 -219
  128. package/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +183 -0
  129. package/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +54 -70
  130. package/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4376 -549
  131. package/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1419 -0
  132. package/eigen/Eigen/src/Core/arch/SSE/Complex.h +59 -179
  133. package/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +65 -428
  134. package/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +893 -283
  135. package/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +65 -0
  136. package/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +44 -0
  137. package/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +752 -0
  138. package/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +49 -0
  139. package/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +232 -0
  140. package/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +301 -0
  141. package/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +670 -0
  142. package/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +694 -0
  143. package/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +85 -0
  144. package/eigen/Eigen/src/Core/arch/ZVector/Complex.h +212 -183
  145. package/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +101 -5
  146. package/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +510 -395
  147. package/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +11 -2
  148. package/eigen/Eigen/src/Core/functors/BinaryFunctors.h +112 -46
  149. package/eigen/Eigen/src/Core/functors/NullaryFunctors.h +31 -30
  150. package/eigen/Eigen/src/Core/functors/StlFunctors.h +32 -2
  151. package/eigen/Eigen/src/Core/functors/UnaryFunctors.h +354 -15
  152. package/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1073 -585
  153. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +29 -7
  154. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +4 -4
  155. package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +1 -1
  156. package/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +382 -483
  157. package/eigen/Eigen/src/Core/products/Parallelizer.h +23 -9
  158. package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +23 -6
  159. package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +8 -6
  160. package/eigen/Eigen/src/Core/products/SelfadjointProduct.h +2 -2
  161. package/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +5 -4
  162. package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +3 -3
  163. package/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +5 -3
  164. package/eigen/Eigen/src/Core/products/TriangularSolverVector.h +13 -10
  165. package/eigen/Eigen/src/Core/util/BlasUtil.h +208 -124
  166. package/eigen/Eigen/src/Core/util/ConfigureVectorization.h +512 -0
  167. package/eigen/Eigen/src/Core/util/Constants.h +25 -9
  168. package/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +14 -2
  169. package/eigen/Eigen/src/Core/util/ForwardDeclarations.h +28 -4
  170. package/eigen/Eigen/src/Core/util/IndexedViewHelper.h +186 -0
  171. package/eigen/Eigen/src/Core/util/IntegralConstant.h +272 -0
  172. package/eigen/Eigen/src/Core/util/MKL_support.h +8 -1
  173. package/eigen/Eigen/src/Core/util/Macros.h +661 -250
  174. package/eigen/Eigen/src/Core/util/Memory.h +222 -52
  175. package/eigen/Eigen/src/Core/util/Meta.h +349 -105
  176. package/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
  177. package/eigen/Eigen/src/Core/util/StaticAssert.h +8 -5
  178. package/eigen/Eigen/src/Core/util/SymbolicIndex.h +293 -0
  179. package/eigen/Eigen/src/Core/util/XprHelper.h +48 -30
  180. package/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +1 -1
  181. package/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +1 -1
  182. package/eigen/Eigen/src/Eigenvalues/EigenSolver.h +2 -2
  183. package/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +1 -1
  184. package/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +2 -2
  185. package/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +2 -2
  186. package/eigen/Eigen/src/Eigenvalues/RealQZ.h +9 -6
  187. package/eigen/Eigen/src/Eigenvalues/RealSchur.h +10 -5
  188. package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +75 -42
  189. package/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +20 -15
  190. package/eigen/Eigen/src/Geometry/AlignedBox.h +99 -5
  191. package/eigen/Eigen/src/Geometry/AngleAxis.h +4 -4
  192. package/eigen/Eigen/src/Geometry/EulerAngles.h +3 -3
  193. package/eigen/Eigen/src/Geometry/Homogeneous.h +15 -11
  194. package/eigen/Eigen/src/Geometry/Hyperplane.h +1 -1
  195. package/eigen/Eigen/src/Geometry/OrthoMethods.h +3 -2
  196. package/eigen/Eigen/src/Geometry/ParametrizedLine.h +39 -2
  197. package/eigen/Eigen/src/Geometry/Quaternion.h +52 -14
  198. package/eigen/Eigen/src/Geometry/Rotation2D.h +3 -3
  199. package/eigen/Eigen/src/Geometry/Scaling.h +22 -4
  200. package/eigen/Eigen/src/Geometry/Transform.h +86 -65
  201. package/eigen/Eigen/src/Geometry/Translation.h +6 -6
  202. package/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +168 -0
  203. package/eigen/Eigen/src/Householder/BlockHouseholder.h +9 -2
  204. package/eigen/Eigen/src/Householder/Householder.h +8 -4
  205. package/eigen/Eigen/src/Householder/HouseholderSequence.h +123 -48
  206. package/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +15 -15
  207. package/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +7 -23
  208. package/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +5 -22
  209. package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +41 -47
  210. package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +51 -60
  211. package/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +70 -20
  212. package/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +2 -20
  213. package/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +11 -9
  214. package/eigen/Eigen/src/Jacobi/Jacobi.h +31 -10
  215. package/eigen/Eigen/src/KLUSupport/KLUSupport.h +358 -0
  216. package/eigen/Eigen/src/LU/Determinant.h +35 -19
  217. package/eigen/Eigen/src/LU/FullPivLU.h +29 -43
  218. package/eigen/Eigen/src/LU/InverseImpl.h +25 -8
  219. package/eigen/Eigen/src/LU/PartialPivLU.h +67 -57
  220. package/eigen/Eigen/src/LU/arch/InverseSize4.h +351 -0
  221. package/eigen/Eigen/src/OrderingMethods/Amd.h +7 -17
  222. package/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +297 -277
  223. package/eigen/Eigen/src/OrderingMethods/Ordering.h +6 -10
  224. package/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +1 -1
  225. package/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +10 -9
  226. package/eigen/Eigen/src/QR/ColPivHouseholderQR.h +41 -20
  227. package/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +100 -27
  228. package/eigen/Eigen/src/QR/FullPivHouseholderQR.h +59 -22
  229. package/eigen/Eigen/src/QR/HouseholderQR.h +48 -23
  230. package/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +25 -3
  231. package/eigen/Eigen/src/SVD/BDCSVD.h +137 -48
  232. package/eigen/Eigen/src/SVD/JacobiSVD.h +22 -14
  233. package/eigen/Eigen/src/SVD/SVDBase.h +82 -21
  234. package/eigen/Eigen/src/SVD/UpperBidiagonalization.h +3 -3
  235. package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +16 -8
  236. package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +11 -36
  237. package/eigen/Eigen/src/SparseCore/CompressedStorage.h +16 -0
  238. package/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +6 -6
  239. package/eigen/Eigen/src/SparseCore/SparseAssign.h +81 -27
  240. package/eigen/Eigen/src/SparseCore/SparseBlock.h +25 -57
  241. package/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +40 -11
  242. package/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +11 -15
  243. package/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +2 -2
  244. package/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +30 -8
  245. package/eigen/Eigen/src/SparseCore/SparseMatrix.h +124 -10
  246. package/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +5 -12
  247. package/eigen/Eigen/src/SparseCore/SparseProduct.h +13 -1
  248. package/eigen/Eigen/src/SparseCore/SparseRef.h +7 -7
  249. package/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +3 -0
  250. package/eigen/Eigen/src/SparseCore/SparseUtil.h +8 -0
  251. package/eigen/Eigen/src/SparseCore/SparseVector.h +1 -1
  252. package/eigen/Eigen/src/SparseLU/SparseLU.h +160 -10
  253. package/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +1 -1
  254. package/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +76 -2
  255. package/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +2 -2
  256. package/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +1 -1
  257. package/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +1 -1
  258. package/eigen/Eigen/src/SparseQR/SparseQR.h +19 -6
  259. package/eigen/Eigen/src/StlSupport/StdDeque.h +2 -14
  260. package/eigen/Eigen/src/StlSupport/StdList.h +2 -2
  261. package/eigen/Eigen/src/StlSupport/StdVector.h +2 -2
  262. package/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +6 -8
  263. package/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +175 -39
  264. package/eigen/Eigen/src/misc/lapacke.h +5 -4
  265. package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +27 -1
  266. package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +155 -11
  267. package/eigen/Eigen/src/plugins/BlockMethods.h +626 -242
  268. package/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +14 -0
  269. package/eigen/Eigen/src/plugins/IndexedViewMethods.h +262 -0
  270. package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +4 -4
  271. package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +10 -0
  272. package/eigen/Eigen/src/plugins/ReshapedMethods.h +149 -0
  273. package/eigen/README.md +5 -0
  274. package/lib/LibEigen.d.ts +4 -0
  275. package/lib/LibEigen.js +14 -0
  276. package/lib/index.d.ts +1 -1
  277. package/lib/index.js +7 -3
  278. package/package.json +2 -10
  279. package/eigen/Eigen/CMakeLists.txt +0 -19
  280. package/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -675
  281. package/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
  282. package/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
  283. package/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
  284. package/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
  285. package/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
  286. package/lib/eigen.d.ts +0 -2
  287. package/lib/eigen.js +0 -15
@@ -0,0 +1,85 @@
1
+ // This file is part of Eigen, a lightweight C++ template library
2
+ // for linear algebra.
3
+ //
4
+ // Mehdi Goli Codeplay Software Ltd.
5
+ // Ralph Potter Codeplay Software Ltd.
6
+ // Luke Iwanski Codeplay Software Ltd.
7
+ // Contact: <eigen@codeplay.com>
8
+ //
9
+ // This Source Code Form is subject to the terms of the Mozilla
10
+ // Public License v. 2.0. If a copy of the MPL was not distributed
11
+ // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
12
+
13
+ /*****************************************************************
14
+ * TypeCasting.h
15
+ *
16
+ * \brief:
17
+ * TypeCasting
18
+ *
19
+ *****************************************************************/
20
+
21
+ #ifndef EIGEN_TYPE_CASTING_SYCL_H
22
+ #define EIGEN_TYPE_CASTING_SYCL_H
23
+
24
+ namespace Eigen {
25
+
26
+ namespace internal {
27
+ #ifdef SYCL_DEVICE_ONLY
28
+ template <>
29
+ struct type_casting_traits<float, int> {
30
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
31
+ };
32
+
33
+ template <>
34
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_int4
35
+ pcast<cl::sycl::cl_float4, cl::sycl::cl_int4>(const cl::sycl::cl_float4& a) {
36
+ return a
37
+ .template convert<cl::sycl::cl_int, cl::sycl::rounding_mode::automatic>();
38
+ }
39
+
40
+ template <>
41
+ struct type_casting_traits<int, float> {
42
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
43
+ };
44
+
45
+ template <>
46
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4
47
+ pcast<cl::sycl::cl_int4, cl::sycl::cl_float4>(const cl::sycl::cl_int4& a) {
48
+ return a.template convert<cl::sycl::cl_float,
49
+ cl::sycl::rounding_mode::automatic>();
50
+ }
51
+
52
+ template <>
53
+ struct type_casting_traits<double, float> {
54
+ enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 };
55
+ };
56
+
57
+ template <>
58
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4
59
+ pcast<cl::sycl::cl_double2, cl::sycl::cl_float4>(
60
+ const cl::sycl::cl_double2& a, const cl::sycl::cl_double2& b) {
61
+ auto a1 = a.template convert<cl::sycl::cl_float,
62
+ cl::sycl::rounding_mode::automatic>();
63
+ auto b1 = b.template convert<cl::sycl::cl_float,
64
+ cl::sycl::rounding_mode::automatic>();
65
+ return cl::sycl::float4(a1.x(), a1.y(), b1.x(), b1.y());
66
+ }
67
+
68
+ template <>
69
+ struct type_casting_traits<float, double> {
70
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 };
71
+ };
72
+
73
+ template <>
74
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_double2
75
+ pcast<cl::sycl::cl_float4, cl::sycl::cl_double2>(const cl::sycl::cl_float4& a) {
76
+ // Simply discard the second half of the input
77
+ return cl::sycl::cl_double2(a.x(), a.y());
78
+ }
79
+
80
+ #endif
81
+ } // end namespace internal
82
+
83
+ } // end namespace Eigen
84
+
85
+ #endif // EIGEN_TYPE_CASTING_SYCL_H
@@ -15,6 +15,10 @@ namespace Eigen {
15
15
 
16
16
  namespace internal {
17
17
 
18
+ #if !defined(__ARCH__) || (defined(__ARCH__) && __ARCH__ >= 12)
19
+ static Packet4ui p4ui_CONJ_XOR = { 0x00000000, 0x80000000, 0x00000000, 0x80000000 }; //vec_mergeh((Packet4ui)p4i_ZERO, (Packet4ui)p4f_MZERO);
20
+ #endif
21
+
18
22
  static Packet2ul p2ul_CONJ_XOR1 = (Packet2ul) vec_sld((Packet4ui) p2d_ZERO_, (Packet4ui) p2l_ZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 };
19
23
  static Packet2ul p2ul_CONJ_XOR2 = (Packet2ul) vec_sld((Packet4ui) p2l_ZERO, (Packet4ui) p2d_ZERO_, 8);//{ 0x8000000000000000, 0x0000000000000000 };
20
24
 
@@ -29,10 +33,14 @@ struct Packet2cf
29
33
  {
30
34
  EIGEN_STRONG_INLINE Packet2cf() {}
31
35
  EIGEN_STRONG_INLINE explicit Packet2cf(const Packet4f& a) : v(a) {}
36
+ #if !defined(__ARCH__) || (defined(__ARCH__) && __ARCH__ < 12)
32
37
  union {
33
38
  Packet4f v;
34
39
  Packet1cd cd[2];
35
40
  };
41
+ #else
42
+ Packet4f v;
43
+ #endif
36
44
  };
37
45
 
38
46
  template<> struct packet_traits<std::complex<float> > : default_packet_traits
@@ -83,69 +91,33 @@ template<> struct packet_traits<std::complex<double> > : default_packet_traits
83
91
  };
84
92
  };
85
93
 
86
- template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16}; typedef Packet2cf half; };
87
- template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16}; typedef Packet1cd half; };
94
+ template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet2cf half; };
95
+ template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet1cd half; };
88
96
 
89
97
  /* Forward declaration */
90
98
  EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet2cf,2>& kernel);
91
99
 
92
- template<> EIGEN_STRONG_INLINE Packet2cf pload <Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload<Packet4f>((const float*)from)); }
100
+ /* complex<double> first */
93
101
  template<> EIGEN_STRONG_INLINE Packet1cd pload <Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>((const double*)from)); }
94
- template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu<Packet4f>((const float*)from)); }
95
102
  template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>((const double*)from)); }
96
- template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((float*)to, from.v); }
97
103
  template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); }
98
- template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((float*)to, from.v); }
99
104
  template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); }
100
105
 
101
106
  template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from)
102
107
  { /* here we really have to use unaligned loads :( */ return ploadu<Packet1cd>(&from); }
103
108
 
104
- template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
105
- {
106
- Packet2cf res;
107
- res.cd[0] = Packet1cd(vec_ld2f((const float *)&from));
108
- res.cd[1] = res.cd[0];
109
- return res;
110
- }
111
- template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, Index stride)
112
- {
113
- std::complex<float> EIGEN_ALIGN16 af[2];
114
- af[0] = from[0*stride];
115
- af[1] = from[1*stride];
116
- return pload<Packet2cf>(af);
117
- }
118
109
  template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(const std::complex<double>* from, Index stride EIGEN_UNUSED)
119
110
  {
120
111
  return pload<Packet1cd>(from);
121
112
  }
122
- template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, Index stride)
123
- {
124
- std::complex<float> EIGEN_ALIGN16 af[2];
125
- pstore<std::complex<float> >((std::complex<float> *) af, from);
126
- to[0*stride] = af[0];
127
- to[1*stride] = af[1];
128
- }
129
113
  template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet1cd>(std::complex<double>* to, const Packet1cd& from, Index stride EIGEN_UNUSED)
130
114
  {
131
115
  pstore<std::complex<double> >(to, from);
132
116
  }
133
-
134
- template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(padd<Packet4f>(a.v, b.v)); }
135
117
  template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(a.v + b.v); }
136
- template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(psub<Packet4f>(a.v, b.v)); }
137
118
  template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(a.v - b.v); }
138
119
  template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate(Packet2d(a.v))); }
139
- template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) { return Packet2cf(pnegate(Packet4f(a.v))); }
140
120
  template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) { return Packet1cd((Packet2d)vec_xor((Packet2d)a.v, (Packet2d)p2ul_CONJ_XOR2)); }
141
- template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a)
142
- {
143
- Packet2cf res;
144
- res.v.v4f[0] = pconj(Packet1cd(reinterpret_cast<Packet2d>(a.v.v4f[0]))).v;
145
- res.v.v4f[1] = pconj(Packet1cd(reinterpret_cast<Packet2d>(a.v.v4f[1]))).v;
146
- return res;
147
- }
148
-
149
121
  template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
150
122
  {
151
123
  Packet2d a_re, a_im, v1, v2;
@@ -163,27 +135,17 @@ template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, con
163
135
 
164
136
  return Packet1cd(v1 + v2);
165
137
  }
166
- template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
167
- {
168
- Packet2cf res;
169
- res.v.v4f[0] = pmul(Packet1cd(reinterpret_cast<Packet2d>(a.v.v4f[0])), Packet1cd(reinterpret_cast<Packet2d>(b.v.v4f[0]))).v;
170
- res.v.v4f[1] = pmul(Packet1cd(reinterpret_cast<Packet2d>(a.v.v4f[1])), Packet1cd(reinterpret_cast<Packet2d>(b.v.v4f[1]))).v;
171
- return res;
172
- }
173
-
174
- template<> EIGEN_STRONG_INLINE Packet1cd pand <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_and(a.v,b.v)); }
175
- template<> EIGEN_STRONG_INLINE Packet2cf pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pand<Packet4f>(a.v,b.v)); }
176
- template<> EIGEN_STRONG_INLINE Packet1cd por <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_or(a.v,b.v)); }
177
- template<> EIGEN_STRONG_INLINE Packet2cf por <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(por<Packet4f>(a.v,b.v)); }
178
- template<> EIGEN_STRONG_INLINE Packet1cd pxor <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_xor(a.v,b.v)); }
179
- template<> EIGEN_STRONG_INLINE Packet2cf pxor <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pxor<Packet4f>(a.v,b.v)); }
180
- template<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_and(a.v, vec_nor(b.v,b.v))); }
181
- template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pandnot<Packet4f>(a.v,b.v)); }
182
-
138
+ template<> EIGEN_STRONG_INLINE Packet1cd pand <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_and(a.v,b.v)); }
139
+ template<> EIGEN_STRONG_INLINE Packet1cd por <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_or(a.v,b.v)); }
140
+ template<> EIGEN_STRONG_INLINE Packet1cd pxor <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_xor(a.v,b.v)); }
141
+ template<> EIGEN_STRONG_INLINE Packet1cd pandnot <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_and(a.v, vec_nor(b.v,b.v))); }
183
142
  template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from) { return pset1<Packet1cd>(*from); }
184
- template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from) { return pset1<Packet2cf>(*from); }
143
+ template<> EIGEN_STRONG_INLINE Packet1cd pcmp_eq(const Packet1cd& a, const Packet1cd& b) {
144
+ Packet2d eq = vec_cmpeq (a.v, b.v);
145
+ Packet2d tmp = { eq[1], eq[0] };
146
+ return (Packet1cd)pand<Packet2d>(eq, tmp);
147
+ }
185
148
 
186
- template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { EIGEN_ZVECTOR_PREFETCH(addr); }
187
149
  template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { EIGEN_ZVECTOR_PREFETCH(addr); }
188
150
 
189
151
  template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a)
@@ -193,160 +155,157 @@ template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Pac
193
155
 
194
156
  return res;
195
157
  }
196
- template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
197
- {
198
- std::complex<float> EIGEN_ALIGN16 res[2];
199
- pstore<std::complex<float> >(res, a);
200
-
201
- return res[0];
202
- }
203
158
 
204
159
  template<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) { return a; }
205
- template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a)
160
+ template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a)
206
161
  {
207
- Packet2cf res;
208
- res.cd[0] = a.cd[1];
209
- res.cd[1] = a.cd[0];
210
- return res;
162
+ return pfirst(a);
211
163
  }
212
-
213
- template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a)
164
+ template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a)
214
165
  {
215
166
  return pfirst(a);
216
167
  }
217
- template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a)
168
+ EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d)
169
+
170
+ template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
218
171
  {
219
- std::complex<float> res;
220
- Packet1cd b = padd<Packet1cd>(a.cd[0], a.cd[1]);
221
- vec_st2f(b.v, (float*)&res);
222
- return res;
172
+ // TODO optimize it for AltiVec
173
+ Packet1cd res = pmul(a,pconj(b));
174
+ Packet2d s = vec_madd(b.v, b.v, p2d_ZERO_);
175
+ return Packet1cd(pdiv(res.v, s + vec_perm(s, s, p16uc_REVERSE64)));
223
176
  }
224
177
 
225
- template<> EIGEN_STRONG_INLINE Packet1cd preduxp<Packet1cd>(const Packet1cd* vecs)
178
+ EIGEN_STRONG_INLINE Packet1cd pcplxflip/*<Packet1cd>*/(const Packet1cd& x)
226
179
  {
227
- return vecs[0];
180
+ return Packet1cd(preverse(Packet2d(x.v)));
228
181
  }
229
- template<> EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(const Packet2cf* vecs)
182
+
183
+ EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet1cd,2>& kernel)
230
184
  {
231
- PacketBlock<Packet2cf,2> transpose;
232
- transpose.packet[0] = vecs[0];
233
- transpose.packet[1] = vecs[1];
234
- ptranspose(transpose);
185
+ Packet2d tmp = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_HI);
186
+ kernel.packet[1].v = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_LO);
187
+ kernel.packet[0].v = tmp;
188
+ }
235
189
 
236
- return padd<Packet2cf>(transpose.packet[0], transpose.packet[1]);
237
- }
190
+ /* complex<float> follows */
191
+ template<> EIGEN_STRONG_INLINE Packet2cf pload <Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload<Packet4f>((const float*)from)); }
192
+ template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu<Packet4f>((const float*)from)); }
193
+ template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((float*)to, from.v); }
194
+ template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((float*)to, from.v); }
238
195
 
239
- template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a)
196
+ template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
240
197
  {
241
- return pfirst(a);
198
+ std::complex<float> EIGEN_ALIGN16 res[2];
199
+ pstore<std::complex<float> >(res, a);
200
+
201
+ return res[0];
242
202
  }
243
- template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a)
203
+
204
+
205
+ #if !defined(__ARCH__) || (defined(__ARCH__) && __ARCH__ < 12)
206
+ template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
244
207
  {
245
- std::complex<float> res;
246
- Packet1cd b = pmul<Packet1cd>(a.cd[0], a.cd[1]);
247
- vec_st2f(b.v, (float*)&res);
208
+ Packet2cf res;
209
+ res.cd[0] = Packet1cd(vec_ld2f((const float *)&from));
210
+ res.cd[1] = res.cd[0];
248
211
  return res;
249
212
  }
250
-
251
- template<int Offset>
252
- struct palign_impl<Offset,Packet1cd>
213
+ #else
214
+ template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
253
215
  {
254
- static EIGEN_STRONG_INLINE void run(Packet1cd& /*first*/, const Packet1cd& /*second*/)
255
- {
256
- // FIXME is it sure we never have to align a Packet1cd?
257
- // Even though a std::complex<double> has 16 bytes, it is not necessarily aligned on a 16 bytes boundary...
258
- }
259
- };
216
+ Packet2cf res;
217
+ if((std::ptrdiff_t(&from) % 16) == 0)
218
+ res.v = pload<Packet4f>((const float *)&from);
219
+ else
220
+ res.v = ploadu<Packet4f>((const float *)&from);
221
+ res.v = vec_perm(res.v, res.v, p16uc_PSET64_HI);
222
+ return res;
223
+ }
224
+ #endif
260
225
 
261
- template<int Offset>
262
- struct palign_impl<Offset,Packet2cf>
226
+ template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, Index stride)
263
227
  {
264
- static EIGEN_STRONG_INLINE void run(Packet2cf& first, const Packet2cf& second)
265
- {
266
- if (Offset == 1) {
267
- first.cd[0] = first.cd[1];
268
- first.cd[1] = second.cd[0];
269
- }
270
- }
271
- };
272
-
273
- template<> struct conj_helper<Packet1cd, Packet1cd, false,true>
228
+ std::complex<float> EIGEN_ALIGN16 af[2];
229
+ af[0] = from[0*stride];
230
+ af[1] = from[1*stride];
231
+ return pload<Packet2cf>(af);
232
+ }
233
+ template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, Index stride)
274
234
  {
275
- EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
276
- { return padd(pmul(x,y),c); }
235
+ std::complex<float> EIGEN_ALIGN16 af[2];
236
+ pstore<std::complex<float> >((std::complex<float> *) af, from);
237
+ to[0*stride] = af[0];
238
+ to[1*stride] = af[1];
239
+ }
277
240
 
278
- EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
279
- {
280
- return internal::pmul(a, pconj(b));
281
- }
282
- };
241
+ template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(padd<Packet4f>(a.v, b.v)); }
242
+ template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(psub<Packet4f>(a.v, b.v)); }
243
+ template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) { return Packet2cf(pnegate(Packet4f(a.v))); }
283
244
 
284
- template<> struct conj_helper<Packet1cd, Packet1cd, true,false>
285
- {
286
- EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
287
- { return padd(pmul(x,y),c); }
245
+ template<> EIGEN_STRONG_INLINE Packet2cf pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pand<Packet4f>(a.v,b.v)); }
246
+ template<> EIGEN_STRONG_INLINE Packet2cf por <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(por<Packet4f>(a.v,b.v)); }
247
+ template<> EIGEN_STRONG_INLINE Packet2cf pxor <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pxor<Packet4f>(a.v,b.v)); }
248
+ template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pandnot<Packet4f>(a.v,b.v)); }
288
249
 
289
- EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
290
- {
291
- return internal::pmul(pconj(a), b);
292
- }
293
- };
250
+ template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from) { return pset1<Packet2cf>(*from); }
294
251
 
295
- template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
296
- {
297
- EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
298
- { return padd(pmul(x,y),c); }
252
+ template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { EIGEN_ZVECTOR_PREFETCH(addr); }
299
253
 
300
- EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
301
- {
302
- return pconj(internal::pmul(a, b));
303
- }
304
- };
305
254
 
306
- template<> struct conj_helper<Packet2cf, Packet2cf, false,true>
307
- {
308
- EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
309
- { return padd(pmul(x,y),c); }
255
+ #if !defined(__ARCH__) || (defined(__ARCH__) && __ARCH__ < 12)
310
256
 
311
- EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
312
- {
313
- return internal::pmul(a, pconj(b));
314
- }
315
- };
257
+ template<> EIGEN_STRONG_INLINE Packet2cf pcmp_eq(const Packet2cf& a, const Packet2cf& b) {
258
+ Packet4f eq = pcmp_eq<Packet4f> (a.v, b.v);
259
+ Packet2cf res;
260
+ Packet2d tmp1 = { eq.v4f[0][1], eq.v4f[0][0] };
261
+ Packet2d tmp2 = { eq.v4f[1][1], eq.v4f[1][0] };
262
+ res.v.v4f[0] = pand<Packet2d>(eq.v4f[0], tmp1);
263
+ res.v.v4f[1] = pand<Packet2d>(eq.v4f[1], tmp2);
264
+ return res;
265
+ }
316
266
 
317
- template<> struct conj_helper<Packet2cf, Packet2cf, true,false>
267
+ template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a)
318
268
  {
319
- EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
320
- { return padd(pmul(x,y),c); }
321
-
322
- EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
323
- {
324
- return internal::pmul(pconj(a), b);
325
- }
326
- };
269
+ Packet2cf res;
270
+ res.v.v4f[0] = pconj(Packet1cd(reinterpret_cast<Packet2d>(a.v.v4f[0]))).v;
271
+ res.v.v4f[1] = pconj(Packet1cd(reinterpret_cast<Packet2d>(a.v.v4f[1]))).v;
272
+ return res;
273
+ }
327
274
 
328
- template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
275
+ template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
329
276
  {
330
- EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
331
- { return padd(pmul(x,y),c); }
277
+ Packet2cf res;
278
+ res.v.v4f[0] = pmul(Packet1cd(reinterpret_cast<Packet2d>(a.v.v4f[0])), Packet1cd(reinterpret_cast<Packet2d>(b.v.v4f[0]))).v;
279
+ res.v.v4f[1] = pmul(Packet1cd(reinterpret_cast<Packet2d>(a.v.v4f[1])), Packet1cd(reinterpret_cast<Packet2d>(b.v.v4f[1]))).v;
280
+ return res;
281
+ }
332
282
 
333
- EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
334
- {
335
- return pconj(internal::pmul(a, b));
336
- }
337
- };
283
+ template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a)
284
+ {
285
+ Packet2cf res;
286
+ res.cd[0] = a.cd[1];
287
+ res.cd[1] = a.cd[0];
288
+ return res;
289
+ }
338
290
 
339
- EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)
340
- EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d)
291
+ template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a)
292
+ {
293
+ std::complex<float> res;
294
+ Packet1cd b = padd<Packet1cd>(a.cd[0], a.cd[1]);
295
+ vec_st2f(b.v, (float*)&res);
296
+ return res;
297
+ }
341
298
 
342
- template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
299
+ template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a)
343
300
  {
344
- // TODO optimize it for AltiVec
345
- Packet1cd res = conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b);
346
- Packet2d s = vec_madd(b.v, b.v, p2d_ZERO_);
347
- return Packet1cd(pdiv(res.v, s + vec_perm(s, s, p16uc_REVERSE64)));
301
+ std::complex<float> res;
302
+ Packet1cd b = pmul<Packet1cd>(a.cd[0], a.cd[1]);
303
+ vec_st2f(b.v, (float*)&res);
304
+ return res;
348
305
  }
349
306
 
307
+ EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)
308
+
350
309
  template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
351
310
  {
352
311
  // TODO optimize it for AltiVec
@@ -356,11 +315,6 @@ template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, con
356
315
  return res;
357
316
  }
358
317
 
359
- EIGEN_STRONG_INLINE Packet1cd pcplxflip/*<Packet1cd>*/(const Packet1cd& x)
360
- {
361
- return Packet1cd(preverse(Packet2d(x.v)));
362
- }
363
-
364
318
  EIGEN_STRONG_INLINE Packet2cf pcplxflip/*<Packet2cf>*/(const Packet2cf& x)
365
319
  {
366
320
  Packet2cf res;
@@ -369,13 +323,6 @@ EIGEN_STRONG_INLINE Packet2cf pcplxflip/*<Packet2cf>*/(const Packet2cf& x)
369
323
  return res;
370
324
  }
371
325
 
372
- EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet1cd,2>& kernel)
373
- {
374
- Packet2d tmp = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_HI);
375
- kernel.packet[1].v = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_LO);
376
- kernel.packet[0].v = tmp;
377
- }
378
-
379
326
  EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet2cf,2>& kernel)
380
327
  {
381
328
  Packet1cd tmp = kernel.packet[0].cd[1];
@@ -389,6 +336,88 @@ template<> EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, con
389
336
  result.v = pblend<Packet4f>(ifPacket4, thenPacket.v, elsePacket.v);
390
337
  return result;
391
338
  }
339
+ #else
340
+ template<> EIGEN_STRONG_INLINE Packet2cf pcmp_eq(const Packet2cf& a, const Packet2cf& b) {
341
+ Packet4f eq = vec_cmpeq (a.v, b.v);
342
+ Packet4f tmp = { eq[1], eq[0], eq[3], eq[2] };
343
+ return (Packet2cf)pand<Packet4f>(eq, tmp);
344
+ }
345
+ template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a) { return Packet2cf(pxor<Packet4f>(a.v, reinterpret_cast<Packet4f>(p4ui_CONJ_XOR))); }
346
+ template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
347
+ {
348
+ Packet4f a_re, a_im, prod, prod_im;
349
+
350
+ // Permute and multiply the real parts of a and b
351
+ a_re = vec_perm(a.v, a.v, p16uc_PSET32_WODD);
352
+
353
+ // Get the imaginary parts of a
354
+ a_im = vec_perm(a.v, a.v, p16uc_PSET32_WEVEN);
355
+
356
+ // multiply a_im * b and get the conjugate result
357
+ prod_im = a_im * b.v;
358
+ prod_im = pxor<Packet4f>(prod_im, reinterpret_cast<Packet4f>(p4ui_CONJ_XOR));
359
+ // permute back to a proper order
360
+ prod_im = vec_perm(prod_im, prod_im, p16uc_COMPLEX32_REV);
361
+
362
+ // multiply a_re * b, add prod_im
363
+ prod = pmadd<Packet4f>(a_re, b.v, prod_im);
364
+
365
+ return Packet2cf(prod);
366
+ }
367
+
368
+ template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a)
369
+ {
370
+ Packet4f rev_a;
371
+ rev_a = vec_perm(a.v, a.v, p16uc_COMPLEX32_REV2);
372
+ return Packet2cf(rev_a);
373
+ }
374
+
375
+ template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a)
376
+ {
377
+ Packet4f b;
378
+ b = vec_sld(a.v, a.v, 8);
379
+ b = padd<Packet4f>(a.v, b);
380
+ return pfirst<Packet2cf>(Packet2cf(b));
381
+ }
382
+
383
+ template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a)
384
+ {
385
+ Packet4f b;
386
+ Packet2cf prod;
387
+ b = vec_sld(a.v, a.v, 8);
388
+ prod = pmul<Packet2cf>(a, Packet2cf(b));
389
+
390
+ return pfirst<Packet2cf>(prod);
391
+ }
392
+
393
+ EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)
394
+
395
+ template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
396
+ {
397
+ // TODO optimize it for AltiVec
398
+ Packet2cf res = pmul(a, pconj(b));
399
+ Packet4f s = pmul<Packet4f>(b.v, b.v);
400
+ return Packet2cf(pdiv(res.v, padd<Packet4f>(s, vec_perm(s, s, p16uc_COMPLEX32_REV))));
401
+ }
402
+
403
+ template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(const Packet2cf& x)
404
+ {
405
+ return Packet2cf(vec_perm(x.v, x.v, p16uc_COMPLEX32_REV));
406
+ }
407
+
408
+ EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet2cf,2>& kernel)
409
+ {
410
+ Packet4f tmp = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_HI);
411
+ kernel.packet[1].v = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_LO);
412
+ kernel.packet[0].v = tmp;
413
+ }
414
+
415
+ template<> EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, const Packet2cf& thenPacket, const Packet2cf& elsePacket) {
416
+ Packet2cf result;
417
+ result.v = reinterpret_cast<Packet4f>(pblend<Packet2d>(ifPacket, reinterpret_cast<Packet2d>(thenPacket.v), reinterpret_cast<Packet2d>(elsePacket.v)));
418
+ return result;
419
+ }
420
+ #endif
392
421
 
393
422
  } // end namespace internal
394
423