ruby-eigen 0.0.9 → 0.0.10.pre1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (293) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE +22 -0
  3. data/README.md +21 -0
  4. data/ext/eigen/eigen3/COPYING.BSD +26 -0
  5. data/ext/eigen/eigen3/COPYING.MPL2 +373 -0
  6. data/ext/eigen/eigen3/COPYING.README +18 -0
  7. data/ext/eigen/eigen3/Eigen/Array +11 -0
  8. data/ext/eigen/eigen3/Eigen/Cholesky +32 -0
  9. data/ext/eigen/eigen3/Eigen/CholmodSupport +45 -0
  10. data/ext/eigen/eigen3/Eigen/Core +376 -0
  11. data/ext/eigen/eigen3/Eigen/Dense +7 -0
  12. data/ext/eigen/eigen3/Eigen/Eigen +2 -0
  13. data/ext/eigen/eigen3/Eigen/Eigen2Support +95 -0
  14. data/ext/eigen/eigen3/Eigen/Eigenvalues +48 -0
  15. data/ext/eigen/eigen3/Eigen/Geometry +63 -0
  16. data/ext/eigen/eigen3/Eigen/Householder +23 -0
  17. data/ext/eigen/eigen3/Eigen/IterativeLinearSolvers +40 -0
  18. data/ext/eigen/eigen3/Eigen/Jacobi +26 -0
  19. data/ext/eigen/eigen3/Eigen/LU +41 -0
  20. data/ext/eigen/eigen3/Eigen/LeastSquares +32 -0
  21. data/ext/eigen/eigen3/Eigen/MetisSupport +28 -0
  22. data/ext/eigen/eigen3/Eigen/PaStiXSupport +46 -0
  23. data/ext/eigen/eigen3/Eigen/PardisoSupport +30 -0
  24. data/ext/eigen/eigen3/Eigen/QR +45 -0
  25. data/ext/eigen/eigen3/Eigen/QtAlignedMalloc +34 -0
  26. data/ext/eigen/eigen3/Eigen/SPQRSupport +29 -0
  27. data/ext/eigen/eigen3/Eigen/SVD +37 -0
  28. data/ext/eigen/eigen3/Eigen/Sparse +27 -0
  29. data/ext/eigen/eigen3/Eigen/SparseCore +64 -0
  30. data/ext/eigen/eigen3/Eigen/SparseLU +49 -0
  31. data/ext/eigen/eigen3/Eigen/SparseQR +33 -0
  32. data/ext/eigen/eigen3/Eigen/StdDeque +27 -0
  33. data/ext/eigen/eigen3/Eigen/StdList +26 -0
  34. data/ext/eigen/eigen3/Eigen/StdVector +27 -0
  35. data/ext/eigen/eigen3/Eigen/SuperLUSupport +59 -0
  36. data/ext/eigen/eigen3/Eigen/UmfPackSupport +36 -0
  37. data/ext/eigen/eigen3/Eigen/src/Cholesky/LDLT.h +611 -0
  38. data/ext/eigen/eigen3/Eigen/src/Cholesky/LLT.h +498 -0
  39. data/ext/eigen/eigen3/Eigen/src/Cholesky/LLT_MKL.h +102 -0
  40. data/ext/eigen/eigen3/Eigen/src/CholmodSupport/CholmodSupport.h +607 -0
  41. data/ext/eigen/eigen3/Eigen/src/Core/Array.h +323 -0
  42. data/ext/eigen/eigen3/Eigen/src/Core/ArrayBase.h +226 -0
  43. data/ext/eigen/eigen3/Eigen/src/Core/ArrayWrapper.h +264 -0
  44. data/ext/eigen/eigen3/Eigen/src/Core/Assign.h +590 -0
  45. data/ext/eigen/eigen3/Eigen/src/Core/Assign_MKL.h +224 -0
  46. data/ext/eigen/eigen3/Eigen/src/Core/BandMatrix.h +334 -0
  47. data/ext/eigen/eigen3/Eigen/src/Core/Block.h +406 -0
  48. data/ext/eigen/eigen3/Eigen/src/Core/BooleanRedux.h +154 -0
  49. data/ext/eigen/eigen3/Eigen/src/Core/CommaInitializer.h +154 -0
  50. data/ext/eigen/eigen3/Eigen/src/Core/CoreIterators.h +61 -0
  51. data/ext/eigen/eigen3/Eigen/src/Core/CwiseBinaryOp.h +230 -0
  52. data/ext/eigen/eigen3/Eigen/src/Core/CwiseNullaryOp.h +864 -0
  53. data/ext/eigen/eigen3/Eigen/src/Core/CwiseUnaryOp.h +126 -0
  54. data/ext/eigen/eigen3/Eigen/src/Core/CwiseUnaryView.h +139 -0
  55. data/ext/eigen/eigen3/Eigen/src/Core/DenseBase.h +521 -0
  56. data/ext/eigen/eigen3/Eigen/src/Core/DenseCoeffsBase.h +754 -0
  57. data/ext/eigen/eigen3/Eigen/src/Core/DenseStorage.h +434 -0
  58. data/ext/eigen/eigen3/Eigen/src/Core/Diagonal.h +237 -0
  59. data/ext/eigen/eigen3/Eigen/src/Core/DiagonalMatrix.h +313 -0
  60. data/ext/eigen/eigen3/Eigen/src/Core/DiagonalProduct.h +131 -0
  61. data/ext/eigen/eigen3/Eigen/src/Core/Dot.h +263 -0
  62. data/ext/eigen/eigen3/Eigen/src/Core/EigenBase.h +131 -0
  63. data/ext/eigen/eigen3/Eigen/src/Core/Flagged.h +140 -0
  64. data/ext/eigen/eigen3/Eigen/src/Core/ForceAlignedAccess.h +146 -0
  65. data/ext/eigen/eigen3/Eigen/src/Core/Functors.h +1026 -0
  66. data/ext/eigen/eigen3/Eigen/src/Core/Fuzzy.h +150 -0
  67. data/ext/eigen/eigen3/Eigen/src/Core/GeneralProduct.h +635 -0
  68. data/ext/eigen/eigen3/Eigen/src/Core/GenericPacketMath.h +350 -0
  69. data/ext/eigen/eigen3/Eigen/src/Core/GlobalFunctions.h +92 -0
  70. data/ext/eigen/eigen3/Eigen/src/Core/IO.h +250 -0
  71. data/ext/eigen/eigen3/Eigen/src/Core/Map.h +192 -0
  72. data/ext/eigen/eigen3/Eigen/src/Core/MapBase.h +247 -0
  73. data/ext/eigen/eigen3/Eigen/src/Core/MathFunctions.h +768 -0
  74. data/ext/eigen/eigen3/Eigen/src/Core/Matrix.h +420 -0
  75. data/ext/eigen/eigen3/Eigen/src/Core/MatrixBase.h +563 -0
  76. data/ext/eigen/eigen3/Eigen/src/Core/NestByValue.h +111 -0
  77. data/ext/eigen/eigen3/Eigen/src/Core/NoAlias.h +134 -0
  78. data/ext/eigen/eigen3/Eigen/src/Core/NumTraits.h +150 -0
  79. data/ext/eigen/eigen3/Eigen/src/Core/PermutationMatrix.h +721 -0
  80. data/ext/eigen/eigen3/Eigen/src/Core/PlainObjectBase.h +822 -0
  81. data/ext/eigen/eigen3/Eigen/src/Core/ProductBase.h +290 -0
  82. data/ext/eigen/eigen3/Eigen/src/Core/Random.h +152 -0
  83. data/ext/eigen/eigen3/Eigen/src/Core/Redux.h +409 -0
  84. data/ext/eigen/eigen3/Eigen/src/Core/Ref.h +278 -0
  85. data/ext/eigen/eigen3/Eigen/src/Core/Replicate.h +177 -0
  86. data/ext/eigen/eigen3/Eigen/src/Core/ReturnByValue.h +99 -0
  87. data/ext/eigen/eigen3/Eigen/src/Core/Reverse.h +224 -0
  88. data/ext/eigen/eigen3/Eigen/src/Core/Select.h +162 -0
  89. data/ext/eigen/eigen3/Eigen/src/Core/SelfAdjointView.h +314 -0
  90. data/ext/eigen/eigen3/Eigen/src/Core/SelfCwiseBinaryOp.h +191 -0
  91. data/ext/eigen/eigen3/Eigen/src/Core/SolveTriangular.h +260 -0
  92. data/ext/eigen/eigen3/Eigen/src/Core/StableNorm.h +203 -0
  93. data/ext/eigen/eigen3/Eigen/src/Core/Stride.h +108 -0
  94. data/ext/eigen/eigen3/Eigen/src/Core/Swap.h +126 -0
  95. data/ext/eigen/eigen3/Eigen/src/Core/Transpose.h +419 -0
  96. data/ext/eigen/eigen3/Eigen/src/Core/Transpositions.h +436 -0
  97. data/ext/eigen/eigen3/Eigen/src/Core/TriangularMatrix.h +839 -0
  98. data/ext/eigen/eigen3/Eigen/src/Core/VectorBlock.h +95 -0
  99. data/ext/eigen/eigen3/Eigen/src/Core/VectorwiseOp.h +642 -0
  100. data/ext/eigen/eigen3/Eigen/src/Core/Visitor.h +237 -0
  101. data/ext/eigen/eigen3/Eigen/src/Core/arch/AltiVec/Complex.h +217 -0
  102. data/ext/eigen/eigen3/Eigen/src/Core/arch/AltiVec/PacketMath.h +501 -0
  103. data/ext/eigen/eigen3/Eigen/src/Core/arch/Default/Settings.h +49 -0
  104. data/ext/eigen/eigen3/Eigen/src/Core/arch/NEON/Complex.h +253 -0
  105. data/ext/eigen/eigen3/Eigen/src/Core/arch/NEON/PacketMath.h +420 -0
  106. data/ext/eigen/eigen3/Eigen/src/Core/arch/SSE/Complex.h +442 -0
  107. data/ext/eigen/eigen3/Eigen/src/Core/arch/SSE/MathFunctions.h +475 -0
  108. data/ext/eigen/eigen3/Eigen/src/Core/arch/SSE/PacketMath.h +649 -0
  109. data/ext/eigen/eigen3/Eigen/src/Core/products/CoeffBasedProduct.h +476 -0
  110. data/ext/eigen/eigen3/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1341 -0
  111. data/ext/eigen/eigen3/Eigen/src/Core/products/GeneralMatrixMatrix.h +427 -0
  112. data/ext/eigen/eigen3/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +278 -0
  113. data/ext/eigen/eigen3/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_MKL.h +146 -0
  114. data/ext/eigen/eigen3/Eigen/src/Core/products/GeneralMatrixMatrix_MKL.h +118 -0
  115. data/ext/eigen/eigen3/Eigen/src/Core/products/GeneralMatrixVector.h +566 -0
  116. data/ext/eigen/eigen3/Eigen/src/Core/products/GeneralMatrixVector_MKL.h +131 -0
  117. data/ext/eigen/eigen3/Eigen/src/Core/products/Parallelizer.h +162 -0
  118. data/ext/eigen/eigen3/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +436 -0
  119. data/ext/eigen/eigen3/Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h +295 -0
  120. data/ext/eigen/eigen3/Eigen/src/Core/products/SelfadjointMatrixVector.h +281 -0
  121. data/ext/eigen/eigen3/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h +114 -0
  122. data/ext/eigen/eigen3/Eigen/src/Core/products/SelfadjointProduct.h +123 -0
  123. data/ext/eigen/eigen3/Eigen/src/Core/products/SelfadjointRank2Update.h +93 -0
  124. data/ext/eigen/eigen3/Eigen/src/Core/products/TriangularMatrixMatrix.h +427 -0
  125. data/ext/eigen/eigen3/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h +309 -0
  126. data/ext/eigen/eigen3/Eigen/src/Core/products/TriangularMatrixVector.h +348 -0
  127. data/ext/eigen/eigen3/Eigen/src/Core/products/TriangularMatrixVector_MKL.h +247 -0
  128. data/ext/eigen/eigen3/Eigen/src/Core/products/TriangularSolverMatrix.h +332 -0
  129. data/ext/eigen/eigen3/Eigen/src/Core/products/TriangularSolverMatrix_MKL.h +155 -0
  130. data/ext/eigen/eigen3/Eigen/src/Core/products/TriangularSolverVector.h +139 -0
  131. data/ext/eigen/eigen3/Eigen/src/Core/util/BlasUtil.h +264 -0
  132. data/ext/eigen/eigen3/Eigen/src/Core/util/Constants.h +451 -0
  133. data/ext/eigen/eigen3/Eigen/src/Core/util/DisableStupidWarnings.h +40 -0
  134. data/ext/eigen/eigen3/Eigen/src/Core/util/ForwardDeclarations.h +302 -0
  135. data/ext/eigen/eigen3/Eigen/src/Core/util/MKL_support.h +158 -0
  136. data/ext/eigen/eigen3/Eigen/src/Core/util/Macros.h +451 -0
  137. data/ext/eigen/eigen3/Eigen/src/Core/util/Memory.h +977 -0
  138. data/ext/eigen/eigen3/Eigen/src/Core/util/Meta.h +243 -0
  139. data/ext/eigen/eigen3/Eigen/src/Core/util/NonMPL2.h +3 -0
  140. data/ext/eigen/eigen3/Eigen/src/Core/util/ReenableStupidWarnings.h +14 -0
  141. data/ext/eigen/eigen3/Eigen/src/Core/util/StaticAssert.h +208 -0
  142. data/ext/eigen/eigen3/Eigen/src/Core/util/XprHelper.h +469 -0
  143. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Block.h +126 -0
  144. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Cwise.h +192 -0
  145. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/CwiseOperators.h +298 -0
  146. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Geometry/AlignedBox.h +159 -0
  147. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Geometry/All.h +115 -0
  148. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Geometry/AngleAxis.h +214 -0
  149. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Geometry/Hyperplane.h +254 -0
  150. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Geometry/ParametrizedLine.h +141 -0
  151. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Geometry/Quaternion.h +495 -0
  152. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Geometry/Rotation2D.h +145 -0
  153. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Geometry/RotationBase.h +123 -0
  154. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Geometry/Scaling.h +167 -0
  155. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Geometry/Transform.h +786 -0
  156. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Geometry/Translation.h +184 -0
  157. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/LU.h +120 -0
  158. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Lazy.h +71 -0
  159. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/LeastSquares.h +169 -0
  160. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Macros.h +20 -0
  161. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/MathFunctions.h +57 -0
  162. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Memory.h +45 -0
  163. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Meta.h +75 -0
  164. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Minor.h +117 -0
  165. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/QR.h +67 -0
  166. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/SVD.h +637 -0
  167. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/TriangularSolver.h +42 -0
  168. data/ext/eigen/eigen3/Eigen/src/Eigen2Support/VectorBlock.h +94 -0
  169. data/ext/eigen/eigen3/Eigen/src/Eigenvalues/ComplexEigenSolver.h +341 -0
  170. data/ext/eigen/eigen3/Eigen/src/Eigenvalues/ComplexSchur.h +456 -0
  171. data/ext/eigen/eigen3/Eigen/src/Eigenvalues/ComplexSchur_MKL.h +94 -0
  172. data/ext/eigen/eigen3/Eigen/src/Eigenvalues/EigenSolver.h +607 -0
  173. data/ext/eigen/eigen3/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +350 -0
  174. data/ext/eigen/eigen3/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +227 -0
  175. data/ext/eigen/eigen3/Eigen/src/Eigenvalues/HessenbergDecomposition.h +373 -0
  176. data/ext/eigen/eigen3/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +160 -0
  177. data/ext/eigen/eigen3/Eigen/src/Eigenvalues/RealQZ.h +624 -0
  178. data/ext/eigen/eigen3/Eigen/src/Eigenvalues/RealSchur.h +525 -0
  179. data/ext/eigen/eigen3/Eigen/src/Eigenvalues/RealSchur_MKL.h +83 -0
  180. data/ext/eigen/eigen3/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +801 -0
  181. data/ext/eigen/eigen3/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_MKL.h +92 -0
  182. data/ext/eigen/eigen3/Eigen/src/Eigenvalues/Tridiagonalization.h +557 -0
  183. data/ext/eigen/eigen3/Eigen/src/Geometry/AlignedBox.h +392 -0
  184. data/ext/eigen/eigen3/Eigen/src/Geometry/AngleAxis.h +233 -0
  185. data/ext/eigen/eigen3/Eigen/src/Geometry/EulerAngles.h +104 -0
  186. data/ext/eigen/eigen3/Eigen/src/Geometry/Homogeneous.h +307 -0
  187. data/ext/eigen/eigen3/Eigen/src/Geometry/Hyperplane.h +280 -0
  188. data/ext/eigen/eigen3/Eigen/src/Geometry/OrthoMethods.h +218 -0
  189. data/ext/eigen/eigen3/Eigen/src/Geometry/ParametrizedLine.h +195 -0
  190. data/ext/eigen/eigen3/Eigen/src/Geometry/Quaternion.h +776 -0
  191. data/ext/eigen/eigen3/Eigen/src/Geometry/Rotation2D.h +160 -0
  192. data/ext/eigen/eigen3/Eigen/src/Geometry/RotationBase.h +206 -0
  193. data/ext/eigen/eigen3/Eigen/src/Geometry/Scaling.h +166 -0
  194. data/ext/eigen/eigen3/Eigen/src/Geometry/Transform.h +1455 -0
  195. data/ext/eigen/eigen3/Eigen/src/Geometry/Translation.h +206 -0
  196. data/ext/eigen/eigen3/Eigen/src/Geometry/Umeyama.h +177 -0
  197. data/ext/eigen/eigen3/Eigen/src/Geometry/arch/Geometry_SSE.h +115 -0
  198. data/ext/eigen/eigen3/Eigen/src/Householder/BlockHouseholder.h +68 -0
  199. data/ext/eigen/eigen3/Eigen/src/Householder/Householder.h +171 -0
  200. data/ext/eigen/eigen3/Eigen/src/Householder/HouseholderSequence.h +441 -0
  201. data/ext/eigen/eigen3/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +149 -0
  202. data/ext/eigen/eigen3/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +263 -0
  203. data/ext/eigen/eigen3/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +256 -0
  204. data/ext/eigen/eigen3/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +282 -0
  205. data/ext/eigen/eigen3/Eigen/src/Jacobi/Jacobi.h +433 -0
  206. data/ext/eigen/eigen3/Eigen/src/LU/Determinant.h +101 -0
  207. data/ext/eigen/eigen3/Eigen/src/LU/FullPivLU.h +751 -0
  208. data/ext/eigen/eigen3/Eigen/src/LU/Inverse.h +400 -0
  209. data/ext/eigen/eigen3/Eigen/src/LU/PartialPivLU.h +509 -0
  210. data/ext/eigen/eigen3/Eigen/src/LU/PartialPivLU_MKL.h +85 -0
  211. data/ext/eigen/eigen3/Eigen/src/LU/arch/Inverse_SSE.h +329 -0
  212. data/ext/eigen/eigen3/Eigen/src/MetisSupport/MetisSupport.h +137 -0
  213. data/ext/eigen/eigen3/Eigen/src/OrderingMethods/Amd.h +444 -0
  214. data/ext/eigen/eigen3/Eigen/src/OrderingMethods/Eigen_Colamd.h +1850 -0
  215. data/ext/eigen/eigen3/Eigen/src/PaStiXSupport/PaStiXSupport.h +721 -0
  216. data/ext/eigen/eigen3/Eigen/src/PardisoSupport/PardisoSupport.h +592 -0
  217. data/ext/eigen/eigen3/Eigen/src/QR/ColPivHouseholderQR.h +580 -0
  218. data/ext/eigen/eigen3/Eigen/src/QR/ColPivHouseholderQR_MKL.h +99 -0
  219. data/ext/eigen/eigen3/Eigen/src/QR/FullPivHouseholderQR.h +622 -0
  220. data/ext/eigen/eigen3/Eigen/src/QR/HouseholderQR.h +388 -0
  221. data/ext/eigen/eigen3/Eigen/src/QR/HouseholderQR_MKL.h +71 -0
  222. data/ext/eigen/eigen3/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +338 -0
  223. data/ext/eigen/eigen3/Eigen/src/SVD/JacobiSVD.h +976 -0
  224. data/ext/eigen/eigen3/Eigen/src/SVD/JacobiSVD_MKL.h +92 -0
  225. data/ext/eigen/eigen3/Eigen/src/SVD/UpperBidiagonalization.h +148 -0
  226. data/ext/eigen/eigen3/Eigen/src/SparseCholesky/SimplicialCholesky.h +671 -0
  227. data/ext/eigen/eigen3/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +199 -0
  228. data/ext/eigen/eigen3/Eigen/src/SparseCore/AmbiVector.h +373 -0
  229. data/ext/eigen/eigen3/Eigen/src/SparseCore/CompressedStorage.h +233 -0
  230. data/ext/eigen/eigen3/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +245 -0
  231. data/ext/eigen/eigen3/Eigen/src/SparseCore/MappedSparseMatrix.h +181 -0
  232. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseBlock.h +537 -0
  233. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseColEtree.h +206 -0
  234. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +325 -0
  235. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +163 -0
  236. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseDenseProduct.h +311 -0
  237. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseDiagonalProduct.h +196 -0
  238. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseDot.h +101 -0
  239. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseFuzzy.h +26 -0
  240. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseMatrix.h +1262 -0
  241. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseMatrixBase.h +461 -0
  242. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparsePermutation.h +148 -0
  243. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseProduct.h +188 -0
  244. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseRedux.h +45 -0
  245. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseSelfAdjointView.h +507 -0
  246. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +150 -0
  247. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseTranspose.h +63 -0
  248. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseTriangularView.h +179 -0
  249. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseUtil.h +172 -0
  250. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseVector.h +448 -0
  251. data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseView.h +99 -0
  252. data/ext/eigen/eigen3/Eigen/src/SparseCore/TriangularSolver.h +334 -0
  253. data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU.h +806 -0
  254. data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLUImpl.h +66 -0
  255. data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_Memory.h +227 -0
  256. data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_Structs.h +111 -0
  257. data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +298 -0
  258. data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_Utils.h +80 -0
  259. data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_column_bmod.h +180 -0
  260. data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_column_dfs.h +177 -0
  261. data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +106 -0
  262. data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +279 -0
  263. data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +127 -0
  264. data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +130 -0
  265. data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_panel_bmod.h +223 -0
  266. data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_panel_dfs.h +258 -0
  267. data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_pivotL.h +137 -0
  268. data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_pruneL.h +135 -0
  269. data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_relax_snode.h +83 -0
  270. data/ext/eigen/eigen3/Eigen/src/SparseQR/SparseQR.h +714 -0
  271. data/ext/eigen/eigen3/Eigen/src/StlSupport/StdDeque.h +134 -0
  272. data/ext/eigen/eigen3/Eigen/src/StlSupport/StdList.h +114 -0
  273. data/ext/eigen/eigen3/Eigen/src/StlSupport/StdVector.h +126 -0
  274. data/ext/eigen/eigen3/Eigen/src/StlSupport/details.h +84 -0
  275. data/ext/eigen/eigen3/Eigen/src/SuperLUSupport/SuperLUSupport.h +1026 -0
  276. data/ext/eigen/eigen3/Eigen/src/UmfPackSupport/UmfPackSupport.h +474 -0
  277. data/ext/eigen/eigen3/Eigen/src/misc/Image.h +84 -0
  278. data/ext/eigen/eigen3/Eigen/src/misc/Kernel.h +81 -0
  279. data/ext/eigen/eigen3/Eigen/src/misc/Solve.h +76 -0
  280. data/ext/eigen/eigen3/Eigen/src/misc/SparseSolve.h +128 -0
  281. data/ext/eigen/eigen3/Eigen/src/misc/blas.h +658 -0
  282. data/ext/eigen/eigen3/Eigen/src/plugins/ArrayCwiseBinaryOps.h +253 -0
  283. data/ext/eigen/eigen3/Eigen/src/plugins/ArrayCwiseUnaryOps.h +187 -0
  284. data/ext/eigen/eigen3/Eigen/src/plugins/BlockMethods.h +935 -0
  285. data/ext/eigen/eigen3/Eigen/src/plugins/CommonCwiseBinaryOps.h +46 -0
  286. data/ext/eigen/eigen3/Eigen/src/plugins/CommonCwiseUnaryOps.h +172 -0
  287. data/ext/eigen/eigen3/Eigen/src/plugins/MatrixCwiseBinaryOps.h +143 -0
  288. data/ext/eigen/eigen3/Eigen/src/plugins/MatrixCwiseUnaryOps.h +52 -0
  289. data/ext/eigen/eigen3/signature_of_eigen3_matrix_library +1 -0
  290. data/ext/eigen/eigen_wrap.cxx +19420 -10396
  291. data/ext/eigen/extconf.rb +37 -2
  292. data/lib/eigen.rb +146 -3
  293. metadata +294 -7
@@ -0,0 +1,49 @@
1
+ // This file is part of Eigen, a lightweight C++ template library
2
+ // for linear algebra.
3
+ //
4
+ // Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
5
+ // Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
6
+ //
7
+ // This Source Code Form is subject to the terms of the Mozilla
8
+ // Public License v. 2.0. If a copy of the MPL was not distributed
9
+ // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
10
+
11
+
12
+ /* All the parameters defined in this file can be specialized in the
13
+ * architecture specific files, and/or by the user.
14
+ * More to come... */
15
+
16
+ #ifndef EIGEN_DEFAULT_SETTINGS_H
17
+ #define EIGEN_DEFAULT_SETTINGS_H
18
+
19
+ /** Defines the maximal loop size to enable meta unrolling of loops.
20
+ * Note that the value here is expressed in Eigen's own notion of "number of FLOPS",
21
+ * it does not correspond to the number of iterations or the number of instructions
22
+ */
23
+ #ifndef EIGEN_UNROLLING_LIMIT
24
+ #define EIGEN_UNROLLING_LIMIT 100
25
+ #endif
26
+
27
+ /** Defines the threshold between a "small" and a "large" matrix.
28
+ * This threshold is mainly used to select the proper product implementation.
29
+ */
30
+ #ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
31
+ #define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
32
+ #endif
33
+
34
+ /** Defines the maximal width of the blocks used in the triangular product and solver
35
+ * for vectors (level 2 blas xTRMV and xTRSV). The default is 8.
36
+ */
37
+ #ifndef EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH
38
+ #define EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH 8
39
+ #endif
40
+
41
+
42
+ /** Defines the default number of registers available for that architecture.
43
+ * Currently it must be 8 or 16. Other values will fail.
44
+ */
45
+ #ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
46
+ #define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 8
47
+ #endif
48
+
49
+ #endif // EIGEN_DEFAULT_SETTINGS_H
@@ -0,0 +1,253 @@
1
+ // This file is part of Eigen, a lightweight C++ template library
2
+ // for linear algebra.
3
+ //
4
+ // Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>
5
+ //
6
+ // This Source Code Form is subject to the terms of the Mozilla
7
+ // Public License v. 2.0. If a copy of the MPL was not distributed
8
+ // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9
+
10
+ #ifndef EIGEN_COMPLEX_NEON_H
11
+ #define EIGEN_COMPLEX_NEON_H
12
+
13
+ namespace Eigen {
14
+
15
+ namespace internal {
16
+
17
+ static uint32x4_t p4ui_CONJ_XOR = EIGEN_INIT_NEON_PACKET4(0x00000000, 0x80000000, 0x00000000, 0x80000000); // XOR mask with only bit 31 set in lanes 1 and 3; used by pconj and pmul below
18
+ static uint32x2_t p2ui_CONJ_XOR = EIGEN_INIT_NEON_PACKET2(0x00000000, 0x80000000); // two-lane variant of the same mask; used by predux_mul below
19
+
20
+ //---------- float ----------
21
+ struct Packet2cf // thin wrapper around one Packet4f, giving complex<float> packets their own type for overloads
22
+ {
23
+ EIGEN_STRONG_INLINE Packet2cf() {} // empty body: v is not explicitly initialized here
24
+ EIGEN_STRONG_INLINE explicit Packet2cf(const Packet4f& a) : v(a) {} // explicit, so a Packet4f never converts silently
25
+ Packet4f v; // raw lanes; the pmul comments below treat them as | re | im | re | im |
26
+ };
27
+
28
+ template<> struct packet_traits<std::complex<float> > : default_packet_traits // capability flags for std::complex<float> packets
29
+ {
30
+ typedef Packet2cf type; // packet type used for this scalar
31
+ enum {
32
+ Vectorizable = 1,
33
+ AlignedOnScalar = 1,
34
+ size = 2, // two complex values per packet (matches unpacket_traits<Packet2cf>)
35
+
36
+ HasAdd = 1, // padd is specialized below
37
+ HasSub = 1, // psub is specialized below
38
+ HasMul = 1, // pmul is specialized below
39
+ HasDiv = 1, // pdiv is specialized below
40
+ HasNegate = 1, // pnegate is specialized below
41
+ HasAbs = 0, // the remaining operations are flagged as unavailable
42
+ HasAbs2 = 0,
43
+ HasMin = 0,
44
+ HasMax = 0,
45
+ HasSetLinear = 0
46
+ };
47
+ };
48
+
49
+ template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2}; }; // reverse mapping: packet type -> scalar type and element count
50
+
51
+ template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from) // broadcast one complex value into both halves of a packet
52
+ {
53
+ float32x2_t r64;
54
+ r64 = vld1_f32((float *)&from); // load two floats starting at &from (the complex value viewed as float[2])
55
+
56
+ return Packet2cf(vcombine_f32(r64, r64)); // same 64-bit pair placed in the low and the high half
57
+ }
58
+
59
+ template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(padd<Packet4f>(a.v,b.v)); } // forwards to the Packet4f add on the wrapped lanes
60
+ template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(psub<Packet4f>(a.v,b.v)); } // forwards to the Packet4f subtract on the wrapped lanes
61
+ template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) { return Packet2cf(pnegate<Packet4f>(a.v)); } // forwards to the Packet4f negate on the wrapped lanes
62
+ template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a) // complex conjugate: toggles bit 31 of lanes 1 and 3
63
+ {
64
+ Packet4ui b = vreinterpretq_u32_f32(a.v); // view the float lanes as raw 32-bit integers
65
+ return Packet2cf(vreinterpretq_f32_u32(veorq_u32(b, p4ui_CONJ_XOR))); // XOR with the mask, then view as floats again
66
+ }
67
+
68
+ template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b) // complex multiply of the two values in a with the two values in b
69
+ {
70
+ Packet4f v1, v2;
71
+
72
+ // Get the real values of a | a1_re | a1_re | a2_re | a2_re |
73
+ v1 = vcombine_f32(vdup_lane_f32(vget_low_f32(a.v), 0), vdup_lane_f32(vget_high_f32(a.v), 0));
74
+ // Get the imag values of a | a1_im | a1_im | a2_im | a2_im |
75
+ v2 = vcombine_f32(vdup_lane_f32(vget_low_f32(a.v), 1), vdup_lane_f32(vget_high_f32(a.v), 1));
76
+ // Multiply the real a with b
77
+ v1 = vmulq_f32(v1, b.v);
78
+ // Multiply the imag a with b
79
+ v2 = vmulq_f32(v2, b.v);
80
+ // Conjugate v2 (XOR with p4ui_CONJ_XOR toggles bit 31 of lanes 1 and 3)
81
+ v2 = vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(v2), p4ui_CONJ_XOR));
82
+ // Swap real/imag elements in v2.
83
+ v2 = vrev64q_f32(v2);
84
+ // Add and return the result
85
+ return Packet2cf(vaddq_f32(v1, v2));
86
+ }
87
+
88
+ template<> EIGEN_STRONG_INLINE Packet2cf pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b) // lane-wise bitwise AND of the raw 32-bit patterns of a and b
89
+ {
90
+ return Packet2cf(vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v)))); // vandq_u32, not vorrq_u32: OR made pand behave exactly like por
91
+ }
92
+ template<> EIGEN_STRONG_INLINE Packet2cf por <Packet2cf>(const Packet2cf& a, const Packet2cf& b) // lane-wise bitwise OR of the raw 32-bit patterns of a and b
93
+ {
94
+ return Packet2cf(vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v)))); // reinterpret as u32, OR, reinterpret back to f32
95
+ }
96
+ template<> EIGEN_STRONG_INLINE Packet2cf pxor <Packet2cf>(const Packet2cf& a, const Packet2cf& b) // lane-wise bitwise XOR of the raw 32-bit patterns of a and b
97
+ {
98
+ return Packet2cf(vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v)))); // reinterpret as u32, XOR, reinterpret back to f32
99
+ }
100
+ template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b) // lane-wise "a AND NOT b" on the raw 32-bit patterns
101
+ {
102
+ return Packet2cf(vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v)))); // vbicq_u32 is NEON's bit-clear: first operand AND complement of second
103
+ }
104
+
105
+ template<> EIGEN_STRONG_INLINE Packet2cf pload<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload<Packet4f>((const float*)from)); } // aligned load: forwards to the Packet4f pload on the same address viewed as const float*
106
+ template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu<Packet4f>((const float*)from)); } // unaligned load: forwards to the Packet4f ploadu on the same address viewed as const float*
107
+
108
+ template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from) { return pset1<Packet2cf>(*from); } // reads only *from and duplicates it into both packet slots
109
+
110
+ template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((float*)to, from.v); } // aligned store: forwards to the float pstore with the destination viewed as float*
111
+ template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((float*)to, from.v); } // unaligned store: forwards to the float pstoreu with the destination viewed as float*
112
+
113
+ template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { EIGEN_ARM_PREFETCH((float *)addr); } // hands the address (cast to float*) to the EIGEN_ARM_PREFETCH macro, which is defined outside this file
114
+
115
+ template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a) // returns the first complex value held in the packet
116
+ {
117
+ std::complex<float> EIGEN_ALIGN16 x[2]; // scratch buffer large enough for the whole packet
118
+ vst1q_f32((float *)x, a.v); // spill all four float lanes into the buffer
119
+ return x[0]; // first two lanes = first complex value
120
+ }
121
+
122
+ template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a) // swaps the two complex values of the packet
123
+ {
124
+ float32x2_t a_lo, a_hi;
125
+ Packet4f a_r128;
126
+
127
+ a_lo = vget_low_f32(a.v); // lanes 0-1: first complex value
128
+ a_hi = vget_high_f32(a.v); // lanes 2-3: second complex value
129
+ a_r128 = vcombine_f32(a_hi, a_lo); // reassemble with the halves exchanged; re/im order inside each half is kept
130
+
131
+ return Packet2cf(a_r128);
132
+ }
133
+
134
+ template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(const Packet2cf& a) // exchanges the two floats inside each complex value
135
+ {
136
+ return Packet2cf(vrev64q_f32(a.v)); // vrev64q_f32 reverses the 32-bit lanes within each 64-bit half
137
+ }
138
+
139
+ template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a) // returns the sum of the two complex values in the packet
140
+ {
141
+ float32x2_t a1, a2;
142
+ std::complex<float> s;
143
+
144
+ a1 = vget_low_f32(a.v); // first complex value
145
+ a2 = vget_high_f32(a.v); // second complex value
146
+ a2 = vadd_f32(a1, a2); // lane-wise add: sums the first floats together and the second floats together
147
+ vst1_f32((float *)&s, a2); // write the two resulting floats into s, viewed as float[2]
148
+
149
+ return s;
150
+ }
151
+
152
+ template<> EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(const Packet2cf* vecs) // reads vecs[0] and vecs[1]; returns | sum of vecs[0] | sum of vecs[1] |
153
+ {
154
+ Packet4f sum1, sum2, sum;
155
+
156
+ // Gather the low halves of vecs[0] and vecs[1] in sum1 and their high halves in sum2, then add them
157
+ sum1 = vcombine_f32(vget_low_f32(vecs[0].v), vget_low_f32(vecs[1].v));
158
+ sum2 = vcombine_f32(vget_high_f32(vecs[0].v), vget_high_f32(vecs[1].v));
159
+ sum = vaddq_f32(sum1, sum2);
160
+
161
+ return Packet2cf(sum);
162
+ }
163
+
164
+ template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a) // returns the complex product of the two values in the packet
165
+ {
166
+ float32x2_t a1, a2, v1, v2, prod;
167
+ std::complex<float> s;
168
+
169
+ a1 = vget_low_f32(a.v);
170
+ a2 = vget_high_f32(a.v);
171
+ // Get the real value of the first element | a1_re | a1_re |
172
+ v1 = vdup_lane_f32(a1, 0);
173
+ // Get the imag value of the first element | a1_im | a1_im |
174
+ v2 = vdup_lane_f32(a1, 1);
175
+ // Multiply the real part of a1 with a2
176
+ v1 = vmul_f32(v1, a2);
177
+ // Multiply the imag part of a1 with a2
178
+ v2 = vmul_f32(v2, a2);
179
+ // Conjugate v2 (XOR with p2ui_CONJ_XOR toggles bit 31 of lane 1)
180
+ v2 = vreinterpret_f32_u32(veor_u32(vreinterpret_u32_f32(v2), p2ui_CONJ_XOR));
181
+ // Swap real/imag elements in v2.
182
+ v2 = vrev64_f32(v2);
183
+ // Add v1, v2
184
+ prod = vadd_f32(v1, v2);
185
+
186
+ vst1_f32((float *)&s, prod);
187
+
188
+ return s;
189
+ }
190
+
191
+ template<int Offset>
192
+ struct palign_impl<Offset,Packet2cf> // packet alignment helper for Packet2cf; only Offset==1 changes anything
193
+ {
194
+ EIGEN_STRONG_INLINE static void run(Packet2cf& first, const Packet2cf& second) // updates first in place
195
+ {
196
+ if (Offset==1) // any other Offset leaves first untouched
197
+ {
198
+ first.v = vextq_f32(first.v, second.v, 2); // lanes 2-3 of first followed by lanes 0-1 of second, i.e. a shift by one complex value
199
+ }
200
+ }
201
+ };
202
+
203
+ template<> struct conj_helper<Packet2cf, Packet2cf, false,true> // <false,true>: the second operand is conjugated
204
+ {
205
+ EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
206
+ { return padd(pmul(x,y),c); } // uses the member pmul below, so this is x * conj(y) + c
207
+
208
+ EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
209
+ {
210
+ return internal::pmul(a, pconj(b)); // a * conj(b)
211
+ }
212
+ };
213
+
214
+ template<> struct conj_helper<Packet2cf, Packet2cf, true,false> // <true,false>: the first operand is conjugated
215
+ {
216
+ EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
217
+ { return padd(pmul(x,y),c); } // uses the member pmul below, so this is conj(x) * y + c
218
+
219
+ EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
220
+ {
221
+ return internal::pmul(pconj(a), b); // conj(a) * b
222
+ }
223
+ };
224
+
225
+ template<> struct conj_helper<Packet2cf, Packet2cf, true,true> // <true,true>: both operands are conjugated
226
+ {
227
+ EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
228
+ { return padd(pmul(x,y),c); } // uses the member pmul below, so this is conj(x * y) + c
229
+
230
+ EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
231
+ {
232
+ return pconj(internal::pmul(a, b)); // conj(a * b), which equals conj(a) * conj(b), with a single pconj
233
+ }
234
+ };
235
+
236
+ template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b) // complex division computed as a * conj(b) / |b|^2
237
+ {
238
+ // TODO optimize it for NEON
239
+ Packet2cf res = conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a,b); // numerator: a * conj(b)
240
+ Packet4f s, rev_s;
241
+
242
+ // this computes the squared norm of b: | re^2 | im^2 | plus the swapped | im^2 | re^2 |
243
+ s = vmulq_f32(b.v, b.v);
244
+ rev_s = vrev64q_f32(s);
245
+
246
+ return Packet2cf(pdiv(res.v, vaddq_f32(s,rev_s))); // both lanes of each complex value are divided by the same re^2 + im^2
247
+ }
248
+
249
+ } // end namespace internal
250
+
251
+ } // end namespace Eigen
252
+
253
+ #endif // EIGEN_COMPLEX_NEON_H
@@ -0,0 +1,420 @@
1
+ // This file is part of Eigen, a lightweight C++ template library
2
+ // for linear algebra.
3
+ //
4
+ // Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
5
+ // Copyright (C) 2010 Konstantinos Margaritis <markos@codex.gr>
6
+ // Heavily based on Gael's SSE version.
7
+ //
8
+ // This Source Code Form is subject to the terms of the Mozilla
9
+ // Public License v. 2.0. If a copy of the MPL was not distributed
10
+ // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
11
+
12
+ #ifndef EIGEN_PACKET_MATH_NEON_H
13
+ #define EIGEN_PACKET_MATH_NEON_H
14
+
15
+ namespace Eigen {
16
+
17
+ namespace internal {
18
+
19
+ #ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
20
+ #define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8 // default value, used only when the macro is not already defined
21
+ #endif
22
+
23
+ // FIXME NEON has 16 quad registers, but since the current register allocator
24
+ // is so bad, it is much better to reduce it to 8
25
+ #ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
26
+ #define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 8 // default value, used only when the macro is not already defined
27
+ #endif
28
+
29
+ typedef float32x4_t Packet4f; // NEON vector of four 32-bit floats
30
+ typedef int32x4_t Packet4i; // NEON vector of four signed 32-bit integers
31
+ typedef uint32x4_t Packet4ui; // NEON vector of four unsigned 32-bit integers
32
+
33
+ #define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \
34
+ const Packet4f p4f_##NAME = pset1<Packet4f>(X) // declares p4f_NAME with X broadcast through pset1<Packet4f>
35
+
36
+ #define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \
37
+ const Packet4f p4f_##NAME = vreinterpretq_f32_s32(pset1<Packet4i>(X)) // declares p4f_NAME: the int X is broadcast as a Packet4i, then its bits are reinterpreted as floats
38
+
39
+ #define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
40
+ const Packet4i p4i_##NAME = pset1<Packet4i>(X) // declares p4i_NAME with X broadcast through pset1<Packet4i>
41
+
42
+ #if defined(__llvm__) && !defined(__clang__)
43
+ // Special treatment for Apple's llvm-gcc: its NEON packet types are unions, hence the extra pair of braces
44
+ #define EIGEN_INIT_NEON_PACKET2(X, Y) {{X, Y}}
45
+ #define EIGEN_INIT_NEON_PACKET4(X, Y, Z, W) {{X, Y, Z, W}}
46
+ #else
47
+ // Default brace initializer for packets
48
+ #define EIGEN_INIT_NEON_PACKET2(X, Y) {X, Y}
49
+ #define EIGEN_INIT_NEON_PACKET4(X, Y, Z, W) {X, Y, Z, W}
50
+ #endif
51
+
52
+ // The raw pld instruction is only emitted when not targeting arm64 (see the __aarch64__ guard below). If available, let's trust the __builtin_prefetch built-in function
53
+ // which is available on LLVM and GCC (at least)
54
+ #if EIGEN_HAS_BUILTIN(__builtin_prefetch) || defined(__GNUC__)
55
+ #define EIGEN_ARM_PREFETCH(ADDR) __builtin_prefetch(ADDR); // note: this expansion already ends with ';'
56
+ #elif defined __pld
57
+ #define EIGEN_ARM_PREFETCH(ADDR) __pld(ADDR) // note: no trailing ';' in this variant
58
+ #elif !defined(__aarch64__)
59
+ #define EIGEN_ARM_PREFETCH(ADDR) __asm__ __volatile__ ( " pld [%[addr]]\n" :: [addr] "r" (ADDR) : "cc" ); // note: this expansion already ends with ';'
60
+ #else
61
+ // by default no explicit prefetching: the macro expands to nothing
62
+ #define EIGEN_ARM_PREFETCH(ADDR)
63
+ #endif
64
+
65
+ template<> struct packet_traits<float> : default_packet_traits
66
+ {
67
+ typedef Packet4f type; // packet type used for float scalars
68
+ enum {
69
+ Vectorizable = 1,
70
+ AlignedOnScalar = 1,
71
+ size = 4, // floats per packet
72
+
73
+ HasDiv = 1, // pdiv<Packet4f> is implemented in this file
74
+ // FIXME check the Has*
75
+ HasSin = 0,
76
+ HasCos = 0,
77
+ HasLog = 0,
78
+ HasExp = 0,
79
+ HasSqrt = 0
80
+ };
81
+ };
82
+ template<> struct packet_traits<int> : default_packet_traits
83
+ {
84
+ typedef Packet4i type; // packet type used for int scalars
85
+ enum {
86
+ Vectorizable = 1,
87
+ AlignedOnScalar = 1,
88
+ size=4 // ints per packet
89
+ // FIXME check the Has*
90
+ };
91
+ };
92
+
93
+ #if EIGEN_GNUC_AT_MOST(4,4) && !defined(__llvm__)
94
+ // workaround for a gcc 4.2, 4.3 and 4.4 compilation issue: these overloads take plain float pointers and forward to the global intrinsics through float32_t* casts
95
+ EIGEN_STRONG_INLINE float32x4_t vld1q_f32(const float* x) { return ::vld1q_f32((const float32_t*)x); }
96
+ EIGEN_STRONG_INLINE float32x2_t vld1_f32 (const float* x) { return ::vld1_f32 ((const float32_t*)x); }
97
+ EIGEN_STRONG_INLINE void vst1q_f32(float* to, float32x4_t from) { ::vst1q_f32((float32_t*)to,from); }
98
+ EIGEN_STRONG_INLINE void vst1_f32 (float* to, float32x2_t from) { ::vst1_f32 ((float32_t*)to,from); }
99
+ #endif
100
+
101
+ template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4}; }; // Packet4f: scalar type float, 4 per packet
102
+ template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4}; }; // Packet4i: scalar type int, 4 per packet
103
+
104
+ template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return vdupq_n_f32(from); } // `from` duplicated into all four lanes
105
+ template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { return vdupq_n_s32(from); } // `from` duplicated into all four lanes
106
+
107
+ template<> EIGEN_STRONG_INLINE Packet4f plset<float>(const float& a) // returns {a, a+1, a+2, a+3}
108
+ {
109
+ Packet4f countdown = EIGEN_INIT_NEON_PACKET4(0, 1, 2, 3); // per-lane offsets; despite the name they count up
110
+ return vaddq_f32(pset1<Packet4f>(a), countdown);
111
+ }
112
+ template<> EIGEN_STRONG_INLINE Packet4i plset<int>(const int& a) // returns {a, a+1, a+2, a+3}
113
+ {
114
+ Packet4i countdown = EIGEN_INIT_NEON_PACKET4(0, 1, 2, 3); // per-lane offsets; despite the name they count up
115
+ return vaddq_s32(pset1<Packet4i>(a), countdown);
116
+ }
117
+
118
+ template<> EIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const Packet4f& b) { return vaddq_f32(a,b); } // lane-wise a + b
119
+ template<> EIGEN_STRONG_INLINE Packet4i padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return vaddq_s32(a,b); } // lane-wise a + b
120
+
121
+ template<> EIGEN_STRONG_INLINE Packet4f psub<Packet4f>(const Packet4f& a, const Packet4f& b) { return vsubq_f32(a,b); } // lane-wise a - b
122
+ template<> EIGEN_STRONG_INLINE Packet4i psub<Packet4i>(const Packet4i& a, const Packet4i& b) { return vsubq_s32(a,b); } // lane-wise a - b
123
+
124
+ template<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a) { return vnegq_f32(a); } // lane-wise -a
125
+ template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) { return vnegq_s32(a); } // lane-wise -a
126
+
127
+ template<> EIGEN_STRONG_INLINE Packet4f pconj(const Packet4f& a) { return a; } // real packet: returned unchanged
128
+ template<> EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) { return a; } // integer packet: returned unchanged
129
+
130
+ template<> EIGEN_STRONG_INLINE Packet4f pmul<Packet4f>(const Packet4f& a, const Packet4f& b) { return vmulq_f32(a,b); } // lane-wise a * b
131
+ template<> EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const Packet4i& b) { return vmulq_s32(a,b); } // lane-wise a * b
132
+
133
+ template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b)
134
+ {
135
+ Packet4f inv, restep, div;
136
+
137
+ // NEON does not offer a divide instruction, we have to do a reciprocal approximation
138
+ // However NEON in contrast to other SIMD engines (AltiVec/SSE), offers
139
+ // a reciprocal estimate AND a reciprocal step -which saves a few instructions
140
+ // vrecpeq_f32() returns an estimate to 1/b, which we will finetune with
141
+ // Newton-Raphson and vrecpsq_f32()
142
+ inv = vrecpeq_f32(b); // initial estimate of 1/b
143
+
144
+ // This returns a differential, by which we will have to multiply inv to get a better
145
+ // approximation of 1/b.
146
+ restep = vrecpsq_f32(b, inv);
147
+ inv = vmulq_f32(restep, inv); // refined estimate; exactly one refinement step is applied
148
+
149
+ // Finally, multiply a by 1/b and get the wanted result of the division.
150
+ div = vmulq_f32(a, inv); // the result is therefore a * (approximate 1/b), not an exact quotient
151
+
152
+ return div;
153
+ }
154
+ template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& /*a*/, const Packet4i& /*b*/) // both arguments are ignored
155
+ { eigen_assert(false && "packet integer division are not supported by NEON"); // unconditional assertion: this overload is not meant to be called
156
+ return pset1<Packet4i>(0); // reached only if eigen_assert does not stop execution; yields an all-zero packet
157
+ }
158
+
159
+ // for some weird reason, it has to be overloaded for packets of integers
160
+ template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vmlaq_f32(c,a,b); } // lane-wise c + a*b (accumulator is the first intrinsic argument)
161
+ template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return vmlaq_s32(c,a,b); } // lane-wise c + a*b (accumulator is the first intrinsic argument)
162
+
163
+ template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { return vminq_f32(a,b); } // lane-wise minimum
164
+ template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b) { return vminq_s32(a,b); } // lane-wise minimum
165
+
166
+ template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b) { return vmaxq_f32(a,b); } // lane-wise maximum
167
+ template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b) { return vmaxq_s32(a,b); } // lane-wise maximum
168
+
169
+ // Logical operations are not supported for float vectors, so the float overloads reinterpret their operands as unsigned integer vectors using NEON intrinsics
170
+ template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b)
171
+ {
172
+ return vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(a),vreinterpretq_u32_f32(b))); // bitwise a & b on the raw 32-bit patterns
173
+ }
174
+ template<> EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return vandq_s32(a,b); } // bitwise a & b
175
+
176
+ template<> EIGEN_STRONG_INLINE Packet4f por<Packet4f>(const Packet4f& a, const Packet4f& b)
177
+ {
178
+ return vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(a),vreinterpretq_u32_f32(b))); // bitwise a | b on the raw 32-bit patterns
179
+ }
180
+ template<> EIGEN_STRONG_INLINE Packet4i por<Packet4i>(const Packet4i& a, const Packet4i& b) { return vorrq_s32(a,b); } // bitwise a | b
181
+
182
+ template<> EIGEN_STRONG_INLINE Packet4f pxor<Packet4f>(const Packet4f& a, const Packet4f& b)
183
+ {
184
+ return vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(a),vreinterpretq_u32_f32(b))); // bitwise a ^ b on the raw 32-bit patterns
185
+ }
186
+ template<> EIGEN_STRONG_INLINE Packet4i pxor<Packet4i>(const Packet4i& a, const Packet4i& b) { return veorq_s32(a,b); } // bitwise a ^ b
187
+
188
+ template<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f& b)
189
+ {
190
+ return vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(a),vreinterpretq_u32_f32(b))); // bit clear: a & ~b on the raw 32-bit patterns
191
+ }
192
+ template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return vbicq_s32(a,b); } // bit clear: a & ~b
193
+
194
+ template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_f32(from); } // loads four floats starting at `from`
195
+ template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_s32(from); } // loads four ints starting at `from`
196
+
197
+ template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from) { EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_f32(from); } // same intrinsic as pload; only the debug macro differs
198
+ template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from) { EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_s32(from); } // same intrinsic as pload; only the debug macro differs
199
+
200
+ template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from) // returns {from[0], from[0], from[1], from[1]}
201
+ {
202
+ float32x2_t lo, hi;
203
+ lo = vld1_dup_f32(from); // from[0] duplicated into both lanes
204
+ hi = vld1_dup_f32(from+1); // from[1] duplicated into both lanes
205
+ return vcombine_f32(lo, hi);
206
+ }
207
+ template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int* from) // returns {from[0], from[0], from[1], from[1]}
208
+ {
209
+ int32x2_t lo, hi;
210
+ lo = vld1_dup_s32(from); // from[0] duplicated into both lanes
211
+ hi = vld1_dup_s32(from+1); // from[1] duplicated into both lanes
212
+ return vcombine_s32(lo, hi);
213
+ }
214
+
215
+ template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE vst1q_f32(to, from); } // writes the four lanes to to[0..3]
216
+ template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE vst1q_s32(to, from); } // writes the four lanes to to[0..3]
217
+
218
+ template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_f32(to, from); } // same intrinsic as pstore; only the debug macro differs
219
+ template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_s32(to, from); } // same intrinsic as pstore; only the debug macro differs
220
+
221
+ template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { EIGEN_ARM_PREFETCH(addr); } // forwards to whichever EIGEN_ARM_PREFETCH variant was selected above
222
+ template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { EIGEN_ARM_PREFETCH(addr); } // forwards to whichever EIGEN_ARM_PREFETCH variant was selected above
223
+
224
+ // FIXME only store the 2 first elements ? (currently all four lanes are spilled to a local array just to read lane 0)
225
+ template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { float EIGEN_ALIGN16 x[4]; vst1q_f32(x, a); return x[0]; } // returns lane 0 of a
226
+ template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { int EIGEN_ALIGN16 x[4]; vst1q_s32(x, a); return x[0]; } // returns lane 0 of a
227
+
228
+ template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a) { // returns the four lanes of a in reverse order
229
+ float32x2_t a_lo, a_hi;
230
+ Packet4f a_r64;
231
+
232
+ a_r64 = vrev64q_f32(a); // swap the two lanes inside each 64-bit half
233
+ a_lo = vget_low_f32(a_r64);
234
+ a_hi = vget_high_f32(a_r64);
235
+ return vcombine_f32(a_hi, a_lo); // then swap the two halves
236
+ }
237
+ template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a) { // returns the four lanes of a in reverse order
238
+ int32x2_t a_lo, a_hi;
239
+ Packet4i a_r64;
240
+
241
+ a_r64 = vrev64q_s32(a); // swap the two lanes inside each 64-bit half
242
+ a_lo = vget_low_s32(a_r64);
243
+ a_hi = vget_high_s32(a_r64);
244
+ return vcombine_s32(a_hi, a_lo); // then swap the two halves
245
+ }
246
+ template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a) { return vabsq_f32(a); } // lane-wise absolute value
247
+ template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) { return vabsq_s32(a); } // lane-wise absolute value
248
+
249
+ template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a) // returns the sum of the four lanes of a
250
+ {
251
+ float32x2_t a_lo, a_hi, sum;
252
+
253
+ a_lo = vget_low_f32(a);
254
+ a_hi = vget_high_f32(a);
255
+ sum = vpadd_f32(a_lo, a_hi); // pairwise add: {a0+a1, a2+a3}
256
+ sum = vpadd_f32(sum, sum); // pairwise add again: both lanes now hold the full sum
257
+ return vget_lane_f32(sum, 0);
258
+ }
259
+
260
+ template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs) // reads vecs[0..3]; lane i of the result is the sum of the four lanes of vecs[i]
261
+ {
262
+ float32x4x2_t vtrn1, vtrn2, res1, res2;
263
+ Packet4f sum1, sum2, sum;
264
+
265
+ // NEON zip performs interleaving of the supplied vectors.
266
+ // We perform two interleaves in a row to acquire the transposed vector
267
+ vtrn1 = vzipq_f32(vecs[0], vecs[2]);
268
+ vtrn2 = vzipq_f32(vecs[1], vecs[3]);
269
+ res1 = vzipq_f32(vtrn1.val[0], vtrn2.val[0]); // res1.val[k] holds element k of each input vector (k = 0, 1)
270
+ res2 = vzipq_f32(vtrn1.val[1], vtrn2.val[1]); // res2.val[k] holds element k+2 of each input vector
271
+
272
+ // Do the addition of the resulting vectors
273
+ sum1 = vaddq_f32(res1.val[0], res1.val[1]);
274
+ sum2 = vaddq_f32(res2.val[0], res2.val[1]);
275
+ sum = vaddq_f32(sum1, sum2);
276
+
277
+ return sum;
278
+ }
279
+
280
+ template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a) // returns the sum of the four lanes of a
281
+ {
282
+ int32x2_t a_lo, a_hi, sum;
283
+
284
+ a_lo = vget_low_s32(a);
285
+ a_hi = vget_high_s32(a);
286
+ sum = vpadd_s32(a_lo, a_hi); // pairwise add: {a0+a1, a2+a3}
287
+ sum = vpadd_s32(sum, sum); // pairwise add again: both lanes now hold the full sum
288
+ return vget_lane_s32(sum, 0);
289
+ }
290
+
291
+ template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs) // reads vecs[0..3]; lane i of the result is the sum of the four lanes of vecs[i]
292
+ {
293
+ int32x4x2_t vtrn1, vtrn2, res1, res2;
294
+ Packet4i sum1, sum2, sum;
295
+
296
+ // NEON zip performs interleaving of the supplied vectors.
297
+ // We perform two interleaves in a row to acquire the transposed vector
298
+ vtrn1 = vzipq_s32(vecs[0], vecs[2]);
299
+ vtrn2 = vzipq_s32(vecs[1], vecs[3]);
300
+ res1 = vzipq_s32(vtrn1.val[0], vtrn2.val[0]); // res1.val[k] holds element k of each input vector (k = 0, 1)
301
+ res2 = vzipq_s32(vtrn1.val[1], vtrn2.val[1]); // res2.val[k] holds element k+2 of each input vector
302
+
303
+ // Do the addition of the resulting vectors
304
+ sum1 = vaddq_s32(res1.val[0], res1.val[1]);
305
+ sum2 = vaddq_s32(res2.val[0], res2.val[1]);
306
+ sum = vaddq_s32(sum1, sum2);
307
+
308
+ return sum;
309
+ }
310
+
311
+ // Other reduction functions:
312
+ // mul
313
+ template<> EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a) // returns the product of the four lanes of a
314
+ {
315
+ float32x2_t a_lo, a_hi, prod;
316
+
317
+ // Get a_lo = |a1|a2| and a_hi = |a3|a4|
318
+ a_lo = vget_low_f32(a);
319
+ a_hi = vget_high_f32(a);
320
+ // Get the product of a_lo * a_hi -> |a1*a3|a2*a4|
321
+ prod = vmul_f32(a_lo, a_hi);
322
+ // Multiply prod with its swapped value |a2*a4|a1*a3|
323
+ prod = vmul_f32(prod, vrev64_f32(prod));
324
+
325
+ return vget_lane_f32(prod, 0); // both lanes hold a1*a2*a3*a4 at this point
326
+ }
327
+ template<> EIGEN_STRONG_INLINE int predux_mul<Packet4i>(const Packet4i& a) // returns the product of the four lanes of a
328
+ {
329
+ int32x2_t a_lo, a_hi, prod;
330
+
331
+ // Get a_lo = |a1|a2| and a_hi = |a3|a4|
332
+ a_lo = vget_low_s32(a);
333
+ a_hi = vget_high_s32(a);
334
+ // Get the product of a_lo * a_hi -> |a1*a3|a2*a4|
335
+ prod = vmul_s32(a_lo, a_hi);
336
+ // Multiply prod with its swapped value |a2*a4|a1*a3|
337
+ prod = vmul_s32(prod, vrev64_s32(prod));
338
+
339
+ return vget_lane_s32(prod, 0); // both lanes hold a1*a2*a3*a4 at this point
340
+ }
341
+
342
+ // min
343
+ template<> EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a) // returns the smallest of the four lanes of a
344
+ {
345
+ float32x2_t a_lo, a_hi, min;
346
+
347
+ a_lo = vget_low_f32(a);
348
+ a_hi = vget_high_f32(a);
349
+ min = vpmin_f32(a_lo, a_hi); // pairwise min: {min(a0,a1), min(a2,a3)}
350
+ min = vpmin_f32(min, min); // pairwise min again: both lanes hold the overall minimum
351
+
352
+ return vget_lane_f32(min, 0);
353
+ }
354
+
355
+ template<> EIGEN_STRONG_INLINE int predux_min<Packet4i>(const Packet4i& a) // returns the smallest of the four lanes of a
356
+ {
357
+ int32x2_t a_lo, a_hi, min;
358
+
359
+ a_lo = vget_low_s32(a);
360
+ a_hi = vget_high_s32(a);
361
+ min = vpmin_s32(a_lo, a_hi); // pairwise min: {min(a0,a1), min(a2,a3)}
362
+ min = vpmin_s32(min, min); // pairwise min again: both lanes hold the overall minimum
363
+
364
+ return vget_lane_s32(min, 0);
365
+ }
366
+
367
+ // max
368
+ template<> EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a) // returns the largest of the four lanes of a
369
+ {
370
+ float32x2_t a_lo, a_hi, max;
371
+
372
+ a_lo = vget_low_f32(a);
373
+ a_hi = vget_high_f32(a);
374
+ max = vpmax_f32(a_lo, a_hi); // pairwise max: {max(a0,a1), max(a2,a3)}
375
+ max = vpmax_f32(max, max); // pairwise max again: both lanes hold the overall maximum
376
+
377
+ return vget_lane_f32(max, 0);
378
+ }
379
+
380
+ template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a) // returns the largest of the four lanes of a
381
+ {
382
+ int32x2_t a_lo, a_hi, max;
383
+
384
+ a_lo = vget_low_s32(a);
385
+ a_hi = vget_high_s32(a);
386
+ max = vpmax_s32(a_lo, a_hi); // pairwise max: {max(a0,a1), max(a2,a3)}
387
+ max = vpmax_s32(max, max); // pairwise max again: both lanes hold the overall maximum
388
+
389
+ return vget_lane_s32(max, 0);
390
+ }
391
+
392
+ // this PALIGN_NEON business is to work around a bug in LLVM Clang 3.0 causing incorrect compilation errors,
393
+ // see bug 347 and this LLVM bug: http://llvm.org/bugs/show_bug.cgi?id=11074 -- each expansion defines a full specialisation palign_impl<Offset,Type> whose run() assigns Command(first, second, Offset) to `first`, or does nothing when Offset is 0
394
+ #define PALIGN_NEON(Offset,Type,Command) \
395
+ template<>\
396
+ struct palign_impl<Offset,Type>\
397
+ {\
398
+ EIGEN_STRONG_INLINE static void run(Type& first, const Type& second)\
399
+ {\
400
+ if (Offset!=0)\
401
+ first = Command(first, second, Offset);\
402
+ }\
403
+ };\
404
+
404
+
405
+ PALIGN_NEON(0,Packet4f,vextq_f32) // float packets: one specialisation per offset 0..3
406
+ PALIGN_NEON(1,Packet4f,vextq_f32)
407
+ PALIGN_NEON(2,Packet4f,vextq_f32)
408
+ PALIGN_NEON(3,Packet4f,vextq_f32)
409
+ PALIGN_NEON(0,Packet4i,vextq_s32) // int packets: one specialisation per offset 0..3
410
+ PALIGN_NEON(1,Packet4i,vextq_s32)
411
+ PALIGN_NEON(2,Packet4i,vextq_s32)
412
+ PALIGN_NEON(3,Packet4i,vextq_s32)
413
+
414
+ #undef PALIGN_NEON // the helper macro is not needed past this point
415
+
416
+ } // end namespace internal
417
+
418
+ } // end namespace Eigen
419
+
420
+ #endif // EIGEN_PACKET_MATH_NEON_H