ruby-eigen 0.0.9 → 0.0.10.pre1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE +22 -0
- data/README.md +21 -0
- data/ext/eigen/eigen3/COPYING.BSD +26 -0
- data/ext/eigen/eigen3/COPYING.MPL2 +373 -0
- data/ext/eigen/eigen3/COPYING.README +18 -0
- data/ext/eigen/eigen3/Eigen/Array +11 -0
- data/ext/eigen/eigen3/Eigen/Cholesky +32 -0
- data/ext/eigen/eigen3/Eigen/CholmodSupport +45 -0
- data/ext/eigen/eigen3/Eigen/Core +376 -0
- data/ext/eigen/eigen3/Eigen/Dense +7 -0
- data/ext/eigen/eigen3/Eigen/Eigen +2 -0
- data/ext/eigen/eigen3/Eigen/Eigen2Support +95 -0
- data/ext/eigen/eigen3/Eigen/Eigenvalues +48 -0
- data/ext/eigen/eigen3/Eigen/Geometry +63 -0
- data/ext/eigen/eigen3/Eigen/Householder +23 -0
- data/ext/eigen/eigen3/Eigen/IterativeLinearSolvers +40 -0
- data/ext/eigen/eigen3/Eigen/Jacobi +26 -0
- data/ext/eigen/eigen3/Eigen/LU +41 -0
- data/ext/eigen/eigen3/Eigen/LeastSquares +32 -0
- data/ext/eigen/eigen3/Eigen/MetisSupport +28 -0
- data/ext/eigen/eigen3/Eigen/PaStiXSupport +46 -0
- data/ext/eigen/eigen3/Eigen/PardisoSupport +30 -0
- data/ext/eigen/eigen3/Eigen/QR +45 -0
- data/ext/eigen/eigen3/Eigen/QtAlignedMalloc +34 -0
- data/ext/eigen/eigen3/Eigen/SPQRSupport +29 -0
- data/ext/eigen/eigen3/Eigen/SVD +37 -0
- data/ext/eigen/eigen3/Eigen/Sparse +27 -0
- data/ext/eigen/eigen3/Eigen/SparseCore +64 -0
- data/ext/eigen/eigen3/Eigen/SparseLU +49 -0
- data/ext/eigen/eigen3/Eigen/SparseQR +33 -0
- data/ext/eigen/eigen3/Eigen/StdDeque +27 -0
- data/ext/eigen/eigen3/Eigen/StdList +26 -0
- data/ext/eigen/eigen3/Eigen/StdVector +27 -0
- data/ext/eigen/eigen3/Eigen/SuperLUSupport +59 -0
- data/ext/eigen/eigen3/Eigen/UmfPackSupport +36 -0
- data/ext/eigen/eigen3/Eigen/src/Cholesky/LDLT.h +611 -0
- data/ext/eigen/eigen3/Eigen/src/Cholesky/LLT.h +498 -0
- data/ext/eigen/eigen3/Eigen/src/Cholesky/LLT_MKL.h +102 -0
- data/ext/eigen/eigen3/Eigen/src/CholmodSupport/CholmodSupport.h +607 -0
- data/ext/eigen/eigen3/Eigen/src/Core/Array.h +323 -0
- data/ext/eigen/eigen3/Eigen/src/Core/ArrayBase.h +226 -0
- data/ext/eigen/eigen3/Eigen/src/Core/ArrayWrapper.h +264 -0
- data/ext/eigen/eigen3/Eigen/src/Core/Assign.h +590 -0
- data/ext/eigen/eigen3/Eigen/src/Core/Assign_MKL.h +224 -0
- data/ext/eigen/eigen3/Eigen/src/Core/BandMatrix.h +334 -0
- data/ext/eigen/eigen3/Eigen/src/Core/Block.h +406 -0
- data/ext/eigen/eigen3/Eigen/src/Core/BooleanRedux.h +154 -0
- data/ext/eigen/eigen3/Eigen/src/Core/CommaInitializer.h +154 -0
- data/ext/eigen/eigen3/Eigen/src/Core/CoreIterators.h +61 -0
- data/ext/eigen/eigen3/Eigen/src/Core/CwiseBinaryOp.h +230 -0
- data/ext/eigen/eigen3/Eigen/src/Core/CwiseNullaryOp.h +864 -0
- data/ext/eigen/eigen3/Eigen/src/Core/CwiseUnaryOp.h +126 -0
- data/ext/eigen/eigen3/Eigen/src/Core/CwiseUnaryView.h +139 -0
- data/ext/eigen/eigen3/Eigen/src/Core/DenseBase.h +521 -0
- data/ext/eigen/eigen3/Eigen/src/Core/DenseCoeffsBase.h +754 -0
- data/ext/eigen/eigen3/Eigen/src/Core/DenseStorage.h +434 -0
- data/ext/eigen/eigen3/Eigen/src/Core/Diagonal.h +237 -0
- data/ext/eigen/eigen3/Eigen/src/Core/DiagonalMatrix.h +313 -0
- data/ext/eigen/eigen3/Eigen/src/Core/DiagonalProduct.h +131 -0
- data/ext/eigen/eigen3/Eigen/src/Core/Dot.h +263 -0
- data/ext/eigen/eigen3/Eigen/src/Core/EigenBase.h +131 -0
- data/ext/eigen/eigen3/Eigen/src/Core/Flagged.h +140 -0
- data/ext/eigen/eigen3/Eigen/src/Core/ForceAlignedAccess.h +146 -0
- data/ext/eigen/eigen3/Eigen/src/Core/Functors.h +1026 -0
- data/ext/eigen/eigen3/Eigen/src/Core/Fuzzy.h +150 -0
- data/ext/eigen/eigen3/Eigen/src/Core/GeneralProduct.h +635 -0
- data/ext/eigen/eigen3/Eigen/src/Core/GenericPacketMath.h +350 -0
- data/ext/eigen/eigen3/Eigen/src/Core/GlobalFunctions.h +92 -0
- data/ext/eigen/eigen3/Eigen/src/Core/IO.h +250 -0
- data/ext/eigen/eigen3/Eigen/src/Core/Map.h +192 -0
- data/ext/eigen/eigen3/Eigen/src/Core/MapBase.h +247 -0
- data/ext/eigen/eigen3/Eigen/src/Core/MathFunctions.h +768 -0
- data/ext/eigen/eigen3/Eigen/src/Core/Matrix.h +420 -0
- data/ext/eigen/eigen3/Eigen/src/Core/MatrixBase.h +563 -0
- data/ext/eigen/eigen3/Eigen/src/Core/NestByValue.h +111 -0
- data/ext/eigen/eigen3/Eigen/src/Core/NoAlias.h +134 -0
- data/ext/eigen/eigen3/Eigen/src/Core/NumTraits.h +150 -0
- data/ext/eigen/eigen3/Eigen/src/Core/PermutationMatrix.h +721 -0
- data/ext/eigen/eigen3/Eigen/src/Core/PlainObjectBase.h +822 -0
- data/ext/eigen/eigen3/Eigen/src/Core/ProductBase.h +290 -0
- data/ext/eigen/eigen3/Eigen/src/Core/Random.h +152 -0
- data/ext/eigen/eigen3/Eigen/src/Core/Redux.h +409 -0
- data/ext/eigen/eigen3/Eigen/src/Core/Ref.h +278 -0
- data/ext/eigen/eigen3/Eigen/src/Core/Replicate.h +177 -0
- data/ext/eigen/eigen3/Eigen/src/Core/ReturnByValue.h +99 -0
- data/ext/eigen/eigen3/Eigen/src/Core/Reverse.h +224 -0
- data/ext/eigen/eigen3/Eigen/src/Core/Select.h +162 -0
- data/ext/eigen/eigen3/Eigen/src/Core/SelfAdjointView.h +314 -0
- data/ext/eigen/eigen3/Eigen/src/Core/SelfCwiseBinaryOp.h +191 -0
- data/ext/eigen/eigen3/Eigen/src/Core/SolveTriangular.h +260 -0
- data/ext/eigen/eigen3/Eigen/src/Core/StableNorm.h +203 -0
- data/ext/eigen/eigen3/Eigen/src/Core/Stride.h +108 -0
- data/ext/eigen/eigen3/Eigen/src/Core/Swap.h +126 -0
- data/ext/eigen/eigen3/Eigen/src/Core/Transpose.h +419 -0
- data/ext/eigen/eigen3/Eigen/src/Core/Transpositions.h +436 -0
- data/ext/eigen/eigen3/Eigen/src/Core/TriangularMatrix.h +839 -0
- data/ext/eigen/eigen3/Eigen/src/Core/VectorBlock.h +95 -0
- data/ext/eigen/eigen3/Eigen/src/Core/VectorwiseOp.h +642 -0
- data/ext/eigen/eigen3/Eigen/src/Core/Visitor.h +237 -0
- data/ext/eigen/eigen3/Eigen/src/Core/arch/AltiVec/Complex.h +217 -0
- data/ext/eigen/eigen3/Eigen/src/Core/arch/AltiVec/PacketMath.h +501 -0
- data/ext/eigen/eigen3/Eigen/src/Core/arch/Default/Settings.h +49 -0
- data/ext/eigen/eigen3/Eigen/src/Core/arch/NEON/Complex.h +253 -0
- data/ext/eigen/eigen3/Eigen/src/Core/arch/NEON/PacketMath.h +420 -0
- data/ext/eigen/eigen3/Eigen/src/Core/arch/SSE/Complex.h +442 -0
- data/ext/eigen/eigen3/Eigen/src/Core/arch/SSE/MathFunctions.h +475 -0
- data/ext/eigen/eigen3/Eigen/src/Core/arch/SSE/PacketMath.h +649 -0
- data/ext/eigen/eigen3/Eigen/src/Core/products/CoeffBasedProduct.h +476 -0
- data/ext/eigen/eigen3/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1341 -0
- data/ext/eigen/eigen3/Eigen/src/Core/products/GeneralMatrixMatrix.h +427 -0
- data/ext/eigen/eigen3/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +278 -0
- data/ext/eigen/eigen3/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_MKL.h +146 -0
- data/ext/eigen/eigen3/Eigen/src/Core/products/GeneralMatrixMatrix_MKL.h +118 -0
- data/ext/eigen/eigen3/Eigen/src/Core/products/GeneralMatrixVector.h +566 -0
- data/ext/eigen/eigen3/Eigen/src/Core/products/GeneralMatrixVector_MKL.h +131 -0
- data/ext/eigen/eigen3/Eigen/src/Core/products/Parallelizer.h +162 -0
- data/ext/eigen/eigen3/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +436 -0
- data/ext/eigen/eigen3/Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h +295 -0
- data/ext/eigen/eigen3/Eigen/src/Core/products/SelfadjointMatrixVector.h +281 -0
- data/ext/eigen/eigen3/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h +114 -0
- data/ext/eigen/eigen3/Eigen/src/Core/products/SelfadjointProduct.h +123 -0
- data/ext/eigen/eigen3/Eigen/src/Core/products/SelfadjointRank2Update.h +93 -0
- data/ext/eigen/eigen3/Eigen/src/Core/products/TriangularMatrixMatrix.h +427 -0
- data/ext/eigen/eigen3/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h +309 -0
- data/ext/eigen/eigen3/Eigen/src/Core/products/TriangularMatrixVector.h +348 -0
- data/ext/eigen/eigen3/Eigen/src/Core/products/TriangularMatrixVector_MKL.h +247 -0
- data/ext/eigen/eigen3/Eigen/src/Core/products/TriangularSolverMatrix.h +332 -0
- data/ext/eigen/eigen3/Eigen/src/Core/products/TriangularSolverMatrix_MKL.h +155 -0
- data/ext/eigen/eigen3/Eigen/src/Core/products/TriangularSolverVector.h +139 -0
- data/ext/eigen/eigen3/Eigen/src/Core/util/BlasUtil.h +264 -0
- data/ext/eigen/eigen3/Eigen/src/Core/util/Constants.h +451 -0
- data/ext/eigen/eigen3/Eigen/src/Core/util/DisableStupidWarnings.h +40 -0
- data/ext/eigen/eigen3/Eigen/src/Core/util/ForwardDeclarations.h +302 -0
- data/ext/eigen/eigen3/Eigen/src/Core/util/MKL_support.h +158 -0
- data/ext/eigen/eigen3/Eigen/src/Core/util/Macros.h +451 -0
- data/ext/eigen/eigen3/Eigen/src/Core/util/Memory.h +977 -0
- data/ext/eigen/eigen3/Eigen/src/Core/util/Meta.h +243 -0
- data/ext/eigen/eigen3/Eigen/src/Core/util/NonMPL2.h +3 -0
- data/ext/eigen/eigen3/Eigen/src/Core/util/ReenableStupidWarnings.h +14 -0
- data/ext/eigen/eigen3/Eigen/src/Core/util/StaticAssert.h +208 -0
- data/ext/eigen/eigen3/Eigen/src/Core/util/XprHelper.h +469 -0
- data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Block.h +126 -0
- data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Cwise.h +192 -0
- data/ext/eigen/eigen3/Eigen/src/Eigen2Support/CwiseOperators.h +298 -0
- data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Geometry/AlignedBox.h +159 -0
- data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Geometry/All.h +115 -0
- data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Geometry/AngleAxis.h +214 -0
- data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Geometry/Hyperplane.h +254 -0
- data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Geometry/ParametrizedLine.h +141 -0
- data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Geometry/Quaternion.h +495 -0
- data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Geometry/Rotation2D.h +145 -0
- data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Geometry/RotationBase.h +123 -0
- data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Geometry/Scaling.h +167 -0
- data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Geometry/Transform.h +786 -0
- data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Geometry/Translation.h +184 -0
- data/ext/eigen/eigen3/Eigen/src/Eigen2Support/LU.h +120 -0
- data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Lazy.h +71 -0
- data/ext/eigen/eigen3/Eigen/src/Eigen2Support/LeastSquares.h +169 -0
- data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Macros.h +20 -0
- data/ext/eigen/eigen3/Eigen/src/Eigen2Support/MathFunctions.h +57 -0
- data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Memory.h +45 -0
- data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Meta.h +75 -0
- data/ext/eigen/eigen3/Eigen/src/Eigen2Support/Minor.h +117 -0
- data/ext/eigen/eigen3/Eigen/src/Eigen2Support/QR.h +67 -0
- data/ext/eigen/eigen3/Eigen/src/Eigen2Support/SVD.h +637 -0
- data/ext/eigen/eigen3/Eigen/src/Eigen2Support/TriangularSolver.h +42 -0
- data/ext/eigen/eigen3/Eigen/src/Eigen2Support/VectorBlock.h +94 -0
- data/ext/eigen/eigen3/Eigen/src/Eigenvalues/ComplexEigenSolver.h +341 -0
- data/ext/eigen/eigen3/Eigen/src/Eigenvalues/ComplexSchur.h +456 -0
- data/ext/eigen/eigen3/Eigen/src/Eigenvalues/ComplexSchur_MKL.h +94 -0
- data/ext/eigen/eigen3/Eigen/src/Eigenvalues/EigenSolver.h +607 -0
- data/ext/eigen/eigen3/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +350 -0
- data/ext/eigen/eigen3/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +227 -0
- data/ext/eigen/eigen3/Eigen/src/Eigenvalues/HessenbergDecomposition.h +373 -0
- data/ext/eigen/eigen3/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +160 -0
- data/ext/eigen/eigen3/Eigen/src/Eigenvalues/RealQZ.h +624 -0
- data/ext/eigen/eigen3/Eigen/src/Eigenvalues/RealSchur.h +525 -0
- data/ext/eigen/eigen3/Eigen/src/Eigenvalues/RealSchur_MKL.h +83 -0
- data/ext/eigen/eigen3/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +801 -0
- data/ext/eigen/eigen3/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_MKL.h +92 -0
- data/ext/eigen/eigen3/Eigen/src/Eigenvalues/Tridiagonalization.h +557 -0
- data/ext/eigen/eigen3/Eigen/src/Geometry/AlignedBox.h +392 -0
- data/ext/eigen/eigen3/Eigen/src/Geometry/AngleAxis.h +233 -0
- data/ext/eigen/eigen3/Eigen/src/Geometry/EulerAngles.h +104 -0
- data/ext/eigen/eigen3/Eigen/src/Geometry/Homogeneous.h +307 -0
- data/ext/eigen/eigen3/Eigen/src/Geometry/Hyperplane.h +280 -0
- data/ext/eigen/eigen3/Eigen/src/Geometry/OrthoMethods.h +218 -0
- data/ext/eigen/eigen3/Eigen/src/Geometry/ParametrizedLine.h +195 -0
- data/ext/eigen/eigen3/Eigen/src/Geometry/Quaternion.h +776 -0
- data/ext/eigen/eigen3/Eigen/src/Geometry/Rotation2D.h +160 -0
- data/ext/eigen/eigen3/Eigen/src/Geometry/RotationBase.h +206 -0
- data/ext/eigen/eigen3/Eigen/src/Geometry/Scaling.h +166 -0
- data/ext/eigen/eigen3/Eigen/src/Geometry/Transform.h +1455 -0
- data/ext/eigen/eigen3/Eigen/src/Geometry/Translation.h +206 -0
- data/ext/eigen/eigen3/Eigen/src/Geometry/Umeyama.h +177 -0
- data/ext/eigen/eigen3/Eigen/src/Geometry/arch/Geometry_SSE.h +115 -0
- data/ext/eigen/eigen3/Eigen/src/Householder/BlockHouseholder.h +68 -0
- data/ext/eigen/eigen3/Eigen/src/Householder/Householder.h +171 -0
- data/ext/eigen/eigen3/Eigen/src/Householder/HouseholderSequence.h +441 -0
- data/ext/eigen/eigen3/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +149 -0
- data/ext/eigen/eigen3/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +263 -0
- data/ext/eigen/eigen3/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +256 -0
- data/ext/eigen/eigen3/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +282 -0
- data/ext/eigen/eigen3/Eigen/src/Jacobi/Jacobi.h +433 -0
- data/ext/eigen/eigen3/Eigen/src/LU/Determinant.h +101 -0
- data/ext/eigen/eigen3/Eigen/src/LU/FullPivLU.h +751 -0
- data/ext/eigen/eigen3/Eigen/src/LU/Inverse.h +400 -0
- data/ext/eigen/eigen3/Eigen/src/LU/PartialPivLU.h +509 -0
- data/ext/eigen/eigen3/Eigen/src/LU/PartialPivLU_MKL.h +85 -0
- data/ext/eigen/eigen3/Eigen/src/LU/arch/Inverse_SSE.h +329 -0
- data/ext/eigen/eigen3/Eigen/src/MetisSupport/MetisSupport.h +137 -0
- data/ext/eigen/eigen3/Eigen/src/OrderingMethods/Amd.h +444 -0
- data/ext/eigen/eigen3/Eigen/src/OrderingMethods/Eigen_Colamd.h +1850 -0
- data/ext/eigen/eigen3/Eigen/src/PaStiXSupport/PaStiXSupport.h +721 -0
- data/ext/eigen/eigen3/Eigen/src/PardisoSupport/PardisoSupport.h +592 -0
- data/ext/eigen/eigen3/Eigen/src/QR/ColPivHouseholderQR.h +580 -0
- data/ext/eigen/eigen3/Eigen/src/QR/ColPivHouseholderQR_MKL.h +99 -0
- data/ext/eigen/eigen3/Eigen/src/QR/FullPivHouseholderQR.h +622 -0
- data/ext/eigen/eigen3/Eigen/src/QR/HouseholderQR.h +388 -0
- data/ext/eigen/eigen3/Eigen/src/QR/HouseholderQR_MKL.h +71 -0
- data/ext/eigen/eigen3/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +338 -0
- data/ext/eigen/eigen3/Eigen/src/SVD/JacobiSVD.h +976 -0
- data/ext/eigen/eigen3/Eigen/src/SVD/JacobiSVD_MKL.h +92 -0
- data/ext/eigen/eigen3/Eigen/src/SVD/UpperBidiagonalization.h +148 -0
- data/ext/eigen/eigen3/Eigen/src/SparseCholesky/SimplicialCholesky.h +671 -0
- data/ext/eigen/eigen3/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +199 -0
- data/ext/eigen/eigen3/Eigen/src/SparseCore/AmbiVector.h +373 -0
- data/ext/eigen/eigen3/Eigen/src/SparseCore/CompressedStorage.h +233 -0
- data/ext/eigen/eigen3/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +245 -0
- data/ext/eigen/eigen3/Eigen/src/SparseCore/MappedSparseMatrix.h +181 -0
- data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseBlock.h +537 -0
- data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseColEtree.h +206 -0
- data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +325 -0
- data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +163 -0
- data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseDenseProduct.h +311 -0
- data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseDiagonalProduct.h +196 -0
- data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseDot.h +101 -0
- data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseFuzzy.h +26 -0
- data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseMatrix.h +1262 -0
- data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseMatrixBase.h +461 -0
- data/ext/eigen/eigen3/Eigen/src/SparseCore/SparsePermutation.h +148 -0
- data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseProduct.h +188 -0
- data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseRedux.h +45 -0
- data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseSelfAdjointView.h +507 -0
- data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +150 -0
- data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseTranspose.h +63 -0
- data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseTriangularView.h +179 -0
- data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseUtil.h +172 -0
- data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseVector.h +448 -0
- data/ext/eigen/eigen3/Eigen/src/SparseCore/SparseView.h +99 -0
- data/ext/eigen/eigen3/Eigen/src/SparseCore/TriangularSolver.h +334 -0
- data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU.h +806 -0
- data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLUImpl.h +66 -0
- data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_Memory.h +227 -0
- data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_Structs.h +111 -0
- data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +298 -0
- data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_Utils.h +80 -0
- data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_column_bmod.h +180 -0
- data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_column_dfs.h +177 -0
- data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +106 -0
- data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +279 -0
- data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +127 -0
- data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +130 -0
- data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_panel_bmod.h +223 -0
- data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_panel_dfs.h +258 -0
- data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_pivotL.h +137 -0
- data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_pruneL.h +135 -0
- data/ext/eigen/eigen3/Eigen/src/SparseLU/SparseLU_relax_snode.h +83 -0
- data/ext/eigen/eigen3/Eigen/src/SparseQR/SparseQR.h +714 -0
- data/ext/eigen/eigen3/Eigen/src/StlSupport/StdDeque.h +134 -0
- data/ext/eigen/eigen3/Eigen/src/StlSupport/StdList.h +114 -0
- data/ext/eigen/eigen3/Eigen/src/StlSupport/StdVector.h +126 -0
- data/ext/eigen/eigen3/Eigen/src/StlSupport/details.h +84 -0
- data/ext/eigen/eigen3/Eigen/src/SuperLUSupport/SuperLUSupport.h +1026 -0
- data/ext/eigen/eigen3/Eigen/src/UmfPackSupport/UmfPackSupport.h +474 -0
- data/ext/eigen/eigen3/Eigen/src/misc/Image.h +84 -0
- data/ext/eigen/eigen3/Eigen/src/misc/Kernel.h +81 -0
- data/ext/eigen/eigen3/Eigen/src/misc/Solve.h +76 -0
- data/ext/eigen/eigen3/Eigen/src/misc/SparseSolve.h +128 -0
- data/ext/eigen/eigen3/Eigen/src/misc/blas.h +658 -0
- data/ext/eigen/eigen3/Eigen/src/plugins/ArrayCwiseBinaryOps.h +253 -0
- data/ext/eigen/eigen3/Eigen/src/plugins/ArrayCwiseUnaryOps.h +187 -0
- data/ext/eigen/eigen3/Eigen/src/plugins/BlockMethods.h +935 -0
- data/ext/eigen/eigen3/Eigen/src/plugins/CommonCwiseBinaryOps.h +46 -0
- data/ext/eigen/eigen3/Eigen/src/plugins/CommonCwiseUnaryOps.h +172 -0
- data/ext/eigen/eigen3/Eigen/src/plugins/MatrixCwiseBinaryOps.h +143 -0
- data/ext/eigen/eigen3/Eigen/src/plugins/MatrixCwiseUnaryOps.h +52 -0
- data/ext/eigen/eigen3/signature_of_eigen3_matrix_library +1 -0
- data/ext/eigen/eigen_wrap.cxx +19420 -10396
- data/ext/eigen/extconf.rb +37 -2
- data/lib/eigen.rb +146 -3
- metadata +294 -7
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
// This file is part of Eigen, a lightweight C++ template library
|
|
2
|
+
// for linear algebra.
|
|
3
|
+
//
|
|
4
|
+
// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
|
|
5
|
+
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
|
|
6
|
+
//
|
|
7
|
+
// This Source Code Form is subject to the terms of the Mozilla
|
|
8
|
+
// Public License v. 2.0. If a copy of the MPL was not distributed
|
|
9
|
+
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
/* All the parameters defined in this file can be specialized in the
|
|
13
|
+
* architecture specific files, and/or by the user.
|
|
14
|
+
* More to come... */
|
|
15
|
+
|
|
16
|
+
#ifndef EIGEN_DEFAULT_SETTINGS_H
|
|
17
|
+
#define EIGEN_DEFAULT_SETTINGS_H
|
|
18
|
+
|
|
19
|
+
/** Defines the maximal loop size to enable meta unrolling of loops.
|
|
20
|
+
* Note that the value here is expressed in Eigen's own notion of "number of FLOPS",
|
|
21
|
+
* it does not correspond to the number of iterations or the number of instructions.
|
|
22
|
+
*/
|
|
23
|
+
#ifndef EIGEN_UNROLLING_LIMIT
|
|
24
|
+
#define EIGEN_UNROLLING_LIMIT 100
|
|
25
|
+
#endif
|
|
26
|
+
|
|
27
|
+
/** Defines the threshold between a "small" and a "large" matrix.
|
|
28
|
+
* This threshold is mainly used to select the proper product implementation.
|
|
29
|
+
*/
|
|
30
|
+
#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
|
|
31
|
+
#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
|
|
32
|
+
#endif
|
|
33
|
+
|
|
34
|
+
/** Defines the maximal width of the blocks used in the triangular product and solver
|
|
35
|
+
* for vectors (level 2 blas xTRMV and xTRSV). The default is 8.
|
|
36
|
+
*/
|
|
37
|
+
#ifndef EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH
|
|
38
|
+
#define EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH 8
|
|
39
|
+
#endif
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
/** Defines the default number of registers available for that architecture.
|
|
43
|
+
* Currently it must be 8 or 16. Other values will fail.
|
|
44
|
+
*/
|
|
45
|
+
#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
|
|
46
|
+
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 8
|
|
47
|
+
#endif
|
|
48
|
+
|
|
49
|
+
#endif // EIGEN_DEFAULT_SETTINGS_H
|
|
@@ -0,0 +1,253 @@
|
|
|
1
|
+
// This file is part of Eigen, a lightweight C++ template library
|
|
2
|
+
// for linear algebra.
|
|
3
|
+
//
|
|
4
|
+
// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>
|
|
5
|
+
//
|
|
6
|
+
// This Source Code Form is subject to the terms of the Mozilla
|
|
7
|
+
// Public License v. 2.0. If a copy of the MPL was not distributed
|
|
8
|
+
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
9
|
+
|
|
10
|
+
#ifndef EIGEN_COMPLEX_NEON_H
|
|
11
|
+
#define EIGEN_COMPLEX_NEON_H
|
|
12
|
+
|
|
13
|
+
namespace Eigen {
|
|
14
|
+
|
|
15
|
+
namespace internal {
|
|
16
|
+
|
|
17
|
+
// Sign-bit masks: 0x80000000 sits in every odd 32-bit lane, 0 in the even ones.
// They are XOR-ed into float vectors (see pconj, pmul and predux_mul in this
// file) to negate the odd lanes, i.e. the imaginary parts of (re, im) pairs.
// p4ui_CONJ_XOR is the four-lane mask, p2ui_CONJ_XOR the two-lane one.
static uint32x4_t p4ui_CONJ_XOR = EIGEN_INIT_NEON_PACKET4(0x00000000, 0x80000000, 0x00000000, 0x80000000);
|
|
18
|
+
static uint32x2_t p2ui_CONJ_XOR = EIGEN_INIT_NEON_PACKET2(0x00000000, 0x80000000);
|
|
19
|
+
|
|
20
|
+
//---------- float ----------
|
|
21
|
+
// Packet holding two std::complex<float> values inside a single Packet4f
// (unpacket_traits<Packet2cf> in this file gives scalar type
// std::complex<float> and size 2). The default constructor leaves v
// uninitialized; the explicit constructor wraps an existing Packet4f.
struct Packet2cf
|
|
22
|
+
{
|
|
23
|
+
EIGEN_STRONG_INLINE Packet2cf() {}
|
|
24
|
+
EIGEN_STRONG_INLINE explicit Packet2cf(const Packet4f& a) : v(a) {}
|
|
25
|
+
Packet4f v;
|
|
26
|
+
};
|
|
27
|
+
|
|
28
|
+
template<> struct packet_traits<std::complex<float> > : default_packet_traits
|
|
29
|
+
{
|
|
30
|
+
typedef Packet2cf type;
|
|
31
|
+
enum {
|
|
32
|
+
Vectorizable = 1,
|
|
33
|
+
AlignedOnScalar = 1,
|
|
34
|
+
size = 2,
|
|
35
|
+
|
|
36
|
+
HasAdd = 1,
|
|
37
|
+
HasSub = 1,
|
|
38
|
+
HasMul = 1,
|
|
39
|
+
HasDiv = 1,
|
|
40
|
+
HasNegate = 1,
|
|
41
|
+
HasAbs = 0,
|
|
42
|
+
HasAbs2 = 0,
|
|
43
|
+
HasMin = 0,
|
|
44
|
+
HasMax = 0,
|
|
45
|
+
HasSetLinear = 0
|
|
46
|
+
};
|
|
47
|
+
};
|
|
48
|
+
|
|
49
|
+
// Reverse mapping from the packet type to its scalar: a Packet2cf carries
// two (size=2) std::complex<float> values.
template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2}; };
|
|
50
|
+
|
|
51
|
+
// Broadcasts one complex value into both complex slots of the packet,
// producing | re | im | re | im |.
template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
{
  // Load the (re, im) pair as two consecutive floats. The cast keeps the
  // const qualifier of `from`, since vld1_f32 only reads through the pointer.
  const float32x2_t pair = vld1_f32((const float *)&from);

  // Duplicate the 64-bit pair into the low and high halves.
  return Packet2cf(vcombine_f32(pair, pair));
}
|
|
58
|
+
|
|
59
|
+
// Complex addition of two packets: forwards to padd<Packet4f> on the wrapped
// float vectors, so real and imaginary lanes are added independently.
template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(padd<Packet4f>(a.v,b.v)); }
|
|
60
|
+
// Complex subtraction a - b: forwards to psub<Packet4f> on the wrapped float
// vectors, so real and imaginary lanes are subtracted independently.
template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(psub<Packet4f>(a.v,b.v)); }
|
|
61
|
+
// Negation of both complex values: forwards to pnegate<Packet4f> on the
// wrapped float vector.
template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) { return Packet2cf(pnegate<Packet4f>(a.v)); }
|
|
62
|
+
// Complex conjugate of both packed values: XOR-ing with p4ui_CONJ_XOR flips
// the sign bit of every odd (imaginary) lane and leaves the real lanes alone.
template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a)
{
  const Packet4ui flipped = veorq_u32(vreinterpretq_u32_f32(a.v), p4ui_CONJ_XOR);
  return Packet2cf(vreinterpretq_f32_u32(flipped));
}
|
|
67
|
+
|
|
68
|
+
// Slot-wise complex product of a and b. With a = (ar, ai) and b = (br, bi)
// in a given slot, the result slot is (ar*br - ai*bi, ar*bi + ai*br).
template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  const float32x2_t a_lo = vget_low_f32(a.v);
  const float32x2_t a_hi = vget_high_f32(a.v);

  // Real parts of a, duplicated:      | a1_re | a1_re | a2_re | a2_re |
  const Packet4f a_re = vcombine_f32(vdup_lane_f32(a_lo, 0), vdup_lane_f32(a_hi, 0));
  // Imaginary parts of a, duplicated: | a1_im | a1_im | a2_im | a2_im |
  const Packet4f a_im = vcombine_f32(vdup_lane_f32(a_lo, 1), vdup_lane_f32(a_hi, 1));

  // re(a) * b, per slot: | ar*br | ar*bi |
  const Packet4f re_times_b = vmulq_f32(a_re, b.v);
  // im(a) * b, per slot: | ai*br | ai*bi |
  Packet4f im_times_b = vmulq_f32(a_im, b.v);

  // Negate the odd lanes: | ai*br | -ai*bi |
  im_times_b = vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(im_times_b), p4ui_CONJ_XOR));
  // Swap the lanes of each pair: | -ai*bi | ai*br |
  im_times_b = vrev64q_f32(im_times_b);

  // Sum of both partial products gives the complex product.
  return Packet2cf(vaddq_f32(re_times_b, im_times_b));
}
|
|
87
|
+
|
|
88
|
+
// Bitwise AND of the raw 128-bit contents of a and b.
// The float lanes are reinterpreted as 32-bit unsigned integers, combined
// with vandq_u32, and reinterpreted back; no numeric conversion takes place.
// (The previous body called vorrq_u32, which made pand behave exactly like por.)
template<> EIGEN_STRONG_INLINE Packet2cf pand<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  const uint32x4_t lhs_bits = vreinterpretq_u32_f32(a.v);
  const uint32x4_t rhs_bits = vreinterpretq_u32_f32(b.v);
  return Packet2cf(vreinterpretq_f32_u32(vandq_u32(lhs_bits, rhs_bits)));
}
|
|
92
|
+
// Bitwise OR of the raw 128-bit contents of a and b (lanes are reinterpreted
// as 32-bit unsigned integers, not converted).
template<> EIGEN_STRONG_INLINE Packet2cf por<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  const uint32x4_t lhs_bits = vreinterpretq_u32_f32(a.v);
  const uint32x4_t rhs_bits = vreinterpretq_u32_f32(b.v);
  return Packet2cf(vreinterpretq_f32_u32(vorrq_u32(lhs_bits, rhs_bits)));
}
|
|
96
|
+
// Bitwise XOR of the raw 128-bit contents of a and b (lanes are reinterpreted
// as 32-bit unsigned integers, not converted).
template<> EIGEN_STRONG_INLINE Packet2cf pxor<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  const uint32x4_t lhs_bits = vreinterpretq_u32_f32(a.v);
  const uint32x4_t rhs_bits = vreinterpretq_u32_f32(b.v);
  return Packet2cf(vreinterpretq_f32_u32(veorq_u32(lhs_bits, rhs_bits)));
}
|
|
100
|
+
// Bit clear on the raw 128-bit contents: vbicq_u32(x, y) yields x & ~y, so
// the result keeps the bits of a that are not set in b.
template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  const uint32x4_t lhs_bits = vreinterpretq_u32_f32(a.v);
  const uint32x4_t rhs_bits = vreinterpretq_u32_f32(b.v);
  return Packet2cf(vreinterpretq_f32_u32(vbicq_u32(lhs_bits, rhs_bits)));
}
|
|
104
|
+
|
|
105
|
+
// Loads two complex values starting at `from` by viewing the memory as floats
// and forwarding to pload<Packet4f>. Tagged with EIGEN_DEBUG_ALIGNED_LOAD,
// i.e. this is the aligned-load entry point.
template<> EIGEN_STRONG_INLINE Packet2cf pload<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload<Packet4f>((const float*)from)); }
|
|
106
|
+
// Loads two complex values starting at `from` by viewing the memory as floats
// and forwarding to ploadu<Packet4f>. Tagged with EIGEN_DEBUG_UNALIGNED_LOAD,
// i.e. this is the unaligned-load entry point.
template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu<Packet4f>((const float*)from)); }
|
|
107
|
+
|
|
108
|
+
// Reads the single complex value *from and duplicates it into both slots of
// the packet by delegating to pset1<Packet2cf>.
template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from) { return pset1<Packet2cf>(*from); }
|
|
109
|
+
|
|
110
|
+
// Stores both complex values of `from` to `to` by viewing the destination as
// floats and forwarding to the Packet4f pstore. Tagged with
// EIGEN_DEBUG_ALIGNED_STORE, i.e. this is the aligned-store entry point.
template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((float*)to, from.v); }
|
|
111
|
+
// Stores both complex values of `from` to `to` by viewing the destination as
// floats and forwarding to the Packet4f pstoreu. Tagged with
// EIGEN_DEBUG_UNALIGNED_STORE, i.e. this is the unaligned-store entry point.
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((float*)to, from.v); }
|
|
112
|
+
|
|
113
|
+
// Prefetch hint for complex data: passes the address, viewed as a float
// pointer, to the EIGEN_ARM_PREFETCH macro (defined elsewhere).
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { EIGEN_ARM_PREFETCH((float *)addr); }
|
|
114
|
+
|
|
115
|
+
// Returns the first (lowest-address) complex value stored in the packet.
template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
{
  // Spill all four float lanes into a scratch buffer declared with
  // EIGEN_ALIGN16, then read back slot 0.
  std::complex<float> EIGEN_ALIGN16 spilled[2];
  vst1q_f32((float *)spilled, a.v);
  return spilled[0];
}
|
|
121
|
+
|
|
122
|
+
// Reverses the order of the two complex values: | c0 | c1 | -> | c1 | c0 |.
template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a)
{
  // Each 64-bit half holds one (re, im) pair, so exchanging the halves
  // exchanges the two complex values without touching re/im order.
  return Packet2cf(vcombine_f32(vget_high_f32(a.v), vget_low_f32(a.v)));
}
|
|
133
|
+
|
|
134
|
+
// Swaps the real and imaginary parts inside each complex value:
// | re0 | im0 | re1 | im1 | -> | im0 | re0 | im1 | re1 |.
template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(const Packet2cf& a)
{
  // vrev64q_f32 reverses the 32-bit lanes within each 64-bit half.
  const Packet4f swapped = vrev64q_f32(a.v);
  return Packet2cf(swapped);
}
|
|
138
|
+
|
|
139
|
+
// Horizontal sum: returns the sum of the two complex values held in a.
template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a)
{
  std::complex<float> total;

  // Each 64-bit half is one (re, im) pair; adding the halves lane-wise
  // adds the real parts together and the imaginary parts together.
  const float32x2_t first  = vget_low_f32(a.v);
  const float32x2_t second = vget_high_f32(a.v);
  vst1_f32((float *)&total, vadd_f32(first, second));

  return total;
}
|
|
151
|
+
|
|
152
|
+
// Reduces two packets at once: slot 0 of the result is the sum of the two
// complex values of vecs[0], slot 1 is the sum of the two values of vecs[1].
// Reads vecs[0] and vecs[1].
template<> EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(const Packet2cf* vecs)
{
  // First complex value of each input:  | vecs[0] low  | vecs[1] low  |
  const Packet4f firsts  = vcombine_f32(vget_low_f32(vecs[0].v),  vget_low_f32(vecs[1].v));
  // Second complex value of each input: | vecs[0] high | vecs[1] high |
  const Packet4f seconds = vcombine_f32(vget_high_f32(vecs[0].v), vget_high_f32(vecs[1].v));

  // Lane-wise add yields both per-packet sums side by side.
  return Packet2cf(vaddq_f32(firsts, seconds));
}
|
|
163
|
+
|
|
164
|
+
// Horizontal product: returns c0 * c1, where c0 and c1 are the two complex
// values held in a. Same scheme as pmul, applied to 64-bit halves.
template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a)
{
  std::complex<float> result;

  const float32x2_t c0 = vget_low_f32(a.v);   // | c0_re | c0_im |
  const float32x2_t c1 = vget_high_f32(a.v);  // | c1_re | c1_im |

  // Real part of c0, duplicated:      | c0_re | c0_re |
  const float32x2_t c0_re = vdup_lane_f32(c0, 0);
  // Imaginary part of c0, duplicated: | c0_im | c0_im |
  const float32x2_t c0_im = vdup_lane_f32(c0, 1);

  // re(c0) * c1: | c0_re*c1_re | c0_re*c1_im |
  const float32x2_t re_times_c1 = vmul_f32(c0_re, c1);
  // im(c0) * c1: | c0_im*c1_re | c0_im*c1_im |
  float32x2_t im_times_c1 = vmul_f32(c0_im, c1);

  // Negate the odd lane: | c0_im*c1_re | -c0_im*c1_im |
  im_times_c1 = vreinterpret_f32_u32(veor_u32(vreinterpret_u32_f32(im_times_c1), p2ui_CONJ_XOR));
  // Swap the two lanes:  | -c0_im*c1_im | c0_im*c1_re |
  im_times_c1 = vrev64_f32(im_times_c1);

  // Sum of both partial products is the complex product; store it as (re, im).
  vst1_f32((float *)&result, vadd_f32(re_times_c1, im_times_c1));

  return result;
}
|
|
190
|
+
|
|
191
|
+
template<int Offset>
|
|
192
|
+
struct palign_impl<Offset,Packet2cf>
|
|
193
|
+
{
|
|
194
|
+
EIGEN_STRONG_INLINE static void run(Packet2cf& first, const Packet2cf& second)
|
|
195
|
+
{
|
|
196
|
+
if (Offset==1)
|
|
197
|
+
{
|
|
198
|
+
first.v = vextq_f32(first.v, second.v, 2);
|
|
199
|
+
}
|
|
200
|
+
}
|
|
201
|
+
};
|
|
202
|
+
|
|
203
|
+
template<> struct conj_helper<Packet2cf, Packet2cf, false,true>
|
|
204
|
+
{
|
|
205
|
+
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
|
|
206
|
+
{ return padd(pmul(x,y),c); }
|
|
207
|
+
|
|
208
|
+
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
|
|
209
|
+
{
|
|
210
|
+
return internal::pmul(a, pconj(b));
|
|
211
|
+
}
|
|
212
|
+
};
|
|
213
|
+
|
|
214
|
+
template<> struct conj_helper<Packet2cf, Packet2cf, true,false>
|
|
215
|
+
{
|
|
216
|
+
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
|
|
217
|
+
{ return padd(pmul(x,y),c); }
|
|
218
|
+
|
|
219
|
+
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
|
|
220
|
+
{
|
|
221
|
+
return internal::pmul(pconj(a), b);
|
|
222
|
+
}
|
|
223
|
+
};
|
|
224
|
+
|
|
225
|
+
template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
|
|
226
|
+
{
|
|
227
|
+
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
|
|
228
|
+
{ return padd(pmul(x,y),c); }
|
|
229
|
+
|
|
230
|
+
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
|
|
231
|
+
{
|
|
232
|
+
return pconj(internal::pmul(a, b));
|
|
233
|
+
}
|
|
234
|
+
};
|
|
235
|
+
|
|
236
|
+
// Complex packet division, computed as a * conj(b) divided lane-wise by the
// squared magnitude of b.
template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  // TODO optimize it for NEON
  // Numerator: a * conj(b).
  const Packet2cf numerator = conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a,b);

  // Denominator: square every float lane of b, then add each lane to its
  // neighbour inside the same 64-bit pair, so both lanes of a pair hold the
  // sum of the two squares.
  const Packet4f squares = vmulq_f32(b.v, b.v);
  const Packet4f squares_swapped = vrev64q_f32(squares);

  return Packet2cf(pdiv(numerator.v, vaddq_f32(squares, squares_swapped)));
}
|
|
248
|
+
|
|
249
|
+
} // end namespace internal
|
|
250
|
+
|
|
251
|
+
} // end namespace Eigen
|
|
252
|
+
|
|
253
|
+
#endif // EIGEN_COMPLEX_NEON_H
|
|
@@ -0,0 +1,420 @@
|
|
|
1
|
+
// This file is part of Eigen, a lightweight C++ template library
|
|
2
|
+
// for linear algebra.
|
|
3
|
+
//
|
|
4
|
+
// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
|
|
5
|
+
// Copyright (C) 2010 Konstantinos Margaritis <markos@codex.gr>
|
|
6
|
+
// Heavily based on Gael's SSE version.
|
|
7
|
+
//
|
|
8
|
+
// This Source Code Form is subject to the terms of the Mozilla
|
|
9
|
+
// Public License v. 2.0. If a copy of the MPL was not distributed
|
|
10
|
+
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
11
|
+
|
|
12
|
+
#ifndef EIGEN_PACKET_MATH_NEON_H
|
|
13
|
+
#define EIGEN_PACKET_MATH_NEON_H
|
|
14
|
+
|
|
15
|
+
namespace Eigen {
|
|
16
|
+
|
|
17
|
+
namespace internal {
|
|
18
|
+
|
|
19
|
+
#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
|
|
20
|
+
#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
|
|
21
|
+
#endif
|
|
22
|
+
|
|
23
|
+
// FIXME NEON has 16 quad registers, but since the current register allocator
|
|
24
|
+
// is so bad, it is much better to reduce it to 8
|
|
25
|
+
#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
|
|
26
|
+
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 8
|
|
27
|
+
#endif
|
|
28
|
+
|
|
29
|
+
typedef float32x4_t Packet4f;
|
|
30
|
+
typedef int32x4_t Packet4i;
|
|
31
|
+
typedef uint32x4_t Packet4ui;
|
|
32
|
+
|
|
33
|
+
// Declares the constant p4f_<NAME>: a Packet4f with every lane set to X.
#define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \
  const Packet4f p4f_##NAME = pset1<Packet4f>(X)
|
|
35
|
+
|
|
36
|
+
// Declares the constant p4f_<NAME>: a Packet4f whose four lanes all carry the
// 32-bit pattern X reinterpreted as a float (no numeric conversion).
// The pattern is broadcast with vdupq_n_u32 so that the argument handed to
// vreinterpretq_f32_u32 really is a uint32x4_t; the former pset1<int>(X)
// produced a scalar and could not compile once the macro was expanded.
#define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \
  const Packet4f p4f_##NAME = vreinterpretq_f32_u32(vdupq_n_u32(X))
|
|
38
|
+
|
|
39
|
+
// Declares the constant p4i_<NAME>: a Packet4i with every lane set to X.
#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
  const Packet4i p4i_##NAME = pset1<Packet4i>(X)
|
|
41
|
+
|
|
42
|
+
#if defined(__llvm__) && !defined(__clang__)
|
|
43
|
+
//Special treatment for Apple's llvm-gcc, its NEON packet types are unions
|
|
44
|
+
#define EIGEN_INIT_NEON_PACKET2(X, Y) {{X, Y}}
|
|
45
|
+
#define EIGEN_INIT_NEON_PACKET4(X, Y, Z, W) {{X, Y, Z, W}}
|
|
46
|
+
#else
|
|
47
|
+
//Default initializer for packets
|
|
48
|
+
#define EIGEN_INIT_NEON_PACKET2(X, Y) {X, Y}
|
|
49
|
+
#define EIGEN_INIT_NEON_PACKET4(X, Y, Z, W) {X, Y, Z, W}
|
|
50
|
+
#endif
|
|
51
|
+
|
|
52
|
+
// arm64 does not have the pld instruction. If available, let's trust the __builtin_prefetch built-in function
|
|
53
|
+
// which is available on LLVM and GCC (at least)
|
|
54
|
+
#if EIGEN_HAS_BUILTIN(__builtin_prefetch) || defined(__GNUC__)
|
|
55
|
+
#define EIGEN_ARM_PREFETCH(ADDR) __builtin_prefetch(ADDR);
|
|
56
|
+
#elif defined __pld
|
|
57
|
+
#define EIGEN_ARM_PREFETCH(ADDR) __pld(ADDR)
|
|
58
|
+
#elif !defined(__aarch64__)
|
|
59
|
+
#define EIGEN_ARM_PREFETCH(ADDR) __asm__ __volatile__ ( " pld [%[addr]]\n" :: [addr] "r" (ADDR) : "cc" );
|
|
60
|
+
#else
|
|
61
|
+
// by default no explicit prefetching
|
|
62
|
+
#define EIGEN_ARM_PREFETCH(ADDR)
|
|
63
|
+
#endif
|
|
64
|
+
|
|
65
|
+
template<> struct packet_traits<float> : default_packet_traits
|
|
66
|
+
{
|
|
67
|
+
typedef Packet4f type;
|
|
68
|
+
enum {
|
|
69
|
+
Vectorizable = 1,
|
|
70
|
+
AlignedOnScalar = 1,
|
|
71
|
+
size = 4,
|
|
72
|
+
|
|
73
|
+
HasDiv = 1,
|
|
74
|
+
// FIXME check the Has*
|
|
75
|
+
HasSin = 0,
|
|
76
|
+
HasCos = 0,
|
|
77
|
+
HasLog = 0,
|
|
78
|
+
HasExp = 0,
|
|
79
|
+
HasSqrt = 0
|
|
80
|
+
};
|
|
81
|
+
};
|
|
82
|
+
template<> struct packet_traits<int> : default_packet_traits
|
|
83
|
+
{
|
|
84
|
+
typedef Packet4i type;
|
|
85
|
+
enum {
|
|
86
|
+
Vectorizable = 1,
|
|
87
|
+
AlignedOnScalar = 1,
|
|
88
|
+
size=4
|
|
89
|
+
// FIXME check the Has*
|
|
90
|
+
};
|
|
91
|
+
};
|
|
92
|
+
|
|
93
|
+
#if EIGEN_GNUC_AT_MOST(4,4) && !defined(__llvm__)
|
|
94
|
+
// workaround gcc 4.2, 4.3 and 4.4 compilation issue
|
|
95
|
+
// Thin forwarding overloads taking plain float pointers; each one casts to
// float32_t* and calls the global NEON intrinsic of the same name.
EIGEN_STRONG_INLINE float32x4_t vld1q_f32(const float* x)
{
  return ::vld1q_f32((const float32_t*)x);
}
EIGEN_STRONG_INLINE float32x2_t vld1_f32 (const float* x)
{
  return ::vld1_f32 ((const float32_t*)x);
}
EIGEN_STRONG_INLINE void vst1q_f32(float* to, float32x4_t from)
{
  ::vst1q_f32((float32_t*)to,from);
}
EIGEN_STRONG_INLINE void vst1_f32 (float* to, float32x2_t from)
{
  ::vst1_f32 ((float32_t*)to,from);
}
|
|
99
|
+
#endif
|
|
100
|
+
|
|
101
|
+
template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4}; };
|
|
102
|
+
template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4}; };
|
|
103
|
+
|
|
104
|
+
// Broadcast one scalar into all four lanes.
template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from)
{
  return vdupq_n_f32(from);
}
template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from)
{
  return vdupq_n_s32(from);
}
|
|
106
|
+
|
|
107
|
+
// Returns the linearly increasing packet (a, a+1, a+2, a+3).
template<> EIGEN_STRONG_INLINE Packet4f plset<float>(const float& a)
{
  // Per-lane increments added to the broadcast start value.
  const Packet4f lane_offsets = EIGEN_INIT_NEON_PACKET4(0, 1, 2, 3);
  const Packet4f start = pset1<Packet4f>(a);
  return vaddq_f32(start, lane_offsets);
}
|
|
112
|
+
// Returns the linearly increasing packet (a, a+1, a+2, a+3).
template<> EIGEN_STRONG_INLINE Packet4i plset<int>(const int& a)
{
  // Per-lane increments added to the broadcast start value.
  const Packet4i lane_offsets = EIGEN_INIT_NEON_PACKET4(0, 1, 2, 3);
  const Packet4i start = pset1<Packet4i>(a);
  return vaddq_s32(start, lane_offsets);
}
|
|
117
|
+
|
|
118
|
+
// Lane-wise addition.
template<> EIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const Packet4f& b)
{
  return vaddq_f32(a, b);
}
template<> EIGEN_STRONG_INLINE Packet4i padd<Packet4i>(const Packet4i& a, const Packet4i& b)
{
  return vaddq_s32(a, b);
}
|
|
120
|
+
|
|
121
|
+
// Lane-wise subtraction (a - b).
template<> EIGEN_STRONG_INLINE Packet4f psub<Packet4f>(const Packet4f& a, const Packet4f& b)
{
  return vsubq_f32(a, b);
}
template<> EIGEN_STRONG_INLINE Packet4i psub<Packet4i>(const Packet4i& a, const Packet4i& b)
{
  return vsubq_s32(a, b);
}
|
|
123
|
+
|
|
124
|
+
// Lane-wise negation.
template<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a)
{
  return vnegq_f32(a);
}
template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a)
{
  return vnegq_s32(a);
}
|
|
126
|
+
|
|
127
|
+
// Conjugation of real-valued packets is the identity.
template<> EIGEN_STRONG_INLINE Packet4f pconj(const Packet4f& a)
{
  return a;
}
template<> EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a)
{
  return a;
}
|
|
129
|
+
|
|
130
|
+
// Lane-wise multiplication.
template<> EIGEN_STRONG_INLINE Packet4f pmul<Packet4f>(const Packet4f& a, const Packet4f& b)
{
  return vmulq_f32(a, b);
}
template<> EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const Packet4i& b)
{
  return vmulq_s32(a, b);
}
|
|
132
|
+
|
|
133
|
+
// Lane-wise float division a / b.
//
// On AArch64 the hardware vector divide is used. On 32-bit ARM, NEON has no
// divide instruction, so 1/b is approximated and multiplied by a; lanes where
// b is zero, infinite or denormal may therefore differ from a scalar
// division.
template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b)
{
#if defined(__aarch64__)
  return vdivq_f32(a, b);
#else
  // vrecpeq_f32() only returns a coarse estimate of 1/b. Each Newton-Raphson
  // step below uses vrecpsq_f32(b, inv), which yields the correction factor
  // (2 - b*inv), and multiplies the current estimate by it.
  Packet4f inv = vrecpeq_f32(b);

  // A single step leaves the result well short of float precision, so refine
  // twice.
  inv = vmulq_f32(vrecpsq_f32(b, inv), inv);
  inv = vmulq_f32(vrecpsq_f32(b, inv), inv);

  // Finally, multiply a by 1/b to obtain the quotient.
  return vmulq_f32(a, inv);
#endif
}
|
|
154
|
+
// Integer packet division is not implemented: this always trips the
// assertion, and yields an all-zero packet if eigen_assert does not stop
// execution.
template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& /*a*/, const Packet4i& /*b*/)
{
  eigen_assert(false && "packet integer division are not supported by NEON");
  return pset1<Packet4i>(0);
}
|
|
158
|
+
|
|
159
|
+
// for some weird reasons, it has to be overloaded for packet of integers
|
|
160
|
+
// Multiply-accumulate: returns a * b + c (the NEON intrinsic takes the
// accumulator as its first argument).
template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c)
{
  return vmlaq_f32(c, a, b);
}
template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c)
{
  return vmlaq_s32(c, a, b);
}
|
|
162
|
+
|
|
163
|
+
// Lane-wise minimum.
template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b)
{
  return vminq_f32(a, b);
}
template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b)
{
  return vminq_s32(a, b);
}
|
|
165
|
+
|
|
166
|
+
// Lane-wise maximum.
template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b)
{
  return vmaxq_f32(a, b);
}
template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b)
{
  return vmaxq_s32(a, b);
}
|
|
168
|
+
|
|
169
|
+
// Logical Operations are not supported for float, so we have to reinterpret casts using NEON intrinsics
|
|
170
|
+
// Bitwise AND. The float version views both operands as raw 32-bit lanes,
// combines them, and views the result as floats again.
template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b)
{
  const Packet4ui a_bits = vreinterpretq_u32_f32(a);
  const Packet4ui b_bits = vreinterpretq_u32_f32(b);
  return vreinterpretq_f32_u32(vandq_u32(a_bits, b_bits));
}
template<> EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b)
{
  return vandq_s32(a, b);
}
|
|
175
|
+
|
|
176
|
+
// Bitwise OR. The float version operates on the raw 32-bit lane patterns.
template<> EIGEN_STRONG_INLINE Packet4f por<Packet4f>(const Packet4f& a, const Packet4f& b)
{
  const Packet4ui a_bits = vreinterpretq_u32_f32(a);
  const Packet4ui b_bits = vreinterpretq_u32_f32(b);
  return vreinterpretq_f32_u32(vorrq_u32(a_bits, b_bits));
}
template<> EIGEN_STRONG_INLINE Packet4i por<Packet4i>(const Packet4i& a, const Packet4i& b)
{
  return vorrq_s32(a, b);
}
|
|
181
|
+
|
|
182
|
+
// Bitwise XOR. The float version operates on the raw 32-bit lane patterns.
template<> EIGEN_STRONG_INLINE Packet4f pxor<Packet4f>(const Packet4f& a, const Packet4f& b)
{
  const Packet4ui a_bits = vreinterpretq_u32_f32(a);
  const Packet4ui b_bits = vreinterpretq_u32_f32(b);
  return vreinterpretq_f32_u32(veorq_u32(a_bits, b_bits));
}
template<> EIGEN_STRONG_INLINE Packet4i pxor<Packet4i>(const Packet4i& a, const Packet4i& b)
{
  return veorq_s32(a, b);
}
|
|
187
|
+
|
|
188
|
+
// Bit clear: a AND (NOT b). The float version operates on the raw 32-bit
// lane patterns.
template<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f& b)
{
  const Packet4ui a_bits = vreinterpretq_u32_f32(a);
  const Packet4ui b_bits = vreinterpretq_u32_f32(b);
  return vreinterpretq_f32_u32(vbicq_u32(a_bits, b_bits));
}
template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b)
{
  return vbicq_s32(a, b);
}
|
|
193
|
+
|
|
194
|
+
// Aligned load of four consecutive scalars starting at `from`.
template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from)
{
  EIGEN_DEBUG_ALIGNED_LOAD
  return vld1q_f32(from);
}
template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from)
{
  EIGEN_DEBUG_ALIGNED_LOAD
  return vld1q_s32(from);
}
|
|
196
|
+
|
|
197
|
+
// Unaligned load of four consecutive scalars; uses the same NEON load
// intrinsic as the aligned variant.
template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from)
{
  EIGEN_DEBUG_UNALIGNED_LOAD
  return vld1q_f32(from);
}
template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from)
{
  EIGEN_DEBUG_UNALIGNED_LOAD
  return vld1q_s32(from);
}
|
|
199
|
+
|
|
200
|
+
// Loads two scalars and duplicates each: (from[0], from[0], from[1], from[1]).
template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from)
{
  const float32x2_t first_twice  = vld1_dup_f32(from);
  const float32x2_t second_twice = vld1_dup_f32(from+1);
  return vcombine_f32(first_twice, second_twice);
}
|
|
207
|
+
// Loads two scalars and duplicates each: (from[0], from[0], from[1], from[1]).
template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int* from)
{
  const int32x2_t first_twice  = vld1_dup_s32(from);
  const int32x2_t second_twice = vld1_dup_s32(from+1);
  return vcombine_s32(first_twice, second_twice);
}
|
|
214
|
+
|
|
215
|
+
// Aligned store of all four lanes to `to`.
template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from)
{
  EIGEN_DEBUG_ALIGNED_STORE
  vst1q_f32(to, from);
}
template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& from)
{
  EIGEN_DEBUG_ALIGNED_STORE
  vst1q_s32(to, from);
}
|
|
217
|
+
|
|
218
|
+
// Unaligned store of all four lanes; uses the same NEON store intrinsic as
// the aligned variant.
template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from)
{
  EIGEN_DEBUG_UNALIGNED_STORE
  vst1q_f32(to, from);
}
template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from)
{
  EIGEN_DEBUG_UNALIGNED_STORE
  vst1q_s32(to, from);
}
|
|
220
|
+
|
|
221
|
+
// Prefetch hint for `addr`; expands to whatever EIGEN_ARM_PREFETCH was
// configured as (possibly nothing).
template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr)
{
  EIGEN_ARM_PREFETCH(addr);
}
template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr)
{
  EIGEN_ARM_PREFETCH(addr);
}
|
|
223
|
+
|
|
224
|
+
// FIXME only store the 2 first elements ?
|
|
225
|
+
template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { float EIGEN_ALIGN16 x[4]; vst1q_f32(x, a); return x[0]; }
|
|
226
|
+
template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { int EIGEN_ALIGN16 x[4]; vst1q_s32(x, a); return x[0]; }
|
|
227
|
+
|
|
228
|
+
// Reverses lane order: (a0,a1,a2,a3) -> (a3,a2,a1,a0).
template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a) {
  // First swap the two lanes inside each 64-bit half: (a1,a0,a3,a2) ...
  const Packet4f pair_swapped = vrev64q_f32(a);
  // ... then exchange the halves.
  const float32x2_t new_low  = vget_high_f32(pair_swapped);
  const float32x2_t new_high = vget_low_f32(pair_swapped);
  return vcombine_f32(new_low, new_high);
}
|
|
237
|
+
// Reverses lane order: (a0,a1,a2,a3) -> (a3,a2,a1,a0).
template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a) {
  // First swap the two lanes inside each 64-bit half: (a1,a0,a3,a2) ...
  const Packet4i pair_swapped = vrev64q_s32(a);
  // ... then exchange the halves.
  const int32x2_t new_low  = vget_high_s32(pair_swapped);
  const int32x2_t new_high = vget_low_s32(pair_swapped);
  return vcombine_s32(new_low, new_high);
}
|
|
246
|
+
// Lane-wise absolute value.
template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a)
{
  return vabsq_f32(a);
}
template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a)
{
  return vabsq_s32(a);
}
|
|
248
|
+
|
|
249
|
+
// Horizontal sum of the four lanes, computed as (a0+a1) + (a2+a3).
template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
{
  // Pairwise add of the two halves gives (a0+a1, a2+a3).
  const float32x2_t pair_sums = vpadd_f32(vget_low_f32(a), vget_high_f32(a));
  // A second pairwise add leaves the total in both lanes.
  const float32x2_t total = vpadd_f32(pair_sums, pair_sums);
  return vget_lane_f32(total, 0);
}
|
|
259
|
+
|
|
260
|
+
// Reduces four packets at once: lane k of the result is the sum of the four
// lanes of vecs[k].
template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
{
  // Two rounds of interleaving transpose the 4x4 block formed by vecs[0..3].
  const float32x4x2_t zip02 = vzipq_f32(vecs[0], vecs[2]);
  const float32x4x2_t zip13 = vzipq_f32(vecs[1], vecs[3]);
  const float32x4x2_t cols01 = vzipq_f32(zip02.val[0], zip13.val[0]);
  const float32x4x2_t cols23 = vzipq_f32(zip02.val[1], zip13.val[1]);

  // Add the transposed vectors: (col0 + col1) + (col2 + col3).
  const Packet4f sum01 = vaddq_f32(cols01.val[0], cols01.val[1]);
  const Packet4f sum23 = vaddq_f32(cols23.val[0], cols23.val[1]);
  return vaddq_f32(sum01, sum23);
}
|
|
279
|
+
|
|
280
|
+
// Horizontal sum of the four lanes.
template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)
{
  // Pairwise add of the two halves gives (a0+a1, a2+a3).
  const int32x2_t pair_sums = vpadd_s32(vget_low_s32(a), vget_high_s32(a));
  // A second pairwise add leaves the total in both lanes.
  const int32x2_t total = vpadd_s32(pair_sums, pair_sums);
  return vget_lane_s32(total, 0);
}
|
|
290
|
+
|
|
291
|
+
// Reduces four packets at once: lane k of the result is the sum of the four
// lanes of vecs[k].
template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
{
  // Two rounds of interleaving transpose the 4x4 block formed by vecs[0..3].
  const int32x4x2_t zip02 = vzipq_s32(vecs[0], vecs[2]);
  const int32x4x2_t zip13 = vzipq_s32(vecs[1], vecs[3]);
  const int32x4x2_t cols01 = vzipq_s32(zip02.val[0], zip13.val[0]);
  const int32x4x2_t cols23 = vzipq_s32(zip02.val[1], zip13.val[1]);

  // Add the transposed vectors: (col0 + col1) + (col2 + col3).
  const Packet4i sum01 = vaddq_s32(cols01.val[0], cols01.val[1]);
  const Packet4i sum23 = vaddq_s32(cols23.val[0], cols23.val[1]);
  return vaddq_s32(sum01, sum23);
}
|
|
310
|
+
|
|
311
|
+
// Other reduction functions:
|
|
312
|
+
// mul
|
|
313
|
+
// Horizontal product of the four lanes, computed as (a0*a2) * (a1*a3).
template<> EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a)
{
  // Multiply the low half (a0,a1) by the high half (a2,a3): (a0*a2, a1*a3).
  const float32x2_t half_products = vmul_f32(vget_low_f32(a), vget_high_f32(a));
  // Multiply by the lane-swapped copy so each lane holds the full product.
  const float32x2_t full_product = vmul_f32(half_products, vrev64_f32(half_products));
  return vget_lane_f32(full_product, 0);
}
|
|
327
|
+
// Horizontal product of the four lanes.
template<> EIGEN_STRONG_INLINE int predux_mul<Packet4i>(const Packet4i& a)
{
  // Multiply the low half (a0,a1) by the high half (a2,a3): (a0*a2, a1*a3).
  const int32x2_t half_products = vmul_s32(vget_low_s32(a), vget_high_s32(a));
  // Multiply by the lane-swapped copy so each lane holds the full product.
  const int32x2_t full_product = vmul_s32(half_products, vrev64_s32(half_products));
  return vget_lane_s32(full_product, 0);
}
|
|
341
|
+
|
|
342
|
+
// min
|
|
343
|
+
// Smallest of the four lanes.
template<> EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a)
{
  // Pairwise minimum of the halves: (min(a0,a1), min(a2,a3)).
  const float32x2_t pair_mins = vpmin_f32(vget_low_f32(a), vget_high_f32(a));
  // Second pairwise minimum leaves the overall minimum in both lanes.
  const float32x2_t overall = vpmin_f32(pair_mins, pair_mins);
  return vget_lane_f32(overall, 0);
}
|
|
354
|
+
|
|
355
|
+
// Smallest of the four lanes.
template<> EIGEN_STRONG_INLINE int predux_min<Packet4i>(const Packet4i& a)
{
  // Pairwise minimum of the halves: (min(a0,a1), min(a2,a3)).
  const int32x2_t pair_mins = vpmin_s32(vget_low_s32(a), vget_high_s32(a));
  // Second pairwise minimum leaves the overall minimum in both lanes.
  const int32x2_t overall = vpmin_s32(pair_mins, pair_mins);
  return vget_lane_s32(overall, 0);
}
|
|
366
|
+
|
|
367
|
+
// max
|
|
368
|
+
// Largest of the four lanes.
template<> EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a)
{
  // Pairwise maximum of the halves: (max(a0,a1), max(a2,a3)).
  const float32x2_t pair_maxes = vpmax_f32(vget_low_f32(a), vget_high_f32(a));
  // Second pairwise maximum leaves the overall maximum in both lanes.
  const float32x2_t overall = vpmax_f32(pair_maxes, pair_maxes);
  return vget_lane_f32(overall, 0);
}
|
|
379
|
+
|
|
380
|
+
// Largest of the four lanes.
template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
{
  // Pairwise maximum of the halves: (max(a0,a1), max(a2,a3)).
  const int32x2_t pair_maxes = vpmax_s32(vget_low_s32(a), vget_high_s32(a));
  // Second pairwise maximum leaves the overall maximum in both lanes.
  const int32x2_t overall = vpmax_s32(pair_maxes, pair_maxes);
  return vget_lane_s32(overall, 0);
}
|
|
391
|
+
|
|
392
|
+
// this PALIGN_NEON business is to work around a bug in LLVM Clang 3.0 causing incorrect compilation errors,
|
|
393
|
+
// see bug 347 and this LLVM bug: http://llvm.org/bugs/show_bug.cgi?id=11074
|
|
394
|
+
#define PALIGN_NEON(Offset,Type,Command) \
|
|
395
|
+
template<>\
|
|
396
|
+
struct palign_impl<Offset,Type>\
|
|
397
|
+
{\
|
|
398
|
+
EIGEN_STRONG_INLINE static void run(Type& first, const Type& second)\
|
|
399
|
+
{\
|
|
400
|
+
if (Offset!=0)\
|
|
401
|
+
first = Command(first, second, Offset);\
|
|
402
|
+
}\
|
|
403
|
+
};\
|
|
404
|
+
|
|
405
|
+
PALIGN_NEON(0,Packet4f,vextq_f32)
|
|
406
|
+
PALIGN_NEON(1,Packet4f,vextq_f32)
|
|
407
|
+
PALIGN_NEON(2,Packet4f,vextq_f32)
|
|
408
|
+
PALIGN_NEON(3,Packet4f,vextq_f32)
|
|
409
|
+
PALIGN_NEON(0,Packet4i,vextq_s32)
|
|
410
|
+
PALIGN_NEON(1,Packet4i,vextq_s32)
|
|
411
|
+
PALIGN_NEON(2,Packet4i,vextq_s32)
|
|
412
|
+
PALIGN_NEON(3,Packet4i,vextq_s32)
|
|
413
|
+
|
|
414
|
+
#undef PALIGN_NEON
|
|
415
|
+
|
|
416
|
+
} // end namespace internal
|
|
417
|
+
|
|
418
|
+
} // end namespace Eigen
|
|
419
|
+
|
|
420
|
+
#endif // EIGEN_PACKET_MATH_NEON_H
|