tomoto 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/LICENSE.txt +22 -0
- data/README.md +123 -0
- data/ext/tomoto/ext.cpp +245 -0
- data/ext/tomoto/extconf.rb +28 -0
- data/lib/tomoto.rb +12 -0
- data/lib/tomoto/ct.rb +11 -0
- data/lib/tomoto/hdp.rb +11 -0
- data/lib/tomoto/lda.rb +67 -0
- data/lib/tomoto/version.rb +3 -0
- data/vendor/EigenRand/EigenRand/Core.h +1139 -0
- data/vendor/EigenRand/EigenRand/Dists/Basic.h +111 -0
- data/vendor/EigenRand/EigenRand/Dists/Discrete.h +877 -0
- data/vendor/EigenRand/EigenRand/Dists/GammaPoisson.h +108 -0
- data/vendor/EigenRand/EigenRand/Dists/NormalExp.h +626 -0
- data/vendor/EigenRand/EigenRand/EigenRand +19 -0
- data/vendor/EigenRand/EigenRand/Macro.h +24 -0
- data/vendor/EigenRand/EigenRand/MorePacketMath.h +978 -0
- data/vendor/EigenRand/EigenRand/PacketFilter.h +286 -0
- data/vendor/EigenRand/EigenRand/PacketRandomEngine.h +624 -0
- data/vendor/EigenRand/EigenRand/RandUtils.h +413 -0
- data/vendor/EigenRand/EigenRand/doc.h +220 -0
- data/vendor/EigenRand/LICENSE +21 -0
- data/vendor/EigenRand/README.md +288 -0
- data/vendor/eigen/COPYING.BSD +26 -0
- data/vendor/eigen/COPYING.GPL +674 -0
- data/vendor/eigen/COPYING.LGPL +502 -0
- data/vendor/eigen/COPYING.MINPACK +52 -0
- data/vendor/eigen/COPYING.MPL2 +373 -0
- data/vendor/eigen/COPYING.README +18 -0
- data/vendor/eigen/Eigen/CMakeLists.txt +19 -0
- data/vendor/eigen/Eigen/Cholesky +46 -0
- data/vendor/eigen/Eigen/CholmodSupport +48 -0
- data/vendor/eigen/Eigen/Core +537 -0
- data/vendor/eigen/Eigen/Dense +7 -0
- data/vendor/eigen/Eigen/Eigen +2 -0
- data/vendor/eigen/Eigen/Eigenvalues +61 -0
- data/vendor/eigen/Eigen/Geometry +62 -0
- data/vendor/eigen/Eigen/Householder +30 -0
- data/vendor/eigen/Eigen/IterativeLinearSolvers +48 -0
- data/vendor/eigen/Eigen/Jacobi +33 -0
- data/vendor/eigen/Eigen/LU +50 -0
- data/vendor/eigen/Eigen/MetisSupport +35 -0
- data/vendor/eigen/Eigen/OrderingMethods +73 -0
- data/vendor/eigen/Eigen/PaStiXSupport +48 -0
- data/vendor/eigen/Eigen/PardisoSupport +35 -0
- data/vendor/eigen/Eigen/QR +51 -0
- data/vendor/eigen/Eigen/QtAlignedMalloc +40 -0
- data/vendor/eigen/Eigen/SPQRSupport +34 -0
- data/vendor/eigen/Eigen/SVD +51 -0
- data/vendor/eigen/Eigen/Sparse +36 -0
- data/vendor/eigen/Eigen/SparseCholesky +45 -0
- data/vendor/eigen/Eigen/SparseCore +69 -0
- data/vendor/eigen/Eigen/SparseLU +46 -0
- data/vendor/eigen/Eigen/SparseQR +37 -0
- data/vendor/eigen/Eigen/StdDeque +27 -0
- data/vendor/eigen/Eigen/StdList +26 -0
- data/vendor/eigen/Eigen/StdVector +27 -0
- data/vendor/eigen/Eigen/SuperLUSupport +64 -0
- data/vendor/eigen/Eigen/UmfPackSupport +40 -0
- data/vendor/eigen/Eigen/src/Cholesky/LDLT.h +673 -0
- data/vendor/eigen/Eigen/src/Cholesky/LLT.h +542 -0
- data/vendor/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h +99 -0
- data/vendor/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +639 -0
- data/vendor/eigen/Eigen/src/Core/Array.h +329 -0
- data/vendor/eigen/Eigen/src/Core/ArrayBase.h +226 -0
- data/vendor/eigen/Eigen/src/Core/ArrayWrapper.h +209 -0
- data/vendor/eigen/Eigen/src/Core/Assign.h +90 -0
- data/vendor/eigen/Eigen/src/Core/AssignEvaluator.h +935 -0
- data/vendor/eigen/Eigen/src/Core/Assign_MKL.h +178 -0
- data/vendor/eigen/Eigen/src/Core/BandMatrix.h +353 -0
- data/vendor/eigen/Eigen/src/Core/Block.h +452 -0
- data/vendor/eigen/Eigen/src/Core/BooleanRedux.h +164 -0
- data/vendor/eigen/Eigen/src/Core/CommaInitializer.h +160 -0
- data/vendor/eigen/Eigen/src/Core/ConditionEstimator.h +175 -0
- data/vendor/eigen/Eigen/src/Core/CoreEvaluators.h +1688 -0
- data/vendor/eigen/Eigen/src/Core/CoreIterators.h +127 -0
- data/vendor/eigen/Eigen/src/Core/CwiseBinaryOp.h +184 -0
- data/vendor/eigen/Eigen/src/Core/CwiseNullaryOp.h +866 -0
- data/vendor/eigen/Eigen/src/Core/CwiseTernaryOp.h +197 -0
- data/vendor/eigen/Eigen/src/Core/CwiseUnaryOp.h +103 -0
- data/vendor/eigen/Eigen/src/Core/CwiseUnaryView.h +128 -0
- data/vendor/eigen/Eigen/src/Core/DenseBase.h +611 -0
- data/vendor/eigen/Eigen/src/Core/DenseCoeffsBase.h +681 -0
- data/vendor/eigen/Eigen/src/Core/DenseStorage.h +570 -0
- data/vendor/eigen/Eigen/src/Core/Diagonal.h +260 -0
- data/vendor/eigen/Eigen/src/Core/DiagonalMatrix.h +343 -0
- data/vendor/eigen/Eigen/src/Core/DiagonalProduct.h +28 -0
- data/vendor/eigen/Eigen/src/Core/Dot.h +318 -0
- data/vendor/eigen/Eigen/src/Core/EigenBase.h +159 -0
- data/vendor/eigen/Eigen/src/Core/ForceAlignedAccess.h +146 -0
- data/vendor/eigen/Eigen/src/Core/Fuzzy.h +155 -0
- data/vendor/eigen/Eigen/src/Core/GeneralProduct.h +455 -0
- data/vendor/eigen/Eigen/src/Core/GenericPacketMath.h +593 -0
- data/vendor/eigen/Eigen/src/Core/GlobalFunctions.h +187 -0
- data/vendor/eigen/Eigen/src/Core/IO.h +225 -0
- data/vendor/eigen/Eigen/src/Core/Inverse.h +118 -0
- data/vendor/eigen/Eigen/src/Core/Map.h +171 -0
- data/vendor/eigen/Eigen/src/Core/MapBase.h +303 -0
- data/vendor/eigen/Eigen/src/Core/MathFunctions.h +1415 -0
- data/vendor/eigen/Eigen/src/Core/MathFunctionsImpl.h +101 -0
- data/vendor/eigen/Eigen/src/Core/Matrix.h +459 -0
- data/vendor/eigen/Eigen/src/Core/MatrixBase.h +529 -0
- data/vendor/eigen/Eigen/src/Core/NestByValue.h +110 -0
- data/vendor/eigen/Eigen/src/Core/NoAlias.h +108 -0
- data/vendor/eigen/Eigen/src/Core/NumTraits.h +248 -0
- data/vendor/eigen/Eigen/src/Core/PermutationMatrix.h +633 -0
- data/vendor/eigen/Eigen/src/Core/PlainObjectBase.h +1035 -0
- data/vendor/eigen/Eigen/src/Core/Product.h +186 -0
- data/vendor/eigen/Eigen/src/Core/ProductEvaluators.h +1112 -0
- data/vendor/eigen/Eigen/src/Core/Random.h +182 -0
- data/vendor/eigen/Eigen/src/Core/Redux.h +505 -0
- data/vendor/eigen/Eigen/src/Core/Ref.h +283 -0
- data/vendor/eigen/Eigen/src/Core/Replicate.h +142 -0
- data/vendor/eigen/Eigen/src/Core/ReturnByValue.h +117 -0
- data/vendor/eigen/Eigen/src/Core/Reverse.h +211 -0
- data/vendor/eigen/Eigen/src/Core/Select.h +162 -0
- data/vendor/eigen/Eigen/src/Core/SelfAdjointView.h +352 -0
- data/vendor/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h +47 -0
- data/vendor/eigen/Eigen/src/Core/Solve.h +188 -0
- data/vendor/eigen/Eigen/src/Core/SolveTriangular.h +235 -0
- data/vendor/eigen/Eigen/src/Core/SolverBase.h +130 -0
- data/vendor/eigen/Eigen/src/Core/StableNorm.h +221 -0
- data/vendor/eigen/Eigen/src/Core/Stride.h +111 -0
- data/vendor/eigen/Eigen/src/Core/Swap.h +67 -0
- data/vendor/eigen/Eigen/src/Core/Transpose.h +403 -0
- data/vendor/eigen/Eigen/src/Core/Transpositions.h +407 -0
- data/vendor/eigen/Eigen/src/Core/TriangularMatrix.h +983 -0
- data/vendor/eigen/Eigen/src/Core/VectorBlock.h +96 -0
- data/vendor/eigen/Eigen/src/Core/VectorwiseOp.h +695 -0
- data/vendor/eigen/Eigen/src/Core/Visitor.h +273 -0
- data/vendor/eigen/Eigen/src/Core/arch/AVX/Complex.h +451 -0
- data/vendor/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +439 -0
- data/vendor/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +637 -0
- data/vendor/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +51 -0
- data/vendor/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +391 -0
- data/vendor/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1316 -0
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +430 -0
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +322 -0
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +1061 -0
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/Complex.h +103 -0
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/Half.h +674 -0
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/MathFunctions.h +91 -0
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +333 -0
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +1124 -0
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +212 -0
- data/vendor/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +29 -0
- data/vendor/eigen/Eigen/src/Core/arch/Default/Settings.h +49 -0
- data/vendor/eigen/Eigen/src/Core/arch/NEON/Complex.h +490 -0
- data/vendor/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +91 -0
- data/vendor/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +760 -0
- data/vendor/eigen/Eigen/src/Core/arch/SSE/Complex.h +471 -0
- data/vendor/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +562 -0
- data/vendor/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +895 -0
- data/vendor/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +77 -0
- data/vendor/eigen/Eigen/src/Core/arch/ZVector/Complex.h +397 -0
- data/vendor/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +137 -0
- data/vendor/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +945 -0
- data/vendor/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +168 -0
- data/vendor/eigen/Eigen/src/Core/functors/BinaryFunctors.h +475 -0
- data/vendor/eigen/Eigen/src/Core/functors/NullaryFunctors.h +188 -0
- data/vendor/eigen/Eigen/src/Core/functors/StlFunctors.h +136 -0
- data/vendor/eigen/Eigen/src/Core/functors/TernaryFunctors.h +25 -0
- data/vendor/eigen/Eigen/src/Core/functors/UnaryFunctors.h +792 -0
- data/vendor/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +2156 -0
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +492 -0
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +311 -0
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +145 -0
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +122 -0
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +619 -0
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +136 -0
- data/vendor/eigen/Eigen/src/Core/products/Parallelizer.h +163 -0
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +521 -0
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +287 -0
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +260 -0
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +118 -0
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointProduct.h +133 -0
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +93 -0
- data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +466 -0
- data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +315 -0
- data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixVector.h +350 -0
- data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +255 -0
- data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +335 -0
- data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +163 -0
- data/vendor/eigen/Eigen/src/Core/products/TriangularSolverVector.h +145 -0
- data/vendor/eigen/Eigen/src/Core/util/BlasUtil.h +398 -0
- data/vendor/eigen/Eigen/src/Core/util/Constants.h +547 -0
- data/vendor/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +83 -0
- data/vendor/eigen/Eigen/src/Core/util/ForwardDeclarations.h +302 -0
- data/vendor/eigen/Eigen/src/Core/util/MKL_support.h +130 -0
- data/vendor/eigen/Eigen/src/Core/util/Macros.h +1001 -0
- data/vendor/eigen/Eigen/src/Core/util/Memory.h +993 -0
- data/vendor/eigen/Eigen/src/Core/util/Meta.h +534 -0
- data/vendor/eigen/Eigen/src/Core/util/NonMPL2.h +3 -0
- data/vendor/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +27 -0
- data/vendor/eigen/Eigen/src/Core/util/StaticAssert.h +218 -0
- data/vendor/eigen/Eigen/src/Core/util/XprHelper.h +821 -0
- data/vendor/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +346 -0
- data/vendor/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +459 -0
- data/vendor/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +91 -0
- data/vendor/eigen/Eigen/src/Eigenvalues/EigenSolver.h +622 -0
- data/vendor/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +418 -0
- data/vendor/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +226 -0
- data/vendor/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +374 -0
- data/vendor/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +158 -0
- data/vendor/eigen/Eigen/src/Eigenvalues/RealQZ.h +654 -0
- data/vendor/eigen/Eigen/src/Eigenvalues/RealSchur.h +546 -0
- data/vendor/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +77 -0
- data/vendor/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +870 -0
- data/vendor/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +87 -0
- data/vendor/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +556 -0
- data/vendor/eigen/Eigen/src/Geometry/AlignedBox.h +392 -0
- data/vendor/eigen/Eigen/src/Geometry/AngleAxis.h +247 -0
- data/vendor/eigen/Eigen/src/Geometry/EulerAngles.h +114 -0
- data/vendor/eigen/Eigen/src/Geometry/Homogeneous.h +497 -0
- data/vendor/eigen/Eigen/src/Geometry/Hyperplane.h +282 -0
- data/vendor/eigen/Eigen/src/Geometry/OrthoMethods.h +234 -0
- data/vendor/eigen/Eigen/src/Geometry/ParametrizedLine.h +195 -0
- data/vendor/eigen/Eigen/src/Geometry/Quaternion.h +814 -0
- data/vendor/eigen/Eigen/src/Geometry/Rotation2D.h +199 -0
- data/vendor/eigen/Eigen/src/Geometry/RotationBase.h +206 -0
- data/vendor/eigen/Eigen/src/Geometry/Scaling.h +170 -0
- data/vendor/eigen/Eigen/src/Geometry/Transform.h +1542 -0
- data/vendor/eigen/Eigen/src/Geometry/Translation.h +208 -0
- data/vendor/eigen/Eigen/src/Geometry/Umeyama.h +166 -0
- data/vendor/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +161 -0
- data/vendor/eigen/Eigen/src/Householder/BlockHouseholder.h +103 -0
- data/vendor/eigen/Eigen/src/Householder/Householder.h +172 -0
- data/vendor/eigen/Eigen/src/Householder/HouseholderSequence.h +470 -0
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +226 -0
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +228 -0
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +246 -0
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +400 -0
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +462 -0
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +394 -0
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +216 -0
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +115 -0
- data/vendor/eigen/Eigen/src/Jacobi/Jacobi.h +462 -0
- data/vendor/eigen/Eigen/src/LU/Determinant.h +101 -0
- data/vendor/eigen/Eigen/src/LU/FullPivLU.h +891 -0
- data/vendor/eigen/Eigen/src/LU/InverseImpl.h +415 -0
- data/vendor/eigen/Eigen/src/LU/PartialPivLU.h +611 -0
- data/vendor/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h +83 -0
- data/vendor/eigen/Eigen/src/LU/arch/Inverse_SSE.h +338 -0
- data/vendor/eigen/Eigen/src/MetisSupport/MetisSupport.h +137 -0
- data/vendor/eigen/Eigen/src/OrderingMethods/Amd.h +445 -0
- data/vendor/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +1843 -0
- data/vendor/eigen/Eigen/src/OrderingMethods/Ordering.h +157 -0
- data/vendor/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +678 -0
- data/vendor/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +543 -0
- data/vendor/eigen/Eigen/src/QR/ColPivHouseholderQR.h +653 -0
- data/vendor/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +97 -0
- data/vendor/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +562 -0
- data/vendor/eigen/Eigen/src/QR/FullPivHouseholderQR.h +676 -0
- data/vendor/eigen/Eigen/src/QR/HouseholderQR.h +409 -0
- data/vendor/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h +68 -0
- data/vendor/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +313 -0
- data/vendor/eigen/Eigen/src/SVD/BDCSVD.h +1246 -0
- data/vendor/eigen/Eigen/src/SVD/JacobiSVD.h +804 -0
- data/vendor/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +91 -0
- data/vendor/eigen/Eigen/src/SVD/SVDBase.h +315 -0
- data/vendor/eigen/Eigen/src/SVD/UpperBidiagonalization.h +414 -0
- data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +689 -0
- data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +199 -0
- data/vendor/eigen/Eigen/src/SparseCore/AmbiVector.h +377 -0
- data/vendor/eigen/Eigen/src/SparseCore/CompressedStorage.h +258 -0
- data/vendor/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +352 -0
- data/vendor/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h +67 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseAssign.h +216 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseBlock.h +603 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseColEtree.h +206 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +341 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +726 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +148 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +320 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +138 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseDot.h +98 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseFuzzy.h +29 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseMap.h +305 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseMatrix.h +1403 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +405 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparsePermutation.h +178 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseProduct.h +169 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseRedux.h +49 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseRef.h +397 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +656 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseSolverBase.h +124 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +198 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseTranspose.h +92 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseTriangularView.h +189 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseUtil.h +178 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseVector.h +478 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseView.h +253 -0
- data/vendor/eigen/Eigen/src/SparseCore/TriangularSolver.h +315 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU.h +773 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLUImpl.h +66 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +226 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_Structs.h +110 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +301 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_Utils.h +80 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h +181 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +179 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +107 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +280 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +126 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +130 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +223 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h +258 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h +137 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h +136 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h +83 -0
- data/vendor/eigen/Eigen/src/SparseQR/SparseQR.h +745 -0
- data/vendor/eigen/Eigen/src/StlSupport/StdDeque.h +126 -0
- data/vendor/eigen/Eigen/src/StlSupport/StdList.h +106 -0
- data/vendor/eigen/Eigen/src/StlSupport/StdVector.h +131 -0
- data/vendor/eigen/Eigen/src/StlSupport/details.h +84 -0
- data/vendor/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +1027 -0
- data/vendor/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +506 -0
- data/vendor/eigen/Eigen/src/misc/Image.h +82 -0
- data/vendor/eigen/Eigen/src/misc/Kernel.h +79 -0
- data/vendor/eigen/Eigen/src/misc/RealSvd2x2.h +55 -0
- data/vendor/eigen/Eigen/src/misc/blas.h +440 -0
- data/vendor/eigen/Eigen/src/misc/lapack.h +152 -0
- data/vendor/eigen/Eigen/src/misc/lapacke.h +16291 -0
- data/vendor/eigen/Eigen/src/misc/lapacke_mangling.h +17 -0
- data/vendor/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +332 -0
- data/vendor/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +552 -0
- data/vendor/eigen/Eigen/src/plugins/BlockMethods.h +1058 -0
- data/vendor/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h +115 -0
- data/vendor/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +163 -0
- data/vendor/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +152 -0
- data/vendor/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +85 -0
- data/vendor/eigen/README.md +3 -0
- data/vendor/eigen/bench/README.txt +55 -0
- data/vendor/eigen/bench/btl/COPYING +340 -0
- data/vendor/eigen/bench/btl/README +154 -0
- data/vendor/eigen/bench/tensors/README +21 -0
- data/vendor/eigen/blas/README.txt +6 -0
- data/vendor/eigen/demos/mandelbrot/README +10 -0
- data/vendor/eigen/demos/mix_eigen_and_c/README +9 -0
- data/vendor/eigen/demos/opengl/README +13 -0
- data/vendor/eigen/unsupported/Eigen/CXX11/src/Tensor/README.md +1760 -0
- data/vendor/eigen/unsupported/README.txt +50 -0
- data/vendor/tomotopy/LICENSE +21 -0
- data/vendor/tomotopy/README.kr.rst +375 -0
- data/vendor/tomotopy/README.rst +382 -0
- data/vendor/tomotopy/src/Labeling/FoRelevance.cpp +362 -0
- data/vendor/tomotopy/src/Labeling/FoRelevance.h +88 -0
- data/vendor/tomotopy/src/Labeling/Labeler.h +50 -0
- data/vendor/tomotopy/src/TopicModel/CT.h +37 -0
- data/vendor/tomotopy/src/TopicModel/CTModel.cpp +13 -0
- data/vendor/tomotopy/src/TopicModel/CTModel.hpp +293 -0
- data/vendor/tomotopy/src/TopicModel/DMR.h +51 -0
- data/vendor/tomotopy/src/TopicModel/DMRModel.cpp +13 -0
- data/vendor/tomotopy/src/TopicModel/DMRModel.hpp +374 -0
- data/vendor/tomotopy/src/TopicModel/DT.h +65 -0
- data/vendor/tomotopy/src/TopicModel/DTM.h +22 -0
- data/vendor/tomotopy/src/TopicModel/DTModel.cpp +15 -0
- data/vendor/tomotopy/src/TopicModel/DTModel.hpp +572 -0
- data/vendor/tomotopy/src/TopicModel/GDMR.h +37 -0
- data/vendor/tomotopy/src/TopicModel/GDMRModel.cpp +14 -0
- data/vendor/tomotopy/src/TopicModel/GDMRModel.hpp +485 -0
- data/vendor/tomotopy/src/TopicModel/HDP.h +74 -0
- data/vendor/tomotopy/src/TopicModel/HDPModel.cpp +13 -0
- data/vendor/tomotopy/src/TopicModel/HDPModel.hpp +592 -0
- data/vendor/tomotopy/src/TopicModel/HLDA.h +40 -0
- data/vendor/tomotopy/src/TopicModel/HLDAModel.cpp +13 -0
- data/vendor/tomotopy/src/TopicModel/HLDAModel.hpp +681 -0
- data/vendor/tomotopy/src/TopicModel/HPA.h +27 -0
- data/vendor/tomotopy/src/TopicModel/HPAModel.cpp +21 -0
- data/vendor/tomotopy/src/TopicModel/HPAModel.hpp +588 -0
- data/vendor/tomotopy/src/TopicModel/LDA.h +144 -0
- data/vendor/tomotopy/src/TopicModel/LDACVB0Model.hpp +442 -0
- data/vendor/tomotopy/src/TopicModel/LDAModel.cpp +13 -0
- data/vendor/tomotopy/src/TopicModel/LDAModel.hpp +1058 -0
- data/vendor/tomotopy/src/TopicModel/LLDA.h +45 -0
- data/vendor/tomotopy/src/TopicModel/LLDAModel.cpp +13 -0
- data/vendor/tomotopy/src/TopicModel/LLDAModel.hpp +203 -0
- data/vendor/tomotopy/src/TopicModel/MGLDA.h +63 -0
- data/vendor/tomotopy/src/TopicModel/MGLDAModel.cpp +17 -0
- data/vendor/tomotopy/src/TopicModel/MGLDAModel.hpp +558 -0
- data/vendor/tomotopy/src/TopicModel/PA.h +43 -0
- data/vendor/tomotopy/src/TopicModel/PAModel.cpp +13 -0
- data/vendor/tomotopy/src/TopicModel/PAModel.hpp +467 -0
- data/vendor/tomotopy/src/TopicModel/PLDA.h +17 -0
- data/vendor/tomotopy/src/TopicModel/PLDAModel.cpp +13 -0
- data/vendor/tomotopy/src/TopicModel/PLDAModel.hpp +214 -0
- data/vendor/tomotopy/src/TopicModel/SLDA.h +54 -0
- data/vendor/tomotopy/src/TopicModel/SLDAModel.cpp +17 -0
- data/vendor/tomotopy/src/TopicModel/SLDAModel.hpp +456 -0
- data/vendor/tomotopy/src/TopicModel/TopicModel.hpp +692 -0
- data/vendor/tomotopy/src/Utils/AliasMethod.hpp +169 -0
- data/vendor/tomotopy/src/Utils/Dictionary.h +80 -0
- data/vendor/tomotopy/src/Utils/EigenAddonOps.hpp +181 -0
- data/vendor/tomotopy/src/Utils/LBFGS.h +202 -0
- data/vendor/tomotopy/src/Utils/LBFGS/LineSearchBacktracking.h +120 -0
- data/vendor/tomotopy/src/Utils/LBFGS/LineSearchBracketing.h +122 -0
- data/vendor/tomotopy/src/Utils/LBFGS/Param.h +213 -0
- data/vendor/tomotopy/src/Utils/LUT.hpp +82 -0
- data/vendor/tomotopy/src/Utils/MultiNormalDistribution.hpp +69 -0
- data/vendor/tomotopy/src/Utils/PolyaGamma.hpp +200 -0
- data/vendor/tomotopy/src/Utils/PolyaGammaHybrid.hpp +672 -0
- data/vendor/tomotopy/src/Utils/ThreadPool.hpp +150 -0
- data/vendor/tomotopy/src/Utils/Trie.hpp +220 -0
- data/vendor/tomotopy/src/Utils/TruncMultiNormal.hpp +94 -0
- data/vendor/tomotopy/src/Utils/Utils.hpp +337 -0
- data/vendor/tomotopy/src/Utils/avx_gamma.h +46 -0
- data/vendor/tomotopy/src/Utils/avx_mathfun.h +736 -0
- data/vendor/tomotopy/src/Utils/exception.h +28 -0
- data/vendor/tomotopy/src/Utils/math.h +281 -0
- data/vendor/tomotopy/src/Utils/rtnorm.hpp +2690 -0
- data/vendor/tomotopy/src/Utils/sample.hpp +192 -0
- data/vendor/tomotopy/src/Utils/serializer.hpp +695 -0
- data/vendor/tomotopy/src/Utils/slp.hpp +131 -0
- data/vendor/tomotopy/src/Utils/sse_gamma.h +48 -0
- data/vendor/tomotopy/src/Utils/sse_mathfun.h +710 -0
- data/vendor/tomotopy/src/Utils/text.hpp +49 -0
- data/vendor/tomotopy/src/Utils/tvector.hpp +543 -0
- metadata +531 -0
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
// This file is part of Eigen, a lightweight C++ template library
|
|
2
|
+
// for linear algebra.
|
|
3
|
+
//
|
|
4
|
+
// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
|
|
5
|
+
//
|
|
6
|
+
// This Source Code Form is subject to the terms of the Mozilla
|
|
7
|
+
// Public License v. 2.0. If a copy of the MPL was not distributed
|
|
8
|
+
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
9
|
+
|
|
10
|
+
#ifndef EIGEN_TYPE_CASTING_CUDA_H
|
|
11
|
+
#define EIGEN_TYPE_CASTING_CUDA_H
|
|
12
|
+
|
|
13
|
+
namespace Eigen {
|
|
14
|
+
|
|
15
|
+
namespace internal {
|
|
16
|
+
|
|
17
|
+
template<>
|
|
18
|
+
struct scalar_cast_op<float, Eigen::half> {
|
|
19
|
+
EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)
|
|
20
|
+
typedef Eigen::half result_type;
|
|
21
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half operator() (const float& a) const {
|
|
22
|
+
#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
|
|
23
|
+
return __float2half(a);
|
|
24
|
+
#else
|
|
25
|
+
return Eigen::half(a);
|
|
26
|
+
#endif
|
|
27
|
+
}
|
|
28
|
+
};
|
|
29
|
+
|
|
30
|
+
template<>
|
|
31
|
+
struct functor_traits<scalar_cast_op<float, Eigen::half> >
|
|
32
|
+
{ enum { Cost = NumTraits<float>::AddCost, PacketAccess = false }; };
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
template<>
|
|
36
|
+
struct scalar_cast_op<int, Eigen::half> {
|
|
37
|
+
EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)
|
|
38
|
+
typedef Eigen::half result_type;
|
|
39
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half operator() (const int& a) const {
|
|
40
|
+
#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
|
|
41
|
+
return __float2half(static_cast<float>(a));
|
|
42
|
+
#else
|
|
43
|
+
return Eigen::half(static_cast<float>(a));
|
|
44
|
+
#endif
|
|
45
|
+
}
|
|
46
|
+
};
|
|
47
|
+
|
|
48
|
+
template<>
|
|
49
|
+
struct functor_traits<scalar_cast_op<int, Eigen::half> >
|
|
50
|
+
{ enum { Cost = NumTraits<float>::AddCost, PacketAccess = false }; };
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
template<>
|
|
54
|
+
struct scalar_cast_op<Eigen::half, float> {
|
|
55
|
+
EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)
|
|
56
|
+
typedef float result_type;
|
|
57
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float operator() (const Eigen::half& a) const {
|
|
58
|
+
#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
|
|
59
|
+
return __half2float(a);
|
|
60
|
+
#else
|
|
61
|
+
return static_cast<float>(a);
|
|
62
|
+
#endif
|
|
63
|
+
}
|
|
64
|
+
};
|
|
65
|
+
|
|
66
|
+
template<>
|
|
67
|
+
struct functor_traits<scalar_cast_op<Eigen::half, float> >
|
|
68
|
+
{ enum { Cost = NumTraits<float>::AddCost, PacketAccess = false }; };
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
|
|
73
|
+
|
|
74
|
+
template <>
|
|
75
|
+
struct type_casting_traits<Eigen::half, float> {
|
|
76
|
+
enum {
|
|
77
|
+
VectorizedCast = 1,
|
|
78
|
+
SrcCoeffRatio = 2,
|
|
79
|
+
TgtCoeffRatio = 1
|
|
80
|
+
};
|
|
81
|
+
};
|
|
82
|
+
|
|
83
|
+
// Packet cast half2 x 2 -> float4: widens both half2 inputs and concatenates
// them, with the lanes of `a` first and the lanes of `b` second.
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pcast<half2, float4>(const half2& a, const half2& b) {
  const float2 low_pair = __half22float2(a);
  const float2 high_pair = __half22float2(b);
  return make_float4(low_pair.x, low_pair.y, high_pair.x, high_pair.y);
}
|
|
88
|
+
|
|
89
|
+
template <>
|
|
90
|
+
struct type_casting_traits<float, Eigen::half> {
|
|
91
|
+
enum {
|
|
92
|
+
VectorizedCast = 1,
|
|
93
|
+
SrcCoeffRatio = 1,
|
|
94
|
+
TgtCoeffRatio = 2
|
|
95
|
+
};
|
|
96
|
+
};
|
|
97
|
+
|
|
98
|
+
// Packet cast float4 -> half2. Only a.x and a.y are converted; a.z and a.w
// (the second half of the input) are dropped.
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pcast<float4, half2>(const float4& a) {
  const float first = a.x;
  const float second = a.y;
  return __floats2half2_rn(first, second);
}
|
|
102
|
+
|
|
103
|
+
#elif defined EIGEN_VECTORIZE_AVX512
|
|
104
|
+
template <>
|
|
105
|
+
struct type_casting_traits<half, float> {
|
|
106
|
+
enum {
|
|
107
|
+
VectorizedCast = 1,
|
|
108
|
+
SrcCoeffRatio = 1,
|
|
109
|
+
TgtCoeffRatio = 1
|
|
110
|
+
};
|
|
111
|
+
};
|
|
112
|
+
|
|
113
|
+
// Packet cast Packet16h -> Packet16f; delegates to half2float.
template<> EIGEN_STRONG_INLINE Packet16f pcast<Packet16h, Packet16f>(const Packet16h& a) {
  Packet16f widened = half2float(a);
  return widened;
}
|
|
116
|
+
|
|
117
|
+
template <>
|
|
118
|
+
struct type_casting_traits<float, half> {
|
|
119
|
+
enum {
|
|
120
|
+
VectorizedCast = 1,
|
|
121
|
+
SrcCoeffRatio = 1,
|
|
122
|
+
TgtCoeffRatio = 1
|
|
123
|
+
};
|
|
124
|
+
};
|
|
125
|
+
|
|
126
|
+
// Packet cast Packet16f -> Packet16h; delegates to float2half.
template<> EIGEN_STRONG_INLINE Packet16h pcast<Packet16f, Packet16h>(const Packet16f& a) {
  Packet16h narrowed = float2half(a);
  return narrowed;
}
|
|
129
|
+
|
|
130
|
+
#elif defined EIGEN_VECTORIZE_AVX
|
|
131
|
+
|
|
132
|
+
template <>
|
|
133
|
+
struct type_casting_traits<Eigen::half, float> {
|
|
134
|
+
enum {
|
|
135
|
+
VectorizedCast = 1,
|
|
136
|
+
SrcCoeffRatio = 1,
|
|
137
|
+
TgtCoeffRatio = 1
|
|
138
|
+
};
|
|
139
|
+
};
|
|
140
|
+
|
|
141
|
+
// Packet cast Packet8h -> Packet8f; delegates to half2float.
template<> EIGEN_STRONG_INLINE Packet8f pcast<Packet8h, Packet8f>(const Packet8h& a) {
  Packet8f widened = half2float(a);
  return widened;
}
|
|
144
|
+
|
|
145
|
+
template <>
|
|
146
|
+
struct type_casting_traits<float, Eigen::half> {
|
|
147
|
+
enum {
|
|
148
|
+
VectorizedCast = 1,
|
|
149
|
+
SrcCoeffRatio = 1,
|
|
150
|
+
TgtCoeffRatio = 1
|
|
151
|
+
};
|
|
152
|
+
};
|
|
153
|
+
|
|
154
|
+
// Packet cast Packet8f -> Packet8h; delegates to float2half.
template<> EIGEN_STRONG_INLINE Packet8h pcast<Packet8f, Packet8h>(const Packet8f& a) {
  Packet8h narrowed = float2half(a);
  return narrowed;
}
|
|
157
|
+
|
|
158
|
+
// Disable the following code since it's broken on too many platforms / compilers.
|
|
159
|
+
//#elif defined(EIGEN_VECTORIZE_SSE) && (!EIGEN_ARCH_x86_64) && (!EIGEN_COMP_MSVC)
|
|
160
|
+
#elif 0
|
|
161
|
+
|
|
162
|
+
template <>
|
|
163
|
+
struct type_casting_traits<Eigen::half, float> {
|
|
164
|
+
enum {
|
|
165
|
+
VectorizedCast = 1,
|
|
166
|
+
SrcCoeffRatio = 1,
|
|
167
|
+
TgtCoeffRatio = 1
|
|
168
|
+
};
|
|
169
|
+
};
|
|
170
|
+
|
|
171
|
+
// Packet cast Packet4h -> Packet4f (compiled out by the enclosing `#elif 0`).
// The four 16-bit half values are read from the 64-bit payload of a.x, lowest
// 16 bits first, and each is converted to float individually.
template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet4h, Packet4f>(const Packet4h& a) {
  const __int64_t bits = _mm_cvtm64_si64(a.x);

  // Each shift moves the next 16-bit field into the low bits before truncation.
  const Eigen::half half0 = raw_uint16_to_half(static_cast<unsigned short>(bits));
  const Eigen::half half1 = raw_uint16_to_half(static_cast<unsigned short>(bits >> 16));
  const Eigen::half half2 = raw_uint16_to_half(static_cast<unsigned short>(bits >> 32));
  const Eigen::half half3 = raw_uint16_to_half(static_cast<unsigned short>(bits >> 48));

  const float lane0 = static_cast<float>(half0);
  const float lane1 = static_cast<float>(half1);
  const float lane2 = static_cast<float>(half2);
  const float lane3 = static_cast<float>(half3);

  // _mm_set_ps takes its arguments from the highest lane down to the lowest.
  return _mm_set_ps(lane3, lane2, lane1, lane0);
}
|
|
183
|
+
|
|
184
|
+
template <>
|
|
185
|
+
struct type_casting_traits<float, Eigen::half> {
|
|
186
|
+
enum {
|
|
187
|
+
VectorizedCast = 1,
|
|
188
|
+
SrcCoeffRatio = 1,
|
|
189
|
+
TgtCoeffRatio = 1
|
|
190
|
+
};
|
|
191
|
+
};
|
|
192
|
+
|
|
193
|
+
// Packet cast Packet4f -> Packet4h. The floats are spilled to an aligned
// buffer, converted one by one to Eigen::half, and the four 16-bit patterns
// are packed back into the half packet.
template<> EIGEN_STRONG_INLINE Packet4h pcast<Packet4f, Packet4h>(const Packet4f& a)
{
  EIGEN_ALIGN16 float lanes[4];
  pstore(lanes, a);

  const Eigen::half q0(lanes[0]);
  const Eigen::half q1(lanes[1]);
  const Eigen::half q2(lanes[2]);
  const Eigen::half q3(lanes[3]);

  Packet4h packed;
  // _mm_set_pi16 takes its arguments from the highest element down to the lowest.
  packed.x = _mm_set_pi16(q3.x, q2.x, q1.x, q0.x);
  return packed;
}
|
|
205
|
+
|
|
206
|
+
#endif
|
|
207
|
+
|
|
208
|
+
} // end namespace internal
|
|
209
|
+
|
|
210
|
+
} // end namespace Eigen
|
|
211
|
+
|
|
212
|
+
#endif // EIGEN_TYPE_CASTING_CUDA_H
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
|
|
2
|
+
// This file is part of Eigen, a lightweight C++ template library
|
|
3
|
+
// for linear algebra.
|
|
4
|
+
//
|
|
5
|
+
// Copyright (C) 2017 Gael Guennebaud <gael.guennebaud@inria.fr>
|
|
6
|
+
//
|
|
7
|
+
// This Source Code Form is subject to the terms of the Mozilla
|
|
8
|
+
// Public License v. 2.0. If a copy of the MPL was not distributed
|
|
9
|
+
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
10
|
+
|
|
11
|
+
#ifndef EIGEN_ARCH_CONJ_HELPER_H
|
|
12
|
+
#define EIGEN_ARCH_CONJ_HELPER_H
|
|
13
|
+
|
|
14
|
+
// Generates the two non-conjugating conj_helper specializations for a mixed
// real/complex packet pair: <PACKET_REAL, PACKET_CPLX> and
// <PACKET_CPLX, PACKET_REAL>. In both, pmul multiplies the real packet with the
// `.v` member of the complex packet and re-wraps the result as PACKET_CPLX,
// and pmadd adds that product to the accumulator c.
// The expansion already ends with "};", so invocations take no trailing semicolon.
#define EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(PACKET_CPLX, PACKET_REAL)                                               \
  template<> struct conj_helper<PACKET_REAL, PACKET_CPLX, false,false>                                           \
  {                                                                                                              \
    EIGEN_STRONG_INLINE PACKET_CPLX pmadd(const PACKET_REAL& x, const PACKET_CPLX& y, const PACKET_CPLX& c) const \
    {                                                                                                            \
      return padd(c, pmul(x,y));                                                                                 \
    }                                                                                                            \
    EIGEN_STRONG_INLINE PACKET_CPLX pmul(const PACKET_REAL& x, const PACKET_CPLX& y) const                       \
    {                                                                                                            \
      return PACKET_CPLX(Eigen::internal::pmul<PACKET_REAL>(x, y.v));                                            \
    }                                                                                                            \
  };                                                                                                             \
                                                                                                                 \
  template<> struct conj_helper<PACKET_CPLX, PACKET_REAL, false,false>                                           \
  {                                                                                                              \
    EIGEN_STRONG_INLINE PACKET_CPLX pmadd(const PACKET_CPLX& x, const PACKET_REAL& y, const PACKET_CPLX& c) const \
    {                                                                                                            \
      return padd(c, pmul(x,y));                                                                                 \
    }                                                                                                            \
    EIGEN_STRONG_INLINE PACKET_CPLX pmul(const PACKET_CPLX& x, const PACKET_REAL& y) const                       \
    {                                                                                                            \
      return PACKET_CPLX(Eigen::internal::pmul<PACKET_REAL>(x.v, y));                                            \
    }                                                                                                            \
  };
|
|
28
|
+
|
|
29
|
+
#endif // EIGEN_ARCH_CONJ_HELPER_H
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
// This file is part of Eigen, a lightweight C++ template library
|
|
2
|
+
// for linear algebra.
|
|
3
|
+
//
|
|
4
|
+
// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
|
|
5
|
+
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
|
|
6
|
+
//
|
|
7
|
+
// This Source Code Form is subject to the terms of the Mozilla
|
|
8
|
+
// Public License v. 2.0. If a copy of the MPL was not distributed
|
|
9
|
+
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
/* All the parameters defined in this file can be specialized in the
|
|
13
|
+
* architecture specific files, and/or by the user.
|
|
14
|
+
* More to come... */
|
|
15
|
+
|
|
16
|
+
#ifndef EIGEN_DEFAULT_SETTINGS_H
|
|
17
|
+
#define EIGEN_DEFAULT_SETTINGS_H
|
|
18
|
+
|
|
19
|
+
/** Maximal loop size for which meta unrolling of loops is enabled.
  * The value is expressed in Eigen's own notion of "number of FLOPS"; it is
  * neither a number of iterations nor a number of instructions.
  * A definition supplied before this point takes precedence.
  */
#if !defined(EIGEN_UNROLLING_LIMIT)
#define EIGEN_UNROLLING_LIMIT 100
#endif
|
|
26
|
+
|
|
27
|
+
/** Threshold separating a "small" matrix from a "large" one.
  * It is mainly used to select the proper product implementation.
  * A definition supplied before this point takes precedence.
  */
#if !defined(EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD)
#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
#endif
|
|
33
|
+
|
|
34
|
+
/** Maximal width of the blocks used by the triangular product and solver
  * for vectors (level 2 BLAS xTRMV and xTRSV). The default is 8.
  * A definition supplied before this point takes precedence.
  */
#if !defined(EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH)
#define EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH 8
#endif
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
/** Default number of registers assumed to be available on the architecture.
  * Currently it must be 8 or 16; other values will fail.
  * A definition supplied before this point takes precedence.
  */
#if !defined(EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS)
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 8
#endif
|
|
48
|
+
|
|
49
|
+
#endif // EIGEN_DEFAULT_SETTINGS_H
|
|
@@ -0,0 +1,490 @@
|
|
|
1
|
+
// This file is part of Eigen, a lightweight C++ template library
|
|
2
|
+
// for linear algebra.
|
|
3
|
+
//
|
|
4
|
+
// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>
|
|
5
|
+
// Copyright (C) 2010 Konstantinos Margaritis <markos@freevec.org>
|
|
6
|
+
//
|
|
7
|
+
// This Source Code Form is subject to the terms of the Mozilla
|
|
8
|
+
// Public License v. 2.0. If a copy of the MPL was not distributed
|
|
9
|
+
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
10
|
+
|
|
11
|
+
#ifndef EIGEN_COMPLEX_NEON_H
|
|
12
|
+
#define EIGEN_COMPLEX_NEON_H
|
|
13
|
+
|
|
14
|
+
namespace Eigen {
|
|
15
|
+
|
|
16
|
+
namespace internal {
|
|
17
|
+
|
|
18
|
+
// Returns a four-lane mask with only the sign bit set in lanes 1 and 3.
// Packet2cf keeps imaginary parts in those lanes, so XOR-ing a packet with
// this mask flips the sign of its imaginary parts.
inline uint32x4_t p4ui_CONJ_XOR()
{
#if EIGEN_COMP_CLANG
  // See bug 1325, clang fails to call vld1q_u64: build the mask by brace
  // initialization instead of loading it.
  uint32x4_t sign_mask = { 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
  return sign_mask;
#else
  static const uint32_t sign_mask_data[] = { 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
  return vld1q_u32( sign_mask_data );
#endif
}
|
|
28
|
+
|
|
29
|
+
// Two-lane counterpart of p4ui_CONJ_XOR: only the sign bit of lane 1 is set.
inline uint32x2_t p2ui_CONJ_XOR()
{
  static const uint32_t sign_mask_data[] = { 0x00000000, 0x80000000 };
  const uint32x2_t sign_mask = vld1_u32( sign_mask_data );
  return sign_mask;
}
|
|
33
|
+
|
|
34
|
+
//---------- float ----------
|
|
35
|
+
struct Packet2cf
|
|
36
|
+
{
|
|
37
|
+
EIGEN_STRONG_INLINE Packet2cf() {}
|
|
38
|
+
EIGEN_STRONG_INLINE explicit Packet2cf(const Packet4f& a) : v(a) {}
|
|
39
|
+
Packet4f v;
|
|
40
|
+
};
|
|
41
|
+
|
|
42
|
+
template<> struct packet_traits<std::complex<float> > : default_packet_traits
|
|
43
|
+
{
|
|
44
|
+
typedef Packet2cf type;
|
|
45
|
+
typedef Packet2cf half;
|
|
46
|
+
enum {
|
|
47
|
+
Vectorizable = 1,
|
|
48
|
+
AlignedOnScalar = 1,
|
|
49
|
+
size = 2,
|
|
50
|
+
HasHalfPacket = 0,
|
|
51
|
+
|
|
52
|
+
HasAdd = 1,
|
|
53
|
+
HasSub = 1,
|
|
54
|
+
HasMul = 1,
|
|
55
|
+
HasDiv = 1,
|
|
56
|
+
HasNegate = 1,
|
|
57
|
+
HasAbs = 0,
|
|
58
|
+
HasAbs2 = 0,
|
|
59
|
+
HasMin = 0,
|
|
60
|
+
HasMax = 0,
|
|
61
|
+
HasSetLinear = 0
|
|
62
|
+
};
|
|
63
|
+
};
|
|
64
|
+
|
|
65
|
+
// Reverse mapping from Packet2cf to its scalar type, size and alignment.
template<> struct unpacket_traits<Packet2cf>
{
  typedef std::complex<float> type;
  typedef Packet2cf half;
  enum { size = 2, alignment = Aligned16 };
};
|
|
66
|
+
|
|
67
|
+
// Broadcasts one complex value: its (re, im) pair is loaded as two floats and
// duplicated into both halves of the packet.
template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
{
  const float32x2_t re_im = vld1_f32((const float *)&from);
  return Packet2cf(vcombine_f32(re_im, re_im));
}
|
|
74
|
+
|
|
75
|
+
// Complex addition: lane-wise float addition of the underlying packets.
template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  const Packet4f lanes = padd<Packet4f>(a.v,b.v);
  return Packet2cf(lanes);
}

// Complex subtraction: lane-wise float subtraction of the underlying packets.
template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  const Packet4f lanes = psub<Packet4f>(a.v,b.v);
  return Packet2cf(lanes);
}

// Complex negation: negates every float lane.
template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a)
{
  const Packet4f lanes = pnegate<Packet4f>(a.v);
  return Packet2cf(lanes);
}
|
|
78
|
+
// Complex conjugate: XORs the packet bits with p4ui_CONJ_XOR(), which flips
// the sign bit of lanes 1 and 3 (the imaginary parts).
template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a)
{
  const Packet4ui flipped = veorq_u32(vreinterpretq_u32_f32(a.v), p4ui_CONJ_XOR());
  return Packet2cf(vreinterpretq_f32_u32(flipped));
}
|
|
83
|
+
|
|
84
|
+
// Complex multiplication of two packets.
// With a = (ar, ai) and b = (br, bi) for each complex element:
//   ar * b              -> ( ar*br,  ar*bi )
//   ai * b, conj, swap  -> (-ai*bi,  ai*br )
// and their sum is the product (ar*br - ai*bi, ar*bi + ai*br).
template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  const float32x2_t a_first  = vget_low_f32(a.v);
  const float32x2_t a_second = vget_high_f32(a.v);

  // | a1_re | a1_re | a2_re | a2_re | times b
  const Packet4f re_times_b =
      vmulq_f32(vcombine_f32(vdup_lane_f32(a_first, 0), vdup_lane_f32(a_second, 0)), b.v);

  // | a1_im | a1_im | a2_im | a2_im | times b
  Packet4f cross =
      vmulq_f32(vcombine_f32(vdup_lane_f32(a_first, 1), vdup_lane_f32(a_second, 1)), b.v);

  // Flip the sign of the odd lanes, then swap the two lanes of each pair.
  cross = vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(cross), p4ui_CONJ_XOR()));
  cross = vrev64q_f32(cross);

  return Packet2cf(vaddq_f32(re_times_b, cross));
}
|
|
103
|
+
|
|
104
|
+
// Bitwise AND on the raw bits of two packets.
template<> EIGEN_STRONG_INLINE Packet2cf pand   <Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  const uint32x4_t lhs = vreinterpretq_u32_f32(a.v);
  const uint32x4_t rhs = vreinterpretq_u32_f32(b.v);
  return Packet2cf(vreinterpretq_f32_u32(vandq_u32(lhs, rhs)));
}

// Bitwise OR on the raw bits of two packets.
template<> EIGEN_STRONG_INLINE Packet2cf por    <Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  const uint32x4_t lhs = vreinterpretq_u32_f32(a.v);
  const uint32x4_t rhs = vreinterpretq_u32_f32(b.v);
  return Packet2cf(vreinterpretq_f32_u32(vorrq_u32(lhs, rhs)));
}

// Bitwise XOR on the raw bits of two packets.
template<> EIGEN_STRONG_INLINE Packet2cf pxor   <Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  const uint32x4_t lhs = vreinterpretq_u32_f32(a.v);
  const uint32x4_t rhs = vreinterpretq_u32_f32(b.v);
  return Packet2cf(vreinterpretq_f32_u32(veorq_u32(lhs, rhs)));
}

// Bit clear: the raw bits of a with the bits set in b cleared (vbicq_u32).
template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  const uint32x4_t lhs = vreinterpretq_u32_f32(a.v);
  const uint32x4_t rhs = vreinterpretq_u32_f32(b.v);
  return Packet2cf(vreinterpretq_f32_u32(vbicq_u32(lhs, rhs)));
}
|
|
120
|
+
|
|
121
|
+
// Aligned load: the complex array is read as consecutive floats.
template<> EIGEN_STRONG_INLINE Packet2cf pload<Packet2cf>(const std::complex<float>* from)
{
  EIGEN_DEBUG_ALIGNED_LOAD
  return Packet2cf(pload<Packet4f>((const float*)from));
}

// Unaligned load: the complex array is read as consecutive floats.
template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from)
{
  EIGEN_DEBUG_UNALIGNED_LOAD
  return Packet2cf(ploadu<Packet4f>((const float*)from));
}

// Duplicating load: broadcasts the single value *from to the whole packet.
template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from)
{
  return pset1<Packet2cf>(*from);
}
|
|
125
|
+
|
|
126
|
+
// Aligned store: the packet is written to the destination as consecutive floats.
template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from)
{
  EIGEN_DEBUG_ALIGNED_STORE
  pstore((float*)to, from.v);
}

// Unaligned store: the packet is written to the destination as consecutive floats.
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from)
{
  EIGEN_DEBUG_UNALIGNED_STORE
  pstoreu((float*)to, from.v);
}
|
|
128
|
+
|
|
129
|
+
// Strided gather: builds a packet from from[0] and from[stride], placing each
// value's real part in an even lane and its imaginary part in the following odd lane.
template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, Index stride)
{
  const std::complex<float>& first  = from[0*stride];
  const std::complex<float>& second = from[1*stride];

  Packet4f lanes = pset1<Packet4f>(0.f);
  lanes = vsetq_lane_f32(std::real(first),  lanes, 0);
  lanes = vsetq_lane_f32(std::imag(first),  lanes, 1);
  lanes = vsetq_lane_f32(std::real(second), lanes, 2);
  lanes = vsetq_lane_f32(std::imag(second), lanes, 3);
  return Packet2cf(lanes);
}
|
|
138
|
+
|
|
139
|
+
// Strided scatter: writes the packet's two complex values to to[0] and to[stride].
template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, Index stride)
{
  // Lanes (0, 1) hold the first value.
  const std::complex<float> first(vgetq_lane_f32(from.v, 0), vgetq_lane_f32(from.v, 1));
  to[stride*0] = first;

  // Lanes (2, 3) hold the second value.
  const std::complex<float> second(vgetq_lane_f32(from.v, 2), vgetq_lane_f32(from.v, 3));
  to[stride*1] = second;
}
|
|
144
|
+
|
|
145
|
+
// Prefetch hint for a complex<float> address, forwarded to EIGEN_ARM_PREFETCH.
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr)
{
  EIGEN_ARM_PREFETCH((const float *)addr);
}
|
|
146
|
+
|
|
147
|
+
// Returns the first complex value of the packet by spilling all lanes to an
// aligned temporary and reading element 0.
template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
{
  std::complex<float> EIGEN_ALIGN16 spilled[2];
  vst1q_f32((float *)spilled, a.v);
  return spilled[0];
}
|
|
153
|
+
|
|
154
|
+
// Reverses the order of the two complex values by exchanging the low and high
// 64-bit halves of the register.
template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a)
{
  const float32x2_t first  = vget_low_f32(a.v);
  const float32x2_t second = vget_high_f32(a.v);
  return Packet2cf(vcombine_f32(second, first));
}
|
|
165
|
+
|
|
166
|
+
// Swaps the real and imaginary lanes of every complex value (vrev64q_f32
// reverses the two floats inside each 64-bit pair).
template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(const Packet2cf& a)
{
  const Packet4f swapped = vrev64q_f32(a.v);
  return Packet2cf(swapped);
}
|
|
170
|
+
|
|
171
|
+
// Horizontal sum: adds the packet's two complex values and returns the result.
template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a)
{
  std::complex<float> total;

  // low half + high half = (re0 + re1, im0 + im1)
  const float32x2_t pair_sum = vadd_f32(vget_low_f32(a.v), vget_high_f32(a.v));
  vst1_f32((float *)&total, pair_sum);

  return total;
}
|
|
183
|
+
|
|
184
|
+
// Reduces two packets at once: element 0 of the result is the horizontal sum
// of vecs[0] and element 1 is the horizontal sum of vecs[1].
template<> EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(const Packet2cf* vecs)
{
  // First complex value of vecs[0] followed by the first of vecs[1].
  const Packet4f firsts  = vcombine_f32(vget_low_f32(vecs[0].v),  vget_low_f32(vecs[1].v));
  // Second complex value of vecs[0] followed by the second of vecs[1].
  const Packet4f seconds = vcombine_f32(vget_high_f32(vecs[0].v), vget_high_f32(vecs[1].v));

  return Packet2cf(vaddq_f32(firsts, seconds));
}
|
|
195
|
+
|
|
196
|
+
// Horizontal product: multiplies the packet's two complex values together.
// Same scheme as pmul<Packet2cf>, applied to the two 64-bit halves.
template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a)
{
  std::complex<float> result;

  const float32x2_t x = vget_low_f32(a.v);   // first value  (x_re, x_im)
  const float32x2_t y = vget_high_f32(a.v);  // second value (y_re, y_im)

  // | x_re | x_re | times y
  const float32x2_t re_times_y = vmul_f32(vdup_lane_f32(x, 0), y);
  // | x_im | x_im | times y
  float32x2_t cross = vmul_f32(vdup_lane_f32(x, 1), y);

  // Flip the sign of lane 1, then swap the two lanes: (-x_im*y_im, x_im*y_re).
  cross = vreinterpret_f32_u32(veor_u32(vreinterpret_u32_f32(cross), p2ui_CONJ_XOR()));
  cross = vrev64_f32(cross);

  const float32x2_t product = vadd_f32(re_times_y, cross);
  vst1_f32((float *)&result, product);

  return result;
}
|
|
222
|
+
|
|
223
|
+
template<int Offset>
|
|
224
|
+
struct palign_impl<Offset,Packet2cf>
|
|
225
|
+
{
|
|
226
|
+
EIGEN_STRONG_INLINE static void run(Packet2cf& first, const Packet2cf& second)
|
|
227
|
+
{
|
|
228
|
+
if (Offset==1)
|
|
229
|
+
{
|
|
230
|
+
first.v = vextq_f32(first.v, second.v, 2);
|
|
231
|
+
}
|
|
232
|
+
}
|
|
233
|
+
};
|
|
234
|
+
|
|
235
|
+
template<> struct conj_helper<Packet2cf, Packet2cf, false,true>
|
|
236
|
+
{
|
|
237
|
+
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
|
|
238
|
+
{ return padd(pmul(x,y),c); }
|
|
239
|
+
|
|
240
|
+
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
|
|
241
|
+
{
|
|
242
|
+
return internal::pmul(a, pconj(b));
|
|
243
|
+
}
|
|
244
|
+
};
|
|
245
|
+
|
|
246
|
+
template<> struct conj_helper<Packet2cf, Packet2cf, true,false>
|
|
247
|
+
{
|
|
248
|
+
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
|
|
249
|
+
{ return padd(pmul(x,y),c); }
|
|
250
|
+
|
|
251
|
+
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
|
|
252
|
+
{
|
|
253
|
+
return internal::pmul(pconj(a), b);
|
|
254
|
+
}
|
|
255
|
+
};
|
|
256
|
+
|
|
257
|
+
template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
|
|
258
|
+
{
|
|
259
|
+
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
|
|
260
|
+
{ return padd(pmul(x,y),c); }
|
|
261
|
+
|
|
262
|
+
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
|
|
263
|
+
{
|
|
264
|
+
return pconj(internal::pmul(a, b));
|
|
265
|
+
}
|
|
266
|
+
};
|
|
267
|
+
|
|
268
|
+
// Instantiate the mixed real/complex conj_helper specializations for the
// Packet2cf / Packet4f pair (no conjugation on either operand).
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)
|
|
269
|
+
|
|
270
|
+
// Complex division computed as a * conj(b) / |b|^2.
template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  // TODO optimize it for NEON
  const Packet2cf numerator = conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a,b);

  // Squared norm of b, replicated in both lanes of each complex value:
  // squares = (re^2, im^2), its pair-swapped copy = (im^2, re^2).
  const Packet4f squares         = vmulq_f32(b.v, b.v);
  const Packet4f squares_swapped = vrev64q_f32(squares);

  return Packet2cf(pdiv<Packet4f>(numerator.v, vaddq_f32(squares,squares_swapped)));
}
|
|
282
|
+
|
|
283
|
+
// In-place transpose of a 2x2 block of complex<float> values held in two packets.
EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<Packet2cf,2>& kernel)
{
  // The new second row is made of the second element of each original row;
  // it is computed first because packet[0] is overwritten next.
  const Packet4f second_row = vcombine_f32(vget_high_f32(kernel.packet[0].v), vget_high_f32(kernel.packet[1].v));
  // The new first row is made of the first element of each original row.
  kernel.packet[0].v = vcombine_f32(vget_low_f32(kernel.packet[0].v), vget_low_f32(kernel.packet[1].v));
  kernel.packet[1].v = second_row;
}
|
|
289
|
+
|
|
290
|
+
//---------- double ----------
|
|
291
|
+
#if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG
|
|
292
|
+
|
|
293
|
+
// See bug 1325, clang fails to call vld1q_u64.
|
|
294
|
+
#if EIGEN_COMP_CLANG
|
|
295
|
+
static uint64x2_t p2ul_CONJ_XOR = {0x0, 0x8000000000000000};
|
|
296
|
+
#else
|
|
297
|
+
const uint64_t p2ul_conj_XOR_DATA[] = { 0x0, 0x8000000000000000 };
|
|
298
|
+
static uint64x2_t p2ul_CONJ_XOR = vld1q_u64( p2ul_conj_XOR_DATA );
|
|
299
|
+
#endif
|
|
300
|
+
|
|
301
|
+
struct Packet1cd
|
|
302
|
+
{
|
|
303
|
+
EIGEN_STRONG_INLINE Packet1cd() {}
|
|
304
|
+
EIGEN_STRONG_INLINE explicit Packet1cd(const Packet2d& a) : v(a) {}
|
|
305
|
+
Packet2d v;
|
|
306
|
+
};
|
|
307
|
+
|
|
308
|
+
template<> struct packet_traits<std::complex<double> > : default_packet_traits
|
|
309
|
+
{
|
|
310
|
+
typedef Packet1cd type;
|
|
311
|
+
typedef Packet1cd half;
|
|
312
|
+
enum {
|
|
313
|
+
Vectorizable = 1,
|
|
314
|
+
AlignedOnScalar = 0,
|
|
315
|
+
size = 1,
|
|
316
|
+
HasHalfPacket = 0,
|
|
317
|
+
|
|
318
|
+
HasAdd = 1,
|
|
319
|
+
HasSub = 1,
|
|
320
|
+
HasMul = 1,
|
|
321
|
+
HasDiv = 1,
|
|
322
|
+
HasNegate = 1,
|
|
323
|
+
HasAbs = 0,
|
|
324
|
+
HasAbs2 = 0,
|
|
325
|
+
HasMin = 0,
|
|
326
|
+
HasMax = 0,
|
|
327
|
+
HasSetLinear = 0
|
|
328
|
+
};
|
|
329
|
+
};
|
|
330
|
+
|
|
331
|
+
// Reverse mapping from Packet1cd to its scalar type, size and alignment.
template<> struct unpacket_traits<Packet1cd>
{
  typedef std::complex<double> type;
  typedef Packet1cd half;
  enum { size = 1, alignment = Aligned16 };
};
|
|
332
|
+
|
|
333
|
+
// Aligned load: the complex value is read as two consecutive doubles.
template<> EIGEN_STRONG_INLINE Packet1cd pload<Packet1cd>(const std::complex<double>* from)
{
  EIGEN_DEBUG_ALIGNED_LOAD
  return Packet1cd(pload<Packet2d>((const double*)from));
}

// Unaligned load: the complex value is read as two consecutive doubles.
template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from)
{
  EIGEN_DEBUG_UNALIGNED_LOAD
  return Packet1cd(ploadu<Packet2d>((const double*)from));
}
|
|
335
|
+
|
|
336
|
+
// Builds a packet from a single complex value.
template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from)
{
  /* here we really have to use unaligned loads :( */
  const std::complex<double>* source = &from;
  return ploadu<Packet1cd>(source);
}
|
|
338
|
+
|
|
339
|
+
// Complex addition: lane-wise double addition of the underlying packets.
template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{
  const Packet2d lanes = padd<Packet2d>(a.v,b.v);
  return Packet1cd(lanes);
}

// Complex subtraction: lane-wise double subtraction of the underlying packets.
template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{
  const Packet2d lanes = psub<Packet2d>(a.v,b.v);
  return Packet1cd(lanes);
}

// Complex negation: negates both double lanes.
template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a)
{
  const Packet2d lanes = pnegate<Packet2d>(a.v);
  return Packet1cd(lanes);
}

// Complex conjugate: XORs the packet bits with p2ul_CONJ_XOR, which flips the
// sign bit of the second lane (the imaginary part).
template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a)
{
  const uint64x2_t flipped = veorq_u64(vreinterpretq_u64_f64(a.v), p2ul_CONJ_XOR);
  return Packet1cd(vreinterpretq_f64_u64(flipped));
}
|
|
343
|
+
|
|
344
|
+
// Complex multiplication of two single-value packets.
// With a = (ar, ai) and b = (br, bi):
//   ar * b              -> ( ar*br,  ar*bi )
//   ai * b, conj, swap  -> (-ai*bi,  ai*br )
// and their sum is the product (ar*br - ai*bi, ar*bi + ai*br).
template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{
  // Real part of a in both lanes, times b.
  const Packet2d re_times_b = vmulq_f64(vdupq_lane_f64(vget_low_f64(a.v), 0), b.v);

  // Imaginary part of a in both lanes, times b.
  Packet2d cross = vmulq_f64(vdupq_lane_f64(vget_high_f64(a.v), 0), b.v);

  // Flip the sign of the second lane, then swap the two lanes.
  cross = vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(cross), p2ul_CONJ_XOR));
  cross = preverse<Packet2d>(cross);

  return Packet1cd(vaddq_f64(re_times_b, cross));
}
|
|
363
|
+
|
|
364
|
+
// Bitwise AND on the raw bits of two packets.
template<> EIGEN_STRONG_INLINE Packet1cd pand   <Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{
  const uint64x2_t lhs = vreinterpretq_u64_f64(a.v);
  const uint64x2_t rhs = vreinterpretq_u64_f64(b.v);
  return Packet1cd(vreinterpretq_f64_u64(vandq_u64(lhs, rhs)));
}

// Bitwise OR on the raw bits of two packets.
template<> EIGEN_STRONG_INLINE Packet1cd por    <Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{
  const uint64x2_t lhs = vreinterpretq_u64_f64(a.v);
  const uint64x2_t rhs = vreinterpretq_u64_f64(b.v);
  return Packet1cd(vreinterpretq_f64_u64(vorrq_u64(lhs, rhs)));
}

// Bitwise XOR on the raw bits of two packets.
template<> EIGEN_STRONG_INLINE Packet1cd pxor   <Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{
  const uint64x2_t lhs = vreinterpretq_u64_f64(a.v);
  const uint64x2_t rhs = vreinterpretq_u64_f64(b.v);
  return Packet1cd(vreinterpretq_f64_u64(veorq_u64(lhs, rhs)));
}

// Bit clear: the raw bits of a with the bits set in b cleared (vbicq_u64).
template<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{
  const uint64x2_t lhs = vreinterpretq_u64_f64(a.v);
  const uint64x2_t rhs = vreinterpretq_u64_f64(b.v);
  return Packet1cd(vreinterpretq_f64_u64(vbicq_u64(lhs, rhs)));
}
|
|
380
|
+
|
|
381
|
+
// Duplicating load: with a single value per packet this is just pset1 of *from.
template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from)
{
  return pset1<Packet1cd>(*from);
}

// Aligned store: the packet is written to the destination as two consecutive doubles.
template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from)
{
  EIGEN_DEBUG_ALIGNED_STORE
  pstore((double*)to, from.v);
}

// Unaligned store: the packet is written to the destination as two consecutive doubles.
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from)
{
  EIGEN_DEBUG_UNALIGNED_STORE
  pstoreu((double*)to, from.v);
}

// Prefetch hint for a complex<double> address, forwarded to EIGEN_ARM_PREFETCH.
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr)
{
  EIGEN_ARM_PREFETCH((const double *)addr);
}
|
|
387
|
+
|
|
388
|
+
// Strided gather for a one-element packet: only from[0] is read; its real part
// goes to lane 0 and its imaginary part to lane 1.
template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(const std::complex<double>* from, Index stride)
{
  const std::complex<double>& value = from[0*stride];

  Packet2d lanes = pset1<Packet2d>(0.0);
  lanes = vsetq_lane_f64(std::real(value), lanes, 0);
  lanes = vsetq_lane_f64(std::imag(value), lanes, 1);
  return Packet1cd(lanes);
}
|
|
395
|
+
|
|
396
|
+
// Strided scatter for a one-element packet: the value is written to to[0].
template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet1cd>(std::complex<double>* to, const Packet1cd& from, Index stride)
{
  const std::complex<double> value(vgetq_lane_f64(from.v, 0), vgetq_lane_f64(from.v, 1));
  to[stride*0] = value;
}
|
|
400
|
+
|
|
401
|
+
|
|
402
|
+
// Returns the packet's only complex value by storing it to an aligned temporary.
template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a)
{
  std::complex<double> EIGEN_ALIGN16 value;
  pstore<std::complex<double> >(&value, a);
  return value;
}
|
|
409
|
+
|
|
410
|
+
// A one-element packet is its own reverse.
template<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a)
{
  return a;
}

// Horizontal sum of a one-element packet: the element itself.
template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a)
{
  return pfirst(a);
}

// Packet reduction of one one-element packet: that packet.
template<> EIGEN_STRONG_INLINE Packet1cd preduxp<Packet1cd>(const Packet1cd* vecs)
{
  return vecs[0];
}

// Horizontal product of a one-element packet: the element itself.
template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a)
{
  return pfirst(a);
}
|
|
417
|
+
|
|
418
|
+
template<int Offset>
|
|
419
|
+
struct palign_impl<Offset,Packet1cd>
|
|
420
|
+
{
|
|
421
|
+
static EIGEN_STRONG_INLINE void run(Packet1cd& /*first*/, const Packet1cd& /*second*/)
|
|
422
|
+
{
|
|
423
|
+
// FIXME is it sure we never have to align a Packet1cd?
|
|
424
|
+
// Even though a std::complex<double> has 16 bytes, it is not necessarily aligned on a 16 bytes boundary...
|
|
425
|
+
}
|
|
426
|
+
};
|
|
427
|
+
|
|
428
|
+
template<> struct conj_helper<Packet1cd, Packet1cd, false,true>
|
|
429
|
+
{
|
|
430
|
+
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
|
|
431
|
+
{ return padd(pmul(x,y),c); }
|
|
432
|
+
|
|
433
|
+
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
|
|
434
|
+
{
|
|
435
|
+
return internal::pmul(a, pconj(b));
|
|
436
|
+
}
|
|
437
|
+
};
|
|
438
|
+
|
|
439
|
+
template<> struct conj_helper<Packet1cd, Packet1cd, true,false>
|
|
440
|
+
{
|
|
441
|
+
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
|
|
442
|
+
{ return padd(pmul(x,y),c); }
|
|
443
|
+
|
|
444
|
+
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
|
|
445
|
+
{
|
|
446
|
+
return internal::pmul(pconj(a), b);
|
|
447
|
+
}
|
|
448
|
+
};
|
|
449
|
+
|
|
450
|
+
template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
|
|
451
|
+
{
|
|
452
|
+
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
|
|
453
|
+
{ return padd(pmul(x,y),c); }
|
|
454
|
+
|
|
455
|
+
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
|
|
456
|
+
{
|
|
457
|
+
return pconj(internal::pmul(a, b));
|
|
458
|
+
}
|
|
459
|
+
};
|
|
460
|
+
|
|
461
|
+
// Instantiate the mixed real/complex conj_helper specializations for the
// Packet1cd / Packet2d pair (no conjugation on either operand).
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d)
|
|
462
|
+
|
|
463
|
+
// Complex division computed as a * conj(b) / |b|^2.
template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{
  // TODO optimize it for NEON
  const Packet1cd numerator = conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b);

  // Squared norm of b, replicated in both lanes:
  // squares = (re^2, im^2), its reversed copy = (im^2, re^2).
  const Packet2d squares          = pmul<Packet2d>(b.v, b.v);
  const Packet2d squares_reversed = preverse<Packet2d>(squares);

  return Packet1cd(pdiv(numerator.v, padd<Packet2d>(squares,squares_reversed)));
}
|
|
472
|
+
|
|
473
|
+
// Swaps the real and imaginary lanes of the value by reversing the underlying Packet2d.
EIGEN_STRONG_INLINE Packet1cd pcplxflip/*<Packet1cd>*/(const Packet1cd& x)
{
  const Packet2d swapped = preverse(Packet2d(x.v));
  return Packet1cd(swapped);
}
|
|
477
|
+
|
|
478
|
+
// Regroups the lanes of two Packet1cd: afterwards packet[0] holds the low
// lanes of both inputs and packet[1] holds their high lanes.
EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet1cd,2>& kernel)
{
  // Computed first because packet[0] is overwritten on the next line.
  const Packet2d high_lanes = vcombine_f64(vget_high_f64(kernel.packet[0].v), vget_high_f64(kernel.packet[1].v));
  kernel.packet[0].v = vcombine_f64(vget_low_f64(kernel.packet[0].v), vget_low_f64(kernel.packet[1].v));
  kernel.packet[1].v = high_lanes;
}
|
|
484
|
+
#endif // EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG
|
|
485
|
+
|
|
486
|
+
} // end namespace internal
|
|
487
|
+
|
|
488
|
+
} // end namespace Eigen
|
|
489
|
+
|
|
490
|
+
#endif // EIGEN_COMPLEX_NEON_H
|