@smake/eigen 1.0.2 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/eigen/Eigen/AccelerateSupport +52 -0
- package/eigen/Eigen/Cholesky +18 -21
- package/eigen/Eigen/CholmodSupport +28 -28
- package/eigen/Eigen/Core +235 -326
- package/eigen/Eigen/Eigenvalues +16 -14
- package/eigen/Eigen/Geometry +21 -24
- package/eigen/Eigen/Householder +9 -8
- package/eigen/Eigen/IterativeLinearSolvers +8 -4
- package/eigen/Eigen/Jacobi +14 -14
- package/eigen/Eigen/KLUSupport +43 -0
- package/eigen/Eigen/LU +16 -20
- package/eigen/Eigen/MetisSupport +12 -12
- package/eigen/Eigen/OrderingMethods +54 -54
- package/eigen/Eigen/PaStiXSupport +23 -20
- package/eigen/Eigen/PardisoSupport +17 -14
- package/eigen/Eigen/QR +18 -21
- package/eigen/Eigen/QtAlignedMalloc +5 -13
- package/eigen/Eigen/SPQRSupport +21 -14
- package/eigen/Eigen/SVD +23 -18
- package/eigen/Eigen/Sparse +1 -4
- package/eigen/Eigen/SparseCholesky +18 -23
- package/eigen/Eigen/SparseCore +18 -17
- package/eigen/Eigen/SparseLU +12 -8
- package/eigen/Eigen/SparseQR +16 -14
- package/eigen/Eigen/StdDeque +5 -2
- package/eigen/Eigen/StdList +5 -2
- package/eigen/Eigen/StdVector +5 -2
- package/eigen/Eigen/SuperLUSupport +30 -24
- package/eigen/Eigen/ThreadPool +80 -0
- package/eigen/Eigen/UmfPackSupport +19 -17
- package/eigen/Eigen/Version +14 -0
- package/eigen/Eigen/src/AccelerateSupport/AccelerateSupport.h +423 -0
- package/eigen/Eigen/src/AccelerateSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Cholesky/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Cholesky/LDLT.h +377 -401
- package/eigen/Eigen/src/Cholesky/LLT.h +332 -360
- package/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h +81 -56
- package/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +620 -521
- package/eigen/Eigen/src/CholmodSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Core/ArithmeticSequence.h +239 -0
- package/eigen/Eigen/src/Core/Array.h +341 -294
- package/eigen/Eigen/src/Core/ArrayBase.h +190 -203
- package/eigen/Eigen/src/Core/ArrayWrapper.h +127 -171
- package/eigen/Eigen/src/Core/Assign.h +30 -40
- package/eigen/Eigen/src/Core/AssignEvaluator.h +711 -589
- package/eigen/Eigen/src/Core/Assign_MKL.h +130 -125
- package/eigen/Eigen/src/Core/BandMatrix.h +268 -283
- package/eigen/Eigen/src/Core/Block.h +375 -398
- package/eigen/Eigen/src/Core/CommaInitializer.h +86 -97
- package/eigen/Eigen/src/Core/ConditionEstimator.h +51 -53
- package/eigen/Eigen/src/Core/CoreEvaluators.h +1356 -1026
- package/eigen/Eigen/src/Core/CoreIterators.h +73 -59
- package/eigen/Eigen/src/Core/CwiseBinaryOp.h +114 -132
- package/eigen/Eigen/src/Core/CwiseNullaryOp.h +726 -617
- package/eigen/Eigen/src/Core/CwiseTernaryOp.h +77 -103
- package/eigen/Eigen/src/Core/CwiseUnaryOp.h +56 -68
- package/eigen/Eigen/src/Core/CwiseUnaryView.h +132 -95
- package/eigen/Eigen/src/Core/DenseBase.h +632 -571
- package/eigen/Eigen/src/Core/DenseCoeffsBase.h +511 -624
- package/eigen/Eigen/src/Core/DenseStorage.h +512 -509
- package/eigen/Eigen/src/Core/DeviceWrapper.h +153 -0
- package/eigen/Eigen/src/Core/Diagonal.h +169 -210
- package/eigen/Eigen/src/Core/DiagonalMatrix.h +351 -274
- package/eigen/Eigen/src/Core/DiagonalProduct.h +12 -10
- package/eigen/Eigen/src/Core/Dot.h +172 -222
- package/eigen/Eigen/src/Core/EigenBase.h +75 -85
- package/eigen/Eigen/src/Core/Fill.h +138 -0
- package/eigen/Eigen/src/Core/FindCoeff.h +464 -0
- package/eigen/Eigen/src/Core/ForceAlignedAccess.h +90 -109
- package/eigen/Eigen/src/Core/Fuzzy.h +82 -105
- package/eigen/Eigen/src/Core/GeneralProduct.h +327 -263
- package/eigen/Eigen/src/Core/GenericPacketMath.h +1472 -360
- package/eigen/Eigen/src/Core/GlobalFunctions.h +194 -151
- package/eigen/Eigen/src/Core/IO.h +147 -139
- package/eigen/Eigen/src/Core/IndexedView.h +321 -0
- package/eigen/Eigen/src/Core/InnerProduct.h +260 -0
- package/eigen/Eigen/src/Core/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Core/Inverse.h +56 -66
- package/eigen/Eigen/src/Core/Map.h +124 -142
- package/eigen/Eigen/src/Core/MapBase.h +256 -281
- package/eigen/Eigen/src/Core/MathFunctions.h +1620 -938
- package/eigen/Eigen/src/Core/MathFunctionsImpl.h +233 -71
- package/eigen/Eigen/src/Core/Matrix.h +491 -416
- package/eigen/Eigen/src/Core/MatrixBase.h +468 -453
- package/eigen/Eigen/src/Core/NestByValue.h +66 -85
- package/eigen/Eigen/src/Core/NoAlias.h +79 -85
- package/eigen/Eigen/src/Core/NumTraits.h +235 -148
- package/eigen/Eigen/src/Core/PartialReduxEvaluator.h +253 -0
- package/eigen/Eigen/src/Core/PermutationMatrix.h +461 -511
- package/eigen/Eigen/src/Core/PlainObjectBase.h +871 -894
- package/eigen/Eigen/src/Core/Product.h +260 -139
- package/eigen/Eigen/src/Core/ProductEvaluators.h +863 -714
- package/eigen/Eigen/src/Core/Random.h +161 -136
- package/eigen/Eigen/src/Core/RandomImpl.h +262 -0
- package/eigen/Eigen/src/Core/RealView.h +250 -0
- package/eigen/Eigen/src/Core/Redux.h +366 -336
- package/eigen/Eigen/src/Core/Ref.h +308 -209
- package/eigen/Eigen/src/Core/Replicate.h +94 -106
- package/eigen/Eigen/src/Core/Reshaped.h +398 -0
- package/eigen/Eigen/src/Core/ReturnByValue.h +49 -55
- package/eigen/Eigen/src/Core/Reverse.h +136 -145
- package/eigen/Eigen/src/Core/Select.h +70 -140
- package/eigen/Eigen/src/Core/SelfAdjointView.h +262 -285
- package/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h +23 -20
- package/eigen/Eigen/src/Core/SkewSymmetricMatrix3.h +382 -0
- package/eigen/Eigen/src/Core/Solve.h +97 -111
- package/eigen/Eigen/src/Core/SolveTriangular.h +131 -129
- package/eigen/Eigen/src/Core/SolverBase.h +138 -101
- package/eigen/Eigen/src/Core/StableNorm.h +156 -160
- package/eigen/Eigen/src/Core/StlIterators.h +619 -0
- package/eigen/Eigen/src/Core/Stride.h +91 -88
- package/eigen/Eigen/src/Core/Swap.h +70 -38
- package/eigen/Eigen/src/Core/Transpose.h +295 -273
- package/eigen/Eigen/src/Core/Transpositions.h +272 -317
- package/eigen/Eigen/src/Core/TriangularMatrix.h +670 -755
- package/eigen/Eigen/src/Core/VectorBlock.h +59 -72
- package/eigen/Eigen/src/Core/VectorwiseOp.h +668 -630
- package/eigen/Eigen/src/Core/Visitor.h +480 -216
- package/eigen/Eigen/src/Core/arch/AVX/Complex.h +407 -293
- package/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +79 -388
- package/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +2935 -491
- package/eigen/Eigen/src/Core/arch/AVX/Reductions.h +353 -0
- package/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +279 -22
- package/eigen/Eigen/src/Core/arch/AVX512/Complex.h +472 -0
- package/eigen/Eigen/src/Core/arch/AVX512/GemmKernel.h +1245 -0
- package/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +85 -333
- package/eigen/Eigen/src/Core/arch/AVX512/MathFunctionsFP16.h +75 -0
- package/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +2490 -649
- package/eigen/Eigen/src/Core/arch/AVX512/PacketMathFP16.h +1413 -0
- package/eigen/Eigen/src/Core/arch/AVX512/Reductions.h +297 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TrsmKernel.h +1167 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TrsmUnrolls.inc +1219 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +277 -0
- package/eigen/Eigen/src/Core/arch/AVX512/TypeCastingFP16.h +130 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +521 -298
- package/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +39 -280
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +3686 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +205 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +901 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMAbfloat16.h +742 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/MatrixVectorProduct.inc +2818 -0
- package/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +3391 -723
- package/eigen/Eigen/src/Core/arch/AltiVec/TypeCasting.h +153 -0
- package/eigen/Eigen/src/Core/arch/Default/BFloat16.h +866 -0
- package/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +113 -14
- package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +2634 -0
- package/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +227 -0
- package/eigen/Eigen/src/Core/arch/Default/Half.h +1091 -0
- package/eigen/Eigen/src/Core/arch/Default/Settings.h +11 -13
- package/eigen/Eigen/src/Core/arch/GPU/Complex.h +244 -0
- package/eigen/Eigen/src/Core/arch/GPU/MathFunctions.h +104 -0
- package/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1712 -0
- package/eigen/Eigen/src/Core/arch/GPU/Tuple.h +268 -0
- package/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +77 -0
- package/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
- package/eigen/Eigen/src/Core/arch/HVX/PacketMath.h +1088 -0
- package/eigen/Eigen/src/Core/arch/LSX/Complex.h +520 -0
- package/eigen/Eigen/src/Core/arch/LSX/GeneralBlockPanelKernel.h +23 -0
- package/eigen/Eigen/src/Core/arch/LSX/MathFunctions.h +43 -0
- package/eigen/Eigen/src/Core/arch/LSX/PacketMath.h +2866 -0
- package/eigen/Eigen/src/Core/arch/LSX/TypeCasting.h +526 -0
- package/eigen/Eigen/src/Core/arch/MSA/Complex.h +620 -0
- package/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +379 -0
- package/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1237 -0
- package/eigen/Eigen/src/Core/arch/NEON/Complex.h +531 -289
- package/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +243 -0
- package/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +50 -73
- package/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +5915 -579
- package/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1642 -0
- package/eigen/Eigen/src/Core/arch/NEON/UnaryFunctors.h +57 -0
- package/eigen/Eigen/src/Core/arch/SSE/Complex.h +366 -334
- package/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +40 -514
- package/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +2164 -675
- package/eigen/Eigen/src/Core/arch/SSE/Reductions.h +324 -0
- package/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +188 -35
- package/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +48 -0
- package/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +674 -0
- package/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +52 -0
- package/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +227 -0
- package/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +303 -0
- package/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +576 -0
- package/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +83 -0
- package/eigen/Eigen/src/Core/arch/ZVector/Complex.h +434 -261
- package/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +160 -53
- package/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +1073 -605
- package/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +123 -117
- package/eigen/Eigen/src/Core/functors/BinaryFunctors.h +594 -322
- package/eigen/Eigen/src/Core/functors/NullaryFunctors.h +204 -118
- package/eigen/Eigen/src/Core/functors/StlFunctors.h +110 -97
- package/eigen/Eigen/src/Core/functors/TernaryFunctors.h +34 -7
- package/eigen/Eigen/src/Core/functors/UnaryFunctors.h +1158 -530
- package/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +2329 -1333
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +328 -364
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +191 -178
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +85 -82
- package/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +154 -73
- package/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +396 -542
- package/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +80 -77
- package/eigen/Eigen/src/Core/products/Parallelizer.h +208 -92
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +331 -375
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +206 -224
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +139 -146
- package/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +58 -61
- package/eigen/Eigen/src/Core/products/SelfadjointProduct.h +71 -71
- package/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +48 -46
- package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +294 -369
- package/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +246 -238
- package/eigen/Eigen/src/Core/products/TriangularMatrixVector.h +244 -247
- package/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +212 -192
- package/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +328 -275
- package/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +108 -109
- package/eigen/Eigen/src/Core/products/TriangularSolverVector.h +70 -93
- package/eigen/Eigen/src/Core/util/Assert.h +158 -0
- package/eigen/Eigen/src/Core/util/BlasUtil.h +413 -290
- package/eigen/Eigen/src/Core/util/ConfigureVectorization.h +543 -0
- package/eigen/Eigen/src/Core/util/Constants.h +314 -263
- package/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +130 -78
- package/eigen/Eigen/src/Core/util/EmulateArray.h +270 -0
- package/eigen/Eigen/src/Core/util/ForwardDeclarations.h +450 -224
- package/eigen/Eigen/src/Core/util/GpuHipCudaDefines.inc +101 -0
- package/eigen/Eigen/src/Core/util/GpuHipCudaUndefines.inc +45 -0
- package/eigen/Eigen/src/Core/util/IndexedViewHelper.h +487 -0
- package/eigen/Eigen/src/Core/util/IntegralConstant.h +279 -0
- package/eigen/Eigen/src/Core/util/MKL_support.h +39 -30
- package/eigen/Eigen/src/Core/util/Macros.h +939 -646
- package/eigen/Eigen/src/Core/util/MaxSizeVector.h +139 -0
- package/eigen/Eigen/src/Core/util/Memory.h +1042 -650
- package/eigen/Eigen/src/Core/util/Meta.h +618 -426
- package/eigen/Eigen/src/Core/util/MoreMeta.h +638 -0
- package/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +32 -19
- package/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
- package/eigen/Eigen/src/Core/util/Serializer.h +209 -0
- package/eigen/Eigen/src/Core/util/StaticAssert.h +51 -164
- package/eigen/Eigen/src/Core/util/SymbolicIndex.h +445 -0
- package/eigen/Eigen/src/Core/util/XprHelper.h +793 -538
- package/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +246 -277
- package/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +299 -319
- package/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +52 -48
- package/eigen/Eigen/src/Eigenvalues/EigenSolver.h +413 -456
- package/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +309 -325
- package/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +157 -171
- package/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +292 -310
- package/eigen/Eigen/src/Eigenvalues/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +91 -107
- package/eigen/Eigen/src/Eigenvalues/RealQZ.h +539 -606
- package/eigen/Eigen/src/Eigenvalues/RealSchur.h +348 -382
- package/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +41 -35
- package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +579 -600
- package/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +47 -44
- package/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +434 -461
- package/eigen/Eigen/src/Geometry/AlignedBox.h +307 -214
- package/eigen/Eigen/src/Geometry/AngleAxis.h +135 -137
- package/eigen/Eigen/src/Geometry/EulerAngles.h +163 -74
- package/eigen/Eigen/src/Geometry/Homogeneous.h +289 -333
- package/eigen/Eigen/src/Geometry/Hyperplane.h +152 -161
- package/eigen/Eigen/src/Geometry/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Geometry/OrthoMethods.h +168 -145
- package/eigen/Eigen/src/Geometry/ParametrizedLine.h +141 -104
- package/eigen/Eigen/src/Geometry/Quaternion.h +595 -497
- package/eigen/Eigen/src/Geometry/Rotation2D.h +110 -108
- package/eigen/Eigen/src/Geometry/RotationBase.h +148 -145
- package/eigen/Eigen/src/Geometry/Scaling.h +115 -90
- package/eigen/Eigen/src/Geometry/Transform.h +896 -953
- package/eigen/Eigen/src/Geometry/Translation.h +100 -98
- package/eigen/Eigen/src/Geometry/Umeyama.h +79 -84
- package/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +154 -0
- package/eigen/Eigen/src/Householder/BlockHouseholder.h +54 -42
- package/eigen/Eigen/src/Householder/Householder.h +104 -122
- package/eigen/Eigen/src/Householder/HouseholderSequence.h +416 -382
- package/eigen/Eigen/src/Householder/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +153 -166
- package/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +127 -138
- package/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +95 -124
- package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +269 -267
- package/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +246 -259
- package/eigen/Eigen/src/IterativeLinearSolvers/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +218 -217
- package/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +80 -103
- package/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +59 -63
- package/eigen/Eigen/src/Jacobi/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/Jacobi/Jacobi.h +256 -291
- package/eigen/Eigen/src/KLUSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/KLUSupport/KLUSupport.h +339 -0
- package/eigen/Eigen/src/LU/Determinant.h +60 -63
- package/eigen/Eigen/src/LU/FullPivLU.h +561 -626
- package/eigen/Eigen/src/LU/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/LU/InverseImpl.h +213 -275
- package/eigen/Eigen/src/LU/PartialPivLU.h +407 -435
- package/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h +54 -40
- package/eigen/Eigen/src/LU/arch/InverseSize4.h +353 -0
- package/eigen/Eigen/src/MetisSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/MetisSupport/MetisSupport.h +81 -93
- package/eigen/Eigen/src/OrderingMethods/Amd.h +250 -282
- package/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +950 -1103
- package/eigen/Eigen/src/OrderingMethods/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/OrderingMethods/Ordering.h +111 -122
- package/eigen/Eigen/src/PaStiXSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +524 -570
- package/eigen/Eigen/src/PardisoSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +385 -429
- package/eigen/Eigen/src/QR/ColPivHouseholderQR.h +494 -473
- package/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +120 -56
- package/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +223 -137
- package/eigen/Eigen/src/QR/FullPivHouseholderQR.h +517 -460
- package/eigen/Eigen/src/QR/HouseholderQR.h +412 -278
- package/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h +32 -23
- package/eigen/Eigen/src/QR/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SPQRSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +263 -261
- package/eigen/Eigen/src/SVD/BDCSVD.h +872 -679
- package/eigen/Eigen/src/SVD/BDCSVD_LAPACKE.h +174 -0
- package/eigen/Eigen/src/SVD/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SVD/JacobiSVD.h +585 -543
- package/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +85 -49
- package/eigen/Eigen/src/SVD/SVDBase.h +281 -160
- package/eigen/Eigen/src/SVD/UpperBidiagonalization.h +202 -237
- package/eigen/Eigen/src/SparseCholesky/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +769 -590
- package/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +318 -129
- package/eigen/Eigen/src/SparseCore/AmbiVector.h +202 -251
- package/eigen/Eigen/src/SparseCore/CompressedStorage.h +184 -236
- package/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +140 -184
- package/eigen/Eigen/src/SparseCore/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseCore/SparseAssign.h +174 -111
- package/eigen/Eigen/src/SparseCore/SparseBlock.h +408 -477
- package/eigen/Eigen/src/SparseCore/SparseColEtree.h +100 -112
- package/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +531 -280
- package/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +559 -347
- package/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +100 -108
- package/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +185 -191
- package/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +71 -71
- package/eigen/Eigen/src/SparseCore/SparseDot.h +49 -47
- package/eigen/Eigen/src/SparseCore/SparseFuzzy.h +13 -11
- package/eigen/Eigen/src/SparseCore/SparseMap.h +243 -253
- package/eigen/Eigen/src/SparseCore/SparseMatrix.h +1614 -1142
- package/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +403 -357
- package/eigen/Eigen/src/SparseCore/SparsePermutation.h +186 -115
- package/eigen/Eigen/src/SparseCore/SparseProduct.h +100 -91
- package/eigen/Eigen/src/SparseCore/SparseRedux.h +22 -24
- package/eigen/Eigen/src/SparseCore/SparseRef.h +268 -295
- package/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +371 -414
- package/eigen/Eigen/src/SparseCore/SparseSolverBase.h +78 -87
- package/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +81 -95
- package/eigen/Eigen/src/SparseCore/SparseTranspose.h +62 -71
- package/eigen/Eigen/src/SparseCore/SparseTriangularView.h +132 -144
- package/eigen/Eigen/src/SparseCore/SparseUtil.h +146 -115
- package/eigen/Eigen/src/SparseCore/SparseVector.h +426 -372
- package/eigen/Eigen/src/SparseCore/SparseView.h +164 -193
- package/eigen/Eigen/src/SparseCore/TriangularSolver.h +129 -170
- package/eigen/Eigen/src/SparseLU/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseLU/SparseLU.h +814 -618
- package/eigen/Eigen/src/SparseLU/SparseLUImpl.h +61 -48
- package/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +102 -118
- package/eigen/Eigen/src/SparseLU/SparseLU_Structs.h +38 -35
- package/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +273 -255
- package/eigen/Eigen/src/SparseLU/SparseLU_Utils.h +44 -49
- package/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h +104 -108
- package/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +90 -101
- package/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +57 -58
- package/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +43 -55
- package/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +74 -71
- package/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +125 -133
- package/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h +136 -159
- package/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h +51 -52
- package/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h +67 -73
- package/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h +24 -26
- package/eigen/Eigen/src/SparseQR/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SparseQR/SparseQR.h +451 -490
- package/eigen/Eigen/src/StlSupport/StdDeque.h +28 -105
- package/eigen/Eigen/src/StlSupport/StdList.h +28 -84
- package/eigen/Eigen/src/StlSupport/StdVector.h +28 -108
- package/eigen/Eigen/src/StlSupport/details.h +48 -50
- package/eigen/Eigen/src/SuperLUSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +634 -732
- package/eigen/Eigen/src/ThreadPool/Barrier.h +70 -0
- package/eigen/Eigen/src/ThreadPool/CoreThreadPoolDevice.h +336 -0
- package/eigen/Eigen/src/ThreadPool/EventCount.h +241 -0
- package/eigen/Eigen/src/ThreadPool/ForkJoin.h +140 -0
- package/eigen/Eigen/src/ThreadPool/InternalHeaderCheck.h +4 -0
- package/eigen/Eigen/src/ThreadPool/NonBlockingThreadPool.h +587 -0
- package/eigen/Eigen/src/ThreadPool/RunQueue.h +230 -0
- package/eigen/Eigen/src/ThreadPool/ThreadCancel.h +21 -0
- package/eigen/Eigen/src/ThreadPool/ThreadEnvironment.h +43 -0
- package/eigen/Eigen/src/ThreadPool/ThreadLocal.h +289 -0
- package/eigen/Eigen/src/ThreadPool/ThreadPoolInterface.h +50 -0
- package/eigen/Eigen/src/ThreadPool/ThreadYield.h +16 -0
- package/eigen/Eigen/src/UmfPackSupport/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +480 -380
- package/eigen/Eigen/src/misc/Image.h +41 -43
- package/eigen/Eigen/src/misc/InternalHeaderCheck.h +3 -0
- package/eigen/Eigen/src/misc/Kernel.h +39 -41
- package/eigen/Eigen/src/misc/RealSvd2x2.h +19 -21
- package/eigen/Eigen/src/misc/blas.h +83 -426
- package/eigen/Eigen/src/misc/lapacke.h +9976 -16182
- package/eigen/Eigen/src/misc/lapacke_helpers.h +163 -0
- package/eigen/Eigen/src/misc/lapacke_mangling.h +4 -5
- package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.inc +344 -0
- package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.inc +544 -0
- package/eigen/Eigen/src/plugins/BlockMethods.inc +1370 -0
- package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.inc +116 -0
- package/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.inc +167 -0
- package/eigen/Eigen/src/plugins/IndexedViewMethods.inc +192 -0
- package/eigen/Eigen/src/plugins/InternalHeaderCheck.inc +3 -0
- package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.inc +331 -0
- package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.inc +118 -0
- package/eigen/Eigen/src/plugins/ReshapedMethods.inc +133 -0
- package/lib/LibEigen.d.ts +4 -0
- package/lib/LibEigen.js +14 -0
- package/lib/index.d.ts +1 -1
- package/lib/index.js +7 -3
- package/package.json +2 -10
- package/eigen/Eigen/CMakeLists.txt +0 -19
- package/eigen/Eigen/src/Core/BooleanRedux.h +0 -164
- package/eigen/Eigen/src/Core/arch/CUDA/Complex.h +0 -103
- package/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -675
- package/eigen/Eigen/src/Core/arch/CUDA/MathFunctions.h +0 -91
- package/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
- package/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
- package/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
- package/eigen/Eigen/src/Core/util/NonMPL2.h +0 -3
- package/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
- package/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
- package/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h +0 -67
- package/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +0 -280
- package/eigen/Eigen/src/misc/lapack.h +0 -152
- package/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +0 -332
- package/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +0 -552
- package/eigen/Eigen/src/plugins/BlockMethods.h +0 -1058
- package/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h +0 -115
- package/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +0 -163
- package/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +0 -152
- package/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +0 -85
- package/lib/eigen.d.ts +0 -2
- package/lib/eigen.js +0 -15
|
@@ -0,0 +1,353 @@
|
|
|
1
|
+
// This file is part of Eigen, a lightweight C++ template library
|
|
2
|
+
// for linear algebra.
|
|
3
|
+
//
|
|
4
|
+
// Copyright (C) 2025 Charlie Schlosser <cs.schlosser@gmail.com>
|
|
5
|
+
//
|
|
6
|
+
// This Source Code Form is subject to the terms of the Mozilla
|
|
7
|
+
// Public License v. 2.0. If a copy of the MPL was not distributed
|
|
8
|
+
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
9
|
+
|
|
10
|
+
#ifndef EIGEN_REDUCTIONS_AVX_H
|
|
11
|
+
#define EIGEN_REDUCTIONS_AVX_H
|
|
12
|
+
|
|
13
|
+
// IWYU pragma: private
|
|
14
|
+
#include "../../InternalHeaderCheck.h"
|
|
15
|
+
|
|
16
|
+
namespace Eigen {
|
|
17
|
+
|
|
18
|
+
namespace internal {
|
|
19
|
+
|
|
20
|
+
/* -- -- -- -- -- -- -- -- -- -- -- -- Packet8i -- -- -- -- -- -- -- -- -- -- -- -- */
|
|
21
|
+
|
|
22
|
+
template <>
|
|
23
|
+
EIGEN_STRONG_INLINE int predux(const Packet8i& a) {
|
|
24
|
+
Packet4i lo = _mm256_castsi256_si128(a);
|
|
25
|
+
Packet4i hi = _mm256_extractf128_si256(a, 1);
|
|
26
|
+
return predux(padd(lo, hi));
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
template <>
|
|
30
|
+
EIGEN_STRONG_INLINE int predux_mul(const Packet8i& a) {
|
|
31
|
+
Packet4i lo = _mm256_castsi256_si128(a);
|
|
32
|
+
Packet4i hi = _mm256_extractf128_si256(a, 1);
|
|
33
|
+
return predux_mul(pmul(lo, hi));
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
template <>
|
|
37
|
+
EIGEN_STRONG_INLINE int predux_min(const Packet8i& a) {
|
|
38
|
+
Packet4i lo = _mm256_castsi256_si128(a);
|
|
39
|
+
Packet4i hi = _mm256_extractf128_si256(a, 1);
|
|
40
|
+
return predux_min(pmin(lo, hi));
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
template <>
|
|
44
|
+
EIGEN_STRONG_INLINE int predux_max(const Packet8i& a) {
|
|
45
|
+
Packet4i lo = _mm256_castsi256_si128(a);
|
|
46
|
+
Packet4i hi = _mm256_extractf128_si256(a, 1);
|
|
47
|
+
return predux_max(pmax(lo, hi));
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
template <>
|
|
51
|
+
EIGEN_STRONG_INLINE bool predux_any(const Packet8i& a) {
|
|
52
|
+
#ifdef EIGEN_VECTORIZE_AVX2
|
|
53
|
+
return _mm256_movemask_epi8(a) != 0x0;
|
|
54
|
+
#else
|
|
55
|
+
return _mm256_movemask_ps(_mm256_castsi256_ps(a)) != 0x0;
|
|
56
|
+
#endif
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
/* -- -- -- -- -- -- -- -- -- -- -- -- Packet8ui -- -- -- -- -- -- -- -- -- -- -- -- */
|
|
60
|
+
|
|
61
|
+
template <>
|
|
62
|
+
EIGEN_STRONG_INLINE uint32_t predux(const Packet8ui& a) {
|
|
63
|
+
Packet4ui lo = _mm256_castsi256_si128(a);
|
|
64
|
+
Packet4ui hi = _mm256_extractf128_si256(a, 1);
|
|
65
|
+
return predux(padd(lo, hi));
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
template <>
|
|
69
|
+
EIGEN_STRONG_INLINE uint32_t predux_mul(const Packet8ui& a) {
|
|
70
|
+
Packet4ui lo = _mm256_castsi256_si128(a);
|
|
71
|
+
Packet4ui hi = _mm256_extractf128_si256(a, 1);
|
|
72
|
+
return predux_mul(pmul(lo, hi));
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
template <>
|
|
76
|
+
EIGEN_STRONG_INLINE uint32_t predux_min(const Packet8ui& a) {
|
|
77
|
+
Packet4ui lo = _mm256_castsi256_si128(a);
|
|
78
|
+
Packet4ui hi = _mm256_extractf128_si256(a, 1);
|
|
79
|
+
return predux_min(pmin(lo, hi));
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
template <>
|
|
83
|
+
EIGEN_STRONG_INLINE uint32_t predux_max(const Packet8ui& a) {
|
|
84
|
+
Packet4ui lo = _mm256_castsi256_si128(a);
|
|
85
|
+
Packet4ui hi = _mm256_extractf128_si256(a, 1);
|
|
86
|
+
return predux_max(pmax(lo, hi));
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
template <>
|
|
90
|
+
EIGEN_STRONG_INLINE bool predux_any(const Packet8ui& a) {
|
|
91
|
+
#ifdef EIGEN_VECTORIZE_AVX2
|
|
92
|
+
return _mm256_movemask_epi8(a) != 0x0;
|
|
93
|
+
#else
|
|
94
|
+
return _mm256_movemask_ps(_mm256_castsi256_ps(a)) != 0x0;
|
|
95
|
+
#endif
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
#ifdef EIGEN_VECTORIZE_AVX2
|
|
99
|
+
|
|
100
|
+
/* -- -- -- -- -- -- -- -- -- -- -- -- Packet4l -- -- -- -- -- -- -- -- -- -- -- -- */
|
|
101
|
+
|
|
102
|
+
template <>
|
|
103
|
+
EIGEN_STRONG_INLINE int64_t predux(const Packet4l& a) {
|
|
104
|
+
Packet2l lo = _mm256_castsi256_si128(a);
|
|
105
|
+
Packet2l hi = _mm256_extractf128_si256(a, 1);
|
|
106
|
+
return predux(padd(lo, hi));
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
template <>
|
|
110
|
+
EIGEN_STRONG_INLINE bool predux_any(const Packet4l& a) {
|
|
111
|
+
return _mm256_movemask_pd(_mm256_castsi256_pd(a)) != 0x0;
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
/* -- -- -- -- -- -- -- -- -- -- -- -- Packet4ul -- -- -- -- -- -- -- -- -- -- -- -- */
|
|
115
|
+
|
|
116
|
+
template <>
|
|
117
|
+
EIGEN_STRONG_INLINE uint64_t predux(const Packet4ul& a) {
|
|
118
|
+
return static_cast<uint64_t>(predux(Packet4l(a)));
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
template <>
|
|
122
|
+
EIGEN_STRONG_INLINE bool predux_any(const Packet4ul& a) {
|
|
123
|
+
return _mm256_movemask_pd(_mm256_castsi256_pd(a)) != 0x0;
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
#endif
|
|
127
|
+
|
|
128
|
+
/* -- -- -- -- -- -- -- -- -- -- -- -- Packet8f -- -- -- -- -- -- -- -- -- -- -- -- */
|
|
129
|
+
|
|
130
|
+
template <>
|
|
131
|
+
EIGEN_STRONG_INLINE float predux(const Packet8f& a) {
|
|
132
|
+
Packet4f lo = _mm256_castps256_ps128(a);
|
|
133
|
+
Packet4f hi = _mm256_extractf128_ps(a, 1);
|
|
134
|
+
return predux(padd(lo, hi));
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
template <>
|
|
138
|
+
EIGEN_STRONG_INLINE float predux_mul(const Packet8f& a) {
|
|
139
|
+
Packet4f lo = _mm256_castps256_ps128(a);
|
|
140
|
+
Packet4f hi = _mm256_extractf128_ps(a, 1);
|
|
141
|
+
return predux_mul(pmul(lo, hi));
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
template <>
|
|
145
|
+
EIGEN_STRONG_INLINE float predux_min(const Packet8f& a) {
|
|
146
|
+
Packet4f lo = _mm256_castps256_ps128(a);
|
|
147
|
+
Packet4f hi = _mm256_extractf128_ps(a, 1);
|
|
148
|
+
return predux_min(pmin(lo, hi));
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
template <>
|
|
152
|
+
EIGEN_STRONG_INLINE float predux_min<PropagateNumbers>(const Packet8f& a) {
|
|
153
|
+
Packet4f lo = _mm256_castps256_ps128(a);
|
|
154
|
+
Packet4f hi = _mm256_extractf128_ps(a, 1);
|
|
155
|
+
return predux_min<PropagateNumbers>(pmin<PropagateNumbers>(lo, hi));
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
template <>
|
|
159
|
+
EIGEN_STRONG_INLINE float predux_min<PropagateNaN>(const Packet8f& a) {
|
|
160
|
+
Packet4f lo = _mm256_castps256_ps128(a);
|
|
161
|
+
Packet4f hi = _mm256_extractf128_ps(a, 1);
|
|
162
|
+
return predux_min<PropagateNaN>(pmin<PropagateNaN>(lo, hi));
|
|
163
|
+
}
|
|
164
|
+
|
|
165
|
+
template <>
|
|
166
|
+
EIGEN_STRONG_INLINE float predux_max(const Packet8f& a) {
|
|
167
|
+
Packet4f lo = _mm256_castps256_ps128(a);
|
|
168
|
+
Packet4f hi = _mm256_extractf128_ps(a, 1);
|
|
169
|
+
return predux_max(pmax(lo, hi));
|
|
170
|
+
}
|
|
171
|
+
|
|
172
|
+
template <>
|
|
173
|
+
EIGEN_STRONG_INLINE float predux_max<PropagateNumbers>(const Packet8f& a) {
|
|
174
|
+
Packet4f lo = _mm256_castps256_ps128(a);
|
|
175
|
+
Packet4f hi = _mm256_extractf128_ps(a, 1);
|
|
176
|
+
return predux_max<PropagateNumbers>(pmax<PropagateNumbers>(lo, hi));
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
template <>
|
|
180
|
+
EIGEN_STRONG_INLINE float predux_max<PropagateNaN>(const Packet8f& a) {
|
|
181
|
+
Packet4f lo = _mm256_castps256_ps128(a);
|
|
182
|
+
Packet4f hi = _mm256_extractf128_ps(a, 1);
|
|
183
|
+
return predux_max<PropagateNaN>(pmax<PropagateNaN>(lo, hi));
|
|
184
|
+
}
|
|
185
|
+
|
|
186
|
+
template <>
|
|
187
|
+
EIGEN_STRONG_INLINE bool predux_any(const Packet8f& a) {
|
|
188
|
+
return _mm256_movemask_ps(a) != 0x0;
|
|
189
|
+
}
|
|
190
|
+
|
|
191
|
+
/* -- -- -- -- -- -- -- -- -- -- -- -- Packet4d -- -- -- -- -- -- -- -- -- -- -- -- */
|
|
192
|
+
|
|
193
|
+
template <>
|
|
194
|
+
EIGEN_STRONG_INLINE double predux(const Packet4d& a) {
|
|
195
|
+
Packet2d lo = _mm256_castpd256_pd128(a);
|
|
196
|
+
Packet2d hi = _mm256_extractf128_pd(a, 1);
|
|
197
|
+
return predux(padd(lo, hi));
|
|
198
|
+
}
|
|
199
|
+
|
|
200
|
+
template <>
|
|
201
|
+
EIGEN_STRONG_INLINE double predux_mul(const Packet4d& a) {
|
|
202
|
+
Packet2d lo = _mm256_castpd256_pd128(a);
|
|
203
|
+
Packet2d hi = _mm256_extractf128_pd(a, 1);
|
|
204
|
+
return predux_mul(pmul(lo, hi));
|
|
205
|
+
}
|
|
206
|
+
|
|
207
|
+
template <>
|
|
208
|
+
EIGEN_STRONG_INLINE double predux_min(const Packet4d& a) {
|
|
209
|
+
Packet2d lo = _mm256_castpd256_pd128(a);
|
|
210
|
+
Packet2d hi = _mm256_extractf128_pd(a, 1);
|
|
211
|
+
return predux_min(pmin(lo, hi));
|
|
212
|
+
}
|
|
213
|
+
|
|
214
|
+
template <>
|
|
215
|
+
EIGEN_STRONG_INLINE double predux_min<PropagateNumbers>(const Packet4d& a) {
|
|
216
|
+
Packet2d lo = _mm256_castpd256_pd128(a);
|
|
217
|
+
Packet2d hi = _mm256_extractf128_pd(a, 1);
|
|
218
|
+
return predux_min<PropagateNumbers>(pmin<PropagateNumbers>(lo, hi));
|
|
219
|
+
}
|
|
220
|
+
|
|
221
|
+
template <>
|
|
222
|
+
EIGEN_STRONG_INLINE double predux_min<PropagateNaN>(const Packet4d& a) {
|
|
223
|
+
Packet2d lo = _mm256_castpd256_pd128(a);
|
|
224
|
+
Packet2d hi = _mm256_extractf128_pd(a, 1);
|
|
225
|
+
return predux_min<PropagateNaN>(pmin<PropagateNaN>(lo, hi));
|
|
226
|
+
}
|
|
227
|
+
|
|
228
|
+
template <>
|
|
229
|
+
EIGEN_STRONG_INLINE double predux_max(const Packet4d& a) {
|
|
230
|
+
Packet2d lo = _mm256_castpd256_pd128(a);
|
|
231
|
+
Packet2d hi = _mm256_extractf128_pd(a, 1);
|
|
232
|
+
return predux_max(pmax(lo, hi));
|
|
233
|
+
}
|
|
234
|
+
|
|
235
|
+
template <>
|
|
236
|
+
EIGEN_STRONG_INLINE double predux_max<PropagateNumbers>(const Packet4d& a) {
|
|
237
|
+
Packet2d lo = _mm256_castpd256_pd128(a);
|
|
238
|
+
Packet2d hi = _mm256_extractf128_pd(a, 1);
|
|
239
|
+
return predux_max<PropagateNumbers>(pmax<PropagateNumbers>(lo, hi));
|
|
240
|
+
}
|
|
241
|
+
|
|
242
|
+
template <>
|
|
243
|
+
EIGEN_STRONG_INLINE double predux_max<PropagateNaN>(const Packet4d& a) {
|
|
244
|
+
Packet2d lo = _mm256_castpd256_pd128(a);
|
|
245
|
+
Packet2d hi = _mm256_extractf128_pd(a, 1);
|
|
246
|
+
return predux_max<PropagateNaN>(pmax<PropagateNaN>(lo, hi));
|
|
247
|
+
}
|
|
248
|
+
|
|
249
|
+
template <>
|
|
250
|
+
EIGEN_STRONG_INLINE bool predux_any(const Packet4d& a) {
|
|
251
|
+
return _mm256_movemask_pd(a) != 0x0;
|
|
252
|
+
}
|
|
253
|
+
|
|
254
|
+
/* -- -- -- -- -- -- -- -- -- -- -- -- Packet8h -- -- -- -- -- -- -- -- -- -- -- -- */
|
|
255
|
+
#ifndef EIGEN_VECTORIZE_AVX512FP16
|
|
256
|
+
|
|
257
|
+
template <>
|
|
258
|
+
EIGEN_STRONG_INLINE half predux(const Packet8h& a) {
|
|
259
|
+
return static_cast<half>(predux(half2float(a)));
|
|
260
|
+
}
|
|
261
|
+
|
|
262
|
+
template <>
|
|
263
|
+
EIGEN_STRONG_INLINE half predux_mul(const Packet8h& a) {
|
|
264
|
+
return static_cast<half>(predux_mul(half2float(a)));
|
|
265
|
+
}
|
|
266
|
+
|
|
267
|
+
template <>
|
|
268
|
+
EIGEN_STRONG_INLINE half predux_min(const Packet8h& a) {
|
|
269
|
+
return static_cast<half>(predux_min(half2float(a)));
|
|
270
|
+
}
|
|
271
|
+
|
|
272
|
+
template <>
|
|
273
|
+
EIGEN_STRONG_INLINE half predux_min<PropagateNumbers>(const Packet8h& a) {
|
|
274
|
+
return static_cast<half>(predux_min<PropagateNumbers>(half2float(a)));
|
|
275
|
+
}
|
|
276
|
+
|
|
277
|
+
template <>
|
|
278
|
+
EIGEN_STRONG_INLINE half predux_min<PropagateNaN>(const Packet8h& a) {
|
|
279
|
+
return static_cast<half>(predux_min<PropagateNaN>(half2float(a)));
|
|
280
|
+
}
|
|
281
|
+
|
|
282
|
+
template <>
|
|
283
|
+
EIGEN_STRONG_INLINE half predux_max(const Packet8h& a) {
|
|
284
|
+
return static_cast<half>(predux_max(half2float(a)));
|
|
285
|
+
}
|
|
286
|
+
|
|
287
|
+
template <>
|
|
288
|
+
EIGEN_STRONG_INLINE half predux_max<PropagateNumbers>(const Packet8h& a) {
|
|
289
|
+
return static_cast<half>(predux_max<PropagateNumbers>(half2float(a)));
|
|
290
|
+
}
|
|
291
|
+
|
|
292
|
+
template <>
|
|
293
|
+
EIGEN_STRONG_INLINE half predux_max<PropagateNaN>(const Packet8h& a) {
|
|
294
|
+
return static_cast<half>(predux_max<PropagateNaN>(half2float(a)));
|
|
295
|
+
}
|
|
296
|
+
|
|
297
|
+
template <>
|
|
298
|
+
EIGEN_STRONG_INLINE bool predux_any(const Packet8h& a) {
|
|
299
|
+
return _mm_movemask_epi8(a) != 0;
|
|
300
|
+
}
|
|
301
|
+
#endif // EIGEN_VECTORIZE_AVX512FP16
|
|
302
|
+
|
|
303
|
+
/* -- -- -- -- -- -- -- -- -- -- -- -- Packet8bf -- -- -- -- -- -- -- -- -- -- -- -- */
|
|
304
|
+
|
|
305
|
+
template <>
|
|
306
|
+
EIGEN_STRONG_INLINE bfloat16 predux(const Packet8bf& a) {
|
|
307
|
+
return static_cast<bfloat16>(predux<Packet8f>(Bf16ToF32(a)));
|
|
308
|
+
}
|
|
309
|
+
|
|
310
|
+
template <>
|
|
311
|
+
EIGEN_STRONG_INLINE bfloat16 predux_mul(const Packet8bf& a) {
|
|
312
|
+
return static_cast<bfloat16>(predux_mul<Packet8f>(Bf16ToF32(a)));
|
|
313
|
+
}
|
|
314
|
+
|
|
315
|
+
template <>
|
|
316
|
+
EIGEN_STRONG_INLINE bfloat16 predux_min(const Packet8bf& a) {
|
|
317
|
+
return static_cast<bfloat16>(predux_min(Bf16ToF32(a)));
|
|
318
|
+
}
|
|
319
|
+
|
|
320
|
+
template <>
|
|
321
|
+
EIGEN_STRONG_INLINE bfloat16 predux_min<PropagateNumbers>(const Packet8bf& a) {
|
|
322
|
+
return static_cast<bfloat16>(predux_min<PropagateNumbers>(Bf16ToF32(a)));
|
|
323
|
+
}
|
|
324
|
+
|
|
325
|
+
template <>
|
|
326
|
+
EIGEN_STRONG_INLINE bfloat16 predux_min<PropagateNaN>(const Packet8bf& a) {
|
|
327
|
+
return static_cast<bfloat16>(predux_min<PropagateNaN>(Bf16ToF32(a)));
|
|
328
|
+
}
|
|
329
|
+
|
|
330
|
+
template <>
|
|
331
|
+
EIGEN_STRONG_INLINE bfloat16 predux_max(const Packet8bf& a) {
|
|
332
|
+
return static_cast<bfloat16>(predux_max<Packet8f>(Bf16ToF32(a)));
|
|
333
|
+
}
|
|
334
|
+
|
|
335
|
+
template <>
|
|
336
|
+
EIGEN_STRONG_INLINE bfloat16 predux_max<PropagateNumbers>(const Packet8bf& a) {
|
|
337
|
+
return static_cast<bfloat16>(predux_max<PropagateNumbers>(Bf16ToF32(a)));
|
|
338
|
+
}
|
|
339
|
+
|
|
340
|
+
template <>
|
|
341
|
+
EIGEN_STRONG_INLINE bfloat16 predux_max<PropagateNaN>(const Packet8bf& a) {
|
|
342
|
+
return static_cast<bfloat16>(predux_max<PropagateNaN>(Bf16ToF32(a)));
|
|
343
|
+
}
|
|
344
|
+
|
|
345
|
+
template <>
|
|
346
|
+
EIGEN_STRONG_INLINE bool predux_any(const Packet8bf& a) {
|
|
347
|
+
return _mm_movemask_epi8(a) != 0;
|
|
348
|
+
}
|
|
349
|
+
|
|
350
|
+
} // end namespace internal
|
|
351
|
+
} // end namespace Eigen
|
|
352
|
+
|
|
353
|
+
#endif // EIGEN_REDUCTIONS_AVX_H
|
|
@@ -10,42 +10,299 @@
|
|
|
10
10
|
#ifndef EIGEN_TYPE_CASTING_AVX_H
|
|
11
11
|
#define EIGEN_TYPE_CASTING_AVX_H
|
|
12
12
|
|
|
13
|
+
// IWYU pragma: private
|
|
14
|
+
#include "../../InternalHeaderCheck.h"
|
|
15
|
+
|
|
13
16
|
namespace Eigen {
|
|
14
17
|
|
|
15
18
|
namespace internal {
|
|
16
19
|
|
|
17
|
-
|
|
18
|
-
|
|
20
|
+
#ifndef EIGEN_VECTORIZE_AVX512
|
|
21
|
+
template <>
|
|
22
|
+
struct type_casting_traits<float, bool> : vectorized_type_casting_traits<float, bool> {};
|
|
19
23
|
template <>
|
|
20
|
-
struct type_casting_traits<float,
|
|
21
|
-
enum {
|
|
22
|
-
VectorizedCast = 0,
|
|
23
|
-
SrcCoeffRatio = 1,
|
|
24
|
-
TgtCoeffRatio = 1
|
|
25
|
-
};
|
|
26
|
-
};
|
|
24
|
+
struct type_casting_traits<bool, float> : vectorized_type_casting_traits<bool, float> {};
|
|
27
25
|
|
|
28
26
|
template <>
|
|
29
|
-
struct type_casting_traits<int,
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
SrcCoeffRatio = 1,
|
|
33
|
-
TgtCoeffRatio = 1
|
|
34
|
-
};
|
|
35
|
-
};
|
|
27
|
+
struct type_casting_traits<float, int> : vectorized_type_casting_traits<float, int> {};
|
|
28
|
+
template <>
|
|
29
|
+
struct type_casting_traits<int, float> : vectorized_type_casting_traits<int, float> {};
|
|
36
30
|
|
|
31
|
+
template <>
|
|
32
|
+
struct type_casting_traits<float, double> : vectorized_type_casting_traits<float, double> {};
|
|
33
|
+
template <>
|
|
34
|
+
struct type_casting_traits<double, float> : vectorized_type_casting_traits<double, float> {};
|
|
37
35
|
|
|
36
|
+
template <>
|
|
37
|
+
struct type_casting_traits<double, int> : vectorized_type_casting_traits<double, int> {};
|
|
38
|
+
template <>
|
|
39
|
+
struct type_casting_traits<int, double> : vectorized_type_casting_traits<int, double> {};
|
|
38
40
|
|
|
39
|
-
template<>
|
|
40
|
-
|
|
41
|
+
template <>
|
|
42
|
+
struct type_casting_traits<half, float> : vectorized_type_casting_traits<half, float> {};
|
|
43
|
+
template <>
|
|
44
|
+
struct type_casting_traits<float, half> : vectorized_type_casting_traits<float, half> {};
|
|
45
|
+
|
|
46
|
+
template <>
|
|
47
|
+
struct type_casting_traits<bfloat16, float> : vectorized_type_casting_traits<bfloat16, float> {};
|
|
48
|
+
template <>
|
|
49
|
+
struct type_casting_traits<float, bfloat16> : vectorized_type_casting_traits<float, bfloat16> {};
|
|
50
|
+
|
|
51
|
+
#ifdef EIGEN_VECTORIZE_AVX2
|
|
52
|
+
template <>
|
|
53
|
+
struct type_casting_traits<double, int64_t> : vectorized_type_casting_traits<double, int64_t> {};
|
|
54
|
+
template <>
|
|
55
|
+
struct type_casting_traits<int64_t, double> : vectorized_type_casting_traits<int64_t, double> {};
|
|
56
|
+
#endif
|
|
57
|
+
#endif
|
|
58
|
+
|
|
59
|
+
template <>
|
|
60
|
+
EIGEN_STRONG_INLINE Packet16b pcast<Packet8f, Packet16b>(const Packet8f& a, const Packet8f& b) {
|
|
61
|
+
__m256 nonzero_a = _mm256_cmp_ps(a, pzero(a), _CMP_NEQ_UQ);
|
|
62
|
+
__m256 nonzero_b = _mm256_cmp_ps(b, pzero(b), _CMP_NEQ_UQ);
|
|
63
|
+
constexpr char kFF = '\255';
|
|
64
|
+
#ifndef EIGEN_VECTORIZE_AVX2
|
|
65
|
+
__m128i shuffle_mask128_a_lo = _mm_set_epi8(kFF, kFF, kFF, kFF, kFF, kFF, kFF, kFF, kFF, kFF, kFF, kFF, 12, 8, 4, 0);
|
|
66
|
+
__m128i shuffle_mask128_a_hi = _mm_set_epi8(kFF, kFF, kFF, kFF, kFF, kFF, kFF, kFF, 12, 8, 4, 0, kFF, kFF, kFF, kFF);
|
|
67
|
+
__m128i shuffle_mask128_b_lo = _mm_set_epi8(kFF, kFF, kFF, kFF, 12, 8, 4, 0, kFF, kFF, kFF, kFF, kFF, kFF, kFF, kFF);
|
|
68
|
+
__m128i shuffle_mask128_b_hi = _mm_set_epi8(12, 8, 4, 0, kFF, kFF, kFF, kFF, kFF, kFF, kFF, kFF, kFF, kFF, kFF, kFF);
|
|
69
|
+
__m128i a_hi = _mm_shuffle_epi8(_mm256_extractf128_si256(_mm256_castps_si256(nonzero_a), 1), shuffle_mask128_a_hi);
|
|
70
|
+
__m128i a_lo = _mm_shuffle_epi8(_mm256_extractf128_si256(_mm256_castps_si256(nonzero_a), 0), shuffle_mask128_a_lo);
|
|
71
|
+
__m128i b_hi = _mm_shuffle_epi8(_mm256_extractf128_si256(_mm256_castps_si256(nonzero_b), 1), shuffle_mask128_b_hi);
|
|
72
|
+
__m128i b_lo = _mm_shuffle_epi8(_mm256_extractf128_si256(_mm256_castps_si256(nonzero_b), 0), shuffle_mask128_b_lo);
|
|
73
|
+
__m128i merged = _mm_or_si128(_mm_or_si128(b_lo, b_hi), _mm_or_si128(a_lo, a_hi));
|
|
74
|
+
return _mm_and_si128(merged, _mm_set1_epi8(1));
|
|
75
|
+
#else
|
|
76
|
+
__m256i a_shuffle_mask = _mm256_set_epi8(kFF, kFF, kFF, kFF, kFF, kFF, kFF, kFF, 12, 8, 4, 0, kFF, kFF, kFF, kFF, kFF,
|
|
77
|
+
kFF, kFF, kFF, kFF, kFF, kFF, kFF, kFF, kFF, kFF, kFF, 12, 8, 4, 0);
|
|
78
|
+
__m256i b_shuffle_mask = _mm256_set_epi8(12, 8, 4, 0, kFF, kFF, kFF, kFF, kFF, kFF, kFF, kFF, kFF, kFF, kFF, kFF, kFF,
|
|
79
|
+
kFF, kFF, kFF, 12, 8, 4, 0, kFF, kFF, kFF, kFF, kFF, kFF, kFF, kFF);
|
|
80
|
+
__m256i a_shuff = _mm256_shuffle_epi8(_mm256_castps_si256(nonzero_a), a_shuffle_mask);
|
|
81
|
+
__m256i b_shuff = _mm256_shuffle_epi8(_mm256_castps_si256(nonzero_b), b_shuffle_mask);
|
|
82
|
+
__m256i a_or_b = _mm256_or_si256(a_shuff, b_shuff);
|
|
83
|
+
__m256i merged = _mm256_or_si256(a_or_b, _mm256_castsi128_si256(_mm256_extractf128_si256(a_or_b, 1)));
|
|
84
|
+
return _mm256_castsi256_si128(_mm256_and_si256(merged, _mm256_set1_epi8(1)));
|
|
85
|
+
#endif
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
template <>
|
|
89
|
+
EIGEN_STRONG_INLINE Packet8f pcast<Packet16b, Packet8f>(const Packet16b& a) {
|
|
90
|
+
const __m256 cst_one = _mm256_set1_ps(1.0f);
|
|
91
|
+
#ifdef EIGEN_VECTORIZE_AVX2
|
|
92
|
+
__m256i a_extended = _mm256_cvtepi8_epi32(a);
|
|
93
|
+
__m256i abcd_efgh = _mm256_cmpeq_epi32(a_extended, _mm256_setzero_si256());
|
|
94
|
+
#else
|
|
95
|
+
__m128i abcd_efhg_ijkl_mnop = _mm_cmpeq_epi8(a, _mm_setzero_si128());
|
|
96
|
+
__m128i aabb_ccdd_eeff_gghh = _mm_unpacklo_epi8(abcd_efhg_ijkl_mnop, abcd_efhg_ijkl_mnop);
|
|
97
|
+
__m128i aaaa_bbbb_cccc_dddd = _mm_unpacklo_epi8(aabb_ccdd_eeff_gghh, aabb_ccdd_eeff_gghh);
|
|
98
|
+
__m128i eeee_ffff_gggg_hhhh = _mm_unpackhi_epi8(aabb_ccdd_eeff_gghh, aabb_ccdd_eeff_gghh);
|
|
99
|
+
__m256i abcd_efgh = _mm256_setr_m128i(aaaa_bbbb_cccc_dddd, eeee_ffff_gggg_hhhh);
|
|
100
|
+
#endif
|
|
101
|
+
__m256 result = _mm256_andnot_ps(_mm256_castsi256_ps(abcd_efgh), cst_one);
|
|
102
|
+
return result;
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
template <>
|
|
106
|
+
EIGEN_STRONG_INLINE Packet8i pcast<Packet8f, Packet8i>(const Packet8f& a) {
|
|
107
|
+
return _mm256_cvttps_epi32(a);
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
template <>
|
|
111
|
+
EIGEN_STRONG_INLINE Packet8i pcast<Packet4d, Packet8i>(const Packet4d& a, const Packet4d& b) {
|
|
112
|
+
return _mm256_set_m128i(_mm256_cvttpd_epi32(b), _mm256_cvttpd_epi32(a));
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
template <>
|
|
116
|
+
EIGEN_STRONG_INLINE Packet4i pcast<Packet4d, Packet4i>(const Packet4d& a) {
|
|
117
|
+
return _mm256_cvttpd_epi32(a);
|
|
41
118
|
}
|
|
42
119
|
|
|
43
|
-
template<>
|
|
120
|
+
template <>
|
|
121
|
+
EIGEN_STRONG_INLINE Packet8f pcast<Packet8i, Packet8f>(const Packet8i& a) {
|
|
44
122
|
return _mm256_cvtepi32_ps(a);
|
|
45
123
|
}
|
|
46
124
|
|
|
47
|
-
|
|
125
|
+
template <>
|
|
126
|
+
EIGEN_STRONG_INLINE Packet8f pcast<Packet4d, Packet8f>(const Packet4d& a, const Packet4d& b) {
|
|
127
|
+
return _mm256_set_m128(_mm256_cvtpd_ps(b), _mm256_cvtpd_ps(a));
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
template <>
|
|
131
|
+
EIGEN_STRONG_INLINE Packet4f pcast<Packet4d, Packet4f>(const Packet4d& a) {
|
|
132
|
+
return _mm256_cvtpd_ps(a);
|
|
133
|
+
}
|
|
134
|
+
|
|
135
|
+
template <>
|
|
136
|
+
EIGEN_STRONG_INLINE Packet4d pcast<Packet8i, Packet4d>(const Packet8i& a) {
|
|
137
|
+
return _mm256_cvtepi32_pd(_mm256_castsi256_si128(a));
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
template <>
|
|
141
|
+
EIGEN_STRONG_INLINE Packet4d pcast<Packet4i, Packet4d>(const Packet4i& a) {
|
|
142
|
+
return _mm256_cvtepi32_pd(a);
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
template <>
|
|
146
|
+
EIGEN_STRONG_INLINE Packet4d pcast<Packet8f, Packet4d>(const Packet8f& a) {
|
|
147
|
+
return _mm256_cvtps_pd(_mm256_castps256_ps128(a));
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
template <>
|
|
151
|
+
EIGEN_STRONG_INLINE Packet4d pcast<Packet4f, Packet4d>(const Packet4f& a) {
|
|
152
|
+
return _mm256_cvtps_pd(a);
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
template <>
|
|
156
|
+
EIGEN_STRONG_INLINE Packet8i preinterpret<Packet8i, Packet8f>(const Packet8f& a) {
|
|
157
|
+
return _mm256_castps_si256(a);
|
|
158
|
+
}
|
|
159
|
+
|
|
160
|
+
template <>
|
|
161
|
+
EIGEN_STRONG_INLINE Packet8f preinterpret<Packet8f, Packet8i>(const Packet8i& a) {
|
|
162
|
+
return _mm256_castsi256_ps(a);
|
|
163
|
+
}
|
|
164
|
+
|
|
165
|
+
template <>
|
|
166
|
+
EIGEN_STRONG_INLINE Packet8ui preinterpret<Packet8ui, Packet8i>(const Packet8i& a) {
|
|
167
|
+
return Packet8ui(a);
|
|
168
|
+
}
|
|
169
|
+
|
|
170
|
+
template <>
|
|
171
|
+
EIGEN_STRONG_INLINE Packet8i preinterpret<Packet8i, Packet8ui>(const Packet8ui& a) {
|
|
172
|
+
return Packet8i(a);
|
|
173
|
+
}
|
|
174
|
+
|
|
175
|
+
// truncation operations
|
|
176
|
+
|
|
177
|
+
template <>
|
|
178
|
+
EIGEN_STRONG_INLINE Packet4f preinterpret<Packet4f, Packet8f>(const Packet8f& a) {
|
|
179
|
+
return _mm256_castps256_ps128(a);
|
|
180
|
+
}
|
|
181
|
+
|
|
182
|
+
template <>
|
|
183
|
+
EIGEN_STRONG_INLINE Packet2d preinterpret<Packet2d, Packet4d>(const Packet4d& a) {
|
|
184
|
+
return _mm256_castpd256_pd128(a);
|
|
185
|
+
}
|
|
186
|
+
|
|
187
|
+
template <>
|
|
188
|
+
EIGEN_STRONG_INLINE Packet4i preinterpret<Packet4i, Packet8i>(const Packet8i& a) {
|
|
189
|
+
return _mm256_castsi256_si128(a);
|
|
190
|
+
}
|
|
191
|
+
|
|
192
|
+
template <>
|
|
193
|
+
EIGEN_STRONG_INLINE Packet4ui preinterpret<Packet4ui, Packet8ui>(const Packet8ui& a) {
|
|
194
|
+
return _mm256_castsi256_si128(a);
|
|
195
|
+
}
|
|
196
|
+
|
|
197
|
+
#ifdef EIGEN_VECTORIZE_AVX2
|
|
198
|
+
template <>
|
|
199
|
+
EIGEN_STRONG_INLINE Packet4l pcast<Packet4d, Packet4l>(const Packet4d& a) {
|
|
200
|
+
#if defined(EIGEN_VECTORIZE_AVX512DQ) && defined(EIGEN_VECTORIZE_AVS512VL)
|
|
201
|
+
return _mm256_cvttpd_epi64(a);
|
|
202
|
+
#else
|
|
203
|
+
|
|
204
|
+
// if 'a' exceeds the numerical limits of int64_t, the behavior is undefined
|
|
205
|
+
|
|
206
|
+
// e <= 0 corresponds to |a| < 1, which should result in zero. incidentally, intel intrinsics with shift arguments
|
|
207
|
+
// greater than or equal to 64 produce zero. furthermore, negative shifts appear to be interpreted as large positive
|
|
208
|
+
// shifts (two's complement), which also result in zero. therefore, e does not need to be clamped to [0, 64)
|
|
209
|
+
|
|
210
|
+
constexpr int kTotalBits = sizeof(double) * CHAR_BIT, kMantissaBits = std::numeric_limits<double>::digits - 1,
|
|
211
|
+
kExponentBits = kTotalBits - kMantissaBits - 1, kBias = (1 << (kExponentBits - 1)) - 1;
|
|
212
|
+
|
|
213
|
+
const __m256i cst_one = _mm256_set1_epi64x(1);
|
|
214
|
+
const __m256i cst_total_bits = _mm256_set1_epi64x(kTotalBits);
|
|
215
|
+
const __m256i cst_bias = _mm256_set1_epi64x(kBias);
|
|
216
|
+
|
|
217
|
+
__m256i a_bits = _mm256_castpd_si256(a);
|
|
218
|
+
// shift left by 1 to clear the sign bit, and shift right by kMantissaBits + 1 to recover biased exponent
|
|
219
|
+
__m256i biased_e = _mm256_srli_epi64(_mm256_slli_epi64(a_bits, 1), kMantissaBits + 1);
|
|
220
|
+
__m256i e = _mm256_sub_epi64(biased_e, cst_bias);
|
|
221
|
+
|
|
222
|
+
// shift to the left by kExponentBits + 1 to clear the sign and exponent bits
|
|
223
|
+
__m256i shifted_mantissa = _mm256_slli_epi64(a_bits, kExponentBits + 1);
|
|
224
|
+
// shift to the right by kTotalBits - e to convert the significand to an integer
|
|
225
|
+
__m256i result_significand = _mm256_srlv_epi64(shifted_mantissa, _mm256_sub_epi64(cst_total_bits, e));
|
|
226
|
+
|
|
227
|
+
// add the implied bit
|
|
228
|
+
__m256i result_exponent = _mm256_sllv_epi64(cst_one, e);
|
|
229
|
+
// e <= 0 is interpreted as a large positive shift (2's complement), which also conveniently results in zero
|
|
230
|
+
__m256i result = _mm256_add_epi64(result_significand, result_exponent);
|
|
231
|
+
// handle negative arguments
|
|
232
|
+
__m256i sign_mask = _mm256_cmpgt_epi64(_mm256_setzero_si256(), a_bits);
|
|
233
|
+
result = _mm256_sub_epi64(_mm256_xor_si256(result, sign_mask), sign_mask);
|
|
234
|
+
return result;
|
|
235
|
+
#endif
|
|
236
|
+
}
|
|
237
|
+
|
|
238
|
+
template <>
|
|
239
|
+
EIGEN_STRONG_INLINE Packet4d pcast<Packet4l, Packet4d>(const Packet4l& a) {
|
|
240
|
+
#if defined(EIGEN_VECTORIZE_AVX512DQ) && defined(EIGEN_VECTORIZE_AVS512VL)
|
|
241
|
+
return _mm256_cvtepi64_pd(a);
|
|
242
|
+
#else
|
|
243
|
+
int64_t aux[4];
|
|
244
|
+
pstoreu(aux, a);
|
|
245
|
+
return _mm256_set_pd(static_cast<double>(aux[3]), static_cast<double>(aux[2]), static_cast<double>(aux[1]),
|
|
246
|
+
static_cast<double>(aux[0]));
|
|
247
|
+
#endif
|
|
248
|
+
}
|
|
249
|
+
|
|
250
|
+
template <>
|
|
251
|
+
EIGEN_STRONG_INLINE Packet4d pcast<Packet2l, Packet4d>(const Packet2l& a, const Packet2l& b) {
|
|
252
|
+
return _mm256_set_m128d((pcast<Packet2l, Packet2d>(b)), (pcast<Packet2l, Packet2d>(a)));
|
|
253
|
+
}
|
|
254
|
+
|
|
255
|
+
template <>
|
|
256
|
+
EIGEN_STRONG_INLINE Packet4ul preinterpret<Packet4ul, Packet4l>(const Packet4l& a) {
|
|
257
|
+
return Packet4ul(a);
|
|
258
|
+
}
|
|
259
|
+
|
|
260
|
+
template <>
|
|
261
|
+
EIGEN_STRONG_INLINE Packet4l preinterpret<Packet4l, Packet4ul>(const Packet4ul& a) {
|
|
262
|
+
return Packet4l(a);
|
|
263
|
+
}
|
|
264
|
+
|
|
265
|
+
template <>
|
|
266
|
+
EIGEN_STRONG_INLINE Packet4l preinterpret<Packet4l, Packet4d>(const Packet4d& a) {
|
|
267
|
+
return _mm256_castpd_si256(a);
|
|
268
|
+
}
|
|
269
|
+
|
|
270
|
+
template <>
|
|
271
|
+
EIGEN_STRONG_INLINE Packet4d preinterpret<Packet4d, Packet4l>(const Packet4l& a) {
|
|
272
|
+
return _mm256_castsi256_pd(a);
|
|
273
|
+
}
|
|
274
|
+
|
|
275
|
+
// truncation operations
|
|
276
|
+
template <>
|
|
277
|
+
EIGEN_STRONG_INLINE Packet2l preinterpret<Packet2l, Packet4l>(const Packet4l& a) {
|
|
278
|
+
return _mm256_castsi256_si128(a);
|
|
279
|
+
}
|
|
280
|
+
#endif
|
|
281
|
+
|
|
282
|
+
#ifndef EIGEN_VECTORIZE_AVX512FP16
|
|
283
|
+
template <>
|
|
284
|
+
EIGEN_STRONG_INLINE Packet8f pcast<Packet8h, Packet8f>(const Packet8h& a) {
|
|
285
|
+
return half2float(a);
|
|
286
|
+
}
|
|
287
|
+
|
|
288
|
+
template <>
|
|
289
|
+
EIGEN_STRONG_INLINE Packet8h pcast<Packet8f, Packet8h>(const Packet8f& a) {
|
|
290
|
+
return float2half(a);
|
|
291
|
+
}
|
|
292
|
+
#endif
|
|
293
|
+
|
|
294
|
+
template <>
|
|
295
|
+
EIGEN_STRONG_INLINE Packet8f pcast<Packet8bf, Packet8f>(const Packet8bf& a) {
|
|
296
|
+
return Bf16ToF32(a);
|
|
297
|
+
}
|
|
298
|
+
|
|
299
|
+
template <>
|
|
300
|
+
EIGEN_STRONG_INLINE Packet8bf pcast<Packet8f, Packet8bf>(const Packet8f& a) {
|
|
301
|
+
return F32ToBf16(a);
|
|
302
|
+
}
|
|
303
|
+
|
|
304
|
+
} // end namespace internal
|
|
48
305
|
|
|
49
|
-
}
|
|
306
|
+
} // end namespace Eigen
|
|
50
307
|
|
|
51
|
-
#endif
|
|
308
|
+
#endif // EIGEN_TYPE_CASTING_AVX_H
|