casadi 3.7.2__cp314-none-manylinux2014_i686.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- casadi/__init__.py +92 -0
- casadi/_casadi.so +0 -0
- casadi/casadi-cli +0 -0
- casadi/casadi.py +54468 -0
- casadi/cbc +0 -0
- casadi/clp +0 -0
- casadi/cmake/alpaqa/alpaqaConfig.cmake +24 -0
- casadi/cmake/alpaqa/alpaqaConfigVersion.cmake +65 -0
- casadi/cmake/alpaqa/alpaqaTargets-release.cmake +29 -0
- casadi/cmake/alpaqa/alpaqaTargets.cmake +131 -0
- casadi/cmake/casadi-config-version.cmake +11 -0
- casadi/cmake/casadi-config.cmake +8 -0
- casadi/cmake/casadi-targets-release.cmake +19 -0
- casadi/cmake/casadi-targets.cmake +107 -0
- casadi/cmake/ghc_filesystem/ghc_filesystem-config-version.cmake +85 -0
- casadi/cmake/ghc_filesystem/ghc_filesystem-config.cmake +30 -0
- casadi/cmake/ghc_filesystem/ghc_filesystem-targets.cmake +107 -0
- casadi/cmake/highs/highs-config.cmake +17 -0
- casadi/cmake/highs/highs-targets-release.cmake +28 -0
- casadi/cmake/highs/highs-targets.cmake +117 -0
- casadi/cmake/libzip/libzip-config-version.cmake +43 -0
- casadi/cmake/libzip/libzip-config.cmake +69 -0
- casadi/cmake/libzip/libzip-targets-release.cmake +19 -0
- casadi/cmake/libzip/libzip-targets.cmake +107 -0
- casadi/cmake/libzip/modules/FindMbedTLS.cmake +141 -0
- casadi/cmake/libzip/modules/FindNettle.cmake +141 -0
- casadi/cmake/libzip/modules/Findzstd.cmake +186 -0
- casadi/cmake/osqp/osqp-config.cmake +1 -0
- casadi/cmake/osqp/osqp-targets-noconfig.cmake +29 -0
- casadi/cmake/osqp/osqp-targets.cmake +113 -0
- casadi/cmake/proxsuite/find-external/Simde/FindSimde.cmake +39 -0
- casadi/cmake/proxsuite/proxsuiteConfig.cmake +177 -0
- casadi/cmake/proxsuite/proxsuiteConfigVersion.cmake +65 -0
- casadi/cmake/proxsuite/proxsuiteTargets.cmake +115 -0
- casadi/cmake/qdldl/qdldl-config.cmake +1 -0
- casadi/cmake/qdldl/qdldl-targets-noconfig.cmake +29 -0
- casadi/cmake/qdldl/qdldl-targets.cmake +113 -0
- casadi/cmake/sleqp/sleqp-config-version.cmake +65 -0
- casadi/cmake/sleqp/sleqp-config.cmake +1 -0
- casadi/cmake/sleqp/sleqp-targets-release.cmake +20 -0
- casadi/cmake/sleqp/sleqp-targets.cmake +106 -0
- casadi/cmake/trlib/trlib-config-release.cmake +19 -0
- casadi/cmake/trlib/trlib-config-version.cmake +83 -0
- casadi/cmake/trlib/trlib-config.cmake +107 -0
- casadi/highs +0 -0
- casadi/include/casadi/casadi.hpp +31 -0
- casadi/include/casadi/casadi.i +4920 -0
- casadi/include/casadi/casadi_c.h +138 -0
- casadi/include/casadi/casadi_numpy.hpp +97 -0
- casadi/include/casadi/config.h +46 -0
- casadi/include/casadi/core/archiver.hpp +58 -0
- casadi/include/casadi/core/blazing_spline.hpp +47 -0
- casadi/include/casadi/core/calculus.hpp +1805 -0
- casadi/include/casadi/core/callback.hpp +235 -0
- casadi/include/casadi/core/casadi_common.hpp +355 -0
- casadi/include/casadi/core/casadi_enum.hpp +90 -0
- casadi/include/casadi/core/casadi_export.h +43 -0
- casadi/include/casadi/core/casadi_interrupt.hpp +83 -0
- casadi/include/casadi/core/casadi_limits.hpp +104 -0
- casadi/include/casadi/core/casadi_logger.hpp +134 -0
- casadi/include/casadi/core/casadi_meta.hpp +122 -0
- casadi/include/casadi/core/casadi_misc.hpp +1022 -0
- casadi/include/casadi/core/casadi_types.hpp +66 -0
- casadi/include/casadi/core/code_generator.hpp +1071 -0
- casadi/include/casadi/core/conic.hpp +213 -0
- casadi/include/casadi/core/core.hpp +75 -0
- casadi/include/casadi/core/dae_builder.hpp +885 -0
- casadi/include/casadi/core/dm.hpp +90 -0
- casadi/include/casadi/core/dm_fwd.hpp +39 -0
- casadi/include/casadi/core/dple.hpp +138 -0
- casadi/include/casadi/core/exception.hpp +167 -0
- casadi/include/casadi/core/expm.hpp +84 -0
- casadi/include/casadi/core/external.hpp +70 -0
- casadi/include/casadi/core/filesystem.hpp +58 -0
- casadi/include/casadi/core/fmu.hpp +270 -0
- casadi/include/casadi/core/function.hpp +1389 -0
- casadi/include/casadi/core/generic_expression.hpp +760 -0
- casadi/include/casadi/core/generic_matrix.hpp +1805 -0
- casadi/include/casadi/core/generic_shared.hpp +395 -0
- casadi/include/casadi/core/generic_shared_impl.hpp +218 -0
- casadi/include/casadi/core/generic_shared_internal.hpp +215 -0
- casadi/include/casadi/core/generic_type.hpp +314 -0
- casadi/include/casadi/core/global_options.hpp +107 -0
- casadi/include/casadi/core/im.hpp +52 -0
- casadi/include/casadi/core/im_fwd.hpp +35 -0
- casadi/include/casadi/core/importer.hpp +221 -0
- casadi/include/casadi/core/integration_tools.hpp +292 -0
- casadi/include/casadi/core/integrator.hpp +290 -0
- casadi/include/casadi/core/interpolant.hpp +163 -0
- casadi/include/casadi/core/linsol.hpp +171 -0
- casadi/include/casadi/core/matrix_decl.hpp +1423 -0
- casadi/include/casadi/core/matrix_fwd.hpp +37 -0
- casadi/include/casadi/core/mx.hpp +1014 -0
- casadi/include/casadi/core/nlp_builder.hpp +163 -0
- casadi/include/casadi/core/nlp_tools.hpp +124 -0
- casadi/include/casadi/core/nlpsol.hpp +234 -0
- casadi/include/casadi/core/nonzeros.hpp +111 -0
- casadi/include/casadi/core/options.hpp +122 -0
- casadi/include/casadi/core/optistack.hpp +704 -0
- casadi/include/casadi/core/polynomial.hpp +126 -0
- casadi/include/casadi/core/printable.hpp +81 -0
- casadi/include/casadi/core/resource.hpp +107 -0
- casadi/include/casadi/core/rootfinder.hpp +176 -0
- casadi/include/casadi/core/runtime/casadi_axpy.hpp +8 -0
- casadi/include/casadi/core/runtime/casadi_bfgs.hpp +49 -0
- casadi/include/casadi/core/runtime/casadi_bilin.hpp +42 -0
- casadi/include/casadi/core/runtime/casadi_blazing_1d_boor_eval.hpp +112 -0
- casadi/include/casadi/core/runtime/casadi_blazing_2d_boor_eval.hpp +311 -0
- casadi/include/casadi/core/runtime/casadi_blazing_3d_boor_eval.hpp +645 -0
- casadi/include/casadi/core/runtime/casadi_blazing_de_boor.hpp +101 -0
- casadi/include/casadi/core/runtime/casadi_bound_consistency.hpp +51 -0
- casadi/include/casadi/core/runtime/casadi_cache.hpp +59 -0
- casadi/include/casadi/core/runtime/casadi_clear.hpp +27 -0
- casadi/include/casadi/core/runtime/casadi_clip_max.hpp +33 -0
- casadi/include/casadi/core/runtime/casadi_clip_min.hpp +33 -0
- casadi/include/casadi/core/runtime/casadi_convexify.hpp +182 -0
- casadi/include/casadi/core/runtime/casadi_copy.hpp +31 -0
- casadi/include/casadi/core/runtime/casadi_cvx.hpp +463 -0
- casadi/include/casadi/core/runtime/casadi_de_boor.hpp +36 -0
- casadi/include/casadi/core/runtime/casadi_dense_lsqr.hpp +247 -0
- casadi/include/casadi/core/runtime/casadi_densify.hpp +48 -0
- casadi/include/casadi/core/runtime/casadi_dot.hpp +27 -0
- casadi/include/casadi/core/runtime/casadi_feasiblesqpmethod.hpp +208 -0
- casadi/include/casadi/core/runtime/casadi_file_slurp.hpp +32 -0
- casadi/include/casadi/core/runtime/casadi_fill.hpp +27 -0
- casadi/include/casadi/core/runtime/casadi_finite_diff.hpp +345 -0
- casadi/include/casadi/core/runtime/casadi_flip.hpp +33 -0
- casadi/include/casadi/core/runtime/casadi_getu.hpp +35 -0
- casadi/include/casadi/core/runtime/casadi_iamax.hpp +36 -0
- casadi/include/casadi/core/runtime/casadi_interpn.hpp +39 -0
- casadi/include/casadi/core/runtime/casadi_interpn_grad.hpp +72 -0
- casadi/include/casadi/core/runtime/casadi_interpn_interpolate.hpp +43 -0
- casadi/include/casadi/core/runtime/casadi_interpn_weights.hpp +39 -0
- casadi/include/casadi/core/runtime/casadi_ipqp.hpp +868 -0
- casadi/include/casadi/core/runtime/casadi_jac.hpp +186 -0
- casadi/include/casadi/core/runtime/casadi_kkt.hpp +67 -0
- casadi/include/casadi/core/runtime/casadi_kron.hpp +50 -0
- casadi/include/casadi/core/runtime/casadi_ldl.hpp +109 -0
- casadi/include/casadi/core/runtime/casadi_logsumexp.hpp +41 -0
- casadi/include/casadi/core/runtime/casadi_low.hpp +65 -0
- casadi/include/casadi/core/runtime/casadi_lsqr.hpp +247 -0
- casadi/include/casadi/core/runtime/casadi_masked_norm_inf.hpp +33 -0
- casadi/include/casadi/core/runtime/casadi_max_viol.hpp +37 -0
- casadi/include/casadi/core/runtime/casadi_mmax.hpp +28 -0
- casadi/include/casadi/core/runtime/casadi_mmin.hpp +29 -0
- casadi/include/casadi/core/runtime/casadi_mtimes.hpp +75 -0
- casadi/include/casadi/core/runtime/casadi_mv.hpp +46 -0
- casadi/include/casadi/core/runtime/casadi_mv_dense.hpp +39 -0
- casadi/include/casadi/core/runtime/casadi_nd_boor_dual_eval.hpp +127 -0
- casadi/include/casadi/core/runtime/casadi_nd_boor_eval.hpp +120 -0
- casadi/include/casadi/core/runtime/casadi_newton.hpp +66 -0
- casadi/include/casadi/core/runtime/casadi_nlp.hpp +295 -0
- casadi/include/casadi/core/runtime/casadi_norm_1.hpp +29 -0
- casadi/include/casadi/core/runtime/casadi_norm_2.hpp +24 -0
- casadi/include/casadi/core/runtime/casadi_norm_inf.hpp +28 -0
- casadi/include/casadi/core/runtime/casadi_norm_inf_mul.hpp +105 -0
- casadi/include/casadi/core/runtime/casadi_ocp_block.hpp +55 -0
- casadi/include/casadi/core/runtime/casadi_oracle.hpp +44 -0
- casadi/include/casadi/core/runtime/casadi_oracle_callback.hpp +39 -0
- casadi/include/casadi/core/runtime/casadi_polyval.hpp +29 -0
- casadi/include/casadi/core/runtime/casadi_print_canonical.hpp +55 -0
- casadi/include/casadi/core/runtime/casadi_print_scalar.hpp +25 -0
- casadi/include/casadi/core/runtime/casadi_print_vector.hpp +32 -0
- casadi/include/casadi/core/runtime/casadi_printme.hpp +26 -0
- casadi/include/casadi/core/runtime/casadi_project.hpp +39 -0
- casadi/include/casadi/core/runtime/casadi_qp.hpp +86 -0
- casadi/include/casadi/core/runtime/casadi_qr.hpp +272 -0
- casadi/include/casadi/core/runtime/casadi_qrqp.hpp +1239 -0
- casadi/include/casadi/core/runtime/casadi_rank1.hpp +40 -0
- casadi/include/casadi/core/runtime/casadi_regularize.hpp +73 -0
- casadi/include/casadi/core/runtime/casadi_runtime.hpp +318 -0
- casadi/include/casadi/core/runtime/casadi_scal.hpp +26 -0
- casadi/include/casadi/core/runtime/casadi_scaled_copy.hpp +31 -0
- casadi/include/casadi/core/runtime/casadi_sparsify.hpp +42 -0
- casadi/include/casadi/core/runtime/casadi_sparsity.hpp +24 -0
- casadi/include/casadi/core/runtime/casadi_sqpmethod.hpp +178 -0
- casadi/include/casadi/core/runtime/casadi_sum.hpp +31 -0
- casadi/include/casadi/core/runtime/casadi_sum_viol.hpp +37 -0
- casadi/include/casadi/core/runtime/casadi_swap.hpp +32 -0
- casadi/include/casadi/core/runtime/casadi_trans.hpp +35 -0
- casadi/include/casadi/core/runtime/casadi_tri_project.hpp +37 -0
- casadi/include/casadi/core/runtime/casadi_trilsolve.hpp +81 -0
- casadi/include/casadi/core/runtime/casadi_triusolve.hpp +81 -0
- casadi/include/casadi/core/runtime/casadi_vector_fmax.hpp +28 -0
- casadi/include/casadi/core/runtime/casadi_vector_fmin.hpp +28 -0
- casadi/include/casadi/core/runtime/casadi_vfmax.hpp +28 -0
- casadi/include/casadi/core/runtime/casadi_vfmin.hpp +28 -0
- casadi/include/casadi/core/runtime/shared.hpp +261 -0
- casadi/include/casadi/core/serializer.hpp +264 -0
- casadi/include/casadi/core/serializing_stream.hpp +336 -0
- casadi/include/casadi/core/shared_object.hpp +182 -0
- casadi/include/casadi/core/slice.hpp +149 -0
- casadi/include/casadi/core/sparsity.hpp +1507 -0
- casadi/include/casadi/core/sparsity_interface.hpp +763 -0
- casadi/include/casadi/core/submatrix.hpp +156 -0
- casadi/include/casadi/core/sx.hpp +244 -0
- casadi/include/casadi/core/sx_elem.hpp +376 -0
- casadi/include/casadi/core/sx_fwd.hpp +45 -0
- casadi/include/casadi/core/timing.hpp +98 -0
- casadi/include/casadi/core/tools.hpp +67 -0
- casadi/include/casadi/core/xml_file.hpp +93 -0
- casadi/include/casadi/core/xml_node.hpp +212 -0
- casadi/include/casadi/doc.i +62244 -0
- casadi/include/casadi/doc_merged.i +38499 -0
- casadi/include/casadi/mem.h +311 -0
- casadi/include/casadi/valgrind-casadi.supp +649 -0
- casadi/include/casadi/valgrind-python.supp +3886 -0
- casadi/include/coin-or/IpAlgBuilder.hpp +417 -0
- casadi/include/coin-or/IpAlgStrategy.hpp +201 -0
- casadi/include/coin-or/IpAlgTypes.hpp +64 -0
- casadi/include/coin-or/IpAugSystemSolver.hpp +212 -0
- casadi/include/coin-or/IpBlas.hpp +426 -0
- casadi/include/coin-or/IpCachedResults.hpp +897 -0
- casadi/include/coin-or/IpCompoundMatrix.hpp +423 -0
- casadi/include/coin-or/IpCompoundSymMatrix.hpp +348 -0
- casadi/include/coin-or/IpCompoundVector.hpp +395 -0
- casadi/include/coin-or/IpConvCheck.hpp +97 -0
- casadi/include/coin-or/IpDebug.hpp +167 -0
- casadi/include/coin-or/IpDenseVector.hpp +626 -0
- casadi/include/coin-or/IpDiagMatrix.hpp +158 -0
- casadi/include/coin-or/IpEqMultCalculator.hpp +76 -0
- casadi/include/coin-or/IpException.hpp +156 -0
- casadi/include/coin-or/IpExpansionMatrix.hpp +245 -0
- casadi/include/coin-or/IpGenTMatrix.hpp +290 -0
- casadi/include/coin-or/IpHessianUpdater.hpp +73 -0
- casadi/include/coin-or/IpIdentityMatrix.hpp +167 -0
- casadi/include/coin-or/IpIpoptAlg.hpp +257 -0
- casadi/include/coin-or/IpIpoptApplication.hpp +367 -0
- casadi/include/coin-or/IpIpoptCalculatedQuantities.hpp +1009 -0
- casadi/include/coin-or/IpIpoptData.hpp +966 -0
- casadi/include/coin-or/IpIpoptNLP.hpp +328 -0
- casadi/include/coin-or/IpIterateInitializer.hpp +68 -0
- casadi/include/coin-or/IpIteratesVector.hpp +840 -0
- casadi/include/coin-or/IpIterationOutput.hpp +78 -0
- casadi/include/coin-or/IpJournalist.hpp +573 -0
- casadi/include/coin-or/IpLapack.hpp +227 -0
- casadi/include/coin-or/IpLibraryLoader.hpp +76 -0
- casadi/include/coin-or/IpLineSearch.hpp +106 -0
- casadi/include/coin-or/IpLinearSolvers.h +46 -0
- casadi/include/coin-or/IpMatrix.hpp +434 -0
- casadi/include/coin-or/IpMuUpdate.hpp +77 -0
- casadi/include/coin-or/IpNLP.hpp +306 -0
- casadi/include/coin-or/IpNLPScaling.hpp +582 -0
- casadi/include/coin-or/IpObserver.hpp +422 -0
- casadi/include/coin-or/IpOptionsList.hpp +412 -0
- casadi/include/coin-or/IpOrigIpoptNLP.hpp +603 -0
- casadi/include/coin-or/IpPDSystemSolver.hpp +137 -0
- casadi/include/coin-or/IpReferenced.hpp +262 -0
- casadi/include/coin-or/IpRegOptions.hpp +1152 -0
- casadi/include/coin-or/IpReturnCodes.h +23 -0
- casadi/include/coin-or/IpReturnCodes.hpp +18 -0
- casadi/include/coin-or/IpReturnCodes.inc +71 -0
- casadi/include/coin-or/IpReturnCodes_inc.h +45 -0
- casadi/include/coin-or/IpScaledMatrix.hpp +291 -0
- casadi/include/coin-or/IpSearchDirCalculator.hpp +72 -0
- casadi/include/coin-or/IpSmartPtr.hpp +865 -0
- casadi/include/coin-or/IpSolveStatistics.hpp +210 -0
- casadi/include/coin-or/IpSparseSymLinearSolverInterface.hpp +260 -0
- casadi/include/coin-or/IpStdAugSystemSolver.cpp +555 -0
- casadi/include/coin-or/IpStdCInterface.h +428 -0
- casadi/include/coin-or/IpSumSymMatrix.hpp +186 -0
- casadi/include/coin-or/IpSymLinearSolver.hpp +141 -0
- casadi/include/coin-or/IpSymMatrix.hpp +167 -0
- casadi/include/coin-or/IpSymScaledMatrix.hpp +255 -0
- casadi/include/coin-or/IpSymTMatrix.hpp +275 -0
- casadi/include/coin-or/IpTNLP.hpp +820 -0
- casadi/include/coin-or/IpTNLPAdapter.hpp +648 -0
- casadi/include/coin-or/IpTNLPReducer.hpp +274 -0
- casadi/include/coin-or/IpTaggedObject.hpp +128 -0
- casadi/include/coin-or/IpTimedTask.hpp +218 -0
- casadi/include/coin-or/IpTimingStatistics.hpp +323 -0
- casadi/include/coin-or/IpTripletHelper.hpp +308 -0
- casadi/include/coin-or/IpTypes.h +81 -0
- casadi/include/coin-or/IpTypes.hpp +30 -0
- casadi/include/coin-or/IpUtils.hpp +166 -0
- casadi/include/coin-or/IpVector.hpp +892 -0
- casadi/include/coin-or/IpZeroSymMatrix.hpp +155 -0
- casadi/include/coin-or/IpoptConfig.h +45 -0
- casadi/include/coin-or/SensAlgorithm.hpp +114 -0
- casadi/include/coin-or/SensApplication.hpp +188 -0
- casadi/include/coin-or/SensBacksolver.hpp +36 -0
- casadi/include/coin-or/SensMeasurement.hpp +56 -0
- casadi/include/coin-or/SensPCalculator.hpp +137 -0
- casadi/include/coin-or/SensRegOp.hpp +21 -0
- casadi/include/coin-or/SensSchurData.hpp +182 -0
- casadi/include/coin-or/SensSchurDriver.hpp +118 -0
- casadi/include/coin-or/SensSimpleBacksolver.hpp +49 -0
- casadi/include/coin-or/SensStepCalc.hpp +85 -0
- casadi/include/coin-or/SensUtils.hpp +63 -0
- casadi/include/coin-or/metis/defs.h +161 -0
- casadi/include/coin-or/metis/macros.h +143 -0
- casadi/include/coin-or/metis/metis.h +37 -0
- casadi/include/coin-or/metis/proto.h +505 -0
- casadi/include/coin-or/metis/rename.h +418 -0
- casadi/include/coin-or/metis/struct.h +251 -0
- casadi/include/coin-or/mumps/dmumps_c.h +142 -0
- casadi/include/coin-or/mumps/mumps_c_types.h +72 -0
- casadi/include/coin-or/mumps/mumps_compat.h +27 -0
- casadi/include/coin-or/mumps/mumps_int_def.h +11 -0
- casadi/include/coin-or/mumps/mumps_mpi.h +67 -0
- casadi/include/daqp/api.h +46 -0
- casadi/include/daqp/auxiliary.h +29 -0
- casadi/include/daqp/bnb.h +32 -0
- casadi/include/daqp/codegen.h +18 -0
- casadi/include/daqp/constants.h +92 -0
- casadi/include/daqp/daqp.h +22 -0
- casadi/include/daqp/daqp_prox.h +18 -0
- casadi/include/daqp/factorization.h +18 -0
- casadi/include/daqp/types.h +161 -0
- casadi/include/daqp/utils.h +44 -0
- casadi/include/eigen3/Eigen/Cholesky +45 -0
- casadi/include/eigen3/Eigen/CholmodSupport +48 -0
- casadi/include/eigen3/Eigen/Core +384 -0
- casadi/include/eigen3/Eigen/Dense +7 -0
- casadi/include/eigen3/Eigen/Eigen +2 -0
- casadi/include/eigen3/Eigen/Eigenvalues +60 -0
- casadi/include/eigen3/Eigen/Geometry +59 -0
- casadi/include/eigen3/Eigen/Householder +29 -0
- casadi/include/eigen3/Eigen/IterativeLinearSolvers +48 -0
- casadi/include/eigen3/Eigen/Jacobi +32 -0
- casadi/include/eigen3/Eigen/KLUSupport +41 -0
- casadi/include/eigen3/Eigen/LU +47 -0
- casadi/include/eigen3/Eigen/MetisSupport +35 -0
- casadi/include/eigen3/Eigen/OrderingMethods +70 -0
- casadi/include/eigen3/Eigen/PaStiXSupport +49 -0
- casadi/include/eigen3/Eigen/PardisoSupport +35 -0
- casadi/include/eigen3/Eigen/QR +50 -0
- casadi/include/eigen3/Eigen/QtAlignedMalloc +39 -0
- casadi/include/eigen3/Eigen/SPQRSupport +34 -0
- casadi/include/eigen3/Eigen/SVD +50 -0
- casadi/include/eigen3/Eigen/Sparse +34 -0
- casadi/include/eigen3/Eigen/SparseCholesky +37 -0
- casadi/include/eigen3/Eigen/SparseCore +69 -0
- casadi/include/eigen3/Eigen/SparseLU +50 -0
- casadi/include/eigen3/Eigen/SparseQR +36 -0
- casadi/include/eigen3/Eigen/StdDeque +27 -0
- casadi/include/eigen3/Eigen/StdList +26 -0
- casadi/include/eigen3/Eigen/StdVector +27 -0
- casadi/include/eigen3/Eigen/SuperLUSupport +64 -0
- casadi/include/eigen3/Eigen/UmfPackSupport +40 -0
- casadi/include/eigen3/Eigen/src/Cholesky/LDLT.h +688 -0
- casadi/include/eigen3/Eigen/src/Cholesky/LLT.h +558 -0
- casadi/include/eigen3/Eigen/src/Cholesky/LLT_LAPACKE.h +99 -0
- casadi/include/eigen3/Eigen/src/CholmodSupport/CholmodSupport.h +682 -0
- casadi/include/eigen3/Eigen/src/Core/ArithmeticSequence.h +413 -0
- casadi/include/eigen3/Eigen/src/Core/Array.h +417 -0
- casadi/include/eigen3/Eigen/src/Core/ArrayBase.h +226 -0
- casadi/include/eigen3/Eigen/src/Core/ArrayWrapper.h +209 -0
- casadi/include/eigen3/Eigen/src/Core/Assign.h +90 -0
- casadi/include/eigen3/Eigen/src/Core/AssignEvaluator.h +1010 -0
- casadi/include/eigen3/Eigen/src/Core/Assign_MKL.h +178 -0
- casadi/include/eigen3/Eigen/src/Core/BandMatrix.h +353 -0
- casadi/include/eigen3/Eigen/src/Core/Block.h +448 -0
- casadi/include/eigen3/Eigen/src/Core/BooleanRedux.h +162 -0
- casadi/include/eigen3/Eigen/src/Core/CommaInitializer.h +164 -0
- casadi/include/eigen3/Eigen/src/Core/ConditionEstimator.h +175 -0
- casadi/include/eigen3/Eigen/src/Core/CoreEvaluators.h +1741 -0
- casadi/include/eigen3/Eigen/src/Core/CoreIterators.h +132 -0
- casadi/include/eigen3/Eigen/src/Core/CwiseBinaryOp.h +183 -0
- casadi/include/eigen3/Eigen/src/Core/CwiseNullaryOp.h +1001 -0
- casadi/include/eigen3/Eigen/src/Core/CwiseTernaryOp.h +197 -0
- casadi/include/eigen3/Eigen/src/Core/CwiseUnaryOp.h +103 -0
- casadi/include/eigen3/Eigen/src/Core/CwiseUnaryView.h +132 -0
- casadi/include/eigen3/Eigen/src/Core/DenseBase.h +701 -0
- casadi/include/eigen3/Eigen/src/Core/DenseCoeffsBase.h +685 -0
- casadi/include/eigen3/Eigen/src/Core/DenseStorage.h +652 -0
- casadi/include/eigen3/Eigen/src/Core/Diagonal.h +258 -0
- casadi/include/eigen3/Eigen/src/Core/DiagonalMatrix.h +391 -0
- casadi/include/eigen3/Eigen/src/Core/DiagonalProduct.h +28 -0
- casadi/include/eigen3/Eigen/src/Core/Dot.h +318 -0
- casadi/include/eigen3/Eigen/src/Core/EigenBase.h +160 -0
- casadi/include/eigen3/Eigen/src/Core/ForceAlignedAccess.h +150 -0
- casadi/include/eigen3/Eigen/src/Core/Fuzzy.h +155 -0
- casadi/include/eigen3/Eigen/src/Core/GeneralProduct.h +465 -0
- casadi/include/eigen3/Eigen/src/Core/GenericPacketMath.h +1040 -0
- casadi/include/eigen3/Eigen/src/Core/GlobalFunctions.h +194 -0
- casadi/include/eigen3/Eigen/src/Core/IO.h +258 -0
- casadi/include/eigen3/Eigen/src/Core/IndexedView.h +237 -0
- casadi/include/eigen3/Eigen/src/Core/Inverse.h +117 -0
- casadi/include/eigen3/Eigen/src/Core/Map.h +171 -0
- casadi/include/eigen3/Eigen/src/Core/MapBase.h +310 -0
- casadi/include/eigen3/Eigen/src/Core/MathFunctions.h +2057 -0
- casadi/include/eigen3/Eigen/src/Core/MathFunctionsImpl.h +200 -0
- casadi/include/eigen3/Eigen/src/Core/Matrix.h +565 -0
- casadi/include/eigen3/Eigen/src/Core/MatrixBase.h +547 -0
- casadi/include/eigen3/Eigen/src/Core/NestByValue.h +85 -0
- casadi/include/eigen3/Eigen/src/Core/NoAlias.h +109 -0
- casadi/include/eigen3/Eigen/src/Core/NumTraits.h +335 -0
- casadi/include/eigen3/Eigen/src/Core/PartialReduxEvaluator.h +232 -0
- casadi/include/eigen3/Eigen/src/Core/PermutationMatrix.h +605 -0
- casadi/include/eigen3/Eigen/src/Core/PlainObjectBase.h +1128 -0
- casadi/include/eigen3/Eigen/src/Core/Product.h +191 -0
- casadi/include/eigen3/Eigen/src/Core/ProductEvaluators.h +1179 -0
- casadi/include/eigen3/Eigen/src/Core/Random.h +218 -0
- casadi/include/eigen3/Eigen/src/Core/Redux.h +515 -0
- casadi/include/eigen3/Eigen/src/Core/Ref.h +381 -0
- casadi/include/eigen3/Eigen/src/Core/Replicate.h +142 -0
- casadi/include/eigen3/Eigen/src/Core/Reshaped.h +454 -0
- casadi/include/eigen3/Eigen/src/Core/ReturnByValue.h +119 -0
- casadi/include/eigen3/Eigen/src/Core/Reverse.h +217 -0
- casadi/include/eigen3/Eigen/src/Core/Select.h +164 -0
- casadi/include/eigen3/Eigen/src/Core/SelfAdjointView.h +365 -0
- casadi/include/eigen3/Eigen/src/Core/SelfCwiseBinaryOp.h +47 -0
- casadi/include/eigen3/Eigen/src/Core/Solve.h +188 -0
- casadi/include/eigen3/Eigen/src/Core/SolveTriangular.h +235 -0
- casadi/include/eigen3/Eigen/src/Core/SolverBase.h +168 -0
- casadi/include/eigen3/Eigen/src/Core/StableNorm.h +251 -0
- casadi/include/eigen3/Eigen/src/Core/StlIterators.h +463 -0
- casadi/include/eigen3/Eigen/src/Core/Stride.h +116 -0
- casadi/include/eigen3/Eigen/src/Core/Swap.h +68 -0
- casadi/include/eigen3/Eigen/src/Core/Transpose.h +464 -0
- casadi/include/eigen3/Eigen/src/Core/Transpositions.h +386 -0
- casadi/include/eigen3/Eigen/src/Core/TriangularMatrix.h +1001 -0
- casadi/include/eigen3/Eigen/src/Core/VectorBlock.h +96 -0
- casadi/include/eigen3/Eigen/src/Core/VectorwiseOp.h +784 -0
- casadi/include/eigen3/Eigen/src/Core/Visitor.h +381 -0
- casadi/include/eigen3/Eigen/src/Core/arch/AVX/Complex.h +372 -0
- casadi/include/eigen3/Eigen/src/Core/arch/AVX/MathFunctions.h +228 -0
- casadi/include/eigen3/Eigen/src/Core/arch/AVX/PacketMath.h +1574 -0
- casadi/include/eigen3/Eigen/src/Core/arch/AVX/TypeCasting.h +115 -0
- casadi/include/eigen3/Eigen/src/Core/arch/AVX512/Complex.h +422 -0
- casadi/include/eigen3/Eigen/src/Core/arch/AVX512/MathFunctions.h +362 -0
- casadi/include/eigen3/Eigen/src/Core/arch/AVX512/PacketMath.h +2303 -0
- casadi/include/eigen3/Eigen/src/Core/arch/AVX512/TypeCasting.h +89 -0
- casadi/include/eigen3/Eigen/src/Core/arch/AltiVec/Complex.h +417 -0
- casadi/include/eigen3/Eigen/src/Core/arch/AltiVec/MathFunctions.h +90 -0
- casadi/include/eigen3/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2937 -0
- casadi/include/eigen3/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +221 -0
- casadi/include/eigen3/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +629 -0
- casadi/include/eigen3/Eigen/src/Core/arch/AltiVec/PacketMath.h +2711 -0
- casadi/include/eigen3/Eigen/src/Core/arch/CUDA/Complex.h +258 -0
- casadi/include/eigen3/Eigen/src/Core/arch/Default/BFloat16.h +700 -0
- casadi/include/eigen3/Eigen/src/Core/arch/Default/ConjHelper.h +117 -0
- casadi/include/eigen3/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1649 -0
- casadi/include/eigen3/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +110 -0
- casadi/include/eigen3/Eigen/src/Core/arch/Default/Half.h +942 -0
- casadi/include/eigen3/Eigen/src/Core/arch/Default/Settings.h +49 -0
- casadi/include/eigen3/Eigen/src/Core/arch/Default/TypeCasting.h +120 -0
- casadi/include/eigen3/Eigen/src/Core/arch/GPU/MathFunctions.h +103 -0
- casadi/include/eigen3/Eigen/src/Core/arch/GPU/PacketMath.h +1685 -0
- casadi/include/eigen3/Eigen/src/Core/arch/GPU/TypeCasting.h +80 -0
- casadi/include/eigen3/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
- casadi/include/eigen3/Eigen/src/Core/arch/MSA/Complex.h +648 -0
- casadi/include/eigen3/Eigen/src/Core/arch/MSA/MathFunctions.h +387 -0
- casadi/include/eigen3/Eigen/src/Core/arch/MSA/PacketMath.h +1233 -0
- casadi/include/eigen3/Eigen/src/Core/arch/NEON/Complex.h +584 -0
- casadi/include/eigen3/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +183 -0
- casadi/include/eigen3/Eigen/src/Core/arch/NEON/MathFunctions.h +75 -0
- casadi/include/eigen3/Eigen/src/Core/arch/NEON/PacketMath.h +4587 -0
- casadi/include/eigen3/Eigen/src/Core/arch/NEON/TypeCasting.h +1419 -0
- casadi/include/eigen3/Eigen/src/Core/arch/SSE/Complex.h +351 -0
- casadi/include/eigen3/Eigen/src/Core/arch/SSE/MathFunctions.h +199 -0
- casadi/include/eigen3/Eigen/src/Core/arch/SSE/PacketMath.h +1505 -0
- casadi/include/eigen3/Eigen/src/Core/arch/SSE/TypeCasting.h +142 -0
- casadi/include/eigen3/Eigen/src/Core/arch/SVE/MathFunctions.h +44 -0
- casadi/include/eigen3/Eigen/src/Core/arch/SVE/PacketMath.h +752 -0
- casadi/include/eigen3/Eigen/src/Core/arch/SVE/TypeCasting.h +49 -0
- casadi/include/eigen3/Eigen/src/Core/arch/SYCL/InteropHeaders.h +232 -0
- casadi/include/eigen3/Eigen/src/Core/arch/SYCL/MathFunctions.h +301 -0
- casadi/include/eigen3/Eigen/src/Core/arch/SYCL/PacketMath.h +670 -0
- casadi/include/eigen3/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +694 -0
- casadi/include/eigen3/Eigen/src/Core/arch/SYCL/TypeCasting.h +85 -0
- casadi/include/eigen3/Eigen/src/Core/arch/ZVector/Complex.h +426 -0
- casadi/include/eigen3/Eigen/src/Core/arch/ZVector/MathFunctions.h +233 -0
- casadi/include/eigen3/Eigen/src/Core/arch/ZVector/PacketMath.h +1060 -0
- casadi/include/eigen3/Eigen/src/Core/functors/AssignmentFunctors.h +177 -0
- casadi/include/eigen3/Eigen/src/Core/functors/BinaryFunctors.h +541 -0
- casadi/include/eigen3/Eigen/src/Core/functors/NullaryFunctors.h +189 -0
- casadi/include/eigen3/Eigen/src/Core/functors/StlFunctors.h +166 -0
- casadi/include/eigen3/Eigen/src/Core/functors/TernaryFunctors.h +25 -0
- casadi/include/eigen3/Eigen/src/Core/functors/UnaryFunctors.h +1131 -0
- casadi/include/eigen3/Eigen/src/Core/products/GeneralBlockPanelKernel.h +2645 -0
- casadi/include/eigen3/Eigen/src/Core/products/GeneralMatrixMatrix.h +517 -0
- casadi/include/eigen3/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +317 -0
- casadi/include/eigen3/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +145 -0
- casadi/include/eigen3/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +124 -0
- casadi/include/eigen3/Eigen/src/Core/products/GeneralMatrixVector.h +518 -0
- casadi/include/eigen3/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +136 -0
- casadi/include/eigen3/Eigen/src/Core/products/Parallelizer.h +180 -0
- casadi/include/eigen3/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +544 -0
- casadi/include/eigen3/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +295 -0
- casadi/include/eigen3/Eigen/src/Core/products/SelfadjointMatrixVector.h +262 -0
- casadi/include/eigen3/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +118 -0
- casadi/include/eigen3/Eigen/src/Core/products/SelfadjointProduct.h +133 -0
- casadi/include/eigen3/Eigen/src/Core/products/SelfadjointRank2Update.h +94 -0
- casadi/include/eigen3/Eigen/src/Core/products/TriangularMatrixMatrix.h +472 -0
- casadi/include/eigen3/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +317 -0
- casadi/include/eigen3/Eigen/src/Core/products/TriangularMatrixVector.h +350 -0
- casadi/include/eigen3/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +255 -0
- casadi/include/eigen3/Eigen/src/Core/products/TriangularSolverMatrix.h +337 -0
- casadi/include/eigen3/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +167 -0
- casadi/include/eigen3/Eigen/src/Core/products/TriangularSolverVector.h +148 -0
- casadi/include/eigen3/Eigen/src/Core/util/BlasUtil.h +583 -0
- casadi/include/eigen3/Eigen/src/Core/util/ConfigureVectorization.h +512 -0
- casadi/include/eigen3/Eigen/src/Core/util/Constants.h +563 -0
- casadi/include/eigen3/Eigen/src/Core/util/DisableStupidWarnings.h +106 -0
- casadi/include/eigen3/Eigen/src/Core/util/ForwardDeclarations.h +322 -0
- casadi/include/eigen3/Eigen/src/Core/util/IndexedViewHelper.h +186 -0
- casadi/include/eigen3/Eigen/src/Core/util/IntegralConstant.h +272 -0
- casadi/include/eigen3/Eigen/src/Core/util/MKL_support.h +137 -0
- casadi/include/eigen3/Eigen/src/Core/util/Macros.h +1464 -0
- casadi/include/eigen3/Eigen/src/Core/util/Memory.h +1163 -0
- casadi/include/eigen3/Eigen/src/Core/util/Meta.h +812 -0
- casadi/include/eigen3/Eigen/src/Core/util/NonMPL2.h +3 -0
- casadi/include/eigen3/Eigen/src/Core/util/ReenableStupidWarnings.h +31 -0
- casadi/include/eigen3/Eigen/src/Core/util/ReshapedHelper.h +51 -0
- casadi/include/eigen3/Eigen/src/Core/util/StaticAssert.h +221 -0
- casadi/include/eigen3/Eigen/src/Core/util/SymbolicIndex.h +293 -0
- casadi/include/eigen3/Eigen/src/Core/util/XprHelper.h +856 -0
- casadi/include/eigen3/Eigen/src/Eigenvalues/ComplexEigenSolver.h +346 -0
- casadi/include/eigen3/Eigen/src/Eigenvalues/ComplexSchur.h +462 -0
- casadi/include/eigen3/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +91 -0
- casadi/include/eigen3/Eigen/src/Eigenvalues/EigenSolver.h +622 -0
- casadi/include/eigen3/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +418 -0
- casadi/include/eigen3/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +226 -0
- casadi/include/eigen3/Eigen/src/Eigenvalues/HessenbergDecomposition.h +374 -0
- casadi/include/eigen3/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +158 -0
- casadi/include/eigen3/Eigen/src/Eigenvalues/RealQZ.h +657 -0
- casadi/include/eigen3/Eigen/src/Eigenvalues/RealSchur.h +558 -0
- casadi/include/eigen3/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +77 -0
- casadi/include/eigen3/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +904 -0
- casadi/include/eigen3/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +87 -0
- casadi/include/eigen3/Eigen/src/Eigenvalues/Tridiagonalization.h +561 -0
- casadi/include/eigen3/Eigen/src/Geometry/AlignedBox.h +486 -0
- casadi/include/eigen3/Eigen/src/Geometry/AngleAxis.h +247 -0
- casadi/include/eigen3/Eigen/src/Geometry/EulerAngles.h +114 -0
- casadi/include/eigen3/Eigen/src/Geometry/Homogeneous.h +501 -0
- casadi/include/eigen3/Eigen/src/Geometry/Hyperplane.h +282 -0
- casadi/include/eigen3/Eigen/src/Geometry/OrthoMethods.h +235 -0
- casadi/include/eigen3/Eigen/src/Geometry/ParametrizedLine.h +232 -0
- casadi/include/eigen3/Eigen/src/Geometry/Quaternion.h +870 -0
- casadi/include/eigen3/Eigen/src/Geometry/Rotation2D.h +199 -0
- casadi/include/eigen3/Eigen/src/Geometry/RotationBase.h +206 -0
- casadi/include/eigen3/Eigen/src/Geometry/Scaling.h +188 -0
- casadi/include/eigen3/Eigen/src/Geometry/Transform.h +1563 -0
- casadi/include/eigen3/Eigen/src/Geometry/Translation.h +202 -0
- casadi/include/eigen3/Eigen/src/Geometry/Umeyama.h +166 -0
- casadi/include/eigen3/Eigen/src/Geometry/arch/Geometry_SIMD.h +168 -0
- casadi/include/eigen3/Eigen/src/Householder/BlockHouseholder.h +110 -0
- casadi/include/eigen3/Eigen/src/Householder/Householder.h +176 -0
- casadi/include/eigen3/Eigen/src/Householder/HouseholderSequence.h +545 -0
- casadi/include/eigen3/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +226 -0
- casadi/include/eigen3/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +212 -0
- casadi/include/eigen3/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +229 -0
- casadi/include/eigen3/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +394 -0
- casadi/include/eigen3/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +453 -0
- casadi/include/eigen3/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +444 -0
- casadi/include/eigen3/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +198 -0
- casadi/include/eigen3/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +117 -0
- casadi/include/eigen3/Eigen/src/Jacobi/Jacobi.h +483 -0
- casadi/include/eigen3/Eigen/src/KLUSupport/KLUSupport.h +358 -0
- casadi/include/eigen3/Eigen/src/LU/Determinant.h +117 -0
- casadi/include/eigen3/Eigen/src/LU/FullPivLU.h +877 -0
- casadi/include/eigen3/Eigen/src/LU/InverseImpl.h +432 -0
- casadi/include/eigen3/Eigen/src/LU/PartialPivLU.h +624 -0
- casadi/include/eigen3/Eigen/src/LU/PartialPivLU_LAPACKE.h +83 -0
- casadi/include/eigen3/Eigen/src/LU/arch/InverseSize4.h +351 -0
- casadi/include/eigen3/Eigen/src/MetisSupport/MetisSupport.h +137 -0
- casadi/include/eigen3/Eigen/src/OrderingMethods/Amd.h +435 -0
- casadi/include/eigen3/Eigen/src/OrderingMethods/Eigen_Colamd.h +1863 -0
- casadi/include/eigen3/Eigen/src/OrderingMethods/Ordering.h +153 -0
- casadi/include/eigen3/Eigen/src/PaStiXSupport/PaStiXSupport.h +678 -0
- casadi/include/eigen3/Eigen/src/PardisoSupport/PardisoSupport.h +545 -0
- casadi/include/eigen3/Eigen/src/QR/ColPivHouseholderQR.h +674 -0
- casadi/include/eigen3/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +97 -0
- casadi/include/eigen3/Eigen/src/QR/CompleteOrthogonalDecomposition.h +635 -0
- casadi/include/eigen3/Eigen/src/QR/FullPivHouseholderQR.h +713 -0
- casadi/include/eigen3/Eigen/src/QR/HouseholderQR.h +434 -0
- casadi/include/eigen3/Eigen/src/QR/HouseholderQR_LAPACKE.h +68 -0
- casadi/include/eigen3/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +335 -0
- casadi/include/eigen3/Eigen/src/SVD/BDCSVD.h +1366 -0
- casadi/include/eigen3/Eigen/src/SVD/JacobiSVD.h +812 -0
- casadi/include/eigen3/Eigen/src/SVD/JacobiSVD_LAPACKE.h +91 -0
- casadi/include/eigen3/Eigen/src/SVD/SVDBase.h +376 -0
- casadi/include/eigen3/Eigen/src/SVD/UpperBidiagonalization.h +414 -0
- casadi/include/eigen3/Eigen/src/SparseCholesky/SimplicialCholesky.h +697 -0
- casadi/include/eigen3/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +174 -0
- casadi/include/eigen3/Eigen/src/SparseCore/AmbiVector.h +378 -0
- casadi/include/eigen3/Eigen/src/SparseCore/CompressedStorage.h +274 -0
- casadi/include/eigen3/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +352 -0
- casadi/include/eigen3/Eigen/src/SparseCore/MappedSparseMatrix.h +67 -0
- casadi/include/eigen3/Eigen/src/SparseCore/SparseAssign.h +270 -0
- casadi/include/eigen3/Eigen/src/SparseCore/SparseBlock.h +571 -0
- casadi/include/eigen3/Eigen/src/SparseCore/SparseColEtree.h +206 -0
- casadi/include/eigen3/Eigen/src/SparseCore/SparseCompressedBase.h +370 -0
- casadi/include/eigen3/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +722 -0
- casadi/include/eigen3/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +150 -0
- casadi/include/eigen3/Eigen/src/SparseCore/SparseDenseProduct.h +342 -0
- casadi/include/eigen3/Eigen/src/SparseCore/SparseDiagonalProduct.h +138 -0
- casadi/include/eigen3/Eigen/src/SparseCore/SparseDot.h +98 -0
- casadi/include/eigen3/Eigen/src/SparseCore/SparseFuzzy.h +29 -0
- casadi/include/eigen3/Eigen/src/SparseCore/SparseMap.h +305 -0
- casadi/include/eigen3/Eigen/src/SparseCore/SparseMatrix.h +1518 -0
- casadi/include/eigen3/Eigen/src/SparseCore/SparseMatrixBase.h +398 -0
- casadi/include/eigen3/Eigen/src/SparseCore/SparsePermutation.h +178 -0
- casadi/include/eigen3/Eigen/src/SparseCore/SparseProduct.h +181 -0
- casadi/include/eigen3/Eigen/src/SparseCore/SparseRedux.h +49 -0
- casadi/include/eigen3/Eigen/src/SparseCore/SparseRef.h +397 -0
- casadi/include/eigen3/Eigen/src/SparseCore/SparseSelfAdjointView.h +659 -0
- casadi/include/eigen3/Eigen/src/SparseCore/SparseSolverBase.h +124 -0
- casadi/include/eigen3/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +198 -0
- casadi/include/eigen3/Eigen/src/SparseCore/SparseTranspose.h +92 -0
- casadi/include/eigen3/Eigen/src/SparseCore/SparseTriangularView.h +189 -0
- casadi/include/eigen3/Eigen/src/SparseCore/SparseUtil.h +186 -0
- casadi/include/eigen3/Eigen/src/SparseCore/SparseVector.h +478 -0
- casadi/include/eigen3/Eigen/src/SparseCore/SparseView.h +254 -0
- casadi/include/eigen3/Eigen/src/SparseCore/TriangularSolver.h +315 -0
- casadi/include/eigen3/Eigen/src/SparseLU/SparseLU.h +923 -0
- casadi/include/eigen3/Eigen/src/SparseLU/SparseLUImpl.h +66 -0
- casadi/include/eigen3/Eigen/src/SparseLU/SparseLU_Memory.h +226 -0
- casadi/include/eigen3/Eigen/src/SparseLU/SparseLU_Structs.h +110 -0
- casadi/include/eigen3/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +375 -0
- casadi/include/eigen3/Eigen/src/SparseLU/SparseLU_Utils.h +80 -0
- casadi/include/eigen3/Eigen/src/SparseLU/SparseLU_column_bmod.h +181 -0
- casadi/include/eigen3/Eigen/src/SparseLU/SparseLU_column_dfs.h +179 -0
- casadi/include/eigen3/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +107 -0
- casadi/include/eigen3/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +280 -0
- casadi/include/eigen3/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +126 -0
- casadi/include/eigen3/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +130 -0
- casadi/include/eigen3/Eigen/src/SparseLU/SparseLU_panel_bmod.h +223 -0
- casadi/include/eigen3/Eigen/src/SparseLU/SparseLU_panel_dfs.h +258 -0
- casadi/include/eigen3/Eigen/src/SparseLU/SparseLU_pivotL.h +137 -0
- casadi/include/eigen3/Eigen/src/SparseLU/SparseLU_pruneL.h +136 -0
- casadi/include/eigen3/Eigen/src/SparseLU/SparseLU_relax_snode.h +83 -0
- casadi/include/eigen3/Eigen/src/SparseQR/SparseQR.h +758 -0
- casadi/include/eigen3/Eigen/src/StlSupport/StdDeque.h +116 -0
- casadi/include/eigen3/Eigen/src/StlSupport/StdList.h +106 -0
- casadi/include/eigen3/Eigen/src/StlSupport/StdVector.h +131 -0
- casadi/include/eigen3/Eigen/src/StlSupport/details.h +84 -0
- casadi/include/eigen3/Eigen/src/SuperLUSupport/SuperLUSupport.h +1025 -0
- casadi/include/eigen3/Eigen/src/UmfPackSupport/UmfPackSupport.h +642 -0
- casadi/include/eigen3/Eigen/src/misc/Image.h +82 -0
- casadi/include/eigen3/Eigen/src/misc/Kernel.h +79 -0
- casadi/include/eigen3/Eigen/src/misc/RealSvd2x2.h +55 -0
- casadi/include/eigen3/Eigen/src/misc/blas.h +440 -0
- casadi/include/eigen3/Eigen/src/misc/lapack.h +152 -0
- casadi/include/eigen3/Eigen/src/misc/lapacke.h +16292 -0
- casadi/include/eigen3/Eigen/src/misc/lapacke_mangling.h +17 -0
- casadi/include/eigen3/Eigen/src/plugins/ArrayCwiseBinaryOps.h +358 -0
- casadi/include/eigen3/Eigen/src/plugins/ArrayCwiseUnaryOps.h +696 -0
- casadi/include/eigen3/Eigen/src/plugins/BlockMethods.h +1442 -0
- casadi/include/eigen3/Eigen/src/plugins/CommonCwiseBinaryOps.h +115 -0
- casadi/include/eigen3/Eigen/src/plugins/CommonCwiseUnaryOps.h +177 -0
- casadi/include/eigen3/Eigen/src/plugins/IndexedViewMethods.h +262 -0
- casadi/include/eigen3/Eigen/src/plugins/MatrixCwiseBinaryOps.h +152 -0
- casadi/include/eigen3/Eigen/src/plugins/MatrixCwiseUnaryOps.h +95 -0
- casadi/include/eigen3/Eigen/src/plugins/ReshapedMethods.h +149 -0
- casadi/include/eigen3/signature_of_eigen3_matrix_library +1 -0
- casadi/include/eigen3/unsupported/Eigen/AdolcForward +159 -0
- casadi/include/eigen3/unsupported/Eigen/AlignedVector3 +234 -0
- casadi/include/eigen3/unsupported/Eigen/ArpackSupport +30 -0
- casadi/include/eigen3/unsupported/Eigen/AutoDiff +46 -0
- casadi/include/eigen3/unsupported/Eigen/BVH +95 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/Tensor +137 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/TensorSymmetry +42 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/ThreadPool +74 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/Tensor.h +554 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h +329 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h +247 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +1176 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h +1559 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h +1093 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h +518 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h +377 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h +1023 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h +73 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h +6 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h +1413 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h +575 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionSycl.h +1650 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h +1679 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h +456 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h +1132 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorConvolutionSycl.h +544 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h +214 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h +347 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h +137 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h +6 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h +104 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceGpu.h +389 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h +1048 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h +409 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h +236 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h +490 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h +236 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h +983 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h +703 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h +388 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h +669 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h +379 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h +237 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h +191 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h +488 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h +302 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorGlobalFunctions.h +33 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h +99 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaUndefines.h +44 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h +79 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h +603 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h +738 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h +247 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorInitializer.h +82 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h +263 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h +216 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMacros.h +98 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h +327 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h +311 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h +1102 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h +708 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h +291 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h +322 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h +998 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h +6 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h +966 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReductionSycl.h +582 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h +454 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h +465 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h +528 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorScanSycl.h +513 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h +471 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h +161 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h +346 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorTrace.h +303 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h +264 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h +249 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h +629 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/TensorSymmetry/DynamicSymmetry.h +293 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/TensorSymmetry/StaticSymmetry.h +236 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/TensorSymmetry/Symmetry.h +338 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/TensorSymmetry/util/TemplateGroupTheory.h +669 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/Barrier.h +67 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/EventCount.h +249 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h +486 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/RunQueue.h +236 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/ThreadCancel.h +23 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/ThreadEnvironment.h +40 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/ThreadLocal.h +301 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/ThreadPoolInterface.h +48 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/ThreadYield.h +20 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/util/CXX11Meta.h +537 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/util/CXX11Workarounds.h +88 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/util/EmulateArray.h +261 -0
- casadi/include/eigen3/unsupported/Eigen/CXX11/src/util/MaxSizeVector.h +158 -0
- casadi/include/eigen3/unsupported/Eigen/EulerAngles +43 -0
- casadi/include/eigen3/unsupported/Eigen/FFT +419 -0
- casadi/include/eigen3/unsupported/Eigen/IterativeSolvers +51 -0
- casadi/include/eigen3/unsupported/Eigen/KroneckerProduct +36 -0
- casadi/include/eigen3/unsupported/Eigen/LevenbergMarquardt +49 -0
- casadi/include/eigen3/unsupported/Eigen/MPRealSupport +213 -0
- casadi/include/eigen3/unsupported/Eigen/MatrixFunctions +504 -0
- casadi/include/eigen3/unsupported/Eigen/MoreVectorization +24 -0
- casadi/include/eigen3/unsupported/Eigen/NonLinearOptimization +140 -0
- casadi/include/eigen3/unsupported/Eigen/NumericalDiff +56 -0
- casadi/include/eigen3/unsupported/Eigen/OpenGLSupport +322 -0
- casadi/include/eigen3/unsupported/Eigen/Polynomials +137 -0
- casadi/include/eigen3/unsupported/Eigen/Skyline +39 -0
- casadi/include/eigen3/unsupported/Eigen/SparseExtra +54 -0
- casadi/include/eigen3/unsupported/Eigen/SpecialFunctions +103 -0
- casadi/include/eigen3/unsupported/Eigen/Splines +35 -0
- casadi/include/eigen3/unsupported/Eigen/src/AutoDiff/AutoDiffJacobian.h +108 -0
- casadi/include/eigen3/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h +730 -0
- casadi/include/eigen3/unsupported/Eigen/src/AutoDiff/AutoDiffVector.h +220 -0
- casadi/include/eigen3/unsupported/Eigen/src/BVH/BVAlgorithms.h +293 -0
- casadi/include/eigen3/unsupported/Eigen/src/BVH/KdBVH.h +223 -0
- casadi/include/eigen3/unsupported/Eigen/src/Eigenvalues/ArpackSelfAdjointEigenSolver.h +790 -0
- casadi/include/eigen3/unsupported/Eigen/src/EulerAngles/EulerAngles.h +355 -0
- casadi/include/eigen3/unsupported/Eigen/src/EulerAngles/EulerSystem.h +305 -0
- casadi/include/eigen3/unsupported/Eigen/src/FFT/ei_fftw_impl.h +261 -0
- casadi/include/eigen3/unsupported/Eigen/src/FFT/ei_kissfft_impl.h +449 -0
- casadi/include/eigen3/unsupported/Eigen/src/IterativeSolvers/ConstrainedConjGrad.h +187 -0
- casadi/include/eigen3/unsupported/Eigen/src/IterativeSolvers/DGMRES.h +511 -0
- casadi/include/eigen3/unsupported/Eigen/src/IterativeSolvers/GMRES.h +335 -0
- casadi/include/eigen3/unsupported/Eigen/src/IterativeSolvers/IDRS.h +436 -0
- casadi/include/eigen3/unsupported/Eigen/src/IterativeSolvers/IncompleteLU.h +90 -0
- casadi/include/eigen3/unsupported/Eigen/src/IterativeSolvers/IterationController.h +154 -0
- casadi/include/eigen3/unsupported/Eigen/src/IterativeSolvers/MINRES.h +267 -0
- casadi/include/eigen3/unsupported/Eigen/src/IterativeSolvers/Scaling.h +193 -0
- casadi/include/eigen3/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h +305 -0
- casadi/include/eigen3/unsupported/Eigen/src/LevenbergMarquardt/LMcovar.h +84 -0
- casadi/include/eigen3/unsupported/Eigen/src/LevenbergMarquardt/LMonestep.h +202 -0
- casadi/include/eigen3/unsupported/Eigen/src/LevenbergMarquardt/LMpar.h +160 -0
- casadi/include/eigen3/unsupported/Eigen/src/LevenbergMarquardt/LMqrsolv.h +188 -0
- casadi/include/eigen3/unsupported/Eigen/src/LevenbergMarquardt/LevenbergMarquardt.h +396 -0
- casadi/include/eigen3/unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h +441 -0
- casadi/include/eigen3/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h +569 -0
- casadi/include/eigen3/unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h +373 -0
- casadi/include/eigen3/unsupported/Eigen/src/MatrixFunctions/MatrixPower.h +705 -0
- casadi/include/eigen3/unsupported/Eigen/src/MatrixFunctions/MatrixSquareRoot.h +368 -0
- casadi/include/eigen3/unsupported/Eigen/src/MatrixFunctions/StemFunction.h +117 -0
- casadi/include/eigen3/unsupported/Eigen/src/MoreVectorization/MathFunctions.h +95 -0
- casadi/include/eigen3/unsupported/Eigen/src/NonLinearOptimization/HybridNonLinearSolver.h +601 -0
- casadi/include/eigen3/unsupported/Eigen/src/NonLinearOptimization/LevenbergMarquardt.h +657 -0
- casadi/include/eigen3/unsupported/Eigen/src/NonLinearOptimization/chkder.h +66 -0
- casadi/include/eigen3/unsupported/Eigen/src/NonLinearOptimization/covar.h +70 -0
- casadi/include/eigen3/unsupported/Eigen/src/NonLinearOptimization/dogleg.h +107 -0
- casadi/include/eigen3/unsupported/Eigen/src/NonLinearOptimization/fdjac1.h +79 -0
- casadi/include/eigen3/unsupported/Eigen/src/NonLinearOptimization/lmpar.h +298 -0
- casadi/include/eigen3/unsupported/Eigen/src/NonLinearOptimization/qrsolv.h +91 -0
- casadi/include/eigen3/unsupported/Eigen/src/NonLinearOptimization/r1mpyq.h +30 -0
- casadi/include/eigen3/unsupported/Eigen/src/NonLinearOptimization/r1updt.h +99 -0
- casadi/include/eigen3/unsupported/Eigen/src/NonLinearOptimization/rwupdt.h +49 -0
- casadi/include/eigen3/unsupported/Eigen/src/NumericalDiff/NumericalDiff.h +130 -0
- casadi/include/eigen3/unsupported/Eigen/src/Polynomials/Companion.h +280 -0
- casadi/include/eigen3/unsupported/Eigen/src/Polynomials/PolynomialSolver.h +428 -0
- casadi/include/eigen3/unsupported/Eigen/src/Polynomials/PolynomialUtils.h +143 -0
- casadi/include/eigen3/unsupported/Eigen/src/Skyline/SkylineInplaceLU.h +352 -0
- casadi/include/eigen3/unsupported/Eigen/src/Skyline/SkylineMatrix.h +862 -0
- casadi/include/eigen3/unsupported/Eigen/src/Skyline/SkylineMatrixBase.h +212 -0
- casadi/include/eigen3/unsupported/Eigen/src/Skyline/SkylineProduct.h +295 -0
- casadi/include/eigen3/unsupported/Eigen/src/Skyline/SkylineStorage.h +259 -0
- casadi/include/eigen3/unsupported/Eigen/src/Skyline/SkylineUtil.h +89 -0
- casadi/include/eigen3/unsupported/Eigen/src/SparseExtra/BlockOfDynamicSparseMatrix.h +122 -0
- casadi/include/eigen3/unsupported/Eigen/src/SparseExtra/BlockSparseMatrix.h +1079 -0
- casadi/include/eigen3/unsupported/Eigen/src/SparseExtra/DynamicSparseMatrix.h +404 -0
- casadi/include/eigen3/unsupported/Eigen/src/SparseExtra/MarketIO.h +282 -0
- casadi/include/eigen3/unsupported/Eigen/src/SparseExtra/MatrixMarketIterator.h +247 -0
- casadi/include/eigen3/unsupported/Eigen/src/SparseExtra/RandomSetter.h +349 -0
- casadi/include/eigen3/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsArrayAPI.h +286 -0
- casadi/include/eigen3/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsBFloat16.h +68 -0
- casadi/include/eigen3/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsFunctors.h +357 -0
- casadi/include/eigen3/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsHalf.h +66 -0
- casadi/include/eigen3/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsImpl.h +1959 -0
- casadi/include/eigen3/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsPacketMath.h +118 -0
- casadi/include/eigen3/unsupported/Eigen/src/SpecialFunctions/HipVectorCompatibility.h +67 -0
- casadi/include/eigen3/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsArrayAPI.h +167 -0
- casadi/include/eigen3/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsBFloat16.h +58 -0
- casadi/include/eigen3/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h +330 -0
- casadi/include/eigen3/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h +58 -0
- casadi/include/eigen3/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h +2045 -0
- casadi/include/eigen3/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h +79 -0
- casadi/include/eigen3/unsupported/Eigen/src/SpecialFunctions/arch/AVX/BesselFunctions.h +46 -0
- casadi/include/eigen3/unsupported/Eigen/src/SpecialFunctions/arch/AVX/SpecialFunctions.h +16 -0
- casadi/include/eigen3/unsupported/Eigen/src/SpecialFunctions/arch/AVX512/BesselFunctions.h +46 -0
- casadi/include/eigen3/unsupported/Eigen/src/SpecialFunctions/arch/AVX512/SpecialFunctions.h +16 -0
- casadi/include/eigen3/unsupported/Eigen/src/SpecialFunctions/arch/GPU/SpecialFunctions.h +369 -0
- casadi/include/eigen3/unsupported/Eigen/src/SpecialFunctions/arch/NEON/BesselFunctions.h +54 -0
- casadi/include/eigen3/unsupported/Eigen/src/SpecialFunctions/arch/NEON/SpecialFunctions.h +34 -0
- casadi/include/eigen3/unsupported/Eigen/src/Splines/Spline.h +507 -0
- casadi/include/eigen3/unsupported/Eigen/src/Splines/SplineFitting.h +431 -0
- casadi/include/eigen3/unsupported/Eigen/src/Splines/SplineFwd.h +93 -0
- casadi/include/highs/HConfig.h +23 -0
- casadi/include/highs/Highs.h +1703 -0
- casadi/include/highs/filereaderlp/builder.hpp +25 -0
- casadi/include/highs/filereaderlp/def.hpp +19 -0
- casadi/include/highs/filereaderlp/model.hpp +68 -0
- casadi/include/highs/filereaderlp/reader.hpp +10 -0
- casadi/include/highs/interfaces/highs_c_api.h +2456 -0
- casadi/include/highs/io/Filereader.h +45 -0
- casadi/include/highs/io/FilereaderEms.h +30 -0
- casadi/include/highs/io/FilereaderLp.h +51 -0
- casadi/include/highs/io/FilereaderMps.h +27 -0
- casadi/include/highs/io/HMPSIO.h +78 -0
- casadi/include/highs/io/HMpsFF.h +238 -0
- casadi/include/highs/io/HighsIO.h +114 -0
- casadi/include/highs/io/LoadOptions.h +24 -0
- casadi/include/highs/ipm/IpxSolution.h +32 -0
- casadi/include/highs/ipm/IpxWrapper.h +70 -0
- casadi/include/highs/ipm/basiclu/basiclu.h +161 -0
- casadi/include/highs/ipm/basiclu/basiclu_factorize.h +247 -0
- casadi/include/highs/ipm/basiclu/basiclu_get_factors.h +108 -0
- casadi/include/highs/ipm/basiclu/basiclu_initialize.h +119 -0
- casadi/include/highs/ipm/basiclu/basiclu_obj_factorize.h +34 -0
- casadi/include/highs/ipm/basiclu/basiclu_obj_free.h +19 -0
- casadi/include/highs/ipm/basiclu/basiclu_obj_get_factors.h +34 -0
- casadi/include/highs/ipm/basiclu/basiclu_obj_initialize.h +46 -0
- casadi/include/highs/ipm/basiclu/basiclu_obj_solve_dense.h +29 -0
- casadi/include/highs/ipm/basiclu/basiclu_obj_solve_for_update.h +42 -0
- casadi/include/highs/ipm/basiclu/basiclu_obj_solve_sparse.h +32 -0
- casadi/include/highs/ipm/basiclu/basiclu_obj_update.h +31 -0
- casadi/include/highs/ipm/basiclu/basiclu_object.h +30 -0
- casadi/include/highs/ipm/basiclu/basiclu_solve_dense.h +75 -0
- casadi/include/highs/ipm/basiclu/basiclu_solve_for_update.h +169 -0
- casadi/include/highs/ipm/basiclu/basiclu_solve_sparse.h +112 -0
- casadi/include/highs/ipm/basiclu/basiclu_update.h +125 -0
- casadi/include/highs/ipm/basiclu/lu_def.h +39 -0
- casadi/include/highs/ipm/basiclu/lu_file.h +21 -0
- casadi/include/highs/ipm/basiclu/lu_internal.h +220 -0
- casadi/include/highs/ipm/basiclu/lu_list.h +168 -0
- casadi/include/highs/ipm/ipx/basiclu_kernel.h +20 -0
- casadi/include/highs/ipm/ipx/basiclu_wrapper.h +47 -0
- casadi/include/highs/ipm/ipx/basis.h +351 -0
- casadi/include/highs/ipm/ipx/conjugate_residuals.h +74 -0
- casadi/include/highs/ipm/ipx/control.h +164 -0
- casadi/include/highs/ipm/ipx/crossover.h +157 -0
- casadi/include/highs/ipm/ipx/diagonal_precond.h +45 -0
- casadi/include/highs/ipm/ipx/forrest_tomlin.h +102 -0
- casadi/include/highs/ipm/ipx/guess_basis.h +21 -0
- casadi/include/highs/ipm/ipx/indexed_vector.h +113 -0
- casadi/include/highs/ipm/ipx/info.h +27 -0
- casadi/include/highs/ipm/ipx/ipm.h +94 -0
- casadi/include/highs/ipm/ipx/ipx_c.h +47 -0
- casadi/include/highs/ipm/ipx/ipx_config.h +9 -0
- casadi/include/highs/ipm/ipx/ipx_info.h +111 -0
- casadi/include/highs/ipm/ipx/ipx_internal.h +88 -0
- casadi/include/highs/ipm/ipx/ipx_parameters.h +76 -0
- casadi/include/highs/ipm/ipx/ipx_status.h +57 -0
- casadi/include/highs/ipm/ipx/iterate.h +328 -0
- casadi/include/highs/ipm/ipx/kkt_solver.h +70 -0
- casadi/include/highs/ipm/ipx/kkt_solver_basis.h +66 -0
- casadi/include/highs/ipm/ipx/kkt_solver_diag.h +48 -0
- casadi/include/highs/ipm/ipx/linear_operator.h +26 -0
- casadi/include/highs/ipm/ipx/lp_solver.h +202 -0
- casadi/include/highs/ipm/ipx/lu_factorization.h +79 -0
- casadi/include/highs/ipm/ipx/lu_update.h +129 -0
- casadi/include/highs/ipm/ipx/maxvolume.h +54 -0
- casadi/include/highs/ipm/ipx/model.h +413 -0
- casadi/include/highs/ipm/ipx/multistream.h +52 -0
- casadi/include/highs/ipm/ipx/normal_matrix.h +44 -0
- casadi/include/highs/ipm/ipx/power_method.h +44 -0
- casadi/include/highs/ipm/ipx/sparse_matrix.h +195 -0
- casadi/include/highs/ipm/ipx/sparse_utils.h +58 -0
- casadi/include/highs/ipm/ipx/splitted_normal_matrix.h +63 -0
- casadi/include/highs/ipm/ipx/starting_basis.h +39 -0
- casadi/include/highs/ipm/ipx/symbolic_invert.h +29 -0
- casadi/include/highs/ipm/ipx/timer.h +24 -0
- casadi/include/highs/ipm/ipx/utils.h +39 -0
- casadi/include/highs/lp_data/HConst.h +320 -0
- casadi/include/highs/lp_data/HStruct.h +182 -0
- casadi/include/highs/lp_data/HighsAnalysis.h +23 -0
- casadi/include/highs/lp_data/HighsCallback.h +47 -0
- casadi/include/highs/lp_data/HighsCallbackStruct.h +62 -0
- casadi/include/highs/lp_data/HighsDebug.h +34 -0
- casadi/include/highs/lp_data/HighsIis.h +62 -0
- casadi/include/highs/lp_data/HighsInfo.h +329 -0
- casadi/include/highs/lp_data/HighsInfoDebug.h +27 -0
- casadi/include/highs/lp_data/HighsLp.h +100 -0
- casadi/include/highs/lp_data/HighsLpSolverObject.h +45 -0
- casadi/include/highs/lp_data/HighsLpUtils.h +298 -0
- casadi/include/highs/lp_data/HighsModelUtils.h +112 -0
- casadi/include/highs/lp_data/HighsOptions.h +1469 -0
- casadi/include/highs/lp_data/HighsRanging.h +43 -0
- casadi/include/highs/lp_data/HighsSolution.h +144 -0
- casadi/include/highs/lp_data/HighsSolutionDebug.h +87 -0
- casadi/include/highs/lp_data/HighsSolve.h +23 -0
- casadi/include/highs/lp_data/HighsStatus.h +29 -0
- casadi/include/highs/mip/HighsCliqueTable.h +318 -0
- casadi/include/highs/mip/HighsConflictPool.h +109 -0
- casadi/include/highs/mip/HighsCutGeneration.h +106 -0
- casadi/include/highs/mip/HighsCutPool.h +168 -0
- casadi/include/highs/mip/HighsDebugSol.h +132 -0
- casadi/include/highs/mip/HighsDomain.h +653 -0
- casadi/include/highs/mip/HighsDomainChange.h +48 -0
- casadi/include/highs/mip/HighsDynamicRowMatrix.h +104 -0
- casadi/include/highs/mip/HighsGFkSolve.h +438 -0
- casadi/include/highs/mip/HighsImplications.h +170 -0
- casadi/include/highs/mip/HighsLpAggregator.h +50 -0
- casadi/include/highs/mip/HighsLpRelaxation.h +357 -0
- casadi/include/highs/mip/HighsMipAnalysis.h +52 -0
- casadi/include/highs/mip/HighsMipSolver.h +112 -0
- casadi/include/highs/mip/HighsMipSolverData.h +297 -0
- casadi/include/highs/mip/HighsModkSeparator.h +60 -0
- casadi/include/highs/mip/HighsNodeQueue.h +311 -0
- casadi/include/highs/mip/HighsObjectiveFunction.h +71 -0
- casadi/include/highs/mip/HighsPathSeparator.h +39 -0
- casadi/include/highs/mip/HighsPrimalHeuristics.h +70 -0
- casadi/include/highs/mip/HighsPseudocost.h +360 -0
- casadi/include/highs/mip/HighsRedcostFixing.h +42 -0
- casadi/include/highs/mip/HighsSearch.h +241 -0
- casadi/include/highs/mip/HighsSeparation.h +41 -0
- casadi/include/highs/mip/HighsSeparator.h +52 -0
- casadi/include/highs/mip/HighsTableauSeparator.h +34 -0
- casadi/include/highs/mip/HighsTransformedLp.h +63 -0
- casadi/include/highs/mip/MipTimer.h +471 -0
- casadi/include/highs/model/HighsHessian.h +54 -0
- casadi/include/highs/model/HighsHessianUtils.h +49 -0
- casadi/include/highs/model/HighsModel.h +52 -0
- casadi/include/highs/parallel/HighsBinarySemaphore.h +113 -0
- casadi/include/highs/parallel/HighsCacheAlign.h +87 -0
- casadi/include/highs/parallel/HighsCombinable.h +121 -0
- casadi/include/highs/parallel/HighsMutex.h +129 -0
- casadi/include/highs/parallel/HighsParallel.h +133 -0
- casadi/include/highs/parallel/HighsRaceTimer.h +43 -0
- casadi/include/highs/parallel/HighsSchedulerConstants.h +24 -0
- casadi/include/highs/parallel/HighsSpinMutex.h +53 -0
- casadi/include/highs/parallel/HighsSplitDeque.h +583 -0
- casadi/include/highs/parallel/HighsTask.h +175 -0
- casadi/include/highs/parallel/HighsTaskExecutor.h +222 -0
- casadi/include/highs/pdlp/CupdlpWrapper.h +104 -0
- casadi/include/highs/pdlp/cupdlp/cupdlp_cs.h +40 -0
- casadi/include/highs/pdlp/cupdlp/cupdlp_defs.h +433 -0
- casadi/include/highs/pdlp/cupdlp/cupdlp_linalg.h +189 -0
- casadi/include/highs/pdlp/cupdlp/cupdlp_proj.h +19 -0
- casadi/include/highs/pdlp/cupdlp/cupdlp_restart.h +31 -0
- casadi/include/highs/pdlp/cupdlp/cupdlp_scaling.h +26 -0
- casadi/include/highs/pdlp/cupdlp/cupdlp_solver.h +98 -0
- casadi/include/highs/pdlp/cupdlp/cupdlp_step.h +37 -0
- casadi/include/highs/pdlp/cupdlp/cupdlp_utils.c +1807 -0
- casadi/include/highs/pdqsort/pdqsort.h +532 -0
- casadi/include/highs/presolve/HPresolve.h +405 -0
- casadi/include/highs/presolve/HPresolveAnalysis.h +51 -0
- casadi/include/highs/presolve/HighsPostsolveStack.h +940 -0
- casadi/include/highs/presolve/HighsSymmetry.h +281 -0
- casadi/include/highs/presolve/ICrash.h +124 -0
- casadi/include/highs/presolve/ICrashUtil.h +62 -0
- casadi/include/highs/presolve/ICrashX.h +23 -0
- casadi/include/highs/presolve/PresolveComponent.h +90 -0
- casadi/include/highs/qpsolver/a_asm.hpp +70 -0
- casadi/include/highs/qpsolver/a_quass.hpp +15 -0
- casadi/include/highs/qpsolver/basis.hpp +152 -0
- casadi/include/highs/qpsolver/crashsolution.hpp +13 -0
- casadi/include/highs/qpsolver/dantzigpricing.hpp +73 -0
- casadi/include/highs/qpsolver/devexpricing.hpp +101 -0
- casadi/include/highs/qpsolver/eventhandler.hpp +23 -0
- casadi/include/highs/qpsolver/factor.hpp +401 -0
- casadi/include/highs/qpsolver/feasibility_bounded.hpp +107 -0
- casadi/include/highs/qpsolver/feasibility_highs.hpp +294 -0
- casadi/include/highs/qpsolver/gradient.hpp +39 -0
- casadi/include/highs/qpsolver/instance.hpp +63 -0
- casadi/include/highs/qpsolver/matrix.hpp +335 -0
- casadi/include/highs/qpsolver/perturbation.hpp +8 -0
- casadi/include/highs/qpsolver/pricing.hpp +15 -0
- casadi/include/highs/qpsolver/qpconst.hpp +27 -0
- casadi/include/highs/qpsolver/qpvector.hpp +235 -0
- casadi/include/highs/qpsolver/quass.hpp +20 -0
- casadi/include/highs/qpsolver/ratiotest.hpp +19 -0
- casadi/include/highs/qpsolver/runtime.hpp +38 -0
- casadi/include/highs/qpsolver/scaling.hpp +8 -0
- casadi/include/highs/qpsolver/settings.hpp +69 -0
- casadi/include/highs/qpsolver/snippets.hpp +29 -0
- casadi/include/highs/qpsolver/statistics.hpp +23 -0
- casadi/include/highs/qpsolver/steepestedgepricing.hpp +166 -0
- casadi/include/highs/simplex/HApp.h +476 -0
- casadi/include/highs/simplex/HEkk.h +416 -0
- casadi/include/highs/simplex/HEkkDual.h +513 -0
- casadi/include/highs/simplex/HEkkDualRHS.h +134 -0
- casadi/include/highs/simplex/HEkkDualRow.h +201 -0
- casadi/include/highs/simplex/HEkkPrimal.h +191 -0
- casadi/include/highs/simplex/HSimplex.h +42 -0
- casadi/include/highs/simplex/HSimplexDebug.h +48 -0
- casadi/include/highs/simplex/HSimplexNla.h +158 -0
- casadi/include/highs/simplex/HSimplexReport.h +21 -0
- casadi/include/highs/simplex/HighsSimplexAnalysis.h +500 -0
- casadi/include/highs/simplex/SimplexConst.h +273 -0
- casadi/include/highs/simplex/SimplexStruct.h +261 -0
- casadi/include/highs/simplex/SimplexTimer.h +409 -0
- casadi/include/highs/test/DevKkt.h +143 -0
- casadi/include/highs/test/KktCh2.h +79 -0
- casadi/include/highs/util/FactorTimer.h +199 -0
- casadi/include/highs/util/HFactor.h +587 -0
- casadi/include/highs/util/HFactorConst.h +81 -0
- casadi/include/highs/util/HFactorDebug.h +55 -0
- casadi/include/highs/util/HSet.h +89 -0
- casadi/include/highs/util/HVector.h +22 -0
- casadi/include/highs/util/HVectorBase.h +102 -0
- casadi/include/highs/util/HighsCDouble.h +319 -0
- casadi/include/highs/util/HighsComponent.h +53 -0
- casadi/include/highs/util/HighsDataStack.h +83 -0
- casadi/include/highs/util/HighsDisjointSets.h +107 -0
- casadi/include/highs/util/HighsHash.h +1274 -0
- casadi/include/highs/util/HighsHashTree.h +1447 -0
- casadi/include/highs/util/HighsInt.h +36 -0
- casadi/include/highs/util/HighsIntegers.h +212 -0
- casadi/include/highs/util/HighsLinearSumBounds.h +157 -0
- casadi/include/highs/util/HighsMatrixPic.h +37 -0
- casadi/include/highs/util/HighsMatrixSlice.h +561 -0
- casadi/include/highs/util/HighsMatrixUtils.h +54 -0
- casadi/include/highs/util/HighsMemoryAllocation.h +63 -0
- casadi/include/highs/util/HighsRandom.h +242 -0
- casadi/include/highs/util/HighsRbTree.h +452 -0
- casadi/include/highs/util/HighsSort.h +131 -0
- casadi/include/highs/util/HighsSparseMatrix.h +150 -0
- casadi/include/highs/util/HighsSparseVectorSum.h +95 -0
- casadi/include/highs/util/HighsSplay.h +135 -0
- casadi/include/highs/util/HighsTimer.h +381 -0
- casadi/include/highs/util/HighsUtils.h +217 -0
- casadi/include/highs/util/stringutil.h +46 -0
- casadi/include/highs/zstr/strict_fstream.hpp +237 -0
- casadi/include/highs/zstr/zstr.hpp +472 -0
- casadi/include/licenses/FMI-Standard-2.0.2/LICENSE.txt +473 -0
- casadi/include/licenses/FMI-Standard-3.0/LICENSE.txt +464 -0
- casadi/include/licenses/alpaqa-external/LICENSE +165 -0
- casadi/include/licenses/bonmin-external/Bonmin/LICENSE +87 -0
- casadi/include/licenses/bonmin-external/LICENSE +3 -0
- casadi/include/licenses/casadi/LICENSE/LICENSE.txt +165 -0
- casadi/include/licenses/casadi-sundials/LICENSE +64 -0
- casadi/include/licenses/casadi-sundials/cvodes/LICENSE +60 -0
- casadi/include/licenses/casadi-sundials/idas/LICENSE +59 -0
- casadi/include/licenses/casadi-sundials/kinsol/LICENSE +59 -0
- casadi/include/licenses/casadi-sundials/sundials/LICENSE +67 -0
- casadi/include/licenses/cbc-external/Cbc/LICENSE +239 -0
- casadi/include/licenses/cbc-external/LICENSE +245 -0
- casadi/include/licenses/cgl-external/Cgl/LICENSE +239 -0
- casadi/include/licenses/cgl-external/LICENSE +245 -0
- casadi/include/licenses/clp-external/Clp/LICENSE +239 -0
- casadi/include/licenses/clp-external/LICENSE +245 -0
- casadi/include/licenses/coinutils-external/CoinUtils/LICENSE +239 -0
- casadi/include/licenses/coinutils-external/LICENSE +245 -0
- casadi/include/licenses/daqp-external/LICENSE +21 -0
- casadi/include/licenses/ghc-external/LICENSE +19 -0
- casadi/include/licenses/highs-external/LICENSE.txt +21 -0
- casadi/include/licenses/highs-external/extern/filereaderlp/LICENSE +19 -0
- casadi/include/licenses/ipopt-external/LICENSE +260 -0
- casadi/include/licenses/libz-external/LICENSE +22 -0
- casadi/include/licenses/libz-external/contrib/dotzlib/LICENSE_1_0.txt +23 -0
- casadi/include/licenses/libzip-external/LICENSE +31 -0
- casadi/include/licenses/metis-external/LICENSE +87 -0
- casadi/include/licenses/metis-external/metis-4.0/LICENSE +18 -0
- casadi/include/licenses/mockups-external/LICENSE +21 -0
- casadi/include/licenses/mumps-external/LICENSE +87 -0
- casadi/include/licenses/mumps-external/MUMPS/LICENSE +50 -0
- casadi/include/licenses/openblas-external/LICENSE +29 -0
- casadi/include/licenses/openblas-external/ctest/LICENSE +23 -0
- casadi/include/licenses/openblas-external/lapack-netlib/LAPACKE/LICENSE +26 -0
- casadi/include/licenses/openblas-external/lapack-netlib/LICENSE +48 -0
- casadi/include/licenses/openblas-external/reference/LICENSE +23 -0
- casadi/include/licenses/openblas-external/relapack/LICENSE +22 -0
- casadi/include/licenses/openblas-external/test/LICENSE +23 -0
- casadi/include/licenses/osi-external/LICENSE +245 -0
- casadi/include/licenses/osi-external/Osi/LICENSE +239 -0
- casadi/include/licenses/osqp-external/LICENSE +201 -0
- casadi/include/licenses/osqp-external/lin_sys/direct/qdldl/amd/LICENSE +36 -0
- casadi/include/licenses/osqp-external/lin_sys/direct/qdldl/qdldl_sources/LICENSE +201 -0
- casadi/include/licenses/proxqp-external/LICENSE +25 -0
- casadi/include/licenses/proxqp-external/bindings/python/external/pybind11/LICENSE +29 -0
- casadi/include/licenses/proxqp-external/cmake-module/LICENSE +4 -0
- casadi/include/licenses/proxqp-external/cmake-module/doxygen/MathJax/LICENSE +202 -0
- casadi/include/licenses/proxqp-external/external/cereal/LICENSE +24 -0
- casadi/include/licenses/proxqp-external/external/cereal/include/cereal/external/LICENSE +21 -0
- casadi/include/licenses/proxqp-external/external/cereal/include/cereal/external/rapidjson/LICENSE +13 -0
- casadi/include/licenses/proxqp-external/external/cereal/include/cereal/external/rapidjson/msinttypes/LICENSE +29 -0
- casadi/include/licenses/qpOASES/LICENSE.txt +503 -0
- casadi/include/licenses/sleqp-external/LICENSE +165 -0
- casadi/include/licenses/superscs-external/LICENSE.txt +24 -0
- casadi/include/licenses/tinyxml2-9.0.0/LICENSE.txt +18 -0
- casadi/include/licenses/trlib-external/LICENSE +21 -0
- casadi/include/osqp/auxil.h +181 -0
- casadi/include/osqp/constants.h +128 -0
- casadi/include/osqp/cs.h +180 -0
- casadi/include/osqp/ctrlc.h +56 -0
- casadi/include/osqp/error.h +38 -0
- casadi/include/osqp/glob_opts.h +167 -0
- casadi/include/osqp/lin_alg.h +216 -0
- casadi/include/osqp/lin_sys.h +54 -0
- casadi/include/osqp/osqp.h +430 -0
- casadi/include/osqp/osqp_configure.h +49 -0
- casadi/include/osqp/polish.h +25 -0
- casadi/include/osqp/proj.h +37 -0
- casadi/include/osqp/scaling.h +44 -0
- casadi/include/osqp/types.h +326 -0
- casadi/include/osqp/util.h +222 -0
- casadi/include/osqp/version.h +9 -0
- casadi/include/proxsuite/config.hpp +68 -0
- casadi/include/proxsuite/deprecated.hpp +56 -0
- casadi/include/proxsuite/fwd.hpp +52 -0
- casadi/include/proxsuite/helpers/common.hpp +70 -0
- casadi/include/proxsuite/helpers/instruction-set.hpp +275 -0
- casadi/include/proxsuite/helpers/optional.hpp +46 -0
- casadi/include/proxsuite/helpers/tl-optional.hpp +2472 -0
- casadi/include/proxsuite/helpers/version.hpp +39 -0
- casadi/include/proxsuite/linalg/dense/core.hpp +863 -0
- casadi/include/proxsuite/linalg/dense/factorize.hpp +375 -0
- casadi/include/proxsuite/linalg/dense/ldlt.hpp +817 -0
- casadi/include/proxsuite/linalg/dense/modify.hpp +333 -0
- casadi/include/proxsuite/linalg/dense/solve.hpp +38 -0
- casadi/include/proxsuite/linalg/dense/update.hpp +330 -0
- casadi/include/proxsuite/linalg/sparse/core.hpp +531 -0
- casadi/include/proxsuite/linalg/sparse/factorize.hpp +1303 -0
- casadi/include/proxsuite/linalg/sparse/rowmod.hpp +443 -0
- casadi/include/proxsuite/linalg/sparse/update.hpp +348 -0
- casadi/include/proxsuite/linalg/veg/internal/assert_impl.hpp +20 -0
- casadi/include/proxsuite/linalg/veg/internal/collection_algo.hpp +93 -0
- casadi/include/proxsuite/linalg/veg/internal/dbg.hpp +15 -0
- casadi/include/proxsuite/linalg/veg/internal/delete_special_members.hpp +77 -0
- casadi/include/proxsuite/linalg/veg/internal/dyn_index.hpp +292 -0
- casadi/include/proxsuite/linalg/veg/internal/epilogue.hpp +31 -0
- casadi/include/proxsuite/linalg/veg/internal/external/hedley.ext.hpp +2074 -0
- casadi/include/proxsuite/linalg/veg/internal/external/unhedley.ext.hpp +148 -0
- casadi/include/proxsuite/linalg/veg/internal/fix_index.hpp +339 -0
- casadi/include/proxsuite/linalg/veg/internal/has_asan.hpp +17 -0
- casadi/include/proxsuite/linalg/veg/internal/integer_seq.hpp +248 -0
- casadi/include/proxsuite/linalg/veg/internal/macros.hpp +1312 -0
- casadi/include/proxsuite/linalg/veg/internal/narrow.hpp +46 -0
- casadi/include/proxsuite/linalg/veg/internal/preprocessor.hpp +434 -0
- casadi/include/proxsuite/linalg/veg/internal/prologue.hpp +157 -0
- casadi/include/proxsuite/linalg/veg/internal/std.hpp +13 -0
- casadi/include/proxsuite/linalg/veg/internal/terminate.hpp +22 -0
- casadi/include/proxsuite/linalg/veg/internal/typedefs.hpp +58 -0
- casadi/include/proxsuite/linalg/veg/memory/address.hpp +97 -0
- casadi/include/proxsuite/linalg/veg/memory/alloc.hpp +352 -0
- casadi/include/proxsuite/linalg/veg/memory/dynamic_stack.hpp +504 -0
- casadi/include/proxsuite/linalg/veg/memory/placement.hpp +202 -0
- casadi/include/proxsuite/linalg/veg/memory/stack_alloc.hpp +239 -0
- casadi/include/proxsuite/linalg/veg/ref.hpp +148 -0
- casadi/include/proxsuite/linalg/veg/slice.hpp +240 -0
- casadi/include/proxsuite/linalg/veg/tuple.hpp +876 -0
- casadi/include/proxsuite/linalg/veg/type_traits/alloc.hpp +169 -0
- casadi/include/proxsuite/linalg/veg/type_traits/assignable.hpp +53 -0
- casadi/include/proxsuite/linalg/veg/type_traits/constructible.hpp +217 -0
- casadi/include/proxsuite/linalg/veg/type_traits/core.hpp +298 -0
- casadi/include/proxsuite/linalg/veg/type_traits/invocable.hpp +47 -0
- casadi/include/proxsuite/linalg/veg/type_traits/primitives.hpp +43 -0
- casadi/include/proxsuite/linalg/veg/type_traits/tags.hpp +47 -0
- casadi/include/proxsuite/linalg/veg/util/assert.hpp +48 -0
- casadi/include/proxsuite/linalg/veg/util/dbg.hpp +6 -0
- casadi/include/proxsuite/linalg/veg/util/defer.hpp +57 -0
- casadi/include/proxsuite/linalg/veg/util/dynstack_alloc.hpp +19 -0
- casadi/include/proxsuite/linalg/veg/util/get.hpp +153 -0
- casadi/include/proxsuite/linalg/veg/util/index.hpp +6 -0
- casadi/include/proxsuite/linalg/veg/util/unreachable.hpp +41 -0
- casadi/include/proxsuite/linalg/veg/vec.hpp +1034 -0
- casadi/include/proxsuite/proxqp/dense/dense.hpp +10 -0
- casadi/include/proxsuite/proxqp/dense/fwd.hpp +55 -0
- casadi/include/proxsuite/proxqp/dense/helpers.hpp +520 -0
- casadi/include/proxsuite/proxqp/dense/linesearch.hpp +517 -0
- casadi/include/proxsuite/proxqp/dense/model.hpp +147 -0
- casadi/include/proxsuite/proxqp/dense/preconditioner/identity.hpp +113 -0
- casadi/include/proxsuite/proxqp/dense/preconditioner/ruiz.hpp +571 -0
- casadi/include/proxsuite/proxqp/dense/solver.hpp +1330 -0
- casadi/include/proxsuite/proxqp/dense/utils.hpp +415 -0
- casadi/include/proxsuite/proxqp/dense/views.hpp +1466 -0
- casadi/include/proxsuite/proxqp/dense/workspace.hpp +264 -0
- casadi/include/proxsuite/proxqp/dense/wrapper.hpp +491 -0
- casadi/include/proxsuite/proxqp/results.hpp +212 -0
- casadi/include/proxsuite/proxqp/settings.hpp +302 -0
- casadi/include/proxsuite/proxqp/sparse/fwd.hpp +58 -0
- casadi/include/proxsuite/proxqp/sparse/helpers.hpp +309 -0
- casadi/include/proxsuite/proxqp/sparse/model.hpp +228 -0
- casadi/include/proxsuite/proxqp/sparse/preconditioner/identity.hpp +64 -0
- casadi/include/proxsuite/proxqp/sparse/preconditioner/ruiz.hpp +569 -0
- casadi/include/proxsuite/proxqp/sparse/solver.hpp +1441 -0
- casadi/include/proxsuite/proxqp/sparse/sparse.hpp +10 -0
- casadi/include/proxsuite/proxqp/sparse/utils.hpp +815 -0
- casadi/include/proxsuite/proxqp/sparse/views.hpp +63 -0
- casadi/include/proxsuite/proxqp/sparse/workspace.hpp +790 -0
- casadi/include/proxsuite/proxqp/sparse/wrapper.hpp +772 -0
- casadi/include/proxsuite/proxqp/status.hpp +46 -0
- casadi/include/proxsuite/proxqp/timings.hpp +101 -0
- casadi/include/proxsuite/proxqp/utils/prints.hpp +47 -0
- casadi/include/proxsuite/proxqp/utils/random_qp_problems.hpp +669 -0
- casadi/include/proxsuite/serialization/archive.hpp +231 -0
- casadi/include/proxsuite/serialization/eigen.hpp +107 -0
- casadi/include/proxsuite/serialization/model.hpp +34 -0
- casadi/include/proxsuite/serialization/results.hpp +74 -0
- casadi/include/proxsuite/serialization/settings.hpp +60 -0
- casadi/include/proxsuite/serialization/wrapper.hpp +24 -0
- casadi/include/proxsuite/warning.hpp +35 -0
- casadi/include/simde/arm/neon/aba.h +208 -0
- casadi/include/simde/arm/neon/abd.h +384 -0
- casadi/include/simde/arm/neon/abdl.h +147 -0
- casadi/include/simde/arm/neon/abs.h +408 -0
- casadi/include/simde/arm/neon/add.h +681 -0
- casadi/include/simde/arm/neon/addl.h +127 -0
- casadi/include/simde/arm/neon/addl_high.h +127 -0
- casadi/include/simde/arm/neon/addlv.h +317 -0
- casadi/include/simde/arm/neon/addv.h +447 -0
- casadi/include/simde/arm/neon/addw.h +222 -0
- casadi/include/simde/arm/neon/addw_high.h +193 -0
- casadi/include/simde/arm/neon/and.h +552 -0
- casadi/include/simde/arm/neon/bic.h +472 -0
- casadi/include/simde/arm/neon/bsl.h +448 -0
- casadi/include/simde/arm/neon/cagt.h +168 -0
- casadi/include/simde/arm/neon/ceq.h +711 -0
- casadi/include/simde/arm/neon/ceqz.h +335 -0
- casadi/include/simde/arm/neon/cge.h +677 -0
- casadi/include/simde/arm/neon/cgez.h +378 -0
- casadi/include/simde/arm/neon/cgt.h +686 -0
- casadi/include/simde/arm/neon/cgtz.h +380 -0
- casadi/include/simde/arm/neon/cle.h +677 -0
- casadi/include/simde/arm/neon/clez.h +378 -0
- casadi/include/simde/arm/neon/cls.h +148 -0
- casadi/include/simde/arm/neon/clt.h +679 -0
- casadi/include/simde/arm/neon/cltz.h +263 -0
- casadi/include/simde/arm/neon/clz.h +423 -0
- casadi/include/simde/arm/neon/cnt.h +145 -0
- casadi/include/simde/arm/neon/combine.h +343 -0
- casadi/include/simde/arm/neon/create.h +186 -0
- casadi/include/simde/arm/neon/cvt.h +492 -0
- casadi/include/simde/arm/neon/dot.h +171 -0
- casadi/include/simde/arm/neon/dot_lane.h +196 -0
- casadi/include/simde/arm/neon/dup_lane.h +702 -0
- casadi/include/simde/arm/neon/dup_n.h +534 -0
- casadi/include/simde/arm/neon/eor.h +552 -0
- casadi/include/simde/arm/neon/ext.h +887 -0
- casadi/include/simde/arm/neon/get_high.h +260 -0
- casadi/include/simde/arm/neon/get_lane.h +499 -0
- casadi/include/simde/arm/neon/get_low.h +276 -0
- casadi/include/simde/arm/neon/hadd.h +287 -0
- casadi/include/simde/arm/neon/hsub.h +287 -0
- casadi/include/simde/arm/neon/ld1.h +399 -0
- casadi/include/simde/arm/neon/ld3.h +609 -0
- casadi/include/simde/arm/neon/ld4.h +448 -0
- casadi/include/simde/arm/neon/max.h +614 -0
- casadi/include/simde/arm/neon/maxnm.h +215 -0
- casadi/include/simde/arm/neon/maxv.h +400 -0
- casadi/include/simde/arm/neon/min.h +660 -0
- casadi/include/simde/arm/neon/minnm.h +215 -0
- casadi/include/simde/arm/neon/minv.h +424 -0
- casadi/include/simde/arm/neon/mla.h +530 -0
- casadi/include/simde/arm/neon/mla_n.h +333 -0
- casadi/include/simde/arm/neon/mlal.h +156 -0
- casadi/include/simde/arm/neon/mlal_high.h +156 -0
- casadi/include/simde/arm/neon/mlal_n.h +128 -0
- casadi/include/simde/arm/neon/mls.h +264 -0
- casadi/include/simde/arm/neon/mlsl.h +124 -0
- casadi/include/simde/arm/neon/mlsl_high.h +124 -0
- casadi/include/simde/arm/neon/mlsl_n.h +96 -0
- casadi/include/simde/arm/neon/movl.h +208 -0
- casadi/include/simde/arm/neon/movl_high.h +126 -0
- casadi/include/simde/arm/neon/movn.h +195 -0
- casadi/include/simde/arm/neon/movn_high.h +125 -0
- casadi/include/simde/arm/neon/mul.h +594 -0
- casadi/include/simde/arm/neon/mul_lane.h +472 -0
- casadi/include/simde/arm/neon/mul_n.h +383 -0
- casadi/include/simde/arm/neon/mull.h +236 -0
- casadi/include/simde/arm/neon/mull_high.h +125 -0
- casadi/include/simde/arm/neon/mull_n.h +158 -0
- casadi/include/simde/arm/neon/mvn.h +426 -0
- casadi/include/simde/arm/neon/neg.h +393 -0
- casadi/include/simde/arm/neon/orn.h +505 -0
- casadi/include/simde/arm/neon/orr.h +552 -0
- casadi/include/simde/arm/neon/padal.h +211 -0
- casadi/include/simde/arm/neon/padd.h +293 -0
- casadi/include/simde/arm/neon/paddl.h +239 -0
- casadi/include/simde/arm/neon/pmax.h +253 -0
- casadi/include/simde/arm/neon/pmin.h +260 -0
- casadi/include/simde/arm/neon/qabs.h +281 -0
- casadi/include/simde/arm/neon/qadd.h +553 -0
- casadi/include/simde/arm/neon/qdmulh.h +125 -0
- casadi/include/simde/arm/neon/qdmull.h +125 -0
- casadi/include/simde/arm/neon/qmovn.h +273 -0
- casadi/include/simde/arm/neon/qmovn_high.h +127 -0
- casadi/include/simde/arm/neon/qmovun.h +159 -0
- casadi/include/simde/arm/neon/qneg.h +301 -0
- casadi/include/simde/arm/neon/qrdmulh.h +165 -0
- casadi/include/simde/arm/neon/qrdmulh_n.h +136 -0
- casadi/include/simde/arm/neon/qshl.h +732 -0
- casadi/include/simde/arm/neon/qsub.h +549 -0
- casadi/include/simde/arm/neon/qtbl.h +455 -0
- casadi/include/simde/arm/neon/qtbx.h +470 -0
- casadi/include/simde/arm/neon/rbit.h +165 -0
- casadi/include/simde/arm/neon/reinterpret.h +3101 -0
- casadi/include/simde/arm/neon/rev16.h +137 -0
- casadi/include/simde/arm/neon/rev32.h +235 -0
- casadi/include/simde/arm/neon/rev64.h +358 -0
- casadi/include/simde/arm/neon/rhadd.h +406 -0
- casadi/include/simde/arm/neon/rnd.h +143 -0
- casadi/include/simde/arm/neon/rndi.h +135 -0
- casadi/include/simde/arm/neon/rndm.h +143 -0
- casadi/include/simde/arm/neon/rndn.h +135 -0
- casadi/include/simde/arm/neon/rndp.h +143 -0
- casadi/include/simde/arm/neon/rshl.h +903 -0
- casadi/include/simde/arm/neon/rshr_n.h +471 -0
- casadi/include/simde/arm/neon/rsra_n.h +209 -0
- casadi/include/simde/arm/neon/set_lane.h +422 -0
- casadi/include/simde/arm/neon/shl.h +805 -0
- casadi/include/simde/arm/neon/shl_n.h +560 -0
- casadi/include/simde/arm/neon/shr_n.h +612 -0
- casadi/include/simde/arm/neon/sra_n.h +202 -0
- casadi/include/simde/arm/neon/st1.h +353 -0
- casadi/include/simde/arm/neon/st1_lane.h +363 -0
- casadi/include/simde/arm/neon/st3.h +426 -0
- casadi/include/simde/arm/neon/st4.h +445 -0
- casadi/include/simde/arm/neon/sub.h +659 -0
- casadi/include/simde/arm/neon/subl.h +127 -0
- casadi/include/simde/arm/neon/subw.h +221 -0
- casadi/include/simde/arm/neon/subw_high.h +222 -0
- casadi/include/simde/arm/neon/tbl.h +224 -0
- casadi/include/simde/arm/neon/tbx.h +247 -0
- casadi/include/simde/arm/neon/trn.h +252 -0
- casadi/include/simde/arm/neon/trn1.h +500 -0
- casadi/include/simde/arm/neon/trn2.h +499 -0
- casadi/include/simde/arm/neon/tst.h +540 -0
- casadi/include/simde/arm/neon/types.h +683 -0
- casadi/include/simde/arm/neon/uqadd.h +325 -0
- casadi/include/simde/arm/neon/uzp.h +252 -0
- casadi/include/simde/arm/neon/uzp1.h +643 -0
- casadi/include/simde/arm/neon/uzp2.h +647 -0
- casadi/include/simde/arm/neon/zip.h +252 -0
- casadi/include/simde/arm/neon/zip1.h +625 -0
- casadi/include/simde/arm/neon/zip2.h +625 -0
- casadi/include/simde/arm/neon.h +166 -0
- casadi/include/simde/check.h +276 -0
- casadi/include/simde/debug-trap.h +85 -0
- casadi/include/simde/hedley.h +1971 -0
- casadi/include/simde/simde-align.h +449 -0
- casadi/include/simde/simde-arch.h +532 -0
- casadi/include/simde/simde-common.h +890 -0
- casadi/include/simde/simde-complex.h +148 -0
- casadi/include/simde/simde-constify.h +397 -0
- casadi/include/simde/simde-detect-clang.h +109 -0
- casadi/include/simde/simde-diagnostic.h +428 -0
- casadi/include/simde/simde-features.h +522 -0
- casadi/include/simde/simde-math.h +1805 -0
- casadi/include/simde/x86/avx.h +6193 -0
- casadi/include/simde/x86/avx2.h +5660 -0
- casadi/include/simde/x86/avx512/2intersect.h +250 -0
- casadi/include/simde/x86/avx512/abs.h +562 -0
- casadi/include/simde/x86/avx512/add.h +641 -0
- casadi/include/simde/x86/avx512/adds.h +390 -0
- casadi/include/simde/x86/avx512/and.h +305 -0
- casadi/include/simde/x86/avx512/andnot.h +193 -0
- casadi/include/simde/x86/avx512/avg.h +258 -0
- casadi/include/simde/x86/avx512/blend.h +293 -0
- casadi/include/simde/x86/avx512/broadcast.h +897 -0
- casadi/include/simde/x86/avx512/cast.h +324 -0
- casadi/include/simde/x86/avx512/cmp.h +587 -0
- casadi/include/simde/x86/avx512/cmpeq.h +179 -0
- casadi/include/simde/x86/avx512/cmpge.h +104 -0
- casadi/include/simde/x86/avx512/cmpgt.h +189 -0
- casadi/include/simde/x86/avx512/cmple.h +103 -0
- casadi/include/simde/x86/avx512/cmplt.h +123 -0
- casadi/include/simde/x86/avx512/copysign.h +86 -0
- casadi/include/simde/x86/avx512/cvt.h +122 -0
- casadi/include/simde/x86/avx512/cvts.h +723 -0
- casadi/include/simde/x86/avx512/div.h +162 -0
- casadi/include/simde/x86/avx512/extract.h +198 -0
- casadi/include/simde/x86/avx512/fmadd.h +136 -0
- casadi/include/simde/x86/avx512/fmsub.h +108 -0
- casadi/include/simde/x86/avx512/fnmadd.h +108 -0
- casadi/include/simde/x86/avx512/fnmsub.h +108 -0
- casadi/include/simde/x86/avx512/insert.h +193 -0
- casadi/include/simde/x86/avx512/kshift.h +152 -0
- casadi/include/simde/x86/avx512/load.h +67 -0
- casadi/include/simde/x86/avx512/loadu.h +113 -0
- casadi/include/simde/x86/avx512/lzcnt.h +209 -0
- casadi/include/simde/x86/avx512/madd.h +155 -0
- casadi/include/simde/x86/avx512/maddubs.h +159 -0
- casadi/include/simde/x86/avx512/max.h +587 -0
- casadi/include/simde/x86/avx512/min.h +587 -0
- casadi/include/simde/x86/avx512/mov.h +859 -0
- casadi/include/simde/x86/avx512/mov_mask.h +372 -0
- casadi/include/simde/x86/avx512/movm.h +460 -0
- casadi/include/simde/x86/avx512/mul.h +279 -0
- casadi/include/simde/x86/avx512/mulhi.h +65 -0
- casadi/include/simde/x86/avx512/mulhrs.h +65 -0
- casadi/include/simde/x86/avx512/mullo.h +117 -0
- casadi/include/simde/x86/avx512/negate.h +88 -0
- casadi/include/simde/x86/avx512/or.h +252 -0
- casadi/include/simde/x86/avx512/packs.h +122 -0
- casadi/include/simde/x86/avx512/packus.h +122 -0
- casadi/include/simde/x86/avx512/permutex2var.h +1645 -0
- casadi/include/simde/x86/avx512/permutexvar.h +1180 -0
- casadi/include/simde/x86/avx512/sad.h +77 -0
- casadi/include/simde/x86/avx512/set.h +477 -0
- casadi/include/simde/x86/avx512/set1.h +331 -0
- casadi/include/simde/x86/avx512/set4.h +140 -0
- casadi/include/simde/x86/avx512/setone.h +66 -0
- casadi/include/simde/x86/avx512/setr.h +144 -0
- casadi/include/simde/x86/avx512/setr4.h +140 -0
- casadi/include/simde/x86/avx512/setzero.h +90 -0
- casadi/include/simde/x86/avx512/shuffle.h +176 -0
- casadi/include/simde/x86/avx512/sll.h +247 -0
- casadi/include/simde/x86/avx512/slli.h +179 -0
- casadi/include/simde/x86/avx512/sllv.h +68 -0
- casadi/include/simde/x86/avx512/sqrt.h +127 -0
- casadi/include/simde/x86/avx512/sra.h +81 -0
- casadi/include/simde/x86/avx512/srai.h +70 -0
- casadi/include/simde/x86/avx512/srav.h +67 -0
- casadi/include/simde/x86/avx512/srl.h +216 -0
- casadi/include/simde/x86/avx512/srli.h +180 -0
- casadi/include/simde/x86/avx512/srlv.h +282 -0
- casadi/include/simde/x86/avx512/store.h +93 -0
- casadi/include/simde/x86/avx512/storeu.h +93 -0
- casadi/include/simde/x86/avx512/sub.h +351 -0
- casadi/include/simde/x86/avx512/subs.h +222 -0
- casadi/include/simde/x86/avx512/test.h +193 -0
- casadi/include/simde/x86/avx512/types.h +380 -0
- casadi/include/simde/x86/avx512/unpackhi.h +380 -0
- casadi/include/simde/x86/avx512/unpacklo.h +104 -0
- casadi/include/simde/x86/avx512/xor.h +263 -0
- casadi/include/simde/x86/avx512/xorsign.h +72 -0
- casadi/include/simde/x86/avx512.h +108 -0
- casadi/include/simde/x86/clmul.h +414 -0
- casadi/include/simde/x86/fma.h +724 -0
- casadi/include/simde/x86/gfni.h +802 -0
- casadi/include/simde/x86/mmx.h +2399 -0
- casadi/include/simde/x86/sse.h +4471 -0
- casadi/include/simde/x86/sse2.h +7389 -0
- casadi/include/simde/x86/sse3.h +499 -0
- casadi/include/simde/x86/sse4.1.h +2216 -0
- casadi/include/simde/x86/sse4.2.h +347 -0
- casadi/include/simde/x86/ssse3.h +1032 -0
- casadi/include/simde/x86/svml.h +12139 -0
- casadi/include/simde/x86/xop.h +3644 -0
- casadi/include/superscs/cones.h +185 -0
- casadi/include/superscs/constants.h +144 -0
- casadi/include/superscs/cs.h +109 -0
- casadi/include/superscs/ctrlc.h +77 -0
- casadi/include/superscs/directions.h +125 -0
- casadi/include/superscs/glbopts.h +240 -0
- casadi/include/superscs/linAlg.h +437 -0
- casadi/include/superscs/linSys.h +205 -0
- casadi/include/superscs/linsys/amatrix.h +77 -0
- casadi/include/superscs/linsys/common.h +49 -0
- casadi/include/superscs/normalize.h +138 -0
- casadi/include/superscs/scs.h +656 -0
- casadi/include/superscs/scs_blas.h +79 -0
- casadi/include/superscs/scs_parser.h +187 -0
- casadi/include/superscs/unit_test_util.h +210 -0
- casadi/include/superscs/util.h +354 -0
- casadi/include/trlib/trlib_eigen_inverse.h +118 -0
- casadi/include/trlib/trlib_krylov.h +493 -0
- casadi/include/trlib/trlib_leftmost.h +181 -0
- casadi/include/trlib/trlib_private.h +109 -0
- casadi/include/trlib/trlib_quadratic_zero.h +57 -0
- casadi/include/trlib/trlib_tri_factor.h +409 -0
- casadi/include/trlib/trlib_types.h +36 -0
- casadi/libCbc.la +35 -0
- casadi/libCbc.so +0 -0
- casadi/libCbc.so.3 +0 -0
- casadi/libCbc.so.3.10.11 +0 -0
- casadi/libCbcSolver.la +35 -0
- casadi/libCbcSolver.so +0 -0
- casadi/libCbcSolver.so.3 +0 -0
- casadi/libCbcSolver.so.3.10.11 +0 -0
- casadi/libCgl.la +35 -0
- casadi/libCgl.so +0 -0
- casadi/libCgl.so.1 +0 -0
- casadi/libCgl.so.1.10.8 +0 -0
- casadi/libClp.la +35 -0
- casadi/libClp.so +0 -0
- casadi/libClp.so.1 +0 -0
- casadi/libClp.so.1.14.9 +0 -0
- casadi/libClpSolver.la +35 -0
- casadi/libClpSolver.so +0 -0
- casadi/libClpSolver.so.1 +0 -0
- casadi/libClpSolver.so.1.14.9 +0 -0
- casadi/libCoinUtils.la +35 -0
- casadi/libCoinUtils.so +0 -0
- casadi/libCoinUtils.so.3 +0 -0
- casadi/libCoinUtils.so.3.11.10 +0 -0
- casadi/libOsi.la +35 -0
- casadi/libOsi.so +0 -0
- casadi/libOsi.so.1 +0 -0
- casadi/libOsi.so.1.13.9 +0 -0
- casadi/libOsiCbc.la +35 -0
- casadi/libOsiCbc.so +0 -0
- casadi/libOsiCbc.so.3 +0 -0
- casadi/libOsiCbc.so.3.10.11 +0 -0
- casadi/libOsiClp.la +35 -0
- casadi/libOsiClp.so +0 -0
- casadi/libOsiClp.so.1 +0 -0
- casadi/libOsiClp.so.1.14.9 +0 -0
- casadi/libOsiCommonTests.la +35 -0
- casadi/libOsiCommonTests.so +0 -0
- casadi/libOsiCommonTests.so.1 +0 -0
- casadi/libOsiCommonTests.so.1.13.9 +0 -0
- casadi/libalpaqa-dl-loader.so +0 -0
- casadi/libalpaqa-dl-loader.so.1.0.0 +0 -0
- casadi/libalpaqa.so +0 -0
- casadi/libalpaqa.so.1.0.0 +0 -0
- casadi/libbonmin.la +35 -0
- casadi/libbonmin.so +0 -0
- casadi/libbonmin.so.4 +0 -0
- casadi/libbonmin.so.4.8.9 +0 -0
- casadi/libcasadi-tp-openblas.so +0 -0
- casadi/libcasadi-tp-openblas.so.0 +0 -0
- casadi/libcasadi-tp-openblas.so.0.3 +0 -0
- casadi/libcasadi.so +0 -0
- casadi/libcasadi.so.3.7 +0 -0
- casadi/libcasadi_archiver_libzip.so +0 -0
- casadi/libcasadi_archiver_libzip.so.3.7 +0 -0
- casadi/libcasadi_conic_cbc.so +0 -0
- casadi/libcasadi_conic_cbc.so.3.7 +0 -0
- casadi/libcasadi_conic_clp.so +0 -0
- casadi/libcasadi_conic_clp.so.3.7 +0 -0
- casadi/libcasadi_conic_daqp.so +0 -0
- casadi/libcasadi_conic_daqp.so.3.7 +0 -0
- casadi/libcasadi_conic_gurobi.so +0 -0
- casadi/libcasadi_conic_gurobi.so.3.7 +0 -0
- casadi/libcasadi_conic_highs.so +0 -0
- casadi/libcasadi_conic_highs.so.3.7 +0 -0
- casadi/libcasadi_conic_ipqp.so +0 -0
- casadi/libcasadi_conic_ipqp.so.3.7 +0 -0
- casadi/libcasadi_conic_nlpsol.so +0 -0
- casadi/libcasadi_conic_nlpsol.so.3.7 +0 -0
- casadi/libcasadi_conic_osqp.so +0 -0
- casadi/libcasadi_conic_osqp.so.3.7 +0 -0
- casadi/libcasadi_conic_proxqp.so +0 -0
- casadi/libcasadi_conic_proxqp.so.3.7 +0 -0
- casadi/libcasadi_conic_qpoases.so +0 -0
- casadi/libcasadi_conic_qpoases.so.3.7 +0 -0
- casadi/libcasadi_conic_qrqp.so +0 -0
- casadi/libcasadi_conic_qrqp.so.3.7 +0 -0
- casadi/libcasadi_conic_superscs.so +0 -0
- casadi/libcasadi_conic_superscs.so.3.7 +0 -0
- casadi/libcasadi_filesystem_ghc.so +0 -0
- casadi/libcasadi_filesystem_ghc.so.3.7 +0 -0
- casadi/libcasadi_importer_shell.so +0 -0
- casadi/libcasadi_importer_shell.so.3.7 +0 -0
- casadi/libcasadi_integrator_collocation.so +0 -0
- casadi/libcasadi_integrator_collocation.so.3.7 +0 -0
- casadi/libcasadi_integrator_cvodes.so +0 -0
- casadi/libcasadi_integrator_cvodes.so.3.7 +0 -0
- casadi/libcasadi_integrator_idas.so +0 -0
- casadi/libcasadi_integrator_idas.so.3.7 +0 -0
- casadi/libcasadi_integrator_rk.so +0 -0
- casadi/libcasadi_integrator_rk.so.3.7 +0 -0
- casadi/libcasadi_interpolant_bspline.so +0 -0
- casadi/libcasadi_interpolant_bspline.so.3.7 +0 -0
- casadi/libcasadi_interpolant_linear.so +0 -0
- casadi/libcasadi_interpolant_linear.so.3.7 +0 -0
- casadi/libcasadi_linsol_csparse.so +0 -0
- casadi/libcasadi_linsol_csparse.so.3.7 +0 -0
- casadi/libcasadi_linsol_csparsecholesky.so +0 -0
- casadi/libcasadi_linsol_csparsecholesky.so.3.7 +0 -0
- casadi/libcasadi_linsol_lapacklu.so +0 -0
- casadi/libcasadi_linsol_lapacklu.so.3.7 +0 -0
- casadi/libcasadi_linsol_lapackqr.so +0 -0
- casadi/libcasadi_linsol_lapackqr.so.3.7 +0 -0
- casadi/libcasadi_linsol_ldl.so +0 -0
- casadi/libcasadi_linsol_ldl.so.3.7 +0 -0
- casadi/libcasadi_linsol_lsqr.so +0 -0
- casadi/libcasadi_linsol_lsqr.so.3.7 +0 -0
- casadi/libcasadi_linsol_ma27.so +0 -0
- casadi/libcasadi_linsol_ma27.so.3.7 +0 -0
- casadi/libcasadi_linsol_mumps.so +0 -0
- casadi/libcasadi_linsol_mumps.so.3.7 +0 -0
- casadi/libcasadi_linsol_qr.so +0 -0
- casadi/libcasadi_linsol_qr.so.3.7 +0 -0
- casadi/libcasadi_linsol_symbolicqr.so +0 -0
- casadi/libcasadi_linsol_symbolicqr.so.3.7 +0 -0
- casadi/libcasadi_linsol_tridiag.so +0 -0
- casadi/libcasadi_linsol_tridiag.so.3.7 +0 -0
- casadi/libcasadi_nlpsol_alpaqa.so +0 -0
- casadi/libcasadi_nlpsol_alpaqa.so.3.7 +0 -0
- casadi/libcasadi_nlpsol_ampl.so +0 -0
- casadi/libcasadi_nlpsol_ampl.so.3.7 +0 -0
- casadi/libcasadi_nlpsol_blocksqp.so +0 -0
- casadi/libcasadi_nlpsol_blocksqp.so.3.7 +0 -0
- casadi/libcasadi_nlpsol_bonmin.so +0 -0
- casadi/libcasadi_nlpsol_bonmin.so.3.7 +0 -0
- casadi/libcasadi_nlpsol_feasiblesqpmethod.so +0 -0
- casadi/libcasadi_nlpsol_feasiblesqpmethod.so.3.7 +0 -0
- casadi/libcasadi_nlpsol_ipopt.so +0 -0
- casadi/libcasadi_nlpsol_ipopt.so.3.7 +0 -0
- casadi/libcasadi_nlpsol_knitro.so +0 -0
- casadi/libcasadi_nlpsol_knitro.so.3.7 +0 -0
- casadi/libcasadi_nlpsol_madnlp.so +0 -0
- casadi/libcasadi_nlpsol_madnlp.so.3.7 +0 -0
- casadi/libcasadi_nlpsol_qrsqp.so +0 -0
- casadi/libcasadi_nlpsol_qrsqp.so.3.7 +0 -0
- casadi/libcasadi_nlpsol_scpgen.so +0 -0
- casadi/libcasadi_nlpsol_scpgen.so.3.7 +0 -0
- casadi/libcasadi_nlpsol_sleqp.so +0 -0
- casadi/libcasadi_nlpsol_sleqp.so.3.7 +0 -0
- casadi/libcasadi_nlpsol_snopt.so +0 -0
- casadi/libcasadi_nlpsol_snopt.so.3.7 +0 -0
- casadi/libcasadi_nlpsol_sqpmethod.so +0 -0
- casadi/libcasadi_nlpsol_sqpmethod.so.3.7 +0 -0
- casadi/libcasadi_nlpsol_worhp.so +0 -0
- casadi/libcasadi_nlpsol_worhp.so.3.7 +0 -0
- casadi/libcasadi_rootfinder_fast_newton.so +0 -0
- casadi/libcasadi_rootfinder_fast_newton.so.3.7 +0 -0
- casadi/libcasadi_rootfinder_kinsol.so +0 -0
- casadi/libcasadi_rootfinder_kinsol.so.3.7 +0 -0
- casadi/libcasadi_rootfinder_newton.so +0 -0
- casadi/libcasadi_rootfinder_newton.so.3.7 +0 -0
- casadi/libcasadi_rootfinder_nlpsol.so +0 -0
- casadi/libcasadi_rootfinder_nlpsol.so.3.7 +0 -0
- casadi/libcasadi_sundials_common.so +0 -0
- casadi/libcasadi_sundials_common.so.3.7 +0 -0
- casadi/libcasadi_xmlfile_tinyxml.so +0 -0
- casadi/libcasadi_xmlfile_tinyxml.so.3.7 +0 -0
- casadi/libcoinmetis.la +41 -0
- casadi/libcoinmetis.so +0 -0
- casadi/libcoinmetis.so.2 +0 -0
- casadi/libcoinmetis.so.2.0.0 +0 -0
- casadi/libcoinmumps.la +41 -0
- casadi/libcoinmumps.so +0 -0
- casadi/libcoinmumps.so.3 +0 -0
- casadi/libcoinmumps.so.3.0.1 +0 -0
- casadi/libdaqp.so +0 -0
- casadi/libdaqpstat.a +0 -0
- casadi/libgfortran-a8535147.so.5.0.0 +0 -0
- casadi/libgurobi_adaptor.so +0 -0
- casadi/libhighs.so +0 -0
- casadi/libhighs.so.1 +0 -0
- casadi/libhighs.so.1.10.0 +0 -0
- casadi/libindirect.a +0 -0
- casadi/libipopt.la +41 -0
- casadi/libipopt.so +0 -0
- casadi/libipopt.so.3 +0 -0
- casadi/libipopt.so.3.14.11 +0 -0
- casadi/liblinsys.a +0 -0
- casadi/libmatlab_ipc.so +0 -0
- casadi/libosqp.a +0 -0
- casadi/libosqp.so +0 -0
- casadi/libqdldl.a +0 -0
- casadi/libqdldl.so +0 -0
- casadi/libquadmath-e2ac3af2.so.0.0.0 +0 -0
- casadi/libsipopt.la +41 -0
- casadi/libsipopt.so +0 -0
- casadi/libsipopt.so.3 +0 -0
- casadi/libsipopt.so.3.14.11 +0 -0
- casadi/libsleqp.so +0 -0
- casadi/libsleqp.so.1.0.1 +0 -0
- casadi/libspral.a +0 -0
- casadi/libsuperscs.a +0 -0
- casadi/libtrlib.so +0 -0
- casadi/libtrlib.so.0.4 +0 -0
- casadi/libz.a +0 -0
- casadi/libz.so +0 -0
- casadi/libz.so.1 +0 -0
- casadi/libz.so.1.3.1 +0 -0
- casadi/libzip.a +0 -0
- casadi/pkgconfig/blas.pc +11 -0
- casadi/pkgconfig/bonmin.pc +12 -0
- casadi/pkgconfig/casadi.pc +12 -0
- casadi/pkgconfig/cbc.pc +12 -0
- casadi/pkgconfig/cgl.pc +12 -0
- casadi/pkgconfig/clp.pc +12 -0
- casadi/pkgconfig/coinmetis.pc +13 -0
- casadi/pkgconfig/coinmumps.pc +15 -0
- casadi/pkgconfig/coinutils.pc +12 -0
- casadi/pkgconfig/highs.pc +12 -0
- casadi/pkgconfig/ipopt.pc +15 -0
- casadi/pkgconfig/lapack.pc +11 -0
- casadi/pkgconfig/libzip.pc +14 -0
- casadi/pkgconfig/openblas.pc +11 -0
- casadi/pkgconfig/osi-cbc.pc +12 -0
- casadi/pkgconfig/osi-clp.pc +12 -0
- casadi/pkgconfig/osi-unittests.pc +12 -0
- casadi/pkgconfig/osi.pc +12 -0
- casadi/pkgconfig/proxsuite.pc +22 -0
- casadi/pkgconfig/sleqp.pc +10 -0
- casadi/tools/__init__.py +54 -0
- casadi/tools/bounds.py +107 -0
- casadi/tools/graph/__init__.py +35 -0
- casadi/tools/graph/graph.py +747 -0
- casadi/tools/in_out.py +89 -0
- casadi/tools/structure3.py +1441 -0
- casadi-3.7.2.dist-info/METADATA +45 -0
- casadi-3.7.2.dist-info/RECORD +1726 -0
- casadi-3.7.2.dist-info/WHEEL +4 -0
- dummy.txt +1 -0
@@ -0,0 +1,4471 @@
|
|
1
|
+
/* SPDX-License-Identifier: MIT
|
2
|
+
*
|
3
|
+
* Permission is hereby granted, free of charge, to any person
|
4
|
+
* obtaining a copy of this software and associated documentation
|
5
|
+
* files (the "Software"), to deal in the Software without
|
6
|
+
* restriction, including without limitation the rights to use, copy,
|
7
|
+
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
8
|
+
* of the Software, and to permit persons to whom the Software is
|
9
|
+
* furnished to do so, subject to the following conditions:
|
10
|
+
*
|
11
|
+
* The above copyright notice and this permission notice shall be
|
12
|
+
* included in all copies or substantial portions of the Software.
|
13
|
+
*
|
14
|
+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
18
|
+
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
19
|
+
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
20
|
+
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
* SOFTWARE.
|
22
|
+
*
|
23
|
+
* Copyright:
|
24
|
+
* 2017-2020 Evan Nemerson <evan@nemerson.com>
|
25
|
+
* 2015-2017 John W. Ratcliff <jratcliffscarab@gmail.com>
|
26
|
+
* 2015 Brandon Rowlett <browlett@nvidia.com>
|
27
|
+
* 2015 Ken Fast <kfast@gdeb.com>
|
28
|
+
*/
|
29
|
+
|
30
|
+
#if !defined(SIMDE_X86_SSE_H)
|
31
|
+
#define SIMDE_X86_SSE_H
|
32
|
+
|
33
|
+
#include "mmx.h"
|
34
|
+
|
35
|
+
#if defined(_WIN32)
|
36
|
+
#include <windows.h>
|
37
|
+
#endif
|
38
|
+
|
39
|
+
HEDLEY_DIAGNOSTIC_PUSH
|
40
|
+
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
41
|
+
SIMDE_BEGIN_DECLS_
|
42
|
+
|
43
|
+
typedef union {
|
44
|
+
#if defined(SIMDE_VECTOR_SUBSCRIPT)
|
45
|
+
SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
|
46
|
+
SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
|
47
|
+
SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
|
48
|
+
SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
|
49
|
+
SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
|
50
|
+
SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
|
51
|
+
SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
|
52
|
+
SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
|
53
|
+
#if defined(SIMDE_HAVE_INT128_)
|
54
|
+
SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
|
55
|
+
SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
|
56
|
+
#endif
|
57
|
+
SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
|
58
|
+
SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
|
59
|
+
SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
|
60
|
+
#else
|
61
|
+
SIMDE_ALIGN_TO_16 int8_t i8[16];
|
62
|
+
SIMDE_ALIGN_TO_16 int16_t i16[8];
|
63
|
+
SIMDE_ALIGN_TO_16 int32_t i32[4];
|
64
|
+
SIMDE_ALIGN_TO_16 int64_t i64[2];
|
65
|
+
SIMDE_ALIGN_TO_16 uint8_t u8[16];
|
66
|
+
SIMDE_ALIGN_TO_16 uint16_t u16[8];
|
67
|
+
SIMDE_ALIGN_TO_16 uint32_t u32[4];
|
68
|
+
SIMDE_ALIGN_TO_16 uint64_t u64[2];
|
69
|
+
#if defined(SIMDE_HAVE_INT128_)
|
70
|
+
SIMDE_ALIGN_TO_16 simde_int128 i128[1];
|
71
|
+
SIMDE_ALIGN_TO_16 simde_uint128 u128[1];
|
72
|
+
#endif
|
73
|
+
SIMDE_ALIGN_TO_16 simde_float32 f32[4];
|
74
|
+
SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)];
|
75
|
+
SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)];
|
76
|
+
#endif
|
77
|
+
|
78
|
+
SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2];
|
79
|
+
SIMDE_ALIGN_TO_16 simde__m64 m64[2];
|
80
|
+
|
81
|
+
#if defined(SIMDE_X86_SSE_NATIVE)
|
82
|
+
SIMDE_ALIGN_TO_16 __m128 n;
|
83
|
+
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
84
|
+
SIMDE_ALIGN_TO_16 int8x16_t neon_i8;
|
85
|
+
SIMDE_ALIGN_TO_16 int16x8_t neon_i16;
|
86
|
+
SIMDE_ALIGN_TO_16 int32x4_t neon_i32;
|
87
|
+
SIMDE_ALIGN_TO_16 int64x2_t neon_i64;
|
88
|
+
SIMDE_ALIGN_TO_16 uint8x16_t neon_u8;
|
89
|
+
SIMDE_ALIGN_TO_16 uint16x8_t neon_u16;
|
90
|
+
SIMDE_ALIGN_TO_16 uint32x4_t neon_u32;
|
91
|
+
SIMDE_ALIGN_TO_16 uint64x2_t neon_u64;
|
92
|
+
SIMDE_ALIGN_TO_16 float32x4_t neon_f32;
|
93
|
+
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
94
|
+
SIMDE_ALIGN_TO_16 float64x2_t neon_f64;
|
95
|
+
#endif
|
96
|
+
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
97
|
+
SIMDE_ALIGN_TO_16 v128_t wasm_v128;
|
98
|
+
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
99
|
+
SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8;
|
100
|
+
SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16;
|
101
|
+
SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32;
|
102
|
+
SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8;
|
103
|
+
SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16;
|
104
|
+
SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32;
|
105
|
+
SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32;
|
106
|
+
#if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
|
107
|
+
SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64;
|
108
|
+
SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64;
|
109
|
+
SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64;
|
110
|
+
#endif
|
111
|
+
#endif
|
112
|
+
} simde__m128_private;
|
113
|
+
|
114
|
+
#if defined(SIMDE_X86_SSE_NATIVE)
|
115
|
+
typedef __m128 simde__m128;
|
116
|
+
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
117
|
+
typedef float32x4_t simde__m128;
|
118
|
+
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
119
|
+
typedef v128_t simde__m128;
|
120
|
+
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
121
|
+
typedef SIMDE_POWER_ALTIVEC_VECTOR(float) simde__m128;
|
122
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT)
|
123
|
+
typedef simde_float32 simde__m128 SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
|
124
|
+
#else
|
125
|
+
typedef simde__m128_private simde__m128;
|
126
|
+
#endif
|
127
|
+
|
128
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
129
|
+
typedef simde__m128 __m128;
|
130
|
+
#endif
|
131
|
+
|
132
|
+
HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128), "simde__m128 size incorrect");
|
133
|
+
HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128_private), "simde__m128_private size incorrect");
|
134
|
+
#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF)
|
135
|
+
HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128) == 16, "simde__m128 is not 16-byte aligned");
|
136
|
+
HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128_private) == 16, "simde__m128_private is not 16-byte aligned");
|
137
|
+
#endif
|
138
|
+
|
139
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
140
|
+
simde__m128
|
141
|
+
simde__m128_from_private(simde__m128_private v) {
|
142
|
+
simde__m128 r;
|
143
|
+
simde_memcpy(&r, &v, sizeof(r));
|
144
|
+
return r;
|
145
|
+
}
|
146
|
+
|
147
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
148
|
+
simde__m128_private
|
149
|
+
simde__m128_to_private(simde__m128 v) {
|
150
|
+
simde__m128_private r;
|
151
|
+
simde_memcpy(&r, &v, sizeof(r));
|
152
|
+
return r;
|
153
|
+
}
|
154
|
+
|
155
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
156
|
+
SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int8x16_t, neon, i8)
|
157
|
+
SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int16x8_t, neon, i16)
|
158
|
+
SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int32x4_t, neon, i32)
|
159
|
+
SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int64x2_t, neon, i64)
|
160
|
+
SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint8x16_t, neon, u8)
|
161
|
+
SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint16x8_t, neon, u16)
|
162
|
+
SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint32x4_t, neon, u32)
|
163
|
+
SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint64x2_t, neon, u64)
|
164
|
+
SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, float32x4_t, neon, f32)
|
165
|
+
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
166
|
+
SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, float64x2_t, neon, f64)
|
167
|
+
#endif
|
168
|
+
#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */
|
169
|
+
|
170
|
+
#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
171
|
+
SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8)
|
172
|
+
SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16)
|
173
|
+
SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32)
|
174
|
+
SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8)
|
175
|
+
SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16)
|
176
|
+
SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32)
|
177
|
+
|
178
|
+
#if defined(SIMDE_BUG_GCC_95782)
|
179
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
180
|
+
SIMDE_POWER_ALTIVEC_VECTOR(float)
|
181
|
+
simde__m128_to_altivec_f32(simde__m128 value) {
|
182
|
+
simde__m128_private r_ = simde__m128_to_private(value);
|
183
|
+
return r_.altivec_f32;
|
184
|
+
}
|
185
|
+
|
186
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
187
|
+
simde__m128
|
188
|
+
simde__m128_from_altivec_f32(SIMDE_POWER_ALTIVEC_VECTOR(float) value) {
|
189
|
+
simde__m128_private r_;
|
190
|
+
r_.altivec_f32 = value;
|
191
|
+
return simde__m128_from_private(r_);
|
192
|
+
}
|
193
|
+
#else
|
194
|
+
SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(float), altivec, f32)
|
195
|
+
#endif
|
196
|
+
|
197
|
+
#if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
|
198
|
+
SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64)
|
199
|
+
SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64)
|
200
|
+
#endif
|
201
|
+
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
202
|
+
SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v128_t, wasm, v128);
|
203
|
+
#endif /* defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) */
|
204
|
+
|
205
|
+
enum {
|
206
|
+
#if defined(SIMDE_X86_SSE_NATIVE)
|
207
|
+
SIMDE_MM_ROUND_NEAREST = _MM_ROUND_NEAREST,
|
208
|
+
SIMDE_MM_ROUND_DOWN = _MM_ROUND_DOWN,
|
209
|
+
SIMDE_MM_ROUND_UP = _MM_ROUND_UP,
|
210
|
+
SIMDE_MM_ROUND_TOWARD_ZERO = _MM_ROUND_TOWARD_ZERO
|
211
|
+
#else
|
212
|
+
SIMDE_MM_ROUND_NEAREST = 0x0000,
|
213
|
+
SIMDE_MM_ROUND_DOWN = 0x2000,
|
214
|
+
SIMDE_MM_ROUND_UP = 0x4000,
|
215
|
+
SIMDE_MM_ROUND_TOWARD_ZERO = 0x6000
|
216
|
+
#endif
|
217
|
+
};
|
218
|
+
|
219
|
+
#if defined(_MM_FROUND_TO_NEAREST_INT)
|
220
|
+
# define SIMDE_MM_FROUND_TO_NEAREST_INT _MM_FROUND_TO_NEAREST_INT
|
221
|
+
# define SIMDE_MM_FROUND_TO_NEG_INF _MM_FROUND_TO_NEG_INF
|
222
|
+
# define SIMDE_MM_FROUND_TO_POS_INF _MM_FROUND_TO_POS_INF
|
223
|
+
# define SIMDE_MM_FROUND_TO_ZERO _MM_FROUND_TO_ZERO
|
224
|
+
# define SIMDE_MM_FROUND_CUR_DIRECTION _MM_FROUND_CUR_DIRECTION
|
225
|
+
|
226
|
+
# define SIMDE_MM_FROUND_RAISE_EXC _MM_FROUND_RAISE_EXC
|
227
|
+
# define SIMDE_MM_FROUND_NO_EXC _MM_FROUND_NO_EXC
|
228
|
+
#else
|
229
|
+
# define SIMDE_MM_FROUND_TO_NEAREST_INT 0x00
|
230
|
+
# define SIMDE_MM_FROUND_TO_NEG_INF 0x01
|
231
|
+
# define SIMDE_MM_FROUND_TO_POS_INF 0x02
|
232
|
+
# define SIMDE_MM_FROUND_TO_ZERO 0x03
|
233
|
+
# define SIMDE_MM_FROUND_CUR_DIRECTION 0x04
|
234
|
+
|
235
|
+
# define SIMDE_MM_FROUND_RAISE_EXC 0x00
|
236
|
+
# define SIMDE_MM_FROUND_NO_EXC 0x08
|
237
|
+
#endif
|
238
|
+
|
239
|
+
#define SIMDE_MM_FROUND_NINT \
|
240
|
+
(SIMDE_MM_FROUND_TO_NEAREST_INT | SIMDE_MM_FROUND_RAISE_EXC)
|
241
|
+
#define SIMDE_MM_FROUND_FLOOR \
|
242
|
+
(SIMDE_MM_FROUND_TO_NEG_INF | SIMDE_MM_FROUND_RAISE_EXC)
|
243
|
+
#define SIMDE_MM_FROUND_CEIL \
|
244
|
+
(SIMDE_MM_FROUND_TO_POS_INF | SIMDE_MM_FROUND_RAISE_EXC)
|
245
|
+
#define SIMDE_MM_FROUND_TRUNC \
|
246
|
+
(SIMDE_MM_FROUND_TO_ZERO | SIMDE_MM_FROUND_RAISE_EXC)
|
247
|
+
#define SIMDE_MM_FROUND_RINT \
|
248
|
+
(SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_RAISE_EXC)
|
249
|
+
#define SIMDE_MM_FROUND_NEARBYINT \
|
250
|
+
(SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_NO_EXC)
|
251
|
+
|
252
|
+
#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) && !defined(_MM_FROUND_TO_NEAREST_INT)
|
253
|
+
# define _MM_FROUND_TO_NEAREST_INT SIMDE_MM_FROUND_TO_NEAREST_INT
|
254
|
+
# define _MM_FROUND_TO_NEG_INF SIMDE_MM_FROUND_TO_NEG_INF
|
255
|
+
# define _MM_FROUND_TO_POS_INF SIMDE_MM_FROUND_TO_POS_INF
|
256
|
+
# define _MM_FROUND_TO_ZERO SIMDE_MM_FROUND_TO_ZERO
|
257
|
+
# define _MM_FROUND_CUR_DIRECTION SIMDE_MM_FROUND_CUR_DIRECTION
|
258
|
+
# define _MM_FROUND_RAISE_EXC SIMDE_MM_FROUND_RAISE_EXC
|
259
|
+
# define _MM_FROUND_NINT SIMDE_MM_FROUND_NINT
|
260
|
+
# define _MM_FROUND_FLOOR SIMDE_MM_FROUND_FLOOR
|
261
|
+
# define _MM_FROUND_CEIL SIMDE_MM_FROUND_CEIL
|
262
|
+
# define _MM_FROUND_TRUNC SIMDE_MM_FROUND_TRUNC
|
263
|
+
# define _MM_FROUND_RINT SIMDE_MM_FROUND_RINT
|
264
|
+
# define _MM_FROUND_NEARBYINT SIMDE_MM_FROUND_NEARBYINT
|
265
|
+
#endif
|
266
|
+
|
267
|
+
#if defined(_MM_EXCEPT_INVALID)
|
268
|
+
# define SIMDE_MM_EXCEPT_INVALID _MM_EXCEPT_INVALID
|
269
|
+
#else
|
270
|
+
# define SIMDE_MM_EXCEPT_INVALID (0x0001)
|
271
|
+
#endif
|
272
|
+
#if defined(_MM_EXCEPT_DENORM)
|
273
|
+
# define SIMDE_MM_EXCEPT_DENORM _MM_EXCEPT_DENORM
|
274
|
+
#else
|
275
|
+
# define SIMDE_MM_EXCEPT_DENORM (0x0002)
|
276
|
+
#endif
|
277
|
+
#if defined(_MM_EXCEPT_DIV_ZERO)
|
278
|
+
# define SIMDE_MM_EXCEPT_DIV_ZERO _MM_EXCEPT_DIV_ZERO
|
279
|
+
#else
|
280
|
+
# define SIMDE_MM_EXCEPT_DIV_ZERO (0x0004)
|
281
|
+
#endif
|
282
|
+
#if defined(_MM_EXCEPT_OVERFLOW)
|
283
|
+
# define SIMDE_MM_EXCEPT_OVERFLOW _MM_EXCEPT_OVERFLOW
|
284
|
+
#else
|
285
|
+
# define SIMDE_MM_EXCEPT_OVERFLOW (0x0008)
|
286
|
+
#endif
|
287
|
+
#if defined(_MM_EXCEPT_UNDERFLOW)
|
288
|
+
# define SIMDE_MM_EXCEPT_UNDERFLOW _MM_EXCEPT_UNDERFLOW
|
289
|
+
#else
|
290
|
+
# define SIMDE_MM_EXCEPT_UNDERFLOW (0x0010)
|
291
|
+
#endif
|
292
|
+
#if defined(_MM_EXCEPT_INEXACT)
|
293
|
+
# define SIMDE_MM_EXCEPT_INEXACT _MM_EXCEPT_INEXACT
|
294
|
+
#else
|
295
|
+
# define SIMDE_MM_EXCEPT_INEXACT (0x0020)
|
296
|
+
#endif
|
297
|
+
#if defined(_MM_EXCEPT_MASK)
|
298
|
+
# define SIMDE_MM_EXCEPT_MASK _MM_EXCEPT_MASK
|
299
|
+
#else
|
300
|
+
# define SIMDE_MM_EXCEPT_MASK \
|
301
|
+
(SIMDE_MM_EXCEPT_INVALID | SIMDE_MM_EXCEPT_DENORM | \
|
302
|
+
SIMDE_MM_EXCEPT_DIV_ZERO | SIMDE_MM_EXCEPT_OVERFLOW | \
|
303
|
+
SIMDE_MM_EXCEPT_UNDERFLOW | SIMDE_MM_EXCEPT_INEXACT)
|
304
|
+
#endif
|
305
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
306
|
+
#define _MM_EXCEPT_INVALID SIMDE_MM_EXCEPT_INVALID
|
307
|
+
#define _MM_EXCEPT_DENORM SIMDE_MM_EXCEPT_DENORM
|
308
|
+
#define _MM_EXCEPT_DIV_ZERO SIMDE_MM_EXCEPT_DIV_ZERO
|
309
|
+
#define _MM_EXCEPT_OVERFLOW SIMDE_MM_EXCEPT_OVERFLOW
|
310
|
+
#define _MM_EXCEPT_UNDERFLOW SIMDE_MM_EXCEPT_UNDERFLOW
|
311
|
+
#define _MM_EXCEPT_INEXACT SIMDE_MM_EXCEPT_INEXACT
|
312
|
+
#define _MM_EXCEPT_MASK SIMDE_MM_EXCEPT_MASK
|
313
|
+
#endif
|
314
|
+
|
315
|
+
#if defined(_MM_MASK_INVALID)
|
316
|
+
# define SIMDE_MM_MASK_INVALID _MM_MASK_INVALID
|
317
|
+
#else
|
318
|
+
# define SIMDE_MM_MASK_INVALID (0x0080)
|
319
|
+
#endif
|
320
|
+
#if defined(_MM_MASK_DENORM)
|
321
|
+
# define SIMDE_MM_MASK_DENORM _MM_MASK_DENORM
|
322
|
+
#else
|
323
|
+
# define SIMDE_MM_MASK_DENORM (0x0100)
|
324
|
+
#endif
|
325
|
+
#if defined(_MM_MASK_DIV_ZERO)
|
326
|
+
# define SIMDE_MM_MASK_DIV_ZERO _MM_MASK_DIV_ZERO
|
327
|
+
#else
|
328
|
+
# define SIMDE_MM_MASK_DIV_ZERO (0x0200)
|
329
|
+
#endif
|
330
|
+
#if defined(_MM_MASK_OVERFLOW)
|
331
|
+
# define SIMDE_MM_MASK_OVERFLOW _MM_MASK_OVERFLOW
|
332
|
+
#else
|
333
|
+
# define SIMDE_MM_MASK_OVERFLOW (0x0400)
|
334
|
+
#endif
|
335
|
+
#if defined(_MM_MASK_UNDERFLOW)
|
336
|
+
# define SIMDE_MM_MASK_UNDERFLOW _MM_MASK_UNDERFLOW
|
337
|
+
#else
|
338
|
+
# define SIMDE_MM_MASK_UNDERFLOW (0x0800)
|
339
|
+
#endif
|
340
|
+
#if defined(_MM_MASK_INEXACT)
|
341
|
+
# define SIMDE_MM_MASK_INEXACT _MM_MASK_INEXACT
|
342
|
+
#else
|
343
|
+
# define SIMDE_MM_MASK_INEXACT (0x1000)
|
344
|
+
#endif
|
345
|
+
#if defined(_MM_MASK_MASK)
|
346
|
+
# define SIMDE_MM_MASK_MASK _MM_MASK_MASK
|
347
|
+
#else
|
348
|
+
# define SIMDE_MM_MASK_MASK \
|
349
|
+
(SIMDE_MM_MASK_INVALID | SIMDE_MM_MASK_DENORM | \
|
350
|
+
SIMDE_MM_MASK_DIV_ZERO | SIMDE_MM_MASK_OVERFLOW | \
|
351
|
+
SIMDE_MM_MASK_UNDERFLOW | SIMDE_MM_MASK_INEXACT)
|
352
|
+
#endif
|
353
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
354
|
+
#define _MM_MASK_INVALID SIMDE_MM_MASK_INVALID
|
355
|
+
#define _MM_MASK_DENORM SIMDE_MM_MASK_DENORM
|
356
|
+
#define _MM_MASK_DIV_ZERO SIMDE_MM_MASK_DIV_ZERO
|
357
|
+
#define _MM_MASK_OVERFLOW SIMDE_MM_MASK_OVERFLOW
|
358
|
+
#define _MM_MASK_UNDERFLOW SIMDE_MM_MASK_UNDERFLOW
|
359
|
+
#define _MM_MASK_INEXACT SIMDE_MM_MASK_INEXACT
|
360
|
+
#define _MM_MASK_MASK SIMDE_MM_MASK_MASK
|
361
|
+
#endif
|
362
|
+
|
363
|
+
#if defined(_MM_FLUSH_ZERO_MASK)
|
364
|
+
# define SIMDE_MM_FLUSH_ZERO_MASK _MM_FLUSH_ZERO_MASK
|
365
|
+
#else
|
366
|
+
# define SIMDE_MM_FLUSH_ZERO_MASK (0x8000)
|
367
|
+
#endif
|
368
|
+
#if defined(_MM_FLUSH_ZERO_ON)
|
369
|
+
# define SIMDE_MM_FLUSH_ZERO_ON _MM_FLUSH_ZERO_ON
|
370
|
+
#else
|
371
|
+
# define SIMDE_MM_FLUSH_ZERO_ON (0x8000)
|
372
|
+
#endif
|
373
|
+
#if defined(_MM_FLUSH_ZERO_OFF)
|
374
|
+
# define SIMDE_MM_FLUSH_ZERO_OFF _MM_FLUSH_ZERO_OFF
|
375
|
+
#else
|
376
|
+
# define SIMDE_MM_FLUSH_ZERO_OFF (0x0000)
|
377
|
+
#endif
|
378
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
379
|
+
#define _MM_FLUSH_ZERO_MASK SIMDE_MM_FLUSH_ZERO_MASK
|
380
|
+
#define _MM_FLUSH_ZERO_ON SIMDE_MM_FLUSH_ZERO_ON
|
381
|
+
#define _MM_FLUSH_ZERO_OFF SIMDE_MM_FLUSH_ZERO_OFF
|
382
|
+
#endif
|
383
|
+
|
384
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
385
|
+
unsigned int
|
386
|
+
SIMDE_MM_GET_ROUNDING_MODE(void) {
|
387
|
+
#if defined(SIMDE_X86_SSE_NATIVE)
|
388
|
+
return _MM_GET_ROUNDING_MODE();
|
389
|
+
#elif defined(SIMDE_HAVE_FENV_H)
|
390
|
+
unsigned int vfe_mode;
|
391
|
+
|
392
|
+
switch (fegetround()) {
|
393
|
+
#if defined(FE_TONEAREST)
|
394
|
+
case FE_TONEAREST:
|
395
|
+
vfe_mode = SIMDE_MM_ROUND_NEAREST;
|
396
|
+
break;
|
397
|
+
#endif
|
398
|
+
|
399
|
+
#if defined(FE_TOWARDZERO)
|
400
|
+
case FE_TOWARDZERO:
|
401
|
+
vfe_mode = SIMDE_MM_ROUND_DOWN;
|
402
|
+
break;
|
403
|
+
#endif
|
404
|
+
|
405
|
+
#if defined(FE_UPWARD)
|
406
|
+
case FE_UPWARD:
|
407
|
+
vfe_mode = SIMDE_MM_ROUND_UP;
|
408
|
+
break;
|
409
|
+
#endif
|
410
|
+
|
411
|
+
#if defined(FE_DOWNWARD)
|
412
|
+
case FE_DOWNWARD:
|
413
|
+
vfe_mode = SIMDE_MM_ROUND_TOWARD_ZERO;
|
414
|
+
break;
|
415
|
+
#endif
|
416
|
+
|
417
|
+
default:
|
418
|
+
vfe_mode = SIMDE_MM_ROUND_NEAREST;
|
419
|
+
break;
|
420
|
+
}
|
421
|
+
|
422
|
+
return vfe_mode;
|
423
|
+
#else
|
424
|
+
return SIMDE_MM_ROUND_NEAREST;
|
425
|
+
#endif
|
426
|
+
}
|
427
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
428
|
+
#define _MM_GET_ROUNDING_MODE() SIMDE_MM_GET_ROUNDING_MODE()
|
429
|
+
#endif
|
430
|
+
|
431
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
432
|
+
void
|
433
|
+
SIMDE_MM_SET_ROUNDING_MODE(unsigned int a) {
|
434
|
+
#if defined(SIMDE_X86_SSE_NATIVE)
|
435
|
+
_MM_SET_ROUNDING_MODE(a);
|
436
|
+
#elif defined(SIMDE_HAVE_FENV_H)
|
437
|
+
int fe_mode = FE_TONEAREST;
|
438
|
+
|
439
|
+
switch (a) {
|
440
|
+
#if defined(FE_TONEAREST)
|
441
|
+
case SIMDE_MM_ROUND_NEAREST:
|
442
|
+
fe_mode = FE_TONEAREST;
|
443
|
+
break;
|
444
|
+
#endif
|
445
|
+
|
446
|
+
#if defined(FE_TOWARDZERO)
|
447
|
+
case SIMDE_MM_ROUND_TOWARD_ZERO:
|
448
|
+
fe_mode = FE_TOWARDZERO;
|
449
|
+
break;
|
450
|
+
#endif
|
451
|
+
|
452
|
+
#if defined(FE_DOWNWARD)
|
453
|
+
case SIMDE_MM_ROUND_DOWN:
|
454
|
+
fe_mode = FE_DOWNWARD;
|
455
|
+
break;
|
456
|
+
#endif
|
457
|
+
|
458
|
+
#if defined(FE_UPWARD)
|
459
|
+
case SIMDE_MM_ROUND_UP:
|
460
|
+
fe_mode = FE_UPWARD;
|
461
|
+
break;
|
462
|
+
#endif
|
463
|
+
|
464
|
+
default:
|
465
|
+
return;
|
466
|
+
}
|
467
|
+
|
468
|
+
fesetround(fe_mode);
|
469
|
+
#else
|
470
|
+
(void) a;
|
471
|
+
#endif
|
472
|
+
}
|
473
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
474
|
+
#define _MM_SET_ROUNDING_MODE(a) SIMDE_MM_SET_ROUNDING_MODE(a)
|
475
|
+
#endif
|
476
|
+
|
477
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
478
|
+
uint32_t
|
479
|
+
SIMDE_MM_GET_FLUSH_ZERO_MODE (void) {
|
480
|
+
#if defined(SIMDE_X86_SSE_NATIVE)
|
481
|
+
return _mm_getcsr() & _MM_FLUSH_ZERO_MASK;
|
482
|
+
#else
|
483
|
+
return SIMDE_MM_FLUSH_ZERO_OFF;
|
484
|
+
#endif
|
485
|
+
}
|
486
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
487
|
+
#define _MM_SET_FLUSH_ZERO_MODE(a) SIMDE_MM_SET_FLUSH_ZERO_MODE(a)
|
488
|
+
#endif
|
489
|
+
|
490
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
491
|
+
void
|
492
|
+
SIMDE_MM_SET_FLUSH_ZERO_MODE (uint32_t a) {
|
493
|
+
#if defined(SIMDE_X86_SSE_NATIVE)
|
494
|
+
_MM_SET_FLUSH_ZERO_MODE(a);
|
495
|
+
#else
|
496
|
+
(void) a;
|
497
|
+
#endif
|
498
|
+
}
|
499
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
500
|
+
#define _MM_SET_FLUSH_ZERO_MODE(a) SIMDE_MM_SET_FLUSH_ZERO_MODE(a)
|
501
|
+
#endif
|
502
|
+
|
503
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
504
|
+
uint32_t
|
505
|
+
simde_mm_getcsr (void) {
|
506
|
+
#if defined(SIMDE_X86_SSE_NATIVE)
|
507
|
+
return _mm_getcsr();
|
508
|
+
#else
|
509
|
+
return SIMDE_MM_GET_ROUNDING_MODE();
|
510
|
+
#endif
|
511
|
+
}
|
512
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
513
|
+
#define _mm_getcsr() simde_mm_getcsr()
|
514
|
+
#endif
|
515
|
+
|
516
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
517
|
+
void
|
518
|
+
simde_mm_setcsr (uint32_t a) {
|
519
|
+
#if defined(SIMDE_X86_SSE_NATIVE)
|
520
|
+
_mm_setcsr(a);
|
521
|
+
#else
|
522
|
+
SIMDE_MM_SET_ROUNDING_MODE(HEDLEY_STATIC_CAST(unsigned int, a));
|
523
|
+
#endif
|
524
|
+
}
|
525
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
526
|
+
#define _mm_setcsr(a) simde_mm_setcsr(a)
|
527
|
+
#endif
|
528
|
+
|
529
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
530
|
+
simde__m128
|
531
|
+
simde_x_mm_round_ps (simde__m128 a, int rounding, int lax_rounding)
|
532
|
+
SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15)
|
533
|
+
SIMDE_REQUIRE_CONSTANT_RANGE(lax_rounding, 0, 1) {
|
534
|
+
simde__m128_private
|
535
|
+
r_,
|
536
|
+
a_ = simde__m128_to_private(a);
|
537
|
+
|
538
|
+
(void) lax_rounding;
|
539
|
+
|
540
|
+
/* For architectures which lack a current direction SIMD instruction.
|
541
|
+
*
|
542
|
+
* Note that NEON actually has a current rounding mode instruction,
|
543
|
+
* but in ARMv8+ the rounding mode is ignored and nearest is always
|
544
|
+
* used, so we treat ARMv7 as having a rounding mode but ARMv8 as
|
545
|
+
* not. */
|
546
|
+
#if \
|
547
|
+
defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || \
|
548
|
+
defined(SIMDE_ARM_NEON_A32V8)
|
549
|
+
if ((rounding & 7) == SIMDE_MM_FROUND_CUR_DIRECTION)
|
550
|
+
rounding = HEDLEY_STATIC_CAST(int, SIMDE_MM_GET_ROUNDING_MODE()) << 13;
|
551
|
+
#endif
|
552
|
+
|
553
|
+
switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) {
|
554
|
+
case SIMDE_MM_FROUND_CUR_DIRECTION:
|
555
|
+
#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
556
|
+
r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_round(a_.altivec_f32));
|
557
|
+
#elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399)
|
558
|
+
r_.neon_f32 = vrndiq_f32(a_.neon_f32);
|
559
|
+
#elif defined(simde_math_nearbyintf)
|
560
|
+
SIMDE_VECTORIZE
|
561
|
+
for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
|
562
|
+
r_.f32[i] = simde_math_nearbyintf(a_.f32[i]);
|
563
|
+
}
|
564
|
+
#else
|
565
|
+
HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd());
|
566
|
+
#endif
|
567
|
+
break;
|
568
|
+
|
569
|
+
case SIMDE_MM_FROUND_TO_NEAREST_INT:
|
570
|
+
#if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
|
571
|
+
r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_rint(a_.altivec_f32));
|
572
|
+
#elif defined(SIMDE_ARM_NEON_A32V8_NATIVE)
|
573
|
+
r_.neon_f32 = vrndnq_f32(a_.neon_f32);
|
574
|
+
#elif defined(simde_math_roundevenf)
|
575
|
+
SIMDE_VECTORIZE
|
576
|
+
for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
|
577
|
+
r_.f32[i] = simde_math_roundevenf(a_.f32[i]);
|
578
|
+
}
|
579
|
+
#else
|
580
|
+
HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd());
|
581
|
+
#endif
|
582
|
+
break;
|
583
|
+
|
584
|
+
case SIMDE_MM_FROUND_TO_NEG_INF:
|
585
|
+
#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
586
|
+
r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_floor(a_.altivec_f32));
|
587
|
+
#elif defined(SIMDE_ARM_NEON_A32V8_NATIVE)
|
588
|
+
r_.neon_f32 = vrndmq_f32(a_.neon_f32);
|
589
|
+
#elif defined(simde_math_floorf)
|
590
|
+
SIMDE_VECTORIZE
|
591
|
+
for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
|
592
|
+
r_.f32[i] = simde_math_floorf(a_.f32[i]);
|
593
|
+
}
|
594
|
+
#else
|
595
|
+
HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd());
|
596
|
+
#endif
|
597
|
+
break;
|
598
|
+
|
599
|
+
case SIMDE_MM_FROUND_TO_POS_INF:
|
600
|
+
#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
601
|
+
r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_ceil(a_.altivec_f32));
|
602
|
+
#elif defined(SIMDE_ARM_NEON_A32V8_NATIVE)
|
603
|
+
r_.neon_f32 = vrndpq_f32(a_.neon_f32);
|
604
|
+
#elif defined(simde_math_ceilf)
|
605
|
+
SIMDE_VECTORIZE
|
606
|
+
for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
|
607
|
+
r_.f32[i] = simde_math_ceilf(a_.f32[i]);
|
608
|
+
}
|
609
|
+
#else
|
610
|
+
HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd());
|
611
|
+
#endif
|
612
|
+
break;
|
613
|
+
|
614
|
+
case SIMDE_MM_FROUND_TO_ZERO:
|
615
|
+
#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
616
|
+
r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_trunc(a_.altivec_f32));
|
617
|
+
#elif defined(SIMDE_ARM_NEON_A32V8_NATIVE)
|
618
|
+
r_.neon_f32 = vrndq_f32(a_.neon_f32);
|
619
|
+
#elif defined(simde_math_truncf)
|
620
|
+
SIMDE_VECTORIZE
|
621
|
+
for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
|
622
|
+
r_.f32[i] = simde_math_truncf(a_.f32[i]);
|
623
|
+
}
|
624
|
+
#else
|
625
|
+
HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd());
|
626
|
+
#endif
|
627
|
+
break;
|
628
|
+
|
629
|
+
default:
|
630
|
+
HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd());
|
631
|
+
}
|
632
|
+
|
633
|
+
return simde__m128_from_private(r_);
|
634
|
+
}
|
635
|
+
#if defined(SIMDE_X86_SSE4_1_NATIVE)
|
636
|
+
#define simde_mm_round_ps(a, rounding) _mm_round_ps((a), (rounding))
|
637
|
+
#else
|
638
|
+
#define simde_mm_round_ps(a, rounding) simde_x_mm_round_ps((a), (rounding), 0)
|
639
|
+
#endif
|
640
|
+
#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)
|
641
|
+
#define _mm_round_ps(a, rounding) simde_mm_round_ps((a), (rounding))
|
642
|
+
#endif
|
643
|
+
|
644
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
645
|
+
simde__m128
|
646
|
+
simde_mm_set_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) {
|
647
|
+
#if defined(SIMDE_X86_SSE_NATIVE)
|
648
|
+
return _mm_set_ps(e3, e2, e1, e0);
|
649
|
+
#else
|
650
|
+
simde__m128_private r_;
|
651
|
+
|
652
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
653
|
+
SIMDE_ALIGN_TO_16 simde_float32 data[4] = { e0, e1, e2, e3 };
|
654
|
+
r_.neon_f32 = vld1q_f32(data);
|
655
|
+
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
656
|
+
r_.wasm_v128 = wasm_f32x4_make(e0, e1, e2, e3);
|
657
|
+
#else
|
658
|
+
r_.f32[0] = e0;
|
659
|
+
r_.f32[1] = e1;
|
660
|
+
r_.f32[2] = e2;
|
661
|
+
r_.f32[3] = e3;
|
662
|
+
#endif
|
663
|
+
|
664
|
+
return simde__m128_from_private(r_);
|
665
|
+
#endif
|
666
|
+
}
|
667
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
668
|
+
# define _mm_set_ps(e3, e2, e1, e0) simde_mm_set_ps(e3, e2, e1, e0)
|
669
|
+
#endif
|
670
|
+
|
671
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
672
|
+
simde__m128
|
673
|
+
simde_mm_set_ps1 (simde_float32 a) {
|
674
|
+
#if defined(SIMDE_X86_SSE_NATIVE)
|
675
|
+
return _mm_set_ps1(a);
|
676
|
+
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
677
|
+
return vdupq_n_f32(a);
|
678
|
+
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
679
|
+
(void) a;
|
680
|
+
return vec_splats(a);
|
681
|
+
#else
|
682
|
+
return simde_mm_set_ps(a, a, a, a);
|
683
|
+
#endif
|
684
|
+
}
|
685
|
+
#define simde_mm_set1_ps(a) simde_mm_set_ps1(a)
|
686
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
687
|
+
# define _mm_set_ps1(a) simde_mm_set_ps1(a)
|
688
|
+
# define _mm_set1_ps(a) simde_mm_set1_ps(a)
|
689
|
+
#endif
|
690
|
+
|
691
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
692
|
+
simde__m128
|
693
|
+
simde_mm_move_ss (simde__m128 a, simde__m128 b) {
|
694
|
+
#if defined(SIMDE_X86_SSE_NATIVE)
|
695
|
+
return _mm_move_ss(a, b);
|
696
|
+
#else
|
697
|
+
simde__m128_private
|
698
|
+
r_,
|
699
|
+
a_ = simde__m128_to_private(a),
|
700
|
+
b_ = simde__m128_to_private(b);
|
701
|
+
|
702
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
703
|
+
r_.neon_f32 = vsetq_lane_f32(vgetq_lane_f32(b_.neon_f32, 0), a_.neon_f32, 0);
|
704
|
+
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
705
|
+
SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) m = {
|
706
|
+
16, 17, 18, 19,
|
707
|
+
4, 5, 6, 7,
|
708
|
+
8, 9, 10, 11,
|
709
|
+
12, 13, 14, 15
|
710
|
+
};
|
711
|
+
r_.altivec_f32 = vec_perm(a_.altivec_f32, b_.altivec_f32, m);
|
712
|
+
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
713
|
+
r_.wasm_v128 = wasm_v8x16_shuffle(b_.wasm_v128, a_.wasm_v128, 0, 1, 2, 3, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
|
714
|
+
#elif defined(SIMDE_SHUFFLE_VECTOR_)
|
715
|
+
r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 4, 1, 2, 3);
|
716
|
+
#else
|
717
|
+
r_.f32[0] = b_.f32[0];
|
718
|
+
r_.f32[1] = a_.f32[1];
|
719
|
+
r_.f32[2] = a_.f32[2];
|
720
|
+
r_.f32[3] = a_.f32[3];
|
721
|
+
#endif
|
722
|
+
|
723
|
+
return simde__m128_from_private(r_);
|
724
|
+
#endif
|
725
|
+
}
|
726
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
727
|
+
# define _mm_move_ss(a, b) simde_mm_move_ss((a), (b))
|
728
|
+
#endif
|
729
|
+
|
730
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
731
|
+
simde__m128
|
732
|
+
simde_mm_add_ps (simde__m128 a, simde__m128 b) {
|
733
|
+
#if defined(SIMDE_X86_SSE_NATIVE)
|
734
|
+
return _mm_add_ps(a, b);
|
735
|
+
#else
|
736
|
+
simde__m128_private
|
737
|
+
r_,
|
738
|
+
a_ = simde__m128_to_private(a),
|
739
|
+
b_ = simde__m128_to_private(b);
|
740
|
+
|
741
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
742
|
+
r_.neon_f32 = vaddq_f32(a_.neon_f32, b_.neon_f32);
|
743
|
+
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
744
|
+
r_.wasm_v128 = wasm_f32x4_add(a_.wasm_v128, b_.wasm_v128);
|
745
|
+
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
746
|
+
r_.altivec_f32 = vec_add(a_.altivec_f32, b_.altivec_f32);
|
747
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
748
|
+
r_.f32 = a_.f32 + b_.f32;
|
749
|
+
#else
|
750
|
+
SIMDE_VECTORIZE
|
751
|
+
for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
|
752
|
+
r_.f32[i] = a_.f32[i] + b_.f32[i];
|
753
|
+
}
|
754
|
+
#endif
|
755
|
+
|
756
|
+
return simde__m128_from_private(r_);
|
757
|
+
#endif
|
758
|
+
}
|
759
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
760
|
+
# define _mm_add_ps(a, b) simde_mm_add_ps((a), (b))
|
761
|
+
#endif
|
762
|
+
|
763
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
764
|
+
simde__m128
|
765
|
+
simde_mm_add_ss (simde__m128 a, simde__m128 b) {
|
766
|
+
#if defined(SIMDE_X86_SSE_NATIVE)
|
767
|
+
return _mm_add_ss(a, b);
|
768
|
+
#elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
|
769
|
+
return simde_mm_move_ss(a, simde_mm_add_ps(a, b));
|
770
|
+
#else
|
771
|
+
simde__m128_private
|
772
|
+
r_,
|
773
|
+
a_ = simde__m128_to_private(a),
|
774
|
+
b_ = simde__m128_to_private(b);
|
775
|
+
|
776
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
777
|
+
float32_t b0 = vgetq_lane_f32(b_.neon_f32, 0);
|
778
|
+
float32x4_t value = vsetq_lane_f32(b0, vdupq_n_f32(0), 0);
|
779
|
+
// the upper values in the result must be the remnants of <a>.
|
780
|
+
r_.neon_f32 = vaddq_f32(a_.neon_f32, value);
|
781
|
+
#else
|
782
|
+
r_.f32[0] = a_.f32[0] + b_.f32[0];
|
783
|
+
r_.f32[1] = a_.f32[1];
|
784
|
+
r_.f32[2] = a_.f32[2];
|
785
|
+
r_.f32[3] = a_.f32[3];
|
786
|
+
#endif
|
787
|
+
|
788
|
+
return simde__m128_from_private(r_);
|
789
|
+
#endif
|
790
|
+
}
|
791
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
792
|
+
# define _mm_add_ss(a, b) simde_mm_add_ss((a), (b))
|
793
|
+
#endif
|
794
|
+
|
795
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
796
|
+
simde__m128
|
797
|
+
simde_mm_and_ps (simde__m128 a, simde__m128 b) {
|
798
|
+
#if defined(SIMDE_X86_SSE_NATIVE)
|
799
|
+
return _mm_and_ps(a, b);
|
800
|
+
#else
|
801
|
+
simde__m128_private
|
802
|
+
r_,
|
803
|
+
a_ = simde__m128_to_private(a),
|
804
|
+
b_ = simde__m128_to_private(b);
|
805
|
+
|
806
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
807
|
+
r_.neon_i32 = vandq_s32(a_.neon_i32, b_.neon_i32);
|
808
|
+
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
809
|
+
r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128);
|
810
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
811
|
+
r_.i32 = a_.i32 & b_.i32;
|
812
|
+
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
813
|
+
r_.altivec_f32 = vec_and(a_.altivec_f32, b_.altivec_f32);
|
814
|
+
#else
|
815
|
+
SIMDE_VECTORIZE
|
816
|
+
for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
|
817
|
+
r_.i32[i] = a_.i32[i] & b_.i32[i];
|
818
|
+
}
|
819
|
+
#endif
|
820
|
+
|
821
|
+
return simde__m128_from_private(r_);
|
822
|
+
#endif
|
823
|
+
}
|
824
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
825
|
+
# define _mm_and_ps(a, b) simde_mm_and_ps((a), (b))
|
826
|
+
#endif
|
827
|
+
|
828
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
829
|
+
simde__m128
|
830
|
+
simde_mm_andnot_ps (simde__m128 a, simde__m128 b) {
|
831
|
+
#if defined(SIMDE_X86_SSE_NATIVE)
|
832
|
+
return _mm_andnot_ps(a, b);
|
833
|
+
#else
|
834
|
+
simde__m128_private
|
835
|
+
r_,
|
836
|
+
a_ = simde__m128_to_private(a),
|
837
|
+
b_ = simde__m128_to_private(b);
|
838
|
+
|
839
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
840
|
+
r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32);
|
841
|
+
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
842
|
+
r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128);
|
843
|
+
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
844
|
+
r_.altivec_f32 = vec_andc(b_.altivec_f32, a_.altivec_f32);
|
845
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
846
|
+
r_.i32 = ~a_.i32 & b_.i32;
|
847
|
+
#else
|
848
|
+
SIMDE_VECTORIZE
|
849
|
+
for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
|
850
|
+
r_.i32[i] = ~(a_.i32[i]) & b_.i32[i];
|
851
|
+
}
|
852
|
+
#endif
|
853
|
+
|
854
|
+
return simde__m128_from_private(r_);
|
855
|
+
#endif
|
856
|
+
}
|
857
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
858
|
+
# define _mm_andnot_ps(a, b) simde_mm_andnot_ps((a), (b))
|
859
|
+
#endif
|
860
|
+
|
861
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
862
|
+
simde__m128
|
863
|
+
simde_mm_xor_ps (simde__m128 a, simde__m128 b) {
|
864
|
+
#if defined(SIMDE_X86_SSE_NATIVE)
|
865
|
+
return _mm_xor_ps(a, b);
|
866
|
+
#else
|
867
|
+
simde__m128_private
|
868
|
+
r_,
|
869
|
+
a_ = simde__m128_to_private(a),
|
870
|
+
b_ = simde__m128_to_private(b);
|
871
|
+
|
872
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
873
|
+
r_.neon_i32 = veorq_s32(a_.neon_i32, b_.neon_i32);
|
874
|
+
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
875
|
+
r_.wasm_v128 = wasm_v128_xor(a_.wasm_v128, b_.wasm_v128);
|
876
|
+
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
877
|
+
r_.altivec_i32 = vec_xor(a_.altivec_i32, b_.altivec_i32);
|
878
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
879
|
+
r_.i32f = a_.i32f ^ b_.i32f;
|
880
|
+
#else
|
881
|
+
SIMDE_VECTORIZE
|
882
|
+
for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
|
883
|
+
r_.u32[i] = a_.u32[i] ^ b_.u32[i];
|
884
|
+
}
|
885
|
+
#endif
|
886
|
+
|
887
|
+
return simde__m128_from_private(r_);
|
888
|
+
#endif
|
889
|
+
}
|
890
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
891
|
+
# define _mm_xor_ps(a, b) simde_mm_xor_ps((a), (b))
|
892
|
+
#endif
|
893
|
+
|
894
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
895
|
+
simde__m128
|
896
|
+
simde_mm_or_ps (simde__m128 a, simde__m128 b) {
|
897
|
+
#if defined(SIMDE_X86_SSE_NATIVE)
|
898
|
+
return _mm_or_ps(a, b);
|
899
|
+
#else
|
900
|
+
simde__m128_private
|
901
|
+
r_,
|
902
|
+
a_ = simde__m128_to_private(a),
|
903
|
+
b_ = simde__m128_to_private(b);
|
904
|
+
|
905
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
906
|
+
r_.neon_i32 = vorrq_s32(a_.neon_i32, b_.neon_i32);
|
907
|
+
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
908
|
+
r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128);
|
909
|
+
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
910
|
+
r_.altivec_i32 = vec_or(a_.altivec_i32, b_.altivec_i32);
|
911
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
912
|
+
r_.i32f = a_.i32f | b_.i32f;
|
913
|
+
#else
|
914
|
+
SIMDE_VECTORIZE
|
915
|
+
for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
|
916
|
+
r_.u32[i] = a_.u32[i] | b_.u32[i];
|
917
|
+
}
|
918
|
+
#endif
|
919
|
+
|
920
|
+
return simde__m128_from_private(r_);
|
921
|
+
#endif
|
922
|
+
}
|
923
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
924
|
+
# define _mm_or_ps(a, b) simde_mm_or_ps((a), (b))
|
925
|
+
#endif
|
926
|
+
|
927
|
+
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_x_mm_not_ps(simde__m128 a) {
  /* SIMDe-internal helper (no SSE equivalent): bitwise NOT of all 128
   * bits of a, viewed as raw bits rather than floats. */
  #if defined(SIMDE_X86_AVX512VL_NATIVE)
    /* ternarylogic truth table 0x55 yields ~first-operand in one op. */
    __m128i ai = _mm_castps_si128(a);
    return _mm_castsi128_ps(_mm_ternarylogic_epi32(ai, ai, ai, 0x55));
  #elif defined(SIMDE_X86_SSE2_NATIVE)
    /* Note: we use ints instead of floats because we don't want cmpeq
     * to return false for (NaN, NaN).  cmpeq_epi32(ai, ai) is all-ones,
     * so andnot produces ~ai. */
    __m128i ai = _mm_castps_si128(a);
    return _mm_castsi128_ps(_mm_andnot_si128(ai, _mm_cmpeq_epi32(ai, ai)));
  #else
    simde__m128_private
      r_,
      a_ = simde__m128_to_private(a);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i32 = vmvnq_s32(a_.neon_i32);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      /* NOR with itself == NOT. */
      r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_v128_not(a_.wasm_v128);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.i32 = ~a_.i32;
    #else
      /* Portable scalar fallback on the integer view. */
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
        r_.i32[i] = ~(a_.i32[i]);
      }
    #endif

    return simde__m128_from_private(r_);
  #endif
}
|
961
|
+
|
962
|
+
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_x_mm_select_ps(simde__m128 a, simde__m128 b, simde__m128 mask) {
  /* This function is for when you want to blend two elements together
   * according to a mask. It is similar to _mm_blendv_ps, except that
   * it is undefined whether the blend is based on the highest bit in
   * each lane (like blendv) or just bitwise operations. This allows
   * us to implement the function efficiently everywhere.
   *
   * Basically, you promise that all the lanes in mask are either 0 or
   * ~0. */
  #if defined(SIMDE_X86_SSE4_1_NATIVE)
    return _mm_blendv_ps(a, b, mask);
  #else
    simde__m128_private
      r_,
      a_ = simde__m128_to_private(a),
      b_ = simde__m128_to_private(b),
      mask_ = simde__m128_to_private(mask);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      /* bit-select: mask bit set -> take b, clear -> take a. */
      r_.neon_i32 = vbslq_s32(mask_.neon_u32, b_.neon_i32, a_.neon_i32);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_v128_bitselect(b_.wasm_v128, a_.wasm_v128, mask_.wasm_v128);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, mask_.altivec_u32);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      /* Branchless bitwise select: a ^ ((a ^ b) & mask). */
      r_.i32 = a_.i32 ^ ((a_.i32 ^ b_.i32) & mask_.i32);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
        r_.i32[i] = a_.i32[i] ^ ((a_.i32[i] ^ b_.i32[i]) & mask_.i32[i]);
      }
    #endif

    return simde__m128_from_private(r_);
  #endif
}
|
1000
|
+
|
1001
|
+
SIMDE_FUNCTION_ATTRIBUTES
simde__m64
simde_mm_avg_pu16 (simde__m64 a, simde__m64 b) {
  /* Rounding average of unsigned 16-bit lanes: (a + b + 1) >> 1,
   * matching _mm_avg_pu16 (MMX/SSE pavgw). */
  #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
    return _mm_avg_pu16(a, b);
  #else
    simde__m64_private
      r_,
      a_ = simde__m64_to_private(a),
      b_ = simde__m64_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      /* vrhadd = rounding halving add, exactly pavgw semantics. */
      r_.neon_u16 = vrhadd_u16(b_.neon_u16, a_.neon_u16);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_)
      /* Widen to 32 bits so the +1 carry cannot overflow, then narrow. */
      uint32_t wa SIMDE_VECTOR(16);
      uint32_t wb SIMDE_VECTOR(16);
      uint32_t wr SIMDE_VECTOR(16);
      SIMDE_CONVERT_VECTOR_(wa, a_.u16);
      SIMDE_CONVERT_VECTOR_(wb, b_.u16);
      wr = (wa + wb + 1) >> 1;
      SIMDE_CONVERT_VECTOR_(r_.u16, wr);
    #else
      /* Scalar fallback; uint16 operands promote to int, so no overflow. */
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
        r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1;
      }
    #endif

    return simde__m64_from_private(r_);
  #endif
}
#define simde_m_pavgw(a, b) simde_mm_avg_pu16(a, b)
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
#  define _mm_avg_pu16(a, b) simde_mm_avg_pu16(a, b)
#  define _m_pavgw(a, b) simde_mm_avg_pu16(a, b)
#endif
|
1037
|
+
|
1038
|
+
SIMDE_FUNCTION_ATTRIBUTES
simde__m64
simde_mm_avg_pu8 (simde__m64 a, simde__m64 b) {
  /* Rounding average of unsigned 8-bit lanes: (a + b + 1) >> 1,
   * matching _mm_avg_pu8 (MMX/SSE pavgb). */
  #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
    return _mm_avg_pu8(a, b);
  #else
    simde__m64_private
      r_,
      a_ = simde__m64_to_private(a),
      b_ = simde__m64_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      /* vrhadd = rounding halving add, exactly pavgb semantics. */
      r_.neon_u8 = vrhadd_u8(b_.neon_u8, a_.neon_u8);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_)
      /* Widen to 16 bits so the +1 carry cannot overflow, then narrow. */
      uint16_t wa SIMDE_VECTOR(16);
      uint16_t wb SIMDE_VECTOR(16);
      uint16_t wr SIMDE_VECTOR(16);
      SIMDE_CONVERT_VECTOR_(wa, a_.u8);
      SIMDE_CONVERT_VECTOR_(wb, b_.u8);
      wr = (wa + wb + 1) >> 1;
      SIMDE_CONVERT_VECTOR_(r_.u8, wr);
    #else
      /* Scalar fallback; uint8 operands promote to int, so no overflow. */
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
        r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1;
      }
    #endif

    return simde__m64_from_private(r_);
  #endif
}
#define simde_m_pavgb(a, b) simde_mm_avg_pu8(a, b)
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
#  define _mm_avg_pu8(a, b) simde_mm_avg_pu8(a, b)
#  define _m_pavgb(a, b) simde_mm_avg_pu8(a, b)
#endif
|
1074
|
+
|
1075
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
1076
|
+
simde__m128
|
1077
|
+
simde_x_mm_abs_ps(simde__m128 a) {
|
1078
|
+
#if defined(SIMDE_X86_AVX512F_NATIVE) && \
|
1079
|
+
(!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,1,0))
|
1080
|
+
return _mm512_castps512_ps128(_mm512_abs_ps(_mm512_castps128_ps512(a)));
|
1081
|
+
#else
|
1082
|
+
simde__m128_private
|
1083
|
+
r_,
|
1084
|
+
a_ = simde__m128_to_private(a);
|
1085
|
+
|
1086
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
1087
|
+
r_.neon_f32 = vabsq_f32(a_.neon_f32);
|
1088
|
+
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
1089
|
+
r_.altivec_f32 = vec_abs(a_.altivec_f32);
|
1090
|
+
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
1091
|
+
r_.wasm_v128 = wasm_f32x4_abs(a_.wasm_v128);
|
1092
|
+
#else
|
1093
|
+
SIMDE_VECTORIZE
|
1094
|
+
for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
|
1095
|
+
r_.f32[i] = simde_math_fabsf(a_.f32[i]);
|
1096
|
+
}
|
1097
|
+
#endif
|
1098
|
+
|
1099
|
+
return simde__m128_from_private(r_);
|
1100
|
+
#endif
|
1101
|
+
}
|
1102
|
+
|
1103
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
1104
|
+
simde__m128
|
1105
|
+
simde_mm_cmpeq_ps (simde__m128 a, simde__m128 b) {
|
1106
|
+
#if defined(SIMDE_X86_SSE_NATIVE)
|
1107
|
+
return _mm_cmpeq_ps(a, b);
|
1108
|
+
#else
|
1109
|
+
simde__m128_private
|
1110
|
+
r_,
|
1111
|
+
a_ = simde__m128_to_private(a),
|
1112
|
+
b_ = simde__m128_to_private(b);
|
1113
|
+
|
1114
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
1115
|
+
r_.neon_u32 = vceqq_f32(a_.neon_f32, b_.neon_f32);
|
1116
|
+
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
1117
|
+
r_.wasm_v128 = wasm_f32x4_eq(a_.wasm_v128, b_.wasm_v128);
|
1118
|
+
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
1119
|
+
r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpeq(a_.altivec_f32, b_.altivec_f32));
|
1120
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
1121
|
+
r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), a_.f32 == b_.f32);
|
1122
|
+
#else
|
1123
|
+
SIMDE_VECTORIZE
|
1124
|
+
for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
|
1125
|
+
r_.u32[i] = (a_.f32[i] == b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0);
|
1126
|
+
}
|
1127
|
+
#endif
|
1128
|
+
|
1129
|
+
return simde__m128_from_private(r_);
|
1130
|
+
#endif
|
1131
|
+
}
|
1132
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
1133
|
+
# define _mm_cmpeq_ps(a, b) simde_mm_cmpeq_ps((a), (b))
|
1134
|
+
#endif
|
1135
|
+
|
1136
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
1137
|
+
simde__m128
|
1138
|
+
simde_mm_cmpeq_ss (simde__m128 a, simde__m128 b) {
|
1139
|
+
#if defined(SIMDE_X86_SSE_NATIVE)
|
1140
|
+
return _mm_cmpeq_ss(a, b);
|
1141
|
+
#elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
|
1142
|
+
return simde_mm_move_ss(a, simde_mm_cmpeq_ps(a, b));
|
1143
|
+
#else
|
1144
|
+
simde__m128_private
|
1145
|
+
r_,
|
1146
|
+
a_ = simde__m128_to_private(a),
|
1147
|
+
b_ = simde__m128_to_private(b);
|
1148
|
+
|
1149
|
+
r_.u32[0] = (a_.f32[0] == b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0);
|
1150
|
+
SIMDE_VECTORIZE
|
1151
|
+
for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
|
1152
|
+
r_.u32[i] = a_.u32[i];
|
1153
|
+
}
|
1154
|
+
|
1155
|
+
return simde__m128_from_private(r_);
|
1156
|
+
#endif
|
1157
|
+
}
|
1158
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
1159
|
+
# define _mm_cmpeq_ss(a, b) simde_mm_cmpeq_ss((a), (b))
|
1160
|
+
#endif
|
1161
|
+
|
1162
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
1163
|
+
simde__m128
|
1164
|
+
simde_mm_cmpge_ps (simde__m128 a, simde__m128 b) {
|
1165
|
+
#if defined(SIMDE_X86_SSE_NATIVE)
|
1166
|
+
return _mm_cmpge_ps(a, b);
|
1167
|
+
#else
|
1168
|
+
simde__m128_private
|
1169
|
+
r_,
|
1170
|
+
a_ = simde__m128_to_private(a),
|
1171
|
+
b_ = simde__m128_to_private(b);
|
1172
|
+
|
1173
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
1174
|
+
r_.neon_u32 = vcgeq_f32(a_.neon_f32, b_.neon_f32);
|
1175
|
+
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
1176
|
+
r_.wasm_v128 = wasm_f32x4_ge(a_.wasm_v128, b_.wasm_v128);
|
1177
|
+
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
1178
|
+
r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpge(a_.altivec_f32, b_.altivec_f32));
|
1179
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
1180
|
+
r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 >= b_.f32));
|
1181
|
+
#else
|
1182
|
+
SIMDE_VECTORIZE
|
1183
|
+
for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
|
1184
|
+
r_.u32[i] = (a_.f32[i] >= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0);
|
1185
|
+
}
|
1186
|
+
#endif
|
1187
|
+
|
1188
|
+
return simde__m128_from_private(r_);
|
1189
|
+
#endif
|
1190
|
+
}
|
1191
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
1192
|
+
# define _mm_cmpge_ps(a, b) simde_mm_cmpge_ps((a), (b))
|
1193
|
+
#endif
|
1194
|
+
|
1195
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
1196
|
+
simde__m128
|
1197
|
+
simde_mm_cmpge_ss (simde__m128 a, simde__m128 b) {
|
1198
|
+
#if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI)
|
1199
|
+
return _mm_cmpge_ss(a, b);
|
1200
|
+
#elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
|
1201
|
+
return simde_mm_move_ss(a, simde_mm_cmpge_ps(a, b));
|
1202
|
+
#else
|
1203
|
+
simde__m128_private
|
1204
|
+
r_,
|
1205
|
+
a_ = simde__m128_to_private(a),
|
1206
|
+
b_ = simde__m128_to_private(b);
|
1207
|
+
|
1208
|
+
r_.u32[0] = (a_.f32[0] >= b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0);
|
1209
|
+
SIMDE_VECTORIZE
|
1210
|
+
for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
|
1211
|
+
r_.u32[i] = a_.u32[i];
|
1212
|
+
}
|
1213
|
+
|
1214
|
+
return simde__m128_from_private(r_);
|
1215
|
+
#endif
|
1216
|
+
}
|
1217
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
1218
|
+
# define _mm_cmpge_ss(a, b) simde_mm_cmpge_ss((a), (b))
|
1219
|
+
#endif
|
1220
|
+
|
1221
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
1222
|
+
simde__m128
|
1223
|
+
simde_mm_cmpgt_ps (simde__m128 a, simde__m128 b) {
|
1224
|
+
#if defined(SIMDE_X86_SSE_NATIVE)
|
1225
|
+
return _mm_cmpgt_ps(a, b);
|
1226
|
+
#else
|
1227
|
+
simde__m128_private
|
1228
|
+
r_,
|
1229
|
+
a_ = simde__m128_to_private(a),
|
1230
|
+
b_ = simde__m128_to_private(b);
|
1231
|
+
|
1232
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
1233
|
+
r_.neon_u32 = vcgtq_f32(a_.neon_f32, b_.neon_f32);
|
1234
|
+
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
1235
|
+
r_.wasm_v128 = wasm_f32x4_gt(a_.wasm_v128, b_.wasm_v128);
|
1236
|
+
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
1237
|
+
r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpgt(a_.altivec_f32, b_.altivec_f32));
|
1238
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
1239
|
+
r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 > b_.f32));
|
1240
|
+
#else
|
1241
|
+
SIMDE_VECTORIZE
|
1242
|
+
for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
|
1243
|
+
r_.u32[i] = (a_.f32[i] > b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0);
|
1244
|
+
}
|
1245
|
+
#endif
|
1246
|
+
|
1247
|
+
return simde__m128_from_private(r_);
|
1248
|
+
#endif
|
1249
|
+
}
|
1250
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
1251
|
+
# define _mm_cmpgt_ps(a, b) simde_mm_cmpgt_ps((a), (b))
|
1252
|
+
#endif
|
1253
|
+
|
1254
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
1255
|
+
simde__m128
|
1256
|
+
simde_mm_cmpgt_ss (simde__m128 a, simde__m128 b) {
|
1257
|
+
#if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI)
|
1258
|
+
return _mm_cmpgt_ss(a, b);
|
1259
|
+
#elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
|
1260
|
+
return simde_mm_move_ss(a, simde_mm_cmpgt_ps(a, b));
|
1261
|
+
#else
|
1262
|
+
simde__m128_private
|
1263
|
+
r_,
|
1264
|
+
a_ = simde__m128_to_private(a),
|
1265
|
+
b_ = simde__m128_to_private(b);
|
1266
|
+
|
1267
|
+
r_.u32[0] = (a_.f32[0] > b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0);
|
1268
|
+
SIMDE_VECTORIZE
|
1269
|
+
for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
|
1270
|
+
r_.u32[i] = a_.u32[i];
|
1271
|
+
}
|
1272
|
+
|
1273
|
+
return simde__m128_from_private(r_);
|
1274
|
+
#endif
|
1275
|
+
}
|
1276
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
1277
|
+
# define _mm_cmpgt_ss(a, b) simde_mm_cmpgt_ss((a), (b))
|
1278
|
+
#endif
|
1279
|
+
|
1280
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
1281
|
+
simde__m128
|
1282
|
+
simde_mm_cmple_ps (simde__m128 a, simde__m128 b) {
|
1283
|
+
#if defined(SIMDE_X86_SSE_NATIVE)
|
1284
|
+
return _mm_cmple_ps(a, b);
|
1285
|
+
#else
|
1286
|
+
simde__m128_private
|
1287
|
+
r_,
|
1288
|
+
a_ = simde__m128_to_private(a),
|
1289
|
+
b_ = simde__m128_to_private(b);
|
1290
|
+
|
1291
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
1292
|
+
r_.neon_u32 = vcleq_f32(a_.neon_f32, b_.neon_f32);
|
1293
|
+
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
1294
|
+
r_.wasm_v128 = wasm_f32x4_le(a_.wasm_v128, b_.wasm_v128);
|
1295
|
+
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
1296
|
+
r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmple(a_.altivec_f32, b_.altivec_f32));
|
1297
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
1298
|
+
r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 <= b_.f32));
|
1299
|
+
#else
|
1300
|
+
SIMDE_VECTORIZE
|
1301
|
+
for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
|
1302
|
+
r_.u32[i] = (a_.f32[i] <= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0);
|
1303
|
+
}
|
1304
|
+
#endif
|
1305
|
+
|
1306
|
+
return simde__m128_from_private(r_);
|
1307
|
+
#endif
|
1308
|
+
}
|
1309
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
1310
|
+
# define _mm_cmple_ps(a, b) simde_mm_cmple_ps((a), (b))
|
1311
|
+
#endif
|
1312
|
+
|
1313
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
1314
|
+
simde__m128
|
1315
|
+
simde_mm_cmple_ss (simde__m128 a, simde__m128 b) {
|
1316
|
+
#if defined(SIMDE_X86_SSE_NATIVE)
|
1317
|
+
return _mm_cmple_ss(a, b);
|
1318
|
+
#elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
|
1319
|
+
return simde_mm_move_ss(a, simde_mm_cmple_ps(a, b));
|
1320
|
+
#else
|
1321
|
+
simde__m128_private
|
1322
|
+
r_,
|
1323
|
+
a_ = simde__m128_to_private(a),
|
1324
|
+
b_ = simde__m128_to_private(b);
|
1325
|
+
|
1326
|
+
r_.u32[0] = (a_.f32[0] <= b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0);
|
1327
|
+
SIMDE_VECTORIZE
|
1328
|
+
for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
|
1329
|
+
r_.u32[i] = a_.u32[i];
|
1330
|
+
}
|
1331
|
+
|
1332
|
+
return simde__m128_from_private(r_);
|
1333
|
+
#endif
|
1334
|
+
}
|
1335
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
1336
|
+
# define _mm_cmple_ss(a, b) simde_mm_cmple_ss((a), (b))
|
1337
|
+
#endif
|
1338
|
+
|
1339
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
1340
|
+
simde__m128
|
1341
|
+
simde_mm_cmplt_ps (simde__m128 a, simde__m128 b) {
|
1342
|
+
#if defined(SIMDE_X86_SSE_NATIVE)
|
1343
|
+
return _mm_cmplt_ps(a, b);
|
1344
|
+
#else
|
1345
|
+
simde__m128_private
|
1346
|
+
r_,
|
1347
|
+
a_ = simde__m128_to_private(a),
|
1348
|
+
b_ = simde__m128_to_private(b);
|
1349
|
+
|
1350
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
1351
|
+
r_.neon_u32 = vcltq_f32(a_.neon_f32, b_.neon_f32);
|
1352
|
+
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
1353
|
+
r_.wasm_v128 = wasm_f32x4_lt(a_.wasm_v128, b_.wasm_v128);
|
1354
|
+
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
1355
|
+
r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmplt(a_.altivec_f32, b_.altivec_f32));
|
1356
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
1357
|
+
r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 < b_.f32));
|
1358
|
+
#else
|
1359
|
+
SIMDE_VECTORIZE
|
1360
|
+
for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
|
1361
|
+
r_.u32[i] = (a_.f32[i] < b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0);
|
1362
|
+
}
|
1363
|
+
#endif
|
1364
|
+
|
1365
|
+
return simde__m128_from_private(r_);
|
1366
|
+
#endif
|
1367
|
+
}
|
1368
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
1369
|
+
# define _mm_cmplt_ps(a, b) simde_mm_cmplt_ps((a), (b))
|
1370
|
+
#endif
|
1371
|
+
|
1372
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
1373
|
+
simde__m128
|
1374
|
+
simde_mm_cmplt_ss (simde__m128 a, simde__m128 b) {
|
1375
|
+
#if defined(SIMDE_X86_SSE_NATIVE)
|
1376
|
+
return _mm_cmplt_ss(a, b);
|
1377
|
+
#elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
|
1378
|
+
return simde_mm_move_ss(a, simde_mm_cmplt_ps(a, b));
|
1379
|
+
#else
|
1380
|
+
simde__m128_private
|
1381
|
+
r_,
|
1382
|
+
a_ = simde__m128_to_private(a),
|
1383
|
+
b_ = simde__m128_to_private(b);
|
1384
|
+
|
1385
|
+
r_.u32[0] = (a_.f32[0] < b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0);
|
1386
|
+
SIMDE_VECTORIZE
|
1387
|
+
for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
|
1388
|
+
r_.u32[i] = a_.u32[i];
|
1389
|
+
}
|
1390
|
+
|
1391
|
+
return simde__m128_from_private(r_);
|
1392
|
+
#endif
|
1393
|
+
}
|
1394
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
1395
|
+
# define _mm_cmplt_ss(a, b) simde_mm_cmplt_ss((a), (b))
|
1396
|
+
#endif
|
1397
|
+
|
1398
|
+
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_cmpneq_ps (simde__m128 a, simde__m128 b) {
  /* Lane-wise a != b; all-ones mask where true (portable _mm_cmpneq_ps).
   * Unordered (NaN) lanes compare as not-equal, i.e. all-ones. */
  #if defined(SIMDE_X86_SSE_NATIVE)
    return _mm_cmpneq_ps(a, b);
  #else
    simde__m128_private
      r_,
      a_ = simde__m128_to_private(a),
      b_ = simde__m128_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      /* NEON has no float "not equal"; invert the equality mask. */
      r_.neon_u32 = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32));
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_f32x4_ne(a_.wasm_v128, b_.wasm_v128);
    #elif defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && SIMDE_ARCH_POWER_CHECK(900) && !defined(HEDLEY_IBM_VERSION)
      /* vec_cmpne(SIMDE_POWER_ALTIVEC_VECTOR(float), SIMDE_POWER_ALTIVEC_VECTOR(float))
         is missing from XL C/C++ v16.1.1,
         though the documentation (table 89 on page 432 of the IBM XL C/C++ for
         Linux Compiler Reference, Version 16.1.1) shows that it should be
         present. Both GCC and clang support it. */
      r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpne(a_.altivec_f32, b_.altivec_f32));
    #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
      /* Pre-P9 AltiVec: compute equality, then bitwise invert via NOR. */
      r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpeq(a_.altivec_f32, b_.altivec_f32));
      r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_nor(r_.altivec_f32, r_.altivec_f32));
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 != b_.f32));
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.u32[i] = (a_.f32[i] != b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0);
      }
    #endif

    return simde__m128_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
#  define _mm_cmpneq_ps(a, b) simde_mm_cmpneq_ps((a), (b))
#endif
|
1438
|
+
|
1439
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
1440
|
+
simde__m128
|
1441
|
+
simde_mm_cmpneq_ss (simde__m128 a, simde__m128 b) {
|
1442
|
+
#if defined(SIMDE_X86_SSE_NATIVE)
|
1443
|
+
return _mm_cmpneq_ss(a, b);
|
1444
|
+
#elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
|
1445
|
+
return simde_mm_move_ss(a, simde_mm_cmpneq_ps(a, b));
|
1446
|
+
#else
|
1447
|
+
simde__m128_private
|
1448
|
+
r_,
|
1449
|
+
a_ = simde__m128_to_private(a),
|
1450
|
+
b_ = simde__m128_to_private(b);
|
1451
|
+
|
1452
|
+
r_.u32[0] = (a_.f32[0] != b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0);
|
1453
|
+
SIMDE_VECTORIZE
|
1454
|
+
for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
|
1455
|
+
r_.u32[i] = a_.u32[i];
|
1456
|
+
}
|
1457
|
+
|
1458
|
+
return simde__m128_from_private(r_);
|
1459
|
+
#endif
|
1460
|
+
}
|
1461
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
1462
|
+
# define _mm_cmpneq_ss(a, b) simde_mm_cmpneq_ss((a), (b))
|
1463
|
+
#endif
|
1464
|
+
|
1465
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
1466
|
+
simde__m128
|
1467
|
+
simde_mm_cmpnge_ps (simde__m128 a, simde__m128 b) {
|
1468
|
+
return simde_mm_cmplt_ps(a, b);
|
1469
|
+
}
|
1470
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
1471
|
+
# define _mm_cmpnge_ps(a, b) simde_mm_cmpnge_ps((a), (b))
|
1472
|
+
#endif
|
1473
|
+
|
1474
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
1475
|
+
simde__m128
|
1476
|
+
simde_mm_cmpnge_ss (simde__m128 a, simde__m128 b) {
|
1477
|
+
return simde_mm_cmplt_ss(a, b);
|
1478
|
+
}
|
1479
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
1480
|
+
# define _mm_cmpnge_ss(a, b) simde_mm_cmpnge_ss((a), (b))
|
1481
|
+
#endif
|
1482
|
+
|
1483
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
1484
|
+
simde__m128
|
1485
|
+
simde_mm_cmpngt_ps (simde__m128 a, simde__m128 b) {
|
1486
|
+
return simde_mm_cmple_ps(a, b);
|
1487
|
+
}
|
1488
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
1489
|
+
# define _mm_cmpngt_ps(a, b) simde_mm_cmpngt_ps((a), (b))
|
1490
|
+
#endif
|
1491
|
+
|
1492
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
1493
|
+
simde__m128
|
1494
|
+
simde_mm_cmpngt_ss (simde__m128 a, simde__m128 b) {
|
1495
|
+
return simde_mm_cmple_ss(a, b);
|
1496
|
+
}
|
1497
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
1498
|
+
# define _mm_cmpngt_ss(a, b) simde_mm_cmpngt_ss((a), (b))
|
1499
|
+
#endif
|
1500
|
+
|
1501
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
1502
|
+
simde__m128
|
1503
|
+
simde_mm_cmpnle_ps (simde__m128 a, simde__m128 b) {
|
1504
|
+
return simde_mm_cmpgt_ps(a, b);
|
1505
|
+
}
|
1506
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
1507
|
+
# define _mm_cmpnle_ps(a, b) simde_mm_cmpnle_ps((a), (b))
|
1508
|
+
#endif
|
1509
|
+
|
1510
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
1511
|
+
simde__m128
|
1512
|
+
simde_mm_cmpnle_ss (simde__m128 a, simde__m128 b) {
|
1513
|
+
return simde_mm_cmpgt_ss(a, b);
|
1514
|
+
}
|
1515
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
1516
|
+
# define _mm_cmpnle_ss(a, b) simde_mm_cmpnle_ss((a), (b))
|
1517
|
+
#endif
|
1518
|
+
|
1519
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
1520
|
+
simde__m128
|
1521
|
+
simde_mm_cmpnlt_ps (simde__m128 a, simde__m128 b) {
|
1522
|
+
return simde_mm_cmpge_ps(a, b);
|
1523
|
+
}
|
1524
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
1525
|
+
# define _mm_cmpnlt_ps(a, b) simde_mm_cmpnlt_ps((a), (b))
|
1526
|
+
#endif
|
1527
|
+
|
1528
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
1529
|
+
simde__m128
|
1530
|
+
simde_mm_cmpnlt_ss (simde__m128 a, simde__m128 b) {
|
1531
|
+
return simde_mm_cmpge_ss(a, b);
|
1532
|
+
}
|
1533
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
1534
|
+
# define _mm_cmpnlt_ss(a, b) simde_mm_cmpnlt_ss((a), (b))
|
1535
|
+
#endif
|
1536
|
+
|
1537
|
+
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_cmpord_ps (simde__m128 a, simde__m128 b) {
  /* Ordered compare: a lane is all-ones iff NEITHER operand is NaN in
   * that lane (portable _mm_cmpord_ps).  Implemented everywhere as
   * (a == a) AND (b == b), since x == x is false exactly for NaN. */
  #if defined(SIMDE_X86_SSE_NATIVE)
    return _mm_cmpord_ps(a, b);
  #elif defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_v128_and(wasm_f32x4_eq(a, a), wasm_f32x4_eq(b, b));
  #else
    simde__m128_private
      r_,
      a_ = simde__m128_to_private(a),
      b_ = simde__m128_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      /* Note: NEON does not have ordered compare builtin
         Need to compare a eq a and b eq b to check for NaN
         Do AND of results to get final */
      uint32x4_t ceqaa = vceqq_f32(a_.neon_f32, a_.neon_f32);
      uint32x4_t ceqbb = vceqq_f32(b_.neon_f32, b_.neon_f32);
      r_.neon_u32 = vandq_u32(ceqaa, ceqbb);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_v128_and(wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128), wasm_f32x4_eq(b_.wasm_v128, b_.wasm_v128));
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float),
          vec_and(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32)));
    #elif defined(simde_math_isnanf)
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? UINT32_C(0) : ~UINT32_C(0);
      }
    #else
      /* No NaN-detection facility available on this target. */
      HEDLEY_UNREACHABLE();
    #endif

    return simde__m128_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
#  define _mm_cmpord_ps(a, b) simde_mm_cmpord_ps((a), (b))
#endif
|
1577
|
+
|
1578
|
+
/* Per-lane "unordered" compare: each 32-bit lane is all-ones when either
 * input lane is NaN, all-zeros otherwise (complement of cmpord_ps). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_cmpunord_ps (simde__m128 a, simde__m128 b) {
  #if defined(SIMDE_X86_SSE_NATIVE)
    return _mm_cmpunord_ps(a, b);
  #elif defined(SIMDE_WASM_SIMD128_NATIVE)
    /* x != x is true only for NaN. */
    return wasm_v128_or(wasm_f32x4_ne(a, a), wasm_f32x4_ne(b, b));
  #else
    simde__m128_private
      r_,
      a_ = simde__m128_to_private(a),
      b_ = simde__m128_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      /* Invert the "both ordered" mask built from self-compares. */
      uint32x4_t ceqaa = vceqq_f32(a_.neon_f32, a_.neon_f32);
      uint32x4_t ceqbb = vceqq_f32(b_.neon_f32, b_.neon_f32);
      r_.neon_u32 = vmvnq_u32(vandq_u32(ceqaa, ceqbb));
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_v128_or(wasm_f32x4_ne(a_.wasm_v128, a_.wasm_v128), wasm_f32x4_ne(b_.wasm_v128, b_.wasm_v128));
    #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
      /* POWER8 has vec_nand, doing the AND+NOT in one op. */
      r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float),
          vec_nand(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32)));
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float),
          vec_and(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32)));
      /* vec_nor(x, x) == NOT x. */
      r_.altivec_f32 = vec_nor(r_.altivec_f32, r_.altivec_f32);
    #elif defined(simde_math_isnanf)
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? ~UINT32_C(0) : UINT32_C(0);
      }
    #else
      HEDLEY_UNREACHABLE();
    #endif

    return simde__m128_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
# define _mm_cmpunord_ps(a, b) simde_mm_cmpunord_ps((a), (b))
#endif
|
1619
|
+
|
1620
|
+
/* Unordered compare on the lowest lane only; lanes 1..3 are copied from a. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_cmpunord_ss (simde__m128 a, simde__m128 b) {
  #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI)
    return _mm_cmpunord_ss(a, b);
  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
    /* Vector compare, then merge lane 0 into a via move_ss. */
    return simde_mm_move_ss(a, simde_mm_cmpunord_ps(a, b));
  #else
    simde__m128_private
      r_,
      a_ = simde__m128_to_private(a),
      b_ = simde__m128_to_private(b);

    #if defined(simde_math_isnanf)
      r_.u32[0] = (simde_math_isnanf(a_.f32[0]) || simde_math_isnanf(b_.f32[0])) ? ~UINT32_C(0) : UINT32_C(0);
      SIMDE_VECTORIZE
      for (size_t i = 1 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
        r_.u32[i] = a_.u32[i];
      }
    #else
      HEDLEY_UNREACHABLE();
    #endif

    return simde__m128_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
# define _mm_cmpunord_ss(a, b) simde_mm_cmpunord_ss((a), (b))
#endif
|
1649
|
+
|
1650
|
+
/* Scalar compare of lane 0, returning 1 if a[0] == b[0], else 0.
 * NOTE(review): the NEON path ORs in an "either is NaN" mask, so NaN inputs
 * yield 1 (matching x86 COMISS, where unordered sets ZF); the plain-C
 * fallback's `==` yields 0 for NaN -- the two branches appear to diverge on
 * NaN inputs; confirm against upstream SIMDe if this matters. */
SIMDE_FUNCTION_ATTRIBUTES
int
simde_mm_comieq_ss (simde__m128 a, simde__m128 b) {
  #if defined(SIMDE_X86_SSE_NATIVE)
    return _mm_comieq_ss(a, b);
  #else
    simde__m128_private
      a_ = simde__m128_to_private(a),
      b_ = simde__m128_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32);
      uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32);
      uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan));
      uint32x4_t a_eq_b = vceqq_f32(a_.neon_f32, b_.neon_f32);
      return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0);
    #else
      return a_.f32[0] == b_.f32[0];
    #endif
  #endif
}
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
# define _mm_comieq_ss(a, b) simde_mm_comieq_ss((a), (b))
#endif
|
1674
|
+
|
1675
|
+
/* Scalar compare of lane 0, returning 1 if a[0] >= b[0], else 0.
 * NaN in either operand yields 0 (the NEON path ANDs with a "both not NaN"
 * mask; the plain `>=` is likewise false for NaN). */
SIMDE_FUNCTION_ATTRIBUTES
int
simde_mm_comige_ss (simde__m128 a, simde__m128 b) {
  #if defined(SIMDE_X86_SSE_NATIVE)
    return _mm_comige_ss(a, b);
  #else
    simde__m128_private
      a_ = simde__m128_to_private(a),
      b_ = simde__m128_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32);
      uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32);
      uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan);
      uint32x4_t a_ge_b = vcgeq_f32(a_.neon_f32, b_.neon_f32);
      return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0);
    #else
      return a_.f32[0] >= b_.f32[0];
    #endif
  #endif
}
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
# define _mm_comige_ss(a, b) simde_mm_comige_ss((a), (b))
#endif
|
1699
|
+
|
1700
|
+
/* Scalar compare of lane 0, returning 1 if a[0] > b[0], else 0.
 * NaN in either operand yields 0 (AND with "both not NaN" mask on NEON). */
SIMDE_FUNCTION_ATTRIBUTES
int
simde_mm_comigt_ss (simde__m128 a, simde__m128 b) {
  #if defined(SIMDE_X86_SSE_NATIVE)
    return _mm_comigt_ss(a, b);
  #else
    simde__m128_private
      a_ = simde__m128_to_private(a),
      b_ = simde__m128_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32);
      uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32);
      uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan);
      uint32x4_t a_gt_b = vcgtq_f32(a_.neon_f32, b_.neon_f32);
      return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0);
    #else
      return a_.f32[0] > b_.f32[0];
    #endif
  #endif
}
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
# define _mm_comigt_ss(a, b) simde_mm_comigt_ss((a), (b))
#endif
|
1724
|
+
|
1725
|
+
/* Scalar compare of lane 0, returning 1 if a[0] <= b[0], else 0.
 * NOTE(review): the NEON path ORs in an "either is NaN" mask (NaN -> 1,
 * matching x86 COMISS unordered flags), while the plain `<=` fallback gives
 * 0 for NaN -- branches diverge on NaN; confirm against upstream. */
SIMDE_FUNCTION_ATTRIBUTES
int
simde_mm_comile_ss (simde__m128 a, simde__m128 b) {
  #if defined(SIMDE_X86_SSE_NATIVE)
    return _mm_comile_ss(a, b);
  #else
    simde__m128_private
      a_ = simde__m128_to_private(a),
      b_ = simde__m128_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32);
      uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32);
      uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan));
      uint32x4_t a_le_b = vcleq_f32(a_.neon_f32, b_.neon_f32);
      return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0);
    #else
      return a_.f32[0] <= b_.f32[0];
    #endif
  #endif
}
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
# define _mm_comile_ss(a, b) simde_mm_comile_ss((a), (b))
#endif
|
1749
|
+
|
1750
|
+
/* Scalar compare of lane 0, returning 1 if a[0] < b[0], else 0.
 * NOTE(review): as with comile_ss, the NEON path returns 1 for NaN inputs
 * while the plain `<` fallback returns 0 -- confirm against upstream. */
SIMDE_FUNCTION_ATTRIBUTES
int
simde_mm_comilt_ss (simde__m128 a, simde__m128 b) {
  #if defined(SIMDE_X86_SSE_NATIVE)
    return _mm_comilt_ss(a, b);
  #else
    simde__m128_private
      a_ = simde__m128_to_private(a),
      b_ = simde__m128_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32);
      uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32);
      uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan));
      uint32x4_t a_lt_b = vcltq_f32(a_.neon_f32, b_.neon_f32);
      return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0);
    #else
      return a_.f32[0] < b_.f32[0];
    #endif
  #endif
}
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
# define _mm_comilt_ss(a, b) simde_mm_comilt_ss((a), (b))
#endif
|
1774
|
+
|
1775
|
+
/* Scalar compare of lane 0, returning 1 if a[0] != b[0], else 0.
 * NaN in either operand yields 0 (AND with "both not NaN" mask on NEON). */
SIMDE_FUNCTION_ATTRIBUTES
int
simde_mm_comineq_ss (simde__m128 a, simde__m128 b) {
  #if defined(SIMDE_X86_SSE_NATIVE)
    return _mm_comineq_ss(a, b);
  #else
    simde__m128_private
      a_ = simde__m128_to_private(a),
      b_ = simde__m128_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32);
      uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32);
      uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan);
      uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32));
      return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0);
    #else
      return a_.f32[0] != b_.f32[0];
    #endif
  #endif
}
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
# define _mm_comineq_ss(a, b) simde_mm_comineq_ss((a), (b))
#endif
|
1799
|
+
|
1800
|
+
/* SIMDe-internal helper (no native SSE counterpart): per-lane copysign --
 * magnitude from `dest`, sign bit from `src`. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_x_mm_copysign_ps(simde__m128 dest, simde__m128 src) {
  simde__m128_private
    r_,
    dest_ = simde__m128_to_private(dest),
    src_ = simde__m128_to_private(src);

  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    /* -0.0f has only the sign bit set, making it a bit-select mask. */
    const uint32x4_t sign_pos = vreinterpretq_u32_f32(vdupq_n_f32(-SIMDE_FLOAT32_C(0.0)));
    r_.neon_u32 = vbslq_u32(sign_pos, src_.neon_u32, dest_.neon_u32);
  #elif defined(SIMDE_WASM_SIMD128_NATIVE)
    const v128_t sign_pos = wasm_f32x4_splat(-0.0f);
    r_.wasm_v128 = wasm_v128_bitselect(src_.wasm_v128, dest_.wasm_v128, sign_pos);
  #elif defined(SIMDE_POWER_ALTIVEC_P9_NATIVE)
    /* NOTE(review): IBM XL reportedly takes vec_cpsgn's operands in the
     * opposite order -- the swap below compensates; verify per compiler. */
    #if !defined(HEDLEY_IBM_VERSION)
      r_.altivec_f32 = vec_cpsgn(dest_.altivec_f32, src_.altivec_f32);
    #else
      r_.altivec_f32 = vec_cpsgn(src_.altivec_f32, dest_.altivec_f32);
    #endif
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
    const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) sign_pos = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_splats(-0.0f));
    r_.altivec_f32 = vec_sel(dest_.altivec_f32, src_.altivec_f32, sign_pos);
  #elif defined(SIMDE_IEEE754_STORAGE)
    (void) src_;
    (void) dest_;
    /* dest XOR ((dest XOR src) AND signmask): classic branchless copysign. */
    simde__m128 sign_pos = simde_mm_set1_ps(-0.0f);
    r_ = simde__m128_to_private(simde_mm_xor_ps(dest, simde_mm_and_ps(simde_mm_xor_ps(dest, src), sign_pos)));
  #else
    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
      r_.f32[i] = simde_math_copysignf(dest_.f32[i], src_.f32[i]);
    }
  #endif

  return simde__m128_from_private(r_);
}
|
1837
|
+
|
1838
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
1839
|
+
simde__m128
|
1840
|
+
simde_x_mm_xorsign_ps(simde__m128 dest, simde__m128 src) {
|
1841
|
+
return simde_mm_xor_ps(simde_mm_and_ps(simde_mm_set1_ps(-0.0f), src), dest);
|
1842
|
+
}
|
1843
|
+
|
1844
|
+
/* Convert the two int32 lanes of b to float and place them in the low half
 * of the result; the high half is copied from a. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_cvt_pi2ps (simde__m128 a, simde__m64 b) {
  #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
    return _mm_cvt_pi2ps(a, b);
  #else
    simde__m128_private
      r_,
      a_ = simde__m128_to_private(a);
    simde__m64_private b_ = simde__m64_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_f32 = vcombine_f32(vcvt_f32_s32(b_.neon_i32), vget_high_f32(a_.neon_f32));
    #elif defined(SIMDE_CONVERT_VECTOR_)
      SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, b_.i32);
      r_.m64_private[1] = a_.m64_private[1];
    #else
      r_.f32[0] = (simde_float32) b_.i32[0];
      r_.f32[1] = (simde_float32) b_.i32[1];
      r_.i32[2] = a_.i32[2];
      r_.i32[3] = a_.i32[3];
    #endif

    return simde__m128_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
# define _mm_cvt_pi2ps(a, b) simde_mm_cvt_pi2ps((a), (b))
#endif
|
1873
|
+
|
1874
|
+
/* Convert the two low float lanes of a to int32 (rounding per the current
 * rounding mode, hence the round_ps(..., CUR_DIRECTION) pre-pass). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m64
simde_mm_cvt_ps2pi (simde__m128 a) {
  #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
    return _mm_cvt_ps2pi(a);
  #else
    simde__m64_private r_;
    simde__m128_private a_;

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION));
      r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32));
    #elif defined(SIMDE_CONVERT_VECTOR_) && SIMDE_NATURAL_VECTOR_SIZE_GE(128)
      a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION));
      SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].f32);
    #else
      a_ = simde__m128_to_private(a);

      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
        /* nearbyintf honors the current FP rounding mode. */
        r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, simde_math_nearbyintf(a_.f32[i]));
      }
    #endif

    return simde__m64_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
# define _mm_cvt_ps2pi(a) simde_mm_cvt_ps2pi((a))
#endif
|
1904
|
+
|
1905
|
+
/* Replace lane 0 of a with (float) b; lanes 1..3 are copied from a. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_cvt_si2ss (simde__m128 a, int32_t b) {
  #if defined(SIMDE_X86_SSE_NATIVE)
    return _mm_cvt_si2ss(a, b);
  #else
    simde__m128_private
      r_,
      a_ = simde__m128_to_private(a);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float, b), a_.neon_f32, 0);
    #else
      r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b);
      r_.i32[1] = a_.i32[1];
      r_.i32[2] = a_.i32[2];
      r_.i32[3] = a_.i32[3];
    #endif

    return simde__m128_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
# define _mm_cvt_si2ss(a, b) simde_mm_cvt_si2ss((a), b)
#endif
|
1930
|
+
|
1931
|
+
/* Convert lane 0 of a to int32 using the current rounding mode.  Without
 * SIMDE_FAST_CONVERSION_RANGE, out-of-range/NaN inputs return INT32_MIN
 * (the x86 "integer indefinite" value). */
SIMDE_FUNCTION_ATTRIBUTES
int32_t
simde_mm_cvt_ss2si (simde__m128 a) {
  #if defined(SIMDE_X86_SSE_NATIVE)
    return _mm_cvt_ss2si(a);
  #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_BUG_GCC_95399)
    /* vcvtnq rounds to nearest-even in one instruction. */
    return vgetq_lane_s32(vcvtnq_s32_f32(simde__m128_to_neon_f32(a)), 0);
  #else
    simde__m128_private a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION));
    #if !defined(SIMDE_FAST_CONVERSION_RANGE)
      return ((a_.f32[0] > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) &&
          (a_.f32[0] < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ?
        SIMDE_CONVERT_FTOI(int32_t, a_.f32[0]) : INT32_MIN;
    #else
      return SIMDE_CONVERT_FTOI(int32_t, a_.f32[0]);
    #endif
  #endif
}
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
# define _mm_cvt_ss2si(a) simde_mm_cvt_ss2si((a))
#endif
|
1952
|
+
|
1953
|
+
/* Widen four signed 16-bit lanes to four floats (exact conversion). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_cvtpi16_ps (simde__m64 a) {
  #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
    return _mm_cvtpi16_ps(a);
  #else
    simde__m128_private r_;
    simde__m64_private a_ = simde__m64_to_private(a);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_f32 = vcvtq_f32_s32(vmovl_s16(a_.neon_i16));
    #elif defined(SIMDE_CONVERT_VECTOR_)
      SIMDE_CONVERT_VECTOR_(r_.f32, a_.i16);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        simde_float32 v = a_.i16[i];
        r_.f32[i] = v;
      }
    #endif

    return simde__m128_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
# define _mm_cvtpi16_ps(a) simde_mm_cvtpi16_ps(a)
#endif
|
1980
|
+
|
1981
|
+
/* Same operation as cvt_pi2ps: b's two int32 lanes become the low float
 * lanes, a's high half is preserved. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_cvtpi32_ps (simde__m128 a, simde__m64 b) {
  #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
    return _mm_cvtpi32_ps(a, b);
  #else
    simde__m128_private
      r_,
      a_ = simde__m128_to_private(a);
    simde__m64_private b_ = simde__m64_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_f32 = vcombine_f32(vcvt_f32_s32(b_.neon_i32), vget_high_f32(a_.neon_f32));
    #elif defined(SIMDE_CONVERT_VECTOR_)
      SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, b_.i32);
      r_.m64_private[1] = a_.m64_private[1];
    #else
      r_.f32[0] = (simde_float32) b_.i32[0];
      r_.f32[1] = (simde_float32) b_.i32[1];
      r_.i32[2] = a_.i32[2];
      r_.i32[3] = a_.i32[3];
    #endif

    return simde__m128_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
# define _mm_cvtpi32_ps(a, b) simde_mm_cvtpi32_ps((a), b)
#endif
|
2010
|
+
|
2011
|
+
/* Convert two __m64 int32 pairs into one __m128: a -> low lanes 0..1,
 * b -> high lanes 2..3. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_cvtpi32x2_ps (simde__m64 a, simde__m64 b) {
  #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
    return _mm_cvtpi32x2_ps(a, b);
  #else
    simde__m128_private r_;
    simde__m64_private
      a_ = simde__m64_to_private(a),
      b_ = simde__m64_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_f32 = vcvtq_f32_s32(vcombine_s32(a_.neon_i32, b_.neon_i32));
    #elif defined(SIMDE_CONVERT_VECTOR_)
      SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, a_.i32);
      SIMDE_CONVERT_VECTOR_(r_.m64_private[1].f32, b_.i32);
    #else
      r_.f32[0] = (simde_float32) a_.i32[0];
      r_.f32[1] = (simde_float32) a_.i32[1];
      r_.f32[2] = (simde_float32) b_.i32[0];
      r_.f32[3] = (simde_float32) b_.i32[1];
    #endif

    return simde__m128_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
# define _mm_cvtpi32x2_ps(a, b) simde_mm_cvtpi32x2_ps(a, b)
#endif
|
2040
|
+
|
2041
|
+
/* Convert the four low signed 8-bit lanes of a to four floats. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_cvtpi8_ps (simde__m64 a) {
  #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
    return _mm_cvtpi8_ps(a);
  #else
    simde__m128_private r_;
    simde__m64_private a_ = simde__m64_to_private(a);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      /* s8 -> s16 -> (low half) -> s32 -> f32 widening chain. */
      r_.neon_f32 = vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8(a_.neon_i8))));
    #else
      r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[0]);
      r_.f32[1] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[1]);
      r_.f32[2] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[2]);
      r_.f32[3] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[3]);
    #endif

    return simde__m128_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
# define _mm_cvtpi8_ps(a) simde_mm_cvtpi8_ps(a)
#endif
|
2065
|
+
|
2066
|
+
/* Round four floats and narrow to four signed 16-bit lanes. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m64
simde_mm_cvtps_pi16 (simde__m128 a) {
  #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
    return _mm_cvtps_pi16(a);
  #else
    simde__m64_private r_;
    simde__m128_private a_ = simde__m128_to_private(a);

    #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399)
      /* vrndiq rounds using the current FP mode, then convert and narrow. */
      r_.neon_i16 = vmovn_s32(vcvtq_s32_f32(vrndiq_f32(a_.neon_f32)));
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
        r_.i16[i] = SIMDE_CONVERT_FTOI(int16_t, simde_math_roundf(a_.f32[i]));
      }
    #endif

    return simde__m64_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
# define _mm_cvtps_pi16(a) simde_mm_cvtps_pi16((a))
#endif
|
2090
|
+
|
2091
|
+
/* Round the two low float lanes and convert to int32.  Without
 * SIMDE_FAST_CONVERSION_RANGE, out-of-range values map to INT32_MIN. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m64
simde_mm_cvtps_pi32 (simde__m128 a) {
  #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
    return _mm_cvtps_pi32(a);
  #else
    simde__m64_private r_;
    simde__m128_private a_ = simde__m128_to_private(a);

    #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_BUG_GCC_95399)
      r_.neon_i32 = vcvt_s32_f32(vget_low_f32(vrndiq_f32(a_.neon_f32)));
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
        simde_float32 v = simde_math_roundf(a_.f32[i]);
        #if !defined(SIMDE_FAST_CONVERSION_RANGE)
          r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ?
            SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN;
        #else
          r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v);
        #endif
      }
    #endif

    return simde__m64_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
# define _mm_cvtps_pi32(a) simde_mm_cvtps_pi32((a))
#endif
|
2121
|
+
|
2122
|
+
/* Convert four floats to four saturated signed 8-bit lanes in the low half
 * of the result; the upper four i8 lanes are unspecified. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m64
simde_mm_cvtps_pi8 (simde__m128 a) {
  #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
    return _mm_cvtps_pi8(a);
  #else
    simde__m64_private r_;
    simde__m128_private a_ = simde__m128_to_private(a);

    #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95471)
      /* Clamp the input to [INT8_MIN, INT8_MAX], round, convert to i32, narrow to
       * i16, combine with an all-zero vector of i16 (which will become the upper
       * half), narrow to i8. */
      float32x4_t max = vdupq_n_f32(HEDLEY_STATIC_CAST(simde_float32, INT8_MAX));
      float32x4_t min = vdupq_n_f32(HEDLEY_STATIC_CAST(simde_float32, INT8_MIN));
      float32x4_t values = vrndnq_f32(vmaxq_f32(vminq_f32(max, a_.neon_f32), min));
      r_.neon_i8 = vmovn_s16(vcombine_s16(vmovn_s32(vcvtq_s32_f32(values)), vdup_n_s16(0)));
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) {
        if (a_.f32[i] > HEDLEY_STATIC_CAST(simde_float32, INT8_MAX))
          r_.i8[i] = INT8_MAX;
        else if (a_.f32[i] < HEDLEY_STATIC_CAST(simde_float32, INT8_MIN))
          r_.i8[i] = INT8_MIN;
        else
          r_.i8[i] = SIMDE_CONVERT_FTOI(int8_t, simde_math_roundf(a_.f32[i]));
      }
      /* Note: the upper half is undefined */
    #endif

    return simde__m64_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
# define _mm_cvtps_pi8(a) simde_mm_cvtps_pi8((a))
#endif
|
2158
|
+
|
2159
|
+
/* Widen four unsigned 16-bit lanes to four floats (exact conversion). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_cvtpu16_ps (simde__m64 a) {
  #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
    return _mm_cvtpu16_ps(a);
  #else
    simde__m128_private r_;
    simde__m64_private a_ = simde__m64_to_private(a);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_f32 = vcvtq_f32_u32(vmovl_u16(a_.neon_u16));
    #elif defined(SIMDE_CONVERT_VECTOR_)
      SIMDE_CONVERT_VECTOR_(r_.f32, a_.u16);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = (simde_float32) a_.u16[i];
      }
    #endif

    return simde__m128_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
# define _mm_cvtpu16_ps(a) simde_mm_cvtpu16_ps(a)
#endif
|
2185
|
+
|
2186
|
+
/* Convert the four low unsigned 8-bit lanes of a to four floats. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_cvtpu8_ps (simde__m64 a) {
  #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
    return _mm_cvtpu8_ps(a);
  #else
    simde__m128_private r_;
    simde__m64_private a_ = simde__m64_to_private(a);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      /* u8 -> u16 -> (low half) -> u32 -> f32 widening chain. */
      r_.neon_f32 = vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8(a_.neon_u8))));
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = HEDLEY_STATIC_CAST(simde_float32, a_.u8[i]);
      }
    #endif

    return simde__m128_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
# define _mm_cvtpu8_ps(a) simde_mm_cvtpu8_ps(a)
#endif
|
2210
|
+
|
2211
|
+
/* Replace lane 0 of a with (float) b; upper lanes preserved.
 * Same semantics as cvt_si2ss. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_cvtsi32_ss (simde__m128 a, int32_t b) {
  #if defined(SIMDE_X86_SSE_NATIVE)
    return _mm_cvtsi32_ss(a, b);
  #else
    simde__m128_private r_;
    simde__m128_private a_ = simde__m128_to_private(a);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float32_t, b), a_.neon_f32, 0);
    #else
      r_ = a_;
      r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b);
    #endif

    return simde__m128_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
# define _mm_cvtsi32_ss(a, b) simde_mm_cvtsi32_ss((a), b)
#endif
|
2233
|
+
|
2234
|
+
/* Replace lane 0 of a with (float) b for a 64-bit integer b; upper lanes
 * preserved.  PGI spells the native intrinsic _mm_cvtsi64x_ss. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_cvtsi64_ss (simde__m128 a, int64_t b) {
  #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64)
    #if !defined(__PGI)
      return _mm_cvtsi64_ss(a, b);
    #else
      return _mm_cvtsi64x_ss(a, b);
    #endif
  #else
    simde__m128_private r_;
    simde__m128_private a_ = simde__m128_to_private(a);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float32_t, b), a_.neon_f32, 0);
    #else
      r_ = a_;
      r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b);
    #endif

    return simde__m128_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
# define _mm_cvtsi64_ss(a, b) simde_mm_cvtsi64_ss((a), b)
#endif
|
2260
|
+
|
2261
|
+
/* Extract lane 0 of a as a scalar float. */
SIMDE_FUNCTION_ATTRIBUTES
simde_float32
simde_mm_cvtss_f32 (simde__m128 a) {
  #if defined(SIMDE_X86_SSE_NATIVE)
    return _mm_cvtss_f32(a);
  #else
    simde__m128_private a_ = simde__m128_to_private(a);
    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      return vgetq_lane_f32(a_.neon_f32, 0);
    #else
      return a_.f32[0];
    #endif
  #endif
}
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
# define _mm_cvtss_f32(a) simde_mm_cvtss_f32((a))
#endif
|
2278
|
+
|
2279
|
+
/* Alias of cvt_ss2si: lane 0 to int32 using the current rounding mode. */
SIMDE_FUNCTION_ATTRIBUTES
int32_t
simde_mm_cvtss_si32 (simde__m128 a) {
  return simde_mm_cvt_ss2si(a);
}
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
# define _mm_cvtss_si32(a) simde_mm_cvtss_si32((a))
#endif
|
2287
|
+
|
2288
|
+
/* Convert lane 0 of a to int64.  The fallback uses roundf (round-half-away)
 * rather than the CPU rounding mode.  PGI spells the native intrinsic
 * _mm_cvtss_si64x. */
SIMDE_FUNCTION_ATTRIBUTES
int64_t
simde_mm_cvtss_si64 (simde__m128 a) {
  #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64)
    #if !defined(__PGI)
      return _mm_cvtss_si64(a);
    #else
      return _mm_cvtss_si64x(a);
    #endif
  #else
    simde__m128_private a_ = simde__m128_to_private(a);
    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      return SIMDE_CONVERT_FTOI(int64_t, simde_math_roundf(vgetq_lane_f32(a_.neon_f32, 0)));
    #else
      return SIMDE_CONVERT_FTOI(int64_t, simde_math_roundf(a_.f32[0]));
    #endif
  #endif
}
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
# define _mm_cvtss_si64(a) simde_mm_cvtss_si64((a))
#endif
|
2309
|
+
|
2310
|
+
/* Truncating conversion of the two low float lanes to int32 (the 't' in
 * cvtt = truncate toward zero).  Without SIMDE_FAST_CONVERSION_RANGE,
 * out-of-range values map to INT32_MIN. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m64
simde_mm_cvtt_ps2pi (simde__m128 a) {
  #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
    return _mm_cvtt_ps2pi(a);
  #else
    simde__m64_private r_;
    simde__m128_private a_ = simde__m128_to_private(a);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE)
      /* vcvt truncates toward zero, matching cvtt semantics. */
      r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32));
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        simde_float32 v = a_.f32[i];
        #if !defined(SIMDE_FAST_CONVERSION_RANGE)
          r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ?
            SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN;
        #else
          r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v);
        #endif
      }
    #endif

    return simde__m64_from_private(r_);
  #endif
}
/* _mm_cvttps_pi32 is the newer name for the same operation. */
#define simde_mm_cvttps_pi32(a) simde_mm_cvtt_ps2pi(a)
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
# define _mm_cvtt_ps2pi(a) simde_mm_cvtt_ps2pi((a))
# define _mm_cvttps_pi32(a) simde_mm_cvttps_pi32((a))
#endif
|
2342
|
+
|
2343
|
+
/* Truncating conversion of lane 0 to int32.  Without
 * SIMDE_FAST_CONVERSION_RANGE, out-of-range/NaN inputs return INT32_MIN. */
SIMDE_FUNCTION_ATTRIBUTES
int32_t
simde_mm_cvtt_ss2si (simde__m128 a) {
  #if defined(SIMDE_X86_SSE_NATIVE)
    return _mm_cvtt_ss2si(a);
  #else
    simde__m128_private a_ = simde__m128_to_private(a);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE)
      return SIMDE_CONVERT_FTOI(int32_t, vgetq_lane_f32(a_.neon_f32, 0));
    #else
      simde_float32 v = a_.f32[0];
      #if !defined(SIMDE_FAST_CONVERSION_RANGE)
        return ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ?
          SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN;
      #else
        return SIMDE_CONVERT_FTOI(int32_t, v);
      #endif
    #endif
  #endif
}
/* _mm_cvttss_si32 is the newer name for the same operation. */
#define simde_mm_cvttss_si32(a) simde_mm_cvtt_ss2si((a))
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
# define _mm_cvtt_ss2si(a) simde_mm_cvtt_ss2si((a))
# define _mm_cvttss_si32(a) simde_mm_cvtt_ss2si((a))
#endif
|
2369
|
+
|
2370
|
+
/* Truncating conversion of the lowest single-precision lane of `a` to a
 * 64-bit integer (SSE `_mm_cvttss_si64`, 64-bit targets only natively).
 * No range clamp is applied here, unlike the 32-bit variants. */
SIMDE_FUNCTION_ATTRIBUTES
int64_t
simde_mm_cvttss_si64 (simde__m128 a) {
  /* MSVC lacks _mm_cvttss_si64 even on x64; PGI spells it _mm_cvttss_si64x. */
  #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(_MSC_VER)
    #if defined(__PGI)
      return _mm_cvttss_si64x(a);
    #else
      return _mm_cvttss_si64(a);
    #endif
  #else
    simde__m128_private a_ = simde__m128_to_private(a);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      return SIMDE_CONVERT_FTOI(int64_t, vgetq_lane_f32(a_.neon_f32, 0));
    #else
      return SIMDE_CONVERT_FTOI(int64_t, a_.f32[0]);
    #endif
  #endif
}
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
# define _mm_cvttss_si64(a) simde_mm_cvttss_si64((a))
#endif
|
2392
|
+
|
2393
|
+
/* Scalar "ordered" compare (SSE `_mm_cmpord_ss`): lane 0 of the result is
 * all-ones if neither a[0] nor b[0] is NaN, all-zeros otherwise; the upper
 * three lanes are copied from `a`. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_cmpord_ss (simde__m128 a, simde__m128 b) {
  #if defined(SIMDE_X86_SSE_NATIVE)
    return _mm_cmpord_ss(a, b);
  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
    /* Reuse the packed compare, then splice lane 0 into `a`. */
    return simde_mm_move_ss(a, simde_mm_cmpord_ps(a, b));
  #else
    simde__m128_private
      r_,
      a_ = simde__m128_to_private(a);

    #if defined(simde_math_isnanf)
      r_.u32[0] = (simde_math_isnanf(simde_mm_cvtss_f32(a)) || simde_math_isnanf(simde_mm_cvtss_f32(b))) ? UINT32_C(0) : ~UINT32_C(0);
      SIMDE_VECTORIZE
      for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.u32[i] = a_.u32[i];
      }
    #else
      /* No isnanf available on this platform: configuration is unsupported. */
      HEDLEY_UNREACHABLE();
    #endif

    return simde__m128_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
# define _mm_cmpord_ss(a, b) simde_mm_cmpord_ss((a), (b))
#endif
|
2421
|
+
|
2422
|
+
/* Lane-wise single-precision division (SSE `_mm_div_ps`). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_div_ps (simde__m128 a, simde__m128 b) {
  #if defined(SIMDE_X86_SSE_NATIVE)
    return _mm_div_ps(a, b);
  #else
    simde__m128_private
      r_,
      a_ = simde__m128_to_private(a),
      b_ = simde__m128_to_private(b);

    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      r_.neon_f32 = vdivq_f32(a_.neon_f32, b_.neon_f32);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      /* ARMv7 NEON has no divide: reciprocal estimate plus one
       * Newton-Raphson refinement step, then multiply by `a`.
       * NOTE(review): approximate — not bit-exact with x86 division. */
      float32x4_t recip0 = vrecpeq_f32(b_.neon_f32);
      float32x4_t recip1 = vmulq_f32(recip0, vrecpsq_f32(recip0, b_.neon_f32));
      r_.neon_f32 = vmulq_f32(a_.neon_f32, recip1);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_f32x4_div(a_.wasm_v128, b_.wasm_v128);
    #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
      r_.altivec_f32 = vec_div(a_.altivec_f32, b_.altivec_f32);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.f32 = a_.f32 / b_.f32;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = a_.f32[i] / b_.f32[i];
      }
    #endif

    return simde__m128_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
# define _mm_div_ps(a, b) simde_mm_div_ps((a), (b))
#endif
|
2458
|
+
|
2459
|
+
/* Scalar single-precision division (SSE `_mm_div_ss`): lane 0 is
 * a[0] / b[0]; lanes 1-3 are copied from `a`. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_div_ss (simde__m128 a, simde__m128 b) {
  #if defined(SIMDE_X86_SSE_NATIVE)
    return _mm_div_ss(a, b);
  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
    return simde_mm_move_ss(a, simde_mm_div_ps(a, b));
  #else
    simde__m128_private
      r_,
      a_ = simde__m128_to_private(a),
      b_ = simde__m128_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      /* Compute the full packed divide, extract lane 0, and insert it into `a`. */
      float32_t value =
        vgetq_lane_f32(simde__m128_to_private(simde_mm_div_ps(a, b)).neon_f32, 0);
      r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0);
    #else
      r_.f32[0] = a_.f32[0] / b_.f32[0];
      SIMDE_VECTORIZE
      for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = a_.f32[i];
      }
    #endif

    return simde__m128_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
# define _mm_div_ss(a, b) simde_mm_div_ss((a), (b))
#endif
|
2490
|
+
|
2491
|
+
/* Extract the 16-bit lane selected by `imm8` (0..3) from `a`
 * (SSE `_mm_extract_pi16` / MMX `_m_pextrw`). The portable function below
 * is shadowed by a macro on native x86 / NEON builds. */
SIMDE_FUNCTION_ATTRIBUTES
int16_t
simde_mm_extract_pi16 (simde__m64 a, const int imm8)
    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) {
  simde__m64_private a_ = simde__m64_to_private(a);
  return a_.i16[imm8];
}
#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(HEDLEY_PGI_VERSION)
# if defined(SIMDE_BUG_CLANG_44589)
    /* Suppress clang's bogus -Wvector-conversion around the intrinsic
     * (clang bug 44589). */
#   define simde_mm_extract_pi16(a, imm8) ( \
      HEDLEY_DIAGNOSTIC_PUSH \
      _Pragma("clang diagnostic ignored \"-Wvector-conversion\"") \
      HEDLEY_STATIC_CAST(int16_t, _mm_extract_pi16((a), (imm8))) \
      HEDLEY_DIAGNOSTIC_POP \
    )
# else
#   define simde_mm_extract_pi16(a, imm8) HEDLEY_STATIC_CAST(int16_t, _mm_extract_pi16(a, imm8))
# endif
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
# define simde_mm_extract_pi16(a, imm8) vget_lane_s16(simde__m64_to_private(a).neon_i16, imm8)
#endif
#define simde_m_pextrw(a, imm8) simde_mm_extract_pi16(a, imm8)
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
# define _mm_extract_pi16(a, imm8) simde_mm_extract_pi16((a), (imm8))
# define _m_pextrw(a, imm8) simde_mm_extract_pi16((a), (imm8))
#endif
|
2517
|
+
|
2518
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
2519
|
+
simde__m64
|
2520
|
+
simde_mm_insert_pi16 (simde__m64 a, int16_t i, const int imm8)
|
2521
|
+
SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) {
|
2522
|
+
simde__m64_private
|
2523
|
+
r_,
|
2524
|
+
a_ = simde__m64_to_private(a);
|
2525
|
+
|
2526
|
+
r_.i64[0] = a_.i64[0];
|
2527
|
+
r_.i16[imm8] = i;
|
2528
|
+
|
2529
|
+
return simde__m64_from_private(r_);
|
2530
|
+
}
|
2531
|
+
#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI)
|
2532
|
+
# if defined(SIMDE_BUG_CLANG_44589)
|
2533
|
+
# define ssimde_mm_insert_pi16(a, i, imm8) ( \
|
2534
|
+
HEDLEY_DIAGNOSTIC_PUSH \
|
2535
|
+
_Pragma("clang diagnostic ignored \"-Wvector-conversion\"") \
|
2536
|
+
(_mm_insert_pi16((a), (i), (imm8))) \
|
2537
|
+
HEDLEY_DIAGNOSTIC_POP \
|
2538
|
+
)
|
2539
|
+
# else
|
2540
|
+
# define simde_mm_insert_pi16(a, i, imm8) _mm_insert_pi16(a, i, imm8)
|
2541
|
+
# endif
|
2542
|
+
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
2543
|
+
# define simde_mm_insert_pi16(a, i, imm8) simde__m64_from_neon_i16(vset_lane_s16((i), simde__m64_to_neon_i16(a), (imm8)))
|
2544
|
+
#endif
|
2545
|
+
#define simde_m_pinsrw(a, i, imm8) (simde_mm_insert_pi16(a, i, imm8))
|
2546
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
2547
|
+
# define _mm_insert_pi16(a, i, imm8) simde_mm_insert_pi16(a, i, imm8)
|
2548
|
+
# define _m_pinsrw(a, i, imm8) simde_mm_insert_pi16(a, i, imm8)
|
2549
|
+
#endif
|
2550
|
+
|
2551
|
+
/* Load four contiguous floats from a 16-byte-aligned address
 * (SSE `_mm_load_ps`). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_load_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) {
  #if defined(SIMDE_X86_SSE_NATIVE)
    return _mm_load_ps(mem_addr);
  #else
    simde__m128_private r_;

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_f32 = vld1q_f32(mem_addr);
    #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
      r_.altivec_f32 = vec_vsx_ld(0, mem_addr);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      r_.altivec_f32 = vec_ld(0, mem_addr);
    #else
      /* Tell the compiler the pointer carries simde__m128 alignment so the
       * memcpy can lower to an aligned vector load. */
      simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128), sizeof(r_));
    #endif

    return simde__m128_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
# define _mm_load_ps(mem_addr) simde_mm_load_ps(mem_addr)
#endif
|
2575
|
+
|
2576
|
+
/* Broadcast the float at `mem_addr` into all four lanes
 * (SSE `_mm_load1_ps` / `_mm_load_ps1`). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_load1_ps (simde_float32 const* mem_addr) {
  #if defined(SIMDE_X86_SSE_NATIVE)
    return _mm_load_ps1(mem_addr);
  #else
    simde__m128_private r_;

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_f32 = vld1q_dup_f32(mem_addr);
    #else
      r_ = simde__m128_to_private(simde_mm_set1_ps(*mem_addr));
    #endif

    return simde__m128_from_private(r_);
  #endif
}
#define simde_mm_load_ps1(mem_addr) simde_mm_load1_ps(mem_addr)
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
# define _mm_load_ps1(mem_addr) simde_mm_load1_ps(mem_addr)
# define _mm_load1_ps(mem_addr) simde_mm_load1_ps(mem_addr)
#endif
|
2598
|
+
|
2599
|
+
/* Load a single float into lane 0 and zero the upper three lanes
 * (SSE `_mm_load_ss`). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_load_ss (simde_float32 const* mem_addr) {
  #if defined(SIMDE_X86_SSE_NATIVE)
    return _mm_load_ss(mem_addr);
  #else
    simde__m128_private r_;

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_f32 = vsetq_lane_f32(*mem_addr, vdupq_n_f32(0), 0);
    #else
      r_.f32[0] = *mem_addr;
      /* Zero the upper lanes through the integer view. */
      r_.i32[1] = 0;
      r_.i32[2] = 0;
      r_.i32[3] = 0;
    #endif

    return simde__m128_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
# define _mm_load_ss(mem_addr) simde_mm_load_ss(mem_addr)
#endif
|
2622
|
+
|
2623
|
+
/* Load two floats from `mem_addr` into the high half of the result; the
 * low half is copied from `a` (SSE `_mm_loadh_pi`). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_loadh_pi (simde__m128 a, simde__m64 const* mem_addr) {
  #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
    return _mm_loadh_pi(a, HEDLEY_REINTERPRET_CAST(__m64 const*, mem_addr));
  #else
    simde__m128_private
      r_,
      a_ = simde__m128_to_private(a);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_f32 = vcombine_f32(vget_low_f32(a_.neon_f32), vld1_f32(HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr)));
    #else
      simde__m64_private b_ = *HEDLEY_REINTERPRET_CAST(simde__m64_private const*, mem_addr);
      r_.f32[0] = a_.f32[0];
      r_.f32[1] = a_.f32[1];
      r_.f32[2] = b_.f32[0];
      r_.f32[3] = b_.f32[1];
    #endif

    return simde__m128_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
  /* Cast through simde__m64 so callers passing other pointer types keep
   * compiling; prefer the checked cast when the warning is available. */
  #if HEDLEY_HAS_WARNING("-Wold-style-cast")
    #define _mm_loadh_pi(a, mem_addr) simde_mm_loadh_pi((a), HEDLEY_REINTERPRET_CAST(simde__m64 const*, (mem_addr)))
  #else
    #define _mm_loadh_pi(a, mem_addr) simde_mm_loadh_pi((a), (simde__m64 const*) (mem_addr))
  #endif
#endif
|
2653
|
+
|
2654
|
+
/* The SSE documentation says that there are no alignment requirements
|
2655
|
+
for mem_addr. Unfortunately they used the __m64 type for the argument
|
2656
|
+
which is supposed to be 8-byte aligned, so some compilers (like clang
|
2657
|
+
with -Wcast-align) will generate a warning if you try to cast, say,
|
2658
|
+
a simde_float32* to a simde__m64* for this function.
|
2659
|
+
|
2660
|
+
I think the choice of argument type is unfortunate, but I do think we
|
2661
|
+
need to stick to it here. If there is demand I can always add something
|
2662
|
+
like simde_x_mm_loadl_f32(simde__m128, simde_float32 mem_addr[2]) */
|
2663
|
+
/* Load two floats from `mem_addr` into the low half of the result; the
 * high half is copied from `a` (SSE `_mm_loadl_pi`). See the comment above
 * this function about __m64 alignment of mem_addr. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_loadl_pi (simde__m128 a, simde__m64 const* mem_addr) {
  #if defined(SIMDE_X86_SSE_NATIVE)
    return _mm_loadl_pi(a, HEDLEY_REINTERPRET_CAST(__m64 const*, mem_addr));
  #else
    simde__m128_private
      r_,
      a_ = simde__m128_to_private(a);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_f32 = vcombine_f32(vld1_f32(
        HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr)), vget_high_f32(a_.neon_f32));
    #else
      /* memcpy instead of a direct dereference: avoids alignment assumptions. */
      simde__m64_private b_;
      simde_memcpy(&b_, mem_addr, sizeof(b_));
      r_.i32[0] = b_.i32[0];
      r_.i32[1] = b_.i32[1];
      r_.i32[2] = a_.i32[2];
      r_.i32[3] = a_.i32[3];
    #endif

    return simde__m128_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
  #if HEDLEY_HAS_WARNING("-Wold-style-cast")
    #define _mm_loadl_pi(a, mem_addr) simde_mm_loadl_pi((a), HEDLEY_REINTERPRET_CAST(simde__m64 const*, (mem_addr)))
  #else
    #define _mm_loadl_pi(a, mem_addr) simde_mm_loadl_pi((a), (simde__m64 const*) (mem_addr))
  #endif
#endif
|
2695
|
+
|
2696
|
+
/* Load four floats from an aligned address in reverse lane order
 * (SSE `_mm_loadr_ps`): result lane 0 gets mem_addr[3], etc. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_loadr_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) {
  #if defined(SIMDE_X86_SSE_NATIVE)
    return _mm_loadr_ps(mem_addr);
  #else
    simde__m128_private
      r_,
      v_ = simde__m128_to_private(simde_mm_load_ps(mem_addr));

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      /* Reverse within each 64-bit half, then swap the halves. */
      r_.neon_f32 = vrev64q_f32(v_.neon_f32);
      r_.neon_f32 = vextq_f32(r_.neon_f32, r_.neon_f32, 2);
    #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && defined(__PPC64__)
      r_.altivec_f32 = vec_reve(v_.altivec_f32);
    #elif defined(SIMDE_SHUFFLE_VECTOR_)
      r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, v_.f32, v_.f32, 3, 2, 1, 0);
    #else
      r_.f32[0] = v_.f32[3];
      r_.f32[1] = v_.f32[2];
      r_.f32[2] = v_.f32[1];
      r_.f32[3] = v_.f32[0];
    #endif

    return simde__m128_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
# define _mm_loadr_ps(mem_addr) simde_mm_loadr_ps(mem_addr)
#endif
|
2726
|
+
|
2727
|
+
/* Load four floats from an unaligned address (SSE `_mm_loadu_ps`). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_loadu_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) {
  #if defined(SIMDE_X86_SSE_NATIVE)
    return _mm_loadu_ps(mem_addr);
  #else
    simde__m128_private r_;

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_f32 = vld1q_f32(HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr));
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_v128_load(mem_addr);
    #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && defined(__PPC64__)
      r_.altivec_f32 = vec_vsx_ld(0, mem_addr);
    #else
      /* memcpy is the portable unaligned load; compilers lower it to a
       * vector load where possible. */
      simde_memcpy(&r_, mem_addr, sizeof(r_));
    #endif

    return simde__m128_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
# define _mm_loadu_ps(mem_addr) simde_mm_loadu_ps(mem_addr)
#endif
|
2751
|
+
|
2752
|
+
/* Conditional byte store (SSE `_mm_maskmove_si64` / `_m_maskmovq`): for each
 * of the 8 bytes, store a's byte to mem_addr[i] iff the mask byte's sign bit
 * is set. Bytes with a clear mask bit are left untouched in memory. */
SIMDE_FUNCTION_ATTRIBUTES
void
simde_mm_maskmove_si64 (simde__m64 a, simde__m64 mask, int8_t* mem_addr) {
  #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
    _mm_maskmove_si64(a, mask, HEDLEY_REINTERPRET_CAST(char*, mem_addr));
  #else
    simde__m64_private
      a_ = simde__m64_to_private(a),
      mask_ = simde__m64_to_private(mask);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++)
      /* i8 < 0 <=> high bit of the mask byte is set. */
      if (mask_.i8[i] < 0)
        mem_addr[i] = a_.i8[i];
  #endif
}
#define simde_m_maskmovq(a, mask, mem_addr) simde_mm_maskmove_si64(a, mask, mem_addr)
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
# define _mm_maskmove_si64(a, mask, mem_addr) simde_mm_maskmove_si64((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr)))
# define _m_maskmovq(a, mask, mem_addr) simde_mm_maskmove_si64((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr)))
#endif
|
2773
|
+
|
2774
|
+
/* Lane-wise signed 16-bit maximum (SSE `_mm_max_pi16` / `_m_pmaxsw`). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m64
simde_mm_max_pi16 (simde__m64 a, simde__m64 b) {
  #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
    return _mm_max_pi16(a, b);
  #else
    simde__m64_private
      r_,
      a_ = simde__m64_to_private(a),
      b_ = simde__m64_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i16 = vmax_s16(a_.neon_i16, b_.neon_i16);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
        r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? a_.i16[i] : b_.i16[i];
      }
    #endif

    return simde__m64_from_private(r_);
  #endif
}
#define simde_m_pmaxsw(a, b) simde_mm_max_pi16(a, b)
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
# define _mm_max_pi16(a, b) simde_mm_max_pi16(a, b)
# define _m_pmaxsw(a, b) simde_mm_max_pi16(a, b)
#endif
|
2802
|
+
|
2803
|
+
/* Lane-wise single-precision maximum (SSE `_mm_max_ps`).
 * x86 maxps returns the SECOND operand when either input is NaN; the
 * compare-and-select branches reproduce that, while the simpler native
 * min/max intrinsics are only used under SIMDE_FAST_NANS. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_max_ps (simde__m128 a, simde__m128 b) {
  #if defined(SIMDE_X86_SSE_NATIVE)
    return _mm_max_ps(a, b);
  #else
    simde__m128_private
      r_,
      a_ = simde__m128_to_private(a),
      b_ = simde__m128_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_NANS)
      r_.neon_f32 = vmaxq_f32(a_.neon_f32, b_.neon_f32);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      /* select(a > b ? a : b): NaN compares false, so b is chosen, as on x86. */
      r_.neon_f32 = vbslq_f32(vcgtq_f32(a_.neon_f32, b_.neon_f32), a_.neon_f32, b_.neon_f32);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_NANS)
      r_.wasm_v128 = wasm_f32x4_max(a_.wasm_v128, b_.wasm_v128);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_v128_bitselect(a_.wasm_v128, b_.wasm_v128, wasm_f32x4_gt(a_.wasm_v128, b_.wasm_v128));
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_FAST_NANS)
      r_.altivec_f32 = vec_max(a_.altivec_f32, b_.altivec_f32);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      r_.altivec_f32 = vec_sel(b_.altivec_f32, a_.altivec_f32, vec_cmpgt(a_.altivec_f32, b_.altivec_f32));
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = (a_.f32[i] > b_.f32[i]) ? a_.f32[i] : b_.f32[i];
      }
    #endif

    return simde__m128_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
# define _mm_max_ps(a, b) simde_mm_max_ps((a), (b))
#endif
|
2839
|
+
|
2840
|
+
/* Lane-wise unsigned 8-bit maximum (SSE `_mm_max_pu8` / `_m_pmaxub`). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m64
simde_mm_max_pu8 (simde__m64 a, simde__m64 b) {
  #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
    return _mm_max_pu8(a, b);
  #else
    simde__m64_private
      r_,
      a_ = simde__m64_to_private(a),
      b_ = simde__m64_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_u8 = vmax_u8(a_.neon_u8, b_.neon_u8);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
        r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? a_.u8[i] : b_.u8[i];
      }
    #endif

    return simde__m64_from_private(r_);
  #endif
}
#define simde_m_pmaxub(a, b) simde_mm_max_pu8(a, b)
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
# define _mm_max_pu8(a, b) simde_mm_max_pu8(a, b)
# define _m_pmaxub(a, b) simde_mm_max_pu8(a, b)
#endif
|
2868
|
+
|
2869
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
2870
|
+
simde__m128
|
2871
|
+
simde_mm_max_ss (simde__m128 a, simde__m128 b) {
|
2872
|
+
#if defined(SIMDE_X86_SSE_NATIVE)
|
2873
|
+
return _mm_max_ss(a, b);
|
2874
|
+
#elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
|
2875
|
+
return simde_mm_move_ss(a, simde_mm_max_ps(a, b));
|
2876
|
+
#else
|
2877
|
+
simde__m128_private
|
2878
|
+
r_,
|
2879
|
+
a_ = simde__m128_to_private(a),
|
2880
|
+
b_ = simde__m128_to_private(b);
|
2881
|
+
|
2882
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
2883
|
+
float32_t value = vgetq_lane_f32(maxq_f32(a_.neon_f32, b_.neon_f32), 0);
|
2884
|
+
r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0);
|
2885
|
+
#else
|
2886
|
+
r_.f32[0] = (a_.f32[0] > b_.f32[0]) ? a_.f32[0] : b_.f32[0];
|
2887
|
+
r_.f32[1] = a_.f32[1];
|
2888
|
+
r_.f32[2] = a_.f32[2];
|
2889
|
+
r_.f32[3] = a_.f32[3];
|
2890
|
+
#endif
|
2891
|
+
|
2892
|
+
return simde__m128_from_private(r_);
|
2893
|
+
#endif
|
2894
|
+
}
|
2895
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
2896
|
+
# define _mm_max_ss(a, b) simde_mm_max_ss((a), (b))
|
2897
|
+
#endif
|
2898
|
+
|
2899
|
+
/* Lane-wise signed 16-bit minimum (SSE `_mm_min_pi16` / `_m_pminsw`). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m64
simde_mm_min_pi16 (simde__m64 a, simde__m64 b) {
  #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
    return _mm_min_pi16(a, b);
  #else
    simde__m64_private
      r_,
      a_ = simde__m64_to_private(a),
      b_ = simde__m64_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i16 = vmin_s16(a_.neon_i16, b_.neon_i16);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
        r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i];
      }
    #endif

    return simde__m64_from_private(r_);
  #endif
}
#define simde_m_pminsw(a, b) simde_mm_min_pi16(a, b)
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
# define _mm_min_pi16(a, b) simde_mm_min_pi16(a, b)
# define _m_pminsw(a, b) simde_mm_min_pi16(a, b)
#endif
|
2927
|
+
|
2928
|
+
/* Lane-wise single-precision minimum (SSE `_mm_min_ps`).
 * x86 minps returns the SECOND operand when either input is NaN; the
 * compare-and-select branches reproduce that, while the native min
 * intrinsics are only used under SIMDE_FAST_NANS. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_min_ps (simde__m128 a, simde__m128 b) {
  #if defined(SIMDE_X86_SSE_NATIVE)
    return _mm_min_ps(a, b);
  #elif defined(SIMDE_FAST_NANS) && defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return simde__m128_from_neon_f32(vminq_f32(simde__m128_to_neon_f32(a), simde__m128_to_neon_f32(b)));
  #elif defined(SIMDE_WASM_SIMD128_NATIVE)
    simde__m128_private
      r_,
      a_ = simde__m128_to_private(a),
      b_ = simde__m128_to_private(b);
    #if defined(SIMDE_FAST_NANS)
      r_.wasm_v128 = wasm_f32x4_min(a_.wasm_v128, b_.wasm_v128);
    #else
      /* select(a < b ? a : b): NaN compares false, so b is chosen, as on x86. */
      r_.wasm_v128 = wasm_v128_bitselect(a_.wasm_v128, b_.wasm_v128, wasm_f32x4_lt(a_.wasm_v128, b_.wasm_v128));
    #endif
    return simde__m128_from_private(r_);
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
    simde__m128_private
      r_,
      a_ = simde__m128_to_private(a),
      b_ = simde__m128_to_private(b);

    #if defined(SIMDE_FAST_NANS)
      r_.altivec_f32 = vec_min(a_.altivec_f32, b_.altivec_f32);
    #else
      r_.altivec_f32 = vec_sel(b_.altivec_f32, a_.altivec_f32, vec_cmpgt(b_.altivec_f32, a_.altivec_f32));
    #endif

    return simde__m128_from_private(r_);
  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
    /* Bitwise blend through the full compare mask. */
    simde__m128 mask = simde_mm_cmplt_ps(a, b);
    return simde_mm_or_ps(simde_mm_and_ps(mask, a), simde_mm_andnot_ps(mask, b));
  #else
    simde__m128_private
      r_,
      a_ = simde__m128_to_private(a),
      b_ = simde__m128_to_private(b);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
      r_.f32[i] = (a_.f32[i] < b_.f32[i]) ? a_.f32[i] : b_.f32[i];
    }

    return simde__m128_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
# define _mm_min_ps(a, b) simde_mm_min_ps((a), (b))
#endif
|
2979
|
+
|
2980
|
+
/* Lane-wise unsigned 8-bit minimum (SSE `_mm_min_pu8` / `_m_pminub`). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m64
simde_mm_min_pu8 (simde__m64 a, simde__m64 b) {
  #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
    return _mm_min_pu8(a, b);
  #else
    simde__m64_private
      r_,
      a_ = simde__m64_to_private(a),
      b_ = simde__m64_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_u8 = vmin_u8(a_.neon_u8, b_.neon_u8);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
        r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? a_.u8[i] : b_.u8[i];
      }
    #endif

    return simde__m64_from_private(r_);
  #endif
}
#define simde_m_pminub(a, b) simde_mm_min_pu8(a, b)
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
# define _mm_min_pu8(a, b) simde_mm_min_pu8(a, b)
# define _m_pminub(a, b) simde_mm_min_pu8(a, b)
#endif
|
3008
|
+
|
3009
|
+
/* Scalar single-precision minimum (SSE `_mm_min_ss`): lane 0 is
 * min(a[0], b[0]); lanes 1-3 are copied from `a`. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_min_ss (simde__m128 a, simde__m128 b) {
  #if defined(SIMDE_X86_SSE_NATIVE)
    return _mm_min_ss(a, b);
  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
    return simde_mm_move_ss(a, simde_mm_min_ps(a, b));
  #else
    simde__m128_private
      r_,
      a_ = simde__m128_to_private(a),
      b_ = simde__m128_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      /* min all lanes, keep only lane 0, splice into `a`. */
      float32_t value = vgetq_lane_f32(vminq_f32(a_.neon_f32, b_.neon_f32), 0);
      r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0);
    #else
      r_.f32[0] = (a_.f32[0] < b_.f32[0]) ? a_.f32[0] : b_.f32[0];
      r_.f32[1] = a_.f32[1];
      r_.f32[2] = a_.f32[2];
      r_.f32[3] = a_.f32[3];
    #endif

    return simde__m128_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
# define _mm_min_ss(a, b) simde_mm_min_ss((a), (b))
#endif
|
3038
|
+
|
3039
|
+
/* SSE `_mm_movehl_ps`: result = { b[2], b[3], a[2], a[3] } — high half of
 * `b` into the low half, high half of `a` kept. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_movehl_ps (simde__m128 a, simde__m128 b) {
  #if defined(SIMDE_X86_SSE_NATIVE)
    return _mm_movehl_ps(a, b);
  #else
    simde__m128_private
      r_,
      a_ = simde__m128_to_private(a),
      b_ = simde__m128_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      float32x2_t a32 = vget_high_f32(a_.neon_f32);
      float32x2_t b32 = vget_high_f32(b_.neon_f32);
      r_.neon_f32 = vcombine_f32(b32, a32);
    #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
      r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float),
          vec_mergel(b_.altivec_i64, a_.altivec_i64));
    #elif defined(SIMDE_SHUFFLE_VECTOR_)
      r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 6, 7, 2, 3);
    #else
      r_.f32[0] = b_.f32[2];
      r_.f32[1] = b_.f32[3];
      r_.f32[2] = a_.f32[2];
      r_.f32[3] = a_.f32[3];
    #endif

    return simde__m128_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
# define _mm_movehl_ps(a, b) simde_mm_movehl_ps((a), (b))
#endif
|
3072
|
+
|
3073
|
+
/* SSE `_mm_movelh_ps`: result = { a[0], a[1], b[0], b[1] } — low half of
 * `a` kept, low half of `b` into the high half. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_movelh_ps (simde__m128 a, simde__m128 b) {
  #if defined(SIMDE_X86_SSE_NATIVE)
    return _mm_movelh_ps(a, b);
  #else
    simde__m128_private
      r_,
      a_ = simde__m128_to_private(a),
      b_ = simde__m128_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      float32x2_t a10 = vget_low_f32(a_.neon_f32);
      float32x2_t b10 = vget_low_f32(b_.neon_f32);
      r_.neon_f32 = vcombine_f32(a10, b10);
    #elif defined(SIMDE_SHUFFLE_VECTOR_)
      r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 1, 4, 5);
    #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
      r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float),
          vec_mergeh(a_.altivec_i64, b_.altivec_i64));
    #else
      r_.f32[0] = a_.f32[0];
      r_.f32[1] = a_.f32[1];
      r_.f32[2] = b_.f32[0];
      r_.f32[3] = b_.f32[1];
    #endif

    return simde__m128_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
# define _mm_movelh_ps(a, b) simde_mm_movelh_ps((a), (b))
#endif
|
3106
|
+
|
3107
|
+
/* Gather the sign bit of each of the 8 bytes of `a` into the low 8 bits of
 * an int (SSE `_mm_movemask_pi8` / `_m_pmovmskb`); bit i = sign of byte i. */
SIMDE_FUNCTION_ATTRIBUTES
int
simde_mm_movemask_pi8 (simde__m64 a) {
  #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
    return _mm_movemask_pi8(a);
  #else
    simde__m64_private a_ = simde__m64_to_private(a);
    int r = 0;

    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      /* Isolate each sign bit, shift byte i's bit down into position i
       * (negative shift = right shift), then horizontally add. */
      uint8x8_t input = a_.neon_u8;
      const int8_t xr[8] = {-7, -6, -5, -4, -3, -2, -1, 0};
      const uint8x8_t mask_and = vdup_n_u8(0x80);
      const int8x8_t mask_shift = vld1_s8(xr);
      const uint8x8_t mask_result = vshl_u8(vand_u8(input, mask_and), mask_shift);
      uint8x8_t lo = mask_result;
      r = vaddv_u8(lo);
    #else
      const size_t nmemb = sizeof(a_.i8) / sizeof(a_.i8[0]);
      SIMDE_VECTORIZE_REDUCTION(|:r)
      for (size_t i = 0 ; i < nmemb ; i++) {
        r |= (a_.u8[nmemb - 1 - i] >> 7) << (nmemb - 1 - i);
      }
    #endif

    return r;
  #endif
}
#define simde_m_pmovmskb(a) simde_mm_movemask_pi8(a)
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
# define _mm_movemask_pi8(a) simde_mm_movemask_pi8(a)
# define _m_pmovmskb(a) simde_mm_movemask_pi8(a)
#endif
|
3140
|
+
|
3141
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
3142
|
+
int
|
3143
|
+
simde_mm_movemask_ps (simde__m128 a) {
|
3144
|
+
#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
|
3145
|
+
return _mm_movemask_ps(a);
|
3146
|
+
#else
|
3147
|
+
int r = 0;
|
3148
|
+
simde__m128_private a_ = simde__m128_to_private(a);
|
3149
|
+
|
3150
|
+
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
3151
|
+
static const int32_t shift_amount[] = { 0, 1, 2, 3 };
|
3152
|
+
const int32x4_t shift = vld1q_s32(shift_amount);
|
3153
|
+
uint32x4_t tmp = vshrq_n_u32(a_.neon_u32, 31);
|
3154
|
+
return HEDLEY_STATIC_CAST(int, vaddvq_u32(vshlq_u32(tmp, shift)));
|
3155
|
+
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
3156
|
+
// Shift out everything but the sign bits with a 32-bit unsigned shift right.
|
3157
|
+
uint64x2_t high_bits = vreinterpretq_u64_u32(vshrq_n_u32(a_.neon_u32, 31));
|
3158
|
+
// Merge the two pairs together with a 64-bit unsigned shift right + add.
|
3159
|
+
uint8x16_t paired = vreinterpretq_u8_u64(vsraq_n_u64(high_bits, high_bits, 31));
|
3160
|
+
// Extract the result.
|
3161
|
+
return vgetq_lane_u8(paired, 0) | (vgetq_lane_u8(paired, 8) << 2);
|
3162
|
+
#else
|
3163
|
+
SIMDE_VECTORIZE_REDUCTION(|:r)
|
3164
|
+
for (size_t i = 0 ; i < sizeof(a_.u32) / sizeof(a_.u32[0]) ; i++) {
|
3165
|
+
r |= (a_.u32[i] >> ((sizeof(a_.u32[i]) * CHAR_BIT) - 1)) << i;
|
3166
|
+
}
|
3167
|
+
#endif
|
3168
|
+
|
3169
|
+
return r;
|
3170
|
+
#endif
|
3171
|
+
}
|
3172
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
3173
|
+
# define _mm_movemask_ps(a) simde_mm_movemask_ps((a))
|
3174
|
+
#endif
|
3175
|
+
|
3176
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
3177
|
+
simde__m128
|
3178
|
+
simde_mm_mul_ps (simde__m128 a, simde__m128 b) {
|
3179
|
+
#if defined(SIMDE_X86_SSE_NATIVE)
|
3180
|
+
return _mm_mul_ps(a, b);
|
3181
|
+
#else
|
3182
|
+
simde__m128_private
|
3183
|
+
r_,
|
3184
|
+
a_ = simde__m128_to_private(a),
|
3185
|
+
b_ = simde__m128_to_private(b);
|
3186
|
+
|
3187
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
3188
|
+
r_.neon_f32 = vmulq_f32(a_.neon_f32, b_.neon_f32);
|
3189
|
+
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
3190
|
+
r_.wasm_v128 = wasm_f32x4_mul(a_.wasm_v128, b_.wasm_v128);
|
3191
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
3192
|
+
r_.f32 = a_.f32 * b_.f32;
|
3193
|
+
#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
|
3194
|
+
r_.altivec_f32 = vec_mul(a_.altivec_f32, b_.altivec_f32);
|
3195
|
+
#else
|
3196
|
+
SIMDE_VECTORIZE
|
3197
|
+
for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
|
3198
|
+
r_.f32[i] = a_.f32[i] * b_.f32[i];
|
3199
|
+
}
|
3200
|
+
#endif
|
3201
|
+
|
3202
|
+
return simde__m128_from_private(r_);
|
3203
|
+
#endif
|
3204
|
+
}
|
3205
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
3206
|
+
# define _mm_mul_ps(a, b) simde_mm_mul_ps((a), (b))
|
3207
|
+
#endif
|
3208
|
+
|
3209
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
3210
|
+
simde__m128
|
3211
|
+
simde_mm_mul_ss (simde__m128 a, simde__m128 b) {
|
3212
|
+
#if defined(SIMDE_X86_SSE_NATIVE)
|
3213
|
+
return _mm_mul_ss(a, b);
|
3214
|
+
#elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
|
3215
|
+
return simde_mm_move_ss(a, simde_mm_mul_ps(a, b));
|
3216
|
+
#else
|
3217
|
+
simde__m128_private
|
3218
|
+
r_,
|
3219
|
+
a_ = simde__m128_to_private(a),
|
3220
|
+
b_ = simde__m128_to_private(b);
|
3221
|
+
|
3222
|
+
r_.f32[0] = a_.f32[0] * b_.f32[0];
|
3223
|
+
r_.f32[1] = a_.f32[1];
|
3224
|
+
r_.f32[2] = a_.f32[2];
|
3225
|
+
r_.f32[3] = a_.f32[3];
|
3226
|
+
|
3227
|
+
return simde__m128_from_private(r_);
|
3228
|
+
#endif
|
3229
|
+
}
|
3230
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
3231
|
+
# define _mm_mul_ss(a, b) simde_mm_mul_ss((a), (b))
|
3232
|
+
#endif
|
3233
|
+
|
3234
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
3235
|
+
simde__m64
|
3236
|
+
simde_mm_mulhi_pu16 (simde__m64 a, simde__m64 b) {
|
3237
|
+
#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
|
3238
|
+
return _mm_mulhi_pu16(a, b);
|
3239
|
+
#else
|
3240
|
+
simde__m64_private
|
3241
|
+
r_,
|
3242
|
+
a_ = simde__m64_to_private(a),
|
3243
|
+
b_ = simde__m64_to_private(b);
|
3244
|
+
|
3245
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
3246
|
+
const uint32x4_t t1 = vmull_u16(a_.neon_u16, b_.neon_u16);
|
3247
|
+
const uint32x4_t t2 = vshrq_n_u32(t1, 16);
|
3248
|
+
const uint16x4_t t3 = vmovn_u32(t2);
|
3249
|
+
r_.neon_u16 = t3;
|
3250
|
+
#else
|
3251
|
+
SIMDE_VECTORIZE
|
3252
|
+
for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
|
3253
|
+
r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, ((HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i])) >> UINT32_C(16)));
|
3254
|
+
}
|
3255
|
+
#endif
|
3256
|
+
|
3257
|
+
return simde__m64_from_private(r_);
|
3258
|
+
#endif
|
3259
|
+
}
|
3260
|
+
#define simde_m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b)
|
3261
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
3262
|
+
# define _mm_mulhi_pu16(a, b) simde_mm_mulhi_pu16(a, b)
|
3263
|
+
# define _m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b)
|
3264
|
+
#endif
|
3265
|
+
|
3266
|
+
#if defined(SIMDE_X86_SSE_NATIVE) && defined(HEDLEY_GCC_VERSION)
|
3267
|
+
#define SIMDE_MM_HINT_NTA HEDLEY_STATIC_CAST(enum _mm_hint, 0)
|
3268
|
+
#define SIMDE_MM_HINT_T0 HEDLEY_STATIC_CAST(enum _mm_hint, 1)
|
3269
|
+
#define SIMDE_MM_HINT_T1 HEDLEY_STATIC_CAST(enum _mm_hint, 2)
|
3270
|
+
#define SIMDE_MM_HINT_T2 HEDLEY_STATIC_CAST(enum _mm_hint, 3)
|
3271
|
+
#define SIMDE_MM_HINT_ENTA HEDLEY_STATIC_CAST(enum _mm_hint, 4)
|
3272
|
+
#define SIMDE_MM_HINT_ET0 HEDLEY_STATIC_CAST(enum _mm_hint, 5)
|
3273
|
+
#define SIMDE_MM_HINT_ET1 HEDLEY_STATIC_CAST(enum _mm_hint, 6)
|
3274
|
+
#define SIMDE_MM_HINT_ET2 HEDLEY_STATIC_CAST(enum _mm_hint, 7)
|
3275
|
+
#else
|
3276
|
+
#define SIMDE_MM_HINT_NTA 0
|
3277
|
+
#define SIMDE_MM_HINT_T0 1
|
3278
|
+
#define SIMDE_MM_HINT_T1 2
|
3279
|
+
#define SIMDE_MM_HINT_T2 3
|
3280
|
+
#define SIMDE_MM_HINT_ENTA 4
|
3281
|
+
#define SIMDE_MM_HINT_ET0 5
|
3282
|
+
#define SIMDE_MM_HINT_ET1 6
|
3283
|
+
#define SIMDE_MM_HINT_ET2 7
|
3284
|
+
#endif
|
3285
|
+
|
3286
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
3287
|
+
HEDLEY_DIAGNOSTIC_PUSH
|
3288
|
+
#if HEDLEY_HAS_WARNING("-Wreserved-id-macro")
|
3289
|
+
_Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"")
|
3290
|
+
#endif
|
3291
|
+
#undef _MM_HINT_NTA
|
3292
|
+
#define _MM_HINT_NTA SIMDE_MM_HINT_NTA
|
3293
|
+
#undef _MM_HINT_T0
|
3294
|
+
#define _MM_HINT_T0 SIMDE_MM_HINT_T0
|
3295
|
+
#undef _MM_HINT_T1
|
3296
|
+
#define _MM_HINT_T1 SIMDE_MM_HINT_T1
|
3297
|
+
#undef _MM_HINT_T2
|
3298
|
+
#define _MM_HINT_T2 SIMDE_MM_HINT_T2
|
3299
|
+
#undef _MM_HINT_ETNA
|
3300
|
+
#define _MM_HINT_ETNA SIMDE_MM_HINT_ETNA
|
3301
|
+
#undef _MM_HINT_ET0
|
3302
|
+
#define _MM_HINT_ET0 SIMDE_MM_HINT_ET0
|
3303
|
+
#undef _MM_HINT_ET1
|
3304
|
+
#define _MM_HINT_ET1 SIMDE_MM_HINT_ET1
|
3305
|
+
#undef _MM_HINT_ET1
|
3306
|
+
#define _MM_HINT_ET2 SIMDE_MM_HINT_ET2
|
3307
|
+
HEDLEY_DIAGNOSTIC_POP
|
3308
|
+
#endif
|
3309
|
+
|
3310
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
3311
|
+
void
|
3312
|
+
simde_mm_prefetch (char const* p, int i) {
|
3313
|
+
#if defined(HEDLEY_GCC_VERSION)
|
3314
|
+
__builtin_prefetch(p);
|
3315
|
+
#else
|
3316
|
+
(void) p;
|
3317
|
+
#endif
|
3318
|
+
|
3319
|
+
(void) i;
|
3320
|
+
}
|
3321
|
+
#if defined(SIMDE_X86_SSE_NATIVE)
|
3322
|
+
#if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) /* https://reviews.llvm.org/D71718 */
|
3323
|
+
#define simde_mm_prefetch(p, i) \
|
3324
|
+
(__extension__({ \
|
3325
|
+
HEDLEY_DIAGNOSTIC_PUSH \
|
3326
|
+
HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \
|
3327
|
+
_mm_prefetch((p), (i)); \
|
3328
|
+
HEDLEY_DIAGNOSTIC_POP \
|
3329
|
+
}))
|
3330
|
+
#else
|
3331
|
+
#define simde_mm_prefetch(p, i) _mm_prefetch(p, i)
|
3332
|
+
#endif
|
3333
|
+
#endif
|
3334
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
3335
|
+
#define _mm_prefetch(p, i) simde_mm_prefetch(p, i)
|
3336
|
+
#endif
|
3337
|
+
|
3338
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
3339
|
+
simde__m128
|
3340
|
+
simde_x_mm_negate_ps(simde__m128 a) {
|
3341
|
+
#if defined(SIMDE_X86_SSE_NATIVE)
|
3342
|
+
return simde_mm_xor_ps(a, _mm_set1_ps(SIMDE_FLOAT32_C(-0.0)));
|
3343
|
+
#else
|
3344
|
+
simde__m128_private
|
3345
|
+
r_,
|
3346
|
+
a_ = simde__m128_to_private(a);
|
3347
|
+
|
3348
|
+
#if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && \
|
3349
|
+
(!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,1,0))
|
3350
|
+
r_.altivec_f32 = vec_neg(a_.altivec_f32);
|
3351
|
+
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
3352
|
+
r_.neon_f32 = vnegq_f32(a_.neon_f32);
|
3353
|
+
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
3354
|
+
r_.wasm_v128 = wasm_f32x4_neg(a_.wasm_v128);
|
3355
|
+
#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
|
3356
|
+
r_.altivec_f32 = vec_neg(a_.altivec_f32);
|
3357
|
+
#elif defined(SIMDE_VECTOR_NEGATE)
|
3358
|
+
r_.f32 = -a_.f32;
|
3359
|
+
#else
|
3360
|
+
SIMDE_VECTORIZE
|
3361
|
+
for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
|
3362
|
+
r_.f32[i] = -a_.f32[i];
|
3363
|
+
}
|
3364
|
+
#endif
|
3365
|
+
|
3366
|
+
return simde__m128_from_private(r_);
|
3367
|
+
#endif
|
3368
|
+
}
|
3369
|
+
|
3370
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
3371
|
+
simde__m128
|
3372
|
+
simde_mm_rcp_ps (simde__m128 a) {
|
3373
|
+
#if defined(SIMDE_X86_SSE_NATIVE)
|
3374
|
+
return _mm_rcp_ps(a);
|
3375
|
+
#else
|
3376
|
+
simde__m128_private
|
3377
|
+
r_,
|
3378
|
+
a_ = simde__m128_to_private(a);
|
3379
|
+
|
3380
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
3381
|
+
float32x4_t recip = vrecpeq_f32(a_.neon_f32);
|
3382
|
+
|
3383
|
+
#if SIMDE_ACCURACY_PREFERENCE > 0
|
3384
|
+
for (int i = 0; i < SIMDE_ACCURACY_PREFERENCE ; ++i) {
|
3385
|
+
recip = vmulq_f32(recip, vrecpsq_f32(recip, a_.neon_f32));
|
3386
|
+
}
|
3387
|
+
#endif
|
3388
|
+
|
3389
|
+
r_.neon_f32 = recip;
|
3390
|
+
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
3391
|
+
r_.wasm_v128 = wasm_f32x4_div(simde_mm_set1_ps(1.0f), a_.wasm_v128);
|
3392
|
+
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
3393
|
+
r_.altivec_f32 = vec_re(a_.altivec_f32);
|
3394
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
3395
|
+
r_.f32 = 1.0f / a_.f32;
|
3396
|
+
#elif defined(SIMDE_IEEE754_STORAGE)
|
3397
|
+
/* https://stackoverflow.com/questions/12227126/division-as-multiply-and-lut-fast-float-division-reciprocal/12228234#12228234 */
|
3398
|
+
SIMDE_VECTORIZE
|
3399
|
+
for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
|
3400
|
+
int32_t ix;
|
3401
|
+
simde_float32 fx = a_.f32[i];
|
3402
|
+
simde_memcpy(&ix, &fx, sizeof(ix));
|
3403
|
+
int32_t x = INT32_C(0x7EF311C3) - ix;
|
3404
|
+
simde_float32 temp;
|
3405
|
+
simde_memcpy(&temp, &x, sizeof(temp));
|
3406
|
+
r_.f32[i] = temp * (SIMDE_FLOAT32_C(2.0) - temp * fx);
|
3407
|
+
}
|
3408
|
+
#else
|
3409
|
+
SIMDE_VECTORIZE
|
3410
|
+
for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
|
3411
|
+
r_.f32[i] = 1.0f / a_.f32[i];
|
3412
|
+
}
|
3413
|
+
#endif
|
3414
|
+
|
3415
|
+
return simde__m128_from_private(r_);
|
3416
|
+
#endif
|
3417
|
+
}
|
3418
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
3419
|
+
# define _mm_rcp_ps(a) simde_mm_rcp_ps((a))
|
3420
|
+
#endif
|
3421
|
+
|
3422
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
3423
|
+
simde__m128
|
3424
|
+
simde_mm_rcp_ss (simde__m128 a) {
|
3425
|
+
#if defined(SIMDE_X86_SSE_NATIVE)
|
3426
|
+
return _mm_rcp_ss(a);
|
3427
|
+
#elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
|
3428
|
+
return simde_mm_move_ss(a, simde_mm_rcp_ps(a));
|
3429
|
+
#else
|
3430
|
+
simde__m128_private
|
3431
|
+
r_,
|
3432
|
+
a_ = simde__m128_to_private(a);
|
3433
|
+
|
3434
|
+
r_.f32[0] = 1.0f / a_.f32[0];
|
3435
|
+
r_.f32[1] = a_.f32[1];
|
3436
|
+
r_.f32[2] = a_.f32[2];
|
3437
|
+
r_.f32[3] = a_.f32[3];
|
3438
|
+
|
3439
|
+
return simde__m128_from_private(r_);
|
3440
|
+
#endif
|
3441
|
+
}
|
3442
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
3443
|
+
# define _mm_rcp_ss(a) simde_mm_rcp_ss((a))
|
3444
|
+
#endif
|
3445
|
+
|
3446
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
3447
|
+
simde__m128
|
3448
|
+
simde_mm_rsqrt_ps (simde__m128 a) {
|
3449
|
+
#if defined(SIMDE_X86_SSE_NATIVE)
|
3450
|
+
return _mm_rsqrt_ps(a);
|
3451
|
+
#else
|
3452
|
+
simde__m128_private
|
3453
|
+
r_,
|
3454
|
+
a_ = simde__m128_to_private(a);
|
3455
|
+
|
3456
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
3457
|
+
r_.neon_f32 = vrsqrteq_f32(a_.neon_f32);
|
3458
|
+
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
3459
|
+
r_.altivec_f32 = vec_rsqrte(a_.altivec_f32);
|
3460
|
+
#elif defined(SIMDE_IEEE754_STORAGE)
|
3461
|
+
/* https://basesandframes.files.wordpress.com/2020/04/even_faster_math_functions_green_2020.pdf
|
3462
|
+
Pages 100 - 103 */
|
3463
|
+
SIMDE_VECTORIZE
|
3464
|
+
for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
|
3465
|
+
#if SIMDE_ACCURACY_PREFERENCE <= 0
|
3466
|
+
r_.i32[i] = INT32_C(0x5F37624F) - (a_.i32[i] >> 1);
|
3467
|
+
#else
|
3468
|
+
simde_float32 x = a_.f32[i];
|
3469
|
+
simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x;
|
3470
|
+
int32_t ix;
|
3471
|
+
|
3472
|
+
simde_memcpy(&ix, &x, sizeof(ix));
|
3473
|
+
|
3474
|
+
#if SIMDE_ACCURACY_PREFERENCE == 1
|
3475
|
+
ix = INT32_C(0x5F375A82) - (ix >> 1);
|
3476
|
+
#else
|
3477
|
+
ix = INT32_C(0x5F37599E) - (ix >> 1);
|
3478
|
+
#endif
|
3479
|
+
|
3480
|
+
simde_memcpy(&x, &ix, sizeof(x));
|
3481
|
+
|
3482
|
+
#if SIMDE_ACCURACY_PREFERENCE >= 2
|
3483
|
+
x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x);
|
3484
|
+
#endif
|
3485
|
+
x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x);
|
3486
|
+
|
3487
|
+
r_.f32[i] = x;
|
3488
|
+
#endif
|
3489
|
+
}
|
3490
|
+
#elif defined(simde_math_sqrtf)
|
3491
|
+
SIMDE_VECTORIZE
|
3492
|
+
for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
|
3493
|
+
r_.f32[i] = 1.0f / simde_math_sqrtf(a_.f32[i]);
|
3494
|
+
}
|
3495
|
+
#else
|
3496
|
+
HEDLEY_UNREACHABLE();
|
3497
|
+
#endif
|
3498
|
+
|
3499
|
+
return simde__m128_from_private(r_);
|
3500
|
+
#endif
|
3501
|
+
}
|
3502
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
3503
|
+
# define _mm_rsqrt_ps(a) simde_mm_rsqrt_ps((a))
|
3504
|
+
#endif
|
3505
|
+
|
3506
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
3507
|
+
simde__m128
|
3508
|
+
simde_mm_rsqrt_ss (simde__m128 a) {
|
3509
|
+
#if defined(SIMDE_X86_SSE_NATIVE)
|
3510
|
+
return _mm_rsqrt_ss(a);
|
3511
|
+
#elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
|
3512
|
+
return simde_mm_move_ss(a, simde_mm_rsqrt_ps(a));
|
3513
|
+
#else
|
3514
|
+
simde__m128_private
|
3515
|
+
r_,
|
3516
|
+
a_ = simde__m128_to_private(a);
|
3517
|
+
|
3518
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
3519
|
+
r_.neon_f32 = vsetq_lane_f32(vgetq_lane_f32(simde_mm_rsqrt_ps(a).neon_f32, 0), a_.neon_f32, 0);
|
3520
|
+
#elif defined(SIMDE_IEEE754_STORAGE)
|
3521
|
+
{
|
3522
|
+
#if SIMDE_ACCURACY_PREFERENCE <= 0
|
3523
|
+
r_.i32[0] = INT32_C(0x5F37624F) - (a_.i32[0] >> 1);
|
3524
|
+
#else
|
3525
|
+
simde_float32 x = a_.f32[0];
|
3526
|
+
simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x;
|
3527
|
+
int32_t ix;
|
3528
|
+
|
3529
|
+
simde_memcpy(&ix, &x, sizeof(ix));
|
3530
|
+
|
3531
|
+
#if SIMDE_ACCURACY_PREFERENCE == 1
|
3532
|
+
ix = INT32_C(0x5F375A82) - (ix >> 1);
|
3533
|
+
#else
|
3534
|
+
ix = INT32_C(0x5F37599E) - (ix >> 1);
|
3535
|
+
#endif
|
3536
|
+
|
3537
|
+
simde_memcpy(&x, &ix, sizeof(x));
|
3538
|
+
|
3539
|
+
#if SIMDE_ACCURACY_PREFERENCE >= 2
|
3540
|
+
x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x);
|
3541
|
+
#endif
|
3542
|
+
x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x);
|
3543
|
+
|
3544
|
+
r_.f32[0] = x;
|
3545
|
+
#endif
|
3546
|
+
}
|
3547
|
+
r_.f32[1] = a_.f32[1];
|
3548
|
+
r_.f32[2] = a_.f32[2];
|
3549
|
+
r_.f32[3] = a_.f32[3];
|
3550
|
+
#elif defined(simde_math_sqrtf)
|
3551
|
+
r_.f32[0] = 1.0f / simde_math_sqrtf(a_.f32[0]);
|
3552
|
+
r_.f32[1] = a_.f32[1];
|
3553
|
+
r_.f32[2] = a_.f32[2];
|
3554
|
+
r_.f32[3] = a_.f32[3];
|
3555
|
+
#else
|
3556
|
+
HEDLEY_UNREACHABLE();
|
3557
|
+
#endif
|
3558
|
+
|
3559
|
+
return simde__m128_from_private(r_);
|
3560
|
+
#endif
|
3561
|
+
}
|
3562
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
3563
|
+
# define _mm_rsqrt_ss(a) simde_mm_rsqrt_ss((a))
|
3564
|
+
#endif
|
3565
|
+
|
3566
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
3567
|
+
simde__m64
|
3568
|
+
simde_mm_sad_pu8 (simde__m64 a, simde__m64 b) {
|
3569
|
+
#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
|
3570
|
+
return _mm_sad_pu8(a, b);
|
3571
|
+
#else
|
3572
|
+
simde__m64_private
|
3573
|
+
r_,
|
3574
|
+
a_ = simde__m64_to_private(a),
|
3575
|
+
b_ = simde__m64_to_private(b);
|
3576
|
+
|
3577
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
3578
|
+
uint16x4_t t = vpaddl_u8(vabd_u8(a_.neon_u8, b_.neon_u8));
|
3579
|
+
uint16_t r0 = t[0] + t[1] + t[2] + t[3];
|
3580
|
+
r_.neon_u16 = vset_lane_u16(r0, vdup_n_u16(0), 0);
|
3581
|
+
#else
|
3582
|
+
uint16_t sum = 0;
|
3583
|
+
|
3584
|
+
#if defined(SIMDE_HAVE_STDLIB_H)
|
3585
|
+
SIMDE_VECTORIZE_REDUCTION(+:sum)
|
3586
|
+
for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
|
3587
|
+
sum += HEDLEY_STATIC_CAST(uint8_t, abs(a_.u8[i] - b_.u8[i]));
|
3588
|
+
}
|
3589
|
+
|
3590
|
+
r_.i16[0] = HEDLEY_STATIC_CAST(int16_t, sum);
|
3591
|
+
r_.i16[1] = 0;
|
3592
|
+
r_.i16[2] = 0;
|
3593
|
+
r_.i16[3] = 0;
|
3594
|
+
#else
|
3595
|
+
HEDLEY_UNREACHABLE();
|
3596
|
+
#endif
|
3597
|
+
#endif
|
3598
|
+
|
3599
|
+
return simde__m64_from_private(r_);
|
3600
|
+
#endif
|
3601
|
+
}
|
3602
|
+
#define simde_m_psadbw(a, b) simde_mm_sad_pu8(a, b)
|
3603
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
3604
|
+
# define _mm_sad_pu8(a, b) simde_mm_sad_pu8(a, b)
|
3605
|
+
# define _m_psadbw(a, b) simde_mm_sad_pu8(a, b)
|
3606
|
+
#endif
|
3607
|
+
|
3608
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
3609
|
+
simde__m128
|
3610
|
+
simde_mm_set_ss (simde_float32 a) {
|
3611
|
+
#if defined(SIMDE_X86_SSE_NATIVE)
|
3612
|
+
return _mm_set_ss(a);
|
3613
|
+
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
3614
|
+
return vsetq_lane_f32(a, vdupq_n_f32(SIMDE_FLOAT32_C(0.0)), 0);
|
3615
|
+
#else
|
3616
|
+
return simde_mm_set_ps(SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), a);
|
3617
|
+
#endif
|
3618
|
+
}
|
3619
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
3620
|
+
# define _mm_set_ss(a) simde_mm_set_ss(a)
|
3621
|
+
#endif
|
3622
|
+
|
3623
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
3624
|
+
simde__m128
|
3625
|
+
simde_mm_setr_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) {
|
3626
|
+
#if defined(SIMDE_X86_SSE_NATIVE)
|
3627
|
+
return _mm_setr_ps(e3, e2, e1, e0);
|
3628
|
+
#else
|
3629
|
+
return simde_mm_set_ps(e0, e1, e2, e3);
|
3630
|
+
#endif
|
3631
|
+
}
|
3632
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
3633
|
+
# define _mm_setr_ps(e3, e2, e1, e0) simde_mm_setr_ps(e3, e2, e1, e0)
|
3634
|
+
#endif
|
3635
|
+
|
3636
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
3637
|
+
simde__m128
|
3638
|
+
simde_mm_setzero_ps (void) {
|
3639
|
+
#if defined(SIMDE_X86_SSE_NATIVE)
|
3640
|
+
return _mm_setzero_ps();
|
3641
|
+
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
3642
|
+
return vdupq_n_f32(SIMDE_FLOAT32_C(0.0));
|
3643
|
+
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
3644
|
+
return vec_splats(SIMDE_FLOAT32_C(0.0));
|
3645
|
+
#else
|
3646
|
+
simde__m128 r;
|
3647
|
+
simde_memset(&r, 0, sizeof(r));
|
3648
|
+
return r;
|
3649
|
+
#endif
|
3650
|
+
}
|
3651
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
3652
|
+
# define _mm_setzero_ps() simde_mm_setzero_ps()
|
3653
|
+
#endif
|
3654
|
+
|
3655
|
+
#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_)
|
3656
|
+
HEDLEY_DIAGNOSTIC_PUSH
|
3657
|
+
SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_
|
3658
|
+
#endif
|
3659
|
+
|
3660
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
3661
|
+
simde__m128
|
3662
|
+
simde_mm_undefined_ps (void) {
|
3663
|
+
simde__m128_private r_;
|
3664
|
+
|
3665
|
+
#if defined(SIMDE_HAVE_UNDEFINED128)
|
3666
|
+
r_.n = _mm_undefined_ps();
|
3667
|
+
#elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_)
|
3668
|
+
r_ = simde__m128_to_private(simde_mm_setzero_ps());
|
3669
|
+
#endif
|
3670
|
+
|
3671
|
+
return simde__m128_from_private(r_);
|
3672
|
+
}
|
3673
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
3674
|
+
# define _mm_undefined_ps() simde_mm_undefined_ps()
|
3675
|
+
#endif
|
3676
|
+
|
3677
|
+
#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_)
|
3678
|
+
HEDLEY_DIAGNOSTIC_POP
|
3679
|
+
#endif
|
3680
|
+
|
3681
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
3682
|
+
simde__m128
|
3683
|
+
simde_x_mm_setone_ps (void) {
|
3684
|
+
simde__m128 t = simde_mm_setzero_ps();
|
3685
|
+
return simde_mm_cmpeq_ps(t, t);
|
3686
|
+
}
|
3687
|
+
|
3688
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
3689
|
+
void
|
3690
|
+
simde_mm_sfence (void) {
|
3691
|
+
/* TODO: Use Hedley. */
|
3692
|
+
#if defined(SIMDE_X86_SSE_NATIVE)
|
3693
|
+
_mm_sfence();
|
3694
|
+
#elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7))
|
3695
|
+
__atomic_thread_fence(__ATOMIC_SEQ_CST);
|
3696
|
+
#elif !defined(__INTEL_COMPILER) && defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_ATOMICS__)
|
3697
|
+
#if defined(__GNUC__) && (__GNUC__ == 4) && (__GNUC_MINOR__ < 9)
|
3698
|
+
__atomic_thread_fence(__ATOMIC_SEQ_CST);
|
3699
|
+
#else
|
3700
|
+
atomic_thread_fence(memory_order_seq_cst);
|
3701
|
+
#endif
|
3702
|
+
#elif defined(_MSC_VER)
|
3703
|
+
MemoryBarrier();
|
3704
|
+
#elif HEDLEY_HAS_EXTENSION(c_atomic)
|
3705
|
+
__c11_atomic_thread_fence(__ATOMIC_SEQ_CST);
|
3706
|
+
#elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1))
|
3707
|
+
__sync_synchronize();
|
3708
|
+
#elif defined(_OPENMP)
|
3709
|
+
#pragma omp critical(simde_mm_sfence_)
|
3710
|
+
{ }
|
3711
|
+
#endif
|
3712
|
+
}
|
3713
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
3714
|
+
# define _mm_sfence() simde_mm_sfence()
|
3715
|
+
#endif
|
3716
|
+
|
3717
|
+
#define SIMDE_MM_SHUFFLE(z, y, x, w) (((z) << 6) | ((y) << 4) | ((x) << 2) | (w))
|
3718
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
3719
|
+
# define _MM_SHUFFLE(z, y, x, w) SIMDE_MM_SHUFFLE(z, y, x, w)
|
3720
|
+
#endif
|
3721
|
+
|
3722
|
+
#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI)
|
3723
|
+
# define simde_mm_shuffle_pi16(a, imm8) _mm_shuffle_pi16(a, imm8)
|
3724
|
+
#elif defined(SIMDE_SHUFFLE_VECTOR_)
|
3725
|
+
# define simde_mm_shuffle_pi16(a, imm8) (__extension__ ({ \
|
3726
|
+
const simde__m64_private simde__tmp_a_ = simde__m64_to_private(a); \
|
3727
|
+
simde__m64_from_private((simde__m64_private) { .i16 = \
|
3728
|
+
SIMDE_SHUFFLE_VECTOR_(16, 8, \
|
3729
|
+
(simde__tmp_a_).i16, \
|
3730
|
+
(simde__tmp_a_).i16, \
|
3731
|
+
(((imm8) ) & 3), \
|
3732
|
+
(((imm8) >> 2) & 3), \
|
3733
|
+
(((imm8) >> 4) & 3), \
|
3734
|
+
(((imm8) >> 6) & 3)) }); }))
|
3735
|
+
#else
|
3736
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
3737
|
+
simde__m64
|
3738
|
+
simde_mm_shuffle_pi16 (simde__m64 a, const int imm8)
|
3739
|
+
SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) {
|
3740
|
+
simde__m64_private r_;
|
3741
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
3742
|
+
|
3743
|
+
for (size_t i = 0 ; i < sizeof(r_.i16) / sizeof(r_.i16[0]) ; i++) {
|
3744
|
+
r_.i16[i] = a_.i16[(imm8 >> (i * 2)) & 3];
|
3745
|
+
}
|
3746
|
+
|
3747
|
+
HEDLEY_DIAGNOSTIC_PUSH
|
3748
|
+
#if HEDLEY_HAS_WARNING("-Wconditional-uninitialized")
|
3749
|
+
# pragma clang diagnostic ignored "-Wconditional-uninitialized"
|
3750
|
+
#endif
|
3751
|
+
return simde__m64_from_private(r_);
|
3752
|
+
HEDLEY_DIAGNOSTIC_POP
|
3753
|
+
}
|
3754
|
+
#endif
|
3755
|
+
#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI)
|
3756
|
+
# define simde_m_pshufw(a, imm8) _m_pshufw(a, imm8)
|
3757
|
+
#else
|
3758
|
+
# define simde_m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8)
|
3759
|
+
#endif
|
3760
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
3761
|
+
# define _mm_shuffle_pi16(a, imm8) simde_mm_shuffle_pi16(a, imm8)
|
3762
|
+
# define _m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8)
|
3763
|
+
#endif
|
3764
|
+
|
3765
|
+
#if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI)
|
3766
|
+
# define simde_mm_shuffle_ps(a, b, imm8) _mm_shuffle_ps(a, b, imm8)
|
3767
|
+
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
3768
|
+
#define simde_mm_shuffle_ps(a, b, imm8) \
|
3769
|
+
__extension__({ \
|
3770
|
+
float32x4_t ret; \
|
3771
|
+
ret = vmovq_n_f32( \
|
3772
|
+
vgetq_lane_f32(a, (imm8) & (0x3))); \
|
3773
|
+
ret = vsetq_lane_f32( \
|
3774
|
+
vgetq_lane_f32(a, ((imm8) >> 2) & 0x3), \
|
3775
|
+
ret, 1); \
|
3776
|
+
ret = vsetq_lane_f32( \
|
3777
|
+
vgetq_lane_f32(b, ((imm8) >> 4) & 0x3), \
|
3778
|
+
ret, 2); \
|
3779
|
+
ret = vsetq_lane_f32( \
|
3780
|
+
vgetq_lane_f32(b, ((imm8) >> 6) & 0x3), \
|
3781
|
+
ret, 3); \
|
3782
|
+
})
|
3783
|
+
#elif defined(SIMDE_SHUFFLE_VECTOR_)
|
3784
|
+
# define simde_mm_shuffle_ps(a, b, imm8) (__extension__ ({ \
|
3785
|
+
simde__m128_from_private((simde__m128_private) { .f32 = \
|
3786
|
+
SIMDE_SHUFFLE_VECTOR_(32, 16, \
|
3787
|
+
simde__m128_to_private(a).f32, \
|
3788
|
+
simde__m128_to_private(b).f32, \
|
3789
|
+
(((imm8) ) & 3), \
|
3790
|
+
(((imm8) >> 2) & 3), \
|
3791
|
+
(((imm8) >> 4) & 3) + 4, \
|
3792
|
+
(((imm8) >> 6) & 3) + 4) }); }))
|
3793
|
+
#else
|
3794
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
3795
|
+
simde__m128
|
3796
|
+
simde_mm_shuffle_ps (simde__m128 a, simde__m128 b, const int imm8)
|
3797
|
+
SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) {
|
3798
|
+
simde__m128_private
|
3799
|
+
r_,
|
3800
|
+
a_ = simde__m128_to_private(a),
|
3801
|
+
b_ = simde__m128_to_private(b);
|
3802
|
+
|
3803
|
+
r_.f32[0] = a_.f32[(imm8 >> 0) & 3];
|
3804
|
+
r_.f32[1] = a_.f32[(imm8 >> 2) & 3];
|
3805
|
+
r_.f32[2] = b_.f32[(imm8 >> 4) & 3];
|
3806
|
+
r_.f32[3] = b_.f32[(imm8 >> 6) & 3];
|
3807
|
+
|
3808
|
+
return simde__m128_from_private(r_);
|
3809
|
+
}
|
3810
|
+
#endif
|
3811
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
3812
|
+
# define _mm_shuffle_ps(a, b, imm8) simde_mm_shuffle_ps((a), (b), imm8)
|
3813
|
+
#endif
|
3814
|
+
|
3815
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
3816
|
+
simde__m128
|
3817
|
+
simde_mm_sqrt_ps (simde__m128 a) {
|
3818
|
+
#if defined(SIMDE_X86_SSE_NATIVE)
|
3819
|
+
return _mm_sqrt_ps(a);
|
3820
|
+
#else
|
3821
|
+
simde__m128_private
|
3822
|
+
r_,
|
3823
|
+
a_ = simde__m128_to_private(a);
|
3824
|
+
|
3825
|
+
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
3826
|
+
r_.neon_f32 = vsqrtq_f32(a_.neon_f32);
|
3827
|
+
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
3828
|
+
float32x4_t est = vrsqrteq_f32(a_.neon_f32);
|
3829
|
+
for (int i = 0 ; i <= SIMDE_ACCURACY_PREFERENCE ; i++) {
|
3830
|
+
est = vmulq_f32(vrsqrtsq_f32(vmulq_f32(a_.neon_f32, est), est), est);
|
3831
|
+
}
|
3832
|
+
r_.neon_f32 = vmulq_f32(a_.neon_f32, est);
|
3833
|
+
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
3834
|
+
r_.wasm_v128 = wasm_f32x4_sqrt(a_.wasm_v128);
|
3835
|
+
#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
|
3836
|
+
r_.altivec_f32 = vec_sqrt(a_.altivec_f32);
|
3837
|
+
#elif defined(simde_math_sqrt)
|
3838
|
+
SIMDE_VECTORIZE
|
3839
|
+
for (size_t i = 0 ; i < sizeof(r_.f32) / sizeof(r_.f32[0]) ; i++) {
|
3840
|
+
r_.f32[i] = simde_math_sqrtf(a_.f32[i]);
|
3841
|
+
}
|
3842
|
+
#else
|
3843
|
+
HEDLEY_UNREACHABLE();
|
3844
|
+
#endif
|
3845
|
+
|
3846
|
+
return simde__m128_from_private(r_);
|
3847
|
+
#endif
|
3848
|
+
}
|
3849
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
3850
|
+
# define _mm_sqrt_ps(a) simde_mm_sqrt_ps((a))
|
3851
|
+
#endif
|
3852
|
+
|
3853
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
3854
|
+
simde__m128
|
3855
|
+
simde_mm_sqrt_ss (simde__m128 a) {
|
3856
|
+
#if defined(SIMDE_X86_SSE_NATIVE)
|
3857
|
+
return _mm_sqrt_ss(a);
|
3858
|
+
#elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
|
3859
|
+
return simde_mm_move_ss(a, simde_mm_sqrt_ps(a));
|
3860
|
+
#else
|
3861
|
+
simde__m128_private
|
3862
|
+
r_,
|
3863
|
+
a_ = simde__m128_to_private(a);
|
3864
|
+
|
3865
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
3866
|
+
float32_t value =
|
3867
|
+
vgetq_lane_f32(simde__m128_to_private(simde_mm_sqrt_ps(a)).neon_f32, 0);
|
3868
|
+
r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0);
|
3869
|
+
#elif defined(simde_math_sqrtf)
|
3870
|
+
r_.f32[0] = simde_math_sqrtf(a_.f32[0]);
|
3871
|
+
r_.f32[1] = a_.f32[1];
|
3872
|
+
r_.f32[2] = a_.f32[2];
|
3873
|
+
r_.f32[3] = a_.f32[3];
|
3874
|
+
#else
|
3875
|
+
HEDLEY_UNREACHABLE();
|
3876
|
+
#endif
|
3877
|
+
|
3878
|
+
return simde__m128_from_private(r_);
|
3879
|
+
#endif
|
3880
|
+
}
|
3881
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
3882
|
+
# define _mm_sqrt_ss(a) simde_mm_sqrt_ss((a))
|
3883
|
+
#endif
|
3884
|
+
|
3885
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
3886
|
+
void
|
3887
|
+
simde_mm_store_ps (simde_float32 mem_addr[4], simde__m128 a) {
|
3888
|
+
#if defined(SIMDE_X86_SSE_NATIVE)
|
3889
|
+
_mm_store_ps(mem_addr, a);
|
3890
|
+
#else
|
3891
|
+
simde__m128_private a_ = simde__m128_to_private(a);
|
3892
|
+
|
3893
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
3894
|
+
vst1q_f32(mem_addr, a_.neon_f32);
|
3895
|
+
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
3896
|
+
vec_st(a_.altivec_f32, 0, mem_addr);
|
3897
|
+
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
3898
|
+
wasm_v128_store(mem_addr, a_.wasm_v128);
|
3899
|
+
#else
|
3900
|
+
simde_memcpy(mem_addr, &a_, sizeof(a));
|
3901
|
+
#endif
|
3902
|
+
#endif
|
3903
|
+
}
|
3904
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
3905
|
+
# define _mm_store_ps(mem_addr, a) simde_mm_store_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a))
|
3906
|
+
#endif
|
3907
|
+
|
3908
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
3909
|
+
void
|
3910
|
+
simde_mm_store1_ps (simde_float32 mem_addr[4], simde__m128 a) {
|
3911
|
+
simde_float32* mem_addr_ = SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128);
|
3912
|
+
|
3913
|
+
#if defined(SIMDE_X86_SSE_NATIVE)
|
3914
|
+
_mm_store_ps1(mem_addr_, a);
|
3915
|
+
#else
|
3916
|
+
simde__m128_private a_ = simde__m128_to_private(a);
|
3917
|
+
|
3918
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
3919
|
+
vst1q_f32(mem_addr_, vdupq_lane_f32(vget_low_f32(a_.neon_f32), 0));
|
3920
|
+
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
3921
|
+
wasm_v128_store(mem_addr_, wasm_v32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0, 0, 0));
|
3922
|
+
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
3923
|
+
vec_st(vec_splat(a_.altivec_f32, 0), 0, mem_addr_);
|
3924
|
+
#elif defined(SIMDE_SHUFFLE_VECTOR_)
|
3925
|
+
simde__m128_private tmp_;
|
3926
|
+
tmp_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0);
|
3927
|
+
simde_mm_store_ps(mem_addr_, tmp_.f32);
|
3928
|
+
#else
|
3929
|
+
SIMDE_VECTORIZE_ALIGNED(mem_addr_:16)
|
3930
|
+
for (size_t i = 0 ; i < sizeof(a_.f32) / sizeof(a_.f32[0]) ; i++) {
|
3931
|
+
mem_addr_[i] = a_.f32[0];
|
3932
|
+
}
|
3933
|
+
#endif
|
3934
|
+
#endif
|
3935
|
+
}
|
3936
|
+
#define simde_mm_store_ps1(mem_addr, a) simde_mm_store1_ps(mem_addr, a)
|
3937
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
3938
|
+
# define _mm_store_ps1(mem_addr, a) simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a))
|
3939
|
+
# define _mm_store1_ps(mem_addr, a) simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a))
|
3940
|
+
#endif
|
3941
|
+
|
3942
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
3943
|
+
void
|
3944
|
+
simde_mm_store_ss (simde_float32* mem_addr, simde__m128 a) {
|
3945
|
+
#if defined(SIMDE_X86_SSE_NATIVE)
|
3946
|
+
_mm_store_ss(mem_addr, a);
|
3947
|
+
#else
|
3948
|
+
simde__m128_private a_ = simde__m128_to_private(a);
|
3949
|
+
|
3950
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
3951
|
+
vst1q_lane_f32(mem_addr, a_.neon_f32, 0);
|
3952
|
+
#else
|
3953
|
+
*mem_addr = a_.f32[0];
|
3954
|
+
#endif
|
3955
|
+
#endif
|
3956
|
+
}
|
3957
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
3958
|
+
# define _mm_store_ss(mem_addr, a) simde_mm_store_ss(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a))
|
3959
|
+
#endif
|
3960
|
+
|
3961
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
3962
|
+
void
|
3963
|
+
simde_mm_storeh_pi (simde__m64* mem_addr, simde__m128 a) {
|
3964
|
+
#if defined(SIMDE_X86_SSE_NATIVE)
|
3965
|
+
_mm_storeh_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a);
|
3966
|
+
#else
|
3967
|
+
simde__m128_private a_ = simde__m128_to_private(a);
|
3968
|
+
|
3969
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
3970
|
+
vst1_f32(HEDLEY_REINTERPRET_CAST(float32_t*, mem_addr), vget_high_f32(a_.neon_f32));
|
3971
|
+
#else
|
3972
|
+
simde_memcpy(mem_addr, &(a_.m64[1]), sizeof(a_.m64[1]));
|
3973
|
+
#endif
|
3974
|
+
#endif
|
3975
|
+
}
|
3976
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
3977
|
+
# define _mm_storeh_pi(mem_addr, a) simde_mm_storeh_pi(mem_addr, (a))
|
3978
|
+
#endif
|
3979
|
+
|
3980
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
3981
|
+
void
|
3982
|
+
simde_mm_storel_pi (simde__m64* mem_addr, simde__m128 a) {
|
3983
|
+
#if defined(SIMDE_X86_SSE_NATIVE)
|
3984
|
+
_mm_storel_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a);
|
3985
|
+
#else
|
3986
|
+
simde__m64_private* dest_ = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr);
|
3987
|
+
simde__m128_private a_ = simde__m128_to_private(a);
|
3988
|
+
|
3989
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
3990
|
+
dest_->neon_f32 = vget_low_f32(a_.neon_f32);
|
3991
|
+
#else
|
3992
|
+
dest_->f32[0] = a_.f32[0];
|
3993
|
+
dest_->f32[1] = a_.f32[1];
|
3994
|
+
#endif
|
3995
|
+
#endif
|
3996
|
+
}
|
3997
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
3998
|
+
# define _mm_storel_pi(mem_addr, a) simde_mm_storel_pi(mem_addr, (a))
|
3999
|
+
#endif
|
4000
|
+
|
4001
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
4002
|
+
void
|
4003
|
+
simde_mm_storer_ps (simde_float32 mem_addr[4], simde__m128 a) {
|
4004
|
+
#if defined(SIMDE_X86_SSE_NATIVE)
|
4005
|
+
_mm_storer_ps(mem_addr, a);
|
4006
|
+
#else
|
4007
|
+
simde__m128_private a_ = simde__m128_to_private(a);
|
4008
|
+
|
4009
|
+
#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
4010
|
+
vec_st(vec_reve(a_.altivec_f32), 0, mem_addr);
|
4011
|
+
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
4012
|
+
float32x4_t tmp = vrev64q_f32(a_.neon_f32);
|
4013
|
+
vst1q_f32(mem_addr, vextq_f32(tmp, tmp, 2));
|
4014
|
+
#elif defined(SIMDE_SHUFFLE_VECTOR_)
|
4015
|
+
a_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 3, 2, 1, 0);
|
4016
|
+
simde_mm_store_ps(mem_addr, simde__m128_from_private(a_));
|
4017
|
+
#else
|
4018
|
+
SIMDE_VECTORIZE_ALIGNED(mem_addr:16)
|
4019
|
+
for (size_t i = 0 ; i < sizeof(a_.f32) / sizeof(a_.f32[0]) ; i++) {
|
4020
|
+
mem_addr[i] = a_.f32[((sizeof(a_.f32) / sizeof(a_.f32[0])) - 1) - i];
|
4021
|
+
}
|
4022
|
+
#endif
|
4023
|
+
#endif
|
4024
|
+
}
|
4025
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
4026
|
+
# define _mm_storer_ps(mem_addr, a) simde_mm_storer_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a))
|
4027
|
+
#endif
|
4028
|
+
|
4029
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
4030
|
+
void
|
4031
|
+
simde_mm_storeu_ps (simde_float32 mem_addr[4], simde__m128 a) {
|
4032
|
+
#if defined(SIMDE_X86_SSE_NATIVE)
|
4033
|
+
_mm_storeu_ps(mem_addr, a);
|
4034
|
+
#else
|
4035
|
+
simde__m128_private a_ = simde__m128_to_private(a);
|
4036
|
+
|
4037
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
4038
|
+
vst1q_f32(mem_addr, a_.neon_f32);
|
4039
|
+
#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
|
4040
|
+
vec_vsx_st(a_.altivec_f32, 0, mem_addr);
|
4041
|
+
#else
|
4042
|
+
simde_memcpy(mem_addr, &a_, sizeof(a_));
|
4043
|
+
#endif
|
4044
|
+
#endif
|
4045
|
+
}
|
4046
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
4047
|
+
# define _mm_storeu_ps(mem_addr, a) simde_mm_storeu_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a))
|
4048
|
+
#endif
|
4049
|
+
|
4050
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
4051
|
+
simde__m128
|
4052
|
+
simde_mm_sub_ps (simde__m128 a, simde__m128 b) {
|
4053
|
+
#if defined(SIMDE_X86_SSE_NATIVE)
|
4054
|
+
return _mm_sub_ps(a, b);
|
4055
|
+
#else
|
4056
|
+
simde__m128_private
|
4057
|
+
r_,
|
4058
|
+
a_ = simde__m128_to_private(a),
|
4059
|
+
b_ = simde__m128_to_private(b);
|
4060
|
+
|
4061
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
4062
|
+
r_.neon_f32 = vsubq_f32(a_.neon_f32, b_.neon_f32);
|
4063
|
+
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
|
4064
|
+
r_.wasm_v128 = wasm_f32x4_sub(a_.wasm_v128, b_.wasm_v128);
|
4065
|
+
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
4066
|
+
r_.altivec_f32 = vec_sub(a_.altivec_f32, b_.altivec_f32);
|
4067
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
4068
|
+
r_.f32 = a_.f32 - b_.f32;
|
4069
|
+
#else
|
4070
|
+
SIMDE_VECTORIZE
|
4071
|
+
for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
|
4072
|
+
r_.f32[i] = a_.f32[i] - b_.f32[i];
|
4073
|
+
}
|
4074
|
+
#endif
|
4075
|
+
|
4076
|
+
return simde__m128_from_private(r_);
|
4077
|
+
#endif
|
4078
|
+
}
|
4079
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
4080
|
+
# define _mm_sub_ps(a, b) simde_mm_sub_ps((a), (b))
|
4081
|
+
#endif
|
4082
|
+
|
4083
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
4084
|
+
simde__m128
|
4085
|
+
simde_mm_sub_ss (simde__m128 a, simde__m128 b) {
|
4086
|
+
#if defined(SIMDE_X86_SSE_NATIVE)
|
4087
|
+
return _mm_sub_ss(a, b);
|
4088
|
+
#elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
|
4089
|
+
return simde_mm_move_ss(a, simde_mm_sub_ps(a, b));
|
4090
|
+
#else
|
4091
|
+
simde__m128_private
|
4092
|
+
r_,
|
4093
|
+
a_ = simde__m128_to_private(a),
|
4094
|
+
b_ = simde__m128_to_private(b);
|
4095
|
+
|
4096
|
+
r_.f32[0] = a_.f32[0] - b_.f32[0];
|
4097
|
+
r_.f32[1] = a_.f32[1];
|
4098
|
+
r_.f32[2] = a_.f32[2];
|
4099
|
+
r_.f32[3] = a_.f32[3];
|
4100
|
+
|
4101
|
+
return simde__m128_from_private(r_);
|
4102
|
+
#endif
|
4103
|
+
}
|
4104
|
+
|
4105
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
4106
|
+
# define _mm_sub_ss(a, b) simde_mm_sub_ss((a), (b))
|
4107
|
+
#endif
|
4108
|
+
|
4109
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
4110
|
+
int
|
4111
|
+
simde_mm_ucomieq_ss (simde__m128 a, simde__m128 b) {
|
4112
|
+
#if defined(SIMDE_X86_SSE_NATIVE)
|
4113
|
+
return _mm_ucomieq_ss(a, b);
|
4114
|
+
#else
|
4115
|
+
simde__m128_private
|
4116
|
+
a_ = simde__m128_to_private(a),
|
4117
|
+
b_ = simde__m128_to_private(b);
|
4118
|
+
int r;
|
4119
|
+
|
4120
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
4121
|
+
uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32);
|
4122
|
+
uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32);
|
4123
|
+
uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan));
|
4124
|
+
uint32x4_t a_eq_b = vceqq_f32(a_.neon_f32, b_.neon_f32);
|
4125
|
+
r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0);
|
4126
|
+
#elif defined(SIMDE_HAVE_FENV_H)
|
4127
|
+
fenv_t envp;
|
4128
|
+
int x = feholdexcept(&envp);
|
4129
|
+
r = a_.f32[0] == b_.f32[0];
|
4130
|
+
if (HEDLEY_LIKELY(x == 0))
|
4131
|
+
fesetenv(&envp);
|
4132
|
+
#else
|
4133
|
+
r = a_.f32[0] == b_.f32[0];
|
4134
|
+
#endif
|
4135
|
+
|
4136
|
+
return r;
|
4137
|
+
#endif
|
4138
|
+
}
|
4139
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
4140
|
+
# define _mm_ucomieq_ss(a, b) simde_mm_ucomieq_ss((a), (b))
|
4141
|
+
#endif
|
4142
|
+
|
4143
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
4144
|
+
int
|
4145
|
+
simde_mm_ucomige_ss (simde__m128 a, simde__m128 b) {
|
4146
|
+
#if defined(SIMDE_X86_SSE_NATIVE)
|
4147
|
+
return _mm_ucomige_ss(a, b);
|
4148
|
+
#else
|
4149
|
+
simde__m128_private
|
4150
|
+
a_ = simde__m128_to_private(a),
|
4151
|
+
b_ = simde__m128_to_private(b);
|
4152
|
+
int r;
|
4153
|
+
|
4154
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
4155
|
+
uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32);
|
4156
|
+
uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32);
|
4157
|
+
uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan);
|
4158
|
+
uint32x4_t a_ge_b = vcgeq_f32(a_.neon_f32, b_.neon_f32);
|
4159
|
+
r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0);
|
4160
|
+
#elif defined(SIMDE_HAVE_FENV_H)
|
4161
|
+
fenv_t envp;
|
4162
|
+
int x = feholdexcept(&envp);
|
4163
|
+
r = a_.f32[0] >= b_.f32[0];
|
4164
|
+
if (HEDLEY_LIKELY(x == 0))
|
4165
|
+
fesetenv(&envp);
|
4166
|
+
#else
|
4167
|
+
r = a_.f32[0] >= b_.f32[0];
|
4168
|
+
#endif
|
4169
|
+
|
4170
|
+
return r;
|
4171
|
+
#endif
|
4172
|
+
}
|
4173
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
4174
|
+
# define _mm_ucomige_ss(a, b) simde_mm_ucomige_ss((a), (b))
|
4175
|
+
#endif
|
4176
|
+
|
4177
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
4178
|
+
int
|
4179
|
+
simde_mm_ucomigt_ss (simde__m128 a, simde__m128 b) {
|
4180
|
+
#if defined(SIMDE_X86_SSE_NATIVE)
|
4181
|
+
return _mm_ucomigt_ss(a, b);
|
4182
|
+
#else
|
4183
|
+
simde__m128_private
|
4184
|
+
a_ = simde__m128_to_private(a),
|
4185
|
+
b_ = simde__m128_to_private(b);
|
4186
|
+
int r;
|
4187
|
+
|
4188
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
4189
|
+
uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32);
|
4190
|
+
uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32);
|
4191
|
+
uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan);
|
4192
|
+
uint32x4_t a_gt_b = vcgtq_f32(a_.neon_f32, b_.neon_f32);
|
4193
|
+
r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0);
|
4194
|
+
#elif defined(SIMDE_HAVE_FENV_H)
|
4195
|
+
fenv_t envp;
|
4196
|
+
int x = feholdexcept(&envp);
|
4197
|
+
r = a_.f32[0] > b_.f32[0];
|
4198
|
+
if (HEDLEY_LIKELY(x == 0))
|
4199
|
+
fesetenv(&envp);
|
4200
|
+
#else
|
4201
|
+
r = a_.f32[0] > b_.f32[0];
|
4202
|
+
#endif
|
4203
|
+
|
4204
|
+
return r;
|
4205
|
+
#endif
|
4206
|
+
}
|
4207
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
4208
|
+
# define _mm_ucomigt_ss(a, b) simde_mm_ucomigt_ss((a), (b))
|
4209
|
+
#endif
|
4210
|
+
|
4211
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
4212
|
+
int
|
4213
|
+
simde_mm_ucomile_ss (simde__m128 a, simde__m128 b) {
|
4214
|
+
#if defined(SIMDE_X86_SSE_NATIVE)
|
4215
|
+
return _mm_ucomile_ss(a, b);
|
4216
|
+
#else
|
4217
|
+
simde__m128_private
|
4218
|
+
a_ = simde__m128_to_private(a),
|
4219
|
+
b_ = simde__m128_to_private(b);
|
4220
|
+
int r;
|
4221
|
+
|
4222
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
4223
|
+
uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32);
|
4224
|
+
uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32);
|
4225
|
+
uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan));
|
4226
|
+
uint32x4_t a_le_b = vcleq_f32(a_.neon_f32, b_.neon_f32);
|
4227
|
+
r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0);
|
4228
|
+
#elif defined(SIMDE_HAVE_FENV_H)
|
4229
|
+
fenv_t envp;
|
4230
|
+
int x = feholdexcept(&envp);
|
4231
|
+
r = a_.f32[0] <= b_.f32[0];
|
4232
|
+
if (HEDLEY_LIKELY(x == 0))
|
4233
|
+
fesetenv(&envp);
|
4234
|
+
#else
|
4235
|
+
r = a_.f32[0] <= b_.f32[0];
|
4236
|
+
#endif
|
4237
|
+
|
4238
|
+
return r;
|
4239
|
+
#endif
|
4240
|
+
}
|
4241
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
4242
|
+
# define _mm_ucomile_ss(a, b) simde_mm_ucomile_ss((a), (b))
|
4243
|
+
#endif
|
4244
|
+
|
4245
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
4246
|
+
int
|
4247
|
+
simde_mm_ucomilt_ss (simde__m128 a, simde__m128 b) {
|
4248
|
+
#if defined(SIMDE_X86_SSE_NATIVE)
|
4249
|
+
return _mm_ucomilt_ss(a, b);
|
4250
|
+
#else
|
4251
|
+
simde__m128_private
|
4252
|
+
a_ = simde__m128_to_private(a),
|
4253
|
+
b_ = simde__m128_to_private(b);
|
4254
|
+
int r;
|
4255
|
+
|
4256
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
4257
|
+
uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32);
|
4258
|
+
uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32);
|
4259
|
+
uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan));
|
4260
|
+
uint32x4_t a_lt_b = vcltq_f32(a_.neon_f32, b_.neon_f32);
|
4261
|
+
r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0);
|
4262
|
+
#elif defined(SIMDE_HAVE_FENV_H)
|
4263
|
+
fenv_t envp;
|
4264
|
+
int x = feholdexcept(&envp);
|
4265
|
+
r = a_.f32[0] < b_.f32[0];
|
4266
|
+
if (HEDLEY_LIKELY(x == 0))
|
4267
|
+
fesetenv(&envp);
|
4268
|
+
#else
|
4269
|
+
r = a_.f32[0] < b_.f32[0];
|
4270
|
+
#endif
|
4271
|
+
|
4272
|
+
return r;
|
4273
|
+
#endif
|
4274
|
+
}
|
4275
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
4276
|
+
# define _mm_ucomilt_ss(a, b) simde_mm_ucomilt_ss((a), (b))
|
4277
|
+
#endif
|
4278
|
+
|
4279
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
4280
|
+
int
|
4281
|
+
simde_mm_ucomineq_ss (simde__m128 a, simde__m128 b) {
|
4282
|
+
#if defined(SIMDE_X86_SSE_NATIVE)
|
4283
|
+
return _mm_ucomineq_ss(a, b);
|
4284
|
+
#else
|
4285
|
+
simde__m128_private
|
4286
|
+
a_ = simde__m128_to_private(a),
|
4287
|
+
b_ = simde__m128_to_private(b);
|
4288
|
+
int r;
|
4289
|
+
|
4290
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
4291
|
+
uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32);
|
4292
|
+
uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32);
|
4293
|
+
uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan);
|
4294
|
+
uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32));
|
4295
|
+
r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0);
|
4296
|
+
#elif defined(SIMDE_HAVE_FENV_H)
|
4297
|
+
fenv_t envp;
|
4298
|
+
int x = feholdexcept(&envp);
|
4299
|
+
r = a_.f32[0] != b_.f32[0];
|
4300
|
+
if (HEDLEY_LIKELY(x == 0))
|
4301
|
+
fesetenv(&envp);
|
4302
|
+
#else
|
4303
|
+
r = a_.f32[0] != b_.f32[0];
|
4304
|
+
#endif
|
4305
|
+
|
4306
|
+
return r;
|
4307
|
+
#endif
|
4308
|
+
}
|
4309
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
4310
|
+
# define _mm_ucomineq_ss(a, b) simde_mm_ucomineq_ss((a), (b))
|
4311
|
+
#endif
|
4312
|
+
|
4313
|
+
#if defined(SIMDE_X86_SSE_NATIVE)
|
4314
|
+
# if defined(__has_builtin)
|
4315
|
+
# if __has_builtin(__builtin_ia32_undef128)
|
4316
|
+
# define SIMDE_HAVE_UNDEFINED128
|
4317
|
+
# endif
|
4318
|
+
# elif !defined(__PGI) && !defined(SIMDE_BUG_GCC_REV_208793) && !defined(_MSC_VER)
|
4319
|
+
# define SIMDE_HAVE_UNDEFINED128
|
4320
|
+
# endif
|
4321
|
+
#endif
|
4322
|
+
|
4323
|
+
#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_)
|
4324
|
+
HEDLEY_DIAGNOSTIC_PUSH
|
4325
|
+
SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_
|
4326
|
+
#endif
|
4327
|
+
|
4328
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
4329
|
+
simde__m128
|
4330
|
+
simde_mm_unpackhi_ps (simde__m128 a, simde__m128 b) {
|
4331
|
+
#if defined(SIMDE_X86_SSE_NATIVE)
|
4332
|
+
return _mm_unpackhi_ps(a, b);
|
4333
|
+
#else
|
4334
|
+
simde__m128_private
|
4335
|
+
r_,
|
4336
|
+
a_ = simde__m128_to_private(a),
|
4337
|
+
b_ = simde__m128_to_private(b);
|
4338
|
+
|
4339
|
+
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
4340
|
+
r_.neon_f32 = vzip2q_f32(a_.neon_f32, b_.neon_f32);
|
4341
|
+
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
4342
|
+
float32x2_t a1 = vget_high_f32(a_.neon_f32);
|
4343
|
+
float32x2_t b1 = vget_high_f32(b_.neon_f32);
|
4344
|
+
float32x2x2_t result = vzip_f32(a1, b1);
|
4345
|
+
r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]);
|
4346
|
+
#elif defined(SIMDE_SHUFFLE_VECTOR_)
|
4347
|
+
r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 2, 6, 3, 7);
|
4348
|
+
#else
|
4349
|
+
r_.f32[0] = a_.f32[2];
|
4350
|
+
r_.f32[1] = b_.f32[2];
|
4351
|
+
r_.f32[2] = a_.f32[3];
|
4352
|
+
r_.f32[3] = b_.f32[3];
|
4353
|
+
#endif
|
4354
|
+
|
4355
|
+
return simde__m128_from_private(r_);
|
4356
|
+
#endif
|
4357
|
+
}
|
4358
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
4359
|
+
# define _mm_unpackhi_ps(a, b) simde_mm_unpackhi_ps((a), (b))
|
4360
|
+
#endif
|
4361
|
+
|
4362
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
4363
|
+
simde__m128
|
4364
|
+
simde_mm_unpacklo_ps (simde__m128 a, simde__m128 b) {
|
4365
|
+
#if defined(SIMDE_X86_SSE_NATIVE)
|
4366
|
+
return _mm_unpacklo_ps(a, b);
|
4367
|
+
#else
|
4368
|
+
simde__m128_private
|
4369
|
+
r_,
|
4370
|
+
a_ = simde__m128_to_private(a),
|
4371
|
+
b_ = simde__m128_to_private(b);
|
4372
|
+
|
4373
|
+
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
4374
|
+
r_.neon_f32 = vzip1q_f32(a_.neon_f32, b_.neon_f32);
|
4375
|
+
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
4376
|
+
r_.altivec_f32 = vec_mergeh(a_.altivec_f32, b_.altivec_f32);
|
4377
|
+
#elif defined(SIMDE_SHUFFLE_VECTOR_)
|
4378
|
+
r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 4, 1, 5);
|
4379
|
+
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
4380
|
+
float32x2_t a1 = vget_low_f32(a_.neon_f32);
|
4381
|
+
float32x2_t b1 = vget_low_f32(b_.neon_f32);
|
4382
|
+
float32x2x2_t result = vzip_f32(a1, b1);
|
4383
|
+
r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]);
|
4384
|
+
#else
|
4385
|
+
r_.f32[0] = a_.f32[0];
|
4386
|
+
r_.f32[1] = b_.f32[0];
|
4387
|
+
r_.f32[2] = a_.f32[1];
|
4388
|
+
r_.f32[3] = b_.f32[1];
|
4389
|
+
#endif
|
4390
|
+
|
4391
|
+
return simde__m128_from_private(r_);
|
4392
|
+
#endif
|
4393
|
+
}
|
4394
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
4395
|
+
# define _mm_unpacklo_ps(a, b) simde_mm_unpacklo_ps((a), (b))
|
4396
|
+
#endif
|
4397
|
+
|
4398
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
4399
|
+
void
|
4400
|
+
simde_mm_stream_pi (simde__m64* mem_addr, simde__m64 a) {
|
4401
|
+
#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
|
4402
|
+
_mm_stream_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a);
|
4403
|
+
#else
|
4404
|
+
simde__m64_private*
|
4405
|
+
dest = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr),
|
4406
|
+
a_ = simde__m64_to_private(a);
|
4407
|
+
|
4408
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
4409
|
+
dest->i64[0] = vget_lane_s64(a_.neon_i64, 0);
|
4410
|
+
#else
|
4411
|
+
dest->i64[0] = a_.i64[0];
|
4412
|
+
#endif
|
4413
|
+
#endif
|
4414
|
+
}
|
4415
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
4416
|
+
# define _mm_stream_pi(mem_addr, a) simde_mm_stream_pi(mem_addr, (a))
|
4417
|
+
#endif
|
4418
|
+
|
4419
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
4420
|
+
void
|
4421
|
+
simde_mm_stream_ps (simde_float32 mem_addr[4], simde__m128 a) {
|
4422
|
+
#if defined(SIMDE_X86_SSE_NATIVE)
|
4423
|
+
_mm_stream_ps(mem_addr, a);
|
4424
|
+
#elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
4425
|
+
simde__m128_private a_ = simde__m128_to_private(a);
|
4426
|
+
__builtin_nontemporal_store(a_.f32, SIMDE_ALIGN_CAST(__typeof__(a_.f32)*, mem_addr));
|
4427
|
+
#else
|
4428
|
+
simde_mm_store_ps(mem_addr, a);
|
4429
|
+
#endif
|
4430
|
+
}
|
4431
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
4432
|
+
# define _mm_stream_ps(mem_addr, a) simde_mm_stream_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a))
|
4433
|
+
#endif
|
4434
|
+
|
4435
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
4436
|
+
#define SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) \
|
4437
|
+
do { \
|
4438
|
+
float32x4x2_t ROW01 = vtrnq_f32(row0, row1); \
|
4439
|
+
float32x4x2_t ROW23 = vtrnq_f32(row2, row3); \
|
4440
|
+
row0 = vcombine_f32(vget_low_f32(ROW01.val[0]), \
|
4441
|
+
vget_low_f32(ROW23.val[0])); \
|
4442
|
+
row1 = vcombine_f32(vget_low_f32(ROW01.val[1]), \
|
4443
|
+
vget_low_f32(ROW23.val[1])); \
|
4444
|
+
row2 = vcombine_f32(vget_high_f32(ROW01.val[0]), \
|
4445
|
+
vget_high_f32(ROW23.val[0])); \
|
4446
|
+
row3 = vcombine_f32(vget_high_f32(ROW01.val[1]), \
|
4447
|
+
vget_high_f32(ROW23.val[1])); \
|
4448
|
+
} while (0)
|
4449
|
+
#else
|
4450
|
+
#define SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) \
|
4451
|
+
do { \
|
4452
|
+
simde__m128 tmp3, tmp2, tmp1, tmp0; \
|
4453
|
+
tmp0 = simde_mm_unpacklo_ps((row0), (row1)); \
|
4454
|
+
tmp2 = simde_mm_unpacklo_ps((row2), (row3)); \
|
4455
|
+
tmp1 = simde_mm_unpackhi_ps((row0), (row1)); \
|
4456
|
+
tmp3 = simde_mm_unpackhi_ps((row2), (row3)); \
|
4457
|
+
row0 = simde_mm_movelh_ps(tmp0, tmp2); \
|
4458
|
+
row1 = simde_mm_movehl_ps(tmp2, tmp0); \
|
4459
|
+
row2 = simde_mm_movelh_ps(tmp1, tmp3); \
|
4460
|
+
row3 = simde_mm_movehl_ps(tmp3, tmp1); \
|
4461
|
+
} while (0)
|
4462
|
+
#endif
|
4463
|
+
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
|
4464
|
+
# define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3)
|
4465
|
+
#endif
|
4466
|
+
|
4467
|
+
SIMDE_END_DECLS_
|
4468
|
+
|
4469
|
+
HEDLEY_DIAGNOSTIC_POP
|
4470
|
+
|
4471
|
+
#endif /* !defined(SIMDE_X86_SSE_H) */
|