casadi 3.7.2 (casadi-3.7.2-cp314-none-manylinux2014_i686.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (1726)
  1. casadi/__init__.py +92 -0
  2. casadi/_casadi.so +0 -0
  3. casadi/casadi-cli +0 -0
  4. casadi/casadi.py +54468 -0
  5. casadi/cbc +0 -0
  6. casadi/clp +0 -0
  7. casadi/cmake/alpaqa/alpaqaConfig.cmake +24 -0
  8. casadi/cmake/alpaqa/alpaqaConfigVersion.cmake +65 -0
  9. casadi/cmake/alpaqa/alpaqaTargets-release.cmake +29 -0
  10. casadi/cmake/alpaqa/alpaqaTargets.cmake +131 -0
  11. casadi/cmake/casadi-config-version.cmake +11 -0
  12. casadi/cmake/casadi-config.cmake +8 -0
  13. casadi/cmake/casadi-targets-release.cmake +19 -0
  14. casadi/cmake/casadi-targets.cmake +107 -0
  15. casadi/cmake/ghc_filesystem/ghc_filesystem-config-version.cmake +85 -0
  16. casadi/cmake/ghc_filesystem/ghc_filesystem-config.cmake +30 -0
  17. casadi/cmake/ghc_filesystem/ghc_filesystem-targets.cmake +107 -0
  18. casadi/cmake/highs/highs-config.cmake +17 -0
  19. casadi/cmake/highs/highs-targets-release.cmake +28 -0
  20. casadi/cmake/highs/highs-targets.cmake +117 -0
  21. casadi/cmake/libzip/libzip-config-version.cmake +43 -0
  22. casadi/cmake/libzip/libzip-config.cmake +69 -0
  23. casadi/cmake/libzip/libzip-targets-release.cmake +19 -0
  24. casadi/cmake/libzip/libzip-targets.cmake +107 -0
  25. casadi/cmake/libzip/modules/FindMbedTLS.cmake +141 -0
  26. casadi/cmake/libzip/modules/FindNettle.cmake +141 -0
  27. casadi/cmake/libzip/modules/Findzstd.cmake +186 -0
  28. casadi/cmake/osqp/osqp-config.cmake +1 -0
  29. casadi/cmake/osqp/osqp-targets-noconfig.cmake +29 -0
  30. casadi/cmake/osqp/osqp-targets.cmake +113 -0
  31. casadi/cmake/proxsuite/find-external/Simde/FindSimde.cmake +39 -0
  32. casadi/cmake/proxsuite/proxsuiteConfig.cmake +177 -0
  33. casadi/cmake/proxsuite/proxsuiteConfigVersion.cmake +65 -0
  34. casadi/cmake/proxsuite/proxsuiteTargets.cmake +115 -0
  35. casadi/cmake/qdldl/qdldl-config.cmake +1 -0
  36. casadi/cmake/qdldl/qdldl-targets-noconfig.cmake +29 -0
  37. casadi/cmake/qdldl/qdldl-targets.cmake +113 -0
  38. casadi/cmake/sleqp/sleqp-config-version.cmake +65 -0
  39. casadi/cmake/sleqp/sleqp-config.cmake +1 -0
  40. casadi/cmake/sleqp/sleqp-targets-release.cmake +20 -0
  41. casadi/cmake/sleqp/sleqp-targets.cmake +106 -0
  42. casadi/cmake/trlib/trlib-config-release.cmake +19 -0
  43. casadi/cmake/trlib/trlib-config-version.cmake +83 -0
  44. casadi/cmake/trlib/trlib-config.cmake +107 -0
  45. casadi/highs +0 -0
  46. casadi/include/casadi/casadi.hpp +31 -0
  47. casadi/include/casadi/casadi.i +4920 -0
  48. casadi/include/casadi/casadi_c.h +138 -0
  49. casadi/include/casadi/casadi_numpy.hpp +97 -0
  50. casadi/include/casadi/config.h +46 -0
  51. casadi/include/casadi/core/archiver.hpp +58 -0
  52. casadi/include/casadi/core/blazing_spline.hpp +47 -0
  53. casadi/include/casadi/core/calculus.hpp +1805 -0
  54. casadi/include/casadi/core/callback.hpp +235 -0
  55. casadi/include/casadi/core/casadi_common.hpp +355 -0
  56. casadi/include/casadi/core/casadi_enum.hpp +90 -0
  57. casadi/include/casadi/core/casadi_export.h +43 -0
  58. casadi/include/casadi/core/casadi_interrupt.hpp +83 -0
  59. casadi/include/casadi/core/casadi_limits.hpp +104 -0
  60. casadi/include/casadi/core/casadi_logger.hpp +134 -0
  61. casadi/include/casadi/core/casadi_meta.hpp +122 -0
  62. casadi/include/casadi/core/casadi_misc.hpp +1022 -0
  63. casadi/include/casadi/core/casadi_types.hpp +66 -0
  64. casadi/include/casadi/core/code_generator.hpp +1071 -0
  65. casadi/include/casadi/core/conic.hpp +213 -0
  66. casadi/include/casadi/core/core.hpp +75 -0
  67. casadi/include/casadi/core/dae_builder.hpp +885 -0
  68. casadi/include/casadi/core/dm.hpp +90 -0
  69. casadi/include/casadi/core/dm_fwd.hpp +39 -0
  70. casadi/include/casadi/core/dple.hpp +138 -0
  71. casadi/include/casadi/core/exception.hpp +167 -0
  72. casadi/include/casadi/core/expm.hpp +84 -0
  73. casadi/include/casadi/core/external.hpp +70 -0
  74. casadi/include/casadi/core/filesystem.hpp +58 -0
  75. casadi/include/casadi/core/fmu.hpp +270 -0
  76. casadi/include/casadi/core/function.hpp +1389 -0
  77. casadi/include/casadi/core/generic_expression.hpp +760 -0
  78. casadi/include/casadi/core/generic_matrix.hpp +1805 -0
  79. casadi/include/casadi/core/generic_shared.hpp +395 -0
  80. casadi/include/casadi/core/generic_shared_impl.hpp +218 -0
  81. casadi/include/casadi/core/generic_shared_internal.hpp +215 -0
  82. casadi/include/casadi/core/generic_type.hpp +314 -0
  83. casadi/include/casadi/core/global_options.hpp +107 -0
  84. casadi/include/casadi/core/im.hpp +52 -0
  85. casadi/include/casadi/core/im_fwd.hpp +35 -0
  86. casadi/include/casadi/core/importer.hpp +221 -0
  87. casadi/include/casadi/core/integration_tools.hpp +292 -0
  88. casadi/include/casadi/core/integrator.hpp +290 -0
  89. casadi/include/casadi/core/interpolant.hpp +163 -0
  90. casadi/include/casadi/core/linsol.hpp +171 -0
  91. casadi/include/casadi/core/matrix_decl.hpp +1423 -0
  92. casadi/include/casadi/core/matrix_fwd.hpp +37 -0
  93. casadi/include/casadi/core/mx.hpp +1014 -0
  94. casadi/include/casadi/core/nlp_builder.hpp +163 -0
  95. casadi/include/casadi/core/nlp_tools.hpp +124 -0
  96. casadi/include/casadi/core/nlpsol.hpp +234 -0
  97. casadi/include/casadi/core/nonzeros.hpp +111 -0
  98. casadi/include/casadi/core/options.hpp +122 -0
  99. casadi/include/casadi/core/optistack.hpp +704 -0
  100. casadi/include/casadi/core/polynomial.hpp +126 -0
  101. casadi/include/casadi/core/printable.hpp +81 -0
  102. casadi/include/casadi/core/resource.hpp +107 -0
  103. casadi/include/casadi/core/rootfinder.hpp +176 -0
  104. casadi/include/casadi/core/runtime/casadi_axpy.hpp +8 -0
  105. casadi/include/casadi/core/runtime/casadi_bfgs.hpp +49 -0
  106. casadi/include/casadi/core/runtime/casadi_bilin.hpp +42 -0
  107. casadi/include/casadi/core/runtime/casadi_blazing_1d_boor_eval.hpp +112 -0
  108. casadi/include/casadi/core/runtime/casadi_blazing_2d_boor_eval.hpp +311 -0
  109. casadi/include/casadi/core/runtime/casadi_blazing_3d_boor_eval.hpp +645 -0
  110. casadi/include/casadi/core/runtime/casadi_blazing_de_boor.hpp +101 -0
  111. casadi/include/casadi/core/runtime/casadi_bound_consistency.hpp +51 -0
  112. casadi/include/casadi/core/runtime/casadi_cache.hpp +59 -0
  113. casadi/include/casadi/core/runtime/casadi_clear.hpp +27 -0
  114. casadi/include/casadi/core/runtime/casadi_clip_max.hpp +33 -0
  115. casadi/include/casadi/core/runtime/casadi_clip_min.hpp +33 -0
  116. casadi/include/casadi/core/runtime/casadi_convexify.hpp +182 -0
  117. casadi/include/casadi/core/runtime/casadi_copy.hpp +31 -0
  118. casadi/include/casadi/core/runtime/casadi_cvx.hpp +463 -0
  119. casadi/include/casadi/core/runtime/casadi_de_boor.hpp +36 -0
  120. casadi/include/casadi/core/runtime/casadi_dense_lsqr.hpp +247 -0
  121. casadi/include/casadi/core/runtime/casadi_densify.hpp +48 -0
  122. casadi/include/casadi/core/runtime/casadi_dot.hpp +27 -0
  123. casadi/include/casadi/core/runtime/casadi_feasiblesqpmethod.hpp +208 -0
  124. casadi/include/casadi/core/runtime/casadi_file_slurp.hpp +32 -0
  125. casadi/include/casadi/core/runtime/casadi_fill.hpp +27 -0
  126. casadi/include/casadi/core/runtime/casadi_finite_diff.hpp +345 -0
  127. casadi/include/casadi/core/runtime/casadi_flip.hpp +33 -0
  128. casadi/include/casadi/core/runtime/casadi_getu.hpp +35 -0
  129. casadi/include/casadi/core/runtime/casadi_iamax.hpp +36 -0
  130. casadi/include/casadi/core/runtime/casadi_interpn.hpp +39 -0
  131. casadi/include/casadi/core/runtime/casadi_interpn_grad.hpp +72 -0
  132. casadi/include/casadi/core/runtime/casadi_interpn_interpolate.hpp +43 -0
  133. casadi/include/casadi/core/runtime/casadi_interpn_weights.hpp +39 -0
  134. casadi/include/casadi/core/runtime/casadi_ipqp.hpp +868 -0
  135. casadi/include/casadi/core/runtime/casadi_jac.hpp +186 -0
  136. casadi/include/casadi/core/runtime/casadi_kkt.hpp +67 -0
  137. casadi/include/casadi/core/runtime/casadi_kron.hpp +50 -0
  138. casadi/include/casadi/core/runtime/casadi_ldl.hpp +109 -0
  139. casadi/include/casadi/core/runtime/casadi_logsumexp.hpp +41 -0
  140. casadi/include/casadi/core/runtime/casadi_low.hpp +65 -0
  141. casadi/include/casadi/core/runtime/casadi_lsqr.hpp +247 -0
  142. casadi/include/casadi/core/runtime/casadi_masked_norm_inf.hpp +33 -0
  143. casadi/include/casadi/core/runtime/casadi_max_viol.hpp +37 -0
  144. casadi/include/casadi/core/runtime/casadi_mmax.hpp +28 -0
  145. casadi/include/casadi/core/runtime/casadi_mmin.hpp +29 -0
  146. casadi/include/casadi/core/runtime/casadi_mtimes.hpp +75 -0
  147. casadi/include/casadi/core/runtime/casadi_mv.hpp +46 -0
  148. casadi/include/casadi/core/runtime/casadi_mv_dense.hpp +39 -0
  149. casadi/include/casadi/core/runtime/casadi_nd_boor_dual_eval.hpp +127 -0
  150. casadi/include/casadi/core/runtime/casadi_nd_boor_eval.hpp +120 -0
  151. casadi/include/casadi/core/runtime/casadi_newton.hpp +66 -0
  152. casadi/include/casadi/core/runtime/casadi_nlp.hpp +295 -0
  153. casadi/include/casadi/core/runtime/casadi_norm_1.hpp +29 -0
  154. casadi/include/casadi/core/runtime/casadi_norm_2.hpp +24 -0
  155. casadi/include/casadi/core/runtime/casadi_norm_inf.hpp +28 -0
  156. casadi/include/casadi/core/runtime/casadi_norm_inf_mul.hpp +105 -0
  157. casadi/include/casadi/core/runtime/casadi_ocp_block.hpp +55 -0
  158. casadi/include/casadi/core/runtime/casadi_oracle.hpp +44 -0
  159. casadi/include/casadi/core/runtime/casadi_oracle_callback.hpp +39 -0
  160. casadi/include/casadi/core/runtime/casadi_polyval.hpp +29 -0
  161. casadi/include/casadi/core/runtime/casadi_print_canonical.hpp +55 -0
  162. casadi/include/casadi/core/runtime/casadi_print_scalar.hpp +25 -0
  163. casadi/include/casadi/core/runtime/casadi_print_vector.hpp +32 -0
  164. casadi/include/casadi/core/runtime/casadi_printme.hpp +26 -0
  165. casadi/include/casadi/core/runtime/casadi_project.hpp +39 -0
  166. casadi/include/casadi/core/runtime/casadi_qp.hpp +86 -0
  167. casadi/include/casadi/core/runtime/casadi_qr.hpp +272 -0
  168. casadi/include/casadi/core/runtime/casadi_qrqp.hpp +1239 -0
  169. casadi/include/casadi/core/runtime/casadi_rank1.hpp +40 -0
  170. casadi/include/casadi/core/runtime/casadi_regularize.hpp +73 -0
  171. casadi/include/casadi/core/runtime/casadi_runtime.hpp +318 -0
  172. casadi/include/casadi/core/runtime/casadi_scal.hpp +26 -0
  173. casadi/include/casadi/core/runtime/casadi_scaled_copy.hpp +31 -0
  174. casadi/include/casadi/core/runtime/casadi_sparsify.hpp +42 -0
  175. casadi/include/casadi/core/runtime/casadi_sparsity.hpp +24 -0
  176. casadi/include/casadi/core/runtime/casadi_sqpmethod.hpp +178 -0
  177. casadi/include/casadi/core/runtime/casadi_sum.hpp +31 -0
  178. casadi/include/casadi/core/runtime/casadi_sum_viol.hpp +37 -0
  179. casadi/include/casadi/core/runtime/casadi_swap.hpp +32 -0
  180. casadi/include/casadi/core/runtime/casadi_trans.hpp +35 -0
  181. casadi/include/casadi/core/runtime/casadi_tri_project.hpp +37 -0
  182. casadi/include/casadi/core/runtime/casadi_trilsolve.hpp +81 -0
  183. casadi/include/casadi/core/runtime/casadi_triusolve.hpp +81 -0
  184. casadi/include/casadi/core/runtime/casadi_vector_fmax.hpp +28 -0
  185. casadi/include/casadi/core/runtime/casadi_vector_fmin.hpp +28 -0
  186. casadi/include/casadi/core/runtime/casadi_vfmax.hpp +28 -0
  187. casadi/include/casadi/core/runtime/casadi_vfmin.hpp +28 -0
  188. casadi/include/casadi/core/runtime/shared.hpp +261 -0
  189. casadi/include/casadi/core/serializer.hpp +264 -0
  190. casadi/include/casadi/core/serializing_stream.hpp +336 -0
  191. casadi/include/casadi/core/shared_object.hpp +182 -0
  192. casadi/include/casadi/core/slice.hpp +149 -0
  193. casadi/include/casadi/core/sparsity.hpp +1507 -0
  194. casadi/include/casadi/core/sparsity_interface.hpp +763 -0
  195. casadi/include/casadi/core/submatrix.hpp +156 -0
  196. casadi/include/casadi/core/sx.hpp +244 -0
  197. casadi/include/casadi/core/sx_elem.hpp +376 -0
  198. casadi/include/casadi/core/sx_fwd.hpp +45 -0
  199. casadi/include/casadi/core/timing.hpp +98 -0
  200. casadi/include/casadi/core/tools.hpp +67 -0
  201. casadi/include/casadi/core/xml_file.hpp +93 -0
  202. casadi/include/casadi/core/xml_node.hpp +212 -0
  203. casadi/include/casadi/doc.i +62244 -0
  204. casadi/include/casadi/doc_merged.i +38499 -0
  205. casadi/include/casadi/mem.h +311 -0
  206. casadi/include/casadi/valgrind-casadi.supp +649 -0
  207. casadi/include/casadi/valgrind-python.supp +3886 -0
  208. casadi/include/coin-or/IpAlgBuilder.hpp +417 -0
  209. casadi/include/coin-or/IpAlgStrategy.hpp +201 -0
  210. casadi/include/coin-or/IpAlgTypes.hpp +64 -0
  211. casadi/include/coin-or/IpAugSystemSolver.hpp +212 -0
  212. casadi/include/coin-or/IpBlas.hpp +426 -0
  213. casadi/include/coin-or/IpCachedResults.hpp +897 -0
  214. casadi/include/coin-or/IpCompoundMatrix.hpp +423 -0
  215. casadi/include/coin-or/IpCompoundSymMatrix.hpp +348 -0
  216. casadi/include/coin-or/IpCompoundVector.hpp +395 -0
  217. casadi/include/coin-or/IpConvCheck.hpp +97 -0
  218. casadi/include/coin-or/IpDebug.hpp +167 -0
  219. casadi/include/coin-or/IpDenseVector.hpp +626 -0
  220. casadi/include/coin-or/IpDiagMatrix.hpp +158 -0
  221. casadi/include/coin-or/IpEqMultCalculator.hpp +76 -0
  222. casadi/include/coin-or/IpException.hpp +156 -0
  223. casadi/include/coin-or/IpExpansionMatrix.hpp +245 -0
  224. casadi/include/coin-or/IpGenTMatrix.hpp +290 -0
  225. casadi/include/coin-or/IpHessianUpdater.hpp +73 -0
  226. casadi/include/coin-or/IpIdentityMatrix.hpp +167 -0
  227. casadi/include/coin-or/IpIpoptAlg.hpp +257 -0
  228. casadi/include/coin-or/IpIpoptApplication.hpp +367 -0
  229. casadi/include/coin-or/IpIpoptCalculatedQuantities.hpp +1009 -0
  230. casadi/include/coin-or/IpIpoptData.hpp +966 -0
  231. casadi/include/coin-or/IpIpoptNLP.hpp +328 -0
  232. casadi/include/coin-or/IpIterateInitializer.hpp +68 -0
  233. casadi/include/coin-or/IpIteratesVector.hpp +840 -0
  234. casadi/include/coin-or/IpIterationOutput.hpp +78 -0
  235. casadi/include/coin-or/IpJournalist.hpp +573 -0
  236. casadi/include/coin-or/IpLapack.hpp +227 -0
  237. casadi/include/coin-or/IpLibraryLoader.hpp +76 -0
  238. casadi/include/coin-or/IpLineSearch.hpp +106 -0
  239. casadi/include/coin-or/IpLinearSolvers.h +46 -0
  240. casadi/include/coin-or/IpMatrix.hpp +434 -0
  241. casadi/include/coin-or/IpMuUpdate.hpp +77 -0
  242. casadi/include/coin-or/IpNLP.hpp +306 -0
  243. casadi/include/coin-or/IpNLPScaling.hpp +582 -0
  244. casadi/include/coin-or/IpObserver.hpp +422 -0
  245. casadi/include/coin-or/IpOptionsList.hpp +412 -0
  246. casadi/include/coin-or/IpOrigIpoptNLP.hpp +603 -0
  247. casadi/include/coin-or/IpPDSystemSolver.hpp +137 -0
  248. casadi/include/coin-or/IpReferenced.hpp +262 -0
  249. casadi/include/coin-or/IpRegOptions.hpp +1152 -0
  250. casadi/include/coin-or/IpReturnCodes.h +23 -0
  251. casadi/include/coin-or/IpReturnCodes.hpp +18 -0
  252. casadi/include/coin-or/IpReturnCodes.inc +71 -0
  253. casadi/include/coin-or/IpReturnCodes_inc.h +45 -0
  254. casadi/include/coin-or/IpScaledMatrix.hpp +291 -0
  255. casadi/include/coin-or/IpSearchDirCalculator.hpp +72 -0
  256. casadi/include/coin-or/IpSmartPtr.hpp +865 -0
  257. casadi/include/coin-or/IpSolveStatistics.hpp +210 -0
  258. casadi/include/coin-or/IpSparseSymLinearSolverInterface.hpp +260 -0
  259. casadi/include/coin-or/IpStdAugSystemSolver.cpp +555 -0
  260. casadi/include/coin-or/IpStdCInterface.h +428 -0
  261. casadi/include/coin-or/IpSumSymMatrix.hpp +186 -0
  262. casadi/include/coin-or/IpSymLinearSolver.hpp +141 -0
  263. casadi/include/coin-or/IpSymMatrix.hpp +167 -0
  264. casadi/include/coin-or/IpSymScaledMatrix.hpp +255 -0
  265. casadi/include/coin-or/IpSymTMatrix.hpp +275 -0
  266. casadi/include/coin-or/IpTNLP.hpp +820 -0
  267. casadi/include/coin-or/IpTNLPAdapter.hpp +648 -0
  268. casadi/include/coin-or/IpTNLPReducer.hpp +274 -0
  269. casadi/include/coin-or/IpTaggedObject.hpp +128 -0
  270. casadi/include/coin-or/IpTimedTask.hpp +218 -0
  271. casadi/include/coin-or/IpTimingStatistics.hpp +323 -0
  272. casadi/include/coin-or/IpTripletHelper.hpp +308 -0
  273. casadi/include/coin-or/IpTypes.h +81 -0
  274. casadi/include/coin-or/IpTypes.hpp +30 -0
  275. casadi/include/coin-or/IpUtils.hpp +166 -0
  276. casadi/include/coin-or/IpVector.hpp +892 -0
  277. casadi/include/coin-or/IpZeroSymMatrix.hpp +155 -0
  278. casadi/include/coin-or/IpoptConfig.h +45 -0
  279. casadi/include/coin-or/SensAlgorithm.hpp +114 -0
  280. casadi/include/coin-or/SensApplication.hpp +188 -0
  281. casadi/include/coin-or/SensBacksolver.hpp +36 -0
  282. casadi/include/coin-or/SensMeasurement.hpp +56 -0
  283. casadi/include/coin-or/SensPCalculator.hpp +137 -0
  284. casadi/include/coin-or/SensRegOp.hpp +21 -0
  285. casadi/include/coin-or/SensSchurData.hpp +182 -0
  286. casadi/include/coin-or/SensSchurDriver.hpp +118 -0
  287. casadi/include/coin-or/SensSimpleBacksolver.hpp +49 -0
  288. casadi/include/coin-or/SensStepCalc.hpp +85 -0
  289. casadi/include/coin-or/SensUtils.hpp +63 -0
  290. casadi/include/coin-or/metis/defs.h +161 -0
  291. casadi/include/coin-or/metis/macros.h +143 -0
  292. casadi/include/coin-or/metis/metis.h +37 -0
  293. casadi/include/coin-or/metis/proto.h +505 -0
  294. casadi/include/coin-or/metis/rename.h +418 -0
  295. casadi/include/coin-or/metis/struct.h +251 -0
  296. casadi/include/coin-or/mumps/dmumps_c.h +142 -0
  297. casadi/include/coin-or/mumps/mumps_c_types.h +72 -0
  298. casadi/include/coin-or/mumps/mumps_compat.h +27 -0
  299. casadi/include/coin-or/mumps/mumps_int_def.h +11 -0
  300. casadi/include/coin-or/mumps/mumps_mpi.h +67 -0
  301. casadi/include/daqp/api.h +46 -0
  302. casadi/include/daqp/auxiliary.h +29 -0
  303. casadi/include/daqp/bnb.h +32 -0
  304. casadi/include/daqp/codegen.h +18 -0
  305. casadi/include/daqp/constants.h +92 -0
  306. casadi/include/daqp/daqp.h +22 -0
  307. casadi/include/daqp/daqp_prox.h +18 -0
  308. casadi/include/daqp/factorization.h +18 -0
  309. casadi/include/daqp/types.h +161 -0
  310. casadi/include/daqp/utils.h +44 -0
  311. casadi/include/eigen3/Eigen/Cholesky +45 -0
  312. casadi/include/eigen3/Eigen/CholmodSupport +48 -0
  313. casadi/include/eigen3/Eigen/Core +384 -0
  314. casadi/include/eigen3/Eigen/Dense +7 -0
  315. casadi/include/eigen3/Eigen/Eigen +2 -0
  316. casadi/include/eigen3/Eigen/Eigenvalues +60 -0
  317. casadi/include/eigen3/Eigen/Geometry +59 -0
  318. casadi/include/eigen3/Eigen/Householder +29 -0
  319. casadi/include/eigen3/Eigen/IterativeLinearSolvers +48 -0
  320. casadi/include/eigen3/Eigen/Jacobi +32 -0
  321. casadi/include/eigen3/Eigen/KLUSupport +41 -0
  322. casadi/include/eigen3/Eigen/LU +47 -0
  323. casadi/include/eigen3/Eigen/MetisSupport +35 -0
  324. casadi/include/eigen3/Eigen/OrderingMethods +70 -0
  325. casadi/include/eigen3/Eigen/PaStiXSupport +49 -0
  326. casadi/include/eigen3/Eigen/PardisoSupport +35 -0
  327. casadi/include/eigen3/Eigen/QR +50 -0
  328. casadi/include/eigen3/Eigen/QtAlignedMalloc +39 -0
  329. casadi/include/eigen3/Eigen/SPQRSupport +34 -0
  330. casadi/include/eigen3/Eigen/SVD +50 -0
  331. casadi/include/eigen3/Eigen/Sparse +34 -0
  332. casadi/include/eigen3/Eigen/SparseCholesky +37 -0
  333. casadi/include/eigen3/Eigen/SparseCore +69 -0
  334. casadi/include/eigen3/Eigen/SparseLU +50 -0
  335. casadi/include/eigen3/Eigen/SparseQR +36 -0
  336. casadi/include/eigen3/Eigen/StdDeque +27 -0
  337. casadi/include/eigen3/Eigen/StdList +26 -0
  338. casadi/include/eigen3/Eigen/StdVector +27 -0
  339. casadi/include/eigen3/Eigen/SuperLUSupport +64 -0
  340. casadi/include/eigen3/Eigen/UmfPackSupport +40 -0
  341. casadi/include/eigen3/Eigen/src/Cholesky/LDLT.h +688 -0
  342. casadi/include/eigen3/Eigen/src/Cholesky/LLT.h +558 -0
  343. casadi/include/eigen3/Eigen/src/Cholesky/LLT_LAPACKE.h +99 -0
  344. casadi/include/eigen3/Eigen/src/CholmodSupport/CholmodSupport.h +682 -0
  345. casadi/include/eigen3/Eigen/src/Core/ArithmeticSequence.h +413 -0
  346. casadi/include/eigen3/Eigen/src/Core/Array.h +417 -0
  347. casadi/include/eigen3/Eigen/src/Core/ArrayBase.h +226 -0
  348. casadi/include/eigen3/Eigen/src/Core/ArrayWrapper.h +209 -0
  349. casadi/include/eigen3/Eigen/src/Core/Assign.h +90 -0
  350. casadi/include/eigen3/Eigen/src/Core/AssignEvaluator.h +1010 -0
  351. casadi/include/eigen3/Eigen/src/Core/Assign_MKL.h +178 -0
  352. casadi/include/eigen3/Eigen/src/Core/BandMatrix.h +353 -0
  353. casadi/include/eigen3/Eigen/src/Core/Block.h +448 -0
  354. casadi/include/eigen3/Eigen/src/Core/BooleanRedux.h +162 -0
  355. casadi/include/eigen3/Eigen/src/Core/CommaInitializer.h +164 -0
  356. casadi/include/eigen3/Eigen/src/Core/ConditionEstimator.h +175 -0
  357. casadi/include/eigen3/Eigen/src/Core/CoreEvaluators.h +1741 -0
  358. casadi/include/eigen3/Eigen/src/Core/CoreIterators.h +132 -0
  359. casadi/include/eigen3/Eigen/src/Core/CwiseBinaryOp.h +183 -0
  360. casadi/include/eigen3/Eigen/src/Core/CwiseNullaryOp.h +1001 -0
  361. casadi/include/eigen3/Eigen/src/Core/CwiseTernaryOp.h +197 -0
  362. casadi/include/eigen3/Eigen/src/Core/CwiseUnaryOp.h +103 -0
  363. casadi/include/eigen3/Eigen/src/Core/CwiseUnaryView.h +132 -0
  364. casadi/include/eigen3/Eigen/src/Core/DenseBase.h +701 -0
  365. casadi/include/eigen3/Eigen/src/Core/DenseCoeffsBase.h +685 -0
  366. casadi/include/eigen3/Eigen/src/Core/DenseStorage.h +652 -0
  367. casadi/include/eigen3/Eigen/src/Core/Diagonal.h +258 -0
  368. casadi/include/eigen3/Eigen/src/Core/DiagonalMatrix.h +391 -0
  369. casadi/include/eigen3/Eigen/src/Core/DiagonalProduct.h +28 -0
  370. casadi/include/eigen3/Eigen/src/Core/Dot.h +318 -0
  371. casadi/include/eigen3/Eigen/src/Core/EigenBase.h +160 -0
  372. casadi/include/eigen3/Eigen/src/Core/ForceAlignedAccess.h +150 -0
  373. casadi/include/eigen3/Eigen/src/Core/Fuzzy.h +155 -0
  374. casadi/include/eigen3/Eigen/src/Core/GeneralProduct.h +465 -0
  375. casadi/include/eigen3/Eigen/src/Core/GenericPacketMath.h +1040 -0
  376. casadi/include/eigen3/Eigen/src/Core/GlobalFunctions.h +194 -0
  377. casadi/include/eigen3/Eigen/src/Core/IO.h +258 -0
  378. casadi/include/eigen3/Eigen/src/Core/IndexedView.h +237 -0
  379. casadi/include/eigen3/Eigen/src/Core/Inverse.h +117 -0
  380. casadi/include/eigen3/Eigen/src/Core/Map.h +171 -0
  381. casadi/include/eigen3/Eigen/src/Core/MapBase.h +310 -0
  382. casadi/include/eigen3/Eigen/src/Core/MathFunctions.h +2057 -0
  383. casadi/include/eigen3/Eigen/src/Core/MathFunctionsImpl.h +200 -0
  384. casadi/include/eigen3/Eigen/src/Core/Matrix.h +565 -0
  385. casadi/include/eigen3/Eigen/src/Core/MatrixBase.h +547 -0
  386. casadi/include/eigen3/Eigen/src/Core/NestByValue.h +85 -0
  387. casadi/include/eigen3/Eigen/src/Core/NoAlias.h +109 -0
  388. casadi/include/eigen3/Eigen/src/Core/NumTraits.h +335 -0
  389. casadi/include/eigen3/Eigen/src/Core/PartialReduxEvaluator.h +232 -0
  390. casadi/include/eigen3/Eigen/src/Core/PermutationMatrix.h +605 -0
  391. casadi/include/eigen3/Eigen/src/Core/PlainObjectBase.h +1128 -0
  392. casadi/include/eigen3/Eigen/src/Core/Product.h +191 -0
  393. casadi/include/eigen3/Eigen/src/Core/ProductEvaluators.h +1179 -0
  394. casadi/include/eigen3/Eigen/src/Core/Random.h +218 -0
  395. casadi/include/eigen3/Eigen/src/Core/Redux.h +515 -0
  396. casadi/include/eigen3/Eigen/src/Core/Ref.h +381 -0
  397. casadi/include/eigen3/Eigen/src/Core/Replicate.h +142 -0
  398. casadi/include/eigen3/Eigen/src/Core/Reshaped.h +454 -0
  399. casadi/include/eigen3/Eigen/src/Core/ReturnByValue.h +119 -0
  400. casadi/include/eigen3/Eigen/src/Core/Reverse.h +217 -0
  401. casadi/include/eigen3/Eigen/src/Core/Select.h +164 -0
  402. casadi/include/eigen3/Eigen/src/Core/SelfAdjointView.h +365 -0
  403. casadi/include/eigen3/Eigen/src/Core/SelfCwiseBinaryOp.h +47 -0
  404. casadi/include/eigen3/Eigen/src/Core/Solve.h +188 -0
  405. casadi/include/eigen3/Eigen/src/Core/SolveTriangular.h +235 -0
  406. casadi/include/eigen3/Eigen/src/Core/SolverBase.h +168 -0
  407. casadi/include/eigen3/Eigen/src/Core/StableNorm.h +251 -0
  408. casadi/include/eigen3/Eigen/src/Core/StlIterators.h +463 -0
  409. casadi/include/eigen3/Eigen/src/Core/Stride.h +116 -0
  410. casadi/include/eigen3/Eigen/src/Core/Swap.h +68 -0
  411. casadi/include/eigen3/Eigen/src/Core/Transpose.h +464 -0
  412. casadi/include/eigen3/Eigen/src/Core/Transpositions.h +386 -0
  413. casadi/include/eigen3/Eigen/src/Core/TriangularMatrix.h +1001 -0
  414. casadi/include/eigen3/Eigen/src/Core/VectorBlock.h +96 -0
  415. casadi/include/eigen3/Eigen/src/Core/VectorwiseOp.h +784 -0
  416. casadi/include/eigen3/Eigen/src/Core/Visitor.h +381 -0
  417. casadi/include/eigen3/Eigen/src/Core/arch/AVX/Complex.h +372 -0
  418. casadi/include/eigen3/Eigen/src/Core/arch/AVX/MathFunctions.h +228 -0
  419. casadi/include/eigen3/Eigen/src/Core/arch/AVX/PacketMath.h +1574 -0
  420. casadi/include/eigen3/Eigen/src/Core/arch/AVX/TypeCasting.h +115 -0
  421. casadi/include/eigen3/Eigen/src/Core/arch/AVX512/Complex.h +422 -0
  422. casadi/include/eigen3/Eigen/src/Core/arch/AVX512/MathFunctions.h +362 -0
  423. casadi/include/eigen3/Eigen/src/Core/arch/AVX512/PacketMath.h +2303 -0
  424. casadi/include/eigen3/Eigen/src/Core/arch/AVX512/TypeCasting.h +89 -0
  425. casadi/include/eigen3/Eigen/src/Core/arch/AltiVec/Complex.h +417 -0
  426. casadi/include/eigen3/Eigen/src/Core/arch/AltiVec/MathFunctions.h +90 -0
  427. casadi/include/eigen3/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2937 -0
  428. casadi/include/eigen3/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +221 -0
  429. casadi/include/eigen3/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +629 -0
  430. casadi/include/eigen3/Eigen/src/Core/arch/AltiVec/PacketMath.h +2711 -0
  431. casadi/include/eigen3/Eigen/src/Core/arch/CUDA/Complex.h +258 -0
  432. casadi/include/eigen3/Eigen/src/Core/arch/Default/BFloat16.h +700 -0
  433. casadi/include/eigen3/Eigen/src/Core/arch/Default/ConjHelper.h +117 -0
  434. casadi/include/eigen3/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1649 -0
  435. casadi/include/eigen3/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +110 -0
  436. casadi/include/eigen3/Eigen/src/Core/arch/Default/Half.h +942 -0
  437. casadi/include/eigen3/Eigen/src/Core/arch/Default/Settings.h +49 -0
  438. casadi/include/eigen3/Eigen/src/Core/arch/Default/TypeCasting.h +120 -0
  439. casadi/include/eigen3/Eigen/src/Core/arch/GPU/MathFunctions.h +103 -0
  440. casadi/include/eigen3/Eigen/src/Core/arch/GPU/PacketMath.h +1685 -0
  441. casadi/include/eigen3/Eigen/src/Core/arch/GPU/TypeCasting.h +80 -0
  442. casadi/include/eigen3/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
  443. casadi/include/eigen3/Eigen/src/Core/arch/MSA/Complex.h +648 -0
  444. casadi/include/eigen3/Eigen/src/Core/arch/MSA/MathFunctions.h +387 -0
  445. casadi/include/eigen3/Eigen/src/Core/arch/MSA/PacketMath.h +1233 -0
  446. casadi/include/eigen3/Eigen/src/Core/arch/NEON/Complex.h +584 -0
  447. casadi/include/eigen3/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +183 -0
  448. casadi/include/eigen3/Eigen/src/Core/arch/NEON/MathFunctions.h +75 -0
  449. casadi/include/eigen3/Eigen/src/Core/arch/NEON/PacketMath.h +4587 -0
  450. casadi/include/eigen3/Eigen/src/Core/arch/NEON/TypeCasting.h +1419 -0
  451. casadi/include/eigen3/Eigen/src/Core/arch/SSE/Complex.h +351 -0
  452. casadi/include/eigen3/Eigen/src/Core/arch/SSE/MathFunctions.h +199 -0
  453. casadi/include/eigen3/Eigen/src/Core/arch/SSE/PacketMath.h +1505 -0
  454. casadi/include/eigen3/Eigen/src/Core/arch/SSE/TypeCasting.h +142 -0
  455. casadi/include/eigen3/Eigen/src/Core/arch/SVE/MathFunctions.h +44 -0
  456. casadi/include/eigen3/Eigen/src/Core/arch/SVE/PacketMath.h +752 -0
  457. casadi/include/eigen3/Eigen/src/Core/arch/SVE/TypeCasting.h +49 -0
  458. casadi/include/eigen3/Eigen/src/Core/arch/SYCL/InteropHeaders.h +232 -0
  459. casadi/include/eigen3/Eigen/src/Core/arch/SYCL/MathFunctions.h +301 -0
  460. casadi/include/eigen3/Eigen/src/Core/arch/SYCL/PacketMath.h +670 -0
  461. casadi/include/eigen3/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +694 -0
  462. casadi/include/eigen3/Eigen/src/Core/arch/SYCL/TypeCasting.h +85 -0
  463. casadi/include/eigen3/Eigen/src/Core/arch/ZVector/Complex.h +426 -0
  464. casadi/include/eigen3/Eigen/src/Core/arch/ZVector/MathFunctions.h +233 -0
  465. casadi/include/eigen3/Eigen/src/Core/arch/ZVector/PacketMath.h +1060 -0
  466. casadi/include/eigen3/Eigen/src/Core/functors/AssignmentFunctors.h +177 -0
  467. casadi/include/eigen3/Eigen/src/Core/functors/BinaryFunctors.h +541 -0
  468. casadi/include/eigen3/Eigen/src/Core/functors/NullaryFunctors.h +189 -0
  469. casadi/include/eigen3/Eigen/src/Core/functors/StlFunctors.h +166 -0
  470. casadi/include/eigen3/Eigen/src/Core/functors/TernaryFunctors.h +25 -0
  471. casadi/include/eigen3/Eigen/src/Core/functors/UnaryFunctors.h +1131 -0
  472. casadi/include/eigen3/Eigen/src/Core/products/GeneralBlockPanelKernel.h +2645 -0
  473. casadi/include/eigen3/Eigen/src/Core/products/GeneralMatrixMatrix.h +517 -0
  474. casadi/include/eigen3/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +317 -0
  475. casadi/include/eigen3/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +145 -0
  476. casadi/include/eigen3/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +124 -0
  477. casadi/include/eigen3/Eigen/src/Core/products/GeneralMatrixVector.h +518 -0
  478. casadi/include/eigen3/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +136 -0
  479. casadi/include/eigen3/Eigen/src/Core/products/Parallelizer.h +180 -0
  480. casadi/include/eigen3/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +544 -0
  481. casadi/include/eigen3/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +295 -0
  482. casadi/include/eigen3/Eigen/src/Core/products/SelfadjointMatrixVector.h +262 -0
  483. casadi/include/eigen3/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +118 -0
  484. casadi/include/eigen3/Eigen/src/Core/products/SelfadjointProduct.h +133 -0
  485. casadi/include/eigen3/Eigen/src/Core/products/SelfadjointRank2Update.h +94 -0
  486. casadi/include/eigen3/Eigen/src/Core/products/TriangularMatrixMatrix.h +472 -0
  487. casadi/include/eigen3/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +317 -0
  488. casadi/include/eigen3/Eigen/src/Core/products/TriangularMatrixVector.h +350 -0
  489. casadi/include/eigen3/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +255 -0
  490. casadi/include/eigen3/Eigen/src/Core/products/TriangularSolverMatrix.h +337 -0
  491. casadi/include/eigen3/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +167 -0
  492. casadi/include/eigen3/Eigen/src/Core/products/TriangularSolverVector.h +148 -0
  493. casadi/include/eigen3/Eigen/src/Core/util/BlasUtil.h +583 -0
  494. casadi/include/eigen3/Eigen/src/Core/util/ConfigureVectorization.h +512 -0
  495. casadi/include/eigen3/Eigen/src/Core/util/Constants.h +563 -0
  496. casadi/include/eigen3/Eigen/src/Core/util/DisableStupidWarnings.h +106 -0
  497. casadi/include/eigen3/Eigen/src/Core/util/ForwardDeclarations.h +322 -0
  498. casadi/include/eigen3/Eigen/src/Core/util/IndexedViewHelper.h +186 -0
  499. casadi/include/eigen3/Eigen/src/Core/util/IntegralConstant.h +272 -0
  500. casadi/include/eigen3/Eigen/src/Core/util/MKL_support.h +137 -0
  501. casadi/include/eigen3/Eigen/src/Core/util/Macros.h +1464 -0
  502. casadi/include/eigen3/Eigen/src/Core/util/Memory.h +1163 -0
  503. casadi/include/eigen3/Eigen/src/Core/util/Meta.h +812 -0
  504. casadi/include/eigen3/Eigen/src/Core/util/NonMPL2.h +3 -0
  505. casadi/include/eigen3/Eigen/src/Core/util/ReenableStupidWarnings.h +31 -0
  506. casadi/include/eigen3/Eigen/src/Core/util/ReshapedHelper.h +51 -0
  507. casadi/include/eigen3/Eigen/src/Core/util/StaticAssert.h +221 -0
  508. casadi/include/eigen3/Eigen/src/Core/util/SymbolicIndex.h +293 -0
  509. casadi/include/eigen3/Eigen/src/Core/util/XprHelper.h +856 -0
  510. casadi/include/eigen3/Eigen/src/Eigenvalues/ComplexEigenSolver.h +346 -0
  511. casadi/include/eigen3/Eigen/src/Eigenvalues/ComplexSchur.h +462 -0
  512. casadi/include/eigen3/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +91 -0
  513. casadi/include/eigen3/Eigen/src/Eigenvalues/EigenSolver.h +622 -0
  514. casadi/include/eigen3/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +418 -0
  515. casadi/include/eigen3/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +226 -0
  516. casadi/include/eigen3/Eigen/src/Eigenvalues/HessenbergDecomposition.h +374 -0
  517. casadi/include/eigen3/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +158 -0
  518. casadi/include/eigen3/Eigen/src/Eigenvalues/RealQZ.h +657 -0
  519. casadi/include/eigen3/Eigen/src/Eigenvalues/RealSchur.h +558 -0
  520. casadi/include/eigen3/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +77 -0
  521. casadi/include/eigen3/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +904 -0
  522. casadi/include/eigen3/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +87 -0
  523. casadi/include/eigen3/Eigen/src/Eigenvalues/Tridiagonalization.h +561 -0
  524. casadi/include/eigen3/Eigen/src/Geometry/AlignedBox.h +486 -0
  525. casadi/include/eigen3/Eigen/src/Geometry/AngleAxis.h +247 -0
  526. casadi/include/eigen3/Eigen/src/Geometry/EulerAngles.h +114 -0
  527. casadi/include/eigen3/Eigen/src/Geometry/Homogeneous.h +501 -0
  528. casadi/include/eigen3/Eigen/src/Geometry/Hyperplane.h +282 -0
  529. casadi/include/eigen3/Eigen/src/Geometry/OrthoMethods.h +235 -0
  530. casadi/include/eigen3/Eigen/src/Geometry/ParametrizedLine.h +232 -0
  531. casadi/include/eigen3/Eigen/src/Geometry/Quaternion.h +870 -0
  532. casadi/include/eigen3/Eigen/src/Geometry/Rotation2D.h +199 -0
  533. casadi/include/eigen3/Eigen/src/Geometry/RotationBase.h +206 -0
  534. casadi/include/eigen3/Eigen/src/Geometry/Scaling.h +188 -0
  535. casadi/include/eigen3/Eigen/src/Geometry/Transform.h +1563 -0
  536. casadi/include/eigen3/Eigen/src/Geometry/Translation.h +202 -0
  537. casadi/include/eigen3/Eigen/src/Geometry/Umeyama.h +166 -0
  538. casadi/include/eigen3/Eigen/src/Geometry/arch/Geometry_SIMD.h +168 -0
  539. casadi/include/eigen3/Eigen/src/Householder/BlockHouseholder.h +110 -0
  540. casadi/include/eigen3/Eigen/src/Householder/Householder.h +176 -0
  541. casadi/include/eigen3/Eigen/src/Householder/HouseholderSequence.h +545 -0
  542. casadi/include/eigen3/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +226 -0
  543. casadi/include/eigen3/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +212 -0
  544. casadi/include/eigen3/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +229 -0
  545. casadi/include/eigen3/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +394 -0
  546. casadi/include/eigen3/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +453 -0
  547. casadi/include/eigen3/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +444 -0
  548. casadi/include/eigen3/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +198 -0
  549. casadi/include/eigen3/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +117 -0
  550. casadi/include/eigen3/Eigen/src/Jacobi/Jacobi.h +483 -0
  551. casadi/include/eigen3/Eigen/src/KLUSupport/KLUSupport.h +358 -0
  552. casadi/include/eigen3/Eigen/src/LU/Determinant.h +117 -0
  553. casadi/include/eigen3/Eigen/src/LU/FullPivLU.h +877 -0
  554. casadi/include/eigen3/Eigen/src/LU/InverseImpl.h +432 -0
  555. casadi/include/eigen3/Eigen/src/LU/PartialPivLU.h +624 -0
  556. casadi/include/eigen3/Eigen/src/LU/PartialPivLU_LAPACKE.h +83 -0
  557. casadi/include/eigen3/Eigen/src/LU/arch/InverseSize4.h +351 -0
  558. casadi/include/eigen3/Eigen/src/MetisSupport/MetisSupport.h +137 -0
  559. casadi/include/eigen3/Eigen/src/OrderingMethods/Amd.h +435 -0
  560. casadi/include/eigen3/Eigen/src/OrderingMethods/Eigen_Colamd.h +1863 -0
  561. casadi/include/eigen3/Eigen/src/OrderingMethods/Ordering.h +153 -0
  562. casadi/include/eigen3/Eigen/src/PaStiXSupport/PaStiXSupport.h +678 -0
  563. casadi/include/eigen3/Eigen/src/PardisoSupport/PardisoSupport.h +545 -0
  564. casadi/include/eigen3/Eigen/src/QR/ColPivHouseholderQR.h +674 -0
  565. casadi/include/eigen3/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +97 -0
  566. casadi/include/eigen3/Eigen/src/QR/CompleteOrthogonalDecomposition.h +635 -0
  567. casadi/include/eigen3/Eigen/src/QR/FullPivHouseholderQR.h +713 -0
  568. casadi/include/eigen3/Eigen/src/QR/HouseholderQR.h +434 -0
  569. casadi/include/eigen3/Eigen/src/QR/HouseholderQR_LAPACKE.h +68 -0
  570. casadi/include/eigen3/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +335 -0
  571. casadi/include/eigen3/Eigen/src/SVD/BDCSVD.h +1366 -0
  572. casadi/include/eigen3/Eigen/src/SVD/JacobiSVD.h +812 -0
  573. casadi/include/eigen3/Eigen/src/SVD/JacobiSVD_LAPACKE.h +91 -0
  574. casadi/include/eigen3/Eigen/src/SVD/SVDBase.h +376 -0
  575. casadi/include/eigen3/Eigen/src/SVD/UpperBidiagonalization.h +414 -0
  576. casadi/include/eigen3/Eigen/src/SparseCholesky/SimplicialCholesky.h +697 -0
  577. casadi/include/eigen3/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +174 -0
  578. casadi/include/eigen3/Eigen/src/SparseCore/AmbiVector.h +378 -0
  579. casadi/include/eigen3/Eigen/src/SparseCore/CompressedStorage.h +274 -0
  580. casadi/include/eigen3/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +352 -0
  581. casadi/include/eigen3/Eigen/src/SparseCore/MappedSparseMatrix.h +67 -0
  582. casadi/include/eigen3/Eigen/src/SparseCore/SparseAssign.h +270 -0
  583. casadi/include/eigen3/Eigen/src/SparseCore/SparseBlock.h +571 -0
  584. casadi/include/eigen3/Eigen/src/SparseCore/SparseColEtree.h +206 -0
  585. casadi/include/eigen3/Eigen/src/SparseCore/SparseCompressedBase.h +370 -0
  586. casadi/include/eigen3/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +722 -0
  587. casadi/include/eigen3/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +150 -0
  588. casadi/include/eigen3/Eigen/src/SparseCore/SparseDenseProduct.h +342 -0
  589. casadi/include/eigen3/Eigen/src/SparseCore/SparseDiagonalProduct.h +138 -0
  590. casadi/include/eigen3/Eigen/src/SparseCore/SparseDot.h +98 -0
  591. casadi/include/eigen3/Eigen/src/SparseCore/SparseFuzzy.h +29 -0
  592. casadi/include/eigen3/Eigen/src/SparseCore/SparseMap.h +305 -0
  593. casadi/include/eigen3/Eigen/src/SparseCore/SparseMatrix.h +1518 -0
  594. casadi/include/eigen3/Eigen/src/SparseCore/SparseMatrixBase.h +398 -0
  595. casadi/include/eigen3/Eigen/src/SparseCore/SparsePermutation.h +178 -0
  596. casadi/include/eigen3/Eigen/src/SparseCore/SparseProduct.h +181 -0
  597. casadi/include/eigen3/Eigen/src/SparseCore/SparseRedux.h +49 -0
  598. casadi/include/eigen3/Eigen/src/SparseCore/SparseRef.h +397 -0
  599. casadi/include/eigen3/Eigen/src/SparseCore/SparseSelfAdjointView.h +659 -0
  600. casadi/include/eigen3/Eigen/src/SparseCore/SparseSolverBase.h +124 -0
  601. casadi/include/eigen3/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +198 -0
  602. casadi/include/eigen3/Eigen/src/SparseCore/SparseTranspose.h +92 -0
  603. casadi/include/eigen3/Eigen/src/SparseCore/SparseTriangularView.h +189 -0
  604. casadi/include/eigen3/Eigen/src/SparseCore/SparseUtil.h +186 -0
  605. casadi/include/eigen3/Eigen/src/SparseCore/SparseVector.h +478 -0
  606. casadi/include/eigen3/Eigen/src/SparseCore/SparseView.h +254 -0
  607. casadi/include/eigen3/Eigen/src/SparseCore/TriangularSolver.h +315 -0
  608. casadi/include/eigen3/Eigen/src/SparseLU/SparseLU.h +923 -0
  609. casadi/include/eigen3/Eigen/src/SparseLU/SparseLUImpl.h +66 -0
  610. casadi/include/eigen3/Eigen/src/SparseLU/SparseLU_Memory.h +226 -0
  611. casadi/include/eigen3/Eigen/src/SparseLU/SparseLU_Structs.h +110 -0
  612. casadi/include/eigen3/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +375 -0
  613. casadi/include/eigen3/Eigen/src/SparseLU/SparseLU_Utils.h +80 -0
  614. casadi/include/eigen3/Eigen/src/SparseLU/SparseLU_column_bmod.h +181 -0
  615. casadi/include/eigen3/Eigen/src/SparseLU/SparseLU_column_dfs.h +179 -0
  616. casadi/include/eigen3/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +107 -0
  617. casadi/include/eigen3/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +280 -0
  618. casadi/include/eigen3/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +126 -0
  619. casadi/include/eigen3/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +130 -0
  620. casadi/include/eigen3/Eigen/src/SparseLU/SparseLU_panel_bmod.h +223 -0
  621. casadi/include/eigen3/Eigen/src/SparseLU/SparseLU_panel_dfs.h +258 -0
  622. casadi/include/eigen3/Eigen/src/SparseLU/SparseLU_pivotL.h +137 -0
  623. casadi/include/eigen3/Eigen/src/SparseLU/SparseLU_pruneL.h +136 -0
  624. casadi/include/eigen3/Eigen/src/SparseLU/SparseLU_relax_snode.h +83 -0
  625. casadi/include/eigen3/Eigen/src/SparseQR/SparseQR.h +758 -0
  626. casadi/include/eigen3/Eigen/src/StlSupport/StdDeque.h +116 -0
  627. casadi/include/eigen3/Eigen/src/StlSupport/StdList.h +106 -0
  628. casadi/include/eigen3/Eigen/src/StlSupport/StdVector.h +131 -0
  629. casadi/include/eigen3/Eigen/src/StlSupport/details.h +84 -0
  630. casadi/include/eigen3/Eigen/src/SuperLUSupport/SuperLUSupport.h +1025 -0
  631. casadi/include/eigen3/Eigen/src/UmfPackSupport/UmfPackSupport.h +642 -0
  632. casadi/include/eigen3/Eigen/src/misc/Image.h +82 -0
  633. casadi/include/eigen3/Eigen/src/misc/Kernel.h +79 -0
  634. casadi/include/eigen3/Eigen/src/misc/RealSvd2x2.h +55 -0
  635. casadi/include/eigen3/Eigen/src/misc/blas.h +440 -0
  636. casadi/include/eigen3/Eigen/src/misc/lapack.h +152 -0
  637. casadi/include/eigen3/Eigen/src/misc/lapacke.h +16292 -0
  638. casadi/include/eigen3/Eigen/src/misc/lapacke_mangling.h +17 -0
  639. casadi/include/eigen3/Eigen/src/plugins/ArrayCwiseBinaryOps.h +358 -0
  640. casadi/include/eigen3/Eigen/src/plugins/ArrayCwiseUnaryOps.h +696 -0
  641. casadi/include/eigen3/Eigen/src/plugins/BlockMethods.h +1442 -0
  642. casadi/include/eigen3/Eigen/src/plugins/CommonCwiseBinaryOps.h +115 -0
  643. casadi/include/eigen3/Eigen/src/plugins/CommonCwiseUnaryOps.h +177 -0
  644. casadi/include/eigen3/Eigen/src/plugins/IndexedViewMethods.h +262 -0
  645. casadi/include/eigen3/Eigen/src/plugins/MatrixCwiseBinaryOps.h +152 -0
  646. casadi/include/eigen3/Eigen/src/plugins/MatrixCwiseUnaryOps.h +95 -0
  647. casadi/include/eigen3/Eigen/src/plugins/ReshapedMethods.h +149 -0
  648. casadi/include/eigen3/signature_of_eigen3_matrix_library +1 -0
  649. casadi/include/eigen3/unsupported/Eigen/AdolcForward +159 -0
  650. casadi/include/eigen3/unsupported/Eigen/AlignedVector3 +234 -0
  651. casadi/include/eigen3/unsupported/Eigen/ArpackSupport +30 -0
  652. casadi/include/eigen3/unsupported/Eigen/AutoDiff +46 -0
  653. casadi/include/eigen3/unsupported/Eigen/BVH +95 -0
  654. casadi/include/eigen3/unsupported/Eigen/CXX11/Tensor +137 -0
  655. casadi/include/eigen3/unsupported/Eigen/CXX11/TensorSymmetry +42 -0
  656. casadi/include/eigen3/unsupported/Eigen/CXX11/ThreadPool +74 -0
  657. casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/Tensor.h +554 -0
  658. casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h +329 -0
  659. casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h +247 -0
  660. casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +1176 -0
  661. casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h +1559 -0
  662. casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h +1093 -0
  663. casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h +518 -0
  664. casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h +377 -0
  665. casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h +1023 -0
  666. casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h +73 -0
  667. casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h +6 -0
  668. casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h +1413 -0
  669. casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h +575 -0
  670. casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionSycl.h +1650 -0
  671. casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h +1679 -0
  672. casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h +456 -0
  673. casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h +1132 -0
  674. casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorConvolutionSycl.h +544 -0
  675. casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h +214 -0
  676. casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h +347 -0
  677. casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h +137 -0
  678. casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h +6 -0
  679. casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h +104 -0
  680. casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceGpu.h +389 -0
  681. casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h +1048 -0
  682. casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h +409 -0
  683. casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h +236 -0
  684. casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h +490 -0
  685. casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h +236 -0
  686. casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h +983 -0
  687. casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h +703 -0
  688. casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h +388 -0
  689. casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h +669 -0
  690. casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h +379 -0
  691. casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h +237 -0
  692. casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h +191 -0
  693. casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h +488 -0
  694. casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h +302 -0
  695. casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorGlobalFunctions.h +33 -0
  696. casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h +99 -0
  697. casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaUndefines.h +44 -0
  698. casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h +79 -0
  699. casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h +603 -0
  700. casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h +738 -0
  701. casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h +247 -0
  702. casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorInitializer.h +82 -0
  703. casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h +263 -0
  704. casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h +216 -0
  705. casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMacros.h +98 -0
  706. casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h +327 -0
  707. casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h +311 -0
  708. casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h +1102 -0
  709. casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h +708 -0
  710. casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h +291 -0
  711. casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h +322 -0
  712. casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h +998 -0
  713. casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h +6 -0
  714. casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h +966 -0
  715. casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReductionSycl.h +582 -0
  716. casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h +454 -0
  717. casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h +465 -0
  718. casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h +528 -0
  719. casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorScanSycl.h +513 -0
  720. casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h +471 -0
  721. casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h +161 -0
  722. casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h +346 -0
  723. casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorTrace.h +303 -0
  724. casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h +264 -0
  725. casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h +249 -0
  726. casadi/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h +629 -0
  727. casadi/include/eigen3/unsupported/Eigen/CXX11/src/TensorSymmetry/DynamicSymmetry.h +293 -0
  728. casadi/include/eigen3/unsupported/Eigen/CXX11/src/TensorSymmetry/StaticSymmetry.h +236 -0
  729. casadi/include/eigen3/unsupported/Eigen/CXX11/src/TensorSymmetry/Symmetry.h +338 -0
  730. casadi/include/eigen3/unsupported/Eigen/CXX11/src/TensorSymmetry/util/TemplateGroupTheory.h +669 -0
  731. casadi/include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/Barrier.h +67 -0
  732. casadi/include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/EventCount.h +249 -0
  733. casadi/include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h +486 -0
  734. casadi/include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/RunQueue.h +236 -0
  735. casadi/include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/ThreadCancel.h +23 -0
  736. casadi/include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/ThreadEnvironment.h +40 -0
  737. casadi/include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/ThreadLocal.h +301 -0
  738. casadi/include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/ThreadPoolInterface.h +48 -0
  739. casadi/include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/ThreadYield.h +20 -0
  740. casadi/include/eigen3/unsupported/Eigen/CXX11/src/util/CXX11Meta.h +537 -0
  741. casadi/include/eigen3/unsupported/Eigen/CXX11/src/util/CXX11Workarounds.h +88 -0
  742. casadi/include/eigen3/unsupported/Eigen/CXX11/src/util/EmulateArray.h +261 -0
  743. casadi/include/eigen3/unsupported/Eigen/CXX11/src/util/MaxSizeVector.h +158 -0
  744. casadi/include/eigen3/unsupported/Eigen/EulerAngles +43 -0
  745. casadi/include/eigen3/unsupported/Eigen/FFT +419 -0
  746. casadi/include/eigen3/unsupported/Eigen/IterativeSolvers +51 -0
  747. casadi/include/eigen3/unsupported/Eigen/KroneckerProduct +36 -0
  748. casadi/include/eigen3/unsupported/Eigen/LevenbergMarquardt +49 -0
  749. casadi/include/eigen3/unsupported/Eigen/MPRealSupport +213 -0
  750. casadi/include/eigen3/unsupported/Eigen/MatrixFunctions +504 -0
  751. casadi/include/eigen3/unsupported/Eigen/MoreVectorization +24 -0
  752. casadi/include/eigen3/unsupported/Eigen/NonLinearOptimization +140 -0
  753. casadi/include/eigen3/unsupported/Eigen/NumericalDiff +56 -0
  754. casadi/include/eigen3/unsupported/Eigen/OpenGLSupport +322 -0
  755. casadi/include/eigen3/unsupported/Eigen/Polynomials +137 -0
  756. casadi/include/eigen3/unsupported/Eigen/Skyline +39 -0
  757. casadi/include/eigen3/unsupported/Eigen/SparseExtra +54 -0
  758. casadi/include/eigen3/unsupported/Eigen/SpecialFunctions +103 -0
  759. casadi/include/eigen3/unsupported/Eigen/Splines +35 -0
  760. casadi/include/eigen3/unsupported/Eigen/src/AutoDiff/AutoDiffJacobian.h +108 -0
  761. casadi/include/eigen3/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h +730 -0
  762. casadi/include/eigen3/unsupported/Eigen/src/AutoDiff/AutoDiffVector.h +220 -0
  763. casadi/include/eigen3/unsupported/Eigen/src/BVH/BVAlgorithms.h +293 -0
  764. casadi/include/eigen3/unsupported/Eigen/src/BVH/KdBVH.h +223 -0
  765. casadi/include/eigen3/unsupported/Eigen/src/Eigenvalues/ArpackSelfAdjointEigenSolver.h +790 -0
  766. casadi/include/eigen3/unsupported/Eigen/src/EulerAngles/EulerAngles.h +355 -0
  767. casadi/include/eigen3/unsupported/Eigen/src/EulerAngles/EulerSystem.h +305 -0
  768. casadi/include/eigen3/unsupported/Eigen/src/FFT/ei_fftw_impl.h +261 -0
  769. casadi/include/eigen3/unsupported/Eigen/src/FFT/ei_kissfft_impl.h +449 -0
  770. casadi/include/eigen3/unsupported/Eigen/src/IterativeSolvers/ConstrainedConjGrad.h +187 -0
  771. casadi/include/eigen3/unsupported/Eigen/src/IterativeSolvers/DGMRES.h +511 -0
  772. casadi/include/eigen3/unsupported/Eigen/src/IterativeSolvers/GMRES.h +335 -0
  773. casadi/include/eigen3/unsupported/Eigen/src/IterativeSolvers/IDRS.h +436 -0
  774. casadi/include/eigen3/unsupported/Eigen/src/IterativeSolvers/IncompleteLU.h +90 -0
  775. casadi/include/eigen3/unsupported/Eigen/src/IterativeSolvers/IterationController.h +154 -0
  776. casadi/include/eigen3/unsupported/Eigen/src/IterativeSolvers/MINRES.h +267 -0
  777. casadi/include/eigen3/unsupported/Eigen/src/IterativeSolvers/Scaling.h +193 -0
  778. casadi/include/eigen3/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h +305 -0
  779. casadi/include/eigen3/unsupported/Eigen/src/LevenbergMarquardt/LMcovar.h +84 -0
  780. casadi/include/eigen3/unsupported/Eigen/src/LevenbergMarquardt/LMonestep.h +202 -0
  781. casadi/include/eigen3/unsupported/Eigen/src/LevenbergMarquardt/LMpar.h +160 -0
  782. casadi/include/eigen3/unsupported/Eigen/src/LevenbergMarquardt/LMqrsolv.h +188 -0
  783. casadi/include/eigen3/unsupported/Eigen/src/LevenbergMarquardt/LevenbergMarquardt.h +396 -0
  784. casadi/include/eigen3/unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h +441 -0
  785. casadi/include/eigen3/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h +569 -0
  786. casadi/include/eigen3/unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h +373 -0
  787. casadi/include/eigen3/unsupported/Eigen/src/MatrixFunctions/MatrixPower.h +705 -0
  788. casadi/include/eigen3/unsupported/Eigen/src/MatrixFunctions/MatrixSquareRoot.h +368 -0
  789. casadi/include/eigen3/unsupported/Eigen/src/MatrixFunctions/StemFunction.h +117 -0
  790. casadi/include/eigen3/unsupported/Eigen/src/MoreVectorization/MathFunctions.h +95 -0
  791. casadi/include/eigen3/unsupported/Eigen/src/NonLinearOptimization/HybridNonLinearSolver.h +601 -0
  792. casadi/include/eigen3/unsupported/Eigen/src/NonLinearOptimization/LevenbergMarquardt.h +657 -0
  793. casadi/include/eigen3/unsupported/Eigen/src/NonLinearOptimization/chkder.h +66 -0
  794. casadi/include/eigen3/unsupported/Eigen/src/NonLinearOptimization/covar.h +70 -0
  795. casadi/include/eigen3/unsupported/Eigen/src/NonLinearOptimization/dogleg.h +107 -0
  796. casadi/include/eigen3/unsupported/Eigen/src/NonLinearOptimization/fdjac1.h +79 -0
  797. casadi/include/eigen3/unsupported/Eigen/src/NonLinearOptimization/lmpar.h +298 -0
  798. casadi/include/eigen3/unsupported/Eigen/src/NonLinearOptimization/qrsolv.h +91 -0
  799. casadi/include/eigen3/unsupported/Eigen/src/NonLinearOptimization/r1mpyq.h +30 -0
  800. casadi/include/eigen3/unsupported/Eigen/src/NonLinearOptimization/r1updt.h +99 -0
  801. casadi/include/eigen3/unsupported/Eigen/src/NonLinearOptimization/rwupdt.h +49 -0
  802. casadi/include/eigen3/unsupported/Eigen/src/NumericalDiff/NumericalDiff.h +130 -0
  803. casadi/include/eigen3/unsupported/Eigen/src/Polynomials/Companion.h +280 -0
  804. casadi/include/eigen3/unsupported/Eigen/src/Polynomials/PolynomialSolver.h +428 -0
  805. casadi/include/eigen3/unsupported/Eigen/src/Polynomials/PolynomialUtils.h +143 -0
  806. casadi/include/eigen3/unsupported/Eigen/src/Skyline/SkylineInplaceLU.h +352 -0
  807. casadi/include/eigen3/unsupported/Eigen/src/Skyline/SkylineMatrix.h +862 -0
  808. casadi/include/eigen3/unsupported/Eigen/src/Skyline/SkylineMatrixBase.h +212 -0
  809. casadi/include/eigen3/unsupported/Eigen/src/Skyline/SkylineProduct.h +295 -0
  810. casadi/include/eigen3/unsupported/Eigen/src/Skyline/SkylineStorage.h +259 -0
  811. casadi/include/eigen3/unsupported/Eigen/src/Skyline/SkylineUtil.h +89 -0
  812. casadi/include/eigen3/unsupported/Eigen/src/SparseExtra/BlockOfDynamicSparseMatrix.h +122 -0
  813. casadi/include/eigen3/unsupported/Eigen/src/SparseExtra/BlockSparseMatrix.h +1079 -0
  814. casadi/include/eigen3/unsupported/Eigen/src/SparseExtra/DynamicSparseMatrix.h +404 -0
  815. casadi/include/eigen3/unsupported/Eigen/src/SparseExtra/MarketIO.h +282 -0
  816. casadi/include/eigen3/unsupported/Eigen/src/SparseExtra/MatrixMarketIterator.h +247 -0
  817. casadi/include/eigen3/unsupported/Eigen/src/SparseExtra/RandomSetter.h +349 -0
  818. casadi/include/eigen3/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsArrayAPI.h +286 -0
  819. casadi/include/eigen3/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsBFloat16.h +68 -0
  820. casadi/include/eigen3/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsFunctors.h +357 -0
  821. casadi/include/eigen3/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsHalf.h +66 -0
  822. casadi/include/eigen3/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsImpl.h +1959 -0
  823. casadi/include/eigen3/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsPacketMath.h +118 -0
  824. casadi/include/eigen3/unsupported/Eigen/src/SpecialFunctions/HipVectorCompatibility.h +67 -0
  825. casadi/include/eigen3/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsArrayAPI.h +167 -0
  826. casadi/include/eigen3/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsBFloat16.h +58 -0
  827. casadi/include/eigen3/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h +330 -0
  828. casadi/include/eigen3/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h +58 -0
  829. casadi/include/eigen3/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h +2045 -0
  830. casadi/include/eigen3/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h +79 -0
  831. casadi/include/eigen3/unsupported/Eigen/src/SpecialFunctions/arch/AVX/BesselFunctions.h +46 -0
  832. casadi/include/eigen3/unsupported/Eigen/src/SpecialFunctions/arch/AVX/SpecialFunctions.h +16 -0
  833. casadi/include/eigen3/unsupported/Eigen/src/SpecialFunctions/arch/AVX512/BesselFunctions.h +46 -0
  834. casadi/include/eigen3/unsupported/Eigen/src/SpecialFunctions/arch/AVX512/SpecialFunctions.h +16 -0
  835. casadi/include/eigen3/unsupported/Eigen/src/SpecialFunctions/arch/GPU/SpecialFunctions.h +369 -0
  836. casadi/include/eigen3/unsupported/Eigen/src/SpecialFunctions/arch/NEON/BesselFunctions.h +54 -0
  837. casadi/include/eigen3/unsupported/Eigen/src/SpecialFunctions/arch/NEON/SpecialFunctions.h +34 -0
  838. casadi/include/eigen3/unsupported/Eigen/src/Splines/Spline.h +507 -0
  839. casadi/include/eigen3/unsupported/Eigen/src/Splines/SplineFitting.h +431 -0
  840. casadi/include/eigen3/unsupported/Eigen/src/Splines/SplineFwd.h +93 -0
  841. casadi/include/highs/HConfig.h +23 -0
  842. casadi/include/highs/Highs.h +1703 -0
  843. casadi/include/highs/filereaderlp/builder.hpp +25 -0
  844. casadi/include/highs/filereaderlp/def.hpp +19 -0
  845. casadi/include/highs/filereaderlp/model.hpp +68 -0
  846. casadi/include/highs/filereaderlp/reader.hpp +10 -0
  847. casadi/include/highs/interfaces/highs_c_api.h +2456 -0
  848. casadi/include/highs/io/Filereader.h +45 -0
  849. casadi/include/highs/io/FilereaderEms.h +30 -0
  850. casadi/include/highs/io/FilereaderLp.h +51 -0
  851. casadi/include/highs/io/FilereaderMps.h +27 -0
  852. casadi/include/highs/io/HMPSIO.h +78 -0
  853. casadi/include/highs/io/HMpsFF.h +238 -0
  854. casadi/include/highs/io/HighsIO.h +114 -0
  855. casadi/include/highs/io/LoadOptions.h +24 -0
  856. casadi/include/highs/ipm/IpxSolution.h +32 -0
  857. casadi/include/highs/ipm/IpxWrapper.h +70 -0
  858. casadi/include/highs/ipm/basiclu/basiclu.h +161 -0
  859. casadi/include/highs/ipm/basiclu/basiclu_factorize.h +247 -0
  860. casadi/include/highs/ipm/basiclu/basiclu_get_factors.h +108 -0
  861. casadi/include/highs/ipm/basiclu/basiclu_initialize.h +119 -0
  862. casadi/include/highs/ipm/basiclu/basiclu_obj_factorize.h +34 -0
  863. casadi/include/highs/ipm/basiclu/basiclu_obj_free.h +19 -0
  864. casadi/include/highs/ipm/basiclu/basiclu_obj_get_factors.h +34 -0
  865. casadi/include/highs/ipm/basiclu/basiclu_obj_initialize.h +46 -0
  866. casadi/include/highs/ipm/basiclu/basiclu_obj_solve_dense.h +29 -0
  867. casadi/include/highs/ipm/basiclu/basiclu_obj_solve_for_update.h +42 -0
  868. casadi/include/highs/ipm/basiclu/basiclu_obj_solve_sparse.h +32 -0
  869. casadi/include/highs/ipm/basiclu/basiclu_obj_update.h +31 -0
  870. casadi/include/highs/ipm/basiclu/basiclu_object.h +30 -0
  871. casadi/include/highs/ipm/basiclu/basiclu_solve_dense.h +75 -0
  872. casadi/include/highs/ipm/basiclu/basiclu_solve_for_update.h +169 -0
  873. casadi/include/highs/ipm/basiclu/basiclu_solve_sparse.h +112 -0
  874. casadi/include/highs/ipm/basiclu/basiclu_update.h +125 -0
  875. casadi/include/highs/ipm/basiclu/lu_def.h +39 -0
  876. casadi/include/highs/ipm/basiclu/lu_file.h +21 -0
  877. casadi/include/highs/ipm/basiclu/lu_internal.h +220 -0
  878. casadi/include/highs/ipm/basiclu/lu_list.h +168 -0
  879. casadi/include/highs/ipm/ipx/basiclu_kernel.h +20 -0
  880. casadi/include/highs/ipm/ipx/basiclu_wrapper.h +47 -0
  881. casadi/include/highs/ipm/ipx/basis.h +351 -0
  882. casadi/include/highs/ipm/ipx/conjugate_residuals.h +74 -0
  883. casadi/include/highs/ipm/ipx/control.h +164 -0
  884. casadi/include/highs/ipm/ipx/crossover.h +157 -0
  885. casadi/include/highs/ipm/ipx/diagonal_precond.h +45 -0
  886. casadi/include/highs/ipm/ipx/forrest_tomlin.h +102 -0
  887. casadi/include/highs/ipm/ipx/guess_basis.h +21 -0
  888. casadi/include/highs/ipm/ipx/indexed_vector.h +113 -0
  889. casadi/include/highs/ipm/ipx/info.h +27 -0
  890. casadi/include/highs/ipm/ipx/ipm.h +94 -0
  891. casadi/include/highs/ipm/ipx/ipx_c.h +47 -0
  892. casadi/include/highs/ipm/ipx/ipx_config.h +9 -0
  893. casadi/include/highs/ipm/ipx/ipx_info.h +111 -0
  894. casadi/include/highs/ipm/ipx/ipx_internal.h +88 -0
  895. casadi/include/highs/ipm/ipx/ipx_parameters.h +76 -0
  896. casadi/include/highs/ipm/ipx/ipx_status.h +57 -0
  897. casadi/include/highs/ipm/ipx/iterate.h +328 -0
  898. casadi/include/highs/ipm/ipx/kkt_solver.h +70 -0
  899. casadi/include/highs/ipm/ipx/kkt_solver_basis.h +66 -0
  900. casadi/include/highs/ipm/ipx/kkt_solver_diag.h +48 -0
  901. casadi/include/highs/ipm/ipx/linear_operator.h +26 -0
  902. casadi/include/highs/ipm/ipx/lp_solver.h +202 -0
  903. casadi/include/highs/ipm/ipx/lu_factorization.h +79 -0
  904. casadi/include/highs/ipm/ipx/lu_update.h +129 -0
  905. casadi/include/highs/ipm/ipx/maxvolume.h +54 -0
  906. casadi/include/highs/ipm/ipx/model.h +413 -0
  907. casadi/include/highs/ipm/ipx/multistream.h +52 -0
  908. casadi/include/highs/ipm/ipx/normal_matrix.h +44 -0
  909. casadi/include/highs/ipm/ipx/power_method.h +44 -0
  910. casadi/include/highs/ipm/ipx/sparse_matrix.h +195 -0
  911. casadi/include/highs/ipm/ipx/sparse_utils.h +58 -0
  912. casadi/include/highs/ipm/ipx/splitted_normal_matrix.h +63 -0
  913. casadi/include/highs/ipm/ipx/starting_basis.h +39 -0
  914. casadi/include/highs/ipm/ipx/symbolic_invert.h +29 -0
  915. casadi/include/highs/ipm/ipx/timer.h +24 -0
  916. casadi/include/highs/ipm/ipx/utils.h +39 -0
  917. casadi/include/highs/lp_data/HConst.h +320 -0
  918. casadi/include/highs/lp_data/HStruct.h +182 -0
  919. casadi/include/highs/lp_data/HighsAnalysis.h +23 -0
  920. casadi/include/highs/lp_data/HighsCallback.h +47 -0
  921. casadi/include/highs/lp_data/HighsCallbackStruct.h +62 -0
  922. casadi/include/highs/lp_data/HighsDebug.h +34 -0
  923. casadi/include/highs/lp_data/HighsIis.h +62 -0
  924. casadi/include/highs/lp_data/HighsInfo.h +329 -0
  925. casadi/include/highs/lp_data/HighsInfoDebug.h +27 -0
  926. casadi/include/highs/lp_data/HighsLp.h +100 -0
  927. casadi/include/highs/lp_data/HighsLpSolverObject.h +45 -0
  928. casadi/include/highs/lp_data/HighsLpUtils.h +298 -0
  929. casadi/include/highs/lp_data/HighsModelUtils.h +112 -0
  930. casadi/include/highs/lp_data/HighsOptions.h +1469 -0
  931. casadi/include/highs/lp_data/HighsRanging.h +43 -0
  932. casadi/include/highs/lp_data/HighsSolution.h +144 -0
  933. casadi/include/highs/lp_data/HighsSolutionDebug.h +87 -0
  934. casadi/include/highs/lp_data/HighsSolve.h +23 -0
  935. casadi/include/highs/lp_data/HighsStatus.h +29 -0
  936. casadi/include/highs/mip/HighsCliqueTable.h +318 -0
  937. casadi/include/highs/mip/HighsConflictPool.h +109 -0
  938. casadi/include/highs/mip/HighsCutGeneration.h +106 -0
  939. casadi/include/highs/mip/HighsCutPool.h +168 -0
  940. casadi/include/highs/mip/HighsDebugSol.h +132 -0
  941. casadi/include/highs/mip/HighsDomain.h +653 -0
  942. casadi/include/highs/mip/HighsDomainChange.h +48 -0
  943. casadi/include/highs/mip/HighsDynamicRowMatrix.h +104 -0
  944. casadi/include/highs/mip/HighsGFkSolve.h +438 -0
  945. casadi/include/highs/mip/HighsImplications.h +170 -0
  946. casadi/include/highs/mip/HighsLpAggregator.h +50 -0
  947. casadi/include/highs/mip/HighsLpRelaxation.h +357 -0
  948. casadi/include/highs/mip/HighsMipAnalysis.h +52 -0
  949. casadi/include/highs/mip/HighsMipSolver.h +112 -0
  950. casadi/include/highs/mip/HighsMipSolverData.h +297 -0
  951. casadi/include/highs/mip/HighsModkSeparator.h +60 -0
  952. casadi/include/highs/mip/HighsNodeQueue.h +311 -0
  953. casadi/include/highs/mip/HighsObjectiveFunction.h +71 -0
  954. casadi/include/highs/mip/HighsPathSeparator.h +39 -0
  955. casadi/include/highs/mip/HighsPrimalHeuristics.h +70 -0
  956. casadi/include/highs/mip/HighsPseudocost.h +360 -0
  957. casadi/include/highs/mip/HighsRedcostFixing.h +42 -0
  958. casadi/include/highs/mip/HighsSearch.h +241 -0
  959. casadi/include/highs/mip/HighsSeparation.h +41 -0
  960. casadi/include/highs/mip/HighsSeparator.h +52 -0
  961. casadi/include/highs/mip/HighsTableauSeparator.h +34 -0
  962. casadi/include/highs/mip/HighsTransformedLp.h +63 -0
  963. casadi/include/highs/mip/MipTimer.h +471 -0
  964. casadi/include/highs/model/HighsHessian.h +54 -0
  965. casadi/include/highs/model/HighsHessianUtils.h +49 -0
  966. casadi/include/highs/model/HighsModel.h +52 -0
  967. casadi/include/highs/parallel/HighsBinarySemaphore.h +113 -0
  968. casadi/include/highs/parallel/HighsCacheAlign.h +87 -0
  969. casadi/include/highs/parallel/HighsCombinable.h +121 -0
  970. casadi/include/highs/parallel/HighsMutex.h +129 -0
  971. casadi/include/highs/parallel/HighsParallel.h +133 -0
  972. casadi/include/highs/parallel/HighsRaceTimer.h +43 -0
  973. casadi/include/highs/parallel/HighsSchedulerConstants.h +24 -0
  974. casadi/include/highs/parallel/HighsSpinMutex.h +53 -0
  975. casadi/include/highs/parallel/HighsSplitDeque.h +583 -0
  976. casadi/include/highs/parallel/HighsTask.h +175 -0
  977. casadi/include/highs/parallel/HighsTaskExecutor.h +222 -0
  978. casadi/include/highs/pdlp/CupdlpWrapper.h +104 -0
  979. casadi/include/highs/pdlp/cupdlp/cupdlp_cs.h +40 -0
  980. casadi/include/highs/pdlp/cupdlp/cupdlp_defs.h +433 -0
  981. casadi/include/highs/pdlp/cupdlp/cupdlp_linalg.h +189 -0
  982. casadi/include/highs/pdlp/cupdlp/cupdlp_proj.h +19 -0
  983. casadi/include/highs/pdlp/cupdlp/cupdlp_restart.h +31 -0
  984. casadi/include/highs/pdlp/cupdlp/cupdlp_scaling.h +26 -0
  985. casadi/include/highs/pdlp/cupdlp/cupdlp_solver.h +98 -0
  986. casadi/include/highs/pdlp/cupdlp/cupdlp_step.h +37 -0
  987. casadi/include/highs/pdlp/cupdlp/cupdlp_utils.c +1807 -0
  988. casadi/include/highs/pdqsort/pdqsort.h +532 -0
  989. casadi/include/highs/presolve/HPresolve.h +405 -0
  990. casadi/include/highs/presolve/HPresolveAnalysis.h +51 -0
  991. casadi/include/highs/presolve/HighsPostsolveStack.h +940 -0
  992. casadi/include/highs/presolve/HighsSymmetry.h +281 -0
  993. casadi/include/highs/presolve/ICrash.h +124 -0
  994. casadi/include/highs/presolve/ICrashUtil.h +62 -0
  995. casadi/include/highs/presolve/ICrashX.h +23 -0
  996. casadi/include/highs/presolve/PresolveComponent.h +90 -0
  997. casadi/include/highs/qpsolver/a_asm.hpp +70 -0
  998. casadi/include/highs/qpsolver/a_quass.hpp +15 -0
  999. casadi/include/highs/qpsolver/basis.hpp +152 -0
  1000. casadi/include/highs/qpsolver/crashsolution.hpp +13 -0
  1001. casadi/include/highs/qpsolver/dantzigpricing.hpp +73 -0
  1002. casadi/include/highs/qpsolver/devexpricing.hpp +101 -0
  1003. casadi/include/highs/qpsolver/eventhandler.hpp +23 -0
  1004. casadi/include/highs/qpsolver/factor.hpp +401 -0
  1005. casadi/include/highs/qpsolver/feasibility_bounded.hpp +107 -0
  1006. casadi/include/highs/qpsolver/feasibility_highs.hpp +294 -0
  1007. casadi/include/highs/qpsolver/gradient.hpp +39 -0
  1008. casadi/include/highs/qpsolver/instance.hpp +63 -0
  1009. casadi/include/highs/qpsolver/matrix.hpp +335 -0
  1010. casadi/include/highs/qpsolver/perturbation.hpp +8 -0
  1011. casadi/include/highs/qpsolver/pricing.hpp +15 -0
  1012. casadi/include/highs/qpsolver/qpconst.hpp +27 -0
  1013. casadi/include/highs/qpsolver/qpvector.hpp +235 -0
  1014. casadi/include/highs/qpsolver/quass.hpp +20 -0
  1015. casadi/include/highs/qpsolver/ratiotest.hpp +19 -0
  1016. casadi/include/highs/qpsolver/runtime.hpp +38 -0
  1017. casadi/include/highs/qpsolver/scaling.hpp +8 -0
  1018. casadi/include/highs/qpsolver/settings.hpp +69 -0
  1019. casadi/include/highs/qpsolver/snippets.hpp +29 -0
  1020. casadi/include/highs/qpsolver/statistics.hpp +23 -0
  1021. casadi/include/highs/qpsolver/steepestedgepricing.hpp +166 -0
  1022. casadi/include/highs/simplex/HApp.h +476 -0
  1023. casadi/include/highs/simplex/HEkk.h +416 -0
  1024. casadi/include/highs/simplex/HEkkDual.h +513 -0
  1025. casadi/include/highs/simplex/HEkkDualRHS.h +134 -0
  1026. casadi/include/highs/simplex/HEkkDualRow.h +201 -0
  1027. casadi/include/highs/simplex/HEkkPrimal.h +191 -0
  1028. casadi/include/highs/simplex/HSimplex.h +42 -0
  1029. casadi/include/highs/simplex/HSimplexDebug.h +48 -0
  1030. casadi/include/highs/simplex/HSimplexNla.h +158 -0
  1031. casadi/include/highs/simplex/HSimplexReport.h +21 -0
  1032. casadi/include/highs/simplex/HighsSimplexAnalysis.h +500 -0
  1033. casadi/include/highs/simplex/SimplexConst.h +273 -0
  1034. casadi/include/highs/simplex/SimplexStruct.h +261 -0
  1035. casadi/include/highs/simplex/SimplexTimer.h +409 -0
  1036. casadi/include/highs/test/DevKkt.h +143 -0
  1037. casadi/include/highs/test/KktCh2.h +79 -0
  1038. casadi/include/highs/util/FactorTimer.h +199 -0
  1039. casadi/include/highs/util/HFactor.h +587 -0
  1040. casadi/include/highs/util/HFactorConst.h +81 -0
  1041. casadi/include/highs/util/HFactorDebug.h +55 -0
  1042. casadi/include/highs/util/HSet.h +89 -0
  1043. casadi/include/highs/util/HVector.h +22 -0
  1044. casadi/include/highs/util/HVectorBase.h +102 -0
  1045. casadi/include/highs/util/HighsCDouble.h +319 -0
  1046. casadi/include/highs/util/HighsComponent.h +53 -0
  1047. casadi/include/highs/util/HighsDataStack.h +83 -0
  1048. casadi/include/highs/util/HighsDisjointSets.h +107 -0
  1049. casadi/include/highs/util/HighsHash.h +1274 -0
  1050. casadi/include/highs/util/HighsHashTree.h +1447 -0
  1051. casadi/include/highs/util/HighsInt.h +36 -0
  1052. casadi/include/highs/util/HighsIntegers.h +212 -0
  1053. casadi/include/highs/util/HighsLinearSumBounds.h +157 -0
  1054. casadi/include/highs/util/HighsMatrixPic.h +37 -0
  1055. casadi/include/highs/util/HighsMatrixSlice.h +561 -0
  1056. casadi/include/highs/util/HighsMatrixUtils.h +54 -0
  1057. casadi/include/highs/util/HighsMemoryAllocation.h +63 -0
  1058. casadi/include/highs/util/HighsRandom.h +242 -0
  1059. casadi/include/highs/util/HighsRbTree.h +452 -0
  1060. casadi/include/highs/util/HighsSort.h +131 -0
  1061. casadi/include/highs/util/HighsSparseMatrix.h +150 -0
  1062. casadi/include/highs/util/HighsSparseVectorSum.h +95 -0
  1063. casadi/include/highs/util/HighsSplay.h +135 -0
  1064. casadi/include/highs/util/HighsTimer.h +381 -0
  1065. casadi/include/highs/util/HighsUtils.h +217 -0
  1066. casadi/include/highs/util/stringutil.h +46 -0
  1067. casadi/include/highs/zstr/strict_fstream.hpp +237 -0
  1068. casadi/include/highs/zstr/zstr.hpp +472 -0
  1069. casadi/include/licenses/FMI-Standard-2.0.2/LICENSE.txt +473 -0
  1070. casadi/include/licenses/FMI-Standard-3.0/LICENSE.txt +464 -0
  1071. casadi/include/licenses/alpaqa-external/LICENSE +165 -0
  1072. casadi/include/licenses/bonmin-external/Bonmin/LICENSE +87 -0
  1073. casadi/include/licenses/bonmin-external/LICENSE +3 -0
  1074. casadi/include/licenses/casadi/LICENSE/LICENSE.txt +165 -0
  1075. casadi/include/licenses/casadi-sundials/LICENSE +64 -0
  1076. casadi/include/licenses/casadi-sundials/cvodes/LICENSE +60 -0
  1077. casadi/include/licenses/casadi-sundials/idas/LICENSE +59 -0
  1078. casadi/include/licenses/casadi-sundials/kinsol/LICENSE +59 -0
  1079. casadi/include/licenses/casadi-sundials/sundials/LICENSE +67 -0
  1080. casadi/include/licenses/cbc-external/Cbc/LICENSE +239 -0
  1081. casadi/include/licenses/cbc-external/LICENSE +245 -0
  1082. casadi/include/licenses/cgl-external/Cgl/LICENSE +239 -0
  1083. casadi/include/licenses/cgl-external/LICENSE +245 -0
  1084. casadi/include/licenses/clp-external/Clp/LICENSE +239 -0
  1085. casadi/include/licenses/clp-external/LICENSE +245 -0
  1086. casadi/include/licenses/coinutils-external/CoinUtils/LICENSE +239 -0
  1087. casadi/include/licenses/coinutils-external/LICENSE +245 -0
  1088. casadi/include/licenses/daqp-external/LICENSE +21 -0
  1089. casadi/include/licenses/ghc-external/LICENSE +19 -0
  1090. casadi/include/licenses/highs-external/LICENSE.txt +21 -0
  1091. casadi/include/licenses/highs-external/extern/filereaderlp/LICENSE +19 -0
  1092. casadi/include/licenses/ipopt-external/LICENSE +260 -0
  1093. casadi/include/licenses/libz-external/LICENSE +22 -0
  1094. casadi/include/licenses/libz-external/contrib/dotzlib/LICENSE_1_0.txt +23 -0
  1095. casadi/include/licenses/libzip-external/LICENSE +31 -0
  1096. casadi/include/licenses/metis-external/LICENSE +87 -0
  1097. casadi/include/licenses/metis-external/metis-4.0/LICENSE +18 -0
  1098. casadi/include/licenses/mockups-external/LICENSE +21 -0
  1099. casadi/include/licenses/mumps-external/LICENSE +87 -0
  1100. casadi/include/licenses/mumps-external/MUMPS/LICENSE +50 -0
  1101. casadi/include/licenses/openblas-external/LICENSE +29 -0
  1102. casadi/include/licenses/openblas-external/ctest/LICENSE +23 -0
  1103. casadi/include/licenses/openblas-external/lapack-netlib/LAPACKE/LICENSE +26 -0
  1104. casadi/include/licenses/openblas-external/lapack-netlib/LICENSE +48 -0
  1105. casadi/include/licenses/openblas-external/reference/LICENSE +23 -0
  1106. casadi/include/licenses/openblas-external/relapack/LICENSE +22 -0
  1107. casadi/include/licenses/openblas-external/test/LICENSE +23 -0
  1108. casadi/include/licenses/osi-external/LICENSE +245 -0
  1109. casadi/include/licenses/osi-external/Osi/LICENSE +239 -0
  1110. casadi/include/licenses/osqp-external/LICENSE +201 -0
  1111. casadi/include/licenses/osqp-external/lin_sys/direct/qdldl/amd/LICENSE +36 -0
  1112. casadi/include/licenses/osqp-external/lin_sys/direct/qdldl/qdldl_sources/LICENSE +201 -0
  1113. casadi/include/licenses/proxqp-external/LICENSE +25 -0
  1114. casadi/include/licenses/proxqp-external/bindings/python/external/pybind11/LICENSE +29 -0
  1115. casadi/include/licenses/proxqp-external/cmake-module/LICENSE +4 -0
  1116. casadi/include/licenses/proxqp-external/cmake-module/doxygen/MathJax/LICENSE +202 -0
  1117. casadi/include/licenses/proxqp-external/external/cereal/LICENSE +24 -0
  1118. casadi/include/licenses/proxqp-external/external/cereal/include/cereal/external/LICENSE +21 -0
  1119. casadi/include/licenses/proxqp-external/external/cereal/include/cereal/external/rapidjson/LICENSE +13 -0
  1120. casadi/include/licenses/proxqp-external/external/cereal/include/cereal/external/rapidjson/msinttypes/LICENSE +29 -0
  1121. casadi/include/licenses/qpOASES/LICENSE.txt +503 -0
  1122. casadi/include/licenses/sleqp-external/LICENSE +165 -0
  1123. casadi/include/licenses/superscs-external/LICENSE.txt +24 -0
  1124. casadi/include/licenses/tinyxml2-9.0.0/LICENSE.txt +18 -0
  1125. casadi/include/licenses/trlib-external/LICENSE +21 -0
  1126. casadi/include/osqp/auxil.h +181 -0
  1127. casadi/include/osqp/constants.h +128 -0
  1128. casadi/include/osqp/cs.h +180 -0
  1129. casadi/include/osqp/ctrlc.h +56 -0
  1130. casadi/include/osqp/error.h +38 -0
  1131. casadi/include/osqp/glob_opts.h +167 -0
  1132. casadi/include/osqp/lin_alg.h +216 -0
  1133. casadi/include/osqp/lin_sys.h +54 -0
  1134. casadi/include/osqp/osqp.h +430 -0
  1135. casadi/include/osqp/osqp_configure.h +49 -0
  1136. casadi/include/osqp/polish.h +25 -0
  1137. casadi/include/osqp/proj.h +37 -0
  1138. casadi/include/osqp/scaling.h +44 -0
  1139. casadi/include/osqp/types.h +326 -0
  1140. casadi/include/osqp/util.h +222 -0
  1141. casadi/include/osqp/version.h +9 -0
  1142. casadi/include/proxsuite/config.hpp +68 -0
  1143. casadi/include/proxsuite/deprecated.hpp +56 -0
  1144. casadi/include/proxsuite/fwd.hpp +52 -0
  1145. casadi/include/proxsuite/helpers/common.hpp +70 -0
  1146. casadi/include/proxsuite/helpers/instruction-set.hpp +275 -0
  1147. casadi/include/proxsuite/helpers/optional.hpp +46 -0
  1148. casadi/include/proxsuite/helpers/tl-optional.hpp +2472 -0
  1149. casadi/include/proxsuite/helpers/version.hpp +39 -0
  1150. casadi/include/proxsuite/linalg/dense/core.hpp +863 -0
  1151. casadi/include/proxsuite/linalg/dense/factorize.hpp +375 -0
  1152. casadi/include/proxsuite/linalg/dense/ldlt.hpp +817 -0
  1153. casadi/include/proxsuite/linalg/dense/modify.hpp +333 -0
  1154. casadi/include/proxsuite/linalg/dense/solve.hpp +38 -0
  1155. casadi/include/proxsuite/linalg/dense/update.hpp +330 -0
  1156. casadi/include/proxsuite/linalg/sparse/core.hpp +531 -0
  1157. casadi/include/proxsuite/linalg/sparse/factorize.hpp +1303 -0
  1158. casadi/include/proxsuite/linalg/sparse/rowmod.hpp +443 -0
  1159. casadi/include/proxsuite/linalg/sparse/update.hpp +348 -0
  1160. casadi/include/proxsuite/linalg/veg/internal/assert_impl.hpp +20 -0
  1161. casadi/include/proxsuite/linalg/veg/internal/collection_algo.hpp +93 -0
  1162. casadi/include/proxsuite/linalg/veg/internal/dbg.hpp +15 -0
  1163. casadi/include/proxsuite/linalg/veg/internal/delete_special_members.hpp +77 -0
  1164. casadi/include/proxsuite/linalg/veg/internal/dyn_index.hpp +292 -0
  1165. casadi/include/proxsuite/linalg/veg/internal/epilogue.hpp +31 -0
  1166. casadi/include/proxsuite/linalg/veg/internal/external/hedley.ext.hpp +2074 -0
  1167. casadi/include/proxsuite/linalg/veg/internal/external/unhedley.ext.hpp +148 -0
  1168. casadi/include/proxsuite/linalg/veg/internal/fix_index.hpp +339 -0
  1169. casadi/include/proxsuite/linalg/veg/internal/has_asan.hpp +17 -0
  1170. casadi/include/proxsuite/linalg/veg/internal/integer_seq.hpp +248 -0
  1171. casadi/include/proxsuite/linalg/veg/internal/macros.hpp +1312 -0
  1172. casadi/include/proxsuite/linalg/veg/internal/narrow.hpp +46 -0
  1173. casadi/include/proxsuite/linalg/veg/internal/preprocessor.hpp +434 -0
  1174. casadi/include/proxsuite/linalg/veg/internal/prologue.hpp +157 -0
  1175. casadi/include/proxsuite/linalg/veg/internal/std.hpp +13 -0
  1176. casadi/include/proxsuite/linalg/veg/internal/terminate.hpp +22 -0
  1177. casadi/include/proxsuite/linalg/veg/internal/typedefs.hpp +58 -0
  1178. casadi/include/proxsuite/linalg/veg/memory/address.hpp +97 -0
  1179. casadi/include/proxsuite/linalg/veg/memory/alloc.hpp +352 -0
  1180. casadi/include/proxsuite/linalg/veg/memory/dynamic_stack.hpp +504 -0
  1181. casadi/include/proxsuite/linalg/veg/memory/placement.hpp +202 -0
  1182. casadi/include/proxsuite/linalg/veg/memory/stack_alloc.hpp +239 -0
  1183. casadi/include/proxsuite/linalg/veg/ref.hpp +148 -0
  1184. casadi/include/proxsuite/linalg/veg/slice.hpp +240 -0
  1185. casadi/include/proxsuite/linalg/veg/tuple.hpp +876 -0
  1186. casadi/include/proxsuite/linalg/veg/type_traits/alloc.hpp +169 -0
  1187. casadi/include/proxsuite/linalg/veg/type_traits/assignable.hpp +53 -0
  1188. casadi/include/proxsuite/linalg/veg/type_traits/constructible.hpp +217 -0
  1189. casadi/include/proxsuite/linalg/veg/type_traits/core.hpp +298 -0
  1190. casadi/include/proxsuite/linalg/veg/type_traits/invocable.hpp +47 -0
  1191. casadi/include/proxsuite/linalg/veg/type_traits/primitives.hpp +43 -0
  1192. casadi/include/proxsuite/linalg/veg/type_traits/tags.hpp +47 -0
  1193. casadi/include/proxsuite/linalg/veg/util/assert.hpp +48 -0
  1194. casadi/include/proxsuite/linalg/veg/util/dbg.hpp +6 -0
  1195. casadi/include/proxsuite/linalg/veg/util/defer.hpp +57 -0
  1196. casadi/include/proxsuite/linalg/veg/util/dynstack_alloc.hpp +19 -0
  1197. casadi/include/proxsuite/linalg/veg/util/get.hpp +153 -0
  1198. casadi/include/proxsuite/linalg/veg/util/index.hpp +6 -0
  1199. casadi/include/proxsuite/linalg/veg/util/unreachable.hpp +41 -0
  1200. casadi/include/proxsuite/linalg/veg/vec.hpp +1034 -0
  1201. casadi/include/proxsuite/proxqp/dense/dense.hpp +10 -0
  1202. casadi/include/proxsuite/proxqp/dense/fwd.hpp +55 -0
  1203. casadi/include/proxsuite/proxqp/dense/helpers.hpp +520 -0
  1204. casadi/include/proxsuite/proxqp/dense/linesearch.hpp +517 -0
  1205. casadi/include/proxsuite/proxqp/dense/model.hpp +147 -0
  1206. casadi/include/proxsuite/proxqp/dense/preconditioner/identity.hpp +113 -0
  1207. casadi/include/proxsuite/proxqp/dense/preconditioner/ruiz.hpp +571 -0
  1208. casadi/include/proxsuite/proxqp/dense/solver.hpp +1330 -0
  1209. casadi/include/proxsuite/proxqp/dense/utils.hpp +415 -0
  1210. casadi/include/proxsuite/proxqp/dense/views.hpp +1466 -0
  1211. casadi/include/proxsuite/proxqp/dense/workspace.hpp +264 -0
  1212. casadi/include/proxsuite/proxqp/dense/wrapper.hpp +491 -0
  1213. casadi/include/proxsuite/proxqp/results.hpp +212 -0
  1214. casadi/include/proxsuite/proxqp/settings.hpp +302 -0
  1215. casadi/include/proxsuite/proxqp/sparse/fwd.hpp +58 -0
  1216. casadi/include/proxsuite/proxqp/sparse/helpers.hpp +309 -0
  1217. casadi/include/proxsuite/proxqp/sparse/model.hpp +228 -0
  1218. casadi/include/proxsuite/proxqp/sparse/preconditioner/identity.hpp +64 -0
  1219. casadi/include/proxsuite/proxqp/sparse/preconditioner/ruiz.hpp +569 -0
  1220. casadi/include/proxsuite/proxqp/sparse/solver.hpp +1441 -0
  1221. casadi/include/proxsuite/proxqp/sparse/sparse.hpp +10 -0
  1222. casadi/include/proxsuite/proxqp/sparse/utils.hpp +815 -0
  1223. casadi/include/proxsuite/proxqp/sparse/views.hpp +63 -0
  1224. casadi/include/proxsuite/proxqp/sparse/workspace.hpp +790 -0
  1225. casadi/include/proxsuite/proxqp/sparse/wrapper.hpp +772 -0
  1226. casadi/include/proxsuite/proxqp/status.hpp +46 -0
  1227. casadi/include/proxsuite/proxqp/timings.hpp +101 -0
  1228. casadi/include/proxsuite/proxqp/utils/prints.hpp +47 -0
  1229. casadi/include/proxsuite/proxqp/utils/random_qp_problems.hpp +669 -0
  1230. casadi/include/proxsuite/serialization/archive.hpp +231 -0
  1231. casadi/include/proxsuite/serialization/eigen.hpp +107 -0
  1232. casadi/include/proxsuite/serialization/model.hpp +34 -0
  1233. casadi/include/proxsuite/serialization/results.hpp +74 -0
  1234. casadi/include/proxsuite/serialization/settings.hpp +60 -0
  1235. casadi/include/proxsuite/serialization/wrapper.hpp +24 -0
  1236. casadi/include/proxsuite/warning.hpp +35 -0
  1237. casadi/include/simde/arm/neon/aba.h +208 -0
  1238. casadi/include/simde/arm/neon/abd.h +384 -0
  1239. casadi/include/simde/arm/neon/abdl.h +147 -0
  1240. casadi/include/simde/arm/neon/abs.h +408 -0
  1241. casadi/include/simde/arm/neon/add.h +681 -0
  1242. casadi/include/simde/arm/neon/addl.h +127 -0
  1243. casadi/include/simde/arm/neon/addl_high.h +127 -0
  1244. casadi/include/simde/arm/neon/addlv.h +317 -0
  1245. casadi/include/simde/arm/neon/addv.h +447 -0
  1246. casadi/include/simde/arm/neon/addw.h +222 -0
  1247. casadi/include/simde/arm/neon/addw_high.h +193 -0
  1248. casadi/include/simde/arm/neon/and.h +552 -0
  1249. casadi/include/simde/arm/neon/bic.h +472 -0
  1250. casadi/include/simde/arm/neon/bsl.h +448 -0
  1251. casadi/include/simde/arm/neon/cagt.h +168 -0
  1252. casadi/include/simde/arm/neon/ceq.h +711 -0
  1253. casadi/include/simde/arm/neon/ceqz.h +335 -0
  1254. casadi/include/simde/arm/neon/cge.h +677 -0
  1255. casadi/include/simde/arm/neon/cgez.h +378 -0
  1256. casadi/include/simde/arm/neon/cgt.h +686 -0
  1257. casadi/include/simde/arm/neon/cgtz.h +380 -0
  1258. casadi/include/simde/arm/neon/cle.h +677 -0
  1259. casadi/include/simde/arm/neon/clez.h +378 -0
  1260. casadi/include/simde/arm/neon/cls.h +148 -0
  1261. casadi/include/simde/arm/neon/clt.h +679 -0
  1262. casadi/include/simde/arm/neon/cltz.h +263 -0
  1263. casadi/include/simde/arm/neon/clz.h +423 -0
  1264. casadi/include/simde/arm/neon/cnt.h +145 -0
  1265. casadi/include/simde/arm/neon/combine.h +343 -0
  1266. casadi/include/simde/arm/neon/create.h +186 -0
  1267. casadi/include/simde/arm/neon/cvt.h +492 -0
  1268. casadi/include/simde/arm/neon/dot.h +171 -0
  1269. casadi/include/simde/arm/neon/dot_lane.h +196 -0
  1270. casadi/include/simde/arm/neon/dup_lane.h +702 -0
  1271. casadi/include/simde/arm/neon/dup_n.h +534 -0
  1272. casadi/include/simde/arm/neon/eor.h +552 -0
  1273. casadi/include/simde/arm/neon/ext.h +887 -0
  1274. casadi/include/simde/arm/neon/get_high.h +260 -0
  1275. casadi/include/simde/arm/neon/get_lane.h +499 -0
  1276. casadi/include/simde/arm/neon/get_low.h +276 -0
  1277. casadi/include/simde/arm/neon/hadd.h +287 -0
  1278. casadi/include/simde/arm/neon/hsub.h +287 -0
  1279. casadi/include/simde/arm/neon/ld1.h +399 -0
  1280. casadi/include/simde/arm/neon/ld3.h +609 -0
  1281. casadi/include/simde/arm/neon/ld4.h +448 -0
  1282. casadi/include/simde/arm/neon/max.h +614 -0
  1283. casadi/include/simde/arm/neon/maxnm.h +215 -0
  1284. casadi/include/simde/arm/neon/maxv.h +400 -0
  1285. casadi/include/simde/arm/neon/min.h +660 -0
  1286. casadi/include/simde/arm/neon/minnm.h +215 -0
  1287. casadi/include/simde/arm/neon/minv.h +424 -0
  1288. casadi/include/simde/arm/neon/mla.h +530 -0
  1289. casadi/include/simde/arm/neon/mla_n.h +333 -0
  1290. casadi/include/simde/arm/neon/mlal.h +156 -0
  1291. casadi/include/simde/arm/neon/mlal_high.h +156 -0
  1292. casadi/include/simde/arm/neon/mlal_n.h +128 -0
  1293. casadi/include/simde/arm/neon/mls.h +264 -0
  1294. casadi/include/simde/arm/neon/mlsl.h +124 -0
  1295. casadi/include/simde/arm/neon/mlsl_high.h +124 -0
  1296. casadi/include/simde/arm/neon/mlsl_n.h +96 -0
  1297. casadi/include/simde/arm/neon/movl.h +208 -0
  1298. casadi/include/simde/arm/neon/movl_high.h +126 -0
  1299. casadi/include/simde/arm/neon/movn.h +195 -0
  1300. casadi/include/simde/arm/neon/movn_high.h +125 -0
  1301. casadi/include/simde/arm/neon/mul.h +594 -0
  1302. casadi/include/simde/arm/neon/mul_lane.h +472 -0
  1303. casadi/include/simde/arm/neon/mul_n.h +383 -0
  1304. casadi/include/simde/arm/neon/mull.h +236 -0
  1305. casadi/include/simde/arm/neon/mull_high.h +125 -0
  1306. casadi/include/simde/arm/neon/mull_n.h +158 -0
  1307. casadi/include/simde/arm/neon/mvn.h +426 -0
  1308. casadi/include/simde/arm/neon/neg.h +393 -0
  1309. casadi/include/simde/arm/neon/orn.h +505 -0
  1310. casadi/include/simde/arm/neon/orr.h +552 -0
  1311. casadi/include/simde/arm/neon/padal.h +211 -0
  1312. casadi/include/simde/arm/neon/padd.h +293 -0
  1313. casadi/include/simde/arm/neon/paddl.h +239 -0
  1314. casadi/include/simde/arm/neon/pmax.h +253 -0
  1315. casadi/include/simde/arm/neon/pmin.h +260 -0
  1316. casadi/include/simde/arm/neon/qabs.h +281 -0
  1317. casadi/include/simde/arm/neon/qadd.h +553 -0
  1318. casadi/include/simde/arm/neon/qdmulh.h +125 -0
  1319. casadi/include/simde/arm/neon/qdmull.h +125 -0
  1320. casadi/include/simde/arm/neon/qmovn.h +273 -0
  1321. casadi/include/simde/arm/neon/qmovn_high.h +127 -0
  1322. casadi/include/simde/arm/neon/qmovun.h +159 -0
  1323. casadi/include/simde/arm/neon/qneg.h +301 -0
  1324. casadi/include/simde/arm/neon/qrdmulh.h +165 -0
  1325. casadi/include/simde/arm/neon/qrdmulh_n.h +136 -0
  1326. casadi/include/simde/arm/neon/qshl.h +732 -0
  1327. casadi/include/simde/arm/neon/qsub.h +549 -0
  1328. casadi/include/simde/arm/neon/qtbl.h +455 -0
  1329. casadi/include/simde/arm/neon/qtbx.h +470 -0
  1330. casadi/include/simde/arm/neon/rbit.h +165 -0
  1331. casadi/include/simde/arm/neon/reinterpret.h +3101 -0
  1332. casadi/include/simde/arm/neon/rev16.h +137 -0
  1333. casadi/include/simde/arm/neon/rev32.h +235 -0
  1334. casadi/include/simde/arm/neon/rev64.h +358 -0
  1335. casadi/include/simde/arm/neon/rhadd.h +406 -0
  1336. casadi/include/simde/arm/neon/rnd.h +143 -0
  1337. casadi/include/simde/arm/neon/rndi.h +135 -0
  1338. casadi/include/simde/arm/neon/rndm.h +143 -0
  1339. casadi/include/simde/arm/neon/rndn.h +135 -0
  1340. casadi/include/simde/arm/neon/rndp.h +143 -0
  1341. casadi/include/simde/arm/neon/rshl.h +903 -0
  1342. casadi/include/simde/arm/neon/rshr_n.h +471 -0
  1343. casadi/include/simde/arm/neon/rsra_n.h +209 -0
  1344. casadi/include/simde/arm/neon/set_lane.h +422 -0
  1345. casadi/include/simde/arm/neon/shl.h +805 -0
  1346. casadi/include/simde/arm/neon/shl_n.h +560 -0
  1347. casadi/include/simde/arm/neon/shr_n.h +612 -0
  1348. casadi/include/simde/arm/neon/sra_n.h +202 -0
  1349. casadi/include/simde/arm/neon/st1.h +353 -0
  1350. casadi/include/simde/arm/neon/st1_lane.h +363 -0
  1351. casadi/include/simde/arm/neon/st3.h +426 -0
  1352. casadi/include/simde/arm/neon/st4.h +445 -0
  1353. casadi/include/simde/arm/neon/sub.h +659 -0
  1354. casadi/include/simde/arm/neon/subl.h +127 -0
  1355. casadi/include/simde/arm/neon/subw.h +221 -0
  1356. casadi/include/simde/arm/neon/subw_high.h +222 -0
  1357. casadi/include/simde/arm/neon/tbl.h +224 -0
  1358. casadi/include/simde/arm/neon/tbx.h +247 -0
  1359. casadi/include/simde/arm/neon/trn.h +252 -0
  1360. casadi/include/simde/arm/neon/trn1.h +500 -0
  1361. casadi/include/simde/arm/neon/trn2.h +499 -0
  1362. casadi/include/simde/arm/neon/tst.h +540 -0
  1363. casadi/include/simde/arm/neon/types.h +683 -0
  1364. casadi/include/simde/arm/neon/uqadd.h +325 -0
  1365. casadi/include/simde/arm/neon/uzp.h +252 -0
  1366. casadi/include/simde/arm/neon/uzp1.h +643 -0
  1367. casadi/include/simde/arm/neon/uzp2.h +647 -0
  1368. casadi/include/simde/arm/neon/zip.h +252 -0
  1369. casadi/include/simde/arm/neon/zip1.h +625 -0
  1370. casadi/include/simde/arm/neon/zip2.h +625 -0
  1371. casadi/include/simde/arm/neon.h +166 -0
  1372. casadi/include/simde/check.h +276 -0
  1373. casadi/include/simde/debug-trap.h +85 -0
  1374. casadi/include/simde/hedley.h +1971 -0
  1375. casadi/include/simde/simde-align.h +449 -0
  1376. casadi/include/simde/simde-arch.h +532 -0
  1377. casadi/include/simde/simde-common.h +890 -0
  1378. casadi/include/simde/simde-complex.h +148 -0
  1379. casadi/include/simde/simde-constify.h +397 -0
  1380. casadi/include/simde/simde-detect-clang.h +109 -0
  1381. casadi/include/simde/simde-diagnostic.h +428 -0
  1382. casadi/include/simde/simde-features.h +522 -0
  1383. casadi/include/simde/simde-math.h +1805 -0
  1384. casadi/include/simde/x86/avx.h +6193 -0
  1385. casadi/include/simde/x86/avx2.h +5660 -0
  1386. casadi/include/simde/x86/avx512/2intersect.h +250 -0
  1387. casadi/include/simde/x86/avx512/abs.h +562 -0
  1388. casadi/include/simde/x86/avx512/add.h +641 -0
  1389. casadi/include/simde/x86/avx512/adds.h +390 -0
  1390. casadi/include/simde/x86/avx512/and.h +305 -0
  1391. casadi/include/simde/x86/avx512/andnot.h +193 -0
  1392. casadi/include/simde/x86/avx512/avg.h +258 -0
  1393. casadi/include/simde/x86/avx512/blend.h +293 -0
  1394. casadi/include/simde/x86/avx512/broadcast.h +897 -0
  1395. casadi/include/simde/x86/avx512/cast.h +324 -0
  1396. casadi/include/simde/x86/avx512/cmp.h +587 -0
  1397. casadi/include/simde/x86/avx512/cmpeq.h +179 -0
  1398. casadi/include/simde/x86/avx512/cmpge.h +104 -0
  1399. casadi/include/simde/x86/avx512/cmpgt.h +189 -0
  1400. casadi/include/simde/x86/avx512/cmple.h +103 -0
  1401. casadi/include/simde/x86/avx512/cmplt.h +123 -0
  1402. casadi/include/simde/x86/avx512/copysign.h +86 -0
  1403. casadi/include/simde/x86/avx512/cvt.h +122 -0
  1404. casadi/include/simde/x86/avx512/cvts.h +723 -0
  1405. casadi/include/simde/x86/avx512/div.h +162 -0
  1406. casadi/include/simde/x86/avx512/extract.h +198 -0
  1407. casadi/include/simde/x86/avx512/fmadd.h +136 -0
  1408. casadi/include/simde/x86/avx512/fmsub.h +108 -0
  1409. casadi/include/simde/x86/avx512/fnmadd.h +108 -0
  1410. casadi/include/simde/x86/avx512/fnmsub.h +108 -0
  1411. casadi/include/simde/x86/avx512/insert.h +193 -0
  1412. casadi/include/simde/x86/avx512/kshift.h +152 -0
  1413. casadi/include/simde/x86/avx512/load.h +67 -0
  1414. casadi/include/simde/x86/avx512/loadu.h +113 -0
  1415. casadi/include/simde/x86/avx512/lzcnt.h +209 -0
  1416. casadi/include/simde/x86/avx512/madd.h +155 -0
  1417. casadi/include/simde/x86/avx512/maddubs.h +159 -0
  1418. casadi/include/simde/x86/avx512/max.h +587 -0
  1419. casadi/include/simde/x86/avx512/min.h +587 -0
  1420. casadi/include/simde/x86/avx512/mov.h +859 -0
  1421. casadi/include/simde/x86/avx512/mov_mask.h +372 -0
  1422. casadi/include/simde/x86/avx512/movm.h +460 -0
  1423. casadi/include/simde/x86/avx512/mul.h +279 -0
  1424. casadi/include/simde/x86/avx512/mulhi.h +65 -0
  1425. casadi/include/simde/x86/avx512/mulhrs.h +65 -0
  1426. casadi/include/simde/x86/avx512/mullo.h +117 -0
  1427. casadi/include/simde/x86/avx512/negate.h +88 -0
  1428. casadi/include/simde/x86/avx512/or.h +252 -0
  1429. casadi/include/simde/x86/avx512/packs.h +122 -0
  1430. casadi/include/simde/x86/avx512/packus.h +122 -0
  1431. casadi/include/simde/x86/avx512/permutex2var.h +1645 -0
  1432. casadi/include/simde/x86/avx512/permutexvar.h +1180 -0
  1433. casadi/include/simde/x86/avx512/sad.h +77 -0
  1434. casadi/include/simde/x86/avx512/set.h +477 -0
  1435. casadi/include/simde/x86/avx512/set1.h +331 -0
  1436. casadi/include/simde/x86/avx512/set4.h +140 -0
  1437. casadi/include/simde/x86/avx512/setone.h +66 -0
  1438. casadi/include/simde/x86/avx512/setr.h +144 -0
  1439. casadi/include/simde/x86/avx512/setr4.h +140 -0
  1440. casadi/include/simde/x86/avx512/setzero.h +90 -0
  1441. casadi/include/simde/x86/avx512/shuffle.h +176 -0
  1442. casadi/include/simde/x86/avx512/sll.h +247 -0
  1443. casadi/include/simde/x86/avx512/slli.h +179 -0
  1444. casadi/include/simde/x86/avx512/sllv.h +68 -0
  1445. casadi/include/simde/x86/avx512/sqrt.h +127 -0
  1446. casadi/include/simde/x86/avx512/sra.h +81 -0
  1447. casadi/include/simde/x86/avx512/srai.h +70 -0
  1448. casadi/include/simde/x86/avx512/srav.h +67 -0
  1449. casadi/include/simde/x86/avx512/srl.h +216 -0
  1450. casadi/include/simde/x86/avx512/srli.h +180 -0
  1451. casadi/include/simde/x86/avx512/srlv.h +282 -0
  1452. casadi/include/simde/x86/avx512/store.h +93 -0
  1453. casadi/include/simde/x86/avx512/storeu.h +93 -0
  1454. casadi/include/simde/x86/avx512/sub.h +351 -0
  1455. casadi/include/simde/x86/avx512/subs.h +222 -0
  1456. casadi/include/simde/x86/avx512/test.h +193 -0
  1457. casadi/include/simde/x86/avx512/types.h +380 -0
  1458. casadi/include/simde/x86/avx512/unpackhi.h +380 -0
  1459. casadi/include/simde/x86/avx512/unpacklo.h +104 -0
  1460. casadi/include/simde/x86/avx512/xor.h +263 -0
  1461. casadi/include/simde/x86/avx512/xorsign.h +72 -0
  1462. casadi/include/simde/x86/avx512.h +108 -0
  1463. casadi/include/simde/x86/clmul.h +414 -0
  1464. casadi/include/simde/x86/fma.h +724 -0
  1465. casadi/include/simde/x86/gfni.h +802 -0
  1466. casadi/include/simde/x86/mmx.h +2399 -0
  1467. casadi/include/simde/x86/sse.h +4471 -0
  1468. casadi/include/simde/x86/sse2.h +7389 -0
  1469. casadi/include/simde/x86/sse3.h +499 -0
  1470. casadi/include/simde/x86/sse4.1.h +2216 -0
  1471. casadi/include/simde/x86/sse4.2.h +347 -0
  1472. casadi/include/simde/x86/ssse3.h +1032 -0
  1473. casadi/include/simde/x86/svml.h +12139 -0
  1474. casadi/include/simde/x86/xop.h +3644 -0
  1475. casadi/include/superscs/cones.h +185 -0
  1476. casadi/include/superscs/constants.h +144 -0
  1477. casadi/include/superscs/cs.h +109 -0
  1478. casadi/include/superscs/ctrlc.h +77 -0
  1479. casadi/include/superscs/directions.h +125 -0
  1480. casadi/include/superscs/glbopts.h +240 -0
  1481. casadi/include/superscs/linAlg.h +437 -0
  1482. casadi/include/superscs/linSys.h +205 -0
  1483. casadi/include/superscs/linsys/amatrix.h +77 -0
  1484. casadi/include/superscs/linsys/common.h +49 -0
  1485. casadi/include/superscs/normalize.h +138 -0
  1486. casadi/include/superscs/scs.h +656 -0
  1487. casadi/include/superscs/scs_blas.h +79 -0
  1488. casadi/include/superscs/scs_parser.h +187 -0
  1489. casadi/include/superscs/unit_test_util.h +210 -0
  1490. casadi/include/superscs/util.h +354 -0
  1491. casadi/include/trlib/trlib_eigen_inverse.h +118 -0
  1492. casadi/include/trlib/trlib_krylov.h +493 -0
  1493. casadi/include/trlib/trlib_leftmost.h +181 -0
  1494. casadi/include/trlib/trlib_private.h +109 -0
  1495. casadi/include/trlib/trlib_quadratic_zero.h +57 -0
  1496. casadi/include/trlib/trlib_tri_factor.h +409 -0
  1497. casadi/include/trlib/trlib_types.h +36 -0
  1498. casadi/libCbc.la +35 -0
  1499. casadi/libCbc.so +0 -0
  1500. casadi/libCbc.so.3 +0 -0
  1501. casadi/libCbc.so.3.10.11 +0 -0
  1502. casadi/libCbcSolver.la +35 -0
  1503. casadi/libCbcSolver.so +0 -0
  1504. casadi/libCbcSolver.so.3 +0 -0
  1505. casadi/libCbcSolver.so.3.10.11 +0 -0
  1506. casadi/libCgl.la +35 -0
  1507. casadi/libCgl.so +0 -0
  1508. casadi/libCgl.so.1 +0 -0
  1509. casadi/libCgl.so.1.10.8 +0 -0
  1510. casadi/libClp.la +35 -0
  1511. casadi/libClp.so +0 -0
  1512. casadi/libClp.so.1 +0 -0
  1513. casadi/libClp.so.1.14.9 +0 -0
  1514. casadi/libClpSolver.la +35 -0
  1515. casadi/libClpSolver.so +0 -0
  1516. casadi/libClpSolver.so.1 +0 -0
  1517. casadi/libClpSolver.so.1.14.9 +0 -0
  1518. casadi/libCoinUtils.la +35 -0
  1519. casadi/libCoinUtils.so +0 -0
  1520. casadi/libCoinUtils.so.3 +0 -0
  1521. casadi/libCoinUtils.so.3.11.10 +0 -0
  1522. casadi/libOsi.la +35 -0
  1523. casadi/libOsi.so +0 -0
  1524. casadi/libOsi.so.1 +0 -0
  1525. casadi/libOsi.so.1.13.9 +0 -0
  1526. casadi/libOsiCbc.la +35 -0
  1527. casadi/libOsiCbc.so +0 -0
  1528. casadi/libOsiCbc.so.3 +0 -0
  1529. casadi/libOsiCbc.so.3.10.11 +0 -0
  1530. casadi/libOsiClp.la +35 -0
  1531. casadi/libOsiClp.so +0 -0
  1532. casadi/libOsiClp.so.1 +0 -0
  1533. casadi/libOsiClp.so.1.14.9 +0 -0
  1534. casadi/libOsiCommonTests.la +35 -0
  1535. casadi/libOsiCommonTests.so +0 -0
  1536. casadi/libOsiCommonTests.so.1 +0 -0
  1537. casadi/libOsiCommonTests.so.1.13.9 +0 -0
  1538. casadi/libalpaqa-dl-loader.so +0 -0
  1539. casadi/libalpaqa-dl-loader.so.1.0.0 +0 -0
  1540. casadi/libalpaqa.so +0 -0
  1541. casadi/libalpaqa.so.1.0.0 +0 -0
  1542. casadi/libbonmin.la +35 -0
  1543. casadi/libbonmin.so +0 -0
  1544. casadi/libbonmin.so.4 +0 -0
  1545. casadi/libbonmin.so.4.8.9 +0 -0
  1546. casadi/libcasadi-tp-openblas.so +0 -0
  1547. casadi/libcasadi-tp-openblas.so.0 +0 -0
  1548. casadi/libcasadi-tp-openblas.so.0.3 +0 -0
  1549. casadi/libcasadi.so +0 -0
  1550. casadi/libcasadi.so.3.7 +0 -0
  1551. casadi/libcasadi_archiver_libzip.so +0 -0
  1552. casadi/libcasadi_archiver_libzip.so.3.7 +0 -0
  1553. casadi/libcasadi_conic_cbc.so +0 -0
  1554. casadi/libcasadi_conic_cbc.so.3.7 +0 -0
  1555. casadi/libcasadi_conic_clp.so +0 -0
  1556. casadi/libcasadi_conic_clp.so.3.7 +0 -0
  1557. casadi/libcasadi_conic_daqp.so +0 -0
  1558. casadi/libcasadi_conic_daqp.so.3.7 +0 -0
  1559. casadi/libcasadi_conic_gurobi.so +0 -0
  1560. casadi/libcasadi_conic_gurobi.so.3.7 +0 -0
  1561. casadi/libcasadi_conic_highs.so +0 -0
  1562. casadi/libcasadi_conic_highs.so.3.7 +0 -0
  1563. casadi/libcasadi_conic_ipqp.so +0 -0
  1564. casadi/libcasadi_conic_ipqp.so.3.7 +0 -0
  1565. casadi/libcasadi_conic_nlpsol.so +0 -0
  1566. casadi/libcasadi_conic_nlpsol.so.3.7 +0 -0
  1567. casadi/libcasadi_conic_osqp.so +0 -0
  1568. casadi/libcasadi_conic_osqp.so.3.7 +0 -0
  1569. casadi/libcasadi_conic_proxqp.so +0 -0
  1570. casadi/libcasadi_conic_proxqp.so.3.7 +0 -0
  1571. casadi/libcasadi_conic_qpoases.so +0 -0
  1572. casadi/libcasadi_conic_qpoases.so.3.7 +0 -0
  1573. casadi/libcasadi_conic_qrqp.so +0 -0
  1574. casadi/libcasadi_conic_qrqp.so.3.7 +0 -0
  1575. casadi/libcasadi_conic_superscs.so +0 -0
  1576. casadi/libcasadi_conic_superscs.so.3.7 +0 -0
  1577. casadi/libcasadi_filesystem_ghc.so +0 -0
  1578. casadi/libcasadi_filesystem_ghc.so.3.7 +0 -0
  1579. casadi/libcasadi_importer_shell.so +0 -0
  1580. casadi/libcasadi_importer_shell.so.3.7 +0 -0
  1581. casadi/libcasadi_integrator_collocation.so +0 -0
  1582. casadi/libcasadi_integrator_collocation.so.3.7 +0 -0
  1583. casadi/libcasadi_integrator_cvodes.so +0 -0
  1584. casadi/libcasadi_integrator_cvodes.so.3.7 +0 -0
  1585. casadi/libcasadi_integrator_idas.so +0 -0
  1586. casadi/libcasadi_integrator_idas.so.3.7 +0 -0
  1587. casadi/libcasadi_integrator_rk.so +0 -0
  1588. casadi/libcasadi_integrator_rk.so.3.7 +0 -0
  1589. casadi/libcasadi_interpolant_bspline.so +0 -0
  1590. casadi/libcasadi_interpolant_bspline.so.3.7 +0 -0
  1591. casadi/libcasadi_interpolant_linear.so +0 -0
  1592. casadi/libcasadi_interpolant_linear.so.3.7 +0 -0
  1593. casadi/libcasadi_linsol_csparse.so +0 -0
  1594. casadi/libcasadi_linsol_csparse.so.3.7 +0 -0
  1595. casadi/libcasadi_linsol_csparsecholesky.so +0 -0
  1596. casadi/libcasadi_linsol_csparsecholesky.so.3.7 +0 -0
  1597. casadi/libcasadi_linsol_lapacklu.so +0 -0
  1598. casadi/libcasadi_linsol_lapacklu.so.3.7 +0 -0
  1599. casadi/libcasadi_linsol_lapackqr.so +0 -0
  1600. casadi/libcasadi_linsol_lapackqr.so.3.7 +0 -0
  1601. casadi/libcasadi_linsol_ldl.so +0 -0
  1602. casadi/libcasadi_linsol_ldl.so.3.7 +0 -0
  1603. casadi/libcasadi_linsol_lsqr.so +0 -0
  1604. casadi/libcasadi_linsol_lsqr.so.3.7 +0 -0
  1605. casadi/libcasadi_linsol_ma27.so +0 -0
  1606. casadi/libcasadi_linsol_ma27.so.3.7 +0 -0
  1607. casadi/libcasadi_linsol_mumps.so +0 -0
  1608. casadi/libcasadi_linsol_mumps.so.3.7 +0 -0
  1609. casadi/libcasadi_linsol_qr.so +0 -0
  1610. casadi/libcasadi_linsol_qr.so.3.7 +0 -0
  1611. casadi/libcasadi_linsol_symbolicqr.so +0 -0
  1612. casadi/libcasadi_linsol_symbolicqr.so.3.7 +0 -0
  1613. casadi/libcasadi_linsol_tridiag.so +0 -0
  1614. casadi/libcasadi_linsol_tridiag.so.3.7 +0 -0
  1615. casadi/libcasadi_nlpsol_alpaqa.so +0 -0
  1616. casadi/libcasadi_nlpsol_alpaqa.so.3.7 +0 -0
  1617. casadi/libcasadi_nlpsol_ampl.so +0 -0
  1618. casadi/libcasadi_nlpsol_ampl.so.3.7 +0 -0
  1619. casadi/libcasadi_nlpsol_blocksqp.so +0 -0
  1620. casadi/libcasadi_nlpsol_blocksqp.so.3.7 +0 -0
  1621. casadi/libcasadi_nlpsol_bonmin.so +0 -0
  1622. casadi/libcasadi_nlpsol_bonmin.so.3.7 +0 -0
  1623. casadi/libcasadi_nlpsol_feasiblesqpmethod.so +0 -0
  1624. casadi/libcasadi_nlpsol_feasiblesqpmethod.so.3.7 +0 -0
  1625. casadi/libcasadi_nlpsol_ipopt.so +0 -0
  1626. casadi/libcasadi_nlpsol_ipopt.so.3.7 +0 -0
  1627. casadi/libcasadi_nlpsol_knitro.so +0 -0
  1628. casadi/libcasadi_nlpsol_knitro.so.3.7 +0 -0
  1629. casadi/libcasadi_nlpsol_madnlp.so +0 -0
  1630. casadi/libcasadi_nlpsol_madnlp.so.3.7 +0 -0
  1631. casadi/libcasadi_nlpsol_qrsqp.so +0 -0
  1632. casadi/libcasadi_nlpsol_qrsqp.so.3.7 +0 -0
  1633. casadi/libcasadi_nlpsol_scpgen.so +0 -0
  1634. casadi/libcasadi_nlpsol_scpgen.so.3.7 +0 -0
  1635. casadi/libcasadi_nlpsol_sleqp.so +0 -0
  1636. casadi/libcasadi_nlpsol_sleqp.so.3.7 +0 -0
  1637. casadi/libcasadi_nlpsol_snopt.so +0 -0
  1638. casadi/libcasadi_nlpsol_snopt.so.3.7 +0 -0
  1639. casadi/libcasadi_nlpsol_sqpmethod.so +0 -0
  1640. casadi/libcasadi_nlpsol_sqpmethod.so.3.7 +0 -0
  1641. casadi/libcasadi_nlpsol_worhp.so +0 -0
  1642. casadi/libcasadi_nlpsol_worhp.so.3.7 +0 -0
  1643. casadi/libcasadi_rootfinder_fast_newton.so +0 -0
  1644. casadi/libcasadi_rootfinder_fast_newton.so.3.7 +0 -0
  1645. casadi/libcasadi_rootfinder_kinsol.so +0 -0
  1646. casadi/libcasadi_rootfinder_kinsol.so.3.7 +0 -0
  1647. casadi/libcasadi_rootfinder_newton.so +0 -0
  1648. casadi/libcasadi_rootfinder_newton.so.3.7 +0 -0
  1649. casadi/libcasadi_rootfinder_nlpsol.so +0 -0
  1650. casadi/libcasadi_rootfinder_nlpsol.so.3.7 +0 -0
  1651. casadi/libcasadi_sundials_common.so +0 -0
  1652. casadi/libcasadi_sundials_common.so.3.7 +0 -0
  1653. casadi/libcasadi_xmlfile_tinyxml.so +0 -0
  1654. casadi/libcasadi_xmlfile_tinyxml.so.3.7 +0 -0
  1655. casadi/libcoinmetis.la +41 -0
  1656. casadi/libcoinmetis.so +0 -0
  1657. casadi/libcoinmetis.so.2 +0 -0
  1658. casadi/libcoinmetis.so.2.0.0 +0 -0
  1659. casadi/libcoinmumps.la +41 -0
  1660. casadi/libcoinmumps.so +0 -0
  1661. casadi/libcoinmumps.so.3 +0 -0
  1662. casadi/libcoinmumps.so.3.0.1 +0 -0
  1663. casadi/libdaqp.so +0 -0
  1664. casadi/libdaqpstat.a +0 -0
  1665. casadi/libgfortran-a8535147.so.5.0.0 +0 -0
  1666. casadi/libgurobi_adaptor.so +0 -0
  1667. casadi/libhighs.so +0 -0
  1668. casadi/libhighs.so.1 +0 -0
  1669. casadi/libhighs.so.1.10.0 +0 -0
  1670. casadi/libindirect.a +0 -0
  1671. casadi/libipopt.la +41 -0
  1672. casadi/libipopt.so +0 -0
  1673. casadi/libipopt.so.3 +0 -0
  1674. casadi/libipopt.so.3.14.11 +0 -0
  1675. casadi/liblinsys.a +0 -0
  1676. casadi/libmatlab_ipc.so +0 -0
  1677. casadi/libosqp.a +0 -0
  1678. casadi/libosqp.so +0 -0
  1679. casadi/libqdldl.a +0 -0
  1680. casadi/libqdldl.so +0 -0
  1681. casadi/libquadmath-e2ac3af2.so.0.0.0 +0 -0
  1682. casadi/libsipopt.la +41 -0
  1683. casadi/libsipopt.so +0 -0
  1684. casadi/libsipopt.so.3 +0 -0
  1685. casadi/libsipopt.so.3.14.11 +0 -0
  1686. casadi/libsleqp.so +0 -0
  1687. casadi/libsleqp.so.1.0.1 +0 -0
  1688. casadi/libspral.a +0 -0
  1689. casadi/libsuperscs.a +0 -0
  1690. casadi/libtrlib.so +0 -0
  1691. casadi/libtrlib.so.0.4 +0 -0
  1692. casadi/libz.a +0 -0
  1693. casadi/libz.so +0 -0
  1694. casadi/libz.so.1 +0 -0
  1695. casadi/libz.so.1.3.1 +0 -0
  1696. casadi/libzip.a +0 -0
  1697. casadi/pkgconfig/blas.pc +11 -0
  1698. casadi/pkgconfig/bonmin.pc +12 -0
  1699. casadi/pkgconfig/casadi.pc +12 -0
  1700. casadi/pkgconfig/cbc.pc +12 -0
  1701. casadi/pkgconfig/cgl.pc +12 -0
  1702. casadi/pkgconfig/clp.pc +12 -0
  1703. casadi/pkgconfig/coinmetis.pc +13 -0
  1704. casadi/pkgconfig/coinmumps.pc +15 -0
  1705. casadi/pkgconfig/coinutils.pc +12 -0
  1706. casadi/pkgconfig/highs.pc +12 -0
  1707. casadi/pkgconfig/ipopt.pc +15 -0
  1708. casadi/pkgconfig/lapack.pc +11 -0
  1709. casadi/pkgconfig/libzip.pc +14 -0
  1710. casadi/pkgconfig/openblas.pc +11 -0
  1711. casadi/pkgconfig/osi-cbc.pc +12 -0
  1712. casadi/pkgconfig/osi-clp.pc +12 -0
  1713. casadi/pkgconfig/osi-unittests.pc +12 -0
  1714. casadi/pkgconfig/osi.pc +12 -0
  1715. casadi/pkgconfig/proxsuite.pc +22 -0
  1716. casadi/pkgconfig/sleqp.pc +10 -0
  1717. casadi/tools/__init__.py +54 -0
  1718. casadi/tools/bounds.py +107 -0
  1719. casadi/tools/graph/__init__.py +35 -0
  1720. casadi/tools/graph/graph.py +747 -0
  1721. casadi/tools/in_out.py +89 -0
  1722. casadi/tools/structure3.py +1441 -0
  1723. casadi-3.7.2.dist-info/METADATA +45 -0
  1724. casadi-3.7.2.dist-info/RECORD +1726 -0
  1725. casadi-3.7.2.dist-info/WHEEL +4 -0
  1726. dummy.txt +1 -0
casadi/include/simde/x86/sse.h
@@ -0,0 +1,4471 @@
+ /* SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Copyright:
+ *   2017-2020 Evan Nemerson <evan@nemerson.com>
+ *   2015-2017 John W. Ratcliff <jratcliffscarab@gmail.com>
+ *   2015      Brandon Rowlett <browlett@nvidia.com>
+ *   2015      Ken Fast <kfast@gdeb.com>
+ */
+
+ #if !defined(SIMDE_X86_SSE_H)
+ #define SIMDE_X86_SSE_H
+
+ #include "mmx.h"
+
+ #if defined(_WIN32)
+ #include <windows.h>
+ #endif
+
+ HEDLEY_DIAGNOSTIC_PUSH
+ SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
+ SIMDE_BEGIN_DECLS_
+
+ typedef union {
+ #if defined(SIMDE_VECTOR_SUBSCRIPT)
+ SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
+ SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
+ SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
+ SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
+ SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
+ SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
+ SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
+ SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
+ #if defined(SIMDE_HAVE_INT128_)
+ SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
+ SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
+ #endif
+ SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
+ SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
+ SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
+ #else
+ SIMDE_ALIGN_TO_16 int8_t i8[16];
+ SIMDE_ALIGN_TO_16 int16_t i16[8];
+ SIMDE_ALIGN_TO_16 int32_t i32[4];
+ SIMDE_ALIGN_TO_16 int64_t i64[2];
+ SIMDE_ALIGN_TO_16 uint8_t u8[16];
+ SIMDE_ALIGN_TO_16 uint16_t u16[8];
+ SIMDE_ALIGN_TO_16 uint32_t u32[4];
+ SIMDE_ALIGN_TO_16 uint64_t u64[2];
+ #if defined(SIMDE_HAVE_INT128_)
+ SIMDE_ALIGN_TO_16 simde_int128 i128[1];
+ SIMDE_ALIGN_TO_16 simde_uint128 u128[1];
+ #endif
+ SIMDE_ALIGN_TO_16 simde_float32 f32[4];
+ SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)];
+ SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)];
+ #endif
+
+ SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2];
+ SIMDE_ALIGN_TO_16 simde__m64 m64[2];
+
+ #if defined(SIMDE_X86_SSE_NATIVE)
+ SIMDE_ALIGN_TO_16 __m128 n;
+ #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+ SIMDE_ALIGN_TO_16 int8x16_t neon_i8;
+ SIMDE_ALIGN_TO_16 int16x8_t neon_i16;
+ SIMDE_ALIGN_TO_16 int32x4_t neon_i32;
+ SIMDE_ALIGN_TO_16 int64x2_t neon_i64;
+ SIMDE_ALIGN_TO_16 uint8x16_t neon_u8;
+ SIMDE_ALIGN_TO_16 uint16x8_t neon_u16;
+ SIMDE_ALIGN_TO_16 uint32x4_t neon_u32;
+ SIMDE_ALIGN_TO_16 uint64x2_t neon_u64;
+ SIMDE_ALIGN_TO_16 float32x4_t neon_f32;
+ #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+ SIMDE_ALIGN_TO_16 float64x2_t neon_f64;
+ #endif
+ #elif defined(SIMDE_WASM_SIMD128_NATIVE)
+ SIMDE_ALIGN_TO_16 v128_t wasm_v128;
+ #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
+ SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8;
+ SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16;
+ SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32;
+ SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8;
+ SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16;
+ SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32;
+ SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32;
+ #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
+ SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64;
+ SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64;
+ SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64;
+ #endif
+ #endif
+ } simde__m128_private;
+
+ #if defined(SIMDE_X86_SSE_NATIVE)
+ typedef __m128 simde__m128;
+ #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+ typedef float32x4_t simde__m128;
+ #elif defined(SIMDE_WASM_SIMD128_NATIVE)
+ typedef v128_t simde__m128;
+ #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
+ typedef SIMDE_POWER_ALTIVEC_VECTOR(float) simde__m128;
+ #elif defined(SIMDE_VECTOR_SUBSCRIPT)
+ typedef simde_float32 simde__m128 SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
+ #else
+ typedef simde__m128_private simde__m128;
+ #endif
+
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
+ typedef simde__m128 __m128;
+ #endif
+
+ HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128), "simde__m128 size incorrect");
+ HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128_private), "simde__m128_private size incorrect");
+ #if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF)
+ HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128) == 16, "simde__m128 is not 16-byte aligned");
+ HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128_private) == 16, "simde__m128_private is not 16-byte aligned");
+ #endif
+
+ SIMDE_FUNCTION_ATTRIBUTES
+ simde__m128
+ simde__m128_from_private(simde__m128_private v) {
+ simde__m128 r;
+ simde_memcpy(&r, &v, sizeof(r));
+ return r;
+ }
+
+ SIMDE_FUNCTION_ATTRIBUTES
+ simde__m128_private
+ simde__m128_to_private(simde__m128 v) {
+ simde__m128_private r;
+ simde_memcpy(&r, &v, sizeof(r));
+ return r;
+ }
+
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+ SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int8x16_t, neon, i8)
+ SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int16x8_t, neon, i16)
+ SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int32x4_t, neon, i32)
+ SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int64x2_t, neon, i64)
+ SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint8x16_t, neon, u8)
+ SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint16x8_t, neon, u16)
+ SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint32x4_t, neon, u32)
+ SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint64x2_t, neon, u64)
+ SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, float32x4_t, neon, f32)
+ #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+ SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, float64x2_t, neon, f64)
+ #endif
+ #endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */
+
+ #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
+ SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8)
+ SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16)
+ SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32)
+ SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8)
+ SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16)
+ SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32)
+
+ #if defined(SIMDE_BUG_GCC_95782)
+ SIMDE_FUNCTION_ATTRIBUTES
+ SIMDE_POWER_ALTIVEC_VECTOR(float)
+ simde__m128_to_altivec_f32(simde__m128 value) {
+ simde__m128_private r_ = simde__m128_to_private(value);
+ return r_.altivec_f32;
+ }
+
+ SIMDE_FUNCTION_ATTRIBUTES
+ simde__m128
+ simde__m128_from_altivec_f32(SIMDE_POWER_ALTIVEC_VECTOR(float) value) {
189
+ simde__m128_private r_;
190
+ r_.altivec_f32 = value;
191
+ return simde__m128_from_private(r_);
192
+ }
193
+ #else
194
+ SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(float), altivec, f32)
195
+ #endif
196
+
197
+ #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
198
+ SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64)
199
+ SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64)
200
+ #endif
201
+ #elif defined(SIMDE_WASM_SIMD128_NATIVE)
202
+ SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v128_t, wasm, v128);
203
+ #endif /* defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) */
204
+
205
+ enum {
206
+ #if defined(SIMDE_X86_SSE_NATIVE)
207
+ SIMDE_MM_ROUND_NEAREST = _MM_ROUND_NEAREST,
208
+ SIMDE_MM_ROUND_DOWN = _MM_ROUND_DOWN,
209
+ SIMDE_MM_ROUND_UP = _MM_ROUND_UP,
210
+ SIMDE_MM_ROUND_TOWARD_ZERO = _MM_ROUND_TOWARD_ZERO
211
+ #else
212
+ SIMDE_MM_ROUND_NEAREST = 0x0000,
213
+ SIMDE_MM_ROUND_DOWN = 0x2000,
214
+ SIMDE_MM_ROUND_UP = 0x4000,
215
+ SIMDE_MM_ROUND_TOWARD_ZERO = 0x6000
216
+ #endif
217
+ };
218
+
219
+ #if defined(_MM_FROUND_TO_NEAREST_INT)
220
+ # define SIMDE_MM_FROUND_TO_NEAREST_INT _MM_FROUND_TO_NEAREST_INT
221
+ # define SIMDE_MM_FROUND_TO_NEG_INF _MM_FROUND_TO_NEG_INF
222
+ # define SIMDE_MM_FROUND_TO_POS_INF _MM_FROUND_TO_POS_INF
223
+ # define SIMDE_MM_FROUND_TO_ZERO _MM_FROUND_TO_ZERO
224
+ # define SIMDE_MM_FROUND_CUR_DIRECTION _MM_FROUND_CUR_DIRECTION
225
+
226
+ # define SIMDE_MM_FROUND_RAISE_EXC _MM_FROUND_RAISE_EXC
227
+ # define SIMDE_MM_FROUND_NO_EXC _MM_FROUND_NO_EXC
228
+ #else
229
+ # define SIMDE_MM_FROUND_TO_NEAREST_INT 0x00
230
+ # define SIMDE_MM_FROUND_TO_NEG_INF 0x01
231
+ # define SIMDE_MM_FROUND_TO_POS_INF 0x02
232
+ # define SIMDE_MM_FROUND_TO_ZERO 0x03
233
+ # define SIMDE_MM_FROUND_CUR_DIRECTION 0x04
234
+
235
+ # define SIMDE_MM_FROUND_RAISE_EXC 0x00
236
+ # define SIMDE_MM_FROUND_NO_EXC 0x08
237
+ #endif
238
+
239
+ #define SIMDE_MM_FROUND_NINT \
240
+ (SIMDE_MM_FROUND_TO_NEAREST_INT | SIMDE_MM_FROUND_RAISE_EXC)
241
+ #define SIMDE_MM_FROUND_FLOOR \
242
+ (SIMDE_MM_FROUND_TO_NEG_INF | SIMDE_MM_FROUND_RAISE_EXC)
243
+ #define SIMDE_MM_FROUND_CEIL \
244
+ (SIMDE_MM_FROUND_TO_POS_INF | SIMDE_MM_FROUND_RAISE_EXC)
245
+ #define SIMDE_MM_FROUND_TRUNC \
246
+ (SIMDE_MM_FROUND_TO_ZERO | SIMDE_MM_FROUND_RAISE_EXC)
247
+ #define SIMDE_MM_FROUND_RINT \
248
+ (SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_RAISE_EXC)
249
+ #define SIMDE_MM_FROUND_NEARBYINT \
250
+ (SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_NO_EXC)
251
+
252
+ #if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) && !defined(_MM_FROUND_TO_NEAREST_INT)
253
+ # define _MM_FROUND_TO_NEAREST_INT SIMDE_MM_FROUND_TO_NEAREST_INT
254
+ # define _MM_FROUND_TO_NEG_INF SIMDE_MM_FROUND_TO_NEG_INF
255
+ # define _MM_FROUND_TO_POS_INF SIMDE_MM_FROUND_TO_POS_INF
256
+ # define _MM_FROUND_TO_ZERO SIMDE_MM_FROUND_TO_ZERO
257
+ # define _MM_FROUND_CUR_DIRECTION SIMDE_MM_FROUND_CUR_DIRECTION
258
+ # define _MM_FROUND_RAISE_EXC SIMDE_MM_FROUND_RAISE_EXC
259
+ # define _MM_FROUND_NINT SIMDE_MM_FROUND_NINT
260
+ # define _MM_FROUND_FLOOR SIMDE_MM_FROUND_FLOOR
261
+ # define _MM_FROUND_CEIL SIMDE_MM_FROUND_CEIL
262
+ # define _MM_FROUND_TRUNC SIMDE_MM_FROUND_TRUNC
263
+ # define _MM_FROUND_RINT SIMDE_MM_FROUND_RINT
264
+ # define _MM_FROUND_NEARBYINT SIMDE_MM_FROUND_NEARBYINT
265
+ #endif
266
+
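The composite SIMDE_MM_FROUND_* macros above are simply a base rounding direction OR'd with an exception-control bit, and the rounding dispatcher further down strips that bit again before switching on the direction. A minimal standalone sketch, assuming the fallback encodings defined above (the native <smmintrin.h> values may differ):

    #include <stdio.h>

    /* Stand-ins for the fallback values above; purely illustrative. */
    #define FROUND_TO_NEG_INF    0x01
    #define FROUND_CUR_DIRECTION 0x04
    #define FROUND_RAISE_EXC     0x00
    #define FROUND_NO_EXC        0x08

    int main(void) {
      int floor_mode     = FROUND_TO_NEG_INF    | FROUND_RAISE_EXC; /* FROUND_FLOOR     */
      int nearbyint_mode = FROUND_CUR_DIRECTION | FROUND_NO_EXC;    /* FROUND_NEARBYINT */

      /* Masking off the NO_EXC bit recovers the base direction, which is
       * what the rounding dispatcher below switches on. */
      printf("floor     -> base 0x%02x\n", floor_mode     & ~FROUND_NO_EXC); /* 0x01 */
      printf("nearbyint -> base 0x%02x\n", nearbyint_mode & ~FROUND_NO_EXC); /* 0x04 */
      return 0;
    }
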
267
+ #if defined(_MM_EXCEPT_INVALID)
268
+ # define SIMDE_MM_EXCEPT_INVALID _MM_EXCEPT_INVALID
269
+ #else
270
+ # define SIMDE_MM_EXCEPT_INVALID (0x0001)
271
+ #endif
272
+ #if defined(_MM_EXCEPT_DENORM)
273
+ # define SIMDE_MM_EXCEPT_DENORM _MM_EXCEPT_DENORM
274
+ #else
275
+ # define SIMDE_MM_EXCEPT_DENORM (0x0002)
276
+ #endif
277
+ #if defined(_MM_EXCEPT_DIV_ZERO)
278
+ # define SIMDE_MM_EXCEPT_DIV_ZERO _MM_EXCEPT_DIV_ZERO
279
+ #else
280
+ # define SIMDE_MM_EXCEPT_DIV_ZERO (0x0004)
281
+ #endif
282
+ #if defined(_MM_EXCEPT_OVERFLOW)
283
+ # define SIMDE_MM_EXCEPT_OVERFLOW _MM_EXCEPT_OVERFLOW
284
+ #else
285
+ # define SIMDE_MM_EXCEPT_OVERFLOW (0x0008)
286
+ #endif
287
+ #if defined(_MM_EXCEPT_UNDERFLOW)
288
+ # define SIMDE_MM_EXCEPT_UNDERFLOW _MM_EXCEPT_UNDERFLOW
289
+ #else
290
+ # define SIMDE_MM_EXCEPT_UNDERFLOW (0x0010)
291
+ #endif
292
+ #if defined(_MM_EXCEPT_INEXACT)
293
+ # define SIMDE_MM_EXCEPT_INEXACT _MM_EXCEPT_INEXACT
294
+ #else
295
+ # define SIMDE_MM_EXCEPT_INEXACT (0x0020)
296
+ #endif
297
+ #if defined(_MM_EXCEPT_MASK)
298
+ # define SIMDE_MM_EXCEPT_MASK _MM_EXCEPT_MASK
299
+ #else
300
+ # define SIMDE_MM_EXCEPT_MASK \
301
+ (SIMDE_MM_EXCEPT_INVALID | SIMDE_MM_EXCEPT_DENORM | \
302
+ SIMDE_MM_EXCEPT_DIV_ZERO | SIMDE_MM_EXCEPT_OVERFLOW | \
303
+ SIMDE_MM_EXCEPT_UNDERFLOW | SIMDE_MM_EXCEPT_INEXACT)
304
+ #endif
305
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
306
+ #define _MM_EXCEPT_INVALID SIMDE_MM_EXCEPT_INVALID
307
+ #define _MM_EXCEPT_DENORM SIMDE_MM_EXCEPT_DENORM
308
+ #define _MM_EXCEPT_DIV_ZERO SIMDE_MM_EXCEPT_DIV_ZERO
309
+ #define _MM_EXCEPT_OVERFLOW SIMDE_MM_EXCEPT_OVERFLOW
310
+ #define _MM_EXCEPT_UNDERFLOW SIMDE_MM_EXCEPT_UNDERFLOW
311
+ #define _MM_EXCEPT_INEXACT SIMDE_MM_EXCEPT_INEXACT
312
+ #define _MM_EXCEPT_MASK SIMDE_MM_EXCEPT_MASK
313
+ #endif
314
+
315
+ #if defined(_MM_MASK_INVALID)
316
+ # define SIMDE_MM_MASK_INVALID _MM_MASK_INVALID
317
+ #else
318
+ # define SIMDE_MM_MASK_INVALID (0x0080)
319
+ #endif
320
+ #if defined(_MM_MASK_DENORM)
321
+ # define SIMDE_MM_MASK_DENORM _MM_MASK_DENORM
322
+ #else
323
+ # define SIMDE_MM_MASK_DENORM (0x0100)
324
+ #endif
325
+ #if defined(_MM_MASK_DIV_ZERO)
326
+ # define SIMDE_MM_MASK_DIV_ZERO _MM_MASK_DIV_ZERO
327
+ #else
328
+ # define SIMDE_MM_MASK_DIV_ZERO (0x0200)
329
+ #endif
330
+ #if defined(_MM_MASK_OVERFLOW)
331
+ # define SIMDE_MM_MASK_OVERFLOW _MM_MASK_OVERFLOW
332
+ #else
333
+ # define SIMDE_MM_MASK_OVERFLOW (0x0400)
334
+ #endif
335
+ #if defined(_MM_MASK_UNDERFLOW)
336
+ # define SIMDE_MM_MASK_UNDERFLOW _MM_MASK_UNDERFLOW
337
+ #else
338
+ # define SIMDE_MM_MASK_UNDERFLOW (0x0800)
339
+ #endif
340
+ #if defined(_MM_MASK_INEXACT)
341
+ # define SIMDE_MM_MASK_INEXACT _MM_MASK_INEXACT
342
+ #else
343
+ # define SIMDE_MM_MASK_INEXACT (0x1000)
344
+ #endif
345
+ #if defined(_MM_MASK_MASK)
346
+ # define SIMDE_MM_MASK_MASK _MM_MASK_MASK
347
+ #else
348
+ # define SIMDE_MM_MASK_MASK \
349
+ (SIMDE_MM_MASK_INVALID | SIMDE_MM_MASK_DENORM | \
350
+ SIMDE_MM_MASK_DIV_ZERO | SIMDE_MM_MASK_OVERFLOW | \
351
+ SIMDE_MM_MASK_UNDERFLOW | SIMDE_MM_MASK_INEXACT)
352
+ #endif
353
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
354
+ #define _MM_MASK_INVALID SIMDE_MM_MASK_INVALID
355
+ #define _MM_MASK_DENORM SIMDE_MM_MASK_DENORM
356
+ #define _MM_MASK_DIV_ZERO SIMDE_MM_MASK_DIV_ZERO
357
+ #define _MM_MASK_OVERFLOW SIMDE_MM_MASK_OVERFLOW
358
+ #define _MM_MASK_UNDERFLOW SIMDE_MM_MASK_UNDERFLOW
359
+ #define _MM_MASK_INEXACT SIMDE_MM_MASK_INEXACT
360
+ #define _MM_MASK_MASK SIMDE_MM_MASK_MASK
361
+ #endif
362
+
363
+ #if defined(_MM_FLUSH_ZERO_MASK)
364
+ # define SIMDE_MM_FLUSH_ZERO_MASK _MM_FLUSH_ZERO_MASK
365
+ #else
366
+ # define SIMDE_MM_FLUSH_ZERO_MASK (0x8000)
367
+ #endif
368
+ #if defined(_MM_FLUSH_ZERO_ON)
369
+ # define SIMDE_MM_FLUSH_ZERO_ON _MM_FLUSH_ZERO_ON
370
+ #else
371
+ # define SIMDE_MM_FLUSH_ZERO_ON (0x8000)
372
+ #endif
373
+ #if defined(_MM_FLUSH_ZERO_OFF)
374
+ # define SIMDE_MM_FLUSH_ZERO_OFF _MM_FLUSH_ZERO_OFF
375
+ #else
376
+ # define SIMDE_MM_FLUSH_ZERO_OFF (0x0000)
377
+ #endif
378
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
379
+ #define _MM_FLUSH_ZERO_MASK SIMDE_MM_FLUSH_ZERO_MASK
380
+ #define _MM_FLUSH_ZERO_ON SIMDE_MM_FLUSH_ZERO_ON
381
+ #define _MM_FLUSH_ZERO_OFF SIMDE_MM_FLUSH_ZERO_OFF
382
+ #endif
383
+
384
+ SIMDE_FUNCTION_ATTRIBUTES
385
+ unsigned int
386
+ SIMDE_MM_GET_ROUNDING_MODE(void) {
387
+ #if defined(SIMDE_X86_SSE_NATIVE)
388
+ return _MM_GET_ROUNDING_MODE();
389
+ #elif defined(SIMDE_HAVE_FENV_H)
390
+ unsigned int vfe_mode;
391
+
392
+ switch (fegetround()) {
393
+ #if defined(FE_TONEAREST)
394
+ case FE_TONEAREST:
395
+ vfe_mode = SIMDE_MM_ROUND_NEAREST;
396
+ break;
397
+ #endif
398
+
399
+ #if defined(FE_TOWARDZERO)
400
+ case FE_TOWARDZERO:
401
+ vfe_mode = SIMDE_MM_ROUND_TOWARD_ZERO;
402
+ break;
403
+ #endif
404
+
405
+ #if defined(FE_UPWARD)
406
+ case FE_UPWARD:
407
+ vfe_mode = SIMDE_MM_ROUND_UP;
408
+ break;
409
+ #endif
410
+
411
+ #if defined(FE_DOWNWARD)
412
+ case FE_DOWNWARD:
413
+ vfe_mode = SIMDE_MM_ROUND_DOWN;
414
+ break;
415
+ #endif
416
+
417
+ default:
418
+ vfe_mode = SIMDE_MM_ROUND_NEAREST;
419
+ break;
420
+ }
421
+
422
+ return vfe_mode;
423
+ #else
424
+ return SIMDE_MM_ROUND_NEAREST;
425
+ #endif
426
+ }
427
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
428
+ #define _MM_GET_ROUNDING_MODE() SIMDE_MM_GET_ROUNDING_MODE()
429
+ #endif
430
+
431
+ SIMDE_FUNCTION_ATTRIBUTES
432
+ void
433
+ SIMDE_MM_SET_ROUNDING_MODE(unsigned int a) {
434
+ #if defined(SIMDE_X86_SSE_NATIVE)
435
+ _MM_SET_ROUNDING_MODE(a);
436
+ #elif defined(SIMDE_HAVE_FENV_H)
437
+ int fe_mode = FE_TONEAREST;
438
+
439
+ switch (a) {
440
+ #if defined(FE_TONEAREST)
441
+ case SIMDE_MM_ROUND_NEAREST:
442
+ fe_mode = FE_TONEAREST;
443
+ break;
444
+ #endif
445
+
446
+ #if defined(FE_TOWARDZERO)
447
+ case SIMDE_MM_ROUND_TOWARD_ZERO:
448
+ fe_mode = FE_TOWARDZERO;
449
+ break;
450
+ #endif
451
+
452
+ #if defined(FE_DOWNWARD)
453
+ case SIMDE_MM_ROUND_DOWN:
454
+ fe_mode = FE_DOWNWARD;
455
+ break;
456
+ #endif
457
+
458
+ #if defined(FE_UPWARD)
459
+ case SIMDE_MM_ROUND_UP:
460
+ fe_mode = FE_UPWARD;
461
+ break;
462
+ #endif
463
+
464
+ default:
465
+ return;
466
+ }
467
+
468
+ fesetround(fe_mode);
469
+ #else
470
+ (void) a;
471
+ #endif
472
+ }
473
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
474
+ #define _MM_SET_ROUNDING_MODE(a) SIMDE_MM_SET_ROUNDING_MODE(a)
475
+ #endif
476
+
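Without native SSE, the two rounding-mode accessors above are thin wrappers over <fenv.h>. A minimal sketch of the same fegetround()/fesetround() round-trip, assuming only a hosted C99 environment:

    #include <fenv.h>
    #include <stdio.h>

    int main(void) {
      int saved = fegetround();      /* what SIMDE_MM_GET_ROUNDING_MODE() queries */

      fesetround(FE_TOWARDZERO);     /* SIMDE_MM_SET_ROUNDING_MODE(SIMDE_MM_ROUND_TOWARD_ZERO) */
      printf("toward zero active: %d\n", fegetround() == FE_TOWARDZERO);

      fesetround(saved);             /* restore the previous mode */
      return 0;
    }
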
477
+ SIMDE_FUNCTION_ATTRIBUTES
478
+ uint32_t
479
+ SIMDE_MM_GET_FLUSH_ZERO_MODE (void) {
480
+ #if defined(SIMDE_X86_SSE_NATIVE)
481
+ return _mm_getcsr() & _MM_FLUSH_ZERO_MASK;
482
+ #else
483
+ return SIMDE_MM_FLUSH_ZERO_OFF;
484
+ #endif
485
+ }
486
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
487
+ #define _MM_GET_FLUSH_ZERO_MODE() SIMDE_MM_GET_FLUSH_ZERO_MODE()
488
+ #endif
489
+
490
+ SIMDE_FUNCTION_ATTRIBUTES
491
+ void
492
+ SIMDE_MM_SET_FLUSH_ZERO_MODE (uint32_t a) {
493
+ #if defined(SIMDE_X86_SSE_NATIVE)
494
+ _MM_SET_FLUSH_ZERO_MODE(a);
495
+ #else
496
+ (void) a;
497
+ #endif
498
+ }
499
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
500
+ #define _MM_SET_FLUSH_ZERO_MODE(a) SIMDE_MM_SET_FLUSH_ZERO_MODE(a)
501
+ #endif
502
+
503
+ SIMDE_FUNCTION_ATTRIBUTES
504
+ uint32_t
505
+ simde_mm_getcsr (void) {
506
+ #if defined(SIMDE_X86_SSE_NATIVE)
507
+ return _mm_getcsr();
508
+ #else
509
+ return SIMDE_MM_GET_ROUNDING_MODE();
510
+ #endif
511
+ }
512
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
513
+ #define _mm_getcsr() simde_mm_getcsr()
514
+ #endif
515
+
516
+ SIMDE_FUNCTION_ATTRIBUTES
517
+ void
518
+ simde_mm_setcsr (uint32_t a) {
519
+ #if defined(SIMDE_X86_SSE_NATIVE)
520
+ _mm_setcsr(a);
521
+ #else
522
+ SIMDE_MM_SET_ROUNDING_MODE(HEDLEY_STATIC_CAST(unsigned int, a));
523
+ #endif
524
+ }
525
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
526
+ #define _mm_setcsr(a) simde_mm_setcsr(a)
527
+ #endif
528
+
529
+ SIMDE_FUNCTION_ATTRIBUTES
530
+ simde__m128
531
+ simde_x_mm_round_ps (simde__m128 a, int rounding, int lax_rounding)
532
+ SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15)
533
+ SIMDE_REQUIRE_CONSTANT_RANGE(lax_rounding, 0, 1) {
534
+ simde__m128_private
535
+ r_,
536
+ a_ = simde__m128_to_private(a);
537
+
538
+ (void) lax_rounding;
539
+
540
+ /* For architectures which lack a current direction SIMD instruction.
541
+ *
542
+ * Note that NEON actually has a current rounding mode instruction,
543
+ * but in ARMv8+ the rounding mode is ignored and nearest is always
544
+ * used, so we treat ARMv7 as having a rounding mode but ARMv8 as
545
+ * not. */
546
+ #if \
547
+ defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || \
548
+ defined(SIMDE_ARM_NEON_A32V8)
549
+ if ((rounding & 7) == SIMDE_MM_FROUND_CUR_DIRECTION)
550
+ rounding = HEDLEY_STATIC_CAST(int, SIMDE_MM_GET_ROUNDING_MODE()) << 13;
551
+ #endif
552
+
553
+ switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) {
554
+ case SIMDE_MM_FROUND_CUR_DIRECTION:
555
+ #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
556
+ r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_round(a_.altivec_f32));
557
+ #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399)
558
+ r_.neon_f32 = vrndiq_f32(a_.neon_f32);
559
+ #elif defined(simde_math_nearbyintf)
560
+ SIMDE_VECTORIZE
561
+ for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
562
+ r_.f32[i] = simde_math_nearbyintf(a_.f32[i]);
563
+ }
564
+ #else
565
+ HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd());
566
+ #endif
567
+ break;
568
+
569
+ case SIMDE_MM_FROUND_TO_NEAREST_INT:
570
+ #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
571
+ r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_rint(a_.altivec_f32));
572
+ #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE)
573
+ r_.neon_f32 = vrndnq_f32(a_.neon_f32);
574
+ #elif defined(simde_math_roundevenf)
575
+ SIMDE_VECTORIZE
576
+ for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
577
+ r_.f32[i] = simde_math_roundevenf(a_.f32[i]);
578
+ }
579
+ #else
580
+ HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd());
581
+ #endif
582
+ break;
583
+
584
+ case SIMDE_MM_FROUND_TO_NEG_INF:
585
+ #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
586
+ r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_floor(a_.altivec_f32));
587
+ #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE)
588
+ r_.neon_f32 = vrndmq_f32(a_.neon_f32);
589
+ #elif defined(simde_math_floorf)
590
+ SIMDE_VECTORIZE
591
+ for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
592
+ r_.f32[i] = simde_math_floorf(a_.f32[i]);
593
+ }
594
+ #else
595
+ HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd());
596
+ #endif
597
+ break;
598
+
599
+ case SIMDE_MM_FROUND_TO_POS_INF:
600
+ #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
601
+ r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_ceil(a_.altivec_f32));
602
+ #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE)
603
+ r_.neon_f32 = vrndpq_f32(a_.neon_f32);
604
+ #elif defined(simde_math_ceilf)
605
+ SIMDE_VECTORIZE
606
+ for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
607
+ r_.f32[i] = simde_math_ceilf(a_.f32[i]);
608
+ }
609
+ #else
610
+ HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd());
611
+ #endif
612
+ break;
613
+
614
+ case SIMDE_MM_FROUND_TO_ZERO:
615
+ #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
616
+ r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_trunc(a_.altivec_f32));
617
+ #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE)
618
+ r_.neon_f32 = vrndq_f32(a_.neon_f32);
619
+ #elif defined(simde_math_truncf)
620
+ SIMDE_VECTORIZE
621
+ for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
622
+ r_.f32[i] = simde_math_truncf(a_.f32[i]);
623
+ }
624
+ #else
625
+ HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd());
626
+ #endif
627
+ break;
628
+
629
+ default:
630
+ HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd());
631
+ }
632
+
633
+ return simde__m128_from_private(r_);
634
+ }
635
+ #if defined(SIMDE_X86_SSE4_1_NATIVE)
636
+ #define simde_mm_round_ps(a, rounding) _mm_round_ps((a), (rounding))
637
+ #else
638
+ #define simde_mm_round_ps(a, rounding) simde_x_mm_round_ps((a), (rounding), 0)
639
+ #endif
640
+ #if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)
641
+ #define _mm_round_ps(a, rounding) simde_mm_round_ps((a), (rounding))
642
+ #endif
643
+
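The portable branches of simde_x_mm_round_ps() above apply one libm routine per lane; the simde_math_* wrappers typically resolve to the C99 <math.h> functions used below. A scalar sketch of how the explicit directions behave on -2.5:

    #include <math.h>
    #include <stdio.h>

    int main(void) {
      float x = -2.5f;

      printf("to nearest (ties to even): %.1f\n", rintf(x));      /* -2.0 under FE_TONEAREST */
      printf("toward -inf (floor):       %.1f\n", floorf(x));     /* -3.0 */
      printf("toward +inf (ceil):        %.1f\n", ceilf(x));      /* -2.0 */
      printf("toward zero (trunc):       %.1f\n", truncf(x));     /* -2.0 */
      printf("current direction:         %.1f\n", nearbyintf(x)); /* -2.0, no inexact raised */
      return 0;
    }
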
644
+ SIMDE_FUNCTION_ATTRIBUTES
645
+ simde__m128
646
+ simde_mm_set_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) {
647
+ #if defined(SIMDE_X86_SSE_NATIVE)
648
+ return _mm_set_ps(e3, e2, e1, e0);
649
+ #else
650
+ simde__m128_private r_;
651
+
652
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
653
+ SIMDE_ALIGN_TO_16 simde_float32 data[4] = { e0, e1, e2, e3 };
654
+ r_.neon_f32 = vld1q_f32(data);
655
+ #elif defined(SIMDE_WASM_SIMD128_NATIVE)
656
+ r_.wasm_v128 = wasm_f32x4_make(e0, e1, e2, e3);
657
+ #else
658
+ r_.f32[0] = e0;
659
+ r_.f32[1] = e1;
660
+ r_.f32[2] = e2;
661
+ r_.f32[3] = e3;
662
+ #endif
663
+
664
+ return simde__m128_from_private(r_);
665
+ #endif
666
+ }
667
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
668
+ # define _mm_set_ps(e3, e2, e1, e0) simde_mm_set_ps(e3, e2, e1, e0)
669
+ #endif
670
+
671
+ SIMDE_FUNCTION_ATTRIBUTES
672
+ simde__m128
673
+ simde_mm_set_ps1 (simde_float32 a) {
674
+ #if defined(SIMDE_X86_SSE_NATIVE)
675
+ return _mm_set_ps1(a);
676
+ #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
677
+ return vdupq_n_f32(a);
678
+ #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
679
+ (void) a;
680
+ return vec_splats(a);
681
+ #else
682
+ return simde_mm_set_ps(a, a, a, a);
683
+ #endif
684
+ }
685
+ #define simde_mm_set1_ps(a) simde_mm_set_ps1(a)
686
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
687
+ # define _mm_set_ps1(a) simde_mm_set_ps1(a)
688
+ # define _mm_set1_ps(a) simde_mm_set1_ps(a)
689
+ #endif
690
+
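Note that simde_mm_set_ps(e3, e2, e1, e0) follows the Intel argument convention: the last argument lands in lane 0 and the first in lane 3, as the portable branch above makes explicit. A scalar sketch assuming nothing beyond standard C:

    #include <stdio.h>

    int main(void) {
      float e3 = 3.0f, e2 = 2.0f, e1 = 1.0f, e0 = 0.0f;

      /* Same lane layout the portable branch of simde_mm_set_ps() produces:
       * the *last* argument is element 0. */
      float r[4] = { e0, e1, e2, e3 };

      for (int i = 0; i < 4; i++)
        printf("lane %d = %.1f\n", i, r[i]);   /* 0.0, 1.0, 2.0, 3.0 */
      return 0;
    }
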
691
+ SIMDE_FUNCTION_ATTRIBUTES
692
+ simde__m128
693
+ simde_mm_move_ss (simde__m128 a, simde__m128 b) {
694
+ #if defined(SIMDE_X86_SSE_NATIVE)
695
+ return _mm_move_ss(a, b);
696
+ #else
697
+ simde__m128_private
698
+ r_,
699
+ a_ = simde__m128_to_private(a),
700
+ b_ = simde__m128_to_private(b);
701
+
702
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
703
+ r_.neon_f32 = vsetq_lane_f32(vgetq_lane_f32(b_.neon_f32, 0), a_.neon_f32, 0);
704
+ #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
705
+ SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) m = {
706
+ 16, 17, 18, 19,
707
+ 4, 5, 6, 7,
708
+ 8, 9, 10, 11,
709
+ 12, 13, 14, 15
710
+ };
711
+ r_.altivec_f32 = vec_perm(a_.altivec_f32, b_.altivec_f32, m);
712
+ #elif defined(SIMDE_WASM_SIMD128_NATIVE)
713
+ r_.wasm_v128 = wasm_v8x16_shuffle(b_.wasm_v128, a_.wasm_v128, 0, 1, 2, 3, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
714
+ #elif defined(SIMDE_SHUFFLE_VECTOR_)
715
+ r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 4, 1, 2, 3);
716
+ #else
717
+ r_.f32[0] = b_.f32[0];
718
+ r_.f32[1] = a_.f32[1];
719
+ r_.f32[2] = a_.f32[2];
720
+ r_.f32[3] = a_.f32[3];
721
+ #endif
722
+
723
+ return simde__m128_from_private(r_);
724
+ #endif
725
+ }
726
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
727
+ # define _mm_move_ss(a, b) simde_mm_move_ss((a), (b))
728
+ #endif
729
+
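Several of the *_ss helpers later in this file are built on simde_mm_move_ss(): lane 0 comes from b, lanes 1..3 pass through from a. A scalar model of that contract (plain C, no SIMDe dependency; move_ss here is an illustrative helper, not part of the API):

    #include <stdio.h>

    /* Scalar model of the portable branch of simde_mm_move_ss(). */
    static void move_ss(float r[4], const float a[4], const float b[4]) {
      r[0] = b[0];              /* only the low lane is replaced */
      r[1] = a[1];
      r[2] = a[2];
      r[3] = a[3];
    }

    int main(void) {
      float a[4] = { 10, 11, 12, 13 }, b[4] = { 99, 98, 97, 96 }, r[4];
      move_ss(r, a, b);
      printf("%g %g %g %g\n", r[0], r[1], r[2], r[3]);  /* 99 11 12 13 */
      return 0;
    }
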
730
+ SIMDE_FUNCTION_ATTRIBUTES
731
+ simde__m128
732
+ simde_mm_add_ps (simde__m128 a, simde__m128 b) {
733
+ #if defined(SIMDE_X86_SSE_NATIVE)
734
+ return _mm_add_ps(a, b);
735
+ #else
736
+ simde__m128_private
737
+ r_,
738
+ a_ = simde__m128_to_private(a),
739
+ b_ = simde__m128_to_private(b);
740
+
741
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
742
+ r_.neon_f32 = vaddq_f32(a_.neon_f32, b_.neon_f32);
743
+ #elif defined(SIMDE_WASM_SIMD128_NATIVE)
744
+ r_.wasm_v128 = wasm_f32x4_add(a_.wasm_v128, b_.wasm_v128);
745
+ #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
746
+ r_.altivec_f32 = vec_add(a_.altivec_f32, b_.altivec_f32);
747
+ #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
748
+ r_.f32 = a_.f32 + b_.f32;
749
+ #else
750
+ SIMDE_VECTORIZE
751
+ for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
752
+ r_.f32[i] = a_.f32[i] + b_.f32[i];
753
+ }
754
+ #endif
755
+
756
+ return simde__m128_from_private(r_);
757
+ #endif
758
+ }
759
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
760
+ # define _mm_add_ps(a, b) simde_mm_add_ps((a), (b))
761
+ #endif
762
+
763
+ SIMDE_FUNCTION_ATTRIBUTES
764
+ simde__m128
765
+ simde_mm_add_ss (simde__m128 a, simde__m128 b) {
766
+ #if defined(SIMDE_X86_SSE_NATIVE)
767
+ return _mm_add_ss(a, b);
768
+ #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
769
+ return simde_mm_move_ss(a, simde_mm_add_ps(a, b));
770
+ #else
771
+ simde__m128_private
772
+ r_,
773
+ a_ = simde__m128_to_private(a),
774
+ b_ = simde__m128_to_private(b);
775
+
776
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
777
+ float32_t b0 = vgetq_lane_f32(b_.neon_f32, 0);
778
+ float32x4_t value = vsetq_lane_f32(b0, vdupq_n_f32(0), 0);
779
+ // the upper lanes of the result must keep the values from <a>.
780
+ r_.neon_f32 = vaddq_f32(a_.neon_f32, value);
781
+ #else
782
+ r_.f32[0] = a_.f32[0] + b_.f32[0];
783
+ r_.f32[1] = a_.f32[1];
784
+ r_.f32[2] = a_.f32[2];
785
+ r_.f32[3] = a_.f32[3];
786
+ #endif
787
+
788
+ return simde__m128_from_private(r_);
789
+ #endif
790
+ }
791
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
792
+ # define _mm_add_ss(a, b) simde_mm_add_ss((a), (b))
793
+ #endif
794
+
795
+ SIMDE_FUNCTION_ATTRIBUTES
796
+ simde__m128
797
+ simde_mm_and_ps (simde__m128 a, simde__m128 b) {
798
+ #if defined(SIMDE_X86_SSE_NATIVE)
799
+ return _mm_and_ps(a, b);
800
+ #else
801
+ simde__m128_private
802
+ r_,
803
+ a_ = simde__m128_to_private(a),
804
+ b_ = simde__m128_to_private(b);
805
+
806
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
807
+ r_.neon_i32 = vandq_s32(a_.neon_i32, b_.neon_i32);
808
+ #elif defined(SIMDE_WASM_SIMD128_NATIVE)
809
+ r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128);
810
+ #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
811
+ r_.i32 = a_.i32 & b_.i32;
812
+ #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
813
+ r_.altivec_f32 = vec_and(a_.altivec_f32, b_.altivec_f32);
814
+ #else
815
+ SIMDE_VECTORIZE
816
+ for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
817
+ r_.i32[i] = a_.i32[i] & b_.i32[i];
818
+ }
819
+ #endif
820
+
821
+ return simde__m128_from_private(r_);
822
+ #endif
823
+ }
824
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
825
+ # define _mm_and_ps(a, b) simde_mm_and_ps((a), (b))
826
+ #endif
827
+
828
+ SIMDE_FUNCTION_ATTRIBUTES
829
+ simde__m128
830
+ simde_mm_andnot_ps (simde__m128 a, simde__m128 b) {
831
+ #if defined(SIMDE_X86_SSE_NATIVE)
832
+ return _mm_andnot_ps(a, b);
833
+ #else
834
+ simde__m128_private
835
+ r_,
836
+ a_ = simde__m128_to_private(a),
837
+ b_ = simde__m128_to_private(b);
838
+
839
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
840
+ r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32);
841
+ #elif defined(SIMDE_WASM_SIMD128_NATIVE)
842
+ r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128);
843
+ #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
844
+ r_.altivec_f32 = vec_andc(b_.altivec_f32, a_.altivec_f32);
845
+ #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
846
+ r_.i32 = ~a_.i32 & b_.i32;
847
+ #else
848
+ SIMDE_VECTORIZE
849
+ for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
850
+ r_.i32[i] = ~(a_.i32[i]) & b_.i32[i];
851
+ }
852
+ #endif
853
+
854
+ return simde__m128_from_private(r_);
855
+ #endif
856
+ }
857
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
858
+ # define _mm_andnot_ps(a, b) simde_mm_andnot_ps((a), (b))
859
+ #endif
860
+
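Note the operand order above: _mm_andnot_ps(a, b) complements its first argument, i.e. it computes (~a) & b, not a & (~b). A one-lane scalar check:

    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
      uint32_t a = 0xF0F0F0F0u, b = 0xFFFF0000u;

      /* Matches the portable branch above: r = ~a & b, not a & ~b. */
      uint32_t r = ~a & b;

      printf("0x%08X\n", (unsigned) r);   /* 0x0F0F0000 */
      return 0;
    }
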
861
+ SIMDE_FUNCTION_ATTRIBUTES
862
+ simde__m128
863
+ simde_mm_xor_ps (simde__m128 a, simde__m128 b) {
864
+ #if defined(SIMDE_X86_SSE_NATIVE)
865
+ return _mm_xor_ps(a, b);
866
+ #else
867
+ simde__m128_private
868
+ r_,
869
+ a_ = simde__m128_to_private(a),
870
+ b_ = simde__m128_to_private(b);
871
+
872
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
873
+ r_.neon_i32 = veorq_s32(a_.neon_i32, b_.neon_i32);
874
+ #elif defined(SIMDE_WASM_SIMD128_NATIVE)
875
+ r_.wasm_v128 = wasm_v128_xor(a_.wasm_v128, b_.wasm_v128);
876
+ #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
877
+ r_.altivec_i32 = vec_xor(a_.altivec_i32, b_.altivec_i32);
878
+ #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
879
+ r_.i32f = a_.i32f ^ b_.i32f;
880
+ #else
881
+ SIMDE_VECTORIZE
882
+ for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
883
+ r_.u32[i] = a_.u32[i] ^ b_.u32[i];
884
+ }
885
+ #endif
886
+
887
+ return simde__m128_from_private(r_);
888
+ #endif
889
+ }
890
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
891
+ # define _mm_xor_ps(a, b) simde_mm_xor_ps((a), (b))
892
+ #endif
893
+
894
+ SIMDE_FUNCTION_ATTRIBUTES
895
+ simde__m128
896
+ simde_mm_or_ps (simde__m128 a, simde__m128 b) {
897
+ #if defined(SIMDE_X86_SSE_NATIVE)
898
+ return _mm_or_ps(a, b);
899
+ #else
900
+ simde__m128_private
901
+ r_,
902
+ a_ = simde__m128_to_private(a),
903
+ b_ = simde__m128_to_private(b);
904
+
905
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
906
+ r_.neon_i32 = vorrq_s32(a_.neon_i32, b_.neon_i32);
907
+ #elif defined(SIMDE_WASM_SIMD128_NATIVE)
908
+ r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128);
909
+ #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
910
+ r_.altivec_i32 = vec_or(a_.altivec_i32, b_.altivec_i32);
911
+ #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
912
+ r_.i32f = a_.i32f | b_.i32f;
913
+ #else
914
+ SIMDE_VECTORIZE
915
+ for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
916
+ r_.u32[i] = a_.u32[i] | b_.u32[i];
917
+ }
918
+ #endif
919
+
920
+ return simde__m128_from_private(r_);
921
+ #endif
922
+ }
923
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
924
+ # define _mm_or_ps(a, b) simde_mm_or_ps((a), (b))
925
+ #endif
926
+
927
+ SIMDE_FUNCTION_ATTRIBUTES
928
+ simde__m128
929
+ simde_x_mm_not_ps(simde__m128 a) {
930
+ #if defined(SIMDE_X86_AVX512VL_NATIVE)
931
+ __m128i ai = _mm_castps_si128(a);
932
+ return _mm_castsi128_ps(_mm_ternarylogic_epi32(ai, ai, ai, 0x55));
933
+ #elif defined(SIMDE_X86_SSE2_NATIVE)
934
+ /* Note: we use ints instead of floats because we don't want cmpeq
935
+ * to return false for (NaN, NaN) */
936
+ __m128i ai = _mm_castps_si128(a);
937
+ return _mm_castsi128_ps(_mm_andnot_si128(ai, _mm_cmpeq_epi32(ai, ai)));
938
+ #else
939
+ simde__m128_private
940
+ r_,
941
+ a_ = simde__m128_to_private(a);
942
+
943
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
944
+ r_.neon_i32 = vmvnq_s32(a_.neon_i32);
945
+ #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
946
+ r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32);
947
+ #elif defined(SIMDE_WASM_SIMD128_NATIVE)
948
+ r_.wasm_v128 = wasm_v128_not(a_.wasm_v128);
949
+ #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
950
+ r_.i32 = ~a_.i32;
951
+ #else
952
+ SIMDE_VECTORIZE
953
+ for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
954
+ r_.i32[i] = ~(a_.i32[i]);
955
+ }
956
+ #endif
957
+
958
+ return simde__m128_from_private(r_);
959
+ #endif
960
+ }
961
+
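The SSE2 branch of simde_x_mm_not_ps() above builds its all-ones mask by comparing a register with itself as integers; a floating-point compare would leave zero bits wherever a lane holds NaN, because NaN != NaN. A scalar sketch of why the integer route is the safe one (assuming only C99):

    #include <math.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    int main(void) {
      float f = nanf("");
      uint32_t bits;
      memcpy(&bits, &f, sizeof(bits));   /* reinterpret the lane as an integer */

      printf("float NaN == itself?  %d\n", f == f);       /* 0: a float cmpeq would give a 0 mask */
      printf("integer bits == bits? %d\n", bits == bits); /* 1: integer cmpeq gives the ~0 mask   */

      /* The SSE2 branch then computes ~a & all_ones, i.e. a plain bitwise NOT. */
      printf("~bits = 0x%08X\n", (unsigned) ~bits);
      return 0;
    }
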
962
+ SIMDE_FUNCTION_ATTRIBUTES
963
+ simde__m128
964
+ simde_x_mm_select_ps(simde__m128 a, simde__m128 b, simde__m128 mask) {
965
+ /* This function is for when you want to blend two elements together
966
+ * according to a mask. It is similar to _mm_blendv_ps, except that
967
+ * it is undefined whether the blend is based on the highest bit in
968
+ * each lane (like blendv) or just bitwise operations. This allows
969
+ * us to implement the function efficiently everywhere.
970
+ *
971
+ * Basically, you promise that all the lanes in mask are either 0 or
972
+ * ~0. */
973
+ #if defined(SIMDE_X86_SSE4_1_NATIVE)
974
+ return _mm_blendv_ps(a, b, mask);
975
+ #else
976
+ simde__m128_private
977
+ r_,
978
+ a_ = simde__m128_to_private(a),
979
+ b_ = simde__m128_to_private(b),
980
+ mask_ = simde__m128_to_private(mask);
981
+
982
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
983
+ r_.neon_i32 = vbslq_s32(mask_.neon_u32, b_.neon_i32, a_.neon_i32);
984
+ #elif defined(SIMDE_WASM_SIMD128_NATIVE)
985
+ r_.wasm_v128 = wasm_v128_bitselect(b_.wasm_v128, a_.wasm_v128, mask_.wasm_v128);
986
+ #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
987
+ r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, mask_.altivec_u32);
988
+ #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
989
+ r_.i32 = a_.i32 ^ ((a_.i32 ^ b_.i32) & mask_.i32);
990
+ #else
991
+ SIMDE_VECTORIZE
992
+ for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
993
+ r_.i32[i] = a_.i32[i] ^ ((a_.i32[i] ^ b_.i32[i]) & mask_.i32[i]);
994
+ }
995
+ #endif
996
+
997
+ return simde__m128_from_private(r_);
998
+ #endif
999
+ }
1000
+
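The fallback in simde_x_mm_select_ps() above uses the branch-free merge a ^ ((a ^ b) & mask): wherever the mask bits are 1 the result takes b's bits, elsewhere it keeps a's. The caller's promise that every lane is all-0 or all-1 is what makes this equivalent to blendv. A scalar demonstration (select_bits is an illustrative helper):

    #include <stdint.h>
    #include <stdio.h>

    /* Branch-free select, identical to the portable branch above. */
    static uint32_t select_bits(uint32_t a, uint32_t b, uint32_t mask) {
      return a ^ ((a ^ b) & mask);
    }

    int main(void) {
      uint32_t a = 0x11111111u, b = 0x22222222u;

      printf("mask all 0      -> 0x%08X\n", (unsigned) select_bits(a, b, 0x00000000u)); /* a */
      printf("mask all 1      -> 0x%08X\n", (unsigned) select_bits(a, b, 0xFFFFFFFFu)); /* b */
      /* A partial mask mixes bits, which is why each lane must be 0 or ~0. */
      printf("mask 0xFFFF0000 -> 0x%08X\n", (unsigned) select_bits(a, b, 0xFFFF0000u)); /* 0x22221111 */
      return 0;
    }
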
1001
+ SIMDE_FUNCTION_ATTRIBUTES
1002
+ simde__m64
1003
+ simde_mm_avg_pu16 (simde__m64 a, simde__m64 b) {
1004
+ #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
1005
+ return _mm_avg_pu16(a, b);
1006
+ #else
1007
+ simde__m64_private
1008
+ r_,
1009
+ a_ = simde__m64_to_private(a),
1010
+ b_ = simde__m64_to_private(b);
1011
+
1012
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
1013
+ r_.neon_u16 = vrhadd_u16(b_.neon_u16, a_.neon_u16);
1014
+ #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_)
1015
+ uint32_t wa SIMDE_VECTOR(16);
1016
+ uint32_t wb SIMDE_VECTOR(16);
1017
+ uint32_t wr SIMDE_VECTOR(16);
1018
+ SIMDE_CONVERT_VECTOR_(wa, a_.u16);
1019
+ SIMDE_CONVERT_VECTOR_(wb, b_.u16);
1020
+ wr = (wa + wb + 1) >> 1;
1021
+ SIMDE_CONVERT_VECTOR_(r_.u16, wr);
1022
+ #else
1023
+ SIMDE_VECTORIZE
1024
+ for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
1025
+ r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1;
1026
+ }
1027
+ #endif
1028
+
1029
+ return simde__m64_from_private(r_);
1030
+ #endif
1031
+ }
1032
+ #define simde_m_pavgw(a, b) simde_mm_avg_pu16(a, b)
1033
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
1034
+ # define _mm_avg_pu16(a, b) simde_mm_avg_pu16(a, b)
1035
+ # define _m_pavgw(a, b) simde_mm_avg_pu16(a, b)
1036
+ #endif
1037
+
1038
+ SIMDE_FUNCTION_ATTRIBUTES
1039
+ simde__m64
1040
+ simde_mm_avg_pu8 (simde__m64 a, simde__m64 b) {
1041
+ #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
1042
+ return _mm_avg_pu8(a, b);
1043
+ #else
1044
+ simde__m64_private
1045
+ r_,
1046
+ a_ = simde__m64_to_private(a),
1047
+ b_ = simde__m64_to_private(b);
1048
+
1049
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
1050
+ r_.neon_u8 = vrhadd_u8(b_.neon_u8, a_.neon_u8);
1051
+ #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_)
1052
+ uint16_t wa SIMDE_VECTOR(16);
1053
+ uint16_t wb SIMDE_VECTOR(16);
1054
+ uint16_t wr SIMDE_VECTOR(16);
1055
+ SIMDE_CONVERT_VECTOR_(wa, a_.u8);
1056
+ SIMDE_CONVERT_VECTOR_(wb, b_.u8);
1057
+ wr = (wa + wb + 1) >> 1;
1058
+ SIMDE_CONVERT_VECTOR_(r_.u8, wr);
1059
+ #else
1060
+ SIMDE_VECTORIZE
1061
+ for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
1062
+ r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1;
1063
+ }
1064
+ #endif
1065
+
1066
+ return simde__m64_from_private(r_);
1067
+ #endif
1068
+ }
1069
+ #define simde_m_pavgb(a, b) simde_mm_avg_pu8(a, b)
1070
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
1071
+ # define _mm_avg_pu8(a, b) simde_mm_avg_pu8(a, b)
1072
+ # define _m_pavgb(a, b) simde_mm_avg_pu8(a, b)
1073
+ #endif
1074
+
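Both averaging intrinsics above use the rounded average (a + b + 1) >> 1, computed in a wider type so neither the sum nor the +1 can overflow. A scalar sketch of one 8-bit lane (avg_u8 is an illustrative helper):

    #include <stdint.h>
    #include <stdio.h>

    /* Scalar model of one lane of simde_mm_avg_pu8(): widen, add, round, shift. */
    static uint8_t avg_u8(uint8_t a, uint8_t b) {
      return (uint8_t) (((uint16_t) a + (uint16_t) b + 1) >> 1);
    }

    int main(void) {
      printf("%u\n", (unsigned) avg_u8(10, 11));   /* 11: the +1 rounds .5 upward     */
      printf("%u\n", (unsigned) avg_u8(255, 255)); /* 255: widening prevents overflow */
      return 0;
    }
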
1075
+ SIMDE_FUNCTION_ATTRIBUTES
1076
+ simde__m128
1077
+ simde_x_mm_abs_ps(simde__m128 a) {
1078
+ #if defined(SIMDE_X86_AVX512F_NATIVE) && \
1079
+ (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,1,0))
1080
+ return _mm512_castps512_ps128(_mm512_abs_ps(_mm512_castps128_ps512(a)));
1081
+ #else
1082
+ simde__m128_private
1083
+ r_,
1084
+ a_ = simde__m128_to_private(a);
1085
+
1086
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
1087
+ r_.neon_f32 = vabsq_f32(a_.neon_f32);
1088
+ #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
1089
+ r_.altivec_f32 = vec_abs(a_.altivec_f32);
1090
+ #elif defined(SIMDE_WASM_SIMD128_NATIVE)
1091
+ r_.wasm_v128 = wasm_f32x4_abs(a_.wasm_v128);
1092
+ #else
1093
+ SIMDE_VECTORIZE
1094
+ for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
1095
+ r_.f32[i] = simde_math_fabsf(a_.f32[i]);
1096
+ }
1097
+ #endif
1098
+
1099
+ return simde__m128_from_private(r_);
1100
+ #endif
1101
+ }
1102
+
1103
+ SIMDE_FUNCTION_ATTRIBUTES
1104
+ simde__m128
1105
+ simde_mm_cmpeq_ps (simde__m128 a, simde__m128 b) {
1106
+ #if defined(SIMDE_X86_SSE_NATIVE)
1107
+ return _mm_cmpeq_ps(a, b);
1108
+ #else
1109
+ simde__m128_private
1110
+ r_,
1111
+ a_ = simde__m128_to_private(a),
1112
+ b_ = simde__m128_to_private(b);
1113
+
1114
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
1115
+ r_.neon_u32 = vceqq_f32(a_.neon_f32, b_.neon_f32);
1116
+ #elif defined(SIMDE_WASM_SIMD128_NATIVE)
1117
+ r_.wasm_v128 = wasm_f32x4_eq(a_.wasm_v128, b_.wasm_v128);
1118
+ #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
1119
+ r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpeq(a_.altivec_f32, b_.altivec_f32));
1120
+ #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
1121
+ r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), a_.f32 == b_.f32);
1122
+ #else
1123
+ SIMDE_VECTORIZE
1124
+ for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
1125
+ r_.u32[i] = (a_.f32[i] == b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0);
1126
+ }
1127
+ #endif
1128
+
1129
+ return simde__m128_from_private(r_);
1130
+ #endif
1131
+ }
1132
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
1133
+ # define _mm_cmpeq_ps(a, b) simde_mm_cmpeq_ps((a), (b))
1134
+ #endif
1135
+
1136
+ SIMDE_FUNCTION_ATTRIBUTES
1137
+ simde__m128
1138
+ simde_mm_cmpeq_ss (simde__m128 a, simde__m128 b) {
1139
+ #if defined(SIMDE_X86_SSE_NATIVE)
1140
+ return _mm_cmpeq_ss(a, b);
1141
+ #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
1142
+ return simde_mm_move_ss(a, simde_mm_cmpeq_ps(a, b));
1143
+ #else
1144
+ simde__m128_private
1145
+ r_,
1146
+ a_ = simde__m128_to_private(a),
1147
+ b_ = simde__m128_to_private(b);
1148
+
1149
+ r_.u32[0] = (a_.f32[0] == b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0);
1150
+ SIMDE_VECTORIZE
1151
+ for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
1152
+ r_.u32[i] = a_.u32[i];
1153
+ }
1154
+
1155
+ return simde__m128_from_private(r_);
1156
+ #endif
1157
+ }
1158
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
1159
+ # define _mm_cmpeq_ss(a, b) simde_mm_cmpeq_ss((a), (b))
1160
+ #endif
1161
+
1162
+ SIMDE_FUNCTION_ATTRIBUTES
1163
+ simde__m128
1164
+ simde_mm_cmpge_ps (simde__m128 a, simde__m128 b) {
1165
+ #if defined(SIMDE_X86_SSE_NATIVE)
1166
+ return _mm_cmpge_ps(a, b);
1167
+ #else
1168
+ simde__m128_private
1169
+ r_,
1170
+ a_ = simde__m128_to_private(a),
1171
+ b_ = simde__m128_to_private(b);
1172
+
1173
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
1174
+ r_.neon_u32 = vcgeq_f32(a_.neon_f32, b_.neon_f32);
1175
+ #elif defined(SIMDE_WASM_SIMD128_NATIVE)
1176
+ r_.wasm_v128 = wasm_f32x4_ge(a_.wasm_v128, b_.wasm_v128);
1177
+ #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
1178
+ r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpge(a_.altivec_f32, b_.altivec_f32));
1179
+ #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
1180
+ r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 >= b_.f32));
1181
+ #else
1182
+ SIMDE_VECTORIZE
1183
+ for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
1184
+ r_.u32[i] = (a_.f32[i] >= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0);
1185
+ }
1186
+ #endif
1187
+
1188
+ return simde__m128_from_private(r_);
1189
+ #endif
1190
+ }
1191
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
1192
+ # define _mm_cmpge_ps(a, b) simde_mm_cmpge_ps((a), (b))
1193
+ #endif
1194
+
1195
+ SIMDE_FUNCTION_ATTRIBUTES
1196
+ simde__m128
1197
+ simde_mm_cmpge_ss (simde__m128 a, simde__m128 b) {
1198
+ #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI)
1199
+ return _mm_cmpge_ss(a, b);
1200
+ #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
1201
+ return simde_mm_move_ss(a, simde_mm_cmpge_ps(a, b));
1202
+ #else
1203
+ simde__m128_private
1204
+ r_,
1205
+ a_ = simde__m128_to_private(a),
1206
+ b_ = simde__m128_to_private(b);
1207
+
1208
+ r_.u32[0] = (a_.f32[0] >= b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0);
1209
+ SIMDE_VECTORIZE
1210
+ for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
1211
+ r_.u32[i] = a_.u32[i];
1212
+ }
1213
+
1214
+ return simde__m128_from_private(r_);
1215
+ #endif
1216
+ }
1217
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
1218
+ # define _mm_cmpge_ss(a, b) simde_mm_cmpge_ss((a), (b))
1219
+ #endif
1220
+
1221
+ SIMDE_FUNCTION_ATTRIBUTES
1222
+ simde__m128
1223
+ simde_mm_cmpgt_ps (simde__m128 a, simde__m128 b) {
1224
+ #if defined(SIMDE_X86_SSE_NATIVE)
1225
+ return _mm_cmpgt_ps(a, b);
1226
+ #else
1227
+ simde__m128_private
1228
+ r_,
1229
+ a_ = simde__m128_to_private(a),
1230
+ b_ = simde__m128_to_private(b);
1231
+
1232
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
1233
+ r_.neon_u32 = vcgtq_f32(a_.neon_f32, b_.neon_f32);
1234
+ #elif defined(SIMDE_WASM_SIMD128_NATIVE)
1235
+ r_.wasm_v128 = wasm_f32x4_gt(a_.wasm_v128, b_.wasm_v128);
1236
+ #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
1237
+ r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpgt(a_.altivec_f32, b_.altivec_f32));
1238
+ #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
1239
+ r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 > b_.f32));
1240
+ #else
1241
+ SIMDE_VECTORIZE
1242
+ for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
1243
+ r_.u32[i] = (a_.f32[i] > b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0);
1244
+ }
1245
+ #endif
1246
+
1247
+ return simde__m128_from_private(r_);
1248
+ #endif
1249
+ }
1250
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
1251
+ # define _mm_cmpgt_ps(a, b) simde_mm_cmpgt_ps((a), (b))
1252
+ #endif
1253
+
1254
+ SIMDE_FUNCTION_ATTRIBUTES
1255
+ simde__m128
1256
+ simde_mm_cmpgt_ss (simde__m128 a, simde__m128 b) {
1257
+ #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI)
1258
+ return _mm_cmpgt_ss(a, b);
1259
+ #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
1260
+ return simde_mm_move_ss(a, simde_mm_cmpgt_ps(a, b));
1261
+ #else
1262
+ simde__m128_private
1263
+ r_,
1264
+ a_ = simde__m128_to_private(a),
1265
+ b_ = simde__m128_to_private(b);
1266
+
1267
+ r_.u32[0] = (a_.f32[0] > b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0);
1268
+ SIMDE_VECTORIZE
1269
+ for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
1270
+ r_.u32[i] = a_.u32[i];
1271
+ }
1272
+
1273
+ return simde__m128_from_private(r_);
1274
+ #endif
1275
+ }
1276
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
1277
+ # define _mm_cmpgt_ss(a, b) simde_mm_cmpgt_ss((a), (b))
1278
+ #endif
1279
+
1280
+ SIMDE_FUNCTION_ATTRIBUTES
1281
+ simde__m128
1282
+ simde_mm_cmple_ps (simde__m128 a, simde__m128 b) {
1283
+ #if defined(SIMDE_X86_SSE_NATIVE)
1284
+ return _mm_cmple_ps(a, b);
1285
+ #else
1286
+ simde__m128_private
1287
+ r_,
1288
+ a_ = simde__m128_to_private(a),
1289
+ b_ = simde__m128_to_private(b);
1290
+
1291
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
1292
+ r_.neon_u32 = vcleq_f32(a_.neon_f32, b_.neon_f32);
1293
+ #elif defined(SIMDE_WASM_SIMD128_NATIVE)
1294
+ r_.wasm_v128 = wasm_f32x4_le(a_.wasm_v128, b_.wasm_v128);
1295
+ #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
1296
+ r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmple(a_.altivec_f32, b_.altivec_f32));
1297
+ #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
1298
+ r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 <= b_.f32));
1299
+ #else
1300
+ SIMDE_VECTORIZE
1301
+ for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
1302
+ r_.u32[i] = (a_.f32[i] <= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0);
1303
+ }
1304
+ #endif
1305
+
1306
+ return simde__m128_from_private(r_);
1307
+ #endif
1308
+ }
1309
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
1310
+ # define _mm_cmple_ps(a, b) simde_mm_cmple_ps((a), (b))
1311
+ #endif
1312
+
1313
+ SIMDE_FUNCTION_ATTRIBUTES
1314
+ simde__m128
1315
+ simde_mm_cmple_ss (simde__m128 a, simde__m128 b) {
1316
+ #if defined(SIMDE_X86_SSE_NATIVE)
1317
+ return _mm_cmple_ss(a, b);
1318
+ #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
1319
+ return simde_mm_move_ss(a, simde_mm_cmple_ps(a, b));
1320
+ #else
1321
+ simde__m128_private
1322
+ r_,
1323
+ a_ = simde__m128_to_private(a),
1324
+ b_ = simde__m128_to_private(b);
1325
+
1326
+ r_.u32[0] = (a_.f32[0] <= b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0);
1327
+ SIMDE_VECTORIZE
1328
+ for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
1329
+ r_.u32[i] = a_.u32[i];
1330
+ }
1331
+
1332
+ return simde__m128_from_private(r_);
1333
+ #endif
1334
+ }
1335
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
1336
+ # define _mm_cmple_ss(a, b) simde_mm_cmple_ss((a), (b))
1337
+ #endif
1338
+
1339
+ SIMDE_FUNCTION_ATTRIBUTES
1340
+ simde__m128
1341
+ simde_mm_cmplt_ps (simde__m128 a, simde__m128 b) {
1342
+ #if defined(SIMDE_X86_SSE_NATIVE)
1343
+ return _mm_cmplt_ps(a, b);
1344
+ #else
1345
+ simde__m128_private
1346
+ r_,
1347
+ a_ = simde__m128_to_private(a),
1348
+ b_ = simde__m128_to_private(b);
1349
+
1350
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
1351
+ r_.neon_u32 = vcltq_f32(a_.neon_f32, b_.neon_f32);
1352
+ #elif defined(SIMDE_WASM_SIMD128_NATIVE)
1353
+ r_.wasm_v128 = wasm_f32x4_lt(a_.wasm_v128, b_.wasm_v128);
1354
+ #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
1355
+ r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmplt(a_.altivec_f32, b_.altivec_f32));
1356
+ #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
1357
+ r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 < b_.f32));
1358
+ #else
1359
+ SIMDE_VECTORIZE
1360
+ for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
1361
+ r_.u32[i] = (a_.f32[i] < b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0);
1362
+ }
1363
+ #endif
1364
+
1365
+ return simde__m128_from_private(r_);
1366
+ #endif
1367
+ }
1368
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
1369
+ # define _mm_cmplt_ps(a, b) simde_mm_cmplt_ps((a), (b))
1370
+ #endif
1371
+
1372
+ SIMDE_FUNCTION_ATTRIBUTES
1373
+ simde__m128
1374
+ simde_mm_cmplt_ss (simde__m128 a, simde__m128 b) {
1375
+ #if defined(SIMDE_X86_SSE_NATIVE)
1376
+ return _mm_cmplt_ss(a, b);
1377
+ #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
1378
+ return simde_mm_move_ss(a, simde_mm_cmplt_ps(a, b));
1379
+ #else
1380
+ simde__m128_private
1381
+ r_,
1382
+ a_ = simde__m128_to_private(a),
1383
+ b_ = simde__m128_to_private(b);
1384
+
1385
+ r_.u32[0] = (a_.f32[0] < b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0);
1386
+ SIMDE_VECTORIZE
1387
+ for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
1388
+ r_.u32[i] = a_.u32[i];
1389
+ }
1390
+
1391
+ return simde__m128_from_private(r_);
1392
+ #endif
1393
+ }
1394
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
1395
+ # define _mm_cmplt_ss(a, b) simde_mm_cmplt_ss((a), (b))
1396
+ #endif
1397
+
1398
+ SIMDE_FUNCTION_ATTRIBUTES
1399
+ simde__m128
1400
+ simde_mm_cmpneq_ps (simde__m128 a, simde__m128 b) {
1401
+ #if defined(SIMDE_X86_SSE_NATIVE)
1402
+ return _mm_cmpneq_ps(a, b);
1403
+ #else
1404
+ simde__m128_private
1405
+ r_,
1406
+ a_ = simde__m128_to_private(a),
1407
+ b_ = simde__m128_to_private(b);
1408
+
1409
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
1410
+ r_.neon_u32 = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32));
1411
+ #elif defined(SIMDE_WASM_SIMD128_NATIVE)
1412
+ r_.wasm_v128 = wasm_f32x4_ne(a_.wasm_v128, b_.wasm_v128);
1413
+ #elif defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && SIMDE_ARCH_POWER_CHECK(900) && !defined(HEDLEY_IBM_VERSION)
1414
+ /* vec_cmpne(SIMDE_POWER_ALTIVEC_VECTOR(float), SIMDE_POWER_ALTIVEC_VECTOR(float))
1415
+ is missing from XL C/C++ v16.1.1,
1416
+ though the documentation (table 89 on page 432 of the IBM XL C/C++ for
1417
+ Linux Compiler Reference, Version 16.1.1) shows that it should be
1418
+ present. Both GCC and clang support it. */
1419
+ r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpne(a_.altivec_f32, b_.altivec_f32));
1420
+ #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
1421
+ r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpeq(a_.altivec_f32, b_.altivec_f32));
1422
+ r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_nor(r_.altivec_f32, r_.altivec_f32));
1423
+ #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
1424
+ r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 != b_.f32));
1425
+ #else
1426
+ SIMDE_VECTORIZE
1427
+ for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
1428
+ r_.u32[i] = (a_.f32[i] != b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0);
1429
+ }
1430
+ #endif
1431
+
1432
+ return simde__m128_from_private(r_);
1433
+ #endif
1434
+ }
1435
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
1436
+ # define _mm_cmpneq_ps(a, b) simde_mm_cmpneq_ps((a), (b))
1437
+ #endif
1438
+
1439
+ SIMDE_FUNCTION_ATTRIBUTES
1440
+ simde__m128
1441
+ simde_mm_cmpneq_ss (simde__m128 a, simde__m128 b) {
1442
+ #if defined(SIMDE_X86_SSE_NATIVE)
1443
+ return _mm_cmpneq_ss(a, b);
1444
+ #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
1445
+ return simde_mm_move_ss(a, simde_mm_cmpneq_ps(a, b));
1446
+ #else
1447
+ simde__m128_private
1448
+ r_,
1449
+ a_ = simde__m128_to_private(a),
1450
+ b_ = simde__m128_to_private(b);
1451
+
1452
+ r_.u32[0] = (a_.f32[0] != b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0);
1453
+ SIMDE_VECTORIZE
1454
+ for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
1455
+ r_.u32[i] = a_.u32[i];
1456
+ }
1457
+
1458
+ return simde__m128_from_private(r_);
1459
+ #endif
1460
+ }
1461
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
1462
+ # define _mm_cmpneq_ss(a, b) simde_mm_cmpneq_ss((a), (b))
1463
+ #endif
1464
+
1465
+ SIMDE_FUNCTION_ATTRIBUTES
1466
+ simde__m128
1467
+ simde_mm_cmpnge_ps (simde__m128 a, simde__m128 b) {
1468
+ return simde_mm_cmplt_ps(a, b);
1469
+ }
1470
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
1471
+ # define _mm_cmpnge_ps(a, b) simde_mm_cmpnge_ps((a), (b))
1472
+ #endif
1473
+
1474
+ SIMDE_FUNCTION_ATTRIBUTES
1475
+ simde__m128
1476
+ simde_mm_cmpnge_ss (simde__m128 a, simde__m128 b) {
1477
+ return simde_mm_cmplt_ss(a, b);
1478
+ }
1479
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
1480
+ # define _mm_cmpnge_ss(a, b) simde_mm_cmpnge_ss((a), (b))
1481
+ #endif
1482
+
1483
+ SIMDE_FUNCTION_ATTRIBUTES
1484
+ simde__m128
1485
+ simde_mm_cmpngt_ps (simde__m128 a, simde__m128 b) {
1486
+ return simde_mm_cmple_ps(a, b);
1487
+ }
1488
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
1489
+ # define _mm_cmpngt_ps(a, b) simde_mm_cmpngt_ps((a), (b))
1490
+ #endif
1491
+
1492
+ SIMDE_FUNCTION_ATTRIBUTES
1493
+ simde__m128
1494
+ simde_mm_cmpngt_ss (simde__m128 a, simde__m128 b) {
1495
+ return simde_mm_cmple_ss(a, b);
1496
+ }
1497
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
1498
+ # define _mm_cmpngt_ss(a, b) simde_mm_cmpngt_ss((a), (b))
1499
+ #endif
1500
+
1501
+ SIMDE_FUNCTION_ATTRIBUTES
1502
+ simde__m128
1503
+ simde_mm_cmpnle_ps (simde__m128 a, simde__m128 b) {
1504
+ return simde_mm_cmpgt_ps(a, b);
1505
+ }
1506
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
1507
+ # define _mm_cmpnle_ps(a, b) simde_mm_cmpnle_ps((a), (b))
1508
+ #endif
1509
+
1510
+ SIMDE_FUNCTION_ATTRIBUTES
1511
+ simde__m128
1512
+ simde_mm_cmpnle_ss (simde__m128 a, simde__m128 b) {
1513
+ return simde_mm_cmpgt_ss(a, b);
1514
+ }
1515
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
1516
+ # define _mm_cmpnle_ss(a, b) simde_mm_cmpnle_ss((a), (b))
1517
+ #endif
1518
+
1519
+ SIMDE_FUNCTION_ATTRIBUTES
1520
+ simde__m128
1521
+ simde_mm_cmpnlt_ps (simde__m128 a, simde__m128 b) {
1522
+ return simde_mm_cmpge_ps(a, b);
1523
+ }
1524
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
1525
+ # define _mm_cmpnlt_ps(a, b) simde_mm_cmpnlt_ps((a), (b))
1526
+ #endif
1527
+
1528
+ SIMDE_FUNCTION_ATTRIBUTES
1529
+ simde__m128
1530
+ simde_mm_cmpnlt_ss (simde__m128 a, simde__m128 b) {
1531
+ return simde_mm_cmpge_ss(a, b);
1532
+ }
1533
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
1534
+ # define _mm_cmpnlt_ss(a, b) simde_mm_cmpnlt_ss((a), (b))
1535
+ #endif
1536
+
1537
+ SIMDE_FUNCTION_ATTRIBUTES
1538
+ simde__m128
1539
+ simde_mm_cmpord_ps (simde__m128 a, simde__m128 b) {
1540
+ #if defined(SIMDE_X86_SSE_NATIVE)
1541
+ return _mm_cmpord_ps(a, b);
1542
+ #elif defined(SIMDE_WASM_SIMD128_NATIVE)
1543
+ return wasm_v128_and(wasm_f32x4_eq(a, a), wasm_f32x4_eq(b, b));
1544
+ #else
1545
+ simde__m128_private
1546
+ r_,
1547
+ a_ = simde__m128_to_private(a),
1548
+ b_ = simde__m128_to_private(b);
1549
+
1550
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
1551
+ /* Note: NEON has no ordered-compare builtin, so compare a == a and
1552
+ b == b to detect NaN lanes, then AND the two results to form the
1553
+ final ordered mask. */
1554
+ uint32x4_t ceqaa = vceqq_f32(a_.neon_f32, a_.neon_f32);
1555
+ uint32x4_t ceqbb = vceqq_f32(b_.neon_f32, b_.neon_f32);
1556
+ r_.neon_u32 = vandq_u32(ceqaa, ceqbb);
1557
+ #elif defined(SIMDE_WASM_SIMD128_NATIVE)
1558
+ r_.wasm_v128 = wasm_v128_and(wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128), wasm_f32x4_eq(b_.wasm_v128, b_.wasm_v128));
1559
+ #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
1560
+ r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float),
1561
+ vec_and(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32)));
1562
+ #elif defined(simde_math_isnanf)
1563
+ SIMDE_VECTORIZE
1564
+ for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
1565
+ r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? UINT32_C(0) : ~UINT32_C(0);
1566
+ }
1567
+ #else
1568
+ HEDLEY_UNREACHABLE();
1569
+ #endif
1570
+
1571
+ return simde__m128_from_private(r_);
1572
+ #endif
1573
+ }
1574
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
1575
+ # define _mm_cmpord_ps(a, b) simde_mm_cmpord_ps((a), (b))
1576
+ #endif
1577
+
1578
+ SIMDE_FUNCTION_ATTRIBUTES
1579
+ simde__m128
1580
+ simde_mm_cmpunord_ps (simde__m128 a, simde__m128 b) {
1581
+ #if defined(SIMDE_X86_SSE_NATIVE)
1582
+ return _mm_cmpunord_ps(a, b);
1583
+ #elif defined(SIMDE_WASM_SIMD128_NATIVE)
1584
+ return wasm_v128_or(wasm_f32x4_ne(a, a), wasm_f32x4_ne(b, b));
1585
+ #else
1586
+ simde__m128_private
1587
+ r_,
1588
+ a_ = simde__m128_to_private(a),
1589
+ b_ = simde__m128_to_private(b);
1590
+
1591
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
1592
+ uint32x4_t ceqaa = vceqq_f32(a_.neon_f32, a_.neon_f32);
1593
+ uint32x4_t ceqbb = vceqq_f32(b_.neon_f32, b_.neon_f32);
1594
+ r_.neon_u32 = vmvnq_u32(vandq_u32(ceqaa, ceqbb));
1595
+ #elif defined(SIMDE_WASM_SIMD128_NATIVE)
1596
+ r_.wasm_v128 = wasm_v128_or(wasm_f32x4_ne(a_.wasm_v128, a_.wasm_v128), wasm_f32x4_ne(b_.wasm_v128, b_.wasm_v128));
1597
+ #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
1598
+ r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float),
1599
+ vec_nand(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32)));
1600
+ #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
1601
+ r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float),
1602
+ vec_and(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32)));
1603
+ r_.altivec_f32 = vec_nor(r_.altivec_f32, r_.altivec_f32);
1604
+ #elif defined(simde_math_isnanf)
1605
+ SIMDE_VECTORIZE
1606
+ for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
1607
+ r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? ~UINT32_C(0) : UINT32_C(0);
1608
+ }
1609
+ #else
1610
+ HEDLEY_UNREACHABLE();
1611
+ #endif
1612
+
1613
+ return simde__m128_from_private(r_);
1614
+ #endif
1615
+ }
1616
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
1617
+ # define _mm_cmpunord_ps(a, b) simde_mm_cmpunord_ps((a), (b))
1618
+ #endif
1619
+
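simde_mm_cmpord_ps() and simde_mm_cmpunord_ps() above reduce to a per-lane NaN test: a lane is ordered exactly when neither input is NaN, and unordered when at least one is. A scalar sketch of the portable fallback's logic (the *_lane helpers are illustrative):

    #include <math.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Scalar models of one lane of the portable fallbacks above. */
    static uint32_t cmpord_lane(float a, float b) {
      return (isnan(a) || isnan(b)) ? UINT32_C(0) : ~UINT32_C(0);
    }
    static uint32_t cmpunord_lane(float a, float b) {
      return (isnan(a) || isnan(b)) ? ~UINT32_C(0) : UINT32_C(0);
    }

    int main(void) {
      float qnan = nanf("");
      printf("ord(1, 2)     = 0x%08X\n", (unsigned) cmpord_lane(1.0f, 2.0f));   /* 0xFFFFFFFF */
      printf("ord(1, NaN)   = 0x%08X\n", (unsigned) cmpord_lane(1.0f, qnan));   /* 0x00000000 */
      printf("unord(1, NaN) = 0x%08X\n", (unsigned) cmpunord_lane(1.0f, qnan)); /* 0xFFFFFFFF */
      return 0;
    }
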
1620
+ SIMDE_FUNCTION_ATTRIBUTES
1621
+ simde__m128
1622
+ simde_mm_cmpunord_ss (simde__m128 a, simde__m128 b) {
1623
+ #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI)
1624
+ return _mm_cmpunord_ss(a, b);
1625
+ #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
1626
+ return simde_mm_move_ss(a, simde_mm_cmpunord_ps(a, b));
1627
+ #else
1628
+ simde__m128_private
1629
+ r_,
1630
+ a_ = simde__m128_to_private(a),
1631
+ b_ = simde__m128_to_private(b);
1632
+
1633
+ #if defined(simde_math_isnanf)
1634
+ r_.u32[0] = (simde_math_isnanf(a_.f32[0]) || simde_math_isnanf(b_.f32[0])) ? ~UINT32_C(0) : UINT32_C(0);
1635
+ SIMDE_VECTORIZE
1636
+ for (size_t i = 1 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
1637
+ r_.u32[i] = a_.u32[i];
1638
+ }
1639
+ #else
1640
+ HEDLEY_UNREACHABLE();
1641
+ #endif
1642
+
1643
+ return simde__m128_from_private(r_);
1644
+ #endif
1645
+ }
1646
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
1647
+ # define _mm_cmpunord_ss(a, b) simde_mm_cmpunord_ss((a), (b))
1648
+ #endif
1649
+
1650
+ SIMDE_FUNCTION_ATTRIBUTES
1651
+ int
1652
+ simde_mm_comieq_ss (simde__m128 a, simde__m128 b) {
1653
+ #if defined(SIMDE_X86_SSE_NATIVE)
1654
+ return _mm_comieq_ss(a, b);
1655
+ #else
1656
+ simde__m128_private
1657
+ a_ = simde__m128_to_private(a),
1658
+ b_ = simde__m128_to_private(b);
1659
+
1660
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
1661
+ uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32);
1662
+ uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32);
1663
+ uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan));
1664
+ uint32x4_t a_eq_b = vceqq_f32(a_.neon_f32, b_.neon_f32);
1665
+ return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0);
1666
+ #else
1667
+ return a_.f32[0] == b_.f32[0];
1668
+ #endif
1669
+ #endif
1670
+ }
1671
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
1672
+ # define _mm_comieq_ss(a, b) simde_mm_comieq_ss((a), (b))
1673
+ #endif
1674
+
1675
+ SIMDE_FUNCTION_ATTRIBUTES
1676
+ int
1677
+ simde_mm_comige_ss (simde__m128 a, simde__m128 b) {
1678
+ #if defined(SIMDE_X86_SSE_NATIVE)
1679
+ return _mm_comige_ss(a, b);
1680
+ #else
1681
+ simde__m128_private
1682
+ a_ = simde__m128_to_private(a),
1683
+ b_ = simde__m128_to_private(b);
1684
+
1685
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
1686
+ uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32);
1687
+ uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32);
1688
+ uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan);
1689
+ uint32x4_t a_ge_b = vcgeq_f32(a_.neon_f32, b_.neon_f32);
1690
+ return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0);
1691
+ #else
1692
+ return a_.f32[0] >= b_.f32[0];
1693
+ #endif
1694
+ #endif
1695
+ }
1696
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
1697
+ # define _mm_comige_ss(a, b) simde_mm_comige_ss((a), (b))
1698
+ #endif
1699
+
1700
+ SIMDE_FUNCTION_ATTRIBUTES
1701
+ int
1702
+ simde_mm_comigt_ss (simde__m128 a, simde__m128 b) {
1703
+ #if defined(SIMDE_X86_SSE_NATIVE)
1704
+ return _mm_comigt_ss(a, b);
1705
+ #else
1706
+ simde__m128_private
1707
+ a_ = simde__m128_to_private(a),
1708
+ b_ = simde__m128_to_private(b);
1709
+
1710
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
1711
+ uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32);
1712
+ uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32);
1713
+ uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan);
1714
+ uint32x4_t a_gt_b = vcgtq_f32(a_.neon_f32, b_.neon_f32);
1715
+ return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0);
1716
+ #else
1717
+ return a_.f32[0] > b_.f32[0];
1718
+ #endif
1719
+ #endif
1720
+ }
1721
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
1722
+ # define _mm_comigt_ss(a, b) simde_mm_comigt_ss((a), (b))
1723
+ #endif
1724
+
1725
+ SIMDE_FUNCTION_ATTRIBUTES
1726
+ int
1727
+ simde_mm_comile_ss (simde__m128 a, simde__m128 b) {
1728
+ #if defined(SIMDE_X86_SSE_NATIVE)
1729
+ return _mm_comile_ss(a, b);
1730
+ #else
1731
+ simde__m128_private
1732
+ a_ = simde__m128_to_private(a),
1733
+ b_ = simde__m128_to_private(b);
1734
+
1735
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
1736
+ uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32);
1737
+ uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32);
1738
+ uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan));
1739
+ uint32x4_t a_le_b = vcleq_f32(a_.neon_f32, b_.neon_f32);
1740
+ return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0);
1741
+ #else
1742
+ return a_.f32[0] <= b_.f32[0];
1743
+ #endif
1744
+ #endif
1745
+ }
1746
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
1747
+ # define _mm_comile_ss(a, b) simde_mm_comile_ss((a), (b))
1748
+ #endif
1749
+
1750
+ SIMDE_FUNCTION_ATTRIBUTES
1751
+ int
1752
+ simde_mm_comilt_ss (simde__m128 a, simde__m128 b) {
1753
+ #if defined(SIMDE_X86_SSE_NATIVE)
1754
+ return _mm_comilt_ss(a, b);
1755
+ #else
1756
+ simde__m128_private
1757
+ a_ = simde__m128_to_private(a),
1758
+ b_ = simde__m128_to_private(b);
1759
+
1760
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
1761
+ uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32);
1762
+ uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32);
1763
+ uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan));
1764
+ uint32x4_t a_lt_b = vcltq_f32(a_.neon_f32, b_.neon_f32);
1765
+ return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0);
1766
+ #else
1767
+ return a_.f32[0] < b_.f32[0];
1768
+ #endif
1769
+ #endif
1770
+ }
1771
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
1772
+ # define _mm_comilt_ss(a, b) simde_mm_comilt_ss((a), (b))
1773
+ #endif
1774
+
1775
+ SIMDE_FUNCTION_ATTRIBUTES
1776
+ int
1777
+ simde_mm_comineq_ss (simde__m128 a, simde__m128 b) {
1778
+ #if defined(SIMDE_X86_SSE_NATIVE)
1779
+ return _mm_comineq_ss(a, b);
1780
+ #else
1781
+ simde__m128_private
1782
+ a_ = simde__m128_to_private(a),
1783
+ b_ = simde__m128_to_private(b);
1784
+
1785
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
1786
+ uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32);
1787
+ uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32);
1788
+ uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan);
1789
+ uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32));
1790
+ return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0);
1791
+ #else
1792
+ return a_.f32[0] != b_.f32[0];
1793
+ #endif
1794
+ #endif
1795
+ }
1796
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
1797
+ # define _mm_comineq_ss(a, b) simde_mm_comineq_ss((a), (b))
1798
+ #endif
1799
+
1800
+ SIMDE_FUNCTION_ATTRIBUTES
1801
+ simde__m128
1802
+ simde_x_mm_copysign_ps(simde__m128 dest, simde__m128 src) {
1803
+ simde__m128_private
1804
+ r_,
1805
+ dest_ = simde__m128_to_private(dest),
1806
+ src_ = simde__m128_to_private(src);
1807
+
1808
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
1809
+ const uint32x4_t sign_pos = vreinterpretq_u32_f32(vdupq_n_f32(-SIMDE_FLOAT32_C(0.0)));
1810
+ r_.neon_u32 = vbslq_u32(sign_pos, src_.neon_u32, dest_.neon_u32);
1811
+ #elif defined(SIMDE_WASM_SIMD128_NATIVE)
1812
+ const v128_t sign_pos = wasm_f32x4_splat(-0.0f);
1813
+ r_.wasm_v128 = wasm_v128_bitselect(src_.wasm_v128, dest_.wasm_v128, sign_pos);
1814
+ #elif defined(SIMDE_POWER_ALTIVEC_P9_NATIVE)
1815
+ #if !defined(HEDLEY_IBM_VERSION)
1816
+ r_.altivec_f32 = vec_cpsgn(dest_.altivec_f32, src_.altivec_f32);
1817
+ #else
1818
+ r_.altivec_f32 = vec_cpsgn(src_.altivec_f32, dest_.altivec_f32);
1819
+ #endif
1820
+ #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
1821
+ const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) sign_pos = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_splats(-0.0f));
1822
+ r_.altivec_f32 = vec_sel(dest_.altivec_f32, src_.altivec_f32, sign_pos);
1823
+ #elif defined(SIMDE_IEEE754_STORAGE)
1824
+ (void) src_;
1825
+ (void) dest_;
1826
+ simde__m128 sign_pos = simde_mm_set1_ps(-0.0f);
1827
+ r_ = simde__m128_to_private(simde_mm_xor_ps(dest, simde_mm_and_ps(simde_mm_xor_ps(dest, src), sign_pos)));
1828
+ #else
1829
+ SIMDE_VECTORIZE
1830
+ for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
1831
+ r_.f32[i] = simde_math_copysignf(dest_.f32[i], src_.f32[i]);
1832
+ }
1833
+ #endif
1834
+
1835
+ return simde__m128_from_private(r_);
1836
+ }
1837
+
1838
+ SIMDE_FUNCTION_ATTRIBUTES
1839
+ simde__m128
1840
+ simde_x_mm_xorsign_ps(simde__m128 dest, simde__m128 src) {
1841
+ return simde_mm_xor_ps(simde_mm_and_ps(simde_mm_set1_ps(-0.0f), src), dest);
1842
+ }
1843
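The SIMDE_IEEE754_STORAGE branch of simde_x_mm_copysign_ps above relies on the sign-transfer identity dest ^ ((dest ^ src) & 0x80000000). A minimal scalar sketch of the same identity, assuming ordinary IEEE-754 float storage; the helper name below is illustrative only and is not part of this header:

    #include <stdint.h>
    #include <string.h>
    #include <stdio.h>

    /* Hypothetical helper: copy src's sign onto dest by editing only the
     * top bit of the 32-bit representation. */
    static float copysign_bits(float dest, float src) {
      uint32_t d, s;
      memcpy(&d, &dest, sizeof d);   /* type-pun through memcpy */
      memcpy(&s, &src, sizeof s);
      d ^= (d ^ s) & UINT32_C(0x80000000);
      memcpy(&dest, &d, sizeof dest);
      return dest;
    }

    int main(void) {
      printf("%f\n", copysign_bits(3.5f, -1.0f));  /* prints -3.500000 */
      return 0;
    }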
+
1844
+ SIMDE_FUNCTION_ATTRIBUTES
1845
+ simde__m128
1846
+ simde_mm_cvt_pi2ps (simde__m128 a, simde__m64 b) {
1847
+ #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
1848
+ return _mm_cvt_pi2ps(a, b);
1849
+ #else
1850
+ simde__m128_private
1851
+ r_,
1852
+ a_ = simde__m128_to_private(a);
1853
+ simde__m64_private b_ = simde__m64_to_private(b);
1854
+
1855
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
1856
+ r_.neon_f32 = vcombine_f32(vcvt_f32_s32(b_.neon_i32), vget_high_f32(a_.neon_f32));
1857
+ #elif defined(SIMDE_CONVERT_VECTOR_)
1858
+ SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, b_.i32);
1859
+ r_.m64_private[1] = a_.m64_private[1];
1860
+ #else
1861
+ r_.f32[0] = (simde_float32) b_.i32[0];
1862
+ r_.f32[1] = (simde_float32) b_.i32[1];
1863
+ r_.i32[2] = a_.i32[2];
1864
+ r_.i32[3] = a_.i32[3];
1865
+ #endif
1866
+
1867
+ return simde__m128_from_private(r_);
1868
+ #endif
1869
+ }
1870
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
1871
+ # define _mm_cvt_pi2ps(a, b) simde_mm_cvt_pi2ps((a), (b))
1872
+ #endif
1873
+
1874
+ SIMDE_FUNCTION_ATTRIBUTES
1875
+ simde__m64
1876
+ simde_mm_cvt_ps2pi (simde__m128 a) {
1877
+ #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
1878
+ return _mm_cvt_ps2pi(a);
1879
+ #else
1880
+ simde__m64_private r_;
1881
+ simde__m128_private a_;
1882
+
1883
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
1884
+ a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION));
1885
+ r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32));
1886
+ #elif defined(SIMDE_CONVERT_VECTOR_) && SIMDE_NATURAL_VECTOR_SIZE_GE(128)
1887
+ a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION));
1888
+ SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].f32);
1889
+ #else
1890
+ a_ = simde__m128_to_private(a);
1891
+
1892
+ SIMDE_VECTORIZE
1893
+ for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
1894
+ r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, simde_math_nearbyintf(a_.f32[i]));
1895
+ }
1896
+ #endif
1897
+
1898
+ return simde__m64_from_private(r_);
1899
+ #endif
1900
+ }
1901
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
1902
+ # define _mm_cvt_ps2pi(a) simde_mm_cvt_ps2pi((a))
1903
+ #endif
1904
+
1905
+ SIMDE_FUNCTION_ATTRIBUTES
1906
+ simde__m128
1907
+ simde_mm_cvt_si2ss (simde__m128 a, int32_t b) {
1908
+ #if defined(SIMDE_X86_SSE_NATIVE)
1909
+ return _mm_cvt_si2ss(a, b);
1910
+ #else
1911
+ simde__m128_private
1912
+ r_,
1913
+ a_ = simde__m128_to_private(a);
1914
+
1915
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
1916
+ r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float, b), a_.neon_f32, 0);
1917
+ #else
1918
+ r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b);
1919
+ r_.i32[1] = a_.i32[1];
1920
+ r_.i32[2] = a_.i32[2];
1921
+ r_.i32[3] = a_.i32[3];
1922
+ #endif
1923
+
1924
+ return simde__m128_from_private(r_);
1925
+ #endif
1926
+ }
1927
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
1928
+ # define _mm_cvt_si2ss(a, b) simde_mm_cvt_si2ss((a), b)
1929
+ #endif
1930
+
1931
+ SIMDE_FUNCTION_ATTRIBUTES
1932
+ int32_t
1933
+ simde_mm_cvt_ss2si (simde__m128 a) {
1934
+ #if defined(SIMDE_X86_SSE_NATIVE)
1935
+ return _mm_cvt_ss2si(a);
1936
+ #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_BUG_GCC_95399)
1937
+ return vgetq_lane_s32(vcvtnq_s32_f32(simde__m128_to_neon_f32(a)), 0);
1938
+ #else
1939
+ simde__m128_private a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION));
1940
+ #if !defined(SIMDE_FAST_CONVERSION_RANGE)
1941
+ return ((a_.f32[0] > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) &&
1942
+ (a_.f32[0] < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ?
1943
+ SIMDE_CONVERT_FTOI(int32_t, a_.f32[0]) : INT32_MIN;
1944
+ #else
1945
+ return SIMDE_CONVERT_FTOI(int32_t, a_.f32[0]);
1946
+ #endif
1947
+ #endif
1948
+ }
1949
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
1950
+ # define _mm_cvt_ss2si(a) simde_mm_cvt_ss2si((a))
1951
+ #endif
1952
+
1953
+ SIMDE_FUNCTION_ATTRIBUTES
1954
+ simde__m128
1955
+ simde_mm_cvtpi16_ps (simde__m64 a) {
1956
+ #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
1957
+ return _mm_cvtpi16_ps(a);
1958
+ #else
1959
+ simde__m128_private r_;
1960
+ simde__m64_private a_ = simde__m64_to_private(a);
1961
+
1962
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
1963
+ r_.neon_f32 = vcvtq_f32_s32(vmovl_s16(a_.neon_i16));
1964
+ #elif defined(SIMDE_CONVERT_VECTOR_)
1965
+ SIMDE_CONVERT_VECTOR_(r_.f32, a_.i16);
1966
+ #else
1967
+ SIMDE_VECTORIZE
1968
+ for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
1969
+ simde_float32 v = a_.i16[i];
1970
+ r_.f32[i] = v;
1971
+ }
1972
+ #endif
1973
+
1974
+ return simde__m128_from_private(r_);
1975
+ #endif
1976
+ }
1977
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
1978
+ # define _mm_cvtpi16_ps(a) simde_mm_cvtpi16_ps(a)
1979
+ #endif
1980
+
1981
+ SIMDE_FUNCTION_ATTRIBUTES
1982
+ simde__m128
1983
+ simde_mm_cvtpi32_ps (simde__m128 a, simde__m64 b) {
1984
+ #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
1985
+ return _mm_cvtpi32_ps(a, b);
1986
+ #else
1987
+ simde__m128_private
1988
+ r_,
1989
+ a_ = simde__m128_to_private(a);
1990
+ simde__m64_private b_ = simde__m64_to_private(b);
1991
+
1992
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
1993
+ r_.neon_f32 = vcombine_f32(vcvt_f32_s32(b_.neon_i32), vget_high_f32(a_.neon_f32));
1994
+ #elif defined(SIMDE_CONVERT_VECTOR_)
1995
+ SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, b_.i32);
1996
+ r_.m64_private[1] = a_.m64_private[1];
1997
+ #else
1998
+ r_.f32[0] = (simde_float32) b_.i32[0];
1999
+ r_.f32[1] = (simde_float32) b_.i32[1];
2000
+ r_.i32[2] = a_.i32[2];
2001
+ r_.i32[3] = a_.i32[3];
2002
+ #endif
2003
+
2004
+ return simde__m128_from_private(r_);
2005
+ #endif
2006
+ }
2007
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
2008
+ # define _mm_cvtpi32_ps(a, b) simde_mm_cvtpi32_ps((a), b)
2009
+ #endif
2010
+
2011
+ SIMDE_FUNCTION_ATTRIBUTES
2012
+ simde__m128
2013
+ simde_mm_cvtpi32x2_ps (simde__m64 a, simde__m64 b) {
2014
+ #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
2015
+ return _mm_cvtpi32x2_ps(a, b);
2016
+ #else
2017
+ simde__m128_private r_;
2018
+ simde__m64_private
2019
+ a_ = simde__m64_to_private(a),
2020
+ b_ = simde__m64_to_private(b);
2021
+
2022
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
2023
+ r_.neon_f32 = vcvtq_f32_s32(vcombine_s32(a_.neon_i32, b_.neon_i32));
2024
+ #elif defined(SIMDE_CONVERT_VECTOR_)
2025
+ SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, a_.i32);
2026
+ SIMDE_CONVERT_VECTOR_(r_.m64_private[1].f32, b_.i32);
2027
+ #else
2028
+ r_.f32[0] = (simde_float32) a_.i32[0];
2029
+ r_.f32[1] = (simde_float32) a_.i32[1];
2030
+ r_.f32[2] = (simde_float32) b_.i32[0];
2031
+ r_.f32[3] = (simde_float32) b_.i32[1];
2032
+ #endif
2033
+
2034
+ return simde__m128_from_private(r_);
2035
+ #endif
2036
+ }
2037
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
2038
+ # define _mm_cvtpi32x2_ps(a, b) simde_mm_cvtpi32x2_ps(a, b)
2039
+ #endif
2040
+
2041
+ SIMDE_FUNCTION_ATTRIBUTES
2042
+ simde__m128
2043
+ simde_mm_cvtpi8_ps (simde__m64 a) {
2044
+ #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
2045
+ return _mm_cvtpi8_ps(a);
2046
+ #else
2047
+ simde__m128_private r_;
2048
+ simde__m64_private a_ = simde__m64_to_private(a);
2049
+
2050
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
2051
+ r_.neon_f32 = vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8(a_.neon_i8))));
2052
+ #else
2053
+ r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[0]);
2054
+ r_.f32[1] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[1]);
2055
+ r_.f32[2] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[2]);
2056
+ r_.f32[3] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[3]);
2057
+ #endif
2058
+
2059
+ return simde__m128_from_private(r_);
2060
+ #endif
2061
+ }
2062
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
2063
+ # define _mm_cvtpi8_ps(a) simde_mm_cvtpi8_ps(a)
2064
+ #endif
2065
+
2066
+ SIMDE_FUNCTION_ATTRIBUTES
2067
+ simde__m64
2068
+ simde_mm_cvtps_pi16 (simde__m128 a) {
2069
+ #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
2070
+ return _mm_cvtps_pi16(a);
2071
+ #else
2072
+ simde__m64_private r_;
2073
+ simde__m128_private a_ = simde__m128_to_private(a);
2074
+
2075
+ #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399)
2076
+ r_.neon_i16 = vmovn_s32(vcvtq_s32_f32(vrndiq_f32(a_.neon_f32)));
2077
+ #else
2078
+ SIMDE_VECTORIZE
2079
+ for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
2080
+ r_.i16[i] = SIMDE_CONVERT_FTOI(int16_t, simde_math_roundf(a_.f32[i]));
2081
+ }
2082
+ #endif
2083
+
2084
+ return simde__m64_from_private(r_);
2085
+ #endif
2086
+ }
2087
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
2088
+ # define _mm_cvtps_pi16(a) simde_mm_cvtps_pi16((a))
2089
+ #endif
2090
+
2091
+ SIMDE_FUNCTION_ATTRIBUTES
2092
+ simde__m64
2093
+ simde_mm_cvtps_pi32 (simde__m128 a) {
2094
+ #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
2095
+ return _mm_cvtps_pi32(a);
2096
+ #else
2097
+ simde__m64_private r_;
2098
+ simde__m128_private a_ = simde__m128_to_private(a);
2099
+
2100
+ #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_BUG_GCC_95399)
2101
+ r_.neon_i32 = vcvt_s32_f32(vget_low_f32(vrndiq_f32(a_.neon_f32)));
2102
+ #else
2103
+ SIMDE_VECTORIZE
2104
+ for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
2105
+ simde_float32 v = simde_math_roundf(a_.f32[i]);
2106
+ #if !defined(SIMDE_FAST_CONVERSION_RANGE)
2107
+ r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ?
2108
+ SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN;
2109
+ #else
2110
+ r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v);
2111
+ #endif
2112
+ }
2113
+ #endif
2114
+
2115
+ return simde__m64_from_private(r_);
2116
+ #endif
2117
+ }
2118
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
2119
+ # define _mm_cvtps_pi32(a) simde_mm_cvtps_pi32((a))
2120
+ #endif
2121
+
2122
+ SIMDE_FUNCTION_ATTRIBUTES
2123
+ simde__m64
2124
+ simde_mm_cvtps_pi8 (simde__m128 a) {
2125
+ #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
2126
+ return _mm_cvtps_pi8(a);
2127
+ #else
2128
+ simde__m64_private r_;
2129
+ simde__m128_private a_ = simde__m128_to_private(a);
2130
+
2131
+ #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95471)
2132
+ /* Clamp the input to [INT8_MIN, INT8_MAX], round, convert to i32, narrow to
2133
+ * i16, combine with an all-zero vector of i16 (which will become the upper
2134
+ * half), narrow to i8. */
2135
+ float32x4_t max = vdupq_n_f32(HEDLEY_STATIC_CAST(simde_float32, INT8_MAX));
2136
+ float32x4_t min = vdupq_n_f32(HEDLEY_STATIC_CAST(simde_float32, INT8_MIN));
2137
+ float32x4_t values = vrndnq_f32(vmaxq_f32(vminq_f32(max, a_.neon_f32), min));
2138
+ r_.neon_i8 = vmovn_s16(vcombine_s16(vmovn_s32(vcvtq_s32_f32(values)), vdup_n_s16(0)));
2139
+ #else
2140
+ SIMDE_VECTORIZE
2141
+ for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) {
2142
+ if (a_.f32[i] > HEDLEY_STATIC_CAST(simde_float32, INT8_MAX))
2143
+ r_.i8[i] = INT8_MAX;
2144
+ else if (a_.f32[i] < HEDLEY_STATIC_CAST(simde_float32, INT8_MIN))
2145
+ r_.i8[i] = INT8_MIN;
2146
+ else
2147
+ r_.i8[i] = SIMDE_CONVERT_FTOI(int8_t, simde_math_roundf(a_.f32[i]));
2148
+ }
2149
+ /* Note: the upper half is undefined */
2150
+ #endif
2151
+
2152
+ return simde__m64_from_private(r_);
2153
+ #endif
2154
+ }
2155
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
2156
+ # define _mm_cvtps_pi8(a) simde_mm_cvtps_pi8((a))
2157
+ #endif
2158
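A short usage sketch of the saturating conversion implemented above. The include path is an assumption about where the bundled header lives, and only the low four bytes of the simde__m64 result carry defined values, as the comment above notes:

    #include <stdio.h>
    #include <string.h>
    #include "simde/x86/sse.h"   /* assumed include path for the bundled header */

    int main(void) {
      /* f32 lanes, low to high: 300, -300, 1.5, -1.5 */
      simde__m128 v = simde_mm_set_ps(-1.5f, 1.5f, -300.0f, 300.0f);
      simde__m64 p = simde_mm_cvtps_pi8(v);
      int8_t out[8];
      memcpy(out, &p, sizeof(out));
      simde_mm_empty();   /* leave MMX state clean on native x86 builds */
      printf("%d %d %d %d\n", out[0], out[1], out[2], out[3]);   /* 127 -128 2 -2 */
      return 0;
    }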
+
2159
+ SIMDE_FUNCTION_ATTRIBUTES
2160
+ simde__m128
2161
+ simde_mm_cvtpu16_ps (simde__m64 a) {
2162
+ #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
2163
+ return _mm_cvtpu16_ps(a);
2164
+ #else
2165
+ simde__m128_private r_;
2166
+ simde__m64_private a_ = simde__m64_to_private(a);
2167
+
2168
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
2169
+ r_.neon_f32 = vcvtq_f32_u32(vmovl_u16(a_.neon_u16));
2170
+ #elif defined(SIMDE_CONVERT_VECTOR_)
2171
+ SIMDE_CONVERT_VECTOR_(r_.f32, a_.u16);
2172
+ #else
2173
+ SIMDE_VECTORIZE
2174
+ for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
2175
+ r_.f32[i] = (simde_float32) a_.u16[i];
2176
+ }
2177
+ #endif
2178
+
2179
+ return simde__m128_from_private(r_);
2180
+ #endif
2181
+ }
2182
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
2183
+ # define _mm_cvtpu16_ps(a) simde_mm_cvtpu16_ps(a)
2184
+ #endif
2185
+
2186
+ SIMDE_FUNCTION_ATTRIBUTES
2187
+ simde__m128
2188
+ simde_mm_cvtpu8_ps (simde__m64 a) {
2189
+ #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
2190
+ return _mm_cvtpu8_ps(a);
2191
+ #else
2192
+ simde__m128_private r_;
2193
+ simde__m64_private a_ = simde__m64_to_private(a);
2194
+
2195
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
2196
+ r_.neon_f32 = vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8(a_.neon_u8))));
2197
+ #else
2198
+ SIMDE_VECTORIZE
2199
+ for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
2200
+ r_.f32[i] = HEDLEY_STATIC_CAST(simde_float32, a_.u8[i]);
2201
+ }
2202
+ #endif
2203
+
2204
+ return simde__m128_from_private(r_);
2205
+ #endif
2206
+ }
2207
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
2208
+ # define _mm_cvtpu8_ps(a) simde_mm_cvtpu8_ps(a)
2209
+ #endif
2210
+
2211
+ SIMDE_FUNCTION_ATTRIBUTES
2212
+ simde__m128
2213
+ simde_mm_cvtsi32_ss (simde__m128 a, int32_t b) {
2214
+ #if defined(SIMDE_X86_SSE_NATIVE)
2215
+ return _mm_cvtsi32_ss(a, b);
2216
+ #else
2217
+ simde__m128_private r_;
2218
+ simde__m128_private a_ = simde__m128_to_private(a);
2219
+
2220
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
2221
+ r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float32_t, b), a_.neon_f32, 0);
2222
+ #else
2223
+ r_ = a_;
2224
+ r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b);
2225
+ #endif
2226
+
2227
+ return simde__m128_from_private(r_);
2228
+ #endif
2229
+ }
2230
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
2231
+ # define _mm_cvtsi32_ss(a, b) simde_mm_cvtsi32_ss((a), b)
2232
+ #endif
2233
+
2234
+ SIMDE_FUNCTION_ATTRIBUTES
2235
+ simde__m128
2236
+ simde_mm_cvtsi64_ss (simde__m128 a, int64_t b) {
2237
+ #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64)
2238
+ #if !defined(__PGI)
2239
+ return _mm_cvtsi64_ss(a, b);
2240
+ #else
2241
+ return _mm_cvtsi64x_ss(a, b);
2242
+ #endif
2243
+ #else
2244
+ simde__m128_private r_;
2245
+ simde__m128_private a_ = simde__m128_to_private(a);
2246
+
2247
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
2248
+ r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float32_t, b), a_.neon_f32, 0);
2249
+ #else
2250
+ r_ = a_;
2251
+ r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b);
2252
+ #endif
2253
+
2254
+ return simde__m128_from_private(r_);
2255
+ #endif
2256
+ }
2257
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
2258
+ # define _mm_cvtsi64_ss(a, b) simde_mm_cvtsi64_ss((a), b)
2259
+ #endif
2260
+
2261
+ SIMDE_FUNCTION_ATTRIBUTES
2262
+ simde_float32
2263
+ simde_mm_cvtss_f32 (simde__m128 a) {
2264
+ #if defined(SIMDE_X86_SSE_NATIVE)
2265
+ return _mm_cvtss_f32(a);
2266
+ #else
2267
+ simde__m128_private a_ = simde__m128_to_private(a);
2268
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
2269
+ return vgetq_lane_f32(a_.neon_f32, 0);
2270
+ #else
2271
+ return a_.f32[0];
2272
+ #endif
2273
+ #endif
2274
+ }
2275
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
2276
+ # define _mm_cvtss_f32(a) simde_mm_cvtss_f32((a))
2277
+ #endif
2278
+
2279
+ SIMDE_FUNCTION_ATTRIBUTES
2280
+ int32_t
2281
+ simde_mm_cvtss_si32 (simde__m128 a) {
2282
+ return simde_mm_cvt_ss2si(a);
2283
+ }
2284
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
2285
+ # define _mm_cvtss_si32(a) simde_mm_cvtss_si32((a))
2286
+ #endif
2287
+
2288
+ SIMDE_FUNCTION_ATTRIBUTES
2289
+ int64_t
2290
+ simde_mm_cvtss_si64 (simde__m128 a) {
2291
+ #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64)
2292
+ #if !defined(__PGI)
2293
+ return _mm_cvtss_si64(a);
2294
+ #else
2295
+ return _mm_cvtss_si64x(a);
2296
+ #endif
2297
+ #else
2298
+ simde__m128_private a_ = simde__m128_to_private(a);
2299
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
2300
+ return SIMDE_CONVERT_FTOI(int64_t, simde_math_roundf(vgetq_lane_f32(a_.neon_f32, 0)));
2301
+ #else
2302
+ return SIMDE_CONVERT_FTOI(int64_t, simde_math_roundf(a_.f32[0]));
2303
+ #endif
2304
+ #endif
2305
+ }
2306
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
2307
+ # define _mm_cvtss_si64(a) simde_mm_cvtss_si64((a))
2308
+ #endif
2309
+
2310
+ SIMDE_FUNCTION_ATTRIBUTES
2311
+ simde__m64
2312
+ simde_mm_cvtt_ps2pi (simde__m128 a) {
2313
+ #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
2314
+ return _mm_cvtt_ps2pi(a);
2315
+ #else
2316
+ simde__m64_private r_;
2317
+ simde__m128_private a_ = simde__m128_to_private(a);
2318
+
2319
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE)
2320
+ r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32));
2321
+ #else
2322
+ SIMDE_VECTORIZE
2323
+ for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
2324
+ simde_float32 v = a_.f32[i];
2325
+ #if !defined(SIMDE_FAST_CONVERSION_RANGE)
2326
+ r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ?
2327
+ SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN;
2328
+ #else
2329
+ r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v);
2330
+ #endif
2331
+ }
2332
+ #endif
2333
+
2334
+ return simde__m64_from_private(r_);
2335
+ #endif
2336
+ }
2337
+ #define simde_mm_cvttps_pi32(a) simde_mm_cvtt_ps2pi(a)
2338
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
2339
+ # define _mm_cvtt_ps2pi(a) simde_mm_cvtt_ps2pi((a))
2340
+ # define _mm_cvttps_pi32(a) simde_mm_cvttps_pi32((a))
2341
+ #endif
2342
+
2343
+ SIMDE_FUNCTION_ATTRIBUTES
2344
+ int32_t
2345
+ simde_mm_cvtt_ss2si (simde__m128 a) {
2346
+ #if defined(SIMDE_X86_SSE_NATIVE)
2347
+ return _mm_cvtt_ss2si(a);
2348
+ #else
2349
+ simde__m128_private a_ = simde__m128_to_private(a);
2350
+
2351
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE)
2352
+ return SIMDE_CONVERT_FTOI(int32_t, vgetq_lane_f32(a_.neon_f32, 0));
2353
+ #else
2354
+ simde_float32 v = a_.f32[0];
2355
+ #if !defined(SIMDE_FAST_CONVERSION_RANGE)
2356
+ return ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ?
2357
+ SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN;
2358
+ #else
2359
+ return SIMDE_CONVERT_FTOI(int32_t, v);
2360
+ #endif
2361
+ #endif
2362
+ #endif
2363
+ }
2364
+ #define simde_mm_cvttss_si32(a) simde_mm_cvtt_ss2si((a))
2365
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
2366
+ # define _mm_cvtt_ss2si(a) simde_mm_cvtt_ss2si((a))
2367
+ # define _mm_cvttss_si32(a) simde_mm_cvtt_ss2si((a))
2368
+ #endif
2369
+
2370
+ SIMDE_FUNCTION_ATTRIBUTES
2371
+ int64_t
2372
+ simde_mm_cvttss_si64 (simde__m128 a) {
2373
+ #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(_MSC_VER)
2374
+ #if defined(__PGI)
2375
+ return _mm_cvttss_si64x(a);
2376
+ #else
2377
+ return _mm_cvttss_si64(a);
2378
+ #endif
2379
+ #else
2380
+ simde__m128_private a_ = simde__m128_to_private(a);
2381
+
2382
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
2383
+ return SIMDE_CONVERT_FTOI(int64_t, vgetq_lane_f32(a_.neon_f32, 0));
2384
+ #else
2385
+ return SIMDE_CONVERT_FTOI(int64_t, a_.f32[0]);
2386
+ #endif
2387
+ #endif
2388
+ }
2389
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
2390
+ # define _mm_cvttss_si64(a) simde_mm_cvttss_si64((a))
2391
+ #endif
2392
+
2393
+ SIMDE_FUNCTION_ATTRIBUTES
2394
+ simde__m128
2395
+ simde_mm_cmpord_ss (simde__m128 a, simde__m128 b) {
2396
+ #if defined(SIMDE_X86_SSE_NATIVE)
2397
+ return _mm_cmpord_ss(a, b);
2398
+ #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
2399
+ return simde_mm_move_ss(a, simde_mm_cmpord_ps(a, b));
2400
+ #else
2401
+ simde__m128_private
2402
+ r_,
2403
+ a_ = simde__m128_to_private(a);
2404
+
2405
+ #if defined(simde_math_isnanf)
2406
+ r_.u32[0] = (simde_math_isnanf(simde_mm_cvtss_f32(a)) || simde_math_isnanf(simde_mm_cvtss_f32(b))) ? UINT32_C(0) : ~UINT32_C(0);
2407
+ SIMDE_VECTORIZE
2408
+ for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
2409
+ r_.u32[i] = a_.u32[i];
2410
+ }
2411
+ #else
2412
+ HEDLEY_UNREACHABLE();
2413
+ #endif
2414
+
2415
+ return simde__m128_from_private(r_);
2416
+ #endif
2417
+ }
2418
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
2419
+ # define _mm_cmpord_ss(a, b) simde_mm_cmpord_ss((a), (b))
2420
+ #endif
2421
+
2422
+ SIMDE_FUNCTION_ATTRIBUTES
2423
+ simde__m128
2424
+ simde_mm_div_ps (simde__m128 a, simde__m128 b) {
2425
+ #if defined(SIMDE_X86_SSE_NATIVE)
2426
+ return _mm_div_ps(a, b);
2427
+ #else
2428
+ simde__m128_private
2429
+ r_,
2430
+ a_ = simde__m128_to_private(a),
2431
+ b_ = simde__m128_to_private(b);
2432
+
2433
+ #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
2434
+ r_.neon_f32 = vdivq_f32(a_.neon_f32, b_.neon_f32);
2435
+ #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
2436
+ float32x4_t recip0 = vrecpeq_f32(b_.neon_f32);
2437
+ float32x4_t recip1 = vmulq_f32(recip0, vrecpsq_f32(recip0, b_.neon_f32));
2438
+ r_.neon_f32 = vmulq_f32(a_.neon_f32, recip1);
2439
+ #elif defined(SIMDE_WASM_SIMD128_NATIVE)
2440
+ r_.wasm_v128 = wasm_f32x4_div(a_.wasm_v128, b_.wasm_v128);
2441
+ #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
2442
+ r_.altivec_f32 = vec_div(a_.altivec_f32, b_.altivec_f32);
2443
+ #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
2444
+ r_.f32 = a_.f32 / b_.f32;
2445
+ #else
2446
+ SIMDE_VECTORIZE
2447
+ for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
2448
+ r_.f32[i] = a_.f32[i] / b_.f32[i];
2449
+ }
2450
+ #endif
2451
+
2452
+ return simde__m128_from_private(r_);
2453
+ #endif
2454
+ }
2455
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
2456
+ # define _mm_div_ps(a, b) simde_mm_div_ps((a), (b))
2457
+ #endif
2458
+
2459
+ SIMDE_FUNCTION_ATTRIBUTES
2460
+ simde__m128
2461
+ simde_mm_div_ss (simde__m128 a, simde__m128 b) {
2462
+ #if defined(SIMDE_X86_SSE_NATIVE)
2463
+ return _mm_div_ss(a, b);
2464
+ #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
2465
+ return simde_mm_move_ss(a, simde_mm_div_ps(a, b));
2466
+ #else
2467
+ simde__m128_private
2468
+ r_,
2469
+ a_ = simde__m128_to_private(a),
2470
+ b_ = simde__m128_to_private(b);
2471
+
2472
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
2473
+ float32_t value =
2474
+ vgetq_lane_f32(simde__m128_to_private(simde_mm_div_ps(a, b)).neon_f32, 0);
2475
+ r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0);
2476
+ #else
2477
+ r_.f32[0] = a_.f32[0] / b_.f32[0];
2478
+ SIMDE_VECTORIZE
2479
+ for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
2480
+ r_.f32[i] = a_.f32[i];
2481
+ }
2482
+ #endif
2483
+
2484
+ return simde__m128_from_private(r_);
2485
+ #endif
2486
+ }
2487
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
2488
+ # define _mm_div_ss(a, b) simde_mm_div_ss((a), (b))
2489
+ #endif
2490
+
2491
+ SIMDE_FUNCTION_ATTRIBUTES
2492
+ int16_t
2493
+ simde_mm_extract_pi16 (simde__m64 a, const int imm8)
2494
+ SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) {
2495
+ simde__m64_private a_ = simde__m64_to_private(a);
2496
+ return a_.i16[imm8];
2497
+ }
2498
+ #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(HEDLEY_PGI_VERSION)
2499
+ # if defined(SIMDE_BUG_CLANG_44589)
2500
+ # define simde_mm_extract_pi16(a, imm8) ( \
2501
+ HEDLEY_DIAGNOSTIC_PUSH \
2502
+ _Pragma("clang diagnostic ignored \"-Wvector-conversion\"") \
2503
+ HEDLEY_STATIC_CAST(int16_t, _mm_extract_pi16((a), (imm8))) \
2504
+ HEDLEY_DIAGNOSTIC_POP \
2505
+ )
2506
+ # else
2507
+ # define simde_mm_extract_pi16(a, imm8) HEDLEY_STATIC_CAST(int16_t, _mm_extract_pi16(a, imm8))
2508
+ # endif
2509
+ #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
2510
+ # define simde_mm_extract_pi16(a, imm8) vget_lane_s16(simde__m64_to_private(a).neon_i16, imm8)
2511
+ #endif
2512
+ #define simde_m_pextrw(a, imm8) simde_mm_extract_pi16(a, imm8)
2513
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
2514
+ # define _mm_extract_pi16(a, imm8) simde_mm_extract_pi16((a), (imm8))
2515
+ # define _m_pextrw(a, imm8) simde_mm_extract_pi16((a), (imm8))
2516
+ #endif
2517
+
2518
+ SIMDE_FUNCTION_ATTRIBUTES
2519
+ simde__m64
2520
+ simde_mm_insert_pi16 (simde__m64 a, int16_t i, const int imm8)
2521
+ SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) {
2522
+ simde__m64_private
2523
+ r_,
2524
+ a_ = simde__m64_to_private(a);
2525
+
2526
+ r_.i64[0] = a_.i64[0];
2527
+ r_.i16[imm8] = i;
2528
+
2529
+ return simde__m64_from_private(r_);
2530
+ }
2531
+ #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI)
2532
+ # if defined(SIMDE_BUG_CLANG_44589)
2533
+ # define simde_mm_insert_pi16(a, i, imm8) ( \
2534
+ HEDLEY_DIAGNOSTIC_PUSH \
2535
+ _Pragma("clang diagnostic ignored \"-Wvector-conversion\"") \
2536
+ (_mm_insert_pi16((a), (i), (imm8))) \
2537
+ HEDLEY_DIAGNOSTIC_POP \
2538
+ )
2539
+ # else
2540
+ # define simde_mm_insert_pi16(a, i, imm8) _mm_insert_pi16(a, i, imm8)
2541
+ # endif
2542
+ #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
2543
+ # define simde_mm_insert_pi16(a, i, imm8) simde__m64_from_neon_i16(vset_lane_s16((i), simde__m64_to_neon_i16(a), (imm8)))
2544
+ #endif
2545
+ #define simde_m_pinsrw(a, i, imm8) (simde_mm_insert_pi16(a, i, imm8))
2546
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
2547
+ # define _mm_insert_pi16(a, i, imm8) simde_mm_insert_pi16(a, i, imm8)
2548
+ # define _m_pinsrw(a, i, imm8) simde_mm_insert_pi16(a, i, imm8)
2549
+ #endif
2550
+
2551
+ SIMDE_FUNCTION_ATTRIBUTES
2552
+ simde__m128
2553
+ simde_mm_load_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) {
2554
+ #if defined(SIMDE_X86_SSE_NATIVE)
2555
+ return _mm_load_ps(mem_addr);
2556
+ #else
2557
+ simde__m128_private r_;
2558
+
2559
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
2560
+ r_.neon_f32 = vld1q_f32(mem_addr);
2561
+ #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
2562
+ r_.altivec_f32 = vec_vsx_ld(0, mem_addr);
2563
+ #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
2564
+ r_.altivec_f32 = vec_ld(0, mem_addr);
2565
+ #else
2566
+ simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128), sizeof(r_));
2567
+ #endif
2568
+
2569
+ return simde__m128_from_private(r_);
2570
+ #endif
2571
+ }
2572
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
2573
+ # define _mm_load_ps(mem_addr) simde_mm_load_ps(mem_addr)
2574
+ #endif
2575
+
2576
+ SIMDE_FUNCTION_ATTRIBUTES
2577
+ simde__m128
2578
+ simde_mm_load1_ps (simde_float32 const* mem_addr) {
2579
+ #if defined(SIMDE_X86_SSE_NATIVE)
2580
+ return _mm_load_ps1(mem_addr);
2581
+ #else
2582
+ simde__m128_private r_;
2583
+
2584
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
2585
+ r_.neon_f32 = vld1q_dup_f32(mem_addr);
2586
+ #else
2587
+ r_ = simde__m128_to_private(simde_mm_set1_ps(*mem_addr));
2588
+ #endif
2589
+
2590
+ return simde__m128_from_private(r_);
2591
+ #endif
2592
+ }
2593
+ #define simde_mm_load_ps1(mem_addr) simde_mm_load1_ps(mem_addr)
2594
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
2595
+ # define _mm_load_ps1(mem_addr) simde_mm_load1_ps(mem_addr)
2596
+ # define _mm_load1_ps(mem_addr) simde_mm_load1_ps(mem_addr)
2597
+ #endif
2598
+
2599
+ SIMDE_FUNCTION_ATTRIBUTES
2600
+ simde__m128
2601
+ simde_mm_load_ss (simde_float32 const* mem_addr) {
2602
+ #if defined(SIMDE_X86_SSE_NATIVE)
2603
+ return _mm_load_ss(mem_addr);
2604
+ #else
2605
+ simde__m128_private r_;
2606
+
2607
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
2608
+ r_.neon_f32 = vsetq_lane_f32(*mem_addr, vdupq_n_f32(0), 0);
2609
+ #else
2610
+ r_.f32[0] = *mem_addr;
2611
+ r_.i32[1] = 0;
2612
+ r_.i32[2] = 0;
2613
+ r_.i32[3] = 0;
2614
+ #endif
2615
+
2616
+ return simde__m128_from_private(r_);
2617
+ #endif
2618
+ }
2619
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
2620
+ # define _mm_load_ss(mem_addr) simde_mm_load_ss(mem_addr)
2621
+ #endif
2622
+
2623
+ SIMDE_FUNCTION_ATTRIBUTES
2624
+ simde__m128
2625
+ simde_mm_loadh_pi (simde__m128 a, simde__m64 const* mem_addr) {
2626
+ #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
2627
+ return _mm_loadh_pi(a, HEDLEY_REINTERPRET_CAST(__m64 const*, mem_addr));
2628
+ #else
2629
+ simde__m128_private
2630
+ r_,
2631
+ a_ = simde__m128_to_private(a);
2632
+
2633
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
2634
+ r_.neon_f32 = vcombine_f32(vget_low_f32(a_.neon_f32), vld1_f32(HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr)));
2635
+ #else
2636
+ simde__m64_private b_ = *HEDLEY_REINTERPRET_CAST(simde__m64_private const*, mem_addr);
2637
+ r_.f32[0] = a_.f32[0];
2638
+ r_.f32[1] = a_.f32[1];
2639
+ r_.f32[2] = b_.f32[0];
2640
+ r_.f32[3] = b_.f32[1];
2641
+ #endif
2642
+
2643
+ return simde__m128_from_private(r_);
2644
+ #endif
2645
+ }
2646
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
2647
+ #if HEDLEY_HAS_WARNING("-Wold-style-cast")
2648
+ #define _mm_loadh_pi(a, mem_addr) simde_mm_loadh_pi((a), HEDLEY_REINTERPRET_CAST(simde__m64 const*, (mem_addr)))
2649
+ #else
2650
+ #define _mm_loadh_pi(a, mem_addr) simde_mm_loadh_pi((a), (simde__m64 const*) (mem_addr))
2651
+ #endif
2652
+ #endif
2653
+
2654
+ /* The SSE documentation says that there are no alignment requirements
2655
+ for mem_addr. Unfortunately they used the __m64 type for the argument
2656
+ which is supposed to be 8-byte aligned, so some compilers (like clang
2657
+ with -Wcast-align) will generate a warning if you try to cast, say,
2658
+ a simde_float32* to a simde__m64* for this function.
2659
+
2660
+ I think the choice of argument type is unfortunate, but I do think we
2661
+ need to stick to it here. If there is demand I can always add something
2662
+ like simde_x_mm_loadl_f32(simde__m128, simde_float32 mem_addr[2]) */
2663
+ SIMDE_FUNCTION_ATTRIBUTES
2664
+ simde__m128
2665
+ simde_mm_loadl_pi (simde__m128 a, simde__m64 const* mem_addr) {
2666
+ #if defined(SIMDE_X86_SSE_NATIVE)
2667
+ return _mm_loadl_pi(a, HEDLEY_REINTERPRET_CAST(__m64 const*, mem_addr));
2668
+ #else
2669
+ simde__m128_private
2670
+ r_,
2671
+ a_ = simde__m128_to_private(a);
2672
+
2673
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
2674
+ r_.neon_f32 = vcombine_f32(vld1_f32(
2675
+ HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr)), vget_high_f32(a_.neon_f32));
2676
+ #else
2677
+ simde__m64_private b_;
2678
+ simde_memcpy(&b_, mem_addr, sizeof(b_));
2679
+ r_.i32[0] = b_.i32[0];
2680
+ r_.i32[1] = b_.i32[1];
2681
+ r_.i32[2] = a_.i32[2];
2682
+ r_.i32[3] = a_.i32[3];
2683
+ #endif
2684
+
2685
+ return simde__m128_from_private(r_);
2686
+ #endif
2687
+ }
2688
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
2689
+ #if HEDLEY_HAS_WARNING("-Wold-style-cast")
2690
+ #define _mm_loadl_pi(a, mem_addr) simde_mm_loadl_pi((a), HEDLEY_REINTERPRET_CAST(simde__m64 const*, (mem_addr)))
2691
+ #else
2692
+ #define _mm_loadl_pi(a, mem_addr) simde_mm_loadl_pi((a), (simde__m64 const*) (mem_addr))
2693
+ #endif
2694
+ #endif
2695
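A brief usage sketch of simde_mm_loadl_pi, following the casting concern described in the comment above; the include path is assumed, and the reinterpret cast mirrors what the native-alias macro does. On the native path the pointer is handed to _mm_loadl_pi, which imposes no alignment requirement:

    #include <stdio.h>
    #include "simde/x86/sse.h"   /* assumed include path for the bundled header */

    int main(void) {
      simde_float32 lo[2] = { 1.0f, 2.0f };
      simde__m128 a = simde_mm_set_ps(7.0f, 6.0f, 5.0f, 4.0f);
      /* Replace the low two lanes of a with lo[0], lo[1]; the upper lanes are kept. */
      simde__m128 r = simde_mm_loadl_pi(a, HEDLEY_REINTERPRET_CAST(simde__m64 const*, lo));
      simde_float32 out[4];
      simde_mm_storeu_ps(out, r);
      printf("%f %f %f %f\n", out[0], out[1], out[2], out[3]);   /* 1 2 6 7 */
      return 0;
    }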
+
2696
+ SIMDE_FUNCTION_ATTRIBUTES
2697
+ simde__m128
2698
+ simde_mm_loadr_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) {
2699
+ #if defined(SIMDE_X86_SSE_NATIVE)
2700
+ return _mm_loadr_ps(mem_addr);
2701
+ #else
2702
+ simde__m128_private
2703
+ r_,
2704
+ v_ = simde__m128_to_private(simde_mm_load_ps(mem_addr));
2705
+
2706
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
2707
+ r_.neon_f32 = vrev64q_f32(v_.neon_f32);
2708
+ r_.neon_f32 = vextq_f32(r_.neon_f32, r_.neon_f32, 2);
2709
+ #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && defined(__PPC64__)
2710
+ r_.altivec_f32 = vec_reve(v_.altivec_f32);
2711
+ #elif defined(SIMDE_SHUFFLE_VECTOR_)
2712
+ r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, v_.f32, v_.f32, 3, 2, 1, 0);
2713
+ #else
2714
+ r_.f32[0] = v_.f32[3];
2715
+ r_.f32[1] = v_.f32[2];
2716
+ r_.f32[2] = v_.f32[1];
2717
+ r_.f32[3] = v_.f32[0];
2718
+ #endif
2719
+
2720
+ return simde__m128_from_private(r_);
2721
+ #endif
2722
+ }
2723
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
2724
+ # define _mm_loadr_ps(mem_addr) simde_mm_loadr_ps(mem_addr)
2725
+ #endif
2726
+
2727
+ SIMDE_FUNCTION_ATTRIBUTES
2728
+ simde__m128
2729
+ simde_mm_loadu_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) {
2730
+ #if defined(SIMDE_X86_SSE_NATIVE)
2731
+ return _mm_loadu_ps(mem_addr);
2732
+ #else
2733
+ simde__m128_private r_;
2734
+
2735
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
2736
+ r_.neon_f32 = vld1q_f32(HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr));
2737
+ #elif defined(SIMDE_WASM_SIMD128_NATIVE)
2738
+ r_.wasm_v128 = wasm_v128_load(mem_addr);
2739
+ #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && defined(__PPC64__)
2740
+ r_.altivec_f32 = vec_vsx_ld(0, mem_addr);
2741
+ #else
2742
+ simde_memcpy(&r_, mem_addr, sizeof(r_));
2743
+ #endif
2744
+
2745
+ return simde__m128_from_private(r_);
2746
+ #endif
2747
+ }
2748
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
2749
+ # define _mm_loadu_ps(mem_addr) simde_mm_loadu_ps(mem_addr)
2750
+ #endif
2751
+
2752
+ SIMDE_FUNCTION_ATTRIBUTES
2753
+ void
2754
+ simde_mm_maskmove_si64 (simde__m64 a, simde__m64 mask, int8_t* mem_addr) {
2755
+ #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
2756
+ _mm_maskmove_si64(a, mask, HEDLEY_REINTERPRET_CAST(char*, mem_addr));
2757
+ #else
2758
+ simde__m64_private
2759
+ a_ = simde__m64_to_private(a),
2760
+ mask_ = simde__m64_to_private(mask);
2761
+
2762
+ SIMDE_VECTORIZE
2763
+ for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++)
2764
+ if (mask_.i8[i] < 0)
2765
+ mem_addr[i] = a_.i8[i];
2766
+ #endif
2767
+ }
2768
+ #define simde_m_maskmovq(a, mask, mem_addr) simde_mm_maskmove_si64(a, mask, mem_addr)
2769
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
2770
+ # define _mm_maskmove_si64(a, mask, mem_addr) simde_mm_maskmove_si64((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr)))
2771
+ # define _m_maskmovq(a, mask, mem_addr) simde_mm_maskmove_si64((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr)))
2772
+ #endif
2773
+
2774
+ SIMDE_FUNCTION_ATTRIBUTES
2775
+ simde__m64
2776
+ simde_mm_max_pi16 (simde__m64 a, simde__m64 b) {
2777
+ #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
2778
+ return _mm_max_pi16(a, b);
2779
+ #else
2780
+ simde__m64_private
2781
+ r_,
2782
+ a_ = simde__m64_to_private(a),
2783
+ b_ = simde__m64_to_private(b);
2784
+
2785
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
2786
+ r_.neon_i16 = vmax_s16(a_.neon_i16, b_.neon_i16);
2787
+ #else
2788
+ SIMDE_VECTORIZE
2789
+ for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
2790
+ r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? a_.i16[i] : b_.i16[i];
2791
+ }
2792
+ #endif
2793
+
2794
+ return simde__m64_from_private(r_);
2795
+ #endif
2796
+ }
2797
+ #define simde_m_pmaxsw(a, b) simde_mm_max_pi16(a, b)
2798
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
2799
+ # define _mm_max_pi16(a, b) simde_mm_max_pi16(a, b)
2800
+ # define _m_pmaxsw(a, b) simde_mm_max_pi16(a, b)
2801
+ #endif
2802
+
2803
+ SIMDE_FUNCTION_ATTRIBUTES
2804
+ simde__m128
2805
+ simde_mm_max_ps (simde__m128 a, simde__m128 b) {
2806
+ #if defined(SIMDE_X86_SSE_NATIVE)
2807
+ return _mm_max_ps(a, b);
2808
+ #else
2809
+ simde__m128_private
2810
+ r_,
2811
+ a_ = simde__m128_to_private(a),
2812
+ b_ = simde__m128_to_private(b);
2813
+
2814
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_NANS)
2815
+ r_.neon_f32 = vmaxq_f32(a_.neon_f32, b_.neon_f32);
2816
+ #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
2817
+ r_.neon_f32 = vbslq_f32(vcgtq_f32(a_.neon_f32, b_.neon_f32), a_.neon_f32, b_.neon_f32);
2818
+ #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_NANS)
2819
+ r_.wasm_v128 = wasm_f32x4_max(a_.wasm_v128, b_.wasm_v128);
2820
+ #elif defined(SIMDE_WASM_SIMD128_NATIVE)
2821
+ r_.wasm_v128 = wasm_v128_bitselect(a_.wasm_v128, b_.wasm_v128, wasm_f32x4_gt(a_.wasm_v128, b_.wasm_v128));
2822
+ #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_FAST_NANS)
2823
+ r_.altivec_f32 = vec_max(a_.altivec_f32, b_.altivec_f32);
2824
+ #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
2825
+ r_.altivec_f32 = vec_sel(b_.altivec_f32, a_.altivec_f32, vec_cmpgt(a_.altivec_f32, b_.altivec_f32));
2826
+ #else
2827
+ SIMDE_VECTORIZE
2828
+ for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
2829
+ r_.f32[i] = (a_.f32[i] > b_.f32[i]) ? a_.f32[i] : b_.f32[i];
2830
+ }
2831
+ #endif
2832
+
2833
+ return simde__m128_from_private(r_);
2834
+ #endif
2835
+ }
2836
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
2837
+ # define _mm_max_ps(a, b) simde_mm_max_ps((a), (b))
2838
+ #endif
2839
+
2840
+ SIMDE_FUNCTION_ATTRIBUTES
2841
+ simde__m64
2842
+ simde_mm_max_pu8 (simde__m64 a, simde__m64 b) {
2843
+ #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
2844
+ return _mm_max_pu8(a, b);
2845
+ #else
2846
+ simde__m64_private
2847
+ r_,
2848
+ a_ = simde__m64_to_private(a),
2849
+ b_ = simde__m64_to_private(b);
2850
+
2851
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
2852
+ r_.neon_u8 = vmax_u8(a_.neon_u8, b_.neon_u8);
2853
+ #else
2854
+ SIMDE_VECTORIZE
2855
+ for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
2856
+ r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? a_.u8[i] : b_.u8[i];
2857
+ }
2858
+ #endif
2859
+
2860
+ return simde__m64_from_private(r_);
2861
+ #endif
2862
+ }
2863
+ #define simde_m_pmaxub(a, b) simde_mm_max_pu8(a, b)
2864
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
2865
+ # define _mm_max_pu8(a, b) simde_mm_max_pu8(a, b)
2866
+ # define _m_pmaxub(a, b) simde_mm_max_pu8(a, b)
2867
+ #endif
2868
+
2869
+ SIMDE_FUNCTION_ATTRIBUTES
2870
+ simde__m128
2871
+ simde_mm_max_ss (simde__m128 a, simde__m128 b) {
2872
+ #if defined(SIMDE_X86_SSE_NATIVE)
2873
+ return _mm_max_ss(a, b);
2874
+ #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
2875
+ return simde_mm_move_ss(a, simde_mm_max_ps(a, b));
2876
+ #else
2877
+ simde__m128_private
2878
+ r_,
2879
+ a_ = simde__m128_to_private(a),
2880
+ b_ = simde__m128_to_private(b);
2881
+
2882
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
2883
+ float32_t value = vgetq_lane_f32(vmaxq_f32(a_.neon_f32, b_.neon_f32), 0);
2884
+ r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0);
2885
+ #else
2886
+ r_.f32[0] = (a_.f32[0] > b_.f32[0]) ? a_.f32[0] : b_.f32[0];
2887
+ r_.f32[1] = a_.f32[1];
2888
+ r_.f32[2] = a_.f32[2];
2889
+ r_.f32[3] = a_.f32[3];
2890
+ #endif
2891
+
2892
+ return simde__m128_from_private(r_);
2893
+ #endif
2894
+ }
2895
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
2896
+ # define _mm_max_ss(a, b) simde_mm_max_ss((a), (b))
2897
+ #endif
2898
+
2899
+ SIMDE_FUNCTION_ATTRIBUTES
2900
+ simde__m64
2901
+ simde_mm_min_pi16 (simde__m64 a, simde__m64 b) {
2902
+ #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
2903
+ return _mm_min_pi16(a, b);
2904
+ #else
2905
+ simde__m64_private
2906
+ r_,
2907
+ a_ = simde__m64_to_private(a),
2908
+ b_ = simde__m64_to_private(b);
2909
+
2910
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
2911
+ r_.neon_i16 = vmin_s16(a_.neon_i16, b_.neon_i16);
2912
+ #else
2913
+ SIMDE_VECTORIZE
2914
+ for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
2915
+ r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i];
2916
+ }
2917
+ #endif
2918
+
2919
+ return simde__m64_from_private(r_);
2920
+ #endif
2921
+ }
2922
+ #define simde_m_pminsw(a, b) simde_mm_min_pi16(a, b)
2923
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
2924
+ # define _mm_min_pi16(a, b) simde_mm_min_pi16(a, b)
2925
+ # define _m_pminsw(a, b) simde_mm_min_pi16(a, b)
2926
+ #endif
2927
+
2928
+ SIMDE_FUNCTION_ATTRIBUTES
2929
+ simde__m128
2930
+ simde_mm_min_ps (simde__m128 a, simde__m128 b) {
2931
+ #if defined(SIMDE_X86_SSE_NATIVE)
2932
+ return _mm_min_ps(a, b);
2933
+ #elif defined(SIMDE_FAST_NANS) && defined(SIMDE_ARM_NEON_A32V7_NATIVE)
2934
+ return simde__m128_from_neon_f32(vminq_f32(simde__m128_to_neon_f32(a), simde__m128_to_neon_f32(b)));
2935
+ #elif defined(SIMDE_WASM_SIMD128_NATIVE)
2936
+ simde__m128_private
2937
+ r_,
2938
+ a_ = simde__m128_to_private(a),
2939
+ b_ = simde__m128_to_private(b);
2940
+ #if defined(SIMDE_FAST_NANS)
2941
+ r_.wasm_v128 = wasm_f32x4_min(a_.wasm_v128, b_.wasm_v128);
2942
+ #else
2943
+ r_.wasm_v128 = wasm_v128_bitselect(a_.wasm_v128, b_.wasm_v128, wasm_f32x4_lt(a_.wasm_v128, b_.wasm_v128));
2944
+ #endif
2945
+ return simde__m128_from_private(r_);
2946
+ #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
2947
+ simde__m128_private
2948
+ r_,
2949
+ a_ = simde__m128_to_private(a),
2950
+ b_ = simde__m128_to_private(b);
2951
+
2952
+ #if defined(SIMDE_FAST_NANS)
2953
+ r_.altivec_f32 = vec_min(a_.altivec_f32, b_.altivec_f32);
2954
+ #else
2955
+ r_.altivec_f32 = vec_sel(b_.altivec_f32, a_.altivec_f32, vec_cmpgt(b_.altivec_f32, a_.altivec_f32));
2956
+ #endif
2957
+
2958
+ return simde__m128_from_private(r_);
2959
+ #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
2960
+ simde__m128 mask = simde_mm_cmplt_ps(a, b);
2961
+ return simde_mm_or_ps(simde_mm_and_ps(mask, a), simde_mm_andnot_ps(mask, b));
2962
+ #else
2963
+ simde__m128_private
2964
+ r_,
2965
+ a_ = simde__m128_to_private(a),
2966
+ b_ = simde__m128_to_private(b);
2967
+
2968
+ SIMDE_VECTORIZE
2969
+ for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
2970
+ r_.f32[i] = (a_.f32[i] < b_.f32[i]) ? a_.f32[i] : b_.f32[i];
2971
+ }
2972
+
2973
+ return simde__m128_from_private(r_);
2974
+ #endif
2975
+ }
2976
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
2977
+ # define _mm_min_ps(a, b) simde_mm_min_ps((a), (b))
2978
+ #endif
2979
+
2980
+ SIMDE_FUNCTION_ATTRIBUTES
2981
+ simde__m64
2982
+ simde_mm_min_pu8 (simde__m64 a, simde__m64 b) {
2983
+ #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
2984
+ return _mm_min_pu8(a, b);
2985
+ #else
2986
+ simde__m64_private
2987
+ r_,
2988
+ a_ = simde__m64_to_private(a),
2989
+ b_ = simde__m64_to_private(b);
2990
+
2991
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
2992
+ r_.neon_u8 = vmin_u8(a_.neon_u8, b_.neon_u8);
2993
+ #else
2994
+ SIMDE_VECTORIZE
2995
+ for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
2996
+ r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? a_.u8[i] : b_.u8[i];
2997
+ }
2998
+ #endif
2999
+
3000
+ return simde__m64_from_private(r_);
3001
+ #endif
3002
+ }
3003
+ #define simde_m_pminub(a, b) simde_mm_min_pu8(a, b)
3004
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
3005
+ # define _mm_min_pu8(a, b) simde_mm_min_pu8(a, b)
3006
+ # define _m_pminub(a, b) simde_mm_min_pu8(a, b)
3007
+ #endif
3008
+
3009
+ SIMDE_FUNCTION_ATTRIBUTES
3010
+ simde__m128
3011
+ simde_mm_min_ss (simde__m128 a, simde__m128 b) {
3012
+ #if defined(SIMDE_X86_SSE_NATIVE)
3013
+ return _mm_min_ss(a, b);
3014
+ #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
3015
+ return simde_mm_move_ss(a, simde_mm_min_ps(a, b));
3016
+ #else
3017
+ simde__m128_private
3018
+ r_,
3019
+ a_ = simde__m128_to_private(a),
3020
+ b_ = simde__m128_to_private(b);
3021
+
3022
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
3023
+ float32_t value = vgetq_lane_f32(vminq_f32(a_.neon_f32, b_.neon_f32), 0);
3024
+ r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0);
3025
+ #else
3026
+ r_.f32[0] = (a_.f32[0] < b_.f32[0]) ? a_.f32[0] : b_.f32[0];
3027
+ r_.f32[1] = a_.f32[1];
3028
+ r_.f32[2] = a_.f32[2];
3029
+ r_.f32[3] = a_.f32[3];
3030
+ #endif
3031
+
3032
+ return simde__m128_from_private(r_);
3033
+ #endif
3034
+ }
3035
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
3036
+ # define _mm_min_ss(a, b) simde_mm_min_ss((a), (b))
3037
+ #endif
3038
+
3039
+ SIMDE_FUNCTION_ATTRIBUTES
3040
+ simde__m128
3041
+ simde_mm_movehl_ps (simde__m128 a, simde__m128 b) {
3042
+ #if defined(SIMDE_X86_SSE_NATIVE)
3043
+ return _mm_movehl_ps(a, b);
3044
+ #else
3045
+ simde__m128_private
3046
+ r_,
3047
+ a_ = simde__m128_to_private(a),
3048
+ b_ = simde__m128_to_private(b);
3049
+
3050
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
3051
+ float32x2_t a32 = vget_high_f32(a_.neon_f32);
3052
+ float32x2_t b32 = vget_high_f32(b_.neon_f32);
3053
+ r_.neon_f32 = vcombine_f32(b32, a32);
3054
+ #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
3055
+ r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float),
3056
+ vec_mergel(b_.altivec_i64, a_.altivec_i64));
3057
+ #elif defined(SIMDE_SHUFFLE_VECTOR_)
3058
+ r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 6, 7, 2, 3);
3059
+ #else
3060
+ r_.f32[0] = b_.f32[2];
3061
+ r_.f32[1] = b_.f32[3];
3062
+ r_.f32[2] = a_.f32[2];
3063
+ r_.f32[3] = a_.f32[3];
3064
+ #endif
3065
+
3066
+ return simde__m128_from_private(r_);
3067
+ #endif
3068
+ }
3069
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
3070
+ # define _mm_movehl_ps(a, b) simde_mm_movehl_ps((a), (b))
3071
+ #endif
3072
+
3073
+ SIMDE_FUNCTION_ATTRIBUTES
3074
+ simde__m128
3075
+ simde_mm_movelh_ps (simde__m128 a, simde__m128 b) {
3076
+ #if defined(SIMDE_X86_SSE_NATIVE)
3077
+ return _mm_movelh_ps(a, b);
3078
+ #else
3079
+ simde__m128_private
3080
+ r_,
3081
+ a_ = simde__m128_to_private(a),
3082
+ b_ = simde__m128_to_private(b);
3083
+
3084
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
3085
+ float32x2_t a10 = vget_low_f32(a_.neon_f32);
3086
+ float32x2_t b10 = vget_low_f32(b_.neon_f32);
3087
+ r_.neon_f32 = vcombine_f32(a10, b10);
3088
+ #elif defined(SIMDE_SHUFFLE_VECTOR_)
3089
+ r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 1, 4, 5);
3090
+ #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
3091
+ r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float),
3092
+ vec_mergeh(a_.altivec_i64, b_.altivec_i64));
3093
+ #else
3094
+ r_.f32[0] = a_.f32[0];
3095
+ r_.f32[1] = a_.f32[1];
3096
+ r_.f32[2] = b_.f32[0];
3097
+ r_.f32[3] = b_.f32[1];
3098
+ #endif
3099
+
3100
+ return simde__m128_from_private(r_);
3101
+ #endif
3102
+ }
3103
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
3104
+ # define _mm_movelh_ps(a, b) simde_mm_movelh_ps((a), (b))
3105
+ #endif
3106
+
3107
+ SIMDE_FUNCTION_ATTRIBUTES
3108
+ int
3109
+ simde_mm_movemask_pi8 (simde__m64 a) {
3110
+ #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
3111
+ return _mm_movemask_pi8(a);
3112
+ #else
3113
+ simde__m64_private a_ = simde__m64_to_private(a);
3114
+ int r = 0;
3115
+
3116
+ #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
3117
+ uint8x8_t input = a_.neon_u8;
3118
+ const int8_t xr[8] = {-7, -6, -5, -4, -3, -2, -1, 0};
3119
+ const uint8x8_t mask_and = vdup_n_u8(0x80);
3120
+ const int8x8_t mask_shift = vld1_s8(xr);
3121
+ const uint8x8_t mask_result = vshl_u8(vand_u8(input, mask_and), mask_shift);
3122
+ uint8x8_t lo = mask_result;
3123
+ r = vaddv_u8(lo);
3124
+ #else
3125
+ const size_t nmemb = sizeof(a_.i8) / sizeof(a_.i8[0]);
3126
+ SIMDE_VECTORIZE_REDUCTION(|:r)
3127
+ for (size_t i = 0 ; i < nmemb ; i++) {
3128
+ r |= (a_.u8[nmemb - 1 - i] >> 7) << (nmemb - 1 - i);
3129
+ }
3130
+ #endif
3131
+
3132
+ return r;
3133
+ #endif
3134
+ }
3135
+ #define simde_m_pmovmskb(a) simde_mm_movemask_pi8(a)
3136
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
3137
+ # define _mm_movemask_pi8(a) simde_mm_movemask_pi8(a)
3138
+ # define _m_pmovmskb(a) simde_mm_movemask_pi8(a)
3139
+ #endif
3140
+
3141
+ SIMDE_FUNCTION_ATTRIBUTES
3142
+ int
3143
+ simde_mm_movemask_ps (simde__m128 a) {
3144
+ #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
3145
+ return _mm_movemask_ps(a);
3146
+ #else
3147
+ int r = 0;
3148
+ simde__m128_private a_ = simde__m128_to_private(a);
3149
+
3150
+ #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
3151
+ static const int32_t shift_amount[] = { 0, 1, 2, 3 };
3152
+ const int32x4_t shift = vld1q_s32(shift_amount);
3153
+ uint32x4_t tmp = vshrq_n_u32(a_.neon_u32, 31);
3154
+ return HEDLEY_STATIC_CAST(int, vaddvq_u32(vshlq_u32(tmp, shift)));
3155
+ #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
3156
+ // Shift out everything but the sign bits with a 32-bit unsigned shift right.
3157
+ uint64x2_t high_bits = vreinterpretq_u64_u32(vshrq_n_u32(a_.neon_u32, 31));
3158
+ // Merge the two pairs together with a 64-bit unsigned shift right + add.
3159
+ uint8x16_t paired = vreinterpretq_u8_u64(vsraq_n_u64(high_bits, high_bits, 31));
3160
+ // Extract the result.
3161
+ return vgetq_lane_u8(paired, 0) | (vgetq_lane_u8(paired, 8) << 2);
3162
+ #else
3163
+ SIMDE_VECTORIZE_REDUCTION(|:r)
3164
+ for (size_t i = 0 ; i < sizeof(a_.u32) / sizeof(a_.u32[0]) ; i++) {
3165
+ r |= (a_.u32[i] >> ((sizeof(a_.u32[i]) * CHAR_BIT) - 1)) << i;
3166
+ }
3167
+ #endif
3168
+
3169
+ return r;
3170
+ #endif
3171
+ }
3172
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
3173
+ # define _mm_movemask_ps(a) simde_mm_movemask_ps((a))
3174
+ #endif
3175
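A small sketch of how the sign-bit packing above is typically consumed: compare two vectors, then read the per-lane result as a 4-bit mask. The include path is assumed; simde_mm_cmpeq_ps is the simde counterpart of _mm_cmpeq_ps defined earlier in this header:

    #include <stdio.h>
    #include "simde/x86/sse.h"   /* assumed include path for the bundled header */

    int main(void) {
      /* f32 lanes, low to high: a = 1 2 3 4, b = 0 2 0 4 */
      simde__m128 a = simde_mm_set_ps(4.0f, 3.0f, 2.0f, 1.0f);
      simde__m128 b = simde_mm_set_ps(4.0f, 0.0f, 2.0f, 0.0f);
      int mask = simde_mm_movemask_ps(simde_mm_cmpeq_ps(a, b));
      printf("0x%x\n", mask);   /* lanes 1 and 3 are equal -> 0xa */
      return 0;
    }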
+
3176
+ SIMDE_FUNCTION_ATTRIBUTES
3177
+ simde__m128
3178
+ simde_mm_mul_ps (simde__m128 a, simde__m128 b) {
3179
+ #if defined(SIMDE_X86_SSE_NATIVE)
3180
+ return _mm_mul_ps(a, b);
3181
+ #else
3182
+ simde__m128_private
3183
+ r_,
3184
+ a_ = simde__m128_to_private(a),
3185
+ b_ = simde__m128_to_private(b);
3186
+
3187
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
3188
+ r_.neon_f32 = vmulq_f32(a_.neon_f32, b_.neon_f32);
3189
+ #elif defined(SIMDE_WASM_SIMD128_NATIVE)
3190
+ r_.wasm_v128 = wasm_f32x4_mul(a_.wasm_v128, b_.wasm_v128);
3191
+ #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
3192
+ r_.f32 = a_.f32 * b_.f32;
3193
+ #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
3194
+ r_.altivec_f32 = vec_mul(a_.altivec_f32, b_.altivec_f32);
3195
+ #else
3196
+ SIMDE_VECTORIZE
3197
+ for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
3198
+ r_.f32[i] = a_.f32[i] * b_.f32[i];
3199
+ }
3200
+ #endif
3201
+
3202
+ return simde__m128_from_private(r_);
3203
+ #endif
3204
+ }
3205
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
3206
+ # define _mm_mul_ps(a, b) simde_mm_mul_ps((a), (b))
3207
+ #endif
3208
+
3209
+ SIMDE_FUNCTION_ATTRIBUTES
3210
+ simde__m128
3211
+ simde_mm_mul_ss (simde__m128 a, simde__m128 b) {
3212
+ #if defined(SIMDE_X86_SSE_NATIVE)
3213
+ return _mm_mul_ss(a, b);
3214
+ #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
3215
+ return simde_mm_move_ss(a, simde_mm_mul_ps(a, b));
3216
+ #else
3217
+ simde__m128_private
3218
+ r_,
3219
+ a_ = simde__m128_to_private(a),
3220
+ b_ = simde__m128_to_private(b);
3221
+
3222
+ r_.f32[0] = a_.f32[0] * b_.f32[0];
3223
+ r_.f32[1] = a_.f32[1];
3224
+ r_.f32[2] = a_.f32[2];
3225
+ r_.f32[3] = a_.f32[3];
3226
+
3227
+ return simde__m128_from_private(r_);
3228
+ #endif
3229
+ }
3230
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
3231
+ # define _mm_mul_ss(a, b) simde_mm_mul_ss((a), (b))
3232
+ #endif
3233
+
3234
+ SIMDE_FUNCTION_ATTRIBUTES
3235
+ simde__m64
3236
+ simde_mm_mulhi_pu16 (simde__m64 a, simde__m64 b) {
3237
+ #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
3238
+ return _mm_mulhi_pu16(a, b);
3239
+ #else
3240
+ simde__m64_private
3241
+ r_,
3242
+ a_ = simde__m64_to_private(a),
3243
+ b_ = simde__m64_to_private(b);
3244
+
3245
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
3246
+ const uint32x4_t t1 = vmull_u16(a_.neon_u16, b_.neon_u16);
3247
+ const uint32x4_t t2 = vshrq_n_u32(t1, 16);
3248
+ const uint16x4_t t3 = vmovn_u32(t2);
3249
+ r_.neon_u16 = t3;
3250
+ #else
3251
+ SIMDE_VECTORIZE
3252
+ for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
3253
+ r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, ((HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i])) >> UINT32_C(16)));
3254
+ }
3255
+ #endif
3256
+
3257
+ return simde__m64_from_private(r_);
3258
+ #endif
3259
+ }
3260
+ #define simde_m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b)
3261
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
3262
+ # define _mm_mulhi_pu16(a, b) simde_mm_mulhi_pu16(a, b)
3263
+ # define _m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b)
3264
+ #endif
3265
+
3266
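A quick standalone check of what the portable loop in simde_mm_mulhi_pu16 computes (illustrative sketch, not simde code; the helper name and test values are assumptions): each lane keeps only the high 16 bits of the widened 16x16 unsigned product.

    #include <stdint.h>
    #include <stdio.h>

    /* High half of an unsigned 16x16 -> 32-bit product, as in the loop above. */
    static uint16_t mulhi_u16(uint16_t a, uint16_t b) {
      return (uint16_t)(((uint32_t)a * (uint32_t)b) >> 16);
    }

    int main(void) {
      printf("%u\n", mulhi_u16(0xFFFF, 0xFFFF)); /* 0xFFFE0001 >> 16 = 65534 */
      printf("%u\n", mulhi_u16(1000, 70));       /* 70000 >> 16 = 1 */
      return 0;
    }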
+ #if defined(SIMDE_X86_SSE_NATIVE) && defined(HEDLEY_GCC_VERSION)
3267
+ #define SIMDE_MM_HINT_NTA HEDLEY_STATIC_CAST(enum _mm_hint, 0)
3268
+ #define SIMDE_MM_HINT_T0 HEDLEY_STATIC_CAST(enum _mm_hint, 1)
3269
+ #define SIMDE_MM_HINT_T1 HEDLEY_STATIC_CAST(enum _mm_hint, 2)
3270
+ #define SIMDE_MM_HINT_T2 HEDLEY_STATIC_CAST(enum _mm_hint, 3)
3271
+ #define SIMDE_MM_HINT_ENTA HEDLEY_STATIC_CAST(enum _mm_hint, 4)
3272
+ #define SIMDE_MM_HINT_ET0 HEDLEY_STATIC_CAST(enum _mm_hint, 5)
3273
+ #define SIMDE_MM_HINT_ET1 HEDLEY_STATIC_CAST(enum _mm_hint, 6)
3274
+ #define SIMDE_MM_HINT_ET2 HEDLEY_STATIC_CAST(enum _mm_hint, 7)
3275
+ #else
3276
+ #define SIMDE_MM_HINT_NTA 0
3277
+ #define SIMDE_MM_HINT_T0 1
3278
+ #define SIMDE_MM_HINT_T1 2
3279
+ #define SIMDE_MM_HINT_T2 3
3280
+ #define SIMDE_MM_HINT_ENTA 4
3281
+ #define SIMDE_MM_HINT_ET0 5
3282
+ #define SIMDE_MM_HINT_ET1 6
3283
+ #define SIMDE_MM_HINT_ET2 7
3284
+ #endif
3285
+
3286
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
3287
+ HEDLEY_DIAGNOSTIC_PUSH
3288
+ #if HEDLEY_HAS_WARNING("-Wreserved-id-macro")
3289
+ _Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"")
3290
+ #endif
3291
+ #undef _MM_HINT_NTA
3292
+ #define _MM_HINT_NTA SIMDE_MM_HINT_NTA
3293
+ #undef _MM_HINT_T0
3294
+ #define _MM_HINT_T0 SIMDE_MM_HINT_T0
3295
+ #undef _MM_HINT_T1
3296
+ #define _MM_HINT_T1 SIMDE_MM_HINT_T1
3297
+ #undef _MM_HINT_T2
3298
+ #define _MM_HINT_T2 SIMDE_MM_HINT_T2
3299
+ #undef _MM_HINT_ENTA
3300
+ #define _MM_HINT_ENTA SIMDE_MM_HINT_ENTA
3301
+ #undef _MM_HINT_ET0
3302
+ #define _MM_HINT_ET0 SIMDE_MM_HINT_ET0
3303
+ #undef _MM_HINT_ET1
3304
+ #define _MM_HINT_ET1 SIMDE_MM_HINT_ET1
3305
+ #undef _MM_HINT_ET2
3306
+ #define _MM_HINT_ET2 SIMDE_MM_HINT_ET2
3307
+ HEDLEY_DIAGNOSTIC_POP
3308
+ #endif
3309
+
3310
+ SIMDE_FUNCTION_ATTRIBUTES
3311
+ void
3312
+ simde_mm_prefetch (char const* p, int i) {
3313
+ #if defined(HEDLEY_GCC_VERSION)
3314
+ __builtin_prefetch(p);
3315
+ #else
3316
+ (void) p;
3317
+ #endif
3318
+
3319
+ (void) i;
3320
+ }
3321
+ #if defined(SIMDE_X86_SSE_NATIVE)
3322
+ #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) /* https://reviews.llvm.org/D71718 */
3323
+ #define simde_mm_prefetch(p, i) \
3324
+ (__extension__({ \
3325
+ HEDLEY_DIAGNOSTIC_PUSH \
3326
+ HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \
3327
+ _mm_prefetch((p), (i)); \
3328
+ HEDLEY_DIAGNOSTIC_POP \
3329
+ }))
3330
+ #else
3331
+ #define simde_mm_prefetch(p, i) _mm_prefetch(p, i)
3332
+ #endif
3333
+ #endif
3334
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
3335
+ #define _mm_prefetch(p, i) simde_mm_prefetch(p, i)
3336
+ #endif
3337
+
3338
+ SIMDE_FUNCTION_ATTRIBUTES
3339
+ simde__m128
3340
+ simde_x_mm_negate_ps(simde__m128 a) {
3341
+ #if defined(SIMDE_X86_SSE_NATIVE)
3342
+ return simde_mm_xor_ps(a, _mm_set1_ps(SIMDE_FLOAT32_C(-0.0)));
3343
+ #else
3344
+ simde__m128_private
3345
+ r_,
3346
+ a_ = simde__m128_to_private(a);
3347
+
3348
+ #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && \
3349
+ (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,1,0))
3350
+ r_.altivec_f32 = vec_neg(a_.altivec_f32);
3351
+ #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
3352
+ r_.neon_f32 = vnegq_f32(a_.neon_f32);
3353
+ #elif defined(SIMDE_WASM_SIMD128_NATIVE)
3354
+ r_.wasm_v128 = wasm_f32x4_neg(a_.wasm_v128);
3355
+ #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
3356
+ r_.altivec_f32 = vec_neg(a_.altivec_f32);
3357
+ #elif defined(SIMDE_VECTOR_NEGATE)
3358
+ r_.f32 = -a_.f32;
3359
+ #else
3360
+ SIMDE_VECTORIZE
3361
+ for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
3362
+ r_.f32[i] = -a_.f32[i];
3363
+ }
3364
+ #endif
3365
+
3366
+ return simde__m128_from_private(r_);
3367
+ #endif
3368
+ }
3369
+
3370
+ SIMDE_FUNCTION_ATTRIBUTES
3371
+ simde__m128
3372
+ simde_mm_rcp_ps (simde__m128 a) {
3373
+ #if defined(SIMDE_X86_SSE_NATIVE)
3374
+ return _mm_rcp_ps(a);
3375
+ #else
3376
+ simde__m128_private
3377
+ r_,
3378
+ a_ = simde__m128_to_private(a);
3379
+
3380
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
3381
+ float32x4_t recip = vrecpeq_f32(a_.neon_f32);
3382
+
3383
+ #if SIMDE_ACCURACY_PREFERENCE > 0
3384
+ for (int i = 0; i < SIMDE_ACCURACY_PREFERENCE ; ++i) {
3385
+ recip = vmulq_f32(recip, vrecpsq_f32(recip, a_.neon_f32));
3386
+ }
3387
+ #endif
3388
+
3389
+ r_.neon_f32 = recip;
3390
+ #elif defined(SIMDE_WASM_SIMD128_NATIVE)
3391
+ r_.wasm_v128 = wasm_f32x4_div(simde_mm_set1_ps(1.0f), a_.wasm_v128);
3392
+ #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
3393
+ r_.altivec_f32 = vec_re(a_.altivec_f32);
3394
+ #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
3395
+ r_.f32 = 1.0f / a_.f32;
3396
+ #elif defined(SIMDE_IEEE754_STORAGE)
3397
+ /* https://stackoverflow.com/questions/12227126/division-as-multiply-and-lut-fast-float-division-reciprocal/12228234#12228234 */
3398
+ SIMDE_VECTORIZE
3399
+ for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
3400
+ int32_t ix;
3401
+ simde_float32 fx = a_.f32[i];
3402
+ simde_memcpy(&ix, &fx, sizeof(ix));
3403
+ int32_t x = INT32_C(0x7EF311C3) - ix;
3404
+ simde_float32 temp;
3405
+ simde_memcpy(&temp, &x, sizeof(temp));
3406
+ r_.f32[i] = temp * (SIMDE_FLOAT32_C(2.0) - temp * fx);
3407
+ }
3408
+ #else
3409
+ SIMDE_VECTORIZE
3410
+ for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
3411
+ r_.f32[i] = 1.0f / a_.f32[i];
3412
+ }
3413
+ #endif
3414
+
3415
+ return simde__m128_from_private(r_);
3416
+ #endif
3417
+ }
3418
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
3419
+ # define _mm_rcp_ps(a) simde_mm_rcp_ps((a))
3420
+ #endif
3421
+
3422
+ SIMDE_FUNCTION_ATTRIBUTES
3423
+ simde__m128
3424
+ simde_mm_rcp_ss (simde__m128 a) {
3425
+ #if defined(SIMDE_X86_SSE_NATIVE)
3426
+ return _mm_rcp_ss(a);
3427
+ #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
3428
+ return simde_mm_move_ss(a, simde_mm_rcp_ps(a));
3429
+ #else
3430
+ simde__m128_private
3431
+ r_,
3432
+ a_ = simde__m128_to_private(a);
3433
+
3434
+ r_.f32[0] = 1.0f / a_.f32[0];
3435
+ r_.f32[1] = a_.f32[1];
3436
+ r_.f32[2] = a_.f32[2];
3437
+ r_.f32[3] = a_.f32[3];
3438
+
3439
+ return simde__m128_from_private(r_);
3440
+ #endif
3441
+ }
3442
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
3443
+ # define _mm_rcp_ss(a) simde_mm_rcp_ss((a))
3444
+ #endif
3445
+
3446
+ SIMDE_FUNCTION_ATTRIBUTES
3447
+ simde__m128
3448
+ simde_mm_rsqrt_ps (simde__m128 a) {
3449
+ #if defined(SIMDE_X86_SSE_NATIVE)
3450
+ return _mm_rsqrt_ps(a);
3451
+ #else
3452
+ simde__m128_private
3453
+ r_,
3454
+ a_ = simde__m128_to_private(a);
3455
+
3456
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
3457
+ r_.neon_f32 = vrsqrteq_f32(a_.neon_f32);
3458
+ #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
3459
+ r_.altivec_f32 = vec_rsqrte(a_.altivec_f32);
3460
+ #elif defined(SIMDE_IEEE754_STORAGE)
3461
+ /* https://basesandframes.files.wordpress.com/2020/04/even_faster_math_functions_green_2020.pdf
3462
+ Pages 100 - 103 */
3463
+ SIMDE_VECTORIZE
3464
+ for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
3465
+ #if SIMDE_ACCURACY_PREFERENCE <= 0
3466
+ r_.i32[i] = INT32_C(0x5F37624F) - (a_.i32[i] >> 1);
3467
+ #else
3468
+ simde_float32 x = a_.f32[i];
3469
+ simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x;
3470
+ int32_t ix;
3471
+
3472
+ simde_memcpy(&ix, &x, sizeof(ix));
3473
+
3474
+ #if SIMDE_ACCURACY_PREFERENCE == 1
3475
+ ix = INT32_C(0x5F375A82) - (ix >> 1);
3476
+ #else
3477
+ ix = INT32_C(0x5F37599E) - (ix >> 1);
3478
+ #endif
3479
+
3480
+ simde_memcpy(&x, &ix, sizeof(x));
3481
+
3482
+ #if SIMDE_ACCURACY_PREFERENCE >= 2
3483
+ x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x);
3484
+ #endif
3485
+ x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x);
3486
+
3487
+ r_.f32[i] = x;
3488
+ #endif
3489
+ }
3490
+ #elif defined(simde_math_sqrtf)
3491
+ SIMDE_VECTORIZE
3492
+ for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
3493
+ r_.f32[i] = 1.0f / simde_math_sqrtf(a_.f32[i]);
3494
+ }
3495
+ #else
3496
+ HEDLEY_UNREACHABLE();
3497
+ #endif
3498
+
3499
+ return simde__m128_from_private(r_);
3500
+ #endif
3501
+ }
3502
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
3503
+ # define _mm_rsqrt_ps(a) simde_mm_rsqrt_ps((a))
3504
+ #endif
3505
+
3506
+ SIMDE_FUNCTION_ATTRIBUTES
3507
+ simde__m128
3508
+ simde_mm_rsqrt_ss (simde__m128 a) {
3509
+ #if defined(SIMDE_X86_SSE_NATIVE)
3510
+ return _mm_rsqrt_ss(a);
3511
+ #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
3512
+ return simde_mm_move_ss(a, simde_mm_rsqrt_ps(a));
3513
+ #else
3514
+ simde__m128_private
3515
+ r_,
3516
+ a_ = simde__m128_to_private(a);
3517
+
3518
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
3519
+ r_.neon_f32 = vsetq_lane_f32(vgetq_lane_f32(simde__m128_to_private(simde_mm_rsqrt_ps(a)).neon_f32, 0), a_.neon_f32, 0);
3520
+ #elif defined(SIMDE_IEEE754_STORAGE)
3521
+ {
3522
+ #if SIMDE_ACCURACY_PREFERENCE <= 0
3523
+ r_.i32[0] = INT32_C(0x5F37624F) - (a_.i32[0] >> 1);
3524
+ #else
3525
+ simde_float32 x = a_.f32[0];
3526
+ simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x;
3527
+ int32_t ix;
3528
+
3529
+ simde_memcpy(&ix, &x, sizeof(ix));
3530
+
3531
+ #if SIMDE_ACCURACY_PREFERENCE == 1
3532
+ ix = INT32_C(0x5F375A82) - (ix >> 1);
3533
+ #else
3534
+ ix = INT32_C(0x5F37599E) - (ix >> 1);
3535
+ #endif
3536
+
3537
+ simde_memcpy(&x, &ix, sizeof(x));
3538
+
3539
+ #if SIMDE_ACCURACY_PREFERENCE >= 2
3540
+ x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x);
3541
+ #endif
3542
+ x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x);
3543
+
3544
+ r_.f32[0] = x;
3545
+ #endif
3546
+ }
3547
+ r_.f32[1] = a_.f32[1];
3548
+ r_.f32[2] = a_.f32[2];
3549
+ r_.f32[3] = a_.f32[3];
3550
+ #elif defined(simde_math_sqrtf)
3551
+ r_.f32[0] = 1.0f / simde_math_sqrtf(a_.f32[0]);
3552
+ r_.f32[1] = a_.f32[1];
3553
+ r_.f32[2] = a_.f32[2];
3554
+ r_.f32[3] = a_.f32[3];
3555
+ #else
3556
+ HEDLEY_UNREACHABLE();
3557
+ #endif
3558
+
3559
+ return simde__m128_from_private(r_);
3560
+ #endif
3561
+ }
3562
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
3563
+ # define _mm_rsqrt_ss(a) simde_mm_rsqrt_ss((a))
3564
+ #endif
3565
+
3566
+ SIMDE_FUNCTION_ATTRIBUTES
3567
+ simde__m64
3568
+ simde_mm_sad_pu8 (simde__m64 a, simde__m64 b) {
3569
+ #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
3570
+ return _mm_sad_pu8(a, b);
3571
+ #else
3572
+ simde__m64_private
3573
+ r_,
3574
+ a_ = simde__m64_to_private(a),
3575
+ b_ = simde__m64_to_private(b);
3576
+
3577
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
3578
+ uint16x4_t t = vpaddl_u8(vabd_u8(a_.neon_u8, b_.neon_u8));
3579
+ uint16_t r0 = t[0] + t[1] + t[2] + t[3];
3580
+ r_.neon_u16 = vset_lane_u16(r0, vdup_n_u16(0), 0);
3581
+ #else
3582
+ uint16_t sum = 0;
3583
+
3584
+ #if defined(SIMDE_HAVE_STDLIB_H)
3585
+ SIMDE_VECTORIZE_REDUCTION(+:sum)
3586
+ for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
3587
+ sum += HEDLEY_STATIC_CAST(uint8_t, abs(a_.u8[i] - b_.u8[i]));
3588
+ }
3589
+
3590
+ r_.i16[0] = HEDLEY_STATIC_CAST(int16_t, sum);
3591
+ r_.i16[1] = 0;
3592
+ r_.i16[2] = 0;
3593
+ r_.i16[3] = 0;
3594
+ #else
3595
+ HEDLEY_UNREACHABLE();
3596
+ #endif
3597
+ #endif
3598
+
3599
+ return simde__m64_from_private(r_);
3600
+ #endif
3601
+ }
3602
+ #define simde_m_psadbw(a, b) simde_mm_sad_pu8(a, b)
3603
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
3604
+ # define _mm_sad_pu8(a, b) simde_mm_sad_pu8(a, b)
3605
+ # define _m_psadbw(a, b) simde_mm_sad_pu8(a, b)
3606
+ #endif
3607
+
3608
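The fallback in simde_mm_sad_pu8 above is a plain sum of absolute differences over eight unsigned bytes, stored in the low 16-bit lane. Scalar restatement for reference (illustrative sketch, not simde code; helper name and inputs are assumptions):

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    static uint16_t sad_u8x8(const uint8_t a[8], const uint8_t b[8]) {
      uint16_t sum = 0;
      for (int i = 0; i < 8; i++)
        sum = (uint16_t)(sum + (uint16_t)abs((int)a[i] - (int)b[i]));
      return sum;
    }

    int main(void) {
      const uint8_t a[8] = { 0, 10, 20, 30, 40, 50, 60, 70 };
      const uint8_t b[8] = { 5, 10, 25, 30, 45, 50, 65, 70 };
      printf("%u\n", sad_u8x8(a, b));  /* 5 + 0 + 5 + 0 + 5 + 0 + 5 + 0 = 20 */
      return 0;
    }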
+ SIMDE_FUNCTION_ATTRIBUTES
3609
+ simde__m128
3610
+ simde_mm_set_ss (simde_float32 a) {
3611
+ #if defined(SIMDE_X86_SSE_NATIVE)
3612
+ return _mm_set_ss(a);
3613
+ #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
3614
+ return vsetq_lane_f32(a, vdupq_n_f32(SIMDE_FLOAT32_C(0.0)), 0);
3615
+ #else
3616
+ return simde_mm_set_ps(SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), a);
3617
+ #endif
3618
+ }
3619
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
3620
+ # define _mm_set_ss(a) simde_mm_set_ss(a)
3621
+ #endif
3622
+
3623
+ SIMDE_FUNCTION_ATTRIBUTES
3624
+ simde__m128
3625
+ simde_mm_setr_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) {
3626
+ #if defined(SIMDE_X86_SSE_NATIVE)
3627
+ return _mm_setr_ps(e3, e2, e1, e0);
3628
+ #else
3629
+ return simde_mm_set_ps(e0, e1, e2, e3);
3630
+ #endif
3631
+ }
3632
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
3633
+ # define _mm_setr_ps(e3, e2, e1, e0) simde_mm_setr_ps(e3, e2, e1, e0)
3634
+ #endif
3635
+
3636
+ SIMDE_FUNCTION_ATTRIBUTES
3637
+ simde__m128
3638
+ simde_mm_setzero_ps (void) {
3639
+ #if defined(SIMDE_X86_SSE_NATIVE)
3640
+ return _mm_setzero_ps();
3641
+ #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
3642
+ return vdupq_n_f32(SIMDE_FLOAT32_C(0.0));
3643
+ #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
3644
+ return vec_splats(SIMDE_FLOAT32_C(0.0));
3645
+ #else
3646
+ simde__m128 r;
3647
+ simde_memset(&r, 0, sizeof(r));
3648
+ return r;
3649
+ #endif
3650
+ }
3651
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
3652
+ # define _mm_setzero_ps() simde_mm_setzero_ps()
3653
+ #endif
3654
+
3655
+ #if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_)
3656
+ HEDLEY_DIAGNOSTIC_PUSH
3657
+ SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_
3658
+ #endif
3659
+
3660
+ SIMDE_FUNCTION_ATTRIBUTES
3661
+ simde__m128
3662
+ simde_mm_undefined_ps (void) {
3663
+ simde__m128_private r_;
3664
+
3665
+ #if defined(SIMDE_HAVE_UNDEFINED128)
3666
+ r_.n = _mm_undefined_ps();
3667
+ #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_)
3668
+ r_ = simde__m128_to_private(simde_mm_setzero_ps());
3669
+ #endif
3670
+
3671
+ return simde__m128_from_private(r_);
3672
+ }
3673
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
3674
+ # define _mm_undefined_ps() simde_mm_undefined_ps()
3675
+ #endif
3676
+
3677
+ #if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_)
3678
+ HEDLEY_DIAGNOSTIC_POP
3679
+ #endif
3680
+
3681
+ SIMDE_FUNCTION_ATTRIBUTES
3682
+ simde__m128
3683
+ simde_x_mm_setone_ps (void) {
3684
+ simde__m128 t = simde_mm_setzero_ps();
3685
+ return simde_mm_cmpeq_ps(t, t);
3686
+ }
3687
+
3688
+ SIMDE_FUNCTION_ATTRIBUTES
3689
+ void
3690
+ simde_mm_sfence (void) {
3691
+ /* TODO: Use Hedley. */
3692
+ #if defined(SIMDE_X86_SSE_NATIVE)
3693
+ _mm_sfence();
3694
+ #elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7))
3695
+ __atomic_thread_fence(__ATOMIC_SEQ_CST);
3696
+ #elif !defined(__INTEL_COMPILER) && defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_ATOMICS__)
3697
+ #if defined(__GNUC__) && (__GNUC__ == 4) && (__GNUC_MINOR__ < 9)
3698
+ __atomic_thread_fence(__ATOMIC_SEQ_CST);
3699
+ #else
3700
+ atomic_thread_fence(memory_order_seq_cst);
3701
+ #endif
3702
+ #elif defined(_MSC_VER)
3703
+ MemoryBarrier();
3704
+ #elif HEDLEY_HAS_EXTENSION(c_atomic)
3705
+ __c11_atomic_thread_fence(__ATOMIC_SEQ_CST);
3706
+ #elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1))
3707
+ __sync_synchronize();
3708
+ #elif defined(_OPENMP)
3709
+ #pragma omp critical(simde_mm_sfence_)
3710
+ { }
3711
+ #endif
3712
+ }
3713
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
3714
+ # define _mm_sfence() simde_mm_sfence()
3715
+ #endif
3716
+
3717
+ #define SIMDE_MM_SHUFFLE(z, y, x, w) (((z) << 6) | ((y) << 4) | ((x) << 2) | (w))
3718
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
3719
+ # define _MM_SHUFFLE(z, y, x, w) SIMDE_MM_SHUFFLE(z, y, x, w)
3720
+ #endif
3721
+
3722
+ #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI)
3723
+ # define simde_mm_shuffle_pi16(a, imm8) _mm_shuffle_pi16(a, imm8)
3724
+ #elif defined(SIMDE_SHUFFLE_VECTOR_)
3725
+ # define simde_mm_shuffle_pi16(a, imm8) (__extension__ ({ \
3726
+ const simde__m64_private simde__tmp_a_ = simde__m64_to_private(a); \
3727
+ simde__m64_from_private((simde__m64_private) { .i16 = \
3728
+ SIMDE_SHUFFLE_VECTOR_(16, 8, \
3729
+ (simde__tmp_a_).i16, \
3730
+ (simde__tmp_a_).i16, \
3731
+ (((imm8) ) & 3), \
3732
+ (((imm8) >> 2) & 3), \
3733
+ (((imm8) >> 4) & 3), \
3734
+ (((imm8) >> 6) & 3)) }); }))
3735
+ #else
3736
+ SIMDE_FUNCTION_ATTRIBUTES
3737
+ simde__m64
3738
+ simde_mm_shuffle_pi16 (simde__m64 a, const int imm8)
3739
+ SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) {
3740
+ simde__m64_private r_;
3741
+ simde__m64_private a_ = simde__m64_to_private(a);
3742
+
3743
+ for (size_t i = 0 ; i < sizeof(r_.i16) / sizeof(r_.i16[0]) ; i++) {
3744
+ r_.i16[i] = a_.i16[(imm8 >> (i * 2)) & 3];
3745
+ }
3746
+
3747
+ HEDLEY_DIAGNOSTIC_PUSH
3748
+ #if HEDLEY_HAS_WARNING("-Wconditional-uninitialized")
3749
+ # pragma clang diagnostic ignored "-Wconditional-uninitialized"
3750
+ #endif
3751
+ return simde__m64_from_private(r_);
3752
+ HEDLEY_DIAGNOSTIC_POP
3753
+ }
3754
+ #endif
3755
+ #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI)
3756
+ # define simde_m_pshufw(a, imm8) _m_pshufw(a, imm8)
3757
+ #else
3758
+ # define simde_m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8)
3759
+ #endif
3760
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
3761
+ # define _mm_shuffle_pi16(a, imm8) simde_mm_shuffle_pi16(a, imm8)
3762
+ # define _m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8)
3763
+ #endif
3764
+
3765
+ #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI)
3766
+ # define simde_mm_shuffle_ps(a, b, imm8) _mm_shuffle_ps(a, b, imm8)
3767
+ #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
3768
+ #define simde_mm_shuffle_ps(a, b, imm8) \
3769
+ __extension__({ \
3770
+ float32x4_t ret; \
3771
+ ret = vmovq_n_f32( \
3772
+ vgetq_lane_f32(a, (imm8) & (0x3))); \
3773
+ ret = vsetq_lane_f32( \
3774
+ vgetq_lane_f32(a, ((imm8) >> 2) & 0x3), \
3775
+ ret, 1); \
3776
+ ret = vsetq_lane_f32( \
3777
+ vgetq_lane_f32(b, ((imm8) >> 4) & 0x3), \
3778
+ ret, 2); \
3779
+ ret = vsetq_lane_f32( \
3780
+ vgetq_lane_f32(b, ((imm8) >> 6) & 0x3), \
3781
+ ret, 3); \
3782
+ })
3783
+ #elif defined(SIMDE_SHUFFLE_VECTOR_)
3784
+ # define simde_mm_shuffle_ps(a, b, imm8) (__extension__ ({ \
3785
+ simde__m128_from_private((simde__m128_private) { .f32 = \
3786
+ SIMDE_SHUFFLE_VECTOR_(32, 16, \
3787
+ simde__m128_to_private(a).f32, \
3788
+ simde__m128_to_private(b).f32, \
3789
+ (((imm8) ) & 3), \
3790
+ (((imm8) >> 2) & 3), \
3791
+ (((imm8) >> 4) & 3) + 4, \
3792
+ (((imm8) >> 6) & 3) + 4) }); }))
3793
+ #else
3794
+ SIMDE_FUNCTION_ATTRIBUTES
3795
+ simde__m128
3796
+ simde_mm_shuffle_ps (simde__m128 a, simde__m128 b, const int imm8)
3797
+ SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) {
3798
+ simde__m128_private
3799
+ r_,
3800
+ a_ = simde__m128_to_private(a),
3801
+ b_ = simde__m128_to_private(b);
3802
+
3803
+ r_.f32[0] = a_.f32[(imm8 >> 0) & 3];
3804
+ r_.f32[1] = a_.f32[(imm8 >> 2) & 3];
3805
+ r_.f32[2] = b_.f32[(imm8 >> 4) & 3];
3806
+ r_.f32[3] = b_.f32[(imm8 >> 6) & 3];
3807
+
3808
+ return simde__m128_from_private(r_);
3809
+ }
3810
+ #endif
3811
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
3812
+ # define _mm_shuffle_ps(a, b, imm8) simde_mm_shuffle_ps((a), (b), imm8)
3813
+ #endif
3814
+
3815
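Worked example of the two pieces above (standalone and illustrative; the local SHUFFLE macro mirrors SIMDE_MM_SHUFFLE, and the arrays and main are assumptions): SIMDE_MM_SHUFFLE(z, y, x, w) packs four 2-bit lane indices into an 8-bit immediate, and the portable simde_mm_shuffle_ps fallback then selects a[imm8 & 3], a[(imm8 >> 2) & 3], b[(imm8 >> 4) & 3], b[(imm8 >> 6) & 3].

    #include <stdio.h>

    #define SHUFFLE(z, y, x, w) (((z) << 6) | ((y) << 4) | ((x) << 2) | (w))

    int main(void) {
      const float a[4] = { 10.0f, 11.0f, 12.0f, 13.0f };
      const float b[4] = { 20.0f, 21.0f, 22.0f, 23.0f };
      const int imm8 = SHUFFLE(1, 0, 3, 2);   /* 0x4E */
      float r[4];
      r[0] = a[(imm8 >> 0) & 3];  /* a[2] = 12 */
      r[1] = a[(imm8 >> 2) & 3];  /* a[3] = 13 */
      r[2] = b[(imm8 >> 4) & 3];  /* b[0] = 20 */
      r[3] = b[(imm8 >> 6) & 3];  /* b[1] = 21 */
      printf("%g %g %g %g\n", r[0], r[1], r[2], r[3]);  /* 12 13 20 21 */
      return 0;
    }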
+ SIMDE_FUNCTION_ATTRIBUTES
3816
+ simde__m128
3817
+ simde_mm_sqrt_ps (simde__m128 a) {
3818
+ #if defined(SIMDE_X86_SSE_NATIVE)
3819
+ return _mm_sqrt_ps(a);
3820
+ #else
3821
+ simde__m128_private
3822
+ r_,
3823
+ a_ = simde__m128_to_private(a);
3824
+
3825
+ #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
3826
+ r_.neon_f32 = vsqrtq_f32(a_.neon_f32);
3827
+ #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
3828
+ float32x4_t est = vrsqrteq_f32(a_.neon_f32);
3829
+ for (int i = 0 ; i <= SIMDE_ACCURACY_PREFERENCE ; i++) {
3830
+ est = vmulq_f32(vrsqrtsq_f32(vmulq_f32(a_.neon_f32, est), est), est);
3831
+ }
3832
+ r_.neon_f32 = vmulq_f32(a_.neon_f32, est);
3833
+ #elif defined(SIMDE_WASM_SIMD128_NATIVE)
3834
+ r_.wasm_v128 = wasm_f32x4_sqrt(a_.wasm_v128);
3835
+ #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
3836
+ r_.altivec_f32 = vec_sqrt(a_.altivec_f32);
3837
+ #elif defined(simde_math_sqrt)
3838
+ SIMDE_VECTORIZE
3839
+ for (size_t i = 0 ; i < sizeof(r_.f32) / sizeof(r_.f32[0]) ; i++) {
3840
+ r_.f32[i] = simde_math_sqrtf(a_.f32[i]);
3841
+ }
3842
+ #else
3843
+ HEDLEY_UNREACHABLE();
3844
+ #endif
3845
+
3846
+ return simde__m128_from_private(r_);
3847
+ #endif
3848
+ }
3849
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
3850
+ # define _mm_sqrt_ps(a) simde_mm_sqrt_ps((a))
3851
+ #endif
3852
+
3853
+ SIMDE_FUNCTION_ATTRIBUTES
3854
+ simde__m128
3855
+ simde_mm_sqrt_ss (simde__m128 a) {
3856
+ #if defined(SIMDE_X86_SSE_NATIVE)
3857
+ return _mm_sqrt_ss(a);
3858
+ #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
3859
+ return simde_mm_move_ss(a, simde_mm_sqrt_ps(a));
3860
+ #else
3861
+ simde__m128_private
3862
+ r_,
3863
+ a_ = simde__m128_to_private(a);
3864
+
3865
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
3866
+ float32_t value =
3867
+ vgetq_lane_f32(simde__m128_to_private(simde_mm_sqrt_ps(a)).neon_f32, 0);
3868
+ r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0);
3869
+ #elif defined(simde_math_sqrtf)
3870
+ r_.f32[0] = simde_math_sqrtf(a_.f32[0]);
3871
+ r_.f32[1] = a_.f32[1];
3872
+ r_.f32[2] = a_.f32[2];
3873
+ r_.f32[3] = a_.f32[3];
3874
+ #else
3875
+ HEDLEY_UNREACHABLE();
3876
+ #endif
3877
+
3878
+ return simde__m128_from_private(r_);
3879
+ #endif
3880
+ }
3881
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
3882
+ # define _mm_sqrt_ss(a) simde_mm_sqrt_ss((a))
3883
+ #endif
3884
+
3885
+ SIMDE_FUNCTION_ATTRIBUTES
3886
+ void
3887
+ simde_mm_store_ps (simde_float32 mem_addr[4], simde__m128 a) {
3888
+ #if defined(SIMDE_X86_SSE_NATIVE)
3889
+ _mm_store_ps(mem_addr, a);
3890
+ #else
3891
+ simde__m128_private a_ = simde__m128_to_private(a);
3892
+
3893
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
3894
+ vst1q_f32(mem_addr, a_.neon_f32);
3895
+ #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
3896
+ vec_st(a_.altivec_f32, 0, mem_addr);
3897
+ #elif defined(SIMDE_WASM_SIMD128_NATIVE)
3898
+ wasm_v128_store(mem_addr, a_.wasm_v128);
3899
+ #else
3900
+ simde_memcpy(mem_addr, &a_, sizeof(a));
3901
+ #endif
3902
+ #endif
3903
+ }
3904
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
3905
+ # define _mm_store_ps(mem_addr, a) simde_mm_store_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a))
3906
+ #endif
3907
+
3908
+ SIMDE_FUNCTION_ATTRIBUTES
3909
+ void
3910
+ simde_mm_store1_ps (simde_float32 mem_addr[4], simde__m128 a) {
3911
+ simde_float32* mem_addr_ = SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128);
3912
+
3913
+ #if defined(SIMDE_X86_SSE_NATIVE)
3914
+ _mm_store_ps1(mem_addr_, a);
3915
+ #else
3916
+ simde__m128_private a_ = simde__m128_to_private(a);
3917
+
3918
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
3919
+ vst1q_f32(mem_addr_, vdupq_lane_f32(vget_low_f32(a_.neon_f32), 0));
3920
+ #elif defined(SIMDE_WASM_SIMD128_NATIVE)
3921
+ wasm_v128_store(mem_addr_, wasm_v32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0, 0, 0));
3922
+ #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
3923
+ vec_st(vec_splat(a_.altivec_f32, 0), 0, mem_addr_);
3924
+ #elif defined(SIMDE_SHUFFLE_VECTOR_)
3925
+ simde__m128_private tmp_;
3926
+ tmp_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0);
3927
+ simde_mm_store_ps(mem_addr_, tmp_.f32);
3928
+ #else
3929
+ SIMDE_VECTORIZE_ALIGNED(mem_addr_:16)
3930
+ for (size_t i = 0 ; i < sizeof(a_.f32) / sizeof(a_.f32[0]) ; i++) {
3931
+ mem_addr_[i] = a_.f32[0];
3932
+ }
3933
+ #endif
3934
+ #endif
3935
+ }
3936
+ #define simde_mm_store_ps1(mem_addr, a) simde_mm_store1_ps(mem_addr, a)
3937
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
3938
+ # define _mm_store_ps1(mem_addr, a) simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a))
3939
+ # define _mm_store1_ps(mem_addr, a) simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a))
3940
+ #endif
3941
+
3942
+ SIMDE_FUNCTION_ATTRIBUTES
3943
+ void
3944
+ simde_mm_store_ss (simde_float32* mem_addr, simde__m128 a) {
3945
+ #if defined(SIMDE_X86_SSE_NATIVE)
3946
+ _mm_store_ss(mem_addr, a);
3947
+ #else
3948
+ simde__m128_private a_ = simde__m128_to_private(a);
3949
+
3950
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
3951
+ vst1q_lane_f32(mem_addr, a_.neon_f32, 0);
3952
+ #else
3953
+ *mem_addr = a_.f32[0];
3954
+ #endif
3955
+ #endif
3956
+ }
3957
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
3958
+ # define _mm_store_ss(mem_addr, a) simde_mm_store_ss(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a))
3959
+ #endif
3960
+
3961
+ SIMDE_FUNCTION_ATTRIBUTES
3962
+ void
3963
+ simde_mm_storeh_pi (simde__m64* mem_addr, simde__m128 a) {
3964
+ #if defined(SIMDE_X86_SSE_NATIVE)
3965
+ _mm_storeh_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a);
3966
+ #else
3967
+ simde__m128_private a_ = simde__m128_to_private(a);
3968
+
3969
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
3970
+ vst1_f32(HEDLEY_REINTERPRET_CAST(float32_t*, mem_addr), vget_high_f32(a_.neon_f32));
3971
+ #else
3972
+ simde_memcpy(mem_addr, &(a_.m64[1]), sizeof(a_.m64[1]));
3973
+ #endif
3974
+ #endif
3975
+ }
3976
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
3977
+ # define _mm_storeh_pi(mem_addr, a) simde_mm_storeh_pi(mem_addr, (a))
3978
+ #endif
3979
+
3980
+ SIMDE_FUNCTION_ATTRIBUTES
3981
+ void
3982
+ simde_mm_storel_pi (simde__m64* mem_addr, simde__m128 a) {
3983
+ #if defined(SIMDE_X86_SSE_NATIVE)
3984
+ _mm_storel_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a);
3985
+ #else
3986
+ simde__m64_private* dest_ = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr);
3987
+ simde__m128_private a_ = simde__m128_to_private(a);
3988
+
3989
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
3990
+ dest_->neon_f32 = vget_low_f32(a_.neon_f32);
3991
+ #else
3992
+ dest_->f32[0] = a_.f32[0];
3993
+ dest_->f32[1] = a_.f32[1];
3994
+ #endif
3995
+ #endif
3996
+ }
3997
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
3998
+ # define _mm_storel_pi(mem_addr, a) simde_mm_storel_pi(mem_addr, (a))
3999
+ #endif
4000
+
4001
+ SIMDE_FUNCTION_ATTRIBUTES
4002
+ void
4003
+ simde_mm_storer_ps (simde_float32 mem_addr[4], simde__m128 a) {
4004
+ #if defined(SIMDE_X86_SSE_NATIVE)
4005
+ _mm_storer_ps(mem_addr, a);
4006
+ #else
4007
+ simde__m128_private a_ = simde__m128_to_private(a);
4008
+
4009
+ #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
4010
+ vec_st(vec_reve(a_.altivec_f32), 0, mem_addr);
4011
+ #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
4012
+ float32x4_t tmp = vrev64q_f32(a_.neon_f32);
4013
+ vst1q_f32(mem_addr, vextq_f32(tmp, tmp, 2));
4014
+ #elif defined(SIMDE_SHUFFLE_VECTOR_)
4015
+ a_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 3, 2, 1, 0);
4016
+ simde_mm_store_ps(mem_addr, simde__m128_from_private(a_));
4017
+ #else
4018
+ SIMDE_VECTORIZE_ALIGNED(mem_addr:16)
4019
+ for (size_t i = 0 ; i < sizeof(a_.f32) / sizeof(a_.f32[0]) ; i++) {
4020
+ mem_addr[i] = a_.f32[((sizeof(a_.f32) / sizeof(a_.f32[0])) - 1) - i];
4021
+ }
4022
+ #endif
4023
+ #endif
4024
+ }
4025
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
4026
+ # define _mm_storer_ps(mem_addr, a) simde_mm_storer_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a))
4027
+ #endif
4028
+
4029
+ SIMDE_FUNCTION_ATTRIBUTES
4030
+ void
4031
+ simde_mm_storeu_ps (simde_float32 mem_addr[4], simde__m128 a) {
4032
+ #if defined(SIMDE_X86_SSE_NATIVE)
4033
+ _mm_storeu_ps(mem_addr, a);
4034
+ #else
4035
+ simde__m128_private a_ = simde__m128_to_private(a);
4036
+
4037
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
4038
+ vst1q_f32(mem_addr, a_.neon_f32);
4039
+ #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
4040
+ vec_vsx_st(a_.altivec_f32, 0, mem_addr);
4041
+ #else
4042
+ simde_memcpy(mem_addr, &a_, sizeof(a_));
4043
+ #endif
4044
+ #endif
4045
+ }
4046
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
4047
+ # define _mm_storeu_ps(mem_addr, a) simde_mm_storeu_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a))
4048
+ #endif
4049
+
4050
+ SIMDE_FUNCTION_ATTRIBUTES
4051
+ simde__m128
4052
+ simde_mm_sub_ps (simde__m128 a, simde__m128 b) {
4053
+ #if defined(SIMDE_X86_SSE_NATIVE)
4054
+ return _mm_sub_ps(a, b);
4055
+ #else
4056
+ simde__m128_private
4057
+ r_,
4058
+ a_ = simde__m128_to_private(a),
4059
+ b_ = simde__m128_to_private(b);
4060
+
4061
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
4062
+ r_.neon_f32 = vsubq_f32(a_.neon_f32, b_.neon_f32);
4063
+ #elif defined(SIMDE_WASM_SIMD128_NATIVE)
4064
+ r_.wasm_v128 = wasm_f32x4_sub(a_.wasm_v128, b_.wasm_v128);
4065
+ #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
4066
+ r_.altivec_f32 = vec_sub(a_.altivec_f32, b_.altivec_f32);
4067
+ #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
4068
+ r_.f32 = a_.f32 - b_.f32;
4069
+ #else
4070
+ SIMDE_VECTORIZE
4071
+ for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
4072
+ r_.f32[i] = a_.f32[i] - b_.f32[i];
4073
+ }
4074
+ #endif
4075
+
4076
+ return simde__m128_from_private(r_);
4077
+ #endif
4078
+ }
4079
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
4080
+ # define _mm_sub_ps(a, b) simde_mm_sub_ps((a), (b))
4081
+ #endif
4082
+
4083
+ SIMDE_FUNCTION_ATTRIBUTES
4084
+ simde__m128
4085
+ simde_mm_sub_ss (simde__m128 a, simde__m128 b) {
4086
+ #if defined(SIMDE_X86_SSE_NATIVE)
4087
+ return _mm_sub_ss(a, b);
4088
+ #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
4089
+ return simde_mm_move_ss(a, simde_mm_sub_ps(a, b));
4090
+ #else
4091
+ simde__m128_private
4092
+ r_,
4093
+ a_ = simde__m128_to_private(a),
4094
+ b_ = simde__m128_to_private(b);
4095
+
4096
+ r_.f32[0] = a_.f32[0] - b_.f32[0];
4097
+ r_.f32[1] = a_.f32[1];
4098
+ r_.f32[2] = a_.f32[2];
4099
+ r_.f32[3] = a_.f32[3];
4100
+
4101
+ return simde__m128_from_private(r_);
4102
+ #endif
4103
+ }
4104
+
4105
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
4106
+ # define _mm_sub_ss(a, b) simde_mm_sub_ss((a), (b))
4107
+ #endif
4108
+
4109
+ SIMDE_FUNCTION_ATTRIBUTES
4110
+ int
4111
+ simde_mm_ucomieq_ss (simde__m128 a, simde__m128 b) {
4112
+ #if defined(SIMDE_X86_SSE_NATIVE)
4113
+ return _mm_ucomieq_ss(a, b);
4114
+ #else
4115
+ simde__m128_private
4116
+ a_ = simde__m128_to_private(a),
4117
+ b_ = simde__m128_to_private(b);
4118
+ int r;
4119
+
4120
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
4121
+ uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32);
4122
+ uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32);
4123
+ uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan));
4124
+ uint32x4_t a_eq_b = vceqq_f32(a_.neon_f32, b_.neon_f32);
4125
+ r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0);
4126
+ #elif defined(SIMDE_HAVE_FENV_H)
4127
+ fenv_t envp;
4128
+ int x = feholdexcept(&envp);
4129
+ r = a_.f32[0] == b_.f32[0];
4130
+ if (HEDLEY_LIKELY(x == 0))
4131
+ fesetenv(&envp);
4132
+ #else
4133
+ r = a_.f32[0] == b_.f32[0];
4134
+ #endif
4135
+
4136
+ return r;
4137
+ #endif
4138
+ }
4139
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
4140
+ # define _mm_ucomieq_ss(a, b) simde_mm_ucomieq_ss((a), (b))
4141
+ #endif
4142
+
4143
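In the scalar fallback above, feholdexcept/fesetenv bracket the comparison so that a NaN operand does not leave FE_INVALID raised, matching ucomiss's unordered, non-signaling behaviour. A minimal standalone sketch of that pattern (illustrative, not simde code; the function name and test values are assumptions):

    #include <fenv.h>
    #include <math.h>
    #include <stdio.h>

    static int ucomieq(float a, float b) {
      fenv_t env;
      int held = feholdexcept(&env);   /* save the FP environment, clear status flags */
      int r = (a == b);                /* quiet comparison; unordered compares unequal */
      if (held == 0)
        fesetenv(&env);                /* restore, discarding any flags raised meanwhile */
      return r;
    }

    int main(void) {
      printf("%d\n", ucomieq(1.0f, 1.0f)); /* 1 */
      printf("%d\n", ucomieq(NAN, 1.0f));  /* 0 */
      return 0;
    }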
+ SIMDE_FUNCTION_ATTRIBUTES
4144
+ int
4145
+ simde_mm_ucomige_ss (simde__m128 a, simde__m128 b) {
4146
+ #if defined(SIMDE_X86_SSE_NATIVE)
4147
+ return _mm_ucomige_ss(a, b);
4148
+ #else
4149
+ simde__m128_private
4150
+ a_ = simde__m128_to_private(a),
4151
+ b_ = simde__m128_to_private(b);
4152
+ int r;
4153
+
4154
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
4155
+ uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32);
4156
+ uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32);
4157
+ uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan);
4158
+ uint32x4_t a_ge_b = vcgeq_f32(a_.neon_f32, b_.neon_f32);
4159
+ r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0);
4160
+ #elif defined(SIMDE_HAVE_FENV_H)
4161
+ fenv_t envp;
4162
+ int x = feholdexcept(&envp);
4163
+ r = a_.f32[0] >= b_.f32[0];
4164
+ if (HEDLEY_LIKELY(x == 0))
4165
+ fesetenv(&envp);
4166
+ #else
4167
+ r = a_.f32[0] >= b_.f32[0];
4168
+ #endif
4169
+
4170
+ return r;
4171
+ #endif
4172
+ }
4173
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
4174
+ # define _mm_ucomige_ss(a, b) simde_mm_ucomige_ss((a), (b))
4175
+ #endif
4176
+
4177
+ SIMDE_FUNCTION_ATTRIBUTES
4178
+ int
4179
+ simde_mm_ucomigt_ss (simde__m128 a, simde__m128 b) {
4180
+ #if defined(SIMDE_X86_SSE_NATIVE)
4181
+ return _mm_ucomigt_ss(a, b);
4182
+ #else
4183
+ simde__m128_private
4184
+ a_ = simde__m128_to_private(a),
4185
+ b_ = simde__m128_to_private(b);
4186
+ int r;
4187
+
4188
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
4189
+ uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32);
4190
+ uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32);
4191
+ uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan);
4192
+ uint32x4_t a_gt_b = vcgtq_f32(a_.neon_f32, b_.neon_f32);
4193
+ r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0);
4194
+ #elif defined(SIMDE_HAVE_FENV_H)
4195
+ fenv_t envp;
4196
+ int x = feholdexcept(&envp);
4197
+ r = a_.f32[0] > b_.f32[0];
4198
+ if (HEDLEY_LIKELY(x == 0))
4199
+ fesetenv(&envp);
4200
+ #else
4201
+ r = a_.f32[0] > b_.f32[0];
4202
+ #endif
4203
+
4204
+ return r;
4205
+ #endif
4206
+ }
4207
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
4208
+ # define _mm_ucomigt_ss(a, b) simde_mm_ucomigt_ss((a), (b))
4209
+ #endif
4210
+
4211
+ SIMDE_FUNCTION_ATTRIBUTES
4212
+ int
4213
+ simde_mm_ucomile_ss (simde__m128 a, simde__m128 b) {
4214
+ #if defined(SIMDE_X86_SSE_NATIVE)
4215
+ return _mm_ucomile_ss(a, b);
4216
+ #else
4217
+ simde__m128_private
4218
+ a_ = simde__m128_to_private(a),
4219
+ b_ = simde__m128_to_private(b);
4220
+ int r;
4221
+
4222
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
4223
+ uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32);
4224
+ uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32);
4225
+ uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan));
4226
+ uint32x4_t a_le_b = vcleq_f32(a_.neon_f32, b_.neon_f32);
4227
+ r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0);
4228
+ #elif defined(SIMDE_HAVE_FENV_H)
4229
+ fenv_t envp;
4230
+ int x = feholdexcept(&envp);
4231
+ r = a_.f32[0] <= b_.f32[0];
4232
+ if (HEDLEY_LIKELY(x == 0))
4233
+ fesetenv(&envp);
4234
+ #else
4235
+ r = a_.f32[0] <= b_.f32[0];
4236
+ #endif
4237
+
4238
+ return r;
4239
+ #endif
4240
+ }
4241
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
4242
+ # define _mm_ucomile_ss(a, b) simde_mm_ucomile_ss((a), (b))
4243
+ #endif
4244
+
4245
+ SIMDE_FUNCTION_ATTRIBUTES
4246
+ int
4247
+ simde_mm_ucomilt_ss (simde__m128 a, simde__m128 b) {
4248
+ #if defined(SIMDE_X86_SSE_NATIVE)
4249
+ return _mm_ucomilt_ss(a, b);
4250
+ #else
4251
+ simde__m128_private
4252
+ a_ = simde__m128_to_private(a),
4253
+ b_ = simde__m128_to_private(b);
4254
+ int r;
4255
+
4256
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
4257
+ uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32);
4258
+ uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32);
4259
+ uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan));
4260
+ uint32x4_t a_lt_b = vcltq_f32(a_.neon_f32, b_.neon_f32);
4261
+ r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0);
4262
+ #elif defined(SIMDE_HAVE_FENV_H)
4263
+ fenv_t envp;
4264
+ int x = feholdexcept(&envp);
4265
+ r = a_.f32[0] < b_.f32[0];
4266
+ if (HEDLEY_LIKELY(x == 0))
4267
+ fesetenv(&envp);
4268
+ #else
4269
+ r = a_.f32[0] < b_.f32[0];
4270
+ #endif
4271
+
4272
+ return r;
4273
+ #endif
4274
+ }
4275
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
4276
+ # define _mm_ucomilt_ss(a, b) simde_mm_ucomilt_ss((a), (b))
4277
+ #endif
4278
+
4279
+ SIMDE_FUNCTION_ATTRIBUTES
4280
+ int
4281
+ simde_mm_ucomineq_ss (simde__m128 a, simde__m128 b) {
4282
+ #if defined(SIMDE_X86_SSE_NATIVE)
4283
+ return _mm_ucomineq_ss(a, b);
4284
+ #else
4285
+ simde__m128_private
4286
+ a_ = simde__m128_to_private(a),
4287
+ b_ = simde__m128_to_private(b);
4288
+ int r;
4289
+
4290
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
4291
+ uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32);
4292
+ uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32);
4293
+ uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan);
4294
+ uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32));
4295
+ r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0);
4296
+ #elif defined(SIMDE_HAVE_FENV_H)
4297
+ fenv_t envp;
4298
+ int x = feholdexcept(&envp);
4299
+ r = a_.f32[0] != b_.f32[0];
4300
+ if (HEDLEY_LIKELY(x == 0))
4301
+ fesetenv(&envp);
4302
+ #else
4303
+ r = a_.f32[0] != b_.f32[0];
4304
+ #endif
4305
+
4306
+ return r;
4307
+ #endif
4308
+ }
4309
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
4310
+ # define _mm_ucomineq_ss(a, b) simde_mm_ucomineq_ss((a), (b))
4311
+ #endif
4312
+
4313
+ #if defined(SIMDE_X86_SSE_NATIVE)
4314
+ # if defined(__has_builtin)
4315
+ # if __has_builtin(__builtin_ia32_undef128)
4316
+ # define SIMDE_HAVE_UNDEFINED128
4317
+ # endif
4318
+ # elif !defined(__PGI) && !defined(SIMDE_BUG_GCC_REV_208793) && !defined(_MSC_VER)
4319
+ # define SIMDE_HAVE_UNDEFINED128
4320
+ # endif
4321
+ #endif
4322
+
4323
+ #if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_)
4324
+ HEDLEY_DIAGNOSTIC_PUSH
4325
+ SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_
4326
+ #endif
4327
+
4328
+ SIMDE_FUNCTION_ATTRIBUTES
4329
+ simde__m128
4330
+ simde_mm_unpackhi_ps (simde__m128 a, simde__m128 b) {
4331
+ #if defined(SIMDE_X86_SSE_NATIVE)
4332
+ return _mm_unpackhi_ps(a, b);
4333
+ #else
4334
+ simde__m128_private
4335
+ r_,
4336
+ a_ = simde__m128_to_private(a),
4337
+ b_ = simde__m128_to_private(b);
4338
+
4339
+ #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
4340
+ r_.neon_f32 = vzip2q_f32(a_.neon_f32, b_.neon_f32);
4341
+ #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
4342
+ float32x2_t a1 = vget_high_f32(a_.neon_f32);
4343
+ float32x2_t b1 = vget_high_f32(b_.neon_f32);
4344
+ float32x2x2_t result = vzip_f32(a1, b1);
4345
+ r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]);
4346
+ #elif defined(SIMDE_SHUFFLE_VECTOR_)
4347
+ r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 2, 6, 3, 7);
4348
+ #else
4349
+ r_.f32[0] = a_.f32[2];
4350
+ r_.f32[1] = b_.f32[2];
4351
+ r_.f32[2] = a_.f32[3];
4352
+ r_.f32[3] = b_.f32[3];
4353
+ #endif
4354
+
4355
+ return simde__m128_from_private(r_);
4356
+ #endif
4357
+ }
4358
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
4359
+ # define _mm_unpackhi_ps(a, b) simde_mm_unpackhi_ps((a), (b))
4360
+ #endif
4361
+
4362
+ SIMDE_FUNCTION_ATTRIBUTES
4363
+ simde__m128
4364
+ simde_mm_unpacklo_ps (simde__m128 a, simde__m128 b) {
4365
+ #if defined(SIMDE_X86_SSE_NATIVE)
4366
+ return _mm_unpacklo_ps(a, b);
4367
+ #else
4368
+ simde__m128_private
4369
+ r_,
4370
+ a_ = simde__m128_to_private(a),
4371
+ b_ = simde__m128_to_private(b);
4372
+
4373
+ #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
4374
+ r_.neon_f32 = vzip1q_f32(a_.neon_f32, b_.neon_f32);
4375
+ #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
4376
+ r_.altivec_f32 = vec_mergeh(a_.altivec_f32, b_.altivec_f32);
4377
+ #elif defined(SIMDE_SHUFFLE_VECTOR_)
4378
+ r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 4, 1, 5);
4379
+ #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
4380
+ float32x2_t a1 = vget_low_f32(a_.neon_f32);
4381
+ float32x2_t b1 = vget_low_f32(b_.neon_f32);
4382
+ float32x2x2_t result = vzip_f32(a1, b1);
4383
+ r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]);
4384
+ #else
4385
+ r_.f32[0] = a_.f32[0];
4386
+ r_.f32[1] = b_.f32[0];
4387
+ r_.f32[2] = a_.f32[1];
4388
+ r_.f32[3] = b_.f32[1];
4389
+ #endif
4390
+
4391
+ return simde__m128_from_private(r_);
4392
+ #endif
4393
+ }
4394
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
4395
+ # define _mm_unpacklo_ps(a, b) simde_mm_unpacklo_ps((a), (b))
4396
+ #endif
4397
+
4398
+ SIMDE_FUNCTION_ATTRIBUTES
4399
+ void
4400
+ simde_mm_stream_pi (simde__m64* mem_addr, simde__m64 a) {
4401
+ #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
4402
+ _mm_stream_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a);
4403
+ #else
4404
+ simde__m64_private*
4405
+ dest = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr),
4406
+ a_ = simde__m64_to_private(a);
4407
+
4408
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
4409
+ dest->i64[0] = vget_lane_s64(a_.neon_i64, 0);
4410
+ #else
4411
+ dest->i64[0] = a_.i64[0];
4412
+ #endif
4413
+ #endif
4414
+ }
4415
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
4416
+ # define _mm_stream_pi(mem_addr, a) simde_mm_stream_pi(mem_addr, (a))
4417
+ #endif
4418
+
4419
+ SIMDE_FUNCTION_ATTRIBUTES
4420
+ void
4421
+ simde_mm_stream_ps (simde_float32 mem_addr[4], simde__m128 a) {
4422
+ #if defined(SIMDE_X86_SSE_NATIVE)
4423
+ _mm_stream_ps(mem_addr, a);
4424
+ #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
4425
+ simde__m128_private a_ = simde__m128_to_private(a);
4426
+ __builtin_nontemporal_store(a_.f32, SIMDE_ALIGN_CAST(__typeof__(a_.f32)*, mem_addr));
4427
+ #else
4428
+ simde_mm_store_ps(mem_addr, a);
4429
+ #endif
4430
+ }
4431
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
4432
+ # define _mm_stream_ps(mem_addr, a) simde_mm_stream_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a))
4433
+ #endif
4434
+
4435
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
4436
+ #define SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) \
4437
+ do { \
4438
+ float32x4x2_t ROW01 = vtrnq_f32(row0, row1); \
4439
+ float32x4x2_t ROW23 = vtrnq_f32(row2, row3); \
4440
+ row0 = vcombine_f32(vget_low_f32(ROW01.val[0]), \
4441
+ vget_low_f32(ROW23.val[0])); \
4442
+ row1 = vcombine_f32(vget_low_f32(ROW01.val[1]), \
4443
+ vget_low_f32(ROW23.val[1])); \
4444
+ row2 = vcombine_f32(vget_high_f32(ROW01.val[0]), \
4445
+ vget_high_f32(ROW23.val[0])); \
4446
+ row3 = vcombine_f32(vget_high_f32(ROW01.val[1]), \
4447
+ vget_high_f32(ROW23.val[1])); \
4448
+ } while (0)
4449
+ #else
4450
+ #define SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) \
4451
+ do { \
4452
+ simde__m128 tmp3, tmp2, tmp1, tmp0; \
4453
+ tmp0 = simde_mm_unpacklo_ps((row0), (row1)); \
4454
+ tmp2 = simde_mm_unpacklo_ps((row2), (row3)); \
4455
+ tmp1 = simde_mm_unpackhi_ps((row0), (row1)); \
4456
+ tmp3 = simde_mm_unpackhi_ps((row2), (row3)); \
4457
+ row0 = simde_mm_movelh_ps(tmp0, tmp2); \
4458
+ row1 = simde_mm_movehl_ps(tmp2, tmp0); \
4459
+ row2 = simde_mm_movelh_ps(tmp1, tmp3); \
4460
+ row3 = simde_mm_movehl_ps(tmp3, tmp1); \
4461
+ } while (0)
4462
+ #endif
4463
+ #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
4464
+ # define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3)
4465
+ #endif
4466
+
4467
+ SIMDE_END_DECLS_
4468
+
4469
+ HEDLEY_DIAGNOSTIC_POP
4470
+
4471
+ #endif /* !defined(SIMDE_X86_SSE_H) */