casadi 3.6.7__cp35-none-win_amd64.whl → 3.7.0__cp35-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (442)
  1. casadi/_casadi.pyd +0 -0
  2. casadi/blasfeo.lib +0 -0
  3. casadi/casadi-cli.exe +0 -0
  4. casadi/casadi.lib +0 -0
  5. casadi/casadi.py +6070 -3038
  6. casadi/cbc.exe +0 -0
  7. casadi/clp.exe +0 -0
  8. casadi/cmake/casadi-config-version.cmake +1 -1
  9. casadi/cmake/casadi-targets-release.cmake +5 -5
  10. casadi/cmake/casadi-targets.cmake +5 -5
  11. casadi/{lib/cmake/tinyxml2/tinyxml2-config-version.cmake → cmake/ghc_filesystem/ghc_filesystem-config-version.cmake} +19 -10
  12. casadi/cmake/ghc_filesystem/ghc_filesystem-config.cmake +30 -0
  13. casadi/cmake/ghc_filesystem/ghc_filesystem-targets.cmake +99 -0
  14. casadi/cmake/highs/highs-config.cmake +1 -0
  15. casadi/cmake/highs/highs-targets.cmake +2 -2
  16. casadi/cmake/libzip/libzip-config-version.cmake +48 -0
  17. casadi/cmake/libzip/libzip-config.cmake +69 -0
  18. casadi/cmake/libzip/libzip-targets-release.cmake +19 -0
  19. casadi/{lib/cmake/tinyxml2/tinyxml2-shared-targets.cmake → cmake/libzip/libzip-targets.cmake} +11 -7
  20. casadi/cmake/libzip/modules/FindMbedTLS.cmake +141 -0
  21. casadi/cmake/libzip/modules/FindNettle.cmake +141 -0
  22. casadi/cmake/libzip/modules/Findzstd.cmake +186 -0
  23. casadi/daqp.lib +0 -0
  24. casadi/fatrop.lib +0 -0
  25. casadi/highs.exe +0 -0
  26. casadi/highs.lib +0 -0
  27. casadi/hpipm.lib +0 -0
  28. casadi/include/casadi/casadi.i +194 -44
  29. casadi/include/casadi/config.h +11 -11
  30. casadi/include/casadi/core/archiver.hpp +58 -0
  31. casadi/include/casadi/core/blazing_spline.hpp +47 -0
  32. casadi/include/casadi/core/calculus.hpp +57 -2
  33. casadi/include/casadi/core/casadi_common.hpp +37 -0
  34. casadi/include/casadi/core/casadi_meta.hpp +15 -0
  35. casadi/include/casadi/core/casadi_misc.hpp +7 -0
  36. casadi/include/casadi/core/code_generator.hpp +95 -17
  37. casadi/include/casadi/core/core.hpp +5 -0
  38. casadi/include/casadi/core/dae_builder.hpp +283 -141
  39. casadi/include/casadi/core/dm.hpp +3 -0
  40. casadi/include/casadi/core/filesystem.hpp +58 -0
  41. casadi/include/casadi/core/fmu.hpp +51 -16
  42. casadi/include/casadi/core/function.hpp +19 -0
  43. casadi/include/casadi/core/generic_matrix.hpp +214 -7
  44. casadi/include/casadi/core/generic_shared.hpp +318 -0
  45. casadi/include/casadi/core/generic_shared_impl.hpp +214 -0
  46. casadi/include/casadi/core/generic_shared_internal.hpp +215 -0
  47. casadi/include/casadi/core/generic_type.hpp +3 -0
  48. casadi/include/casadi/core/global_options.hpp +10 -0
  49. casadi/include/casadi/core/integrator.hpp +41 -7
  50. casadi/include/casadi/core/matrix_decl.hpp +67 -0
  51. casadi/include/casadi/core/mx.hpp +63 -2
  52. casadi/include/casadi/core/options.hpp +6 -3
  53. casadi/include/casadi/core/optistack.hpp +43 -9
  54. casadi/include/casadi/core/printable.hpp +8 -0
  55. casadi/include/casadi/core/resource.hpp +107 -0
  56. casadi/include/casadi/core/runtime/casadi_blazing_1d_boor_eval.hpp +112 -0
  57. casadi/include/casadi/core/runtime/casadi_blazing_2d_boor_eval.hpp +311 -0
  58. casadi/include/casadi/core/runtime/casadi_blazing_3d_boor_eval.hpp +645 -0
  59. casadi/include/casadi/core/runtime/casadi_blazing_de_boor.hpp +101 -0
  60. casadi/include/casadi/core/runtime/casadi_nlp.hpp +8 -2
  61. casadi/include/casadi/core/runtime/casadi_printme.hpp +25 -0
  62. casadi/include/casadi/core/serializer.hpp +12 -4
  63. casadi/include/casadi/core/serializing_stream.hpp +3 -0
  64. casadi/include/casadi/core/shared_object.hpp +73 -161
  65. casadi/include/casadi/core/sparsity.hpp +13 -1
  66. casadi/include/casadi/core/sparsity_interface.hpp +19 -1
  67. casadi/include/casadi/core/sx.hpp +41 -0
  68. casadi/include/casadi/core/sx_elem.hpp +25 -0
  69. casadi/include/casadi/core/xml_node.hpp +5 -0
  70. casadi/include/casadi/doc.i +9703 -6539
  71. casadi/include/casadi/doc_merged.i +6483 -4447
  72. casadi/include/casadi/valgrind-casadi.supp +138 -0
  73. casadi/include/casadi/valgrind-python.supp +2470 -0
  74. casadi/include/fatrop/blasfeo_wrapper/LinearAlgebraBlasfeo.hpp +4 -1
  75. casadi/include/ghc/filesystem.hpp +6083 -0
  76. casadi/include/ghc/fs_fwd.hpp +38 -0
  77. casadi/include/ghc/fs_impl.hpp +35 -0
  78. casadi/include/ghc/fs_std.hpp +60 -0
  79. casadi/include/ghc/fs_std_fwd.hpp +63 -0
  80. casadi/include/ghc/fs_std_impl.hpp +46 -0
  81. casadi/include/highs/HConfig.h +7 -5
  82. casadi/include/highs/Highs.h +260 -86
  83. casadi/include/highs/filereaderlp/reader.hpp +10 -10
  84. casadi/include/highs/interfaces/highs_c_api.h +195 -22
  85. casadi/include/highs/io/Filereader.h +4 -6
  86. casadi/include/highs/io/FilereaderEms.h +0 -3
  87. casadi/include/highs/io/FilereaderLp.h +0 -3
  88. casadi/include/highs/io/FilereaderMps.h +0 -3
  89. casadi/include/highs/io/HMPSIO.h +1 -4
  90. casadi/include/highs/io/HMpsFF.h +8 -5
  91. casadi/include/highs/io/HighsIO.h +10 -5
  92. casadi/include/highs/io/LoadOptions.h +0 -3
  93. casadi/include/highs/ipm/IpxSolution.h +0 -3
  94. casadi/include/highs/ipm/IpxWrapper.h +4 -7
  95. casadi/include/highs/ipm/ipx/control.h +1 -0
  96. casadi/include/highs/ipm/ipx/ipx_parameters.h +1 -0
  97. casadi/include/highs/ipm/ipx/lp_solver.h +3 -2
  98. casadi/include/highs/ipm/ipx/model.h +7 -3
  99. casadi/include/highs/lp_data/HConst.h +21 -6
  100. casadi/include/highs/lp_data/HStruct.h +40 -4
  101. casadi/include/highs/lp_data/HighsAnalysis.h +0 -3
  102. casadi/include/highs/lp_data/HighsCallback.h +10 -3
  103. casadi/include/highs/lp_data/HighsCallbackStruct.h +3 -3
  104. casadi/include/highs/lp_data/HighsDebug.h +0 -3
  105. casadi/include/highs/lp_data/HighsIis.h +62 -0
  106. casadi/include/highs/lp_data/HighsInfo.h +50 -43
  107. casadi/include/highs/lp_data/HighsInfoDebug.h +0 -3
  108. casadi/include/highs/lp_data/HighsLp.h +2 -3
  109. casadi/include/highs/lp_data/HighsLpSolverObject.h +0 -3
  110. casadi/include/highs/lp_data/HighsLpUtils.h +38 -6
  111. casadi/include/highs/lp_data/HighsModelUtils.h +21 -18
  112. casadi/include/highs/lp_data/HighsOptions.h +134 -22
  113. casadi/include/highs/lp_data/HighsRanging.h +0 -3
  114. casadi/include/highs/lp_data/HighsSolution.h +10 -3
  115. casadi/include/highs/lp_data/HighsSolutionDebug.h +0 -3
  116. casadi/include/highs/lp_data/HighsSolve.h +0 -3
  117. casadi/include/highs/lp_data/HighsStatus.h +0 -3
  118. casadi/include/highs/mip/HighsCliqueTable.h +3 -3
  119. casadi/include/highs/mip/HighsConflictPool.h +0 -3
  120. casadi/include/highs/mip/HighsCutGeneration.h +13 -3
  121. casadi/include/highs/mip/HighsCutPool.h +0 -3
  122. casadi/include/highs/mip/HighsDebugSol.h +0 -3
  123. casadi/include/highs/mip/HighsDomain.h +16 -4
  124. casadi/include/highs/mip/HighsDomainChange.h +0 -3
  125. casadi/include/highs/mip/HighsDynamicRowMatrix.h +0 -3
  126. casadi/include/highs/mip/HighsGFkSolve.h +3 -6
  127. casadi/include/highs/mip/HighsImplications.h +29 -5
  128. casadi/include/highs/mip/HighsLpAggregator.h +0 -3
  129. casadi/include/highs/mip/HighsLpRelaxation.h +0 -3
  130. casadi/include/highs/mip/HighsMipAnalysis.h +52 -0
  131. casadi/include/highs/mip/HighsMipSolver.h +11 -4
  132. casadi/include/highs/mip/HighsMipSolverData.h +67 -6
  133. casadi/include/highs/mip/HighsModkSeparator.h +1 -4
  134. casadi/include/highs/mip/HighsNodeQueue.h +0 -3
  135. casadi/include/highs/mip/HighsObjectiveFunction.h +0 -3
  136. casadi/include/highs/mip/HighsPathSeparator.h +1 -4
  137. casadi/include/highs/mip/HighsPrimalHeuristics.h +7 -5
  138. casadi/include/highs/mip/HighsPseudocost.h +0 -3
  139. casadi/include/highs/mip/HighsRedcostFixing.h +0 -3
  140. casadi/include/highs/mip/HighsSearch.h +0 -3
  141. casadi/include/highs/mip/HighsSeparation.h +0 -3
  142. casadi/include/highs/mip/HighsSeparator.h +1 -5
  143. casadi/include/highs/mip/HighsTableauSeparator.h +1 -4
  144. casadi/include/highs/mip/HighsTransformedLp.h +0 -3
  145. casadi/include/highs/mip/MipTimer.h +471 -0
  146. casadi/include/highs/parallel/HighsBinarySemaphore.h +1 -1
  147. casadi/include/highs/parallel/HighsCacheAlign.h +1 -1
  148. casadi/include/highs/parallel/HighsCombinable.h +1 -1
  149. casadi/include/highs/parallel/HighsMutex.h +1 -1
  150. casadi/include/highs/parallel/HighsRaceTimer.h +1 -1
  151. casadi/include/highs/parallel/HighsSchedulerConstants.h +1 -1
  152. casadi/include/highs/parallel/HighsSplitDeque.h +1 -1
  153. casadi/include/highs/parallel/HighsTaskExecutor.h +68 -55
  154. casadi/include/highs/pdlp/CupdlpWrapper.h +18 -7
  155. casadi/include/highs/pdlp/cupdlp/cupdlp_cs.h +2 -3
  156. casadi/include/highs/pdlp/cupdlp/cupdlp_defs.h +16 -6
  157. casadi/include/highs/pdlp/cupdlp/cupdlp_linalg.h +10 -4
  158. casadi/include/highs/pdlp/cupdlp/cupdlp_proj.h +3 -3
  159. casadi/include/highs/pdlp/cupdlp/cupdlp_restart.h +6 -6
  160. casadi/include/highs/pdlp/cupdlp/cupdlp_scaling.h +26 -0
  161. casadi/include/highs/pdlp/cupdlp/cupdlp_solver.h +11 -11
  162. casadi/include/highs/pdlp/cupdlp/cupdlp_step.h +8 -4
  163. casadi/include/highs/pdlp/cupdlp/cupdlp_utils.c +253 -172
  164. casadi/include/highs/presolve/HPresolve.h +50 -10
  165. casadi/include/highs/presolve/HPresolveAnalysis.h +0 -3
  166. casadi/include/highs/presolve/HighsPostsolveStack.h +39 -6
  167. casadi/include/highs/presolve/HighsSymmetry.h +0 -3
  168. casadi/include/highs/presolve/ICrash.h +0 -3
  169. casadi/include/highs/presolve/ICrashUtil.h +1 -4
  170. casadi/include/highs/presolve/ICrashX.h +0 -3
  171. casadi/include/highs/presolve/PresolveComponent.h +0 -3
  172. casadi/include/highs/qpsolver/a_asm.hpp +20 -17
  173. casadi/include/highs/qpsolver/a_quass.hpp +5 -9
  174. casadi/include/highs/qpsolver/basis.hpp +12 -10
  175. casadi/include/highs/qpsolver/crashsolution.hpp +4 -3
  176. casadi/include/highs/qpsolver/dantzigpricing.hpp +3 -2
  177. casadi/include/highs/qpsolver/devexpricing.hpp +3 -1
  178. casadi/include/highs/qpsolver/factor.hpp +6 -5
  179. casadi/include/highs/qpsolver/feasibility_bounded.hpp +31 -29
  180. casadi/include/highs/qpsolver/feasibility_highs.hpp +94 -70
  181. casadi/include/highs/qpsolver/gradient.hpp +1 -1
  182. casadi/include/highs/qpsolver/instance.hpp +1 -1
  183. casadi/include/highs/qpsolver/pricing.hpp +2 -2
  184. casadi/include/highs/qpsolver/qpconst.hpp +2 -2
  185. casadi/include/highs/qpsolver/quass.hpp +2 -2
  186. casadi/include/highs/qpsolver/runtime.hpp +2 -2
  187. casadi/include/highs/qpsolver/settings.hpp +20 -8
  188. casadi/include/highs/qpsolver/steepestedgepricing.hpp +38 -39
  189. casadi/include/highs/simplex/HApp.h +88 -34
  190. casadi/include/highs/simplex/HEkk.h +24 -11
  191. casadi/include/highs/simplex/HEkkDual.h +2 -4
  192. casadi/include/highs/simplex/HEkkDualRHS.h +0 -3
  193. casadi/include/highs/simplex/HEkkDualRow.h +0 -3
  194. casadi/include/highs/simplex/HEkkPrimal.h +1 -5
  195. casadi/include/highs/simplex/HSimplex.h +0 -3
  196. casadi/include/highs/simplex/HSimplexDebug.h +0 -3
  197. casadi/include/highs/simplex/HSimplexNla.h +0 -26
  198. casadi/include/highs/simplex/HSimplexReport.h +0 -3
  199. casadi/include/highs/simplex/HighsSimplexAnalysis.h +5 -8
  200. casadi/include/highs/simplex/SimplexConst.h +10 -11
  201. casadi/include/highs/simplex/SimplexStruct.h +9 -12
  202. casadi/include/highs/simplex/SimplexTimer.h +83 -101
  203. casadi/include/highs/test/DevKkt.h +0 -3
  204. casadi/include/highs/test/KktCh2.h +0 -3
  205. casadi/include/highs/util/FactorTimer.h +39 -64
  206. casadi/include/highs/util/HFactor.h +4 -4
  207. casadi/include/highs/util/HFactorConst.h +0 -3
  208. casadi/include/highs/util/HFactorDebug.h +0 -3
  209. casadi/include/highs/util/HSet.h +0 -3
  210. casadi/include/highs/util/HVector.h +0 -3
  211. casadi/include/highs/util/HVectorBase.h +1 -4
  212. casadi/include/highs/util/HighsCDouble.h +12 -3
  213. casadi/include/highs/util/HighsComponent.h +0 -3
  214. casadi/include/highs/util/HighsDataStack.h +0 -3
  215. casadi/include/highs/util/HighsDisjointSets.h +0 -3
  216. casadi/include/highs/util/HighsHash.h +9 -12
  217. casadi/include/highs/util/HighsHashTree.h +15 -11
  218. casadi/include/highs/util/HighsInt.h +0 -3
  219. casadi/include/highs/util/HighsIntegers.h +0 -3
  220. casadi/include/highs/util/HighsLinearSumBounds.h +0 -3
  221. casadi/include/highs/util/HighsMatrixPic.h +0 -3
  222. casadi/include/highs/util/HighsMatrixSlice.h +3 -6
  223. casadi/include/highs/util/HighsMatrixUtils.h +0 -3
  224. casadi/include/highs/util/HighsMemoryAllocation.h +11 -3
  225. casadi/include/highs/util/HighsRandom.h +3 -6
  226. casadi/include/highs/util/HighsRbTree.h +0 -3
  227. casadi/include/highs/util/HighsSort.h +0 -3
  228. casadi/include/highs/util/HighsSparseMatrix.h +6 -3
  229. casadi/include/highs/util/HighsSparseVectorSum.h +0 -3
  230. casadi/include/highs/util/HighsSplay.h +0 -3
  231. casadi/include/highs/util/HighsTimer.h +94 -56
  232. casadi/include/highs/util/HighsUtils.h +10 -3
  233. casadi/include/highs/util/stringutil.h +14 -12
  234. casadi/include/licenses/ghc-external/LICENSE +19 -0
  235. casadi/include/licenses/libz-external/LICENSE +22 -0
  236. casadi/include/licenses/libz-external/contrib/dotzlib/LICENSE_1_0.txt +23 -0
  237. casadi/include/licenses/libzip-external/LICENSE +31 -0
  238. casadi/include/zconf.h +545 -0
  239. casadi/include/zip.h +528 -0
  240. casadi/include/zipconf.h +48 -0
  241. casadi/include/zlib.h +1938 -0
  242. casadi/ipopt.lib +0 -0
  243. casadi/lapack.lib +0 -0
  244. casadi/libCbc-3.dll +0 -0
  245. casadi/libCbc.dll.a +0 -0
  246. casadi/libCbcSolver-3.dll +0 -0
  247. casadi/libCbcSolver.dll.a +0 -0
  248. casadi/libCgl-1.dll +0 -0
  249. casadi/libCgl.dll.a +0 -0
  250. casadi/libClp-1.dll +0 -0
  251. casadi/libClp.dll.a +0 -0
  252. casadi/libClpSolver-1.dll +0 -0
  253. casadi/libClpSolver.dll.a +0 -0
  254. casadi/libCoinUtils-3.dll +0 -0
  255. casadi/libCoinUtils.dll.a +0 -0
  256. casadi/libOsi-1.dll +0 -0
  257. casadi/libOsi.dll.a +0 -0
  258. casadi/libOsiCbc-3.dll +0 -0
  259. casadi/libOsiCbc.dll.a +0 -0
  260. casadi/libOsiClp-1.dll +0 -0
  261. casadi/libOsiClp.dll.a +0 -0
  262. casadi/libOsiCommonTests-1.dll +0 -0
  263. casadi/libOsiCommonTests.dll.a +0 -0
  264. casadi/libalpaqa.dll +0 -0
  265. casadi/libalpaqa.dll.a +0 -0
  266. casadi/libblasfeo.dll +0 -0
  267. casadi/libblasfeo.dll.a +0 -0
  268. casadi/libbonmin-4.dll +0 -0
  269. casadi/libbonmin.dll.a +0 -0
  270. casadi/libcasadi-tp-openblas.dll +0 -0
  271. casadi/libcasadi-tp-openblas.dll.a +0 -0
  272. casadi/libcasadi.dll +0 -0
  273. casadi/libcasadi.dll.a +0 -0
  274. casadi/libcasadi.lib +0 -0
  275. casadi/libcasadi_archiver_libzip.dll +0 -0
  276. casadi/libcasadi_archiver_libzip.dll.a +0 -0
  277. casadi/libcasadi_conic_cbc.dll +0 -0
  278. casadi/libcasadi_conic_cbc.dll.a +0 -0
  279. casadi/libcasadi_conic_clp.dll +0 -0
  280. casadi/libcasadi_conic_clp.dll.a +0 -0
  281. casadi/libcasadi_conic_cplex.dll +0 -0
  282. casadi/libcasadi_conic_cplex.dll.a +0 -0
  283. casadi/libcasadi_conic_daqp.dll +0 -0
  284. casadi/libcasadi_conic_daqp.dll.a +0 -0
  285. casadi/libcasadi_conic_fatrop.dll +0 -0
  286. casadi/libcasadi_conic_fatrop.dll.a +0 -0
  287. casadi/libcasadi_conic_gurobi.dll +0 -0
  288. casadi/libcasadi_conic_gurobi.dll.a +0 -0
  289. casadi/libcasadi_conic_highs.dll +0 -0
  290. casadi/libcasadi_conic_highs.dll.a +0 -0
  291. casadi/libcasadi_conic_hpipm.dll +0 -0
  292. casadi/libcasadi_conic_hpipm.dll.a +0 -0
  293. casadi/libcasadi_conic_ipqp.dll +0 -0
  294. casadi/libcasadi_conic_ipqp.dll.a +0 -0
  295. casadi/libcasadi_conic_nlpsol.dll +0 -0
  296. casadi/libcasadi_conic_nlpsol.dll.a +0 -0
  297. casadi/libcasadi_conic_osqp.dll +0 -0
  298. casadi/libcasadi_conic_osqp.dll.a +0 -0
  299. casadi/libcasadi_conic_proxqp.dll +0 -0
  300. casadi/libcasadi_conic_proxqp.dll.a +0 -0
  301. casadi/libcasadi_conic_qpoases.dll +0 -0
  302. casadi/libcasadi_conic_qpoases.dll.a +0 -0
  303. casadi/libcasadi_conic_qrqp.dll +0 -0
  304. casadi/libcasadi_conic_qrqp.dll.a +0 -0
  305. casadi/libcasadi_conic_superscs.dll +0 -0
  306. casadi/libcasadi_conic_superscs.dll.a +0 -0
  307. casadi/libcasadi_filesystem_ghc.dll +0 -0
  308. casadi/libcasadi_filesystem_ghc.dll.a +0 -0
  309. casadi/libcasadi_importer_shell.dll +0 -0
  310. casadi/libcasadi_importer_shell.dll.a +0 -0
  311. casadi/libcasadi_integrator_collocation.dll +0 -0
  312. casadi/libcasadi_integrator_collocation.dll.a +0 -0
  313. casadi/libcasadi_integrator_cvodes.dll +0 -0
  314. casadi/libcasadi_integrator_cvodes.dll.a +0 -0
  315. casadi/libcasadi_integrator_idas.dll +0 -0
  316. casadi/libcasadi_integrator_idas.dll.a +0 -0
  317. casadi/libcasadi_integrator_rk.dll +0 -0
  318. casadi/libcasadi_integrator_rk.dll.a +0 -0
  319. casadi/libcasadi_interpolant_bspline.dll +0 -0
  320. casadi/libcasadi_interpolant_bspline.dll.a +0 -0
  321. casadi/libcasadi_interpolant_linear.dll +0 -0
  322. casadi/libcasadi_interpolant_linear.dll.a +0 -0
  323. casadi/libcasadi_linsol_csparse.dll +0 -0
  324. casadi/libcasadi_linsol_csparse.dll.a +0 -0
  325. casadi/libcasadi_linsol_csparsecholesky.dll +0 -0
  326. casadi/libcasadi_linsol_csparsecholesky.dll.a +0 -0
  327. casadi/libcasadi_linsol_lapacklu.dll +0 -0
  328. casadi/libcasadi_linsol_lapacklu.dll.a +0 -0
  329. casadi/libcasadi_linsol_lapackqr.dll +0 -0
  330. casadi/libcasadi_linsol_lapackqr.dll.a +0 -0
  331. casadi/libcasadi_linsol_ldl.dll +0 -0
  332. casadi/libcasadi_linsol_ldl.dll.a +0 -0
  333. casadi/libcasadi_linsol_lsqr.dll +0 -0
  334. casadi/libcasadi_linsol_lsqr.dll.a +0 -0
  335. casadi/libcasadi_linsol_ma27.dll +0 -0
  336. casadi/libcasadi_linsol_ma27.dll.a +0 -0
  337. casadi/libcasadi_linsol_mumps.dll +0 -0
  338. casadi/libcasadi_linsol_mumps.dll.a +0 -0
  339. casadi/libcasadi_linsol_qr.dll +0 -0
  340. casadi/libcasadi_linsol_qr.dll.a +0 -0
  341. casadi/libcasadi_linsol_symbolicqr.dll +0 -0
  342. casadi/libcasadi_linsol_symbolicqr.dll.a +0 -0
  343. casadi/libcasadi_linsol_tridiag.dll +0 -0
  344. casadi/libcasadi_linsol_tridiag.dll.a +0 -0
  345. casadi/libcasadi_nlpsol_alpaqa.dll +0 -0
  346. casadi/libcasadi_nlpsol_alpaqa.dll.a +0 -0
  347. casadi/libcasadi_nlpsol_ampl.dll +0 -0
  348. casadi/libcasadi_nlpsol_ampl.dll.a +0 -0
  349. casadi/libcasadi_nlpsol_blocksqp.dll +0 -0
  350. casadi/libcasadi_nlpsol_blocksqp.dll.a +0 -0
  351. casadi/libcasadi_nlpsol_bonmin.dll +0 -0
  352. casadi/libcasadi_nlpsol_bonmin.dll.a +0 -0
  353. casadi/libcasadi_nlpsol_fatrop.dll +0 -0
  354. casadi/libcasadi_nlpsol_fatrop.dll.a +0 -0
  355. casadi/libcasadi_nlpsol_feasiblesqpmethod.dll +0 -0
  356. casadi/libcasadi_nlpsol_feasiblesqpmethod.dll.a +0 -0
  357. casadi/libcasadi_nlpsol_ipopt.dll +0 -0
  358. casadi/libcasadi_nlpsol_ipopt.dll.a +0 -0
  359. casadi/libcasadi_nlpsol_knitro.dll +0 -0
  360. casadi/libcasadi_nlpsol_knitro.dll.a +0 -0
  361. casadi/libcasadi_nlpsol_madnlp.dll +0 -0
  362. casadi/libcasadi_nlpsol_madnlp.dll.a +0 -0
  363. casadi/libcasadi_nlpsol_qrsqp.dll +0 -0
  364. casadi/libcasadi_nlpsol_qrsqp.dll.a +0 -0
  365. casadi/libcasadi_nlpsol_scpgen.dll +0 -0
  366. casadi/libcasadi_nlpsol_scpgen.dll.a +0 -0
  367. casadi/libcasadi_nlpsol_sleqp.dll +0 -0
  368. casadi/libcasadi_nlpsol_sleqp.dll.a +0 -0
  369. casadi/libcasadi_nlpsol_snopt.dll +0 -0
  370. casadi/libcasadi_nlpsol_snopt.dll.a +0 -0
  371. casadi/libcasadi_nlpsol_sqpmethod.dll +0 -0
  372. casadi/libcasadi_nlpsol_sqpmethod.dll.a +0 -0
  373. casadi/libcasadi_nlpsol_worhp.dll +0 -0
  374. casadi/libcasadi_nlpsol_worhp.dll.a +0 -0
  375. casadi/libcasadi_rootfinder_fast_newton.dll +0 -0
  376. casadi/libcasadi_rootfinder_fast_newton.dll.a +0 -0
  377. casadi/libcasadi_rootfinder_kinsol.dll +0 -0
  378. casadi/libcasadi_rootfinder_kinsol.dll.a +0 -0
  379. casadi/libcasadi_rootfinder_newton.dll +0 -0
  380. casadi/libcasadi_rootfinder_newton.dll.a +0 -0
  381. casadi/libcasadi_rootfinder_nlpsol.dll +0 -0
  382. casadi/libcasadi_rootfinder_nlpsol.dll.a +0 -0
  383. casadi/libcasadi_sundials_common.dll +0 -0
  384. casadi/libcasadi_sundials_common.dll.a +0 -0
  385. casadi/libcasadi_xmlfile_tinyxml.dll +0 -0
  386. casadi/libcasadi_xmlfile_tinyxml.dll.a +0 -0
  387. casadi/libcoinmetis-2.dll +0 -0
  388. casadi/libcoinmetis.dll.a +0 -0
  389. casadi/libcoinmumps-3.dll +0 -0
  390. casadi/libcoinmumps.dll.a +0 -0
  391. casadi/libcplex_adaptor.dll +0 -0
  392. casadi/libdaqp.dll +0 -0
  393. casadi/libdaqp.dll.a +0 -0
  394. casadi/libdl.dll +0 -0
  395. casadi/libdl.dll.a +0 -0
  396. casadi/libfatrop.dll +0 -0
  397. casadi/libfatrop.dll.a +0 -0
  398. casadi/libgurobi_adaptor.dll +0 -0
  399. casadi/libhighs.dll +0 -0
  400. casadi/libhighs.dll.a +0 -0
  401. casadi/libhpipm.dll +0 -0
  402. casadi/libhpipm.dll.a +0 -0
  403. casadi/libipopt-3.dll +0 -0
  404. casadi/libipopt.dll.a +0 -0
  405. casadi/libmatlab_ipc.dll +0 -0
  406. casadi/libmatlab_ipc.dll.a +0 -0
  407. casadi/libosqp.dll +0 -0
  408. casadi/libosqp.dll.a +0 -0
  409. casadi/libqdldl.dll +0 -0
  410. casadi/libqdldl.dll.a +0 -0
  411. casadi/libsipopt-3.dll +0 -0
  412. casadi/libsipopt.dll.a +0 -0
  413. casadi/libsleqp.dll +0 -0
  414. casadi/libsleqp.dll.a +0 -0
  415. casadi/libtrlib.dll +0 -0
  416. casadi/libtrlib.dll.a +0 -0
  417. casadi/libzip.a +0 -0
  418. casadi/libzlib.dll +0 -0
  419. casadi/libzlib.dll.a +0 -0
  420. casadi/libzlibstatic.a +0 -0
  421. casadi/matlab_ipc.lib +0 -0
  422. casadi/osqp.lib +0 -0
  423. casadi/pkgconfig/casadi.pc +1 -1
  424. casadi/pkgconfig/highs.pc +1 -1
  425. casadi/pkgconfig/libzip.pc +14 -0
  426. casadi/sleqp.lib +0 -0
  427. casadi/spral_ssids.exe +0 -0
  428. casadi/std-e414687e404555e5.dll +0 -0
  429. casadi/tools/__init__.py +3 -1
  430. casadi/tools/graph/graph.py +1 -1
  431. casadi/tools/structure3.py +2 -2
  432. {casadi-3.6.7.dist-info → casadi-3.7.0.dist-info}/METADATA +1 -1
  433. {casadi-3.6.7.dist-info → casadi-3.7.0.dist-info}/RECORD +434 -393
  434. casadi/bin/libtinyxml2.dll +0 -0
  435. casadi/include/tinyxml2.h +0 -2380
  436. casadi/lib/cmake/tinyxml2/tinyxml2-config.cmake +0 -57
  437. casadi/lib/cmake/tinyxml2/tinyxml2-shared-targets-release.cmake +0 -19
  438. casadi/lib/libtinyxml2.dll.a +0 -0
  439. casadi/lib/pkgconfig/tinyxml2.pc +0 -10
  440. casadi/libtinyxml2.dll +0 -0
  441. casadi/tools/structure.py +0 -1446
  442. {casadi-3.6.7.dist-info → casadi-3.7.0.dist-info}/WHEEL +0 -0
@@ -0,0 +1,645 @@
1
+ //
2
+ // MIT No Attribution
3
+ //
4
+ // Copyright (C) 2010-2023 Joel Andersson, Joris Gillis, Moritz Diehl, KU Leuven.
5
+ //
6
+ // Permission is hereby granted, free of charge, to any person obtaining a copy of this
7
+ // software and associated documentation files (the "Software"), to deal in the Software
8
+ // without restriction, including without limitation the rights to use, copy, modify,
9
+ // merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
10
+ // permit persons to whom the Software is furnished to do so.
11
+ //
12
+ // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
13
+ // INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
14
+ // PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
15
+ // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
16
+ // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
17
+ // SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
18
+ //
19
+
20
+ // SYMBOL "blazing_3d_boor_eval"
21
+ template<typename T1>
22
+ void casadi_blazing_3d_boor_eval(T1* f, T1* J, T1* H, const T1* all_knots, const casadi_int* offset, const T1* c, const T1* dc, const T1* ddc, const T1* all_x, const casadi_int* lookup_mode, casadi_int* iw, T1* w) { // NOLINT(whitespace/line_length)
23
+ casadi_int n_dims = 3;
24
+ casadi_int m = 1;
25
+ casadi_int n_iter, k, i, pivot;
26
+ casadi_int *boor_offset, *starts, *index, *coeff_offset;
27
+ T1 *cumprod;
28
+ boor_offset = iw; iw+=n_dims+1;
29
+ starts = iw; iw+=n_dims;
30
+ index = iw; iw+=n_dims;
31
+ coeff_offset = iw;
32
+ cumprod = w; w+= n_dims+1;
33
+ boor_offset[0] = 0;
34
+ cumprod[n_dims] = 1;
35
+ coeff_offset[n_dims] = 0;
36
+
37
+ casadi_int stride1 = offset[1]-offset[0]-4;
38
+ casadi_int stride2 = (offset[2]-offset[1]-4)*stride1;
39
+
40
+ simde__m256d zero = simde_mm256_set1_pd(0.0);
41
+
42
+ simde__m256d boor_start_0000 = zero;
43
+ simde__m256d boor_start_1111 = simde_mm256_set1_pd(1.0);
44
+ simde__m256d boor_start_0001 = simde_mm256_set_pd(1.0, 0.0, 0.0, 0.0);
45
+ simde__m256d boor_start_0010 = simde_mm256_set_pd(0.0, 1.0, 0.0, 0.0);
46
+
47
+ simde__m256d boor0_d3;
48
+ simde__m256d boor0_d2;
49
+ simde__m256d boor0_d1;
50
+ simde__m256d boor0_d0;
51
+
52
+ simde__m256d boor1_d3;
53
+ simde__m256d boor1_d2;
54
+ simde__m256d boor1_d1;
55
+ simde__m256d boor1_d0;
56
+
57
+ simde__m256d boor2_d3;
58
+ simde__m256d boor2_d2;
59
+ simde__m256d boor2_d1;
60
+ simde__m256d boor2_d0;
61
+
62
+ const T1* knots;
63
+ T1 x;
64
+ casadi_int degree, n_knots, n_b, L, start;
65
+ degree = 3;
66
+ knots = all_knots + offset[0];
67
+ n_knots = offset[0+1]-offset[0];
68
+ n_b = n_knots-degree-1;
69
+ x = all_x[0];
70
+ L = casadi_low(x, knots+degree, n_knots-2*degree, lookup_mode[0]);
71
+ start = L;
72
+ if (start>n_b-degree-1) start = n_b-degree-1;
73
+ starts[0] = start;
74
+ boor0_d3 = boor_start_0000;
75
+ if (x>=knots[0] && x<=knots[n_knots-1]) {
76
+ if (x==knots[1]) {
77
+ boor0_d3 = boor_start_1111;
78
+ } else if (x==knots[n_knots-1]) {
79
+ boor0_d3 = boor_start_0001;
80
+ } else if (knots[L+degree]==x) {
81
+ boor0_d3 = boor_start_0010;
82
+ } else {
83
+ boor0_d3 = boor_start_0001;
84
+ }
85
+ }
86
+ casadi_blazing_de_boor(x, knots+start, &boor0_d0, &boor0_d1, &boor0_d2, &boor0_d3);
87
+
88
+ knots = all_knots + offset[1];
89
+ n_knots = offset[1+1]-offset[1];
90
+ n_b = n_knots-degree-1;
91
+ x = all_x[1];
92
+ L = casadi_low(x, knots+degree, n_knots-2*degree, lookup_mode[1]);
93
+ start = L;
94
+ if (start>n_b-degree-1) start = n_b-degree-1;
95
+ starts[1] = start;
96
+ boor1_d3 = boor_start_0000;
97
+ if (x>=knots[0] && x<=knots[n_knots-1]) {
98
+ if (x==knots[1]) {
99
+ boor1_d3 = boor_start_1111;
100
+ } else if (x==knots[n_knots-1]) {
101
+ boor1_d3 = boor_start_0001;
102
+ } else if (knots[L+degree]==x) {
103
+ boor1_d3 = boor_start_0010;
104
+ } else {
105
+ boor1_d3 = boor_start_0001;
106
+ }
107
+ }
108
+ casadi_blazing_de_boor(x, knots+start, &boor1_d0, &boor1_d1, &boor1_d2, &boor1_d3);
109
+
110
+ knots = all_knots + offset[2];
111
+ n_knots = offset[2+1]-offset[2];
112
+ n_b = n_knots-degree-1;
113
+ x = all_x[2];
114
+ L = casadi_low(x, knots+degree, n_knots-2*degree, lookup_mode[2]);
115
+ start = L;
116
+ if (start>n_b-degree-1) start = n_b-degree-1;
117
+ starts[2] = start;
118
+ boor2_d3 = boor_start_0000;
119
+ if (x>=knots[0] && x<=knots[n_knots-1]) {
120
+ if (x==knots[1]) {
121
+ boor2_d3 = boor_start_1111;
122
+ } else if (x==knots[n_knots-1]) {
123
+ boor2_d3 = boor_start_0001;
124
+ } else if (knots[L+degree]==x) {
125
+ boor2_d3 = boor_start_0010;
126
+ } else {
127
+ boor2_d3 = boor_start_0001;
128
+ }
129
+ }
130
+ casadi_blazing_de_boor(x, knots+start, &boor2_d0, &boor2_d1, &boor2_d2, &boor2_d3);
131
+
132
+ simde__m256d C[16];
133
+
134
+ for (int j=0;j<4;++j) {
135
+ for (int k=0;k<4;++k) {
136
+ C[j+4*k] = simde_mm256_loadu_pd(c+(starts[1]+j)*stride1+(starts[2]+k)*stride2+starts[0]);
137
+ }
138
+ }
139
+
140
+ simde__m256d a, b0, b1, b2, b3, c0, c1, c2, c3, r;
141
+ simde__m256d ab[4], cab[4];
142
+ simde__m128d r0, r1;
143
+
144
+ a = boor0_d0;
145
+ b0 = simde_mm256_permute4x64_pd(boor1_d0, SIMDE_MM_SHUFFLE(0, 0, 0, 0));
146
+ b1 = simde_mm256_permute4x64_pd(boor1_d0, SIMDE_MM_SHUFFLE(1, 1, 1, 1));
147
+ b2 = simde_mm256_permute4x64_pd(boor1_d0, SIMDE_MM_SHUFFLE(2, 2, 2, 2));
148
+ b3 = simde_mm256_permute4x64_pd(boor1_d0, SIMDE_MM_SHUFFLE(3, 3, 3, 3));
149
+
150
+ c0 = simde_mm256_permute4x64_pd(boor2_d0, SIMDE_MM_SHUFFLE(0, 0, 0, 0));
151
+ c1 = simde_mm256_permute4x64_pd(boor2_d0, SIMDE_MM_SHUFFLE(1, 1, 1, 1));
152
+ c2 = simde_mm256_permute4x64_pd(boor2_d0, SIMDE_MM_SHUFFLE(2, 2, 2, 2));
153
+ c3 = simde_mm256_permute4x64_pd(boor2_d0, SIMDE_MM_SHUFFLE(3, 3, 3, 3));
154
+
155
+ // Need to compute sum_abc C_abc A_a B_b C_c
156
+
157
+ // Step 1: Outer product a b: A_a B_b
158
+ ab[0] = simde_mm256_mul_pd(a, b0);
159
+ ab[1] = simde_mm256_mul_pd(a, b1);
160
+ ab[2] = simde_mm256_mul_pd(a, b2);
161
+ ab[3] = simde_mm256_mul_pd(a, b3);
162
+
163
+ // Sum over b axis: sum_b C_abc * (A_a B_b)_b
164
+ // cab <- cab + ab[i]*C[i]
165
+ for (int i=0;i<4;++i) {
166
+ cab[i] = simde_mm256_set1_pd(0);
167
+ cab[i] = simde_mm256_fmadd_pd(ab[0], C[4*i+0], cab[i]);
168
+ cab[i] = simde_mm256_fmadd_pd(ab[1], C[4*i+1], cab[i]);
169
+ cab[i] = simde_mm256_fmadd_pd(ab[2], C[4*i+2], cab[i]);
170
+ cab[i] = simde_mm256_fmadd_pd(ab[3], C[4*i+3], cab[i]);
171
+ }
172
+
173
+ if (f) {
174
+ // Reduce over the c direction
175
+ r = simde_mm256_set1_pd(0);
176
+ r = simde_mm256_fmadd_pd(cab[0], c0, r);
177
+ r = simde_mm256_fmadd_pd(cab[1], c1, r);
178
+ r = simde_mm256_fmadd_pd(cab[2], c2, r);
179
+ r = simde_mm256_fmadd_pd(cab[3], c3, r);
180
+
181
+ // Sum all r entries
182
+ r0 = simde_mm256_castpd256_pd128(r);
183
+ r1 = simde_mm256_extractf128_pd(r, 1);
184
+ r0 = simde_mm_add_pd(r0, r1);
185
+ f[0] = simde_mm_cvtsd_f64(simde_mm_add_sd(r0, simde_mm_unpackhi_pd(r0, r0)));
186
+ }
187
+
188
+ // First derivative
189
+ if (dc && J) {
190
+ stride1 = offset[1]-offset[0]-4-1;
191
+ stride2 = (offset[2]-offset[1]-4)*stride1;
192
+ for (int j=0;j<4;++j) {
193
+ for (int k=0;k<4;++k) {
194
+ C[j+4*k] = simde_mm256_loadu_pd(
195
+ dc+(starts[1]+j)*stride1+(starts[2]+k)*stride2+starts[0]-1);
196
+ }
197
+ }
198
+ dc += stride2*(offset[3]-offset[2]-4);
199
+
200
+ a = boor0_d1;
201
+ ab[0] = simde_mm256_mul_pd(a, b0);
202
+ ab[1] = simde_mm256_mul_pd(a, b1);
203
+ ab[2] = simde_mm256_mul_pd(a, b2);
204
+ ab[3] = simde_mm256_mul_pd(a, b3);
205
+ // Sum over b axis: sum_b C_abc * (A_a B_b)_b
206
+ // cab <- cab + ab[i]*C[i]
207
+ for (int i=0;i<4;++i) {
208
+ cab[i] = simde_mm256_set1_pd(0);
209
+ cab[i] = simde_mm256_fmadd_pd(ab[0], C[4*i+0], cab[i]);
210
+ cab[i] = simde_mm256_fmadd_pd(ab[1], C[4*i+1], cab[i]);
211
+ cab[i] = simde_mm256_fmadd_pd(ab[2], C[4*i+2], cab[i]);
212
+ cab[i] = simde_mm256_fmadd_pd(ab[3], C[4*i+3], cab[i]);
213
+ }
214
+
215
+ // Reduce over the c direction
216
+ r = simde_mm256_set1_pd(0);
217
+ r = simde_mm256_fmadd_pd(cab[0], c0, r);
218
+ r = simde_mm256_fmadd_pd(cab[1], c1, r);
219
+ r = simde_mm256_fmadd_pd(cab[2], c2, r);
220
+ r = simde_mm256_fmadd_pd(cab[3], c3, r);
221
+
222
+ // Sum all r entries
223
+ r0 = simde_mm256_castpd256_pd128(r);
224
+ r1 = simde_mm256_extractf128_pd(r, 1);
225
+ r0 = simde_mm_add_pd(r0, r1);
226
+ J[0] = simde_mm_cvtsd_f64(simde_mm_add_sd(r0, simde_mm_unpackhi_pd(r0, r0)));
227
+
228
+
229
+ stride1 = offset[1]-offset[0]-4;
230
+ stride2 = (offset[2]-offset[1]-4-1)*stride1;
231
+ for (int j=0;j<4;++j) {
232
+ for (int k=0;k<4;++k) {
233
+ if (j==0) {
234
+ C[j+4*k] = zero;
235
+ } else {
236
+ C[j+4*k] = simde_mm256_loadu_pd(
237
+ dc+(starts[1]+j-1)*stride1+(starts[2]+k)*stride2+starts[0]);
238
+ }
239
+ }
240
+ }
241
+ dc += stride2*(offset[3]-offset[2]-4);
242
+
243
+ a = boor0_d0;
244
+
245
+ b0 = simde_mm256_permute4x64_pd(boor1_d1, SIMDE_MM_SHUFFLE(0, 0, 0, 0));
246
+ b1 = simde_mm256_permute4x64_pd(boor1_d1, SIMDE_MM_SHUFFLE(1, 1, 1, 1));
247
+ b2 = simde_mm256_permute4x64_pd(boor1_d1, SIMDE_MM_SHUFFLE(2, 2, 2, 2));
248
+ b3 = simde_mm256_permute4x64_pd(boor1_d1, SIMDE_MM_SHUFFLE(3, 3, 3, 3));
249
+
250
+ ab[0] = simde_mm256_mul_pd(a, b0);
251
+ ab[1] = simde_mm256_mul_pd(a, b1);
252
+ ab[2] = simde_mm256_mul_pd(a, b2);
253
+ ab[3] = simde_mm256_mul_pd(a, b3);
254
+
255
+ // Sum over b axis: sum_b C_abc * (A_a B_b)_b
256
+ // cab <- cab + ab[i]*C[i]
257
+ for (int i=0;i<4;++i) {
258
+ cab[i] = simde_mm256_set1_pd(0);
259
+ cab[i] = simde_mm256_fmadd_pd(ab[0], C[4*i+0], cab[i]);
260
+ cab[i] = simde_mm256_fmadd_pd(ab[1], C[4*i+1], cab[i]);
261
+ cab[i] = simde_mm256_fmadd_pd(ab[2], C[4*i+2], cab[i]);
262
+ cab[i] = simde_mm256_fmadd_pd(ab[3], C[4*i+3], cab[i]);
263
+ }
264
+
265
+ // Reduce over the c direction
266
+ r = simde_mm256_set1_pd(0);
267
+ r = simde_mm256_fmadd_pd(cab[0], c0, r);
268
+ r = simde_mm256_fmadd_pd(cab[1], c1, r);
269
+ r = simde_mm256_fmadd_pd(cab[2], c2, r);
270
+ r = simde_mm256_fmadd_pd(cab[3], c3, r);
271
+
272
+ // Sum all r entries
273
+ r0 = simde_mm256_castpd256_pd128(r);
274
+ r1 = simde_mm256_extractf128_pd(r, 1);
275
+ r0 = simde_mm_add_pd(r0, r1);
276
+ J[1] = simde_mm_cvtsd_f64(simde_mm_add_sd(r0, simde_mm_unpackhi_pd(r0, r0)));
277
+
278
+ stride1 = offset[1]-offset[0]-4;
279
+ stride2 = (offset[2]-offset[1]-4)*stride1;
280
+ for (int j=0;j<4;++j) {
281
+ for (int k=0;k<4;++k) {
282
+ if (k==0) {
283
+ C[j+4*k] = zero;
284
+ } else {
285
+ C[j+4*k] = simde_mm256_loadu_pd(
286
+ dc+(starts[1]+j)*stride1+(starts[2]+k-1)*stride2+starts[0]);
287
+ }
288
+ }
289
+ }
290
+
291
+ b0 = simde_mm256_permute4x64_pd(boor1_d0, SIMDE_MM_SHUFFLE(0, 0, 0, 0));
292
+ b1 = simde_mm256_permute4x64_pd(boor1_d0, SIMDE_MM_SHUFFLE(1, 1, 1, 1));
293
+ b2 = simde_mm256_permute4x64_pd(boor1_d0, SIMDE_MM_SHUFFLE(2, 2, 2, 2));
294
+ b3 = simde_mm256_permute4x64_pd(boor1_d0, SIMDE_MM_SHUFFLE(3, 3, 3, 3));
295
+
296
+ c0 = simde_mm256_permute4x64_pd(boor2_d1, SIMDE_MM_SHUFFLE(0, 0, 0, 0));
297
+ c1 = simde_mm256_permute4x64_pd(boor2_d1, SIMDE_MM_SHUFFLE(1, 1, 1, 1));
298
+ c2 = simde_mm256_permute4x64_pd(boor2_d1, SIMDE_MM_SHUFFLE(2, 2, 2, 2));
299
+ c3 = simde_mm256_permute4x64_pd(boor2_d1, SIMDE_MM_SHUFFLE(3, 3, 3, 3));
300
+
301
+ ab[0] = simde_mm256_mul_pd(a, b0);
302
+ ab[1] = simde_mm256_mul_pd(a, b1);
303
+ ab[2] = simde_mm256_mul_pd(a, b2);
304
+ ab[3] = simde_mm256_mul_pd(a, b3);
305
+
306
+ // Sum over b axis: sum_b C_abc * (A_a B_b)_b
307
+ // cab <- cab + ab[i]*C[i]
308
+ for (int i=0;i<4;++i) {
309
+ cab[i] = simde_mm256_set1_pd(0);
310
+ cab[i] = simde_mm256_fmadd_pd(ab[0], C[4*i+0], cab[i]);
311
+ cab[i] = simde_mm256_fmadd_pd(ab[1], C[4*i+1], cab[i]);
312
+ cab[i] = simde_mm256_fmadd_pd(ab[2], C[4*i+2], cab[i]);
313
+ cab[i] = simde_mm256_fmadd_pd(ab[3], C[4*i+3], cab[i]);
314
+ }
315
+
316
+ // Reduce over the c direction
317
+ r = simde_mm256_set1_pd(0);
318
+ r = simde_mm256_fmadd_pd(cab[0], c0, r);
319
+ r = simde_mm256_fmadd_pd(cab[1], c1, r);
320
+ r = simde_mm256_fmadd_pd(cab[2], c2, r);
321
+ r = simde_mm256_fmadd_pd(cab[3], c3, r);
322
+
323
+ // Sum all r entries
324
+ r0 = simde_mm256_castpd256_pd128(r);
325
+ r1 = simde_mm256_extractf128_pd(r, 1);
326
+ r0 = simde_mm_add_pd(r0, r1);
327
+ J[2] = simde_mm_cvtsd_f64(simde_mm_add_sd(r0, simde_mm_unpackhi_pd(r0, r0)));
328
+
329
+ }
330
+
331
+ if (ddc && H) {
332
+ stride1 = offset[1]-offset[0]-4-2;
333
+ stride2 = (offset[2]-offset[1]-4)*stride1;
334
+ for (int j=0;j<4;++j) {
335
+ for (int k=0;k<4;++k) {
336
+ C[j+4*k] = simde_mm256_loadu_pd(
337
+ ddc+(starts[1]+j)*stride1+(starts[2]+k)*stride2+starts[0]-2);
338
+ }
339
+ }
340
+ ddc += stride2*(offset[3]-offset[2]-4);
341
+
342
+ a = boor0_d2;
343
+ b0 = simde_mm256_permute4x64_pd(boor1_d0, SIMDE_MM_SHUFFLE(0, 0, 0, 0));
344
+ b1 = simde_mm256_permute4x64_pd(boor1_d0, SIMDE_MM_SHUFFLE(1, 1, 1, 1));
345
+ b2 = simde_mm256_permute4x64_pd(boor1_d0, SIMDE_MM_SHUFFLE(2, 2, 2, 2));
346
+ b3 = simde_mm256_permute4x64_pd(boor1_d0, SIMDE_MM_SHUFFLE(3, 3, 3, 3));
347
+
348
+ c0 = simde_mm256_permute4x64_pd(boor2_d0, SIMDE_MM_SHUFFLE(0, 0, 0, 0));
349
+ c1 = simde_mm256_permute4x64_pd(boor2_d0, SIMDE_MM_SHUFFLE(1, 1, 1, 1));
350
+ c2 = simde_mm256_permute4x64_pd(boor2_d0, SIMDE_MM_SHUFFLE(2, 2, 2, 2));
351
+ c3 = simde_mm256_permute4x64_pd(boor2_d0, SIMDE_MM_SHUFFLE(3, 3, 3, 3));
352
+
353
+ ab[0] = simde_mm256_mul_pd(a, b0);
354
+ ab[1] = simde_mm256_mul_pd(a, b1);
355
+ ab[2] = simde_mm256_mul_pd(a, b2);
356
+ ab[3] = simde_mm256_mul_pd(a, b3);
357
+ // Sum over b axis: sum_b C_abc * (A_a B_b)_b
358
+ // cab <- cab + ab[i]*C[i]
359
+ for (int i=0;i<4;++i) {
360
+ cab[i] = simde_mm256_set1_pd(0);
361
+ cab[i] = simde_mm256_fmadd_pd(ab[0], C[4*i+0], cab[i]);
362
+ cab[i] = simde_mm256_fmadd_pd(ab[1], C[4*i+1], cab[i]);
363
+ cab[i] = simde_mm256_fmadd_pd(ab[2], C[4*i+2], cab[i]);
364
+ cab[i] = simde_mm256_fmadd_pd(ab[3], C[4*i+3], cab[i]);
365
+ }
366
+
367
+ // Reduce over the c direction
368
+ r = simde_mm256_set1_pd(0);
369
+ r = simde_mm256_fmadd_pd(cab[0], c0, r);
370
+ r = simde_mm256_fmadd_pd(cab[1], c1, r);
371
+ r = simde_mm256_fmadd_pd(cab[2], c2, r);
372
+ r = simde_mm256_fmadd_pd(cab[3], c3, r);
373
+
374
+ // Sum all r entries
375
+ r0 = simde_mm256_castpd256_pd128(r);
376
+ r1 = simde_mm256_extractf128_pd(r, 1);
377
+ r0 = simde_mm_add_pd(r0, r1);
378
+ H[0] = simde_mm_cvtsd_f64(simde_mm_add_sd(r0, simde_mm_unpackhi_pd(r0, r0)));
379
+
380
+ stride1 = offset[1]-offset[0]-4;
381
+ stride2 = (offset[2]-offset[1]-4-2)*stride1;
382
+ for (int j=0;j<4;++j) {
383
+ for (int k=0;k<4;++k) {
384
+ if (j<=1) {
385
+ C[j+4*k] = zero;
386
+ } else {
387
+ C[j+4*k] = simde_mm256_loadu_pd(
388
+ ddc+(starts[1]+j-2)*stride1+(starts[2]+k)*stride2+starts[0]);
389
+ }
390
+ }
391
+ }
392
+ ddc += stride2*(offset[3]-offset[2]-4);
393
+
394
+ a = boor0_d0;
395
+ b0 = simde_mm256_permute4x64_pd(boor1_d2, SIMDE_MM_SHUFFLE(0, 0, 0, 0));
396
+ b1 = simde_mm256_permute4x64_pd(boor1_d2, SIMDE_MM_SHUFFLE(1, 1, 1, 1));
397
+ b2 = simde_mm256_permute4x64_pd(boor1_d2, SIMDE_MM_SHUFFLE(2, 2, 2, 2));
398
+ b3 = simde_mm256_permute4x64_pd(boor1_d2, SIMDE_MM_SHUFFLE(3, 3, 3, 3));
399
+
400
+ c0 = simde_mm256_permute4x64_pd(boor2_d0, SIMDE_MM_SHUFFLE(0, 0, 0, 0));
401
+ c1 = simde_mm256_permute4x64_pd(boor2_d0, SIMDE_MM_SHUFFLE(1, 1, 1, 1));
402
+ c2 = simde_mm256_permute4x64_pd(boor2_d0, SIMDE_MM_SHUFFLE(2, 2, 2, 2));
403
+ c3 = simde_mm256_permute4x64_pd(boor2_d0, SIMDE_MM_SHUFFLE(3, 3, 3, 3));
404
+
405
+ ab[0] = simde_mm256_mul_pd(a, b0);
406
+ ab[1] = simde_mm256_mul_pd(a, b1);
407
+ ab[2] = simde_mm256_mul_pd(a, b2);
408
+ ab[3] = simde_mm256_mul_pd(a, b3);
409
+ // Sum over b axis: sum_b C_abc * (A_a B_b)_b
410
+ // cab <- cab + ab[i]*C[i]
411
+ for (int i=0;i<4;++i) {
412
+ cab[i] = simde_mm256_set1_pd(0);
413
+ cab[i] = simde_mm256_fmadd_pd(ab[0], C[4*i+0], cab[i]);
414
+ cab[i] = simde_mm256_fmadd_pd(ab[1], C[4*i+1], cab[i]);
415
+ cab[i] = simde_mm256_fmadd_pd(ab[2], C[4*i+2], cab[i]);
416
+ cab[i] = simde_mm256_fmadd_pd(ab[3], C[4*i+3], cab[i]);
417
+ }
418
+
419
+ // Reduce over the c direction
420
+ r = simde_mm256_set1_pd(0);
421
+ r = simde_mm256_fmadd_pd(cab[0], c0, r);
422
+ r = simde_mm256_fmadd_pd(cab[1], c1, r);
423
+ r = simde_mm256_fmadd_pd(cab[2], c2, r);
424
+ r = simde_mm256_fmadd_pd(cab[3], c3, r);
425
+
426
+ // Sum all r entries
427
+ r0 = simde_mm256_castpd256_pd128(r);
428
+ r1 = simde_mm256_extractf128_pd(r, 1);
429
+ r0 = simde_mm_add_pd(r0, r1);
430
+ H[4] = simde_mm_cvtsd_f64(simde_mm_add_sd(r0, simde_mm_unpackhi_pd(r0, r0)));
431
+
432
+ stride1 = offset[1]-offset[0]-4;
433
+ stride2 = (offset[2]-offset[1]-4)*stride1;
434
+ for (int j=0;j<4;++j) {
435
+ for (int k=0;k<4;++k) {
436
+ if (k<=1) {
437
+ C[j+4*k] = zero;
438
+ } else {
439
+ C[j+4*k] = simde_mm256_loadu_pd(
440
+ ddc+(starts[1]+j)*stride1+(starts[2]+k-2)*stride2+starts[0]);
441
+ }
442
+ }
443
+ }
444
+ ddc += stride2*(offset[3]-offset[2]-4-2);
445
+
446
+ a = boor0_d0;
447
+ b0 = simde_mm256_permute4x64_pd(boor1_d0, SIMDE_MM_SHUFFLE(0, 0, 0, 0));
448
+ b1 = simde_mm256_permute4x64_pd(boor1_d0, SIMDE_MM_SHUFFLE(1, 1, 1, 1));
449
+ b2 = simde_mm256_permute4x64_pd(boor1_d0, SIMDE_MM_SHUFFLE(2, 2, 2, 2));
450
+ b3 = simde_mm256_permute4x64_pd(boor1_d0, SIMDE_MM_SHUFFLE(3, 3, 3, 3));
451
+
452
+ c0 = simde_mm256_permute4x64_pd(boor2_d2, SIMDE_MM_SHUFFLE(0, 0, 0, 0));
453
+ c1 = simde_mm256_permute4x64_pd(boor2_d2, SIMDE_MM_SHUFFLE(1, 1, 1, 1));
454
+ c2 = simde_mm256_permute4x64_pd(boor2_d2, SIMDE_MM_SHUFFLE(2, 2, 2, 2));
455
+ c3 = simde_mm256_permute4x64_pd(boor2_d2, SIMDE_MM_SHUFFLE(3, 3, 3, 3));
456
+
457
+ ab[0] = simde_mm256_mul_pd(a, b0);
458
+ ab[1] = simde_mm256_mul_pd(a, b1);
459
+ ab[2] = simde_mm256_mul_pd(a, b2);
460
+ ab[3] = simde_mm256_mul_pd(a, b3);
461
+ // Sum over b axis: sum_b C_abc * (A_a B_b)_b
462
+ // cab <- cab + ab[i]*C[i]
463
+ for (int i=0;i<4;++i) {
464
+ cab[i] = simde_mm256_set1_pd(0);
465
+ cab[i] = simde_mm256_fmadd_pd(ab[0], C[4*i+0], cab[i]);
466
+ cab[i] = simde_mm256_fmadd_pd(ab[1], C[4*i+1], cab[i]);
467
+ cab[i] = simde_mm256_fmadd_pd(ab[2], C[4*i+2], cab[i]);
468
+ cab[i] = simde_mm256_fmadd_pd(ab[3], C[4*i+3], cab[i]);
469
+ }
470
+
471
+ // Reduce over the c direction
472
+ r = simde_mm256_set1_pd(0);
473
+ r = simde_mm256_fmadd_pd(cab[0], c0, r);
474
+ r = simde_mm256_fmadd_pd(cab[1], c1, r);
475
+ r = simde_mm256_fmadd_pd(cab[2], c2, r);
476
+ r = simde_mm256_fmadd_pd(cab[3], c3, r);
477
+
478
+ // Sum all r entries
479
+ r0 = simde_mm256_castpd256_pd128(r);
480
+ r1 = simde_mm256_extractf128_pd(r, 1);
481
+ r0 = simde_mm_add_pd(r0, r1);
482
+ H[8] = simde_mm_cvtsd_f64(simde_mm_add_sd(r0, simde_mm_unpackhi_pd(r0, r0)));
483
+
484
+ stride1 = offset[1]-offset[0]-5;
485
+ stride2 = (offset[2]-offset[1]-5)*stride1;
486
+ for (int j=0;j<4;++j) {
487
+ for (int k=0;k<4;++k) {
488
+ if (j==0) {
489
+ C[j+4*k] = zero;
490
+ } else {
491
+ C[j+4*k] = simde_mm256_loadu_pd(
492
+ ddc+(starts[1]+j-1)*stride1+(starts[2]+k)*stride2+starts[0]-1);
493
+ }
494
+ }
495
+ }
496
+ ddc += stride2*(offset[3]-offset[2]-4);
497
+
498
+ a = boor0_d1;
499
+
500
+ b0 = simde_mm256_permute4x64_pd(boor1_d1, SIMDE_MM_SHUFFLE(0, 0, 0, 0));
501
+ b1 = simde_mm256_permute4x64_pd(boor1_d1, SIMDE_MM_SHUFFLE(1, 1, 1, 1));
502
+ b2 = simde_mm256_permute4x64_pd(boor1_d1, SIMDE_MM_SHUFFLE(2, 2, 2, 2));
503
+ b3 = simde_mm256_permute4x64_pd(boor1_d1, SIMDE_MM_SHUFFLE(3, 3, 3, 3));
504
+
505
+ c0 = simde_mm256_permute4x64_pd(boor2_d0, SIMDE_MM_SHUFFLE(0, 0, 0, 0));
506
+ c1 = simde_mm256_permute4x64_pd(boor2_d0, SIMDE_MM_SHUFFLE(1, 1, 1, 1));
507
+ c2 = simde_mm256_permute4x64_pd(boor2_d0, SIMDE_MM_SHUFFLE(2, 2, 2, 2));
508
+ c3 = simde_mm256_permute4x64_pd(boor2_d0, SIMDE_MM_SHUFFLE(3, 3, 3, 3));
509
+
510
+ ab[0] = simde_mm256_mul_pd(a, b0);
511
+ ab[1] = simde_mm256_mul_pd(a, b1);
512
+ ab[2] = simde_mm256_mul_pd(a, b2);
513
+ ab[3] = simde_mm256_mul_pd(a, b3);
514
+ // Sum over b axis: sum_b C_abc * (A_a B_b)_b
515
+ // cab <- cab + ab[i]*C[i]
516
+ for (int i=0;i<4;++i) {
517
+ cab[i] = simde_mm256_set1_pd(0);
518
+ cab[i] = simde_mm256_fmadd_pd(ab[0], C[4*i+0], cab[i]);
519
+ cab[i] = simde_mm256_fmadd_pd(ab[1], C[4*i+1], cab[i]);
520
+ cab[i] = simde_mm256_fmadd_pd(ab[2], C[4*i+2], cab[i]);
521
+ cab[i] = simde_mm256_fmadd_pd(ab[3], C[4*i+3], cab[i]);
522
+ }
523
+
524
+ // Reduce over the c direction
525
+ r = simde_mm256_set1_pd(0);
526
+ r = simde_mm256_fmadd_pd(cab[0], c0, r);
527
+ r = simde_mm256_fmadd_pd(cab[1], c1, r);
528
+ r = simde_mm256_fmadd_pd(cab[2], c2, r);
529
+ r = simde_mm256_fmadd_pd(cab[3], c3, r);
530
+
531
+ // Sum all r entries
532
+ r0 = simde_mm256_castpd256_pd128(r);
533
+ r1 = simde_mm256_extractf128_pd(r, 1);
534
+ r0 = simde_mm_add_pd(r0, r1);
535
+ H[1] = H[3] = simde_mm_cvtsd_f64(simde_mm_add_sd(r0, simde_mm_unpackhi_pd(r0, r0)));
536
+
537
+ stride1 = offset[1]-offset[0]-4;
538
+ stride2 = (offset[2]-offset[1]-5)*stride1;
539
+ for (int j=0;j<4;++j) {
540
+ for (int k=0;k<4;++k) {
541
+ if (k==0) {
542
+ C[j+4*k] = zero;
543
+ } else {
544
+ C[j+4*k] = simde_mm256_loadu_pd(
545
+ ddc+(starts[1]+j-1)*stride1+(starts[2]+k-1)*stride2+starts[0]);
546
+ }
547
+ }
548
+ }
549
+ ddc += stride2*(offset[3]-offset[2]-5);
550
+
551
+ a = boor0_d0;
552
+
553
+ b0 = simde_mm256_permute4x64_pd(boor1_d1, SIMDE_MM_SHUFFLE(0, 0, 0, 0));
554
+ b1 = simde_mm256_permute4x64_pd(boor1_d1, SIMDE_MM_SHUFFLE(1, 1, 1, 1));
555
+ b2 = simde_mm256_permute4x64_pd(boor1_d1, SIMDE_MM_SHUFFLE(2, 2, 2, 2));
556
+ b3 = simde_mm256_permute4x64_pd(boor1_d1, SIMDE_MM_SHUFFLE(3, 3, 3, 3));
557
+
558
+ c0 = simde_mm256_permute4x64_pd(boor2_d1, SIMDE_MM_SHUFFLE(0, 0, 0, 0));
559
+ c1 = simde_mm256_permute4x64_pd(boor2_d1, SIMDE_MM_SHUFFLE(1, 1, 1, 1));
560
+ c2 = simde_mm256_permute4x64_pd(boor2_d1, SIMDE_MM_SHUFFLE(2, 2, 2, 2));
561
+ c3 = simde_mm256_permute4x64_pd(boor2_d1, SIMDE_MM_SHUFFLE(3, 3, 3, 3));
562
+
563
+ ab[0] = simde_mm256_mul_pd(a, b0);
564
+ ab[1] = simde_mm256_mul_pd(a, b1);
565
+ ab[2] = simde_mm256_mul_pd(a, b2);
566
+ ab[3] = simde_mm256_mul_pd(a, b3);
567
+ // Sum over b axis: sum_b C_abc * (A_a B_b)_b
568
+ // cab <- cab + ab[i]*C[i]
569
+ for (int i=0;i<4;++i) {
570
+ cab[i] = simde_mm256_set1_pd(0);
571
+ cab[i] = simde_mm256_fmadd_pd(ab[0], C[4*i+0], cab[i]);
572
+ cab[i] = simde_mm256_fmadd_pd(ab[1], C[4*i+1], cab[i]);
573
+ cab[i] = simde_mm256_fmadd_pd(ab[2], C[4*i+2], cab[i]);
574
+ cab[i] = simde_mm256_fmadd_pd(ab[3], C[4*i+3], cab[i]);
575
+ }
576
+
577
+ // Reduce over the c direction
578
+ r = simde_mm256_set1_pd(0);
579
+ r = simde_mm256_fmadd_pd(cab[0], c0, r);
580
+ r = simde_mm256_fmadd_pd(cab[1], c1, r);
581
+ r = simde_mm256_fmadd_pd(cab[2], c2, r);
582
+ r = simde_mm256_fmadd_pd(cab[3], c3, r);
583
+
584
+ // Sum all r entries
585
+ r0 = simde_mm256_castpd256_pd128(r);
586
+ r1 = simde_mm256_extractf128_pd(r, 1);
587
+ r0 = simde_mm_add_pd(r0, r1);
588
+ H[5] = H[7] = simde_mm_cvtsd_f64(simde_mm_add_sd(r0, simde_mm_unpackhi_pd(r0, r0)));
589
+
590
+
591
+
592
+ stride1 = offset[1]-offset[0]-5;
593
+ stride2 = (offset[2]-offset[1]-4)*stride1;
594
+ for (int j=0;j<4;++j) {
595
+ for (int k=0;k<4;++k) {
596
+ if (k==0) {
597
+ C[j+4*k] = zero;
598
+ } else {
599
+ C[j+4*k] = simde_mm256_loadu_pd(
600
+ ddc+(starts[1]+j)*stride1+(starts[2]+k-1)*stride2+starts[0]-1);
601
+ }
602
+ }
603
+ }
604
+ ddc += stride2*(offset[3]-offset[2]-5);
605
+
606
+ a = boor0_d1;
607
+
608
+ b0 = simde_mm256_permute4x64_pd(boor1_d0, SIMDE_MM_SHUFFLE(0, 0, 0, 0));
609
+ b1 = simde_mm256_permute4x64_pd(boor1_d0, SIMDE_MM_SHUFFLE(1, 1, 1, 1));
610
+ b2 = simde_mm256_permute4x64_pd(boor1_d0, SIMDE_MM_SHUFFLE(2, 2, 2, 2));
611
+ b3 = simde_mm256_permute4x64_pd(boor1_d0, SIMDE_MM_SHUFFLE(3, 3, 3, 3));
612
+
613
+ c0 = simde_mm256_permute4x64_pd(boor2_d1, SIMDE_MM_SHUFFLE(0, 0, 0, 0));
614
+ c1 = simde_mm256_permute4x64_pd(boor2_d1, SIMDE_MM_SHUFFLE(1, 1, 1, 1));
615
+ c2 = simde_mm256_permute4x64_pd(boor2_d1, SIMDE_MM_SHUFFLE(2, 2, 2, 2));
616
+ c3 = simde_mm256_permute4x64_pd(boor2_d1, SIMDE_MM_SHUFFLE(3, 3, 3, 3));
617
+
618
+ ab[0] = simde_mm256_mul_pd(a, b0);
619
+ ab[1] = simde_mm256_mul_pd(a, b1);
620
+ ab[2] = simde_mm256_mul_pd(a, b2);
621
+ ab[3] = simde_mm256_mul_pd(a, b3);
622
+ // Sum over b axis: sum_b C_abc * (A_a B_b)_b
623
+ // cab <- cab + ab[i]*C[i]
624
+ for (int i=0;i<4;++i) {
625
+ cab[i] = simde_mm256_set1_pd(0);
626
+ cab[i] = simde_mm256_fmadd_pd(ab[0], C[4*i+0], cab[i]);
627
+ cab[i] = simde_mm256_fmadd_pd(ab[1], C[4*i+1], cab[i]);
628
+ cab[i] = simde_mm256_fmadd_pd(ab[2], C[4*i+2], cab[i]);
629
+ cab[i] = simde_mm256_fmadd_pd(ab[3], C[4*i+3], cab[i]);
630
+ }
631
+
632
+ // Reduce over the c direction
633
+ r = simde_mm256_set1_pd(0);
634
+ r = simde_mm256_fmadd_pd(cab[0], c0, r);
635
+ r = simde_mm256_fmadd_pd(cab[1], c1, r);
636
+ r = simde_mm256_fmadd_pd(cab[2], c2, r);
637
+ r = simde_mm256_fmadd_pd(cab[3], c3, r);
638
+
639
+ // Sum all r entries
640
+ r0 = simde_mm256_castpd256_pd128(r);
641
+ r1 = simde_mm256_extractf128_pd(r, 1);
642
+ r0 = simde_mm_add_pd(r0, r1);
643
+ H[2] = H[6] = simde_mm_cvtsd_f64(simde_mm_add_sd(r0, simde_mm_unpackhi_pd(r0, r0)));
644
+ }
645
+ }