netgen-mesher 6.2.2506.post35.dev0__cp314-cp314-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (340) hide show
  1. netgen/NgOCC.py +7 -0
  2. netgen/__init__.py +114 -0
  3. netgen/__init__.pyi +22 -0
  4. netgen/__main__.py +53 -0
  5. netgen/cmake/NetgenConfig.cmake +79 -0
  6. netgen/cmake/netgen-targets-release.cmake +69 -0
  7. netgen/cmake/netgen-targets.cmake +146 -0
  8. netgen/config/__init__.py +1 -0
  9. netgen/config/__init__.pyi +52 -0
  10. netgen/config/__main__.py +4 -0
  11. netgen/config/config.py +68 -0
  12. netgen/config/config.pyi +54 -0
  13. netgen/csg.py +25 -0
  14. netgen/geom2d.py +178 -0
  15. netgen/gui.py +82 -0
  16. netgen/include/core/archive.hpp +1256 -0
  17. netgen/include/core/array.hpp +1760 -0
  18. netgen/include/core/autodiff.hpp +1131 -0
  19. netgen/include/core/autodiffdiff.hpp +733 -0
  20. netgen/include/core/bitarray.hpp +240 -0
  21. netgen/include/core/concurrentqueue.h +3619 -0
  22. netgen/include/core/exception.hpp +145 -0
  23. netgen/include/core/flags.hpp +199 -0
  24. netgen/include/core/hashtable.hpp +1281 -0
  25. netgen/include/core/localheap.hpp +318 -0
  26. netgen/include/core/logging.hpp +117 -0
  27. netgen/include/core/memtracer.hpp +221 -0
  28. netgen/include/core/mpi4py_pycapi.h +245 -0
  29. netgen/include/core/mpi_wrapper.hpp +643 -0
  30. netgen/include/core/ng_mpi.hpp +94 -0
  31. netgen/include/core/ng_mpi_generated_declarations.hpp +155 -0
  32. netgen/include/core/ng_mpi_native.hpp +25 -0
  33. netgen/include/core/ngcore.hpp +32 -0
  34. netgen/include/core/ngcore_api.hpp +152 -0
  35. netgen/include/core/ngstream.hpp +115 -0
  36. netgen/include/core/paje_trace.hpp +279 -0
  37. netgen/include/core/profiler.hpp +382 -0
  38. netgen/include/core/python_ngcore.hpp +457 -0
  39. netgen/include/core/ranges.hpp +109 -0
  40. netgen/include/core/register_archive.hpp +100 -0
  41. netgen/include/core/signal.hpp +82 -0
  42. netgen/include/core/simd.hpp +160 -0
  43. netgen/include/core/simd_arm64.hpp +407 -0
  44. netgen/include/core/simd_avx.hpp +394 -0
  45. netgen/include/core/simd_avx512.hpp +285 -0
  46. netgen/include/core/simd_generic.hpp +1053 -0
  47. netgen/include/core/simd_math.hpp +178 -0
  48. netgen/include/core/simd_sse.hpp +289 -0
  49. netgen/include/core/statushandler.hpp +37 -0
  50. netgen/include/core/symboltable.hpp +153 -0
  51. netgen/include/core/table.hpp +810 -0
  52. netgen/include/core/taskmanager.hpp +1161 -0
  53. netgen/include/core/type_traits.hpp +65 -0
  54. netgen/include/core/utils.hpp +385 -0
  55. netgen/include/core/version.hpp +102 -0
  56. netgen/include/core/xbool.hpp +47 -0
  57. netgen/include/csg/algprim.hpp +563 -0
  58. netgen/include/csg/brick.hpp +150 -0
  59. netgen/include/csg/csg.hpp +43 -0
  60. netgen/include/csg/csgeom.hpp +389 -0
  61. netgen/include/csg/csgparser.hpp +101 -0
  62. netgen/include/csg/curve2d.hpp +67 -0
  63. netgen/include/csg/edgeflw.hpp +112 -0
  64. netgen/include/csg/explicitcurve2d.hpp +113 -0
  65. netgen/include/csg/extrusion.hpp +185 -0
  66. netgen/include/csg/gencyl.hpp +70 -0
  67. netgen/include/csg/geoml.hpp +16 -0
  68. netgen/include/csg/identify.hpp +213 -0
  69. netgen/include/csg/manifold.hpp +29 -0
  70. netgen/include/csg/meshsurf.hpp +46 -0
  71. netgen/include/csg/polyhedra.hpp +121 -0
  72. netgen/include/csg/revolution.hpp +180 -0
  73. netgen/include/csg/singularref.hpp +84 -0
  74. netgen/include/csg/solid.hpp +295 -0
  75. netgen/include/csg/specpoin.hpp +194 -0
  76. netgen/include/csg/spline3d.hpp +99 -0
  77. netgen/include/csg/splinesurface.hpp +85 -0
  78. netgen/include/csg/surface.hpp +394 -0
  79. netgen/include/csg/triapprox.hpp +63 -0
  80. netgen/include/csg/vscsg.hpp +34 -0
  81. netgen/include/general/autodiff.hpp +356 -0
  82. netgen/include/general/autoptr.hpp +39 -0
  83. netgen/include/general/gzstream.h +121 -0
  84. netgen/include/general/hashtabl.hpp +1692 -0
  85. netgen/include/general/myadt.hpp +48 -0
  86. netgen/include/general/mystring.hpp +226 -0
  87. netgen/include/general/netgenout.hpp +205 -0
  88. netgen/include/general/ngarray.hpp +797 -0
  89. netgen/include/general/ngbitarray.hpp +149 -0
  90. netgen/include/general/ngpython.hpp +74 -0
  91. netgen/include/general/optmem.hpp +44 -0
  92. netgen/include/general/parthreads.hpp +138 -0
  93. netgen/include/general/seti.hpp +50 -0
  94. netgen/include/general/sort.hpp +47 -0
  95. netgen/include/general/spbita2d.hpp +59 -0
  96. netgen/include/general/stack.hpp +114 -0
  97. netgen/include/general/table.hpp +280 -0
  98. netgen/include/general/template.hpp +509 -0
  99. netgen/include/geom2d/csg2d.hpp +750 -0
  100. netgen/include/geom2d/geometry2d.hpp +280 -0
  101. netgen/include/geom2d/spline2d.hpp +234 -0
  102. netgen/include/geom2d/vsgeom2d.hpp +28 -0
  103. netgen/include/gprim/adtree.hpp +1392 -0
  104. netgen/include/gprim/geom2d.hpp +858 -0
  105. netgen/include/gprim/geom3d.hpp +749 -0
  106. netgen/include/gprim/geomfuncs.hpp +212 -0
  107. netgen/include/gprim/geomobjects.hpp +544 -0
  108. netgen/include/gprim/geomops.hpp +404 -0
  109. netgen/include/gprim/geomtest3d.hpp +101 -0
  110. netgen/include/gprim/gprim.hpp +33 -0
  111. netgen/include/gprim/spline.hpp +778 -0
  112. netgen/include/gprim/splinegeometry.hpp +73 -0
  113. netgen/include/gprim/transform3d.hpp +216 -0
  114. netgen/include/include/acisgeom.hpp +3 -0
  115. netgen/include/include/csg.hpp +1 -0
  116. netgen/include/include/geometry2d.hpp +1 -0
  117. netgen/include/include/gprim.hpp +1 -0
  118. netgen/include/include/incopengl.hpp +62 -0
  119. netgen/include/include/inctcl.hpp +13 -0
  120. netgen/include/include/incvis.hpp +6 -0
  121. netgen/include/include/linalg.hpp +1 -0
  122. netgen/include/include/meshing.hpp +1 -0
  123. netgen/include/include/myadt.hpp +1 -0
  124. netgen/include/include/mydefs.hpp +70 -0
  125. netgen/include/include/mystdlib.h +59 -0
  126. netgen/include/include/netgen_config.hpp +27 -0
  127. netgen/include/include/netgen_version.hpp +9 -0
  128. netgen/include/include/nginterface_v2_impl.hpp +395 -0
  129. netgen/include/include/ngsimd.hpp +1 -0
  130. netgen/include/include/occgeom.hpp +1 -0
  131. netgen/include/include/opti.hpp +1 -0
  132. netgen/include/include/parallel.hpp +1 -0
  133. netgen/include/include/stlgeom.hpp +1 -0
  134. netgen/include/include/visual.hpp +1 -0
  135. netgen/include/interface/rw_medit.hpp +11 -0
  136. netgen/include/interface/writeuser.hpp +80 -0
  137. netgen/include/linalg/densemat.hpp +414 -0
  138. netgen/include/linalg/linalg.hpp +29 -0
  139. netgen/include/linalg/opti.hpp +142 -0
  140. netgen/include/linalg/polynomial.hpp +47 -0
  141. netgen/include/linalg/vector.hpp +217 -0
  142. netgen/include/meshing/adfront2.hpp +274 -0
  143. netgen/include/meshing/adfront3.hpp +332 -0
  144. netgen/include/meshing/basegeom.hpp +370 -0
  145. netgen/include/meshing/bcfunctions.hpp +53 -0
  146. netgen/include/meshing/bisect.hpp +72 -0
  147. netgen/include/meshing/boundarylayer.hpp +113 -0
  148. netgen/include/meshing/classifyhpel.hpp +1984 -0
  149. netgen/include/meshing/clusters.hpp +46 -0
  150. netgen/include/meshing/curvedelems.hpp +274 -0
  151. netgen/include/meshing/delaunay2d.hpp +73 -0
  152. netgen/include/meshing/fieldlines.hpp +103 -0
  153. netgen/include/meshing/findip.hpp +198 -0
  154. netgen/include/meshing/findip2.hpp +103 -0
  155. netgen/include/meshing/geomsearch.hpp +69 -0
  156. netgen/include/meshing/global.hpp +54 -0
  157. netgen/include/meshing/hpref_hex.hpp +330 -0
  158. netgen/include/meshing/hpref_prism.hpp +3405 -0
  159. netgen/include/meshing/hpref_pyramid.hpp +154 -0
  160. netgen/include/meshing/hpref_quad.hpp +2082 -0
  161. netgen/include/meshing/hpref_segm.hpp +122 -0
  162. netgen/include/meshing/hpref_tet.hpp +4230 -0
  163. netgen/include/meshing/hpref_trig.hpp +848 -0
  164. netgen/include/meshing/hprefinement.hpp +366 -0
  165. netgen/include/meshing/improve2.hpp +178 -0
  166. netgen/include/meshing/improve3.hpp +151 -0
  167. netgen/include/meshing/localh.hpp +223 -0
  168. netgen/include/meshing/meshclass.hpp +1076 -0
  169. netgen/include/meshing/meshfunc.hpp +47 -0
  170. netgen/include/meshing/meshing.hpp +63 -0
  171. netgen/include/meshing/meshing2.hpp +163 -0
  172. netgen/include/meshing/meshing3.hpp +123 -0
  173. netgen/include/meshing/meshtool.hpp +90 -0
  174. netgen/include/meshing/meshtype.hpp +1930 -0
  175. netgen/include/meshing/msghandler.hpp +62 -0
  176. netgen/include/meshing/paralleltop.hpp +172 -0
  177. netgen/include/meshing/python_mesh.hpp +206 -0
  178. netgen/include/meshing/ruler2.hpp +172 -0
  179. netgen/include/meshing/ruler3.hpp +211 -0
  180. netgen/include/meshing/soldata.hpp +141 -0
  181. netgen/include/meshing/specials.hpp +17 -0
  182. netgen/include/meshing/surfacegeom.hpp +73 -0
  183. netgen/include/meshing/topology.hpp +1003 -0
  184. netgen/include/meshing/validate.hpp +21 -0
  185. netgen/include/meshing/visual_interface.hpp +71 -0
  186. netgen/include/mydefs.hpp +70 -0
  187. netgen/include/nginterface.h +474 -0
  188. netgen/include/nginterface_v2.hpp +406 -0
  189. netgen/include/nglib.h +697 -0
  190. netgen/include/nglib_occ.h +50 -0
  191. netgen/include/occ/occ_edge.hpp +47 -0
  192. netgen/include/occ/occ_face.hpp +52 -0
  193. netgen/include/occ/occ_solid.hpp +23 -0
  194. netgen/include/occ/occ_utils.hpp +376 -0
  195. netgen/include/occ/occ_vertex.hpp +30 -0
  196. netgen/include/occ/occgeom.hpp +659 -0
  197. netgen/include/occ/occmeshsurf.hpp +168 -0
  198. netgen/include/occ/vsocc.hpp +33 -0
  199. netgen/include/pybind11/LICENSE +29 -0
  200. netgen/include/pybind11/attr.h +722 -0
  201. netgen/include/pybind11/buffer_info.h +208 -0
  202. netgen/include/pybind11/cast.h +2361 -0
  203. netgen/include/pybind11/chrono.h +228 -0
  204. netgen/include/pybind11/common.h +2 -0
  205. netgen/include/pybind11/complex.h +74 -0
  206. netgen/include/pybind11/conduit/README.txt +15 -0
  207. netgen/include/pybind11/conduit/pybind11_conduit_v1.h +116 -0
  208. netgen/include/pybind11/conduit/pybind11_platform_abi_id.h +87 -0
  209. netgen/include/pybind11/conduit/wrap_include_python_h.h +72 -0
  210. netgen/include/pybind11/critical_section.h +56 -0
  211. netgen/include/pybind11/detail/class.h +823 -0
  212. netgen/include/pybind11/detail/common.h +1348 -0
  213. netgen/include/pybind11/detail/cpp_conduit.h +75 -0
  214. netgen/include/pybind11/detail/descr.h +226 -0
  215. netgen/include/pybind11/detail/dynamic_raw_ptr_cast_if_possible.h +39 -0
  216. netgen/include/pybind11/detail/exception_translation.h +71 -0
  217. netgen/include/pybind11/detail/function_record_pyobject.h +191 -0
  218. netgen/include/pybind11/detail/init.h +538 -0
  219. netgen/include/pybind11/detail/internals.h +799 -0
  220. netgen/include/pybind11/detail/native_enum_data.h +209 -0
  221. netgen/include/pybind11/detail/pybind11_namespace_macros.h +82 -0
  222. netgen/include/pybind11/detail/struct_smart_holder.h +378 -0
  223. netgen/include/pybind11/detail/type_caster_base.h +1591 -0
  224. netgen/include/pybind11/detail/typeid.h +65 -0
  225. netgen/include/pybind11/detail/using_smart_holder.h +22 -0
  226. netgen/include/pybind11/detail/value_and_holder.h +90 -0
  227. netgen/include/pybind11/eigen/common.h +9 -0
  228. netgen/include/pybind11/eigen/matrix.h +723 -0
  229. netgen/include/pybind11/eigen/tensor.h +521 -0
  230. netgen/include/pybind11/eigen.h +12 -0
  231. netgen/include/pybind11/embed.h +320 -0
  232. netgen/include/pybind11/eval.h +161 -0
  233. netgen/include/pybind11/functional.h +147 -0
  234. netgen/include/pybind11/gil.h +199 -0
  235. netgen/include/pybind11/gil_safe_call_once.h +102 -0
  236. netgen/include/pybind11/gil_simple.h +37 -0
  237. netgen/include/pybind11/iostream.h +265 -0
  238. netgen/include/pybind11/native_enum.h +67 -0
  239. netgen/include/pybind11/numpy.h +2312 -0
  240. netgen/include/pybind11/operators.h +202 -0
  241. netgen/include/pybind11/options.h +92 -0
  242. netgen/include/pybind11/pybind11.h +3645 -0
  243. netgen/include/pybind11/pytypes.h +2680 -0
  244. netgen/include/pybind11/stl/filesystem.h +114 -0
  245. netgen/include/pybind11/stl.h +666 -0
  246. netgen/include/pybind11/stl_bind.h +858 -0
  247. netgen/include/pybind11/subinterpreter.h +299 -0
  248. netgen/include/pybind11/trampoline_self_life_support.h +65 -0
  249. netgen/include/pybind11/type_caster_pyobject_ptr.h +61 -0
  250. netgen/include/pybind11/typing.h +298 -0
  251. netgen/include/pybind11/warnings.h +75 -0
  252. netgen/include/stlgeom/meshstlsurface.hpp +67 -0
  253. netgen/include/stlgeom/stlgeom.hpp +491 -0
  254. netgen/include/stlgeom/stlline.hpp +193 -0
  255. netgen/include/stlgeom/stltool.hpp +331 -0
  256. netgen/include/stlgeom/stltopology.hpp +419 -0
  257. netgen/include/stlgeom/vsstl.hpp +58 -0
  258. netgen/include/visualization/meshdoc.hpp +42 -0
  259. netgen/include/visualization/mvdraw.hpp +325 -0
  260. netgen/include/visualization/vispar.hpp +128 -0
  261. netgen/include/visualization/visual.hpp +28 -0
  262. netgen/include/visualization/visual_api.hpp +10 -0
  263. netgen/include/visualization/vssolution.hpp +399 -0
  264. netgen/lib/libnggui.lib +0 -0
  265. netgen/lib/ngcore.lib +0 -0
  266. netgen/lib/nglib.lib +0 -0
  267. netgen/lib/togl.lib +0 -0
  268. netgen/libnggui.dll +0 -0
  269. netgen/libngguipy.lib +0 -0
  270. netgen/libngguipy.pyd +0 -0
  271. netgen/libngpy/_NgOCC.pyi +1545 -0
  272. netgen/libngpy/__init__.pyi +7 -0
  273. netgen/libngpy/_csg.pyi +259 -0
  274. netgen/libngpy/_geom2d.pyi +323 -0
  275. netgen/libngpy/_meshing.pyi +1111 -0
  276. netgen/libngpy/_stl.pyi +131 -0
  277. netgen/libngpy.lib +0 -0
  278. netgen/libngpy.pyd +0 -0
  279. netgen/meshing.py +65 -0
  280. netgen/ngcore.dll +0 -0
  281. netgen/nglib.dll +0 -0
  282. netgen/occ.py +52 -0
  283. netgen/read_gmsh.py +259 -0
  284. netgen/read_meshio.py +22 -0
  285. netgen/stl.py +2 -0
  286. netgen/togl.dll +0 -0
  287. netgen/version.py +2 -0
  288. netgen/webgui.py +529 -0
  289. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/boundarycondition.geo +16 -0
  290. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/boxcyl.geo +32 -0
  291. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/circle_on_cube.geo +27 -0
  292. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/cone.geo +13 -0
  293. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/cube.geo +16 -0
  294. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/cubeandring.geo +55 -0
  295. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/cubeandspheres.geo +21 -0
  296. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/cubemcyl.geo +18 -0
  297. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/cubemsphere.geo +19 -0
  298. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/cylinder.geo +12 -0
  299. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/cylsphere.geo +12 -0
  300. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/doc/ng4.pdf +0 -0
  301. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/ellipsoid.geo +8 -0
  302. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/ellipticcyl.geo +10 -0
  303. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/extrusion.geo +99 -0
  304. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/fichera.geo +24 -0
  305. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/frame.step +11683 -0
  306. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/hinge.stl +8486 -0
  307. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/lshape3d.geo +26 -0
  308. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/manyholes.geo +26 -0
  309. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/manyholes2.geo +26 -0
  310. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/matrix.geo +27 -0
  311. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/ortho.geo +11 -0
  312. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/part1.stl +2662 -0
  313. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/period.geo +33 -0
  314. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/py_tutorials/exportNeutral.py +26 -0
  315. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/py_tutorials/mesh.py +19 -0
  316. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/py_tutorials/shaft.geo +65 -0
  317. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/revolution.geo +18 -0
  318. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/screw.step +1694 -0
  319. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/sculpture.geo +13 -0
  320. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/shaft.geo +65 -0
  321. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/shell.geo +10 -0
  322. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/sphere.geo +8 -0
  323. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/sphereincube.geo +17 -0
  324. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/square.in2d +35 -0
  325. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/squarecircle.in2d +48 -0
  326. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/squarehole.in2d +47 -0
  327. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/torus.geo +8 -0
  328. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/trafo.geo +57 -0
  329. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/twobricks.geo +15 -0
  330. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/twocubes.geo +18 -0
  331. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/twocyl.geo +16 -0
  332. netgen_mesher-6.2.2506.post35.dev0.dist-info/METADATA +15 -0
  333. netgen_mesher-6.2.2506.post35.dev0.dist-info/RECORD +340 -0
  334. netgen_mesher-6.2.2506.post35.dev0.dist-info/WHEEL +5 -0
  335. netgen_mesher-6.2.2506.post35.dev0.dist-info/entry_points.txt +2 -0
  336. netgen_mesher-6.2.2506.post35.dev0.dist-info/licenses/AUTHORS +1 -0
  337. netgen_mesher-6.2.2506.post35.dev0.dist-info/licenses/LICENSE +504 -0
  338. netgen_mesher-6.2.2506.post35.dev0.dist-info/top_level.txt +2 -0
  339. pyngcore/__init__.py +1 -0
  340. pyngcore/pyngcore.cp314-win_amd64.pyd +0 -0
@@ -0,0 +1,1053 @@
1
+ #ifndef NETGEN_CORE_SIMD_GENERIC_HPP
2
+ #define NETGEN_CORE_SIMD_GENERIC_HPP
3
+
4
+ /**************************************************************************/
5
+ /* File: simd_generic.hpp */
6
+ /* Author: Joachim Schoeberl, Matthias Hochsteger */
7
+ /* Date: 25. Mar. 16 */
8
+ /**************************************************************************/
9
+
10
+ #include <type_traits>
11
+ #include <functional>
12
+ #include <tuple>
13
+ #include <cmath>
14
+
15
+ #include "array.hpp"
16
+
17
+ namespace ngcore
18
+ {
19
// Default number of SIMD lanes, chosen from the instruction-set macros the
// compiler defines: 8 with AVX-512F, 4 with AVX, otherwise 2.
#if defined(__AVX512F__)
#  define NETGEN_DEFAULT_SIMD_SIZE 8
#elif defined(__AVX__)
#  define NETGEN_DEFAULT_SIMD_SIZE 4
#else
#  define NETGEN_DEFAULT_SIMD_SIZE 2
#endif

/// Returns NETGEN_DEFAULT_SIMD_SIZE as a compile-time constant.
constexpr int GetDefaultSIMDSize()
{
  constexpr int default_width = NETGEN_DEFAULT_SIMD_SIZE;
  return default_width;
}
30
+
31
/// Tells whether a width of n lanes counts as native for this build:
/// 1 and 2 always do, 4 only when __AVX__ is defined, 8 only when
/// __AVX512F__ is defined. Every other value yields false.
constexpr bool IsNativeSIMDSize(int n)
{
  switch (n)
    {
    case 1:
    case 2:
      return true;
#if defined __AVX__
    case 4:
      return true;
#endif
#if defined __AVX512F__
    case 8:
      return true;
#endif
    default:
      return false;
    }
}
42
+
43
+ // split n = k+l such that k is the largest natively supported simd size < n
44
+ constexpr int GetLargestNativeSIMDPart(int n) {
45
+ int k = n-1;
46
+ while(!IsNativeSIMDSize(k))
47
+ k--;
48
+ return k;
49
+ }
50
+
51
/// Returns the largest power of two that is <= x; returns 1 for x == 0.
///
/// The loop compares y against x/2 rather than 2*y against x. The two tests
/// are equivalent for integers, but 2*y wraps to 0 once y reaches the top
/// bit, which made the previous form loop forever for x > SIZE_MAX/2.
constexpr size_t LargestPowerOfTwo (size_t x)
{
  size_t y = 1;
  while (y <= x/2) y *= 2;
  return y;
}
57
+
58
+
59
+ template <typename T, int N=GetDefaultSIMDSize()> class SIMD;
60
+
61
+ class mask64;
62
+
63
+ ////////////////////////////////////////////////////////////////////////////
64
namespace detail {

  /// Copies the elements arr[first + I] for every index I of the given
  /// index sequence into a new std::array of matching length.
  /// No bounds checking is done on first + I.
  template <typename T, size_t N, size_t... I>
  std::array<T, sizeof...(I)>
  array_range_impl(const std::array<T, N> & arr,
                   size_t first,
                   std::index_sequence<I...>)
  {
    return std::array<T, sizeof...(I)>{arr[first + I]...};
  }

  /// Returns the S consecutive elements of arr that start at index first.
  template <size_t S, typename T, size_t N>
  auto array_range(const std::array<T, N> & arr, size_t first)
  {
    using indices = std::make_index_sequence<S>;
    return array_range_impl(arr, first, indices{});
  }

} // namespace detail
79
+
80
+ ////////////////////////////////////////////////////////////////////////////
81
+ // mask
82
+
83
+ template <>
84
+ class SIMD<mask64,1>
85
+ {
86
+ int64_t mask;
87
+ public:
88
+ SIMD (int64_t i)
89
+ : mask(i > 0 ? -1 : 0) { ; }
90
+ bool Data() const { return mask; }
91
+ static constexpr int Size() { return 1; }
92
+ auto operator[] (int /* i */) const { return mask; }
93
+ };
94
+
95
+
96
+ template <int N>
97
+ class alignas(GetLargestNativeSIMDPart(N)*sizeof(int64_t)) SIMD<mask64,N>
98
+ {
99
+ // static constexpr int N1 = GetLargestNativeSIMDPart(N);
100
+ static constexpr size_t N1 = LargestPowerOfTwo(N-1);
101
+ static constexpr int N2 = N-N1;
102
+
103
+ SIMD<mask64,N1> lo;
104
+ SIMD<mask64,N2> hi;
105
+ public:
106
+
107
+ SIMD (int64_t i) : lo(i), hi(i-N1 ) { ; }
108
+ SIMD (SIMD<mask64,N1> lo_, SIMD<mask64,N2> hi_) : lo(lo_), hi(hi_) { ; }
109
+ SIMD<mask64,N1> Lo() const { return lo; }
110
+ SIMD<mask64,N2> Hi() const { return hi; }
111
+ static constexpr int Size() { return N; }
112
+ };
113
+
114
+ template<int N>
115
+ NETGEN_INLINE SIMD<mask64,N> operator&& (SIMD<mask64,N> a, SIMD<mask64,N> b)
116
+ {
117
+ if constexpr(N==1) return a.Data() && b.Data();
118
+ else return { a.Lo() && b.Lo(), a.Hi() && b.Hi() };
119
+ }
120
+
121
+
122
+ ////////////////////////////////////////////////////////////////////////////
123
+ // int32
124
+
125
+ template<>
126
+ class SIMD<int32_t,1>
127
+ {
128
+ int32_t data;
129
+
130
+ public:
131
+ static constexpr int Size() { return 1; }
132
+ SIMD () {}
133
+ SIMD (const SIMD &) = default;
134
+ SIMD & operator= (const SIMD &) = default;
135
+ // SIMD (int val) : data{val} {}
136
+ SIMD (int32_t val) : data{val} {}
137
+ SIMD (size_t val) : data(val) {}
138
+ explicit SIMD (std::array<int32_t, 1> arr) : data{arr[0]} {}
139
+
140
+
141
+
142
+ int32_t operator[] (int i) const { return ((int32_t*)(&data))[i]; }
143
+ auto Data() const { return data; }
144
+ static SIMD FirstInt(int32_t n0=0) { return {n0}; }
145
+ template <int I>
146
+ int32_t Get()
147
+ {
148
+ static_assert(I==0);
149
+ return data;
150
+ }
151
+ };
152
+
153
+ template<int N>
154
+ class alignas(GetLargestNativeSIMDPart(N)*sizeof(int64_t)) SIMD<int32_t,N>
155
+ {
156
+ // static constexpr int N1 = GetLargestNativeSIMDPart(N);
157
+ static constexpr size_t N1 = LargestPowerOfTwo(N-1);
158
+ static constexpr int N2 = N-N1;
159
+
160
+ SIMD<int32_t,N1> lo;
161
+ SIMD<int32_t,N2> high;
162
+
163
+ public:
164
+ static constexpr int Size() { return N; }
165
+
166
+ SIMD () {}
167
+ SIMD (const SIMD &) = default;
168
+ SIMD & operator= (const SIMD &) = default;
169
+
170
+ // SIMD (int val) : lo{val}, high{val} { ; }
171
+ SIMD (int32_t val) : lo{val}, high{val} { ; }
172
+ SIMD (size_t val) : lo{val}, high{val} { ; }
173
+ SIMD (int32_t * p) : lo{p}, high{p+N1} { ; }
174
+
175
+ SIMD (SIMD<int32_t,N1> lo_, SIMD<int32_t,N2> high_) : lo(lo_), high(high_) { ; }
176
+
177
+ explicit SIMD( std::array<int32_t, N> arr )
178
+ : lo(detail::array_range<N1>(arr, 0)),
179
+ high(detail::array_range<N2>(arr, N1))
180
+ {}
181
+
182
+
183
+ template<typename ...T>
184
+ explicit SIMD(const T... vals)
185
+ : lo(detail::array_range<N1>(std::array<int32_t, N>{vals...}, 0)),
186
+ high(detail::array_range<N2>(std::array<int32_t, N>{vals...}, N1))
187
+ {
188
+ static_assert(sizeof...(vals)==N, "wrong number of arguments");
189
+ }
190
+
191
+
192
+ template<typename T, typename std::enable_if<std::is_convertible<T, std::function<int32_t(int)>>::value, int>::type = 0>
193
+ SIMD (const T & func)
194
+ {
195
+ for(auto i : IntRange(N1))
196
+ lo[i] = func(i);
197
+ for(auto i : IntRange(N2))
198
+ high[i] = func(N1+i);
199
+ }
200
+
201
+ auto Lo() const { return lo; }
202
+ auto Hi() const { return high; }
203
+
204
+ int32_t operator[] (int i) const { return ((int32_t*)(&lo))[i]; }
205
+
206
+ void Store (int32_t * p) { lo.Store(p); high.Store(p+N1); }
207
+
208
+
209
+ /*
210
+ operator tuple<int32_t&,int32_t&,int32_t&,int32_t&> ()
211
+ { return tuple<int32_t&,int32_t&,int32_t&,int32_t&>((*this)[0], (*this)[1], (*this)[2], (*this)[3]); }
212
+ */
213
+
214
+ /*
215
+ static SIMD FirstInt() { return { 0, 1, 2, 3 }; }
216
+ */
217
+ static SIMD FirstInt(int32_t n0=0) { return {SIMD<int32_t,N1>::FirstInt(n0), SIMD<int32_t,N2>::FirstInt(n0+N1)}; }
218
+ template <int I>
219
+ int32_t Get()
220
+ {
221
+ static_assert(I>=0 && I<N, "Index out of range");
222
+ if constexpr(I<N1) return lo.template Get<I>();
223
+ else return high.template Get<I-N1>();
224
+ }
225
+ };
226
+
227
+
228
+
229
+ ////////////////////////////////////////////////////////////////////////////
230
+ // int64
231
+
232
+ template<>
233
+ class SIMD<int64_t,1>
234
+ {
235
+ int64_t data;
236
+
237
+ public:
238
+ static constexpr int Size() { return 1; }
239
+ SIMD () {}
240
+ SIMD (const SIMD &) = default;
241
+ SIMD & operator= (const SIMD &) = default;
242
+ SIMD (int val) : data{val} {}
243
+ SIMD (int64_t val) : data{val} {}
244
+ SIMD (size_t val) : data(val) {}
245
+ explicit SIMD (std::array<int64_t, 1> arr)
246
+ : data{arr[0]}
247
+ {}
248
+
249
+ int64_t operator[] (int i) const { return ((int64_t*)(&data))[i]; }
250
+ auto Data() const { return data; }
251
+ static SIMD FirstInt(int64_t n0=0) { return {n0}; }
252
+ template <int I>
253
+ int64_t Get()
254
+ {
255
+ static_assert(I==0);
256
+ return data;
257
+ }
258
+ };
259
+
260
+ template<int N>
261
+ class alignas(GetLargestNativeSIMDPart(N)*sizeof(int64_t)) SIMD<int64_t,N>
262
+ {
263
+ // static constexpr int N1 = GetLargestNativeSIMDPart(N);
264
+ static constexpr size_t N1 = LargestPowerOfTwo(N-1);
265
+ static constexpr int N2 = N-N1;
266
+
267
+ SIMD<int64_t,N1> lo;
268
+ SIMD<int64_t,N2> high;
269
+
270
+ public:
271
+ static constexpr int Size() { return N; }
272
+
273
+ SIMD () {}
274
+ SIMD (const SIMD &) = default;
275
+ SIMD & operator= (const SIMD &) = default;
276
+
277
+ SIMD (int val) : lo{val}, high{val} { ; }
278
+ SIMD (int64_t val) : lo{val}, high{val} { ; }
279
+ SIMD (size_t val) : lo{val}, high{val} { ; }
280
+ SIMD (SIMD<int64_t,N1> lo_, SIMD<int64_t,N2> high_) : lo(lo_), high(high_) { ; }
281
+
282
+ explicit SIMD( std::array<int64_t, N> arr )
283
+ : lo(detail::array_range<N1>(arr, 0)),
284
+ high(detail::array_range<N2>(arr, N1))
285
+ {}
286
+
287
+ template<typename ...T>
288
+ explicit SIMD(const T... vals)
289
+ : lo(detail::array_range<N1>(std::array<int64_t, N>{vals...}, 0)),
290
+ high(detail::array_range<N2>(std::array<int64_t, N>{vals...}, N1))
291
+ {
292
+ static_assert(sizeof...(vals)==N, "wrong number of arguments");
293
+ }
294
+
295
+
296
+ template<typename T, typename std::enable_if<std::is_convertible<T, std::function<int64_t(int)>>::value, int>::type = 0>
297
+ SIMD (const T & func)
298
+ {
299
+ for(auto i : IntRange(N1))
300
+ lo[i] = func(i);
301
+ for(auto i : IntRange(N2))
302
+ high[i] = func(N1+i);
303
+ }
304
+
305
+ auto Lo() const { return lo; }
306
+ auto Hi() const { return high; }
307
+
308
+ int64_t operator[] (int i) const { return ((int64_t*)(&lo))[i]; }
309
+
310
+ /*
311
+ operator tuple<int64_t&,int64_t&,int64_t&,int64_t&> ()
312
+ { return tuple<int64_t&,int64_t&,int64_t&,int64_t&>((*this)[0], (*this)[1], (*this)[2], (*this)[3]); }
313
+ */
314
+
315
+ /*
316
+ static SIMD FirstInt() { return { 0, 1, 2, 3 }; }
317
+ */
318
+ static SIMD FirstInt(int64_t n0=0) { return {SIMD<int64_t,N1>::FirstInt(n0), SIMD<int64_t,N2>::FirstInt(n0+N1)}; }
319
+ template <int I>
320
+ int64_t Get()
321
+ {
322
+ static_assert(I>=0 && I<N, "Index out of range");
323
+ if constexpr(I<N1) return lo.template Get<I>();
324
+ else return high.template Get<I-N1>();
325
+ }
326
+ };
327
+
328
+
329
+
330
+ ////////////////////////////////////////////////////////////////////////////
331
+ // double
332
+
333
+ template<>
334
+ class SIMD<double,1>
335
+ {
336
+ double data;
337
+
338
+ public:
339
+ static constexpr int Size() { return 1; }
340
+ SIMD () {}
341
+ SIMD (const SIMD &) = default;
342
+ SIMD & operator= (const SIMD &) = default;
343
+ SIMD (double val) { data = val; }
344
+ SIMD (int val) { data = val; }
345
+ SIMD (size_t val) { data = val; }
346
+ SIMD (double const * p) { data = *p; }
347
+ SIMD (double const * p, SIMD<mask64,1> mask) { data = mask.Data() ? *p : 0.0; }
348
+ explicit SIMD (std::array<double, 1> arr)
349
+ : data{arr[0]}
350
+ {}
351
+
352
+ template <typename T, typename std::enable_if<std::is_convertible<T,std::function<double(int)>>::value,int>::type = 0>
353
+ SIMD (const T & func)
354
+ {
355
+ data = func(0);
356
+ }
357
+
358
+ template <typename T, typename std::enable_if<std::is_convertible<T,std::function<double(int)>>::value,int>::type = 0>
359
+ SIMD & operator= (const T & func)
360
+ {
361
+ data = func(0);
362
+ return *this;
363
+ }
364
+
365
+ void Store (double * p) { *p = data; }
366
+ void Store (double * p, SIMD<mask64,1> mask) { if (mask.Data()) *p = data; }
367
+
368
+ double operator[] (int i) const { return ((double*)(&data))[i]; }
369
+ double Data() const { return data; }
370
+ template <int I>
371
+ double Get()
372
+ {
373
+ static_assert(I==0);
374
+ return data;
375
+ }
376
+ };
377
+
378
+
379
+ template<int N>
380
+ class alignas(GetLargestNativeSIMDPart(N)*sizeof(double)) SIMD<double, N>
381
+ {
382
+ // static constexpr int N1 = GetLargestNativeSIMDPart(N);
383
+ static constexpr size_t N1 = LargestPowerOfTwo(N-1);
384
+ static constexpr int N2 = N-N1;
385
+
386
+ SIMD<double, N1> lo;
387
+ SIMD<double, N2> high;
388
+
389
+ public:
390
+ static constexpr int Size() { return N; }
391
+ SIMD () {}
392
+ SIMD (const SIMD &) = default;
393
+ SIMD (SIMD<double,N1> lo_, SIMD<double,N2> hi_) : lo(lo_), high(hi_) { ; }
394
+
395
+ template <typename T, typename std::enable_if<std::is_convertible<T,std::function<double(int)>>::value,int>::type = 0>
396
+ SIMD (const T & func)
397
+ {
398
+ double *p = (double*)this;
399
+ for(auto i : IntRange(N))
400
+ p[i] = func(i);
401
+ }
402
+
403
+ template <typename T, typename std::enable_if<std::is_convertible<T,std::function<double(int)>>::value,int>::type = 0>
404
+ SIMD & operator= (const T & func)
405
+ {
406
+ double *p = (double*)this;
407
+ for(auto i : IntRange(N))
408
+ p[i] = func(i);
409
+ return *this;
410
+ }
411
+
412
+
413
+ SIMD & operator= (const SIMD &) = default;
414
+
415
+ SIMD (double val) : lo{val}, high{val} { ; }
416
+ SIMD (int val) : lo{val}, high{val} { ; }
417
+ SIMD (size_t val) : lo{val}, high{val} { ; }
418
+
419
+ SIMD (double const * p) : lo{p}, high{p+N1} { ; }
420
+ SIMD (double const * p, SIMD<mask64,N> mask)
421
+ : lo{p, mask.Lo()}, high{p+N1, mask.Hi()}
422
+ { }
423
+ SIMD (double * p) : lo{p}, high{p+N1} { ; }
424
+ SIMD (double * p, SIMD<mask64,N> mask)
425
+ : lo{p, mask.Lo()}, high{p+N1, mask.Hi()}
426
+ { }
427
+
428
+ explicit SIMD( std::array<double, N> arr )
429
+ : lo(detail::array_range<N1>(arr, 0)),
430
+ high(detail::array_range<N2>(arr, N1))
431
+ {}
432
+
433
+ template<typename ...T>
434
+ explicit SIMD(const T... vals)
435
+ : lo(detail::array_range<N1>(std::array<double, N>{vals...}, 0)),
436
+ high(detail::array_range<N2>(std::array<double, N>{vals...}, N1))
437
+ {
438
+ static_assert(sizeof...(vals)==N, "wrong number of arguments");
439
+ }
440
+
441
+ void Store (double * p) { lo.Store(p); high.Store(p+N1); }
442
+ void Store (double * p, SIMD<mask64,N> mask)
443
+ {
444
+ lo.Store(p, mask.Lo());
445
+ high.Store(p+N1, mask.Hi());
446
+ }
447
+
448
+ auto Lo() const { return lo; }
449
+ auto Hi() const { return high; }
450
+
451
+ double operator[] (int i) const { return ((double*)(&lo))[i]; }
452
+
453
+ template<typename=std::enable_if<N==2>>
454
+ operator std::tuple<double&,double&> ()
455
+ {
456
+ double *p = (double*)this;
457
+ return std::tuple<double&,double&>(p[0], p[1]);
458
+ }
459
+
460
+ template<typename=std::enable_if<N==4>>
461
+ operator std::tuple<double&,double&,double&,double&> ()
462
+ { return std::tuple<double&,double&,double&,double&>((*this)[0], (*this)[1], (*this)[2], (*this)[3]); }
463
+
464
+ template <int I>
465
+ double Get()
466
+ {
467
+ static_assert(I>=0 && I<N, "Index out of range");
468
+ if constexpr(I<N1) return lo.template Get<I>();
469
+ else return high.template Get<I-N1>();
470
+ }
471
+ auto Data() const { return *this; }
472
+ };
473
+
474
+
475
+ // Generic operators for any arithmetic type/simd width
476
+ template <typename T, int N>
477
+ NETGEN_INLINE SIMD<T,N> operator+ (SIMD<T,N> a, SIMD<T,N> b) {
478
+ if constexpr(N==1) return a.Data()+b.Data();
479
+ else return { a.Lo()+b.Lo(), a.Hi()+b.Hi() };
480
+ }
481
+
482
+ template <typename T, int N>
483
+ NETGEN_INLINE SIMD<T,N> operator- (SIMD<T,N> a, SIMD<T,N> b) {
484
+ if constexpr(N==1) return a.Data()-b.Data();
485
+ else return { a.Lo()-b.Lo(), a.Hi()-b.Hi() };
486
+ }
487
+ template <typename T, int N>
488
+ NETGEN_INLINE SIMD<T,N> operator- (SIMD<T,N> a) {
489
+ if constexpr(N==1) return -a.Data();
490
+ else return { -a.Lo(), -a.Hi() };
491
+ }
492
+
493
+ template <typename T, int N>
494
+ NETGEN_INLINE SIMD<T,N> operator* (SIMD<T,N> a, SIMD<T,N> b) {
495
+ if constexpr(N==1) return a.Data()*b.Data();
496
+ else return { a.Lo()*b.Lo(), a.Hi()*b.Hi() };
497
+ }
498
+
499
+ template <typename T, int N>
500
+ NETGEN_INLINE SIMD<T,N> operator/ (SIMD<T,N> a, SIMD<T,N> b) {
501
+ if constexpr(N==1) return a.Data()/b.Data();
502
+ else return { a.Lo()/b.Lo(), a.Hi()/b.Hi() };
503
+ }
504
+
505
+ template <typename T, int N>
506
+ NETGEN_INLINE SIMD<mask64,N> operator< (SIMD<T,N> a, SIMD<T,N> b)
507
+ {
508
+ if constexpr(N==1) return a.Data() < b.Data();
509
+ else return { a.Lo()<b.Lo(), a.Hi()<b.Hi() };
510
+ }
511
+
512
+ template <typename T, int N>
513
+ NETGEN_INLINE SIMD<mask64,N> operator<= (SIMD<T,N> a, SIMD<T,N> b)
514
+ {
515
+ if constexpr(N==1) return a.Data() <= b.Data();
516
+ else return { a.Lo()<=b.Lo(), a.Hi()<=b.Hi() };
517
+ }
518
+
519
+ template <typename T, int N>
520
+ NETGEN_INLINE SIMD<mask64,N> operator> (SIMD<T,N> a, SIMD<T,N> b)
521
+ {
522
+ if constexpr(N==1) return a.Data() > b.Data();
523
+ else return { a.Lo()>b.Lo(), a.Hi()>b.Hi() };
524
+ }
525
+
526
+ template <typename T, int N>
527
+ NETGEN_INLINE SIMD<mask64,N> operator>= (SIMD<T,N> a, SIMD<T,N> b)
528
+ {
529
+ if constexpr(N==1) return a.Data() >= b.Data();
530
+ else return { a.Lo()>=b.Lo(), a.Hi()>=b.Hi() };
531
+ }
532
+
533
+ template <typename T, int N>
534
+ NETGEN_INLINE SIMD<mask64,N> operator== (SIMD<T,N> a, SIMD<T,N> b)
535
+ {
536
+ if constexpr(N==1) return a.Data() == b.Data();
537
+ else return { a.Lo()==b.Lo(), a.Hi()==b.Hi() };
538
+ }
539
+
540
+ template <typename T, int N>
541
+ NETGEN_INLINE SIMD<mask64,N> operator!= (SIMD<T,N> a, SIMD<T,N> b)
542
+ {
543
+ if constexpr(N==1) return a.Data() != b.Data();
544
+ else return { a.Lo()!=b.Lo(), a.Hi()!=b.Hi() };
545
+ }
546
+
547
+ template <int N>
548
+ NETGEN_INLINE SIMD<int64_t,N> operator& (SIMD<int64_t,N> a, SIMD<int64_t,N> b)
549
+ {
550
+ if constexpr(N==1) return a.Data() & b.Data();
551
+ else return { (a.Lo()&b.Lo()), (a.Hi()&b.Hi()) };
552
+ }
553
+ template <int N>
554
+ NETGEN_INLINE SIMD<int64_t,N> operator| (SIMD<int64_t,N> a, SIMD<int64_t,N> b)
555
+ {
556
+ if constexpr(N==1) return a.Data() & b.Data();
557
+ else return { (a.Lo()|b.Lo()), (a.Hi()|b.Hi()) };
558
+ }
559
+
560
+
561
+ // int64_t operators with scalar operand (implement overloads to allow implicit casts for second operand)
562
+ template <int N>
563
+ NETGEN_INLINE SIMD<int64_t,N> operator+ (SIMD<int64_t,N> a, int64_t b) { return a+SIMD<int64_t,N>(b); }
564
+ template <int N>
565
+ NETGEN_INLINE SIMD<int64_t,N> operator+ (int64_t a, SIMD<int64_t,N> b) { return SIMD<int64_t,N>(a)+b; }
566
+ template <int N>
567
+ NETGEN_INLINE SIMD<int64_t,N> operator- (int64_t a, SIMD<int64_t,N> b) { return SIMD<int64_t,N>(a)-b; }
568
+ template <int N>
569
+ NETGEN_INLINE SIMD<int64_t,N> operator- (SIMD<int64_t,N> a, int64_t b) { return a-SIMD<int64_t,N>(b); }
570
+ template <int N>
571
+ NETGEN_INLINE SIMD<int64_t,N> operator* (int64_t a, SIMD<int64_t,N> b) { return SIMD<int64_t,N>(a)*b; }
572
+ template <int N>
573
+ NETGEN_INLINE SIMD<int64_t,N> operator* (SIMD<int64_t,N> b, int64_t a) { return SIMD<int64_t,N>(a)*b; }
574
+ template <int N>
575
+ NETGEN_INLINE SIMD<int64_t,N> operator/ (SIMD<int64_t,N> a, int64_t b) { return a/SIMD<int64_t,N>(b); }
576
+ template <int N>
577
+ NETGEN_INLINE SIMD<int64_t,N> operator/ (int64_t a, SIMD<int64_t,N> b) { return SIMD<int64_t,N>(a)/b; }
578
+ template <int N>
579
+ NETGEN_INLINE SIMD<int64_t,N> & operator+= (SIMD<int64_t,N> & a, SIMD<int64_t,N> b) { a=a+b; return a; }
580
+ template <int N>
581
+ NETGEN_INLINE SIMD<int64_t,N> & operator+= (SIMD<int64_t,N> & a, int64_t b) { a+=SIMD<int64_t,N>(b); return a; }
582
+ template <int N>
583
+ NETGEN_INLINE SIMD<int64_t,N> & operator-= (SIMD<int64_t,N> & a, SIMD<int64_t,N> b) { a = a-b; return a; }
584
+ template <int N>
585
+ NETGEN_INLINE SIMD<int64_t,N> & operator-= (SIMD<int64_t,N> & a, int64_t b) { a-=SIMD<int64_t,N>(b); return a; }
586
+ template <int N>
587
+ NETGEN_INLINE SIMD<int64_t,N> & operator*= (SIMD<int64_t,N> & a, SIMD<int64_t,N> b) { a=a*b; return a; }
588
+ template <int N>
589
+ NETGEN_INLINE SIMD<int64_t,N> & operator*= (SIMD<int64_t,N> & a, int64_t b) { a*=SIMD<int64_t,N>(b); return a; }
590
+ template <int N>
591
+ NETGEN_INLINE SIMD<int64_t,N> & operator/= (SIMD<int64_t,N> & a, SIMD<int64_t,N> b) { a = a/b; return a; }
592
+
593
+
594
+ // double operators with scalar operand (implement overloads to allow implicit casts for second operand)
595
+ template <int N>
596
+ NETGEN_INLINE SIMD<double,N> operator+ (SIMD<double,N> a, double b) { return a+SIMD<double,N>(b); }
597
+ template <int N>
598
+ NETGEN_INLINE SIMD<double,N> operator+ (double a, SIMD<double,N> b) { return SIMD<double,N>(a)+b; }
599
+ template <int N>
600
+ NETGEN_INLINE SIMD<double,N> operator- (double a, SIMD<double,N> b) { return SIMD<double,N>(a)-b; }
601
+ template <int N>
602
+ NETGEN_INLINE SIMD<double,N> operator- (SIMD<double,N> a, double b) { return a-SIMD<double,N>(b); }
603
+ template <int N>
604
+ NETGEN_INLINE SIMD<double,N> operator* (double a, SIMD<double,N> b) { return SIMD<double,N>(a)*b; }
605
+ template <int N>
606
+ NETGEN_INLINE SIMD<double,N> operator* (SIMD<double,N> b, double a) { return SIMD<double,N>(a)*b; }
607
+ template <int N>
608
+ NETGEN_INLINE SIMD<double,N> operator/ (SIMD<double,N> a, double b) { return a/SIMD<double,N>(b); }
609
+ template <int N>
610
+ NETGEN_INLINE SIMD<double,N> operator/ (double a, SIMD<double,N> b) { return SIMD<double,N>(a)/b; }
611
+ template <int N>
612
+ NETGEN_INLINE SIMD<double,N> & operator+= (SIMD<double,N> & a, SIMD<double,N> b) { a=a+b; return a; }
613
+ template <int N>
614
+ NETGEN_INLINE SIMD<double,N> & operator+= (SIMD<double,N> & a, double b) { a+=SIMD<double,N>(b); return a; }
615
+ template <int N>
616
+ NETGEN_INLINE SIMD<double,N> & operator-= (SIMD<double,N> & a, SIMD<double,N> b) { a = a-b; return a; }
617
+ template <int N>
618
+ NETGEN_INLINE SIMD<double,N> & operator-= (SIMD<double,N> & a, double b) { a-=SIMD<double,N>(b); return a; }
619
+ template <int N>
620
+ NETGEN_INLINE SIMD<double,N> & operator*= (SIMD<double,N> & a, SIMD<double,N> b) { a=a*b; return a; }
621
+ template <int N>
622
+ NETGEN_INLINE SIMD<double,N> & operator*= (SIMD<double,N> & a, double b) { a*=SIMD<double,N>(b); return a; }
623
+ template <int N>
624
+ NETGEN_INLINE SIMD<double,N> & operator/= (SIMD<double,N> & a, SIMD<double,N> b) { a = a/b; return a; }
625
+
626
+ template <int N>
627
+ NETGEN_INLINE auto operator> (SIMD<double,N> & a, double b) { return a > SIMD<double,N>(b); }
628
+
629
+
630
+ // double functions
631
+
632
+ template <int N>
633
+ NETGEN_INLINE SIMD<double,N> L2Norm2 (SIMD<double,N> a) { return a*a; }
634
+ template <int N>
635
+ NETGEN_INLINE SIMD<double,N> Trans (SIMD<double,N> a) { return a; }
636
+
637
+ template <int N>
638
+ NETGEN_INLINE double HSum (SIMD<double,N> a)
639
+ {
640
+ if constexpr(N==1)
641
+ return a.Data();
642
+ else
643
+ return HSum(a.Lo()) + HSum(a.Hi());
644
+ }
645
+
646
+
647
+ template<typename T, int N>
648
+ NETGEN_INLINE SIMD<T,N> IfPos (SIMD<T,N> a, SIMD<T,N> b, SIMD<T,N> c)
649
+ {
650
+ if constexpr(N==1) return a.Data()>0.0 ? b : c;
651
+ else return { IfPos(a.Lo(), b.Lo(), c.Lo()), IfPos(a.Hi(), b.Hi(), c.Hi())};
652
+
653
+ }
654
+
655
+ template<typename T, int N>
656
+ NETGEN_INLINE SIMD<T,N> IfZero (SIMD<T,N> a, SIMD<T,N> b, SIMD<T,N> c)
657
+ {
658
+ if constexpr(N==1) return a.Data()==0.0 ? b : c;
659
+ else return { IfZero(a.Lo(), b.Lo(), c.Lo()), IfZero(a.Hi(), b.Hi(), c.Hi())};
660
+
661
+ }
662
+
663
+ template<typename T, int N>
664
+ NETGEN_INLINE SIMD<T,N> If (SIMD<mask64,N> a, SIMD<T,N> b, SIMD<T,N> c)
665
+ {
666
+ if constexpr(N==1) return a.Data() ? b : c;
667
+ else return { If(a.Lo(), b.Lo(), c.Lo()), If(a.Hi(), b.Hi(), c.Hi())};
668
+
669
+ }
670
+
671
+ // a*b+c
672
+ template <typename T1, typename T2, typename T3>
673
+ NETGEN_INLINE auto FMA(T1 a, T2 b, T3 c)
674
+ {
675
+ return c+a*b;
676
+ }
677
+
678
+ template <typename T1, typename T2, typename T3>
679
+ NETGEN_INLINE auto FNMA(T1 a, T2 b, T3 c)
680
+ {
681
+ return c-a*b;
682
+ }
683
+
684
+ // update form of fma
685
+ template <int N>
686
+ void FMAasm (SIMD<double,N> a, SIMD<double,N> b, SIMD<double,N> & sum)
687
+ {
688
+ sum = FMA(a,b,sum);
689
+ }
690
+
691
+ // update form of fms
692
+ template <int N>
693
+ void FNMAasm (SIMD<double,N> a, SIMD<double,N> b, SIMD<double,N> & sum)
694
+ {
695
+ // sum -= a*b;
696
+ sum = FNMA(a,b,sum);
697
+ }
698
+
699
+ // c += a*b (a0re, a0im, a1re, a1im, ...),
700
+ template <int N>
701
+ void FMAComplex (SIMD<double,N> a, SIMD<double,N> b, SIMD<double,N> & c)
702
+ {
703
+ auto [are, aim] = Unpack(a, a);
704
+ SIMD<double,N> bswap = SwapPairs(b);
705
+ SIMD<double,N> aim_bswap = aim*bswap;
706
+ c += FMAddSub (are, b, aim_bswap);
707
+ }
708
+
709
+ template <int i, typename T, int N>
710
+ T get(SIMD<T,N> a) { return a.template Get<i>(); }
711
+
712
+ template <int NUM, typename FUNC>
713
+ NETGEN_INLINE void Iterate2 (FUNC f)
714
+ {
715
+ if constexpr (NUM > 1) Iterate2<NUM-1> (f);
716
+ if constexpr (NUM >= 1) f(std::integral_constant<int,NUM-1>());
717
+ }
718
+
719
+
720
// Reinterprets the object representation of a as a value of type T2 by
// copying its bytes.  Both types must have the same size (compile-time check).
template<typename T2, typename T1>
T2 BitCast(T1 a)
{
  static_assert(sizeof(T1) == sizeof(T2), "BitCast requires same size");
  T2 converted;
  memcpy(&converted, &a, sizeof(T1));
  return converted;
}
728
+
729
+ template <typename T, typename T1, int N>
730
+ SIMD<T, N> Reinterpret (SIMD<T1,N> a)
731
+ {
732
+ if constexpr (N == 1)
733
+ return SIMD<T,N> ( * (T*)(void*) & a.Data());
734
+ else if constexpr (N == 2)
735
+ return SIMD<T,N> { BitCast<T> (a.Lo()),
736
+ BitCast<T> (a.Hi()) };
737
+ else
738
+ return SIMD<T,N> (Reinterpret<T> (a.Lo()), Reinterpret<T> (a.Hi()));
739
+ }
740
+
741
+
742
+ using std::round;
743
+ template <int N>
744
+ SIMD<double,N> round (SIMD<double,N> x)
745
+ {
746
+ if constexpr (N == 1) return round(x);
747
+ else return { round(x.Lo()), round(x.Hi()) };
748
+ }
749
+
750
+ // NETGEN_INLINE int64_t RoundI (double x) { return lround(x); }
751
+ using std::lround;
752
+ template <int N>
753
+ SIMD<int64_t,N> lround (SIMD<double,N> x)
754
+ {
755
+ if constexpr (N == 1) return SIMD<int64_t,1> (lround(x));
756
+ else return { lround(x.Lo()), lround(x.Hi()) };
757
+ }
758
+
759
+ /*
760
+ reciprocal square root
761
+ Quake III algorithm, or intrinsics
762
+ */
763
+ NETGEN_INLINE double rsqrt (double x) { return 1.0/sqrt(x); }
764
+ template <int N>
765
+ SIMD<double,N> rsqrt (SIMD<double,N> x)
766
+ {
767
+ if constexpr (N == 1) return 1.0/sqrt(x.Data());
768
+ else return { rsqrt(x.Lo()), rsqrt(x.Hi()) };
769
+ }
770
+
771
+ template <int N>
772
+ int64_t operator<< (int64_t a, IC<N> n) { return a << n.value; }
773
+
774
+ template <int S, int N>
775
+ SIMD<int64_t,S> operator<< (SIMD<int64_t,S> a, IC<N> n)
776
+ {
777
+ if constexpr (S == 1) return SIMD<int64_t,1> (a.Data() << n);
778
+ else return SIMD<int64_t,S> (a.Lo() << n, a.Hi() << n);
779
+ }
780
+
781
+
782
+
783
+
784
+ template <typename T, int N>
785
+ auto Min (SIMD<T,N> a, SIMD<T,N> b)
786
+ {
787
+ if constexpr (N==1)
788
+ return SIMD<T,1> (std::min(a[0], b[0]));
789
+ else
790
+ return SIMD<T,N> (Min(a.Lo(), b.Lo()), Min(a.Hi(), b.Hi()));
791
+ }
792
+
793
+ template <typename T, int N>
794
+ auto Max (SIMD<T,N> a, SIMD<T,N> b)
795
+ {
796
+ if constexpr (N==1)
797
+ return SIMD<T,1> (std::max(a[0], b[0]));
798
+ else
799
+ return SIMD<T,N> (Max(a.Lo(), b.Lo()), Max(a.Hi(), b.Hi()));
800
+ }
801
+
802
+
803
+
804
+
805
+
806
+ template <typename T, int N>
807
+ ostream & operator<< (ostream & ost, SIMD<T,N> simd)
808
+ {
809
+ /*
810
+ ost << simd[0];
811
+ for (int i = 1; i < simd.Size(); i++)
812
+ ost << " " << simd[i];
813
+ */
814
+ Iterate2<simd.Size()> ([&] (auto I) {
815
+ if (I.value != 0) ost << " ";
816
+ ost << get<I.value>(simd);
817
+ });
818
+ return ost;
819
+ }
820
+
821
+ using std::sqrt;
822
+ template <int N>
823
+ NETGEN_INLINE ngcore::SIMD<double,N> sqrt (ngcore::SIMD<double,N> a)
824
+ {
825
+ if constexpr (N == 1) return sqrt(a.Data());
826
+ else return { sqrt(a.Lo()), sqrt(a.Hi()) };
827
+ // return ngcore::SIMD<double,N>([a](int i)->double { return sqrt(a[i]); } );
828
+ }
829
+
830
+ using std::fabs;
831
+ template <int N>
832
+ NETGEN_INLINE ngcore::SIMD<double,N> fabs (ngcore::SIMD<double,N> a) {
833
+ return ngcore::SIMD<double,N>([a](int i)->double { return fabs(a[i]); } );
834
+ }
835
+
836
+ using std::floor;
837
+ template <int N>
838
+ NETGEN_INLINE ngcore::SIMD<double,N> floor (ngcore::SIMD<double,N> a) {
839
+ return ngcore::SIMD<double,N>([a](int i)->double { return floor(a[i]); } );
840
+ }
841
+
842
+ using std::ceil;
843
+ template <int N>
844
+ NETGEN_INLINE ngcore::SIMD<double,N> ceil (ngcore::SIMD<double,N> a) {
845
+ return ngcore::SIMD<double,N>([a](int i)->double { return ceil(a[i]); } );
846
+ }
847
+
848
+ using std::exp;
849
+ template <int N>
850
+ NETGEN_INLINE ngcore::SIMD<double,N> exp (ngcore::SIMD<double,N> a) {
851
+ return ngcore::SIMD<double,N>([a](int i)->double { return exp(a[i]); } );
852
+ }
853
+
854
+ using std::log;
855
+ template <int N>
856
+ NETGEN_INLINE ngcore::SIMD<double,N> log (ngcore::SIMD<double,N> a) {
857
+ return ngcore::SIMD<double,N>([a](int i)->double { return log(a[i]); } );
858
+ }
859
+
860
+ using std::erf;
861
+ template <int N>
862
+ NETGEN_INLINE ngcore::SIMD<double,N> erf (ngcore::SIMD<double,N> a) {
863
+ return ngcore::SIMD<double,N>([a](int i)->double { return erf(a[i]); } );
864
+ }
865
+
866
+ using std::pow;
867
+ template <int N>
868
+ NETGEN_INLINE ngcore::SIMD<double,N> pow (ngcore::SIMD<double,N> a, double x) {
869
+ return ngcore::SIMD<double,N>([a,x](int i)->double { return pow(a[i],x); } );
870
+ }
871
+
872
+ template <int N>
873
+ NETGEN_INLINE ngcore::SIMD<double,N> pow (ngcore::SIMD<double,N> a, ngcore::SIMD<double,N> b) {
874
+ return ngcore::SIMD<double,N>([a,b](int i)->double { return pow(a[i],b[i]); } );
875
+ }
876
+
877
+ using std::sin;
878
+ template <int N>
879
+ NETGEN_INLINE ngcore::SIMD<double,N> sin (ngcore::SIMD<double,N> a) {
880
+ return ngcore::SIMD<double,N>([a](int i)->double { return sin(a[i]); } );
881
+ }
882
+
883
+ using std::cos;
884
+ template <int N>
885
+ NETGEN_INLINE ngcore::SIMD<double,N> cos (ngcore::SIMD<double,N> a) {
886
+ return ngcore::SIMD<double,N>([a](int i)->double { return cos(a[i]); } );
887
+ }
888
+
889
+ using std::tan;
890
+ template <int N>
891
+ NETGEN_INLINE ngcore::SIMD<double,N> tan (ngcore::SIMD<double,N> a) {
892
+ return ngcore::SIMD<double,N>([a](int i)->double { return tan(a[i]); } );
893
+ }
894
+
895
+ using std::atan;
896
+ template <int N>
897
+ NETGEN_INLINE ngcore::SIMD<double,N> atan (ngcore::SIMD<double,N> a) {
898
+ return ngcore::SIMD<double,N>([a](int i)->double { return atan(a[i]); } );
899
+ }
900
+
901
+ using std::atan2;
902
+ template <int N>
903
+ NETGEN_INLINE ngcore::SIMD<double,N> atan2 (ngcore::SIMD<double,N> y, ngcore::SIMD<double,N> x) {
904
+ return ngcore::SIMD<double,N>([y,x](int i)->double { return atan2(y[i], x[i]); } );
905
+ }
906
+
907
+ using std::acos;
908
+ template <int N>
909
+ NETGEN_INLINE ngcore::SIMD<double,N> acos (ngcore::SIMD<double,N> a) {
910
+ return ngcore::SIMD<double,N>([a](int i)->double { return acos(a[i]); } );
911
+ }
912
+
913
+ using std::asin;
914
+ template <int N>
915
+ NETGEN_INLINE ngcore::SIMD<double,N> asin (ngcore::SIMD<double,N> a) {
916
+ return ngcore::SIMD<double,N>([a](int i)->double { return asin(a[i]); } );
917
+ }
918
+
919
+ using std::sinh;
920
+ template <int N>
921
+ NETGEN_INLINE ngcore::SIMD<double,N> sinh (ngcore::SIMD<double,N> a) {
922
+ return ngcore::SIMD<double,N>([a](int i)->double { return sinh(a[i]); } );
923
+ }
924
+
925
+ using std::cosh;
926
+ template <int N>
927
+ NETGEN_INLINE ngcore::SIMD<double,N> cosh (ngcore::SIMD<double,N> a) {
928
+ return ngcore::SIMD<double,N>([a](int i)->double { return cosh(a[i]); } );
929
+ }
930
+
931
+ template<int N, typename T>
932
+ using MultiSIMD = SIMD<T, N*GetDefaultSIMDSize()>;
933
+
934
+ template<int N>
935
+ NETGEN_INLINE auto Unpack (SIMD<double,N> a, SIMD<double,N> b)
936
+ {
937
+ if constexpr(N==1)
938
+ {
939
+ return std::make_tuple(SIMD<double,N>{a.Data()}, SIMD<double,N>{b.Data()} );
940
+ }
941
+ else if constexpr(N==2)
942
+ {
943
+ return std::make_tuple(SIMD<double,N>{ a.Lo(), b.Lo() },
944
+ SIMD<double,N>{ a.Hi(), b.Hi() });
945
+ }
946
+ else
947
+ {
948
+ auto [a1,b1] = Unpack(a.Lo(), b.Lo());
949
+ auto [a2,b2] = Unpack(a.Hi(), b.Hi());
950
+ return std::make_tuple(SIMD<double,N>{ a1, a2 },
951
+ SIMD<double,N>{ b1, b2 });
952
+ }
953
+ }
954
+
955
+ // TODO: specialize for AVX, ...
956
+ template<int N>
957
+ NETGEN_INLINE auto SwapPairs (SIMD<double,N> a)
958
+ {
959
+ if constexpr(N==1) {
960
+ // static_assert(false);
961
+ return a;
962
+ }
963
+ else if constexpr(N==2) {
964
+ return SIMD<double,N> (a.Hi(), a.Lo());
965
+ }
966
+ else {
967
+ return SIMD<double,N> (SwapPairs(a.Lo()), SwapPairs(a.Hi()));
968
+ }
969
+ }
970
+
971
+
972
+ template<int N>
973
+ NETGEN_INLINE auto HSum128 (SIMD<double,N> a)
974
+ {
975
+ if constexpr(N==1) {
976
+ // static_assert(false);
977
+ return a;
978
+ }
979
+ else if constexpr(N==2) {
980
+ return a;
981
+ }
982
+ else {
983
+ return HSum128(a.Lo()) + HSum128(a.Hi());
984
+ }
985
+ }
986
+
987
+
988
+ // TODO: specialize for AVX, ...
989
+ // a*b+-c (even: -, odd: +)
990
+ template<int N>
991
+ NETGEN_INLINE auto FMAddSub (SIMD<double,N> a, SIMD<double,N> b, SIMD<double,N> c)
992
+ {
993
+ if constexpr(N==1) {
994
+ // static_assert(false);
995
+ return a*b-c;
996
+ }
997
+ else if constexpr(N==2) {
998
+ return SIMD<double,N> (a.Lo()*b.Lo()-c.Lo(),
999
+ a.Hi()*b.Hi()+c.Hi());
1000
+ }
1001
+ else {
1002
+ return SIMD<double,N> (FMAddSub(a.Lo(), b.Lo(), c.Lo()),
1003
+ FMAddSub(a.Hi(), b.Hi(), c.Hi()));
1004
+ }
1005
+ }
1006
+
1007
+
1008
+
1009
+
1010
// Copies selected elements of a tuple into a new tuple: for every index in
// the given index sequence, element BASE+index of source is taken.
template <int BASE, typename Tuple, std::size_t ... Offsets>
auto subtuple (const Tuple& source, std::index_sequence<Offsets...>)
{
  return std::make_tuple(std::get<BASE + Offsets>(source)...);
}
1015
+
1016
+ template <typename ...Args, typename T, int M>
1017
+ auto Concat (std::tuple<SIMD<T,M>, Args...> tup)
1018
+ {
1019
+ constexpr size_t N = std::tuple_size<std::tuple<SIMD<T,M>, Args...>>();
1020
+
1021
+ if constexpr (N == 1)
1022
+ return get<0>(tup);
1023
+ else
1024
+ {
1025
+ static constexpr size_t N1 = LargestPowerOfTwo(N-1);
1026
+ static constexpr int N2 = N-N1;
1027
+
1028
+ auto SEQ1 = std::make_index_sequence<N1>();
1029
+ auto sub1 = subtuple<0>(tup, SEQ1);
1030
+
1031
+ auto SEQ2 = std::make_index_sequence<N2>();
1032
+ auto sub2 = subtuple<N1>(tup, SEQ2);
1033
+
1034
+ auto S1 = Concat(sub1);
1035
+ auto S2 = Concat(sub2);
1036
+ return SIMD<T,S1.Size()+S2.Size()>(S1, S2);
1037
+ }
1038
+ }
1039
+
1040
+
1041
+
1042
+ }
1043
+
1044
+
1045
+ namespace std
1046
+ {
1047
+ // structured binding support
1048
+ template <typename T, int N >
1049
+ struct tuple_size<ngcore::SIMD<T,N>> : std::integral_constant<std::size_t, N> {};
1050
+ template<size_t N, typename T, int M> struct tuple_element<N,ngcore::SIMD<T,M>> { using type = T; };
1051
+ }
1052
+
1053
+ #endif // NETGEN_CORE_SIMD_GENERIC_HPP