netgen-mesher 6.2.2506.post35.dev0__cp314-cp314-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (340) hide show
  1. netgen/NgOCC.py +7 -0
  2. netgen/__init__.py +114 -0
  3. netgen/__init__.pyi +22 -0
  4. netgen/__main__.py +53 -0
  5. netgen/cmake/NetgenConfig.cmake +79 -0
  6. netgen/cmake/netgen-targets-release.cmake +69 -0
  7. netgen/cmake/netgen-targets.cmake +146 -0
  8. netgen/config/__init__.py +1 -0
  9. netgen/config/__init__.pyi +52 -0
  10. netgen/config/__main__.py +4 -0
  11. netgen/config/config.py +68 -0
  12. netgen/config/config.pyi +54 -0
  13. netgen/csg.py +25 -0
  14. netgen/geom2d.py +178 -0
  15. netgen/gui.py +82 -0
  16. netgen/include/core/archive.hpp +1256 -0
  17. netgen/include/core/array.hpp +1760 -0
  18. netgen/include/core/autodiff.hpp +1131 -0
  19. netgen/include/core/autodiffdiff.hpp +733 -0
  20. netgen/include/core/bitarray.hpp +240 -0
  21. netgen/include/core/concurrentqueue.h +3619 -0
  22. netgen/include/core/exception.hpp +145 -0
  23. netgen/include/core/flags.hpp +199 -0
  24. netgen/include/core/hashtable.hpp +1281 -0
  25. netgen/include/core/localheap.hpp +318 -0
  26. netgen/include/core/logging.hpp +117 -0
  27. netgen/include/core/memtracer.hpp +221 -0
  28. netgen/include/core/mpi4py_pycapi.h +245 -0
  29. netgen/include/core/mpi_wrapper.hpp +643 -0
  30. netgen/include/core/ng_mpi.hpp +94 -0
  31. netgen/include/core/ng_mpi_generated_declarations.hpp +155 -0
  32. netgen/include/core/ng_mpi_native.hpp +25 -0
  33. netgen/include/core/ngcore.hpp +32 -0
  34. netgen/include/core/ngcore_api.hpp +152 -0
  35. netgen/include/core/ngstream.hpp +115 -0
  36. netgen/include/core/paje_trace.hpp +279 -0
  37. netgen/include/core/profiler.hpp +382 -0
  38. netgen/include/core/python_ngcore.hpp +457 -0
  39. netgen/include/core/ranges.hpp +109 -0
  40. netgen/include/core/register_archive.hpp +100 -0
  41. netgen/include/core/signal.hpp +82 -0
  42. netgen/include/core/simd.hpp +160 -0
  43. netgen/include/core/simd_arm64.hpp +407 -0
  44. netgen/include/core/simd_avx.hpp +394 -0
  45. netgen/include/core/simd_avx512.hpp +285 -0
  46. netgen/include/core/simd_generic.hpp +1053 -0
  47. netgen/include/core/simd_math.hpp +178 -0
  48. netgen/include/core/simd_sse.hpp +289 -0
  49. netgen/include/core/statushandler.hpp +37 -0
  50. netgen/include/core/symboltable.hpp +153 -0
  51. netgen/include/core/table.hpp +810 -0
  52. netgen/include/core/taskmanager.hpp +1161 -0
  53. netgen/include/core/type_traits.hpp +65 -0
  54. netgen/include/core/utils.hpp +385 -0
  55. netgen/include/core/version.hpp +102 -0
  56. netgen/include/core/xbool.hpp +47 -0
  57. netgen/include/csg/algprim.hpp +563 -0
  58. netgen/include/csg/brick.hpp +150 -0
  59. netgen/include/csg/csg.hpp +43 -0
  60. netgen/include/csg/csgeom.hpp +389 -0
  61. netgen/include/csg/csgparser.hpp +101 -0
  62. netgen/include/csg/curve2d.hpp +67 -0
  63. netgen/include/csg/edgeflw.hpp +112 -0
  64. netgen/include/csg/explicitcurve2d.hpp +113 -0
  65. netgen/include/csg/extrusion.hpp +185 -0
  66. netgen/include/csg/gencyl.hpp +70 -0
  67. netgen/include/csg/geoml.hpp +16 -0
  68. netgen/include/csg/identify.hpp +213 -0
  69. netgen/include/csg/manifold.hpp +29 -0
  70. netgen/include/csg/meshsurf.hpp +46 -0
  71. netgen/include/csg/polyhedra.hpp +121 -0
  72. netgen/include/csg/revolution.hpp +180 -0
  73. netgen/include/csg/singularref.hpp +84 -0
  74. netgen/include/csg/solid.hpp +295 -0
  75. netgen/include/csg/specpoin.hpp +194 -0
  76. netgen/include/csg/spline3d.hpp +99 -0
  77. netgen/include/csg/splinesurface.hpp +85 -0
  78. netgen/include/csg/surface.hpp +394 -0
  79. netgen/include/csg/triapprox.hpp +63 -0
  80. netgen/include/csg/vscsg.hpp +34 -0
  81. netgen/include/general/autodiff.hpp +356 -0
  82. netgen/include/general/autoptr.hpp +39 -0
  83. netgen/include/general/gzstream.h +121 -0
  84. netgen/include/general/hashtabl.hpp +1692 -0
  85. netgen/include/general/myadt.hpp +48 -0
  86. netgen/include/general/mystring.hpp +226 -0
  87. netgen/include/general/netgenout.hpp +205 -0
  88. netgen/include/general/ngarray.hpp +797 -0
  89. netgen/include/general/ngbitarray.hpp +149 -0
  90. netgen/include/general/ngpython.hpp +74 -0
  91. netgen/include/general/optmem.hpp +44 -0
  92. netgen/include/general/parthreads.hpp +138 -0
  93. netgen/include/general/seti.hpp +50 -0
  94. netgen/include/general/sort.hpp +47 -0
  95. netgen/include/general/spbita2d.hpp +59 -0
  96. netgen/include/general/stack.hpp +114 -0
  97. netgen/include/general/table.hpp +280 -0
  98. netgen/include/general/template.hpp +509 -0
  99. netgen/include/geom2d/csg2d.hpp +750 -0
  100. netgen/include/geom2d/geometry2d.hpp +280 -0
  101. netgen/include/geom2d/spline2d.hpp +234 -0
  102. netgen/include/geom2d/vsgeom2d.hpp +28 -0
  103. netgen/include/gprim/adtree.hpp +1392 -0
  104. netgen/include/gprim/geom2d.hpp +858 -0
  105. netgen/include/gprim/geom3d.hpp +749 -0
  106. netgen/include/gprim/geomfuncs.hpp +212 -0
  107. netgen/include/gprim/geomobjects.hpp +544 -0
  108. netgen/include/gprim/geomops.hpp +404 -0
  109. netgen/include/gprim/geomtest3d.hpp +101 -0
  110. netgen/include/gprim/gprim.hpp +33 -0
  111. netgen/include/gprim/spline.hpp +778 -0
  112. netgen/include/gprim/splinegeometry.hpp +73 -0
  113. netgen/include/gprim/transform3d.hpp +216 -0
  114. netgen/include/include/acisgeom.hpp +3 -0
  115. netgen/include/include/csg.hpp +1 -0
  116. netgen/include/include/geometry2d.hpp +1 -0
  117. netgen/include/include/gprim.hpp +1 -0
  118. netgen/include/include/incopengl.hpp +62 -0
  119. netgen/include/include/inctcl.hpp +13 -0
  120. netgen/include/include/incvis.hpp +6 -0
  121. netgen/include/include/linalg.hpp +1 -0
  122. netgen/include/include/meshing.hpp +1 -0
  123. netgen/include/include/myadt.hpp +1 -0
  124. netgen/include/include/mydefs.hpp +70 -0
  125. netgen/include/include/mystdlib.h +59 -0
  126. netgen/include/include/netgen_config.hpp +27 -0
  127. netgen/include/include/netgen_version.hpp +9 -0
  128. netgen/include/include/nginterface_v2_impl.hpp +395 -0
  129. netgen/include/include/ngsimd.hpp +1 -0
  130. netgen/include/include/occgeom.hpp +1 -0
  131. netgen/include/include/opti.hpp +1 -0
  132. netgen/include/include/parallel.hpp +1 -0
  133. netgen/include/include/stlgeom.hpp +1 -0
  134. netgen/include/include/visual.hpp +1 -0
  135. netgen/include/interface/rw_medit.hpp +11 -0
  136. netgen/include/interface/writeuser.hpp +80 -0
  137. netgen/include/linalg/densemat.hpp +414 -0
  138. netgen/include/linalg/linalg.hpp +29 -0
  139. netgen/include/linalg/opti.hpp +142 -0
  140. netgen/include/linalg/polynomial.hpp +47 -0
  141. netgen/include/linalg/vector.hpp +217 -0
  142. netgen/include/meshing/adfront2.hpp +274 -0
  143. netgen/include/meshing/adfront3.hpp +332 -0
  144. netgen/include/meshing/basegeom.hpp +370 -0
  145. netgen/include/meshing/bcfunctions.hpp +53 -0
  146. netgen/include/meshing/bisect.hpp +72 -0
  147. netgen/include/meshing/boundarylayer.hpp +113 -0
  148. netgen/include/meshing/classifyhpel.hpp +1984 -0
  149. netgen/include/meshing/clusters.hpp +46 -0
  150. netgen/include/meshing/curvedelems.hpp +274 -0
  151. netgen/include/meshing/delaunay2d.hpp +73 -0
  152. netgen/include/meshing/fieldlines.hpp +103 -0
  153. netgen/include/meshing/findip.hpp +198 -0
  154. netgen/include/meshing/findip2.hpp +103 -0
  155. netgen/include/meshing/geomsearch.hpp +69 -0
  156. netgen/include/meshing/global.hpp +54 -0
  157. netgen/include/meshing/hpref_hex.hpp +330 -0
  158. netgen/include/meshing/hpref_prism.hpp +3405 -0
  159. netgen/include/meshing/hpref_pyramid.hpp +154 -0
  160. netgen/include/meshing/hpref_quad.hpp +2082 -0
  161. netgen/include/meshing/hpref_segm.hpp +122 -0
  162. netgen/include/meshing/hpref_tet.hpp +4230 -0
  163. netgen/include/meshing/hpref_trig.hpp +848 -0
  164. netgen/include/meshing/hprefinement.hpp +366 -0
  165. netgen/include/meshing/improve2.hpp +178 -0
  166. netgen/include/meshing/improve3.hpp +151 -0
  167. netgen/include/meshing/localh.hpp +223 -0
  168. netgen/include/meshing/meshclass.hpp +1076 -0
  169. netgen/include/meshing/meshfunc.hpp +47 -0
  170. netgen/include/meshing/meshing.hpp +63 -0
  171. netgen/include/meshing/meshing2.hpp +163 -0
  172. netgen/include/meshing/meshing3.hpp +123 -0
  173. netgen/include/meshing/meshtool.hpp +90 -0
  174. netgen/include/meshing/meshtype.hpp +1930 -0
  175. netgen/include/meshing/msghandler.hpp +62 -0
  176. netgen/include/meshing/paralleltop.hpp +172 -0
  177. netgen/include/meshing/python_mesh.hpp +206 -0
  178. netgen/include/meshing/ruler2.hpp +172 -0
  179. netgen/include/meshing/ruler3.hpp +211 -0
  180. netgen/include/meshing/soldata.hpp +141 -0
  181. netgen/include/meshing/specials.hpp +17 -0
  182. netgen/include/meshing/surfacegeom.hpp +73 -0
  183. netgen/include/meshing/topology.hpp +1003 -0
  184. netgen/include/meshing/validate.hpp +21 -0
  185. netgen/include/meshing/visual_interface.hpp +71 -0
  186. netgen/include/mydefs.hpp +70 -0
  187. netgen/include/nginterface.h +474 -0
  188. netgen/include/nginterface_v2.hpp +406 -0
  189. netgen/include/nglib.h +697 -0
  190. netgen/include/nglib_occ.h +50 -0
  191. netgen/include/occ/occ_edge.hpp +47 -0
  192. netgen/include/occ/occ_face.hpp +52 -0
  193. netgen/include/occ/occ_solid.hpp +23 -0
  194. netgen/include/occ/occ_utils.hpp +376 -0
  195. netgen/include/occ/occ_vertex.hpp +30 -0
  196. netgen/include/occ/occgeom.hpp +659 -0
  197. netgen/include/occ/occmeshsurf.hpp +168 -0
  198. netgen/include/occ/vsocc.hpp +33 -0
  199. netgen/include/pybind11/LICENSE +29 -0
  200. netgen/include/pybind11/attr.h +722 -0
  201. netgen/include/pybind11/buffer_info.h +208 -0
  202. netgen/include/pybind11/cast.h +2361 -0
  203. netgen/include/pybind11/chrono.h +228 -0
  204. netgen/include/pybind11/common.h +2 -0
  205. netgen/include/pybind11/complex.h +74 -0
  206. netgen/include/pybind11/conduit/README.txt +15 -0
  207. netgen/include/pybind11/conduit/pybind11_conduit_v1.h +116 -0
  208. netgen/include/pybind11/conduit/pybind11_platform_abi_id.h +87 -0
  209. netgen/include/pybind11/conduit/wrap_include_python_h.h +72 -0
  210. netgen/include/pybind11/critical_section.h +56 -0
  211. netgen/include/pybind11/detail/class.h +823 -0
  212. netgen/include/pybind11/detail/common.h +1348 -0
  213. netgen/include/pybind11/detail/cpp_conduit.h +75 -0
  214. netgen/include/pybind11/detail/descr.h +226 -0
  215. netgen/include/pybind11/detail/dynamic_raw_ptr_cast_if_possible.h +39 -0
  216. netgen/include/pybind11/detail/exception_translation.h +71 -0
  217. netgen/include/pybind11/detail/function_record_pyobject.h +191 -0
  218. netgen/include/pybind11/detail/init.h +538 -0
  219. netgen/include/pybind11/detail/internals.h +799 -0
  220. netgen/include/pybind11/detail/native_enum_data.h +209 -0
  221. netgen/include/pybind11/detail/pybind11_namespace_macros.h +82 -0
  222. netgen/include/pybind11/detail/struct_smart_holder.h +378 -0
  223. netgen/include/pybind11/detail/type_caster_base.h +1591 -0
  224. netgen/include/pybind11/detail/typeid.h +65 -0
  225. netgen/include/pybind11/detail/using_smart_holder.h +22 -0
  226. netgen/include/pybind11/detail/value_and_holder.h +90 -0
  227. netgen/include/pybind11/eigen/common.h +9 -0
  228. netgen/include/pybind11/eigen/matrix.h +723 -0
  229. netgen/include/pybind11/eigen/tensor.h +521 -0
  230. netgen/include/pybind11/eigen.h +12 -0
  231. netgen/include/pybind11/embed.h +320 -0
  232. netgen/include/pybind11/eval.h +161 -0
  233. netgen/include/pybind11/functional.h +147 -0
  234. netgen/include/pybind11/gil.h +199 -0
  235. netgen/include/pybind11/gil_safe_call_once.h +102 -0
  236. netgen/include/pybind11/gil_simple.h +37 -0
  237. netgen/include/pybind11/iostream.h +265 -0
  238. netgen/include/pybind11/native_enum.h +67 -0
  239. netgen/include/pybind11/numpy.h +2312 -0
  240. netgen/include/pybind11/operators.h +202 -0
  241. netgen/include/pybind11/options.h +92 -0
  242. netgen/include/pybind11/pybind11.h +3645 -0
  243. netgen/include/pybind11/pytypes.h +2680 -0
  244. netgen/include/pybind11/stl/filesystem.h +114 -0
  245. netgen/include/pybind11/stl.h +666 -0
  246. netgen/include/pybind11/stl_bind.h +858 -0
  247. netgen/include/pybind11/subinterpreter.h +299 -0
  248. netgen/include/pybind11/trampoline_self_life_support.h +65 -0
  249. netgen/include/pybind11/type_caster_pyobject_ptr.h +61 -0
  250. netgen/include/pybind11/typing.h +298 -0
  251. netgen/include/pybind11/warnings.h +75 -0
  252. netgen/include/stlgeom/meshstlsurface.hpp +67 -0
  253. netgen/include/stlgeom/stlgeom.hpp +491 -0
  254. netgen/include/stlgeom/stlline.hpp +193 -0
  255. netgen/include/stlgeom/stltool.hpp +331 -0
  256. netgen/include/stlgeom/stltopology.hpp +419 -0
  257. netgen/include/stlgeom/vsstl.hpp +58 -0
  258. netgen/include/visualization/meshdoc.hpp +42 -0
  259. netgen/include/visualization/mvdraw.hpp +325 -0
  260. netgen/include/visualization/vispar.hpp +128 -0
  261. netgen/include/visualization/visual.hpp +28 -0
  262. netgen/include/visualization/visual_api.hpp +10 -0
  263. netgen/include/visualization/vssolution.hpp +399 -0
  264. netgen/lib/libnggui.lib +0 -0
  265. netgen/lib/ngcore.lib +0 -0
  266. netgen/lib/nglib.lib +0 -0
  267. netgen/lib/togl.lib +0 -0
  268. netgen/libnggui.dll +0 -0
  269. netgen/libngguipy.lib +0 -0
  270. netgen/libngguipy.pyd +0 -0
  271. netgen/libngpy/_NgOCC.pyi +1545 -0
  272. netgen/libngpy/__init__.pyi +7 -0
  273. netgen/libngpy/_csg.pyi +259 -0
  274. netgen/libngpy/_geom2d.pyi +323 -0
  275. netgen/libngpy/_meshing.pyi +1111 -0
  276. netgen/libngpy/_stl.pyi +131 -0
  277. netgen/libngpy.lib +0 -0
  278. netgen/libngpy.pyd +0 -0
  279. netgen/meshing.py +65 -0
  280. netgen/ngcore.dll +0 -0
  281. netgen/nglib.dll +0 -0
  282. netgen/occ.py +52 -0
  283. netgen/read_gmsh.py +259 -0
  284. netgen/read_meshio.py +22 -0
  285. netgen/stl.py +2 -0
  286. netgen/togl.dll +0 -0
  287. netgen/version.py +2 -0
  288. netgen/webgui.py +529 -0
  289. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/boundarycondition.geo +16 -0
  290. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/boxcyl.geo +32 -0
  291. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/circle_on_cube.geo +27 -0
  292. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/cone.geo +13 -0
  293. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/cube.geo +16 -0
  294. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/cubeandring.geo +55 -0
  295. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/cubeandspheres.geo +21 -0
  296. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/cubemcyl.geo +18 -0
  297. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/cubemsphere.geo +19 -0
  298. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/cylinder.geo +12 -0
  299. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/cylsphere.geo +12 -0
  300. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/doc/ng4.pdf +0 -0
  301. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/ellipsoid.geo +8 -0
  302. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/ellipticcyl.geo +10 -0
  303. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/extrusion.geo +99 -0
  304. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/fichera.geo +24 -0
  305. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/frame.step +11683 -0
  306. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/hinge.stl +8486 -0
  307. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/lshape3d.geo +26 -0
  308. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/manyholes.geo +26 -0
  309. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/manyholes2.geo +26 -0
  310. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/matrix.geo +27 -0
  311. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/ortho.geo +11 -0
  312. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/part1.stl +2662 -0
  313. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/period.geo +33 -0
  314. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/py_tutorials/exportNeutral.py +26 -0
  315. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/py_tutorials/mesh.py +19 -0
  316. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/py_tutorials/shaft.geo +65 -0
  317. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/revolution.geo +18 -0
  318. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/screw.step +1694 -0
  319. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/sculpture.geo +13 -0
  320. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/shaft.geo +65 -0
  321. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/shell.geo +10 -0
  322. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/sphere.geo +8 -0
  323. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/sphereincube.geo +17 -0
  324. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/square.in2d +35 -0
  325. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/squarecircle.in2d +48 -0
  326. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/squarehole.in2d +47 -0
  327. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/torus.geo +8 -0
  328. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/trafo.geo +57 -0
  329. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/twobricks.geo +15 -0
  330. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/twocubes.geo +18 -0
  331. netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/twocyl.geo +16 -0
  332. netgen_mesher-6.2.2506.post35.dev0.dist-info/METADATA +15 -0
  333. netgen_mesher-6.2.2506.post35.dev0.dist-info/RECORD +340 -0
  334. netgen_mesher-6.2.2506.post35.dev0.dist-info/WHEEL +5 -0
  335. netgen_mesher-6.2.2506.post35.dev0.dist-info/entry_points.txt +2 -0
  336. netgen_mesher-6.2.2506.post35.dev0.dist-info/licenses/AUTHORS +1 -0
  337. netgen_mesher-6.2.2506.post35.dev0.dist-info/licenses/LICENSE +504 -0
  338. netgen_mesher-6.2.2506.post35.dev0.dist-info/top_level.txt +2 -0
  339. pyngcore/__init__.py +1 -0
  340. pyngcore/pyngcore.cp314-win_amd64.pyd +0 -0
@@ -0,0 +1,1161 @@
1
+ #ifndef NETGEN_CORE_TASKMANAGER_HPP
2
+ #define NETGEN_CORE_TASKMANAGER_HPP
3
+
4
+ /*********************************************************************/
5
+ /* File: taskmanager.hpp */
6
+ /* Author: M. Hochsterger, J. Schoeberl */
7
+ /* Date: 10. Mar. 2015 */
8
+ /*********************************************************************/
9
+
10
+ #include <atomic>
11
+ #include <functional>
12
+ #include <list>
13
+ #include <cmath>
14
+ #include <ostream>
15
+ #include <thread>
16
+
17
+ #include "array.hpp"
18
+ #include "paje_trace.hpp"
19
+ #include "taskmanager.hpp"
20
+
21
+ #ifdef USE_NUMA
22
+ #include <numa.h>
23
+ #include <sched.h>
24
+ #endif
25
+
26
+
27
+ namespace ngcore
28
+ {
29
+ using std::atomic;
30
+ using std::function;
31
+
32
+ class TaskInfo // per-task descriptor; job functions receive it as TaskInfo& (see TaskManager::CreateJob)
33
+ {
34
+ public:
35
+ int task_nr; // index of this task; ParallelFor passes it to T_Range::Split to pick the task's sub-range
36
+ int ntasks; // total number of tasks of the job; second argument of T_Range::Split in ParallelFor
37
+
38
+ int thread_nr; // NOTE(review): presumably the index of the executing worker thread; filled in outside this header, confirm
39
+ int nthreads; // NOTE(review): presumably the number of worker threads; filled in outside this header, confirm
40
+
41
+ // int node_nr;
42
+ // int nnodes;
43
+ };
44
+
45
+ NGCORE_API extern class TaskManager * task_manager;
46
+
47
+ class TaskManager
48
+ {
49
+ // PajeTrace *trace;
50
+
51
+ class alignas(64) NodeData
52
+ {
53
+ public:
54
+ atomic<int> start_cnt{0};
55
+ atomic<int> participate{0};
56
+ };
57
+
58
+ NGCORE_API static const function<void(TaskInfo&)> * func;
59
+ NGCORE_API static const function<void()> * startup_function;
60
+ NGCORE_API static const function<void()> * cleanup_function;
61
+ NGCORE_API static atomic<int> ntasks;
62
+ NGCORE_API static Exception * ex;
63
+
64
+ NGCORE_API static atomic<int> jobnr;
65
+
66
+ static atomic<int> complete[8]; // max nodes
67
+ static atomic<int> done;
68
+ static atomic<int> active_workers;
69
+ static atomic<int> workers_on_node[8]; // max nodes
70
+ // Array<atomic<int>*> sync;
71
+ NGCORE_API static int sleep_usecs;
72
+ NGCORE_API static bool sleep;
73
+
74
+ static NodeData *nodedata[8];
75
+
76
+ static int num_nodes;
77
+ NGCORE_API static int num_threads;
78
+ NGCORE_API static int max_threads;
79
+
80
+
81
+
82
+ #ifdef WIN32 // no exported thread_local in dlls on Windows
83
+ static thread_local int thread_id;
84
+ #else
85
+ NGCORE_API static thread_local int thread_id;
86
+ #endif
87
+ NGCORE_API static bool use_paje_trace;
88
+ public:
89
+
90
+ NGCORE_API TaskManager();
91
+ NGCORE_API ~TaskManager();
92
+
93
+
94
+ NGCORE_API void StartWorkers();
95
+ NGCORE_API void StopWorkers();
96
+
97
+ bool IsSleeping() const { return sleep; }
98
+
99
+ int SuspendWorkers(int asleep_usecs = 1000 )
100
+ {
101
+ int old_sleep_usecs = sleep_usecs;
102
+ sleep_usecs = asleep_usecs;
103
+ sleep = true;
104
+ return old_sleep_usecs;
105
+ }
106
+ void ResumeWorkers() { sleep = false; }
107
+
108
+ NGCORE_API static void SetNumThreads(int amax_threads);
109
+ static int GetMaxThreads() { return max_threads; }
110
+ // static int GetNumThreads() { return task_manager ? task_manager->num_threads : 1; }
111
+ static int GetNumThreads() { return num_threads; }
112
+ #ifdef WIN32
113
+ NGCORE_API static int GetThreadId();
114
+ #else
115
+ static int GetThreadId() { return thread_id; }
116
+ #endif
117
+ int GetNumNodes() const { return num_nodes; }
118
+
119
+ static void SetPajeTrace (bool use) { use_paje_trace = use; }
120
+
121
+ NGCORE_API static bool ProcessTask();
122
+
123
+ NGCORE_API static void CreateJob (const function<void(TaskInfo&)> & afunc, // afunc: job body, called with a TaskInfo&; implemented outside this header
124
+ int antasks = TaskManager::GetNumThreads()); // default uses the static accessor (same value as before) so the global task_manager pointer, which may be null, is never dereferenced
125
+
126
+ static void SetStartupFunction (const function<void()> & func) { startup_function = &func; }
127
+ static void SetStartupFunction () { startup_function = nullptr; }
128
+ static void SetCleanupFunction (const function<void()> & func) { cleanup_function = &func; }
129
+ static void SetCleanupFunction () { cleanup_function = nullptr; }
130
+
131
+ void Done() { done = true; }
132
+ NGCORE_API void Loop(int thread_num);
133
+
134
+ NGCORE_API static std::list<std::tuple<std::string,double>> Timing ();
135
+ };
136
+
137
+
138
+
139
+
140
+
141
+
142
+
143
+
144
+
145
+ NGCORE_API void RunWithTaskManager (function<void()> alg);
146
+
147
+ // For Python context manager
148
+ NGCORE_API int EnterTaskManager ();
149
+ NGCORE_API void ExitTaskManager (int num_threads);
150
+
151
+ class RegionTaskManager // scope guard: enters the task manager in the constructor and exits it in the destructor, unless one already exists
152
+ {
153
+ int nthreads_before; // TaskManager::GetMaxThreads() saved by the constructor; only assigned when this object started the task manager
154
+ int nthreads; // requested thread count, later overwritten with the value returned by EnterTaskManager()
155
+ bool started_taskmanager; // true only if this object called EnterTaskManager(), so the destructor knows whether to undo it
156
+
157
+ public:
158
+ RegionTaskManager(int anthreads=TaskManager::GetMaxThreads()) // anthreads: thread count to use; 0 means do not start anything
159
+ : nthreads(anthreads)
160
+ {
161
+ if(task_manager || nthreads==0)
162
+ {
163
+ // a task manager already exists or zero threads were requested: nothing to start, nothing to undo later
164
+ started_taskmanager = false;
165
+ return;
166
+ }
167
+ else
168
+ {
169
+ nthreads_before = TaskManager::GetMaxThreads(); // remember the previous setting for the destructor
170
+ TaskManager::SetNumThreads(nthreads);
171
+ nthreads = EnterTaskManager(); // keep the returned value; it is handed back to ExitTaskManager()
172
+ started_taskmanager = true;
173
+ }
174
+ }
175
+
176
+ ~RegionTaskManager() // undoes the constructor's work only if this object started the task manager
177
+ {
178
+ if(started_taskmanager)
179
+ {
180
+ ExitTaskManager(nthreads);
181
+ TaskManager::SetNumThreads(nthreads_before); // restore the thread setting saved in the constructor
182
+ }
183
+ }
184
+ };
185
+
186
+ class SuspendTaskManager // scope guard: calls SuspendWorkers() on the current task manager and restores the previous sleep state on destruction
187
+ {
188
+ int old_sleep_usecs = 0; // sleep_usecs value returned by SuspendWorkers() in the constructor
189
+ bool old_sleep = false; // IsSleeping() as observed before this object suspended the workers
190
+ TaskManager * tm = nullptr; // global task_manager captured at construction; null means both constructor and destructor do nothing
191
+
192
+ public:
193
+ SuspendTaskManager(int asleep_usecs=1000) // asleep_usecs: value forwarded to TaskManager::SuspendWorkers()
194
+ : tm(task_manager)
195
+ {
196
+ if(!tm)
197
+ return;
198
+
199
+ old_sleep = tm->IsSleeping(); // must be read before SuspendWorkers() sets the sleep flag
200
+ old_sleep_usecs = tm->SuspendWorkers(asleep_usecs);
201
+ }
202
+
203
+ ~SuspendTaskManager() // puts the sleep flag and sleep time back to what the constructor found
204
+ {
205
+ if(!tm)
206
+ return;
207
+
208
+ if(old_sleep) // workers were already suspended before: keep them suspended, restore the old sleep time
209
+ tm->SuspendWorkers(old_sleep_usecs);
210
+ else
211
+ tm->ResumeWorkers();
212
+ }
213
+ };
214
+
215
+ NETGEN_INLINE int TasksPerThread (int tpt)
216
+ {
217
+ // return task_manager ? tpt*task_manager->GetNumThreads() : 1;
218
+ return tpt*TaskManager::GetNumThreads();
219
+ }
220
+
221
+
222
+ class TotalCosts
223
+ {
224
+ size_t cost;
225
+ public:
226
+ TotalCosts (size_t _cost) : cost(_cost) { ; }
227
+ size_t operator ()() { return cost; }
228
+ };
229
+
230
+ template <typename TR, typename TFUNC>
231
+ NETGEN_INLINE void ParallelFor (T_Range<TR> r, TFUNC f,
232
+ int antasks = TaskManager::GetNumThreads(),
233
+ TotalCosts costs = 1000)
234
+ {
235
+ // if (task_manager && costs() >= 1000)
236
+
237
+ TaskManager::CreateJob
238
+ ([r, f] (TaskInfo & ti)
239
+ {
240
+ auto myrange = r.Split (ti.task_nr, ti.ntasks);
241
+ for (auto i : myrange) f(i);
242
+ },
243
+ antasks);
244
+
245
+ /*
246
+ else
247
+ for (auto i : r) f(i);
248
+ */
249
+ }
250
+
251
+ /*
252
+ template <typename TFUNC>
253
+ NETGEN_INLINE void ParallelFor (size_t n, TFUNC f,
254
+ int antasks = task_manager ? task_manager->GetNumThreads() : 0)
255
+ {
256
+ ParallelFor (IntRange (n), f, antasks);
257
+ }
258
+ */
259
+ template <typename ...Args>
260
+ NETGEN_INLINE void ParallelFor (size_t n, Args...args)
261
+ {
262
+ ParallelFor (IntRange (n), args...);
263
+ }
264
+
265
+ template <typename TR, typename TFUNC>
266
+ NETGEN_INLINE void ParallelForRange (T_Range<TR> r, TFUNC f,
267
+ int antasks = TaskManager::GetNumThreads(),
268
+ TotalCosts costs = 1000)
269
+ {
270
+ // if (task_manager && costs() >= 1000)
271
+
272
+ TaskManager::CreateJob
273
+ ([r, f] (TaskInfo & ti)
274
+ {
275
+ auto myrange = r.Split (ti.task_nr, ti.ntasks);
276
+ f(myrange);
277
+ },
278
+ antasks);
279
+ /*
280
+ else
281
+ f(r);
282
+ */
283
+ }
284
+
285
+ /*
286
+ template <typename TFUNC>
287
+ NETGEN_INLINE void ParallelForRange (size_t n, TFUNC f,
288
+ int antasks = task_manager ? task_manager->GetNumThreads() : 0)
289
+ {
290
+ ParallelForRange (IntRange(n), f, antasks);
291
+ }
292
+ */
293
+ template <typename ...Args>
294
+ NETGEN_INLINE void ParallelForRange (size_t n, Args...args)
295
+ {
296
+ ParallelForRange (IntRange(n), args...);
297
+ }
298
+
299
+ template <typename TFUNC>
300
+ NETGEN_INLINE void ParallelJob (TFUNC f,
301
+ int antasks = TaskManager::GetNumThreads())
302
+ {
303
+ TaskManager::CreateJob (f, antasks);
304
+ }
305
+
306
+
307
+ /*
308
+ Usage example:
309
+
310
+ SharedLoop myloop(100);
311
+ task_manager->CreateJob ([&myloop](TaskInfo & ti)
312
+ {
313
+ for (int i : myloop)
314
+ cout << "i = " << i << endl;
315
+ });
316
+
317
+ */
318
+
319
+ class SharedLoop
320
+ {
321
+ atomic<int> cnt;
322
+ IntRange r;
323
+
324
+
325
+ class SharedIterator
326
+ {
327
+ atomic<int> & cnt;
328
+ int myval;
329
+ int endval;
330
+ public:
331
+ SharedIterator (atomic<int> & acnt, int aendval, bool begin_iterator)
332
+ : cnt (acnt)
333
+ {
334
+ endval = aendval;
335
+ myval = begin_iterator ? cnt++ : endval;
336
+ if (myval > endval) myval = endval;
337
+ }
338
+
339
+ SharedIterator & operator++ ()
340
+ {
341
+ myval = cnt++;
342
+ if (myval > endval) myval = endval;
343
+ return *this;
344
+ }
345
+
346
+ int operator* () const { return myval; }
347
+ bool operator!= (const SharedIterator & it2) const { return myval != it2.myval; }
348
+ };
349
+
350
+
351
+ public:
352
+ SharedLoop (IntRange ar) : r(ar) { cnt = r.begin(); }
353
+ SharedLoop (size_t s) : SharedLoop (IntRange{s}) { ; }
354
+ SharedIterator begin() { return SharedIterator (cnt, r.end(), true); }
355
+ SharedIterator end() { return SharedIterator (cnt, r.end(), false); }
356
+ };
357
+
358
+
359
+ /*
360
+ class alignas(4096) AtomicRange
361
+ {
362
+ mutex lock;
363
+ int begin;
364
+ int end;
365
+ public:
366
+
367
+ void Set (IntRange r)
368
+ {
369
+ lock_guard<mutex> guard(lock);
370
+ begin = r.begin();
371
+ end = r.end();
372
+ }
373
+
374
+ IntRange Get()
375
+ {
376
+ lock_guard<mutex> guard(lock);
377
+ return IntRange(begin, end);
378
+ }
379
+
380
+ bool PopFirst (int & first)
381
+ {
382
+ lock_guard<mutex> guard(lock);
383
+ bool non_empty = end > begin;
384
+ first = begin;
385
+ if (non_empty) begin++;
386
+ return non_empty;
387
+ }
388
+
389
+ bool PopHalf (IntRange & r)
390
+ {
391
+ lock_guard<mutex> guard(lock);
392
+ bool non_empty = end > begin;
393
+ if (non_empty)
394
+ {
395
+ int mid = (begin+end+1)/2;
396
+ r = IntRange(begin, mid);
397
+ begin = mid;
398
+ }
399
+ return non_empty;
400
+ }
401
+ };
402
+ */
403
+
404
+
405
+
406
+ // lock free popfirst
407
+ // faster for large loops, but slower for small loops (~1000) ????
408
+ /*
409
+ class alignas(4096) AtomicRange
410
+ {
411
+ mutex lock;
412
+ atomic<int> begin;
413
+ int end;
414
+ public:
415
+
416
+ void Set (IntRange r)
417
+ {
418
+ lock_guard<mutex> guard(lock);
419
+ // begin = r.begin();
420
+ begin.store(r.begin(), std::memory_order_relaxed);
421
+ end = r.end();
422
+ }
423
+
424
+ void SetNoLock (IntRange r)
425
+ {
426
+ begin.store(r.begin(), std::memory_order_relaxed);
427
+ end = r.end();
428
+ }
429
+
430
+ // IntRange Get()
431
+ // {
432
+ // lock_guard<mutex> guard(lock);
433
+ // return IntRange(begin, end);
434
+ // }
435
+
436
+ bool PopFirst (int & first)
437
+ {
438
+ // int oldbegin = begin;
439
+ int oldbegin = begin.load(std::memory_order_relaxed);
440
+ if (oldbegin >= end) return false;
441
+ while (!begin.compare_exchange_weak (oldbegin, oldbegin+1,
442
+ std::memory_order_relaxed, std::memory_order_relaxed))
443
+ if (oldbegin >= end) return false;
444
+
445
+ first = oldbegin;
446
+ return true;
447
+ }
448
+
449
+ bool PopHalf (IntRange & r)
450
+ {
451
+ // int oldbegin = begin;
452
+ int oldbegin = begin.load(std::memory_order_relaxed);
453
+ if (oldbegin >= end) return false;
454
+
455
+ lock_guard<mutex> guard(lock);
456
+ while (!begin.compare_exchange_weak (oldbegin, (oldbegin+end+1)/2,
457
+ std::memory_order_relaxed, std::memory_order_relaxed))
458
+ if (oldbegin >= end) return false;
459
+
460
+ r = IntRange(oldbegin, (oldbegin+end+1)/2);
461
+ return true;
462
+ }
463
+ };
464
+
465
+
466
+ // inline ostream & operator<< (ostream & ost, AtomicRange & r)
467
+ // {
468
+ // ost << r.Get();
469
+ // return ost;
470
+ // }
471
+ */
472
+
473
+
474
+
475
+ class alignas(4096) AtomicRange
476
+ {
477
+ atomic<size_t> begin;
478
+ atomic<size_t> end;
479
+ public:
480
+
481
+ void Set (IntRange r)
482
+ {
483
+ begin.store(std::numeric_limits<size_t>::max(), std::memory_order_release);
484
+ end.store(r.end(), std::memory_order_release);
485
+ begin.store(r.begin(), std::memory_order_release);
486
+ }
487
+
488
+ void SetNoLock (IntRange r)
489
+ {
490
+ end.store(r.end(), std::memory_order_release);
491
+ begin.store(r.begin(), std::memory_order_release);
492
+ }
493
+
494
+ // IntRange Get()
495
+ // {
496
+ // lock_guard<mutex> guard(lock);
497
+ // return IntRange(begin, end);
498
+ // }
499
+
500
+ bool PopFirst (size_t & hfirst) // claims the front index of the range into hfirst; returns true iff hfirst is inside the range (hfirst < end)
501
+ {
502
+ // first = begin++;
503
+ // return first < end;
504
+
505
+ size_t first = begin.load(std::memory_order_relaxed);
506
+
507
+ size_t nextfirst = first+1;
508
+ if (first >= end) nextfirst = std::numeric_limits<size_t>::max()-1; // nothing left (or Set() has parked begin at max()): store the exhausted marker instead of first+1
509
+
510
+ // while (!begin.compare_exchange_weak (first, nextfirst))
511
+ while (!begin.compare_exchange_weak (first, nextfirst,
512
+ std::memory_order_relaxed,
513
+ std::memory_order_relaxed))
514
+ {
515
+ first = begin; // reload the current front after a failed exchange
516
+ nextfirst = first+1;
517
+ if (first >= end) nextfirst = std::numeric_limits<size_t>::max()-1; // same test as before the loop: checking nextfirst let first == max() wrap to 0 and be published as the new begin
518
+ }
519
+ hfirst = first;
520
+ return first < end;
521
+ }
522
+
523
+ bool PopHalf (IntRange & r)
524
+ {
525
+ /*
526
+ // int oldbegin = begin;
527
+ size_t oldbegin = begin.load(std::memory_order_acquire);
528
+ size_t oldend = end.load(std::memory_order_acquire);
529
+ if (oldbegin >= oldend) return false;
530
+
531
+ // lock_guard<mutex> guard(lock);
532
+ while (!begin.compare_exchange_weak (oldbegin, (oldbegin+oldend+1)/2,
533
+ std::memory_order_relaxed, std::memory_order_relaxed))
534
+ {
535
+ oldend = end.load(std::memory_order_acquire);
536
+ if (oldbegin >= oldend) return false;
537
+ }
538
+
539
+ r = IntRange(oldbegin, (oldbegin+oldend+1)/2);
540
+ return true;
541
+ */
542
+
543
+
544
+ size_t oldbegin = begin; // .load(std::memory_order_acquire);
545
+ size_t oldend = end; // .load(std::memory_order_acquire);
546
+ if (oldbegin >= oldend) return false;
547
+
548
+ size_t nextbegin = (oldbegin+oldend+1)/2;
549
+ if (nextbegin >= oldend) nextbegin = std::numeric_limits<size_t>::max()-1;
550
+
551
+ while (!begin.compare_exchange_weak (oldbegin, nextbegin))
552
+ // std::memory_order_relaxed, std::memory_order_relaxed))
553
+ {
554
+ oldend = end; // .load(std::memory_order_acquire);
555
+ if (oldbegin >= oldend) return false;
556
+
557
+ nextbegin = (oldbegin+oldend+1)/2;
558
+ if (nextbegin >= oldend) nextbegin = std::numeric_limits<size_t>::max()-1;
559
+ }
560
+
561
+ r = IntRange(oldbegin, (oldbegin+oldend+1)/2);
562
+ return true;
563
+ }
564
+ };
565
+
566
+
567
+
568
+
569
+ class SharedLoop2
570
+ {
571
+ Array<AtomicRange> ranges;
572
+ atomic<size_t> processed;
573
+ atomic<size_t> total;
574
+ atomic<int> participants;
575
+
576
+ class SharedIterator
577
+ {
578
+ FlatArray<AtomicRange> ranges;
579
+ atomic<size_t> & processed;
580
+ size_t total;
581
+ size_t myval;
582
+ size_t processed_by_me = 0;
583
+ int me;
584
+ int steal_from;
585
+ public:
586
+ SharedIterator (FlatArray<AtomicRange> _ranges, atomic<size_t> & _processed, size_t _total,
587
+ int _me, bool begin_it)
588
+ : ranges(_ranges), processed(_processed), total(_total)
589
+ {
590
+ if (begin_it)
591
+ {
592
+ // me = TaskManager::GetThreadId();
593
+ me = _me;
594
+ steal_from = me;
595
+ GetNext();
596
+ }
597
+ }
598
+ ~SharedIterator()
599
+ {
600
+ if (processed_by_me)
601
+ processed += processed_by_me;
602
+ }
603
+
604
+ SharedIterator & operator++ () { GetNext(); return *this;}
605
+
606
+ void GetNext()
607
+ {
608
+ size_t nr;
609
+ if (ranges[me].PopFirst(nr))
610
+ {
611
+ processed_by_me++;
612
+ myval = nr;
613
+ return;
614
+ }
615
+ GetNext2();
616
+ }
617
+
618
+ void GetNext2()
619
+ {
620
+ processed += processed_by_me;
621
+ processed_by_me = 0;
622
+
623
+ // done with my work, going to steal ...
624
+ while (1)
625
+ {
626
+ if (processed >= total) return;
627
+
628
+ steal_from++;
629
+ if (steal_from == ranges.Size()) steal_from = 0;
630
+
631
+ // steal half of the work reserved for 'from':
632
+ IntRange steal;
633
+ if (ranges[steal_from].PopHalf(steal))
634
+ {
635
+ myval = steal.First();
636
+ processed_by_me++;
637
+ if (myval+1 < steal.Next())
638
+ ranges[me].Set (IntRange(myval+1, steal.Next()));
639
+ return;
640
+ }
641
+ }
642
+ }
643
+
644
+ size_t operator* () const { return myval; }
645
+ bool operator!= (const SharedIterator & it2) const { return processed < total; }
646
+ };
647
+
648
+
649
+ public:
650
+ SharedLoop2 ()
651
+ : ranges(TaskManager::GetNumThreads())
652
+ { ; }
653
+
654
+ SharedLoop2 (IntRange r)
655
+ : ranges(TaskManager::GetNumThreads())
656
+ {
657
+ Reset (r);
658
+ }
659
+
660
+ SharedLoop2 (size_t s) : SharedLoop2 (IntRange{s}) { }
661
+
662
+ void Reset (IntRange r)
663
+ {
664
+ for (size_t i = 0; i < ranges.Size(); i++)
665
+ ranges[i].SetNoLock (r.Split(i,ranges.Size()));
666
+
667
+ total.store(r.Size(), std::memory_order_relaxed);
668
+ participants.store(0, std::memory_order_relaxed);
669
+ processed.store(0, std::memory_order_release);
670
+ }
671
+
672
+ void Reset (size_t s) { Reset(IntRange{s}); }
673
+
674
+
675
+ SharedIterator begin()
676
+ {
677
+ /*
678
+ int me = participants++;
679
+ if (me < ranges.Size())
680
+ return SharedIterator (ranges, processed, total, me, true);
681
+ else
682
+ // more participants than buckets. set processed to total, and the loop is terminated immediately
683
+ return SharedIterator (ranges, total, total, me, true);
684
+ */
685
+ return SharedIterator (ranges, processed, total, TaskManager::GetThreadId(), true);
686
+ }
687
+
688
+ SharedIterator end() { return SharedIterator (ranges, processed, total, -1, false); }
689
+ };
690
+
691
+
692
+
693
+
694
+
695
+ class Partitioning
696
+ {
697
+ Array<size_t> part;
698
+ size_t total_costs;
699
+ public:
700
+ Partitioning () { ; }
701
+
702
+ template <typename T>
703
+ Partitioning (const Array<T> & apart) { part = apart; }
704
+
705
+ template <typename T>
706
+ Partitioning & operator= (const Array<T> & apart) { part = apart; return *this; }
707
+
708
+ size_t GetTotalCosts() const { return total_costs; }
709
+
710
+ template <typename TFUNC>
711
+ void Calc (size_t n, TFUNC costs, int size = task_manager ? task_manager->GetNumThreads() : 1)
712
+ {
713
+ Array<size_t> prefix (n);
714
+
715
+ /*
716
+ size_t sum = 0;
717
+ for (auto i : ngstd::Range(n))
718
+ {
719
+ sum += costs(i);
720
+ prefix[i] = sum;
721
+ }
722
+ total_costs = sum;
723
+ */
724
+
725
+ Array<size_t> partial_sums(TaskManager::GetNumThreads()+1);
726
+ partial_sums[0] = 0;
727
+ ParallelJob
728
+ ([&] (TaskInfo ti)
729
+ {
730
+ IntRange r = IntRange(n).Split(ti.task_nr, ti.ntasks);
731
+ size_t mysum = 0;
732
+ for (size_t i : r)
733
+ {
734
+ size_t c = costs(i);
735
+ mysum += c;
736
+ prefix[i] = c;
737
+ }
738
+ partial_sums[ti.task_nr+1] = mysum;
739
+ });
740
+
741
+ for (size_t i = 1; i < partial_sums.Size(); i++)
742
+ partial_sums[i] += partial_sums[i-1];
743
+ total_costs = partial_sums.Last();
744
+
745
+ ParallelJob
746
+ ([&] (TaskInfo ti)
747
+ {
748
+ IntRange r = IntRange(n).Split(ti.task_nr, ti.ntasks);
749
+ size_t mysum = partial_sums[ti.task_nr];
750
+ for (size_t i : r)
751
+ {
752
+ mysum += prefix[i];
753
+ prefix[i] = mysum;
754
+ }
755
+ });
756
+
757
+
758
+ part.SetSize (size+1);
759
+ part[0] = 0;
760
+
761
+ for (int i = 1; i <= size; i++)
762
+ part[i] = BinSearch (prefix, total_costs*i/size);
763
+ }
764
+
765
+ size_t Size() const { return part.Size()-1; }
766
+ IntRange operator[] (size_t i) const { return ngcore::Range(part[i], part[i+1]); }
767
+ IntRange Range() const { return ngcore::Range(part[0], part[Size()]); }
768
+
769
+
770
+
771
+
772
+ private:
773
+ template <typename Tarray>
774
+ int BinSearch(const Tarray & v, size_t i) {
775
+ int n = v.Size();
776
+ if (n == 0) return 0;
777
+
778
+ int first = 0;
779
+ int last = n-1;
780
+ if(v[0]>i) return 0;
781
+ if(v[n-1] <= i) return n;
782
+ while(last-first>1) {
783
+ int m = (first+last)/2;
784
+ if(v[m]<i)
785
+ first = m;
786
+ else
787
+ last = m;
788
+ }
789
+ return first;
790
+ }
791
+ };
792
+
793
+
794
+ inline std::ostream & operator<< (std::ostream & ost, const Partitioning & part)
795
+ {
796
+ for (int i : Range(part.Size()))
797
+ ost << part[i] << " ";
798
+ return ost;
799
+ }
800
+
801
+
802
+ // tasks must be a multiple of part.size
803
+ template <typename TFUNC>
804
+ NETGEN_INLINE void ParallelFor (const Partitioning & part, TFUNC f, int tasks_per_thread = 1)
805
+ {
806
+ if (task_manager)
807
+ {
808
+ int ntasks = tasks_per_thread * task_manager->GetNumThreads();
809
+ if (ntasks % part.Size() != 0)
810
+ throw Exception ("tasks must be a multiple of part.size");
811
+
812
+ task_manager -> CreateJob
813
+ ([&] (TaskInfo & ti)
814
+ {
815
+ int tasks_per_part = ti.ntasks / part.Size();
816
+ int mypart = ti.task_nr / tasks_per_part;
817
+ int num_in_part = ti.task_nr % tasks_per_part;
818
+
819
+ auto myrange = part[mypart].Split (num_in_part, tasks_per_part);
820
+ for (auto i : myrange) f(i);
821
+ }, ntasks);
822
+ }
823
+ else
824
+ {
825
+ for (auto i : part.Range())
826
+ f(i);
827
+ }
828
+ }
829
+
830
+
831
+
832
+
833
+
834
+ template <typename TFUNC>
835
+ NETGEN_INLINE void ParallelForRange (const Partitioning & part, TFUNC f,
836
+ int tasks_per_thread = 1, TotalCosts costs = 1000)
837
+ {
838
+ if (task_manager && costs() >= 1000)
839
+ {
840
+ int ntasks = tasks_per_thread * task_manager->GetNumThreads();
841
+ if (ntasks % part.Size() != 0)
842
+ throw Exception ("tasks must be a multiple of part.size");
843
+
844
+ task_manager -> CreateJob
845
+ ([&] (TaskInfo & ti)
846
+ {
847
+ int tasks_per_part = ti.ntasks / part.Size();
848
+ int mypart = ti.task_nr / tasks_per_part;
849
+ int num_in_part = ti.task_nr % tasks_per_part;
850
+
851
+ auto myrange = part[mypart].Split (num_in_part, tasks_per_part);
852
+ f(myrange);
853
+ }, ntasks);
854
+ }
855
+ else
856
+ {
857
+ f(part.Range());
858
+ }
859
+ }
860
+
861
+
862
+
863
+
864
+
865
+ template <typename FUNC, typename OP, typename T>
866
+ auto ParallelReduce (size_t n, FUNC f, OP op, T initial1)
867
+ {
868
+ typedef decltype (op(initial1,initial1)) TRES;
869
+ TRES initial(initial1);
870
+ /*
871
+ for (size_t i = 0; i < n; i++)
872
+ initial = op(initial, f(i));
873
+ */
874
+ Array<TRES> part_reduce(TaskManager::GetNumThreads());
875
+ ParallelJob ([&] (TaskInfo ti)
876
+ {
877
+ auto r = Range(n).Split(ti.task_nr, ti.ntasks);
878
+ auto var = initial;
879
+ for (auto i : r)
880
+ var = op(var, f(i));
881
+ part_reduce[ti.task_nr] = var;
882
+ });
883
+ for (auto v : part_reduce)
884
+ initial = op(initial, v);
885
+ return initial;
886
+ }
887
+
888
+
889
+
890
+
891
+
892
+
893
+
894
+ // // some sugar for working with arrays
895
+ //
896
+ // template <typename T> template <typename T2>
897
+ // const FlatArray<T> FlatArray<T>::operator= (ParallelValue<T2> val)
898
+ // {
899
+ // ParallelForRange (Size(),
900
+ // [this, val] (IntRange r)
901
+ // {
902
+ // for (auto i : r)
903
+ // (*this)[i] = val;
904
+ // });
905
+ // return *this;
906
+ // }
907
+ //
908
+ // template <typename T> template <typename T2>
909
+ // const FlatArray<T> FlatArray<T>::operator= (ParallelFunction<T2> func)
910
+ // {
911
+ // ParallelForRange (Size(),
912
+ // [this, func] (IntRange r)
913
+ // {
914
+ // for (auto i : r)
915
+ // (*this)[i] = func(i);
916
+ // });
917
+ // return *this;
918
+ // }
919
+
920
+ class Tasks
921
+ {
922
+ size_t num;
923
+ public:
924
+ explicit Tasks (size_t _num = TaskManager::GetNumThreads()) : num(_num) { ; }
925
+ auto GetNum() const { return num; }
926
+ };
927
+
928
+
929
+ /*
930
+ // some idea, not yet supported
931
+
932
+ using namespace std;
933
+ template <typename T>
934
+ class ParallelValue
935
+ {
936
+ T val;
937
+ public:
938
+ ParallelValue (const T & _val) : val(_val) { ; }
939
+ operator T () const { return val; }
940
+ };
941
+
942
+ template <typename FUNC> class ParallelFunction
943
+ {
944
+ FUNC f;
945
+ public:
946
+ ParallelFunction (const FUNC & _f) : f(_f) { ; }
947
+ operator FUNC () const { return f; }
948
+ auto operator() (size_t i) const { return f(i); }
949
+ };
950
+ */
951
+
952
+ /* currently not used, plus causing problems on MSVC 2017
953
+ template <typename T, typename std::enable_if<ngstd::has_call_operator<T>::value, int>::type = 0>
954
+ inline ParallelFunction<T> operator| (const T & func, Tasks tasks)
955
+ {
956
+ return func;
957
+ }
958
+
959
+ template <typename T, typename std::enable_if<!ngstd::has_call_operator<T>::value, int>::type = 0>
960
+ inline ParallelValue<T> operator| (const T & obj, Tasks tasks)
961
+ {
962
+ return obj;
963
+ }
964
+
965
+ inline Tasks operator "" _tasks_per_thread (unsigned long long n)
966
+ {
967
+ return Tasks(n * TaskManager::GetNumThreads());
968
+ }
969
+ */
970
+
971
+ /*
972
+ thought to be used as: array = 1 | tasks
973
+ class DefaultTasks
974
+ {
975
+ public:
976
+ operator Tasks () const { return TaskManager::GetNumThreads(); }
977
+ };
978
+ static DefaultTasks tasks;
979
+ */
980
+
981
+
982
+
983
+
984
+
985
+
986
+
987
+ #ifdef USE_NUMA
988
+
989
+ template <typename T>
990
+ class NumaInterleavedArray : public Array<T>
991
+ {
992
+ T * numa_ptr;
993
+ size_t numa_size;
994
+ public:
995
+ NumaInterleavedArray () { numa_size = 0; numa_ptr = nullptr; }
996
+ NumaInterleavedArray (size_t s)
997
+ : Array<T> (s, (T*)numa_alloc_interleaved(s*sizeof(T)))
998
+ {
999
+ numa_ptr = this->data;
1000
+ numa_size = s;
1001
+ }
1002
+
1003
+ ~NumaInterleavedArray ()
1004
+ {
1005
+ numa_free (numa_ptr, numa_size*sizeof(T));
1006
+ }
1007
+
1008
+ NumaInterleavedArray & operator= (T val)
1009
+ {
1010
+ Array<T>::operator= (val);
1011
+ return *this;
1012
+ }
1013
+
1014
+ NumaInterleavedArray & operator= (NumaInterleavedArray && a2)
1015
+ {
1016
+ Array<T>::operator= ((Array<T>&&)a2);
1017
+ ngcore::Swap (numa_ptr, a2.numa_ptr);
1018
+ ngcore::Swap (numa_size, a2.numa_size);
1019
+ return *this;
1020
+ }
1021
+
1022
+ void Swap (NumaInterleavedArray & b)
1023
+ {
1024
+ Array<T>::Swap(b);
1025
+ ngcore::Swap (numa_ptr, b.numa_ptr);
1026
+ ngcore::Swap (numa_size, b.numa_size);
1027
+ }
1028
+
1029
+ void SetSize (size_t size)
1030
+ {
1031
+ std::cerr << "************************* NumaDistArray::SetSize not overloaded" << std::endl;
1032
+ Array<T>::SetSize(size);
1033
+ }
1034
+ };
1035
+
1036
+ template <typename T>
1037
+ class NumaDistributedArray : public Array<T>
1038
+ {
1039
+ T * numa_ptr;
1040
+ size_t numa_size;
1041
+ public:
1042
+ NumaDistributedArray () { numa_size = 0; numa_ptr = nullptr; }
1043
+ NumaDistributedArray (size_t s)
1044
+ : Array<T> (s, (T*)numa_alloc_local(s*sizeof(T)))
1045
+ {
1046
+ numa_ptr = this->data;
1047
+ numa_size = s;
1048
+
1049
+ /* int avail = */ numa_available(); // initialize libnuma
1050
+ int num_nodes = numa_num_configured_nodes();
1051
+ size_t pagesize = numa_pagesize();
1052
+
1053
+ int npages = std::ceil ( double(s)*sizeof(T) / pagesize );
1054
+
1055
+ // cout << "size = " << numa_size << endl;
1056
+ // cout << "npages = " << npages << endl;
1057
+
1058
+ for (int i = 0; i < num_nodes; i++)
1059
+ {
1060
+ int beg = (i * npages) / num_nodes;
1061
+ int end = ( (i+1) * npages) / num_nodes;
1062
+ // cout << "node " << i << " : [" << beg << "-" << end << ")" << endl;
1063
+ numa_tonode_memory(numa_ptr+beg*pagesize/sizeof(T), (end-beg)*pagesize, i);
1064
+ }
1065
+ }
1066
+
1067
+ ~NumaDistributedArray ()
1068
+ {
1069
+ numa_free (numa_ptr, numa_size*sizeof(T));
1070
+ }
1071
+
1072
+ NumaDistributedArray & operator= (NumaDistributedArray && a2)
1073
+ {
1074
+ Array<T>::operator= ((Array<T>&&)a2);
1075
+ ngcore::Swap (numa_ptr, a2.numa_ptr);
1076
+ ngcore::Swap (numa_size, a2.numa_size);
1077
+ return *this;
1078
+ }
1079
+
1080
+ void Swap (NumaDistributedArray & b)
1081
+ {
1082
+ Array<T>::Swap(b);
1083
+ ngcore::Swap (numa_ptr, b.numa_ptr);
1084
+ ngcore::Swap (numa_size, b.numa_size);
1085
+ }
1086
+
1087
+ void SetSize (size_t size)
1088
+ {
1089
+ std::cerr << "************************* NumaDistArray::SetSize not overloaded" << std::endl;
1090
+ Array<T>::SetSize(size);
1091
+ }
1092
+ };
1093
+
1094
+
1095
+
1096
+ template <typename T>
1097
+ class NumaLocalArray : public Array<T>
1098
+ {
1099
+ T * numa_ptr;
1100
+ size_t numa_size;
1101
+ public:
1102
+ NumaLocalArray () { numa_size = 0; numa_ptr = nullptr; }
1103
+ NumaLocalArray (size_t s)
1104
+ : Array<T> (s, (T*)numa_alloc_local(s*sizeof(T)))
1105
+ {
1106
+ numa_ptr = this->data;
1107
+ numa_size = s;
1108
+ }
1109
+
1110
+ ~NumaLocalArray ()
1111
+ {
1112
+ numa_free (numa_ptr, numa_size*sizeof(T));
1113
+ }
1114
+
1115
+ NumaLocalArray & operator= (T val)
1116
+ {
1117
+ Array<T>::operator= (val);
1118
+ return *this;
1119
+ }
1120
+
1121
+ NumaLocalArray & operator= (NumaLocalArray && a2)
1122
+ {
1123
+ Array<T>::operator= ((Array<T>&&)a2);
1124
+ ngcore::Swap (numa_ptr, a2.numa_ptr);
1125
+ ngcore::Swap (numa_size, a2.numa_size);
1126
+ return *this;
1127
+ }
1128
+
1129
+ void Swap (NumaLocalArray & b)
1130
+ {
1131
+ Array<T>::Swap(b);
1132
+ ngcore::Swap (numa_ptr, b.numa_ptr);
1133
+ ngcore::Swap (numa_size, b.numa_size);
1134
+ }
1135
+
1136
+ void SetSize (size_t size)
1137
+ {
1138
+ std::cerr << "************************* NumaDistArray::SetSize not overloaded" << std::endl;
1139
+ Array<T>::SetSize(size);
1140
+ }
1141
+ };
1142
+
1143
+
1144
+ #else // USE_NUMA
1145
+
1146
+ template <typename T>
1147
+ using NumaDistributedArray = Array<T>;
1148
+
1149
+ template <typename T>
1150
+ using NumaInterleavedArray = Array<T>;
1151
+
1152
+ template <typename T>
1153
+ using NumaLocalArray = Array<T>;
1154
+
1155
+ #endif // USE_NUMA
1156
+
1157
+ }
1158
+
1159
+
1160
+
1161
+ #endif // NETGEN_CORE_TASKMANAGER_HPP