netgen-mesher 6.2.2506.post35.dev0__cp314-cp314-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- netgen/NgOCC.py +7 -0
- netgen/__init__.py +114 -0
- netgen/__init__.pyi +22 -0
- netgen/__main__.py +53 -0
- netgen/cmake/NetgenConfig.cmake +79 -0
- netgen/cmake/netgen-targets-release.cmake +69 -0
- netgen/cmake/netgen-targets.cmake +146 -0
- netgen/config/__init__.py +1 -0
- netgen/config/__init__.pyi +52 -0
- netgen/config/__main__.py +4 -0
- netgen/config/config.py +68 -0
- netgen/config/config.pyi +54 -0
- netgen/csg.py +25 -0
- netgen/geom2d.py +178 -0
- netgen/gui.py +82 -0
- netgen/include/core/archive.hpp +1256 -0
- netgen/include/core/array.hpp +1760 -0
- netgen/include/core/autodiff.hpp +1131 -0
- netgen/include/core/autodiffdiff.hpp +733 -0
- netgen/include/core/bitarray.hpp +240 -0
- netgen/include/core/concurrentqueue.h +3619 -0
- netgen/include/core/exception.hpp +145 -0
- netgen/include/core/flags.hpp +199 -0
- netgen/include/core/hashtable.hpp +1281 -0
- netgen/include/core/localheap.hpp +318 -0
- netgen/include/core/logging.hpp +117 -0
- netgen/include/core/memtracer.hpp +221 -0
- netgen/include/core/mpi4py_pycapi.h +245 -0
- netgen/include/core/mpi_wrapper.hpp +643 -0
- netgen/include/core/ng_mpi.hpp +94 -0
- netgen/include/core/ng_mpi_generated_declarations.hpp +155 -0
- netgen/include/core/ng_mpi_native.hpp +25 -0
- netgen/include/core/ngcore.hpp +32 -0
- netgen/include/core/ngcore_api.hpp +152 -0
- netgen/include/core/ngstream.hpp +115 -0
- netgen/include/core/paje_trace.hpp +279 -0
- netgen/include/core/profiler.hpp +382 -0
- netgen/include/core/python_ngcore.hpp +457 -0
- netgen/include/core/ranges.hpp +109 -0
- netgen/include/core/register_archive.hpp +100 -0
- netgen/include/core/signal.hpp +82 -0
- netgen/include/core/simd.hpp +160 -0
- netgen/include/core/simd_arm64.hpp +407 -0
- netgen/include/core/simd_avx.hpp +394 -0
- netgen/include/core/simd_avx512.hpp +285 -0
- netgen/include/core/simd_generic.hpp +1053 -0
- netgen/include/core/simd_math.hpp +178 -0
- netgen/include/core/simd_sse.hpp +289 -0
- netgen/include/core/statushandler.hpp +37 -0
- netgen/include/core/symboltable.hpp +153 -0
- netgen/include/core/table.hpp +810 -0
- netgen/include/core/taskmanager.hpp +1161 -0
- netgen/include/core/type_traits.hpp +65 -0
- netgen/include/core/utils.hpp +385 -0
- netgen/include/core/version.hpp +102 -0
- netgen/include/core/xbool.hpp +47 -0
- netgen/include/csg/algprim.hpp +563 -0
- netgen/include/csg/brick.hpp +150 -0
- netgen/include/csg/csg.hpp +43 -0
- netgen/include/csg/csgeom.hpp +389 -0
- netgen/include/csg/csgparser.hpp +101 -0
- netgen/include/csg/curve2d.hpp +67 -0
- netgen/include/csg/edgeflw.hpp +112 -0
- netgen/include/csg/explicitcurve2d.hpp +113 -0
- netgen/include/csg/extrusion.hpp +185 -0
- netgen/include/csg/gencyl.hpp +70 -0
- netgen/include/csg/geoml.hpp +16 -0
- netgen/include/csg/identify.hpp +213 -0
- netgen/include/csg/manifold.hpp +29 -0
- netgen/include/csg/meshsurf.hpp +46 -0
- netgen/include/csg/polyhedra.hpp +121 -0
- netgen/include/csg/revolution.hpp +180 -0
- netgen/include/csg/singularref.hpp +84 -0
- netgen/include/csg/solid.hpp +295 -0
- netgen/include/csg/specpoin.hpp +194 -0
- netgen/include/csg/spline3d.hpp +99 -0
- netgen/include/csg/splinesurface.hpp +85 -0
- netgen/include/csg/surface.hpp +394 -0
- netgen/include/csg/triapprox.hpp +63 -0
- netgen/include/csg/vscsg.hpp +34 -0
- netgen/include/general/autodiff.hpp +356 -0
- netgen/include/general/autoptr.hpp +39 -0
- netgen/include/general/gzstream.h +121 -0
- netgen/include/general/hashtabl.hpp +1692 -0
- netgen/include/general/myadt.hpp +48 -0
- netgen/include/general/mystring.hpp +226 -0
- netgen/include/general/netgenout.hpp +205 -0
- netgen/include/general/ngarray.hpp +797 -0
- netgen/include/general/ngbitarray.hpp +149 -0
- netgen/include/general/ngpython.hpp +74 -0
- netgen/include/general/optmem.hpp +44 -0
- netgen/include/general/parthreads.hpp +138 -0
- netgen/include/general/seti.hpp +50 -0
- netgen/include/general/sort.hpp +47 -0
- netgen/include/general/spbita2d.hpp +59 -0
- netgen/include/general/stack.hpp +114 -0
- netgen/include/general/table.hpp +280 -0
- netgen/include/general/template.hpp +509 -0
- netgen/include/geom2d/csg2d.hpp +750 -0
- netgen/include/geom2d/geometry2d.hpp +280 -0
- netgen/include/geom2d/spline2d.hpp +234 -0
- netgen/include/geom2d/vsgeom2d.hpp +28 -0
- netgen/include/gprim/adtree.hpp +1392 -0
- netgen/include/gprim/geom2d.hpp +858 -0
- netgen/include/gprim/geom3d.hpp +749 -0
- netgen/include/gprim/geomfuncs.hpp +212 -0
- netgen/include/gprim/geomobjects.hpp +544 -0
- netgen/include/gprim/geomops.hpp +404 -0
- netgen/include/gprim/geomtest3d.hpp +101 -0
- netgen/include/gprim/gprim.hpp +33 -0
- netgen/include/gprim/spline.hpp +778 -0
- netgen/include/gprim/splinegeometry.hpp +73 -0
- netgen/include/gprim/transform3d.hpp +216 -0
- netgen/include/include/acisgeom.hpp +3 -0
- netgen/include/include/csg.hpp +1 -0
- netgen/include/include/geometry2d.hpp +1 -0
- netgen/include/include/gprim.hpp +1 -0
- netgen/include/include/incopengl.hpp +62 -0
- netgen/include/include/inctcl.hpp +13 -0
- netgen/include/include/incvis.hpp +6 -0
- netgen/include/include/linalg.hpp +1 -0
- netgen/include/include/meshing.hpp +1 -0
- netgen/include/include/myadt.hpp +1 -0
- netgen/include/include/mydefs.hpp +70 -0
- netgen/include/include/mystdlib.h +59 -0
- netgen/include/include/netgen_config.hpp +27 -0
- netgen/include/include/netgen_version.hpp +9 -0
- netgen/include/include/nginterface_v2_impl.hpp +395 -0
- netgen/include/include/ngsimd.hpp +1 -0
- netgen/include/include/occgeom.hpp +1 -0
- netgen/include/include/opti.hpp +1 -0
- netgen/include/include/parallel.hpp +1 -0
- netgen/include/include/stlgeom.hpp +1 -0
- netgen/include/include/visual.hpp +1 -0
- netgen/include/interface/rw_medit.hpp +11 -0
- netgen/include/interface/writeuser.hpp +80 -0
- netgen/include/linalg/densemat.hpp +414 -0
- netgen/include/linalg/linalg.hpp +29 -0
- netgen/include/linalg/opti.hpp +142 -0
- netgen/include/linalg/polynomial.hpp +47 -0
- netgen/include/linalg/vector.hpp +217 -0
- netgen/include/meshing/adfront2.hpp +274 -0
- netgen/include/meshing/adfront3.hpp +332 -0
- netgen/include/meshing/basegeom.hpp +370 -0
- netgen/include/meshing/bcfunctions.hpp +53 -0
- netgen/include/meshing/bisect.hpp +72 -0
- netgen/include/meshing/boundarylayer.hpp +113 -0
- netgen/include/meshing/classifyhpel.hpp +1984 -0
- netgen/include/meshing/clusters.hpp +46 -0
- netgen/include/meshing/curvedelems.hpp +274 -0
- netgen/include/meshing/delaunay2d.hpp +73 -0
- netgen/include/meshing/fieldlines.hpp +103 -0
- netgen/include/meshing/findip.hpp +198 -0
- netgen/include/meshing/findip2.hpp +103 -0
- netgen/include/meshing/geomsearch.hpp +69 -0
- netgen/include/meshing/global.hpp +54 -0
- netgen/include/meshing/hpref_hex.hpp +330 -0
- netgen/include/meshing/hpref_prism.hpp +3405 -0
- netgen/include/meshing/hpref_pyramid.hpp +154 -0
- netgen/include/meshing/hpref_quad.hpp +2082 -0
- netgen/include/meshing/hpref_segm.hpp +122 -0
- netgen/include/meshing/hpref_tet.hpp +4230 -0
- netgen/include/meshing/hpref_trig.hpp +848 -0
- netgen/include/meshing/hprefinement.hpp +366 -0
- netgen/include/meshing/improve2.hpp +178 -0
- netgen/include/meshing/improve3.hpp +151 -0
- netgen/include/meshing/localh.hpp +223 -0
- netgen/include/meshing/meshclass.hpp +1076 -0
- netgen/include/meshing/meshfunc.hpp +47 -0
- netgen/include/meshing/meshing.hpp +63 -0
- netgen/include/meshing/meshing2.hpp +163 -0
- netgen/include/meshing/meshing3.hpp +123 -0
- netgen/include/meshing/meshtool.hpp +90 -0
- netgen/include/meshing/meshtype.hpp +1930 -0
- netgen/include/meshing/msghandler.hpp +62 -0
- netgen/include/meshing/paralleltop.hpp +172 -0
- netgen/include/meshing/python_mesh.hpp +206 -0
- netgen/include/meshing/ruler2.hpp +172 -0
- netgen/include/meshing/ruler3.hpp +211 -0
- netgen/include/meshing/soldata.hpp +141 -0
- netgen/include/meshing/specials.hpp +17 -0
- netgen/include/meshing/surfacegeom.hpp +73 -0
- netgen/include/meshing/topology.hpp +1003 -0
- netgen/include/meshing/validate.hpp +21 -0
- netgen/include/meshing/visual_interface.hpp +71 -0
- netgen/include/mydefs.hpp +70 -0
- netgen/include/nginterface.h +474 -0
- netgen/include/nginterface_v2.hpp +406 -0
- netgen/include/nglib.h +697 -0
- netgen/include/nglib_occ.h +50 -0
- netgen/include/occ/occ_edge.hpp +47 -0
- netgen/include/occ/occ_face.hpp +52 -0
- netgen/include/occ/occ_solid.hpp +23 -0
- netgen/include/occ/occ_utils.hpp +376 -0
- netgen/include/occ/occ_vertex.hpp +30 -0
- netgen/include/occ/occgeom.hpp +659 -0
- netgen/include/occ/occmeshsurf.hpp +168 -0
- netgen/include/occ/vsocc.hpp +33 -0
- netgen/include/pybind11/LICENSE +29 -0
- netgen/include/pybind11/attr.h +722 -0
- netgen/include/pybind11/buffer_info.h +208 -0
- netgen/include/pybind11/cast.h +2361 -0
- netgen/include/pybind11/chrono.h +228 -0
- netgen/include/pybind11/common.h +2 -0
- netgen/include/pybind11/complex.h +74 -0
- netgen/include/pybind11/conduit/README.txt +15 -0
- netgen/include/pybind11/conduit/pybind11_conduit_v1.h +116 -0
- netgen/include/pybind11/conduit/pybind11_platform_abi_id.h +87 -0
- netgen/include/pybind11/conduit/wrap_include_python_h.h +72 -0
- netgen/include/pybind11/critical_section.h +56 -0
- netgen/include/pybind11/detail/class.h +823 -0
- netgen/include/pybind11/detail/common.h +1348 -0
- netgen/include/pybind11/detail/cpp_conduit.h +75 -0
- netgen/include/pybind11/detail/descr.h +226 -0
- netgen/include/pybind11/detail/dynamic_raw_ptr_cast_if_possible.h +39 -0
- netgen/include/pybind11/detail/exception_translation.h +71 -0
- netgen/include/pybind11/detail/function_record_pyobject.h +191 -0
- netgen/include/pybind11/detail/init.h +538 -0
- netgen/include/pybind11/detail/internals.h +799 -0
- netgen/include/pybind11/detail/native_enum_data.h +209 -0
- netgen/include/pybind11/detail/pybind11_namespace_macros.h +82 -0
- netgen/include/pybind11/detail/struct_smart_holder.h +378 -0
- netgen/include/pybind11/detail/type_caster_base.h +1591 -0
- netgen/include/pybind11/detail/typeid.h +65 -0
- netgen/include/pybind11/detail/using_smart_holder.h +22 -0
- netgen/include/pybind11/detail/value_and_holder.h +90 -0
- netgen/include/pybind11/eigen/common.h +9 -0
- netgen/include/pybind11/eigen/matrix.h +723 -0
- netgen/include/pybind11/eigen/tensor.h +521 -0
- netgen/include/pybind11/eigen.h +12 -0
- netgen/include/pybind11/embed.h +320 -0
- netgen/include/pybind11/eval.h +161 -0
- netgen/include/pybind11/functional.h +147 -0
- netgen/include/pybind11/gil.h +199 -0
- netgen/include/pybind11/gil_safe_call_once.h +102 -0
- netgen/include/pybind11/gil_simple.h +37 -0
- netgen/include/pybind11/iostream.h +265 -0
- netgen/include/pybind11/native_enum.h +67 -0
- netgen/include/pybind11/numpy.h +2312 -0
- netgen/include/pybind11/operators.h +202 -0
- netgen/include/pybind11/options.h +92 -0
- netgen/include/pybind11/pybind11.h +3645 -0
- netgen/include/pybind11/pytypes.h +2680 -0
- netgen/include/pybind11/stl/filesystem.h +114 -0
- netgen/include/pybind11/stl.h +666 -0
- netgen/include/pybind11/stl_bind.h +858 -0
- netgen/include/pybind11/subinterpreter.h +299 -0
- netgen/include/pybind11/trampoline_self_life_support.h +65 -0
- netgen/include/pybind11/type_caster_pyobject_ptr.h +61 -0
- netgen/include/pybind11/typing.h +298 -0
- netgen/include/pybind11/warnings.h +75 -0
- netgen/include/stlgeom/meshstlsurface.hpp +67 -0
- netgen/include/stlgeom/stlgeom.hpp +491 -0
- netgen/include/stlgeom/stlline.hpp +193 -0
- netgen/include/stlgeom/stltool.hpp +331 -0
- netgen/include/stlgeom/stltopology.hpp +419 -0
- netgen/include/stlgeom/vsstl.hpp +58 -0
- netgen/include/visualization/meshdoc.hpp +42 -0
- netgen/include/visualization/mvdraw.hpp +325 -0
- netgen/include/visualization/vispar.hpp +128 -0
- netgen/include/visualization/visual.hpp +28 -0
- netgen/include/visualization/visual_api.hpp +10 -0
- netgen/include/visualization/vssolution.hpp +399 -0
- netgen/lib/libnggui.lib +0 -0
- netgen/lib/ngcore.lib +0 -0
- netgen/lib/nglib.lib +0 -0
- netgen/lib/togl.lib +0 -0
- netgen/libnggui.dll +0 -0
- netgen/libngguipy.lib +0 -0
- netgen/libngguipy.pyd +0 -0
- netgen/libngpy/_NgOCC.pyi +1545 -0
- netgen/libngpy/__init__.pyi +7 -0
- netgen/libngpy/_csg.pyi +259 -0
- netgen/libngpy/_geom2d.pyi +323 -0
- netgen/libngpy/_meshing.pyi +1111 -0
- netgen/libngpy/_stl.pyi +131 -0
- netgen/libngpy.lib +0 -0
- netgen/libngpy.pyd +0 -0
- netgen/meshing.py +65 -0
- netgen/ngcore.dll +0 -0
- netgen/nglib.dll +0 -0
- netgen/occ.py +52 -0
- netgen/read_gmsh.py +259 -0
- netgen/read_meshio.py +22 -0
- netgen/stl.py +2 -0
- netgen/togl.dll +0 -0
- netgen/version.py +2 -0
- netgen/webgui.py +529 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/boundarycondition.geo +16 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/boxcyl.geo +32 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/circle_on_cube.geo +27 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/cone.geo +13 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/cube.geo +16 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/cubeandring.geo +55 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/cubeandspheres.geo +21 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/cubemcyl.geo +18 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/cubemsphere.geo +19 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/cylinder.geo +12 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/cylsphere.geo +12 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/doc/ng4.pdf +0 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/ellipsoid.geo +8 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/ellipticcyl.geo +10 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/extrusion.geo +99 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/fichera.geo +24 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/frame.step +11683 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/hinge.stl +8486 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/lshape3d.geo +26 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/manyholes.geo +26 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/manyholes2.geo +26 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/matrix.geo +27 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/ortho.geo +11 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/part1.stl +2662 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/period.geo +33 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/py_tutorials/exportNeutral.py +26 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/py_tutorials/mesh.py +19 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/py_tutorials/shaft.geo +65 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/revolution.geo +18 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/screw.step +1694 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/sculpture.geo +13 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/shaft.geo +65 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/shell.geo +10 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/sphere.geo +8 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/sphereincube.geo +17 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/square.in2d +35 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/squarecircle.in2d +48 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/squarehole.in2d +47 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/torus.geo +8 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/trafo.geo +57 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/twobricks.geo +15 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/twocubes.geo +18 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/twocyl.geo +16 -0
- netgen_mesher-6.2.2506.post35.dev0.dist-info/METADATA +15 -0
- netgen_mesher-6.2.2506.post35.dev0.dist-info/RECORD +340 -0
- netgen_mesher-6.2.2506.post35.dev0.dist-info/WHEEL +5 -0
- netgen_mesher-6.2.2506.post35.dev0.dist-info/entry_points.txt +2 -0
- netgen_mesher-6.2.2506.post35.dev0.dist-info/licenses/AUTHORS +1 -0
- netgen_mesher-6.2.2506.post35.dev0.dist-info/licenses/LICENSE +504 -0
- netgen_mesher-6.2.2506.post35.dev0.dist-info/top_level.txt +2 -0
- pyngcore/__init__.py +1 -0
- pyngcore/pyngcore.cp314-win_amd64.pyd +0 -0
|
@@ -0,0 +1,1161 @@
|
|
|
1
|
+
#ifndef NETGEN_CORE_TASKMANAGER_HPP
#define NETGEN_CORE_TASKMANAGER_HPP

/*********************************************************************/
/* File:    taskmanager.hpp                                          */
/* Author:  M. Hochsterger, J. Schoeberl                             */
/* Date:    10. Mar. 2015                                            */
/*********************************************************************/

#include <atomic>
#include <functional>
#include <list>
#include <cmath>
#include <ostream>
#include <thread>

#include "array.hpp"
#include "paje_trace.hpp"
// NOTE(review): a redundant self-include of "taskmanager.hpp" was removed
// here; the include guard above made it a no-op, so behavior is unchanged.

#ifdef USE_NUMA
#include <numa.h>
#include <sched.h>
#endif
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
namespace ngcore
|
|
28
|
+
{
|
|
29
|
+
using std::atomic;
|
|
30
|
+
using std::function;
|
|
31
|
+
|
|
32
|
+
// Per-task context handed to the job function by TaskManager::CreateJob:
// identifies which slice of the job the callee should process and on
// which worker thread it is running.
class TaskInfo
{
public:
  int task_nr;    // index of this task within the job
  int ntasks;     // total number of tasks the job was split into

  int thread_nr;  // index of the executing worker thread
  int nthreads;   // total number of worker threads

  // int node_nr;
  // int nnodes;
};
|
|
44
|
+
|
|
45
|
+
// Global pointer to the single active TaskManager; nullptr while no
// manager is running (other code in this header checks it for that).
NGCORE_API extern class TaskManager * task_manager;
|
|
46
|
+
|
|
47
|
+
// Fork/join task manager: a pool of worker threads runs in Loop() and
// executes jobs submitted through CreateJob(). Almost all state is static,
// so jobs can be created via static calls without a TaskManager pointer.
class TaskManager
{
  // PajeTrace *trace;

  // Per-node counters, aligned to a cache line (alignas(64)) so the two
  // atomics of different nodes do not share a line (false-sharing guard).
  class alignas(64) NodeData
  {
  public:
    atomic<int> start_cnt{0};    // presumably counts tasks started on this node — confirm in taskmanager.cpp
    atomic<int> participate{0};  // presumably counts workers joined into the current job — confirm in taskmanager.cpp
  };

  NGCORE_API static const function<void(TaskInfo&)> * func;       // body of the job currently being executed
  NGCORE_API static const function<void()> * startup_function;    // optional per-worker hook (see SetStartupFunction)
  NGCORE_API static const function<void()> * cleanup_function;    // optional per-worker hook (see SetCleanupFunction)
  NGCORE_API static atomic<int> ntasks;   // task count of the current job
  NGCORE_API static Exception * ex;       // NOTE(review): presumably captures an exception from a task — verify handling in .cpp

  NGCORE_API static atomic<int> jobnr;    // id/counter of the current job

  static atomic<int> complete[8]; // max nodes
  static atomic<int> done;                 // set via Done() to end the worker loop
  static atomic<int> active_workers;       // number of live worker threads
  static atomic<int> workers_on_node[8]; // max nodes
  // Array<atomic<int>*> sync;
  NGCORE_API static int sleep_usecs;       // poll interval used while sleeping
  NGCORE_API static bool sleep;            // true => workers sleep-poll instead of busy-wait

  static NodeData *nodedata[8];            // per-node data, max 8 nodes

  static int num_nodes;
  NGCORE_API static int num_threads;       // threads of the running manager
  NGCORE_API static int max_threads;       // bound configured via SetNumThreads



#ifdef WIN32 // no exported thread_local in dlls on Windows
  static thread_local int thread_id;
#else
  NGCORE_API static thread_local int thread_id;
#endif
  NGCORE_API static bool use_paje_trace;   // whether to record a paje trace
public:

  NGCORE_API TaskManager();
  NGCORE_API ~TaskManager();


  // Start / stop the pool of worker threads.
  NGCORE_API void StartWorkers();
  NGCORE_API void StopWorkers();

  bool IsSleeping() const { return sleep; }

  // Switch workers into a sleeping poll loop with the given interval;
  // returns the previous interval so callers can restore it later
  // (see SuspendTaskManager below).
  int SuspendWorkers(int asleep_usecs = 1000 )
  {
    int old_sleep_usecs = sleep_usecs;
    sleep_usecs = asleep_usecs;
    sleep = true;
    return old_sleep_usecs;
  }
  void ResumeWorkers() { sleep = false; }

  NGCORE_API static void SetNumThreads(int amax_threads);
  static int GetMaxThreads() { return max_threads; }
  // static int GetNumThreads() { return task_manager ? task_manager->num_threads : 1; }
  static int GetNumThreads() { return num_threads; }
#ifdef WIN32
  NGCORE_API static int GetThreadId();
#else
  static int GetThreadId() { return thread_id; }
#endif
  int GetNumNodes() const { return num_nodes; }

  static void SetPajeTrace (bool use) { use_paje_trace = use; }

  NGCORE_API static bool ProcessTask();

  // Submit a job: afunc is run antasks times as parallel tasks.
  // NOTE(review): the default argument calls GetNumThreads() through the
  // global task_manager pointer; GetNumThreads is static, so no
  // dereference happens even when task_manager is nullptr.
  NGCORE_API static void CreateJob (const function<void(TaskInfo&)> & afunc,
                                    int antasks = task_manager->GetNumThreads());

  // Install / clear the per-worker hooks. Only the ADDRESS of the passed
  // function object is stored, so it must outlive its use.
  static void SetStartupFunction (const function<void()> & func) { startup_function = &func; }
  static void SetStartupFunction () { startup_function = nullptr; }
  static void SetCleanupFunction (const function<void()> & func) { cleanup_function = &func; }
  static void SetCleanupFunction () { cleanup_function = nullptr; }

  void Done() { done = true; }
  // Main body of each worker thread.
  NGCORE_API void Loop(int thread_num);

  // Timing statistics as (name, value) pairs.
  NGCORE_API static std::list<std::tuple<std::string,double>> Timing ();
};
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
// Run alg with the task manager active (workers started before, stopped
// after the call).
NGCORE_API void RunWithTaskManager (function<void()> alg);

// For Python context manager:
// EnterTaskManager starts the manager and returns a thread count that must
// be passed back to ExitTaskManager to shut it down again.
NGCORE_API int EnterTaskManager ();
NGCORE_API void ExitTaskManager (int num_threads);
|
|
150
|
+
|
|
151
|
+
// RAII region guard: ensures a task manager with the requested number of
// threads is running for this object's lifetime. If a manager is already
// running (or 0 threads are requested) it does nothing; otherwise it
// starts one and tears it down again in the destructor, restoring the
// previous thread-count setting.
class RegionTaskManager
{
  int nthreads_before;       // max-thread setting to restore on exit
  int nthreads;              // requested, then actually entered, thread count
  bool started_taskmanager;  // true iff this object started the manager

public:
  RegionTaskManager(int anthreads=TaskManager::GetMaxThreads())
    : nthreads(anthreads)
  {
    if(task_manager || nthreads==0)
      {
        // already running, no need to do anything
        started_taskmanager = false;
        return;
      }
    else
      {
        nthreads_before = TaskManager::GetMaxThreads();
        TaskManager::SetNumThreads(nthreads);
        nthreads = EnterTaskManager();
        started_taskmanager = true;
      }
  }

  ~RegionTaskManager()
  {
    if(started_taskmanager)
      {
        ExitTaskManager(nthreads);
        TaskManager::SetNumThreads(nthreads_before);
      }
  }
};
|
|
185
|
+
|
|
186
|
+
// RAII guard that puts the worker threads into a sleeping poll loop for
// its lifetime and restores the previous sleep state on destruction.
// Safe to construct when no task manager is running (does nothing then).
class SuspendTaskManager
{
  int old_sleep_usecs = 0;    // sleep interval to restore on exit
  bool old_sleep = false;     // whether workers were already sleeping
  TaskManager * tm = nullptr; // captured manager; nullptr => no-op guard

public:
  SuspendTaskManager(int asleep_usecs=1000)
    : tm(task_manager)
  {
    if(!tm)
      return;

    old_sleep = tm->IsSleeping();
    old_sleep_usecs = tm->SuspendWorkers(asleep_usecs);
  }

  ~SuspendTaskManager()
  {
    if(!tm)
      return;

    if(old_sleep) // restore old sleep time
      tm->SuspendWorkers(old_sleep_usecs);
    else
      tm->ResumeWorkers();
  }
};
|
|
214
|
+
|
|
215
|
+
/// Scale a tasks-per-thread factor by the current thread count, yielding
/// a total task count for a parallel loop.
NETGEN_INLINE int TasksPerThread (int tpt)
{
  const int nthreads = TaskManager::GetNumThreads();
  return nthreads * tpt;
}
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
/// Lightweight wrapper carrying an estimated total work amount for a
/// parallel loop. Intentionally constructible implicitly from a plain
/// count so call sites can pass e.g. a bare `1000` as a default argument.
class TotalCosts
{
  size_t total_cost;
public:
  TotalCosts (size_t _cost) : total_cost(_cost) {}
  /// Retrieve the stored cost estimate.
  size_t operator ()() { return total_cost; }
};
|
|
229
|
+
|
|
230
|
+
// Split range r into antasks tasks and apply f to every element in
// parallel. costs is currently unused here; the commented-out code shows
// a former serial fallback for cheap loops.
template <typename TR, typename TFUNC>
NETGEN_INLINE void ParallelFor (T_Range<TR> r, TFUNC f,
                                int antasks = TaskManager::GetNumThreads(),
                                TotalCosts costs = 1000)
{
  // if (task_manager && costs() >= 1000)

  TaskManager::CreateJob
    ([r, f] (TaskInfo & ti)
     {
       // each task processes its own contiguous slice of r
       auto myrange = r.Split (ti.task_nr, ti.ntasks);
       for (auto i : myrange) f(i);
     },
     antasks);

  /*
    else
    for (auto i : r) f(i);
  */
}
|
|
250
|
+
|
|
251
|
+
/*
|
|
252
|
+
template <typename TFUNC>
|
|
253
|
+
NETGEN_INLINE void ParallelFor (size_t n, TFUNC f,
|
|
254
|
+
int antasks = task_manager ? task_manager->GetNumThreads() : 0)
|
|
255
|
+
{
|
|
256
|
+
ParallelFor (IntRange (n), f, antasks);
|
|
257
|
+
}
|
|
258
|
+
*/
|
|
259
|
+
// Convenience overload: parallel loop over i = 0..n-1, forwarding any
// additional arguments (task count, costs) to the range-based ParallelFor.
template <typename ...Args>
NETGEN_INLINE void ParallelFor (size_t n, Args...args)
{
  ParallelFor (IntRange (n), args...);
}
|
|
264
|
+
|
|
265
|
+
// Like ParallelFor, but f receives a whole sub-range per task instead of
// being called once per element — use when per-task setup is expensive.
// costs is currently unused; the commented-out code shows a former serial
// fallback for cheap loops.
template <typename TR, typename TFUNC>
NETGEN_INLINE void ParallelForRange (T_Range<TR> r, TFUNC f,
                                     int antasks = TaskManager::GetNumThreads(),
                                     TotalCosts costs = 1000)
{
  // if (task_manager && costs() >= 1000)

  TaskManager::CreateJob
    ([r, f] (TaskInfo & ti)
     {
       // each task gets one contiguous slice of r
       auto myrange = r.Split (ti.task_nr, ti.ntasks);
       f(myrange);
     },
     antasks);
  /*
    else
    f(r);
  */
}
|
|
284
|
+
|
|
285
|
+
/*
|
|
286
|
+
template <typename TFUNC>
|
|
287
|
+
NETGEN_INLINE void ParallelForRange (size_t n, TFUNC f,
|
|
288
|
+
int antasks = task_manager ? task_manager->GetNumThreads() : 0)
|
|
289
|
+
{
|
|
290
|
+
ParallelForRange (IntRange(n), f, antasks);
|
|
291
|
+
}
|
|
292
|
+
*/
|
|
293
|
+
// Convenience overload: parallel range-loop over 0..n-1, forwarding any
// additional arguments to the range-based ParallelForRange.
template <typename ...Args>
NETGEN_INLINE void ParallelForRange (size_t n, Args...args)
{
  ParallelForRange (IntRange(n), args...);
}
|
|
298
|
+
|
|
299
|
+
// Run f once per task (antasks tasks total) on the worker threads;
// f receives a TaskInfo describing its task/thread indices.
template <typename TFUNC>
NETGEN_INLINE void ParallelJob (TFUNC f,
                                int antasks = TaskManager::GetNumThreads())
{
  TaskManager::CreateJob (f, antasks);
}
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
/*
|
|
308
|
+
Usage example:
|
|
309
|
+
|
|
310
|
+
ShareLoop myloop(100);
|
|
311
|
+
task_manager->CreateJob ([]()
|
|
312
|
+
{
|
|
313
|
+
for (int i : myloop)
|
|
314
|
+
cout << "i = " << i << endl;
|
|
315
|
+
});
|
|
316
|
+
|
|
317
|
+
*/
|
|
318
|
+
|
|
319
|
+
// Work-sharing loop: several threads iterate over the SAME SharedLoop
// object, and each index of the underlying range is handed out exactly
// once via an atomic counter. Order of indices across threads is
// unspecified. See the usage example in the comment above.
class SharedLoop
{
  atomic<int> cnt;  // next index to hand out
  IntRange r;       // the range being distributed


  class SharedIterator
  {
    atomic<int> & cnt;  // shared counter of the owning SharedLoop
    int myval;          // index currently held by this iterator
    int endval;         // one-past-the-end sentinel
  public:
    SharedIterator (atomic<int> & acnt, int aendval, bool begin_iterator)
      : cnt (acnt)
    {
      endval = aendval;
      // the begin iterator claims an index right away (atomic post-inc);
      // the end iterator just carries the sentinel
      myval = begin_iterator ? cnt++ : endval;
      if (myval > endval) myval = endval;  // clamp once range is exhausted
    }

    SharedIterator & operator++ ()
    {
      // claim the next unprocessed index (atomic fetch-add)
      myval = cnt++;
      if (myval > endval) myval = endval;  // clamp so we compare equal to end()
      return *this;
    }

    int operator* () const { return myval; }
    bool operator!= (const SharedIterator & it2) const { return myval != it2.myval; }
  };


public:
  SharedLoop (IntRange ar) : r(ar) { cnt = r.begin(); }
  SharedLoop (size_t s) : SharedLoop (IntRange{s}) { ; }
  SharedIterator begin() { return SharedIterator (cnt, r.end(), true); }
  SharedIterator end() { return SharedIterator (cnt, r.end(), false); }
};
|
|
357
|
+
|
|
358
|
+
|
|
359
|
+
/*
|
|
360
|
+
class alignas(4096) AtomicRange
|
|
361
|
+
{
|
|
362
|
+
mutex lock;
|
|
363
|
+
int begin;
|
|
364
|
+
int end;
|
|
365
|
+
public:
|
|
366
|
+
|
|
367
|
+
void Set (IntRange r)
|
|
368
|
+
{
|
|
369
|
+
lock_guard<mutex> guard(lock);
|
|
370
|
+
begin = r.begin();
|
|
371
|
+
end = r.end();
|
|
372
|
+
}
|
|
373
|
+
|
|
374
|
+
IntRange Get()
|
|
375
|
+
{
|
|
376
|
+
lock_guard<mutex> guard(lock);
|
|
377
|
+
return IntRange(begin, end);
|
|
378
|
+
}
|
|
379
|
+
|
|
380
|
+
bool PopFirst (int & first)
|
|
381
|
+
{
|
|
382
|
+
lock_guard<mutex> guard(lock);
|
|
383
|
+
bool non_empty = end > begin;
|
|
384
|
+
first = begin;
|
|
385
|
+
if (non_empty) begin++;
|
|
386
|
+
return non_empty;
|
|
387
|
+
}
|
|
388
|
+
|
|
389
|
+
bool PopHalf (IntRange & r)
|
|
390
|
+
{
|
|
391
|
+
lock_guard<mutex> guard(lock);
|
|
392
|
+
bool non_empty = end > begin;
|
|
393
|
+
if (non_empty)
|
|
394
|
+
{
|
|
395
|
+
int mid = (begin+end+1)/2;
|
|
396
|
+
r = IntRange(begin, mid);
|
|
397
|
+
begin = mid;
|
|
398
|
+
}
|
|
399
|
+
return non_empty;
|
|
400
|
+
}
|
|
401
|
+
};
|
|
402
|
+
*/
|
|
403
|
+
|
|
404
|
+
|
|
405
|
+
|
|
406
|
+
// lock free popfirst
|
|
407
|
+
// faster for large loops, but slower for small loops (~1000) ????
|
|
408
|
+
/*
|
|
409
|
+
class alignas(4096) AtomicRange
|
|
410
|
+
{
|
|
411
|
+
mutex lock;
|
|
412
|
+
atomic<int> begin;
|
|
413
|
+
int end;
|
|
414
|
+
public:
|
|
415
|
+
|
|
416
|
+
void Set (IntRange r)
|
|
417
|
+
{
|
|
418
|
+
lock_guard<mutex> guard(lock);
|
|
419
|
+
// begin = r.begin();
|
|
420
|
+
begin.store(r.begin(), std::memory_order_relaxed);
|
|
421
|
+
end = r.end();
|
|
422
|
+
}
|
|
423
|
+
|
|
424
|
+
void SetNoLock (IntRange r)
|
|
425
|
+
{
|
|
426
|
+
begin.store(r.begin(), std::memory_order_relaxed);
|
|
427
|
+
end = r.end();
|
|
428
|
+
}
|
|
429
|
+
|
|
430
|
+
// IntRange Get()
|
|
431
|
+
// {
|
|
432
|
+
// lock_guard<mutex> guard(lock);
|
|
433
|
+
// return IntRange(begin, end);
|
|
434
|
+
// }
|
|
435
|
+
|
|
436
|
+
bool PopFirst (int & first)
|
|
437
|
+
{
|
|
438
|
+
// int oldbegin = begin;
|
|
439
|
+
int oldbegin = begin.load(std::memory_order_relaxed);
|
|
440
|
+
if (oldbegin >= end) return false;
|
|
441
|
+
while (!begin.compare_exchange_weak (oldbegin, oldbegin+1,
|
|
442
|
+
std::memory_order_relaxed, std::memory_order_relaxed))
|
|
443
|
+
if (oldbegin >= end) return false;
|
|
444
|
+
|
|
445
|
+
first = oldbegin;
|
|
446
|
+
return true;
|
|
447
|
+
}
|
|
448
|
+
|
|
449
|
+
bool PopHalf (IntRange & r)
|
|
450
|
+
{
|
|
451
|
+
// int oldbegin = begin;
|
|
452
|
+
int oldbegin = begin.load(std::memory_order_relaxed);
|
|
453
|
+
if (oldbegin >= end) return false;
|
|
454
|
+
|
|
455
|
+
lock_guard<mutex> guard(lock);
|
|
456
|
+
while (!begin.compare_exchange_weak (oldbegin, (oldbegin+end+1)/2,
|
|
457
|
+
std::memory_order_relaxed, std::memory_order_relaxed))
|
|
458
|
+
if (oldbegin >= end) return false;
|
|
459
|
+
|
|
460
|
+
r = IntRange(oldbegin, (oldbegin+end+1)/2);
|
|
461
|
+
return true;
|
|
462
|
+
}
|
|
463
|
+
};
|
|
464
|
+
|
|
465
|
+
|
|
466
|
+
// inline ostream & operator<< (ostream & ost, AtomicRange & r)
|
|
467
|
+
// {
|
|
468
|
+
// ost << r.Get();
|
|
469
|
+
// return ost;
|
|
470
|
+
// }
|
|
471
|
+
*/
|
|
472
|
+
|
|
473
|
+
|
|
474
|
+
|
|
475
|
+
/// Lock-free range [begin,end) of work items shared between threads.
/// alignas(4096): each AtomicRange occupies its own memory page, so the
/// per-thread buckets in SharedLoop2 do not suffer false sharing.
class alignas(4096) AtomicRange
{
  atomic<size_t> begin;   // index of the next item to hand out
  atomic<size_t> end;     // one past the last item
public:

  /// Install a new range while other threads may be popping concurrently.
  /// begin is first parked at a huge sentinel so that concurrent readers
  /// observe an empty range while end is replaced; then begin is published.
  void Set (IntRange r)
  {
    begin.store(std::numeric_limits<size_t>::max(), std::memory_order_release);
    end.store(r.end(), std::memory_order_release);
    begin.store(r.begin(), std::memory_order_release);
  }

  /// Install a new range without the sentinel dance; only safe while no
  /// other thread pops from this range (e.g. during SharedLoop2::Reset).
  void SetNoLock (IntRange r)
  {
    end.store(r.end(), std::memory_order_release);
    begin.store(r.begin(), std::memory_order_release);
  }

  // IntRange Get()
  // {
  //   lock_guard<mutex> guard(lock);
  //   return IntRange(begin, end);
  // }

  /// Pop a single item from the front; on success writes it to hfirst.
  /// Returns false when the range is empty.
  bool PopFirst (size_t & hfirst)
  {
    // first = begin++;
    // return first < end;

    size_t first = begin.load(std::memory_order_relaxed);

    // when empty, park begin at a sentinel (max-1, distinct from the max
    // sentinel used by Set) instead of advancing past end
    size_t nextfirst = first+1;
    if (first >= end) nextfirst = std::numeric_limits<size_t>::max()-1;

    // while (!begin.compare_exchange_weak (first, nextfirst))
    while (!begin.compare_exchange_weak (first, nextfirst,
                                         std::memory_order_relaxed,
                                         std::memory_order_relaxed))
      {
        // compare_exchange_weak already refreshed 'first'; this extra
        // load is redundant but harmless
        first = begin;
        nextfirst = first+1;
        // NOTE(review): the retry path clamps on nextfirst >= end while
        // the initial path clamps on first >= end — asymmetric; both
        // leave an empty range, but confirm this is intended.
        if (nextfirst >= end) nextfirst = std::numeric_limits<size_t>::max()-1;
      }
    hfirst = first;
    return first < end;
  }

  /// Pop the lower half of the remaining range into r (used for work
  /// stealing); begin advances to the midpoint, the upper half remains.
  /// Returns false when the range is empty.
  bool PopHalf (IntRange & r)
  {
    /*
    // int oldbegin = begin;
    size_t oldbegin = begin.load(std::memory_order_acquire);
    size_t oldend = end.load(std::memory_order_acquire);
    if (oldbegin >= oldend) return false;

    // lock_guard<mutex> guard(lock);
    while (!begin.compare_exchange_weak (oldbegin, (oldbegin+oldend+1)/2,
                                         std::memory_order_relaxed, std::memory_order_relaxed))
      {
        oldend = end.load(std::memory_order_acquire);
        if (oldbegin >= oldend) return false;
      }

    r = IntRange(oldbegin, (oldbegin+oldend+1)/2);
    return true;
    */


    size_t oldbegin = begin; // .load(std::memory_order_acquire);
    size_t oldend = end; // .load(std::memory_order_acquire);
    if (oldbegin >= oldend) return false;

    // midpoint rounded up; a single-element range is taken entirely and
    // begin is parked at the sentinel
    size_t nextbegin = (oldbegin+oldend+1)/2;
    if (nextbegin >= oldend) nextbegin = std::numeric_limits<size_t>::max()-1;

    while (!begin.compare_exchange_weak (oldbegin, nextbegin))
      // std::memory_order_relaxed, std::memory_order_relaxed))
      {
        oldend = end; // .load(std::memory_order_acquire);
        if (oldbegin >= oldend) return false;

        nextbegin = (oldbegin+oldend+1)/2;
        if (nextbegin >= oldend) nextbegin = std::numeric_limits<size_t>::max()-1;
      }

    // recompute the midpoint here: nextbegin may hold the sentinel value
    r = IntRange(oldbegin, (oldbegin+oldend+1)/2);
    return true;
  }
};
|
|
565
|
+
|
|
566
|
+
|
|
567
|
+
|
|
568
|
+
|
|
569
|
+
/// Dynamically load-balanced parallel loop with work stealing.
/// The iteration range is split into one AtomicRange bucket per thread;
/// each participating thread drains its own bucket and then steals halves
/// of other threads' buckets until all 'total' items are processed.
class SharedLoop2
{
  Array<AtomicRange> ranges;    // one work bucket per thread
  atomic<size_t> processed;     // items completed, summed over all threads
  atomic<size_t> total;         // number of items in the whole loop
  atomic<int> participants;     // currently unused (see commented-out begin())

  /// Iterator handed to each participating thread. Dereferencing yields
  /// the current loop index; operator++ fetches the next piece of work.
  class SharedIterator
  {
    FlatArray<AtomicRange> ranges;
    atomic<size_t> & processed;   // shared global progress counter
    size_t total;
    size_t myval;                 // current loop index
    size_t processed_by_me = 0;   // batched locally, flushed to 'processed'
    int me;                       // index of my own bucket
    int steal_from;               // round-robin steal cursor
  public:
    SharedIterator (FlatArray<AtomicRange> _ranges, atomic<size_t> & _processed, size_t _total,
                    int _me, bool begin_it)
      : ranges(_ranges), processed(_processed), total(_total)
    {
      // the end() sentinel (begin_it == false) leaves me/steal_from/myval
      // unset — it is never advanced or dereferenced
      if (begin_it)
        {
          // me = TaskManager::GetThreadId();
          me = _me;
          steal_from = me;
          GetNext();
        }
    }
    ~SharedIterator()
    {
      // flush locally batched progress
      if (processed_by_me)
        processed += processed_by_me;
    }

    SharedIterator & operator++ () { GetNext(); return *this;}

    /// Fast path: pop from my own bucket; otherwise go stealing.
    void GetNext()
    {
      size_t nr;
      if (ranges[me].PopFirst(nr))
        {
          processed_by_me++;
          myval = nr;
          return;
        }
      GetNext2();
    }

    /// Slow path: publish my progress, then steal half-ranges from other
    /// buckets until work is found or the loop is globally finished.
    void GetNext2()
    {
      processed += processed_by_me;
      processed_by_me = 0;

      // done with my work, going to steal ...
      while (1)
        {
          if (processed >= total) return;

          steal_from++;
          if (steal_from == ranges.Size()) steal_from = 0;

          // steal half of the work reserved for 'from':
          IntRange steal;
          if (ranges[steal_from].PopHalf(steal))
            {
              myval = steal.First();
              processed_by_me++;
              // keep the remainder of the stolen range in my own bucket
              if (myval+1 < steal.Next())
                ranges[me].Set (IntRange(myval+1, steal.Next()));
              return;
            }
        }
    }

    size_t operator* () const { return myval; }
    // termination is global progress, not iterator position: the range-for
    // ends once all items have been processed by somebody
    bool operator!= (const SharedIterator & it2) const { return processed < total; }
  };


public:
  SharedLoop2 ()
    : ranges(TaskManager::GetNumThreads())
  { ; }

  SharedLoop2 (IntRange r)
    : ranges(TaskManager::GetNumThreads())
  {
    Reset (r);
  }

  SharedLoop2 (size_t s) : SharedLoop2 (IntRange{s}) { }

  /// Re-arm the loop for range r: give every thread an equal static chunk
  /// and clear the progress counters. Must not race with active iterators.
  void Reset (IntRange r)
  {
    for (size_t i = 0; i < ranges.Size(); i++)
      ranges[i].SetNoLock (r.Split(i,ranges.Size()));

    total.store(r.Size(), std::memory_order_relaxed);
    participants.store(0, std::memory_order_relaxed);
    processed.store(0, std::memory_order_release);
  }

  void Reset (size_t s) { Reset(IntRange{s}); }


  SharedIterator begin()
  {
    /*
    int me = participants++;
    if (me < ranges.Size())
      return SharedIterator (ranges, processed, total, me, true);
    else
      // more participants than buckets. set processed to total, and the loop is terminated immediately
      return SharedIterator (ranges, total, total, me, true);
    */
    return SharedIterator (ranges, processed, total, TaskManager::GetThreadId(), true);
  }

  SharedIterator end() { return SharedIterator (ranges, processed, total, -1, false); }
};
|
|
690
|
+
|
|
691
|
+
|
|
692
|
+
|
|
693
|
+
|
|
694
|
+
|
|
695
|
+
/// Partition of an index range [0,n) into consecutive sub-ranges of
/// approximately equal total cost, with a user-supplied per-index cost.
/// part holds size+1 boundaries; sub-range i is [part[i], part[i+1]).
class Partitioning
{
  Array<size_t> part;    // boundaries, part[0] == 0
  size_t total_costs;    // sum of all costs — set by Calc only.
                         // NOTE(review): uninitialized after the default
                         // ctor or Array assignment; confirm callers call
                         // Calc before GetTotalCosts.
public:
  Partitioning () { ; }

  template <typename T>
  Partitioning (const Array<T> & apart) { part = apart; }

  template <typename T>
  Partitioning & operator= (const Array<T> & apart) { part = apart; return *this; }

  size_t GetTotalCosts() const { return total_costs; }

  /// Compute a cost-balanced partition of [0,n) into 'size' parts.
  /// costs(i) must return the (non-negative) cost of index i.
  template <typename TFUNC>
  void Calc (size_t n, TFUNC costs, int size = task_manager ? task_manager->GetNumThreads() : 1)
  {
    Array<size_t> prefix (n);

    /*
    // sequential reference implementation of the prefix sum:
    size_t sum = 0;
    for (auto i : ngstd::Range(n))
      {
        sum += costs(i);
        prefix[i] = sum;
      }
    total_costs = sum;
    */

    // parallel inclusive prefix sum of the costs, in three phases.
    // assumes ParallelJob runs exactly GetNumThreads() tasks, matching
    // the size of partial_sums — TODO confirm.
    // phase 1: per-task evaluation of costs and per-task partial sums
    Array<size_t> partial_sums(TaskManager::GetNumThreads()+1);
    partial_sums[0] = 0;
    ParallelJob
      ([&] (TaskInfo ti)
       {
         IntRange r = IntRange(n).Split(ti.task_nr, ti.ntasks);
         size_t mysum = 0;
         for (size_t i : r)
           {
             size_t c = costs(i);
             mysum += c;
             prefix[i] = c;
           }
         partial_sums[ti.task_nr+1] = mysum;
       });

    // phase 2: sequential scan over the (few) per-task sums
    for (size_t i = 1; i < partial_sums.Size(); i++)
      partial_sums[i] += partial_sums[i-1];
    total_costs = partial_sums.Last();

    // phase 3: turn the stored per-index costs into the inclusive prefix sum
    ParallelJob
      ([&] (TaskInfo ti)
       {
         IntRange r = IntRange(n).Split(ti.task_nr, ti.ntasks);
         size_t mysum = partial_sums[ti.task_nr];
         for (size_t i : r)
           {
             mysum += prefix[i];
             prefix[i] = mysum;
           }
       });


    // boundary i is placed where the running cost crosses i/size of the total
    part.SetSize (size+1);
    part[0] = 0;

    for (int i = 1; i <= size; i++)
      part[i] = BinSearch (prefix, total_costs*i/size);
  }

  size_t Size() const { return part.Size()-1; }
  IntRange operator[] (size_t i) const { return ngcore::Range(part[i], part[i+1]); }
  IntRange Range() const { return ngcore::Range(part[0], part[Size()]); }




private:
  /// Binary search in the sorted prefix array: returns (roughly) the
  /// largest index f with v[f] < i; 0 if i precedes all entries and n if
  /// v[n-1] <= i. NOTE(review): when entries equal i the boundary can
  /// shift by one — harmless for load balancing, but confirm.
  template <typename Tarray>
  int BinSearch(const Tarray & v, size_t i) {
    int n = v.Size();
    if (n == 0) return 0;

    int first = 0;
    int last = n-1;
    if(v[0]>i) return 0;
    if(v[n-1] <= i) return n;
    while(last-first>1) {
      int m = (first+last)/2;
      if(v[m]<i)
        first = m;
      else
        last = m;
    }
    return first;
  }
};
|
|
792
|
+
|
|
793
|
+
|
|
794
|
+
inline std::ostream & operator<< (std::ostream & ost, const Partitioning & part)
|
|
795
|
+
{
|
|
796
|
+
for (int i : Range(part.Size()))
|
|
797
|
+
ost << part[i] << " ";
|
|
798
|
+
return ost;
|
|
799
|
+
}
|
|
800
|
+
|
|
801
|
+
|
|
802
|
+
// tasks must be a multiple of part.size
|
|
803
|
+
template <typename TFUNC>
|
|
804
|
+
NETGEN_INLINE void ParallelFor (const Partitioning & part, TFUNC f, int tasks_per_thread = 1)
|
|
805
|
+
{
|
|
806
|
+
if (task_manager)
|
|
807
|
+
{
|
|
808
|
+
int ntasks = tasks_per_thread * task_manager->GetNumThreads();
|
|
809
|
+
if (ntasks % part.Size() != 0)
|
|
810
|
+
throw Exception ("tasks must be a multiple of part.size");
|
|
811
|
+
|
|
812
|
+
task_manager -> CreateJob
|
|
813
|
+
([&] (TaskInfo & ti)
|
|
814
|
+
{
|
|
815
|
+
int tasks_per_part = ti.ntasks / part.Size();
|
|
816
|
+
int mypart = ti.task_nr / tasks_per_part;
|
|
817
|
+
int num_in_part = ti.task_nr % tasks_per_part;
|
|
818
|
+
|
|
819
|
+
auto myrange = part[mypart].Split (num_in_part, tasks_per_part);
|
|
820
|
+
for (auto i : myrange) f(i);
|
|
821
|
+
}, ntasks);
|
|
822
|
+
}
|
|
823
|
+
else
|
|
824
|
+
{
|
|
825
|
+
for (auto i : part.Range())
|
|
826
|
+
f(i);
|
|
827
|
+
}
|
|
828
|
+
}
|
|
829
|
+
|
|
830
|
+
|
|
831
|
+
|
|
832
|
+
|
|
833
|
+
|
|
834
|
+
template <typename TFUNC>
|
|
835
|
+
NETGEN_INLINE void ParallelForRange (const Partitioning & part, TFUNC f,
|
|
836
|
+
int tasks_per_thread = 1, TotalCosts costs = 1000)
|
|
837
|
+
{
|
|
838
|
+
if (task_manager && costs() >= 1000)
|
|
839
|
+
{
|
|
840
|
+
int ntasks = tasks_per_thread * task_manager->GetNumThreads();
|
|
841
|
+
if (ntasks % part.Size() != 0)
|
|
842
|
+
throw Exception ("tasks must be a multiple of part.size");
|
|
843
|
+
|
|
844
|
+
task_manager -> CreateJob
|
|
845
|
+
([&] (TaskInfo & ti)
|
|
846
|
+
{
|
|
847
|
+
int tasks_per_part = ti.ntasks / part.Size();
|
|
848
|
+
int mypart = ti.task_nr / tasks_per_part;
|
|
849
|
+
int num_in_part = ti.task_nr % tasks_per_part;
|
|
850
|
+
|
|
851
|
+
auto myrange = part[mypart].Split (num_in_part, tasks_per_part);
|
|
852
|
+
f(myrange);
|
|
853
|
+
}, ntasks);
|
|
854
|
+
}
|
|
855
|
+
else
|
|
856
|
+
{
|
|
857
|
+
f(part.Range());
|
|
858
|
+
}
|
|
859
|
+
}
|
|
860
|
+
|
|
861
|
+
|
|
862
|
+
|
|
863
|
+
|
|
864
|
+
|
|
865
|
+
template <typename FUNC, typename OP, typename T>
|
|
866
|
+
auto ParallelReduce (size_t n, FUNC f, OP op, T initial1)
|
|
867
|
+
{
|
|
868
|
+
typedef decltype (op(initial1,initial1)) TRES;
|
|
869
|
+
TRES initial(initial1);
|
|
870
|
+
/*
|
|
871
|
+
for (size_t i = 0; i < n; i++)
|
|
872
|
+
initial = op(initial, f(i));
|
|
873
|
+
*/
|
|
874
|
+
Array<TRES> part_reduce(TaskManager::GetNumThreads());
|
|
875
|
+
ParallelJob ([&] (TaskInfo ti)
|
|
876
|
+
{
|
|
877
|
+
auto r = Range(n).Split(ti.task_nr, ti.ntasks);
|
|
878
|
+
auto var = initial;
|
|
879
|
+
for (auto i : r)
|
|
880
|
+
var = op(var, f(i));
|
|
881
|
+
part_reduce[ti.task_nr] = var;
|
|
882
|
+
});
|
|
883
|
+
for (auto v : part_reduce)
|
|
884
|
+
initial = op(initial, v);
|
|
885
|
+
return initial;
|
|
886
|
+
}
|
|
887
|
+
|
|
888
|
+
|
|
889
|
+
|
|
890
|
+
|
|
891
|
+
|
|
892
|
+
|
|
893
|
+
|
|
894
|
+
// // some sugar for working with arrays
|
|
895
|
+
//
|
|
896
|
+
// template <typename T> template <typename T2>
|
|
897
|
+
// const FlatArray<T> FlatArray<T>::operator= (ParallelValue<T2> val)
|
|
898
|
+
// {
|
|
899
|
+
// ParallelForRange (Size(),
|
|
900
|
+
// [this, val] (IntRange r)
|
|
901
|
+
// {
|
|
902
|
+
// for (auto i : r)
|
|
903
|
+
// (*this)[i] = val;
|
|
904
|
+
// });
|
|
905
|
+
// return *this;
|
|
906
|
+
// }
|
|
907
|
+
//
|
|
908
|
+
// template <typename T> template <typename T2>
|
|
909
|
+
// const FlatArray<T> FlatArray<T>::operator= (ParallelFunction<T2> func)
|
|
910
|
+
// {
|
|
911
|
+
// ParallelForRange (Size(),
|
|
912
|
+
// [this, func] (IntRange r)
|
|
913
|
+
// {
|
|
914
|
+
// for (auto i : r)
|
|
915
|
+
// (*this)[i] = func(i);
|
|
916
|
+
// });
|
|
917
|
+
// return *this;
|
|
918
|
+
// }
|
|
919
|
+
|
|
920
|
+
class Tasks
|
|
921
|
+
{
|
|
922
|
+
size_t num;
|
|
923
|
+
public:
|
|
924
|
+
explicit Tasks (size_t _num = TaskManager::GetNumThreads()) : num(_num) { ; }
|
|
925
|
+
auto GetNum() const { return num; }
|
|
926
|
+
};
|
|
927
|
+
|
|
928
|
+
|
|
929
|
+
/*
|
|
930
|
+
// some idea, not yet supported
|
|
931
|
+
|
|
932
|
+
using namespace std;
|
|
933
|
+
template <typename T>
|
|
934
|
+
class ParallelValue
|
|
935
|
+
{
|
|
936
|
+
T val;
|
|
937
|
+
public:
|
|
938
|
+
ParallelValue (const T & _val) : val(_val) { ; }
|
|
939
|
+
operator T () const { return val; }
|
|
940
|
+
};
|
|
941
|
+
|
|
942
|
+
template <typename FUNC> class ParallelFunction
|
|
943
|
+
{
|
|
944
|
+
FUNC f;
|
|
945
|
+
public:
|
|
946
|
+
ParallelFunction (const FUNC & _f) : f(_f) { ; }
|
|
947
|
+
operator FUNC () const { return f; }
|
|
948
|
+
auto operator() (size_t i) const { return f(i); }
|
|
949
|
+
};
|
|
950
|
+
*/
|
|
951
|
+
|
|
952
|
+
/* currently not used, plus causing problems on MSVC 2017
|
|
953
|
+
template <typename T, typename std::enable_if<ngstd::has_call_operator<T>::value, int>::type = 0>
|
|
954
|
+
inline ParallelFunction<T> operator| (const T & func, Tasks tasks)
|
|
955
|
+
{
|
|
956
|
+
return func;
|
|
957
|
+
}
|
|
958
|
+
|
|
959
|
+
template <typename T, typename std::enable_if<!ngstd::has_call_operator<T>::value, int>::type = 0>
|
|
960
|
+
inline ParallelValue<T> operator| (const T & obj, Tasks tasks)
|
|
961
|
+
{
|
|
962
|
+
return obj;
|
|
963
|
+
}
|
|
964
|
+
|
|
965
|
+
inline Tasks operator "" _tasks_per_thread (unsigned long long n)
|
|
966
|
+
{
|
|
967
|
+
return Tasks(n * TaskManager::GetNumThreads());
|
|
968
|
+
}
|
|
969
|
+
*/
|
|
970
|
+
|
|
971
|
+
/*
|
|
972
|
+
thought to be used as: array = 1 | tasks
|
|
973
|
+
class DefaultTasks
|
|
974
|
+
{
|
|
975
|
+
public:
|
|
976
|
+
operator Tasks () const { return TaskManager::GetNumThreads(); }
|
|
977
|
+
};
|
|
978
|
+
static DefaultTasks tasks;
|
|
979
|
+
*/
|
|
980
|
+
|
|
981
|
+
|
|
982
|
+
|
|
983
|
+
|
|
984
|
+
|
|
985
|
+
|
|
986
|
+
|
|
987
|
+
#ifdef USE_NUMA
|
|
988
|
+
|
|
989
|
+
/// Array whose storage is allocated via libnuma with pages interleaved
/// round-robin across all NUMA nodes, spreading memory-bandwidth load.
/// NOTE(review): the destructor frees numa_ptr, but copy construction and
/// copy assignment are not deleted — a copy would double-free; confirm
/// instances are never copied, or delete the copy operations.
template <typename T>
class NumaInterleavedArray : public Array<T>
{
  T * numa_ptr;      // exact pointer handed to numa_free (this->data may be swapped away)
  size_t numa_size;  // element count of the numa allocation
public:
  NumaInterleavedArray () { numa_size = 0; numa_ptr = nullptr; }
  NumaInterleavedArray (size_t s)
    : Array<T> (s, (T*)numa_alloc_interleaved(s*sizeof(T)))
  {
    numa_ptr = this->data;
    numa_size = s;
  }

  ~NumaInterleavedArray ()
  {
    numa_free (numa_ptr, numa_size*sizeof(T));
  }

  /// fill assignment — forwards to Array<T>
  NumaInterleavedArray & operator= (T val)
  {
    Array<T>::operator= (val);
    return *this;
  }

  /// move assignment: base array and numa bookkeeping are exchanged, so
  /// each destructor still frees the buffer it now owns
  NumaInterleavedArray & operator= (NumaInterleavedArray && a2)
  {
    Array<T>::operator= ((Array<T>&&)a2);
    ngcore::Swap (numa_ptr, a2.numa_ptr);
    ngcore::Swap (numa_size, a2.numa_size);
    return *this;
  }

  void Swap (NumaInterleavedArray & b)
  {
    Array<T>::Swap(b);
    ngcore::Swap (numa_ptr, b.numa_ptr);
    ngcore::Swap (numa_size, b.numa_size);
  }

  /// Resizing a numa allocation is not supported: warns, then falls back
  /// to the base-class resize. NOTE(review): the resized buffer comes from
  /// the regular allocator and is NOT numa-interleaved, while numa_ptr
  /// keeps pointing at the original buffer (freed in the dtor) — confirm
  /// this path is never taken in practice.
  void SetSize (size_t size)
  {
    std::cerr << "************************* NumaDistArray::SetSize not overloaded" << std::endl;
    Array<T>::SetSize(size);
  }
};
|
|
1035
|
+
|
|
1036
|
+
/// Array whose storage is allocated via libnuma and then distributed in
/// contiguous page blocks across the configured NUMA nodes (node i
/// receives the i-th block of pages).
/// NOTE(review): as with NumaInterleavedArray, copy operations are not
/// deleted although the destructor frees numa_ptr — copying would
/// double-free; confirm instances are never copied.
template <typename T>
class NumaDistributedArray : public Array<T>
{
  T * numa_ptr;      // exact pointer handed to numa_free (this->data may be swapped away)
  size_t numa_size;  // element count of the numa allocation
public:
  NumaDistributedArray () { numa_size = 0; numa_ptr = nullptr; }
  NumaDistributedArray (size_t s)
    : Array<T> (s, (T*)numa_alloc_local(s*sizeof(T)))
  {
    numa_ptr = this->data;
    numa_size = s;

    /* int avail = */ numa_available(); // initialize libnuma
    int num_nodes = numa_num_configured_nodes();
    size_t pagesize = numa_pagesize();

    int npages = std::ceil ( double(s)*sizeof(T) / pagesize );

    // cout << "size = " << numa_size << endl;
    // cout << "npages = " << npages << endl;

    // migrate an equal share of the pages to each node
    for (int i = 0; i < num_nodes; i++)
      {
        int beg = (i * npages) / num_nodes;
        int end = ( (i+1) * npages) / num_nodes;
        // cout << "node " << i << " : [" << beg << "-" << end << ")" << endl;
        numa_tonode_memory(numa_ptr+beg*pagesize/sizeof(T), (end-beg)*pagesize, i);
      }
  }

  ~NumaDistributedArray ()
  {
    numa_free (numa_ptr, numa_size*sizeof(T));
  }

  /// move assignment: base array and numa bookkeeping are exchanged, so
  /// each destructor still frees the buffer it now owns
  NumaDistributedArray & operator= (NumaDistributedArray && a2)
  {
    Array<T>::operator= ((Array<T>&&)a2);
    ngcore::Swap (numa_ptr, a2.numa_ptr);
    ngcore::Swap (numa_size, a2.numa_size);
    return *this;
  }

  void Swap (NumaDistributedArray & b)
  {
    Array<T>::Swap(b);
    ngcore::Swap (numa_ptr, b.numa_ptr);
    ngcore::Swap (numa_size, b.numa_size);
  }

  /// Resizing a numa allocation is not supported: warns, then falls back
  /// to the base-class resize (which uses the regular allocator; numa_ptr
  /// still points at the original buffer — see note on
  /// NumaInterleavedArray::SetSize).
  void SetSize (size_t size)
  {
    std::cerr << "************************* NumaDistArray::SetSize not overloaded" << std::endl;
    Array<T>::SetSize(size);
  }
};
|
|
1093
|
+
|
|
1094
|
+
|
|
1095
|
+
|
|
1096
|
+
/// Array whose storage is allocated via libnuma on the local NUMA node of
/// the calling thread.
/// NOTE(review): copy operations are not deleted although the destructor
/// frees numa_ptr — copying would double-free; confirm instances are
/// never copied.
template <typename T>
class NumaLocalArray : public Array<T>
{
  T * numa_ptr;      // exact pointer handed to numa_free (this->data may be swapped away)
  size_t numa_size;  // element count of the numa allocation
public:
  NumaLocalArray () { numa_size = 0; numa_ptr = nullptr; }
  NumaLocalArray (size_t s)
    : Array<T> (s, (T*)numa_alloc_local(s*sizeof(T)))
  {
    numa_ptr = this->data;
    numa_size = s;
  }

  ~NumaLocalArray ()
  {
    numa_free (numa_ptr, numa_size*sizeof(T));
  }

  /// fill assignment — forwards to Array<T>
  NumaLocalArray & operator= (T val)
  {
    Array<T>::operator= (val);
    return *this;
  }

  /// move assignment: base array and numa bookkeeping are exchanged, so
  /// each destructor still frees the buffer it now owns
  NumaLocalArray & operator= (NumaLocalArray && a2)
  {
    Array<T>::operator= ((Array<T>&&)a2);
    ngcore::Swap (numa_ptr, a2.numa_ptr);
    ngcore::Swap (numa_size, a2.numa_size);
    return *this;
  }

  void Swap (NumaLocalArray & b)
  {
    Array<T>::Swap(b);
    ngcore::Swap (numa_ptr, b.numa_ptr);
    ngcore::Swap (numa_size, b.numa_size);
  }

  /// Resizing a numa allocation is not supported: warns, then falls back
  /// to the base-class resize (see note on NumaInterleavedArray::SetSize).
  void SetSize (size_t size)
  {
    std::cerr << "************************* NumaDistArray::SetSize not overloaded" << std::endl;
    Array<T>::SetSize(size);
  }
};
|
|
1142
|
+
|
|
1143
|
+
|
|
1144
|
+
#else // USE_NUMA
|
|
1145
|
+
|
|
1146
|
+
// Without libnuma, the NUMA-aware array types degrade to plain Array<T>.
template <typename T>
using NumaDistributedArray = Array<T>;

template <typename T>
using NumaInterleavedArray = Array<T>;

template <typename T>
using NumaLocalArray = Array<T>;
|
|
1154
|
+
|
|
1155
|
+
#endif // USE_NUMA
|
|
1156
|
+
|
|
1157
|
+
}
|
|
1158
|
+
|
|
1159
|
+
|
|
1160
|
+
|
|
1161
|
+
#endif // NETGEN_CORE_TASKMANAGER_HPP
|