netgen-mesher 6.2.2506.post35.dev0__cp314-cp314-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- netgen/NgOCC.py +7 -0
- netgen/__init__.py +114 -0
- netgen/__init__.pyi +22 -0
- netgen/__main__.py +53 -0
- netgen/cmake/NetgenConfig.cmake +79 -0
- netgen/cmake/netgen-targets-release.cmake +69 -0
- netgen/cmake/netgen-targets.cmake +146 -0
- netgen/config/__init__.py +1 -0
- netgen/config/__init__.pyi +52 -0
- netgen/config/__main__.py +4 -0
- netgen/config/config.py +68 -0
- netgen/config/config.pyi +54 -0
- netgen/csg.py +25 -0
- netgen/geom2d.py +178 -0
- netgen/gui.py +82 -0
- netgen/include/core/archive.hpp +1256 -0
- netgen/include/core/array.hpp +1760 -0
- netgen/include/core/autodiff.hpp +1131 -0
- netgen/include/core/autodiffdiff.hpp +733 -0
- netgen/include/core/bitarray.hpp +240 -0
- netgen/include/core/concurrentqueue.h +3619 -0
- netgen/include/core/exception.hpp +145 -0
- netgen/include/core/flags.hpp +199 -0
- netgen/include/core/hashtable.hpp +1281 -0
- netgen/include/core/localheap.hpp +318 -0
- netgen/include/core/logging.hpp +117 -0
- netgen/include/core/memtracer.hpp +221 -0
- netgen/include/core/mpi4py_pycapi.h +245 -0
- netgen/include/core/mpi_wrapper.hpp +643 -0
- netgen/include/core/ng_mpi.hpp +94 -0
- netgen/include/core/ng_mpi_generated_declarations.hpp +155 -0
- netgen/include/core/ng_mpi_native.hpp +25 -0
- netgen/include/core/ngcore.hpp +32 -0
- netgen/include/core/ngcore_api.hpp +152 -0
- netgen/include/core/ngstream.hpp +115 -0
- netgen/include/core/paje_trace.hpp +279 -0
- netgen/include/core/profiler.hpp +382 -0
- netgen/include/core/python_ngcore.hpp +457 -0
- netgen/include/core/ranges.hpp +109 -0
- netgen/include/core/register_archive.hpp +100 -0
- netgen/include/core/signal.hpp +82 -0
- netgen/include/core/simd.hpp +160 -0
- netgen/include/core/simd_arm64.hpp +407 -0
- netgen/include/core/simd_avx.hpp +394 -0
- netgen/include/core/simd_avx512.hpp +285 -0
- netgen/include/core/simd_generic.hpp +1053 -0
- netgen/include/core/simd_math.hpp +178 -0
- netgen/include/core/simd_sse.hpp +289 -0
- netgen/include/core/statushandler.hpp +37 -0
- netgen/include/core/symboltable.hpp +153 -0
- netgen/include/core/table.hpp +810 -0
- netgen/include/core/taskmanager.hpp +1161 -0
- netgen/include/core/type_traits.hpp +65 -0
- netgen/include/core/utils.hpp +385 -0
- netgen/include/core/version.hpp +102 -0
- netgen/include/core/xbool.hpp +47 -0
- netgen/include/csg/algprim.hpp +563 -0
- netgen/include/csg/brick.hpp +150 -0
- netgen/include/csg/csg.hpp +43 -0
- netgen/include/csg/csgeom.hpp +389 -0
- netgen/include/csg/csgparser.hpp +101 -0
- netgen/include/csg/curve2d.hpp +67 -0
- netgen/include/csg/edgeflw.hpp +112 -0
- netgen/include/csg/explicitcurve2d.hpp +113 -0
- netgen/include/csg/extrusion.hpp +185 -0
- netgen/include/csg/gencyl.hpp +70 -0
- netgen/include/csg/geoml.hpp +16 -0
- netgen/include/csg/identify.hpp +213 -0
- netgen/include/csg/manifold.hpp +29 -0
- netgen/include/csg/meshsurf.hpp +46 -0
- netgen/include/csg/polyhedra.hpp +121 -0
- netgen/include/csg/revolution.hpp +180 -0
- netgen/include/csg/singularref.hpp +84 -0
- netgen/include/csg/solid.hpp +295 -0
- netgen/include/csg/specpoin.hpp +194 -0
- netgen/include/csg/spline3d.hpp +99 -0
- netgen/include/csg/splinesurface.hpp +85 -0
- netgen/include/csg/surface.hpp +394 -0
- netgen/include/csg/triapprox.hpp +63 -0
- netgen/include/csg/vscsg.hpp +34 -0
- netgen/include/general/autodiff.hpp +356 -0
- netgen/include/general/autoptr.hpp +39 -0
- netgen/include/general/gzstream.h +121 -0
- netgen/include/general/hashtabl.hpp +1692 -0
- netgen/include/general/myadt.hpp +48 -0
- netgen/include/general/mystring.hpp +226 -0
- netgen/include/general/netgenout.hpp +205 -0
- netgen/include/general/ngarray.hpp +797 -0
- netgen/include/general/ngbitarray.hpp +149 -0
- netgen/include/general/ngpython.hpp +74 -0
- netgen/include/general/optmem.hpp +44 -0
- netgen/include/general/parthreads.hpp +138 -0
- netgen/include/general/seti.hpp +50 -0
- netgen/include/general/sort.hpp +47 -0
- netgen/include/general/spbita2d.hpp +59 -0
- netgen/include/general/stack.hpp +114 -0
- netgen/include/general/table.hpp +280 -0
- netgen/include/general/template.hpp +509 -0
- netgen/include/geom2d/csg2d.hpp +750 -0
- netgen/include/geom2d/geometry2d.hpp +280 -0
- netgen/include/geom2d/spline2d.hpp +234 -0
- netgen/include/geom2d/vsgeom2d.hpp +28 -0
- netgen/include/gprim/adtree.hpp +1392 -0
- netgen/include/gprim/geom2d.hpp +858 -0
- netgen/include/gprim/geom3d.hpp +749 -0
- netgen/include/gprim/geomfuncs.hpp +212 -0
- netgen/include/gprim/geomobjects.hpp +544 -0
- netgen/include/gprim/geomops.hpp +404 -0
- netgen/include/gprim/geomtest3d.hpp +101 -0
- netgen/include/gprim/gprim.hpp +33 -0
- netgen/include/gprim/spline.hpp +778 -0
- netgen/include/gprim/splinegeometry.hpp +73 -0
- netgen/include/gprim/transform3d.hpp +216 -0
- netgen/include/include/acisgeom.hpp +3 -0
- netgen/include/include/csg.hpp +1 -0
- netgen/include/include/geometry2d.hpp +1 -0
- netgen/include/include/gprim.hpp +1 -0
- netgen/include/include/incopengl.hpp +62 -0
- netgen/include/include/inctcl.hpp +13 -0
- netgen/include/include/incvis.hpp +6 -0
- netgen/include/include/linalg.hpp +1 -0
- netgen/include/include/meshing.hpp +1 -0
- netgen/include/include/myadt.hpp +1 -0
- netgen/include/include/mydefs.hpp +70 -0
- netgen/include/include/mystdlib.h +59 -0
- netgen/include/include/netgen_config.hpp +27 -0
- netgen/include/include/netgen_version.hpp +9 -0
- netgen/include/include/nginterface_v2_impl.hpp +395 -0
- netgen/include/include/ngsimd.hpp +1 -0
- netgen/include/include/occgeom.hpp +1 -0
- netgen/include/include/opti.hpp +1 -0
- netgen/include/include/parallel.hpp +1 -0
- netgen/include/include/stlgeom.hpp +1 -0
- netgen/include/include/visual.hpp +1 -0
- netgen/include/interface/rw_medit.hpp +11 -0
- netgen/include/interface/writeuser.hpp +80 -0
- netgen/include/linalg/densemat.hpp +414 -0
- netgen/include/linalg/linalg.hpp +29 -0
- netgen/include/linalg/opti.hpp +142 -0
- netgen/include/linalg/polynomial.hpp +47 -0
- netgen/include/linalg/vector.hpp +217 -0
- netgen/include/meshing/adfront2.hpp +274 -0
- netgen/include/meshing/adfront3.hpp +332 -0
- netgen/include/meshing/basegeom.hpp +370 -0
- netgen/include/meshing/bcfunctions.hpp +53 -0
- netgen/include/meshing/bisect.hpp +72 -0
- netgen/include/meshing/boundarylayer.hpp +113 -0
- netgen/include/meshing/classifyhpel.hpp +1984 -0
- netgen/include/meshing/clusters.hpp +46 -0
- netgen/include/meshing/curvedelems.hpp +274 -0
- netgen/include/meshing/delaunay2d.hpp +73 -0
- netgen/include/meshing/fieldlines.hpp +103 -0
- netgen/include/meshing/findip.hpp +198 -0
- netgen/include/meshing/findip2.hpp +103 -0
- netgen/include/meshing/geomsearch.hpp +69 -0
- netgen/include/meshing/global.hpp +54 -0
- netgen/include/meshing/hpref_hex.hpp +330 -0
- netgen/include/meshing/hpref_prism.hpp +3405 -0
- netgen/include/meshing/hpref_pyramid.hpp +154 -0
- netgen/include/meshing/hpref_quad.hpp +2082 -0
- netgen/include/meshing/hpref_segm.hpp +122 -0
- netgen/include/meshing/hpref_tet.hpp +4230 -0
- netgen/include/meshing/hpref_trig.hpp +848 -0
- netgen/include/meshing/hprefinement.hpp +366 -0
- netgen/include/meshing/improve2.hpp +178 -0
- netgen/include/meshing/improve3.hpp +151 -0
- netgen/include/meshing/localh.hpp +223 -0
- netgen/include/meshing/meshclass.hpp +1076 -0
- netgen/include/meshing/meshfunc.hpp +47 -0
- netgen/include/meshing/meshing.hpp +63 -0
- netgen/include/meshing/meshing2.hpp +163 -0
- netgen/include/meshing/meshing3.hpp +123 -0
- netgen/include/meshing/meshtool.hpp +90 -0
- netgen/include/meshing/meshtype.hpp +1930 -0
- netgen/include/meshing/msghandler.hpp +62 -0
- netgen/include/meshing/paralleltop.hpp +172 -0
- netgen/include/meshing/python_mesh.hpp +206 -0
- netgen/include/meshing/ruler2.hpp +172 -0
- netgen/include/meshing/ruler3.hpp +211 -0
- netgen/include/meshing/soldata.hpp +141 -0
- netgen/include/meshing/specials.hpp +17 -0
- netgen/include/meshing/surfacegeom.hpp +73 -0
- netgen/include/meshing/topology.hpp +1003 -0
- netgen/include/meshing/validate.hpp +21 -0
- netgen/include/meshing/visual_interface.hpp +71 -0
- netgen/include/mydefs.hpp +70 -0
- netgen/include/nginterface.h +474 -0
- netgen/include/nginterface_v2.hpp +406 -0
- netgen/include/nglib.h +697 -0
- netgen/include/nglib_occ.h +50 -0
- netgen/include/occ/occ_edge.hpp +47 -0
- netgen/include/occ/occ_face.hpp +52 -0
- netgen/include/occ/occ_solid.hpp +23 -0
- netgen/include/occ/occ_utils.hpp +376 -0
- netgen/include/occ/occ_vertex.hpp +30 -0
- netgen/include/occ/occgeom.hpp +659 -0
- netgen/include/occ/occmeshsurf.hpp +168 -0
- netgen/include/occ/vsocc.hpp +33 -0
- netgen/include/pybind11/LICENSE +29 -0
- netgen/include/pybind11/attr.h +722 -0
- netgen/include/pybind11/buffer_info.h +208 -0
- netgen/include/pybind11/cast.h +2361 -0
- netgen/include/pybind11/chrono.h +228 -0
- netgen/include/pybind11/common.h +2 -0
- netgen/include/pybind11/complex.h +74 -0
- netgen/include/pybind11/conduit/README.txt +15 -0
- netgen/include/pybind11/conduit/pybind11_conduit_v1.h +116 -0
- netgen/include/pybind11/conduit/pybind11_platform_abi_id.h +87 -0
- netgen/include/pybind11/conduit/wrap_include_python_h.h +72 -0
- netgen/include/pybind11/critical_section.h +56 -0
- netgen/include/pybind11/detail/class.h +823 -0
- netgen/include/pybind11/detail/common.h +1348 -0
- netgen/include/pybind11/detail/cpp_conduit.h +75 -0
- netgen/include/pybind11/detail/descr.h +226 -0
- netgen/include/pybind11/detail/dynamic_raw_ptr_cast_if_possible.h +39 -0
- netgen/include/pybind11/detail/exception_translation.h +71 -0
- netgen/include/pybind11/detail/function_record_pyobject.h +191 -0
- netgen/include/pybind11/detail/init.h +538 -0
- netgen/include/pybind11/detail/internals.h +799 -0
- netgen/include/pybind11/detail/native_enum_data.h +209 -0
- netgen/include/pybind11/detail/pybind11_namespace_macros.h +82 -0
- netgen/include/pybind11/detail/struct_smart_holder.h +378 -0
- netgen/include/pybind11/detail/type_caster_base.h +1591 -0
- netgen/include/pybind11/detail/typeid.h +65 -0
- netgen/include/pybind11/detail/using_smart_holder.h +22 -0
- netgen/include/pybind11/detail/value_and_holder.h +90 -0
- netgen/include/pybind11/eigen/common.h +9 -0
- netgen/include/pybind11/eigen/matrix.h +723 -0
- netgen/include/pybind11/eigen/tensor.h +521 -0
- netgen/include/pybind11/eigen.h +12 -0
- netgen/include/pybind11/embed.h +320 -0
- netgen/include/pybind11/eval.h +161 -0
- netgen/include/pybind11/functional.h +147 -0
- netgen/include/pybind11/gil.h +199 -0
- netgen/include/pybind11/gil_safe_call_once.h +102 -0
- netgen/include/pybind11/gil_simple.h +37 -0
- netgen/include/pybind11/iostream.h +265 -0
- netgen/include/pybind11/native_enum.h +67 -0
- netgen/include/pybind11/numpy.h +2312 -0
- netgen/include/pybind11/operators.h +202 -0
- netgen/include/pybind11/options.h +92 -0
- netgen/include/pybind11/pybind11.h +3645 -0
- netgen/include/pybind11/pytypes.h +2680 -0
- netgen/include/pybind11/stl/filesystem.h +114 -0
- netgen/include/pybind11/stl.h +666 -0
- netgen/include/pybind11/stl_bind.h +858 -0
- netgen/include/pybind11/subinterpreter.h +299 -0
- netgen/include/pybind11/trampoline_self_life_support.h +65 -0
- netgen/include/pybind11/type_caster_pyobject_ptr.h +61 -0
- netgen/include/pybind11/typing.h +298 -0
- netgen/include/pybind11/warnings.h +75 -0
- netgen/include/stlgeom/meshstlsurface.hpp +67 -0
- netgen/include/stlgeom/stlgeom.hpp +491 -0
- netgen/include/stlgeom/stlline.hpp +193 -0
- netgen/include/stlgeom/stltool.hpp +331 -0
- netgen/include/stlgeom/stltopology.hpp +419 -0
- netgen/include/stlgeom/vsstl.hpp +58 -0
- netgen/include/visualization/meshdoc.hpp +42 -0
- netgen/include/visualization/mvdraw.hpp +325 -0
- netgen/include/visualization/vispar.hpp +128 -0
- netgen/include/visualization/visual.hpp +28 -0
- netgen/include/visualization/visual_api.hpp +10 -0
- netgen/include/visualization/vssolution.hpp +399 -0
- netgen/lib/libnggui.lib +0 -0
- netgen/lib/ngcore.lib +0 -0
- netgen/lib/nglib.lib +0 -0
- netgen/lib/togl.lib +0 -0
- netgen/libnggui.dll +0 -0
- netgen/libngguipy.lib +0 -0
- netgen/libngguipy.pyd +0 -0
- netgen/libngpy/_NgOCC.pyi +1545 -0
- netgen/libngpy/__init__.pyi +7 -0
- netgen/libngpy/_csg.pyi +259 -0
- netgen/libngpy/_geom2d.pyi +323 -0
- netgen/libngpy/_meshing.pyi +1111 -0
- netgen/libngpy/_stl.pyi +131 -0
- netgen/libngpy.lib +0 -0
- netgen/libngpy.pyd +0 -0
- netgen/meshing.py +65 -0
- netgen/ngcore.dll +0 -0
- netgen/nglib.dll +0 -0
- netgen/occ.py +52 -0
- netgen/read_gmsh.py +259 -0
- netgen/read_meshio.py +22 -0
- netgen/stl.py +2 -0
- netgen/togl.dll +0 -0
- netgen/version.py +2 -0
- netgen/webgui.py +529 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/boundarycondition.geo +16 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/boxcyl.geo +32 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/circle_on_cube.geo +27 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/cone.geo +13 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/cube.geo +16 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/cubeandring.geo +55 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/cubeandspheres.geo +21 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/cubemcyl.geo +18 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/cubemsphere.geo +19 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/cylinder.geo +12 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/cylsphere.geo +12 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/doc/ng4.pdf +0 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/ellipsoid.geo +8 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/ellipticcyl.geo +10 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/extrusion.geo +99 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/fichera.geo +24 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/frame.step +11683 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/hinge.stl +8486 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/lshape3d.geo +26 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/manyholes.geo +26 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/manyholes2.geo +26 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/matrix.geo +27 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/ortho.geo +11 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/part1.stl +2662 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/period.geo +33 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/py_tutorials/exportNeutral.py +26 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/py_tutorials/mesh.py +19 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/py_tutorials/shaft.geo +65 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/revolution.geo +18 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/screw.step +1694 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/sculpture.geo +13 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/shaft.geo +65 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/shell.geo +10 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/sphere.geo +8 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/sphereincube.geo +17 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/square.in2d +35 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/squarecircle.in2d +48 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/squarehole.in2d +47 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/torus.geo +8 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/trafo.geo +57 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/twobricks.geo +15 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/twocubes.geo +18 -0
- netgen_mesher-6.2.2506.post35.dev0.data/data/share/netgen/twocyl.geo +16 -0
- netgen_mesher-6.2.2506.post35.dev0.dist-info/METADATA +15 -0
- netgen_mesher-6.2.2506.post35.dev0.dist-info/RECORD +340 -0
- netgen_mesher-6.2.2506.post35.dev0.dist-info/WHEEL +5 -0
- netgen_mesher-6.2.2506.post35.dev0.dist-info/entry_points.txt +2 -0
- netgen_mesher-6.2.2506.post35.dev0.dist-info/licenses/AUTHORS +1 -0
- netgen_mesher-6.2.2506.post35.dev0.dist-info/licenses/LICENSE +504 -0
- netgen_mesher-6.2.2506.post35.dev0.dist-info/top_level.txt +2 -0
- pyngcore/__init__.py +1 -0
- pyngcore/pyngcore.cp314-win_amd64.pyd +0 -0
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
#ifndef NETGEN_CORE_SIMD_MATH_HPP
|
|
2
|
+
#define NETGEN_CORE_SIMD_MATH_HPP
|
|
3
|
+
|
|
4
|
+
#include <tuple>
|
|
5
|
+
|
|
6
|
+
#ifndef M_PI
|
|
7
|
+
#define M_PI 3.14159265358979323846
|
|
8
|
+
#endif
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
namespace ngcore
|
|
12
|
+
{
|
|
13
|
+
|
|
14
|
+
/*
|
|
15
|
+
based on:
|
|
16
|
+
Stephen L. Moshier: Methods and Programs For Mathematical Functions
|
|
17
|
+
https://www.moshier.net/methprog.pdf
|
|
18
|
+
|
|
19
|
+
CEPHES MATHEMATICAL FUNCTION LIBRARY
|
|
20
|
+
https://www.netlib.org/cephes/
|
|
21
|
+
*/
|
|
22
|
+
|
|
23
|
+
// Sine polynomial coefficients (Cephes), highest power first so they can be
// consumed by a Horner scheme in x^2:
//   sin(x) ~ x + x^3 * (sincof[0]*x^10 + ... + sincof[4]*x^2 + sincof[5])
static constexpr double sincof[6] = {
   1.58962301576546568060E-10,   // x^13 term
  -2.50507477628578072866E-8,    // x^11 term
   2.75573136213857245213E-6,    // x^9  term
  -1.98412698295895385996E-4,    // x^7  term
   8.33333333332211858878E-3,    // x^5  term
  -1.66666666666666307295E-1,    // x^3  term
};
|
|
31
|
+
|
|
32
|
+
// Cosine polynomial coefficients (Cephes), highest power first so they can be
// consumed by a Horner scheme in x^2:
//   cos(x) ~ 1 - x^2/2 + x^4 * (coscof[0]*x^10 + ... + coscof[4]*x^2 + coscof[5])
static constexpr double coscof[6] = {
  -1.13585365213876817300E-11,   // x^14 term
   2.08757008419747316778E-9,    // x^12 term
  -2.75573141792967388112E-7,    // x^10 term
   2.48015872888517045348E-5,    // x^8  term
  -1.38888888888730564116E-3,    // x^6  term
   4.16666666666665929218E-2,    // x^4  term
};
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
// highly accurate on [-pi/4, pi/4]
|
|
43
|
+
template <int N>
|
|
44
|
+
auto sincos_reduced (SIMD<double,N> x)
|
|
45
|
+
{
|
|
46
|
+
auto x2 = x*x;
|
|
47
|
+
|
|
48
|
+
auto s = ((((( sincof[0]*x2 + sincof[1]) * x2 + sincof[2]) * x2 + sincof[3]) * x2 + sincof[4]) * x2 + sincof[5]);
|
|
49
|
+
s = x + x*x*x * s;
|
|
50
|
+
|
|
51
|
+
auto c = ((((( coscof[0]*x2 + coscof[1]) * x2 + coscof[2]) * x2 + coscof[3]) * x2 + coscof[4]) * x2 + coscof[5]);
|
|
52
|
+
c = 1.0 - 0.5*x2 + x2*x2*c;
|
|
53
|
+
|
|
54
|
+
return std::tuple{ s, c };
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
template <int N>
|
|
58
|
+
auto sincos (SIMD<double,N> x)
|
|
59
|
+
{
|
|
60
|
+
auto y = round((2/M_PI) * x);
|
|
61
|
+
auto q = lround(y);
|
|
62
|
+
|
|
63
|
+
auto [s1,c1] = sincos_reduced(x - y * (M_PI/2));
|
|
64
|
+
|
|
65
|
+
auto s2 = If((q & SIMD<int64_t,N>(1)) == SIMD<int64_t,N>(0), s1, c1);
|
|
66
|
+
auto s = If((q & SIMD<int64_t,N>(2)) == SIMD<int64_t,N>(0), s2, -s2);
|
|
67
|
+
|
|
68
|
+
auto c2 = If((q & SIMD<int64_t,N>(1)) == SIMD<int64_t,N>(0), c1, -s1);
|
|
69
|
+
auto c = If((q & SIMD<int64_t,N>(2)) == SIMD<int64_t,N>(0), c2, -c2);
|
|
70
|
+
|
|
71
|
+
return std::tuple{ s, c };
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
template <int N>
|
|
81
|
+
SIMD<double,N> exp_reduced (SIMD<double,N> x)
|
|
82
|
+
{
|
|
83
|
+
static constexpr double P[] = {
|
|
84
|
+
1.26177193074810590878E-4,
|
|
85
|
+
3.02994407707441961300E-2,
|
|
86
|
+
9.99999999999999999910E-1,
|
|
87
|
+
};
|
|
88
|
+
|
|
89
|
+
static constexpr double Q[] = {
|
|
90
|
+
3.00198505138664455042E-6,
|
|
91
|
+
2.52448340349684104192E-3,
|
|
92
|
+
2.27265548208155028766E-1,
|
|
93
|
+
2.00000000000000000009E0,
|
|
94
|
+
};
|
|
95
|
+
|
|
96
|
+
/*
|
|
97
|
+
// from: https://www.netlib.org/cephes/
|
|
98
|
+
rational approximation for exponential
|
|
99
|
+
* of the fractional part:
|
|
100
|
+
* e**x = 1 + 2x P(x**2)/( Q(x**2) - x P(x**2) )
|
|
101
|
+
|
|
102
|
+
xx = x * x;
|
|
103
|
+
px = x * polevl( xx, P, 2 );
|
|
104
|
+
x = px/( polevl( xx, Q, 3 ) - px );
|
|
105
|
+
x = 1.0 + 2.0 * x;
|
|
106
|
+
*/
|
|
107
|
+
|
|
108
|
+
auto xx = x*x;
|
|
109
|
+
auto px = (P[0]*xx + P[1]) * xx + P[2];
|
|
110
|
+
auto qx = ((Q[0]*xx+Q[1])*xx+Q[2])*xx+Q[3];
|
|
111
|
+
return 1.0 + 2.0*x * px / (qx- x * px);
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
template <int N>
|
|
116
|
+
SIMD<double,N> pow2_int64_to_float64(SIMD<int64_t,N> n)
|
|
117
|
+
{
|
|
118
|
+
// thx to deepseek
|
|
119
|
+
|
|
120
|
+
// Step 1: Clamp the input to valid exponent range [-1022, 1023]
|
|
121
|
+
// (We use saturated operations to handle out-of-range values)
|
|
122
|
+
SIMD<int64_t,N> max_exp(1023);
|
|
123
|
+
SIMD<int64_t,N> min_exp(-1022);
|
|
124
|
+
n = If(n > max_exp, max_exp, n);
|
|
125
|
+
n = If(min_exp > n, min_exp, n);
|
|
126
|
+
|
|
127
|
+
// Step 2: Add exponent bias (1023)
|
|
128
|
+
n = n + SIMD<int64_t,N>(1023);
|
|
129
|
+
|
|
130
|
+
// Step 3: Shift to exponent bit position (bit 52)
|
|
131
|
+
auto shifted_exp = (n << IC<52>());
|
|
132
|
+
|
|
133
|
+
// Step 4: Reinterpret as double
|
|
134
|
+
return Reinterpret<double> (shifted_exp);
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
template <int N>
|
|
139
|
+
SIMD<double,N> myexp (SIMD<double,N> x)
|
|
140
|
+
{
|
|
141
|
+
constexpr double log2 = 0.693147180559945286; // log(2.0);
|
|
142
|
+
|
|
143
|
+
auto r = round(1/log2 * x);
|
|
144
|
+
auto rI = lround(r);
|
|
145
|
+
r *= log2;
|
|
146
|
+
|
|
147
|
+
SIMD<double,N> pow2 = pow2_int64_to_float64 (rI);
|
|
148
|
+
return exp_reduced(x-r) * pow2;
|
|
149
|
+
|
|
150
|
+
// maybe better:
|
|
151
|
+
// x = ldexp( x, n );
|
|
152
|
+
}
|
|
153
|
+
|
|
154
|
+
/*
|
|
155
|
+
inline auto Test1 (SIMD<double> x)
|
|
156
|
+
{
|
|
157
|
+
return myexp(x);
|
|
158
|
+
}
|
|
159
|
+
|
|
160
|
+
inline auto Test2 (SIMD<double> x)
|
|
161
|
+
{
|
|
162
|
+
return sincos(x);
|
|
163
|
+
}
|
|
164
|
+
|
|
165
|
+
inline auto Test3 (SIMD<double,4> x)
|
|
166
|
+
{
|
|
167
|
+
return myexp(x);
|
|
168
|
+
}
|
|
169
|
+
|
|
170
|
+
inline auto Test4 (SIMD<double,4> x)
|
|
171
|
+
{
|
|
172
|
+
return sincos(x);
|
|
173
|
+
}
|
|
174
|
+
*/
|
|
175
|
+
|
|
176
|
+
}
|
|
177
|
+
|
|
178
|
+
#endif
|
|
@@ -0,0 +1,289 @@
|
|
|
1
|
+
#ifndef NETGEN_CORE_SIMD_SSE_HPP
|
|
2
|
+
#define NETGEN_CORE_SIMD_SSE_HPP
|
|
3
|
+
|
|
4
|
+
/**************************************************************************/
|
|
5
|
+
/* File: simd_sse.hpp */
|
|
6
|
+
/* Author: Joachim Schoeberl, Matthias Hochsteger */
|
|
7
|
+
/* Date: 25. Mar. 16 */
|
|
8
|
+
/**************************************************************************/
|
|
9
|
+
|
|
10
|
+
#include <immintrin.h>
|
|
11
|
+
|
|
12
|
+
namespace ngcore
|
|
13
|
+
{
|
|
14
|
+
|
|
15
|
+
template <>
|
|
16
|
+
class SIMD<mask64,2>
|
|
17
|
+
{
|
|
18
|
+
__m128i mask;
|
|
19
|
+
public:
|
|
20
|
+
SIMD (int i)
|
|
21
|
+
: mask(_mm_cmpgt_epi32(_mm_set1_epi32(i),
|
|
22
|
+
_mm_set_epi32(1, 1, 0, 0)))
|
|
23
|
+
{ ; }
|
|
24
|
+
|
|
25
|
+
SIMD (bool i0, bool i1) { mask = _mm_set_epi64x(i1?-1:0, i0?-1:0); }
|
|
26
|
+
|
|
27
|
+
SIMD (__m128i _mask) : mask(_mask) { ; }
|
|
28
|
+
__m128i Data() const { return mask; }
|
|
29
|
+
static constexpr int Size() { return 2; }
|
|
30
|
+
static NETGEN_INLINE SIMD<mask64, 2> GetMaskFromBits (unsigned int i);
|
|
31
|
+
};
|
|
32
|
+
|
|
33
|
+
// Lookup table for GetMaskFromBits: bit 0 of the index enables lane 0, bit 1
// enables lane 1.
// Enabled lanes are all-ones over the full 64 bits, like the masks produced by
// SIMD(bool,bool) and by the comparison operators. The previous entries set only
// the upper 32 bits of a lane, which is enough for sign-bit based consumers
// (maskload / maskstore) but gives half-blended values with and/andnot selects.
static SIMD<mask64, 2> masks_from_2bits[4] = {
  _mm_set_epi64x ( 0, 0), _mm_set_epi64x ( 0,-1),
  _mm_set_epi64x (-1, 0), _mm_set_epi64x (-1,-1),
};
|
|
37
|
+
|
|
38
|
+
// Returns the precomputed mask for the two lowest bits of i; higher bits are ignored.
NETGEN_INLINE SIMD<mask64, 2> SIMD<mask64, 2> :: GetMaskFromBits (unsigned int i)
{
  const unsigned int lane_bits = i % 4u;
  return masks_from_2bits[lane_bits];
}
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
template<>
|
|
45
|
+
class alignas(16) SIMD<int64_t,2>
|
|
46
|
+
{
|
|
47
|
+
__m128i data;
|
|
48
|
+
|
|
49
|
+
public:
|
|
50
|
+
static constexpr int Size() { return 2; }
|
|
51
|
+
SIMD () {}
|
|
52
|
+
SIMD (const SIMD &) = default;
|
|
53
|
+
SIMD (int64_t v0, int64_t v1) { data = _mm_set_epi64x(v1,v0); }
|
|
54
|
+
SIMD (std::array<int64_t, 2> arr)
|
|
55
|
+
: data{_mm_set_epi64x(arr[1],arr[0])}
|
|
56
|
+
{}
|
|
57
|
+
|
|
58
|
+
SIMD & operator= (const SIMD &) = default;
|
|
59
|
+
|
|
60
|
+
SIMD (int64_t val) { data = _mm_set1_epi64x(val); }
|
|
61
|
+
SIMD (__m128i _data) { data = _data; }
|
|
62
|
+
|
|
63
|
+
template<typename T, typename std::enable_if<std::is_convertible<T, std::function<int64_t(int)>>::value, int>::type = 0>
|
|
64
|
+
SIMD (const T & func)
|
|
65
|
+
{
|
|
66
|
+
data = _mm_set_epi64(func(1), func(0));
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
NETGEN_INLINE auto operator[] (int i) const { return ((int64_t*)(&data))[i]; }
|
|
70
|
+
NETGEN_INLINE __m128i Data() const { return data; }
|
|
71
|
+
NETGEN_INLINE __m128i & Data() { return data; }
|
|
72
|
+
// NETGEN_INLINE int64_t Lo() const { return _mm_extract_epi64(data, 0); }
|
|
73
|
+
// NETGEN_INLINE int64_t Hi() const { return _mm_extract_epi64(data, 1); }
|
|
74
|
+
NETGEN_INLINE int64_t Lo() const { return ((int64_t*)(&data))[0]; }
|
|
75
|
+
NETGEN_INLINE int64_t Hi() const { return ((int64_t*)(&data))[1]; }
|
|
76
|
+
static SIMD FirstInt(int n0=0) { return { n0, n0+1 }; }
|
|
77
|
+
};
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
// Lane-wise negation, computed as 0 - a.
NETGEN_INLINE SIMD<int64_t,2> operator-(SIMD<int64_t,2> a)
{
  const __m128i zero = _mm_setzero_si128();
  return _mm_sub_epi64(zero, a.Data());
}
|
|
82
|
+
// Lane-wise 64-bit integer addition.
NETGEN_INLINE SIMD<int64_t,2> operator+ (SIMD<int64_t,2> a, SIMD<int64_t,2> b)
{
  const __m128i sum = _mm_add_epi64(a.Data(), b.Data());
  return sum;
}
|
|
83
|
+
// Lane-wise 64-bit integer subtraction a - b.
NETGEN_INLINE SIMD<int64_t,2> operator- (SIMD<int64_t,2> a, SIMD<int64_t,2> b)
{
  const __m128i diff = _mm_sub_epi64(a.Data(), b.Data());
  return diff;
}
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
template<>
|
|
87
|
+
class alignas(16) SIMD<double,2>
|
|
88
|
+
{
|
|
89
|
+
__m128d data;
|
|
90
|
+
|
|
91
|
+
public:
|
|
92
|
+
static constexpr int Size() { return 2; }
|
|
93
|
+
SIMD () {}
|
|
94
|
+
SIMD (const SIMD &) = default;
|
|
95
|
+
SIMD (double v0, double v1) { data = _mm_set_pd(v1,v0); }
|
|
96
|
+
SIMD (SIMD<double,1> v0, SIMD<double,1> v1)
|
|
97
|
+
: data{_mm_set_pd(v0.Data(), v1.Data())}
|
|
98
|
+
{ }
|
|
99
|
+
SIMD (std::array<double, 2> arr)
|
|
100
|
+
: data{_mm_set_pd(arr[1], arr[0])}
|
|
101
|
+
{}
|
|
102
|
+
|
|
103
|
+
SIMD & operator= (const SIMD &) = default;
|
|
104
|
+
|
|
105
|
+
SIMD (double val) { data = _mm_set1_pd(val); }
|
|
106
|
+
SIMD (int val) { data = _mm_set1_pd(val); }
|
|
107
|
+
SIMD (size_t val) { data = _mm_set1_pd(val); }
|
|
108
|
+
|
|
109
|
+
SIMD (double const * p) { data = _mm_loadu_pd(p); }
|
|
110
|
+
SIMD (double const * p, SIMD<mask64,2> mask)
|
|
111
|
+
{
|
|
112
|
+
#ifdef __AVX__
|
|
113
|
+
data = _mm_maskload_pd(p, mask.Data());
|
|
114
|
+
#else
|
|
115
|
+
// this versions segfaults if p points to the last allowed element
|
|
116
|
+
// happened on Mac with the new SparseCholesky-factorization
|
|
117
|
+
// data = _mm_and_pd(_mm_castsi128_pd(mask.Data()), _mm_loadu_pd(p));
|
|
118
|
+
auto pmask = (int64_t*)&mask;
|
|
119
|
+
data = _mm_set_pd (pmask[1] ? p[1] : 0.0, pmask[0] ? p[0] : 0.0);
|
|
120
|
+
#endif
|
|
121
|
+
}
|
|
122
|
+
SIMD (__m128d _data) { data = _data; }
|
|
123
|
+
|
|
124
|
+
void Store (double * p) { _mm_storeu_pd(p, data); }
|
|
125
|
+
void Store (double * p, SIMD<mask64,2> mask)
|
|
126
|
+
{
|
|
127
|
+
#ifdef __AVX__
|
|
128
|
+
_mm_maskstore_pd(p, mask.Data(), data);
|
|
129
|
+
#else
|
|
130
|
+
/*
|
|
131
|
+
_mm_storeu_pd (p, _mm_or_pd (_mm_and_pd(_mm_castsi128_pd(mask.Data()), data),
|
|
132
|
+
_mm_andnot_pd(_mm_castsi128_pd(mask.Data()), _mm_loadu_pd(p))));
|
|
133
|
+
*/
|
|
134
|
+
auto pmask = (int64_t*)&mask;
|
|
135
|
+
if (pmask[0]) p[0] = (*this)[0];
|
|
136
|
+
if (pmask[1]) p[1] = (*this)[1];
|
|
137
|
+
#endif
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
template<typename T, typename std::enable_if<std::is_convertible<T, std::function<double(int)>>::value, int>::type = 0>
|
|
141
|
+
SIMD (const T & func)
|
|
142
|
+
{
|
|
143
|
+
data = _mm_set_pd(func(1), func(0));
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
NETGEN_INLINE double operator[] (int i) const { return ((double*)(&data))[i]; }
|
|
147
|
+
NETGEN_INLINE __m128d Data() const { return data; }
|
|
148
|
+
NETGEN_INLINE __m128d & Data() { return data; }
|
|
149
|
+
|
|
150
|
+
template <int I>
|
|
151
|
+
double Get() const
|
|
152
|
+
{
|
|
153
|
+
static_assert(I>=0 && I<2, "Index out of range");
|
|
154
|
+
return (*this)[I];
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
double Lo() const { return Get<0>(); }
|
|
158
|
+
double Hi() const { return Get<1>(); }
|
|
159
|
+
|
|
160
|
+
operator std::tuple<double&,double&> ()
|
|
161
|
+
{
|
|
162
|
+
auto pdata = (double*)&data;
|
|
163
|
+
return std::tuple<double&,double&>(pdata[0], pdata[1]);
|
|
164
|
+
}
|
|
165
|
+
};
|
|
166
|
+
|
|
167
|
+
// Lane-wise negation: flips the sign bit of both lanes.
NETGEN_INLINE SIMD<double,2> operator- (SIMD<double,2> a)
{
  const __m128d sign_bit = _mm_set1_pd(-0.0);
  return _mm_xor_pd(a.Data(), sign_bit);
}
|
|
168
|
+
// Lane-wise addition.
NETGEN_INLINE SIMD<double,2> operator+ (SIMD<double,2> a, SIMD<double,2> b)
{
  const __m128d sum = _mm_add_pd(a.Data(), b.Data());
  return sum;
}
|
|
169
|
+
// Lane-wise subtraction a - b.
NETGEN_INLINE SIMD<double,2> operator- (SIMD<double,2> a, SIMD<double,2> b)
{
  const __m128d diff = _mm_sub_pd(a.Data(), b.Data());
  return diff;
}
|
|
170
|
+
// Lane-wise multiplication.
NETGEN_INLINE SIMD<double,2> operator* (SIMD<double,2> a, SIMD<double,2> b)
{
  const __m128d prod = _mm_mul_pd(a.Data(), b.Data());
  return prod;
}
|
|
171
|
+
// Lane-wise division a / b.
NETGEN_INLINE SIMD<double,2> operator/ (SIMD<double,2> a, SIMD<double,2> b)
{
  const __m128d quot = _mm_div_pd(a.Data(), b.Data());
  return quot;
}
|
|
172
|
+
// Scalar times vector: a is broadcast to both lanes first.
NETGEN_INLINE SIMD<double,2> operator* (double a, SIMD<double,2> b)
{
  const __m128d scalar = _mm_set1_pd(a);
  return _mm_mul_pd(scalar, b.Data());
}
|
|
173
|
+
// Vector times scalar: a is broadcast to both lanes first.
NETGEN_INLINE SIMD<double,2> operator* (SIMD<double,2> b, double a)
{
  const __m128d scalar = _mm_set1_pd(a);
  return _mm_mul_pd(scalar, b.Data());
}
|
|
174
|
+
|
|
175
|
+
template<>
|
|
176
|
+
NETGEN_INLINE auto Unpack (SIMD<double,2> a, SIMD<double,2> b)
|
|
177
|
+
{
|
|
178
|
+
return std::make_tuple(SIMD<double,2>(_mm_unpacklo_pd(a.Data(),b.Data())),
|
|
179
|
+
SIMD<double,2>(_mm_unpackhi_pd(a.Data(),b.Data())));
|
|
180
|
+
}
|
|
181
|
+
|
|
182
|
+
// Horizontal add: returns (a[0]+a[1], b[0]+b[1]).
// Uses the native instruction when SSE3 (or AVX) is enabled, otherwise an
// unpack-based emulation.
NETGEN_INLINE __m128d my_mm_hadd_pd(__m128d a, __m128d b)
{
#if !defined(__SSE3__) && !defined(__AVX__)
  const __m128d firsts = _mm_unpacklo_pd(a,b);    // (a[0], b[0])
  const __m128d seconds = _mm_unpackhi_pd(a,b);   // (a[1], b[1])
  return _mm_add_pd(firsts, seconds);
#else
  return _mm_hadd_pd(a,b);
#endif
}
|
|
189
|
+
|
|
190
|
+
#ifndef __AVX__
|
|
191
|
+
NETGEN_INLINE __m128i my_mm_cmpgt_epi64(__m128i a, __m128i b) {
|
|
192
|
+
auto res_lo = _mm_cvtsi128_si64(a) > _mm_cvtsi128_si64(b) ? -1:0;
|
|
193
|
+
auto res_hi = _mm_cvtsi128_si64(_mm_srli_si128(a,8)) > _mm_cvtsi128_si64(_mm_srli_si128(b,8)) ? -1 : 0;
|
|
194
|
+
return _mm_set_epi64x(res_hi,res_lo);
|
|
195
|
+
}
|
|
196
|
+
#else
|
|
197
|
+
NETGEN_INLINE __m128i my_mm_cmpgt_epi64(__m128i a, __m128i b) {
|
|
198
|
+
return _mm_cmpgt_epi64(a,b);
|
|
199
|
+
}
|
|
200
|
+
#endif
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
// Lane-wise square root.
NETGEN_INLINE SIMD<double,2> sqrt (SIMD<double,2> a)
{
  const __m128d root = _mm_sqrt_pd(a.Data());
  return root;
}
|
|
204
|
+
// Lane-wise absolute value, obtained by clearing the sign bit.
// max(a, -a) is not used because _mm_max_pd returns its second operand when
// both inputs are zeros, which turned fabs(+0.0) into -0.0.
NETGEN_INLINE SIMD<double,2> fabs (SIMD<double,2> a)
{
  const __m128d sign_bit = _mm_set1_pd(-0.0);
  return _mm_andnot_pd(sign_bit, a.Data());   // (~sign_bit) & a
}
|
|
205
|
+
using std::floor;
|
|
206
|
+
// Lane-wise floor, evaluated with the scalar std::floor on each element.
NETGEN_INLINE SIMD<double,2> floor (SIMD<double,2> a)
{
  const double lo = std::floor(a[0]);
  const double hi = std::floor(a[1]);
  return ngcore::SIMD<double,2>(lo, hi);
}
|
|
208
|
+
using std::ceil;
|
|
209
|
+
NETGEN_INLINE SIMD<double,2> ceil (SIMD<double,2> a)
|
|
210
|
+
{ return ngcore::SIMD<double,2>([&](int i)->double { return ceil(a[i]); } ); }
|
|
211
|
+
|
|
212
|
+
// Lane-wise comparisons of 2-lane double vectors.
// Each one runs the packed-double compare and reinterprets the result bits
// as the integer mask type.

NETGEN_INLINE SIMD<mask64,2> operator<= (SIMD<double,2> a , SIMD<double,2> b)
{
  const __m128d cmp = _mm_cmple_pd(a.Data(), b.Data());
  return _mm_castpd_si128(cmp);
}

NETGEN_INLINE SIMD<mask64,2> operator< (SIMD<double,2> a , SIMD<double,2> b)
{
  const __m128d cmp = _mm_cmplt_pd(a.Data(), b.Data());
  return _mm_castpd_si128(cmp);
}

NETGEN_INLINE SIMD<mask64,2> operator>= (SIMD<double,2> a , SIMD<double,2> b)
{
  const __m128d cmp = _mm_cmpge_pd(a.Data(), b.Data());
  return _mm_castpd_si128(cmp);
}

NETGEN_INLINE SIMD<mask64,2> operator> (SIMD<double,2> a , SIMD<double,2> b)
{
  const __m128d cmp = _mm_cmpgt_pd(a.Data(), b.Data());
  return _mm_castpd_si128(cmp);
}

NETGEN_INLINE SIMD<mask64,2> operator== (SIMD<double,2> a , SIMD<double,2> b)
{
  const __m128d cmp = _mm_cmpeq_pd(a.Data(), b.Data());
  return _mm_castpd_si128(cmp);
}

NETGEN_INLINE SIMD<mask64,2> operator!= (SIMD<double,2> a , SIMD<double,2> b)
{
  const __m128d cmp = _mm_cmpneq_pd(a.Data(), b.Data());
  return _mm_castpd_si128(cmp);
}
|
|
224
|
+
|
|
225
|
+
#ifdef __SSE4_2__
// Signed 64-bit lane-wise ordering comparisons (only compiled with SSE4.2).

// a <= b is computed as NOT (a > b): the compare result is XOR-ed with all ones.
NETGEN_INLINE SIMD<mask64,2> operator<= (SIMD<int64_t,2> a , SIMD<int64_t,2> b)
{
  const __m128i a_greater = _mm_cmpgt_epi64(a.Data(), b.Data());
  const __m128i all_ones  = _mm_set1_epi32(-1);
  return _mm_xor_si128(a_greater, all_ones);
}

// a < b is b > a.
NETGEN_INLINE SIMD<mask64,2> operator< (SIMD<int64_t,2> a , SIMD<int64_t,2> b)
{
  const __m128i b_greater = my_mm_cmpgt_epi64(b.Data(), a.Data());
  return b_greater;
}

// a >= b is computed as NOT (b > a).
NETGEN_INLINE SIMD<mask64,2> operator>= (SIMD<int64_t,2> a , SIMD<int64_t,2> b)
{
  const __m128i b_greater = _mm_cmpgt_epi64(b.Data(), a.Data());
  const __m128i all_ones  = _mm_set1_epi32(-1);
  return _mm_xor_si128(b_greater, all_ones);
}

NETGEN_INLINE SIMD<mask64,2> operator> (SIMD<int64_t,2> a , SIMD<int64_t,2> b)
{
  const __m128i a_greater = my_mm_cmpgt_epi64(a.Data(), b.Data());
  return a_greater;
}
#endif
|
|
235
|
+
#ifdef __SSE4_1__
// 64-bit integer lane-wise equality (only compiled with SSE4.1).
NETGEN_INLINE SIMD<mask64,2> operator== (SIMD<int64_t,2> a , SIMD<int64_t,2> b)
{
  const __m128i equal = _mm_cmpeq_epi64(a.Data(), b.Data());
  return equal;
}

// Inequality is the bitwise complement of the equality mask.
NETGEN_INLINE SIMD<mask64,2> operator!= (SIMD<int64_t,2> a , SIMD<int64_t,2> b)
{
  const __m128i equal    = _mm_cmpeq_epi64(a.Data(), b.Data());
  const __m128i all_ones = _mm_set1_epi32(-1);
  return _mm_xor_si128(equal, all_ones);
}
#endif
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
// Lane-wise logical AND of two masks (bitwise AND of the 128-bit payloads).
NETGEN_INLINE SIMD<mask64,2> operator&& (SIMD<mask64,2> a, SIMD<mask64,2> b)
{
  return _mm_and_si128(a.Data(), b.Data());
}

// Lane-wise logical OR of two masks (bitwise OR of the 128-bit payloads).
NETGEN_INLINE SIMD<mask64,2> operator|| (SIMD<mask64,2> a, SIMD<mask64,2> b)
{
  return _mm_or_si128(a.Data(), b.Data());
}
|
|
247
|
+
// Lane-wise logical NOT of a mask: flips every bit by XOR-ing with all ones.
// The all-ones constant is built with SSE2 only; the previous form generated
// it with _mm_cmpeq_epi64, an SSE4.1 intrinsic used outside any __SSE4_1__
// guard, which breaks SSE2-only builds on compilers that gate intrinsics by
// target feature.
NETGEN_INLINE SIMD<mask64,2> operator! (SIMD<mask64,2> a)
{
  const __m128i all_ones = _mm_set1_epi32(-1);
  return _mm_xor_si128(a.Data(), all_ones);
}
|
|
249
|
+
#ifdef __SSE4_1__
|
|
250
|
+
NETGEN_INLINE SIMD<double,2> If (SIMD<mask64,2> a, SIMD<double,2> b, SIMD<double,2> c)
|
|
251
|
+
{ return _mm_blendv_pd(c.Data(), b.Data(), _mm_castsi128_pd(a.Data())); }
|
|
252
|
+
#else
|
|
253
|
+
NETGEN_INLINE SIMD<double,2> If (SIMD<mask64,2> a, SIMD<double,2> b, SIMD<double,2> c)
|
|
254
|
+
{
|
|
255
|
+
return _mm_or_pd(
|
|
256
|
+
_mm_andnot_pd(_mm_castsi128_pd(a.Data()),c.Data()),
|
|
257
|
+
_mm_and_pd(b.Data(),_mm_castsi128_pd(a.Data()))
|
|
258
|
+
);}
|
|
259
|
+
#endif // __SSE4_1__
|
|
260
|
+
|
|
261
|
+
// Per lane: b[i] if a[i] > 0, otherwise c[i].
NETGEN_INLINE SIMD<double,2> IfPos (SIMD<double,2> a, SIMD<double,2> b, SIMD<double,2> c)
{
  auto pick = [&](int lane) -> double
  {
    if (a[lane] > 0)
      return b[lane];
    return c[lane];
  };
  return ngcore::SIMD<double,2>(pick);
}

// Per lane: b[i] if a[i] == 0, otherwise c[i].
NETGEN_INLINE SIMD<double,2> IfZero (SIMD<double,2> a, SIMD<double,2> b, SIMD<double,2> c)
{
  auto pick = [&](int lane) -> double
  {
    if (a[lane] == 0.)
      return b[lane];
    return c[lane];
  };
  return ngcore::SIMD<double,2>(pick);
}
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
// Sum of the two lanes of sd, returned as a scalar.
NETGEN_INLINE double HSum (SIMD<double,2> sd)
{
  const __m128d lanes = sd.Data();
  const __m128d pair_sum = my_mm_hadd_pd(lanes, lanes);   // low lane = sd[0]+sd[1]
  return _mm_cvtsd_f64(pair_sum);
}
|
|
271
|
+
|
|
272
|
+
// Horizontal sums of two vectors at once: the result built from
// my_mm_hadd_pd(sd1, sd2) holds the lane sum of sd1 and the lane sum of sd2.
NETGEN_INLINE auto HSum (SIMD<double,2> sd1, SIMD<double,2> sd2)
{
  return SIMD<double,2>(my_mm_hadd_pd(sd1.Data(), sd2.Data()));
}
|
|
278
|
+
|
|
279
|
+
// Lane-wise select for 64-bit integers: (mask & b) | (~mask & c).
NETGEN_INLINE SIMD<int64_t, 2> If(SIMD<mask64, 2> a, SIMD<int64_t, 2> b,
                                  SIMD<int64_t, 2> c)
{
  const __m128i mask = a.Data();
  const __m128i taken_from_c = _mm_andnot_si128(mask, c.Data());
  const __m128i taken_from_b = _mm_and_si128(b.Data(), mask);
  return _mm_or_si128(taken_from_c, taken_from_b);
}
|
|
286
|
+
|
|
287
|
+
}
|
|
288
|
+
|
|
289
|
+
#endif // NETGEN_CORE_SIMD_SSE_HPP
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
#ifndef NETGEN_CORE_STATUSHANDLER
|
|
2
|
+
#define NETGEN_CORE_STATUSHANDLER
|
|
3
|
+
|
|
4
|
+
#include <string>
|
|
5
|
+
#include "utils.hpp"
|
|
6
|
+
|
|
7
|
+
namespace ngcore
|
|
8
|
+
{
|
|
9
|
+
|
|
10
|
+
class NGCORE_API multithreadt
|
|
11
|
+
{
|
|
12
|
+
public:
|
|
13
|
+
int pause;
|
|
14
|
+
int testmode;
|
|
15
|
+
int redraw;
|
|
16
|
+
int drawing;
|
|
17
|
+
int terminate;
|
|
18
|
+
int running;
|
|
19
|
+
double percent;
|
|
20
|
+
const char * task;
|
|
21
|
+
bool demorunning;
|
|
22
|
+
std::string * tcl_todo = new std::string(""); // tcl commands set from parallel thread
|
|
23
|
+
multithreadt();
|
|
24
|
+
};
|
|
25
|
+
|
|
26
|
+
// The shared status object; defined in a source file, declared volatile here.
NGCORE_API extern volatile multithreadt multithread;

// Status reporting functions, defined elsewhere in the library.
// NOTE(review): behaviour is not visible from this header; the names suggest a
// status-message stack plus a progress percentage - confirm in the implementation.
NGCORE_API void SetStatMsg(const std::string& s);

NGCORE_API void PushStatus(const std::string& s);
NGCORE_API void PushStatusF(const std::string& s);
NGCORE_API void PopStatus();
NGCORE_API void SetThreadPercent(double percent);
// Writes its results through the two reference parameters.
NGCORE_API void GetStatus(std::string & s, double & percentage);
|
|
36
|
+
}
|
|
37
|
+
#endif
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
#ifndef NETGEN_CORE_SYMBOLTABLE_HPP
|
|
2
|
+
#define NETGEN_CORE_SYMBOLTABLE_HPP
|
|
3
|
+
|
|
4
|
+
#include <ostream>
|
|
5
|
+
#include <string>
|
|
6
|
+
#include <vector>
|
|
7
|
+
|
|
8
|
+
#include "exception.hpp"
|
|
9
|
+
#include "ngcore_api.hpp"
|
|
10
|
+
|
|
11
|
+
namespace ngcore
|
|
12
|
+
{
|
|
13
|
+
/**
|
|
14
|
+
A symbol table.
|
|
15
|
+
|
|
16
|
+
The symboltable provides a mapping from string identifiers
|
|
17
|
+
to the generic type T. The strings are copied.
|
|
18
|
+
Complexity by name access is linear, by index is constant.
|
|
19
|
+
*/
|
|
20
|
+
template <class T>
|
|
21
|
+
class SymbolTable
|
|
22
|
+
{
|
|
23
|
+
std::vector<std::string> names;
|
|
24
|
+
std::vector<T> data;
|
|
25
|
+
public:
|
|
26
|
+
using value_type = T;
|
|
27
|
+
using reference = typename std::vector<T>::reference;
|
|
28
|
+
using const_reference = typename std::vector<T>::const_reference;
|
|
29
|
+
|
|
30
|
+
/// Creates a symboltable
|
|
31
|
+
SymbolTable () = default;
|
|
32
|
+
SymbolTable (const SymbolTable<T> &) = default;
|
|
33
|
+
SymbolTable (SymbolTable<T> &&) noexcept = default;
|
|
34
|
+
|
|
35
|
+
~SymbolTable() = default;
|
|
36
|
+
|
|
37
|
+
SymbolTable& operator=(const SymbolTable<T>&) = default;
|
|
38
|
+
SymbolTable& operator=(SymbolTable<T>&&) = default;
|
|
39
|
+
|
|
40
|
+
template<typename ARCHIVE>
|
|
41
|
+
auto DoArchive(ARCHIVE& ar)
|
|
42
|
+
-> typename std::enable_if_t<ARCHIVE::template is_archivable<T>, void>
|
|
43
|
+
{
|
|
44
|
+
ar & names & data;
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
/// INDEX of symbol name, throws exception if unused
|
|
48
|
+
size_t Index (std::string_view name) const
|
|
49
|
+
{
|
|
50
|
+
for (size_t i = 0; i < names.size(); i++)
|
|
51
|
+
if (names[i] == name) return i;
|
|
52
|
+
throw RangeException("SymbolTable", name);
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
/// Index of symbol name, returns -1 if unused
|
|
56
|
+
int CheckIndex (std::string_view name) const
|
|
57
|
+
{
|
|
58
|
+
for (int i = 0; i < names.size(); i++)
|
|
59
|
+
if (names[i] == name) return i;
|
|
60
|
+
return -1;
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
/// number of identifiers
|
|
64
|
+
size_t Size() const
|
|
65
|
+
{
|
|
66
|
+
return data.size();
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
/// Returns reference to element. exception for unused identifier
|
|
70
|
+
reference operator[] (std::string_view name)
|
|
71
|
+
{
|
|
72
|
+
return data[Index (name)];
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
const_reference operator[] (std::string_view name) const
|
|
76
|
+
{
|
|
77
|
+
return data[Index (name)];
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
/// Returns reference to i-th element, range check only in debug build
|
|
81
|
+
reference operator[] (size_t i)
|
|
82
|
+
{
|
|
83
|
+
NETGEN_CHECK_RANGE(i, 0, data.size());
|
|
84
|
+
return data[i];
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
/// Returns const reference to i-th element, range check only in debug build
|
|
88
|
+
const_reference operator[] (size_t i) const
|
|
89
|
+
{
|
|
90
|
+
NETGEN_CHECK_RANGE(i, 0, data.size());
|
|
91
|
+
return data[i];
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
/// Returns name of i-th element, range check only in debug build
|
|
95
|
+
const std::string & GetName (size_t i) const
|
|
96
|
+
{
|
|
97
|
+
NETGEN_CHECK_RANGE(i, 0, names.size());
|
|
98
|
+
return names[i];
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
/// Associates el to the string name, overrides if name is used
|
|
102
|
+
void Set (std::string_view name, const T & el)
|
|
103
|
+
{
|
|
104
|
+
int i = CheckIndex (name);
|
|
105
|
+
if (i >= 0)
|
|
106
|
+
data[i] = el;
|
|
107
|
+
else
|
|
108
|
+
{
|
|
109
|
+
data.push_back(el);
|
|
110
|
+
names.push_back(std::string(name));
|
|
111
|
+
}
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
/*
|
|
117
|
+
bool Used (const std::string & name) const
|
|
118
|
+
{
|
|
119
|
+
return CheckIndex(name) >= 0;
|
|
120
|
+
}
|
|
121
|
+
*/
|
|
122
|
+
|
|
123
|
+
bool Used (std::string_view name) const
|
|
124
|
+
{
|
|
125
|
+
return CheckIndex(name) >= 0;
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
/// Deletes symboltable
|
|
129
|
+
inline void DeleteAll ()
|
|
130
|
+
{
|
|
131
|
+
names.clear();
|
|
132
|
+
data.clear();
|
|
133
|
+
}
|
|
134
|
+
|
|
135
|
+
// Adds all elements from other symboltable
|
|
136
|
+
SymbolTable<T>& Update(const SymbolTable<T>& tbl2)
|
|
137
|
+
{
|
|
138
|
+
for (size_t i = 0; i < tbl2.Size(); i++)
|
|
139
|
+
Set (tbl2.GetName(i), tbl2[i]);
|
|
140
|
+
return *this;
|
|
141
|
+
}
|
|
142
|
+
};
|
|
143
|
+
|
|
144
|
+
template <typename T>
|
|
145
|
+
std::ostream & operator<< (std::ostream & ost, const SymbolTable<T> & st)
|
|
146
|
+
{
|
|
147
|
+
for (int i = 0; i < st.Size(); i++)
|
|
148
|
+
ost << st.GetName(i) << " : " << st[i] << std::endl;
|
|
149
|
+
return ost;
|
|
150
|
+
}
|
|
151
|
+
} // namespace ngcore
|
|
152
|
+
|
|
153
|
+
#endif // NETGEN_CORE_SYMBOLTABLE_HPP
|