netgen-mesher 6.2.2504.post11.dev0__cp313-cp313-win_amd64.whl → 6.2.2506.post48.dev0__cp313-cp313-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- netgen/__init__.pyi +3 -3
- netgen/cmake/NetgenConfig.cmake +10 -9
- netgen/config/__init__.pyi +8 -8
- netgen/config/config.py +7 -7
- netgen/config/config.pyi +8 -8
- netgen/include/core/archive.hpp +18 -3
- netgen/include/core/array.hpp +20 -4
- netgen/include/core/autodiff.hpp +9 -11
- netgen/include/core/autodiffdiff.hpp +0 -2
- netgen/include/core/bitarray.hpp +1 -1
- netgen/include/core/flags.hpp +1 -1
- netgen/include/core/hashtable.hpp +1 -1
- netgen/include/core/memtracer.hpp +7 -7
- netgen/include/core/ngcore.hpp +5 -0
- netgen/include/core/ngcore_api.hpp +11 -0
- netgen/include/core/paje_trace.hpp +9 -8
- netgen/include/core/profiler.hpp +5 -5
- netgen/include/core/register_archive.hpp +8 -0
- netgen/include/core/simd.hpp +69 -1
- netgen/include/core/simd_arm64.hpp +205 -1
- netgen/include/core/simd_avx.hpp +72 -4
- netgen/include/core/simd_avx512.hpp +9 -0
- netgen/include/core/simd_generic.hpp +274 -8
- netgen/include/core/simd_math.hpp +178 -0
- netgen/include/core/simd_sse.hpp +11 -1
- netgen/include/core/statushandler.hpp +37 -0
- netgen/include/core/table.hpp +3 -2
- netgen/include/core/taskmanager.hpp +34 -1
- netgen/include/core/utils.hpp +3 -8
- netgen/include/include/netgen_version.hpp +4 -4
- netgen/include/meshing/basegeom.hpp +1 -4
- netgen/include/meshing/global.hpp +0 -17
- netgen/include/meshing/hpref_tet.hpp +41 -0
- netgen/include/meshing/hprefinement.hpp +2 -0
- netgen/include/meshing/meshtype.hpp +2 -1
- netgen/include/meshing/msghandler.hpp +9 -6
- netgen/include/meshing/topology.hpp +2 -2
- netgen/include/nginterface.h +3 -2
- netgen/include/occ/occ_utils.hpp +26 -0
- netgen/include/occ/occgeom.hpp +8 -0
- netgen/include/pybind11/attr.h +40 -8
- netgen/include/pybind11/buffer_info.h +14 -14
- netgen/include/pybind11/cast.h +553 -29
- netgen/include/pybind11/chrono.h +4 -1
- netgen/include/pybind11/conduit/README.txt +15 -0
- netgen/include/pybind11/conduit/pybind11_conduit_v1.h +116 -0
- netgen/include/pybind11/conduit/pybind11_platform_abi_id.h +87 -0
- netgen/include/pybind11/conduit/wrap_include_python_h.h +72 -0
- netgen/include/pybind11/critical_section.h +56 -0
- netgen/include/pybind11/detail/class.h +172 -97
- netgen/include/pybind11/detail/common.h +270 -189
- netgen/include/pybind11/detail/cpp_conduit.h +75 -0
- netgen/include/pybind11/detail/descr.h +55 -0
- netgen/include/pybind11/detail/dynamic_raw_ptr_cast_if_possible.h +39 -0
- netgen/include/pybind11/detail/exception_translation.h +71 -0
- netgen/include/pybind11/detail/function_record_pyobject.h +191 -0
- netgen/include/pybind11/detail/init.h +113 -9
- netgen/include/pybind11/detail/internals.h +479 -344
- netgen/include/pybind11/detail/native_enum_data.h +209 -0
- netgen/include/pybind11/detail/pybind11_namespace_macros.h +82 -0
- netgen/include/pybind11/detail/struct_smart_holder.h +378 -0
- netgen/include/pybind11/detail/type_caster_base.h +506 -133
- netgen/include/pybind11/detail/using_smart_holder.h +22 -0
- netgen/include/pybind11/detail/value_and_holder.h +90 -0
- netgen/include/pybind11/eigen/matrix.h +19 -10
- netgen/include/pybind11/eigen/tensor.h +15 -11
- netgen/include/pybind11/embed.h +50 -46
- netgen/include/pybind11/eval.h +11 -6
- netgen/include/pybind11/functional.h +58 -49
- netgen/include/pybind11/gil.h +34 -82
- netgen/include/pybind11/gil_safe_call_once.h +12 -1
- netgen/include/pybind11/gil_simple.h +37 -0
- netgen/include/pybind11/native_enum.h +67 -0
- netgen/include/pybind11/numpy.h +272 -93
- netgen/include/pybind11/pybind11.h +947 -265
- netgen/include/pybind11/pytypes.h +127 -21
- netgen/include/pybind11/stl/filesystem.h +23 -25
- netgen/include/pybind11/stl.h +277 -59
- netgen/include/pybind11/stl_bind.h +42 -7
- netgen/include/pybind11/subinterpreter.h +299 -0
- netgen/include/pybind11/trampoline_self_life_support.h +65 -0
- netgen/include/pybind11/typing.h +177 -4
- netgen/include/pybind11/warnings.h +75 -0
- netgen/include/visualization/mvdraw.hpp +48 -12
- netgen/include/visualization/vssolution.hpp +3 -1
- netgen/lib/libnggui.lib +0 -0
- netgen/lib/ngcore.lib +0 -0
- netgen/lib/nglib.lib +0 -0
- netgen/libnggui.dll +0 -0
- netgen/libngguipy.pyd +0 -0
- netgen/libngpy/_NgOCC.pyi +224 -139
- netgen/libngpy/_csg.pyi +26 -26
- netgen/libngpy/_geom2d.pyi +34 -25
- netgen/libngpy/_meshing.pyi +262 -111
- netgen/libngpy/_stl.pyi +3 -4
- netgen/libngpy.pyd +0 -0
- netgen/ngcore.dll +0 -0
- netgen/nglib.dll +0 -0
- netgen/read_gmsh.py +41 -0
- netgen/togl.dll +0 -0
- netgen/version.py +1 -1
- netgen/webgui.py +38 -2
- {netgen_mesher-6.2.2504.post11.dev0.dist-info → netgen_mesher-6.2.2506.post48.dev0.dist-info}/METADATA +2 -1
- {netgen_mesher-6.2.2504.post11.dev0.dist-info → netgen_mesher-6.2.2506.post48.dev0.dist-info}/RECORD +153 -132
- pyngcore/pyngcore.cp313-win_amd64.pyd +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/boundarycondition.geo +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/boxcyl.geo +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/circle_on_cube.geo +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/cone.geo +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/cube.geo +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/cubeandring.geo +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/cubeandspheres.geo +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/cubemcyl.geo +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/cubemsphere.geo +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/cylinder.geo +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/cylsphere.geo +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/doc/ng4.pdf +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/ellipsoid.geo +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/ellipticcyl.geo +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/extrusion.geo +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/fichera.geo +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/frame.step +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/hinge.stl +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/lshape3d.geo +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/manyholes.geo +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/manyholes2.geo +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/matrix.geo +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/ortho.geo +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/part1.stl +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/period.geo +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/py_tutorials/exportNeutral.py +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/py_tutorials/mesh.py +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/py_tutorials/shaft.geo +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/revolution.geo +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/screw.step +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/sculpture.geo +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/shaft.geo +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/shell.geo +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/sphere.geo +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/sphereincube.geo +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/square.in2d +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/squarecircle.in2d +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/squarehole.in2d +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/torus.geo +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/trafo.geo +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/twobricks.geo +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/twocubes.geo +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/twocyl.geo +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.dist-info → netgen_mesher-6.2.2506.post48.dev0.dist-info}/AUTHORS +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.dist-info → netgen_mesher-6.2.2506.post48.dev0.dist-info}/LICENSE +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.dist-info → netgen_mesher-6.2.2506.post48.dev0.dist-info}/WHEEL +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.dist-info → netgen_mesher-6.2.2506.post48.dev0.dist-info}/entry_points.txt +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.dist-info → netgen_mesher-6.2.2506.post48.dev0.dist-info}/top_level.txt +0 -0
|
@@ -30,6 +30,152 @@ namespace ngcore
|
|
|
30
30
|
auto Hi() const { return mask[1]; }
|
|
31
31
|
};
|
|
32
32
|
|
|
33
|
+
|
|
34
|
+
// *************************** int32 ***************************
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
template<>
|
|
39
|
+
class SIMD<int32_t,2>
|
|
40
|
+
{
|
|
41
|
+
int32x2_t data;
|
|
42
|
+
public:
|
|
43
|
+
static constexpr int Size() { return 2; }
|
|
44
|
+
SIMD() {}
|
|
45
|
+
SIMD (int32_t val) : data{val,val} {}
|
|
46
|
+
SIMD (int32_t v0, int32_t v1) : data{v0,v1} { }
|
|
47
|
+
SIMD (SIMD<int32_t,1> lo, SIMD<int32_t,1> hi) : data{lo[0], hi[0] } { }
|
|
48
|
+
SIMD (std::array<int32_t, 2> arr) : data{arr[0], arr[1]} { }
|
|
49
|
+
|
|
50
|
+
SIMD (int32x2_t _data) { data = _data; }
|
|
51
|
+
|
|
52
|
+
NETGEN_INLINE auto Data() const { return data; }
|
|
53
|
+
NETGEN_INLINE auto & Data() { return data; }
|
|
54
|
+
|
|
55
|
+
SIMD<int32_t,1> Lo() const { return Get<0>(); }
|
|
56
|
+
SIMD<int32_t,1> Hi() const { return Get<1>(); }
|
|
57
|
+
|
|
58
|
+
int32_t operator[] (int i) const { return data[i]; }
|
|
59
|
+
int32_t & operator[] (int i) { return ((int32_t*)&data)[i]; }
|
|
60
|
+
|
|
61
|
+
template <int I>
|
|
62
|
+
int32_t Get() const { return data[I]; }
|
|
63
|
+
static SIMD FirstInt(int n0=0) { return { n0+0, n0+1 }; }
|
|
64
|
+
};
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
template<>
|
|
68
|
+
class SIMD<int32_t,4>
|
|
69
|
+
{
|
|
70
|
+
int32x4_t data;
|
|
71
|
+
public:
|
|
72
|
+
static constexpr int Size() { return 4; }
|
|
73
|
+
SIMD() {}
|
|
74
|
+
SIMD (int32_t val) : data{val,val,val,val} {}
|
|
75
|
+
SIMD (int32_t v0, int32_t v1, int32_t v2, int32_t v3) : data{v0,v1,v2,v3} { }
|
|
76
|
+
SIMD (std::array<int32_t, 4> arr) : data{arr[0], arr[1], arr[2], arr[3]} { }
|
|
77
|
+
|
|
78
|
+
SIMD (int32x4_t _data) { data = _data; }
|
|
79
|
+
SIMD (SIMD<int32_t,2> lo, SIMD<int32_t,2> hi) : data{vcombine_s32(lo.Data(), hi.Data())} {}
|
|
80
|
+
SIMD (int32_t * p) : data{vld1q_s32(p)} { }
|
|
81
|
+
|
|
82
|
+
NETGEN_INLINE auto Data() const { return data; }
|
|
83
|
+
NETGEN_INLINE auto & Data() { return data; }
|
|
84
|
+
|
|
85
|
+
SIMD<int32_t,2> Lo() const { return vget_low_s32(data); }
|
|
86
|
+
SIMD<int32_t,2> Hi() const { return vget_high_s32(data); }
|
|
87
|
+
|
|
88
|
+
int32_t operator[] (int i) const { return data[i]; }
|
|
89
|
+
int32_t & operator[] (int i) { return ((int32_t*)&data)[i]; }
|
|
90
|
+
|
|
91
|
+
void Store (int32_t * p) { vst1q_s32(p, data); }
|
|
92
|
+
|
|
93
|
+
template <int I>
|
|
94
|
+
int32_t Get() const { return data[I]; }
|
|
95
|
+
static SIMD FirstInt(int n0=0) { return { n0+0, n0+1, n0+2, n0+3 }; }
|
|
96
|
+
};
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
// Lane-wise minimum of two 2-lane int32 vectors.
NETGEN_INLINE auto Min (SIMD<int32_t,2> a, SIMD<int32_t,2> b)
{
  const int32x2_t smaller = vmin_s32(a.Data(), b.Data());
  return SIMD<int32_t,2>(smaller);
}
|
|
103
|
+
|
|
104
|
+
// Lane-wise maximum of two 2-lane int32 vectors.
NETGEN_INLINE auto Max (SIMD<int32_t,2> a, SIMD<int32_t,2> b)
{
  const int32x2_t larger = vmax_s32(a.Data(), b.Data());
  return SIMD<int32_t,2>(larger);
}
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
// Lane-wise minimum of two 4-lane int32 vectors.
NETGEN_INLINE auto Min (SIMD<int32_t,4> a, SIMD<int32_t,4> b)
{
  const int32x4_t smaller = vminq_s32(a.Data(), b.Data());
  return SIMD<int32_t,4>(smaller);
}
|
|
112
|
+
|
|
113
|
+
// Lane-wise maximum of two 4-lane int32 vectors.
NETGEN_INLINE auto Max (SIMD<int32_t,4> a, SIMD<int32_t,4> b)
{
  const int32x4_t larger = vmaxq_s32(a.Data(), b.Data());
  return SIMD<int32_t,4>(larger);
}
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
// *************************** int64 ***************************
|
|
121
|
+
|
|
122
|
+
template<>
|
|
123
|
+
class SIMD<int64_t,2>
|
|
124
|
+
{
|
|
125
|
+
int64x2_t data;
|
|
126
|
+
public:
|
|
127
|
+
static constexpr int Size() { return 2; }
|
|
128
|
+
SIMD() {}
|
|
129
|
+
SIMD (int64_t val) : data{val,val} {}
|
|
130
|
+
SIMD (int64_t v0, int64_t v1) : data{vcombine_s64(int64x1_t{v0}, int64x1_t{v1})} { }
|
|
131
|
+
SIMD (std::array<int64_t, 2> arr) : data{arr[0], arr[1]} { }
|
|
132
|
+
|
|
133
|
+
SIMD (int64x2_t _data) { data = _data; }
|
|
134
|
+
|
|
135
|
+
NETGEN_INLINE auto Data() const { return data; }
|
|
136
|
+
NETGEN_INLINE auto & Data() { return data; }
|
|
137
|
+
|
|
138
|
+
int64_t Lo() const { return Get<0>(); }
|
|
139
|
+
int64_t Hi() const { return Get<1>(); }
|
|
140
|
+
|
|
141
|
+
int64_t operator[] (int i) const { return data[i]; }
|
|
142
|
+
int64_t & operator[] (int i) { return ((int64_t*)&data)[i]; }
|
|
143
|
+
|
|
144
|
+
template <int I>
|
|
145
|
+
int64_t Get() const { return data[I]; }
|
|
146
|
+
static SIMD FirstInt(int n0=0) { return { n0+0, n0+1 }; }
|
|
147
|
+
};
|
|
148
|
+
|
|
149
|
+
// Bitwise AND of two 2-lane int64 vectors.
NETGEN_INLINE SIMD<int64_t,2> operator& (SIMD<int64_t,2> a, SIMD<int64_t,2> b)
{
  const int64x2_t bits = vandq_s64(a.Data(), b.Data());
  return SIMD<int64_t,2>(bits);
}
|
|
153
|
+
|
|
154
|
+
// Lane-wise sum of two 2-lane int64 vectors.
NETGEN_INLINE SIMD<int64_t,2> operator+ (SIMD<int64_t,2> a, SIMD<int64_t,2> b)
{
  const int64x2_t sum = vaddq_s64(a.Data(), b.Data());
  return SIMD<int64_t,2>(sum);
}
|
|
158
|
+
|
|
159
|
+
// Lane-wise equality of two int64 vectors; each result lane is all-ones
// where the operands are equal and zero otherwise.
// The operands hold signed lanes (int64x2_t), so the signed compare
// intrinsic is used: vceqq_u64 expects uint64x2_t arguments and only
// compiles when the compiler allows lax vector conversions. The result
// type (uint64x2_t) is the same for both intrinsics.
NETGEN_INLINE SIMD<mask64,2> operator== (SIMD<int64_t> a, SIMD<int64_t> b)
{
  return vceqq_s64(a.Data(), b.Data());
}
|
|
163
|
+
|
|
164
|
+
// Lane-wise signed "greater than"; each result lane is all-ones where
// a > b and zero otherwise.
NETGEN_INLINE SIMD<mask64,2> operator> (SIMD<int64_t> a, SIMD<int64_t> b)
{
  const auto greater = vcgtq_s64(a.Data(), b.Data());
  return greater;
}
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
template <int N>
|
|
171
|
+
SIMD<int64_t,2> operator<< (SIMD<int64_t,2> a, IC<N> n)
|
|
172
|
+
{
|
|
173
|
+
return vshlq_n_s64(a.Data(), N);
|
|
174
|
+
}
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
// *************************** double ***************************
|
|
33
179
|
|
|
34
180
|
template<>
|
|
35
181
|
class SIMD<double,2>
|
|
@@ -162,6 +308,16 @@ namespace ngcore
|
|
|
162
308
|
auto tmp = vcmlaq_f64(c.Data(), a.Data(), b.Data()); // are * b
|
|
163
309
|
c = vcmlaq_rot90_f64(tmp, a.Data(), b.Data()); // += i*aim * b
|
|
164
310
|
}
|
|
311
|
+
|
|
312
|
+
// 4-wide complex fused multiply-add: delegates to the 2-wide FMAComplex
// on the lower and the upper half independently and reassembles c.
NETGEN_INLINE void FMAComplex (SIMD<double,4> a, SIMD<double,4> b, SIMD<double,4> & c)
{
  SIMD<double,2> acc_lo = c.Lo();
  FMAComplex (a.Lo(), b.Lo(), acc_lo);

  SIMD<double,2> acc_hi = c.Hi();
  FMAComplex (a.Hi(), b.Hi(), acc_hi);

  c = SIMD<double,4> (acc_lo, acc_hi);
}
|
|
320
|
+
|
|
165
321
|
|
|
166
322
|
|
|
167
323
|
NETGEN_INLINE SIMD<double,2> operator+ (SIMD<double,2> a, SIMD<double,2> b)
|
|
@@ -178,6 +334,52 @@ namespace ngcore
|
|
|
178
334
|
NETGEN_INLINE SIMD<double,2> operator/ (SIMD<double,2> a, SIMD<double,2> b)
|
|
179
335
|
{ return a.Data()/b.Data(); }
|
|
180
336
|
|
|
337
|
+
// Lane-wise square root of a 2-lane double vector.
NETGEN_INLINE SIMD<double,2> sqrt (SIMD<double,2> x)
{
  const float64x2_t root = vsqrtq_f64(x.Data());
  return SIMD<double,2>(root);
}
|
|
339
|
+
|
|
340
|
+
|
|
341
|
+
// Round every lane to an integral double using vrndnq_f64
// (round to nearest, ties to even).
NETGEN_INLINE SIMD<double,2> round (SIMD<double,2> x)
{
  const float64x2_t nearest = vrndnq_f64(x.Data());
  return SIMD<double,2>(nearest);
}
|
|
345
|
+
|
|
346
|
+
// Round every lane to the nearest integer and return it as int64.
// vcvtaq_s64_f64 converts with round-to-nearest, ties away from zero,
// which is the rounding rule of std::lround. The plain vcvtq_s64_f64
// conversion truncates toward zero (2.7 -> 2, -2.7 -> -2) and therefore
// does not implement a rounding conversion.
NETGEN_INLINE SIMD<int64_t,2> lround (SIMD<double,2> x)
{
  return vcvtaq_s64_f64(x.Data());
}
|
|
350
|
+
|
|
351
|
+
|
|
352
|
+
|
|
353
|
+
// Reciprocal square root, computed as a full-precision division by sqrt(x).
// An estimate-based variant (vrsqrteq_f64 followed by three refinement
// steps, either via vrsqrtsq_f64 or y = y * (1.5 - 0.5*x*y*y)) was left
// disabled in the source and is not used.
NETGEN_INLINE SIMD<double,2> rsqrt (SIMD<double,2> x)
{
  const SIMD<double,2> root = sqrt(x);
  return 1.0 / root;
}
|
|
374
|
+
|
|
375
|
+
|
|
376
|
+
|
|
377
|
+
|
|
378
|
+
template <>
|
|
379
|
+
NETGEN_INLINE SIMD<double,2> Reinterpret (SIMD<int64_t,2> a)
|
|
380
|
+
{
|
|
381
|
+
return vreinterpretq_f64_s64(a.Data());
|
|
382
|
+
}
|
|
181
383
|
|
|
182
384
|
|
|
183
385
|
NETGEN_INLINE SIMD<double,2> If (SIMD<mask64,2> a, SIMD<double,2> b, SIMD<double,2> c)
|
|
@@ -188,7 +390,9 @@ namespace ngcore
|
|
|
188
390
|
}
|
|
189
391
|
// Bitwise select: takes the bits of b where the mask a is set and the
// bits of c where it is clear (vbslq_s64), replacing the former
// per-lane scalar ternary.
NETGEN_INLINE SIMD<int64_t,2> If (SIMD<mask64,2> a, SIMD<int64_t,2> b, SIMD<int64_t,2> c)
{
  return vbslq_s64(vreinterpretq_u64_s64(a.Data()), b.Data(), c.Data());
}
|
|
193
397
|
|
|
194
398
|
NETGEN_INLINE SIMD<mask64,2> operator&& (SIMD<mask64,2> a, SIMD<mask64,2> b)
|
netgen/include/core/simd_avx.hpp
CHANGED
|
@@ -21,11 +21,31 @@ namespace ngcore
|
|
|
21
21
|
#endif // defined(__GNUC__) && (__GNUC__ == 7)
|
|
22
22
|
|
|
23
23
|
#if defined(__AVX2__)
|
|
24
|
+
// AVX2 build: 64-bit lane equality is available natively, so this is a
// plain forwarder to _mm256_cmpeq_epi64.
NETGEN_INLINE __m256i my_mm256_cmpeq_epi64 (__m256i a, __m256i b)
{
  const __m256i equal = _mm256_cmpeq_epi64 (a, b);
  return equal;
}
|
|
28
|
+
|
|
24
29
|
NETGEN_INLINE __m256i my_mm256_cmpgt_epi64 (__m256i a, __m256i b)
|
|
25
30
|
{
|
|
26
31
|
return _mm256_cmpgt_epi64 (a,b);
|
|
27
32
|
}
|
|
33
|
+
|
|
34
|
+
// AVX2 build: sign-extend four 32-bit integers to four 64-bit integers
// with the native _mm256_cvtepi32_epi64.
NETGEN_INLINE __m256i my_mm256_cvtepi32_epi64 (__m128i a)
{
  const __m256i widened = _mm256_cvtepi32_epi64 (a);
  return widened;
}
|
|
38
|
+
|
|
28
39
|
#else
|
|
40
|
+
// Build without AVX2: emulate the 256-bit 64-bit-lane equality by
// comparing the two 128-bit halves separately and gluing the results
// back together.
NETGEN_INLINE __m256i my_mm256_cmpeq_epi64 (__m256i a, __m256i b)
{
  const __m128i a_lo = _mm256_extractf128_si256(a, 0);
  const __m128i b_lo = _mm256_extractf128_si256(b, 0);
  const __m128i a_hi = _mm256_extractf128_si256(a, 1);
  const __m128i b_hi = _mm256_extractf128_si256(b, 1);

  const __m128i eq_lo = _mm_cmpeq_epi64(a_lo, b_lo);
  const __m128i eq_hi = _mm_cmpeq_epi64(a_hi, b_hi);

  // eq_lo occupies the lower 128 bits, eq_hi is inserted as the upper half
  return _mm256_insertf128_si256 (_mm256_castsi128_si256(eq_lo), eq_hi, 1);
}
|
|
48
|
+
|
|
29
49
|
NETGEN_INLINE __m256i my_mm256_cmpgt_epi64 (__m256i a, __m256i b)
|
|
30
50
|
{
|
|
31
51
|
__m128i rlo = _mm_cmpgt_epi64(_mm256_extractf128_si256(a, 0),
|
|
@@ -34,6 +54,13 @@ namespace ngcore
|
|
|
34
54
|
_mm256_extractf128_si256(b, 1));
|
|
35
55
|
return _mm256_insertf128_si256 (_mm256_castsi128_si256(rlo), rhi, 1);
|
|
36
56
|
}
|
|
57
|
+
|
|
58
|
+
// Build without AVX2: sign-extend four 32-bit integers to 64 bit using
// two 128-bit conversions.
NETGEN_INLINE __m256i my_mm256_cvtepi32_epi64 (__m128i a)
{
  // move elements 2 and 3 into the two low positions
  const __m128i upper_pair = _mm_shuffle_epi32(a, _MM_SHUFFLE(3, 2, 3, 2));

  const __m128i wide_lo = _mm_cvtepi32_epi64(a);           // elements 0, 1
  const __m128i wide_hi = _mm_cvtepi32_epi64(upper_pair);  // elements 2, 3

  return _mm256_insertf128_si256 (_mm256_castsi128_si256(wide_lo), wide_hi, 1);
}
|
|
37
64
|
#endif
|
|
38
65
|
|
|
39
66
|
|
|
@@ -86,7 +113,7 @@ namespace ngcore
|
|
|
86
113
|
: data{_mm256_set_epi64x(a[3],a[2],a[1],a[0])}
|
|
87
114
|
{}
|
|
88
115
|
SIMD (SIMD<int64_t,2> v0, SIMD<int64_t,2> v1)
|
|
89
|
-
: data(_mm256_set_m128i(
|
|
116
|
+
: data(_mm256_set_m128i(v1.Data(),v0.Data()))
|
|
90
117
|
{}
|
|
91
118
|
SIMD (__m256i _data) { data = _data; }
|
|
92
119
|
|
|
@@ -97,6 +124,13 @@ namespace ngcore
|
|
|
97
124
|
SIMD<int64_t,2> Lo() const { return _mm256_extractf128_si256(data, 0); }
|
|
98
125
|
SIMD<int64_t,2> Hi() const { return _mm256_extractf128_si256(data, 1); }
|
|
99
126
|
static SIMD FirstInt(int n0=0) { return { n0+0, n0+1, n0+2, n0+3 }; }
|
|
127
|
+
|
|
128
|
+
template <int I>
|
|
129
|
+
double Get() const
|
|
130
|
+
{
|
|
131
|
+
static_assert(I>=0 && I<4, "Index out of range");
|
|
132
|
+
return (*this)[I];
|
|
133
|
+
}
|
|
100
134
|
};
|
|
101
135
|
|
|
102
136
|
|
|
@@ -105,6 +139,11 @@ namespace ngcore
|
|
|
105
139
|
#ifdef __AVX2__
|
|
106
140
|
NETGEN_INLINE SIMD<int64_t,4> operator+ (SIMD<int64_t,4> a, SIMD<int64_t,4> b) { return _mm256_add_epi64(a.Data(),b.Data()); }
|
|
107
141
|
NETGEN_INLINE SIMD<int64_t,4> operator- (SIMD<int64_t,4> a, SIMD<int64_t,4> b) { return _mm256_sub_epi64(a.Data(),b.Data()); }
|
|
142
|
+
// Bitwise AND of two 4-lane int64 vectors, carried out in the
// double-precision domain via bit-preserving casts (_mm256_and_pd).
NETGEN_INLINE SIMD<int64_t,4> operator& (SIMD<int64_t,4> a, SIMD<int64_t,4> b)
{
  const __m256d lhs = _mm256_castsi256_pd(a.Data());
  const __m256d rhs = _mm256_castsi256_pd(b.Data());
  return _mm256_castpd_si256(_mm256_and_pd (lhs, rhs));
}
|
|
144
|
+
|
|
145
|
+
template <int N>
|
|
146
|
+
SIMD<int64_t,4> operator<< (SIMD<int64_t,4> a, IC<N> n) { return _mm256_sll_epi64(a.Data(),_mm_set_epi32(0,0,0,N)); }
|
|
108
147
|
#endif // __AVX2__
|
|
109
148
|
|
|
110
149
|
template<>
|
|
@@ -178,7 +217,11 @@ namespace ngcore
|
|
|
178
217
|
NETGEN_INLINE SIMD<double,4> floor (SIMD<double,4> a) { return _mm256_floor_pd(a.Data()); }
|
|
179
218
|
NETGEN_INLINE SIMD<double,4> ceil (SIMD<double,4> a) { return _mm256_ceil_pd(a.Data()); }
|
|
180
219
|
NETGEN_INLINE SIMD<double,4> fabs (SIMD<double,4> a) { return _mm256_max_pd(a.Data(), (-a).Data()); }
|
|
181
|
-
|
|
220
|
+
// Round every lane to the nearest integral double, with floating-point
// exceptions suppressed (_MM_FROUND_NO_EXC).
NETGEN_INLINE SIMD<double,4> round(SIMD<double,4> a)
{
  const __m256d nearest = _mm256_round_pd(a.Data(), _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
  return SIMD<double,4>(nearest);
}
|
|
221
|
+
// Round every lane to the nearest integer and return it as int64.
// The conversion goes through 32-bit integers (_mm256_cvtpd_epi32) that
// are then sign-extended, so the intermediate value is a 32-bit lane.
NETGEN_INLINE SIMD<int64_t,4> lround (SIMD<double,4> a)
{
  const __m256d nearest = _mm256_round_pd(a.Data(), _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
  const __m128i as_int32 = _mm256_cvtpd_epi32(nearest);
  return my_mm256_cvtepi32_epi64(as_int32);
}
|
|
182
225
|
|
|
183
226
|
#ifdef __FMA__
|
|
184
227
|
NETGEN_INLINE SIMD<double,4> FMA (SIMD<double,4> a, SIMD<double,4> b, SIMD<double,4> c)
|
|
@@ -255,9 +298,9 @@ namespace ngcore
|
|
|
255
298
|
NETGEN_INLINE SIMD<mask64,4> operator> (SIMD<int64_t,4> a , SIMD<int64_t,4> b)
|
|
256
299
|
{ return my_mm256_cmpgt_epi64(a.Data(),b.Data()); }
|
|
257
300
|
// Lane-wise equality of two 4-lane int64 vectors, routed through the
// my_mm256_cmpeq_epi64 wrapper.
NETGEN_INLINE SIMD<mask64,4> operator== (SIMD<int64_t,4> a , SIMD<int64_t,4> b)
{
  const __m256i equal = my_mm256_cmpeq_epi64(a.Data(), b.Data());
  return equal;
}
|
|
259
302
|
// Lane-wise inequality of two 4-lane int64 vectors: the equality mask
// from my_mm256_cmpeq_epi64 with every bit flipped (XOR with all-ones).
NETGEN_INLINE SIMD<mask64,4> operator!= (SIMD<int64_t,4> a , SIMD<int64_t,4> b)
{
  const __m256i equal = my_mm256_cmpeq_epi64(a.Data(), b.Data());
  const __m256i all_ones = _mm256_set1_epi32(-1);
  return _mm256_xor_si256(equal, all_ones);
}
|
|
261
304
|
|
|
262
305
|
#ifdef __AVX2__
|
|
263
306
|
NETGEN_INLINE SIMD<mask64,4> operator&& (SIMD<mask64,4> a, SIMD<mask64,4> b)
|
|
@@ -274,6 +317,15 @@ namespace ngcore
|
|
|
274
317
|
NETGEN_INLINE SIMD<mask64,4> operator! (SIMD<mask64,4> a)
|
|
275
318
|
{ return _mm256_castpd_si256(_mm256_xor_pd (_mm256_castsi256_pd(a.Data()),_mm256_castsi256_pd( _mm256_cmpeq_epi64(a.Data(),a.Data())))); }
|
|
276
319
|
#endif
|
|
320
|
+
|
|
321
|
+
template <>
|
|
322
|
+
NETGEN_INLINE SIMD<double,4> Reinterpret (SIMD<int64_t,4> a)
|
|
323
|
+
{
|
|
324
|
+
return _mm256_castsi256_pd (a.Data());
|
|
325
|
+
}
|
|
326
|
+
|
|
327
|
+
|
|
328
|
+
|
|
277
329
|
NETGEN_INLINE SIMD<double,4> If (SIMD<mask64,4> a, SIMD<double,4> b, SIMD<double,4> c)
|
|
278
330
|
{ return _mm256_blendv_pd(c.Data(), b.Data(), _mm256_castsi256_pd(a.Data())); }
|
|
279
331
|
|
|
@@ -314,6 +366,22 @@ namespace ngcore
|
|
|
314
366
|
}
|
|
315
367
|
|
|
316
368
|
|
|
369
|
+
/*
|
|
370
|
+
// untested ...
|
|
371
|
+
NETGEN_INLINE SIMD<double,4> rsqrt (SIMD<double,4> x)
|
|
372
|
+
{
|
|
373
|
+
// return 1.0 / sqrt(x);
|
|
374
|
+
// SIMD<double,4> y = _mm256_rsqrt14_pd(x.Data()); // only avx512
|
|
375
|
+
SIMD<double,4> y = _mm256_cvtps_pd ( _mm_rsqrt_ps ( _mm256_cvtpd_ps (x.Data())));
|
|
376
|
+
auto x_half = 0.5*x;
|
|
377
|
+
y = y * (1.5 - (x_half * y * y));
|
|
378
|
+
y = y * (1.5 - (x_half * y * y));
|
|
379
|
+
return y;
|
|
380
|
+
}
|
|
381
|
+
*/
|
|
382
|
+
|
|
383
|
+
|
|
384
|
+
|
|
317
385
|
NETGEN_INLINE SIMD<int64_t,4> If (SIMD<mask64,4> a, SIMD<int64_t,4> b, SIMD<int64_t,4> c)
|
|
318
386
|
{ return _mm256_castpd_si256(_mm256_blendv_pd(_mm256_castsi256_pd(c.Data()), _mm256_castsi256_pd(b.Data()),
|
|
319
387
|
_mm256_castsi256_pd(a.Data()))); }
|
|
@@ -59,6 +59,15 @@ namespace ngcore
|
|
|
59
59
|
data = _mm512_set_epi64(func(7), func(6), func(5), func(4), func(3), func(2), func(1), func(0));
|
|
60
60
|
}
|
|
61
61
|
|
|
62
|
+
// Build an 8-lane vector from two 4-lane halves: v0 fills the lower
// 256 bits, v1 is inserted as the upper 256 bits.
SIMD (SIMD<int64_t,4> v0, SIMD<int64_t,4> v1)
  : data(_mm512_inserti64x4(_mm512_castsi256_si512(v0.Data()), v1.Data(), 1))
{ }
|
|
67
|
+
|
|
68
|
+
// Lower 256 bits (lanes 0-3) as a 4-lane vector.
SIMD<int64_t,4> Lo() const
{
  return _mm512_castsi512_si256(data);
}
|
|
69
|
+
// Upper 256 bits (lanes 4-7) as a 4-lane vector.
SIMD<int64_t,4> Hi() const
{
  return _mm512_extracti64x4_epi64(data, 1);
}
|
|
70
|
+
|
|
62
71
|
|
|
63
72
|
NETGEN_INLINE auto operator[] (int i) const { return ((int64_t*)(&data))[i]; }
|
|
64
73
|
NETGEN_INLINE auto & operator[] (int i) { return ((int64_t*)(&data))[i]; }
|