netgen_mesher-6.2.2504.post11.dev0-cp313-cp313-win_amd64.whl → netgen_mesher-6.2.2506.post48.dev0-cp313-cp313-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- netgen/__init__.pyi +3 -3
- netgen/cmake/NetgenConfig.cmake +10 -9
- netgen/config/__init__.pyi +8 -8
- netgen/config/config.py +7 -7
- netgen/config/config.pyi +8 -8
- netgen/include/core/archive.hpp +18 -3
- netgen/include/core/array.hpp +20 -4
- netgen/include/core/autodiff.hpp +9 -11
- netgen/include/core/autodiffdiff.hpp +0 -2
- netgen/include/core/bitarray.hpp +1 -1
- netgen/include/core/flags.hpp +1 -1
- netgen/include/core/hashtable.hpp +1 -1
- netgen/include/core/memtracer.hpp +7 -7
- netgen/include/core/ngcore.hpp +5 -0
- netgen/include/core/ngcore_api.hpp +11 -0
- netgen/include/core/paje_trace.hpp +9 -8
- netgen/include/core/profiler.hpp +5 -5
- netgen/include/core/register_archive.hpp +8 -0
- netgen/include/core/simd.hpp +69 -1
- netgen/include/core/simd_arm64.hpp +205 -1
- netgen/include/core/simd_avx.hpp +72 -4
- netgen/include/core/simd_avx512.hpp +9 -0
- netgen/include/core/simd_generic.hpp +274 -8
- netgen/include/core/simd_math.hpp +178 -0
- netgen/include/core/simd_sse.hpp +11 -1
- netgen/include/core/statushandler.hpp +37 -0
- netgen/include/core/table.hpp +3 -2
- netgen/include/core/taskmanager.hpp +34 -1
- netgen/include/core/utils.hpp +3 -8
- netgen/include/include/netgen_version.hpp +4 -4
- netgen/include/meshing/basegeom.hpp +1 -4
- netgen/include/meshing/global.hpp +0 -17
- netgen/include/meshing/hpref_tet.hpp +41 -0
- netgen/include/meshing/hprefinement.hpp +2 -0
- netgen/include/meshing/meshtype.hpp +2 -1
- netgen/include/meshing/msghandler.hpp +9 -6
- netgen/include/meshing/topology.hpp +2 -2
- netgen/include/nginterface.h +3 -2
- netgen/include/occ/occ_utils.hpp +26 -0
- netgen/include/occ/occgeom.hpp +8 -0
- netgen/include/pybind11/attr.h +40 -8
- netgen/include/pybind11/buffer_info.h +14 -14
- netgen/include/pybind11/cast.h +553 -29
- netgen/include/pybind11/chrono.h +4 -1
- netgen/include/pybind11/conduit/README.txt +15 -0
- netgen/include/pybind11/conduit/pybind11_conduit_v1.h +116 -0
- netgen/include/pybind11/conduit/pybind11_platform_abi_id.h +87 -0
- netgen/include/pybind11/conduit/wrap_include_python_h.h +72 -0
- netgen/include/pybind11/critical_section.h +56 -0
- netgen/include/pybind11/detail/class.h +172 -97
- netgen/include/pybind11/detail/common.h +270 -189
- netgen/include/pybind11/detail/cpp_conduit.h +75 -0
- netgen/include/pybind11/detail/descr.h +55 -0
- netgen/include/pybind11/detail/dynamic_raw_ptr_cast_if_possible.h +39 -0
- netgen/include/pybind11/detail/exception_translation.h +71 -0
- netgen/include/pybind11/detail/function_record_pyobject.h +191 -0
- netgen/include/pybind11/detail/init.h +113 -9
- netgen/include/pybind11/detail/internals.h +479 -344
- netgen/include/pybind11/detail/native_enum_data.h +209 -0
- netgen/include/pybind11/detail/pybind11_namespace_macros.h +82 -0
- netgen/include/pybind11/detail/struct_smart_holder.h +378 -0
- netgen/include/pybind11/detail/type_caster_base.h +506 -133
- netgen/include/pybind11/detail/using_smart_holder.h +22 -0
- netgen/include/pybind11/detail/value_and_holder.h +90 -0
- netgen/include/pybind11/eigen/matrix.h +19 -10
- netgen/include/pybind11/eigen/tensor.h +15 -11
- netgen/include/pybind11/embed.h +50 -46
- netgen/include/pybind11/eval.h +11 -6
- netgen/include/pybind11/functional.h +58 -49
- netgen/include/pybind11/gil.h +34 -82
- netgen/include/pybind11/gil_safe_call_once.h +12 -1
- netgen/include/pybind11/gil_simple.h +37 -0
- netgen/include/pybind11/native_enum.h +67 -0
- netgen/include/pybind11/numpy.h +272 -93
- netgen/include/pybind11/pybind11.h +947 -265
- netgen/include/pybind11/pytypes.h +127 -21
- netgen/include/pybind11/stl/filesystem.h +23 -25
- netgen/include/pybind11/stl.h +277 -59
- netgen/include/pybind11/stl_bind.h +42 -7
- netgen/include/pybind11/subinterpreter.h +299 -0
- netgen/include/pybind11/trampoline_self_life_support.h +65 -0
- netgen/include/pybind11/typing.h +177 -4
- netgen/include/pybind11/warnings.h +75 -0
- netgen/include/visualization/mvdraw.hpp +48 -12
- netgen/include/visualization/vssolution.hpp +3 -1
- netgen/lib/libnggui.lib +0 -0
- netgen/lib/ngcore.lib +0 -0
- netgen/lib/nglib.lib +0 -0
- netgen/libnggui.dll +0 -0
- netgen/libngguipy.pyd +0 -0
- netgen/libngpy/_NgOCC.pyi +224 -139
- netgen/libngpy/_csg.pyi +26 -26
- netgen/libngpy/_geom2d.pyi +34 -25
- netgen/libngpy/_meshing.pyi +262 -111
- netgen/libngpy/_stl.pyi +3 -4
- netgen/libngpy.pyd +0 -0
- netgen/ngcore.dll +0 -0
- netgen/nglib.dll +0 -0
- netgen/read_gmsh.py +41 -0
- netgen/togl.dll +0 -0
- netgen/version.py +1 -1
- netgen/webgui.py +38 -2
- {netgen_mesher-6.2.2504.post11.dev0.dist-info → netgen_mesher-6.2.2506.post48.dev0.dist-info}/METADATA +2 -1
- {netgen_mesher-6.2.2504.post11.dev0.dist-info → netgen_mesher-6.2.2506.post48.dev0.dist-info}/RECORD +153 -132
- pyngcore/pyngcore.cp313-win_amd64.pyd +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/boundarycondition.geo +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/boxcyl.geo +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/circle_on_cube.geo +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/cone.geo +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/cube.geo +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/cubeandring.geo +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/cubeandspheres.geo +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/cubemcyl.geo +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/cubemsphere.geo +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/cylinder.geo +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/cylsphere.geo +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/doc/ng4.pdf +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/ellipsoid.geo +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/ellipticcyl.geo +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/extrusion.geo +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/fichera.geo +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/frame.step +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/hinge.stl +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/lshape3d.geo +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/manyholes.geo +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/manyholes2.geo +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/matrix.geo +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/ortho.geo +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/part1.stl +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/period.geo +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/py_tutorials/exportNeutral.py +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/py_tutorials/mesh.py +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/py_tutorials/shaft.geo +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/revolution.geo +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/screw.step +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/sculpture.geo +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/shaft.geo +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/shell.geo +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/sphere.geo +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/sphereincube.geo +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/square.in2d +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/squarecircle.in2d +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/squarehole.in2d +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/torus.geo +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/trafo.geo +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/twobricks.geo +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/twocubes.geo +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/twocyl.geo +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.dist-info → netgen_mesher-6.2.2506.post48.dev0.dist-info}/AUTHORS +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.dist-info → netgen_mesher-6.2.2506.post48.dev0.dist-info}/LICENSE +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.dist-info → netgen_mesher-6.2.2506.post48.dev0.dist-info}/WHEEL +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.dist-info → netgen_mesher-6.2.2506.post48.dev0.dist-info}/entry_points.txt +0 -0
- {netgen_mesher-6.2.2504.post11.dev0.dist-info → netgen_mesher-6.2.2506.post48.dev0.dist-info}/top_level.txt +0 -0

netgen/include/core/simd_generic.hpp
CHANGED

@@ -48,6 +48,13 @@ namespace ngcore
     return k;
   }
 
+  constexpr size_t LargestPowerOfTwo (size_t x)
+  {
+    size_t y = 1;
+    while (2*y <= x) y *= 2;
+    return y;
+  }
+
 
   template <typename T, int N=GetDefaultSIMDSize()> class SIMD;
 

@@ -89,7 +96,8 @@ namespace ngcore
   template <int N>
   class alignas(GetLargestNativeSIMDPart(N)*sizeof(int64_t)) SIMD<mask64,N>
   {
-    static constexpr int N1 = GetLargestNativeSIMDPart(N);
+    // static constexpr int N1 = GetLargestNativeSIMDPart(N);
+    static constexpr size_t N1 = LargestPowerOfTwo(N-1);
     static constexpr int N2 = N-N1;
 
     SIMD<mask64,N1> lo;
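
The change above replaces the native-width split of a composite SIMD<mask64,N> with a power-of-two split: N1 is the largest power of two not exceeding N-1 and N2 = N-N1, so N=3 splits into 2+1 and N=8 into 4+4. A minimal standalone sketch of that split rule (plain C++, independent of ngcore; the helper mirrors the LargestPowerOfTwo added in the first hunk):

    #include <cstddef>
    #include <cstdio>

    // Mirrors the helper added above: largest power of two y with y <= x.
    constexpr std::size_t LargestPowerOfTwo(std::size_t x)
    {
      std::size_t y = 1;
      while (2 * y <= x) y *= 2;
      return y;
    }

    int main()
    {
      // The diff splits SIMD<T,N> into lo/hi parts of widths N1 = LargestPowerOfTwo(N-1) and N2 = N-N1.
      for (std::size_t N = 2; N <= 8; ++N)
      {
        std::size_t N1 = LargestPowerOfTwo(N - 1);
        std::printf("N=%zu -> N1=%zu, N2=%zu\n", N, N1, N - N1);
      }
    }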
@@ -111,6 +119,113 @@
   }
 
 
+  ////////////////////////////////////////////////////////////////////////////
+  // int32
+
+  template<>
+  class SIMD<int32_t,1>
+  {
+    int32_t data;
+
+  public:
+    static constexpr int Size() { return 1; }
+    SIMD () {}
+    SIMD (const SIMD &) = default;
+    SIMD & operator= (const SIMD &) = default;
+    // SIMD (int val) : data{val} {}
+    SIMD (int32_t val) : data{val} {}
+    SIMD (size_t val) : data(val) {}
+    explicit SIMD (std::array<int32_t, 1> arr) : data{arr[0]} {}
+
+
+
+    int32_t operator[] (int i) const { return ((int32_t*)(&data))[i]; }
+    auto Data() const { return data; }
+    static SIMD FirstInt(int32_t n0=0) { return {n0}; }
+    template <int I>
+    int32_t Get()
+    {
+      static_assert(I==0);
+      return data;
+    }
+  };
+
+  template<int N>
+  class alignas(GetLargestNativeSIMDPart(N)*sizeof(int64_t)) SIMD<int32_t,N>
+  {
+    // static constexpr int N1 = GetLargestNativeSIMDPart(N);
+    static constexpr size_t N1 = LargestPowerOfTwo(N-1);
+    static constexpr int N2 = N-N1;
+
+    SIMD<int32_t,N1> lo;
+    SIMD<int32_t,N2> high;
+
+  public:
+    static constexpr int Size() { return N; }
+
+    SIMD () {}
+    SIMD (const SIMD &) = default;
+    SIMD & operator= (const SIMD &) = default;
+
+    // SIMD (int val) : lo{val}, high{val} { ; }
+    SIMD (int32_t val) : lo{val}, high{val} { ; }
+    SIMD (size_t val) : lo{val}, high{val} { ; }
+    SIMD (int32_t * p) : lo{p}, high{p+N1} { ; }
+
+    SIMD (SIMD<int32_t,N1> lo_, SIMD<int32_t,N2> high_) : lo(lo_), high(high_) { ; }
+
+    explicit SIMD( std::array<int32_t, N> arr )
+      : lo(detail::array_range<N1>(arr, 0)),
+        high(detail::array_range<N2>(arr, N1))
+    {}
+
+
+    template<typename ...T>
+    explicit SIMD(const T... vals)
+      : lo(detail::array_range<N1>(std::array<int32_t, N>{vals...}, 0)),
+        high(detail::array_range<N2>(std::array<int32_t, N>{vals...}, N1))
+    {
+      static_assert(sizeof...(vals)==N, "wrong number of arguments");
+    }
+
+
+    template<typename T, typename std::enable_if<std::is_convertible<T, std::function<int32_t(int)>>::value, int>::type = 0>
+    SIMD (const T & func)
+    {
+      for(auto i : IntRange(N1))
+        lo[i] = func(i);
+      for(auto i : IntRange(N2))
+        high[i] = func(N1+i);
+    }
+
+    auto Lo() const { return lo; }
+    auto Hi() const { return high; }
+
+    int32_t operator[] (int i) const { return ((int32_t*)(&lo))[i]; }
+
+    void Store (int32_t * p) { lo.Store(p); high.Store(p+N1); }
+
+
+    /*
+    operator tuple<int32_t&,int32_t&,int32_t&,int32_t&> ()
+    { return tuple<int32_t&,int32_t&,int32_t&,int32_t&>((*this)[0], (*this)[1], (*this)[2], (*this)[3]); }
+    */
+
+    /*
+    static SIMD FirstInt() { return { 0, 1, 2, 3 }; }
+    */
+    static SIMD FirstInt(int32_t n0=0) { return {SIMD<int32_t,N1>::FirstInt(n0), SIMD<int32_t,N2>::FirstInt(n0+N1)}; }
+    template <int I>
+    int32_t Get()
+    {
+      static_assert(I>=0 && I<N, "Index out of range");
+      if constexpr(I<N1) return lo.template Get<I>();
+      else return high.template Get<I-N1>();
+    }
+  };
+
+
+
   ////////////////////////////////////////////////////////////////////////////
   // int64
 

@@ -145,7 +260,8 @@ namespace ngcore
   template<int N>
   class alignas(GetLargestNativeSIMDPart(N)*sizeof(int64_t)) SIMD<int64_t,N>
   {
-    static constexpr int N1 = GetLargestNativeSIMDPart(N);
+    // static constexpr int N1 = GetLargestNativeSIMDPart(N);
+    static constexpr size_t N1 = LargestPowerOfTwo(N-1);
     static constexpr int N2 = N-N1;
 
     SIMD<int64_t,N1> lo;

@@ -209,6 +325,7 @@ namespace ngcore
     }
   };
 
+
 
   ////////////////////////////////////////////////////////////////////////////
   // double

@@ -262,7 +379,8 @@ namespace ngcore
   template<int N>
   class alignas(GetLargestNativeSIMDPart(N)*sizeof(double)) SIMD<double, N>
   {
-    static constexpr int N1 = GetLargestNativeSIMDPart(N);
+    // static constexpr int N1 = GetLargestNativeSIMDPart(N);
+    static constexpr size_t N1 = LargestPowerOfTwo(N-1);
     static constexpr int N2 = N-N1;
 
     SIMD<double, N1> lo;

@@ -314,7 +432,7 @@ namespace ngcore
 
     template<typename ...T>
     explicit SIMD(const T... vals)
-
+      : lo(detail::array_range<N1>(std::array<double, N>{vals...}, 0)),
        high(detail::array_range<N2>(std::array<double, N>{vals...}, N1))
     {
       static_assert(sizeof...(vals)==N, "wrong number of arguments");

@@ -327,8 +445,8 @@ namespace ngcore
       high.Store(p+N1, mask.Hi());
     }
 
-    auto Lo() const { return lo; }
-    auto Hi() const { return high; }
+    NETGEN_INLINE auto Lo() const { return lo; }
+    NETGEN_INLINE auto Hi() const { return high; }
 
     double operator[] (int i) const { return ((double*)(&lo))[i]; }
 

@@ -426,6 +544,20 @@ namespace ngcore
     else return { a.Lo()!=b.Lo(), a.Hi()!=b.Hi() };
   }
 
+  template <int N>
+  NETGEN_INLINE SIMD<int64_t,N> operator& (SIMD<int64_t,N> a, SIMD<int64_t,N> b)
+  {
+    if constexpr(N==1) return a.Data() & b.Data();
+    else return { (a.Lo()&b.Lo()), (a.Hi()&b.Hi()) };
+  }
+  template <int N>
+  NETGEN_INLINE SIMD<int64_t,N> operator| (SIMD<int64_t,N> a, SIMD<int64_t,N> b)
+  {
+    if constexpr(N==1) return a.Data() & b.Data();
+    else return { (a.Lo()|b.Lo()), (a.Hi()|b.Hi()) };
+  }
+
+
   // int64_t operators with scalar operand (implement overloads to allow implicit casts for second operand)
   template <int N>
   NETGEN_INLINE SIMD<int64_t,N> operator+ (SIMD<int64_t,N> a, int64_t b) { return a+SIMD<int64_t,N>(b); }

@@ -458,6 +590,7 @@ namespace ngcore
   template <int N>
   NETGEN_INLINE SIMD<int64_t,N> & operator/= (SIMD<int64_t,N> & a, SIMD<int64_t,N> b) { a = a/b; return a; }
 
+
   // double operators with scalar operand (implement overloads to allow implicit casts for second operand)
   template <int N>
   NETGEN_INLINE SIMD<double,N> operator+ (SIMD<double,N> a, double b) { return a+SIMD<double,N>(b); }

@@ -490,6 +623,10 @@ namespace ngcore
   template <int N>
   NETGEN_INLINE SIMD<double,N> & operator/= (SIMD<double,N> & a, SIMD<double,N> b) { a = a/b; return a; }
 
+  template <int N>
+  NETGEN_INLINE auto operator> (SIMD<double,N> & a, double b) { return a > SIMD<double,N>(b); }
+
+
   // double functions
 
   template <int N>
@@ -580,6 +717,96 @@ namespace ngcore
   }
 
 
+  template<typename T2, typename T1>
+  T2 BitCast(T1 a)
+  {
+    T2 result;
+    static_assert(sizeof(T1) == sizeof(T2), "BitCast requires same size");
+    memcpy(&result, &a, sizeof(T1));
+    return result;
+  }
+
+  template <typename T, typename T1, int N>
+  SIMD<T, N> Reinterpret (SIMD<T1,N> a)
+  {
+    if constexpr (N == 1)
+      return SIMD<T,N> ( * (T*)(void*) & a.Data());
+    else if constexpr (N == 2)
+      return SIMD<T,N> { BitCast<T> (a.Lo()),
+                         BitCast<T> (a.Hi()) };
+    else
+      return SIMD<T,N> (Reinterpret<T> (a.Lo()), Reinterpret<T> (a.Hi()));
+  }
+
+
+  using std::round;
+  template <int N>
+  SIMD<double,N> round (SIMD<double,N> x)
+  {
+    if constexpr (N == 1) return round(x);
+    else return { round(x.Lo()), round(x.Hi()) };
+  }
+
+  // NETGEN_INLINE int64_t RoundI (double x) { return lround(x); }
+  using std::lround;
+  template <int N>
+  SIMD<int64_t,N> lround (SIMD<double,N> x)
+  {
+    if constexpr (N == 1) return SIMD<int64_t,1> (lround(x));
+    else return { lround(x.Lo()), lround(x.Hi()) };
+  }
+
+  /*
+    reciprocal square root
+    Quake III algorithm, or intrinsics
+  */
+  //
+  #ifndef __CUDACC__
+  NETGEN_INLINE double rsqrt (double x) { return 1.0/sqrt(x); }
+  #endif
+
+  template <int N>
+  SIMD<double,N> rsqrt (SIMD<double,N> x)
+  {
+    if constexpr (N == 1) return 1.0/sqrt(x.Data());
+    else return { rsqrt(x.Lo()), rsqrt(x.Hi()) };
+  }
+
+  template <int N>
+  int64_t operator<< (int64_t a, IC<N> n) { return a << n.value; }
+
+  template <int S, int N>
+  SIMD<int64_t,S> operator<< (SIMD<int64_t,S> a, IC<N> n)
+  {
+    if constexpr (S == 1) return SIMD<int64_t,1> (a.Data() << n);
+    else return SIMD<int64_t,S> (a.Lo() << n, a.Hi() << n);
+  }
+
+
+
+
+  template <typename T, int N>
+  auto Min (SIMD<T,N> a, SIMD<T,N> b)
+  {
+    if constexpr (N==1)
+      return SIMD<T,1> (std::min(a[0], b[0]));
+    else
+      return SIMD<T,N> (Min(a.Lo(), b.Lo()), Min(a.Hi(), b.Hi()));
+  }
+
+  template <typename T, int N>
+  auto Max (SIMD<T,N> a, SIMD<T,N> b)
+  {
+    if constexpr (N==1)
+      return SIMD<T,1> (std::max(a[0], b[0]));
+    else
+      return SIMD<T,N> (Max(a.Lo(), b.Lo()), Max(a.Hi(), b.Hi()));
+  }
+
+
+
+
+
   template <typename T, int N>
   ostream & operator<< (ostream & ost, SIMD<T,N> simd)
   {
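
The BitCast/Reinterpret helpers added in this hunk use the standard memcpy idiom to reinterpret one type's object representation as another without violating aliasing rules. A small self-contained illustration of the same idiom (standard library only, not taken from the package):

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    // Same idiom as the BitCast added above: copy the bytes of a into a T2.
    template <typename T2, typename T1>
    T2 BitCast(T1 a)
    {
      static_assert(sizeof(T1) == sizeof(T2), "BitCast requires same size");
      T2 result;
      std::memcpy(&result, &a, sizeof(T1));
      return result;
    }

    int main()
    {
      double x = 1.0;
      std::uint64_t bits = BitCast<std::uint64_t>(x);   // 0x3ff0000000000000 for 1.0
      double back = BitCast<double>(bits);              // exact round trip
      std::printf("bits=%llx back=%g\n", (unsigned long long) bits, back);
    }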
@@ -597,8 +824,11 @@ namespace ngcore
 
   using std::sqrt;
   template <int N>
-  NETGEN_INLINE ngcore::SIMD<double,N> sqrt (ngcore::SIMD<double,N> a)
-
+  NETGEN_INLINE ngcore::SIMD<double,N> sqrt (ngcore::SIMD<double,N> a)
+  {
+    if constexpr (N == 1) return sqrt(a.Data());
+    else return { sqrt(a.Lo()), sqrt(a.Hi()) };
+    // return ngcore::SIMD<double,N>([a](int i)->double { return sqrt(a[i]); } );
   }
 
   using std::fabs;

@@ -777,6 +1007,42 @@ namespace ngcore
                    FMAddSub(a.Hi(), b.Hi(), c.Hi()));
   }
   }
+
+
+
+
+  template <int BASE, typename Tuple, std::size_t ... Is>
+  auto subtuple (const Tuple& tup, std::index_sequence<Is...>)
+  {
+    return std::make_tuple(std::get<BASE+Is>(tup)...);
+  }
+
+  template <typename ...Args, typename T, int M>
+  auto Concat (std::tuple<SIMD<T,M>, Args...> tup)
+  {
+    constexpr size_t N = std::tuple_size<std::tuple<SIMD<T,M>, Args...>>();
+
+    if constexpr (N == 1)
+      return get<0>(tup);
+    else
+      {
+        static constexpr size_t N1 = LargestPowerOfTwo(N-1);
+        static constexpr int N2 = N-N1;
+
+        auto SEQ1 = std::make_index_sequence<N1>();
+        auto sub1 = subtuple<0>(tup, SEQ1);
+
+        auto SEQ2 = std::make_index_sequence<N2>();
+        auto sub2 = subtuple<N1>(tup, SEQ2);
+
+        auto S1 = Concat(sub1);
+        auto S2 = Concat(sub2);
+        return SIMD<T,S1.Size()+S2.Size()>(S1, S2);
+      }
+  }
+
+
+
 }
 
 
netgen/include/core/simd_math.hpp
ADDED

@@ -0,0 +1,178 @@
+#ifndef NETGEN_CORE_SIMD_MATH_HPP
+#define NETGEN_CORE_SIMD_MATH_HPP
+
+#include <tuple>
+
+#ifndef M_PI
+#define M_PI 3.14159265358979323846
+#endif
+
+
+namespace ngcore
+{
+
+  /*
+    based on:
+    Stephen L. Moshier: Methods and Programs For Mathematical Functions
+    https://www.moshier.net/methprog.pdf
+
+    CEPHES MATHEMATICAL FUNCTION LIBRARY
+    https://www.netlib.org/cephes/
+  */
+
+  static constexpr double sincof[] = {
+    1.58962301576546568060E-10,
+    -2.50507477628578072866E-8,
+    2.75573136213857245213E-6,
+    -1.98412698295895385996E-4,
+    8.33333333332211858878E-3,
+    -1.66666666666666307295E-1,
+  };
+
+  static constexpr double coscof[6] = {
+    -1.13585365213876817300E-11,
+    2.08757008419747316778E-9,
+    -2.75573141792967388112E-7,
+    2.48015872888517045348E-5,
+    -1.38888888888730564116E-3,
+    4.16666666666665929218E-2,
+  };
+
+
+  // highly accurate on [-pi/4, pi/4]
+  template <int N>
+  auto sincos_reduced (SIMD<double,N> x)
+  {
+    auto x2 = x*x;
+
+    auto s = ((((( sincof[0]*x2 + sincof[1]) * x2 + sincof[2]) * x2 + sincof[3]) * x2 + sincof[4]) * x2 + sincof[5]);
+    s = x + x*x*x * s;
+
+    auto c = ((((( coscof[0]*x2 + coscof[1]) * x2 + coscof[2]) * x2 + coscof[3]) * x2 + coscof[4]) * x2 + coscof[5]);
+    c = 1.0 - 0.5*x2 + x2*x2*c;
+
+    return std::tuple{ s, c };
+  }
+
+  template <int N>
+  auto sincos (SIMD<double,N> x)
+  {
+    auto y = round((2/M_PI) * x);
+    auto q = lround(y);
+
+    auto [s1,c1] = sincos_reduced(x - y * (M_PI/2));
+
+    auto s2 = If((q & SIMD<int64_t,N>(1)) == SIMD<int64_t,N>(0), s1, c1);
+    auto s = If((q & SIMD<int64_t,N>(2)) == SIMD<int64_t,N>(0), s2, -s2);
+
+    auto c2 = If((q & SIMD<int64_t,N>(1)) == SIMD<int64_t,N>(0), c1, -s1);
+    auto c = If((q & SIMD<int64_t,N>(2)) == SIMD<int64_t,N>(0), c2, -c2);
+
+    return std::tuple{ s, c };
+  }
+
+
+
+
+
+
+
+
+  template <int N>
+  SIMD<double,N> exp_reduced (SIMD<double,N> x)
+  {
+    static constexpr double P[] = {
+      1.26177193074810590878E-4,
+      3.02994407707441961300E-2,
+      9.99999999999999999910E-1,
+    };
+
+    static constexpr double Q[] = {
+      3.00198505138664455042E-6,
+      2.52448340349684104192E-3,
+      2.27265548208155028766E-1,
+      2.00000000000000000009E0,
+    };
+
+    /*
+      // from: https://www.netlib.org/cephes/
+      rational approximation for exponential
+      * of the fractional part:
+      * e**x = 1 + 2x P(x**2)/( Q(x**2) - x P(x**2) )
+
+      xx = x * x;
+      px = x * polevl( xx, P, 2 );
+      x = px/( polevl( xx, Q, 3 ) - px );
+      x = 1.0 + 2.0 * x;
+    */
+
+    auto xx = x*x;
+    auto px = (P[0]*xx + P[1]) * xx + P[2];
+    auto qx = ((Q[0]*xx+Q[1])*xx+Q[2])*xx+Q[3];
+    return 1.0 + 2.0*x * px / (qx- x * px);
+  }
+
+
+  template <int N>
+  SIMD<double,N> pow2_int64_to_float64(SIMD<int64_t,N> n)
+  {
+    // thx to deepseek
+
+    // Step 1: Clamp the input to valid exponent range [-1022, 1023]
+    // (We use saturated operations to handle out-of-range values)
+    SIMD<int64_t,N> max_exp(1023);
+    SIMD<int64_t,N> min_exp(-1022);
+    n = If(n > max_exp, max_exp, n);
+    n = If(min_exp > n, min_exp, n);
+
+    // Step 2: Add exponent bias (1023)
+    n = n + SIMD<int64_t,N>(1023);
+
+    // Step 3: Shift to exponent bit position (bit 52)
+    auto shifted_exp = (n << IC<52>());
+
+    // Step 4: Reinterpret as double
+    return Reinterpret<double> (shifted_exp);
+  }
+
+
+  template <int N>
+  SIMD<double,N> myexp (SIMD<double,N> x)
+  {
+    constexpr double log2 = 0.693147180559945286; // log(2.0);
+
+    auto r = round(1/log2 * x);
+    auto rI = lround(r);
+    r *= log2;
+
+    SIMD<double,N> pow2 = pow2_int64_to_float64 (rI);
+    return exp_reduced(x-r) * pow2;
+
+    // maybe better:
+    // x = ldexp( x, n );
+  }
+
+  /*
+  inline auto Test1 (SIMD<double> x)
+  {
+    return myexp(x);
+  }
+
+  inline auto Test2 (SIMD<double> x)
+  {
+    return sincos(x);
+  }
+
+  inline auto Test3 (SIMD<double,4> x)
+  {
+    return myexp(x);
+  }
+
+  inline auto Test4 (SIMD<double,4> x)
+  {
+    return sincos(x);
+  }
+  */
+
+}
+
+#endif
netgen/include/core/simd_sse.hpp
CHANGED
@@ -21,6 +21,9 @@ namespace ngcore
     : mask(_mm_cmpgt_epi32(_mm_set1_epi32(i),
                            _mm_set_epi32(1, 1, 0, 0)))
   { ; }
+
+  SIMD (bool i0, bool i1) { mask = _mm_set_epi64x(i1?-1:0, i0?-1:0); }
+
   SIMD (__m128i _mask) : mask(_mask) { ; }
   __m128i Data() const { return mask; }
   static constexpr int Size() { return 2; }

@@ -66,6 +69,10 @@ namespace ngcore
   NETGEN_INLINE auto operator[] (int i) const { return ((int64_t*)(&data))[i]; }
   NETGEN_INLINE __m128i Data() const { return data; }
   NETGEN_INLINE __m128i & Data() { return data; }
+  // NETGEN_INLINE int64_t Lo() const { return _mm_extract_epi64(data, 0); }
+  // NETGEN_INLINE int64_t Hi() const { return _mm_extract_epi64(data, 1); }
+  NETGEN_INLINE int64_t Lo() const { return ((int64_t*)(&data))[0]; }
+  NETGEN_INLINE int64_t Hi() const { return ((int64_t*)(&data))[1]; }
   static SIMD FirstInt(int n0=0) { return { n0, n0+1 }; }
 };
 

@@ -215,6 +222,7 @@ NETGEN_INLINE SIMD<int64_t,2> operator- (SIMD<int64_t,2> a, SIMD<int64_t,2> b) {
 NETGEN_INLINE SIMD<mask64,2> operator!= (SIMD<double,2> a , SIMD<double,2> b)
 { return _mm_castpd_si128( _mm_cmpneq_pd(a.Data(),b.Data())); }
 
+#ifdef __SSE4_2__
 NETGEN_INLINE SIMD<mask64,2> operator<= (SIMD<int64_t,2> a , SIMD<int64_t,2> b)
 { return _mm_xor_si128(_mm_cmpgt_epi64(a.Data(),b.Data()),_mm_set1_epi32(-1)); }
 NETGEN_INLINE SIMD<mask64,2> operator< (SIMD<int64_t,2> a , SIMD<int64_t,2> b)

@@ -223,11 +231,13 @@ NETGEN_INLINE SIMD<int64_t,2> operator- (SIMD<int64_t,2> a, SIMD<int64_t,2> b) {
 { return _mm_xor_si128(_mm_cmpgt_epi64(b.Data(),a.Data()),_mm_set1_epi32(-1)); }
 NETGEN_INLINE SIMD<mask64,2> operator> (SIMD<int64_t,2> a , SIMD<int64_t,2> b)
 { return my_mm_cmpgt_epi64(a.Data(),b.Data()); }
+#endif
+#ifdef __SSE4_1__
 NETGEN_INLINE SIMD<mask64,2> operator== (SIMD<int64_t,2> a , SIMD<int64_t,2> b)
 { return _mm_cmpeq_epi64(a.Data(),b.Data()); }
 NETGEN_INLINE SIMD<mask64,2> operator!= (SIMD<int64_t,2> a , SIMD<int64_t,2> b)
 { return _mm_xor_si128(_mm_cmpeq_epi64(a.Data(),b.Data()),_mm_set1_epi32(-1)); }
-
+#endif
 
 
 NETGEN_INLINE SIMD<mask64,2> operator&& (SIMD<mask64,2> a, SIMD<mask64,2> b)
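
The new SIMD<mask64,2>(bool,bool) constructor above turns each bool into an all-ones or all-zeros 64-bit lane via _mm_set_epi64x(i1?-1:0, i0?-1:0). A standalone SSE2 sketch of the same trick (a hypothetical example, not package code), using such a mask to zero out one lane of a double pair:

    #include <emmintrin.h>   // SSE2 intrinsics
    #include <cstdio>

    int main()
    {
      bool i0 = true, i1 = false;
      // _mm_set_epi64x takes the high element first: lane 1 from i1, lane 0 from i0.
      __m128i mask = _mm_set_epi64x(i1 ? -1 : 0, i0 ? -1 : 0);

      __m128d v = _mm_set_pd(2.0, 1.0);                        // lanes {1.0, 2.0}
      __m128d kept = _mm_and_pd(_mm_castsi128_pd(mask), v);    // keep lanes whose mask is all-ones

      double out[2];
      _mm_storeu_pd(out, kept);
      std::printf("%g %g\n", out[0], out[1]);                  // prints: 1 0
    }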
netgen/include/core/statushandler.hpp
ADDED

@@ -0,0 +1,37 @@
+#ifndef NETGEN_CORE_STATUSHANDLER
+#define NETGEN_CORE_STATUSHANDLER
+
+#include <string>
+#include "utils.hpp"
+
+namespace ngcore
+{
+
+  class NGCORE_API multithreadt
+  {
+  public:
+    int pause;
+    int testmode;
+    int redraw;
+    int drawing;
+    int terminate;
+    int running;
+    double percent;
+    const char * task;
+    bool demorunning;
+    std::string * tcl_todo = new std::string(""); // tcl commands set from parallel thread
+    multithreadt();
+  };
+
+  NGCORE_API extern volatile multithreadt multithread;
+
+
+  extern NGCORE_API void SetStatMsg(const std::string& s);
+
+  extern NGCORE_API void PushStatus(const std::string& s);
+  extern NGCORE_API void PushStatusF(const std::string& s);
+  extern NGCORE_API void PopStatus();
+  extern NGCORE_API void SetThreadPercent(double percent);
+  extern NGCORE_API void GetStatus(std::string & s, double & percentage);
+}
+#endif
netgen/include/core/table.hpp
CHANGED
@@ -262,14 +262,14 @@ namespace ngcore
     const MemoryTracer& GetMemoryTracer() const { return mt; }
 
   private:
-    size_t GetMemUsage() const { return size == 0 ? 0 : sizeof(T)*index[size] + sizeof(IndexType) * size+1; }
+    NETGEN_INLINE size_t GetMemUsage() const { return size == 0 ? 0 : sizeof(T)*index[size] + sizeof(IndexType) * size+1; }
     MemoryTracer mt;
   };
 
 
   /// Print table
   template <class T, typename IndexType>
-  inline ostream & operator<< (ostream & s,
+  inline ostream & operator<< (ostream & s, FlatTable<T,IndexType> table)
   {
     for (auto i : table.Range())
       {

@@ -462,6 +462,7 @@ namespace ngcore
       : TableCreator<int>(), takedofs(atakedofs) { };
     FilteredTableCreator(int acnt, const BitArray* atakedofs)
       : TableCreator<int>(acnt),takedofs(atakedofs) { };
+    void SetFilter (const BitArray * atakedofs) { takedofs = atakedofs; }
     void Add (size_t blocknr, int data);
     void Add (size_t blocknr, IntRange range);
     void Add (size_t blocknr, FlatArray<int> dofs);
|