netgen-mesher 6.2.2504.post11.dev0__cp313-cp313-win_amd64.whl → 6.2.2506.post48.dev0__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (153) hide show
  1. netgen/__init__.pyi +3 -3
  2. netgen/cmake/NetgenConfig.cmake +10 -9
  3. netgen/config/__init__.pyi +8 -8
  4. netgen/config/config.py +7 -7
  5. netgen/config/config.pyi +8 -8
  6. netgen/include/core/archive.hpp +18 -3
  7. netgen/include/core/array.hpp +20 -4
  8. netgen/include/core/autodiff.hpp +9 -11
  9. netgen/include/core/autodiffdiff.hpp +0 -2
  10. netgen/include/core/bitarray.hpp +1 -1
  11. netgen/include/core/flags.hpp +1 -1
  12. netgen/include/core/hashtable.hpp +1 -1
  13. netgen/include/core/memtracer.hpp +7 -7
  14. netgen/include/core/ngcore.hpp +5 -0
  15. netgen/include/core/ngcore_api.hpp +11 -0
  16. netgen/include/core/paje_trace.hpp +9 -8
  17. netgen/include/core/profiler.hpp +5 -5
  18. netgen/include/core/register_archive.hpp +8 -0
  19. netgen/include/core/simd.hpp +69 -1
  20. netgen/include/core/simd_arm64.hpp +205 -1
  21. netgen/include/core/simd_avx.hpp +72 -4
  22. netgen/include/core/simd_avx512.hpp +9 -0
  23. netgen/include/core/simd_generic.hpp +274 -8
  24. netgen/include/core/simd_math.hpp +178 -0
  25. netgen/include/core/simd_sse.hpp +11 -1
  26. netgen/include/core/statushandler.hpp +37 -0
  27. netgen/include/core/table.hpp +3 -2
  28. netgen/include/core/taskmanager.hpp +34 -1
  29. netgen/include/core/utils.hpp +3 -8
  30. netgen/include/include/netgen_version.hpp +4 -4
  31. netgen/include/meshing/basegeom.hpp +1 -4
  32. netgen/include/meshing/global.hpp +0 -17
  33. netgen/include/meshing/hpref_tet.hpp +41 -0
  34. netgen/include/meshing/hprefinement.hpp +2 -0
  35. netgen/include/meshing/meshtype.hpp +2 -1
  36. netgen/include/meshing/msghandler.hpp +9 -6
  37. netgen/include/meshing/topology.hpp +2 -2
  38. netgen/include/nginterface.h +3 -2
  39. netgen/include/occ/occ_utils.hpp +26 -0
  40. netgen/include/occ/occgeom.hpp +8 -0
  41. netgen/include/pybind11/attr.h +40 -8
  42. netgen/include/pybind11/buffer_info.h +14 -14
  43. netgen/include/pybind11/cast.h +553 -29
  44. netgen/include/pybind11/chrono.h +4 -1
  45. netgen/include/pybind11/conduit/README.txt +15 -0
  46. netgen/include/pybind11/conduit/pybind11_conduit_v1.h +116 -0
  47. netgen/include/pybind11/conduit/pybind11_platform_abi_id.h +87 -0
  48. netgen/include/pybind11/conduit/wrap_include_python_h.h +72 -0
  49. netgen/include/pybind11/critical_section.h +56 -0
  50. netgen/include/pybind11/detail/class.h +172 -97
  51. netgen/include/pybind11/detail/common.h +270 -189
  52. netgen/include/pybind11/detail/cpp_conduit.h +75 -0
  53. netgen/include/pybind11/detail/descr.h +55 -0
  54. netgen/include/pybind11/detail/dynamic_raw_ptr_cast_if_possible.h +39 -0
  55. netgen/include/pybind11/detail/exception_translation.h +71 -0
  56. netgen/include/pybind11/detail/function_record_pyobject.h +191 -0
  57. netgen/include/pybind11/detail/init.h +113 -9
  58. netgen/include/pybind11/detail/internals.h +479 -344
  59. netgen/include/pybind11/detail/native_enum_data.h +209 -0
  60. netgen/include/pybind11/detail/pybind11_namespace_macros.h +82 -0
  61. netgen/include/pybind11/detail/struct_smart_holder.h +378 -0
  62. netgen/include/pybind11/detail/type_caster_base.h +506 -133
  63. netgen/include/pybind11/detail/using_smart_holder.h +22 -0
  64. netgen/include/pybind11/detail/value_and_holder.h +90 -0
  65. netgen/include/pybind11/eigen/matrix.h +19 -10
  66. netgen/include/pybind11/eigen/tensor.h +15 -11
  67. netgen/include/pybind11/embed.h +50 -46
  68. netgen/include/pybind11/eval.h +11 -6
  69. netgen/include/pybind11/functional.h +58 -49
  70. netgen/include/pybind11/gil.h +34 -82
  71. netgen/include/pybind11/gil_safe_call_once.h +12 -1
  72. netgen/include/pybind11/gil_simple.h +37 -0
  73. netgen/include/pybind11/native_enum.h +67 -0
  74. netgen/include/pybind11/numpy.h +272 -93
  75. netgen/include/pybind11/pybind11.h +947 -265
  76. netgen/include/pybind11/pytypes.h +127 -21
  77. netgen/include/pybind11/stl/filesystem.h +23 -25
  78. netgen/include/pybind11/stl.h +277 -59
  79. netgen/include/pybind11/stl_bind.h +42 -7
  80. netgen/include/pybind11/subinterpreter.h +299 -0
  81. netgen/include/pybind11/trampoline_self_life_support.h +65 -0
  82. netgen/include/pybind11/typing.h +177 -4
  83. netgen/include/pybind11/warnings.h +75 -0
  84. netgen/include/visualization/mvdraw.hpp +48 -12
  85. netgen/include/visualization/vssolution.hpp +3 -1
  86. netgen/lib/libnggui.lib +0 -0
  87. netgen/lib/ngcore.lib +0 -0
  88. netgen/lib/nglib.lib +0 -0
  89. netgen/libnggui.dll +0 -0
  90. netgen/libngguipy.pyd +0 -0
  91. netgen/libngpy/_NgOCC.pyi +224 -139
  92. netgen/libngpy/_csg.pyi +26 -26
  93. netgen/libngpy/_geom2d.pyi +34 -25
  94. netgen/libngpy/_meshing.pyi +262 -111
  95. netgen/libngpy/_stl.pyi +3 -4
  96. netgen/libngpy.pyd +0 -0
  97. netgen/ngcore.dll +0 -0
  98. netgen/nglib.dll +0 -0
  99. netgen/read_gmsh.py +41 -0
  100. netgen/togl.dll +0 -0
  101. netgen/version.py +1 -1
  102. netgen/webgui.py +38 -2
  103. {netgen_mesher-6.2.2504.post11.dev0.dist-info → netgen_mesher-6.2.2506.post48.dev0.dist-info}/METADATA +2 -1
  104. {netgen_mesher-6.2.2504.post11.dev0.dist-info → netgen_mesher-6.2.2506.post48.dev0.dist-info}/RECORD +153 -132
  105. pyngcore/pyngcore.cp313-win_amd64.pyd +0 -0
  106. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/boundarycondition.geo +0 -0
  107. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/boxcyl.geo +0 -0
  108. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/circle_on_cube.geo +0 -0
  109. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/cone.geo +0 -0
  110. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/cube.geo +0 -0
  111. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/cubeandring.geo +0 -0
  112. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/cubeandspheres.geo +0 -0
  113. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/cubemcyl.geo +0 -0
  114. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/cubemsphere.geo +0 -0
  115. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/cylinder.geo +0 -0
  116. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/cylsphere.geo +0 -0
  117. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/doc/ng4.pdf +0 -0
  118. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/ellipsoid.geo +0 -0
  119. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/ellipticcyl.geo +0 -0
  120. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/extrusion.geo +0 -0
  121. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/fichera.geo +0 -0
  122. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/frame.step +0 -0
  123. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/hinge.stl +0 -0
  124. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/lshape3d.geo +0 -0
  125. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/manyholes.geo +0 -0
  126. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/manyholes2.geo +0 -0
  127. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/matrix.geo +0 -0
  128. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/ortho.geo +0 -0
  129. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/part1.stl +0 -0
  130. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/period.geo +0 -0
  131. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/py_tutorials/exportNeutral.py +0 -0
  132. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/py_tutorials/mesh.py +0 -0
  133. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/py_tutorials/shaft.geo +0 -0
  134. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/revolution.geo +0 -0
  135. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/screw.step +0 -0
  136. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/sculpture.geo +0 -0
  137. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/shaft.geo +0 -0
  138. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/shell.geo +0 -0
  139. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/sphere.geo +0 -0
  140. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/sphereincube.geo +0 -0
  141. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/square.in2d +0 -0
  142. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/squarecircle.in2d +0 -0
  143. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/squarehole.in2d +0 -0
  144. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/torus.geo +0 -0
  145. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/trafo.geo +0 -0
  146. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/twobricks.geo +0 -0
  147. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/twocubes.geo +0 -0
  148. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/twocyl.geo +0 -0
  149. {netgen_mesher-6.2.2504.post11.dev0.dist-info → netgen_mesher-6.2.2506.post48.dev0.dist-info}/AUTHORS +0 -0
  150. {netgen_mesher-6.2.2504.post11.dev0.dist-info → netgen_mesher-6.2.2506.post48.dev0.dist-info}/LICENSE +0 -0
  151. {netgen_mesher-6.2.2504.post11.dev0.dist-info → netgen_mesher-6.2.2506.post48.dev0.dist-info}/WHEEL +0 -0
  152. {netgen_mesher-6.2.2504.post11.dev0.dist-info → netgen_mesher-6.2.2506.post48.dev0.dist-info}/entry_points.txt +0 -0
  153. {netgen_mesher-6.2.2504.post11.dev0.dist-info → netgen_mesher-6.2.2506.post48.dev0.dist-info}/top_level.txt +0 -0
@@ -48,6 +48,13 @@ namespace ngcore
48
48
  return k;
49
49
  }
50
50
 
51
+ constexpr size_t LargestPowerOfTwo (size_t x)
52
+ {
53
+ size_t y = 1;
54
+ while (2*y <= x) y *= 2;
55
+ return y;
56
+ }
57
+
51
58
 
52
59
  template <typename T, int N=GetDefaultSIMDSize()> class SIMD;
53
60
 
@@ -89,7 +96,8 @@ namespace ngcore
89
96
  template <int N>
90
97
  class alignas(GetLargestNativeSIMDPart(N)*sizeof(int64_t)) SIMD<mask64,N>
91
98
  {
92
- static constexpr int N1 = GetLargestNativeSIMDPart(N);
99
+ // static constexpr int N1 = GetLargestNativeSIMDPart(N);
100
+ static constexpr size_t N1 = LargestPowerOfTwo(N-1);
93
101
  static constexpr int N2 = N-N1;
94
102
 
95
103
  SIMD<mask64,N1> lo;
@@ -111,6 +119,113 @@ namespace ngcore
111
119
  }
112
120
 
113
121
 
122
+ ////////////////////////////////////////////////////////////////////////////
123
+ // int32
124
+
125
+ template<>
126
+ class SIMD<int32_t,1>
127
+ {
128
+ int32_t data;
129
+
130
+ public:
131
+ static constexpr int Size() { return 1; }
132
+ SIMD () {}
133
+ SIMD (const SIMD &) = default;
134
+ SIMD & operator= (const SIMD &) = default;
135
+ // SIMD (int val) : data{val} {}
136
+ SIMD (int32_t val) : data{val} {}
137
+ SIMD (size_t val) : data(val) {}
138
+ explicit SIMD (std::array<int32_t, 1> arr) : data{arr[0]} {}
139
+
140
+
141
+
142
+ int32_t operator[] (int i) const { return ((int32_t*)(&data))[i]; }
143
+ auto Data() const { return data; }
144
+ static SIMD FirstInt(int32_t n0=0) { return {n0}; }
145
+ template <int I>
146
+ int32_t Get()
147
+ {
148
+ static_assert(I==0);
149
+ return data;
150
+ }
151
+ };
152
+
153
+ template<int N>
154
+ class alignas(GetLargestNativeSIMDPart(N)*sizeof(int64_t)) SIMD<int32_t,N>
155
+ {
156
+ // static constexpr int N1 = GetLargestNativeSIMDPart(N);
157
+ static constexpr size_t N1 = LargestPowerOfTwo(N-1);
158
+ static constexpr int N2 = N-N1;
159
+
160
+ SIMD<int32_t,N1> lo;
161
+ SIMD<int32_t,N2> high;
162
+
163
+ public:
164
+ static constexpr int Size() { return N; }
165
+
166
+ SIMD () {}
167
+ SIMD (const SIMD &) = default;
168
+ SIMD & operator= (const SIMD &) = default;
169
+
170
+ // SIMD (int val) : lo{val}, high{val} { ; }
171
+ SIMD (int32_t val) : lo{val}, high{val} { ; }
172
+ SIMD (size_t val) : lo{val}, high{val} { ; }
173
+ SIMD (int32_t * p) : lo{p}, high{p+N1} { ; }
174
+
175
+ SIMD (SIMD<int32_t,N1> lo_, SIMD<int32_t,N2> high_) : lo(lo_), high(high_) { ; }
176
+
177
+ explicit SIMD( std::array<int32_t, N> arr )
178
+ : lo(detail::array_range<N1>(arr, 0)),
179
+ high(detail::array_range<N2>(arr, N1))
180
+ {}
181
+
182
+
183
+ template<typename ...T>
184
+ explicit SIMD(const T... vals)
185
+ : lo(detail::array_range<N1>(std::array<int32_t, N>{vals...}, 0)),
186
+ high(detail::array_range<N2>(std::array<int32_t, N>{vals...}, N1))
187
+ {
188
+ static_assert(sizeof...(vals)==N, "wrong number of arguments");
189
+ }
190
+
191
+
192
+ template<typename T, typename std::enable_if<std::is_convertible<T, std::function<int32_t(int)>>::value, int>::type = 0>
193
+ SIMD (const T & func)
194
+ {
195
+ for(auto i : IntRange(N1))
196
+ lo[i] = func(i);
197
+ for(auto i : IntRange(N2))
198
+ high[i] = func(N1+i);
199
+ }
200
+
201
+ auto Lo() const { return lo; }
202
+ auto Hi() const { return high; }
203
+
204
+ int32_t operator[] (int i) const { return ((int32_t*)(&lo))[i]; }
205
+
206
+ void Store (int32_t * p) { lo.Store(p); high.Store(p+N1); }
207
+
208
+
209
+ /*
210
+ operator tuple<int32_t&,int32_t&,int32_t&,int32_t&> ()
211
+ { return tuple<int32_t&,int32_t&,int32_t&,int32_t&>((*this)[0], (*this)[1], (*this)[2], (*this)[3]); }
212
+ */
213
+
214
+ /*
215
+ static SIMD FirstInt() { return { 0, 1, 2, 3 }; }
216
+ */
217
+ static SIMD FirstInt(int32_t n0=0) { return {SIMD<int32_t,N1>::FirstInt(n0), SIMD<int32_t,N2>::FirstInt(n0+N1)}; }
218
+ template <int I>
219
+ int32_t Get()
220
+ {
221
+ static_assert(I>=0 && I<N, "Index out of range");
222
+ if constexpr(I<N1) return lo.template Get<I>();
223
+ else return high.template Get<I-N1>();
224
+ }
225
+ };
226
+
227
+
228
+
114
229
  ////////////////////////////////////////////////////////////////////////////
115
230
  // int64
116
231
 
@@ -145,7 +260,8 @@ namespace ngcore
145
260
  template<int N>
146
261
  class alignas(GetLargestNativeSIMDPart(N)*sizeof(int64_t)) SIMD<int64_t,N>
147
262
  {
148
- static constexpr int N1 = GetLargestNativeSIMDPart(N);
263
+ // static constexpr int N1 = GetLargestNativeSIMDPart(N);
264
+ static constexpr size_t N1 = LargestPowerOfTwo(N-1);
149
265
  static constexpr int N2 = N-N1;
150
266
 
151
267
  SIMD<int64_t,N1> lo;
@@ -209,6 +325,7 @@ namespace ngcore
209
325
  }
210
326
  };
211
327
 
328
+
212
329
 
213
330
  ////////////////////////////////////////////////////////////////////////////
214
331
  // double
@@ -262,7 +379,8 @@ namespace ngcore
262
379
  template<int N>
263
380
  class alignas(GetLargestNativeSIMDPart(N)*sizeof(double)) SIMD<double, N>
264
381
  {
265
- static constexpr int N1 = GetLargestNativeSIMDPart(N);
382
+ // static constexpr int N1 = GetLargestNativeSIMDPart(N);
383
+ static constexpr size_t N1 = LargestPowerOfTwo(N-1);
266
384
  static constexpr int N2 = N-N1;
267
385
 
268
386
  SIMD<double, N1> lo;
@@ -314,7 +432,7 @@ namespace ngcore
314
432
 
315
433
  template<typename ...T>
316
434
  explicit SIMD(const T... vals)
317
- : lo(detail::array_range<N1>(std::array<double, N>{vals...}, 0)),
435
+ : lo(detail::array_range<N1>(std::array<double, N>{vals...}, 0)),
318
436
  high(detail::array_range<N2>(std::array<double, N>{vals...}, N1))
319
437
  {
320
438
  static_assert(sizeof...(vals)==N, "wrong number of arguments");
@@ -327,8 +445,8 @@ namespace ngcore
327
445
  high.Store(p+N1, mask.Hi());
328
446
  }
329
447
 
330
- auto Lo() const { return lo; }
331
- auto Hi() const { return high; }
448
+ NETGEN_INLINE auto Lo() const { return lo; }
449
+ NETGEN_INLINE auto Hi() const { return high; }
332
450
 
333
451
  double operator[] (int i) const { return ((double*)(&lo))[i]; }
334
452
 
@@ -426,6 +544,20 @@ namespace ngcore
426
544
  else return { a.Lo()!=b.Lo(), a.Hi()!=b.Hi() };
427
545
  }
428
546
 
547
+ template <int N>
548
+ NETGEN_INLINE SIMD<int64_t,N> operator& (SIMD<int64_t,N> a, SIMD<int64_t,N> b)
549
+ {
550
+ if constexpr(N==1) return a.Data() & b.Data();
551
+ else return { (a.Lo()&b.Lo()), (a.Hi()&b.Hi()) };
552
+ }
553
+ template <int N>
554
+ NETGEN_INLINE SIMD<int64_t,N> operator| (SIMD<int64_t,N> a, SIMD<int64_t,N> b) // lane-wise bitwise OR
555
+ {
556
+ if constexpr(N==1) return a.Data() | b.Data(); // scalar base case: OR the two values (must not be '&')
557
+ else return { (a.Lo()|b.Lo()), (a.Hi()|b.Hi()) }; // recurse on the lower and upper halves
558
+ }
559
+
560
+
429
561
  // int64_t operators with scalar operand (implement overloads to allow implicit casts for second operand)
430
562
  template <int N>
431
563
  NETGEN_INLINE SIMD<int64_t,N> operator+ (SIMD<int64_t,N> a, int64_t b) { return a+SIMD<int64_t,N>(b); }
@@ -458,6 +590,7 @@ namespace ngcore
458
590
  template <int N>
459
591
  NETGEN_INLINE SIMD<int64_t,N> & operator/= (SIMD<int64_t,N> & a, SIMD<int64_t,N> b) { a = a/b; return a; }
460
592
 
593
+
461
594
  // double operators with scalar operand (implement overloads to allow implicit casts for second operand)
462
595
  template <int N>
463
596
  NETGEN_INLINE SIMD<double,N> operator+ (SIMD<double,N> a, double b) { return a+SIMD<double,N>(b); }
@@ -490,6 +623,10 @@ namespace ngcore
490
623
  template <int N>
491
624
  NETGEN_INLINE SIMD<double,N> & operator/= (SIMD<double,N> & a, SIMD<double,N> b) { a = a/b; return a; }
492
625
 
626
+ template <int N>
627
+ NETGEN_INLINE auto operator> (SIMD<double,N> a, double b) { return a > SIMD<double,N>(b); } // 'a' by value like the other scalar-operand overloads, so temporaries and const values bind too
628
+
629
+
493
630
  // double functions
494
631
 
495
632
  template <int N>
@@ -580,6 +717,96 @@ namespace ngcore
580
717
  }
581
718
 
582
719
 
720
+ template<typename T2, typename T1>
721
+ T2 BitCast(T1 a)
722
+ {
723
+ T2 result;
724
+ static_assert(sizeof(T1) == sizeof(T2), "BitCast requires same size");
725
+ memcpy(&result, &a, sizeof(T1));
726
+ return result;
727
+ }
728
+
729
+ template <typename T, typename T1, int N>
730
+ SIMD<T, N> Reinterpret (SIMD<T1,N> a) // reinterpret the bits of every lane of 'a' as type T (lane sizes must match)
731
+ {
732
+ if constexpr (N == 1)
733
+ return SIMD<T,N> (BitCast<T> (a.Data())); // memcpy-based cast: works when Data() returns by value and avoids pointer type-punning
734
+ else if constexpr (N == 2)
735
+ return SIMD<T,N> { BitCast<T> (a.Lo()),
736
+ BitCast<T> (a.Hi()) };
737
+ else
738
+ return SIMD<T,N> (Reinterpret<T> (a.Lo()), Reinterpret<T> (a.Hi())); // wider vectors: recurse on both halves
739
+ }
740
+
741
+
742
+ using std::round;
743
+ template <int N>
744
+ SIMD<double,N> round (SIMD<double,N> x) // lane-wise rounding to the nearest integer value
745
+ {
746
+ if constexpr (N == 1) return round(x.Data()); // scalar base case: round the double itself; round(x) would re-enter this template forever
747
+ else return { round(x.Lo()), round(x.Hi()) }; // recurse on the lower and upper halves
748
+ }
749
+
750
+ // NETGEN_INLINE int64_t RoundI (double x) { return lround(x); }
751
+ using std::lround;
752
+ template <int N>
753
+ SIMD<int64_t,N> lround (SIMD<double,N> x) // lane-wise rounding to the nearest integer, returned as int64 lanes
754
+ {
755
+ if constexpr (N == 1) return SIMD<int64_t,1> (int64_t(lround(x.Data()))); // scalar base case: round the double itself (lround(x) would re-enter this template); cast since 'long' need not be int64_t
756
+ else return { lround(x.Lo()), lround(x.Hi()) }; // recurse on the lower and upper halves
757
+ }
758
+
759
+ /*
760
+ reciprocal square root
761
+ Quake III algorithm, or intrinsics
762
+ */
763
+ //
764
+ #ifndef __CUDACC__
765
+ NETGEN_INLINE double rsqrt (double x) { return 1.0/sqrt(x); }
766
+ #endif
767
+
768
+ template <int N>
769
+ SIMD<double,N> rsqrt (SIMD<double,N> x)
770
+ {
771
+ if constexpr (N == 1) return 1.0/sqrt(x.Data());
772
+ else return { rsqrt(x.Lo()), rsqrt(x.Hi()) };
773
+ }
774
+
775
+ template <int N>
776
+ int64_t operator<< (int64_t a, IC<N> n) { return a << n.value; }
777
+
778
+ template <int S, int N>
779
+ SIMD<int64_t,S> operator<< (SIMD<int64_t,S> a, IC<N> n)
780
+ {
781
+ if constexpr (S == 1) return SIMD<int64_t,1> (a.Data() << n);
782
+ else return SIMD<int64_t,S> (a.Lo() << n, a.Hi() << n);
783
+ }
784
+
785
+
786
+
787
+
788
+ template <typename T, int N>
789
+ auto Min (SIMD<T,N> a, SIMD<T,N> b)
790
+ {
791
+ if constexpr (N==1)
792
+ return SIMD<T,1> (std::min(a[0], b[0]));
793
+ else
794
+ return SIMD<T,N> (Min(a.Lo(), b.Lo()), Min(a.Hi(), b.Hi()));
795
+ }
796
+
797
+ template <typename T, int N>
798
+ auto Max (SIMD<T,N> a, SIMD<T,N> b)
799
+ {
800
+ if constexpr (N==1)
801
+ return SIMD<T,1> (std::max(a[0], b[0]));
802
+ else
803
+ return SIMD<T,N> (Max(a.Lo(), b.Lo()), Max(a.Hi(), b.Hi()));
804
+ }
805
+
806
+
807
+
808
+
809
+
583
810
  template <typename T, int N>
584
811
  ostream & operator<< (ostream & ost, SIMD<T,N> simd)
585
812
  {
@@ -597,8 +824,11 @@ namespace ngcore
597
824
 
598
825
  using std::sqrt;
599
826
  template <int N>
600
- NETGEN_INLINE ngcore::SIMD<double,N> sqrt (ngcore::SIMD<double,N> a) {
601
- return ngcore::SIMD<double,N>([a](int i)->double { return sqrt(a[i]); } );
827
+ NETGEN_INLINE ngcore::SIMD<double,N> sqrt (ngcore::SIMD<double,N> a)
828
+ {
829
+ if constexpr (N == 1) return sqrt(a.Data());
830
+ else return { sqrt(a.Lo()), sqrt(a.Hi()) };
831
+ // return ngcore::SIMD<double,N>([a](int i)->double { return sqrt(a[i]); } );
602
832
  }
603
833
 
604
834
  using std::fabs;
@@ -777,6 +1007,42 @@ namespace ngcore
777
1007
  FMAddSub(a.Hi(), b.Hi(), c.Hi()));
778
1008
  }
779
1009
  }
1010
+
1011
+
1012
+
1013
+
1014
+ template <int BASE, typename Tuple, std::size_t ... Is>
1015
+ auto subtuple (const Tuple& tup, std::index_sequence<Is...>)
1016
+ {
1017
+ return std::make_tuple(std::get<BASE+Is>(tup)...);
1018
+ }
1019
+
1020
+ template <typename ...Args, typename T, int M>
1021
+ auto Concat (std::tuple<SIMD<T,M>, Args...> tup) // merge a tuple of SIMD values into one wider SIMD by recursive halving
1022
+ {
1023
+ constexpr size_t N = std::tuple_size<std::tuple<SIMD<T,M>, Args...>>();
1024
+
1025
+ if constexpr (N == 1)
1026
+ return std::get<0>(tup); // qualified like in subtuple: unqualified get<0> is not found via ADL before C++20
1027
+ else
1028
+ {
1029
+ static constexpr size_t N1 = LargestPowerOfTwo(N-1); // number of tuple entries in the first part
1030
+ static constexpr int N2 = N-N1; // remaining entries form the second part
1031
+
1032
+ auto SEQ1 = std::make_index_sequence<N1>();
1033
+ auto sub1 = subtuple<0>(tup, SEQ1);
1034
+
1035
+ auto SEQ2 = std::make_index_sequence<N2>();
1036
+ auto sub2 = subtuple<N1>(tup, SEQ2);
1037
+
1038
+ auto S1 = Concat(sub1);
1039
+ auto S2 = Concat(sub2);
1040
+ return SIMD<T,S1.Size()+S2.Size()>(S1, S2); // result width is the sum of both parts' widths
1041
+ }
1042
+ }
1043
+
1044
+
1045
+
780
1046
  }
781
1047
 
782
1048
 
@@ -0,0 +1,178 @@
1
+ #ifndef NETGEN_CORE_SIMD_MATH_HPP
2
+ #define NETGEN_CORE_SIMD_MATH_HPP
3
+
4
+ #include <tuple>
5
+
6
+ #ifndef M_PI
7
+ #define M_PI 3.14159265358979323846
8
+ #endif
9
+
10
+
11
+ namespace ngcore
12
+ {
13
+
14
+ /*
15
+ based on:
16
+ Stephen L. Moshier: Methods and Programs For Mathematical Functions
17
+ https://www.moshier.net/methprog.pdf
18
+
19
+ CEPHES MATHEMATICAL FUNCTION LIBRARY
20
+ https://www.netlib.org/cephes/
21
+ */
22
+
23
+ static constexpr double sincof[] = {
24
+ 1.58962301576546568060E-10,
25
+ -2.50507477628578072866E-8,
26
+ 2.75573136213857245213E-6,
27
+ -1.98412698295895385996E-4,
28
+ 8.33333333332211858878E-3,
29
+ -1.66666666666666307295E-1,
30
+ };
31
+
32
+ static constexpr double coscof[6] = {
33
+ -1.13585365213876817300E-11,
34
+ 2.08757008419747316778E-9,
35
+ -2.75573141792967388112E-7,
36
+ 2.48015872888517045348E-5,
37
+ -1.38888888888730564116E-3,
38
+ 4.16666666666665929218E-2,
39
+ };
40
+
41
+
42
+ // highly accurate on [-pi/4, pi/4]
43
+ template <int N>
44
+ auto sincos_reduced (SIMD<double,N> x)
45
+ {
46
+ auto x2 = x*x;
47
+
48
+ auto s = ((((( sincof[0]*x2 + sincof[1]) * x2 + sincof[2]) * x2 + sincof[3]) * x2 + sincof[4]) * x2 + sincof[5]);
49
+ s = x + x*x*x * s;
50
+
51
+ auto c = ((((( coscof[0]*x2 + coscof[1]) * x2 + coscof[2]) * x2 + coscof[3]) * x2 + coscof[4]) * x2 + coscof[5]);
52
+ c = 1.0 - 0.5*x2 + x2*x2*c;
53
+
54
+ return std::tuple{ s, c };
55
+ }
56
+
57
+ template <int N>
58
+ auto sincos (SIMD<double,N> x)
59
+ {
60
+ auto y = round((2/M_PI) * x);
61
+ auto q = lround(y);
62
+
63
+ auto [s1,c1] = sincos_reduced(x - y * (M_PI/2));
64
+
65
+ auto s2 = If((q & SIMD<int64_t,N>(1)) == SIMD<int64_t,N>(0), s1, c1);
66
+ auto s = If((q & SIMD<int64_t,N>(2)) == SIMD<int64_t,N>(0), s2, -s2);
67
+
68
+ auto c2 = If((q & SIMD<int64_t,N>(1)) == SIMD<int64_t,N>(0), c1, -s1);
69
+ auto c = If((q & SIMD<int64_t,N>(2)) == SIMD<int64_t,N>(0), c2, -c2);
70
+
71
+ return std::tuple{ s, c };
72
+ }
73
+
74
+
75
+
76
+
77
+
78
+
79
+
80
+ template <int N>
81
+ SIMD<double,N> exp_reduced (SIMD<double,N> x)
82
+ {
83
+ static constexpr double P[] = {
84
+ 1.26177193074810590878E-4,
85
+ 3.02994407707441961300E-2,
86
+ 9.99999999999999999910E-1,
87
+ };
88
+
89
+ static constexpr double Q[] = {
90
+ 3.00198505138664455042E-6,
91
+ 2.52448340349684104192E-3,
92
+ 2.27265548208155028766E-1,
93
+ 2.00000000000000000009E0,
94
+ };
95
+
96
+ /*
97
+ // from: https://www.netlib.org/cephes/
98
+ rational approximation for exponential
99
+ * of the fractional part:
100
+ * e**x = 1 + 2x P(x**2)/( Q(x**2) - x P(x**2) )
101
+
102
+ xx = x * x;
103
+ px = x * polevl( xx, P, 2 );
104
+ x = px/( polevl( xx, Q, 3 ) - px );
105
+ x = 1.0 + 2.0 * x;
106
+ */
107
+
108
+ auto xx = x*x;
109
+ auto px = (P[0]*xx + P[1]) * xx + P[2];
110
+ auto qx = ((Q[0]*xx+Q[1])*xx+Q[2])*xx+Q[3];
111
+ return 1.0 + 2.0*x * px / (qx- x * px);
112
+ }
113
+
114
+
115
+ template <int N>
116
+ SIMD<double,N> pow2_int64_to_float64(SIMD<int64_t,N> n)
117
+ {
118
+ // thx to deepseek
119
+
120
+ // Step 1: Clamp the input to valid exponent range [-1022, 1023]
121
+ // (We use saturated operations to handle out-of-range values)
122
+ SIMD<int64_t,N> max_exp(1023);
123
+ SIMD<int64_t,N> min_exp(-1022);
124
+ n = If(n > max_exp, max_exp, n);
125
+ n = If(min_exp > n, min_exp, n);
126
+
127
+ // Step 2: Add exponent bias (1023)
128
+ n = n + SIMD<int64_t,N>(1023);
129
+
130
+ // Step 3: Shift to exponent bit position (bit 52)
131
+ auto shifted_exp = (n << IC<52>());
132
+
133
+ // Step 4: Reinterpret as double
134
+ return Reinterpret<double> (shifted_exp);
135
+ }
136
+
137
+
138
+ template <int N>
139
+ SIMD<double,N> myexp (SIMD<double,N> x)
140
+ {
141
+ constexpr double log2 = 0.693147180559945286; // log(2.0);
142
+
143
+ auto r = round(1/log2 * x);
144
+ auto rI = lround(r);
145
+ r *= log2;
146
+
147
+ SIMD<double,N> pow2 = pow2_int64_to_float64 (rI);
148
+ return exp_reduced(x-r) * pow2;
149
+
150
+ // maybe better:
151
+ // x = ldexp( x, n );
152
+ }
153
+
154
+ /*
155
+ inline auto Test1 (SIMD<double> x)
156
+ {
157
+ return myexp(x);
158
+ }
159
+
160
+ inline auto Test2 (SIMD<double> x)
161
+ {
162
+ return sincos(x);
163
+ }
164
+
165
+ inline auto Test3 (SIMD<double,4> x)
166
+ {
167
+ return myexp(x);
168
+ }
169
+
170
+ inline auto Test4 (SIMD<double,4> x)
171
+ {
172
+ return sincos(x);
173
+ }
174
+ */
175
+
176
+ }
177
+
178
+ #endif
@@ -21,6 +21,9 @@ namespace ngcore
21
21
  : mask(_mm_cmpgt_epi32(_mm_set1_epi32(i),
22
22
  _mm_set_epi32(1, 1, 0, 0)))
23
23
  { ; }
24
+
25
+ SIMD (bool i0, bool i1) { mask = _mm_set_epi64x(i1?-1:0, i0?-1:0); }
26
+
24
27
  SIMD (__m128i _mask) : mask(_mask) { ; }
25
28
  __m128i Data() const { return mask; }
26
29
  static constexpr int Size() { return 2; }
@@ -66,6 +69,10 @@ namespace ngcore
66
69
  NETGEN_INLINE auto operator[] (int i) const { return ((int64_t*)(&data))[i]; }
67
70
  NETGEN_INLINE __m128i Data() const { return data; }
68
71
  NETGEN_INLINE __m128i & Data() { return data; }
72
+ // NETGEN_INLINE int64_t Lo() const { return _mm_extract_epi64(data, 0); }
73
+ // NETGEN_INLINE int64_t Hi() const { return _mm_extract_epi64(data, 1); }
74
+ NETGEN_INLINE int64_t Lo() const { return ((int64_t*)(&data))[0]; }
75
+ NETGEN_INLINE int64_t Hi() const { return ((int64_t*)(&data))[1]; }
69
76
  static SIMD FirstInt(int n0=0) { return { n0, n0+1 }; }
70
77
  };
71
78
 
@@ -215,6 +222,7 @@ NETGEN_INLINE SIMD<int64_t,2> operator- (SIMD<int64_t,2> a, SIMD<int64_t,2> b) {
215
222
  NETGEN_INLINE SIMD<mask64,2> operator!= (SIMD<double,2> a , SIMD<double,2> b)
216
223
  { return _mm_castpd_si128( _mm_cmpneq_pd(a.Data(),b.Data())); }
217
224
 
225
+ #ifdef __SSE4_2__
218
226
  NETGEN_INLINE SIMD<mask64,2> operator<= (SIMD<int64_t,2> a , SIMD<int64_t,2> b)
219
227
  { return _mm_xor_si128(_mm_cmpgt_epi64(a.Data(),b.Data()),_mm_set1_epi32(-1)); }
220
228
  NETGEN_INLINE SIMD<mask64,2> operator< (SIMD<int64_t,2> a , SIMD<int64_t,2> b)
@@ -223,11 +231,13 @@ NETGEN_INLINE SIMD<int64_t,2> operator- (SIMD<int64_t,2> a, SIMD<int64_t,2> b) {
223
231
  { return _mm_xor_si128(_mm_cmpgt_epi64(b.Data(),a.Data()),_mm_set1_epi32(-1)); }
224
232
  NETGEN_INLINE SIMD<mask64,2> operator> (SIMD<int64_t,2> a , SIMD<int64_t,2> b)
225
233
  { return my_mm_cmpgt_epi64(a.Data(),b.Data()); }
234
+ #endif
235
+ #ifdef __SSE4_1__
226
236
  NETGEN_INLINE SIMD<mask64,2> operator== (SIMD<int64_t,2> a , SIMD<int64_t,2> b)
227
237
  { return _mm_cmpeq_epi64(a.Data(),b.Data()); }
228
238
  NETGEN_INLINE SIMD<mask64,2> operator!= (SIMD<int64_t,2> a , SIMD<int64_t,2> b)
229
239
  { return _mm_xor_si128(_mm_cmpeq_epi64(a.Data(),b.Data()),_mm_set1_epi32(-1)); }
230
-
240
+ #endif
231
241
 
232
242
 
233
243
  NETGEN_INLINE SIMD<mask64,2> operator&& (SIMD<mask64,2> a, SIMD<mask64,2> b)
@@ -0,0 +1,37 @@
1
+ #ifndef NETGEN_CORE_STATUSHANDLER
2
+ #define NETGEN_CORE_STATUSHANDLER
3
+
4
+ #include <string>
5
+ #include "utils.hpp"
6
+
7
+ namespace ngcore
8
+ {
9
+
10
+ class NGCORE_API multithreadt
11
+ {
12
+ public:
13
+ int pause;
14
+ int testmode;
15
+ int redraw;
16
+ int drawing;
17
+ int terminate;
18
+ int running;
19
+ double percent;
20
+ const char * task;
21
+ bool demorunning;
22
+ std::string * tcl_todo = new std::string(""); // tcl commands set from parallel thread
23
+ multithreadt();
24
+ };
25
+
26
+ NGCORE_API extern volatile multithreadt multithread;
27
+
28
+
29
+ extern NGCORE_API void SetStatMsg(const std::string& s);
30
+
31
+ extern NGCORE_API void PushStatus(const std::string& s);
32
+ extern NGCORE_API void PushStatusF(const std::string& s);
33
+ extern NGCORE_API void PopStatus();
34
+ extern NGCORE_API void SetThreadPercent(double percent);
35
+ extern NGCORE_API void GetStatus(std::string & s, double & percentage);
36
+ }
37
+ #endif
@@ -262,14 +262,14 @@ namespace ngcore
262
262
  const MemoryTracer& GetMemoryTracer() const { return mt; }
263
263
 
264
264
  private:
265
- size_t GetMemUsage() const { return size == 0 ? 0 : sizeof(T)*index[size] + sizeof(IndexType) * size+1; }
265
+ NETGEN_INLINE size_t GetMemUsage() const { return size == 0 ? 0 : sizeof(T)*index[size] + sizeof(IndexType) * size+1; }
266
266
  MemoryTracer mt;
267
267
  };
268
268
 
269
269
 
270
270
  /// Print table
271
271
  template <class T, typename IndexType>
272
- inline ostream & operator<< (ostream & s, const Table<T,IndexType> & table)
272
+ inline ostream & operator<< (ostream & s, FlatTable<T,IndexType> table)
273
273
  {
274
274
  for (auto i : table.Range())
275
275
  {
@@ -462,6 +462,7 @@ namespace ngcore
462
462
  : TableCreator<int>(), takedofs(atakedofs) { };
463
463
  FilteredTableCreator(int acnt, const BitArray* atakedofs)
464
464
  : TableCreator<int>(acnt),takedofs(atakedofs) { };
465
+ void SetFilter (const BitArray * atakedofs) { takedofs = atakedofs; }
465
466
  void Add (size_t blocknr, int data);
466
467
  void Add (size_t blocknr, IntRange range);
467
468
  void Add (size_t blocknr, FlatArray<int> dofs);