netgen-mesher 6.2.2504.post11.dev0__cp313-cp313-win_amd64.whl → 6.2.2506.post48.dev0__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (153):
  1. netgen/__init__.pyi +3 -3
  2. netgen/cmake/NetgenConfig.cmake +10 -9
  3. netgen/config/__init__.pyi +8 -8
  4. netgen/config/config.py +7 -7
  5. netgen/config/config.pyi +8 -8
  6. netgen/include/core/archive.hpp +18 -3
  7. netgen/include/core/array.hpp +20 -4
  8. netgen/include/core/autodiff.hpp +9 -11
  9. netgen/include/core/autodiffdiff.hpp +0 -2
  10. netgen/include/core/bitarray.hpp +1 -1
  11. netgen/include/core/flags.hpp +1 -1
  12. netgen/include/core/hashtable.hpp +1 -1
  13. netgen/include/core/memtracer.hpp +7 -7
  14. netgen/include/core/ngcore.hpp +5 -0
  15. netgen/include/core/ngcore_api.hpp +11 -0
  16. netgen/include/core/paje_trace.hpp +9 -8
  17. netgen/include/core/profiler.hpp +5 -5
  18. netgen/include/core/register_archive.hpp +8 -0
  19. netgen/include/core/simd.hpp +69 -1
  20. netgen/include/core/simd_arm64.hpp +205 -1
  21. netgen/include/core/simd_avx.hpp +72 -4
  22. netgen/include/core/simd_avx512.hpp +9 -0
  23. netgen/include/core/simd_generic.hpp +274 -8
  24. netgen/include/core/simd_math.hpp +178 -0
  25. netgen/include/core/simd_sse.hpp +11 -1
  26. netgen/include/core/statushandler.hpp +37 -0
  27. netgen/include/core/table.hpp +3 -2
  28. netgen/include/core/taskmanager.hpp +34 -1
  29. netgen/include/core/utils.hpp +3 -8
  30. netgen/include/include/netgen_version.hpp +4 -4
  31. netgen/include/meshing/basegeom.hpp +1 -4
  32. netgen/include/meshing/global.hpp +0 -17
  33. netgen/include/meshing/hpref_tet.hpp +41 -0
  34. netgen/include/meshing/hprefinement.hpp +2 -0
  35. netgen/include/meshing/meshtype.hpp +2 -1
  36. netgen/include/meshing/msghandler.hpp +9 -6
  37. netgen/include/meshing/topology.hpp +2 -2
  38. netgen/include/nginterface.h +3 -2
  39. netgen/include/occ/occ_utils.hpp +26 -0
  40. netgen/include/occ/occgeom.hpp +8 -0
  41. netgen/include/pybind11/attr.h +40 -8
  42. netgen/include/pybind11/buffer_info.h +14 -14
  43. netgen/include/pybind11/cast.h +553 -29
  44. netgen/include/pybind11/chrono.h +4 -1
  45. netgen/include/pybind11/conduit/README.txt +15 -0
  46. netgen/include/pybind11/conduit/pybind11_conduit_v1.h +116 -0
  47. netgen/include/pybind11/conduit/pybind11_platform_abi_id.h +87 -0
  48. netgen/include/pybind11/conduit/wrap_include_python_h.h +72 -0
  49. netgen/include/pybind11/critical_section.h +56 -0
  50. netgen/include/pybind11/detail/class.h +172 -97
  51. netgen/include/pybind11/detail/common.h +270 -189
  52. netgen/include/pybind11/detail/cpp_conduit.h +75 -0
  53. netgen/include/pybind11/detail/descr.h +55 -0
  54. netgen/include/pybind11/detail/dynamic_raw_ptr_cast_if_possible.h +39 -0
  55. netgen/include/pybind11/detail/exception_translation.h +71 -0
  56. netgen/include/pybind11/detail/function_record_pyobject.h +191 -0
  57. netgen/include/pybind11/detail/init.h +113 -9
  58. netgen/include/pybind11/detail/internals.h +479 -344
  59. netgen/include/pybind11/detail/native_enum_data.h +209 -0
  60. netgen/include/pybind11/detail/pybind11_namespace_macros.h +82 -0
  61. netgen/include/pybind11/detail/struct_smart_holder.h +378 -0
  62. netgen/include/pybind11/detail/type_caster_base.h +506 -133
  63. netgen/include/pybind11/detail/using_smart_holder.h +22 -0
  64. netgen/include/pybind11/detail/value_and_holder.h +90 -0
  65. netgen/include/pybind11/eigen/matrix.h +19 -10
  66. netgen/include/pybind11/eigen/tensor.h +15 -11
  67. netgen/include/pybind11/embed.h +50 -46
  68. netgen/include/pybind11/eval.h +11 -6
  69. netgen/include/pybind11/functional.h +58 -49
  70. netgen/include/pybind11/gil.h +34 -82
  71. netgen/include/pybind11/gil_safe_call_once.h +12 -1
  72. netgen/include/pybind11/gil_simple.h +37 -0
  73. netgen/include/pybind11/native_enum.h +67 -0
  74. netgen/include/pybind11/numpy.h +272 -93
  75. netgen/include/pybind11/pybind11.h +947 -265
  76. netgen/include/pybind11/pytypes.h +127 -21
  77. netgen/include/pybind11/stl/filesystem.h +23 -25
  78. netgen/include/pybind11/stl.h +277 -59
  79. netgen/include/pybind11/stl_bind.h +42 -7
  80. netgen/include/pybind11/subinterpreter.h +299 -0
  81. netgen/include/pybind11/trampoline_self_life_support.h +65 -0
  82. netgen/include/pybind11/typing.h +177 -4
  83. netgen/include/pybind11/warnings.h +75 -0
  84. netgen/include/visualization/mvdraw.hpp +48 -12
  85. netgen/include/visualization/vssolution.hpp +3 -1
  86. netgen/lib/libnggui.lib +0 -0
  87. netgen/lib/ngcore.lib +0 -0
  88. netgen/lib/nglib.lib +0 -0
  89. netgen/libnggui.dll +0 -0
  90. netgen/libngguipy.pyd +0 -0
  91. netgen/libngpy/_NgOCC.pyi +224 -139
  92. netgen/libngpy/_csg.pyi +26 -26
  93. netgen/libngpy/_geom2d.pyi +34 -25
  94. netgen/libngpy/_meshing.pyi +262 -111
  95. netgen/libngpy/_stl.pyi +3 -4
  96. netgen/libngpy.pyd +0 -0
  97. netgen/ngcore.dll +0 -0
  98. netgen/nglib.dll +0 -0
  99. netgen/read_gmsh.py +41 -0
  100. netgen/togl.dll +0 -0
  101. netgen/version.py +1 -1
  102. netgen/webgui.py +38 -2
  103. {netgen_mesher-6.2.2504.post11.dev0.dist-info → netgen_mesher-6.2.2506.post48.dev0.dist-info}/METADATA +2 -1
  104. {netgen_mesher-6.2.2504.post11.dev0.dist-info → netgen_mesher-6.2.2506.post48.dev0.dist-info}/RECORD +153 -132
  105. pyngcore/pyngcore.cp313-win_amd64.pyd +0 -0
  106. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/boundarycondition.geo +0 -0
  107. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/boxcyl.geo +0 -0
  108. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/circle_on_cube.geo +0 -0
  109. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/cone.geo +0 -0
  110. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/cube.geo +0 -0
  111. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/cubeandring.geo +0 -0
  112. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/cubeandspheres.geo +0 -0
  113. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/cubemcyl.geo +0 -0
  114. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/cubemsphere.geo +0 -0
  115. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/cylinder.geo +0 -0
  116. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/cylsphere.geo +0 -0
  117. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/doc/ng4.pdf +0 -0
  118. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/ellipsoid.geo +0 -0
  119. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/ellipticcyl.geo +0 -0
  120. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/extrusion.geo +0 -0
  121. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/fichera.geo +0 -0
  122. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/frame.step +0 -0
  123. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/hinge.stl +0 -0
  124. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/lshape3d.geo +0 -0
  125. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/manyholes.geo +0 -0
  126. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/manyholes2.geo +0 -0
  127. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/matrix.geo +0 -0
  128. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/ortho.geo +0 -0
  129. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/part1.stl +0 -0
  130. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/period.geo +0 -0
  131. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/py_tutorials/exportNeutral.py +0 -0
  132. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/py_tutorials/mesh.py +0 -0
  133. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/py_tutorials/shaft.geo +0 -0
  134. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/revolution.geo +0 -0
  135. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/screw.step +0 -0
  136. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/sculpture.geo +0 -0
  137. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/shaft.geo +0 -0
  138. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/shell.geo +0 -0
  139. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/sphere.geo +0 -0
  140. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/sphereincube.geo +0 -0
  141. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/square.in2d +0 -0
  142. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/squarecircle.in2d +0 -0
  143. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/squarehole.in2d +0 -0
  144. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/torus.geo +0 -0
  145. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/trafo.geo +0 -0
  146. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/twobricks.geo +0 -0
  147. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/twocubes.geo +0 -0
  148. {netgen_mesher-6.2.2504.post11.dev0.data → netgen_mesher-6.2.2506.post48.dev0.data}/data/share/netgen/twocyl.geo +0 -0
  149. {netgen_mesher-6.2.2504.post11.dev0.dist-info → netgen_mesher-6.2.2506.post48.dev0.dist-info}/AUTHORS +0 -0
  150. {netgen_mesher-6.2.2504.post11.dev0.dist-info → netgen_mesher-6.2.2506.post48.dev0.dist-info}/LICENSE +0 -0
  151. {netgen_mesher-6.2.2504.post11.dev0.dist-info → netgen_mesher-6.2.2506.post48.dev0.dist-info}/WHEEL +0 -0
  152. {netgen_mesher-6.2.2504.post11.dev0.dist-info → netgen_mesher-6.2.2506.post48.dev0.dist-info}/entry_points.txt +0 -0
  153. {netgen_mesher-6.2.2504.post11.dev0.dist-info → netgen_mesher-6.2.2506.post48.dev0.dist-info}/top_level.txt +0 -0
netgen/include/core/simd_arm64.hpp +205 -1
@@ -30,6 +30,152 @@ namespace ngcore
30
30
  auto Hi() const { return mask[1]; }
31
31
  };
32
32
 
33
+
34
+ // *************************** int32 ***************************
35
+
36
+
37
+
38
+ template<>
39
+ class SIMD<int32_t,2>
40
+ {
41
+ int32x2_t data;
42
+ public:
43
+ static constexpr int Size() { return 2; }
44
+ SIMD() {}
45
+ SIMD (int32_t val) : data{val,val} {}
46
+ SIMD (int32_t v0, int32_t v1) : data{v0,v1} { }
47
+ SIMD (SIMD<int32_t,1> lo, SIMD<int32_t,1> hi) : data{lo[0], hi[0] } { }
48
+ SIMD (std::array<int32_t, 2> arr) : data{arr[0], arr[1]} { }
49
+
50
+ SIMD (int32x2_t _data) { data = _data; }
51
+
52
+ NETGEN_INLINE auto Data() const { return data; }
53
+ NETGEN_INLINE auto & Data() { return data; }
54
+
55
+ SIMD<int32_t,1> Lo() const { return Get<0>(); }
56
+ SIMD<int32_t,1> Hi() const { return Get<1>(); }
57
+
58
+ int32_t operator[] (int i) const { return data[i]; }
59
+ int32_t & operator[] (int i) { return ((int32_t*)&data)[i]; }
60
+
61
+ template <int I>
62
+ int32_t Get() const { return data[I]; }
63
+ static SIMD FirstInt(int n0=0) { return { n0+0, n0+1 }; }
64
+ };
65
+
66
+
67
+ template<>
68
+ class SIMD<int32_t,4>
69
+ {
70
+ int32x4_t data;
71
+ public:
72
+ static constexpr int Size() { return 4; }
73
+ SIMD() {}
74
+ SIMD (int32_t val) : data{val,val,val,val} {}
75
+ SIMD (int32_t v0, int32_t v1, int32_t v2, int32_t v3) : data{v0,v1,v2,v3} { }
76
+ SIMD (std::array<int32_t, 4> arr) : data{arr[0], arr[1], arr[2], arr[3]} { }
77
+
78
+ SIMD (int32x4_t _data) { data = _data; }
79
+ SIMD (SIMD<int32_t,2> lo, SIMD<int32_t,2> hi) : data{vcombine_s32(lo.Data(), hi.Data())} {}
80
+ SIMD (int32_t * p) : data{vld1q_s32(p)} { }
81
+
82
+ NETGEN_INLINE auto Data() const { return data; }
83
+ NETGEN_INLINE auto & Data() { return data; }
84
+
85
+ SIMD<int32_t,2> Lo() const { return vget_low_s32(data); }
86
+ SIMD<int32_t,2> Hi() const { return vget_high_s32(data); }
87
+
88
+ int32_t operator[] (int i) const { return data[i]; }
89
+ int32_t & operator[] (int i) { return ((int32_t*)&data)[i]; }
90
+
91
+ void Store (int32_t * p) { vst1q_s32(p, data); }
92
+
93
+ template <int I>
94
+ int32_t Get() const { return data[I]; }
95
+ static SIMD FirstInt(int n0=0) { return { n0+0, n0+1, n0+2, n0+3 }; }
96
+ };
97
+
98
+
99
+
100
+ NETGEN_INLINE auto Min (SIMD<int32_t,2> a, SIMD<int32_t,2> b) {
101
+ return SIMD<int32_t,2>(vmin_s32(a.Data(), b.Data()));
102
+ }
103
+
104
+ NETGEN_INLINE auto Max (SIMD<int32_t,2> a, SIMD<int32_t,2> b) {
105
+ return SIMD<int32_t,2>(vmax_s32(a.Data(), b.Data()));
106
+ }
107
+
108
+
109
+ NETGEN_INLINE auto Min (SIMD<int32_t,4> a, SIMD<int32_t,4> b) {
110
+ return SIMD<int32_t,4>(vminq_s32(a.Data(), b.Data()));
111
+ }
112
+
113
+ NETGEN_INLINE auto Max (SIMD<int32_t,4> a, SIMD<int32_t,4> b) {
114
+ return SIMD<int32_t,4>(vmaxq_s32(a.Data(), b.Data()));
115
+ }
116
+
117
+
118
+
119
+
120
+ // *************************** int64 ***************************
121
+
122
+ template<>
123
+ class SIMD<int64_t,2>
124
+ {
125
+ int64x2_t data;
126
+ public:
127
+ static constexpr int Size() { return 2; }
128
+ SIMD() {}
129
+ SIMD (int64_t val) : data{val,val} {}
130
+ SIMD (int64_t v0, int64_t v1) : data{vcombine_s64(int64x1_t{v0}, int64x1_t{v1})} { }
131
+ SIMD (std::array<int64_t, 2> arr) : data{arr[0], arr[1]} { }
132
+
133
+ SIMD (int64x2_t _data) { data = _data; }
134
+
135
+ NETGEN_INLINE auto Data() const { return data; }
136
+ NETGEN_INLINE auto & Data() { return data; }
137
+
138
+ int64_t Lo() const { return Get<0>(); }
139
+ int64_t Hi() const { return Get<1>(); }
140
+
141
+ int64_t operator[] (int i) const { return data[i]; }
142
+ int64_t & operator[] (int i) { return ((int64_t*)&data)[i]; }
143
+
144
+ template <int I>
145
+ int64_t Get() const { return data[I]; }
146
+ static SIMD FirstInt(int n0=0) { return { n0+0, n0+1 }; }
147
+ };
148
+
149
+ NETGEN_INLINE SIMD<int64_t,2> operator& (SIMD<int64_t,2> a, SIMD<int64_t,2> b)
150
+ {
151
+ return vandq_s64(a.Data(), b.Data());
152
+ }
153
+
154
+ NETGEN_INLINE SIMD<int64_t,2> operator+ (SIMD<int64_t,2> a, SIMD<int64_t,2> b)
155
+ {
156
+ return vaddq_s64(a.Data(), b.Data());
157
+ }
158
+
159
+ NETGEN_INLINE SIMD<mask64,2> operator== (SIMD<int64_t> a, SIMD<int64_t> b)
160
+ {
161
+ return vceqq_u64(a.Data(), b.Data());
162
+ }
163
+
164
+ NETGEN_INLINE SIMD<mask64,2> operator> (SIMD<int64_t> a, SIMD<int64_t> b)
165
+ {
166
+ return vcgtq_s64(a.Data(), b.Data());
167
+ }
168
+
169
+
170
+ template <int N>
171
+ SIMD<int64_t,2> operator<< (SIMD<int64_t,2> a, IC<N> n)
172
+ {
173
+ return vshlq_n_s64(a.Data(), N);
174
+ }
175
+
176
+
177
+
178
+ // *************************** double ***************************
33
179
 
34
180
  template<>
35
181
  class SIMD<double,2>
@@ -162,6 +308,16 @@ namespace ngcore
162
308
  auto tmp = vcmlaq_f64(c.Data(), a.Data(), b.Data()); // are * b
163
309
  c = vcmlaq_rot90_f64(tmp, a.Data(), b.Data()); // += i*aim * b
164
310
  }
311
+
312
+ NETGEN_INLINE void FMAComplex (SIMD<double,4> a, SIMD<double,4> b, SIMD<double,4> & c)
313
+ {
314
+ SIMD<double,2> clo = c.Lo();
315
+ SIMD<double,2> chi = c.Hi();
316
+ FMAComplex (a.Lo(), b.Lo(), clo);
317
+ FMAComplex (a.Hi(), b.Hi(), chi);
318
+ c = SIMD<double,4> (clo, chi);
319
+ }
320
+
165
321
 
166
322
 
167
323
  NETGEN_INLINE SIMD<double,2> operator+ (SIMD<double,2> a, SIMD<double,2> b)
@@ -178,6 +334,52 @@ namespace ngcore
178
334
  NETGEN_INLINE SIMD<double,2> operator/ (SIMD<double,2> a, SIMD<double,2> b)
179
335
  { return a.Data()/b.Data(); }
180
336
 
337
+ NETGEN_INLINE SIMD<double,2> sqrt (SIMD<double,2> x)
338
+ { return vsqrtq_f64(x.Data()); }
339
+
340
+
341
+ NETGEN_INLINE SIMD<double,2> round (SIMD<double,2> x)
342
+ {
343
+ return vrndnq_f64(x.Data());
344
+ }
345
+
346
+ NETGEN_INLINE SIMD<int64_t,2> lround (SIMD<double,2> x)
347
+ {
348
+ return vcvtq_s64_f64(x.Data());
349
+ }
350
+
351
+
352
+
353
+ NETGEN_INLINE SIMD<double,2> rsqrt (SIMD<double,2> x)
354
+ {
355
+ return 1.0 / sqrt(x);
356
+
357
+ // SIMD<double,2> y = vrsqrteq_f64(x.Data());
358
+
359
+ /*
360
+ y = y * vrsqrtsq_f64( (x*y).Data(), y.Data());
361
+ y = y * vrsqrtsq_f64( (x*y).Data(), y.Data());
362
+ y = y * vrsqrtsq_f64( (x*y).Data(), y.Data());
363
+ */
364
+
365
+ /*
366
+ auto x_half = 0.5*x;
367
+ y = y * (1.5 - (x_half * y * y));
368
+ y = y * (1.5 - (x_half * y * y));
369
+ y = y * (1.5 - (x_half * y * y));
370
+
371
+ return y;
372
+ */
373
+ }
374
+
375
+
376
+
377
+
378
+ template <>
379
+ NETGEN_INLINE SIMD<double,2> Reinterpret (SIMD<int64_t,2> a)
380
+ {
381
+ return vreinterpretq_f64_s64(a.Data());
382
+ }
181
383
 
182
384
 
183
385
  NETGEN_INLINE SIMD<double,2> If (SIMD<mask64,2> a, SIMD<double,2> b, SIMD<double,2> c)
@@ -188,7 +390,9 @@ namespace ngcore
188
390
  }
189
391
  NETGEN_INLINE SIMD<int64_t,2> If (SIMD<mask64,2> a, SIMD<int64_t,2> b, SIMD<int64_t,2> c)
190
392
  {
191
- return SIMD<int64_t,2> (a[0] ? b[0] : c[0], a[1] ? b[1] : c[1]);
393
+ // return SIMD<int64_t,2> (a[0] ? b[0] : c[0], a[1] ? b[1] : c[1]);
394
+ uint64x2_t mask = vreinterpretq_u64_s64(a.Data());
395
+ return vbslq_s64(mask, b.Data(), c.Data());
192
396
  }
193
397
 
194
398
  NETGEN_INLINE SIMD<mask64,2> operator&& (SIMD<mask64,2> a, SIMD<mask64,2> b)
netgen/include/core/simd_avx.hpp +72 -4
@@ -21,11 +21,31 @@ namespace ngcore
21
21
  #endif // defined(__GNUC__) && (__GNUC__ == 7)
22
22
 
23
23
  #if defined(__AVX2__)
24
+ NETGEN_INLINE __m256i my_mm256_cmpeq_epi64 (__m256i a, __m256i b)
25
+ {
26
+ return _mm256_cmpeq_epi64 (a,b);
27
+ }
28
+
24
29
  NETGEN_INLINE __m256i my_mm256_cmpgt_epi64 (__m256i a, __m256i b)
25
30
  {
26
31
  return _mm256_cmpgt_epi64 (a,b);
27
32
  }
33
+
34
+ NETGEN_INLINE __m256i my_mm256_cvtepi32_epi64 (__m128i a)
35
+ {
36
+ return _mm256_cvtepi32_epi64 (a);
37
+ }
38
+
28
39
  #else
40
+ NETGEN_INLINE __m256i my_mm256_cmpeq_epi64 (__m256i a, __m256i b)
41
+ {
42
+ __m128i rlo = _mm_cmpeq_epi64(_mm256_extractf128_si256(a, 0),
43
+ _mm256_extractf128_si256(b, 0));
44
+ __m128i rhi = _mm_cmpeq_epi64(_mm256_extractf128_si256(a, 1),
45
+ _mm256_extractf128_si256(b, 1));
46
+ return _mm256_insertf128_si256 (_mm256_castsi128_si256(rlo), rhi, 1);
47
+ }
48
+
29
49
  NETGEN_INLINE __m256i my_mm256_cmpgt_epi64 (__m256i a, __m256i b)
30
50
  {
31
51
  __m128i rlo = _mm_cmpgt_epi64(_mm256_extractf128_si256(a, 0),
@@ -34,6 +54,13 @@ namespace ngcore
34
54
  _mm256_extractf128_si256(b, 1));
35
55
  return _mm256_insertf128_si256 (_mm256_castsi128_si256(rlo), rhi, 1);
36
56
  }
57
+
58
+ NETGEN_INLINE __m256i my_mm256_cvtepi32_epi64 (__m128i a)
59
+ {
60
+ __m128i rlo = _mm_cvtepi32_epi64(a); // First two 32-bit integers
61
+ __m128i rhi = _mm_cvtepi32_epi64(_mm_shuffle_epi32(a, _MM_SHUFFLE(3, 2, 3, 2))); // Next two 32-bit integers
62
+ return _mm256_insertf128_si256 (_mm256_castsi128_si256(rlo), rhi, 1);
63
+ }
37
64
  #endif
38
65
 
39
66
 
@@ -86,7 +113,7 @@ namespace ngcore
86
113
  : data{_mm256_set_epi64x(a[3],a[2],a[1],a[0])}
87
114
  {}
88
115
  SIMD (SIMD<int64_t,2> v0, SIMD<int64_t,2> v1)
89
- : data(_mm256_set_m128i(v0.Data(),v1.Data()))
116
+ : data(_mm256_set_m128i(v1.Data(),v0.Data()))
90
117
  {}
91
118
  SIMD (__m256i _data) { data = _data; }
92
119
 
@@ -97,6 +124,13 @@ namespace ngcore
97
124
  SIMD<int64_t,2> Lo() const { return _mm256_extractf128_si256(data, 0); }
98
125
  SIMD<int64_t,2> Hi() const { return _mm256_extractf128_si256(data, 1); }
99
126
  static SIMD FirstInt(int n0=0) { return { n0+0, n0+1, n0+2, n0+3 }; }
127
+
128
+ template <int I>
129
+ double Get() const
130
+ {
131
+ static_assert(I>=0 && I<4, "Index out of range");
132
+ return (*this)[I];
133
+ }
100
134
  };
101
135
 
102
136
 
@@ -105,6 +139,11 @@ namespace ngcore
105
139
  #ifdef __AVX2__
106
140
  NETGEN_INLINE SIMD<int64_t,4> operator+ (SIMD<int64_t,4> a, SIMD<int64_t,4> b) { return _mm256_add_epi64(a.Data(),b.Data()); }
107
141
  NETGEN_INLINE SIMD<int64_t,4> operator- (SIMD<int64_t,4> a, SIMD<int64_t,4> b) { return _mm256_sub_epi64(a.Data(),b.Data()); }
142
+ NETGEN_INLINE SIMD<int64_t,4> operator& (SIMD<int64_t,4> a, SIMD<int64_t,4> b)
143
+ { return _mm256_castpd_si256(_mm256_and_pd (_mm256_castsi256_pd(a.Data()),_mm256_castsi256_pd( b.Data()))); }
144
+
145
+ template <int N>
146
+ SIMD<int64_t,4> operator<< (SIMD<int64_t,4> a, IC<N> n) { return _mm256_sll_epi64(a.Data(),_mm_set_epi32(0,0,0,N)); }
108
147
  #endif // __AVX2__
109
148
 
110
149
  template<>
@@ -178,7 +217,11 @@ namespace ngcore
178
217
  NETGEN_INLINE SIMD<double,4> floor (SIMD<double,4> a) { return _mm256_floor_pd(a.Data()); }
179
218
  NETGEN_INLINE SIMD<double,4> ceil (SIMD<double,4> a) { return _mm256_ceil_pd(a.Data()); }
180
219
  NETGEN_INLINE SIMD<double,4> fabs (SIMD<double,4> a) { return _mm256_max_pd(a.Data(), (-a).Data()); }
181
-
220
+ NETGEN_INLINE SIMD<double,4> round(SIMD<double,4> a) { return _mm256_round_pd(a.Data(), _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); }
221
+ NETGEN_INLINE SIMD<int64_t,4> lround (SIMD<double,4> a)
222
+ {
223
+ return my_mm256_cvtepi32_epi64(_mm256_cvtpd_epi32(_mm256_round_pd(a.Data(), _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)));
224
+ }
182
225
 
183
226
  #ifdef __FMA__
184
227
  NETGEN_INLINE SIMD<double,4> FMA (SIMD<double,4> a, SIMD<double,4> b, SIMD<double,4> c)
@@ -255,9 +298,9 @@ namespace ngcore
255
298
  NETGEN_INLINE SIMD<mask64,4> operator> (SIMD<int64_t,4> a , SIMD<int64_t,4> b)
256
299
  { return my_mm256_cmpgt_epi64(a.Data(),b.Data()); }
257
300
  NETGEN_INLINE SIMD<mask64,4> operator== (SIMD<int64_t,4> a , SIMD<int64_t,4> b)
258
- { return _mm256_cmpeq_epi64(a.Data(),b.Data()); }
301
+ { return my_mm256_cmpeq_epi64(a.Data(),b.Data()); }
259
302
  NETGEN_INLINE SIMD<mask64,4> operator!= (SIMD<int64_t,4> a , SIMD<int64_t,4> b)
260
- { return _mm256_xor_si256(_mm256_cmpeq_epi64(a.Data(),b.Data()),_mm256_set1_epi32(-1)); }
303
+ { return _mm256_xor_si256(my_mm256_cmpeq_epi64(a.Data(),b.Data()),_mm256_set1_epi32(-1)); }
261
304
 
262
305
  #ifdef __AVX2__
263
306
  NETGEN_INLINE SIMD<mask64,4> operator&& (SIMD<mask64,4> a, SIMD<mask64,4> b)
@@ -274,6 +317,15 @@ namespace ngcore
274
317
  NETGEN_INLINE SIMD<mask64,4> operator! (SIMD<mask64,4> a)
275
318
  { return _mm256_castpd_si256(_mm256_xor_pd (_mm256_castsi256_pd(a.Data()),_mm256_castsi256_pd( _mm256_cmpeq_epi64(a.Data(),a.Data())))); }
276
319
  #endif
320
+
321
+ template <>
322
+ NETGEN_INLINE SIMD<double,4> Reinterpret (SIMD<int64_t,4> a)
323
+ {
324
+ return _mm256_castsi256_pd (a.Data());
325
+ }
326
+
327
+
328
+
277
329
  NETGEN_INLINE SIMD<double,4> If (SIMD<mask64,4> a, SIMD<double,4> b, SIMD<double,4> c)
278
330
  { return _mm256_blendv_pd(c.Data(), b.Data(), _mm256_castsi256_pd(a.Data())); }
279
331
 
@@ -314,6 +366,22 @@ namespace ngcore
314
366
  }
315
367
 
316
368
 
369
+ /*
370
+ // untested ...
371
+ NETGEN_INLINE SIMD<double,4> rsqrt (SIMD<double,4> x)
372
+ {
373
+ // return 1.0 / sqrt(x);
374
+ // SIMD<double,4> y = _mm256_rsqrt14_pd(x.Data()); // only avx512
375
+ SIMD<double,4> y = _mm256_cvtps_pd ( _mm_rsqrt_ps ( _mm256_cvtpd_ps (x.Data())));
376
+ auto x_half = 0.5*x;
377
+ y = y * (1.5 - (x_half * y * y));
378
+ y = y * (1.5 - (x_half * y * y));
379
+ return y;
380
+ }
381
+ */
382
+
383
+
384
+
317
385
  NETGEN_INLINE SIMD<int64_t,4> If (SIMD<mask64,4> a, SIMD<int64_t,4> b, SIMD<int64_t,4> c)
318
386
  { return _mm256_castpd_si256(_mm256_blendv_pd(_mm256_castsi256_pd(c.Data()), _mm256_castsi256_pd(b.Data()),
319
387
  _mm256_castsi256_pd(a.Data()))); }
netgen/include/core/simd_avx512.hpp +9 -0
@@ -59,6 +59,15 @@ namespace ngcore
59
59
  data = _mm512_set_epi64(func(7), func(6), func(5), func(4), func(3), func(2), func(1), func(0));
60
60
  }
61
61
 
62
+ SIMD (SIMD<int64_t,4> v0, SIMD<int64_t,4> v1)
63
+ : data(_mm512_castsi256_si512(v0.Data()))
64
+ {
65
+ data = _mm512_inserti64x4(data, v1.Data(), 1);
66
+ }
67
+
68
+ SIMD<int64_t,4> Lo() const { return _mm512_castsi512_si256(data); }
69
+ SIMD<int64_t,4> Hi() const { return _mm512_extracti64x4_epi64(data, 1); }
70
+
62
71
 
63
72
  NETGEN_INLINE auto operator[] (int i) const { return ((int64_t*)(&data))[i]; }
64
73
  NETGEN_INLINE auto & operator[] (int i) { return ((int64_t*)(&data))[i]; }