ngsolve 6.2.2505__cp311-cp311-macosx_10_15_universal2.whl → 6.2.2505.post94.dev0__cp311-cp311-macosx_10_15_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ngsolve might be problematic. Click here for more details.
- netgen/include/bilinearform.hpp +1 -1
- netgen/include/diffop_impl.hpp +3 -1
- netgen/include/fespace.hpp +4 -2
- netgen/include/gridfunction.hpp +1 -1
- netgen/include/h1amg.hpp +24 -1
- netgen/include/hcurlcurlfe.hpp +20 -0
- netgen/include/hdivhofespace.hpp +2 -0
- netgen/include/mptools.hpp +832 -97
- netgen/include/ngblas.hpp +113 -4
- netgen/include/recursive_pol.hpp +63 -11
- netgen/include/simd_complex.hpp +20 -0
- netgen/include/sparsematrix_dyn.hpp +2 -2
- netgen/include/sparsematrix_impl.hpp +25 -0
- netgen/include/vector.hpp +15 -2
- netgen/libngbla.dylib +0 -0
- netgen/libngcomp.dylib +0 -0
- netgen/libngfem.dylib +0 -0
- netgen/libngla.dylib +0 -0
- netgen/libngsbem.dylib +0 -0
- netgen/libngstd.dylib +0 -0
- ngsolve/cmake/NGSolveConfig.cmake +1 -1
- ngsolve/config/config.py +5 -5
- {ngsolve-6.2.2505.dist-info → ngsolve-6.2.2505.post94.dev0.dist-info}/METADATA +2 -2
- {ngsolve-6.2.2505.dist-info → ngsolve-6.2.2505.post94.dev0.dist-info}/RECORD +60 -60
- {ngsolve-6.2.2505.data → ngsolve-6.2.2505.post94.dev0.data}/data/Netgen.icns +0 -0
- {ngsolve-6.2.2505.data → ngsolve-6.2.2505.post94.dev0.data}/data/bin/ngscxx +0 -0
- {ngsolve-6.2.2505.data → ngsolve-6.2.2505.post94.dev0.data}/data/bin/ngsld +0 -0
- {ngsolve-6.2.2505.data → ngsolve-6.2.2505.post94.dev0.data}/data/bin/ngsolve.tcl +0 -0
- {ngsolve-6.2.2505.data → ngsolve-6.2.2505.post94.dev0.data}/data/bin/ngspy +0 -0
- {ngsolve-6.2.2505.data → ngsolve-6.2.2505.post94.dev0.data}/data/share/ngsolve/beam.geo +0 -0
- {ngsolve-6.2.2505.data → ngsolve-6.2.2505.post94.dev0.data}/data/share/ngsolve/beam.vol +0 -0
- {ngsolve-6.2.2505.data → ngsolve-6.2.2505.post94.dev0.data}/data/share/ngsolve/chip.in2d +0 -0
- {ngsolve-6.2.2505.data → ngsolve-6.2.2505.post94.dev0.data}/data/share/ngsolve/chip.vol +0 -0
- {ngsolve-6.2.2505.data → ngsolve-6.2.2505.post94.dev0.data}/data/share/ngsolve/coil.geo +0 -0
- {ngsolve-6.2.2505.data → ngsolve-6.2.2505.post94.dev0.data}/data/share/ngsolve/coil.vol +0 -0
- {ngsolve-6.2.2505.data → ngsolve-6.2.2505.post94.dev0.data}/data/share/ngsolve/coilshield.geo +0 -0
- {ngsolve-6.2.2505.data → ngsolve-6.2.2505.post94.dev0.data}/data/share/ngsolve/coilshield.vol +0 -0
- {ngsolve-6.2.2505.data → ngsolve-6.2.2505.post94.dev0.data}/data/share/ngsolve/cube.geo +0 -0
- {ngsolve-6.2.2505.data → ngsolve-6.2.2505.post94.dev0.data}/data/share/ngsolve/cube.vol +0 -0
- {ngsolve-6.2.2505.data → ngsolve-6.2.2505.post94.dev0.data}/data/share/ngsolve/d10_DGdoubleglazing.pde +0 -0
- {ngsolve-6.2.2505.data → ngsolve-6.2.2505.post94.dev0.data}/data/share/ngsolve/d11_chip_nitsche.pde +0 -0
- {ngsolve-6.2.2505.data → ngsolve-6.2.2505.post94.dev0.data}/data/share/ngsolve/d1_square.pde +0 -0
- {ngsolve-6.2.2505.data → ngsolve-6.2.2505.post94.dev0.data}/data/share/ngsolve/d2_chip.pde +0 -0
- {ngsolve-6.2.2505.data → ngsolve-6.2.2505.post94.dev0.data}/data/share/ngsolve/d3_helmholtz.pde +0 -0
- {ngsolve-6.2.2505.data → ngsolve-6.2.2505.post94.dev0.data}/data/share/ngsolve/d4_cube.pde +0 -0
- {ngsolve-6.2.2505.data → ngsolve-6.2.2505.post94.dev0.data}/data/share/ngsolve/d5_beam.pde +0 -0
- {ngsolve-6.2.2505.data → ngsolve-6.2.2505.post94.dev0.data}/data/share/ngsolve/d6_shaft.pde +0 -0
- {ngsolve-6.2.2505.data → ngsolve-6.2.2505.post94.dev0.data}/data/share/ngsolve/d7_coil.pde +0 -0
- {ngsolve-6.2.2505.data → ngsolve-6.2.2505.post94.dev0.data}/data/share/ngsolve/d8_coilshield.pde +0 -0
- {ngsolve-6.2.2505.data → ngsolve-6.2.2505.post94.dev0.data}/data/share/ngsolve/d9_hybridDG.pde +0 -0
- {ngsolve-6.2.2505.data → ngsolve-6.2.2505.post94.dev0.data}/data/share/ngsolve/doubleglazing.in2d +0 -0
- {ngsolve-6.2.2505.data → ngsolve-6.2.2505.post94.dev0.data}/data/share/ngsolve/doubleglazing.vol +0 -0
- {ngsolve-6.2.2505.data → ngsolve-6.2.2505.post94.dev0.data}/data/share/ngsolve/piezo2d40round4.vol.gz +0 -0
- {ngsolve-6.2.2505.data → ngsolve-6.2.2505.post94.dev0.data}/data/share/ngsolve/shaft.geo +0 -0
- {ngsolve-6.2.2505.data → ngsolve-6.2.2505.post94.dev0.data}/data/share/ngsolve/shaft.vol +0 -0
- {ngsolve-6.2.2505.data → ngsolve-6.2.2505.post94.dev0.data}/data/share/ngsolve/square.in2d +0 -0
- {ngsolve-6.2.2505.data → ngsolve-6.2.2505.post94.dev0.data}/data/share/ngsolve/square.vol +0 -0
- {ngsolve-6.2.2505.dist-info → ngsolve-6.2.2505.post94.dev0.dist-info}/LICENSE +0 -0
- {ngsolve-6.2.2505.dist-info → ngsolve-6.2.2505.post94.dev0.dist-info}/WHEEL +0 -0
- {ngsolve-6.2.2505.dist-info → ngsolve-6.2.2505.post94.dev0.dist-info}/top_level.txt +0 -0
netgen/include/mptools.hpp
CHANGED
|
@@ -20,6 +20,157 @@ namespace ngsbem
|
|
|
20
20
|
{
|
|
21
21
|
using namespace ngfem;
|
|
22
22
|
|
|
23
|
+
template<typename T>
|
|
24
|
+
constexpr int VecLength = 1; // Default: Complex has length 1
|
|
25
|
+
|
|
26
|
+
template<int N>
|
|
27
|
+
constexpr int VecLength<Vec<N, Complex>> = N; // Specialization: Vec<N,Complex> has length N
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
// SIMD width used when packing sources into SIMD form (see the
// std::array<..., FMM_SW> / SIMD<double,FMM_SW> uses further down).
// `inline` gives this header-defined constant one shared definition
// instead of a separate internal-linkage copy in every including
// translation unit.
inline constexpr int FMM_SW = 4;
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
// ************************ SIMD - creation (should end up in simd.hpp) *************
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
template <int S, typename T, int SW>
|
|
38
|
+
Vec<S,T> HSum (Vec<S,SIMD<T,SW>> v)
|
|
39
|
+
{
|
|
40
|
+
Vec<S,T> res;
|
|
41
|
+
for (int i = 0; i < S; i++)
|
|
42
|
+
res(i) = HSum(v(i));
|
|
43
|
+
// Iterate<S> ([&](auto i) {
|
|
44
|
+
// res.HTData().template Elem<i.value>() = HSum(v.HTData().template Elem<i.value>());
|
|
45
|
+
// });
|
|
46
|
+
return res;
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
template <typename T, size_t S> class MakeSimdCl;
|
|
51
|
+
|
|
52
|
+
template <typename T, size_t S>
|
|
53
|
+
auto MakeSimd (array<T,S> aa) { return MakeSimdCl(aa).Get(); }
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
template <typename T, size_t S>
|
|
57
|
+
class MakeSimdCl
|
|
58
|
+
{
|
|
59
|
+
array<T,S> a;
|
|
60
|
+
public:
|
|
61
|
+
MakeSimdCl (array<T,S> aa) : a(aa) { ; }
|
|
62
|
+
auto Get() const
|
|
63
|
+
{
|
|
64
|
+
SIMD<T,S> sa( [this] (auto i) { return (this->a)[i]; });
|
|
65
|
+
return sa;
|
|
66
|
+
}
|
|
67
|
+
};
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
template <typename T, size_t S, int VS>
|
|
71
|
+
class MakeSimdCl<Vec<VS,T>,S>
|
|
72
|
+
{
|
|
73
|
+
array<Vec<VS,T>,S> a;
|
|
74
|
+
public:
|
|
75
|
+
MakeSimdCl (array<Vec<VS,T>,S> aa) : a(aa) { ; }
|
|
76
|
+
|
|
77
|
+
auto Get() const
|
|
78
|
+
{
|
|
79
|
+
array<T,S> ai;
|
|
80
|
+
Vec<VS, decltype(MakeSimd(ai))> res;
|
|
81
|
+
for (int i = 0; i < VS; i++)
|
|
82
|
+
{
|
|
83
|
+
for (int j = 0; j < S; j++)
|
|
84
|
+
ai[j] = a[j](i);
|
|
85
|
+
res(i) = MakeSimd(ai);
|
|
86
|
+
}
|
|
87
|
+
return res;
|
|
88
|
+
}
|
|
89
|
+
};
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
template <size_t S>
|
|
94
|
+
class MakeSimdCl<Complex,S>
|
|
95
|
+
{
|
|
96
|
+
array<Complex,S> a;
|
|
97
|
+
public:
|
|
98
|
+
MakeSimdCl (array<Complex,S> aa) : a(aa) { ; }
|
|
99
|
+
auto Get() const
|
|
100
|
+
{
|
|
101
|
+
array<double,S> ar, ai;
|
|
102
|
+
for (int j = 0; j < S; j++)
|
|
103
|
+
{
|
|
104
|
+
ar[j] = Real(a[j]);
|
|
105
|
+
ai[j] = Imag(a[j]);
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
return SIMD<Complex,S> (MakeSimd(ar), MakeSimd(ai));
|
|
109
|
+
}
|
|
110
|
+
};
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
template <typename Tfirst, size_t S, typename ...Trest>
|
|
118
|
+
class MakeSimdCl<std::tuple<Tfirst,Trest...>,S>
|
|
119
|
+
{
|
|
120
|
+
array<std::tuple<Tfirst,Trest...>,S> a;
|
|
121
|
+
public:
|
|
122
|
+
MakeSimdCl (array<std::tuple<Tfirst,Trest...>,S> aa) : a(aa) { ; }
|
|
123
|
+
auto Get() const
|
|
124
|
+
{
|
|
125
|
+
array<Tfirst,S> a0;
|
|
126
|
+
for (int i = 0; i < S; i++)
|
|
127
|
+
a0[i] = std::get<0> (a[i]);
|
|
128
|
+
|
|
129
|
+
if constexpr (std::tuple_size<tuple<Tfirst,Trest...>>::value == 1)
|
|
130
|
+
{
|
|
131
|
+
return tuple(MakeSimd(a0));
|
|
132
|
+
}
|
|
133
|
+
else
|
|
134
|
+
{
|
|
135
|
+
array<tuple<Trest...>,S> arest;
|
|
136
|
+
for (int i = 0; i < S; i++)
|
|
137
|
+
arest[i] = skip_first(a[i]);
|
|
138
|
+
|
|
139
|
+
return tuple_cat ( tuple (MakeSimd(a0)), MakeSimd(arest) );
|
|
140
|
+
}
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
template <typename... Ts>
|
|
144
|
+
static auto skip_first(const std::tuple<Ts...>& t) {
|
|
145
|
+
return std::apply([](auto first, auto... rest) {
|
|
146
|
+
return std::make_tuple(rest...);
|
|
147
|
+
}, t);
|
|
148
|
+
}
|
|
149
|
+
};
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
// Converts a Cartesian offset into (len, theta, phi):
//   len   = L2Norm(dist)
//   theta = acos(dist(2)/len), or 0 when len < 1e-30
//   phi   = atan2(dist(1), dist(0)), or 0 when dist(0)^2 + dist(1)^2 < 1e-30
inline std::tuple<double, double, double> SphericalCoordinates(Vec<3> dist)
{
  const double len = L2Norm(dist);

  // degenerate (zero-length) vector: no meaningful polar angle
  const double theta = (len < 1e-30) ? 0.0 : acos (dist(2) / len);

  // vector on the z-axis: azimuth is undefined, use 0
  const double xy_sqr = sqr(dist(0)) + sqr(dist(1));
  const double phi = (xy_sqr < 1e-30) ? 0.0 : atan2(dist(1), dist(0));

  return {len, theta, phi};
}
|
|
173
|
+
|
|
23
174
|
|
|
24
175
|
template <typename entry_type = Complex>
|
|
25
176
|
class NGS_DLL_HEADER SphericalHarmonics
|
|
@@ -84,9 +235,69 @@ namespace ngsbem
|
|
|
84
235
|
|
|
85
236
|
void Calc (Vec<3> x, FlatVector<Complex> shapes);
|
|
86
237
|
|
|
87
|
-
|
|
238
|
+
|
|
239
|
+
void FlipZ ();
|
|
88
240
|
void RotateZ (double alpha);
|
|
89
|
-
|
|
241
|
+
|
|
242
|
+
template <typename FUNC>
|
|
243
|
+
void RotateZ (double alpha, FUNC func) const
|
|
244
|
+
{
|
|
245
|
+
if (order < 0) return;
|
|
246
|
+
|
|
247
|
+
Vector<Complex> exp_imalpha(order+1);
|
|
248
|
+
Complex exp_ialpha(cos(alpha), sin(alpha));
|
|
249
|
+
Complex prod = 1.0;
|
|
250
|
+
for (int i = 0; i <= order; i++)
|
|
251
|
+
{
|
|
252
|
+
exp_imalpha(i) = prod;
|
|
253
|
+
prod *= exp_ialpha;
|
|
254
|
+
}
|
|
255
|
+
|
|
256
|
+
int ii = 0;
|
|
257
|
+
for (int n = 0; n <= order; n++)
|
|
258
|
+
{
|
|
259
|
+
for (int m = -n; m < 0; m++, ii++)
|
|
260
|
+
func(ii, conj(exp_imalpha(-m)));
|
|
261
|
+
for (int m = 0; m <= n; m++, ii++)
|
|
262
|
+
func(ii, exp_imalpha(m));
|
|
263
|
+
};
|
|
264
|
+
};
|
|
265
|
+
|
|
266
|
+
template <typename FUNC>
|
|
267
|
+
void RotateZFlip (double alpha, bool flip, FUNC func) const
|
|
268
|
+
{
|
|
269
|
+
if (order < 0) return;
|
|
270
|
+
|
|
271
|
+
Vector<Complex> exp_imalpha(order+1);
|
|
272
|
+
Complex exp_ialpha(cos(alpha), sin(alpha));
|
|
273
|
+
Complex prod = 1.0;
|
|
274
|
+
for (int i = 0; i <= order; i++)
|
|
275
|
+
{
|
|
276
|
+
exp_imalpha(i) = prod;
|
|
277
|
+
prod *= exp_ialpha;
|
|
278
|
+
}
|
|
279
|
+
|
|
280
|
+
int ii = 0;
|
|
281
|
+
|
|
282
|
+
auto FlipFactor = [] (int n, int m, bool flip)->double
|
|
283
|
+
{
|
|
284
|
+
if (flip)
|
|
285
|
+
return ((n-m)%2) == 1 ? -1 : 1;
|
|
286
|
+
return 1.0;
|
|
287
|
+
};
|
|
288
|
+
|
|
289
|
+
for (int n = 0; n <= order; n++)
|
|
290
|
+
{
|
|
291
|
+
for (int m = -n; m < 0; m++, ii++)
|
|
292
|
+
func(ii, FlipFactor(n,m,flip)*conj(exp_imalpha(-m)));
|
|
293
|
+
for (int m = 0; m <= n; m++, ii++)
|
|
294
|
+
func(ii, FlipFactor(n,m,flip)*exp_imalpha(m));
|
|
295
|
+
};
|
|
296
|
+
};
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
|
|
300
|
+
void RotateY (double alpha, bool parallel = false);
|
|
90
301
|
|
|
91
302
|
|
|
92
303
|
static double CalcAmn (int m, int n)
|
|
@@ -119,11 +330,11 @@ namespace ngsbem
|
|
|
119
330
|
// https://fortran-lang.discourse.group/t/looking-for-spherical-bessel-and-hankel-functions-of-first-and-second-kind-and-arbitrary-order/2308/2
|
|
120
331
|
NGS_DLL_HEADER
|
|
121
332
|
void besseljs3d (int nterms, double z, double scale,
|
|
122
|
-
|
|
333
|
+
SliceVector<double> fjs, SliceVector<double> fjder = FlatVector<double>(0, nullptr));
|
|
123
334
|
|
|
124
335
|
NGS_DLL_HEADER
|
|
125
336
|
void besseljs3d (int nterms, Complex z, double scale,
|
|
126
|
-
|
|
337
|
+
SliceVector<Complex> fjs, SliceVector<Complex> fjder = FlatVector<Complex>(0, nullptr));
|
|
127
338
|
|
|
128
339
|
|
|
129
340
|
/*
|
|
@@ -142,14 +353,17 @@ namespace ngsbem
|
|
|
142
353
|
FlatVector<double> jp,
|
|
143
354
|
FlatVector<double> yp);
|
|
144
355
|
|
|
145
|
-
|
|
356
|
+
|
|
146
357
|
|
|
147
358
|
// Thin wrapper around besseljs3d for a real argument: the results are
// written directly into `values`.  No derivative vector is supplied, so
// besseljs3d's defaulted (empty) fjder argument is used.
// NOTE(review): presumably fills orders 0..n - confirm against besseljs3d.
template <typename T>
void SphericalBessel (int n, double rho, double scale, T && values)
{
  besseljs3d (n, rho, scale, values);
}
|
|
154
368
|
|
|
155
369
|
|
|
@@ -173,21 +387,6 @@ namespace ngsbem
|
|
|
173
387
|
return;
|
|
174
388
|
}
|
|
175
389
|
Vector j(n+1), y(n+1), jp(n+1), yp(n+1);
|
|
176
|
-
// SBESJY (rho, n, j, y, jp, yp);
|
|
177
|
-
|
|
178
|
-
/*
|
|
179
|
-
values = j + Complex(0,1) * y;
|
|
180
|
-
if (scale != 1.0)
|
|
181
|
-
{
|
|
182
|
-
double prod = 1.0;
|
|
183
|
-
for (int i = 0; i <= n; i++)
|
|
184
|
-
{
|
|
185
|
-
values(i) *= prod;
|
|
186
|
-
prod *= scale;
|
|
187
|
-
}
|
|
188
|
-
}
|
|
189
|
-
*/
|
|
190
|
-
|
|
191
390
|
|
|
192
391
|
// the bessel-evaluation with scale
|
|
193
392
|
besseljs3d (n, rho, 1/scale, j, jp);
|
|
@@ -358,18 +557,7 @@ namespace ngsbem
|
|
|
358
557
|
// static Timer t("mptool Transform "+ToString(typeid(RADIAL).name())+ToString(typeid(TARGET).name()));
|
|
359
558
|
// RegionTimer reg(t);
|
|
360
559
|
|
|
361
|
-
|
|
362
|
-
double theta, phi;
|
|
363
|
-
|
|
364
|
-
if (len < 1e-30)
|
|
365
|
-
theta = 0;
|
|
366
|
-
else
|
|
367
|
-
theta = acos (dist(2) / len);
|
|
368
|
-
|
|
369
|
-
if (sqr(dist(0))+sqr(dist(1)) < 1e-30)
|
|
370
|
-
phi = 0;
|
|
371
|
-
else
|
|
372
|
-
phi = atan2(dist(1), dist(0));
|
|
560
|
+
auto [len, theta, phi] = SphericalCoordinates(dist);
|
|
373
561
|
|
|
374
562
|
|
|
375
563
|
// MultiPole<RADIAL,entry_type> tmp{*this};
|
|
@@ -386,14 +574,18 @@ namespace ngsbem
|
|
|
386
574
|
}
|
|
387
575
|
|
|
388
576
|
template <typename TARGET>
|
|
389
|
-
void TransformAdd (MultiPole<TARGET,entry_type> & target, Vec<3> dist) const
|
|
577
|
+
void TransformAdd (MultiPole<TARGET,entry_type> & target, Vec<3> dist, bool atomic = false) const
|
|
390
578
|
{
|
|
391
579
|
if (SH().Order() < 0) return;
|
|
392
580
|
if (target.SH().Order() < 0) return;
|
|
393
581
|
|
|
394
582
|
MultiPole<TARGET,entry_type> tmp{target};
|
|
395
583
|
Transform(tmp, dist);
|
|
396
|
-
|
|
584
|
+
if (!atomic)
|
|
585
|
+
target.SH().Coefs() += tmp.SH().Coefs();
|
|
586
|
+
else
|
|
587
|
+
for (int j = 0; j < target.SH().Coefs().Size(); j++)
|
|
588
|
+
AtomicAdd(target.SH().Coefs()[j], tmp.SH().Coefs()[j]);
|
|
397
589
|
}
|
|
398
590
|
|
|
399
591
|
template <typename TARGET>
|
|
@@ -412,11 +604,124 @@ namespace ngsbem
|
|
|
412
604
|
static constexpr int maxdirect = 100;
|
|
413
605
|
|
|
414
606
|
|
|
607
|
+
template <typename SCAL, auto S>
|
|
608
|
+
inline auto VecVector2Matrix (FlatVector<Vec<S,SCAL>> vec)
|
|
609
|
+
{
|
|
610
|
+
return FlatMatrixFixWidth<S,SCAL> (vec.Size(), vec.Data()->Data());
|
|
611
|
+
}
|
|
612
|
+
|
|
613
|
+
inline auto VecVector2Matrix (FlatVector<Complex> vec)
|
|
614
|
+
{
|
|
615
|
+
return FlatMatrixFixWidth<1,Complex> (vec.Size(), vec.Data());
|
|
616
|
+
}
|
|
617
|
+
|
|
618
|
+
|
|
415
619
|
template <typename entry_type=Complex>
|
|
416
620
|
class SingularMLMultiPole
|
|
417
621
|
{
|
|
622
|
+
using simd_entry_type = decltype(MakeSimd(declval<std::array<entry_type,FMM_SW>>()));
|
|
418
623
|
static Array<size_t> nodes_on_level;
|
|
419
624
|
|
|
625
|
+
struct RecordingSS
|
|
626
|
+
{
|
|
627
|
+
const MultiPole<MPSingular,entry_type> * mp_source;
|
|
628
|
+
MultiPole<MPSingular,entry_type> * mp_target;
|
|
629
|
+
Vec<3> dist;
|
|
630
|
+
double len, theta, phi;
|
|
631
|
+
bool flipz;
|
|
632
|
+
public:
|
|
633
|
+
RecordingSS() = default;
|
|
634
|
+
RecordingSS (const MultiPole<MPSingular,entry_type> * amp_source,
|
|
635
|
+
MultiPole<MPSingular,entry_type> * amp_target,
|
|
636
|
+
Vec<3> adist)
|
|
637
|
+
: mp_source(amp_source), mp_target(amp_target), dist(adist)
|
|
638
|
+
{
|
|
639
|
+
std::tie(len, theta, phi) = SphericalCoordinates(adist);
|
|
640
|
+
// flipz = false;
|
|
641
|
+
flipz = theta > M_PI/2;
|
|
642
|
+
if (flipz) theta = M_PI-theta;
|
|
643
|
+
}
|
|
644
|
+
};
|
|
645
|
+
|
|
646
|
+
|
|
647
|
+
// Processes a batch of recorded transfers, called with one common
// (len, theta) for the whole batch.
// N = batch size * VecLength<entry_type> is the number of Complex columns
// needed.  Depending on N the batch is
//   - handled record by record with TransformAdd (N <= 1 or a single record),
//   - passed to ProcessVectorizedBatch with the smallest supported width
//     (3, 4, 6, 12, 24, 48, 96, 192) that holds N columns, or
//   - split into a first part of 192/vec_length records and the rest, both
//     processed recursively (N > 192).
static void ProcessBatch(FlatArray<RecordingSS*> batch, double len, double theta)
{
  constexpr int vec_length = VecLength<entry_type>;
  const int batch_size = batch.Size();
  const int N = batch_size * vec_length;
  // *testout << "Processing batch of size " << batch.Size() << ", with N = " << N << ", vec_length = " << vec_length << ", Type: " << typeid(entry_type).name() << ", len = " << len << ", theta = " << theta << endl;

  // nothing to vectorize: apply every transfer individually (atomic add)
  if (N <= 1 || batch_size <= 1)
    {
      for (auto* rec : batch)
        rec->mp_source->TransformAdd(*rec->mp_target, rec->dist, true);
      return;
    }

  // too large for the widest vectorized kernel: split and recurse
  if (N > 192)
    {
      const int chunk = 192 / vec_length;
      ProcessBatch(batch.Range(0, chunk), len, theta);
      ProcessBatch(batch.Range(chunk, batch_size), len, theta);
      return;
    }

  // pick the smallest compiled width that can hold N columns
  if      (N <= 3)  ProcessVectorizedBatch<3, vec_length>(batch, len, theta);
  else if (N <= 4)  ProcessVectorizedBatch<4, vec_length>(batch, len, theta);
  else if (N <= 6)  ProcessVectorizedBatch<6, vec_length>(batch, len, theta);
  else if (N <= 12) ProcessVectorizedBatch<12, vec_length>(batch, len, theta);
  else if (N <= 24) ProcessVectorizedBatch<24, vec_length>(batch, len, theta);
  else if (N <= 48) ProcessVectorizedBatch<48, vec_length>(batch, len, theta);
  else if (N <= 96) ProcessVectorizedBatch<96, vec_length>(batch, len, theta);
  else              ProcessVectorizedBatch<192, vec_length>(batch, len, theta);
}
|
|
688
|
+
|
|
689
|
+
template<int N, int vec_length>
|
|
690
|
+
static void ProcessVectorizedBatch(FlatArray<RecordingSS*> batch, double len, double theta) {
|
|
691
|
+
|
|
692
|
+
// *testout << "Processing vectorized S->S batch of size " << batch.Size() << ", with N = " << N << ", vec_length = " << vec_length << ", len = " << len << ", theta = " << theta << endl;
|
|
693
|
+
MultiPole<MPSingular, Vec<N,Complex>> vec_source(batch[0]->mp_source->Order(), batch[0]->mp_source->Kappa(), batch[0]->mp_source->RTyp());
|
|
694
|
+
MultiPole<MPSingular, Vec<N,Complex>> vec_target(batch[0]->mp_target->Order(), batch[0]->mp_target->Kappa(), batch[0]->mp_target->RTyp());
|
|
695
|
+
|
|
696
|
+
// Copy multipoles into vectorized multipole
|
|
697
|
+
for (int i = 0; i < batch.Size(); i++)
|
|
698
|
+
{
|
|
699
|
+
auto source_i = VecVector2Matrix (batch[i]->mp_source->SH().Coefs());
|
|
700
|
+
auto source_mati = VecVector2Matrix (vec_source.SH().Coefs()).Cols(i*vec_length, (i+1)*vec_length);
|
|
701
|
+
batch[i]->mp_source->SH().RotateZFlip(batch[i]->phi, batch[i]->flipz,
|
|
702
|
+
[source_i, source_mati] (size_t ii, Complex factor)
|
|
703
|
+
{
|
|
704
|
+
source_mati.Row(ii) = factor * source_i.Row(ii);
|
|
705
|
+
});
|
|
706
|
+
}
|
|
707
|
+
|
|
708
|
+
vec_source.SH().RotateY(theta, vec_source.SH().Order() >= 100);
|
|
709
|
+
vec_source.ShiftZ(-len, vec_target);
|
|
710
|
+
vec_target.SH().RotateY(-theta, vec_target.SH().Order() >= 100);
|
|
711
|
+
|
|
712
|
+
// Copy vectorized multipole into individual multipoles
|
|
713
|
+
for (int i = 0; i < batch.Size(); i++)
|
|
714
|
+
{
|
|
715
|
+
auto source_mati = VecVector2Matrix (vec_target.SH().Coefs()).Cols(i*vec_length, (i+1)*vec_length);
|
|
716
|
+
auto target_mati = VecVector2Matrix (batch[i]->mp_target->SH().Coefs());
|
|
717
|
+
batch[i]->mp_target->SH().RotateZFlip(-batch[i]->phi, batch[i]->flipz,
|
|
718
|
+
[source_mati, target_mati] (size_t ii, Complex factor)
|
|
719
|
+
{
|
|
720
|
+
AtomicAdd (target_mati.Row(ii), factor * source_mati.Row(ii));
|
|
721
|
+
});
|
|
722
|
+
}
|
|
723
|
+
}
|
|
724
|
+
|
|
420
725
|
struct Node
|
|
421
726
|
{
|
|
422
727
|
Vec<3> center;
|
|
@@ -428,7 +733,14 @@ namespace ngsbem
|
|
|
428
733
|
Array<tuple<Vec<3>, entry_type>> charges;
|
|
429
734
|
Array<tuple<Vec<3>, Vec<3>, entry_type>> dipoles;
|
|
430
735
|
Array<tuple<Vec<3>, Vec<3>, Complex,int>> currents;
|
|
736
|
+
|
|
737
|
+
using simd_entry_type = decltype(MakeSimd(declval<std::array<entry_type,FMM_SW>>()));
|
|
738
|
+
Array<tuple<Vec<3,SIMD<double,FMM_SW>>, simd_entry_type>> simd_charges;
|
|
739
|
+
Array<tuple<Vec<3,SIMD<double,FMM_SW>>, Vec<3,SIMD<double,FMM_SW>>, simd_entry_type>> simd_dipoles;
|
|
740
|
+
|
|
431
741
|
int total_sources;
|
|
742
|
+
std::mutex node_mutex;
|
|
743
|
+
atomic<bool> have_childs{false};
|
|
432
744
|
|
|
433
745
|
Node (Vec<3> acenter, double ar, int alevel, double akappa)
|
|
434
746
|
: center(acenter), r(ar), level(alevel), mp(MPOrder(ar*akappa), akappa, ar) // min(1.0, ar*akappa))
|
|
@@ -449,12 +761,13 @@ namespace ngsbem
|
|
|
449
761
|
cc(2) += (i&4) ? r/2 : -r/2;
|
|
450
762
|
childs[i] = make_unique<Node> (cc, r/2, level+1, mp.Kappa());
|
|
451
763
|
}
|
|
764
|
+
have_childs = true;
|
|
452
765
|
}
|
|
453
766
|
|
|
454
767
|
|
|
455
768
|
void AddCharge (Vec<3> x, entry_type c)
|
|
456
769
|
{
|
|
457
|
-
if (
|
|
770
|
+
if (have_childs) // quick check without locking
|
|
458
771
|
{
|
|
459
772
|
// directly send to childs:
|
|
460
773
|
int childnum = 0;
|
|
@@ -465,6 +778,21 @@ namespace ngsbem
|
|
|
465
778
|
return;
|
|
466
779
|
}
|
|
467
780
|
|
|
781
|
+
lock_guard<mutex> guard(node_mutex);
|
|
782
|
+
|
|
783
|
+
if (have_childs) // test again after locking
|
|
784
|
+
{
|
|
785
|
+
// directly send to childs:
|
|
786
|
+
int childnum = 0;
|
|
787
|
+
if (x(0) > center(0)) childnum += 1;
|
|
788
|
+
if (x(1) > center(1)) childnum += 2;
|
|
789
|
+
if (x(2) > center(2)) childnum += 4;
|
|
790
|
+
childs[childnum] -> AddCharge(x, c);
|
|
791
|
+
return;
|
|
792
|
+
}
|
|
793
|
+
|
|
794
|
+
|
|
795
|
+
|
|
468
796
|
charges.Append( tuple{x,c} );
|
|
469
797
|
|
|
470
798
|
// if (r*mp.Kappa() < 1e-8) return;
|
|
@@ -489,7 +817,7 @@ namespace ngsbem
|
|
|
489
817
|
|
|
490
818
|
void AddDipole (Vec<3> x, Vec<3> d, entry_type c)
|
|
491
819
|
{
|
|
492
|
-
if (
|
|
820
|
+
if (have_childs)
|
|
493
821
|
{
|
|
494
822
|
// directly send to childs:
|
|
495
823
|
|
|
@@ -501,6 +829,23 @@ namespace ngsbem
|
|
|
501
829
|
return;
|
|
502
830
|
}
|
|
503
831
|
|
|
832
|
+
lock_guard<mutex> guard(node_mutex);
|
|
833
|
+
|
|
834
|
+
if (have_childs)
|
|
835
|
+
{
|
|
836
|
+
// directly send to childs:
|
|
837
|
+
|
|
838
|
+
int childnum = 0;
|
|
839
|
+
if (x(0) > center(0)) childnum += 1;
|
|
840
|
+
if (x(1) > center(1)) childnum += 2;
|
|
841
|
+
if (x(2) > center(2)) childnum += 4;
|
|
842
|
+
childs[childnum] -> AddDipole(x, d, c);
|
|
843
|
+
return;
|
|
844
|
+
}
|
|
845
|
+
|
|
846
|
+
|
|
847
|
+
|
|
848
|
+
|
|
504
849
|
dipoles.Append (tuple{x,d,c});
|
|
505
850
|
|
|
506
851
|
if (dipoles.Size() < maxdirect || r < 1e-8)
|
|
@@ -520,6 +865,7 @@ namespace ngsbem
|
|
|
520
865
|
currents.SetSize0();
|
|
521
866
|
}
|
|
522
867
|
|
|
868
|
+
// not parallel yet
|
|
523
869
|
void AddCurrent (Vec<3> sp, Vec<3> ep, Complex j, int num)
|
|
524
870
|
{
|
|
525
871
|
if (childs[0])
|
|
@@ -549,7 +895,7 @@ namespace ngsbem
|
|
|
549
895
|
}
|
|
550
896
|
return;
|
|
551
897
|
}
|
|
552
|
-
|
|
898
|
+
|
|
553
899
|
currents.Append (tuple{sp,ep,j,num});
|
|
554
900
|
|
|
555
901
|
// if (currents.Size() < maxdirect || r < 1e-8)
|
|
@@ -583,26 +929,74 @@ namespace ngsbem
|
|
|
583
929
|
return sum;
|
|
584
930
|
}
|
|
585
931
|
|
|
586
|
-
|
|
587
|
-
|
|
932
|
+
{
|
|
933
|
+
// static Timer t("fmm direct eval"); RegionTimer reg(t);
|
|
934
|
+
// t.AddFlops (charges.Size());
|
|
935
|
+
if (simd_charges.Size())
|
|
936
|
+
{
|
|
937
|
+
simd_entry_type vsum{0.0};
|
|
938
|
+
if (mp.Kappa() < 1e-8)
|
|
939
|
+
for (auto [x,c] : simd_charges)
|
|
940
|
+
{
|
|
941
|
+
auto rho = L2Norm(p-x);
|
|
942
|
+
auto kernel = (1/(4*M_PI))*SIMD<Complex,FMM_SW> (1,rho*mp.Kappa()) / rho;
|
|
943
|
+
kernel = If(rho > 0.0, kernel, SIMD<Complex,FMM_SW>(0.0));
|
|
944
|
+
vsum += kernel * c;
|
|
945
|
+
}
|
|
946
|
+
else
|
|
947
|
+
for (auto [x,c] : simd_charges)
|
|
948
|
+
{
|
|
949
|
+
auto rho = L2Norm(p-x);
|
|
950
|
+
auto [si,co] = sincos(rho*mp.Kappa());
|
|
951
|
+
auto kernel = (1/(4*M_PI))*SIMD<Complex,FMM_SW>(co,si) / rho;
|
|
952
|
+
kernel = If(rho > 0.0, kernel, SIMD<Complex,FMM_SW>(0.0));
|
|
953
|
+
vsum += kernel * c;
|
|
954
|
+
}
|
|
955
|
+
|
|
956
|
+
sum += HSum(vsum);
|
|
957
|
+
}
|
|
958
|
+
else
|
|
959
|
+
{
|
|
960
|
+
if (mp.Kappa() < 1e-8)
|
|
961
|
+
{
|
|
962
|
+
for (auto [x,c] : charges)
|
|
963
|
+
if (double rho = L2Norm(p-x); rho > 0)
|
|
964
|
+
sum += (1/(4*M_PI))*Complex(1,rho*mp.Kappa()) / rho * c;
|
|
965
|
+
}
|
|
966
|
+
else
|
|
967
|
+
for (auto [x,c] : charges)
|
|
968
|
+
if (double rho = L2Norm(p-x); rho > 0)
|
|
969
|
+
sum += (1/(4*M_PI))*exp(Complex(0,rho*mp.Kappa())) / rho * c;
|
|
970
|
+
}
|
|
971
|
+
}
|
|
972
|
+
|
|
973
|
+
if (simd_dipoles.Size())
|
|
974
|
+
{
|
|
975
|
+
simd_entry_type vsum{0.0};
|
|
976
|
+
for (auto [x,d,c] : simd_dipoles)
|
|
588
977
|
{
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
978
|
+
auto rho = L2Norm(p-x);
|
|
979
|
+
auto drhodp = (1.0/rho) * (p-x);
|
|
980
|
+
auto [si,co] = sincos(rho*mp.Kappa());
|
|
981
|
+
auto dGdrho = (1/(4*M_PI))*SIMD<Complex,FMM_SW>(co,si) *
|
|
982
|
+
(-1.0/(rho*rho) + SIMD<Complex,FMM_SW>(0, mp.Kappa())/rho);
|
|
983
|
+
auto kernel = dGdrho * InnerProduct(drhodp, d);
|
|
984
|
+
kernel = If(rho > 0.0, kernel, SIMD<Complex,FMM_SW>(0.0));
|
|
985
|
+
vsum += kernel * c;
|
|
592
986
|
}
|
|
987
|
+
sum += HSum(vsum);
|
|
988
|
+
}
|
|
593
989
|
else
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
sum += (1/(4*M_PI))*exp(Complex(0,rho*mp.Kappa())) / rho * c;
|
|
597
|
-
|
|
598
|
-
for (auto [x,d,c] : dipoles)
|
|
990
|
+
{
|
|
991
|
+
for (auto [x,d,c] : dipoles)
|
|
599
992
|
if (double rho = L2Norm(p-x); rho > 0)
|
|
600
|
-
|
|
993
|
+
{
|
|
601
994
|
Vec<3> drhodp = 1.0/rho * (p-x);
|
|
602
995
|
Complex dGdrho = (1/(4*M_PI))*exp(Complex(0,rho*mp.Kappa())) *
|
|
603
|
-
|
|
996
|
+
(Complex(0, mp.Kappa())/rho - 1.0/sqr(rho));
|
|
604
997
|
sum += dGdrho * InnerProduct(drhodp, d) * c;
|
|
605
|
-
|
|
998
|
+
}
|
|
999
|
+
}
|
|
606
1000
|
|
|
607
1001
|
for (auto [sp,ep,j,num] : currents)
|
|
608
1002
|
{
|
|
@@ -664,23 +1058,27 @@ namespace ngsbem
|
|
|
664
1058
|
}
|
|
665
1059
|
}
|
|
666
1060
|
|
|
667
|
-
void CalcMP()
|
|
1061
|
+
void CalcMP(Array<RecordingSS> * recording, Array<Node*> * nodes_to_process)
|
|
668
1062
|
{
|
|
669
|
-
mp.SH().Coefs() = 0.0;
|
|
1063
|
+
// mp.SH().Coefs() = 0.0;
|
|
670
1064
|
if (childs[0])
|
|
671
1065
|
{
|
|
672
|
-
if (total_sources < 1000)
|
|
1066
|
+
if (total_sources < 1000 || recording)
|
|
673
1067
|
for (auto & child : childs)
|
|
674
|
-
child->CalcMP();
|
|
1068
|
+
child->CalcMP(recording, nodes_to_process);
|
|
675
1069
|
else
|
|
676
1070
|
ParallelFor (8, [&] (int nr)
|
|
677
1071
|
{
|
|
678
|
-
childs[nr] -> CalcMP();
|
|
1072
|
+
childs[nr] -> CalcMP(recording, nodes_to_process);
|
|
679
1073
|
});
|
|
680
1074
|
|
|
681
1075
|
|
|
682
|
-
for (auto & child : childs)
|
|
683
|
-
child->mp.
|
|
1076
|
+
for (auto & child : childs){
|
|
1077
|
+
if (recording && child->mp.SH().Coefs().Size() > 0)
|
|
1078
|
+
*recording += RecordingSS(&child->mp, &mp, center-child->center);
|
|
1079
|
+
else
|
|
1080
|
+
child->mp.TransformAdd(mp, center-child->center);
|
|
1081
|
+
}
|
|
684
1082
|
}
|
|
685
1083
|
else
|
|
686
1084
|
{
|
|
@@ -690,14 +1088,54 @@ namespace ngsbem
|
|
|
690
1088
|
return;
|
|
691
1089
|
}
|
|
692
1090
|
|
|
693
|
-
|
|
694
|
-
|
|
1091
|
+
// make simd charges, comment this block for testing ...
|
|
1092
|
+
simd_charges.SetSize( (charges.Size()+FMM_SW-1)/FMM_SW);
|
|
1093
|
+
size_t i = 0, ii = 0;
|
|
1094
|
+
for ( ; i+FMM_SW <= charges.Size(); i+=FMM_SW, ii++)
|
|
1095
|
+
{
|
|
1096
|
+
std::array<tuple<Vec<3>,entry_type>, FMM_SW> ca;
|
|
1097
|
+
for (int j = 0; j < FMM_SW; j++) ca[j] = charges[i+j];
|
|
1098
|
+
simd_charges[ii] = MakeSimd(ca);
|
|
1099
|
+
}
|
|
1100
|
+
if (i < charges.Size())
|
|
1101
|
+
{
|
|
1102
|
+
std::array<tuple<Vec<3>,entry_type>, FMM_SW> ca;
|
|
1103
|
+
int j = 0;
|
|
1104
|
+
for ( ; i+j < charges.Size(); j++) ca[j] = charges[i+j];
|
|
1105
|
+
for ( ; j < FMM_SW; j++) ca[j] = tuple( get<0>(ca[0]), entry_type{0.0} );
|
|
1106
|
+
simd_charges[ii] = MakeSimd(ca);
|
|
1107
|
+
}
|
|
1108
|
+
|
|
1109
|
+
simd_dipoles.SetSize( (dipoles.Size()+FMM_SW-1)/FMM_SW);
|
|
1110
|
+
i = 0, ii = 0;
|
|
1111
|
+
for ( ; i+FMM_SW <= dipoles.Size(); i+=FMM_SW, ii++)
|
|
1112
|
+
{
|
|
1113
|
+
std::array<tuple<Vec<3>,Vec<3>,entry_type>, FMM_SW> di;
|
|
1114
|
+
for (int j = 0; j < FMM_SW; j++) di[j] = dipoles[i+j];
|
|
1115
|
+
simd_dipoles[ii] = MakeSimd(di);
|
|
1116
|
+
}
|
|
1117
|
+
if (i < dipoles.Size())
|
|
1118
|
+
{
|
|
1119
|
+
std::array<tuple<Vec<3>,Vec<3>,entry_type>, FMM_SW> di;
|
|
1120
|
+
int j = 0;
|
|
1121
|
+
for ( ; i+j < dipoles.Size(); j++) di[j] = dipoles[i+j];
|
|
1122
|
+
for ( ; j < FMM_SW; j++) di[j] = tuple( get<0>(di[0]), get<1>(di[0]), entry_type{0.0} );
|
|
1123
|
+
simd_dipoles[ii] = MakeSimd(di);
|
|
1124
|
+
}
|
|
1125
|
+
|
|
695
1126
|
|
|
696
|
-
|
|
697
|
-
|
|
1127
|
+
if (nodes_to_process)
|
|
1128
|
+
*nodes_to_process += this;
|
|
1129
|
+
else {
|
|
1130
|
+
for (auto [x,c] : charges)
|
|
1131
|
+
mp.AddCharge (x-center,c);
|
|
1132
|
+
|
|
1133
|
+
for (auto [x,d,c] : dipoles)
|
|
1134
|
+
mp.AddDipole (x-center, d, c);
|
|
698
1135
|
|
|
699
|
-
|
|
700
|
-
|
|
1136
|
+
for (auto [sp,ep,j,num] : currents)
|
|
1137
|
+
mp.AddCurrent (sp-center, ep-center, j, num);
|
|
1138
|
+
}
|
|
701
1139
|
}
|
|
702
1140
|
}
|
|
703
1141
|
|
|
@@ -836,6 +1274,10 @@ namespace ngsbem
|
|
|
836
1274
|
void CalcMP()
|
|
837
1275
|
{
|
|
838
1276
|
static Timer t("mptool compute singular MLMP"); RegionTimer rg(t);
|
|
1277
|
+
static Timer ts2mp("mptool compute singular MLMP - source2mp");
|
|
1278
|
+
static Timer tS2S("mptool compute singular MLMP - S->S");
|
|
1279
|
+
static Timer trec("mptool comput singular recording");
|
|
1280
|
+
static Timer tsort("mptool comput singular sort");
|
|
839
1281
|
|
|
840
1282
|
/*
|
|
841
1283
|
int maxlevel = 0;
|
|
@@ -847,7 +1289,87 @@ namespace ngsbem
|
|
|
847
1289
|
*/
|
|
848
1290
|
|
|
849
1291
|
root.CalcTotalSources();
|
|
850
|
-
|
|
1292
|
+
|
|
1293
|
+
if (false)
|
|
1294
|
+
// direct evaluation of S->S
|
|
1295
|
+
root.CalcMP(nullptr, nullptr);
|
|
1296
|
+
else
|
|
1297
|
+
{
|
|
1298
|
+
|
|
1299
|
+
Array<RecordingSS> recording;
|
|
1300
|
+
Array<Node*> nodes_to_process;
|
|
1301
|
+
|
|
1302
|
+
{
|
|
1303
|
+
RegionTimer reg(trec);
|
|
1304
|
+
root.CalcMP(&recording, &nodes_to_process);
|
|
1305
|
+
}
|
|
1306
|
+
|
|
1307
|
+
{
|
|
1308
|
+
RegionTimer rs2mp(ts2mp);
|
|
1309
|
+
ParallelFor(nodes_to_process.Size(), [&](int i){
|
|
1310
|
+
auto node = nodes_to_process[i];
|
|
1311
|
+
for (auto [x,c]: node->charges)
|
|
1312
|
+
node->mp.AddCharge(x-node->center, c);
|
|
1313
|
+
for (auto [x,d,c]: node->dipoles)
|
|
1314
|
+
node->mp.AddDipole(x-node->center, d, c);
|
|
1315
|
+
for (auto [sp,ep,j,num]: node->currents)
|
|
1316
|
+
node->mp.AddCurrent(sp-node->center, ep-node->center, j, num);
|
|
1317
|
+
}, TasksPerThread(4));
|
|
1318
|
+
}
|
|
1319
|
+
|
|
1320
|
+
{
|
|
1321
|
+
RegionTimer reg(tsort);
|
|
1322
|
+
QuickSort (recording, [] (auto & a, auto & b)
|
|
1323
|
+
{
|
|
1324
|
+
if (a.len < (1-1e-8) * b.len) return true;
|
|
1325
|
+
if (a.len > (1+1e-8) * b.len) return false;
|
|
1326
|
+
return a.theta < b.theta;
|
|
1327
|
+
});
|
|
1328
|
+
}
|
|
1329
|
+
|
|
1330
|
+
double current_len = -1e100;
|
|
1331
|
+
double current_theta = -1e100;
|
|
1332
|
+
Array<RecordingSS*> current_batch;
|
|
1333
|
+
Array<Array<RecordingSS*>> batch_group;
|
|
1334
|
+
Array<double> group_lengths;
|
|
1335
|
+
Array<double> group_thetas;
|
|
1336
|
+
for (auto & record : recording)
|
|
1337
|
+
{
|
|
1338
|
+
bool len_changed = fabs(record.len - current_len) > 1e-8;
|
|
1339
|
+
bool theta_changed = fabs(record.theta - current_theta) > 1e-8;
|
|
1340
|
+
if ((len_changed || theta_changed) && current_batch.Size() > 0) {
|
|
1341
|
+
batch_group.Append(current_batch);
|
|
1342
|
+
group_lengths.Append(current_len);
|
|
1343
|
+
group_thetas.Append(current_theta);
|
|
1344
|
+
current_batch.SetSize(0);
|
|
1345
|
+
}
|
|
1346
|
+
|
|
1347
|
+
current_len = record.len;
|
|
1348
|
+
current_theta = record.theta;
|
|
1349
|
+
current_batch.Append(&record);
|
|
1350
|
+
}
|
|
1351
|
+
if (current_batch.Size() > 0) {
|
|
1352
|
+
batch_group.Append(current_batch);
|
|
1353
|
+
group_lengths.Append(current_len);
|
|
1354
|
+
group_thetas.Append(current_theta);
|
|
1355
|
+
}
|
|
1356
|
+
|
|
1357
|
+
{
|
|
1358
|
+
RegionTimer rS2S(tS2S);
|
|
1359
|
+
// ParallelFor(batch_group.Size(), [&](int i) {
|
|
1360
|
+
for (int i = 0; i < batch_group.Size(); i++){
|
|
1361
|
+
// *testout << "Processing batch " << i << " of size " << batch_group[i].Size() << ", with len = " << group_lengths[i] << ", theta = " << group_thetas[i] << endl;
|
|
1362
|
+
int chunk_size = 24;
|
|
1363
|
+
if (batch_group[i].Size() < chunk_size)
|
|
1364
|
+
ProcessBatch(batch_group[i], group_lengths[i], group_thetas[i]);
|
|
1365
|
+
else
|
|
1366
|
+
ParallelForRange(IntRange(batch_group[i].Size()), [&](IntRange range) {
|
|
1367
|
+
auto sub_batch = batch_group[i].Range(range.First(), range.Next());
|
|
1368
|
+
ProcessBatch(sub_batch, group_lengths[i], group_thetas[i]);
|
|
1369
|
+
}, TasksPerThread(4));
|
|
1370
|
+
}
|
|
1371
|
+
}
|
|
1372
|
+
}
|
|
851
1373
|
|
|
852
1374
|
havemp = true;
|
|
853
1375
|
}
|
|
@@ -877,6 +1399,143 @@ namespace ngsbem
|
|
|
877
1399
|
class NGS_DLL_HEADER RegularMLMultiPole
|
|
878
1400
|
{
|
|
879
1401
|
static Array<size_t> nodes_on_level;
|
|
1402
|
+
|
|
1403
|
+
|
|
1404
|
+
struct RecordingRS
|
|
1405
|
+
{
|
|
1406
|
+
const MultiPole<MPSingular,elem_type> * mpS;
|
|
1407
|
+
MultiPole<MPRegular,elem_type> * mpR;
|
|
1408
|
+
Vec<3> dist;
|
|
1409
|
+
double len, theta, phi;
|
|
1410
|
+
public:
|
|
1411
|
+
RecordingRS() = default;
|
|
1412
|
+
RecordingRS (const MultiPole<MPSingular,elem_type> * ampS,
|
|
1413
|
+
MultiPole<MPRegular,elem_type> * ampR,
|
|
1414
|
+
Vec<3> adist)
|
|
1415
|
+
: mpS(ampS), mpR(ampR), dist(adist)
|
|
1416
|
+
{
|
|
1417
|
+
std::tie(len, theta, phi) = SphericalCoordinates(dist);
|
|
1418
|
+
}
|
|
1419
|
+
};
|
|
1420
|
+
|
|
1421
|
+
// Executes a batch of recorded S->R translations.  len and theta are handed
// on unchanged to ProcessVectorizedBatch; the per-record angle phi is taken
// from each record there.
//
// The batch is dispatched on its total width (number of records times
// VecLength<elem_type>) to the smallest vectorized kernel that can hold it.
// Batches wider than 192 are cut into chunks that are processed recursively
// in parallel.
static void ProcessBatchRS(FlatArray<RecordingRS*> batch, double len, double theta) {
  constexpr int vec_length = VecLength<elem_type>;
  const int num_records = batch.Size();
  const int total_width = num_records * vec_length;

  // A single record (or width one): plain record-by-record translation.
  if (total_width <= 1 || num_records <= 1) {
    for (auto* rec : batch)
      rec->mpS->TransformAdd(*rec->mpR, rec->dist);
    return;
  }

  // Pick the narrowest kernel width that fits the whole batch.
  if (total_width <= 3)   { ProcessVectorizedBatch<3, vec_length>(batch, len, theta);   return; }
  if (total_width <= 4)   { ProcessVectorizedBatch<4, vec_length>(batch, len, theta);   return; }
  if (total_width <= 6)   { ProcessVectorizedBatch<6, vec_length>(batch, len, theta);   return; }
  if (total_width <= 12)  { ProcessVectorizedBatch<12, vec_length>(batch, len, theta);  return; }
  if (total_width <= 24)  { ProcessVectorizedBatch<24, vec_length>(batch, len, theta);  return; }
  if (total_width <= 48)  { ProcessVectorizedBatch<48, vec_length>(batch, len, theta);  return; }
  if (total_width <= 96)  { ProcessVectorizedBatch<96, vec_length>(batch, len, theta);  return; }
  if (total_width <= 192) { ProcessVectorizedBatch<192, vec_length>(batch, len, theta); return; }

  // Wider than the largest kernel: split into chunks of 192/vec_length
  // records (the last chunk may be shorter) and recurse on each chunk.
  const size_t chunksize = 192/vec_length;
  const size_t num = (batch.Size()+chunksize-1) / chunksize;
  ParallelFor (num, [&](int i)
  {
    ProcessBatchRS(batch.Range(i*chunksize, min((i+1)*chunksize, batch.Size())), len, theta);
  }, num);  // NOTE(review): third argument presumably requests one task per chunk - confirm
}
|
|
1484
|
+
|
|
1485
|
+
|
|
1486
|
+
template<int N, int vec_length>
|
|
1487
|
+
static void ProcessVectorizedBatch(FlatArray<RecordingRS*> batch, double len, double theta) {
|
|
1488
|
+
|
|
1489
|
+
// static Timer t("ProcessVectorizedBatch, N = "+ToString(N) + ", vec_len = " + ToString(vec_length));
|
|
1490
|
+
// RegionTimer reg(t, batch[0]->mpS->SH().Order());
|
|
1491
|
+
// static Timer ttobatch("mptools - copy to batch 2");
|
|
1492
|
+
// static Timer tfrombatch("mptools - copy from batch 2");
|
|
1493
|
+
|
|
1494
|
+
// *testout << "Processing vectorized batch of size " << batch.Size() << ", with N = " << N << ", vec_length = " << vec_length << ", len = " << len << ", theta = " << theta << endl;
|
|
1495
|
+
MultiPole<MPSingular, Vec<N,Complex>> vec_source(batch[0]->mpS->Order(), batch[0]->mpS->Kappa(), batch[0]->mpS->RTyp());
|
|
1496
|
+
// MultiPole<MPSingular, elem_type> tmp_source{*batch[0]->mpS};
|
|
1497
|
+
MultiPole<MPRegular, elem_type> tmp_target{*batch[0]->mpR};
|
|
1498
|
+
MultiPole<MPRegular, Vec<N,Complex>> vec_target(batch[0]->mpR->Order(), batch[0]->mpR->Kappa(), batch[0]->mpR->RTyp());
|
|
1499
|
+
|
|
1500
|
+
// Copy multipoles into vectorized multipole
|
|
1501
|
+
// ttobatch.Start();
|
|
1502
|
+
for (int i = 0; i < batch.Size(); i++)
|
|
1503
|
+
{
|
|
1504
|
+
auto source_i = VecVector2Matrix (batch[i]->mpS->SH().Coefs());
|
|
1505
|
+
auto source_mati = VecVector2Matrix (vec_source.SH().Coefs()).Cols(i*vec_length, (i+1)*vec_length);
|
|
1506
|
+
batch[i]->mpS->SH().RotateZ(batch[i]->phi,
|
|
1507
|
+
[source_i, source_mati] (size_t ii, Complex factor)
|
|
1508
|
+
{
|
|
1509
|
+
source_mati.Row(ii) = factor * source_i.Row(ii);
|
|
1510
|
+
});
|
|
1511
|
+
}
|
|
1512
|
+
|
|
1513
|
+
// ttobatch.Stop();
|
|
1514
|
+
|
|
1515
|
+
vec_source.SH().RotateY(theta);
|
|
1516
|
+
vec_source.ShiftZ(-len, vec_target);
|
|
1517
|
+
vec_target.SH().RotateY(-theta);
|
|
1518
|
+
|
|
1519
|
+
// Copy vectorized multipole into individual multipoles
|
|
1520
|
+
// tfrombatch.Start();
|
|
1521
|
+
for (int i = 0; i < batch.Size(); i++) {
|
|
1522
|
+
// auto source_i = VecVector2Matrix (tmp_target.SH().Coefs());
|
|
1523
|
+
auto source_mati = VecVector2Matrix (vec_target.SH().Coefs()).Cols(i*vec_length, (i+1)*vec_length);
|
|
1524
|
+
auto targeti = VecVector2Matrix(batch[i]->mpR->SH().Coefs());
|
|
1525
|
+
|
|
1526
|
+
tmp_target.SH().RotateZ(-batch[i]->phi,
|
|
1527
|
+
[source_mati, targeti] (size_t ii, Complex factor)
|
|
1528
|
+
{
|
|
1529
|
+
// source_i.Row(ii) = factor * source_mati.Row(ii);
|
|
1530
|
+
AtomicAdd (VectorView(targeti.Row(ii)), factor * source_mati.Row(ii));
|
|
1531
|
+
});
|
|
1532
|
+
// for (int j = 0; j < tmp_target.SH().Coefs().Size(); j++)
|
|
1533
|
+
// AtomicAdd(batch[i]->mpR->SH().Coefs()[j], tmp_target.SH().Coefs()[j]);
|
|
1534
|
+
}
|
|
1535
|
+
// tfrombatch.Stop();
|
|
1536
|
+
|
|
1537
|
+
}
|
|
1538
|
+
|
|
880
1539
|
|
|
881
1540
|
struct Node
|
|
882
1541
|
{
|
|
@@ -887,6 +1546,8 @@ namespace ngsbem
|
|
|
887
1546
|
MultiPole<MPRegular,elem_type> mp;
|
|
888
1547
|
Array<Vec<3>> targets;
|
|
889
1548
|
int total_targets;
|
|
1549
|
+
std::mutex node_mutex;
|
|
1550
|
+
atomic<bool> have_childs{false};
|
|
890
1551
|
|
|
891
1552
|
Array<const typename SingularMLMultiPole<elem_type>::Node*> singnodes;
|
|
892
1553
|
|
|
@@ -911,13 +1572,15 @@ namespace ngsbem
|
|
|
911
1572
|
cc(2) += (i&4) ? r/2 : -r/2;
|
|
912
1573
|
childs[i] = make_unique<Node> (cc, r/2, level+1, mp.Kappa());
|
|
913
1574
|
}
|
|
1575
|
+
have_childs = true;
|
|
914
1576
|
}
|
|
915
|
-
|
|
916
|
-
void AddSingularNode (const typename SingularMLMultiPole<elem_type>::Node & singnode, bool allow_refine
|
|
1577
|
+
|
|
1578
|
+
void AddSingularNode (const typename SingularMLMultiPole<elem_type>::Node & singnode, bool allow_refine,
|
|
1579
|
+
Array<RecordingRS> * recording)
|
|
917
1580
|
{
|
|
918
1581
|
if (mp.SH().Order() < 0) return;
|
|
919
1582
|
if (singnode.mp.SH().Order() < 0) return;
|
|
920
|
-
if (L2Norm(singnode.mp.SH().Coefs()) == 0) return;
|
|
1583
|
+
// if (L2Norm(singnode.mp.SH().Coefs()) == 0) return;
|
|
921
1584
|
if (level > 20)
|
|
922
1585
|
{
|
|
923
1586
|
singnodes.Append(&singnode);
|
|
@@ -936,12 +1599,15 @@ namespace ngsbem
|
|
|
936
1599
|
singnode.childs[0]->mp.Order() < singnode.mp.Order())
|
|
937
1600
|
{
|
|
938
1601
|
for (auto & child : singnode.childs)
|
|
939
|
-
AddSingularNode (*child, allow_refine);
|
|
1602
|
+
AddSingularNode (*child, allow_refine, recording);
|
|
940
1603
|
return;
|
|
941
1604
|
}
|
|
942
1605
|
|
|
943
1606
|
// static Timer t("mptool transform Helmholtz-criterion"); RegionTimer r(t);
|
|
944
|
-
|
|
1607
|
+
if (recording)
|
|
1608
|
+
*recording += RecordingRS(&singnode.mp, &mp, dist);
|
|
1609
|
+
else
|
|
1610
|
+
singnode.mp.TransformAdd(mp, dist);
|
|
945
1611
|
return;
|
|
946
1612
|
}
|
|
947
1613
|
|
|
@@ -960,21 +1626,21 @@ namespace ngsbem
|
|
|
960
1626
|
CreateChilds();
|
|
961
1627
|
|
|
962
1628
|
for (auto & ch : childs)
|
|
963
|
-
ch -> AddSingularNode (singnode, allow_refine);
|
|
1629
|
+
ch -> AddSingularNode (singnode, allow_refine, recording);
|
|
964
1630
|
}
|
|
965
1631
|
else
|
|
966
1632
|
{
|
|
967
|
-
if (total_targets < 1000)
|
|
1633
|
+
if (total_targets < 1000 || recording)
|
|
968
1634
|
{
|
|
969
1635
|
for (auto & ch : childs)
|
|
970
1636
|
if (ch)
|
|
971
|
-
ch -> AddSingularNode (singnode, allow_refine);
|
|
1637
|
+
ch -> AddSingularNode (singnode, allow_refine, recording);
|
|
972
1638
|
}
|
|
973
1639
|
else
|
|
974
1640
|
ParallelFor (8, [&] (int nr)
|
|
975
1641
|
{
|
|
976
1642
|
if (childs[nr])
|
|
977
|
-
childs[nr] -> AddSingularNode (singnode, allow_refine);
|
|
1643
|
+
childs[nr] -> AddSingularNode (singnode, allow_refine, recording);
|
|
978
1644
|
});
|
|
979
1645
|
|
|
980
1646
|
if (targets.Size())
|
|
@@ -984,7 +1650,7 @@ namespace ngsbem
|
|
|
984
1650
|
else
|
|
985
1651
|
{
|
|
986
1652
|
for (auto & childsing : singnode.childs)
|
|
987
|
-
AddSingularNode (*childsing, allow_refine);
|
|
1653
|
+
AddSingularNode (*childsing, allow_refine, recording);
|
|
988
1654
|
}
|
|
989
1655
|
}
|
|
990
1656
|
|
|
@@ -996,12 +1662,22 @@ namespace ngsbem
|
|
|
996
1662
|
|
|
997
1663
|
if (childs[0])
|
|
998
1664
|
{
|
|
999
|
-
|
|
1665
|
+
if (total_targets < 1000)
|
|
1000
1666
|
{
|
|
1001
|
-
|
|
1002
|
-
|
|
1003
|
-
|
|
1667
|
+
for (int nr = 0; nr < 8; nr++)
|
|
1668
|
+
{
|
|
1669
|
+
if (L2Norm(mp.SH().Coefs()) > 0)
|
|
1670
|
+
mp.TransformAdd (childs[nr]->mp, childs[nr]->center-center);
|
|
1671
|
+
childs[nr]->LocalizeExpansion(allow_refine);
|
|
1672
|
+
}
|
|
1004
1673
|
}
|
|
1674
|
+
else
|
|
1675
|
+
ParallelFor(8, [&] (int nr)
|
|
1676
|
+
{
|
|
1677
|
+
if (L2Norm(mp.SH().Coefs()) > 0)
|
|
1678
|
+
mp.TransformAdd (childs[nr]->mp, childs[nr]->center-center);
|
|
1679
|
+
childs[nr]->LocalizeExpansion(allow_refine);
|
|
1680
|
+
});
|
|
1005
1681
|
mp = MultiPole<MPRegular,elem_type>(-1, mp.Kappa(), 1.);
|
|
1006
1682
|
//mp.SH().Coefs()=0.0;
|
|
1007
1683
|
}
|
|
@@ -1009,18 +1685,8 @@ namespace ngsbem
|
|
|
1009
1685
|
|
|
1010
1686
|
elem_type Evaluate (Vec<3> p) const
|
|
1011
1687
|
{
|
|
1012
|
-
// *testout << "eval p = " << p << ", level = " << level << ", center = " << center << ", r = " << r << endl;
|
|
1013
1688
|
elem_type sum{0.0};
|
|
1014
|
-
|
|
1015
|
-
if (childs[0])
|
|
1016
|
-
{
|
|
1017
|
-
int childnum = 0;
|
|
1018
|
-
if (p(0) > center(0)) childnum += 1;
|
|
1019
|
-
if (p(1) > center(1)) childnum += 2;
|
|
1020
|
-
if (p(2) > center(2)) childnum += 4;
|
|
1021
|
-
sum = childs[childnum]->Evaluate(p);
|
|
1022
|
-
}
|
|
1023
|
-
*/
|
|
1689
|
+
|
|
1024
1690
|
int childnum = 0;
|
|
1025
1691
|
if (p(0) > center(0)) childnum += 1;
|
|
1026
1692
|
if (p(1) > center(1)) childnum += 2;
|
|
@@ -1030,8 +1696,6 @@ namespace ngsbem
|
|
|
1030
1696
|
else
|
|
1031
1697
|
sum = mp.Eval(p-center);
|
|
1032
1698
|
|
|
1033
|
-
|
|
1034
|
-
// static Timer t("mptool direct evaluate"); RegionTimer r(t);
|
|
1035
1699
|
for (auto sn : singnodes)
|
|
1036
1700
|
sum += sn->EvaluateMP(p);
|
|
1037
1701
|
|
|
@@ -1080,7 +1744,8 @@ namespace ngsbem
|
|
|
1080
1744
|
|
|
1081
1745
|
void AddTarget (Vec<3> x)
|
|
1082
1746
|
{
|
|
1083
|
-
if (childs[0])
|
|
1747
|
+
// if (childs[0])
|
|
1748
|
+
if (have_childs) // quick check without locking
|
|
1084
1749
|
{
|
|
1085
1750
|
// directly send to childs:
|
|
1086
1751
|
int childnum = 0;
|
|
@@ -1091,6 +1756,20 @@ namespace ngsbem
|
|
|
1091
1756
|
return;
|
|
1092
1757
|
}
|
|
1093
1758
|
|
|
1759
|
+
lock_guard<mutex> guard(node_mutex);
|
|
1760
|
+
|
|
1761
|
+
if (have_childs) // test again after locking
|
|
1762
|
+
{
|
|
1763
|
+
// directly send to childs:
|
|
1764
|
+
int childnum = 0;
|
|
1765
|
+
if (x(0) > center(0)) childnum += 1;
|
|
1766
|
+
if (x(1) > center(1)) childnum += 2;
|
|
1767
|
+
if (x(2) > center(2)) childnum += 4;
|
|
1768
|
+
childs[childnum] -> AddTarget(x);
|
|
1769
|
+
return;
|
|
1770
|
+
}
|
|
1771
|
+
|
|
1772
|
+
|
|
1094
1773
|
targets.Append( x );
|
|
1095
1774
|
|
|
1096
1775
|
// if (r*mp.Kappa() < 1e-8) return;
|
|
@@ -1158,8 +1837,8 @@ namespace ngsbem
|
|
|
1158
1837
|
nodes_on_level = 0;
|
|
1159
1838
|
nodes_on_level[0] = 1;
|
|
1160
1839
|
{
|
|
1161
|
-
static Timer t("mptool compute regular MLMP"); RegionTimer rg(t);
|
|
1162
|
-
root.AddSingularNode(singmp->root, true);
|
|
1840
|
+
static Timer t("mptool compute regular MLMP"); RegionTimer rg(t);
|
|
1841
|
+
root.AddSingularNode(singmp->root, true, nullptr);
|
|
1163
1842
|
// cout << "norm after S->R conversion: " << root.Norm() << endl;
|
|
1164
1843
|
}
|
|
1165
1844
|
|
|
@@ -1195,14 +1874,69 @@ namespace ngsbem
|
|
|
1195
1874
|
void CalcMP(shared_ptr<SingularMLMultiPole<elem_type>> asingmp)
|
|
1196
1875
|
{
|
|
1197
1876
|
static Timer t("mptool regular MLMP"); RegionTimer rg(t);
|
|
1877
|
+
static Timer trec("mptool regular MLMP - recording");
|
|
1878
|
+
static Timer tsort("mptool regular MLMP - sort");
|
|
1198
1879
|
|
|
1199
1880
|
singmp = asingmp;
|
|
1200
1881
|
|
|
1201
1882
|
root.CalcTotalTargets();
|
|
1202
1883
|
root.RemoveEmptyTrees();
|
|
1203
|
-
|
|
1204
|
-
root.AddSingularNode(singmp->root, false);
|
|
1205
1884
|
|
|
1885
|
+
|
|
1886
|
+
// root.AddSingularNode(singmp->root, false, nullptr);
|
|
1887
|
+
// /*
|
|
1888
|
+
Array<RecordingRS> recording;
|
|
1889
|
+
{
|
|
1890
|
+
RegionTimer rrec(trec);
|
|
1891
|
+
root.AddSingularNode(singmp->root, false, &recording);
|
|
1892
|
+
}
|
|
1893
|
+
|
|
1894
|
+
// cout << "recorded: " << recording.Size() << endl;
|
|
1895
|
+
{
|
|
1896
|
+
RegionTimer reg(tsort);
|
|
1897
|
+
QuickSort (recording, [] (auto & a, auto & b)
|
|
1898
|
+
{
|
|
1899
|
+
if (a.len < (1-1e-8) * b.len) return true;
|
|
1900
|
+
if (a.len > (1+1e-8) * b.len) return false;
|
|
1901
|
+
return a.theta < b.theta;
|
|
1902
|
+
});
|
|
1903
|
+
}
|
|
1904
|
+
|
|
1905
|
+
double current_len = -1e100;
|
|
1906
|
+
double current_theta = -1e100;
|
|
1907
|
+
Array<RecordingRS*> current_batch;
|
|
1908
|
+
Array<Array<RecordingRS*>> batch_group;
|
|
1909
|
+
Array<double> group_lengths;
|
|
1910
|
+
Array<double> group_thetas;
|
|
1911
|
+
for (auto & record : recording)
|
|
1912
|
+
{
|
|
1913
|
+
bool len_changed = fabs(record.len - current_len) > 1e-8;
|
|
1914
|
+
bool theta_changed = fabs(record.theta - current_theta) > 1e-8;
|
|
1915
|
+
if ((len_changed || theta_changed) && current_batch.Size() > 0) {
|
|
1916
|
+
// ProcessBatch(current_batch, current_len, current_theta);
|
|
1917
|
+
batch_group.Append(current_batch);
|
|
1918
|
+
group_lengths.Append(current_len);
|
|
1919
|
+
group_thetas.Append(current_theta);
|
|
1920
|
+
current_batch.SetSize(0);
|
|
1921
|
+
}
|
|
1922
|
+
|
|
1923
|
+
current_len = record.len;
|
|
1924
|
+
current_theta = record.theta;
|
|
1925
|
+
current_batch.Append(&record);
|
|
1926
|
+
}
|
|
1927
|
+
if (current_batch.Size() > 0) {
|
|
1928
|
+
// ProcessBatch(current_batch, current_len, current_theta);
|
|
1929
|
+
batch_group.Append(current_batch);
|
|
1930
|
+
group_lengths.Append(current_len);
|
|
1931
|
+
group_thetas.Append(current_theta);
|
|
1932
|
+
}
|
|
1933
|
+
|
|
1934
|
+
ParallelFor(batch_group.Size(), [&](int i) {
|
|
1935
|
+
ProcessBatchRS(batch_group[i], group_lengths[i], group_thetas[i]);
|
|
1936
|
+
}, TasksPerThread(4));
|
|
1937
|
+
// */
|
|
1938
|
+
|
|
1939
|
+
|
|
1206
1940
|
/*
|
|
1207
1941
|
int maxlevel = 0;
|
|
1208
1942
|
for (auto [i,num] : Enumerate(RegularMLMultiPole::nodes_on_level))
|
|
@@ -1212,7 +1946,7 @@ namespace ngsbem
|
|
|
1212
1946
|
cout << "reg " << i << ": " << RegularMLMultiPole::nodes_on_level[i] << endl;
|
|
1213
1947
|
*/
|
|
1214
1948
|
|
|
1215
|
-
static Timer tloc("mptool regular localize expansion"); RegionTimer rloc(tloc);
|
|
1949
|
+
static Timer tloc("mptool regular localize expansion"); RegionTimer rloc(tloc);
|
|
1216
1950
|
root.LocalizeExpansion(false);
|
|
1217
1951
|
}
|
|
1218
1952
|
|
|
@@ -1246,6 +1980,7 @@ namespace ngsbem
|
|
|
1246
1980
|
|
|
1247
1981
|
};
|
|
1248
1982
|
|
|
1983
|
+
|
|
1249
1984
|
template <typename elem_type>
|
|
1250
1985
|
inline ostream & operator<< (ostream & ost, const RegularMLMultiPole<elem_type> & mlmp)
|
|
1251
1986
|
{
|