ngsolve 6.2.2504.post44.dev0__cp311-cp311-win_amd64.whl → 6.2.2601__cp311-cp311-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- netgen/include/analytic_integrals.hpp +10 -0
- netgen/include/basematrix.hpp +6 -0
- netgen/include/bdbequations.hpp +55 -0
- netgen/include/bem_diffops.hpp +475 -0
- netgen/include/bilinearform.hpp +4 -1
- netgen/include/bspline.hpp +2 -0
- netgen/include/code_generation.hpp +2 -2
- netgen/include/complex_wrapper.hpp +30 -2
- netgen/include/contact.hpp +8 -0
- netgen/include/diagonalmatrix.hpp +6 -0
- netgen/include/diffop_impl.hpp +3 -1
- netgen/include/diffopwithfactor.hpp +123 -0
- netgen/include/elementbyelement.hpp +9 -3
- netgen/include/expr.hpp +45 -7
- netgen/include/fespace.hpp +9 -2
- netgen/include/gridfunction.hpp +3 -3
- netgen/include/h1amg.hpp +24 -1
- netgen/include/h1lumping.hpp +6 -0
- netgen/include/hcurl_equations.hpp +29 -0
- netgen/include/hcurlcurlfe.hpp +20 -0
- netgen/include/hdivfe_utils.hpp +1 -0
- netgen/include/hdivhofespace.hpp +2 -0
- netgen/include/kernels.hpp +724 -0
- netgen/include/l2hofe.hpp +1 -0
- netgen/include/matrix.hpp +8 -3
- netgen/include/meshaccess.hpp +1 -1
- netgen/include/mp_coefficient.hpp +24 -19
- netgen/include/mptools.hpp +1255 -237
- netgen/include/mycomplex.hpp +1 -1
- netgen/include/ngblas.hpp +116 -7
- netgen/include/potentialtools.hpp +2 -2
- netgen/include/preconditioner.hpp +2 -2
- netgen/include/prolongation.hpp +6 -3
- netgen/include/recursive_pol.hpp +63 -11
- netgen/include/simd_complex.hpp +45 -0
- netgen/include/sparsecholesky.hpp +6 -2
- netgen/include/sparsefactorization_interface.hpp +159 -0
- netgen/include/sparsematrix.hpp +21 -7
- netgen/include/sparsematrix_dyn.hpp +2 -2
- netgen/include/sparsematrix_impl.hpp +100 -33
- netgen/include/statushandler.hpp +8 -8
- netgen/include/thdivfe_impl.hpp +66 -0
- netgen/include/tscalarfe.hpp +1 -1
- netgen/include/vector.hpp +272 -47
- netgen/lib/libngsolve.lib +0 -0
- netgen/libngsolve.dll +0 -0
- netgen/ngscxx.bat +1 -1
- netgen/ngsld.bat +1 -1
- ngsolve/cmake/NGSolveConfig.cmake +8 -8
- ngsolve/cmake/ngsolve-targets.cmake +17 -10
- ngsolve/config/config.py +8 -8
- ngsolve/demos/intro/cmagnet.py +19 -22
- ngsolve/directsolvers.py +9 -21
- ngsolve/krylovspace.py +172 -3
- ngsolve/ngslib.pyd +0 -0
- ngsolve/nonlinearsolvers.py +2 -2
- ngsolve/solve_implementation.py +14 -1
- ngsolve/{solvers.py → solvers/__init__.py} +1 -1
- ngsolve/solvers/cudss.py +112 -0
- ngsolve/webgui.py +1 -0
- {ngsolve-6.2.2504.post44.dev0.dist-info → ngsolve-6.2.2601.dist-info}/METADATA +2 -2
- {ngsolve-6.2.2504.post44.dev0.dist-info → ngsolve-6.2.2601.dist-info}/RECORD +94 -88
- {ngsolve-6.2.2504.post44.dev0.data → ngsolve-6.2.2601.data}/data/Scripts/ngsolve.tcl +0 -0
- {ngsolve-6.2.2504.post44.dev0.data → ngsolve-6.2.2601.data}/data/share/ngsolve/beam.geo +0 -0
- {ngsolve-6.2.2504.post44.dev0.data → ngsolve-6.2.2601.data}/data/share/ngsolve/beam.vol +0 -0
- {ngsolve-6.2.2504.post44.dev0.data → ngsolve-6.2.2601.data}/data/share/ngsolve/chip.in2d +0 -0
- {ngsolve-6.2.2504.post44.dev0.data → ngsolve-6.2.2601.data}/data/share/ngsolve/chip.vol +0 -0
- {ngsolve-6.2.2504.post44.dev0.data → ngsolve-6.2.2601.data}/data/share/ngsolve/coil.geo +0 -0
- {ngsolve-6.2.2504.post44.dev0.data → ngsolve-6.2.2601.data}/data/share/ngsolve/coil.vol +0 -0
- {ngsolve-6.2.2504.post44.dev0.data → ngsolve-6.2.2601.data}/data/share/ngsolve/coilshield.geo +0 -0
- {ngsolve-6.2.2504.post44.dev0.data → ngsolve-6.2.2601.data}/data/share/ngsolve/coilshield.vol +0 -0
- {ngsolve-6.2.2504.post44.dev0.data → ngsolve-6.2.2601.data}/data/share/ngsolve/cube.geo +0 -0
- {ngsolve-6.2.2504.post44.dev0.data → ngsolve-6.2.2601.data}/data/share/ngsolve/cube.vol +0 -0
- {ngsolve-6.2.2504.post44.dev0.data → ngsolve-6.2.2601.data}/data/share/ngsolve/d10_DGdoubleglazing.pde +0 -0
- {ngsolve-6.2.2504.post44.dev0.data → ngsolve-6.2.2601.data}/data/share/ngsolve/d11_chip_nitsche.pde +0 -0
- {ngsolve-6.2.2504.post44.dev0.data → ngsolve-6.2.2601.data}/data/share/ngsolve/d1_square.pde +0 -0
- {ngsolve-6.2.2504.post44.dev0.data → ngsolve-6.2.2601.data}/data/share/ngsolve/d2_chip.pde +0 -0
- {ngsolve-6.2.2504.post44.dev0.data → ngsolve-6.2.2601.data}/data/share/ngsolve/d3_helmholtz.pde +0 -0
- {ngsolve-6.2.2504.post44.dev0.data → ngsolve-6.2.2601.data}/data/share/ngsolve/d4_cube.pde +0 -0
- {ngsolve-6.2.2504.post44.dev0.data → ngsolve-6.2.2601.data}/data/share/ngsolve/d5_beam.pde +0 -0
- {ngsolve-6.2.2504.post44.dev0.data → ngsolve-6.2.2601.data}/data/share/ngsolve/d6_shaft.pde +0 -0
- {ngsolve-6.2.2504.post44.dev0.data → ngsolve-6.2.2601.data}/data/share/ngsolve/d7_coil.pde +0 -0
- {ngsolve-6.2.2504.post44.dev0.data → ngsolve-6.2.2601.data}/data/share/ngsolve/d8_coilshield.pde +0 -0
- {ngsolve-6.2.2504.post44.dev0.data → ngsolve-6.2.2601.data}/data/share/ngsolve/d9_hybridDG.pde +0 -0
- {ngsolve-6.2.2504.post44.dev0.data → ngsolve-6.2.2601.data}/data/share/ngsolve/doubleglazing.in2d +0 -0
- {ngsolve-6.2.2504.post44.dev0.data → ngsolve-6.2.2601.data}/data/share/ngsolve/doubleglazing.vol +0 -0
- {ngsolve-6.2.2504.post44.dev0.data → ngsolve-6.2.2601.data}/data/share/ngsolve/piezo2d40round4.vol.gz +0 -0
- {ngsolve-6.2.2504.post44.dev0.data → ngsolve-6.2.2601.data}/data/share/ngsolve/shaft.geo +0 -0
- {ngsolve-6.2.2504.post44.dev0.data → ngsolve-6.2.2601.data}/data/share/ngsolve/shaft.vol +0 -0
- {ngsolve-6.2.2504.post44.dev0.data → ngsolve-6.2.2601.data}/data/share/ngsolve/square.in2d +0 -0
- {ngsolve-6.2.2504.post44.dev0.data → ngsolve-6.2.2601.data}/data/share/ngsolve/square.vol +0 -0
- {ngsolve-6.2.2504.post44.dev0.dist-info → ngsolve-6.2.2601.dist-info}/LICENSE +0 -0
- {ngsolve-6.2.2504.post44.dev0.dist-info → ngsolve-6.2.2601.dist-info}/WHEEL +0 -0
- {ngsolve-6.2.2504.post44.dev0.dist-info → ngsolve-6.2.2601.dist-info}/top_level.txt +0 -0
netgen/include/mptools.hpp
CHANGED
|
@@ -11,15 +11,77 @@
|
|
|
11
11
|
#include <recursive_pol.hpp>
|
|
12
12
|
|
|
13
13
|
|
|
14
|
-
namespace ngcomp
|
|
15
|
-
{
|
|
16
|
-
class Region;
|
|
17
|
-
}
|
|
18
|
-
|
|
19
14
|
namespace ngsbem
|
|
20
15
|
{
|
|
21
16
|
using namespace ngfem;
|
|
22
17
|
|
|
18
|
+
template<typename T>
|
|
19
|
+
constexpr int VecLength = 1; // Default: Complex has length 1
|
|
20
|
+
|
|
21
|
+
template<int N>
|
|
22
|
+
constexpr int VecLength<Vec<N, Complex>> = N; // Specialization: Vec<N,Complex> has length N
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
constexpr int FMM_SW = 4;
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
// ************************ SIMD - creation (should end up in simd.hpp) *************
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
template <int S, typename T, int SW>
|
|
34
|
+
Vec<S,T> HSum (Vec<S,SIMD<T,SW>> v)
|
|
35
|
+
{
|
|
36
|
+
Vec<S,T> res;
|
|
37
|
+
for (int i = 0; i < S; i++)
|
|
38
|
+
res(i) = HSum(v(i));
|
|
39
|
+
// Iterate<S> ([&](auto i) {
|
|
40
|
+
// res.HTData().template Elem<i.value>() = HSum(v.HTData().template Elem<i.value>());
|
|
41
|
+
// });
|
|
42
|
+
return res;
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class NGS_DLL_HEADER PrecomputedSqrts
|
|
47
|
+
{
|
|
48
|
+
public:
|
|
49
|
+
Array<double> sqrt_int;
|
|
50
|
+
// Array<double> inv_sqrt_int;
|
|
51
|
+
Array<double> sqrt_n_np1; // sqrt(n*(n+1))
|
|
52
|
+
Array<double> inv_sqrt_2np1_2np3; // 1/sqrt( (2n+1)*(2n+3) )
|
|
53
|
+
|
|
54
|
+
PrecomputedSqrts();
|
|
55
|
+
};
|
|
56
|
+
|
|
57
|
+
extern NGS_DLL_HEADER PrecomputedSqrts presqrt;
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
class FMM_Parameters
|
|
62
|
+
{
|
|
63
|
+
public:
|
|
64
|
+
int maxdirect = 100;
|
|
65
|
+
int minorder = 20; // order = minorder + 2 kappa r
|
|
66
|
+
};
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
inline std::tuple<double, double, double> SphericalCoordinates(Vec<3> dist){
|
|
72
|
+
double len, theta, phi;
|
|
73
|
+
len = L2Norm(dist);
|
|
74
|
+
if (len < 1e-30)
|
|
75
|
+
theta = 0;
|
|
76
|
+
else
|
|
77
|
+
theta = acos (dist(2) / len);
|
|
78
|
+
if (sqr(dist(0))+sqr(dist(1)) < 1e-30)
|
|
79
|
+
phi = 0;
|
|
80
|
+
else
|
|
81
|
+
phi = atan2(dist(1), dist(0));
|
|
82
|
+
return {len, theta, phi};
|
|
83
|
+
}
|
|
84
|
+
|
|
23
85
|
|
|
24
86
|
template <typename entry_type = Complex>
|
|
25
87
|
class NGS_DLL_HEADER SphericalHarmonics
|
|
@@ -84,23 +146,91 @@ namespace ngsbem
|
|
|
84
146
|
|
|
85
147
|
void Calc (Vec<3> x, FlatVector<Complex> shapes);
|
|
86
148
|
|
|
87
|
-
|
|
149
|
+
|
|
150
|
+
void FlipZ ();
|
|
88
151
|
void RotateZ (double alpha);
|
|
89
|
-
|
|
152
|
+
|
|
153
|
+
template <typename FUNC>
|
|
154
|
+
void RotateZ (double alpha, FUNC func) const
|
|
155
|
+
{
|
|
156
|
+
if (order < 0) return;
|
|
157
|
+
|
|
158
|
+
Vector<Complex> exp_imalpha(order+1);
|
|
159
|
+
Complex exp_ialpha(cos(alpha), sin(alpha));
|
|
160
|
+
Complex prod = 1.0;
|
|
161
|
+
for (int i = 0; i <= order; i++)
|
|
162
|
+
{
|
|
163
|
+
exp_imalpha(i) = prod;
|
|
164
|
+
prod *= exp_ialpha;
|
|
165
|
+
}
|
|
166
|
+
|
|
167
|
+
int ii = 0;
|
|
168
|
+
for (int n = 0; n <= order; n++)
|
|
169
|
+
{
|
|
170
|
+
for (int m = -n; m < 0; m++, ii++)
|
|
171
|
+
func(ii, conj(exp_imalpha(-m)));
|
|
172
|
+
for (int m = 0; m <= n; m++, ii++)
|
|
173
|
+
func(ii, exp_imalpha(m));
|
|
174
|
+
};
|
|
175
|
+
};
|
|
176
|
+
|
|
177
|
+
template <typename FUNC>
|
|
178
|
+
void RotateZFlip (double alpha, bool flip, FUNC func) const
|
|
179
|
+
{
|
|
180
|
+
if (order < 0) return;
|
|
181
|
+
|
|
182
|
+
Vector<Complex> exp_imalpha(order+1);
|
|
183
|
+
Complex exp_ialpha(cos(alpha), sin(alpha));
|
|
184
|
+
Complex prod = 1.0;
|
|
185
|
+
for (int i = 0; i <= order; i++)
|
|
186
|
+
{
|
|
187
|
+
exp_imalpha(i) = prod;
|
|
188
|
+
prod *= exp_ialpha;
|
|
189
|
+
}
|
|
190
|
+
|
|
191
|
+
int ii = 0;
|
|
192
|
+
|
|
193
|
+
auto FlipFactor = [] (int n, int m, bool flip)->double
|
|
194
|
+
{
|
|
195
|
+
if (flip)
|
|
196
|
+
return ((n-m)%2) == 1 ? -1 : 1;
|
|
197
|
+
return 1.0;
|
|
198
|
+
};
|
|
199
|
+
|
|
200
|
+
for (int n = 0; n <= order; n++)
|
|
201
|
+
{
|
|
202
|
+
for (int m = -n; m < 0; m++, ii++)
|
|
203
|
+
func(ii, FlipFactor(n,m,flip)*conj(exp_imalpha(-m)));
|
|
204
|
+
for (int m = 0; m <= n; m++, ii++)
|
|
205
|
+
func(ii, FlipFactor(n,m,flip)*exp_imalpha(m));
|
|
206
|
+
};
|
|
207
|
+
};
|
|
90
208
|
|
|
91
209
|
|
|
210
|
+
|
|
211
|
+
void RotateY (double alpha, bool parallel = false);
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
|
|
92
215
|
static double CalcAmn (int m, int n)
|
|
93
216
|
{
|
|
94
217
|
if (m < 0) m=-m;
|
|
95
218
|
if (n < m) return 0;
|
|
96
|
-
|
|
219
|
+
|
|
220
|
+
if (2*n+1 < presqrt.sqrt_int.Size())
|
|
221
|
+
return presqrt.sqrt_int[n+1+m]*presqrt.sqrt_int[n+1-m] * presqrt.inv_sqrt_2np1_2np3[n];
|
|
222
|
+
else
|
|
223
|
+
return sqrt( (n+1.0+m)*(n+1.0-m) / ( (2*n+1)*(2*n+3) ));
|
|
97
224
|
}
|
|
98
225
|
|
|
99
226
|
static double CalcBmn (int m, int n)
|
|
100
227
|
{
|
|
101
228
|
double sgn = (m >= 0) ? 1 : -1;
|
|
102
|
-
if ( (m
|
|
103
|
-
|
|
229
|
+
if ( (m >= n) || (-m > n) ) return 0;
|
|
230
|
+
if (n <= presqrt.inv_sqrt_2np1_2np3.Size())
|
|
231
|
+
return sgn * presqrt.sqrt_n_np1[n-m-1] * presqrt.inv_sqrt_2np1_2np3[n-1];
|
|
232
|
+
else
|
|
233
|
+
return sgn * sqrt( (n-m-1.0)*(n-m) / ( (2*n-1.0)*(2*n+1)));
|
|
104
234
|
}
|
|
105
235
|
|
|
106
236
|
static double CalcDmn (int m, int n)
|
|
@@ -119,11 +249,11 @@ namespace ngsbem
|
|
|
119
249
|
// https://fortran-lang.discourse.group/t/looking-for-spherical-bessel-and-hankel-functions-of-first-and-second-kind-and-arbitrary-order/2308/2
|
|
120
250
|
NGS_DLL_HEADER
|
|
121
251
|
void besseljs3d (int nterms, double z, double scale,
|
|
122
|
-
|
|
252
|
+
SliceVector<double> fjs, SliceVector<double> fjder = FlatVector<double>(0, nullptr));
|
|
123
253
|
|
|
124
254
|
NGS_DLL_HEADER
|
|
125
255
|
void besseljs3d (int nterms, Complex z, double scale,
|
|
126
|
-
|
|
256
|
+
SliceVector<Complex> fjs, SliceVector<Complex> fjder = FlatVector<Complex>(0, nullptr));
|
|
127
257
|
|
|
128
258
|
|
|
129
259
|
/*
|
|
@@ -142,14 +272,17 @@ namespace ngsbem
|
|
|
142
272
|
FlatVector<double> jp,
|
|
143
273
|
FlatVector<double> yp);
|
|
144
274
|
|
|
145
|
-
|
|
275
|
+
|
|
146
276
|
|
|
147
277
|
template <typename T>
|
|
148
278
|
void SphericalBessel (int n, double rho, double scale, T && values)
|
|
149
279
|
{
|
|
280
|
+
besseljs3d (n, rho, scale, values);
|
|
281
|
+
/*
|
|
150
282
|
Vector<double> j(n+1), jp(n+1);
|
|
151
283
|
besseljs3d (n, rho, scale, j, jp);
|
|
152
284
|
values = j;
|
|
285
|
+
*/
|
|
153
286
|
}
|
|
154
287
|
|
|
155
288
|
|
|
@@ -173,21 +306,6 @@ namespace ngsbem
|
|
|
173
306
|
return;
|
|
174
307
|
}
|
|
175
308
|
Vector j(n+1), y(n+1), jp(n+1), yp(n+1);
|
|
176
|
-
// SBESJY (rho, n, j, y, jp, yp);
|
|
177
|
-
|
|
178
|
-
/*
|
|
179
|
-
values = j + Complex(0,1) * y;
|
|
180
|
-
if (scale != 1.0)
|
|
181
|
-
{
|
|
182
|
-
double prod = 1.0;
|
|
183
|
-
for (int i = 0; i <= n; i++)
|
|
184
|
-
{
|
|
185
|
-
values(i) *= prod;
|
|
186
|
-
prod *= scale;
|
|
187
|
-
}
|
|
188
|
-
}
|
|
189
|
-
*/
|
|
190
|
-
|
|
191
309
|
|
|
192
310
|
// the bessel-evaluation with scale
|
|
193
311
|
besseljs3d (n, rho, 1/scale, j, jp);
|
|
@@ -215,7 +333,7 @@ namespace ngsbem
|
|
|
215
333
|
|
|
216
334
|
|
|
217
335
|
// hn1 = jn+ i*yn
|
|
218
|
-
class
|
|
336
|
+
class Singular
|
|
219
337
|
{
|
|
220
338
|
public:
|
|
221
339
|
template <typename T>
|
|
@@ -241,7 +359,7 @@ namespace ngsbem
|
|
|
241
359
|
|
|
242
360
|
|
|
243
361
|
// jn
|
|
244
|
-
class
|
|
362
|
+
class Regular
|
|
245
363
|
{
|
|
246
364
|
public:
|
|
247
365
|
template <typename T>
|
|
@@ -269,14 +387,14 @@ namespace ngsbem
|
|
|
269
387
|
|
|
270
388
|
|
|
271
389
|
template <typename RADIAL, typename entry_type=Complex>
|
|
272
|
-
class NGS_DLL_HEADER
|
|
390
|
+
class NGS_DLL_HEADER SphericalExpansion
|
|
273
391
|
{
|
|
274
392
|
SphericalHarmonics<entry_type> sh;
|
|
275
393
|
double kappa;
|
|
276
394
|
double rtyp;
|
|
277
395
|
public:
|
|
278
396
|
|
|
279
|
-
|
|
397
|
+
SphericalExpansion (int aorder, double akappa, double artyp)
|
|
280
398
|
: sh(aorder), kappa(akappa), rtyp(artyp) { }
|
|
281
399
|
|
|
282
400
|
|
|
@@ -288,15 +406,15 @@ namespace ngsbem
|
|
|
288
406
|
double RTyp() const { return rtyp; }
|
|
289
407
|
int Order() const { return sh.Order(); }
|
|
290
408
|
|
|
291
|
-
|
|
409
|
+
SphericalExpansion Truncate(int neworder) const
|
|
292
410
|
{
|
|
293
411
|
if (neworder > sh.Order()) neworder=sh.Order();
|
|
294
|
-
|
|
412
|
+
SphericalExpansion nmp(neworder, kappa, rtyp);
|
|
295
413
|
nmp.sh.Coefs() = sh.Coefs().Range(sqr(neworder+1));
|
|
296
414
|
return nmp;
|
|
297
415
|
}
|
|
298
416
|
|
|
299
|
-
|
|
417
|
+
SphericalExpansion & operator+= (const SphericalExpansion & mp2)
|
|
300
418
|
{
|
|
301
419
|
size_t commonsize = min(SH().Coefs().Size(), mp2.SH().Coefs().Size());
|
|
302
420
|
SH().Coefs().Range(commonsize) += mp2.SH().Coefs().Range(commonsize);
|
|
@@ -307,27 +425,24 @@ namespace ngsbem
|
|
|
307
425
|
entry_type EvalDirectionalDerivative (Vec<3> x, Vec<3> d) const;
|
|
308
426
|
|
|
309
427
|
void AddCharge (Vec<3> x, entry_type c);
|
|
310
|
-
void AddDipole (Vec<3> x, Vec<3>
|
|
311
|
-
void
|
|
312
|
-
|
|
313
|
-
/*
|
|
314
|
-
void ChangeScaleTo (double newscale)
|
|
428
|
+
void AddDipole (Vec<3> x, Vec<3> dir, entry_type c);
|
|
429
|
+
void AddChargeDipole (Vec<3> x, entry_type c, Vec<3> dir, entry_type c2)
|
|
315
430
|
{
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
sh.CoefsN(n) *= prod;
|
|
320
|
-
scale = newscale;
|
|
431
|
+
// TODO: add them at once
|
|
432
|
+
AddCharge (x, c);
|
|
433
|
+
AddDipole (x, dir, c2);
|
|
321
434
|
}
|
|
322
|
-
|
|
435
|
+
|
|
436
|
+
void AddPlaneWave (Vec<3> d, entry_type c);
|
|
437
|
+
void AddCurrent (Vec<3> ap, Vec<3> ep, Complex j, int num=100);
|
|
438
|
+
|
|
439
|
+
|
|
323
440
|
void ChangeRTypTo (double new_rtyp)
|
|
324
441
|
{
|
|
325
|
-
// double fac = Scale()/newscale;
|
|
326
442
|
double fac = RADIAL::Scale(kappa, rtyp) / RADIAL::Scale(kappa, new_rtyp);
|
|
327
443
|
double prod = 1;
|
|
328
444
|
for (int n = 0; n <= sh.Order(); n++, prod*= fac)
|
|
329
445
|
sh.CoefsN(n) *= prod;
|
|
330
|
-
// scale = newscale;
|
|
331
446
|
rtyp = new_rtyp;
|
|
332
447
|
}
|
|
333
448
|
|
|
@@ -346,7 +461,7 @@ namespace ngsbem
|
|
|
346
461
|
|
|
347
462
|
|
|
348
463
|
template <typename TARGET>
|
|
349
|
-
void Transform (
|
|
464
|
+
void Transform (SphericalExpansion<TARGET,entry_type> & target, Vec<3> dist) const
|
|
350
465
|
{
|
|
351
466
|
if (target.SH().Order() < 0) return;
|
|
352
467
|
if (SH().Order() < 0)
|
|
@@ -358,22 +473,11 @@ namespace ngsbem
|
|
|
358
473
|
// static Timer t("mptool Transform "+ToString(typeid(RADIAL).name())+ToString(typeid(TARGET).name()));
|
|
359
474
|
// RegionTimer reg(t);
|
|
360
475
|
|
|
361
|
-
|
|
362
|
-
double theta, phi;
|
|
363
|
-
|
|
364
|
-
if (len < 1e-30)
|
|
365
|
-
theta = 0;
|
|
366
|
-
else
|
|
367
|
-
theta = acos (dist(2) / len);
|
|
368
|
-
|
|
369
|
-
if (sqr(dist(0))+sqr(dist(1)) < 1e-30)
|
|
370
|
-
phi = 0;
|
|
371
|
-
else
|
|
372
|
-
phi = atan2(dist(1), dist(0));
|
|
476
|
+
auto [len, theta, phi] = SphericalCoordinates(dist);
|
|
373
477
|
|
|
374
478
|
|
|
375
|
-
//
|
|
376
|
-
|
|
479
|
+
// SphericalExpansion<RADIAL,entry_type> tmp{*this};
|
|
480
|
+
SphericalExpansion<RADIAL,entry_type> tmp(Order(), kappa, rtyp);
|
|
377
481
|
tmp.SH().Coefs() = SH().Coefs();
|
|
378
482
|
|
|
379
483
|
tmp.SH().RotateZ(phi);
|
|
@@ -386,58 +490,213 @@ namespace ngsbem
|
|
|
386
490
|
}
|
|
387
491
|
|
|
388
492
|
template <typename TARGET>
|
|
389
|
-
void TransformAdd (
|
|
493
|
+
void TransformAdd (SphericalExpansion<TARGET,entry_type> & target, Vec<3> dist, bool atomic = false) const
|
|
390
494
|
{
|
|
391
495
|
if (SH().Order() < 0) return;
|
|
392
496
|
if (target.SH().Order() < 0) return;
|
|
393
497
|
|
|
394
|
-
|
|
498
|
+
SphericalExpansion<TARGET,entry_type> tmp{target};
|
|
395
499
|
Transform(tmp, dist);
|
|
396
|
-
|
|
500
|
+
if (!atomic)
|
|
501
|
+
target.SH().Coefs() += tmp.SH().Coefs();
|
|
502
|
+
else
|
|
503
|
+
for (int j = 0; j < target.SH().Coefs().Size(); j++)
|
|
504
|
+
AtomicAdd(target.SH().Coefs()[j], tmp.SH().Coefs()[j]);
|
|
397
505
|
}
|
|
398
506
|
|
|
399
507
|
template <typename TARGET>
|
|
400
|
-
void ShiftZ (double z,
|
|
508
|
+
void ShiftZ (double z, SphericalExpansion<TARGET,entry_type> & target);
|
|
509
|
+
|
|
401
510
|
|
|
511
|
+
template <typename TARGET>
|
|
512
|
+
void In2Out (SphericalExpansion<TARGET,entry_type> & target, double r) const
|
|
513
|
+
{
|
|
514
|
+
Vector<Complex> rad(Order()+1);
|
|
515
|
+
Vector<Complex> radout(target.Order()+1);
|
|
516
|
+
RADIAL::Eval(Order(), kappa, r, RTyp(), rad);
|
|
517
|
+
TARGET::Eval(target.Order(), kappa, r, target.RTyp(), radout);
|
|
518
|
+
target.SH().Coefs() = 0;
|
|
519
|
+
for (int j = 0; j <= std::min(Order(), target.Order()); j++)
|
|
520
|
+
target.SH().CoefsN(j) = rad(j)/radout(j) * SH().CoefsN(j);
|
|
521
|
+
}
|
|
402
522
|
};
|
|
403
523
|
|
|
404
524
|
|
|
405
525
|
|
|
406
526
|
// ***************** parameters ****************
|
|
407
527
|
|
|
528
|
+
/*
|
|
408
529
|
static constexpr int MPOrder (double rho_kappa)
|
|
409
530
|
{
|
|
410
|
-
return max (20, int(2*rho_kappa));
|
|
531
|
+
// return max (20, int(2*rho_kappa));
|
|
532
|
+
return 20+int(2*rho_kappa);
|
|
411
533
|
}
|
|
412
534
|
static constexpr int maxdirect = 100;
|
|
535
|
+
*/
|
|
536
|
+
|
|
537
|
+
|
|
538
|
+
template <typename SCAL, auto S>
|
|
539
|
+
inline auto VecVector2Matrix (FlatVector<Vec<S,SCAL>> vec)
|
|
540
|
+
{
|
|
541
|
+
return FlatMatrixFixWidth<S,SCAL> (vec.Size(), vec.Data()->Data());
|
|
542
|
+
}
|
|
543
|
+
|
|
544
|
+
inline auto VecVector2Matrix (FlatVector<Complex> vec)
|
|
545
|
+
{
|
|
546
|
+
return FlatMatrixFixWidth<1,Complex> (vec.Size(), vec.Data());
|
|
547
|
+
}
|
|
413
548
|
|
|
414
549
|
|
|
415
550
|
template <typename entry_type=Complex>
|
|
416
|
-
class
|
|
551
|
+
class SingularMLExpansion
|
|
417
552
|
{
|
|
553
|
+
using simd_entry_type = decltype(MakeSimd(declval<std::array<entry_type,FMM_SW>>()));
|
|
418
554
|
static Array<size_t> nodes_on_level;
|
|
419
555
|
|
|
556
|
+
struct RecordingSS
|
|
557
|
+
{
|
|
558
|
+
const SphericalExpansion<Singular,entry_type> * mp_source;
|
|
559
|
+
SphericalExpansion<Singular,entry_type> * mp_target;
|
|
560
|
+
Vec<3> dist;
|
|
561
|
+
double len, theta, phi;
|
|
562
|
+
bool flipz;
|
|
563
|
+
public:
|
|
564
|
+
RecordingSS() = default;
|
|
565
|
+
RecordingSS (const SphericalExpansion<Singular,entry_type> * amp_source,
|
|
566
|
+
SphericalExpansion<Singular,entry_type> * amp_target,
|
|
567
|
+
Vec<3> adist)
|
|
568
|
+
: mp_source(amp_source), mp_target(amp_target), dist(adist)
|
|
569
|
+
{
|
|
570
|
+
std::tie(len, theta, phi) = SphericalCoordinates(adist);
|
|
571
|
+
// flipz = false;
|
|
572
|
+
flipz = theta > M_PI/2;
|
|
573
|
+
if (flipz) theta = M_PI-theta;
|
|
574
|
+
}
|
|
575
|
+
};
|
|
576
|
+
|
|
577
|
+
|
|
578
|
+
static void ProcessBatchSS(FlatArray<RecordingSS*> batch, double len, double theta) {
|
|
579
|
+
constexpr int vec_length = VecLength<entry_type>;
|
|
580
|
+
int batch_size = batch.Size();
|
|
581
|
+
int N = batch_size * vec_length;
|
|
582
|
+
// *testout << "Processing batch of size " << batch.Size() << ", with N = " << N << ", vec_length = " << vec_length << ", Type: " << typeid(entry_type).name() << ", len = " << len << ", theta = " << theta << endl;
|
|
583
|
+
|
|
584
|
+
if (N <= 1 || batch_size <= 1) {
|
|
585
|
+
for (auto* rec : batch) {
|
|
586
|
+
rec->mp_source->TransformAdd(*rec->mp_target, rec->dist, true);
|
|
587
|
+
}
|
|
588
|
+
}
|
|
589
|
+
else if (N <= 3) {
|
|
590
|
+
ProcessVectorizedBatchSS<3, vec_length>(batch, len, theta);
|
|
591
|
+
}
|
|
592
|
+
else if (N <= 4) {
|
|
593
|
+
ProcessVectorizedBatchSS<4, vec_length>(batch, len, theta);
|
|
594
|
+
}
|
|
595
|
+
else if (N <= 6) {
|
|
596
|
+
ProcessVectorizedBatchSS<6, vec_length>(batch, len, theta);
|
|
597
|
+
}
|
|
598
|
+
else if (N <= 12) {
|
|
599
|
+
ProcessVectorizedBatchSS<12, vec_length>(batch, len, theta);
|
|
600
|
+
}
|
|
601
|
+
else if (N <= 24) {
|
|
602
|
+
ProcessVectorizedBatchSS<24, vec_length>(batch, len, theta);
|
|
603
|
+
}
|
|
604
|
+
else if (N <= 48) {
|
|
605
|
+
ProcessVectorizedBatchSS<48, vec_length>(batch, len, theta);
|
|
606
|
+
}
|
|
607
|
+
else if (N <= 96) {
|
|
608
|
+
ProcessVectorizedBatchSS<96, vec_length>(batch, len, theta);
|
|
609
|
+
}
|
|
610
|
+
else if (N <= 192) {
|
|
611
|
+
ProcessVectorizedBatchSS<192, vec_length>(batch, len, theta);
|
|
612
|
+
}
|
|
613
|
+
else {
|
|
614
|
+
// Split large batches
|
|
615
|
+
ProcessBatchSS(batch.Range(0, 192 / vec_length), len, theta);
|
|
616
|
+
ProcessBatchSS(batch.Range(192 / vec_length, batch_size), len, theta);
|
|
617
|
+
}
|
|
618
|
+
}
|
|
619
|
+
|
|
620
|
+
template<int N, int vec_length>
|
|
621
|
+
static void ProcessVectorizedBatchSS(FlatArray<RecordingSS*> batch, double len, double theta) {
|
|
622
|
+
|
|
623
|
+
// *testout << "Processing vectorized S->S batch of size " << batch.Size() << ", with N = " << N << ", vec_length = " << vec_length << ", len = " << len << ", theta = " << theta << endl;
|
|
624
|
+
double kappa = batch[0]->mp_source->Kappa();
|
|
625
|
+
int so = batch[0]->mp_source->Order();
|
|
626
|
+
int to = batch[0]->mp_target->Order();
|
|
627
|
+
SphericalExpansion<Singular, Vec<N,Complex>> vec_source(so, kappa, batch[0]->mp_source->RTyp());
|
|
628
|
+
SphericalExpansion<Singular, Vec<N,Complex>> vec_target(to, kappa, batch[0]->mp_target->RTyp());
|
|
629
|
+
|
|
630
|
+
// Copy multipoles into vectorized multipole
|
|
631
|
+
for (int i = 0; i < batch.Size(); i++)
|
|
632
|
+
{
|
|
633
|
+
auto source_i = VecVector2Matrix (batch[i]->mp_source->SH().Coefs());
|
|
634
|
+
auto source_mati = VecVector2Matrix (vec_source.SH().Coefs()).Cols(i*vec_length, (i+1)*vec_length);
|
|
635
|
+
batch[i]->mp_source->SH().RotateZFlip(batch[i]->phi, batch[i]->flipz,
|
|
636
|
+
[source_i, source_mati] (size_t ii, Complex factor)
|
|
637
|
+
{
|
|
638
|
+
source_mati.Row(ii) = factor * source_i.Row(ii);
|
|
639
|
+
});
|
|
640
|
+
}
|
|
641
|
+
|
|
642
|
+
vec_source.SH().RotateY(theta, vec_source.SH().Order() >= 100);
|
|
643
|
+
vec_source.ShiftZ(-len, vec_target);
|
|
644
|
+
vec_target.SH().RotateY(-theta, vec_target.SH().Order() >= 100);
|
|
645
|
+
|
|
646
|
+
// Copy vectorized multipole into individual multipoles
|
|
647
|
+
for (int i = 0; i < batch.Size(); i++)
|
|
648
|
+
{
|
|
649
|
+
auto source_mati = VecVector2Matrix (vec_target.SH().Coefs()).Cols(i*vec_length, (i+1)*vec_length);
|
|
650
|
+
auto target_mati = VecVector2Matrix (batch[i]->mp_target->SH().Coefs());
|
|
651
|
+
batch[i]->mp_target->SH().RotateZFlip(-batch[i]->phi, batch[i]->flipz,
|
|
652
|
+
[source_mati, target_mati] (size_t ii, Complex factor)
|
|
653
|
+
{
|
|
654
|
+
AtomicAdd (target_mati.Row(ii), factor * source_mati.Row(ii));
|
|
655
|
+
});
|
|
656
|
+
}
|
|
657
|
+
}
|
|
658
|
+
|
|
420
659
|
struct Node
|
|
421
660
|
{
|
|
422
661
|
Vec<3> center;
|
|
423
662
|
double r;
|
|
424
663
|
int level;
|
|
425
664
|
std::array<unique_ptr<Node>,8> childs;
|
|
426
|
-
|
|
665
|
+
SphericalExpansion<Singular, entry_type> mp;
|
|
427
666
|
|
|
428
667
|
Array<tuple<Vec<3>, entry_type>> charges;
|
|
429
668
|
Array<tuple<Vec<3>, Vec<3>, entry_type>> dipoles;
|
|
669
|
+
Array<tuple<Vec<3>, entry_type, Vec<3>, entry_type>> chargedipoles;
|
|
430
670
|
Array<tuple<Vec<3>, Vec<3>, Complex,int>> currents;
|
|
671
|
+
|
|
672
|
+
using simd_entry_type = decltype(MakeSimd(declval<std::array<entry_type,FMM_SW>>()));
|
|
673
|
+
Array<tuple<Vec<3,SIMD<double,FMM_SW>>, simd_entry_type>> simd_charges;
|
|
674
|
+
Array<tuple<Vec<3,SIMD<double,FMM_SW>>, Vec<3,SIMD<double,FMM_SW>>, simd_entry_type>> simd_dipoles;
|
|
675
|
+
Array<tuple<Vec<3,SIMD<double,FMM_SW>>, simd_entry_type,
|
|
676
|
+
Vec<3,SIMD<double,FMM_SW>>, simd_entry_type>> simd_chargedipoles;
|
|
677
|
+
|
|
431
678
|
int total_sources;
|
|
679
|
+
const FMM_Parameters & fmm_params;
|
|
680
|
+
std::mutex node_mutex;
|
|
681
|
+
atomic<bool> have_childs{false};
|
|
432
682
|
|
|
433
|
-
Node (Vec<3> acenter, double ar, int alevel, double akappa)
|
|
434
|
-
|
|
683
|
+
Node (Vec<3> acenter, double ar, int alevel, double akappa, const FMM_Parameters & afmm_params)
|
|
684
|
+
// : center(acenter), r(ar), level(alevel), mp(MPOrder(ar*akappa), akappa, ar), fmm_params(afmm_params)
|
|
685
|
+
: center(acenter), r(ar), level(alevel), mp(afmm_params.minorder+2*ar*akappa, akappa, ar), fmm_params(afmm_params)
|
|
435
686
|
{
|
|
436
687
|
if (level < nodes_on_level.Size())
|
|
437
688
|
nodes_on_level[level]++;
|
|
438
689
|
}
|
|
439
690
|
|
|
440
|
-
|
|
691
|
+
int GetChildNum (Vec<3> x) const
|
|
692
|
+
{
|
|
693
|
+
int childnum = 0;
|
|
694
|
+
if (x(0) > center(0)) childnum += 1;
|
|
695
|
+
if (x(1) > center(1)) childnum += 2;
|
|
696
|
+
if (x(2) > center(2)) childnum += 4;
|
|
697
|
+
return childnum;
|
|
698
|
+
}
|
|
699
|
+
|
|
441
700
|
void CreateChilds()
|
|
442
701
|
{
|
|
443
702
|
if (childs[0]) throw Exception("have already childs");
|
|
@@ -447,20 +706,47 @@ namespace ngsbem
|
|
|
447
706
|
cc(0) += (i&1) ? r/2 : -r/2;
|
|
448
707
|
cc(1) += (i&2) ? r/2 : -r/2;
|
|
449
708
|
cc(2) += (i&4) ? r/2 : -r/2;
|
|
450
|
-
childs[i] = make_unique<Node> (cc, r/2, level+1, mp.Kappa());
|
|
709
|
+
childs[i] = make_unique<Node> (cc, r/2, level+1, mp.Kappa(), fmm_params);
|
|
451
710
|
}
|
|
711
|
+
have_childs = true;
|
|
452
712
|
}
|
|
453
713
|
|
|
454
714
|
|
|
715
|
+
void SendSourcesToChilds()
|
|
716
|
+
{
|
|
717
|
+
CreateChilds();
|
|
718
|
+
|
|
719
|
+
for (auto [x,c] : charges)
|
|
720
|
+
AddCharge (x,c);
|
|
721
|
+
for (auto [x,d,c] : dipoles)
|
|
722
|
+
AddDipole (x,d,c);
|
|
723
|
+
for (auto [x,c,d,c2] : chargedipoles)
|
|
724
|
+
AddChargeDipole (x,c,d,c2);
|
|
725
|
+
for (auto [sp,ep,j,num] : currents)
|
|
726
|
+
AddCurrent (sp,ep,j,num);
|
|
727
|
+
|
|
728
|
+
charges.DeleteAll();
|
|
729
|
+
dipoles.DeleteAll();
|
|
730
|
+
chargedipoles.DeleteAll();
|
|
731
|
+
currents.DeleteAll();
|
|
732
|
+
}
|
|
733
|
+
|
|
734
|
+
|
|
455
735
|
void AddCharge (Vec<3> x, entry_type c)
|
|
456
736
|
{
|
|
457
|
-
if (
|
|
737
|
+
if (have_childs) // quick check without locking
|
|
458
738
|
{
|
|
459
739
|
// directly send to childs:
|
|
460
|
-
int childnum
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
740
|
+
int childnum = GetChildNum(x);
|
|
741
|
+
childs[childnum] -> AddCharge(x, c);
|
|
742
|
+
return;
|
|
743
|
+
}
|
|
744
|
+
|
|
745
|
+
lock_guard<mutex> guard(node_mutex);
|
|
746
|
+
|
|
747
|
+
if (have_childs) // test again after locking
|
|
748
|
+
{
|
|
749
|
+
int childnum = GetChildNum(x);
|
|
464
750
|
childs[childnum] -> AddCharge(x, c);
|
|
465
751
|
return;
|
|
466
752
|
}
|
|
@@ -469,57 +755,78 @@ namespace ngsbem
|
|
|
469
755
|
|
|
470
756
|
// if (r*mp.Kappa() < 1e-8) return;
|
|
471
757
|
if (level > 20) return;
|
|
472
|
-
if (charges.Size() < maxdirect && r*mp.Kappa() <
|
|
758
|
+
if (charges.Size() < fmm_params.maxdirect && r*mp.Kappa() < 5)
|
|
473
759
|
return;
|
|
474
|
-
|
|
475
|
-
CreateChilds();
|
|
476
|
-
|
|
477
|
-
for (auto [x,c] : charges)
|
|
478
|
-
AddCharge (x,c);
|
|
479
|
-
for (auto [x,d,c] : dipoles)
|
|
480
|
-
AddDipole (x,d,c);
|
|
481
|
-
for (auto [sp,ep,j,num] : currents)
|
|
482
|
-
AddCurrent (sp,ep,j,num);
|
|
483
760
|
|
|
484
|
-
|
|
485
|
-
dipoles.SetSize0();
|
|
486
|
-
currents.SetSize0();
|
|
761
|
+
SendSourcesToChilds();
|
|
487
762
|
}
|
|
488
763
|
|
|
489
764
|
|
|
490
765
|
void AddDipole (Vec<3> x, Vec<3> d, entry_type c)
|
|
491
766
|
{
|
|
492
|
-
if (
|
|
767
|
+
if (have_childs)
|
|
493
768
|
{
|
|
494
769
|
// directly send to childs:
|
|
495
|
-
|
|
496
|
-
int childnum = 0;
|
|
497
|
-
if (x(0) > center(0)) childnum += 1;
|
|
498
|
-
if (x(1) > center(1)) childnum += 2;
|
|
499
|
-
if (x(2) > center(2)) childnum += 4;
|
|
770
|
+
int childnum = GetChildNum(x);
|
|
500
771
|
childs[childnum] -> AddDipole(x, d, c);
|
|
501
772
|
return;
|
|
502
773
|
}
|
|
503
774
|
|
|
504
|
-
|
|
775
|
+
lock_guard<mutex> guard(node_mutex);
|
|
505
776
|
|
|
506
|
-
if (
|
|
777
|
+
if (have_childs)
|
|
778
|
+
{
|
|
779
|
+
// directly send to childs:
|
|
780
|
+
int childnum = GetChildNum(x);
|
|
781
|
+
childs[childnum] -> AddDipole(x, d, c);
|
|
782
|
+
return;
|
|
783
|
+
}
|
|
784
|
+
|
|
785
|
+
dipoles.Append (tuple{x,d,c});
|
|
786
|
+
|
|
787
|
+
if (level > 20) return;
|
|
788
|
+
if (dipoles.Size() < fmm_params.maxdirect)
|
|
507
789
|
return;
|
|
790
|
+
|
|
791
|
+
SendSourcesToChilds();
|
|
792
|
+
}
|
|
793
|
+
|
|
794
|
+
|
|
795
|
+
void AddChargeDipole (Vec<3> x, entry_type c, Vec<3> dir, entry_type c2)
|
|
796
|
+
{
|
|
797
|
+
if (have_childs)
|
|
798
|
+
{
|
|
799
|
+
// directly send to childs:
|
|
800
|
+
int childnum = GetChildNum(x);
|
|
801
|
+
childs[childnum] -> AddChargeDipole(x, c, dir, c2);
|
|
802
|
+
return;
|
|
803
|
+
}
|
|
804
|
+
|
|
805
|
+
lock_guard<mutex> guard(node_mutex);
|
|
806
|
+
|
|
807
|
+
if (have_childs)
|
|
808
|
+
{
|
|
809
|
+
// directly send to childs:
|
|
810
|
+
int childnum = GetChildNum(x);
|
|
811
|
+
childs[childnum] -> AddChargeDipole(x, c, dir, c2);
|
|
812
|
+
return;
|
|
813
|
+
}
|
|
508
814
|
|
|
509
|
-
|
|
815
|
+
chargedipoles.Append (tuple{x,c,dir,c2});
|
|
510
816
|
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
for (auto [x,d,c] : dipoles)
|
|
514
|
-
AddDipole (x,d,c);
|
|
515
|
-
for (auto [sp,ep,j,num] : currents)
|
|
516
|
-
AddCurrent (sp,ep,j,num);
|
|
817
|
+
if (chargedipoles.Size() < fmm_params.maxdirect || r < 1e-8)
|
|
818
|
+
return;
|
|
517
819
|
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
820
|
+
SendSourcesToChilds();
|
|
821
|
+
|
|
822
|
+
/*
|
|
823
|
+
AddCharge (x, c);
|
|
824
|
+
AddDipole (x, dir, c2);
|
|
825
|
+
*/
|
|
521
826
|
}
|
|
522
827
|
|
|
828
|
+
|
|
829
|
+
// not parallel yet
|
|
523
830
|
void AddCurrent (Vec<3> sp, Vec<3> ep, Complex j, int num)
|
|
524
831
|
{
|
|
525
832
|
if (childs[0])
|
|
@@ -528,7 +835,7 @@ namespace ngsbem
|
|
|
528
835
|
Array<double> split;
|
|
529
836
|
split.Append(0);
|
|
530
837
|
for (int i = 0; i < 3; i++)
|
|
531
|
-
if (sp(i) < center(i) != ep(i) < center(i))
|
|
838
|
+
if ((sp(i) < center(i)) != (ep(i) < center(i)))
|
|
532
839
|
split += (center(i)-sp(i)) / (ep(i)-sp(i)); // segment cuts i-th coordinate plane
|
|
533
840
|
split.Append(1);
|
|
534
841
|
BubbleSort(split);
|
|
@@ -549,9 +856,15 @@ namespace ngsbem
|
|
|
549
856
|
}
|
|
550
857
|
return;
|
|
551
858
|
}
|
|
552
|
-
|
|
859
|
+
|
|
553
860
|
currents.Append (tuple{sp,ep,j,num});
|
|
554
861
|
|
|
862
|
+
// if (currents.Size() < maxdirect || r < 1e-8)
|
|
863
|
+
if (currents.Size() < 4 || r < 1e-8)
|
|
864
|
+
return;
|
|
865
|
+
|
|
866
|
+
SendSourcesToChilds();
|
|
867
|
+
/*
|
|
555
868
|
// if (currents.Size() < maxdirect || r < 1e-8)
|
|
556
869
|
if (currents.Size() < 4 || r < 1e-8)
|
|
557
870
|
return;
|
|
@@ -568,6 +881,7 @@ namespace ngsbem
|
|
|
568
881
|
charges.SetSize0();
|
|
569
882
|
dipoles.SetSize0();
|
|
570
883
|
currents.SetSize0();
|
|
884
|
+
*/
|
|
571
885
|
}
|
|
572
886
|
|
|
573
887
|
|
|
@@ -583,27 +897,141 @@ namespace ngsbem
|
|
|
583
897
|
return sum;
|
|
584
898
|
}
|
|
585
899
|
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
900
|
+
if (simd_charges.Size())
|
|
901
|
+
{
|
|
902
|
+
// static Timer t("mptool singmp, evaluate, simd charges"); RegionTimer r(t);
|
|
903
|
+
// t.AddFlops (charges.Size());
|
|
904
|
+
|
|
905
|
+
simd_entry_type vsum{0.0};
|
|
906
|
+
if (mp.Kappa() < 1e-12)
|
|
907
|
+
{
|
|
908
|
+
for (auto [x,c] : simd_charges)
|
|
909
|
+
{
|
|
910
|
+
auto rho = L2Norm(p-x);
|
|
911
|
+
auto kernel = 1/(4*M_PI)/rho;
|
|
912
|
+
kernel = If(rho > 0.0, kernel, SIMD<double,FMM_SW>(0.0));
|
|
913
|
+
vsum += kernel * c;
|
|
914
|
+
|
|
915
|
+
/*
|
|
916
|
+
auto rho2 = L2Norm2(p-x);
|
|
917
|
+
auto kernel = (1/(4*M_PI)) * rsqrt(rho2);
|
|
918
|
+
kernel = If(rho2 > 0.0, kernel, SIMD<double,FMM_SW>(0.0));
|
|
919
|
+
vsum += kernel * c;
|
|
920
|
+
*/
|
|
921
|
+
}
|
|
922
|
+
}
|
|
923
|
+
else if (mp.Kappa() < 1e-8)
|
|
924
|
+
for (auto [x,c] : simd_charges)
|
|
925
|
+
{
|
|
926
|
+
auto rho = L2Norm(p-x);
|
|
927
|
+
auto kernel = (1/(4*M_PI))*SIMD<Complex,FMM_SW> (1,rho*mp.Kappa()) / rho;
|
|
928
|
+
kernel = If(rho > 0.0, kernel, SIMD<Complex,FMM_SW>(0.0));
|
|
929
|
+
vsum += kernel * c;
|
|
930
|
+
}
|
|
931
|
+
else
|
|
932
|
+
for (auto [x,c] : simd_charges)
|
|
933
|
+
{
|
|
934
|
+
auto rho = L2Norm(p-x);
|
|
935
|
+
auto [si,co] = sincos(rho*mp.Kappa());
|
|
936
|
+
auto kernel = (1/(4*M_PI))*SIMD<Complex,FMM_SW>(co,si) / rho;
|
|
937
|
+
kernel = If(rho > 0.0, kernel, SIMD<Complex,FMM_SW>(0.0));
|
|
938
|
+
vsum += kernel * c;
|
|
939
|
+
}
|
|
940
|
+
|
|
941
|
+
sum += HSum(vsum);
|
|
942
|
+
}
|
|
943
|
+
else
|
|
944
|
+
{
|
|
945
|
+
if (mp.Kappa() < 1e-8)
|
|
946
|
+
{
|
|
947
|
+
for (auto [x,c] : charges)
|
|
948
|
+
if (double rho = L2Norm(p-x); rho > 0)
|
|
949
|
+
sum += (1/(4*M_PI))*Complex(1,rho*mp.Kappa()) / rho * c;
|
|
950
|
+
}
|
|
951
|
+
else
|
|
952
|
+
for (auto [x,c] : charges)
|
|
953
|
+
if (double rho = L2Norm(p-x); rho > 0)
|
|
954
|
+
sum += (1/(4*M_PI))*exp(Complex(0,rho*mp.Kappa())) / rho * c;
|
|
955
|
+
}
|
|
956
|
+
|
|
957
|
+
if (simd_dipoles.Size())
|
|
958
|
+
{
|
|
959
|
+
// static Timer t("mptool singmp, evaluate, simd dipoles"); RegionTimer r(t);
|
|
960
|
+
|
|
961
|
+
simd_entry_type vsum{0.0};
|
|
962
|
+
for (auto [x,d,c] : simd_dipoles)
|
|
963
|
+
{
|
|
964
|
+
auto rho = L2Norm(p-x);
|
|
965
|
+
auto drhodp = (1.0/rho) * (p-x);
|
|
966
|
+
auto [si,co] = sincos(rho*mp.Kappa());
|
|
967
|
+
auto dGdrho = (1/(4*M_PI))*SIMD<Complex,FMM_SW>(co,si) *
|
|
968
|
+
(-1.0/(rho*rho) + SIMD<Complex,FMM_SW>(0, mp.Kappa())/rho);
|
|
969
|
+
auto kernel = dGdrho * InnerProduct(drhodp, d);
|
|
970
|
+
kernel = If(rho > 0.0, kernel, SIMD<Complex,FMM_SW>(0.0));
|
|
971
|
+
vsum += kernel * c;
|
|
972
|
+
}
|
|
973
|
+
sum += HSum(vsum);
|
|
974
|
+
}
|
|
975
|
+
else
|
|
976
|
+
{
|
|
977
|
+
for (auto [x,d,c] : dipoles)
|
|
978
|
+
if (double rho = L2Norm(p-x); rho > 0)
|
|
979
|
+
{
|
|
980
|
+
Vec<3> drhodp = 1.0/rho * (p-x);
|
|
981
|
+
Complex dGdrho = (1/(4*M_PI))*exp(Complex(0,rho*mp.Kappa())) *
|
|
982
|
+
(Complex(0, mp.Kappa())/rho - 1.0/sqr(rho));
|
|
983
|
+
sum += dGdrho * InnerProduct(drhodp, d) * c;
|
|
984
|
+
}
|
|
985
|
+
}
|
|
986
|
+
|
|
987
|
+
|
|
988
|
+
|
|
989
|
+
if (simd_chargedipoles.Size())
|
|
990
|
+
{
|
|
991
|
+
// static Timer t("mptool singmp, evaluate, simd chargedipoles"); RegionTimer r(t);
|
|
992
|
+
// t.AddFlops (simd_chargedipoles.Size()*FMM_SW);
|
|
993
|
+
|
|
994
|
+
simd_entry_type vsum{0.0};
|
|
995
|
+
for (auto [x,c,d,c2] : simd_chargedipoles)
|
|
996
|
+
{
|
|
997
|
+
auto rho = L2Norm(p-x);
|
|
998
|
+
auto rhokappa = rho*mp.Kappa();
|
|
999
|
+
auto invrho = If(rho>0.0, 1.0/rho, SIMD<double,FMM_SW>(0.0));
|
|
1000
|
+
auto [si,co] = sincos(rhokappa);
|
|
1001
|
+
|
|
1002
|
+
auto kernelc = (1/(4*M_PI))*invrho*SIMD<Complex,FMM_SW>(co,si);
|
|
1003
|
+
vsum += kernelc * c;
|
|
1004
|
+
|
|
1005
|
+
auto kernel =
|
|
1006
|
+
invrho*invrho * InnerProduct(p-x, d) *
|
|
1007
|
+
kernelc * SIMD<Complex,FMM_SW>(-1.0, rhokappa);
|
|
1008
|
+
|
|
1009
|
+
vsum += kernel * c2;
|
|
1010
|
+
}
|
|
1011
|
+
sum += HSum(vsum);
|
|
1012
|
+
}
|
|
1013
|
+
else
|
|
1014
|
+
{
|
|
1015
|
+
// static Timer t("mptool singmp, evaluate, chargedipoles"); RegionTimer r(t);
|
|
1016
|
+
// t.AddFlops (chargedipoles.Size());
|
|
1017
|
+
|
|
1018
|
+
for (auto [x,c,d,c2] : chargedipoles)
|
|
1019
|
+
if (double rho = L2Norm(p-x); rho > 0)
|
|
1020
|
+
{
|
|
1021
|
+
sum += (1/(4*M_PI))*exp(Complex(0,rho*mp.Kappa())) / rho * c;
|
|
1022
|
+
|
|
1023
|
+
Vec<3> drhodp = 1.0/rho * (p-x);
|
|
1024
|
+
Complex dGdrho = (1/(4*M_PI))*exp(Complex(0,rho*mp.Kappa())) *
|
|
1025
|
+
(Complex(0, mp.Kappa())/rho - 1.0/sqr(rho));
|
|
1026
|
+
|
|
1027
|
+
sum += dGdrho * InnerProduct(drhodp, d) * c2;
|
|
1028
|
+
}
|
|
1029
|
+
}
|
|
1030
|
+
|
|
1031
|
+
|
|
1032
|
+
|
|
1033
|
+
|
|
597
1034
|
|
|
598
|
-
for (auto [x,d,c] : dipoles)
|
|
599
|
-
if (double rho = L2Norm(p-x); rho > 0)
|
|
600
|
-
{
|
|
601
|
-
Vec<3> drhodp = 1.0/rho * (p-x);
|
|
602
|
-
Complex dGdrho = (1/(4*M_PI))*exp(Complex(0,rho*mp.Kappa())) *
|
|
603
|
-
(Complex(0, mp.Kappa())/rho - 1.0/sqr(rho));
|
|
604
|
-
sum += dGdrho * InnerProduct(drhodp, d) * c;
|
|
605
|
-
}
|
|
606
|
-
|
|
607
1035
|
for (auto [sp,ep,j,num] : currents)
|
|
608
1036
|
{
|
|
609
1037
|
// should use explicit formula instead ...
|
|
@@ -640,7 +1068,16 @@ namespace ngsbem
|
|
|
640
1068
|
}
|
|
641
1069
|
|
|
642
1070
|
if (dipoles.Size())
|
|
643
|
-
|
|
1071
|
+
{
|
|
1072
|
+
static int cnt = 0;
|
|
1073
|
+
cnt++;
|
|
1074
|
+
if (cnt < 3)
|
|
1075
|
+
cout << "we know what we do - evaluateDeriv not implemented for dipoles in SingularMLExpansion" << endl;
|
|
1076
|
+
// return sum;
|
|
1077
|
+
// throw Exception("EvaluateDeriv not implemented for dipoles in SingularMLExpansion");
|
|
1078
|
+
}
|
|
1079
|
+
if (chargedipoles.Size())
|
|
1080
|
+
throw Exception("EvaluateDeriv not implemented for dipoles in SingularMLExpansion");
|
|
644
1081
|
|
|
645
1082
|
for (auto [x,c] : charges)
|
|
646
1083
|
if (double rho = L2Norm(p-x); rho > 0)
|
|
@@ -655,7 +1092,7 @@ namespace ngsbem
|
|
|
655
1092
|
|
|
656
1093
|
void CalcTotalSources()
|
|
657
1094
|
{
|
|
658
|
-
total_sources = charges.Size() + dipoles.Size();
|
|
1095
|
+
total_sources = charges.Size() + dipoles.Size() + chargedipoles.Size();
|
|
659
1096
|
for (auto & child : childs)
|
|
660
1097
|
if (child)
|
|
661
1098
|
{
|
|
@@ -664,46 +1101,111 @@ namespace ngsbem
|
|
|
664
1101
|
}
|
|
665
1102
|
}
|
|
666
1103
|
|
|
667
|
-
void CalcMP()
|
|
1104
|
+
// Build the multipole expansion mp of this node (recursively).
//
// recording        ... if non-null, child -> parent translations of children
//                      with a non-empty coefficient vector are appended as
//                      RecordingSS entries instead of being applied here;
//                      the recursion is then done serially
// nodes_to_process ... if non-null, a leaf holding sources appends itself to
//                      this list instead of adding its sources to mp here
void CalcMP(Array<RecordingSS> * recording, Array<Node*> * nodes_to_process)
{
  // mp.SH().Coefs() = 0.0;

  if (!childs[0])
    {
      // ---------------- leaf node ----------------

      if (charges.Size()+dipoles.Size()+chargedipoles.Size()+currents.Size() == 0)
        {
          // nothing stored here: replace mp by an expansion of order -1
          mp = SphericalExpansion<Singular,entry_type> (-1, mp.Kappa(), 1.);
          return;
        }

      // make simd charges, comment this block for testing ...
      // Sources are packed FMM_SW at a time; unused lanes of the last block
      // repeat the position of its first entry with zero coefficient.
      {
        const size_t nsrc = charges.Size();
        simd_charges.SetSize( (charges.Size()+FMM_SW-1)/FMM_SW);
        size_t block = 0;
        for (size_t first = 0; first < nsrc; first += FMM_SW, block++)
          {
            const size_t left = nsrc-first;
            const size_t used = left < size_t(FMM_SW) ? left : size_t(FMM_SW);

            std::array<tuple<Vec<3>,entry_type>, FMM_SW> lanes;
            for (size_t j = 0; j < used; j++)
              lanes[j] = charges[first+j];
            for (size_t j = used; j < size_t(FMM_SW); j++)
              lanes[j] = tuple( get<0>(lanes[0]), entry_type{0.0} );
            simd_charges[block] = MakeSimd(lanes);
          }
      }

      {
        const size_t nsrc = dipoles.Size();
        simd_dipoles.SetSize( (dipoles.Size()+FMM_SW-1)/FMM_SW);
        size_t block = 0;
        for (size_t first = 0; first < nsrc; first += FMM_SW, block++)
          {
            const size_t left = nsrc-first;
            const size_t used = left < size_t(FMM_SW) ? left : size_t(FMM_SW);

            std::array<tuple<Vec<3>,Vec<3>,entry_type>, FMM_SW> lanes;
            for (size_t j = 0; j < used; j++)
              lanes[j] = dipoles[first+j];
            for (size_t j = used; j < size_t(FMM_SW); j++)
              lanes[j] = tuple( get<0>(lanes[0]), get<1>(lanes[0]), entry_type{0.0} );
            simd_dipoles[block] = MakeSimd(lanes);
          }
      }

      {
        const size_t nsrc = chargedipoles.Size();
        simd_chargedipoles.SetSize( (chargedipoles.Size()+FMM_SW-1)/FMM_SW);
        size_t block = 0;
        for (size_t first = 0; first < nsrc; first += FMM_SW, block++)
          {
            const size_t left = nsrc-first;
            const size_t used = left < size_t(FMM_SW) ? left : size_t(FMM_SW);

            std::array<tuple<Vec<3>,entry_type,Vec<3>,entry_type>, FMM_SW> lanes;
            for (size_t j = 0; j < used; j++)
              lanes[j] = chargedipoles[first+j];
            for (size_t j = used; j < size_t(FMM_SW); j++)
              lanes[j] = tuple( get<0>(lanes[0]), entry_type{0.0}, get<2>(lanes[0]), entry_type{0.0} );
            simd_chargedipoles[block] = MakeSimd(lanes);
          }
      }

      if (nodes_to_process)
        {
          // the caller adds the sources of this leaf to mp later
          *nodes_to_process += this;
          return;
        }

      // add all stored sources to mp, positions relative to the node center
      for (auto [x,c] : charges)
        mp.AddCharge (x-center,c);

      for (auto [x,d,c] : dipoles)
        mp.AddDipole (x-center, d, c);

      for (auto [x,c,d,c2] : chargedipoles)
        mp.AddChargeDipole (x-center, c, d, c2);

      for (auto [sp,ep,j,num] : currents)
        mp.AddCurrent (sp-center, ep-center, j, num);

      return;
    }

  // ---------------- inner node ----------------

  // children first: serial for small subtrees or when recording,
  // otherwise the 8 children are processed by ParallelFor
  const bool serial = total_sources < 1000 || recording;
  if (serial)
    {
      for (auto & child : childs)
        child->CalcMP(recording, nodes_to_process);
    }
  else
    {
      ParallelFor (8, [&] (int nr)
                   {
                     childs[nr] -> CalcMP(recording, nodes_to_process);
                   });
    }

  // child -> parent: either queue the translation or apply it right away
  for (auto & child : childs)
    {
      const bool queue = recording && child->mp.SH().Coefs().Size() > 0;
      if (queue)
        *recording += RecordingSS(&child->mp, &mp, center-child->center);
      else
        child->mp.TransformAdd(mp, center-child->center);
    }
}
|
|
703
1205
|
|
|
704
1206
|
entry_type EvaluateMP(Vec<3> p) const
|
|
705
1207
|
{
|
|
706
|
-
if (charges.Size() || dipoles.Size())
|
|
1208
|
+
if (charges.Size() || dipoles.Size() || chargedipoles.Size())
|
|
707
1209
|
return Evaluate(p);
|
|
708
1210
|
|
|
709
1211
|
if (L2Norm(p-center) > 3*r)
|
|
@@ -723,7 +1225,7 @@ namespace ngsbem
|
|
|
723
1225
|
// cout << "EvaluateMPDeriv Singular, p = " << p << ", d = " << d << ", r = " << r << ", center = " << center << endl;
|
|
724
1226
|
// cout << "Norm: " << L2Norm(p-center) << " > " << 3*r << endl;
|
|
725
1227
|
// cout << "charges.Size() = " << charges.Size() << ", dipoles.Size() = " << dipoles.Size() << endl;
|
|
726
|
-
if (charges.Size() || dipoles.Size() || !childs[0])
|
|
1228
|
+
if (charges.Size() || dipoles.Size() || chargedipoles.Size() || !childs[0])
|
|
727
1229
|
return EvaluateDeriv(p, d);
|
|
728
1230
|
|
|
729
1231
|
if (L2Norm(p-center) > 3*r)
|
|
@@ -746,6 +1248,8 @@ namespace ngsbem
|
|
|
746
1248
|
ost << "xi = " << x << ", ci = " << c << endl;
|
|
747
1249
|
for (auto [x,d,c] : dipoles)
|
|
748
1250
|
ost << "xi = " << x << ", di = " << d << ", ci = " << c << endl;
|
|
1251
|
+
for (auto [x,c,d,c2] : chargedipoles)
|
|
1252
|
+
ost << "xi = " << x << ", c = " << c << ", di = " << d << ", ci = " << c2 << endl;
|
|
749
1253
|
|
|
750
1254
|
for (int i = 0; i < 8; i++)
|
|
751
1255
|
if (childs[i]) childs[i] -> Print (ost, i);
|
|
@@ -768,14 +1272,23 @@ namespace ngsbem
|
|
|
768
1272
|
num += ch->NumCoefficients();
|
|
769
1273
|
return num;
|
|
770
1274
|
}
|
|
1275
|
+
|
|
1276
|
+
// Call func for this node first and then, recursively, for every existing
// child (pre-order traversal of the subtree below this node).
void TraverseTree (const std::function<void(Node&)> & func)
{
  func(*this);

  for (auto & child : childs)
    {
      if (!child) continue;   // empty child slot
      child->TraverseTree(func);
    }
}
|
|
771
1283
|
};
|
|
772
1284
|
|
|
773
|
-
|
|
1285
|
+
FMM_Parameters fmm_params;
|
|
1286
|
+
Node root;
|
|
774
1287
|
bool havemp = false;
|
|
775
1288
|
|
|
776
1289
|
public:
|
|
777
|
-
|
|
778
|
-
: root(center, r, 0, kappa)
|
|
1290
|
+
SingularMLExpansion (Vec<3> center, double r, double kappa, FMM_Parameters _params = FMM_Parameters())
|
|
1291
|
+
: fmm_params(_params), root(center, r, 0, kappa, fmm_params)
|
|
779
1292
|
{
|
|
780
1293
|
nodes_on_level = 0;
|
|
781
1294
|
nodes_on_level[0] = 1;
|
|
@@ -793,6 +1306,11 @@ namespace ngsbem
|
|
|
793
1306
|
root.AddDipole(x, d, c);
|
|
794
1307
|
}
|
|
795
1308
|
|
|
1309
|
+
// Add a combined source at x (charge coefficient c, dipole direction dir,
// dipole coefficient c2); the source is inserted at the root of the tree.
void AddChargeDipole(Vec<3> x, entry_type c, Vec<3> dir, entry_type c2)
{
  auto & top = root;
  top.AddChargeDipole(x, c, dir, c2);
}
|
|
1313
|
+
|
|
796
1314
|
void AddCurrent (Vec<3> sp, Vec<3> ep, Complex j, int num)
|
|
797
1315
|
{
|
|
798
1316
|
if constexpr (!std::is_same<entry_type, Vec<3,Complex>>())
|
|
@@ -836,6 +1354,10 @@ namespace ngsbem
|
|
|
836
1354
|
void CalcMP()
|
|
837
1355
|
{
|
|
838
1356
|
static Timer t("mptool compute singular MLMP"); RegionTimer rg(t);
|
|
1357
|
+
static Timer ts2mp("mptool compute singular MLMP - source2mp");
|
|
1358
|
+
static Timer tS2S("mptool compute singular MLMP - S->S");
|
|
1359
|
+
static Timer trec("mptool comput singular recording");
|
|
1360
|
+
static Timer tsort("mptool comput singular sort");
|
|
839
1361
|
|
|
840
1362
|
/*
|
|
841
1363
|
int maxlevel = 0;
|
|
@@ -847,8 +1369,94 @@ namespace ngsbem
|
|
|
847
1369
|
*/
|
|
848
1370
|
|
|
849
1371
|
root.CalcTotalSources();
|
|
850
|
-
|
|
1372
|
+
|
|
1373
|
+
if constexpr (false)
|
|
1374
|
+
// direct evaluation of S->S
|
|
1375
|
+
root.CalcMP(nullptr, nullptr);
|
|
1376
|
+
else
|
|
1377
|
+
{
|
|
1378
|
+
|
|
1379
|
+
Array<RecordingSS> recording;
|
|
1380
|
+
Array<Node*> nodes_to_process;
|
|
1381
|
+
|
|
1382
|
+
{
|
|
1383
|
+
RegionTimer reg(trec);
|
|
1384
|
+
root.CalcMP(&recording, &nodes_to_process);
|
|
1385
|
+
}
|
|
851
1386
|
|
|
1387
|
+
{
|
|
1388
|
+
RegionTimer rs2mp(ts2mp);
|
|
1389
|
+
ParallelFor(nodes_to_process.Size(), [&](int i)
|
|
1390
|
+
{
|
|
1391
|
+
auto node = nodes_to_process[i];
|
|
1392
|
+
for (auto [x,c]: node->charges)
|
|
1393
|
+
node->mp.AddCharge(x-node->center, c);
|
|
1394
|
+
for (auto [x,d,c]: node->dipoles)
|
|
1395
|
+
node->mp.AddDipole(x-node->center, d, c);
|
|
1396
|
+
for (auto [x,c,d,c2]: node->chargedipoles)
|
|
1397
|
+
node->mp.AddChargeDipole(x-node->center, c, d, c2);
|
|
1398
|
+
for (auto [sp,ep,j,num]: node->currents)
|
|
1399
|
+
node->mp.AddCurrent(sp-node->center, ep-node->center, j, num);
|
|
1400
|
+
}, TasksPerThread(4));
|
|
1401
|
+
}
|
|
1402
|
+
|
|
1403
|
+
{
|
|
1404
|
+
RegionTimer reg(tsort);
|
|
1405
|
+
QuickSort (recording, [] (auto & a, auto & b)
|
|
1406
|
+
{
|
|
1407
|
+
if (a.len < (1-1e-8) * b.len) return true;
|
|
1408
|
+
if (a.len > (1+1e-8) * b.len) return false;
|
|
1409
|
+
return a.theta < b.theta;
|
|
1410
|
+
});
|
|
1411
|
+
}
|
|
1412
|
+
|
|
1413
|
+
double current_len = -1e100;
|
|
1414
|
+
double current_theta = -1e100;
|
|
1415
|
+
Array<RecordingSS*> current_batch;
|
|
1416
|
+
Array<Array<RecordingSS*>> batch_group;
|
|
1417
|
+
Array<double> group_lengths;
|
|
1418
|
+
Array<double> group_thetas;
|
|
1419
|
+
for (auto & record : recording)
|
|
1420
|
+
{
|
|
1421
|
+
bool len_changed = fabs(record.len - current_len) > 1e-8;
|
|
1422
|
+
bool theta_changed = fabs(record.theta - current_theta) > 1e-8;
|
|
1423
|
+
if ((len_changed || theta_changed) && current_batch.Size() > 0) {
|
|
1424
|
+
batch_group.Append(current_batch);
|
|
1425
|
+
group_lengths.Append(current_len);
|
|
1426
|
+
group_thetas.Append(current_theta);
|
|
1427
|
+
current_batch.SetSize(0);
|
|
1428
|
+
}
|
|
1429
|
+
|
|
1430
|
+
current_len = record.len;
|
|
1431
|
+
current_theta = record.theta;
|
|
1432
|
+
current_batch.Append(&record);
|
|
1433
|
+
}
|
|
1434
|
+
|
|
1435
|
+
if (current_batch.Size() > 0) {
|
|
1436
|
+
batch_group.Append(current_batch);
|
|
1437
|
+
group_lengths.Append(current_len);
|
|
1438
|
+
group_thetas.Append(current_theta);
|
|
1439
|
+
}
|
|
1440
|
+
|
|
1441
|
+
{
|
|
1442
|
+
RegionTimer rS2S(tS2S);
|
|
1443
|
+
// ParallelFor(batch_group.Size(), [&](int i) {
|
|
1444
|
+
for (int i = 0; i < batch_group.Size(); i++){
|
|
1445
|
+
// *testout << "Processing batch " << i << " of size " << batch_group[i].Size() << ", with len = " << group_lengths[i] << ", theta = " << group_thetas[i] << endl;
|
|
1446
|
+
int chunk_size = 24;
|
|
1447
|
+
if (batch_group[i].Size() < chunk_size)
|
|
1448
|
+
ProcessBatchSS(batch_group[i], group_lengths[i], group_thetas[i]);
|
|
1449
|
+
else
|
|
1450
|
+
ParallelForRange(IntRange(batch_group[i].Size()), [&](IntRange range) {
|
|
1451
|
+
auto sub_batch = batch_group[i].Range(range.First(), range.Next());
|
|
1452
|
+
ProcessBatchSS(sub_batch, group_lengths[i], group_thetas[i]);
|
|
1453
|
+
}, TasksPerThread(4));
|
|
1454
|
+
}
|
|
1455
|
+
}
|
|
1456
|
+
}
|
|
1457
|
+
|
|
1458
|
+
// cout << "have singular:" << endl;
|
|
1459
|
+
// PrintStatistics (cout);
|
|
852
1460
|
havemp = true;
|
|
853
1461
|
}
|
|
854
1462
|
|
|
@@ -860,23 +1468,198 @@ namespace ngsbem
|
|
|
860
1468
|
return root.Evaluate(p);
|
|
861
1469
|
}
|
|
862
1470
|
|
|
1471
|
+
|
|
1472
|
+
// Write tree statistics to ost: depth of the tree, number of nodes, and per
// level the node count, the maximal expansion order and the number of stored
// coefficients, followed by the total coefficient memory in MB.
void PrintStatistics (ostream & ost)
{
  // first pass: depth of the tree and total number of nodes
  int levels = 0;
  int cnt = 0;
  root.TraverseTree( [&](Node & node) {
    levels = max(levels, node.level);
    cnt++;
  });
  ost << "levels: " << levels << endl;
  ost << "nodes: " << cnt << endl;

  Array<int> num_on_level(levels+1);
  Array<int> order_on_level(levels+1);
  Array<size_t> coefs_on_level(levels+1);
  num_on_level = 0;
  order_on_level = 0;
  // the coefficient counters are accumulated with += below,
  // so they have to start at zero like the other two arrays
  coefs_on_level = size_t(0);

  // second pass: collect the per-level numbers
  root.TraverseTree( [&](Node & node) {
    num_on_level[node.level]++;
    order_on_level[node.level] = max(order_on_level[node.level],node.mp.Order());
    coefs_on_level[node.level] += node.mp.SH().Coefs().Size();
  });

  // everything goes to the stream passed by the caller
  ost << "num on level" << endl;
  for (int i = 0; i < num_on_level.Size(); i++)
    ost << i << ": " << num_on_level[i] << ", order = " << order_on_level[i] << ", coefs " << coefs_on_level[i] << endl;

  size_t totcoefs = 0;
  for (auto n : coefs_on_level)
    totcoefs += n;

  // floating point division, otherwise everything below 1 MB is printed as 0
  const double mbytes = double(sizeof(entry_type)*totcoefs) / (1024.0*1024.0);
  ost << "total mem in coefs: " << mbytes << " MB" << endl;
}
|
|
1503
|
+
|
|
1504
|
+
|
|
1505
|
+
|
|
863
1506
|
template <typename entry_type2>
|
|
864
|
-
friend class
|
|
1507
|
+
friend class RegularMLExpansion;
|
|
865
1508
|
};
|
|
866
1509
|
|
|
867
1510
|
|
|
868
1511
|
template <typename entry_type>
|
|
869
|
-
inline ostream & operator<< (ostream & ost, const
|
|
1512
|
+
inline ostream & operator<< (ostream & ost, const SingularMLExpansion<entry_type> & mlmp)
|
|
870
1513
|
{
|
|
871
1514
|
mlmp.Print(ost);
|
|
872
1515
|
return ost;
|
|
873
1516
|
}
|
|
874
1517
|
|
|
875
1518
|
|
|
1519
|
+
// *********************************** Regular multilevel Expansion
|
|
1520
|
+
|
|
1521
|
+
|
|
876
1522
|
template <typename elem_type=Complex>
|
|
877
|
-
class NGS_DLL_HEADER
|
|
1523
|
+
class NGS_DLL_HEADER RegularMLExpansion
|
|
878
1524
|
{
|
|
879
1525
|
static Array<size_t> nodes_on_level;
|
|
1526
|
+
|
|
1527
|
+
|
|
1528
|
+
struct RecordingRS
|
|
1529
|
+
{
|
|
1530
|
+
const SphericalExpansion<Singular,elem_type> * mpS;
|
|
1531
|
+
SphericalExpansion<Regular,elem_type> * mpR;
|
|
1532
|
+
Vec<3> dist;
|
|
1533
|
+
double len, theta, phi;
|
|
1534
|
+
public:
|
|
1535
|
+
RecordingRS() = default;
|
|
1536
|
+
RecordingRS (const SphericalExpansion<Singular,elem_type> * ampS,
|
|
1537
|
+
SphericalExpansion<Regular,elem_type> * ampR,
|
|
1538
|
+
Vec<3> adist)
|
|
1539
|
+
: mpS(ampS), mpR(ampR), dist(adist)
|
|
1540
|
+
{
|
|
1541
|
+
std::tie(len, theta, phi) = SphericalCoordinates(dist);
|
|
1542
|
+
}
|
|
1543
|
+
};
|
|
1544
|
+
|
|
1545
|
+
// Apply a batch of recorded singular -> regular translations; len and theta
// are passed on unchanged to the vectorized kernel.
// With N = batch size * VecLength<elem_type>:
//   - N <= 1 or a single record: every record is transformed on its own
//   - N <= 192: the smallest instantiated width (3,4,6,12,24,48,96,192)
//     that is >= N is used
//   - otherwise the batch is cut into chunks of 192/vec_length records which
//     are processed through ParallelFor
static void ProcessBatchRS(FlatArray<RecordingRS*> batch, double len, double theta) {
  // static Timer t("ProcessBatchRS"); RegionTimer reg(t, batch.Size());
  constexpr int vec_length = VecLength<elem_type>;
  const int num_records = batch.Size();
  const int N = num_records * vec_length;

  if (N <= 1 || num_records <= 1)
    {
      for (auto* rec : batch)
        rec->mpS->TransformAdd(*rec->mpR, rec->dist);
      return;
    }

  if (N <= 3)   { ProcessVectorizedBatchRS<3, vec_length>(batch, len, theta);   return; }
  if (N <= 4)   { ProcessVectorizedBatchRS<4, vec_length>(batch, len, theta);   return; }
  if (N <= 6)   { ProcessVectorizedBatchRS<6, vec_length>(batch, len, theta);   return; }
  if (N <= 12)  { ProcessVectorizedBatchRS<12, vec_length>(batch, len, theta);  return; }
  if (N <= 24)  { ProcessVectorizedBatchRS<24, vec_length>(batch, len, theta);  return; }
  if (N <= 48)  { ProcessVectorizedBatchRS<48, vec_length>(batch, len, theta);  return; }
  if (N <= 96)  { ProcessVectorizedBatchRS<96, vec_length>(batch, len, theta);  return; }
  if (N <= 192) { ProcessVectorizedBatchRS<192, vec_length>(batch, len, theta); return; }

  // Split large batches
  size_t chunksize = 192/vec_length;
  size_t num = (batch.Size()+chunksize-1) / chunksize;
  ParallelFor (num, [&](int i)
               {
                 const size_t first = i*chunksize;
                 const size_t next = min((i+1)*chunksize, batch.Size());
                 ProcessBatchRS(batch.Range(first, next), len, theta);
               }, num);
}
|
|
1608
|
+
|
|
1609
|
+
|
|
1610
|
+
template<int N, int vec_length>
|
|
1611
|
+
static void ProcessVectorizedBatchRS(FlatArray<RecordingRS*> batch, double len, double theta) {
|
|
1612
|
+
|
|
1613
|
+
// static Timer t("ProcessVectorizedBatch, N = "+ToString(N) + ", vec_len = " + ToString(vec_length));
|
|
1614
|
+
// RegionTimer reg(t, batch[0]->mpS->SH().Order());
|
|
1615
|
+
// static Timer ttobatch("mptools - copy to batch 2");
|
|
1616
|
+
// static Timer tfrombatch("mptools - copy from batch 2");
|
|
1617
|
+
|
|
1618
|
+
// *testout << "Processing vectorized batch of size " << batch.Size() << ", with N = " << N << ", vec_length = " << vec_length << ", len = " << len << ", theta = " << theta << endl;
|
|
1619
|
+
SphericalExpansion<Singular, Vec<N,Complex>> vec_source(batch[0]->mpS->Order(), batch[0]->mpS->Kappa(), batch[0]->mpS->RTyp());
|
|
1620
|
+
// SphericalExpansion<Singular, elem_type> tmp_source{*batch[0]->mpS};
|
|
1621
|
+
SphericalExpansion<Regular, elem_type> tmp_target{*batch[0]->mpR};
|
|
1622
|
+
SphericalExpansion<Regular, Vec<N,Complex>> vec_target(batch[0]->mpR->Order(), batch[0]->mpR->Kappa(), batch[0]->mpR->RTyp());
|
|
1623
|
+
|
|
1624
|
+
// Copy multipoles into vectorized multipole
|
|
1625
|
+
// ttobatch.Start();
|
|
1626
|
+
for (int i = 0; i < batch.Size(); i++)
|
|
1627
|
+
{
|
|
1628
|
+
auto source_i = VecVector2Matrix (batch[i]->mpS->SH().Coefs());
|
|
1629
|
+
auto source_mati = VecVector2Matrix (vec_source.SH().Coefs()).Cols(i*vec_length, (i+1)*vec_length);
|
|
1630
|
+
batch[i]->mpS->SH().RotateZ(batch[i]->phi,
|
|
1631
|
+
[source_i, source_mati] (size_t ii, Complex factor)
|
|
1632
|
+
{
|
|
1633
|
+
source_mati.Row(ii) = factor * source_i.Row(ii);
|
|
1634
|
+
});
|
|
1635
|
+
}
|
|
1636
|
+
|
|
1637
|
+
// ttobatch.Stop();
|
|
1638
|
+
|
|
1639
|
+
vec_source.SH().RotateY(theta);
|
|
1640
|
+
vec_source.ShiftZ(-len, vec_target);
|
|
1641
|
+
vec_target.SH().RotateY(-theta);
|
|
1642
|
+
|
|
1643
|
+
// Copy vectorized multipole into individual multipoles
|
|
1644
|
+
// tfrombatch.Start();
|
|
1645
|
+
for (int i = 0; i < batch.Size(); i++) {
|
|
1646
|
+
// auto source_i = VecVector2Matrix (tmp_target.SH().Coefs());
|
|
1647
|
+
auto source_mati = VecVector2Matrix (vec_target.SH().Coefs()).Cols(i*vec_length, (i+1)*vec_length);
|
|
1648
|
+
auto targeti = VecVector2Matrix(batch[i]->mpR->SH().Coefs());
|
|
1649
|
+
|
|
1650
|
+
tmp_target.SH().RotateZ(-batch[i]->phi,
|
|
1651
|
+
[source_mati, targeti] (size_t ii, Complex factor)
|
|
1652
|
+
{
|
|
1653
|
+
// source_i.Row(ii) = factor * source_mati.Row(ii);
|
|
1654
|
+
AtomicAdd (VectorView(targeti.Row(ii)), factor * source_mati.Row(ii));
|
|
1655
|
+
});
|
|
1656
|
+
// for (int j = 0; j < tmp_target.SH().Coefs().Size(); j++)
|
|
1657
|
+
// AtomicAdd(batch[i]->mpR->SH().Coefs()[j], tmp_target.SH().Coefs()[j]);
|
|
1658
|
+
}
|
|
1659
|
+
// tfrombatch.Stop();
|
|
1660
|
+
|
|
1661
|
+
}
|
|
1662
|
+
|
|
880
1663
|
|
|
881
1664
|
struct Node
|
|
882
1665
|
{
|
|
@@ -884,22 +1667,35 @@ namespace ngsbem
|
|
|
884
1667
|
double r;
|
|
885
1668
|
int level;
|
|
886
1669
|
std::array<unique_ptr<Node>,8> childs;
|
|
887
|
-
|
|
1670
|
+
SphericalExpansion<Regular,elem_type> mp;
|
|
888
1671
|
Array<Vec<3>> targets;
|
|
1672
|
+
Array<tuple<Vec<3>,double>> vol_targets;
|
|
889
1673
|
int total_targets;
|
|
1674
|
+
std::mutex node_mutex;
|
|
1675
|
+
atomic<bool> have_childs{false};
|
|
890
1676
|
|
|
891
|
-
Array<const typename
|
|
1677
|
+
Array<const typename SingularMLExpansion<elem_type>::Node*> singnodes;
|
|
1678
|
+
const FMM_Parameters & params;
|
|
892
1679
|
|
|
893
|
-
|
|
894
|
-
|
|
1680
|
+
|
|
1681
|
+
// Create a node with center acenter, size ar and tree level alevel.
// The expansion mp is constructed with order -1, kappa and ar (earlier,
// commented-out variants used MPOrder(ar*kappa) as order); _params is kept
// by reference. If the level is within the range of nodes_on_level, the
// counter of that level is incremented.
Node (Vec<3> acenter, double ar, int alevel, double kappa, const FMM_Parameters & _params)
  : center(acenter),
    r(ar),
    level(alevel),
    mp(-1, kappa, ar),
    params(_params)
{
  const bool level_is_counted = level < nodes_on_level.Size();
  if (level_is_counted)
    ++nodes_on_level[level];
}
|
|
900
1690
|
|
|
901
|
-
|
|
902
|
-
|
|
1691
|
+
void Allocate()
|
|
1692
|
+
{
|
|
1693
|
+
// mp = SphericalExpansion<Regular,elem_type>(MPOrder(r*mp.Kappa()), mp.Kappa(), r);
|
|
1694
|
+
mp = SphericalExpansion<Regular,elem_type>(params.minorder+2*r*mp.Kappa(), mp.Kappa(), r);
|
|
1695
|
+
}
|
|
1696
|
+
|
|
1697
|
+
|
|
1698
|
+
void CreateChilds(bool allocate = false)
|
|
903
1699
|
{
|
|
904
1700
|
if (childs[0]) throw Exception("have already childs");
|
|
905
1701
|
// create children nodes:
|
|
@@ -909,15 +1705,19 @@ namespace ngsbem
|
|
|
909
1705
|
cc(0) += (i&1) ? r/2 : -r/2;
|
|
910
1706
|
cc(1) += (i&2) ? r/2 : -r/2;
|
|
911
1707
|
cc(2) += (i&4) ? r/2 : -r/2;
|
|
912
|
-
childs[i] = make_unique<Node> (cc, r/2, level+1, mp.Kappa());
|
|
1708
|
+
childs[i] = make_unique<Node> (cc, r/2, level+1, mp.Kappa(), params);
|
|
1709
|
+
if (allocate)
|
|
1710
|
+
childs[i] -> Allocate();
|
|
913
1711
|
}
|
|
1712
|
+
have_childs = true;
|
|
914
1713
|
}
|
|
915
|
-
|
|
916
|
-
void AddSingularNode (const typename
|
|
1714
|
+
|
|
1715
|
+
void AddSingularNode (const typename SingularMLExpansion<elem_type>::Node & singnode, bool allow_refine,
|
|
1716
|
+
Array<RecordingRS> * recording)
|
|
917
1717
|
{
|
|
918
1718
|
if (mp.SH().Order() < 0) return;
|
|
919
1719
|
if (singnode.mp.SH().Order() < 0) return;
|
|
920
|
-
if (L2Norm(singnode.mp.SH().Coefs()) == 0) return;
|
|
1720
|
+
// if (L2Norm(singnode.mp.SH().Coefs()) == 0) return;
|
|
921
1721
|
if (level > 20)
|
|
922
1722
|
{
|
|
923
1723
|
singnodes.Append(&singnode);
|
|
@@ -936,12 +1736,15 @@ namespace ngsbem
|
|
|
936
1736
|
singnode.childs[0]->mp.Order() < singnode.mp.Order())
|
|
937
1737
|
{
|
|
938
1738
|
for (auto & child : singnode.childs)
|
|
939
|
-
AddSingularNode (*child, allow_refine);
|
|
1739
|
+
AddSingularNode (*child, allow_refine, recording);
|
|
940
1740
|
return;
|
|
941
1741
|
}
|
|
942
1742
|
|
|
943
1743
|
// static Timer t("mptool transform Helmholtz-criterion"); RegionTimer r(t);
|
|
944
|
-
|
|
1744
|
+
if (recording)
|
|
1745
|
+
*recording += RecordingRS(&singnode.mp, &mp, dist);
|
|
1746
|
+
else
|
|
1747
|
+
singnode.mp.TransformAdd(mp, dist);
|
|
945
1748
|
return;
|
|
946
1749
|
}
|
|
947
1750
|
|
|
@@ -957,70 +1760,70 @@ namespace ngsbem
|
|
|
957
1760
|
if (allow_refine)
|
|
958
1761
|
{
|
|
959
1762
|
if (!childs[0])
|
|
960
|
-
CreateChilds();
|
|
1763
|
+
CreateChilds(true);
|
|
961
1764
|
|
|
962
1765
|
for (auto & ch : childs)
|
|
963
|
-
ch -> AddSingularNode (singnode, allow_refine);
|
|
1766
|
+
ch -> AddSingularNode (singnode, allow_refine, recording);
|
|
964
1767
|
}
|
|
965
1768
|
else
|
|
966
1769
|
{
|
|
967
|
-
if (total_targets < 1000)
|
|
1770
|
+
if (total_targets < 1000 || recording)
|
|
968
1771
|
{
|
|
969
1772
|
for (auto & ch : childs)
|
|
970
1773
|
if (ch)
|
|
971
|
-
ch -> AddSingularNode (singnode, allow_refine);
|
|
1774
|
+
ch -> AddSingularNode (singnode, allow_refine, recording);
|
|
972
1775
|
}
|
|
973
1776
|
else
|
|
974
1777
|
ParallelFor (8, [&] (int nr)
|
|
975
1778
|
{
|
|
976
1779
|
if (childs[nr])
|
|
977
|
-
childs[nr] -> AddSingularNode (singnode, allow_refine);
|
|
1780
|
+
childs[nr] -> AddSingularNode (singnode, allow_refine, recording);
|
|
978
1781
|
});
|
|
979
1782
|
|
|
980
|
-
if (targets.Size())
|
|
1783
|
+
if (targets.Size()+vol_targets.Size())
|
|
981
1784
|
singnodes.Append(&singnode);
|
|
982
1785
|
}
|
|
983
1786
|
}
|
|
984
1787
|
else
|
|
985
1788
|
{
|
|
986
1789
|
for (auto & childsing : singnode.childs)
|
|
987
|
-
AddSingularNode (*childsing, allow_refine);
|
|
1790
|
+
AddSingularNode (*childsing, allow_refine, recording);
|
|
988
1791
|
}
|
|
989
1792
|
}
|
|
990
1793
|
|
|
991
1794
|
void LocalizeExpansion(bool allow_refine)
|
|
992
1795
|
{
|
|
993
1796
|
if (allow_refine)
|
|
994
|
-
if (mp.Order() >
|
|
995
|
-
CreateChilds();
|
|
1797
|
+
if (mp.Order() > 30 && !childs[0])
|
|
1798
|
+
CreateChilds(allow_refine);
|
|
996
1799
|
|
|
997
1800
|
if (childs[0])
|
|
998
1801
|
{
|
|
999
|
-
|
|
1802
|
+
if (total_targets < 1000)
|
|
1000
1803
|
{
|
|
1001
|
-
|
|
1002
|
-
|
|
1003
|
-
|
|
1804
|
+
for (int nr = 0; nr < 8; nr++)
|
|
1805
|
+
{
|
|
1806
|
+
if (L2Norm(mp.SH().Coefs()) > 0)
|
|
1807
|
+
mp.TransformAdd (childs[nr]->mp, childs[nr]->center-center);
|
|
1808
|
+
childs[nr]->LocalizeExpansion(allow_refine);
|
|
1809
|
+
}
|
|
1004
1810
|
}
|
|
1005
|
-
|
|
1811
|
+
else
|
|
1812
|
+
ParallelFor(8, [&] (int nr)
|
|
1813
|
+
{
|
|
1814
|
+
if (L2Norm(mp.SH().Coefs()) > 0)
|
|
1815
|
+
mp.TransformAdd (childs[nr]->mp, childs[nr]->center-center);
|
|
1816
|
+
childs[nr]->LocalizeExpansion(allow_refine);
|
|
1817
|
+
});
|
|
1818
|
+
mp = SphericalExpansion<Regular,elem_type>(-1, mp.Kappa(), 1.);
|
|
1006
1819
|
//mp.SH().Coefs()=0.0;
|
|
1007
1820
|
}
|
|
1008
1821
|
}
|
|
1009
1822
|
|
|
1010
1823
|
elem_type Evaluate (Vec<3> p) const
|
|
1011
1824
|
{
|
|
1012
|
-
// *testout << "eval p = " << p << ", level = " << level << ", center = " << center << ", r = " << r << endl;
|
|
1013
1825
|
elem_type sum{0.0};
|
|
1014
|
-
|
|
1015
|
-
if (childs[0])
|
|
1016
|
-
{
|
|
1017
|
-
int childnum = 0;
|
|
1018
|
-
if (p(0) > center(0)) childnum += 1;
|
|
1019
|
-
if (p(1) > center(1)) childnum += 2;
|
|
1020
|
-
if (p(2) > center(2)) childnum += 4;
|
|
1021
|
-
sum = childs[childnum]->Evaluate(p);
|
|
1022
|
-
}
|
|
1023
|
-
*/
|
|
1826
|
+
|
|
1024
1827
|
int childnum = 0;
|
|
1025
1828
|
if (p(0) > center(0)) childnum += 1;
|
|
1026
1829
|
if (p(1) > center(1)) childnum += 2;
|
|
@@ -1028,13 +1831,16 @@ namespace ngsbem
|
|
|
1028
1831
|
if (childs[childnum])
|
|
1029
1832
|
sum = childs[childnum]->Evaluate(p);
|
|
1030
1833
|
else
|
|
1031
|
-
|
|
1032
|
-
|
|
1033
|
-
|
|
1034
|
-
|
|
1035
|
-
for (auto sn : singnodes)
|
|
1036
|
-
sum += sn->EvaluateMP(p);
|
|
1834
|
+
{
|
|
1835
|
+
// static Timer t("mptool regmp, evaluate reg"); RegionTimer r(t);
|
|
1836
|
+
sum = mp.Eval(p-center);
|
|
1837
|
+
}
|
|
1037
1838
|
|
|
1839
|
+
{
|
|
1840
|
+
// static Timer t("mptool regmp, evaluate, singnode"); RegionTimer r(t);
|
|
1841
|
+
for (auto sn : singnodes)
|
|
1842
|
+
sum += sn->EvaluateMP(p);
|
|
1843
|
+
}
|
|
1038
1844
|
return sum;
|
|
1039
1845
|
}
|
|
1040
1846
|
|
|
@@ -1060,6 +1866,14 @@ namespace ngsbem
|
|
|
1060
1866
|
return sum;
|
|
1061
1867
|
}
|
|
1062
1868
|
|
|
1869
|
+
void TraverseTree (const std::function<void(Node&)> & func)
|
|
1870
|
+
{
|
|
1871
|
+
func(*this);
|
|
1872
|
+
for (auto & child : childs)
|
|
1873
|
+
if (child)
|
|
1874
|
+
child->TraverseTree(func);
|
|
1875
|
+
}
|
|
1876
|
+
|
|
1063
1877
|
double Norm() const
|
|
1064
1878
|
{
|
|
1065
1879
|
double norm = L2Norm(mp.SH().Coefs());
|
|
@@ -1077,37 +1891,100 @@ namespace ngsbem
|
|
|
1077
1891
|
num += ch->NumCoefficients();
|
|
1078
1892
|
return num;
|
|
1079
1893
|
}
|
|
1080
|
-
|
|
1894
|
+
|
|
1895
|
+
int GetChildNum (Vec<3> x) const
|
|
1896
|
+
{
|
|
1897
|
+
int childnum = 0;
|
|
1898
|
+
if (x(0) > center(0)) childnum += 1;
|
|
1899
|
+
if (x(1) > center(1)) childnum += 2;
|
|
1900
|
+
if (x(2) > center(2)) childnum += 4;
|
|
1901
|
+
return childnum;
|
|
1902
|
+
}
|
|
1903
|
+
|
|
1081
1904
|
void AddTarget (Vec<3> x)
|
|
1082
1905
|
{
|
|
1083
|
-
if (childs[0])
|
|
1906
|
+
// if (childs[0])
|
|
1907
|
+
if (have_childs) // quick check without locking
|
|
1084
1908
|
{
|
|
1085
1909
|
// directly send to childs:
|
|
1086
|
-
int childnum =
|
|
1087
|
-
if (x(0) > center(0)) childnum += 1;
|
|
1088
|
-
if (x(1) > center(1)) childnum += 2;
|
|
1089
|
-
if (x(2) > center(2)) childnum += 4;
|
|
1910
|
+
int childnum = GetChildNum(x);
|
|
1090
1911
|
childs[childnum] -> AddTarget( x );
|
|
1091
1912
|
return;
|
|
1092
1913
|
}
|
|
1093
1914
|
|
|
1915
|
+
lock_guard<mutex> guard(node_mutex);
|
|
1916
|
+
|
|
1917
|
+
if (have_childs) // test again after locking
|
|
1918
|
+
{
|
|
1919
|
+
// directly send to childs:
|
|
1920
|
+
int childnum = GetChildNum(x);
|
|
1921
|
+
childs[childnum] -> AddTarget(x);
|
|
1922
|
+
return;
|
|
1923
|
+
}
|
|
1924
|
+
|
|
1094
1925
|
targets.Append( x );
|
|
1095
1926
|
|
|
1096
1927
|
// if (r*mp.Kappa() < 1e-8) return;
|
|
1097
1928
|
if (level > 20) return;
|
|
1098
|
-
if (targets.Size() < maxdirect && r*mp.Kappa() <
|
|
1929
|
+
if (targets.Size() < params.maxdirect && r*mp.Kappa() < 5)
|
|
1930
|
+
return;
|
|
1931
|
+
|
|
1932
|
+
CreateChilds();
|
|
1933
|
+
|
|
1934
|
+
for (auto t : targets)
|
|
1935
|
+
AddTarget (t);
|
|
1936
|
+
for (auto [x,r] : vol_targets)
|
|
1937
|
+
AddVolumeTarget (x,r);
|
|
1938
|
+
|
|
1939
|
+
targets.SetSize0();
|
|
1940
|
+
vol_targets.SetSize0();
|
|
1941
|
+
}
|
|
1942
|
+
|
|
1943
|
+
|
|
1944
|
+
void AddVolumeTarget (Vec<3> x, double tr)
|
|
1945
|
+
{
|
|
1946
|
+
if (MaxNorm(x-center) > r+tr) return;
|
|
1947
|
+
|
|
1948
|
+
if (have_childs)
|
|
1949
|
+
{
|
|
1950
|
+
for (auto & child : childs)
|
|
1951
|
+
child->AddVolumeTarget(x, tr);
|
|
1952
|
+
return;
|
|
1953
|
+
}
|
|
1954
|
+
|
|
1955
|
+
|
|
1956
|
+
lock_guard<mutex> guard(node_mutex);
|
|
1957
|
+
|
|
1958
|
+
if (have_childs)
|
|
1959
|
+
{
|
|
1960
|
+
for (auto & child : childs)
|
|
1961
|
+
child->AddVolumeTarget(x, tr);
|
|
1962
|
+
return;
|
|
1963
|
+
}
|
|
1964
|
+
|
|
1965
|
+
|
|
1966
|
+
vol_targets.Append (tuple(x,tr));
|
|
1967
|
+
|
|
1968
|
+
if (level > 20) return;
|
|
1969
|
+
if (vol_targets.Size() < params.maxdirect && (r*mp.Kappa() < 5))
|
|
1099
1970
|
return;
|
|
1100
1971
|
|
|
1101
1972
|
CreateChilds();
|
|
1102
1973
|
|
|
1103
1974
|
for (auto t : targets)
|
|
1104
1975
|
AddTarget (t);
|
|
1976
|
+
for (auto [x,r] : vol_targets)
|
|
1977
|
+
AddVolumeTarget (x,r);
|
|
1978
|
+
|
|
1105
1979
|
targets.SetSize0();
|
|
1980
|
+
vol_targets.SetSize0();
|
|
1106
1981
|
}
|
|
1107
1982
|
|
|
1983
|
+
|
|
1984
|
+
|
|
1108
1985
|
void CalcTotalTargets()
|
|
1109
1986
|
{
|
|
1110
|
-
total_targets = targets.Size();
|
|
1987
|
+
total_targets = targets.Size() + vol_targets.Size();
|
|
1111
1988
|
for (auto & child : childs)
|
|
1112
1989
|
if (child)
|
|
1113
1990
|
{
|
|
@@ -1127,8 +2004,21 @@ namespace ngsbem
|
|
|
1127
2004
|
}
|
|
1128
2005
|
|
|
1129
2006
|
if (total_targets == 0)
|
|
1130
|
-
mp =
|
|
2007
|
+
mp = SphericalExpansion<Regular,elem_type>(-1, mp.Kappa(),1.);
|
|
2008
|
+
}
|
|
2009
|
+
|
|
2010
|
+
void AllocateMemory()
|
|
2011
|
+
{
|
|
2012
|
+
for (auto & child : childs)
|
|
2013
|
+
if (child)
|
|
2014
|
+
child->AllocateMemory();
|
|
2015
|
+
|
|
2016
|
+
if (total_targets > 0)
|
|
2017
|
+
Allocate();
|
|
2018
|
+
// mp = SphericalExpansion<Regular,elem_type>(MPOrder(r*mp.Kappa()), mp.Kappa(), r); // -1, mp.Kappa(),1.);
|
|
1131
2019
|
}
|
|
2020
|
+
|
|
2021
|
+
|
|
1132
2022
|
|
|
1133
2023
|
|
|
1134
2024
|
void Print (ostream & ost, size_t childnr = -1) const
|
|
@@ -1145,21 +2035,24 @@ namespace ngsbem
|
|
|
1145
2035
|
}
|
|
1146
2036
|
|
|
1147
2037
|
};
|
|
1148
|
-
|
|
2038
|
+
|
|
2039
|
+
FMM_Parameters fmm_params;
|
|
1149
2040
|
Node root;
|
|
1150
|
-
shared_ptr<
|
|
2041
|
+
shared_ptr<SingularMLExpansion<elem_type>> singmp;
|
|
1151
2042
|
|
|
1152
2043
|
public:
|
|
1153
|
-
|
|
1154
|
-
|
|
1155
|
-
|
|
2044
|
+
RegularMLExpansion (shared_ptr<SingularMLExpansion<elem_type>> asingmp, Vec<3> center, double r,
|
|
2045
|
+
const FMM_Parameters & _params)
|
|
2046
|
+
: fmm_params(_params), root(center, r, 0, asingmp->Kappa(), fmm_params), singmp(asingmp)
|
|
2047
|
+
{
|
|
1156
2048
|
if (!singmp->havemp) throw Exception("first call Calc for singular MP");
|
|
1157
|
-
|
|
2049
|
+
root.Allocate();
|
|
2050
|
+
|
|
1158
2051
|
nodes_on_level = 0;
|
|
1159
2052
|
nodes_on_level[0] = 1;
|
|
1160
2053
|
{
|
|
1161
|
-
static Timer t("mptool compute regular MLMP"); RegionTimer rg(t);
|
|
1162
|
-
root.AddSingularNode(singmp->root, true);
|
|
2054
|
+
static Timer t("mptool compute regular MLMP"); RegionTimer rg(t);
|
|
2055
|
+
root.AddSingularNode(singmp->root, true, nullptr);
|
|
1163
2056
|
// cout << "norm after S->R conversion: " << root.Norm() << endl;
|
|
1164
2057
|
}
|
|
1165
2058
|
|
|
@@ -1180,42 +2073,163 @@ namespace ngsbem
|
|
|
1180
2073
|
}
|
|
1181
2074
|
}
|
|
1182
2075
|
|
|
1183
|
-
|
|
1184
|
-
|
|
1185
|
-
|
|
1186
|
-
|
|
1187
|
-
|
|
1188
|
-
|
|
1189
|
-
|
|
2076
|
+
RegularMLExpansion (Vec<3> center, double r, double kappa, const FMM_Parameters & _params)
|
|
2077
|
+
: fmm_params(_params), root(center, r, 0, kappa, fmm_params)
|
|
2078
|
+
{
|
|
2079
|
+
nodes_on_level = 0;
|
|
2080
|
+
nodes_on_level[0] = 1;
|
|
2081
|
+
}
|
|
2082
|
+
|
|
1190
2083
|
void AddTarget (Vec<3> t)
|
|
1191
2084
|
{
|
|
1192
2085
|
root.AddTarget (t);
|
|
1193
2086
|
}
|
|
1194
2087
|
|
|
1195
|
-
void
|
|
2088
|
+
void AddVolumeTarget (Vec<3> t, double r)
|
|
2089
|
+
{
|
|
2090
|
+
root.AddVolumeTarget (t, r);
|
|
2091
|
+
}
|
|
2092
|
+
|
|
2093
|
+
void CalcMP(shared_ptr<SingularMLExpansion<elem_type>> asingmp, bool onlytargets = true)
|
|
1196
2094
|
{
|
|
1197
2095
|
static Timer t("mptool regular MLMP"); RegionTimer rg(t);
|
|
2096
|
+
static Timer tremove("removeempty");
|
|
2097
|
+
static Timer trec("mptool regular MLMP - recording");
|
|
2098
|
+
static Timer tsort("mptool regular MLMP - sort");
|
|
1198
2099
|
|
|
1199
2100
|
singmp = asingmp;
|
|
1200
2101
|
|
|
2102
|
+
|
|
1201
2103
|
root.CalcTotalTargets();
|
|
1202
|
-
|
|
1203
|
-
|
|
1204
|
-
|
|
2104
|
+
// cout << "before remove empty trees:" << endl;
|
|
2105
|
+
// PrintStatistics(cout);
|
|
2106
|
+
|
|
2107
|
+
/*
|
|
2108
|
+
tremove.Start();
|
|
2109
|
+
if (onlytargets)
|
|
2110
|
+
root.RemoveEmptyTrees();
|
|
2111
|
+
tremove.Stop();
|
|
2112
|
+
*/
|
|
2113
|
+
|
|
2114
|
+
root.AllocateMemory();
|
|
2115
|
+
|
|
2116
|
+
// cout << "after allocating regular:" << endl;
|
|
2117
|
+
// PrintStatistics(cout);
|
|
1205
2118
|
|
|
2119
|
+
// cout << "starting S-R converion" << endl;
|
|
2120
|
+
// PrintStatistics(cout);
|
|
2121
|
+
|
|
2122
|
+
|
|
2123
|
+
if constexpr (false)
|
|
2124
|
+
{
|
|
2125
|
+
root.AddSingularNode(singmp->root, !onlytargets, nullptr);
|
|
2126
|
+
}
|
|
2127
|
+
else
|
|
2128
|
+
{ // use recording
|
|
2129
|
+
Array<RecordingRS> recording;
|
|
2130
|
+
{
|
|
2131
|
+
RegionTimer rrec(trec);
|
|
2132
|
+
root.AddSingularNode(singmp->root, !onlytargets, &recording);
|
|
2133
|
+
}
|
|
2134
|
+
|
|
2135
|
+
// cout << "recorded: " << recording.Size() << endl;
|
|
2136
|
+
{
|
|
2137
|
+
RegionTimer reg(tsort);
|
|
2138
|
+
QuickSort (recording, [] (auto & a, auto & b)
|
|
2139
|
+
{
|
|
2140
|
+
if (a.len < (1-1e-8) * b.len) return true;
|
|
2141
|
+
if (a.len > (1+1e-8) * b.len) return false;
|
|
2142
|
+
return a.theta < b.theta;
|
|
2143
|
+
});
|
|
2144
|
+
}
|
|
2145
|
+
|
|
2146
|
+
double current_len = -1e100;
|
|
2147
|
+
double current_theta = -1e100;
|
|
2148
|
+
Array<RecordingRS*> current_batch;
|
|
2149
|
+
Array<Array<RecordingRS*>> batch_group;
|
|
2150
|
+
Array<double> group_lengths;
|
|
2151
|
+
Array<double> group_thetas;
|
|
2152
|
+
for (auto & record : recording)
|
|
2153
|
+
{
|
|
2154
|
+
bool len_changed = fabs(record.len - current_len) > 1e-8;
|
|
2155
|
+
bool theta_changed = fabs(record.theta - current_theta) > 1e-8;
|
|
2156
|
+
if ((len_changed || theta_changed) && current_batch.Size() > 0) {
|
|
2157
|
+
// ProcessBatch(current_batch, current_len, current_theta);
|
|
2158
|
+
batch_group.Append(current_batch);
|
|
2159
|
+
group_lengths.Append(current_len);
|
|
2160
|
+
group_thetas.Append(current_theta);
|
|
2161
|
+
current_batch.SetSize(0);
|
|
2162
|
+
}
|
|
2163
|
+
|
|
2164
|
+
current_len = record.len;
|
|
2165
|
+
current_theta = record.theta;
|
|
2166
|
+
current_batch.Append(&record);
|
|
2167
|
+
}
|
|
2168
|
+
if (current_batch.Size() > 0) {
|
|
2169
|
+
// ProcessBatch(current_batch, current_len, current_theta);
|
|
2170
|
+
batch_group.Append(current_batch);
|
|
2171
|
+
group_lengths.Append(current_len);
|
|
2172
|
+
group_thetas.Append(current_theta);
|
|
2173
|
+
}
|
|
2174
|
+
|
|
2175
|
+
ParallelFor(batch_group.Size(), [&](int i) {
|
|
2176
|
+
ProcessBatchRS(batch_group[i], group_lengths[i], group_thetas[i]);
|
|
2177
|
+
}, TasksPerThread(4));
|
|
2178
|
+
}
|
|
2179
|
+
|
|
2180
|
+
|
|
1206
2181
|
/*
|
|
1207
2182
|
int maxlevel = 0;
|
|
1208
|
-
for (auto [i,num] : Enumerate(
|
|
2183
|
+
for (auto [i,num] : Enumerate(RegularMLExpansion::nodes_on_level))
|
|
1209
2184
|
if (num > 0) maxlevel = i;
|
|
1210
2185
|
|
|
1211
2186
|
for (int i = 0; i <= maxlevel; i++)
|
|
1212
|
-
cout << "reg " << i << ": " <<
|
|
2187
|
+
cout << "reg " << i << ": " << RegularMLExpansion::nodes_on_level[i] << endl;
|
|
1213
2188
|
*/
|
|
1214
2189
|
|
|
1215
|
-
|
|
1216
|
-
|
|
2190
|
+
// cout << "starting R-R converion" << endl;
|
|
2191
|
+
// PrintStatistics(cout);
|
|
2192
|
+
|
|
2193
|
+
static Timer tloc("mptool regular localize expansion"); RegionTimer rloc(tloc);
|
|
2194
|
+
root.LocalizeExpansion(!onlytargets);
|
|
2195
|
+
|
|
2196
|
+
|
|
2197
|
+
// cout << "R-R conversion done" << endl;
|
|
2198
|
+
// PrintStatistics(cout);
|
|
1217
2199
|
}
|
|
1218
2200
|
|
|
2201
|
+
void PrintStatistics (ostream & ost)
|
|
2202
|
+
{
|
|
2203
|
+
int levels = 0;
|
|
2204
|
+
int cnt = 0;
|
|
2205
|
+
root.TraverseTree( [&](Node & node) {
|
|
2206
|
+
levels = max(levels, node.level);
|
|
2207
|
+
cnt++;
|
|
2208
|
+
});
|
|
2209
|
+
ost << "levels: " << levels << endl;
|
|
2210
|
+
ost << "nodes: " << cnt << endl;
|
|
2211
|
+
|
|
2212
|
+
Array<int> num_on_level(levels+1);
|
|
2213
|
+
Array<int> order_on_level(levels+1);
|
|
2214
|
+
Array<size_t> coefs_on_level(levels+1);
|
|
2215
|
+
num_on_level = 0;
|
|
2216
|
+
order_on_level = 0;
|
|
2217
|
+
root.TraverseTree( [&](Node & node) {
|
|
2218
|
+
num_on_level[node.level]++;
|
|
2219
|
+
order_on_level[node.level] = max(order_on_level[node.level],node.mp.Order());
|
|
2220
|
+
coefs_on_level[node.level] += node.mp.SH().Coefs().Size();
|
|
2221
|
+
});
|
|
2222
|
+
|
|
2223
|
+
cout << "num on level" << endl;
|
|
2224
|
+
for (int i = 0; i < num_on_level.Size(); i++)
|
|
2225
|
+
cout << i << ": " << num_on_level[i] << ", order = " << order_on_level[i] << ", coefs " << coefs_on_level[i] << endl;
|
|
2226
|
+
|
|
2227
|
+
size_t totcoefs = 0;
|
|
2228
|
+
for (auto n : coefs_on_level)
|
|
2229
|
+
totcoefs += n;
|
|
2230
|
+
cout << "total mem in coefs: " << sizeof(elem_type)*totcoefs / sqr(1024) << " MB" << endl;
|
|
2231
|
+
}
|
|
2232
|
+
|
|
1219
2233
|
void Print (ostream & ost) const
|
|
1220
2234
|
{
|
|
1221
2235
|
root.Print(ost);
|
|
@@ -1234,7 +2248,10 @@ namespace ngsbem
|
|
|
1234
2248
|
elem_type Evaluate (Vec<3> p) const
|
|
1235
2249
|
{
|
|
1236
2250
|
// static Timer t("mptool Eval MLMP regular"); RegionTimer r(t);
|
|
1237
|
-
if (L2Norm(p-root.center) > root.r) return elem_type{0.0};
|
|
2251
|
+
// if (L2Norm(p-root.center) > root.r) return elem_type{0.0};
|
|
2252
|
+
|
|
2253
|
+
if (MaxNorm(p-root.center) > root.r)
|
|
2254
|
+
return singmp->Evaluate(p);
|
|
1238
2255
|
return root.Evaluate(p);
|
|
1239
2256
|
}
|
|
1240
2257
|
|
|
@@ -1246,11 +2263,12 @@ namespace ngsbem
|
|
|
1246
2263
|
|
|
1247
2264
|
};
|
|
1248
2265
|
|
|
2266
|
+
|
|
1249
2267
|
template <typename elem_type>
|
|
1250
|
-
inline ostream & operator<< (ostream & ost, const
|
|
2268
|
+
inline ostream & operator<< (ostream & ost, const RegularMLExpansion<elem_type> & mlmp)
|
|
1251
2269
|
{
|
|
1252
2270
|
mlmp.Print(ost);
|
|
1253
|
-
// ost << "
|
|
2271
|
+
// ost << "RegularMLExpansion" << endl;
|
|
1254
2272
|
return ost;
|
|
1255
2273
|
}
|
|
1256
2274
|
|