ngsolve 6.2.2502__cp311-cp311-win_amd64.whl → 6.2.2601__cp311-cp311-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- netgen/include/analytic_integrals.hpp +10 -0
- netgen/include/basematrix.hpp +6 -0
- netgen/include/bdbequations.hpp +55 -0
- netgen/include/bem_diffops.hpp +475 -0
- netgen/include/bilinearform.hpp +4 -1
- netgen/include/blockjacobi.hpp +17 -5
- netgen/include/bspline.hpp +2 -0
- netgen/include/cholesky.hpp +2 -2
- netgen/include/code_generation.hpp +2 -2
- netgen/include/complex_wrapper.hpp +30 -2
- netgen/include/contact.hpp +8 -0
- netgen/include/diagonalmatrix.hpp +6 -0
- netgen/include/diffop_impl.hpp +3 -1
- netgen/include/diffopwithfactor.hpp +123 -0
- netgen/include/elementbyelement.hpp +9 -3
- netgen/include/expr.hpp +45 -7
- netgen/include/fespace.hpp +12 -4
- netgen/include/gridfunction.hpp +3 -3
- netgen/include/h1amg.hpp +85 -2
- netgen/include/h1lumping.hpp +6 -0
- netgen/include/hcurl_equations.hpp +29 -0
- netgen/include/hcurlcurlfe.hpp +20 -0
- netgen/include/hdiv_equations.hpp +15 -0
- netgen/include/hdivfe_utils.hpp +1 -0
- netgen/include/hdivhofespace.hpp +2 -0
- netgen/include/integrator.hpp +4 -16
- netgen/include/intrule.hpp +2 -1
- netgen/include/intrules_SauterSchwab.hpp +25 -0
- netgen/include/jacobi.hpp +35 -18
- netgen/include/kernels.hpp +724 -0
- netgen/include/l2hofe.hpp +1 -0
- netgen/include/matrix.hpp +8 -3
- netgen/include/meshaccess.hpp +4 -3
- netgen/include/mp_coefficient.hpp +145 -0
- netgen/include/mptools.hpp +1331 -368
- netgen/include/mycomplex.hpp +1 -1
- netgen/include/ngblas.hpp +116 -7
- netgen/include/potentialtools.hpp +22 -0
- netgen/include/preconditioner.hpp +23 -23
- netgen/include/prolongation.hpp +132 -6
- netgen/include/recursive_pol.hpp +63 -11
- netgen/include/simd_complex.hpp +45 -0
- netgen/include/sparsecholesky.hpp +6 -2
- netgen/include/sparsefactorization_interface.hpp +159 -0
- netgen/include/sparsematrix.hpp +21 -7
- netgen/include/sparsematrix_dyn.hpp +6 -7
- netgen/include/sparsematrix_impl.hpp +175 -40
- netgen/include/special_matrix.hpp +2 -0
- netgen/include/statushandler.hpp +8 -8
- netgen/include/symbolicintegrator.hpp +2 -1
- netgen/include/tangentialfacetfespace.hpp +7 -22
- netgen/include/thdivfe_impl.hpp +66 -0
- netgen/include/tscalarfe.hpp +1 -1
- netgen/include/vector.hpp +272 -47
- netgen/lib/libngsolve.lib +0 -0
- netgen/libngsolve.dll +0 -0
- netgen/ngscxx.bat +1 -1
- netgen/ngsld.bat +1 -1
- ngsolve/__init__.py +1 -0
- ngsolve/cmake/NGSolveConfig.cmake +8 -8
- ngsolve/cmake/ngsolve-targets.cmake +24 -18
- ngsolve/config/config.py +7 -7
- ngsolve/demos/intro/cmagnet.py +19 -22
- ngsolve/directsolvers.py +9 -21
- ngsolve/krylovspace.py +172 -3
- ngsolve/ngslib.lib +0 -0
- ngsolve/ngslib.pyd +0 -0
- ngsolve/nonlinearsolvers.py +2 -2
- ngsolve/preconditioners.py +1 -0
- ngsolve/solve_implementation.py +168 -0
- ngsolve/{solvers.py → solvers/__init__.py} +1 -1
- ngsolve/solvers/cudss.py +112 -0
- ngsolve/webgui.py +2 -0
- {ngsolve-6.2.2502.dist-info → ngsolve-6.2.2601.dist-info}/METADATA +2 -2
- {ngsolve-6.2.2502.dist-info → ngsolve-6.2.2601.dist-info}/RECORD +107 -97
- {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/Scripts/ngsolve.tcl +0 -0
- {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/beam.geo +0 -0
- {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/beam.vol +0 -0
- {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/chip.in2d +0 -0
- {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/chip.vol +0 -0
- {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/coil.geo +0 -0
- {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/coil.vol +0 -0
- {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/coilshield.geo +0 -0
- {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/coilshield.vol +0 -0
- {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/cube.geo +0 -0
- {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/cube.vol +0 -0
- {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/d10_DGdoubleglazing.pde +0 -0
- {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/d11_chip_nitsche.pde +0 -0
- {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/d1_square.pde +0 -0
- {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/d2_chip.pde +0 -0
- {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/d3_helmholtz.pde +0 -0
- {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/d4_cube.pde +0 -0
- {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/d5_beam.pde +0 -0
- {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/d6_shaft.pde +0 -0
- {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/d7_coil.pde +0 -0
- {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/d8_coilshield.pde +0 -0
- {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/d9_hybridDG.pde +0 -0
- {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/doubleglazing.in2d +0 -0
- {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/doubleglazing.vol +0 -0
- {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/piezo2d40round4.vol.gz +0 -0
- {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/shaft.geo +0 -0
- {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/shaft.vol +0 -0
- {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/square.in2d +0 -0
- {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/square.vol +0 -0
- {ngsolve-6.2.2502.dist-info → ngsolve-6.2.2601.dist-info}/LICENSE +0 -0
- {ngsolve-6.2.2502.dist-info → ngsolve-6.2.2601.dist-info}/WHEEL +0 -0
- {ngsolve-6.2.2502.dist-info → ngsolve-6.2.2601.dist-info}/top_level.txt +0 -0
netgen/include/mptools.hpp
CHANGED
|
@@ -10,8 +10,77 @@
|
|
|
10
10
|
#include <coefficient.hpp>
|
|
11
11
|
#include <recursive_pol.hpp>
|
|
12
12
|
|
|
13
|
-
|
|
13
|
+
|
|
14
|
+
namespace ngsbem
|
|
14
15
|
{
|
|
16
|
+
using namespace ngfem;
|
|
17
|
+
|
|
18
|
+
template<typename T>
|
|
19
|
+
constexpr int VecLength = 1; // Default: Complex has length 1
|
|
20
|
+
|
|
21
|
+
template<int N>
|
|
22
|
+
constexpr int VecLength<Vec<N, Complex>> = N; // Specialization: Vec<N,Complex> has length N
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
constexpr int FMM_SW = 4;
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
// ************************ SIMD - creation (should end up in simd.hpp) *************
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
template <int S, typename T, int SW>
|
|
34
|
+
Vec<S,T> HSum (Vec<S,SIMD<T,SW>> v)
|
|
35
|
+
{
|
|
36
|
+
Vec<S,T> res;
|
|
37
|
+
for (int i = 0; i < S; i++)
|
|
38
|
+
res(i) = HSum(v(i));
|
|
39
|
+
// Iterate<S> ([&](auto i) {
|
|
40
|
+
// res.HTData().template Elem<i.value>() = HSum(v.HTData().template Elem<i.value>());
|
|
41
|
+
// });
|
|
42
|
+
return res;
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class NGS_DLL_HEADER PrecomputedSqrts
|
|
47
|
+
{
|
|
48
|
+
public:
|
|
49
|
+
Array<double> sqrt_int;
|
|
50
|
+
// Array<double> inv_sqrt_int;
|
|
51
|
+
Array<double> sqrt_n_np1; // sqrt(n*(n+1))
|
|
52
|
+
Array<double> inv_sqrt_2np1_2np3; // 1/sqrt( (2n+1)*(2n+3) )
|
|
53
|
+
|
|
54
|
+
PrecomputedSqrts();
|
|
55
|
+
};
|
|
56
|
+
|
|
57
|
+
extern NGS_DLL_HEADER PrecomputedSqrts presqrt;
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
/// Tuning parameters handed to the multilevel expansion tree nodes.
class FMM_Parameters
{
public:
  /// threshold compared against the number of sources stored in one node
  /// before the node passes its sources on to child nodes
  int maxdirect = 100;

  /// base expansion order of a node: order = minorder + 2 kappa r
  int minorder = 20;
};
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
/// Converts a Cartesian vector into (length, theta, phi).
///  - length: L2 norm of dist
///  - theta : acos(z/length), or 0 when length < 1e-30
///  - phi   : atan2(y, x), or 0 when x^2+y^2 < 1e-30
inline std::tuple<double, double, double> SphericalCoordinates(Vec<3> dist)
{
  double radius = L2Norm(dist);

  // guard the division for a (numerically) vanishing vector
  double polar = (radius < 1e-30) ? 0 : acos (dist(2) / radius);

  // on the z-axis the azimuth is undefined; use 0
  double azimuth = (sqr(dist(0))+sqr(dist(1)) < 1e-30) ? 0 : atan2(dist(1), dist(0));

  return {radius, polar, azimuth};
}
|
|
15
84
|
|
|
16
85
|
|
|
17
86
|
template <typename entry_type = Complex>
|
|
@@ -77,23 +146,91 @@ namespace ngfem
|
|
|
77
146
|
|
|
78
147
|
void Calc (Vec<3> x, FlatVector<Complex> shapes);
|
|
79
148
|
|
|
80
|
-
|
|
149
|
+
|
|
150
|
+
void FlipZ ();
|
|
81
151
|
void RotateZ (double alpha);
|
|
82
|
-
void RotateY (double alpha);
|
|
83
152
|
|
|
153
|
+
template <typename FUNC>
|
|
154
|
+
void RotateZ (double alpha, FUNC func) const
|
|
155
|
+
{
|
|
156
|
+
if (order < 0) return;
|
|
157
|
+
|
|
158
|
+
Vector<Complex> exp_imalpha(order+1);
|
|
159
|
+
Complex exp_ialpha(cos(alpha), sin(alpha));
|
|
160
|
+
Complex prod = 1.0;
|
|
161
|
+
for (int i = 0; i <= order; i++)
|
|
162
|
+
{
|
|
163
|
+
exp_imalpha(i) = prod;
|
|
164
|
+
prod *= exp_ialpha;
|
|
165
|
+
}
|
|
166
|
+
|
|
167
|
+
int ii = 0;
|
|
168
|
+
for (int n = 0; n <= order; n++)
|
|
169
|
+
{
|
|
170
|
+
for (int m = -n; m < 0; m++, ii++)
|
|
171
|
+
func(ii, conj(exp_imalpha(-m)));
|
|
172
|
+
for (int m = 0; m <= n; m++, ii++)
|
|
173
|
+
func(ii, exp_imalpha(m));
|
|
174
|
+
};
|
|
175
|
+
};
|
|
176
|
+
|
|
177
|
+
template <typename FUNC>
|
|
178
|
+
void RotateZFlip (double alpha, bool flip, FUNC func) const
|
|
179
|
+
{
|
|
180
|
+
if (order < 0) return;
|
|
181
|
+
|
|
182
|
+
Vector<Complex> exp_imalpha(order+1);
|
|
183
|
+
Complex exp_ialpha(cos(alpha), sin(alpha));
|
|
184
|
+
Complex prod = 1.0;
|
|
185
|
+
for (int i = 0; i <= order; i++)
|
|
186
|
+
{
|
|
187
|
+
exp_imalpha(i) = prod;
|
|
188
|
+
prod *= exp_ialpha;
|
|
189
|
+
}
|
|
190
|
+
|
|
191
|
+
int ii = 0;
|
|
192
|
+
|
|
193
|
+
auto FlipFactor = [] (int n, int m, bool flip)->double
|
|
194
|
+
{
|
|
195
|
+
if (flip)
|
|
196
|
+
return ((n-m)%2) == 1 ? -1 : 1;
|
|
197
|
+
return 1.0;
|
|
198
|
+
};
|
|
199
|
+
|
|
200
|
+
for (int n = 0; n <= order; n++)
|
|
201
|
+
{
|
|
202
|
+
for (int m = -n; m < 0; m++, ii++)
|
|
203
|
+
func(ii, FlipFactor(n,m,flip)*conj(exp_imalpha(-m)));
|
|
204
|
+
for (int m = 0; m <= n; m++, ii++)
|
|
205
|
+
func(ii, FlipFactor(n,m,flip)*exp_imalpha(m));
|
|
206
|
+
};
|
|
207
|
+
};
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
void RotateY (double alpha, bool parallel = false);
|
|
212
|
+
|
|
213
|
+
|
|
84
214
|
|
|
85
215
|
static double CalcAmn (int m, int n)
|
|
86
216
|
{
|
|
87
217
|
if (m < 0) m=-m;
|
|
88
218
|
if (n < m) return 0;
|
|
89
|
-
|
|
219
|
+
|
|
220
|
+
if (2*n+1 < presqrt.sqrt_int.Size())
|
|
221
|
+
return presqrt.sqrt_int[n+1+m]*presqrt.sqrt_int[n+1-m] * presqrt.inv_sqrt_2np1_2np3[n];
|
|
222
|
+
else
|
|
223
|
+
return sqrt( (n+1.0+m)*(n+1.0-m) / ( (2*n+1)*(2*n+3) ));
|
|
90
224
|
}
|
|
91
225
|
|
|
92
226
|
static double CalcBmn (int m, int n)
|
|
93
227
|
{
|
|
94
228
|
double sgn = (m >= 0) ? 1 : -1;
|
|
95
|
-
if ( (m
|
|
96
|
-
|
|
229
|
+
if ( (m >= n) || (-m > n) ) return 0;
|
|
230
|
+
if (n <= presqrt.inv_sqrt_2np1_2np3.Size())
|
|
231
|
+
return sgn * presqrt.sqrt_n_np1[n-m-1] * presqrt.inv_sqrt_2np1_2np3[n-1];
|
|
232
|
+
else
|
|
233
|
+
return sgn * sqrt( (n-m-1.0)*(n-m) / ( (2*n-1.0)*(2*n+1)));
|
|
97
234
|
}
|
|
98
235
|
|
|
99
236
|
static double CalcDmn (int m, int n)
|
|
@@ -112,11 +249,11 @@ namespace ngfem
|
|
|
112
249
|
// https://fortran-lang.discourse.group/t/looking-for-spherical-bessel-and-hankel-functions-of-first-and-second-kind-and-arbitrary-order/2308/2
|
|
113
250
|
NGS_DLL_HEADER
|
|
114
251
|
void besseljs3d (int nterms, double z, double scale,
|
|
115
|
-
|
|
252
|
+
SliceVector<double> fjs, SliceVector<double> fjder = FlatVector<double>(0, nullptr));
|
|
116
253
|
|
|
117
254
|
NGS_DLL_HEADER
|
|
118
255
|
void besseljs3d (int nterms, Complex z, double scale,
|
|
119
|
-
|
|
256
|
+
SliceVector<Complex> fjs, SliceVector<Complex> fjder = FlatVector<Complex>(0, nullptr));
|
|
120
257
|
|
|
121
258
|
|
|
122
259
|
/*
|
|
@@ -135,14 +272,17 @@ namespace ngfem
|
|
|
135
272
|
FlatVector<double> jp,
|
|
136
273
|
FlatVector<double> yp);
|
|
137
274
|
|
|
138
|
-
|
|
275
|
+
|
|
139
276
|
|
|
140
277
|
/// Fills values via besseljs3d(n, rho, scale, values); the derivative
/// argument of besseljs3d keeps its default.
template <typename T>
void SphericalBessel (int n, double rho, double scale, T && values)
{
  // results go straight into the caller's storage
  besseljs3d (n, rho, scale, values);

  /*
    former variant through temporaries:
    Vector<double> j(n+1), jp(n+1);
    besseljs3d (n, rho, scale, j, jp);
    values = j;
  */
}
|
|
147
287
|
|
|
148
288
|
|
|
@@ -166,21 +306,6 @@ namespace ngfem
|
|
|
166
306
|
return;
|
|
167
307
|
}
|
|
168
308
|
Vector j(n+1), y(n+1), jp(n+1), yp(n+1);
|
|
169
|
-
// SBESJY (rho, n, j, y, jp, yp);
|
|
170
|
-
|
|
171
|
-
/*
|
|
172
|
-
values = j + Complex(0,1) * y;
|
|
173
|
-
if (scale != 1.0)
|
|
174
|
-
{
|
|
175
|
-
double prod = 1.0;
|
|
176
|
-
for (int i = 0; i <= n; i++)
|
|
177
|
-
{
|
|
178
|
-
values(i) *= prod;
|
|
179
|
-
prod *= scale;
|
|
180
|
-
}
|
|
181
|
-
}
|
|
182
|
-
*/
|
|
183
|
-
|
|
184
309
|
|
|
185
310
|
// the bessel-evaluation with scale
|
|
186
311
|
besseljs3d (n, rho, 1/scale, j, jp);
|
|
@@ -208,7 +333,7 @@ namespace ngfem
|
|
|
208
333
|
|
|
209
334
|
|
|
210
335
|
// hn1 = jn+ i*yn
|
|
211
|
-
class
|
|
336
|
+
class Singular
|
|
212
337
|
{
|
|
213
338
|
public:
|
|
214
339
|
template <typename T>
|
|
@@ -216,48 +341,80 @@ namespace ngfem
|
|
|
216
341
|
{
|
|
217
342
|
SphericalHankel1(order, r, scale, values);
|
|
218
343
|
}
|
|
344
|
+
|
|
345
|
+
template <typename T>
|
|
346
|
+
static void Eval (int order, double kappa, double r, double rtyp, T && values)
|
|
347
|
+
{
|
|
348
|
+
double scale = Scale(kappa, rtyp);
|
|
349
|
+
SphericalHankel1(order, r*kappa, scale, values);
|
|
350
|
+
}
|
|
351
|
+
|
|
352
|
+
static double Scale (double kappa, double rtyp)
|
|
353
|
+
{
|
|
354
|
+
// return min(1.0, rtyp*kappa);
|
|
355
|
+
return min(1.0, 0.5*rtyp*kappa);
|
|
356
|
+
}
|
|
219
357
|
};
|
|
358
|
+
|
|
359
|
+
|
|
220
360
|
|
|
221
361
|
// jn
|
|
222
|
-
class
|
|
362
|
+
class Regular
|
|
223
363
|
{
|
|
224
|
-
public:
|
|
364
|
+
public:
|
|
225
365
|
template <typename T>
|
|
226
366
|
static void Eval (int order, double r, double scale, T && values)
|
|
227
367
|
{
|
|
228
368
|
SphericalBessel (order, r, 1.0/scale, values);
|
|
229
369
|
}
|
|
370
|
+
|
|
371
|
+
template <typename T>
|
|
372
|
+
static void Eval (int order, double kappa, double r, double rtyp, T && values)
|
|
373
|
+
{
|
|
374
|
+
double scale = Scale(kappa, rtyp);
|
|
375
|
+
SphericalBessel (order, r*kappa, 1.0/scale, values);
|
|
376
|
+
}
|
|
377
|
+
|
|
378
|
+
static double Scale (double kappa, double rtyp)
|
|
379
|
+
{
|
|
380
|
+
// return 1.0/ min(1.0, 0.25*rtyp*kappa);
|
|
381
|
+
return 1.0/ min(1.0, 0.5*rtyp*kappa);
|
|
382
|
+
}
|
|
383
|
+
|
|
230
384
|
};
|
|
231
385
|
|
|
232
386
|
|
|
233
387
|
|
|
234
388
|
|
|
235
389
|
template <typename RADIAL, typename entry_type=Complex>
|
|
236
|
-
class NGS_DLL_HEADER
|
|
390
|
+
class NGS_DLL_HEADER SphericalExpansion
|
|
237
391
|
{
|
|
238
392
|
SphericalHarmonics<entry_type> sh;
|
|
239
393
|
double kappa;
|
|
240
|
-
double
|
|
394
|
+
double rtyp;
|
|
241
395
|
public:
|
|
242
|
-
MultiPole (int aorder, double akappa, double ascale = 1)
|
|
243
|
-
: sh(aorder), kappa(akappa), scale(ascale) { }
|
|
244
396
|
|
|
397
|
+
SphericalExpansion (int aorder, double akappa, double artyp)
|
|
398
|
+
: sh(aorder), kappa(akappa), rtyp(artyp) { }
|
|
399
|
+
|
|
400
|
+
|
|
245
401
|
entry_type & Coef(int n, int m) { return sh.Coef(n,m); }
|
|
246
402
|
auto & SH() { return sh; }
|
|
247
403
|
const auto & SH() const { return sh; }
|
|
248
404
|
double Kappa() const { return kappa; }
|
|
249
|
-
double Scale() const { return
|
|
405
|
+
double Scale() const { return RADIAL::Scale(kappa, rtyp); }
|
|
406
|
+
double RTyp() const { return rtyp; }
|
|
250
407
|
int Order() const { return sh.Order(); }
|
|
251
408
|
|
|
252
|
-
|
|
409
|
+
/// Returns a copy reduced to order neworder (capped at the current order),
/// keeping the leading (neworder+1)^2 coefficients; kappa and rtyp are carried over.
SphericalExpansion Truncate(int neworder) const
{
  // never ask for more than is available
  const int current = sh.Order();
  if (neworder > current)
    neworder = current;

  SphericalExpansion reduced(neworder, kappa, rtyp);
  reduced.sh.Coefs() = sh.Coefs().Range(sqr(neworder+1));
  return reduced;
}
|
|
259
416
|
|
|
260
|
-
|
|
417
|
+
SphericalExpansion & operator+= (const SphericalExpansion & mp2)
|
|
261
418
|
{
|
|
262
419
|
size_t commonsize = min(SH().Coefs().Size(), mp2.SH().Coefs().Size());
|
|
263
420
|
SH().Coefs().Range(commonsize) += mp2.SH().Coefs().Range(commonsize);
|
|
@@ -268,18 +425,27 @@ namespace ngfem
|
|
|
268
425
|
entry_type EvalDirectionalDerivative (Vec<3> x, Vec<3> d) const;
|
|
269
426
|
|
|
270
427
|
void AddCharge (Vec<3> x, entry_type c);
|
|
271
|
-
void AddDipole (Vec<3> x, Vec<3>
|
|
272
|
-
void
|
|
428
|
+
void AddDipole (Vec<3> x, Vec<3> dir, entry_type c);
|
|
429
|
+
/// Adds a charge c and a dipole (direction dir, strength c2) located at the
/// same point x. Currently just the two separate contributions, one after the other.
void AddChargeDipole (Vec<3> x, entry_type c, Vec<3> dir, entry_type c2)
{
  // TODO: add them at once
  AddCharge (x, c);         // charge part
  AddDipole (x, dir, c2);   // dipole part
}
|
|
273
435
|
|
|
436
|
+
void AddPlaneWave (Vec<3> d, entry_type c);
|
|
437
|
+
void AddCurrent (Vec<3> ap, Vec<3> ep, Complex j, int num=100);
|
|
274
438
|
|
|
275
|
-
|
|
439
|
+
|
|
440
|
+
void ChangeRTypTo (double new_rtyp)
|
|
276
441
|
{
|
|
277
|
-
double fac =
|
|
442
|
+
double fac = RADIAL::Scale(kappa, rtyp) / RADIAL::Scale(kappa, new_rtyp);
|
|
278
443
|
double prod = 1;
|
|
279
444
|
for (int n = 0; n <= sh.Order(); n++, prod*= fac)
|
|
280
445
|
sh.CoefsN(n) *= prod;
|
|
281
|
-
|
|
446
|
+
rtyp = new_rtyp;
|
|
282
447
|
}
|
|
448
|
+
|
|
283
449
|
|
|
284
450
|
Vector<double> Spectrum (bool scaled) const
|
|
285
451
|
{
|
|
@@ -288,14 +454,14 @@ namespace ngfem
|
|
|
288
454
|
for (int n = 0; n <= Order(); n++)
|
|
289
455
|
{
|
|
290
456
|
spec(n) = fac * L2Norm2(sh.CoefsN(n));
|
|
291
|
-
if (!scaled) fac *= sqr(
|
|
457
|
+
if (!scaled) fac *= sqr(Scale());
|
|
292
458
|
}
|
|
293
459
|
return spec;
|
|
294
460
|
}
|
|
295
461
|
|
|
296
462
|
|
|
297
463
|
template <typename TARGET>
|
|
298
|
-
void Transform (
|
|
464
|
+
void Transform (SphericalExpansion<TARGET,entry_type> & target, Vec<3> dist) const
|
|
299
465
|
{
|
|
300
466
|
if (target.SH().Order() < 0) return;
|
|
301
467
|
if (SH().Order() < 0)
|
|
@@ -304,24 +470,16 @@ namespace ngfem
|
|
|
304
470
|
return;
|
|
305
471
|
}
|
|
306
472
|
|
|
307
|
-
static Timer t("mptool Transform "+ToString(typeid(RADIAL).name())+ToString(typeid(TARGET).name()));
|
|
308
|
-
RegionTimer reg(t);
|
|
473
|
+
// static Timer t("mptool Transform "+ToString(typeid(RADIAL).name())+ToString(typeid(TARGET).name()));
|
|
474
|
+
// RegionTimer reg(t);
|
|
309
475
|
|
|
310
|
-
|
|
311
|
-
double theta, phi;
|
|
312
|
-
|
|
313
|
-
if (len < 1e-30)
|
|
314
|
-
theta = 0;
|
|
315
|
-
else
|
|
316
|
-
theta = acos (dist(2) / len);
|
|
317
|
-
|
|
318
|
-
if (sqr(dist(0))+sqr(dist(1)) < 1e-30)
|
|
319
|
-
phi = 0;
|
|
320
|
-
else
|
|
321
|
-
phi = atan2(dist(1), dist(0));
|
|
476
|
+
auto [len, theta, phi] = SphericalCoordinates(dist);
|
|
322
477
|
|
|
323
478
|
|
|
324
|
-
|
|
479
|
+
// SphericalExpansion<RADIAL,entry_type> tmp{*this};
|
|
480
|
+
SphericalExpansion<RADIAL,entry_type> tmp(Order(), kappa, rtyp);
|
|
481
|
+
tmp.SH().Coefs() = SH().Coefs();
|
|
482
|
+
|
|
325
483
|
tmp.SH().RotateZ(phi);
|
|
326
484
|
tmp.SH().RotateY(theta);
|
|
327
485
|
|
|
@@ -332,60 +490,213 @@ namespace ngfem
|
|
|
332
490
|
}
|
|
333
491
|
|
|
334
492
|
template <typename TARGET>
|
|
335
|
-
void TransformAdd (
|
|
493
|
+
void TransformAdd (SphericalExpansion<TARGET,entry_type> & target, Vec<3> dist, bool atomic = false) const
|
|
336
494
|
{
|
|
337
495
|
if (SH().Order() < 0) return;
|
|
338
496
|
if (target.SH().Order() < 0) return;
|
|
339
497
|
|
|
340
|
-
|
|
498
|
+
SphericalExpansion<TARGET,entry_type> tmp{target};
|
|
341
499
|
Transform(tmp, dist);
|
|
342
|
-
|
|
500
|
+
if (!atomic)
|
|
501
|
+
target.SH().Coefs() += tmp.SH().Coefs();
|
|
502
|
+
else
|
|
503
|
+
for (int j = 0; j < target.SH().Coefs().Size(); j++)
|
|
504
|
+
AtomicAdd(target.SH().Coefs()[j], tmp.SH().Coefs()[j]);
|
|
343
505
|
}
|
|
344
506
|
|
|
345
507
|
template <typename TARGET>
|
|
346
|
-
void ShiftZ (double z,
|
|
508
|
+
void ShiftZ (double z, SphericalExpansion<TARGET,entry_type> & target);
|
|
509
|
+
|
|
347
510
|
|
|
511
|
+
template <typename TARGET>
|
|
512
|
+
void In2Out (SphericalExpansion<TARGET,entry_type> & target, double r) const
|
|
513
|
+
{
|
|
514
|
+
Vector<Complex> rad(Order()+1);
|
|
515
|
+
Vector<Complex> radout(target.Order()+1);
|
|
516
|
+
RADIAL::Eval(Order(), kappa, r, RTyp(), rad);
|
|
517
|
+
TARGET::Eval(target.Order(), kappa, r, target.RTyp(), radout);
|
|
518
|
+
target.SH().Coefs() = 0;
|
|
519
|
+
for (int j = 0; j <= std::min(Order(), target.Order()); j++)
|
|
520
|
+
target.SH().CoefsN(j) = rad(j)/radout(j) * SH().CoefsN(j);
|
|
521
|
+
}
|
|
348
522
|
};
|
|
349
523
|
|
|
350
524
|
|
|
351
525
|
|
|
352
526
|
// ***************** parameters ****************
|
|
353
527
|
|
|
528
|
+
/*
|
|
354
529
|
static constexpr int MPOrder (double rho_kappa)
|
|
355
530
|
{
|
|
356
|
-
return max (20, int(2*rho_kappa));
|
|
531
|
+
// return max (20, int(2*rho_kappa));
|
|
532
|
+
return 20+int(2*rho_kappa);
|
|
357
533
|
}
|
|
358
534
|
static constexpr int maxdirect = 100;
|
|
535
|
+
*/
|
|
536
|
+
|
|
537
|
+
|
|
538
|
+
template <typename SCAL, auto S>
|
|
539
|
+
inline auto VecVector2Matrix (FlatVector<Vec<S,SCAL>> vec)
|
|
540
|
+
{
|
|
541
|
+
return FlatMatrixFixWidth<S,SCAL> (vec.Size(), vec.Data()->Data());
|
|
542
|
+
}
|
|
543
|
+
|
|
544
|
+
inline auto VecVector2Matrix (FlatVector<Complex> vec)
|
|
545
|
+
{
|
|
546
|
+
return FlatMatrixFixWidth<1,Complex> (vec.Size(), vec.Data());
|
|
547
|
+
}
|
|
359
548
|
|
|
360
549
|
|
|
361
550
|
template <typename entry_type=Complex>
|
|
362
|
-
class
|
|
551
|
+
class SingularMLExpansion
|
|
363
552
|
{
|
|
553
|
+
using simd_entry_type = decltype(MakeSimd(declval<std::array<entry_type,FMM_SW>>()));
|
|
364
554
|
static Array<size_t> nodes_on_level;
|
|
365
555
|
|
|
556
|
+
struct RecordingSS
|
|
557
|
+
{
|
|
558
|
+
const SphericalExpansion<Singular,entry_type> * mp_source;
|
|
559
|
+
SphericalExpansion<Singular,entry_type> * mp_target;
|
|
560
|
+
Vec<3> dist;
|
|
561
|
+
double len, theta, phi;
|
|
562
|
+
bool flipz;
|
|
563
|
+
public:
|
|
564
|
+
RecordingSS() = default;
|
|
565
|
+
RecordingSS (const SphericalExpansion<Singular,entry_type> * amp_source,
|
|
566
|
+
SphericalExpansion<Singular,entry_type> * amp_target,
|
|
567
|
+
Vec<3> adist)
|
|
568
|
+
: mp_source(amp_source), mp_target(amp_target), dist(adist)
|
|
569
|
+
{
|
|
570
|
+
std::tie(len, theta, phi) = SphericalCoordinates(adist);
|
|
571
|
+
// flipz = false;
|
|
572
|
+
flipz = theta > M_PI/2;
|
|
573
|
+
if (flipz) theta = M_PI-theta;
|
|
574
|
+
}
|
|
575
|
+
};
|
|
576
|
+
|
|
577
|
+
|
|
578
|
+
/// Processes a batch of recorded transformations that are handled with one
/// common len and theta. With N = batch size * VecLength<entry_type>:
///  - N <= 1 or a single record: each record is transformed on its own
///    (TransformAdd with atomic = true),
///  - N <= 192: the smallest vectorized kernel width from
///    {3,4,6,12,24,48,96,192} that holds N is used,
///  - otherwise the batch is split into a head of 192/vec_length records and
///    the remainder, and both parts are processed recursively.
static void ProcessBatchSS(FlatArray<RecordingSS*> batch, double len, double theta) {
  constexpr int vec_length = VecLength<entry_type>;
  int batch_size = batch.Size();
  int N = batch_size * vec_length;
  // *testout << "Processing batch of size " << batch.Size() << ", with N = " << N << ", vec_length = " << vec_length << ", Type: " << typeid(entry_type).name() << ", len = " << len << ", theta = " << theta << endl;

  if (N <= 1 || batch_size <= 1) {
    // nothing to vectorize
    for (auto* rec : batch)
      rec->mp_source->TransformAdd(*rec->mp_target, rec->dist, true);
    return;
  }

  if (N <= 3)   { ProcessVectorizedBatchSS<3, vec_length>(batch, len, theta);   return; }
  if (N <= 4)   { ProcessVectorizedBatchSS<4, vec_length>(batch, len, theta);   return; }
  if (N <= 6)   { ProcessVectorizedBatchSS<6, vec_length>(batch, len, theta);   return; }
  if (N <= 12)  { ProcessVectorizedBatchSS<12, vec_length>(batch, len, theta);  return; }
  if (N <= 24)  { ProcessVectorizedBatchSS<24, vec_length>(batch, len, theta);  return; }
  if (N <= 48)  { ProcessVectorizedBatchSS<48, vec_length>(batch, len, theta);  return; }
  if (N <= 96)  { ProcessVectorizedBatchSS<96, vec_length>(batch, len, theta);  return; }
  if (N <= 192) { ProcessVectorizedBatchSS<192, vec_length>(batch, len, theta); return; }

  // Split large batches
  ProcessBatchSS(batch.Range(0, 192 / vec_length), len, theta);
  ProcessBatchSS(batch.Range(192 / vec_length, batch_size), len, theta);
}
|
|
619
|
+
|
|
620
|
+
template<int N, int vec_length>
|
|
621
|
+
static void ProcessVectorizedBatchSS(FlatArray<RecordingSS*> batch, double len, double theta) {
|
|
622
|
+
|
|
623
|
+
// *testout << "Processing vectorized S->S batch of size " << batch.Size() << ", with N = " << N << ", vec_length = " << vec_length << ", len = " << len << ", theta = " << theta << endl;
|
|
624
|
+
double kappa = batch[0]->mp_source->Kappa();
|
|
625
|
+
int so = batch[0]->mp_source->Order();
|
|
626
|
+
int to = batch[0]->mp_target->Order();
|
|
627
|
+
SphericalExpansion<Singular, Vec<N,Complex>> vec_source(so, kappa, batch[0]->mp_source->RTyp());
|
|
628
|
+
SphericalExpansion<Singular, Vec<N,Complex>> vec_target(to, kappa, batch[0]->mp_target->RTyp());
|
|
629
|
+
|
|
630
|
+
// Copy multipoles into vectorized multipole
|
|
631
|
+
for (int i = 0; i < batch.Size(); i++)
|
|
632
|
+
{
|
|
633
|
+
auto source_i = VecVector2Matrix (batch[i]->mp_source->SH().Coefs());
|
|
634
|
+
auto source_mati = VecVector2Matrix (vec_source.SH().Coefs()).Cols(i*vec_length, (i+1)*vec_length);
|
|
635
|
+
batch[i]->mp_source->SH().RotateZFlip(batch[i]->phi, batch[i]->flipz,
|
|
636
|
+
[source_i, source_mati] (size_t ii, Complex factor)
|
|
637
|
+
{
|
|
638
|
+
source_mati.Row(ii) = factor * source_i.Row(ii);
|
|
639
|
+
});
|
|
640
|
+
}
|
|
641
|
+
|
|
642
|
+
vec_source.SH().RotateY(theta, vec_source.SH().Order() >= 100);
|
|
643
|
+
vec_source.ShiftZ(-len, vec_target);
|
|
644
|
+
vec_target.SH().RotateY(-theta, vec_target.SH().Order() >= 100);
|
|
645
|
+
|
|
646
|
+
// Copy vectorized multipole into individual multipoles
|
|
647
|
+
for (int i = 0; i < batch.Size(); i++)
|
|
648
|
+
{
|
|
649
|
+
auto source_mati = VecVector2Matrix (vec_target.SH().Coefs()).Cols(i*vec_length, (i+1)*vec_length);
|
|
650
|
+
auto target_mati = VecVector2Matrix (batch[i]->mp_target->SH().Coefs());
|
|
651
|
+
batch[i]->mp_target->SH().RotateZFlip(-batch[i]->phi, batch[i]->flipz,
|
|
652
|
+
[source_mati, target_mati] (size_t ii, Complex factor)
|
|
653
|
+
{
|
|
654
|
+
AtomicAdd (target_mati.Row(ii), factor * source_mati.Row(ii));
|
|
655
|
+
});
|
|
656
|
+
}
|
|
657
|
+
}
|
|
658
|
+
|
|
366
659
|
struct Node
|
|
367
660
|
{
|
|
368
661
|
Vec<3> center;
|
|
369
662
|
double r;
|
|
370
663
|
int level;
|
|
371
664
|
std::array<unique_ptr<Node>,8> childs;
|
|
372
|
-
|
|
665
|
+
SphericalExpansion<Singular, entry_type> mp;
|
|
373
666
|
|
|
374
667
|
Array<tuple<Vec<3>, entry_type>> charges;
|
|
375
668
|
Array<tuple<Vec<3>, Vec<3>, entry_type>> dipoles;
|
|
669
|
+
Array<tuple<Vec<3>, entry_type, Vec<3>, entry_type>> chargedipoles;
|
|
376
670
|
Array<tuple<Vec<3>, Vec<3>, Complex,int>> currents;
|
|
671
|
+
|
|
672
|
+
using simd_entry_type = decltype(MakeSimd(declval<std::array<entry_type,FMM_SW>>()));
|
|
673
|
+
Array<tuple<Vec<3,SIMD<double,FMM_SW>>, simd_entry_type>> simd_charges;
|
|
674
|
+
Array<tuple<Vec<3,SIMD<double,FMM_SW>>, Vec<3,SIMD<double,FMM_SW>>, simd_entry_type>> simd_dipoles;
|
|
675
|
+
Array<tuple<Vec<3,SIMD<double,FMM_SW>>, simd_entry_type,
|
|
676
|
+
Vec<3,SIMD<double,FMM_SW>>, simd_entry_type>> simd_chargedipoles;
|
|
677
|
+
|
|
377
678
|
int total_sources;
|
|
679
|
+
const FMM_Parameters & fmm_params;
|
|
680
|
+
std::mutex node_mutex;
|
|
681
|
+
atomic<bool> have_childs{false};
|
|
378
682
|
|
|
379
|
-
Node (Vec<3> acenter, double ar, int alevel,
|
|
380
|
-
|
|
381
|
-
|
|
683
|
+
/// Creates a tree node with the given center, size ar and level. Its
/// expansion gets the order minorder + 2*ar*akappa (converted to int) and
/// uses ar as rtyp. The per-level node counter is incremented when the level
/// lies inside nodes_on_level.
Node (Vec<3> acenter, double ar, int alevel, double akappa, const FMM_Parameters & afmm_params)
  // : center(acenter), r(ar), level(alevel), mp(MPOrder(ar*akappa), akappa, ar), fmm_params(afmm_params)
  : center(acenter),
    r(ar),
    level(alevel),
    mp(static_cast<int>(afmm_params.minorder+2*ar*akappa), akappa, ar),
    fmm_params(afmm_params)
{
  const bool counted = level < nodes_on_level.Size();
  if (counted)
    nodes_on_level[level]++;
}
|
|
387
690
|
|
|
388
|
-
|
|
691
|
+
/// Index (0..7) of the child octant containing x: bit 0 is set if x lies
/// above the center in the first coordinate, bit 1 for the second and bit 2
/// for the third. Points exactly on a center plane go to the lower side.
int GetChildNum (Vec<3> x) const
{
  const int xbit = (x(0) > center(0)) ? 1 : 0;
  const int ybit = (x(1) > center(1)) ? 2 : 0;
  const int zbit = (x(2) > center(2)) ? 4 : 0;
  return xbit + ybit + zbit;
}
|
|
699
|
+
|
|
389
700
|
void CreateChilds()
|
|
390
701
|
{
|
|
391
702
|
if (childs[0]) throw Exception("have already childs");
|
|
@@ -395,78 +706,127 @@ namespace ngfem
|
|
|
395
706
|
cc(0) += (i&1) ? r/2 : -r/2;
|
|
396
707
|
cc(1) += (i&2) ? r/2 : -r/2;
|
|
397
708
|
cc(2) += (i&4) ? r/2 : -r/2;
|
|
398
|
-
childs[i] = make_unique<Node> (cc, r/2, level+1,
|
|
709
|
+
childs[i] = make_unique<Node> (cc, r/2, level+1, mp.Kappa(), fmm_params);
|
|
399
710
|
}
|
|
711
|
+
have_childs = true;
|
|
400
712
|
}
|
|
401
713
|
|
|
402
714
|
|
|
715
|
+
void SendSourcesToChilds()
|
|
716
|
+
{
|
|
717
|
+
CreateChilds();
|
|
718
|
+
|
|
719
|
+
for (auto [x,c] : charges)
|
|
720
|
+
AddCharge (x,c);
|
|
721
|
+
for (auto [x,d,c] : dipoles)
|
|
722
|
+
AddDipole (x,d,c);
|
|
723
|
+
for (auto [x,c,d,c2] : chargedipoles)
|
|
724
|
+
AddChargeDipole (x,c,d,c2);
|
|
725
|
+
for (auto [sp,ep,j,num] : currents)
|
|
726
|
+
AddCurrent (sp,ep,j,num);
|
|
727
|
+
|
|
728
|
+
charges.DeleteAll();
|
|
729
|
+
dipoles.DeleteAll();
|
|
730
|
+
chargedipoles.DeleteAll();
|
|
731
|
+
currents.DeleteAll();
|
|
732
|
+
}
|
|
733
|
+
|
|
734
|
+
|
|
403
735
|
/// Adds a charge c at position x to this subtree.
/// If the node already has childs the charge is handed to the matching child.
/// Otherwise it is stored in this node under node_mutex; afterwards the
/// node's sources are sent to newly created childs unless
///  - level > 20, or
///  - fewer than fmm_params.maxdirect charges are stored and r*kappa < 5.
void AddCharge (Vec<3> x, entry_type c)
{
  // directly send to childs
  auto PassDown = [&] () { childs[GetChildNum(x)] -> AddCharge(x, c); };

  // quick check without locking
  if (have_childs)
    {
      PassDown();
      return;
    }

  lock_guard<mutex> guard(node_mutex);

  // test again after locking: childs may have been created in the meantime
  if (have_childs)
    {
      PassDown();
      return;
    }

  charges.Append( tuple{x,c} );

  // if (r*mp.Kappa() < 1e-8) return;
  if (level > 20) return;

  const bool keep_here = charges.Size() < fmm_params.maxdirect && r*mp.Kappa() < 5;
  if (keep_here)
    return;

  SendSourcesToChilds();
}
|
|
435
763
|
|
|
436
764
|
|
|
437
765
|
void AddDipole (Vec<3> x, Vec<3> d, entry_type c)
|
|
438
766
|
{
|
|
439
|
-
if (
|
|
767
|
+
if (have_childs)
|
|
440
768
|
{
|
|
441
769
|
// directly send to childs:
|
|
442
|
-
|
|
443
|
-
int childnum = 0;
|
|
444
|
-
if (x(0) > center(0)) childnum += 1;
|
|
445
|
-
if (x(1) > center(1)) childnum += 2;
|
|
446
|
-
if (x(2) > center(2)) childnum += 4;
|
|
770
|
+
int childnum = GetChildNum(x);
|
|
447
771
|
childs[childnum] -> AddDipole(x, d, c);
|
|
448
772
|
return;
|
|
449
773
|
}
|
|
450
774
|
|
|
451
|
-
|
|
775
|
+
lock_guard<mutex> guard(node_mutex);
|
|
452
776
|
|
|
453
|
-
if (
|
|
777
|
+
if (have_childs)
|
|
778
|
+
{
|
|
779
|
+
// directly send to childs:
|
|
780
|
+
int childnum = GetChildNum(x);
|
|
781
|
+
childs[childnum] -> AddDipole(x, d, c);
|
|
782
|
+
return;
|
|
783
|
+
}
|
|
784
|
+
|
|
785
|
+
dipoles.Append (tuple{x,d,c});
|
|
786
|
+
|
|
787
|
+
if (level > 20) return;
|
|
788
|
+
if (dipoles.Size() < fmm_params.maxdirect)
|
|
454
789
|
return;
|
|
790
|
+
|
|
791
|
+
SendSourcesToChilds();
|
|
792
|
+
}
|
|
793
|
+
|
|
794
|
+
|
|
795
|
+
void AddChargeDipole (Vec<3> x, entry_type c, Vec<3> dir, entry_type c2)
|
|
796
|
+
{
|
|
797
|
+
if (have_childs)
|
|
798
|
+
{
|
|
799
|
+
// directly send to childs:
|
|
800
|
+
int childnum = GetChildNum(x);
|
|
801
|
+
childs[childnum] -> AddChargeDipole(x, c, dir, c2);
|
|
802
|
+
return;
|
|
803
|
+
}
|
|
804
|
+
|
|
805
|
+
lock_guard<mutex> guard(node_mutex);
|
|
806
|
+
|
|
807
|
+
if (have_childs)
|
|
808
|
+
{
|
|
809
|
+
// directly send to childs:
|
|
810
|
+
int childnum = GetChildNum(x);
|
|
811
|
+
childs[childnum] -> AddChargeDipole(x, c, dir, c2);
|
|
812
|
+
return;
|
|
813
|
+
}
|
|
455
814
|
|
|
456
|
-
|
|
815
|
+
chargedipoles.Append (tuple{x,c,dir,c2});
|
|
457
816
|
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
for (auto [x,d,c] : dipoles)
|
|
461
|
-
AddDipole (x,d,c);
|
|
462
|
-
for (auto [sp,ep,j,num] : currents)
|
|
463
|
-
AddCurrent (sp,ep,j,num);
|
|
817
|
+
if (chargedipoles.Size() < fmm_params.maxdirect || r < 1e-8)
|
|
818
|
+
return;
|
|
464
819
|
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
820
|
+
SendSourcesToChilds();
|
|
821
|
+
|
|
822
|
+
/*
|
|
823
|
+
AddCharge (x, c);
|
|
824
|
+
AddDipole (x, dir, c2);
|
|
825
|
+
*/
|
|
468
826
|
}
|
|
469
827
|
|
|
828
|
+
|
|
829
|
+
// not parallel yet
|
|
470
830
|
void AddCurrent (Vec<3> sp, Vec<3> ep, Complex j, int num)
|
|
471
831
|
{
|
|
472
832
|
if (childs[0])
|
|
@@ -475,7 +835,7 @@ namespace ngfem
|
|
|
475
835
|
Array<double> split;
|
|
476
836
|
split.Append(0);
|
|
477
837
|
for (int i = 0; i < 3; i++)
|
|
478
|
-
if (sp(i) < center(i) != ep(i) < center(i))
|
|
838
|
+
if ((sp(i) < center(i)) != (ep(i) < center(i)))
|
|
479
839
|
split += (center(i)-sp(i)) / (ep(i)-sp(i)); // segment cuts i-th coordinate plane
|
|
480
840
|
split.Append(1);
|
|
481
841
|
BubbleSort(split);
|
|
@@ -496,9 +856,15 @@ namespace ngfem
|
|
|
496
856
|
}
|
|
497
857
|
return;
|
|
498
858
|
}
|
|
499
|
-
|
|
859
|
+
|
|
500
860
|
currents.Append (tuple{sp,ep,j,num});
|
|
501
861
|
|
|
862
|
+
// if (currents.Size() < maxdirect || r < 1e-8)
|
|
863
|
+
if (currents.Size() < 4 || r < 1e-8)
|
|
864
|
+
return;
|
|
865
|
+
|
|
866
|
+
SendSourcesToChilds();
|
|
867
|
+
/*
|
|
502
868
|
// if (currents.Size() < maxdirect || r < 1e-8)
|
|
503
869
|
if (currents.Size() < 4 || r < 1e-8)
|
|
504
870
|
return;
|
|
@@ -515,6 +881,7 @@ namespace ngfem
|
|
|
515
881
|
charges.SetSize0();
|
|
516
882
|
dipoles.SetSize0();
|
|
517
883
|
currents.SetSize0();
|
|
884
|
+
*/
|
|
518
885
|
}
|
|
519
886
|
|
|
520
887
|
|
|
@@ -530,47 +897,169 @@ namespace ngfem
|
|
|
530
897
|
return sum;
|
|
531
898
|
}
|
|
532
899
|
|
|
533
|
-
|
|
534
|
-
if (double rho = L2Norm(p-x); rho > 0)
|
|
535
|
-
sum += (1/(4*M_PI))*exp(Complex(0,rho*mp.Kappa())) / rho * c;
|
|
536
|
-
|
|
537
|
-
for (auto [x,d,c] : dipoles)
|
|
538
|
-
if (double rho = L2Norm(p-x); rho > 0)
|
|
539
|
-
{
|
|
540
|
-
Vec<3> drhodp = 1.0/rho * (p-x);
|
|
541
|
-
Complex dGdrho = (1/(4*M_PI))*exp(Complex(0,rho*mp.Kappa())) *
|
|
542
|
-
(Complex(0, mp.Kappa())/rho - 1.0/sqr(rho));
|
|
543
|
-
sum += dGdrho * InnerProduct(drhodp, d) * c;
|
|
544
|
-
}
|
|
545
|
-
|
|
546
|
-
for (auto [sp,ep,j,num] : currents)
|
|
900
|
+
if (simd_charges.Size())
|
|
547
901
|
{
|
|
548
|
-
//
|
|
902
|
+
// static Timer t("mptool singmp, evaluate, simd charges"); RegionTimer r(t);
|
|
903
|
+
// t.AddFlops (charges.Size());
|
|
549
904
|
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
for (int i = 0; i < num; i++)
|
|
905
|
+
simd_entry_type vsum{0.0};
|
|
906
|
+
if (mp.Kappa() < 1e-12)
|
|
553
907
|
{
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
if (double rho = L2Norm(p-x); rho > 0)
|
|
908
|
+
for (auto [x,c] : simd_charges)
|
|
557
909
|
{
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
910
|
+
auto rho = L2Norm(p-x);
|
|
911
|
+
auto kernel = 1/(4*M_PI)/rho;
|
|
912
|
+
kernel = If(rho > 0.0, kernel, SIMD<double,FMM_SW>(0.0));
|
|
913
|
+
vsum += kernel * c;
|
|
914
|
+
|
|
915
|
+
/*
|
|
916
|
+
auto rho2 = L2Norm2(p-x);
|
|
917
|
+
auto kernel = (1/(4*M_PI)) * rsqrt(rho2);
|
|
918
|
+
kernel = If(rho2 > 0.0, kernel, SIMD<double,FMM_SW>(0.0));
|
|
919
|
+
vsum += kernel * c;
|
|
920
|
+
*/
|
|
564
921
|
}
|
|
565
922
|
}
|
|
566
|
-
|
|
567
|
-
|
|
923
|
+
else if (mp.Kappa() < 1e-8)
|
|
924
|
+
for (auto [x,c] : simd_charges)
|
|
925
|
+
{
|
|
926
|
+
auto rho = L2Norm(p-x);
|
|
927
|
+
auto kernel = (1/(4*M_PI))*SIMD<Complex,FMM_SW> (1,rho*mp.Kappa()) / rho;
|
|
928
|
+
kernel = If(rho > 0.0, kernel, SIMD<Complex,FMM_SW>(0.0));
|
|
929
|
+
vsum += kernel * c;
|
|
930
|
+
}
|
|
931
|
+
else
|
|
932
|
+
for (auto [x,c] : simd_charges)
|
|
933
|
+
{
|
|
934
|
+
auto rho = L2Norm(p-x);
|
|
935
|
+
auto [si,co] = sincos(rho*mp.Kappa());
|
|
936
|
+
auto kernel = (1/(4*M_PI))*SIMD<Complex,FMM_SW>(co,si) / rho;
|
|
937
|
+
kernel = If(rho > 0.0, kernel, SIMD<Complex,FMM_SW>(0.0));
|
|
938
|
+
vsum += kernel * c;
|
|
939
|
+
}
|
|
940
|
+
|
|
941
|
+
sum += HSum(vsum);
|
|
942
|
+
}
|
|
943
|
+
else
|
|
944
|
+
{
|
|
945
|
+
if (mp.Kappa() < 1e-8)
|
|
946
|
+
{
|
|
947
|
+
for (auto [x,c] : charges)
|
|
948
|
+
if (double rho = L2Norm(p-x); rho > 0)
|
|
949
|
+
sum += (1/(4*M_PI))*Complex(1,rho*mp.Kappa()) / rho * c;
|
|
950
|
+
}
|
|
951
|
+
else
|
|
952
|
+
for (auto [x,c] : charges)
|
|
953
|
+
if (double rho = L2Norm(p-x); rho > 0)
|
|
954
|
+
sum += (1/(4*M_PI))*exp(Complex(0,rho*mp.Kappa())) / rho * c;
|
|
955
|
+
}
|
|
956
|
+
|
|
957
|
+
if (simd_dipoles.Size())
|
|
958
|
+
{
|
|
959
|
+
// static Timer t("mptool singmp, evaluate, simd dipoles"); RegionTimer r(t);
|
|
960
|
+
|
|
961
|
+
simd_entry_type vsum{0.0};
|
|
962
|
+
for (auto [x,d,c] : simd_dipoles)
|
|
963
|
+
{
|
|
964
|
+
auto rho = L2Norm(p-x);
|
|
965
|
+
auto drhodp = (1.0/rho) * (p-x);
|
|
966
|
+
auto [si,co] = sincos(rho*mp.Kappa());
|
|
967
|
+
auto dGdrho = (1/(4*M_PI))*SIMD<Complex,FMM_SW>(co,si) *
|
|
968
|
+
(-1.0/(rho*rho) + SIMD<Complex,FMM_SW>(0, mp.Kappa())/rho);
|
|
969
|
+
auto kernel = dGdrho * InnerProduct(drhodp, d);
|
|
970
|
+
kernel = If(rho > 0.0, kernel, SIMD<Complex,FMM_SW>(0.0));
|
|
971
|
+
vsum += kernel * c;
|
|
972
|
+
}
|
|
973
|
+
sum += HSum(vsum);
|
|
974
|
+
}
|
|
975
|
+
else
|
|
976
|
+
{
|
|
977
|
+
for (auto [x,d,c] : dipoles)
|
|
978
|
+
if (double rho = L2Norm(p-x); rho > 0)
|
|
979
|
+
{
|
|
980
|
+
Vec<3> drhodp = 1.0/rho * (p-x);
|
|
981
|
+
Complex dGdrho = (1/(4*M_PI))*exp(Complex(0,rho*mp.Kappa())) *
|
|
982
|
+
(Complex(0, mp.Kappa())/rho - 1.0/sqr(rho));
|
|
983
|
+
sum += dGdrho * InnerProduct(drhodp, d) * c;
|
|
984
|
+
}
|
|
985
|
+
}
|
|
986
|
+
|
|
987
|
+
|
|
988
|
+
|
|
989
|
+
if (simd_chargedipoles.Size())
|
|
990
|
+
{
|
|
991
|
+
// static Timer t("mptool singmp, evaluate, simd chargedipoles"); RegionTimer r(t);
|
|
992
|
+
// t.AddFlops (simd_chargedipoles.Size()*FMM_SW);
|
|
993
|
+
|
|
994
|
+
simd_entry_type vsum{0.0};
|
|
995
|
+
for (auto [x,c,d,c2] : simd_chargedipoles)
|
|
996
|
+
{
|
|
997
|
+
auto rho = L2Norm(p-x);
|
|
998
|
+
auto rhokappa = rho*mp.Kappa();
|
|
999
|
+
auto invrho = If(rho>0.0, 1.0/rho, SIMD<double,FMM_SW>(0.0));
|
|
1000
|
+
auto [si,co] = sincos(rhokappa);
|
|
1001
|
+
|
|
1002
|
+
auto kernelc = (1/(4*M_PI))*invrho*SIMD<Complex,FMM_SW>(co,si);
|
|
1003
|
+
vsum += kernelc * c;
|
|
1004
|
+
|
|
1005
|
+
auto kernel =
|
|
1006
|
+
invrho*invrho * InnerProduct(p-x, d) *
|
|
1007
|
+
kernelc * SIMD<Complex,FMM_SW>(-1.0, rhokappa);
|
|
1008
|
+
|
|
1009
|
+
vsum += kernel * c2;
|
|
1010
|
+
}
|
|
1011
|
+
sum += HSum(vsum);
|
|
1012
|
+
}
|
|
1013
|
+
else
|
|
1014
|
+
{
|
|
1015
|
+
// static Timer t("mptool singmp, evaluate, chargedipoles"); RegionTimer r(t);
|
|
1016
|
+
// t.AddFlops (chargedipoles.Size());
|
|
1017
|
+
|
|
1018
|
+
for (auto [x,c,d,c2] : chargedipoles)
|
|
1019
|
+
if (double rho = L2Norm(p-x); rho > 0)
|
|
1020
|
+
{
|
|
1021
|
+
sum += (1/(4*M_PI))*exp(Complex(0,rho*mp.Kappa())) / rho * c;
|
|
1022
|
+
|
|
1023
|
+
Vec<3> drhodp = 1.0/rho * (p-x);
|
|
1024
|
+
Complex dGdrho = (1/(4*M_PI))*exp(Complex(0,rho*mp.Kappa())) *
|
|
1025
|
+
(Complex(0, mp.Kappa())/rho - 1.0/sqr(rho));
|
|
1026
|
+
|
|
1027
|
+
sum += dGdrho * InnerProduct(drhodp, d) * c2;
|
|
1028
|
+
}
|
|
1029
|
+
}
|
|
1030
|
+
|
|
1031
|
+
|
|
1032
|
+
|
|
1033
|
+
|
|
1034
|
+
|
|
1035
|
+
for (auto [sp,ep,j,num] : currents)
|
|
1036
|
+
{
|
|
1037
|
+
// should use explizit formula instead ...
|
|
1038
|
+
|
|
1039
|
+
Vec<3> tau = ep-sp;
|
|
1040
|
+
Vec<3> tau_num = 1.0/num * tau;
|
|
1041
|
+
for (int i = 0; i < num; i++)
|
|
1042
|
+
{
|
|
1043
|
+
Vec<3> x = sp+(i+0.5)*tau_num;
|
|
1044
|
+
|
|
1045
|
+
if (double rho = L2Norm(p-x); rho > 0)
|
|
1046
|
+
{
|
|
1047
|
+
Vec<3> drhodp = 1.0/rho * (p-x);
|
|
1048
|
+
Complex dGdrho = (1/(4*M_PI))*exp(Complex(0,rho*mp.Kappa())) *
|
|
1049
|
+
(Complex(0, mp.Kappa())/rho - 1.0/sqr(rho));
|
|
1050
|
+
|
|
1051
|
+
if constexpr (std::is_same<entry_type, Vec<3,Complex>>())
|
|
1052
|
+
sum += j*dGdrho * Cross(drhodp, tau_num);
|
|
1053
|
+
}
|
|
1054
|
+
}
|
|
1055
|
+
}
|
|
1056
|
+
|
|
568
1057
|
return sum;
|
|
569
1058
|
}
|
|
570
1059
|
|
|
571
1060
|
entry_type EvaluateDeriv(Vec<3> p, Vec<3> d) const
|
|
572
1061
|
{
|
|
573
|
-
entry_type sum
|
|
1062
|
+
entry_type sum{0.0};
|
|
574
1063
|
if (childs[0])
|
|
575
1064
|
{
|
|
576
1065
|
for (auto & child : childs)
|
|
@@ -579,7 +1068,16 @@ namespace ngfem
|
|
|
579
1068
|
}
|
|
580
1069
|
|
|
581
1070
|
if (dipoles.Size())
|
|
582
|
-
|
|
1071
|
+
{
|
|
1072
|
+
static int cnt = 0;
|
|
1073
|
+
cnt++;
|
|
1074
|
+
if (cnt < 3)
|
|
1075
|
+
cout << "we know what we do - evaluateDeriv not implemented for dipoles in SingularMLExpansion" << endl;
|
|
1076
|
+
// return sum;
|
|
1077
|
+
// throw Exception("EvaluateDeriv not implemented for dipoles in SingularMLExpansion");
|
|
1078
|
+
}
|
|
1079
|
+
if (chargedipoles.Size())
|
|
1080
|
+
throw Exception("EvaluateDeriv not implemented for dipoles in SingularMLExpansion");
|
|
583
1081
|
|
|
584
1082
|
for (auto [x,c] : charges)
|
|
585
1083
|
if (double rho = L2Norm(p-x); rho > 0)
|
|
@@ -594,7 +1092,7 @@ namespace ngfem
|
|
|
594
1092
|
|
|
595
1093
|
void CalcTotalSources()
|
|
596
1094
|
{
|
|
597
|
-
total_sources = charges.Size() + dipoles.Size();
|
|
1095
|
+
total_sources = charges.Size() + dipoles.Size() + chargedipoles.Size();
|
|
598
1096
|
for (auto & child : childs)
|
|
599
1097
|
if (child)
|
|
600
1098
|
{
|
|
@@ -603,46 +1101,111 @@ namespace ngfem
|
|
|
603
1101
|
}
|
|
604
1102
|
}
|
|
605
1103
|
|
|
606
|
-
void CalcMP()
|
|
1104
|
+
void CalcMP(Array<RecordingSS> * recording, Array<Node*> * nodes_to_process)
|
|
607
1105
|
{
|
|
608
|
-
mp.SH().Coefs() = 0.0;
|
|
1106
|
+
// mp.SH().Coefs() = 0.0;
|
|
609
1107
|
if (childs[0])
|
|
610
1108
|
{
|
|
611
|
-
if (total_sources < 1000)
|
|
1109
|
+
if (total_sources < 1000 || recording)
|
|
612
1110
|
for (auto & child : childs)
|
|
613
|
-
child->CalcMP();
|
|
1111
|
+
child->CalcMP(recording, nodes_to_process);
|
|
614
1112
|
else
|
|
615
1113
|
ParallelFor (8, [&] (int nr)
|
|
616
1114
|
{
|
|
617
|
-
childs[nr] -> CalcMP();
|
|
1115
|
+
childs[nr] -> CalcMP(recording, nodes_to_process);
|
|
618
1116
|
});
|
|
619
1117
|
|
|
620
1118
|
|
|
621
|
-
for (auto & child : childs)
|
|
622
|
-
child->mp.
|
|
1119
|
+
for (auto & child : childs){
|
|
1120
|
+
if (recording && child->mp.SH().Coefs().Size() > 0)
|
|
1121
|
+
*recording += RecordingSS(&child->mp, &mp, center-child->center);
|
|
1122
|
+
else
|
|
1123
|
+
child->mp.TransformAdd(mp, center-child->center);
|
|
1124
|
+
}
|
|
623
1125
|
}
|
|
624
1126
|
else
|
|
625
1127
|
{
|
|
626
|
-
if (charges.Size()+dipoles.Size()+currents.Size() == 0)
|
|
1128
|
+
if (charges.Size()+dipoles.Size()+chargedipoles.Size()+currents.Size() == 0)
|
|
627
1129
|
{
|
|
628
|
-
mp =
|
|
1130
|
+
mp = SphericalExpansion<Singular,entry_type> (-1, mp.Kappa(), 1.);
|
|
629
1131
|
return;
|
|
630
1132
|
}
|
|
631
1133
|
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
for (
|
|
636
|
-
|
|
1134
|
+
// make simd charges, comment this block for testing ...
|
|
1135
|
+
simd_charges.SetSize( (charges.Size()+FMM_SW-1)/FMM_SW);
|
|
1136
|
+
size_t i = 0, ii = 0;
|
|
1137
|
+
for ( ; i+FMM_SW <= charges.Size(); i+=FMM_SW, ii++)
|
|
1138
|
+
{
|
|
1139
|
+
std::array<tuple<Vec<3>,entry_type>, FMM_SW> ca;
|
|
1140
|
+
for (int j = 0; j < FMM_SW; j++) ca[j] = charges[i+j];
|
|
1141
|
+
simd_charges[ii] = MakeSimd(ca);
|
|
1142
|
+
}
|
|
1143
|
+
if (i < charges.Size())
|
|
1144
|
+
{
|
|
1145
|
+
std::array<tuple<Vec<3>,entry_type>, FMM_SW> ca;
|
|
1146
|
+
int j = 0;
|
|
1147
|
+
for ( ; i+j < charges.Size(); j++) ca[j] = charges[i+j];
|
|
1148
|
+
for ( ; j < FMM_SW; j++) ca[j] = tuple( get<0>(ca[0]), entry_type{0.0} );
|
|
1149
|
+
simd_charges[ii] = MakeSimd(ca);
|
|
1150
|
+
}
|
|
1151
|
+
|
|
1152
|
+
simd_dipoles.SetSize( (dipoles.Size()+FMM_SW-1)/FMM_SW);
|
|
1153
|
+
i = 0, ii = 0;
|
|
1154
|
+
for ( ; i+FMM_SW <= dipoles.Size(); i+=FMM_SW, ii++)
|
|
1155
|
+
{
|
|
1156
|
+
std::array<tuple<Vec<3>,Vec<3>,entry_type>, FMM_SW> di;
|
|
1157
|
+
for (int j = 0; j < FMM_SW; j++) di[j] = dipoles[i+j];
|
|
1158
|
+
simd_dipoles[ii] = MakeSimd(di);
|
|
1159
|
+
}
|
|
1160
|
+
if (i < dipoles.Size())
|
|
1161
|
+
{
|
|
1162
|
+
std::array<tuple<Vec<3>,Vec<3>,entry_type>, FMM_SW> di;
|
|
1163
|
+
int j = 0;
|
|
1164
|
+
for ( ; i+j < dipoles.Size(); j++) di[j] = dipoles[i+j];
|
|
1165
|
+
for ( ; j < FMM_SW; j++) di[j] = tuple( get<0>(di[0]), get<1>(di[0]), entry_type{0.0} );
|
|
1166
|
+
simd_dipoles[ii] = MakeSimd(di);
|
|
1167
|
+
}
|
|
637
1168
|
|
|
638
|
-
|
|
639
|
-
|
|
1169
|
+
|
|
1170
|
+
simd_chargedipoles.SetSize( (chargedipoles.Size()+FMM_SW-1)/FMM_SW);
|
|
1171
|
+
i = 0, ii = 0;
|
|
1172
|
+
for ( ; i+FMM_SW <= chargedipoles.Size(); i+=FMM_SW, ii++)
|
|
1173
|
+
{
|
|
1174
|
+
std::array<tuple<Vec<3>,entry_type,Vec<3>,entry_type>, FMM_SW> di;
|
|
1175
|
+
for (int j = 0; j < FMM_SW; j++) di[j] = chargedipoles[i+j];
|
|
1176
|
+
simd_chargedipoles[ii] = MakeSimd(di);
|
|
1177
|
+
}
|
|
1178
|
+
if (i < chargedipoles.Size())
|
|
1179
|
+
{
|
|
1180
|
+
std::array<tuple<Vec<3>,entry_type,Vec<3>,entry_type>, FMM_SW> di;
|
|
1181
|
+
int j = 0;
|
|
1182
|
+
for ( ; i+j < chargedipoles.Size(); j++) di[j] = chargedipoles[i+j];
|
|
1183
|
+
for ( ; j < FMM_SW; j++) di[j] = tuple( get<0>(di[0]), entry_type{0.0}, get<2>(di[0]), entry_type{0.0} );
|
|
1184
|
+
simd_chargedipoles[ii] = MakeSimd(di);
|
|
1185
|
+
}
|
|
1186
|
+
|
|
1187
|
+
|
|
1188
|
+
if (nodes_to_process)
|
|
1189
|
+
*nodes_to_process += this;
|
|
1190
|
+
else {
|
|
1191
|
+
for (auto [x,c] : charges)
|
|
1192
|
+
mp.AddCharge (x-center,c);
|
|
1193
|
+
|
|
1194
|
+
for (auto [x,d,c] : dipoles)
|
|
1195
|
+
mp.AddDipole (x-center, d, c);
|
|
1196
|
+
|
|
1197
|
+
for (auto [x,c,d,c2] : chargedipoles)
|
|
1198
|
+
mp.AddChargeDipole (x-center, c, d, c2);
|
|
1199
|
+
|
|
1200
|
+
for (auto [sp,ep,j,num] : currents)
|
|
1201
|
+
mp.AddCurrent (sp-center, ep-center, j, num);
|
|
1202
|
+
}
|
|
640
1203
|
}
|
|
641
1204
|
}
|
|
642
1205
|
|
|
643
1206
|
entry_type EvaluateMP(Vec<3> p) const
|
|
644
1207
|
{
|
|
645
|
-
if (charges.Size() || dipoles.Size())
|
|
1208
|
+
if (charges.Size() || dipoles.Size() || chargedipoles.Size())
|
|
646
1209
|
return Evaluate(p);
|
|
647
1210
|
|
|
648
1211
|
if (L2Norm(p-center) > 3*r)
|
|
@@ -662,7 +1225,7 @@ namespace ngfem
|
|
|
662
1225
|
// cout << "EvaluateMPDeriv Singular, p = " << p << ", d = " << d << ", r = " << r << ", center = " << center << endl;
|
|
663
1226
|
// cout << "Norm: " << L2Norm(p-center) << " > " << 3*r << endl;
|
|
664
1227
|
// cout << "charges.Size() = " << charges.Size() << ", dipoles.Size() = " << dipoles.Size() << endl;
|
|
665
|
-
if (charges.Size() || dipoles.Size() || !childs[0])
|
|
1228
|
+
if (charges.Size() || dipoles.Size() || chargedipoles.Size() || !childs[0])
|
|
666
1229
|
return EvaluateDeriv(p, d);
|
|
667
1230
|
|
|
668
1231
|
if (L2Norm(p-center) > 3*r)
|
|
@@ -685,6 +1248,8 @@ namespace ngfem
|
|
|
685
1248
|
ost << "xi = " << x << ", ci = " << c << endl;
|
|
686
1249
|
for (auto [x,d,c] : dipoles)
|
|
687
1250
|
ost << "xi = " << x << ", di = " << d << ", ci = " << c << endl;
|
|
1251
|
+
for (auto [x,c,d,c2] : chargedipoles)
|
|
1252
|
+
ost << "xi = " << x << ", c = " << c << ", di = " << d << ", ci = " << c2 << endl;
|
|
688
1253
|
|
|
689
1254
|
for (int i = 0; i < 8; i++)
|
|
690
1255
|
if (childs[i]) childs[i] -> Print (ost, i);
|
|
@@ -707,14 +1272,23 @@ namespace ngfem
|
|
|
707
1272
|
num += ch->NumCoefficients();
|
|
708
1273
|
return num;
|
|
709
1274
|
}
|
|
1275
|
+
|
|
1276
|
+
void TraverseTree (const std::function<void(Node&)> & func)
|
|
1277
|
+
{
|
|
1278
|
+
func(*this);
|
|
1279
|
+
for (auto & child : childs)
|
|
1280
|
+
if (child)
|
|
1281
|
+
child->TraverseTree(func);
|
|
1282
|
+
}
|
|
710
1283
|
};
|
|
711
1284
|
|
|
712
|
-
|
|
1285
|
+
FMM_Parameters fmm_params;
|
|
1286
|
+
Node root;
|
|
713
1287
|
bool havemp = false;
|
|
714
1288
|
|
|
715
1289
|
public:
|
|
716
|
-
|
|
717
|
-
: root(center, r, 0,
|
|
1290
|
+
SingularMLExpansion (Vec<3> center, double r, double kappa, FMM_Parameters _params = FMM_Parameters())
|
|
1291
|
+
: fmm_params(_params), root(center, r, 0, kappa, fmm_params)
|
|
718
1292
|
{
|
|
719
1293
|
nodes_on_level = 0;
|
|
720
1294
|
nodes_on_level[0] = 1;
|
|
@@ -722,16 +1296,21 @@ namespace ngfem
|
|
|
722
1296
|
|
|
723
1297
|
double Kappa() const { return root.mp.Kappa(); }
|
|
724
1298
|
|
|
725
|
-
void AddCharge(Vec<3> x,
|
|
1299
|
+
void AddCharge(Vec<3> x, entry_type c)
|
|
726
1300
|
{
|
|
727
1301
|
root.AddCharge(x, c);
|
|
728
1302
|
}
|
|
729
1303
|
|
|
730
|
-
void AddDipole(Vec<3> x, Vec<3> d,
|
|
1304
|
+
void AddDipole(Vec<3> x, Vec<3> d, entry_type c)
|
|
731
1305
|
{
|
|
732
1306
|
root.AddDipole(x, d, c);
|
|
733
1307
|
}
|
|
734
1308
|
|
|
1309
|
+
void AddChargeDipole(Vec<3> x, entry_type c, Vec<3> dir, entry_type c2)
|
|
1310
|
+
{
|
|
1311
|
+
root.AddChargeDipole(x, c, dir, c2);
|
|
1312
|
+
}
|
|
1313
|
+
|
|
735
1314
|
void AddCurrent (Vec<3> sp, Vec<3> ep, Complex j, int num)
|
|
736
1315
|
{
|
|
737
1316
|
if constexpr (!std::is_same<entry_type, Vec<3,Complex>>())
|
|
@@ -756,7 +1335,7 @@ namespace ngfem
|
|
|
756
1335
|
}
|
|
757
1336
|
*/
|
|
758
1337
|
}
|
|
759
|
-
|
|
1338
|
+
|
|
760
1339
|
void Print (ostream & ost) const
|
|
761
1340
|
{
|
|
762
1341
|
root.Print(ost);
|
|
@@ -775,6 +1354,10 @@ namespace ngfem
|
|
|
775
1354
|
void CalcMP()
|
|
776
1355
|
{
|
|
777
1356
|
static Timer t("mptool compute singular MLMP"); RegionTimer rg(t);
|
|
1357
|
+
static Timer ts2mp("mptool compute singular MLMP - source2mp");
|
|
1358
|
+
static Timer tS2S("mptool compute singular MLMP - S->S");
|
|
1359
|
+
static Timer trec("mptool comput singular recording");
|
|
1360
|
+
static Timer tsort("mptool comput singular sort");
|
|
778
1361
|
|
|
779
1362
|
/*
|
|
780
1363
|
int maxlevel = 0;
|
|
@@ -784,9 +1367,96 @@ namespace ngfem
|
|
|
784
1367
|
for (int i = 0; i <= maxlevel; i++)
|
|
785
1368
|
cout << "sing " << i << ": " << nodes_on_level[i] << endl;
|
|
786
1369
|
*/
|
|
1370
|
+
|
|
787
1371
|
root.CalcTotalSources();
|
|
788
|
-
|
|
1372
|
+
|
|
1373
|
+
if constexpr (false)
|
|
1374
|
+
// direct evaluation of S->S
|
|
1375
|
+
root.CalcMP(nullptr, nullptr);
|
|
1376
|
+
else
|
|
1377
|
+
{
|
|
1378
|
+
|
|
1379
|
+
Array<RecordingSS> recording;
|
|
1380
|
+
Array<Node*> nodes_to_process;
|
|
1381
|
+
|
|
1382
|
+
{
|
|
1383
|
+
RegionTimer reg(trec);
|
|
1384
|
+
root.CalcMP(&recording, &nodes_to_process);
|
|
1385
|
+
}
|
|
1386
|
+
|
|
1387
|
+
{
|
|
1388
|
+
RegionTimer rs2mp(ts2mp);
|
|
1389
|
+
ParallelFor(nodes_to_process.Size(), [&](int i)
|
|
1390
|
+
{
|
|
1391
|
+
auto node = nodes_to_process[i];
|
|
1392
|
+
for (auto [x,c]: node->charges)
|
|
1393
|
+
node->mp.AddCharge(x-node->center, c);
|
|
1394
|
+
for (auto [x,d,c]: node->dipoles)
|
|
1395
|
+
node->mp.AddDipole(x-node->center, d, c);
|
|
1396
|
+
for (auto [x,c,d,c2]: node->chargedipoles)
|
|
1397
|
+
node->mp.AddChargeDipole(x-node->center, c, d, c2);
|
|
1398
|
+
for (auto [sp,ep,j,num]: node->currents)
|
|
1399
|
+
node->mp.AddCurrent(sp-node->center, ep-node->center, j, num);
|
|
1400
|
+
}, TasksPerThread(4));
|
|
1401
|
+
}
|
|
1402
|
+
|
|
1403
|
+
{
|
|
1404
|
+
RegionTimer reg(tsort);
|
|
1405
|
+
QuickSort (recording, [] (auto & a, auto & b)
|
|
1406
|
+
{
|
|
1407
|
+
if (a.len < (1-1e-8) * b.len) return true;
|
|
1408
|
+
if (a.len > (1+1e-8) * b.len) return false;
|
|
1409
|
+
return a.theta < b.theta;
|
|
1410
|
+
});
|
|
1411
|
+
}
|
|
789
1412
|
|
|
1413
|
+
double current_len = -1e100;
|
|
1414
|
+
double current_theta = -1e100;
|
|
1415
|
+
Array<RecordingSS*> current_batch;
|
|
1416
|
+
Array<Array<RecordingSS*>> batch_group;
|
|
1417
|
+
Array<double> group_lengths;
|
|
1418
|
+
Array<double> group_thetas;
|
|
1419
|
+
for (auto & record : recording)
|
|
1420
|
+
{
|
|
1421
|
+
bool len_changed = fabs(record.len - current_len) > 1e-8;
|
|
1422
|
+
bool theta_changed = fabs(record.theta - current_theta) > 1e-8;
|
|
1423
|
+
if ((len_changed || theta_changed) && current_batch.Size() > 0) {
|
|
1424
|
+
batch_group.Append(current_batch);
|
|
1425
|
+
group_lengths.Append(current_len);
|
|
1426
|
+
group_thetas.Append(current_theta);
|
|
1427
|
+
current_batch.SetSize(0);
|
|
1428
|
+
}
|
|
1429
|
+
|
|
1430
|
+
current_len = record.len;
|
|
1431
|
+
current_theta = record.theta;
|
|
1432
|
+
current_batch.Append(&record);
|
|
1433
|
+
}
|
|
1434
|
+
|
|
1435
|
+
if (current_batch.Size() > 0) {
|
|
1436
|
+
batch_group.Append(current_batch);
|
|
1437
|
+
group_lengths.Append(current_len);
|
|
1438
|
+
group_thetas.Append(current_theta);
|
|
1439
|
+
}
|
|
1440
|
+
|
|
1441
|
+
{
|
|
1442
|
+
RegionTimer rS2S(tS2S);
|
|
1443
|
+
// ParallelFor(batch_group.Size(), [&](int i) {
|
|
1444
|
+
for (int i = 0; i < batch_group.Size(); i++){
|
|
1445
|
+
// *testout << "Processing batch " << i << " of size " << batch_group[i].Size() << ", with len = " << group_lengths[i] << ", theta = " << group_thetas[i] << endl;
|
|
1446
|
+
int chunk_size = 24;
|
|
1447
|
+
if (batch_group[i].Size() < chunk_size)
|
|
1448
|
+
ProcessBatchSS(batch_group[i], group_lengths[i], group_thetas[i]);
|
|
1449
|
+
else
|
|
1450
|
+
ParallelForRange(IntRange(batch_group[i].Size()), [&](IntRange range) {
|
|
1451
|
+
auto sub_batch = batch_group[i].Range(range.First(), range.Next());
|
|
1452
|
+
ProcessBatchSS(sub_batch, group_lengths[i], group_thetas[i]);
|
|
1453
|
+
}, TasksPerThread(4));
|
|
1454
|
+
}
|
|
1455
|
+
}
|
|
1456
|
+
}
|
|
1457
|
+
|
|
1458
|
+
// cout << "have singular:" << endl;
|
|
1459
|
+
// PrintStatistics (cout);
|
|
790
1460
|
havemp = true;
|
|
791
1461
|
}
|
|
792
1462
|
|
|
@@ -798,23 +1468,198 @@ namespace ngfem
|
|
|
798
1468
|
return root.Evaluate(p);
|
|
799
1469
|
}
|
|
800
1470
|
|
|
1471
|
+
|
|
1472
|
+
void PrintStatistics (ostream & ost)
|
|
1473
|
+
{
|
|
1474
|
+
int levels = 0;
|
|
1475
|
+
int cnt = 0;
|
|
1476
|
+
root.TraverseTree( [&](Node & node) {
|
|
1477
|
+
levels = max(levels, node.level);
|
|
1478
|
+
cnt++;
|
|
1479
|
+
});
|
|
1480
|
+
ost << "levels: " << levels << endl;
|
|
1481
|
+
ost << "nodes: " << cnt << endl;
|
|
1482
|
+
|
|
1483
|
+
Array<int> num_on_level(levels+1);
|
|
1484
|
+
Array<int> order_on_level(levels+1);
|
|
1485
|
+
Array<size_t> coefs_on_level(levels+1);
|
|
1486
|
+
num_on_level = 0;
|
|
1487
|
+
order_on_level = 0;
|
|
1488
|
+
root.TraverseTree( [&](Node & node) {
|
|
1489
|
+
num_on_level[node.level]++;
|
|
1490
|
+
order_on_level[node.level] = max(order_on_level[node.level],node.mp.Order());
|
|
1491
|
+
coefs_on_level[node.level] += node.mp.SH().Coefs().Size();
|
|
1492
|
+
});
|
|
1493
|
+
|
|
1494
|
+
cout << "num on level" << endl;
|
|
1495
|
+
for (int i = 0; i < num_on_level.Size(); i++)
|
|
1496
|
+
cout << i << ": " << num_on_level[i] << ", order = " << order_on_level[i] << ", coefs " << coefs_on_level[i] << endl;
|
|
1497
|
+
|
|
1498
|
+
size_t totcoefs = 0;
|
|
1499
|
+
for (auto n : coefs_on_level)
|
|
1500
|
+
totcoefs += n;
|
|
1501
|
+
cout << "total mem in coefs: " << sizeof(entry_type)*totcoefs / sqr(1024) << " MB" << endl;
|
|
1502
|
+
}
|
|
1503
|
+
|
|
1504
|
+
|
|
1505
|
+
|
|
801
1506
|
template <typename entry_type2>
|
|
802
|
-
friend class
|
|
1507
|
+
friend class RegularMLExpansion;
|
|
803
1508
|
};
|
|
804
1509
|
|
|
805
1510
|
|
|
806
1511
|
template <typename entry_type>
|
|
807
|
-
inline ostream & operator<< (ostream & ost, const
|
|
1512
|
+
inline ostream & operator<< (ostream & ost, const SingularMLExpansion<entry_type> & mlmp)
|
|
808
1513
|
{
|
|
809
1514
|
mlmp.Print(ost);
|
|
810
1515
|
return ost;
|
|
811
1516
|
}
|
|
812
1517
|
|
|
813
1518
|
|
|
1519
|
+
// *********************************** Regular multilevel Expansion
|
|
1520
|
+
|
|
1521
|
+
|
|
814
1522
|
template <typename elem_type=Complex>
|
|
815
|
-
class NGS_DLL_HEADER
|
|
1523
|
+
class NGS_DLL_HEADER RegularMLExpansion
|
|
816
1524
|
{
|
|
817
1525
|
static Array<size_t> nodes_on_level;
|
|
1526
|
+
|
|
1527
|
+
|
|
1528
|
+
struct RecordingRS
|
|
1529
|
+
{
|
|
1530
|
+
const SphericalExpansion<Singular,elem_type> * mpS;
|
|
1531
|
+
SphericalExpansion<Regular,elem_type> * mpR;
|
|
1532
|
+
Vec<3> dist;
|
|
1533
|
+
double len, theta, phi;
|
|
1534
|
+
public:
|
|
1535
|
+
RecordingRS() = default;
|
|
1536
|
+
RecordingRS (const SphericalExpansion<Singular,elem_type> * ampS,
|
|
1537
|
+
SphericalExpansion<Regular,elem_type> * ampR,
|
|
1538
|
+
Vec<3> adist)
|
|
1539
|
+
: mpS(ampS), mpR(ampR), dist(adist)
|
|
1540
|
+
{
|
|
1541
|
+
std::tie(len, theta, phi) = SphericalCoordinates(dist);
|
|
1542
|
+
}
|
|
1543
|
+
};
|
|
1544
|
+
|
|
1545
|
+
static void ProcessBatchRS(FlatArray<RecordingRS*> batch, double len, double theta) {
|
|
1546
|
+
// static Timer t("ProcessBatchRS"); RegionTimer reg(t, batch.Size());
|
|
1547
|
+
constexpr int vec_length = VecLength<elem_type>;
|
|
1548
|
+
int batch_size = batch.Size();
|
|
1549
|
+
int N = batch_size * vec_length;
|
|
1550
|
+
// *testout << "Processing batch of size " << batch.Size() << ", with N = " << N << ", vec_length = " << vec_length << ", Type: " << typeid(elem_type).name() << ", len = " << len << ", theta = " << theta << endl;
|
|
1551
|
+
|
|
1552
|
+
if (N <= 1 || batch_size <= 1) {
|
|
1553
|
+
for (auto* rec : batch) {
|
|
1554
|
+
rec->mpS->TransformAdd(*rec->mpR, rec->dist);
|
|
1555
|
+
}
|
|
1556
|
+
}
|
|
1557
|
+
else if (N <= 3) {
|
|
1558
|
+
ProcessVectorizedBatchRS<3, vec_length>(batch, len, theta);
|
|
1559
|
+
}
|
|
1560
|
+
else if (N <= 4) {
|
|
1561
|
+
ProcessVectorizedBatchRS<4, vec_length>(batch, len, theta);
|
|
1562
|
+
}
|
|
1563
|
+
else if (N <= 6) {
|
|
1564
|
+
ProcessVectorizedBatchRS<6, vec_length>(batch, len, theta);
|
|
1565
|
+
}
|
|
1566
|
+
else if (N <= 12) {
|
|
1567
|
+
ProcessVectorizedBatchRS<12, vec_length>(batch, len, theta);
|
|
1568
|
+
}
|
|
1569
|
+
else if (N <= 24) {
|
|
1570
|
+
ProcessVectorizedBatchRS<24, vec_length>(batch, len, theta);
|
|
1571
|
+
}
|
|
1572
|
+
else if (N <= 48) {
|
|
1573
|
+
ProcessVectorizedBatchRS<48, vec_length>(batch, len, theta);
|
|
1574
|
+
}
|
|
1575
|
+
else if (N <= 96) {
|
|
1576
|
+
ProcessVectorizedBatchRS<96, vec_length>(batch, len, theta);
|
|
1577
|
+
}
|
|
1578
|
+
else if (N <= 192) {
|
|
1579
|
+
ProcessVectorizedBatchRS<192, vec_length>(batch, len, theta);
|
|
1580
|
+
}
|
|
1581
|
+
else {
|
|
1582
|
+
// Split large batches
|
|
1583
|
+
/*
|
|
1584
|
+
ProcessBatch(batch.Range(0, 192 / vec_length), len, theta);
|
|
1585
|
+
ProcessBatch(batch.Range(192 / vec_length, batch_size), len, theta);
|
|
1586
|
+
*/
|
|
1587
|
+
|
|
1588
|
+
/*
|
|
1589
|
+
ParallelFor (2, [&] (int i)
|
|
1590
|
+
{
|
|
1591
|
+
if (i == 0)
|
|
1592
|
+
ProcessBatchRS(batch.Range(0, 192 / vec_length), len, theta);
|
|
1593
|
+
else
|
|
1594
|
+
ProcessBatchRS(batch.Range(192 / vec_length, batch_size), len, theta);
|
|
1595
|
+
}, 2);
|
|
1596
|
+
*/
|
|
1597
|
+
|
|
1598
|
+
|
|
1599
|
+
size_t chunksize = 192/vec_length;
|
|
1600
|
+
size_t num = (batch.Size()+chunksize-1) / chunksize;
|
|
1601
|
+
ParallelFor (num, [&](int i)
|
|
1602
|
+
{
|
|
1603
|
+
ProcessBatchRS(batch.Range(i*chunksize, min((i+1)*chunksize, batch.Size())), len, theta);
|
|
1604
|
+
}, num);
|
|
1605
|
+
|
|
1606
|
+
}
|
|
1607
|
+
}
|
|
1608
|
+
|
|
1609
|
+
|
|
1610
|
+
template<int N, int vec_length>
|
|
1611
|
+
static void ProcessVectorizedBatchRS(FlatArray<RecordingRS*> batch, double len, double theta) {
|
|
1612
|
+
|
|
1613
|
+
// static Timer t("ProcessVectorizedBatch, N = "+ToString(N) + ", vec_len = " + ToString(vec_length));
|
|
1614
|
+
// RegionTimer reg(t, batch[0]->mpS->SH().Order());
|
|
1615
|
+
// static Timer ttobatch("mptools - copy to batch 2");
|
|
1616
|
+
// static Timer tfrombatch("mptools - copy from batch 2");
|
|
1617
|
+
|
|
1618
|
+
// *testout << "Processing vectorized batch of size " << batch.Size() << ", with N = " << N << ", vec_length = " << vec_length << ", len = " << len << ", theta = " << theta << endl;
|
|
1619
|
+
SphericalExpansion<Singular, Vec<N,Complex>> vec_source(batch[0]->mpS->Order(), batch[0]->mpS->Kappa(), batch[0]->mpS->RTyp());
|
|
1620
|
+
// SphericalExpansion<Singular, elem_type> tmp_source{*batch[0]->mpS};
|
|
1621
|
+
SphericalExpansion<Regular, elem_type> tmp_target{*batch[0]->mpR};
|
|
1622
|
+
SphericalExpansion<Regular, Vec<N,Complex>> vec_target(batch[0]->mpR->Order(), batch[0]->mpR->Kappa(), batch[0]->mpR->RTyp());
|
|
1623
|
+
|
|
1624
|
+
// Copy multipoles into vectorized multipole
|
|
1625
|
+
// ttobatch.Start();
|
|
1626
|
+
for (int i = 0; i < batch.Size(); i++)
|
|
1627
|
+
{
|
|
1628
|
+
auto source_i = VecVector2Matrix (batch[i]->mpS->SH().Coefs());
|
|
1629
|
+
auto source_mati = VecVector2Matrix (vec_source.SH().Coefs()).Cols(i*vec_length, (i+1)*vec_length);
|
|
1630
|
+
batch[i]->mpS->SH().RotateZ(batch[i]->phi,
|
|
1631
|
+
[source_i, source_mati] (size_t ii, Complex factor)
|
|
1632
|
+
{
|
|
1633
|
+
source_mati.Row(ii) = factor * source_i.Row(ii);
|
|
1634
|
+
});
|
|
1635
|
+
}
|
|
1636
|
+
|
|
1637
|
+
// ttobatch.Stop();
|
|
1638
|
+
|
|
1639
|
+
vec_source.SH().RotateY(theta);
|
|
1640
|
+
vec_source.ShiftZ(-len, vec_target);
|
|
1641
|
+
vec_target.SH().RotateY(-theta);
|
|
1642
|
+
|
|
1643
|
+
// Copy vectorized multipole into individual multipoles
|
|
1644
|
+
// tfrombatch.Start();
|
|
1645
|
+
for (int i = 0; i < batch.Size(); i++) {
|
|
1646
|
+
// auto source_i = VecVector2Matrix (tmp_target.SH().Coefs());
|
|
1647
|
+
auto source_mati = VecVector2Matrix (vec_target.SH().Coefs()).Cols(i*vec_length, (i+1)*vec_length);
|
|
1648
|
+
auto targeti = VecVector2Matrix(batch[i]->mpR->SH().Coefs());
|
|
1649
|
+
|
|
1650
|
+
tmp_target.SH().RotateZ(-batch[i]->phi,
|
|
1651
|
+
[source_mati, targeti] (size_t ii, Complex factor)
|
|
1652
|
+
{
|
|
1653
|
+
// source_i.Row(ii) = factor * source_mati.Row(ii);
|
|
1654
|
+
AtomicAdd (VectorView(targeti.Row(ii)), factor * source_mati.Row(ii));
|
|
1655
|
+
});
|
|
1656
|
+
// for (int j = 0; j < tmp_target.SH().Coefs().Size(); j++)
|
|
1657
|
+
// AtomicAdd(batch[i]->mpR->SH().Coefs()[j], tmp_target.SH().Coefs()[j]);
|
|
1658
|
+
}
|
|
1659
|
+
// tfrombatch.Stop();
|
|
1660
|
+
|
|
1661
|
+
}
|
|
1662
|
+
|
|
818
1663
|
|
|
819
1664
|
struct Node
|
|
820
1665
|
{
|
|
@@ -822,22 +1667,35 @@ namespace ngfem
|
|
|
822
1667
|
double r;
|
|
823
1668
|
int level;
|
|
824
1669
|
std::array<unique_ptr<Node>,8> childs;
|
|
825
|
-
|
|
1670
|
+
SphericalExpansion<Regular,elem_type> mp;
|
|
826
1671
|
Array<Vec<3>> targets;
|
|
1672
|
+
Array<tuple<Vec<3>,double>> vol_targets;
|
|
827
1673
|
int total_targets;
|
|
1674
|
+
std::mutex node_mutex;
|
|
1675
|
+
atomic<bool> have_childs{false};
|
|
828
1676
|
|
|
829
|
-
Array<const typename
|
|
1677
|
+
Array<const typename SingularMLExpansion<elem_type>::Node*> singnodes;
|
|
1678
|
+
const FMM_Parameters & params;
|
|
830
1679
|
|
|
831
|
-
|
|
832
|
-
|
|
1680
|
+
|
|
1681
|
+
/// Create a tree node for the cube with center `acenter` and max-norm radius `ar`
/// on refinement level `alevel`.
///
/// The regular expansion `mp` is created with order -1 (no coefficients);
/// Allocate() gives it its real order later. `_params` is stored by reference,
/// so the referenced FMM_Parameters object must outlive the node.
Node (Vec<3> acenter, double ar, int alevel, double kappa, const FMM_Parameters & _params)
  : center(acenter), r(ar), level(alevel),
    mp(-1, kappa, ar),
    // total_targets is read by LocalizeExpansion, but only assigned by
    // CalcTotalTargets; start from a defined value instead of garbage.
    total_targets(0),
    params(_params)
{
  // per-level node statistics (only levels covered by the counter array)
  if (level < nodes_on_level.Size())
    nodes_on_level[level]++;
}
|
|
838
1690
|
|
|
839
|
-
|
|
840
|
-
|
|
1691
|
+
void Allocate()
|
|
1692
|
+
{
|
|
1693
|
+
// mp = SphericalExpansion<Regular,elem_type>(MPOrder(r*mp.Kappa()), mp.Kappa(), r);
|
|
1694
|
+
mp = SphericalExpansion<Regular,elem_type>(params.minorder+2*r*mp.Kappa(), mp.Kappa(), r);
|
|
1695
|
+
}
|
|
1696
|
+
|
|
1697
|
+
|
|
1698
|
+
void CreateChilds(bool allocate = false)
|
|
841
1699
|
{
|
|
842
1700
|
if (childs[0]) throw Exception("have already childs");
|
|
843
1701
|
// create children nodes:
|
|
@@ -847,15 +1705,19 @@ namespace ngfem
|
|
|
847
1705
|
cc(0) += (i&1) ? r/2 : -r/2;
|
|
848
1706
|
cc(1) += (i&2) ? r/2 : -r/2;
|
|
849
1707
|
cc(2) += (i&4) ? r/2 : -r/2;
|
|
850
|
-
childs[i] = make_unique<Node> (cc, r/2, level+1,
|
|
1708
|
+
childs[i] = make_unique<Node> (cc, r/2, level+1, mp.Kappa(), params);
|
|
1709
|
+
if (allocate)
|
|
1710
|
+
childs[i] -> Allocate();
|
|
851
1711
|
}
|
|
1712
|
+
have_childs = true;
|
|
852
1713
|
}
|
|
853
|
-
|
|
854
|
-
void AddSingularNode (const typename
|
|
1714
|
+
|
|
1715
|
+
void AddSingularNode (const typename SingularMLExpansion<elem_type>::Node & singnode, bool allow_refine,
|
|
1716
|
+
Array<RecordingRS> * recording)
|
|
855
1717
|
{
|
|
856
1718
|
if (mp.SH().Order() < 0) return;
|
|
857
1719
|
if (singnode.mp.SH().Order() < 0) return;
|
|
858
|
-
if (L2Norm(singnode.mp.SH().Coefs()) == 0) return;
|
|
1720
|
+
// if (L2Norm(singnode.mp.SH().Coefs()) == 0) return;
|
|
859
1721
|
if (level > 20)
|
|
860
1722
|
{
|
|
861
1723
|
singnodes.Append(&singnode);
|
|
@@ -874,12 +1736,15 @@ namespace ngfem
|
|
|
874
1736
|
singnode.childs[0]->mp.Order() < singnode.mp.Order())
|
|
875
1737
|
{
|
|
876
1738
|
for (auto & child : singnode.childs)
|
|
877
|
-
AddSingularNode (*child, allow_refine);
|
|
1739
|
+
AddSingularNode (*child, allow_refine, recording);
|
|
878
1740
|
return;
|
|
879
1741
|
}
|
|
880
1742
|
|
|
881
1743
|
// static Timer t("mptool transform Helmholtz-criterion"); RegionTimer r(t);
|
|
882
|
-
|
|
1744
|
+
if (recording)
|
|
1745
|
+
*recording += RecordingRS(&singnode.mp, &mp, dist);
|
|
1746
|
+
else
|
|
1747
|
+
singnode.mp.TransformAdd(mp, dist);
|
|
883
1748
|
return;
|
|
884
1749
|
}
|
|
885
1750
|
|
|
@@ -895,70 +1760,70 @@ namespace ngfem
|
|
|
895
1760
|
if (allow_refine)
|
|
896
1761
|
{
|
|
897
1762
|
if (!childs[0])
|
|
898
|
-
CreateChilds();
|
|
1763
|
+
CreateChilds(true);
|
|
899
1764
|
|
|
900
1765
|
for (auto & ch : childs)
|
|
901
|
-
ch -> AddSingularNode (singnode, allow_refine);
|
|
1766
|
+
ch -> AddSingularNode (singnode, allow_refine, recording);
|
|
902
1767
|
}
|
|
903
1768
|
else
|
|
904
1769
|
{
|
|
905
|
-
if (total_targets < 1000)
|
|
1770
|
+
if (total_targets < 1000 || recording)
|
|
906
1771
|
{
|
|
907
1772
|
for (auto & ch : childs)
|
|
908
1773
|
if (ch)
|
|
909
|
-
ch -> AddSingularNode (singnode, allow_refine);
|
|
1774
|
+
ch -> AddSingularNode (singnode, allow_refine, recording);
|
|
910
1775
|
}
|
|
911
1776
|
else
|
|
912
1777
|
ParallelFor (8, [&] (int nr)
|
|
913
1778
|
{
|
|
914
1779
|
if (childs[nr])
|
|
915
|
-
childs[nr] -> AddSingularNode (singnode, allow_refine);
|
|
1780
|
+
childs[nr] -> AddSingularNode (singnode, allow_refine, recording);
|
|
916
1781
|
});
|
|
917
1782
|
|
|
918
|
-
if (targets.Size())
|
|
1783
|
+
if (targets.Size()+vol_targets.Size())
|
|
919
1784
|
singnodes.Append(&singnode);
|
|
920
1785
|
}
|
|
921
1786
|
}
|
|
922
1787
|
else
|
|
923
1788
|
{
|
|
924
1789
|
for (auto & childsing : singnode.childs)
|
|
925
|
-
AddSingularNode (*childsing, allow_refine);
|
|
1790
|
+
AddSingularNode (*childsing, allow_refine, recording);
|
|
926
1791
|
}
|
|
927
1792
|
}
|
|
928
1793
|
|
|
929
1794
|
void LocalizeExpansion(bool allow_refine)
|
|
930
1795
|
{
|
|
931
1796
|
if (allow_refine)
|
|
932
|
-
if (mp.Order() >
|
|
933
|
-
CreateChilds();
|
|
1797
|
+
if (mp.Order() > 30 && !childs[0])
|
|
1798
|
+
CreateChilds(allow_refine);
|
|
934
1799
|
|
|
935
1800
|
if (childs[0])
|
|
936
1801
|
{
|
|
937
|
-
|
|
1802
|
+
if (total_targets < 1000)
|
|
938
1803
|
{
|
|
939
|
-
|
|
940
|
-
|
|
941
|
-
|
|
1804
|
+
for (int nr = 0; nr < 8; nr++)
|
|
1805
|
+
{
|
|
1806
|
+
if (L2Norm(mp.SH().Coefs()) > 0)
|
|
1807
|
+
mp.TransformAdd (childs[nr]->mp, childs[nr]->center-center);
|
|
1808
|
+
childs[nr]->LocalizeExpansion(allow_refine);
|
|
1809
|
+
}
|
|
942
1810
|
}
|
|
943
|
-
|
|
1811
|
+
else
|
|
1812
|
+
ParallelFor(8, [&] (int nr)
|
|
1813
|
+
{
|
|
1814
|
+
if (L2Norm(mp.SH().Coefs()) > 0)
|
|
1815
|
+
mp.TransformAdd (childs[nr]->mp, childs[nr]->center-center);
|
|
1816
|
+
childs[nr]->LocalizeExpansion(allow_refine);
|
|
1817
|
+
});
|
|
1818
|
+
mp = SphericalExpansion<Regular,elem_type>(-1, mp.Kappa(), 1.);
|
|
944
1819
|
//mp.SH().Coefs()=0.0;
|
|
945
1820
|
}
|
|
946
1821
|
}
|
|
947
1822
|
|
|
948
1823
|
elem_type Evaluate (Vec<3> p) const
|
|
949
1824
|
{
|
|
950
|
-
// *testout << "eval p = " << p << ", level = " << level << ", center = " << center << ", r = " << r << endl;
|
|
951
1825
|
elem_type sum{0.0};
|
|
952
|
-
|
|
953
|
-
if (childs[0])
|
|
954
|
-
{
|
|
955
|
-
int childnum = 0;
|
|
956
|
-
if (p(0) > center(0)) childnum += 1;
|
|
957
|
-
if (p(1) > center(1)) childnum += 2;
|
|
958
|
-
if (p(2) > center(2)) childnum += 4;
|
|
959
|
-
sum = childs[childnum]->Evaluate(p);
|
|
960
|
-
}
|
|
961
|
-
*/
|
|
1826
|
+
|
|
962
1827
|
int childnum = 0;
|
|
963
1828
|
if (p(0) > center(0)) childnum += 1;
|
|
964
1829
|
if (p(1) > center(1)) childnum += 2;
|
|
@@ -966,13 +1831,16 @@ namespace ngfem
|
|
|
966
1831
|
if (childs[childnum])
|
|
967
1832
|
sum = childs[childnum]->Evaluate(p);
|
|
968
1833
|
else
|
|
969
|
-
|
|
970
|
-
|
|
971
|
-
|
|
972
|
-
|
|
973
|
-
for (auto sn : singnodes)
|
|
974
|
-
sum += sn->EvaluateMP(p);
|
|
1834
|
+
{
|
|
1835
|
+
// static Timer t("mptool regmp, evaluate reg"); RegionTimer r(t);
|
|
1836
|
+
sum = mp.Eval(p-center);
|
|
1837
|
+
}
|
|
975
1838
|
|
|
1839
|
+
{
|
|
1840
|
+
// static Timer t("mptool regmp, evaluate, singnode"); RegionTimer r(t);
|
|
1841
|
+
for (auto sn : singnodes)
|
|
1842
|
+
sum += sn->EvaluateMP(p);
|
|
1843
|
+
}
|
|
976
1844
|
return sum;
|
|
977
1845
|
}
|
|
978
1846
|
|
|
@@ -998,6 +1866,14 @@ namespace ngfem
|
|
|
998
1866
|
return sum;
|
|
999
1867
|
}
|
|
1000
1868
|
|
|
1869
|
+
/// Pre-order walk over the subtree rooted at this node:
/// `func` is called for this node first, then for every existing child subtree.
void TraverseTree (const std::function<void(Node&)> & func)
{
  func(*this);
  for (size_t i = 0; i < childs.size(); i++)
    {
      if (!childs[i]) continue;
      childs[i]->TraverseTree(func);
    }
}
|
|
1876
|
+
|
|
1001
1877
|
double Norm() const
|
|
1002
1878
|
{
|
|
1003
1879
|
double norm = L2Norm(mp.SH().Coefs());
|
|
@@ -1015,36 +1891,100 @@ namespace ngfem
|
|
|
1015
1891
|
num += ch->NumCoefficients();
|
|
1016
1892
|
return num;
|
|
1017
1893
|
}
|
|
1018
|
-
|
|
1894
|
+
|
|
1895
|
+
/// Index (0..7) of the child octant containing x:
/// bit d is set iff x(d) > center(d), for d = 0,1,2.
int GetChildNum (Vec<3> x) const
{
  int octant = 0;
  for (int d = 0; d < 3; d++)
    if (x(d) > center(d))
      octant |= (1 << d);
  return octant;
}
|
|
1903
|
+
|
|
1019
1904
|
/// Insert the point target x into the subtree below this node.
///
/// If the node already has children, x is passed on to the child octant
/// containing it. Otherwise x is stored in this node; the node is split
/// (and all stored point and volume targets are re-inserted) unless
///  - level > 20, or
///  - fewer than params.maxdirect targets are stored and r*kappa < 5.
/// `have_childs` is checked once without the lock and once more while holding
/// node_mutex.
void AddTarget (Vec<3> x)
{
  auto pass_to_child = [&] ()
  {
    childs[GetChildNum(x)] -> AddTarget(x);
  };

  if (have_childs)  // quick check without locking
    {
      pass_to_child();
      return;
    }

  lock_guard<mutex> guard(node_mutex);

  if (have_childs)  // test again after locking
    {
      pass_to_child();
      return;
    }

  targets.Append( x );

  if (level > 20) return;

  const bool stay_leaf = targets.Size() < params.maxdirect && r*mp.Kappa() < 5;
  if (stay_leaf) return;

  CreateChilds();

  // redistribute everything stored so far; have_childs is set now, so these
  // calls go straight to the children
  for (auto pnt : targets)
    AddTarget (pnt);
  for (auto [pnt, rad] : vol_targets)
    AddVolumeTarget (pnt, rad);

  targets.SetSize0();
  vol_targets.SetSize0();
}
|
|
1942
|
+
|
|
1943
|
+
|
|
1944
|
+
/// Insert a volume target (point x with radius tr) into the subtree below this node.
///
/// Targets with MaxNorm(x-center) > r+tr are ignored by this node. If the node
/// has children, the target is offered to all of them. Otherwise it is stored
/// here; the node is split (and all stored point and volume targets are
/// re-inserted) unless
///  - level > 20, or
///  - fewer than params.maxdirect volume targets are stored and r*kappa < 5.
/// `have_childs` is checked once without the lock and once more while holding
/// node_mutex.
void AddVolumeTarget (Vec<3> x, double tr)
{
  if (MaxNorm(x-center) > r+tr) return;

  auto pass_to_childs = [&] ()
  {
    for (auto & child : childs)
      child->AddVolumeTarget(x, tr);
  };

  if (have_childs)
    {
      pass_to_childs();
      return;
    }

  lock_guard<mutex> guard(node_mutex);

  if (have_childs)
    {
      pass_to_childs();
      return;
    }

  vol_targets.Append (tuple(x,tr));

  if (level > 20) return;

  const bool stay_leaf = vol_targets.Size() < params.maxdirect && (r*mp.Kappa() < 5);
  if (stay_leaf) return;

  CreateChilds();

  // redistribute everything stored so far; have_childs is set now, so these
  // calls go straight to the children
  for (auto pnt : targets)
    AddTarget (pnt);
  for (auto [pnt, rad] : vol_targets)
    AddVolumeTarget (pnt, rad);

  targets.SetSize0();
  vol_targets.SetSize0();
}
|
|
1044
1982
|
|
|
1983
|
+
|
|
1984
|
+
|
|
1045
1985
|
void CalcTotalTargets()
|
|
1046
1986
|
{
|
|
1047
|
-
total_targets = targets.Size();
|
|
1987
|
+
total_targets = targets.Size() + vol_targets.Size();
|
|
1048
1988
|
for (auto & child : childs)
|
|
1049
1989
|
if (child)
|
|
1050
1990
|
{
|
|
@@ -1064,8 +2004,21 @@ namespace ngfem
|
|
|
1064
2004
|
}
|
|
1065
2005
|
|
|
1066
2006
|
if (total_targets == 0)
|
|
1067
|
-
mp =
|
|
2007
|
+
mp = SphericalExpansion<Regular,elem_type>(-1, mp.Kappa(),1.);
|
|
2008
|
+
}
|
|
2009
|
+
|
|
2010
|
+
void AllocateMemory()
|
|
2011
|
+
{
|
|
2012
|
+
for (auto & child : childs)
|
|
2013
|
+
if (child)
|
|
2014
|
+
child->AllocateMemory();
|
|
2015
|
+
|
|
2016
|
+
if (total_targets > 0)
|
|
2017
|
+
Allocate();
|
|
2018
|
+
// mp = SphericalExpansion<Regular,elem_type>(MPOrder(r*mp.Kappa()), mp.Kappa(), r); // -1, mp.Kappa(),1.);
|
|
1068
2019
|
}
|
|
2020
|
+
|
|
2021
|
+
|
|
1069
2022
|
|
|
1070
2023
|
|
|
1071
2024
|
void Print (ostream & ost, size_t childnr = -1) const
|
|
@@ -1082,21 +2035,24 @@ namespace ngfem
|
|
|
1082
2035
|
}
|
|
1083
2036
|
|
|
1084
2037
|
};
|
|
1085
|
-
|
|
2038
|
+
|
|
2039
|
+
FMM_Parameters fmm_params;
|
|
1086
2040
|
Node root;
|
|
1087
|
-
shared_ptr<
|
|
2041
|
+
shared_ptr<SingularMLExpansion<elem_type>> singmp;
|
|
1088
2042
|
|
|
1089
2043
|
public:
|
|
1090
|
-
|
|
1091
|
-
|
|
1092
|
-
|
|
2044
|
+
RegularMLExpansion (shared_ptr<SingularMLExpansion<elem_type>> asingmp, Vec<3> center, double r,
|
|
2045
|
+
const FMM_Parameters & _params)
|
|
2046
|
+
: fmm_params(_params), root(center, r, 0, asingmp->Kappa(), fmm_params), singmp(asingmp)
|
|
2047
|
+
{
|
|
1093
2048
|
if (!singmp->havemp) throw Exception("first call Calc for singular MP");
|
|
1094
|
-
|
|
2049
|
+
root.Allocate();
|
|
2050
|
+
|
|
1095
2051
|
nodes_on_level = 0;
|
|
1096
2052
|
nodes_on_level[0] = 1;
|
|
1097
2053
|
{
|
|
1098
|
-
static Timer t("mptool compute regular MLMP"); RegionTimer rg(t);
|
|
1099
|
-
root.AddSingularNode(singmp->root, true);
|
|
2054
|
+
static Timer t("mptool compute regular MLMP"); RegionTimer rg(t);
|
|
2055
|
+
root.AddSingularNode(singmp->root, true, nullptr);
|
|
1100
2056
|
// cout << "norm after S->R conversion: " << root.Norm() << endl;
|
|
1101
2057
|
}
|
|
1102
2058
|
|
|
@@ -1117,39 +2073,163 @@ namespace ngfem
|
|
|
1117
2073
|
}
|
|
1118
2074
|
}
|
|
1119
2075
|
|
|
1120
|
-
|
|
1121
|
-
|
|
1122
|
-
|
|
1123
|
-
|
|
1124
|
-
|
|
1125
|
-
|
|
1126
|
-
|
|
2076
|
+
/// Create an empty regular multilevel expansion for the box (center, r) with
/// wave number kappa. No singular expansion is attached here (singmp stays
/// empty); CalcMP sets it.
///
/// The parameters are copied into fmm_params, and the root node refers to that
/// copy.
RegularMLExpansion (Vec<3> center, double r, double kappa, const FMM_Parameters & _params)
  : fmm_params(_params),
    root(center, r, 0, kappa, fmm_params)
{
  // reset the per-level node counters; only the root exists
  nodes_on_level = 0;
  nodes_on_level[0] = 1;
}
|
|
2082
|
+
|
|
1127
2083
|
/// Register the evaluation point t; it is sorted into the tree starting at the root.
void AddTarget (Vec<3> t)
{
  root.AddTarget(t);
}
|
|
1131
2087
|
|
|
1132
|
-
void
|
|
2088
|
+
/// Register a volume target (point t with radius r); it is sorted into the
/// tree starting at the root.
void AddVolumeTarget (Vec<3> t, double r)
{
  root.AddVolumeTarget(t, r);
}
|
|
2092
|
+
|
|
2093
|
+
/// Compute the regular expansions of the tree from the singular expansion `asingmp`.
///
/// Steps:
///  1. count targets per subtree and allocate the expansions of nodes with targets,
///  2. record all singular->regular conversions (AddSingularNode with a recording
///     array), sort them by (len, theta), group consecutive records with equal
///     len and theta (tolerance 1e-8) and process the groups in parallel
///     with ProcessBatchRS,
///  3. localize the expansions down the tree.
/// Tree refinement in steps 2 and 3 is allowed iff `onlytargets` is false.
void CalcMP(shared_ptr<SingularMLExpansion<elem_type>> asingmp, bool onlytargets = true)
{
  static Timer t("mptool regular MLMP"); RegionTimer rg(t);
  static Timer tremove("removeempty");   // removal of empty subtrees is currently disabled
  static Timer trec("mptool regular MLMP - recording");
  static Timer tsort("mptool regular MLMP - sort");

  singmp = asingmp;

  root.CalcTotalTargets();
  root.AllocateMemory();

  const bool allow_refine = !onlytargets;

  if constexpr (false)
    {
      // direct conversion, without recording
      root.AddSingularNode(singmp->root, allow_refine, nullptr);
    }
  else
    {
      // collect all conversions first ...
      Array<RecordingRS> recording;
      {
        RegionTimer rrec(trec);
        root.AddSingularNode(singmp->root, allow_refine, &recording);
      }

      // ... order them by shift length, then by angle theta ...
      {
        RegionTimer reg(tsort);
        QuickSort (recording, [] (auto & a, auto & b)
                   {
                     if (a.len < (1-1e-8) * b.len) return true;
                     if (a.len > (1+1e-8) * b.len) return false;
                     return a.theta < b.theta;
                   });
      }

      // ... and cut the sorted sequence into groups of equal (len, theta)
      double current_len = -1e100;
      double current_theta = -1e100;
      Array<RecordingRS*> current_batch;
      Array<Array<RecordingRS*>> batch_group;
      Array<double> group_lengths;
      Array<double> group_thetas;

      auto store_current_batch = [&] ()
      {
        batch_group.Append(current_batch);
        group_lengths.Append(current_len);
        group_thetas.Append(current_theta);
      };

      for (auto & record : recording)
        {
          const bool changed =
            fabs(record.len - current_len) > 1e-8 ||
            fabs(record.theta - current_theta) > 1e-8;

          if (changed && current_batch.Size() > 0)
            {
              store_current_batch();
              current_batch.SetSize(0);
            }

          current_len = record.len;
          current_theta = record.theta;
          current_batch.Append(&record);
        }

      if (current_batch.Size() > 0)
        store_current_batch();

      ParallelFor(batch_group.Size(), [&](int i)
                  {
                    ProcessBatchRS(batch_group[i], group_lengths[i], group_thetas[i]);
                  }, TasksPerThread(4));
    }

  static Timer tloc("mptool regular localize expansion"); RegionTimer rloc(tloc);
  root.LocalizeExpansion(allow_refine);
}
|
|
1152
2200
|
|
|
2201
|
+
/// Write tree statistics to `ost`: number of levels and nodes, and per level
/// the node count, the maximal expansion order and the number of stored
/// coefficients, followed by the total coefficient memory in MB.
void PrintStatistics (ostream & ost)
{
  // first pass: depth of the tree and number of nodes
  int levels = 0;
  int cnt = 0;
  root.TraverseTree( [&](Node & node) {
    levels = max(levels, node.level);
    cnt++;
  });
  ost << "levels: " << levels << endl;
  ost << "nodes: " << cnt << endl;

  Array<int> num_on_level(levels+1);
  Array<int> order_on_level(levels+1);
  Array<size_t> coefs_on_level(levels+1);
  num_on_level = 0;
  order_on_level = 0;
  coefs_on_level = size_t(0);   // was left uninitialized although it is accumulated with += below

  // second pass: per-level statistics
  root.TraverseTree( [&](Node & node) {
    num_on_level[node.level]++;
    order_on_level[node.level] = max(order_on_level[node.level],node.mp.Order());
    coefs_on_level[node.level] += node.mp.SH().Coefs().Size();
  });

  // the whole report goes to the stream passed by the caller (previously the
  // remaining part was written to cout)
  ost << "num on level" << endl;
  for (int i = 0; i < num_on_level.Size(); i++)
    ost << i << ": " << num_on_level[i] << ", order = " << order_on_level[i] << ", coefs " << coefs_on_level[i] << endl;

  size_t totcoefs = 0;
  for (auto n : coefs_on_level)
    totcoefs += n;
  ost << "total mem in coefs: " << sizeof(elem_type)*totcoefs / sqr(1024) << " MB" << endl;
}
|
|
2232
|
+
|
|
1153
2233
|
void Print (ostream & ost) const
|
|
1154
2234
|
{
|
|
1155
2235
|
root.Print(ost);
|
|
@@ -1168,7 +2248,10 @@ namespace ngfem
|
|
|
1168
2248
|
/// Evaluate the expansion at point p.
///
/// Points inside the root box (max-norm distance to root.center at most
/// root.r) are evaluated via the regular tree; points outside fall back to the
/// singular expansion.
/// Throws Exception if p is outside the box and no singular expansion has been
/// set yet (the constructor taking only kappa leaves singmp empty until CalcMP).
elem_type Evaluate (Vec<3> p) const
{
  if (MaxNorm(p-root.center) > root.r)
    {
      if (!singmp)
        throw Exception("RegularMLExpansion::Evaluate: point outside of box and no singular expansion available, call CalcMP first");
      return singmp->Evaluate(p);
    }
  return root.Evaluate(p);
}
|
|
1174
2257
|
|
|
@@ -1180,11 +2263,12 @@ namespace ngfem
|
|
|
1180
2263
|
|
|
1181
2264
|
};
|
|
1182
2265
|
|
|
2266
|
+
|
|
1183
2267
|
template <typename elem_type>
|
|
1184
|
-
inline ostream & operator<< (ostream & ost, const
|
|
2268
|
+
inline ostream & operator<< (ostream & ost, const RegularMLExpansion<elem_type> & mlmp)
|
|
1185
2269
|
{
|
|
1186
2270
|
mlmp.Print(ost);
|
|
1187
|
-
// ost << "
|
|
2271
|
+
// ost << "RegularMLExpansion" << endl;
|
|
1188
2272
|
return ost;
|
|
1189
2273
|
}
|
|
1190
2274
|
|
|
@@ -1193,126 +2277,5 @@ namespace ngfem
|
|
|
1193
2277
|
|
|
1194
2278
|
|
|
1195
2279
|
|
|
1196
|
-
// ******************** Coefficient Functions *********************
|
|
1197
|
-
|
|
1198
|
-
|
|
1199
|
-
class SphericalHarmonicsCF : public CoefficientFunction
|
|
1200
|
-
{
|
|
1201
|
-
SphericalHarmonics<Complex> sh;
|
|
1202
|
-
public:
|
|
1203
|
-
SphericalHarmonicsCF (int order)
|
|
1204
|
-
: CoefficientFunction(1, true), sh(order) { }
|
|
1205
|
-
Complex & Coef(int n, int m) { return sh.Coef(n,m); }
|
|
1206
|
-
|
|
1207
|
-
virtual double Evaluate (const BaseMappedIntegrationPoint & ip) const override
|
|
1208
|
-
{ throw Exception("real eval not available"); }
|
|
1209
|
-
|
|
1210
|
-
virtual void Evaluate (const BaseMappedIntegrationPoint & mip, FlatVector<Complex> values) const override
|
|
1211
|
-
{
|
|
1212
|
-
values(0) = sh.Eval(mip.GetPoint());
|
|
1213
|
-
}
|
|
1214
|
-
|
|
1215
|
-
virtual void Evaluate (const BaseMappedIntegrationRule & ir, BareSliceMatrix<Complex> values) const override
|
|
1216
|
-
{
|
|
1217
|
-
for (int i = 0; i < ir.Size(); i++)
|
|
1218
|
-
{
|
|
1219
|
-
auto & mip = ir[i];
|
|
1220
|
-
values(i,0) = sh.Eval(mip.GetPoint());
|
|
1221
|
-
}
|
|
1222
|
-
}
|
|
1223
|
-
|
|
1224
|
-
auto & SH() { return sh; }
|
|
1225
|
-
};
|
|
1226
|
-
|
|
1227
|
-
|
|
1228
|
-
|
|
1229
|
-
template <typename RADIAL, typename entry_type=Complex>
|
|
1230
|
-
class MultiPoleCF : public CoefficientFunction
|
|
1231
|
-
{
|
|
1232
|
-
MultiPole<RADIAL, entry_type> mp;
|
|
1233
|
-
Vec<3> center;
|
|
1234
|
-
public:
|
|
1235
|
-
MultiPoleCF (int order, double kappa, Vec<3> acenter, double scale = 1)
|
|
1236
|
-
: CoefficientFunction(sizeof(entry_type)/sizeof(Complex), true), mp(order, kappa, scale), center(acenter) { }
|
|
1237
|
-
|
|
1238
|
-
entry_type & Coef(int n, int m) { return mp.Coef(n,m); }
|
|
1239
|
-
auto & SH() { return mp.SH(); }
|
|
1240
|
-
auto & MP() { return mp; }
|
|
1241
|
-
Vec<3> Center() const { return center; }
|
|
1242
|
-
|
|
1243
|
-
virtual double Evaluate (const BaseMappedIntegrationPoint & ip) const override
|
|
1244
|
-
{ throw Exception("real eval not available"); }
|
|
1245
|
-
|
|
1246
|
-
virtual void Evaluate (const BaseMappedIntegrationPoint & mip, FlatVector<Complex> values) const override
|
|
1247
|
-
{
|
|
1248
|
-
if constexpr (std::is_same<entry_type, Complex>())
|
|
1249
|
-
values(0) = mp.Eval(mip.GetPoint()-center);
|
|
1250
|
-
else
|
|
1251
|
-
values = mp.Eval(mip.GetPoint()-center);
|
|
1252
|
-
}
|
|
1253
|
-
|
|
1254
|
-
template <typename TARGET>
|
|
1255
|
-
void ShiftZ (double z, MultiPole<TARGET, entry_type> & target) { mp.ShiftZ(z, target); }
|
|
1256
|
-
|
|
1257
|
-
using CoefficientFunction::Transform;
|
|
1258
|
-
template <typename TARGET>
|
|
1259
|
-
void Transform (MultiPoleCF<TARGET, entry_type> & target)
|
|
1260
|
-
{
|
|
1261
|
-
mp.Transform (target.MP(), target.Center()-center);
|
|
1262
|
-
}
|
|
1263
|
-
};
|
|
1264
|
-
|
|
1265
|
-
template <typename entry_type>
|
|
1266
|
-
class SingularMLMultiPoleCF : public CoefficientFunction
|
|
1267
|
-
{
|
|
1268
|
-
shared_ptr<SingularMLMultiPole<entry_type>> mlmp;
|
|
1269
|
-
public:
|
|
1270
|
-
SingularMLMultiPoleCF (Vec<3> center, double r, int order, double kappa)
|
|
1271
|
-
: CoefficientFunction(sizeof(entry_type)/sizeof(Complex), true), mlmp{make_shared<SingularMLMultiPole<entry_type>>(center, r, order, kappa)} { }
|
|
1272
|
-
|
|
1273
|
-
virtual double Evaluate (const BaseMappedIntegrationPoint & ip) const override
|
|
1274
|
-
{ throw Exception("real eval not available"); }
|
|
1275
|
-
|
|
1276
|
-
virtual void Evaluate (const BaseMappedIntegrationPoint & mip, FlatVector<Complex> values) const override
|
|
1277
|
-
{
|
|
1278
|
-
// values(0) = mlmp->Evaluate(mip.GetPoint());
|
|
1279
|
-
|
|
1280
|
-
if constexpr (std::is_same<entry_type, Complex>())
|
|
1281
|
-
values(0) = mlmp->Evaluate(mip.GetPoint());
|
|
1282
|
-
else
|
|
1283
|
-
values = mlmp->Evaluate(mip.GetPoint());
|
|
1284
|
-
|
|
1285
|
-
|
|
1286
|
-
}
|
|
1287
|
-
|
|
1288
|
-
shared_ptr<SingularMLMultiPole<entry_type>> MLMP() { return mlmp; }
|
|
1289
|
-
};
|
|
1290
|
-
|
|
1291
|
-
|
|
1292
|
-
template <typename entry_type>
|
|
1293
|
-
class RegularMLMultiPoleCF : public CoefficientFunction
|
|
1294
|
-
{
|
|
1295
|
-
shared_ptr<RegularMLMultiPole<entry_type>> mlmp;
|
|
1296
|
-
public:
|
|
1297
|
-
RegularMLMultiPoleCF (shared_ptr<SingularMLMultiPoleCF<entry_type>> asingmp, Vec<3> center, double r, int order)
|
|
1298
|
-
: CoefficientFunction(sizeof(entry_type)/sizeof(Complex), true), mlmp{make_shared<RegularMLMultiPole<entry_type>>(asingmp->MLMP(), center, r, order)} { }
|
|
1299
|
-
|
|
1300
|
-
virtual double Evaluate (const BaseMappedIntegrationPoint & ip) const override
|
|
1301
|
-
{ throw Exception("real eval not available"); }
|
|
1302
|
-
|
|
1303
|
-
virtual void Evaluate (const BaseMappedIntegrationPoint & mip, FlatVector<Complex> values) const override
|
|
1304
|
-
{
|
|
1305
|
-
// values(0) = mlmp->Evaluate(mip.GetPoint());
|
|
1306
|
-
|
|
1307
|
-
if constexpr (std::is_same<entry_type, Complex>())
|
|
1308
|
-
values(0) = mlmp->Evaluate(mip.GetPoint());
|
|
1309
|
-
else
|
|
1310
|
-
values = mlmp->Evaluate(mip.GetPoint());
|
|
1311
|
-
}
|
|
1312
|
-
|
|
1313
|
-
shared_ptr<RegularMLMultiPole<entry_type>> MLMP() { return mlmp; }
|
|
1314
|
-
};
|
|
1315
|
-
|
|
1316
|
-
|
|
1317
2280
|
}
|
|
1318
2281
|
#endif
|