ngsolve 6.2.2505__cp313-cp313-macosx_10_15_universal2.whl → 6.2.2505.post70.dev0__cp313-cp313-macosx_10_15_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ngsolve might be problematic. Click here for more details.
- netgen/include/bilinearform.hpp +1 -1
- netgen/include/diffop_impl.hpp +3 -1
- netgen/include/fespace.hpp +4 -2
- netgen/include/gridfunction.hpp +1 -1
- netgen/include/h1amg.hpp +24 -1
- netgen/include/hdivhofespace.hpp +2 -0
- netgen/include/mptools.hpp +599 -85
- netgen/include/ngblas.hpp +113 -4
- netgen/include/recursive_pol.hpp +63 -11
- netgen/include/sparsematrix_dyn.hpp +2 -2
- netgen/include/sparsematrix_impl.hpp +25 -0
- netgen/include/vector.hpp +13 -1
- netgen/libngbla.dylib +0 -0
- netgen/libngcomp.dylib +0 -0
- netgen/libngfem.dylib +0 -0
- netgen/libngla.dylib +0 -0
- netgen/libngsbem.dylib +0 -0
- netgen/libngstd.dylib +0 -0
- ngsolve/__init__.pyi +2 -2
- ngsolve/bla.pyi +4 -0
- ngsolve/cmake/NGSolveConfig.cmake +5 -5
- ngsolve/comp/__init__.pyi +14 -1
- ngsolve/config/__init__.pyi +6 -6
- ngsolve/config/config.py +5 -5
- ngsolve/config/config.pyi +6 -6
- ngsolve/directsolvers.pyi +1 -1
- ngsolve/fem.pyi +1 -1
- ngsolve/krylovspace.pyi +3 -3
- ngsolve/nonlinearsolvers.pyi +1 -1
- ngsolve/solve_implementation.pyi +1 -1
- ngsolve/timestepping.pyi +3 -3
- ngsolve/timing.pyi +1 -1
- {ngsolve-6.2.2505.dist-info → ngsolve-6.2.2505.post70.dev0.dist-info}/METADATA +2 -2
- {ngsolve-6.2.2505.dist-info → ngsolve-6.2.2505.post70.dev0.dist-info}/RECORD +70 -70
- {ngsolve-6.2.2505.data → ngsolve-6.2.2505.post70.dev0.data}/data/Netgen.icns +0 -0
- {ngsolve-6.2.2505.data → ngsolve-6.2.2505.post70.dev0.data}/data/bin/ngscxx +0 -0
- {ngsolve-6.2.2505.data → ngsolve-6.2.2505.post70.dev0.data}/data/bin/ngsld +0 -0
- {ngsolve-6.2.2505.data → ngsolve-6.2.2505.post70.dev0.data}/data/bin/ngsolve.tcl +0 -0
- {ngsolve-6.2.2505.data → ngsolve-6.2.2505.post70.dev0.data}/data/bin/ngspy +0 -0
- {ngsolve-6.2.2505.data → ngsolve-6.2.2505.post70.dev0.data}/data/share/ngsolve/beam.geo +0 -0
- {ngsolve-6.2.2505.data → ngsolve-6.2.2505.post70.dev0.data}/data/share/ngsolve/beam.vol +0 -0
- {ngsolve-6.2.2505.data → ngsolve-6.2.2505.post70.dev0.data}/data/share/ngsolve/chip.in2d +0 -0
- {ngsolve-6.2.2505.data → ngsolve-6.2.2505.post70.dev0.data}/data/share/ngsolve/chip.vol +0 -0
- {ngsolve-6.2.2505.data → ngsolve-6.2.2505.post70.dev0.data}/data/share/ngsolve/coil.geo +0 -0
- {ngsolve-6.2.2505.data → ngsolve-6.2.2505.post70.dev0.data}/data/share/ngsolve/coil.vol +0 -0
- {ngsolve-6.2.2505.data → ngsolve-6.2.2505.post70.dev0.data}/data/share/ngsolve/coilshield.geo +0 -0
- {ngsolve-6.2.2505.data → ngsolve-6.2.2505.post70.dev0.data}/data/share/ngsolve/coilshield.vol +0 -0
- {ngsolve-6.2.2505.data → ngsolve-6.2.2505.post70.dev0.data}/data/share/ngsolve/cube.geo +0 -0
- {ngsolve-6.2.2505.data → ngsolve-6.2.2505.post70.dev0.data}/data/share/ngsolve/cube.vol +0 -0
- {ngsolve-6.2.2505.data → ngsolve-6.2.2505.post70.dev0.data}/data/share/ngsolve/d10_DGdoubleglazing.pde +0 -0
- {ngsolve-6.2.2505.data → ngsolve-6.2.2505.post70.dev0.data}/data/share/ngsolve/d11_chip_nitsche.pde +0 -0
- {ngsolve-6.2.2505.data → ngsolve-6.2.2505.post70.dev0.data}/data/share/ngsolve/d1_square.pde +0 -0
- {ngsolve-6.2.2505.data → ngsolve-6.2.2505.post70.dev0.data}/data/share/ngsolve/d2_chip.pde +0 -0
- {ngsolve-6.2.2505.data → ngsolve-6.2.2505.post70.dev0.data}/data/share/ngsolve/d3_helmholtz.pde +0 -0
- {ngsolve-6.2.2505.data → ngsolve-6.2.2505.post70.dev0.data}/data/share/ngsolve/d4_cube.pde +0 -0
- {ngsolve-6.2.2505.data → ngsolve-6.2.2505.post70.dev0.data}/data/share/ngsolve/d5_beam.pde +0 -0
- {ngsolve-6.2.2505.data → ngsolve-6.2.2505.post70.dev0.data}/data/share/ngsolve/d6_shaft.pde +0 -0
- {ngsolve-6.2.2505.data → ngsolve-6.2.2505.post70.dev0.data}/data/share/ngsolve/d7_coil.pde +0 -0
- {ngsolve-6.2.2505.data → ngsolve-6.2.2505.post70.dev0.data}/data/share/ngsolve/d8_coilshield.pde +0 -0
- {ngsolve-6.2.2505.data → ngsolve-6.2.2505.post70.dev0.data}/data/share/ngsolve/d9_hybridDG.pde +0 -0
- {ngsolve-6.2.2505.data → ngsolve-6.2.2505.post70.dev0.data}/data/share/ngsolve/doubleglazing.in2d +0 -0
- {ngsolve-6.2.2505.data → ngsolve-6.2.2505.post70.dev0.data}/data/share/ngsolve/doubleglazing.vol +0 -0
- {ngsolve-6.2.2505.data → ngsolve-6.2.2505.post70.dev0.data}/data/share/ngsolve/piezo2d40round4.vol.gz +0 -0
- {ngsolve-6.2.2505.data → ngsolve-6.2.2505.post70.dev0.data}/data/share/ngsolve/shaft.geo +0 -0
- {ngsolve-6.2.2505.data → ngsolve-6.2.2505.post70.dev0.data}/data/share/ngsolve/shaft.vol +0 -0
- {ngsolve-6.2.2505.data → ngsolve-6.2.2505.post70.dev0.data}/data/share/ngsolve/square.in2d +0 -0
- {ngsolve-6.2.2505.data → ngsolve-6.2.2505.post70.dev0.data}/data/share/ngsolve/square.vol +0 -0
- {ngsolve-6.2.2505.dist-info → ngsolve-6.2.2505.post70.dev0.dist-info}/LICENSE +0 -0
- {ngsolve-6.2.2505.dist-info → ngsolve-6.2.2505.post70.dev0.dist-info}/WHEEL +0 -0
- {ngsolve-6.2.2505.dist-info → ngsolve-6.2.2505.post70.dev0.dist-info}/top_level.txt +0 -0
netgen/include/mptools.hpp
CHANGED
|
@@ -20,6 +20,26 @@ namespace ngsbem
|
|
|
20
20
|
{
|
|
21
21
|
using namespace ngfem;
|
|
22
22
|
|
|
23
|
+
template<typename T>
|
|
24
|
+
constexpr int VecLength = 1; // Default: Complex has length 1
|
|
25
|
+
|
|
26
|
+
template<int N>
|
|
27
|
+
constexpr int VecLength<Vec<N, Complex>> = N; // Specialization: Vec<N,Complex> has length N
|
|
28
|
+
|
|
29
|
+
inline std::tuple<double, double, double> SphericalCoordinates(Vec<3> dist){
|
|
30
|
+
double len, theta, phi;
|
|
31
|
+
len = L2Norm(dist);
|
|
32
|
+
if (len < 1e-30)
|
|
33
|
+
theta = 0;
|
|
34
|
+
else
|
|
35
|
+
theta = acos (dist(2) / len);
|
|
36
|
+
if (sqr(dist(0))+sqr(dist(1)) < 1e-30)
|
|
37
|
+
phi = 0;
|
|
38
|
+
else
|
|
39
|
+
phi = atan2(dist(1), dist(0));
|
|
40
|
+
return {len, theta, phi};
|
|
41
|
+
}
|
|
42
|
+
|
|
23
43
|
|
|
24
44
|
template <typename entry_type = Complex>
|
|
25
45
|
class NGS_DLL_HEADER SphericalHarmonics
|
|
@@ -84,9 +104,69 @@ namespace ngsbem
|
|
|
84
104
|
|
|
85
105
|
void Calc (Vec<3> x, FlatVector<Complex> shapes);
|
|
86
106
|
|
|
87
|
-
|
|
107
|
+
|
|
108
|
+
void FlipZ ();
|
|
88
109
|
void RotateZ (double alpha);
|
|
89
|
-
|
|
110
|
+
|
|
111
|
+
template <typename FUNC>
|
|
112
|
+
void RotateZ (double alpha, FUNC func) const
|
|
113
|
+
{
|
|
114
|
+
if (order < 0) return;
|
|
115
|
+
|
|
116
|
+
Vector<Complex> exp_imalpha(order+1);
|
|
117
|
+
Complex exp_ialpha(cos(alpha), sin(alpha));
|
|
118
|
+
Complex prod = 1.0;
|
|
119
|
+
for (int i = 0; i <= order; i++)
|
|
120
|
+
{
|
|
121
|
+
exp_imalpha(i) = prod;
|
|
122
|
+
prod *= exp_ialpha;
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
int ii = 0;
|
|
126
|
+
for (int n = 0; n <= order; n++)
|
|
127
|
+
{
|
|
128
|
+
for (int m = -n; m < 0; m++, ii++)
|
|
129
|
+
func(ii, conj(exp_imalpha(-m)));
|
|
130
|
+
for (int m = 0; m <= n; m++, ii++)
|
|
131
|
+
func(ii, exp_imalpha(m));
|
|
132
|
+
};
|
|
133
|
+
};
|
|
134
|
+
|
|
135
|
+
template <typename FUNC>
|
|
136
|
+
void RotateZFlip (double alpha, bool flip, FUNC func) const
|
|
137
|
+
{
|
|
138
|
+
if (order < 0) return;
|
|
139
|
+
|
|
140
|
+
Vector<Complex> exp_imalpha(order+1);
|
|
141
|
+
Complex exp_ialpha(cos(alpha), sin(alpha));
|
|
142
|
+
Complex prod = 1.0;
|
|
143
|
+
for (int i = 0; i <= order; i++)
|
|
144
|
+
{
|
|
145
|
+
exp_imalpha(i) = prod;
|
|
146
|
+
prod *= exp_ialpha;
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
int ii = 0;
|
|
150
|
+
|
|
151
|
+
auto FlipFactor = [] (int n, int m, bool flip)->double
|
|
152
|
+
{
|
|
153
|
+
if (flip)
|
|
154
|
+
return ((n-m)%2) == 1 ? -1 : 1;
|
|
155
|
+
return 1.0;
|
|
156
|
+
};
|
|
157
|
+
|
|
158
|
+
for (int n = 0; n <= order; n++)
|
|
159
|
+
{
|
|
160
|
+
for (int m = -n; m < 0; m++, ii++)
|
|
161
|
+
func(ii, FlipFactor(n,m,flip)*conj(exp_imalpha(-m)));
|
|
162
|
+
for (int m = 0; m <= n; m++, ii++)
|
|
163
|
+
func(ii, FlipFactor(n,m,flip)*exp_imalpha(m));
|
|
164
|
+
};
|
|
165
|
+
};
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
void RotateY (double alpha, bool parallel = false);
|
|
90
170
|
|
|
91
171
|
|
|
92
172
|
static double CalcAmn (int m, int n)
|
|
@@ -119,11 +199,11 @@ namespace ngsbem
|
|
|
119
199
|
// https://fortran-lang.discourse.group/t/looking-for-spherical-bessel-and-hankel-functions-of-first-and-second-kind-and-arbitrary-order/2308/2
|
|
120
200
|
NGS_DLL_HEADER
|
|
121
201
|
void besseljs3d (int nterms, double z, double scale,
|
|
122
|
-
|
|
202
|
+
SliceVector<double> fjs, SliceVector<double> fjder = FlatVector<double>(0, nullptr));
|
|
123
203
|
|
|
124
204
|
NGS_DLL_HEADER
|
|
125
205
|
void besseljs3d (int nterms, Complex z, double scale,
|
|
126
|
-
|
|
206
|
+
SliceVector<Complex> fjs, SliceVector<Complex> fjder = FlatVector<Complex>(0, nullptr));
|
|
127
207
|
|
|
128
208
|
|
|
129
209
|
/*
|
|
@@ -142,14 +222,17 @@ namespace ngsbem
|
|
|
142
222
|
FlatVector<double> jp,
|
|
143
223
|
FlatVector<double> yp);
|
|
144
224
|
|
|
145
|
-
|
|
225
|
+
|
|
146
226
|
|
|
147
227
|
template <typename T>
|
|
148
228
|
void SphericalBessel (int n, double rho, double scale, T && values)
|
|
149
229
|
{
|
|
230
|
+
besseljs3d (n, rho, scale, values);
|
|
231
|
+
/*
|
|
150
232
|
Vector<double> j(n+1), jp(n+1);
|
|
151
233
|
besseljs3d (n, rho, scale, j, jp);
|
|
152
234
|
values = j;
|
|
235
|
+
*/
|
|
153
236
|
}
|
|
154
237
|
|
|
155
238
|
|
|
@@ -173,21 +256,6 @@ namespace ngsbem
|
|
|
173
256
|
return;
|
|
174
257
|
}
|
|
175
258
|
Vector j(n+1), y(n+1), jp(n+1), yp(n+1);
|
|
176
|
-
// SBESJY (rho, n, j, y, jp, yp);
|
|
177
|
-
|
|
178
|
-
/*
|
|
179
|
-
values = j + Complex(0,1) * y;
|
|
180
|
-
if (scale != 1.0)
|
|
181
|
-
{
|
|
182
|
-
double prod = 1.0;
|
|
183
|
-
for (int i = 0; i <= n; i++)
|
|
184
|
-
{
|
|
185
|
-
values(i) *= prod;
|
|
186
|
-
prod *= scale;
|
|
187
|
-
}
|
|
188
|
-
}
|
|
189
|
-
*/
|
|
190
|
-
|
|
191
259
|
|
|
192
260
|
// the bessel-evaluation with scale
|
|
193
261
|
besseljs3d (n, rho, 1/scale, j, jp);
|
|
@@ -358,18 +426,7 @@ namespace ngsbem
|
|
|
358
426
|
// static Timer t("mptool Transform "+ToString(typeid(RADIAL).name())+ToString(typeid(TARGET).name()));
|
|
359
427
|
// RegionTimer reg(t);
|
|
360
428
|
|
|
361
|
-
|
|
362
|
-
double theta, phi;
|
|
363
|
-
|
|
364
|
-
if (len < 1e-30)
|
|
365
|
-
theta = 0;
|
|
366
|
-
else
|
|
367
|
-
theta = acos (dist(2) / len);
|
|
368
|
-
|
|
369
|
-
if (sqr(dist(0))+sqr(dist(1)) < 1e-30)
|
|
370
|
-
phi = 0;
|
|
371
|
-
else
|
|
372
|
-
phi = atan2(dist(1), dist(0));
|
|
429
|
+
auto [len, theta, phi] = SphericalCoordinates(dist);
|
|
373
430
|
|
|
374
431
|
|
|
375
432
|
// MultiPole<RADIAL,entry_type> tmp{*this};
|
|
@@ -386,14 +443,18 @@ namespace ngsbem
|
|
|
386
443
|
}
|
|
387
444
|
|
|
388
445
|
template <typename TARGET>
|
|
389
|
-
void TransformAdd (MultiPole<TARGET,entry_type> & target, Vec<3> dist) const
|
|
446
|
+
void TransformAdd (MultiPole<TARGET,entry_type> & target, Vec<3> dist, bool atomic = false) const
|
|
390
447
|
{
|
|
391
448
|
if (SH().Order() < 0) return;
|
|
392
449
|
if (target.SH().Order() < 0) return;
|
|
393
450
|
|
|
394
451
|
MultiPole<TARGET,entry_type> tmp{target};
|
|
395
452
|
Transform(tmp, dist);
|
|
396
|
-
|
|
453
|
+
if (!atomic)
|
|
454
|
+
target.SH().Coefs() += tmp.SH().Coefs();
|
|
455
|
+
else
|
|
456
|
+
for (int j = 0; j < target.SH().Coefs().Size(); j++)
|
|
457
|
+
AtomicAdd(target.SH().Coefs()[j], tmp.SH().Coefs()[j]);
|
|
397
458
|
}
|
|
398
459
|
|
|
399
460
|
template <typename TARGET>
|
|
@@ -412,11 +473,123 @@ namespace ngsbem
|
|
|
412
473
|
static constexpr int maxdirect = 100;
|
|
413
474
|
|
|
414
475
|
|
|
476
|
+
template <typename SCAL, auto S>
|
|
477
|
+
inline auto VecVector2Matrix (FlatVector<Vec<S,SCAL>> vec)
|
|
478
|
+
{
|
|
479
|
+
return FlatMatrixFixWidth<S,SCAL> (vec.Size(), vec.Data()->Data());
|
|
480
|
+
}
|
|
481
|
+
|
|
482
|
+
inline auto VecVector2Matrix (FlatVector<Complex> vec)
|
|
483
|
+
{
|
|
484
|
+
return FlatMatrixFixWidth<1,Complex> (vec.Size(), vec.Data());
|
|
485
|
+
}
|
|
486
|
+
|
|
487
|
+
|
|
415
488
|
template <typename entry_type=Complex>
|
|
416
489
|
class SingularMLMultiPole
|
|
417
490
|
{
|
|
418
491
|
static Array<size_t> nodes_on_level;
|
|
419
492
|
|
|
493
|
+
struct RecordingSS
|
|
494
|
+
{
|
|
495
|
+
const MultiPole<MPSingular,entry_type> * mp_source;
|
|
496
|
+
MultiPole<MPSingular,entry_type> * mp_target;
|
|
497
|
+
Vec<3> dist;
|
|
498
|
+
double len, theta, phi;
|
|
499
|
+
bool flipz;
|
|
500
|
+
public:
|
|
501
|
+
RecordingSS() = default;
|
|
502
|
+
RecordingSS (const MultiPole<MPSingular,entry_type> * amp_source,
|
|
503
|
+
MultiPole<MPSingular,entry_type> * amp_target,
|
|
504
|
+
Vec<3> adist)
|
|
505
|
+
: mp_source(amp_source), mp_target(amp_target), dist(adist)
|
|
506
|
+
{
|
|
507
|
+
std::tie(len, theta, phi) = SphericalCoordinates(adist);
|
|
508
|
+
// flipz = false;
|
|
509
|
+
flipz = theta > M_PI/2;
|
|
510
|
+
if (flipz) theta = M_PI-theta;
|
|
511
|
+
}
|
|
512
|
+
};
|
|
513
|
+
|
|
514
|
+
|
|
515
|
+
static void ProcessBatch(FlatArray<RecordingSS*> batch, double len, double theta) {
|
|
516
|
+
constexpr int vec_length = VecLength<entry_type>;
|
|
517
|
+
int batch_size = batch.Size();
|
|
518
|
+
int N = batch_size * vec_length;
|
|
519
|
+
// *testout << "Processing batch of size " << batch.Size() << ", with N = " << N << ", vec_length = " << vec_length << ", Type: " << typeid(entry_type).name() << ", len = " << len << ", theta = " << theta << endl;
|
|
520
|
+
|
|
521
|
+
if (N <= 1 || batch_size <= 1) {
|
|
522
|
+
for (auto* rec : batch) {
|
|
523
|
+
rec->mp_source->TransformAdd(*rec->mp_target, rec->dist, true);
|
|
524
|
+
}
|
|
525
|
+
}
|
|
526
|
+
else if (N <= 3) {
|
|
527
|
+
ProcessVectorizedBatch<3, vec_length>(batch, len, theta);
|
|
528
|
+
}
|
|
529
|
+
else if (N <= 4) {
|
|
530
|
+
ProcessVectorizedBatch<4, vec_length>(batch, len, theta);
|
|
531
|
+
}
|
|
532
|
+
else if (N <= 6) {
|
|
533
|
+
ProcessVectorizedBatch<6, vec_length>(batch, len, theta);
|
|
534
|
+
}
|
|
535
|
+
else if (N <= 12) {
|
|
536
|
+
ProcessVectorizedBatch<12, vec_length>(batch, len, theta);
|
|
537
|
+
}
|
|
538
|
+
else if (N <= 24) {
|
|
539
|
+
ProcessVectorizedBatch<24, vec_length>(batch, len, theta);
|
|
540
|
+
}
|
|
541
|
+
else if (N <= 48) {
|
|
542
|
+
ProcessVectorizedBatch<48, vec_length>(batch, len, theta);
|
|
543
|
+
}
|
|
544
|
+
else if (N <= 96) {
|
|
545
|
+
ProcessVectorizedBatch<96, vec_length>(batch, len, theta);
|
|
546
|
+
}
|
|
547
|
+
else if (N <= 192) {
|
|
548
|
+
ProcessVectorizedBatch<192, vec_length>(batch, len, theta);
|
|
549
|
+
}
|
|
550
|
+
else {
|
|
551
|
+
// Split large batches
|
|
552
|
+
ProcessBatch(batch.Range(0, 192 / vec_length), len, theta);
|
|
553
|
+
ProcessBatch(batch.Range(192 / vec_length, batch_size), len, theta);
|
|
554
|
+
}
|
|
555
|
+
}
|
|
556
|
+
|
|
557
|
+
template<int N, int vec_length>
|
|
558
|
+
static void ProcessVectorizedBatch(FlatArray<RecordingSS*> batch, double len, double theta) {
|
|
559
|
+
|
|
560
|
+
// *testout << "Processing vectorized S->S batch of size " << batch.Size() << ", with N = " << N << ", vec_length = " << vec_length << ", len = " << len << ", theta = " << theta << endl;
|
|
561
|
+
MultiPole<MPSingular, Vec<N,Complex>> vec_source(batch[0]->mp_source->Order(), batch[0]->mp_source->Kappa(), batch[0]->mp_source->RTyp());
|
|
562
|
+
MultiPole<MPSingular, Vec<N,Complex>> vec_target(batch[0]->mp_target->Order(), batch[0]->mp_target->Kappa(), batch[0]->mp_target->RTyp());
|
|
563
|
+
|
|
564
|
+
// Copy multipoles into vectorized multipole
|
|
565
|
+
for (int i = 0; i < batch.Size(); i++)
|
|
566
|
+
{
|
|
567
|
+
auto source_i = VecVector2Matrix (batch[i]->mp_source->SH().Coefs());
|
|
568
|
+
auto source_mati = VecVector2Matrix (vec_source.SH().Coefs()).Cols(i*vec_length, (i+1)*vec_length);
|
|
569
|
+
batch[i]->mp_source->SH().RotateZFlip(batch[i]->phi, batch[i]->flipz,
|
|
570
|
+
[source_i, source_mati] (size_t ii, Complex factor)
|
|
571
|
+
{
|
|
572
|
+
source_mati.Row(ii) = factor * source_i.Row(ii);
|
|
573
|
+
});
|
|
574
|
+
}
|
|
575
|
+
|
|
576
|
+
vec_source.SH().RotateY(theta, vec_source.SH().Order() >= 100);
|
|
577
|
+
vec_source.ShiftZ(-len, vec_target);
|
|
578
|
+
vec_target.SH().RotateY(-theta, vec_target.SH().Order() >= 100);
|
|
579
|
+
|
|
580
|
+
// Copy vectorized multipole into individual multipoles
|
|
581
|
+
for (int i = 0; i < batch.Size(); i++)
|
|
582
|
+
{
|
|
583
|
+
auto source_mati = VecVector2Matrix (vec_target.SH().Coefs()).Cols(i*vec_length, (i+1)*vec_length);
|
|
584
|
+
auto target_mati = VecVector2Matrix (batch[i]->mp_target->SH().Coefs());
|
|
585
|
+
batch[i]->mp_target->SH().RotateZFlip(-batch[i]->phi, batch[i]->flipz,
|
|
586
|
+
[source_mati, target_mati] (size_t ii, Complex factor)
|
|
587
|
+
{
|
|
588
|
+
AtomicAdd (target_mati.Row(ii), factor * source_mati.Row(ii));
|
|
589
|
+
});
|
|
590
|
+
}
|
|
591
|
+
}
|
|
592
|
+
|
|
420
593
|
struct Node
|
|
421
594
|
{
|
|
422
595
|
Vec<3> center;
|
|
@@ -429,6 +602,8 @@ namespace ngsbem
|
|
|
429
602
|
Array<tuple<Vec<3>, Vec<3>, entry_type>> dipoles;
|
|
430
603
|
Array<tuple<Vec<3>, Vec<3>, Complex,int>> currents;
|
|
431
604
|
int total_sources;
|
|
605
|
+
std::mutex node_mutex;
|
|
606
|
+
atomic<bool> have_childs{false};
|
|
432
607
|
|
|
433
608
|
Node (Vec<3> acenter, double ar, int alevel, double akappa)
|
|
434
609
|
: center(acenter), r(ar), level(alevel), mp(MPOrder(ar*akappa), akappa, ar) // min(1.0, ar*akappa))
|
|
@@ -449,12 +624,13 @@ namespace ngsbem
|
|
|
449
624
|
cc(2) += (i&4) ? r/2 : -r/2;
|
|
450
625
|
childs[i] = make_unique<Node> (cc, r/2, level+1, mp.Kappa());
|
|
451
626
|
}
|
|
627
|
+
have_childs = true;
|
|
452
628
|
}
|
|
453
629
|
|
|
454
630
|
|
|
455
631
|
void AddCharge (Vec<3> x, entry_type c)
|
|
456
632
|
{
|
|
457
|
-
if (
|
|
633
|
+
if (have_childs) // quick check without locking
|
|
458
634
|
{
|
|
459
635
|
// directly send to childs:
|
|
460
636
|
int childnum = 0;
|
|
@@ -465,6 +641,21 @@ namespace ngsbem
|
|
|
465
641
|
return;
|
|
466
642
|
}
|
|
467
643
|
|
|
644
|
+
lock_guard<mutex> guard(node_mutex);
|
|
645
|
+
|
|
646
|
+
if (have_childs) // test again after locking
|
|
647
|
+
{
|
|
648
|
+
// directly send to childs:
|
|
649
|
+
int childnum = 0;
|
|
650
|
+
if (x(0) > center(0)) childnum += 1;
|
|
651
|
+
if (x(1) > center(1)) childnum += 2;
|
|
652
|
+
if (x(2) > center(2)) childnum += 4;
|
|
653
|
+
childs[childnum] -> AddCharge(x, c);
|
|
654
|
+
return;
|
|
655
|
+
}
|
|
656
|
+
|
|
657
|
+
|
|
658
|
+
|
|
468
659
|
charges.Append( tuple{x,c} );
|
|
469
660
|
|
|
470
661
|
// if (r*mp.Kappa() < 1e-8) return;
|
|
@@ -489,7 +680,21 @@ namespace ngsbem
|
|
|
489
680
|
|
|
490
681
|
void AddDipole (Vec<3> x, Vec<3> d, entry_type c)
|
|
491
682
|
{
|
|
492
|
-
if (
|
|
683
|
+
if (have_childs)
|
|
684
|
+
{
|
|
685
|
+
// directly send to childs:
|
|
686
|
+
|
|
687
|
+
int childnum = 0;
|
|
688
|
+
if (x(0) > center(0)) childnum += 1;
|
|
689
|
+
if (x(1) > center(1)) childnum += 2;
|
|
690
|
+
if (x(2) > center(2)) childnum += 4;
|
|
691
|
+
childs[childnum] -> AddDipole(x, d, c);
|
|
692
|
+
return;
|
|
693
|
+
}
|
|
694
|
+
|
|
695
|
+
lock_guard<mutex> guard(node_mutex);
|
|
696
|
+
|
|
697
|
+
if (have_childs)
|
|
493
698
|
{
|
|
494
699
|
// directly send to childs:
|
|
495
700
|
|
|
@@ -501,6 +706,9 @@ namespace ngsbem
|
|
|
501
706
|
return;
|
|
502
707
|
}
|
|
503
708
|
|
|
709
|
+
|
|
710
|
+
|
|
711
|
+
|
|
504
712
|
dipoles.Append (tuple{x,d,c});
|
|
505
713
|
|
|
506
714
|
if (dipoles.Size() < maxdirect || r < 1e-8)
|
|
@@ -520,6 +728,7 @@ namespace ngsbem
|
|
|
520
728
|
currents.SetSize0();
|
|
521
729
|
}
|
|
522
730
|
|
|
731
|
+
// not parallel yet
|
|
523
732
|
void AddCurrent (Vec<3> sp, Vec<3> ep, Complex j, int num)
|
|
524
733
|
{
|
|
525
734
|
if (childs[0])
|
|
@@ -549,7 +758,7 @@ namespace ngsbem
|
|
|
549
758
|
}
|
|
550
759
|
return;
|
|
551
760
|
}
|
|
552
|
-
|
|
761
|
+
|
|
553
762
|
currents.Append (tuple{sp,ep,j,num});
|
|
554
763
|
|
|
555
764
|
// if (currents.Size() < maxdirect || r < 1e-8)
|
|
@@ -664,23 +873,27 @@ namespace ngsbem
|
|
|
664
873
|
}
|
|
665
874
|
}
|
|
666
875
|
|
|
667
|
-
void CalcMP()
|
|
876
|
+
void CalcMP(Array<RecordingSS> * recording, Array<Node*> * nodes_to_process)
|
|
668
877
|
{
|
|
669
|
-
mp.SH().Coefs() = 0.0;
|
|
878
|
+
// mp.SH().Coefs() = 0.0;
|
|
670
879
|
if (childs[0])
|
|
671
880
|
{
|
|
672
|
-
if (total_sources < 1000)
|
|
881
|
+
if (total_sources < 1000 || recording)
|
|
673
882
|
for (auto & child : childs)
|
|
674
|
-
child->CalcMP();
|
|
883
|
+
child->CalcMP(recording, nodes_to_process);
|
|
675
884
|
else
|
|
676
885
|
ParallelFor (8, [&] (int nr)
|
|
677
886
|
{
|
|
678
|
-
childs[nr] -> CalcMP();
|
|
887
|
+
childs[nr] -> CalcMP(recording, nodes_to_process);
|
|
679
888
|
});
|
|
680
889
|
|
|
681
890
|
|
|
682
|
-
for (auto & child : childs)
|
|
683
|
-
child->mp.
|
|
891
|
+
for (auto & child : childs){
|
|
892
|
+
if (recording && child->mp.SH().Coefs().Size() > 0)
|
|
893
|
+
*recording += RecordingSS(&child->mp, &mp, center-child->center);
|
|
894
|
+
else
|
|
895
|
+
child->mp.TransformAdd(mp, center-child->center);
|
|
896
|
+
}
|
|
684
897
|
}
|
|
685
898
|
else
|
|
686
899
|
{
|
|
@@ -690,14 +903,18 @@ namespace ngsbem
|
|
|
690
903
|
return;
|
|
691
904
|
}
|
|
692
905
|
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
906
|
+
if (nodes_to_process)
|
|
907
|
+
*nodes_to_process += this;
|
|
908
|
+
else {
|
|
909
|
+
for (auto [x,c] : charges)
|
|
910
|
+
mp.AddCharge (x-center,c);
|
|
698
911
|
|
|
699
|
-
|
|
700
|
-
|
|
912
|
+
for (auto [x,d,c] : dipoles)
|
|
913
|
+
mp.AddDipole (x-center, d, c);
|
|
914
|
+
|
|
915
|
+
for (auto [sp,ep,j,num] : currents)
|
|
916
|
+
mp.AddCurrent (sp-center, ep-center, j, num);
|
|
917
|
+
}
|
|
701
918
|
}
|
|
702
919
|
}
|
|
703
920
|
|
|
@@ -836,6 +1053,10 @@ namespace ngsbem
|
|
|
836
1053
|
void CalcMP()
|
|
837
1054
|
{
|
|
838
1055
|
static Timer t("mptool compute singular MLMP"); RegionTimer rg(t);
|
|
1056
|
+
static Timer ts2mp("mptool compute singular MLMP - source2mp");
|
|
1057
|
+
static Timer tS2S("mptool compute singular MLMP - S->S");
|
|
1058
|
+
static Timer trec("mptool comput singular recording");
|
|
1059
|
+
static Timer tsort("mptool comput singular sort");
|
|
839
1060
|
|
|
840
1061
|
/*
|
|
841
1062
|
int maxlevel = 0;
|
|
@@ -847,7 +1068,87 @@ namespace ngsbem
|
|
|
847
1068
|
*/
|
|
848
1069
|
|
|
849
1070
|
root.CalcTotalSources();
|
|
850
|
-
|
|
1071
|
+
|
|
1072
|
+
if (false)
|
|
1073
|
+
// direct evaluation of S->S
|
|
1074
|
+
root.CalcMP(nullptr, nullptr);
|
|
1075
|
+
else
|
|
1076
|
+
{
|
|
1077
|
+
|
|
1078
|
+
Array<RecordingSS> recording;
|
|
1079
|
+
Array<Node*> nodes_to_process;
|
|
1080
|
+
|
|
1081
|
+
{
|
|
1082
|
+
RegionTimer reg(trec);
|
|
1083
|
+
root.CalcMP(&recording, &nodes_to_process);
|
|
1084
|
+
}
|
|
1085
|
+
|
|
1086
|
+
{
|
|
1087
|
+
RegionTimer rs2mp(ts2mp);
|
|
1088
|
+
ParallelFor(nodes_to_process.Size(), [&](int i){
|
|
1089
|
+
auto node = nodes_to_process[i];
|
|
1090
|
+
for (auto [x,c]: node->charges)
|
|
1091
|
+
node->mp.AddCharge(x-node->center, c);
|
|
1092
|
+
for (auto [x,d,c]: node->dipoles)
|
|
1093
|
+
node->mp.AddDipole(x-node->center, d, c);
|
|
1094
|
+
for (auto [sp,ep,j,num]: node->currents)
|
|
1095
|
+
node->mp.AddCurrent(sp-node->center, ep-node->center, j, num);
|
|
1096
|
+
}, TasksPerThread(4));
|
|
1097
|
+
}
|
|
1098
|
+
|
|
1099
|
+
{
|
|
1100
|
+
RegionTimer reg(tsort);
|
|
1101
|
+
QuickSort (recording, [] (auto & a, auto & b)
|
|
1102
|
+
{
|
|
1103
|
+
if (a.len < (1-1e-8) * b.len) return true;
|
|
1104
|
+
if (a.len > (1+1e-8) * b.len) return false;
|
|
1105
|
+
return a.theta < b.theta;
|
|
1106
|
+
});
|
|
1107
|
+
}
|
|
1108
|
+
|
|
1109
|
+
double current_len = -1e100;
|
|
1110
|
+
double current_theta = -1e100;
|
|
1111
|
+
Array<RecordingSS*> current_batch;
|
|
1112
|
+
Array<Array<RecordingSS*>> batch_group;
|
|
1113
|
+
Array<double> group_lengths;
|
|
1114
|
+
Array<double> group_thetas;
|
|
1115
|
+
for (auto & record : recording)
|
|
1116
|
+
{
|
|
1117
|
+
bool len_changed = fabs(record.len - current_len) > 1e-8;
|
|
1118
|
+
bool theta_changed = fabs(record.theta - current_theta) > 1e-8;
|
|
1119
|
+
if ((len_changed || theta_changed) && current_batch.Size() > 0) {
|
|
1120
|
+
batch_group.Append(current_batch);
|
|
1121
|
+
group_lengths.Append(current_len);
|
|
1122
|
+
group_thetas.Append(current_theta);
|
|
1123
|
+
current_batch.SetSize(0);
|
|
1124
|
+
}
|
|
1125
|
+
|
|
1126
|
+
current_len = record.len;
|
|
1127
|
+
current_theta = record.theta;
|
|
1128
|
+
current_batch.Append(&record);
|
|
1129
|
+
}
|
|
1130
|
+
if (current_batch.Size() > 0) {
|
|
1131
|
+
batch_group.Append(current_batch);
|
|
1132
|
+
group_lengths.Append(current_len);
|
|
1133
|
+
group_thetas.Append(current_theta);
|
|
1134
|
+
}
|
|
1135
|
+
|
|
1136
|
+
{
|
|
1137
|
+
RegionTimer rS2S(tS2S);
|
|
1138
|
+
// ParallelFor(batch_group.Size(), [&](int i) {
|
|
1139
|
+
for (int i = 0; i < batch_group.Size(); i++){
|
|
1140
|
+
// *testout << "Processing batch " << i << " of size " << batch_group[i].Size() << ", with len = " << group_lengths[i] << ", theta = " << group_thetas[i] << endl;
|
|
1141
|
+
int chunk_size = 24;
|
|
1142
|
+
if (batch_group[i].Size() < chunk_size)
|
|
1143
|
+
ProcessBatch(batch_group[i], group_lengths[i], group_thetas[i]);
|
|
1144
|
+
else
|
|
1145
|
+
ParallelForRange(IntRange(batch_group[i].Size()), [&](IntRange range) {
|
|
1146
|
+
auto sub_batch = batch_group[i].Range(range.First(), range.Next());
|
|
1147
|
+
ProcessBatch(sub_batch, group_lengths[i], group_thetas[i]);
|
|
1148
|
+
}, TasksPerThread(4));
|
|
1149
|
+
}
|
|
1150
|
+
}
|
|
1151
|
+
}
|
|
851
1152
|
|
|
852
1153
|
havemp = true;
|
|
853
1154
|
}
|
|
@@ -877,6 +1178,143 @@ namespace ngsbem
|
|
|
877
1178
|
class NGS_DLL_HEADER RegularMLMultiPole
|
|
878
1179
|
{
|
|
879
1180
|
static Array<size_t> nodes_on_level;
|
|
1181
|
+
|
|
1182
|
+
|
|
1183
|
+
struct RecordingRS
|
|
1184
|
+
{
|
|
1185
|
+
const MultiPole<MPSingular,elem_type> * mpS;
|
|
1186
|
+
MultiPole<MPRegular,elem_type> * mpR;
|
|
1187
|
+
Vec<3> dist;
|
|
1188
|
+
double len, theta, phi;
|
|
1189
|
+
public:
|
|
1190
|
+
RecordingRS() = default;
|
|
1191
|
+
RecordingRS (const MultiPole<MPSingular,elem_type> * ampS,
|
|
1192
|
+
MultiPole<MPRegular,elem_type> * ampR,
|
|
1193
|
+
Vec<3> adist)
|
|
1194
|
+
: mpS(ampS), mpR(ampR), dist(adist)
|
|
1195
|
+
{
|
|
1196
|
+
std::tie(len, theta, phi) = SphericalCoordinates(dist);
|
|
1197
|
+
}
|
|
1198
|
+
};
|
|
1199
|
+
|
|
1200
|
+
static void ProcessBatchRS(FlatArray<RecordingRS*> batch, double len, double theta) {
|
|
1201
|
+
// static Timer t("ProcessBatchRS"); RegionTimer reg(t, batch.Size());
|
|
1202
|
+
constexpr int vec_length = VecLength<elem_type>;
|
|
1203
|
+
int batch_size = batch.Size();
|
|
1204
|
+
int N = batch_size * vec_length;
|
|
1205
|
+
// *testout << "Processing batch of size " << batch.Size() << ", with N = " << N << ", vec_length = " << vec_length << ", Type: " << typeid(elem_type).name() << ", len = " << len << ", theta = " << theta << endl;
|
|
1206
|
+
|
|
1207
|
+
if (N <= 1 || batch_size <= 1) {
|
|
1208
|
+
for (auto* rec : batch) {
|
|
1209
|
+
rec->mpS->TransformAdd(*rec->mpR, rec->dist);
|
|
1210
|
+
}
|
|
1211
|
+
}
|
|
1212
|
+
else if (N <= 3) {
|
|
1213
|
+
ProcessVectorizedBatch<3, vec_length>(batch, len, theta);
|
|
1214
|
+
}
|
|
1215
|
+
else if (N <= 4) {
|
|
1216
|
+
ProcessVectorizedBatch<4, vec_length>(batch, len, theta);
|
|
1217
|
+
}
|
|
1218
|
+
else if (N <= 6) {
|
|
1219
|
+
ProcessVectorizedBatch<6, vec_length>(batch, len, theta);
|
|
1220
|
+
}
|
|
1221
|
+
else if (N <= 12) {
|
|
1222
|
+
ProcessVectorizedBatch<12, vec_length>(batch, len, theta);
|
|
1223
|
+
}
|
|
1224
|
+
else if (N <= 24) {
|
|
1225
|
+
ProcessVectorizedBatch<24, vec_length>(batch, len, theta);
|
|
1226
|
+
}
|
|
1227
|
+
else if (N <= 48) {
|
|
1228
|
+
ProcessVectorizedBatch<48, vec_length>(batch, len, theta);
|
|
1229
|
+
}
|
|
1230
|
+
else if (N <= 96) {
|
|
1231
|
+
ProcessVectorizedBatch<96, vec_length>(batch, len, theta);
|
|
1232
|
+
}
|
|
1233
|
+
else if (N <= 192) {
|
|
1234
|
+
ProcessVectorizedBatch<192, vec_length>(batch, len, theta);
|
|
1235
|
+
}
|
|
1236
|
+
else {
|
|
1237
|
+
// Split large batches
|
|
1238
|
+
/*
|
|
1239
|
+
ProcessBatch(batch.Range(0, 192 / vec_length), len, theta);
|
|
1240
|
+
ProcessBatch(batch.Range(192 / vec_length, batch_size), len, theta);
|
|
1241
|
+
*/
|
|
1242
|
+
|
|
1243
|
+
/*
|
|
1244
|
+
ParallelFor (2, [&] (int i)
|
|
1245
|
+
{
|
|
1246
|
+
if (i == 0)
|
|
1247
|
+
ProcessBatchRS(batch.Range(0, 192 / vec_length), len, theta);
|
|
1248
|
+
else
|
|
1249
|
+
ProcessBatchRS(batch.Range(192 / vec_length, batch_size), len, theta);
|
|
1250
|
+
}, 2);
|
|
1251
|
+
*/
|
|
1252
|
+
|
|
1253
|
+
|
|
1254
|
+
size_t chunksize = 192/vec_length;
|
|
1255
|
+
size_t num = (batch.Size()+chunksize-1) / chunksize;
|
|
1256
|
+
ParallelFor (num, [&](int i)
|
|
1257
|
+
{
|
|
1258
|
+
ProcessBatchRS(batch.Range(i*chunksize, min((i+1)*chunksize, batch.Size())), len, theta);
|
|
1259
|
+
}, num);
|
|
1260
|
+
|
|
1261
|
+
}
|
|
1262
|
+
}
|
|
1263
|
+
|
|
1264
|
+
|
|
1265
|
+
template<int N, int vec_length>
|
|
1266
|
+
static void ProcessVectorizedBatch(FlatArray<RecordingRS*> batch, double len, double theta) {
|
|
1267
|
+
|
|
1268
|
+
// static Timer t("ProcessVectorizedBatch, N = "+ToString(N) + ", vec_len = " + ToString(vec_length));
|
|
1269
|
+
// RegionTimer reg(t, batch[0]->mpS->SH().Order());
|
|
1270
|
+
// static Timer ttobatch("mptools - copy to batch 2");
|
|
1271
|
+
// static Timer tfrombatch("mptools - copy from batch 2");
|
|
1272
|
+
|
|
1273
|
+
// *testout << "Processing vectorized batch of size " << batch.Size() << ", with N = " << N << ", vec_length = " << vec_length << ", len = " << len << ", theta = " << theta << endl;
|
|
1274
|
+
MultiPole<MPSingular, Vec<N,Complex>> vec_source(batch[0]->mpS->Order(), batch[0]->mpS->Kappa(), batch[0]->mpS->RTyp());
|
|
1275
|
+
// MultiPole<MPSingular, elem_type> tmp_source{*batch[0]->mpS};
|
|
1276
|
+
MultiPole<MPRegular, elem_type> tmp_target{*batch[0]->mpR};
|
|
1277
|
+
MultiPole<MPRegular, Vec<N,Complex>> vec_target(batch[0]->mpR->Order(), batch[0]->mpR->Kappa(), batch[0]->mpR->RTyp());
|
|
1278
|
+
|
|
1279
|
+
// Copy multipoles into vectorized multipole
|
|
1280
|
+
// ttobatch.Start();
|
|
1281
|
+
for (int i = 0; i < batch.Size(); i++)
|
|
1282
|
+
{
|
|
1283
|
+
auto source_i = VecVector2Matrix (batch[i]->mpS->SH().Coefs());
|
|
1284
|
+
auto source_mati = VecVector2Matrix (vec_source.SH().Coefs()).Cols(i*vec_length, (i+1)*vec_length);
|
|
1285
|
+
batch[i]->mpS->SH().RotateZ(batch[i]->phi,
|
|
1286
|
+
[source_i, source_mati] (size_t ii, Complex factor)
|
|
1287
|
+
{
|
|
1288
|
+
source_mati.Row(ii) = factor * source_i.Row(ii);
|
|
1289
|
+
});
|
|
1290
|
+
}
|
|
1291
|
+
|
|
1292
|
+
// ttobatch.Stop();
|
|
1293
|
+
|
|
1294
|
+
vec_source.SH().RotateY(theta);
|
|
1295
|
+
vec_source.ShiftZ(-len, vec_target);
|
|
1296
|
+
vec_target.SH().RotateY(-theta);
|
|
1297
|
+
|
|
1298
|
+
// Copy vectorized multipole into individual multipoles
|
|
1299
|
+
// tfrombatch.Start();
|
|
1300
|
+
for (int i = 0; i < batch.Size(); i++) {
|
|
1301
|
+
// auto source_i = VecVector2Matrix (tmp_target.SH().Coefs());
|
|
1302
|
+
auto source_mati = VecVector2Matrix (vec_target.SH().Coefs()).Cols(i*vec_length, (i+1)*vec_length);
|
|
1303
|
+
auto targeti = VecVector2Matrix(batch[i]->mpR->SH().Coefs());
|
|
1304
|
+
|
|
1305
|
+
tmp_target.SH().RotateZ(-batch[i]->phi,
|
|
1306
|
+
[source_mati, targeti] (size_t ii, Complex factor)
|
|
1307
|
+
{
|
|
1308
|
+
// source_i.Row(ii) = factor * source_mati.Row(ii);
|
|
1309
|
+
AtomicAdd (VectorView(targeti.Row(ii)), factor * source_mati.Row(ii));
|
|
1310
|
+
});
|
|
1311
|
+
// for (int j = 0; j < tmp_target.SH().Coefs().Size(); j++)
|
|
1312
|
+
// AtomicAdd(batch[i]->mpR->SH().Coefs()[j], tmp_target.SH().Coefs()[j]);
|
|
1313
|
+
}
|
|
1314
|
+
// tfrombatch.Stop();
|
|
1315
|
+
|
|
1316
|
+
}
|
|
1317
|
+
|
|
880
1318
|
|
|
881
1319
|
struct Node
|
|
882
1320
|
{
|
|
@@ -887,6 +1325,8 @@ namespace ngsbem
|
|
|
887
1325
|
MultiPole<MPRegular,elem_type> mp;
|
|
888
1326
|
Array<Vec<3>> targets;
|
|
889
1327
|
int total_targets;
|
|
1328
|
+
std::mutex node_mutex;
|
|
1329
|
+
atomic<bool> have_childs{false};
|
|
890
1330
|
|
|
891
1331
|
Array<const typename SingularMLMultiPole<elem_type>::Node*> singnodes;
|
|
892
1332
|
|
|
@@ -911,13 +1351,15 @@ namespace ngsbem
|
|
|
911
1351
|
cc(2) += (i&4) ? r/2 : -r/2;
|
|
912
1352
|
childs[i] = make_unique<Node> (cc, r/2, level+1, mp.Kappa());
|
|
913
1353
|
}
|
|
1354
|
+
have_childs = true;
|
|
914
1355
|
}
|
|
915
|
-
|
|
916
|
-
void AddSingularNode (const typename SingularMLMultiPole<elem_type>::Node & singnode, bool allow_refine
|
|
1356
|
+
|
|
1357
|
+
void AddSingularNode (const typename SingularMLMultiPole<elem_type>::Node & singnode, bool allow_refine,
|
|
1358
|
+
Array<RecordingRS> * recording)
|
|
917
1359
|
{
|
|
918
1360
|
if (mp.SH().Order() < 0) return;
|
|
919
1361
|
if (singnode.mp.SH().Order() < 0) return;
|
|
920
|
-
if (L2Norm(singnode.mp.SH().Coefs()) == 0) return;
|
|
1362
|
+
// if (L2Norm(singnode.mp.SH().Coefs()) == 0) return;
|
|
921
1363
|
if (level > 20)
|
|
922
1364
|
{
|
|
923
1365
|
singnodes.Append(&singnode);
|
|
@@ -936,12 +1378,15 @@ namespace ngsbem
|
|
|
936
1378
|
singnode.childs[0]->mp.Order() < singnode.mp.Order())
|
|
937
1379
|
{
|
|
938
1380
|
for (auto & child : singnode.childs)
|
|
939
|
-
AddSingularNode (*child, allow_refine);
|
|
1381
|
+
AddSingularNode (*child, allow_refine, recording);
|
|
940
1382
|
return;
|
|
941
1383
|
}
|
|
942
1384
|
|
|
943
1385
|
// static Timer t("mptool transform Helmholtz-criterion"); RegionTimer r(t);
|
|
944
|
-
|
|
1386
|
+
if (recording)
|
|
1387
|
+
*recording += RecordingRS(&singnode.mp, &mp, dist);
|
|
1388
|
+
else
|
|
1389
|
+
singnode.mp.TransformAdd(mp, dist);
|
|
945
1390
|
return;
|
|
946
1391
|
}
|
|
947
1392
|
|
|
@@ -960,21 +1405,21 @@ namespace ngsbem
|
|
|
960
1405
|
CreateChilds();
|
|
961
1406
|
|
|
962
1407
|
for (auto & ch : childs)
|
|
963
|
-
ch -> AddSingularNode (singnode, allow_refine);
|
|
1408
|
+
ch -> AddSingularNode (singnode, allow_refine, recording);
|
|
964
1409
|
}
|
|
965
1410
|
else
|
|
966
1411
|
{
|
|
967
|
-
if (total_targets < 1000)
|
|
1412
|
+
if (total_targets < 1000 || recording)
|
|
968
1413
|
{
|
|
969
1414
|
for (auto & ch : childs)
|
|
970
1415
|
if (ch)
|
|
971
|
-
ch -> AddSingularNode (singnode, allow_refine);
|
|
1416
|
+
ch -> AddSingularNode (singnode, allow_refine, recording);
|
|
972
1417
|
}
|
|
973
1418
|
else
|
|
974
1419
|
ParallelFor (8, [&] (int nr)
|
|
975
1420
|
{
|
|
976
1421
|
if (childs[nr])
|
|
977
|
-
childs[nr] -> AddSingularNode (singnode, allow_refine);
|
|
1422
|
+
childs[nr] -> AddSingularNode (singnode, allow_refine, recording);
|
|
978
1423
|
});
|
|
979
1424
|
|
|
980
1425
|
if (targets.Size())
|
|
@@ -984,7 +1429,7 @@ namespace ngsbem
|
|
|
984
1429
|
else
|
|
985
1430
|
{
|
|
986
1431
|
for (auto & childsing : singnode.childs)
|
|
987
|
-
AddSingularNode (*childsing, allow_refine);
|
|
1432
|
+
AddSingularNode (*childsing, allow_refine, recording);
|
|
988
1433
|
}
|
|
989
1434
|
}
|
|
990
1435
|
|
|
@@ -996,12 +1441,22 @@ namespace ngsbem
|
|
|
996
1441
|
|
|
997
1442
|
if (childs[0])
|
|
998
1443
|
{
|
|
999
|
-
|
|
1444
|
+
if (total_targets < 1000)
|
|
1000
1445
|
{
|
|
1001
|
-
|
|
1002
|
-
|
|
1003
|
-
|
|
1446
|
+
for (int nr = 0; nr < 8; nr++)
|
|
1447
|
+
{
|
|
1448
|
+
if (L2Norm(mp.SH().Coefs()) > 0)
|
|
1449
|
+
mp.TransformAdd (childs[nr]->mp, childs[nr]->center-center);
|
|
1450
|
+
childs[nr]->LocalizeExpansion(allow_refine);
|
|
1451
|
+
}
|
|
1004
1452
|
}
|
|
1453
|
+
else
|
|
1454
|
+
ParallelFor(8, [&] (int nr)
|
|
1455
|
+
{
|
|
1456
|
+
if (L2Norm(mp.SH().Coefs()) > 0)
|
|
1457
|
+
mp.TransformAdd (childs[nr]->mp, childs[nr]->center-center);
|
|
1458
|
+
childs[nr]->LocalizeExpansion(allow_refine);
|
|
1459
|
+
});
|
|
1005
1460
|
mp = MultiPole<MPRegular,elem_type>(-1, mp.Kappa(), 1.);
|
|
1006
1461
|
//mp.SH().Coefs()=0.0;
|
|
1007
1462
|
}
|
|
@@ -1009,18 +1464,8 @@ namespace ngsbem
|
|
|
1009
1464
|
|
|
1010
1465
|
elem_type Evaluate (Vec<3> p) const
|
|
1011
1466
|
{
|
|
1012
|
-
// *testout << "eval p = " << p << ", level = " << level << ", center = " << center << ", r = " << r << endl;
|
|
1013
1467
|
elem_type sum{0.0};
|
|
1014
|
-
|
|
1015
|
-
if (childs[0])
|
|
1016
|
-
{
|
|
1017
|
-
int childnum = 0;
|
|
1018
|
-
if (p(0) > center(0)) childnum += 1;
|
|
1019
|
-
if (p(1) > center(1)) childnum += 2;
|
|
1020
|
-
if (p(2) > center(2)) childnum += 4;
|
|
1021
|
-
sum = childs[childnum]->Evaluate(p);
|
|
1022
|
-
}
|
|
1023
|
-
*/
|
|
1468
|
+
|
|
1024
1469
|
int childnum = 0;
|
|
1025
1470
|
if (p(0) > center(0)) childnum += 1;
|
|
1026
1471
|
if (p(1) > center(1)) childnum += 2;
|
|
@@ -1030,8 +1475,6 @@ namespace ngsbem
|
|
|
1030
1475
|
else
|
|
1031
1476
|
sum = mp.Eval(p-center);
|
|
1032
1477
|
|
|
1033
|
-
|
|
1034
|
-
// static Timer t("mptool direct evaluate"); RegionTimer r(t);
|
|
1035
1478
|
for (auto sn : singnodes)
|
|
1036
1479
|
sum += sn->EvaluateMP(p);
|
|
1037
1480
|
|
|
@@ -1080,7 +1523,8 @@ namespace ngsbem
|
|
|
1080
1523
|
|
|
1081
1524
|
void AddTarget (Vec<3> x)
|
|
1082
1525
|
{
|
|
1083
|
-
if (childs[0])
|
|
1526
|
+
// if (childs[0])
|
|
1527
|
+
if (have_childs) // quick check without locking
|
|
1084
1528
|
{
|
|
1085
1529
|
// directly send to childs:
|
|
1086
1530
|
int childnum = 0;
|
|
@@ -1091,6 +1535,20 @@ namespace ngsbem
|
|
|
1091
1535
|
return;
|
|
1092
1536
|
}
|
|
1093
1537
|
|
|
1538
|
+
lock_guard<mutex> guard(node_mutex);
|
|
1539
|
+
|
|
1540
|
+
if (have_childs) // test again after locking
|
|
1541
|
+
{
|
|
1542
|
+
// directly send to childs:
|
|
1543
|
+
int childnum = 0;
|
|
1544
|
+
if (x(0) > center(0)) childnum += 1;
|
|
1545
|
+
if (x(1) > center(1)) childnum += 2;
|
|
1546
|
+
if (x(2) > center(2)) childnum += 4;
|
|
1547
|
+
childs[childnum] -> AddTarget(x);
|
|
1548
|
+
return;
|
|
1549
|
+
}
|
|
1550
|
+
|
|
1551
|
+
|
|
1094
1552
|
targets.Append( x );
|
|
1095
1553
|
|
|
1096
1554
|
// if (r*mp.Kappa() < 1e-8) return;
|
|
@@ -1158,8 +1616,8 @@ namespace ngsbem
|
|
|
1158
1616
|
nodes_on_level = 0;
|
|
1159
1617
|
nodes_on_level[0] = 1;
|
|
1160
1618
|
{
|
|
1161
|
-
static Timer t("mptool compute regular MLMP"); RegionTimer rg(t);
|
|
1162
|
-
root.AddSingularNode(singmp->root, true);
|
|
1619
|
+
static Timer t("mptool compute regular MLMP"); RegionTimer rg(t);
|
|
1620
|
+
root.AddSingularNode(singmp->root, true, nullptr);
|
|
1163
1621
|
// cout << "norm after S->R conversion: " << root.Norm() << endl;
|
|
1164
1622
|
}
|
|
1165
1623
|
|
|
@@ -1195,14 +1653,69 @@ namespace ngsbem
|
|
|
1195
1653
|
void CalcMP(shared_ptr<SingularMLMultiPole<elem_type>> asingmp)
|
|
1196
1654
|
{
|
|
1197
1655
|
static Timer t("mptool regular MLMP"); RegionTimer rg(t);
|
|
1656
|
+
static Timer trec("mptool regular MLMP - recording");
|
|
1657
|
+
static Timer tsort("mptool regular MLMP - sort");
|
|
1198
1658
|
|
|
1199
1659
|
singmp = asingmp;
|
|
1200
1660
|
|
|
1201
1661
|
root.CalcTotalTargets();
|
|
1202
1662
|
root.RemoveEmptyTrees();
|
|
1203
|
-
|
|
1204
|
-
root.AddSingularNode(singmp->root, false);
|
|
1205
1663
|
|
|
1664
|
+
|
|
1665
|
+
// root.AddSingularNode(singmp->root, false, nullptr);
|
|
1666
|
+
// /*
|
|
1667
|
+
Array<RecordingRS> recording;
|
|
1668
|
+
{
|
|
1669
|
+
RegionTimer rrec(trec);
|
|
1670
|
+
root.AddSingularNode(singmp->root, false, &recording);
|
|
1671
|
+
}
|
|
1672
|
+
|
|
1673
|
+
// cout << "recorded: " << recording.Size() << endl;
|
|
1674
|
+
{
|
|
1675
|
+
RegionTimer reg(tsort);
|
|
1676
|
+
QuickSort (recording, [] (auto & a, auto & b)
|
|
1677
|
+
{
|
|
1678
|
+
if (a.len < (1-1e-8) * b.len) return true;
|
|
1679
|
+
if (a.len > (1+1e-8) * b.len) return false;
|
|
1680
|
+
return a.theta < b.theta;
|
|
1681
|
+
});
|
|
1682
|
+
}
|
|
1683
|
+
|
|
1684
|
+
double current_len = -1e100;
|
|
1685
|
+
double current_theta = -1e100;
|
|
1686
|
+
Array<RecordingRS*> current_batch;
|
|
1687
|
+
Array<Array<RecordingRS*>> batch_group;
|
|
1688
|
+
Array<double> group_lengths;
|
|
1689
|
+
Array<double> group_thetas;
|
|
1690
|
+
for (auto & record : recording)
|
|
1691
|
+
{
|
|
1692
|
+
bool len_changed = fabs(record.len - current_len) > 1e-8;
|
|
1693
|
+
bool theta_changed = fabs(record.theta - current_theta) > 1e-8;
|
|
1694
|
+
if ((len_changed || theta_changed) && current_batch.Size() > 0) {
|
|
1695
|
+
// ProcessBatch(current_batch, current_len, current_theta);
|
|
1696
|
+
batch_group.Append(current_batch);
|
|
1697
|
+
group_lengths.Append(current_len);
|
|
1698
|
+
group_thetas.Append(current_theta);
|
|
1699
|
+
current_batch.SetSize(0);
|
|
1700
|
+
}
|
|
1701
|
+
|
|
1702
|
+
current_len = record.len;
|
|
1703
|
+
current_theta = record.theta;
|
|
1704
|
+
current_batch.Append(&record);
|
|
1705
|
+
}
|
|
1706
|
+
if (current_batch.Size() > 0) {
|
|
1707
|
+
// ProcessBatch(current_batch, current_len, current_theta);
|
|
1708
|
+
batch_group.Append(current_batch);
|
|
1709
|
+
group_lengths.Append(current_len);
|
|
1710
|
+
group_thetas.Append(current_theta);
|
|
1711
|
+
}
|
|
1712
|
+
|
|
1713
|
+
ParallelFor(batch_group.Size(), [&](int i) {
|
|
1714
|
+
ProcessBatchRS(batch_group[i], group_lengths[i], group_thetas[i]);
|
|
1715
|
+
}, TasksPerThread(4));
|
|
1716
|
+
// */
|
|
1717
|
+
|
|
1718
|
+
|
|
1206
1719
|
/*
|
|
1207
1720
|
int maxlevel = 0;
|
|
1208
1721
|
for (auto [i,num] : Enumerate(RegularMLMultiPole::nodes_on_level))
|
|
@@ -1212,7 +1725,7 @@ namespace ngsbem
|
|
|
1212
1725
|
cout << "reg " << i << ": " << RegularMLMultiPole::nodes_on_level[i] << endl;
|
|
1213
1726
|
*/
|
|
1214
1727
|
|
|
1215
|
-
static Timer tloc("mptool regular localize expansion"); RegionTimer rloc(tloc);
|
|
1728
|
+
static Timer tloc("mptool regular localize expansion"); RegionTimer rloc(tloc);
|
|
1216
1729
|
root.LocalizeExpansion(false);
|
|
1217
1730
|
}
|
|
1218
1731
|
|
|
@@ -1246,6 +1759,7 @@ namespace ngsbem
|
|
|
1246
1759
|
|
|
1247
1760
|
};
|
|
1248
1761
|
|
|
1762
|
+
|
|
1249
1763
|
template <typename elem_type>
|
|
1250
1764
|
inline ostream & operator<< (ostream & ost, const RegularMLMultiPole<elem_type> & mlmp)
|
|
1251
1765
|
{
|