ngsolve 6.2.2505.post17.dev0__cp311-cp311-win_amd64.whl → 6.2.2505.post70.dev0__cp311-cp311-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ngsolve might be problematic. Click here for more details.
- netgen/include/bilinearform.hpp +1 -1
- netgen/include/diffop_impl.hpp +3 -1
- netgen/include/gridfunction.hpp +1 -1
- netgen/include/mptools.hpp +553 -89
- netgen/include/ngblas.hpp +11 -0
- netgen/include/recursive_pol.hpp +63 -11
- netgen/include/sparsematrix_impl.hpp +25 -0
- netgen/include/vector.hpp +13 -1
- netgen/lib/libngsolve.lib +0 -0
- netgen/libngsolve.dll +0 -0
- ngsolve/cmake/NGSolveConfig.cmake +5 -5
- ngsolve/config/config.py +5 -5
- ngsolve/ngslib.pyd +0 -0
- {ngsolve-6.2.2505.post17.dev0.dist-info → ngsolve-6.2.2505.post70.dev0.dist-info}/METADATA +2 -2
- {ngsolve-6.2.2505.post17.dev0.dist-info → ngsolve-6.2.2505.post70.dev0.dist-info}/RECORD +47 -47
- {ngsolve-6.2.2505.post17.dev0.data → ngsolve-6.2.2505.post70.dev0.data}/data/Scripts/ngsolve.tcl +0 -0
- {ngsolve-6.2.2505.post17.dev0.data → ngsolve-6.2.2505.post70.dev0.data}/data/share/ngsolve/beam.geo +0 -0
- {ngsolve-6.2.2505.post17.dev0.data → ngsolve-6.2.2505.post70.dev0.data}/data/share/ngsolve/beam.vol +0 -0
- {ngsolve-6.2.2505.post17.dev0.data → ngsolve-6.2.2505.post70.dev0.data}/data/share/ngsolve/chip.in2d +0 -0
- {ngsolve-6.2.2505.post17.dev0.data → ngsolve-6.2.2505.post70.dev0.data}/data/share/ngsolve/chip.vol +0 -0
- {ngsolve-6.2.2505.post17.dev0.data → ngsolve-6.2.2505.post70.dev0.data}/data/share/ngsolve/coil.geo +0 -0
- {ngsolve-6.2.2505.post17.dev0.data → ngsolve-6.2.2505.post70.dev0.data}/data/share/ngsolve/coil.vol +0 -0
- {ngsolve-6.2.2505.post17.dev0.data → ngsolve-6.2.2505.post70.dev0.data}/data/share/ngsolve/coilshield.geo +0 -0
- {ngsolve-6.2.2505.post17.dev0.data → ngsolve-6.2.2505.post70.dev0.data}/data/share/ngsolve/coilshield.vol +0 -0
- {ngsolve-6.2.2505.post17.dev0.data → ngsolve-6.2.2505.post70.dev0.data}/data/share/ngsolve/cube.geo +0 -0
- {ngsolve-6.2.2505.post17.dev0.data → ngsolve-6.2.2505.post70.dev0.data}/data/share/ngsolve/cube.vol +0 -0
- {ngsolve-6.2.2505.post17.dev0.data → ngsolve-6.2.2505.post70.dev0.data}/data/share/ngsolve/d10_DGdoubleglazing.pde +0 -0
- {ngsolve-6.2.2505.post17.dev0.data → ngsolve-6.2.2505.post70.dev0.data}/data/share/ngsolve/d11_chip_nitsche.pde +0 -0
- {ngsolve-6.2.2505.post17.dev0.data → ngsolve-6.2.2505.post70.dev0.data}/data/share/ngsolve/d1_square.pde +0 -0
- {ngsolve-6.2.2505.post17.dev0.data → ngsolve-6.2.2505.post70.dev0.data}/data/share/ngsolve/d2_chip.pde +0 -0
- {ngsolve-6.2.2505.post17.dev0.data → ngsolve-6.2.2505.post70.dev0.data}/data/share/ngsolve/d3_helmholtz.pde +0 -0
- {ngsolve-6.2.2505.post17.dev0.data → ngsolve-6.2.2505.post70.dev0.data}/data/share/ngsolve/d4_cube.pde +0 -0
- {ngsolve-6.2.2505.post17.dev0.data → ngsolve-6.2.2505.post70.dev0.data}/data/share/ngsolve/d5_beam.pde +0 -0
- {ngsolve-6.2.2505.post17.dev0.data → ngsolve-6.2.2505.post70.dev0.data}/data/share/ngsolve/d6_shaft.pde +0 -0
- {ngsolve-6.2.2505.post17.dev0.data → ngsolve-6.2.2505.post70.dev0.data}/data/share/ngsolve/d7_coil.pde +0 -0
- {ngsolve-6.2.2505.post17.dev0.data → ngsolve-6.2.2505.post70.dev0.data}/data/share/ngsolve/d8_coilshield.pde +0 -0
- {ngsolve-6.2.2505.post17.dev0.data → ngsolve-6.2.2505.post70.dev0.data}/data/share/ngsolve/d9_hybridDG.pde +0 -0
- {ngsolve-6.2.2505.post17.dev0.data → ngsolve-6.2.2505.post70.dev0.data}/data/share/ngsolve/doubleglazing.in2d +0 -0
- {ngsolve-6.2.2505.post17.dev0.data → ngsolve-6.2.2505.post70.dev0.data}/data/share/ngsolve/doubleglazing.vol +0 -0
- {ngsolve-6.2.2505.post17.dev0.data → ngsolve-6.2.2505.post70.dev0.data}/data/share/ngsolve/piezo2d40round4.vol.gz +0 -0
- {ngsolve-6.2.2505.post17.dev0.data → ngsolve-6.2.2505.post70.dev0.data}/data/share/ngsolve/shaft.geo +0 -0
- {ngsolve-6.2.2505.post17.dev0.data → ngsolve-6.2.2505.post70.dev0.data}/data/share/ngsolve/shaft.vol +0 -0
- {ngsolve-6.2.2505.post17.dev0.data → ngsolve-6.2.2505.post70.dev0.data}/data/share/ngsolve/square.in2d +0 -0
- {ngsolve-6.2.2505.post17.dev0.data → ngsolve-6.2.2505.post70.dev0.data}/data/share/ngsolve/square.vol +0 -0
- {ngsolve-6.2.2505.post17.dev0.dist-info → ngsolve-6.2.2505.post70.dev0.dist-info}/LICENSE +0 -0
- {ngsolve-6.2.2505.post17.dev0.dist-info → ngsolve-6.2.2505.post70.dev0.dist-info}/WHEEL +0 -0
- {ngsolve-6.2.2505.post17.dev0.dist-info → ngsolve-6.2.2505.post70.dev0.dist-info}/top_level.txt +0 -0
netgen/include/mptools.hpp
CHANGED
|
@@ -20,6 +20,26 @@ namespace ngsbem
|
|
|
20
20
|
{
|
|
21
21
|
using namespace ngfem;
|
|
22
22
|
|
|
23
|
+
template<typename T>
|
|
24
|
+
constexpr int VecLength = 1; // Default: Complex has length 1
|
|
25
|
+
|
|
26
|
+
template<int N>
|
|
27
|
+
constexpr int VecLength<Vec<N, Complex>> = N; // Specialization: Vec<N,Complex> has length N
|
|
28
|
+
|
|
29
|
+
/// Convert a Cartesian offset into spherical coordinates.
///
/// @param dist Cartesian vector (x, y, z).
/// @return tuple (len, theta, phi) where
///         - len   is the Euclidean norm of dist,
///         - theta is acos(z / len), or 0 when len < 1e-30 (so a vanishing
///           vector is never divided by its length),
///         - phi   is atan2(y, x), or 0 when x^2 + y^2 < 1e-30.
inline std::tuple<double, double, double> SphericalCoordinates(Vec<3> dist){
  const double radius = L2Norm(dist);
  const double polar = (radius < 1e-30) ? 0.0 : acos (dist(2) / radius);

  const double planar_sq = sqr(dist(0)) + sqr(dist(1));
  const double azimuth = (planar_sq < 1e-30) ? 0.0 : atan2(dist(1), dist(0));

  return {radius, polar, azimuth};
}
|
|
42
|
+
|
|
23
43
|
|
|
24
44
|
template <typename entry_type = Complex>
|
|
25
45
|
class NGS_DLL_HEADER SphericalHarmonics
|
|
@@ -84,9 +104,69 @@ namespace ngsbem
|
|
|
84
104
|
|
|
85
105
|
void Calc (Vec<3> x, FlatVector<Complex> shapes);
|
|
86
106
|
|
|
87
|
-
|
|
107
|
+
|
|
108
|
+
void FlipZ ();
|
|
88
109
|
void RotateZ (double alpha);
|
|
89
|
-
|
|
110
|
+
|
|
111
|
+
template <typename FUNC>
|
|
112
|
+
void RotateZ (double alpha, FUNC func) const
|
|
113
|
+
{
|
|
114
|
+
if (order < 0) return;
|
|
115
|
+
|
|
116
|
+
Vector<Complex> exp_imalpha(order+1);
|
|
117
|
+
Complex exp_ialpha(cos(alpha), sin(alpha));
|
|
118
|
+
Complex prod = 1.0;
|
|
119
|
+
for (int i = 0; i <= order; i++)
|
|
120
|
+
{
|
|
121
|
+
exp_imalpha(i) = prod;
|
|
122
|
+
prod *= exp_ialpha;
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
int ii = 0;
|
|
126
|
+
for (int n = 0; n <= order; n++)
|
|
127
|
+
{
|
|
128
|
+
for (int m = -n; m < 0; m++, ii++)
|
|
129
|
+
func(ii, conj(exp_imalpha(-m)));
|
|
130
|
+
for (int m = 0; m <= n; m++, ii++)
|
|
131
|
+
func(ii, exp_imalpha(m));
|
|
132
|
+
};
|
|
133
|
+
};
|
|
134
|
+
|
|
135
|
+
template <typename FUNC>
|
|
136
|
+
void RotateZFlip (double alpha, bool flip, FUNC func) const
|
|
137
|
+
{
|
|
138
|
+
if (order < 0) return;
|
|
139
|
+
|
|
140
|
+
Vector<Complex> exp_imalpha(order+1);
|
|
141
|
+
Complex exp_ialpha(cos(alpha), sin(alpha));
|
|
142
|
+
Complex prod = 1.0;
|
|
143
|
+
for (int i = 0; i <= order; i++)
|
|
144
|
+
{
|
|
145
|
+
exp_imalpha(i) = prod;
|
|
146
|
+
prod *= exp_ialpha;
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
int ii = 0;
|
|
150
|
+
|
|
151
|
+
auto FlipFactor = [] (int n, int m, bool flip)->double
|
|
152
|
+
{
|
|
153
|
+
if (flip)
|
|
154
|
+
return ((n-m)%2) == 1 ? -1 : 1;
|
|
155
|
+
return 1.0;
|
|
156
|
+
};
|
|
157
|
+
|
|
158
|
+
for (int n = 0; n <= order; n++)
|
|
159
|
+
{
|
|
160
|
+
for (int m = -n; m < 0; m++, ii++)
|
|
161
|
+
func(ii, FlipFactor(n,m,flip)*conj(exp_imalpha(-m)));
|
|
162
|
+
for (int m = 0; m <= n; m++, ii++)
|
|
163
|
+
func(ii, FlipFactor(n,m,flip)*exp_imalpha(m));
|
|
164
|
+
};
|
|
165
|
+
};
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
void RotateY (double alpha, bool parallel = false);
|
|
90
170
|
|
|
91
171
|
|
|
92
172
|
static double CalcAmn (int m, int n)
|
|
@@ -119,11 +199,11 @@ namespace ngsbem
|
|
|
119
199
|
// https://fortran-lang.discourse.group/t/looking-for-spherical-bessel-and-hankel-functions-of-first-and-second-kind-and-arbitrary-order/2308/2
|
|
120
200
|
NGS_DLL_HEADER
|
|
121
201
|
void besseljs3d (int nterms, double z, double scale,
|
|
122
|
-
|
|
202
|
+
SliceVector<double> fjs, SliceVector<double> fjder = FlatVector<double>(0, nullptr));
|
|
123
203
|
|
|
124
204
|
NGS_DLL_HEADER
|
|
125
205
|
void besseljs3d (int nterms, Complex z, double scale,
|
|
126
|
-
|
|
206
|
+
SliceVector<Complex> fjs, SliceVector<Complex> fjder = FlatVector<Complex>(0, nullptr));
|
|
127
207
|
|
|
128
208
|
|
|
129
209
|
/*
|
|
@@ -142,14 +222,17 @@ namespace ngsbem
|
|
|
142
222
|
FlatVector<double> jp,
|
|
143
223
|
FlatVector<double> yp);
|
|
144
224
|
|
|
145
|
-
|
|
225
|
+
|
|
146
226
|
|
|
147
227
|
/// Thin forwarding wrapper around besseljs3d: the call writes its results
/// straight into `values` (no derivative output is requested, so the
/// defaulted last argument of besseljs3d is used).
///
/// @param n      passed through as the `nterms` argument of besseljs3d
/// @param rho    evaluation argument
/// @param scale  scaling parameter handed on unchanged
/// @param values destination for the computed values
template <typename T>
void SphericalBessel (int n, double rho, double scale, T && values)
{
  // An earlier variant evaluated into temporaries (j, jp) and then copied j
  // into values; the direct call avoids those temporaries.
  besseljs3d (n, rho, scale, values);
}
|
|
154
237
|
|
|
155
238
|
|
|
@@ -173,21 +256,6 @@ namespace ngsbem
|
|
|
173
256
|
return;
|
|
174
257
|
}
|
|
175
258
|
Vector j(n+1), y(n+1), jp(n+1), yp(n+1);
|
|
176
|
-
// SBESJY (rho, n, j, y, jp, yp);
|
|
177
|
-
|
|
178
|
-
/*
|
|
179
|
-
values = j + Complex(0,1) * y;
|
|
180
|
-
if (scale != 1.0)
|
|
181
|
-
{
|
|
182
|
-
double prod = 1.0;
|
|
183
|
-
for (int i = 0; i <= n; i++)
|
|
184
|
-
{
|
|
185
|
-
values(i) *= prod;
|
|
186
|
-
prod *= scale;
|
|
187
|
-
}
|
|
188
|
-
}
|
|
189
|
-
*/
|
|
190
|
-
|
|
191
259
|
|
|
192
260
|
// the bessel-evaluation with scale
|
|
193
261
|
besseljs3d (n, rho, 1/scale, j, jp);
|
|
@@ -358,18 +426,7 @@ namespace ngsbem
|
|
|
358
426
|
// static Timer t("mptool Transform "+ToString(typeid(RADIAL).name())+ToString(typeid(TARGET).name()));
|
|
359
427
|
// RegionTimer reg(t);
|
|
360
428
|
|
|
361
|
-
|
|
362
|
-
double theta, phi;
|
|
363
|
-
|
|
364
|
-
if (len < 1e-30)
|
|
365
|
-
theta = 0;
|
|
366
|
-
else
|
|
367
|
-
theta = acos (dist(2) / len);
|
|
368
|
-
|
|
369
|
-
if (sqr(dist(0))+sqr(dist(1)) < 1e-30)
|
|
370
|
-
phi = 0;
|
|
371
|
-
else
|
|
372
|
-
phi = atan2(dist(1), dist(0));
|
|
429
|
+
auto [len, theta, phi] = SphericalCoordinates(dist);
|
|
373
430
|
|
|
374
431
|
|
|
375
432
|
// MultiPole<RADIAL,entry_type> tmp{*this};
|
|
@@ -386,14 +443,18 @@ namespace ngsbem
|
|
|
386
443
|
}
|
|
387
444
|
|
|
388
445
|
template <typename TARGET>
|
|
389
|
-
void TransformAdd (MultiPole<TARGET,entry_type> & target, Vec<3> dist) const
|
|
446
|
+
void TransformAdd (MultiPole<TARGET,entry_type> & target, Vec<3> dist, bool atomic = false) const
|
|
390
447
|
{
|
|
391
448
|
if (SH().Order() < 0) return;
|
|
392
449
|
if (target.SH().Order() < 0) return;
|
|
393
450
|
|
|
394
451
|
MultiPole<TARGET,entry_type> tmp{target};
|
|
395
452
|
Transform(tmp, dist);
|
|
396
|
-
|
|
453
|
+
if (!atomic)
|
|
454
|
+
target.SH().Coefs() += tmp.SH().Coefs();
|
|
455
|
+
else
|
|
456
|
+
for (int j = 0; j < target.SH().Coefs().Size(); j++)
|
|
457
|
+
AtomicAdd(target.SH().Coefs()[j], tmp.SH().Coefs()[j]);
|
|
397
458
|
}
|
|
398
459
|
|
|
399
460
|
template <typename TARGET>
|
|
@@ -412,11 +473,123 @@ namespace ngsbem
|
|
|
412
473
|
static constexpr int maxdirect = 100;
|
|
413
474
|
|
|
414
475
|
|
|
476
|
+
template <typename SCAL, auto S>
|
|
477
|
+
inline auto VecVector2Matrix (FlatVector<Vec<S,SCAL>> vec)
|
|
478
|
+
{
|
|
479
|
+
return FlatMatrixFixWidth<S,SCAL> (vec.Size(), vec.Data()->Data());
|
|
480
|
+
}
|
|
481
|
+
|
|
482
|
+
inline auto VecVector2Matrix (FlatVector<Complex> vec)
|
|
483
|
+
{
|
|
484
|
+
return FlatMatrixFixWidth<1,Complex> (vec.Size(), vec.Data());
|
|
485
|
+
}
|
|
486
|
+
|
|
487
|
+
|
|
415
488
|
template <typename entry_type=Complex>
|
|
416
489
|
class SingularMLMultiPole
|
|
417
490
|
{
|
|
418
491
|
static Array<size_t> nodes_on_level;
|
|
419
492
|
|
|
493
|
+
struct RecordingSS
|
|
494
|
+
{
|
|
495
|
+
const MultiPole<MPSingular,entry_type> * mp_source;
|
|
496
|
+
MultiPole<MPSingular,entry_type> * mp_target;
|
|
497
|
+
Vec<3> dist;
|
|
498
|
+
double len, theta, phi;
|
|
499
|
+
bool flipz;
|
|
500
|
+
public:
|
|
501
|
+
RecordingSS() = default;
|
|
502
|
+
RecordingSS (const MultiPole<MPSingular,entry_type> * amp_source,
|
|
503
|
+
MultiPole<MPSingular,entry_type> * amp_target,
|
|
504
|
+
Vec<3> adist)
|
|
505
|
+
: mp_source(amp_source), mp_target(amp_target), dist(adist)
|
|
506
|
+
{
|
|
507
|
+
std::tie(len, theta, phi) = SphericalCoordinates(adist);
|
|
508
|
+
// flipz = false;
|
|
509
|
+
flipz = theta > M_PI/2;
|
|
510
|
+
if (flipz) theta = M_PI-theta;
|
|
511
|
+
}
|
|
512
|
+
};
|
|
513
|
+
|
|
514
|
+
|
|
515
|
+
/// Execute a batch of recorded S->S translations that share `len` and `theta`.
///
/// With N = (number of records) * VecLength<entry_type>:
///  - a single record (or N <= 1) is handled one by one through
///    TransformAdd with atomic accumulation;
///  - otherwise the batch is handed to ProcessVectorizedBatch<W, vec_length>
///    with the smallest W out of {3, 4, 6, 12, 24, 48, 96, 192} that is >= N;
///  - batches with N > 192 are split into a leading part of 192 / vec_length
///    records and the remainder, both processed recursively.
///
/// @param batch records to process
/// @param len   common shift length, forwarded to the vectorized kernel
/// @param theta common polar angle, forwarded to the vectorized kernel
static void ProcessBatch(FlatArray<RecordingSS*> batch, double len, double theta) {
  constexpr int vec_length = VecLength<entry_type>;
  const int batch_size = batch.Size();
  const int N = batch_size * vec_length;

  // nothing to gain from vectorizing: do the transforms individually
  if (N <= 1 || batch_size <= 1) {
    for (auto* rec : batch)
      rec->mp_source->TransformAdd(*rec->mp_target, rec->dist, true);
    return;
  }

  // pick the narrowest compiled width that holds all N components
  if (N <= 3)   { ProcessVectorizedBatch<3, vec_length>(batch, len, theta);   return; }
  if (N <= 4)   { ProcessVectorizedBatch<4, vec_length>(batch, len, theta);   return; }
  if (N <= 6)   { ProcessVectorizedBatch<6, vec_length>(batch, len, theta);   return; }
  if (N <= 12)  { ProcessVectorizedBatch<12, vec_length>(batch, len, theta);  return; }
  if (N <= 24)  { ProcessVectorizedBatch<24, vec_length>(batch, len, theta);  return; }
  if (N <= 48)  { ProcessVectorizedBatch<48, vec_length>(batch, len, theta);  return; }
  if (N <= 96)  { ProcessVectorizedBatch<96, vec_length>(batch, len, theta);  return; }
  if (N <= 192) { ProcessVectorizedBatch<192, vec_length>(batch, len, theta); return; }

  // too wide for the largest kernel: peel off one full-width part and recurse
  const int head_records = 192 / vec_length;
  ProcessBatch(batch.Range(0, head_records), len, theta);
  ProcessBatch(batch.Range(head_records, batch_size), len, theta);
}
|
|
556
|
+
|
|
557
|
+
template<int N, int vec_length>
|
|
558
|
+
static void ProcessVectorizedBatch(FlatArray<RecordingSS*> batch, double len, double theta) {
|
|
559
|
+
|
|
560
|
+
// *testout << "Processing vectorized S->S batch of size " << batch.Size() << ", with N = " << N << ", vec_length = " << vec_length << ", len = " << len << ", theta = " << theta << endl;
|
|
561
|
+
MultiPole<MPSingular, Vec<N,Complex>> vec_source(batch[0]->mp_source->Order(), batch[0]->mp_source->Kappa(), batch[0]->mp_source->RTyp());
|
|
562
|
+
MultiPole<MPSingular, Vec<N,Complex>> vec_target(batch[0]->mp_target->Order(), batch[0]->mp_target->Kappa(), batch[0]->mp_target->RTyp());
|
|
563
|
+
|
|
564
|
+
// Copy multipoles into vectorized multipole
|
|
565
|
+
for (int i = 0; i < batch.Size(); i++)
|
|
566
|
+
{
|
|
567
|
+
auto source_i = VecVector2Matrix (batch[i]->mp_source->SH().Coefs());
|
|
568
|
+
auto source_mati = VecVector2Matrix (vec_source.SH().Coefs()).Cols(i*vec_length, (i+1)*vec_length);
|
|
569
|
+
batch[i]->mp_source->SH().RotateZFlip(batch[i]->phi, batch[i]->flipz,
|
|
570
|
+
[source_i, source_mati] (size_t ii, Complex factor)
|
|
571
|
+
{
|
|
572
|
+
source_mati.Row(ii) = factor * source_i.Row(ii);
|
|
573
|
+
});
|
|
574
|
+
}
|
|
575
|
+
|
|
576
|
+
vec_source.SH().RotateY(theta, vec_source.SH().Order() >= 100);
|
|
577
|
+
vec_source.ShiftZ(-len, vec_target);
|
|
578
|
+
vec_target.SH().RotateY(-theta, vec_target.SH().Order() >= 100);
|
|
579
|
+
|
|
580
|
+
// Copy vectorized multipole into individual multipoles
|
|
581
|
+
for (int i = 0; i < batch.Size(); i++)
|
|
582
|
+
{
|
|
583
|
+
auto source_mati = VecVector2Matrix (vec_target.SH().Coefs()).Cols(i*vec_length, (i+1)*vec_length);
|
|
584
|
+
auto target_mati = VecVector2Matrix (batch[i]->mp_target->SH().Coefs());
|
|
585
|
+
batch[i]->mp_target->SH().RotateZFlip(-batch[i]->phi, batch[i]->flipz,
|
|
586
|
+
[source_mati, target_mati] (size_t ii, Complex factor)
|
|
587
|
+
{
|
|
588
|
+
AtomicAdd (target_mati.Row(ii), factor * source_mati.Row(ii));
|
|
589
|
+
});
|
|
590
|
+
}
|
|
591
|
+
}
|
|
592
|
+
|
|
420
593
|
struct Node
|
|
421
594
|
{
|
|
422
595
|
Vec<3> center;
|
|
@@ -429,6 +602,8 @@ namespace ngsbem
|
|
|
429
602
|
Array<tuple<Vec<3>, Vec<3>, entry_type>> dipoles;
|
|
430
603
|
Array<tuple<Vec<3>, Vec<3>, Complex,int>> currents;
|
|
431
604
|
int total_sources;
|
|
605
|
+
std::mutex node_mutex;
|
|
606
|
+
atomic<bool> have_childs{false};
|
|
432
607
|
|
|
433
608
|
Node (Vec<3> acenter, double ar, int alevel, double akappa)
|
|
434
609
|
: center(acenter), r(ar), level(alevel), mp(MPOrder(ar*akappa), akappa, ar) // min(1.0, ar*akappa))
|
|
@@ -449,12 +624,13 @@ namespace ngsbem
|
|
|
449
624
|
cc(2) += (i&4) ? r/2 : -r/2;
|
|
450
625
|
childs[i] = make_unique<Node> (cc, r/2, level+1, mp.Kappa());
|
|
451
626
|
}
|
|
627
|
+
have_childs = true;
|
|
452
628
|
}
|
|
453
629
|
|
|
454
630
|
|
|
455
631
|
void AddCharge (Vec<3> x, entry_type c)
|
|
456
632
|
{
|
|
457
|
-
if (
|
|
633
|
+
if (have_childs) // quick check without locking
|
|
458
634
|
{
|
|
459
635
|
// directly send to childs:
|
|
460
636
|
int childnum = 0;
|
|
@@ -465,6 +641,21 @@ namespace ngsbem
|
|
|
465
641
|
return;
|
|
466
642
|
}
|
|
467
643
|
|
|
644
|
+
lock_guard<mutex> guard(node_mutex);
|
|
645
|
+
|
|
646
|
+
if (have_childs) // test again after locking
|
|
647
|
+
{
|
|
648
|
+
// directly send to childs:
|
|
649
|
+
int childnum = 0;
|
|
650
|
+
if (x(0) > center(0)) childnum += 1;
|
|
651
|
+
if (x(1) > center(1)) childnum += 2;
|
|
652
|
+
if (x(2) > center(2)) childnum += 4;
|
|
653
|
+
childs[childnum] -> AddCharge(x, c);
|
|
654
|
+
return;
|
|
655
|
+
}
|
|
656
|
+
|
|
657
|
+
|
|
658
|
+
|
|
468
659
|
charges.Append( tuple{x,c} );
|
|
469
660
|
|
|
470
661
|
// if (r*mp.Kappa() < 1e-8) return;
|
|
@@ -489,7 +680,21 @@ namespace ngsbem
|
|
|
489
680
|
|
|
490
681
|
void AddDipole (Vec<3> x, Vec<3> d, entry_type c)
|
|
491
682
|
{
|
|
492
|
-
if (
|
|
683
|
+
if (have_childs)
|
|
684
|
+
{
|
|
685
|
+
// directly send to childs:
|
|
686
|
+
|
|
687
|
+
int childnum = 0;
|
|
688
|
+
if (x(0) > center(0)) childnum += 1;
|
|
689
|
+
if (x(1) > center(1)) childnum += 2;
|
|
690
|
+
if (x(2) > center(2)) childnum += 4;
|
|
691
|
+
childs[childnum] -> AddDipole(x, d, c);
|
|
692
|
+
return;
|
|
693
|
+
}
|
|
694
|
+
|
|
695
|
+
lock_guard<mutex> guard(node_mutex);
|
|
696
|
+
|
|
697
|
+
if (have_childs)
|
|
493
698
|
{
|
|
494
699
|
// directly send to childs:
|
|
495
700
|
|
|
@@ -501,6 +706,9 @@ namespace ngsbem
|
|
|
501
706
|
return;
|
|
502
707
|
}
|
|
503
708
|
|
|
709
|
+
|
|
710
|
+
|
|
711
|
+
|
|
504
712
|
dipoles.Append (tuple{x,d,c});
|
|
505
713
|
|
|
506
714
|
if (dipoles.Size() < maxdirect || r < 1e-8)
|
|
@@ -520,6 +728,7 @@ namespace ngsbem
|
|
|
520
728
|
currents.SetSize0();
|
|
521
729
|
}
|
|
522
730
|
|
|
731
|
+
// not parallel yet
|
|
523
732
|
void AddCurrent (Vec<3> sp, Vec<3> ep, Complex j, int num)
|
|
524
733
|
{
|
|
525
734
|
if (childs[0])
|
|
@@ -549,7 +758,7 @@ namespace ngsbem
|
|
|
549
758
|
}
|
|
550
759
|
return;
|
|
551
760
|
}
|
|
552
|
-
|
|
761
|
+
|
|
553
762
|
currents.Append (tuple{sp,ep,j,num});
|
|
554
763
|
|
|
555
764
|
// if (currents.Size() < maxdirect || r < 1e-8)
|
|
@@ -664,23 +873,27 @@ namespace ngsbem
|
|
|
664
873
|
}
|
|
665
874
|
}
|
|
666
875
|
|
|
667
|
-
void CalcMP()
|
|
876
|
+
void CalcMP(Array<RecordingSS> * recording, Array<Node*> * nodes_to_process)
|
|
668
877
|
{
|
|
669
|
-
mp.SH().Coefs() = 0.0;
|
|
878
|
+
// mp.SH().Coefs() = 0.0;
|
|
670
879
|
if (childs[0])
|
|
671
880
|
{
|
|
672
|
-
if (total_sources < 1000)
|
|
881
|
+
if (total_sources < 1000 || recording)
|
|
673
882
|
for (auto & child : childs)
|
|
674
|
-
child->CalcMP();
|
|
883
|
+
child->CalcMP(recording, nodes_to_process);
|
|
675
884
|
else
|
|
676
885
|
ParallelFor (8, [&] (int nr)
|
|
677
886
|
{
|
|
678
|
-
childs[nr] -> CalcMP();
|
|
887
|
+
childs[nr] -> CalcMP(recording, nodes_to_process);
|
|
679
888
|
});
|
|
680
889
|
|
|
681
890
|
|
|
682
|
-
for (auto & child : childs)
|
|
683
|
-
child->mp.
|
|
891
|
+
for (auto & child : childs){
|
|
892
|
+
if (recording && child->mp.SH().Coefs().Size() > 0)
|
|
893
|
+
*recording += RecordingSS(&child->mp, &mp, center-child->center);
|
|
894
|
+
else
|
|
895
|
+
child->mp.TransformAdd(mp, center-child->center);
|
|
896
|
+
}
|
|
684
897
|
}
|
|
685
898
|
else
|
|
686
899
|
{
|
|
@@ -690,14 +903,18 @@ namespace ngsbem
|
|
|
690
903
|
return;
|
|
691
904
|
}
|
|
692
905
|
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
906
|
+
if (nodes_to_process)
|
|
907
|
+
*nodes_to_process += this;
|
|
908
|
+
else {
|
|
909
|
+
for (auto [x,c] : charges)
|
|
910
|
+
mp.AddCharge (x-center,c);
|
|
911
|
+
|
|
912
|
+
for (auto [x,d,c] : dipoles)
|
|
913
|
+
mp.AddDipole (x-center, d, c);
|
|
698
914
|
|
|
699
|
-
|
|
700
|
-
|
|
915
|
+
for (auto [sp,ep,j,num] : currents)
|
|
916
|
+
mp.AddCurrent (sp-center, ep-center, j, num);
|
|
917
|
+
}
|
|
701
918
|
}
|
|
702
919
|
}
|
|
703
920
|
|
|
@@ -836,6 +1053,10 @@ namespace ngsbem
|
|
|
836
1053
|
void CalcMP()
|
|
837
1054
|
{
|
|
838
1055
|
static Timer t("mptool compute singular MLMP"); RegionTimer rg(t);
|
|
1056
|
+
static Timer ts2mp("mptool compute singular MLMP - source2mp");
|
|
1057
|
+
static Timer tS2S("mptool compute singular MLMP - S->S");
|
|
1058
|
+
static Timer trec("mptool comput singular recording");
|
|
1059
|
+
static Timer tsort("mptool comput singular sort");
|
|
839
1060
|
|
|
840
1061
|
/*
|
|
841
1062
|
int maxlevel = 0;
|
|
@@ -847,7 +1068,87 @@ namespace ngsbem
|
|
|
847
1068
|
*/
|
|
848
1069
|
|
|
849
1070
|
root.CalcTotalSources();
|
|
850
|
-
|
|
1071
|
+
|
|
1072
|
+
if (false)
|
|
1073
|
+
// direct evaluation of S->S
|
|
1074
|
+
root.CalcMP(nullptr, nullptr);
|
|
1075
|
+
else
|
|
1076
|
+
{
|
|
1077
|
+
|
|
1078
|
+
Array<RecordingSS> recording;
|
|
1079
|
+
Array<Node*> nodes_to_process;
|
|
1080
|
+
|
|
1081
|
+
{
|
|
1082
|
+
RegionTimer reg(trec);
|
|
1083
|
+
root.CalcMP(&recording, &nodes_to_process);
|
|
1084
|
+
}
|
|
1085
|
+
|
|
1086
|
+
{
|
|
1087
|
+
RegionTimer rs2mp(ts2mp);
|
|
1088
|
+
ParallelFor(nodes_to_process.Size(), [&](int i){
|
|
1089
|
+
auto node = nodes_to_process[i];
|
|
1090
|
+
for (auto [x,c]: node->charges)
|
|
1091
|
+
node->mp.AddCharge(x-node->center, c);
|
|
1092
|
+
for (auto [x,d,c]: node->dipoles)
|
|
1093
|
+
node->mp.AddDipole(x-node->center, d, c);
|
|
1094
|
+
for (auto [sp,ep,j,num]: node->currents)
|
|
1095
|
+
node->mp.AddCurrent(sp-node->center, ep-node->center, j, num);
|
|
1096
|
+
}, TasksPerThread(4));
|
|
1097
|
+
}
|
|
1098
|
+
|
|
1099
|
+
{
|
|
1100
|
+
RegionTimer reg(tsort);
|
|
1101
|
+
QuickSort (recording, [] (auto & a, auto & b)
|
|
1102
|
+
{
|
|
1103
|
+
if (a.len < (1-1e-8) * b.len) return true;
|
|
1104
|
+
if (a.len > (1+1e-8) * b.len) return false;
|
|
1105
|
+
return a.theta < b.theta;
|
|
1106
|
+
});
|
|
1107
|
+
}
|
|
1108
|
+
|
|
1109
|
+
double current_len = -1e100;
|
|
1110
|
+
double current_theta = -1e100;
|
|
1111
|
+
Array<RecordingSS*> current_batch;
|
|
1112
|
+
Array<Array<RecordingSS*>> batch_group;
|
|
1113
|
+
Array<double> group_lengths;
|
|
1114
|
+
Array<double> group_thetas;
|
|
1115
|
+
for (auto & record : recording)
|
|
1116
|
+
{
|
|
1117
|
+
bool len_changed = fabs(record.len - current_len) > 1e-8;
|
|
1118
|
+
bool theta_changed = fabs(record.theta - current_theta) > 1e-8;
|
|
1119
|
+
if ((len_changed || theta_changed) && current_batch.Size() > 0) {
|
|
1120
|
+
batch_group.Append(current_batch);
|
|
1121
|
+
group_lengths.Append(current_len);
|
|
1122
|
+
group_thetas.Append(current_theta);
|
|
1123
|
+
current_batch.SetSize(0);
|
|
1124
|
+
}
|
|
1125
|
+
|
|
1126
|
+
current_len = record.len;
|
|
1127
|
+
current_theta = record.theta;
|
|
1128
|
+
current_batch.Append(&record);
|
|
1129
|
+
}
|
|
1130
|
+
if (current_batch.Size() > 0) {
|
|
1131
|
+
batch_group.Append(current_batch);
|
|
1132
|
+
group_lengths.Append(current_len);
|
|
1133
|
+
group_thetas.Append(current_theta);
|
|
1134
|
+
}
|
|
1135
|
+
|
|
1136
|
+
{
|
|
1137
|
+
RegionTimer rS2S(tS2S);
|
|
1138
|
+
// ParallelFor(batch_group.Size(), [&](int i) {
|
|
1139
|
+
for (int i = 0; i < batch_group.Size(); i++){
|
|
1140
|
+
// *testout << "Processing batch " << i << " of size " << batch_group[i].Size() << ", with len = " << group_lengths[i] << ", theta = " << group_thetas[i] << endl;
|
|
1141
|
+
int chunk_size = 24;
|
|
1142
|
+
if (batch_group[i].Size() < chunk_size)
|
|
1143
|
+
ProcessBatch(batch_group[i], group_lengths[i], group_thetas[i]);
|
|
1144
|
+
else
|
|
1145
|
+
ParallelForRange(IntRange(batch_group[i].Size()), [&](IntRange range) {
|
|
1146
|
+
auto sub_batch = batch_group[i].Range(range.First(), range.Next());
|
|
1147
|
+
ProcessBatch(sub_batch, group_lengths[i], group_thetas[i]);
|
|
1148
|
+
}, TasksPerThread(4));
|
|
1149
|
+
}
|
|
1150
|
+
}
|
|
1151
|
+
}
|
|
851
1152
|
|
|
852
1153
|
havemp = true;
|
|
853
1154
|
}
|
|
@@ -892,19 +1193,128 @@ namespace ngsbem
|
|
|
892
1193
|
Vec<3> adist)
|
|
893
1194
|
: mpS(ampS), mpR(ampR), dist(adist)
|
|
894
1195
|
{
|
|
895
|
-
len =
|
|
896
|
-
if (len < 1e-30)
|
|
897
|
-
theta = 0;
|
|
898
|
-
else
|
|
899
|
-
theta = acos (dist(2) / len);
|
|
900
|
-
|
|
901
|
-
if (sqr(dist(0))+sqr(dist(1)) < 1e-30)
|
|
902
|
-
phi = 0;
|
|
903
|
-
else
|
|
904
|
-
phi = atan2(dist(1), dist(0));
|
|
1196
|
+
std::tie(len, theta, phi) = SphericalCoordinates(dist);
|
|
905
1197
|
}
|
|
906
1198
|
};
|
|
907
1199
|
|
|
1200
|
+
/// Execute a batch of recorded singular->regular translations that share
/// `len` and `theta`.
///
/// With N = (number of records) * VecLength<elem_type>:
///  - a single record (or N <= 1) is handled one by one through TransformAdd;
///  - otherwise the batch goes to ProcessVectorizedBatch<W, vec_length> with
///    the smallest W out of {3, 4, 6, 12, 24, 48, 96, 192} that is >= N;
///  - batches with N > 192 are cut into chunks of 192 / vec_length records
///    which are processed by recursive calls issued from a ParallelFor.
///
/// @param batch records to process
/// @param len   common shift length, forwarded to the vectorized kernel
/// @param theta common polar angle, forwarded to the vectorized kernel
static void ProcessBatchRS(FlatArray<RecordingRS*> batch, double len, double theta) {
  constexpr int vec_length = VecLength<elem_type>;
  int batch_size = batch.Size();
  int N = batch_size * vec_length;

  if (N <= 1 || batch_size <= 1) {
    // Accumulate atomically: this function is re-entered from the ParallelFor
    // below, and a trailing chunk may consist of a single record.  The
    // vectorized kernel adds into the targets with AtomicAdd as well, so a
    // plain += here could race with a concurrent chunk writing the same target.
    for (auto* rec : batch) {
      rec->mpS->TransformAdd(*rec->mpR, rec->dist, true);
    }
  }
  else if (N <= 3) {
    ProcessVectorizedBatch<3, vec_length>(batch, len, theta);
  }
  else if (N <= 4) {
    ProcessVectorizedBatch<4, vec_length>(batch, len, theta);
  }
  else if (N <= 6) {
    ProcessVectorizedBatch<6, vec_length>(batch, len, theta);
  }
  else if (N <= 12) {
    ProcessVectorizedBatch<12, vec_length>(batch, len, theta);
  }
  else if (N <= 24) {
    ProcessVectorizedBatch<24, vec_length>(batch, len, theta);
  }
  else if (N <= 48) {
    ProcessVectorizedBatch<48, vec_length>(batch, len, theta);
  }
  else if (N <= 96) {
    ProcessVectorizedBatch<96, vec_length>(batch, len, theta);
  }
  else if (N <= 192) {
    ProcessVectorizedBatch<192, vec_length>(batch, len, theta);
  }
  else {
    // Split large batches into chunks that fit the widest kernel and run
    // the chunks as parallel tasks (one task per chunk).
    size_t chunksize = 192/vec_length;
    size_t num = (batch.Size()+chunksize-1) / chunksize;
    ParallelFor (num, [&](int i)
                 {
                   ProcessBatchRS(batch.Range(i*chunksize, min((i+1)*chunksize, batch.Size())), len, theta);
                 }, num);
  }
}
|
|
1263
|
+
|
|
1264
|
+
|
|
1265
|
+
template<int N, int vec_length>
|
|
1266
|
+
static void ProcessVectorizedBatch(FlatArray<RecordingRS*> batch, double len, double theta) {
|
|
1267
|
+
|
|
1268
|
+
// static Timer t("ProcessVectorizedBatch, N = "+ToString(N) + ", vec_len = " + ToString(vec_length));
|
|
1269
|
+
// RegionTimer reg(t, batch[0]->mpS->SH().Order());
|
|
1270
|
+
// static Timer ttobatch("mptools - copy to batch 2");
|
|
1271
|
+
// static Timer tfrombatch("mptools - copy from batch 2");
|
|
1272
|
+
|
|
1273
|
+
// *testout << "Processing vectorized batch of size " << batch.Size() << ", with N = " << N << ", vec_length = " << vec_length << ", len = " << len << ", theta = " << theta << endl;
|
|
1274
|
+
MultiPole<MPSingular, Vec<N,Complex>> vec_source(batch[0]->mpS->Order(), batch[0]->mpS->Kappa(), batch[0]->mpS->RTyp());
|
|
1275
|
+
// MultiPole<MPSingular, elem_type> tmp_source{*batch[0]->mpS};
|
|
1276
|
+
MultiPole<MPRegular, elem_type> tmp_target{*batch[0]->mpR};
|
|
1277
|
+
MultiPole<MPRegular, Vec<N,Complex>> vec_target(batch[0]->mpR->Order(), batch[0]->mpR->Kappa(), batch[0]->mpR->RTyp());
|
|
1278
|
+
|
|
1279
|
+
// Copy multipoles into vectorized multipole
|
|
1280
|
+
// ttobatch.Start();
|
|
1281
|
+
for (int i = 0; i < batch.Size(); i++)
|
|
1282
|
+
{
|
|
1283
|
+
auto source_i = VecVector2Matrix (batch[i]->mpS->SH().Coefs());
|
|
1284
|
+
auto source_mati = VecVector2Matrix (vec_source.SH().Coefs()).Cols(i*vec_length, (i+1)*vec_length);
|
|
1285
|
+
batch[i]->mpS->SH().RotateZ(batch[i]->phi,
|
|
1286
|
+
[source_i, source_mati] (size_t ii, Complex factor)
|
|
1287
|
+
{
|
|
1288
|
+
source_mati.Row(ii) = factor * source_i.Row(ii);
|
|
1289
|
+
});
|
|
1290
|
+
}
|
|
1291
|
+
|
|
1292
|
+
// ttobatch.Stop();
|
|
1293
|
+
|
|
1294
|
+
vec_source.SH().RotateY(theta);
|
|
1295
|
+
vec_source.ShiftZ(-len, vec_target);
|
|
1296
|
+
vec_target.SH().RotateY(-theta);
|
|
1297
|
+
|
|
1298
|
+
// Copy vectorized multipole into individual multipoles
|
|
1299
|
+
// tfrombatch.Start();
|
|
1300
|
+
for (int i = 0; i < batch.Size(); i++) {
|
|
1301
|
+
// auto source_i = VecVector2Matrix (tmp_target.SH().Coefs());
|
|
1302
|
+
auto source_mati = VecVector2Matrix (vec_target.SH().Coefs()).Cols(i*vec_length, (i+1)*vec_length);
|
|
1303
|
+
auto targeti = VecVector2Matrix(batch[i]->mpR->SH().Coefs());
|
|
1304
|
+
|
|
1305
|
+
tmp_target.SH().RotateZ(-batch[i]->phi,
|
|
1306
|
+
[source_mati, targeti] (size_t ii, Complex factor)
|
|
1307
|
+
{
|
|
1308
|
+
// source_i.Row(ii) = factor * source_mati.Row(ii);
|
|
1309
|
+
AtomicAdd (VectorView(targeti.Row(ii)), factor * source_mati.Row(ii));
|
|
1310
|
+
});
|
|
1311
|
+
// for (int j = 0; j < tmp_target.SH().Coefs().Size(); j++)
|
|
1312
|
+
// AtomicAdd(batch[i]->mpR->SH().Coefs()[j], tmp_target.SH().Coefs()[j]);
|
|
1313
|
+
}
|
|
1314
|
+
// tfrombatch.Stop();
|
|
1315
|
+
|
|
1316
|
+
}
|
|
1317
|
+
|
|
908
1318
|
|
|
909
1319
|
struct Node
|
|
910
1320
|
{
|
|
@@ -915,6 +1325,8 @@ namespace ngsbem
|
|
|
915
1325
|
MultiPole<MPRegular,elem_type> mp;
|
|
916
1326
|
Array<Vec<3>> targets;
|
|
917
1327
|
int total_targets;
|
|
1328
|
+
std::mutex node_mutex;
|
|
1329
|
+
atomic<bool> have_childs{false};
|
|
918
1330
|
|
|
919
1331
|
Array<const typename SingularMLMultiPole<elem_type>::Node*> singnodes;
|
|
920
1332
|
|
|
@@ -939,6 +1351,7 @@ namespace ngsbem
|
|
|
939
1351
|
cc(2) += (i&4) ? r/2 : -r/2;
|
|
940
1352
|
childs[i] = make_unique<Node> (cc, r/2, level+1, mp.Kappa());
|
|
941
1353
|
}
|
|
1354
|
+
have_childs = true;
|
|
942
1355
|
}
|
|
943
1356
|
|
|
944
1357
|
void AddSingularNode (const typename SingularMLMultiPole<elem_type>::Node & singnode, bool allow_refine,
|
|
@@ -946,7 +1359,7 @@ namespace ngsbem
|
|
|
946
1359
|
{
|
|
947
1360
|
if (mp.SH().Order() < 0) return;
|
|
948
1361
|
if (singnode.mp.SH().Order() < 0) return;
|
|
949
|
-
if (L2Norm(singnode.mp.SH().Coefs()) == 0) return;
|
|
1362
|
+
// if (L2Norm(singnode.mp.SH().Coefs()) == 0) return;
|
|
950
1363
|
if (level > 20)
|
|
951
1364
|
{
|
|
952
1365
|
singnodes.Append(&singnode);
|
|
@@ -1028,12 +1441,22 @@ namespace ngsbem
|
|
|
1028
1441
|
|
|
1029
1442
|
if (childs[0])
|
|
1030
1443
|
{
|
|
1031
|
-
|
|
1444
|
+
if (total_targets < 1000)
|
|
1032
1445
|
{
|
|
1033
|
-
|
|
1034
|
-
|
|
1035
|
-
|
|
1446
|
+
for (int nr = 0; nr < 8; nr++)
|
|
1447
|
+
{
|
|
1448
|
+
if (L2Norm(mp.SH().Coefs()) > 0)
|
|
1449
|
+
mp.TransformAdd (childs[nr]->mp, childs[nr]->center-center);
|
|
1450
|
+
childs[nr]->LocalizeExpansion(allow_refine);
|
|
1451
|
+
}
|
|
1036
1452
|
}
|
|
1453
|
+
else
|
|
1454
|
+
ParallelFor(8, [&] (int nr)
|
|
1455
|
+
{
|
|
1456
|
+
if (L2Norm(mp.SH().Coefs()) > 0)
|
|
1457
|
+
mp.TransformAdd (childs[nr]->mp, childs[nr]->center-center);
|
|
1458
|
+
childs[nr]->LocalizeExpansion(allow_refine);
|
|
1459
|
+
});
|
|
1037
1460
|
mp = MultiPole<MPRegular,elem_type>(-1, mp.Kappa(), 1.);
|
|
1038
1461
|
//mp.SH().Coefs()=0.0;
|
|
1039
1462
|
}
|
|
@@ -1041,18 +1464,8 @@ namespace ngsbem
|
|
|
1041
1464
|
|
|
1042
1465
|
elem_type Evaluate (Vec<3> p) const
|
|
1043
1466
|
{
|
|
1044
|
-
// *testout << "eval p = " << p << ", level = " << level << ", center = " << center << ", r = " << r << endl;
|
|
1045
1467
|
elem_type sum{0.0};
|
|
1046
|
-
|
|
1047
|
-
if (childs[0])
|
|
1048
|
-
{
|
|
1049
|
-
int childnum = 0;
|
|
1050
|
-
if (p(0) > center(0)) childnum += 1;
|
|
1051
|
-
if (p(1) > center(1)) childnum += 2;
|
|
1052
|
-
if (p(2) > center(2)) childnum += 4;
|
|
1053
|
-
sum = childs[childnum]->Evaluate(p);
|
|
1054
|
-
}
|
|
1055
|
-
*/
|
|
1468
|
+
|
|
1056
1469
|
int childnum = 0;
|
|
1057
1470
|
if (p(0) > center(0)) childnum += 1;
|
|
1058
1471
|
if (p(1) > center(1)) childnum += 2;
|
|
@@ -1062,8 +1475,6 @@ namespace ngsbem
|
|
|
1062
1475
|
else
|
|
1063
1476
|
sum = mp.Eval(p-center);
|
|
1064
1477
|
|
|
1065
|
-
|
|
1066
|
-
// static Timer t("mptool direct evaluate"); RegionTimer r(t);
|
|
1067
1478
|
for (auto sn : singnodes)
|
|
1068
1479
|
sum += sn->EvaluateMP(p);
|
|
1069
1480
|
|
|
@@ -1112,7 +1523,8 @@ namespace ngsbem
|
|
|
1112
1523
|
|
|
1113
1524
|
void AddTarget (Vec<3> x)
|
|
1114
1525
|
{
|
|
1115
|
-
if (childs[0])
|
|
1526
|
+
// if (childs[0])
|
|
1527
|
+
if (have_childs) // quick check without locking
|
|
1116
1528
|
{
|
|
1117
1529
|
// directly send to childs:
|
|
1118
1530
|
int childnum = 0;
|
|
@@ -1123,6 +1535,20 @@ namespace ngsbem
|
|
|
1123
1535
|
return;
|
|
1124
1536
|
}
|
|
1125
1537
|
|
|
1538
|
+
lock_guard<mutex> guard(node_mutex);
|
|
1539
|
+
|
|
1540
|
+
if (have_childs) // test again after locking
|
|
1541
|
+
{
|
|
1542
|
+
// directly send to childs:
|
|
1543
|
+
int childnum = 0;
|
|
1544
|
+
if (x(0) > center(0)) childnum += 1;
|
|
1545
|
+
if (x(1) > center(1)) childnum += 2;
|
|
1546
|
+
if (x(2) > center(2)) childnum += 4;
|
|
1547
|
+
childs[childnum] -> AddTarget(x);
|
|
1548
|
+
return;
|
|
1549
|
+
}
|
|
1550
|
+
|
|
1551
|
+
|
|
1126
1552
|
targets.Append( x );
|
|
1127
1553
|
|
|
1128
1554
|
// if (r*mp.Kappa() < 1e-8) return;
|
|
@@ -1227,6 +1653,8 @@ namespace ngsbem
|
|
|
1227
1653
|
void CalcMP(shared_ptr<SingularMLMultiPole<elem_type>> asingmp)
|
|
1228
1654
|
{
|
|
1229
1655
|
static Timer t("mptool regular MLMP"); RegionTimer rg(t);
|
|
1656
|
+
static Timer trec("mptool regular MLMP - recording");
|
|
1657
|
+
static Timer tsort("mptool regular MLMP - sort");
|
|
1230
1658
|
|
|
1231
1659
|
singmp = asingmp;
|
|
1232
1660
|
|
|
@@ -1234,23 +1662,58 @@ namespace ngsbem
|
|
|
1234
1662
|
root.RemoveEmptyTrees();
|
|
1235
1663
|
|
|
1236
1664
|
|
|
1237
|
-
root.AddSingularNode(singmp->root, false, nullptr);
|
|
1238
|
-
/*
|
|
1239
|
-
Array<RecordingRS> recording;
|
|
1240
|
-
|
|
1665
|
+
// root.AddSingularNode(singmp->root, false, nullptr);
|
|
1666
|
+
// /*
|
|
1667
|
+
Array<RecordingRS> recording;
|
|
1668
|
+
{
|
|
1669
|
+
RegionTimer rrec(trec);
|
|
1670
|
+
root.AddSingularNode(singmp->root, false, &recording);
|
|
1671
|
+
}
|
|
1672
|
+
|
|
1241
1673
|
// cout << "recorded: " << recording.Size() << endl;
|
|
1674
|
+
{
|
|
1675
|
+
RegionTimer reg(tsort);
|
|
1242
1676
|
QuickSort (recording, [] (auto & a, auto & b)
|
|
1243
1677
|
{
|
|
1244
1678
|
if (a.len < (1-1e-8) * b.len) return true;
|
|
1245
1679
|
if (a.len > (1+1e-8) * b.len) return false;
|
|
1246
1680
|
return a.theta < b.theta;
|
|
1247
1681
|
});
|
|
1682
|
+
}
|
|
1683
|
+
|
|
1684
|
+
double current_len = -1e100;
|
|
1685
|
+
double current_theta = -1e100;
|
|
1686
|
+
Array<RecordingRS*> current_batch;
|
|
1687
|
+
Array<Array<RecordingRS*>> batch_group;
|
|
1688
|
+
Array<double> group_lengths;
|
|
1689
|
+
Array<double> group_thetas;
|
|
1248
1690
|
for (auto & record : recording)
|
|
1249
1691
|
{
|
|
1250
|
-
|
|
1251
|
-
|
|
1692
|
+
bool len_changed = fabs(record.len - current_len) > 1e-8;
|
|
1693
|
+
bool theta_changed = fabs(record.theta - current_theta) > 1e-8;
|
|
1694
|
+
if ((len_changed || theta_changed) && current_batch.Size() > 0) {
|
|
1695
|
+
// ProcessBatch(current_batch, current_len, current_theta);
|
|
1696
|
+
batch_group.Append(current_batch);
|
|
1697
|
+
group_lengths.Append(current_len);
|
|
1698
|
+
group_thetas.Append(current_theta);
|
|
1699
|
+
current_batch.SetSize(0);
|
|
1700
|
+
}
|
|
1701
|
+
|
|
1702
|
+
current_len = record.len;
|
|
1703
|
+
current_theta = record.theta;
|
|
1704
|
+
current_batch.Append(&record);
|
|
1252
1705
|
}
|
|
1253
|
-
|
|
1706
|
+
if (current_batch.Size() > 0) {
|
|
1707
|
+
// ProcessBatch(current_batch, current_len, current_theta);
|
|
1708
|
+
batch_group.Append(current_batch);
|
|
1709
|
+
group_lengths.Append(current_len);
|
|
1710
|
+
group_thetas.Append(current_theta);
|
|
1711
|
+
}
|
|
1712
|
+
|
|
1713
|
+
ParallelFor(batch_group.Size(), [&](int i) {
|
|
1714
|
+
ProcessBatchRS(batch_group[i], group_lengths[i], group_thetas[i]);
|
|
1715
|
+
}, TasksPerThread(4));
|
|
1716
|
+
// */
|
|
1254
1717
|
|
|
1255
1718
|
|
|
1256
1719
|
/*
|
|
@@ -1262,7 +1725,7 @@ namespace ngsbem
|
|
|
1262
1725
|
cout << "reg " << i << ": " << RegularMLMultiPole::nodes_on_level[i] << endl;
|
|
1263
1726
|
*/
|
|
1264
1727
|
|
|
1265
|
-
static Timer tloc("mptool regular localize expansion"); RegionTimer rloc(tloc);
|
|
1728
|
+
static Timer tloc("mptool regular localize expansion"); RegionTimer rloc(tloc);
|
|
1266
1729
|
root.LocalizeExpansion(false);
|
|
1267
1730
|
}
|
|
1268
1731
|
|
|
@@ -1296,6 +1759,7 @@ namespace ngsbem
|
|
|
1296
1759
|
|
|
1297
1760
|
};
|
|
1298
1761
|
|
|
1762
|
+
|
|
1299
1763
|
template <typename elem_type>
|
|
1300
1764
|
inline ostream & operator<< (ostream & ost, const RegularMLMultiPole<elem_type> & mlmp)
|
|
1301
1765
|
{
|