ngsolve 6.2.2506__cp310-cp310-macosx_10_15_universal2.whl → 6.2.2506.post38.dev0__cp310-cp310-macosx_10_15_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ngsolve might be problematic. Click here for more details.
- netgen/include/bem_diffops.hpp +475 -0
- netgen/include/bspline.hpp +2 -0
- netgen/include/contact.hpp +4 -0
- netgen/include/h1lumping.hpp +6 -0
- netgen/include/kernels.hpp +653 -0
- netgen/include/mp_coefficient.hpp +20 -20
- netgen/include/mptools.hpp +625 -279
- netgen/include/potentialtools.hpp +2 -2
- netgen/include/recursive_pol.hpp +2 -2
- netgen/include/sparsematrix.hpp +1 -1
- netgen/libngbla.dylib +0 -0
- netgen/libngcomp.dylib +0 -0
- netgen/libngfem.dylib +0 -0
- netgen/libngla.dylib +0 -0
- netgen/libngsbem.dylib +0 -0
- netgen/libngsolve.dylib +0 -0
- netgen/libngstd.dylib +0 -0
- ngsolve/cmake/NGSolveConfig.cmake +1 -1
- ngsolve/config/config.py +5 -5
- ngsolve/demos/intro/cmagnet.py +19 -22
- {ngsolve-6.2.2506.dist-info → ngsolve-6.2.2506.post38.dev0.dist-info}/METADATA +2 -2
- {ngsolve-6.2.2506.dist-info → ngsolve-6.2.2506.post38.dev0.dist-info}/RECORD +58 -56
- {ngsolve-6.2.2506.data → ngsolve-6.2.2506.post38.dev0.data}/data/Netgen.icns +0 -0
- {ngsolve-6.2.2506.data → ngsolve-6.2.2506.post38.dev0.data}/data/bin/ngscxx +0 -0
- {ngsolve-6.2.2506.data → ngsolve-6.2.2506.post38.dev0.data}/data/bin/ngsld +0 -0
- {ngsolve-6.2.2506.data → ngsolve-6.2.2506.post38.dev0.data}/data/bin/ngsolve.tcl +0 -0
- {ngsolve-6.2.2506.data → ngsolve-6.2.2506.post38.dev0.data}/data/bin/ngspy +0 -0
- {ngsolve-6.2.2506.data → ngsolve-6.2.2506.post38.dev0.data}/data/share/ngsolve/beam.geo +0 -0
- {ngsolve-6.2.2506.data → ngsolve-6.2.2506.post38.dev0.data}/data/share/ngsolve/beam.vol +0 -0
- {ngsolve-6.2.2506.data → ngsolve-6.2.2506.post38.dev0.data}/data/share/ngsolve/chip.in2d +0 -0
- {ngsolve-6.2.2506.data → ngsolve-6.2.2506.post38.dev0.data}/data/share/ngsolve/chip.vol +0 -0
- {ngsolve-6.2.2506.data → ngsolve-6.2.2506.post38.dev0.data}/data/share/ngsolve/coil.geo +0 -0
- {ngsolve-6.2.2506.data → ngsolve-6.2.2506.post38.dev0.data}/data/share/ngsolve/coil.vol +0 -0
- {ngsolve-6.2.2506.data → ngsolve-6.2.2506.post38.dev0.data}/data/share/ngsolve/coilshield.geo +0 -0
- {ngsolve-6.2.2506.data → ngsolve-6.2.2506.post38.dev0.data}/data/share/ngsolve/coilshield.vol +0 -0
- {ngsolve-6.2.2506.data → ngsolve-6.2.2506.post38.dev0.data}/data/share/ngsolve/cube.geo +0 -0
- {ngsolve-6.2.2506.data → ngsolve-6.2.2506.post38.dev0.data}/data/share/ngsolve/cube.vol +0 -0
- {ngsolve-6.2.2506.data → ngsolve-6.2.2506.post38.dev0.data}/data/share/ngsolve/d10_DGdoubleglazing.pde +0 -0
- {ngsolve-6.2.2506.data → ngsolve-6.2.2506.post38.dev0.data}/data/share/ngsolve/d11_chip_nitsche.pde +0 -0
- {ngsolve-6.2.2506.data → ngsolve-6.2.2506.post38.dev0.data}/data/share/ngsolve/d1_square.pde +0 -0
- {ngsolve-6.2.2506.data → ngsolve-6.2.2506.post38.dev0.data}/data/share/ngsolve/d2_chip.pde +0 -0
- {ngsolve-6.2.2506.data → ngsolve-6.2.2506.post38.dev0.data}/data/share/ngsolve/d3_helmholtz.pde +0 -0
- {ngsolve-6.2.2506.data → ngsolve-6.2.2506.post38.dev0.data}/data/share/ngsolve/d4_cube.pde +0 -0
- {ngsolve-6.2.2506.data → ngsolve-6.2.2506.post38.dev0.data}/data/share/ngsolve/d5_beam.pde +0 -0
- {ngsolve-6.2.2506.data → ngsolve-6.2.2506.post38.dev0.data}/data/share/ngsolve/d6_shaft.pde +0 -0
- {ngsolve-6.2.2506.data → ngsolve-6.2.2506.post38.dev0.data}/data/share/ngsolve/d7_coil.pde +0 -0
- {ngsolve-6.2.2506.data → ngsolve-6.2.2506.post38.dev0.data}/data/share/ngsolve/d8_coilshield.pde +0 -0
- {ngsolve-6.2.2506.data → ngsolve-6.2.2506.post38.dev0.data}/data/share/ngsolve/d9_hybridDG.pde +0 -0
- {ngsolve-6.2.2506.data → ngsolve-6.2.2506.post38.dev0.data}/data/share/ngsolve/doubleglazing.in2d +0 -0
- {ngsolve-6.2.2506.data → ngsolve-6.2.2506.post38.dev0.data}/data/share/ngsolve/doubleglazing.vol +0 -0
- {ngsolve-6.2.2506.data → ngsolve-6.2.2506.post38.dev0.data}/data/share/ngsolve/piezo2d40round4.vol.gz +0 -0
- {ngsolve-6.2.2506.data → ngsolve-6.2.2506.post38.dev0.data}/data/share/ngsolve/shaft.geo +0 -0
- {ngsolve-6.2.2506.data → ngsolve-6.2.2506.post38.dev0.data}/data/share/ngsolve/shaft.vol +0 -0
- {ngsolve-6.2.2506.data → ngsolve-6.2.2506.post38.dev0.data}/data/share/ngsolve/square.in2d +0 -0
- {ngsolve-6.2.2506.data → ngsolve-6.2.2506.post38.dev0.data}/data/share/ngsolve/square.vol +0 -0
- {ngsolve-6.2.2506.dist-info → ngsolve-6.2.2506.post38.dev0.dist-info}/LICENSE +0 -0
- {ngsolve-6.2.2506.dist-info → ngsolve-6.2.2506.post38.dev0.dist-info}/WHEEL +0 -0
- {ngsolve-6.2.2506.dist-info → ngsolve-6.2.2506.post38.dev0.dist-info}/top_level.txt +0 -0
netgen/include/mptools.hpp
CHANGED
|
@@ -25,6 +25,7 @@ namespace ngsbem
|
|
|
25
25
|
|
|
26
26
|
constexpr int FMM_SW = 4;
|
|
27
27
|
|
|
28
|
+
|
|
28
29
|
|
|
29
30
|
// ************************ SIMD - creation (should end up in simd.hpp) *************
|
|
30
31
|
|
|
@@ -42,7 +43,19 @@ namespace ngsbem
|
|
|
42
43
|
}
|
|
43
44
|
|
|
44
45
|
|
|
45
|
-
|
|
46
|
+
class NGS_DLL_HEADER PrecomputedSqrts
|
|
47
|
+
{
|
|
48
|
+
public:
|
|
49
|
+
Array<double> sqrt_int;
|
|
50
|
+
// Array<double> inv_sqrt_int;
|
|
51
|
+
Array<double> sqrt_n_np1; // sqrt(n*(n+1))
|
|
52
|
+
Array<double> inv_sqrt_2np1_2np3; // 1/sqrt( (2n+1)*(2n+3) )
|
|
53
|
+
|
|
54
|
+
PrecomputedSqrts();
|
|
55
|
+
};
|
|
56
|
+
|
|
57
|
+
extern NGS_DLL_HEADER PrecomputedSqrts presqrt;
|
|
58
|
+
|
|
46
59
|
|
|
47
60
|
|
|
48
61
|
|
|
@@ -189,18 +202,26 @@ namespace ngsbem
|
|
|
189
202
|
void RotateY (double alpha, bool parallel = false);
|
|
190
203
|
|
|
191
204
|
|
|
205
|
+
|
|
192
206
|
static double CalcAmn (int m, int n)
|
|
193
207
|
{
|
|
194
208
|
if (m < 0) m=-m;
|
|
195
209
|
if (n < m) return 0;
|
|
196
|
-
|
|
210
|
+
|
|
211
|
+
if (2*n+1 < presqrt.sqrt_int.Size())
|
|
212
|
+
return presqrt.sqrt_int[n+1+m]*presqrt.sqrt_int[n+1-m] * presqrt.inv_sqrt_2np1_2np3[n];
|
|
213
|
+
else
|
|
214
|
+
return sqrt( (n+1.0+m)*(n+1.0-m) / ( (2*n+1)*(2*n+3) ));
|
|
197
215
|
}
|
|
198
216
|
|
|
199
217
|
static double CalcBmn (int m, int n)
|
|
200
218
|
{
|
|
201
219
|
double sgn = (m >= 0) ? 1 : -1;
|
|
202
|
-
if ( (m
|
|
203
|
-
|
|
220
|
+
if ( (m >= n) || (-m > n) ) return 0;
|
|
221
|
+
if (n <= presqrt.inv_sqrt_2np1_2np3.Size())
|
|
222
|
+
return sgn * presqrt.sqrt_n_np1[n-m-1] * presqrt.inv_sqrt_2np1_2np3[n-1];
|
|
223
|
+
else
|
|
224
|
+
return sgn * sqrt( (n-m-1.0)*(n-m) / ( (2*n-1.0)*(2*n+1)));
|
|
204
225
|
}
|
|
205
226
|
|
|
206
227
|
static double CalcDmn (int m, int n)
|
|
@@ -303,7 +324,7 @@ namespace ngsbem
|
|
|
303
324
|
|
|
304
325
|
|
|
305
326
|
// hn1 = jn+ i*yn
|
|
306
|
-
class
|
|
327
|
+
class Singular
|
|
307
328
|
{
|
|
308
329
|
public:
|
|
309
330
|
template <typename T>
|
|
@@ -329,7 +350,7 @@ namespace ngsbem
|
|
|
329
350
|
|
|
330
351
|
|
|
331
352
|
// jn
|
|
332
|
-
class
|
|
353
|
+
class Regular
|
|
333
354
|
{
|
|
334
355
|
public:
|
|
335
356
|
template <typename T>
|
|
@@ -357,14 +378,14 @@ namespace ngsbem
|
|
|
357
378
|
|
|
358
379
|
|
|
359
380
|
template <typename RADIAL, typename entry_type=Complex>
|
|
360
|
-
class NGS_DLL_HEADER
|
|
381
|
+
class NGS_DLL_HEADER SphericalExpansion
|
|
361
382
|
{
|
|
362
383
|
SphericalHarmonics<entry_type> sh;
|
|
363
384
|
double kappa;
|
|
364
385
|
double rtyp;
|
|
365
386
|
public:
|
|
366
387
|
|
|
367
|
-
|
|
388
|
+
SphericalExpansion (int aorder, double akappa, double artyp)
|
|
368
389
|
: sh(aorder), kappa(akappa), rtyp(artyp) { }
|
|
369
390
|
|
|
370
391
|
|
|
@@ -376,15 +397,15 @@ namespace ngsbem
|
|
|
376
397
|
double RTyp() const { return rtyp; }
|
|
377
398
|
int Order() const { return sh.Order(); }
|
|
378
399
|
|
|
379
|
-
|
|
400
|
+
SphericalExpansion Truncate(int neworder) const
|
|
380
401
|
{
|
|
381
402
|
if (neworder > sh.Order()) neworder=sh.Order();
|
|
382
|
-
|
|
403
|
+
SphericalExpansion nmp(neworder, kappa, rtyp);
|
|
383
404
|
nmp.sh.Coefs() = sh.Coefs().Range(sqr(neworder+1));
|
|
384
405
|
return nmp;
|
|
385
406
|
}
|
|
386
407
|
|
|
387
|
-
|
|
408
|
+
SphericalExpansion & operator+= (const SphericalExpansion & mp2)
|
|
388
409
|
{
|
|
389
410
|
size_t commonsize = min(SH().Coefs().Size(), mp2.SH().Coefs().Size());
|
|
390
411
|
SH().Coefs().Range(commonsize) += mp2.SH().Coefs().Range(commonsize);
|
|
@@ -395,27 +416,24 @@ namespace ngsbem
|
|
|
395
416
|
entry_type EvalDirectionalDerivative (Vec<3> x, Vec<3> d) const;
|
|
396
417
|
|
|
397
418
|
void AddCharge (Vec<3> x, entry_type c);
|
|
398
|
-
void AddDipole (Vec<3> x, Vec<3>
|
|
399
|
-
void
|
|
400
|
-
|
|
401
|
-
/*
|
|
402
|
-
void ChangeScaleTo (double newscale)
|
|
419
|
+
void AddDipole (Vec<3> x, Vec<3> dir, entry_type c);
|
|
420
|
+
void AddChargeDipole (Vec<3> x, entry_type c, Vec<3> dir, entry_type c2)
|
|
403
421
|
{
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
sh.CoefsN(n) *= prod;
|
|
408
|
-
scale = newscale;
|
|
422
|
+
// TODO: add them at once
|
|
423
|
+
AddCharge (x, c);
|
|
424
|
+
AddDipole (x, dir, c2);
|
|
409
425
|
}
|
|
410
|
-
|
|
426
|
+
|
|
427
|
+
void AddPlaneWave (Vec<3> d, entry_type c);
|
|
428
|
+
void AddCurrent (Vec<3> ap, Vec<3> ep, Complex j, int num=100);
|
|
429
|
+
|
|
430
|
+
|
|
411
431
|
void ChangeRTypTo (double new_rtyp)
|
|
412
432
|
{
|
|
413
|
-
// double fac = Scale()/newscale;
|
|
414
433
|
double fac = RADIAL::Scale(kappa, rtyp) / RADIAL::Scale(kappa, new_rtyp);
|
|
415
434
|
double prod = 1;
|
|
416
435
|
for (int n = 0; n <= sh.Order(); n++, prod*= fac)
|
|
417
436
|
sh.CoefsN(n) *= prod;
|
|
418
|
-
// scale = newscale;
|
|
419
437
|
rtyp = new_rtyp;
|
|
420
438
|
}
|
|
421
439
|
|
|
@@ -434,7 +452,7 @@ namespace ngsbem
|
|
|
434
452
|
|
|
435
453
|
|
|
436
454
|
template <typename TARGET>
|
|
437
|
-
void Transform (
|
|
455
|
+
void Transform (SphericalExpansion<TARGET,entry_type> & target, Vec<3> dist) const
|
|
438
456
|
{
|
|
439
457
|
if (target.SH().Order() < 0) return;
|
|
440
458
|
if (SH().Order() < 0)
|
|
@@ -449,8 +467,8 @@ namespace ngsbem
|
|
|
449
467
|
auto [len, theta, phi] = SphericalCoordinates(dist);
|
|
450
468
|
|
|
451
469
|
|
|
452
|
-
//
|
|
453
|
-
|
|
470
|
+
// SphericalExpansion<RADIAL,entry_type> tmp{*this};
|
|
471
|
+
SphericalExpansion<RADIAL,entry_type> tmp(Order(), kappa, rtyp);
|
|
454
472
|
tmp.SH().Coefs() = SH().Coefs();
|
|
455
473
|
|
|
456
474
|
tmp.SH().RotateZ(phi);
|
|
@@ -463,12 +481,12 @@ namespace ngsbem
|
|
|
463
481
|
}
|
|
464
482
|
|
|
465
483
|
template <typename TARGET>
|
|
466
|
-
void TransformAdd (
|
|
484
|
+
void TransformAdd (SphericalExpansion<TARGET,entry_type> & target, Vec<3> dist, bool atomic = false) const
|
|
467
485
|
{
|
|
468
486
|
if (SH().Order() < 0) return;
|
|
469
487
|
if (target.SH().Order() < 0) return;
|
|
470
488
|
|
|
471
|
-
|
|
489
|
+
SphericalExpansion<TARGET,entry_type> tmp{target};
|
|
472
490
|
Transform(tmp, dist);
|
|
473
491
|
if (!atomic)
|
|
474
492
|
target.SH().Coefs() += tmp.SH().Coefs();
|
|
@@ -478,8 +496,20 @@ namespace ngsbem
|
|
|
478
496
|
}
|
|
479
497
|
|
|
480
498
|
template <typename TARGET>
|
|
481
|
-
void ShiftZ (double z,
|
|
499
|
+
void ShiftZ (double z, SphericalExpansion<TARGET,entry_type> & target);
|
|
500
|
+
|
|
482
501
|
|
|
502
|
+
template <typename TARGET>
|
|
503
|
+
void In2Out (SphericalExpansion<TARGET,entry_type> & target, double r) const
|
|
504
|
+
{
|
|
505
|
+
Vector<Complex> rad(Order()+1);
|
|
506
|
+
Vector<Complex> radout(target.Order()+1);
|
|
507
|
+
RADIAL::Eval(Order(), kappa, r, RTyp(), rad);
|
|
508
|
+
TARGET::Eval(target.Order(), kappa, r, target.RTyp(), radout);
|
|
509
|
+
target.SH().Coefs() = 0;
|
|
510
|
+
for (int j = 0; j <= std::min(Order(), target.Order()); j++)
|
|
511
|
+
target.SH().CoefsN(j) = rad(j)/radout(j) * SH().CoefsN(j);
|
|
512
|
+
}
|
|
483
513
|
};
|
|
484
514
|
|
|
485
515
|
|
|
@@ -507,22 +537,22 @@ namespace ngsbem
|
|
|
507
537
|
|
|
508
538
|
|
|
509
539
|
template <typename entry_type=Complex>
|
|
510
|
-
class
|
|
540
|
+
class SingularMLExpansion
|
|
511
541
|
{
|
|
512
542
|
using simd_entry_type = decltype(MakeSimd(declval<std::array<entry_type,FMM_SW>>()));
|
|
513
543
|
static Array<size_t> nodes_on_level;
|
|
514
544
|
|
|
515
545
|
struct RecordingSS
|
|
516
546
|
{
|
|
517
|
-
const
|
|
518
|
-
|
|
547
|
+
const SphericalExpansion<Singular,entry_type> * mp_source;
|
|
548
|
+
SphericalExpansion<Singular,entry_type> * mp_target;
|
|
519
549
|
Vec<3> dist;
|
|
520
550
|
double len, theta, phi;
|
|
521
551
|
bool flipz;
|
|
522
552
|
public:
|
|
523
553
|
RecordingSS() = default;
|
|
524
|
-
RecordingSS (const
|
|
525
|
-
|
|
554
|
+
RecordingSS (const SphericalExpansion<Singular,entry_type> * amp_source,
|
|
555
|
+
SphericalExpansion<Singular,entry_type> * amp_target,
|
|
526
556
|
Vec<3> adist)
|
|
527
557
|
: mp_source(amp_source), mp_target(amp_target), dist(adist)
|
|
528
558
|
{
|
|
@@ -534,7 +564,7 @@ namespace ngsbem
|
|
|
534
564
|
};
|
|
535
565
|
|
|
536
566
|
|
|
537
|
-
static void
|
|
567
|
+
static void ProcessBatchSS(FlatArray<RecordingSS*> batch, double len, double theta) {
|
|
538
568
|
constexpr int vec_length = VecLength<entry_type>;
|
|
539
569
|
int batch_size = batch.Size();
|
|
540
570
|
int N = batch_size * vec_length;
|
|
@@ -546,42 +576,45 @@ namespace ngsbem
|
|
|
546
576
|
}
|
|
547
577
|
}
|
|
548
578
|
else if (N <= 3) {
|
|
549
|
-
|
|
579
|
+
ProcessVectorizedBatchSS<3, vec_length>(batch, len, theta);
|
|
550
580
|
}
|
|
551
581
|
else if (N <= 4) {
|
|
552
|
-
|
|
582
|
+
ProcessVectorizedBatchSS<4, vec_length>(batch, len, theta);
|
|
553
583
|
}
|
|
554
584
|
else if (N <= 6) {
|
|
555
|
-
|
|
585
|
+
ProcessVectorizedBatchSS<6, vec_length>(batch, len, theta);
|
|
556
586
|
}
|
|
557
587
|
else if (N <= 12) {
|
|
558
|
-
|
|
588
|
+
ProcessVectorizedBatchSS<12, vec_length>(batch, len, theta);
|
|
559
589
|
}
|
|
560
590
|
else if (N <= 24) {
|
|
561
|
-
|
|
591
|
+
ProcessVectorizedBatchSS<24, vec_length>(batch, len, theta);
|
|
562
592
|
}
|
|
563
593
|
else if (N <= 48) {
|
|
564
|
-
|
|
594
|
+
ProcessVectorizedBatchSS<48, vec_length>(batch, len, theta);
|
|
565
595
|
}
|
|
566
596
|
else if (N <= 96) {
|
|
567
|
-
|
|
597
|
+
ProcessVectorizedBatchSS<96, vec_length>(batch, len, theta);
|
|
568
598
|
}
|
|
569
599
|
else if (N <= 192) {
|
|
570
|
-
|
|
600
|
+
ProcessVectorizedBatchSS<192, vec_length>(batch, len, theta);
|
|
571
601
|
}
|
|
572
602
|
else {
|
|
573
603
|
// Split large batches
|
|
574
|
-
|
|
575
|
-
|
|
604
|
+
ProcessBatchSS(batch.Range(0, 192 / vec_length), len, theta);
|
|
605
|
+
ProcessBatchSS(batch.Range(192 / vec_length, batch_size), len, theta);
|
|
576
606
|
}
|
|
577
607
|
}
|
|
578
608
|
|
|
579
609
|
template<int N, int vec_length>
|
|
580
|
-
static void
|
|
610
|
+
static void ProcessVectorizedBatchSS(FlatArray<RecordingSS*> batch, double len, double theta) {
|
|
581
611
|
|
|
582
612
|
// *testout << "Processing vectorized S->S batch of size " << batch.Size() << ", with N = " << N << ", vec_length = " << vec_length << ", len = " << len << ", theta = " << theta << endl;
|
|
583
|
-
|
|
584
|
-
|
|
613
|
+
double kappa = batch[0]->mp_source->Kappa();
|
|
614
|
+
int so = batch[0]->mp_source->Order();
|
|
615
|
+
int to = batch[0]->mp_target->Order();
|
|
616
|
+
SphericalExpansion<Singular, Vec<N,Complex>> vec_source(so, kappa, batch[0]->mp_source->RTyp());
|
|
617
|
+
SphericalExpansion<Singular, Vec<N,Complex>> vec_target(to, kappa, batch[0]->mp_target->RTyp());
|
|
585
618
|
|
|
586
619
|
// Copy multipoles into vectorized multipole
|
|
587
620
|
for (int i = 0; i < batch.Size(); i++)
|
|
@@ -618,15 +651,18 @@ namespace ngsbem
|
|
|
618
651
|
double r;
|
|
619
652
|
int level;
|
|
620
653
|
std::array<unique_ptr<Node>,8> childs;
|
|
621
|
-
|
|
654
|
+
SphericalExpansion<Singular, entry_type> mp;
|
|
622
655
|
|
|
623
656
|
Array<tuple<Vec<3>, entry_type>> charges;
|
|
624
657
|
Array<tuple<Vec<3>, Vec<3>, entry_type>> dipoles;
|
|
658
|
+
Array<tuple<Vec<3>, entry_type, Vec<3>, entry_type>> chargedipoles;
|
|
625
659
|
Array<tuple<Vec<3>, Vec<3>, Complex,int>> currents;
|
|
626
660
|
|
|
627
661
|
using simd_entry_type = decltype(MakeSimd(declval<std::array<entry_type,FMM_SW>>()));
|
|
628
662
|
Array<tuple<Vec<3,SIMD<double,FMM_SW>>, simd_entry_type>> simd_charges;
|
|
629
663
|
Array<tuple<Vec<3,SIMD<double,FMM_SW>>, Vec<3,SIMD<double,FMM_SW>>, simd_entry_type>> simd_dipoles;
|
|
664
|
+
Array<tuple<Vec<3,SIMD<double,FMM_SW>>, simd_entry_type,
|
|
665
|
+
Vec<3,SIMD<double,FMM_SW>>, simd_entry_type>> simd_chargedipoles;
|
|
630
666
|
|
|
631
667
|
int total_sources;
|
|
632
668
|
std::mutex node_mutex;
|
|
@@ -639,7 +675,15 @@ namespace ngsbem
|
|
|
639
675
|
nodes_on_level[level]++;
|
|
640
676
|
}
|
|
641
677
|
|
|
642
|
-
|
|
678
|
+
int GetChildNum (Vec<3> x) const
|
|
679
|
+
{
|
|
680
|
+
int childnum = 0;
|
|
681
|
+
if (x(0) > center(0)) childnum += 1;
|
|
682
|
+
if (x(1) > center(1)) childnum += 2;
|
|
683
|
+
if (x(2) > center(2)) childnum += 4;
|
|
684
|
+
return childnum;
|
|
685
|
+
}
|
|
686
|
+
|
|
643
687
|
void CreateChilds()
|
|
644
688
|
{
|
|
645
689
|
if (childs[0]) throw Exception("have already childs");
|
|
@@ -655,15 +699,32 @@ namespace ngsbem
|
|
|
655
699
|
}
|
|
656
700
|
|
|
657
701
|
|
|
702
|
+
void SendSourcesToChilds()
|
|
703
|
+
{
|
|
704
|
+
CreateChilds();
|
|
705
|
+
|
|
706
|
+
for (auto [x,c] : charges)
|
|
707
|
+
AddCharge (x,c);
|
|
708
|
+
for (auto [x,d,c] : dipoles)
|
|
709
|
+
AddDipole (x,d,c);
|
|
710
|
+
for (auto [x,c,d,c2] : chargedipoles)
|
|
711
|
+
AddChargeDipole (x,c,d,c2);
|
|
712
|
+
for (auto [sp,ep,j,num] : currents)
|
|
713
|
+
AddCurrent (sp,ep,j,num);
|
|
714
|
+
|
|
715
|
+
charges.DeleteAll();
|
|
716
|
+
dipoles.DeleteAll();
|
|
717
|
+
chargedipoles.DeleteAll();
|
|
718
|
+
currents.DeleteAll();
|
|
719
|
+
}
|
|
720
|
+
|
|
721
|
+
|
|
658
722
|
void AddCharge (Vec<3> x, entry_type c)
|
|
659
723
|
{
|
|
660
724
|
if (have_childs) // quick check without locking
|
|
661
725
|
{
|
|
662
726
|
// directly send to childs:
|
|
663
|
-
int childnum
|
|
664
|
-
if (x(0) > center(0)) childnum += 1;
|
|
665
|
-
if (x(1) > center(1)) childnum += 2;
|
|
666
|
-
if (x(2) > center(2)) childnum += 4;
|
|
727
|
+
int childnum = GetChildNum(x);
|
|
667
728
|
childs[childnum] -> AddCharge(x, c);
|
|
668
729
|
return;
|
|
669
730
|
}
|
|
@@ -672,36 +733,19 @@ namespace ngsbem
|
|
|
672
733
|
|
|
673
734
|
if (have_childs) // test again after locking
|
|
674
735
|
{
|
|
675
|
-
|
|
676
|
-
int childnum = 0;
|
|
677
|
-
if (x(0) > center(0)) childnum += 1;
|
|
678
|
-
if (x(1) > center(1)) childnum += 2;
|
|
679
|
-
if (x(2) > center(2)) childnum += 4;
|
|
736
|
+
int childnum = GetChildNum(x);
|
|
680
737
|
childs[childnum] -> AddCharge(x, c);
|
|
681
738
|
return;
|
|
682
739
|
}
|
|
683
740
|
|
|
684
|
-
|
|
685
|
-
|
|
686
741
|
charges.Append( tuple{x,c} );
|
|
687
742
|
|
|
688
743
|
// if (r*mp.Kappa() < 1e-8) return;
|
|
689
744
|
if (level > 20) return;
|
|
690
|
-
if (charges.Size() < maxdirect && r*mp.Kappa() <
|
|
745
|
+
if (charges.Size() < maxdirect && r*mp.Kappa() < 5)
|
|
691
746
|
return;
|
|
692
|
-
|
|
693
|
-
CreateChilds();
|
|
694
|
-
|
|
695
|
-
for (auto [x,c] : charges)
|
|
696
|
-
AddCharge (x,c);
|
|
697
|
-
for (auto [x,d,c] : dipoles)
|
|
698
|
-
AddDipole (x,d,c);
|
|
699
|
-
for (auto [sp,ep,j,num] : currents)
|
|
700
|
-
AddCurrent (sp,ep,j,num);
|
|
701
747
|
|
|
702
|
-
|
|
703
|
-
dipoles.SetSize0();
|
|
704
|
-
currents.SetSize0();
|
|
748
|
+
SendSourcesToChilds();
|
|
705
749
|
}
|
|
706
750
|
|
|
707
751
|
|
|
@@ -710,11 +754,7 @@ namespace ngsbem
|
|
|
710
754
|
if (have_childs)
|
|
711
755
|
{
|
|
712
756
|
// directly send to childs:
|
|
713
|
-
|
|
714
|
-
int childnum = 0;
|
|
715
|
-
if (x(0) > center(0)) childnum += 1;
|
|
716
|
-
if (x(1) > center(1)) childnum += 2;
|
|
717
|
-
if (x(2) > center(2)) childnum += 4;
|
|
757
|
+
int childnum = GetChildNum(x);
|
|
718
758
|
childs[childnum] -> AddDipole(x, d, c);
|
|
719
759
|
return;
|
|
720
760
|
}
|
|
@@ -724,37 +764,55 @@ namespace ngsbem
|
|
|
724
764
|
if (have_childs)
|
|
725
765
|
{
|
|
726
766
|
// directly send to childs:
|
|
727
|
-
|
|
728
|
-
int childnum = 0;
|
|
729
|
-
if (x(0) > center(0)) childnum += 1;
|
|
730
|
-
if (x(1) > center(1)) childnum += 2;
|
|
731
|
-
if (x(2) > center(2)) childnum += 4;
|
|
767
|
+
int childnum = GetChildNum(x);
|
|
732
768
|
childs[childnum] -> AddDipole(x, d, c);
|
|
733
769
|
return;
|
|
734
770
|
}
|
|
771
|
+
|
|
772
|
+
dipoles.Append (tuple{x,d,c});
|
|
773
|
+
|
|
774
|
+
if (level > 20) return;
|
|
775
|
+
if (dipoles.Size() < maxdirect)
|
|
776
|
+
return;
|
|
735
777
|
|
|
778
|
+
SendSourcesToChilds();
|
|
779
|
+
}
|
|
736
780
|
|
|
737
781
|
|
|
782
|
+
void AddChargeDipole (Vec<3> x, entry_type c, Vec<3> dir, entry_type c2)
|
|
783
|
+
{
|
|
784
|
+
if (have_childs)
|
|
785
|
+
{
|
|
786
|
+
// directly send to childs:
|
|
787
|
+
int childnum = GetChildNum(x);
|
|
788
|
+
childs[childnum] -> AddChargeDipole(x, c, dir, c2);
|
|
789
|
+
return;
|
|
790
|
+
}
|
|
791
|
+
|
|
792
|
+
lock_guard<mutex> guard(node_mutex);
|
|
793
|
+
|
|
794
|
+
if (have_childs)
|
|
795
|
+
{
|
|
796
|
+
// directly send to childs:
|
|
797
|
+
int childnum = GetChildNum(x);
|
|
798
|
+
childs[childnum] -> AddChargeDipole(x, c, dir, c2);
|
|
799
|
+
return;
|
|
800
|
+
}
|
|
738
801
|
|
|
739
|
-
|
|
802
|
+
chargedipoles.Append (tuple{x,c,dir,c2});
|
|
740
803
|
|
|
741
|
-
if (
|
|
804
|
+
if (chargedipoles.Size() < maxdirect || r < 1e-8)
|
|
742
805
|
return;
|
|
743
|
-
|
|
744
|
-
CreateChilds();
|
|
745
806
|
|
|
746
|
-
|
|
747
|
-
AddCharge (x,c);
|
|
748
|
-
for (auto [x,d,c] : dipoles)
|
|
749
|
-
AddDipole (x,d,c);
|
|
750
|
-
for (auto [sp,ep,j,num] : currents)
|
|
751
|
-
AddCurrent (sp,ep,j,num);
|
|
807
|
+
SendSourcesToChilds();
|
|
752
808
|
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
|
|
809
|
+
/*
|
|
810
|
+
AddCharge (x, c);
|
|
811
|
+
AddDipole (x, dir, c2);
|
|
812
|
+
*/
|
|
756
813
|
}
|
|
757
814
|
|
|
815
|
+
|
|
758
816
|
// not parallel yet
|
|
759
817
|
void AddCurrent (Vec<3> sp, Vec<3> ep, Complex j, int num)
|
|
760
818
|
{
|
|
@@ -764,7 +822,7 @@ namespace ngsbem
|
|
|
764
822
|
Array<double> split;
|
|
765
823
|
split.Append(0);
|
|
766
824
|
for (int i = 0; i < 3; i++)
|
|
767
|
-
if (sp(i) < center(i) != ep(i) < center(i))
|
|
825
|
+
if ((sp(i) < center(i)) != (ep(i) < center(i)))
|
|
768
826
|
split += (center(i)-sp(i)) / (ep(i)-sp(i)); // segment cuts i-th coordinate plane
|
|
769
827
|
split.Append(1);
|
|
770
828
|
BubbleSort(split);
|
|
@@ -788,6 +846,12 @@ namespace ngsbem
|
|
|
788
846
|
|
|
789
847
|
currents.Append (tuple{sp,ep,j,num});
|
|
790
848
|
|
|
849
|
+
// if (currents.Size() < maxdirect || r < 1e-8)
|
|
850
|
+
if (currents.Size() < 4 || r < 1e-8)
|
|
851
|
+
return;
|
|
852
|
+
|
|
853
|
+
SendSourcesToChilds();
|
|
854
|
+
/*
|
|
791
855
|
// if (currents.Size() < maxdirect || r < 1e-8)
|
|
792
856
|
if (currents.Size() < 4 || r < 1e-8)
|
|
793
857
|
return;
|
|
@@ -804,6 +868,7 @@ namespace ngsbem
|
|
|
804
868
|
charges.SetSize0();
|
|
805
869
|
dipoles.SetSize0();
|
|
806
870
|
currents.SetSize0();
|
|
871
|
+
*/
|
|
807
872
|
}
|
|
808
873
|
|
|
809
874
|
|
|
@@ -824,7 +889,7 @@ namespace ngsbem
|
|
|
824
889
|
// t.AddFlops (charges.Size());
|
|
825
890
|
if (simd_charges.Size())
|
|
826
891
|
{
|
|
827
|
-
// static Timer t("
|
|
892
|
+
// static Timer t("mptool singmp, evaluate, simd charges"); RegionTimer r(t);
|
|
828
893
|
|
|
829
894
|
simd_entry_type vsum{0.0};
|
|
830
895
|
if (mp.Kappa() < 1e-12)
|
|
@@ -875,6 +940,8 @@ namespace ngsbem
|
|
|
875
940
|
|
|
876
941
|
if (simd_dipoles.Size())
|
|
877
942
|
{
|
|
943
|
+
// static Timer t("mptool singmp, evaluate, simd dipoles"); RegionTimer r(t);
|
|
944
|
+
|
|
878
945
|
simd_entry_type vsum{0.0};
|
|
879
946
|
for (auto [x,d,c] : simd_dipoles)
|
|
880
947
|
{
|
|
@@ -901,6 +968,54 @@ namespace ngsbem
|
|
|
901
968
|
}
|
|
902
969
|
}
|
|
903
970
|
|
|
971
|
+
|
|
972
|
+
|
|
973
|
+
if (simd_chargedipoles.Size())
|
|
974
|
+
{
|
|
975
|
+
// static Timer t("mptool singmp, evaluate, simd chargedipoles"); RegionTimer r(t);
|
|
976
|
+
// t.AddFlops (simd_chargedipoles.Size()*FMM_SW);
|
|
977
|
+
|
|
978
|
+
simd_entry_type vsum{0.0};
|
|
979
|
+
for (auto [x,c,d,c2] : simd_chargedipoles)
|
|
980
|
+
{
|
|
981
|
+
auto rho = L2Norm(p-x);
|
|
982
|
+
auto rhokappa = rho*mp.Kappa();
|
|
983
|
+
auto invrho = If(rho>0.0, 1.0/rho, SIMD<double,FMM_SW>(0.0));
|
|
984
|
+
auto [si,co] = sincos(rhokappa);
|
|
985
|
+
|
|
986
|
+
auto kernelc = (1/(4*M_PI))*invrho*SIMD<Complex,FMM_SW>(co,si);
|
|
987
|
+
vsum += kernelc * c;
|
|
988
|
+
|
|
989
|
+
auto kernel =
|
|
990
|
+
invrho*invrho * InnerProduct(p-x, d) *
|
|
991
|
+
kernelc * SIMD<Complex,FMM_SW>(-1.0, rhokappa);
|
|
992
|
+
|
|
993
|
+
vsum += kernel * c2;
|
|
994
|
+
}
|
|
995
|
+
sum += HSum(vsum);
|
|
996
|
+
}
|
|
997
|
+
else
|
|
998
|
+
{
|
|
999
|
+
// static Timer t("mptool singmp, evaluate, chargedipoles"); RegionTimer r(t);
|
|
1000
|
+
// t.AddFlops (chargedipoles.Size());
|
|
1001
|
+
|
|
1002
|
+
for (auto [x,c,d,c2] : chargedipoles)
|
|
1003
|
+
if (double rho = L2Norm(p-x); rho > 0)
|
|
1004
|
+
{
|
|
1005
|
+
sum += (1/(4*M_PI))*exp(Complex(0,rho*mp.Kappa())) / rho * c;
|
|
1006
|
+
|
|
1007
|
+
Vec<3> drhodp = 1.0/rho * (p-x);
|
|
1008
|
+
Complex dGdrho = (1/(4*M_PI))*exp(Complex(0,rho*mp.Kappa())) *
|
|
1009
|
+
(Complex(0, mp.Kappa())/rho - 1.0/sqr(rho));
|
|
1010
|
+
|
|
1011
|
+
sum += dGdrho * InnerProduct(drhodp, d) * c2;
|
|
1012
|
+
}
|
|
1013
|
+
}
|
|
1014
|
+
|
|
1015
|
+
|
|
1016
|
+
|
|
1017
|
+
|
|
1018
|
+
|
|
904
1019
|
for (auto [sp,ep,j,num] : currents)
|
|
905
1020
|
{
|
|
906
1021
|
// should use explizit formula instead ...
|
|
@@ -937,7 +1052,9 @@ namespace ngsbem
|
|
|
937
1052
|
}
|
|
938
1053
|
|
|
939
1054
|
if (dipoles.Size())
|
|
940
|
-
throw Exception("EvaluateDeriv not implemented for dipoles in
|
|
1055
|
+
throw Exception("EvaluateDeriv not implemented for dipoles in SingularMLExpansion");
|
|
1056
|
+
if (chargedipoles.Size())
|
|
1057
|
+
throw Exception("EvaluateDeriv not implemented for dipoles in SingularMLExpansion");
|
|
941
1058
|
|
|
942
1059
|
for (auto [x,c] : charges)
|
|
943
1060
|
if (double rho = L2Norm(p-x); rho > 0)
|
|
@@ -952,7 +1069,7 @@ namespace ngsbem
|
|
|
952
1069
|
|
|
953
1070
|
void CalcTotalSources()
|
|
954
1071
|
{
|
|
955
|
-
total_sources = charges.Size() + dipoles.Size();
|
|
1072
|
+
total_sources = charges.Size() + dipoles.Size() + chargedipoles.Size();
|
|
956
1073
|
for (auto & child : childs)
|
|
957
1074
|
if (child)
|
|
958
1075
|
{
|
|
@@ -985,9 +1102,9 @@ namespace ngsbem
|
|
|
985
1102
|
}
|
|
986
1103
|
else
|
|
987
1104
|
{
|
|
988
|
-
if (charges.Size()+dipoles.Size()+currents.Size() == 0)
|
|
1105
|
+
if (charges.Size()+dipoles.Size()+chargedipoles.Size()+currents.Size() == 0)
|
|
989
1106
|
{
|
|
990
|
-
mp =
|
|
1107
|
+
mp = SphericalExpansion<Singular,entry_type> (-1, mp.Kappa(), 1.);
|
|
991
1108
|
return;
|
|
992
1109
|
}
|
|
993
1110
|
|
|
@@ -1026,6 +1143,24 @@ namespace ngsbem
|
|
|
1026
1143
|
simd_dipoles[ii] = MakeSimd(di);
|
|
1027
1144
|
}
|
|
1028
1145
|
|
|
1146
|
+
|
|
1147
|
+
simd_chargedipoles.SetSize( (chargedipoles.Size()+FMM_SW-1)/FMM_SW);
|
|
1148
|
+
i = 0, ii = 0;
|
|
1149
|
+
for ( ; i+FMM_SW <= chargedipoles.Size(); i+=FMM_SW, ii++)
|
|
1150
|
+
{
|
|
1151
|
+
std::array<tuple<Vec<3>,entry_type,Vec<3>,entry_type>, FMM_SW> di;
|
|
1152
|
+
for (int j = 0; j < FMM_SW; j++) di[j] = chargedipoles[i+j];
|
|
1153
|
+
simd_chargedipoles[ii] = MakeSimd(di);
|
|
1154
|
+
}
|
|
1155
|
+
if (i < chargedipoles.Size())
|
|
1156
|
+
{
|
|
1157
|
+
std::array<tuple<Vec<3>,entry_type,Vec<3>,entry_type>, FMM_SW> di;
|
|
1158
|
+
int j = 0;
|
|
1159
|
+
for ( ; i+j < chargedipoles.Size(); j++) di[j] = chargedipoles[i+j];
|
|
1160
|
+
for ( ; j < FMM_SW; j++) di[j] = tuple( get<0>(di[0]), entry_type{0.0}, get<2>(di[0]), entry_type{0.0} );
|
|
1161
|
+
simd_chargedipoles[ii] = MakeSimd(di);
|
|
1162
|
+
}
|
|
1163
|
+
|
|
1029
1164
|
|
|
1030
1165
|
if (nodes_to_process)
|
|
1031
1166
|
*nodes_to_process += this;
|
|
@@ -1036,6 +1171,9 @@ namespace ngsbem
|
|
|
1036
1171
|
for (auto [x,d,c] : dipoles)
|
|
1037
1172
|
mp.AddDipole (x-center, d, c);
|
|
1038
1173
|
|
|
1174
|
+
for (auto [x,c,d,c2] : chargedipoles)
|
|
1175
|
+
mp.AddChargeDipole (x-center, c, d, c2);
|
|
1176
|
+
|
|
1039
1177
|
for (auto [sp,ep,j,num] : currents)
|
|
1040
1178
|
mp.AddCurrent (sp-center, ep-center, j, num);
|
|
1041
1179
|
}
|
|
@@ -1044,7 +1182,7 @@ namespace ngsbem
|
|
|
1044
1182
|
|
|
1045
1183
|
entry_type EvaluateMP(Vec<3> p) const
|
|
1046
1184
|
{
|
|
1047
|
-
if (charges.Size() || dipoles.Size())
|
|
1185
|
+
if (charges.Size() || dipoles.Size() || chargedipoles.Size())
|
|
1048
1186
|
return Evaluate(p);
|
|
1049
1187
|
|
|
1050
1188
|
if (L2Norm(p-center) > 3*r)
|
|
@@ -1064,7 +1202,7 @@ namespace ngsbem
|
|
|
1064
1202
|
// cout << "EvaluateMPDeriv Singular, p = " << p << ", d = " << d << ", r = " << r << ", center = " << center << endl;
|
|
1065
1203
|
// cout << "Norm: " << L2Norm(p-center) << " > " << 3*r << endl;
|
|
1066
1204
|
// cout << "charges.Size() = " << charges.Size() << ", dipoles.Size() = " << dipoles.Size() << endl;
|
|
1067
|
-
if (charges.Size() || dipoles.Size() || !childs[0])
|
|
1205
|
+
if (charges.Size() || dipoles.Size() || chargedipoles.Size() || !childs[0])
|
|
1068
1206
|
return EvaluateDeriv(p, d);
|
|
1069
1207
|
|
|
1070
1208
|
if (L2Norm(p-center) > 3*r)
|
|
@@ -1087,6 +1225,8 @@ namespace ngsbem
|
|
|
1087
1225
|
ost << "xi = " << x << ", ci = " << c << endl;
|
|
1088
1226
|
for (auto [x,d,c] : dipoles)
|
|
1089
1227
|
ost << "xi = " << x << ", di = " << d << ", ci = " << c << endl;
|
|
1228
|
+
for (auto [x,c,d,c2] : chargedipoles)
|
|
1229
|
+
ost << "xi = " << x << ", c = " << c << ", di = " << d << ", ci = " << c2 << endl;
|
|
1090
1230
|
|
|
1091
1231
|
for (int i = 0; i < 8; i++)
|
|
1092
1232
|
if (childs[i]) childs[i] -> Print (ost, i);
|
|
@@ -1109,13 +1249,21 @@ namespace ngsbem
|
|
|
1109
1249
|
num += ch->NumCoefficients();
|
|
1110
1250
|
return num;
|
|
1111
1251
|
}
|
|
1252
|
+
|
|
1253
|
+
void TraverseTree (const std::function<void(Node&)> & func)
|
|
1254
|
+
{
|
|
1255
|
+
func(*this);
|
|
1256
|
+
for (auto & child : childs)
|
|
1257
|
+
if (child)
|
|
1258
|
+
child->TraverseTree(func);
|
|
1259
|
+
}
|
|
1112
1260
|
};
|
|
1113
1261
|
|
|
1114
1262
|
Node root;
|
|
1115
1263
|
bool havemp = false;
|
|
1116
1264
|
|
|
1117
1265
|
public:
|
|
1118
|
-
|
|
1266
|
+
SingularMLExpansion (Vec<3> center, double r, double kappa)
|
|
1119
1267
|
: root(center, r, 0, kappa)
|
|
1120
1268
|
{
|
|
1121
1269
|
nodes_on_level = 0;
|
|
@@ -1134,6 +1282,11 @@ namespace ngsbem
|
|
|
1134
1282
|
root.AddDipole(x, d, c);
|
|
1135
1283
|
}
|
|
1136
1284
|
|
|
1285
|
+
void AddChargeDipole(Vec<3> x, entry_type c, Vec<3> dir, entry_type c2)
|
|
1286
|
+
{
|
|
1287
|
+
root.AddChargeDipole(x, c, dir, c2);
|
|
1288
|
+
}
|
|
1289
|
+
|
|
1137
1290
|
void AddCurrent (Vec<3> sp, Vec<3> ep, Complex j, int num)
|
|
1138
1291
|
{
|
|
1139
1292
|
if constexpr (!std::is_same<entry_type, Vec<3,Complex>>())
|
|
@@ -1193,87 +1346,93 @@ namespace ngsbem
|
|
|
1193
1346
|
|
|
1194
1347
|
root.CalcTotalSources();
|
|
1195
1348
|
|
|
1196
|
-
if (false)
|
|
1349
|
+
if constexpr (false)
|
|
1197
1350
|
// direct evaluation of S->S
|
|
1198
1351
|
root.CalcMP(nullptr, nullptr);
|
|
1199
1352
|
else
|
|
1200
1353
|
{
|
|
1201
1354
|
|
|
1202
|
-
|
|
1203
|
-
|
|
1355
|
+
Array<RecordingSS> recording;
|
|
1356
|
+
Array<Node*> nodes_to_process;
|
|
1204
1357
|
|
|
1205
|
-
|
|
1206
|
-
|
|
1207
|
-
|
|
1208
|
-
|
|
1358
|
+
{
|
|
1359
|
+
RegionTimer reg(trec);
|
|
1360
|
+
root.CalcMP(&recording, &nodes_to_process);
|
|
1361
|
+
}
|
|
1209
1362
|
|
|
1210
|
-
|
|
1211
|
-
|
|
1212
|
-
|
|
1213
|
-
|
|
1214
|
-
|
|
1215
|
-
|
|
1216
|
-
|
|
1217
|
-
|
|
1218
|
-
|
|
1219
|
-
|
|
1220
|
-
|
|
1221
|
-
|
|
1222
|
-
|
|
1223
|
-
|
|
1224
|
-
|
|
1225
|
-
|
|
1226
|
-
|
|
1227
|
-
|
|
1228
|
-
|
|
1229
|
-
|
|
1230
|
-
|
|
1231
|
-
|
|
1363
|
+
{
|
|
1364
|
+
RegionTimer rs2mp(ts2mp);
|
|
1365
|
+
ParallelFor(nodes_to_process.Size(), [&](int i)
|
|
1366
|
+
{
|
|
1367
|
+
auto node = nodes_to_process[i];
|
|
1368
|
+
for (auto [x,c]: node->charges)
|
|
1369
|
+
node->mp.AddCharge(x-node->center, c);
|
|
1370
|
+
for (auto [x,d,c]: node->dipoles)
|
|
1371
|
+
node->mp.AddDipole(x-node->center, d, c);
|
|
1372
|
+
for (auto [x,c,d,c2]: node->chargedipoles)
|
|
1373
|
+
node->mp.AddChargeDipole(x-node->center, c, d, c2);
|
|
1374
|
+
for (auto [sp,ep,j,num]: node->currents)
|
|
1375
|
+
node->mp.AddCurrent(sp-node->center, ep-node->center, j, num);
|
|
1376
|
+
}, TasksPerThread(4));
|
|
1377
|
+
}
|
|
1378
|
+
|
|
1379
|
+
{
|
|
1380
|
+
RegionTimer reg(tsort);
|
|
1381
|
+
QuickSort (recording, [] (auto & a, auto & b)
|
|
1382
|
+
{
|
|
1383
|
+
if (a.len < (1-1e-8) * b.len) return true;
|
|
1384
|
+
if (a.len > (1+1e-8) * b.len) return false;
|
|
1385
|
+
return a.theta < b.theta;
|
|
1386
|
+
});
|
|
1387
|
+
}
|
|
1232
1388
|
|
|
1233
|
-
|
|
1234
|
-
|
|
1235
|
-
|
|
1236
|
-
|
|
1237
|
-
|
|
1238
|
-
|
|
1239
|
-
|
|
1240
|
-
|
|
1241
|
-
|
|
1242
|
-
|
|
1243
|
-
|
|
1389
|
+
double current_len = -1e100;
|
|
1390
|
+
double current_theta = -1e100;
|
|
1391
|
+
Array<RecordingSS*> current_batch;
|
|
1392
|
+
Array<Array<RecordingSS*>> batch_group;
|
|
1393
|
+
Array<double> group_lengths;
|
|
1394
|
+
Array<double> group_thetas;
|
|
1395
|
+
for (auto & record : recording)
|
|
1396
|
+
{
|
|
1397
|
+
bool len_changed = fabs(record.len - current_len) > 1e-8;
|
|
1398
|
+
bool theta_changed = fabs(record.theta - current_theta) > 1e-8;
|
|
1399
|
+
if ((len_changed || theta_changed) && current_batch.Size() > 0) {
|
|
1400
|
+
batch_group.Append(current_batch);
|
|
1401
|
+
group_lengths.Append(current_len);
|
|
1402
|
+
group_thetas.Append(current_theta);
|
|
1403
|
+
current_batch.SetSize(0);
|
|
1404
|
+
}
|
|
1405
|
+
|
|
1406
|
+
current_len = record.len;
|
|
1407
|
+
current_theta = record.theta;
|
|
1408
|
+
current_batch.Append(&record);
|
|
1409
|
+
}
|
|
1410
|
+
|
|
1411
|
+
if (current_batch.Size() > 0) {
|
|
1244
1412
|
batch_group.Append(current_batch);
|
|
1245
1413
|
group_lengths.Append(current_len);
|
|
1246
1414
|
group_thetas.Append(current_theta);
|
|
1247
|
-
|
|
1248
|
-
}
|
|
1415
|
+
}
|
|
1249
1416
|
|
|
1250
|
-
|
|
1251
|
-
|
|
1252
|
-
|
|
1417
|
+
{
|
|
1418
|
+
RegionTimer rS2S(tS2S);
|
|
1419
|
+
// ParallelFor(batch_group.Size(), [&](int i) {
|
|
1420
|
+
for (int i = 0; i < batch_group.Size(); i++){
|
|
1421
|
+
// *testout << "Processing batch " << i << " of size " << batch_group[i].Size() << ", with len = " << group_lengths[i] << ", theta = " << group_thetas[i] << endl;
|
|
1422
|
+
int chunk_size = 24;
|
|
1423
|
+
if (batch_group[i].Size() < chunk_size)
|
|
1424
|
+
ProcessBatchSS(batch_group[i], group_lengths[i], group_thetas[i]);
|
|
1425
|
+
else
|
|
1426
|
+
ParallelForRange(IntRange(batch_group[i].Size()), [&](IntRange range) {
|
|
1427
|
+
auto sub_batch = batch_group[i].Range(range.First(), range.Next());
|
|
1428
|
+
ProcessBatchSS(sub_batch, group_lengths[i], group_thetas[i]);
|
|
1429
|
+
}, TasksPerThread(4));
|
|
1430
|
+
}
|
|
1431
|
+
}
|
|
1253
1432
|
}
|
|
1254
|
-
if (current_batch.Size() > 0) {
|
|
1255
|
-
batch_group.Append(current_batch);
|
|
1256
|
-
group_lengths.Append(current_len);
|
|
1257
|
-
group_thetas.Append(current_theta);
|
|
1258
|
-
}
|
|
1259
1433
|
|
|
1260
|
-
|
|
1261
|
-
|
|
1262
|
-
// ParallelFor(batch_group.Size(), [&](int i) {
|
|
1263
|
-
for (int i = 0; i < batch_group.Size(); i++){
|
|
1264
|
-
// *testout << "Processing batch " << i << " of size " << batch_group[i].Size() << ", with len = " << group_lengths[i] << ", theta = " << group_thetas[i] << endl;
|
|
1265
|
-
int chunk_size = 24;
|
|
1266
|
-
if (batch_group[i].Size() < chunk_size)
|
|
1267
|
-
ProcessBatch(batch_group[i], group_lengths[i], group_thetas[i]);
|
|
1268
|
-
else
|
|
1269
|
-
ParallelForRange(IntRange(batch_group[i].Size()), [&](IntRange range) {
|
|
1270
|
-
auto sub_batch = batch_group[i].Range(range.First(), range.Next());
|
|
1271
|
-
ProcessBatch(sub_batch, group_lengths[i], group_thetas[i]);
|
|
1272
|
-
}, TasksPerThread(4));
|
|
1273
|
-
}
|
|
1274
|
-
}
|
|
1275
|
-
}
|
|
1276
|
-
|
|
1434
|
+
// cout << "have singular:" << endl;
|
|
1435
|
+
// PrintStatistics (cout);
|
|
1277
1436
|
havemp = true;
|
|
1278
1437
|
}
|
|
1279
1438
|
|
|
@@ -1285,35 +1444,73 @@ namespace ngsbem
|
|
|
1285
1444
|
return root.Evaluate(p);
|
|
1286
1445
|
}
|
|
1287
1446
|
|
|
1447
|
+
|
|
1448
|
+
void PrintStatistics (ostream & ost)
|
|
1449
|
+
{
|
|
1450
|
+
int levels = 0;
|
|
1451
|
+
int cnt = 0;
|
|
1452
|
+
root.TraverseTree( [&](Node & node) {
|
|
1453
|
+
levels = max(levels, node.level);
|
|
1454
|
+
cnt++;
|
|
1455
|
+
});
|
|
1456
|
+
ost << "levels: " << levels << endl;
|
|
1457
|
+
ost << "nodes: " << cnt << endl;
|
|
1458
|
+
|
|
1459
|
+
Array<int> num_on_level(levels+1);
|
|
1460
|
+
Array<int> order_on_level(levels+1);
|
|
1461
|
+
Array<size_t> coefs_on_level(levels+1);
|
|
1462
|
+
num_on_level = 0;
|
|
1463
|
+
order_on_level = 0;
|
|
1464
|
+
root.TraverseTree( [&](Node & node) {
|
|
1465
|
+
num_on_level[node.level]++;
|
|
1466
|
+
order_on_level[node.level] = max(order_on_level[node.level],node.mp.Order());
|
|
1467
|
+
coefs_on_level[node.level] += node.mp.SH().Coefs().Size();
|
|
1468
|
+
});
|
|
1469
|
+
|
|
1470
|
+
cout << "num on level" << endl;
|
|
1471
|
+
for (int i = 0; i < num_on_level.Size(); i++)
|
|
1472
|
+
cout << i << ": " << num_on_level[i] << ", order = " << order_on_level[i] << ", coefs " << coefs_on_level[i] << endl;
|
|
1473
|
+
|
|
1474
|
+
size_t totcoefs = 0;
|
|
1475
|
+
for (auto n : coefs_on_level)
|
|
1476
|
+
totcoefs += n;
|
|
1477
|
+
cout << "total mem in coefs: " << sizeof(entry_type)*totcoefs / sqr(1024) << " MB" << endl;
|
|
1478
|
+
}
|
|
1479
|
+
|
|
1480
|
+
|
|
1481
|
+
|
|
1288
1482
|
template <typename entry_type2>
|
|
1289
|
-
friend class
|
|
1483
|
+
friend class RegularMLExpansion;
|
|
1290
1484
|
};
|
|
1291
1485
|
|
|
1292
1486
|
|
|
1293
1487
|
template <typename entry_type>
|
|
1294
|
-
inline ostream & operator<< (ostream & ost, const
|
|
1488
|
+
inline ostream & operator<< (ostream & ost, const SingularMLExpansion<entry_type> & mlmp)
|
|
1295
1489
|
{
|
|
1296
1490
|
mlmp.Print(ost);
|
|
1297
1491
|
return ost;
|
|
1298
1492
|
}
|
|
1299
1493
|
|
|
1300
1494
|
|
|
1495
|
+
// *********************************** Regular multilevel Expansion
|
|
1496
|
+
|
|
1497
|
+
|
|
1301
1498
|
template <typename elem_type=Complex>
|
|
1302
|
-
class NGS_DLL_HEADER
|
|
1499
|
+
class NGS_DLL_HEADER RegularMLExpansion
|
|
1303
1500
|
{
|
|
1304
1501
|
static Array<size_t> nodes_on_level;
|
|
1305
1502
|
|
|
1306
1503
|
|
|
1307
1504
|
struct RecordingRS
|
|
1308
1505
|
{
|
|
1309
|
-
const
|
|
1310
|
-
|
|
1506
|
+
const SphericalExpansion<Singular,elem_type> * mpS;
|
|
1507
|
+
SphericalExpansion<Regular,elem_type> * mpR;
|
|
1311
1508
|
Vec<3> dist;
|
|
1312
1509
|
double len, theta, phi;
|
|
1313
1510
|
public:
|
|
1314
1511
|
RecordingRS() = default;
|
|
1315
|
-
RecordingRS (const
|
|
1316
|
-
|
|
1512
|
+
RecordingRS (const SphericalExpansion<Singular,elem_type> * ampS,
|
|
1513
|
+
SphericalExpansion<Regular,elem_type> * ampR,
|
|
1317
1514
|
Vec<3> adist)
|
|
1318
1515
|
: mpS(ampS), mpR(ampR), dist(adist)
|
|
1319
1516
|
{
|
|
@@ -1334,28 +1531,28 @@ namespace ngsbem
|
|
|
1334
1531
|
}
|
|
1335
1532
|
}
|
|
1336
1533
|
else if (N <= 3) {
|
|
1337
|
-
|
|
1534
|
+
ProcessVectorizedBatchRS<3, vec_length>(batch, len, theta);
|
|
1338
1535
|
}
|
|
1339
1536
|
else if (N <= 4) {
|
|
1340
|
-
|
|
1537
|
+
ProcessVectorizedBatchRS<4, vec_length>(batch, len, theta);
|
|
1341
1538
|
}
|
|
1342
1539
|
else if (N <= 6) {
|
|
1343
|
-
|
|
1540
|
+
ProcessVectorizedBatchRS<6, vec_length>(batch, len, theta);
|
|
1344
1541
|
}
|
|
1345
1542
|
else if (N <= 12) {
|
|
1346
|
-
|
|
1543
|
+
ProcessVectorizedBatchRS<12, vec_length>(batch, len, theta);
|
|
1347
1544
|
}
|
|
1348
1545
|
else if (N <= 24) {
|
|
1349
|
-
|
|
1546
|
+
ProcessVectorizedBatchRS<24, vec_length>(batch, len, theta);
|
|
1350
1547
|
}
|
|
1351
1548
|
else if (N <= 48) {
|
|
1352
|
-
|
|
1549
|
+
ProcessVectorizedBatchRS<48, vec_length>(batch, len, theta);
|
|
1353
1550
|
}
|
|
1354
1551
|
else if (N <= 96) {
|
|
1355
|
-
|
|
1552
|
+
ProcessVectorizedBatchRS<96, vec_length>(batch, len, theta);
|
|
1356
1553
|
}
|
|
1357
1554
|
else if (N <= 192) {
|
|
1358
|
-
|
|
1555
|
+
ProcessVectorizedBatchRS<192, vec_length>(batch, len, theta);
|
|
1359
1556
|
}
|
|
1360
1557
|
else {
|
|
1361
1558
|
// Split large batches
|
|
@@ -1387,7 +1584,7 @@ namespace ngsbem
|
|
|
1387
1584
|
|
|
1388
1585
|
|
|
1389
1586
|
template<int N, int vec_length>
|
|
1390
|
-
static void
|
|
1587
|
+
static void ProcessVectorizedBatchRS(FlatArray<RecordingRS*> batch, double len, double theta) {
|
|
1391
1588
|
|
|
1392
1589
|
// static Timer t("ProcessVectorizedBatch, N = "+ToString(N) + ", vec_len = " + ToString(vec_length));
|
|
1393
1590
|
// RegionTimer reg(t, batch[0]->mpS->SH().Order());
|
|
@@ -1395,10 +1592,10 @@ namespace ngsbem
|
|
|
1395
1592
|
// static Timer tfrombatch("mptools - copy from batch 2");
|
|
1396
1593
|
|
|
1397
1594
|
// *testout << "Processing vectorized batch of size " << batch.Size() << ", with N = " << N << ", vec_length = " << vec_length << ", len = " << len << ", theta = " << theta << endl;
|
|
1398
|
-
|
|
1399
|
-
//
|
|
1400
|
-
|
|
1401
|
-
|
|
1595
|
+
SphericalExpansion<Singular, Vec<N,Complex>> vec_source(batch[0]->mpS->Order(), batch[0]->mpS->Kappa(), batch[0]->mpS->RTyp());
|
|
1596
|
+
// SphericalExpansion<Singular, elem_type> tmp_source{*batch[0]->mpS};
|
|
1597
|
+
SphericalExpansion<Regular, elem_type> tmp_target{*batch[0]->mpR};
|
|
1598
|
+
SphericalExpansion<Regular, Vec<N,Complex>> vec_target(batch[0]->mpR->Order(), batch[0]->mpR->Kappa(), batch[0]->mpR->RTyp());
|
|
1402
1599
|
|
|
1403
1600
|
// Copy multipoles into vectorized multipole
|
|
1404
1601
|
// ttobatch.Start();
|
|
@@ -1446,24 +1643,32 @@ namespace ngsbem
|
|
|
1446
1643
|
double r;
|
|
1447
1644
|
int level;
|
|
1448
1645
|
std::array<unique_ptr<Node>,8> childs;
|
|
1449
|
-
|
|
1646
|
+
SphericalExpansion<Regular,elem_type> mp;
|
|
1450
1647
|
Array<Vec<3>> targets;
|
|
1648
|
+
Array<tuple<Vec<3>,double>> vol_targets;
|
|
1451
1649
|
int total_targets;
|
|
1452
1650
|
std::mutex node_mutex;
|
|
1453
1651
|
atomic<bool> have_childs{false};
|
|
1454
1652
|
|
|
1455
|
-
Array<const typename
|
|
1653
|
+
Array<const typename SingularMLExpansion<elem_type>::Node*> singnodes;
|
|
1456
1654
|
|
|
1457
1655
|
Node (Vec<3> acenter, double ar, int alevel, double kappa)
|
|
1458
|
-
: center(acenter), r(ar), level(alevel),
|
|
1656
|
+
: center(acenter), r(ar), level(alevel),
|
|
1657
|
+
// mp(MPOrder(ar*kappa), kappa, ar) // 1.0/min(1.0, 0.25*r*kappa))
|
|
1658
|
+
mp(-1, kappa, ar)
|
|
1459
1659
|
// : center(acenter), r(ar), level(alevel), mp(MPOrder(ar*kappa), kappa, 1.0)
|
|
1460
1660
|
{
|
|
1461
1661
|
if (level < nodes_on_level.Size())
|
|
1462
1662
|
nodes_on_level[level]++;
|
|
1463
1663
|
}
|
|
1464
1664
|
|
|
1465
|
-
|
|
1466
|
-
|
|
1665
|
+
void Allocate()
|
|
1666
|
+
{
|
|
1667
|
+
mp = SphericalExpansion<Regular,elem_type>(MPOrder(r*mp.Kappa()), mp.Kappa(), r);
|
|
1668
|
+
}
|
|
1669
|
+
|
|
1670
|
+
|
|
1671
|
+
void CreateChilds(bool allocate = false)
|
|
1467
1672
|
{
|
|
1468
1673
|
if (childs[0]) throw Exception("have already childs");
|
|
1469
1674
|
// create children nodes:
|
|
@@ -1474,11 +1679,13 @@ namespace ngsbem
|
|
|
1474
1679
|
cc(1) += (i&2) ? r/2 : -r/2;
|
|
1475
1680
|
cc(2) += (i&4) ? r/2 : -r/2;
|
|
1476
1681
|
childs[i] = make_unique<Node> (cc, r/2, level+1, mp.Kappa());
|
|
1682
|
+
if (allocate)
|
|
1683
|
+
childs[i] -> Allocate();
|
|
1477
1684
|
}
|
|
1478
1685
|
have_childs = true;
|
|
1479
1686
|
}
|
|
1480
1687
|
|
|
1481
|
-
void AddSingularNode (const typename
|
|
1688
|
+
void AddSingularNode (const typename SingularMLExpansion<elem_type>::Node & singnode, bool allow_refine,
|
|
1482
1689
|
Array<RecordingRS> * recording)
|
|
1483
1690
|
{
|
|
1484
1691
|
if (mp.SH().Order() < 0) return;
|
|
@@ -1526,7 +1733,7 @@ namespace ngsbem
|
|
|
1526
1733
|
if (allow_refine)
|
|
1527
1734
|
{
|
|
1528
1735
|
if (!childs[0])
|
|
1529
|
-
CreateChilds();
|
|
1736
|
+
CreateChilds(true);
|
|
1530
1737
|
|
|
1531
1738
|
for (auto & ch : childs)
|
|
1532
1739
|
ch -> AddSingularNode (singnode, allow_refine, recording);
|
|
@@ -1546,7 +1753,7 @@ namespace ngsbem
|
|
|
1546
1753
|
childs[nr] -> AddSingularNode (singnode, allow_refine, recording);
|
|
1547
1754
|
});
|
|
1548
1755
|
|
|
1549
|
-
if (targets.Size())
|
|
1756
|
+
if (targets.Size()+vol_targets.Size())
|
|
1550
1757
|
singnodes.Append(&singnode);
|
|
1551
1758
|
}
|
|
1552
1759
|
}
|
|
@@ -1561,7 +1768,7 @@ namespace ngsbem
|
|
|
1561
1768
|
{
|
|
1562
1769
|
if (allow_refine)
|
|
1563
1770
|
if (mp.Order() > 30 && !childs[0])
|
|
1564
|
-
CreateChilds();
|
|
1771
|
+
CreateChilds(allow_refine);
|
|
1565
1772
|
|
|
1566
1773
|
if (childs[0])
|
|
1567
1774
|
{
|
|
@@ -1581,7 +1788,7 @@ namespace ngsbem
|
|
|
1581
1788
|
mp.TransformAdd (childs[nr]->mp, childs[nr]->center-center);
|
|
1582
1789
|
childs[nr]->LocalizeExpansion(allow_refine);
|
|
1583
1790
|
});
|
|
1584
|
-
mp =
|
|
1791
|
+
mp = SphericalExpansion<Regular,elem_type>(-1, mp.Kappa(), 1.);
|
|
1585
1792
|
//mp.SH().Coefs()=0.0;
|
|
1586
1793
|
}
|
|
1587
1794
|
}
|
|
@@ -1597,12 +1804,16 @@ namespace ngsbem
|
|
|
1597
1804
|
if (childs[childnum])
|
|
1598
1805
|
sum = childs[childnum]->Evaluate(p);
|
|
1599
1806
|
else
|
|
1600
|
-
|
|
1601
|
-
|
|
1602
|
-
|
|
1603
|
-
|
|
1604
|
-
sum += sn->EvaluateMP(p);
|
|
1807
|
+
{
|
|
1808
|
+
// static Timer t("mptool regmp, evaluate reg"); RegionTimer r(t);
|
|
1809
|
+
sum = mp.Eval(p-center);
|
|
1810
|
+
}
|
|
1605
1811
|
|
|
1812
|
+
{
|
|
1813
|
+
// static Timer t("mptool regmp, evaluate, singnode"); RegionTimer r(t);
|
|
1814
|
+
for (auto sn : singnodes)
|
|
1815
|
+
sum += sn->EvaluateMP(p);
|
|
1816
|
+
}
|
|
1606
1817
|
return sum;
|
|
1607
1818
|
}
|
|
1608
1819
|
|
|
@@ -1628,6 +1839,14 @@ namespace ngsbem
|
|
|
1628
1839
|
return sum;
|
|
1629
1840
|
}
|
|
1630
1841
|
|
|
1842
|
+
void TraverseTree (const std::function<void(Node&)> & func)
|
|
1843
|
+
{
|
|
1844
|
+
func(*this);
|
|
1845
|
+
for (auto & child : childs)
|
|
1846
|
+
if (child)
|
|
1847
|
+
child->TraverseTree(func);
|
|
1848
|
+
}
|
|
1849
|
+
|
|
1631
1850
|
double Norm() const
|
|
1632
1851
|
{
|
|
1633
1852
|
double norm = L2Norm(mp.SH().Coefs());
|
|
@@ -1645,17 +1864,23 @@ namespace ngsbem
|
|
|
1645
1864
|
num += ch->NumCoefficients();
|
|
1646
1865
|
return num;
|
|
1647
1866
|
}
|
|
1648
|
-
|
|
1867
|
+
|
|
1868
|
+
int GetChildNum (Vec<3> x) const
|
|
1869
|
+
{
|
|
1870
|
+
int childnum = 0;
|
|
1871
|
+
if (x(0) > center(0)) childnum += 1;
|
|
1872
|
+
if (x(1) > center(1)) childnum += 2;
|
|
1873
|
+
if (x(2) > center(2)) childnum += 4;
|
|
1874
|
+
return childnum;
|
|
1875
|
+
}
|
|
1876
|
+
|
|
1649
1877
|
void AddTarget (Vec<3> x)
|
|
1650
1878
|
{
|
|
1651
1879
|
// if (childs[0])
|
|
1652
1880
|
if (have_childs) // quick check without locking
|
|
1653
1881
|
{
|
|
1654
1882
|
// directly send to childs:
|
|
1655
|
-
int childnum =
|
|
1656
|
-
if (x(0) > center(0)) childnum += 1;
|
|
1657
|
-
if (x(1) > center(1)) childnum += 2;
|
|
1658
|
-
if (x(2) > center(2)) childnum += 4;
|
|
1883
|
+
int childnum = GetChildNum(x);
|
|
1659
1884
|
childs[childnum] -> AddTarget( x );
|
|
1660
1885
|
return;
|
|
1661
1886
|
}
|
|
@@ -1665,32 +1890,74 @@ namespace ngsbem
|
|
|
1665
1890
|
if (have_childs) // test again after locking
|
|
1666
1891
|
{
|
|
1667
1892
|
// directly send to childs:
|
|
1668
|
-
int childnum =
|
|
1669
|
-
if (x(0) > center(0)) childnum += 1;
|
|
1670
|
-
if (x(1) > center(1)) childnum += 2;
|
|
1671
|
-
if (x(2) > center(2)) childnum += 4;
|
|
1893
|
+
int childnum = GetChildNum(x);
|
|
1672
1894
|
childs[childnum] -> AddTarget(x);
|
|
1673
1895
|
return;
|
|
1674
1896
|
}
|
|
1675
1897
|
|
|
1676
|
-
|
|
1677
1898
|
targets.Append( x );
|
|
1678
1899
|
|
|
1679
1900
|
// if (r*mp.Kappa() < 1e-8) return;
|
|
1680
1901
|
if (level > 20) return;
|
|
1681
|
-
if (targets.Size() < maxdirect && r*mp.Kappa() <
|
|
1902
|
+
if (targets.Size() < maxdirect && r*mp.Kappa() < 5)
|
|
1903
|
+
return;
|
|
1904
|
+
|
|
1905
|
+
CreateChilds();
|
|
1906
|
+
|
|
1907
|
+
for (auto t : targets)
|
|
1908
|
+
AddTarget (t);
|
|
1909
|
+
for (auto [x,r] : vol_targets)
|
|
1910
|
+
AddVolumeTarget (x,r);
|
|
1911
|
+
|
|
1912
|
+
targets.SetSize0();
|
|
1913
|
+
vol_targets.SetSize0();
|
|
1914
|
+
}
|
|
1915
|
+
|
|
1916
|
+
|
|
1917
|
+
void AddVolumeTarget (Vec<3> x, double tr)
|
|
1918
|
+
{
|
|
1919
|
+
if (MaxNorm(x-center) > r+tr) return;
|
|
1920
|
+
|
|
1921
|
+
if (have_childs)
|
|
1922
|
+
{
|
|
1923
|
+
for (auto & child : childs)
|
|
1924
|
+
child->AddVolumeTarget(x, tr);
|
|
1925
|
+
return;
|
|
1926
|
+
}
|
|
1927
|
+
|
|
1928
|
+
|
|
1929
|
+
lock_guard<mutex> guard(node_mutex);
|
|
1930
|
+
|
|
1931
|
+
if (have_childs)
|
|
1932
|
+
{
|
|
1933
|
+
for (auto & child : childs)
|
|
1934
|
+
child->AddVolumeTarget(x, tr);
|
|
1935
|
+
return;
|
|
1936
|
+
}
|
|
1937
|
+
|
|
1938
|
+
|
|
1939
|
+
vol_targets.Append (tuple(x,tr));
|
|
1940
|
+
|
|
1941
|
+
if (level > 20) return;
|
|
1942
|
+
if (vol_targets.Size() < maxdirect && (r*mp.Kappa() < 5))
|
|
1682
1943
|
return;
|
|
1683
1944
|
|
|
1684
1945
|
CreateChilds();
|
|
1685
1946
|
|
|
1686
1947
|
for (auto t : targets)
|
|
1687
1948
|
AddTarget (t);
|
|
1949
|
+
for (auto [x,r] : vol_targets)
|
|
1950
|
+
AddVolumeTarget (x,r);
|
|
1951
|
+
|
|
1688
1952
|
targets.SetSize0();
|
|
1953
|
+
vol_targets.SetSize0();
|
|
1689
1954
|
}
|
|
1690
1955
|
|
|
1956
|
+
|
|
1957
|
+
|
|
1691
1958
|
void CalcTotalTargets()
|
|
1692
1959
|
{
|
|
1693
|
-
total_targets = targets.Size();
|
|
1960
|
+
total_targets = targets.Size() + vol_targets.Size();
|
|
1694
1961
|
for (auto & child : childs)
|
|
1695
1962
|
if (child)
|
|
1696
1963
|
{
|
|
@@ -1710,8 +1977,21 @@ namespace ngsbem
|
|
|
1710
1977
|
}
|
|
1711
1978
|
|
|
1712
1979
|
if (total_targets == 0)
|
|
1713
|
-
mp =
|
|
1980
|
+
mp = SphericalExpansion<Regular,elem_type>(-1, mp.Kappa(),1.);
|
|
1981
|
+
}
|
|
1982
|
+
|
|
1983
|
+
void AllocateMemory()
|
|
1984
|
+
{
|
|
1985
|
+
for (auto & child : childs)
|
|
1986
|
+
if (child)
|
|
1987
|
+
child->AllocateMemory();
|
|
1988
|
+
|
|
1989
|
+
if (total_targets > 0)
|
|
1990
|
+
Allocate();
|
|
1991
|
+
// mp = SphericalExpansion<Regular,elem_type>(MPOrder(r*mp.Kappa()), mp.Kappa(), r); // -1, mp.Kappa(),1.);
|
|
1714
1992
|
}
|
|
1993
|
+
|
|
1994
|
+
|
|
1715
1995
|
|
|
1716
1996
|
|
|
1717
1997
|
void Print (ostream & ost, size_t childnr = -1) const
|
|
@@ -1730,14 +2010,15 @@ namespace ngsbem
|
|
|
1730
2010
|
};
|
|
1731
2011
|
|
|
1732
2012
|
Node root;
|
|
1733
|
-
shared_ptr<
|
|
2013
|
+
shared_ptr<SingularMLExpansion<elem_type>> singmp;
|
|
1734
2014
|
|
|
1735
2015
|
public:
|
|
1736
|
-
|
|
2016
|
+
RegularMLExpansion (shared_ptr<SingularMLExpansion<elem_type>> asingmp, Vec<3> center, double r)
|
|
1737
2017
|
: root(center, r, 0, asingmp->Kappa()), singmp(asingmp)
|
|
1738
2018
|
{
|
|
1739
2019
|
if (!singmp->havemp) throw Exception("first call Calc for singular MP");
|
|
1740
|
-
|
|
2020
|
+
root.Allocate();
|
|
2021
|
+
|
|
1741
2022
|
nodes_on_level = 0;
|
|
1742
2023
|
nodes_on_level[0] = 1;
|
|
1743
2024
|
{
|
|
@@ -1763,7 +2044,7 @@ namespace ngsbem
|
|
|
1763
2044
|
}
|
|
1764
2045
|
}
|
|
1765
2046
|
|
|
1766
|
-
|
|
2047
|
+
RegularMLExpansion (Vec<3> center, double r, double kappa)
|
|
1767
2048
|
: root(center, r, 0, kappa)
|
|
1768
2049
|
{
|
|
1769
2050
|
nodes_on_level = 0;
|
|
@@ -1775,87 +2056,151 @@ namespace ngsbem
|
|
|
1775
2056
|
root.AddTarget (t);
|
|
1776
2057
|
}
|
|
1777
2058
|
|
|
1778
|
-
void
|
|
2059
|
+
void AddVolumeTarget (Vec<3> t, double r)
|
|
2060
|
+
{
|
|
2061
|
+
root.AddVolumeTarget (t, r);
|
|
2062
|
+
}
|
|
2063
|
+
|
|
2064
|
+
void CalcMP(shared_ptr<SingularMLExpansion<elem_type>> asingmp, bool onlytargets = true)
|
|
1779
2065
|
{
|
|
1780
2066
|
static Timer t("mptool regular MLMP"); RegionTimer rg(t);
|
|
2067
|
+
static Timer tremove("removeempty");
|
|
1781
2068
|
static Timer trec("mptool regular MLMP - recording");
|
|
1782
2069
|
static Timer tsort("mptool regular MLMP - sort");
|
|
1783
2070
|
|
|
1784
2071
|
singmp = asingmp;
|
|
1785
2072
|
|
|
2073
|
+
|
|
1786
2074
|
root.CalcTotalTargets();
|
|
2075
|
+
// cout << "before remove empty trees:" << endl;
|
|
2076
|
+
// PrintStatistics(cout);
|
|
2077
|
+
|
|
2078
|
+
/*
|
|
2079
|
+
tremove.Start();
|
|
1787
2080
|
if (onlytargets)
|
|
1788
2081
|
root.RemoveEmptyTrees();
|
|
1789
|
-
|
|
2082
|
+
tremove.Stop();
|
|
2083
|
+
*/
|
|
1790
2084
|
|
|
1791
|
-
|
|
2085
|
+
root.AllocateMemory();
|
|
1792
2086
|
|
|
1793
|
-
//
|
|
1794
|
-
|
|
1795
|
-
|
|
1796
|
-
|
|
1797
|
-
|
|
1798
|
-
|
|
1799
|
-
|
|
1800
|
-
|
|
1801
|
-
{
|
|
1802
|
-
RegionTimer reg(tsort);
|
|
1803
|
-
QuickSort (recording, [] (auto & a, auto & b)
|
|
1804
|
-
{
|
|
1805
|
-
if (a.len < (1-1e-8) * b.len) return true;
|
|
1806
|
-
if (a.len > (1+1e-8) * b.len) return false;
|
|
1807
|
-
return a.theta < b.theta;
|
|
1808
|
-
});
|
|
1809
|
-
}
|
|
1810
|
-
|
|
1811
|
-
double current_len = -1e100;
|
|
1812
|
-
double current_theta = -1e100;
|
|
1813
|
-
Array<RecordingRS*> current_batch;
|
|
1814
|
-
Array<Array<RecordingRS*>> batch_group;
|
|
1815
|
-
Array<double> group_lengths;
|
|
1816
|
-
Array<double> group_thetas;
|
|
1817
|
-
for (auto & record : recording)
|
|
2087
|
+
// cout << "after allocating regular:" << endl;
|
|
2088
|
+
// PrintStatistics(cout);
|
|
2089
|
+
|
|
2090
|
+
// cout << "starting S-R converion" << endl;
|
|
2091
|
+
// PrintStatistics(cout);
|
|
2092
|
+
|
|
2093
|
+
|
|
2094
|
+
if constexpr (false)
|
|
1818
2095
|
{
|
|
1819
|
-
|
|
1820
|
-
|
|
1821
|
-
|
|
2096
|
+
root.AddSingularNode(singmp->root, !onlytargets, nullptr);
|
|
2097
|
+
}
|
|
2098
|
+
else
|
|
2099
|
+
{ // use recording
|
|
2100
|
+
Array<RecordingRS> recording;
|
|
2101
|
+
{
|
|
2102
|
+
RegionTimer rrec(trec);
|
|
2103
|
+
root.AddSingularNode(singmp->root, !onlytargets, &recording);
|
|
2104
|
+
}
|
|
2105
|
+
|
|
2106
|
+
// cout << "recorded: " << recording.Size() << endl;
|
|
2107
|
+
{
|
|
2108
|
+
RegionTimer reg(tsort);
|
|
2109
|
+
QuickSort (recording, [] (auto & a, auto & b)
|
|
2110
|
+
{
|
|
2111
|
+
if (a.len < (1-1e-8) * b.len) return true;
|
|
2112
|
+
if (a.len > (1+1e-8) * b.len) return false;
|
|
2113
|
+
return a.theta < b.theta;
|
|
2114
|
+
});
|
|
2115
|
+
}
|
|
2116
|
+
|
|
2117
|
+
double current_len = -1e100;
|
|
2118
|
+
double current_theta = -1e100;
|
|
2119
|
+
Array<RecordingRS*> current_batch;
|
|
2120
|
+
Array<Array<RecordingRS*>> batch_group;
|
|
2121
|
+
Array<double> group_lengths;
|
|
2122
|
+
Array<double> group_thetas;
|
|
2123
|
+
for (auto & record : recording)
|
|
2124
|
+
{
|
|
2125
|
+
bool len_changed = fabs(record.len - current_len) > 1e-8;
|
|
2126
|
+
bool theta_changed = fabs(record.theta - current_theta) > 1e-8;
|
|
2127
|
+
if ((len_changed || theta_changed) && current_batch.Size() > 0) {
|
|
2128
|
+
// ProcessBatch(current_batch, current_len, current_theta);
|
|
2129
|
+
batch_group.Append(current_batch);
|
|
2130
|
+
group_lengths.Append(current_len);
|
|
2131
|
+
group_thetas.Append(current_theta);
|
|
2132
|
+
current_batch.SetSize(0);
|
|
2133
|
+
}
|
|
2134
|
+
|
|
2135
|
+
current_len = record.len;
|
|
2136
|
+
current_theta = record.theta;
|
|
2137
|
+
current_batch.Append(&record);
|
|
2138
|
+
}
|
|
2139
|
+
if (current_batch.Size() > 0) {
|
|
1822
2140
|
// ProcessBatch(current_batch, current_len, current_theta);
|
|
1823
2141
|
batch_group.Append(current_batch);
|
|
1824
2142
|
group_lengths.Append(current_len);
|
|
1825
2143
|
group_thetas.Append(current_theta);
|
|
1826
|
-
|
|
1827
|
-
|
|
1828
|
-
|
|
1829
|
-
|
|
1830
|
-
|
|
1831
|
-
current_batch.Append(&record);
|
|
2144
|
+
}
|
|
2145
|
+
|
|
2146
|
+
ParallelFor(batch_group.Size(), [&](int i) {
|
|
2147
|
+
ProcessBatchRS(batch_group[i], group_lengths[i], group_thetas[i]);
|
|
2148
|
+
}, TasksPerThread(4));
|
|
1832
2149
|
}
|
|
1833
|
-
|
|
1834
|
-
// ProcessBatch(current_batch, current_len, current_theta);
|
|
1835
|
-
batch_group.Append(current_batch);
|
|
1836
|
-
group_lengths.Append(current_len);
|
|
1837
|
-
group_thetas.Append(current_theta);
|
|
1838
|
-
}
|
|
1839
|
-
|
|
1840
|
-
ParallelFor(batch_group.Size(), [&](int i) {
|
|
1841
|
-
ProcessBatchRS(batch_group[i], group_lengths[i], group_thetas[i]);
|
|
1842
|
-
}, TasksPerThread(4));
|
|
1843
|
-
// */
|
|
1844
|
-
|
|
2150
|
+
|
|
1845
2151
|
|
|
1846
2152
|
/*
|
|
1847
2153
|
int maxlevel = 0;
|
|
1848
|
-
for (auto [i,num] : Enumerate(
|
|
2154
|
+
for (auto [i,num] : Enumerate(RegularMLExpansion::nodes_on_level))
|
|
1849
2155
|
if (num > 0) maxlevel = i;
|
|
1850
2156
|
|
|
1851
2157
|
for (int i = 0; i <= maxlevel; i++)
|
|
1852
|
-
cout << "reg " << i << ": " <<
|
|
2158
|
+
cout << "reg " << i << ": " << RegularMLExpansion::nodes_on_level[i] << endl;
|
|
1853
2159
|
*/
|
|
1854
2160
|
|
|
2161
|
+
// cout << "starting R-R converion" << endl;
|
|
2162
|
+
// PrintStatistics(cout);
|
|
2163
|
+
|
|
1855
2164
|
static Timer tloc("mptool regular localize expansion"); RegionTimer rloc(tloc);
|
|
1856
2165
|
root.LocalizeExpansion(!onlytargets);
|
|
2166
|
+
|
|
2167
|
+
|
|
2168
|
+
// cout << "R-R conversion done" << endl;
|
|
2169
|
+
// PrintStatistics(cout);
|
|
1857
2170
|
}
|
|
1858
2171
|
|
|
2172
|
+
void PrintStatistics (ostream & ost)
|
|
2173
|
+
{
|
|
2174
|
+
int levels = 0;
|
|
2175
|
+
int cnt = 0;
|
|
2176
|
+
root.TraverseTree( [&](Node & node) {
|
|
2177
|
+
levels = max(levels, node.level);
|
|
2178
|
+
cnt++;
|
|
2179
|
+
});
|
|
2180
|
+
ost << "levels: " << levels << endl;
|
|
2181
|
+
ost << "nodes: " << cnt << endl;
|
|
2182
|
+
|
|
2183
|
+
Array<int> num_on_level(levels+1);
|
|
2184
|
+
Array<int> order_on_level(levels+1);
|
|
2185
|
+
Array<size_t> coefs_on_level(levels+1);
|
|
2186
|
+
num_on_level = 0;
|
|
2187
|
+
order_on_level = 0;
|
|
2188
|
+
root.TraverseTree( [&](Node & node) {
|
|
2189
|
+
num_on_level[node.level]++;
|
|
2190
|
+
order_on_level[node.level] = max(order_on_level[node.level],node.mp.Order());
|
|
2191
|
+
coefs_on_level[node.level] += node.mp.SH().Coefs().Size();
|
|
2192
|
+
});
|
|
2193
|
+
|
|
2194
|
+
cout << "num on level" << endl;
|
|
2195
|
+
for (int i = 0; i < num_on_level.Size(); i++)
|
|
2196
|
+
cout << i << ": " << num_on_level[i] << ", order = " << order_on_level[i] << ", coefs " << coefs_on_level[i] << endl;
|
|
2197
|
+
|
|
2198
|
+
size_t totcoefs = 0;
|
|
2199
|
+
for (auto n : coefs_on_level)
|
|
2200
|
+
totcoefs += n;
|
|
2201
|
+
cout << "total mem in coefs: " << sizeof(elem_type)*totcoefs / sqr(1024) << " MB" << endl;
|
|
2202
|
+
}
|
|
2203
|
+
|
|
1859
2204
|
void Print (ostream & ost) const
|
|
1860
2205
|
{
|
|
1861
2206
|
root.Print(ost);
|
|
@@ -1875,6 +2220,7 @@ namespace ngsbem
|
|
|
1875
2220
|
{
|
|
1876
2221
|
// static Timer t("mptool Eval MLMP regular"); RegionTimer r(t);
|
|
1877
2222
|
// if (L2Norm(p-root.center) > root.r) return elem_type{0.0};
|
|
2223
|
+
|
|
1878
2224
|
if (MaxNorm(p-root.center) > root.r)
|
|
1879
2225
|
return singmp->Evaluate(p);
|
|
1880
2226
|
return root.Evaluate(p);
|
|
@@ -1890,10 +2236,10 @@ namespace ngsbem
|
|
|
1890
2236
|
|
|
1891
2237
|
|
|
1892
2238
|
template <typename elem_type>
|
|
1893
|
-
inline ostream & operator<< (ostream & ost, const
|
|
2239
|
+
inline ostream & operator<< (ostream & ost, const RegularMLExpansion<elem_type> & mlmp)
|
|
1894
2240
|
{
|
|
1895
2241
|
mlmp.Print(ost);
|
|
1896
|
-
// ost << "
|
|
2242
|
+
// ost << "RegularMLExpansion" << endl;
|
|
1897
2243
|
return ost;
|
|
1898
2244
|
}
|
|
1899
2245
|
|