ngsolve 6.2.2506.post33.dev0__cp39-cp39-macosx_10_15_universal2.whl → 6.2.2506.post45.dev0__cp39-cp39-macosx_10_15_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ngsolve might be problematic. Click here for more details.
- netgen/include/expr.hpp +32 -1
- netgen/include/kernels.hpp +124 -4
- netgen/include/l2hofe.hpp +1 -0
- netgen/include/mptools.hpp +442 -212
- netgen/include/recursive_pol.hpp +2 -2
- netgen/include/sparsematrix.hpp +1 -1
- netgen/include/thdivfe_impl.hpp +1 -1
- netgen/include/vector.hpp +7 -2
- netgen/libngbla.dylib +0 -0
- netgen/libngcomp.dylib +0 -0
- netgen/libngla.dylib +0 -0
- netgen/libngsbem.dylib +0 -0
- netgen/libngstd.dylib +0 -0
- ngsolve/cmake/NGSolveConfig.cmake +1 -1
- ngsolve/config/config.py +5 -5
- {ngsolve-6.2.2506.post33.dev0.dist-info → ngsolve-6.2.2506.post45.dev0.dist-info}/METADATA +1 -1
- {ngsolve-6.2.2506.post33.dev0.dist-info → ngsolve-6.2.2506.post45.dev0.dist-info}/RECORD +53 -53
- {ngsolve-6.2.2506.post33.dev0.data → ngsolve-6.2.2506.post45.dev0.data}/data/Netgen.icns +0 -0
- {ngsolve-6.2.2506.post33.dev0.data → ngsolve-6.2.2506.post45.dev0.data}/data/bin/ngscxx +0 -0
- {ngsolve-6.2.2506.post33.dev0.data → ngsolve-6.2.2506.post45.dev0.data}/data/bin/ngsld +0 -0
- {ngsolve-6.2.2506.post33.dev0.data → ngsolve-6.2.2506.post45.dev0.data}/data/bin/ngsolve.tcl +0 -0
- {ngsolve-6.2.2506.post33.dev0.data → ngsolve-6.2.2506.post45.dev0.data}/data/bin/ngspy +0 -0
- {ngsolve-6.2.2506.post33.dev0.data → ngsolve-6.2.2506.post45.dev0.data}/data/share/ngsolve/beam.geo +0 -0
- {ngsolve-6.2.2506.post33.dev0.data → ngsolve-6.2.2506.post45.dev0.data}/data/share/ngsolve/beam.vol +0 -0
- {ngsolve-6.2.2506.post33.dev0.data → ngsolve-6.2.2506.post45.dev0.data}/data/share/ngsolve/chip.in2d +0 -0
- {ngsolve-6.2.2506.post33.dev0.data → ngsolve-6.2.2506.post45.dev0.data}/data/share/ngsolve/chip.vol +0 -0
- {ngsolve-6.2.2506.post33.dev0.data → ngsolve-6.2.2506.post45.dev0.data}/data/share/ngsolve/coil.geo +0 -0
- {ngsolve-6.2.2506.post33.dev0.data → ngsolve-6.2.2506.post45.dev0.data}/data/share/ngsolve/coil.vol +0 -0
- {ngsolve-6.2.2506.post33.dev0.data → ngsolve-6.2.2506.post45.dev0.data}/data/share/ngsolve/coilshield.geo +0 -0
- {ngsolve-6.2.2506.post33.dev0.data → ngsolve-6.2.2506.post45.dev0.data}/data/share/ngsolve/coilshield.vol +0 -0
- {ngsolve-6.2.2506.post33.dev0.data → ngsolve-6.2.2506.post45.dev0.data}/data/share/ngsolve/cube.geo +0 -0
- {ngsolve-6.2.2506.post33.dev0.data → ngsolve-6.2.2506.post45.dev0.data}/data/share/ngsolve/cube.vol +0 -0
- {ngsolve-6.2.2506.post33.dev0.data → ngsolve-6.2.2506.post45.dev0.data}/data/share/ngsolve/d10_DGdoubleglazing.pde +0 -0
- {ngsolve-6.2.2506.post33.dev0.data → ngsolve-6.2.2506.post45.dev0.data}/data/share/ngsolve/d11_chip_nitsche.pde +0 -0
- {ngsolve-6.2.2506.post33.dev0.data → ngsolve-6.2.2506.post45.dev0.data}/data/share/ngsolve/d1_square.pde +0 -0
- {ngsolve-6.2.2506.post33.dev0.data → ngsolve-6.2.2506.post45.dev0.data}/data/share/ngsolve/d2_chip.pde +0 -0
- {ngsolve-6.2.2506.post33.dev0.data → ngsolve-6.2.2506.post45.dev0.data}/data/share/ngsolve/d3_helmholtz.pde +0 -0
- {ngsolve-6.2.2506.post33.dev0.data → ngsolve-6.2.2506.post45.dev0.data}/data/share/ngsolve/d4_cube.pde +0 -0
- {ngsolve-6.2.2506.post33.dev0.data → ngsolve-6.2.2506.post45.dev0.data}/data/share/ngsolve/d5_beam.pde +0 -0
- {ngsolve-6.2.2506.post33.dev0.data → ngsolve-6.2.2506.post45.dev0.data}/data/share/ngsolve/d6_shaft.pde +0 -0
- {ngsolve-6.2.2506.post33.dev0.data → ngsolve-6.2.2506.post45.dev0.data}/data/share/ngsolve/d7_coil.pde +0 -0
- {ngsolve-6.2.2506.post33.dev0.data → ngsolve-6.2.2506.post45.dev0.data}/data/share/ngsolve/d8_coilshield.pde +0 -0
- {ngsolve-6.2.2506.post33.dev0.data → ngsolve-6.2.2506.post45.dev0.data}/data/share/ngsolve/d9_hybridDG.pde +0 -0
- {ngsolve-6.2.2506.post33.dev0.data → ngsolve-6.2.2506.post45.dev0.data}/data/share/ngsolve/doubleglazing.in2d +0 -0
- {ngsolve-6.2.2506.post33.dev0.data → ngsolve-6.2.2506.post45.dev0.data}/data/share/ngsolve/doubleglazing.vol +0 -0
- {ngsolve-6.2.2506.post33.dev0.data → ngsolve-6.2.2506.post45.dev0.data}/data/share/ngsolve/piezo2d40round4.vol.gz +0 -0
- {ngsolve-6.2.2506.post33.dev0.data → ngsolve-6.2.2506.post45.dev0.data}/data/share/ngsolve/shaft.geo +0 -0
- {ngsolve-6.2.2506.post33.dev0.data → ngsolve-6.2.2506.post45.dev0.data}/data/share/ngsolve/shaft.vol +0 -0
- {ngsolve-6.2.2506.post33.dev0.data → ngsolve-6.2.2506.post45.dev0.data}/data/share/ngsolve/square.in2d +0 -0
- {ngsolve-6.2.2506.post33.dev0.data → ngsolve-6.2.2506.post45.dev0.data}/data/share/ngsolve/square.vol +0 -0
- {ngsolve-6.2.2506.post33.dev0.dist-info → ngsolve-6.2.2506.post45.dev0.dist-info}/LICENSE +0 -0
- {ngsolve-6.2.2506.post33.dev0.dist-info → ngsolve-6.2.2506.post45.dev0.dist-info}/WHEEL +0 -0
- {ngsolve-6.2.2506.post33.dev0.dist-info → ngsolve-6.2.2506.post45.dev0.dist-info}/top_level.txt +0 -0
netgen/include/mptools.hpp
CHANGED
|
@@ -25,6 +25,7 @@ namespace ngsbem
|
|
|
25
25
|
|
|
26
26
|
constexpr int FMM_SW = 4;
|
|
27
27
|
|
|
28
|
+
|
|
28
29
|
|
|
29
30
|
// ************************ SIMD - creation (should end up in simd.hpp) *************
|
|
30
31
|
|
|
@@ -42,7 +43,19 @@ namespace ngsbem
|
|
|
42
43
|
}
|
|
43
44
|
|
|
44
45
|
|
|
45
|
-
|
|
46
|
+
class NGS_DLL_HEADER PrecomputedSqrts
|
|
47
|
+
{
|
|
48
|
+
public:
|
|
49
|
+
Array<double> sqrt_int;
|
|
50
|
+
// Array<double> inv_sqrt_int;
|
|
51
|
+
Array<double> sqrt_n_np1; // sqrt(n*(n+1))
|
|
52
|
+
Array<double> inv_sqrt_2np1_2np3; // 1/sqrt( (2n+1)*(2n+3) )
|
|
53
|
+
|
|
54
|
+
PrecomputedSqrts();
|
|
55
|
+
};
|
|
56
|
+
|
|
57
|
+
extern NGS_DLL_HEADER PrecomputedSqrts presqrt;
|
|
58
|
+
|
|
46
59
|
|
|
47
60
|
|
|
48
61
|
|
|
@@ -189,18 +202,26 @@ namespace ngsbem
|
|
|
189
202
|
void RotateY (double alpha, bool parallel = false);
|
|
190
203
|
|
|
191
204
|
|
|
205
|
+
|
|
192
206
|
static double CalcAmn (int m, int n)
|
|
193
207
|
{
|
|
194
208
|
if (m < 0) m=-m;
|
|
195
209
|
if (n < m) return 0;
|
|
196
|
-
|
|
210
|
+
|
|
211
|
+
if (2*n+1 < presqrt.sqrt_int.Size())
|
|
212
|
+
return presqrt.sqrt_int[n+1+m]*presqrt.sqrt_int[n+1-m] * presqrt.inv_sqrt_2np1_2np3[n];
|
|
213
|
+
else
|
|
214
|
+
return sqrt( (n+1.0+m)*(n+1.0-m) / ( (2*n+1)*(2*n+3) ));
|
|
197
215
|
}
|
|
198
216
|
|
|
199
217
|
static double CalcBmn (int m, int n)
|
|
200
218
|
{
|
|
201
219
|
double sgn = (m >= 0) ? 1 : -1;
|
|
202
|
-
if ( (m
|
|
203
|
-
|
|
220
|
+
if ( (m >= n) || (-m > n) ) return 0;
|
|
221
|
+
if (n <= presqrt.inv_sqrt_2np1_2np3.Size())
|
|
222
|
+
return sgn * presqrt.sqrt_n_np1[n-m-1] * presqrt.inv_sqrt_2np1_2np3[n-1];
|
|
223
|
+
else
|
|
224
|
+
return sgn * sqrt( (n-m-1.0)*(n-m) / ( (2*n-1.0)*(2*n+1)));
|
|
204
225
|
}
|
|
205
226
|
|
|
206
227
|
static double CalcDmn (int m, int n)
|
|
@@ -543,7 +564,7 @@ namespace ngsbem
|
|
|
543
564
|
};
|
|
544
565
|
|
|
545
566
|
|
|
546
|
-
static void
|
|
567
|
+
static void ProcessBatchSS(FlatArray<RecordingSS*> batch, double len, double theta) {
|
|
547
568
|
constexpr int vec_length = VecLength<entry_type>;
|
|
548
569
|
int batch_size = batch.Size();
|
|
549
570
|
int N = batch_size * vec_length;
|
|
@@ -555,42 +576,45 @@ namespace ngsbem
|
|
|
555
576
|
}
|
|
556
577
|
}
|
|
557
578
|
else if (N <= 3) {
|
|
558
|
-
|
|
579
|
+
ProcessVectorizedBatchSS<3, vec_length>(batch, len, theta);
|
|
559
580
|
}
|
|
560
581
|
else if (N <= 4) {
|
|
561
|
-
|
|
582
|
+
ProcessVectorizedBatchSS<4, vec_length>(batch, len, theta);
|
|
562
583
|
}
|
|
563
584
|
else if (N <= 6) {
|
|
564
|
-
|
|
585
|
+
ProcessVectorizedBatchSS<6, vec_length>(batch, len, theta);
|
|
565
586
|
}
|
|
566
587
|
else if (N <= 12) {
|
|
567
|
-
|
|
588
|
+
ProcessVectorizedBatchSS<12, vec_length>(batch, len, theta);
|
|
568
589
|
}
|
|
569
590
|
else if (N <= 24) {
|
|
570
|
-
|
|
591
|
+
ProcessVectorizedBatchSS<24, vec_length>(batch, len, theta);
|
|
571
592
|
}
|
|
572
593
|
else if (N <= 48) {
|
|
573
|
-
|
|
594
|
+
ProcessVectorizedBatchSS<48, vec_length>(batch, len, theta);
|
|
574
595
|
}
|
|
575
596
|
else if (N <= 96) {
|
|
576
|
-
|
|
597
|
+
ProcessVectorizedBatchSS<96, vec_length>(batch, len, theta);
|
|
577
598
|
}
|
|
578
599
|
else if (N <= 192) {
|
|
579
|
-
|
|
600
|
+
ProcessVectorizedBatchSS<192, vec_length>(batch, len, theta);
|
|
580
601
|
}
|
|
581
602
|
else {
|
|
582
603
|
// Split large batches
|
|
583
|
-
|
|
584
|
-
|
|
604
|
+
ProcessBatchSS(batch.Range(0, 192 / vec_length), len, theta);
|
|
605
|
+
ProcessBatchSS(batch.Range(192 / vec_length, batch_size), len, theta);
|
|
585
606
|
}
|
|
586
607
|
}
|
|
587
608
|
|
|
588
609
|
template<int N, int vec_length>
|
|
589
|
-
static void
|
|
610
|
+
static void ProcessVectorizedBatchSS(FlatArray<RecordingSS*> batch, double len, double theta) {
|
|
590
611
|
|
|
591
612
|
// *testout << "Processing vectorized S->S batch of size " << batch.Size() << ", with N = " << N << ", vec_length = " << vec_length << ", len = " << len << ", theta = " << theta << endl;
|
|
592
|
-
|
|
593
|
-
|
|
613
|
+
double kappa = batch[0]->mp_source->Kappa();
|
|
614
|
+
int so = batch[0]->mp_source->Order();
|
|
615
|
+
int to = batch[0]->mp_target->Order();
|
|
616
|
+
SphericalExpansion<Singular, Vec<N,Complex>> vec_source(so, kappa, batch[0]->mp_source->RTyp());
|
|
617
|
+
SphericalExpansion<Singular, Vec<N,Complex>> vec_target(to, kappa, batch[0]->mp_target->RTyp());
|
|
594
618
|
|
|
595
619
|
// Copy multipoles into vectorized multipole
|
|
596
620
|
for (int i = 0; i < batch.Size(); i++)
|
|
@@ -688,10 +712,10 @@ namespace ngsbem
|
|
|
688
712
|
for (auto [sp,ep,j,num] : currents)
|
|
689
713
|
AddCurrent (sp,ep,j,num);
|
|
690
714
|
|
|
691
|
-
charges.
|
|
692
|
-
dipoles.
|
|
693
|
-
chargedipoles.
|
|
694
|
-
currents.
|
|
715
|
+
charges.DeleteAll();
|
|
716
|
+
dipoles.DeleteAll();
|
|
717
|
+
chargedipoles.DeleteAll();
|
|
718
|
+
currents.DeleteAll();
|
|
695
719
|
}
|
|
696
720
|
|
|
697
721
|
|
|
@@ -718,7 +742,7 @@ namespace ngsbem
|
|
|
718
742
|
|
|
719
743
|
// if (r*mp.Kappa() < 1e-8) return;
|
|
720
744
|
if (level > 20) return;
|
|
721
|
-
if (charges.Size() < maxdirect && r*mp.Kappa() <
|
|
745
|
+
if (charges.Size() < maxdirect && r*mp.Kappa() < 5)
|
|
722
746
|
return;
|
|
723
747
|
|
|
724
748
|
SendSourcesToChilds();
|
|
@@ -746,8 +770,9 @@ namespace ngsbem
|
|
|
746
770
|
}
|
|
747
771
|
|
|
748
772
|
dipoles.Append (tuple{x,d,c});
|
|
749
|
-
|
|
750
|
-
if (
|
|
773
|
+
|
|
774
|
+
if (level > 20) return;
|
|
775
|
+
if (dipoles.Size() < maxdirect)
|
|
751
776
|
return;
|
|
752
777
|
|
|
753
778
|
SendSourcesToChilds();
|
|
@@ -797,7 +822,7 @@ namespace ngsbem
|
|
|
797
822
|
Array<double> split;
|
|
798
823
|
split.Append(0);
|
|
799
824
|
for (int i = 0; i < 3; i++)
|
|
800
|
-
if (sp(i) < center(i) != ep(i) < center(i))
|
|
825
|
+
if ((sp(i) < center(i)) != (ep(i) < center(i)))
|
|
801
826
|
split += (center(i)-sp(i)) / (ep(i)-sp(i)); // segment cuts i-th coordinate plane
|
|
802
827
|
split.Append(1);
|
|
803
828
|
BubbleSort(split);
|
|
@@ -859,26 +884,30 @@ namespace ngsbem
|
|
|
859
884
|
return sum;
|
|
860
885
|
}
|
|
861
886
|
|
|
862
|
-
{
|
|
863
|
-
// static Timer t("fmm direct eval"); RegionTimer reg(t);
|
|
864
|
-
// t.AddFlops (charges.Size());
|
|
865
887
|
if (simd_charges.Size())
|
|
866
888
|
{
|
|
867
889
|
// static Timer t("mptool singmp, evaluate, simd charges"); RegionTimer r(t);
|
|
890
|
+
// t.AddFlops (charges.Size());
|
|
868
891
|
|
|
869
892
|
simd_entry_type vsum{0.0};
|
|
870
893
|
if (mp.Kappa() < 1e-12)
|
|
871
894
|
{
|
|
872
895
|
for (auto [x,c] : simd_charges)
|
|
873
896
|
{
|
|
897
|
+
auto rho = L2Norm(p-x);
|
|
898
|
+
auto kernel = 1/(4*M_PI)/rho;
|
|
899
|
+
kernel = If(rho > 0.0, kernel, SIMD<double,FMM_SW>(0.0));
|
|
900
|
+
vsum += kernel * c;
|
|
901
|
+
|
|
902
|
+
/*
|
|
874
903
|
auto rho2 = L2Norm2(p-x);
|
|
875
904
|
auto kernel = (1/(4*M_PI)) * rsqrt(rho2);
|
|
876
905
|
kernel = If(rho2 > 0.0, kernel, SIMD<double,FMM_SW>(0.0));
|
|
877
906
|
vsum += kernel * c;
|
|
907
|
+
*/
|
|
878
908
|
}
|
|
879
909
|
}
|
|
880
|
-
else
|
|
881
|
-
if (mp.Kappa() < 1e-8)
|
|
910
|
+
else if (mp.Kappa() < 1e-8)
|
|
882
911
|
for (auto [x,c] : simd_charges)
|
|
883
912
|
{
|
|
884
913
|
auto rho = L2Norm(p-x);
|
|
@@ -895,7 +924,7 @@ namespace ngsbem
|
|
|
895
924
|
kernel = If(rho > 0.0, kernel, SIMD<Complex,FMM_SW>(0.0));
|
|
896
925
|
vsum += kernel * c;
|
|
897
926
|
}
|
|
898
|
-
|
|
927
|
+
|
|
899
928
|
sum += HSum(vsum);
|
|
900
929
|
}
|
|
901
930
|
else
|
|
@@ -911,43 +940,43 @@ namespace ngsbem
|
|
|
911
940
|
if (double rho = L2Norm(p-x); rho > 0)
|
|
912
941
|
sum += (1/(4*M_PI))*exp(Complex(0,rho*mp.Kappa())) / rho * c;
|
|
913
942
|
}
|
|
914
|
-
}
|
|
915
943
|
|
|
916
944
|
if (simd_dipoles.Size())
|
|
917
|
-
{
|
|
918
|
-
// static Timer t("mptool singmp, evaluate, simd dipoles"); RegionTimer r(t);
|
|
919
|
-
|
|
920
|
-
simd_entry_type vsum{0.0};
|
|
921
|
-
for (auto [x,d,c] : simd_dipoles)
|
|
922
945
|
{
|
|
923
|
-
|
|
924
|
-
|
|
925
|
-
|
|
926
|
-
auto
|
|
927
|
-
|
|
928
|
-
|
|
929
|
-
|
|
930
|
-
|
|
946
|
+
// static Timer t("mptool singmp, evaluate, simd dipoles"); RegionTimer r(t);
|
|
947
|
+
|
|
948
|
+
simd_entry_type vsum{0.0};
|
|
949
|
+
for (auto [x,d,c] : simd_dipoles)
|
|
950
|
+
{
|
|
951
|
+
auto rho = L2Norm(p-x);
|
|
952
|
+
auto drhodp = (1.0/rho) * (p-x);
|
|
953
|
+
auto [si,co] = sincos(rho*mp.Kappa());
|
|
954
|
+
auto dGdrho = (1/(4*M_PI))*SIMD<Complex,FMM_SW>(co,si) *
|
|
955
|
+
(-1.0/(rho*rho) + SIMD<Complex,FMM_SW>(0, mp.Kappa())/rho);
|
|
956
|
+
auto kernel = dGdrho * InnerProduct(drhodp, d);
|
|
957
|
+
kernel = If(rho > 0.0, kernel, SIMD<Complex,FMM_SW>(0.0));
|
|
958
|
+
vsum += kernel * c;
|
|
959
|
+
}
|
|
960
|
+
sum += HSum(vsum);
|
|
931
961
|
}
|
|
932
|
-
sum += HSum(vsum);
|
|
933
|
-
}
|
|
934
962
|
else
|
|
935
|
-
{
|
|
936
|
-
for (auto [x,d,c] : dipoles)
|
|
937
|
-
if (double rho = L2Norm(p-x); rho > 0)
|
|
938
963
|
{
|
|
939
|
-
|
|
940
|
-
|
|
941
|
-
|
|
942
|
-
|
|
964
|
+
for (auto [x,d,c] : dipoles)
|
|
965
|
+
if (double rho = L2Norm(p-x); rho > 0)
|
|
966
|
+
{
|
|
967
|
+
Vec<3> drhodp = 1.0/rho * (p-x);
|
|
968
|
+
Complex dGdrho = (1/(4*M_PI))*exp(Complex(0,rho*mp.Kappa())) *
|
|
969
|
+
(Complex(0, mp.Kappa())/rho - 1.0/sqr(rho));
|
|
970
|
+
sum += dGdrho * InnerProduct(drhodp, d) * c;
|
|
971
|
+
}
|
|
943
972
|
}
|
|
944
|
-
|
|
945
|
-
|
|
946
|
-
|
|
947
|
-
|
|
948
|
-
if (simd_chargedipoles.Size())
|
|
973
|
+
|
|
974
|
+
|
|
975
|
+
|
|
976
|
+
if (simd_chargedipoles.Size())
|
|
949
977
|
{
|
|
950
978
|
// static Timer t("mptool singmp, evaluate, simd chargedipoles"); RegionTimer r(t);
|
|
979
|
+
// t.AddFlops (simd_chargedipoles.Size()*FMM_SW);
|
|
951
980
|
|
|
952
981
|
simd_entry_type vsum{0.0};
|
|
953
982
|
for (auto [x,c,d,c2] : simd_chargedipoles)
|
|
@@ -956,10 +985,10 @@ namespace ngsbem
|
|
|
956
985
|
auto rhokappa = rho*mp.Kappa();
|
|
957
986
|
auto invrho = If(rho>0.0, 1.0/rho, SIMD<double,FMM_SW>(0.0));
|
|
958
987
|
auto [si,co] = sincos(rhokappa);
|
|
959
|
-
|
|
988
|
+
|
|
960
989
|
auto kernelc = (1/(4*M_PI))*invrho*SIMD<Complex,FMM_SW>(co,si);
|
|
961
990
|
vsum += kernelc * c;
|
|
962
|
-
|
|
991
|
+
|
|
963
992
|
auto kernel =
|
|
964
993
|
invrho*invrho * InnerProduct(p-x, d) *
|
|
965
994
|
kernelc * SIMD<Complex,FMM_SW>(-1.0, rhokappa);
|
|
@@ -968,15 +997,16 @@ namespace ngsbem
|
|
|
968
997
|
}
|
|
969
998
|
sum += HSum(vsum);
|
|
970
999
|
}
|
|
971
|
-
|
|
1000
|
+
else
|
|
972
1001
|
{
|
|
973
1002
|
// static Timer t("mptool singmp, evaluate, chargedipoles"); RegionTimer r(t);
|
|
1003
|
+
// t.AddFlops (chargedipoles.Size());
|
|
974
1004
|
|
|
975
1005
|
for (auto [x,c,d,c2] : chargedipoles)
|
|
976
1006
|
if (double rho = L2Norm(p-x); rho > 0)
|
|
977
1007
|
{
|
|
978
1008
|
sum += (1/(4*M_PI))*exp(Complex(0,rho*mp.Kappa())) / rho * c;
|
|
979
|
-
|
|
1009
|
+
|
|
980
1010
|
Vec<3> drhodp = 1.0/rho * (p-x);
|
|
981
1011
|
Complex dGdrho = (1/(4*M_PI))*exp(Complex(0,rho*mp.Kappa())) *
|
|
982
1012
|
(Complex(0, mp.Kappa())/rho - 1.0/sqr(rho));
|
|
@@ -1025,7 +1055,14 @@ namespace ngsbem
|
|
|
1025
1055
|
}
|
|
1026
1056
|
|
|
1027
1057
|
if (dipoles.Size())
|
|
1028
|
-
|
|
1058
|
+
{
|
|
1059
|
+
static int cnt = 0;
|
|
1060
|
+
cnt++;
|
|
1061
|
+
if (cnt < 3)
|
|
1062
|
+
cout << "we know what we do - evaluateDeriv not implemented for dipoles in SingularMLExpansion" << endl;
|
|
1063
|
+
// return sum;
|
|
1064
|
+
// throw Exception("EvaluateDeriv not implemented for dipoles in SingularMLExpansion");
|
|
1065
|
+
}
|
|
1029
1066
|
if (chargedipoles.Size())
|
|
1030
1067
|
throw Exception("EvaluateDeriv not implemented for dipoles in SingularMLExpansion");
|
|
1031
1068
|
|
|
@@ -1134,8 +1171,6 @@ namespace ngsbem
|
|
|
1134
1171
|
simd_chargedipoles[ii] = MakeSimd(di);
|
|
1135
1172
|
}
|
|
1136
1173
|
|
|
1137
|
-
|
|
1138
|
-
|
|
1139
1174
|
|
|
1140
1175
|
if (nodes_to_process)
|
|
1141
1176
|
*nodes_to_process += this;
|
|
@@ -1224,6 +1259,14 @@ namespace ngsbem
|
|
|
1224
1259
|
num += ch->NumCoefficients();
|
|
1225
1260
|
return num;
|
|
1226
1261
|
}
|
|
1262
|
+
|
|
1263
|
+
// Calls func on this node first and then, recursively, on every existing
// child subtree. Empty child slots are skipped.
void TraverseTree (const std::function<void(Node&)> & func)
{
  func(*this);

  for (auto & ch : childs)
    {
      if (!ch) continue;
      ch->TraverseTree(func);
    }
}
|
|
1227
1270
|
};
|
|
1228
1271
|
|
|
1229
1272
|
Node root;
|
|
@@ -1313,89 +1356,93 @@ namespace ngsbem
|
|
|
1313
1356
|
|
|
1314
1357
|
root.CalcTotalSources();
|
|
1315
1358
|
|
|
1316
|
-
if (false)
|
|
1359
|
+
if constexpr (false)
|
|
1317
1360
|
// direct evaluation of S->S
|
|
1318
1361
|
root.CalcMP(nullptr, nullptr);
|
|
1319
1362
|
else
|
|
1320
1363
|
{
|
|
1321
1364
|
|
|
1322
|
-
|
|
1323
|
-
|
|
1365
|
+
Array<RecordingSS> recording;
|
|
1366
|
+
Array<Node*> nodes_to_process;
|
|
1324
1367
|
|
|
1325
|
-
|
|
1326
|
-
|
|
1327
|
-
|
|
1328
|
-
|
|
1368
|
+
{
|
|
1369
|
+
RegionTimer reg(trec);
|
|
1370
|
+
root.CalcMP(&recording, &nodes_to_process);
|
|
1371
|
+
}
|
|
1329
1372
|
|
|
1330
|
-
|
|
1331
|
-
|
|
1332
|
-
|
|
1333
|
-
|
|
1334
|
-
|
|
1335
|
-
|
|
1336
|
-
|
|
1337
|
-
|
|
1338
|
-
|
|
1339
|
-
|
|
1340
|
-
|
|
1341
|
-
|
|
1342
|
-
|
|
1343
|
-
|
|
1344
|
-
|
|
1345
|
-
|
|
1346
|
-
|
|
1347
|
-
|
|
1348
|
-
|
|
1349
|
-
|
|
1350
|
-
|
|
1351
|
-
|
|
1352
|
-
|
|
1353
|
-
|
|
1373
|
+
{
|
|
1374
|
+
RegionTimer rs2mp(ts2mp);
|
|
1375
|
+
ParallelFor(nodes_to_process.Size(), [&](int i)
|
|
1376
|
+
{
|
|
1377
|
+
auto node = nodes_to_process[i];
|
|
1378
|
+
for (auto [x,c]: node->charges)
|
|
1379
|
+
node->mp.AddCharge(x-node->center, c);
|
|
1380
|
+
for (auto [x,d,c]: node->dipoles)
|
|
1381
|
+
node->mp.AddDipole(x-node->center, d, c);
|
|
1382
|
+
for (auto [x,c,d,c2]: node->chargedipoles)
|
|
1383
|
+
node->mp.AddChargeDipole(x-node->center, c, d, c2);
|
|
1384
|
+
for (auto [sp,ep,j,num]: node->currents)
|
|
1385
|
+
node->mp.AddCurrent(sp-node->center, ep-node->center, j, num);
|
|
1386
|
+
}, TasksPerThread(4));
|
|
1387
|
+
}
|
|
1388
|
+
|
|
1389
|
+
{
|
|
1390
|
+
RegionTimer reg(tsort);
|
|
1391
|
+
QuickSort (recording, [] (auto & a, auto & b)
|
|
1392
|
+
{
|
|
1393
|
+
if (a.len < (1-1e-8) * b.len) return true;
|
|
1394
|
+
if (a.len > (1+1e-8) * b.len) return false;
|
|
1395
|
+
return a.theta < b.theta;
|
|
1396
|
+
});
|
|
1397
|
+
}
|
|
1354
1398
|
|
|
1355
|
-
|
|
1356
|
-
|
|
1357
|
-
|
|
1358
|
-
|
|
1359
|
-
|
|
1360
|
-
|
|
1361
|
-
|
|
1362
|
-
|
|
1363
|
-
|
|
1364
|
-
|
|
1365
|
-
|
|
1399
|
+
double current_len = -1e100;
|
|
1400
|
+
double current_theta = -1e100;
|
|
1401
|
+
Array<RecordingSS*> current_batch;
|
|
1402
|
+
Array<Array<RecordingSS*>> batch_group;
|
|
1403
|
+
Array<double> group_lengths;
|
|
1404
|
+
Array<double> group_thetas;
|
|
1405
|
+
for (auto & record : recording)
|
|
1406
|
+
{
|
|
1407
|
+
bool len_changed = fabs(record.len - current_len) > 1e-8;
|
|
1408
|
+
bool theta_changed = fabs(record.theta - current_theta) > 1e-8;
|
|
1409
|
+
if ((len_changed || theta_changed) && current_batch.Size() > 0) {
|
|
1410
|
+
batch_group.Append(current_batch);
|
|
1411
|
+
group_lengths.Append(current_len);
|
|
1412
|
+
group_thetas.Append(current_theta);
|
|
1413
|
+
current_batch.SetSize(0);
|
|
1414
|
+
}
|
|
1415
|
+
|
|
1416
|
+
current_len = record.len;
|
|
1417
|
+
current_theta = record.theta;
|
|
1418
|
+
current_batch.Append(&record);
|
|
1419
|
+
}
|
|
1420
|
+
|
|
1421
|
+
if (current_batch.Size() > 0) {
|
|
1366
1422
|
batch_group.Append(current_batch);
|
|
1367
1423
|
group_lengths.Append(current_len);
|
|
1368
1424
|
group_thetas.Append(current_theta);
|
|
1369
|
-
|
|
1370
|
-
}
|
|
1425
|
+
}
|
|
1371
1426
|
|
|
1372
|
-
|
|
1373
|
-
|
|
1374
|
-
|
|
1427
|
+
{
|
|
1428
|
+
RegionTimer rS2S(tS2S);
|
|
1429
|
+
// ParallelFor(batch_group.Size(), [&](int i) {
|
|
1430
|
+
for (int i = 0; i < batch_group.Size(); i++){
|
|
1431
|
+
// *testout << "Processing batch " << i << " of size " << batch_group[i].Size() << ", with len = " << group_lengths[i] << ", theta = " << group_thetas[i] << endl;
|
|
1432
|
+
int chunk_size = 24;
|
|
1433
|
+
if (batch_group[i].Size() < chunk_size)
|
|
1434
|
+
ProcessBatchSS(batch_group[i], group_lengths[i], group_thetas[i]);
|
|
1435
|
+
else
|
|
1436
|
+
ParallelForRange(IntRange(batch_group[i].Size()), [&](IntRange range) {
|
|
1437
|
+
auto sub_batch = batch_group[i].Range(range.First(), range.Next());
|
|
1438
|
+
ProcessBatchSS(sub_batch, group_lengths[i], group_thetas[i]);
|
|
1439
|
+
}, TasksPerThread(4));
|
|
1440
|
+
}
|
|
1441
|
+
}
|
|
1375
1442
|
}
|
|
1376
|
-
if (current_batch.Size() > 0) {
|
|
1377
|
-
batch_group.Append(current_batch);
|
|
1378
|
-
group_lengths.Append(current_len);
|
|
1379
|
-
group_thetas.Append(current_theta);
|
|
1380
|
-
}
|
|
1381
1443
|
|
|
1382
|
-
|
|
1383
|
-
|
|
1384
|
-
// ParallelFor(batch_group.Size(), [&](int i) {
|
|
1385
|
-
for (int i = 0; i < batch_group.Size(); i++){
|
|
1386
|
-
// *testout << "Processing batch " << i << " of size " << batch_group[i].Size() << ", with len = " << group_lengths[i] << ", theta = " << group_thetas[i] << endl;
|
|
1387
|
-
int chunk_size = 24;
|
|
1388
|
-
if (batch_group[i].Size() < chunk_size)
|
|
1389
|
-
ProcessBatch(batch_group[i], group_lengths[i], group_thetas[i]);
|
|
1390
|
-
else
|
|
1391
|
-
ParallelForRange(IntRange(batch_group[i].Size()), [&](IntRange range) {
|
|
1392
|
-
auto sub_batch = batch_group[i].Range(range.First(), range.Next());
|
|
1393
|
-
ProcessBatch(sub_batch, group_lengths[i], group_thetas[i]);
|
|
1394
|
-
}, TasksPerThread(4));
|
|
1395
|
-
}
|
|
1396
|
-
}
|
|
1397
|
-
}
|
|
1398
|
-
|
|
1444
|
+
// cout << "have singular:" << endl;
|
|
1445
|
+
// PrintStatistics (cout);
|
|
1399
1446
|
havemp = true;
|
|
1400
1447
|
}
|
|
1401
1448
|
|
|
@@ -1407,6 +1454,41 @@ namespace ngsbem
|
|
|
1407
1454
|
return root.Evaluate(p);
|
|
1408
1455
|
}
|
|
1409
1456
|
|
|
1457
|
+
|
|
1458
|
+
// Writes a summary of the node tree to ost:
//   - the deepest level and the total number of nodes,
//   - per level: node count, largest expansion order, number of coefficients,
//   - the coefficient storage in MB (sizeof(entry_type) per coefficient).
// All output goes to ost.
void PrintStatistics (ostream & ost)
{
  // first pass: depth of the tree and node count
  int levels = 0;
  int cnt = 0;
  root.TraverseTree( [&](Node & node) {
    levels = max(levels, node.level);
    cnt++;
  });
  ost << "levels: " << levels << endl;
  ost << "nodes: " << cnt << endl;

  Array<int> num_on_level(levels+1);
  Array<int> order_on_level(levels+1);
  Array<size_t> coefs_on_level(levels+1);
  // Zero all three accumulators. coefs_on_level is summed into with += below,
  // so it needs the same explicit reset as the other two arrays.
  num_on_level = 0;
  order_on_level = 0;
  coefs_on_level = size_t(0);

  // second pass: per-level statistics
  root.TraverseTree( [&](Node & node) {
    num_on_level[node.level]++;
    order_on_level[node.level] = max(order_on_level[node.level],node.mp.Order());
    coefs_on_level[node.level] += node.mp.SH().Coefs().Size();
  });

  ost << "num on level" << endl;
  for (int i = 0; i < num_on_level.Size(); i++)
    ost << i << ": " << num_on_level[i] << ", order = " << order_on_level[i] << ", coefs " << coefs_on_level[i] << endl;

  size_t totcoefs = 0;
  for (auto n : coefs_on_level)
    totcoefs += n;
  // integer division: the figure is truncated to whole MB
  ost << "total mem in coefs: " << sizeof(entry_type)*totcoefs / sqr(1024) << " MB" << endl;
}
|
|
1489
|
+
|
|
1490
|
+
|
|
1491
|
+
|
|
1410
1492
|
template <typename entry_type2>
|
|
1411
1493
|
friend class RegularMLExpansion;
|
|
1412
1494
|
};
|
|
@@ -1420,6 +1502,9 @@ namespace ngsbem
|
|
|
1420
1502
|
}
|
|
1421
1503
|
|
|
1422
1504
|
|
|
1505
|
+
// *********************************** Regular multilevel Expansion
|
|
1506
|
+
|
|
1507
|
+
|
|
1423
1508
|
template <typename elem_type=Complex>
|
|
1424
1509
|
class NGS_DLL_HEADER RegularMLExpansion
|
|
1425
1510
|
{
|
|
@@ -1456,28 +1541,28 @@ namespace ngsbem
|
|
|
1456
1541
|
}
|
|
1457
1542
|
}
|
|
1458
1543
|
else if (N <= 3) {
|
|
1459
|
-
|
|
1544
|
+
ProcessVectorizedBatchRS<3, vec_length>(batch, len, theta);
|
|
1460
1545
|
}
|
|
1461
1546
|
else if (N <= 4) {
|
|
1462
|
-
|
|
1547
|
+
ProcessVectorizedBatchRS<4, vec_length>(batch, len, theta);
|
|
1463
1548
|
}
|
|
1464
1549
|
else if (N <= 6) {
|
|
1465
|
-
|
|
1550
|
+
ProcessVectorizedBatchRS<6, vec_length>(batch, len, theta);
|
|
1466
1551
|
}
|
|
1467
1552
|
else if (N <= 12) {
|
|
1468
|
-
|
|
1553
|
+
ProcessVectorizedBatchRS<12, vec_length>(batch, len, theta);
|
|
1469
1554
|
}
|
|
1470
1555
|
else if (N <= 24) {
|
|
1471
|
-
|
|
1556
|
+
ProcessVectorizedBatchRS<24, vec_length>(batch, len, theta);
|
|
1472
1557
|
}
|
|
1473
1558
|
else if (N <= 48) {
|
|
1474
|
-
|
|
1559
|
+
ProcessVectorizedBatchRS<48, vec_length>(batch, len, theta);
|
|
1475
1560
|
}
|
|
1476
1561
|
else if (N <= 96) {
|
|
1477
|
-
|
|
1562
|
+
ProcessVectorizedBatchRS<96, vec_length>(batch, len, theta);
|
|
1478
1563
|
}
|
|
1479
1564
|
else if (N <= 192) {
|
|
1480
|
-
|
|
1565
|
+
ProcessVectorizedBatchRS<192, vec_length>(batch, len, theta);
|
|
1481
1566
|
}
|
|
1482
1567
|
else {
|
|
1483
1568
|
// Split large batches
|
|
@@ -1509,7 +1594,7 @@ namespace ngsbem
|
|
|
1509
1594
|
|
|
1510
1595
|
|
|
1511
1596
|
template<int N, int vec_length>
|
|
1512
|
-
static void
|
|
1597
|
+
static void ProcessVectorizedBatchRS(FlatArray<RecordingRS*> batch, double len, double theta) {
|
|
1513
1598
|
|
|
1514
1599
|
// static Timer t("ProcessVectorizedBatch, N = "+ToString(N) + ", vec_len = " + ToString(vec_length));
|
|
1515
1600
|
// RegionTimer reg(t, batch[0]->mpS->SH().Order());
|
|
@@ -1570,6 +1655,7 @@ namespace ngsbem
|
|
|
1570
1655
|
std::array<unique_ptr<Node>,8> childs;
|
|
1571
1656
|
SphericalExpansion<Regular,elem_type> mp;
|
|
1572
1657
|
Array<Vec<3>> targets;
|
|
1658
|
+
Array<tuple<Vec<3>,double>> vol_targets;
|
|
1573
1659
|
int total_targets;
|
|
1574
1660
|
std::mutex node_mutex;
|
|
1575
1661
|
atomic<bool> have_childs{false};
|
|
@@ -1577,15 +1663,22 @@ namespace ngsbem
|
|
|
1577
1663
|
Array<const typename SingularMLExpansion<elem_type>::Node*> singnodes;
|
|
1578
1664
|
|
|
1579
1665
|
// Builds a node with the given center, radius ar and tree level alevel.
// The expansion mp is created with order -1 here; Allocate() replaces it with
// an expansion of order MPOrder(r*kappa).
// Also counts the node in nodes_on_level when the level is inside that array.
Node (Vec<3> acenter, double ar, int alevel, double kappa)
  : center(acenter), r(ar), level(alevel),
    // earlier variant: mp(MPOrder(ar*kappa), kappa, ar)
    mp(-1, kappa, ar)
{
  if (level < nodes_on_level.Size())
    {
      nodes_on_level[level]++;
    }
}
|
|
1586
1674
|
|
|
1587
|
-
|
|
1588
|
-
|
|
1675
|
+
void Allocate()
|
|
1676
|
+
{
|
|
1677
|
+
mp = SphericalExpansion<Regular,elem_type>(MPOrder(r*mp.Kappa()), mp.Kappa(), r);
|
|
1678
|
+
}
|
|
1679
|
+
|
|
1680
|
+
|
|
1681
|
+
void CreateChilds(bool allocate = false)
|
|
1589
1682
|
{
|
|
1590
1683
|
if (childs[0]) throw Exception("have already childs");
|
|
1591
1684
|
// create children nodes:
|
|
@@ -1596,6 +1689,8 @@ namespace ngsbem
|
|
|
1596
1689
|
cc(1) += (i&2) ? r/2 : -r/2;
|
|
1597
1690
|
cc(2) += (i&4) ? r/2 : -r/2;
|
|
1598
1691
|
childs[i] = make_unique<Node> (cc, r/2, level+1, mp.Kappa());
|
|
1692
|
+
if (allocate)
|
|
1693
|
+
childs[i] -> Allocate();
|
|
1599
1694
|
}
|
|
1600
1695
|
have_childs = true;
|
|
1601
1696
|
}
|
|
@@ -1648,7 +1743,7 @@ namespace ngsbem
|
|
|
1648
1743
|
if (allow_refine)
|
|
1649
1744
|
{
|
|
1650
1745
|
if (!childs[0])
|
|
1651
|
-
CreateChilds();
|
|
1746
|
+
CreateChilds(true);
|
|
1652
1747
|
|
|
1653
1748
|
for (auto & ch : childs)
|
|
1654
1749
|
ch -> AddSingularNode (singnode, allow_refine, recording);
|
|
@@ -1668,7 +1763,7 @@ namespace ngsbem
|
|
|
1668
1763
|
childs[nr] -> AddSingularNode (singnode, allow_refine, recording);
|
|
1669
1764
|
});
|
|
1670
1765
|
|
|
1671
|
-
if (targets.Size())
|
|
1766
|
+
if (targets.Size()+vol_targets.Size())
|
|
1672
1767
|
singnodes.Append(&singnode);
|
|
1673
1768
|
}
|
|
1674
1769
|
}
|
|
@@ -1683,7 +1778,7 @@ namespace ngsbem
|
|
|
1683
1778
|
{
|
|
1684
1779
|
if (allow_refine)
|
|
1685
1780
|
if (mp.Order() > 30 && !childs[0])
|
|
1686
|
-
CreateChilds();
|
|
1781
|
+
CreateChilds(allow_refine);
|
|
1687
1782
|
|
|
1688
1783
|
if (childs[0])
|
|
1689
1784
|
{
|
|
@@ -1754,6 +1849,14 @@ namespace ngsbem
|
|
|
1754
1849
|
return sum;
|
|
1755
1850
|
}
|
|
1756
1851
|
|
|
1852
|
+
// Calls func on this node first and then, recursively, on every existing
// child subtree. Empty child slots are skipped.
void TraverseTree (const std::function<void(Node&)> & func)
{
  func(*this);

  for (auto & ch : childs)
    {
      if (!ch) continue;
      ch->TraverseTree(func);
    }
}
|
|
1859
|
+
|
|
1757
1860
|
double Norm() const
|
|
1758
1861
|
{
|
|
1759
1862
|
double norm = L2Norm(mp.SH().Coefs());
|
|
@@ -1771,17 +1874,23 @@ namespace ngsbem
|
|
|
1771
1874
|
num += ch->NumCoefficients();
|
|
1772
1875
|
return num;
|
|
1773
1876
|
}
|
|
1774
|
-
|
|
1877
|
+
|
|
1878
|
+
// Index (0..7) of the child octant that contains x: bit i of the result is
// set when x(i) is strictly greater than center(i).
int GetChildNum (Vec<3> x) const
{
  int octant = 0;
  for (int axis = 0; axis < 3; axis++)
    if (x(axis) > center(axis))
      octant |= (1 << axis);
  return octant;
}
|
|
1886
|
+
|
|
1775
1887
|
void AddTarget (Vec<3> x)
|
|
1776
1888
|
{
|
|
1777
1889
|
// if (childs[0])
|
|
1778
1890
|
if (have_childs) // quick check without locking
|
|
1779
1891
|
{
|
|
1780
1892
|
// directly send to childs:
|
|
1781
|
-
int childnum =
|
|
1782
|
-
if (x(0) > center(0)) childnum += 1;
|
|
1783
|
-
if (x(1) > center(1)) childnum += 2;
|
|
1784
|
-
if (x(2) > center(2)) childnum += 4;
|
|
1893
|
+
int childnum = GetChildNum(x);
|
|
1785
1894
|
childs[childnum] -> AddTarget( x );
|
|
1786
1895
|
return;
|
|
1787
1896
|
}
|
|
@@ -1791,32 +1900,74 @@ namespace ngsbem
|
|
|
1791
1900
|
if (have_childs) // test again after locking
|
|
1792
1901
|
{
|
|
1793
1902
|
// directly send to childs:
|
|
1794
|
-
int childnum =
|
|
1795
|
-
if (x(0) > center(0)) childnum += 1;
|
|
1796
|
-
if (x(1) > center(1)) childnum += 2;
|
|
1797
|
-
if (x(2) > center(2)) childnum += 4;
|
|
1903
|
+
int childnum = GetChildNum(x);
|
|
1798
1904
|
childs[childnum] -> AddTarget(x);
|
|
1799
1905
|
return;
|
|
1800
1906
|
}
|
|
1801
1907
|
|
|
1802
|
-
|
|
1803
1908
|
targets.Append( x );
|
|
1804
1909
|
|
|
1805
1910
|
// if (r*mp.Kappa() < 1e-8) return;
|
|
1806
1911
|
if (level > 20) return;
|
|
1807
|
-
if (targets.Size() < maxdirect && r*mp.Kappa() <
|
|
1912
|
+
if (targets.Size() < maxdirect && r*mp.Kappa() < 5)
|
|
1913
|
+
return;
|
|
1914
|
+
|
|
1915
|
+
CreateChilds();
|
|
1916
|
+
|
|
1917
|
+
for (auto t : targets)
|
|
1918
|
+
AddTarget (t);
|
|
1919
|
+
for (auto [x,r] : vol_targets)
|
|
1920
|
+
AddVolumeTarget (x,r);
|
|
1921
|
+
|
|
1922
|
+
targets.SetSize0();
|
|
1923
|
+
vol_targets.SetSize0();
|
|
1924
|
+
}
|
|
1925
|
+
|
|
1926
|
+
|
|
1927
|
+
/// Registers a volume target (center x, extent tr) in this subtree.
/// The target is ignored when it lies farther than r+tr (max-norm) from
/// the node center. If the node already has children, the target is
/// offered to every child; otherwise it is stored locally and the node
/// is subdivided once it holds too many volume targets or is too large
/// relative to the wavelength.
void AddVolumeTarget (Vec<3> x, double tr)
{
  // target does not reach this node's box
  if (MaxNorm(x-center) > r+tr) return;

  // a volume target may overlap several children, so offer it to all of them
  auto pass_to_children = [&] ()
  {
    for (auto & child : childs)
      child->AddVolumeTarget(x, tr);
  };

  if (have_childs)   // quick check without locking
    {
      pass_to_children();
      return;
    }

  lock_guard<mutex> guard(node_mutex);

  if (have_childs)   // test again after locking
    {
      pass_to_children();
      return;
    }

  vol_targets.Append (tuple(x,tr));

  // stay a leaf: depth limit reached, or few targets in a small box
  const bool keep_leaf =
    (level > 20) || (vol_targets.Size() < maxdirect && (r*mp.Kappa() < 5));
  if (keep_leaf)
    return;

  CreateChilds();

  // Re-insert everything stored so far. NOTE(review): this relies on
  // CreateChilds() setting have_childs, so that the nested calls return
  // through the unlocked fast path and never try to re-acquire node_mutex.
  for (auto pnt : targets)
    AddTarget (pnt);
  for (auto [vcenter, vradius] : vol_targets)
    AddVolumeTarget (vcenter, vradius);

  targets.SetSize0();
  vol_targets.SetSize0();
}
|
|
1816
1965
|
|
|
1966
|
+
|
|
1967
|
+
|
|
1817
1968
|
void CalcTotalTargets()
|
|
1818
1969
|
{
|
|
1819
|
-
total_targets = targets.Size();
|
|
1970
|
+
total_targets = targets.Size() + vol_targets.Size();
|
|
1820
1971
|
for (auto & child : childs)
|
|
1821
1972
|
if (child)
|
|
1822
1973
|
{
|
|
@@ -1838,6 +1989,19 @@ namespace ngsbem
|
|
|
1838
1989
|
if (total_targets == 0)
|
|
1839
1990
|
mp = SphericalExpansion<Regular,elem_type>(-1, mp.Kappa(),1.);
|
|
1840
1991
|
}
|
|
1992
|
+
|
|
1993
|
+
void AllocateMemory()
|
|
1994
|
+
{
|
|
1995
|
+
for (auto & child : childs)
|
|
1996
|
+
if (child)
|
|
1997
|
+
child->AllocateMemory();
|
|
1998
|
+
|
|
1999
|
+
if (total_targets > 0)
|
|
2000
|
+
Allocate();
|
|
2001
|
+
// mp = SphericalExpansion<Regular,elem_type>(MPOrder(r*mp.Kappa()), mp.Kappa(), r); // -1, mp.Kappa(),1.);
|
|
2002
|
+
}
|
|
2003
|
+
|
|
2004
|
+
|
|
1841
2005
|
|
|
1842
2006
|
|
|
1843
2007
|
void Print (ostream & ost, size_t childnr = -1) const
|
|
@@ -1863,7 +2027,8 @@ namespace ngsbem
|
|
|
1863
2027
|
: root(center, r, 0, asingmp->Kappa()), singmp(asingmp)
|
|
1864
2028
|
{
|
|
1865
2029
|
if (!singmp->havemp) throw Exception("first call Calc for singular MP");
|
|
1866
|
-
|
|
2030
|
+
root.Allocate();
|
|
2031
|
+
|
|
1867
2032
|
nodes_on_level = 0;
|
|
1868
2033
|
nodes_on_level[0] = 1;
|
|
1869
2034
|
{
|
|
@@ -1901,73 +2066,98 @@ namespace ngsbem
|
|
|
1901
2066
|
root.AddTarget (t);
|
|
1902
2067
|
}
|
|
1903
2068
|
|
|
2069
|
+
/// Inserts a volume target with center t and extent r into the tree by
/// handing it to the root node.
void AddVolumeTarget (Vec<3> t, double r)
{
  return root.AddVolumeTarget (t, r);
}
|
|
2073
|
+
|
|
1904
2074
|
void CalcMP(shared_ptr<SingularMLExpansion<elem_type>> asingmp, bool onlytargets = true)
|
|
1905
2075
|
{
|
|
1906
2076
|
static Timer t("mptool regular MLMP"); RegionTimer rg(t);
|
|
2077
|
+
static Timer tremove("removeempty");
|
|
1907
2078
|
static Timer trec("mptool regular MLMP - recording");
|
|
1908
2079
|
static Timer tsort("mptool regular MLMP - sort");
|
|
1909
2080
|
|
|
1910
2081
|
singmp = asingmp;
|
|
1911
2082
|
|
|
2083
|
+
|
|
1912
2084
|
root.CalcTotalTargets();
|
|
2085
|
+
// cout << "before remove empty trees:" << endl;
|
|
2086
|
+
// PrintStatistics(cout);
|
|
2087
|
+
|
|
2088
|
+
/*
|
|
2089
|
+
tremove.Start();
|
|
1913
2090
|
if (onlytargets)
|
|
1914
2091
|
root.RemoveEmptyTrees();
|
|
1915
|
-
|
|
2092
|
+
tremove.Stop();
|
|
2093
|
+
*/
|
|
1916
2094
|
|
|
1917
|
-
|
|
2095
|
+
root.AllocateMemory();
|
|
1918
2096
|
|
|
1919
|
-
//
|
|
1920
|
-
|
|
1921
|
-
|
|
1922
|
-
|
|
1923
|
-
|
|
1924
|
-
|
|
1925
|
-
|
|
1926
|
-
|
|
1927
|
-
{
|
|
1928
|
-
RegionTimer reg(tsort);
|
|
1929
|
-
QuickSort (recording, [] (auto & a, auto & b)
|
|
1930
|
-
{
|
|
1931
|
-
if (a.len < (1-1e-8) * b.len) return true;
|
|
1932
|
-
if (a.len > (1+1e-8) * b.len) return false;
|
|
1933
|
-
return a.theta < b.theta;
|
|
1934
|
-
});
|
|
1935
|
-
}
|
|
1936
|
-
|
|
1937
|
-
double current_len = -1e100;
|
|
1938
|
-
double current_theta = -1e100;
|
|
1939
|
-
Array<RecordingRS*> current_batch;
|
|
1940
|
-
Array<Array<RecordingRS*>> batch_group;
|
|
1941
|
-
Array<double> group_lengths;
|
|
1942
|
-
Array<double> group_thetas;
|
|
1943
|
-
for (auto & record : recording)
|
|
2097
|
+
// cout << "after allocating regular:" << endl;
|
|
2098
|
+
// PrintStatistics(cout);
|
|
2099
|
+
|
|
2100
|
+
// cout << "starting S-R conversion" << endl;
|
|
2101
|
+
// PrintStatistics(cout);
|
|
2102
|
+
|
|
2103
|
+
|
|
2104
|
+
if constexpr (false)
|
|
1944
2105
|
{
|
|
1945
|
-
|
|
1946
|
-
|
|
1947
|
-
|
|
2106
|
+
root.AddSingularNode(singmp->root, !onlytargets, nullptr);
|
|
2107
|
+
}
|
|
2108
|
+
else
|
|
2109
|
+
{ // use recording
|
|
2110
|
+
Array<RecordingRS> recording;
|
|
2111
|
+
{
|
|
2112
|
+
RegionTimer rrec(trec);
|
|
2113
|
+
root.AddSingularNode(singmp->root, !onlytargets, &recording);
|
|
2114
|
+
}
|
|
2115
|
+
|
|
2116
|
+
// cout << "recorded: " << recording.Size() << endl;
|
|
2117
|
+
{
|
|
2118
|
+
RegionTimer reg(tsort);
|
|
2119
|
+
QuickSort (recording, [] (auto & a, auto & b)
|
|
2120
|
+
{
|
|
2121
|
+
if (a.len < (1-1e-8) * b.len) return true;
|
|
2122
|
+
if (a.len > (1+1e-8) * b.len) return false;
|
|
2123
|
+
return a.theta < b.theta;
|
|
2124
|
+
});
|
|
2125
|
+
}
|
|
2126
|
+
|
|
2127
|
+
double current_len = -1e100;
|
|
2128
|
+
double current_theta = -1e100;
|
|
2129
|
+
Array<RecordingRS*> current_batch;
|
|
2130
|
+
Array<Array<RecordingRS*>> batch_group;
|
|
2131
|
+
Array<double> group_lengths;
|
|
2132
|
+
Array<double> group_thetas;
|
|
2133
|
+
for (auto & record : recording)
|
|
2134
|
+
{
|
|
2135
|
+
bool len_changed = fabs(record.len - current_len) > 1e-8;
|
|
2136
|
+
bool theta_changed = fabs(record.theta - current_theta) > 1e-8;
|
|
2137
|
+
if ((len_changed || theta_changed) && current_batch.Size() > 0) {
|
|
2138
|
+
// ProcessBatch(current_batch, current_len, current_theta);
|
|
2139
|
+
batch_group.Append(current_batch);
|
|
2140
|
+
group_lengths.Append(current_len);
|
|
2141
|
+
group_thetas.Append(current_theta);
|
|
2142
|
+
current_batch.SetSize(0);
|
|
2143
|
+
}
|
|
2144
|
+
|
|
2145
|
+
current_len = record.len;
|
|
2146
|
+
current_theta = record.theta;
|
|
2147
|
+
current_batch.Append(&record);
|
|
2148
|
+
}
|
|
2149
|
+
if (current_batch.Size() > 0) {
|
|
1948
2150
|
// ProcessBatch(current_batch, current_len, current_theta);
|
|
1949
2151
|
batch_group.Append(current_batch);
|
|
1950
2152
|
group_lengths.Append(current_len);
|
|
1951
2153
|
group_thetas.Append(current_theta);
|
|
1952
|
-
|
|
1953
|
-
|
|
1954
|
-
|
|
1955
|
-
|
|
1956
|
-
|
|
1957
|
-
current_batch.Append(&record);
|
|
2154
|
+
}
|
|
2155
|
+
|
|
2156
|
+
ParallelFor(batch_group.Size(), [&](int i) {
|
|
2157
|
+
ProcessBatchRS(batch_group[i], group_lengths[i], group_thetas[i]);
|
|
2158
|
+
}, TasksPerThread(4));
|
|
1958
2159
|
}
|
|
1959
|
-
|
|
1960
|
-
// ProcessBatch(current_batch, current_len, current_theta);
|
|
1961
|
-
batch_group.Append(current_batch);
|
|
1962
|
-
group_lengths.Append(current_len);
|
|
1963
|
-
group_thetas.Append(current_theta);
|
|
1964
|
-
}
|
|
1965
|
-
|
|
1966
|
-
ParallelFor(batch_group.Size(), [&](int i) {
|
|
1967
|
-
ProcessBatchRS(batch_group[i], group_lengths[i], group_thetas[i]);
|
|
1968
|
-
}, TasksPerThread(4));
|
|
1969
|
-
// */
|
|
1970
|
-
|
|
2160
|
+
|
|
1971
2161
|
|
|
1972
2162
|
/*
|
|
1973
2163
|
int maxlevel = 0;
|
|
@@ -1978,10 +2168,49 @@ namespace ngsbem
|
|
|
1978
2168
|
cout << "reg " << i << ": " << RegularMLExpansion::nodes_on_level[i] << endl;
|
|
1979
2169
|
*/
|
|
1980
2170
|
|
|
2171
|
+
// cout << "starting R-R conversion" << endl;
|
|
2172
|
+
// PrintStatistics(cout);
|
|
2173
|
+
|
|
1981
2174
|
static Timer tloc("mptool regular localize expansion"); RegionTimer rloc(tloc);
|
|
1982
2175
|
root.LocalizeExpansion(!onlytargets);
|
|
2176
|
+
|
|
2177
|
+
|
|
2178
|
+
// cout << "R-R conversion done" << endl;
|
|
2179
|
+
// PrintStatistics(cout);
|
|
1983
2180
|
}
|
|
1984
2181
|
|
|
2182
|
+
/// Writes a summary of the regular expansion tree to ost:
/// the deepest level, the total node count, and per level the number of
/// nodes, the largest expansion order and the number of stored
/// coefficients, followed by the total coefficient memory in MB.
/// @param ost  stream that receives the complete report
void PrintStatistics (ostream & ost)
{
  // first pass: tree depth and node count (needed to size the tables)
  int levels = 0;
  int cnt = 0;
  root.TraverseTree( [&](Node & node) {
    levels = max(levels, node.level);
    cnt++;
  });
  ost << "levels: " << levels << endl;
  ost << "nodes: " << cnt << endl;

  Array<int> num_on_level(levels+1);
  Array<int> order_on_level(levels+1);
  Array<size_t> coefs_on_level(levels+1);
  num_on_level = 0;
  order_on_level = 0;
  // the counters are accumulated with += below, so they have to start
  // at zero just like the two arrays above
  coefs_on_level = size_t(0);

  // second pass: per-level statistics
  root.TraverseTree( [&](Node & node) {
    num_on_level[node.level]++;
    order_on_level[node.level] = max(order_on_level[node.level],node.mp.Order());
    coefs_on_level[node.level] += node.mp.SH().Coefs().Size();
  });

  // report to the stream passed in by the caller, not to cout
  ost << "num on level" << endl;
  for (int i = 0; i <= levels; i++)
    ost << i << ": " << num_on_level[i] << ", order = " << order_on_level[i] << ", coefs " << coefs_on_level[i] << endl;

  size_t totcoefs = 0;
  for (auto n : coefs_on_level)
    totcoefs += n;
  // integer division: the value is truncated to whole MB
  ost << "total mem in coefs: " << sizeof(elem_type)*totcoefs / sqr(1024) << " MB" << endl;
}
|
|
2213
|
+
|
|
1985
2214
|
void Print (ostream & ost) const
|
|
1986
2215
|
{
|
|
1987
2216
|
root.Print(ost);
|
|
@@ -2001,6 +2230,7 @@ namespace ngsbem
|
|
|
2001
2230
|
{
|
|
2002
2231
|
// static Timer t("mptool Eval MLMP regular"); RegionTimer r(t);
|
|
2003
2232
|
// if (L2Norm(p-root.center) > root.r) return elem_type{0.0};
|
|
2233
|
+
|
|
2004
2234
|
if (MaxNorm(p-root.center) > root.r)
|
|
2005
2235
|
return singmp->Evaluate(p);
|
|
2006
2236
|
return root.Evaluate(p);
|