ngsolve 6.2.2506.post33.dev0__cp310-cp310-macosx_10_15_universal2.whl → 6.2.2506.post45.dev0__cp310-cp310-macosx_10_15_universal2.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ngsolve has been flagged as potentially problematic; consult the package's registry listing for more details.

Files changed (53)
  1. netgen/include/expr.hpp +32 -1
  2. netgen/include/kernels.hpp +124 -4
  3. netgen/include/l2hofe.hpp +1 -0
  4. netgen/include/mptools.hpp +442 -212
  5. netgen/include/recursive_pol.hpp +2 -2
  6. netgen/include/sparsematrix.hpp +1 -1
  7. netgen/include/thdivfe_impl.hpp +1 -1
  8. netgen/include/vector.hpp +7 -2
  9. netgen/libngbla.dylib +0 -0
  10. netgen/libngcomp.dylib +0 -0
  11. netgen/libngla.dylib +0 -0
  12. netgen/libngsbem.dylib +0 -0
  13. netgen/libngstd.dylib +0 -0
  14. ngsolve/cmake/NGSolveConfig.cmake +1 -1
  15. ngsolve/config/config.py +5 -5
  16. {ngsolve-6.2.2506.post33.dev0.dist-info → ngsolve-6.2.2506.post45.dev0.dist-info}/METADATA +1 -1
  17. {ngsolve-6.2.2506.post33.dev0.dist-info → ngsolve-6.2.2506.post45.dev0.dist-info}/RECORD +53 -53
  18. {ngsolve-6.2.2506.post33.dev0.data → ngsolve-6.2.2506.post45.dev0.data}/data/Netgen.icns +0 -0
  19. {ngsolve-6.2.2506.post33.dev0.data → ngsolve-6.2.2506.post45.dev0.data}/data/bin/ngscxx +0 -0
  20. {ngsolve-6.2.2506.post33.dev0.data → ngsolve-6.2.2506.post45.dev0.data}/data/bin/ngsld +0 -0
  21. {ngsolve-6.2.2506.post33.dev0.data → ngsolve-6.2.2506.post45.dev0.data}/data/bin/ngsolve.tcl +0 -0
  22. {ngsolve-6.2.2506.post33.dev0.data → ngsolve-6.2.2506.post45.dev0.data}/data/bin/ngspy +0 -0
  23. {ngsolve-6.2.2506.post33.dev0.data → ngsolve-6.2.2506.post45.dev0.data}/data/share/ngsolve/beam.geo +0 -0
  24. {ngsolve-6.2.2506.post33.dev0.data → ngsolve-6.2.2506.post45.dev0.data}/data/share/ngsolve/beam.vol +0 -0
  25. {ngsolve-6.2.2506.post33.dev0.data → ngsolve-6.2.2506.post45.dev0.data}/data/share/ngsolve/chip.in2d +0 -0
  26. {ngsolve-6.2.2506.post33.dev0.data → ngsolve-6.2.2506.post45.dev0.data}/data/share/ngsolve/chip.vol +0 -0
  27. {ngsolve-6.2.2506.post33.dev0.data → ngsolve-6.2.2506.post45.dev0.data}/data/share/ngsolve/coil.geo +0 -0
  28. {ngsolve-6.2.2506.post33.dev0.data → ngsolve-6.2.2506.post45.dev0.data}/data/share/ngsolve/coil.vol +0 -0
  29. {ngsolve-6.2.2506.post33.dev0.data → ngsolve-6.2.2506.post45.dev0.data}/data/share/ngsolve/coilshield.geo +0 -0
  30. {ngsolve-6.2.2506.post33.dev0.data → ngsolve-6.2.2506.post45.dev0.data}/data/share/ngsolve/coilshield.vol +0 -0
  31. {ngsolve-6.2.2506.post33.dev0.data → ngsolve-6.2.2506.post45.dev0.data}/data/share/ngsolve/cube.geo +0 -0
  32. {ngsolve-6.2.2506.post33.dev0.data → ngsolve-6.2.2506.post45.dev0.data}/data/share/ngsolve/cube.vol +0 -0
  33. {ngsolve-6.2.2506.post33.dev0.data → ngsolve-6.2.2506.post45.dev0.data}/data/share/ngsolve/d10_DGdoubleglazing.pde +0 -0
  34. {ngsolve-6.2.2506.post33.dev0.data → ngsolve-6.2.2506.post45.dev0.data}/data/share/ngsolve/d11_chip_nitsche.pde +0 -0
  35. {ngsolve-6.2.2506.post33.dev0.data → ngsolve-6.2.2506.post45.dev0.data}/data/share/ngsolve/d1_square.pde +0 -0
  36. {ngsolve-6.2.2506.post33.dev0.data → ngsolve-6.2.2506.post45.dev0.data}/data/share/ngsolve/d2_chip.pde +0 -0
  37. {ngsolve-6.2.2506.post33.dev0.data → ngsolve-6.2.2506.post45.dev0.data}/data/share/ngsolve/d3_helmholtz.pde +0 -0
  38. {ngsolve-6.2.2506.post33.dev0.data → ngsolve-6.2.2506.post45.dev0.data}/data/share/ngsolve/d4_cube.pde +0 -0
  39. {ngsolve-6.2.2506.post33.dev0.data → ngsolve-6.2.2506.post45.dev0.data}/data/share/ngsolve/d5_beam.pde +0 -0
  40. {ngsolve-6.2.2506.post33.dev0.data → ngsolve-6.2.2506.post45.dev0.data}/data/share/ngsolve/d6_shaft.pde +0 -0
  41. {ngsolve-6.2.2506.post33.dev0.data → ngsolve-6.2.2506.post45.dev0.data}/data/share/ngsolve/d7_coil.pde +0 -0
  42. {ngsolve-6.2.2506.post33.dev0.data → ngsolve-6.2.2506.post45.dev0.data}/data/share/ngsolve/d8_coilshield.pde +0 -0
  43. {ngsolve-6.2.2506.post33.dev0.data → ngsolve-6.2.2506.post45.dev0.data}/data/share/ngsolve/d9_hybridDG.pde +0 -0
  44. {ngsolve-6.2.2506.post33.dev0.data → ngsolve-6.2.2506.post45.dev0.data}/data/share/ngsolve/doubleglazing.in2d +0 -0
  45. {ngsolve-6.2.2506.post33.dev0.data → ngsolve-6.2.2506.post45.dev0.data}/data/share/ngsolve/doubleglazing.vol +0 -0
  46. {ngsolve-6.2.2506.post33.dev0.data → ngsolve-6.2.2506.post45.dev0.data}/data/share/ngsolve/piezo2d40round4.vol.gz +0 -0
  47. {ngsolve-6.2.2506.post33.dev0.data → ngsolve-6.2.2506.post45.dev0.data}/data/share/ngsolve/shaft.geo +0 -0
  48. {ngsolve-6.2.2506.post33.dev0.data → ngsolve-6.2.2506.post45.dev0.data}/data/share/ngsolve/shaft.vol +0 -0
  49. {ngsolve-6.2.2506.post33.dev0.data → ngsolve-6.2.2506.post45.dev0.data}/data/share/ngsolve/square.in2d +0 -0
  50. {ngsolve-6.2.2506.post33.dev0.data → ngsolve-6.2.2506.post45.dev0.data}/data/share/ngsolve/square.vol +0 -0
  51. {ngsolve-6.2.2506.post33.dev0.dist-info → ngsolve-6.2.2506.post45.dev0.dist-info}/LICENSE +0 -0
  52. {ngsolve-6.2.2506.post33.dev0.dist-info → ngsolve-6.2.2506.post45.dev0.dist-info}/WHEEL +0 -0
  53. {ngsolve-6.2.2506.post33.dev0.dist-info → ngsolve-6.2.2506.post45.dev0.dist-info}/top_level.txt +0 -0
@@ -25,6 +25,7 @@ namespace ngsbem
25
25
 
26
26
  constexpr int FMM_SW = 4;
27
27
 
28
+
28
29
 
29
30
  // ************************ SIMD - creation (should end up in simd.hpp) *************
30
31
 
@@ -42,7 +43,19 @@ namespace ngsbem
42
43
  }
43
44
 
44
45
 
45
-
46
+ class NGS_DLL_HEADER PrecomputedSqrts
47
+ {
48
+ public:
49
+ Array<double> sqrt_int;
50
+ // Array<double> inv_sqrt_int;
51
+ Array<double> sqrt_n_np1; // sqrt(n*(n+1))
52
+ Array<double> inv_sqrt_2np1_2np3; // 1/sqrt( (2n+1)*(2n+3) )
53
+
54
+ PrecomputedSqrts();
55
+ };
56
+
57
+ extern NGS_DLL_HEADER PrecomputedSqrts presqrt;
58
+
46
59
 
47
60
 
48
61
 
@@ -189,18 +202,26 @@ namespace ngsbem
189
202
  void RotateY (double alpha, bool parallel = false);
190
203
 
191
204
 
205
+
192
206
  static double CalcAmn (int m, int n)
193
207
  {
194
208
  if (m < 0) m=-m;
195
209
  if (n < m) return 0;
196
- return sqrt( (n+1.0+m)*(n+1.0-m) / ( (2*n+1)*(2*n+3) ));
210
+
211
+ if (2*n+1 < presqrt.sqrt_int.Size())
212
+ return presqrt.sqrt_int[n+1+m]*presqrt.sqrt_int[n+1-m] * presqrt.inv_sqrt_2np1_2np3[n];
213
+ else
214
+ return sqrt( (n+1.0+m)*(n+1.0-m) / ( (2*n+1)*(2*n+3) ));
197
215
  }
198
216
 
199
217
  static double CalcBmn (int m, int n)
200
218
  {
201
219
  double sgn = (m >= 0) ? 1 : -1;
202
- if ( (m > n) || (-m > n) ) return 0;
203
- return sgn * sqrt( (n-m-1.0)*(n-m) / ( (2*n-1.0)*(2*n+1)));
220
+ if ( (m >= n) || (-m > n) ) return 0;
221
+ if (n <= presqrt.inv_sqrt_2np1_2np3.Size())
222
+ return sgn * presqrt.sqrt_n_np1[n-m-1] * presqrt.inv_sqrt_2np1_2np3[n-1];
223
+ else
224
+ return sgn * sqrt( (n-m-1.0)*(n-m) / ( (2*n-1.0)*(2*n+1)));
204
225
  }
205
226
 
206
227
  static double CalcDmn (int m, int n)
@@ -543,7 +564,7 @@ namespace ngsbem
543
564
  };
544
565
 
545
566
 
546
- static void ProcessBatch(FlatArray<RecordingSS*> batch, double len, double theta) {
567
+ static void ProcessBatchSS(FlatArray<RecordingSS*> batch, double len, double theta) {
547
568
  constexpr int vec_length = VecLength<entry_type>;
548
569
  int batch_size = batch.Size();
549
570
  int N = batch_size * vec_length;
@@ -555,42 +576,45 @@ namespace ngsbem
555
576
  }
556
577
  }
557
578
  else if (N <= 3) {
558
- ProcessVectorizedBatch<3, vec_length>(batch, len, theta);
579
+ ProcessVectorizedBatchSS<3, vec_length>(batch, len, theta);
559
580
  }
560
581
  else if (N <= 4) {
561
- ProcessVectorizedBatch<4, vec_length>(batch, len, theta);
582
+ ProcessVectorizedBatchSS<4, vec_length>(batch, len, theta);
562
583
  }
563
584
  else if (N <= 6) {
564
- ProcessVectorizedBatch<6, vec_length>(batch, len, theta);
585
+ ProcessVectorizedBatchSS<6, vec_length>(batch, len, theta);
565
586
  }
566
587
  else if (N <= 12) {
567
- ProcessVectorizedBatch<12, vec_length>(batch, len, theta);
588
+ ProcessVectorizedBatchSS<12, vec_length>(batch, len, theta);
568
589
  }
569
590
  else if (N <= 24) {
570
- ProcessVectorizedBatch<24, vec_length>(batch, len, theta);
591
+ ProcessVectorizedBatchSS<24, vec_length>(batch, len, theta);
571
592
  }
572
593
  else if (N <= 48) {
573
- ProcessVectorizedBatch<48, vec_length>(batch, len, theta);
594
+ ProcessVectorizedBatchSS<48, vec_length>(batch, len, theta);
574
595
  }
575
596
  else if (N <= 96) {
576
- ProcessVectorizedBatch<96, vec_length>(batch, len, theta);
597
+ ProcessVectorizedBatchSS<96, vec_length>(batch, len, theta);
577
598
  }
578
599
  else if (N <= 192) {
579
- ProcessVectorizedBatch<192, vec_length>(batch, len, theta);
600
+ ProcessVectorizedBatchSS<192, vec_length>(batch, len, theta);
580
601
  }
581
602
  else {
582
603
  // Split large batches
583
- ProcessBatch(batch.Range(0, 192 / vec_length), len, theta);
584
- ProcessBatch(batch.Range(192 / vec_length, batch_size), len, theta);
604
+ ProcessBatchSS(batch.Range(0, 192 / vec_length), len, theta);
605
+ ProcessBatchSS(batch.Range(192 / vec_length, batch_size), len, theta);
585
606
  }
586
607
  }
587
608
 
588
609
  template<int N, int vec_length>
589
- static void ProcessVectorizedBatch(FlatArray<RecordingSS*> batch, double len, double theta) {
610
+ static void ProcessVectorizedBatchSS(FlatArray<RecordingSS*> batch, double len, double theta) {
590
611
 
591
612
  // *testout << "Processing vectorized S->S batch of size " << batch.Size() << ", with N = " << N << ", vec_length = " << vec_length << ", len = " << len << ", theta = " << theta << endl;
592
- SphericalExpansion<Singular, Vec<N,Complex>> vec_source(batch[0]->mp_source->Order(), batch[0]->mp_source->Kappa(), batch[0]->mp_source->RTyp());
593
- SphericalExpansion<Singular, Vec<N,Complex>> vec_target(batch[0]->mp_target->Order(), batch[0]->mp_target->Kappa(), batch[0]->mp_target->RTyp());
613
+ double kappa = batch[0]->mp_source->Kappa();
614
+ int so = batch[0]->mp_source->Order();
615
+ int to = batch[0]->mp_target->Order();
616
+ SphericalExpansion<Singular, Vec<N,Complex>> vec_source(so, kappa, batch[0]->mp_source->RTyp());
617
+ SphericalExpansion<Singular, Vec<N,Complex>> vec_target(to, kappa, batch[0]->mp_target->RTyp());
594
618
 
595
619
  // Copy multipoles into vectorized multipole
596
620
  for (int i = 0; i < batch.Size(); i++)
@@ -688,10 +712,10 @@ namespace ngsbem
688
712
  for (auto [sp,ep,j,num] : currents)
689
713
  AddCurrent (sp,ep,j,num);
690
714
 
691
- charges.SetSize0();
692
- dipoles.SetSize0();
693
- chargedipoles.SetSize0();
694
- currents.SetSize0();
715
+ charges.DeleteAll();
716
+ dipoles.DeleteAll();
717
+ chargedipoles.DeleteAll();
718
+ currents.DeleteAll();
695
719
  }
696
720
 
697
721
 
@@ -718,7 +742,7 @@ namespace ngsbem
718
742
 
719
743
  // if (r*mp.Kappa() < 1e-8) return;
720
744
  if (level > 20) return;
721
- if (charges.Size() < maxdirect && r*mp.Kappa() < 1)
745
+ if (charges.Size() < maxdirect && r*mp.Kappa() < 5)
722
746
  return;
723
747
 
724
748
  SendSourcesToChilds();
@@ -746,8 +770,9 @@ namespace ngsbem
746
770
  }
747
771
 
748
772
  dipoles.Append (tuple{x,d,c});
749
-
750
- if (dipoles.Size() < maxdirect || r < 1e-8)
773
+
774
+ if (level > 20) return;
775
+ if (dipoles.Size() < maxdirect)
751
776
  return;
752
777
 
753
778
  SendSourcesToChilds();
@@ -797,7 +822,7 @@ namespace ngsbem
797
822
  Array<double> split;
798
823
  split.Append(0);
799
824
  for (int i = 0; i < 3; i++)
800
- if (sp(i) < center(i) != ep(i) < center(i))
825
+ if ((sp(i) < center(i)) != (ep(i) < center(i)))
801
826
  split += (center(i)-sp(i)) / (ep(i)-sp(i)); // segment cuts i-th coordinate plane
802
827
  split.Append(1);
803
828
  BubbleSort(split);
@@ -859,26 +884,30 @@ namespace ngsbem
859
884
  return sum;
860
885
  }
861
886
 
862
- {
863
- // static Timer t("fmm direct eval"); RegionTimer reg(t);
864
- // t.AddFlops (charges.Size());
865
887
  if (simd_charges.Size())
866
888
  {
867
889
  // static Timer t("mptool singmp, evaluate, simd charges"); RegionTimer r(t);
890
+ // t.AddFlops (charges.Size());
868
891
 
869
892
  simd_entry_type vsum{0.0};
870
893
  if (mp.Kappa() < 1e-12)
871
894
  {
872
895
  for (auto [x,c] : simd_charges)
873
896
  {
897
+ auto rho = L2Norm(p-x);
898
+ auto kernel = 1/(4*M_PI)/rho;
899
+ kernel = If(rho > 0.0, kernel, SIMD<double,FMM_SW>(0.0));
900
+ vsum += kernel * c;
901
+
902
+ /*
874
903
  auto rho2 = L2Norm2(p-x);
875
904
  auto kernel = (1/(4*M_PI)) * rsqrt(rho2);
876
905
  kernel = If(rho2 > 0.0, kernel, SIMD<double,FMM_SW>(0.0));
877
906
  vsum += kernel * c;
907
+ */
878
908
  }
879
909
  }
880
- else
881
- if (mp.Kappa() < 1e-8)
910
+ else if (mp.Kappa() < 1e-8)
882
911
  for (auto [x,c] : simd_charges)
883
912
  {
884
913
  auto rho = L2Norm(p-x);
@@ -895,7 +924,7 @@ namespace ngsbem
895
924
  kernel = If(rho > 0.0, kernel, SIMD<Complex,FMM_SW>(0.0));
896
925
  vsum += kernel * c;
897
926
  }
898
-
927
+
899
928
  sum += HSum(vsum);
900
929
  }
901
930
  else
@@ -911,43 +940,43 @@ namespace ngsbem
911
940
  if (double rho = L2Norm(p-x); rho > 0)
912
941
  sum += (1/(4*M_PI))*exp(Complex(0,rho*mp.Kappa())) / rho * c;
913
942
  }
914
- }
915
943
 
916
944
  if (simd_dipoles.Size())
917
- {
918
- // static Timer t("mptool singmp, evaluate, simd dipoles"); RegionTimer r(t);
919
-
920
- simd_entry_type vsum{0.0};
921
- for (auto [x,d,c] : simd_dipoles)
922
945
  {
923
- auto rho = L2Norm(p-x);
924
- auto drhodp = (1.0/rho) * (p-x);
925
- auto [si,co] = sincos(rho*mp.Kappa());
926
- auto dGdrho = (1/(4*M_PI))*SIMD<Complex,FMM_SW>(co,si) *
927
- (-1.0/(rho*rho) + SIMD<Complex,FMM_SW>(0, mp.Kappa())/rho);
928
- auto kernel = dGdrho * InnerProduct(drhodp, d);
929
- kernel = If(rho > 0.0, kernel, SIMD<Complex,FMM_SW>(0.0));
930
- vsum += kernel * c;
946
+ // static Timer t("mptool singmp, evaluate, simd dipoles"); RegionTimer r(t);
947
+
948
+ simd_entry_type vsum{0.0};
949
+ for (auto [x,d,c] : simd_dipoles)
950
+ {
951
+ auto rho = L2Norm(p-x);
952
+ auto drhodp = (1.0/rho) * (p-x);
953
+ auto [si,co] = sincos(rho*mp.Kappa());
954
+ auto dGdrho = (1/(4*M_PI))*SIMD<Complex,FMM_SW>(co,si) *
955
+ (-1.0/(rho*rho) + SIMD<Complex,FMM_SW>(0, mp.Kappa())/rho);
956
+ auto kernel = dGdrho * InnerProduct(drhodp, d);
957
+ kernel = If(rho > 0.0, kernel, SIMD<Complex,FMM_SW>(0.0));
958
+ vsum += kernel * c;
959
+ }
960
+ sum += HSum(vsum);
931
961
  }
932
- sum += HSum(vsum);
933
- }
934
962
  else
935
- {
936
- for (auto [x,d,c] : dipoles)
937
- if (double rho = L2Norm(p-x); rho > 0)
938
963
  {
939
- Vec<3> drhodp = 1.0/rho * (p-x);
940
- Complex dGdrho = (1/(4*M_PI))*exp(Complex(0,rho*mp.Kappa())) *
941
- (Complex(0, mp.Kappa())/rho - 1.0/sqr(rho));
942
- sum += dGdrho * InnerProduct(drhodp, d) * c;
964
+ for (auto [x,d,c] : dipoles)
965
+ if (double rho = L2Norm(p-x); rho > 0)
966
+ {
967
+ Vec<3> drhodp = 1.0/rho * (p-x);
968
+ Complex dGdrho = (1/(4*M_PI))*exp(Complex(0,rho*mp.Kappa())) *
969
+ (Complex(0, mp.Kappa())/rho - 1.0/sqr(rho));
970
+ sum += dGdrho * InnerProduct(drhodp, d) * c;
971
+ }
943
972
  }
944
- }
945
-
946
-
947
-
948
- if (simd_chargedipoles.Size())
973
+
974
+
975
+
976
+ if (simd_chargedipoles.Size())
949
977
  {
950
978
  // static Timer t("mptool singmp, evaluate, simd chargedipoles"); RegionTimer r(t);
979
+ // t.AddFlops (simd_chargedipoles.Size()*FMM_SW);
951
980
 
952
981
  simd_entry_type vsum{0.0};
953
982
  for (auto [x,c,d,c2] : simd_chargedipoles)
@@ -956,10 +985,10 @@ namespace ngsbem
956
985
  auto rhokappa = rho*mp.Kappa();
957
986
  auto invrho = If(rho>0.0, 1.0/rho, SIMD<double,FMM_SW>(0.0));
958
987
  auto [si,co] = sincos(rhokappa);
959
-
988
+
960
989
  auto kernelc = (1/(4*M_PI))*invrho*SIMD<Complex,FMM_SW>(co,si);
961
990
  vsum += kernelc * c;
962
-
991
+
963
992
  auto kernel =
964
993
  invrho*invrho * InnerProduct(p-x, d) *
965
994
  kernelc * SIMD<Complex,FMM_SW>(-1.0, rhokappa);
@@ -968,15 +997,16 @@ namespace ngsbem
968
997
  }
969
998
  sum += HSum(vsum);
970
999
  }
971
- else
1000
+ else
972
1001
  {
973
1002
  // static Timer t("mptool singmp, evaluate, chargedipoles"); RegionTimer r(t);
1003
+ // t.AddFlops (chargedipoles.Size());
974
1004
 
975
1005
  for (auto [x,c,d,c2] : chargedipoles)
976
1006
  if (double rho = L2Norm(p-x); rho > 0)
977
1007
  {
978
1008
  sum += (1/(4*M_PI))*exp(Complex(0,rho*mp.Kappa())) / rho * c;
979
-
1009
+
980
1010
  Vec<3> drhodp = 1.0/rho * (p-x);
981
1011
  Complex dGdrho = (1/(4*M_PI))*exp(Complex(0,rho*mp.Kappa())) *
982
1012
  (Complex(0, mp.Kappa())/rho - 1.0/sqr(rho));
@@ -1025,7 +1055,14 @@ namespace ngsbem
1025
1055
  }
1026
1056
 
1027
1057
  if (dipoles.Size())
1028
- throw Exception("EvaluateDeriv not implemented for dipoles in SingularMLExpansion");
1058
+ {
1059
+ static int cnt = 0;
1060
+ cnt++;
1061
+ if (cnt < 3)
1062
+ cout << "we know what we do - evaluateDeriv not implemented for dipoles in SingularMLExpansion" << endl;
1063
+ // return sum;
1064
+ // throw Exception("EvaluateDeriv not implemented for dipoles in SingularMLExpansion");
1065
+ }
1029
1066
  if (chargedipoles.Size())
1030
1067
  throw Exception("EvaluateDeriv not implemented for dipoles in SingularMLExpansion");
1031
1068
 
@@ -1134,8 +1171,6 @@ namespace ngsbem
1134
1171
  simd_chargedipoles[ii] = MakeSimd(di);
1135
1172
  }
1136
1173
 
1137
-
1138
-
1139
1174
 
1140
1175
  if (nodes_to_process)
1141
1176
  *nodes_to_process += this;
@@ -1224,6 +1259,14 @@ namespace ngsbem
1224
1259
  num += ch->NumCoefficients();
1225
1260
  return num;
1226
1261
  }
1262
+
1263
+ void TraverseTree (const std::function<void(Node&)> & func)
1264
+ {
1265
+ func(*this);
1266
+ for (auto & child : childs)
1267
+ if (child)
1268
+ child->TraverseTree(func);
1269
+ }
1227
1270
  };
1228
1271
 
1229
1272
  Node root;
@@ -1313,89 +1356,93 @@ namespace ngsbem
1313
1356
 
1314
1357
  root.CalcTotalSources();
1315
1358
 
1316
- if (false)
1359
+ if constexpr (false)
1317
1360
  // direct evaluation of S->S
1318
1361
  root.CalcMP(nullptr, nullptr);
1319
1362
  else
1320
1363
  {
1321
1364
 
1322
- Array<RecordingSS> recording;
1323
- Array<Node*> nodes_to_process;
1365
+ Array<RecordingSS> recording;
1366
+ Array<Node*> nodes_to_process;
1324
1367
 
1325
- {
1326
- RegionTimer reg(trec);
1327
- root.CalcMP(&recording, &nodes_to_process);
1328
- }
1368
+ {
1369
+ RegionTimer reg(trec);
1370
+ root.CalcMP(&recording, &nodes_to_process);
1371
+ }
1329
1372
 
1330
- {
1331
- RegionTimer rs2mp(ts2mp);
1332
- ParallelFor(nodes_to_process.Size(), [&](int i){
1333
- auto node = nodes_to_process[i];
1334
- for (auto [x,c]: node->charges)
1335
- node->mp.AddCharge(x-node->center, c);
1336
- for (auto [x,d,c]: node->dipoles)
1337
- node->mp.AddDipole(x-node->center, d, c);
1338
- for (auto [x,c,d,c2]: node->chargedipoles)
1339
- node->mp.AddChargeDipole(x-node->center, c, d, c2);
1340
- for (auto [sp,ep,j,num]: node->currents)
1341
- node->mp.AddCurrent(sp-node->center, ep-node->center, j, num);
1342
- }, TasksPerThread(4));
1343
- }
1344
-
1345
- {
1346
- RegionTimer reg(tsort);
1347
- QuickSort (recording, [] (auto & a, auto & b)
1348
- {
1349
- if (a.len < (1-1e-8) * b.len) return true;
1350
- if (a.len > (1+1e-8) * b.len) return false;
1351
- return a.theta < b.theta;
1352
- });
1353
- }
1373
+ {
1374
+ RegionTimer rs2mp(ts2mp);
1375
+ ParallelFor(nodes_to_process.Size(), [&](int i)
1376
+ {
1377
+ auto node = nodes_to_process[i];
1378
+ for (auto [x,c]: node->charges)
1379
+ node->mp.AddCharge(x-node->center, c);
1380
+ for (auto [x,d,c]: node->dipoles)
1381
+ node->mp.AddDipole(x-node->center, d, c);
1382
+ for (auto [x,c,d,c2]: node->chargedipoles)
1383
+ node->mp.AddChargeDipole(x-node->center, c, d, c2);
1384
+ for (auto [sp,ep,j,num]: node->currents)
1385
+ node->mp.AddCurrent(sp-node->center, ep-node->center, j, num);
1386
+ }, TasksPerThread(4));
1387
+ }
1388
+
1389
+ {
1390
+ RegionTimer reg(tsort);
1391
+ QuickSort (recording, [] (auto & a, auto & b)
1392
+ {
1393
+ if (a.len < (1-1e-8) * b.len) return true;
1394
+ if (a.len > (1+1e-8) * b.len) return false;
1395
+ return a.theta < b.theta;
1396
+ });
1397
+ }
1354
1398
 
1355
- double current_len = -1e100;
1356
- double current_theta = -1e100;
1357
- Array<RecordingSS*> current_batch;
1358
- Array<Array<RecordingSS*>> batch_group;
1359
- Array<double> group_lengths;
1360
- Array<double> group_thetas;
1361
- for (auto & record : recording)
1362
- {
1363
- bool len_changed = fabs(record.len - current_len) > 1e-8;
1364
- bool theta_changed = fabs(record.theta - current_theta) > 1e-8;
1365
- if ((len_changed || theta_changed) && current_batch.Size() > 0) {
1399
+ double current_len = -1e100;
1400
+ double current_theta = -1e100;
1401
+ Array<RecordingSS*> current_batch;
1402
+ Array<Array<RecordingSS*>> batch_group;
1403
+ Array<double> group_lengths;
1404
+ Array<double> group_thetas;
1405
+ for (auto & record : recording)
1406
+ {
1407
+ bool len_changed = fabs(record.len - current_len) > 1e-8;
1408
+ bool theta_changed = fabs(record.theta - current_theta) > 1e-8;
1409
+ if ((len_changed || theta_changed) && current_batch.Size() > 0) {
1410
+ batch_group.Append(current_batch);
1411
+ group_lengths.Append(current_len);
1412
+ group_thetas.Append(current_theta);
1413
+ current_batch.SetSize(0);
1414
+ }
1415
+
1416
+ current_len = record.len;
1417
+ current_theta = record.theta;
1418
+ current_batch.Append(&record);
1419
+ }
1420
+
1421
+ if (current_batch.Size() > 0) {
1366
1422
  batch_group.Append(current_batch);
1367
1423
  group_lengths.Append(current_len);
1368
1424
  group_thetas.Append(current_theta);
1369
- current_batch.SetSize(0);
1370
- }
1425
+ }
1371
1426
 
1372
- current_len = record.len;
1373
- current_theta = record.theta;
1374
- current_batch.Append(&record);
1427
+ {
1428
+ RegionTimer rS2S(tS2S);
1429
+ // ParallelFor(batch_group.Size(), [&](int i) {
1430
+ for (int i = 0; i < batch_group.Size(); i++){
1431
+ // *testout << "Processing batch " << i << " of size " << batch_group[i].Size() << ", with len = " << group_lengths[i] << ", theta = " << group_thetas[i] << endl;
1432
+ int chunk_size = 24;
1433
+ if (batch_group[i].Size() < chunk_size)
1434
+ ProcessBatchSS(batch_group[i], group_lengths[i], group_thetas[i]);
1435
+ else
1436
+ ParallelForRange(IntRange(batch_group[i].Size()), [&](IntRange range) {
1437
+ auto sub_batch = batch_group[i].Range(range.First(), range.Next());
1438
+ ProcessBatchSS(sub_batch, group_lengths[i], group_thetas[i]);
1439
+ }, TasksPerThread(4));
1440
+ }
1441
+ }
1375
1442
  }
1376
- if (current_batch.Size() > 0) {
1377
- batch_group.Append(current_batch);
1378
- group_lengths.Append(current_len);
1379
- group_thetas.Append(current_theta);
1380
- }
1381
1443
 
1382
- {
1383
- RegionTimer rS2S(tS2S);
1384
- // ParallelFor(batch_group.Size(), [&](int i) {
1385
- for (int i = 0; i < batch_group.Size(); i++){
1386
- // *testout << "Processing batch " << i << " of size " << batch_group[i].Size() << ", with len = " << group_lengths[i] << ", theta = " << group_thetas[i] << endl;
1387
- int chunk_size = 24;
1388
- if (batch_group[i].Size() < chunk_size)
1389
- ProcessBatch(batch_group[i], group_lengths[i], group_thetas[i]);
1390
- else
1391
- ParallelForRange(IntRange(batch_group[i].Size()), [&](IntRange range) {
1392
- auto sub_batch = batch_group[i].Range(range.First(), range.Next());
1393
- ProcessBatch(sub_batch, group_lengths[i], group_thetas[i]);
1394
- }, TasksPerThread(4));
1395
- }
1396
- }
1397
- }
1398
-
1444
+ // cout << "have singular:" << endl;
1445
+ // PrintStatistics (cout);
1399
1446
  havemp = true;
1400
1447
  }
1401
1448
 
@@ -1407,6 +1454,41 @@ namespace ngsbem
1407
1454
  return root.Evaluate(p);
1408
1455
  }
1409
1456
 
1457
+
1458
+ void PrintStatistics (ostream & ost)
1459
+ {
1460
+ int levels = 0;
1461
+ int cnt = 0;
1462
+ root.TraverseTree( [&](Node & node) {
1463
+ levels = max(levels, node.level);
1464
+ cnt++;
1465
+ });
1466
+ ost << "levels: " << levels << endl;
1467
+ ost << "nodes: " << cnt << endl;
1468
+
1469
+ Array<int> num_on_level(levels+1);
1470
+ Array<int> order_on_level(levels+1);
1471
+ Array<size_t> coefs_on_level(levels+1);
1472
+ num_on_level = 0;
1473
+ order_on_level = 0;
1474
+ root.TraverseTree( [&](Node & node) {
1475
+ num_on_level[node.level]++;
1476
+ order_on_level[node.level] = max(order_on_level[node.level],node.mp.Order());
1477
+ coefs_on_level[node.level] += node.mp.SH().Coefs().Size();
1478
+ });
1479
+
1480
+ cout << "num on level" << endl;
1481
+ for (int i = 0; i < num_on_level.Size(); i++)
1482
+ cout << i << ": " << num_on_level[i] << ", order = " << order_on_level[i] << ", coefs " << coefs_on_level[i] << endl;
1483
+
1484
+ size_t totcoefs = 0;
1485
+ for (auto n : coefs_on_level)
1486
+ totcoefs += n;
1487
+ cout << "total mem in coefs: " << sizeof(entry_type)*totcoefs / sqr(1024) << " MB" << endl;
1488
+ }
1489
+
1490
+
1491
+
1410
1492
  template <typename entry_type2>
1411
1493
  friend class RegularMLExpansion;
1412
1494
  };
@@ -1420,6 +1502,9 @@ namespace ngsbem
1420
1502
  }
1421
1503
 
1422
1504
 
1505
+ // *********************************** Regular multilevel Expansion
1506
+
1507
+
1423
1508
  template <typename elem_type=Complex>
1424
1509
  class NGS_DLL_HEADER RegularMLExpansion
1425
1510
  {
@@ -1456,28 +1541,28 @@ namespace ngsbem
1456
1541
  }
1457
1542
  }
1458
1543
  else if (N <= 3) {
1459
- ProcessVectorizedBatch<3, vec_length>(batch, len, theta);
1544
+ ProcessVectorizedBatchRS<3, vec_length>(batch, len, theta);
1460
1545
  }
1461
1546
  else if (N <= 4) {
1462
- ProcessVectorizedBatch<4, vec_length>(batch, len, theta);
1547
+ ProcessVectorizedBatchRS<4, vec_length>(batch, len, theta);
1463
1548
  }
1464
1549
  else if (N <= 6) {
1465
- ProcessVectorizedBatch<6, vec_length>(batch, len, theta);
1550
+ ProcessVectorizedBatchRS<6, vec_length>(batch, len, theta);
1466
1551
  }
1467
1552
  else if (N <= 12) {
1468
- ProcessVectorizedBatch<12, vec_length>(batch, len, theta);
1553
+ ProcessVectorizedBatchRS<12, vec_length>(batch, len, theta);
1469
1554
  }
1470
1555
  else if (N <= 24) {
1471
- ProcessVectorizedBatch<24, vec_length>(batch, len, theta);
1556
+ ProcessVectorizedBatchRS<24, vec_length>(batch, len, theta);
1472
1557
  }
1473
1558
  else if (N <= 48) {
1474
- ProcessVectorizedBatch<48, vec_length>(batch, len, theta);
1559
+ ProcessVectorizedBatchRS<48, vec_length>(batch, len, theta);
1475
1560
  }
1476
1561
  else if (N <= 96) {
1477
- ProcessVectorizedBatch<96, vec_length>(batch, len, theta);
1562
+ ProcessVectorizedBatchRS<96, vec_length>(batch, len, theta);
1478
1563
  }
1479
1564
  else if (N <= 192) {
1480
- ProcessVectorizedBatch<192, vec_length>(batch, len, theta);
1565
+ ProcessVectorizedBatchRS<192, vec_length>(batch, len, theta);
1481
1566
  }
1482
1567
  else {
1483
1568
  // Split large batches
@@ -1509,7 +1594,7 @@ namespace ngsbem
1509
1594
 
1510
1595
 
1511
1596
  template<int N, int vec_length>
1512
- static void ProcessVectorizedBatch(FlatArray<RecordingRS*> batch, double len, double theta) {
1597
+ static void ProcessVectorizedBatchRS(FlatArray<RecordingRS*> batch, double len, double theta) {
1513
1598
 
1514
1599
  // static Timer t("ProcessVectorizedBatch, N = "+ToString(N) + ", vec_len = " + ToString(vec_length));
1515
1600
  // RegionTimer reg(t, batch[0]->mpS->SH().Order());
@@ -1570,6 +1655,7 @@ namespace ngsbem
1570
1655
  std::array<unique_ptr<Node>,8> childs;
1571
1656
  SphericalExpansion<Regular,elem_type> mp;
1572
1657
  Array<Vec<3>> targets;
1658
+ Array<tuple<Vec<3>,double>> vol_targets;
1573
1659
  int total_targets;
1574
1660
  std::mutex node_mutex;
1575
1661
  atomic<bool> have_childs{false};
@@ -1577,15 +1663,22 @@ namespace ngsbem
1577
1663
  Array<const typename SingularMLExpansion<elem_type>::Node*> singnodes;
1578
1664
 
1579
1665
  Node (Vec<3> acenter, double ar, int alevel, double kappa)
1580
- : center(acenter), r(ar), level(alevel), mp(MPOrder(ar*kappa), kappa, ar) // 1.0/min(1.0, 0.25*r*kappa))
1666
+ : center(acenter), r(ar), level(alevel),
1667
+ // mp(MPOrder(ar*kappa), kappa, ar) // 1.0/min(1.0, 0.25*r*kappa))
1668
+ mp(-1, kappa, ar)
1581
1669
  // : center(acenter), r(ar), level(alevel), mp(MPOrder(ar*kappa), kappa, 1.0)
1582
1670
  {
1583
1671
  if (level < nodes_on_level.Size())
1584
1672
  nodes_on_level[level]++;
1585
1673
  }
1586
1674
 
1587
-
1588
- void CreateChilds()
1675
+ void Allocate()
1676
+ {
1677
+ mp = SphericalExpansion<Regular,elem_type>(MPOrder(r*mp.Kappa()), mp.Kappa(), r);
1678
+ }
1679
+
1680
+
1681
+ void CreateChilds(bool allocate = false)
1589
1682
  {
1590
1683
  if (childs[0]) throw Exception("have already childs");
1591
1684
  // create children nodes:
@@ -1596,6 +1689,8 @@ namespace ngsbem
1596
1689
  cc(1) += (i&2) ? r/2 : -r/2;
1597
1690
  cc(2) += (i&4) ? r/2 : -r/2;
1598
1691
  childs[i] = make_unique<Node> (cc, r/2, level+1, mp.Kappa());
1692
+ if (allocate)
1693
+ childs[i] -> Allocate();
1599
1694
  }
1600
1695
  have_childs = true;
1601
1696
  }
@@ -1648,7 +1743,7 @@ namespace ngsbem
1648
1743
  if (allow_refine)
1649
1744
  {
1650
1745
  if (!childs[0])
1651
- CreateChilds();
1746
+ CreateChilds(true);
1652
1747
 
1653
1748
  for (auto & ch : childs)
1654
1749
  ch -> AddSingularNode (singnode, allow_refine, recording);
@@ -1668,7 +1763,7 @@ namespace ngsbem
1668
1763
  childs[nr] -> AddSingularNode (singnode, allow_refine, recording);
1669
1764
  });
1670
1765
 
1671
- if (targets.Size())
1766
+ if (targets.Size()+vol_targets.Size())
1672
1767
  singnodes.Append(&singnode);
1673
1768
  }
1674
1769
  }
@@ -1683,7 +1778,7 @@ namespace ngsbem
1683
1778
  {
1684
1779
  if (allow_refine)
1685
1780
  if (mp.Order() > 30 && !childs[0])
1686
- CreateChilds();
1781
+ CreateChilds(allow_refine);
1687
1782
 
1688
1783
  if (childs[0])
1689
1784
  {
@@ -1754,6 +1849,14 @@ namespace ngsbem
1754
1849
  return sum;
1755
1850
  }
1756
1851
 
1852
+ void TraverseTree (const std::function<void(Node&)> & func)
1853
+ {
1854
+ func(*this);
1855
+ for (auto & child : childs)
1856
+ if (child)
1857
+ child->TraverseTree(func);
1858
+ }
1859
+
1757
1860
  double Norm() const
1758
1861
  {
1759
1862
  double norm = L2Norm(mp.SH().Coefs());
@@ -1771,17 +1874,23 @@ namespace ngsbem
1771
1874
  num += ch->NumCoefficients();
1772
1875
  return num;
1773
1876
  }
1774
-
1877
+
1878
+ int GetChildNum (Vec<3> x) const
1879
+ {
1880
+ int childnum = 0;
1881
+ if (x(0) > center(0)) childnum += 1;
1882
+ if (x(1) > center(1)) childnum += 2;
1883
+ if (x(2) > center(2)) childnum += 4;
1884
+ return childnum;
1885
+ }
1886
+
1775
1887
  void AddTarget (Vec<3> x)
1776
1888
  {
1777
1889
  // if (childs[0])
1778
1890
  if (have_childs) // quick check without locking
1779
1891
  {
1780
1892
  // directly send to childs:
1781
- int childnum = 0;
1782
- if (x(0) > center(0)) childnum += 1;
1783
- if (x(1) > center(1)) childnum += 2;
1784
- if (x(2) > center(2)) childnum += 4;
1893
+ int childnum = GetChildNum(x);
1785
1894
  childs[childnum] -> AddTarget( x );
1786
1895
  return;
1787
1896
  }
@@ -1791,32 +1900,74 @@ namespace ngsbem
1791
1900
  if (have_childs) // test again after locking
1792
1901
  {
1793
1902
  // directly send to childs:
1794
- int childnum = 0;
1795
- if (x(0) > center(0)) childnum += 1;
1796
- if (x(1) > center(1)) childnum += 2;
1797
- if (x(2) > center(2)) childnum += 4;
1903
+ int childnum = GetChildNum(x);
1798
1904
  childs[childnum] -> AddTarget(x);
1799
1905
  return;
1800
1906
  }
1801
1907
 
1802
-
1803
1908
  targets.Append( x );
1804
1909
 
1805
1910
  // if (r*mp.Kappa() < 1e-8) return;
1806
1911
  if (level > 20) return;
1807
- if (targets.Size() < maxdirect && r*mp.Kappa() < 1)
1912
+ if (targets.Size() < maxdirect && r*mp.Kappa() < 5)
1913
+ return;
1914
+
1915
+ CreateChilds();
1916
+
1917
+ for (auto t : targets)
1918
+ AddTarget (t);
1919
+ for (auto [x,r] : vol_targets)
1920
+ AddVolumeTarget (x,r);
1921
+
1922
+ targets.SetSize0();
1923
+ vol_targets.SetSize0();
1924
+ }
1925
+
1926
+
1927
+ void AddVolumeTarget (Vec<3> x, double tr)
1928
+ {
1929
+ if (MaxNorm(x-center) > r+tr) return;
1930
+
1931
+ if (have_childs)
1932
+ {
1933
+ for (auto & child : childs)
1934
+ child->AddVolumeTarget(x, tr);
1935
+ return;
1936
+ }
1937
+
1938
+
1939
+ lock_guard<mutex> guard(node_mutex);
1940
+
1941
+ if (have_childs)
1942
+ {
1943
+ for (auto & child : childs)
1944
+ child->AddVolumeTarget(x, tr);
1945
+ return;
1946
+ }
1947
+
1948
+
1949
+ vol_targets.Append (tuple(x,tr));
1950
+
1951
+ if (level > 20) return;
1952
+ if (vol_targets.Size() < maxdirect && (r*mp.Kappa() < 5))
1808
1953
  return;
1809
1954
 
1810
1955
  CreateChilds();
1811
1956
 
1812
1957
  for (auto t : targets)
1813
1958
  AddTarget (t);
1959
+ for (auto [x,r] : vol_targets)
1960
+ AddVolumeTarget (x,r);
1961
+
1814
1962
  targets.SetSize0();
1963
+ vol_targets.SetSize0();
1815
1964
  }
1816
1965
 
1966
+
1967
+
1817
1968
  void CalcTotalTargets()
1818
1969
  {
1819
- total_targets = targets.Size();
1970
+ total_targets = targets.Size() + vol_targets.Size();
1820
1971
  for (auto & child : childs)
1821
1972
  if (child)
1822
1973
  {
@@ -1838,6 +1989,19 @@ namespace ngsbem
1838
1989
  if (total_targets == 0)
1839
1990
  mp = SphericalExpansion<Regular,elem_type>(-1, mp.Kappa(),1.);
1840
1991
  }
1992
+
1993
+ void AllocateMemory()
1994
+ {
1995
+ for (auto & child : childs)
1996
+ if (child)
1997
+ child->AllocateMemory();
1998
+
1999
+ if (total_targets > 0)
2000
+ Allocate();
2001
+ // mp = SphericalExpansion<Regular,elem_type>(MPOrder(r*mp.Kappa()), mp.Kappa(), r); // -1, mp.Kappa(),1.);
2002
+ }
2003
+
2004
+
1841
2005
 
1842
2006
 
1843
2007
  void Print (ostream & ost, size_t childnr = -1) const
@@ -1863,7 +2027,8 @@ namespace ngsbem
1863
2027
  : root(center, r, 0, asingmp->Kappa()), singmp(asingmp)
1864
2028
  {
1865
2029
  if (!singmp->havemp) throw Exception("first call Calc for singular MP");
1866
-
2030
+ root.Allocate();
2031
+
1867
2032
  nodes_on_level = 0;
1868
2033
  nodes_on_level[0] = 1;
1869
2034
  {
@@ -1901,73 +2066,98 @@ namespace ngsbem
1901
2066
  root.AddTarget (t);
1902
2067
  }
1903
2068
 
2069
+ void AddVolumeTarget (Vec<3> t, double r)
2070
+ {
2071
+ root.AddVolumeTarget (t, r);
2072
+ }
2073
+
1904
2074
  void CalcMP(shared_ptr<SingularMLExpansion<elem_type>> asingmp, bool onlytargets = true)
1905
2075
  {
1906
2076
  static Timer t("mptool regular MLMP"); RegionTimer rg(t);
2077
+ static Timer tremove("removeempty");
1907
2078
  static Timer trec("mptool regular MLMP - recording");
1908
2079
  static Timer tsort("mptool regular MLMP - sort");
1909
2080
 
1910
2081
  singmp = asingmp;
1911
2082
 
2083
+
1912
2084
  root.CalcTotalTargets();
2085
+ // cout << "before remove empty trees:" << endl;
2086
+ // PrintStatistics(cout);
2087
+
2088
+ /*
2089
+ tremove.Start();
1913
2090
  if (onlytargets)
1914
2091
  root.RemoveEmptyTrees();
1915
-
2092
+ tremove.Stop();
2093
+ */
1916
2094
 
1917
- // root.AddSingularNode(singmp->root, !onlytargets, nullptr);
2095
+ root.AllocateMemory();
1918
2096
 
1919
- // /*
1920
- Array<RecordingRS> recording;
1921
- {
1922
- RegionTimer rrec(trec);
1923
- root.AddSingularNode(singmp->root, !onlytargets, &recording);
1924
- }
1925
-
1926
- // cout << "recorded: " << recording.Size() << endl;
1927
- {
1928
- RegionTimer reg(tsort);
1929
- QuickSort (recording, [] (auto & a, auto & b)
1930
- {
1931
- if (a.len < (1-1e-8) * b.len) return true;
1932
- if (a.len > (1+1e-8) * b.len) return false;
1933
- return a.theta < b.theta;
1934
- });
1935
- }
1936
-
1937
- double current_len = -1e100;
1938
- double current_theta = -1e100;
1939
- Array<RecordingRS*> current_batch;
1940
- Array<Array<RecordingRS*>> batch_group;
1941
- Array<double> group_lengths;
1942
- Array<double> group_thetas;
1943
- for (auto & record : recording)
2097
+ // cout << "after allocating regular:" << endl;
2098
+ // PrintStatistics(cout);
2099
+
2100
+ // cout << "starting S-R conversion" << endl;
2101
+ // PrintStatistics(cout);
2102
+
2103
+
2104
+ if constexpr (false)
1944
2105
  {
1945
- bool len_changed = fabs(record.len - current_len) > 1e-8;
1946
- bool theta_changed = fabs(record.theta - current_theta) > 1e-8;
1947
- if ((len_changed || theta_changed) && current_batch.Size() > 0) {
2106
+ root.AddSingularNode(singmp->root, !onlytargets, nullptr);
2107
+ }
2108
+ else
2109
+ { // use recording
2110
+ Array<RecordingRS> recording;
2111
+ {
2112
+ RegionTimer rrec(trec);
2113
+ root.AddSingularNode(singmp->root, !onlytargets, &recording);
2114
+ }
2115
+
2116
+ // cout << "recorded: " << recording.Size() << endl;
2117
+ {
2118
+ RegionTimer reg(tsort);
2119
+ QuickSort (recording, [] (auto & a, auto & b)
2120
+ {
2121
+ if (a.len < (1-1e-8) * b.len) return true;
2122
+ if (a.len > (1+1e-8) * b.len) return false;
2123
+ return a.theta < b.theta;
2124
+ });
2125
+ }
2126
+
2127
+ double current_len = -1e100;
2128
+ double current_theta = -1e100;
2129
+ Array<RecordingRS*> current_batch;
2130
+ Array<Array<RecordingRS*>> batch_group;
2131
+ Array<double> group_lengths;
2132
+ Array<double> group_thetas;
2133
+ for (auto & record : recording)
2134
+ {
2135
+ bool len_changed = fabs(record.len - current_len) > 1e-8;
2136
+ bool theta_changed = fabs(record.theta - current_theta) > 1e-8;
2137
+ if ((len_changed || theta_changed) && current_batch.Size() > 0) {
2138
+ // ProcessBatch(current_batch, current_len, current_theta);
2139
+ batch_group.Append(current_batch);
2140
+ group_lengths.Append(current_len);
2141
+ group_thetas.Append(current_theta);
2142
+ current_batch.SetSize(0);
2143
+ }
2144
+
2145
+ current_len = record.len;
2146
+ current_theta = record.theta;
2147
+ current_batch.Append(&record);
2148
+ }
2149
+ if (current_batch.Size() > 0) {
1948
2150
  // ProcessBatch(current_batch, current_len, current_theta);
1949
2151
  batch_group.Append(current_batch);
1950
2152
  group_lengths.Append(current_len);
1951
2153
  group_thetas.Append(current_theta);
1952
- current_batch.SetSize(0);
1953
- }
1954
-
1955
- current_len = record.len;
1956
- current_theta = record.theta;
1957
- current_batch.Append(&record);
2154
+ }
2155
+
2156
+ ParallelFor(batch_group.Size(), [&](int i) {
2157
+ ProcessBatchRS(batch_group[i], group_lengths[i], group_thetas[i]);
2158
+ }, TasksPerThread(4));
1958
2159
  }
1959
- if (current_batch.Size() > 0) {
1960
- // ProcessBatch(current_batch, current_len, current_theta);
1961
- batch_group.Append(current_batch);
1962
- group_lengths.Append(current_len);
1963
- group_thetas.Append(current_theta);
1964
- }
1965
-
1966
- ParallelFor(batch_group.Size(), [&](int i) {
1967
- ProcessBatchRS(batch_group[i], group_lengths[i], group_thetas[i]);
1968
- }, TasksPerThread(4));
1969
- // */
1970
-
2160
+
1971
2161
 
1972
2162
  /*
1973
2163
  int maxlevel = 0;
@@ -1978,10 +2168,49 @@ namespace ngsbem
1978
2168
  cout << "reg " << i << ": " << RegularMLExpansion::nodes_on_level[i] << endl;
1979
2169
  */
1980
2170
 
2171
+ // cout << "starting R-R conversion" << endl;
2172
+ // PrintStatistics(cout);
2173
+
1981
2174
  static Timer tloc("mptool regular localize expansion"); RegionTimer rloc(tloc);
1982
2175
  root.LocalizeExpansion(!onlytargets);
2176
+
2177
+
2178
+ // cout << "R-R conversion done" << endl;
2179
+ // PrintStatistics(cout);
1983
2180
  }
1984
2181
 
2182
+ void PrintStatistics (ostream & ost)
2183
+ {
2184
+ int levels = 0;
2185
+ int cnt = 0;
2186
+ root.TraverseTree( [&](Node & node) {
2187
+ levels = max(levels, node.level);
2188
+ cnt++;
2189
+ });
2190
+ ost << "levels: " << levels << endl;
2191
+ ost << "nodes: " << cnt << endl;
2192
+
2193
+ Array<int> num_on_level(levels+1);
2194
+ Array<int> order_on_level(levels+1);
2195
+ Array<size_t> coefs_on_level(levels+1);
2196
+ num_on_level = 0;
2197
+ order_on_level = 0;
2198
+ root.TraverseTree( [&](Node & node) {
2199
+ num_on_level[node.level]++;
2200
+ order_on_level[node.level] = max(order_on_level[node.level],node.mp.Order());
2201
+ coefs_on_level[node.level] += node.mp.SH().Coefs().Size();
2202
+ });
2203
+
2204
+ cout << "num on level" << endl;
2205
+ for (int i = 0; i < num_on_level.Size(); i++)
2206
+ cout << i << ": " << num_on_level[i] << ", order = " << order_on_level[i] << ", coefs " << coefs_on_level[i] << endl;
2207
+
2208
+ size_t totcoefs = 0;
2209
+ for (auto n : coefs_on_level)
2210
+ totcoefs += n;
2211
+ cout << "total mem in coefs: " << sizeof(elem_type)*totcoefs / sqr(1024) << " MB" << endl;
2212
+ }
2213
+
1985
2214
  void Print (ostream & ost) const
1986
2215
  {
1987
2216
  root.Print(ost);
@@ -2001,6 +2230,7 @@ namespace ngsbem
2001
2230
  {
2002
2231
  // static Timer t("mptool Eval MLMP regular"); RegionTimer r(t);
2003
2232
  // if (L2Norm(p-root.center) > root.r) return elem_type{0.0};
2233
+
2004
2234
  if (MaxNorm(p-root.center) > root.r)
2005
2235
  return singmp->Evaluate(p);
2006
2236
  return root.Evaluate(p);