ngsolve 6.2.2506__cp311-cp311-win_amd64.whl → 6.2.2506.post38.dev0__cp311-cp311-win_amd64.whl

This diff shows the content changes between two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ngsolve might be problematic. Click here for more details.

Files changed (50)
  1. netgen/include/bem_diffops.hpp +475 -0
  2. netgen/include/bspline.hpp +2 -0
  3. netgen/include/contact.hpp +4 -0
  4. netgen/include/h1lumping.hpp +6 -0
  5. netgen/include/kernels.hpp +653 -0
  6. netgen/include/mp_coefficient.hpp +20 -20
  7. netgen/include/mptools.hpp +625 -279
  8. netgen/include/potentialtools.hpp +2 -2
  9. netgen/include/recursive_pol.hpp +2 -2
  10. netgen/include/sparsematrix.hpp +1 -1
  11. netgen/lib/libngsolve.lib +0 -0
  12. netgen/libngsolve.dll +0 -0
  13. ngsolve/cmake/NGSolveConfig.cmake +1 -1
  14. ngsolve/config/config.py +5 -5
  15. ngsolve/demos/intro/cmagnet.py +19 -22
  16. ngsolve/ngslib.pyd +0 -0
  17. {ngsolve-6.2.2506.dist-info → ngsolve-6.2.2506.post38.dev0.dist-info}/METADATA +2 -2
  18. {ngsolve-6.2.2506.dist-info → ngsolve-6.2.2506.post38.dev0.dist-info}/RECORD +50 -48
  19. {ngsolve-6.2.2506.data → ngsolve-6.2.2506.post38.dev0.data}/data/Scripts/ngsolve.tcl +0 -0
  20. {ngsolve-6.2.2506.data → ngsolve-6.2.2506.post38.dev0.data}/data/share/ngsolve/beam.geo +0 -0
  21. {ngsolve-6.2.2506.data → ngsolve-6.2.2506.post38.dev0.data}/data/share/ngsolve/beam.vol +0 -0
  22. {ngsolve-6.2.2506.data → ngsolve-6.2.2506.post38.dev0.data}/data/share/ngsolve/chip.in2d +0 -0
  23. {ngsolve-6.2.2506.data → ngsolve-6.2.2506.post38.dev0.data}/data/share/ngsolve/chip.vol +0 -0
  24. {ngsolve-6.2.2506.data → ngsolve-6.2.2506.post38.dev0.data}/data/share/ngsolve/coil.geo +0 -0
  25. {ngsolve-6.2.2506.data → ngsolve-6.2.2506.post38.dev0.data}/data/share/ngsolve/coil.vol +0 -0
  26. {ngsolve-6.2.2506.data → ngsolve-6.2.2506.post38.dev0.data}/data/share/ngsolve/coilshield.geo +0 -0
  27. {ngsolve-6.2.2506.data → ngsolve-6.2.2506.post38.dev0.data}/data/share/ngsolve/coilshield.vol +0 -0
  28. {ngsolve-6.2.2506.data → ngsolve-6.2.2506.post38.dev0.data}/data/share/ngsolve/cube.geo +0 -0
  29. {ngsolve-6.2.2506.data → ngsolve-6.2.2506.post38.dev0.data}/data/share/ngsolve/cube.vol +0 -0
  30. {ngsolve-6.2.2506.data → ngsolve-6.2.2506.post38.dev0.data}/data/share/ngsolve/d10_DGdoubleglazing.pde +0 -0
  31. {ngsolve-6.2.2506.data → ngsolve-6.2.2506.post38.dev0.data}/data/share/ngsolve/d11_chip_nitsche.pde +0 -0
  32. {ngsolve-6.2.2506.data → ngsolve-6.2.2506.post38.dev0.data}/data/share/ngsolve/d1_square.pde +0 -0
  33. {ngsolve-6.2.2506.data → ngsolve-6.2.2506.post38.dev0.data}/data/share/ngsolve/d2_chip.pde +0 -0
  34. {ngsolve-6.2.2506.data → ngsolve-6.2.2506.post38.dev0.data}/data/share/ngsolve/d3_helmholtz.pde +0 -0
  35. {ngsolve-6.2.2506.data → ngsolve-6.2.2506.post38.dev0.data}/data/share/ngsolve/d4_cube.pde +0 -0
  36. {ngsolve-6.2.2506.data → ngsolve-6.2.2506.post38.dev0.data}/data/share/ngsolve/d5_beam.pde +0 -0
  37. {ngsolve-6.2.2506.data → ngsolve-6.2.2506.post38.dev0.data}/data/share/ngsolve/d6_shaft.pde +0 -0
  38. {ngsolve-6.2.2506.data → ngsolve-6.2.2506.post38.dev0.data}/data/share/ngsolve/d7_coil.pde +0 -0
  39. {ngsolve-6.2.2506.data → ngsolve-6.2.2506.post38.dev0.data}/data/share/ngsolve/d8_coilshield.pde +0 -0
  40. {ngsolve-6.2.2506.data → ngsolve-6.2.2506.post38.dev0.data}/data/share/ngsolve/d9_hybridDG.pde +0 -0
  41. {ngsolve-6.2.2506.data → ngsolve-6.2.2506.post38.dev0.data}/data/share/ngsolve/doubleglazing.in2d +0 -0
  42. {ngsolve-6.2.2506.data → ngsolve-6.2.2506.post38.dev0.data}/data/share/ngsolve/doubleglazing.vol +0 -0
  43. {ngsolve-6.2.2506.data → ngsolve-6.2.2506.post38.dev0.data}/data/share/ngsolve/piezo2d40round4.vol.gz +0 -0
  44. {ngsolve-6.2.2506.data → ngsolve-6.2.2506.post38.dev0.data}/data/share/ngsolve/shaft.geo +0 -0
  45. {ngsolve-6.2.2506.data → ngsolve-6.2.2506.post38.dev0.data}/data/share/ngsolve/shaft.vol +0 -0
  46. {ngsolve-6.2.2506.data → ngsolve-6.2.2506.post38.dev0.data}/data/share/ngsolve/square.in2d +0 -0
  47. {ngsolve-6.2.2506.data → ngsolve-6.2.2506.post38.dev0.data}/data/share/ngsolve/square.vol +0 -0
  48. {ngsolve-6.2.2506.dist-info → ngsolve-6.2.2506.post38.dev0.dist-info}/LICENSE +0 -0
  49. {ngsolve-6.2.2506.dist-info → ngsolve-6.2.2506.post38.dev0.dist-info}/WHEEL +0 -0
  50. {ngsolve-6.2.2506.dist-info → ngsolve-6.2.2506.post38.dev0.dist-info}/top_level.txt +0 -0
@@ -25,6 +25,7 @@ namespace ngsbem
25
25
 
26
26
  constexpr int FMM_SW = 4;
27
27
 
28
+
28
29
 
29
30
  // ************************ SIMD - creation (should end up in simd.hpp) *************
30
31
 
@@ -42,7 +43,19 @@ namespace ngsbem
42
43
  }
43
44
 
44
45
 
45
-
46
+ class NGS_DLL_HEADER PrecomputedSqrts
47
+ {
48
+ public:
49
+ Array<double> sqrt_int;
50
+ // Array<double> inv_sqrt_int;
51
+ Array<double> sqrt_n_np1; // sqrt(n*(n+1))
52
+ Array<double> inv_sqrt_2np1_2np3; // 1/sqrt( (2n+1)*(2n+3) )
53
+
54
+ PrecomputedSqrts();
55
+ };
56
+
57
+ extern NGS_DLL_HEADER PrecomputedSqrts presqrt;
58
+
46
59
 
47
60
 
48
61
 
@@ -189,18 +202,26 @@ namespace ngsbem
189
202
  void RotateY (double alpha, bool parallel = false);
190
203
 
191
204
 
205
+
192
206
  static double CalcAmn (int m, int n)
193
207
  {
194
208
  if (m < 0) m=-m;
195
209
  if (n < m) return 0;
196
- return sqrt( (n+1.0+m)*(n+1.0-m) / ( (2*n+1)*(2*n+3) ));
210
+
211
+ if (2*n+1 < presqrt.sqrt_int.Size())
212
+ return presqrt.sqrt_int[n+1+m]*presqrt.sqrt_int[n+1-m] * presqrt.inv_sqrt_2np1_2np3[n];
213
+ else
214
+ return sqrt( (n+1.0+m)*(n+1.0-m) / ( (2*n+1)*(2*n+3) ));
197
215
  }
198
216
 
199
217
  static double CalcBmn (int m, int n)
200
218
  {
201
219
  double sgn = (m >= 0) ? 1 : -1;
202
- if ( (m > n) || (-m > n) ) return 0;
203
- return sgn * sqrt( (n-m-1.0)*(n-m) / ( (2*n-1.0)*(2*n+1)));
220
+ if ( (m >= n) || (-m > n) ) return 0;
221
+ if (n <= presqrt.inv_sqrt_2np1_2np3.Size())
222
+ return sgn * presqrt.sqrt_n_np1[n-m-1] * presqrt.inv_sqrt_2np1_2np3[n-1];
223
+ else
224
+ return sgn * sqrt( (n-m-1.0)*(n-m) / ( (2*n-1.0)*(2*n+1)));
204
225
  }
205
226
 
206
227
  static double CalcDmn (int m, int n)
@@ -303,7 +324,7 @@ namespace ngsbem
303
324
 
304
325
 
305
326
  // hn1 = jn+ i*yn
306
- class MPSingular
327
+ class Singular
307
328
  {
308
329
  public:
309
330
  template <typename T>
@@ -329,7 +350,7 @@ namespace ngsbem
329
350
 
330
351
 
331
352
  // jn
332
- class MPRegular
353
+ class Regular
333
354
  {
334
355
  public:
335
356
  template <typename T>
@@ -357,14 +378,14 @@ namespace ngsbem
357
378
 
358
379
 
359
380
  template <typename RADIAL, typename entry_type=Complex>
360
- class NGS_DLL_HEADER MultiPole
381
+ class NGS_DLL_HEADER SphericalExpansion
361
382
  {
362
383
  SphericalHarmonics<entry_type> sh;
363
384
  double kappa;
364
385
  double rtyp;
365
386
  public:
366
387
 
367
- MultiPole (int aorder, double akappa, double artyp)
388
+ SphericalExpansion (int aorder, double akappa, double artyp)
368
389
  : sh(aorder), kappa(akappa), rtyp(artyp) { }
369
390
 
370
391
 
@@ -376,15 +397,15 @@ namespace ngsbem
376
397
  double RTyp() const { return rtyp; }
377
398
  int Order() const { return sh.Order(); }
378
399
 
379
- MultiPole Truncate(int neworder) const
400
+ SphericalExpansion Truncate(int neworder) const
380
401
  {
381
402
  if (neworder > sh.Order()) neworder=sh.Order();
382
- MultiPole nmp(neworder, kappa, rtyp);
403
+ SphericalExpansion nmp(neworder, kappa, rtyp);
383
404
  nmp.sh.Coefs() = sh.Coefs().Range(sqr(neworder+1));
384
405
  return nmp;
385
406
  }
386
407
 
387
- MultiPole & operator+= (const MultiPole & mp2)
408
+ SphericalExpansion & operator+= (const SphericalExpansion & mp2)
388
409
  {
389
410
  size_t commonsize = min(SH().Coefs().Size(), mp2.SH().Coefs().Size());
390
411
  SH().Coefs().Range(commonsize) += mp2.SH().Coefs().Range(commonsize);
@@ -395,27 +416,24 @@ namespace ngsbem
395
416
  entry_type EvalDirectionalDerivative (Vec<3> x, Vec<3> d) const;
396
417
 
397
418
  void AddCharge (Vec<3> x, entry_type c);
398
- void AddDipole (Vec<3> x, Vec<3> d, entry_type c);
399
- void AddCurrent (Vec<3> ap, Vec<3> ep, Complex j, int num=100);
400
-
401
- /*
402
- void ChangeScaleTo (double newscale)
419
+ void AddDipole (Vec<3> x, Vec<3> dir, entry_type c);
420
+ void AddChargeDipole (Vec<3> x, entry_type c, Vec<3> dir, entry_type c2)
403
421
  {
404
- double fac = Scale()/newscale;
405
- double prod = 1;
406
- for (int n = 0; n <= sh.Order(); n++, prod*= fac)
407
- sh.CoefsN(n) *= prod;
408
- scale = newscale;
422
+ // TODO: add them at once
423
+ AddCharge (x, c);
424
+ AddDipole (x, dir, c2);
409
425
  }
410
- */
426
+
427
+ void AddPlaneWave (Vec<3> d, entry_type c);
428
+ void AddCurrent (Vec<3> ap, Vec<3> ep, Complex j, int num=100);
429
+
430
+
411
431
  void ChangeRTypTo (double new_rtyp)
412
432
  {
413
- // double fac = Scale()/newscale;
414
433
  double fac = RADIAL::Scale(kappa, rtyp) / RADIAL::Scale(kappa, new_rtyp);
415
434
  double prod = 1;
416
435
  for (int n = 0; n <= sh.Order(); n++, prod*= fac)
417
436
  sh.CoefsN(n) *= prod;
418
- // scale = newscale;
419
437
  rtyp = new_rtyp;
420
438
  }
421
439
 
@@ -434,7 +452,7 @@ namespace ngsbem
434
452
 
435
453
 
436
454
  template <typename TARGET>
437
- void Transform (MultiPole<TARGET,entry_type> & target, Vec<3> dist) const
455
+ void Transform (SphericalExpansion<TARGET,entry_type> & target, Vec<3> dist) const
438
456
  {
439
457
  if (target.SH().Order() < 0) return;
440
458
  if (SH().Order() < 0)
@@ -449,8 +467,8 @@ namespace ngsbem
449
467
  auto [len, theta, phi] = SphericalCoordinates(dist);
450
468
 
451
469
 
452
- // MultiPole<RADIAL,entry_type> tmp{*this};
453
- MultiPole<RADIAL,entry_type> tmp(Order(), kappa, rtyp);
470
+ // SphericalExpansion<RADIAL,entry_type> tmp{*this};
471
+ SphericalExpansion<RADIAL,entry_type> tmp(Order(), kappa, rtyp);
454
472
  tmp.SH().Coefs() = SH().Coefs();
455
473
 
456
474
  tmp.SH().RotateZ(phi);
@@ -463,12 +481,12 @@ namespace ngsbem
463
481
  }
464
482
 
465
483
  template <typename TARGET>
466
- void TransformAdd (MultiPole<TARGET,entry_type> & target, Vec<3> dist, bool atomic = false) const
484
+ void TransformAdd (SphericalExpansion<TARGET,entry_type> & target, Vec<3> dist, bool atomic = false) const
467
485
  {
468
486
  if (SH().Order() < 0) return;
469
487
  if (target.SH().Order() < 0) return;
470
488
 
471
- MultiPole<TARGET,entry_type> tmp{target};
489
+ SphericalExpansion<TARGET,entry_type> tmp{target};
472
490
  Transform(tmp, dist);
473
491
  if (!atomic)
474
492
  target.SH().Coefs() += tmp.SH().Coefs();
@@ -478,8 +496,20 @@ namespace ngsbem
478
496
  }
479
497
 
480
498
  template <typename TARGET>
481
- void ShiftZ (double z, MultiPole<TARGET,entry_type> & target);
499
+ void ShiftZ (double z, SphericalExpansion<TARGET,entry_type> & target);
500
+
482
501
 
502
+ template <typename TARGET>
503
+ void In2Out (SphericalExpansion<TARGET,entry_type> & target, double r) const
504
+ {
505
+ Vector<Complex> rad(Order()+1);
506
+ Vector<Complex> radout(target.Order()+1);
507
+ RADIAL::Eval(Order(), kappa, r, RTyp(), rad);
508
+ TARGET::Eval(target.Order(), kappa, r, target.RTyp(), radout);
509
+ target.SH().Coefs() = 0;
510
+ for (int j = 0; j <= std::min(Order(), target.Order()); j++)
511
+ target.SH().CoefsN(j) = rad(j)/radout(j) * SH().CoefsN(j);
512
+ }
483
513
  };
484
514
 
485
515
 
@@ -507,22 +537,22 @@ namespace ngsbem
507
537
 
508
538
 
509
539
  template <typename entry_type=Complex>
510
- class SingularMLMultiPole
540
+ class SingularMLExpansion
511
541
  {
512
542
  using simd_entry_type = decltype(MakeSimd(declval<std::array<entry_type,FMM_SW>>()));
513
543
  static Array<size_t> nodes_on_level;
514
544
 
515
545
  struct RecordingSS
516
546
  {
517
- const MultiPole<MPSingular,entry_type> * mp_source;
518
- MultiPole<MPSingular,entry_type> * mp_target;
547
+ const SphericalExpansion<Singular,entry_type> * mp_source;
548
+ SphericalExpansion<Singular,entry_type> * mp_target;
519
549
  Vec<3> dist;
520
550
  double len, theta, phi;
521
551
  bool flipz;
522
552
  public:
523
553
  RecordingSS() = default;
524
- RecordingSS (const MultiPole<MPSingular,entry_type> * amp_source,
525
- MultiPole<MPSingular,entry_type> * amp_target,
554
+ RecordingSS (const SphericalExpansion<Singular,entry_type> * amp_source,
555
+ SphericalExpansion<Singular,entry_type> * amp_target,
526
556
  Vec<3> adist)
527
557
  : mp_source(amp_source), mp_target(amp_target), dist(adist)
528
558
  {
@@ -534,7 +564,7 @@ namespace ngsbem
534
564
  };
535
565
 
536
566
 
537
- static void ProcessBatch(FlatArray<RecordingSS*> batch, double len, double theta) {
567
+ static void ProcessBatchSS(FlatArray<RecordingSS*> batch, double len, double theta) {
538
568
  constexpr int vec_length = VecLength<entry_type>;
539
569
  int batch_size = batch.Size();
540
570
  int N = batch_size * vec_length;
@@ -546,42 +576,45 @@ namespace ngsbem
546
576
  }
547
577
  }
548
578
  else if (N <= 3) {
549
- ProcessVectorizedBatch<3, vec_length>(batch, len, theta);
579
+ ProcessVectorizedBatchSS<3, vec_length>(batch, len, theta);
550
580
  }
551
581
  else if (N <= 4) {
552
- ProcessVectorizedBatch<4, vec_length>(batch, len, theta);
582
+ ProcessVectorizedBatchSS<4, vec_length>(batch, len, theta);
553
583
  }
554
584
  else if (N <= 6) {
555
- ProcessVectorizedBatch<6, vec_length>(batch, len, theta);
585
+ ProcessVectorizedBatchSS<6, vec_length>(batch, len, theta);
556
586
  }
557
587
  else if (N <= 12) {
558
- ProcessVectorizedBatch<12, vec_length>(batch, len, theta);
588
+ ProcessVectorizedBatchSS<12, vec_length>(batch, len, theta);
559
589
  }
560
590
  else if (N <= 24) {
561
- ProcessVectorizedBatch<24, vec_length>(batch, len, theta);
591
+ ProcessVectorizedBatchSS<24, vec_length>(batch, len, theta);
562
592
  }
563
593
  else if (N <= 48) {
564
- ProcessVectorizedBatch<48, vec_length>(batch, len, theta);
594
+ ProcessVectorizedBatchSS<48, vec_length>(batch, len, theta);
565
595
  }
566
596
  else if (N <= 96) {
567
- ProcessVectorizedBatch<96, vec_length>(batch, len, theta);
597
+ ProcessVectorizedBatchSS<96, vec_length>(batch, len, theta);
568
598
  }
569
599
  else if (N <= 192) {
570
- ProcessVectorizedBatch<192, vec_length>(batch, len, theta);
600
+ ProcessVectorizedBatchSS<192, vec_length>(batch, len, theta);
571
601
  }
572
602
  else {
573
603
  // Split large batches
574
- ProcessBatch(batch.Range(0, 192 / vec_length), len, theta);
575
- ProcessBatch(batch.Range(192 / vec_length, batch_size), len, theta);
604
+ ProcessBatchSS(batch.Range(0, 192 / vec_length), len, theta);
605
+ ProcessBatchSS(batch.Range(192 / vec_length, batch_size), len, theta);
576
606
  }
577
607
  }
578
608
 
579
609
  template<int N, int vec_length>
580
- static void ProcessVectorizedBatch(FlatArray<RecordingSS*> batch, double len, double theta) {
610
+ static void ProcessVectorizedBatchSS(FlatArray<RecordingSS*> batch, double len, double theta) {
581
611
 
582
612
  // *testout << "Processing vectorized S->S batch of size " << batch.Size() << ", with N = " << N << ", vec_length = " << vec_length << ", len = " << len << ", theta = " << theta << endl;
583
- MultiPole<MPSingular, Vec<N,Complex>> vec_source(batch[0]->mp_source->Order(), batch[0]->mp_source->Kappa(), batch[0]->mp_source->RTyp());
584
- MultiPole<MPSingular, Vec<N,Complex>> vec_target(batch[0]->mp_target->Order(), batch[0]->mp_target->Kappa(), batch[0]->mp_target->RTyp());
613
+ double kappa = batch[0]->mp_source->Kappa();
614
+ int so = batch[0]->mp_source->Order();
615
+ int to = batch[0]->mp_target->Order();
616
+ SphericalExpansion<Singular, Vec<N,Complex>> vec_source(so, kappa, batch[0]->mp_source->RTyp());
617
+ SphericalExpansion<Singular, Vec<N,Complex>> vec_target(to, kappa, batch[0]->mp_target->RTyp());
585
618
 
586
619
  // Copy multipoles into vectorized multipole
587
620
  for (int i = 0; i < batch.Size(); i++)
@@ -618,15 +651,18 @@ namespace ngsbem
618
651
  double r;
619
652
  int level;
620
653
  std::array<unique_ptr<Node>,8> childs;
621
- MultiPole<MPSingular, entry_type> mp;
654
+ SphericalExpansion<Singular, entry_type> mp;
622
655
 
623
656
  Array<tuple<Vec<3>, entry_type>> charges;
624
657
  Array<tuple<Vec<3>, Vec<3>, entry_type>> dipoles;
658
+ Array<tuple<Vec<3>, entry_type, Vec<3>, entry_type>> chargedipoles;
625
659
  Array<tuple<Vec<3>, Vec<3>, Complex,int>> currents;
626
660
 
627
661
  using simd_entry_type = decltype(MakeSimd(declval<std::array<entry_type,FMM_SW>>()));
628
662
  Array<tuple<Vec<3,SIMD<double,FMM_SW>>, simd_entry_type>> simd_charges;
629
663
  Array<tuple<Vec<3,SIMD<double,FMM_SW>>, Vec<3,SIMD<double,FMM_SW>>, simd_entry_type>> simd_dipoles;
664
+ Array<tuple<Vec<3,SIMD<double,FMM_SW>>, simd_entry_type,
665
+ Vec<3,SIMD<double,FMM_SW>>, simd_entry_type>> simd_chargedipoles;
630
666
 
631
667
  int total_sources;
632
668
  std::mutex node_mutex;
@@ -639,7 +675,15 @@ namespace ngsbem
639
675
  nodes_on_level[level]++;
640
676
  }
641
677
 
642
-
678
+ int GetChildNum (Vec<3> x) const
679
+ {
680
+ int childnum = 0;
681
+ if (x(0) > center(0)) childnum += 1;
682
+ if (x(1) > center(1)) childnum += 2;
683
+ if (x(2) > center(2)) childnum += 4;
684
+ return childnum;
685
+ }
686
+
643
687
  void CreateChilds()
644
688
  {
645
689
  if (childs[0]) throw Exception("have already childs");
@@ -655,15 +699,32 @@ namespace ngsbem
655
699
  }
656
700
 
657
701
 
702
+ void SendSourcesToChilds()
703
+ {
704
+ CreateChilds();
705
+
706
+ for (auto [x,c] : charges)
707
+ AddCharge (x,c);
708
+ for (auto [x,d,c] : dipoles)
709
+ AddDipole (x,d,c);
710
+ for (auto [x,c,d,c2] : chargedipoles)
711
+ AddChargeDipole (x,c,d,c2);
712
+ for (auto [sp,ep,j,num] : currents)
713
+ AddCurrent (sp,ep,j,num);
714
+
715
+ charges.DeleteAll();
716
+ dipoles.DeleteAll();
717
+ chargedipoles.DeleteAll();
718
+ currents.DeleteAll();
719
+ }
720
+
721
+
658
722
  void AddCharge (Vec<3> x, entry_type c)
659
723
  {
660
724
  if (have_childs) // quick check without locking
661
725
  {
662
726
  // directly send to childs:
663
- int childnum = 0;
664
- if (x(0) > center(0)) childnum += 1;
665
- if (x(1) > center(1)) childnum += 2;
666
- if (x(2) > center(2)) childnum += 4;
727
+ int childnum = GetChildNum(x);
667
728
  childs[childnum] -> AddCharge(x, c);
668
729
  return;
669
730
  }
@@ -672,36 +733,19 @@ namespace ngsbem
672
733
 
673
734
  if (have_childs) // test again after locking
674
735
  {
675
- // directly send to childs:
676
- int childnum = 0;
677
- if (x(0) > center(0)) childnum += 1;
678
- if (x(1) > center(1)) childnum += 2;
679
- if (x(2) > center(2)) childnum += 4;
736
+ int childnum = GetChildNum(x);
680
737
  childs[childnum] -> AddCharge(x, c);
681
738
  return;
682
739
  }
683
740
 
684
-
685
-
686
741
  charges.Append( tuple{x,c} );
687
742
 
688
743
  // if (r*mp.Kappa() < 1e-8) return;
689
744
  if (level > 20) return;
690
- if (charges.Size() < maxdirect && r*mp.Kappa() < 1)
745
+ if (charges.Size() < maxdirect && r*mp.Kappa() < 5)
691
746
  return;
692
-
693
- CreateChilds();
694
-
695
- for (auto [x,c] : charges)
696
- AddCharge (x,c);
697
- for (auto [x,d,c] : dipoles)
698
- AddDipole (x,d,c);
699
- for (auto [sp,ep,j,num] : currents)
700
- AddCurrent (sp,ep,j,num);
701
747
 
702
- charges.SetSize0();
703
- dipoles.SetSize0();
704
- currents.SetSize0();
748
+ SendSourcesToChilds();
705
749
  }
706
750
 
707
751
 
@@ -710,11 +754,7 @@ namespace ngsbem
710
754
  if (have_childs)
711
755
  {
712
756
  // directly send to childs:
713
-
714
- int childnum = 0;
715
- if (x(0) > center(0)) childnum += 1;
716
- if (x(1) > center(1)) childnum += 2;
717
- if (x(2) > center(2)) childnum += 4;
757
+ int childnum = GetChildNum(x);
718
758
  childs[childnum] -> AddDipole(x, d, c);
719
759
  return;
720
760
  }
@@ -724,37 +764,55 @@ namespace ngsbem
724
764
  if (have_childs)
725
765
  {
726
766
  // directly send to childs:
727
-
728
- int childnum = 0;
729
- if (x(0) > center(0)) childnum += 1;
730
- if (x(1) > center(1)) childnum += 2;
731
- if (x(2) > center(2)) childnum += 4;
767
+ int childnum = GetChildNum(x);
732
768
  childs[childnum] -> AddDipole(x, d, c);
733
769
  return;
734
770
  }
771
+
772
+ dipoles.Append (tuple{x,d,c});
773
+
774
+ if (level > 20) return;
775
+ if (dipoles.Size() < maxdirect)
776
+ return;
735
777
 
778
+ SendSourcesToChilds();
779
+ }
736
780
 
737
781
 
782
+ void AddChargeDipole (Vec<3> x, entry_type c, Vec<3> dir, entry_type c2)
783
+ {
784
+ if (have_childs)
785
+ {
786
+ // directly send to childs:
787
+ int childnum = GetChildNum(x);
788
+ childs[childnum] -> AddChargeDipole(x, c, dir, c2);
789
+ return;
790
+ }
791
+
792
+ lock_guard<mutex> guard(node_mutex);
793
+
794
+ if (have_childs)
795
+ {
796
+ // directly send to childs:
797
+ int childnum = GetChildNum(x);
798
+ childs[childnum] -> AddChargeDipole(x, c, dir, c2);
799
+ return;
800
+ }
738
801
 
739
- dipoles.Append (tuple{x,d,c});
802
+ chargedipoles.Append (tuple{x,c,dir,c2});
740
803
 
741
- if (dipoles.Size() < maxdirect || r < 1e-8)
804
+ if (chargedipoles.Size() < maxdirect || r < 1e-8)
742
805
  return;
743
-
744
- CreateChilds();
745
806
 
746
- for (auto [x,c] : charges)
747
- AddCharge (x,c);
748
- for (auto [x,d,c] : dipoles)
749
- AddDipole (x,d,c);
750
- for (auto [sp,ep,j,num] : currents)
751
- AddCurrent (sp,ep,j,num);
807
+ SendSourcesToChilds();
752
808
 
753
- charges.SetSize0();
754
- dipoles.SetSize0();
755
- currents.SetSize0();
809
+ /*
810
+ AddCharge (x, c);
811
+ AddDipole (x, dir, c2);
812
+ */
756
813
  }
757
814
 
815
+
758
816
  // not parallel yet
759
817
  void AddCurrent (Vec<3> sp, Vec<3> ep, Complex j, int num)
760
818
  {
@@ -764,7 +822,7 @@ namespace ngsbem
764
822
  Array<double> split;
765
823
  split.Append(0);
766
824
  for (int i = 0; i < 3; i++)
767
- if (sp(i) < center(i) != ep(i) < center(i))
825
+ if ((sp(i) < center(i)) != (ep(i) < center(i)))
768
826
  split += (center(i)-sp(i)) / (ep(i)-sp(i)); // segment cuts i-th coordinate plane
769
827
  split.Append(1);
770
828
  BubbleSort(split);
@@ -788,6 +846,12 @@ namespace ngsbem
788
846
 
789
847
  currents.Append (tuple{sp,ep,j,num});
790
848
 
849
+ // if (currents.Size() < maxdirect || r < 1e-8)
850
+ if (currents.Size() < 4 || r < 1e-8)
851
+ return;
852
+
853
+ SendSourcesToChilds();
854
+ /*
791
855
  // if (currents.Size() < maxdirect || r < 1e-8)
792
856
  if (currents.Size() < 4 || r < 1e-8)
793
857
  return;
@@ -804,6 +868,7 @@ namespace ngsbem
804
868
  charges.SetSize0();
805
869
  dipoles.SetSize0();
806
870
  currents.SetSize0();
871
+ */
807
872
  }
808
873
 
809
874
 
@@ -824,7 +889,7 @@ namespace ngsbem
824
889
  // t.AddFlops (charges.Size());
825
890
  if (simd_charges.Size())
826
891
  {
827
- // static Timer t("regmp, evaluate, simd charges"); RegionTimer r(t);
892
+ // static Timer t("mptool singmp, evaluate, simd charges"); RegionTimer r(t);
828
893
 
829
894
  simd_entry_type vsum{0.0};
830
895
  if (mp.Kappa() < 1e-12)
@@ -875,6 +940,8 @@ namespace ngsbem
875
940
 
876
941
  if (simd_dipoles.Size())
877
942
  {
943
+ // static Timer t("mptool singmp, evaluate, simd dipoles"); RegionTimer r(t);
944
+
878
945
  simd_entry_type vsum{0.0};
879
946
  for (auto [x,d,c] : simd_dipoles)
880
947
  {
@@ -901,6 +968,54 @@ namespace ngsbem
901
968
  }
902
969
  }
903
970
 
971
+
972
+
973
+ if (simd_chargedipoles.Size())
974
+ {
975
+ // static Timer t("mptool singmp, evaluate, simd chargedipoles"); RegionTimer r(t);
976
+ // t.AddFlops (simd_chargedipoles.Size()*FMM_SW);
977
+
978
+ simd_entry_type vsum{0.0};
979
+ for (auto [x,c,d,c2] : simd_chargedipoles)
980
+ {
981
+ auto rho = L2Norm(p-x);
982
+ auto rhokappa = rho*mp.Kappa();
983
+ auto invrho = If(rho>0.0, 1.0/rho, SIMD<double,FMM_SW>(0.0));
984
+ auto [si,co] = sincos(rhokappa);
985
+
986
+ auto kernelc = (1/(4*M_PI))*invrho*SIMD<Complex,FMM_SW>(co,si);
987
+ vsum += kernelc * c;
988
+
989
+ auto kernel =
990
+ invrho*invrho * InnerProduct(p-x, d) *
991
+ kernelc * SIMD<Complex,FMM_SW>(-1.0, rhokappa);
992
+
993
+ vsum += kernel * c2;
994
+ }
995
+ sum += HSum(vsum);
996
+ }
997
+ else
998
+ {
999
+ // static Timer t("mptool singmp, evaluate, chargedipoles"); RegionTimer r(t);
1000
+ // t.AddFlops (chargedipoles.Size());
1001
+
1002
+ for (auto [x,c,d,c2] : chargedipoles)
1003
+ if (double rho = L2Norm(p-x); rho > 0)
1004
+ {
1005
+ sum += (1/(4*M_PI))*exp(Complex(0,rho*mp.Kappa())) / rho * c;
1006
+
1007
+ Vec<3> drhodp = 1.0/rho * (p-x);
1008
+ Complex dGdrho = (1/(4*M_PI))*exp(Complex(0,rho*mp.Kappa())) *
1009
+ (Complex(0, mp.Kappa())/rho - 1.0/sqr(rho));
1010
+
1011
+ sum += dGdrho * InnerProduct(drhodp, d) * c2;
1012
+ }
1013
+ }
1014
+
1015
+
1016
+
1017
+
1018
+
904
1019
  for (auto [sp,ep,j,num] : currents)
905
1020
  {
906
1021
  // should use explizit formula instead ...
@@ -937,7 +1052,9 @@ namespace ngsbem
937
1052
  }
938
1053
 
939
1054
  if (dipoles.Size())
940
- throw Exception("EvaluateDeriv not implemented for dipoles in SingularMLMultiPole");
1055
+ throw Exception("EvaluateDeriv not implemented for dipoles in SingularMLExpansion");
1056
+ if (chargedipoles.Size())
1057
+ throw Exception("EvaluateDeriv not implemented for dipoles in SingularMLExpansion");
941
1058
 
942
1059
  for (auto [x,c] : charges)
943
1060
  if (double rho = L2Norm(p-x); rho > 0)
@@ -952,7 +1069,7 @@ namespace ngsbem
952
1069
 
953
1070
  void CalcTotalSources()
954
1071
  {
955
- total_sources = charges.Size() + dipoles.Size();
1072
+ total_sources = charges.Size() + dipoles.Size() + chargedipoles.Size();
956
1073
  for (auto & child : childs)
957
1074
  if (child)
958
1075
  {
@@ -985,9 +1102,9 @@ namespace ngsbem
985
1102
  }
986
1103
  else
987
1104
  {
988
- if (charges.Size()+dipoles.Size()+currents.Size() == 0)
1105
+ if (charges.Size()+dipoles.Size()+chargedipoles.Size()+currents.Size() == 0)
989
1106
  {
990
- mp = MultiPole<MPSingular,entry_type> (-1, mp.Kappa(), 1.);
1107
+ mp = SphericalExpansion<Singular,entry_type> (-1, mp.Kappa(), 1.);
991
1108
  return;
992
1109
  }
993
1110
 
@@ -1026,6 +1143,24 @@ namespace ngsbem
1026
1143
  simd_dipoles[ii] = MakeSimd(di);
1027
1144
  }
1028
1145
 
1146
+
1147
+ simd_chargedipoles.SetSize( (chargedipoles.Size()+FMM_SW-1)/FMM_SW);
1148
+ i = 0, ii = 0;
1149
+ for ( ; i+FMM_SW <= chargedipoles.Size(); i+=FMM_SW, ii++)
1150
+ {
1151
+ std::array<tuple<Vec<3>,entry_type,Vec<3>,entry_type>, FMM_SW> di;
1152
+ for (int j = 0; j < FMM_SW; j++) di[j] = chargedipoles[i+j];
1153
+ simd_chargedipoles[ii] = MakeSimd(di);
1154
+ }
1155
+ if (i < chargedipoles.Size())
1156
+ {
1157
+ std::array<tuple<Vec<3>,entry_type,Vec<3>,entry_type>, FMM_SW> di;
1158
+ int j = 0;
1159
+ for ( ; i+j < chargedipoles.Size(); j++) di[j] = chargedipoles[i+j];
1160
+ for ( ; j < FMM_SW; j++) di[j] = tuple( get<0>(di[0]), entry_type{0.0}, get<2>(di[0]), entry_type{0.0} );
1161
+ simd_chargedipoles[ii] = MakeSimd(di);
1162
+ }
1163
+
1029
1164
 
1030
1165
  if (nodes_to_process)
1031
1166
  *nodes_to_process += this;
@@ -1036,6 +1171,9 @@ namespace ngsbem
1036
1171
  for (auto [x,d,c] : dipoles)
1037
1172
  mp.AddDipole (x-center, d, c);
1038
1173
 
1174
+ for (auto [x,c,d,c2] : chargedipoles)
1175
+ mp.AddChargeDipole (x-center, c, d, c2);
1176
+
1039
1177
  for (auto [sp,ep,j,num] : currents)
1040
1178
  mp.AddCurrent (sp-center, ep-center, j, num);
1041
1179
  }
@@ -1044,7 +1182,7 @@ namespace ngsbem
1044
1182
 
1045
1183
  entry_type EvaluateMP(Vec<3> p) const
1046
1184
  {
1047
- if (charges.Size() || dipoles.Size())
1185
+ if (charges.Size() || dipoles.Size() || chargedipoles.Size())
1048
1186
  return Evaluate(p);
1049
1187
 
1050
1188
  if (L2Norm(p-center) > 3*r)
@@ -1064,7 +1202,7 @@ namespace ngsbem
1064
1202
  // cout << "EvaluateMPDeriv Singular, p = " << p << ", d = " << d << ", r = " << r << ", center = " << center << endl;
1065
1203
  // cout << "Norm: " << L2Norm(p-center) << " > " << 3*r << endl;
1066
1204
  // cout << "charges.Size() = " << charges.Size() << ", dipoles.Size() = " << dipoles.Size() << endl;
1067
- if (charges.Size() || dipoles.Size() || !childs[0])
1205
+ if (charges.Size() || dipoles.Size() || chargedipoles.Size() || !childs[0])
1068
1206
  return EvaluateDeriv(p, d);
1069
1207
 
1070
1208
  if (L2Norm(p-center) > 3*r)
@@ -1087,6 +1225,8 @@ namespace ngsbem
1087
1225
  ost << "xi = " << x << ", ci = " << c << endl;
1088
1226
  for (auto [x,d,c] : dipoles)
1089
1227
  ost << "xi = " << x << ", di = " << d << ", ci = " << c << endl;
1228
+ for (auto [x,c,d,c2] : chargedipoles)
1229
+ ost << "xi = " << x << ", c = " << c << ", di = " << d << ", ci = " << c2 << endl;
1090
1230
 
1091
1231
  for (int i = 0; i < 8; i++)
1092
1232
  if (childs[i]) childs[i] -> Print (ost, i);
@@ -1109,13 +1249,21 @@ namespace ngsbem
1109
1249
  num += ch->NumCoefficients();
1110
1250
  return num;
1111
1251
  }
1252
+
1253
+ void TraverseTree (const std::function<void(Node&)> & func)
1254
+ {
1255
+ func(*this);
1256
+ for (auto & child : childs)
1257
+ if (child)
1258
+ child->TraverseTree(func);
1259
+ }
1112
1260
  };
1113
1261
 
1114
1262
  Node root;
1115
1263
  bool havemp = false;
1116
1264
 
1117
1265
  public:
1118
- SingularMLMultiPole (Vec<3> center, double r, double kappa)
1266
+ SingularMLExpansion (Vec<3> center, double r, double kappa)
1119
1267
  : root(center, r, 0, kappa)
1120
1268
  {
1121
1269
  nodes_on_level = 0;
@@ -1134,6 +1282,11 @@ namespace ngsbem
1134
1282
  root.AddDipole(x, d, c);
1135
1283
  }
1136
1284
 
1285
+ void AddChargeDipole(Vec<3> x, entry_type c, Vec<3> dir, entry_type c2)
1286
+ {
1287
+ root.AddChargeDipole(x, c, dir, c2);
1288
+ }
1289
+
1137
1290
  void AddCurrent (Vec<3> sp, Vec<3> ep, Complex j, int num)
1138
1291
  {
1139
1292
  if constexpr (!std::is_same<entry_type, Vec<3,Complex>>())
@@ -1193,87 +1346,93 @@ namespace ngsbem
1193
1346
 
1194
1347
  root.CalcTotalSources();
1195
1348
 
1196
- if (false)
1349
+ if constexpr (false)
1197
1350
  // direct evaluation of S->S
1198
1351
  root.CalcMP(nullptr, nullptr);
1199
1352
  else
1200
1353
  {
1201
1354
 
1202
- Array<RecordingSS> recording;
1203
- Array<Node*> nodes_to_process;
1355
+ Array<RecordingSS> recording;
1356
+ Array<Node*> nodes_to_process;
1204
1357
 
1205
- {
1206
- RegionTimer reg(trec);
1207
- root.CalcMP(&recording, &nodes_to_process);
1208
- }
1358
+ {
1359
+ RegionTimer reg(trec);
1360
+ root.CalcMP(&recording, &nodes_to_process);
1361
+ }
1209
1362
 
1210
- {
1211
- RegionTimer rs2mp(ts2mp);
1212
- ParallelFor(nodes_to_process.Size(), [&](int i){
1213
- auto node = nodes_to_process[i];
1214
- for (auto [x,c]: node->charges)
1215
- node->mp.AddCharge(x-node->center, c);
1216
- for (auto [x,d,c]: node->dipoles)
1217
- node->mp.AddDipole(x-node->center, d, c);
1218
- for (auto [sp,ep,j,num]: node->currents)
1219
- node->mp.AddCurrent(sp-node->center, ep-node->center, j, num);
1220
- }, TasksPerThread(4));
1221
- }
1222
-
1223
- {
1224
- RegionTimer reg(tsort);
1225
- QuickSort (recording, [] (auto & a, auto & b)
1226
- {
1227
- if (a.len < (1-1e-8) * b.len) return true;
1228
- if (a.len > (1+1e-8) * b.len) return false;
1229
- return a.theta < b.theta;
1230
- });
1231
- }
1363
+ {
1364
+ RegionTimer rs2mp(ts2mp);
1365
+ ParallelFor(nodes_to_process.Size(), [&](int i)
1366
+ {
1367
+ auto node = nodes_to_process[i];
1368
+ for (auto [x,c]: node->charges)
1369
+ node->mp.AddCharge(x-node->center, c);
1370
+ for (auto [x,d,c]: node->dipoles)
1371
+ node->mp.AddDipole(x-node->center, d, c);
1372
+ for (auto [x,c,d,c2]: node->chargedipoles)
1373
+ node->mp.AddChargeDipole(x-node->center, c, d, c2);
1374
+ for (auto [sp,ep,j,num]: node->currents)
1375
+ node->mp.AddCurrent(sp-node->center, ep-node->center, j, num);
1376
+ }, TasksPerThread(4));
1377
+ }
1378
+
1379
+ {
1380
+ RegionTimer reg(tsort);
1381
+ QuickSort (recording, [] (auto & a, auto & b)
1382
+ {
1383
+ if (a.len < (1-1e-8) * b.len) return true;
1384
+ if (a.len > (1+1e-8) * b.len) return false;
1385
+ return a.theta < b.theta;
1386
+ });
1387
+ }
1232
1388
 
1233
- double current_len = -1e100;
1234
- double current_theta = -1e100;
1235
- Array<RecordingSS*> current_batch;
1236
- Array<Array<RecordingSS*>> batch_group;
1237
- Array<double> group_lengths;
1238
- Array<double> group_thetas;
1239
- for (auto & record : recording)
1240
- {
1241
- bool len_changed = fabs(record.len - current_len) > 1e-8;
1242
- bool theta_changed = fabs(record.theta - current_theta) > 1e-8;
1243
- if ((len_changed || theta_changed) && current_batch.Size() > 0) {
1389
+ double current_len = -1e100;
1390
+ double current_theta = -1e100;
1391
+ Array<RecordingSS*> current_batch;
1392
+ Array<Array<RecordingSS*>> batch_group;
1393
+ Array<double> group_lengths;
1394
+ Array<double> group_thetas;
1395
+ for (auto & record : recording)
1396
+ {
1397
+ bool len_changed = fabs(record.len - current_len) > 1e-8;
1398
+ bool theta_changed = fabs(record.theta - current_theta) > 1e-8;
1399
+ if ((len_changed || theta_changed) && current_batch.Size() > 0) {
1400
+ batch_group.Append(current_batch);
1401
+ group_lengths.Append(current_len);
1402
+ group_thetas.Append(current_theta);
1403
+ current_batch.SetSize(0);
1404
+ }
1405
+
1406
+ current_len = record.len;
1407
+ current_theta = record.theta;
1408
+ current_batch.Append(&record);
1409
+ }
1410
+
1411
+ if (current_batch.Size() > 0) {
1244
1412
  batch_group.Append(current_batch);
1245
1413
  group_lengths.Append(current_len);
1246
1414
  group_thetas.Append(current_theta);
1247
- current_batch.SetSize(0);
1248
- }
1415
+ }
1249
1416
 
1250
- current_len = record.len;
1251
- current_theta = record.theta;
1252
- current_batch.Append(&record);
1417
+ {
1418
+ RegionTimer rS2S(tS2S);
1419
+ // ParallelFor(batch_group.Size(), [&](int i) {
1420
+ for (int i = 0; i < batch_group.Size(); i++){
1421
+ // *testout << "Processing batch " << i << " of size " << batch_group[i].Size() << ", with len = " << group_lengths[i] << ", theta = " << group_thetas[i] << endl;
1422
+ int chunk_size = 24;
1423
+ if (batch_group[i].Size() < chunk_size)
1424
+ ProcessBatchSS(batch_group[i], group_lengths[i], group_thetas[i]);
1425
+ else
1426
+ ParallelForRange(IntRange(batch_group[i].Size()), [&](IntRange range) {
1427
+ auto sub_batch = batch_group[i].Range(range.First(), range.Next());
1428
+ ProcessBatchSS(sub_batch, group_lengths[i], group_thetas[i]);
1429
+ }, TasksPerThread(4));
1430
+ }
1431
+ }
1253
1432
  }
1254
- if (current_batch.Size() > 0) {
1255
- batch_group.Append(current_batch);
1256
- group_lengths.Append(current_len);
1257
- group_thetas.Append(current_theta);
1258
- }
1259
1433
 
1260
- {
1261
- RegionTimer rS2S(tS2S);
1262
- // ParallelFor(batch_group.Size(), [&](int i) {
1263
- for (int i = 0; i < batch_group.Size(); i++){
1264
- // *testout << "Processing batch " << i << " of size " << batch_group[i].Size() << ", with len = " << group_lengths[i] << ", theta = " << group_thetas[i] << endl;
1265
- int chunk_size = 24;
1266
- if (batch_group[i].Size() < chunk_size)
1267
- ProcessBatch(batch_group[i], group_lengths[i], group_thetas[i]);
1268
- else
1269
- ParallelForRange(IntRange(batch_group[i].Size()), [&](IntRange range) {
1270
- auto sub_batch = batch_group[i].Range(range.First(), range.Next());
1271
- ProcessBatch(sub_batch, group_lengths[i], group_thetas[i]);
1272
- }, TasksPerThread(4));
1273
- }
1274
- }
1275
- }
1276
-
1434
+ // cout << "have singular:" << endl;
1435
+ // PrintStatistics (cout);
1277
1436
  havemp = true;
1278
1437
  }
1279
1438
 
@@ -1285,35 +1444,73 @@ namespace ngsbem
1285
1444
  return root.Evaluate(p);
1286
1445
  }
1287
1446
 
1447
+
1448
+ void PrintStatistics (ostream & ost)
1449
+ {
1450
+ int levels = 0;
1451
+ int cnt = 0;
1452
+ root.TraverseTree( [&](Node & node) {
1453
+ levels = max(levels, node.level);
1454
+ cnt++;
1455
+ });
1456
+ ost << "levels: " << levels << endl;
1457
+ ost << "nodes: " << cnt << endl;
1458
+
1459
+ Array<int> num_on_level(levels+1);
1460
+ Array<int> order_on_level(levels+1);
1461
+ Array<size_t> coefs_on_level(levels+1);
1462
+ num_on_level = 0;
1463
+ order_on_level = 0;
1464
+ root.TraverseTree( [&](Node & node) {
1465
+ num_on_level[node.level]++;
1466
+ order_on_level[node.level] = max(order_on_level[node.level],node.mp.Order());
1467
+ coefs_on_level[node.level] += node.mp.SH().Coefs().Size();
1468
+ });
1469
+
1470
+ cout << "num on level" << endl;
1471
+ for (int i = 0; i < num_on_level.Size(); i++)
1472
+ cout << i << ": " << num_on_level[i] << ", order = " << order_on_level[i] << ", coefs " << coefs_on_level[i] << endl;
1473
+
1474
+ size_t totcoefs = 0;
1475
+ for (auto n : coefs_on_level)
1476
+ totcoefs += n;
1477
+ cout << "total mem in coefs: " << sizeof(entry_type)*totcoefs / sqr(1024) << " MB" << endl;
1478
+ }
1479
+
1480
+
1481
+
1288
1482
  template <typename entry_type2>
1289
- friend class RegularMLMultiPole;
1483
+ friend class RegularMLExpansion;
1290
1484
  };
1291
1485
 
1292
1486
 
1293
1487
  template <typename entry_type>
1294
- inline ostream & operator<< (ostream & ost, const SingularMLMultiPole<entry_type> & mlmp)
1488
+ inline ostream & operator<< (ostream & ost, const SingularMLExpansion<entry_type> & mlmp)
1295
1489
  {
1296
1490
  mlmp.Print(ost);
1297
1491
  return ost;
1298
1492
  }
1299
1493
 
1300
1494
 
1495
+ // *********************************** Regular multilevel Expansion
1496
+
1497
+
1301
1498
  template <typename elem_type=Complex>
1302
- class NGS_DLL_HEADER RegularMLMultiPole
1499
+ class NGS_DLL_HEADER RegularMLExpansion
1303
1500
  {
1304
1501
  static Array<size_t> nodes_on_level;
1305
1502
 
1306
1503
 
1307
1504
  struct RecordingRS
1308
1505
  {
1309
- const MultiPole<MPSingular,elem_type> * mpS;
1310
- MultiPole<MPRegular,elem_type> * mpR;
1506
+ const SphericalExpansion<Singular,elem_type> * mpS;
1507
+ SphericalExpansion<Regular,elem_type> * mpR;
1311
1508
  Vec<3> dist;
1312
1509
  double len, theta, phi;
1313
1510
  public:
1314
1511
  RecordingRS() = default;
1315
- RecordingRS (const MultiPole<MPSingular,elem_type> * ampS,
1316
- MultiPole<MPRegular,elem_type> * ampR,
1512
+ RecordingRS (const SphericalExpansion<Singular,elem_type> * ampS,
1513
+ SphericalExpansion<Regular,elem_type> * ampR,
1317
1514
  Vec<3> adist)
1318
1515
  : mpS(ampS), mpR(ampR), dist(adist)
1319
1516
  {
@@ -1334,28 +1531,28 @@ namespace ngsbem
1334
1531
  }
1335
1532
  }
1336
1533
  else if (N <= 3) {
1337
- ProcessVectorizedBatch<3, vec_length>(batch, len, theta);
1534
+ ProcessVectorizedBatchRS<3, vec_length>(batch, len, theta);
1338
1535
  }
1339
1536
  else if (N <= 4) {
1340
- ProcessVectorizedBatch<4, vec_length>(batch, len, theta);
1537
+ ProcessVectorizedBatchRS<4, vec_length>(batch, len, theta);
1341
1538
  }
1342
1539
  else if (N <= 6) {
1343
- ProcessVectorizedBatch<6, vec_length>(batch, len, theta);
1540
+ ProcessVectorizedBatchRS<6, vec_length>(batch, len, theta);
1344
1541
  }
1345
1542
  else if (N <= 12) {
1346
- ProcessVectorizedBatch<12, vec_length>(batch, len, theta);
1543
+ ProcessVectorizedBatchRS<12, vec_length>(batch, len, theta);
1347
1544
  }
1348
1545
  else if (N <= 24) {
1349
- ProcessVectorizedBatch<24, vec_length>(batch, len, theta);
1546
+ ProcessVectorizedBatchRS<24, vec_length>(batch, len, theta);
1350
1547
  }
1351
1548
  else if (N <= 48) {
1352
- ProcessVectorizedBatch<48, vec_length>(batch, len, theta);
1549
+ ProcessVectorizedBatchRS<48, vec_length>(batch, len, theta);
1353
1550
  }
1354
1551
  else if (N <= 96) {
1355
- ProcessVectorizedBatch<96, vec_length>(batch, len, theta);
1552
+ ProcessVectorizedBatchRS<96, vec_length>(batch, len, theta);
1356
1553
  }
1357
1554
  else if (N <= 192) {
1358
- ProcessVectorizedBatch<192, vec_length>(batch, len, theta);
1555
+ ProcessVectorizedBatchRS<192, vec_length>(batch, len, theta);
1359
1556
  }
1360
1557
  else {
1361
1558
  // Split large batches
@@ -1387,7 +1584,7 @@ namespace ngsbem
1387
1584
 
1388
1585
 
1389
1586
  template<int N, int vec_length>
1390
- static void ProcessVectorizedBatch(FlatArray<RecordingRS*> batch, double len, double theta) {
1587
+ static void ProcessVectorizedBatchRS(FlatArray<RecordingRS*> batch, double len, double theta) {
1391
1588
 
1392
1589
  // static Timer t("ProcessVectorizedBatch, N = "+ToString(N) + ", vec_len = " + ToString(vec_length));
1393
1590
  // RegionTimer reg(t, batch[0]->mpS->SH().Order());
@@ -1395,10 +1592,10 @@ namespace ngsbem
1395
1592
  // static Timer tfrombatch("mptools - copy from batch 2");
1396
1593
 
1397
1594
  // *testout << "Processing vectorized batch of size " << batch.Size() << ", with N = " << N << ", vec_length = " << vec_length << ", len = " << len << ", theta = " << theta << endl;
1398
- MultiPole<MPSingular, Vec<N,Complex>> vec_source(batch[0]->mpS->Order(), batch[0]->mpS->Kappa(), batch[0]->mpS->RTyp());
1399
- // MultiPole<MPSingular, elem_type> tmp_source{*batch[0]->mpS};
1400
- MultiPole<MPRegular, elem_type> tmp_target{*batch[0]->mpR};
1401
- MultiPole<MPRegular, Vec<N,Complex>> vec_target(batch[0]->mpR->Order(), batch[0]->mpR->Kappa(), batch[0]->mpR->RTyp());
1595
+ SphericalExpansion<Singular, Vec<N,Complex>> vec_source(batch[0]->mpS->Order(), batch[0]->mpS->Kappa(), batch[0]->mpS->RTyp());
1596
+ // SphericalExpansion<Singular, elem_type> tmp_source{*batch[0]->mpS};
1597
+ SphericalExpansion<Regular, elem_type> tmp_target{*batch[0]->mpR};
1598
+ SphericalExpansion<Regular, Vec<N,Complex>> vec_target(batch[0]->mpR->Order(), batch[0]->mpR->Kappa(), batch[0]->mpR->RTyp());
1402
1599
 
1403
1600
  // Copy multipoles into vectorized multipole
1404
1601
  // ttobatch.Start();
@@ -1446,24 +1643,32 @@ namespace ngsbem
1446
1643
  double r;
1447
1644
  int level;
1448
1645
  std::array<unique_ptr<Node>,8> childs;
1449
- MultiPole<MPRegular,elem_type> mp;
1646
+ SphericalExpansion<Regular,elem_type> mp;
1450
1647
  Array<Vec<3>> targets;
1648
+ Array<tuple<Vec<3>,double>> vol_targets;
1451
1649
  int total_targets;
1452
1650
  std::mutex node_mutex;
1453
1651
  atomic<bool> have_childs{false};
1454
1652
 
1455
- Array<const typename SingularMLMultiPole<elem_type>::Node*> singnodes;
1653
+ Array<const typename SingularMLExpansion<elem_type>::Node*> singnodes;
1456
1654
 
1457
1655
  Node (Vec<3> acenter, double ar, int alevel, double kappa)
1458
- : center(acenter), r(ar), level(alevel), mp(MPOrder(ar*kappa), kappa, ar) // 1.0/min(1.0, 0.25*r*kappa))
1656
+ : center(acenter), r(ar), level(alevel),
1657
+ // mp(MPOrder(ar*kappa), kappa, ar) // 1.0/min(1.0, 0.25*r*kappa))
1658
+ mp(-1, kappa, ar)
1459
1659
  // : center(acenter), r(ar), level(alevel), mp(MPOrder(ar*kappa), kappa, 1.0)
1460
1660
  {
1461
1661
  if (level < nodes_on_level.Size())
1462
1662
  nodes_on_level[level]++;
1463
1663
  }
1464
1664
 
1465
-
1466
- void CreateChilds()
1665
+ void Allocate()
1666
+ {
1667
+ mp = SphericalExpansion<Regular,elem_type>(MPOrder(r*mp.Kappa()), mp.Kappa(), r);
1668
+ }
1669
+
1670
+
1671
+ void CreateChilds(bool allocate = false)
1467
1672
  {
1468
1673
  if (childs[0]) throw Exception("have already childs");
1469
1674
  // create children nodes:
@@ -1474,11 +1679,13 @@ namespace ngsbem
1474
1679
  cc(1) += (i&2) ? r/2 : -r/2;
1475
1680
  cc(2) += (i&4) ? r/2 : -r/2;
1476
1681
  childs[i] = make_unique<Node> (cc, r/2, level+1, mp.Kappa());
1682
+ if (allocate)
1683
+ childs[i] -> Allocate();
1477
1684
  }
1478
1685
  have_childs = true;
1479
1686
  }
1480
1687
 
1481
- void AddSingularNode (const typename SingularMLMultiPole<elem_type>::Node & singnode, bool allow_refine,
1688
+ void AddSingularNode (const typename SingularMLExpansion<elem_type>::Node & singnode, bool allow_refine,
1482
1689
  Array<RecordingRS> * recording)
1483
1690
  {
1484
1691
  if (mp.SH().Order() < 0) return;
@@ -1526,7 +1733,7 @@ namespace ngsbem
1526
1733
  if (allow_refine)
1527
1734
  {
1528
1735
  if (!childs[0])
1529
- CreateChilds();
1736
+ CreateChilds(true);
1530
1737
 
1531
1738
  for (auto & ch : childs)
1532
1739
  ch -> AddSingularNode (singnode, allow_refine, recording);
@@ -1546,7 +1753,7 @@ namespace ngsbem
1546
1753
  childs[nr] -> AddSingularNode (singnode, allow_refine, recording);
1547
1754
  });
1548
1755
 
1549
- if (targets.Size())
1756
+ if (targets.Size()+vol_targets.Size())
1550
1757
  singnodes.Append(&singnode);
1551
1758
  }
1552
1759
  }
@@ -1561,7 +1768,7 @@ namespace ngsbem
1561
1768
  {
1562
1769
  if (allow_refine)
1563
1770
  if (mp.Order() > 30 && !childs[0])
1564
- CreateChilds();
1771
+ CreateChilds(allow_refine);
1565
1772
 
1566
1773
  if (childs[0])
1567
1774
  {
@@ -1581,7 +1788,7 @@ namespace ngsbem
1581
1788
  mp.TransformAdd (childs[nr]->mp, childs[nr]->center-center);
1582
1789
  childs[nr]->LocalizeExpansion(allow_refine);
1583
1790
  });
1584
- mp = MultiPole<MPRegular,elem_type>(-1, mp.Kappa(), 1.);
1791
+ mp = SphericalExpansion<Regular,elem_type>(-1, mp.Kappa(), 1.);
1585
1792
  //mp.SH().Coefs()=0.0;
1586
1793
  }
1587
1794
  }
@@ -1597,12 +1804,16 @@ namespace ngsbem
1597
1804
  if (childs[childnum])
1598
1805
  sum = childs[childnum]->Evaluate(p);
1599
1806
  else
1600
- sum = mp.Eval(p-center);
1601
-
1602
- // static Timer t("regmp, evaluate, singnode"); RegionTimer r(t);
1603
- for (auto sn : singnodes)
1604
- sum += sn->EvaluateMP(p);
1807
+ {
1808
+ // static Timer t("mptool regmp, evaluate reg"); RegionTimer r(t);
1809
+ sum = mp.Eval(p-center);
1810
+ }
1605
1811
 
1812
+ {
1813
+ // static Timer t("mptool regmp, evaluate, singnode"); RegionTimer r(t);
1814
+ for (auto sn : singnodes)
1815
+ sum += sn->EvaluateMP(p);
1816
+ }
1606
1817
  return sum;
1607
1818
  }
1608
1819
 
@@ -1628,6 +1839,14 @@ namespace ngsbem
1628
1839
  return sum;
1629
1840
  }
1630
1841
 
1842
+ void TraverseTree (const std::function<void(Node&)> & func)
1843
+ {
1844
+ func(*this);
1845
+ for (auto & child : childs)
1846
+ if (child)
1847
+ child->TraverseTree(func);
1848
+ }
1849
+
1631
1850
  double Norm() const
1632
1851
  {
1633
1852
  double norm = L2Norm(mp.SH().Coefs());
@@ -1645,17 +1864,23 @@ namespace ngsbem
1645
1864
  num += ch->NumCoefficients();
1646
1865
  return num;
1647
1866
  }
1648
-
1867
+
1868
+ int GetChildNum (Vec<3> x) const
1869
+ {
1870
+ int childnum = 0;
1871
+ if (x(0) > center(0)) childnum += 1;
1872
+ if (x(1) > center(1)) childnum += 2;
1873
+ if (x(2) > center(2)) childnum += 4;
1874
+ return childnum;
1875
+ }
1876
+
1649
1877
  void AddTarget (Vec<3> x)
1650
1878
  {
1651
1879
  // if (childs[0])
1652
1880
  if (have_childs) // quick check without locking
1653
1881
  {
1654
1882
  // directly send to childs:
1655
- int childnum = 0;
1656
- if (x(0) > center(0)) childnum += 1;
1657
- if (x(1) > center(1)) childnum += 2;
1658
- if (x(2) > center(2)) childnum += 4;
1883
+ int childnum = GetChildNum(x);
1659
1884
  childs[childnum] -> AddTarget( x );
1660
1885
  return;
1661
1886
  }
@@ -1665,32 +1890,74 @@ namespace ngsbem
1665
1890
  if (have_childs) // test again after locking
1666
1891
  {
1667
1892
  // directly send to childs:
1668
- int childnum = 0;
1669
- if (x(0) > center(0)) childnum += 1;
1670
- if (x(1) > center(1)) childnum += 2;
1671
- if (x(2) > center(2)) childnum += 4;
1893
+ int childnum = GetChildNum(x);
1672
1894
  childs[childnum] -> AddTarget(x);
1673
1895
  return;
1674
1896
  }
1675
1897
 
1676
-
1677
1898
  targets.Append( x );
1678
1899
 
1679
1900
  // if (r*mp.Kappa() < 1e-8) return;
1680
1901
  if (level > 20) return;
1681
- if (targets.Size() < maxdirect && r*mp.Kappa() < 1)
1902
+ if (targets.Size() < maxdirect && r*mp.Kappa() < 5)
1903
+ return;
1904
+
1905
+ CreateChilds();
1906
+
1907
+ for (auto t : targets)
1908
+ AddTarget (t);
1909
+ for (auto [x,r] : vol_targets)
1910
+ AddVolumeTarget (x,r);
1911
+
1912
+ targets.SetSize0();
1913
+ vol_targets.SetSize0();
1914
+ }
1915
+
1916
+
1917
+ void AddVolumeTarget (Vec<3> x, double tr)
1918
+ {
1919
+ if (MaxNorm(x-center) > r+tr) return;
1920
+
1921
+ if (have_childs)
1922
+ {
1923
+ for (auto & child : childs)
1924
+ child->AddVolumeTarget(x, tr);
1925
+ return;
1926
+ }
1927
+
1928
+
1929
+ lock_guard<mutex> guard(node_mutex);
1930
+
1931
+ if (have_childs)
1932
+ {
1933
+ for (auto & child : childs)
1934
+ child->AddVolumeTarget(x, tr);
1935
+ return;
1936
+ }
1937
+
1938
+
1939
+ vol_targets.Append (tuple(x,tr));
1940
+
1941
+ if (level > 20) return;
1942
+ if (vol_targets.Size() < maxdirect && (r*mp.Kappa() < 5))
1682
1943
  return;
1683
1944
 
1684
1945
  CreateChilds();
1685
1946
 
1686
1947
  for (auto t : targets)
1687
1948
  AddTarget (t);
1949
+ for (auto [x,r] : vol_targets)
1950
+ AddVolumeTarget (x,r);
1951
+
1688
1952
  targets.SetSize0();
1953
+ vol_targets.SetSize0();
1689
1954
  }
1690
1955
 
1956
+
1957
+
1691
1958
  void CalcTotalTargets()
1692
1959
  {
1693
- total_targets = targets.Size();
1960
+ total_targets = targets.Size() + vol_targets.Size();
1694
1961
  for (auto & child : childs)
1695
1962
  if (child)
1696
1963
  {
@@ -1710,8 +1977,21 @@ namespace ngsbem
1710
1977
  }
1711
1978
 
1712
1979
  if (total_targets == 0)
1713
- mp = MultiPole<MPRegular,elem_type>(-1, mp.Kappa(),1.);
1980
+ mp = SphericalExpansion<Regular,elem_type>(-1, mp.Kappa(),1.);
1981
+ }
1982
+
1983
+ void AllocateMemory()
1984
+ {
1985
+ for (auto & child : childs)
1986
+ if (child)
1987
+ child->AllocateMemory();
1988
+
1989
+ if (total_targets > 0)
1990
+ Allocate();
1991
+ // mp = SphericalExpansion<Regular,elem_type>(MPOrder(r*mp.Kappa()), mp.Kappa(), r); // -1, mp.Kappa(),1.);
1714
1992
  }
1993
+
1994
+
1715
1995
 
1716
1996
 
1717
1997
  void Print (ostream & ost, size_t childnr = -1) const
@@ -1730,14 +2010,15 @@ namespace ngsbem
1730
2010
  };
1731
2011
 
1732
2012
  Node root;
1733
- shared_ptr<SingularMLMultiPole<elem_type>> singmp;
2013
+ shared_ptr<SingularMLExpansion<elem_type>> singmp;
1734
2014
 
1735
2015
  public:
1736
- RegularMLMultiPole (shared_ptr<SingularMLMultiPole<elem_type>> asingmp, Vec<3> center, double r)
2016
+ RegularMLExpansion (shared_ptr<SingularMLExpansion<elem_type>> asingmp, Vec<3> center, double r)
1737
2017
  : root(center, r, 0, asingmp->Kappa()), singmp(asingmp)
1738
2018
  {
1739
2019
  if (!singmp->havemp) throw Exception("first call Calc for singular MP");
1740
-
2020
+ root.Allocate();
2021
+
1741
2022
  nodes_on_level = 0;
1742
2023
  nodes_on_level[0] = 1;
1743
2024
  {
@@ -1763,7 +2044,7 @@ namespace ngsbem
1763
2044
  }
1764
2045
  }
1765
2046
 
1766
- RegularMLMultiPole (Vec<3> center, double r, double kappa)
2047
+ RegularMLExpansion (Vec<3> center, double r, double kappa)
1767
2048
  : root(center, r, 0, kappa)
1768
2049
  {
1769
2050
  nodes_on_level = 0;
@@ -1775,87 +2056,151 @@ namespace ngsbem
1775
2056
  root.AddTarget (t);
1776
2057
  }
1777
2058
 
1778
- void CalcMP(shared_ptr<SingularMLMultiPole<elem_type>> asingmp, bool onlytargets = true)
2059
+ void AddVolumeTarget (Vec<3> t, double r)
2060
+ {
2061
+ root.AddVolumeTarget (t, r);
2062
+ }
2063
+
2064
+ void CalcMP(shared_ptr<SingularMLExpansion<elem_type>> asingmp, bool onlytargets = true)
1779
2065
  {
1780
2066
  static Timer t("mptool regular MLMP"); RegionTimer rg(t);
2067
+ static Timer tremove("removeempty");
1781
2068
  static Timer trec("mptool regular MLMP - recording");
1782
2069
  static Timer tsort("mptool regular MLMP - sort");
1783
2070
 
1784
2071
  singmp = asingmp;
1785
2072
 
2073
+
1786
2074
  root.CalcTotalTargets();
2075
+ // cout << "before remove empty trees:" << endl;
2076
+ // PrintStatistics(cout);
2077
+
2078
+ /*
2079
+ tremove.Start();
1787
2080
  if (onlytargets)
1788
2081
  root.RemoveEmptyTrees();
1789
-
2082
+ tremove.Stop();
2083
+ */
1790
2084
 
1791
- // root.AddSingularNode(singmp->root, !onlytargets, nullptr);
2085
+ root.AllocateMemory();
1792
2086
 
1793
- // /*
1794
- Array<RecordingRS> recording;
1795
- {
1796
- RegionTimer rrec(trec);
1797
- root.AddSingularNode(singmp->root, !onlytargets, &recording);
1798
- }
1799
-
1800
- // cout << "recorded: " << recording.Size() << endl;
1801
- {
1802
- RegionTimer reg(tsort);
1803
- QuickSort (recording, [] (auto & a, auto & b)
1804
- {
1805
- if (a.len < (1-1e-8) * b.len) return true;
1806
- if (a.len > (1+1e-8) * b.len) return false;
1807
- return a.theta < b.theta;
1808
- });
1809
- }
1810
-
1811
- double current_len = -1e100;
1812
- double current_theta = -1e100;
1813
- Array<RecordingRS*> current_batch;
1814
- Array<Array<RecordingRS*>> batch_group;
1815
- Array<double> group_lengths;
1816
- Array<double> group_thetas;
1817
- for (auto & record : recording)
2087
+ // cout << "after allocating regular:" << endl;
2088
+ // PrintStatistics(cout);
2089
+
2090
+ // cout << "starting S-R converion" << endl;
2091
+ // PrintStatistics(cout);
2092
+
2093
+
2094
+ if constexpr (false)
1818
2095
  {
1819
- bool len_changed = fabs(record.len - current_len) > 1e-8;
1820
- bool theta_changed = fabs(record.theta - current_theta) > 1e-8;
1821
- if ((len_changed || theta_changed) && current_batch.Size() > 0) {
2096
+ root.AddSingularNode(singmp->root, !onlytargets, nullptr);
2097
+ }
2098
+ else
2099
+ { // use recording
2100
+ Array<RecordingRS> recording;
2101
+ {
2102
+ RegionTimer rrec(trec);
2103
+ root.AddSingularNode(singmp->root, !onlytargets, &recording);
2104
+ }
2105
+
2106
+ // cout << "recorded: " << recording.Size() << endl;
2107
+ {
2108
+ RegionTimer reg(tsort);
2109
+ QuickSort (recording, [] (auto & a, auto & b)
2110
+ {
2111
+ if (a.len < (1-1e-8) * b.len) return true;
2112
+ if (a.len > (1+1e-8) * b.len) return false;
2113
+ return a.theta < b.theta;
2114
+ });
2115
+ }
2116
+
2117
+ double current_len = -1e100;
2118
+ double current_theta = -1e100;
2119
+ Array<RecordingRS*> current_batch;
2120
+ Array<Array<RecordingRS*>> batch_group;
2121
+ Array<double> group_lengths;
2122
+ Array<double> group_thetas;
2123
+ for (auto & record : recording)
2124
+ {
2125
+ bool len_changed = fabs(record.len - current_len) > 1e-8;
2126
+ bool theta_changed = fabs(record.theta - current_theta) > 1e-8;
2127
+ if ((len_changed || theta_changed) && current_batch.Size() > 0) {
2128
+ // ProcessBatch(current_batch, current_len, current_theta);
2129
+ batch_group.Append(current_batch);
2130
+ group_lengths.Append(current_len);
2131
+ group_thetas.Append(current_theta);
2132
+ current_batch.SetSize(0);
2133
+ }
2134
+
2135
+ current_len = record.len;
2136
+ current_theta = record.theta;
2137
+ current_batch.Append(&record);
2138
+ }
2139
+ if (current_batch.Size() > 0) {
1822
2140
  // ProcessBatch(current_batch, current_len, current_theta);
1823
2141
  batch_group.Append(current_batch);
1824
2142
  group_lengths.Append(current_len);
1825
2143
  group_thetas.Append(current_theta);
1826
- current_batch.SetSize(0);
1827
- }
1828
-
1829
- current_len = record.len;
1830
- current_theta = record.theta;
1831
- current_batch.Append(&record);
2144
+ }
2145
+
2146
+ ParallelFor(batch_group.Size(), [&](int i) {
2147
+ ProcessBatchRS(batch_group[i], group_lengths[i], group_thetas[i]);
2148
+ }, TasksPerThread(4));
1832
2149
  }
1833
- if (current_batch.Size() > 0) {
1834
- // ProcessBatch(current_batch, current_len, current_theta);
1835
- batch_group.Append(current_batch);
1836
- group_lengths.Append(current_len);
1837
- group_thetas.Append(current_theta);
1838
- }
1839
-
1840
- ParallelFor(batch_group.Size(), [&](int i) {
1841
- ProcessBatchRS(batch_group[i], group_lengths[i], group_thetas[i]);
1842
- }, TasksPerThread(4));
1843
- // */
1844
-
2150
+
1845
2151
 
1846
2152
  /*
1847
2153
  int maxlevel = 0;
1848
- for (auto [i,num] : Enumerate(RegularMLMultiPole::nodes_on_level))
2154
+ for (auto [i,num] : Enumerate(RegularMLExpansion::nodes_on_level))
1849
2155
  if (num > 0) maxlevel = i;
1850
2156
 
1851
2157
  for (int i = 0; i <= maxlevel; i++)
1852
- cout << "reg " << i << ": " << RegularMLMultiPole::nodes_on_level[i] << endl;
2158
+ cout << "reg " << i << ": " << RegularMLExpansion::nodes_on_level[i] << endl;
1853
2159
  */
1854
2160
 
2161
+ // cout << "starting R-R converion" << endl;
2162
+ // PrintStatistics(cout);
2163
+
1855
2164
  static Timer tloc("mptool regular localize expansion"); RegionTimer rloc(tloc);
1856
2165
  root.LocalizeExpansion(!onlytargets);
2166
+
2167
+
2168
+ // cout << "R-R conversion done" << endl;
2169
+ // PrintStatistics(cout);
1857
2170
  }
1858
2171
 
2172
+ void PrintStatistics (ostream & ost)
2173
+ {
2174
+ int levels = 0;
2175
+ int cnt = 0;
2176
+ root.TraverseTree( [&](Node & node) {
2177
+ levels = max(levels, node.level);
2178
+ cnt++;
2179
+ });
2180
+ ost << "levels: " << levels << endl;
2181
+ ost << "nodes: " << cnt << endl;
2182
+
2183
+ Array<int> num_on_level(levels+1);
2184
+ Array<int> order_on_level(levels+1);
2185
+ Array<size_t> coefs_on_level(levels+1);
2186
+ num_on_level = 0;
2187
+ order_on_level = 0;
2188
+ root.TraverseTree( [&](Node & node) {
2189
+ num_on_level[node.level]++;
2190
+ order_on_level[node.level] = max(order_on_level[node.level],node.mp.Order());
2191
+ coefs_on_level[node.level] += node.mp.SH().Coefs().Size();
2192
+ });
2193
+
2194
+ cout << "num on level" << endl;
2195
+ for (int i = 0; i < num_on_level.Size(); i++)
2196
+ cout << i << ": " << num_on_level[i] << ", order = " << order_on_level[i] << ", coefs " << coefs_on_level[i] << endl;
2197
+
2198
+ size_t totcoefs = 0;
2199
+ for (auto n : coefs_on_level)
2200
+ totcoefs += n;
2201
+ cout << "total mem in coefs: " << sizeof(elem_type)*totcoefs / sqr(1024) << " MB" << endl;
2202
+ }
2203
+
1859
2204
  void Print (ostream & ost) const
1860
2205
  {
1861
2206
  root.Print(ost);
@@ -1875,6 +2220,7 @@ namespace ngsbem
1875
2220
  {
1876
2221
  // static Timer t("mptool Eval MLMP regular"); RegionTimer r(t);
1877
2222
  // if (L2Norm(p-root.center) > root.r) return elem_type{0.0};
2223
+
1878
2224
  if (MaxNorm(p-root.center) > root.r)
1879
2225
  return singmp->Evaluate(p);
1880
2226
  return root.Evaluate(p);
@@ -1890,10 +2236,10 @@ namespace ngsbem
1890
2236
 
1891
2237
 
1892
2238
  template <typename elem_type>
1893
- inline ostream & operator<< (ostream & ost, const RegularMLMultiPole<elem_type> & mlmp)
2239
+ inline ostream & operator<< (ostream & ost, const RegularMLExpansion<elem_type> & mlmp)
1894
2240
  {
1895
2241
  mlmp.Print(ost);
1896
- // ost << "RegularMLMultiPole" << endl;
2242
+ // ost << "RegularMLExpansion" << endl;
1897
2243
  return ost;
1898
2244
  }
1899
2245