ngsolve 6.2.2504.post44.dev0__cp311-cp311-win_amd64.whl → 6.2.2601__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. netgen/include/analytic_integrals.hpp +10 -0
  2. netgen/include/basematrix.hpp +6 -0
  3. netgen/include/bdbequations.hpp +55 -0
  4. netgen/include/bem_diffops.hpp +475 -0
  5. netgen/include/bilinearform.hpp +4 -1
  6. netgen/include/bspline.hpp +2 -0
  7. netgen/include/code_generation.hpp +2 -2
  8. netgen/include/complex_wrapper.hpp +30 -2
  9. netgen/include/contact.hpp +8 -0
  10. netgen/include/diagonalmatrix.hpp +6 -0
  11. netgen/include/diffop_impl.hpp +3 -1
  12. netgen/include/diffopwithfactor.hpp +123 -0
  13. netgen/include/elementbyelement.hpp +9 -3
  14. netgen/include/expr.hpp +45 -7
  15. netgen/include/fespace.hpp +9 -2
  16. netgen/include/gridfunction.hpp +3 -3
  17. netgen/include/h1amg.hpp +24 -1
  18. netgen/include/h1lumping.hpp +6 -0
  19. netgen/include/hcurl_equations.hpp +29 -0
  20. netgen/include/hcurlcurlfe.hpp +20 -0
  21. netgen/include/hdivfe_utils.hpp +1 -0
  22. netgen/include/hdivhofespace.hpp +2 -0
  23. netgen/include/kernels.hpp +724 -0
  24. netgen/include/l2hofe.hpp +1 -0
  25. netgen/include/matrix.hpp +8 -3
  26. netgen/include/meshaccess.hpp +1 -1
  27. netgen/include/mp_coefficient.hpp +24 -19
  28. netgen/include/mptools.hpp +1255 -237
  29. netgen/include/mycomplex.hpp +1 -1
  30. netgen/include/ngblas.hpp +116 -7
  31. netgen/include/potentialtools.hpp +2 -2
  32. netgen/include/preconditioner.hpp +2 -2
  33. netgen/include/prolongation.hpp +6 -3
  34. netgen/include/recursive_pol.hpp +63 -11
  35. netgen/include/simd_complex.hpp +45 -0
  36. netgen/include/sparsecholesky.hpp +6 -2
  37. netgen/include/sparsefactorization_interface.hpp +159 -0
  38. netgen/include/sparsematrix.hpp +21 -7
  39. netgen/include/sparsematrix_dyn.hpp +2 -2
  40. netgen/include/sparsematrix_impl.hpp +100 -33
  41. netgen/include/statushandler.hpp +8 -8
  42. netgen/include/thdivfe_impl.hpp +66 -0
  43. netgen/include/tscalarfe.hpp +1 -1
  44. netgen/include/vector.hpp +272 -47
  45. netgen/lib/libngsolve.lib +0 -0
  46. netgen/libngsolve.dll +0 -0
  47. netgen/ngscxx.bat +1 -1
  48. netgen/ngsld.bat +1 -1
  49. ngsolve/cmake/NGSolveConfig.cmake +8 -8
  50. ngsolve/cmake/ngsolve-targets.cmake +17 -10
  51. ngsolve/config/config.py +8 -8
  52. ngsolve/demos/intro/cmagnet.py +19 -22
  53. ngsolve/directsolvers.py +9 -21
  54. ngsolve/krylovspace.py +172 -3
  55. ngsolve/ngslib.pyd +0 -0
  56. ngsolve/nonlinearsolvers.py +2 -2
  57. ngsolve/solve_implementation.py +14 -1
  58. ngsolve/{solvers.py → solvers/__init__.py} +1 -1
  59. ngsolve/solvers/cudss.py +112 -0
  60. ngsolve/webgui.py +1 -0
  61. {ngsolve-6.2.2504.post44.dev0.dist-info → ngsolve-6.2.2601.dist-info}/METADATA +2 -2
  62. {ngsolve-6.2.2504.post44.dev0.dist-info → ngsolve-6.2.2601.dist-info}/RECORD +94 -88
  63. {ngsolve-6.2.2504.post44.dev0.data → ngsolve-6.2.2601.data}/data/Scripts/ngsolve.tcl +0 -0
  64. {ngsolve-6.2.2504.post44.dev0.data → ngsolve-6.2.2601.data}/data/share/ngsolve/beam.geo +0 -0
  65. {ngsolve-6.2.2504.post44.dev0.data → ngsolve-6.2.2601.data}/data/share/ngsolve/beam.vol +0 -0
  66. {ngsolve-6.2.2504.post44.dev0.data → ngsolve-6.2.2601.data}/data/share/ngsolve/chip.in2d +0 -0
  67. {ngsolve-6.2.2504.post44.dev0.data → ngsolve-6.2.2601.data}/data/share/ngsolve/chip.vol +0 -0
  68. {ngsolve-6.2.2504.post44.dev0.data → ngsolve-6.2.2601.data}/data/share/ngsolve/coil.geo +0 -0
  69. {ngsolve-6.2.2504.post44.dev0.data → ngsolve-6.2.2601.data}/data/share/ngsolve/coil.vol +0 -0
  70. {ngsolve-6.2.2504.post44.dev0.data → ngsolve-6.2.2601.data}/data/share/ngsolve/coilshield.geo +0 -0
  71. {ngsolve-6.2.2504.post44.dev0.data → ngsolve-6.2.2601.data}/data/share/ngsolve/coilshield.vol +0 -0
  72. {ngsolve-6.2.2504.post44.dev0.data → ngsolve-6.2.2601.data}/data/share/ngsolve/cube.geo +0 -0
  73. {ngsolve-6.2.2504.post44.dev0.data → ngsolve-6.2.2601.data}/data/share/ngsolve/cube.vol +0 -0
  74. {ngsolve-6.2.2504.post44.dev0.data → ngsolve-6.2.2601.data}/data/share/ngsolve/d10_DGdoubleglazing.pde +0 -0
  75. {ngsolve-6.2.2504.post44.dev0.data → ngsolve-6.2.2601.data}/data/share/ngsolve/d11_chip_nitsche.pde +0 -0
  76. {ngsolve-6.2.2504.post44.dev0.data → ngsolve-6.2.2601.data}/data/share/ngsolve/d1_square.pde +0 -0
  77. {ngsolve-6.2.2504.post44.dev0.data → ngsolve-6.2.2601.data}/data/share/ngsolve/d2_chip.pde +0 -0
  78. {ngsolve-6.2.2504.post44.dev0.data → ngsolve-6.2.2601.data}/data/share/ngsolve/d3_helmholtz.pde +0 -0
  79. {ngsolve-6.2.2504.post44.dev0.data → ngsolve-6.2.2601.data}/data/share/ngsolve/d4_cube.pde +0 -0
  80. {ngsolve-6.2.2504.post44.dev0.data → ngsolve-6.2.2601.data}/data/share/ngsolve/d5_beam.pde +0 -0
  81. {ngsolve-6.2.2504.post44.dev0.data → ngsolve-6.2.2601.data}/data/share/ngsolve/d6_shaft.pde +0 -0
  82. {ngsolve-6.2.2504.post44.dev0.data → ngsolve-6.2.2601.data}/data/share/ngsolve/d7_coil.pde +0 -0
  83. {ngsolve-6.2.2504.post44.dev0.data → ngsolve-6.2.2601.data}/data/share/ngsolve/d8_coilshield.pde +0 -0
  84. {ngsolve-6.2.2504.post44.dev0.data → ngsolve-6.2.2601.data}/data/share/ngsolve/d9_hybridDG.pde +0 -0
  85. {ngsolve-6.2.2504.post44.dev0.data → ngsolve-6.2.2601.data}/data/share/ngsolve/doubleglazing.in2d +0 -0
  86. {ngsolve-6.2.2504.post44.dev0.data → ngsolve-6.2.2601.data}/data/share/ngsolve/doubleglazing.vol +0 -0
  87. {ngsolve-6.2.2504.post44.dev0.data → ngsolve-6.2.2601.data}/data/share/ngsolve/piezo2d40round4.vol.gz +0 -0
  88. {ngsolve-6.2.2504.post44.dev0.data → ngsolve-6.2.2601.data}/data/share/ngsolve/shaft.geo +0 -0
  89. {ngsolve-6.2.2504.post44.dev0.data → ngsolve-6.2.2601.data}/data/share/ngsolve/shaft.vol +0 -0
  90. {ngsolve-6.2.2504.post44.dev0.data → ngsolve-6.2.2601.data}/data/share/ngsolve/square.in2d +0 -0
  91. {ngsolve-6.2.2504.post44.dev0.data → ngsolve-6.2.2601.data}/data/share/ngsolve/square.vol +0 -0
  92. {ngsolve-6.2.2504.post44.dev0.dist-info → ngsolve-6.2.2601.dist-info}/LICENSE +0 -0
  93. {ngsolve-6.2.2504.post44.dev0.dist-info → ngsolve-6.2.2601.dist-info}/WHEEL +0 -0
  94. {ngsolve-6.2.2504.post44.dev0.dist-info → ngsolve-6.2.2601.dist-info}/top_level.txt +0 -0
@@ -76,7 +76,7 @@ INLINE const T arg(const MyComplex<T> &A)
76
76
  }
77
77
 
78
78
  template <class T>
79
- const MyComplex<T> conj(const MyComplex<T> &A)
79
+ INLINE const MyComplex<T> conj(const MyComplex<T> &A)
80
80
  {
81
81
  return MyComplex<T>(A.a, -A.b);
82
82
  }
netgen/include/ngblas.hpp CHANGED
@@ -412,10 +412,10 @@ namespace ngbla
412
412
 
413
413
 
414
414
  template <bool ADD, bool POS, ORDERING orda, ORDERING ordb>
415
- void NgGEMM (SliceMatrix<double,orda> a, SliceMatrix<double, ordb> b, SliceMatrix<double> c);
415
+ INLINE void NgGEMM (SliceMatrix<double,orda> a, SliceMatrix<double, ordb> b, SliceMatrix<double> c);
416
416
 
417
417
  template <bool ADD, bool POS, ORDERING orda, ORDERING ordb>
418
- void NgGEMM (SliceMatrix<double,orda> a, SliceMatrix<double, ordb> b, SliceMatrix<double,ColMajor> c);
418
+ INLINE void NgGEMM (SliceMatrix<double,orda> a, SliceMatrix<double, ordb> b, SliceMatrix<double,ColMajor> c);
419
419
 
420
420
 
421
421
 
@@ -428,7 +428,7 @@ namespace ngbla
428
428
  // t t C += A*B
429
429
 
430
430
  template <bool ADD, bool POS, ORDERING orda, ORDERING ordb>
431
- inline void NgGEMM (SliceMatrix<double,orda> a, SliceMatrix<double, ordb> b, SliceMatrix<double> c)
431
+ INLINE void NgGEMM (SliceMatrix<double,orda> a, SliceMatrix<double, ordb> b, SliceMatrix<double> c)
432
432
  {
433
433
  // static Timer t("generic MM, add/pos/ord="+ToString(ADD)+ToString(POS)+ToString(orda)+ToString(ordb));
434
434
  // RegionTimer r(t);
@@ -584,12 +584,31 @@ namespace ngbla
584
584
  }
585
585
  }
586
586
 
587
- template <typename TM, typename TVX, typename TVY>
588
- extern void TestFunc (TM m, TVX x, TVY y);
589
-
587
+ // template <typename TM, typename TVX, typename TVY>
588
+ // extern void TestFunc (TM m, TVX x, TVY y);
589
+
590
+
591
+ template <typename TS, typename T> constexpr bool IsVec = false;
592
+ template <typename TS, int S> constexpr bool IsVec<TS, Vec<S,TS>> = true;
593
+
590
594
  template <bool ADD, bool POS, typename TM, ORDERING ORD, typename TX, typename TY>
591
595
  INLINE void NgGEMV (BareSliceMatrix<TM,ORD> a, FlatVector<const TX> x, FlatVector<TY> y)
592
596
  {
597
+ if constexpr (std::is_same<TM,double>() && std::is_same<TX,TY>() && IsVec<Complex,TX>)
598
+ {
599
+ FlatMatrix<double> mx(x.Size(), sizeof(TX)/sizeof(double), (double*)(void*)x.Addr(0));
600
+ FlatMatrix<double> my(y.Size(), sizeof(TX)/sizeof(double), (double*)(void*)y.Addr(0));
601
+ NgGEMM<ADD,POS> (a.AddSize(y.Size(), x.Size()),make_SliceMatrix(mx), make_SliceMatrix(my));
602
+ return;
603
+ }
604
+ if constexpr (std::is_same<TM,Complex>() && std::is_same<TX,TY>() && IsVec<Complex,TX>)
605
+ {
606
+ FlatMatrix<Complex> mx(x.Size(), sizeof(TX)/sizeof(Complex), &const_cast<Complex&>(*(x.Data()->Data())));
607
+ FlatMatrix<Complex> my(y.Size(), sizeof(TX)/sizeof(Complex), y.Data()->Data());
608
+ NgGEMM<ADD,POS> (a.AddSize(y.Size(), x.Size()),make_SliceMatrix(mx), make_SliceMatrix(my));
609
+ return;
610
+ }
611
+
593
612
  if (!ADD)
594
613
  {
595
614
  if (!POS)
@@ -635,6 +654,16 @@ namespace ngbla
635
654
  extern NGS_DLL_HEADER
636
655
  void NgGEMV (double s, BareSliceMatrix<double,ord> a, SliceVector<double> x, SliceVector<double> y) NETGEN_NOEXCEPT;
637
656
  */
657
+
658
+
659
+
660
+
661
+
662
+
663
+
664
+ /* *********************** GEMV - SliceVector **************************** */
665
+
666
+
638
667
 
639
668
  template <bool ADD, ORDERING ord>
640
669
  extern NGS_DLL_HEADER
@@ -714,7 +743,30 @@ namespace ngbla
714
743
  }
715
744
 
716
745
 
717
-
746
+
747
+
748
+ template <bool ADD, bool POS, typename TM, ORDERING ORD, typename TX, typename TY>
749
+ INLINE void NgGEMV (BareSliceMatrix<TM,ORD> a, SliceVector<TX> x, SliceVector<TY> y)
750
+ {
751
+ if constexpr (std::is_same<TM,double>() && std::is_same<TX,TY>() && IsVec<Complex,TX>)
752
+ {
753
+ constexpr int VS = sizeof(TX)/sizeof(double);
754
+ SliceMatrix<double> mx(x.Size(), VS, x.Dist()*VS, (double*)(void*)x.Addr(0));
755
+ SliceMatrix<double> my(y.Size(), VS, y.Dist()*VS, (double*)(void*)y.Addr(0));
756
+ NgGEMM<ADD,POS> (a.AddSize(y.Size(), x.Size()),make_SliceMatrix(mx), make_SliceMatrix(my));
757
+ return;
758
+ }
759
+ else if constexpr (std::is_same<TM,Complex>() && std::is_same<TX,TY>() && IsVec<Complex,TX>)
760
+ {
761
+ constexpr int VS = sizeof(TX)/sizeof(Complex);
762
+ SliceMatrix<Complex> mx(x.Size(), VS, x.Dist()*VS, &const_cast<Complex&>(*(x.Data()->Data())));
763
+ SliceMatrix<Complex> my(y.Size(), VS, y.Dist()*VS, y.Data()->Data());
764
+ NgGEMM<ADD,POS> (a.AddSize(y.Size(), x.Size()),make_SliceMatrix(mx), make_SliceMatrix(my));
765
+ return;
766
+ }
767
+ else
768
+ NgGEMV<ADD> (POS ? 1.0 : -1.0, a, x, y);
769
+ }
718
770
 
719
771
 
720
772
 
@@ -880,9 +932,14 @@ namespace ngbla
880
932
  FlatVector<const TB>(prod.View().B().Range(0,w)),
881
933
  FlatVector<T>(self.Spec().Range(0,h)));
882
934
  else
935
+ NgGEMV<ADD,POS> (make_BareSliceMatrix(prod.View().A()),
936
+ SliceVector<TB>(prod.View().B().Range(0,w)),
937
+ SliceVector<T>(self.Spec().Range(0,h)));
938
+ /*
883
939
  NgGEMV<ADD> (POS ? 1.0 : -1.0, make_BareSliceMatrix(prod.View().A()),
884
940
  SliceVector<TB>(prod.View().B().Range(0,w)),
885
941
  SliceVector<T>(self.Spec().Range(0,h)));
942
+ */
886
943
  return self.Spec();
887
944
  }
888
945
  };
@@ -1099,8 +1156,60 @@ namespace ngbla
1099
1156
  }
1100
1157
  };
1101
1158
 
1159
+ // typedef void (*pmatmatcRR)(size_t, size_t, BareSliceMatrix<Complex,RowMajor>, BareSliceMatrix<Complex,RowMajor>,BareSliceMatrix<Complex,RowMajor>);
1160
+
1161
+ template <ORDERING OA, ORDERING OB>
1162
+ using pmatmatc = void (*)(size_t, size_t, BareSliceMatrix<Complex, OA>, BareSliceMatrix<Complex,OB>,BareSliceMatrix<Complex,RowMajor>);
1163
+ template <bool ADD, bool POS, ORDERING OA, ORDERING OB>
1164
+ extern NGS_DLL_HEADER pmatmatc<OA,OB> dispatch_matmatc[9];
1165
+
1166
+ template <bool ADD, bool POS, ORDERING OA, ORDERING OB>
1167
+ extern NGS_DLL_HEADER void NgGEMMBare (size_t ah, size_t aw, size_t bw, BareSliceMatrix<Complex,OA> a, BareSliceMatrix<Complex,OB> b, BareSliceMatrix<Complex,RowMajor> c);
1102
1168
 
1103
1169
 
1170
+ template <bool ADD, bool POS, ORDERING OA, ORDERING OB>
1171
+ void NgGEMM (SliceMatrix<Complex,OA> a, SliceMatrix<Complex,OB> b, SliceMatrix<Complex,RowMajor> c)
1172
+ {
1173
+ size_t ah = a.Height();
1174
+ size_t aw = a.Width();
1175
+ size_t bw = b.Width();
1176
+ if (aw < std::size(dispatch_matmatc<ADD,POS,OA,OB>))
1177
+ {
1178
+ (*dispatch_matmatc<ADD,POS,OA,OB>[aw])(ah, bw, make_BareSliceMatrix(a), make_BareSliceMatrix(b), make_BareSliceMatrix(c));
1179
+ return;
1180
+ }
1181
+
1182
+ NgGEMMBare<ADD,POS>(ah, aw, bw, make_BareSliceMatrix(a), make_BareSliceMatrix(b), make_BareSliceMatrix(c));
1183
+ }
1184
+
1185
+ template <bool ADD, bool POS, ORDERING OA, ORDERING OB>
1186
+ void NgGEMM (SliceMatrix<Complex,OA> a, SliceMatrix<Complex,OB> b, SliceMatrix<Complex,ColMajor> c)
1187
+ {
1188
+ NgGEMM<ADD,POS> (Trans(b), Trans(a), Trans(c));
1189
+ }
1190
+
1191
+ template <typename OP, typename T, typename TA, typename TB>
1192
+ class assign_trait<OP, T, MultExpr<TA, TB>,
1193
+ enable_if_t<IsConvertibleToSliceMatrix<TA,Complex>() &&
1194
+ IsConvertibleToSliceMatrix<TB,Complex>() &&
1195
+ IsConvertibleToSliceMatrix<T,Complex>(), int>>
1196
+ {
1197
+ public:
1198
+ static inline T & Assign (MatExpr<T> & self, const Expr<MultExpr<TA, TB>> & prod)
1199
+ {
1200
+ constexpr bool ADD = std::is_same<OP,typename MatExpr<T>::AsAdd>::value || std::is_same<OP,typename MatExpr<T>::AsSub>::value;
1201
+ constexpr bool POS = std::is_same<OP,typename MatExpr<T>::As>::value || std::is_same<OP,typename MatExpr<T>::AsAdd>::value;
1202
+
1203
+ size_t n = CombinedSize(prod.View().A().Height(), self.Spec().Height());
1204
+ size_t m = CombinedSize(prod.View().B().Width(), self.Spec().Width());
1205
+ size_t k = CombinedSize(prod.View().A().Width(), prod.View().B().Height());
1206
+
1207
+ NgGEMM<ADD,POS> (make_BareSliceMatrix(prod.View().A()).AddSize(n,k).RemoveConst(),
1208
+ make_BareSliceMatrix(prod.View().B()).AddSize(k,m).RemoveConst(),
1209
+ make_BareSliceMatrix(self.Spec()).AddSize(n,m));
1210
+ return self.Spec();
1211
+ }
1212
+ };
1104
1213
 
1105
1214
 
1106
1215
 
@@ -13,9 +13,9 @@
13
13
  namespace ngsbem
14
14
  {
15
15
 
16
- extern void AddChargeDensity (SingularMLMultiPole<Complex> & mp, shared_ptr<CoefficientFunction> current, ngcomp::Region reg);
16
+ extern void AddChargeDensity (SingularMLExpansion<Complex> & mp, shared_ptr<CoefficientFunction> current, ngcomp::Region reg);
17
17
 
18
- extern void AddCurrentDensity (SingularMLMultiPole<Vec<3,Complex>> & mp, shared_ptr<CoefficientFunction> current, ngcomp::Region reg);
18
+ extern void AddCurrentDensity (SingularMLExpansion<Vec<3,Complex>> & mp, shared_ptr<CoefficientFunction> current, ngcomp::Region reg);
19
19
 
20
20
  }
21
21
 
@@ -414,7 +414,7 @@ namespace ngcomp
414
414
  ///
415
415
  // MGPreconditioner * low_order_preconditioner;
416
416
  ///
417
- shared_ptr<Preconditioner> coarse_pre;
417
+ shared_ptr<BaseMatrix> coarse_pre;
418
418
  ///
419
419
  int finesmoothingsteps;
420
420
  ///
@@ -462,7 +462,7 @@ namespace ngcomp
462
462
  void MgTest () const;
463
463
 
464
464
  void SetDirectSolverCluster(shared_ptr<Array<int>> cluster);
465
- void SetCoarsePreconditioner(shared_ptr<Preconditioner> prec);
465
+ void SetCoarsePreconditioner(shared_ptr<BaseMatrix> prec);
466
466
  };
467
467
 
468
468
  class CommutingAMGPreconditioner : public Preconditioner
@@ -67,6 +67,11 @@ namespace ngmg
67
67
  y = tmp.Range(0, VWidth());
68
68
  }
69
69
 
70
+ shared_ptr<BaseSparseMatrix> CreateSparseMatrix() const override
71
+ {
72
+ return prol->CreateProlongationMatrix(level);
73
+ }
74
+
70
75
  AutoVector CreateRowVector() const override { return make_unique<VVector<double>> (VWidth()); }
71
76
  AutoVector CreateColVector() const override { return make_unique<VVector<double>> (VHeight()); }
72
77
  };
@@ -219,9 +224,7 @@ namespace ngmg
219
224
  }
220
225
 
221
226
  ///
222
- virtual shared_ptr<SparseMatrix< double >> CreateProlongationMatrix( int finelevel ) const override
223
- { return NULL; }
224
-
227
+ shared_ptr<SparseMatrix< double >> CreateProlongationMatrix( int finelevel ) const override;
225
228
 
226
229
  ///
227
230
  virtual void ProlongateInline (int finelevel, BaseVector & v) const override;
@@ -1507,9 +1507,24 @@ namespace ngfem
1507
1507
  class NormalizedLegendreFunctions : public RecursivePolynomialNonStatic<NormalizedLegendreFunctions>
1508
1508
  {
1509
1509
  int m;
1510
+ static Matrix<double,ColMajor> matA, matB;
1510
1511
  public:
1511
1512
  NormalizedLegendreFunctions (size_t _m) : m(_m) { }
1512
1513
 
1514
+
1515
+ static void Calc(int mmax)
1516
+ {
1517
+ if (matA.Height() >= mmax+1) return;
1518
+ matA.SetSize(mmax+1, mmax+1);
1519
+ matB.SetSize(mmax+1, mmax+1);
1520
+ for (int m = 0; m <= mmax; m++)
1521
+ for (int n = m+2; n <= mmax; n++)
1522
+ {
1523
+ matA(n,m) = (2*n-1) / sqrt((n-m+0.0)*(n+m));
1524
+ matB(n,m) = sqrt((n+m-1.0)*(n-m-1.0)) / sqrt((n-m+0.0)*(n+m));
1525
+ }
1526
+ }
1527
+
1513
1528
  template <typename T, typename S>
1514
1529
  NormalizedLegendreFunctions (size_t mmax, size_t nmax, T x, S && values)
1515
1530
  {
@@ -1524,23 +1539,60 @@ namespace ngfem
1524
1539
  auto y = Trans(values);
1525
1540
 
1526
1541
  y = 0.0;
1527
- double u = -sqrt((1-x)*(1+x));
1542
+ T u = -sqrt((1-x)*(1+x));
1528
1543
  y(0,0)=1;
1529
-
1530
- for (double m = 0; m <= mmax; m++)
1544
+
1545
+ if (mmax+1 > matA.Height() || nmax > matA.Width())
1546
+ {
1547
+ for (int m = 0; m <= mmax; m++)
1548
+ {
1549
+ if (m > 0)
1550
+ y(m,m)=y(m-1,m-1)*u*sqrt((2*m-1.0)/(2*m));
1551
+ if (m < nmax)
1552
+ y(m+1,m)=x*y(m,m)*sqrt(2*m+1.0);
1553
+ for (int n = m+2; n <= nmax; n++)
1554
+ y(n,m)=((2*n-1)*x*y(n-1,m) -
1555
+ sqrt((n+m-1.0)*(n-m-1.0))*y(n-2,m))
1556
+ /sqrt((n-m+0.0)*(n+m));
1557
+ }
1558
+ }
1559
+ else
1531
1560
  {
1532
- if (m > 0)
1533
- y(m,m)=y(m-1,m-1)*u*sqrt((2*m-1.0)/(2*m));
1534
- if (m < nmax)
1535
- y(m+1,m)=x*y(m,m)*sqrt(2*m+1.0);
1536
- for (int n = m+2; n <= nmax; n++)
1537
- y(n,m)=((2*n-1)*x*y(n-1,m) -
1538
- sqrt((n+m-1.0)*(n-m-1.0))*y(n-2,m))
1539
- /sqrt((n-m+0.0)*(n+m));
1561
+ for (int m = 0; m <= mmax; m++)
1562
+ {
1563
+ if (m > 0)
1564
+ y(m,m)=y(m-1,m-1)*u*sqrt((2*m-1.0)/(2*m));
1565
+ if (m < nmax)
1566
+ {
1567
+ T valold = y(m,m);
1568
+
1569
+ T val = x*valold*sqrt(2*m+1.0);
1570
+ y(m+1,m)=val;
1571
+
1572
+ auto coefsA = matA.Col(m);
1573
+ auto coefsB = matB.Col(m);
1574
+
1575
+ for (int n = m+2; n <= nmax; n++)
1576
+ {
1577
+ T valnew = coefsA(n) * x * val - coefsB(n) * valold;
1578
+ y(n,m) = valnew;
1579
+ valold = val;
1580
+ val = valnew;
1581
+ }
1582
+ }
1583
+ /*
1584
+ for (int n = m+2; n <= nmax; n++)
1585
+ y(n,m) = matA(n,m) * x * y(n-1,m) - matB(n,m) * y(n-2,m);
1586
+ */
1587
+ }
1540
1588
  }
1589
+
1541
1590
  for (int n = 0; n <= nmax; n++)
1591
+ y.Row(n).Range(0,n+1) *= sqrt(2*n+1.0);
1592
+ /*
1542
1593
  for (int m = 0; m <= n; m++)
1543
1594
  y(n,m)=y(n,m)*sqrt(2*n+1.0);
1595
+ */
1544
1596
  }
1545
1597
 
1546
1598
  template <class S>
@@ -56,6 +56,17 @@ namespace ngcore
56
56
  auto & imag() { return im; }
57
57
 
58
58
 
59
+ auto Lo() const
60
+ {
61
+ if constexpr (N == 2) return Complex(re.Lo(), im.Lo());
62
+ else return SIMD<Complex,N/2> (re.Lo(), im.Lo());
63
+ }
64
+ auto Hi() const
65
+ {
66
+ if constexpr (N == 2) return Complex(re.Hi(), im.Hi());
67
+ else return SIMD<Complex,N/2> (re.Hi(), im.Hi());
68
+ }
69
+
59
70
  // Numbers in SIMD structure are not necessarily in same order as in memory
60
71
  // for instance:
61
72
  // [x0,y0,x1,y1,x2,y2,x3,y3] -> [x0,x2,x1,x3,y0,y2,y1,y3]
@@ -266,6 +277,40 @@ namespace ngcore
266
277
  return SIMD<Complex, N> (IfPos (a.real(), b.real(), c.real()),
267
278
  IfPos (a.real(), b.imag(), c.imag()));
268
279
  }
280
+
281
+
282
+ template <int N>
283
+ INLINE SIMD<Complex, N> If (SIMD<mask64, N> a, SIMD<Complex, N> b, SIMD<Complex, N> c)
284
+ {
285
+ return SIMD<Complex, N> (If (a, b.real(), c.real()),
286
+ If (a, b.imag(), c.imag()));
287
+ }
288
+
289
+
290
+
291
+
292
+ template <typename T, size_t S> class MakeSimdCl;
293
+
294
+ template <size_t S>
295
+ class MakeSimdCl<Complex,S>
296
+ {
297
+ std::array<Complex,S> a;
298
+ public:
299
+ MakeSimdCl (std::array<Complex,S> aa) : a(aa) { ; }
300
+ auto Get() const
301
+ {
302
+ std::array<double,S> ar, ai;
303
+ for (int j = 0; j < S; j++)
304
+ {
305
+ ar[j] = ngbla::Real(a[j]);
306
+ ai[j] = ngbla::Imag(a[j]);
307
+ }
308
+
309
+ return SIMD<Complex,S> (MakeSimd(ar), MakeSimd(ai));
310
+ }
311
+ };
312
+
313
+
269
314
  }
270
315
 
271
316
 
@@ -28,10 +28,14 @@ namespace ngla
28
28
 
29
29
  public:
30
30
  SparseFactorization (shared_ptr<const BaseSparseMatrix> amatrix,
31
- shared_ptr<BitArray> ainner,
32
- shared_ptr<const Array<int>> acluster);
31
+ shared_ptr<BitArray> ainner = nullptr,
32
+ shared_ptr<const Array<int>> acluster = nullptr);
33
33
  SparseFactorization() {}
34
34
 
35
+ virtual void SetSubset
36
+ (shared_ptr<BitArray> ainner,
37
+ shared_ptr<const Array<int>> acluster=nullptr);
38
+
35
39
  void DoArchive(Archive& ar) override;
36
40
 
37
41
  // virtual bool IsComplex() const override { return matrix.lock()->IsComplex(); }
@@ -0,0 +1,159 @@
1
+ #ifndef SPARSEFACTORIZATION_INTERFACE_HPP
2
+ #define SPARSEFACTORIZATION_INTERFACE_HPP
3
+
4
+ #include "basevector.hpp"
5
+ #include "sparsecholesky.hpp"
6
+ #include "sparsematrix.hpp"
7
+ #include "../ngstd/python_ngstd.hpp"
8
+
9
+ namespace ngla {
10
+
11
+ void ExportSparseFactorizationInterface(py::module &m);
12
+
13
+ struct MapInnerDofs {
14
+ shared_ptr<BitArray> inner;
15
+ shared_ptr<const Array<int>> cluster;
16
+ Array<int> project;
17
+ Array<int> embed;
18
+ size_t size = 0;
19
+
20
+ MapInnerDofs() {}
21
+
22
+ void Init(shared_ptr<BitArray> ainner,
23
+ shared_ptr<const Array<int>> acluster = nullptr)
24
+ {
25
+ inner = ainner;
26
+ cluster = acluster;
27
+ if (!inner && !cluster) {
28
+ size = 0;
29
+ return;
30
+ }
31
+ if (inner) {
32
+ size = inner->NumSet();
33
+ project.SetSize(size);
34
+ embed.SetSize(inner->Size());
35
+ int j = 0;
36
+ for (int i = 0; i < inner->Size(); i++) {
37
+ if ((*inner)[i]) {
38
+ project[j] = i;
39
+ embed[i] = j++;
40
+ } else
41
+ embed[i] = -1;
42
+ }
43
+ return;
44
+ }
45
+
46
+ int j = 0;
47
+ for (int i = 0; i < cluster->Size(); i++) {
48
+ if ((*cluster)[i]) {
49
+ project.Append(i);
50
+ embed.Append(j++);
51
+ } else {
52
+ embed.Append(-1);
53
+ }
54
+ }
55
+ size = project.Size();
56
+ }
57
+
58
+ operator bool() const { return inner || cluster; }
59
+
60
+ template <typename T>
61
+ void Project(FlatVector<T> dst, FlatVector<T> src) const {
62
+ for (size_t i = 0; i < project.Size(); i++)
63
+ dst[i] = src[project[i]];
64
+ }
65
+
66
+ template <typename T> void Embed(T &dst, const T &src) const {
67
+ for (size_t i : Range(embed)) {
68
+ if (embed[i] >= 0)
69
+ dst[i] = src[embed[i]];
70
+ else
71
+ dst[i] = 0.0;
72
+ }
73
+ }
74
+
75
+ template <typename T>
76
+ void EmbedAdd(FlatVector<T> dst, FlatVector<T> src, T scale) const {
77
+ for (size_t i : Range(embed))
78
+ if (embed[i] >= 0)
79
+ dst[i] += scale * src[embed[i]];
80
+ }
81
+
82
+ template <typename T>
83
+ shared_ptr<SparseMatrixTM<T>>
84
+ ProjectMatrix(shared_ptr<const SparseMatrixTM<T>> m) {
85
+ Array<int> rowi, coli;
86
+ Array<T> vals;
87
+ // auto &dofs = *inner;
88
+
89
+ auto vals_ori = m->GetValues();
90
+
91
+ auto &cluster_array = *cluster;
92
+ auto &inner_bitarray = *inner;
93
+ auto is_used = [this, &inner_bitarray, &cluster_array](int i, int j) {
94
+ if (inner)
95
+ return inner_bitarray[i] && inner_bitarray[j];
96
+ return cluster_array[i] == cluster_array[j];
97
+ };
98
+
99
+ for (auto i : project)
100
+ for (auto j : m->GetRowIndices(i))
101
+ if (is_used(i, j)) {
102
+ rowi.Append(embed[i]);
103
+ coli.Append(embed[j]);
104
+ vals.Append(vals_ori[m->GetPosition(i, j)]);
105
+ }
106
+
107
+ auto res = SparseMatrixTM<T>::CreateFromCOO(rowi, coli, vals,
108
+ project.Size(), project.Size());
109
+ res->SetSPD(m->IsSPD());
110
+ return res;
111
+ }
112
+ };
113
+
114
+ class SparseFactorizationInterface : public SparseFactorization {
115
+ protected:
116
+ shared_ptr<const BaseSparseMatrix> inner_mat;
117
+ shared_ptr<BaseVector> inner_rhs, inner_solution;
118
+ MapInnerDofs map_inner_dofs;
119
+ bool is_complex = false;
120
+ bool is_symmetric = false;
121
+ bool is_symmetric_storage = false;
122
+ bool is_analyzed = false;
123
+ int width, height, inner_width, inner_height;
124
+
125
+ public:
126
+ SparseFactorizationInterface() = delete;
127
+ SparseFactorizationInterface(shared_ptr<const BaseMatrix> m,
128
+ shared_ptr<BitArray> ainner = nullptr,
129
+ shared_ptr<const Array<int>> acluster = nullptr);
130
+
131
+ virtual ~SparseFactorizationInterface() {}
132
+
133
+ void SetSubset(shared_ptr<BitArray> inner, shared_ptr<const Array<int>> cluster) override;
134
+
135
+ AutoVector CreateRowVector() const override {
136
+ return make_unique<VVector<double>>(Width());
137
+ }
138
+
139
+ AutoVector CreateColVector() const override {
140
+ return make_unique<VVector<double>>(Height());
141
+ }
142
+
143
+ shared_ptr<const BaseSparseMatrix> GetInnerMatrix() const {
144
+ return inner_mat;
145
+ }
146
+
147
+ void MultAdd(double s, const BaseVector &x, BaseVector &y) const override;
148
+ void MultAdd(Complex s, const BaseVector &x, BaseVector &y) const override;
149
+
150
+ virtual void Update() override;
151
+
152
+ virtual void Analyze() {}
153
+ virtual void Factor() {}
154
+ virtual void Solve(const BaseVector &rhs, BaseVector &solution) const = 0;
155
+ };
156
+
157
+ } // namespace ngla
158
+
159
+ #endif // SPARSEFACTORIZATION_INTERFACE_HPP
@@ -258,7 +258,7 @@ namespace ngla
258
258
  throw Exception ("BaseSparseMatrix::CreateBlockJacobiPrecond");
259
259
  }
260
260
 
261
- virtual shared_ptr<BaseSparseMatrix> CreateTranspose() const
261
+ virtual shared_ptr<BaseSparseMatrix> CreateTranspose(bool sorted = true) const
262
262
  {
263
263
  throw Exception ("BaseSparseMatrix::CreateTranspose");
264
264
  }
@@ -561,7 +561,7 @@ namespace ngla
561
561
  virtual tuple<int,int> EntrySizes() const override { return { ngbla::Height<TM>(), ngbla::Width<TM>() }; }
562
562
 
563
563
  shared_ptr<BaseSparseMatrix>
564
- CreateTransposeTM (const function<shared_ptr<SparseMatrixTM<decltype(ngbla::Trans(TM()))>>(const Array<int>&, int)> & creator) const;
564
+ CreateTransposeTM (const function<shared_ptr<SparseMatrixTM<decltype(ngbla::Trans(TM()))>>(const Array<int>&, int)> & creator, bool sorted) const;
565
565
 
566
566
  public:
567
567
  using BaseMatrix::GetMemoryTracer;
@@ -619,7 +619,7 @@ namespace ngla
619
619
 
620
620
 
621
621
  BaseMatrix::OperatorInfo GetOperatorInfo () const override
622
- { return { string("SparseMatrix")+typeid(TM).name(), this->Height(), this->Width() }; }
622
+ { return { string("SparseMatrix")+typeid(TM).name()+" (nze="+ToString(this->NZE())+")", this->Height(), this->Width() }; }
623
623
 
624
624
  virtual shared_ptr<BaseJacobiPrecond>
625
625
  CreateJacobiPrecond (shared_ptr<BitArray> inner) const override;
@@ -638,11 +638,11 @@ namespace ngla
638
638
 
639
639
  virtual shared_ptr<BaseSparseMatrix> Reorder (const Array<size_t> & reorder) const override;
640
640
 
641
- virtual shared_ptr<BaseSparseMatrix> CreateTranspose() const override
641
+ virtual shared_ptr<BaseSparseMatrix> CreateTranspose(bool sorted) const override
642
642
  {
643
643
  return this->CreateTransposeTM
644
644
  ( [](const Array<int> & elsperrow, int width) -> shared_ptr<SparseMatrixTM<decltype(Trans(TM()))>>
645
- { return make_shared<SparseMatrix<decltype(Trans(TM())), TV_COL, TV_ROW>> (elsperrow, width); } );
645
+ { return make_shared<SparseMatrix<decltype(Trans(TM())), TV_COL, TV_ROW>> (elsperrow, width); }, sorted );
646
646
  }
647
647
 
648
648
  virtual shared_ptr<BaseMatrix> DeleteZeroElements(double tol) const override;
@@ -669,6 +669,20 @@ namespace ngla
669
669
  for (size_t j = first; j < last; j++)
670
670
  vec[colpi[j]] += Trans(datap[j]) * el;
671
671
  }
672
+
673
+ void AddRowTransToVectorAtomic (int row, TVY el, FlatVector<TVX> vec) const
674
+ {
675
+ size_t first = firsti[row];
676
+ size_t last = firsti[row+1];
677
+
678
+ const ColIdx * colpi = colnr.Addr(0);
679
+ const TM * datap = data.Addr(0);
680
+
681
+ for (size_t j = first; j < last; j++)
682
+ // vec[colpi[j]] += Trans(datap[j]) * el;
683
+ AtomicAdd (vec[colpi[j]], Trans(datap[j]) * el);
684
+ }
685
+
672
686
 
673
687
  ///
674
688
  void AddRowConjTransToVector (int row, TVY el, FlatVector<TVX> vec) const
@@ -858,9 +872,9 @@ namespace ngla
858
872
  double sb, const SparseMatrixTM<double> & matb);
859
873
 
860
874
  NGS_DLL_HEADER shared_ptr<SparseMatrixTM<double>>
861
- MatMult (const SparseMatrixTM<double> & mata, const SparseMatrixTM<double> & matb);
875
+ MatMult (const SparseMatrixTM<double> & mata, const SparseMatrixTM<double> & matb, bool sort_output = true);
862
876
  NGS_DLL_HEADER shared_ptr<SparseMatrixTM<Complex>>
863
- MatMult (const SparseMatrixTM<Complex> & mata, const SparseMatrixTM<Complex> & matb);
877
+ MatMult (const SparseMatrixTM<Complex> & mata, const SparseMatrixTM<Complex> & matb, bool sort_output = true);
864
878
 
865
879
  #ifdef GOLD
866
880
  #include <sparsematrix_spec.hpp>
@@ -50,9 +50,9 @@ namespace ngla
50
50
  virtual void MultAdd (double s, const BaseVector & x, BaseVector & y) const override;
51
51
 
52
52
  AutoVector CreateRowVector() const override
53
- { throw make_unique<VVector<TSCAL>> (VWidth()); }
53
+ { return make_unique<VVector<TSCAL>> (VWidth()); }
54
54
  AutoVector CreateColVector() const override
55
- { throw make_unique<VVector<TSCAL>> (VHeight()); }
55
+ { return make_unique<VVector<TSCAL>> (VHeight()); }
56
56
 
57
57
  virtual tuple<int,int> EntrySizes() const override { return { bh, bw }; }
58
58
  };