ngsolve 6.2.2502__cp311-cp311-win_amd64.whl → 6.2.2601__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107)
  1. netgen/include/analytic_integrals.hpp +10 -0
  2. netgen/include/basematrix.hpp +6 -0
  3. netgen/include/bdbequations.hpp +55 -0
  4. netgen/include/bem_diffops.hpp +475 -0
  5. netgen/include/bilinearform.hpp +4 -1
  6. netgen/include/blockjacobi.hpp +17 -5
  7. netgen/include/bspline.hpp +2 -0
  8. netgen/include/cholesky.hpp +2 -2
  9. netgen/include/code_generation.hpp +2 -2
  10. netgen/include/complex_wrapper.hpp +30 -2
  11. netgen/include/contact.hpp +8 -0
  12. netgen/include/diagonalmatrix.hpp +6 -0
  13. netgen/include/diffop_impl.hpp +3 -1
  14. netgen/include/diffopwithfactor.hpp +123 -0
  15. netgen/include/elementbyelement.hpp +9 -3
  16. netgen/include/expr.hpp +45 -7
  17. netgen/include/fespace.hpp +12 -4
  18. netgen/include/gridfunction.hpp +3 -3
  19. netgen/include/h1amg.hpp +85 -2
  20. netgen/include/h1lumping.hpp +6 -0
  21. netgen/include/hcurl_equations.hpp +29 -0
  22. netgen/include/hcurlcurlfe.hpp +20 -0
  23. netgen/include/hdiv_equations.hpp +15 -0
  24. netgen/include/hdivfe_utils.hpp +1 -0
  25. netgen/include/hdivhofespace.hpp +2 -0
  26. netgen/include/integrator.hpp +4 -16
  27. netgen/include/intrule.hpp +2 -1
  28. netgen/include/intrules_SauterSchwab.hpp +25 -0
  29. netgen/include/jacobi.hpp +35 -18
  30. netgen/include/kernels.hpp +724 -0
  31. netgen/include/l2hofe.hpp +1 -0
  32. netgen/include/matrix.hpp +8 -3
  33. netgen/include/meshaccess.hpp +4 -3
  34. netgen/include/mp_coefficient.hpp +145 -0
  35. netgen/include/mptools.hpp +1331 -368
  36. netgen/include/mycomplex.hpp +1 -1
  37. netgen/include/ngblas.hpp +116 -7
  38. netgen/include/potentialtools.hpp +22 -0
  39. netgen/include/preconditioner.hpp +23 -23
  40. netgen/include/prolongation.hpp +132 -6
  41. netgen/include/recursive_pol.hpp +63 -11
  42. netgen/include/simd_complex.hpp +45 -0
  43. netgen/include/sparsecholesky.hpp +6 -2
  44. netgen/include/sparsefactorization_interface.hpp +159 -0
  45. netgen/include/sparsematrix.hpp +21 -7
  46. netgen/include/sparsematrix_dyn.hpp +6 -7
  47. netgen/include/sparsematrix_impl.hpp +175 -40
  48. netgen/include/special_matrix.hpp +2 -0
  49. netgen/include/statushandler.hpp +8 -8
  50. netgen/include/symbolicintegrator.hpp +2 -1
  51. netgen/include/tangentialfacetfespace.hpp +7 -22
  52. netgen/include/thdivfe_impl.hpp +66 -0
  53. netgen/include/tscalarfe.hpp +1 -1
  54. netgen/include/vector.hpp +272 -47
  55. netgen/lib/libngsolve.lib +0 -0
  56. netgen/libngsolve.dll +0 -0
  57. netgen/ngscxx.bat +1 -1
  58. netgen/ngsld.bat +1 -1
  59. ngsolve/__init__.py +1 -0
  60. ngsolve/cmake/NGSolveConfig.cmake +8 -8
  61. ngsolve/cmake/ngsolve-targets.cmake +24 -18
  62. ngsolve/config/config.py +7 -7
  63. ngsolve/demos/intro/cmagnet.py +19 -22
  64. ngsolve/directsolvers.py +9 -21
  65. ngsolve/krylovspace.py +172 -3
  66. ngsolve/ngslib.lib +0 -0
  67. ngsolve/ngslib.pyd +0 -0
  68. ngsolve/nonlinearsolvers.py +2 -2
  69. ngsolve/preconditioners.py +1 -0
  70. ngsolve/solve_implementation.py +168 -0
  71. ngsolve/{solvers.py → solvers/__init__.py} +1 -1
  72. ngsolve/solvers/cudss.py +112 -0
  73. ngsolve/webgui.py +2 -0
  74. {ngsolve-6.2.2502.dist-info → ngsolve-6.2.2601.dist-info}/METADATA +2 -2
  75. {ngsolve-6.2.2502.dist-info → ngsolve-6.2.2601.dist-info}/RECORD +107 -97
  76. {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/Scripts/ngsolve.tcl +0 -0
  77. {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/beam.geo +0 -0
  78. {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/beam.vol +0 -0
  79. {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/chip.in2d +0 -0
  80. {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/chip.vol +0 -0
  81. {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/coil.geo +0 -0
  82. {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/coil.vol +0 -0
  83. {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/coilshield.geo +0 -0
  84. {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/coilshield.vol +0 -0
  85. {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/cube.geo +0 -0
  86. {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/cube.vol +0 -0
  87. {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/d10_DGdoubleglazing.pde +0 -0
  88. {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/d11_chip_nitsche.pde +0 -0
  89. {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/d1_square.pde +0 -0
  90. {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/d2_chip.pde +0 -0
  91. {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/d3_helmholtz.pde +0 -0
  92. {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/d4_cube.pde +0 -0
  93. {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/d5_beam.pde +0 -0
  94. {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/d6_shaft.pde +0 -0
  95. {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/d7_coil.pde +0 -0
  96. {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/d8_coilshield.pde +0 -0
  97. {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/d9_hybridDG.pde +0 -0
  98. {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/doubleglazing.in2d +0 -0
  99. {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/doubleglazing.vol +0 -0
  100. {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/piezo2d40round4.vol.gz +0 -0
  101. {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/shaft.geo +0 -0
  102. {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/shaft.vol +0 -0
  103. {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/square.in2d +0 -0
  104. {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/square.vol +0 -0
  105. {ngsolve-6.2.2502.dist-info → ngsolve-6.2.2601.dist-info}/LICENSE +0 -0
  106. {ngsolve-6.2.2502.dist-info → ngsolve-6.2.2601.dist-info}/WHEEL +0 -0
  107. {ngsolve-6.2.2502.dist-info → ngsolve-6.2.2601.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,159 @@
1
+ #ifndef SPARSEFACTORIZATION_INTERFACE_HPP
2
+ #define SPARSEFACTORIZATION_INTERFACE_HPP
3
+
4
+ #include "basevector.hpp"
5
+ #include "sparsecholesky.hpp"
6
+ #include "sparsematrix.hpp"
7
+ #include "../ngstd/python_ngstd.hpp"
8
+
9
+ namespace ngla {
10
+
11
+ void ExportSparseFactorizationInterface(py::module &m);
12
+
13
+ struct MapInnerDofs {
14
+ shared_ptr<BitArray> inner;
15
+ shared_ptr<const Array<int>> cluster;
16
+ Array<int> project;
17
+ Array<int> embed;
18
+ size_t size = 0;
19
+
20
+ MapInnerDofs() {}
21
+
22
+ void Init(shared_ptr<BitArray> ainner,
23
+ shared_ptr<const Array<int>> acluster = nullptr)
24
+ {
25
+ inner = ainner;
26
+ cluster = acluster;
27
+ if (!inner && !cluster) {
28
+ size = 0;
29
+ return;
30
+ }
31
+ if (inner) {
32
+ size = inner->NumSet();
33
+ project.SetSize(size);
34
+ embed.SetSize(inner->Size());
35
+ int j = 0;
36
+ for (int i = 0; i < inner->Size(); i++) {
37
+ if ((*inner)[i]) {
38
+ project[j] = i;
39
+ embed[i] = j++;
40
+ } else
41
+ embed[i] = -1;
42
+ }
43
+ return;
44
+ }
45
+
46
+ int j = 0;
47
+ for (int i = 0; i < cluster->Size(); i++) {
48
+ if ((*cluster)[i]) {
49
+ project.Append(i);
50
+ embed.Append(j++);
51
+ } else {
52
+ embed.Append(-1);
53
+ }
54
+ }
55
+ size = project.Size();
56
+ }
57
+
58
+ operator bool() const { return inner || cluster; }
59
+
60
+ template <typename T>
61
+ void Project(FlatVector<T> dst, FlatVector<T> src) const {
62
+ for (size_t i = 0; i < project.Size(); i++)
63
+ dst[i] = src[project[i]];
64
+ }
65
+
66
+ template <typename T> void Embed(T &dst, const T &src) const {
67
+ for (size_t i : Range(embed)) {
68
+ if (embed[i] >= 0)
69
+ dst[i] = src[embed[i]];
70
+ else
71
+ dst[i] = 0.0;
72
+ }
73
+ }
74
+
75
+ template <typename T>
76
+ void EmbedAdd(FlatVector<T> dst, FlatVector<T> src, T scale) const {
77
+ for (size_t i : Range(embed))
78
+ if (embed[i] >= 0)
79
+ dst[i] += scale * src[embed[i]];
80
+ }
81
+
82
+ template <typename T>
83
+ shared_ptr<SparseMatrixTM<T>>
84
+ ProjectMatrix(shared_ptr<const SparseMatrixTM<T>> m) {
85
+ Array<int> rowi, coli;
86
+ Array<T> vals;
87
+ // auto &dofs = *inner;
88
+
89
+ auto vals_ori = m->GetValues();
90
+
91
+ auto &cluster_array = *cluster;
92
+ auto &inner_bitarray = *inner;
93
+ auto is_used = [this, &inner_bitarray, &cluster_array](int i, int j) {
94
+ if (inner)
95
+ return inner_bitarray[i] && inner_bitarray[j];
96
+ return cluster_array[i] == cluster_array[j];
97
+ };
98
+
99
+ for (auto i : project)
100
+ for (auto j : m->GetRowIndices(i))
101
+ if (is_used(i, j)) {
102
+ rowi.Append(embed[i]);
103
+ coli.Append(embed[j]);
104
+ vals.Append(vals_ori[m->GetPosition(i, j)]);
105
+ }
106
+
107
+ auto res = SparseMatrixTM<T>::CreateFromCOO(rowi, coli, vals,
108
+ project.Size(), project.Size());
109
+ res->SetSPD(m->IsSPD());
110
+ return res;
111
+ }
112
+ };
113
+
114
+ class SparseFactorizationInterface : public SparseFactorization {
115
+ protected:
116
+ shared_ptr<const BaseSparseMatrix> inner_mat;
117
+ shared_ptr<BaseVector> inner_rhs, inner_solution;
118
+ MapInnerDofs map_inner_dofs;
119
+ bool is_complex = false;
120
+ bool is_symmetric = false;
121
+ bool is_symmetric_storage = false;
122
+ bool is_analyzed = false;
123
+ int width, height, inner_width, inner_height;
124
+
125
+ public:
126
+ SparseFactorizationInterface() = delete;
127
+ SparseFactorizationInterface(shared_ptr<const BaseMatrix> m,
128
+ shared_ptr<BitArray> ainner = nullptr,
129
+ shared_ptr<const Array<int>> acluster = nullptr);
130
+
131
+ virtual ~SparseFactorizationInterface() {}
132
+
133
+ void SetSubset(shared_ptr<BitArray> inner, shared_ptr<const Array<int>> cluster) override;
134
+
135
+ AutoVector CreateRowVector() const override {
136
+ return make_unique<VVector<double>>(Width());
137
+ }
138
+
139
+ AutoVector CreateColVector() const override {
140
+ return make_unique<VVector<double>>(Height());
141
+ }
142
+
143
+ shared_ptr<const BaseSparseMatrix> GetInnerMatrix() const {
144
+ return inner_mat;
145
+ }
146
+
147
+ void MultAdd(double s, const BaseVector &x, BaseVector &y) const override;
148
+ void MultAdd(Complex s, const BaseVector &x, BaseVector &y) const override;
149
+
150
+ virtual void Update() override;
151
+
152
+ virtual void Analyze() {}
153
+ virtual void Factor() {}
154
+ virtual void Solve(const BaseVector &rhs, BaseVector &solution) const = 0;
155
+ };
156
+
157
+ } // namespace ngla
158
+
159
+ #endif // SPARSEFACTORIZATION_INTERFACE_HPP
@@ -258,7 +258,7 @@ namespace ngla
258
258
  throw Exception ("BaseSparseMatrix::CreateBlockJacobiPrecond");
259
259
  }
260
260
 
261
- virtual shared_ptr<BaseSparseMatrix> CreateTranspose() const
261
+ virtual shared_ptr<BaseSparseMatrix> CreateTranspose(bool sorted = true) const
262
262
  {
263
263
  throw Exception ("BaseSparseMatrix::CreateTranspose");
264
264
  }
@@ -561,7 +561,7 @@ namespace ngla
561
561
  virtual tuple<int,int> EntrySizes() const override { return { ngbla::Height<TM>(), ngbla::Width<TM>() }; }
562
562
 
563
563
  shared_ptr<BaseSparseMatrix>
564
- CreateTransposeTM (const function<shared_ptr<SparseMatrixTM<decltype(ngbla::Trans(TM()))>>(const Array<int>&, int)> & creator) const;
564
+ CreateTransposeTM (const function<shared_ptr<SparseMatrixTM<decltype(ngbla::Trans(TM()))>>(const Array<int>&, int)> & creator, bool sorted) const;
565
565
 
566
566
  public:
567
567
  using BaseMatrix::GetMemoryTracer;
@@ -619,7 +619,7 @@ namespace ngla
619
619
 
620
620
 
621
621
  BaseMatrix::OperatorInfo GetOperatorInfo () const override
622
- { return { string("SparseMatrix")+typeid(TM).name(), this->Height(), this->Width() }; }
622
+ { return { string("SparseMatrix")+typeid(TM).name()+" (nze="+ToString(this->NZE())+")", this->Height(), this->Width() }; }
623
623
 
624
624
  virtual shared_ptr<BaseJacobiPrecond>
625
625
  CreateJacobiPrecond (shared_ptr<BitArray> inner) const override;
@@ -638,11 +638,11 @@ namespace ngla
638
638
 
639
639
  virtual shared_ptr<BaseSparseMatrix> Reorder (const Array<size_t> & reorder) const override;
640
640
 
641
- virtual shared_ptr<BaseSparseMatrix> CreateTranspose() const override
641
+ virtual shared_ptr<BaseSparseMatrix> CreateTranspose(bool sorted) const override
642
642
  {
643
643
  return this->CreateTransposeTM
644
644
  ( [](const Array<int> & elsperrow, int width) -> shared_ptr<SparseMatrixTM<decltype(Trans(TM()))>>
645
- { return make_shared<SparseMatrix<decltype(Trans(TM())), TV_COL, TV_ROW>> (elsperrow, width); } );
645
+ { return make_shared<SparseMatrix<decltype(Trans(TM())), TV_COL, TV_ROW>> (elsperrow, width); }, sorted );
646
646
  }
647
647
 
648
648
  virtual shared_ptr<BaseMatrix> DeleteZeroElements(double tol) const override;
@@ -669,6 +669,20 @@ namespace ngla
669
669
  for (size_t j = first; j < last; j++)
670
670
  vec[colpi[j]] += Trans(datap[j]) * el;
671
671
  }
672
+
673
+ void AddRowTransToVectorAtomic (int row, TVY el, FlatVector<TVX> vec) const
674
+ {
675
+ size_t first = firsti[row];
676
+ size_t last = firsti[row+1];
677
+
678
+ const ColIdx * colpi = colnr.Addr(0);
679
+ const TM * datap = data.Addr(0);
680
+
681
+ for (size_t j = first; j < last; j++)
682
+ // vec[colpi[j]] += Trans(datap[j]) * el;
683
+ AtomicAdd (vec[colpi[j]], Trans(datap[j]) * el);
684
+ }
685
+
672
686
 
673
687
  ///
674
688
  void AddRowConjTransToVector (int row, TVY el, FlatVector<TVX> vec) const
@@ -858,9 +872,9 @@ namespace ngla
858
872
  double sb, const SparseMatrixTM<double> & matb);
859
873
 
860
874
  NGS_DLL_HEADER shared_ptr<SparseMatrixTM<double>>
861
- MatMult (const SparseMatrixTM<double> & mata, const SparseMatrixTM<double> & matb);
875
+ MatMult (const SparseMatrixTM<double> & mata, const SparseMatrixTM<double> & matb, bool sort_output = true);
862
876
  NGS_DLL_HEADER shared_ptr<SparseMatrixTM<Complex>>
863
- MatMult (const SparseMatrixTM<Complex> & mata, const SparseMatrixTM<Complex> & matb);
877
+ MatMult (const SparseMatrixTM<Complex> & mata, const SparseMatrixTM<Complex> & matb, bool sort_output = true);
864
878
 
865
879
  #ifdef GOLD
866
880
  #include <sparsematrix_spec.hpp>
@@ -42,20 +42,19 @@ namespace ngla
42
42
  fm = matvec(i);
43
43
  }
44
44
  }
45
-
45
+
46
46
  virtual int VHeight() const override { return size; }
47
47
  virtual int VWidth() const override { return width; }
48
-
48
+
49
49
  virtual void Mult (const BaseVector & x, BaseVector & y) const override;
50
50
  virtual void MultAdd (double s, const BaseVector & x, BaseVector & y) const override;
51
-
51
+
52
52
  AutoVector CreateRowVector() const override
53
- { throw Exception("CreateRowVector not implemented for SparseMatrixDynamic!"); }
53
+ { return make_unique<VVector<TSCAL>> (VWidth()); }
54
54
  AutoVector CreateColVector() const override
55
- { throw Exception("CreateColVector not implemented for SparseMatrixDynamic!"); }
56
-
55
+ { return make_unique<VVector<TSCAL>> (VHeight()); }
56
+
57
57
  virtual tuple<int,int> EntrySizes() const override { return { bh, bw }; }
58
-
59
58
  };
60
59
 
61
60
 
@@ -51,28 +51,94 @@ namespace ngla
51
51
  FlatArray<TM> val, size_t h, size_t w)
52
52
  {
53
53
  static Timer t("SparseMatrix::CreateFromCOO"); RegionTimer r(t);
54
- Array<int> cnt(h);
54
+ static Timer t1("SparseMatrix::CreateFromCOO 1");
55
+ static Timer t2("SparseMatrix::CreateFromCOO 2");
56
+ static Timer t3("SparseMatrix::CreateFromCOO 3");
55
57
 
56
58
  /*
57
- cnt = 0;
58
- for (auto i : indi) cnt[i]++;
59
- */
59
+ {
60
+ Array<int> cnt(h);
60
61
 
62
+ t1.Start();
61
63
  DynamicTable<int> tab(h);
62
64
  for (size_t i = 0; i < indi.Size(); i++)
63
65
  tab.AddUnique(indi[i], indj[i]);
66
+ t1.Stop();
64
67
  for (size_t i = 0; i < h; i++)
65
68
  cnt[i] = tab.EntrySize(i);
66
69
 
67
70
  auto matrix = make_shared<SparseMatrix<TM>> (cnt, w);
71
+ t2.Start();
68
72
  for (auto k : ngstd::Range(indi))
69
73
  matrix->CreatePosition(indi[k], indj[k]);
74
+ t2.Stop();
70
75
  matrix->SetZero();
71
76
 
77
+ t3.Start();
72
78
  for (auto k : ngstd::Range(indi))
73
79
  (*matrix)(indi[k], indj[k]) += val[k];
80
+ t3.Stop();
81
+ // return matrix;
82
+ }
83
+ */
84
+
85
+ Array<int> cnt(h);
86
+ cnt = 0;
87
+ for (auto i : indi)
88
+ cnt[i]++;
89
+
90
+ Table<int> tab(cnt);
91
+ cnt = 0;
92
+
93
+ for (auto [i,j] : Zip(indi, indj))
94
+ tab[i][cnt[i]++] = j;
74
95
 
75
- return matrix;
96
+ cnt = 0;
97
+ // for (int i = 0; i < tab.Size(); i++)
98
+ ParallelFor (tab.Size(), [&] (size_t i)
99
+ {
100
+ QuickSort (tab[i]);
101
+
102
+ int prev = -1;
103
+ for (auto j : tab[i])
104
+ {
105
+ if (j != prev) cnt[i]++;
106
+ prev = j;
107
+ }
108
+ });
109
+
110
+ auto matrix = make_shared<SparseMatrix<TM>> (cnt, w);
111
+ t2.Start();
112
+ // for (auto k : ngstd::Range(indi))
113
+ // matrix->CreatePosition(indi[k], indj[k]);
114
+
115
+ cnt = 0;
116
+ for (int i = 0; i < tab.Size(); i++)
117
+ {
118
+ int prev = -1;
119
+ for (auto j : tab[i])
120
+ {
121
+ auto cols = matrix->GetRowIndices(i);
122
+ if (j != prev)
123
+ cols[cnt[i]++] = j;
124
+ prev = j;
125
+ }
126
+ }
127
+
128
+ t2.Stop();
129
+ matrix->SetZero();
130
+
131
+ t3.Start();
132
+ /*
133
+ for (auto k : ngstd::Range(indi))
134
+ (*matrix)(indi[k], indj[k]) += val[k];
135
+ */
136
+ ParallelFor (indi.Size(), [&](size_t k)
137
+ {
138
+ AtomicAdd ( (*matrix)(indi[k], indj[k]), val[k]);
139
+ });
140
+ t3.Stop();
141
+ return matrix;
76
142
  }
77
143
 
78
144
 
@@ -167,7 +233,8 @@ namespace ngla
167
233
  throw Exception(string("MAX_SYS_DIM = ")+to_string(MAX_SYS_DIM)+string(", need ")+to_string(mat_traits<TM>::HEIGHT));
168
234
  return nullptr;
169
235
  }
170
- else return make_shared<JacobiPrecond<TM,TV_ROW,TV_COL>> (*this, inner);
236
+ else return make_shared<JacobiPrecond<TM,TV_ROW,TV_COL>> ( dynamic_pointer_cast<SparseMatrix>
237
+ (const_cast<SparseMatrix*>(this)->shared_from_this()), inner);
171
238
  }
172
239
 
173
240
  template <class TM, class TV_ROW, class TV_COL>
@@ -288,6 +355,23 @@ namespace ngla
288
355
  AddRowTransToVector (i, s*fx(i), fy);
289
356
 
290
357
  timer.AddFlops (this->NZE());
358
+
359
+
360
+ /*
361
+ static Timer t("SparseMatrix::MultTransAdd"); RegionTimer reg(t);
362
+ t.AddFlops (this->NZE()*sizeof(TV_ROW)*sizeof(TV_COL)/sqr(sizeof(double)));
363
+
364
+ ParallelForRange
365
+ (balance, [&] (IntRange myrange)
366
+ {
367
+ FlatVector<TVY> fx = x.FV<TVY>();
368
+ FlatVector<TVX> fy = y.FV<TVX>();
369
+
370
+ for (auto i : myrange)
371
+ AddRowTransToVectorAtomic (i, s*fx(i), fy);
372
+ });
373
+ */
374
+
291
375
  }
292
376
 
293
377
 
@@ -393,21 +477,69 @@ namespace ngla
393
477
  DeleteZeroElements(double tol) const
394
478
  {
395
479
  static Timer t("SparseMatrix::DeleteZeroElements"); RegionTimer reg(t);
396
- Array<int> indi, indj;
397
- Array<TM> val;
398
- for (auto i : Range(this->Height()))
480
+ size_t h = this->Height();
481
+ size_t w = this->Width();
482
+ Array<int> cnt(h);
483
+ Array<bool> keep(data.Size());
484
+ cnt = 0;
485
+ keep = false;
486
+ ParallelForRange( h, [&](IntRange r)
399
487
  {
400
- for (auto j : Range(firsti[i], firsti[i+1]))
401
- {
488
+ for (auto i : r)
489
+ for (auto j : Range(firsti[i], firsti[i+1]))
402
490
  if (ngbla::L2Norm2(data[j]) > tol*tol)
491
+ {
492
+ keep[j] = true;
493
+ cnt[i]++;
494
+ }
495
+ }, 5*TaskManager::GetNumThreads());
496
+
497
+ auto matrix = make_shared<SparseMatrix<TM>> (cnt, w);
498
+ cnt = 0;
499
+
500
+ ParallelForRange( h, [&](IntRange r)
501
+ {
502
+ for (auto i : r)
503
+ {
504
+ auto cols = matrix->GetRowIndices(i);
505
+ auto vals = matrix->GetRowValues(i);
506
+ int icol = 0;
507
+ // size_t firsti_new = matrix->firsti[i];
508
+ for (auto j : Range(firsti[i], firsti[i+1]))
509
+ if (keep[j])
403
510
  {
404
- indi.Append (i);
405
- indj.Append (colnr[j]);
406
- val.Append (data[j]);
511
+ cols[icol] = colnr[j];
512
+ vals[icol] = data[j];
513
+ icol++;
407
514
  }
408
- }
409
- }
410
- return this->CreateFromCOO(indi, indj, val, this->Height(), this->Width());
515
+ }
516
+ }, 5*TaskManager::GetNumThreads());
517
+ return matrix;
518
+
519
+ /*
520
+ // needs parallelization and testing
521
+ Array<int> cnt(this->Height());
522
+ cnt = 0;
523
+ size_t cnt0 = 0;
524
+ for (auto i : Range(this->Height()))
525
+ for (auto j : Range(firsti[i], firsti[i+1]))
526
+ if (ngbla::L2Norm2(data[j]) > tol*tol)
527
+ cnt[i]++;
528
+ else
529
+ cnt0++;
530
+
531
+ cout << "zero-els = " << cnt0 << endl;
532
+
533
+
534
+ auto matrix = make_shared<SparseMatrix<TM>> (cnt, this->Width());
535
+
536
+ for (auto i : Range(this->Height()))
537
+ for (auto j : Range(firsti[i], firsti[i+1]))
538
+ if (ngbla::L2Norm2(data[j]) > tol*tol)
539
+ (*matrix)(i, colnr[i]) = data[j];
540
+
541
+ return matrix;
542
+ */
411
543
  }
412
544
 
413
545
  template <class TM>
@@ -640,36 +772,38 @@ namespace ngla
640
772
 
641
773
  template <class TM>
642
774
  shared_ptr<BaseSparseMatrix> SparseMatrixTM<TM> ::
643
- CreateTransposeTM (const function<shared_ptr<SparseMatrixTM<decltype(ngbla::Trans(TM()))>>(const Array<int>&,int)> & creator) const
775
+ CreateTransposeTM (const function<shared_ptr<SparseMatrixTM<decltype(ngbla::Trans(TM()))>>(const Array<int>&,int)> & creator,
776
+ bool sorted) const
644
777
  {
645
778
  Array<int> cnt(this->Width());
646
779
  cnt = 0;
647
780
  ParallelFor (this->Height(), [&] (int i)
648
- {
649
- for (int c : this->GetRowIndices(i))
650
- AsAtomic (cnt[c]) ++;
651
- });
652
-
781
+ {
782
+ for (int c : this->GetRowIndices(i))
783
+ AsAtomic (cnt[c]) ++;
784
+ });
785
+
653
786
  auto trans = creator(cnt, this->Height());
654
787
 
655
788
  cnt = 0;
656
789
  ParallelFor (this->Height(), [&] (int i)
657
- {
658
- for (int ci : Range(this->GetRowIndices(i)))
659
- {
660
- int c = this->GetRowIndices(i)[ci];
661
- int pos = AsAtomic(cnt[c])++;
662
- trans -> GetRowIndices(c)[pos] = i;
663
- trans -> GetRowValues(c)[pos] = Trans(this->GetRowValues(i)[ci]);
664
- }
665
- });
666
-
667
- ParallelFor (trans->Height(), [&] (int r)
668
- {
669
- auto rowvals = trans->GetRowValues(r);
670
- BubbleSort (trans->GetRowIndices(r),
671
- FlatArray(rowvals.Size(), rowvals.Data()));
672
- });
790
+ {
791
+ for (int ci : Range(this->GetRowIndices(i)))
792
+ {
793
+ int c = this->GetRowIndices(i)[ci];
794
+ int pos = AsAtomic(cnt[c])++;
795
+ trans -> GetRowIndices(c)[pos] = i;
796
+ trans -> GetRowValues(c)[pos] = Trans(this->GetRowValues(i)[ci]);
797
+ }
798
+ });
799
+
800
+ if (sorted)
801
+ ParallelFor (trans->Height(), [&] (int r)
802
+ {
803
+ auto rowvals = trans->GetRowValues(r);
804
+ BubbleSort (trans->GetRowIndices(r),
805
+ FlatArray(rowvals.Size(), rowvals.Data()));
806
+ });
673
807
 
674
808
  return trans;
675
809
  }
@@ -794,7 +928,8 @@ namespace ngla
794
928
  shared_ptr<BaseJacobiPrecond>
795
929
  SparseMatrixSymmetric<TM,TV> :: CreateJacobiPrecond (shared_ptr<BitArray> inner) const
796
930
  {
797
- return make_shared<JacobiPrecondSymmetric<TM,TV>> (*this, inner);
931
+ return make_shared<JacobiPrecondSymmetric<TM,TV>> ( dynamic_pointer_cast<SparseMatrixSymmetric>
932
+ (const_cast<SparseMatrixSymmetric*>(this)->shared_from_this()), inner);
798
933
  }
799
934
 
800
935
 
@@ -182,6 +182,8 @@ namespace ngla
182
182
 
183
183
  virtual void MultAdd (double s, const BaseVector & x, BaseVector & y) const override;
184
184
  virtual void MultTransAdd (double s, const BaseVector & x, BaseVector & y) const override;
185
+
186
+ virtual shared_ptr<BaseSparseMatrix> CreateSparseMatrix() const override;
185
187
  };
186
188
 
187
189
 
@@ -3,27 +3,27 @@
3
3
 
4
4
  namespace ngstd
5
5
  {
6
-
6
+
7
7
  /** Access to statusbar. (and more)
8
8
  */
9
9
 
10
10
  class NGS_DLL_HEADER BaseStatusHandler
11
- {
12
- public:
13
- static void PushStatus (const char * str);
11
+ {
12
+ public:
13
+ static void PushStatus (const std::string& str);
14
14
  static void PopStatus ();
15
15
  static void SetThreadPercentage (double percent);
16
-
16
+
17
17
  static void GetStatus (string & str, double & percent);
18
-
18
+
19
19
  static void SetTerminate(void);
20
20
  static void UnSetTerminate(void);
21
21
  static bool ShouldTerminate(void);
22
-
22
+
23
23
  class Region
24
24
  {
25
25
  public:
26
- Region(const char * str) { PushStatus(str); }
26
+ Region(const string& str) { PushStatus(str); }
27
27
  ~Region() { PopStatus(); }
28
28
  };
29
29
  };
@@ -604,6 +604,7 @@ public:
604
604
 
605
605
  class SymbolicFacetLinearFormIntegrator : public FacetLinearFormIntegrator
606
606
  {
607
+ protected:
607
608
  shared_ptr<CoefficientFunction> cf;
608
609
  Array<ProxyFunction*> proxies;
609
610
  Array<CoefficientFunction*> cache_cfs;
@@ -614,7 +615,7 @@ public:
614
615
  SIMD_IntegrationRule simd_ir; // if non-empty use this integration-rule
615
616
 
616
617
  public:
617
- SymbolicFacetLinearFormIntegrator (shared_ptr<CoefficientFunction> acf, VorB avb);
618
+ NGS_DLL_HEADER SymbolicFacetLinearFormIntegrator (shared_ptr<CoefficientFunction> acf, VorB avb);
618
619
 
619
620
  virtual VorB VB() const override { return vb; }
620
621
  virtual bool BoundaryForm() const override { return vb == BND; }
@@ -16,34 +16,22 @@ namespace ngcomp
16
16
  class NGS_DLL_HEADER TangentialFacetFESpace : public FESpace
17
17
  {
18
18
  protected:
19
- /// Level
20
- // int level;
21
- /// Number of Facets
22
- // int nfacets;
23
- ///
24
- // int ncfacets;
25
- ///
26
- // int nel;
27
-
28
19
  Array<int> first_facet_dof;
29
20
  Array<int> first_inner_dof; // for highest_order_dc
30
- // int ndof_lo;
31
21
 
32
22
  int rel_order;
33
23
 
34
24
  Array<IVec<2> > order_facet;
35
25
  Array<bool> fine_facet;
36
26
 
37
- // int ndof;
38
- // Array<int> ndlevel;
39
27
  bool var_order;
40
28
  bool print;
41
29
 
42
30
  bool highest_order_dc;
43
31
  bool hide_highest_order_dc;
32
+ bool all_dofs_together;
44
33
 
45
34
  public:
46
- ///
47
35
  TangentialFacetFESpace (shared_ptr<MeshAccess> ama, const Flags & flags,
48
36
  bool parseflags = false );
49
37
 
@@ -61,20 +49,11 @@ namespace ngcomp
61
49
  virtual void SetOrder (NodeId ni, int order) override;
62
50
  virtual int GetOrder (NodeId ni) const override;
63
51
 
64
-
65
- // virtual size_t GetNDof() const throw() override { return ndof; }
66
- // virtual size_t GetNDofLevel ( int i ) const override { return ndlevel[i]; }
67
-
68
52
  virtual FlatArray<VorB> GetDualShapeNodes (VorB vb) const override;
69
53
 
70
- // virtual int GetNDofLowOrder () const
71
- // { return ndof_lo; }
72
54
 
73
55
  virtual FiniteElement & GetFE(ElementId ei, Allocator & lh) const override;
74
56
 
75
- // virtual const FiniteElement & GetFE ( int elnr, LocalHeap & lh ) const;
76
- // virtual const FiniteElement & GetSFE ( int selnr, LocalHeap & lh ) const;
77
-
78
57
  virtual void GetFacetDofNrs (int felnr, Array<DofId> & dnums) const;
79
58
 
80
59
  virtual int GetNFacetDofs (int felnr) const;
@@ -90,6 +69,12 @@ namespace ngcomp
90
69
  virtual IVec<2> GetFacetOrder(int fnr) const;
91
70
 
92
71
  virtual int GetFirstFacetDof(int fanr) const;
72
+ const auto & GetFirstFacetDof() const { return first_facet_dof; }
73
+
74
+ IntRange GetFacetDofs (int nr) const
75
+ {
76
+ return IntRange (first_facet_dof[nr], first_facet_dof[nr+1]);
77
+ }
93
78
 
94
79
  virtual bool UsesHighestOrderDiscontinuous() const {return highest_order_dc;};
95
80