ngsolve 6.2.2502__cp311-cp311-win_amd64.whl → 6.2.2601__cp311-cp311-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- netgen/include/analytic_integrals.hpp +10 -0
- netgen/include/basematrix.hpp +6 -0
- netgen/include/bdbequations.hpp +55 -0
- netgen/include/bem_diffops.hpp +475 -0
- netgen/include/bilinearform.hpp +4 -1
- netgen/include/blockjacobi.hpp +17 -5
- netgen/include/bspline.hpp +2 -0
- netgen/include/cholesky.hpp +2 -2
- netgen/include/code_generation.hpp +2 -2
- netgen/include/complex_wrapper.hpp +30 -2
- netgen/include/contact.hpp +8 -0
- netgen/include/diagonalmatrix.hpp +6 -0
- netgen/include/diffop_impl.hpp +3 -1
- netgen/include/diffopwithfactor.hpp +123 -0
- netgen/include/elementbyelement.hpp +9 -3
- netgen/include/expr.hpp +45 -7
- netgen/include/fespace.hpp +12 -4
- netgen/include/gridfunction.hpp +3 -3
- netgen/include/h1amg.hpp +85 -2
- netgen/include/h1lumping.hpp +6 -0
- netgen/include/hcurl_equations.hpp +29 -0
- netgen/include/hcurlcurlfe.hpp +20 -0
- netgen/include/hdiv_equations.hpp +15 -0
- netgen/include/hdivfe_utils.hpp +1 -0
- netgen/include/hdivhofespace.hpp +2 -0
- netgen/include/integrator.hpp +4 -16
- netgen/include/intrule.hpp +2 -1
- netgen/include/intrules_SauterSchwab.hpp +25 -0
- netgen/include/jacobi.hpp +35 -18
- netgen/include/kernels.hpp +724 -0
- netgen/include/l2hofe.hpp +1 -0
- netgen/include/matrix.hpp +8 -3
- netgen/include/meshaccess.hpp +4 -3
- netgen/include/mp_coefficient.hpp +145 -0
- netgen/include/mptools.hpp +1331 -368
- netgen/include/mycomplex.hpp +1 -1
- netgen/include/ngblas.hpp +116 -7
- netgen/include/potentialtools.hpp +22 -0
- netgen/include/preconditioner.hpp +23 -23
- netgen/include/prolongation.hpp +132 -6
- netgen/include/recursive_pol.hpp +63 -11
- netgen/include/simd_complex.hpp +45 -0
- netgen/include/sparsecholesky.hpp +6 -2
- netgen/include/sparsefactorization_interface.hpp +159 -0
- netgen/include/sparsematrix.hpp +21 -7
- netgen/include/sparsematrix_dyn.hpp +6 -7
- netgen/include/sparsematrix_impl.hpp +175 -40
- netgen/include/special_matrix.hpp +2 -0
- netgen/include/statushandler.hpp +8 -8
- netgen/include/symbolicintegrator.hpp +2 -1
- netgen/include/tangentialfacetfespace.hpp +7 -22
- netgen/include/thdivfe_impl.hpp +66 -0
- netgen/include/tscalarfe.hpp +1 -1
- netgen/include/vector.hpp +272 -47
- netgen/lib/libngsolve.lib +0 -0
- netgen/libngsolve.dll +0 -0
- netgen/ngscxx.bat +1 -1
- netgen/ngsld.bat +1 -1
- ngsolve/__init__.py +1 -0
- ngsolve/cmake/NGSolveConfig.cmake +8 -8
- ngsolve/cmake/ngsolve-targets.cmake +24 -18
- ngsolve/config/config.py +7 -7
- ngsolve/demos/intro/cmagnet.py +19 -22
- ngsolve/directsolvers.py +9 -21
- ngsolve/krylovspace.py +172 -3
- ngsolve/ngslib.lib +0 -0
- ngsolve/ngslib.pyd +0 -0
- ngsolve/nonlinearsolvers.py +2 -2
- ngsolve/preconditioners.py +1 -0
- ngsolve/solve_implementation.py +168 -0
- ngsolve/{solvers.py → solvers/__init__.py} +1 -1
- ngsolve/solvers/cudss.py +112 -0
- ngsolve/webgui.py +2 -0
- {ngsolve-6.2.2502.dist-info → ngsolve-6.2.2601.dist-info}/METADATA +2 -2
- {ngsolve-6.2.2502.dist-info → ngsolve-6.2.2601.dist-info}/RECORD +107 -97
- {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/Scripts/ngsolve.tcl +0 -0
- {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/beam.geo +0 -0
- {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/beam.vol +0 -0
- {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/chip.in2d +0 -0
- {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/chip.vol +0 -0
- {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/coil.geo +0 -0
- {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/coil.vol +0 -0
- {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/coilshield.geo +0 -0
- {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/coilshield.vol +0 -0
- {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/cube.geo +0 -0
- {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/cube.vol +0 -0
- {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/d10_DGdoubleglazing.pde +0 -0
- {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/d11_chip_nitsche.pde +0 -0
- {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/d1_square.pde +0 -0
- {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/d2_chip.pde +0 -0
- {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/d3_helmholtz.pde +0 -0
- {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/d4_cube.pde +0 -0
- {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/d5_beam.pde +0 -0
- {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/d6_shaft.pde +0 -0
- {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/d7_coil.pde +0 -0
- {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/d8_coilshield.pde +0 -0
- {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/d9_hybridDG.pde +0 -0
- {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/doubleglazing.in2d +0 -0
- {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/doubleglazing.vol +0 -0
- {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/piezo2d40round4.vol.gz +0 -0
- {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/shaft.geo +0 -0
- {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/shaft.vol +0 -0
- {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/square.in2d +0 -0
- {ngsolve-6.2.2502.data → ngsolve-6.2.2601.data}/data/share/ngsolve/square.vol +0 -0
- {ngsolve-6.2.2502.dist-info → ngsolve-6.2.2601.dist-info}/LICENSE +0 -0
- {ngsolve-6.2.2502.dist-info → ngsolve-6.2.2601.dist-info}/WHEEL +0 -0
- {ngsolve-6.2.2502.dist-info → ngsolve-6.2.2601.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
#ifndef SPARSEFACTORIZATION_INTERFACE_HPP
|
|
2
|
+
#define SPARSEFACTORIZATION_INTERFACE_HPP
|
|
3
|
+
|
|
4
|
+
#include "basevector.hpp"
|
|
5
|
+
#include "sparsecholesky.hpp"
|
|
6
|
+
#include "sparsematrix.hpp"
|
|
7
|
+
#include "../ngstd/python_ngstd.hpp"
|
|
8
|
+
|
|
9
|
+
namespace ngla {
|
|
10
|
+
|
|
11
|
+
void ExportSparseFactorizationInterface(py::module &m);
|
|
12
|
+
|
|
13
|
+
struct MapInnerDofs {
|
|
14
|
+
shared_ptr<BitArray> inner;
|
|
15
|
+
shared_ptr<const Array<int>> cluster;
|
|
16
|
+
Array<int> project;
|
|
17
|
+
Array<int> embed;
|
|
18
|
+
size_t size = 0;
|
|
19
|
+
|
|
20
|
+
MapInnerDofs() {}
|
|
21
|
+
|
|
22
|
+
void Init(shared_ptr<BitArray> ainner,
|
|
23
|
+
shared_ptr<const Array<int>> acluster = nullptr)
|
|
24
|
+
{
|
|
25
|
+
inner = ainner;
|
|
26
|
+
cluster = acluster;
|
|
27
|
+
if (!inner && !cluster) {
|
|
28
|
+
size = 0;
|
|
29
|
+
return;
|
|
30
|
+
}
|
|
31
|
+
if (inner) {
|
|
32
|
+
size = inner->NumSet();
|
|
33
|
+
project.SetSize(size);
|
|
34
|
+
embed.SetSize(inner->Size());
|
|
35
|
+
int j = 0;
|
|
36
|
+
for (int i = 0; i < inner->Size(); i++) {
|
|
37
|
+
if ((*inner)[i]) {
|
|
38
|
+
project[j] = i;
|
|
39
|
+
embed[i] = j++;
|
|
40
|
+
} else
|
|
41
|
+
embed[i] = -1;
|
|
42
|
+
}
|
|
43
|
+
return;
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
int j = 0;
|
|
47
|
+
for (int i = 0; i < cluster->Size(); i++) {
|
|
48
|
+
if ((*cluster)[i]) {
|
|
49
|
+
project.Append(i);
|
|
50
|
+
embed.Append(j++);
|
|
51
|
+
} else {
|
|
52
|
+
embed.Append(-1);
|
|
53
|
+
}
|
|
54
|
+
}
|
|
55
|
+
size = project.Size();
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
operator bool() const { return inner || cluster; }
|
|
59
|
+
|
|
60
|
+
template <typename T>
|
|
61
|
+
void Project(FlatVector<T> dst, FlatVector<T> src) const {
|
|
62
|
+
for (size_t i = 0; i < project.Size(); i++)
|
|
63
|
+
dst[i] = src[project[i]];
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
template <typename T> void Embed(T &dst, const T &src) const {
|
|
67
|
+
for (size_t i : Range(embed)) {
|
|
68
|
+
if (embed[i] >= 0)
|
|
69
|
+
dst[i] = src[embed[i]];
|
|
70
|
+
else
|
|
71
|
+
dst[i] = 0.0;
|
|
72
|
+
}
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
template <typename T>
|
|
76
|
+
void EmbedAdd(FlatVector<T> dst, FlatVector<T> src, T scale) const {
|
|
77
|
+
for (size_t i : Range(embed))
|
|
78
|
+
if (embed[i] >= 0)
|
|
79
|
+
dst[i] += scale * src[embed[i]];
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
template <typename T>
|
|
83
|
+
shared_ptr<SparseMatrixTM<T>>
|
|
84
|
+
ProjectMatrix(shared_ptr<const SparseMatrixTM<T>> m) {
|
|
85
|
+
Array<int> rowi, coli;
|
|
86
|
+
Array<T> vals;
|
|
87
|
+
// auto &dofs = *inner;
|
|
88
|
+
|
|
89
|
+
auto vals_ori = m->GetValues();
|
|
90
|
+
|
|
91
|
+
auto &cluster_array = *cluster;
|
|
92
|
+
auto &inner_bitarray = *inner;
|
|
93
|
+
auto is_used = [this, &inner_bitarray, &cluster_array](int i, int j) {
|
|
94
|
+
if (inner)
|
|
95
|
+
return inner_bitarray[i] && inner_bitarray[j];
|
|
96
|
+
return cluster_array[i] == cluster_array[j];
|
|
97
|
+
};
|
|
98
|
+
|
|
99
|
+
for (auto i : project)
|
|
100
|
+
for (auto j : m->GetRowIndices(i))
|
|
101
|
+
if (is_used(i, j)) {
|
|
102
|
+
rowi.Append(embed[i]);
|
|
103
|
+
coli.Append(embed[j]);
|
|
104
|
+
vals.Append(vals_ori[m->GetPosition(i, j)]);
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
auto res = SparseMatrixTM<T>::CreateFromCOO(rowi, coli, vals,
|
|
108
|
+
project.Size(), project.Size());
|
|
109
|
+
res->SetSPD(m->IsSPD());
|
|
110
|
+
return res;
|
|
111
|
+
}
|
|
112
|
+
};
|
|
113
|
+
|
|
114
|
+
class SparseFactorizationInterface : public SparseFactorization {
|
|
115
|
+
protected:
|
|
116
|
+
shared_ptr<const BaseSparseMatrix> inner_mat;
|
|
117
|
+
shared_ptr<BaseVector> inner_rhs, inner_solution;
|
|
118
|
+
MapInnerDofs map_inner_dofs;
|
|
119
|
+
bool is_complex = false;
|
|
120
|
+
bool is_symmetric = false;
|
|
121
|
+
bool is_symmetric_storage = false;
|
|
122
|
+
bool is_analyzed = false;
|
|
123
|
+
int width, height, inner_width, inner_height;
|
|
124
|
+
|
|
125
|
+
public:
|
|
126
|
+
SparseFactorizationInterface() = delete;
|
|
127
|
+
SparseFactorizationInterface(shared_ptr<const BaseMatrix> m,
|
|
128
|
+
shared_ptr<BitArray> ainner = nullptr,
|
|
129
|
+
shared_ptr<const Array<int>> acluster = nullptr);
|
|
130
|
+
|
|
131
|
+
virtual ~SparseFactorizationInterface() {}
|
|
132
|
+
|
|
133
|
+
void SetSubset(shared_ptr<BitArray> inner, shared_ptr<const Array<int>> cluster) override;
|
|
134
|
+
|
|
135
|
+
AutoVector CreateRowVector() const override {
|
|
136
|
+
return make_unique<VVector<double>>(Width());
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
AutoVector CreateColVector() const override {
|
|
140
|
+
return make_unique<VVector<double>>(Height());
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
shared_ptr<const BaseSparseMatrix> GetInnerMatrix() const {
|
|
144
|
+
return inner_mat;
|
|
145
|
+
}
|
|
146
|
+
|
|
147
|
+
void MultAdd(double s, const BaseVector &x, BaseVector &y) const override;
|
|
148
|
+
void MultAdd(Complex s, const BaseVector &x, BaseVector &y) const override;
|
|
149
|
+
|
|
150
|
+
virtual void Update() override;
|
|
151
|
+
|
|
152
|
+
virtual void Analyze() {}
|
|
153
|
+
virtual void Factor() {}
|
|
154
|
+
virtual void Solve(const BaseVector &rhs, BaseVector &solution) const = 0;
|
|
155
|
+
};
|
|
156
|
+
|
|
157
|
+
} // namespace ngla
|
|
158
|
+
|
|
159
|
+
#endif // SPARSEFACTORIZATION_INTERFACE_HPP
|
netgen/include/sparsematrix.hpp
CHANGED
|
@@ -258,7 +258,7 @@ namespace ngla
|
|
|
258
258
|
throw Exception ("BaseSparseMatrix::CreateBlockJacobiPrecond");
|
|
259
259
|
}
|
|
260
260
|
|
|
261
|
-
virtual shared_ptr<BaseSparseMatrix> CreateTranspose() const
|
|
261
|
+
virtual shared_ptr<BaseSparseMatrix> CreateTranspose(bool sorted = true) const
|
|
262
262
|
{
|
|
263
263
|
throw Exception ("BaseSparseMatrix::CreateTranspose");
|
|
264
264
|
}
|
|
@@ -561,7 +561,7 @@ namespace ngla
|
|
|
561
561
|
virtual tuple<int,int> EntrySizes() const override { return { ngbla::Height<TM>(), ngbla::Width<TM>() }; }
|
|
562
562
|
|
|
563
563
|
shared_ptr<BaseSparseMatrix>
|
|
564
|
-
CreateTransposeTM (const function<shared_ptr<SparseMatrixTM<decltype(ngbla::Trans(TM()))>>(const Array<int>&, int)> & creator) const;
|
|
564
|
+
CreateTransposeTM (const function<shared_ptr<SparseMatrixTM<decltype(ngbla::Trans(TM()))>>(const Array<int>&, int)> & creator, bool sorted) const;
|
|
565
565
|
|
|
566
566
|
public:
|
|
567
567
|
using BaseMatrix::GetMemoryTracer;
|
|
@@ -619,7 +619,7 @@ namespace ngla
|
|
|
619
619
|
|
|
620
620
|
|
|
621
621
|
BaseMatrix::OperatorInfo GetOperatorInfo () const override
|
|
622
|
-
{ return { string("SparseMatrix")+typeid(TM).name(), this->Height(), this->Width() }; }
|
|
622
|
+
{ return { string("SparseMatrix")+typeid(TM).name()+" (nze="+ToString(this->NZE())+")", this->Height(), this->Width() }; }
|
|
623
623
|
|
|
624
624
|
virtual shared_ptr<BaseJacobiPrecond>
|
|
625
625
|
CreateJacobiPrecond (shared_ptr<BitArray> inner) const override;
|
|
@@ -638,11 +638,11 @@ namespace ngla
|
|
|
638
638
|
|
|
639
639
|
virtual shared_ptr<BaseSparseMatrix> Reorder (const Array<size_t> & reorder) const override;
|
|
640
640
|
|
|
641
|
-
virtual shared_ptr<BaseSparseMatrix> CreateTranspose() const override
|
|
641
|
+
virtual shared_ptr<BaseSparseMatrix> CreateTranspose(bool sorted) const override
|
|
642
642
|
{
|
|
643
643
|
return this->CreateTransposeTM
|
|
644
644
|
( [](const Array<int> & elsperrow, int width) -> shared_ptr<SparseMatrixTM<decltype(Trans(TM()))>>
|
|
645
|
-
{ return make_shared<SparseMatrix<decltype(Trans(TM())), TV_COL, TV_ROW>> (elsperrow, width); } );
|
|
645
|
+
{ return make_shared<SparseMatrix<decltype(Trans(TM())), TV_COL, TV_ROW>> (elsperrow, width); }, sorted );
|
|
646
646
|
}
|
|
647
647
|
|
|
648
648
|
virtual shared_ptr<BaseMatrix> DeleteZeroElements(double tol) const override;
|
|
@@ -669,6 +669,20 @@ namespace ngla
|
|
|
669
669
|
for (size_t j = first; j < last; j++)
|
|
670
670
|
vec[colpi[j]] += Trans(datap[j]) * el;
|
|
671
671
|
}
|
|
672
|
+
|
|
673
|
+
void AddRowTransToVectorAtomic (int row, TVY el, FlatVector<TVX> vec) const
|
|
674
|
+
{
|
|
675
|
+
size_t first = firsti[row];
|
|
676
|
+
size_t last = firsti[row+1];
|
|
677
|
+
|
|
678
|
+
const ColIdx * colpi = colnr.Addr(0);
|
|
679
|
+
const TM * datap = data.Addr(0);
|
|
680
|
+
|
|
681
|
+
for (size_t j = first; j < last; j++)
|
|
682
|
+
// vec[colpi[j]] += Trans(datap[j]) * el;
|
|
683
|
+
AtomicAdd (vec[colpi[j]], Trans(datap[j]) * el);
|
|
684
|
+
}
|
|
685
|
+
|
|
672
686
|
|
|
673
687
|
///
|
|
674
688
|
void AddRowConjTransToVector (int row, TVY el, FlatVector<TVX> vec) const
|
|
@@ -858,9 +872,9 @@ namespace ngla
|
|
|
858
872
|
double sb, const SparseMatrixTM<double> & matb);
|
|
859
873
|
|
|
860
874
|
NGS_DLL_HEADER shared_ptr<SparseMatrixTM<double>>
|
|
861
|
-
MatMult (const SparseMatrixTM<double> & mata, const SparseMatrixTM<double> & matb);
|
|
875
|
+
MatMult (const SparseMatrixTM<double> & mata, const SparseMatrixTM<double> & matb, bool sort_output = true);
|
|
862
876
|
NGS_DLL_HEADER shared_ptr<SparseMatrixTM<Complex>>
|
|
863
|
-
MatMult (const SparseMatrixTM<Complex> & mata, const SparseMatrixTM<Complex> & matb);
|
|
877
|
+
MatMult (const SparseMatrixTM<Complex> & mata, const SparseMatrixTM<Complex> & matb, bool sort_output = true);
|
|
864
878
|
|
|
865
879
|
#ifdef GOLD
|
|
866
880
|
#include <sparsematrix_spec.hpp>
|
|
@@ -42,20 +42,19 @@ namespace ngla
|
|
|
42
42
|
fm = matvec(i);
|
|
43
43
|
}
|
|
44
44
|
}
|
|
45
|
-
|
|
45
|
+
|
|
46
46
|
virtual int VHeight() const override { return size; }
|
|
47
47
|
virtual int VWidth() const override { return width; }
|
|
48
|
-
|
|
48
|
+
|
|
49
49
|
virtual void Mult (const BaseVector & x, BaseVector & y) const override;
|
|
50
50
|
virtual void MultAdd (double s, const BaseVector & x, BaseVector & y) const override;
|
|
51
|
-
|
|
51
|
+
|
|
52
52
|
AutoVector CreateRowVector() const override
|
|
53
|
-
{
|
|
53
|
+
{ return make_unique<VVector<TSCAL>> (VWidth()); }
|
|
54
54
|
AutoVector CreateColVector() const override
|
|
55
|
-
{
|
|
56
|
-
|
|
55
|
+
{ return make_unique<VVector<TSCAL>> (VHeight()); }
|
|
56
|
+
|
|
57
57
|
virtual tuple<int,int> EntrySizes() const override { return { bh, bw }; }
|
|
58
|
-
|
|
59
58
|
};
|
|
60
59
|
|
|
61
60
|
|
|
@@ -51,28 +51,94 @@ namespace ngla
|
|
|
51
51
|
FlatArray<TM> val, size_t h, size_t w)
|
|
52
52
|
{
|
|
53
53
|
static Timer t("SparseMatrix::CreateFromCOO"); RegionTimer r(t);
|
|
54
|
-
|
|
54
|
+
static Timer t1("SparseMatrix::CreateFromCOO 1");
|
|
55
|
+
static Timer t2("SparseMatrix::CreateFromCOO 2");
|
|
56
|
+
static Timer t3("SparseMatrix::CreateFromCOO 3");
|
|
55
57
|
|
|
56
58
|
/*
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
*/
|
|
59
|
+
{
|
|
60
|
+
Array<int> cnt(h);
|
|
60
61
|
|
|
62
|
+
t1.Start();
|
|
61
63
|
DynamicTable<int> tab(h);
|
|
62
64
|
for (size_t i = 0; i < indi.Size(); i++)
|
|
63
65
|
tab.AddUnique(indi[i], indj[i]);
|
|
66
|
+
t1.Stop();
|
|
64
67
|
for (size_t i = 0; i < h; i++)
|
|
65
68
|
cnt[i] = tab.EntrySize(i);
|
|
66
69
|
|
|
67
70
|
auto matrix = make_shared<SparseMatrix<TM>> (cnt, w);
|
|
71
|
+
t2.Start();
|
|
68
72
|
for (auto k : ngstd::Range(indi))
|
|
69
73
|
matrix->CreatePosition(indi[k], indj[k]);
|
|
74
|
+
t2.Stop();
|
|
70
75
|
matrix->SetZero();
|
|
71
76
|
|
|
77
|
+
t3.Start();
|
|
72
78
|
for (auto k : ngstd::Range(indi))
|
|
73
79
|
(*matrix)(indi[k], indj[k]) += val[k];
|
|
80
|
+
t3.Stop();
|
|
81
|
+
// return matrix;
|
|
82
|
+
}
|
|
83
|
+
*/
|
|
84
|
+
|
|
85
|
+
Array<int> cnt(h);
|
|
86
|
+
cnt = 0;
|
|
87
|
+
for (auto i : indi)
|
|
88
|
+
cnt[i]++;
|
|
89
|
+
|
|
90
|
+
Table<int> tab(cnt);
|
|
91
|
+
cnt = 0;
|
|
92
|
+
|
|
93
|
+
for (auto [i,j] : Zip(indi, indj))
|
|
94
|
+
tab[i][cnt[i]++] = j;
|
|
74
95
|
|
|
75
|
-
|
|
96
|
+
cnt = 0;
|
|
97
|
+
// for (int i = 0; i < tab.Size(); i++)
|
|
98
|
+
ParallelFor (tab.Size(), [&] (size_t i)
|
|
99
|
+
{
|
|
100
|
+
QuickSort (tab[i]);
|
|
101
|
+
|
|
102
|
+
int prev = -1;
|
|
103
|
+
for (auto j : tab[i])
|
|
104
|
+
{
|
|
105
|
+
if (j != prev) cnt[i]++;
|
|
106
|
+
prev = j;
|
|
107
|
+
}
|
|
108
|
+
});
|
|
109
|
+
|
|
110
|
+
auto matrix = make_shared<SparseMatrix<TM>> (cnt, w);
|
|
111
|
+
t2.Start();
|
|
112
|
+
// for (auto k : ngstd::Range(indi))
|
|
113
|
+
// matrix->CreatePosition(indi[k], indj[k]);
|
|
114
|
+
|
|
115
|
+
cnt = 0;
|
|
116
|
+
for (int i = 0; i < tab.Size(); i++)
|
|
117
|
+
{
|
|
118
|
+
int prev = -1;
|
|
119
|
+
for (auto j : tab[i])
|
|
120
|
+
{
|
|
121
|
+
auto cols = matrix->GetRowIndices(i);
|
|
122
|
+
if (j != prev)
|
|
123
|
+
cols[cnt[i]++] = j;
|
|
124
|
+
prev = j;
|
|
125
|
+
}
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
t2.Stop();
|
|
129
|
+
matrix->SetZero();
|
|
130
|
+
|
|
131
|
+
t3.Start();
|
|
132
|
+
/*
|
|
133
|
+
for (auto k : ngstd::Range(indi))
|
|
134
|
+
(*matrix)(indi[k], indj[k]) += val[k];
|
|
135
|
+
*/
|
|
136
|
+
ParallelFor (indi.Size(), [&](size_t k)
|
|
137
|
+
{
|
|
138
|
+
AtomicAdd ( (*matrix)(indi[k], indj[k]), val[k]);
|
|
139
|
+
});
|
|
140
|
+
t3.Stop();
|
|
141
|
+
return matrix;
|
|
76
142
|
}
|
|
77
143
|
|
|
78
144
|
|
|
@@ -167,7 +233,8 @@ namespace ngla
|
|
|
167
233
|
throw Exception(string("MAX_SYS_DIM = ")+to_string(MAX_SYS_DIM)+string(", need ")+to_string(mat_traits<TM>::HEIGHT));
|
|
168
234
|
return nullptr;
|
|
169
235
|
}
|
|
170
|
-
else return make_shared<JacobiPrecond<TM,TV_ROW,TV_COL>> (
|
|
236
|
+
else return make_shared<JacobiPrecond<TM,TV_ROW,TV_COL>> ( dynamic_pointer_cast<SparseMatrix>
|
|
237
|
+
(const_cast<SparseMatrix*>(this)->shared_from_this()), inner);
|
|
171
238
|
}
|
|
172
239
|
|
|
173
240
|
template <class TM, class TV_ROW, class TV_COL>
|
|
@@ -288,6 +355,23 @@ namespace ngla
|
|
|
288
355
|
AddRowTransToVector (i, s*fx(i), fy);
|
|
289
356
|
|
|
290
357
|
timer.AddFlops (this->NZE());
|
|
358
|
+
|
|
359
|
+
|
|
360
|
+
/*
|
|
361
|
+
static Timer t("SparseMatrix::MultTransAdd"); RegionTimer reg(t);
|
|
362
|
+
t.AddFlops (this->NZE()*sizeof(TV_ROW)*sizeof(TV_COL)/sqr(sizeof(double)));
|
|
363
|
+
|
|
364
|
+
ParallelForRange
|
|
365
|
+
(balance, [&] (IntRange myrange)
|
|
366
|
+
{
|
|
367
|
+
FlatVector<TVY> fx = x.FV<TVY>();
|
|
368
|
+
FlatVector<TVX> fy = y.FV<TVX>();
|
|
369
|
+
|
|
370
|
+
for (auto i : myrange)
|
|
371
|
+
AddRowTransToVectorAtomic (i, s*fx(i), fy);
|
|
372
|
+
});
|
|
373
|
+
*/
|
|
374
|
+
|
|
291
375
|
}
|
|
292
376
|
|
|
293
377
|
|
|
@@ -393,21 +477,69 @@ namespace ngla
|
|
|
393
477
|
DeleteZeroElements(double tol) const
|
|
394
478
|
{
|
|
395
479
|
static Timer t("SparseMatrix::DeleteZeroElements"); RegionTimer reg(t);
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
480
|
+
size_t h = this->Height();
|
|
481
|
+
size_t w = this->Width();
|
|
482
|
+
Array<int> cnt(h);
|
|
483
|
+
Array<bool> keep(data.Size());
|
|
484
|
+
cnt = 0;
|
|
485
|
+
keep = false;
|
|
486
|
+
ParallelForRange( h, [&](IntRange r)
|
|
399
487
|
{
|
|
400
|
-
for (auto
|
|
401
|
-
|
|
488
|
+
for (auto i : r)
|
|
489
|
+
for (auto j : Range(firsti[i], firsti[i+1]))
|
|
402
490
|
if (ngbla::L2Norm2(data[j]) > tol*tol)
|
|
491
|
+
{
|
|
492
|
+
keep[j] = true;
|
|
493
|
+
cnt[i]++;
|
|
494
|
+
}
|
|
495
|
+
}, 5*TaskManager::GetNumThreads());
|
|
496
|
+
|
|
497
|
+
auto matrix = make_shared<SparseMatrix<TM>> (cnt, w);
|
|
498
|
+
cnt = 0;
|
|
499
|
+
|
|
500
|
+
ParallelForRange( h, [&](IntRange r)
|
|
501
|
+
{
|
|
502
|
+
for (auto i : r)
|
|
503
|
+
{
|
|
504
|
+
auto cols = matrix->GetRowIndices(i);
|
|
505
|
+
auto vals = matrix->GetRowValues(i);
|
|
506
|
+
int icol = 0;
|
|
507
|
+
// size_t firsti_new = matrix->firsti[i];
|
|
508
|
+
for (auto j : Range(firsti[i], firsti[i+1]))
|
|
509
|
+
if (keep[j])
|
|
403
510
|
{
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
511
|
+
cols[icol] = colnr[j];
|
|
512
|
+
vals[icol] = data[j];
|
|
513
|
+
icol++;
|
|
407
514
|
}
|
|
408
|
-
|
|
409
|
-
}
|
|
410
|
-
return
|
|
515
|
+
}
|
|
516
|
+
}, 5*TaskManager::GetNumThreads());
|
|
517
|
+
return matrix;
|
|
518
|
+
|
|
519
|
+
/*
|
|
520
|
+
// needs parallelization and testing
|
|
521
|
+
Array<int> cnt(this->Height());
|
|
522
|
+
cnt = 0;
|
|
523
|
+
size_t cnt0 = 0;
|
|
524
|
+
for (auto i : Range(this->Height()))
|
|
525
|
+
for (auto j : Range(firsti[i], firsti[i+1]))
|
|
526
|
+
if (ngbla::L2Norm2(data[j]) > tol*tol)
|
|
527
|
+
cnt[i]++;
|
|
528
|
+
else
|
|
529
|
+
cnt0++;
|
|
530
|
+
|
|
531
|
+
cout << "zero-els = " << cnt0 << endl;
|
|
532
|
+
|
|
533
|
+
|
|
534
|
+
auto matrix = make_shared<SparseMatrix<TM>> (cnt, this->Width());
|
|
535
|
+
|
|
536
|
+
for (auto i : Range(this->Height()))
|
|
537
|
+
for (auto j : Range(firsti[i], firsti[i+1]))
|
|
538
|
+
if (ngbla::L2Norm2(data[j]) > tol*tol)
|
|
539
|
+
(*matrix)(i, colnr[i]) = data[j];
|
|
540
|
+
|
|
541
|
+
return matrix;
|
|
542
|
+
*/
|
|
411
543
|
}
|
|
412
544
|
|
|
413
545
|
template <class TM>
|
|
@@ -640,36 +772,38 @@ namespace ngla
|
|
|
640
772
|
|
|
641
773
|
template <class TM>
|
|
642
774
|
shared_ptr<BaseSparseMatrix> SparseMatrixTM<TM> ::
|
|
643
|
-
CreateTransposeTM (const function<shared_ptr<SparseMatrixTM<decltype(ngbla::Trans(TM()))>>(const Array<int>&,int)> & creator
|
|
775
|
+
CreateTransposeTM (const function<shared_ptr<SparseMatrixTM<decltype(ngbla::Trans(TM()))>>(const Array<int>&,int)> & creator,
|
|
776
|
+
bool sorted) const
|
|
644
777
|
{
|
|
645
778
|
Array<int> cnt(this->Width());
|
|
646
779
|
cnt = 0;
|
|
647
780
|
ParallelFor (this->Height(), [&] (int i)
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
781
|
+
{
|
|
782
|
+
for (int c : this->GetRowIndices(i))
|
|
783
|
+
AsAtomic (cnt[c]) ++;
|
|
784
|
+
});
|
|
785
|
+
|
|
653
786
|
auto trans = creator(cnt, this->Height());
|
|
654
787
|
|
|
655
788
|
cnt = 0;
|
|
656
789
|
ParallelFor (this->Height(), [&] (int i)
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
790
|
+
{
|
|
791
|
+
for (int ci : Range(this->GetRowIndices(i)))
|
|
792
|
+
{
|
|
793
|
+
int c = this->GetRowIndices(i)[ci];
|
|
794
|
+
int pos = AsAtomic(cnt[c])++;
|
|
795
|
+
trans -> GetRowIndices(c)[pos] = i;
|
|
796
|
+
trans -> GetRowValues(c)[pos] = Trans(this->GetRowValues(i)[ci]);
|
|
797
|
+
}
|
|
798
|
+
});
|
|
799
|
+
|
|
800
|
+
if (sorted)
|
|
801
|
+
ParallelFor (trans->Height(), [&] (int r)
|
|
802
|
+
{
|
|
803
|
+
auto rowvals = trans->GetRowValues(r);
|
|
804
|
+
BubbleSort (trans->GetRowIndices(r),
|
|
805
|
+
FlatArray(rowvals.Size(), rowvals.Data()));
|
|
806
|
+
});
|
|
673
807
|
|
|
674
808
|
return trans;
|
|
675
809
|
}
|
|
@@ -794,7 +928,8 @@ namespace ngla
|
|
|
794
928
|
shared_ptr<BaseJacobiPrecond>
|
|
795
929
|
SparseMatrixSymmetric<TM,TV> :: CreateJacobiPrecond (shared_ptr<BitArray> inner) const
|
|
796
930
|
{
|
|
797
|
-
return make_shared<JacobiPrecondSymmetric<TM,TV>> (
|
|
931
|
+
return make_shared<JacobiPrecondSymmetric<TM,TV>> ( dynamic_pointer_cast<SparseMatrixSymmetric>
|
|
932
|
+
(const_cast<SparseMatrixSymmetric*>(this)->shared_from_this()), inner);
|
|
798
933
|
}
|
|
799
934
|
|
|
800
935
|
|
|
@@ -182,6 +182,8 @@ namespace ngla
|
|
|
182
182
|
|
|
183
183
|
virtual void MultAdd (double s, const BaseVector & x, BaseVector & y) const override;
|
|
184
184
|
virtual void MultTransAdd (double s, const BaseVector & x, BaseVector & y) const override;
|
|
185
|
+
|
|
186
|
+
virtual shared_ptr<BaseSparseMatrix> CreateSparseMatrix() const override;
|
|
185
187
|
};
|
|
186
188
|
|
|
187
189
|
|
netgen/include/statushandler.hpp
CHANGED
|
@@ -3,27 +3,27 @@
|
|
|
3
3
|
|
|
4
4
|
namespace ngstd
|
|
5
5
|
{
|
|
6
|
-
|
|
6
|
+
|
|
7
7
|
/** Access to statusbar. (and more)
|
|
8
8
|
*/
|
|
9
9
|
|
|
10
10
|
class NGS_DLL_HEADER BaseStatusHandler
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
static void PushStatus (const
|
|
11
|
+
{
|
|
12
|
+
public:
|
|
13
|
+
static void PushStatus (const std::string& str);
|
|
14
14
|
static void PopStatus ();
|
|
15
15
|
static void SetThreadPercentage (double percent);
|
|
16
|
-
|
|
16
|
+
|
|
17
17
|
static void GetStatus (string & str, double & percent);
|
|
18
|
-
|
|
18
|
+
|
|
19
19
|
static void SetTerminate(void);
|
|
20
20
|
static void UnSetTerminate(void);
|
|
21
21
|
static bool ShouldTerminate(void);
|
|
22
|
-
|
|
22
|
+
|
|
23
23
|
class Region
|
|
24
24
|
{
|
|
25
25
|
public:
|
|
26
|
-
Region(const
|
|
26
|
+
Region(const string& str) { PushStatus(str); }
|
|
27
27
|
~Region() { PopStatus(); }
|
|
28
28
|
};
|
|
29
29
|
};
|
|
@@ -604,6 +604,7 @@ public:
|
|
|
604
604
|
|
|
605
605
|
class SymbolicFacetLinearFormIntegrator : public FacetLinearFormIntegrator
|
|
606
606
|
{
|
|
607
|
+
protected:
|
|
607
608
|
shared_ptr<CoefficientFunction> cf;
|
|
608
609
|
Array<ProxyFunction*> proxies;
|
|
609
610
|
Array<CoefficientFunction*> cache_cfs;
|
|
@@ -614,7 +615,7 @@ public:
|
|
|
614
615
|
SIMD_IntegrationRule simd_ir; // if non-empty use this integration-rule
|
|
615
616
|
|
|
616
617
|
public:
|
|
617
|
-
SymbolicFacetLinearFormIntegrator (shared_ptr<CoefficientFunction> acf, VorB avb);
|
|
618
|
+
NGS_DLL_HEADER SymbolicFacetLinearFormIntegrator (shared_ptr<CoefficientFunction> acf, VorB avb);
|
|
618
619
|
|
|
619
620
|
virtual VorB VB() const override { return vb; }
|
|
620
621
|
virtual bool BoundaryForm() const override { return vb == BND; }
|
|
@@ -16,34 +16,22 @@ namespace ngcomp
|
|
|
16
16
|
class NGS_DLL_HEADER TangentialFacetFESpace : public FESpace
|
|
17
17
|
{
|
|
18
18
|
protected:
|
|
19
|
-
/// Level
|
|
20
|
-
// int level;
|
|
21
|
-
/// Number of Facets
|
|
22
|
-
// int nfacets;
|
|
23
|
-
///
|
|
24
|
-
// int ncfacets;
|
|
25
|
-
///
|
|
26
|
-
// int nel;
|
|
27
|
-
|
|
28
19
|
Array<int> first_facet_dof;
|
|
29
20
|
Array<int> first_inner_dof; // for highest_order_dc
|
|
30
|
-
// int ndof_lo;
|
|
31
21
|
|
|
32
22
|
int rel_order;
|
|
33
23
|
|
|
34
24
|
Array<IVec<2> > order_facet;
|
|
35
25
|
Array<bool> fine_facet;
|
|
36
26
|
|
|
37
|
-
// int ndof;
|
|
38
|
-
// Array<int> ndlevel;
|
|
39
27
|
bool var_order;
|
|
40
28
|
bool print;
|
|
41
29
|
|
|
42
30
|
bool highest_order_dc;
|
|
43
31
|
bool hide_highest_order_dc;
|
|
32
|
+
bool all_dofs_together;
|
|
44
33
|
|
|
45
34
|
public:
|
|
46
|
-
///
|
|
47
35
|
TangentialFacetFESpace (shared_ptr<MeshAccess> ama, const Flags & flags,
|
|
48
36
|
bool parseflags = false );
|
|
49
37
|
|
|
@@ -61,20 +49,11 @@ namespace ngcomp
|
|
|
61
49
|
virtual void SetOrder (NodeId ni, int order) override;
|
|
62
50
|
virtual int GetOrder (NodeId ni) const override;
|
|
63
51
|
|
|
64
|
-
|
|
65
|
-
// virtual size_t GetNDof() const throw() override { return ndof; }
|
|
66
|
-
// virtual size_t GetNDofLevel ( int i ) const override { return ndlevel[i]; }
|
|
67
|
-
|
|
68
52
|
virtual FlatArray<VorB> GetDualShapeNodes (VorB vb) const override;
|
|
69
53
|
|
|
70
|
-
// virtual int GetNDofLowOrder () const
|
|
71
|
-
// { return ndof_lo; }
|
|
72
54
|
|
|
73
55
|
virtual FiniteElement & GetFE(ElementId ei, Allocator & lh) const override;
|
|
74
56
|
|
|
75
|
-
// virtual const FiniteElement & GetFE ( int elnr, LocalHeap & lh ) const;
|
|
76
|
-
// virtual const FiniteElement & GetSFE ( int selnr, LocalHeap & lh ) const;
|
|
77
|
-
|
|
78
57
|
virtual void GetFacetDofNrs (int felnr, Array<DofId> & dnums) const;
|
|
79
58
|
|
|
80
59
|
virtual int GetNFacetDofs (int felnr) const;
|
|
@@ -90,6 +69,12 @@ namespace ngcomp
|
|
|
90
69
|
virtual IVec<2> GetFacetOrder(int fnr) const;
|
|
91
70
|
|
|
92
71
|
virtual int GetFirstFacetDof(int fanr) const;
|
|
72
|
+
const auto & GetFirstFacetDof() const { return first_facet_dof; }
|
|
73
|
+
|
|
74
|
+
/// Returns the IntRange built from first_facet_dof[nr] and
/// first_facet_dof[nr+1].
IntRange GetFacetDofs (int nr) const
{
  const auto range_begin = first_facet_dof[nr];
  const auto range_end = first_facet_dof[nr+1];
  return IntRange (range_begin, range_end);
}
|
|
93
78
|
|
|
94
79
|
virtual bool UsesHighestOrderDiscontinuous() const {return highest_order_dc;};
|
|
95
80
|
|