ngsolve 6.2.2501.post47.dev1__cp313-cp313-macosx_10_15_universal2.whl → 6.2.2501.post48.dev1__cp313-cp313-macosx_10_15_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ngsolve might be problematic. Click here for more details.

Files changed (285) hide show
  1. netgen/include/arnoldi.hpp +55 -0
  2. netgen/include/bandmatrix.hpp +334 -0
  3. netgen/include/basematrix.hpp +957 -0
  4. netgen/include/basevector.hpp +1268 -0
  5. netgen/include/bdbequations.hpp +2752 -0
  6. netgen/include/bdbintegrator.hpp +1660 -0
  7. netgen/include/bessel.hpp +1064 -0
  8. netgen/include/bilinearform.hpp +963 -0
  9. netgen/include/bla.hpp +29 -0
  10. netgen/include/blockalloc.hpp +95 -0
  11. netgen/include/blockjacobi.hpp +316 -0
  12. netgen/include/bspline.hpp +114 -0
  13. netgen/include/calcinverse.hpp +141 -0
  14. netgen/include/cg.hpp +368 -0
  15. netgen/include/chebyshev.hpp +44 -0
  16. netgen/include/cholesky.hpp +720 -0
  17. netgen/include/clapack.h +7254 -0
  18. netgen/include/code_generation.hpp +296 -0
  19. netgen/include/coefficient.hpp +2033 -0
  20. netgen/include/coefficient_impl.hpp +19 -0
  21. netgen/include/coefficient_stdmath.hpp +167 -0
  22. netgen/include/commutingAMG.hpp +106 -0
  23. netgen/include/comp.hpp +79 -0
  24. netgen/include/compatibility.hpp +41 -0
  25. netgen/include/complex_wrapper.hpp +73 -0
  26. netgen/include/compressedfespace.hpp +110 -0
  27. netgen/include/contact.hpp +231 -0
  28. netgen/include/diagonalmatrix.hpp +154 -0
  29. netgen/include/differentialoperator.hpp +276 -0
  30. netgen/include/diffop.hpp +1286 -0
  31. netgen/include/diffop_impl.hpp +326 -0
  32. netgen/include/discontinuous.hpp +84 -0
  33. netgen/include/dump.hpp +949 -0
  34. netgen/include/eigen.hpp +60 -0
  35. netgen/include/eigensystem.hpp +18 -0
  36. netgen/include/elasticity_equations.hpp +595 -0
  37. netgen/include/elementbyelement.hpp +195 -0
  38. netgen/include/elementtopology.hpp +1760 -0
  39. netgen/include/elementtransformation.hpp +339 -0
  40. netgen/include/evalfunc.hpp +405 -0
  41. netgen/include/expr.hpp +1655 -0
  42. netgen/include/facetfe.hpp +175 -0
  43. netgen/include/facetfespace.hpp +178 -0
  44. netgen/include/facethofe.hpp +111 -0
  45. netgen/include/facetsurffespace.hpp +112 -0
  46. netgen/include/fe_interfaces.hpp +32 -0
  47. netgen/include/fem.hpp +87 -0
  48. netgen/include/fesconvert.hpp +14 -0
  49. netgen/include/fespace.hpp +1445 -0
  50. netgen/include/finiteelement.hpp +286 -0
  51. netgen/include/globalinterfacespace.hpp +77 -0
  52. netgen/include/globalspace.hpp +115 -0
  53. netgen/include/gridfunction.hpp +525 -0
  54. netgen/include/h1amg.hpp +41 -0
  55. netgen/include/h1hofe.hpp +188 -0
  56. netgen/include/h1hofe_impl.hpp +1262 -0
  57. netgen/include/h1hofefo.hpp +148 -0
  58. netgen/include/h1hofefo_impl.hpp +185 -0
  59. netgen/include/h1hofespace.hpp +167 -0
  60. netgen/include/h1lofe.hpp +1237 -0
  61. netgen/include/h1lumping.hpp +35 -0
  62. netgen/include/hcurl_equations.hpp +1352 -0
  63. netgen/include/hcurlcurlfe.hpp +2221 -0
  64. netgen/include/hcurlcurlfespace.hpp +78 -0
  65. netgen/include/hcurlfe.hpp +259 -0
  66. netgen/include/hcurlfe_utils.hpp +107 -0
  67. netgen/include/hcurlhdiv_dshape.hpp +857 -0
  68. netgen/include/hcurlhdivfes.hpp +308 -0
  69. netgen/include/hcurlhofe.hpp +175 -0
  70. netgen/include/hcurlhofe_impl.hpp +1871 -0
  71. netgen/include/hcurlhofespace.hpp +193 -0
  72. netgen/include/hcurllofe.hpp +1146 -0
  73. netgen/include/hdiv_equations.hpp +865 -0
  74. netgen/include/hdivdivfe.hpp +2923 -0
  75. netgen/include/hdivdivsurfacespace.hpp +76 -0
  76. netgen/include/hdivfe.hpp +206 -0
  77. netgen/include/hdivfe_utils.hpp +716 -0
  78. netgen/include/hdivfes.hpp +75 -0
  79. netgen/include/hdivhofe.hpp +447 -0
  80. netgen/include/hdivhofe_impl.hpp +1107 -0
  81. netgen/include/hdivhofefo.hpp +229 -0
  82. netgen/include/hdivhofespace.hpp +175 -0
  83. netgen/include/hdivhosurfacefespace.hpp +106 -0
  84. netgen/include/hdivlofe.hpp +773 -0
  85. netgen/include/hidden.hpp +74 -0
  86. netgen/include/householder.hpp +181 -0
  87. netgen/include/hypre_ams_precond.hpp +123 -0
  88. netgen/include/hypre_precond.hpp +73 -0
  89. netgen/include/integrator.hpp +2024 -0
  90. netgen/include/integratorcf.hpp +253 -0
  91. netgen/include/interpolate.hpp +49 -0
  92. netgen/include/intrule.hpp +2541 -0
  93. netgen/include/irspace.hpp +49 -0
  94. netgen/include/jacobi.hpp +136 -0
  95. netgen/include/l2hofe.hpp +193 -0
  96. netgen/include/l2hofe_impl.hpp +564 -0
  97. netgen/include/l2hofefo.hpp +542 -0
  98. netgen/include/l2hofespace.hpp +344 -0
  99. netgen/include/la.hpp +38 -0
  100. netgen/include/linearform.hpp +266 -0
  101. netgen/include/matrix.hpp +2140 -0
  102. netgen/include/memusage.hpp +41 -0
  103. netgen/include/meshaccess.hpp +1358 -0
  104. netgen/include/mgpre.hpp +204 -0
  105. netgen/include/mptools.hpp +2145 -0
  106. netgen/include/multigrid.hpp +42 -0
  107. netgen/include/multivector.hpp +447 -0
  108. netgen/include/mumpsinverse.hpp +187 -0
  109. netgen/include/mycomplex.hpp +361 -0
  110. netgen/include/ng_lapack.hpp +1661 -0
  111. netgen/include/ngblas.hpp +1099 -0
  112. netgen/include/ngs_defines.hpp +30 -0
  113. netgen/include/ngs_stdcpp_include.hpp +106 -0
  114. netgen/include/ngs_utils.hpp +121 -0
  115. netgen/include/ngsobject.hpp +1019 -0
  116. netgen/include/ngsstream.hpp +113 -0
  117. netgen/include/ngstd.hpp +72 -0
  118. netgen/include/nodalhofe.hpp +96 -0
  119. netgen/include/nodalhofe_impl.hpp +141 -0
  120. netgen/include/normalfacetfe.hpp +223 -0
  121. netgen/include/normalfacetfespace.hpp +98 -0
  122. netgen/include/normalfacetsurfacefespace.hpp +84 -0
  123. netgen/include/order.hpp +251 -0
  124. netgen/include/parallel_matrices.hpp +222 -0
  125. netgen/include/paralleldofs.hpp +340 -0
  126. netgen/include/parallelngs.hpp +23 -0
  127. netgen/include/parallelvector.hpp +269 -0
  128. netgen/include/pardisoinverse.hpp +200 -0
  129. netgen/include/periodic.hpp +125 -0
  130. netgen/include/plateaufespace.hpp +25 -0
  131. netgen/include/pml.hpp +275 -0
  132. netgen/include/pmltrafo.hpp +631 -0
  133. netgen/include/postproc.hpp +142 -0
  134. netgen/include/precomp.hpp +60 -0
  135. netgen/include/preconditioner.hpp +602 -0
  136. netgen/include/prolongation.hpp +235 -0
  137. netgen/include/python_comp.hpp +107 -0
  138. netgen/include/python_fem.hpp +89 -0
  139. netgen/include/python_linalg.hpp +58 -0
  140. netgen/include/python_ngstd.hpp +385 -0
  141. netgen/include/recursive_pol.hpp +4844 -0
  142. netgen/include/recursive_pol_tet.hpp +395 -0
  143. netgen/include/recursive_pol_trig.hpp +492 -0
  144. netgen/include/reorderedfespace.hpp +81 -0
  145. netgen/include/sample_sort.hpp +105 -0
  146. netgen/include/scalarfe.hpp +335 -0
  147. netgen/include/shapefunction_utils.hpp +113 -0
  148. netgen/include/simd_complex.hpp +284 -0
  149. netgen/include/smoother.hpp +253 -0
  150. netgen/include/solve.hpp +89 -0
  151. netgen/include/sparsecholesky.hpp +313 -0
  152. netgen/include/sparsematrix.hpp +1038 -0
  153. netgen/include/sparsematrix_dyn.hpp +91 -0
  154. netgen/include/sparsematrix_impl.hpp +920 -0
  155. netgen/include/special_matrix.hpp +461 -0
  156. netgen/include/specialelement.hpp +125 -0
  157. netgen/include/statushandler.hpp +33 -0
  158. netgen/include/stringops.hpp +12 -0
  159. netgen/include/superluinverse.hpp +136 -0
  160. netgen/include/symbolicintegrator.hpp +849 -0
  161. netgen/include/symmetricmatrix.hpp +144 -0
  162. netgen/include/tangentialfacetfe.hpp +224 -0
  163. netgen/include/tangentialfacetfespace.hpp +106 -0
  164. netgen/include/tensor.hpp +522 -0
  165. netgen/include/tensorcoefficient.hpp +446 -0
  166. netgen/include/tensorproductintegrator.hpp +113 -0
  167. netgen/include/thcurlfe.hpp +128 -0
  168. netgen/include/thcurlfe_impl.hpp +380 -0
  169. netgen/include/thdivfe.hpp +80 -0
  170. netgen/include/thdivfe_impl.hpp +426 -0
  171. netgen/include/tpdiffop.hpp +461 -0
  172. netgen/include/tpfes.hpp +133 -0
  173. netgen/include/tpintrule.hpp +224 -0
  174. netgen/include/triangular.hpp +465 -0
  175. netgen/include/tscalarfe.hpp +245 -0
  176. netgen/include/tscalarfe_impl.hpp +1029 -0
  177. netgen/include/umfpackinverse.hpp +148 -0
  178. netgen/include/vector.hpp +1219 -0
  179. netgen/include/voxelcoefficientfunction.hpp +41 -0
  180. netgen/include/vtkoutput.hpp +198 -0
  181. netgen/include/vvector.hpp +208 -0
  182. netgen/include/webgui.hpp +92 -0
  183. netgen/libngbla.dylib +0 -0
  184. netgen/libngcomp.dylib +0 -0
  185. netgen/libngfem.dylib +0 -0
  186. netgen/libngla.dylib +0 -0
  187. netgen/libngsolve.dylib +0 -0
  188. netgen/libngstd.dylib +0 -0
  189. ngsolve/__init__.pyi +231 -0
  190. ngsolve/bla.pyi +1139 -0
  191. ngsolve/bvp.pyi +32 -0
  192. ngsolve/cmake/NGSolveConfig.cmake +102 -0
  193. ngsolve/cmake/ngsolve-targets-release.cmake +69 -0
  194. ngsolve/cmake/ngsolve-targets.cmake +163 -0
  195. ngsolve/comp/__init__.pyi +5382 -0
  196. ngsolve/comp/pml.pyi +89 -0
  197. ngsolve/config/__init__.py +1 -0
  198. ngsolve/config/__init__.pyi +43 -0
  199. ngsolve/config/__main__.py +4 -0
  200. ngsolve/config/config.py +60 -0
  201. ngsolve/config/config.pyi +45 -0
  202. ngsolve/demos/TensorProduct/__init__.py +0 -0
  203. ngsolve/demos/TensorProduct/tp_dg_1d_1d.py +80 -0
  204. ngsolve/demos/TensorProduct/tp_dg_1d_2d.py +73 -0
  205. ngsolve/demos/TensorProduct/tp_dg_2d_1d.py +72 -0
  206. ngsolve/demos/TensorProduct/tp_dg_2d_2d.py +66 -0
  207. ngsolve/demos/__init__.py +0 -0
  208. ngsolve/demos/howto/__init__.py +0 -0
  209. ngsolve/demos/howto/hhj.py +44 -0
  210. ngsolve/demos/howto/hybrid_dg.py +53 -0
  211. ngsolve/demos/howto/mixed.py +30 -0
  212. ngsolve/demos/howto/nonlin.py +29 -0
  213. ngsolve/demos/howto/pickling.py +26 -0
  214. ngsolve/demos/howto/pml.py +31 -0
  215. ngsolve/demos/howto/taskmanager.py +20 -0
  216. ngsolve/demos/howto/tdnns.py +47 -0
  217. ngsolve/demos/howto/timeDG-skeleton.py +45 -0
  218. ngsolve/demos/howto/timeDG.py +38 -0
  219. ngsolve/demos/howto/timeDGlap.py +42 -0
  220. ngsolve/demos/howto/timeDGwave.py +61 -0
  221. ngsolve/demos/intro/__init__.py +0 -0
  222. ngsolve/demos/intro/adaptive.py +123 -0
  223. ngsolve/demos/intro/cmagnet.py +62 -0
  224. ngsolve/demos/intro/elasticity.py +76 -0
  225. ngsolve/demos/intro/navierstokes.py +74 -0
  226. ngsolve/demos/intro/poisson.ipynb +170 -0
  227. ngsolve/demos/intro/poisson.py +41 -0
  228. ngsolve/demos/mpi/__init__.py +0 -0
  229. ngsolve/demos/mpi/mpi_cmagnet.py +87 -0
  230. ngsolve/demos/mpi/mpi_navierstokes.py +117 -0
  231. ngsolve/demos/mpi/mpi_poisson.py +89 -0
  232. ngsolve/demos/mpi/mpi_timeDG.py +82 -0
  233. ngsolve/directsolvers.pyi +18 -0
  234. ngsolve/eigenvalues.pyi +30 -0
  235. ngsolve/fem.pyi +1707 -0
  236. ngsolve/krylovspace.pyi +309 -0
  237. ngsolve/la.pyi +1218 -0
  238. ngsolve/ngslib.so +0 -0
  239. ngsolve/ngstd.pyi +58 -0
  240. ngsolve/nonlinearsolvers.pyi +98 -0
  241. ngsolve/preconditioners.pyi +6 -0
  242. ngsolve/solve.pyi +108 -0
  243. ngsolve/solvers.pyi +14 -0
  244. ngsolve/timestepping.pyi +34 -0
  245. ngsolve/timing.pyi +57 -0
  246. ngsolve/utils.pyi +279 -0
  247. ngsolve-6.2.2501.post48.dev1.data/data/Netgen.icns +0 -0
  248. ngsolve-6.2.2501.post48.dev1.data/data/bin/ngscxx +17 -0
  249. ngsolve-6.2.2501.post48.dev1.data/data/bin/ngsld +13 -0
  250. ngsolve-6.2.2501.post48.dev1.data/data/bin/ngsolve.tcl +648 -0
  251. ngsolve-6.2.2501.post48.dev1.data/data/bin/ngspy +2 -0
  252. ngsolve-6.2.2501.post48.dev1.data/data/share/ngsolve/beam.geo +17 -0
  253. ngsolve-6.2.2501.post48.dev1.data/data/share/ngsolve/beam.vol +240 -0
  254. ngsolve-6.2.2501.post48.dev1.data/data/share/ngsolve/chip.in2d +41 -0
  255. ngsolve-6.2.2501.post48.dev1.data/data/share/ngsolve/chip.vol +614 -0
  256. ngsolve-6.2.2501.post48.dev1.data/data/share/ngsolve/coil.geo +12 -0
  257. ngsolve-6.2.2501.post48.dev1.data/data/share/ngsolve/coil.vol +2560 -0
  258. ngsolve-6.2.2501.post48.dev1.data/data/share/ngsolve/coilshield.geo +24 -0
  259. ngsolve-6.2.2501.post48.dev1.data/data/share/ngsolve/coilshield.vol +3179 -0
  260. ngsolve-6.2.2501.post48.dev1.data/data/share/ngsolve/cube.geo +19 -0
  261. ngsolve-6.2.2501.post48.dev1.data/data/share/ngsolve/cube.vol +1832 -0
  262. ngsolve-6.2.2501.post48.dev1.data/data/share/ngsolve/d10_DGdoubleglazing.pde +50 -0
  263. ngsolve-6.2.2501.post48.dev1.data/data/share/ngsolve/d11_chip_nitsche.pde +40 -0
  264. ngsolve-6.2.2501.post48.dev1.data/data/share/ngsolve/d1_square.pde +43 -0
  265. ngsolve-6.2.2501.post48.dev1.data/data/share/ngsolve/d2_chip.pde +35 -0
  266. ngsolve-6.2.2501.post48.dev1.data/data/share/ngsolve/d3_helmholtz.pde +22 -0
  267. ngsolve-6.2.2501.post48.dev1.data/data/share/ngsolve/d4_cube.pde +46 -0
  268. ngsolve-6.2.2501.post48.dev1.data/data/share/ngsolve/d5_beam.pde +74 -0
  269. ngsolve-6.2.2501.post48.dev1.data/data/share/ngsolve/d6_shaft.pde +73 -0
  270. ngsolve-6.2.2501.post48.dev1.data/data/share/ngsolve/d7_coil.pde +50 -0
  271. ngsolve-6.2.2501.post48.dev1.data/data/share/ngsolve/d8_coilshield.pde +49 -0
  272. ngsolve-6.2.2501.post48.dev1.data/data/share/ngsolve/d9_hybridDG.pde +72 -0
  273. ngsolve-6.2.2501.post48.dev1.data/data/share/ngsolve/doubleglazing.in2d +27 -0
  274. ngsolve-6.2.2501.post48.dev1.data/data/share/ngsolve/doubleglazing.vol +737 -0
  275. ngsolve-6.2.2501.post48.dev1.data/data/share/ngsolve/piezo2d40round4.vol.gz +0 -0
  276. ngsolve-6.2.2501.post48.dev1.data/data/share/ngsolve/shaft.geo +73 -0
  277. ngsolve-6.2.2501.post48.dev1.data/data/share/ngsolve/shaft.vol +4291 -0
  278. ngsolve-6.2.2501.post48.dev1.data/data/share/ngsolve/square.in2d +17 -0
  279. ngsolve-6.2.2501.post48.dev1.data/data/share/ngsolve/square.vol +149 -0
  280. {ngsolve-6.2.2501.post47.dev1.dist-info → ngsolve-6.2.2501.post48.dev1.dist-info}/METADATA +1 -1
  281. ngsolve-6.2.2501.post48.dev1.dist-info/RECORD +304 -0
  282. ngsolve-6.2.2501.post47.dev1.dist-info/RECORD +0 -25
  283. {ngsolve-6.2.2501.post47.dev1.dist-info → ngsolve-6.2.2501.post48.dev1.dist-info}/LICENSE +0 -0
  284. {ngsolve-6.2.2501.post47.dev1.dist-info → ngsolve-6.2.2501.post48.dev1.dist-info}/WHEEL +0 -0
  285. {ngsolve-6.2.2501.post47.dev1.dist-info → ngsolve-6.2.2501.post48.dev1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,920 @@
1
+ #ifndef FILE_NGS_SPARSEMATRIX_IMPL
2
+ #define FILE_NGS_SPARSEMATRIX_IMPL
3
+
4
+ /**************************************************************************/
5
+ /* File: sparsematrix_impl.hpp */
6
+ /* Author: Joachim Schoeberl */
7
+ /* Date: 01. Oct. 94, 15 Jan. 02 */
8
+ /* redesign: Lukas Kogler, July 2019 */
9
+ /**************************************************************************/
10
+
11
+
12
+ #include "sparsematrix.hpp"
13
+ namespace ngla
14
+ {
15
+
16
+ template <class TM>
17
+ SparseMatrixTM<TM> :: ~SparseMatrixTM ()
18
+ { ; }
19
+
20
+
21
+ template <class TM>
22
+ void SparseMatrixTM<TM> ::
23
+ PrefetchRow (size_t rownr) const
24
+ {
25
+ #ifdef NETGEN_ARCH_AMD64
26
+ #ifdef __GNUC__
27
+ size_t fi = firsti[rownr], fin = firsti[rownr+1];
28
+ // int * pi = &colnr[fi], * pin = &colnr[fin];
29
+ int *pi = colnr.Data()+fi, *pin = colnr.Data()+fin;
30
+ while (pi < pin)
31
+ {
32
+ _mm_prefetch (reinterpret_cast<void*>(pi), _MM_HINT_T2);
33
+ pi += 64/sizeof(int);
34
+ }
35
+
36
+ TM * vi = &data[fi], * vin = (&data[fin-1])+1;
37
+ while (vi < vin)
38
+ {
39
+ _mm_prefetch (reinterpret_cast<void*>(vi), _MM_HINT_T2);
40
+ vi += 64/sizeof(double);
41
+ }
42
+ #endif
43
+ #endif // NETGEN_ARCH_AMD64
44
+ ;
45
+ }
46
+
47
+
48
+ template <class TM>
49
+ shared_ptr<SparseMatrixTM<TM>> SparseMatrixTM<TM> ::
50
+ CreateFromCOO (FlatArray<int> indi, FlatArray<int> indj,
51
+ FlatArray<TM> val, size_t h, size_t w)
52
+ {
53
+ static Timer t("SparseMatrix::CreateFromCOO"); RegionTimer r(t);
54
+ Array<int> cnt(h);
55
+
56
+ /*
57
+ cnt = 0;
58
+ for (auto i : indi) cnt[i]++;
59
+ */
60
+
61
+ DynamicTable<int> tab(h);
62
+ for (size_t i = 0; i < indi.Size(); i++)
63
+ tab.AddUnique(indi[i], indj[i]);
64
+ for (size_t i = 0; i < h; i++)
65
+ cnt[i] = tab.EntrySize(i);
66
+
67
+ auto matrix = make_shared<SparseMatrix<TM>> (cnt, w);
68
+ for (auto k : ngstd::Range(indi))
69
+ matrix->CreatePosition(indi[k], indj[k]);
70
+ matrix->SetZero();
71
+
72
+ for (auto k : ngstd::Range(indi))
73
+ (*matrix)(indi[k], indj[k]) += val[k];
74
+
75
+ return matrix;
76
+ }
77
+
78
+
79
+
80
+
81
+
82
+ template <class TM>
83
+ void SparseMatrixTM<TM> ::
84
+ AddElementMatrix(FlatArray<int> dnums1, FlatArray<int> dnums2,
85
+ BareSliceMatrix<TSCAL> elmat1, bool use_atomic)
86
+ {
87
+ static Timer timer_addelmat_nonsym("SparseMatrix::AddElementMatrix", NoTracing);
88
+ RegionTimer reg (timer_addelmat_nonsym);
89
+ NgProfiler::AddThreadFlops (timer_addelmat_nonsym, TaskManager::GetThreadId(), dnums1.Size()*dnums2.Size());
90
+
91
+ ArrayMem<int, 50> map(dnums2.Size());
92
+ for (int i = 0; i < map.Size(); i++) map[i] = i;
93
+ QuickSortI (dnums2, map);
94
+ Scalar2ElemMatrix<TM, TSCAL> elmat (elmat1);
95
+ // .AddSize(mat_traits<TM>::HEIGHT*dnums1.Size(),
96
+ // mat_traits<TM>::WIDTH*dnums2.Size()));
97
+
98
+ for (int i = 0; i < dnums1.Size(); i++)
99
+ if (IsRegularIndex(dnums1[i]))
100
+ {
101
+ FlatArray rowind = this->GetRowIndices(dnums1[i]);
102
+ FlatVector<TM> rowvals = this->GetRowValues(dnums1[i]);
103
+
104
+ int k = 0;
105
+ for (int j1 = 0; j1 < dnums2.Size(); j1++)
106
+ {
107
+ int j = map[j1];
108
+ if (IsRegularIndex(dnums2[j]))
109
+ {
110
+ while (rowind[k] != dnums2[j])
111
+ {
112
+ k++;
113
+ if (k >= rowind.Size())
114
+ throw Exception ("SparseMatrixTM::AddElementMatrix: illegal dnums");
115
+ }
116
+ if (use_atomic)
117
+ AtomicAdd (rowvals(k), elmat(i,j));
118
+ else
119
+ rowvals(k) += elmat(i,j);
120
+ }
121
+ }
122
+ }
123
+ }
124
+
125
+
126
+ template <class TM>
127
+ void SparseMatrixTM<TM> :: SetZero ()
128
+ {
129
+ static Timer t("SparseMatrix::SetZero (taskhandler)");
130
+ t.AddFlops (this->NZE());
131
+ RegionTimer reg(t);
132
+
133
+ /*
134
+ ParallelFor (balance, [&](int row)
135
+ {
136
+ data.Range(firsti[row], firsti[row+1]) = TM(0.0);
137
+ });
138
+ */
139
+ ParallelForRange (balance, [&](IntRange r)
140
+ {
141
+ data.Range(firsti[r.First()], firsti[r.Next()]) = TM(0.0);
142
+ });
143
+
144
+ }
145
+
146
+
147
+
148
+ template <class TM, class TV_ROW, class TV_COL>
149
+ SparseMatrix<TM,TV_ROW,TV_COL> :: SparseMatrix (const MatrixGraph & agraph)
150
+ : SparseMatrixTM<TM> (agraph)
151
+ { ; }
152
+
153
+ template <class TM, class TV_ROW, class TV_COL>
154
+ SparseMatrix<TM,TV_ROW,TV_COL> :: SparseMatrix (MatrixGraph && agraph)
155
+ : SparseMatrixTM<TM> (std::move(agraph))
156
+ { ; }
157
+
158
+
159
+
160
+ template <class TM, class TV_ROW, class TV_COL>
161
+ shared_ptr<BaseJacobiPrecond> SparseMatrix<TM,TV_ROW,TV_COL> ::
162
+ CreateJacobiPrecond (shared_ptr<BitArray> inner) const
163
+ {
164
+ // if constexpr(mat_traits<TM>::HEIGHT != mat_traits<TM>::WIDTH) return nullptr;
165
+ if constexpr(ngbla::Height<TM>() != ngbla::Width<TM>()) return nullptr;
166
+ else if constexpr(ngbla::Height<TM>() > MAX_SYS_DIM) {
167
+ throw Exception(string("MAX_SYS_DIM = ")+to_string(MAX_SYS_DIM)+string(", need ")+to_string(mat_traits<TM>::HEIGHT));
168
+ return nullptr;
169
+ }
170
+ else return make_shared<JacobiPrecond<TM,TV_ROW,TV_COL>> (*this, inner);
171
+ }
172
+
173
+ template <class TM, class TV_ROW, class TV_COL>
174
+ shared_ptr<BaseBlockJacobiPrecond> SparseMatrix<TM,TV_ROW,TV_COL> ::
175
+ CreateBlockJacobiPrecond (shared_ptr<Table<int>> blocks,
176
+ const BaseVector * constraint,
177
+ bool parallel,
178
+ shared_ptr<BitArray> freedofs) const
179
+ {
180
+ // if constexpr(mat_traits<TM>::HEIGHT != mat_traits<TM>::WIDTH) return nullptr;
181
+ if constexpr(ngbla::Height<TM>() != ngbla::Width<TM>()) return nullptr;
182
+ else if constexpr(ngbla::Height<TM>() > MAX_SYS_DIM) {
183
+ throw Exception(string("MAX_SYS_DIM = ")+to_string(MAX_SYS_DIM)+string(", need ")+to_string(ngbla::Height<TM>()));
184
+ return nullptr;
185
+ }
186
+ else
187
+ // return make_shared<BlockJacobiPrecond<TM,TV_ROW,TV_COL>> (*this, blocks, parallel);
188
+
189
+ return make_shared<BlockJacobiPrecond<TM,TV_ROW,TV_COL>>
190
+ ( dynamic_pointer_cast<const SparseMatrix>
191
+ (this->shared_from_this()),
192
+ blocks, parallel);
193
+ }
194
+
195
+
196
+
197
+ template <class TM, class TV_ROW, class TV_COL>
198
+ void SparseMatrix<TM,TV_ROW,TV_COL> ::
199
+ MultAdd (double s, const BaseVector & x, BaseVector & y) const
200
+ {
201
+ static Timer t("SparseMatrix::MultAdd"); RegionTimer reg(t);
202
+ t.AddFlops (this->NZE()*sizeof(TV_ROW)*sizeof(TV_COL)/sqr(sizeof(double)));
203
+
204
+ ParallelForRange
205
+ (balance, [&] (IntRange myrange)
206
+ {
207
+ FlatVector<TVX> fx = x.FV<TVX>();
208
+ FlatVector<TVY> fy = y.FV<TVY>();
209
+
210
+ for (auto i : myrange)
211
+ fy(i) += s * RowTimesVector (i, fx);
212
+ });
213
+
214
+ #ifdef OLD
215
+ if (task_manager)
216
+ {
217
+ FlatVector<TVX> fx = x.FV<TVX>();
218
+ FlatVector<TVY> fy = y.FV<TVY>();
219
+
220
+ // int ntasks = task_manager->GetNumThreads();
221
+
222
+ task_manager -> CreateJob
223
+ ([&] (TaskInfo & ti)
224
+ {
225
+ int tasks_per_part = ti.ntasks / balance.Size();
226
+ int mypart = ti.task_nr / tasks_per_part;
227
+ int num_in_part = ti.task_nr % tasks_per_part;
228
+
229
+ auto myrange = balance[mypart].Split (num_in_part, tasks_per_part);
230
+
231
+ for (auto row : myrange)
232
+ fy(row) += s * RowTimesVector (row, fx);
233
+
234
+ });
235
+ return;
236
+ }
237
+
238
+
239
+ FlatVector<TVX> fx = x.FV<TVX>();
240
+ FlatVector<TVY> fy = y.FV<TVY>();
241
+
242
+ int h = this->Height();
243
+ for (int i = 0; i < h; i++)
244
+ fy(i) += s * RowTimesVector (i, fx);
245
+ #endif
246
+
247
+
248
+ }
249
+
250
+ template <class TM, class TV_ROW, class TV_COL>
251
+ void SparseMatrix<TM,TV_ROW,TV_COL> ::
252
+ MultAdd1 (double s, const BaseVector & x, BaseVector & y,
253
+ const BitArray * ainner,
254
+ const Array<int> * acluster) const
255
+ {
256
+ if (!ainner || acluster)
257
+ {
258
+ MultAdd (s, x, y);
259
+ return;
260
+ }
261
+
262
+ FlatVector<TVX> fx = x.FV<TVX>();
263
+ FlatVector<TVY> fy = y.FV<TVY>();
264
+
265
+ SharedLoop2 sl(ainner->Size());
266
+ ParallelJob
267
+ ( [&] (const TaskInfo & ti)
268
+ {
269
+ for (size_t row : sl)
270
+ if ( (*ainner).Test(row))
271
+ fy(row) += s * RowTimesVector (row, fx);
272
+ });
273
+ }
274
+
275
+
276
+
277
+ template <class TM, class TV_ROW, class TV_COL>
278
+ void SparseMatrix<TM,TV_ROW,TV_COL> ::
279
+ MultTransAdd (double s, const BaseVector & x, BaseVector & y) const
280
+ {
281
+ static Timer timer ("SparseMatrix::MultTransAdd");
282
+ RegionTimer reg (timer);
283
+
284
+ FlatVector<TVY> fx = x.FV<TVY>();
285
+ FlatVector<TVX> fy = y.FV<TVX>();
286
+
287
+ for (int i = 0; i < this->Height(); i++)
288
+ AddRowTransToVector (i, s*fx(i), fy);
289
+
290
+ timer.AddFlops (this->NZE());
291
+ }
292
+
293
+
294
+ template <class TM, class TV_ROW, class TV_COL>
295
+ void SparseMatrix<TM,TV_ROW,TV_COL> ::
296
+ MultAdd (Complex s, const BaseVector & x, BaseVector & y) const
297
+ {
298
+ static Timer timer("SparseMatrix::MultAdd Complex");
299
+ RegionTimer reg (timer);
300
+
301
+ FlatVector<TVX> fx = x.FV<TVX> (); // (x.Size(), x.Memory());
302
+ FlatVector<TVY> fy = y.FV<TVY> (); // (y.Size(), y.Memory());
303
+
304
+ if constexpr (std::is_constructible<TSCAL,Complex>())
305
+ {
306
+ int h = this->Height();
307
+ for (int i = 0; i < h; i++)
308
+ fy(i) += TSCAL(s) * RowTimesVector (i, fx);
309
+ }
310
+ else
311
+ throw Exception("MultAdd(complex) called for real matrix");
312
+ }
313
+
314
+
315
+ template <class TM, class TV_ROW, class TV_COL>
316
+ void SparseMatrix<TM,TV_ROW,TV_COL> ::
317
+ MultTransAdd (Complex s, const BaseVector & x, BaseVector & y) const
318
+ {
319
+ static Timer timer("SparseMatrix::MultTransAdd Complex");
320
+ RegionTimer reg (timer);
321
+
322
+ FlatVector<TVY> fx = x.FV<TVY>(); // (x.Size(), x.Memory());
323
+ FlatVector<TVX> fy = y.FV<TVX>(); // (y.Size(), y.Memory());
324
+
325
+ if constexpr (std::is_constructible<TSCAL,Complex>())
326
+ for (int i = 0; i < this->Height(); i++)
327
+ AddRowTransToVector (i, TSCAL(s)*fx(i), fy);
328
+ else
329
+ throw Exception("MultTransAdd(complex) called for real matrix");
330
+ }
331
+
332
+ template <class TM, class TV_ROW, class TV_COL>
333
+ void SparseMatrix<TM,TV_ROW,TV_COL> ::
334
+ MultConjTransAdd (Complex s, const BaseVector & x, BaseVector & y) const
335
+ {
336
+ static Timer timer("SparseMatrix::MultTransAdd Complex");
337
+ RegionTimer reg (timer);
338
+
339
+ FlatVector<TVY> fx = x.FV<TVY>(); // (x.Size(), x.Memory());
340
+ FlatVector<TVX> fy = y.FV<TVX>(); // (y.Size(), y.Memory());
341
+
342
+ if constexpr (std::is_constructible<TSCAL,Complex>())
343
+ for (int i = 0; i < this->Height(); i++)
344
+ AddRowConjTransToVector (i, TSCAL(s)*fx(i), fy);
345
+ else
346
+ throw Exception("MultConjTransAdd(complex) called for real matrix");
347
+ }
348
+
349
+ template <class TM, class TV_ROW, class TV_COL>
350
+ void SparseMatrix<TM,TV_ROW,TV_COL> ::
351
+ MultAdd (FlatVector<double> alpha, const MultiVector & x, MultiVector & y) const
352
+ {
353
+ BaseMatrix::MultAdd (alpha, x, y);
354
+ }
355
+
356
+
357
+
358
+ template <class TM, class TV_ROW, class TV_COL>
359
+ void SparseMatrix<TM,TV_ROW,TV_COL> :: DoArchive (Archive & ar)
360
+ {
361
+ ar & this->size;
362
+ ar & this->width;
363
+ ar & this->nze;
364
+ ar & firsti;
365
+ ar & colnr;
366
+ ar & data;
367
+ cout << "sparsemat, doarch, sizeof (firstint) = " << firsti.Size() << endl;
368
+ }
369
+
370
+
371
+
372
+
373
+
374
+ template <class TM, class TV_ROW, class TV_COL>
375
+ shared_ptr<BaseMatrix> SparseMatrix<TM,TV_ROW,TV_COL> ::
376
+ InverseMatrix (shared_ptr<BitArray> subset) const
377
+ {
378
+ return CreateSparseMatrixInverse(dynamic_pointer_cast<const BaseSparseMatrix>(this->shared_from_this()), subset, nullptr);
379
+ }
380
+
381
+ // template <class TM>
382
+ // BaseMatrix * SparseMatrix<TM> ::
383
+
384
+ template <class TM, class TV_ROW, class TV_COL>
385
+ shared_ptr<BaseMatrix> SparseMatrix<TM,TV_ROW,TV_COL> ::
386
+ InverseMatrix (shared_ptr<const Array<int>> clusters) const
387
+ {
388
+ return CreateSparseMatrixInverse(dynamic_pointer_cast<const BaseSparseMatrix>(this->shared_from_this()), nullptr, clusters);
389
+ }
390
+
391
+ template <class TM, class TV_ROW, class TV_COL>
392
+ shared_ptr<BaseMatrix> SparseMatrix<TM,TV_ROW,TV_COL> ::
393
+ DeleteZeroElements(double tol) const
394
+ {
395
+ static Timer t("SparseMatrix::DeleteZeroElements"); RegionTimer reg(t);
396
+ Array<int> indi, indj;
397
+ Array<TM> val;
398
+ for (auto i : Range(this->Height()))
399
+ {
400
+ for (auto j : Range(firsti[i], firsti[i+1]))
401
+ {
402
+ if (ngbla::L2Norm2(data[j]) > tol*tol)
403
+ {
404
+ indi.Append (i);
405
+ indj.Append (colnr[j]);
406
+ val.Append (data[j]);
407
+ }
408
+ }
409
+ }
410
+ return this->CreateFromCOO(indi, indj, val, this->Height(), this->Width());
411
+ }
412
+
413
+ template <class TM>
414
+ ostream & SparseMatrixTM<TM> ::
415
+ Print (ostream & ost) const
416
+ {
417
+ for (int i = 0; i < size; i++)
418
+ {
419
+ ost << "Row " << i << ":";
420
+
421
+ for (size_t j = firsti[i]; j < firsti[i+1]; j++)
422
+ ost << " " << colnr[j] << ": " << data[j];
423
+ ost << "\n";
424
+ }
425
+ return ost;
426
+ }
427
+
428
+
429
+ template <class TM>
430
+ Array<MemoryUsage> SparseMatrixTM<TM> ::
431
+ GetMemoryUsage () const
432
+ {
433
+ Array<MemoryUsage> mu;
434
+ mu += { "SparseMatrix", nze*sizeof(TM), 1 };
435
+ if (owner) mu += MatrixGraph::GetMemoryUsage ();
436
+ return mu;
437
+ }
438
+
439
+
440
+ template <class TM> AutoVector SparseMatrixTM<TM> :: CreateVector () const
441
+ { throw Exception("SparseMatrixTM::CreateVector"); }
442
+
443
+ template <class TM> AutoVector SparseMatrixTM<TM> :: CreateRowVector () const
444
+ { throw Exception("SparseMatrixTM::CreateRowVector"); }
445
+
446
+ template <class TM> AutoVector SparseMatrixTM<TM> :: CreateColVector () const
447
+ { throw Exception("SparseMatrixTM::CreateColVector"); }
448
+
449
+
450
+ template <class TM, class TV_ROW, class TV_COL>
451
+ shared_ptr<BaseMatrix> SparseMatrix<TM,TV_ROW,TV_COL> ::
452
+ CreateMatrix () const
453
+ {
454
+ return make_shared<SparseMatrix> (*this);
455
+ }
456
+
457
+ template <class TM, class TV_ROW, class TV_COL>
458
+ AutoVector SparseMatrix<TM,TV_ROW,TV_COL> ::
459
+ CreateVector () const
460
+ {
461
+ if (this->size==this->width)
462
+ return make_unique<VVector<TVY>> (this->size);
463
+ throw Exception ("SparseMatrix::CreateVector for rectangular does not make sense, use either CreateColVector or CreateRowVector");
464
+ }
465
+
466
+ template <class TM, class TV_ROW, class TV_COL>
467
+ AutoVector SparseMatrix<TM,TV_ROW,TV_COL> ::
468
+ CreateRowVector () const
469
+ {
470
+ return make_unique<VVector<TVX>> (this->width);
471
+ }
472
+
473
+ template <class TM, class TV_ROW, class TV_COL>
474
+ AutoVector SparseMatrix<TM,TV_ROW,TV_COL> ::
475
+ CreateColVector () const
476
+ {
477
+ return make_unique<VVector<TVY>> (this->size);
478
+ }
479
+
480
+
481
+ template<class TM, class TV_ROW, class TV_COL>
482
+ shared_ptr<BaseSparseMatrix>
483
+ SparseMatrix<TM,TV_ROW,TV_COL> :: Restrict (const SparseMatrixTM<double> & prol,
484
+ shared_ptr<BaseSparseMatrix> acmat ) const
485
+ {
486
+ static Timer t ("sparsematrix - restrict");
487
+ static Timer tbuild ("sparsematrix - restrict, build matrix");
488
+ static Timer tcomp ("sparsematrix - restrict, compute matrix");
489
+ RegionTimer reg(t);
490
+
491
+ int n = this->Height();
492
+
493
+ auto cmat = dynamic_pointer_cast<SparseMatrixTM<TM>> (acmat);
494
+
495
+ // if no coarse matrix, build up matrix-graph!
496
+ if ( !cmat )
497
+ {
498
+ RegionTimer reg(tbuild);
499
+
500
+ Array<int> marks(n);
501
+ Array<IVec<2> > e2v;
502
+ for (int i = 0; i < n; i++)
503
+ for (int j = 0; j < this->GetRowIndices(i).Size(); j++)
504
+ {
505
+ int col = this->GetRowIndices(i)[j];
506
+ FlatArray<ColIdx> prol_rowind = prol.GetRowIndices(i);
507
+ FlatArray<ColIdx> prol_colind = prol.GetRowIndices(col);
508
+
509
+ for (int k = 0; k < prol_rowind.Size(); k++)
510
+ for (int l = 0; l < prol_colind.Size(); l++)
511
+ {
512
+ int kk = prol_rowind[k];
513
+ int ll = prol_colind[l];
514
+
515
+ // if (kk >= ll) swap (kk,ll);
516
+ e2v.Append (IVec<2> (kk,ll));
517
+ }
518
+ }
519
+
520
+ int nc = 0;
521
+ for (int i = 0; i < e2v.Size(); i++)
522
+ nc = max2 (nc, e2v[i][1]);
523
+ nc++;
524
+
525
+ // *testout << "e2v = " << endl << e2v << endl;
526
+
527
+ // count all entries in row with multiplicity
528
+ Array<int> cnt(nc);
529
+ cnt = 0;
530
+ for (int i = 0; i < e2v.Size(); i++)
531
+ cnt[e2v[i][1]]++;
532
+
533
+ Table<int> v2e(cnt);
534
+ cnt = 0;
535
+ for (int i = 0; i < e2v.Size(); i++)
536
+ {
537
+ int v1 = e2v[i][1];
538
+ v2e[v1][cnt[v1]++] = i;
539
+ }
540
+
541
+ cnt = 0;
542
+ marks = -1;
543
+
544
+ // count all entries in row withOUT multiplicity
545
+ for (int i = 0; i < nc; i++)
546
+ for (int j = 0; j < v2e[i].Size(); j++)
547
+ {
548
+ int jj = v2e[i][j];
549
+ int v0 = e2v[jj][0];
550
+ if (marks[v0] != i)
551
+ {
552
+ cnt[i]++;
553
+ marks[v0] = i;
554
+ }
555
+ }
556
+
557
+ cmat = make_shared<SparseMatrix<TM,TV_ROW,TV_COL>> (cnt);
558
+
559
+ marks = -1;
560
+ for (int i = 0; i < nc; i++)
561
+ for (int j = 0; j < v2e[i].Size(); j++)
562
+ {
563
+ int jj = v2e[i][j];
564
+ int v0 = e2v[jj][0];
565
+ if (marks[v0] != i)
566
+ {
567
+ marks[v0] = i;
568
+ cmat -> CreatePosition (i, v0);
569
+ }
570
+ }
571
+ }
572
+
573
+ cmat->AsVector() = 0.0;
574
+ RegionTimer reg2(tcomp);
575
+
576
+ for (int i = 0; i < n; i++)
577
+ {
578
+ FlatArray mat_ri = this->GetRowIndices(i);
579
+ FlatVector<TM> mat_rval = this->GetRowValues(i);
580
+
581
+ for (int j = 0; j < mat_ri.Size(); j++)
582
+ {
583
+ int col = mat_ri[j];
584
+ TM mat_val = mat_rval[j];
585
+
586
+ FlatArray prol_ri_i = prol.GetRowIndices(i);
587
+ FlatArray prol_ri_col = prol.GetRowIndices(col);
588
+ FlatVector<double> prol_rval_i = prol.GetRowValues(i);
589
+ FlatVector<double> prol_rval_col = prol.GetRowValues(col);
590
+
591
+ for (int k = 0; k < prol_ri_i.Size(); k++)
592
+ for (int l = 0; l < prol_ri_col.Size(); l++)
593
+ {
594
+ int kk = prol_ri_i[k];
595
+ int ll = prol_ri_col[l];
596
+
597
+ if ( /*kk>=ll &&*/ kk < cmat->Height() )
598
+ {
599
+ (*cmat)(kk,ll) +=
600
+ prol_rval_i[k] * prol_rval_col[l] * mat_val;
601
+ }
602
+
603
+ // if (ll >= kk && i != col && ll < cmat->Height() )
604
+ // {
605
+ // (*cmat)(ll,kk) +=
606
+ // prol_rval_col[l] * prol_rval_i[k] * Trans(mat_val);
607
+ // }
608
+
609
+ }
610
+ }
611
+ }
612
+ return cmat;
613
+ }
614
+
615
+
616
+
617
+ template <class TM, class TV_ROW, class TV_COL>
618
+ shared_ptr<BaseSparseMatrix> SparseMatrix<TM, TV_ROW, TV_COL> ::
619
+ Reorder (const Array<size_t> & reorder) const
620
+ {
621
+ Array<size_t> inv_reorder(reorder.Size());
622
+ for (size_t i : Range(reorder))
623
+ inv_reorder[reorder[i]] = i;
624
+
625
+ Array<int> cnt(this->Height());
626
+ for (size_t i : Range(cnt))
627
+ cnt[i] = this->GetRowIndices(reorder[i]).Size();
628
+ auto newmat = make_shared<SparseMatrix>(cnt);
629
+ for (size_t i : Range(cnt))
630
+ for (auto col : this->GetRowIndices(reorder[i]))
631
+ newmat->CreatePosition(i, inv_reorder[col]);
632
+
633
+ for (size_t i : Range(cnt))
634
+ for (auto col : this->GetRowIndices(reorder[i]))
635
+ (*newmat)(i, inv_reorder[col]) = (*this)(reorder[i], col);
636
+
637
+ return newmat;
638
+ }
639
+
640
+
641
+ template <class TM>
642
+ shared_ptr<BaseSparseMatrix> SparseMatrixTM<TM> ::
643
+ CreateTransposeTM (const function<shared_ptr<SparseMatrixTM<decltype(ngbla::Trans(TM()))>>(const Array<int>&,int)> & creator) const
644
+ {
645
+ Array<int> cnt(this->Width());
646
+ cnt = 0;
647
+ ParallelFor (this->Height(), [&] (int i)
648
+ {
649
+ for (int c : this->GetRowIndices(i))
650
+ AsAtomic (cnt[c]) ++;
651
+ });
652
+
653
+ auto trans = creator(cnt, this->Height());
654
+
655
+ cnt = 0;
656
+ ParallelFor (this->Height(), [&] (int i)
657
+ {
658
+ for (int ci : Range(this->GetRowIndices(i)))
659
+ {
660
+ int c = this->GetRowIndices(i)[ci];
661
+ int pos = AsAtomic(cnt[c])++;
662
+ trans -> GetRowIndices(c)[pos] = i;
663
+ trans -> GetRowValues(c)[pos] = Trans(this->GetRowValues(i)[ci]);
664
+ }
665
+ });
666
+
667
+ ParallelFor (trans->Height(), [&] (int r)
668
+ {
669
+ auto rowvals = trans->GetRowValues(r);
670
+ BubbleSort (trans->GetRowIndices(r),
671
+ FlatArray(rowvals.Size(), rowvals.Data()));
672
+ });
673
+
674
+ return trans;
675
+ }
676
+
677
+
678
+
679
+
680
+ template <class TM>
681
+ void SparseMatrixTM<TM> ::
682
+ AddElementMatrixSymmetric(FlatArray<int> dnums, BareSliceMatrix<TSCAL> elmat1, bool use_atomic)
683
+ {
684
+ static Timer timer_addelmat("SparseMatrixSymmetric::AddElementMatrix", NoTracing);
685
+ // static Timer timer ("SparseMatrixSymmetric::AddElementMatrix", NoTracing);
686
+ // RegionTimer reg (timer);
687
+ RegionTimer reg (timer_addelmat);
688
+ NgProfiler::AddThreadFlops (timer_addelmat, TaskManager::GetThreadId(), dnums.Size()*(dnums.Size()+1)/2);
689
+
690
+ // ArrayMem<int, 50> map(dnums.Size());
691
+ STACK_ARRAY(int, hmap, dnums.Size());
692
+ FlatArray<int> map(dnums.Size(), hmap);
693
+
694
+ {
695
+ for (int i = 0; i < dnums.Size(); i++) map[i] = i;
696
+ QuickSortI (dnums, map);
697
+ }
698
+
699
+ STACK_ARRAY(int, dnumsmap, dnums.Size());
700
+ for (int i = 0; i < dnums.Size(); i++)
701
+ dnumsmap[i] = dnums[map[i]];
702
+
703
+ Scalar2ElemMatrix<TM, TSCAL> elmat (elmat1);
704
+ // .AddSize(mat_traits<TM>::HEIGHT*dnums.Size(),
705
+ // mat_traits<TM>::WIDTH*dnums.Size()));
706
+
707
+ int first_used = 0;
708
+ while (first_used < dnums.Size() && !IsRegularIndex(dnums[map[first_used]]) ) first_used++;
709
+
710
+ if (use_atomic)
711
+ for (int i1 = first_used; i1 < dnums.Size(); i1++)
712
+ {
713
+ // FlatArray<int> rowind = this->GetRowIndices(dnums[map[i1]]);
714
+ // FlatVector<TM> rowvals = this->GetRowValues(dnums[map[i1]]);
715
+ FlatArray rowind = this->GetRowIndices(dnumsmap[i1]);
716
+ FlatVector<TM> rowvals = this->GetRowValues(dnumsmap[i1]);
717
+ auto elmat_row = elmat.Rows(map[i1], map[i1]+1);
718
+
719
+ size_t k = 0;
720
+ for (int j1 = first_used; j1 <= i1; j1++, k++)
721
+ {
722
+ // while (rowind[k] != dnums[map[j1]])
723
+ while (rowind[k] != dnumsmap[j1])
724
+ {
725
+ k++;
726
+ if (k >= rowind.Size())
727
+ throw Exception ("SparseMatrixSymmetricTM::AddElementMatrix: illegal dnums");
728
+ }
729
+ AtomicAdd (rowvals(k), elmat_row(0, map[j1]));
730
+ }
731
+ }
732
+ else
733
+ {
734
+ if (first_used+1 < dnums.Size())
735
+ {
736
+ this->PrefetchRow(dnums[map[first_used+1]]);
737
+ // _mm_prefetch (reinterpret_cast<void*>(&this->GetRowIndices(dnums[map[first_used+1]])[0]), _MM_HINT_T2);
738
+ // _mm_prefetch (reinterpret_cast<void*>(&this->GetRowValues(dnums[map[first_used+1]])[0]), _MM_HINT_T2);
739
+ }
740
+
741
+ for (int i1 = first_used; i1 < dnums.Size(); i1++)
742
+ {
743
+ if (i1+2 < dnums.Size())
744
+ this->PrefetchRow(dnums[map[i1+2]]);
745
+
746
+ // FlatArray<int> rowind = this->GetRowIndices(dnums[map[i1]]);
747
+ // FlatVector<TM> rowvals = this->GetRowValues(dnums[map[i1]]);
748
+ FlatArray rowind = this->GetRowIndices(dnumsmap[i1]);
749
+ FlatVector<TM> rowvals = this->GetRowValues(dnumsmap[i1]);
750
+ auto elmat_row = elmat.Rows(map[i1], map[i1]+1);
751
+
752
+ size_t k = 0;
753
+ for (int j1 = first_used; j1 <= i1; j1++, k++)
754
+ {
755
+ // while (rowind[k] != dnums[map[j1]])
756
+ while (rowind[k] != dnumsmap[j1])
757
+ {
758
+ k++;
759
+ if (unlikely(k >= rowind.Size()))
760
+ throw Exception ("SparseMatrixSymmetricTM::AddElementMatrix: illegal dnums");
761
+ }
762
+ rowvals(k) += elmat_row(0, map[j1]);
763
+ }
764
+ }
765
+ }
766
+ }
767
+
768
+
769
+ template <class TM, class TV>
770
+ SparseMatrixSymmetric<TM,TV> ::
771
+ SparseMatrixSymmetric (const MatrixGraph & agraph)
772
+ // : SparseMatrixTM<TM> (agraph, stealgraph),
773
+ // SparseMatrixSymmetricTM<TM> (agraph, stealgraph),
774
+ : SparseMatrix<TM,TV,TV> (agraph)
775
+ { ; }
776
+
777
+ template <class TM, class TV>
778
+ SparseMatrixSymmetric<TM,TV> ::
779
+ SparseMatrixSymmetric (MatrixGraph && agraph)
780
+ // : SparseMatrixTM<TM> (agraph, stealgraph),
781
+ // SparseMatrixSymmetricTM<TM> (agraph, stealgraph),
782
+ : SparseMatrix<TM,TV,TV> (std::move(agraph))
783
+ { ; }
784
+
785
+
786
+ template <class TM, class TV>
787
+ SparseMatrixSymmetric<TM,TV> :: ~SparseMatrixSymmetric ()
788
+ {
789
+ ;
790
+ }
791
+
792
+
793
+ template <class TM, class TV>
794
+ shared_ptr<BaseJacobiPrecond>
795
+ SparseMatrixSymmetric<TM,TV> :: CreateJacobiPrecond (shared_ptr<BitArray> inner) const
796
+ {
797
+ return make_shared<JacobiPrecondSymmetric<TM,TV>> (*this, inner);
798
+ }
799
+
800
+
801
+ template <class TM, class TV>
802
+ shared_ptr<BaseBlockJacobiPrecond>
803
+ SparseMatrixSymmetric<TM,TV> :: CreateBlockJacobiPrecond (shared_ptr<Table<int>> blocks,
804
+ const BaseVector * constraint,
805
+ bool parallel,
806
+ shared_ptr<BitArray> freedofs) const
807
+ {
808
+ // return make_shared<BlockJacobiPrecondSymmetric<TM,TV>> (*this, blocks);
809
+ return make_shared<BlockJacobiPrecondSymmetric<TM,TV>>
810
+ ( dynamic_pointer_cast<const SparseMatrixSymmetric>
811
+ (this->shared_from_this()),
812
+ blocks);
813
+ }
814
+
815
+
816
+
817
+
818
+
819
+ template <class TM, class TV>
820
+ void SparseMatrixSymmetric<TM,TV> ::
821
+ MultAdd (double s, const BaseVector & x, BaseVector & y) const
822
+ {
823
+ static Timer timer("SparseMatrixSymmetric::MultAdd");
824
+ RegionTimer reg (timer);
825
+ timer.AddFlops (2*this->nze);
826
+
827
+ const FlatVector<TV_ROW> fx = x.FV<TV_ROW>();
828
+ FlatVector<TV_COL> fy = y.FV<TV_COL>();
829
+
830
+ for (int i = 0; i < this->Height(); i++)
831
+ {
832
+ fy(i) += s * RowTimesVector (i, fx);
833
+ AddRowTransToVectorNoDiag (i, s * fx(i), fy);
834
+ }
835
+ }
836
+
837
+ template <class TM, class TV>
838
+ void SparseMatrixSymmetric<TM,TV> ::
839
+ MultAdd1 (double s, const BaseVector & x, BaseVector & y,
840
+ const BitArray * inner,
841
+ const Array<int> * cluster) const
842
+ {
843
+ const FlatVector<TV_ROW> fx = x.FV<TV_ROW> ();
844
+ FlatVector<TV_COL> fy = y.FV<TV_COL> ();
845
+
846
+ if (inner)
847
+ {
848
+ static Timer timer("SparseMatrixSymmetric::MultAdd1 - inner");
849
+ RegionTimer reg (timer);
850
+
851
+ for (int i = 0; i < this->Height(); i++)
852
+ if (inner->Test(i))
853
+ fy(i) += s * RowTimesVectorNoDiag (i, fx);
854
+ }
855
+ else if (cluster)
856
+ {
857
+ static Timer timer("SparseMatrixSymmetric::MultAdd1 - cluster");
858
+ RegionTimer reg (timer);
859
+
860
+ for (int i = 0; i < this->Height(); i++)
861
+ if ( (*cluster)[i])
862
+ fy(i) += s * RowTimesVectorNoDiag (i, fx);
863
+ }
864
+ else
865
+ {
866
+ static Timer timer("SparseMatrixSymmetric::MultAdd1");
867
+ RegionTimer reg (timer);
868
+
869
+
870
+ for (int i = 0; i < this->Height(); i++)
871
+ fy(i) += s * RowTimesVectorNoDiag (i, fx);
872
+ }
873
+ }
874
+
875
+
876
+ template <class TM, class TV>
877
+ void SparseMatrixSymmetric<TM,TV> ::
878
+ MultAdd2 (double s, const BaseVector & x, BaseVector & y,
879
+ const BitArray * inner,
880
+ const Array<int> * cluster) const
881
+ {
882
+ static Timer timer("SparseMatrixSymmetric::MultAdd2");
883
+ RegionTimer reg (timer);
884
+ timer.AddFlops (this->NZE());
885
+
886
+ const FlatVector<TV_ROW> fx = x.FV<TV_ROW> ();
887
+ FlatVector<TV_COL> fy = y.FV<TV_COL> ();
888
+
889
+ if (inner)
890
+ {
891
+ for (int i = 0; i < this->Height(); i++)
892
+ if (inner->Test(i))
893
+ AddRowTransToVector (i, s * fx(i), fy);
894
+ }
895
+ else if (cluster)
896
+ {
897
+ for (int i = 0; i < this->Height(); i++)
898
+ if ( (*cluster)[i])
899
+ AddRowTransToVector (i, s * fx(i), fy);
900
+ }
901
+ else
902
+ for (int i = 0; i < this->Height(); i++)
903
+ AddRowTransToVector (i, s * fx(i), fy);
904
+ }
905
+
906
+
907
+
908
+ template <class TM, class TV>
909
+ BaseSparseMatrix & SparseMatrixSymmetric<TM,TV> ::
910
+ AddMerge (double s, const SparseMatrixSymmetric & m2)
911
+ {
912
+ for (int i = 0; i < m2.Height(); i++)
913
+ for (int j = 0; j < m2.GetRowIndices(i).Size(); j++)
914
+ (*this)(i, m2.GetRowIndices(i)[j]) += s * m2(i, m2.GetRowIndices(i)[j]);
915
+ return *this;
916
+ }
917
+
918
+ }
919
+
920
+ #endif