ngsolve 6.2.2506.post74.dev0__cp314-cp314-macosx_10_15_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ngsolve might be problematic. Click here for more details.

Files changed (315) hide show
  1. netgen/include/analytic_integrals.hpp +10 -0
  2. netgen/include/arnoldi.hpp +55 -0
  3. netgen/include/bandmatrix.hpp +334 -0
  4. netgen/include/basematrix.hpp +957 -0
  5. netgen/include/basevector.hpp +1268 -0
  6. netgen/include/bdbequations.hpp +2805 -0
  7. netgen/include/bdbintegrator.hpp +1660 -0
  8. netgen/include/bem_diffops.hpp +475 -0
  9. netgen/include/bessel.hpp +1064 -0
  10. netgen/include/bilinearform.hpp +963 -0
  11. netgen/include/bla.hpp +29 -0
  12. netgen/include/blockalloc.hpp +95 -0
  13. netgen/include/blockjacobi.hpp +328 -0
  14. netgen/include/bspline.hpp +116 -0
  15. netgen/include/calcinverse.hpp +141 -0
  16. netgen/include/cg.hpp +368 -0
  17. netgen/include/chebyshev.hpp +44 -0
  18. netgen/include/cholesky.hpp +720 -0
  19. netgen/include/clapack.h +7254 -0
  20. netgen/include/code_generation.hpp +296 -0
  21. netgen/include/coefficient.hpp +2033 -0
  22. netgen/include/coefficient_impl.hpp +19 -0
  23. netgen/include/coefficient_stdmath.hpp +167 -0
  24. netgen/include/commutingAMG.hpp +106 -0
  25. netgen/include/comp.hpp +79 -0
  26. netgen/include/compatibility.hpp +41 -0
  27. netgen/include/complex_wrapper.hpp +73 -0
  28. netgen/include/compressedfespace.hpp +110 -0
  29. netgen/include/contact.hpp +235 -0
  30. netgen/include/diagonalmatrix.hpp +154 -0
  31. netgen/include/differentialoperator.hpp +276 -0
  32. netgen/include/diffop.hpp +1286 -0
  33. netgen/include/diffop_impl.hpp +328 -0
  34. netgen/include/diffopwithfactor.hpp +123 -0
  35. netgen/include/discontinuous.hpp +84 -0
  36. netgen/include/dump.hpp +949 -0
  37. netgen/include/ectypes.hpp +121 -0
  38. netgen/include/eigen.hpp +60 -0
  39. netgen/include/eigensystem.hpp +18 -0
  40. netgen/include/elasticity_equations.hpp +595 -0
  41. netgen/include/elementbyelement.hpp +195 -0
  42. netgen/include/elementtopology.hpp +1760 -0
  43. netgen/include/elementtransformation.hpp +339 -0
  44. netgen/include/evalfunc.hpp +405 -0
  45. netgen/include/expr.hpp +1686 -0
  46. netgen/include/facetfe.hpp +175 -0
  47. netgen/include/facetfespace.hpp +180 -0
  48. netgen/include/facethofe.hpp +111 -0
  49. netgen/include/facetsurffespace.hpp +112 -0
  50. netgen/include/fe_interfaces.hpp +32 -0
  51. netgen/include/fem.hpp +87 -0
  52. netgen/include/fesconvert.hpp +14 -0
  53. netgen/include/fespace.hpp +1449 -0
  54. netgen/include/finiteelement.hpp +286 -0
  55. netgen/include/globalinterfacespace.hpp +77 -0
  56. netgen/include/globalspace.hpp +115 -0
  57. netgen/include/gridfunction.hpp +525 -0
  58. netgen/include/h1amg.hpp +124 -0
  59. netgen/include/h1hofe.hpp +188 -0
  60. netgen/include/h1hofe_impl.hpp +1262 -0
  61. netgen/include/h1hofefo.hpp +148 -0
  62. netgen/include/h1hofefo_impl.hpp +185 -0
  63. netgen/include/h1hofespace.hpp +167 -0
  64. netgen/include/h1lofe.hpp +1240 -0
  65. netgen/include/h1lumping.hpp +41 -0
  66. netgen/include/hcurl_equations.hpp +1381 -0
  67. netgen/include/hcurlcurlfe.hpp +2241 -0
  68. netgen/include/hcurlcurlfespace.hpp +78 -0
  69. netgen/include/hcurlfe.hpp +259 -0
  70. netgen/include/hcurlfe_utils.hpp +107 -0
  71. netgen/include/hcurlhdiv_dshape.hpp +857 -0
  72. netgen/include/hcurlhdivfes.hpp +308 -0
  73. netgen/include/hcurlhofe.hpp +175 -0
  74. netgen/include/hcurlhofe_impl.hpp +1871 -0
  75. netgen/include/hcurlhofespace.hpp +193 -0
  76. netgen/include/hcurllofe.hpp +1146 -0
  77. netgen/include/hdiv_equations.hpp +880 -0
  78. netgen/include/hdivdivfe.hpp +2923 -0
  79. netgen/include/hdivdivsurfacespace.hpp +76 -0
  80. netgen/include/hdivfe.hpp +206 -0
  81. netgen/include/hdivfe_utils.hpp +717 -0
  82. netgen/include/hdivfes.hpp +75 -0
  83. netgen/include/hdivhofe.hpp +447 -0
  84. netgen/include/hdivhofe_impl.hpp +1107 -0
  85. netgen/include/hdivhofefo.hpp +229 -0
  86. netgen/include/hdivhofespace.hpp +177 -0
  87. netgen/include/hdivhosurfacefespace.hpp +106 -0
  88. netgen/include/hdivlofe.hpp +773 -0
  89. netgen/include/hidden.hpp +74 -0
  90. netgen/include/householder.hpp +181 -0
  91. netgen/include/hypre_ams_precond.hpp +123 -0
  92. netgen/include/hypre_precond.hpp +73 -0
  93. netgen/include/integrator.hpp +2012 -0
  94. netgen/include/integratorcf.hpp +253 -0
  95. netgen/include/interpolate.hpp +49 -0
  96. netgen/include/intrule.hpp +2542 -0
  97. netgen/include/intrules_SauterSchwab.hpp +25 -0
  98. netgen/include/irspace.hpp +49 -0
  99. netgen/include/jacobi.hpp +153 -0
  100. netgen/include/kernels.hpp +762 -0
  101. netgen/include/l2hofe.hpp +194 -0
  102. netgen/include/l2hofe_impl.hpp +564 -0
  103. netgen/include/l2hofefo.hpp +542 -0
  104. netgen/include/l2hofespace.hpp +344 -0
  105. netgen/include/la.hpp +38 -0
  106. netgen/include/linearform.hpp +266 -0
  107. netgen/include/matrix.hpp +2140 -0
  108. netgen/include/memusage.hpp +41 -0
  109. netgen/include/meshaccess.hpp +1359 -0
  110. netgen/include/mgpre.hpp +204 -0
  111. netgen/include/mp_coefficient.hpp +145 -0
  112. netgen/include/mptools.hpp +2281 -0
  113. netgen/include/multigrid.hpp +42 -0
  114. netgen/include/multivector.hpp +447 -0
  115. netgen/include/mumpsinverse.hpp +187 -0
  116. netgen/include/mycomplex.hpp +361 -0
  117. netgen/include/ng_lapack.hpp +1661 -0
  118. netgen/include/ngblas.hpp +1232 -0
  119. netgen/include/ngs_defines.hpp +30 -0
  120. netgen/include/ngs_stdcpp_include.hpp +106 -0
  121. netgen/include/ngs_utils.hpp +121 -0
  122. netgen/include/ngsobject.hpp +1019 -0
  123. netgen/include/ngsstream.hpp +113 -0
  124. netgen/include/ngstd.hpp +72 -0
  125. netgen/include/nodalhofe.hpp +96 -0
  126. netgen/include/nodalhofe_impl.hpp +141 -0
  127. netgen/include/normalfacetfe.hpp +223 -0
  128. netgen/include/normalfacetfespace.hpp +98 -0
  129. netgen/include/normalfacetsurfacefespace.hpp +84 -0
  130. netgen/include/order.hpp +251 -0
  131. netgen/include/parallel_matrices.hpp +222 -0
  132. netgen/include/paralleldofs.hpp +340 -0
  133. netgen/include/parallelngs.hpp +23 -0
  134. netgen/include/parallelvector.hpp +269 -0
  135. netgen/include/pardisoinverse.hpp +200 -0
  136. netgen/include/periodic.hpp +129 -0
  137. netgen/include/plateaufespace.hpp +25 -0
  138. netgen/include/pml.hpp +275 -0
  139. netgen/include/pmltrafo.hpp +631 -0
  140. netgen/include/postproc.hpp +142 -0
  141. netgen/include/potentialtools.hpp +22 -0
  142. netgen/include/precomp.hpp +60 -0
  143. netgen/include/preconditioner.hpp +602 -0
  144. netgen/include/prolongation.hpp +377 -0
  145. netgen/include/python_comp.hpp +107 -0
  146. netgen/include/python_fem.hpp +89 -0
  147. netgen/include/python_linalg.hpp +58 -0
  148. netgen/include/python_ngstd.hpp +386 -0
  149. netgen/include/recursive_pol.hpp +4896 -0
  150. netgen/include/recursive_pol_tet.hpp +395 -0
  151. netgen/include/recursive_pol_trig.hpp +492 -0
  152. netgen/include/reorderedfespace.hpp +81 -0
  153. netgen/include/sample_sort.hpp +105 -0
  154. netgen/include/scalarfe.hpp +335 -0
  155. netgen/include/shapefunction_utils.hpp +113 -0
  156. netgen/include/simd_complex.hpp +329 -0
  157. netgen/include/smoother.hpp +253 -0
  158. netgen/include/solve.hpp +89 -0
  159. netgen/include/sparsecholesky.hpp +313 -0
  160. netgen/include/sparsematrix.hpp +1038 -0
  161. netgen/include/sparsematrix_dyn.hpp +90 -0
  162. netgen/include/sparsematrix_impl.hpp +1013 -0
  163. netgen/include/special_matrix.hpp +463 -0
  164. netgen/include/specialelement.hpp +125 -0
  165. netgen/include/statushandler.hpp +33 -0
  166. netgen/include/stringops.hpp +12 -0
  167. netgen/include/superluinverse.hpp +136 -0
  168. netgen/include/symbolicintegrator.hpp +850 -0
  169. netgen/include/symmetricmatrix.hpp +144 -0
  170. netgen/include/tangentialfacetfe.hpp +224 -0
  171. netgen/include/tangentialfacetfespace.hpp +91 -0
  172. netgen/include/tensor.hpp +522 -0
  173. netgen/include/tensorcoefficient.hpp +446 -0
  174. netgen/include/tensorproductintegrator.hpp +113 -0
  175. netgen/include/thcurlfe.hpp +128 -0
  176. netgen/include/thcurlfe_impl.hpp +380 -0
  177. netgen/include/thdivfe.hpp +80 -0
  178. netgen/include/thdivfe_impl.hpp +492 -0
  179. netgen/include/tpdiffop.hpp +461 -0
  180. netgen/include/tpfes.hpp +133 -0
  181. netgen/include/tpintrule.hpp +224 -0
  182. netgen/include/triangular.hpp +465 -0
  183. netgen/include/tscalarfe.hpp +245 -0
  184. netgen/include/tscalarfe_impl.hpp +1029 -0
  185. netgen/include/umfpackinverse.hpp +148 -0
  186. netgen/include/vector.hpp +1273 -0
  187. netgen/include/voxelcoefficientfunction.hpp +41 -0
  188. netgen/include/vtkoutput.hpp +198 -0
  189. netgen/include/vvector.hpp +208 -0
  190. netgen/include/webgui.hpp +92 -0
  191. netgen/libngbla.dylib +0 -0
  192. netgen/libngcomp.dylib +0 -0
  193. netgen/libngfem.dylib +0 -0
  194. netgen/libngla.dylib +0 -0
  195. netgen/libngsbem.dylib +0 -0
  196. netgen/libngsolve.dylib +0 -0
  197. netgen/libngstd.dylib +0 -0
  198. ngsolve/TensorProductTools.py +210 -0
  199. ngsolve/__console.py +94 -0
  200. ngsolve/__expr.py +181 -0
  201. ngsolve/__init__.py +148 -0
  202. ngsolve/__init__.pyi +233 -0
  203. ngsolve/_scikit_build_core_dependencies.py +30 -0
  204. ngsolve/bla.pyi +1153 -0
  205. ngsolve/bvp.py +78 -0
  206. ngsolve/bvp.pyi +32 -0
  207. ngsolve/cmake/NGSolveConfig.cmake +102 -0
  208. ngsolve/cmake/ngsolve-targets-release.cmake +79 -0
  209. ngsolve/cmake/ngsolve-targets.cmake +163 -0
  210. ngsolve/comp/__init__.pyi +5449 -0
  211. ngsolve/comp/pml.pyi +89 -0
  212. ngsolve/config/__init__.py +1 -0
  213. ngsolve/config/__init__.pyi +43 -0
  214. ngsolve/config/__main__.py +4 -0
  215. ngsolve/config/config.py +60 -0
  216. ngsolve/config/config.pyi +45 -0
  217. ngsolve/demos/TensorProduct/__init__.py +0 -0
  218. ngsolve/demos/TensorProduct/tp_dg_1d_1d.py +80 -0
  219. ngsolve/demos/TensorProduct/tp_dg_1d_2d.py +73 -0
  220. ngsolve/demos/TensorProduct/tp_dg_2d_1d.py +72 -0
  221. ngsolve/demos/TensorProduct/tp_dg_2d_2d.py +66 -0
  222. ngsolve/demos/__init__.py +0 -0
  223. ngsolve/demos/howto/__init__.py +0 -0
  224. ngsolve/demos/howto/hhj.py +44 -0
  225. ngsolve/demos/howto/hybrid_dg.py +53 -0
  226. ngsolve/demos/howto/mixed.py +30 -0
  227. ngsolve/demos/howto/nonlin.py +29 -0
  228. ngsolve/demos/howto/pickling.py +26 -0
  229. ngsolve/demos/howto/pml.py +31 -0
  230. ngsolve/demos/howto/taskmanager.py +20 -0
  231. ngsolve/demos/howto/tdnns.py +47 -0
  232. ngsolve/demos/howto/timeDG-skeleton.py +45 -0
  233. ngsolve/demos/howto/timeDG.py +38 -0
  234. ngsolve/demos/howto/timeDGlap.py +42 -0
  235. ngsolve/demos/howto/timeDGwave.py +61 -0
  236. ngsolve/demos/intro/__init__.py +0 -0
  237. ngsolve/demos/intro/adaptive.py +123 -0
  238. ngsolve/demos/intro/cmagnet.py +59 -0
  239. ngsolve/demos/intro/elasticity.py +76 -0
  240. ngsolve/demos/intro/navierstokes.py +74 -0
  241. ngsolve/demos/intro/poisson.ipynb +170 -0
  242. ngsolve/demos/intro/poisson.py +41 -0
  243. ngsolve/demos/mpi/__init__.py +0 -0
  244. ngsolve/demos/mpi/mpi_cmagnet.py +87 -0
  245. ngsolve/demos/mpi/mpi_navierstokes.py +117 -0
  246. ngsolve/demos/mpi/mpi_poisson.py +89 -0
  247. ngsolve/demos/mpi/mpi_timeDG.py +82 -0
  248. ngsolve/directsolvers.py +26 -0
  249. ngsolve/directsolvers.pyi +15 -0
  250. ngsolve/eigenvalues.py +364 -0
  251. ngsolve/eigenvalues.pyi +30 -0
  252. ngsolve/fem.pyi +1647 -0
  253. ngsolve/internal.py +89 -0
  254. ngsolve/krylovspace.py +1013 -0
  255. ngsolve/krylovspace.pyi +298 -0
  256. ngsolve/la.pyi +1230 -0
  257. ngsolve/meshes.py +748 -0
  258. ngsolve/ngs2petsc.py +310 -0
  259. ngsolve/ngscxx.py +42 -0
  260. ngsolve/ngslib.so +0 -0
  261. ngsolve/ngstd.pyi +59 -0
  262. ngsolve/nonlinearsolvers.py +203 -0
  263. ngsolve/nonlinearsolvers.pyi +95 -0
  264. ngsolve/preconditioners.py +11 -0
  265. ngsolve/preconditioners.pyi +7 -0
  266. ngsolve/solve.pyi +109 -0
  267. ngsolve/solve_implementation.py +168 -0
  268. ngsolve/solve_implementation.pyi +42 -0
  269. ngsolve/solvers.py +7 -0
  270. ngsolve/solvers.pyi +14 -0
  271. ngsolve/timestepping.py +185 -0
  272. ngsolve/timestepping.pyi +28 -0
  273. ngsolve/timing.py +108 -0
  274. ngsolve/timing.pyi +54 -0
  275. ngsolve/utils.py +167 -0
  276. ngsolve/utils.pyi +273 -0
  277. ngsolve/webgui.py +670 -0
  278. ngsolve-6.2.2506.post74.dev0.data/data/Netgen.icns +0 -0
  279. ngsolve-6.2.2506.post74.dev0.data/data/bin/ngscxx +17 -0
  280. ngsolve-6.2.2506.post74.dev0.data/data/bin/ngsld +13 -0
  281. ngsolve-6.2.2506.post74.dev0.data/data/bin/ngsolve.tcl +648 -0
  282. ngsolve-6.2.2506.post74.dev0.data/data/bin/ngspy +2 -0
  283. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/beam.geo +17 -0
  284. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/beam.vol +240 -0
  285. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/chip.in2d +41 -0
  286. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/chip.vol +614 -0
  287. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/coil.geo +12 -0
  288. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/coil.vol +2560 -0
  289. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/coilshield.geo +24 -0
  290. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/coilshield.vol +3179 -0
  291. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/cube.geo +19 -0
  292. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/cube.vol +1832 -0
  293. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/d10_DGdoubleglazing.pde +50 -0
  294. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/d11_chip_nitsche.pde +40 -0
  295. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/d1_square.pde +43 -0
  296. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/d2_chip.pde +35 -0
  297. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/d3_helmholtz.pde +22 -0
  298. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/d4_cube.pde +46 -0
  299. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/d5_beam.pde +74 -0
  300. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/d6_shaft.pde +73 -0
  301. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/d7_coil.pde +50 -0
  302. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/d8_coilshield.pde +49 -0
  303. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/d9_hybridDG.pde +72 -0
  304. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/doubleglazing.in2d +27 -0
  305. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/doubleglazing.vol +737 -0
  306. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/piezo2d40round4.vol.gz +0 -0
  307. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/shaft.geo +73 -0
  308. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/shaft.vol +4291 -0
  309. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/square.in2d +17 -0
  310. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/square.vol +149 -0
  311. ngsolve-6.2.2506.post74.dev0.dist-info/METADATA +13 -0
  312. ngsolve-6.2.2506.post74.dev0.dist-info/RECORD +315 -0
  313. ngsolve-6.2.2506.post74.dev0.dist-info/WHEEL +5 -0
  314. ngsolve-6.2.2506.post74.dev0.dist-info/licenses/LICENSE +504 -0
  315. ngsolve-6.2.2506.post74.dev0.dist-info/top_level.txt +2 -0
@@ -0,0 +1,1013 @@
1
+ #ifndef FILE_NGS_SPARSEMATRIX_IMPL
2
+ #define FILE_NGS_SPARSEMATRIX_IMPL
3
+
4
+ /**************************************************************************/
5
+ /* File: sparsematrix_impl.hpp */
6
+ /* Author: Joachim Schoeberl */
7
+ /* Date: 01. Oct. 94, 15 Jan. 02 */
8
+ /* redesign: Lukas Kogler, July 2019 */
9
+ /**************************************************************************/
10
+
11
+
12
+ #include "sparsematrix.hpp"
13
+ namespace ngla
14
+ {
15
+
16
+ template <class TM>
17
+ SparseMatrixTM<TM> :: ~SparseMatrixTM ()
18
+ { ; }
19
+
20
+
21
+ template <class TM>
22
+ void SparseMatrixTM<TM> ::
23
+ PrefetchRow (size_t rownr) const
24
+ {
25
+ #ifdef NETGEN_ARCH_AMD64
26
+ #ifdef __GNUC__
27
+ size_t fi = firsti[rownr], fin = firsti[rownr+1];
28
+ // int * pi = &colnr[fi], * pin = &colnr[fin];
29
+ int *pi = colnr.Data()+fi, *pin = colnr.Data()+fin;
30
+ while (pi < pin)
31
+ {
32
+ _mm_prefetch (reinterpret_cast<void*>(pi), _MM_HINT_T2);
33
+ pi += 64/sizeof(int);
34
+ }
35
+
36
+ TM * vi = &data[fi], * vin = (&data[fin-1])+1;
37
+ while (vi < vin)
38
+ {
39
+ _mm_prefetch (reinterpret_cast<void*>(vi), _MM_HINT_T2);
40
+ vi += 64/sizeof(double);
41
+ }
42
+ #endif
43
+ #endif // NETGEN_ARCH_AMD64
44
+ ;
45
+ }
46
+
47
+
48
+ template <class TM>
49
+ shared_ptr<SparseMatrixTM<TM>> SparseMatrixTM<TM> ::
50
+ CreateFromCOO (FlatArray<int> indi, FlatArray<int> indj,
51
+ FlatArray<TM> val, size_t h, size_t w)
52
+ {
53
+ static Timer t("SparseMatrix::CreateFromCOO"); RegionTimer r(t);
54
+ static Timer t1("SparseMatrix::CreateFromCOO 1");
55
+ static Timer t2("SparseMatrix::CreateFromCOO 2");
56
+ static Timer t3("SparseMatrix::CreateFromCOO 3");
57
+
58
+ /*
59
+ {
60
+ Array<int> cnt(h);
61
+
62
+ t1.Start();
63
+ DynamicTable<int> tab(h);
64
+ for (size_t i = 0; i < indi.Size(); i++)
65
+ tab.AddUnique(indi[i], indj[i]);
66
+ t1.Stop();
67
+ for (size_t i = 0; i < h; i++)
68
+ cnt[i] = tab.EntrySize(i);
69
+
70
+ auto matrix = make_shared<SparseMatrix<TM>> (cnt, w);
71
+ t2.Start();
72
+ for (auto k : ngstd::Range(indi))
73
+ matrix->CreatePosition(indi[k], indj[k]);
74
+ t2.Stop();
75
+ matrix->SetZero();
76
+
77
+ t3.Start();
78
+ for (auto k : ngstd::Range(indi))
79
+ (*matrix)(indi[k], indj[k]) += val[k];
80
+ t3.Stop();
81
+ // return matrix;
82
+ }
83
+ */
84
+
85
+ Array<int> cnt(h);
86
+ cnt = 0;
87
+ for (auto i : indi)
88
+ cnt[i]++;
89
+
90
+ Table<int> tab(cnt);
91
+ cnt = 0;
92
+
93
+ for (auto [i,j] : Zip(indi, indj))
94
+ tab[i][cnt[i]++] = j;
95
+
96
+ cnt = 0;
97
+ // for (int i = 0; i < tab.Size(); i++)
98
+ ParallelFor (tab.Size(), [&] (size_t i)
99
+ {
100
+ QuickSort (tab[i]);
101
+
102
+ int prev = -1;
103
+ for (auto j : tab[i])
104
+ {
105
+ if (j != prev) cnt[i]++;
106
+ prev = j;
107
+ }
108
+ });
109
+
110
+ auto matrix = make_shared<SparseMatrix<TM>> (cnt, w);
111
+ t2.Start();
112
+ // for (auto k : ngstd::Range(indi))
113
+ // matrix->CreatePosition(indi[k], indj[k]);
114
+
115
+ cnt = 0;
116
+ for (int i = 0; i < tab.Size(); i++)
117
+ {
118
+ int prev = -1;
119
+ for (auto j : tab[i])
120
+ {
121
+ auto cols = matrix->GetRowIndices(i);
122
+ if (j != prev)
123
+ cols[cnt[i]++] = j;
124
+ prev = j;
125
+ }
126
+ }
127
+
128
+ t2.Stop();
129
+ matrix->SetZero();
130
+
131
+ t3.Start();
132
+ /*
133
+ for (auto k : ngstd::Range(indi))
134
+ (*matrix)(indi[k], indj[k]) += val[k];
135
+ */
136
+ ParallelFor (indi.Size(), [&](size_t k)
137
+ {
138
+ AtomicAdd ( (*matrix)(indi[k], indj[k]), val[k]);
139
+ });
140
+ t3.Stop();
141
+ return matrix;
142
+ }
143
+
144
+
145
+
146
+
147
+
148
+ template <class TM>
149
+ void SparseMatrixTM<TM> ::
150
+ AddElementMatrix(FlatArray<int> dnums1, FlatArray<int> dnums2,
151
+ BareSliceMatrix<TSCAL> elmat1, bool use_atomic)
152
+ {
153
+ static Timer timer_addelmat_nonsym("SparseMatrix::AddElementMatrix", NoTracing);
154
+ RegionTimer reg (timer_addelmat_nonsym);
155
+ NgProfiler::AddThreadFlops (timer_addelmat_nonsym, TaskManager::GetThreadId(), dnums1.Size()*dnums2.Size());
156
+
157
+ ArrayMem<int, 50> map(dnums2.Size());
158
+ for (int i = 0; i < map.Size(); i++) map[i] = i;
159
+ QuickSortI (dnums2, map);
160
+ Scalar2ElemMatrix<TM, TSCAL> elmat (elmat1);
161
+ // .AddSize(mat_traits<TM>::HEIGHT*dnums1.Size(),
162
+ // mat_traits<TM>::WIDTH*dnums2.Size()));
163
+
164
+ for (int i = 0; i < dnums1.Size(); i++)
165
+ if (IsRegularIndex(dnums1[i]))
166
+ {
167
+ FlatArray rowind = this->GetRowIndices(dnums1[i]);
168
+ FlatVector<TM> rowvals = this->GetRowValues(dnums1[i]);
169
+
170
+ int k = 0;
171
+ for (int j1 = 0; j1 < dnums2.Size(); j1++)
172
+ {
173
+ int j = map[j1];
174
+ if (IsRegularIndex(dnums2[j]))
175
+ {
176
+ while (rowind[k] != dnums2[j])
177
+ {
178
+ k++;
179
+ if (k >= rowind.Size())
180
+ throw Exception ("SparseMatrixTM::AddElementMatrix: illegal dnums");
181
+ }
182
+ if (use_atomic)
183
+ AtomicAdd (rowvals(k), elmat(i,j));
184
+ else
185
+ rowvals(k) += elmat(i,j);
186
+ }
187
+ }
188
+ }
189
+ }
190
+
191
+
192
+ template <class TM>
193
+ void SparseMatrixTM<TM> :: SetZero ()
194
+ {
195
+ static Timer t("SparseMatrix::SetZero (taskhandler)");
196
+ t.AddFlops (this->NZE());
197
+ RegionTimer reg(t);
198
+
199
+ /*
200
+ ParallelFor (balance, [&](int row)
201
+ {
202
+ data.Range(firsti[row], firsti[row+1]) = TM(0.0);
203
+ });
204
+ */
205
+ ParallelForRange (balance, [&](IntRange r)
206
+ {
207
+ data.Range(firsti[r.First()], firsti[r.Next()]) = TM(0.0);
208
+ });
209
+
210
+ }
211
+
212
+
213
+
214
+ template <class TM, class TV_ROW, class TV_COL>
215
+ SparseMatrix<TM,TV_ROW,TV_COL> :: SparseMatrix (const MatrixGraph & agraph)
216
+ : SparseMatrixTM<TM> (agraph)
217
+ { ; }
218
+
219
+ template <class TM, class TV_ROW, class TV_COL>
220
+ SparseMatrix<TM,TV_ROW,TV_COL> :: SparseMatrix (MatrixGraph && agraph)
221
+ : SparseMatrixTM<TM> (std::move(agraph))
222
+ { ; }
223
+
224
+
225
+
226
+ template <class TM, class TV_ROW, class TV_COL>
227
+ shared_ptr<BaseJacobiPrecond> SparseMatrix<TM,TV_ROW,TV_COL> ::
228
+ CreateJacobiPrecond (shared_ptr<BitArray> inner) const
229
+ {
230
+ // if constexpr(mat_traits<TM>::HEIGHT != mat_traits<TM>::WIDTH) return nullptr;
231
+ if constexpr(ngbla::Height<TM>() != ngbla::Width<TM>()) return nullptr;
232
+ else if constexpr(ngbla::Height<TM>() > MAX_SYS_DIM) {
233
+ throw Exception(string("MAX_SYS_DIM = ")+to_string(MAX_SYS_DIM)+string(", need ")+to_string(mat_traits<TM>::HEIGHT));
234
+ return nullptr;
235
+ }
236
+ else return make_shared<JacobiPrecond<TM,TV_ROW,TV_COL>> ( dynamic_pointer_cast<SparseMatrix>
237
+ (const_cast<SparseMatrix*>(this)->shared_from_this()), inner);
238
+ }
239
+
240
+ template <class TM, class TV_ROW, class TV_COL>
241
+ shared_ptr<BaseBlockJacobiPrecond> SparseMatrix<TM,TV_ROW,TV_COL> ::
242
+ CreateBlockJacobiPrecond (shared_ptr<Table<int>> blocks,
243
+ const BaseVector * constraint,
244
+ bool parallel,
245
+ shared_ptr<BitArray> freedofs) const
246
+ {
247
+ // if constexpr(mat_traits<TM>::HEIGHT != mat_traits<TM>::WIDTH) return nullptr;
248
+ if constexpr(ngbla::Height<TM>() != ngbla::Width<TM>()) return nullptr;
249
+ else if constexpr(ngbla::Height<TM>() > MAX_SYS_DIM) {
250
+ throw Exception(string("MAX_SYS_DIM = ")+to_string(MAX_SYS_DIM)+string(", need ")+to_string(ngbla::Height<TM>()));
251
+ return nullptr;
252
+ }
253
+ else
254
+ // return make_shared<BlockJacobiPrecond<TM,TV_ROW,TV_COL>> (*this, blocks, parallel);
255
+
256
+ return make_shared<BlockJacobiPrecond<TM,TV_ROW,TV_COL>>
257
+ ( dynamic_pointer_cast<const SparseMatrix>
258
+ (this->shared_from_this()),
259
+ blocks, parallel);
260
+ }
261
+
262
+
263
+
264
+ template <class TM, class TV_ROW, class TV_COL>
265
+ void SparseMatrix<TM,TV_ROW,TV_COL> ::
266
+ MultAdd (double s, const BaseVector & x, BaseVector & y) const
267
+ {
268
+ static Timer t("SparseMatrix::MultAdd"); RegionTimer reg(t);
269
+ t.AddFlops (this->NZE()*sizeof(TV_ROW)*sizeof(TV_COL)/sqr(sizeof(double)));
270
+
271
+ ParallelForRange
272
+ (balance, [&] (IntRange myrange)
273
+ {
274
+ FlatVector<TVX> fx = x.FV<TVX>();
275
+ FlatVector<TVY> fy = y.FV<TVY>();
276
+
277
+ for (auto i : myrange)
278
+ fy(i) += s * RowTimesVector (i, fx);
279
+ });
280
+
281
+ #ifdef OLD
282
+ if (task_manager)
283
+ {
284
+ FlatVector<TVX> fx = x.FV<TVX>();
285
+ FlatVector<TVY> fy = y.FV<TVY>();
286
+
287
+ // int ntasks = task_manager->GetNumThreads();
288
+
289
+ task_manager -> CreateJob
290
+ ([&] (TaskInfo & ti)
291
+ {
292
+ int tasks_per_part = ti.ntasks / balance.Size();
293
+ int mypart = ti.task_nr / tasks_per_part;
294
+ int num_in_part = ti.task_nr % tasks_per_part;
295
+
296
+ auto myrange = balance[mypart].Split (num_in_part, tasks_per_part);
297
+
298
+ for (auto row : myrange)
299
+ fy(row) += s * RowTimesVector (row, fx);
300
+
301
+ });
302
+ return;
303
+ }
304
+
305
+
306
+ FlatVector<TVX> fx = x.FV<TVX>();
307
+ FlatVector<TVY> fy = y.FV<TVY>();
308
+
309
+ int h = this->Height();
310
+ for (int i = 0; i < h; i++)
311
+ fy(i) += s * RowTimesVector (i, fx);
312
+ #endif
313
+
314
+
315
+ }
316
+
317
+ template <class TM, class TV_ROW, class TV_COL>
318
+ void SparseMatrix<TM,TV_ROW,TV_COL> ::
319
+ MultAdd1 (double s, const BaseVector & x, BaseVector & y,
320
+ const BitArray * ainner,
321
+ const Array<int> * acluster) const
322
+ {
323
+ if (!ainner || acluster)
324
+ {
325
+ MultAdd (s, x, y);
326
+ return;
327
+ }
328
+
329
+ FlatVector<TVX> fx = x.FV<TVX>();
330
+ FlatVector<TVY> fy = y.FV<TVY>();
331
+
332
+ SharedLoop2 sl(ainner->Size());
333
+ ParallelJob
334
+ ( [&] (const TaskInfo & ti)
335
+ {
336
+ for (size_t row : sl)
337
+ if ( (*ainner).Test(row))
338
+ fy(row) += s * RowTimesVector (row, fx);
339
+ });
340
+ }
341
+
342
+
343
+
344
+ template <class TM, class TV_ROW, class TV_COL>
345
+ void SparseMatrix<TM,TV_ROW,TV_COL> ::
346
+ MultTransAdd (double s, const BaseVector & x, BaseVector & y) const
347
+ {
348
+ static Timer timer ("SparseMatrix::MultTransAdd");
349
+ RegionTimer reg (timer);
350
+
351
+ FlatVector<TVY> fx = x.FV<TVY>();
352
+ FlatVector<TVX> fy = y.FV<TVX>();
353
+
354
+ for (int i = 0; i < this->Height(); i++)
355
+ AddRowTransToVector (i, s*fx(i), fy);
356
+
357
+ timer.AddFlops (this->NZE());
358
+ }
359
+
360
+
361
+ template <class TM, class TV_ROW, class TV_COL>
362
+ void SparseMatrix<TM,TV_ROW,TV_COL> ::
363
+ MultAdd (Complex s, const BaseVector & x, BaseVector & y) const
364
+ {
365
+ static Timer timer("SparseMatrix::MultAdd Complex");
366
+ RegionTimer reg (timer);
367
+
368
+ FlatVector<TVX> fx = x.FV<TVX> (); // (x.Size(), x.Memory());
369
+ FlatVector<TVY> fy = y.FV<TVY> (); // (y.Size(), y.Memory());
370
+
371
+ if constexpr (std::is_constructible<TSCAL,Complex>())
372
+ {
373
+ int h = this->Height();
374
+ for (int i = 0; i < h; i++)
375
+ fy(i) += TSCAL(s) * RowTimesVector (i, fx);
376
+ }
377
+ else
378
+ throw Exception("MultAdd(complex) called for real matrix");
379
+ }
380
+
381
+
382
+ template <class TM, class TV_ROW, class TV_COL>
383
+ void SparseMatrix<TM,TV_ROW,TV_COL> ::
384
+ MultTransAdd (Complex s, const BaseVector & x, BaseVector & y) const
385
+ {
386
+ static Timer timer("SparseMatrix::MultTransAdd Complex");
387
+ RegionTimer reg (timer);
388
+
389
+ FlatVector<TVY> fx = x.FV<TVY>(); // (x.Size(), x.Memory());
390
+ FlatVector<TVX> fy = y.FV<TVX>(); // (y.Size(), y.Memory());
391
+
392
+ if constexpr (std::is_constructible<TSCAL,Complex>())
393
+ for (int i = 0; i < this->Height(); i++)
394
+ AddRowTransToVector (i, TSCAL(s)*fx(i), fy);
395
+ else
396
+ throw Exception("MultTransAdd(complex) called for real matrix");
397
+ }
398
+
399
+ template <class TM, class TV_ROW, class TV_COL>
400
+ void SparseMatrix<TM,TV_ROW,TV_COL> ::
401
+ MultConjTransAdd (Complex s, const BaseVector & x, BaseVector & y) const
402
+ {
403
+ static Timer timer("SparseMatrix::MultTransAdd Complex");
404
+ RegionTimer reg (timer);
405
+
406
+ FlatVector<TVY> fx = x.FV<TVY>(); // (x.Size(), x.Memory());
407
+ FlatVector<TVX> fy = y.FV<TVX>(); // (y.Size(), y.Memory());
408
+
409
+ if constexpr (std::is_constructible<TSCAL,Complex>())
410
+ for (int i = 0; i < this->Height(); i++)
411
+ AddRowConjTransToVector (i, TSCAL(s)*fx(i), fy);
412
+ else
413
+ throw Exception("MultConjTransAdd(complex) called for real matrix");
414
+ }
415
+
416
+ template <class TM, class TV_ROW, class TV_COL>
417
+ void SparseMatrix<TM,TV_ROW,TV_COL> ::
418
+ MultAdd (FlatVector<double> alpha, const MultiVector & x, MultiVector & y) const
419
+ {
420
+ BaseMatrix::MultAdd (alpha, x, y);
421
+ }
422
+
423
+
424
+
425
+ template <class TM, class TV_ROW, class TV_COL>
426
+ void SparseMatrix<TM,TV_ROW,TV_COL> :: DoArchive (Archive & ar)
427
+ {
428
+ ar & this->size;
429
+ ar & this->width;
430
+ ar & this->nze;
431
+ ar & firsti;
432
+ ar & colnr;
433
+ ar & data;
434
+ cout << "sparsemat, doarch, sizeof (firstint) = " << firsti.Size() << endl;
435
+ }
436
+
437
+
438
+
439
+
440
+
441
+ template <class TM, class TV_ROW, class TV_COL>
442
+ shared_ptr<BaseMatrix> SparseMatrix<TM,TV_ROW,TV_COL> ::
443
+ InverseMatrix (shared_ptr<BitArray> subset) const
444
+ {
445
+ return CreateSparseMatrixInverse(dynamic_pointer_cast<const BaseSparseMatrix>(this->shared_from_this()), subset, nullptr);
446
+ }
447
+
448
+ // template <class TM>
449
+ // BaseMatrix * SparseMatrix<TM> ::
450
+
451
+ template <class TM, class TV_ROW, class TV_COL>
452
+ shared_ptr<BaseMatrix> SparseMatrix<TM,TV_ROW,TV_COL> ::
453
+ InverseMatrix (shared_ptr<const Array<int>> clusters) const
454
+ {
455
+ return CreateSparseMatrixInverse(dynamic_pointer_cast<const BaseSparseMatrix>(this->shared_from_this()), nullptr, clusters);
456
+ }
457
+
458
+ template <class TM, class TV_ROW, class TV_COL>
459
+ shared_ptr<BaseMatrix> SparseMatrix<TM,TV_ROW,TV_COL> ::
460
+ DeleteZeroElements(double tol) const
461
+ {
462
+ static Timer t("SparseMatrix::DeleteZeroElements"); RegionTimer reg(t);
463
+ Array<int> indi, indj;
464
+ Array<TM> val;
465
+ for (auto i : Range(this->Height()))
466
+ {
467
+ for (auto j : Range(firsti[i], firsti[i+1]))
468
+ {
469
+ if (ngbla::L2Norm2(data[j]) > tol*tol)
470
+ {
471
+ indi.Append (i);
472
+ indj.Append (colnr[j]);
473
+ val.Append (data[j]);
474
+ }
475
+ }
476
+ }
477
+ return this->CreateFromCOO(indi, indj, val, this->Height(), this->Width());
478
+
479
+ /*
480
+ // needs parallelization and testing
481
+ Array<int> cnt(this->Height());
482
+ cnt = 0;
483
+ size_t cnt0 = 0;
484
+ for (auto i : Range(this->Height()))
485
+ for (auto j : Range(firsti[i], firsti[i+1]))
486
+ if (ngbla::L2Norm2(data[j]) > tol*tol)
487
+ cnt[i]++;
488
+ else
489
+ cnt0++;
490
+
491
+ cout << "zero-els = " << cnt0 << endl;
492
+
493
+
494
+ auto matrix = make_shared<SparseMatrix<TM>> (cnt, this->Width());
495
+
496
+ for (auto i : Range(this->Height()))
497
+ for (auto j : Range(firsti[i], firsti[i+1]))
498
+ if (ngbla::L2Norm2(data[j]) > tol*tol)
499
+ (*matrix)(i, colnr[i]) = data[j];
500
+
501
+ return matrix;
502
+ */
503
+ }
504
+
505
+ template <class TM>
506
+ ostream & SparseMatrixTM<TM> ::
507
+ Print (ostream & ost) const
508
+ {
509
+ for (int i = 0; i < size; i++)
510
+ {
511
+ ost << "Row " << i << ":";
512
+
513
+ for (size_t j = firsti[i]; j < firsti[i+1]; j++)
514
+ ost << " " << colnr[j] << ": " << data[j];
515
+ ost << "\n";
516
+ }
517
+ return ost;
518
+ }
519
+
520
+
521
+ template <class TM>
522
+ Array<MemoryUsage> SparseMatrixTM<TM> ::
523
+ GetMemoryUsage () const
524
+ {
525
+ Array<MemoryUsage> mu;
526
+ mu += { "SparseMatrix", nze*sizeof(TM), 1 };
527
+ if (owner) mu += MatrixGraph::GetMemoryUsage ();
528
+ return mu;
529
+ }
530
+
531
+
532
+ template <class TM> AutoVector SparseMatrixTM<TM> :: CreateVector () const
533
+ { throw Exception("SparseMatrixTM::CreateVector"); }
534
+
535
+ template <class TM> AutoVector SparseMatrixTM<TM> :: CreateRowVector () const
536
+ { throw Exception("SparseMatrixTM::CreateRowVector"); }
537
+
538
+ template <class TM> AutoVector SparseMatrixTM<TM> :: CreateColVector () const
539
+ { throw Exception("SparseMatrixTM::CreateColVector"); }
540
+
541
+
542
+ template <class TM, class TV_ROW, class TV_COL>
543
+ shared_ptr<BaseMatrix> SparseMatrix<TM,TV_ROW,TV_COL> ::
544
+ CreateMatrix () const
545
+ {
546
+ return make_shared<SparseMatrix> (*this);
547
+ }
548
+
549
+ template <class TM, class TV_ROW, class TV_COL>
550
+ AutoVector SparseMatrix<TM,TV_ROW,TV_COL> ::
551
+ CreateVector () const
552
+ {
553
+ if (this->size==this->width)
554
+ return make_unique<VVector<TVY>> (this->size);
555
+ throw Exception ("SparseMatrix::CreateVector for rectangular does not make sense, use either CreateColVector or CreateRowVector");
556
+ }
557
+
558
+ template <class TM, class TV_ROW, class TV_COL>
559
+ AutoVector SparseMatrix<TM,TV_ROW,TV_COL> ::
560
+ CreateRowVector () const
561
+ {
562
+ return make_unique<VVector<TVX>> (this->width);
563
+ }
564
+
565
+ template <class TM, class TV_ROW, class TV_COL>
566
+ AutoVector SparseMatrix<TM,TV_ROW,TV_COL> ::
567
+ CreateColVector () const
568
+ {
569
+ return make_unique<VVector<TVY>> (this->size);
570
+ }
571
+
572
+
573
+ template<class TM, class TV_ROW, class TV_COL>
574
+ shared_ptr<BaseSparseMatrix>
575
+ SparseMatrix<TM,TV_ROW,TV_COL> :: Restrict (const SparseMatrixTM<double> & prol,
576
+ shared_ptr<BaseSparseMatrix> acmat ) const
577
+ {
578
+ static Timer t ("sparsematrix - restrict");
579
+ static Timer tbuild ("sparsematrix - restrict, build matrix");
580
+ static Timer tcomp ("sparsematrix - restrict, compute matrix");
581
+ RegionTimer reg(t);
582
+
583
+ int n = this->Height();
584
+
585
+ auto cmat = dynamic_pointer_cast<SparseMatrixTM<TM>> (acmat);
586
+
587
+ // if no coarse matrix, build up matrix-graph!
588
+ if ( !cmat )
589
+ {
590
+ RegionTimer reg(tbuild);
591
+
592
+ Array<int> marks(n);
593
+ Array<IVec<2> > e2v;
594
+ for (int i = 0; i < n; i++)
595
+ for (int j = 0; j < this->GetRowIndices(i).Size(); j++)
596
+ {
597
+ int col = this->GetRowIndices(i)[j];
598
+ FlatArray<ColIdx> prol_rowind = prol.GetRowIndices(i);
599
+ FlatArray<ColIdx> prol_colind = prol.GetRowIndices(col);
600
+
601
+ for (int k = 0; k < prol_rowind.Size(); k++)
602
+ for (int l = 0; l < prol_colind.Size(); l++)
603
+ {
604
+ int kk = prol_rowind[k];
605
+ int ll = prol_colind[l];
606
+
607
+ // if (kk >= ll) swap (kk,ll);
608
+ e2v.Append (IVec<2> (kk,ll));
609
+ }
610
+ }
611
+
612
+ int nc = 0;
613
+ for (int i = 0; i < e2v.Size(); i++)
614
+ nc = max2 (nc, e2v[i][1]);
615
+ nc++;
616
+
617
+ // *testout << "e2v = " << endl << e2v << endl;
618
+
619
+ // count all entries in row with multiplicity
620
+ Array<int> cnt(nc);
621
+ cnt = 0;
622
+ for (int i = 0; i < e2v.Size(); i++)
623
+ cnt[e2v[i][1]]++;
624
+
625
+ Table<int> v2e(cnt);
626
+ cnt = 0;
627
+ for (int i = 0; i < e2v.Size(); i++)
628
+ {
629
+ int v1 = e2v[i][1];
630
+ v2e[v1][cnt[v1]++] = i;
631
+ }
632
+
633
+ cnt = 0;
634
+ marks = -1;
635
+
636
+ // count all entries in row withOUT multiplicity
637
+ for (int i = 0; i < nc; i++)
638
+ for (int j = 0; j < v2e[i].Size(); j++)
639
+ {
640
+ int jj = v2e[i][j];
641
+ int v0 = e2v[jj][0];
642
+ if (marks[v0] != i)
643
+ {
644
+ cnt[i]++;
645
+ marks[v0] = i;
646
+ }
647
+ }
648
+
649
+ cmat = make_shared<SparseMatrix<TM,TV_ROW,TV_COL>> (cnt);
650
+
651
+ marks = -1;
652
+ for (int i = 0; i < nc; i++)
653
+ for (int j = 0; j < v2e[i].Size(); j++)
654
+ {
655
+ int jj = v2e[i][j];
656
+ int v0 = e2v[jj][0];
657
+ if (marks[v0] != i)
658
+ {
659
+ marks[v0] = i;
660
+ cmat -> CreatePosition (i, v0);
661
+ }
662
+ }
663
+ }
664
+
665
+ cmat->AsVector() = 0.0;
666
+ RegionTimer reg2(tcomp);
667
+
668
+ for (int i = 0; i < n; i++)
669
+ {
670
+ FlatArray mat_ri = this->GetRowIndices(i);
671
+ FlatVector<TM> mat_rval = this->GetRowValues(i);
672
+
673
+ for (int j = 0; j < mat_ri.Size(); j++)
674
+ {
675
+ int col = mat_ri[j];
676
+ TM mat_val = mat_rval[j];
677
+
678
+ FlatArray prol_ri_i = prol.GetRowIndices(i);
679
+ FlatArray prol_ri_col = prol.GetRowIndices(col);
680
+ FlatVector<double> prol_rval_i = prol.GetRowValues(i);
681
+ FlatVector<double> prol_rval_col = prol.GetRowValues(col);
682
+
683
+ for (int k = 0; k < prol_ri_i.Size(); k++)
684
+ for (int l = 0; l < prol_ri_col.Size(); l++)
685
+ {
686
+ int kk = prol_ri_i[k];
687
+ int ll = prol_ri_col[l];
688
+
689
+ if ( /*kk>=ll &&*/ kk < cmat->Height() )
690
+ {
691
+ (*cmat)(kk,ll) +=
692
+ prol_rval_i[k] * prol_rval_col[l] * mat_val;
693
+ }
694
+
695
+ // if (ll >= kk && i != col && ll < cmat->Height() )
696
+ // {
697
+ // (*cmat)(ll,kk) +=
698
+ // prol_rval_col[l] * prol_rval_i[k] * Trans(mat_val);
699
+ // }
700
+
701
+ }
702
+ }
703
+ }
704
+ return cmat;
705
+ }
706
+
707
+
708
+
709
+ template <class TM, class TV_ROW, class TV_COL>
710
+ shared_ptr<BaseSparseMatrix> SparseMatrix<TM, TV_ROW, TV_COL> ::
711
+ Reorder (const Array<size_t> & reorder) const
712
+ {
713
+ Array<size_t> inv_reorder(reorder.Size());
714
+ for (size_t i : Range(reorder))
715
+ inv_reorder[reorder[i]] = i;
716
+
717
+ Array<int> cnt(this->Height());
718
+ for (size_t i : Range(cnt))
719
+ cnt[i] = this->GetRowIndices(reorder[i]).Size();
720
+ auto newmat = make_shared<SparseMatrix>(cnt);
721
+ for (size_t i : Range(cnt))
722
+ for (auto col : this->GetRowIndices(reorder[i]))
723
+ newmat->CreatePosition(i, inv_reorder[col]);
724
+
725
+ for (size_t i : Range(cnt))
726
+ for (auto col : this->GetRowIndices(reorder[i]))
727
+ (*newmat)(i, inv_reorder[col]) = (*this)(reorder[i], col);
728
+
729
+ return newmat;
730
+ }
731
+
732
+
733
+ template <class TM>
734
+ shared_ptr<BaseSparseMatrix> SparseMatrixTM<TM> ::
735
+ CreateTransposeTM (const function<shared_ptr<SparseMatrixTM<decltype(ngbla::Trans(TM()))>>(const Array<int>&,int)> & creator) const
736
+ {
737
+ Array<int> cnt(this->Width());
738
+ cnt = 0;
739
+ ParallelFor (this->Height(), [&] (int i)
740
+ {
741
+ for (int c : this->GetRowIndices(i))
742
+ AsAtomic (cnt[c]) ++;
743
+ });
744
+
745
+ auto trans = creator(cnt, this->Height());
746
+
747
+ cnt = 0;
748
+ ParallelFor (this->Height(), [&] (int i)
749
+ {
750
+ for (int ci : Range(this->GetRowIndices(i)))
751
+ {
752
+ int c = this->GetRowIndices(i)[ci];
753
+ int pos = AsAtomic(cnt[c])++;
754
+ trans -> GetRowIndices(c)[pos] = i;
755
+ trans -> GetRowValues(c)[pos] = Trans(this->GetRowValues(i)[ci]);
756
+ }
757
+ });
758
+
759
+ ParallelFor (trans->Height(), [&] (int r)
760
+ {
761
+ auto rowvals = trans->GetRowValues(r);
762
+ BubbleSort (trans->GetRowIndices(r),
763
+ FlatArray(rowvals.Size(), rowvals.Data()));
764
+ });
765
+
766
+ return trans;
767
+ }
768
+
769
+
770
+
771
+
772
+ template <class TM>
773
+ void SparseMatrixTM<TM> ::
774
+ AddElementMatrixSymmetric(FlatArray<int> dnums, BareSliceMatrix<TSCAL> elmat1, bool use_atomic)
775
+ {
776
+ static Timer timer_addelmat("SparseMatrixSymmetric::AddElementMatrix", NoTracing);
777
+ // static Timer timer ("SparseMatrixSymmetric::AddElementMatrix", NoTracing);
778
+ // RegionTimer reg (timer);
779
+ RegionTimer reg (timer_addelmat);
780
+ NgProfiler::AddThreadFlops (timer_addelmat, TaskManager::GetThreadId(), dnums.Size()*(dnums.Size()+1)/2);
781
+
782
+ // ArrayMem<int, 50> map(dnums.Size());
783
+ STACK_ARRAY(int, hmap, dnums.Size());
784
+ FlatArray<int> map(dnums.Size(), hmap);
785
+
786
+ {
787
+ for (int i = 0; i < dnums.Size(); i++) map[i] = i;
788
+ QuickSortI (dnums, map);
789
+ }
790
+
791
+ STACK_ARRAY(int, dnumsmap, dnums.Size());
792
+ for (int i = 0; i < dnums.Size(); i++)
793
+ dnumsmap[i] = dnums[map[i]];
794
+
795
+ Scalar2ElemMatrix<TM, TSCAL> elmat (elmat1);
796
+ // .AddSize(mat_traits<TM>::HEIGHT*dnums.Size(),
797
+ // mat_traits<TM>::WIDTH*dnums.Size()));
798
+
799
+ int first_used = 0;
800
+ while (first_used < dnums.Size() && !IsRegularIndex(dnums[map[first_used]]) ) first_used++;
801
+
802
+ if (use_atomic)
803
+ for (int i1 = first_used; i1 < dnums.Size(); i1++)
804
+ {
805
+ // FlatArray<int> rowind = this->GetRowIndices(dnums[map[i1]]);
806
+ // FlatVector<TM> rowvals = this->GetRowValues(dnums[map[i1]]);
807
+ FlatArray rowind = this->GetRowIndices(dnumsmap[i1]);
808
+ FlatVector<TM> rowvals = this->GetRowValues(dnumsmap[i1]);
809
+ auto elmat_row = elmat.Rows(map[i1], map[i1]+1);
810
+
811
+ size_t k = 0;
812
+ for (int j1 = first_used; j1 <= i1; j1++, k++)
813
+ {
814
+ // while (rowind[k] != dnums[map[j1]])
815
+ while (rowind[k] != dnumsmap[j1])
816
+ {
817
+ k++;
818
+ if (k >= rowind.Size())
819
+ throw Exception ("SparseMatrixSymmetricTM::AddElementMatrix: illegal dnums");
820
+ }
821
+ AtomicAdd (rowvals(k), elmat_row(0, map[j1]));
822
+ }
823
+ }
824
+ else
825
+ {
826
+ if (first_used+1 < dnums.Size())
827
+ {
828
+ this->PrefetchRow(dnums[map[first_used+1]]);
829
+ // _mm_prefetch (reinterpret_cast<void*>(&this->GetRowIndices(dnums[map[first_used+1]])[0]), _MM_HINT_T2);
830
+ // _mm_prefetch (reinterpret_cast<void*>(&this->GetRowValues(dnums[map[first_used+1]])[0]), _MM_HINT_T2);
831
+ }
832
+
833
+ for (int i1 = first_used; i1 < dnums.Size(); i1++)
834
+ {
835
+ if (i1+2 < dnums.Size())
836
+ this->PrefetchRow(dnums[map[i1+2]]);
837
+
838
+ // FlatArray<int> rowind = this->GetRowIndices(dnums[map[i1]]);
839
+ // FlatVector<TM> rowvals = this->GetRowValues(dnums[map[i1]]);
840
+ FlatArray rowind = this->GetRowIndices(dnumsmap[i1]);
841
+ FlatVector<TM> rowvals = this->GetRowValues(dnumsmap[i1]);
842
+ auto elmat_row = elmat.Rows(map[i1], map[i1]+1);
843
+
844
+ size_t k = 0;
845
+ for (int j1 = first_used; j1 <= i1; j1++, k++)
846
+ {
847
+ // while (rowind[k] != dnums[map[j1]])
848
+ while (rowind[k] != dnumsmap[j1])
849
+ {
850
+ k++;
851
+ if (unlikely(k >= rowind.Size()))
852
+ throw Exception ("SparseMatrixSymmetricTM::AddElementMatrix: illegal dnums");
853
+ }
854
+ rowvals(k) += elmat_row(0, map[j1]);
855
+ }
856
+ }
857
+ }
858
+ }
859
+
860
+
861
+ template <class TM, class TV>
862
+ SparseMatrixSymmetric<TM,TV> ::
863
+ SparseMatrixSymmetric (const MatrixGraph & agraph)
864
+ // : SparseMatrixTM<TM> (agraph, stealgraph),
865
+ // SparseMatrixSymmetricTM<TM> (agraph, stealgraph),
866
+ : SparseMatrix<TM,TV,TV> (agraph)
867
+ { ; }
868
+
869
+ template <class TM, class TV>
870
+ SparseMatrixSymmetric<TM,TV> ::
871
+ SparseMatrixSymmetric (MatrixGraph && agraph)
872
+ // : SparseMatrixTM<TM> (agraph, stealgraph),
873
+ // SparseMatrixSymmetricTM<TM> (agraph, stealgraph),
874
+ : SparseMatrix<TM,TV,TV> (std::move(agraph))
875
+ { ; }
876
+
877
+
878
+ template <class TM, class TV>
879
+ SparseMatrixSymmetric<TM,TV> :: ~SparseMatrixSymmetric ()
880
+ {
881
+ ;
882
+ }
883
+
884
+
885
+ template <class TM, class TV>
886
+ shared_ptr<BaseJacobiPrecond>
887
+ SparseMatrixSymmetric<TM,TV> :: CreateJacobiPrecond (shared_ptr<BitArray> inner) const
888
+ {
889
+ return make_shared<JacobiPrecondSymmetric<TM,TV>> ( dynamic_pointer_cast<SparseMatrixSymmetric>
890
+ (const_cast<SparseMatrixSymmetric*>(this)->shared_from_this()), inner);
891
+ }
892
+
893
+
894
+ template <class TM, class TV>
895
+ shared_ptr<BaseBlockJacobiPrecond>
896
+ SparseMatrixSymmetric<TM,TV> :: CreateBlockJacobiPrecond (shared_ptr<Table<int>> blocks,
897
+ const BaseVector * constraint,
898
+ bool parallel,
899
+ shared_ptr<BitArray> freedofs) const
900
+ {
901
+ // return make_shared<BlockJacobiPrecondSymmetric<TM,TV>> (*this, blocks);
902
+ return make_shared<BlockJacobiPrecondSymmetric<TM,TV>>
903
+ ( dynamic_pointer_cast<const SparseMatrixSymmetric>
904
+ (this->shared_from_this()),
905
+ blocks);
906
+ }
907
+
908
+
909
+
910
+
911
+
912
+ template <class TM, class TV>
913
+ void SparseMatrixSymmetric<TM,TV> ::
914
+ MultAdd (double s, const BaseVector & x, BaseVector & y) const
915
+ {
916
+ static Timer timer("SparseMatrixSymmetric::MultAdd");
917
+ RegionTimer reg (timer);
918
+ timer.AddFlops (2*this->nze);
919
+
920
+ const FlatVector<TV_ROW> fx = x.FV<TV_ROW>();
921
+ FlatVector<TV_COL> fy = y.FV<TV_COL>();
922
+
923
+ for (int i = 0; i < this->Height(); i++)
924
+ {
925
+ fy(i) += s * RowTimesVector (i, fx);
926
+ AddRowTransToVectorNoDiag (i, s * fx(i), fy);
927
+ }
928
+ }
929
+
930
+ template <class TM, class TV>
931
+ void SparseMatrixSymmetric<TM,TV> ::
932
+ MultAdd1 (double s, const BaseVector & x, BaseVector & y,
933
+ const BitArray * inner,
934
+ const Array<int> * cluster) const
935
+ {
936
+ const FlatVector<TV_ROW> fx = x.FV<TV_ROW> ();
937
+ FlatVector<TV_COL> fy = y.FV<TV_COL> ();
938
+
939
+ if (inner)
940
+ {
941
+ static Timer timer("SparseMatrixSymmetric::MultAdd1 - inner");
942
+ RegionTimer reg (timer);
943
+
944
+ for (int i = 0; i < this->Height(); i++)
945
+ if (inner->Test(i))
946
+ fy(i) += s * RowTimesVectorNoDiag (i, fx);
947
+ }
948
+ else if (cluster)
949
+ {
950
+ static Timer timer("SparseMatrixSymmetric::MultAdd1 - cluster");
951
+ RegionTimer reg (timer);
952
+
953
+ for (int i = 0; i < this->Height(); i++)
954
+ if ( (*cluster)[i])
955
+ fy(i) += s * RowTimesVectorNoDiag (i, fx);
956
+ }
957
+ else
958
+ {
959
+ static Timer timer("SparseMatrixSymmetric::MultAdd1");
960
+ RegionTimer reg (timer);
961
+
962
+
963
+ for (int i = 0; i < this->Height(); i++)
964
+ fy(i) += s * RowTimesVectorNoDiag (i, fx);
965
+ }
966
+ }
967
+
968
+
969
+ template <class TM, class TV>
970
+ void SparseMatrixSymmetric<TM,TV> ::
971
+ MultAdd2 (double s, const BaseVector & x, BaseVector & y,
972
+ const BitArray * inner,
973
+ const Array<int> * cluster) const
974
+ {
975
+ static Timer timer("SparseMatrixSymmetric::MultAdd2");
976
+ RegionTimer reg (timer);
977
+ timer.AddFlops (this->NZE());
978
+
979
+ const FlatVector<TV_ROW> fx = x.FV<TV_ROW> ();
980
+ FlatVector<TV_COL> fy = y.FV<TV_COL> ();
981
+
982
+ if (inner)
983
+ {
984
+ for (int i = 0; i < this->Height(); i++)
985
+ if (inner->Test(i))
986
+ AddRowTransToVector (i, s * fx(i), fy);
987
+ }
988
+ else if (cluster)
989
+ {
990
+ for (int i = 0; i < this->Height(); i++)
991
+ if ( (*cluster)[i])
992
+ AddRowTransToVector (i, s * fx(i), fy);
993
+ }
994
+ else
995
+ for (int i = 0; i < this->Height(); i++)
996
+ AddRowTransToVector (i, s * fx(i), fy);
997
+ }
998
+
999
+
1000
+
1001
+ template <class TM, class TV>
1002
+ BaseSparseMatrix & SparseMatrixSymmetric<TM,TV> ::
1003
+ AddMerge (double s, const SparseMatrixSymmetric & m2)
1004
+ {
1005
+ for (int i = 0; i < m2.Height(); i++)
1006
+ for (int j = 0; j < m2.GetRowIndices(i).Size(); j++)
1007
+ (*this)(i, m2.GetRowIndices(i)[j]) += s * m2(i, m2.GetRowIndices(i)[j]);
1008
+ return *this;
1009
+ }
1010
+
1011
+ }
1012
+
1013
+ #endif