ngsolve 6.2.2506.post74.dev0__cp314-cp314-macosx_10_15_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ngsolve might be problematic. Click here for more details.

Files changed (315)
  1. netgen/include/analytic_integrals.hpp +10 -0
  2. netgen/include/arnoldi.hpp +55 -0
  3. netgen/include/bandmatrix.hpp +334 -0
  4. netgen/include/basematrix.hpp +957 -0
  5. netgen/include/basevector.hpp +1268 -0
  6. netgen/include/bdbequations.hpp +2805 -0
  7. netgen/include/bdbintegrator.hpp +1660 -0
  8. netgen/include/bem_diffops.hpp +475 -0
  9. netgen/include/bessel.hpp +1064 -0
  10. netgen/include/bilinearform.hpp +963 -0
  11. netgen/include/bla.hpp +29 -0
  12. netgen/include/blockalloc.hpp +95 -0
  13. netgen/include/blockjacobi.hpp +328 -0
  14. netgen/include/bspline.hpp +116 -0
  15. netgen/include/calcinverse.hpp +141 -0
  16. netgen/include/cg.hpp +368 -0
  17. netgen/include/chebyshev.hpp +44 -0
  18. netgen/include/cholesky.hpp +720 -0
  19. netgen/include/clapack.h +7254 -0
  20. netgen/include/code_generation.hpp +296 -0
  21. netgen/include/coefficient.hpp +2033 -0
  22. netgen/include/coefficient_impl.hpp +19 -0
  23. netgen/include/coefficient_stdmath.hpp +167 -0
  24. netgen/include/commutingAMG.hpp +106 -0
  25. netgen/include/comp.hpp +79 -0
  26. netgen/include/compatibility.hpp +41 -0
  27. netgen/include/complex_wrapper.hpp +73 -0
  28. netgen/include/compressedfespace.hpp +110 -0
  29. netgen/include/contact.hpp +235 -0
  30. netgen/include/diagonalmatrix.hpp +154 -0
  31. netgen/include/differentialoperator.hpp +276 -0
  32. netgen/include/diffop.hpp +1286 -0
  33. netgen/include/diffop_impl.hpp +328 -0
  34. netgen/include/diffopwithfactor.hpp +123 -0
  35. netgen/include/discontinuous.hpp +84 -0
  36. netgen/include/dump.hpp +949 -0
  37. netgen/include/ectypes.hpp +121 -0
  38. netgen/include/eigen.hpp +60 -0
  39. netgen/include/eigensystem.hpp +18 -0
  40. netgen/include/elasticity_equations.hpp +595 -0
  41. netgen/include/elementbyelement.hpp +195 -0
  42. netgen/include/elementtopology.hpp +1760 -0
  43. netgen/include/elementtransformation.hpp +339 -0
  44. netgen/include/evalfunc.hpp +405 -0
  45. netgen/include/expr.hpp +1686 -0
  46. netgen/include/facetfe.hpp +175 -0
  47. netgen/include/facetfespace.hpp +180 -0
  48. netgen/include/facethofe.hpp +111 -0
  49. netgen/include/facetsurffespace.hpp +112 -0
  50. netgen/include/fe_interfaces.hpp +32 -0
  51. netgen/include/fem.hpp +87 -0
  52. netgen/include/fesconvert.hpp +14 -0
  53. netgen/include/fespace.hpp +1449 -0
  54. netgen/include/finiteelement.hpp +286 -0
  55. netgen/include/globalinterfacespace.hpp +77 -0
  56. netgen/include/globalspace.hpp +115 -0
  57. netgen/include/gridfunction.hpp +525 -0
  58. netgen/include/h1amg.hpp +124 -0
  59. netgen/include/h1hofe.hpp +188 -0
  60. netgen/include/h1hofe_impl.hpp +1262 -0
  61. netgen/include/h1hofefo.hpp +148 -0
  62. netgen/include/h1hofefo_impl.hpp +185 -0
  63. netgen/include/h1hofespace.hpp +167 -0
  64. netgen/include/h1lofe.hpp +1240 -0
  65. netgen/include/h1lumping.hpp +41 -0
  66. netgen/include/hcurl_equations.hpp +1381 -0
  67. netgen/include/hcurlcurlfe.hpp +2241 -0
  68. netgen/include/hcurlcurlfespace.hpp +78 -0
  69. netgen/include/hcurlfe.hpp +259 -0
  70. netgen/include/hcurlfe_utils.hpp +107 -0
  71. netgen/include/hcurlhdiv_dshape.hpp +857 -0
  72. netgen/include/hcurlhdivfes.hpp +308 -0
  73. netgen/include/hcurlhofe.hpp +175 -0
  74. netgen/include/hcurlhofe_impl.hpp +1871 -0
  75. netgen/include/hcurlhofespace.hpp +193 -0
  76. netgen/include/hcurllofe.hpp +1146 -0
  77. netgen/include/hdiv_equations.hpp +880 -0
  78. netgen/include/hdivdivfe.hpp +2923 -0
  79. netgen/include/hdivdivsurfacespace.hpp +76 -0
  80. netgen/include/hdivfe.hpp +206 -0
  81. netgen/include/hdivfe_utils.hpp +717 -0
  82. netgen/include/hdivfes.hpp +75 -0
  83. netgen/include/hdivhofe.hpp +447 -0
  84. netgen/include/hdivhofe_impl.hpp +1107 -0
  85. netgen/include/hdivhofefo.hpp +229 -0
  86. netgen/include/hdivhofespace.hpp +177 -0
  87. netgen/include/hdivhosurfacefespace.hpp +106 -0
  88. netgen/include/hdivlofe.hpp +773 -0
  89. netgen/include/hidden.hpp +74 -0
  90. netgen/include/householder.hpp +181 -0
  91. netgen/include/hypre_ams_precond.hpp +123 -0
  92. netgen/include/hypre_precond.hpp +73 -0
  93. netgen/include/integrator.hpp +2012 -0
  94. netgen/include/integratorcf.hpp +253 -0
  95. netgen/include/interpolate.hpp +49 -0
  96. netgen/include/intrule.hpp +2542 -0
  97. netgen/include/intrules_SauterSchwab.hpp +25 -0
  98. netgen/include/irspace.hpp +49 -0
  99. netgen/include/jacobi.hpp +153 -0
  100. netgen/include/kernels.hpp +762 -0
  101. netgen/include/l2hofe.hpp +194 -0
  102. netgen/include/l2hofe_impl.hpp +564 -0
  103. netgen/include/l2hofefo.hpp +542 -0
  104. netgen/include/l2hofespace.hpp +344 -0
  105. netgen/include/la.hpp +38 -0
  106. netgen/include/linearform.hpp +266 -0
  107. netgen/include/matrix.hpp +2140 -0
  108. netgen/include/memusage.hpp +41 -0
  109. netgen/include/meshaccess.hpp +1359 -0
  110. netgen/include/mgpre.hpp +204 -0
  111. netgen/include/mp_coefficient.hpp +145 -0
  112. netgen/include/mptools.hpp +2281 -0
  113. netgen/include/multigrid.hpp +42 -0
  114. netgen/include/multivector.hpp +447 -0
  115. netgen/include/mumpsinverse.hpp +187 -0
  116. netgen/include/mycomplex.hpp +361 -0
  117. netgen/include/ng_lapack.hpp +1661 -0
  118. netgen/include/ngblas.hpp +1232 -0
  119. netgen/include/ngs_defines.hpp +30 -0
  120. netgen/include/ngs_stdcpp_include.hpp +106 -0
  121. netgen/include/ngs_utils.hpp +121 -0
  122. netgen/include/ngsobject.hpp +1019 -0
  123. netgen/include/ngsstream.hpp +113 -0
  124. netgen/include/ngstd.hpp +72 -0
  125. netgen/include/nodalhofe.hpp +96 -0
  126. netgen/include/nodalhofe_impl.hpp +141 -0
  127. netgen/include/normalfacetfe.hpp +223 -0
  128. netgen/include/normalfacetfespace.hpp +98 -0
  129. netgen/include/normalfacetsurfacefespace.hpp +84 -0
  130. netgen/include/order.hpp +251 -0
  131. netgen/include/parallel_matrices.hpp +222 -0
  132. netgen/include/paralleldofs.hpp +340 -0
  133. netgen/include/parallelngs.hpp +23 -0
  134. netgen/include/parallelvector.hpp +269 -0
  135. netgen/include/pardisoinverse.hpp +200 -0
  136. netgen/include/periodic.hpp +129 -0
  137. netgen/include/plateaufespace.hpp +25 -0
  138. netgen/include/pml.hpp +275 -0
  139. netgen/include/pmltrafo.hpp +631 -0
  140. netgen/include/postproc.hpp +142 -0
  141. netgen/include/potentialtools.hpp +22 -0
  142. netgen/include/precomp.hpp +60 -0
  143. netgen/include/preconditioner.hpp +602 -0
  144. netgen/include/prolongation.hpp +377 -0
  145. netgen/include/python_comp.hpp +107 -0
  146. netgen/include/python_fem.hpp +89 -0
  147. netgen/include/python_linalg.hpp +58 -0
  148. netgen/include/python_ngstd.hpp +386 -0
  149. netgen/include/recursive_pol.hpp +4896 -0
  150. netgen/include/recursive_pol_tet.hpp +395 -0
  151. netgen/include/recursive_pol_trig.hpp +492 -0
  152. netgen/include/reorderedfespace.hpp +81 -0
  153. netgen/include/sample_sort.hpp +105 -0
  154. netgen/include/scalarfe.hpp +335 -0
  155. netgen/include/shapefunction_utils.hpp +113 -0
  156. netgen/include/simd_complex.hpp +329 -0
  157. netgen/include/smoother.hpp +253 -0
  158. netgen/include/solve.hpp +89 -0
  159. netgen/include/sparsecholesky.hpp +313 -0
  160. netgen/include/sparsematrix.hpp +1038 -0
  161. netgen/include/sparsematrix_dyn.hpp +90 -0
  162. netgen/include/sparsematrix_impl.hpp +1013 -0
  163. netgen/include/special_matrix.hpp +463 -0
  164. netgen/include/specialelement.hpp +125 -0
  165. netgen/include/statushandler.hpp +33 -0
  166. netgen/include/stringops.hpp +12 -0
  167. netgen/include/superluinverse.hpp +136 -0
  168. netgen/include/symbolicintegrator.hpp +850 -0
  169. netgen/include/symmetricmatrix.hpp +144 -0
  170. netgen/include/tangentialfacetfe.hpp +224 -0
  171. netgen/include/tangentialfacetfespace.hpp +91 -0
  172. netgen/include/tensor.hpp +522 -0
  173. netgen/include/tensorcoefficient.hpp +446 -0
  174. netgen/include/tensorproductintegrator.hpp +113 -0
  175. netgen/include/thcurlfe.hpp +128 -0
  176. netgen/include/thcurlfe_impl.hpp +380 -0
  177. netgen/include/thdivfe.hpp +80 -0
  178. netgen/include/thdivfe_impl.hpp +492 -0
  179. netgen/include/tpdiffop.hpp +461 -0
  180. netgen/include/tpfes.hpp +133 -0
  181. netgen/include/tpintrule.hpp +224 -0
  182. netgen/include/triangular.hpp +465 -0
  183. netgen/include/tscalarfe.hpp +245 -0
  184. netgen/include/tscalarfe_impl.hpp +1029 -0
  185. netgen/include/umfpackinverse.hpp +148 -0
  186. netgen/include/vector.hpp +1273 -0
  187. netgen/include/voxelcoefficientfunction.hpp +41 -0
  188. netgen/include/vtkoutput.hpp +198 -0
  189. netgen/include/vvector.hpp +208 -0
  190. netgen/include/webgui.hpp +92 -0
  191. netgen/libngbla.dylib +0 -0
  192. netgen/libngcomp.dylib +0 -0
  193. netgen/libngfem.dylib +0 -0
  194. netgen/libngla.dylib +0 -0
  195. netgen/libngsbem.dylib +0 -0
  196. netgen/libngsolve.dylib +0 -0
  197. netgen/libngstd.dylib +0 -0
  198. ngsolve/TensorProductTools.py +210 -0
  199. ngsolve/__console.py +94 -0
  200. ngsolve/__expr.py +181 -0
  201. ngsolve/__init__.py +148 -0
  202. ngsolve/__init__.pyi +233 -0
  203. ngsolve/_scikit_build_core_dependencies.py +30 -0
  204. ngsolve/bla.pyi +1153 -0
  205. ngsolve/bvp.py +78 -0
  206. ngsolve/bvp.pyi +32 -0
  207. ngsolve/cmake/NGSolveConfig.cmake +102 -0
  208. ngsolve/cmake/ngsolve-targets-release.cmake +79 -0
  209. ngsolve/cmake/ngsolve-targets.cmake +163 -0
  210. ngsolve/comp/__init__.pyi +5449 -0
  211. ngsolve/comp/pml.pyi +89 -0
  212. ngsolve/config/__init__.py +1 -0
  213. ngsolve/config/__init__.pyi +43 -0
  214. ngsolve/config/__main__.py +4 -0
  215. ngsolve/config/config.py +60 -0
  216. ngsolve/config/config.pyi +45 -0
  217. ngsolve/demos/TensorProduct/__init__.py +0 -0
  218. ngsolve/demos/TensorProduct/tp_dg_1d_1d.py +80 -0
  219. ngsolve/demos/TensorProduct/tp_dg_1d_2d.py +73 -0
  220. ngsolve/demos/TensorProduct/tp_dg_2d_1d.py +72 -0
  221. ngsolve/demos/TensorProduct/tp_dg_2d_2d.py +66 -0
  222. ngsolve/demos/__init__.py +0 -0
  223. ngsolve/demos/howto/__init__.py +0 -0
  224. ngsolve/demos/howto/hhj.py +44 -0
  225. ngsolve/demos/howto/hybrid_dg.py +53 -0
  226. ngsolve/demos/howto/mixed.py +30 -0
  227. ngsolve/demos/howto/nonlin.py +29 -0
  228. ngsolve/demos/howto/pickling.py +26 -0
  229. ngsolve/demos/howto/pml.py +31 -0
  230. ngsolve/demos/howto/taskmanager.py +20 -0
  231. ngsolve/demos/howto/tdnns.py +47 -0
  232. ngsolve/demos/howto/timeDG-skeleton.py +45 -0
  233. ngsolve/demos/howto/timeDG.py +38 -0
  234. ngsolve/demos/howto/timeDGlap.py +42 -0
  235. ngsolve/demos/howto/timeDGwave.py +61 -0
  236. ngsolve/demos/intro/__init__.py +0 -0
  237. ngsolve/demos/intro/adaptive.py +123 -0
  238. ngsolve/demos/intro/cmagnet.py +59 -0
  239. ngsolve/demos/intro/elasticity.py +76 -0
  240. ngsolve/demos/intro/navierstokes.py +74 -0
  241. ngsolve/demos/intro/poisson.ipynb +170 -0
  242. ngsolve/demos/intro/poisson.py +41 -0
  243. ngsolve/demos/mpi/__init__.py +0 -0
  244. ngsolve/demos/mpi/mpi_cmagnet.py +87 -0
  245. ngsolve/demos/mpi/mpi_navierstokes.py +117 -0
  246. ngsolve/demos/mpi/mpi_poisson.py +89 -0
  247. ngsolve/demos/mpi/mpi_timeDG.py +82 -0
  248. ngsolve/directsolvers.py +26 -0
  249. ngsolve/directsolvers.pyi +15 -0
  250. ngsolve/eigenvalues.py +364 -0
  251. ngsolve/eigenvalues.pyi +30 -0
  252. ngsolve/fem.pyi +1647 -0
  253. ngsolve/internal.py +89 -0
  254. ngsolve/krylovspace.py +1013 -0
  255. ngsolve/krylovspace.pyi +298 -0
  256. ngsolve/la.pyi +1230 -0
  257. ngsolve/meshes.py +748 -0
  258. ngsolve/ngs2petsc.py +310 -0
  259. ngsolve/ngscxx.py +42 -0
  260. ngsolve/ngslib.so +0 -0
  261. ngsolve/ngstd.pyi +59 -0
  262. ngsolve/nonlinearsolvers.py +203 -0
  263. ngsolve/nonlinearsolvers.pyi +95 -0
  264. ngsolve/preconditioners.py +11 -0
  265. ngsolve/preconditioners.pyi +7 -0
  266. ngsolve/solve.pyi +109 -0
  267. ngsolve/solve_implementation.py +168 -0
  268. ngsolve/solve_implementation.pyi +42 -0
  269. ngsolve/solvers.py +7 -0
  270. ngsolve/solvers.pyi +14 -0
  271. ngsolve/timestepping.py +185 -0
  272. ngsolve/timestepping.pyi +28 -0
  273. ngsolve/timing.py +108 -0
  274. ngsolve/timing.pyi +54 -0
  275. ngsolve/utils.py +167 -0
  276. ngsolve/utils.pyi +273 -0
  277. ngsolve/webgui.py +670 -0
  278. ngsolve-6.2.2506.post74.dev0.data/data/Netgen.icns +0 -0
  279. ngsolve-6.2.2506.post74.dev0.data/data/bin/ngscxx +17 -0
  280. ngsolve-6.2.2506.post74.dev0.data/data/bin/ngsld +13 -0
  281. ngsolve-6.2.2506.post74.dev0.data/data/bin/ngsolve.tcl +648 -0
  282. ngsolve-6.2.2506.post74.dev0.data/data/bin/ngspy +2 -0
  283. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/beam.geo +17 -0
  284. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/beam.vol +240 -0
  285. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/chip.in2d +41 -0
  286. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/chip.vol +614 -0
  287. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/coil.geo +12 -0
  288. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/coil.vol +2560 -0
  289. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/coilshield.geo +24 -0
  290. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/coilshield.vol +3179 -0
  291. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/cube.geo +19 -0
  292. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/cube.vol +1832 -0
  293. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/d10_DGdoubleglazing.pde +50 -0
  294. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/d11_chip_nitsche.pde +40 -0
  295. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/d1_square.pde +43 -0
  296. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/d2_chip.pde +35 -0
  297. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/d3_helmholtz.pde +22 -0
  298. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/d4_cube.pde +46 -0
  299. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/d5_beam.pde +74 -0
  300. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/d6_shaft.pde +73 -0
  301. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/d7_coil.pde +50 -0
  302. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/d8_coilshield.pde +49 -0
  303. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/d9_hybridDG.pde +72 -0
  304. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/doubleglazing.in2d +27 -0
  305. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/doubleglazing.vol +737 -0
  306. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/piezo2d40round4.vol.gz +0 -0
  307. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/shaft.geo +73 -0
  308. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/shaft.vol +4291 -0
  309. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/square.in2d +17 -0
  310. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/square.vol +149 -0
  311. ngsolve-6.2.2506.post74.dev0.dist-info/METADATA +13 -0
  312. ngsolve-6.2.2506.post74.dev0.dist-info/RECORD +315 -0
  313. ngsolve-6.2.2506.post74.dev0.dist-info/WHEEL +5 -0
  314. ngsolve-6.2.2506.post74.dev0.dist-info/licenses/LICENSE +504 -0
  315. ngsolve-6.2.2506.post74.dev0.dist-info/top_level.txt +2 -0
@@ -0,0 +1,720 @@
1
+ #ifndef FILE_CHOLESKY
2
+ #define FILE_CHOLESKY
3
+
4
+ /****************************************************************************/
5
+ /* File: cholesky.hpp */
6
+ /* Author: Joachim Schoeberl */
7
+ /* Date: 25. Mar. 2000, 16. June 2002 */
8
+ /****************************************************************************/
9
+
10
+ #include "matrix.hpp"
11
+
12
+ namespace ngbla
13
+ {
14
+
15
+ /**
16
+ The Cholesky-factorization of a symmetric dense matrix.
17
+ A = L D L^T
18
+ */
19
+ template <class T>
20
+ class FlatCholeskyFactors
21
+ {
22
+ protected:
23
+ /// matrix size
24
+ int n;
25
+ /// left factor
26
+ T * lfact;
27
+ /// inverse diagonal
28
+ T * diag;
29
+ public:
30
+ // typedef typename mat_traits<T>::TV_COL TV;
31
+ /// Factor the matrix A
32
+ FlatCholeskyFactors (const FlatMatrix<T> & a, T * data)
33
+ {
34
+ diag = data;
35
+ Factor (a);
36
+ }
37
+
38
+ /// Factor the matrix A
39
+ FlatCholeskyFactors (const FlatMatrix<T> & a, LocalHeap & lh)
40
+ {
41
+ diag = (T*)lh.Alloc(sizeof(T)*RequiredMem(a.Height()));
42
+ Factor (a);
43
+ }
44
+
45
+ ///
46
+ NGS_DLL_HEADER void Factor (const FlatMatrix<T> & a);
47
+ /// Multiply with the inverse of A
48
+ template <typename TV1, typename TV2>
49
+ // NGS_DLL_HEADER void Mult (SliceVector<TV> x, SliceVector<TV> y) const
50
+ void Mult (TV1 && x, TV2 && y) const
51
+ {
52
+ // TV sum, val;
53
+ // decltype (y(0)) sum, val;
54
+
55
+
56
+ const T *pj;
57
+
58
+ for (int i = 0; i < n; i++)
59
+ y(i) = x(i);
60
+
61
+ for (int i = 0; i < n; i++)
62
+ {
63
+ auto sum = y(i);
64
+
65
+ pj = PRow(i);
66
+ for (int j = 0; j < i; ++j)
67
+ sum -= pj[j] * y(j);
68
+
69
+ y(i) = sum;
70
+ }
71
+
72
+ for (int i = 0; i < n; i++)
73
+ {
74
+ auto sum = diag[i] * y(i);
75
+ y(i) = sum;
76
+ }
77
+
78
+ for (int i = n-1; i >= 0; i--)
79
+ {
80
+ pj = PRow(i);
81
+ auto val = y(i);
82
+ for (int j = 0; j < i; ++j)
83
+ y(j) -= pj[j] * val;
84
+ }
85
+ }
86
+
87
+ /// Print factorization
88
+ NGS_DLL_HEADER ostream & Print (ostream & ost) const;
89
+
90
+
91
+ /// computes required memory
92
+ static int RequiredMem (int n)
93
+ { return n*(n+1)/2; }
94
+
95
+ private:
96
+ /// first element in row
97
+ T * PRow (int i) const { return lfact + (i*(i-1)) / 2; }
98
+ };
99
+
100
+
101
+ /// output operator.
102
+ template<typename T>
103
+ inline std::ostream & operator<< (std::ostream & s, const FlatCholeskyFactors<T> & m)
104
+ {
105
+ m.Print (s);
106
+ return s;
107
+ }
108
+
109
+
110
+
111
+ template <class T>
112
+ class CholeskyFactors : public FlatCholeskyFactors<T>
113
+ {
114
+ public:
115
+ /// Factor the matrix A
116
+ CholeskyFactors (const FlatMatrix<T> & a)
117
+ : FlatCholeskyFactors<T> (a, new T[this->RequiredMem(a.Height())])
118
+ { ; }
119
+ /// Delete memory
120
+ ~CholeskyFactors ()
121
+ {
122
+ delete [] this->diag;
123
+ }
124
+ };
125
+
126
+
127
+
128
+
129
+
130
+
131
+
132
+
133
+ // high performance LDL factorization as used for SparseCholesky
134
+ /*
135
+ template <typename T, ORDERING ORD>
136
+ INLINE void MySubABt (SliceMatrix<T,ORD> a,
137
+ SliceMatrix<T,ORD> b,
138
+ SliceMatrix<T,ORD> c)
139
+ {
140
+ static Timer timer1("SparseCholesky::Factor gemm 1", NoTracing);
141
+ static Timer timer2("SparseCholesky::Factor gemm 2", NoTracing);
142
+ static Timer timer3("SparseCholesky::Factor gemm 3", NoTracing);
143
+
144
+ // if (c.Height() < 10 && c.Width() < 10) // && a.Width() < 10)
145
+ if (c.Height() < 10 || c.Width() < 10 || a.Width() < 10)
146
+ // if (false)
147
+ {
148
+ // timer1.Start();
149
+ c -= a * Trans(b);
150
+ // timer1.Stop();
151
+ // timer1.AddFlops(c.Height()*c.Width()*a.Width());
152
+ }
153
+ else
154
+ {
155
+ if (c.Height() < 128 && c.Width() < 128)
156
+ // if (true)
157
+ {
158
+ // timer2.Start();
159
+ // c -= a * Trans(b) | Lapack;
160
+ ngbla::SubABt(a,b,c);
161
+ // timer2.Stop();
162
+ // timer2.AddFlops(c.Height()*c.Width()*a.Width());
163
+ }
164
+ else
165
+ {
166
+ timer3.Start();
167
+ int nr = c.Height()/128+1;
168
+ int nc = c.Width()/128+1;
169
+ task_manager -> CreateJob
170
+ ( [&] (const TaskInfo & ti)
171
+ {
172
+ int br = ti.task_nr % nr;
173
+ int bc = ti.task_nr / nr;
174
+ auto rowr = Range(c.Height()).Split (br, nr);
175
+ auto colr = Range(c.Width()).Split (bc, nc);
176
+ // c.Rows(rowr).Cols(colr) -= a.Rows(rowr) * Trans(b.Rows(colr)) | Lapack;
177
+ ngbla::SubABt(a.Rows(rowr),b.Rows(colr), c.Rows(rowr).Cols(colr));
178
+ }, nr*nc);
179
+ timer3.AddFlops(c.Height()*c.Width()*a.Width());
180
+ timer3.Stop();
181
+ }
182
+ }
183
+ }
184
+ */
185
+
186
+
187
+ /*
188
+ A B^t = L1 D1 0 L1^t B1
189
+ B C = B1 L2 0 D2 L2^t
190
+ */
191
+
192
+ /*
193
+ template <typename T, ORDERING ORD>
194
+ void MySubADBt (SliceMatrix<T,ORD> A,
195
+ SliceVector<T> diag,
196
+ SliceMatrix<T,ORD> B,
197
+ SliceMatrix<T,ORD> C)
198
+ {
199
+ Matrix<T,ORD> hB(B.Height(), B.Width());
200
+ for (int i = 0; i < hB.Width(); i++)
201
+ hB.Col(i) = diag(i) * B.Col(i);
202
+ C -= A * Trans(hB);
203
+ }
204
+ */
205
+
206
+
207
+ template <typename T, ORDERING ORD>
208
+ INLINE void MySubADBt (SliceMatrix<T,ORD> a,
209
+ SliceVector<T> diag,
210
+ SliceMatrix<T,ORD> b,
211
+ SliceMatrix<T,ORD> c,
212
+ bool symmetric)
213
+ {
214
+ // static Timer timer1("SparseCholesky::Factor gemm 1", NoTracing);
215
+ // static Timer timer2("SparseCholesky::Factor gemm 2", NoTracing);
216
+ // static Timer timer3("SparseCholesky::Factor gemm 3", NoTracing);
217
+
218
+ /*
219
+ // if (c.Height() < 10 && c.Width() < 10) // && a.Width() < 10)
220
+ if (c.Height() < 10 && c.Width() < 10 && a.Width() < 10)
221
+ // if (false)
222
+ {
223
+ // timer1.Start();
224
+ T hmem[100];
225
+ FlatMatrix<T,ORD> hb(b.Height(), b.Width(), &hmem[0]);
226
+ for (int i = 0; i < hb.Width(); i++)
227
+ hb.Col(i) = diag(i) * b.Col(i);
228
+ c -= a * Trans(hb);
229
+ // timer1.Stop();
230
+ // timer1.AddFlops(c.Height()*c.Width()*a.Width());
231
+ }
232
+ else
233
+ */
234
+ {
235
+ if ( (c.Height() < 128 && c.Width() < 128) ||
236
+ (size_t(c.Height())*c.Width()*a.Width() < 10000) )
237
+ // if (true)
238
+ {
239
+ // timer2.Start();
240
+ ngbla::SubADBt(a,diag,b,c);
241
+ // timer2.Stop();
242
+ // timer2.AddFlops(size_t(c.Height())*c.Width()*a.Width());
243
+ }
244
+ else
245
+ {
246
+ // timer3.Start();
247
+ // int nr = c.Height()/128+1;
248
+ // int nc = c.Width()/128+1;
249
+
250
+ // constexpr int BH = 96;
251
+ // constexpr int BW = 128;
252
+ // avoid warning for capturing constexpr int, but still needed on MSVC 19.16
253
+ IC<96> BH;
254
+ IC<128> BW;
255
+ int nr = (c.Height()+BH-1) / BH;
256
+ int nc = (c.Width()+BW-1) / BW;
257
+ task_manager -> CreateJob
258
+ ( [a,b,c,diag,nr,symmetric,BH,BW] (const TaskInfo & ti)
259
+ {
260
+ size_t br = ti.task_nr % nr;
261
+ size_t bc = ti.task_nr / nr;
262
+ // auto rowr = Range(c.Height()).Split (br, nr);
263
+ // auto colr = Range(c.Width()).Split (bc, nc);
264
+ auto rowr = Range(BH*br, min(BH*(br+1), c.Height()));
265
+ auto colr = Range(BW*bc, min(BW*(bc+1), c.Width()));
266
+ if (symmetric)
267
+ if (rowr.Next() <= colr.First())
268
+ return; // need only lower half
269
+
270
+ // c.Rows(rowr).Cols(colr) -= a.Rows(rowr) * Trans(b.Rows(colr)) | Lapack;
271
+ ngbla::SubADBt(a.Rows(rowr),diag, b.Rows(colr), c.Rows(rowr).Cols(colr));
272
+ }, nr*nc);
273
+ // timer3.AddFlops(size_t(c.Height())*c.Width()*a.Width());
274
+ // timer3.Stop();
275
+ }
276
+ }
277
+ }
278
+
279
+ template <typename T, ORDERING ORD>
280
+ INLINE void MySubADBh (SliceMatrix<T,ORD> a,
281
+ SliceVector<T> diag,
282
+ SliceMatrix<T,ORD> b,
283
+ SliceMatrix<T,ORD> c,
284
+ bool symmetric)
285
+ {
286
+ Matrix<T,ORD> bconj(b.Height(), b.Width());
287
+ bconj = Conj(b);
288
+ MySubADBt (a, diag, SliceMatrix<T,ORD>(bconj), c, false); // symmetric);
289
+ }
290
+
291
+
292
+
293
+
294
+ // Solve for B1: B1 D1 L1^t = B
295
+ template <typename T, ORDERING ORD>
296
+ void CalcLDL_SolveL (SliceMatrix<T,ORD> L, SliceMatrix<T,ORD> B)
297
+ {
298
+ size_t n = L.Height();
299
+ if (n == 1) return;
300
+
301
+ if (n >= 2)
302
+ {
303
+ IntRange r1(0,n/2), r2(n/2,n);
304
+ auto L1 = L.Rows(r1).Cols(r1);
305
+ auto L21 = L.Rows(r2).Cols(r1);
306
+ auto L2 = L.Rows(r2).Cols(r2);
307
+ auto B1 = B.Cols(r1);
308
+ auto B2 = B.Cols(r2);
309
+
310
+ CalcLDL_SolveL(L1, B1);
311
+ MySubADBt (B1, L1.Diag(), L21, B2, false);
312
+ CalcLDL_SolveL(L2, B2);
313
+ return;
314
+ }
315
+
316
+ static Timer t("LDL - Solve L work", NoTracing);
317
+ t.Start();
318
+ /*
319
+ for (int i = 0; i < L.Height(); i++)
320
+ for (int j = i+1; j < L.Height(); j++)
321
+ for (int k = 0; k < B.Height(); k++)
322
+ B(k,j) -= L(j,i) * B(k,i);
323
+ // B.Col(j) -= L(j,i) * B.Col(i);
324
+ */
325
+ /*
326
+ for (int k = 0; k < B.Height(); k++)
327
+ for (int i = 0; i < L.Height(); i++)
328
+ for (int j = i+1; j < L.Height(); j++)
329
+ B(k,j) -= L(j,i) * B(k,i);
330
+ */
331
+ auto solve_row = [&] (size_t k)
332
+ {
333
+ auto Brow = B.Row(k);
334
+ for (size_t i = 0; i < L.Height(); i++)
335
+ for (size_t j = i+1; j < L.Height(); j++)
336
+ Brow(j) -= L(j,i) * Brow(i);
337
+ };
338
+ if (B.Height() < 1000)
339
+ for (size_t k = 0; k < B.Height(); k++)
340
+ solve_row(k);
341
+ else
342
+ ParallelFor (B.Height(), solve_row);
343
+
344
+ t.Stop();
345
+ }
346
+
347
+ // calc new A22-block
348
+ // A2 -= B D B^t
349
+ template <typename T, ORDERING ORD>
350
+ void CalcLDL_A2 (SliceVector<T> diag, SliceMatrix<T,ORD> B, SliceMatrix<T,ORD> A2)
351
+ {
352
+ MySubADBt (B, diag, B, A2, true);
353
+ }
354
+
355
+ // calc new A22-block hermitsch
356
+ // A2 -= B D B^h
357
+ template <typename T, ORDERING ORD>
358
+ void CalcLDL_A2H (SliceVector<T> diag, SliceMatrix<T,ORD> B, SliceMatrix<T,ORD> A2)
359
+ {
360
+ MySubADBh (B, diag, B, A2, true);
361
+ }
362
+
363
+
364
+
365
+ // Calc A = L D L^t
366
+ template <typename T, ORDERING ORD>
367
+ void CalcLDL (SliceMatrix<T,ORD> mat)
368
+ {
369
+ size_t n = mat.Height();
370
+
371
+ if (n >= 2)
372
+ {
373
+ size_t n1 = n/2;
374
+ auto L1 = mat.Rows(0,n1).Cols(0,n1);
375
+ auto L2 = mat.Rows(n1,n).Cols(n1,n);
376
+ auto B = mat.Rows(n1,n).Cols(0,n1);
377
+ CalcLDL (L1);
378
+ CalcLDL_SolveL (L1,B);
379
+ CalcLDL_A2 (L1.Diag(),B,L2);
380
+ CalcLDL (L2);
381
+ return;
382
+ }
383
+
384
+ if (n == 1)
385
+ {
386
+ // auto hm = mat(0,0);
387
+ // CalcInverse (hm, mat(0,0));
388
+ mat(0,0) = Inv(mat(0,0));
389
+ return;
390
+ }
391
+
392
+ /*
393
+ not working anymore
394
+ for (size_t i = 0; i < n; i++)
395
+ {
396
+ T dii = mat(i,i);
397
+ T inv_dii;
398
+ CalcInverse (dii, inv_dii);
399
+ for (size_t j = i+1; j < n; j++)
400
+ {
401
+ T hji = mat(j,i);
402
+ T hjiD = hji * inv_dii;
403
+ mat(j,i) = hjiD;
404
+ for (size_t k = i+1; k <= j; k++)
405
+ mat(j,k) -= hji * Trans(mat(k,i));
406
+ }
407
+ }
408
+ */
409
+ }
410
+
411
+
412
+ // Calc A = L D L^h
413
+ template <typename T, ORDERING ORD>
414
+ void CalcLDLH (SliceMatrix<T,ORD> mat)
415
+ {
416
+ size_t n = mat.Height();
417
+
418
+ if (n >= 2)
419
+ {
420
+ size_t n1 = n/2;
421
+ auto L1 = mat.Rows(0,n1).Cols(0,n1);
422
+ auto L2 = mat.Rows(n1,n).Cols(n1,n);
423
+ auto B = mat.Rows(n1,n).Cols(0,n1);
424
+ CalcLDLH (L1);
425
+ CalcLDL_SolveL (L1,B);
426
+ CalcLDL_A2H (L1.Diag(),B,L2);
427
+ CalcLDLH (L2);
428
+ return;
429
+ }
430
+
431
+ if (n == 1)
432
+ {
433
+ // auto hm = mat(0,0);
434
+ // CalcInverse (hm, mat(0,0));
435
+ mat(0,0) = Inv(mat(0,0));
436
+ return;
437
+ }
438
+ }
439
+
440
+
441
+
442
+ template <typename T, ORDERING ORD>
443
+ void SolveLDL (SliceMatrix<T,ORD> mat, FlatVector<T> sol)
444
+ {
445
+ size_t n = mat.Height();
446
+
447
+ for (size_t i = 0; i < n; i++)
448
+ {
449
+ T tmp = mat(i,i)*sol(i);
450
+ for (size_t j = i+1; j < n; j++)
451
+ sol(j) -= mat(j,i) * tmp;
452
+ }
453
+
454
+ for (size_t i = 0; i < n; i++)
455
+ sol(i) *= mat(i,i);
456
+
457
+ for (size_t i = n; i--> 0; )
458
+ {
459
+ T hsum{0};
460
+ for (size_t j = i+1; j < n; j++)
461
+ hsum += mat(j,i)*sol(j);
462
+ sol(i) -= mat(i,i) * hsum;
463
+ }
464
+ }
465
+
466
+ template <typename T, ORDERING ORD>
467
+ void SolveLDLH (SliceMatrix<T,ORD> mat, FlatVector<T> sol)
468
+ {
469
+ size_t n = mat.Height();
470
+
471
+ for (size_t i = 0; i < n; i++)
472
+ {
473
+ T tmp = mat(i,i)*sol(i);
474
+ for (size_t j = i+1; j < n; j++)
475
+ sol(j) -= mat(j,i) * tmp;
476
+ }
477
+
478
+ for (size_t i = 0; i < n; i++)
479
+ sol(i) *= mat(i,i);
480
+
481
+ for (size_t i = n; i--> 0; )
482
+ {
483
+ T hsum{0};
484
+ for (size_t j = i+1; j < n; j++)
485
+ hsum += Conj(mat(j,i))*sol(j);
486
+ sol(i) -= mat(i,i) * hsum;
487
+ }
488
+ }
489
+
490
+
491
+
492
+
493
+
494
+
495
+ // invert lower left matrix
496
+ template <typename T, ORDERING ORD>
497
+ void CalcInverseL (FlatMatrix<T,ORD> mat)
498
+ {
499
+ // M = L^{-1}
500
+ // i>j: (M L)(i,j) = M_ik L_kj = 0 j <= k <= i
501
+ // M_ij L_jj + M_ik L_kj = 0 j < k <= i
502
+ // M_ij = -1/{L_jj} sum j < k <= i: M_ik L_kj
503
+
504
+ int n = mat.Height();
505
+ STACK_ARRAY(T, mem, n);
506
+ FlatVector<T> dinv(n, &mem[0]);
507
+
508
+ for (size_t i = 0; i < n; i++)
509
+ // CalcInverse (mat(i,i), dinv(i));
510
+ dinv(i) = Inv(mat(i,i));
511
+
512
+ for (int i = n-1; i >= 0; i--)
513
+ {
514
+ mat(i,i) = dinv(i);
515
+ for (int j = i-1; j >= 0; j--)
516
+ {
517
+ T sum(0.0);
518
+ for (int k = j+1; k <= i; k++)
519
+ sum += mat(i,k) * mat(k,j);
520
+ mat(i,j) = -dinv(j) * sum;
521
+ }
522
+ }
523
+ }
524
+
525
+ // calculate inverse from LDL factorization
526
+ template <typename T, ORDERING ORD>
527
+ void CalcInverseLDL (FlatMatrix<T,ORD> mat)
528
+ {
529
+ size_t n = mat.Height();
530
+ STACK_ARRAY(T,mem, n);
531
+ FlatVector<T> dinv(n, &mem[0]);
532
+
533
+ for (size_t i = 0; i < n; i++)
534
+ {
535
+ // CalcInverse (mat(i,i), dinv(i));
536
+ dinv(i) = Inv(mat(i,i));
537
+ mat(i,i) = dinv(i);
538
+ }
539
+
540
+ CalcInverseL (mat);
541
+
542
+ for (size_t i = 0; i < n; i++)
543
+ for (size_t j = 0; j <= i; j++)
544
+ {
545
+ T sum = mat(i,j);
546
+ for (int k = i+1; k < n; k++)
547
+ sum += Trans(mat(k,i)) * dinv(k) * mat(k,j);
548
+ mat(i,j) = sum;
549
+ }
550
+ for (size_t i = 0; i < n; i++)
551
+ for (size_t j = i+1; j < n; j++)
552
+ mat(i,j) = mat(j,i);
553
+ }
554
+
555
+
556
+
557
+
558
+
559
+
560
+ // Solve for B1: B1 D1 L1^t = B
561
+ template <typename T, ORDERING ORD>
562
+ void CalcLDLNew_SolveL (SliceMatrix<T,ORD> L, SliceMatrix<T,ORD> B)
563
+ {
564
+ size_t n = L.Height();
565
+ if (n <= 1) return;
566
+
567
+ IntRange r1(0,n/2), r2(n/2,n);
568
+ auto L1 = L.Rows(r1).Cols(r1);
569
+ auto L21 = L.Rows(r2).Cols(r1);
570
+ auto L2 = L.Rows(r2).Cols(r2);
571
+ auto B1 = B.Cols(r1);
572
+ auto B2 = B.Cols(r2);
573
+
574
+ CalcLDLNew_SolveL(L1, B1);
575
+ // MySubADBt (B1, L1.Diag(), L21, B2, false);
576
+ B2 -= B1 * Trans(L21);
577
+ CalcLDLNew_SolveL(L2, B2);
578
+ }
579
+
580
+
581
+ // Calc A = L D^{-1} L^t
582
+ template <typename T, ORDERING ORD>
583
+ void CalcLDLNew (SliceMatrix<T,ORD> mat)
584
+ {
585
+ size_t n = mat.Height();
586
+ if (n == 0) return;
587
+ if (n == 1)
588
+ {
589
+ mat(0,0) = Inv(mat(0,0));
590
+ // CalcInverse (mat(0,0));
591
+ return;
592
+ }
593
+
594
+ size_t n1 = n/2;
595
+ auto L1 = mat.Rows(0,n1).Cols(0,n1);
596
+ auto L2 = mat.Rows(n1,n).Cols(n1,n);
597
+ auto B = mat.Rows(n1,n).Cols(0,n1);
598
+ CalcLDLNew (L1);
599
+ CalcLDLNew_SolveL (L1,B);
600
+ auto diag = L1.Diag();
601
+ MySubADBt (B, diag, B, L2, true);
602
+ /*
603
+ for (int i = 0; i < B.Height(); i++)
604
+ for (int j = 0; j < B.Height(); j++)
605
+ for (int k = 0; k < B.Width(); k++)
606
+ L2(i,j) -= diag(k)*B(i,k)*B(j,k);
607
+ */
608
+ CalcLDLNew (L2);
609
+ ScaleCols(B, diag);
610
+ /*
611
+ for (int i = 0; i < B.Width(); i++)
612
+ B.Col(i) *= diag(i);
613
+ */
614
+ }
615
+
616
+
617
+
618
+ /*
619
+ template <typename T, ORDERING ORD>
620
+ void SolveLDLNew (SliceMatrix<T,ORD> mat, FlatVector<T> sol)
621
+ {
622
+ size_t n = mat.Height();
623
+
624
+ for (size_t i = 0; i < n; i++)
625
+ {
626
+ T tmp = sol(i);
627
+ for (size_t j = i+1; j < n; j++)
628
+ sol(j) -= mat(j,i) * tmp;
629
+ }
630
+
631
+ for (size_t i = 0; i < n; i++)
632
+ sol(i) *= mat(i,i);
633
+
634
+ for (size_t i = n; i--> 0; )
635
+ {
636
+ T hsum{0};
637
+ for (size_t j = i+1; j < n; j++)
638
+ hsum += mat(j,i)*sol(j);
639
+ sol(i) -= hsum;
640
+ }
641
+ }
642
+ */
643
+
644
+ template <typename T, ORDERING ORD>
645
+ void SolveL (SliceMatrix<T,ORD> mat, FlatVector<T> sol)
646
+ {
647
+ size_t n = mat.Height();
648
+ if (n <= 1) return;
649
+
650
+ if (n < 32)
651
+ {
652
+ for (size_t i = 0; i < n; i++)
653
+ {
654
+ T tmp = sol(i);
655
+ for (size_t j = i+1; j < n; j++)
656
+ sol(j) -= mat(j,i) * tmp;
657
+ }
658
+ return;
659
+ }
660
+
661
+ IntRange r1(0,n/2), r2(n/2,n);
662
+ auto L1 = mat.Rows(r1).Cols(r1);
663
+ auto L21 = mat.Rows(r2).Cols(r1);
664
+ auto L2 = mat.Rows(r2).Cols(r2);
665
+ auto sol1 = sol.Range(r1);
666
+ auto sol2 = sol.Range(r2);
667
+
668
+ SolveL (L1, sol1);
669
+ sol2 -= L21 * sol1;
670
+ SolveL (L2, sol2);
671
+ }
672
+
673
+ template <typename T, ORDERING ORD>
674
+ void SolveLT (SliceMatrix<T,ORD> mat, FlatVector<T> sol)
675
+ {
676
+ size_t n = mat.Height();
677
+
678
+ if (n <= 1) return;
679
+
680
+ if (n < 32)
681
+ {
682
+ for (size_t i = n; i--> 0; )
683
+ {
684
+ T hsum{0};
685
+ for (size_t j = i+1; j < n; j++)
686
+ hsum += mat(j,i)*sol(j);
687
+ sol(i) -= hsum;
688
+ }
689
+ return;
690
+ }
691
+
692
+ IntRange r1(0,n/2), r2(n/2,n);
693
+ auto L1 = mat.Rows(r1).Cols(r1);
694
+ auto L21 = mat.Rows(r2).Cols(r1);
695
+ auto L2 = mat.Rows(r2).Cols(r2);
696
+ auto sol1 = sol.Range(r1);
697
+ auto sol2 = sol.Range(r2);
698
+
699
+ SolveLT (L2, sol2);
700
+ sol1 -= Trans(L21) * sol2;
701
+ SolveLT (L1, sol1);
702
+ }
703
+
704
+ template <typename T, ORDERING ORD>
705
+ void SolveLDLNew (SliceMatrix<T,ORD> mat, FlatVector<T> sol)
706
+ {
707
+ size_t n = mat.Height();
708
+
709
+ SolveL (mat, sol);
710
+
711
+ auto diag = mat.Diag();
712
+ for (size_t i = 0; i < n; i++)
713
+ sol(i) *= diag(i);
714
+
715
+ SolveLT (mat, sol);
716
+ }
717
+
718
+ }
719
+
720
+ #endif