ngsolve 6.2.2506.post216.dev0__cp314-cp314-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (306)
  1. ngsolve-6.2.2506.post216.dev0.data/data/bin/ngs_nvcc +22 -0
  2. ngsolve-6.2.2506.post216.dev0.data/data/bin/ngs_nvlink +17 -0
  3. ngsolve-6.2.2506.post216.dev0.data/data/bin/ngscxx +15 -0
  4. ngsolve-6.2.2506.post216.dev0.data/data/bin/ngsld +11 -0
  5. ngsolve-6.2.2506.post216.dev0.data/data/bin/ngsolve.tcl +648 -0
  6. ngsolve-6.2.2506.post216.dev0.data/data/bin/ngspy +2 -0
  7. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/analytic_integrals.hpp +10 -0
  8. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/arnoldi.hpp +55 -0
  9. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/bandmatrix.hpp +334 -0
  10. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/basematrix.hpp +963 -0
  11. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/basevector.hpp +1268 -0
  12. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/bdbequations.hpp +2807 -0
  13. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/bdbintegrator.hpp +1660 -0
  14. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/bem_diffops.hpp +475 -0
  15. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/bessel.hpp +1064 -0
  16. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/bilinearform.hpp +966 -0
  17. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/bla.hpp +29 -0
  18. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/blockalloc.hpp +95 -0
  19. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/blockjacobi.hpp +328 -0
  20. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/bspline.hpp +116 -0
  21. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/calcinverse.hpp +141 -0
  22. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/cg.hpp +368 -0
  23. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/chebyshev.hpp +44 -0
  24. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/cholesky.hpp +720 -0
  25. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/clapack.h +7254 -0
  26. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/code_generation.hpp +296 -0
  27. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/coefficient.hpp +2033 -0
  28. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/coefficient_impl.hpp +19 -0
  29. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/coefficient_stdmath.hpp +167 -0
  30. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/commutingAMG.hpp +106 -0
  31. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/comp.hpp +79 -0
  32. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/compatibility.hpp +41 -0
  33. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/complex_wrapper.hpp +101 -0
  34. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/compressedfespace.hpp +110 -0
  35. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/contact.hpp +239 -0
  36. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/cuda_core.hpp +216 -0
  37. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/cuda_linalg.hpp +185 -0
  38. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/cuda_ngbla.hpp +317 -0
  39. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/cuda_ngstd.hpp +414 -0
  40. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/cuda_profiler.hpp +240 -0
  41. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/diagonalmatrix.hpp +160 -0
  42. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/differentialoperator.hpp +276 -0
  43. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/diffop.hpp +1286 -0
  44. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/diffop_impl.hpp +328 -0
  45. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/diffopwithfactor.hpp +123 -0
  46. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/discontinuous.hpp +84 -0
  47. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/dump.hpp +949 -0
  48. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/ectypes.hpp +121 -0
  49. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/eigen.hpp +60 -0
  50. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/eigensystem.hpp +18 -0
  51. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/elasticity_equations.hpp +595 -0
  52. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/elementbyelement.hpp +201 -0
  53. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/elementtopology.hpp +1760 -0
  54. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/elementtransformation.hpp +339 -0
  55. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/evalfunc.hpp +405 -0
  56. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/expr.hpp +1693 -0
  57. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/facetfe.hpp +175 -0
  58. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/facetfespace.hpp +180 -0
  59. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/facethofe.hpp +111 -0
  60. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/facetsurffespace.hpp +112 -0
  61. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/fe_interfaces.hpp +32 -0
  62. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/fem.hpp +87 -0
  63. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/fesconvert.hpp +14 -0
  64. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/fespace.hpp +1454 -0
  65. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/finiteelement.hpp +286 -0
  66. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/globalinterfacespace.hpp +77 -0
  67. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/globalspace.hpp +115 -0
  68. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/gridfunction.hpp +525 -0
  69. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/h1amg.hpp +124 -0
  70. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/h1hofe.hpp +188 -0
  71. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/h1hofe_impl.hpp +1262 -0
  72. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/h1hofefo.hpp +148 -0
  73. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/h1hofefo_impl.hpp +185 -0
  74. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/h1hofespace.hpp +167 -0
  75. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/h1lofe.hpp +1240 -0
  76. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/h1lumping.hpp +41 -0
  77. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hcurl_equations.hpp +1381 -0
  78. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hcurlcurlfe.hpp +2241 -0
  79. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hcurlcurlfespace.hpp +78 -0
  80. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hcurlfe.hpp +259 -0
  81. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hcurlfe_utils.hpp +107 -0
  82. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hcurlhdiv_dshape.hpp +857 -0
  83. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hcurlhdivfes.hpp +308 -0
  84. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hcurlhofe.hpp +175 -0
  85. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hcurlhofe_impl.hpp +1871 -0
  86. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hcurlhofespace.hpp +193 -0
  87. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hcurllofe.hpp +1146 -0
  88. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hdiv_equations.hpp +880 -0
  89. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hdivdivfe.hpp +2923 -0
  90. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hdivdivsurfacespace.hpp +76 -0
  91. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hdivfe.hpp +206 -0
  92. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hdivfe_utils.hpp +717 -0
  93. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hdivfes.hpp +75 -0
  94. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hdivhofe.hpp +447 -0
  95. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hdivhofe_impl.hpp +1107 -0
  96. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hdivhofefo.hpp +229 -0
  97. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hdivhofespace.hpp +177 -0
  98. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hdivhosurfacefespace.hpp +106 -0
  99. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hdivlofe.hpp +773 -0
  100. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hidden.hpp +74 -0
  101. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/householder.hpp +181 -0
  102. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hypre_ams_precond.hpp +123 -0
  103. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hypre_precond.hpp +73 -0
  104. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/integrator.hpp +2012 -0
  105. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/integratorcf.hpp +253 -0
  106. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/interpolate.hpp +49 -0
  107. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/intrule.hpp +2542 -0
  108. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/intrules_SauterSchwab.hpp +25 -0
  109. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/irspace.hpp +49 -0
  110. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/jacobi.hpp +153 -0
  111. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/kernels.hpp +724 -0
  112. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/l2hofe.hpp +194 -0
  113. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/l2hofe_impl.hpp +564 -0
  114. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/l2hofefo.hpp +542 -0
  115. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/l2hofespace.hpp +344 -0
  116. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/la.hpp +38 -0
  117. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/linalg_kernels.hpp +70 -0
  118. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/linearform.hpp +266 -0
  119. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/matrix.hpp +2145 -0
  120. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/memusage.hpp +41 -0
  121. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/meshaccess.hpp +1359 -0
  122. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/mgpre.hpp +204 -0
  123. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/mp_coefficient.hpp +145 -0
  124. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/mptools.hpp +2281 -0
  125. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/multigrid.hpp +42 -0
  126. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/multivector.hpp +447 -0
  127. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/mumpsinverse.hpp +187 -0
  128. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/mycomplex.hpp +361 -0
  129. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/ng_lapack.hpp +1661 -0
  130. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/ngblas.hpp +1232 -0
  131. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/ngs_defines.hpp +30 -0
  132. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/ngs_stdcpp_include.hpp +106 -0
  133. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/ngs_utils.hpp +121 -0
  134. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/ngsobject.hpp +1019 -0
  135. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/ngsstream.hpp +113 -0
  136. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/ngstd.hpp +72 -0
  137. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/nodalhofe.hpp +96 -0
  138. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/nodalhofe_impl.hpp +141 -0
  139. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/normalfacetfe.hpp +223 -0
  140. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/normalfacetfespace.hpp +98 -0
  141. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/normalfacetsurfacefespace.hpp +84 -0
  142. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/order.hpp +251 -0
  143. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/parallel_matrices.hpp +222 -0
  144. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/paralleldofs.hpp +340 -0
  145. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/parallelngs.hpp +23 -0
  146. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/parallelvector.hpp +269 -0
  147. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/pardisoinverse.hpp +200 -0
  148. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/periodic.hpp +129 -0
  149. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/plateaufespace.hpp +25 -0
  150. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/pml.hpp +275 -0
  151. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/pmltrafo.hpp +631 -0
  152. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/postproc.hpp +142 -0
  153. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/potentialtools.hpp +22 -0
  154. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/precomp.hpp +60 -0
  155. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/preconditioner.hpp +602 -0
  156. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/prolongation.hpp +380 -0
  157. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/python_comp.hpp +107 -0
  158. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/python_fem.hpp +89 -0
  159. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/python_linalg.hpp +58 -0
  160. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/python_ngstd.hpp +386 -0
  161. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/recursive_pol.hpp +4896 -0
  162. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/recursive_pol_tet.hpp +395 -0
  163. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/recursive_pol_trig.hpp +492 -0
  164. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/reorderedfespace.hpp +81 -0
  165. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/sample_sort.hpp +105 -0
  166. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/scalarfe.hpp +335 -0
  167. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/shapefunction_utils.hpp +113 -0
  168. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/simd_complex.hpp +329 -0
  169. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/smoother.hpp +253 -0
  170. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/solve.hpp +89 -0
  171. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/sparsecholesky.hpp +317 -0
  172. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/sparsefactorization_interface.hpp +159 -0
  173. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/sparsematrix.hpp +1052 -0
  174. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/sparsematrix_dyn.hpp +90 -0
  175. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/sparsematrix_impl.hpp +1055 -0
  176. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/special_matrix.hpp +463 -0
  177. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/specialelement.hpp +125 -0
  178. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/statushandler.hpp +33 -0
  179. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/stringops.hpp +12 -0
  180. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/superluinverse.hpp +136 -0
  181. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/symbolicintegrator.hpp +850 -0
  182. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/symmetricmatrix.hpp +144 -0
  183. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/tangentialfacetfe.hpp +224 -0
  184. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/tangentialfacetfespace.hpp +91 -0
  185. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/tensor.hpp +522 -0
  186. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/tensorcoefficient.hpp +446 -0
  187. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/tensorproductintegrator.hpp +113 -0
  188. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/thcurlfe.hpp +128 -0
  189. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/thcurlfe_impl.hpp +380 -0
  190. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/thdivfe.hpp +80 -0
  191. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/thdivfe_impl.hpp +492 -0
  192. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/tpdiffop.hpp +461 -0
  193. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/tpfes.hpp +133 -0
  194. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/tpintrule.hpp +224 -0
  195. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/triangular.hpp +465 -0
  196. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/tscalarfe.hpp +245 -0
  197. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/tscalarfe_impl.hpp +1029 -0
  198. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/umfpackinverse.hpp +148 -0
  199. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/unifiedvector.hpp +103 -0
  200. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/vector.hpp +1452 -0
  201. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/voxelcoefficientfunction.hpp +41 -0
  202. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/vtkoutput.hpp +198 -0
  203. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/vvector.hpp +208 -0
  204. ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/webgui.hpp +92 -0
  205. ngsolve-6.2.2506.post216.dev0.data/data/lib/cmake/ngsolve/NGSolveConfig.cmake +102 -0
  206. ngsolve-6.2.2506.post216.dev0.data/data/lib/cmake/ngsolve/ngsolve-targets-release.cmake +89 -0
  207. ngsolve-6.2.2506.post216.dev0.data/data/lib/cmake/ngsolve/ngsolve-targets.cmake +180 -0
  208. ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/netgen_mesher.libs/libngbla.so +0 -0
  209. ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/netgen_mesher.libs/libngcomp.so +0 -0
  210. ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/netgen_mesher.libs/libngfem.so +0 -0
  211. ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/netgen_mesher.libs/libngla.so +0 -0
  212. ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/netgen_mesher.libs/libngsbem.so +0 -0
  213. ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/netgen_mesher.libs/libngscudalib.so +0 -0
  214. ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/netgen_mesher.libs/libngsolve.so +0 -0
  215. ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/netgen_mesher.libs/libngstd.so +0 -0
  216. ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/TensorProductTools.py +210 -0
  217. ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/__console.py +94 -0
  218. ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/__expr.py +181 -0
  219. ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/__init__.py +148 -0
  220. ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/_scikit_build_core_dependencies.py +30 -0
  221. ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/bvp.py +78 -0
  222. ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/config/__init__.py +1 -0
  223. ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/config/__main__.py +4 -0
  224. ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/config/config.py +60 -0
  225. ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/TensorProduct/__init__.py +0 -0
  226. ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/TensorProduct/tp_dg_1d_1d.py +80 -0
  227. ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/TensorProduct/tp_dg_1d_2d.py +73 -0
  228. ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/TensorProduct/tp_dg_2d_1d.py +72 -0
  229. ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/TensorProduct/tp_dg_2d_2d.py +66 -0
  230. ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/__init__.py +0 -0
  231. ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/howto/__init__.py +0 -0
  232. ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/howto/hhj.py +44 -0
  233. ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/howto/hybrid_dg.py +53 -0
  234. ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/howto/mixed.py +30 -0
  235. ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/howto/nonlin.py +29 -0
  236. ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/howto/pickling.py +26 -0
  237. ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/howto/pml.py +31 -0
  238. ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/howto/taskmanager.py +20 -0
  239. ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/howto/tdnns.py +47 -0
  240. ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/howto/timeDG-skeleton.py +45 -0
  241. ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/howto/timeDG.py +38 -0
  242. ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/howto/timeDGlap.py +42 -0
  243. ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/howto/timeDGwave.py +61 -0
  244. ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/intro/__init__.py +0 -0
  245. ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/intro/adaptive.py +123 -0
  246. ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/intro/cmagnet.py +59 -0
  247. ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/intro/elasticity.py +76 -0
  248. ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/intro/navierstokes.py +74 -0
  249. ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/intro/poisson.ipynb +170 -0
  250. ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/intro/poisson.py +41 -0
  251. ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/mpi/__init__.py +0 -0
  252. ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/mpi/mpi_cmagnet.py +87 -0
  253. ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/mpi/mpi_navierstokes.py +117 -0
  254. ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/mpi/mpi_poisson.py +89 -0
  255. ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/mpi/mpi_timeDG.py +82 -0
  256. ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/directsolvers.py +14 -0
  257. ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/eigenvalues.py +364 -0
  258. ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/internal.py +89 -0
  259. ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/krylovspace.py +1182 -0
  260. ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/meshes.py +748 -0
  261. ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/ngs2petsc.py +310 -0
  262. ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/ngscuda.so +0 -0
  263. ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/ngscxx.py +42 -0
  264. ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/ngslib.so +0 -0
  265. ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/nonlinearsolvers.py +203 -0
  266. ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/preconditioners.py +11 -0
  267. ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/solve_implementation.py +168 -0
  268. ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/solvers/__init__.py +7 -0
  269. ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/solvers/cudss.py +112 -0
  270. ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/timestepping.py +185 -0
  271. ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/timing.py +108 -0
  272. ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/utils.py +167 -0
  273. ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/webgui.py +671 -0
  274. ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/beam.geo +17 -0
  275. ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/beam.vol +240 -0
  276. ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/chip.in2d +41 -0
  277. ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/chip.vol +614 -0
  278. ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/coil.geo +12 -0
  279. ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/coil.vol +2560 -0
  280. ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/coilshield.geo +24 -0
  281. ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/coilshield.vol +3179 -0
  282. ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/cube.geo +19 -0
  283. ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/cube.vol +1832 -0
  284. ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/d10_DGdoubleglazing.pde +50 -0
  285. ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/d11_chip_nitsche.pde +40 -0
  286. ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/d1_square.pde +43 -0
  287. ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/d2_chip.pde +35 -0
  288. ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/d3_helmholtz.pde +22 -0
  289. ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/d4_cube.pde +46 -0
  290. ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/d5_beam.pde +74 -0
  291. ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/d6_shaft.pde +73 -0
  292. ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/d7_coil.pde +50 -0
  293. ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/d8_coilshield.pde +49 -0
  294. ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/d9_hybridDG.pde +72 -0
  295. ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/doubleglazing.in2d +27 -0
  296. ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/doubleglazing.vol +737 -0
  297. ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/piezo2d40round4.vol.gz +0 -0
  298. ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/shaft.geo +73 -0
  299. ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/shaft.vol +4291 -0
  300. ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/square.in2d +17 -0
  301. ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/square.vol +149 -0
  302. ngsolve-6.2.2506.post216.dev0.dist-info/METADATA +14 -0
  303. ngsolve-6.2.2506.post216.dev0.dist-info/RECORD +306 -0
  304. ngsolve-6.2.2506.post216.dev0.dist-info/WHEEL +5 -0
  305. ngsolve-6.2.2506.post216.dev0.dist-info/licenses/LICENSE +504 -0
  306. ngsolve-6.2.2506.post216.dev0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,317 @@
1
+ #ifndef CUDA_NGBLA
2
+ #define CUDA_NGBLA
3
+
4
+ #include <stdexcept>
+ #include <string>
+
+ #include <cuda_runtime.h>
5
+ #include <cublas_v2.h>
6
+
7
+ #include <vector.hpp>
8
+ #include <matrix.hpp>
9
+
10
+ #include "cuda_ngstd.hpp"
11
+ #include "linalg_kernels.hpp"
12
+
13
+
14
+ namespace ngla
15
+ {
16
+ cublasHandle_t Get_CuBlas_Handle (); // declaration only (no definition in this header); the handle is passed to cublasSetStream and cublasDgemm further down
17
+ }
18
+
19
+
20
+ namespace ngbla
21
+ {
22
+ using namespace ngs_cuda;
23
+
24
+ // template<> struct trivtrans<Dev<double>> { static constexpr bool value = true; };
25
+ template<> struct is_scalar_type<Dev<double>> { static constexpr bool value = true; }; // full specialization: reports Dev<double> as a scalar type (value == true); the primary template is declared outside this header
26
+
27
+
28
+ template <typename T>
29
+ class Vector<Dev<T>> : public FlatVector<Dev<T>>
30
+ {
31
+ using FlatVector<Dev<T>>::Size;
32
+ using FlatVector<Dev<T>>::Data;
33
+
34
+ public:
35
+ Vector (Vector&) = delete;
36
+ Vector (Vector&&v2)
37
+ : FlatVector<Dev<T>>(v2.Size(), v2.Data())
38
+ {
39
+ v2.layout = { nullptr, 0 };
40
+ }
41
+
42
+ Vector (size_t asize)
43
+ : FlatVector<Dev<T>>(asize, Dev<T>::Malloc(asize)) { ; }
44
+
45
+ Vector (FlatVector<T> vec)
46
+ : FlatVector<Dev<T>>(vec.Size(), Dev<T>::Malloc(vec.Size()))
47
+ {
48
+ H2D(vec);
49
+ }
50
+
51
+ ~Vector()
52
+ {
53
+ Dev<T>::Free(Data());
54
+ }
55
+
56
+
57
+ template<typename TB>
58
+ Vector & operator= (const Expr<TB> & v)
59
+ {
60
+ MatExpr<FlatVector<Dev<T>> >::operator= (v);
61
+ return *this;
62
+ }
63
+
64
+
65
+ void D2H (FlatVector<T> vec) const
66
+ {
67
+ cudaMemcpy (vec.Data(), Data(), sizeof(T)*Size(), cudaMemcpyDeviceToHost);
68
+ }
69
+
70
+ void H2D (FlatVector<T> vec)
71
+ {
72
+ cudaMemcpy (Data(), vec.Data(), sizeof(T)*Size(), cudaMemcpyHostToDevice);
73
+ }
74
+
75
+ Vector<T> D2H() const
76
+ {
77
+ Vector<T> vh(Size());
78
+ D2H (vh);
79
+ return vh;
80
+ }
81
+ };
82
+
83
+ inline Vector<double> D2H (FlatVector<Dev<double>> dvec)
84
+ {
85
+ Vector<double> hvec(dvec.Size());
86
+ cudaMemcpy (hvec.Data(), dvec.Data(), sizeof(double)*hvec.Size(), cudaMemcpyDeviceToHost);
87
+ return hvec;
88
+ }
89
+
90
+ #ifdef OLDOLD
91
+ #ifdef __CUDACC__
92
+ template <typename TS, typename TD>
93
+ __global__ void kernel_Assign (size_t n, TD pod_dst, TS pod_src)
94
+ {
95
+ auto dst = *pod_dst;
96
+ auto src = *pod_src;
97
+
98
+ int tid = blockIdx.x*blockDim.x+threadIdx.x;
99
+ for (int i = tid; i < n; i += blockDim.x*gridDim.x)
100
+ if (i < 5)
101
+ dst(i) = src.S();
102
+ else
103
+ dst(i) = src.A()(i);
104
+ }
105
+ #endif
106
+
107
+ template <typename T>
108
+ class AsPOD
109
+ {
110
+ std::array<char, sizeof(T)> data;
111
+ public:
112
+ AsPOD(const AsPOD&) = default;
113
+ INLINE AsPOD (const T & adata)
114
+ {
115
+ char * pdata = (char*)(void*)&adata;
116
+ for (int i = 0; i < sizeof(T); i++)
117
+ data[i] = pdata[i];
118
+ }
119
+
120
+ INLINE const T & operator* () const
121
+ {
122
+ T * val = (T*)(void*)&data[0];
123
+ return *val;
124
+ }
125
+ INLINE int operator[] (int i) const { return data[i]; }
126
+ };
127
+ #endif
128
+
129
+
130
+
131
+ template <typename TOP, typename T, typename TS, typename TDIST, typename TB>
132
+ class assign_trait<TOP, VectorView<T,TS,TDIST>, TB,
133
+ enable_if_t < std::is_same<std::invoke_result_t<VectorView<T,TS,TDIST>,size_t>, Dev<double>&>::value, int>>
134
+ {
135
+ public:
136
+ static INLINE VectorView<T,TS,TDIST> & Assign (MatExpr<VectorView<T,TS,TDIST>> & self, const Expr<TB> & v)
137
+ {
138
+
139
+ #ifdef __CUDACC__
140
+
141
+ ngs_cuda::DeviceParallelFor
142
+ (self.Height(),
143
+ [devself=self.Spec(), devv=v.Spec()] DEVICE_LAMBDA (auto tid) -> void
144
+ {
145
+ // devself(tid) = devv(tid);
146
+ TOP()(devself(tid),devv(tid));
147
+ });
148
+
149
+ #endif
150
+
151
+ return self.Spec();
152
+ }
153
+ };
154
+
155
+
156
+
157
+
158
+
159
+ template <typename T>
160
+ class Matrix<Dev<T>> : public FlatMatrix<Dev<T>>
161
+ {
162
+ using FlatMatrix<Dev<T>>::h;
163
+ using FlatMatrix<Dev<T>>::w;
164
+ using FlatMatrix<Dev<T>>::data;
165
+
166
+ public:
167
+ Matrix (Matrix&) = delete;
168
+ Matrix (Matrix&&m2)
169
+ : FlatMatrix<Dev<T>>(m2.Height(), m2.Width(), m2.Data())
170
+ {
171
+ m2.data = nullptr;
172
+ }
173
+
174
+ Matrix (size_t h_, size_t w_)
175
+ : FlatMatrix<Dev<T>>(h_, w_, Dev<T>::Malloc(h_*w_)) { ; }
176
+
177
+ Matrix (FlatMatrix<T> mat)
178
+ : FlatMatrix<Dev<T>>(mat.Height(), mat.Width(),
179
+ Dev<T>::Malloc(mat.Height()*mat.Width()))
180
+ {
181
+ H2D(mat);
182
+ }
183
+
184
+ ~Matrix()
185
+ {
186
+ Dev<T>::Free(data);
187
+ }
188
+
189
+ void D2H (FlatMatrix<T> mat) const
190
+ {
191
+ cudaMemcpy (mat.Data(), data, sizeof(T)*h*w, cudaMemcpyDeviceToHost);
192
+ }
193
+
194
+ void H2D (FlatMatrix<T> mat)
195
+ {
196
+ cudaMemcpy (data, mat.Data(), sizeof(T)*h*w, cudaMemcpyHostToDevice);
197
+ }
198
+
199
+ Matrix<T> D2H() const
200
+ {
201
+ Matrix<T> mh(h, w);
202
+ D2H (mh);
203
+ return mh;
204
+ }
205
+ };
206
+
207
+ inline Matrix<double> D2H (SliceMatrix<Dev<double>> dmat)
208
+ {
209
+ Matrix<double> hmat(dmat.Height(), dmat.Width());
210
+ for (size_t i = 0; i < hmat.Height(); i++)
211
+ cudaMemcpy (&hmat(i,0), &dmat(i,0), sizeof(double)*hmat.Width(), cudaMemcpyDeviceToHost);
212
+ return hmat;
213
+ }
214
+
215
+ inline Matrix<double,ColMajor> D2H (SliceMatrix<Dev<double>,ColMajor> dmat)
216
+ {
217
+ return Trans(D2H(Trans(dmat)));
218
+ }
219
+
220
+
221
+
222
+
223
+ template <ORDERING ORDA, ORDERING ORDB>
224
+ void CudaMultMatMat2 (SliceMatrix<Dev<double>, ORDA> a, SliceMatrix<Dev<double>,ORDB> b,
225
+ SliceMatrix<Dev<double>, ORDERING::ColMajor> c,
226
+ double alpha, double beta)
227
+ {
228
+ static Timer t("cublasDgemm");
229
+ CudaRegionTimer rt(t);
230
+ cublasSetStream(ngla::Get_CuBlas_Handle(), ngs_cuda_stream);
231
+ cublasStatus_t stat =
232
+ cublasDgemm(ngla::Get_CuBlas_Handle(),
233
+ ORDA==ORDERING::RowMajor ? CUBLAS_OP_T : CUBLAS_OP_N,
234
+ ORDB==ORDERING::RowMajor ? CUBLAS_OP_T : CUBLAS_OP_N,
235
+ c.Height(), c.Width(), a.Width(),
236
+ &alpha, (double*)a.Data(), a.Dist(), (double*)b.Data(), b.Dist(),
237
+ &beta, (double*)c.Data(), c.Dist());
238
+ }
239
+
240
+ template <ORDERING ORDA, ORDERING ORDB>
241
+ void CudaMultMatMat2 (SliceMatrix<Dev<double>, ORDA> a, SliceMatrix<Dev<double>,ORDB> b,
242
+ SliceMatrix<Dev<double>, ORDERING::RowMajor> c,
243
+ double alpha, double beta)
244
+ {
245
+ CudaMultMatMat2 (Trans(b), Trans(a), Trans(c), alpha, beta);
246
+ }
247
+
248
+
249
+ template <typename TA, typename TB, typename TC,
250
+ enable_if_t<IsConvertibleToSliceMatrix<TA,Dev<double>>(),int> = 0,
251
+ enable_if_t<IsConvertibleToSliceMatrix<TB,Dev<double>>(),int> = 0,
252
+ enable_if_t<IsConvertibleToSliceMatrix<TC,Dev<double>>(),int> = 0>
253
+ void MultMatMat (const TA & a, const TB & b, const TC & c, double alpha=1, double beta=0)
254
+ {
255
+ CudaMultMatMat2(make_SliceMatrix(a), make_SliceMatrix(b), make_SliceMatrix(c), alpha, beta);
256
+ }
257
+
258
+
259
+ template <typename TOP, typename T, typename TB1, typename TB2>
260
+ class assign_trait<TOP, T, MultExpr<TB1,TB2>,
261
+ enable_if_t<IsConvertibleToSliceMatrix<T,Dev<double>>(),int>>
262
+ {
263
+ public:
264
+ static INLINE T & Assign (MatExpr<T> & self, const Expr<MultExpr<TB1,TB2>> & v)
265
+ {
266
+ auto res = self.View();
267
+
268
+ double alpha = std::is_same_v<TOP,typename MatExpr<T>::AsSub> ? -1 : 1;
269
+ double beta = std::is_same_v<TOP,typename MatExpr<T>::As> ? 0 : 1;
270
+
271
+ MultMatMat (v.Spec().A(), v.Spec().B(), self.Spec(), alpha, beta);
272
+ return self.Spec();
273
+ }
274
+ };
275
+
276
+ template <typename TOP, typename T, typename TB1, typename TB2>
277
+ class assign_trait<TOP, T, ScaleExpr<MultExpr<TB1,TB2>,double>,
278
+ enable_if_t<IsConvertibleToSliceMatrix<T,Dev<double>>(),int>>
279
+ {
280
+ public:
281
+ static inline T & Assign (MatExpr<T> & self, const Expr<ScaleExpr<MultExpr<TB1,TB2>,double>> & v)
282
+ {
283
+ auto res = self.View();
284
+
285
+ double alpha = is_same_v<TOP,typename MatExpr<T>::AsSub> ? -1 : 1;
286
+ double beta = is_same_v<TOP,typename MatExpr<T>::As> ? 0 : 1;
287
+
288
+ alpha *= v.View().S();
289
+
290
+ MultMatMat (v.View().A().A(), v.View().A().B(), self.ViewRW(), alpha, beta);
291
+ return self.Spec();
292
+ }
293
+ };
294
+
295
+ template <typename TOP, typename T, typename TB1, typename TB2>
296
+ class assign_trait<TOP, T, MultExpr<ScaleExpr<TB1,double>,TB2>,
297
+ enable_if_t<IsConvertibleToSliceMatrix<T,Dev<double>>(),int>>
298
+ {
299
+ public:
300
+ static inline T & Assign (MatExpr<T> & self, const Expr<MultExpr<ScaleExpr<TB1,double>,TB2>> & v)
301
+ {
302
+ auto res = self.View();
303
+
304
+ double alpha = is_same_v<TOP,typename MatExpr<T>::AsSub> ? -1 : 1;
305
+ double beta = is_same_v<TOP,typename MatExpr<T>::As> ? 0 : 1;
306
+
307
+ alpha *= v.View().A().S();
308
+
309
+ MultMatMat (v.View().A().A(), v.View().B(), self.ViewRW(), alpha, beta);
310
+ return self.Spec();
311
+ }
312
+ };
313
+
314
+
315
+ }
316
+
317
+ #endif
@@ -0,0 +1,414 @@
1
+ #ifndef CUDA_NGSTD_HPP
2
+ #define CUDA_NGSTD_HPP
3
+
4
+ #include <cuda_runtime.h>
5
+ #include <ngstd.hpp>
6
+
7
+ #include "cuda_core.hpp"
8
+ #include "cuda_profiler.hpp"
9
+
10
+ namespace ngs_cuda
11
+ {
12
+ using namespace ngstd;
13
+
14
+
15
+
16
+ extern int gpu_clock;
17
+ void InitCUDA (int verbose = 2);
18
+
19
+ }
20
+
21
+ namespace ngcore {
22
+ template <typename T>
23
+ struct IsSafe<ngs_cuda::Dev<T>> {
24
+ constexpr operator bool() const { return true; }
25
+ };
26
+ }
27
+
28
+
29
+ namespace ngs_cuda
30
+ {
31
+
32
+ // TODO: Resize + error checking
33
+ class DevStackMemory
34
+ {
35
+ char * data;
36
+ char * stackptr;
37
+ public:
38
+ DevStackMemory (size_t s = 512*1024*1025)
39
+ {
40
+ cudaMalloc (&data, s);
41
+ stackptr = data;
42
+ }
43
+
44
+ ~DevStackMemory ()
45
+ {
46
+ cudaFree (data);
47
+ }
48
+
49
+ template <typename T>
50
+ T * Alloc (size_t s)
51
+ {
52
+ char * tmp = stackptr;
53
+ s *= sizeof(T);
54
+ s = (s+255) & size_t(-256);
55
+ stackptr += s;
56
+ return reinterpret_cast<T*>(tmp);
57
+ }
58
+
59
+ void Free (void * ptr)
60
+ {
61
+ stackptr = reinterpret_cast<char*> (ptr);
62
+ }
63
+ };
64
+
65
+ extern DevStackMemory stackmemory;
66
+
67
+ template <typename T>
68
+ class DevStackArray : public FlatArray<Dev<T>>
69
+ {
70
+ public:
71
+ DevStackArray (size_t s)
72
+ : FlatArray<Dev<T>> (s, (Dev<T>*)stackmemory.Alloc<T>(s))
73
+ { ; }
74
+ ~DevStackArray ()
75
+ {
76
+ stackmemory.Free(this->data);
77
+ }
78
+ T * DevData () const { return (T*)this->data; }
79
+ };
80
+ }
81
+
82
+ namespace std {
83
+ template <typename T>
84
+ struct is_integral<ngs_cuda::Dev<T>> {
85
+ static constexpr bool value = is_integral<T>::value;
86
+ };
87
+ }
88
+
89
+
90
+
91
+ namespace ngcore
92
+ {
93
+ using ngs_cuda::Dev;
94
+ template <typename T>
95
+ class Array<Dev<T>> : public FlatArray<Dev<T>>
96
+ {
97
+ public:
98
+ Array() = default;
99
+ Array (size_t s)
100
+ : FlatArray<Dev<T>>(s, Dev<T>::Malloc(s)) { } ;
101
+ Array (FlatArray<T> a2)
102
+ : Array(a2.Size())
103
+ {
104
+ this->data->H2D(a2);
105
+ }
106
+
107
+ Array& operator= (Array<Dev<T>> && a2)
108
+ {
109
+ Swap (this->data, a2.data);
110
+ Swap (this->size, a2.size);
111
+ return *this;
112
+ }
113
+
114
+ Array& operator= (FlatArray<T> a2)
115
+ {
116
+ SetSize(a2.Size());
117
+ this->data->H2D(a2);
118
+ return *this;
119
+ }
120
+
121
+ void SetSize(size_t s)
122
+ {
123
+ if (this->Size() != s)
124
+ {
125
+ Dev<T>::Free(this->data);
126
+ this->data = Dev<T>::Malloc(s);
127
+ this->size = s;
128
+ }
129
+ }
130
+
131
+ ~Array()
132
+ {
133
+ Dev<T>::Free(this->data);
134
+ }
135
+ };
136
+ }
137
+
138
+
139
+ namespace ngs_cuda
140
+ {
141
+ // use Array<Dev<T>> instead
142
+ template <typename T>
143
+ class [[deprecated]] DevArray
144
+ {
145
+ int size;
146
+ T * dev_data;
147
+
148
+ public:
149
+ DevArray (int asize)
150
+ {
151
+ size = asize;
152
+ cudaMalloc((T**)&dev_data, size*sizeof(T));
153
+ }
154
+
155
+ DevArray (FlatArray<T> a2)
156
+ {
157
+ size = a2.Size();
158
+ cudaMalloc((T**)&dev_data, size*sizeof(T));
159
+ cudaMemcpy (dev_data, &a2[0], sizeof(T)*size, cudaMemcpyHostToDevice);
160
+ }
161
+
162
+ ~DevArray ()
163
+ {
164
+ cudaFree (dev_data);
165
+ }
166
+
167
+ T * DevPtr() { return dev_data; }
168
+
169
+ DevArray & operator= (FlatArray<T> a2)
170
+ {
171
+ cudaMemcpy (dev_data, &a2[0], sizeof(T)*size, cudaMemcpyHostToDevice);
172
+ return *this;
173
+ }
174
+
175
+ void D2H (FlatArray<T> a2) const
176
+ {
177
+ cudaMemcpy (&a2[0], dev_data, sizeof(T)*size, cudaMemcpyDeviceToHost);
178
+ }
179
+
180
+ INLINE int Size() const { return size; }
181
+
182
+ /*
183
+ INLINE operator FlatArray<T> ()
184
+ {
185
+ return FlatArray<T> (size, dev_data);
186
+ }
187
+ */
188
+ INLINE FlatArray<T> Dev() const
189
+ {
190
+ return FlatArray<T> (size, dev_data);
191
+ }
192
+
193
+ explicit INLINE operator Array<T> () const
194
+ {
195
+ Array<T> temp(size);
196
+ #ifdef __CUDA_ARCH__
197
+ temp = FlatArray<T> (*this);
198
+ #else
199
+ D2H (temp);
200
+ #endif
201
+ return temp;
202
+ }
203
+
204
+ INLINE Array<T> Host() const
205
+ {
206
+ return Array<T> (*this);
207
+ }
208
+
209
+ T * DevData() const { return dev_data; }
210
+ };
211
+
212
+
213
+
214
+ template <typename T>
215
+ inline Array<T> D2H (FlatArray<Dev<T>> deva)
216
+ {
217
+ Array<T> hosta(deva.Size());
218
+ cudaMemcpy (hosta.Data(), deva.Data(), sizeof(T)*hosta.Size(), cudaMemcpyDeviceToHost);
219
+ return hosta;
220
+ }
221
+
222
+ template <typename T>
223
+ inline void H2D (FlatArray<Dev<T>> deva, FlatArray<T> hosta)
224
+ {
225
+ cudaMemcpy (deva.Data(), hosta.Data(), sizeof(T)*hosta.Size(), cudaMemcpyHostToDevice);
226
+ }
227
+
228
+ /*
229
+ template <class T>
230
+ class TableWrapper : public Table<T>
231
+ {
232
+ using Table<T>::size;
233
+ using Table<T>::data;
234
+ using Table<T>::index;
235
+ public:
236
+ INLINE TableWrapper (int asize, int * aindex, T * adata)
237
+ // : Table<T> (0,0)
238
+ {
239
+ size = asize;
240
+ index = aindex;
241
+ data = adata;
242
+ }
243
+
244
+ INLINE TableWrapper (const Table<T> & tab)
245
+ // : Table<T> (0,0)
246
+ {
247
+ const TableWrapper<T> & htab = static_cast<const TableWrapper<T>&> (tab);
248
+ size = htab.size;
249
+ data = htab.data;
250
+ index = htab.index;
251
+ }
252
+ INLINE ~TableWrapper ()
253
+ {
254
+ data = NULL;
255
+ index = NULL;
256
+ }
257
+
258
+ INLINE int SizeData() { return index[size]; }
259
+ INLINE int* & Index() { return index; }
260
+ INLINE T* & Data() { return data; }
261
+
262
+ // HD const int * & Index() const { return index; }
263
+ // HD const T * & Data() const { return data; }
264
+ };
265
+ */
266
+
267
+
268
+
269
+ // only data at device, but index at host
270
+ template <typename T>
271
+ class DevDataTable
272
+ {
273
+ int size;
274
+ size_t * index = nullptr;
275
+ Dev<T> * dev_data = nullptr;
276
+
277
+ public:
278
+
279
+ DevDataTable (FlatTable<T> t2)
280
+ {
281
+ size = t2.Size();
282
+ if (size == 0) return;
283
+
284
+ index = new size_t[size+1];
285
+ for (int i = 0; i <= size; i++)
286
+ index[i] = t2.IndexArray()[i];
287
+
288
+ int sizedata = t2.AsArray().Size();
289
+ dev_data = Dev<T>::Malloc(sizedata);
290
+ cudaMemcpy (dev_data, t2.Data(), sizeof(T)*sizedata, cudaMemcpyHostToDevice);
291
+ }
292
+
293
+ ~DevDataTable ()
294
+ {
295
+ Dev<T>::Free (dev_data);
296
+ delete [] index;
297
+ }
298
+
299
+ void D2H (FlatTable<T> & t2) const
300
+ {
301
+ int sizedata = t2.AsArray().Size();
302
+ cudaMemcpy (&t2[0][0], dev_data, sizeof(T)*sizedata, cudaMemcpyDeviceToHost);
303
+ }
304
+
305
+ operator FlatTable<Dev<T>> () const
306
+ {
307
+ return FlatTable<Dev<T>> (size, index, dev_data);
308
+ }
309
+
310
+ auto Index() const { return index; }
311
+ auto DevData() const { return dev_data; }
312
+
313
+ FlatArray<Dev<T>> Row(int i) const { return { index[i+1]-index[i], dev_data+index[i] }; }
314
+
315
+ class Iterator
316
+ {
317
+ const DevDataTable & tab;
318
+ size_t row;
319
+ public:
320
+ Iterator (const DevDataTable & _tab, size_t _row) : tab(_tab), row(_row) { ; }
321
+ Iterator & operator++ () { ++row; return *this; }
322
+ auto operator* () const { return tab.Row(row); }
323
+ bool operator!= (const Iterator & it2) { return row != it2.row; }
324
+ };
325
+
326
+ Iterator begin() const { return Iterator(*this, 0); }
327
+ Iterator end() const { return Iterator(*this, size); }
328
+ };
329
+
330
+
331
+ template <typename T>
332
+ class DevTable
333
+ {
334
+ int size;
335
+ Dev<size_t> * dev_index = nullptr;
336
+ Dev<T> * dev_data = nullptr;
337
+
338
+ public:
339
+
340
+ DevTable (FlatTable<T> t2)
341
+ {
342
+ size = t2.Size();
343
+ if (size == 0) return;
344
+
345
+ cudaMalloc((size_t**)&dev_index, (size+1)*sizeof(size_t));
346
+ cudaMemcpy (dev_index, &t2.IndexArray()[0], sizeof(size_t)*(size+1), cudaMemcpyHostToDevice);
347
+ // cout << "res = " << cudaMemcpy (dev_index, t2.Index(), sizeof(int)*(size+1), cudaMemcpyHostToDevice) << endl;
348
+
349
+ int sizedata = t2.AsArray().Size();
350
+ cudaMalloc((int**)&dev_data, sizedata*sizeof(T));
351
+ cudaMemcpy (dev_data, t2.Data(), sizeof(T)*sizedata, cudaMemcpyHostToDevice);
352
+ }
353
+
354
+ ~DevTable ()
355
+ {
356
+ cudaFree (dev_data);
357
+ cudaFree (dev_index);
358
+ }
359
+
360
+ void D2H (FlatTable<T> & t2) const
361
+ {
362
+ int sizedata = t2.AsArray().Size();
363
+ cudaMemcpy (&t2[0][0], dev_data, sizeof(T)*sizedata, cudaMemcpyDeviceToHost);
364
+ }
365
+
366
+ operator FlatTable<T> () const
367
+ {
368
+ return FlatTable<T> (size, (size_t*)dev_index, (T*)dev_data);
369
+ }
370
+
371
+ size_t * DevIndex() const { return (size_t*)dev_index; }
372
+ T * DevData() const { return (T*)dev_data; }
373
+
374
+ FlatArray<Dev<T>> AsArray() const
375
+ {
376
+ return FlatArray<Dev<T>> ( dev_index[size].D2H(), dev_data );
377
+ }
378
+ };
379
+
380
+
381
+
382
+
383
+
384
+
385
+ class DevBitArray
386
+ {
387
+ protected:
388
+ size_t size = 0;
389
+ unsigned char * dev_data = nullptr;
390
+
391
+ public:
392
+ DevBitArray (size_t asize);
393
+ DevBitArray (const ngcore::BitArray & ba);
394
+
395
+ ~DevBitArray ();
396
+
397
+ DevBitArray & operator= (const ngcore::BitArray &ba);
398
+
399
+ size_t Size () const { return size; }
400
+ auto Data () const { return dev_data; }
401
+
402
+ void SetSize (size_t asize);
403
+
404
+ private:
405
+ size_t Addr (size_t i) const
406
+ {
407
+ return (i / CHAR_BIT);
408
+ }
409
+ };
410
+
411
+ }
412
+
413
+
414
+ #endif