ngsolve 6.2.2506.post216.dev0__cp314-cp314-manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ngsolve-6.2.2506.post216.dev0.data/data/bin/ngs_nvcc +22 -0
- ngsolve-6.2.2506.post216.dev0.data/data/bin/ngs_nvlink +17 -0
- ngsolve-6.2.2506.post216.dev0.data/data/bin/ngscxx +15 -0
- ngsolve-6.2.2506.post216.dev0.data/data/bin/ngsld +11 -0
- ngsolve-6.2.2506.post216.dev0.data/data/bin/ngsolve.tcl +648 -0
- ngsolve-6.2.2506.post216.dev0.data/data/bin/ngspy +2 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/analytic_integrals.hpp +10 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/arnoldi.hpp +55 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/bandmatrix.hpp +334 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/basematrix.hpp +963 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/basevector.hpp +1268 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/bdbequations.hpp +2807 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/bdbintegrator.hpp +1660 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/bem_diffops.hpp +475 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/bessel.hpp +1064 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/bilinearform.hpp +966 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/bla.hpp +29 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/blockalloc.hpp +95 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/blockjacobi.hpp +328 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/bspline.hpp +116 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/calcinverse.hpp +141 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/cg.hpp +368 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/chebyshev.hpp +44 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/cholesky.hpp +720 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/clapack.h +7254 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/code_generation.hpp +296 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/coefficient.hpp +2033 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/coefficient_impl.hpp +19 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/coefficient_stdmath.hpp +167 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/commutingAMG.hpp +106 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/comp.hpp +79 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/compatibility.hpp +41 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/complex_wrapper.hpp +101 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/compressedfespace.hpp +110 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/contact.hpp +239 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/cuda_core.hpp +216 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/cuda_linalg.hpp +185 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/cuda_ngbla.hpp +317 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/cuda_ngstd.hpp +414 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/cuda_profiler.hpp +240 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/diagonalmatrix.hpp +160 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/differentialoperator.hpp +276 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/diffop.hpp +1286 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/diffop_impl.hpp +328 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/diffopwithfactor.hpp +123 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/discontinuous.hpp +84 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/dump.hpp +949 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/ectypes.hpp +121 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/eigen.hpp +60 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/eigensystem.hpp +18 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/elasticity_equations.hpp +595 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/elementbyelement.hpp +201 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/elementtopology.hpp +1760 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/elementtransformation.hpp +339 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/evalfunc.hpp +405 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/expr.hpp +1693 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/facetfe.hpp +175 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/facetfespace.hpp +180 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/facethofe.hpp +111 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/facetsurffespace.hpp +112 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/fe_interfaces.hpp +32 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/fem.hpp +87 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/fesconvert.hpp +14 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/fespace.hpp +1454 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/finiteelement.hpp +286 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/globalinterfacespace.hpp +77 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/globalspace.hpp +115 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/gridfunction.hpp +525 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/h1amg.hpp +124 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/h1hofe.hpp +188 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/h1hofe_impl.hpp +1262 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/h1hofefo.hpp +148 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/h1hofefo_impl.hpp +185 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/h1hofespace.hpp +167 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/h1lofe.hpp +1240 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/h1lumping.hpp +41 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hcurl_equations.hpp +1381 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hcurlcurlfe.hpp +2241 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hcurlcurlfespace.hpp +78 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hcurlfe.hpp +259 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hcurlfe_utils.hpp +107 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hcurlhdiv_dshape.hpp +857 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hcurlhdivfes.hpp +308 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hcurlhofe.hpp +175 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hcurlhofe_impl.hpp +1871 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hcurlhofespace.hpp +193 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hcurllofe.hpp +1146 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hdiv_equations.hpp +880 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hdivdivfe.hpp +2923 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hdivdivsurfacespace.hpp +76 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hdivfe.hpp +206 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hdivfe_utils.hpp +717 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hdivfes.hpp +75 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hdivhofe.hpp +447 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hdivhofe_impl.hpp +1107 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hdivhofefo.hpp +229 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hdivhofespace.hpp +177 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hdivhosurfacefespace.hpp +106 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hdivlofe.hpp +773 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hidden.hpp +74 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/householder.hpp +181 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hypre_ams_precond.hpp +123 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hypre_precond.hpp +73 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/integrator.hpp +2012 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/integratorcf.hpp +253 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/interpolate.hpp +49 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/intrule.hpp +2542 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/intrules_SauterSchwab.hpp +25 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/irspace.hpp +49 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/jacobi.hpp +153 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/kernels.hpp +724 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/l2hofe.hpp +194 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/l2hofe_impl.hpp +564 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/l2hofefo.hpp +542 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/l2hofespace.hpp +344 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/la.hpp +38 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/linalg_kernels.hpp +70 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/linearform.hpp +266 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/matrix.hpp +2145 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/memusage.hpp +41 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/meshaccess.hpp +1359 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/mgpre.hpp +204 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/mp_coefficient.hpp +145 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/mptools.hpp +2281 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/multigrid.hpp +42 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/multivector.hpp +447 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/mumpsinverse.hpp +187 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/mycomplex.hpp +361 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/ng_lapack.hpp +1661 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/ngblas.hpp +1232 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/ngs_defines.hpp +30 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/ngs_stdcpp_include.hpp +106 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/ngs_utils.hpp +121 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/ngsobject.hpp +1019 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/ngsstream.hpp +113 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/ngstd.hpp +72 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/nodalhofe.hpp +96 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/nodalhofe_impl.hpp +141 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/normalfacetfe.hpp +223 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/normalfacetfespace.hpp +98 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/normalfacetsurfacefespace.hpp +84 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/order.hpp +251 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/parallel_matrices.hpp +222 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/paralleldofs.hpp +340 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/parallelngs.hpp +23 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/parallelvector.hpp +269 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/pardisoinverse.hpp +200 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/periodic.hpp +129 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/plateaufespace.hpp +25 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/pml.hpp +275 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/pmltrafo.hpp +631 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/postproc.hpp +142 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/potentialtools.hpp +22 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/precomp.hpp +60 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/preconditioner.hpp +602 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/prolongation.hpp +380 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/python_comp.hpp +107 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/python_fem.hpp +89 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/python_linalg.hpp +58 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/python_ngstd.hpp +386 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/recursive_pol.hpp +4896 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/recursive_pol_tet.hpp +395 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/recursive_pol_trig.hpp +492 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/reorderedfespace.hpp +81 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/sample_sort.hpp +105 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/scalarfe.hpp +335 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/shapefunction_utils.hpp +113 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/simd_complex.hpp +329 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/smoother.hpp +253 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/solve.hpp +89 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/sparsecholesky.hpp +317 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/sparsefactorization_interface.hpp +159 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/sparsematrix.hpp +1052 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/sparsematrix_dyn.hpp +90 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/sparsematrix_impl.hpp +1055 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/special_matrix.hpp +463 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/specialelement.hpp +125 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/statushandler.hpp +33 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/stringops.hpp +12 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/superluinverse.hpp +136 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/symbolicintegrator.hpp +850 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/symmetricmatrix.hpp +144 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/tangentialfacetfe.hpp +224 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/tangentialfacetfespace.hpp +91 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/tensor.hpp +522 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/tensorcoefficient.hpp +446 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/tensorproductintegrator.hpp +113 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/thcurlfe.hpp +128 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/thcurlfe_impl.hpp +380 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/thdivfe.hpp +80 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/thdivfe_impl.hpp +492 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/tpdiffop.hpp +461 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/tpfes.hpp +133 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/tpintrule.hpp +224 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/triangular.hpp +465 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/tscalarfe.hpp +245 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/tscalarfe_impl.hpp +1029 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/umfpackinverse.hpp +148 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/unifiedvector.hpp +103 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/vector.hpp +1452 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/voxelcoefficientfunction.hpp +41 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/vtkoutput.hpp +198 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/vvector.hpp +208 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/webgui.hpp +92 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/cmake/ngsolve/NGSolveConfig.cmake +102 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/cmake/ngsolve/ngsolve-targets-release.cmake +89 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/cmake/ngsolve/ngsolve-targets.cmake +180 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/netgen_mesher.libs/libngbla.so +0 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/netgen_mesher.libs/libngcomp.so +0 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/netgen_mesher.libs/libngfem.so +0 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/netgen_mesher.libs/libngla.so +0 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/netgen_mesher.libs/libngsbem.so +0 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/netgen_mesher.libs/libngscudalib.so +0 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/netgen_mesher.libs/libngsolve.so +0 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/netgen_mesher.libs/libngstd.so +0 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/TensorProductTools.py +210 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/__console.py +94 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/__expr.py +181 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/__init__.py +148 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/_scikit_build_core_dependencies.py +30 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/bvp.py +78 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/config/__init__.py +1 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/config/__main__.py +4 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/config/config.py +60 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/TensorProduct/__init__.py +0 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/TensorProduct/tp_dg_1d_1d.py +80 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/TensorProduct/tp_dg_1d_2d.py +73 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/TensorProduct/tp_dg_2d_1d.py +72 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/TensorProduct/tp_dg_2d_2d.py +66 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/__init__.py +0 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/howto/__init__.py +0 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/howto/hhj.py +44 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/howto/hybrid_dg.py +53 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/howto/mixed.py +30 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/howto/nonlin.py +29 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/howto/pickling.py +26 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/howto/pml.py +31 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/howto/taskmanager.py +20 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/howto/tdnns.py +47 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/howto/timeDG-skeleton.py +45 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/howto/timeDG.py +38 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/howto/timeDGlap.py +42 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/howto/timeDGwave.py +61 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/intro/__init__.py +0 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/intro/adaptive.py +123 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/intro/cmagnet.py +59 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/intro/elasticity.py +76 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/intro/navierstokes.py +74 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/intro/poisson.ipynb +170 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/intro/poisson.py +41 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/mpi/__init__.py +0 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/mpi/mpi_cmagnet.py +87 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/mpi/mpi_navierstokes.py +117 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/mpi/mpi_poisson.py +89 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/mpi/mpi_timeDG.py +82 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/directsolvers.py +14 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/eigenvalues.py +364 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/internal.py +89 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/krylovspace.py +1182 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/meshes.py +748 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/ngs2petsc.py +310 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/ngscuda.so +0 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/ngscxx.py +42 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/ngslib.so +0 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/nonlinearsolvers.py +203 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/preconditioners.py +11 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/solve_implementation.py +168 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/solvers/__init__.py +7 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/solvers/cudss.py +112 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/timestepping.py +185 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/timing.py +108 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/utils.py +167 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/webgui.py +671 -0
- ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/beam.geo +17 -0
- ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/beam.vol +240 -0
- ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/chip.in2d +41 -0
- ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/chip.vol +614 -0
- ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/coil.geo +12 -0
- ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/coil.vol +2560 -0
- ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/coilshield.geo +24 -0
- ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/coilshield.vol +3179 -0
- ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/cube.geo +19 -0
- ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/cube.vol +1832 -0
- ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/d10_DGdoubleglazing.pde +50 -0
- ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/d11_chip_nitsche.pde +40 -0
- ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/d1_square.pde +43 -0
- ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/d2_chip.pde +35 -0
- ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/d3_helmholtz.pde +22 -0
- ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/d4_cube.pde +46 -0
- ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/d5_beam.pde +74 -0
- ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/d6_shaft.pde +73 -0
- ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/d7_coil.pde +50 -0
- ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/d8_coilshield.pde +49 -0
- ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/d9_hybridDG.pde +72 -0
- ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/doubleglazing.in2d +27 -0
- ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/doubleglazing.vol +737 -0
- ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/piezo2d40round4.vol.gz +0 -0
- ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/shaft.geo +73 -0
- ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/shaft.vol +4291 -0
- ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/square.in2d +17 -0
- ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/square.vol +149 -0
- ngsolve-6.2.2506.post216.dev0.dist-info/METADATA +14 -0
- ngsolve-6.2.2506.post216.dev0.dist-info/RECORD +306 -0
- ngsolve-6.2.2506.post216.dev0.dist-info/WHEEL +5 -0
- ngsolve-6.2.2506.post216.dev0.dist-info/licenses/LICENSE +504 -0
- ngsolve-6.2.2506.post216.dev0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,317 @@
|
|
|
1
|
+
#ifndef CUDA_NGBLA
|
|
2
|
+
#define CUDA_NGBLA
|
|
3
|
+
|
|
4
|
+
#include <stdexcept>
#include <string>

#include <cuda_runtime.h>
#include <cublas_v2.h>

#include <vector.hpp>
#include <matrix.hpp>

#include "cuda_ngstd.hpp"
#include "linalg_kernels.hpp"
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
namespace ngla
|
|
15
|
+
{
|
|
16
|
+
// Returns the cuBLAS handle used for cuBLAS calls in this header (see
// CudaMultMatMat2). Only declared here; the definition lives elsewhere.
// NOTE(review): presumably a single shared handle - confirm against the definition.
cublasHandle_t Get_CuBlas_Handle ();
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
namespace ngbla
|
|
21
|
+
{
|
|
22
|
+
using namespace ngs_cuda;
|
|
23
|
+
|
|
24
|
+
// template<> struct trivtrans<Dev<double>> { static constexpr bool value = true; };
|
|
25
|
+
// Specialization reporting Dev<double> as a scalar type.
// NOTE(review): the primary is_scalar_type template is declared elsewhere;
// presumably it is consulted by the expression templates - confirm.
template<> struct is_scalar_type<Dev<double>> { static constexpr bool value = true; };
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
template <typename T>
|
|
29
|
+
class Vector<Dev<T>> : public FlatVector<Dev<T>>
|
|
30
|
+
{
|
|
31
|
+
using FlatVector<Dev<T>>::Size;
|
|
32
|
+
using FlatVector<Dev<T>>::Data;
|
|
33
|
+
|
|
34
|
+
public:
|
|
35
|
+
Vector (Vector&) = delete;
|
|
36
|
+
Vector (Vector&&v2)
|
|
37
|
+
: FlatVector<Dev<T>>(v2.Size(), v2.Data())
|
|
38
|
+
{
|
|
39
|
+
v2.layout = { nullptr, 0 };
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
Vector (size_t asize)
|
|
43
|
+
: FlatVector<Dev<T>>(asize, Dev<T>::Malloc(asize)) { ; }
|
|
44
|
+
|
|
45
|
+
Vector (FlatVector<T> vec)
|
|
46
|
+
: FlatVector<Dev<T>>(vec.Size(), Dev<T>::Malloc(vec.Size()))
|
|
47
|
+
{
|
|
48
|
+
H2D(vec);
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
~Vector()
|
|
52
|
+
{
|
|
53
|
+
Dev<T>::Free(Data());
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
template<typename TB>
|
|
58
|
+
Vector & operator= (const Expr<TB> & v)
|
|
59
|
+
{
|
|
60
|
+
MatExpr<FlatVector<Dev<T>> >::operator= (v);
|
|
61
|
+
return *this;
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
void D2H (FlatVector<T> vec) const
|
|
66
|
+
{
|
|
67
|
+
cudaMemcpy (vec.Data(), Data(), sizeof(T)*Size(), cudaMemcpyDeviceToHost);
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
void H2D (FlatVector<T> vec)
|
|
71
|
+
{
|
|
72
|
+
cudaMemcpy (Data(), vec.Data(), sizeof(T)*Size(), cudaMemcpyHostToDevice);
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
Vector<T> D2H() const
|
|
76
|
+
{
|
|
77
|
+
Vector<T> vh(Size());
|
|
78
|
+
D2H (vh);
|
|
79
|
+
return vh;
|
|
80
|
+
}
|
|
81
|
+
};
|
|
82
|
+
|
|
83
|
+
/// Downloads a vector of Dev<double> elements into a newly allocated host vector.
/// Throws std::runtime_error if cudaMemcpy reports an error, instead of
/// silently returning a vector with unspecified contents.
inline Vector<double> D2H (FlatVector<Dev<double>> dvec)
{
  Vector<double> hvec(dvec.Size());
  cudaError_t err = cudaMemcpy (hvec.Data(), dvec.Data(), sizeof(double)*hvec.Size(), cudaMemcpyDeviceToHost);
  if (err != cudaSuccess)
    throw std::runtime_error(std::string("D2H(FlatVector<Dev<double>>): cudaMemcpy failed: ")
                             + cudaGetErrorString(err));
  return hvec;
}
|
|
89
|
+
|
|
90
|
+
#ifdef OLDOLD
|
|
91
|
+
#ifdef __CUDACC__
|
|
92
|
+
template <typename TS, typename TD>
|
|
93
|
+
__global__ void kernel_Assign (size_t n, TD pod_dst, TS pod_src)
|
|
94
|
+
{
|
|
95
|
+
auto dst = *pod_dst;
|
|
96
|
+
auto src = *pod_src;
|
|
97
|
+
|
|
98
|
+
int tid = blockIdx.x*blockDim.x+threadIdx.x;
|
|
99
|
+
for (int i = tid; i < n; i += blockDim.x*gridDim.x)
|
|
100
|
+
if (i < 5)
|
|
101
|
+
dst(i) = src.S();
|
|
102
|
+
else
|
|
103
|
+
dst(i) = src.A()(i);
|
|
104
|
+
}
|
|
105
|
+
#endif
|
|
106
|
+
|
|
107
|
+
template <typename T>
|
|
108
|
+
class AsPOD
|
|
109
|
+
{
|
|
110
|
+
std::array<char, sizeof(T)> data;
|
|
111
|
+
public:
|
|
112
|
+
AsPOD(const AsPOD&) = default;
|
|
113
|
+
INLINE AsPOD (const T & adata)
|
|
114
|
+
{
|
|
115
|
+
char * pdata = (char*)(void*)&adata;
|
|
116
|
+
for (int i = 0; i < sizeof(T); i++)
|
|
117
|
+
data[i] = pdata[i];
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
INLINE const T & operator* () const
|
|
121
|
+
{
|
|
122
|
+
T * val = (T*)(void*)&data[0];
|
|
123
|
+
return *val;
|
|
124
|
+
}
|
|
125
|
+
INLINE int operator[] (int i) const { return data[i]; }
|
|
126
|
+
};
|
|
127
|
+
#endif
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
template <typename TOP, typename T, typename TS, typename TDIST, typename TB>
|
|
132
|
+
class assign_trait<TOP, VectorView<T,TS,TDIST>, TB,
|
|
133
|
+
enable_if_t < std::is_same<std::invoke_result_t<VectorView<T,TS,TDIST>,size_t>, Dev<double>&>::value, int>>
|
|
134
|
+
{
|
|
135
|
+
public:
|
|
136
|
+
static INLINE VectorView<T,TS,TDIST> & Assign (MatExpr<VectorView<T,TS,TDIST>> & self, const Expr<TB> & v)
|
|
137
|
+
{
|
|
138
|
+
|
|
139
|
+
#ifdef __CUDACC__
|
|
140
|
+
|
|
141
|
+
ngs_cuda::DeviceParallelFor
|
|
142
|
+
(self.Height(),
|
|
143
|
+
[devself=self.Spec(), devv=v.Spec()] DEVICE_LAMBDA (auto tid) -> void
|
|
144
|
+
{
|
|
145
|
+
// devself(tid) = devv(tid);
|
|
146
|
+
TOP()(devself(tid),devv(tid));
|
|
147
|
+
});
|
|
148
|
+
|
|
149
|
+
#endif
|
|
150
|
+
|
|
151
|
+
return self.Spec();
|
|
152
|
+
}
|
|
153
|
+
};
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
template <typename T>
|
|
160
|
+
class Matrix<Dev<T>> : public FlatMatrix<Dev<T>>
|
|
161
|
+
{
|
|
162
|
+
using FlatMatrix<Dev<T>>::h;
|
|
163
|
+
using FlatMatrix<Dev<T>>::w;
|
|
164
|
+
using FlatMatrix<Dev<T>>::data;
|
|
165
|
+
|
|
166
|
+
public:
|
|
167
|
+
Matrix (Matrix&) = delete;
|
|
168
|
+
Matrix (Matrix&&m2)
|
|
169
|
+
: FlatMatrix<Dev<T>>(m2.Height(), m2.Width(), m2.Data())
|
|
170
|
+
{
|
|
171
|
+
m2.data = nullptr;
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
Matrix (size_t h_, size_t w_)
|
|
175
|
+
: FlatMatrix<Dev<T>>(h_, w_, Dev<T>::Malloc(h_*w_)) { ; }
|
|
176
|
+
|
|
177
|
+
Matrix (FlatMatrix<T> mat)
|
|
178
|
+
: FlatMatrix<Dev<T>>(mat.Height(), mat.Width(),
|
|
179
|
+
Dev<T>::Malloc(mat.Height()*mat.Width()))
|
|
180
|
+
{
|
|
181
|
+
H2D(mat);
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
~Matrix()
|
|
185
|
+
{
|
|
186
|
+
Dev<T>::Free(data);
|
|
187
|
+
}
|
|
188
|
+
|
|
189
|
+
void D2H (FlatMatrix<T> mat) const
|
|
190
|
+
{
|
|
191
|
+
cudaMemcpy (mat.Data(), data, sizeof(T)*h*w, cudaMemcpyDeviceToHost);
|
|
192
|
+
}
|
|
193
|
+
|
|
194
|
+
void H2D (FlatMatrix<T> mat)
|
|
195
|
+
{
|
|
196
|
+
cudaMemcpy (data, mat.Data(), sizeof(T)*h*w, cudaMemcpyHostToDevice);
|
|
197
|
+
}
|
|
198
|
+
|
|
199
|
+
Matrix<T> D2H() const
|
|
200
|
+
{
|
|
201
|
+
Matrix<T> mh(h, w);
|
|
202
|
+
D2H (mh);
|
|
203
|
+
return mh;
|
|
204
|
+
}
|
|
205
|
+
};
|
|
206
|
+
|
|
207
|
+
// Copies a row-major device slice into a newly allocated host matrix.
// Rows of dmat are dmat.Dist() entries apart on the device; the host matrix
// is filled row by row with rows of hmat.Width() entries.
inline Matrix<double> D2H (SliceMatrix<Dev<double>> dmat)
{
  Matrix<double> hmat(dmat.Height(), dmat.Width());
  const size_t nrows = hmat.Height();
  const size_t rowbytes = sizeof(double)*hmat.Width();
  if (nrows == 0 || rowbytes == 0)
    return hmat;   // nothing to transfer

  if (nrows > 1 && dmat.Dist() >= hmat.Width())
    {
      // one strided transfer instead of one blocking cudaMemcpy per row
      cudaMemcpy2D (hmat.Data(), rowbytes,
                    dmat.Data(), sizeof(double)*dmat.Dist(),
                    rowbytes, nrows, cudaMemcpyDeviceToHost);
      return hmat;
    }

  // single row, or a row distance shorter than a row (not a valid pitch
  // for cudaMemcpy2D): copy row by row as before
  for (size_t i = 0; i < nrows; i++)
    cudaMemcpy (&hmat(i,0), &dmat(i,0), rowbytes, cudaMemcpyDeviceToHost);
  return hmat;
}
|
|
214
|
+
|
|
215
|
+
// Column-major overload: downloads the transposed view of dmat through the
// overload taking SliceMatrix<Dev<double>> and returns the transpose of
// that host matrix.
inline Matrix<double,ColMajor> D2H (SliceMatrix<Dev<double>,ColMajor> dmat)
{
  auto dmat_t = Trans(dmat);
  return Trans(D2H(dmat_t));
}
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
template <ORDERING ORDA, ORDERING ORDB>
|
|
224
|
+
void CudaMultMatMat2 (SliceMatrix<Dev<double>, ORDA> a, SliceMatrix<Dev<double>,ORDB> b,
|
|
225
|
+
SliceMatrix<Dev<double>, ORDERING::ColMajor> c,
|
|
226
|
+
double alpha, double beta)
|
|
227
|
+
{
|
|
228
|
+
static Timer t("cublasDgemm");
|
|
229
|
+
CudaRegionTimer rt(t);
|
|
230
|
+
cublasSetStream(ngla::Get_CuBlas_Handle(), ngs_cuda_stream);
|
|
231
|
+
cublasStatus_t stat =
|
|
232
|
+
cublasDgemm(ngla::Get_CuBlas_Handle(),
|
|
233
|
+
ORDA==ORDERING::RowMajor ? CUBLAS_OP_T : CUBLAS_OP_N,
|
|
234
|
+
ORDB==ORDERING::RowMajor ? CUBLAS_OP_T : CUBLAS_OP_N,
|
|
235
|
+
c.Height(), c.Width(), a.Width(),
|
|
236
|
+
&alpha, (double*)a.Data(), a.Dist(), (double*)b.Data(), b.Dist(),
|
|
237
|
+
&beta, (double*)c.Data(), c.Dist());
|
|
238
|
+
}
|
|
239
|
+
|
|
240
|
+
template <ORDERING ORDA, ORDERING ORDB>
|
|
241
|
+
void CudaMultMatMat2 (SliceMatrix<Dev<double>, ORDA> a, SliceMatrix<Dev<double>,ORDB> b,
|
|
242
|
+
SliceMatrix<Dev<double>, ORDERING::RowMajor> c,
|
|
243
|
+
double alpha, double beta)
|
|
244
|
+
{
|
|
245
|
+
CudaMultMatMat2 (Trans(b), Trans(a), Trans(c), alpha, beta);
|
|
246
|
+
}
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
template <typename TA, typename TB, typename TC,
|
|
250
|
+
enable_if_t<IsConvertibleToSliceMatrix<TA,Dev<double>>(),int> = 0,
|
|
251
|
+
enable_if_t<IsConvertibleToSliceMatrix<TB,Dev<double>>(),int> = 0,
|
|
252
|
+
enable_if_t<IsConvertibleToSliceMatrix<TC,Dev<double>>(),int> = 0>
|
|
253
|
+
void MultMatMat (const TA & a, const TB & b, const TC & c, double alpha=1, double beta=0)
|
|
254
|
+
{
|
|
255
|
+
CudaMultMatMat2(make_SliceMatrix(a), make_SliceMatrix(b), make_SliceMatrix(c), alpha, beta);
|
|
256
|
+
}
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
template <typename TOP, typename T, typename TB1, typename TB2>
|
|
260
|
+
class assign_trait<TOP, T, MultExpr<TB1,TB2>,
|
|
261
|
+
enable_if_t<IsConvertibleToSliceMatrix<T,Dev<double>>(),int>>
|
|
262
|
+
{
|
|
263
|
+
public:
|
|
264
|
+
static INLINE T & Assign (MatExpr<T> & self, const Expr<MultExpr<TB1,TB2>> & v)
|
|
265
|
+
{
|
|
266
|
+
auto res = self.View();
|
|
267
|
+
|
|
268
|
+
double alpha = std::is_same_v<TOP,typename MatExpr<T>::AsSub> ? -1 : 1;
|
|
269
|
+
double beta = std::is_same_v<TOP,typename MatExpr<T>::As> ? 0 : 1;
|
|
270
|
+
|
|
271
|
+
MultMatMat (v.Spec().A(), v.Spec().B(), self.Spec(), alpha, beta);
|
|
272
|
+
return self.Spec();
|
|
273
|
+
}
|
|
274
|
+
};
|
|
275
|
+
|
|
276
|
+
template <typename TOP, typename T, typename TB1, typename TB2>
|
|
277
|
+
class assign_trait<TOP, T, ScaleExpr<MultExpr<TB1,TB2>,double>,
|
|
278
|
+
enable_if_t<IsConvertibleToSliceMatrix<T,Dev<double>>(),int>>
|
|
279
|
+
{
|
|
280
|
+
public:
|
|
281
|
+
static inline T & Assign (MatExpr<T> & self, const Expr<ScaleExpr<MultExpr<TB1,TB2>,double>> & v)
|
|
282
|
+
{
|
|
283
|
+
auto res = self.View();
|
|
284
|
+
|
|
285
|
+
double alpha = is_same_v<TOP,typename MatExpr<T>::AsSub> ? -1 : 1;
|
|
286
|
+
double beta = is_same_v<TOP,typename MatExpr<T>::As> ? 0 : 1;
|
|
287
|
+
|
|
288
|
+
alpha *= v.View().S();
|
|
289
|
+
|
|
290
|
+
MultMatMat (v.View().A().A(), v.View().A().B(), self.ViewRW(), alpha, beta);
|
|
291
|
+
return self.Spec();
|
|
292
|
+
}
|
|
293
|
+
};
|
|
294
|
+
|
|
295
|
+
template <typename TOP, typename T, typename TB1, typename TB2>
|
|
296
|
+
class assign_trait<TOP, T, MultExpr<ScaleExpr<TB1,double>,TB2>,
|
|
297
|
+
enable_if_t<IsConvertibleToSliceMatrix<T,Dev<double>>(),int>>
|
|
298
|
+
{
|
|
299
|
+
public:
|
|
300
|
+
static inline T & Assign (MatExpr<T> & self, const Expr<MultExpr<ScaleExpr<TB1,double>,TB2>> & v)
|
|
301
|
+
{
|
|
302
|
+
auto res = self.View();
|
|
303
|
+
|
|
304
|
+
double alpha = is_same_v<TOP,typename MatExpr<T>::AsSub> ? -1 : 1;
|
|
305
|
+
double beta = is_same_v<TOP,typename MatExpr<T>::As> ? 0 : 1;
|
|
306
|
+
|
|
307
|
+
alpha *= v.View().A().S();
|
|
308
|
+
|
|
309
|
+
MultMatMat (v.View().A().A(), v.View().B(), self.ViewRW(), alpha, beta);
|
|
310
|
+
return self.Spec();
|
|
311
|
+
}
|
|
312
|
+
};
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
}
|
|
316
|
+
|
|
317
|
+
#endif
|
|
@@ -0,0 +1,414 @@
|
|
|
1
|
+
#ifndef CUDA_NGSTD_HPP
|
|
2
|
+
#define CUDA_NGSTD_HPP
|
|
3
|
+
|
|
4
|
+
#include <cuda_runtime.h>
|
|
5
|
+
#include <ngstd.hpp>
|
|
6
|
+
|
|
7
|
+
#include "cuda_core.hpp"
|
|
8
|
+
#include "cuda_profiler.hpp"
|
|
9
|
+
|
|
10
|
+
namespace ngs_cuda
|
|
11
|
+
{
|
|
12
|
+
using namespace ngstd;
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
extern int gpu_clock;
|
|
17
|
+
void InitCUDA (int verbose = 2);
|
|
18
|
+
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
namespace ngcore {
|
|
22
|
+
template <typename T>
|
|
23
|
+
struct IsSafe<ngs_cuda::Dev<T>> {
|
|
24
|
+
constexpr operator bool() const { return true; }
|
|
25
|
+
};
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
namespace ngs_cuda
|
|
30
|
+
{
|
|
31
|
+
|
|
32
|
+
// TODO: Resize + error checking
|
|
33
|
+
class DevStackMemory
|
|
34
|
+
{
|
|
35
|
+
char * data;
|
|
36
|
+
char * stackptr;
|
|
37
|
+
public:
|
|
38
|
+
DevStackMemory (size_t s = 512*1024*1025)
|
|
39
|
+
{
|
|
40
|
+
cudaMalloc (&data, s);
|
|
41
|
+
stackptr = data;
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
~DevStackMemory ()
|
|
45
|
+
{
|
|
46
|
+
cudaFree (data);
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
template <typename T>
|
|
50
|
+
T * Alloc (size_t s)
|
|
51
|
+
{
|
|
52
|
+
char * tmp = stackptr;
|
|
53
|
+
s *= sizeof(T);
|
|
54
|
+
s = (s+255) & size_t(-256);
|
|
55
|
+
stackptr += s;
|
|
56
|
+
return reinterpret_cast<T*>(tmp);
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
void Free (void * ptr)
|
|
60
|
+
{
|
|
61
|
+
stackptr = reinterpret_cast<char*> (ptr);
|
|
62
|
+
}
|
|
63
|
+
};
|
|
64
|
+
|
|
65
|
+
extern DevStackMemory stackmemory;
|
|
66
|
+
|
|
67
|
+
template <typename T>
|
|
68
|
+
class DevStackArray : public FlatArray<Dev<T>>
|
|
69
|
+
{
|
|
70
|
+
public:
|
|
71
|
+
DevStackArray (size_t s)
|
|
72
|
+
: FlatArray<Dev<T>> (s, (Dev<T>*)stackmemory.Alloc<T>(s))
|
|
73
|
+
{ ; }
|
|
74
|
+
~DevStackArray ()
|
|
75
|
+
{
|
|
76
|
+
stackmemory.Free(this->data);
|
|
77
|
+
}
|
|
78
|
+
T * DevData () const { return (T*)this->data; }
|
|
79
|
+
};
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
namespace std {
|
|
83
|
+
template <typename T>
|
|
84
|
+
struct is_integral<ngs_cuda::Dev<T>> {
|
|
85
|
+
static constexpr bool value = is_integral<T>::value;
|
|
86
|
+
};
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
namespace ngcore
|
|
92
|
+
{
|
|
93
|
+
using ngs_cuda::Dev;
|
|
94
|
+
template <typename T>
|
|
95
|
+
class Array<Dev<T>> : public FlatArray<Dev<T>>
|
|
96
|
+
{
|
|
97
|
+
public:
|
|
98
|
+
Array() = default;
|
|
99
|
+
Array (size_t s)
|
|
100
|
+
: FlatArray<Dev<T>>(s, Dev<T>::Malloc(s)) { } ;
|
|
101
|
+
Array (FlatArray<T> a2)
|
|
102
|
+
: Array(a2.Size())
|
|
103
|
+
{
|
|
104
|
+
this->data->H2D(a2);
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
Array& operator= (Array<Dev<T>> && a2)
|
|
108
|
+
{
|
|
109
|
+
Swap (this->data, a2.data);
|
|
110
|
+
Swap (this->size, a2.size);
|
|
111
|
+
return *this;
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
Array& operator= (FlatArray<T> a2)
|
|
115
|
+
{
|
|
116
|
+
SetSize(a2.Size());
|
|
117
|
+
this->data->H2D(a2);
|
|
118
|
+
return *this;
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
void SetSize(size_t s)
|
|
122
|
+
{
|
|
123
|
+
if (this->Size() != s)
|
|
124
|
+
{
|
|
125
|
+
Dev<T>::Free(this->data);
|
|
126
|
+
this->data = Dev<T>::Malloc(s);
|
|
127
|
+
this->size = s;
|
|
128
|
+
}
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
~Array()
|
|
132
|
+
{
|
|
133
|
+
Dev<T>::Free(this->data);
|
|
134
|
+
}
|
|
135
|
+
};
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
namespace ngs_cuda
|
|
140
|
+
{
|
|
141
|
+
// use Array<Dev<T>> instead
|
|
142
|
+
template <typename T>
|
|
143
|
+
class [[deprecated]] DevArray
|
|
144
|
+
{
|
|
145
|
+
int size;
|
|
146
|
+
T * dev_data;
|
|
147
|
+
|
|
148
|
+
public:
|
|
149
|
+
DevArray (int asize)
|
|
150
|
+
{
|
|
151
|
+
size = asize;
|
|
152
|
+
cudaMalloc((T**)&dev_data, size*sizeof(T));
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
DevArray (FlatArray<T> a2)
|
|
156
|
+
{
|
|
157
|
+
size = a2.Size();
|
|
158
|
+
cudaMalloc((T**)&dev_data, size*sizeof(T));
|
|
159
|
+
cudaMemcpy (dev_data, &a2[0], sizeof(T)*size, cudaMemcpyHostToDevice);
|
|
160
|
+
}
|
|
161
|
+
|
|
162
|
+
~DevArray ()
|
|
163
|
+
{
|
|
164
|
+
cudaFree (dev_data);
|
|
165
|
+
}
|
|
166
|
+
|
|
167
|
+
T * DevPtr() { return dev_data; }
|
|
168
|
+
|
|
169
|
+
DevArray & operator= (FlatArray<T> a2)
|
|
170
|
+
{
|
|
171
|
+
cudaMemcpy (dev_data, &a2[0], sizeof(T)*size, cudaMemcpyHostToDevice);
|
|
172
|
+
return *this;
|
|
173
|
+
}
|
|
174
|
+
|
|
175
|
+
void D2H (FlatArray<T> a2) const
|
|
176
|
+
{
|
|
177
|
+
cudaMemcpy (&a2[0], dev_data, sizeof(T)*size, cudaMemcpyDeviceToHost);
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
INLINE int Size() const { return size; }
|
|
181
|
+
|
|
182
|
+
/*
|
|
183
|
+
INLINE operator FlatArray<T> ()
|
|
184
|
+
{
|
|
185
|
+
return FlatArray<T> (size, dev_data);
|
|
186
|
+
}
|
|
187
|
+
*/
|
|
188
|
+
INLINE FlatArray<T> Dev() const
|
|
189
|
+
{
|
|
190
|
+
return FlatArray<T> (size, dev_data);
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
explicit INLINE operator Array<T> () const
|
|
194
|
+
{
|
|
195
|
+
Array<T> temp(size);
|
|
196
|
+
#ifdef __CUDA_ARCH__
|
|
197
|
+
temp = FlatArray<T> (*this);
|
|
198
|
+
#else
|
|
199
|
+
D2H (temp);
|
|
200
|
+
#endif
|
|
201
|
+
return temp;
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
INLINE Array<T> Host() const
|
|
205
|
+
{
|
|
206
|
+
return Array<T> (*this);
|
|
207
|
+
}
|
|
208
|
+
|
|
209
|
+
T * DevData() const { return dev_data; }
|
|
210
|
+
};
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
template <typename T>
|
|
215
|
+
inline Array<T> D2H (FlatArray<Dev<T>> deva)
|
|
216
|
+
{
|
|
217
|
+
Array<T> hosta(deva.Size());
|
|
218
|
+
cudaMemcpy (hosta.Data(), deva.Data(), sizeof(T)*hosta.Size(), cudaMemcpyDeviceToHost);
|
|
219
|
+
return hosta;
|
|
220
|
+
}
|
|
221
|
+
|
|
222
|
+
template <typename T>
|
|
223
|
+
inline void H2D (FlatArray<Dev<T>> deva, FlatArray<T> hosta)
|
|
224
|
+
{
|
|
225
|
+
cudaMemcpy (deva.Data(), hosta.Data(), sizeof(T)*hosta.Size(), cudaMemcpyHostToDevice);
|
|
226
|
+
}
|
|
227
|
+
|
|
228
|
+
/*
|
|
229
|
+
template <class T>
|
|
230
|
+
class TableWrapper : public Table<T>
|
|
231
|
+
{
|
|
232
|
+
using Table<T>::size;
|
|
233
|
+
using Table<T>::data;
|
|
234
|
+
using Table<T>::index;
|
|
235
|
+
public:
|
|
236
|
+
INLINE TableWrapper (int asize, int * aindex, T * adata)
|
|
237
|
+
// : Table<T> (0,0)
|
|
238
|
+
{
|
|
239
|
+
size = asize;
|
|
240
|
+
index = aindex;
|
|
241
|
+
data = adata;
|
|
242
|
+
}
|
|
243
|
+
|
|
244
|
+
INLINE TableWrapper (const Table<T> & tab)
|
|
245
|
+
// : Table<T> (0,0)
|
|
246
|
+
{
|
|
247
|
+
const TableWrapper<T> & htab = static_cast<const TableWrapper<T>&> (tab);
|
|
248
|
+
size = htab.size;
|
|
249
|
+
data = htab.data;
|
|
250
|
+
index = htab.index;
|
|
251
|
+
}
|
|
252
|
+
INLINE ~TableWrapper ()
|
|
253
|
+
{
|
|
254
|
+
data = NULL;
|
|
255
|
+
index = NULL;
|
|
256
|
+
}
|
|
257
|
+
|
|
258
|
+
INLINE int SizeData() { return index[size]; }
|
|
259
|
+
INLINE int* & Index() { return index; }
|
|
260
|
+
INLINE T* & Data() { return data; }
|
|
261
|
+
|
|
262
|
+
// HD const int * & Index() const { return index; }
|
|
263
|
+
// HD const T * & Data() const { return data; }
|
|
264
|
+
};
|
|
265
|
+
*/
|
|
266
|
+
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
// only data at device, but index at host
|
|
270
|
+
template <typename T>
|
|
271
|
+
class DevDataTable
|
|
272
|
+
{
|
|
273
|
+
int size;
|
|
274
|
+
size_t * index = nullptr;
|
|
275
|
+
Dev<T> * dev_data = nullptr;
|
|
276
|
+
|
|
277
|
+
public:
|
|
278
|
+
|
|
279
|
+
DevDataTable (FlatTable<T> t2)
|
|
280
|
+
{
|
|
281
|
+
size = t2.Size();
|
|
282
|
+
if (size == 0) return;
|
|
283
|
+
|
|
284
|
+
index = new size_t[size+1];
|
|
285
|
+
for (int i = 0; i <= size; i++)
|
|
286
|
+
index[i] = t2.IndexArray()[i];
|
|
287
|
+
|
|
288
|
+
int sizedata = t2.AsArray().Size();
|
|
289
|
+
dev_data = Dev<T>::Malloc(sizedata);
|
|
290
|
+
cudaMemcpy (dev_data, t2.Data(), sizeof(T)*sizedata, cudaMemcpyHostToDevice);
|
|
291
|
+
}
|
|
292
|
+
|
|
293
|
+
~DevDataTable ()
|
|
294
|
+
{
|
|
295
|
+
Dev<T>::Free (dev_data);
|
|
296
|
+
delete [] index;
|
|
297
|
+
}
|
|
298
|
+
|
|
299
|
+
void D2H (FlatTable<T> & t2) const
|
|
300
|
+
{
|
|
301
|
+
int sizedata = t2.AsArray().Size();
|
|
302
|
+
cudaMemcpy (&t2[0][0], dev_data, sizeof(T)*sizedata, cudaMemcpyDeviceToHost);
|
|
303
|
+
}
|
|
304
|
+
|
|
305
|
+
operator FlatTable<Dev<T>> () const
|
|
306
|
+
{
|
|
307
|
+
return FlatTable<Dev<T>> (size, index, dev_data);
|
|
308
|
+
}
|
|
309
|
+
|
|
310
|
+
auto Index() const { return index; }
|
|
311
|
+
auto DevData() const { return dev_data; }
|
|
312
|
+
|
|
313
|
+
FlatArray<Dev<T>> Row(int i) const { return { index[i+1]-index[i], dev_data+index[i] }; }
|
|
314
|
+
|
|
315
|
+
class Iterator
|
|
316
|
+
{
|
|
317
|
+
const DevDataTable & tab;
|
|
318
|
+
size_t row;
|
|
319
|
+
public:
|
|
320
|
+
Iterator (const DevDataTable & _tab, size_t _row) : tab(_tab), row(_row) { ; }
|
|
321
|
+
Iterator & operator++ () { ++row; return *this; }
|
|
322
|
+
auto operator* () const { return tab.Row(row); }
|
|
323
|
+
bool operator!= (const Iterator & it2) { return row != it2.row; }
|
|
324
|
+
};
|
|
325
|
+
|
|
326
|
+
Iterator begin() const { return Iterator(*this, 0); }
|
|
327
|
+
Iterator end() const { return Iterator(*this, size); }
|
|
328
|
+
};
|
|
329
|
+
|
|
330
|
+
|
|
331
|
+
template <typename T>
|
|
332
|
+
class DevTable
|
|
333
|
+
{
|
|
334
|
+
int size;
|
|
335
|
+
Dev<size_t> * dev_index = nullptr;
|
|
336
|
+
Dev<T> * dev_data = nullptr;
|
|
337
|
+
|
|
338
|
+
public:
|
|
339
|
+
|
|
340
|
+
DevTable (FlatTable<T> t2)
|
|
341
|
+
{
|
|
342
|
+
size = t2.Size();
|
|
343
|
+
if (size == 0) return;
|
|
344
|
+
|
|
345
|
+
cudaMalloc((size_t**)&dev_index, (size+1)*sizeof(size_t));
|
|
346
|
+
cudaMemcpy (dev_index, &t2.IndexArray()[0], sizeof(size_t)*(size+1), cudaMemcpyHostToDevice);
|
|
347
|
+
// cout << "res = " << cudaMemcpy (dev_index, t2.Index(), sizeof(int)*(size+1), cudaMemcpyHostToDevice) << endl;
|
|
348
|
+
|
|
349
|
+
int sizedata = t2.AsArray().Size();
|
|
350
|
+
cudaMalloc((int**)&dev_data, sizedata*sizeof(T));
|
|
351
|
+
cudaMemcpy (dev_data, t2.Data(), sizeof(T)*sizedata, cudaMemcpyHostToDevice);
|
|
352
|
+
}
|
|
353
|
+
|
|
354
|
+
~DevTable ()
|
|
355
|
+
{
|
|
356
|
+
cudaFree (dev_data);
|
|
357
|
+
cudaFree (dev_index);
|
|
358
|
+
}
|
|
359
|
+
|
|
360
|
+
void D2H (FlatTable<T> & t2) const
|
|
361
|
+
{
|
|
362
|
+
int sizedata = t2.AsArray().Size();
|
|
363
|
+
cudaMemcpy (&t2[0][0], dev_data, sizeof(T)*sizedata, cudaMemcpyDeviceToHost);
|
|
364
|
+
}
|
|
365
|
+
|
|
366
|
+
operator FlatTable<T> () const
|
|
367
|
+
{
|
|
368
|
+
return FlatTable<T> (size, (size_t*)dev_index, (T*)dev_data);
|
|
369
|
+
}
|
|
370
|
+
|
|
371
|
+
size_t * DevIndex() const { return (size_t*)dev_index; }
|
|
372
|
+
T * DevData() const { return (T*)dev_data; }
|
|
373
|
+
|
|
374
|
+
FlatArray<Dev<T>> AsArray() const
|
|
375
|
+
{
|
|
376
|
+
return FlatArray<Dev<T>> ( dev_index[size].D2H(), dev_data );
|
|
377
|
+
}
|
|
378
|
+
};
|
|
379
|
+
|
|
380
|
+
|
|
381
|
+
|
|
382
|
+
|
|
383
|
+
|
|
384
|
+
|
|
385
|
+
class DevBitArray
|
|
386
|
+
{
|
|
387
|
+
protected:
|
|
388
|
+
size_t size = 0;
|
|
389
|
+
unsigned char * dev_data = nullptr;
|
|
390
|
+
|
|
391
|
+
public:
|
|
392
|
+
DevBitArray (size_t asize);
|
|
393
|
+
DevBitArray (const ngcore::BitArray & ba);
|
|
394
|
+
|
|
395
|
+
~DevBitArray ();
|
|
396
|
+
|
|
397
|
+
DevBitArray & operator= (const ngcore::BitArray &ba);
|
|
398
|
+
|
|
399
|
+
size_t Size () const { return size; }
|
|
400
|
+
auto Data () const { return dev_data; }
|
|
401
|
+
|
|
402
|
+
void SetSize (size_t asize);
|
|
403
|
+
|
|
404
|
+
private:
|
|
405
|
+
size_t Addr (size_t i) const
|
|
406
|
+
{
|
|
407
|
+
return (i / CHAR_BIT);
|
|
408
|
+
}
|
|
409
|
+
};
|
|
410
|
+
|
|
411
|
+
}
|
|
412
|
+
|
|
413
|
+
|
|
414
|
+
#endif
|