ngsolve 6.2.2506.post216.dev0__cp314-cp314-manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ngsolve-6.2.2506.post216.dev0.data/data/bin/ngs_nvcc +22 -0
- ngsolve-6.2.2506.post216.dev0.data/data/bin/ngs_nvlink +17 -0
- ngsolve-6.2.2506.post216.dev0.data/data/bin/ngscxx +15 -0
- ngsolve-6.2.2506.post216.dev0.data/data/bin/ngsld +11 -0
- ngsolve-6.2.2506.post216.dev0.data/data/bin/ngsolve.tcl +648 -0
- ngsolve-6.2.2506.post216.dev0.data/data/bin/ngspy +2 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/analytic_integrals.hpp +10 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/arnoldi.hpp +55 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/bandmatrix.hpp +334 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/basematrix.hpp +963 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/basevector.hpp +1268 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/bdbequations.hpp +2807 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/bdbintegrator.hpp +1660 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/bem_diffops.hpp +475 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/bessel.hpp +1064 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/bilinearform.hpp +966 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/bla.hpp +29 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/blockalloc.hpp +95 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/blockjacobi.hpp +328 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/bspline.hpp +116 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/calcinverse.hpp +141 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/cg.hpp +368 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/chebyshev.hpp +44 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/cholesky.hpp +720 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/clapack.h +7254 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/code_generation.hpp +296 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/coefficient.hpp +2033 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/coefficient_impl.hpp +19 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/coefficient_stdmath.hpp +167 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/commutingAMG.hpp +106 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/comp.hpp +79 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/compatibility.hpp +41 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/complex_wrapper.hpp +101 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/compressedfespace.hpp +110 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/contact.hpp +239 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/cuda_core.hpp +216 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/cuda_linalg.hpp +185 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/cuda_ngbla.hpp +317 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/cuda_ngstd.hpp +414 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/cuda_profiler.hpp +240 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/diagonalmatrix.hpp +160 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/differentialoperator.hpp +276 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/diffop.hpp +1286 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/diffop_impl.hpp +328 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/diffopwithfactor.hpp +123 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/discontinuous.hpp +84 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/dump.hpp +949 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/ectypes.hpp +121 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/eigen.hpp +60 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/eigensystem.hpp +18 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/elasticity_equations.hpp +595 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/elementbyelement.hpp +201 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/elementtopology.hpp +1760 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/elementtransformation.hpp +339 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/evalfunc.hpp +405 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/expr.hpp +1693 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/facetfe.hpp +175 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/facetfespace.hpp +180 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/facethofe.hpp +111 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/facetsurffespace.hpp +112 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/fe_interfaces.hpp +32 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/fem.hpp +87 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/fesconvert.hpp +14 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/fespace.hpp +1454 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/finiteelement.hpp +286 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/globalinterfacespace.hpp +77 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/globalspace.hpp +115 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/gridfunction.hpp +525 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/h1amg.hpp +124 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/h1hofe.hpp +188 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/h1hofe_impl.hpp +1262 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/h1hofefo.hpp +148 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/h1hofefo_impl.hpp +185 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/h1hofespace.hpp +167 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/h1lofe.hpp +1240 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/h1lumping.hpp +41 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hcurl_equations.hpp +1381 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hcurlcurlfe.hpp +2241 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hcurlcurlfespace.hpp +78 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hcurlfe.hpp +259 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hcurlfe_utils.hpp +107 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hcurlhdiv_dshape.hpp +857 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hcurlhdivfes.hpp +308 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hcurlhofe.hpp +175 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hcurlhofe_impl.hpp +1871 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hcurlhofespace.hpp +193 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hcurllofe.hpp +1146 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hdiv_equations.hpp +880 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hdivdivfe.hpp +2923 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hdivdivsurfacespace.hpp +76 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hdivfe.hpp +206 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hdivfe_utils.hpp +717 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hdivfes.hpp +75 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hdivhofe.hpp +447 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hdivhofe_impl.hpp +1107 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hdivhofefo.hpp +229 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hdivhofespace.hpp +177 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hdivhosurfacefespace.hpp +106 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hdivlofe.hpp +773 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hidden.hpp +74 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/householder.hpp +181 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hypre_ams_precond.hpp +123 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/hypre_precond.hpp +73 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/integrator.hpp +2012 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/integratorcf.hpp +253 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/interpolate.hpp +49 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/intrule.hpp +2542 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/intrules_SauterSchwab.hpp +25 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/irspace.hpp +49 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/jacobi.hpp +153 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/kernels.hpp +724 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/l2hofe.hpp +194 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/l2hofe_impl.hpp +564 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/l2hofefo.hpp +542 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/l2hofespace.hpp +344 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/la.hpp +38 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/linalg_kernels.hpp +70 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/linearform.hpp +266 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/matrix.hpp +2145 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/memusage.hpp +41 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/meshaccess.hpp +1359 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/mgpre.hpp +204 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/mp_coefficient.hpp +145 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/mptools.hpp +2281 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/multigrid.hpp +42 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/multivector.hpp +447 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/mumpsinverse.hpp +187 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/mycomplex.hpp +361 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/ng_lapack.hpp +1661 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/ngblas.hpp +1232 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/ngs_defines.hpp +30 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/ngs_stdcpp_include.hpp +106 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/ngs_utils.hpp +121 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/ngsobject.hpp +1019 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/ngsstream.hpp +113 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/ngstd.hpp +72 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/nodalhofe.hpp +96 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/nodalhofe_impl.hpp +141 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/normalfacetfe.hpp +223 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/normalfacetfespace.hpp +98 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/normalfacetsurfacefespace.hpp +84 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/order.hpp +251 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/parallel_matrices.hpp +222 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/paralleldofs.hpp +340 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/parallelngs.hpp +23 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/parallelvector.hpp +269 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/pardisoinverse.hpp +200 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/periodic.hpp +129 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/plateaufespace.hpp +25 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/pml.hpp +275 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/pmltrafo.hpp +631 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/postproc.hpp +142 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/potentialtools.hpp +22 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/precomp.hpp +60 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/preconditioner.hpp +602 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/prolongation.hpp +380 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/python_comp.hpp +107 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/python_fem.hpp +89 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/python_linalg.hpp +58 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/python_ngstd.hpp +386 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/recursive_pol.hpp +4896 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/recursive_pol_tet.hpp +395 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/recursive_pol_trig.hpp +492 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/reorderedfespace.hpp +81 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/sample_sort.hpp +105 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/scalarfe.hpp +335 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/shapefunction_utils.hpp +113 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/simd_complex.hpp +329 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/smoother.hpp +253 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/solve.hpp +89 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/sparsecholesky.hpp +317 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/sparsefactorization_interface.hpp +159 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/sparsematrix.hpp +1052 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/sparsematrix_dyn.hpp +90 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/sparsematrix_impl.hpp +1055 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/special_matrix.hpp +463 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/specialelement.hpp +125 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/statushandler.hpp +33 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/stringops.hpp +12 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/superluinverse.hpp +136 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/symbolicintegrator.hpp +850 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/symmetricmatrix.hpp +144 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/tangentialfacetfe.hpp +224 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/tangentialfacetfespace.hpp +91 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/tensor.hpp +522 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/tensorcoefficient.hpp +446 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/tensorproductintegrator.hpp +113 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/thcurlfe.hpp +128 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/thcurlfe_impl.hpp +380 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/thdivfe.hpp +80 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/thdivfe_impl.hpp +492 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/tpdiffop.hpp +461 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/tpfes.hpp +133 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/tpintrule.hpp +224 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/triangular.hpp +465 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/tscalarfe.hpp +245 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/tscalarfe_impl.hpp +1029 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/umfpackinverse.hpp +148 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/unifiedvector.hpp +103 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/vector.hpp +1452 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/voxelcoefficientfunction.hpp +41 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/vtkoutput.hpp +198 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/vvector.hpp +208 -0
- ngsolve-6.2.2506.post216.dev0.data/data/include/netgen/webgui.hpp +92 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/cmake/ngsolve/NGSolveConfig.cmake +102 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/cmake/ngsolve/ngsolve-targets-release.cmake +89 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/cmake/ngsolve/ngsolve-targets.cmake +180 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/netgen_mesher.libs/libngbla.so +0 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/netgen_mesher.libs/libngcomp.so +0 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/netgen_mesher.libs/libngfem.so +0 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/netgen_mesher.libs/libngla.so +0 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/netgen_mesher.libs/libngsbem.so +0 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/netgen_mesher.libs/libngscudalib.so +0 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/netgen_mesher.libs/libngsolve.so +0 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/netgen_mesher.libs/libngstd.so +0 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/TensorProductTools.py +210 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/__console.py +94 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/__expr.py +181 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/__init__.py +148 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/_scikit_build_core_dependencies.py +30 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/bvp.py +78 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/config/__init__.py +1 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/config/__main__.py +4 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/config/config.py +60 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/TensorProduct/__init__.py +0 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/TensorProduct/tp_dg_1d_1d.py +80 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/TensorProduct/tp_dg_1d_2d.py +73 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/TensorProduct/tp_dg_2d_1d.py +72 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/TensorProduct/tp_dg_2d_2d.py +66 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/__init__.py +0 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/howto/__init__.py +0 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/howto/hhj.py +44 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/howto/hybrid_dg.py +53 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/howto/mixed.py +30 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/howto/nonlin.py +29 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/howto/pickling.py +26 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/howto/pml.py +31 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/howto/taskmanager.py +20 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/howto/tdnns.py +47 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/howto/timeDG-skeleton.py +45 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/howto/timeDG.py +38 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/howto/timeDGlap.py +42 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/howto/timeDGwave.py +61 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/intro/__init__.py +0 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/intro/adaptive.py +123 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/intro/cmagnet.py +59 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/intro/elasticity.py +76 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/intro/navierstokes.py +74 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/intro/poisson.ipynb +170 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/intro/poisson.py +41 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/mpi/__init__.py +0 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/mpi/mpi_cmagnet.py +87 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/mpi/mpi_navierstokes.py +117 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/mpi/mpi_poisson.py +89 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/mpi/mpi_timeDG.py +82 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/directsolvers.py +14 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/eigenvalues.py +364 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/internal.py +89 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/krylovspace.py +1182 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/meshes.py +748 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/ngs2petsc.py +310 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/ngscuda.so +0 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/ngscxx.py +42 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/ngslib.so +0 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/nonlinearsolvers.py +203 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/preconditioners.py +11 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/solve_implementation.py +168 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/solvers/__init__.py +7 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/solvers/cudss.py +112 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/timestepping.py +185 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/timing.py +108 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/utils.py +167 -0
- ngsolve-6.2.2506.post216.dev0.data/data/lib/python3.14/site-packages/ngsolve/webgui.py +671 -0
- ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/beam.geo +17 -0
- ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/beam.vol +240 -0
- ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/chip.in2d +41 -0
- ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/chip.vol +614 -0
- ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/coil.geo +12 -0
- ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/coil.vol +2560 -0
- ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/coilshield.geo +24 -0
- ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/coilshield.vol +3179 -0
- ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/cube.geo +19 -0
- ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/cube.vol +1832 -0
- ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/d10_DGdoubleglazing.pde +50 -0
- ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/d11_chip_nitsche.pde +40 -0
- ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/d1_square.pde +43 -0
- ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/d2_chip.pde +35 -0
- ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/d3_helmholtz.pde +22 -0
- ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/d4_cube.pde +46 -0
- ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/d5_beam.pde +74 -0
- ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/d6_shaft.pde +73 -0
- ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/d7_coil.pde +50 -0
- ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/d8_coilshield.pde +49 -0
- ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/d9_hybridDG.pde +72 -0
- ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/doubleglazing.in2d +27 -0
- ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/doubleglazing.vol +737 -0
- ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/piezo2d40round4.vol.gz +0 -0
- ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/shaft.geo +73 -0
- ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/shaft.vol +4291 -0
- ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/square.in2d +17 -0
- ngsolve-6.2.2506.post216.dev0.data/data/share/ngsolve/square.vol +149 -0
- ngsolve-6.2.2506.post216.dev0.dist-info/METADATA +14 -0
- ngsolve-6.2.2506.post216.dev0.dist-info/RECORD +306 -0
- ngsolve-6.2.2506.post216.dev0.dist-info/WHEEL +5 -0
- ngsolve-6.2.2506.post216.dev0.dist-info/licenses/LICENSE +504 -0
- ngsolve-6.2.2506.post216.dev0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,2281 @@
|
|
|
1
|
+
#ifndef FILE_MPTOOLS
|
|
2
|
+
#define FILE_MPTOOLS
|
|
3
|
+
|
|
4
|
+
/*
|
|
5
|
+
tools for computing with spherical harmonics and multi-poles
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
#include <bla.hpp>
|
|
10
|
+
#include <coefficient.hpp>
|
|
11
|
+
#include <recursive_pol.hpp>
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
namespace ngsbem
|
|
15
|
+
{
|
|
16
|
+
using namespace ngfem;
|
|
17
|
+
|
|
18
|
+
template<typename T>
|
|
19
|
+
constexpr int VecLength = 1; // Default: Complex has length 1
|
|
20
|
+
|
|
21
|
+
template<int N>
|
|
22
|
+
constexpr int VecLength<Vec<N, Complex>> = N; // Specialization: Vec<N,Complex> has length N
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
// NOTE(review): presumably the SIMD width used by the FMM code in this header — confirm against its users.
constexpr int FMM_SW{4};
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
// ************************ SIMD - creation (should end up in simd.hpp) *************
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
template <int S, typename T, int SW>
|
|
34
|
+
Vec<S,T> HSum (Vec<S,SIMD<T,SW>> v)
|
|
35
|
+
{
|
|
36
|
+
Vec<S,T> res;
|
|
37
|
+
for (int i = 0; i < S; i++)
|
|
38
|
+
res(i) = HSum(v(i));
|
|
39
|
+
// Iterate<S> ([&](auto i) {
|
|
40
|
+
// res.HTData().template Elem<i.value>() = HSum(v.HTData().template Elem<i.value>());
|
|
41
|
+
// });
|
|
42
|
+
return res;
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class NGS_DLL_HEADER PrecomputedSqrts
|
|
47
|
+
{
|
|
48
|
+
public:
|
|
49
|
+
Array<double> sqrt_int;
|
|
50
|
+
// Array<double> inv_sqrt_int;
|
|
51
|
+
Array<double> sqrt_n_np1; // sqrt(n*(n+1))
|
|
52
|
+
Array<double> inv_sqrt_2np1_2np3; // 1/sqrt( (2n+1)*(2n+3) )
|
|
53
|
+
|
|
54
|
+
PrecomputedSqrts();
|
|
55
|
+
};
|
|
56
|
+
|
|
57
|
+
// Table object used by the coefficient helpers below (e.g. CalcAmn, CalcBmn); only declared here, defined elsewhere.
extern NGS_DLL_HEADER PrecomputedSqrts presqrt;
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
/// Tuning parameters with their default values.
class FMM_Parameters
{
public:
  // NOTE(review): presumably the size limit up to which interactions are evaluated directly — confirm with the users of this field.
  int maxdirect{100};

  // order = minorder + 2 kappa r
  int minorder{20};
};
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
/// Convert a Cartesian vector into (length, theta, phi).
///  - length: L2Norm(dist)
///  - theta : acos(dist(2)/length), or 0 when length < 1e-30
///  - phi   : atan2(dist(1), dist(0)), or 0 when dist(0)^2 + dist(1)^2 < 1e-30
inline std::tuple<double, double, double> SphericalCoordinates(Vec<3> dist)
{
  const double radius = L2Norm(dist);

  // avoid the division by (almost) zero for a degenerate vector
  const double polar = (radius < 1e-30) ? 0.0 : acos (dist(2) / radius);

  // on the z-axis the azimuth is undetermined; 0 is used by convention
  const bool on_z_axis = sqr(dist(0))+sqr(dist(1)) < 1e-30;
  const double azimuth = on_z_axis ? 0.0 : atan2(dist(1), dist(0));

  return {radius, polar, azimuth};
}
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
template <typename entry_type = Complex>
|
|
87
|
+
class NGS_DLL_HEADER SphericalHarmonics
|
|
88
|
+
{
|
|
89
|
+
int order;
|
|
90
|
+
Vector<entry_type> coefs;
|
|
91
|
+
|
|
92
|
+
public:
|
|
93
|
+
SphericalHarmonics (int aorder)
|
|
94
|
+
: order(aorder), coefs(sqr(order+1)) { coefs=0.0; }
|
|
95
|
+
|
|
96
|
+
/// Degree the object was constructed with.
int Order() const
{
  return order;
}
|
|
97
|
+
/// The complete coefficient vector, handed out as a FlatVector<entry_type>.
FlatVector<entry_type> Coefs() const
{
  return coefs;
}
|
|
98
|
+
|
|
99
|
+
/// Writable access to coefficient (n,m); it is stored at position n*(n+1) + m.
entry_type & Coef(int n, int m)
{
  const int pos = n*(n+1) + m;
  return coefs(pos);
}
|
|
100
|
+
/// Value of coefficient (n,m); it is stored at position n*(n+1) + m.
entry_type Coef(int n, int m) const
{
  const int pos = n*(n+1) + m;
  return coefs(pos);
}
|
|
101
|
+
|
|
102
|
+
auto CoefsN (int n) const
|
|
103
|
+
{
|
|
104
|
+
return coefs.RangeN(n*n, 2*n+1);
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
static std::tuple<double,double> Polar (Vec<3> x)
|
|
108
|
+
{
|
|
109
|
+
double phi, theta;
|
|
110
|
+
if (x(0) == 0 && x(1) == 0)
|
|
111
|
+
{
|
|
112
|
+
phi = 0;
|
|
113
|
+
theta = x(2) > 0 ? 0 : M_PI;
|
|
114
|
+
}
|
|
115
|
+
else
|
|
116
|
+
{
|
|
117
|
+
phi = atan2(x(1), x(0));
|
|
118
|
+
theta = acos(x(2)/L2Norm(x));
|
|
119
|
+
}
|
|
120
|
+
return { theta, phi };
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
/// Evaluate at the direction of x: converts x to angles with Polar and
/// forwards to the (theta, phi) overload.
entry_type Eval (Vec<3> x) const
{
  const auto angles = Polar(x);
  return Eval(std::get<0>(angles), std::get<1>(angles));
}
|
|
128
|
+
|
|
129
|
+
entry_type Eval (double theta, double phi) const;
|
|
130
|
+
|
|
131
|
+
/// Same as EvalOrder(n, theta, phi), with the angles taken from Polar(x).
entry_type EvalOrder (int n, Vec<3> x) const
{
  const auto angles = Polar (x);
  return EvalOrder(n, std::get<0>(angles), std::get<1>(angles));
}
|
|
136
|
+
|
|
137
|
+
entry_type EvalOrder (int n, double theta, double phi) const;
|
|
138
|
+
|
|
139
|
+
/// Same as EvalOrders(theta, phi, vals), with the angles taken from Polar(x).
void EvalOrders (Vec<3> x, FlatVector<entry_type> vals) const
{
  const auto angles = Polar(x);
  EvalOrders(std::get<0>(angles), std::get<1>(angles), vals);
}
|
|
144
|
+
|
|
145
|
+
void EvalOrders (double theta, double phi, FlatVector<entry_type> vals) const;
|
|
146
|
+
|
|
147
|
+
void Calc (Vec<3> x, FlatVector<Complex> shapes);
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
void FlipZ ();
|
|
151
|
+
void RotateZ (double alpha);
|
|
152
|
+
|
|
153
|
+
template <typename FUNC>
|
|
154
|
+
void RotateZ (double alpha, FUNC func) const
|
|
155
|
+
{
|
|
156
|
+
if (order < 0) return;
|
|
157
|
+
|
|
158
|
+
Vector<Complex> exp_imalpha(order+1);
|
|
159
|
+
Complex exp_ialpha(cos(alpha), sin(alpha));
|
|
160
|
+
Complex prod = 1.0;
|
|
161
|
+
for (int i = 0; i <= order; i++)
|
|
162
|
+
{
|
|
163
|
+
exp_imalpha(i) = prod;
|
|
164
|
+
prod *= exp_ialpha;
|
|
165
|
+
}
|
|
166
|
+
|
|
167
|
+
int ii = 0;
|
|
168
|
+
for (int n = 0; n <= order; n++)
|
|
169
|
+
{
|
|
170
|
+
for (int m = -n; m < 0; m++, ii++)
|
|
171
|
+
func(ii, conj(exp_imalpha(-m)));
|
|
172
|
+
for (int m = 0; m <= n; m++, ii++)
|
|
173
|
+
func(ii, exp_imalpha(m));
|
|
174
|
+
};
|
|
175
|
+
};
|
|
176
|
+
|
|
177
|
+
template <typename FUNC>
|
|
178
|
+
void RotateZFlip (double alpha, bool flip, FUNC func) const
|
|
179
|
+
{
|
|
180
|
+
if (order < 0) return;
|
|
181
|
+
|
|
182
|
+
Vector<Complex> exp_imalpha(order+1);
|
|
183
|
+
Complex exp_ialpha(cos(alpha), sin(alpha));
|
|
184
|
+
Complex prod = 1.0;
|
|
185
|
+
for (int i = 0; i <= order; i++)
|
|
186
|
+
{
|
|
187
|
+
exp_imalpha(i) = prod;
|
|
188
|
+
prod *= exp_ialpha;
|
|
189
|
+
}
|
|
190
|
+
|
|
191
|
+
int ii = 0;
|
|
192
|
+
|
|
193
|
+
auto FlipFactor = [] (int n, int m, bool flip)->double
|
|
194
|
+
{
|
|
195
|
+
if (flip)
|
|
196
|
+
return ((n-m)%2) == 1 ? -1 : 1;
|
|
197
|
+
return 1.0;
|
|
198
|
+
};
|
|
199
|
+
|
|
200
|
+
for (int n = 0; n <= order; n++)
|
|
201
|
+
{
|
|
202
|
+
for (int m = -n; m < 0; m++, ii++)
|
|
203
|
+
func(ii, FlipFactor(n,m,flip)*conj(exp_imalpha(-m)));
|
|
204
|
+
for (int m = 0; m <= n; m++, ii++)
|
|
205
|
+
func(ii, FlipFactor(n,m,flip)*exp_imalpha(m));
|
|
206
|
+
};
|
|
207
|
+
};
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
void RotateY (double alpha, bool parallel = false);
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
static double CalcAmn (int m, int n)
|
|
216
|
+
{
|
|
217
|
+
if (m < 0) m=-m;
|
|
218
|
+
if (n < m) return 0;
|
|
219
|
+
|
|
220
|
+
if (2*n+1 < presqrt.sqrt_int.Size())
|
|
221
|
+
return presqrt.sqrt_int[n+1+m]*presqrt.sqrt_int[n+1-m] * presqrt.inv_sqrt_2np1_2np3[n];
|
|
222
|
+
else
|
|
223
|
+
return sqrt( (n+1.0+m)*(n+1.0-m) / ( (2*n+1)*(2*n+3) ));
|
|
224
|
+
}
|
|
225
|
+
|
|
226
|
+
static double CalcBmn (int m, int n)
|
|
227
|
+
{
|
|
228
|
+
double sgn = (m >= 0) ? 1 : -1;
|
|
229
|
+
if ( (m >= n) || (-m > n) ) return 0;
|
|
230
|
+
if (n <= presqrt.inv_sqrt_2np1_2np3.Size())
|
|
231
|
+
return sgn * presqrt.sqrt_n_np1[n-m-1] * presqrt.inv_sqrt_2np1_2np3[n-1];
|
|
232
|
+
else
|
|
233
|
+
return sgn * sqrt( (n-m-1.0)*(n-m) / ( (2*n-1.0)*(2*n+1)));
|
|
234
|
+
}
|
|
235
|
+
|
|
236
|
+
// Coefficient D(m,n) = sign(m)/2 * sqrt( (n-m)*(n+m+1) ),
// where sign(m) is +1 for m >= 0 and -1 otherwise.
static double CalcDmn (int m, int n)
{
  const double half_sign = (m >= 0) ? 0.5 : -0.5;
  return half_sign * sqrt((n-m)*(n+m+1));
}
|
|
241
|
+
|
|
242
|
+
// Nail A. Gumerov and Ramani Duraiswami book, formula (2.2.12)
|
|
243
|
+
// add directional derivative divided by kappa to res, both multipoles need same scaling
|
|
244
|
+
void DirectionalDiffAdd (Vec<3> d, SphericalHarmonics<entry_type> & res, double scale = 1) const;
|
|
245
|
+
|
|
246
|
+
};
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
// https://fortran-lang.discourse.group/t/looking-for-spherical-bessel-and-hankel-functions-of-first-and-second-kind-and-arbitrary-order/2308/2
|
|
250
|
+
NGS_DLL_HEADER
|
|
251
|
+
void besseljs3d (int nterms, double z, double scale,
|
|
252
|
+
SliceVector<double> fjs, SliceVector<double> fjder = FlatVector<double>(0, nullptr));
|
|
253
|
+
|
|
254
|
+
NGS_DLL_HEADER
|
|
255
|
+
void besseljs3d (int nterms, Complex z, double scale,
|
|
256
|
+
SliceVector<Complex> fjs, SliceVector<Complex> fjder = FlatVector<Complex>(0, nullptr));
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
/*
|
|
260
|
+
spherical bessel functions of first (the j_n) and second (the y_n) kind.
|
|
261
|
+
|
|
262
|
+
j0(r) = sin(r)/r
|
|
263
|
+
j1(r) = (sin(r)-r cos(r)) / r**2
|
|
264
|
+
|
|
265
|
+
y0(r) = -cos(r)/r
|
|
266
|
+
y1(r) = (-cos(r)-r*sin(r)) / r**2
|
|
267
|
+
*/
|
|
268
|
+
NGS_DLL_HEADER
|
|
269
|
+
void SBESJY (double x, int lmax,
|
|
270
|
+
FlatVector<double> j,
|
|
271
|
+
FlatVector<double> y,
|
|
272
|
+
FlatVector<double> jp,
|
|
273
|
+
FlatVector<double> yp);
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
|
|
277
|
+
// Thin wrapper: forwards n, rho, scale and the output container unchanged
// to besseljs3d, without requesting derivatives (the optional derivative
// argument of besseljs3d is left at its default).
template <typename T>
void SphericalBessel (int n, double rho, double scale, T && values)
{
  besseljs3d (n, rho, scale, values);
  /*
    disabled variant that went through temporaries and also computed derivatives:
    Vector<double> j(n+1), jp(n+1);
    besseljs3d (n, rho, scale, j, jp);
    values = j;
  */
}
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
template <typename T>
|
|
290
|
+
void SphericalHankel1 (int n, double rho, double scale, T && values)
|
|
291
|
+
{
|
|
292
|
+
// Complex imag(0,1);
|
|
293
|
+
/*
|
|
294
|
+
if (n >= 0)
|
|
295
|
+
values(0) = exp(imag*rho) / (imag*rho);
|
|
296
|
+
if (n >= 1)
|
|
297
|
+
values(1) = -imag*values(0)*(1.0-1.0/(imag*rho));
|
|
298
|
+
|
|
299
|
+
for (int i = 2; i <= n; i++)
|
|
300
|
+
values(i) = (2*i-1)/rho * values(i-1) - values(i-2);
|
|
301
|
+
*/
|
|
302
|
+
|
|
303
|
+
if (rho < 1e-100)
|
|
304
|
+
{
|
|
305
|
+
values = Complex(0);
|
|
306
|
+
return;
|
|
307
|
+
}
|
|
308
|
+
Vector j(n+1), y(n+1), jp(n+1), yp(n+1);
|
|
309
|
+
|
|
310
|
+
// the bessel-evaluation with scale
|
|
311
|
+
besseljs3d (n, rho, 1/scale, j, jp);
|
|
312
|
+
|
|
313
|
+
// Bessel y directly with the recurrence formula for (y, yp):
|
|
314
|
+
double x = rho;
|
|
315
|
+
double xinv = 1/x;
|
|
316
|
+
y(0) = -xinv * cos(x);
|
|
317
|
+
yp(0) = j(0)-xinv*y(0);
|
|
318
|
+
|
|
319
|
+
double sl = 0;
|
|
320
|
+
for (int l = 1; l <= n; l++)
|
|
321
|
+
{
|
|
322
|
+
y(l) = scale * (sl*y(l-1) - yp(l-1));
|
|
323
|
+
sl += xinv;
|
|
324
|
+
yp(l) = scale * y(l-1) - (sl+xinv)*y(l);
|
|
325
|
+
}
|
|
326
|
+
|
|
327
|
+
for (int i = 0; i <= n; i++)
|
|
328
|
+
values(i) = Complex (j(i), y(i));
|
|
329
|
+
}
|
|
330
|
+
|
|
331
|
+
|
|
332
|
+
|
|
333
|
+
|
|
334
|
+
|
|
335
|
+
// hn1 = jn+ i*yn
|
|
336
|
+
// Radial part of a singular expansion: evaluated with SphericalHankel1.
class Singular
{
public:
  // Radial values for orders 0..order at argument r with an explicit scale.
  template <typename T>
  static void Eval (int order, double r, double scale, T && values)
  {
    SphericalHankel1(order, r, scale, values);
  }

  // Radial values at argument r*kappa, scaled by Scale(kappa, rtyp).
  template <typename T>
  static void Eval (int order, double kappa, double r, double rtyp, T && values)
  {
    const double scaling = Scale(kappa, rtyp);
    SphericalHankel1(order, r*kappa, scaling, values);
  }

  // Scaling factor min(1, rtyp*kappa/2).
  // (a previous choice, min(1.0, rtyp*kappa), is no longer used)
  static double Scale (double kappa, double rtyp)
  {
    const double half_rk = 0.5*rtyp*kappa;
    return std::min(1.0, half_rk);
  }
};
|
|
358
|
+
|
|
359
|
+
|
|
360
|
+
|
|
361
|
+
// jn
|
|
362
|
+
// Radial part of a regular expansion: evaluated with SphericalBessel.
class Regular
{
public:
  // Radial values for orders 0..order at argument r; SphericalBessel
  // receives the reciprocal of the given scale.
  template <typename T>
  static void Eval (int order, double r, double scale, T && values)
  {
    SphericalBessel (order, r, 1.0/scale, values);
  }

  // Radial values at argument r*kappa; SphericalBessel receives the
  // reciprocal of Scale(kappa, rtyp).
  template <typename T>
  static void Eval (int order, double kappa, double r, double rtyp, T && values)
  {
    const double scaling = Scale(kappa, rtyp);
    SphericalBessel (order, r*kappa, 1.0/scaling, values);
  }

  // Scaling factor 1 / min(1, rtyp*kappa/2).
  // (a previous choice, 1.0/min(1.0, 0.25*rtyp*kappa), is no longer used)
  static double Scale (double kappa, double rtyp)
  {
    const double half_rk = 0.5*rtyp*kappa;
    return 1.0/ std::min(1.0, half_rk);
  }
};
|
|
385
|
+
|
|
386
|
+
|
|
387
|
+
|
|
388
|
+
|
|
389
|
+
template <typename RADIAL, typename entry_type=Complex>
|
|
390
|
+
class NGS_DLL_HEADER SphericalExpansion
|
|
391
|
+
{
|
|
392
|
+
SphericalHarmonics<entry_type> sh;
|
|
393
|
+
double kappa;
|
|
394
|
+
double rtyp;
|
|
395
|
+
public:
|
|
396
|
+
|
|
397
|
+
SphericalExpansion (int aorder, double akappa, double artyp)
|
|
398
|
+
: sh(aorder), kappa(akappa), rtyp(artyp) { }
|
|
399
|
+
|
|
400
|
+
|
|
401
|
+
entry_type & Coef(int n, int m) { return sh.Coef(n,m); }
|
|
402
|
+
auto & SH() { return sh; }
|
|
403
|
+
const auto & SH() const { return sh; }
|
|
404
|
+
double Kappa() const { return kappa; }
|
|
405
|
+
double Scale() const { return RADIAL::Scale(kappa, rtyp); }
|
|
406
|
+
double RTyp() const { return rtyp; }
|
|
407
|
+
int Order() const { return sh.Order(); }
|
|
408
|
+
|
|
409
|
+
SphericalExpansion Truncate(int neworder) const
|
|
410
|
+
{
|
|
411
|
+
if (neworder > sh.Order()) neworder=sh.Order();
|
|
412
|
+
SphericalExpansion nmp(neworder, kappa, rtyp);
|
|
413
|
+
nmp.sh.Coefs() = sh.Coefs().Range(sqr(neworder+1));
|
|
414
|
+
return nmp;
|
|
415
|
+
}
|
|
416
|
+
|
|
417
|
+
SphericalExpansion & operator+= (const SphericalExpansion & mp2)
|
|
418
|
+
{
|
|
419
|
+
size_t commonsize = min(SH().Coefs().Size(), mp2.SH().Coefs().Size());
|
|
420
|
+
SH().Coefs().Range(commonsize) += mp2.SH().Coefs().Range(commonsize);
|
|
421
|
+
return *this;
|
|
422
|
+
}
|
|
423
|
+
|
|
424
|
+
entry_type Eval (Vec<3> x) const;
|
|
425
|
+
entry_type EvalDirectionalDerivative (Vec<3> x, Vec<3> d) const;
|
|
426
|
+
|
|
427
|
+
void AddCharge (Vec<3> x, entry_type c);
|
|
428
|
+
void AddDipole (Vec<3> x, Vec<3> dir, entry_type c);
|
|
429
|
+
void AddChargeDipole (Vec<3> x, entry_type c, Vec<3> dir, entry_type c2)
|
|
430
|
+
{
|
|
431
|
+
// TODO: add them at once
|
|
432
|
+
AddCharge (x, c);
|
|
433
|
+
AddDipole (x, dir, c2);
|
|
434
|
+
}
|
|
435
|
+
|
|
436
|
+
void AddPlaneWave (Vec<3> d, entry_type c);
|
|
437
|
+
void AddCurrent (Vec<3> ap, Vec<3> ep, Complex j, int num=100);
|
|
438
|
+
|
|
439
|
+
|
|
440
|
+
void ChangeRTypTo (double new_rtyp)
|
|
441
|
+
{
|
|
442
|
+
double fac = RADIAL::Scale(kappa, rtyp) / RADIAL::Scale(kappa, new_rtyp);
|
|
443
|
+
double prod = 1;
|
|
444
|
+
for (int n = 0; n <= sh.Order(); n++, prod*= fac)
|
|
445
|
+
sh.CoefsN(n) *= prod;
|
|
446
|
+
rtyp = new_rtyp;
|
|
447
|
+
}
|
|
448
|
+
|
|
449
|
+
|
|
450
|
+
Vector<double> Spectrum (bool scaled) const
|
|
451
|
+
{
|
|
452
|
+
Vector<double> spec(Order()+1);
|
|
453
|
+
double fac = 1;
|
|
454
|
+
for (int n = 0; n <= Order(); n++)
|
|
455
|
+
{
|
|
456
|
+
spec(n) = fac * L2Norm2(sh.CoefsN(n));
|
|
457
|
+
if (!scaled) fac *= sqr(Scale());
|
|
458
|
+
}
|
|
459
|
+
return spec;
|
|
460
|
+
}
|
|
461
|
+
|
|
462
|
+
|
|
463
|
+
template <typename TARGET>
|
|
464
|
+
void Transform (SphericalExpansion<TARGET,entry_type> & target, Vec<3> dist) const
|
|
465
|
+
{
|
|
466
|
+
if (target.SH().Order() < 0) return;
|
|
467
|
+
if (SH().Order() < 0)
|
|
468
|
+
{
|
|
469
|
+
target.SH().Coefs() = 0.0;
|
|
470
|
+
return;
|
|
471
|
+
}
|
|
472
|
+
|
|
473
|
+
// static Timer t("mptool Transform "+ToString(typeid(RADIAL).name())+ToString(typeid(TARGET).name()));
|
|
474
|
+
// RegionTimer reg(t);
|
|
475
|
+
|
|
476
|
+
auto [len, theta, phi] = SphericalCoordinates(dist);
|
|
477
|
+
|
|
478
|
+
|
|
479
|
+
// SphericalExpansion<RADIAL,entry_type> tmp{*this};
|
|
480
|
+
SphericalExpansion<RADIAL,entry_type> tmp(Order(), kappa, rtyp);
|
|
481
|
+
tmp.SH().Coefs() = SH().Coefs();
|
|
482
|
+
|
|
483
|
+
tmp.SH().RotateZ(phi);
|
|
484
|
+
tmp.SH().RotateY(theta);
|
|
485
|
+
|
|
486
|
+
tmp.ShiftZ(-len, target);
|
|
487
|
+
|
|
488
|
+
target.SH().RotateY(-theta);
|
|
489
|
+
target.SH().RotateZ(-phi);
|
|
490
|
+
}
|
|
491
|
+
|
|
492
|
+
template <typename TARGET>
|
|
493
|
+
void TransformAdd (SphericalExpansion<TARGET,entry_type> & target, Vec<3> dist, bool atomic = false) const
|
|
494
|
+
{
|
|
495
|
+
if (SH().Order() < 0) return;
|
|
496
|
+
if (target.SH().Order() < 0) return;
|
|
497
|
+
|
|
498
|
+
SphericalExpansion<TARGET,entry_type> tmp{target};
|
|
499
|
+
Transform(tmp, dist);
|
|
500
|
+
if (!atomic)
|
|
501
|
+
target.SH().Coefs() += tmp.SH().Coefs();
|
|
502
|
+
else
|
|
503
|
+
for (int j = 0; j < target.SH().Coefs().Size(); j++)
|
|
504
|
+
AtomicAdd(target.SH().Coefs()[j], tmp.SH().Coefs()[j]);
|
|
505
|
+
}
|
|
506
|
+
|
|
507
|
+
template <typename TARGET>
|
|
508
|
+
void ShiftZ (double z, SphericalExpansion<TARGET,entry_type> & target);
|
|
509
|
+
|
|
510
|
+
|
|
511
|
+
template <typename TARGET>
|
|
512
|
+
void In2Out (SphericalExpansion<TARGET,entry_type> & target, double r) const
|
|
513
|
+
{
|
|
514
|
+
Vector<Complex> rad(Order()+1);
|
|
515
|
+
Vector<Complex> radout(target.Order()+1);
|
|
516
|
+
RADIAL::Eval(Order(), kappa, r, RTyp(), rad);
|
|
517
|
+
TARGET::Eval(target.Order(), kappa, r, target.RTyp(), radout);
|
|
518
|
+
target.SH().Coefs() = 0;
|
|
519
|
+
for (int j = 0; j <= std::min(Order(), target.Order()); j++)
|
|
520
|
+
target.SH().CoefsN(j) = rad(j)/radout(j) * SH().CoefsN(j);
|
|
521
|
+
}
|
|
522
|
+
};
|
|
523
|
+
|
|
524
|
+
|
|
525
|
+
|
|
526
|
+
// ***************** parameters ****************
|
|
527
|
+
|
|
528
|
+
/*
|
|
529
|
+
static constexpr int MPOrder (double rho_kappa)
|
|
530
|
+
{
|
|
531
|
+
// return max (20, int(2*rho_kappa));
|
|
532
|
+
return 20+int(2*rho_kappa);
|
|
533
|
+
}
|
|
534
|
+
static constexpr int maxdirect = 100;
|
|
535
|
+
*/
|
|
536
|
+
|
|
537
|
+
|
|
538
|
+
template <typename SCAL, auto S>
|
|
539
|
+
inline auto VecVector2Matrix (FlatVector<Vec<S,SCAL>> vec)
|
|
540
|
+
{
|
|
541
|
+
return FlatMatrixFixWidth<S,SCAL> (vec.Size(), vec.Data()->Data());
|
|
542
|
+
}
|
|
543
|
+
|
|
544
|
+
inline auto VecVector2Matrix (FlatVector<Complex> vec)
|
|
545
|
+
{
|
|
546
|
+
return FlatMatrixFixWidth<1,Complex> (vec.Size(), vec.Data());
|
|
547
|
+
}
|
|
548
|
+
|
|
549
|
+
|
|
550
|
+
template <typename entry_type=Complex>
|
|
551
|
+
class SingularMLExpansion
|
|
552
|
+
{
|
|
553
|
+
using simd_entry_type = decltype(MakeSimd(declval<std::array<entry_type,FMM_SW>>()));
|
|
554
|
+
static Array<size_t> nodes_on_level;
|
|
555
|
+
|
|
556
|
+
struct RecordingSS
|
|
557
|
+
{
|
|
558
|
+
const SphericalExpansion<Singular,entry_type> * mp_source;
|
|
559
|
+
SphericalExpansion<Singular,entry_type> * mp_target;
|
|
560
|
+
Vec<3> dist;
|
|
561
|
+
double len, theta, phi;
|
|
562
|
+
bool flipz;
|
|
563
|
+
public:
|
|
564
|
+
RecordingSS() = default;
|
|
565
|
+
RecordingSS (const SphericalExpansion<Singular,entry_type> * amp_source,
|
|
566
|
+
SphericalExpansion<Singular,entry_type> * amp_target,
|
|
567
|
+
Vec<3> adist)
|
|
568
|
+
: mp_source(amp_source), mp_target(amp_target), dist(adist)
|
|
569
|
+
{
|
|
570
|
+
std::tie(len, theta, phi) = SphericalCoordinates(adist);
|
|
571
|
+
// flipz = false;
|
|
572
|
+
flipz = theta > M_PI/2;
|
|
573
|
+
if (flipz) theta = M_PI-theta;
|
|
574
|
+
}
|
|
575
|
+
};
|
|
576
|
+
|
|
577
|
+
|
|
578
|
+
// Apply a batch of recorded translations that share len and theta.
//
// N = batch size * VecLength<entry_type> selects the path:
//  - N <= 1 or a single record: each record is applied on its own with an
//    atomic TransformAdd;
//  - N > 192: the batch is split after 192/vec_length records and both
//    parts are processed recursively;
//  - otherwise the smallest of 3, 4, 6, 12, 24, 48, 96, 192 that is >= N is
//    used as the width of ProcessVectorizedBatchSS.
static void ProcessBatchSS(FlatArray<RecordingSS*> batch, double len, double theta) {
  constexpr int vec_length = VecLength<entry_type>;
  const int batch_size = batch.Size();
  const int N = batch_size * vec_length;

  if (N <= 1 || batch_size <= 1) {
    for (auto* rec : batch)
      rec->mp_source->TransformAdd(*rec->mp_target, rec->dist, true);
    return;
  }

  if (N > 192) {
    // Split large batches
    constexpr int first_part = 192 / vec_length;
    ProcessBatchSS(batch.Range(0, first_part), len, theta);
    ProcessBatchSS(batch.Range(first_part, batch_size), len, theta);
    return;
  }

  if (N <= 3)
    ProcessVectorizedBatchSS<3, vec_length>(batch, len, theta);
  else if (N <= 4)
    ProcessVectorizedBatchSS<4, vec_length>(batch, len, theta);
  else if (N <= 6)
    ProcessVectorizedBatchSS<6, vec_length>(batch, len, theta);
  else if (N <= 12)
    ProcessVectorizedBatchSS<12, vec_length>(batch, len, theta);
  else if (N <= 24)
    ProcessVectorizedBatchSS<24, vec_length>(batch, len, theta);
  else if (N <= 48)
    ProcessVectorizedBatchSS<48, vec_length>(batch, len, theta);
  else if (N <= 96)
    ProcessVectorizedBatchSS<96, vec_length>(batch, len, theta);
  else
    ProcessVectorizedBatchSS<192, vec_length>(batch, len, theta);
}
|
|
619
|
+
|
|
620
|
+
// Apply a batch of recorded translations at once by packing the sources
// into one expansion with Vec<N,Complex> entries.
//
// Record i occupies columns [i*vec_length, (i+1)*vec_length) of the packed
// coefficient matrix.  Steps:
//   1. copy each source into its columns, multiplied by the RotateZFlip
//      factor for (phi, flipz) of that record;
//   2. RotateY(theta), ShiftZ(-len), RotateY(-theta) on the packed data;
//   3. add each record's columns, multiplied by the RotateZFlip factor for
//      (-phi, flipz), to its target via AtomicAdd.
//
// Order, kappa and rtyp are taken from batch[0] only.
// NOTE(review): this assumes all records in the batch share these values
// and that batch.Size()*vec_length <= N - confirm against the caller.
template<int N, int vec_length>
static void ProcessVectorizedBatchSS(FlatArray<RecordingSS*> batch, double len, double theta) {

  // *testout << "Processing vectorized S->S batch of size " << batch.Size() << ", with N = " << N << ", vec_length = " << vec_length << ", len = " << len << ", theta = " << theta << endl;
  double kappa = batch[0]->mp_source->Kappa();
  int so = batch[0]->mp_source->Order();
  int to = batch[0]->mp_target->Order();
  SphericalExpansion<Singular, Vec<N,Complex>> vec_source(so, kappa, batch[0]->mp_source->RTyp());
  SphericalExpansion<Singular, Vec<N,Complex>> vec_target(to, kappa, batch[0]->mp_target->RTyp());

  // Copy multipoles into vectorized multipole
  for (int i = 0; i < batch.Size(); i++)
    {
      // matrix views: the record's own coefficients and its column slice of the packed source
      auto source_i = VecVector2Matrix (batch[i]->mp_source->SH().Coefs());
      auto source_mati = VecVector2Matrix (vec_source.SH().Coefs()).Cols(i*vec_length, (i+1)*vec_length);
      batch[i]->mp_source->SH().RotateZFlip(batch[i]->phi, batch[i]->flipz,
                                            [source_i, source_mati] (size_t ii, Complex factor)
                                            {
                                              source_mati.Row(ii) = factor * source_i.Row(ii);
                                            });
    }

  // second argument of RotateY is true for orders >= 100
  vec_source.SH().RotateY(theta, vec_source.SH().Order() >= 100);
  vec_source.ShiftZ(-len, vec_target);
  vec_target.SH().RotateY(-theta, vec_target.SH().Order() >= 100);

  // Copy vectorized multipole into individual multipoles
  for (int i = 0; i < batch.Size(); i++)
    {
      // despite its name, source_mati here is the column slice of the packed *target*
      auto source_mati = VecVector2Matrix (vec_target.SH().Coefs()).Cols(i*vec_length, (i+1)*vec_length);
      auto target_mati = VecVector2Matrix (batch[i]->mp_target->SH().Coefs());
      batch[i]->mp_target->SH().RotateZFlip(-batch[i]->phi, batch[i]->flipz,
                                            [source_mati, target_mati] (size_t ii, Complex factor)
                                            {
                                              AtomicAdd (target_mati.Row(ii), factor * source_mati.Row(ii));
                                            });
    }
}
|
|
658
|
+
|
|
659
|
+
struct Node
|
|
660
|
+
{
|
|
661
|
+
Vec<3> center;
|
|
662
|
+
double r;
|
|
663
|
+
int level;
|
|
664
|
+
std::array<unique_ptr<Node>,8> childs;
|
|
665
|
+
SphericalExpansion<Singular, entry_type> mp;
|
|
666
|
+
|
|
667
|
+
Array<tuple<Vec<3>, entry_type>> charges;
|
|
668
|
+
Array<tuple<Vec<3>, Vec<3>, entry_type>> dipoles;
|
|
669
|
+
Array<tuple<Vec<3>, entry_type, Vec<3>, entry_type>> chargedipoles;
|
|
670
|
+
Array<tuple<Vec<3>, Vec<3>, Complex,int>> currents;
|
|
671
|
+
|
|
672
|
+
using simd_entry_type = decltype(MakeSimd(declval<std::array<entry_type,FMM_SW>>()));
|
|
673
|
+
Array<tuple<Vec<3,SIMD<double,FMM_SW>>, simd_entry_type>> simd_charges;
|
|
674
|
+
Array<tuple<Vec<3,SIMD<double,FMM_SW>>, Vec<3,SIMD<double,FMM_SW>>, simd_entry_type>> simd_dipoles;
|
|
675
|
+
Array<tuple<Vec<3,SIMD<double,FMM_SW>>, simd_entry_type,
|
|
676
|
+
Vec<3,SIMD<double,FMM_SW>>, simd_entry_type>> simd_chargedipoles;
|
|
677
|
+
|
|
678
|
+
int total_sources;
|
|
679
|
+
const FMM_Parameters & fmm_params;
|
|
680
|
+
std::mutex node_mutex;
|
|
681
|
+
atomic<bool> have_childs{false};
|
|
682
|
+
|
|
683
|
+
// Create a node with the given center, radius and level.  The expansion
// order is afmm_params.minorder + 2*ar*akappa (converted to int by the
// SphericalExpansion constructor); the per-level node counter is bumped
// when the level lies inside nodes_on_level.
// (a previous order choice, MPOrder(ar*akappa), is no longer used)
Node (Vec<3> acenter, double ar, int alevel, double akappa, const FMM_Parameters & afmm_params)
  : center(acenter),
    r(ar),
    level(alevel),
    mp(afmm_params.minorder+2*ar*akappa, akappa, ar),
    fmm_params(afmm_params)
{
  // NOTE(review): plain increment of a static counter - confirm nodes are
  // never constructed concurrently, or that this count is only diagnostic
  if (level < nodes_on_level.Size())
    ++nodes_on_level[level];
}
|
|
690
|
+
|
|
691
|
+
// Index (0..7) of the child octant containing x: bit k is set when
// x(k) > center(k), for k = 0, 1, 2.
int GetChildNum (Vec<3> x) const
{
  int octant = 0;
  for (int axis = 0; axis < 3; axis++)
    if (x(axis) > center(axis))
      octant |= (1 << axis);
  return octant;
}
|
|
699
|
+
|
|
700
|
+
void CreateChilds()
|
|
701
|
+
{
|
|
702
|
+
if (childs[0]) throw Exception("have already childs");
|
|
703
|
+
for (int i = 0; i < 8; i++)
|
|
704
|
+
{
|
|
705
|
+
Vec<3> cc = center;
|
|
706
|
+
cc(0) += (i&1) ? r/2 : -r/2;
|
|
707
|
+
cc(1) += (i&2) ? r/2 : -r/2;
|
|
708
|
+
cc(2) += (i&4) ? r/2 : -r/2;
|
|
709
|
+
childs[i] = make_unique<Node> (cc, r/2, level+1, mp.Kappa(), fmm_params);
|
|
710
|
+
}
|
|
711
|
+
have_childs = true;
|
|
712
|
+
}
|
|
713
|
+
|
|
714
|
+
|
|
715
|
+
void SendSourcesToChilds()
|
|
716
|
+
{
|
|
717
|
+
CreateChilds();
|
|
718
|
+
|
|
719
|
+
for (auto [x,c] : charges)
|
|
720
|
+
AddCharge (x,c);
|
|
721
|
+
for (auto [x,d,c] : dipoles)
|
|
722
|
+
AddDipole (x,d,c);
|
|
723
|
+
for (auto [x,c,d,c2] : chargedipoles)
|
|
724
|
+
AddChargeDipole (x,c,d,c2);
|
|
725
|
+
for (auto [sp,ep,j,num] : currents)
|
|
726
|
+
AddCurrent (sp,ep,j,num);
|
|
727
|
+
|
|
728
|
+
charges.DeleteAll();
|
|
729
|
+
dipoles.DeleteAll();
|
|
730
|
+
chargedipoles.DeleteAll();
|
|
731
|
+
currents.DeleteAll();
|
|
732
|
+
}
|
|
733
|
+
|
|
734
|
+
|
|
735
|
+
// Add a charge c at x.  If children exist it is forwarded to the child
// octant containing x; otherwise it is stored here under node_mutex.
// After storing, the node is split unless level > 20, or unless it holds
// fewer than fmm_params.maxdirect charges and r*kappa < 5.
void AddCharge (Vec<3> x, entry_type c)
{
  // quick check without locking
  if (have_childs)
    {
      childs[GetChildNum(x)] -> AddCharge(x, c);
      return;
    }

  lock_guard<mutex> guard(node_mutex);

  // test again after locking: the node may have been split meanwhile
  if (have_childs)
    {
      childs[GetChildNum(x)] -> AddCharge(x, c);
      return;
    }

  charges.Append( tuple{x,c} );

  // (a former criterion, r*mp.Kappa() < 1e-8, is disabled)
  const bool stay_leaf =
    (level > 20) ||
    (charges.Size() < fmm_params.maxdirect && r*mp.Kappa() < 5);
  if (!stay_leaf)
    SendSourcesToChilds();
}
|
|
763
|
+
|
|
764
|
+
|
|
765
|
+
// Add a dipole (direction d, strength c) at x.  If children exist it is
// forwarded to the child octant containing x; otherwise it is stored here
// under node_mutex.  After storing, the node is split unless level > 20 or
// it holds fewer than fmm_params.maxdirect dipoles.
void AddDipole (Vec<3> x, Vec<3> d, entry_type c)
{
  // quick check without locking
  if (have_childs)
    {
      childs[GetChildNum(x)] -> AddDipole(x, d, c);
      return;
    }

  lock_guard<mutex> guard(node_mutex);

  // test again after locking: the node may have been split meanwhile
  if (have_childs)
    {
      childs[GetChildNum(x)] -> AddDipole(x, d, c);
      return;
    }

  dipoles.Append (tuple{x,d,c});

  const bool stay_leaf =
    (level > 20) || (dipoles.Size() < fmm_params.maxdirect);
  if (!stay_leaf)
    SendSourcesToChilds();
}
|
|
793
|
+
|
|
794
|
+
|
|
795
|
+
// Add a combined source at x: charge c plus dipole (dir, c2).  If children
// exist it is forwarded to the child octant containing x; otherwise it is
// stored here under node_mutex.  After storing, the node is split unless it
// holds fewer than fmm_params.maxdirect such sources or r < 1e-8.
// NOTE(review): unlike AddCharge/AddDipole, the stop criterion here is
// r < 1e-8 rather than level > 20 - confirm this difference is intended.
void AddChargeDipole (Vec<3> x, entry_type c, Vec<3> dir, entry_type c2)
{
  // quick check without locking
  if (have_childs)
    {
      childs[GetChildNum(x)] -> AddChargeDipole(x, c, dir, c2);
      return;
    }

  lock_guard<mutex> guard(node_mutex);

  // test again after locking: the node may have been split meanwhile
  if (have_childs)
    {
      childs[GetChildNum(x)] -> AddChargeDipole(x, c, dir, c2);
      return;
    }

  chargedipoles.Append (tuple{x,c,dir,c2});

  const bool stay_leaf =
    (chargedipoles.Size() < fmm_params.maxdirect) || (r < 1e-8);
  if (!stay_leaf)
    SendSourcesToChilds();

  // (the alternative of adding charge and dipole separately is disabled)
}
|
|
827
|
+
|
|
828
|
+
|
|
829
|
+
// not parallel yet
|
|
830
|
+
// Add a straight current segment from sp to ep (strength j, num
// integration points).  Takes no lock.
//
// Leaf: the segment is stored; once 4 segments are stored and r >= 1e-8
// the node is split.
// Inner node: the segment is cut at every coordinate plane through the
// center that it crosses, and each non-empty piece is passed to the child
// octant containing the piece's midpoint.
void AddCurrent (Vec<3> sp, Vec<3> ep, Complex j, int num)
{
  if (!childs[0])
    {
      currents.Append (tuple{sp,ep,j,num});

      // (a former threshold, maxdirect instead of 4, is disabled)
      if (currents.Size() < 4 || r < 1e-8)
        return;

      SendSourcesToChilds();
      return;
    }

  // split line and send to childs: parameters in [0,1] at which the
  // segment is cut
  Array<double> split;
  split.Append(0);
  for (int i = 0; i < 3; i++)
    if ((sp(i) < center(i)) != (ep(i) < center(i)))
      split += (center(i)-sp(i)) / (ep(i)-sp(i));  // segment cuts i-th coordinate plane
  split.Append(1);
  BubbleSort(split);

  for (int i = 0; i < split.Size()-1; i++)
    {
      // skip pieces of zero length
      if (!(split[i+1] > split[i])) continue;

      Vec<3> spi = sp + split[i]*(ep-sp);
      Vec<3> epi = sp + split[i+1]*(ep-sp);
      Vec<3> x = 0.5*(spi+epi);

      childs[GetChildNum(x)] -> AddCurrent(spi, epi, j, num);
    }
}
|
|
886
|
+
|
|
887
|
+
|
|
888
|
+
|
|
889
|
+
|
|
890
|
+
// Evaluate the field of all sources below this node at point p by direct
// summation (the expansion mp is used only for its Kappa()).
//
// Inner nodes return the sum over their eight children.  A leaf adds up,
// per source type, the kernel exp(i*kappa*rho)/(4*pi*rho) with
// rho = |p - x| (charges) or its directional derivative (dipoles, the
// dipole part of chargedipoles, currents).  Sources with rho == 0 are
// skipped / masked to zero.  For each source type the SIMD list is used
// when it is non-empty, otherwise the scalar list.
entry_type Evaluate(Vec<3> p) const
{
  entry_type sum{0.0};
  if (childs[0])
    {
      for (auto & child : childs)
        sum += child->Evaluate(p);
      return sum;
    }

  if (simd_charges.Size())
    {
      // static Timer t("mptool singmp, evaluate, simd charges"); RegionTimer r(t);
      // t.AddFlops (charges.Size());

      simd_entry_type vsum{0.0};
      if (mp.Kappa() < 1e-12)
        {
          // kappa treated as zero: real kernel 1/(4*pi*rho)
          for (auto [x,c] : simd_charges)
            {
              auto rho = L2Norm(p-x);
              auto kernel = 1/(4*M_PI)/rho;
              kernel = If(rho > 0.0, kernel, SIMD<double,FMM_SW>(0.0));
              vsum += kernel * c;

              /*
                auto rho2 = L2Norm2(p-x);
                auto kernel = (1/(4*M_PI)) * rsqrt(rho2);
                kernel = If(rho2 > 0.0, kernel, SIMD<double,FMM_SW>(0.0));
                vsum += kernel * c;
              */
            }
        }
      else if (mp.Kappa() < 1e-8)
        // small kappa: exp(i*kappa*rho) replaced by 1 + i*kappa*rho
        for (auto [x,c] : simd_charges)
          {
            auto rho = L2Norm(p-x);
            auto kernel = (1/(4*M_PI))*SIMD<Complex,FMM_SW> (1,rho*mp.Kappa()) / rho;
            kernel = If(rho > 0.0, kernel, SIMD<Complex,FMM_SW>(0.0));
            vsum += kernel * c;
          }
      else
        for (auto [x,c] : simd_charges)
          {
            auto rho = L2Norm(p-x);
            auto [si,co] = sincos(rho*mp.Kappa());
            auto kernel = (1/(4*M_PI))*SIMD<Complex,FMM_SW>(co,si) / rho;
            kernel = If(rho > 0.0, kernel, SIMD<Complex,FMM_SW>(0.0));
            vsum += kernel * c;
          }

      sum += HSum(vsum);
    }
  else
    {
      if (mp.Kappa() < 1e-8)
        {
          // small kappa: exp(i*kappa*rho) replaced by 1 + i*kappa*rho
          for (auto [x,c] : charges)
            if (double rho = L2Norm(p-x); rho > 0)
              sum += (1/(4*M_PI))*Complex(1,rho*mp.Kappa()) / rho * c;
        }
      else
        for (auto [x,c] : charges)
          if (double rho = L2Norm(p-x); rho > 0)
            sum += (1/(4*M_PI))*exp(Complex(0,rho*mp.Kappa())) / rho * c;
    }

  if (simd_dipoles.Size())
    {
      // static Timer t("mptool singmp, evaluate, simd dipoles"); RegionTimer r(t);

      simd_entry_type vsum{0.0};
      for (auto [x,d,c] : simd_dipoles)
        {
          auto rho = L2Norm(p-x);
          auto drhodp = (1.0/rho) * (p-x);
          auto [si,co] = sincos(rho*mp.Kappa());
          // radial derivative of the kernel: G * (i*kappa - 1/rho)
          auto dGdrho = (1/(4*M_PI))*SIMD<Complex,FMM_SW>(co,si) *
            (-1.0/(rho*rho) + SIMD<Complex,FMM_SW>(0, mp.Kappa())/rho);
          auto kernel = dGdrho * InnerProduct(drhodp, d);
          // lanes with rho == 0 are replaced by zero here
          kernel = If(rho > 0.0, kernel, SIMD<Complex,FMM_SW>(0.0));
          vsum += kernel * c;
        }
      sum += HSum(vsum);
    }
  else
    {
      for (auto [x,d,c] : dipoles)
        if (double rho = L2Norm(p-x); rho > 0)
          {
            Vec<3> drhodp = 1.0/rho * (p-x);
            Complex dGdrho = (1/(4*M_PI))*exp(Complex(0,rho*mp.Kappa())) *
              (Complex(0, mp.Kappa())/rho - 1.0/sqr(rho));
            sum += dGdrho * InnerProduct(drhodp, d) * c;
          }
    }



  if (simd_chargedipoles.Size())
    {
      // static Timer t("mptool singmp, evaluate, simd chargedipoles"); RegionTimer r(t);
      // t.AddFlops (simd_chargedipoles.Size()*FMM_SW);

      simd_entry_type vsum{0.0};
      for (auto [x,c,d,c2] : simd_chargedipoles)
        {
          auto rho = L2Norm(p-x);
          auto rhokappa = rho*mp.Kappa();
          // invrho is 0 in lanes with rho == 0, which zeroes both kernels below
          auto invrho = If(rho>0.0, 1.0/rho, SIMD<double,FMM_SW>(0.0));
          auto [si,co] = sincos(rhokappa);

          // charge part
          auto kernelc = (1/(4*M_PI))*invrho*SIMD<Complex,FMM_SW>(co,si);
          vsum += kernelc * c;

          // dipole part: kernelc * (i*kappa*rho - 1) * ((p-x).d) / rho^2
          auto kernel =
            invrho*invrho * InnerProduct(p-x, d) *
            kernelc * SIMD<Complex,FMM_SW>(-1.0, rhokappa);

          vsum += kernel * c2;
        }
      sum += HSum(vsum);
    }
  else
    {
      // static Timer t("mptool singmp, evaluate, chargedipoles"); RegionTimer r(t);
      // t.AddFlops (chargedipoles.Size());

      for (auto [x,c,d,c2] : chargedipoles)
        if (double rho = L2Norm(p-x); rho > 0)
          {
            // charge part
            sum += (1/(4*M_PI))*exp(Complex(0,rho*mp.Kappa())) / rho * c;

            // dipole part
            Vec<3> drhodp = 1.0/rho * (p-x);
            Complex dGdrho = (1/(4*M_PI))*exp(Complex(0,rho*mp.Kappa())) *
              (Complex(0, mp.Kappa())/rho - 1.0/sqr(rho));

            sum += dGdrho * InnerProduct(drhodp, d) * c2;
          }
    }





  for (auto [sp,ep,j,num] : currents)
    {
      // should use explicit formula instead ...
      // each segment is integrated with num midpoint samples

      Vec<3> tau = ep-sp;
      Vec<3> tau_num = 1.0/num * tau;
      for (int i = 0; i < num; i++)
        {
          Vec<3> x = sp+(i+0.5)*tau_num;

          if (double rho = L2Norm(p-x); rho > 0)
            {
              Vec<3> drhodp = 1.0/rho * (p-x);
              Complex dGdrho = (1/(4*M_PI))*exp(Complex(0,rho*mp.Kappa())) *
                (Complex(0, mp.Kappa())/rho - 1.0/sqr(rho));

              // currents contribute only for vector-valued entries
              if constexpr (std::is_same<entry_type, Vec<3,Complex>>())
                sum += j*dGdrho * Cross(drhodp, tau_num);
            }
        }
    }

  return sum;
}
|
|
1059
|
+
|
|
1060
|
+
// Directional derivative (direction d) at point p of the field of the
// scalar charges below this node, by direct summation of
//   dG/drho * ((p-x)/rho . d) * c,
//   dG/drho = exp(i*kappa*rho)/(4*pi) * (i*kappa/rho - 1/rho^2).
//
// Inner nodes return the sum over their eight children.
// Limitations of a leaf:
//  - stored dipoles are ignored; a notice is printed for the first two
//    such calls only;
//  - stored chargedipoles cause an Exception;
//  - only the scalar charges list is read.
entry_type EvaluateDeriv(Vec<3> p, Vec<3> d) const
{
  entry_type sum{0.0};
  if (childs[0])
    {
      for (auto & child : childs)
        sum += child->EvaluateDeriv(p, d);
      return sum;
    }

  if (dipoles.Size())
    {
      // Atomic and bounded: the counter stops growing once the two notices
      // are out, so concurrent callers neither race on it nor overflow it.
      static atomic<int> cnt{0};
      if (cnt.load() < 2 && cnt.fetch_add(1) < 2)
        cout << "we know what we do - evaluateDeriv not implemented for dipoles in SingularMLExpansion" << endl;
      // return sum;
      // throw Exception("EvaluateDeriv not implemented for dipoles in SingularMLExpansion");
    }
  if (chargedipoles.Size())
    throw Exception("EvaluateDeriv not implemented for chargedipoles in SingularMLExpansion");

  for (auto [x,c] : charges)
    if (double rho = L2Norm(p-x); rho > 0)
      {
        Vec<3> drhodp = 1.0/rho * (p-x);
        Complex dGdrho = (1/(4*M_PI))*exp(Complex(0,rho*mp.Kappa())) *
          (Complex(0, mp.Kappa())/rho - 1.0/sqr(rho));
        sum += dGdrho * InnerProduct(drhodp, d) * c;
      }
  return sum;
}
|
|
1092
|
+
|
|
1093
|
+
void CalcTotalSources()
|
|
1094
|
+
{
|
|
1095
|
+
total_sources = charges.Size() + dipoles.Size() + chargedipoles.Size();
|
|
1096
|
+
for (auto & child : childs)
|
|
1097
|
+
if (child)
|
|
1098
|
+
{
|
|
1099
|
+
child->CalcTotalSources();
|
|
1100
|
+
total_sources += child->total_sources;
|
|
1101
|
+
}
|
|
1102
|
+
}
|
|
1103
|
+
|
|
1104
|
+
// Builds the singular expansion of this subtree.
//
// recording        - if non-null, child->parent transformations are not
//                    executed but appended here as RecordingSS entries
//                    (children with an empty coefficient vector are still
//                    transformed immediately).
// nodes_to_process - if non-null, leaves are appended here instead of
//                    converting their sources to multipoles right away.
//
// Leaves additionally pack their charges / dipoles / chargedipoles into
// FMM_SW-wide SIMD arrays; the last, partially filled pack is padded with
// zero-weight copies of its first entry.
void CalcMP(Array<RecordingSS> * recording, Array<Node*> * nodes_to_process)
{
  // mp.SH().Coefs() = 0.0;
  if (!childs[0])
    {
      // ---- leaf ----
      if (charges.Size()+dipoles.Size()+chargedipoles.Size()+currents.Size() == 0)
        {
          // no sources at all: mark the expansion as empty (order -1)
          mp = SphericalExpansion<Singular,entry_type> (-1, mp.Kappa(), 1.);
          return;
        }

      // make simd charges, comment this block for testing ...
      simd_charges.SetSize( (charges.Size()+FMM_SW-1)/FMM_SW);
      size_t first = 0, slot = 0;
      while (first+FMM_SW <= charges.Size())
        {
          std::array<tuple<Vec<3>,entry_type>, FMM_SW> pack;
          for (int lane = 0; lane < FMM_SW; lane++)
            pack[lane] = charges[first+lane];
          simd_charges[slot] = MakeSimd(pack);
          first += FMM_SW;
          slot++;
        }
      if (first < charges.Size())
        {
          std::array<tuple<Vec<3>,entry_type>, FMM_SW> pack;
          int lane = 0;
          for ( ; first+lane < charges.Size(); lane++)
            pack[lane] = charges[first+lane];
          for ( ; lane < FMM_SW; lane++)
            pack[lane] = tuple( get<0>(pack[0]), entry_type{0.0} );
          simd_charges[slot] = MakeSimd(pack);
        }

      simd_dipoles.SetSize( (dipoles.Size()+FMM_SW-1)/FMM_SW);
      first = 0;
      slot = 0;
      while (first+FMM_SW <= dipoles.Size())
        {
          std::array<tuple<Vec<3>,Vec<3>,entry_type>, FMM_SW> pack;
          for (int lane = 0; lane < FMM_SW; lane++)
            pack[lane] = dipoles[first+lane];
          simd_dipoles[slot] = MakeSimd(pack);
          first += FMM_SW;
          slot++;
        }
      if (first < dipoles.Size())
        {
          std::array<tuple<Vec<3>,Vec<3>,entry_type>, FMM_SW> pack;
          int lane = 0;
          for ( ; first+lane < dipoles.Size(); lane++)
            pack[lane] = dipoles[first+lane];
          for ( ; lane < FMM_SW; lane++)
            pack[lane] = tuple( get<0>(pack[0]), get<1>(pack[0]), entry_type{0.0} );
          simd_dipoles[slot] = MakeSimd(pack);
        }

      simd_chargedipoles.SetSize( (chargedipoles.Size()+FMM_SW-1)/FMM_SW);
      first = 0;
      slot = 0;
      while (first+FMM_SW <= chargedipoles.Size())
        {
          std::array<tuple<Vec<3>,entry_type,Vec<3>,entry_type>, FMM_SW> pack;
          for (int lane = 0; lane < FMM_SW; lane++)
            pack[lane] = chargedipoles[first+lane];
          simd_chargedipoles[slot] = MakeSimd(pack);
          first += FMM_SW;
          slot++;
        }
      if (first < chargedipoles.Size())
        {
          std::array<tuple<Vec<3>,entry_type,Vec<3>,entry_type>, FMM_SW> pack;
          int lane = 0;
          for ( ; first+lane < chargedipoles.Size(); lane++)
            pack[lane] = chargedipoles[first+lane];
          for ( ; lane < FMM_SW; lane++)
            pack[lane] = tuple( get<0>(pack[0]), entry_type{0.0}, get<2>(pack[0]), entry_type{0.0} );
          simd_chargedipoles[slot] = MakeSimd(pack);
        }

      if (nodes_to_process)
        {
          // source -> multipole conversion is deferred to the caller
          *nodes_to_process += this;
          return;
        }

      for (auto [x,c] : charges)
        mp.AddCharge (x-center,c);

      for (auto [x,d,c] : dipoles)
        mp.AddDipole (x-center, d, c);

      for (auto [x,c,d,c2] : chargedipoles)
        mp.AddChargeDipole (x-center, c, d, c2);

      for (auto [sp,ep,j,num] : currents)
        mp.AddCurrent (sp-center, ep-center, j, num);

      return;
    }

  // ---- inner node: first the children, then collect them into this node ----
  if (total_sources < 1000 || recording)
    {
      for (auto & child : childs)
        child->CalcMP(recording, nodes_to_process);
    }
  else
    {
      ParallelFor (8, [&] (int nr)
                   {
                     childs[nr] -> CalcMP(recording, nodes_to_process);
                   });
    }

  for (auto & child : childs)
    {
      if (recording && child->mp.SH().Coefs().Size() > 0)
        *recording += RecordingSS(&child->mp, &mp, center-child->center);
      else
        child->mp.TransformAdd(mp, center-child->center);
    }
}
|
|
1205
|
+
|
|
1206
|
+
// Evaluates the potential of this subtree at p, using the multipole
// expansion where the point is far enough away.
//   1. node holds charges/dipoles/chargedipoles -> direct Evaluate
//   2. |p - center| > 3*r                        -> mp.Eval
//   3. node has no children                      -> direct Evaluate
//   4. otherwise                                 -> sum over the children
entry_type EvaluateMP(Vec<3> p) const
{
  const bool holds_point_sources =
    charges.Size() || dipoles.Size() || chargedipoles.Size();
  if (holds_point_sources)
    return Evaluate(p);

  if (L2Norm(p-center) > 3*r)
    return mp.Eval(p-center);

  if (!childs[0]) // || level==1)
    return Evaluate(p);

  entry_type result{0.0};
  for (auto & child : childs)
    result += child->EvaluateMP(p);
  return result;
}
|
|
1222
|
+
|
|
1223
|
+
// Directional derivative (direction d) at p of the potential of this subtree.
// Leaves and nodes holding charges/dipoles/chargedipoles are handled by the
// direct EvaluateDeriv; a far-away inner node (|p - center| > 3*r) uses its
// expansion; everything else recurses into the children.
entry_type EvaluateMPDeriv(Vec<3> p, Vec<3> d) const
{
  const bool is_leaf = !childs[0];
  const bool holds_point_sources =
    charges.Size() || dipoles.Size() || chargedipoles.Size();

  if (holds_point_sources || is_leaf)
    return EvaluateDeriv(p, d);

  if (L2Norm(p-center) > 3*r)
    return mp.EvalDirectionalDerivative(p-center, d);

  entry_type result{0.0};
  for (auto & child : childs)
    result += child->EvaluateMPDeriv(p, d);
  return result;
}
|
|
1239
|
+
|
|
1240
|
+
// Writes this node (center, radius, level and - unless childnr is the
// default size_t(-1) - its index within the parent), its charges, dipoles
// and chargedipoles, then recursively every existing child.
void Print (ostream & ost, size_t childnr = -1) const
{
  ost << "c = " << center << ", r = " << r << ", level = " << level;
  if (childnr != size_t(-1))
    ost << ", childnr = " << childnr;
  ost << endl;

  for (auto [x,c] : charges)
    ost << "xi = " << x << ", ci = " << c << endl;

  for (auto [x,d,c] : dipoles)
    ost << "xi = " << x << ", di = " << d << ", ci = " << c << endl;

  for (auto [x,c,d,c2] : chargedipoles)
    ost << "xi = " << x << ", c = " << c << ", di = " << d << ", ci = " << c2 << endl;

  for (int nr = 0; nr < 8; nr++)
    {
      if (!childs[nr]) continue;
      childs[nr] -> Print (ost, nr);
    }
}
|
|
1257
|
+
|
|
1258
|
+
double Norm () const
|
|
1259
|
+
{
|
|
1260
|
+
double norm = L2Norm(mp.SH().Coefs());
|
|
1261
|
+
if (childs[0])
|
|
1262
|
+
for (auto & ch : childs)
|
|
1263
|
+
norm += ch->Norm();
|
|
1264
|
+
return norm;
|
|
1265
|
+
}
|
|
1266
|
+
|
|
1267
|
+
size_t NumCoefficients() const
|
|
1268
|
+
{
|
|
1269
|
+
size_t num = sqr(mp.SH().Order()+1);
|
|
1270
|
+
if (childs[0])
|
|
1271
|
+
for (auto & ch : childs)
|
|
1272
|
+
num += ch->NumCoefficients();
|
|
1273
|
+
return num;
|
|
1274
|
+
}
|
|
1275
|
+
|
|
1276
|
+
// Pre-order traversal: calls func on this node first, then on every
// existing child subtree.
void TraverseTree (const std::function<void(Node&)> & func)
{
  func(*this);
  for (auto & child : childs)
    {
      if (!child) continue;
      child->TraverseTree(func);
    }
}
|
|
1283
|
+
};
|
|
1284
|
+
|
|
1285
|
+
// Copy of the parameters given to the constructor; it is handed on to the
// root node's constructor.
FMM_Parameters fmm_params;
|
|
1286
|
+
// Root of the source tree; the Add*/Print/Norm/NumCoefficients members of
// the class forward to it.
Node root;
|
|
1287
|
+
// Set to true at the end of CalcMP(); Evaluate() calls root.EvaluateMP when
// it is set and the direct root.Evaluate otherwise.
bool havemp = false;
|
|
1288
|
+
|
|
1289
|
+
public:
|
|
1290
|
+
// Creates an expansion whose root node (level 0) has the given center and
// radius r and uses wave number kappa.  The per-level node counter
// nodes_on_level is reset so that only the root is counted.
SingularMLExpansion (Vec<3> center, double r, double kappa, FMM_Parameters _params = FMM_Parameters())
  : fmm_params(_params),
    root(center, r, 0, kappa, fmm_params)
{
  nodes_on_level = 0;
  nodes_on_level[0] = 1;
}
|
|
1296
|
+
|
|
1297
|
+
// Wave number kappa, as stored in the root node's expansion.
double Kappa() const
{
  return root.mp.Kappa();
}
|
|
1298
|
+
|
|
1299
|
+
// Adds a point charge with weight c at position x (forwarded to the root node).
void AddCharge(Vec<3> x, entry_type c)
{ root.AddCharge(x, c); }
|
|
1303
|
+
|
|
1304
|
+
// Adds a dipole at position x with direction d and weight c (forwarded to
// the root node).
void AddDipole(Vec<3> x, Vec<3> d, entry_type c)
{ root.AddDipole(x, d, c); }
|
|
1308
|
+
|
|
1309
|
+
// Adds a combined source at x: a charge with weight c plus a dipole with
// direction dir and weight c2 (forwarded to the root node).
void AddChargeDipole(Vec<3> x, entry_type c, Vec<3> dir, entry_type c2)
{ root.AddChargeDipole(x, c, dir, c2); }
|
|
1313
|
+
|
|
1314
|
+
// Adds a line current j along the segment sp -> ep, split into num pieces
// (forwarded to the root node).  Only available when entry_type is
// Vec<3,Complex>; any other entry type throws.
void AddCurrent (Vec<3> sp, Vec<3> ep, Complex j, int num)
{
  if constexpr (!std::is_same<entry_type, Vec<3,Complex>>())
    throw Exception("AddCurrent needs a singular vectorial MP");

  root.AddCurrent (sp, ep, j, num);

  /*
  // for testing: emulate the current by dipoles at the segment midpoints
  Vec<3> tau = ep-sp;
  Vec<3> tau_num = 1.0/num * tau;
  for (int i = 0; i < num; i++)
    {
      for (int k = 0; k < 3; k++)
        {
          Vec<3> ek{0.0}; ek(k) = 1;
          Vec<3> cp = Cross(tau, ek);
          Vec<3,Complex> source{0.0};
          source(k) = j/double(num);
          if constexpr (std::is_same<entry_type, Vec<3,Complex>>())
            root.AddDipole (sp+(i+0.5)*tau_num, cp, source);
        }
    }
  */
}
|
|
1338
|
+
|
|
1339
|
+
// Prints the whole tree, starting at the root node.
void Print (ostream & ost) const
{ root.Print(ost); }
|
|
1343
|
+
|
|
1344
|
+
double Norm() const
|
|
1345
|
+
{
|
|
1346
|
+
return root.Norm();
|
|
1347
|
+
}
|
|
1348
|
+
|
|
1349
|
+
size_t NumCoefficients() const
|
|
1350
|
+
{
|
|
1351
|
+
return root.NumCoefficients();
|
|
1352
|
+
}
|
|
1353
|
+
|
|
1354
|
+
void CalcMP()
|
|
1355
|
+
{
|
|
1356
|
+
static Timer t("mptool compute singular MLMP"); RegionTimer rg(t);
|
|
1357
|
+
static Timer ts2mp("mptool compute singular MLMP - source2mp");
|
|
1358
|
+
static Timer tS2S("mptool compute singular MLMP - S->S");
|
|
1359
|
+
static Timer trec("mptool comput singular recording");
|
|
1360
|
+
static Timer tsort("mptool comput singular sort");
|
|
1361
|
+
|
|
1362
|
+
/*
|
|
1363
|
+
int maxlevel = 0;
|
|
1364
|
+
for (auto [i,num] : Enumerate(nodes_on_level))
|
|
1365
|
+
if (num > 0) maxlevel = i;
|
|
1366
|
+
|
|
1367
|
+
for (int i = 0; i <= maxlevel; i++)
|
|
1368
|
+
cout << "sing " << i << ": " << nodes_on_level[i] << endl;
|
|
1369
|
+
*/
|
|
1370
|
+
|
|
1371
|
+
root.CalcTotalSources();
|
|
1372
|
+
|
|
1373
|
+
if constexpr (false)
|
|
1374
|
+
// direct evaluation of S->S
|
|
1375
|
+
root.CalcMP(nullptr, nullptr);
|
|
1376
|
+
else
|
|
1377
|
+
{
|
|
1378
|
+
|
|
1379
|
+
Array<RecordingSS> recording;
|
|
1380
|
+
Array<Node*> nodes_to_process;
|
|
1381
|
+
|
|
1382
|
+
{
|
|
1383
|
+
RegionTimer reg(trec);
|
|
1384
|
+
root.CalcMP(&recording, &nodes_to_process);
|
|
1385
|
+
}
|
|
1386
|
+
|
|
1387
|
+
{
|
|
1388
|
+
RegionTimer rs2mp(ts2mp);
|
|
1389
|
+
ParallelFor(nodes_to_process.Size(), [&](int i)
|
|
1390
|
+
{
|
|
1391
|
+
auto node = nodes_to_process[i];
|
|
1392
|
+
for (auto [x,c]: node->charges)
|
|
1393
|
+
node->mp.AddCharge(x-node->center, c);
|
|
1394
|
+
for (auto [x,d,c]: node->dipoles)
|
|
1395
|
+
node->mp.AddDipole(x-node->center, d, c);
|
|
1396
|
+
for (auto [x,c,d,c2]: node->chargedipoles)
|
|
1397
|
+
node->mp.AddChargeDipole(x-node->center, c, d, c2);
|
|
1398
|
+
for (auto [sp,ep,j,num]: node->currents)
|
|
1399
|
+
node->mp.AddCurrent(sp-node->center, ep-node->center, j, num);
|
|
1400
|
+
}, TasksPerThread(4));
|
|
1401
|
+
}
|
|
1402
|
+
|
|
1403
|
+
{
|
|
1404
|
+
RegionTimer reg(tsort);
|
|
1405
|
+
QuickSort (recording, [] (auto & a, auto & b)
|
|
1406
|
+
{
|
|
1407
|
+
if (a.len < (1-1e-8) * b.len) return true;
|
|
1408
|
+
if (a.len > (1+1e-8) * b.len) return false;
|
|
1409
|
+
return a.theta < b.theta;
|
|
1410
|
+
});
|
|
1411
|
+
}
|
|
1412
|
+
|
|
1413
|
+
double current_len = -1e100;
|
|
1414
|
+
double current_theta = -1e100;
|
|
1415
|
+
Array<RecordingSS*> current_batch;
|
|
1416
|
+
Array<Array<RecordingSS*>> batch_group;
|
|
1417
|
+
Array<double> group_lengths;
|
|
1418
|
+
Array<double> group_thetas;
|
|
1419
|
+
for (auto & record : recording)
|
|
1420
|
+
{
|
|
1421
|
+
bool len_changed = fabs(record.len - current_len) > 1e-8;
|
|
1422
|
+
bool theta_changed = fabs(record.theta - current_theta) > 1e-8;
|
|
1423
|
+
if ((len_changed || theta_changed) && current_batch.Size() > 0) {
|
|
1424
|
+
batch_group.Append(current_batch);
|
|
1425
|
+
group_lengths.Append(current_len);
|
|
1426
|
+
group_thetas.Append(current_theta);
|
|
1427
|
+
current_batch.SetSize(0);
|
|
1428
|
+
}
|
|
1429
|
+
|
|
1430
|
+
current_len = record.len;
|
|
1431
|
+
current_theta = record.theta;
|
|
1432
|
+
current_batch.Append(&record);
|
|
1433
|
+
}
|
|
1434
|
+
|
|
1435
|
+
if (current_batch.Size() > 0) {
|
|
1436
|
+
batch_group.Append(current_batch);
|
|
1437
|
+
group_lengths.Append(current_len);
|
|
1438
|
+
group_thetas.Append(current_theta);
|
|
1439
|
+
}
|
|
1440
|
+
|
|
1441
|
+
{
|
|
1442
|
+
RegionTimer rS2S(tS2S);
|
|
1443
|
+
// ParallelFor(batch_group.Size(), [&](int i) {
|
|
1444
|
+
for (int i = 0; i < batch_group.Size(); i++){
|
|
1445
|
+
// *testout << "Processing batch " << i << " of size " << batch_group[i].Size() << ", with len = " << group_lengths[i] << ", theta = " << group_thetas[i] << endl;
|
|
1446
|
+
int chunk_size = 24;
|
|
1447
|
+
if (batch_group[i].Size() < chunk_size)
|
|
1448
|
+
ProcessBatchSS(batch_group[i], group_lengths[i], group_thetas[i]);
|
|
1449
|
+
else
|
|
1450
|
+
ParallelForRange(IntRange(batch_group[i].Size()), [&](IntRange range) {
|
|
1451
|
+
auto sub_batch = batch_group[i].Range(range.First(), range.Next());
|
|
1452
|
+
ProcessBatchSS(sub_batch, group_lengths[i], group_thetas[i]);
|
|
1453
|
+
}, TasksPerThread(4));
|
|
1454
|
+
}
|
|
1455
|
+
}
|
|
1456
|
+
}
|
|
1457
|
+
|
|
1458
|
+
// cout << "have singular:" << endl;
|
|
1459
|
+
// PrintStatistics (cout);
|
|
1460
|
+
havemp = true;
|
|
1461
|
+
}
|
|
1462
|
+
|
|
1463
|
+
// Potential at point p.  Once CalcMP() has run (havemp is set) the
// expansion-based evaluation is used, before that the direct summation.
entry_type Evaluate (Vec<3> p) const
{
  if (!havemp)
    return root.Evaluate(p);
  return root.EvaluateMP(p);
}
|
|
1470
|
+
|
|
1471
|
+
|
|
1472
|
+
// Writes tree statistics to ost: deepest level, number of nodes, and per
// level the node count, the maximal expansion order and the number of
// stored coefficients, followed by the total coefficient memory in MB.
void PrintStatistics (ostream & ost)
{
  int levels = 0;
  int cnt = 0;
  root.TraverseTree( [&](Node & node) {
    levels = max(levels, node.level);
    cnt++;
  });
  ost << "levels: " << levels << endl;
  ost << "nodes: " << cnt << endl;

  Array<int> num_on_level(levels+1);
  Array<int> order_on_level(levels+1);
  Array<size_t> coefs_on_level(levels+1);
  num_on_level = 0;
  order_on_level = 0;
  // The accumulator below uses +=, so it has to start at zero like the
  // other two arrays.
  coefs_on_level = 0;
  root.TraverseTree( [&](Node & node) {
    num_on_level[node.level]++;
    order_on_level[node.level] = max(order_on_level[node.level],node.mp.Order());
    coefs_on_level[node.level] += node.mp.SH().Coefs().Size();
  });

  // everything goes to the stream the caller asked for
  ost << "num on level" << endl;
  for (int i = 0; i < num_on_level.Size(); i++)
    ost << i << ": " << num_on_level[i] << ", order = " << order_on_level[i] << ", coefs " << coefs_on_level[i] << endl;

  size_t totcoefs = 0;
  for (auto n : coefs_on_level)
    totcoefs += n;
  ost << "total mem in coefs: " << sizeof(entry_type)*totcoefs / sqr(1024) << " MB" << endl;
}
|
|
1503
|
+
|
|
1504
|
+
|
|
1505
|
+
|
|
1506
|
+
template <typename entry_type2>
|
|
1507
|
+
friend class RegularMLExpansion;
|
|
1508
|
+
};
|
|
1509
|
+
|
|
1510
|
+
|
|
1511
|
+
template <typename entry_type>
|
|
1512
|
+
inline ostream & operator<< (ostream & ost, const SingularMLExpansion<entry_type> & mlmp)
|
|
1513
|
+
{
|
|
1514
|
+
mlmp.Print(ost);
|
|
1515
|
+
return ost;
|
|
1516
|
+
}
|
|
1517
|
+
|
|
1518
|
+
|
|
1519
|
+
// *********************************** Regular multilevel Expansion
|
|
1520
|
+
|
|
1521
|
+
|
|
1522
|
+
template <typename elem_type=Complex>
|
|
1523
|
+
class NGS_DLL_HEADER RegularMLExpansion
|
|
1524
|
+
{
|
|
1525
|
+
static Array<size_t> nodes_on_level;
|
|
1526
|
+
|
|
1527
|
+
|
|
1528
|
+
struct RecordingRS
|
|
1529
|
+
{
|
|
1530
|
+
const SphericalExpansion<Singular,elem_type> * mpS;
|
|
1531
|
+
SphericalExpansion<Regular,elem_type> * mpR;
|
|
1532
|
+
Vec<3> dist;
|
|
1533
|
+
double len, theta, phi;
|
|
1534
|
+
public:
|
|
1535
|
+
RecordingRS() = default;
|
|
1536
|
+
RecordingRS (const SphericalExpansion<Singular,elem_type> * ampS,
|
|
1537
|
+
SphericalExpansion<Regular,elem_type> * ampR,
|
|
1538
|
+
Vec<3> adist)
|
|
1539
|
+
: mpS(ampS), mpR(ampR), dist(adist)
|
|
1540
|
+
{
|
|
1541
|
+
std::tie(len, theta, phi) = SphericalCoordinates(dist);
|
|
1542
|
+
}
|
|
1543
|
+
};
|
|
1544
|
+
|
|
1545
|
+
// Applies a batch of recorded singular -> regular transformations which all
// share the given len and theta.
//
// With N = batch size * VecLength<elem_type>:
//   - a single record (or N <= 1) is transformed directly,
//   - N <= 192 is dispatched to the smallest vectorized kernel width out of
//     3, 4, 6, 12, 24, 48, 96, 192 that can hold it,
//   - larger batches are cut into chunks of 192/vec_length records, which
//     are handled by recursive calls running in parallel.
static void ProcessBatchRS(FlatArray<RecordingRS*> batch, double len, double theta) {
  // static Timer t("ProcessBatchRS"); RegionTimer reg(t, batch.Size());
  constexpr int vec_length = VecLength<elem_type>;
  int batch_size = batch.Size();
  int N = batch_size * vec_length;
  // *testout << "Processing batch of size " << batch.Size() << ", with N = " << N << ", vec_length = " << vec_length << ", Type: " << typeid(elem_type).name() << ", len = " << len << ", theta = " << theta << endl;

  if (N <= 1 || batch_size <= 1)
    {
      for (auto* rec : batch)
        rec->mpS->TransformAdd(*rec->mpR, rec->dist);
      return;
    }

  if (N <= 3)   { ProcessVectorizedBatchRS<3, vec_length>(batch, len, theta);   return; }
  if (N <= 4)   { ProcessVectorizedBatchRS<4, vec_length>(batch, len, theta);   return; }
  if (N <= 6)   { ProcessVectorizedBatchRS<6, vec_length>(batch, len, theta);   return; }
  if (N <= 12)  { ProcessVectorizedBatchRS<12, vec_length>(batch, len, theta);  return; }
  if (N <= 24)  { ProcessVectorizedBatchRS<24, vec_length>(batch, len, theta);  return; }
  if (N <= 48)  { ProcessVectorizedBatchRS<48, vec_length>(batch, len, theta);  return; }
  if (N <= 96)  { ProcessVectorizedBatchRS<96, vec_length>(batch, len, theta);  return; }
  if (N <= 192) { ProcessVectorizedBatchRS<192, vec_length>(batch, len, theta); return; }

  // Split large batches
  /*
  ProcessBatch(batch.Range(0, 192 / vec_length), len, theta);
  ProcessBatch(batch.Range(192 / vec_length, batch_size), len, theta);
  */

  /*
  ParallelFor (2, [&] (int i)
  {
    if (i == 0)
      ProcessBatchRS(batch.Range(0, 192 / vec_length), len, theta);
    else
      ProcessBatchRS(batch.Range(192 / vec_length, batch_size), len, theta);
  }, 2);
  */

  size_t chunksize = 192/vec_length;
  size_t num = (batch.Size()+chunksize-1) / chunksize;
  ParallelFor (num, [&](int i)
  {
    // the last chunk may be shorter than chunksize
    ProcessBatchRS(batch.Range(i*chunksize, min((i+1)*chunksize, batch.Size())), len, theta);
  }, num);
}
|
|
1608
|
+
|
|
1609
|
+
|
|
1610
|
+
template<int N, int vec_length>
|
|
1611
|
+
static void ProcessVectorizedBatchRS(FlatArray<RecordingRS*> batch, double len, double theta) {
|
|
1612
|
+
|
|
1613
|
+
// static Timer t("ProcessVectorizedBatch, N = "+ToString(N) + ", vec_len = " + ToString(vec_length));
|
|
1614
|
+
// RegionTimer reg(t, batch[0]->mpS->SH().Order());
|
|
1615
|
+
// static Timer ttobatch("mptools - copy to batch 2");
|
|
1616
|
+
// static Timer tfrombatch("mptools - copy from batch 2");
|
|
1617
|
+
|
|
1618
|
+
// *testout << "Processing vectorized batch of size " << batch.Size() << ", with N = " << N << ", vec_length = " << vec_length << ", len = " << len << ", theta = " << theta << endl;
|
|
1619
|
+
SphericalExpansion<Singular, Vec<N,Complex>> vec_source(batch[0]->mpS->Order(), batch[0]->mpS->Kappa(), batch[0]->mpS->RTyp());
|
|
1620
|
+
// SphericalExpansion<Singular, elem_type> tmp_source{*batch[0]->mpS};
|
|
1621
|
+
SphericalExpansion<Regular, elem_type> tmp_target{*batch[0]->mpR};
|
|
1622
|
+
SphericalExpansion<Regular, Vec<N,Complex>> vec_target(batch[0]->mpR->Order(), batch[0]->mpR->Kappa(), batch[0]->mpR->RTyp());
|
|
1623
|
+
|
|
1624
|
+
// Copy multipoles into vectorized multipole
|
|
1625
|
+
// ttobatch.Start();
|
|
1626
|
+
for (int i = 0; i < batch.Size(); i++)
|
|
1627
|
+
{
|
|
1628
|
+
auto source_i = VecVector2Matrix (batch[i]->mpS->SH().Coefs());
|
|
1629
|
+
auto source_mati = VecVector2Matrix (vec_source.SH().Coefs()).Cols(i*vec_length, (i+1)*vec_length);
|
|
1630
|
+
batch[i]->mpS->SH().RotateZ(batch[i]->phi,
|
|
1631
|
+
[source_i, source_mati] (size_t ii, Complex factor)
|
|
1632
|
+
{
|
|
1633
|
+
source_mati.Row(ii) = factor * source_i.Row(ii);
|
|
1634
|
+
});
|
|
1635
|
+
}
|
|
1636
|
+
|
|
1637
|
+
// ttobatch.Stop();
|
|
1638
|
+
|
|
1639
|
+
vec_source.SH().RotateY(theta);
|
|
1640
|
+
vec_source.ShiftZ(-len, vec_target);
|
|
1641
|
+
vec_target.SH().RotateY(-theta);
|
|
1642
|
+
|
|
1643
|
+
// Copy vectorized multipole into individual multipoles
|
|
1644
|
+
// tfrombatch.Start();
|
|
1645
|
+
for (int i = 0; i < batch.Size(); i++) {
|
|
1646
|
+
// auto source_i = VecVector2Matrix (tmp_target.SH().Coefs());
|
|
1647
|
+
auto source_mati = VecVector2Matrix (vec_target.SH().Coefs()).Cols(i*vec_length, (i+1)*vec_length);
|
|
1648
|
+
auto targeti = VecVector2Matrix(batch[i]->mpR->SH().Coefs());
|
|
1649
|
+
|
|
1650
|
+
tmp_target.SH().RotateZ(-batch[i]->phi,
|
|
1651
|
+
[source_mati, targeti] (size_t ii, Complex factor)
|
|
1652
|
+
{
|
|
1653
|
+
// source_i.Row(ii) = factor * source_mati.Row(ii);
|
|
1654
|
+
AtomicAdd (VectorView(targeti.Row(ii)), factor * source_mati.Row(ii));
|
|
1655
|
+
});
|
|
1656
|
+
// for (int j = 0; j < tmp_target.SH().Coefs().Size(); j++)
|
|
1657
|
+
// AtomicAdd(batch[i]->mpR->SH().Coefs()[j], tmp_target.SH().Coefs()[j]);
|
|
1658
|
+
}
|
|
1659
|
+
// tfrombatch.Stop();
|
|
1660
|
+
|
|
1661
|
+
}
|
|
1662
|
+
|
|
1663
|
+
|
|
1664
|
+
struct Node
|
|
1665
|
+
{
|
|
1666
|
+
// Center of the node; CreateChilds offsets it by +-r/2 per axis for the children.
Vec<3> center;
|
|
1667
|
+
// Size parameter of the node; children are created with r/2.
double r;
|
|
1668
|
+
// Depth in the tree; children are created with level+1.
int level;
|
|
1669
|
+
// The eight children, all null until CreateChilds() runs.  The index is the
// bit pattern returned by GetChildNum (bit 0: x, bit 1: y, bit 2: z).
std::array<unique_ptr<Node>,8> childs;
|
|
1670
|
+
// Regular expansion of this node; constructed with order -1 and replaced by
// a properly sized one in Allocate().
SphericalExpansion<Regular,elem_type> mp;
|
|
1671
|
+
// Point targets stored at this node (appended in AddTarget).
Array<Vec<3>> targets;
|
|
1672
|
+
// Volume targets stored at this node, re-inserted as AddVolumeTarget(x, r)
// when the node is split in AddTarget.
Array<tuple<Vec<3>,double>> vol_targets;
|
|
1673
|
+
// Read by AddSingularNode and LocalizeExpansion to choose between the serial
// and the parallel path; it has no initializer here.
// NOTE(review): presumably filled by a counting pass outside this view - confirm.
int total_targets;
|
|
1674
|
+
// Locked in AddTarget while a target is appended and the node is split.
std::mutex node_mutex;
|
|
1675
|
+
// Set to true at the end of CreateChilds(); AddTarget checks it once before
// and once after taking node_mutex.
atomic<bool> have_childs{false};
|
|
1676
|
+
|
|
1677
|
+
// Singular nodes whose contribution is added directly in Evaluate /
// EvaluateDirectionalDerivative (appended in AddSingularNode).
Array<const typename SingularMLExpansion<elem_type>::Node*> singnodes;
|
|
1678
|
+
// FMM parameters (minorder, maxdirect are read in this struct); held by reference.
const FMM_Parameters & params;
|
|
1679
|
+
|
|
1680
|
+
|
|
1681
|
+
// Creates a node without an allocated expansion (order -1; see Allocate)
// and counts it in nodes_on_level if that array has an entry for the level.
Node (Vec<3> acenter, double ar, int alevel, double kappa, const FMM_Parameters & _params)
  : center(acenter),
    r(ar),
    level(alevel),
    // mp(MPOrder(ar*kappa), kappa, ar) // 1.0/min(1.0, 0.25*r*kappa))
    mp(-1, kappa, ar),
    params(_params)
    // : center(acenter), r(ar), level(alevel), mp(MPOrder(ar*kappa), kappa, 1.0)
{
  if (level < nodes_on_level.Size())
    {
      nodes_on_level[level]++;
    }
}
|
|
1690
|
+
|
|
1691
|
+
void Allocate()
|
|
1692
|
+
{
|
|
1693
|
+
// mp = SphericalExpansion<Regular,elem_type>(MPOrder(r*mp.Kappa()), mp.Kappa(), r);
|
|
1694
|
+
mp = SphericalExpansion<Regular,elem_type>(params.minorder+2*r*mp.Kappa(), mp.Kappa(), r);
|
|
1695
|
+
}
|
|
1696
|
+
|
|
1697
|
+
|
|
1698
|
+
void CreateChilds(bool allocate = false)
|
|
1699
|
+
{
|
|
1700
|
+
if (childs[0]) throw Exception("have already childs");
|
|
1701
|
+
// create children nodes:
|
|
1702
|
+
for (int i = 0; i < 8; i++)
|
|
1703
|
+
{
|
|
1704
|
+
Vec<3> cc = center;
|
|
1705
|
+
cc(0) += (i&1) ? r/2 : -r/2;
|
|
1706
|
+
cc(1) += (i&2) ? r/2 : -r/2;
|
|
1707
|
+
cc(2) += (i&4) ? r/2 : -r/2;
|
|
1708
|
+
childs[i] = make_unique<Node> (cc, r/2, level+1, mp.Kappa(), params);
|
|
1709
|
+
if (allocate)
|
|
1710
|
+
childs[i] -> Allocate();
|
|
1711
|
+
}
|
|
1712
|
+
have_childs = true;
|
|
1713
|
+
}
|
|
1714
|
+
|
|
1715
|
+
// Couples the singular source node singnode with this regular target node.
//
// Decision sequence:
//   - either expansion has order < 0: nothing to do
//   - level > 20: remember singnode for direct evaluation
//   - well separated (|dist| > 2*(r + singnode.r)):
//       if the source order is more than twice the target order and the
//       source children have a lower order, descend into the source
//       children; otherwise transform singular -> regular (or record the
//       transformation when recording is given)
//   - source is a leaf: remember it for direct evaluation
//   - target larger than source: descend into the target children
//       (creating them when allow_refine is set); without refinement a node
//       holding targets also remembers singnode for direct evaluation
//   - otherwise: descend into the source children
void AddSingularNode (const typename SingularMLExpansion<elem_type>::Node & singnode, bool allow_refine,
                      Array<RecordingRS> * recording)
{
  if (mp.SH().Order() < 0) return;
  if (singnode.mp.SH().Order() < 0) return;
  // if (L2Norm(singnode.mp.SH().Coefs()) == 0) return;
  if (level > 20)
    {
      singnodes.Append(&singnode);
      return;
    }

  // static Timer t("AddSingularNode"); RegionTimer reg(t);

  Vec<3> dist = center-singnode.center;

  // if (L2Norm(dist)*mp.Kappa() > (mp.Order()+singnode.mp.Order()))
  if (L2Norm(dist) > 2*(r + singnode.r))
    {
      const bool prefer_source_children =
        singnode.mp.Order() > 2 * mp.Order() &&
        singnode.childs[0] &&
        singnode.childs[0]->mp.Order() < singnode.mp.Order();

      if (prefer_source_children)
        {
          for (auto & child : singnode.childs)
            AddSingularNode (*child, allow_refine, recording);
          return;
        }

      // static Timer t("mptool transform Helmholtz-criterion"); RegionTimer r(t);
      if (recording)
        *recording += RecordingRS(&singnode.mp, &mp, dist);
      else
        singnode.mp.TransformAdd(mp, dist);
      return;
    }

  if (!singnode.childs[0])
    {
      singnodes.Append(&singnode);
      return;
    }

  if (!(r > singnode.r))
    {
      // source node is at least as large: split the source
      for (auto & childsing : singnode.childs)
        AddSingularNode (*childsing, allow_refine, recording);
      return;
    }

  if (allow_refine)
    {
      if (!childs[0])
        CreateChilds(true);

      for (auto & ch : childs)
        ch -> AddSingularNode (singnode, allow_refine, recording);
      return;
    }

  if (total_targets < 1000 || recording)
    {
      for (auto & ch : childs)
        if (ch)
          ch -> AddSingularNode (singnode, allow_refine, recording);
    }
  else
    {
      ParallelFor (8, [&] (int nr)
                   {
                     if (childs[nr])
                       childs[nr] -> AddSingularNode (singnode, allow_refine, recording);
                   });
    }

  if (targets.Size()+vol_targets.Size())
    singnodes.Append(&singnode);
}
|
|
1793
|
+
|
|
1794
|
+
void LocalizeExpansion(bool allow_refine)
|
|
1795
|
+
{
|
|
1796
|
+
if (allow_refine)
|
|
1797
|
+
if (mp.Order() > 30 && !childs[0])
|
|
1798
|
+
CreateChilds(allow_refine);
|
|
1799
|
+
|
|
1800
|
+
if (childs[0])
|
|
1801
|
+
{
|
|
1802
|
+
if (total_targets < 1000)
|
|
1803
|
+
{
|
|
1804
|
+
for (int nr = 0; nr < 8; nr++)
|
|
1805
|
+
{
|
|
1806
|
+
if (L2Norm(mp.SH().Coefs()) > 0)
|
|
1807
|
+
mp.TransformAdd (childs[nr]->mp, childs[nr]->center-center);
|
|
1808
|
+
childs[nr]->LocalizeExpansion(allow_refine);
|
|
1809
|
+
}
|
|
1810
|
+
}
|
|
1811
|
+
else
|
|
1812
|
+
ParallelFor(8, [&] (int nr)
|
|
1813
|
+
{
|
|
1814
|
+
if (L2Norm(mp.SH().Coefs()) > 0)
|
|
1815
|
+
mp.TransformAdd (childs[nr]->mp, childs[nr]->center-center);
|
|
1816
|
+
childs[nr]->LocalizeExpansion(allow_refine);
|
|
1817
|
+
});
|
|
1818
|
+
mp = SphericalExpansion<Regular,elem_type>(-1, mp.Kappa(), 1.);
|
|
1819
|
+
//mp.SH().Coefs()=0.0;
|
|
1820
|
+
}
|
|
1821
|
+
}
|
|
1822
|
+
|
|
1823
|
+
// Value at point p: the contribution of the child octant containing p (or,
// if that child does not exist, of this node's own regular expansion) plus
// the contributions of all singular nodes stored at this node.
elem_type Evaluate (Vec<3> p) const
{
  elem_type result{0.0};

  const int octant = GetChildNum(p);
  if (childs[octant])
    result = childs[octant]->Evaluate(p);
  else
    {
      // static Timer t("mptool regmp, evaluate reg"); RegionTimer r(t);
      result = mp.Eval(p-center);
    }

  // static Timer t("mptool regmp, evaluate, singnode"); RegionTimer r(t);
  for (auto sn : singnodes)
    result += sn->EvaluateMP(p);

  return result;
}
|
|
1846
|
+
|
|
1847
|
+
// Directional derivative (direction d) at point p, assembled like Evaluate:
// child octant containing p (or this node's own expansion) plus the
// singular nodes stored at this node.  Only the singular-node part is
// covered by the timer.
elem_type EvaluateDirectionalDerivative (Vec<3> p, Vec<3> d) const
{
  elem_type result{0.0};
  // cout << "EvaluateDirectionalDerivative RegularMLMP, r = " << r << ", level = " << level << ", center = " << center << endl;
  // cout << "Singnodes: " << singnodes.Size() << ", childs: " << childs[0] << endl;

  const int octant = GetChildNum(p);
  if (childs[octant])
    result = childs[octant]->EvaluateDirectionalDerivative(p, d);
  else
    result = mp.EvalDirectionalDerivative(p-center, d);

  static Timer t("mptool direct evaluate deriv"); RegionTimer reg(t);
  for (auto sn : singnodes)
    result += sn->EvaluateMPDeriv(p, d);

  return result;
}
|
|
1868
|
+
|
|
1869
|
+
// Pre-order traversal: calls func on this node first, then on every
// existing child subtree.
void TraverseTree (const std::function<void(Node&)> & func)
{
  func(*this);
  for (auto & child : childs)
    {
      if (!child) continue;
      child->TraverseTree(func);
    }
}
|
|
1876
|
+
|
|
1877
|
+
double Norm() const
|
|
1878
|
+
{
|
|
1879
|
+
double norm = L2Norm(mp.SH().Coefs());
|
|
1880
|
+
if (childs[0])
|
|
1881
|
+
for (auto & ch : childs)
|
|
1882
|
+
norm += ch->Norm();
|
|
1883
|
+
return norm;
|
|
1884
|
+
}
|
|
1885
|
+
|
|
1886
|
+
size_t NumCoefficients() const
|
|
1887
|
+
{
|
|
1888
|
+
size_t num = sqr(mp.SH().Order()+1);
|
|
1889
|
+
if (childs[0])
|
|
1890
|
+
for (auto & ch : childs)
|
|
1891
|
+
num += ch->NumCoefficients();
|
|
1892
|
+
return num;
|
|
1893
|
+
}
|
|
1894
|
+
|
|
1895
|
+
// Index (0..7) of the child octant containing x: bit 0 is set if x lies
// strictly above the center in the first coordinate, bit 1 for the second,
// bit 2 for the third.
int GetChildNum (Vec<3> x) const
{
  const int bit_x = (x(0) > center(0)) ? 1 : 0;
  const int bit_y = (x(1) > center(1)) ? 2 : 0;
  const int bit_z = (x(2) > center(2)) ? 4 : 0;
  return bit_x + bit_y + bit_z;
}
|
|
1903
|
+
|
|
1904
|
+
// Inserts the point target x into the tree.
//
// If the node already has children the target is handed to the matching
// child; have_childs is checked once without and once with node_mutex held.
// Otherwise the target is stored here.  The node is then split unless
// level > 20, or it holds fewer than params.maxdirect targets and
// r*kappa < 5.  On a split all stored point and volume targets are
// re-inserted (they now go to the children) and the local lists are emptied.
void AddTarget (Vec<3> x)
{
  // if (childs[0])
  if (have_childs) // quick check without locking
    {
      // directly send to childs:
      childs[GetChildNum(x)] -> AddTarget( x );
      return;
    }

  lock_guard<mutex> guard(node_mutex);

  if (have_childs) // test again after locking
    {
      // directly send to childs:
      childs[GetChildNum(x)] -> AddTarget(x);
      return;
    }

  targets.Append( x );

  // if (r*mp.Kappa() < 1e-8) return;
  if (level > 20) return;

  const bool keep_as_leaf = targets.Size() < params.maxdirect && r*mp.Kappa() < 5;
  if (keep_as_leaf)
    return;

  CreateChilds();

  for (auto pnt : targets)
    AddTarget (pnt);
  for (auto [vol_x, vol_r] : vol_targets)
    AddVolumeTarget (vol_x, vol_r);

  targets.SetSize0();
  vol_targets.SetSize0();
}
|
|
1942
|
+
|
|
1943
|
+
|
|
1944
|
+
/// Registers a volume target (point x with extent tr) in this subtree.
///
/// Targets with MaxNorm(x-center) > r+tr are ignored. If the node has
/// children the target is offered to every child; otherwise it is stored
/// in vol_targets, and the node is split once it holds params.maxdirect
/// or more volume targets or r*mp.Kappa() reaches 5 (never beyond
/// level 20). On a split, all stored point and volume targets are
/// redistributed and the local lists are emptied.
void AddVolumeTarget (Vec<3> x, double tr)
{
  if (MaxNorm(x-center) > r+tr) return;

  // Offers the target to all children (no null check: used only when
  // have_childs is set).
  auto pass_to_childs = [&] ()
  {
    for (auto & child : childs)
      child->AddVolumeTarget(x, tr);
  };

  // First look at have_childs without taking node_mutex.
  if (have_childs)
    {
      pass_to_childs();
      return;
    }

  lock_guard<mutex> guard(node_mutex);

  // Same test again, now while holding node_mutex.
  if (have_childs)
    {
      pass_to_childs();
      return;
    }

  vol_targets.Append (tuple(x,tr));

  // No further refinement below level 20.
  if (level > 20) return;

  const bool stay_leaf = vol_targets.Size() < params.maxdirect && (r*mp.Kappa() < 5);
  if (stay_leaf) return;

  CreateChilds();

  // NOTE(review): the calls below re-enter AddTarget/AddVolumeTarget on
  // this node while node_mutex is held; presumably CreateChilds() sets
  // have_childs so that they take the unlocked branch - confirm.
  for (auto pt : targets)
    AddTarget (pt);
  for (auto [vx, vr] : vol_targets)
    AddVolumeTarget (vx, vr);

  targets.SetSize0();
  vol_targets.SetSize0();
}
|
|
1982
|
+
|
|
1983
|
+
|
|
1984
|
+
|
|
1985
|
+
void CalcTotalTargets()
|
|
1986
|
+
{
|
|
1987
|
+
total_targets = targets.Size() + vol_targets.Size();
|
|
1988
|
+
for (auto & child : childs)
|
|
1989
|
+
if (child)
|
|
1990
|
+
{
|
|
1991
|
+
child->CalcTotalTargets();
|
|
1992
|
+
total_targets += child->total_targets;
|
|
1993
|
+
}
|
|
1994
|
+
}
|
|
1995
|
+
|
|
1996
|
+
void RemoveEmptyTrees()
|
|
1997
|
+
{
|
|
1998
|
+
for (auto & child : childs)
|
|
1999
|
+
if (child)
|
|
2000
|
+
{
|
|
2001
|
+
child->RemoveEmptyTrees();
|
|
2002
|
+
// if (child->total_targets == 0)
|
|
2003
|
+
// child = nullptr;
|
|
2004
|
+
}
|
|
2005
|
+
|
|
2006
|
+
if (total_targets == 0)
|
|
2007
|
+
mp = SphericalExpansion<Regular,elem_type>(-1, mp.Kappa(),1.);
|
|
2008
|
+
}
|
|
2009
|
+
|
|
2010
|
+
void AllocateMemory()
|
|
2011
|
+
{
|
|
2012
|
+
for (auto & child : childs)
|
|
2013
|
+
if (child)
|
|
2014
|
+
child->AllocateMemory();
|
|
2015
|
+
|
|
2016
|
+
if (total_targets > 0)
|
|
2017
|
+
Allocate();
|
|
2018
|
+
// mp = SphericalExpansion<Regular,elem_type>(MPOrder(r*mp.Kappa()), mp.Kappa(), r); // -1, mp.Kappa(),1.);
|
|
2019
|
+
}
|
|
2020
|
+
|
|
2021
|
+
|
|
2022
|
+
|
|
2023
|
+
|
|
2024
|
+
/// Writes this node (center, r, level), its stored point targets and,
/// recursively, all existing children to ost.
/// childnr is the slot index of this node in its parent; the default
/// value (size_t(-1)) means "no index" and suppresses the childnr field.
void Print (ostream & ost, size_t childnr = -1) const
{
  const bool has_childnr = (childnr != size_t(-1));

  ost << "c = " << center << ", r = " << r << ", level = " << level;
  if (has_childnr)
    ost << ", childnr = " << childnr;
  ost << endl;

  for (auto pt : targets)
    ost << "xi = " << pt << endl;

  for (int nr = 0; nr < 8; nr++)
    {
      if (!childs[nr]) continue;
      childs[nr] -> Print (ost, nr);
    }
}
|
|
2036
|
+
|
|
2037
|
+
};
|
|
2038
|
+
|
|
2039
|
+
FMM_Parameters fmm_params; // copy of the parameters given to the constructor; declared before root because root's initializer is passed fmm_params
|
|
2040
|
+
Node root; // top-level node of the tree of regular expansions (constructed with level 0)
|
|
2041
|
+
shared_ptr<SingularMLExpansion<elem_type>> singmp; // singular expansion set by the constructor / CalcMP; Evaluate falls back to it outside the root box
|
|
2042
|
+
|
|
2043
|
+
public:
|
|
2044
|
+
/// Builds the regular expansion tree directly from an already computed
/// singular expansion.
///
/// @param asingmp  singular expansion; its havemp flag must be set
/// @param center   center passed to the root node
/// @param r        size parameter passed to the root node
/// @param _params  FMM parameters (copied into fmm_params)
/// @throws Exception if asingmp->havemp is not set
RegularMLExpansion (shared_ptr<SingularMLExpansion<elem_type>> asingmp, Vec<3> center, double r,
                    const FMM_Parameters & _params)
  : fmm_params(_params), root(center, r, 0, asingmp->Kappa(), fmm_params), singmp(asingmp)
{
  if (!singmp->havemp)
    throw Exception("first call Calc for singular MP");

  root.Allocate();

  // reset the per-level node counters; only the root exists so far
  nodes_on_level = 0;
  nodes_on_level[0] = 1;

  {
    // feed the singular tree into the regular tree
    static Timer tconvert("mptool compute regular MLMP");
    RegionTimer reg(tconvert);
    root.AddSingularNode(singmp->root, true, nullptr);
  }

  {
    // push the expansions down the regular tree
    static Timer texpand("mptool expand regular MLMP");
    RegionTimer reg(texpand);
    root.LocalizeExpansion(true);
  }
}
|
|
2075
|
+
|
|
2076
|
+
/// Creates a tree consisting of the root node only; no singular
/// expansion is attached yet (see CalcMP).
///
/// @param center   center passed to the root node
/// @param r        size parameter passed to the root node
/// @param kappa    value passed to the root node as its kappa
/// @param _params  FMM parameters (copied into fmm_params)
RegularMLExpansion (Vec<3> center, double r, double kappa, const FMM_Parameters & _params)
  : fmm_params(_params),
    root(center, r, 0, kappa, fmm_params)
{
  // reset the per-level node counters; only the root exists so far
  nodes_on_level = 0;
  nodes_on_level[0] = 1;
}
|
|
2082
|
+
|
|
2083
|
+
/// Registers the point target t by forwarding it to the root node.
void AddTarget (Vec<3> t)
{ root.AddTarget (t); }
|
|
2087
|
+
|
|
2088
|
+
/// Registers the volume target (t, r) by forwarding it to the root node.
void AddVolumeTarget (Vec<3> t, double r)
{ root.AddVolumeTarget (t, r); }
|
|
2092
|
+
|
|
2093
|
+
/// Computes the regular expansions of the tree from the singular
/// expansion asingmp.
///
/// Steps: count targets per subtree, allocate the nodes that have
/// targets, record the singular-to-regular conversions, sort and batch
/// them by (len, theta), process the batches in parallel, and finally
/// localize the expansion down the tree.
///
/// @param asingmp      singular expansion; stored in singmp
/// @param onlytargets  its negation is passed as the flag argument of
///                     AddSingularNode and LocalizeExpansion
void CalcMP(shared_ptr<SingularMLExpansion<elem_type>> asingmp, bool onlytargets = true)
{
  static Timer t("mptool regular MLMP"); RegionTimer rg(t);
  static Timer tremove("removeempty");   // only referenced by the disabled RemoveEmptyTrees step
  static Timer trec("mptool regular MLMP - recording");
  static Timer tsort("mptool regular MLMP - sort");

  singmp = asingmp;

  root.CalcTotalTargets();

  // (disabled: if (onlytargets) root.RemoveEmptyTrees(), timed by tremove)

  root.AllocateMemory();

  // Compile-time switch between direct conversion and the
  // record / sort / batch path; the recording path is the active one.
  constexpr bool use_recording = true;

  if constexpr (!use_recording)
    {
      root.AddSingularNode(singmp->root, !onlytargets, nullptr);
    }
  else
    {
      Array<RecordingRS> recording;
      {
        RegionTimer rrec(trec);
        root.AddSingularNode(singmp->root, !onlytargets, &recording);
      }

      {
        // Order by len (equal within a relative tolerance of 1e-8),
        // ties broken by theta.
        RegionTimer reg(tsort);
        QuickSort (recording, [] (auto & a, auto & b)
        {
          if (a.len < (1-1e-8) * b.len) return true;
          if (a.len > (1+1e-8) * b.len) return false;
          return a.theta < b.theta;
        });
      }

      // Split the sorted records into runs of (nearly) equal len and theta.
      // NOTE(review): the sort compares len with a relative tolerance,
      // the grouping with an absolute one - confirm this is intended.
      double current_len = -1e100;
      double current_theta = -1e100;
      Array<RecordingRS*> current_batch;
      Array<Array<RecordingRS*>> batch_group;
      Array<double> group_lengths;
      Array<double> group_thetas;

      // Stores the batch collected so far (if any) and starts a new one.
      auto close_batch = [&] ()
      {
        if (current_batch.Size() == 0) return;
        batch_group.Append(current_batch);
        group_lengths.Append(current_len);
        group_thetas.Append(current_theta);
        current_batch.SetSize(0);
      };

      for (auto & record : recording)
        {
          const bool len_changed = fabs(record.len - current_len) > 1e-8;
          const bool theta_changed = fabs(record.theta - current_theta) > 1e-8;
          if (len_changed || theta_changed)
            close_batch();

          current_len = record.len;
          current_theta = record.theta;
          current_batch.Append(&record);
        }
      close_batch();   // last run

      ParallelFor(batch_group.Size(), [&](int i) {
        ProcessBatchRS(batch_group[i], group_lengths[i], group_thetas[i]);
      }, TasksPerThread(4));
    }

  // Regular-to-regular step down the tree.
  static Timer tloc("mptool regular localize expansion"); RegionTimer rloc(tloc);
  root.LocalizeExpansion(!onlytargets);
}
|
|
2200
|
+
|
|
2201
|
+
/// Writes tree statistics to ost: number of levels and nodes, then per
/// level the node count, the maximal expansion order and the number of
/// stored coefficients, and finally the total coefficient memory in MB.
///
/// Fixes over the previous version:
///  - coefs_on_level is zeroed before being accumulated into (it was
///    the only one of the three per-level arrays left unset),
///  - all output goes to ost (the per-level table and the memory line
///    were written to cout regardless of the stream passed in).
void PrintStatistics (ostream & ost)
{
  int levels = 0;
  int cnt = 0;
  root.TraverseTree( [&](Node & node) {
    levels = max(levels, node.level);
    cnt++;
  });
  ost << "levels: " << levels << endl;
  ost << "nodes: " << cnt << endl;

  Array<int> num_on_level(levels+1);
  Array<int> order_on_level(levels+1);
  Array<size_t> coefs_on_level(levels+1);
  num_on_level = 0;
  order_on_level = 0;
  coefs_on_level = size_t(0);   // must start at zero: accumulated with += below

  root.TraverseTree( [&](Node & node) {
    num_on_level[node.level]++;
    order_on_level[node.level] = max(order_on_level[node.level],node.mp.Order());
    coefs_on_level[node.level] += node.mp.SH().Coefs().Size();
  });

  ost << "num on level" << endl;
  for (int i = 0; i < num_on_level.Size(); i++)
    ost << i << ": " << num_on_level[i] << ", order = " << order_on_level[i] << ", coefs " << coefs_on_level[i] << endl;

  size_t totcoefs = 0;
  for (auto n : coefs_on_level)
    totcoefs += n;
  ost << "total mem in coefs: " << sizeof(elem_type)*totcoefs / sqr(1024) << " MB" << endl;
}
|
|
2232
|
+
|
|
2233
|
+
/// Prints the whole tree to ost, starting at the root node.
void Print (ostream & ost) const
{ root.Print(ost); }
|
|
2237
|
+
|
|
2238
|
+
double Norm() const
|
|
2239
|
+
{
|
|
2240
|
+
return root.Norm();
|
|
2241
|
+
}
|
|
2242
|
+
|
|
2243
|
+
size_t NumCoefficients() const
|
|
2244
|
+
{
|
|
2245
|
+
return root.NumCoefficients();
|
|
2246
|
+
}
|
|
2247
|
+
|
|
2248
|
+
/// Evaluates the expansion at point p.
/// Points with MaxNorm(p - root.center) > root.r are evaluated through
/// the singular expansion singmp; all other points through the regular
/// tree.
elem_type Evaluate (Vec<3> p) const
{
  const bool use_regular_tree = !(MaxNorm(p-root.center) > root.r);
  if (use_regular_tree)
    return root.Evaluate(p);

  return singmp->Evaluate(p);
}
|
|
2257
|
+
|
|
2258
|
+
/// Evaluates the derivative of the expansion at p in direction d via the
/// root node. Points with L2Norm(p - root.center) > root.r yield zero.
///
/// NOTE(review): Evaluate() tests the max-norm and falls back to singmp
/// outside the root box, whereas this function tests the Euclidean norm
/// and returns zero - confirm that the difference is intended.
elem_type EvaluateDirectionalDerivative (Vec<3> p, Vec<3> d) const
{
  const bool outside = L2Norm(p-root.center) > root.r;
  if (outside)
    return elem_type{0.0};

  return root.EvaluateDirectionalDerivative(p, d);
}
|
|
2263
|
+
|
|
2264
|
+
};
|
|
2265
|
+
|
|
2266
|
+
|
|
2267
|
+
template <typename elem_type>
|
|
2268
|
+
inline ostream & operator<< (ostream & ost, const RegularMLExpansion<elem_type> & mlmp)
|
|
2269
|
+
{
|
|
2270
|
+
mlmp.Print(ost);
|
|
2271
|
+
// ost << "RegularMLExpansion" << endl;
|
|
2272
|
+
return ost;
|
|
2273
|
+
}
|
|
2274
|
+
|
|
2275
|
+
|
|
2276
|
+
|
|
2277
|
+
|
|
2278
|
+
|
|
2279
|
+
|
|
2280
|
+
}
|
|
2281
|
+
#endif
|