ngsolve 6.2.2506.post75.dev0__cp314-cp314-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ngsolve might be problematic. Click here for more details.

Files changed (303) hide show
  1. ngsolve-6.2.2506.post75.dev0.data/data/bin/ngs_nvcc +14 -0
  2. ngsolve-6.2.2506.post75.dev0.data/data/bin/ngs_nvlink +14 -0
  3. ngsolve-6.2.2506.post75.dev0.data/data/bin/ngscxx +15 -0
  4. ngsolve-6.2.2506.post75.dev0.data/data/bin/ngsld +11 -0
  5. ngsolve-6.2.2506.post75.dev0.data/data/bin/ngsolve.tcl +648 -0
  6. ngsolve-6.2.2506.post75.dev0.data/data/bin/ngspy +2 -0
  7. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/analytic_integrals.hpp +10 -0
  8. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/arnoldi.hpp +55 -0
  9. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/bandmatrix.hpp +334 -0
  10. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/basematrix.hpp +957 -0
  11. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/basevector.hpp +1268 -0
  12. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/bdbequations.hpp +2805 -0
  13. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/bdbintegrator.hpp +1660 -0
  14. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/bem_diffops.hpp +475 -0
  15. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/bessel.hpp +1064 -0
  16. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/bilinearform.hpp +963 -0
  17. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/bla.hpp +29 -0
  18. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/blockalloc.hpp +95 -0
  19. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/blockjacobi.hpp +328 -0
  20. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/bspline.hpp +116 -0
  21. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/calcinverse.hpp +141 -0
  22. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/cg.hpp +368 -0
  23. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/chebyshev.hpp +44 -0
  24. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/cholesky.hpp +720 -0
  25. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/clapack.h +7254 -0
  26. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/code_generation.hpp +296 -0
  27. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/coefficient.hpp +2033 -0
  28. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/coefficient_impl.hpp +19 -0
  29. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/coefficient_stdmath.hpp +167 -0
  30. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/commutingAMG.hpp +106 -0
  31. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/comp.hpp +79 -0
  32. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/compatibility.hpp +41 -0
  33. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/complex_wrapper.hpp +73 -0
  34. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/compressedfespace.hpp +110 -0
  35. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/contact.hpp +235 -0
  36. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/cuda_linalg.hpp +175 -0
  37. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/cuda_ngbla.hpp +226 -0
  38. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/cuda_ngstd.hpp +527 -0
  39. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/cuda_profiler.hpp +240 -0
  40. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/diagonalmatrix.hpp +154 -0
  41. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/differentialoperator.hpp +276 -0
  42. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/diffop.hpp +1286 -0
  43. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/diffop_impl.hpp +328 -0
  44. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/diffopwithfactor.hpp +123 -0
  45. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/discontinuous.hpp +84 -0
  46. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/dump.hpp +949 -0
  47. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/ectypes.hpp +121 -0
  48. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/eigen.hpp +60 -0
  49. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/eigensystem.hpp +18 -0
  50. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/elasticity_equations.hpp +595 -0
  51. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/elementbyelement.hpp +195 -0
  52. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/elementtopology.hpp +1760 -0
  53. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/elementtransformation.hpp +339 -0
  54. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/evalfunc.hpp +405 -0
  55. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/expr.hpp +1686 -0
  56. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/facetfe.hpp +175 -0
  57. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/facetfespace.hpp +180 -0
  58. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/facethofe.hpp +111 -0
  59. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/facetsurffespace.hpp +112 -0
  60. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/fe_interfaces.hpp +32 -0
  61. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/fem.hpp +87 -0
  62. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/fesconvert.hpp +14 -0
  63. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/fespace.hpp +1449 -0
  64. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/finiteelement.hpp +286 -0
  65. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/globalinterfacespace.hpp +77 -0
  66. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/globalspace.hpp +115 -0
  67. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/gridfunction.hpp +525 -0
  68. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/h1amg.hpp +124 -0
  69. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/h1hofe.hpp +188 -0
  70. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/h1hofe_impl.hpp +1262 -0
  71. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/h1hofefo.hpp +148 -0
  72. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/h1hofefo_impl.hpp +185 -0
  73. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/h1hofespace.hpp +167 -0
  74. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/h1lofe.hpp +1240 -0
  75. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/h1lumping.hpp +41 -0
  76. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/hcurl_equations.hpp +1381 -0
  77. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/hcurlcurlfe.hpp +2241 -0
  78. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/hcurlcurlfespace.hpp +78 -0
  79. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/hcurlfe.hpp +259 -0
  80. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/hcurlfe_utils.hpp +107 -0
  81. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/hcurlhdiv_dshape.hpp +857 -0
  82. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/hcurlhdivfes.hpp +308 -0
  83. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/hcurlhofe.hpp +175 -0
  84. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/hcurlhofe_impl.hpp +1871 -0
  85. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/hcurlhofespace.hpp +193 -0
  86. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/hcurllofe.hpp +1146 -0
  87. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/hdiv_equations.hpp +880 -0
  88. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/hdivdivfe.hpp +2923 -0
  89. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/hdivdivsurfacespace.hpp +76 -0
  90. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/hdivfe.hpp +206 -0
  91. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/hdivfe_utils.hpp +717 -0
  92. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/hdivfes.hpp +75 -0
  93. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/hdivhofe.hpp +447 -0
  94. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/hdivhofe_impl.hpp +1107 -0
  95. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/hdivhofefo.hpp +229 -0
  96. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/hdivhofespace.hpp +177 -0
  97. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/hdivhosurfacefespace.hpp +106 -0
  98. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/hdivlofe.hpp +773 -0
  99. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/hidden.hpp +74 -0
  100. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/householder.hpp +181 -0
  101. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/hypre_ams_precond.hpp +123 -0
  102. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/hypre_precond.hpp +73 -0
  103. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/integrator.hpp +2012 -0
  104. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/integratorcf.hpp +253 -0
  105. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/interpolate.hpp +49 -0
  106. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/intrule.hpp +2542 -0
  107. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/intrules_SauterSchwab.hpp +25 -0
  108. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/irspace.hpp +49 -0
  109. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/jacobi.hpp +153 -0
  110. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/kernels.hpp +762 -0
  111. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/l2hofe.hpp +194 -0
  112. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/l2hofe_impl.hpp +564 -0
  113. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/l2hofefo.hpp +542 -0
  114. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/l2hofespace.hpp +344 -0
  115. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/la.hpp +38 -0
  116. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/linalg_kernels.hpp +102 -0
  117. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/linearform.hpp +266 -0
  118. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/matrix.hpp +2140 -0
  119. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/memusage.hpp +41 -0
  120. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/meshaccess.hpp +1359 -0
  121. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/mgpre.hpp +204 -0
  122. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/mp_coefficient.hpp +145 -0
  123. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/mptools.hpp +2281 -0
  124. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/multigrid.hpp +42 -0
  125. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/multivector.hpp +447 -0
  126. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/mumpsinverse.hpp +187 -0
  127. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/mycomplex.hpp +361 -0
  128. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/ng_lapack.hpp +1661 -0
  129. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/ngblas.hpp +1232 -0
  130. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/ngs_defines.hpp +30 -0
  131. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/ngs_stdcpp_include.hpp +106 -0
  132. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/ngs_utils.hpp +121 -0
  133. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/ngsobject.hpp +1019 -0
  134. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/ngsstream.hpp +113 -0
  135. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/ngstd.hpp +72 -0
  136. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/nodalhofe.hpp +96 -0
  137. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/nodalhofe_impl.hpp +141 -0
  138. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/normalfacetfe.hpp +223 -0
  139. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/normalfacetfespace.hpp +98 -0
  140. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/normalfacetsurfacefespace.hpp +84 -0
  141. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/order.hpp +251 -0
  142. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/parallel_matrices.hpp +222 -0
  143. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/paralleldofs.hpp +340 -0
  144. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/parallelngs.hpp +23 -0
  145. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/parallelvector.hpp +269 -0
  146. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/pardisoinverse.hpp +200 -0
  147. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/periodic.hpp +129 -0
  148. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/plateaufespace.hpp +25 -0
  149. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/pml.hpp +275 -0
  150. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/pmltrafo.hpp +631 -0
  151. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/postproc.hpp +142 -0
  152. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/potentialtools.hpp +22 -0
  153. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/precomp.hpp +60 -0
  154. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/preconditioner.hpp +602 -0
  155. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/prolongation.hpp +377 -0
  156. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/python_comp.hpp +107 -0
  157. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/python_fem.hpp +89 -0
  158. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/python_linalg.hpp +58 -0
  159. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/python_ngstd.hpp +386 -0
  160. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/recursive_pol.hpp +4896 -0
  161. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/recursive_pol_tet.hpp +395 -0
  162. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/recursive_pol_trig.hpp +492 -0
  163. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/reorderedfespace.hpp +81 -0
  164. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/sample_sort.hpp +105 -0
  165. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/scalarfe.hpp +335 -0
  166. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/shapefunction_utils.hpp +113 -0
  167. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/simd_complex.hpp +329 -0
  168. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/smoother.hpp +253 -0
  169. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/solve.hpp +89 -0
  170. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/sparsecholesky.hpp +313 -0
  171. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/sparsematrix.hpp +1038 -0
  172. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/sparsematrix_dyn.hpp +90 -0
  173. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/sparsematrix_impl.hpp +1013 -0
  174. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/special_matrix.hpp +463 -0
  175. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/specialelement.hpp +125 -0
  176. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/statushandler.hpp +33 -0
  177. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/stringops.hpp +12 -0
  178. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/superluinverse.hpp +136 -0
  179. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/symbolicintegrator.hpp +850 -0
  180. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/symmetricmatrix.hpp +144 -0
  181. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/tangentialfacetfe.hpp +224 -0
  182. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/tangentialfacetfespace.hpp +91 -0
  183. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/tensor.hpp +522 -0
  184. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/tensorcoefficient.hpp +446 -0
  185. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/tensorproductintegrator.hpp +113 -0
  186. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/thcurlfe.hpp +128 -0
  187. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/thcurlfe_impl.hpp +380 -0
  188. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/thdivfe.hpp +80 -0
  189. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/thdivfe_impl.hpp +492 -0
  190. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/tpdiffop.hpp +461 -0
  191. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/tpfes.hpp +133 -0
  192. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/tpintrule.hpp +224 -0
  193. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/triangular.hpp +465 -0
  194. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/tscalarfe.hpp +245 -0
  195. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/tscalarfe_impl.hpp +1029 -0
  196. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/umfpackinverse.hpp +148 -0
  197. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/unifiedvector.hpp +103 -0
  198. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/vector.hpp +1273 -0
  199. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/voxelcoefficientfunction.hpp +41 -0
  200. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/vtkoutput.hpp +198 -0
  201. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/vvector.hpp +208 -0
  202. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/webgui.hpp +92 -0
  203. ngsolve-6.2.2506.post75.dev0.data/data/lib/cmake/ngsolve/NGSolveConfig.cmake +102 -0
  204. ngsolve-6.2.2506.post75.dev0.data/data/lib/cmake/ngsolve/ngsolve-targets-release.cmake +89 -0
  205. ngsolve-6.2.2506.post75.dev0.data/data/lib/cmake/ngsolve/ngsolve-targets.cmake +173 -0
  206. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/netgen_mesher.libs/libngbla.so +0 -0
  207. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/netgen_mesher.libs/libngcomp.so +0 -0
  208. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/netgen_mesher.libs/libngfem.so +0 -0
  209. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/netgen_mesher.libs/libngla.so +0 -0
  210. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/netgen_mesher.libs/libngsbem.so +0 -0
  211. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/netgen_mesher.libs/libngscudalib.so +0 -0
  212. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/netgen_mesher.libs/libngsolve.so +0 -0
  213. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/netgen_mesher.libs/libngstd.so +0 -0
  214. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/TensorProductTools.py +210 -0
  215. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/__console.py +94 -0
  216. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/__expr.py +181 -0
  217. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/__init__.py +148 -0
  218. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/_scikit_build_core_dependencies.py +30 -0
  219. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/bvp.py +78 -0
  220. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/config/__init__.py +1 -0
  221. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/config/__main__.py +4 -0
  222. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/config/config.py +60 -0
  223. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/TensorProduct/__init__.py +0 -0
  224. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/TensorProduct/tp_dg_1d_1d.py +80 -0
  225. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/TensorProduct/tp_dg_1d_2d.py +73 -0
  226. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/TensorProduct/tp_dg_2d_1d.py +72 -0
  227. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/TensorProduct/tp_dg_2d_2d.py +66 -0
  228. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/__init__.py +0 -0
  229. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/howto/__init__.py +0 -0
  230. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/howto/hhj.py +44 -0
  231. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/howto/hybrid_dg.py +53 -0
  232. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/howto/mixed.py +30 -0
  233. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/howto/nonlin.py +29 -0
  234. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/howto/pickling.py +26 -0
  235. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/howto/pml.py +31 -0
  236. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/howto/taskmanager.py +20 -0
  237. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/howto/tdnns.py +47 -0
  238. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/howto/timeDG-skeleton.py +45 -0
  239. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/howto/timeDG.py +38 -0
  240. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/howto/timeDGlap.py +42 -0
  241. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/howto/timeDGwave.py +61 -0
  242. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/intro/__init__.py +0 -0
  243. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/intro/adaptive.py +123 -0
  244. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/intro/cmagnet.py +59 -0
  245. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/intro/elasticity.py +76 -0
  246. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/intro/navierstokes.py +74 -0
  247. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/intro/poisson.ipynb +170 -0
  248. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/intro/poisson.py +41 -0
  249. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/mpi/__init__.py +0 -0
  250. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/mpi/mpi_cmagnet.py +87 -0
  251. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/mpi/mpi_navierstokes.py +117 -0
  252. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/mpi/mpi_poisson.py +89 -0
  253. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/mpi/mpi_timeDG.py +82 -0
  254. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/directsolvers.py +26 -0
  255. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/eigenvalues.py +364 -0
  256. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/internal.py +89 -0
  257. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/krylovspace.py +1013 -0
  258. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/meshes.py +748 -0
  259. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/ngs2petsc.py +310 -0
  260. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/ngscuda.so +0 -0
  261. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/ngscxx.py +42 -0
  262. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/ngslib.so +0 -0
  263. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/nonlinearsolvers.py +203 -0
  264. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/preconditioners.py +11 -0
  265. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/solve_implementation.py +168 -0
  266. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/solvers.py +7 -0
  267. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/timestepping.py +185 -0
  268. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/timing.py +108 -0
  269. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/utils.py +167 -0
  270. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/webgui.py +670 -0
  271. ngsolve-6.2.2506.post75.dev0.data/data/share/ngsolve/beam.geo +17 -0
  272. ngsolve-6.2.2506.post75.dev0.data/data/share/ngsolve/beam.vol +240 -0
  273. ngsolve-6.2.2506.post75.dev0.data/data/share/ngsolve/chip.in2d +41 -0
  274. ngsolve-6.2.2506.post75.dev0.data/data/share/ngsolve/chip.vol +614 -0
  275. ngsolve-6.2.2506.post75.dev0.data/data/share/ngsolve/coil.geo +12 -0
  276. ngsolve-6.2.2506.post75.dev0.data/data/share/ngsolve/coil.vol +2560 -0
  277. ngsolve-6.2.2506.post75.dev0.data/data/share/ngsolve/coilshield.geo +24 -0
  278. ngsolve-6.2.2506.post75.dev0.data/data/share/ngsolve/coilshield.vol +3179 -0
  279. ngsolve-6.2.2506.post75.dev0.data/data/share/ngsolve/cube.geo +19 -0
  280. ngsolve-6.2.2506.post75.dev0.data/data/share/ngsolve/cube.vol +1832 -0
  281. ngsolve-6.2.2506.post75.dev0.data/data/share/ngsolve/d10_DGdoubleglazing.pde +50 -0
  282. ngsolve-6.2.2506.post75.dev0.data/data/share/ngsolve/d11_chip_nitsche.pde +40 -0
  283. ngsolve-6.2.2506.post75.dev0.data/data/share/ngsolve/d1_square.pde +43 -0
  284. ngsolve-6.2.2506.post75.dev0.data/data/share/ngsolve/d2_chip.pde +35 -0
  285. ngsolve-6.2.2506.post75.dev0.data/data/share/ngsolve/d3_helmholtz.pde +22 -0
  286. ngsolve-6.2.2506.post75.dev0.data/data/share/ngsolve/d4_cube.pde +46 -0
  287. ngsolve-6.2.2506.post75.dev0.data/data/share/ngsolve/d5_beam.pde +74 -0
  288. ngsolve-6.2.2506.post75.dev0.data/data/share/ngsolve/d6_shaft.pde +73 -0
  289. ngsolve-6.2.2506.post75.dev0.data/data/share/ngsolve/d7_coil.pde +50 -0
  290. ngsolve-6.2.2506.post75.dev0.data/data/share/ngsolve/d8_coilshield.pde +49 -0
  291. ngsolve-6.2.2506.post75.dev0.data/data/share/ngsolve/d9_hybridDG.pde +72 -0
  292. ngsolve-6.2.2506.post75.dev0.data/data/share/ngsolve/doubleglazing.in2d +27 -0
  293. ngsolve-6.2.2506.post75.dev0.data/data/share/ngsolve/doubleglazing.vol +737 -0
  294. ngsolve-6.2.2506.post75.dev0.data/data/share/ngsolve/piezo2d40round4.vol.gz +0 -0
  295. ngsolve-6.2.2506.post75.dev0.data/data/share/ngsolve/shaft.geo +73 -0
  296. ngsolve-6.2.2506.post75.dev0.data/data/share/ngsolve/shaft.vol +4291 -0
  297. ngsolve-6.2.2506.post75.dev0.data/data/share/ngsolve/square.in2d +17 -0
  298. ngsolve-6.2.2506.post75.dev0.data/data/share/ngsolve/square.vol +149 -0
  299. ngsolve-6.2.2506.post75.dev0.dist-info/METADATA +14 -0
  300. ngsolve-6.2.2506.post75.dev0.dist-info/RECORD +303 -0
  301. ngsolve-6.2.2506.post75.dev0.dist-info/WHEEL +5 -0
  302. ngsolve-6.2.2506.post75.dev0.dist-info/licenses/LICENSE +504 -0
  303. ngsolve-6.2.2506.post75.dev0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,2281 @@
1
+ #ifndef FILE_MPTOOLS
2
+ #define FILE_MPTOOLS
3
+
4
+ /*
5
+ tools for computing with spherical harmonics and multi-poles
6
+ */
7
+
8
+
9
+ #include <bla.hpp>
10
+ #include <coefficient.hpp>
11
+ #include <recursive_pol.hpp>
12
+
13
+
14
+ namespace ngsbem
15
+ {
16
+ using namespace ngfem;
17
+
18
+ template<typename T> // VecLength<T>: compile-time length associated with an entry type T
19
+ constexpr int VecLength = 1; // Default: every T without a specialization below (e.g. a scalar Complex) has length 1
20
+
21
+ template<int N>
22
+ constexpr int VecLength<Vec<N, Complex>> = N; // Specialization: Vec<N,Complex> has length N
23
+
24
+
25
+
26
// Compile-time width constant for the FMM code.
// NOTE(review): "SW" presumably stands for SIMD width - confirm against the users of FMM_SW.
// 'inline' gives the header-defined constant one definition shared by all
// translation units instead of one internal-linkage copy per including file.
inline constexpr int FMM_SW = 4;
27
+
28
+
29
+
30
+ // ************************ SIMD - creation (should end up in simd.hpp) *************
31
+
32
+
33
+ template <int S, typename T, int SW>
34
+ Vec<S,T> HSum (Vec<S,SIMD<T,SW>> v)
35
+ {
36
+ Vec<S,T> res;
37
+ for (int i = 0; i < S; i++)
38
+ res(i) = HSum(v(i));
39
+ // Iterate<S> ([&](auto i) {
40
+ // res.HTData().template Elem<i.value>() = HSum(v.HTData().template Elem<i.value>());
41
+ // });
42
+ return res;
43
+ }
44
+
45
+
46
+ class NGS_DLL_HEADER PrecomputedSqrts // lookup tables of square-root expressions, each indexed by a non-negative integer
47
+ {
48
+ public:
49
+ Array<double> sqrt_int; // entry k is used in place of sqrt(k) by SphericalHarmonics::CalcAmn
50
+ // Array<double> inv_sqrt_int;
51
+ Array<double> sqrt_n_np1; // sqrt(n*(n+1))
52
+ Array<double> inv_sqrt_2np1_2np3; // 1/sqrt( (2n+1)*(2n+3) )
53
+
54
+ PrecomputedSqrts(); // defined outside this header; presumably fills the tables - their lengths are not visible here
55
+ };
56
+
57
+ extern NGS_DLL_HEADER PrecomputedSqrts presqrt; // shared instance read by SphericalHarmonics::CalcAmn and CalcBmn
58
+
59
+
60
+
61
+ class FMM_Parameters // plain bundle of tuning parameters; both members are public and have defaults
62
+ {
63
+ public:
64
+ int maxdirect = 100; // NOTE(review): presumably the largest problem size still handled directly - confirm against the code reading it
65
+ int minorder = 20; // order = minorder + 2 kappa r
66
+ };
67
+
68
+
69
+
70
+
71
+ inline std::tuple<double, double, double> SphericalCoordinates(Vec<3> dist){
72
+ double len, theta, phi;
73
+ len = L2Norm(dist);
74
+ if (len < 1e-30)
75
+ theta = 0;
76
+ else
77
+ theta = acos (dist(2) / len);
78
+ if (sqr(dist(0))+sqr(dist(1)) < 1e-30)
79
+ phi = 0;
80
+ else
81
+ phi = atan2(dist(1), dist(0));
82
+ return {len, theta, phi};
83
+ }
84
+
85
+
86
+ template <typename entry_type = Complex>
87
+ class NGS_DLL_HEADER SphericalHarmonics
88
+ {
89
+ int order;
90
+ Vector<entry_type> coefs;
91
+
92
+ public:
93
+ SphericalHarmonics (int aorder)
94
+ : order(aorder), coefs(sqr(order+1)) { coefs=0.0; }
95
+
96
+ int Order() const { return order; }
97
+ FlatVector<entry_type> Coefs() const { return coefs; }
98
+
99
+ entry_type & Coef(int n, int m) { return coefs(n*(n+1) + m); }
100
+ entry_type Coef(int n, int m) const { return coefs(n*(n+1) + m); }
101
+
102
+ auto CoefsN (int n) const
103
+ {
104
+ return coefs.RangeN(n*n, 2*n+1);
105
+ }
106
+
107
+ static std::tuple<double,double> Polar (Vec<3> x)
108
+ {
109
+ double phi, theta;
110
+ if (x(0) == 0 && x(1) == 0)
111
+ {
112
+ phi = 0;
113
+ theta = x(2) > 0 ? 0 : M_PI;
114
+ }
115
+ else
116
+ {
117
+ phi = atan2(x(1), x(0));
118
+ theta = acos(x(2)/L2Norm(x));
119
+ }
120
+ return { theta, phi };
121
+ }
122
+
123
+ entry_type Eval (Vec<3> x) const
124
+ {
125
+ auto [theta, phi] = Polar(x);
126
+ return Eval(theta, phi);
127
+ }
128
+
129
+ entry_type Eval (double theta, double phi) const;
130
+
131
+ entry_type EvalOrder (int n, Vec<3> x) const
132
+ {
133
+ auto [theta, phi] = Polar (x);
134
+ return EvalOrder(n, theta, phi);
135
+ }
136
+
137
+ entry_type EvalOrder (int n, double theta, double phi) const;
138
+
139
+ void EvalOrders (Vec<3> x, FlatVector<entry_type> vals) const
140
+ {
141
+ auto [theta, phi] = Polar(x);
142
+ return EvalOrders(theta, phi, vals);
143
+ }
144
+
145
+ void EvalOrders (double theta, double phi, FlatVector<entry_type> vals) const;
146
+
147
+ void Calc (Vec<3> x, FlatVector<Complex> shapes);
148
+
149
+
150
+ void FlipZ ();
151
+ void RotateZ (double alpha);
152
+
153
+ template <typename FUNC>
154
+ void RotateZ (double alpha, FUNC func) const
155
+ {
156
+ if (order < 0) return;
157
+
158
+ Vector<Complex> exp_imalpha(order+1);
159
+ Complex exp_ialpha(cos(alpha), sin(alpha));
160
+ Complex prod = 1.0;
161
+ for (int i = 0; i <= order; i++)
162
+ {
163
+ exp_imalpha(i) = prod;
164
+ prod *= exp_ialpha;
165
+ }
166
+
167
+ int ii = 0;
168
+ for (int n = 0; n <= order; n++)
169
+ {
170
+ for (int m = -n; m < 0; m++, ii++)
171
+ func(ii, conj(exp_imalpha(-m)));
172
+ for (int m = 0; m <= n; m++, ii++)
173
+ func(ii, exp_imalpha(m));
174
+ };
175
+ };
176
+
177
+ template <typename FUNC>
178
+ void RotateZFlip (double alpha, bool flip, FUNC func) const
179
+ {
180
+ if (order < 0) return;
181
+
182
+ Vector<Complex> exp_imalpha(order+1);
183
+ Complex exp_ialpha(cos(alpha), sin(alpha));
184
+ Complex prod = 1.0;
185
+ for (int i = 0; i <= order; i++)
186
+ {
187
+ exp_imalpha(i) = prod;
188
+ prod *= exp_ialpha;
189
+ }
190
+
191
+ int ii = 0;
192
+
193
+ auto FlipFactor = [] (int n, int m, bool flip)->double
194
+ {
195
+ if (flip)
196
+ return ((n-m)%2) == 1 ? -1 : 1;
197
+ return 1.0;
198
+ };
199
+
200
+ for (int n = 0; n <= order; n++)
201
+ {
202
+ for (int m = -n; m < 0; m++, ii++)
203
+ func(ii, FlipFactor(n,m,flip)*conj(exp_imalpha(-m)));
204
+ for (int m = 0; m <= n; m++, ii++)
205
+ func(ii, FlipFactor(n,m,flip)*exp_imalpha(m));
206
+ };
207
+ };
208
+
209
+
210
+
211
+ void RotateY (double alpha, bool parallel = false);
212
+
213
+
214
+
215
+ static double CalcAmn (int m, int n)
216
+ {
217
+ if (m < 0) m=-m;
218
+ if (n < m) return 0;
219
+
220
+ if (2*n+1 < presqrt.sqrt_int.Size())
221
+ return presqrt.sqrt_int[n+1+m]*presqrt.sqrt_int[n+1-m] * presqrt.inv_sqrt_2np1_2np3[n];
222
+ else
223
+ return sqrt( (n+1.0+m)*(n+1.0-m) / ( (2*n+1)*(2*n+3) ));
224
+ }
225
+
226
+ static double CalcBmn (int m, int n)
227
+ {
228
+ double sgn = (m >= 0) ? 1 : -1;
229
+ if ( (m >= n) || (-m > n) ) return 0;
230
+ if (n <= presqrt.inv_sqrt_2np1_2np3.Size())
231
+ return sgn * presqrt.sqrt_n_np1[n-m-1] * presqrt.inv_sqrt_2np1_2np3[n-1];
232
+ else
233
+ return sgn * sqrt( (n-m-1.0)*(n-m) / ( (2*n-1.0)*(2*n+1)));
234
+ }
235
+
236
// Coefficient D(m,n) = sgn(m)/2 * sqrt( (n-m)(n+m+1) ),
// with sgn(m) = +1 for m >= 0 and -1 otherwise.
static double CalcDmn (int m, int n)
{
  const double half_sign = (m >= 0) ? 0.5 : -0.5;
  return half_sign * sqrt((n-m)*(n+m+1));
}
241
+
242
+ // Nail A. Gumerov and Ramani Duraiswami book, formula (2.2.12)
243
+ // add directional derivative divided by kappa to res, both multipoles need same scaling
244
+ void DirectionalDiffAdd (Vec<3> d, SphericalHarmonics<entry_type> & res, double scale = 1) const;
245
+
246
+ };
247
+
248
+
249
+ // https://fortran-lang.discourse.group/t/looking-for-spherical-bessel-and-hankel-functions-of-first-and-second-kind-and-arbitrary-order/2308/2
250
+ NGS_DLL_HEADER
251
+ void besseljs3d (int nterms, double z, double scale,
252
+ SliceVector<double> fjs, SliceVector<double> fjder = FlatVector<double>(0, nullptr));
253
+
254
+ NGS_DLL_HEADER
255
+ void besseljs3d (int nterms, Complex z, double scale,
256
+ SliceVector<Complex> fjs, SliceVector<Complex> fjder = FlatVector<Complex>(0, nullptr));
257
+
258
+
259
+ /*
260
+ spherical bessel functions of first (the j_n) and second (the y_n) kind.
261
+
262
+ j0(r) = sin(r)/r
263
+ j1(r) = (sin(r)-r cos(r)) / r**2
264
+
265
+ y0(r) = -cos(r)/r
266
+ y1(r) = (-cos(r)-r*sin(r)) / r**2
267
+ */
268
+ NGS_DLL_HEADER
269
+ void SBESJY (double x, int lmax,
270
+ FlatVector<double> j,
271
+ FlatVector<double> y,
272
+ FlatVector<double> jp,
273
+ FlatVector<double> yp);
274
+
275
+
276
+
277
// Thin wrapper around besseljs3d: n, rho and scale are passed through
// unchanged and the results are written straight into values (no derivative
// output is requested).
template <typename T>
void SphericalBessel (int n, double rho, double scale, T && values)
{
  besseljs3d (n, rho, scale, values);
}
287
+
288
+
289
+ template <typename T>
290
+ void SphericalHankel1 (int n, double rho, double scale, T && values)
291
+ {
292
+ // Complex imag(0,1);
293
+ /*
294
+ if (n >= 0)
295
+ values(0) = exp(imag*rho) / (imag*rho);
296
+ if (n >= 1)
297
+ values(1) = -imag*values(0)*(1.0-1.0/(imag*rho));
298
+
299
+ for (int i = 2; i <= n; i++)
300
+ values(i) = (2*i-1)/rho * values(i-1) - values(i-2);
301
+ */
302
+
303
+ if (rho < 1e-100)
304
+ {
305
+ values = Complex(0);
306
+ return;
307
+ }
308
+ Vector j(n+1), y(n+1), jp(n+1), yp(n+1);
309
+
310
+ // the bessel-evaluation with scale
311
+ besseljs3d (n, rho, 1/scale, j, jp);
312
+
313
+ // Bessel y directly with the recurrence formula for (y, yp):
314
+ double x = rho;
315
+ double xinv = 1/x;
316
+ y(0) = -xinv * cos(x);
317
+ yp(0) = j(0)-xinv*y(0);
318
+
319
+ double sl = 0;
320
+ for (int l = 1; l <= n; l++)
321
+ {
322
+ y(l) = scale * (sl*y(l-1) - yp(l-1));
323
+ sl += xinv;
324
+ yp(l) = scale * y(l-1) - (sl+xinv)*y(l);
325
+ }
326
+
327
+ for (int i = 0; i <= n; i++)
328
+ values(i) = Complex (j(i), y(i));
329
+ }
330
+
331
+
332
+
333
+
334
+
335
// Radial functions of singular expansions: hn1 = jn + i*yn,
// evaluated through SphericalHankel1.
class Singular
{
public:
  // Radial functions up to the given order at argument r, with an
  // explicitly supplied scale.
  template <typename T>
  static void Eval (int order, double r, double scale, T && values)
  {
    SphericalHankel1(order, r, scale, values);
  }

  // Radial functions at argument r*kappa; the scale is derived from
  // kappa and rtyp via Scale().
  template <typename T>
  static void Eval (int order, double kappa, double r, double rtyp, T && values)
  {
    const double scale = Scale(kappa, rtyp);
    SphericalHankel1(order, r*kappa, scale, values);
  }

  // Scale factor min(1, rtyp*kappa/2).
  static double Scale (double kappa, double rtyp)
  {
    const double half_kr = 0.5*rtyp*kappa;
    return min(1.0, half_kr);
  }
};
358
+
359
+
360
+
361
// Radial functions of regular expansions: jn, evaluated through
// SphericalBessel. SphericalBessel always receives the reciprocal of the
// scale used by this class.
class Regular
{
public:
  // Radial functions up to the given order at argument r, with an
  // explicitly supplied scale.
  template <typename T>
  static void Eval (int order, double r, double scale, T && values)
  {
    const double inv_scale = 1.0/scale;
    SphericalBessel (order, r, inv_scale, values);
  }

  // Radial functions at argument r*kappa; the scale is derived from
  // kappa and rtyp via Scale().
  template <typename T>
  static void Eval (int order, double kappa, double r, double rtyp, T && values)
  {
    const double scale = Scale(kappa, rtyp);
    SphericalBessel (order, r*kappa, 1.0/scale, values);
  }

  // Scale factor 1 / min(1, rtyp*kappa/2).
  static double Scale (double kappa, double rtyp)
  {
    const double half_kr = 0.5*rtyp*kappa;
    return 1.0/ min(1.0, half_kr);
  }

};
385
+
386
+
387
+
388
+
389
+ template <typename RADIAL, typename entry_type=Complex>
390
+ class NGS_DLL_HEADER SphericalExpansion
391
+ {
392
+ SphericalHarmonics<entry_type> sh;
393
+ double kappa;
394
+ double rtyp;
395
+ public:
396
+
397
+ SphericalExpansion (int aorder, double akappa, double artyp)
398
+ : sh(aorder), kappa(akappa), rtyp(artyp) { }
399
+
400
+
401
+ entry_type & Coef(int n, int m) { return sh.Coef(n,m); }
402
+ auto & SH() { return sh; }
403
+ const auto & SH() const { return sh; }
404
+ double Kappa() const { return kappa; }
405
+ double Scale() const { return RADIAL::Scale(kappa, rtyp); }
406
+ double RTyp() const { return rtyp; }
407
+ int Order() const { return sh.Order(); }
408
+
409
+ SphericalExpansion Truncate(int neworder) const
410
+ {
411
+ if (neworder > sh.Order()) neworder=sh.Order();
412
+ SphericalExpansion nmp(neworder, kappa, rtyp);
413
+ nmp.sh.Coefs() = sh.Coefs().Range(sqr(neworder+1));
414
+ return nmp;
415
+ }
416
+
417
+ SphericalExpansion & operator+= (const SphericalExpansion & mp2)
418
+ {
419
+ size_t commonsize = min(SH().Coefs().Size(), mp2.SH().Coefs().Size());
420
+ SH().Coefs().Range(commonsize) += mp2.SH().Coefs().Range(commonsize);
421
+ return *this;
422
+ }
423
+
424
+ entry_type Eval (Vec<3> x) const;
425
+ entry_type EvalDirectionalDerivative (Vec<3> x, Vec<3> d) const;
426
+
427
+ void AddCharge (Vec<3> x, entry_type c);
428
+ void AddDipole (Vec<3> x, Vec<3> dir, entry_type c);
429
+ void AddChargeDipole (Vec<3> x, entry_type c, Vec<3> dir, entry_type c2)
430
+ {
431
+ // TODO: add them at once
432
+ AddCharge (x, c);
433
+ AddDipole (x, dir, c2);
434
+ }
435
+
436
+ void AddPlaneWave (Vec<3> d, entry_type c);
437
+ void AddCurrent (Vec<3> ap, Vec<3> ep, Complex j, int num=100);
438
+
439
+
440
+ void ChangeRTypTo (double new_rtyp)
441
+ {
442
+ double fac = RADIAL::Scale(kappa, rtyp) / RADIAL::Scale(kappa, new_rtyp);
443
+ double prod = 1;
444
+ for (int n = 0; n <= sh.Order(); n++, prod*= fac)
445
+ sh.CoefsN(n) *= prod;
446
+ rtyp = new_rtyp;
447
+ }
448
+
449
+
450
+ Vector<double> Spectrum (bool scaled) const
451
+ {
452
+ Vector<double> spec(Order()+1);
453
+ double fac = 1;
454
+ for (int n = 0; n <= Order(); n++)
455
+ {
456
+ spec(n) = fac * L2Norm2(sh.CoefsN(n));
457
+ if (!scaled) fac *= sqr(Scale());
458
+ }
459
+ return spec;
460
+ }
461
+
462
+
463
+ template <typename TARGET>
464
+ void Transform (SphericalExpansion<TARGET,entry_type> & target, Vec<3> dist) const
465
+ {
466
+ if (target.SH().Order() < 0) return;
467
+ if (SH().Order() < 0)
468
+ {
469
+ target.SH().Coefs() = 0.0;
470
+ return;
471
+ }
472
+
473
+ // static Timer t("mptool Transform "+ToString(typeid(RADIAL).name())+ToString(typeid(TARGET).name()));
474
+ // RegionTimer reg(t);
475
+
476
+ auto [len, theta, phi] = SphericalCoordinates(dist);
477
+
478
+
479
+ // SphericalExpansion<RADIAL,entry_type> tmp{*this};
480
+ SphericalExpansion<RADIAL,entry_type> tmp(Order(), kappa, rtyp);
481
+ tmp.SH().Coefs() = SH().Coefs();
482
+
483
+ tmp.SH().RotateZ(phi);
484
+ tmp.SH().RotateY(theta);
485
+
486
+ tmp.ShiftZ(-len, target);
487
+
488
+ target.SH().RotateY(-theta);
489
+ target.SH().RotateZ(-phi);
490
+ }
491
+
492
+ template <typename TARGET>
493
+ void TransformAdd (SphericalExpansion<TARGET,entry_type> & target, Vec<3> dist, bool atomic = false) const
494
+ {
495
+ if (SH().Order() < 0) return;
496
+ if (target.SH().Order() < 0) return;
497
+
498
+ SphericalExpansion<TARGET,entry_type> tmp{target};
499
+ Transform(tmp, dist);
500
+ if (!atomic)
501
+ target.SH().Coefs() += tmp.SH().Coefs();
502
+ else
503
+ for (int j = 0; j < target.SH().Coefs().Size(); j++)
504
+ AtomicAdd(target.SH().Coefs()[j], tmp.SH().Coefs()[j]);
505
+ }
506
+
507
+ template <typename TARGET>
508
+ void ShiftZ (double z, SphericalExpansion<TARGET,entry_type> & target);
509
+
510
+
511
+ template <typename TARGET>
512
+ void In2Out (SphericalExpansion<TARGET,entry_type> & target, double r) const
513
+ {
514
+ Vector<Complex> rad(Order()+1);
515
+ Vector<Complex> radout(target.Order()+1);
516
+ RADIAL::Eval(Order(), kappa, r, RTyp(), rad);
517
+ TARGET::Eval(target.Order(), kappa, r, target.RTyp(), radout);
518
+ target.SH().Coefs() = 0;
519
+ for (int j = 0; j <= std::min(Order(), target.Order()); j++)
520
+ target.SH().CoefsN(j) = rad(j)/radout(j) * SH().CoefsN(j);
521
+ }
522
+ };
523
+
524
+
525
+
526
+ // ***************** parameters ****************
527
+
528
+ /*
529
+ static constexpr int MPOrder (double rho_kappa)
530
+ {
531
+ // return max (20, int(2*rho_kappa));
532
+ return 20+int(2*rho_kappa);
533
+ }
534
+ static constexpr int maxdirect = 100;
535
+ */
536
+
537
+
538
+ template <typename SCAL, auto S>
539
+ inline auto VecVector2Matrix (FlatVector<Vec<S,SCAL>> vec)
540
+ {
541
+ return FlatMatrixFixWidth<S,SCAL> (vec.Size(), vec.Data()->Data());
542
+ }
543
+
544
+ inline auto VecVector2Matrix (FlatVector<Complex> vec)
545
+ {
546
+ return FlatMatrixFixWidth<1,Complex> (vec.Size(), vec.Data());
547
+ }
548
+
549
+
550
+ template <typename entry_type=Complex>
551
+ class SingularMLExpansion
552
+ {
553
+ using simd_entry_type = decltype(MakeSimd(declval<std::array<entry_type,FMM_SW>>()));
554
+ static Array<size_t> nodes_on_level;
555
+
556
+ struct RecordingSS
557
+ {
558
+ const SphericalExpansion<Singular,entry_type> * mp_source;
559
+ SphericalExpansion<Singular,entry_type> * mp_target;
560
+ Vec<3> dist;
561
+ double len, theta, phi;
562
+ bool flipz;
563
+ public:
564
+ RecordingSS() = default;
565
+ RecordingSS (const SphericalExpansion<Singular,entry_type> * amp_source,
566
+ SphericalExpansion<Singular,entry_type> * amp_target,
567
+ Vec<3> adist)
568
+ : mp_source(amp_source), mp_target(amp_target), dist(adist)
569
+ {
570
+ std::tie(len, theta, phi) = SphericalCoordinates(adist);
571
+ // flipz = false;
572
+ flipz = theta > M_PI/2;
573
+ if (flipz) theta = M_PI-theta;
574
+ }
575
+ };
576
+
577
+
578
+ static void ProcessBatchSS(FlatArray<RecordingSS*> batch, double len, double theta) {
579
+ constexpr int vec_length = VecLength<entry_type>;
580
+ int batch_size = batch.Size();
581
+ int N = batch_size * vec_length;
582
+ // *testout << "Processing batch of size " << batch.Size() << ", with N = " << N << ", vec_length = " << vec_length << ", Type: " << typeid(entry_type).name() << ", len = " << len << ", theta = " << theta << endl;
583
+
584
+ if (N <= 1 || batch_size <= 1) {
585
+ for (auto* rec : batch) {
586
+ rec->mp_source->TransformAdd(*rec->mp_target, rec->dist, true);
587
+ }
588
+ }
589
+ else if (N <= 3) {
590
+ ProcessVectorizedBatchSS<3, vec_length>(batch, len, theta);
591
+ }
592
+ else if (N <= 4) {
593
+ ProcessVectorizedBatchSS<4, vec_length>(batch, len, theta);
594
+ }
595
+ else if (N <= 6) {
596
+ ProcessVectorizedBatchSS<6, vec_length>(batch, len, theta);
597
+ }
598
+ else if (N <= 12) {
599
+ ProcessVectorizedBatchSS<12, vec_length>(batch, len, theta);
600
+ }
601
+ else if (N <= 24) {
602
+ ProcessVectorizedBatchSS<24, vec_length>(batch, len, theta);
603
+ }
604
+ else if (N <= 48) {
605
+ ProcessVectorizedBatchSS<48, vec_length>(batch, len, theta);
606
+ }
607
+ else if (N <= 96) {
608
+ ProcessVectorizedBatchSS<96, vec_length>(batch, len, theta);
609
+ }
610
+ else if (N <= 192) {
611
+ ProcessVectorizedBatchSS<192, vec_length>(batch, len, theta);
612
+ }
613
+ else {
614
+ // Split large batches
615
+ ProcessBatchSS(batch.Range(0, 192 / vec_length), len, theta);
616
+ ProcessBatchSS(batch.Range(192 / vec_length, batch_size), len, theta);
617
+ }
618
+ }
619
+
620
+ template<int N, int vec_length>
621
+ static void ProcessVectorizedBatchSS(FlatArray<RecordingSS*> batch, double len, double theta) {
622
+
623
+ // *testout << "Processing vectorized S->S batch of size " << batch.Size() << ", with N = " << N << ", vec_length = " << vec_length << ", len = " << len << ", theta = " << theta << endl;
624
+ double kappa = batch[0]->mp_source->Kappa();
625
+ int so = batch[0]->mp_source->Order();
626
+ int to = batch[0]->mp_target->Order();
627
+ SphericalExpansion<Singular, Vec<N,Complex>> vec_source(so, kappa, batch[0]->mp_source->RTyp());
628
+ SphericalExpansion<Singular, Vec<N,Complex>> vec_target(to, kappa, batch[0]->mp_target->RTyp());
629
+
630
+ // Copy multipoles into vectorized multipole
631
+ for (int i = 0; i < batch.Size(); i++)
632
+ {
633
+ auto source_i = VecVector2Matrix (batch[i]->mp_source->SH().Coefs());
634
+ auto source_mati = VecVector2Matrix (vec_source.SH().Coefs()).Cols(i*vec_length, (i+1)*vec_length);
635
+ batch[i]->mp_source->SH().RotateZFlip(batch[i]->phi, batch[i]->flipz,
636
+ [source_i, source_mati] (size_t ii, Complex factor)
637
+ {
638
+ source_mati.Row(ii) = factor * source_i.Row(ii);
639
+ });
640
+ }
641
+
642
+ vec_source.SH().RotateY(theta, vec_source.SH().Order() >= 100);
643
+ vec_source.ShiftZ(-len, vec_target);
644
+ vec_target.SH().RotateY(-theta, vec_target.SH().Order() >= 100);
645
+
646
+ // Copy vectorized multipole into individual multipoles
647
+ for (int i = 0; i < batch.Size(); i++)
648
+ {
649
+ auto source_mati = VecVector2Matrix (vec_target.SH().Coefs()).Cols(i*vec_length, (i+1)*vec_length);
650
+ auto target_mati = VecVector2Matrix (batch[i]->mp_target->SH().Coefs());
651
+ batch[i]->mp_target->SH().RotateZFlip(-batch[i]->phi, batch[i]->flipz,
652
+ [source_mati, target_mati] (size_t ii, Complex factor)
653
+ {
654
+ AtomicAdd (target_mati.Row(ii), factor * source_mati.Row(ii));
655
+ });
656
+ }
657
+ }
658
+
659
+ struct Node
660
+ {
661
+ Vec<3> center;
662
+ double r;
663
+ int level;
664
+ std::array<unique_ptr<Node>,8> childs;
665
+ SphericalExpansion<Singular, entry_type> mp;
666
+
667
+ Array<tuple<Vec<3>, entry_type>> charges;
668
+ Array<tuple<Vec<3>, Vec<3>, entry_type>> dipoles;
669
+ Array<tuple<Vec<3>, entry_type, Vec<3>, entry_type>> chargedipoles;
670
+ Array<tuple<Vec<3>, Vec<3>, Complex,int>> currents;
671
+
672
+ using simd_entry_type = decltype(MakeSimd(declval<std::array<entry_type,FMM_SW>>()));
673
+ Array<tuple<Vec<3,SIMD<double,FMM_SW>>, simd_entry_type>> simd_charges;
674
+ Array<tuple<Vec<3,SIMD<double,FMM_SW>>, Vec<3,SIMD<double,FMM_SW>>, simd_entry_type>> simd_dipoles;
675
+ Array<tuple<Vec<3,SIMD<double,FMM_SW>>, simd_entry_type,
676
+ Vec<3,SIMD<double,FMM_SW>>, simd_entry_type>> simd_chargedipoles;
677
+
678
+ int total_sources;
679
+ const FMM_Parameters & fmm_params;
680
+ std::mutex node_mutex;
681
+ atomic<bool> have_childs{false};
682
+
683
+ Node (Vec<3> acenter, double ar, int alevel, double akappa, const FMM_Parameters & afmm_params)
684
+ // : center(acenter), r(ar), level(alevel), mp(MPOrder(ar*akappa), akappa, ar), fmm_params(afmm_params)
685
+ : center(acenter), r(ar), level(alevel), mp(afmm_params.minorder+2*ar*akappa, akappa, ar), fmm_params(afmm_params)
686
+ {
687
+ if (level < nodes_on_level.Size())
688
+ nodes_on_level[level]++;
689
+ }
690
+
691
+ int GetChildNum (Vec<3> x) const
692
+ {
693
+ int childnum = 0;
694
+ if (x(0) > center(0)) childnum += 1;
695
+ if (x(1) > center(1)) childnum += 2;
696
+ if (x(2) > center(2)) childnum += 4;
697
+ return childnum;
698
+ }
699
+
700
+ void CreateChilds()
701
+ {
702
+ if (childs[0]) throw Exception("have already childs");
703
+ for (int i = 0; i < 8; i++)
704
+ {
705
+ Vec<3> cc = center;
706
+ cc(0) += (i&1) ? r/2 : -r/2;
707
+ cc(1) += (i&2) ? r/2 : -r/2;
708
+ cc(2) += (i&4) ? r/2 : -r/2;
709
+ childs[i] = make_unique<Node> (cc, r/2, level+1, mp.Kappa(), fmm_params);
710
+ }
711
+ have_childs = true;
712
+ }
713
+
714
+
715
+ void SendSourcesToChilds()
716
+ {
717
+ CreateChilds();
718
+
719
+ for (auto [x,c] : charges)
720
+ AddCharge (x,c);
721
+ for (auto [x,d,c] : dipoles)
722
+ AddDipole (x,d,c);
723
+ for (auto [x,c,d,c2] : chargedipoles)
724
+ AddChargeDipole (x,c,d,c2);
725
+ for (auto [sp,ep,j,num] : currents)
726
+ AddCurrent (sp,ep,j,num);
727
+
728
+ charges.DeleteAll();
729
+ dipoles.DeleteAll();
730
+ chargedipoles.DeleteAll();
731
+ currents.DeleteAll();
732
+ }
733
+
734
+
735
+ void AddCharge (Vec<3> x, entry_type c)
736
+ {
737
+ if (have_childs) // quick check without locking
738
+ {
739
+ // directly send to childs:
740
+ int childnum = GetChildNum(x);
741
+ childs[childnum] -> AddCharge(x, c);
742
+ return;
743
+ }
744
+
745
+ lock_guard<mutex> guard(node_mutex);
746
+
747
+ if (have_childs) // test again after locking
748
+ {
749
+ int childnum = GetChildNum(x);
750
+ childs[childnum] -> AddCharge(x, c);
751
+ return;
752
+ }
753
+
754
+ charges.Append( tuple{x,c} );
755
+
756
+ // if (r*mp.Kappa() < 1e-8) return;
757
+ if (level > 20) return;
758
+ if (charges.Size() < fmm_params.maxdirect && r*mp.Kappa() < 5)
759
+ return;
760
+
761
+ SendSourcesToChilds();
762
+ }
763
+
764
+
765
+ void AddDipole (Vec<3> x, Vec<3> d, entry_type c)
766
+ {
767
+ if (have_childs)
768
+ {
769
+ // directly send to childs:
770
+ int childnum = GetChildNum(x);
771
+ childs[childnum] -> AddDipole(x, d, c);
772
+ return;
773
+ }
774
+
775
+ lock_guard<mutex> guard(node_mutex);
776
+
777
+ if (have_childs)
778
+ {
779
+ // directly send to childs:
780
+ int childnum = GetChildNum(x);
781
+ childs[childnum] -> AddDipole(x, d, c);
782
+ return;
783
+ }
784
+
785
+ dipoles.Append (tuple{x,d,c});
786
+
787
+ if (level > 20) return;
788
+ if (dipoles.Size() < fmm_params.maxdirect)
789
+ return;
790
+
791
+ SendSourcesToChilds();
792
+ }
793
+
794
+
795
+ void AddChargeDipole (Vec<3> x, entry_type c, Vec<3> dir, entry_type c2)
796
+ {
797
+ if (have_childs)
798
+ {
799
+ // directly send to childs:
800
+ int childnum = GetChildNum(x);
801
+ childs[childnum] -> AddChargeDipole(x, c, dir, c2);
802
+ return;
803
+ }
804
+
805
+ lock_guard<mutex> guard(node_mutex);
806
+
807
+ if (have_childs)
808
+ {
809
+ // directly send to childs:
810
+ int childnum = GetChildNum(x);
811
+ childs[childnum] -> AddChargeDipole(x, c, dir, c2);
812
+ return;
813
+ }
814
+
815
+ chargedipoles.Append (tuple{x,c,dir,c2});
816
+
817
+ if (chargedipoles.Size() < fmm_params.maxdirect || r < 1e-8)
818
+ return;
819
+
820
+ SendSourcesToChilds();
821
+
822
+ /*
823
+ AddCharge (x, c);
824
+ AddDipole (x, dir, c2);
825
+ */
826
+ }
827
+
828
+
829
+ // not parallel yet
830
+ void AddCurrent (Vec<3> sp, Vec<3> ep, Complex j, int num)
831
+ {
832
+ if (childs[0])
833
+ {
834
+ // split line and send to childs
835
+ Array<double> split;
836
+ split.Append(0);
837
+ for (int i = 0; i < 3; i++)
838
+ if ((sp(i) < center(i)) != (ep(i) < center(i)))
839
+ split += (center(i)-sp(i)) / (ep(i)-sp(i)); // segment cuts i-th coordinate plane
840
+ split.Append(1);
841
+ BubbleSort(split);
842
+
843
+ for (int i = 0; i < split.Size()-1; i++)
844
+ if (split[i+1] > split[i])
845
+ {
846
+ Vec<3> spi = sp + split[i]*(ep-sp);
847
+ Vec<3> epi = sp + split[i+1]*(ep-sp);
848
+
849
+ Vec<3> x = 0.5*(spi+epi);
850
+
851
+ int childnum = 0;
852
+ if (x(0) > center(0)) childnum += 1;
853
+ if (x(1) > center(1)) childnum += 2;
854
+ if (x(2) > center(2)) childnum += 4;
855
+ childs[childnum] -> AddCurrent(spi, epi, j, num);
856
+ }
857
+ return;
858
+ }
859
+
860
+ currents.Append (tuple{sp,ep,j,num});
861
+
862
+ // if (currents.Size() < maxdirect || r < 1e-8)
863
+ if (currents.Size() < 4 || r < 1e-8)
864
+ return;
865
+
866
+ SendSourcesToChilds();
867
+ /*
868
+ // if (currents.Size() < maxdirect || r < 1e-8)
869
+ if (currents.Size() < 4 || r < 1e-8)
870
+ return;
871
+
872
+ CreateChilds();
873
+
874
+ for (auto [x,c] : charges)
875
+ AddCharge (x,c);
876
+ for (auto [x,d,c] : dipoles)
877
+ AddDipole (x,d,c);
878
+ for (auto [sp,ep,j,num] : currents)
879
+ AddCurrent (sp,ep,j,num);
880
+
881
+ charges.SetSize0();
882
+ dipoles.SetSize0();
883
+ currents.SetSize0();
884
+ */
885
+ }
886
+
887
+
888
+
889
+
890
+ entry_type Evaluate(Vec<3> p) const
891
+ {
892
+ entry_type sum{0.0};
893
+ if (childs[0])
894
+ {
895
+ for (auto & child : childs)
896
+ sum += child->Evaluate(p);
897
+ return sum;
898
+ }
899
+
900
+ if (simd_charges.Size())
901
+ {
902
+ // static Timer t("mptool singmp, evaluate, simd charges"); RegionTimer r(t);
903
+ // t.AddFlops (charges.Size());
904
+
905
+ simd_entry_type vsum{0.0};
906
+ if (mp.Kappa() < 1e-12)
907
+ {
908
+ for (auto [x,c] : simd_charges)
909
+ {
910
+ auto rho = L2Norm(p-x);
911
+ auto kernel = 1/(4*M_PI)/rho;
912
+ kernel = If(rho > 0.0, kernel, SIMD<double,FMM_SW>(0.0));
913
+ vsum += kernel * c;
914
+
915
+ /*
916
+ auto rho2 = L2Norm2(p-x);
917
+ auto kernel = (1/(4*M_PI)) * rsqrt(rho2);
918
+ kernel = If(rho2 > 0.0, kernel, SIMD<double,FMM_SW>(0.0));
919
+ vsum += kernel * c;
920
+ */
921
+ }
922
+ }
923
+ else if (mp.Kappa() < 1e-8)
924
+ for (auto [x,c] : simd_charges)
925
+ {
926
+ auto rho = L2Norm(p-x);
927
+ auto kernel = (1/(4*M_PI))*SIMD<Complex,FMM_SW> (1,rho*mp.Kappa()) / rho;
928
+ kernel = If(rho > 0.0, kernel, SIMD<Complex,FMM_SW>(0.0));
929
+ vsum += kernel * c;
930
+ }
931
+ else
932
+ for (auto [x,c] : simd_charges)
933
+ {
934
+ auto rho = L2Norm(p-x);
935
+ auto [si,co] = sincos(rho*mp.Kappa());
936
+ auto kernel = (1/(4*M_PI))*SIMD<Complex,FMM_SW>(co,si) / rho;
937
+ kernel = If(rho > 0.0, kernel, SIMD<Complex,FMM_SW>(0.0));
938
+ vsum += kernel * c;
939
+ }
940
+
941
+ sum += HSum(vsum);
942
+ }
943
+ else
944
+ {
945
+ if (mp.Kappa() < 1e-8)
946
+ {
947
+ for (auto [x,c] : charges)
948
+ if (double rho = L2Norm(p-x); rho > 0)
949
+ sum += (1/(4*M_PI))*Complex(1,rho*mp.Kappa()) / rho * c;
950
+ }
951
+ else
952
+ for (auto [x,c] : charges)
953
+ if (double rho = L2Norm(p-x); rho > 0)
954
+ sum += (1/(4*M_PI))*exp(Complex(0,rho*mp.Kappa())) / rho * c;
955
+ }
956
+
957
+ if (simd_dipoles.Size())
958
+ {
959
+ // static Timer t("mptool singmp, evaluate, simd dipoles"); RegionTimer r(t);
960
+
961
+ simd_entry_type vsum{0.0};
962
+ for (auto [x,d,c] : simd_dipoles)
963
+ {
964
+ auto rho = L2Norm(p-x);
965
+ auto drhodp = (1.0/rho) * (p-x);
966
+ auto [si,co] = sincos(rho*mp.Kappa());
967
+ auto dGdrho = (1/(4*M_PI))*SIMD<Complex,FMM_SW>(co,si) *
968
+ (-1.0/(rho*rho) + SIMD<Complex,FMM_SW>(0, mp.Kappa())/rho);
969
+ auto kernel = dGdrho * InnerProduct(drhodp, d);
970
+ kernel = If(rho > 0.0, kernel, SIMD<Complex,FMM_SW>(0.0));
971
+ vsum += kernel * c;
972
+ }
973
+ sum += HSum(vsum);
974
+ }
975
+ else
976
+ {
977
+ for (auto [x,d,c] : dipoles)
978
+ if (double rho = L2Norm(p-x); rho > 0)
979
+ {
980
+ Vec<3> drhodp = 1.0/rho * (p-x);
981
+ Complex dGdrho = (1/(4*M_PI))*exp(Complex(0,rho*mp.Kappa())) *
982
+ (Complex(0, mp.Kappa())/rho - 1.0/sqr(rho));
983
+ sum += dGdrho * InnerProduct(drhodp, d) * c;
984
+ }
985
+ }
986
+
987
+
988
+
989
+ if (simd_chargedipoles.Size())
990
+ {
991
+ // static Timer t("mptool singmp, evaluate, simd chargedipoles"); RegionTimer r(t);
992
+ // t.AddFlops (simd_chargedipoles.Size()*FMM_SW);
993
+
994
+ simd_entry_type vsum{0.0};
995
+ for (auto [x,c,d,c2] : simd_chargedipoles)
996
+ {
997
+ auto rho = L2Norm(p-x);
998
+ auto rhokappa = rho*mp.Kappa();
999
+ auto invrho = If(rho>0.0, 1.0/rho, SIMD<double,FMM_SW>(0.0));
1000
+ auto [si,co] = sincos(rhokappa);
1001
+
1002
+ auto kernelc = (1/(4*M_PI))*invrho*SIMD<Complex,FMM_SW>(co,si);
1003
+ vsum += kernelc * c;
1004
+
1005
+ auto kernel =
1006
+ invrho*invrho * InnerProduct(p-x, d) *
1007
+ kernelc * SIMD<Complex,FMM_SW>(-1.0, rhokappa);
1008
+
1009
+ vsum += kernel * c2;
1010
+ }
1011
+ sum += HSum(vsum);
1012
+ }
1013
+ else
1014
+ {
1015
+ // static Timer t("mptool singmp, evaluate, chargedipoles"); RegionTimer r(t);
1016
+ // t.AddFlops (chargedipoles.Size());
1017
+
1018
+ for (auto [x,c,d,c2] : chargedipoles)
1019
+ if (double rho = L2Norm(p-x); rho > 0)
1020
+ {
1021
+ sum += (1/(4*M_PI))*exp(Complex(0,rho*mp.Kappa())) / rho * c;
1022
+
1023
+ Vec<3> drhodp = 1.0/rho * (p-x);
1024
+ Complex dGdrho = (1/(4*M_PI))*exp(Complex(0,rho*mp.Kappa())) *
1025
+ (Complex(0, mp.Kappa())/rho - 1.0/sqr(rho));
1026
+
1027
+ sum += dGdrho * InnerProduct(drhodp, d) * c2;
1028
+ }
1029
+ }
1030
+
1031
+
1032
+
1033
+
1034
+
1035
+ for (auto [sp,ep,j,num] : currents)
1036
+ {
1037
+ // should use explizit formula instead ...
1038
+
1039
+ Vec<3> tau = ep-sp;
1040
+ Vec<3> tau_num = 1.0/num * tau;
1041
+ for (int i = 0; i < num; i++)
1042
+ {
1043
+ Vec<3> x = sp+(i+0.5)*tau_num;
1044
+
1045
+ if (double rho = L2Norm(p-x); rho > 0)
1046
+ {
1047
+ Vec<3> drhodp = 1.0/rho * (p-x);
1048
+ Complex dGdrho = (1/(4*M_PI))*exp(Complex(0,rho*mp.Kappa())) *
1049
+ (Complex(0, mp.Kappa())/rho - 1.0/sqr(rho));
1050
+
1051
+ if constexpr (std::is_same<entry_type, Vec<3,Complex>>())
1052
+ sum += j*dGdrho * Cross(drhodp, tau_num);
1053
+ }
1054
+ }
1055
+ }
1056
+
1057
+ return sum;
1058
+ }
1059
+
1060
+ entry_type EvaluateDeriv(Vec<3> p, Vec<3> d) const
1061
+ {
1062
+ entry_type sum{0.0};
1063
+ if (childs[0])
1064
+ {
1065
+ for (auto & child : childs)
1066
+ sum += child->EvaluateDeriv(p, d);
1067
+ return sum;
1068
+ }
1069
+
1070
+ if (dipoles.Size())
1071
+ {
1072
+ static int cnt = 0;
1073
+ cnt++;
1074
+ if (cnt < 3)
1075
+ cout << "we know what we do - evaluateDeriv not implemented for dipoles in SingularMLExpansion" << endl;
1076
+ // return sum;
1077
+ // throw Exception("EvaluateDeriv not implemented for dipoles in SingularMLExpansion");
1078
+ }
1079
+ if (chargedipoles.Size())
1080
+ throw Exception("EvaluateDeriv not implemented for dipoles in SingularMLExpansion");
1081
+
1082
+ for (auto [x,c] : charges)
1083
+ if (double rho = L2Norm(p-x); rho > 0)
1084
+ {
1085
+ Vec<3> drhodp = 1.0/rho * (p-x);
1086
+ Complex dGdrho = (1/(4*M_PI))*exp(Complex(0,rho*mp.Kappa())) *
1087
+ (Complex(0, mp.Kappa())/rho - 1.0/sqr(rho));
1088
+ sum += dGdrho * InnerProduct(drhodp, d) * c;
1089
+ }
1090
+ return sum;
1091
+ }
1092
+
1093
+ void CalcTotalSources()
1094
+ {
1095
+ total_sources = charges.Size() + dipoles.Size() + chargedipoles.Size();
1096
+ for (auto & child : childs)
1097
+ if (child)
1098
+ {
1099
+ child->CalcTotalSources();
1100
+ total_sources += child->total_sources;
1101
+ }
1102
+ }
1103
+
1104
+ void CalcMP(Array<RecordingSS> * recording, Array<Node*> * nodes_to_process)
1105
+ {
1106
+ // mp.SH().Coefs() = 0.0;
1107
+ if (childs[0])
1108
+ {
1109
+ if (total_sources < 1000 || recording)
1110
+ for (auto & child : childs)
1111
+ child->CalcMP(recording, nodes_to_process);
1112
+ else
1113
+ ParallelFor (8, [&] (int nr)
1114
+ {
1115
+ childs[nr] -> CalcMP(recording, nodes_to_process);
1116
+ });
1117
+
1118
+
1119
+ for (auto & child : childs){
1120
+ if (recording && child->mp.SH().Coefs().Size() > 0)
1121
+ *recording += RecordingSS(&child->mp, &mp, center-child->center);
1122
+ else
1123
+ child->mp.TransformAdd(mp, center-child->center);
1124
+ }
1125
+ }
1126
+ else
1127
+ {
1128
+ if (charges.Size()+dipoles.Size()+chargedipoles.Size()+currents.Size() == 0)
1129
+ {
1130
+ mp = SphericalExpansion<Singular,entry_type> (-1, mp.Kappa(), 1.);
1131
+ return;
1132
+ }
1133
+
1134
+ // make simd charges, comment this block for testing ...
1135
+ simd_charges.SetSize( (charges.Size()+FMM_SW-1)/FMM_SW);
1136
+ size_t i = 0, ii = 0;
1137
+ for ( ; i+FMM_SW <= charges.Size(); i+=FMM_SW, ii++)
1138
+ {
1139
+ std::array<tuple<Vec<3>,entry_type>, FMM_SW> ca;
1140
+ for (int j = 0; j < FMM_SW; j++) ca[j] = charges[i+j];
1141
+ simd_charges[ii] = MakeSimd(ca);
1142
+ }
1143
+ if (i < charges.Size())
1144
+ {
1145
+ std::array<tuple<Vec<3>,entry_type>, FMM_SW> ca;
1146
+ int j = 0;
1147
+ for ( ; i+j < charges.Size(); j++) ca[j] = charges[i+j];
1148
+ for ( ; j < FMM_SW; j++) ca[j] = tuple( get<0>(ca[0]), entry_type{0.0} );
1149
+ simd_charges[ii] = MakeSimd(ca);
1150
+ }
1151
+
1152
+ simd_dipoles.SetSize( (dipoles.Size()+FMM_SW-1)/FMM_SW);
1153
+ i = 0, ii = 0;
1154
+ for ( ; i+FMM_SW <= dipoles.Size(); i+=FMM_SW, ii++)
1155
+ {
1156
+ std::array<tuple<Vec<3>,Vec<3>,entry_type>, FMM_SW> di;
1157
+ for (int j = 0; j < FMM_SW; j++) di[j] = dipoles[i+j];
1158
+ simd_dipoles[ii] = MakeSimd(di);
1159
+ }
1160
+ if (i < dipoles.Size())
1161
+ {
1162
+ std::array<tuple<Vec<3>,Vec<3>,entry_type>, FMM_SW> di;
1163
+ int j = 0;
1164
+ for ( ; i+j < dipoles.Size(); j++) di[j] = dipoles[i+j];
1165
+ for ( ; j < FMM_SW; j++) di[j] = tuple( get<0>(di[0]), get<1>(di[0]), entry_type{0.0} );
1166
+ simd_dipoles[ii] = MakeSimd(di);
1167
+ }
1168
+
1169
+
1170
+ simd_chargedipoles.SetSize( (chargedipoles.Size()+FMM_SW-1)/FMM_SW);
1171
+ i = 0, ii = 0;
1172
+ for ( ; i+FMM_SW <= chargedipoles.Size(); i+=FMM_SW, ii++)
1173
+ {
1174
+ std::array<tuple<Vec<3>,entry_type,Vec<3>,entry_type>, FMM_SW> di;
1175
+ for (int j = 0; j < FMM_SW; j++) di[j] = chargedipoles[i+j];
1176
+ simd_chargedipoles[ii] = MakeSimd(di);
1177
+ }
1178
+ if (i < chargedipoles.Size())
1179
+ {
1180
+ std::array<tuple<Vec<3>,entry_type,Vec<3>,entry_type>, FMM_SW> di;
1181
+ int j = 0;
1182
+ for ( ; i+j < chargedipoles.Size(); j++) di[j] = chargedipoles[i+j];
1183
+ for ( ; j < FMM_SW; j++) di[j] = tuple( get<0>(di[0]), entry_type{0.0}, get<2>(di[0]), entry_type{0.0} );
1184
+ simd_chargedipoles[ii] = MakeSimd(di);
1185
+ }
1186
+
1187
+
1188
+ if (nodes_to_process)
1189
+ *nodes_to_process += this;
1190
+ else {
1191
+ for (auto [x,c] : charges)
1192
+ mp.AddCharge (x-center,c);
1193
+
1194
+ for (auto [x,d,c] : dipoles)
1195
+ mp.AddDipole (x-center, d, c);
1196
+
1197
+ for (auto [x,c,d,c2] : chargedipoles)
1198
+ mp.AddChargeDipole (x-center, c, d, c2);
1199
+
1200
+ for (auto [sp,ep,j,num] : currents)
1201
+ mp.AddCurrent (sp-center, ep-center, j, num);
1202
+ }
1203
+ }
1204
+ }
1205
+
1206
+ entry_type EvaluateMP(Vec<3> p) const
1207
+ {
1208
+ if (charges.Size() || dipoles.Size() || chargedipoles.Size())
1209
+ return Evaluate(p);
1210
+
1211
+ if (L2Norm(p-center) > 3*r)
1212
+ return mp.Eval(p-center);
1213
+
1214
+ if (!childs[0]) // || level==1)
1215
+ return Evaluate(p);
1216
+
1217
+ entry_type sum{0.0};
1218
+ for (auto & child : childs)
1219
+ sum += child->EvaluateMP(p);
1220
+ return sum;
1221
+ }
1222
+
1223
+ entry_type EvaluateMPDeriv(Vec<3> p, Vec<3> d) const
1224
+ {
1225
+ // cout << "EvaluateMPDeriv Singular, p = " << p << ", d = " << d << ", r = " << r << ", center = " << center << endl;
1226
+ // cout << "Norm: " << L2Norm(p-center) << " > " << 3*r << endl;
1227
+ // cout << "charges.Size() = " << charges.Size() << ", dipoles.Size() = " << dipoles.Size() << endl;
1228
+ if (charges.Size() || dipoles.Size() || chargedipoles.Size() || !childs[0])
1229
+ return EvaluateDeriv(p, d);
1230
+
1231
+ if (L2Norm(p-center) > 3*r)
1232
+ return mp.EvalDirectionalDerivative(p-center, d);
1233
+
1234
+ entry_type sum{0.0};
1235
+ for (auto & child : childs)
1236
+ sum += child->EvaluateMPDeriv(p, d);
1237
+ return sum;
1238
+ }
1239
+
1240
+ void Print (ostream & ost, size_t childnr = -1) const
1241
+ {
1242
+ if (childnr == -1)
1243
+ ost << "c = " << center << ", r = " << r << ", level = " << level << endl;
1244
+ else
1245
+ ost << "c = " << center << ", r = " << r << ", level = " << level << ", childnr = " << childnr << endl;
1246
+ // for (int i = 0; i < loc_pnts.Size(); i++)
1247
+ for (auto [x,c] : charges)
1248
+ ost << "xi = " << x << ", ci = " << c << endl;
1249
+ for (auto [x,d,c] : dipoles)
1250
+ ost << "xi = " << x << ", di = " << d << ", ci = " << c << endl;
1251
+ for (auto [x,c,d,c2] : chargedipoles)
1252
+ ost << "xi = " << x << ", c = " << c << ", di = " << d << ", ci = " << c2 << endl;
1253
+
1254
+ for (int i = 0; i < 8; i++)
1255
+ if (childs[i]) childs[i] -> Print (ost, i);
1256
+ }
1257
+
1258
+ double Norm () const
1259
+ {
1260
+ double norm = L2Norm(mp.SH().Coefs());
1261
+ if (childs[0])
1262
+ for (auto & ch : childs)
1263
+ norm += ch->Norm();
1264
+ return norm;
1265
+ }
1266
+
1267
+ size_t NumCoefficients() const
1268
+ {
1269
+ size_t num = sqr(mp.SH().Order()+1);
1270
+ if (childs[0])
1271
+ for (auto & ch : childs)
1272
+ num += ch->NumCoefficients();
1273
+ return num;
1274
+ }
1275
+
1276
+ void TraverseTree (const std::function<void(Node&)> & func)
1277
+ {
1278
+ func(*this);
1279
+ for (auto & child : childs)
1280
+ if (child)
1281
+ child->TraverseTree(func);
1282
+ }
1283
+ };
1284
+
1285
+ FMM_Parameters fmm_params;
1286
+ Node root;
1287
+ bool havemp = false;
1288
+
1289
+ public:
1290
+ SingularMLExpansion (Vec<3> center, double r, double kappa, FMM_Parameters _params = FMM_Parameters())
1291
+ : fmm_params(_params), root(center, r, 0, kappa, fmm_params)
1292
+ {
1293
+ nodes_on_level = 0;
1294
+ nodes_on_level[0] = 1;
1295
+ }
1296
+
1297
+ double Kappa() const { return root.mp.Kappa(); }
1298
+
1299
+ void AddCharge(Vec<3> x, entry_type c)
1300
+ {
1301
+ root.AddCharge(x, c);
1302
+ }
1303
+
1304
+ void AddDipole(Vec<3> x, Vec<3> d, entry_type c)
1305
+ {
1306
+ root.AddDipole(x, d, c);
1307
+ }
1308
+
1309
+ void AddChargeDipole(Vec<3> x, entry_type c, Vec<3> dir, entry_type c2)
1310
+ {
1311
+ root.AddChargeDipole(x, c, dir, c2);
1312
+ }
1313
+
1314
+ void AddCurrent (Vec<3> sp, Vec<3> ep, Complex j, int num)
1315
+ {
1316
+ if constexpr (!std::is_same<entry_type, Vec<3,Complex>>())
1317
+ throw Exception("AddCurrent needs a singular vectorial MP");
1318
+
1319
+ root.AddCurrent (sp, ep, j, num);
1320
+ /*
1321
+ // for testing
1322
+ Vec<3> tau = ep-sp;
1323
+ Vec<3> tau_num = 1.0/num * tau;
1324
+ for (int i = 0; i < num; i++)
1325
+ {
1326
+ for (int k = 0; k < 3; k++)
1327
+ {
1328
+ Vec<3> ek{0.0}; ek(k) = 1;
1329
+ Vec<3> cp = Cross(tau, ek);
1330
+ Vec<3,Complex> source{0.0};
1331
+ source(k) = j/double(num);
1332
+ if constexpr (std::is_same<entry_type, Vec<3,Complex>>())
1333
+ root.AddDipole (sp+(i+0.5)*tau_num, cp, source);
1334
+ }
1335
+ }
1336
+ */
1337
+ }
1338
+
1339
+ void Print (ostream & ost) const
1340
+ {
1341
+ root.Print(ost);
1342
+ }
1343
+
1344
+ double Norm() const
1345
+ {
1346
+ return root.Norm();
1347
+ }
1348
+
1349
+ size_t NumCoefficients() const
1350
+ {
1351
+ return root.NumCoefficients();
1352
+ }
1353
+
1354
+ void CalcMP()
1355
+ {
1356
+ static Timer t("mptool compute singular MLMP"); RegionTimer rg(t);
1357
+ static Timer ts2mp("mptool compute singular MLMP - source2mp");
1358
+ static Timer tS2S("mptool compute singular MLMP - S->S");
1359
+ static Timer trec("mptool comput singular recording");
1360
+ static Timer tsort("mptool comput singular sort");
1361
+
1362
+ /*
1363
+ int maxlevel = 0;
1364
+ for (auto [i,num] : Enumerate(nodes_on_level))
1365
+ if (num > 0) maxlevel = i;
1366
+
1367
+ for (int i = 0; i <= maxlevel; i++)
1368
+ cout << "sing " << i << ": " << nodes_on_level[i] << endl;
1369
+ */
1370
+
1371
+ root.CalcTotalSources();
1372
+
1373
+ if constexpr (false)
1374
+ // direct evaluation of S->S
1375
+ root.CalcMP(nullptr, nullptr);
1376
+ else
1377
+ {
1378
+
1379
+ Array<RecordingSS> recording;
1380
+ Array<Node*> nodes_to_process;
1381
+
1382
+ {
1383
+ RegionTimer reg(trec);
1384
+ root.CalcMP(&recording, &nodes_to_process);
1385
+ }
1386
+
1387
+ {
1388
+ RegionTimer rs2mp(ts2mp);
1389
+ ParallelFor(nodes_to_process.Size(), [&](int i)
1390
+ {
1391
+ auto node = nodes_to_process[i];
1392
+ for (auto [x,c]: node->charges)
1393
+ node->mp.AddCharge(x-node->center, c);
1394
+ for (auto [x,d,c]: node->dipoles)
1395
+ node->mp.AddDipole(x-node->center, d, c);
1396
+ for (auto [x,c,d,c2]: node->chargedipoles)
1397
+ node->mp.AddChargeDipole(x-node->center, c, d, c2);
1398
+ for (auto [sp,ep,j,num]: node->currents)
1399
+ node->mp.AddCurrent(sp-node->center, ep-node->center, j, num);
1400
+ }, TasksPerThread(4));
1401
+ }
1402
+
1403
+ {
1404
+ RegionTimer reg(tsort);
1405
+ QuickSort (recording, [] (auto & a, auto & b)
1406
+ {
1407
+ if (a.len < (1-1e-8) * b.len) return true;
1408
+ if (a.len > (1+1e-8) * b.len) return false;
1409
+ return a.theta < b.theta;
1410
+ });
1411
+ }
1412
+
1413
+ double current_len = -1e100;
1414
+ double current_theta = -1e100;
1415
+ Array<RecordingSS*> current_batch;
1416
+ Array<Array<RecordingSS*>> batch_group;
1417
+ Array<double> group_lengths;
1418
+ Array<double> group_thetas;
1419
+ for (auto & record : recording)
1420
+ {
1421
+ bool len_changed = fabs(record.len - current_len) > 1e-8;
1422
+ bool theta_changed = fabs(record.theta - current_theta) > 1e-8;
1423
+ if ((len_changed || theta_changed) && current_batch.Size() > 0) {
1424
+ batch_group.Append(current_batch);
1425
+ group_lengths.Append(current_len);
1426
+ group_thetas.Append(current_theta);
1427
+ current_batch.SetSize(0);
1428
+ }
1429
+
1430
+ current_len = record.len;
1431
+ current_theta = record.theta;
1432
+ current_batch.Append(&record);
1433
+ }
1434
+
1435
+ if (current_batch.Size() > 0) {
1436
+ batch_group.Append(current_batch);
1437
+ group_lengths.Append(current_len);
1438
+ group_thetas.Append(current_theta);
1439
+ }
1440
+
1441
+ {
1442
+ RegionTimer rS2S(tS2S);
1443
+ // ParallelFor(batch_group.Size(), [&](int i) {
1444
+ for (int i = 0; i < batch_group.Size(); i++){
1445
+ // *testout << "Processing batch " << i << " of size " << batch_group[i].Size() << ", with len = " << group_lengths[i] << ", theta = " << group_thetas[i] << endl;
1446
+ int chunk_size = 24;
1447
+ if (batch_group[i].Size() < chunk_size)
1448
+ ProcessBatchSS(batch_group[i], group_lengths[i], group_thetas[i]);
1449
+ else
1450
+ ParallelForRange(IntRange(batch_group[i].Size()), [&](IntRange range) {
1451
+ auto sub_batch = batch_group[i].Range(range.First(), range.Next());
1452
+ ProcessBatchSS(sub_batch, group_lengths[i], group_thetas[i]);
1453
+ }, TasksPerThread(4));
1454
+ }
1455
+ }
1456
+ }
1457
+
1458
+ // cout << "have singular:" << endl;
1459
+ // PrintStatistics (cout);
1460
+ havemp = true;
1461
+ }
1462
+
1463
+ entry_type Evaluate (Vec<3> p) const
1464
+ {
1465
+ if (havemp)
1466
+ return root.EvaluateMP(p);
1467
+ else
1468
+ return root.Evaluate(p);
1469
+ }
1470
+
1471
+
1472
+ void PrintStatistics (ostream & ost)
1473
+ {
1474
+ int levels = 0;
1475
+ int cnt = 0;
1476
+ root.TraverseTree( [&](Node & node) {
1477
+ levels = max(levels, node.level);
1478
+ cnt++;
1479
+ });
1480
+ ost << "levels: " << levels << endl;
1481
+ ost << "nodes: " << cnt << endl;
1482
+
1483
+ Array<int> num_on_level(levels+1);
1484
+ Array<int> order_on_level(levels+1);
1485
+ Array<size_t> coefs_on_level(levels+1);
1486
+ num_on_level = 0;
1487
+ order_on_level = 0;
1488
+ root.TraverseTree( [&](Node & node) {
1489
+ num_on_level[node.level]++;
1490
+ order_on_level[node.level] = max(order_on_level[node.level],node.mp.Order());
1491
+ coefs_on_level[node.level] += node.mp.SH().Coefs().Size();
1492
+ });
1493
+
1494
+ cout << "num on level" << endl;
1495
+ for (int i = 0; i < num_on_level.Size(); i++)
1496
+ cout << i << ": " << num_on_level[i] << ", order = " << order_on_level[i] << ", coefs " << coefs_on_level[i] << endl;
1497
+
1498
+ size_t totcoefs = 0;
1499
+ for (auto n : coefs_on_level)
1500
+ totcoefs += n;
1501
+ cout << "total mem in coefs: " << sizeof(entry_type)*totcoefs / sqr(1024) << " MB" << endl;
1502
+ }
1503
+
1504
+
1505
+
1506
+ template <typename entry_type2>
1507
+ friend class RegularMLExpansion;
1508
+ };
1509
+
1510
+
1511
+ template <typename entry_type>
1512
+ inline ostream & operator<< (ostream & ost, const SingularMLExpansion<entry_type> & mlmp)
1513
+ {
1514
+ mlmp.Print(ost);
1515
+ return ost;
1516
+ }
1517
+
1518
+
1519
+ // *********************************** Regular multilevel Expansion
1520
+
1521
+
1522
+ template <typename elem_type=Complex>
1523
+ class NGS_DLL_HEADER RegularMLExpansion
1524
+ {
1525
+ static Array<size_t> nodes_on_level;
1526
+
1527
+
1528
+ struct RecordingRS
1529
+ {
1530
+ const SphericalExpansion<Singular,elem_type> * mpS;
1531
+ SphericalExpansion<Regular,elem_type> * mpR;
1532
+ Vec<3> dist;
1533
+ double len, theta, phi;
1534
+ public:
1535
+ RecordingRS() = default;
1536
+ RecordingRS (const SphericalExpansion<Singular,elem_type> * ampS,
1537
+ SphericalExpansion<Regular,elem_type> * ampR,
1538
+ Vec<3> adist)
1539
+ : mpS(ampS), mpR(ampR), dist(adist)
1540
+ {
1541
+ std::tie(len, theta, phi) = SphericalCoordinates(dist);
1542
+ }
1543
+ };
1544
+
1545
+ static void ProcessBatchRS(FlatArray<RecordingRS*> batch, double len, double theta) {
1546
+ // static Timer t("ProcessBatchRS"); RegionTimer reg(t, batch.Size());
1547
+ constexpr int vec_length = VecLength<elem_type>;
1548
+ int batch_size = batch.Size();
1549
+ int N = batch_size * vec_length;
1550
+ // *testout << "Processing batch of size " << batch.Size() << ", with N = " << N << ", vec_length = " << vec_length << ", Type: " << typeid(elem_type).name() << ", len = " << len << ", theta = " << theta << endl;
1551
+
1552
+ if (N <= 1 || batch_size <= 1) {
1553
+ for (auto* rec : batch) {
1554
+ rec->mpS->TransformAdd(*rec->mpR, rec->dist);
1555
+ }
1556
+ }
1557
+ else if (N <= 3) {
1558
+ ProcessVectorizedBatchRS<3, vec_length>(batch, len, theta);
1559
+ }
1560
+ else if (N <= 4) {
1561
+ ProcessVectorizedBatchRS<4, vec_length>(batch, len, theta);
1562
+ }
1563
+ else if (N <= 6) {
1564
+ ProcessVectorizedBatchRS<6, vec_length>(batch, len, theta);
1565
+ }
1566
+ else if (N <= 12) {
1567
+ ProcessVectorizedBatchRS<12, vec_length>(batch, len, theta);
1568
+ }
1569
+ else if (N <= 24) {
1570
+ ProcessVectorizedBatchRS<24, vec_length>(batch, len, theta);
1571
+ }
1572
+ else if (N <= 48) {
1573
+ ProcessVectorizedBatchRS<48, vec_length>(batch, len, theta);
1574
+ }
1575
+ else if (N <= 96) {
1576
+ ProcessVectorizedBatchRS<96, vec_length>(batch, len, theta);
1577
+ }
1578
+ else if (N <= 192) {
1579
+ ProcessVectorizedBatchRS<192, vec_length>(batch, len, theta);
1580
+ }
1581
+ else {
1582
+ // Split large batches
1583
+ /*
1584
+ ProcessBatch(batch.Range(0, 192 / vec_length), len, theta);
1585
+ ProcessBatch(batch.Range(192 / vec_length, batch_size), len, theta);
1586
+ */
1587
+
1588
+ /*
1589
+ ParallelFor (2, [&] (int i)
1590
+ {
1591
+ if (i == 0)
1592
+ ProcessBatchRS(batch.Range(0, 192 / vec_length), len, theta);
1593
+ else
1594
+ ProcessBatchRS(batch.Range(192 / vec_length, batch_size), len, theta);
1595
+ }, 2);
1596
+ */
1597
+
1598
+
1599
+ size_t chunksize = 192/vec_length;
1600
+ size_t num = (batch.Size()+chunksize-1) / chunksize;
1601
+ ParallelFor (num, [&](int i)
1602
+ {
1603
+ ProcessBatchRS(batch.Range(i*chunksize, min((i+1)*chunksize, batch.Size())), len, theta);
1604
+ }, num);
1605
+
1606
+ }
1607
+ }
1608
+
1609
+
1610
+ template<int N, int vec_length>
1611
+ static void ProcessVectorizedBatchRS(FlatArray<RecordingRS*> batch, double len, double theta) {
1612
+
1613
+ // static Timer t("ProcessVectorizedBatch, N = "+ToString(N) + ", vec_len = " + ToString(vec_length));
1614
+ // RegionTimer reg(t, batch[0]->mpS->SH().Order());
1615
+ // static Timer ttobatch("mptools - copy to batch 2");
1616
+ // static Timer tfrombatch("mptools - copy from batch 2");
1617
+
1618
+ // *testout << "Processing vectorized batch of size " << batch.Size() << ", with N = " << N << ", vec_length = " << vec_length << ", len = " << len << ", theta = " << theta << endl;
1619
+ SphericalExpansion<Singular, Vec<N,Complex>> vec_source(batch[0]->mpS->Order(), batch[0]->mpS->Kappa(), batch[0]->mpS->RTyp());
1620
+ // SphericalExpansion<Singular, elem_type> tmp_source{*batch[0]->mpS};
1621
+ SphericalExpansion<Regular, elem_type> tmp_target{*batch[0]->mpR};
1622
+ SphericalExpansion<Regular, Vec<N,Complex>> vec_target(batch[0]->mpR->Order(), batch[0]->mpR->Kappa(), batch[0]->mpR->RTyp());
1623
+
1624
+ // Copy multipoles into vectorized multipole
1625
+ // ttobatch.Start();
1626
+ for (int i = 0; i < batch.Size(); i++)
1627
+ {
1628
+ auto source_i = VecVector2Matrix (batch[i]->mpS->SH().Coefs());
1629
+ auto source_mati = VecVector2Matrix (vec_source.SH().Coefs()).Cols(i*vec_length, (i+1)*vec_length);
1630
+ batch[i]->mpS->SH().RotateZ(batch[i]->phi,
1631
+ [source_i, source_mati] (size_t ii, Complex factor)
1632
+ {
1633
+ source_mati.Row(ii) = factor * source_i.Row(ii);
1634
+ });
1635
+ }
1636
+
1637
+ // ttobatch.Stop();
1638
+
1639
+ vec_source.SH().RotateY(theta);
1640
+ vec_source.ShiftZ(-len, vec_target);
1641
+ vec_target.SH().RotateY(-theta);
1642
+
1643
+ // Copy vectorized multipole into individual multipoles
1644
+ // tfrombatch.Start();
1645
+ for (int i = 0; i < batch.Size(); i++) {
1646
+ // auto source_i = VecVector2Matrix (tmp_target.SH().Coefs());
1647
+ auto source_mati = VecVector2Matrix (vec_target.SH().Coefs()).Cols(i*vec_length, (i+1)*vec_length);
1648
+ auto targeti = VecVector2Matrix(batch[i]->mpR->SH().Coefs());
1649
+
1650
+ tmp_target.SH().RotateZ(-batch[i]->phi,
1651
+ [source_mati, targeti] (size_t ii, Complex factor)
1652
+ {
1653
+ // source_i.Row(ii) = factor * source_mati.Row(ii);
1654
+ AtomicAdd (VectorView(targeti.Row(ii)), factor * source_mati.Row(ii));
1655
+ });
1656
+ // for (int j = 0; j < tmp_target.SH().Coefs().Size(); j++)
1657
+ // AtomicAdd(batch[i]->mpR->SH().Coefs()[j], tmp_target.SH().Coefs()[j]);
1658
+ }
1659
+ // tfrombatch.Stop();
1660
+
1661
+ }
1662
+
1663
+
1664
+ struct Node
1665
+ {
1666
+ Vec<3> center;
1667
+ double r;
1668
+ int level;
1669
+ std::array<unique_ptr<Node>,8> childs;
1670
+ SphericalExpansion<Regular,elem_type> mp;
1671
+ Array<Vec<3>> targets;
1672
+ Array<tuple<Vec<3>,double>> vol_targets;
1673
+ int total_targets;
1674
+ std::mutex node_mutex;
1675
+ atomic<bool> have_childs{false};
1676
+
1677
+ Array<const typename SingularMLExpansion<elem_type>::Node*> singnodes;
1678
+ const FMM_Parameters & params;
1679
+
1680
+
1681
+ Node (Vec<3> acenter, double ar, int alevel, double kappa, const FMM_Parameters & _params)
1682
+ : center(acenter), r(ar), level(alevel),
1683
+ // mp(MPOrder(ar*kappa), kappa, ar) // 1.0/min(1.0, 0.25*r*kappa))
1684
+ mp(-1, kappa, ar), params(_params)
1685
+ // : center(acenter), r(ar), level(alevel), mp(MPOrder(ar*kappa), kappa, 1.0)
1686
+ {
1687
+ if (level < nodes_on_level.Size())
1688
+ nodes_on_level[level]++;
1689
+ }
1690
+
1691
+ void Allocate()
1692
+ {
1693
+ // mp = SphericalExpansion<Regular,elem_type>(MPOrder(r*mp.Kappa()), mp.Kappa(), r);
1694
+ mp = SphericalExpansion<Regular,elem_type>(params.minorder+2*r*mp.Kappa(), mp.Kappa(), r);
1695
+ }
1696
+
1697
+
1698
+ void CreateChilds(bool allocate = false)
1699
+ {
1700
+ if (childs[0]) throw Exception("have already childs");
1701
+ // create children nodes:
1702
+ for (int i = 0; i < 8; i++)
1703
+ {
1704
+ Vec<3> cc = center;
1705
+ cc(0) += (i&1) ? r/2 : -r/2;
1706
+ cc(1) += (i&2) ? r/2 : -r/2;
1707
+ cc(2) += (i&4) ? r/2 : -r/2;
1708
+ childs[i] = make_unique<Node> (cc, r/2, level+1, mp.Kappa(), params);
1709
+ if (allocate)
1710
+ childs[i] -> Allocate();
1711
+ }
1712
+ have_childs = true;
1713
+ }
1714
+
1715
+ void AddSingularNode (const typename SingularMLExpansion<elem_type>::Node & singnode, bool allow_refine,
1716
+ Array<RecordingRS> * recording)
1717
+ {
1718
+ if (mp.SH().Order() < 0) return;
1719
+ if (singnode.mp.SH().Order() < 0) return;
1720
+ // if (L2Norm(singnode.mp.SH().Coefs()) == 0) return;
1721
+ if (level > 20)
1722
+ {
1723
+ singnodes.Append(&singnode);
1724
+ return;
1725
+ }
1726
+
1727
+ // static Timer t("AddSingularNode"); RegionTimer reg(t);
1728
+
1729
+ Vec<3> dist = center-singnode.center;
1730
+
1731
+ // if (L2Norm(dist)*mp.Kappa() > (mp.Order()+singnode.mp.Order()))
1732
+ if (L2Norm(dist) > 2*(r + singnode.r))
1733
+ {
1734
+ if (singnode.mp.Order() > 2 * mp.Order() &&
1735
+ singnode.childs[0] &&
1736
+ singnode.childs[0]->mp.Order() < singnode.mp.Order())
1737
+ {
1738
+ for (auto & child : singnode.childs)
1739
+ AddSingularNode (*child, allow_refine, recording);
1740
+ return;
1741
+ }
1742
+
1743
+ // static Timer t("mptool transform Helmholtz-criterion"); RegionTimer r(t);
1744
+ if (recording)
1745
+ *recording += RecordingRS(&singnode.mp, &mp, dist);
1746
+ else
1747
+ singnode.mp.TransformAdd(mp, dist);
1748
+ return;
1749
+ }
1750
+
1751
+
1752
+ if ( singnode.childs[0]==nullptr )
1753
+ {
1754
+ singnodes.Append(&singnode);
1755
+ return;
1756
+ }
1757
+
1758
+ if (r > singnode.r)
1759
+ {
1760
+ if (allow_refine)
1761
+ {
1762
+ if (!childs[0])
1763
+ CreateChilds(true);
1764
+
1765
+ for (auto & ch : childs)
1766
+ ch -> AddSingularNode (singnode, allow_refine, recording);
1767
+ }
1768
+ else
1769
+ {
1770
+ if (total_targets < 1000 || recording)
1771
+ {
1772
+ for (auto & ch : childs)
1773
+ if (ch)
1774
+ ch -> AddSingularNode (singnode, allow_refine, recording);
1775
+ }
1776
+ else
1777
+ ParallelFor (8, [&] (int nr)
1778
+ {
1779
+ if (childs[nr])
1780
+ childs[nr] -> AddSingularNode (singnode, allow_refine, recording);
1781
+ });
1782
+
1783
+ if (targets.Size()+vol_targets.Size())
1784
+ singnodes.Append(&singnode);
1785
+ }
1786
+ }
1787
+ else
1788
+ {
1789
+ for (auto & childsing : singnode.childs)
1790
+ AddSingularNode (*childsing, allow_refine, recording);
1791
+ }
1792
+ }
1793
+
1794
+ void LocalizeExpansion(bool allow_refine)
1795
+ {
1796
+ if (allow_refine)
1797
+ if (mp.Order() > 30 && !childs[0])
1798
+ CreateChilds(allow_refine);
1799
+
1800
+ if (childs[0])
1801
+ {
1802
+ if (total_targets < 1000)
1803
+ {
1804
+ for (int nr = 0; nr < 8; nr++)
1805
+ {
1806
+ if (L2Norm(mp.SH().Coefs()) > 0)
1807
+ mp.TransformAdd (childs[nr]->mp, childs[nr]->center-center);
1808
+ childs[nr]->LocalizeExpansion(allow_refine);
1809
+ }
1810
+ }
1811
+ else
1812
+ ParallelFor(8, [&] (int nr)
1813
+ {
1814
+ if (L2Norm(mp.SH().Coefs()) > 0)
1815
+ mp.TransformAdd (childs[nr]->mp, childs[nr]->center-center);
1816
+ childs[nr]->LocalizeExpansion(allow_refine);
1817
+ });
1818
+ mp = SphericalExpansion<Regular,elem_type>(-1, mp.Kappa(), 1.);
1819
+ //mp.SH().Coefs()=0.0;
1820
+ }
1821
+ }
1822
+
1823
+ elem_type Evaluate (Vec<3> p) const
1824
+ {
1825
+ elem_type sum{0.0};
1826
+
1827
+ int childnum = 0;
1828
+ if (p(0) > center(0)) childnum += 1;
1829
+ if (p(1) > center(1)) childnum += 2;
1830
+ if (p(2) > center(2)) childnum += 4;
1831
+ if (childs[childnum])
1832
+ sum = childs[childnum]->Evaluate(p);
1833
+ else
1834
+ {
1835
+ // static Timer t("mptool regmp, evaluate reg"); RegionTimer r(t);
1836
+ sum = mp.Eval(p-center);
1837
+ }
1838
+
1839
+ {
1840
+ // static Timer t("mptool regmp, evaluate, singnode"); RegionTimer r(t);
1841
+ for (auto sn : singnodes)
1842
+ sum += sn->EvaluateMP(p);
1843
+ }
1844
+ return sum;
1845
+ }
1846
+
1847
+ elem_type EvaluateDirectionalDerivative (Vec<3> p, Vec<3> d) const
1848
+ {
1849
+ elem_type sum{0.0};
1850
+ // cout << "EvaluateDirectionalDerivative RegularMLMP, r = " << r << ", level = " << level << ", center = " << center << endl;
1851
+ // cout << "Singnodes: " << singnodes.Size() << ", childs: " << childs[0] << endl;
1852
+
1853
+ int childnum = 0;
1854
+ if (p(0) > center(0)) childnum += 1;
1855
+ if (p(1) > center(1)) childnum += 2;
1856
+ if (p(2) > center(2)) childnum += 4;
1857
+ if (childs[childnum])
1858
+ sum = childs[childnum]->EvaluateDirectionalDerivative(p, d);
1859
+ else
1860
+ sum = mp.EvalDirectionalDerivative(p-center, d);
1861
+
1862
+ static Timer t("mptool direct evaluate deriv"); RegionTimer r(t);
1863
+ for (auto sn : singnodes)
1864
+ sum += sn->EvaluateMPDeriv(p, d);
1865
+
1866
+ return sum;
1867
+ }
1868
+
1869
+ void TraverseTree (const std::function<void(Node&)> & func)
1870
+ {
1871
+ func(*this);
1872
+ for (auto & child : childs)
1873
+ if (child)
1874
+ child->TraverseTree(func);
1875
+ }
1876
+
1877
+ double Norm() const
1878
+ {
1879
+ double norm = L2Norm(mp.SH().Coefs());
1880
+ if (childs[0])
1881
+ for (auto & ch : childs)
1882
+ norm += ch->Norm();
1883
+ return norm;
1884
+ }
1885
+
1886
+ size_t NumCoefficients() const
1887
+ {
1888
+ size_t num = sqr(mp.SH().Order()+1);
1889
+ if (childs[0])
1890
+ for (auto & ch : childs)
1891
+ num += ch->NumCoefficients();
1892
+ return num;
1893
+ }
1894
+
1895
+ int GetChildNum (Vec<3> x) const
1896
+ {
1897
+ int childnum = 0;
1898
+ if (x(0) > center(0)) childnum += 1;
1899
+ if (x(1) > center(1)) childnum += 2;
1900
+ if (x(2) > center(2)) childnum += 4;
1901
+ return childnum;
1902
+ }
1903
+
1904
+ void AddTarget (Vec<3> x)
1905
+ {
1906
+ // if (childs[0])
1907
+ if (have_childs) // quick check without locking
1908
+ {
1909
+ // directly send to childs:
1910
+ int childnum = GetChildNum(x);
1911
+ childs[childnum] -> AddTarget( x );
1912
+ return;
1913
+ }
1914
+
1915
+ lock_guard<mutex> guard(node_mutex);
1916
+
1917
+ if (have_childs) // test again after locking
1918
+ {
1919
+ // directly send to childs:
1920
+ int childnum = GetChildNum(x);
1921
+ childs[childnum] -> AddTarget(x);
1922
+ return;
1923
+ }
1924
+
1925
+ targets.Append( x );
1926
+
1927
+ // if (r*mp.Kappa() < 1e-8) return;
1928
+ if (level > 20) return;
1929
+ if (targets.Size() < params.maxdirect && r*mp.Kappa() < 5)
1930
+ return;
1931
+
1932
+ CreateChilds();
1933
+
1934
+ for (auto t : targets)
1935
+ AddTarget (t);
1936
+ for (auto [x,r] : vol_targets)
1937
+ AddVolumeTarget (x,r);
1938
+
1939
+ targets.SetSize0();
1940
+ vol_targets.SetSize0();
1941
+ }
1942
+
1943
+
1944
+ void AddVolumeTarget (Vec<3> x, double tr)
1945
+ {
1946
+ if (MaxNorm(x-center) > r+tr) return;
1947
+
1948
+ if (have_childs)
1949
+ {
1950
+ for (auto & child : childs)
1951
+ child->AddVolumeTarget(x, tr);
1952
+ return;
1953
+ }
1954
+
1955
+
1956
+ lock_guard<mutex> guard(node_mutex);
1957
+
1958
+ if (have_childs)
1959
+ {
1960
+ for (auto & child : childs)
1961
+ child->AddVolumeTarget(x, tr);
1962
+ return;
1963
+ }
1964
+
1965
+
1966
+ vol_targets.Append (tuple(x,tr));
1967
+
1968
+ if (level > 20) return;
1969
+ if (vol_targets.Size() < params.maxdirect && (r*mp.Kappa() < 5))
1970
+ return;
1971
+
1972
+ CreateChilds();
1973
+
1974
+ for (auto t : targets)
1975
+ AddTarget (t);
1976
+ for (auto [x,r] : vol_targets)
1977
+ AddVolumeTarget (x,r);
1978
+
1979
+ targets.SetSize0();
1980
+ vol_targets.SetSize0();
1981
+ }
1982
+
1983
+
1984
+
1985
+ void CalcTotalTargets()
1986
+ {
1987
+ total_targets = targets.Size() + vol_targets.Size();
1988
+ for (auto & child : childs)
1989
+ if (child)
1990
+ {
1991
+ child->CalcTotalTargets();
1992
+ total_targets += child->total_targets;
1993
+ }
1994
+ }
1995
+
1996
+ void RemoveEmptyTrees()
1997
+ {
1998
+ for (auto & child : childs)
1999
+ if (child)
2000
+ {
2001
+ child->RemoveEmptyTrees();
2002
+ // if (child->total_targets == 0)
2003
+ // child = nullptr;
2004
+ }
2005
+
2006
+ if (total_targets == 0)
2007
+ mp = SphericalExpansion<Regular,elem_type>(-1, mp.Kappa(),1.);
2008
+ }
2009
+
2010
+ void AllocateMemory()
2011
+ {
2012
+ for (auto & child : childs)
2013
+ if (child)
2014
+ child->AllocateMemory();
2015
+
2016
+ if (total_targets > 0)
2017
+ Allocate();
2018
+ // mp = SphericalExpansion<Regular,elem_type>(MPOrder(r*mp.Kappa()), mp.Kappa(), r); // -1, mp.Kappa(),1.);
2019
+ }
2020
+
2021
+
2022
+
2023
+
2024
+ void Print (ostream & ost, size_t childnr = -1) const
2025
+ {
2026
+ if (childnr == -1)
2027
+ ost << "c = " << center << ", r = " << r << ", level = " << level << endl;
2028
+ else
2029
+ ost << "c = " << center << ", r = " << r << ", level = " << level << ", childnr = " << childnr << endl;
2030
+ for (auto x : targets)
2031
+ ost << "xi = " << x << endl;
2032
+
2033
+ for (int i = 0; i < 8; i++)
2034
+ if (childs[i]) childs[i] -> Print (ost, i);
2035
+ }
2036
+
2037
+ };
2038
+
2039
+ FMM_Parameters fmm_params;
2040
+ Node root;
2041
+ shared_ptr<SingularMLExpansion<elem_type>> singmp;
2042
+
2043
+ public:
2044
+ RegularMLExpansion (shared_ptr<SingularMLExpansion<elem_type>> asingmp, Vec<3> center, double r,
2045
+ const FMM_Parameters & _params)
2046
+ : fmm_params(_params), root(center, r, 0, asingmp->Kappa(), fmm_params), singmp(asingmp)
2047
+ {
2048
+ if (!singmp->havemp) throw Exception("first call Calc for singular MP");
2049
+ root.Allocate();
2050
+
2051
+ nodes_on_level = 0;
2052
+ nodes_on_level[0] = 1;
2053
+ {
2054
+ static Timer t("mptool compute regular MLMP"); RegionTimer rg(t);
2055
+ root.AddSingularNode(singmp->root, true, nullptr);
2056
+ // cout << "norm after S->R conversion: " << root.Norm() << endl;
2057
+ }
2058
+
2059
+
2060
+ /*
2061
+ int maxlevel = 0;
2062
+ for (auto [i,num] : Enumerate(nodes_on_level))
2063
+ if (num > 0) maxlevel = i;
2064
+
2065
+ for (int i = 0; i <= maxlevel; i++)
2066
+ cout << "reg " << i << ": " << nodes_on_level[i] << endl;
2067
+ */
2068
+
2069
+ {
2070
+ static Timer t("mptool expand regular MLMP"); RegionTimer rg(t);
2071
+ root.LocalizeExpansion(true);
2072
+ // cout << "norm after local expansion: " << root.Norm() << endl;
2073
+ }
2074
+ }
2075
+
2076
+ RegularMLExpansion (Vec<3> center, double r, double kappa, const FMM_Parameters & _params)
2077
+ : fmm_params(_params), root(center, r, 0, kappa, fmm_params)
2078
+ {
2079
+ nodes_on_level = 0;
2080
+ nodes_on_level[0] = 1;
2081
+ }
2082
+
2083
+ void AddTarget (Vec<3> t)
2084
+ {
2085
+ root.AddTarget (t);
2086
+ }
2087
+
2088
+ void AddVolumeTarget (Vec<3> t, double r)
2089
+ {
2090
+ root.AddVolumeTarget (t, r);
2091
+ }
2092
+
2093
+ void CalcMP(shared_ptr<SingularMLExpansion<elem_type>> asingmp, bool onlytargets = true)
2094
+ {
2095
+ static Timer t("mptool regular MLMP"); RegionTimer rg(t);
2096
+ static Timer tremove("removeempty");
2097
+ static Timer trec("mptool regular MLMP - recording");
2098
+ static Timer tsort("mptool regular MLMP - sort");
2099
+
2100
+ singmp = asingmp;
2101
+
2102
+
2103
+ root.CalcTotalTargets();
2104
+ // cout << "before remove empty trees:" << endl;
2105
+ // PrintStatistics(cout);
2106
+
2107
+ /*
2108
+ tremove.Start();
2109
+ if (onlytargets)
2110
+ root.RemoveEmptyTrees();
2111
+ tremove.Stop();
2112
+ */
2113
+
2114
+ root.AllocateMemory();
2115
+
2116
+ // cout << "after allocating regular:" << endl;
2117
+ // PrintStatistics(cout);
2118
+
2119
+ // cout << "starting S-R converion" << endl;
2120
+ // PrintStatistics(cout);
2121
+
2122
+
2123
+ if constexpr (false)
2124
+ {
2125
+ root.AddSingularNode(singmp->root, !onlytargets, nullptr);
2126
+ }
2127
+ else
2128
+ { // use recording
2129
+ Array<RecordingRS> recording;
2130
+ {
2131
+ RegionTimer rrec(trec);
2132
+ root.AddSingularNode(singmp->root, !onlytargets, &recording);
2133
+ }
2134
+
2135
+ // cout << "recorded: " << recording.Size() << endl;
2136
+ {
2137
+ RegionTimer reg(tsort);
2138
+ QuickSort (recording, [] (auto & a, auto & b)
2139
+ {
2140
+ if (a.len < (1-1e-8) * b.len) return true;
2141
+ if (a.len > (1+1e-8) * b.len) return false;
2142
+ return a.theta < b.theta;
2143
+ });
2144
+ }
2145
+
2146
+ double current_len = -1e100;
2147
+ double current_theta = -1e100;
2148
+ Array<RecordingRS*> current_batch;
2149
+ Array<Array<RecordingRS*>> batch_group;
2150
+ Array<double> group_lengths;
2151
+ Array<double> group_thetas;
2152
+ for (auto & record : recording)
2153
+ {
2154
+ bool len_changed = fabs(record.len - current_len) > 1e-8;
2155
+ bool theta_changed = fabs(record.theta - current_theta) > 1e-8;
2156
+ if ((len_changed || theta_changed) && current_batch.Size() > 0) {
2157
+ // ProcessBatch(current_batch, current_len, current_theta);
2158
+ batch_group.Append(current_batch);
2159
+ group_lengths.Append(current_len);
2160
+ group_thetas.Append(current_theta);
2161
+ current_batch.SetSize(0);
2162
+ }
2163
+
2164
+ current_len = record.len;
2165
+ current_theta = record.theta;
2166
+ current_batch.Append(&record);
2167
+ }
2168
+ if (current_batch.Size() > 0) {
2169
+ // ProcessBatch(current_batch, current_len, current_theta);
2170
+ batch_group.Append(current_batch);
2171
+ group_lengths.Append(current_len);
2172
+ group_thetas.Append(current_theta);
2173
+ }
2174
+
2175
+ ParallelFor(batch_group.Size(), [&](int i) {
2176
+ ProcessBatchRS(batch_group[i], group_lengths[i], group_thetas[i]);
2177
+ }, TasksPerThread(4));
2178
+ }
2179
+
2180
+
2181
+ /*
2182
+ int maxlevel = 0;
2183
+ for (auto [i,num] : Enumerate(RegularMLExpansion::nodes_on_level))
2184
+ if (num > 0) maxlevel = i;
2185
+
2186
+ for (int i = 0; i <= maxlevel; i++)
2187
+ cout << "reg " << i << ": " << RegularMLExpansion::nodes_on_level[i] << endl;
2188
+ */
2189
+
2190
+ // cout << "starting R-R converion" << endl;
2191
+ // PrintStatistics(cout);
2192
+
2193
+ static Timer tloc("mptool regular localize expansion"); RegionTimer rloc(tloc);
2194
+ root.LocalizeExpansion(!onlytargets);
2195
+
2196
+
2197
+ // cout << "R-R conversion done" << endl;
2198
+ // PrintStatistics(cout);
2199
+ }
2200
+
2201
+ void PrintStatistics (ostream & ost)
2202
+ {
2203
+ int levels = 0;
2204
+ int cnt = 0;
2205
+ root.TraverseTree( [&](Node & node) {
2206
+ levels = max(levels, node.level);
2207
+ cnt++;
2208
+ });
2209
+ ost << "levels: " << levels << endl;
2210
+ ost << "nodes: " << cnt << endl;
2211
+
2212
+ Array<int> num_on_level(levels+1);
2213
+ Array<int> order_on_level(levels+1);
2214
+ Array<size_t> coefs_on_level(levels+1);
2215
+ num_on_level = 0;
2216
+ order_on_level = 0;
2217
+ root.TraverseTree( [&](Node & node) {
2218
+ num_on_level[node.level]++;
2219
+ order_on_level[node.level] = max(order_on_level[node.level],node.mp.Order());
2220
+ coefs_on_level[node.level] += node.mp.SH().Coefs().Size();
2221
+ });
2222
+
2223
+ cout << "num on level" << endl;
2224
+ for (int i = 0; i < num_on_level.Size(); i++)
2225
+ cout << i << ": " << num_on_level[i] << ", order = " << order_on_level[i] << ", coefs " << coefs_on_level[i] << endl;
2226
+
2227
+ size_t totcoefs = 0;
2228
+ for (auto n : coefs_on_level)
2229
+ totcoefs += n;
2230
+ cout << "total mem in coefs: " << sizeof(elem_type)*totcoefs / sqr(1024) << " MB" << endl;
2231
+ }
2232
+
2233
+ void Print (ostream & ost) const
2234
+ {
2235
+ root.Print(ost);
2236
+ }
2237
+
2238
+ double Norm() const
2239
+ {
2240
+ return root.Norm();
2241
+ }
2242
+
2243
+ size_t NumCoefficients() const
2244
+ {
2245
+ return root.NumCoefficients();
2246
+ }
2247
+
2248
+ elem_type Evaluate (Vec<3> p) const
2249
+ {
2250
+ // static Timer t("mptool Eval MLMP regular"); RegionTimer r(t);
2251
+ // if (L2Norm(p-root.center) > root.r) return elem_type{0.0};
2252
+
2253
+ if (MaxNorm(p-root.center) > root.r)
2254
+ return singmp->Evaluate(p);
2255
+ return root.Evaluate(p);
2256
+ }
2257
+
2258
+ elem_type EvaluateDirectionalDerivative (Vec<3> p, Vec<3> d) const
2259
+ {
2260
+ if (L2Norm(p-root.center) > root.r) return elem_type{0.0};
2261
+ return root.EvaluateDirectionalDerivative(p, d);
2262
+ }
2263
+
2264
+ };
2265
+
2266
+
2267
+ template <typename elem_type>
2268
+ inline ostream & operator<< (ostream & ost, const RegularMLExpansion<elem_type> & mlmp)
2269
+ {
2270
+ mlmp.Print(ost);
2271
+ // ost << "RegularMLExpansion" << endl;
2272
+ return ost;
2273
+ }
2274
+
2275
+
2276
+
2277
+
2278
+
2279
+
2280
+ }
2281
+ #endif