ngsolve 6.2.2506.post74.dev0__cp314-cp314-macosx_10_15_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ngsolve might be problematic. Click here for more details.

Files changed (315) hide show
  1. netgen/include/analytic_integrals.hpp +10 -0
  2. netgen/include/arnoldi.hpp +55 -0
  3. netgen/include/bandmatrix.hpp +334 -0
  4. netgen/include/basematrix.hpp +957 -0
  5. netgen/include/basevector.hpp +1268 -0
  6. netgen/include/bdbequations.hpp +2805 -0
  7. netgen/include/bdbintegrator.hpp +1660 -0
  8. netgen/include/bem_diffops.hpp +475 -0
  9. netgen/include/bessel.hpp +1064 -0
  10. netgen/include/bilinearform.hpp +963 -0
  11. netgen/include/bla.hpp +29 -0
  12. netgen/include/blockalloc.hpp +95 -0
  13. netgen/include/blockjacobi.hpp +328 -0
  14. netgen/include/bspline.hpp +116 -0
  15. netgen/include/calcinverse.hpp +141 -0
  16. netgen/include/cg.hpp +368 -0
  17. netgen/include/chebyshev.hpp +44 -0
  18. netgen/include/cholesky.hpp +720 -0
  19. netgen/include/clapack.h +7254 -0
  20. netgen/include/code_generation.hpp +296 -0
  21. netgen/include/coefficient.hpp +2033 -0
  22. netgen/include/coefficient_impl.hpp +19 -0
  23. netgen/include/coefficient_stdmath.hpp +167 -0
  24. netgen/include/commutingAMG.hpp +106 -0
  25. netgen/include/comp.hpp +79 -0
  26. netgen/include/compatibility.hpp +41 -0
  27. netgen/include/complex_wrapper.hpp +73 -0
  28. netgen/include/compressedfespace.hpp +110 -0
  29. netgen/include/contact.hpp +235 -0
  30. netgen/include/diagonalmatrix.hpp +154 -0
  31. netgen/include/differentialoperator.hpp +276 -0
  32. netgen/include/diffop.hpp +1286 -0
  33. netgen/include/diffop_impl.hpp +328 -0
  34. netgen/include/diffopwithfactor.hpp +123 -0
  35. netgen/include/discontinuous.hpp +84 -0
  36. netgen/include/dump.hpp +949 -0
  37. netgen/include/ectypes.hpp +121 -0
  38. netgen/include/eigen.hpp +60 -0
  39. netgen/include/eigensystem.hpp +18 -0
  40. netgen/include/elasticity_equations.hpp +595 -0
  41. netgen/include/elementbyelement.hpp +195 -0
  42. netgen/include/elementtopology.hpp +1760 -0
  43. netgen/include/elementtransformation.hpp +339 -0
  44. netgen/include/evalfunc.hpp +405 -0
  45. netgen/include/expr.hpp +1686 -0
  46. netgen/include/facetfe.hpp +175 -0
  47. netgen/include/facetfespace.hpp +180 -0
  48. netgen/include/facethofe.hpp +111 -0
  49. netgen/include/facetsurffespace.hpp +112 -0
  50. netgen/include/fe_interfaces.hpp +32 -0
  51. netgen/include/fem.hpp +87 -0
  52. netgen/include/fesconvert.hpp +14 -0
  53. netgen/include/fespace.hpp +1449 -0
  54. netgen/include/finiteelement.hpp +286 -0
  55. netgen/include/globalinterfacespace.hpp +77 -0
  56. netgen/include/globalspace.hpp +115 -0
  57. netgen/include/gridfunction.hpp +525 -0
  58. netgen/include/h1amg.hpp +124 -0
  59. netgen/include/h1hofe.hpp +188 -0
  60. netgen/include/h1hofe_impl.hpp +1262 -0
  61. netgen/include/h1hofefo.hpp +148 -0
  62. netgen/include/h1hofefo_impl.hpp +185 -0
  63. netgen/include/h1hofespace.hpp +167 -0
  64. netgen/include/h1lofe.hpp +1240 -0
  65. netgen/include/h1lumping.hpp +41 -0
  66. netgen/include/hcurl_equations.hpp +1381 -0
  67. netgen/include/hcurlcurlfe.hpp +2241 -0
  68. netgen/include/hcurlcurlfespace.hpp +78 -0
  69. netgen/include/hcurlfe.hpp +259 -0
  70. netgen/include/hcurlfe_utils.hpp +107 -0
  71. netgen/include/hcurlhdiv_dshape.hpp +857 -0
  72. netgen/include/hcurlhdivfes.hpp +308 -0
  73. netgen/include/hcurlhofe.hpp +175 -0
  74. netgen/include/hcurlhofe_impl.hpp +1871 -0
  75. netgen/include/hcurlhofespace.hpp +193 -0
  76. netgen/include/hcurllofe.hpp +1146 -0
  77. netgen/include/hdiv_equations.hpp +880 -0
  78. netgen/include/hdivdivfe.hpp +2923 -0
  79. netgen/include/hdivdivsurfacespace.hpp +76 -0
  80. netgen/include/hdivfe.hpp +206 -0
  81. netgen/include/hdivfe_utils.hpp +717 -0
  82. netgen/include/hdivfes.hpp +75 -0
  83. netgen/include/hdivhofe.hpp +447 -0
  84. netgen/include/hdivhofe_impl.hpp +1107 -0
  85. netgen/include/hdivhofefo.hpp +229 -0
  86. netgen/include/hdivhofespace.hpp +177 -0
  87. netgen/include/hdivhosurfacefespace.hpp +106 -0
  88. netgen/include/hdivlofe.hpp +773 -0
  89. netgen/include/hidden.hpp +74 -0
  90. netgen/include/householder.hpp +181 -0
  91. netgen/include/hypre_ams_precond.hpp +123 -0
  92. netgen/include/hypre_precond.hpp +73 -0
  93. netgen/include/integrator.hpp +2012 -0
  94. netgen/include/integratorcf.hpp +253 -0
  95. netgen/include/interpolate.hpp +49 -0
  96. netgen/include/intrule.hpp +2542 -0
  97. netgen/include/intrules_SauterSchwab.hpp +25 -0
  98. netgen/include/irspace.hpp +49 -0
  99. netgen/include/jacobi.hpp +153 -0
  100. netgen/include/kernels.hpp +762 -0
  101. netgen/include/l2hofe.hpp +194 -0
  102. netgen/include/l2hofe_impl.hpp +564 -0
  103. netgen/include/l2hofefo.hpp +542 -0
  104. netgen/include/l2hofespace.hpp +344 -0
  105. netgen/include/la.hpp +38 -0
  106. netgen/include/linearform.hpp +266 -0
  107. netgen/include/matrix.hpp +2140 -0
  108. netgen/include/memusage.hpp +41 -0
  109. netgen/include/meshaccess.hpp +1359 -0
  110. netgen/include/mgpre.hpp +204 -0
  111. netgen/include/mp_coefficient.hpp +145 -0
  112. netgen/include/mptools.hpp +2281 -0
  113. netgen/include/multigrid.hpp +42 -0
  114. netgen/include/multivector.hpp +447 -0
  115. netgen/include/mumpsinverse.hpp +187 -0
  116. netgen/include/mycomplex.hpp +361 -0
  117. netgen/include/ng_lapack.hpp +1661 -0
  118. netgen/include/ngblas.hpp +1232 -0
  119. netgen/include/ngs_defines.hpp +30 -0
  120. netgen/include/ngs_stdcpp_include.hpp +106 -0
  121. netgen/include/ngs_utils.hpp +121 -0
  122. netgen/include/ngsobject.hpp +1019 -0
  123. netgen/include/ngsstream.hpp +113 -0
  124. netgen/include/ngstd.hpp +72 -0
  125. netgen/include/nodalhofe.hpp +96 -0
  126. netgen/include/nodalhofe_impl.hpp +141 -0
  127. netgen/include/normalfacetfe.hpp +223 -0
  128. netgen/include/normalfacetfespace.hpp +98 -0
  129. netgen/include/normalfacetsurfacefespace.hpp +84 -0
  130. netgen/include/order.hpp +251 -0
  131. netgen/include/parallel_matrices.hpp +222 -0
  132. netgen/include/paralleldofs.hpp +340 -0
  133. netgen/include/parallelngs.hpp +23 -0
  134. netgen/include/parallelvector.hpp +269 -0
  135. netgen/include/pardisoinverse.hpp +200 -0
  136. netgen/include/periodic.hpp +129 -0
  137. netgen/include/plateaufespace.hpp +25 -0
  138. netgen/include/pml.hpp +275 -0
  139. netgen/include/pmltrafo.hpp +631 -0
  140. netgen/include/postproc.hpp +142 -0
  141. netgen/include/potentialtools.hpp +22 -0
  142. netgen/include/precomp.hpp +60 -0
  143. netgen/include/preconditioner.hpp +602 -0
  144. netgen/include/prolongation.hpp +377 -0
  145. netgen/include/python_comp.hpp +107 -0
  146. netgen/include/python_fem.hpp +89 -0
  147. netgen/include/python_linalg.hpp +58 -0
  148. netgen/include/python_ngstd.hpp +386 -0
  149. netgen/include/recursive_pol.hpp +4896 -0
  150. netgen/include/recursive_pol_tet.hpp +395 -0
  151. netgen/include/recursive_pol_trig.hpp +492 -0
  152. netgen/include/reorderedfespace.hpp +81 -0
  153. netgen/include/sample_sort.hpp +105 -0
  154. netgen/include/scalarfe.hpp +335 -0
  155. netgen/include/shapefunction_utils.hpp +113 -0
  156. netgen/include/simd_complex.hpp +329 -0
  157. netgen/include/smoother.hpp +253 -0
  158. netgen/include/solve.hpp +89 -0
  159. netgen/include/sparsecholesky.hpp +313 -0
  160. netgen/include/sparsematrix.hpp +1038 -0
  161. netgen/include/sparsematrix_dyn.hpp +90 -0
  162. netgen/include/sparsematrix_impl.hpp +1013 -0
  163. netgen/include/special_matrix.hpp +463 -0
  164. netgen/include/specialelement.hpp +125 -0
  165. netgen/include/statushandler.hpp +33 -0
  166. netgen/include/stringops.hpp +12 -0
  167. netgen/include/superluinverse.hpp +136 -0
  168. netgen/include/symbolicintegrator.hpp +850 -0
  169. netgen/include/symmetricmatrix.hpp +144 -0
  170. netgen/include/tangentialfacetfe.hpp +224 -0
  171. netgen/include/tangentialfacetfespace.hpp +91 -0
  172. netgen/include/tensor.hpp +522 -0
  173. netgen/include/tensorcoefficient.hpp +446 -0
  174. netgen/include/tensorproductintegrator.hpp +113 -0
  175. netgen/include/thcurlfe.hpp +128 -0
  176. netgen/include/thcurlfe_impl.hpp +380 -0
  177. netgen/include/thdivfe.hpp +80 -0
  178. netgen/include/thdivfe_impl.hpp +492 -0
  179. netgen/include/tpdiffop.hpp +461 -0
  180. netgen/include/tpfes.hpp +133 -0
  181. netgen/include/tpintrule.hpp +224 -0
  182. netgen/include/triangular.hpp +465 -0
  183. netgen/include/tscalarfe.hpp +245 -0
  184. netgen/include/tscalarfe_impl.hpp +1029 -0
  185. netgen/include/umfpackinverse.hpp +148 -0
  186. netgen/include/vector.hpp +1273 -0
  187. netgen/include/voxelcoefficientfunction.hpp +41 -0
  188. netgen/include/vtkoutput.hpp +198 -0
  189. netgen/include/vvector.hpp +208 -0
  190. netgen/include/webgui.hpp +92 -0
  191. netgen/libngbla.dylib +0 -0
  192. netgen/libngcomp.dylib +0 -0
  193. netgen/libngfem.dylib +0 -0
  194. netgen/libngla.dylib +0 -0
  195. netgen/libngsbem.dylib +0 -0
  196. netgen/libngsolve.dylib +0 -0
  197. netgen/libngstd.dylib +0 -0
  198. ngsolve/TensorProductTools.py +210 -0
  199. ngsolve/__console.py +94 -0
  200. ngsolve/__expr.py +181 -0
  201. ngsolve/__init__.py +148 -0
  202. ngsolve/__init__.pyi +233 -0
  203. ngsolve/_scikit_build_core_dependencies.py +30 -0
  204. ngsolve/bla.pyi +1153 -0
  205. ngsolve/bvp.py +78 -0
  206. ngsolve/bvp.pyi +32 -0
  207. ngsolve/cmake/NGSolveConfig.cmake +102 -0
  208. ngsolve/cmake/ngsolve-targets-release.cmake +79 -0
  209. ngsolve/cmake/ngsolve-targets.cmake +163 -0
  210. ngsolve/comp/__init__.pyi +5449 -0
  211. ngsolve/comp/pml.pyi +89 -0
  212. ngsolve/config/__init__.py +1 -0
  213. ngsolve/config/__init__.pyi +43 -0
  214. ngsolve/config/__main__.py +4 -0
  215. ngsolve/config/config.py +60 -0
  216. ngsolve/config/config.pyi +45 -0
  217. ngsolve/demos/TensorProduct/__init__.py +0 -0
  218. ngsolve/demos/TensorProduct/tp_dg_1d_1d.py +80 -0
  219. ngsolve/demos/TensorProduct/tp_dg_1d_2d.py +73 -0
  220. ngsolve/demos/TensorProduct/tp_dg_2d_1d.py +72 -0
  221. ngsolve/demos/TensorProduct/tp_dg_2d_2d.py +66 -0
  222. ngsolve/demos/__init__.py +0 -0
  223. ngsolve/demos/howto/__init__.py +0 -0
  224. ngsolve/demos/howto/hhj.py +44 -0
  225. ngsolve/demos/howto/hybrid_dg.py +53 -0
  226. ngsolve/demos/howto/mixed.py +30 -0
  227. ngsolve/demos/howto/nonlin.py +29 -0
  228. ngsolve/demos/howto/pickling.py +26 -0
  229. ngsolve/demos/howto/pml.py +31 -0
  230. ngsolve/demos/howto/taskmanager.py +20 -0
  231. ngsolve/demos/howto/tdnns.py +47 -0
  232. ngsolve/demos/howto/timeDG-skeleton.py +45 -0
  233. ngsolve/demos/howto/timeDG.py +38 -0
  234. ngsolve/demos/howto/timeDGlap.py +42 -0
  235. ngsolve/demos/howto/timeDGwave.py +61 -0
  236. ngsolve/demos/intro/__init__.py +0 -0
  237. ngsolve/demos/intro/adaptive.py +123 -0
  238. ngsolve/demos/intro/cmagnet.py +59 -0
  239. ngsolve/demos/intro/elasticity.py +76 -0
  240. ngsolve/demos/intro/navierstokes.py +74 -0
  241. ngsolve/demos/intro/poisson.ipynb +170 -0
  242. ngsolve/demos/intro/poisson.py +41 -0
  243. ngsolve/demos/mpi/__init__.py +0 -0
  244. ngsolve/demos/mpi/mpi_cmagnet.py +87 -0
  245. ngsolve/demos/mpi/mpi_navierstokes.py +117 -0
  246. ngsolve/demos/mpi/mpi_poisson.py +89 -0
  247. ngsolve/demos/mpi/mpi_timeDG.py +82 -0
  248. ngsolve/directsolvers.py +26 -0
  249. ngsolve/directsolvers.pyi +15 -0
  250. ngsolve/eigenvalues.py +364 -0
  251. ngsolve/eigenvalues.pyi +30 -0
  252. ngsolve/fem.pyi +1647 -0
  253. ngsolve/internal.py +89 -0
  254. ngsolve/krylovspace.py +1013 -0
  255. ngsolve/krylovspace.pyi +298 -0
  256. ngsolve/la.pyi +1230 -0
  257. ngsolve/meshes.py +748 -0
  258. ngsolve/ngs2petsc.py +310 -0
  259. ngsolve/ngscxx.py +42 -0
  260. ngsolve/ngslib.so +0 -0
  261. ngsolve/ngstd.pyi +59 -0
  262. ngsolve/nonlinearsolvers.py +203 -0
  263. ngsolve/nonlinearsolvers.pyi +95 -0
  264. ngsolve/preconditioners.py +11 -0
  265. ngsolve/preconditioners.pyi +7 -0
  266. ngsolve/solve.pyi +109 -0
  267. ngsolve/solve_implementation.py +168 -0
  268. ngsolve/solve_implementation.pyi +42 -0
  269. ngsolve/solvers.py +7 -0
  270. ngsolve/solvers.pyi +14 -0
  271. ngsolve/timestepping.py +185 -0
  272. ngsolve/timestepping.pyi +28 -0
  273. ngsolve/timing.py +108 -0
  274. ngsolve/timing.pyi +54 -0
  275. ngsolve/utils.py +167 -0
  276. ngsolve/utils.pyi +273 -0
  277. ngsolve/webgui.py +670 -0
  278. ngsolve-6.2.2506.post74.dev0.data/data/Netgen.icns +0 -0
  279. ngsolve-6.2.2506.post74.dev0.data/data/bin/ngscxx +17 -0
  280. ngsolve-6.2.2506.post74.dev0.data/data/bin/ngsld +13 -0
  281. ngsolve-6.2.2506.post74.dev0.data/data/bin/ngsolve.tcl +648 -0
  282. ngsolve-6.2.2506.post74.dev0.data/data/bin/ngspy +2 -0
  283. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/beam.geo +17 -0
  284. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/beam.vol +240 -0
  285. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/chip.in2d +41 -0
  286. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/chip.vol +614 -0
  287. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/coil.geo +12 -0
  288. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/coil.vol +2560 -0
  289. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/coilshield.geo +24 -0
  290. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/coilshield.vol +3179 -0
  291. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/cube.geo +19 -0
  292. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/cube.vol +1832 -0
  293. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/d10_DGdoubleglazing.pde +50 -0
  294. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/d11_chip_nitsche.pde +40 -0
  295. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/d1_square.pde +43 -0
  296. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/d2_chip.pde +35 -0
  297. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/d3_helmholtz.pde +22 -0
  298. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/d4_cube.pde +46 -0
  299. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/d5_beam.pde +74 -0
  300. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/d6_shaft.pde +73 -0
  301. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/d7_coil.pde +50 -0
  302. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/d8_coilshield.pde +49 -0
  303. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/d9_hybridDG.pde +72 -0
  304. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/doubleglazing.in2d +27 -0
  305. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/doubleglazing.vol +737 -0
  306. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/piezo2d40round4.vol.gz +0 -0
  307. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/shaft.geo +73 -0
  308. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/shaft.vol +4291 -0
  309. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/square.in2d +17 -0
  310. ngsolve-6.2.2506.post74.dev0.data/data/share/ngsolve/square.vol +149 -0
  311. ngsolve-6.2.2506.post74.dev0.dist-info/METADATA +13 -0
  312. ngsolve-6.2.2506.post74.dev0.dist-info/RECORD +315 -0
  313. ngsolve-6.2.2506.post74.dev0.dist-info/WHEEL +5 -0
  314. ngsolve-6.2.2506.post74.dev0.dist-info/licenses/LICENSE +504 -0
  315. ngsolve-6.2.2506.post74.dev0.dist-info/top_level.txt +2 -0
@@ -0,0 +1,1029 @@
1
+ #ifdef __CUDA_ARCH__
2
+ #include "tscalarfe_impl_cuda.hpp"
3
+ #endif
4
+
5
+
6
+ #ifndef FILE_TSCALARFE_IMPL
7
+ #define FILE_TSCALARFE_IMPL
8
+
9
+
10
+ #include "tscalarfe.hpp"
11
+ #include "recursive_pol.hpp"
12
+ #include "shapefunction_utils.hpp"
13
+
14
+ namespace ngfem
15
+ {
16
+
17
+ template <class FEL, ELEMENT_TYPE ET, class BASE>
18
+ void T_ScalarFiniteElement<FEL,ET,BASE> ::
19
+ CalcShape (const IntegrationPoint & ip, BareSliceVector<> shape) const
20
+ {
21
+ T_CalcShape (GetTIP<DIM>(ip), shape);
22
+ }
23
+
24
+ template <class FEL, ELEMENT_TYPE ET, class BASE>
25
+ void T_ScalarFiniteElement<FEL,ET,BASE> ::
26
+ CalcDShape (const IntegrationPoint & ip,
27
+ BareSliceMatrix<> dshape) const
28
+ {
29
+ T_CalcShape (GetTIPGrad<DIM> (ip),
30
+ SBLambda ([dshape] (int i, auto shape)
31
+ { dshape.Row(i) = ngfem::GetGradient(shape); }));
32
+ }
33
+
34
+ #ifndef FASTCOMPILE
35
+
36
+ template <class FEL, ELEMENT_TYPE ET, class BASE>
37
+ void T_ScalarFiniteElement<FEL,ET,BASE> ::
38
+ CalcShape (const IntegrationRule & ir, BareSliceMatrix<> shape) const
39
+ {
40
+ for (int i = 0; i < ir.Size(); i++)
41
+ T_CalcShape (GetTIP<DIM>(ir[i]), shape.Col(i));
42
+ }
43
+
44
+ template <class FEL, ELEMENT_TYPE ET, class BASE>
45
+ void T_ScalarFiniteElement<FEL,ET,BASE> ::
46
+ CalcShape (const SIMD_IntegrationRule & ir, BareSliceMatrix<SIMD<double>> shapes) const
47
+ {
48
+ /*
49
+ for (size_t i = 0; i < ir.Size(); i++)
50
+ T_CalcShape (GetTIP<DIM>(ir[i]),
51
+ SBLambda([&](size_t j, SIMD<double> shape)
52
+ { shapes(j,i) = shape; } ));
53
+ */
54
+ for (size_t i = 0; i < ir.Size(); i++)
55
+ T_CalcShape (GetTIP<DIM>(ir[i]), shapes.Col(i));
56
+ }
57
+
58
+
59
+ template <class FEL, ELEMENT_TYPE ET, class BASE>
60
+ double T_ScalarFiniteElement<FEL,ET,BASE> ::
61
+ Evaluate (const IntegrationPoint & ip, BareSliceVector<double> x) const
62
+ {
63
+ double sum = 0;
64
+ T_CalcShape (GetTIP<DIM>(ip),
65
+ SBLambda ([x,&sum](size_t i, double val) { sum += x(i)*val; } ));
66
+ return sum;
67
+ }
68
+
69
+
70
+ template <class FEL, ELEMENT_TYPE ET, class BASE>
71
+ void T_ScalarFiniteElement<FEL,ET,BASE> ::
72
+ Evaluate (const IntegrationRule & ir, BareSliceVector<double> coefs, BareSliceVector<double> vals) const
73
+ {
74
+ for (size_t i = 0; i < ir.GetNIP(); i++)
75
+ {
76
+ double sum = 0;
77
+ T_CalcShape (GetTIP<DIM>(ir[i]),
78
+ SBLambda ( [coefs,&sum](size_t j, double shape) { sum += coefs(j)*shape; } ));
79
+ vals(i) = sum;
80
+ }
81
+ }
82
+
83
+ template <class FEL, ELEMENT_TYPE ET, class BASE>
84
+ void T_ScalarFiniteElement<FEL,ET,BASE> ::
85
+ Evaluate (const SIMD_IntegrationRule & ir, BareSliceVector<> coefs, BareVector<SIMD<double>> values) const
86
+ {
87
+ FlatArray<SIMD<IntegrationPoint>> hir = ir;
88
+ size_t i = 0;
89
+ for ( ; i+2 <= hir.Size(); i+=2)
90
+ {
91
+ MultiSIMD<2,double> sum = 0;
92
+ auto tip1 = GetTIP<DIM>(hir[i]);
93
+ auto tip2 = GetTIP<DIM>(hir[i+1]);
94
+ TIP<DIM,MultiSIMD<2,double>> tip(tip1,tip2);
95
+
96
+ double * pcoefs = coefs.Data();
97
+ size_t dist = coefs.Dist();
98
+ T_CalcShape (tip,
99
+ SBLambda ( [&](size_t j, MultiSIMD<2,double> shape)
100
+ {
101
+ // sum += *pcoefs * shape;
102
+ sum = FMA(MultiSIMD<2,double>(*pcoefs), shape, sum);
103
+ pcoefs += dist; }
104
+ ));
105
+
106
+ // std::tie(values(i), values(i+1)) = sum;
107
+ values(i) = sum.Lo();
108
+ values(i+1) = sum.Hi();
109
+ }
110
+
111
+ if (i < hir.Size())
112
+ {
113
+ SIMD<double> sum = 0;
114
+ double * pcoefs = coefs.Data();
115
+ size_t dist = coefs.Dist();
116
+ T_CalcShape (GetTIP<DIM>(hir[i]),
117
+ SBLambda ( [&](int j, SIMD<double> shape)
118
+ { sum += (*pcoefs)*shape; pcoefs += dist; } ));
119
+ values(i) = sum;
120
+ }
121
+ }
122
+
123
+
124
+ template <class FEL, ELEMENT_TYPE ET, class BASE>
125
+ void T_ScalarFiniteElement<FEL,ET,BASE> ::
126
+ Evaluate (const SIMD_IntegrationRule & ir,
127
+ SliceMatrix<> coefs,
128
+ BareSliceMatrix<SIMD<double>> values) const
129
+ {
130
+ FlatArray<SIMD<IntegrationPoint>> hir = ir;
131
+ size_t j = 0;
132
+ for ( ; j+4 <= coefs.Width(); j+=4)
133
+ {
134
+ for (size_t i = 0; i < hir.Size(); i++)
135
+ {
136
+ SIMD<double> sum1 = 0, sum2 = 0, sum3 = 0, sum4 = 0;
137
+ // double * pcoefs = &coefs(j);
138
+ double * pcoefs = coefs.Addr(0,j);
139
+ size_t dist = coefs.Dist();
140
+ T_CalcShape (GetTIP<DIM>(hir[i]),
141
+ SBLambda ( [&pcoefs, dist, &sum1, &sum2, &sum3, &sum4](int j, SIMD<double> shape)
142
+ {
143
+ sum1 += pcoefs[0]*shape;
144
+ sum2 += pcoefs[1]*shape;
145
+ sum3 += pcoefs[2]*shape;
146
+ sum4 += pcoefs[3]*shape;
147
+ pcoefs += dist;
148
+ } ));
149
+ values(j,i) = sum1;
150
+ values(j+1,i) = sum2;
151
+ values(j+2,i) = sum3;
152
+ values(j+3,i) = sum4;
153
+ }
154
+ }
155
+ switch (coefs.Width()&3)
156
+ {
157
+ case 0: break;
158
+ case 1: Evaluate (ir, coefs.Col(j), values.Row(j)); break;
159
+ case 2:
160
+ {
161
+ for (size_t i = 0; i < hir.Size(); i++)
162
+ {
163
+ SIMD<double> sum1 = 0, sum2 = 0;
164
+ // double * pcoefs = &coefs(j);
165
+ double * pcoefs = coefs.Addr(0,j);
166
+ size_t dist = coefs.Dist();
167
+ T_CalcShape (GetTIP<DIM>(hir[i]),
168
+ SBLambda ( [&pcoefs, dist,&sum1, &sum2](int j, SIMD<double> shape)
169
+ {
170
+ sum1 += pcoefs[0]*shape;
171
+ sum2 += pcoefs[1]*shape;
172
+ pcoefs += dist;
173
+ } ));
174
+ values(j,i) = sum1;
175
+ values(j+1,i) = sum2;
176
+ }
177
+ break;
178
+ case 3:
179
+ {
180
+ for (size_t i = 0; i < hir.Size(); i++)
181
+ {
182
+ SIMD<double> sum1 = 0, sum2 = 0, sum3 = 0;
183
+ // double * pcoefs = &coefs(j);
184
+ double * pcoefs = coefs.Addr(0,j);
185
+ size_t dist = coefs.Dist();
186
+ T_CalcShape (GetTIP<DIM>(hir[i]),
187
+ SBLambda ( [&pcoefs, dist, &sum1,&sum2,&sum3](int j, SIMD<double> shape)
188
+ {
189
+ sum1 += pcoefs[0]*shape;
190
+ sum2 += pcoefs[1]*shape;
191
+ sum3 += pcoefs[2]*shape;
192
+ pcoefs += dist;
193
+ } ));
194
+ values(j,i) = sum1;
195
+ values(j+1,i) = sum2;
196
+ values(j+2,i) = sum3;
197
+ }
198
+ break;
199
+ }
200
+ }
201
+ }
202
+ /*
203
+ for ( ; j < coefs.Width(); j++)
204
+ Evaluate (ir, coefs.Col(j), values.Row(j));
205
+ */
206
+ }
207
+
208
+ template <class FEL, ELEMENT_TYPE ET, class BASE>
209
+ void T_ScalarFiniteElement<FEL,ET,BASE> ::
210
+ Evaluate (const IntegrationRule & ir, SliceMatrix<> coefs, BareSliceMatrix<> values) const
211
+ {
212
+ for (size_t i = 0; i < ir.GetNIP(); i++)
213
+ {
214
+ auto hrow = values.Row(i).Range(coefs.Width());
215
+ hrow = 0.0;
216
+ T_CalcShape (GetTIP<DIM>(ir[i]),
217
+ SBLambda ( [&](size_t j, double shape)
218
+ {
219
+ hrow += shape * coefs.Row(j);
220
+ }));
221
+ }
222
+ }
223
+
224
+ template <class FEL, ELEMENT_TYPE ET, class BASE>
225
+ void T_ScalarFiniteElement<FEL,ET,BASE> ::
226
+ EvaluateTrans (const IntegrationRule & ir, BareSliceVector<> vals, BareSliceVector<double> coefs) const
227
+ {
228
+ coefs.Range(0,ndof) = 0.0;
229
+ for (size_t i = 0; i < ir.GetNIP(); i++)
230
+ {
231
+ double vali = vals(i);
232
+ T_CalcShape (GetTIP<DIM>(ir[i]),
233
+ SBLambda ( [coefs, vali](size_t j, double shape)
234
+ { coefs(j) += vali*shape; } ));
235
+ }
236
+ }
237
+
238
+ template <class FEL, ELEMENT_TYPE ET, class BASE>
239
+ void T_ScalarFiniteElement<FEL,ET,BASE> ::
240
+ AddTrans (const SIMD_IntegrationRule & ir, BareVector<SIMD<double>> values,
241
+ BareSliceVector<> coefs) const
242
+ {
243
+ FlatArray<SIMD<IntegrationPoint>> hir = ir;
244
+ /*
245
+ for (int i = 0; i < hir.Size(); i++)
246
+ {
247
+ Vec<DIM,SIMD<double>> pt = hir[i];
248
+ SIMD<double> val = values.Get(i);
249
+ T_CalcShape (&pt(0), SBLambda ( [&](int j, SIMD<double> shape) { coefs(j) += HSum(val*shape); } ));
250
+ }
251
+ */
252
+
253
+ size_t i = 0;
254
+ for ( ; i+2 <= hir.Size(); i+=2)
255
+ {
256
+ TIP<DIM,SIMD<double>> tip1 = hir[i].TIp<DIM>();
257
+ TIP<DIM,SIMD<double>> tip2 = hir[i+1].TIp<DIM>();
258
+ TIP<DIM,MultiSIMD<2,double>> tip(tip1,tip2);
259
+
260
+ MultiSIMD<2,double> val (values(i), values(i+1));
261
+
262
+ double * pcoefs = coefs.Data();
263
+ size_t dist = coefs.Dist();
264
+ /*
265
+ T_CalcShape (tip,
266
+ SBLambda
267
+ ([&](int j, MultiSIMD<2,double> shape)
268
+ { *pcoefs += HSum(val*shape); pcoefs += dist; }
269
+ ));
270
+ */
271
+ T_CalcShape (tip,
272
+ SBLambdaDuo
273
+ ([&](int j, MultiSIMD<2,double> shape)
274
+ {
275
+ *pcoefs += HSum(val*shape); pcoefs += dist;
276
+ },
277
+ [&](int j, MultiSIMD<2,double> shape, int j2, MultiSIMD<2,double> shape2)
278
+ {
279
+ auto v2 = HSum(val*shape, val*shape2);
280
+ *pcoefs += get<0>(v2); pcoefs += dist;
281
+ *pcoefs += get<1>(v2); pcoefs += dist;
282
+ }
283
+ ));
284
+ }
285
+
286
+ for ( ; i < hir.Size(); i++)
287
+ {
288
+ TIP<DIM,SIMD<double>> tip = hir[i].TIp<DIM>();
289
+ SIMD<double> val (values(i));
290
+
291
+ double * pcoefs = coefs.Data();
292
+ size_t dist = coefs.Dist();
293
+ T_CalcShape (tip,
294
+ SBLambdaDuo
295
+ ([&](int j, SIMD<double> shape)
296
+ {
297
+ *pcoefs += HSum(val*shape); pcoefs += dist;
298
+ },
299
+ [&](int j, SIMD<double> shape, int j2, SIMD<double> shape2)
300
+ {
301
+ auto v2 = HSum(val*shape, val*shape2);
302
+ *pcoefs += get<0>(v2); pcoefs += dist;
303
+ *pcoefs += get<1>(v2); pcoefs += dist;
304
+ }
305
+ ));
306
+ }
307
+
308
+
309
+
310
+
311
+ /*
312
+ for (int i = 0; i < hir.Size(); i+=3)
313
+ {
314
+ Vec<DIM,SIMD<double>> pt1 = hir[i];
315
+ Vec<DIM,SIMD<double>> pt2 = hir[(i+1 < hir.Size()) ? i+1 : i];
316
+ Vec<DIM,SIMD<double>> pt3 = hir[(i+2 < hir.Size()) ? i+2 : i];
317
+
318
+ Vec<DIM,MultiSIMD<3,double>> pt;
319
+ for (int i = 0; i < DIM; i++)
320
+ pt(i) = MultiSIMD<3,double> (pt1(i), pt2(i), pt3(i));
321
+ MultiSIMD<3,double> val (values.Get(i),
322
+ i+1 < hir.Size() ? values.Get(i+1) : SIMD<double> (0.0),
323
+ i+2 < hir.Size() ? values.Get(i+2) : SIMD<double> (0.0));
324
+
325
+ // T_CalcShape (&pt(0), SBLambda ( [&](int j, MultiSIMD<3,double> shape) { coefs(j) += HSum(val*shape); } ));
326
+
327
+ double * pcoefs = &coefs(0);
328
+ size_t dist = coefs.Dist();
329
+ T_CalcShape (TIP<DIM,MultiSIMD<3,double>> (pt),
330
+ SBLambda ( [&](int j, MultiSIMD<3,double> shape)
331
+ { *pcoefs += HSum(val*shape); pcoefs += dist; } ));
332
+
333
+ }
334
+ */
335
+ }
336
+
337
+ // #endif // FASTCOMPILE
338
+ template <class FEL, ELEMENT_TYPE ET, class BASE>
339
+ void T_ScalarFiniteElement<FEL,ET,BASE> ::
340
+ AddDualTrans (const SIMD_IntegrationRule & ir, BareVector<SIMD<double>> values,
341
+ BareSliceVector<> coefs) const
342
+ {
343
+ FlatArray<SIMD<IntegrationPoint>> hir = ir;
344
+ for (int i = 0; i < hir.Size(); i++)
345
+ {
346
+ TIP<DIM,SIMD<double>> tip = hir[i].TIp<DIM>();
347
+ SIMD<double> val = values(i);
348
+ static_cast<const FEL*> (this)->
349
+ T_CalcDualShape (tip, SBLambda ( [&](int j, SIMD<double> shape) { coefs(j) += HSum(val*shape); } ));
350
+ }
351
+ }
352
+
353
+ template <class FEL, ELEMENT_TYPE ET, class BASE>
354
+ void T_ScalarFiniteElement<FEL,ET,BASE> ::
355
+ AddDualTrans (const IntegrationRule & ir, BareSliceVector<double> values,
356
+ BareSliceVector<> coefs) const
357
+ {
358
+ FlatArray<IntegrationPoint> hir = ir;
359
+ for (int i = 0; i < hir.Size(); i++)
360
+ {
361
+ TIP<DIM,double> tip = hir[i].TIp<DIM>();
362
+ double val = values(i);
363
+ static_cast<const FEL*> (this)->
364
+ T_CalcDualShape (tip, SBLambda ( [&](int j, double shape) { coefs(j) += val*shape; } ));
365
+ }
366
+ }
367
+
368
+
369
+ // #ifndef FASTCOMPILE
370
+
371
+ template <class FEL, ELEMENT_TYPE ET, class BASE>
372
+ void T_ScalarFiniteElement<FEL,ET,BASE> ::
373
+ AddTrans (const SIMD_IntegrationRule & ir,
374
+ BareSliceMatrix<SIMD<double>> values,
375
+ SliceMatrix<> coefs) const
376
+ {
377
+ FlatArray<SIMD<IntegrationPoint>> hir = ir;
378
+ size_t j = 0;
379
+ for ( ; j+4 <= coefs.Width(); j+=4)
380
+ {
381
+ for (size_t i = 0; i < hir.Size(); i++)
382
+ {
383
+ TIP<DIM,SIMD<double>> pt = hir[i].TIp<DIM>();
384
+ SIMD<double> val1 = values(j,i);
385
+ SIMD<double> val2 = values(j+1,i);
386
+ SIMD<double> val3 = values(j+2,i);
387
+ SIMD<double> val4 = values(j+3,i);
388
+ double * pcoefs = &coefs(j);
389
+ size_t dist = coefs.Dist();
390
+ T_CalcShape (pt,
391
+ SBLambda ( [&](int j, SIMD<double> shape)
392
+ {
393
+ auto val = HSum(shape*val1, shape*val2, shape*val3, shape*val4);
394
+ val += SIMD<double,4> (pcoefs);
395
+ // _mm256_storeu_pd (pcoefs, val.Data());
396
+ val.Store(pcoefs);
397
+ pcoefs += dist;
398
+ } ));
399
+ }
400
+ }
401
+ switch (coefs.Width()&3)
402
+ {
403
+ case 0: break;
404
+ case 1: AddTrans (ir, values.Row(j), coefs.Col(j)); break;
405
+ case 2:
406
+ {
407
+ /*
408
+ for (size_t i = 0; i < hir.Size(); i++)
409
+ {
410
+ TIP<DIM,SIMD<double>> pt = hir[i].TIp<DIM>();
411
+ SIMD<double> val1 = values(j,i);
412
+ SIMD<double> val2 = values(j+1,i);
413
+ __m256i mask = _mm256_set_epi64x(0, 0, -1, -1);
414
+ double * pcoefs = &coefs(j);
415
+ size_t dist = coefs.Dist();
416
+ T_CalcShape (pt,
417
+ SBLambda ( [&](int j, SIMD<double> shape)
418
+ {
419
+ auto val = HSum(shape*val1, shape*val2, shape*val2, shape*val2);
420
+ val += SIMD<double,4> (_mm256_maskload_pd (pcoefs, mask));
421
+ _mm256_maskstore_pd (pcoefs, mask, val.Data());
422
+ pcoefs += dist;
423
+ } ));
424
+ }
425
+ */
426
+ /*
427
+ SIMD<mask64,4> mask(2);
428
+ for (size_t i = 0; i < hir.Size(); i++)
429
+ {
430
+ TIP<DIM,SIMD<double>> pt = hir[i].TIp<DIM>();
431
+ SIMD<double> val1 = values(j,i);
432
+ SIMD<double> val2 = values(j+1,i);
433
+ double * pcoefs = &coefs(j);
434
+ size_t dist = coefs.Dist();
435
+ T_CalcShape (pt,
436
+ SBLambda ( [val1,val2,mask,&pcoefs,dist](int j, SIMD<double> shape)
437
+ {
438
+ auto val = HSum(shape*val1, shape*val2, shape*val2, shape*val2);
439
+ val += SIMD<double,4> (pcoefs, mask);
440
+ val.Store(pcoefs, mask);
441
+ pcoefs += dist;
442
+ } ));
443
+ }
444
+ */
445
+ for (size_t i = 0; i < hir.Size(); i++)
446
+ {
447
+ TIP<DIM,SIMD<double>> pt = hir[i].TIp<DIM>();
448
+ SIMD<double> val1 = values(j,i);
449
+ SIMD<double> val2 = values(j+1,i);
450
+ double * pcoefs = &coefs(j);
451
+ size_t dist = coefs.Dist();
452
+ T_CalcShape (pt,
453
+ SBLambda ( [val1,val2,&pcoefs,dist](int j, SIMD<double> shape)
454
+ {
455
+ auto val = HSum(shape*val1, shape*val2);
456
+ val += SIMD<double,2> (pcoefs);
457
+ val.Store(pcoefs);
458
+ pcoefs += dist;
459
+ } ));
460
+ }
461
+ break;
462
+ }
463
+ case 3:
464
+ {
465
+ SIMD<mask64,4> mask(3);
466
+ for (size_t i = 0; i < hir.Size(); i++)
467
+ {
468
+ TIP<DIM,SIMD<double>> pt = hir[i].TIp<DIM>();
469
+ SIMD<double> val1 = values(j,i);
470
+ SIMD<double> val2 = values(j+1,i);
471
+ SIMD<double> val3 = values(j+2,i);
472
+ double * pcoefs = &coefs(j);
473
+ size_t dist = coefs.Dist();
474
+ T_CalcShape (pt,
475
+ SBLambda ( [val1,val2,val3,mask,dist,&pcoefs](int j, SIMD<double> shape)
476
+ {
477
+ auto val = HSum(shape*val1, shape*val2, shape*val3, shape*val3);
478
+ val += SIMD<double,4> (pcoefs, mask);
479
+ val.Store(pcoefs, mask);
480
+ pcoefs += dist;
481
+ } ));
482
+ }
483
+ break;
484
+ }
485
+ }
486
+ /*
487
+ for ( ; j < coefs.Width(); j++)
488
+ Evaluate (ir, coefs.Col(j), values.Row(j));
489
+ */
490
+ }
491
+
492
+
493
+
494
+ template <class FEL, ELEMENT_TYPE ET, class BASE>
495
+ auto T_ScalarFiniteElement<FEL,ET,BASE> ::
496
+ EvaluateGrad (const IntegrationPoint & ip, BareSliceVector<double> coefs) const -> Vec<DIM>
497
+ {
498
+ Vec<DIM> sum = 0.0;
499
+ T_CalcShape (GetTIPGrad<DIM>(ip),
500
+ SBLambda ( [&](int i, auto val)
501
+ {
502
+ sum += coefs(i) * ngfem::GetGradient(val);
503
+ }));
504
+ return sum;
505
+ }
506
+
507
+ template <class FEL, ELEMENT_TYPE ET, class BASE>
508
+ void T_ScalarFiniteElement<FEL,ET,BASE> ::
509
+ EvaluateGrad (const IntegrationRule & ir, BareSliceVector<double> coefs,
510
+ BareSliceMatrix<> vals) const
511
+ {
512
+ for (int i = 0; i < ir.GetNIP(); i++)
513
+ {
514
+ Vec<DIM> sum = 0.0;
515
+ T_CalcShape (GetTIPGrad<DIM>(ir[i]),
516
+ SBLambda ([&sum, coefs] (size_t j, auto shape)
517
+ { sum += coefs(j) * ngfem::GetGradient(shape); }));
518
+ vals.Row(i) = sum;
519
+ }
520
+ }
521
+
522
+
523
+ template <class FEL, ELEMENT_TYPE ET, class BASE>
524
+ void T_ScalarFiniteElement<FEL,ET,BASE> ::
525
+ EvaluateGrad (const SIMD_BaseMappedIntegrationRule & bmir,
526
+ BareSliceVector<> coefs,
527
+ BareSliceMatrix<SIMD<double>> values) const
528
+ {
529
+ Switch<4-DIM>
530
+ (bmir.DimSpace()-DIM, [this,&bmir,coefs,values] (auto CODIM)
531
+ {
532
+ constexpr int DIMSPACE = DIM+CODIM.value;
533
+ auto & mir = static_cast<const SIMD_MappedIntegrationRule<DIM,DIMSPACE>&> (bmir);
534
+ for (size_t i = 0; i < mir.Size(); i++)
535
+ {
536
+ double *pcoefs = &coefs(0);
537
+ const size_t dist = coefs.Dist();
538
+
539
+ Vec<DIMSPACE,SIMD<double>> sum(0.0);
540
+ this->T_CalcShape (GetTIP(mir[i]),
541
+ SBLambda ([&pcoefs,dist,&sum]
542
+ (size_t j, auto shape)
543
+ {
544
+ sum += *pcoefs * ngfem::GetGradient(shape);
545
+ pcoefs += dist;
546
+ }));
547
+ values.Col(i).Range(DIMSPACE) = sum;
548
+ }
549
+ });
550
+ }
551
+
552
+
553
+ template <class FEL, ELEMENT_TYPE ET, class BASE>
554
+ void T_ScalarFiniteElement<FEL,ET,BASE> ::
555
+ EvaluateGrad (const SIMD_IntegrationRule & ir,
556
+ BareSliceVector<> coefs,
557
+ BareSliceMatrix<SIMD<double>> values) const
558
+ {
559
+ for (int i = 0; i < ir.Size(); i++)
560
+ {
561
+ Vec<DIM,SIMD<double>> sum(0.0);
562
+ T_CalcShape (GetTIPGrad<DIM> (ir[i]),
563
+ SBLambda ([&sum, coefs] (size_t j, auto shape)
564
+ { sum += coefs(j) * ngfem::GetGradient(shape); }));
565
+ values.Col(i).Range(DIM) = sum;
566
+ }
567
+ }
568
+
569
+
570
+ template <class FEL, ELEMENT_TYPE ET, class BASE>
571
+ void T_ScalarFiniteElement<FEL,ET,BASE> ::
572
+ EvaluateGradTrans (const IntegrationRule & ir,
573
+ BareSliceMatrix<> vals, BareSliceVector<double> coefs) const
574
+ {
575
+ coefs.Range(0,ndof) = 0.0;
576
+ for (int i = 0; i < ir.GetNIP(); i++)
577
+ {
578
+ Vec<DIM> vali = vals.Row(i);
579
+ T_CalcShape (GetTIPGrad<DIM>(ir[i]),
580
+ SBLambda ([coefs, vali] (int j, auto shape)
581
+ { coefs(j) += InnerProduct (vali, ngfem::GetGradient(shape)); }));
582
+ }
583
+ }
584
+
585
+ template <class FEL, ELEMENT_TYPE ET, class BASE>
586
+ void T_ScalarFiniteElement<FEL,ET,BASE> ::
587
+ EvaluateGradTrans (const IntegrationRule & ir, SliceMatrix<> values, SliceMatrix<> coefs) const
588
+ {
589
+ int nels = coefs.Width();
590
+ coefs = 0.0;
591
+ for (int i = 0; i < ir.GetNIP(); i++)
592
+ {
593
+ // Vec<DIM, AutoDiff<DIM>> adp = ir[i];
594
+ T_CalcShape (// TIP<DIM, AutoDiff<DIM>> (adp),
595
+ GetTIPGrad<DIM>(ir[i]),
596
+ SBLambda ([&] (int j, auto shape)
597
+ {
598
+ FlatMatrixFixWidth<DIM> mvals(nels, &values(i,0));
599
+ coefs.Row(j) += mvals * ngfem::GetGradient(shape);
600
+ }));
601
+ }
602
+ }
603
+
604
+ template <class FEL, ELEMENT_TYPE ET, class BASE>
605
+ void T_ScalarFiniteElement<FEL,ET,BASE> ::
606
+ AddGradTrans (const SIMD_BaseMappedIntegrationRule & bmir,
607
+ BareSliceMatrix<SIMD<double>> values,
608
+ BareSliceVector<> coefs) const
609
+ {
610
+ if constexpr (DIM == 0) return;
611
+ Iterate<4-DIM>
612
+ ([&](auto CODIM)
613
+ {
614
+ constexpr auto DIMSPACE = DIM+CODIM.value;
615
+ if (bmir.DimSpace() == DIMSPACE)
616
+ {
617
+ auto & mir = static_cast<const SIMD_MappedIntegrationRule<DIM,DIMSPACE>&> (bmir);
618
+ for (size_t i = 0; i < mir.Size(); i++)
619
+ {
620
+ // Directional derivative
621
+ [[maybe_unused]]
622
+ Vec<DIM, SIMD<double>> jac_dir = mir[i].GetJacobianInverse() * values.Col(i);
623
+
624
+ const auto &ip = mir[i].IP();
625
+ TIP<DIM,AutoDiff<1,SIMD<double>>>adp(ip.FacetNr(), ip.VB());
626
+ if constexpr(DIM>0)
627
+ adp.x = AutoDiff<1, SIMD<double>>( ip(0), jac_dir(0) );
628
+ if constexpr(DIM>1)
629
+ adp.y = AutoDiff<1, SIMD<double>>( ip(1), jac_dir(1) );
630
+ if constexpr(DIM>2)
631
+ adp.z = AutoDiff<1, SIMD<double>>( ip(2), jac_dir(2) );
632
+
633
+ double * pcoef = &coefs(0);
634
+ size_t dist = coefs.Dist();
635
+ this->T_CalcShape (adp,
636
+ SBLambda ([dist,&pcoef] (size_t j, auto shape)
637
+ {
638
+ *pcoef += HSum(shape.DValue(0));
639
+ pcoef += dist;
640
+ }));
641
+ }
642
+ }
643
+ });
644
+ }
645
+
646
+
647
+ template <class FEL, ELEMENT_TYPE ET, class BASE>
648
+ void T_ScalarFiniteElement<FEL,ET,BASE> ::
649
+ AddGradTrans (const SIMD_BaseMappedIntegrationRule & bmir,
650
+ BareSliceMatrix<SIMD<double>> values,
651
+ SliceMatrix<> coefs) const
652
+ {
653
+ Iterate<4-DIM>
654
+ ([&](auto CODIM)
655
+ {
656
+ constexpr auto DIMSPACE = DIM+CODIM.value;
657
+ if (bmir.DimSpace() == DIMSPACE)
658
+ {
659
+ auto & mir = static_cast<const SIMD_MappedIntegrationRule<DIM,DIMSPACE>&> (bmir);
660
+
661
+ size_t j = 0;
662
+ for ( ; j+4 <= coefs.Width(); j+=4)
663
+ {
664
+ for (size_t i = 0; i < mir.Size(); i++)
665
+ {
666
+ TIP<DIM,AutoDiff<DIMSPACE,SIMD<double>>>adp = GetTIP(mir[i]);
667
+ double * pcoef = &coefs(0,j);
668
+ size_t dist = coefs.Dist();
669
+ // Vec<4*DIMSPACE,SIMD<double>> vals = values.Col(i).Range(j*DIMSPACE, (j+4)*DIMSPACE);
670
+ Vec<DIMSPACE,SIMD<double>> vals1 = values.Col(i).Range(j*DIMSPACE, (j+1)*DIMSPACE);
671
+ Vec<DIMSPACE,SIMD<double>> vals2 = values.Col(i).Range((j+1)*DIMSPACE, (j+2)*DIMSPACE);
672
+ Vec<DIMSPACE,SIMD<double>> vals3 = values.Col(i).Range((j+2)*DIMSPACE, (j+3)*DIMSPACE);
673
+ Vec<DIMSPACE,SIMD<double>> vals4 = values.Col(i).Range((j+3)*DIMSPACE, (j+4)*DIMSPACE);
674
+
675
+
676
+ this->T_CalcShape (adp,
677
+ SBLambda ([=,&pcoef] (size_t j, auto shape)
678
+ {
679
+ auto grad = ngfem::GetGradient(shape);
680
+ SIMD<double> sum1 = InnerProduct(vals1, grad);
681
+ SIMD<double> sum2 = InnerProduct(vals2, grad);
682
+ SIMD<double> sum3 = InnerProduct(vals3, grad);
683
+ SIMD<double> sum4 = InnerProduct(vals4, grad);
684
+
685
+ SIMD<double,4> allsum = HSum(sum1, sum2, sum3, sum4);
686
+ allsum += SIMD<double,4> (pcoef);
687
+ allsum.Store(pcoef);
688
+ pcoef += dist;
689
+ }));
690
+ }
691
+ }
692
+
693
+ for ( ; j+1 <= coefs.Width(); j++)
694
+ {
695
+ for (size_t i = 0; i < mir.Size(); i++)
696
+ {
697
+ // TIP<DIM,AutoDiff<DIMSPACE,SIMD<double>>>adp = GetTIP(mir[i]);
698
+ double * pcoef = &coefs(0,j);
699
+ size_t dist = coefs.Dist();
700
+ Vec<DIMSPACE,SIMD<double>> vals = values.Col(i).Range(j*DIMSPACE, (j+1)*DIMSPACE);
701
+ this->T_CalcShape (GetTIP(mir[i]), // adp
702
+ SBLambda ([=,&pcoef] (size_t j, auto shape)
703
+ {
704
+ *pcoef += HSum(InnerProduct(ngfem::GetGradient(shape), vals));
705
+ pcoef += dist;
706
+ }));
707
+ }
708
+ }
709
+ }
710
+ });
711
+ }
712
+
713
+
714
+ /*
715
+ template <class FEL, ELEMENT_TYPE ET, class BASE>
716
+ void T_ScalarFiniteElement<FEL,ET,BASE> ::
717
+ CalcDShape (const IntegrationPoint & ip,
718
+ const std::function<void(int,Vec<DIM>)> & callback) const
719
+ {
720
+ Vec<DIM, AutoDiff<DIM> > adp;
721
+ for (int i = 0; i < DIM; i++)
722
+ adp[i] = AutoDiff<DIM> (ip(i), i);
723
+
724
+ // DShapeAssign<DIM> ds(dshape);
725
+ // T_CalcShape (&adp(0), ds);
726
+
727
+
728
+ T_CalcShape (&adp(0), SBLambda ([&] (int i, AutoDiff<DIM> shape)
729
+ {
730
+ Vec<DIM> v;
731
+ shape.StoreGradient (&v(0));
732
+ callback (i,v);
733
+ }));
734
+ }
735
+ */
736
+
737
+
738
+
739
+ /*
740
+ template <class FEL, ELEMENT_TYPE ET, class BASE>
741
+ void T_ScalarFiniteElement<FEL,ET,BASE> ::
742
+ CalcMappedDShape (const MappedIntegrationPoint<DIM,DIM> & mip,
743
+ FlatMatrixFixWidth<DIM> dshape) const
744
+ {
745
+ Vec<DIM, AutoDiff<DIM> > adp;
746
+ for (int i = 0; i < DIM; i++)
747
+ adp[i].Value() = mip.IP()(i);
748
+
749
+ for (int i = 0; i < DIM; i++)
750
+ for (int j = 0; j < DIM; j++)
751
+ adp[i].DValue(j) = mip.GetJacobianInverse()(i,j);
752
+
753
+ T_CalcShape (&adp(0), SBLambda ([&] (int i, AutoDiff<DIM> shape)
754
+ { shape.StoreGradient (&dshape(i,0)) ; }));
755
+ }
756
+ */
757
+
758
+ template <class FEL, ELEMENT_TYPE ET, class BASE>
759
+ void T_ScalarFiniteElement<FEL,ET,BASE> ::
760
+ CalcMappedDShape (const BaseMappedIntegrationPoint & bmip,
761
+ BareSliceMatrix<> dshape) const
762
+ {
763
+ Switch<4-DIM>
764
+ (bmip.DimSpace()-DIM, [&bmip, dshape, this](auto CODIM)
765
+ {
766
+ constexpr int DIM_ = DIM;
767
+ constexpr int DIMSPACE = int(DIM)+int(CODIM.value);
768
+ static_assert(DIM<=DIMSPACE, "dim<=dimspace");
769
+
770
+ auto & mip = static_cast<const MappedIntegrationPoint<DIM_,DIMSPACE> &> (bmip);
771
+ auto dshapes = dshape.AddSize(ndof, DIMSPACE);
772
+
773
+ this->T_CalcShape (GetTIP(mip),
774
+ SBLambda ([dshapes] (size_t i, auto shape)
775
+ { dshapes.Row(i) = ngfem::GetGradient(shape); }));
776
+ });
777
+
778
+ /*
779
+ if (bmip.DimSpace() == DIM)
780
+ {
781
+ auto & mip = static_cast<const MappedIntegrationPoint<DIM,DIM> &> (bmip);
782
+ auto dshapes = dshape.AddSize(ndof, DIM);
783
+
784
+ T_CalcShape (GetTIP(mip),
785
+ SBLambda ([dshapes] (int i, auto shape)
786
+ { dshapes.Row(i) = ngfem::GetGradient(shape); }));
787
+ }
788
+ else if (bmip.DimSpace() == DIM+1)
789
+ {
790
+ constexpr int DIM1 = DIM<3 ? DIM+1 : DIM;
791
+ auto & mip = static_cast<const MappedIntegrationPoint<DIM,DIM1> &> (bmip);
792
+ auto dshapes = dshape.AddSize(ndof, DIM1);
793
+
794
+ T_CalcShape (GetTIP(mip),
795
+ SBLambda ([dshapes] (int i, auto shape)
796
+ {dshapes.Row(i) = ngfem::GetGradient(shape);}));
797
+ }
798
+ else
799
+ {
800
+ cout << "CalcMappedDShape called for bboundary (not implemented)" << endl;
801
+ }
802
+ */
803
+ }
804
+
805
+
806
+ template <class FEL, ELEMENT_TYPE ET, class BASE>
807
+ void T_ScalarFiniteElement<FEL,ET,BASE> ::
808
+ CalcMappedDShape (const BaseMappedIntegrationRule & bmir,
809
+ BareSliceMatrix<> dshape) const
810
+ {
811
+ /*
812
+ // auto & mir = static_cast<const MappedIntegrationRule<DIM,DIM> &> (bmir);
813
+ for (size_t i = 0; i < bmir.Size(); i++)
814
+ T_ScalarFiniteElement::CalcMappedDShape (bmir[i], dshape.Cols(i*DIM,(i+1)*DIM));
815
+ */
816
+
817
+ Switch<4-DIM>
818
+ (bmir.DimSpace()-DIM, [&bmir, dshape, this](auto CODIM)
819
+ {
820
+ constexpr int DIM_ = DIM;
821
+ constexpr int DIMSPACE = int(DIM)+int(CODIM.value);
822
+ auto & mir = static_cast<const MappedIntegrationRule<DIM_,DIMSPACE> &> (bmir);
823
+ for (size_t i = 0; i < mir.Size(); i++)
824
+ {
825
+ auto dshapes = dshape.Cols(i*DIMSPACE, (i+1)*DIMSPACE).AddSize(ndof, DIMSPACE);
826
+ this->T_CalcShape (GetTIP(mir[i]),
827
+ SBLambda ([dshapes] (size_t j, auto shape)
828
+ { dshapes.Row(j) = ngfem::GetGradient(shape); }));
829
+ }
830
+ });
831
+ }
832
+
833
+
834
+ template <class FEL, ELEMENT_TYPE ET, class BASE>
835
+ void T_ScalarFiniteElement<FEL,ET,BASE> ::
836
+ CalcMappedDShape (const SIMD_BaseMappedIntegrationRule & bmir,
837
+ BareSliceMatrix<SIMD<double>> dshapes) const
838
+ {
839
+ if (bmir.DimSpace() == DIM)
840
+ {
841
+ auto & mir = static_cast<const SIMD_MappedIntegrationRule<DIM,DIM>&> (bmir);
842
+ for (size_t i = 0; i < mir.Size(); i++)
843
+ {
844
+ SIMD<double> * pdshapes = dshapes.Col(i).Data();
845
+ size_t dist = dshapes.Dist();
846
+
847
+ // TIP<DIM,AutoDiff<DIM,SIMD<double>>> adp = GetTIP(mir[i]);
848
+ T_CalcShape (GetTIP(mir[i]), // adp,
849
+ SBLambda ([&] (size_t j, AutoDiff<DIM,SIMD<double>> shape)
850
+ {
851
+ Iterate<DIM> ( [&] (size_t ii) {
852
+ *pdshapes = shape.DValue(ii);
853
+ pdshapes += dist;
854
+ });
855
+ }));
856
+ }
857
+ }
858
+ else if (bmir.DimSpace() == DIM+1)
859
+ {
860
+ constexpr int DIM1 = DIM<3 ? DIM+1 : DIM;
861
+ auto & mir = static_cast<const SIMD_MappedIntegrationRule<DIM,DIM1>&> (bmir);
862
+ for (size_t i = 0; i < mir.Size(); i++)
863
+ {
864
+ SIMD<double> * pdshapes = dshapes.Col(i).Data();
865
+ size_t dist = dshapes.Dist();
866
+
867
+ // TIP<DIM,AutoDiff<DIM1,SIMD<double>>> adp = GetTIP(mir[i]);
868
+ T_CalcShape (GetTIP(mir[i]), // adp,
869
+ SBLambda ([&] (size_t j, AutoDiff<DIM1,SIMD<double>> shape)
870
+ {
871
+ /*
872
+ Iterate<DIM1> ( [&] (size_t ii) {
873
+ *pdshapes = shape.DValue(ii);
874
+ pdshapes += dist;
875
+ });
876
+ */
877
+ for (size_t k = 0; k < DIM1; k++)
878
+ {
879
+ *pdshapes = shape.DValue(k);
880
+ pdshapes += dist;
881
+ }
882
+ }));
883
+ }
884
+ }
885
+ else
886
+ {
887
+ cout << "EvaluateGrad(simd) called for bboundary (not implemented)" << endl;
888
+ }
889
+ }
890
+
891
+
892
+ template <class FEL, ELEMENT_TYPE ET, class BASE>
893
+ void T_ScalarFiniteElement<FEL,ET,BASE> ::
894
+ CalcDDShape (const IntegrationPoint & ip,
895
+ BareSliceMatrix<> ddshape) const
896
+ {
897
+ TIP<DIM, AutoDiff<DIM>> t1 = ip;
898
+ TIP<DIM, AutoDiffDiff<DIM>> tip = t1;
899
+
900
+ T_CalcShape (tip,
901
+ SBLambda ([ddshape] (size_t i, auto shape)
902
+ {
903
+ auto row = ddshape.Row(i);
904
+ for (int d1 = 0; d1 < DIM; d1++)
905
+ for (int d2 = 0; d2 < DIM; d2++)
906
+ row(d1*DIM+d2) = shape.DDValue(d1,d2);
907
+ }));
908
+ }
909
+
910
+
911
+
912
+ template <class FEL, ELEMENT_TYPE ET, class BASE>
913
+ void T_ScalarFiniteElement<FEL,ET,BASE> ::
914
+ CalcMappedDDShape (const BaseMappedIntegrationPoint & bmip,
915
+ BareSliceMatrix<> ddshape) const
916
+ {
917
+ /*
918
+ auto & mip = static_cast<const MappedIntegrationPoint<DIM,DIM>&> (bmip);
919
+ T_CalcShape (GetTIPHesse (mip),
920
+ SBLambda ([ddshape] (size_t i, auto shape)
921
+ {
922
+ auto row = ddshape.Row(i);
923
+ for (int d1 = 0; d1 < DIM; d1++)
924
+ for (int d2 = 0; d2 < DIM; d2++)
925
+ row(d1*DIM+d2) = shape.DDValue(d1,d2);
926
+ }));
927
+ */
928
+ /*
929
+ Iterate<4-DIM>
930
+ ([&](auto CODIM)
931
+ {
932
+ constexpr auto DIMSPACE = DIM+CODIM.value;
933
+ if (bmip.DimSpace() == DIMSPACE)
934
+ */
935
+ Switch<4-DIM>
936
+ (bmip.DimSpace()-DIM, [&] (auto CODIM)
937
+ {
938
+ constexpr int DIM_ = DIM;
939
+ constexpr int DIMSPACE = int(DIM)+int(CODIM.value);
940
+ auto & mip = static_cast<const MappedIntegrationPoint<DIM_,DIMSPACE>&> (bmip);
941
+ T_CalcShape (GetTIPHesse (mip),
942
+ SBLambda ([ddshape,DIMSPACE] (size_t i, auto shape)
943
+ {
944
+ auto row = ddshape.Row(i);
945
+ for (int d1 = 0; d1 < DIMSPACE; d1++)
946
+ for (int d2 = 0; d2 < DIMSPACE; d2++)
947
+ row(d1*DIMSPACE+d2) = shape.DDValue(d1,d2);
948
+ }));
949
+ });
950
+ }
951
+
952
+
953
+
954
+ template <class FEL, ELEMENT_TYPE ET, class BASE>
955
+ void T_ScalarFiniteElement<FEL,ET,BASE> ::
956
+ CalcMappedDDShape (const SIMD<BaseMappedIntegrationPoint> & bmip,
957
+ BareSliceMatrix<SIMD<double>> ddshape) const
958
+ {
959
+ /*
960
+ Iterate<4-DIM>
961
+ ([&](auto CODIM)
962
+ {
963
+ constexpr auto DIMSPACE = DIM+CODIM.value;
964
+ if (bmip.DimSpace() == DIMSPACE)
965
+ {
966
+ */
967
+
968
+ Switch<4-DIM>
969
+ (bmip.DimSpace()-DIM, [&] (auto CODIM)
970
+ {
971
+ constexpr int DIMSPACE = DIM+CODIM.value;
972
+
973
+ auto & mip = static_cast<const SIMD<MappedIntegrationPoint<DIM,DIMSPACE>>&> (bmip);
974
+ T_CalcShape (GetTIPHesse (mip),
975
+ SBLambda ([ddshape,DIMSPACE] (size_t i, auto shape)
976
+ {
977
+ auto row = ddshape.Row(i);
978
+ for (int d1 = 0; d1 < DIMSPACE; d1++)
979
+ for (int d2 = 0; d2 < DIMSPACE; d2++)
980
+ row(d1*DIMSPACE+d2) = shape.DDValue(d1,d2);
981
+ }));
982
+ });
983
+ }
984
+
985
+
986
+
987
+
988
+ #endif
989
+
990
+ template <class FEL, ELEMENT_TYPE ET, class BASE>
991
+ bool T_ScalarFiniteElement<FEL,ET,BASE> :: GetDiagDualityMassInverse (FlatVector<> diag) const
992
+ {
993
+ return static_cast<const FEL*>(this)->GetDiagDualityMassInverse2(diag);
994
+ }
995
+
996
+
997
+
998
+ template <class FEL, ELEMENT_TYPE ET, class BASE>
999
+ void T_ScalarFiniteElement<FEL,ET,BASE> ::
1000
+ CalcDualShape (const BaseMappedIntegrationPoint & mip, BareSliceVector<> shape) const
1001
+ {
1002
+ // static_cast<const FEL*>(this) -> CalcDualShape2 (mip, shape);
1003
+ /*
1004
+ try
1005
+ {
1006
+ static_cast<const FEL*>(this) -> CalcDualShape2 (mip, shape);
1007
+ }
1008
+ catch (const Exception& e)
1009
+ {
1010
+ double imeas = 1.0/mip.GetMeasure();
1011
+ shape = 0.0;
1012
+ static_cast<const FEL*> (this)->
1013
+ T_CalcDualShape (GetTIP<DIM>(mip.IP()), SBLambda ( [&](int j, double val) { shape(j) = imeas * val; }));
1014
+ }
1015
+ */
1016
+ double imeas = 1.0/mip.GetMeasure();
1017
+ shape.Range(ndof) = 0.0;
1018
+ static_cast<const FEL*> (this)->
1019
+ T_CalcDualShape (GetTIP<DIM>(mip.IP()), SBLambda ( [&](int j, double val) { shape(j) = imeas * val; }));
1020
+ }
1021
+
1022
+
1023
+
1024
+
1025
+ }
1026
+
1027
+
1028
+
1029
+ #endif