ngsolve 6.2.2506.post75.dev0__cp314-cp314-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ngsolve might be problematic. Click here for more details.

Files changed (303) hide show
  1. ngsolve-6.2.2506.post75.dev0.data/data/bin/ngs_nvcc +14 -0
  2. ngsolve-6.2.2506.post75.dev0.data/data/bin/ngs_nvlink +14 -0
  3. ngsolve-6.2.2506.post75.dev0.data/data/bin/ngscxx +15 -0
  4. ngsolve-6.2.2506.post75.dev0.data/data/bin/ngsld +11 -0
  5. ngsolve-6.2.2506.post75.dev0.data/data/bin/ngsolve.tcl +648 -0
  6. ngsolve-6.2.2506.post75.dev0.data/data/bin/ngspy +2 -0
  7. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/analytic_integrals.hpp +10 -0
  8. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/arnoldi.hpp +55 -0
  9. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/bandmatrix.hpp +334 -0
  10. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/basematrix.hpp +957 -0
  11. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/basevector.hpp +1268 -0
  12. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/bdbequations.hpp +2805 -0
  13. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/bdbintegrator.hpp +1660 -0
  14. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/bem_diffops.hpp +475 -0
  15. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/bessel.hpp +1064 -0
  16. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/bilinearform.hpp +963 -0
  17. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/bla.hpp +29 -0
  18. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/blockalloc.hpp +95 -0
  19. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/blockjacobi.hpp +328 -0
  20. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/bspline.hpp +116 -0
  21. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/calcinverse.hpp +141 -0
  22. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/cg.hpp +368 -0
  23. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/chebyshev.hpp +44 -0
  24. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/cholesky.hpp +720 -0
  25. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/clapack.h +7254 -0
  26. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/code_generation.hpp +296 -0
  27. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/coefficient.hpp +2033 -0
  28. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/coefficient_impl.hpp +19 -0
  29. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/coefficient_stdmath.hpp +167 -0
  30. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/commutingAMG.hpp +106 -0
  31. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/comp.hpp +79 -0
  32. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/compatibility.hpp +41 -0
  33. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/complex_wrapper.hpp +73 -0
  34. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/compressedfespace.hpp +110 -0
  35. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/contact.hpp +235 -0
  36. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/cuda_linalg.hpp +175 -0
  37. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/cuda_ngbla.hpp +226 -0
  38. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/cuda_ngstd.hpp +527 -0
  39. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/cuda_profiler.hpp +240 -0
  40. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/diagonalmatrix.hpp +154 -0
  41. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/differentialoperator.hpp +276 -0
  42. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/diffop.hpp +1286 -0
  43. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/diffop_impl.hpp +328 -0
  44. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/diffopwithfactor.hpp +123 -0
  45. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/discontinuous.hpp +84 -0
  46. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/dump.hpp +949 -0
  47. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/ectypes.hpp +121 -0
  48. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/eigen.hpp +60 -0
  49. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/eigensystem.hpp +18 -0
  50. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/elasticity_equations.hpp +595 -0
  51. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/elementbyelement.hpp +195 -0
  52. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/elementtopology.hpp +1760 -0
  53. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/elementtransformation.hpp +339 -0
  54. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/evalfunc.hpp +405 -0
  55. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/expr.hpp +1686 -0
  56. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/facetfe.hpp +175 -0
  57. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/facetfespace.hpp +180 -0
  58. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/facethofe.hpp +111 -0
  59. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/facetsurffespace.hpp +112 -0
  60. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/fe_interfaces.hpp +32 -0
  61. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/fem.hpp +87 -0
  62. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/fesconvert.hpp +14 -0
  63. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/fespace.hpp +1449 -0
  64. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/finiteelement.hpp +286 -0
  65. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/globalinterfacespace.hpp +77 -0
  66. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/globalspace.hpp +115 -0
  67. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/gridfunction.hpp +525 -0
  68. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/h1amg.hpp +124 -0
  69. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/h1hofe.hpp +188 -0
  70. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/h1hofe_impl.hpp +1262 -0
  71. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/h1hofefo.hpp +148 -0
  72. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/h1hofefo_impl.hpp +185 -0
  73. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/h1hofespace.hpp +167 -0
  74. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/h1lofe.hpp +1240 -0
  75. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/h1lumping.hpp +41 -0
  76. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/hcurl_equations.hpp +1381 -0
  77. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/hcurlcurlfe.hpp +2241 -0
  78. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/hcurlcurlfespace.hpp +78 -0
  79. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/hcurlfe.hpp +259 -0
  80. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/hcurlfe_utils.hpp +107 -0
  81. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/hcurlhdiv_dshape.hpp +857 -0
  82. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/hcurlhdivfes.hpp +308 -0
  83. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/hcurlhofe.hpp +175 -0
  84. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/hcurlhofe_impl.hpp +1871 -0
  85. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/hcurlhofespace.hpp +193 -0
  86. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/hcurllofe.hpp +1146 -0
  87. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/hdiv_equations.hpp +880 -0
  88. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/hdivdivfe.hpp +2923 -0
  89. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/hdivdivsurfacespace.hpp +76 -0
  90. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/hdivfe.hpp +206 -0
  91. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/hdivfe_utils.hpp +717 -0
  92. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/hdivfes.hpp +75 -0
  93. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/hdivhofe.hpp +447 -0
  94. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/hdivhofe_impl.hpp +1107 -0
  95. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/hdivhofefo.hpp +229 -0
  96. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/hdivhofespace.hpp +177 -0
  97. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/hdivhosurfacefespace.hpp +106 -0
  98. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/hdivlofe.hpp +773 -0
  99. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/hidden.hpp +74 -0
  100. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/householder.hpp +181 -0
  101. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/hypre_ams_precond.hpp +123 -0
  102. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/hypre_precond.hpp +73 -0
  103. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/integrator.hpp +2012 -0
  104. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/integratorcf.hpp +253 -0
  105. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/interpolate.hpp +49 -0
  106. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/intrule.hpp +2542 -0
  107. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/intrules_SauterSchwab.hpp +25 -0
  108. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/irspace.hpp +49 -0
  109. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/jacobi.hpp +153 -0
  110. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/kernels.hpp +762 -0
  111. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/l2hofe.hpp +194 -0
  112. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/l2hofe_impl.hpp +564 -0
  113. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/l2hofefo.hpp +542 -0
  114. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/l2hofespace.hpp +344 -0
  115. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/la.hpp +38 -0
  116. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/linalg_kernels.hpp +102 -0
  117. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/linearform.hpp +266 -0
  118. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/matrix.hpp +2140 -0
  119. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/memusage.hpp +41 -0
  120. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/meshaccess.hpp +1359 -0
  121. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/mgpre.hpp +204 -0
  122. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/mp_coefficient.hpp +145 -0
  123. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/mptools.hpp +2281 -0
  124. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/multigrid.hpp +42 -0
  125. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/multivector.hpp +447 -0
  126. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/mumpsinverse.hpp +187 -0
  127. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/mycomplex.hpp +361 -0
  128. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/ng_lapack.hpp +1661 -0
  129. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/ngblas.hpp +1232 -0
  130. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/ngs_defines.hpp +30 -0
  131. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/ngs_stdcpp_include.hpp +106 -0
  132. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/ngs_utils.hpp +121 -0
  133. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/ngsobject.hpp +1019 -0
  134. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/ngsstream.hpp +113 -0
  135. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/ngstd.hpp +72 -0
  136. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/nodalhofe.hpp +96 -0
  137. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/nodalhofe_impl.hpp +141 -0
  138. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/normalfacetfe.hpp +223 -0
  139. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/normalfacetfespace.hpp +98 -0
  140. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/normalfacetsurfacefespace.hpp +84 -0
  141. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/order.hpp +251 -0
  142. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/parallel_matrices.hpp +222 -0
  143. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/paralleldofs.hpp +340 -0
  144. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/parallelngs.hpp +23 -0
  145. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/parallelvector.hpp +269 -0
  146. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/pardisoinverse.hpp +200 -0
  147. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/periodic.hpp +129 -0
  148. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/plateaufespace.hpp +25 -0
  149. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/pml.hpp +275 -0
  150. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/pmltrafo.hpp +631 -0
  151. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/postproc.hpp +142 -0
  152. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/potentialtools.hpp +22 -0
  153. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/precomp.hpp +60 -0
  154. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/preconditioner.hpp +602 -0
  155. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/prolongation.hpp +377 -0
  156. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/python_comp.hpp +107 -0
  157. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/python_fem.hpp +89 -0
  158. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/python_linalg.hpp +58 -0
  159. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/python_ngstd.hpp +386 -0
  160. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/recursive_pol.hpp +4896 -0
  161. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/recursive_pol_tet.hpp +395 -0
  162. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/recursive_pol_trig.hpp +492 -0
  163. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/reorderedfespace.hpp +81 -0
  164. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/sample_sort.hpp +105 -0
  165. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/scalarfe.hpp +335 -0
  166. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/shapefunction_utils.hpp +113 -0
  167. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/simd_complex.hpp +329 -0
  168. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/smoother.hpp +253 -0
  169. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/solve.hpp +89 -0
  170. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/sparsecholesky.hpp +313 -0
  171. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/sparsematrix.hpp +1038 -0
  172. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/sparsematrix_dyn.hpp +90 -0
  173. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/sparsematrix_impl.hpp +1013 -0
  174. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/special_matrix.hpp +463 -0
  175. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/specialelement.hpp +125 -0
  176. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/statushandler.hpp +33 -0
  177. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/stringops.hpp +12 -0
  178. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/superluinverse.hpp +136 -0
  179. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/symbolicintegrator.hpp +850 -0
  180. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/symmetricmatrix.hpp +144 -0
  181. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/tangentialfacetfe.hpp +224 -0
  182. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/tangentialfacetfespace.hpp +91 -0
  183. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/tensor.hpp +522 -0
  184. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/tensorcoefficient.hpp +446 -0
  185. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/tensorproductintegrator.hpp +113 -0
  186. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/thcurlfe.hpp +128 -0
  187. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/thcurlfe_impl.hpp +380 -0
  188. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/thdivfe.hpp +80 -0
  189. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/thdivfe_impl.hpp +492 -0
  190. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/tpdiffop.hpp +461 -0
  191. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/tpfes.hpp +133 -0
  192. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/tpintrule.hpp +224 -0
  193. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/triangular.hpp +465 -0
  194. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/tscalarfe.hpp +245 -0
  195. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/tscalarfe_impl.hpp +1029 -0
  196. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/umfpackinverse.hpp +148 -0
  197. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/unifiedvector.hpp +103 -0
  198. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/vector.hpp +1273 -0
  199. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/voxelcoefficientfunction.hpp +41 -0
  200. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/vtkoutput.hpp +198 -0
  201. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/vvector.hpp +208 -0
  202. ngsolve-6.2.2506.post75.dev0.data/data/include/netgen/webgui.hpp +92 -0
  203. ngsolve-6.2.2506.post75.dev0.data/data/lib/cmake/ngsolve/NGSolveConfig.cmake +102 -0
  204. ngsolve-6.2.2506.post75.dev0.data/data/lib/cmake/ngsolve/ngsolve-targets-release.cmake +89 -0
  205. ngsolve-6.2.2506.post75.dev0.data/data/lib/cmake/ngsolve/ngsolve-targets.cmake +173 -0
  206. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/netgen_mesher.libs/libngbla.so +0 -0
  207. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/netgen_mesher.libs/libngcomp.so +0 -0
  208. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/netgen_mesher.libs/libngfem.so +0 -0
  209. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/netgen_mesher.libs/libngla.so +0 -0
  210. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/netgen_mesher.libs/libngsbem.so +0 -0
  211. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/netgen_mesher.libs/libngscudalib.so +0 -0
  212. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/netgen_mesher.libs/libngsolve.so +0 -0
  213. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/netgen_mesher.libs/libngstd.so +0 -0
  214. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/TensorProductTools.py +210 -0
  215. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/__console.py +94 -0
  216. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/__expr.py +181 -0
  217. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/__init__.py +148 -0
  218. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/_scikit_build_core_dependencies.py +30 -0
  219. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/bvp.py +78 -0
  220. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/config/__init__.py +1 -0
  221. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/config/__main__.py +4 -0
  222. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/config/config.py +60 -0
  223. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/TensorProduct/__init__.py +0 -0
  224. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/TensorProduct/tp_dg_1d_1d.py +80 -0
  225. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/TensorProduct/tp_dg_1d_2d.py +73 -0
  226. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/TensorProduct/tp_dg_2d_1d.py +72 -0
  227. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/TensorProduct/tp_dg_2d_2d.py +66 -0
  228. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/__init__.py +0 -0
  229. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/howto/__init__.py +0 -0
  230. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/howto/hhj.py +44 -0
  231. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/howto/hybrid_dg.py +53 -0
  232. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/howto/mixed.py +30 -0
  233. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/howto/nonlin.py +29 -0
  234. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/howto/pickling.py +26 -0
  235. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/howto/pml.py +31 -0
  236. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/howto/taskmanager.py +20 -0
  237. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/howto/tdnns.py +47 -0
  238. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/howto/timeDG-skeleton.py +45 -0
  239. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/howto/timeDG.py +38 -0
  240. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/howto/timeDGlap.py +42 -0
  241. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/howto/timeDGwave.py +61 -0
  242. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/intro/__init__.py +0 -0
  243. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/intro/adaptive.py +123 -0
  244. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/intro/cmagnet.py +59 -0
  245. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/intro/elasticity.py +76 -0
  246. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/intro/navierstokes.py +74 -0
  247. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/intro/poisson.ipynb +170 -0
  248. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/intro/poisson.py +41 -0
  249. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/mpi/__init__.py +0 -0
  250. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/mpi/mpi_cmagnet.py +87 -0
  251. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/mpi/mpi_navierstokes.py +117 -0
  252. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/mpi/mpi_poisson.py +89 -0
  253. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/demos/mpi/mpi_timeDG.py +82 -0
  254. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/directsolvers.py +26 -0
  255. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/eigenvalues.py +364 -0
  256. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/internal.py +89 -0
  257. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/krylovspace.py +1013 -0
  258. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/meshes.py +748 -0
  259. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/ngs2petsc.py +310 -0
  260. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/ngscuda.so +0 -0
  261. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/ngscxx.py +42 -0
  262. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/ngslib.so +0 -0
  263. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/nonlinearsolvers.py +203 -0
  264. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/preconditioners.py +11 -0
  265. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/solve_implementation.py +168 -0
  266. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/solvers.py +7 -0
  267. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/timestepping.py +185 -0
  268. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/timing.py +108 -0
  269. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/utils.py +167 -0
  270. ngsolve-6.2.2506.post75.dev0.data/data/lib/python3.14/site-packages/ngsolve/webgui.py +670 -0
  271. ngsolve-6.2.2506.post75.dev0.data/data/share/ngsolve/beam.geo +17 -0
  272. ngsolve-6.2.2506.post75.dev0.data/data/share/ngsolve/beam.vol +240 -0
  273. ngsolve-6.2.2506.post75.dev0.data/data/share/ngsolve/chip.in2d +41 -0
  274. ngsolve-6.2.2506.post75.dev0.data/data/share/ngsolve/chip.vol +614 -0
  275. ngsolve-6.2.2506.post75.dev0.data/data/share/ngsolve/coil.geo +12 -0
  276. ngsolve-6.2.2506.post75.dev0.data/data/share/ngsolve/coil.vol +2560 -0
  277. ngsolve-6.2.2506.post75.dev0.data/data/share/ngsolve/coilshield.geo +24 -0
  278. ngsolve-6.2.2506.post75.dev0.data/data/share/ngsolve/coilshield.vol +3179 -0
  279. ngsolve-6.2.2506.post75.dev0.data/data/share/ngsolve/cube.geo +19 -0
  280. ngsolve-6.2.2506.post75.dev0.data/data/share/ngsolve/cube.vol +1832 -0
  281. ngsolve-6.2.2506.post75.dev0.data/data/share/ngsolve/d10_DGdoubleglazing.pde +50 -0
  282. ngsolve-6.2.2506.post75.dev0.data/data/share/ngsolve/d11_chip_nitsche.pde +40 -0
  283. ngsolve-6.2.2506.post75.dev0.data/data/share/ngsolve/d1_square.pde +43 -0
  284. ngsolve-6.2.2506.post75.dev0.data/data/share/ngsolve/d2_chip.pde +35 -0
  285. ngsolve-6.2.2506.post75.dev0.data/data/share/ngsolve/d3_helmholtz.pde +22 -0
  286. ngsolve-6.2.2506.post75.dev0.data/data/share/ngsolve/d4_cube.pde +46 -0
  287. ngsolve-6.2.2506.post75.dev0.data/data/share/ngsolve/d5_beam.pde +74 -0
  288. ngsolve-6.2.2506.post75.dev0.data/data/share/ngsolve/d6_shaft.pde +73 -0
  289. ngsolve-6.2.2506.post75.dev0.data/data/share/ngsolve/d7_coil.pde +50 -0
  290. ngsolve-6.2.2506.post75.dev0.data/data/share/ngsolve/d8_coilshield.pde +49 -0
  291. ngsolve-6.2.2506.post75.dev0.data/data/share/ngsolve/d9_hybridDG.pde +72 -0
  292. ngsolve-6.2.2506.post75.dev0.data/data/share/ngsolve/doubleglazing.in2d +27 -0
  293. ngsolve-6.2.2506.post75.dev0.data/data/share/ngsolve/doubleglazing.vol +737 -0
  294. ngsolve-6.2.2506.post75.dev0.data/data/share/ngsolve/piezo2d40round4.vol.gz +0 -0
  295. ngsolve-6.2.2506.post75.dev0.data/data/share/ngsolve/shaft.geo +73 -0
  296. ngsolve-6.2.2506.post75.dev0.data/data/share/ngsolve/shaft.vol +4291 -0
  297. ngsolve-6.2.2506.post75.dev0.data/data/share/ngsolve/square.in2d +17 -0
  298. ngsolve-6.2.2506.post75.dev0.data/data/share/ngsolve/square.vol +149 -0
  299. ngsolve-6.2.2506.post75.dev0.dist-info/METADATA +14 -0
  300. ngsolve-6.2.2506.post75.dev0.dist-info/RECORD +303 -0
  301. ngsolve-6.2.2506.post75.dev0.dist-info/WHEEL +5 -0
  302. ngsolve-6.2.2506.post75.dev0.dist-info/licenses/LICENSE +504 -0
  303. ngsolve-6.2.2506.post75.dev0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1232 @@
1
+ #ifndef FILE_NGBLAS
2
+ #define FILE_NGBLAS
3
+
4
+ // optimized matrix kernels
5
+
6
+ #if defined(__clang__ ) && defined(NETGEN_ARCH_AMD64) // REGCALL is only non-empty when both macros are defined
7
+ #define REGCALL __regcall // expands to the __regcall keyword
8
+ #else
9
+ #define REGCALL // all other configurations: expands to nothing
10
+ #endif
11
+
12
+ /*
13
+ namespace ngcore
14
+ {
15
+ template <int S>
16
+ INLINE auto Range (IC<S> s)
17
+ {
18
+ return IntRange(0,s);
19
+ }
20
+ }
21
+ */
22
+
23
+
24
+ namespace ngbla
25
+ {
26
+
27
+ // ***************************** vector operations **************************
28
+
29
+
30
+ extern NGS_DLL_HEADER void SetVector (double val, FlatVector<double> vec) NETGEN_NOEXCEPT; // declaration only, body is not in this header chunk; presumably assigns val to every entry of vec - TODO confirm
31
+ extern NGS_DLL_HEADER void SetVector (Complex val, FlatVector<Complex> vec) NETGEN_NOEXCEPT; // Complex counterpart of the declaration above
32
+
33
+ extern NGS_DLL_HEADER void SetVector (double val, BareSliceVector<double> dest, size_t size) NETGEN_NOEXCEPT; // variant taking the element count as an explicit size argument
34
+
35
+ INLINE void SetVector (double val, SliceVector<double> vec) NETGEN_NOEXCEPT // SliceVector<double> overload, implemented as a thin wrapper
36
+ {
37
+ SetVector (val, vec, vec.Size()); // delegates to the three-argument SetVector, passing vec.Size() as the size
38
+ }
39
+
40
+ extern NGS_DLL_HEADER void SetVector (Complex val, SliceVector<Complex> vec) NETGEN_NOEXCEPT; // Complex SliceVector overload, declaration only (no inline wrapper as for double)
41
+
42
+
43
+
44
+
45
+ template <typename T1, typename T2, typename T1S, typename T2S> // generic element-wise copy: dest[i] = src[i]
46
+ void CopyVector (LinearVector<T1,T1S> src, LinearVector<T2,T2S> dest) NETGEN_NOEXCEPT
47
+ {
48
+ auto cs = CombinedSize(src.Size(), dest.Size()); // single loop bound derived from both sizes; CombinedSize is defined outside this chunk
49
+ for (size_t i : Range(cs))
50
+ dest[i] = src[i]; // plain assignment, so element types T1 and T2 may differ as long as the assignment compiles
51
+ }
52
+
53
+ template <typename T1, typename T2> // generic element-wise copy of the first size entries
54
+ void CopyVector (BareSliceVector<T1> src, BareSliceVector<T2> dest, size_t size) NETGEN_NOEXCEPT
55
+ {
56
+ for (size_t i : Range(size)) // the entry count comes from the explicit size argument, not from src or dest
57
+ dest[i] = src[i];
58
+ }
59
+
60
+ extern NGS_DLL_HEADER void CopyVector (BareVector<double> src, FlatVector<double> dest) NETGEN_NOEXCEPT; // non-template overloads for double and Complex follow; declarations only, bodies are not in this chunk
61
+ extern NGS_DLL_HEADER void CopyVector (BareSliceVector<double> src, BareSliceVector<double> dest, size_t size) NETGEN_NOEXCEPT; // same parameter list as the generic (src, dest, size) template, fixed to double
62
+ extern NGS_DLL_HEADER void CopyVector (BareVector<Complex> src, FlatVector<Complex> dest) NETGEN_NOEXCEPT;
63
+ extern NGS_DLL_HEADER void CopyVector (BareSliceVector<Complex> src, BareSliceVector<Complex> dest, size_t size) NETGEN_NOEXCEPT;
64
+
65
+
66
+ template <typename T0, typename T1, typename T2> // scaled copy: dest[i] = alpha * src[i]
67
+ void CopyVector (T0 alpha, BareVector<T1> src, FlatVector<T2> dest) NETGEN_NOEXCEPT
68
+ {
69
+ for (size_t i : Range(dest)) // loop range is taken from dest only
70
+ dest[i] = alpha * src[i];
71
+ }
72
+
73
+ template <typename T0, typename T1, typename T2> // scaled copy for slice vectors: dest[i] = alpha * src[i]
74
+ void CopyVector (T0 alpha, BareSliceVector<T1> src, SliceVector<T2> dest) NETGEN_NOEXCEPT
75
+ {
76
+ for (size_t i : Range(dest)) // loop range is taken from dest only
77
+ dest[i] = alpha * src[i];
78
+ }
79
+
80
+ extern NGS_DLL_HEADER void CopyVector (double alpha, BareVector<double> src, FlatVector<double> dest) NETGEN_NOEXCEPT; // non-template scaled-copy overloads for double and Complex; declarations only, bodies are not in this chunk
81
+ extern NGS_DLL_HEADER void CopyVector (double alpha, BareSliceVector<double> src, SliceVector<double> dest) NETGEN_NOEXCEPT;
82
+ extern NGS_DLL_HEADER void CopyVector (Complex alpha, BareVector<Complex> src, FlatVector<Complex> dest) NETGEN_NOEXCEPT;
83
+ extern NGS_DLL_HEADER void CopyVector (Complex alpha, BareSliceVector<Complex> src, SliceVector<Complex> dest) NETGEN_NOEXCEPT;
84
+
85
+
86
+
87
+ template <typename T0, typename T1, typename T2> // scaled accumulate: dest[i] += alpha * src[i]
88
+ void AddVector (T0 alpha, BareVector<const T1> src, FlatVector<T2> dest) NETGEN_NOEXCEPT
89
+ {
90
+ for (size_t i : Range(dest)) // loop range is taken from dest only
91
+ dest[i] += alpha*src[i];
92
+ }
93
+
94
+ template <typename T0, typename T1, typename T2> // scaled accumulate for slice vectors: dest[i] += alpha * src[i]
95
+ void AddVector (T0 alpha, BareSliceVector<const T1> src, SliceVector<T2> dest) NETGEN_NOEXCEPT
96
+ {
97
+ for (size_t i : Range(dest)) // loop range is taken from dest only
98
+ dest[i] += alpha*src[i];
99
+ }
100
+
101
+ extern NGS_DLL_HEADER void AddVector (double alpha, BareVector<const double> src, FlatVector<double> dest) NETGEN_NOEXCEPT; // non-template double overload; declaration only, body is not in this chunk
102
+ extern NGS_DLL_HEADER void AddVector (double alpha, BareSliceVector<const double> src, BareSliceVector<double> dest, size_t size) NETGEN_NOEXCEPT; // variant taking the element count as an explicit size argument
103
+ inline void AddVector (double alpha, BareSliceVector<const double> src, SliceVector<double> dest) // NOTE(review): not marked NETGEN_NOEXCEPT although the four-argument AddVector it calls is - confirm whether intentional
104
+ {
105
+ AddVector (alpha, src, dest, dest.Size()); // delegates to the four-argument AddVector, passing dest.Size() as the size
106
+ }
107
+
108
+
109
+
110
+ // ************************ matrix and matrix-vector ops ****************
111
+
112
+ template <typename TA, typename TB> // generic transpose: writes Trans(a) into b
113
+ void TransposeMatrix(SliceMatrix<TA> a, SliceMatrix<TB> b)
114
+ {
115
+ b = Trans(a); // implemented as a single assignment of Trans(a) to b; Trans is defined outside this chunk
116
+ }
117
+
118
+ void TransposeMatrix(SliceMatrix<> a, SliceMatrix<> b); // non-template overload for the default SliceMatrix<>; NOTE(review): declared without extern NGS_DLL_HEADER, unlike the neighbouring declarations - confirm
119
+
120
+
121
+
122
+ typedef void (*pmult_matvec)(BareSliceMatrix<>, FlatVector<>, FlatVector<>) NETGEN_NOEXCEPT; // pointer type for a kernel taking (matrix, vector, vector)
123
+ extern NGS_DLL_HEADER pmult_matvec dispatch_matvec[26]; // table of 26 kernel pointers, indexed by the clamped size of x (see below)
124
+
125
+ inline void MultMatVec (BareSliceMatrix<> a, FlatVector<> x, FlatVector<> y) NETGEN_NOEXCEPT // presumably computes y = a * x - TODO confirm against the kernels
126
+ {
127
+ size_t dsx = min(x.Size(), std::size(dispatch_matvec)-1); // clamp to the last table index (25), so every x.Size() >= 25 shares the final entry
128
+ (*dispatch_matvec[dsx]) (a, x, y); // call the selected kernel with the arguments unchanged
129
+ }
130
+
131
+
132
+ typedef void (*pmultadd_matvec)(double s, BareSliceMatrix<>, FlatVector<>, FlatVector<>);
133
+ extern NGS_DLL_HEADER pmultadd_matvec dispatch_addmatvec[25];
134
+
135
+ inline void MultAddMatVec (double s, BareSliceMatrix<> a, FlatVector<> x, FlatVector<> y)
136
+ {
137
+ size_t dsx = min(x.Size(), std::size(dispatch_addmatvec)-1);
138
+ (*dispatch_addmatvec[dsx]) (s, a, x, y);
139
+ }
140
+
141
+
142
+ // typedef void (*pmult_mattransvec)(BareSliceMatrix<>, FlatVector<>, FlatVector<>);
143
+ extern NGS_DLL_HEADER pmult_matvec dispatch_mattransvec[13];
144
+ inline void MultMatTransVec (BareSliceMatrix<> a, FlatVector<> x, FlatVector<> y)
145
+ {
146
+ size_t dsx = min(x.Size(), std::size(dispatch_mattransvec)-1);
147
+ (*dispatch_mattransvec[dsx]) (a, x, y);
148
+ }
149
+
150
+ // typedef void (*pmultadd_mattransvec)(double s, BareSliceMatrix<>, FlatVector<>, FlatVector<>);
151
+ extern NGS_DLL_HEADER pmultadd_matvec dispatch_addmattransvec[13];
152
+ inline void MultAddMatTransVec (double s, BareSliceMatrix<> a, FlatVector<> x, FlatVector<> y)
153
+ {
154
+ size_t dsx = min(x.Size(), std::size(dispatch_addmattransvec)-1);
155
+ (*dispatch_addmattransvec[dsx]) (s, a, x, y);
156
+ }
157
+
158
+
159
+ inline void MultAddMatVec (double s, BareSliceMatrix<double, ColMajor> a, FlatVector<> x, FlatVector<> y)
160
+ {
161
+ MultAddMatTransVec (s, Trans(a), x, y);
162
+ }
163
+
164
+
165
+
166
+
167
+ extern NGS_DLL_HEADER void MultAddMatTransVecIndirect_intern
168
+ (double s, BareSliceMatrix<> a, FlatVector<> x, FlatVector<> y, FlatArray<int> ind);
169
+ typedef void (*pmultadd_mattransvecind)(double s, BareSliceMatrix<>, FlatVector<>, FlatVector<>, FlatArray<int>);
170
+ extern NGS_DLL_HEADER pmultadd_mattransvecind dispatch_addmattransvecI[25];
171
+
172
+ inline void MultAddMatTransVecIndirect (double s, BareSliceMatrix<> a,
173
+ FlatVector<> x, FlatVector<> y, FlatArray<int> ind)
174
+ {
175
+ size_t sy = y.Size();
176
+ if (sy <= 24)
177
+ (*dispatch_addmattransvecI[sy]) (s, a, x, y, ind);
178
+ else
179
+ MultAddMatTransVecIndirect_intern (s, a, x, y, ind);
180
+ }
181
+
182
+
183
+
184
+
185
+
186
+
187
+ template <typename TA, typename TB, typename TC>
188
+ INLINE void MultMatMat(SliceMatrix<TA> a, SliceMatrix<TB> b, SliceMatrix<TC> c)
189
+ {
190
+ c = a * b;
191
+ }
192
+ extern NGS_DLL_HEADER void REGCALL MultMatMat_intern (size_t ha, size_t wa, size_t wb,
193
+ BareSliceMatrix<> a, BareSliceMatrix<> b, BareSliceMatrix<> c);
194
+
195
+ typedef void REGCALL (*pmultABW)(size_t, size_t, size_t, BareSliceMatrix<>, BareSliceMatrix<>, BareSliceMatrix<>);
196
+
197
+ extern NGS_DLL_HEADER pmultABW dispatch_multAB[14];
198
+ inline void MultMatMat (SliceMatrix<> a, SliceMatrix<> b, SliceMatrix<> c)
199
+ {
200
+ if (a.Height() == 0 || b.Width() == 0) return;
201
+ size_t wa = std::min(a.Width(), std::size(dispatch_multAB)-1);
202
+ (*dispatch_multAB[wa]) (a.Height(), a.Width(), b.Width(), a, b, c);
203
+ }
204
+
205
+ extern NGS_DLL_HEADER pmultABW dispatch_minusmultAB[14];
206
+ inline void MinusMultAB (SliceMatrix<> a, SliceMatrix<> b, SliceMatrix<> c)
207
+ {
208
+ if (a.Height() == 0 || b.Width() == 0) return;
209
+ /*
210
+ size_t wa = a.Width();
211
+ if (wa >= std::size(dispatch_minusmultAB))
212
+ wa = std::size(dispatch_minusmultAB)-1;
213
+ */
214
+ size_t wa = std::min(a.Width(), std::size(dispatch_minusmultAB)-1);
215
+ (*dispatch_minusmultAB[wa]) (a.Height(), a.Width(), b.Width(), a, b, c);
216
+ }
217
+
218
+ extern NGS_DLL_HEADER pmultABW dispatch_addAB[14];
219
+ inline void AddAB (SliceMatrix<> a, SliceMatrix<> b, SliceMatrix<> c)
220
+ {
221
+ if (a.Height() == 0 || b.Width() == 0) return;
222
+ // size_t wa = a.Width();
223
+ // if (wa >= std::size(dispatch_addAB))
224
+ // wa = std::size(dispatch_addAB)-1;
225
+ size_t wa = std::min(a.Width(), std::size(dispatch_addAB)-1);
226
+ (*dispatch_addAB[wa]) (a.Height(), a.Width(), b.Width(), a, b, c);
227
+ }
228
+
229
+ extern NGS_DLL_HEADER pmultABW dispatch_subAB[14];
230
+ inline void SubAB (SliceMatrix<> a, SliceMatrix<> b, SliceMatrix<> c)
231
+ {
232
+ if (a.Height() == 0 || b.Width() == 0) return;
233
+ // size_t wa = a.Width();
234
+ // if (wa >= std::size(dispatch_subAB))
235
+ // wa = std::size(dispatch_subAB)-1;
236
+ size_t wa = std::min(a.Width(), std::size(dispatch_subAB)-1);
237
+ (*dispatch_subAB[wa]) (a.Height(), a.Width(), b.Width(), a, b, c);
238
+ }
239
+
240
+
241
+ extern NGS_DLL_HEADER void MultMatMat_intern (size_t ha, size_t wa, size_t wb,
242
+ BareSliceMatrix<> a, BareSliceMatrix<SIMD<double>> b, BareSliceMatrix<SIMD<double>> c);
243
+
244
+ inline void MultMatMat (SliceMatrix<> a, SliceMatrix<SIMD<double>> b, SliceMatrix<SIMD<double>> c)
245
+ {
246
+ MultMatMat_intern (a.Height(), a.Width(), b.Width(), a, b, c);
247
+ }
248
+
249
+
250
+
251
+
252
+
253
+ template <bool ADD, bool POS>
254
+ struct NGS_DLL_HEADER dispatch_atb { static pmultABW ptrs[14]; };
255
+
256
+ template <bool ADD, bool POS>
257
+ inline void MatMat_AtB (SliceMatrix<double> a, SliceMatrix<double> b, BareSliceMatrix<double> c)
258
+ {
259
+ if (a.Height() == 0 || b.Width() == 0) return;
260
+ /*
261
+ size_t wa = a.Width();
262
+ if (wa >= std::size(dispatch_atb<ADD,POS>::ptrs))
263
+ wa = std::size(dispatch_atb<ADD,POS>::ptrs)-1;
264
+ */
265
+ size_t wa = std::min(a.Width(), std::size(dispatch_atb<ADD,POS>::ptrs)-1);
266
+ (*dispatch_atb<ADD,POS>::ptrs[wa]) (a.Height(), a.Width(), b.Width(), a, b, c);
267
+ }
268
+
269
+ inline void MultAtB (SliceMatrix<double> a, SliceMatrix<double> b, BareSliceMatrix<double> c)
270
+ { MatMat_AtB<false,true> (a, b, c); }
271
+
272
+
273
+
274
+
275
+
276
+ //extern NGS_DLL_HEADER void MultABt (SliceMatrix<double> a, SliceMatrix<double> b, BareSliceMatrix<double> c);
277
+
278
+ typedef void REGCALL (*pfunc_abt)(size_t, size_t, BareSliceMatrix<>, BareSliceMatrix<>, BareSliceMatrix<>);
279
+ extern NGS_DLL_HEADER pfunc_abt dispatch_abt[25];
280
+ extern NGS_DLL_HEADER pfunc_abt dispatch_addabt[25];
281
+
282
+ extern NGS_DLL_HEADER void MultABt_intern (SliceMatrix<double> a, SliceMatrix<double> b, BareSliceMatrix<double> c);
283
+ extern NGS_DLL_HEADER void AddABt_intern (SliceMatrix<double> a, SliceMatrix<double> b, BareSliceMatrix<double> c);
284
+
285
+ inline void MultABt (SliceMatrix<double> a, SliceMatrix<double> b, BareSliceMatrix<double> c)
286
+ {
287
+ size_t wa = a.Width();
288
+ if (wa <= 24)
289
+ (*dispatch_abt[wa]) (a.Height(), b.Height(), a, b, c);
290
+ else
291
+ MultABt_intern (a,b,c);
292
+ }
293
+
294
+ inline void AddABt (SliceMatrix<double> a, SliceMatrix<double> b, BareSliceMatrix<double> c)
295
+ {
296
+ size_t wa = a.Width();
297
+ if (wa <= 24)
298
+ (*dispatch_addabt[wa]) (a.Height(), b.Height(), a, b, c);
299
+ else
300
+ AddABt_intern (a,b,c);
301
+ }
302
+
303
+
304
// Out-of-line A*Bt kernels. Everything in this group is declared extern only;
// the definitions are not part of this header.
// NOTE(review): judging by the names and by the NgGEMM specializations that call them,
// Mult/MinusMult overwrite c while Add/Sub accumulate into c - confirm in the defining source file.
// NOTE(review): the "Sym" variants presumably exploit a symmetric result - confirm.
+ extern NGS_DLL_HEADER void MinusMultABt (SliceMatrix<double> a, SliceMatrix<double> b, BareSliceMatrix<double> c);
305
+ extern NGS_DLL_HEADER void SubABt (SliceMatrix<double> a, SliceMatrix<double> b, BareSliceMatrix<double> c);
306
+
307
// SIMD<double> operands, double result.
+ extern NGS_DLL_HEADER void AddABt (SliceMatrix<SIMD<double>> a, SliceMatrix<SIMD<double>> b, BareSliceMatrix<double> c);
308
+ extern NGS_DLL_HEADER void SubABt (SliceMatrix<SIMD<double>> a, SliceMatrix<SIMD<double>> b, BareSliceMatrix<double> c);
309
+
310
+
311
+ // copied from symbolicintegrator, needs some rework
312
+ extern NGS_DLL_HEADER void AddABtSym (SliceMatrix<double> a, SliceMatrix<double> b, BareSliceMatrix<double> c);
313
+ extern NGS_DLL_HEADER void AddABtSym (SliceMatrix<SIMD<double>> a, SliceMatrix<SIMD<double>> b, BareSliceMatrix<double> c);
314
+
315
// SIMD operands (real/complex combinations), Complex result.
+ extern NGS_DLL_HEADER void AddABt (FlatMatrix<SIMD<Complex>> a, FlatMatrix<SIMD<Complex>> b, SliceMatrix<Complex> c);
316
+ extern NGS_DLL_HEADER void AddABtSym (FlatMatrix<SIMD<Complex>> a, FlatMatrix<SIMD<Complex>> b, SliceMatrix<Complex> c);
317
+ extern NGS_DLL_HEADER void AddABt (SliceMatrix<SIMD<double>> a, SliceMatrix<SIMD<Complex>> b, SliceMatrix<Complex> c);
318
+ extern NGS_DLL_HEADER void AddABt (SliceMatrix<SIMD<Complex>> a, SliceMatrix<SIMD<double>> b, SliceMatrix<Complex> c);
319
+
320
+ extern NGS_DLL_HEADER void AddABtSym (FlatMatrix<SIMD<double>> a,
321
+ FlatMatrix<SIMD<Complex>> b,
322
+ SliceMatrix<Complex> c);
323
+ extern NGS_DLL_HEADER void AddABt (FlatMatrix<SIMD<double>> a,
324
+ FlatMatrix<SIMD<double>> b,
325
+ SliceMatrix<Complex> c);
326
+
327
+ extern NGS_DLL_HEADER void AddABtSym (FlatMatrix<SIMD<double>> a,
328
+ FlatMatrix<SIMD<double>> b,
329
+ SliceMatrix<Complex> c);
330
+
331
// Real operands, Complex result.
+ extern NGS_DLL_HEADER void AddABt (SliceMatrix<double> a,
332
+ SliceMatrix<double> b,
333
+ SliceMatrix<Complex> c);
334
+
335
+ extern NGS_DLL_HEADER void AddABtSym (SliceMatrix<double> a,
336
+ SliceMatrix<double> b,
337
+ SliceMatrix<Complex> c);
338
+
339
+
340
+
341
// Scalar (non-SIMD) operands with a Complex bare-slice result.
+ extern NGS_DLL_HEADER void AddABt (SliceMatrix<double> a, SliceMatrix<Complex> b, BareSliceMatrix<Complex> c);
342
+ extern NGS_DLL_HEADER void AddABt (SliceMatrix<Complex> a, SliceMatrix<Complex> b, BareSliceMatrix<Complex> c);
343
+ extern NGS_DLL_HEADER void SubABt (SliceMatrix<Complex> a, SliceMatrix<Complex> b, BareSliceMatrix<Complex> c);
344
+
345
+ extern NGS_DLL_HEADER void AddABtSym (SliceMatrix<double> a, SliceMatrix<Complex> b, BareSliceMatrix<Complex> c);
346
+ extern NGS_DLL_HEADER void AddABtSym (SliceMatrix<Complex> a, SliceMatrix<Complex> b, BareSliceMatrix<Complex> c);
347
+
348
+
349
+ extern NGS_DLL_HEADER
350
+ void ScaleCols (SliceMatrix<double,RowMajor> a, BareSliceVector<double> diag);
351
+ extern NGS_DLL_HEADER
352
+ void ScaleCols (SliceMatrix<double,ColMajor> a, BareSliceVector<double> diag);
353
+
354
+ template <ORDERING ord>
355
+ INLINE void ScaleRows (SliceMatrix<double,ord> a, BareSliceVector<double> diag)
356
+ {
357
+ ScaleCols (Trans(a), diag);
358
+ }
359
+
360
+
361
+
362
+ // for Cholesky and SparseCholesky
363
+ extern NGS_DLL_HEADER
364
+ void SubADBt (SliceMatrix<double> a,
365
+ SliceVector<double> diag,
366
+ SliceMatrix<double> b, SliceMatrix<double> c);
367
+
368
+ extern NGS_DLL_HEADER
369
+ void SubAtDB (SliceMatrix<double> a,
370
+ SliceVector<double> diag,
371
+ SliceMatrix<double> b, SliceMatrix<double> c);
372
+
373
+ extern NGS_DLL_HEADER
374
+ void SubAtDB (SliceMatrix<Complex> a,
375
+ SliceVector<Complex> diag,
376
+ SliceMatrix<Complex> b, SliceMatrix<Complex> c);
377
+
378
+ template <typename T>
379
+ void SubADBt (SliceMatrix<T,ColMajor> a,
380
+ SliceVector<T> diag,
381
+ SliceMatrix<T,ColMajor> b, SliceMatrix<T,ColMajor> c)
382
+ {
383
+ SubAtDB (Trans(b), diag, Trans(a), Trans(c));
384
+ }
385
+
386
+
387
+
388
+
389
+
390
+ // MultiVector operations:
391
+
392
+ // ip(i,j) = InnerProduct(x_i, y_j)
393
// MultiVector kernels working on arrays of raw vector pointers; declared only (extern).
// NOTE(review): n is presumably the common length of the vectors behind the pointers in x and y - confirm.
+ extern NGS_DLL_HEADER
394
+ void PairwiseInnerProduct (size_t n, FlatArray<double*> x, FlatArray<double*> y, BareSliceMatrix<double> ip);
395
+
396
// Complex variant with an additional conj flag.
// NOTE(review): conj presumably selects the conjugating inner product - confirm.
+ extern NGS_DLL_HEADER
397
+ void PairwiseInnerProduct (size_t n, FlatArray<Complex*> x, FlatArray<Complex*> y, BareSliceMatrix<Complex> ip, bool conj);
398
+
399
+
400
+ // x_i += sum_j a(i,j) y_j
401
+ extern NGS_DLL_HEADER
402
+ void MultiVectorAdd (size_t n, FlatArray<double*> x, FlatArray<double*> y, BareSliceMatrix<double> a);
403
+
404
+ extern NGS_DLL_HEADER
405
+ void MultiVectorAdd (size_t n, FlatArray<Complex*> x, FlatArray<Complex*> y, BareSliceMatrix<Complex> a);
406
+
407
+
408
+
409
+
410
+
411
+
412
+
413
+
414
// Forward declarations of the two NgGEMM primary templates (row-major result and
// column-major result), so that the definitions and explicit specializations
// further down can refer to each other.
+ template <bool ADD, bool POS, ORDERING orda, ORDERING ordb>
415
+ void NgGEMM (SliceMatrix<double,orda> a, SliceMatrix<double, ordb> b, SliceMatrix<double> c);
416
+
417
+ template <bool ADD, bool POS, ORDERING orda, ORDERING ordb>
418
+ void NgGEMM (SliceMatrix<double,orda> a, SliceMatrix<double, ordb> b, SliceMatrix<double,ColMajor> c);
419
+
420
+
421
+
422
+
423
+
424
+ // ADD/POS
425
+ // f f C = -A*B
426
+ // f t C = A*B
427
+ // t f C -= A*B
428
+ // t t C += A*B
429
+
430
+ template <bool ADD, bool POS, ORDERING orda, ORDERING ordb>
431
+ inline void NgGEMM (SliceMatrix<double,orda> a, SliceMatrix<double, ordb> b, SliceMatrix<double> c)
432
+ {
433
+ // static Timer t("generic MM, add/pos/ord="+ToString(ADD)+ToString(POS)+ToString(orda)+ToString(ordb));
434
+ // RegionTimer r(t);
435
+
436
+ // static Timer t("NgGEMM unresolved" + ToString(ADD) + ToString(POS) + ToString(orda) + ToString(ordb));
437
+ // RegionTimer reg(t);
438
+ // NgProfiler::AddThreadFlops (t, TaskManager::GetThreadId(), a.Height()*a.Width()*b.Height());
439
+
440
+ if (!ADD)
441
+ {
442
+ if (!POS)
443
+ c = -1*a*b;
444
+ else
445
+ c = 1*a*b;
446
+ }
447
+ else
448
+ {
449
+ if (!POS)
450
+ c -= 1*a*b;
451
+ else
452
+ c += 1*a*b;
453
+ }
454
+ }
455
+
456
+ template <> INLINE void NgGEMM<false,true> (SliceMatrix<> a, SliceMatrix<> b, SliceMatrix<> c)
457
+ {
458
+ // static Timer t("NgGEMM MultMatMat");
459
+ // RegionTimer reg(t);
460
+ // NgProfiler::AddThreadFlops (t, TaskManager::GetThreadId(), a.Height()*a.Width()*b.Width());
461
+
462
+ MultMatMat (a,b,c);
463
+ }
464
+
465
+
466
+ // C ??? A * B
467
+
468
+ template <> INLINE void NgGEMM<true,true> (SliceMatrix<> a, SliceMatrix<> b, SliceMatrix<> c)
469
+ {
470
+ // nstatic Timer t("NgGEMM AddAB");
471
+ // RegionTimer reg(t);
472
+ // NgProfiler::AddThreadFlops (t, TaskManager::GetThreadId(), a.Height()*a.Width()*b.Width());
473
+
474
+ AddAB (a,b,c);
475
+ }
476
+
477
+ template <> INLINE void NgGEMM<true,false> (SliceMatrix<> a, SliceMatrix<> b, SliceMatrix<> c)
478
+ {
479
+ // static Timer t("NgGEMM SubAB");
480
+ // RegionTimer reg(t);
481
+ // NgProfiler::AddThreadFlops (t, TaskManager::GetThreadId(), a.Height()*a.Width()*b.Width());
482
+
483
+ SubAB (a,b,c);
484
+ }
485
+
486
+ template <> INLINE void NgGEMM<false,false> (SliceMatrix<> a, SliceMatrix<> b, SliceMatrix<> c)
487
+ {
488
+ // static Timer t("NgGEMM MinusAB");
489
+ // RegionTimer reg(t);
490
+ // NgProfiler::AddThreadFlops (t, TaskManager::GetThreadId(), a.Height()*a.Width()*b.Width());
491
+
492
+ MinusMultAB (a, b, c);
493
+ }
494
+
495
+ // C ??? A * Bt
496
+
497
+ template <> INLINE void NgGEMM<false,false> (SliceMatrix<> a, SliceMatrix<double,ColMajor> b, SliceMatrix<> c)
498
+ {
499
+ // static Timer t("NgGEMM MinusABt");
500
+ // RegionTimer reg(t);
501
+ // NgProfiler::AddThreadFlops (t, TaskManager::GetThreadId(), a.Height()*a.Width()*b.Height());
502
+
503
+ MinusMultABt (a, Trans(b), c);
504
+ }
505
+
506
+ template <> INLINE void NgGEMM<false,true> (SliceMatrix<> a, SliceMatrix<double,ColMajor> b, SliceMatrix<> c)
507
+ {
508
+ // static Timer t("NgGEMM MultABt");
509
+ // RegionTimer reg(t);
510
+ // NgProfiler::AddThreadFlops (t, TaskManager::GetThreadId(), a.Height()*a.Width()*b.Height());
511
+
512
+ MultABt (a, Trans(b), c);
513
+ }
514
+
515
+ template <> INLINE void NgGEMM<true,false> (SliceMatrix<> a, SliceMatrix<double,ColMajor> b, SliceMatrix<> c)
516
+ {
517
+ // static Timer t("NgGEMM SubABt");
518
+ // RegionTimer reg(t);
519
+ // NgProfiler::AddThreadFlops (t, TaskManager::GetThreadId(), a.Height()*a.Width()*b.Height());
520
+
521
+ SubABt (a, Trans(b), BareSliceMatrix<>(c));
522
+ }
523
+
524
+ template <> INLINE void NgGEMM<true,true> (SliceMatrix<> a, SliceMatrix<double,ColMajor> b, SliceMatrix<> c)
525
+ {
526
+ // static Timer t("NgGEMM AddABt");
527
+ // RegionTimer reg(t);
528
+ // NgProfiler::AddThreadFlops (t, TaskManager::GetThreadId(), a.Height()*a.Width()*b.Height());
529
+
530
+ AddABt (a, Trans(b), c);
531
+ }
532
+
533
+
534
+ // C ??? At * B
535
+
536
+ template <> INLINE void NgGEMM<false,true> (SliceMatrix<double,ColMajor> a, SliceMatrix<> b, SliceMatrix<> c)
537
+ {
538
+ // MultAtB (Trans(a), b, c);
539
+ MatMat_AtB<false, true> (Trans(a), b, c);
540
+ }
541
+ template <> INLINE void NgGEMM<true,true> (SliceMatrix<double,ColMajor> a, SliceMatrix<> b, SliceMatrix<> c)
542
+ {
543
+ MatMat_AtB<true, true> (Trans(a), b, c);
544
+ }
545
+ template <> INLINE void NgGEMM<true,false> (SliceMatrix<double,ColMajor> a, SliceMatrix<> b, SliceMatrix<> c)
546
+ {
547
+ MatMat_AtB<true, false> (Trans(a), b, c);
548
+ }
549
+ template <> INLINE void NgGEMM<false,false> (SliceMatrix<double,ColMajor> a, SliceMatrix<> b, SliceMatrix<> c)
550
+ {
551
+ MatMat_AtB<false, false> (Trans(a), b, c);
552
+ }
553
+
554
+
555
+ template <bool ADD, bool POS, ORDERING orda, ORDERING ordb>
556
+ INLINE void NgGEMM (SliceMatrix<double,orda> a, SliceMatrix<double, ordb> b, SliceMatrix<double,ColMajor> c)
557
+ {
558
+ NgGEMM<ADD,POS> (Trans(b), Trans(a), Trans(c));
559
+ }
560
+
561
+
562
+ template <typename TM, typename FUNC, typename TX, typename TY>
563
+ void NgGEMV_fallback (BareSliceMatrix<TM,RowMajor> a, FlatVector<TX> x, FlatVector<TY> y,
564
+ FUNC func) NETGEN_NOEXCEPT
565
+ {
566
+ for (size_t i = 0; i < y.Size(); i++)
567
+ {
568
+ TY sum{0.0};
569
+ for (size_t j = 0; j < x.Size(); j++)
570
+ sum += a(i,j) * x(j);
571
+ func(y(i), sum);
572
+ }
573
+ }
574
+ template <typename TM, typename FUNC, typename TX, typename TY>
575
+ void NgGEMV_fallback (BareSliceMatrix<TM,ColMajor> a, FlatVector<TX> x, FlatVector<TY> y,
576
+ FUNC func) NETGEN_NOEXCEPT
577
+ {
578
+ for (size_t i = 0; i < y.Size(); i++)
579
+ {
580
+ TY sum{0.0};
581
+ for (size_t j = 0; j < x.Size(); j++)
582
+ sum += a(i,j) * x(j);
583
+ func(y(i), sum);
584
+ }
585
+ }
586
+
587
+ // template <typename TM, typename TVX, typename TVY>
588
+ // extern void TestFunc (TM m, TVX x, TVY y);
589
+
590
+
591
+ template <typename TS, typename T> constexpr bool IsVec = false;
592
+ template <typename TS, int S> constexpr bool IsVec<TS, Vec<S,TS>> = true;
593
+
594
+ template <bool ADD, bool POS, typename TM, ORDERING ORD, typename TX, typename TY>
595
+ INLINE void NgGEMV (BareSliceMatrix<TM,ORD> a, FlatVector<const TX> x, FlatVector<TY> y)
596
+ {
597
+ if constexpr (std::is_same<TM,double>() && std::is_same<TX,TY>() && IsVec<Complex,TX>)
598
+ {
599
+ FlatMatrix<double> mx(x.Size(), sizeof(TX)/sizeof(double), (double*)(void*)x.Addr(0));
600
+ FlatMatrix<double> my(y.Size(), sizeof(TX)/sizeof(double), (double*)(void*)y.Addr(0));
601
+ NgGEMM<ADD,POS> (a.AddSize(y.Size(), x.Size()),make_SliceMatrix(mx), make_SliceMatrix(my));
602
+ return;
603
+ }
604
+ if constexpr (std::is_same<TM,Complex>() && std::is_same<TX,TY>() && IsVec<Complex,TX>)
605
+ {
606
+ FlatMatrix<Complex> mx(x.Size(), sizeof(TX)/sizeof(Complex), &const_cast<Complex&>(*(x.Data()->Data())));
607
+ FlatMatrix<Complex> my(y.Size(), sizeof(TX)/sizeof(Complex), y.Data()->Data());
608
+ NgGEMM<ADD,POS> (a.AddSize(y.Size(), x.Size()),make_SliceMatrix(mx), make_SliceMatrix(my));
609
+ return;
610
+ }
611
+
612
+ if (!ADD)
613
+ {
614
+ if (!POS)
615
+ // y = -1*a*x;
616
+ NgGEMV_fallback(a, x, y, [](auto & y, auto sum) { y=-sum; });
617
+ else
618
+ // y = 1*a*x;
619
+ NgGEMV_fallback(a, x, y, [](auto & y, auto sum) { y=sum; });
620
+ }
621
+ else
622
+ {
623
+ if (!POS)
624
+ // y -= 1*a*x;
625
+ NgGEMV_fallback(a, x, y, [](auto & y, auto sum) { y-=sum; });
626
+ else
627
+ // y += 1*a*x;
628
+ NgGEMV_fallback(a, x, y, [](auto & y, auto sum) { y+=sum; });
629
+ }
630
+ }
631
+
632
+
633
+
634
+ // template <bool ADD, ORDERING ord>
635
+ // void NgGEMV (double s, SliceMatrix<double,ord> a, BareSliceVector<double> x, BareSliceVector<double> y) NETGEN_NOEXCEPT;
636
+
637
// Scaled matrix-vector kernels on contiguous vectors, one declaration per
// real/complex combination of scalar, matrix and input vector. Declared extern:
// the definitions are not part of this header.
// NOTE(review): presumably y += s*a*x when ADD is true and y = s*a*x otherwise, matching the
// inline Vec<S,Complex> overload defined in this header - confirm in the defining source file.
+ template <bool ADD, ORDERING ord>
638
+ extern NGS_DLL_HEADER
639
+ void NgGEMV (double s, BareSliceMatrix<double,ord> a, FlatVector<const double> x, FlatVector<double> y) NETGEN_NOEXCEPT;
640
+
641
+ template <bool ADD, ORDERING ord>
642
+ extern NGS_DLL_HEADER
643
+ void NgGEMV (const Complex s, BareSliceMatrix<Complex,ord> a, FlatVector<const Complex> x, FlatVector<Complex> y) NETGEN_NOEXCEPT;
644
+
645
+ template <bool ADD, ORDERING ord>
646
+ extern NGS_DLL_HEADER
647
+ void NgGEMV (Complex s, BareSliceMatrix<Complex,ord> a, FlatVector<const double> x, FlatVector<Complex> y) NETGEN_NOEXCEPT;
648
+ template <bool ADD, ORDERING ord>
649
+ extern NGS_DLL_HEADER
650
+ void NgGEMV (Complex s, BareSliceMatrix<double,ord> a, FlatVector<const Complex> x, FlatVector<Complex> y) NETGEN_NOEXCEPT;
651
+
652
+ /*
653
+ template <bool ADD, ORDERING ord>
654
+ extern NGS_DLL_HEADER
655
+ void NgGEMV (double s, BareSliceMatrix<double,ord> a, SliceVector<double> x, SliceVector<double> y) NETGEN_NOEXCEPT;
656
+ */
657
+
658
+
659
+
660
+
661
+
662
+
663
+
664
+ /* *********************** GEMV - SliceVector **************************** */
665
+
666
+
667
+
668
+ template <bool ADD, ORDERING ord>
669
+ extern NGS_DLL_HEADER
670
+ void NgGEMV (double s, BareSliceMatrix<double,ord> a, BareSliceVector<double> x, size_t sx,
671
+ BareSliceVector<double> y, size_t sy) NETGEN_NOEXCEPT;
672
+
673
+ template <bool ADD, ORDERING ord>
674
+ INLINE void NgGEMV (double s, BareSliceMatrix<double,ord> a, SliceVector<double> x, SliceVector<double> y) NETGEN_NOEXCEPT
675
+ {
676
+ NgGEMV<ADD,ord> (s, a, x, x.Size(), y, y.Size());
677
+ }
678
+
679
+
680
// Complex-scaled kernels with a real matrix: first the strided (SliceVector)
// variant, then the contiguous (FlatVector) variant. Declared extern only.
+ template <bool ADD, ORDERING ord>
681
+ extern NGS_DLL_HEADER
682
+ void NgGEMV (Complex s, BareSliceMatrix<double,ord> a, SliceVector<Complex> x, SliceVector<Complex> y) NETGEN_NOEXCEPT;
683
+
// NOTE(review): this signature was already declared earlier in this header; the repetition is a redeclaration and could be dropped.
684
+ template <bool ADD, ORDERING ord>
685
+ extern NGS_DLL_HEADER
686
+ void NgGEMV (Complex s, BareSliceMatrix<double,ord> a, FlatVector<const Complex> x, FlatVector<Complex> y) NETGEN_NOEXCEPT;
687
+
688
+
689
+
690
+ template <bool ADD, ORDERING ord, int S>
691
+ void NgGEMV (Complex s, BareSliceMatrix<double,ord> a, SliceVector<Vec<S,Complex>> x, SliceVector<Vec<S,Complex>> y) NETGEN_NOEXCEPT
692
+ {
693
+ SliceMatrix<Complex> mx(x.Size(), S, S*x.Dist(), x.Data()->Data());
694
+ SliceMatrix<Complex> my(y.Size(), S, S*y.Dist(), y.Data()->Data());
695
+ if constexpr (ADD)
696
+ my += s * a.AddSize(y.Size(), x.Size()) * mx;
697
+ else
698
+ my = s * a.AddSize(y.Size(), x.Size()) * mx;
699
+ }
700
+
701
+
702
+
703
+ template <bool ADD, ORDERING ord>
704
+ extern NGS_DLL_HEADER
705
+ void NgGEMV (Complex s, BareSliceMatrix<Complex,ord> a, SliceVector<Complex> x, SliceVector<Complex> y) NETGEN_NOEXCEPT;
706
+
707
+ template <bool ADD, bool POS, ORDERING ord>
708
+ INLINE void NgGEMV (BareSliceMatrix<double,ord> a, FlatVector<const Complex> x, FlatVector<Complex> y)
709
+ {
710
+ NgGEMV<ADD,ord> (POS ? 1.0 : -1.0, a, x, y);
711
+ }
712
+
713
+
714
+ template <> INLINE void NgGEMV<false,true> (BareSliceMatrix<double,RowMajor> a, FlatVector<const double> x, FlatVector<double> y)
715
+ {
716
+ MultMatVec (a,x.RemoveConst(),y);
717
+ }
718
+
719
+ template <> INLINE void NgGEMV<false,true> (BareSliceMatrix<double,ColMajor> a, FlatVector<const double> x, FlatVector<> y)
720
+ {
721
+ MultMatTransVec (Trans(a),x.RemoveConst(),y);
722
+ }
723
+
724
+
725
+ template <> INLINE void NgGEMV<true,true> (BareSliceMatrix<> a, FlatVector<const double> x, FlatVector<> y)
726
+ {
727
+ MultAddMatVec (1,a,x.RemoveConst(),y);
728
+ }
729
+
730
+ template <> INLINE void NgGEMV<true,true> (BareSliceMatrix<double,ColMajor> a, FlatVector<const double> x, FlatVector<> y)
731
+ {
732
+ MultAddMatTransVec (1,Trans(a),x.RemoveConst(),y);
733
+ }
734
+
735
+ template <> INLINE void NgGEMV<true,false> (BareSliceMatrix<> a, FlatVector<const double> x, FlatVector<> y)
736
+ {
737
+ MultAddMatVec (-1,a,x.RemoveConst(),y);
738
+ }
739
+
740
+ template <> INLINE void NgGEMV<true,false> (BareSliceMatrix<double,ColMajor> a, FlatVector<const double> x, FlatVector<> y)
741
+ {
742
+ MultAddMatTransVec (-1,Trans(a),x.RemoveConst(),y);
743
+ }
744
+
745
+
746
+
747
+
748
+ template <bool ADD, bool POS, typename TM, ORDERING ORD, typename TX, typename TY>
749
+ INLINE void NgGEMV (BareSliceMatrix<TM,ORD> a, SliceVector<TX> x, SliceVector<TY> y)
750
+ {
751
+ if constexpr (std::is_same<TM,double>() && std::is_same<TX,TY>() && IsVec<Complex,TX>)
752
+ {
753
+ constexpr int VS = sizeof(TX)/sizeof(double);
754
+ SliceMatrix<double> mx(x.Size(), VS, x.Dist()*VS, (double*)(void*)x.Addr(0));
755
+ SliceMatrix<double> my(y.Size(), VS, y.Dist()*VS, (double*)(void*)y.Addr(0));
756
+ NgGEMM<ADD,POS> (a.AddSize(y.Size(), x.Size()),make_SliceMatrix(mx), make_SliceMatrix(my));
757
+ return;
758
+ }
759
+ else if constexpr (std::is_same<TM,Complex>() && std::is_same<TX,TY>() && IsVec<Complex,TX>)
760
+ {
761
+ constexpr int VS = sizeof(TX)/sizeof(Complex);
762
+ SliceMatrix<Complex> mx(x.Size(), VS, x.Dist()*VS, &const_cast<Complex&>(*(x.Data()->Data())));
763
+ SliceMatrix<Complex> my(y.Size(), VS, y.Dist()*VS, y.Data()->Data());
764
+ NgGEMM<ADD,POS> (a.AddSize(y.Size(), x.Size()),make_SliceMatrix(mx), make_SliceMatrix(my));
765
+ return;
766
+ }
767
+ else
768
+ NgGEMV<ADD> (POS ? 1.0 : -1.0, a, x, y);
769
+ }
770
+
771
+
772
+
773
+
774
+
775
+ // bla dispatches
776
+
777
+ // vector-vector
778
+
779
+ /*
780
+ template <typename OP, typename T, typename TB>
781
+ class assign_trait<OP, T, TB,
782
+ enable_if_t<std::is_same_v<OP,typename MatExpr<T>::As> == true &&
783
+ IsConvertibleToFlatVector<TB>()&&
784
+ IsConvertibleToFlatVector<T>(), int>>
785
+ {
786
+ public:
787
+ static inline T & Assign (MatExpr<T> & self, const Expr<TB> & v)
788
+ {
789
+ CopyVector(make_BareVector(v.Spec()), make_FlatVector(self.Spec()));
790
+ return self.Spec();
791
+ }
792
+ };
793
+ */
794
+
795
+ /*
796
+ template <typename OP, typename T, typename TS, typename TB, typename TBS>
797
+ class assign_trait<OP, LinearVector<T,TS>, LinearVector<TB,TBS>,
798
+ enable_if_t<std::is_same_v<OP,typename MatExpr<LinearVector<T,TS>>::As> == true, int>>
799
+ {
800
+ public:
801
+ static inline auto & Assign (MatExpr<LinearVector<T,TS>> & self, const Expr<LinearVector<TB,TBS>> & v)
802
+ {
803
+ auto cs = CombinedSize (self.Spec().Size(), v.Spec().Size());
804
+ CopyVector(BareVector<TB>(v.Spec()), self.Spec().Range(0, cs));
805
+ return self.Spec();
806
+ }
807
+ };
808
+ */
809
+
810
+ /*
811
+ template <typename OP, typename T, typename TB>
812
+ class assign_trait<OP, T, TB,
813
+ enable_if_t<std::is_same_v<OP,typename MatExpr<T>::As> == true &&
814
+ ! (IsConvertibleToFlatVector<TB>() && IsConvertibleToFlatVector<T>()) &&
815
+ IsConvertibleToSliceVector<TB>()&&
816
+ IsConvertibleToSliceVector<T>(), int>>
817
+ {
818
+ public:
819
+ static inline T & Assign (MatExpr<T> & self, const Expr<TB> & v)
820
+ {
821
+ CopyVector(make_BareSliceVector(v.Spec()), make_SliceVector(self.Spec()));
822
+ return self.Spec();
823
+ }
824
+ };
825
+ */
826
+
827
+ /*
828
+ template <typename OP, typename T, typename TS, typename TD, typename TB, typename TBS, typename TBD>
829
+ class assign_trait<OP, VectorView<T,TS,TD>, VectorView<TB,TBS,TBD>,
830
+ enable_if_t<std::is_same_v<OP,typename MatExpr<VectorView<T,TS,TD>>::As> == true, int>>
831
+ {
832
+ typedef VectorView<T,TS,TD> TVec;
833
+ typedef VectorView<TB,TBS,TBD> TVecB;
834
+ public:
835
+ static inline auto & Assign (MatExpr<TVec> & self, const Expr<TVecB> & v)
836
+ {
837
+ auto cs = CombinedSize (self.Spec().Size(), v.Spec().Size());
838
+ CopyVector(BareSliceVector<TB>(v.Spec()), SliceVector<T>(self.Spec().Range(0,cs)));
839
+ return self.Spec();
840
+ }
841
+ };
842
+ */
843
+
844
+
845
+ // x = y
846
+ template <typename OP, typename T, typename ...Args, typename TB, typename ...BArgs>
847
+ class assign_trait<OP, VectorView<T,Args...>, VectorView<TB, BArgs...>,
848
+ enable_if_t<std::is_same_v<OP,typename MatExpr<VectorView<T,Args...>>::As> == true, int>>
849
+ {
850
+ typedef VectorView<T,Args...> TVec;
851
+ typedef VectorView<TB, BArgs...> TVecB;
852
+ public:
853
+ static inline auto & Assign (MatExpr<TVec> & self, const Expr<TVecB> & v)
854
+ {
855
+ auto cs = CombinedSize (self.Spec().Size(), v.Spec().Size());
856
+
857
+ if constexpr (is_IC<decltype(cs)>())
858
+ {
859
+ Vec<cs,typename remove_const<TB>::type> tmp;
860
+ for (size_t i = 0; i<cs; i++)
861
+ tmp[i] = v.Spec()[i];
862
+ for (size_t i = 0; i<cs; i++)
863
+ self.Spec()[i] = tmp[i];
864
+ /*
865
+ // does not allow auto-vectorization
866
+ for (size_t i = 0; i<cs; i++)
867
+ self.Spec()[i] = v.Spec()[i];
868
+ */
869
+ }
870
+ else if constexpr (TVec::IsLinear() && TVecB::IsLinear())
871
+ CopyVector(BareVector<TB>(v.Spec()), FlatVector<T>(self.Spec().Range(0,cs)));
872
+ else
873
+ CopyVector(BareSliceVector<TB>(v.Spec()), BareSliceVector<T>(self.Spec()), cs);
874
+ return self.Spec();
875
+ }
876
+ };
877
+
878
+
879
+ // x += s*y
880
+ template <typename OP, typename T, typename TS, typename TD, typename TB, typename TBS, typename TBD, typename TSCAL>
881
+ class assign_trait<OP, VectorView<T,TS,TD>, ScaleExpr<VectorView<TB,TBS,TBD>,TSCAL>,
882
+ enable_if_t<OP::IsAdd(), int>>
883
+ {
884
+ typedef VectorView<T,TS,TD> TVec;
885
+ typedef VectorView<TB,TBS,TBD> TVecB;
886
+ public:
887
+ static inline auto & Assign (MatExpr<TVec> & self, const Expr<ScaleExpr<TVecB,TSCAL>> & v)
888
+ {
889
+ auto cs = CombinedSize (self.Spec().Size(), v.Spec().A().Size());
890
+ auto s = v.View().S();
891
+ if constexpr (!OP::IsPos()) s = -s;
892
+ if constexpr (is_IC<decltype(cs)>())
893
+ {
894
+ Vec<cs,typename remove_const<TB>::type> tmp;
895
+ for (size_t i = 0; i<cs; i++)
896
+ tmp[i] = v.Spec()[i];
897
+ for (size_t i = 0; i<cs; i++)
898
+ self.Spec()[i] += s * tmp[i];
899
+ }
900
+ else
901
+ if constexpr (TVec::IsLinear() && TVecB::IsLinear())
902
+ AddVector(s, BareVector<const TB>(v.View().A()), FlatVector<T>(self.Spec().Range(0,cs)));
903
+ else
904
+ AddVector(s, BareSliceVector<const TB>(v.View().A()), SliceVector<T>(self.Spec().Range(0,cs)));
905
+ return self.Spec();
906
+ }
907
+ };
908
+
909
+
910
+
911
+
912
+ // matrix-vector
913
+ // x OP= M*y
914
+ template <typename OP, typename T, typename TS, typename TD, typename TA, typename TB, typename TBS, typename TBD>
915
+ class assign_trait<OP, VectorView<T,TS,TD>, MultExpr<TA,VectorView<TB,TBS,TBD>>,
916
+ enable_if_t<IsConvertibleToBareSliceMatrix<TA>(), int>>
917
+
918
+ {
919
+ typedef VectorView<T,TS,TD> TVec;
920
+ typedef VectorView<TB,TBS,TBD> TVecB;
921
+ public:
922
+ static inline auto & Assign (MatExpr<TVec> & self, const Expr<MultExpr<TA,VectorView<TB,TBS,TBD>>> & prod)
923
+ {
924
+ auto h = CombinedSize(get<0>(self.Spec().Shape()), get<0>(prod.View().A().Shape()));
925
+ auto w = CombinedSize(get<0>(prod.View().B().Shape()), get<1>(prod.View().A().Shape()));
926
+
927
+ constexpr bool ADD = OP::IsAdd();
928
+ constexpr bool POS = OP::IsPos();
929
+
930
+ if constexpr (TVec::IsLinear() && TVecB::IsLinear())
931
+ NgGEMV<ADD,POS> (make_BareSliceMatrix(prod.View().A()).RemoveConst(),
932
+ FlatVector<const TB>(prod.View().B().Range(0,w)),
933
+ FlatVector<T>(self.Spec().Range(0,h)));
934
+ else
935
+ NgGEMV<ADD,POS> (make_BareSliceMatrix(prod.View().A()),
936
+ SliceVector<TB>(prod.View().B().Range(0,w)),
937
+ SliceVector<T>(self.Spec().Range(0,h)));
938
+ /*
939
+ NgGEMV<ADD> (POS ? 1.0 : -1.0, make_BareSliceMatrix(prod.View().A()),
940
+ SliceVector<TB>(prod.View().B().Range(0,w)),
941
+ SliceVector<T>(self.Spec().Range(0,h)));
942
+ */
943
+ return self.Spec();
944
+ }
945
+ };
946
+
947
+ // x OP= (s*M)*y
948
+ template <typename OP, typename T, typename TS, typename TD, typename TA, typename TB, typename TBS, typename TBD, typename TC>
949
+ class assign_trait<OP, VectorView<T,TS,TD>, MultExpr<ScaleExpr<TA,TC>,VectorView<TB,TBS,TBD>>,
950
+ enable_if_t<IsConvertibleToBareSliceMatrix<TA>(), int>>
951
+
952
+ {
953
+ typedef VectorView<T,TS,TD> TVec;
954
+ typedef VectorView<TB,TBS,TBD> TVecB;
955
+ public:
956
+ static inline auto & Assign (MatExpr<TVec> & self, const Expr<MultExpr<ScaleExpr<TA,TC>,VectorView<TB,TBS,TBD>>> & prod)
957
+ {
958
+ auto h = CombinedSize(get<0>(self.Spec().Shape()), get<0>(prod.View().A().Shape()));
959
+ auto w = CombinedSize(get<0>(prod.View().B().Shape()), get<1>(prod.View().A().Shape()));
960
+
961
+ constexpr bool ADD = OP::IsAdd();
962
+ double POS = OP::IsPos() ? 1.0 : -1.0;
963
+
964
+ if constexpr (TVec::IsLinear() && TVecB::IsLinear())
965
+ NgGEMV<ADD> (POS*prod.View().A().S(), make_BareSliceMatrix(prod.View().A().A()).RemoveConst(),
966
+ FlatVector<const TB>(prod.View().B().Range(0,w)),
967
+ FlatVector<T>(self.Spec().Range(0,h)));
968
+ else
969
+ NgGEMV<ADD> (POS*prod.View().A().S(), make_BareSliceMatrix(prod.View().A().A()),
970
+ SliceVector<TB>(prod.View().B().Range(0,w)),
971
+ SliceVector<T>(self.Spec().Range(0,h)));
972
+ return self.Spec();
973
+ }
974
+ };
975
+
976
+
977
+
978
+ #ifdef OLDMatVec
979
+
980
+ template <typename OP, typename T, typename TA, typename TB>
981
+ class assign_trait<OP, T, MultExpr<TA,TB>,
982
+ enable_if_t<IsConvertibleToSliceMatrix<TA,double>() &&
983
+ is_convertible<TB,FlatVector<double>>::value &&
984
+ is_convertible<T,FlatVector<double>>::value, int>>
985
+ {
986
+ public:
987
+ static inline T & Assign (MatExpr<T> & self, const Expr<MultExpr<TA, TB>> & prod)
988
+ {
989
+ auto h = CombinedSize(get<0>(self.Spec().Shape()), get<0>(prod.View().A().Shape()));
990
+ auto w = CombinedSize(get<0>(prod.View().B().Shape()), get<1>(prod.View().A().Shape()));
991
+
992
+ constexpr bool ADD = std::is_same<OP,typename MatExpr<T>::AsAdd>::value || std::is_same<OP,typename MatExpr<T>::AsSub>::value;
993
+ constexpr bool POS = std::is_same<OP,typename MatExpr<T>::As>::value || std::is_same<OP,typename MatExpr<T>::AsAdd>::value;
994
+ NgGEMV<ADD,POS> (make_SliceMatrix(prod.View().A()),
995
+ make_FlatVector(prod.View().B().Range(0,w)),
996
+ make_FlatVector(self.Spec().Range(0,h)));
997
+ return self.Spec();
998
+ }
999
+ };
1000
+
1001
+
1002
+ template <typename OP, typename T, typename TA, typename TB>
1003
+ class assign_trait<OP, T, MultExpr<TA,TB>,
1004
+ enable_if_t<IsConvertibleToSliceMatrix<TA,Complex>() &&
1005
+ IsConvertibleToFlatVector<TB>() &&
1006
+ IsConvertibleToFlatVector<T>(), int>>
1007
+ {
1008
+ public:
1009
+ static inline T & Assign (MatExpr<T> & self, const Expr<MultExpr<TA, TB>> & prod)
1010
+ {
1011
+ constexpr bool ADD = std::is_same<OP,typename MatExpr<T>::AsAdd>::value || std::is_same<OP,typename MatExpr<T>::AsSub>::value;
1012
+ constexpr double POS = (std::is_same<OP,typename MatExpr<T>::As>::value || std::is_same<OP,typename MatExpr<T>::AsAdd>::value) ? 1 : -1;
1013
+ NgGEMV<ADD> (POS, BareSliceMatrix(prod.View().A()),
1014
+ make_FlatVector(prod.View().B()),
1015
+ make_FlatVector(self.Spec()));
1016
+ return self.Spec();
1017
+ }
1018
+ };
1019
+
1020
+ template <typename OP, typename T, typename TA, typename TB, typename TC>
1021
+ class assign_trait<OP, T, MultExpr<ScaleExpr<TA,TC>,TB>,
1022
+ enable_if_t<IsConvertibleToSliceMatrix<TA,Complex>() &&
1023
+ IsConvertibleToFlatVector<TB>() &&
1024
+ IsConvertibleToFlatVector<T>(), int>>
1025
+ {
1026
+ public:
1027
+ static inline T & Assign (MatExpr<T> & self, const Expr<MultExpr<ScaleExpr<TA,TC>, TB>> & prod)
1028
+ {
1029
+ constexpr bool ADD = std::is_same<OP,typename MatExpr<T>::AsAdd>::value || std::is_same<OP,typename MatExpr<T>::AsSub>::value;
1030
+ constexpr double POS = (std::is_same<OP,typename MatExpr<T>::As>::value || std::is_same<OP,typename MatExpr<T>::AsAdd>::value) ? 1 : -1;
1031
+ NgGEMV<ADD> (POS*prod.View().A().S(), BareSliceMatrix(prod.View().A().A()),
1032
+ make_FlatVector(prod.View().B()),
1033
+ make_FlatVector(self.Spec()));
1034
+ return self.Spec();
1035
+ }
1036
+ };
1037
+
1038
+ template <typename OP, typename T, typename TA, typename TB>
1039
+ class assign_trait<OP, T, MultExpr<TA,TB>,
1040
+ enable_if_t< ( (is_same_v<typename T::TELEM,double> ==true)||(is_same_v<typename T::TELEM,Complex> ==true) )&&
1041
+ IsConvertibleToBareSliceVector<T>() &&
1042
+ IsConvertibleToBareSliceVector<TB>() &&
1043
+ (!IsConvertibleToFlatVector<TB>()||!IsConvertibleToFlatVector<T>()) &&
1044
+ IsConvertibleToBareSliceMatrix<TA>(),int>>
1045
+ {
1046
+ public:
1047
+ static inline T & Assign (MatExpr<T> & self, const Expr<MultExpr<TA,TB>> & prod)
1048
+ {
1049
+ auto h = CombinedSize(get<0>(self.Spec().Shape()), get<0>(prod.View().A().Shape()));
1050
+ auto w = CombinedSize(get<0>(prod.View().B().Shape()), get<1>(prod.View().A().Shape()));
1051
+
1052
+ constexpr bool ADD = std::is_same<OP,typename MatExpr<T>::AsAdd>::value || std::is_same<OP,typename MatExpr<T>::AsSub>::value;
1053
+ constexpr double POS = (std::is_same<OP,typename MatExpr<T>::As>::value || std::is_same<OP,typename MatExpr<T>::AsAdd>::value) ? 1 : -1;
1054
+ NgGEMV<ADD> (POS, BareSliceMatrix(prod.View().A()),
1055
+ make_BareSliceVector(prod.View().B()).Range(0, w).RemoveConst(),
1056
+ make_BareSliceVector(self.Spec()).Range(0, h));
1057
+ return self.Spec();
1058
+ }
1059
+ };
1060
+
1061
+
1062
+
1063
+ template <typename OP, typename T, typename TA, typename TB, typename TC>
1064
+ class assign_trait<OP, T, MultExpr<ScaleExpr<TA,TC>,TB>,
1065
+ enable_if_t< ( (is_same_v<typename T::TELEM,double> ==true)||(is_same_v<typename T::TELEM,Complex> ==true) )&&
1066
+ IsConvertibleToBareSliceVector<T>() &&
1067
+ IsConvertibleToBareSliceVector<TB>() &&
1068
+ (!IsConvertibleToFlatVector<TB>()||!IsConvertibleToFlatVector<T>()) &&
1069
+ IsConvertibleToBareSliceMatrix<TA>(),int>>
1070
+ {
1071
+ public:
1072
+ static inline T & Assign (MatExpr<T> & self, const Expr<MultExpr<ScaleExpr<TA,TC>, TB>> & prod)
1073
+ {
1074
+ auto h = CombinedSize(get<0>(self.Spec().Shape()), get<0>(prod.View().A().Shape()));
1075
+ auto w = CombinedSize(get<0>(prod.View().B().Shape()), get<1>(prod.View().A().Shape()));
1076
+
1077
+ constexpr bool ADD = std::is_same<OP,typename MatExpr<T>::AsAdd>::value || std::is_same<OP,typename MatExpr<T>::AsSub>::value;
1078
+ constexpr double POS = std::is_same<OP,typename MatExpr<T>::As>::value || std::is_same<OP,typename MatExpr<T>::AsAdd>::value ? 1 : -1;
1079
+ NgGEMV<ADD> (POS*prod.View().A().S(), BareSliceMatrix(prod.View().A().A()),
1080
+ make_BareSliceVector(prod.View().B()).Range(0, w),
1081
+ make_BareSliceVector(self.Spec()).Range(0, h));
1082
+ return self.Spec();
1083
+ }
1084
+ };
1085
+
1086
+ #endif
1087
+
1088
+
1089
+
1090
+
1091
+ template <typename OP, typename T, typename TA, typename TB>
1092
+ class assign_trait<OP, T, MultExpr<TA, TB>,
1093
+ enable_if_t<IsConvertibleToSliceMatrix<TA>() &&
1094
+ IsConvertibleToSliceMatrix<TB>() &&
1095
+ IsConvertibleToSliceMatrix<T,double>(), int>>
1096
+ {
1097
+ public:
1098
+ static inline T & Assign (MatExpr<T> & self, const Expr<MultExpr<TA, TB>> & prod)
1099
+ {
1100
+ constexpr bool ADD = std::is_same<OP,typename MatExpr<T>::AsAdd>::value || std::is_same<OP,typename MatExpr<T>::AsSub>::value;
1101
+ constexpr bool POS = std::is_same<OP,typename MatExpr<T>::As>::value || std::is_same<OP,typename MatExpr<T>::AsAdd>::value;
1102
+
1103
+ size_t n = CombinedSize(prod.View().A().Height(), self.Spec().Height());
1104
+ size_t m = CombinedSize(prod.View().B().Width(), self.Spec().Width());
1105
+ size_t k = CombinedSize(prod.View().A().Width(), prod.View().B().Height());
1106
+
1107
+ NgGEMM<ADD,POS> (make_BareSliceMatrix(prod.View().A()).AddSize(n,k).RemoveConst(),
1108
+ make_BareSliceMatrix(prod.View().B()).AddSize(k,m).RemoveConst(),
1109
+ make_BareSliceMatrix(self.Spec()).AddSize(n,m));
1110
+ return self.Spec();
1111
+ }
1112
+ };
1113
+
1114
+
1115
+ template <typename OP, typename T, typename TA, typename TB>
1116
+ class assign_trait<OP, T, MultExpr<MinusExpr<TA>, TB>,
1117
+ enable_if_t<IsConvertibleToSliceMatrix<TA,double>() &&
1118
+ IsConvertibleToSliceMatrix<TB,double>() &&
1119
+ IsConvertibleToSliceMatrix<T, double>(), int>>
1120
+ {
1121
+ public:
1122
+ static inline T & Assign (MatExpr<T> & self, const Expr<MultExpr<MinusExpr<TA>, TB>> & prod)
1123
+ {
1124
+ constexpr bool ADD = std::is_same<OP,typename MatExpr<T>::AsAdd>::value || std::is_same<OP,typename MatExpr<T>::AsSub>::value;
1125
+ constexpr bool POS = std::is_same<OP,typename MatExpr<T>::As>::value || std::is_same<OP,typename MatExpr<T>::AsAdd>::value;
1126
+
1127
+ NgGEMM<ADD,!POS> (make_SliceMatrix(prod.View().A().A()),
1128
+ make_SliceMatrix(prod.View().B()),
1129
+ make_SliceMatrix(self.Spec()));
1130
+ return self.Spec();
1131
+ }
1132
+ };
1133
+
1134
+ // rank 1 update
1135
+ template <typename OP, typename T, typename TA, typename TB>
1136
+ class assign_trait<OP, T, MultExpr<TA, TransExpr<TB>>,
1137
+ enable_if_t<IsConvertibleToSliceMatrix<T,double>() &&
1138
+ is_convertible<TA,FlatVector<double>>() &&
1139
+ is_convertible<TB,FlatVector<double>>(), int>>
1140
+ {
1141
+ public:
1142
+ static inline T & Assign (MatExpr<T> & self, const Expr<MultExpr<TA, TransExpr<TB>>> & prod)
1143
+ {
1144
+ constexpr bool ADD = std::is_same<OP,typename MatExpr<T>::AsAdd>::value || std::is_same<OP,typename MatExpr<T>::AsSub>::value;
1145
+ constexpr bool POS = std::is_same<OP,typename MatExpr<T>::As>::value || std::is_same<OP,typename MatExpr<T>::AsAdd>::value;
1146
+
1147
+ auto veca = prod.Spec().A();
1148
+ FlatMatrix<> mata(veca.Height(), 1, veca.Data());
1149
+ auto vecb = prod.Spec().B().A();
1150
+ FlatMatrix<> matb(1, vecb.Height(), vecb.Data());
1151
+
1152
+ NgGEMM<ADD,POS> (make_SliceMatrix(mata),
1153
+ make_SliceMatrix(matb),
1154
+ make_SliceMatrix(self.Spec()));
1155
+ return self.Spec();
1156
+ }
1157
+ };
1158
+
1159
+ // typedef void (*pmatmatcRR)(size_t, size_t, BareSliceMatrix<Complex,RowMajor>, BareSliceMatrix<Complex,RowMajor>,BareSliceMatrix<Complex,RowMajor>);
1160
+
1161
+ template <ORDERING OA, ORDERING OB>
1162
+ using pmatmatc = void (*)(size_t, size_t, BareSliceMatrix<Complex, OA>, BareSliceMatrix<Complex,OB>,BareSliceMatrix<Complex,RowMajor>);
1163
+ template <bool ADD, bool POS, ORDERING OA, ORDERING OB>
1164
+ extern NGS_DLL_HEADER pmatmatc<OA,OB> dispatch_matmatc[9];
1165
+
1166
+ template <bool ADD, bool POS, ORDERING OA, ORDERING OB>
1167
+ extern NGS_DLL_HEADER void NgGEMMBare (size_t ah, size_t aw, size_t bw, BareSliceMatrix<Complex,OA> a, BareSliceMatrix<Complex,OB> b, BareSliceMatrix<Complex,RowMajor> c);
1168
+
1169
+
1170
+ template <bool ADD, bool POS, ORDERING OA, ORDERING OB>
1171
+ void NgGEMM (SliceMatrix<Complex,OA> a, SliceMatrix<Complex,OB> b, SliceMatrix<Complex,RowMajor> c)
1172
+ {
1173
+ size_t ah = a.Height();
1174
+ size_t aw = a.Width();
1175
+ size_t bw = b.Width();
1176
+ if (aw < std::size(dispatch_matmatc<ADD,POS,OA,OB>))
1177
+ {
1178
+ (*dispatch_matmatc<ADD,POS,OA,OB>[aw])(ah, bw, make_BareSliceMatrix(a), make_BareSliceMatrix(b), make_BareSliceMatrix(c));
1179
+ return;
1180
+ }
1181
+
1182
+ NgGEMMBare<ADD,POS>(ah, aw, bw, make_BareSliceMatrix(a), make_BareSliceMatrix(b), make_BareSliceMatrix(c));
1183
+ }
1184
+
1185
+ template <bool ADD, bool POS, ORDERING OA, ORDERING OB>
1186
+ void NgGEMM (SliceMatrix<Complex,OA> a, SliceMatrix<Complex,OB> b, SliceMatrix<Complex,ColMajor> c)
1187
+ {
1188
+ NgGEMM<ADD,POS> (Trans(b), Trans(a), Trans(c));
1189
+ }
1190
+
1191
+ template <typename OP, typename T, typename TA, typename TB>
1192
+ class assign_trait<OP, T, MultExpr<TA, TB>,
1193
+ enable_if_t<IsConvertibleToSliceMatrix<TA,Complex>() &&
1194
+ IsConvertibleToSliceMatrix<TB,Complex>() &&
1195
+ IsConvertibleToSliceMatrix<T,Complex>(), int>>
1196
+ {
1197
+ public:
1198
+ static inline T & Assign (MatExpr<T> & self, const Expr<MultExpr<TA, TB>> & prod)
1199
+ {
1200
+ constexpr bool ADD = std::is_same<OP,typename MatExpr<T>::AsAdd>::value || std::is_same<OP,typename MatExpr<T>::AsSub>::value;
1201
+ constexpr bool POS = std::is_same<OP,typename MatExpr<T>::As>::value || std::is_same<OP,typename MatExpr<T>::AsAdd>::value;
1202
+
1203
+ size_t n = CombinedSize(prod.View().A().Height(), self.Spec().Height());
1204
+ size_t m = CombinedSize(prod.View().B().Width(), self.Spec().Width());
1205
+ size_t k = CombinedSize(prod.View().A().Width(), prod.View().B().Height());
1206
+
1207
+ NgGEMM<ADD,POS> (make_BareSliceMatrix(prod.View().A()).AddSize(n,k).RemoveConst(),
1208
+ make_BareSliceMatrix(prod.View().B()).AddSize(k,m).RemoveConst(),
1209
+ make_BareSliceMatrix(self.Spec()).AddSize(n,m));
1210
+ return self.Spec();
1211
+ }
1212
+ };
1213
+
1214
+
1215
+
1216
+
1217
+
1218
+ extern NGS_DLL_HEADER
1219
+ double MatKernelMaskedScalAB (size_t n,
1220
+ double * pa, size_t da,
1221
+ double * pb, size_t db,
1222
+ const BitArray & ba);
1223
+
1224
+ extern string GetTimingHelpString();
1225
+ extern list<tuple<string,double>> Timing (int what, size_t n, size_t m, size_t k,
1226
+ bool lapack, bool doubleprec, size_t maxits);
1227
+ }
1228
+
1229
+
1230
+ #endif
1231
+
1232
+