sequenzo 0.1.18__cp310-cp310-macosx_10_9_universal2.whl → 0.1.19__cp310-cp310-macosx_10_9_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sequenzo might be problematic.

Files changed (357)
  1. sequenzo/__init__.py +39 -7
  2. sequenzo/big_data/clara/utils/get_weighted_diss.c +155 -155
  3. sequenzo/big_data/clara/utils/get_weighted_diss.cpython-310-darwin.so +0 -0
  4. sequenzo/clustering/KMedoids.py +39 -0
  5. sequenzo/clustering/hierarchical_clustering.py +107 -5
  6. sequenzo/define_sequence_data.py +10 -1
  7. sequenzo/dissimilarity_measures/get_distance_matrix.py +1 -2
  8. sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.c +155 -155
  9. sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.cpython-310-darwin.so +0 -0
  10. sequenzo/dissimilarity_measures/utils/seqconc.c +155 -155
  11. sequenzo/dissimilarity_measures/utils/seqconc.cpython-310-darwin.so +0 -0
  12. sequenzo/dissimilarity_measures/utils/seqdss.c +155 -155
  13. sequenzo/dissimilarity_measures/utils/seqdss.cpython-310-darwin.so +0 -0
  14. sequenzo/dissimilarity_measures/utils/seqdur.c +155 -155
  15. sequenzo/dissimilarity_measures/utils/seqdur.cpython-310-darwin.so +0 -0
  16. sequenzo/dissimilarity_measures/utils/seqlength.c +155 -155
  17. sequenzo/dissimilarity_measures/utils/seqlength.cpython-310-darwin.so +0 -0
  18. sequenzo/multidomain/cat.py +0 -53
  19. sequenzo/multidomain/idcd.py +0 -1
  20. sequenzo/openmp_setup.py +233 -0
  21. sequenzo/visualization/plot_transition_matrix.py +21 -22
  22. {sequenzo-0.1.18.dist-info → sequenzo-0.1.19.dist-info}/METADATA +43 -10
  23. sequenzo-0.1.19.dist-info/RECORD +215 -0
  24. sequenzo/dissimilarity_measures/setup.py +0 -35
  25. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Cholesky/LDLT.h +0 -688
  26. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Cholesky/LLT.h +0 -558
  27. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h +0 -99
  28. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +0 -682
  29. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +0 -346
  30. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +0 -462
  31. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +0 -91
  32. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/EigenSolver.h +0 -622
  33. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +0 -418
  34. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +0 -226
  35. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +0 -374
  36. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +0 -158
  37. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/RealQZ.h +0 -657
  38. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/RealSchur.h +0 -558
  39. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +0 -77
  40. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +0 -904
  41. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +0 -87
  42. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +0 -561
  43. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/AlignedBox.h +0 -486
  44. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/AngleAxis.h +0 -247
  45. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/EulerAngles.h +0 -114
  46. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/Homogeneous.h +0 -501
  47. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/Hyperplane.h +0 -282
  48. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/OrthoMethods.h +0 -235
  49. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/ParametrizedLine.h +0 -232
  50. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/Quaternion.h +0 -870
  51. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/Rotation2D.h +0 -199
  52. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/RotationBase.h +0 -206
  53. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/Scaling.h +0 -188
  54. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/Transform.h +0 -1563
  55. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/Translation.h +0 -202
  56. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/Umeyama.h +0 -166
  57. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +0 -168
  58. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Householder/BlockHouseholder.h +0 -110
  59. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Householder/Householder.h +0 -176
  60. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Householder/HouseholderSequence.h +0 -545
  61. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +0 -226
  62. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +0 -212
  63. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +0 -229
  64. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +0 -394
  65. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +0 -453
  66. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +0 -444
  67. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +0 -198
  68. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +0 -117
  69. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Jacobi/Jacobi.h +0 -483
  70. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/KLUSupport/KLUSupport.h +0 -358
  71. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/LU/Determinant.h +0 -117
  72. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/LU/FullPivLU.h +0 -877
  73. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/LU/InverseImpl.h +0 -432
  74. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/LU/PartialPivLU.h +0 -624
  75. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h +0 -83
  76. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/LU/arch/InverseSize4.h +0 -351
  77. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/MetisSupport/MetisSupport.h +0 -137
  78. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/OrderingMethods/Amd.h +0 -435
  79. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +0 -1863
  80. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/OrderingMethods/Ordering.h +0 -153
  81. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +0 -678
  82. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +0 -545
  83. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/QR/ColPivHouseholderQR.h +0 -674
  84. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +0 -97
  85. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +0 -635
  86. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/QR/FullPivHouseholderQR.h +0 -713
  87. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/QR/HouseholderQR.h +0 -434
  88. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h +0 -68
  89. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +0 -335
  90. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SVD/BDCSVD.h +0 -1366
  91. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SVD/JacobiSVD.h +0 -812
  92. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +0 -91
  93. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SVD/SVDBase.h +0 -376
  94. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SVD/UpperBidiagonalization.h +0 -414
  95. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +0 -697
  96. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +0 -174
  97. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/AmbiVector.h +0 -378
  98. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/CompressedStorage.h +0 -274
  99. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +0 -352
  100. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h +0 -67
  101. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseAssign.h +0 -270
  102. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseBlock.h +0 -571
  103. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseColEtree.h +0 -206
  104. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +0 -370
  105. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +0 -722
  106. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +0 -150
  107. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +0 -342
  108. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +0 -138
  109. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseDot.h +0 -98
  110. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseFuzzy.h +0 -29
  111. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseMap.h +0 -305
  112. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseMatrix.h +0 -1518
  113. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +0 -398
  114. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparsePermutation.h +0 -178
  115. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseProduct.h +0 -181
  116. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseRedux.h +0 -49
  117. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseRef.h +0 -397
  118. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +0 -659
  119. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseSolverBase.h +0 -124
  120. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +0 -198
  121. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseTranspose.h +0 -92
  122. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseTriangularView.h +0 -189
  123. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseUtil.h +0 -186
  124. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseVector.h +0 -478
  125. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseView.h +0 -254
  126. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/TriangularSolver.h +0 -315
  127. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU.h +0 -923
  128. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLUImpl.h +0 -66
  129. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +0 -226
  130. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_Structs.h +0 -110
  131. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +0 -375
  132. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_Utils.h +0 -80
  133. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h +0 -181
  134. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +0 -179
  135. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +0 -107
  136. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +0 -280
  137. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +0 -126
  138. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +0 -130
  139. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +0 -223
  140. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h +0 -258
  141. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h +0 -137
  142. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h +0 -136
  143. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h +0 -83
  144. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseQR/SparseQR.h +0 -758
  145. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/StlSupport/StdDeque.h +0 -116
  146. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/StlSupport/StdList.h +0 -106
  147. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/StlSupport/StdVector.h +0 -131
  148. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/StlSupport/details.h +0 -84
  149. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +0 -1025
  150. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +0 -642
  151. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/misc/Image.h +0 -82
  152. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/misc/Kernel.h +0 -79
  153. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/misc/RealSvd2x2.h +0 -55
  154. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/misc/blas.h +0 -440
  155. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/misc/lapack.h +0 -152
  156. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/misc/lapacke.h +0 -16292
  157. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/misc/lapacke_mangling.h +0 -17
  158. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +0 -358
  159. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +0 -696
  160. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/BlockMethods.h +0 -1442
  161. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h +0 -115
  162. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +0 -177
  163. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/IndexedViewMethods.h +0 -262
  164. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +0 -152
  165. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +0 -95
  166. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/ReshapedMethods.h +0 -149
  167. sequenzo/dissimilarity_measures/src/eigen/blas/BandTriangularSolver.h +0 -97
  168. sequenzo/dissimilarity_measures/src/eigen/blas/GeneralRank1Update.h +0 -44
  169. sequenzo/dissimilarity_measures/src/eigen/blas/PackedSelfadjointProduct.h +0 -53
  170. sequenzo/dissimilarity_measures/src/eigen/blas/PackedTriangularMatrixVector.h +0 -79
  171. sequenzo/dissimilarity_measures/src/eigen/blas/PackedTriangularSolverVector.h +0 -88
  172. sequenzo/dissimilarity_measures/src/eigen/blas/Rank2Update.h +0 -57
  173. sequenzo/dissimilarity_measures/src/eigen/blas/common.h +0 -175
  174. sequenzo/dissimilarity_measures/src/eigen/blas/f2c/datatypes.h +0 -24
  175. sequenzo/dissimilarity_measures/src/eigen/blas/level1_cplx_impl.h +0 -155
  176. sequenzo/dissimilarity_measures/src/eigen/blas/level1_impl.h +0 -144
  177. sequenzo/dissimilarity_measures/src/eigen/blas/level1_real_impl.h +0 -122
  178. sequenzo/dissimilarity_measures/src/eigen/blas/level2_cplx_impl.h +0 -360
  179. sequenzo/dissimilarity_measures/src/eigen/blas/level2_impl.h +0 -553
  180. sequenzo/dissimilarity_measures/src/eigen/blas/level2_real_impl.h +0 -306
  181. sequenzo/dissimilarity_measures/src/eigen/blas/level3_impl.h +0 -702
  182. sequenzo/dissimilarity_measures/src/eigen/debug/gdb/__init__.py +0 -1
  183. sequenzo/dissimilarity_measures/src/eigen/debug/gdb/printers.py +0 -314
  184. sequenzo/dissimilarity_measures/src/eigen/lapack/lapack_common.h +0 -29
  185. sequenzo/dissimilarity_measures/src/eigen/scripts/relicense.py +0 -69
  186. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/Tensor.h +0 -554
  187. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h +0 -329
  188. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h +0 -247
  189. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +0 -1176
  190. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h +0 -1559
  191. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h +0 -1093
  192. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h +0 -518
  193. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h +0 -377
  194. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h +0 -1023
  195. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h +0 -73
  196. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h +0 -6
  197. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h +0 -1413
  198. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h +0 -575
  199. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionSycl.h +0 -1650
  200. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h +0 -1679
  201. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h +0 -456
  202. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h +0 -1132
  203. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConvolutionSycl.h +0 -544
  204. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h +0 -214
  205. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h +0 -347
  206. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h +0 -137
  207. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h +0 -6
  208. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h +0 -104
  209. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceGpu.h +0 -389
  210. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h +0 -1048
  211. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h +0 -409
  212. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h +0 -236
  213. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h +0 -490
  214. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h +0 -236
  215. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h +0 -983
  216. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h +0 -703
  217. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h +0 -388
  218. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h +0 -669
  219. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h +0 -379
  220. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h +0 -237
  221. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h +0 -191
  222. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h +0 -488
  223. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h +0 -302
  224. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorGlobalFunctions.h +0 -33
  225. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h +0 -99
  226. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaUndefines.h +0 -44
  227. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h +0 -79
  228. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h +0 -603
  229. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h +0 -738
  230. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h +0 -247
  231. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorInitializer.h +0 -82
  232. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h +0 -263
  233. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h +0 -216
  234. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMacros.h +0 -98
  235. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h +0 -327
  236. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h +0 -311
  237. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h +0 -1102
  238. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h +0 -708
  239. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h +0 -291
  240. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h +0 -322
  241. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h +0 -998
  242. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h +0 -6
  243. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h +0 -966
  244. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReductionSycl.h +0 -582
  245. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h +0 -454
  246. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h +0 -465
  247. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h +0 -528
  248. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorScanSycl.h +0 -513
  249. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h +0 -471
  250. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h +0 -161
  251. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h +0 -346
  252. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorTrace.h +0 -303
  253. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h +0 -264
  254. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h +0 -249
  255. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h +0 -629
  256. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/TensorSymmetry/DynamicSymmetry.h +0 -293
  257. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/TensorSymmetry/StaticSymmetry.h +0 -236
  258. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/TensorSymmetry/Symmetry.h +0 -338
  259. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/TensorSymmetry/util/TemplateGroupTheory.h +0 -669
  260. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/Barrier.h +0 -67
  261. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/EventCount.h +0 -249
  262. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h +0 -486
  263. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/RunQueue.h +0 -236
  264. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadCancel.h +0 -23
  265. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadEnvironment.h +0 -40
  266. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadLocal.h +0 -301
  267. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadPoolInterface.h +0 -48
  268. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadYield.h +0 -20
  269. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/util/CXX11Meta.h +0 -537
  270. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/util/CXX11Workarounds.h +0 -88
  271. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/util/EmulateArray.h +0 -261
  272. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/util/MaxSizeVector.h +0 -158
  273. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/AutoDiff/AutoDiffJacobian.h +0 -108
  274. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h +0 -730
  275. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/AutoDiff/AutoDiffVector.h +0 -220
  276. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/BVH/BVAlgorithms.h +0 -293
  277. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/BVH/KdBVH.h +0 -223
  278. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Eigenvalues/ArpackSelfAdjointEigenSolver.h +0 -790
  279. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/EulerAngles/EulerAngles.h +0 -355
  280. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/EulerAngles/EulerSystem.h +0 -305
  281. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/FFT/ei_fftw_impl.h +0 -261
  282. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/FFT/ei_kissfft_impl.h +0 -449
  283. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/IterativeSolvers/ConstrainedConjGrad.h +0 -187
  284. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/IterativeSolvers/DGMRES.h +0 -511
  285. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/IterativeSolvers/GMRES.h +0 -335
  286. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/IterativeSolvers/IDRS.h +0 -436
  287. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/IterativeSolvers/IncompleteLU.h +0 -90
  288. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/IterativeSolvers/IterationController.h +0 -154
  289. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/IterativeSolvers/MINRES.h +0 -267
  290. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/IterativeSolvers/Scaling.h +0 -193
  291. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h +0 -305
  292. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/LevenbergMarquardt/LMcovar.h +0 -84
  293. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/LevenbergMarquardt/LMonestep.h +0 -202
  294. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/LevenbergMarquardt/LMpar.h +0 -160
  295. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/LevenbergMarquardt/LMqrsolv.h +0 -188
  296. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/LevenbergMarquardt/LevenbergMarquardt.h +0 -396
  297. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h +0 -441
  298. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h +0 -569
  299. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h +0 -373
  300. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/MatrixFunctions/MatrixPower.h +0 -705
  301. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/MatrixFunctions/MatrixSquareRoot.h +0 -368
  302. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/MatrixFunctions/StemFunction.h +0 -117
  303. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/MoreVectorization/MathFunctions.h +0 -95
  304. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/HybridNonLinearSolver.h +0 -601
  305. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/LevenbergMarquardt.h +0 -657
  306. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/chkder.h +0 -66
  307. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/covar.h +0 -70
  308. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/dogleg.h +0 -107
  309. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/fdjac1.h +0 -79
  310. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/lmpar.h +0 -298
  311. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/qrsolv.h +0 -91
  312. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/r1mpyq.h +0 -30
  313. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/r1updt.h +0 -99
  314. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/rwupdt.h +0 -49
  315. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NumericalDiff/NumericalDiff.h +0 -130
  316. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Polynomials/Companion.h +0 -280
  317. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Polynomials/PolynomialSolver.h +0 -428
  318. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Polynomials/PolynomialUtils.h +0 -143
  319. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Skyline/SkylineInplaceLU.h +0 -352
  320. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Skyline/SkylineMatrix.h +0 -862
  321. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Skyline/SkylineMatrixBase.h +0 -212
  322. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Skyline/SkylineProduct.h +0 -295
  323. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Skyline/SkylineStorage.h +0 -259
  324. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Skyline/SkylineUtil.h +0 -89
  325. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SparseExtra/BlockOfDynamicSparseMatrix.h +0 -122
  326. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SparseExtra/BlockSparseMatrix.h +0 -1079
  327. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SparseExtra/DynamicSparseMatrix.h +0 -404
  328. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SparseExtra/MarketIO.h +0 -282
  329. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SparseExtra/MatrixMarketIterator.h +0 -247
  330. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SparseExtra/RandomSetter.h +0 -349
  331. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsArrayAPI.h +0 -286
  332. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsBFloat16.h +0 -68
  333. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsFunctors.h +0 -357
  334. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsHalf.h +0 -66
  335. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsImpl.h +0 -1959
  336. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsPacketMath.h +0 -118
  337. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/HipVectorCompatibility.h +0 -67
  338. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsArrayAPI.h +0 -167
  339. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsBFloat16.h +0 -58
  340. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h +0 -330
  341. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h +0 -58
  342. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h +0 -2045
  343. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h +0 -79
  344. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/arch/AVX/BesselFunctions.h +0 -46
  345. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/arch/AVX/SpecialFunctions.h +0 -16
  346. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/arch/AVX512/BesselFunctions.h +0 -46
  347. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/arch/AVX512/SpecialFunctions.h +0 -16
  348. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/arch/GPU/SpecialFunctions.h +0 -369
  349. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/arch/NEON/BesselFunctions.h +0 -54
  350. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/arch/NEON/SpecialFunctions.h +0 -34
  351. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Splines/Spline.h +0 -507
  352. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Splines/SplineFitting.h +0 -431
  353. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Splines/SplineFwd.h +0 -93
  354. sequenzo-0.1.18.dist-info/RECORD +0 -544
  355. {sequenzo-0.1.18.dist-info → sequenzo-0.1.19.dist-info}/WHEEL +0 -0
  356. {sequenzo-0.1.18.dist-info → sequenzo-0.1.19.dist-info}/licenses/LICENSE +0 -0
  357. {sequenzo-0.1.18.dist-info → sequenzo-0.1.19.dist-info}/top_level.txt +0 -0
@@ -1,1132 +0,0 @@
1
- // This file is part of Eigen, a lightweight C++ template library
2
- // for linear algebra.
3
- //
4
- // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
5
- //
6
- // This Source Code Form is subject to the terms of the Mozilla
7
- // Public License v. 2.0. If a copy of the MPL was not distributed
8
- // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9
-
10
- #ifndef EIGEN_CXX11_TENSOR_TENSOR_CONVOLUTION_H
11
- #define EIGEN_CXX11_TENSOR_TENSOR_CONVOLUTION_H
12
-
13
- namespace Eigen {
14
-
15
- /** \class TensorConvolution
16
- * \ingroup CXX11_Tensor_Module
17
- *
18
- * \brief Tensor convolution class.
19
- *
20
- *
21
- */
22
- namespace internal {
23
-
24
- template <typename Index, typename InputDims, int NumKernelDims, int Layout>
25
- class IndexMapper {
26
- public:
27
- IndexMapper(const InputDims& input_dims, const array<Index, NumKernelDims>& kernel_dims,
28
- const array<Index, NumKernelDims>& indices) {
29
-
30
- array<Index, NumDims> dimensions = input_dims;
31
- for (int i = 0; i < NumKernelDims; ++i) {
32
- const Index index = indices[i];
33
- const Index input_dim = input_dims[index];
34
- const Index kernel_dim = kernel_dims[i];
35
- const Index result_dim = input_dim - kernel_dim + 1;
36
- dimensions[index] = result_dim;
37
- }
38
-
39
- array<Index, NumDims> inputStrides;
40
- array<Index, NumDims> outputStrides;
41
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
42
- inputStrides[0] = 1;
43
- outputStrides[0] = 1;
44
- for (int i = 1; i < NumDims; ++i) {
45
- inputStrides[i] = inputStrides[i-1] * input_dims[i-1];
46
- outputStrides[i] = outputStrides[i-1] * dimensions[i-1];
47
- }
48
- } else {
49
- inputStrides[NumDims - 1] = 1;
50
- outputStrides[NumDims - 1] = 1;
51
- for (int i = static_cast<int>(NumDims) - 2; i >= 0; --i) {
52
- inputStrides[i] = inputStrides[i + 1] * input_dims[i + 1];
53
- outputStrides[i] = outputStrides[i + 1] * dimensions[i + 1];
54
- }
55
- }
56
-
57
- array<Index, NumDims> gpuInputDimensions;
58
- array<Index, NumDims> gpuOutputDimensions;
59
- array<Index, NumDims> tmp = dimensions;
60
- array<Index, NumDims> ordering;
61
- const size_t offset = static_cast<int>(Layout) == static_cast<int>(ColMajor)
62
- ? 0
63
- : NumDims - NumKernelDims;
64
- for (int i = 0; i < NumKernelDims; ++i) {
65
- const Index index = i + offset;
66
- ordering[index] = indices[i];
67
- tmp[indices[i]] = -1;
68
- gpuInputDimensions[index] = input_dims[indices[i]];
69
- gpuOutputDimensions[index] = dimensions[indices[i]];
70
- }
71
-
72
- int written = static_cast<int>(Layout) == static_cast<int>(ColMajor)
73
- ? NumKernelDims
74
- : 0;
75
- for (int i = 0; i < NumDims; ++i) {
76
- if (tmp[i] >= 0) {
77
- ordering[written] = i;
78
- gpuInputDimensions[written] = input_dims[i];
79
- gpuOutputDimensions[written] = dimensions[i];
80
- ++written;
81
- }
82
- }
83
-
84
- for (int i = 0; i < NumDims; ++i) {
85
- m_inputStrides[i] = inputStrides[ordering[i]];
86
- m_outputStrides[i] = outputStrides[ordering[i]];
87
- }
88
-
89
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
90
- for (int i = 0; i < NumDims; ++i) {
91
- if (i > NumKernelDims) {
92
- m_gpuInputStrides[i] =
93
- m_gpuInputStrides[i - 1] * gpuInputDimensions[i - 1];
94
- m_gpuOutputStrides[i] =
95
- m_gpuOutputStrides[i - 1] * gpuOutputDimensions[i - 1];
96
- } else {
97
- m_gpuInputStrides[i] = 1;
98
- m_gpuOutputStrides[i] = 1;
99
- }
100
- }
101
- } else {
102
- for (int i = NumDims - 1; i >= 0; --i) {
103
- if (static_cast<size_t>(i + 1) < offset) {
104
- m_gpuInputStrides[i] =
105
- m_gpuInputStrides[i + 1] * gpuInputDimensions[i + 1];
106
- m_gpuOutputStrides[i] =
107
- m_gpuOutputStrides[i + 1] * gpuOutputDimensions[i + 1];
108
- } else {
109
- m_gpuInputStrides[i] = 1;
110
- m_gpuOutputStrides[i] = 1;
111
- }
112
- }
113
- }
114
- }
115
-
116
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapGpuInputPlaneToTensorInputOffset(Index p) const {
117
- Index inputIndex = 0;
118
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
119
- for (int d = NumDims - 1; d > NumKernelDims; --d) {
120
- const Index idx = p / m_gpuInputStrides[d];
121
- inputIndex += idx * m_inputStrides[d];
122
- p -= idx * m_gpuInputStrides[d];
123
- }
124
- inputIndex += p * m_inputStrides[NumKernelDims];
125
- } else {
126
- std::ptrdiff_t limit = 0;
127
- if (NumKernelDims < NumDims) {
128
- limit = NumDims - NumKernelDims - 1;
129
- }
130
- for (int d = 0; d < limit; ++d) {
131
- const Index idx = p / m_gpuInputStrides[d];
132
- inputIndex += idx * m_inputStrides[d];
133
- p -= idx * m_gpuInputStrides[d];
134
- }
135
- inputIndex += p * m_inputStrides[limit];
136
- }
137
- return inputIndex;
138
- }
139
-
140
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapGpuOutputPlaneToTensorOutputOffset(Index p) const {
141
- Index outputIndex = 0;
142
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
143
- for (int d = NumDims - 1; d > NumKernelDims; --d) {
144
- const Index idx = p / m_gpuOutputStrides[d];
145
- outputIndex += idx * m_outputStrides[d];
146
- p -= idx * m_gpuOutputStrides[d];
147
- }
148
- outputIndex += p * m_outputStrides[NumKernelDims];
149
- } else {
150
- std::ptrdiff_t limit = 0;
151
- if (NumKernelDims < NumDims) {
152
- limit = NumDims - NumKernelDims - 1;
153
- }
154
- for (int d = 0; d < limit; ++d) {
155
- const Index idx = p / m_gpuOutputStrides[d];
156
- outputIndex += idx * m_outputStrides[d];
157
- p -= idx * m_gpuOutputStrides[d];
158
- }
159
- outputIndex += p * m_outputStrides[limit];
160
- }
161
- return outputIndex;
162
- }
163
-
164
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapGpuInputKernelToTensorInputOffset(Index i) const {
165
- const size_t offset = static_cast<int>(Layout) == static_cast<int>(ColMajor)
166
- ? 0
167
- : NumDims - NumKernelDims;
168
- return i * m_inputStrides[offset];
169
- }
170
-
171
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapGpuOutputKernelToTensorOutputOffset(Index i) const {
172
- const size_t offset = static_cast<int>(Layout) == static_cast<int>(ColMajor)
173
- ? 0
174
- : NumDims - NumKernelDims;
175
- return i * m_outputStrides[offset];
176
- }
177
-
178
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapGpuInputKernelToTensorInputOffset(Index i, Index j) const {
179
- const size_t offset = static_cast<int>(Layout) == static_cast<int>(ColMajor)
180
- ? 0
181
- : NumDims - NumKernelDims;
182
- return i * m_inputStrides[offset] + j * m_inputStrides[offset + 1];
183
- }
184
-
185
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapGpuOutputKernelToTensorOutputOffset(Index i, Index j) const {
186
- const size_t offset = static_cast<int>(Layout) == static_cast<int>(ColMajor)
187
- ? 0
188
- : NumDims - NumKernelDims;
189
- return i * m_outputStrides[offset] + j * m_outputStrides[offset + 1];
190
- }
191
-
192
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapGpuInputKernelToTensorInputOffset(Index i, Index j, Index k) const {
193
- const size_t offset = static_cast<int>(Layout) == static_cast<int>(ColMajor)
194
- ? 0
195
- : NumDims - NumKernelDims;
196
- return i * m_inputStrides[offset] + j * m_inputStrides[offset + 1] +
197
- k * m_inputStrides[offset + 2];
198
- }
199
-
200
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapGpuOutputKernelToTensorOutputOffset(Index i, Index j, Index k) const {
201
- const size_t offset = static_cast<int>(Layout) == static_cast<int>(ColMajor)
202
- ? 0
203
- : NumDims - NumKernelDims;
204
- return i * m_outputStrides[offset] + j * m_outputStrides[offset + 1] +
205
- k * m_outputStrides[offset + 2];
206
- }
207
-
208
- private:
209
- static const int NumDims = internal::array_size<InputDims>::value;
210
- array<Index, NumDims> m_inputStrides;
211
- array<Index, NumDims> m_outputStrides;
212
- array<Index, NumDims> m_gpuInputStrides;
213
- array<Index, NumDims> m_gpuOutputStrides;
214
- };
215
-
216
-
217
-
218
- template<typename Dimensions, typename InputXprType, typename KernelXprType>
219
- struct traits<TensorConvolutionOp<Dimensions, InputXprType, KernelXprType> >
220
- {
221
- // Type promotion to handle the case where the types of the lhs and the rhs are different.
222
- typedef typename promote_storage_type<typename InputXprType::Scalar,
223
- typename KernelXprType::Scalar>::ret Scalar;
224
- typedef typename promote_storage_type<typename traits<InputXprType>::StorageKind,
225
- typename traits<KernelXprType>::StorageKind>::ret StorageKind;
226
- typedef typename promote_index_type<typename traits<InputXprType>::Index,
227
- typename traits<KernelXprType>::Index>::type Index;
228
- typedef typename InputXprType::Nested LhsNested;
229
- typedef typename KernelXprType::Nested RhsNested;
230
- typedef typename remove_reference<LhsNested>::type _LhsNested;
231
- typedef typename remove_reference<RhsNested>::type _RhsNested;
232
- static const int NumDimensions = traits<InputXprType>::NumDimensions;
233
- static const int Layout = traits<InputXprType>::Layout;
234
- typedef typename conditional<Pointer_type_promotion<typename InputXprType::Scalar, Scalar>::val,
235
- typename traits<InputXprType>::PointerType, typename traits<KernelXprType>::PointerType>::type PointerType;
236
-
237
- enum {
238
- Flags = 0
239
- };
240
- };
241
-
242
- template<typename Dimensions, typename InputXprType, typename KernelXprType>
243
- struct eval<TensorConvolutionOp<Dimensions, InputXprType, KernelXprType>, Eigen::Dense>
244
- {
245
- typedef const TensorConvolutionOp<Dimensions, InputXprType, KernelXprType>& type;
246
- };
247
-
248
- template<typename Dimensions, typename InputXprType, typename KernelXprType>
249
- struct nested<TensorConvolutionOp<Dimensions, InputXprType, KernelXprType>, 1, typename eval<TensorConvolutionOp<Dimensions, InputXprType, KernelXprType> >::type>
250
- {
251
- typedef TensorConvolutionOp<Dimensions, InputXprType, KernelXprType> type;
252
- };
253
-
254
- } // end namespace internal
255
-
256
-
257
-
258
- template<typename Indices, typename InputXprType, typename KernelXprType>
259
- class TensorConvolutionOp : public TensorBase<TensorConvolutionOp<Indices, InputXprType, KernelXprType>, ReadOnlyAccessors>
260
- {
261
- public:
262
- typedef typename Eigen::internal::traits<TensorConvolutionOp>::Scalar Scalar;
263
- typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
264
- typedef typename internal::promote_storage_type<typename InputXprType::CoeffReturnType,
265
- typename KernelXprType::CoeffReturnType>::ret CoeffReturnType;
266
- typedef typename Eigen::internal::nested<TensorConvolutionOp>::type Nested;
267
- typedef typename Eigen::internal::traits<TensorConvolutionOp>::StorageKind StorageKind;
268
- typedef typename Eigen::internal::traits<TensorConvolutionOp>::Index Index;
269
-
270
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorConvolutionOp(const InputXprType& input, const KernelXprType& kernel, const Indices& dims)
271
- : m_input_xpr(input), m_kernel_xpr(kernel), m_indices(dims) {}
272
-
273
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
274
- const Indices& indices() const { return m_indices; }
275
-
276
- /** \returns the nested expressions */
277
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
278
- const typename internal::remove_all<typename InputXprType::Nested>::type&
279
- inputExpression() const { return m_input_xpr; }
280
-
281
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
282
- const typename internal::remove_all<typename KernelXprType::Nested>::type&
283
- kernelExpression() const { return m_kernel_xpr; }
284
-
285
- protected:
286
- typename InputXprType::Nested m_input_xpr;
287
- typename KernelXprType::Nested m_kernel_xpr;
288
- const Indices m_indices;
289
- };
290
-
291
-
292
- template<typename Indices, typename InputArgType, typename KernelArgType, typename Device>
293
- struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelArgType>, Device>
294
- {
295
- typedef TensorConvolutionOp<Indices, InputArgType, KernelArgType> XprType;
296
-
297
- static const int NumDims = internal::array_size<typename TensorEvaluator<InputArgType, Device>::Dimensions>::value;
298
- static const int NumKernelDims = internal::array_size<Indices>::value;
299
- typedef typename XprType::Index Index;
300
- typedef DSizes<Index, NumDims> Dimensions;
301
-
302
- typedef typename XprType::Scalar Scalar;
303
- typedef typename XprType::CoeffReturnType CoeffReturnType;
304
- typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
305
- static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
306
- typedef StorageMemory<Scalar, Device> Storage;
307
- typedef typename Storage::Type EvaluatorPointerType;
308
-
309
- enum {
310
- IsAligned = int(TensorEvaluator<InputArgType, Device>::IsAligned) & int(TensorEvaluator<KernelArgType, Device>::IsAligned),
311
- PacketAccess = int(TensorEvaluator<InputArgType, Device>::PacketAccess) & int(TensorEvaluator<KernelArgType, Device>::PacketAccess),
312
- BlockAccess = false,
313
- PreferBlockAccess = false,
314
- Layout = TensorEvaluator<InputArgType, Device>::Layout,
315
- CoordAccess = false, // to be implemented
316
- RawAccess = false
317
- };
318
-
319
- //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
320
- typedef internal::TensorBlockNotImplemented TensorBlock;
321
- //===--------------------------------------------------------------------===//
322
-
323
- EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
324
- : m_inputImpl(op.inputExpression(), device), m_kernelImpl(op.kernelExpression(), device), m_kernelArg(op.kernelExpression()), m_kernel(NULL), m_local_kernel(false), m_device(device)
325
- {
326
- EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<InputArgType, Device>::Layout) == static_cast<int>(TensorEvaluator<KernelArgType, Device>::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE);
327
-
328
- const typename TensorEvaluator<InputArgType, Device>::Dimensions& input_dims = m_inputImpl.dimensions();
329
- const typename TensorEvaluator<KernelArgType, Device>::Dimensions& kernel_dims = m_kernelImpl.dimensions();
330
-
331
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
332
- m_inputStride[0] = 1;
333
- for (int i = 1; i < NumDims; ++i) {
334
- m_inputStride[i] = m_inputStride[i - 1] * input_dims[i - 1];
335
- }
336
- } else {
337
- m_inputStride[NumDims - 1] = 1;
338
- for (int i = NumDims - 2; i >= 0; --i) {
339
- m_inputStride[i] = m_inputStride[i + 1] * input_dims[i + 1];
340
- }
341
- }
342
-
343
- m_dimensions = m_inputImpl.dimensions();
344
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
345
- for (int i = 0; i < NumKernelDims; ++i) {
346
- const Index index = op.indices()[i];
347
- const Index input_dim = input_dims[index];
348
- const Index kernel_dim = kernel_dims[i];
349
- const Index result_dim = input_dim - kernel_dim + 1;
350
- m_dimensions[index] = result_dim;
351
- if (i > 0) {
352
- m_kernelStride[i] = m_kernelStride[i - 1] * kernel_dims[i - 1];
353
- } else {
354
- m_kernelStride[0] = 1;
355
- }
356
- m_indexStride[i] = m_inputStride[index];
357
- }
358
-
359
- m_outputStride[0] = 1;
360
- for (int i = 1; i < NumDims; ++i) {
361
- m_outputStride[i] = m_outputStride[i - 1] * m_dimensions[i - 1];
362
- }
363
- } else {
364
- for (int i = NumKernelDims - 1; i >= 0; --i) {
365
- const Index index = op.indices()[i];
366
- const Index input_dim = input_dims[index];
367
- const Index kernel_dim = kernel_dims[i];
368
- const Index result_dim = input_dim - kernel_dim + 1;
369
- m_dimensions[index] = result_dim;
370
- if (i < NumKernelDims - 1) {
371
- m_kernelStride[i] = m_kernelStride[i + 1] * kernel_dims[i + 1];
372
- } else {
373
- m_kernelStride[NumKernelDims - 1] = 1;
374
- }
375
- m_indexStride[i] = m_inputStride[index];
376
- }
377
-
378
- m_outputStride[NumDims - 1] = 1;
379
- for (int i = NumDims - 2; i >= 0; --i) {
380
- m_outputStride[i] = m_outputStride[i + 1] * m_dimensions[i + 1];
381
- }
382
- }
383
- }
384
-
385
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
386
-
387
- EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) {
388
- m_inputImpl.evalSubExprsIfNeeded(NULL);
389
- preloadKernel();
390
- return true;
391
- }
392
- EIGEN_STRONG_INLINE void cleanup() {
393
- m_inputImpl.cleanup();
394
- if (m_local_kernel) {
395
- m_device.deallocate((void*)m_kernel);
396
- m_local_kernel = false;
397
- }
398
- m_kernel = NULL;
399
- }
400
-
401
- void evalTo(typename XprType::Scalar* buffer) {
402
- evalSubExprsIfNeeded(NULL);
403
- for (int i = 0; i < dimensions().TotalSize(); ++i) {
404
- buffer[i] += coeff(i);
405
- }
406
- cleanup();
407
- }
408
-
409
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
410
- {
411
- CoeffReturnType result = CoeffReturnType(0);
412
- convolve(firstInput(index), 0, NumKernelDims-1, result);
413
- return result;
414
- }
415
-
416
- template<int LoadMode>
417
- EIGEN_DEVICE_FUNC PacketReturnType packet(const Index index) const
418
- {
419
- Index indices[2] = {index, index+PacketSize-1};
420
- Index startInputs[2] = {0, 0};
421
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
422
- for (int i = NumDims - 1; i > 0; --i) {
423
- const Index idx0 = indices[0] / m_outputStride[i];
424
- const Index idx1 = indices[1] / m_outputStride[i];
425
- startInputs[0] += idx0 * m_inputStride[i];
426
- startInputs[1] += idx1 * m_inputStride[i];
427
- indices[0] -= idx0 * m_outputStride[i];
428
- indices[1] -= idx1 * m_outputStride[i];
429
- }
430
- } else {
431
- for (int i = 0; i < NumDims - 1; ++i) {
432
- const Index idx0 = indices[0] / m_outputStride[i];
433
- const Index idx1 = indices[1] / m_outputStride[i];
434
- startInputs[0] += idx0 * m_inputStride[i];
435
- startInputs[1] += idx1 * m_inputStride[i];
436
- indices[0] -= idx0 * m_outputStride[i];
437
- indices[1] -= idx1 * m_outputStride[i];
438
- }
439
- }
440
- startInputs[0] += indices[0];
441
- startInputs[1] += indices[1];
442
-
443
- if (startInputs[1]-startInputs[0] == PacketSize-1) {
444
- PacketReturnType result = internal::pset1<PacketReturnType>(0);
445
- convolvePacket(startInputs[0], 0, NumKernelDims-1, result);
446
- return result;
447
- } else {
448
- EIGEN_ALIGN_MAX Scalar data[PacketSize];
449
- data[0] = Scalar(0);
450
- convolve(startInputs[0], 0, NumKernelDims-1, data[0]);
451
- for (int i = 1; i < PacketSize-1; ++i) {
452
- data[i] = Scalar(0);
453
- convolve(firstInput(index+i), 0, NumKernelDims-1, data[i]);
454
- }
455
- data[PacketSize-1] = Scalar(0);
456
- convolve(startInputs[1], 0, NumKernelDims-1, data[PacketSize-1]);
457
- return internal::pload<PacketReturnType>(data);
458
- }
459
- }
460
-
461
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
462
- costPerCoeff(bool vectorized) const {
463
- const double kernel_size = m_kernelImpl.dimensions().TotalSize();
464
- // We ignore the use of fused multiply-add.
465
- const double convolve_compute_cost =
466
- TensorOpCost::AddCost<Scalar>() + TensorOpCost::MulCost<Scalar>();
467
- const double firstIndex_compute_cost =
468
- NumDims *
469
- (2 * TensorOpCost::AddCost<Index>() + 2 * TensorOpCost::MulCost<Index>() +
470
- TensorOpCost::DivCost<Index>());
471
- return TensorOpCost(0, 0, firstIndex_compute_cost, vectorized, PacketSize) +
472
- kernel_size * (m_inputImpl.costPerCoeff(vectorized) +
473
- m_kernelImpl.costPerCoeff(vectorized) +
474
- TensorOpCost(0, 0, convolve_compute_cost, vectorized,
475
- PacketSize));
476
- }
477
-
478
- EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }
479
-
480
- private:
481
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index firstInput(Index index) const {
482
- Index startInput = 0;
483
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
484
- for (int i = NumDims - 1; i > 0; --i) {
485
- const Index idx = index / m_outputStride[i];
486
- startInput += idx * m_inputStride[i];
487
- index -= idx * m_outputStride[i];
488
- }
489
- } else {
490
- for (int i = 0; i < NumDims - 1; ++i) {
491
- const Index idx = index / m_outputStride[i];
492
- startInput += idx * m_inputStride[i];
493
- index -= idx * m_outputStride[i];
494
- }
495
- }
496
- startInput += index;
497
- return startInput;
498
- }
499
-
500
- EIGEN_DEVICE_FUNC void convolve(Index firstIndex, Index firstKernel, int DimIndex, CoeffReturnType& accum) const {
501
- for (int j = 0; j < m_kernelImpl.dimensions()[DimIndex]; ++j) {
502
- const Index input = firstIndex + j * m_indexStride[DimIndex];
503
- const Index kernel = firstKernel + j * m_kernelStride[DimIndex];
504
- if (DimIndex > 0) {
505
- convolve(input, kernel, DimIndex-1, accum);
506
- } else {
507
- accum += m_inputImpl.coeff(input) * m_kernel[kernel];
508
- }
509
- }
510
- }
511
-
512
- template <typename Packet>
513
- EIGEN_DEVICE_FUNC void convolvePacket(Index firstIndex, Index firstKernel, int DimIndex, Packet& accum) const {
514
- for (int j = 0; j < m_kernelImpl.dimensions()[DimIndex]; ++j) {
515
- const Index input = firstIndex + j * m_indexStride[DimIndex];
516
- const Index kernel = firstKernel + j * m_kernelStride[DimIndex];
517
- if (DimIndex > 0) {
518
- convolvePacket(input, kernel, DimIndex-1, accum);
519
- } else {
520
- accum = internal::pmadd<Packet>(m_inputImpl.template packet<Unaligned>(input), internal::pset1<Packet>(m_kernel[kernel]), accum);
521
- }
522
- }
523
- }
524
-
525
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void preloadKernel() {
526
- // Don't make a local copy of the kernel unless we have to (i.e. it's an
527
- // expression that needs to be evaluated)
528
- const Scalar* in_place = m_kernelImpl.data();
529
- if (in_place) {
530
- m_kernel = in_place;
531
- m_local_kernel = false;
532
- } else {
533
- size_t kernel_sz = m_kernelImpl.dimensions().TotalSize() * sizeof(Scalar);
534
- Scalar* local = (Scalar*)m_device.allocate_temp(kernel_sz);
535
- typedef TensorEvalToOp<const KernelArgType> EvalTo;
536
- EvalTo evalToTmp(local, m_kernelArg);
537
- const bool Vectorize = internal::IsVectorizable<Device, KernelArgType>::value;
538
- internal::TensorExecutor<const EvalTo, Device, Vectorize>::run(evalToTmp, m_device);
539
-
540
- m_kernel = local;
541
- m_local_kernel = true;
542
- }
543
- }
544
-
545
- array<Index, NumDims> m_inputStride;
546
- array<Index, NumDims> m_outputStride;
547
-
548
- array<Index, NumKernelDims> m_indexStride;
549
- array<Index, NumKernelDims> m_kernelStride;
550
- TensorEvaluator<InputArgType, Device> m_inputImpl;
551
- TensorEvaluator<KernelArgType, Device> m_kernelImpl;
552
- Dimensions m_dimensions;
553
-
554
- KernelArgType m_kernelArg;
555
- const Scalar* m_kernel;
556
- bool m_local_kernel;
557
- const Device EIGEN_DEVICE_REF m_device;
558
- };
559
-
560
-
561
-
562
-
563
- // Use an optimized implementation of the evaluation code for GPUs whenever possible.
564
- #if defined(EIGEN_USE_GPU) && defined(EIGEN_GPUCC)
565
-
566
- template <int StaticKernelSize>
567
- struct GetKernelSize {
568
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int operator() (const int /*kernelSize*/) const {
569
- return StaticKernelSize;
570
- }
571
- };
572
- template <>
573
- struct GetKernelSize<Dynamic> {
574
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int operator() (const int kernelSize) const {
575
- return kernelSize;
576
- }
577
- };
578
-
579
- template <typename InputEvaluator, typename Index, typename InputDims,
580
- int StaticKernelSize>
581
- __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void EigenConvolutionKernel1D(
582
- InputEvaluator eval,
583
- const internal::IndexMapper<Index, InputDims, 1, InputEvaluator::Layout>
584
- indexMapper,
585
- const float* __restrict kernel, const int numPlanes, const int numX,
586
- const int maxX, const int kernelSize, float* buffer) {
587
- #if defined(EIGEN_HIPCC)
588
- HIP_DYNAMIC_SHARED(float, s)
589
- #else
590
- extern __shared__ float s[];
591
- #endif
592
-
593
- const int first_x = blockIdx.x * maxX;
594
- const int last_x = (first_x + maxX < numX ? first_x + maxX : numX) - 1;
595
- const int num_x_input = last_x - first_x + GetKernelSize<StaticKernelSize>()(kernelSize);
596
- const int num_x_output = last_x - first_x + 1;
597
-
598
- const int first_plane = blockIdx.y * blockDim.y;
599
- const int plane_stride = blockDim.y * gridDim.y;
600
-
601
- for (int p = first_plane + threadIdx.y; p < numPlanes; p += plane_stride) {
602
- // Load inputs to shared memory
603
- const int plane_input_offset = indexMapper.mapGpuInputPlaneToTensorInputOffset(p);
604
- const int plane_kernel_offset = threadIdx.y * num_x_input;
605
- #pragma unroll
606
- for (int i = threadIdx.x; i < num_x_input; i += blockDim.x) {
607
- const int tensor_index = plane_input_offset + indexMapper.mapGpuInputKernelToTensorInputOffset(i+first_x);
608
- s[i + plane_kernel_offset] = eval.coeff(tensor_index);
609
- }
610
-
611
- __syncthreads();
612
-
613
- // Compute the convolution
614
- const int plane_output_offset = indexMapper.mapGpuOutputPlaneToTensorOutputOffset(p);
615
-
616
- #pragma unroll
617
- for (int i = threadIdx.x; i < num_x_output; i += blockDim.x) {
618
- const int kernel_offset = plane_kernel_offset + i;
619
- float result = 0.0f;
620
- #pragma unroll
621
- for (int k = 0; k < GetKernelSize<StaticKernelSize>()(kernelSize); ++k) {
622
- result += s[k + kernel_offset] * kernel[k];
623
- }
624
- const int tensor_index = plane_output_offset + indexMapper.mapGpuOutputKernelToTensorOutputOffset(i+first_x);
625
- buffer[tensor_index] = result;
626
- }
627
- __syncthreads();
628
- }
629
- };
630
-
631
- template <typename InputEvaluator, typename Index, typename InputDims,
632
- int StaticKernelSizeX, int StaticKernelSizeY>
633
- __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void EigenConvolutionKernel2D(
634
- InputEvaluator eval,
635
- const internal::IndexMapper<Index, InputDims, 2, InputEvaluator::Layout>
636
- indexMapper,
637
- const float* __restrict kernel, const int numPlanes, const int numX,
638
- const int maxX, const int numY, const int maxY, const int kernelSizeX,
639
- const int kernelSizeY, float* buffer) {
640
- #if defined(EIGEN_HIPCC)
641
- HIP_DYNAMIC_SHARED(float, s)
642
- #else
643
- extern __shared__ float s[];
644
- #endif
645
-
646
- const int first_x = blockIdx.x * maxX;
647
- const int last_x = (first_x + maxX < numX ? first_x + maxX : numX) - 1;
648
- const int num_x_input = last_x - first_x + GetKernelSize<StaticKernelSizeX>()(kernelSizeX);
649
- const int num_x_output = last_x - first_x + 1;
650
-
651
- const int first_y = blockIdx.y * maxY;
652
- const int last_y = (first_y + maxY < numY ? first_y + maxY : numY) - 1;
653
- const int num_y_input = last_y - first_y + GetKernelSize<StaticKernelSizeY>()(kernelSizeY);
654
- const int num_y_output = last_y - first_y + 1;
655
-
656
- const int first_plane = blockIdx.z * blockDim.z;
657
- const int plane_stride = blockDim.z * gridDim.z;
658
-
659
- for (int p = first_plane + threadIdx.z; p < numPlanes; p += plane_stride) {
660
-
661
- const int plane_input_offset = indexMapper.mapGpuInputPlaneToTensorInputOffset(p);
662
- const int plane_kernel_offset = threadIdx.z * num_y_input;
663
-
664
- // Load inputs to shared memory
665
- #pragma unroll
666
- for (int j = threadIdx.y; j < num_y_input; j += blockDim.y) {
667
- const int input_offset = num_x_input * (j + plane_kernel_offset);
668
- #pragma unroll
669
- for (int i = threadIdx.x; i < num_x_input; i += blockDim.x) {
670
- const int tensor_index = plane_input_offset + indexMapper.mapGpuInputKernelToTensorInputOffset(i+first_x, j+first_y);
671
- s[i + input_offset] = eval.coeff(tensor_index);
672
- }
673
- }
674
-
675
- __syncthreads();
676
-
677
- // Convolution
678
- const int plane_output_offset = indexMapper.mapGpuOutputPlaneToTensorOutputOffset(p);
679
-
680
- #pragma unroll
681
- for (int j = threadIdx.y; j < num_y_output; j += blockDim.y) {
682
- #pragma unroll
683
- for (int i = threadIdx.x; i < num_x_output; i += blockDim.x) {
684
- float result = 0.0f;
685
- #pragma unroll
686
- for (int l = 0; l < GetKernelSize<StaticKernelSizeY>()(kernelSizeY); ++l) {
687
- const int kernel_offset = kernelSizeX * l;
688
- const int input_offset = i + num_x_input * (j + l + plane_kernel_offset);
689
- #pragma unroll
690
- for (int k = 0; k < GetKernelSize<StaticKernelSizeX>()(kernelSizeX); ++k) {
691
- result += s[k + input_offset] * kernel[k + kernel_offset];
692
- }
693
- }
694
- const int tensor_index = plane_output_offset + indexMapper.mapGpuOutputKernelToTensorOutputOffset(i+first_x, j+first_y);
695
- buffer[tensor_index] = result;
696
- }
697
- }
698
-
699
- __syncthreads();
700
- }
701
- };
702
-
703
- template <typename InputEvaluator, typename Index, typename InputDims>
704
- __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void EigenConvolutionKernel3D(
705
- InputEvaluator eval,
706
- const internal::IndexMapper<Index, InputDims, 3, InputEvaluator::Layout>
707
- indexMapper,
708
- const float* __restrict kernel, const size_t numPlanes, const size_t numX,
709
- const size_t maxX, const size_t numY, const size_t maxY, const size_t numZ,
710
- const size_t maxZ, const size_t kernelSizeX, const size_t kernelSizeY,
711
- const size_t kernelSizeZ, float* buffer) {
712
- #if defined(EIGEN_HIPCC)
713
- HIP_DYNAMIC_SHARED(float, s)
714
- #else
715
- extern __shared__ float s[];
716
- #endif
717
-
718
- // Load inputs to shared memory
719
- const int first_x = blockIdx.x * maxX;
720
- const int last_x = (first_x + maxX < numX ? first_x + maxX : numX) - 1;
721
- const int num_x_input = last_x - first_x + kernelSizeX;
722
-
723
- const int first_y = blockIdx.y * maxY;
724
- const int last_y = (first_y + maxY < numY ? first_y + maxY : numY) - 1;
725
- const int num_y_input = last_y - first_y + kernelSizeY;
726
-
727
- const int first_z = blockIdx.z * maxZ;
728
- const int last_z = (first_z + maxZ < numZ ? first_z + maxZ : numZ) - 1;
729
- const int num_z_input = last_z - first_z + kernelSizeZ;
730
-
731
- for (int p = 0; p < numPlanes; ++p) {
732
-
733
- const int plane_input_offset = indexMapper.mapGpuInputPlaneToTensorInputOffset(p);
734
- const int plane_kernel_offset = 0;
735
-
736
- for (int k = threadIdx.z; k < num_z_input; k += blockDim.z) {
737
- for (int j = threadIdx.y; j < num_y_input; j += blockDim.y) {
738
- for (int i = threadIdx.x; i < num_x_input; i += blockDim.x) {
739
- const int tensor_index = plane_input_offset + indexMapper.mapGpuInputKernelToTensorInputOffset(i+first_x, j+first_y, k+first_z);
740
- s[i + num_x_input * (j + num_y_input * (k + plane_kernel_offset))] = eval.coeff(tensor_index);
741
- }
742
- }
743
- }
744
-
745
- __syncthreads();
746
-
747
- // Convolution
748
- const int num_z_output = last_z - first_z + 1;
749
- const int num_y_output = last_y - first_y + 1;
750
- const int num_x_output = last_x - first_x + 1;
751
- const int plane_output_offset = indexMapper.mapGpuOutputPlaneToTensorOutputOffset(p);
752
-
753
- for (int k = threadIdx.z; k < num_z_output; k += blockDim.z) {
754
- for (int j = threadIdx.y; j < num_y_output; j += blockDim.y) {
755
- for (int i = threadIdx.x; i < num_x_output; i += blockDim.x) {
756
- float result = 0.0f;
757
- for (int n = 0; n < kernelSizeZ; ++n) {
758
- for (int m = 0; m < kernelSizeY; ++m) {
759
- for (int l = 0; l < kernelSizeX; ++l) {
760
- result += s[i + l + num_x_input * (j + m + num_y_input * (k + n + plane_kernel_offset))] * kernel[l + kernelSizeX * (m + kernelSizeY * n)];
761
- }
762
- }
763
- }
764
- const int tensor_index = plane_output_offset + indexMapper.mapGpuOutputKernelToTensorOutputOffset(i+first_x, j+first_y, k+first_z);
765
- buffer[tensor_index] = result;
766
- }
767
- }
768
- }
769
- __syncthreads();
770
- }
771
- };
772
-
773
-
774
-
775
- template<typename Indices, typename InputArgType, typename KernelArgType>
776
- struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelArgType>, GpuDevice>
777
- {
778
- typedef TensorConvolutionOp<Indices, InputArgType, KernelArgType> XprType;
779
-
780
- static const int NumDims = internal::array_size<typename TensorEvaluator<InputArgType, GpuDevice>::Dimensions>::value;
781
- static const int NumKernelDims = internal::array_size<Indices>::value;
782
- typedef typename XprType::Index Index;
783
- typedef DSizes<Index, NumDims> Dimensions;
784
- typedef typename TensorEvaluator<KernelArgType, GpuDevice>::Dimensions KernelDimensions;
785
-
786
- enum {
787
- IsAligned = TensorEvaluator<InputArgType, GpuDevice>::IsAligned & TensorEvaluator<KernelArgType, GpuDevice>::IsAligned,
788
- PacketAccess = false,
789
- BlockAccess = false,
790
- PreferBlockAccess = false,
791
- Layout = TensorEvaluator<InputArgType, GpuDevice>::Layout,
792
- CoordAccess = false, // to be implemented
793
- RawAccess = false
794
- };
795
-
796
- //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
797
- typedef internal::TensorBlockNotImplemented TensorBlock;
798
- //===--------------------------------------------------------------------===//
799
-
800
- TensorEvaluator(const XprType& op, const GpuDevice& device)
801
- : m_inputImpl(op.inputExpression(), device), m_kernelImpl(op.kernelExpression(), device), m_kernelArg(op.kernelExpression()), m_indices(op.indices()), m_buf(NULL), m_kernel(NULL), m_local_kernel(false), m_device(device)
802
- {
803
- EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<InputArgType, GpuDevice>::Layout) == static_cast<int>(TensorEvaluator<KernelArgType, GpuDevice>::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE);
804
-
805
- const typename TensorEvaluator<InputArgType, GpuDevice>::Dimensions& input_dims = m_inputImpl.dimensions();
806
- const typename TensorEvaluator<KernelArgType, GpuDevice>::Dimensions& kernel_dims = m_kernelImpl.dimensions();
807
-
808
- m_dimensions = m_inputImpl.dimensions();
809
- for (int i = 0; i < NumKernelDims; ++i) {
810
- const Index index = op.indices()[i];
811
- const Index input_dim = input_dims[index];
812
- const Index kernel_dim = kernel_dims[i];
813
- const Index result_dim = input_dim - kernel_dim + 1;
814
- m_dimensions[index] = result_dim;
815
- }
816
- }
817
-
818
- typedef typename XprType::CoeffReturnType CoeffReturnType;
819
- typedef typename PacketType<CoeffReturnType, GpuDevice>::type PacketReturnType;
820
- typedef typename InputArgType::Scalar Scalar;
821
- static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
822
-
823
- EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_dimensions; }
824
-
825
- EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data) {
826
- preloadKernel();
827
- m_inputImpl.evalSubExprsIfNeeded(NULL);
828
- if (data) {
829
- executeEval(data);
830
- return false;
831
- } else {
832
- m_buf = (Scalar*)m_device.allocate(dimensions().TotalSize() * sizeof(Scalar));
833
- executeEval(m_buf);
834
- return true;
835
- }
836
- }
837
-
838
- EIGEN_STRONG_INLINE void cleanup() {
839
- m_inputImpl.cleanup();
840
- if (m_buf) {
841
- m_device.deallocate(m_buf);
842
- m_buf = NULL;
843
- }
844
- if (m_local_kernel) {
845
- m_device.deallocate((void*)m_kernel);
846
- m_local_kernel = false;
847
- }
848
- m_kernel = NULL;
849
- }
850
-
851
- EIGEN_STRONG_INLINE void preloadKernel() {
852
- // Don't make a local copy of the kernel unless we have to (i.e. it's an
853
- // expression that needs to be evaluated)
854
- const Scalar* in_place = m_kernelImpl.data();
855
- if (in_place) {
856
- m_kernel = in_place;
857
- m_local_kernel = false;
858
- } else {
859
- size_t kernel_sz = m_kernelImpl.dimensions().TotalSize() * sizeof(Scalar);
860
- Scalar* local = (Scalar*)m_device.allocate(kernel_sz);
861
- typedef TensorEvalToOp<const KernelArgType> EvalTo;
862
- EvalTo evalToTmp(local, m_kernelArg);
863
- const bool PacketAccess = internal::IsVectorizable<GpuDevice, KernelArgType>::value;
864
- internal::TensorExecutor<const EvalTo, GpuDevice, PacketAccess>::run(evalToTmp, m_device);
865
-
866
- m_kernel = local;
867
- m_local_kernel = true;
868
- }
869
- }
870
-
871
- static unsigned int ceil(unsigned int num, unsigned int denom) {
872
- const unsigned int rounded_toward_zero = num / denom;
873
- if (num > rounded_toward_zero * denom) {
874
- return rounded_toward_zero + 1;
875
- }
876
- return rounded_toward_zero;
877
- }
878
-
879
- void executeEval(Scalar* data) const {
880
- typedef typename TensorEvaluator<InputArgType, GpuDevice>::Dimensions InputDims;
881
-
882
- const int maxSharedMem = m_device.sharedMemPerBlock();
883
- const int maxThreadsPerBlock = m_device.maxGpuThreadsPerBlock();
884
- const int maxBlocksPerProcessor = m_device.maxGpuThreadsPerMultiProcessor() / maxThreadsPerBlock;
885
- const int numMultiProcessors = m_device.getNumGpuMultiProcessors();
886
- const int warpSize = 32;
887
-
888
- switch (NumKernelDims) {
889
- case 1: {
890
- const int kernel_size = m_kernelImpl.dimensions().TotalSize();
891
-
892
- const int numX = dimensions()[m_indices[0]];
893
- const int numP = dimensions().TotalSize() / numX;
894
- int maxX;
895
- dim3 block_size;
896
-
897
- const int single_stride_dim =
898
- static_cast<int>(Layout) == static_cast<int>(ColMajor)
899
- ? 0
900
- : m_inputImpl.dimensions().rank() - 1;
901
- if (m_indices[0] == single_stride_dim) {
902
- // Maximum the reuse
903
- const int inner_dim = ((maxSharedMem / (sizeof(Scalar)) - kernel_size + 1 + 31) / 32) * 32;
904
- maxX = numext::mini<int>(inner_dim, numX);
905
- const int maxP = numext::mini<int>(maxSharedMem / ((kernel_size - 1 + maxX) * sizeof(Scalar)), numP);
906
- block_size.x = numext::mini(maxThreadsPerBlock, maxX);
907
- block_size.y = numext::mini<int>(maxThreadsPerBlock / block_size.x, maxP);
908
- }
909
- else {
910
- // Read as much as possible alongside the inner most dimension, that is the plane
911
- const int inner_dim = maxSharedMem / ((warpSize + kernel_size) * sizeof(Scalar));
912
- const int maxP = numext::mini<int>(inner_dim, numP);
913
- maxX = numext::mini<int>(maxSharedMem / (inner_dim * sizeof(Scalar)) - kernel_size + 1, numX);
914
-
915
- block_size.x = numext::mini(warpSize, maxX);
916
- block_size.y = numext::mini<int>(maxThreadsPerBlock/block_size.x, maxP);
917
- }
918
-
919
- const int shared_mem = block_size.y * (maxX + kernel_size - 1) * sizeof(Scalar);
920
- gpu_assert(shared_mem <= maxSharedMem);
921
-
922
- const int num_x_blocks = ceil(numX, maxX);
923
- const int blocksPerProcessor = numext::mini(maxBlocksPerProcessor, maxSharedMem / shared_mem);
924
- const int num_y_blocks = ceil(numMultiProcessors * blocksPerProcessor, num_x_blocks);
925
-
926
- dim3 num_blocks(num_x_blocks, numext::mini<int>(num_y_blocks, ceil(numP, block_size.y)));
927
-
928
-
929
- //cout << "launching 1D kernel with block_size.x: " << block_size.x << " block_size.y: " << block_size.y << " num_blocks.x: " << num_blocks.x << " num_blocks.y: " << num_blocks.y << " maxX: " << maxX << " shared_mem: " << shared_mem << " in stream " << m_device.stream() << endl;
930
-
931
- const array<Index, 1> indices(m_indices[0]);
932
- const array<Index, 1> kernel_dims(m_kernelImpl.dimensions()[0]);
933
- internal::IndexMapper<Index, InputDims, 1, Layout> indexMapper(
934
- m_inputImpl.dimensions(), kernel_dims, indices);
935
- switch(kernel_size) {
936
- case 4: {
937
- LAUNCH_GPU_KERNEL((EigenConvolutionKernel1D<TensorEvaluator<InputArgType, GpuDevice>, Index, InputDims, 4>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, 4, data);
938
- break;
939
- }
940
- case 7: {
941
- LAUNCH_GPU_KERNEL((EigenConvolutionKernel1D<TensorEvaluator<InputArgType, GpuDevice>, Index, InputDims, 7>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, 7, data);
942
- break;
943
- }
944
- default: {
945
- LAUNCH_GPU_KERNEL((EigenConvolutionKernel1D<TensorEvaluator<InputArgType, GpuDevice>, Index, InputDims, Dynamic>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, kernel_size, data);
946
- }
947
- }
948
- break;
949
- }
950
-
951
- case 2: {
952
- const int idxX =
953
- static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 0 : 1;
954
- const int idxY =
955
- static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 1 : 0;
956
- const int kernel_size_x = m_kernelImpl.dimensions()[idxX];
957
- const int kernel_size_y = m_kernelImpl.dimensions()[idxY];
958
-
959
- const int numX = dimensions()[m_indices[idxX]];
960
- const int numY = dimensions()[m_indices[idxY]];
961
- const int numP = dimensions().TotalSize() / (numX*numY);
962
-
963
- const float scaling_factor = sqrtf(static_cast<float>(maxSharedMem) / (sizeof(Scalar) * kernel_size_y * kernel_size_x));
964
-
965
- // Snap maxX to warp size
966
- int inner_dim = ((static_cast<int>(scaling_factor * kernel_size_x) - kernel_size_x + 1 + 32) / 32) * 32;
967
- const int maxX = numext::mini<int>(inner_dim, numX);
968
- const int maxY = numext::mini<int>(maxSharedMem / (sizeof(Scalar) * (maxX + kernel_size_x - 1)) - kernel_size_y + 1, numY);
969
- const int maxP = numext::mini<int>(maxSharedMem / ((kernel_size_x - 1 + maxX) * (kernel_size_y - 1 + maxY) * sizeof(Scalar)), numP);
970
-
971
- dim3 block_size;
972
- block_size.x = numext::mini(1024, maxX);
973
- block_size.y = numext::mini<int>(1024/block_size.x, maxY);
974
- block_size.z = numext::mini<int>(1024/(block_size.x*block_size.y), maxP);
975
-
976
- const int shared_mem = block_size.z * (maxX + kernel_size_x - 1) * (maxY + kernel_size_y - 1) * sizeof(Scalar);
977
- gpu_assert(shared_mem <= maxSharedMem);
978
-
979
- const int num_x_blocks = ceil(numX, maxX);
980
- const int num_y_blocks = ceil(numY, maxY);
981
- const int blocksPerProcessor = numext::mini(maxBlocksPerProcessor, maxSharedMem / shared_mem);
982
- const int num_z_blocks = ceil(numMultiProcessors * blocksPerProcessor, num_x_blocks * num_y_blocks);
983
-
984
- dim3 num_blocks(num_x_blocks, num_y_blocks, numext::mini<int>(num_z_blocks, ceil(numP, block_size.z)));
985
-
986
-
987
- //cout << "launching 2D kernel with block_size.x: " << block_size.x << " block_size.y: " << block_size.y << " block_size.z: " << block_size.z << " num_blocks.x: " << num_blocks.x << " num_blocks.y: " << num_blocks.y << " num_blocks.z: " << num_blocks.z << " maxX: " << maxX << " maxY: " << maxY << " maxP: " << maxP << " shared_mem: " << shared_mem << " in stream " << m_device.stream() << endl;
988
-
989
- const array<Index, 2> indices(m_indices[idxX], m_indices[idxY]);
990
- const array<Index, 2> kernel_dims(m_kernelImpl.dimensions()[idxX],
991
- m_kernelImpl.dimensions()[idxY]);
992
- internal::IndexMapper<Index, InputDims, 2, Layout> indexMapper(
993
- m_inputImpl.dimensions(), kernel_dims, indices);
994
- switch (kernel_size_x) {
995
- case 4: {
996
- switch (kernel_size_y) {
997
- case 7: {
998
- LAUNCH_GPU_KERNEL((EigenConvolutionKernel2D<TensorEvaluator<InputArgType, GpuDevice>, Index, InputDims, 4, 7>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, numY, maxY, 4, 7, data);
999
- break;
1000
- }
1001
- default: {
1002
- LAUNCH_GPU_KERNEL((EigenConvolutionKernel2D<TensorEvaluator<InputArgType, GpuDevice>, Index, InputDims, 4, Dynamic>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, numY, maxY, 4, kernel_size_y, data);
1003
- break;
1004
- }
1005
- }
1006
- break;
1007
- }
1008
- case 7: {
1009
- switch (kernel_size_y) {
1010
- case 4: {
1011
- LAUNCH_GPU_KERNEL((EigenConvolutionKernel2D<TensorEvaluator<InputArgType, GpuDevice>, Index, InputDims, 7, 4>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, numY, maxY, 7, 4, data);
1012
- break;
1013
- }
1014
- default: {
1015
- LAUNCH_GPU_KERNEL((EigenConvolutionKernel2D<TensorEvaluator<InputArgType, GpuDevice>, Index, InputDims, 7, Dynamic>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, numY, maxY, 7, kernel_size_y, data);
1016
- break;
1017
- }
1018
- }
1019
- break;
1020
- }
1021
- default: {
1022
- LAUNCH_GPU_KERNEL((EigenConvolutionKernel2D<TensorEvaluator<InputArgType, GpuDevice>, Index, InputDims, Dynamic, Dynamic>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, numY, maxY, kernel_size_x, kernel_size_y, data);
1023
- break;
1024
- }
1025
- }
1026
- break;
1027
- }
1028
-
1029
- case 3: {
1030
- const int idxX =
1031
- static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 0 : 2;
1032
- const int idxY =
1033
- static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 1 : 1;
1034
- const int idxZ =
1035
- static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 2 : 0;
1036
-
1037
- const int kernel_size_x = m_kernelImpl.dimensions()[idxX];
1038
- const int kernel_size_y = m_kernelImpl.dimensions()[idxY];
1039
- const int kernel_size_z = m_kernelImpl.dimensions()[idxZ];
1040
-
1041
- const int numX = dimensions()[m_indices[idxX]];
1042
- const int numY = dimensions()[m_indices[idxY]];
1043
- const int numZ = dimensions()[m_indices[idxZ]];
1044
- const int numP = dimensions().TotalSize() / (numX*numY*numZ);
1045
-
1046
- const int maxX = numext::mini<int>(128, numext::mini<int>(maxSharedMem / (sizeof(Scalar) * kernel_size_y * kernel_size_z) - kernel_size_x + 1, numX));
1047
- const int maxY = numext::mini<int>(128, numext::mini<int>(maxSharedMem / (sizeof(Scalar) * (maxX + kernel_size_x - 1) * kernel_size_z) - kernel_size_y + 1, numY));
1048
- const int maxZ = numext::mini<int>(128, numext::mini<int>(maxSharedMem / (sizeof(Scalar) * (maxX + kernel_size_x - 1) * (maxY + kernel_size_y - 1)) - kernel_size_z + 1, numZ));
1049
-
1050
- dim3 block_size;
1051
- block_size.x = numext::mini(32, maxX);
1052
- block_size.y = numext::mini(32, maxY);
1053
- block_size.z = numext::mini<int>(1024/(block_size.x*block_size.y), maxZ);
1054
- dim3 num_blocks(ceil(numX, maxX), ceil(numY, maxY), ceil(numZ, maxZ));
1055
-
1056
- const int shared_mem = (maxX + kernel_size_x - 1) * (maxY + kernel_size_y - 1) * (maxZ + kernel_size_z - 1) * sizeof(Scalar);
1057
- gpu_assert(shared_mem <= maxSharedMem);
1058
-
1059
- //cout << "launching 3D kernel with block_size.x: " << block_size.x << " block_size.y: " << block_size.y << " block_size.z: " << block_size.z << " num_blocks.x: " << num_blocks.x << " num_blocks.y: " << num_blocks.y << " num_blocks.z: " << num_blocks.z << " shared_mem: " << shared_mem << " in stream " << m_device.stream() << endl;
1060
- const array<Index, 3> indices(m_indices[idxX], m_indices[idxY],
1061
- m_indices[idxZ]);
1062
- const array<Index, 3> kernel_dims(m_kernelImpl.dimensions()[idxX],
1063
- m_kernelImpl.dimensions()[idxY],
1064
- m_kernelImpl.dimensions()[idxZ]);
1065
- internal::IndexMapper<Index, InputDims, 3, Layout> indexMapper(
1066
- m_inputImpl.dimensions(), kernel_dims, indices);
1067
-
1068
- LAUNCH_GPU_KERNEL((EigenConvolutionKernel3D<TensorEvaluator<InputArgType, GpuDevice>, Index, InputDims>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, numY, maxY, numZ, maxZ, kernel_size_x, kernel_size_y, kernel_size_z, data);
1069
- break;
1070
- }
1071
-
1072
- default: {
1073
- EIGEN_STATIC_ASSERT((NumKernelDims >= 1 && NumKernelDims <= 3), THIS_METHOD_IS_ONLY_FOR_OBJECTS_OF_A_SPECIFIC_SIZE);
1074
- }
1075
- }
1076
- }
1077
-
1078
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
1079
- {
1080
- eigen_assert(m_buf);
1081
- eigen_assert(index < m_dimensions.TotalSize());
1082
- return m_buf[index];
1083
- }
1084
-
1085
- template<int LoadMode>
1086
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(const Index index) const
1087
- {
1088
- eigen_assert(m_buf);
1089
- eigen_assert(index < m_dimensions.TotalSize());
1090
- return internal::ploadt<PacketReturnType, LoadMode>(m_buf+index);
1091
- }
1092
-
1093
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
1094
- costPerCoeff(bool vectorized) const {
1095
- // TODO(rmlarsen): FIXME: For now, this is just a copy of the CPU cost
1096
- // model.
1097
- const double kernel_size = m_kernelImpl.dimensions().TotalSize();
1098
- // We ignore the use of fused multiply-add.
1099
- const double convolve_compute_cost =
1100
- TensorOpCost::AddCost<Scalar>() + TensorOpCost::MulCost<Scalar>();
1101
- const double firstIndex_compute_cost =
1102
- NumDims *
1103
- (2 * TensorOpCost::AddCost<Index>() + 2 * TensorOpCost::MulCost<Index>() +
1104
- TensorOpCost::DivCost<Index>());
1105
- return TensorOpCost(0, 0, firstIndex_compute_cost, vectorized, PacketSize) +
1106
- kernel_size * (m_inputImpl.costPerCoeff(vectorized) +
1107
- m_kernelImpl.costPerCoeff(vectorized) +
1108
- TensorOpCost(0, 0, convolve_compute_cost, vectorized,
1109
- PacketSize));
1110
- }
1111
-
1112
- private:
1113
- // No assignment (copies are needed by the kernels)
1114
- TensorEvaluator& operator = (const TensorEvaluator&);
1115
-
1116
- TensorEvaluator<InputArgType, GpuDevice> m_inputImpl;
1117
- TensorEvaluator<KernelArgType, GpuDevice> m_kernelImpl;
1118
- KernelArgType m_kernelArg;
1119
- Indices m_indices;
1120
- Dimensions m_dimensions;
1121
- Scalar* m_buf;
1122
- const Scalar* m_kernel;
1123
- bool m_local_kernel;
1124
-
1125
- const GpuDevice& m_device;
1126
- };
1127
- #endif
1128
-
1129
-
1130
- } // end namespace Eigen
1131
-
1132
- #endif // EIGEN_CXX11_TENSOR_TENSOR_CONVOLUTION_H