sequenzo 0.1.18__cp39-cp39-macosx_10_9_universal2.whl → 0.1.20__cp39-cp39-macosx_10_9_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sequenzo might be problematic. Click here for more details.

Files changed (360):
  1. sequenzo/__init__.py +39 -7
  2. sequenzo/big_data/clara/utils/get_weighted_diss.c +157 -157
  3. sequenzo/big_data/clara/utils/get_weighted_diss.cpython-39-darwin.so +0 -0
  4. sequenzo/clustering/KMedoids.py +39 -0
  5. sequenzo/clustering/hierarchical_clustering.py +108 -6
  6. sequenzo/define_sequence_data.py +10 -1
  7. sequenzo/dissimilarity_measures/get_distance_matrix.py +2 -3
  8. sequenzo/dissimilarity_measures/get_substitution_cost_matrix.py +1 -1
  9. sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.c +157 -157
  10. sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.cpython-39-darwin.so +0 -0
  11. sequenzo/dissimilarity_measures/utils/seqconc.c +157 -157
  12. sequenzo/dissimilarity_measures/utils/seqconc.cpython-39-darwin.so +0 -0
  13. sequenzo/dissimilarity_measures/utils/seqdss.c +157 -157
  14. sequenzo/dissimilarity_measures/utils/seqdss.cpython-39-darwin.so +0 -0
  15. sequenzo/dissimilarity_measures/utils/seqdur.c +157 -157
  16. sequenzo/dissimilarity_measures/utils/seqdur.cpython-39-darwin.so +0 -0
  17. sequenzo/dissimilarity_measures/utils/seqlength.c +157 -157
  18. sequenzo/dissimilarity_measures/utils/seqlength.cpython-39-darwin.so +0 -0
  19. sequenzo/multidomain/cat.py +0 -53
  20. sequenzo/multidomain/dat.py +11 -3
  21. sequenzo/multidomain/idcd.py +0 -3
  22. sequenzo/multidomain/linked_polyad.py +0 -1
  23. sequenzo/openmp_setup.py +233 -0
  24. sequenzo/visualization/plot_transition_matrix.py +21 -22
  25. {sequenzo-0.1.18.dist-info → sequenzo-0.1.20.dist-info}/METADATA +71 -10
  26. sequenzo-0.1.20.dist-info/RECORD +215 -0
  27. sequenzo/dissimilarity_measures/setup.py +0 -35
  28. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Cholesky/LDLT.h +0 -688
  29. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Cholesky/LLT.h +0 -558
  30. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h +0 -99
  31. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +0 -682
  32. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +0 -346
  33. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +0 -462
  34. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +0 -91
  35. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/EigenSolver.h +0 -622
  36. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +0 -418
  37. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +0 -226
  38. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +0 -374
  39. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +0 -158
  40. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/RealQZ.h +0 -657
  41. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/RealSchur.h +0 -558
  42. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +0 -77
  43. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +0 -904
  44. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +0 -87
  45. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +0 -561
  46. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/AlignedBox.h +0 -486
  47. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/AngleAxis.h +0 -247
  48. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/EulerAngles.h +0 -114
  49. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/Homogeneous.h +0 -501
  50. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/Hyperplane.h +0 -282
  51. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/OrthoMethods.h +0 -235
  52. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/ParametrizedLine.h +0 -232
  53. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/Quaternion.h +0 -870
  54. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/Rotation2D.h +0 -199
  55. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/RotationBase.h +0 -206
  56. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/Scaling.h +0 -188
  57. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/Transform.h +0 -1563
  58. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/Translation.h +0 -202
  59. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/Umeyama.h +0 -166
  60. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +0 -168
  61. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Householder/BlockHouseholder.h +0 -110
  62. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Householder/Householder.h +0 -176
  63. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Householder/HouseholderSequence.h +0 -545
  64. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +0 -226
  65. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +0 -212
  66. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +0 -229
  67. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +0 -394
  68. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +0 -453
  69. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +0 -444
  70. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +0 -198
  71. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +0 -117
  72. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Jacobi/Jacobi.h +0 -483
  73. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/KLUSupport/KLUSupport.h +0 -358
  74. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/LU/Determinant.h +0 -117
  75. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/LU/FullPivLU.h +0 -877
  76. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/LU/InverseImpl.h +0 -432
  77. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/LU/PartialPivLU.h +0 -624
  78. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h +0 -83
  79. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/LU/arch/InverseSize4.h +0 -351
  80. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/MetisSupport/MetisSupport.h +0 -137
  81. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/OrderingMethods/Amd.h +0 -435
  82. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +0 -1863
  83. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/OrderingMethods/Ordering.h +0 -153
  84. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +0 -678
  85. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +0 -545
  86. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/QR/ColPivHouseholderQR.h +0 -674
  87. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +0 -97
  88. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +0 -635
  89. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/QR/FullPivHouseholderQR.h +0 -713
  90. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/QR/HouseholderQR.h +0 -434
  91. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h +0 -68
  92. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +0 -335
  93. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SVD/BDCSVD.h +0 -1366
  94. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SVD/JacobiSVD.h +0 -812
  95. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +0 -91
  96. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SVD/SVDBase.h +0 -376
  97. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SVD/UpperBidiagonalization.h +0 -414
  98. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +0 -697
  99. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +0 -174
  100. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/AmbiVector.h +0 -378
  101. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/CompressedStorage.h +0 -274
  102. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +0 -352
  103. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h +0 -67
  104. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseAssign.h +0 -270
  105. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseBlock.h +0 -571
  106. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseColEtree.h +0 -206
  107. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +0 -370
  108. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +0 -722
  109. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +0 -150
  110. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +0 -342
  111. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +0 -138
  112. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseDot.h +0 -98
  113. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseFuzzy.h +0 -29
  114. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseMap.h +0 -305
  115. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseMatrix.h +0 -1518
  116. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +0 -398
  117. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparsePermutation.h +0 -178
  118. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseProduct.h +0 -181
  119. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseRedux.h +0 -49
  120. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseRef.h +0 -397
  121. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +0 -659
  122. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseSolverBase.h +0 -124
  123. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +0 -198
  124. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseTranspose.h +0 -92
  125. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseTriangularView.h +0 -189
  126. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseUtil.h +0 -186
  127. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseVector.h +0 -478
  128. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseView.h +0 -254
  129. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/TriangularSolver.h +0 -315
  130. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU.h +0 -923
  131. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLUImpl.h +0 -66
  132. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +0 -226
  133. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_Structs.h +0 -110
  134. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +0 -375
  135. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_Utils.h +0 -80
  136. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h +0 -181
  137. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +0 -179
  138. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +0 -107
  139. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +0 -280
  140. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +0 -126
  141. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +0 -130
  142. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +0 -223
  143. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h +0 -258
  144. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h +0 -137
  145. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h +0 -136
  146. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h +0 -83
  147. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseQR/SparseQR.h +0 -758
  148. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/StlSupport/StdDeque.h +0 -116
  149. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/StlSupport/StdList.h +0 -106
  150. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/StlSupport/StdVector.h +0 -131
  151. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/StlSupport/details.h +0 -84
  152. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +0 -1025
  153. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +0 -642
  154. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/misc/Image.h +0 -82
  155. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/misc/Kernel.h +0 -79
  156. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/misc/RealSvd2x2.h +0 -55
  157. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/misc/blas.h +0 -440
  158. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/misc/lapack.h +0 -152
  159. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/misc/lapacke.h +0 -16292
  160. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/misc/lapacke_mangling.h +0 -17
  161. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +0 -358
  162. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +0 -696
  163. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/BlockMethods.h +0 -1442
  164. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h +0 -115
  165. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +0 -177
  166. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/IndexedViewMethods.h +0 -262
  167. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +0 -152
  168. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +0 -95
  169. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/ReshapedMethods.h +0 -149
  170. sequenzo/dissimilarity_measures/src/eigen/blas/BandTriangularSolver.h +0 -97
  171. sequenzo/dissimilarity_measures/src/eigen/blas/GeneralRank1Update.h +0 -44
  172. sequenzo/dissimilarity_measures/src/eigen/blas/PackedSelfadjointProduct.h +0 -53
  173. sequenzo/dissimilarity_measures/src/eigen/blas/PackedTriangularMatrixVector.h +0 -79
  174. sequenzo/dissimilarity_measures/src/eigen/blas/PackedTriangularSolverVector.h +0 -88
  175. sequenzo/dissimilarity_measures/src/eigen/blas/Rank2Update.h +0 -57
  176. sequenzo/dissimilarity_measures/src/eigen/blas/common.h +0 -175
  177. sequenzo/dissimilarity_measures/src/eigen/blas/f2c/datatypes.h +0 -24
  178. sequenzo/dissimilarity_measures/src/eigen/blas/level1_cplx_impl.h +0 -155
  179. sequenzo/dissimilarity_measures/src/eigen/blas/level1_impl.h +0 -144
  180. sequenzo/dissimilarity_measures/src/eigen/blas/level1_real_impl.h +0 -122
  181. sequenzo/dissimilarity_measures/src/eigen/blas/level2_cplx_impl.h +0 -360
  182. sequenzo/dissimilarity_measures/src/eigen/blas/level2_impl.h +0 -553
  183. sequenzo/dissimilarity_measures/src/eigen/blas/level2_real_impl.h +0 -306
  184. sequenzo/dissimilarity_measures/src/eigen/blas/level3_impl.h +0 -702
  185. sequenzo/dissimilarity_measures/src/eigen/debug/gdb/__init__.py +0 -1
  186. sequenzo/dissimilarity_measures/src/eigen/debug/gdb/printers.py +0 -314
  187. sequenzo/dissimilarity_measures/src/eigen/lapack/lapack_common.h +0 -29
  188. sequenzo/dissimilarity_measures/src/eigen/scripts/relicense.py +0 -69
  189. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/Tensor.h +0 -554
  190. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h +0 -329
  191. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h +0 -247
  192. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +0 -1176
  193. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h +0 -1559
  194. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h +0 -1093
  195. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h +0 -518
  196. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h +0 -377
  197. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h +0 -1023
  198. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h +0 -73
  199. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h +0 -6
  200. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h +0 -1413
  201. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h +0 -575
  202. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionSycl.h +0 -1650
  203. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h +0 -1679
  204. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h +0 -456
  205. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h +0 -1132
  206. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConvolutionSycl.h +0 -544
  207. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h +0 -214
  208. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h +0 -347
  209. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h +0 -137
  210. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h +0 -6
  211. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h +0 -104
  212. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceGpu.h +0 -389
  213. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h +0 -1048
  214. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h +0 -409
  215. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h +0 -236
  216. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h +0 -490
  217. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h +0 -236
  218. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h +0 -983
  219. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h +0 -703
  220. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h +0 -388
  221. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h +0 -669
  222. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h +0 -379
  223. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h +0 -237
  224. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h +0 -191
  225. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h +0 -488
  226. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h +0 -302
  227. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorGlobalFunctions.h +0 -33
  228. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h +0 -99
  229. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaUndefines.h +0 -44
  230. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h +0 -79
  231. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h +0 -603
  232. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h +0 -738
  233. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h +0 -247
  234. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorInitializer.h +0 -82
  235. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h +0 -263
  236. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h +0 -216
  237. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMacros.h +0 -98
  238. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h +0 -327
  239. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h +0 -311
  240. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h +0 -1102
  241. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h +0 -708
  242. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h +0 -291
  243. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h +0 -322
  244. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h +0 -998
  245. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h +0 -6
  246. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h +0 -966
  247. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReductionSycl.h +0 -582
  248. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h +0 -454
  249. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h +0 -465
  250. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h +0 -528
  251. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorScanSycl.h +0 -513
  252. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h +0 -471
  253. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h +0 -161
  254. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h +0 -346
  255. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorTrace.h +0 -303
  256. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h +0 -264
  257. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h +0 -249
  258. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h +0 -629
  259. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/TensorSymmetry/DynamicSymmetry.h +0 -293
  260. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/TensorSymmetry/StaticSymmetry.h +0 -236
  261. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/TensorSymmetry/Symmetry.h +0 -338
  262. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/TensorSymmetry/util/TemplateGroupTheory.h +0 -669
  263. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/Barrier.h +0 -67
  264. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/EventCount.h +0 -249
  265. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h +0 -486
  266. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/RunQueue.h +0 -236
  267. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadCancel.h +0 -23
  268. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadEnvironment.h +0 -40
  269. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadLocal.h +0 -301
  270. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadPoolInterface.h +0 -48
  271. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadYield.h +0 -20
  272. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/util/CXX11Meta.h +0 -537
  273. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/util/CXX11Workarounds.h +0 -88
  274. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/util/EmulateArray.h +0 -261
  275. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/util/MaxSizeVector.h +0 -158
  276. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/AutoDiff/AutoDiffJacobian.h +0 -108
  277. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h +0 -730
  278. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/AutoDiff/AutoDiffVector.h +0 -220
  279. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/BVH/BVAlgorithms.h +0 -293
  280. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/BVH/KdBVH.h +0 -223
  281. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Eigenvalues/ArpackSelfAdjointEigenSolver.h +0 -790
  282. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/EulerAngles/EulerAngles.h +0 -355
  283. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/EulerAngles/EulerSystem.h +0 -305
  284. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/FFT/ei_fftw_impl.h +0 -261
  285. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/FFT/ei_kissfft_impl.h +0 -449
  286. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/IterativeSolvers/ConstrainedConjGrad.h +0 -187
  287. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/IterativeSolvers/DGMRES.h +0 -511
  288. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/IterativeSolvers/GMRES.h +0 -335
  289. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/IterativeSolvers/IDRS.h +0 -436
  290. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/IterativeSolvers/IncompleteLU.h +0 -90
  291. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/IterativeSolvers/IterationController.h +0 -154
  292. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/IterativeSolvers/MINRES.h +0 -267
  293. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/IterativeSolvers/Scaling.h +0 -193
  294. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h +0 -305
  295. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/LevenbergMarquardt/LMcovar.h +0 -84
  296. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/LevenbergMarquardt/LMonestep.h +0 -202
  297. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/LevenbergMarquardt/LMpar.h +0 -160
  298. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/LevenbergMarquardt/LMqrsolv.h +0 -188
  299. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/LevenbergMarquardt/LevenbergMarquardt.h +0 -396
  300. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h +0 -441
  301. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h +0 -569
  302. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h +0 -373
  303. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/MatrixFunctions/MatrixPower.h +0 -705
  304. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/MatrixFunctions/MatrixSquareRoot.h +0 -368
  305. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/MatrixFunctions/StemFunction.h +0 -117
  306. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/MoreVectorization/MathFunctions.h +0 -95
  307. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/HybridNonLinearSolver.h +0 -601
  308. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/LevenbergMarquardt.h +0 -657
  309. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/chkder.h +0 -66
  310. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/covar.h +0 -70
  311. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/dogleg.h +0 -107
  312. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/fdjac1.h +0 -79
  313. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/lmpar.h +0 -298
  314. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/qrsolv.h +0 -91
  315. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/r1mpyq.h +0 -30
  316. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/r1updt.h +0 -99
  317. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/rwupdt.h +0 -49
  318. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NumericalDiff/NumericalDiff.h +0 -130
  319. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Polynomials/Companion.h +0 -280
  320. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Polynomials/PolynomialSolver.h +0 -428
  321. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Polynomials/PolynomialUtils.h +0 -143
  322. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Skyline/SkylineInplaceLU.h +0 -352
  323. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Skyline/SkylineMatrix.h +0 -862
  324. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Skyline/SkylineMatrixBase.h +0 -212
  325. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Skyline/SkylineProduct.h +0 -295
  326. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Skyline/SkylineStorage.h +0 -259
  327. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Skyline/SkylineUtil.h +0 -89
  328. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SparseExtra/BlockOfDynamicSparseMatrix.h +0 -122
  329. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SparseExtra/BlockSparseMatrix.h +0 -1079
  330. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SparseExtra/DynamicSparseMatrix.h +0 -404
  331. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SparseExtra/MarketIO.h +0 -282
  332. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SparseExtra/MatrixMarketIterator.h +0 -247
  333. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SparseExtra/RandomSetter.h +0 -349
  334. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsArrayAPI.h +0 -286
  335. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsBFloat16.h +0 -68
  336. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsFunctors.h +0 -357
  337. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsHalf.h +0 -66
  338. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsImpl.h +0 -1959
  339. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsPacketMath.h +0 -118
  340. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/HipVectorCompatibility.h +0 -67
  341. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsArrayAPI.h +0 -167
  342. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsBFloat16.h +0 -58
  343. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h +0 -330
  344. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h +0 -58
  345. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h +0 -2045
  346. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h +0 -79
  347. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/arch/AVX/BesselFunctions.h +0 -46
  348. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/arch/AVX/SpecialFunctions.h +0 -16
  349. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/arch/AVX512/BesselFunctions.h +0 -46
  350. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/arch/AVX512/SpecialFunctions.h +0 -16
  351. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/arch/GPU/SpecialFunctions.h +0 -369
  352. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/arch/NEON/BesselFunctions.h +0 -54
  353. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/arch/NEON/SpecialFunctions.h +0 -34
  354. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Splines/Spline.h +0 -507
  355. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Splines/SplineFitting.h +0 -431
  356. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Splines/SplineFwd.h +0 -93
  357. sequenzo-0.1.18.dist-info/RECORD +0 -544
  358. {sequenzo-0.1.18.dist-info → sequenzo-0.1.20.dist-info}/WHEEL +0 -0
  359. {sequenzo-0.1.18.dist-info → sequenzo-0.1.20.dist-info}/licenses/LICENSE +0 -0
  360. {sequenzo-0.1.18.dist-info → sequenzo-0.1.20.dist-info}/top_level.txt +0 -0
@@ -1,1132 +0,0 @@
1
- // This file is part of Eigen, a lightweight C++ template library
2
- // for linear algebra.
3
- //
4
- // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
5
- //
6
- // This Source Code Form is subject to the terms of the Mozilla
7
- // Public License v. 2.0. If a copy of the MPL was not distributed
8
- // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9
-
10
- #ifndef EIGEN_CXX11_TENSOR_TENSOR_CONVOLUTION_H
11
- #define EIGEN_CXX11_TENSOR_TENSOR_CONVOLUTION_H
12
-
13
- namespace Eigen {
14
-
15
- /** \class TensorConvolution
16
- * \ingroup CXX11_Tensor_Module
17
- *
18
- * \brief Tensor convolution class.
19
- *
20
- *
21
- */
22
- namespace internal {
23
-
24
- template <typename Index, typename InputDims, int NumKernelDims, int Layout>
25
- class IndexMapper {
26
- public:
27
- IndexMapper(const InputDims& input_dims, const array<Index, NumKernelDims>& kernel_dims,
28
- const array<Index, NumKernelDims>& indices) {
29
-
30
- array<Index, NumDims> dimensions = input_dims;
31
- for (int i = 0; i < NumKernelDims; ++i) {
32
- const Index index = indices[i];
33
- const Index input_dim = input_dims[index];
34
- const Index kernel_dim = kernel_dims[i];
35
- const Index result_dim = input_dim - kernel_dim + 1;
36
- dimensions[index] = result_dim;
37
- }
38
-
39
- array<Index, NumDims> inputStrides;
40
- array<Index, NumDims> outputStrides;
41
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
42
- inputStrides[0] = 1;
43
- outputStrides[0] = 1;
44
- for (int i = 1; i < NumDims; ++i) {
45
- inputStrides[i] = inputStrides[i-1] * input_dims[i-1];
46
- outputStrides[i] = outputStrides[i-1] * dimensions[i-1];
47
- }
48
- } else {
49
- inputStrides[NumDims - 1] = 1;
50
- outputStrides[NumDims - 1] = 1;
51
- for (int i = static_cast<int>(NumDims) - 2; i >= 0; --i) {
52
- inputStrides[i] = inputStrides[i + 1] * input_dims[i + 1];
53
- outputStrides[i] = outputStrides[i + 1] * dimensions[i + 1];
54
- }
55
- }
56
-
57
- array<Index, NumDims> gpuInputDimensions;
58
- array<Index, NumDims> gpuOutputDimensions;
59
- array<Index, NumDims> tmp = dimensions;
60
- array<Index, NumDims> ordering;
61
- const size_t offset = static_cast<int>(Layout) == static_cast<int>(ColMajor)
62
- ? 0
63
- : NumDims - NumKernelDims;
64
- for (int i = 0; i < NumKernelDims; ++i) {
65
- const Index index = i + offset;
66
- ordering[index] = indices[i];
67
- tmp[indices[i]] = -1;
68
- gpuInputDimensions[index] = input_dims[indices[i]];
69
- gpuOutputDimensions[index] = dimensions[indices[i]];
70
- }
71
-
72
- int written = static_cast<int>(Layout) == static_cast<int>(ColMajor)
73
- ? NumKernelDims
74
- : 0;
75
- for (int i = 0; i < NumDims; ++i) {
76
- if (tmp[i] >= 0) {
77
- ordering[written] = i;
78
- gpuInputDimensions[written] = input_dims[i];
79
- gpuOutputDimensions[written] = dimensions[i];
80
- ++written;
81
- }
82
- }
83
-
84
- for (int i = 0; i < NumDims; ++i) {
85
- m_inputStrides[i] = inputStrides[ordering[i]];
86
- m_outputStrides[i] = outputStrides[ordering[i]];
87
- }
88
-
89
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
90
- for (int i = 0; i < NumDims; ++i) {
91
- if (i > NumKernelDims) {
92
- m_gpuInputStrides[i] =
93
- m_gpuInputStrides[i - 1] * gpuInputDimensions[i - 1];
94
- m_gpuOutputStrides[i] =
95
- m_gpuOutputStrides[i - 1] * gpuOutputDimensions[i - 1];
96
- } else {
97
- m_gpuInputStrides[i] = 1;
98
- m_gpuOutputStrides[i] = 1;
99
- }
100
- }
101
- } else {
102
- for (int i = NumDims - 1; i >= 0; --i) {
103
- if (static_cast<size_t>(i + 1) < offset) {
104
- m_gpuInputStrides[i] =
105
- m_gpuInputStrides[i + 1] * gpuInputDimensions[i + 1];
106
- m_gpuOutputStrides[i] =
107
- m_gpuOutputStrides[i + 1] * gpuOutputDimensions[i + 1];
108
- } else {
109
- m_gpuInputStrides[i] = 1;
110
- m_gpuOutputStrides[i] = 1;
111
- }
112
- }
113
- }
114
- }
115
-
116
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapGpuInputPlaneToTensorInputOffset(Index p) const {
117
- Index inputIndex = 0;
118
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
119
- for (int d = NumDims - 1; d > NumKernelDims; --d) {
120
- const Index idx = p / m_gpuInputStrides[d];
121
- inputIndex += idx * m_inputStrides[d];
122
- p -= idx * m_gpuInputStrides[d];
123
- }
124
- inputIndex += p * m_inputStrides[NumKernelDims];
125
- } else {
126
- std::ptrdiff_t limit = 0;
127
- if (NumKernelDims < NumDims) {
128
- limit = NumDims - NumKernelDims - 1;
129
- }
130
- for (int d = 0; d < limit; ++d) {
131
- const Index idx = p / m_gpuInputStrides[d];
132
- inputIndex += idx * m_inputStrides[d];
133
- p -= idx * m_gpuInputStrides[d];
134
- }
135
- inputIndex += p * m_inputStrides[limit];
136
- }
137
- return inputIndex;
138
- }
139
-
140
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapGpuOutputPlaneToTensorOutputOffset(Index p) const {
141
- Index outputIndex = 0;
142
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
143
- for (int d = NumDims - 1; d > NumKernelDims; --d) {
144
- const Index idx = p / m_gpuOutputStrides[d];
145
- outputIndex += idx * m_outputStrides[d];
146
- p -= idx * m_gpuOutputStrides[d];
147
- }
148
- outputIndex += p * m_outputStrides[NumKernelDims];
149
- } else {
150
- std::ptrdiff_t limit = 0;
151
- if (NumKernelDims < NumDims) {
152
- limit = NumDims - NumKernelDims - 1;
153
- }
154
- for (int d = 0; d < limit; ++d) {
155
- const Index idx = p / m_gpuOutputStrides[d];
156
- outputIndex += idx * m_outputStrides[d];
157
- p -= idx * m_gpuOutputStrides[d];
158
- }
159
- outputIndex += p * m_outputStrides[limit];
160
- }
161
- return outputIndex;
162
- }
163
-
164
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapGpuInputKernelToTensorInputOffset(Index i) const {
165
- const size_t offset = static_cast<int>(Layout) == static_cast<int>(ColMajor)
166
- ? 0
167
- : NumDims - NumKernelDims;
168
- return i * m_inputStrides[offset];
169
- }
170
-
171
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapGpuOutputKernelToTensorOutputOffset(Index i) const {
172
- const size_t offset = static_cast<int>(Layout) == static_cast<int>(ColMajor)
173
- ? 0
174
- : NumDims - NumKernelDims;
175
- return i * m_outputStrides[offset];
176
- }
177
-
178
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapGpuInputKernelToTensorInputOffset(Index i, Index j) const {
179
- const size_t offset = static_cast<int>(Layout) == static_cast<int>(ColMajor)
180
- ? 0
181
- : NumDims - NumKernelDims;
182
- return i * m_inputStrides[offset] + j * m_inputStrides[offset + 1];
183
- }
184
-
185
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapGpuOutputKernelToTensorOutputOffset(Index i, Index j) const {
186
- const size_t offset = static_cast<int>(Layout) == static_cast<int>(ColMajor)
187
- ? 0
188
- : NumDims - NumKernelDims;
189
- return i * m_outputStrides[offset] + j * m_outputStrides[offset + 1];
190
- }
191
-
192
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapGpuInputKernelToTensorInputOffset(Index i, Index j, Index k) const {
193
- const size_t offset = static_cast<int>(Layout) == static_cast<int>(ColMajor)
194
- ? 0
195
- : NumDims - NumKernelDims;
196
- return i * m_inputStrides[offset] + j * m_inputStrides[offset + 1] +
197
- k * m_inputStrides[offset + 2];
198
- }
199
-
200
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapGpuOutputKernelToTensorOutputOffset(Index i, Index j, Index k) const {
201
- const size_t offset = static_cast<int>(Layout) == static_cast<int>(ColMajor)
202
- ? 0
203
- : NumDims - NumKernelDims;
204
- return i * m_outputStrides[offset] + j * m_outputStrides[offset + 1] +
205
- k * m_outputStrides[offset + 2];
206
- }
207
-
208
- private:
209
- static const int NumDims = internal::array_size<InputDims>::value;
210
- array<Index, NumDims> m_inputStrides;
211
- array<Index, NumDims> m_outputStrides;
212
- array<Index, NumDims> m_gpuInputStrides;
213
- array<Index, NumDims> m_gpuOutputStrides;
214
- };
215
-
216
-
217
-
218
- template<typename Dimensions, typename InputXprType, typename KernelXprType>
219
- struct traits<TensorConvolutionOp<Dimensions, InputXprType, KernelXprType> >
220
- {
221
- // Type promotion to handle the case where the types of the lhs and the rhs are different.
222
- typedef typename promote_storage_type<typename InputXprType::Scalar,
223
- typename KernelXprType::Scalar>::ret Scalar;
224
- typedef typename promote_storage_type<typename traits<InputXprType>::StorageKind,
225
- typename traits<KernelXprType>::StorageKind>::ret StorageKind;
226
- typedef typename promote_index_type<typename traits<InputXprType>::Index,
227
- typename traits<KernelXprType>::Index>::type Index;
228
- typedef typename InputXprType::Nested LhsNested;
229
- typedef typename KernelXprType::Nested RhsNested;
230
- typedef typename remove_reference<LhsNested>::type _LhsNested;
231
- typedef typename remove_reference<RhsNested>::type _RhsNested;
232
- static const int NumDimensions = traits<InputXprType>::NumDimensions;
233
- static const int Layout = traits<InputXprType>::Layout;
234
- typedef typename conditional<Pointer_type_promotion<typename InputXprType::Scalar, Scalar>::val,
235
- typename traits<InputXprType>::PointerType, typename traits<KernelXprType>::PointerType>::type PointerType;
236
-
237
- enum {
238
- Flags = 0
239
- };
240
- };
241
-
242
- template<typename Dimensions, typename InputXprType, typename KernelXprType>
243
- struct eval<TensorConvolutionOp<Dimensions, InputXprType, KernelXprType>, Eigen::Dense>
244
- {
245
- typedef const TensorConvolutionOp<Dimensions, InputXprType, KernelXprType>& type;
246
- };
247
-
248
- template<typename Dimensions, typename InputXprType, typename KernelXprType>
249
- struct nested<TensorConvolutionOp<Dimensions, InputXprType, KernelXprType>, 1, typename eval<TensorConvolutionOp<Dimensions, InputXprType, KernelXprType> >::type>
250
- {
251
- typedef TensorConvolutionOp<Dimensions, InputXprType, KernelXprType> type;
252
- };
253
-
254
- } // end namespace internal
255
-
256
-
257
-
258
- template<typename Indices, typename InputXprType, typename KernelXprType>
259
- class TensorConvolutionOp : public TensorBase<TensorConvolutionOp<Indices, InputXprType, KernelXprType>, ReadOnlyAccessors>
260
- {
261
- public:
262
- typedef typename Eigen::internal::traits<TensorConvolutionOp>::Scalar Scalar;
263
- typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
264
- typedef typename internal::promote_storage_type<typename InputXprType::CoeffReturnType,
265
- typename KernelXprType::CoeffReturnType>::ret CoeffReturnType;
266
- typedef typename Eigen::internal::nested<TensorConvolutionOp>::type Nested;
267
- typedef typename Eigen::internal::traits<TensorConvolutionOp>::StorageKind StorageKind;
268
- typedef typename Eigen::internal::traits<TensorConvolutionOp>::Index Index;
269
-
270
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorConvolutionOp(const InputXprType& input, const KernelXprType& kernel, const Indices& dims)
271
- : m_input_xpr(input), m_kernel_xpr(kernel), m_indices(dims) {}
272
-
273
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
274
- const Indices& indices() const { return m_indices; }
275
-
276
- /** \returns the nested expressions */
277
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
278
- const typename internal::remove_all<typename InputXprType::Nested>::type&
279
- inputExpression() const { return m_input_xpr; }
280
-
281
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
282
- const typename internal::remove_all<typename KernelXprType::Nested>::type&
283
- kernelExpression() const { return m_kernel_xpr; }
284
-
285
- protected:
286
- typename InputXprType::Nested m_input_xpr;
287
- typename KernelXprType::Nested m_kernel_xpr;
288
- const Indices m_indices;
289
- };
290
-
291
-
292
- template<typename Indices, typename InputArgType, typename KernelArgType, typename Device>
293
- struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelArgType>, Device>
294
- {
295
- typedef TensorConvolutionOp<Indices, InputArgType, KernelArgType> XprType;
296
-
297
- static const int NumDims = internal::array_size<typename TensorEvaluator<InputArgType, Device>::Dimensions>::value;
298
- static const int NumKernelDims = internal::array_size<Indices>::value;
299
- typedef typename XprType::Index Index;
300
- typedef DSizes<Index, NumDims> Dimensions;
301
-
302
- typedef typename XprType::Scalar Scalar;
303
- typedef typename XprType::CoeffReturnType CoeffReturnType;
304
- typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
305
- static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
306
- typedef StorageMemory<Scalar, Device> Storage;
307
- typedef typename Storage::Type EvaluatorPointerType;
308
-
309
- enum {
310
- IsAligned = int(TensorEvaluator<InputArgType, Device>::IsAligned) & int(TensorEvaluator<KernelArgType, Device>::IsAligned),
311
- PacketAccess = int(TensorEvaluator<InputArgType, Device>::PacketAccess) & int(TensorEvaluator<KernelArgType, Device>::PacketAccess),
312
- BlockAccess = false,
313
- PreferBlockAccess = false,
314
- Layout = TensorEvaluator<InputArgType, Device>::Layout,
315
- CoordAccess = false, // to be implemented
316
- RawAccess = false
317
- };
318
-
319
- //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
320
- typedef internal::TensorBlockNotImplemented TensorBlock;
321
- //===--------------------------------------------------------------------===//
322
-
323
- EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
324
- : m_inputImpl(op.inputExpression(), device), m_kernelImpl(op.kernelExpression(), device), m_kernelArg(op.kernelExpression()), m_kernel(NULL), m_local_kernel(false), m_device(device)
325
- {
326
- EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<InputArgType, Device>::Layout) == static_cast<int>(TensorEvaluator<KernelArgType, Device>::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE);
327
-
328
- const typename TensorEvaluator<InputArgType, Device>::Dimensions& input_dims = m_inputImpl.dimensions();
329
- const typename TensorEvaluator<KernelArgType, Device>::Dimensions& kernel_dims = m_kernelImpl.dimensions();
330
-
331
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
332
- m_inputStride[0] = 1;
333
- for (int i = 1; i < NumDims; ++i) {
334
- m_inputStride[i] = m_inputStride[i - 1] * input_dims[i - 1];
335
- }
336
- } else {
337
- m_inputStride[NumDims - 1] = 1;
338
- for (int i = NumDims - 2; i >= 0; --i) {
339
- m_inputStride[i] = m_inputStride[i + 1] * input_dims[i + 1];
340
- }
341
- }
342
-
343
- m_dimensions = m_inputImpl.dimensions();
344
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
345
- for (int i = 0; i < NumKernelDims; ++i) {
346
- const Index index = op.indices()[i];
347
- const Index input_dim = input_dims[index];
348
- const Index kernel_dim = kernel_dims[i];
349
- const Index result_dim = input_dim - kernel_dim + 1;
350
- m_dimensions[index] = result_dim;
351
- if (i > 0) {
352
- m_kernelStride[i] = m_kernelStride[i - 1] * kernel_dims[i - 1];
353
- } else {
354
- m_kernelStride[0] = 1;
355
- }
356
- m_indexStride[i] = m_inputStride[index];
357
- }
358
-
359
- m_outputStride[0] = 1;
360
- for (int i = 1; i < NumDims; ++i) {
361
- m_outputStride[i] = m_outputStride[i - 1] * m_dimensions[i - 1];
362
- }
363
- } else {
364
- for (int i = NumKernelDims - 1; i >= 0; --i) {
365
- const Index index = op.indices()[i];
366
- const Index input_dim = input_dims[index];
367
- const Index kernel_dim = kernel_dims[i];
368
- const Index result_dim = input_dim - kernel_dim + 1;
369
- m_dimensions[index] = result_dim;
370
- if (i < NumKernelDims - 1) {
371
- m_kernelStride[i] = m_kernelStride[i + 1] * kernel_dims[i + 1];
372
- } else {
373
- m_kernelStride[NumKernelDims - 1] = 1;
374
- }
375
- m_indexStride[i] = m_inputStride[index];
376
- }
377
-
378
- m_outputStride[NumDims - 1] = 1;
379
- for (int i = NumDims - 2; i >= 0; --i) {
380
- m_outputStride[i] = m_outputStride[i + 1] * m_dimensions[i + 1];
381
- }
382
- }
383
- }
384
-
385
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
386
-
387
- EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) {
388
- m_inputImpl.evalSubExprsIfNeeded(NULL);
389
- preloadKernel();
390
- return true;
391
- }
392
- EIGEN_STRONG_INLINE void cleanup() {
393
- m_inputImpl.cleanup();
394
- if (m_local_kernel) {
395
- m_device.deallocate((void*)m_kernel);
396
- m_local_kernel = false;
397
- }
398
- m_kernel = NULL;
399
- }
400
-
401
- void evalTo(typename XprType::Scalar* buffer) {
402
- evalSubExprsIfNeeded(NULL);
403
- for (int i = 0; i < dimensions().TotalSize(); ++i) {
404
- buffer[i] += coeff(i);
405
- }
406
- cleanup();
407
- }
408
-
409
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
410
- {
411
- CoeffReturnType result = CoeffReturnType(0);
412
- convolve(firstInput(index), 0, NumKernelDims-1, result);
413
- return result;
414
- }
415
-
416
- template<int LoadMode>
417
- EIGEN_DEVICE_FUNC PacketReturnType packet(const Index index) const
418
- {
419
- Index indices[2] = {index, index+PacketSize-1};
420
- Index startInputs[2] = {0, 0};
421
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
422
- for (int i = NumDims - 1; i > 0; --i) {
423
- const Index idx0 = indices[0] / m_outputStride[i];
424
- const Index idx1 = indices[1] / m_outputStride[i];
425
- startInputs[0] += idx0 * m_inputStride[i];
426
- startInputs[1] += idx1 * m_inputStride[i];
427
- indices[0] -= idx0 * m_outputStride[i];
428
- indices[1] -= idx1 * m_outputStride[i];
429
- }
430
- } else {
431
- for (int i = 0; i < NumDims - 1; ++i) {
432
- const Index idx0 = indices[0] / m_outputStride[i];
433
- const Index idx1 = indices[1] / m_outputStride[i];
434
- startInputs[0] += idx0 * m_inputStride[i];
435
- startInputs[1] += idx1 * m_inputStride[i];
436
- indices[0] -= idx0 * m_outputStride[i];
437
- indices[1] -= idx1 * m_outputStride[i];
438
- }
439
- }
440
- startInputs[0] += indices[0];
441
- startInputs[1] += indices[1];
442
-
443
- if (startInputs[1]-startInputs[0] == PacketSize-1) {
444
- PacketReturnType result = internal::pset1<PacketReturnType>(0);
445
- convolvePacket(startInputs[0], 0, NumKernelDims-1, result);
446
- return result;
447
- } else {
448
- EIGEN_ALIGN_MAX Scalar data[PacketSize];
449
- data[0] = Scalar(0);
450
- convolve(startInputs[0], 0, NumKernelDims-1, data[0]);
451
- for (int i = 1; i < PacketSize-1; ++i) {
452
- data[i] = Scalar(0);
453
- convolve(firstInput(index+i), 0, NumKernelDims-1, data[i]);
454
- }
455
- data[PacketSize-1] = Scalar(0);
456
- convolve(startInputs[1], 0, NumKernelDims-1, data[PacketSize-1]);
457
- return internal::pload<PacketReturnType>(data);
458
- }
459
- }
460
-
461
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
462
- costPerCoeff(bool vectorized) const {
463
- const double kernel_size = m_kernelImpl.dimensions().TotalSize();
464
- // We ignore the use of fused multiply-add.
465
- const double convolve_compute_cost =
466
- TensorOpCost::AddCost<Scalar>() + TensorOpCost::MulCost<Scalar>();
467
- const double firstIndex_compute_cost =
468
- NumDims *
469
- (2 * TensorOpCost::AddCost<Index>() + 2 * TensorOpCost::MulCost<Index>() +
470
- TensorOpCost::DivCost<Index>());
471
- return TensorOpCost(0, 0, firstIndex_compute_cost, vectorized, PacketSize) +
472
- kernel_size * (m_inputImpl.costPerCoeff(vectorized) +
473
- m_kernelImpl.costPerCoeff(vectorized) +
474
- TensorOpCost(0, 0, convolve_compute_cost, vectorized,
475
- PacketSize));
476
- }
477
-
478
- EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }
479
-
480
- private:
481
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index firstInput(Index index) const {
482
- Index startInput = 0;
483
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
484
- for (int i = NumDims - 1; i > 0; --i) {
485
- const Index idx = index / m_outputStride[i];
486
- startInput += idx * m_inputStride[i];
487
- index -= idx * m_outputStride[i];
488
- }
489
- } else {
490
- for (int i = 0; i < NumDims - 1; ++i) {
491
- const Index idx = index / m_outputStride[i];
492
- startInput += idx * m_inputStride[i];
493
- index -= idx * m_outputStride[i];
494
- }
495
- }
496
- startInput += index;
497
- return startInput;
498
- }
499
-
500
- EIGEN_DEVICE_FUNC void convolve(Index firstIndex, Index firstKernel, int DimIndex, CoeffReturnType& accum) const {
501
- for (int j = 0; j < m_kernelImpl.dimensions()[DimIndex]; ++j) {
502
- const Index input = firstIndex + j * m_indexStride[DimIndex];
503
- const Index kernel = firstKernel + j * m_kernelStride[DimIndex];
504
- if (DimIndex > 0) {
505
- convolve(input, kernel, DimIndex-1, accum);
506
- } else {
507
- accum += m_inputImpl.coeff(input) * m_kernel[kernel];
508
- }
509
- }
510
- }
511
-
512
- template <typename Packet>
513
- EIGEN_DEVICE_FUNC void convolvePacket(Index firstIndex, Index firstKernel, int DimIndex, Packet& accum) const {
514
- for (int j = 0; j < m_kernelImpl.dimensions()[DimIndex]; ++j) {
515
- const Index input = firstIndex + j * m_indexStride[DimIndex];
516
- const Index kernel = firstKernel + j * m_kernelStride[DimIndex];
517
- if (DimIndex > 0) {
518
- convolvePacket(input, kernel, DimIndex-1, accum);
519
- } else {
520
- accum = internal::pmadd<Packet>(m_inputImpl.template packet<Unaligned>(input), internal::pset1<Packet>(m_kernel[kernel]), accum);
521
- }
522
- }
523
- }
524
-
525
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void preloadKernel() {
526
- // Don't make a local copy of the kernel unless we have to (i.e. it's an
527
- // expression that needs to be evaluated)
528
- const Scalar* in_place = m_kernelImpl.data();
529
- if (in_place) {
530
- m_kernel = in_place;
531
- m_local_kernel = false;
532
- } else {
533
- size_t kernel_sz = m_kernelImpl.dimensions().TotalSize() * sizeof(Scalar);
534
- Scalar* local = (Scalar*)m_device.allocate_temp(kernel_sz);
535
- typedef TensorEvalToOp<const KernelArgType> EvalTo;
536
- EvalTo evalToTmp(local, m_kernelArg);
537
- const bool Vectorize = internal::IsVectorizable<Device, KernelArgType>::value;
538
- internal::TensorExecutor<const EvalTo, Device, Vectorize>::run(evalToTmp, m_device);
539
-
540
- m_kernel = local;
541
- m_local_kernel = true;
542
- }
543
- }
544
-
545
- array<Index, NumDims> m_inputStride;
546
- array<Index, NumDims> m_outputStride;
547
-
548
- array<Index, NumKernelDims> m_indexStride;
549
- array<Index, NumKernelDims> m_kernelStride;
550
- TensorEvaluator<InputArgType, Device> m_inputImpl;
551
- TensorEvaluator<KernelArgType, Device> m_kernelImpl;
552
- Dimensions m_dimensions;
553
-
554
- KernelArgType m_kernelArg;
555
- const Scalar* m_kernel;
556
- bool m_local_kernel;
557
- const Device EIGEN_DEVICE_REF m_device;
558
- };
559
-
560
-
561
-
562
-
563
- // Use an optimized implementation of the evaluation code for GPUs whenever possible.
564
- #if defined(EIGEN_USE_GPU) && defined(EIGEN_GPUCC)
565
-
566
- template <int StaticKernelSize>
567
- struct GetKernelSize {
568
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int operator() (const int /*kernelSize*/) const {
569
- return StaticKernelSize;
570
- }
571
- };
572
- template <>
573
- struct GetKernelSize<Dynamic> {
574
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int operator() (const int kernelSize) const {
575
- return kernelSize;
576
- }
577
- };
578
-
579
- template <typename InputEvaluator, typename Index, typename InputDims,
580
- int StaticKernelSize>
581
- __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void EigenConvolutionKernel1D(
582
- InputEvaluator eval,
583
- const internal::IndexMapper<Index, InputDims, 1, InputEvaluator::Layout>
584
- indexMapper,
585
- const float* __restrict kernel, const int numPlanes, const int numX,
586
- const int maxX, const int kernelSize, float* buffer) {
587
- #if defined(EIGEN_HIPCC)
588
- HIP_DYNAMIC_SHARED(float, s)
589
- #else
590
- extern __shared__ float s[];
591
- #endif
592
-
593
- const int first_x = blockIdx.x * maxX;
594
- const int last_x = (first_x + maxX < numX ? first_x + maxX : numX) - 1;
595
- const int num_x_input = last_x - first_x + GetKernelSize<StaticKernelSize>()(kernelSize);
596
- const int num_x_output = last_x - first_x + 1;
597
-
598
- const int first_plane = blockIdx.y * blockDim.y;
599
- const int plane_stride = blockDim.y * gridDim.y;
600
-
601
- for (int p = first_plane + threadIdx.y; p < numPlanes; p += plane_stride) {
602
- // Load inputs to shared memory
603
- const int plane_input_offset = indexMapper.mapGpuInputPlaneToTensorInputOffset(p);
604
- const int plane_kernel_offset = threadIdx.y * num_x_input;
605
- #pragma unroll
606
- for (int i = threadIdx.x; i < num_x_input; i += blockDim.x) {
607
- const int tensor_index = plane_input_offset + indexMapper.mapGpuInputKernelToTensorInputOffset(i+first_x);
608
- s[i + plane_kernel_offset] = eval.coeff(tensor_index);
609
- }
610
-
611
- __syncthreads();
612
-
613
- // Compute the convolution
614
- const int plane_output_offset = indexMapper.mapGpuOutputPlaneToTensorOutputOffset(p);
615
-
616
- #pragma unroll
617
- for (int i = threadIdx.x; i < num_x_output; i += blockDim.x) {
618
- const int kernel_offset = plane_kernel_offset + i;
619
- float result = 0.0f;
620
- #pragma unroll
621
- for (int k = 0; k < GetKernelSize<StaticKernelSize>()(kernelSize); ++k) {
622
- result += s[k + kernel_offset] * kernel[k];
623
- }
624
- const int tensor_index = plane_output_offset + indexMapper.mapGpuOutputKernelToTensorOutputOffset(i+first_x);
625
- buffer[tensor_index] = result;
626
- }
627
- __syncthreads();
628
- }
629
- };
630
-
631
- template <typename InputEvaluator, typename Index, typename InputDims,
632
- int StaticKernelSizeX, int StaticKernelSizeY>
633
- __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void EigenConvolutionKernel2D(
634
- InputEvaluator eval,
635
- const internal::IndexMapper<Index, InputDims, 2, InputEvaluator::Layout>
636
- indexMapper,
637
- const float* __restrict kernel, const int numPlanes, const int numX,
638
- const int maxX, const int numY, const int maxY, const int kernelSizeX,
639
- const int kernelSizeY, float* buffer) {
640
- #if defined(EIGEN_HIPCC)
641
- HIP_DYNAMIC_SHARED(float, s)
642
- #else
643
- extern __shared__ float s[];
644
- #endif
645
-
646
- const int first_x = blockIdx.x * maxX;
647
- const int last_x = (first_x + maxX < numX ? first_x + maxX : numX) - 1;
648
- const int num_x_input = last_x - first_x + GetKernelSize<StaticKernelSizeX>()(kernelSizeX);
649
- const int num_x_output = last_x - first_x + 1;
650
-
651
- const int first_y = blockIdx.y * maxY;
652
- const int last_y = (first_y + maxY < numY ? first_y + maxY : numY) - 1;
653
- const int num_y_input = last_y - first_y + GetKernelSize<StaticKernelSizeY>()(kernelSizeY);
654
- const int num_y_output = last_y - first_y + 1;
655
-
656
- const int first_plane = blockIdx.z * blockDim.z;
657
- const int plane_stride = blockDim.z * gridDim.z;
658
-
659
- for (int p = first_plane + threadIdx.z; p < numPlanes; p += plane_stride) {
660
-
661
- const int plane_input_offset = indexMapper.mapGpuInputPlaneToTensorInputOffset(p);
662
- const int plane_kernel_offset = threadIdx.z * num_y_input;
663
-
664
- // Load inputs to shared memory
665
- #pragma unroll
666
- for (int j = threadIdx.y; j < num_y_input; j += blockDim.y) {
667
- const int input_offset = num_x_input * (j + plane_kernel_offset);
668
- #pragma unroll
669
- for (int i = threadIdx.x; i < num_x_input; i += blockDim.x) {
670
- const int tensor_index = plane_input_offset + indexMapper.mapGpuInputKernelToTensorInputOffset(i+first_x, j+first_y);
671
- s[i + input_offset] = eval.coeff(tensor_index);
672
- }
673
- }
674
-
675
- __syncthreads();
676
-
677
- // Convolution
678
- const int plane_output_offset = indexMapper.mapGpuOutputPlaneToTensorOutputOffset(p);
679
-
680
- #pragma unroll
681
- for (int j = threadIdx.y; j < num_y_output; j += blockDim.y) {
682
- #pragma unroll
683
- for (int i = threadIdx.x; i < num_x_output; i += blockDim.x) {
684
- float result = 0.0f;
685
- #pragma unroll
686
- for (int l = 0; l < GetKernelSize<StaticKernelSizeY>()(kernelSizeY); ++l) {
687
- const int kernel_offset = kernelSizeX * l;
688
- const int input_offset = i + num_x_input * (j + l + plane_kernel_offset);
689
- #pragma unroll
690
- for (int k = 0; k < GetKernelSize<StaticKernelSizeX>()(kernelSizeX); ++k) {
691
- result += s[k + input_offset] * kernel[k + kernel_offset];
692
- }
693
- }
694
- const int tensor_index = plane_output_offset + indexMapper.mapGpuOutputKernelToTensorOutputOffset(i+first_x, j+first_y);
695
- buffer[tensor_index] = result;
696
- }
697
- }
698
-
699
- __syncthreads();
700
- }
701
- };
702
-
703
- template <typename InputEvaluator, typename Index, typename InputDims>
704
- __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void EigenConvolutionKernel3D(
705
- InputEvaluator eval,
706
- const internal::IndexMapper<Index, InputDims, 3, InputEvaluator::Layout>
707
- indexMapper,
708
- const float* __restrict kernel, const size_t numPlanes, const size_t numX,
709
- const size_t maxX, const size_t numY, const size_t maxY, const size_t numZ,
710
- const size_t maxZ, const size_t kernelSizeX, const size_t kernelSizeY,
711
- const size_t kernelSizeZ, float* buffer) {
712
- #if defined(EIGEN_HIPCC)
713
- HIP_DYNAMIC_SHARED(float, s)
714
- #else
715
- extern __shared__ float s[];
716
- #endif
717
-
718
- // Load inputs to shared memory
719
- const int first_x = blockIdx.x * maxX;
720
- const int last_x = (first_x + maxX < numX ? first_x + maxX : numX) - 1;
721
- const int num_x_input = last_x - first_x + kernelSizeX;
722
-
723
- const int first_y = blockIdx.y * maxY;
724
- const int last_y = (first_y + maxY < numY ? first_y + maxY : numY) - 1;
725
- const int num_y_input = last_y - first_y + kernelSizeY;
726
-
727
- const int first_z = blockIdx.z * maxZ;
728
- const int last_z = (first_z + maxZ < numZ ? first_z + maxZ : numZ) - 1;
729
- const int num_z_input = last_z - first_z + kernelSizeZ;
730
-
731
- for (int p = 0; p < numPlanes; ++p) {
732
-
733
- const int plane_input_offset = indexMapper.mapGpuInputPlaneToTensorInputOffset(p);
734
- const int plane_kernel_offset = 0;
735
-
736
- for (int k = threadIdx.z; k < num_z_input; k += blockDim.z) {
737
- for (int j = threadIdx.y; j < num_y_input; j += blockDim.y) {
738
- for (int i = threadIdx.x; i < num_x_input; i += blockDim.x) {
739
- const int tensor_index = plane_input_offset + indexMapper.mapGpuInputKernelToTensorInputOffset(i+first_x, j+first_y, k+first_z);
740
- s[i + num_x_input * (j + num_y_input * (k + plane_kernel_offset))] = eval.coeff(tensor_index);
741
- }
742
- }
743
- }
744
-
745
- __syncthreads();
746
-
747
- // Convolution
748
- const int num_z_output = last_z - first_z + 1;
749
- const int num_y_output = last_y - first_y + 1;
750
- const int num_x_output = last_x - first_x + 1;
751
- const int plane_output_offset = indexMapper.mapGpuOutputPlaneToTensorOutputOffset(p);
752
-
753
- for (int k = threadIdx.z; k < num_z_output; k += blockDim.z) {
754
- for (int j = threadIdx.y; j < num_y_output; j += blockDim.y) {
755
- for (int i = threadIdx.x; i < num_x_output; i += blockDim.x) {
756
- float result = 0.0f;
757
- for (int n = 0; n < kernelSizeZ; ++n) {
758
- for (int m = 0; m < kernelSizeY; ++m) {
759
- for (int l = 0; l < kernelSizeX; ++l) {
760
- result += s[i + l + num_x_input * (j + m + num_y_input * (k + n + plane_kernel_offset))] * kernel[l + kernelSizeX * (m + kernelSizeY * n)];
761
- }
762
- }
763
- }
764
- const int tensor_index = plane_output_offset + indexMapper.mapGpuOutputKernelToTensorOutputOffset(i+first_x, j+first_y, k+first_z);
765
- buffer[tensor_index] = result;
766
- }
767
- }
768
- }
769
- __syncthreads();
770
- }
771
- };
772
-
773
-
774
-
775
- template<typename Indices, typename InputArgType, typename KernelArgType>
776
- struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelArgType>, GpuDevice>
777
- {
778
- typedef TensorConvolutionOp<Indices, InputArgType, KernelArgType> XprType;
779
-
780
- static const int NumDims = internal::array_size<typename TensorEvaluator<InputArgType, GpuDevice>::Dimensions>::value;
781
- static const int NumKernelDims = internal::array_size<Indices>::value;
782
- typedef typename XprType::Index Index;
783
- typedef DSizes<Index, NumDims> Dimensions;
784
- typedef typename TensorEvaluator<KernelArgType, GpuDevice>::Dimensions KernelDimensions;
785
-
786
- enum {
787
- IsAligned = TensorEvaluator<InputArgType, GpuDevice>::IsAligned & TensorEvaluator<KernelArgType, GpuDevice>::IsAligned,
788
- PacketAccess = false,
789
- BlockAccess = false,
790
- PreferBlockAccess = false,
791
- Layout = TensorEvaluator<InputArgType, GpuDevice>::Layout,
792
- CoordAccess = false, // to be implemented
793
- RawAccess = false
794
- };
795
-
796
- //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
797
- typedef internal::TensorBlockNotImplemented TensorBlock;
798
- //===--------------------------------------------------------------------===//
799
-
800
- TensorEvaluator(const XprType& op, const GpuDevice& device)
801
- : m_inputImpl(op.inputExpression(), device), m_kernelImpl(op.kernelExpression(), device), m_kernelArg(op.kernelExpression()), m_indices(op.indices()), m_buf(NULL), m_kernel(NULL), m_local_kernel(false), m_device(device)
802
- {
803
- EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<InputArgType, GpuDevice>::Layout) == static_cast<int>(TensorEvaluator<KernelArgType, GpuDevice>::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE);
804
-
805
- const typename TensorEvaluator<InputArgType, GpuDevice>::Dimensions& input_dims = m_inputImpl.dimensions();
806
- const typename TensorEvaluator<KernelArgType, GpuDevice>::Dimensions& kernel_dims = m_kernelImpl.dimensions();
807
-
808
- m_dimensions = m_inputImpl.dimensions();
809
- for (int i = 0; i < NumKernelDims; ++i) {
810
- const Index index = op.indices()[i];
811
- const Index input_dim = input_dims[index];
812
- const Index kernel_dim = kernel_dims[i];
813
- const Index result_dim = input_dim - kernel_dim + 1;
814
- m_dimensions[index] = result_dim;
815
- }
816
- }
817
-
818
- typedef typename XprType::CoeffReturnType CoeffReturnType;
819
- typedef typename PacketType<CoeffReturnType, GpuDevice>::type PacketReturnType;
820
- typedef typename InputArgType::Scalar Scalar;
821
- static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
822
-
823
- EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_dimensions; }
824
-
825
- EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data) {
826
- preloadKernel();
827
- m_inputImpl.evalSubExprsIfNeeded(NULL);
828
- if (data) {
829
- executeEval(data);
830
- return false;
831
- } else {
832
- m_buf = (Scalar*)m_device.allocate(dimensions().TotalSize() * sizeof(Scalar));
833
- executeEval(m_buf);
834
- return true;
835
- }
836
- }
837
-
838
- EIGEN_STRONG_INLINE void cleanup() {
839
- m_inputImpl.cleanup();
840
- if (m_buf) {
841
- m_device.deallocate(m_buf);
842
- m_buf = NULL;
843
- }
844
- if (m_local_kernel) {
845
- m_device.deallocate((void*)m_kernel);
846
- m_local_kernel = false;
847
- }
848
- m_kernel = NULL;
849
- }
850
-
851
- EIGEN_STRONG_INLINE void preloadKernel() {
852
- // Don't make a local copy of the kernel unless we have to (i.e. it's an
853
- // expression that needs to be evaluated)
854
- const Scalar* in_place = m_kernelImpl.data();
855
- if (in_place) {
856
- m_kernel = in_place;
857
- m_local_kernel = false;
858
- } else {
859
- size_t kernel_sz = m_kernelImpl.dimensions().TotalSize() * sizeof(Scalar);
860
- Scalar* local = (Scalar*)m_device.allocate(kernel_sz);
861
- typedef TensorEvalToOp<const KernelArgType> EvalTo;
862
- EvalTo evalToTmp(local, m_kernelArg);
863
- const bool PacketAccess = internal::IsVectorizable<GpuDevice, KernelArgType>::value;
864
- internal::TensorExecutor<const EvalTo, GpuDevice, PacketAccess>::run(evalToTmp, m_device);
865
-
866
- m_kernel = local;
867
- m_local_kernel = true;
868
- }
869
- }
870
-
871
- static unsigned int ceil(unsigned int num, unsigned int denom) {
872
- const unsigned int rounded_toward_zero = num / denom;
873
- if (num > rounded_toward_zero * denom) {
874
- return rounded_toward_zero + 1;
875
- }
876
- return rounded_toward_zero;
877
- }
878
-
879
- void executeEval(Scalar* data) const {
880
- typedef typename TensorEvaluator<InputArgType, GpuDevice>::Dimensions InputDims;
881
-
882
- const int maxSharedMem = m_device.sharedMemPerBlock();
883
- const int maxThreadsPerBlock = m_device.maxGpuThreadsPerBlock();
884
- const int maxBlocksPerProcessor = m_device.maxGpuThreadsPerMultiProcessor() / maxThreadsPerBlock;
885
- const int numMultiProcessors = m_device.getNumGpuMultiProcessors();
886
- const int warpSize = 32;
887
-
888
- switch (NumKernelDims) {
889
- case 1: {
890
- const int kernel_size = m_kernelImpl.dimensions().TotalSize();
891
-
892
- const int numX = dimensions()[m_indices[0]];
893
- const int numP = dimensions().TotalSize() / numX;
894
- int maxX;
895
- dim3 block_size;
896
-
897
- const int single_stride_dim =
898
- static_cast<int>(Layout) == static_cast<int>(ColMajor)
899
- ? 0
900
- : m_inputImpl.dimensions().rank() - 1;
901
- if (m_indices[0] == single_stride_dim) {
902
- // Maximum the reuse
903
- const int inner_dim = ((maxSharedMem / (sizeof(Scalar)) - kernel_size + 1 + 31) / 32) * 32;
904
- maxX = numext::mini<int>(inner_dim, numX);
905
- const int maxP = numext::mini<int>(maxSharedMem / ((kernel_size - 1 + maxX) * sizeof(Scalar)), numP);
906
- block_size.x = numext::mini(maxThreadsPerBlock, maxX);
907
- block_size.y = numext::mini<int>(maxThreadsPerBlock / block_size.x, maxP);
908
- }
909
- else {
910
- // Read as much as possible alongside the inner most dimension, that is the plane
911
- const int inner_dim = maxSharedMem / ((warpSize + kernel_size) * sizeof(Scalar));
912
- const int maxP = numext::mini<int>(inner_dim, numP);
913
- maxX = numext::mini<int>(maxSharedMem / (inner_dim * sizeof(Scalar)) - kernel_size + 1, numX);
914
-
915
- block_size.x = numext::mini(warpSize, maxX);
916
- block_size.y = numext::mini<int>(maxThreadsPerBlock/block_size.x, maxP);
917
- }
918
-
919
- const int shared_mem = block_size.y * (maxX + kernel_size - 1) * sizeof(Scalar);
920
- gpu_assert(shared_mem <= maxSharedMem);
921
-
922
- const int num_x_blocks = ceil(numX, maxX);
923
- const int blocksPerProcessor = numext::mini(maxBlocksPerProcessor, maxSharedMem / shared_mem);
924
- const int num_y_blocks = ceil(numMultiProcessors * blocksPerProcessor, num_x_blocks);
925
-
926
- dim3 num_blocks(num_x_blocks, numext::mini<int>(num_y_blocks, ceil(numP, block_size.y)));
927
-
928
-
929
- //cout << "launching 1D kernel with block_size.x: " << block_size.x << " block_size.y: " << block_size.y << " num_blocks.x: " << num_blocks.x << " num_blocks.y: " << num_blocks.y << " maxX: " << maxX << " shared_mem: " << shared_mem << " in stream " << m_device.stream() << endl;
930
-
931
- const array<Index, 1> indices(m_indices[0]);
932
- const array<Index, 1> kernel_dims(m_kernelImpl.dimensions()[0]);
933
- internal::IndexMapper<Index, InputDims, 1, Layout> indexMapper(
934
- m_inputImpl.dimensions(), kernel_dims, indices);
935
- switch(kernel_size) {
936
- case 4: {
937
- LAUNCH_GPU_KERNEL((EigenConvolutionKernel1D<TensorEvaluator<InputArgType, GpuDevice>, Index, InputDims, 4>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, 4, data);
938
- break;
939
- }
940
- case 7: {
941
- LAUNCH_GPU_KERNEL((EigenConvolutionKernel1D<TensorEvaluator<InputArgType, GpuDevice>, Index, InputDims, 7>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, 7, data);
942
- break;
943
- }
944
- default: {
945
- LAUNCH_GPU_KERNEL((EigenConvolutionKernel1D<TensorEvaluator<InputArgType, GpuDevice>, Index, InputDims, Dynamic>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, kernel_size, data);
946
- }
947
- }
948
- break;
949
- }
950
-
951
- case 2: {
952
- const int idxX =
953
- static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 0 : 1;
954
- const int idxY =
955
- static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 1 : 0;
956
- const int kernel_size_x = m_kernelImpl.dimensions()[idxX];
957
- const int kernel_size_y = m_kernelImpl.dimensions()[idxY];
958
-
959
- const int numX = dimensions()[m_indices[idxX]];
960
- const int numY = dimensions()[m_indices[idxY]];
961
- const int numP = dimensions().TotalSize() / (numX*numY);
962
-
963
- const float scaling_factor = sqrtf(static_cast<float>(maxSharedMem) / (sizeof(Scalar) * kernel_size_y * kernel_size_x));
964
-
965
- // Snap maxX to warp size
966
- int inner_dim = ((static_cast<int>(scaling_factor * kernel_size_x) - kernel_size_x + 1 + 32) / 32) * 32;
967
- const int maxX = numext::mini<int>(inner_dim, numX);
968
- const int maxY = numext::mini<int>(maxSharedMem / (sizeof(Scalar) * (maxX + kernel_size_x - 1)) - kernel_size_y + 1, numY);
969
- const int maxP = numext::mini<int>(maxSharedMem / ((kernel_size_x - 1 + maxX) * (kernel_size_y - 1 + maxY) * sizeof(Scalar)), numP);
970
-
971
- dim3 block_size;
972
- block_size.x = numext::mini(1024, maxX);
973
- block_size.y = numext::mini<int>(1024/block_size.x, maxY);
974
- block_size.z = numext::mini<int>(1024/(block_size.x*block_size.y), maxP);
975
-
976
- const int shared_mem = block_size.z * (maxX + kernel_size_x - 1) * (maxY + kernel_size_y - 1) * sizeof(Scalar);
977
- gpu_assert(shared_mem <= maxSharedMem);
978
-
979
- const int num_x_blocks = ceil(numX, maxX);
980
- const int num_y_blocks = ceil(numY, maxY);
981
- const int blocksPerProcessor = numext::mini(maxBlocksPerProcessor, maxSharedMem / shared_mem);
982
- const int num_z_blocks = ceil(numMultiProcessors * blocksPerProcessor, num_x_blocks * num_y_blocks);
983
-
984
- dim3 num_blocks(num_x_blocks, num_y_blocks, numext::mini<int>(num_z_blocks, ceil(numP, block_size.z)));
985
-
986
-
987
- //cout << "launching 2D kernel with block_size.x: " << block_size.x << " block_size.y: " << block_size.y << " block_size.z: " << block_size.z << " num_blocks.x: " << num_blocks.x << " num_blocks.y: " << num_blocks.y << " num_blocks.z: " << num_blocks.z << " maxX: " << maxX << " maxY: " << maxY << " maxP: " << maxP << " shared_mem: " << shared_mem << " in stream " << m_device.stream() << endl;
988
-
989
- const array<Index, 2> indices(m_indices[idxX], m_indices[idxY]);
990
- const array<Index, 2> kernel_dims(m_kernelImpl.dimensions()[idxX],
991
- m_kernelImpl.dimensions()[idxY]);
992
- internal::IndexMapper<Index, InputDims, 2, Layout> indexMapper(
993
- m_inputImpl.dimensions(), kernel_dims, indices);
994
- switch (kernel_size_x) {
995
- case 4: {
996
- switch (kernel_size_y) {
997
- case 7: {
998
- LAUNCH_GPU_KERNEL((EigenConvolutionKernel2D<TensorEvaluator<InputArgType, GpuDevice>, Index, InputDims, 4, 7>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, numY, maxY, 4, 7, data);
999
- break;
1000
- }
1001
- default: {
1002
- LAUNCH_GPU_KERNEL((EigenConvolutionKernel2D<TensorEvaluator<InputArgType, GpuDevice>, Index, InputDims, 4, Dynamic>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, numY, maxY, 4, kernel_size_y, data);
1003
- break;
1004
- }
1005
- }
1006
- break;
1007
- }
1008
- case 7: {
1009
- switch (kernel_size_y) {
1010
- case 4: {
1011
- LAUNCH_GPU_KERNEL((EigenConvolutionKernel2D<TensorEvaluator<InputArgType, GpuDevice>, Index, InputDims, 7, 4>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, numY, maxY, 7, 4, data);
1012
- break;
1013
- }
1014
- default: {
1015
- LAUNCH_GPU_KERNEL((EigenConvolutionKernel2D<TensorEvaluator<InputArgType, GpuDevice>, Index, InputDims, 7, Dynamic>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, numY, maxY, 7, kernel_size_y, data);
1016
- break;
1017
- }
1018
- }
1019
- break;
1020
- }
1021
- default: {
1022
- LAUNCH_GPU_KERNEL((EigenConvolutionKernel2D<TensorEvaluator<InputArgType, GpuDevice>, Index, InputDims, Dynamic, Dynamic>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, numY, maxY, kernel_size_x, kernel_size_y, data);
1023
- break;
1024
- }
1025
- }
1026
- break;
1027
- }
1028
-
1029
- case 3: {
1030
- const int idxX =
1031
- static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 0 : 2;
1032
- const int idxY =
1033
- static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 1 : 1;
1034
- const int idxZ =
1035
- static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 2 : 0;
1036
-
1037
- const int kernel_size_x = m_kernelImpl.dimensions()[idxX];
1038
- const int kernel_size_y = m_kernelImpl.dimensions()[idxY];
1039
- const int kernel_size_z = m_kernelImpl.dimensions()[idxZ];
1040
-
1041
- const int numX = dimensions()[m_indices[idxX]];
1042
- const int numY = dimensions()[m_indices[idxY]];
1043
- const int numZ = dimensions()[m_indices[idxZ]];
1044
- const int numP = dimensions().TotalSize() / (numX*numY*numZ);
1045
-
1046
- const int maxX = numext::mini<int>(128, numext::mini<int>(maxSharedMem / (sizeof(Scalar) * kernel_size_y * kernel_size_z) - kernel_size_x + 1, numX));
1047
- const int maxY = numext::mini<int>(128, numext::mini<int>(maxSharedMem / (sizeof(Scalar) * (maxX + kernel_size_x - 1) * kernel_size_z) - kernel_size_y + 1, numY));
1048
- const int maxZ = numext::mini<int>(128, numext::mini<int>(maxSharedMem / (sizeof(Scalar) * (maxX + kernel_size_x - 1) * (maxY + kernel_size_y - 1)) - kernel_size_z + 1, numZ));
1049
-
1050
- dim3 block_size;
1051
- block_size.x = numext::mini(32, maxX);
1052
- block_size.y = numext::mini(32, maxY);
1053
- block_size.z = numext::mini<int>(1024/(block_size.x*block_size.y), maxZ);
1054
- dim3 num_blocks(ceil(numX, maxX), ceil(numY, maxY), ceil(numZ, maxZ));
1055
-
1056
- const int shared_mem = (maxX + kernel_size_x - 1) * (maxY + kernel_size_y - 1) * (maxZ + kernel_size_z - 1) * sizeof(Scalar);
1057
- gpu_assert(shared_mem <= maxSharedMem);
1058
-
1059
- //cout << "launching 3D kernel with block_size.x: " << block_size.x << " block_size.y: " << block_size.y << " block_size.z: " << block_size.z << " num_blocks.x: " << num_blocks.x << " num_blocks.y: " << num_blocks.y << " num_blocks.z: " << num_blocks.z << " shared_mem: " << shared_mem << " in stream " << m_device.stream() << endl;
1060
- const array<Index, 3> indices(m_indices[idxX], m_indices[idxY],
1061
- m_indices[idxZ]);
1062
- const array<Index, 3> kernel_dims(m_kernelImpl.dimensions()[idxX],
1063
- m_kernelImpl.dimensions()[idxY],
1064
- m_kernelImpl.dimensions()[idxZ]);
1065
- internal::IndexMapper<Index, InputDims, 3, Layout> indexMapper(
1066
- m_inputImpl.dimensions(), kernel_dims, indices);
1067
-
1068
- LAUNCH_GPU_KERNEL((EigenConvolutionKernel3D<TensorEvaluator<InputArgType, GpuDevice>, Index, InputDims>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, numY, maxY, numZ, maxZ, kernel_size_x, kernel_size_y, kernel_size_z, data);
1069
- break;
1070
- }
1071
-
1072
- default: {
1073
- EIGEN_STATIC_ASSERT((NumKernelDims >= 1 && NumKernelDims <= 3), THIS_METHOD_IS_ONLY_FOR_OBJECTS_OF_A_SPECIFIC_SIZE);
1074
- }
1075
- }
1076
- }
1077
-
1078
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
1079
- {
1080
- eigen_assert(m_buf);
1081
- eigen_assert(index < m_dimensions.TotalSize());
1082
- return m_buf[index];
1083
- }
1084
-
1085
- template<int LoadMode>
1086
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(const Index index) const
1087
- {
1088
- eigen_assert(m_buf);
1089
- eigen_assert(index < m_dimensions.TotalSize());
1090
- return internal::ploadt<PacketReturnType, LoadMode>(m_buf+index);
1091
- }
1092
-
1093
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
1094
- costPerCoeff(bool vectorized) const {
1095
- // TODO(rmlarsen): FIXME: For now, this is just a copy of the CPU cost
1096
- // model.
1097
- const double kernel_size = m_kernelImpl.dimensions().TotalSize();
1098
- // We ignore the use of fused multiply-add.
1099
- const double convolve_compute_cost =
1100
- TensorOpCost::AddCost<Scalar>() + TensorOpCost::MulCost<Scalar>();
1101
- const double firstIndex_compute_cost =
1102
- NumDims *
1103
- (2 * TensorOpCost::AddCost<Index>() + 2 * TensorOpCost::MulCost<Index>() +
1104
- TensorOpCost::DivCost<Index>());
1105
- return TensorOpCost(0, 0, firstIndex_compute_cost, vectorized, PacketSize) +
1106
- kernel_size * (m_inputImpl.costPerCoeff(vectorized) +
1107
- m_kernelImpl.costPerCoeff(vectorized) +
1108
- TensorOpCost(0, 0, convolve_compute_cost, vectorized,
1109
- PacketSize));
1110
- }
1111
-
1112
- private:
1113
- // No assignment (copies are needed by the kernels)
1114
- TensorEvaluator& operator = (const TensorEvaluator&);
1115
-
1116
- TensorEvaluator<InputArgType, GpuDevice> m_inputImpl;
1117
- TensorEvaluator<KernelArgType, GpuDevice> m_kernelImpl;
1118
- KernelArgType m_kernelArg;
1119
- Indices m_indices;
1120
- Dimensions m_dimensions;
1121
- Scalar* m_buf;
1122
- const Scalar* m_kernel;
1123
- bool m_local_kernel;
1124
-
1125
- const GpuDevice& m_device;
1126
- };
1127
- #endif
1128
-
1129
-
1130
- } // end namespace Eigen
1131
-
1132
- #endif // EIGEN_CXX11_TENSOR_TENSOR_CONVOLUTION_H