sequenzo 0.1.18__cp39-cp39-macosx_10_9_universal2.whl → 0.1.20__cp39-cp39-macosx_10_9_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sequenzo might be problematic. Click here for more details.

Files changed (360)
  1. sequenzo/__init__.py +39 -7
  2. sequenzo/big_data/clara/utils/get_weighted_diss.c +157 -157
  3. sequenzo/big_data/clara/utils/get_weighted_diss.cpython-39-darwin.so +0 -0
  4. sequenzo/clustering/KMedoids.py +39 -0
  5. sequenzo/clustering/hierarchical_clustering.py +108 -6
  6. sequenzo/define_sequence_data.py +10 -1
  7. sequenzo/dissimilarity_measures/get_distance_matrix.py +2 -3
  8. sequenzo/dissimilarity_measures/get_substitution_cost_matrix.py +1 -1
  9. sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.c +157 -157
  10. sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.cpython-39-darwin.so +0 -0
  11. sequenzo/dissimilarity_measures/utils/seqconc.c +157 -157
  12. sequenzo/dissimilarity_measures/utils/seqconc.cpython-39-darwin.so +0 -0
  13. sequenzo/dissimilarity_measures/utils/seqdss.c +157 -157
  14. sequenzo/dissimilarity_measures/utils/seqdss.cpython-39-darwin.so +0 -0
  15. sequenzo/dissimilarity_measures/utils/seqdur.c +157 -157
  16. sequenzo/dissimilarity_measures/utils/seqdur.cpython-39-darwin.so +0 -0
  17. sequenzo/dissimilarity_measures/utils/seqlength.c +157 -157
  18. sequenzo/dissimilarity_measures/utils/seqlength.cpython-39-darwin.so +0 -0
  19. sequenzo/multidomain/cat.py +0 -53
  20. sequenzo/multidomain/dat.py +11 -3
  21. sequenzo/multidomain/idcd.py +0 -3
  22. sequenzo/multidomain/linked_polyad.py +0 -1
  23. sequenzo/openmp_setup.py +233 -0
  24. sequenzo/visualization/plot_transition_matrix.py +21 -22
  25. {sequenzo-0.1.18.dist-info → sequenzo-0.1.20.dist-info}/METADATA +71 -10
  26. sequenzo-0.1.20.dist-info/RECORD +215 -0
  27. sequenzo/dissimilarity_measures/setup.py +0 -35
  28. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Cholesky/LDLT.h +0 -688
  29. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Cholesky/LLT.h +0 -558
  30. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h +0 -99
  31. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +0 -682
  32. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +0 -346
  33. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +0 -462
  34. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +0 -91
  35. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/EigenSolver.h +0 -622
  36. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +0 -418
  37. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +0 -226
  38. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +0 -374
  39. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +0 -158
  40. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/RealQZ.h +0 -657
  41. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/RealSchur.h +0 -558
  42. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +0 -77
  43. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +0 -904
  44. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +0 -87
  45. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +0 -561
  46. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/AlignedBox.h +0 -486
  47. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/AngleAxis.h +0 -247
  48. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/EulerAngles.h +0 -114
  49. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/Homogeneous.h +0 -501
  50. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/Hyperplane.h +0 -282
  51. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/OrthoMethods.h +0 -235
  52. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/ParametrizedLine.h +0 -232
  53. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/Quaternion.h +0 -870
  54. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/Rotation2D.h +0 -199
  55. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/RotationBase.h +0 -206
  56. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/Scaling.h +0 -188
  57. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/Transform.h +0 -1563
  58. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/Translation.h +0 -202
  59. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/Umeyama.h +0 -166
  60. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +0 -168
  61. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Householder/BlockHouseholder.h +0 -110
  62. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Householder/Householder.h +0 -176
  63. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Householder/HouseholderSequence.h +0 -545
  64. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +0 -226
  65. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +0 -212
  66. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +0 -229
  67. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +0 -394
  68. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +0 -453
  69. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +0 -444
  70. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +0 -198
  71. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +0 -117
  72. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Jacobi/Jacobi.h +0 -483
  73. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/KLUSupport/KLUSupport.h +0 -358
  74. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/LU/Determinant.h +0 -117
  75. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/LU/FullPivLU.h +0 -877
  76. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/LU/InverseImpl.h +0 -432
  77. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/LU/PartialPivLU.h +0 -624
  78. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h +0 -83
  79. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/LU/arch/InverseSize4.h +0 -351
  80. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/MetisSupport/MetisSupport.h +0 -137
  81. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/OrderingMethods/Amd.h +0 -435
  82. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +0 -1863
  83. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/OrderingMethods/Ordering.h +0 -153
  84. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +0 -678
  85. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +0 -545
  86. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/QR/ColPivHouseholderQR.h +0 -674
  87. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +0 -97
  88. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +0 -635
  89. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/QR/FullPivHouseholderQR.h +0 -713
  90. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/QR/HouseholderQR.h +0 -434
  91. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h +0 -68
  92. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +0 -335
  93. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SVD/BDCSVD.h +0 -1366
  94. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SVD/JacobiSVD.h +0 -812
  95. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +0 -91
  96. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SVD/SVDBase.h +0 -376
  97. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SVD/UpperBidiagonalization.h +0 -414
  98. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +0 -697
  99. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +0 -174
  100. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/AmbiVector.h +0 -378
  101. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/CompressedStorage.h +0 -274
  102. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +0 -352
  103. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h +0 -67
  104. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseAssign.h +0 -270
  105. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseBlock.h +0 -571
  106. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseColEtree.h +0 -206
  107. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +0 -370
  108. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +0 -722
  109. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +0 -150
  110. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +0 -342
  111. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +0 -138
  112. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseDot.h +0 -98
  113. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseFuzzy.h +0 -29
  114. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseMap.h +0 -305
  115. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseMatrix.h +0 -1518
  116. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +0 -398
  117. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparsePermutation.h +0 -178
  118. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseProduct.h +0 -181
  119. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseRedux.h +0 -49
  120. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseRef.h +0 -397
  121. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +0 -659
  122. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseSolverBase.h +0 -124
  123. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +0 -198
  124. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseTranspose.h +0 -92
  125. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseTriangularView.h +0 -189
  126. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseUtil.h +0 -186
  127. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseVector.h +0 -478
  128. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseView.h +0 -254
  129. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/TriangularSolver.h +0 -315
  130. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU.h +0 -923
  131. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLUImpl.h +0 -66
  132. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +0 -226
  133. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_Structs.h +0 -110
  134. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +0 -375
  135. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_Utils.h +0 -80
  136. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h +0 -181
  137. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +0 -179
  138. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +0 -107
  139. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +0 -280
  140. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +0 -126
  141. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +0 -130
  142. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +0 -223
  143. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h +0 -258
  144. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h +0 -137
  145. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h +0 -136
  146. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h +0 -83
  147. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseQR/SparseQR.h +0 -758
  148. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/StlSupport/StdDeque.h +0 -116
  149. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/StlSupport/StdList.h +0 -106
  150. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/StlSupport/StdVector.h +0 -131
  151. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/StlSupport/details.h +0 -84
  152. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +0 -1025
  153. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +0 -642
  154. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/misc/Image.h +0 -82
  155. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/misc/Kernel.h +0 -79
  156. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/misc/RealSvd2x2.h +0 -55
  157. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/misc/blas.h +0 -440
  158. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/misc/lapack.h +0 -152
  159. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/misc/lapacke.h +0 -16292
  160. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/misc/lapacke_mangling.h +0 -17
  161. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +0 -358
  162. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +0 -696
  163. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/BlockMethods.h +0 -1442
  164. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h +0 -115
  165. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +0 -177
  166. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/IndexedViewMethods.h +0 -262
  167. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +0 -152
  168. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +0 -95
  169. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/ReshapedMethods.h +0 -149
  170. sequenzo/dissimilarity_measures/src/eigen/blas/BandTriangularSolver.h +0 -97
  171. sequenzo/dissimilarity_measures/src/eigen/blas/GeneralRank1Update.h +0 -44
  172. sequenzo/dissimilarity_measures/src/eigen/blas/PackedSelfadjointProduct.h +0 -53
  173. sequenzo/dissimilarity_measures/src/eigen/blas/PackedTriangularMatrixVector.h +0 -79
  174. sequenzo/dissimilarity_measures/src/eigen/blas/PackedTriangularSolverVector.h +0 -88
  175. sequenzo/dissimilarity_measures/src/eigen/blas/Rank2Update.h +0 -57
  176. sequenzo/dissimilarity_measures/src/eigen/blas/common.h +0 -175
  177. sequenzo/dissimilarity_measures/src/eigen/blas/f2c/datatypes.h +0 -24
  178. sequenzo/dissimilarity_measures/src/eigen/blas/level1_cplx_impl.h +0 -155
  179. sequenzo/dissimilarity_measures/src/eigen/blas/level1_impl.h +0 -144
  180. sequenzo/dissimilarity_measures/src/eigen/blas/level1_real_impl.h +0 -122
  181. sequenzo/dissimilarity_measures/src/eigen/blas/level2_cplx_impl.h +0 -360
  182. sequenzo/dissimilarity_measures/src/eigen/blas/level2_impl.h +0 -553
  183. sequenzo/dissimilarity_measures/src/eigen/blas/level2_real_impl.h +0 -306
  184. sequenzo/dissimilarity_measures/src/eigen/blas/level3_impl.h +0 -702
  185. sequenzo/dissimilarity_measures/src/eigen/debug/gdb/__init__.py +0 -1
  186. sequenzo/dissimilarity_measures/src/eigen/debug/gdb/printers.py +0 -314
  187. sequenzo/dissimilarity_measures/src/eigen/lapack/lapack_common.h +0 -29
  188. sequenzo/dissimilarity_measures/src/eigen/scripts/relicense.py +0 -69
  189. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/Tensor.h +0 -554
  190. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h +0 -329
  191. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h +0 -247
  192. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +0 -1176
  193. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h +0 -1559
  194. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h +0 -1093
  195. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h +0 -518
  196. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h +0 -377
  197. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h +0 -1023
  198. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h +0 -73
  199. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h +0 -6
  200. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h +0 -1413
  201. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h +0 -575
  202. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionSycl.h +0 -1650
  203. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h +0 -1679
  204. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h +0 -456
  205. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h +0 -1132
  206. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConvolutionSycl.h +0 -544
  207. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h +0 -214
  208. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h +0 -347
  209. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h +0 -137
  210. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h +0 -6
  211. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h +0 -104
  212. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceGpu.h +0 -389
  213. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h +0 -1048
  214. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h +0 -409
  215. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h +0 -236
  216. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h +0 -490
  217. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h +0 -236
  218. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h +0 -983
  219. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h +0 -703
  220. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h +0 -388
  221. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h +0 -669
  222. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h +0 -379
  223. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h +0 -237
  224. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h +0 -191
  225. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h +0 -488
  226. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h +0 -302
  227. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorGlobalFunctions.h +0 -33
  228. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h +0 -99
  229. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaUndefines.h +0 -44
  230. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h +0 -79
  231. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h +0 -603
  232. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h +0 -738
  233. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h +0 -247
  234. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorInitializer.h +0 -82
  235. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h +0 -263
  236. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h +0 -216
  237. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMacros.h +0 -98
  238. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h +0 -327
  239. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h +0 -311
  240. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h +0 -1102
  241. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h +0 -708
  242. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h +0 -291
  243. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h +0 -322
  244. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h +0 -998
  245. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h +0 -6
  246. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h +0 -966
  247. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReductionSycl.h +0 -582
  248. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h +0 -454
  249. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h +0 -465
  250. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h +0 -528
  251. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorScanSycl.h +0 -513
  252. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h +0 -471
  253. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h +0 -161
  254. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h +0 -346
  255. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorTrace.h +0 -303
  256. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h +0 -264
  257. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h +0 -249
  258. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h +0 -629
  259. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/TensorSymmetry/DynamicSymmetry.h +0 -293
  260. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/TensorSymmetry/StaticSymmetry.h +0 -236
  261. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/TensorSymmetry/Symmetry.h +0 -338
  262. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/TensorSymmetry/util/TemplateGroupTheory.h +0 -669
  263. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/Barrier.h +0 -67
  264. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/EventCount.h +0 -249
  265. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h +0 -486
  266. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/RunQueue.h +0 -236
  267. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadCancel.h +0 -23
  268. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadEnvironment.h +0 -40
  269. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadLocal.h +0 -301
  270. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadPoolInterface.h +0 -48
  271. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadYield.h +0 -20
  272. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/util/CXX11Meta.h +0 -537
  273. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/util/CXX11Workarounds.h +0 -88
  274. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/util/EmulateArray.h +0 -261
  275. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/util/MaxSizeVector.h +0 -158
  276. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/AutoDiff/AutoDiffJacobian.h +0 -108
  277. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h +0 -730
  278. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/AutoDiff/AutoDiffVector.h +0 -220
  279. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/BVH/BVAlgorithms.h +0 -293
  280. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/BVH/KdBVH.h +0 -223
  281. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Eigenvalues/ArpackSelfAdjointEigenSolver.h +0 -790
  282. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/EulerAngles/EulerAngles.h +0 -355
  283. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/EulerAngles/EulerSystem.h +0 -305
  284. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/FFT/ei_fftw_impl.h +0 -261
  285. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/FFT/ei_kissfft_impl.h +0 -449
  286. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/IterativeSolvers/ConstrainedConjGrad.h +0 -187
  287. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/IterativeSolvers/DGMRES.h +0 -511
  288. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/IterativeSolvers/GMRES.h +0 -335
  289. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/IterativeSolvers/IDRS.h +0 -436
  290. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/IterativeSolvers/IncompleteLU.h +0 -90
  291. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/IterativeSolvers/IterationController.h +0 -154
  292. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/IterativeSolvers/MINRES.h +0 -267
  293. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/IterativeSolvers/Scaling.h +0 -193
  294. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h +0 -305
  295. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/LevenbergMarquardt/LMcovar.h +0 -84
  296. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/LevenbergMarquardt/LMonestep.h +0 -202
  297. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/LevenbergMarquardt/LMpar.h +0 -160
  298. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/LevenbergMarquardt/LMqrsolv.h +0 -188
  299. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/LevenbergMarquardt/LevenbergMarquardt.h +0 -396
  300. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h +0 -441
  301. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h +0 -569
  302. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h +0 -373
  303. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/MatrixFunctions/MatrixPower.h +0 -705
  304. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/MatrixFunctions/MatrixSquareRoot.h +0 -368
  305. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/MatrixFunctions/StemFunction.h +0 -117
  306. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/MoreVectorization/MathFunctions.h +0 -95
  307. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/HybridNonLinearSolver.h +0 -601
  308. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/LevenbergMarquardt.h +0 -657
  309. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/chkder.h +0 -66
  310. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/covar.h +0 -70
  311. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/dogleg.h +0 -107
  312. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/fdjac1.h +0 -79
  313. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/lmpar.h +0 -298
  314. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/qrsolv.h +0 -91
  315. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/r1mpyq.h +0 -30
  316. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/r1updt.h +0 -99
  317. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/rwupdt.h +0 -49
  318. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NumericalDiff/NumericalDiff.h +0 -130
  319. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Polynomials/Companion.h +0 -280
  320. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Polynomials/PolynomialSolver.h +0 -428
  321. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Polynomials/PolynomialUtils.h +0 -143
  322. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Skyline/SkylineInplaceLU.h +0 -352
  323. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Skyline/SkylineMatrix.h +0 -862
  324. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Skyline/SkylineMatrixBase.h +0 -212
  325. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Skyline/SkylineProduct.h +0 -295
  326. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Skyline/SkylineStorage.h +0 -259
  327. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Skyline/SkylineUtil.h +0 -89
  328. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SparseExtra/BlockOfDynamicSparseMatrix.h +0 -122
  329. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SparseExtra/BlockSparseMatrix.h +0 -1079
  330. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SparseExtra/DynamicSparseMatrix.h +0 -404
  331. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SparseExtra/MarketIO.h +0 -282
  332. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SparseExtra/MatrixMarketIterator.h +0 -247
  333. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SparseExtra/RandomSetter.h +0 -349
  334. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsArrayAPI.h +0 -286
  335. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsBFloat16.h +0 -68
  336. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsFunctors.h +0 -357
  337. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsHalf.h +0 -66
  338. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsImpl.h +0 -1959
  339. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsPacketMath.h +0 -118
  340. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/HipVectorCompatibility.h +0 -67
  341. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsArrayAPI.h +0 -167
  342. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsBFloat16.h +0 -58
  343. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h +0 -330
  344. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h +0 -58
  345. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h +0 -2045
  346. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h +0 -79
  347. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/arch/AVX/BesselFunctions.h +0 -46
  348. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/arch/AVX/SpecialFunctions.h +0 -16
  349. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/arch/AVX512/BesselFunctions.h +0 -46
  350. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/arch/AVX512/SpecialFunctions.h +0 -16
  351. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/arch/GPU/SpecialFunctions.h +0 -369
  352. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/arch/NEON/BesselFunctions.h +0 -54
  353. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/arch/NEON/SpecialFunctions.h +0 -34
  354. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Splines/Spline.h +0 -507
  355. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Splines/SplineFitting.h +0 -431
  356. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Splines/SplineFwd.h +0 -93
  357. sequenzo-0.1.18.dist-info/RECORD +0 -544
  358. {sequenzo-0.1.18.dist-info → sequenzo-0.1.20.dist-info}/WHEEL +0 -0
  359. {sequenzo-0.1.18.dist-info → sequenzo-0.1.20.dist-info}/licenses/LICENSE +0 -0
  360. {sequenzo-0.1.18.dist-info → sequenzo-0.1.20.dist-info}/top_level.txt +0 -0
@@ -1,966 +0,0 @@
1
- // This file is part of Eigen, a lightweight C++ template library
2
- // for linear algebra.
3
- //
4
- // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
5
- //
6
- // This Source Code Form is subject to the terms of the Mozilla
7
- // Public License v. 2.0. If a copy of the MPL was not distributed
8
- // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9
-
10
- #ifndef EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_GPU_H
11
- #define EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_GPU_H
12
-
13
- namespace Eigen {
14
- namespace internal {
15
-
16
-
17
- #if defined(EIGEN_USE_GPU) && defined(EIGEN_GPUCC)
18
- // Full reducers for GPU, don't vectorize for now
19
-
20
- // Reducer function that enables multiple gpu thread to safely accumulate at the same
21
- // output address. It basically reads the current value of the output variable, and
22
- // attempts to update it with the new value. If in the meantime another gpu thread
23
- // updated the content of the output address it will try again.
24
- template <typename T, typename R>
25
- __device__ EIGEN_ALWAYS_INLINE void atomicReduce(T* output, T accum, R& reducer) {
26
- #if (defined(EIGEN_HIP_DEVICE_COMPILE) && defined(__HIP_ARCH_HAS_WARP_SHUFFLE__)) || (EIGEN_CUDA_ARCH >= 300)
27
- if (sizeof(T) == 4)
28
- {
29
- unsigned int oldval = *reinterpret_cast<unsigned int*>(output);
30
- unsigned int newval = oldval;
31
- reducer.reduce(accum, reinterpret_cast<T*>(&newval));
32
- if (newval == oldval) {
33
- return;
34
- }
35
- unsigned int readback;
36
- while ((readback = atomicCAS((unsigned int*)output, oldval, newval)) != oldval) {
37
- oldval = readback;
38
- newval = oldval;
39
- reducer.reduce(accum, reinterpret_cast<T*>(&newval));
40
- if (newval == oldval) {
41
- return;
42
- }
43
- }
44
- }
45
- else if (sizeof(T) == 8) {
46
- unsigned long long oldval = *reinterpret_cast<unsigned long long*>(output);
47
- unsigned long long newval = oldval;
48
- reducer.reduce(accum, reinterpret_cast<T*>(&newval));
49
- if (newval == oldval) {
50
- return;
51
- }
52
- unsigned long long readback;
53
- while ((readback = atomicCAS((unsigned long long*)output, oldval, newval)) != oldval) {
54
- oldval = readback;
55
- newval = oldval;
56
- reducer.reduce(accum, reinterpret_cast<T*>(&newval));
57
- if (newval == oldval) {
58
- return;
59
- }
60
- }
61
- }
62
- else {
63
- gpu_assert(0 && "Wordsize not supported");
64
- }
65
- #else // EIGEN_CUDA_ARCH >= 300
66
- gpu_assert(0 && "Shouldn't be called on unsupported device");
67
- #endif // EIGEN_CUDA_ARCH >= 300
68
- }
69
-
70
- // We extend atomicExch to support extra data types
71
- template <typename Type>
72
- __device__ inline Type atomicExchCustom(Type* address, Type val) {
73
- return atomicExch(address, val);
74
- }
75
-
76
- template <>
77
- __device__ inline double atomicExchCustom(double* address, double val) {
78
- unsigned long long int* address_as_ull = reinterpret_cast<unsigned long long int*>(address);
79
- return __longlong_as_double(atomicExch(address_as_ull, __double_as_longlong(val)));
80
- }
81
-
82
- #ifdef EIGEN_HAS_GPU_FP16
83
- template <typename R>
84
- __device__ inline void atomicReduce(half2* output, half2 accum, R& reducer) {
85
- unsigned int oldval = *reinterpret_cast<unsigned int*>(output);
86
- unsigned int newval = oldval;
87
- reducer.reducePacket(accum, reinterpret_cast<half2*>(&newval));
88
- if (newval == oldval) {
89
- return;
90
- }
91
- unsigned int readback;
92
- while ((readback = atomicCAS((unsigned int*)output, oldval, newval)) != oldval) {
93
- oldval = readback;
94
- newval = oldval;
95
- reducer.reducePacket(accum, reinterpret_cast<half2*>(&newval));
96
- if (newval == oldval) {
97
- return;
98
- }
99
- }
100
- }
101
- // reduction should be associative since reduction is not atomic in wide vector but atomic in half2 operations
102
- template <typename R>
103
- __device__ inline void atomicReduce(Packet4h2* output, Packet4h2 accum, R& reducer) {
104
- half2* houtput=reinterpret_cast<half2*>(output);
105
- half2* haccum=reinterpret_cast<half2*>(&accum);
106
- for(int i=0;i<4;++i){
107
- atomicReduce(houtput+i,*(haccum+i),reducer);
108
- }
109
- }
110
- #endif // EIGEN_HAS_GPU_FP16
111
-
112
- template <>
113
- __device__ inline void atomicReduce(float* output, float accum, SumReducer<float>&) {
114
- #if (defined(EIGEN_HIP_DEVICE_COMPILE) && defined(__HIP_ARCH_HAS_WARP_SHUFFLE__)) || (EIGEN_CUDA_ARCH >= 300)
115
- atomicAdd(output, accum);
116
- #else // EIGEN_CUDA_ARCH >= 300
117
- gpu_assert(0 && "Shouldn't be called on unsupported device");
118
- #endif // EIGEN_CUDA_ARCH >= 300
119
- }
120
-
121
-
122
- template <typename CoeffType, typename Index>
123
- __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void ReductionInitKernel(const CoeffType val, Index num_preserved_coeffs, CoeffType* output) {
124
- const Index thread_id = blockIdx.x * blockDim.x + threadIdx.x;
125
- const Index num_threads = blockDim.x * gridDim.x;
126
- for (Index i = thread_id; i < num_preserved_coeffs; i += num_threads) {
127
- output[i] = val;
128
- }
129
- }
130
-
131
-
132
- template <int BlockSize, int NumPerThread, typename Self,
133
- typename Reducer, typename Index>
134
- __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void FullReductionKernel(Reducer reducer, const Self input, Index num_coeffs,
135
- typename Self::CoeffReturnType* output, unsigned int* semaphore) {
136
- #if (defined(EIGEN_HIP_DEVICE_COMPILE) && defined(__HIP_ARCH_HAS_WARP_SHUFFLE__)) || (EIGEN_CUDA_ARCH >= 300)
137
- // Initialize the output value
138
- const Index first_index = blockIdx.x * BlockSize * NumPerThread + threadIdx.x;
139
- if (gridDim.x == 1) {
140
- if (first_index == 0) {
141
- *output = reducer.initialize();
142
- }
143
- }
144
- else {
145
- if (threadIdx.x == 0) {
146
- unsigned int block = atomicCAS(semaphore, 0u, 1u);
147
- if (block == 0) {
148
- // We're the first block to run, initialize the output value
149
- atomicExchCustom(output, reducer.initialize());
150
- __threadfence();
151
- atomicExch(semaphore, 2u);
152
- }
153
- else {
154
- // Wait for the first block to initialize the output value.
155
- // Use atomicCAS here to ensure that the reads aren't cached
156
- unsigned int val;
157
- do {
158
- val = atomicCAS(semaphore, 2u, 2u);
159
- }
160
- while (val < 2u);
161
- }
162
- }
163
- }
164
-
165
- __syncthreads();
166
-
167
- eigen_assert(gridDim.x == 1 || *semaphore >= 2u);
168
-
169
- typename Self::CoeffReturnType accum = reducer.initialize();
170
- Index max_iter = numext::mini<Index>(num_coeffs - first_index, NumPerThread*BlockSize);
171
- for (Index i = 0; i < max_iter; i+=BlockSize) {
172
- const Index index = first_index + i;
173
- eigen_assert(index < num_coeffs);
174
- typename Self::CoeffReturnType val = input.m_impl.coeff(index);
175
- reducer.reduce(val, &accum);
176
- }
177
-
178
- #pragma unroll
179
- for (int offset = warpSize/2; offset > 0; offset /= 2) {
180
- #if defined(EIGEN_HIPCC)
181
- // use std::is_floating_point to determine the type of reduced_val
182
- // This is needed because when Type == double, hipcc will give a "call to __shfl_down is ambguous" error
183
- // and list the float and int versions of __shfl_down as the candidate functions.
184
- if (std::is_floating_point<typename Self::CoeffReturnType>::value) {
185
- reducer.reduce(__shfl_down(static_cast<float>(accum), offset, warpSize), &accum);
186
- } else {
187
- reducer.reduce(__shfl_down(static_cast<int>(accum), offset, warpSize), &accum);
188
- }
189
- #elif defined(EIGEN_CUDA_SDK_VER) && EIGEN_CUDA_SDK_VER < 90000
190
- reducer.reduce(__shfl_down(accum, offset, warpSize), &accum);
191
- #else
192
- reducer.reduce(__shfl_down_sync(0xFFFFFFFF, accum, offset, warpSize), &accum);
193
- #endif
194
- }
195
-
196
- if ((threadIdx.x & (warpSize - 1)) == 0) {
197
- atomicReduce(output, accum, reducer);
198
- }
199
-
200
- if (gridDim.x > 1 && threadIdx.x == 0) {
201
- // Let the last block reset the semaphore
202
- atomicInc(semaphore, gridDim.x + 1);
203
- #if defined(EIGEN_HIPCC)
204
- __threadfence_system();
205
- #endif
206
- }
207
- #else // EIGEN_CUDA_ARCH >= 300
208
- gpu_assert(0 && "Shouldn't be called on unsupported device");
209
- #endif // EIGEN_CUDA_ARCH >= 300
210
- }
211
-
212
-
213
- #ifdef EIGEN_HAS_GPU_FP16
214
- template <typename Self,
215
- typename Reducer, typename Index>
216
- __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void ReductionInitFullReduxKernelHalfFloat(Reducer reducer, const Self input, Index num_coeffs,
217
- packet_traits<Eigen::half>::type* scratch) {
218
- eigen_assert(blockDim.x == 1);
219
- eigen_assert(gridDim.x == 1);
220
- typedef packet_traits<Eigen::half>::type packet_type;
221
- Index packet_remainder =
222
- num_coeffs % Index(unpacket_traits<packet_type>::size);
223
- if (packet_remainder != 0) {
224
- half2* h2scratch = reinterpret_cast<half2*>(scratch);
225
- for (Index i = num_coeffs - packet_remainder; i + 2 <= num_coeffs; i += 2) {
226
- *h2scratch =
227
- __halves2half2(input.m_impl.coeff(i), input.m_impl.coeff(i + 1));
228
- h2scratch++;
229
- }
230
- if ((num_coeffs & 1) != 0) {
231
- half lastCoeff = input.m_impl.coeff(num_coeffs - 1);
232
- *h2scratch = __halves2half2(lastCoeff, reducer.initialize());
233
- }
234
- } else {
235
- *scratch = reducer.template initializePacket<packet_type>();
236
- }
237
- }
238
-
239
- template <typename Self,
240
- typename Reducer, typename Index>
241
- __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void ReductionInitKernelHalfFloat(Reducer reducer, const Self input, Index num_coeffs, half* output) {
242
- const Index thread_id = blockIdx.x * blockDim.x + threadIdx.x;
243
- const Index num_threads = blockDim.x * gridDim.x;
244
- typedef typename packet_traits<Eigen::half>::type PacketType;
245
-
246
- const Index num_packets =
247
- num_coeffs / Index(unpacket_traits<PacketType>::size);
248
- PacketType* p_output = reinterpret_cast<PacketType*>(output);
249
- for (Index i = thread_id; i < num_packets; i += num_threads) {
250
- p_output[i] = reducer.template initializePacket<PacketType>();
251
- }
252
- Index packet_remainder =
253
- num_coeffs % Index(unpacket_traits<PacketType>::size);
254
- if (thread_id < packet_remainder) {
255
- output[num_coeffs - packet_remainder + thread_id] = reducer.initialize();
256
- }
257
- }
258
-
259
- template <int BlockSize, int NumPerThread, typename Self,
260
- typename Reducer, typename Index>
261
- __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void FullReductionKernelHalfFloat(Reducer reducer, const Self input, Index num_coeffs,
262
- half* output, packet_traits<Eigen::half>::type* scratch) {
263
- typedef typename packet_traits<Eigen::half>::type PacketType;
264
- const int packet_width = unpacket_traits<PacketType>::size;
265
- eigen_assert(NumPerThread % packet_width == 0);
266
- const Index first_index =
267
- blockIdx.x * BlockSize * NumPerThread + packet_width * threadIdx.x;
268
-
269
- // Initialize the output value if it wasn't initialized by the ReductionInitKernel
270
-
271
- if (gridDim.x == 1) {
272
- if (first_index == 0) {
273
- int rem = num_coeffs % packet_width;
274
- if (rem != 0) {
275
- half2* p_scratch = reinterpret_cast<half2*>(scratch);
276
- *scratch = reducer.template initializePacket<PacketType>();
277
- for (int i = 0; i < rem / 2; i++) {
278
- *p_scratch = __halves2half2(
279
- input.m_impl.coeff(num_coeffs - packet_width + 2 * i),
280
- input.m_impl.coeff(num_coeffs - packet_width + 2 * i + 1));
281
- p_scratch++;
282
- }
283
- if ((num_coeffs & 1) != 0) {
284
- half last = input.m_impl.coeff(num_coeffs - 1);
285
- *p_scratch = __halves2half2(last, reducer.initialize());
286
- }
287
- } else {
288
- *scratch = reducer.template initializePacket<PacketType>();
289
- }
290
- }
291
- __syncthreads();
292
- }
293
-
294
- PacketType accum = reducer.template initializePacket<PacketType>();
295
- const Index max_iter =
296
- numext::mini<Index>((num_coeffs - first_index) / packet_width,
297
- NumPerThread * BlockSize / packet_width);
298
- for (Index i = 0; i < max_iter; i += BlockSize) {
299
- const Index index = first_index + packet_width * i;
300
- eigen_assert(index + packet_width < num_coeffs);
301
- PacketType val = input.m_impl.template packet<Unaligned>(index);
302
- reducer.reducePacket(val, &accum);
303
- }
304
-
305
- #pragma unroll
306
- for (int offset = warpSize/2; offset > 0; offset /= 2) {
307
- #if defined(EIGEN_HIPCC)
308
- PacketType r1;
309
- half2* hr = reinterpret_cast<half2*>(&r1);
310
- half2* hacc = reinterpret_cast<half2*>(&accum);
311
- for (int i = 0; i < packet_width / 2; i++) {
312
- // FIXME : remove this workaround once we have native half/half2 support for __shfl_down
313
- union { int i; half2 h; } wka_in, wka_out;
314
- wka_in.h = hacc[i];
315
- wka_out.i = __shfl_down(wka_in.i, offset, warpSize);
316
- hr[i] = wka_out.h;
317
- }
318
- reducer.reducePacket(r1, &accum);
319
- #elif defined(EIGEN_CUDA_SDK_VER) && EIGEN_CUDA_SDK_VER < 90000
320
- PacketType r1;
321
- half2* hr = reinterpret_cast<half2*>(&r1);
322
- half2* hacc = reinterpret_cast<half2*>(&accum);
323
- for (int i = 0; i < packet_width / 2; i++) {
324
- hr[i] = __shfl_down(hacc[i], offset, warpSize);
325
- }
326
- reducer.reducePacket(r1, &accum);
327
- #else
328
- PacketType r1;
329
- half2* hr = reinterpret_cast<half2*>(&r1);
330
- half2* hacc = reinterpret_cast<half2*>(&accum);
331
- for (int i = 0; i < packet_width / 2; i++) {
332
- hr[i] = __shfl_down_sync(0xFFFFFFFF, hacc[i], (unsigned)offset, warpSize);
333
- }
334
- reducer.reducePacket(r1, &accum);
335
-
336
- #endif
337
- }
338
-
339
- if ((threadIdx.x & (warpSize - 1)) == 0) {
340
- atomicReduce(scratch, accum, reducer);
341
- }
342
-
343
- __syncthreads();
344
- half2* rv1 = reinterpret_cast<half2*>(scratch);
345
- if (packet_width > 2) {
346
- reducer.reducePacket(rv1[2], rv1);
347
- reducer.reducePacket(rv1[3], rv1 + 1);
348
- reducer.reducePacket(rv1[1], rv1);
349
- }
350
- if (gridDim.x == 1) {
351
- if (first_index == 0) {
352
- half tmp = __low2half(*rv1);
353
- reducer.reduce(__high2half(*rv1), &tmp);
354
- *output = tmp;
355
- }
356
- }
357
- }
358
-
359
- template <typename Op>
360
- __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void ReductionCleanupKernelHalfFloat(Op reducer, half* output, packet_traits<Eigen::half>::type* scratch) {
361
- eigen_assert(threadIdx.x == 1);
362
- half2* pscratch = reinterpret_cast<half2*>(scratch);
363
- half tmp = __float2half(0.f);
364
- typedef packet_traits<Eigen::half>::type packet_type;
365
- for (int i = 0; i < unpacket_traits<packet_type>::size; i += 2) {
366
- reducer.reduce(__low2half(*pscratch), &tmp);
367
- reducer.reduce(__high2half(*pscratch), &tmp);
368
- pscratch++;
369
- }
370
- *output = tmp;
371
- }
372
-
373
- #endif // EIGEN_HAS_GPU_FP16
374
-
375
- template <typename Self, typename Op, typename OutputType, bool PacketAccess, typename Enabled = void>
376
- struct FullReductionLauncher {
377
- static void run(const Self&, Op&, const GpuDevice&, OutputType*, typename Self::Index) {
378
- gpu_assert(false && "Should only be called on doubles, floats and half floats");
379
- }
380
- };
381
-
382
- // Specialization for float and double
383
- template <typename Self, typename Op, typename OutputType, bool PacketAccess>
384
- struct FullReductionLauncher<
385
- Self, Op, OutputType, PacketAccess,
386
- typename internal::enable_if<
387
- internal::is_same<float, OutputType>::value ||
388
- internal::is_same<double, OutputType>::value,
389
- void>::type> {
390
- static void run(const Self& self, Op& reducer, const GpuDevice& device, OutputType* output, typename Self::Index num_coeffs) {
391
-
392
- typedef typename Self::Index Index;
393
- const int block_size = 256;
394
- const int num_per_thread = 128;
395
- const int num_blocks = divup<int>(num_coeffs, block_size * num_per_thread);
396
-
397
- unsigned int* semaphore = NULL;
398
- if (num_blocks > 1) {
399
- semaphore = device.semaphore();
400
- }
401
-
402
- LAUNCH_GPU_KERNEL((FullReductionKernel<block_size, num_per_thread, Self, Op, Index>),
403
- num_blocks, block_size, 0, device, reducer, self, num_coeffs, output, semaphore);
404
- }
405
- };
406
-
407
- #ifdef EIGEN_HAS_GPU_FP16
408
- template <typename Self, typename Op>
409
- struct FullReductionLauncher<Self, Op, Eigen::half, false> {
410
- static void run(const Self&, Op&, const GpuDevice&, half*, typename Self::Index) {
411
- gpu_assert(false && "Should not be called since there is no packet accessor");
412
- }
413
- };
414
-
415
- template <typename Self, typename Op>
416
- struct FullReductionLauncher<Self, Op, Eigen::half, true> {
417
- static void run(const Self& self, Op& reducer, const GpuDevice& device, half* output, typename Self::Index num_coeffs) {
418
- typedef typename Self::Index Index;
419
- typedef typename packet_traits<Eigen::half>::type PacketType;
420
-
421
- const int block_size = 256;
422
- const int num_per_thread = 128;
423
- const int num_blocks = divup<int>(num_coeffs, block_size * num_per_thread);
424
- PacketType* scratch = static_cast<PacketType*>(device.scratchpad());
425
- // half2* scratch = static_cast<half2*>(device.scratchpad());
426
-
427
- if (num_blocks > 1) {
428
- // We initialize the output and the scrathpad outside the reduction kernel when we can't be sure that there
429
- // won't be a race conditions between multiple thread blocks.
430
- LAUNCH_GPU_KERNEL((ReductionInitFullReduxKernelHalfFloat<Self, Op, Index>),
431
- 1, 1, 0, device, reducer, self, num_coeffs, scratch);
432
- }
433
-
434
- LAUNCH_GPU_KERNEL((FullReductionKernelHalfFloat<block_size, num_per_thread, Self, Op, Index>),
435
- num_blocks, block_size, 0, device, reducer, self, num_coeffs, output, scratch);
436
-
437
- if (num_blocks > 1) {
438
- LAUNCH_GPU_KERNEL((ReductionCleanupKernelHalfFloat<Op>),
439
- 1, 1, 0, device, reducer, output, scratch);
440
- }
441
- }
442
- };
443
- #endif // EIGEN_HAS_GPU_FP16
444
-
445
-
446
- template <typename Self, typename Op, bool Vectorizable>
447
- struct FullReducer<Self, Op, GpuDevice, Vectorizable> {
448
- // Unfortunately nvidia doesn't support well exotic types such as complex,
449
- // so reduce the scope of the optimized version of the code to the simple cases
450
- // of doubles, floats and half floats
451
- #ifdef EIGEN_HAS_GPU_FP16
452
- static const bool HasOptimizedImplementation = !Self::ReducerTraits::IsStateful &&
453
- (internal::is_same<typename Self::CoeffReturnType, float>::value ||
454
- internal::is_same<typename Self::CoeffReturnType, double>::value ||
455
- (internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && reducer_traits<Op, GpuDevice>::PacketAccess));
456
- #else // EIGEN_HAS_GPU_FP16
457
- static const bool HasOptimizedImplementation = !Self::ReducerTraits::IsStateful &&
458
- (internal::is_same<typename Self::CoeffReturnType, float>::value ||
459
- internal::is_same<typename Self::CoeffReturnType, double>::value);
460
- #endif // EIGEN_HAS_GPU_FP16
461
-
462
- template <typename OutputType>
463
- static void run(const Self& self, Op& reducer, const GpuDevice& device, OutputType* output) {
464
- gpu_assert(HasOptimizedImplementation && "Should only be called on doubles, floats or half floats");
465
- const Index num_coeffs = array_prod(self.m_impl.dimensions());
466
- // Don't crash when we're called with an input tensor of size 0.
467
- if (num_coeffs == 0) {
468
- return;
469
- }
470
-
471
- FullReductionLauncher<Self, Op, OutputType, reducer_traits<Op, GpuDevice>::PacketAccess>::run(self, reducer, device, output, num_coeffs);
472
- }
473
- };
474
-
475
-
476
- template <int NumPerThread, typename Self,
477
- typename Reducer, typename Index>
478
- __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void InnerReductionKernel(Reducer reducer, const Self input, Index num_coeffs_to_reduce, Index num_preserved_coeffs,
479
- typename Self::CoeffReturnType* output) {
480
- #if (defined(EIGEN_HIP_DEVICE_COMPILE) && defined(__HIP_ARCH_HAS_WARP_SHUFFLE__)) || (EIGEN_CUDA_ARCH >= 300)
481
- typedef typename Self::CoeffReturnType Type;
482
- eigen_assert(blockDim.y == 1);
483
- eigen_assert(blockDim.z == 1);
484
- eigen_assert(gridDim.y == 1);
485
- eigen_assert(gridDim.z == 1);
486
-
487
- const int unroll_times = 16;
488
- eigen_assert(NumPerThread % unroll_times == 0);
489
-
490
- const Index input_col_blocks = divup<Index>(num_coeffs_to_reduce, blockDim.x * NumPerThread);
491
- const Index num_input_blocks = input_col_blocks * num_preserved_coeffs;
492
-
493
- const Index num_threads = blockDim.x * gridDim.x;
494
- const Index thread_id = blockIdx.x * blockDim.x + threadIdx.x;
495
-
496
- // Initialize the output values if they weren't initialized by the ReductionInitKernel
497
- if (gridDim.x == 1) {
498
- for (Index i = thread_id; i < num_preserved_coeffs; i += num_threads) {
499
- output[i] = reducer.initialize();
500
- }
501
- __syncthreads();
502
- }
503
-
504
- for (Index i = blockIdx.x; i < num_input_blocks; i += gridDim.x) {
505
- const Index row = i / input_col_blocks;
506
-
507
- if (row < num_preserved_coeffs) {
508
- const Index col_block = i % input_col_blocks;
509
- const Index col_begin = col_block * blockDim.x * NumPerThread + threadIdx.x;
510
-
511
- Type reduced_val = reducer.initialize();
512
-
513
- for (Index j = 0; j < NumPerThread; j += unroll_times) {
514
- const Index last_col = col_begin + blockDim.x * (j + unroll_times - 1);
515
- if (last_col >= num_coeffs_to_reduce) {
516
- for (Index col = col_begin + blockDim.x * j; col < num_coeffs_to_reduce; col += blockDim.x) {
517
- const Type val = input.m_impl.coeff(row * num_coeffs_to_reduce + col);
518
- reducer.reduce(val, &reduced_val);
519
- }
520
- break;
521
- } else {
522
- // Faster version of the loop with no branches after unrolling.
523
- #pragma unroll
524
- for (int k = 0; k < unroll_times; ++k) {
525
- const Index col = col_begin + blockDim.x * (j + k);
526
- reducer.reduce(input.m_impl.coeff(row * num_coeffs_to_reduce + col), &reduced_val);
527
- }
528
- }
529
- }
530
-
531
- #pragma unroll
532
- for (int offset = warpSize/2; offset > 0; offset /= 2) {
533
- #if defined(EIGEN_HIPCC)
534
- // use std::is_floating_point to determine the type of reduced_val
535
- // This is needed because when Type == double, hipcc will give a "call to __shfl_down is ambguous" error
536
- // and list the float and int versions of __shfl_down as the candidate functions.
537
- if (std::is_floating_point<Type>::value) {
538
- reducer.reduce(__shfl_down(static_cast<float>(reduced_val), offset), &reduced_val);
539
- } else {
540
- reducer.reduce(__shfl_down(static_cast<int>(reduced_val), offset), &reduced_val);
541
- }
542
- #elif defined(EIGEN_CUDA_SDK_VER) && EIGEN_CUDA_SDK_VER < 90000
543
- reducer.reduce(__shfl_down(reduced_val, offset), &reduced_val);
544
- #else
545
- reducer.reduce(__shfl_down_sync(0xFFFFFFFF, reduced_val, offset), &reduced_val);
546
- #endif
547
- }
548
-
549
- if ((threadIdx.x & (warpSize - 1)) == 0) {
550
- atomicReduce(&(output[row]), reduced_val, reducer);
551
- }
552
- }
553
- }
554
- #else // EIGEN_CUDA_ARCH >= 300
555
- gpu_assert(0 && "Shouldn't be called on unsupported device");
556
- #endif // EIGEN_CUDA_ARCH >= 300
557
- }
558
-
559
- #ifdef EIGEN_HAS_GPU_FP16
560
-
561
- template <int NumPerThread, typename Self,
562
- typename Reducer, typename Index>
563
- __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void InnerReductionKernelHalfFloat(Reducer reducer, const Self input, Index num_coeffs_to_reduce, Index num_preserved_coeffs,
564
- half* output) {
565
- eigen_assert(blockDim.y == 1);
566
- eigen_assert(blockDim.z == 1);
567
- eigen_assert(gridDim.y == 1);
568
- eigen_assert(gridDim.z == 1);
569
-
570
- typedef typename packet_traits<Eigen::half>::type PacketType;
571
- const int packet_width = unpacket_traits<PacketType>::size;
572
- const int unroll_times = 16 / packet_width;
573
- eigen_assert(NumPerThread % unroll_times == 0);
574
- eigen_assert(unroll_times % 2 == 0);
575
-
576
- const Index input_col_blocks = divup<Index>(num_coeffs_to_reduce, blockDim.x * NumPerThread * 2);
577
- const Index num_input_blocks = divup<Index>(input_col_blocks * num_preserved_coeffs, 2);
578
-
579
- const Index num_threads = blockDim.x * gridDim.x;
580
- const Index thread_id = blockIdx.x * blockDim.x + threadIdx.x;
581
-
582
- // Initialize the output values if they weren't initialized by the ReductionInitKernel
583
- if (gridDim.x == 1) {
584
- Index i = packet_width * thread_id;
585
- for (; i + packet_width <= num_preserved_coeffs;
586
- i += packet_width * num_threads) {
587
- PacketType* poutput = reinterpret_cast<PacketType*>(output + i);
588
- *poutput = reducer.template initializePacket<PacketType>();
589
- }
590
- if (i < num_preserved_coeffs) {
591
- output[i] = reducer.initialize();
592
- }
593
- __syncthreads();
594
- }
595
-
596
- for (Index i = blockIdx.x; i < num_input_blocks; i += gridDim.x) {
597
- const Index row = 2 * (i / input_col_blocks); // everybody takes 2 rows
598
-
599
- if (row + 1 < num_preserved_coeffs) {
600
- const Index col_block = i % input_col_blocks;
601
- const Index col_begin =
602
- packet_width * (col_block * blockDim.x * NumPerThread + threadIdx.x);
603
-
604
- PacketType reduced_val1 = reducer.template initializePacket<PacketType>();
605
- PacketType reduced_val2 = reducer.template initializePacket<PacketType>();
606
-
607
- for (Index j = 0; j < NumPerThread; j += unroll_times) {
608
- const Index last_col =
609
- col_begin + blockDim.x * (j + unroll_times - 1) * packet_width;
610
- if (last_col >= num_coeffs_to_reduce) {
611
- Index col = col_begin + blockDim.x * j;
612
- for (; col + packet_width <= num_coeffs_to_reduce;
613
- col += blockDim.x) {
614
- const PacketType val1 = input.m_impl.template packet<Unaligned>(
615
- row * num_coeffs_to_reduce + col);
616
- reducer.reducePacket(val1, &reduced_val1);
617
- const PacketType val2 = input.m_impl.template packet<Unaligned>(
618
- (row + 1) * num_coeffs_to_reduce + col);
619
- reducer.reducePacket(val2, &reduced_val2);
620
- }
621
- if (col < num_coeffs_to_reduce) {
622
- PacketType r1 = reducer.template initializePacket<PacketType>();
623
- PacketType r2 = reducer.template initializePacket<PacketType>();
624
- half2* hr1 = reinterpret_cast<half2*>(&r1);
625
- half2* hr2 = reinterpret_cast<half2*>(&r2);
626
- while (col + 1 < num_coeffs_to_reduce) {
627
- *hr1 = __halves2half2(
628
- input.m_impl.coeff(row * num_coeffs_to_reduce + col),
629
- input.m_impl.coeff(row * num_coeffs_to_reduce + col + 1));
630
- *hr2 = __halves2half2(
631
- input.m_impl.coeff((row + 1) * num_coeffs_to_reduce + col),
632
- input.m_impl.coeff((row + 1) * num_coeffs_to_reduce + col +
633
- 1));
634
- hr1++;
635
- hr2++;
636
- col += 2;
637
- }
638
- if (col < num_coeffs_to_reduce) {
639
- // Peel;
640
- const half last1 =
641
- input.m_impl.coeff(row * num_coeffs_to_reduce + col);
642
- *hr1 = __halves2half2(last1, reducer.initialize());
643
- const half last2 =
644
- input.m_impl.coeff((row + 1) * num_coeffs_to_reduce + col);
645
- *hr2 = __halves2half2(last2, reducer.initialize());
646
- }
647
- reducer.reducePacket(r1, &reduced_val1);
648
- reducer.reducePacket(r2, &reduced_val2);
649
- }
650
- break;
651
- } else {
652
- // Faster version of the loop with no branches after unrolling.
653
- #pragma unroll
654
- for (int k = 0; k < unroll_times; ++k) {
655
- const Index col = col_begin + blockDim.x * (j + k) * packet_width;
656
- reducer.reducePacket(input.m_impl.template packet<Unaligned>(
657
- row * num_coeffs_to_reduce + col),
658
- &reduced_val1);
659
- reducer.reducePacket(input.m_impl.template packet<Unaligned>(
660
- (row + 1) * num_coeffs_to_reduce + col),
661
- &reduced_val2);
662
- }
663
- }
664
- }
665
-
666
- #pragma unroll
667
- for (int offset = warpSize/2; offset > 0; offset /= 2) {
668
- #if defined(EIGEN_HIPCC)
669
- PacketType r1;
670
- PacketType r2;
671
- half2* hr1 = reinterpret_cast<half2*>(&r1);
672
- half2* hr2 = reinterpret_cast<half2*>(&r2);
673
- half2* rv1 = reinterpret_cast<half2*>(&reduced_val1);
674
- half2* rv2 = reinterpret_cast<half2*>(&reduced_val2);
675
- for (int i = 0; i < packet_width / 2; i++) {
676
- // FIXME : remove this workaround once we have native half/half2 support for __shfl_down
677
- union { int i; half2 h; } wka_in1, wka_out1;
678
- wka_in1.h = rv1[i];
679
- wka_out1.i = __shfl_down(wka_in1.i, offset, warpSize);
680
- hr1[i] = wka_out1.h;
681
-
682
- union { int i; half2 h; } wka_in2, wka_out2;
683
- wka_in2.h = rv2[i];
684
- wka_out2.i = __shfl_down(wka_in2.i, offset, warpSize);
685
- hr2[i] = wka_out2.h;
686
- }
687
- reducer.reducePacket(r1, &reduced_val1);
688
- reducer.reducePacket(r2, &reduced_val2);
689
- #elif defined(EIGEN_CUDA_SDK_VER) && EIGEN_CUDA_SDK_VER < 90000
690
- PacketType r1;
691
- PacketType r2;
692
- half2* hr1 = reinterpret_cast<half2*>(&r1);
693
- half2* hr2 = reinterpret_cast<half2*>(&r2);
694
- half2* rv1 = reinterpret_cast<half2*>(&reduced_val1);
695
- half2* rv2 = reinterpret_cast<half2*>(&reduced_val2);
696
- for (int i = 0; i < packet_width / 2; i++) {
697
- hr1[i] = __shfl_down(rv1[i], offset, warpSize);
698
- hr2[i] = __shfl_down(rv2[i], offset, warpSize);
699
- }
700
- reducer.reducePacket(r1, &reduced_val1);
701
- reducer.reducePacket(r2, &reduced_val2);
702
- #else
703
- PacketType r1;
704
- PacketType r2;
705
- half2* hr1 = reinterpret_cast<half2*>(&r1);
706
- half2* hr2 = reinterpret_cast<half2*>(&r2);
707
- half2* rr1 = reinterpret_cast<half2*>(&reduced_val1);
708
- half2* rr2 = reinterpret_cast<half2*>(&reduced_val2);
709
- for (int i = 0; i < packet_width / 2; i++) {
710
- hr1[i] =
711
- __shfl_down_sync(0xFFFFFFFF, rr1[i], (unsigned)offset, warpSize);
712
- hr2[i] =
713
- __shfl_down_sync(0xFFFFFFFF, rr2[i], (unsigned)offset, warpSize);
714
- }
715
- reducer.reducePacket(r1, &reduced_val1);
716
- reducer.reducePacket(r2, &reduced_val2);
717
-
718
- #endif
719
- }
720
- half2* rv1 = reinterpret_cast<half2*>(&reduced_val1);
721
- half2* rv2 = reinterpret_cast<half2*>(&reduced_val2);
722
- half2 val;
723
- if (packet_width > 2) {
724
- reducer.reducePacket(rv1[2], rv1);
725
- reducer.reducePacket(rv1[3], rv1 + 1);
726
- reducer.reducePacket(rv1[1], rv1);
727
- reducer.reducePacket(rv2[2], rv2);
728
- reducer.reducePacket(rv2[3], rv2 + 1);
729
- reducer.reducePacket(rv2[1], rv2);
730
- }
731
- half val1 = __low2half(*rv1);
732
- reducer.reduce(__high2half(*rv1), &val1);
733
- half val2 = __low2half(*rv2);
734
- reducer.reduce(__high2half(*rv2), &val2);
735
- val = __halves2half2(val1, val2);
736
- if ((threadIdx.x & (warpSize - 1)) == 0) {
737
- half* loc = output + row;
738
- atomicReduce((half2*)loc, val, reducer);
739
- }
740
- }
741
- }
742
- }
743
-
744
- #endif // EIGEN_HAS_GPU_FP16
745
-
746
- template <typename Self, typename Op, typename OutputType, bool PacketAccess, typename Enabled = void>
747
- struct InnerReductionLauncher {
748
- static EIGEN_DEVICE_FUNC bool run(const Self&, Op&, const GpuDevice&, OutputType*, typename Self::Index, typename Self::Index) {
749
- gpu_assert(false && "Should only be called to reduce doubles, floats and half floats on a gpu device");
750
- return true;
751
- }
752
- };
753
-
754
- // Specialization for float and double
755
- template <typename Self, typename Op, typename OutputType, bool PacketAccess>
756
- struct InnerReductionLauncher<
757
- Self, Op, OutputType, PacketAccess,
758
- typename internal::enable_if<
759
- internal::is_same<float, OutputType>::value ||
760
- internal::is_same<double, OutputType>::value,
761
- void>::type> {
762
- static bool run(const Self& self, Op& reducer, const GpuDevice& device, OutputType* output, typename Self::Index num_coeffs_to_reduce, typename Self::Index num_preserved_vals) {
763
- typedef typename Self::Index Index;
764
-
765
- const Index num_coeffs = num_coeffs_to_reduce * num_preserved_vals;
766
- const int block_size = 256;
767
- const int num_per_thread = 128;
768
- const int dyn_blocks = divup<int>(num_coeffs, block_size * num_per_thread);
769
- const int max_blocks = device.getNumGpuMultiProcessors() *
770
- device.maxGpuThreadsPerMultiProcessor() / block_size;
771
- const int num_blocks = numext::mini<int>(max_blocks, dyn_blocks);
772
-
773
- if (num_blocks > 1) {
774
- // We initialize the outputs outside the reduction kernel when we can't be sure that there
775
- // won't be a race conditions between multiple thread blocks.
776
- const int dyn_blocks = divup<int>(num_preserved_vals, 1024);
777
- const int max_blocks = device.getNumGpuMultiProcessors() *
778
- device.maxGpuThreadsPerMultiProcessor() / 1024;
779
- const int num_blocks = numext::mini<int>(max_blocks, dyn_blocks);
780
- LAUNCH_GPU_KERNEL((ReductionInitKernel<OutputType, Index>),
781
- num_blocks, 1024, 0, device, reducer.initialize(),
782
- num_preserved_vals, output);
783
- }
784
-
785
- LAUNCH_GPU_KERNEL((InnerReductionKernel<num_per_thread, Self, Op, Index>),
786
- num_blocks, block_size, 0, device, reducer, self, num_coeffs_to_reduce, num_preserved_vals, output);
787
-
788
- return false;
789
- }
790
- };
791
-
792
- #ifdef EIGEN_HAS_GPU_FP16
793
- template <typename Self, typename Op>
794
- struct InnerReductionLauncher<Self, Op, Eigen::half, false> {
795
- static bool run(const Self&, Op&, const GpuDevice&, half*, typename Self::Index, typename Self::Index) {
796
- gpu_assert(false && "Should not be called since there is no packet accessor");
797
- return true;
798
- }
799
- };
800
-
801
- template <typename Self, typename Op>
802
- struct InnerReductionLauncher<Self, Op, Eigen::half, true> {
803
- static bool run(const Self& self, Op& reducer, const GpuDevice& device, half* output, typename Self::Index num_coeffs_to_reduce, typename Self::Index num_preserved_vals) {
804
- typedef typename Self::Index Index;
805
-
806
- if (num_preserved_vals % 2 != 0) {
807
- // Not supported yet, revert to the slower code path
808
- return true;
809
- }
810
-
811
- const Index num_coeffs = num_coeffs_to_reduce * num_preserved_vals;
812
- const int block_size = /*256*/128;
813
- const int num_per_thread = /*128*/64;
814
- const int dyn_blocks = divup<int>(num_coeffs, block_size * num_per_thread);
815
- const int max_blocks = device.getNumGpuMultiProcessors() *
816
- device.maxGpuThreadsPerMultiProcessor() / block_size;
817
- const int num_blocks = numext::mini<int>(max_blocks, dyn_blocks);
818
-
819
- if (num_blocks > 1) {
820
- // We initialize the outputs outside the reduction kernel when we can't be sure that there
821
- // won't be a race conditions between multiple thread blocks.
822
- LAUNCH_GPU_KERNEL((ReductionInitKernelHalfFloat<Self, Op, Index>),
823
- 1, 1, 0, device, reducer, self, num_preserved_vals, output);
824
- }
825
-
826
- LAUNCH_GPU_KERNEL((InnerReductionKernelHalfFloat<num_per_thread, Self, Op, Index>),
827
- num_blocks, block_size, 0, device, reducer, self, num_coeffs_to_reduce, num_preserved_vals, output);
828
-
829
- return false;
830
- }
831
- };
832
- #endif // EIGEN_HAS_GPU_FP16
833
-
834
-
835
- template <typename Self, typename Op>
836
- struct InnerReducer<Self, Op, GpuDevice> {
837
- // Unfortunately nvidia doesn't support well exotic types such as complex,
838
- // so reduce the scope of the optimized version of the code to the simple case
839
- // of floats and half floats.
840
- #ifdef EIGEN_HAS_GPU_FP16
841
- static const bool HasOptimizedImplementation = !Self::ReducerTraits::IsStateful &&
842
- (internal::is_same<typename Self::CoeffReturnType, float>::value ||
843
- internal::is_same<typename Self::CoeffReturnType, double>::value ||
844
- (internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && reducer_traits<Op, GpuDevice>::PacketAccess));
845
- #else // EIGEN_HAS_GPU_FP16
846
- static const bool HasOptimizedImplementation = !Self::ReducerTraits::IsStateful &&
847
- (internal::is_same<typename Self::CoeffReturnType, float>::value ||
848
- internal::is_same<typename Self::CoeffReturnType, double>::value);
849
- #endif // EIGEN_HAS_GPU_FP16
850
-
851
- template <typename OutputType>
852
- static bool run(const Self& self, Op& reducer, const GpuDevice& device, OutputType* output, typename Self::Index num_coeffs_to_reduce, typename Self::Index num_preserved_vals) {
853
- gpu_assert(HasOptimizedImplementation && "Should only be called on doubles, floats or half floats");
854
- const Index num_coeffs = array_prod(self.m_impl.dimensions());
855
- // Don't crash when we're called with an input tensor of size 0.
856
- if (num_coeffs == 0) {
857
- return true;
858
- }
859
- // It's faster to use the usual code.
860
- if (num_coeffs_to_reduce <= 128) {
861
- return true;
862
- }
863
-
864
- return InnerReductionLauncher<Self, Op, OutputType, reducer_traits<Op, GpuDevice>::PacketAccess>::run(self, reducer, device, output, num_coeffs_to_reduce, num_preserved_vals);
865
- }
866
- };
867
-
868
- template <int NumPerThread, typename Self,
869
- typename Reducer, typename Index>
870
- __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void OuterReductionKernel(Reducer reducer, const Self input, Index num_coeffs_to_reduce, Index num_preserved_coeffs,
871
- typename Self::CoeffReturnType* output) {
872
- const Index num_threads = blockDim.x * gridDim.x;
873
- const Index thread_id = blockIdx.x * blockDim.x + threadIdx.x;
874
- // Initialize the output values if they weren't initialized by the ReductionInitKernel
875
- if (gridDim.x == 1) {
876
- for (Index i = thread_id; i < num_preserved_coeffs; i += num_threads) {
877
- output[i] = reducer.initialize();
878
- }
879
- __syncthreads();
880
- }
881
-
882
- // Do the reduction.
883
- const Index max_iter = num_preserved_coeffs * divup<Index>(num_coeffs_to_reduce, NumPerThread);
884
- for (Index i = thread_id; i < max_iter; i += num_threads) {
885
- const Index input_col = i % num_preserved_coeffs;
886
- const Index input_row = (i / num_preserved_coeffs) * NumPerThread;
887
- typename Self::CoeffReturnType reduced_val = reducer.initialize();
888
- const Index max_row = numext::mini(input_row + NumPerThread, num_coeffs_to_reduce);
889
- for (Index j = input_row; j < max_row; j++) {
890
- typename Self::CoeffReturnType val = input.m_impl.coeff(j * num_preserved_coeffs + input_col);
891
- reducer.reduce(val, &reduced_val);
892
- }
893
- atomicReduce(&(output[input_col]), reduced_val, reducer);
894
- }
895
- }
896
-
897
-
898
- template <typename Self, typename Op>
899
- struct OuterReducer<Self, Op, GpuDevice> {
900
- // Unfortunately nvidia doesn't support well exotic types such as complex,
901
- // so reduce the scope of the optimized version of the code to the simple case
902
- // of floats.
903
- static const bool HasOptimizedImplementation = !Self::ReducerTraits::IsStateful &&
904
- (internal::is_same<typename Self::CoeffReturnType, float>::value ||
905
- internal::is_same<typename Self::CoeffReturnType, double>::value);
906
- template <typename Device, typename OutputType>
907
- static
908
- #if !defined(EIGEN_HIPCC)
909
- // FIXME : leaving this EIGEN_DEVICE_FUNC in, results in the following runtime error
910
- // (in the cxx11_tensor_reduction_gpu test)
911
- //
912
- // terminate called after throwing an instance of 'std::runtime_error'
913
- // what(): No device code available for function: _ZN5Eigen8internal20OuterReductionKernelIL...
914
- //
915
- // don't know why this happens (and why is it a runtime error instead of a compile time error)
916
- //
917
- // this will be fixed by HIP PR#457
918
- EIGEN_DEVICE_FUNC
919
- #endif
920
- bool run(const Self&, Op&, const Device&, OutputType*, typename Self::Index, typename Self::Index) {
921
- gpu_assert(false && "Should only be called to reduce doubles or floats on a gpu device");
922
- return true;
923
- }
924
-
925
- static bool run(const Self& self, Op& reducer, const GpuDevice& device, float* output, typename Self::Index num_coeffs_to_reduce, typename Self::Index num_preserved_vals) {
926
- typedef typename Self::Index Index;
927
-
928
- // It's faster to use the usual code.
929
- if (num_coeffs_to_reduce <= 32) {
930
- return true;
931
- }
932
-
933
- const Index num_coeffs = num_coeffs_to_reduce * num_preserved_vals;
934
- const int block_size = 256;
935
- const int num_per_thread = 16;
936
- const int dyn_blocks = divup<int>(num_coeffs, block_size * num_per_thread);
937
- const int max_blocks = device.getNumGpuMultiProcessors() *
938
- device.maxGpuThreadsPerMultiProcessor() / block_size;
939
- const int num_blocks = numext::mini<int>(max_blocks, dyn_blocks);
940
-
941
- if (num_blocks > 1) {
942
- // We initialize the outputs in the reduction kernel itself when we don't have to worry
943
- // about race conditions between multiple thread blocks.
944
- const int dyn_blocks = divup<int>(num_preserved_vals, 1024);
945
- const int max_blocks = device.getNumGpuMultiProcessors() *
946
- device.maxGpuThreadsPerMultiProcessor() / 1024;
947
- const int num_blocks = numext::mini<int>(max_blocks, dyn_blocks);
948
- LAUNCH_GPU_KERNEL((ReductionInitKernel<float, Index>),
949
- num_blocks, 1024, 0, device, reducer.initialize(),
950
- num_preserved_vals, output);
951
- }
952
-
953
- LAUNCH_GPU_KERNEL((OuterReductionKernel<num_per_thread, Self, Op, Index>),
954
- num_blocks, block_size, 0, device, reducer, self, num_coeffs_to_reduce, num_preserved_vals, output);
955
-
956
- return false;
957
- }
958
- };
959
-
960
- #endif // defined(EIGEN_USE_GPU) && defined(EIGEN_GPUCC)
961
-
962
-
963
- } // end namespace internal
964
- } // end namespace Eigen
965
-
966
- #endif // EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_GPU_H