sequenzo 0.1.18__cp310-cp310-win_amd64.whl → 0.1.20__cp310-cp310-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sequenzo might be problematic. Click here for more details.

Files changed (399) hide show
  1. sequenzo/__init__.py +39 -7
  2. sequenzo/big_data/clara/utils/get_weighted_diss.c +154 -154
  3. sequenzo/big_data/clara/utils/get_weighted_diss.cp310-win_amd64.pyd +0 -0
  4. sequenzo/clustering/KMedoids.py +39 -0
  5. sequenzo/clustering/clustering_c_code.cp310-win_amd64.pyd +0 -0
  6. sequenzo/clustering/hierarchical_clustering.py +108 -6
  7. sequenzo/define_sequence_data.py +10 -1
  8. sequenzo/dissimilarity_measures/c_code.cp310-win_amd64.pyd +0 -0
  9. sequenzo/dissimilarity_measures/get_distance_matrix.py +2 -3
  10. sequenzo/dissimilarity_measures/get_substitution_cost_matrix.py +1 -1
  11. sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.c +154 -154
  12. sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.cp310-win_amd64.pyd +0 -0
  13. sequenzo/dissimilarity_measures/utils/seqconc.c +154 -154
  14. sequenzo/dissimilarity_measures/utils/seqconc.cp310-win_amd64.pyd +0 -0
  15. sequenzo/dissimilarity_measures/utils/seqdss.c +154 -154
  16. sequenzo/dissimilarity_measures/utils/seqdss.cp310-win_amd64.pyd +0 -0
  17. sequenzo/dissimilarity_measures/utils/seqdur.c +154 -154
  18. sequenzo/dissimilarity_measures/utils/seqdur.cp310-win_amd64.pyd +0 -0
  19. sequenzo/dissimilarity_measures/utils/seqlength.c +154 -154
  20. sequenzo/dissimilarity_measures/utils/seqlength.cp310-win_amd64.pyd +0 -0
  21. sequenzo/multidomain/cat.py +0 -53
  22. sequenzo/multidomain/dat.py +11 -3
  23. sequenzo/multidomain/idcd.py +0 -3
  24. sequenzo/multidomain/linked_polyad.py +0 -1
  25. sequenzo/openmp_setup.py +233 -0
  26. sequenzo/visualization/plot_transition_matrix.py +21 -22
  27. {sequenzo-0.1.18.dist-info → sequenzo-0.1.20.dist-info}/METADATA +71 -10
  28. sequenzo-0.1.20.dist-info/RECORD +272 -0
  29. sequenzo/dissimilarity_measures/setup.py +0 -35
  30. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Cholesky/LDLT.h +0 -688
  31. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Cholesky/LLT.h +0 -558
  32. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h +0 -99
  33. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +0 -682
  34. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +0 -346
  35. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +0 -462
  36. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +0 -91
  37. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/EigenSolver.h +0 -622
  38. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +0 -418
  39. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +0 -226
  40. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +0 -374
  41. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +0 -158
  42. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/RealQZ.h +0 -657
  43. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/RealSchur.h +0 -558
  44. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +0 -77
  45. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +0 -904
  46. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +0 -87
  47. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +0 -561
  48. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/AlignedBox.h +0 -486
  49. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/AngleAxis.h +0 -247
  50. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/EulerAngles.h +0 -114
  51. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/Homogeneous.h +0 -501
  52. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/Hyperplane.h +0 -282
  53. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/OrthoMethods.h +0 -235
  54. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/ParametrizedLine.h +0 -232
  55. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/Quaternion.h +0 -870
  56. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/Rotation2D.h +0 -199
  57. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/RotationBase.h +0 -206
  58. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/Scaling.h +0 -188
  59. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/Transform.h +0 -1563
  60. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/Translation.h +0 -202
  61. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/Umeyama.h +0 -166
  62. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +0 -168
  63. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Householder/BlockHouseholder.h +0 -110
  64. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Householder/Householder.h +0 -176
  65. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Householder/HouseholderSequence.h +0 -545
  66. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +0 -226
  67. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +0 -212
  68. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +0 -229
  69. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +0 -394
  70. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +0 -453
  71. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +0 -444
  72. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +0 -198
  73. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +0 -117
  74. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Jacobi/Jacobi.h +0 -483
  75. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/KLUSupport/KLUSupport.h +0 -358
  76. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/LU/Determinant.h +0 -117
  77. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/LU/FullPivLU.h +0 -877
  78. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/LU/InverseImpl.h +0 -432
  79. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/LU/PartialPivLU.h +0 -624
  80. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h +0 -83
  81. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/LU/arch/InverseSize4.h +0 -351
  82. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/MetisSupport/MetisSupport.h +0 -137
  83. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/OrderingMethods/Amd.h +0 -435
  84. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +0 -1863
  85. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/OrderingMethods/Ordering.h +0 -153
  86. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +0 -678
  87. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +0 -545
  88. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/QR/ColPivHouseholderQR.h +0 -674
  89. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +0 -97
  90. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +0 -635
  91. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/QR/FullPivHouseholderQR.h +0 -713
  92. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/QR/HouseholderQR.h +0 -434
  93. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h +0 -68
  94. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +0 -335
  95. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SVD/BDCSVD.h +0 -1366
  96. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SVD/JacobiSVD.h +0 -812
  97. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +0 -91
  98. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SVD/SVDBase.h +0 -376
  99. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SVD/UpperBidiagonalization.h +0 -414
  100. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +0 -697
  101. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +0 -174
  102. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/AmbiVector.h +0 -378
  103. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/CompressedStorage.h +0 -274
  104. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +0 -352
  105. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h +0 -67
  106. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseAssign.h +0 -270
  107. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseBlock.h +0 -571
  108. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseColEtree.h +0 -206
  109. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +0 -370
  110. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +0 -722
  111. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +0 -150
  112. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +0 -342
  113. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +0 -138
  114. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseDot.h +0 -98
  115. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseFuzzy.h +0 -29
  116. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseMap.h +0 -305
  117. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseMatrix.h +0 -1518
  118. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +0 -398
  119. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparsePermutation.h +0 -178
  120. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseProduct.h +0 -181
  121. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseRedux.h +0 -49
  122. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseRef.h +0 -397
  123. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +0 -659
  124. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseSolverBase.h +0 -124
  125. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +0 -198
  126. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseTranspose.h +0 -92
  127. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseTriangularView.h +0 -189
  128. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseUtil.h +0 -186
  129. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseVector.h +0 -478
  130. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseView.h +0 -254
  131. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/TriangularSolver.h +0 -315
  132. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU.h +0 -923
  133. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLUImpl.h +0 -66
  134. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +0 -226
  135. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_Structs.h +0 -110
  136. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +0 -375
  137. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_Utils.h +0 -80
  138. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h +0 -181
  139. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +0 -179
  140. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +0 -107
  141. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +0 -280
  142. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +0 -126
  143. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +0 -130
  144. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +0 -223
  145. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h +0 -258
  146. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h +0 -137
  147. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h +0 -136
  148. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h +0 -83
  149. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseQR/SparseQR.h +0 -758
  150. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/StlSupport/StdDeque.h +0 -116
  151. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/StlSupport/StdList.h +0 -106
  152. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/StlSupport/StdVector.h +0 -131
  153. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/StlSupport/details.h +0 -84
  154. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +0 -1025
  155. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +0 -642
  156. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/misc/Image.h +0 -82
  157. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/misc/Kernel.h +0 -79
  158. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/misc/RealSvd2x2.h +0 -55
  159. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/misc/blas.h +0 -440
  160. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/misc/lapack.h +0 -152
  161. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/misc/lapacke.h +0 -16292
  162. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/misc/lapacke_mangling.h +0 -17
  163. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +0 -358
  164. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +0 -696
  165. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/BlockMethods.h +0 -1442
  166. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h +0 -115
  167. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +0 -177
  168. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/IndexedViewMethods.h +0 -262
  169. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +0 -152
  170. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +0 -95
  171. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/ReshapedMethods.h +0 -149
  172. sequenzo/dissimilarity_measures/src/eigen/bench/BenchSparseUtil.h +0 -149
  173. sequenzo/dissimilarity_measures/src/eigen/bench/BenchTimer.h +0 -199
  174. sequenzo/dissimilarity_measures/src/eigen/bench/BenchUtil.h +0 -92
  175. sequenzo/dissimilarity_measures/src/eigen/bench/basicbenchmark.h +0 -63
  176. sequenzo/dissimilarity_measures/src/eigen/bench/btl/generic_bench/utils/utilities.h +0 -90
  177. sequenzo/dissimilarity_measures/src/eigen/bench/btl/libs/BLAS/blas.h +0 -675
  178. sequenzo/dissimilarity_measures/src/eigen/bench/btl/libs/BLAS/c_interface_base.h +0 -73
  179. sequenzo/dissimilarity_measures/src/eigen/bench/perf_monitoring/gemm_common.h +0 -67
  180. sequenzo/dissimilarity_measures/src/eigen/bench/perf_monitoring/gemv_common.h +0 -69
  181. sequenzo/dissimilarity_measures/src/eigen/bench/spbench/spbenchsolver.h +0 -573
  182. sequenzo/dissimilarity_measures/src/eigen/bench/spbench/spbenchstyle.h +0 -95
  183. sequenzo/dissimilarity_measures/src/eigen/bench/tensors/benchmark.h +0 -49
  184. sequenzo/dissimilarity_measures/src/eigen/bench/tensors/tensor_benchmarks.h +0 -597
  185. sequenzo/dissimilarity_measures/src/eigen/blas/BandTriangularSolver.h +0 -97
  186. sequenzo/dissimilarity_measures/src/eigen/blas/GeneralRank1Update.h +0 -44
  187. sequenzo/dissimilarity_measures/src/eigen/blas/PackedSelfadjointProduct.h +0 -53
  188. sequenzo/dissimilarity_measures/src/eigen/blas/PackedTriangularMatrixVector.h +0 -79
  189. sequenzo/dissimilarity_measures/src/eigen/blas/PackedTriangularSolverVector.h +0 -88
  190. sequenzo/dissimilarity_measures/src/eigen/blas/Rank2Update.h +0 -57
  191. sequenzo/dissimilarity_measures/src/eigen/blas/common.h +0 -175
  192. sequenzo/dissimilarity_measures/src/eigen/blas/f2c/datatypes.h +0 -24
  193. sequenzo/dissimilarity_measures/src/eigen/blas/level1_cplx_impl.h +0 -155
  194. sequenzo/dissimilarity_measures/src/eigen/blas/level1_impl.h +0 -144
  195. sequenzo/dissimilarity_measures/src/eigen/blas/level1_real_impl.h +0 -122
  196. sequenzo/dissimilarity_measures/src/eigen/blas/level2_cplx_impl.h +0 -360
  197. sequenzo/dissimilarity_measures/src/eigen/blas/level2_impl.h +0 -553
  198. sequenzo/dissimilarity_measures/src/eigen/blas/level2_real_impl.h +0 -306
  199. sequenzo/dissimilarity_measures/src/eigen/blas/level3_impl.h +0 -702
  200. sequenzo/dissimilarity_measures/src/eigen/debug/gdb/__init__.py +0 -1
  201. sequenzo/dissimilarity_measures/src/eigen/debug/gdb/printers.py +0 -314
  202. sequenzo/dissimilarity_measures/src/eigen/demos/mandelbrot/mandelbrot.h +0 -71
  203. sequenzo/dissimilarity_measures/src/eigen/demos/mix_eigen_and_c/binary_library.h +0 -71
  204. sequenzo/dissimilarity_measures/src/eigen/demos/opengl/camera.h +0 -118
  205. sequenzo/dissimilarity_measures/src/eigen/demos/opengl/gpuhelper.h +0 -207
  206. sequenzo/dissimilarity_measures/src/eigen/demos/opengl/icosphere.h +0 -30
  207. sequenzo/dissimilarity_measures/src/eigen/demos/opengl/quaternion_demo.h +0 -114
  208. sequenzo/dissimilarity_measures/src/eigen/demos/opengl/trackball.h +0 -42
  209. sequenzo/dissimilarity_measures/src/eigen/lapack/lapack_common.h +0 -29
  210. sequenzo/dissimilarity_measures/src/eigen/scripts/relicense.py +0 -69
  211. sequenzo/dissimilarity_measures/src/eigen/test/AnnoyingScalar.h +0 -165
  212. sequenzo/dissimilarity_measures/src/eigen/test/MovableScalar.h +0 -35
  213. sequenzo/dissimilarity_measures/src/eigen/test/SafeScalar.h +0 -30
  214. sequenzo/dissimilarity_measures/src/eigen/test/bug1213.h +0 -8
  215. sequenzo/dissimilarity_measures/src/eigen/test/evaluator_common.h +0 -0
  216. sequenzo/dissimilarity_measures/src/eigen/test/gpu_common.h +0 -176
  217. sequenzo/dissimilarity_measures/src/eigen/test/main.h +0 -857
  218. sequenzo/dissimilarity_measures/src/eigen/test/packetmath_test_shared.h +0 -275
  219. sequenzo/dissimilarity_measures/src/eigen/test/product.h +0 -259
  220. sequenzo/dissimilarity_measures/src/eigen/test/random_without_cast_overflow.h +0 -152
  221. sequenzo/dissimilarity_measures/src/eigen/test/solverbase.h +0 -36
  222. sequenzo/dissimilarity_measures/src/eigen/test/sparse.h +0 -204
  223. sequenzo/dissimilarity_measures/src/eigen/test/sparse_solver.h +0 -699
  224. sequenzo/dissimilarity_measures/src/eigen/test/split_test_helper.h +0 -5994
  225. sequenzo/dissimilarity_measures/src/eigen/test/svd_common.h +0 -521
  226. sequenzo/dissimilarity_measures/src/eigen/test/svd_fill.h +0 -118
  227. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/Tensor.h +0 -554
  228. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h +0 -329
  229. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h +0 -247
  230. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +0 -1176
  231. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h +0 -1559
  232. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h +0 -1093
  233. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h +0 -518
  234. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h +0 -377
  235. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h +0 -1023
  236. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h +0 -73
  237. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h +0 -6
  238. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h +0 -1413
  239. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h +0 -575
  240. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionSycl.h +0 -1650
  241. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h +0 -1679
  242. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h +0 -456
  243. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h +0 -1132
  244. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConvolutionSycl.h +0 -544
  245. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h +0 -214
  246. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h +0 -347
  247. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h +0 -137
  248. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h +0 -6
  249. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h +0 -104
  250. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceGpu.h +0 -389
  251. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h +0 -1048
  252. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h +0 -409
  253. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h +0 -236
  254. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h +0 -490
  255. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h +0 -236
  256. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h +0 -983
  257. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h +0 -703
  258. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h +0 -388
  259. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h +0 -669
  260. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h +0 -379
  261. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h +0 -237
  262. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h +0 -191
  263. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h +0 -488
  264. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h +0 -302
  265. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorGlobalFunctions.h +0 -33
  266. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h +0 -99
  267. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaUndefines.h +0 -44
  268. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h +0 -79
  269. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h +0 -603
  270. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h +0 -738
  271. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h +0 -247
  272. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorInitializer.h +0 -82
  273. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h +0 -263
  274. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h +0 -216
  275. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMacros.h +0 -98
  276. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h +0 -327
  277. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h +0 -311
  278. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h +0 -1102
  279. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h +0 -708
  280. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h +0 -291
  281. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h +0 -322
  282. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h +0 -998
  283. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h +0 -6
  284. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h +0 -966
  285. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReductionSycl.h +0 -582
  286. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h +0 -454
  287. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h +0 -465
  288. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h +0 -528
  289. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorScanSycl.h +0 -513
  290. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h +0 -471
  291. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h +0 -161
  292. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h +0 -346
  293. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorTrace.h +0 -303
  294. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h +0 -264
  295. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h +0 -249
  296. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h +0 -629
  297. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/TensorSymmetry/DynamicSymmetry.h +0 -293
  298. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/TensorSymmetry/StaticSymmetry.h +0 -236
  299. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/TensorSymmetry/Symmetry.h +0 -338
  300. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/TensorSymmetry/util/TemplateGroupTheory.h +0 -669
  301. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/Barrier.h +0 -67
  302. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/EventCount.h +0 -249
  303. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h +0 -486
  304. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/RunQueue.h +0 -236
  305. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadCancel.h +0 -23
  306. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadEnvironment.h +0 -40
  307. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadLocal.h +0 -301
  308. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadPoolInterface.h +0 -48
  309. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadYield.h +0 -20
  310. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/util/CXX11Meta.h +0 -537
  311. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/util/CXX11Workarounds.h +0 -88
  312. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/util/EmulateArray.h +0 -261
  313. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/util/MaxSizeVector.h +0 -158
  314. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/AutoDiff/AutoDiffJacobian.h +0 -108
  315. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h +0 -730
  316. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/AutoDiff/AutoDiffVector.h +0 -220
  317. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/BVH/BVAlgorithms.h +0 -293
  318. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/BVH/KdBVH.h +0 -223
  319. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Eigenvalues/ArpackSelfAdjointEigenSolver.h +0 -790
  320. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/EulerAngles/EulerAngles.h +0 -355
  321. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/EulerAngles/EulerSystem.h +0 -305
  322. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/FFT/ei_fftw_impl.h +0 -261
  323. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/FFT/ei_kissfft_impl.h +0 -449
  324. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/IterativeSolvers/ConstrainedConjGrad.h +0 -187
  325. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/IterativeSolvers/DGMRES.h +0 -511
  326. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/IterativeSolvers/GMRES.h +0 -335
  327. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/IterativeSolvers/IDRS.h +0 -436
  328. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/IterativeSolvers/IncompleteLU.h +0 -90
  329. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/IterativeSolvers/IterationController.h +0 -154
  330. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/IterativeSolvers/MINRES.h +0 -267
  331. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/IterativeSolvers/Scaling.h +0 -193
  332. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h +0 -305
  333. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/LevenbergMarquardt/LMcovar.h +0 -84
  334. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/LevenbergMarquardt/LMonestep.h +0 -202
  335. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/LevenbergMarquardt/LMpar.h +0 -160
  336. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/LevenbergMarquardt/LMqrsolv.h +0 -188
  337. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/LevenbergMarquardt/LevenbergMarquardt.h +0 -396
  338. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h +0 -441
  339. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h +0 -569
  340. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h +0 -373
  341. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/MatrixFunctions/MatrixPower.h +0 -705
  342. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/MatrixFunctions/MatrixSquareRoot.h +0 -368
  343. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/MatrixFunctions/StemFunction.h +0 -117
  344. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/MoreVectorization/MathFunctions.h +0 -95
  345. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/HybridNonLinearSolver.h +0 -601
  346. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/LevenbergMarquardt.h +0 -657
  347. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/chkder.h +0 -66
  348. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/covar.h +0 -70
  349. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/dogleg.h +0 -107
  350. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/fdjac1.h +0 -79
  351. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/lmpar.h +0 -298
  352. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/qrsolv.h +0 -91
  353. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/r1mpyq.h +0 -30
  354. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/r1updt.h +0 -99
  355. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/rwupdt.h +0 -49
  356. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NumericalDiff/NumericalDiff.h +0 -130
  357. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Polynomials/Companion.h +0 -280
  358. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Polynomials/PolynomialSolver.h +0 -428
  359. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Polynomials/PolynomialUtils.h +0 -143
  360. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Skyline/SkylineInplaceLU.h +0 -352
  361. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Skyline/SkylineMatrix.h +0 -862
  362. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Skyline/SkylineMatrixBase.h +0 -212
  363. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Skyline/SkylineProduct.h +0 -295
  364. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Skyline/SkylineStorage.h +0 -259
  365. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Skyline/SkylineUtil.h +0 -89
  366. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SparseExtra/BlockOfDynamicSparseMatrix.h +0 -122
  367. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SparseExtra/BlockSparseMatrix.h +0 -1079
  368. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SparseExtra/DynamicSparseMatrix.h +0 -404
  369. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SparseExtra/MarketIO.h +0 -282
  370. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SparseExtra/MatrixMarketIterator.h +0 -247
  371. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SparseExtra/RandomSetter.h +0 -349
  372. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsArrayAPI.h +0 -286
  373. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsBFloat16.h +0 -68
  374. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsFunctors.h +0 -357
  375. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsHalf.h +0 -66
  376. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsImpl.h +0 -1959
  377. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsPacketMath.h +0 -118
  378. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/HipVectorCompatibility.h +0 -67
  379. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsArrayAPI.h +0 -167
  380. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsBFloat16.h +0 -58
  381. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h +0 -330
  382. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h +0 -58
  383. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h +0 -2045
  384. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h +0 -79
  385. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/arch/AVX/BesselFunctions.h +0 -46
  386. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/arch/AVX/SpecialFunctions.h +0 -16
  387. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/arch/AVX512/BesselFunctions.h +0 -46
  388. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/arch/AVX512/SpecialFunctions.h +0 -16
  389. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/arch/GPU/SpecialFunctions.h +0 -369
  390. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/arch/NEON/BesselFunctions.h +0 -54
  391. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/arch/NEON/SpecialFunctions.h +0 -34
  392. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Splines/Spline.h +0 -507
  393. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Splines/SplineFitting.h +0 -431
  394. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Splines/SplineFwd.h +0 -93
  395. sequenzo/dissimilarity_measures/src/eigen/unsupported/test/matrix_functions.h +0 -67
  396. sequenzo-0.1.18.dist-info/RECORD +0 -638
  397. {sequenzo-0.1.18.dist-info → sequenzo-0.1.20.dist-info}/WHEEL +0 -0
  398. {sequenzo-0.1.18.dist-info → sequenzo-0.1.20.dist-info}/licenses/LICENSE +0 -0
  399. {sequenzo-0.1.18.dist-info → sequenzo-0.1.20.dist-info}/top_level.txt +0 -0
@@ -1,966 +0,0 @@
1
- // This file is part of Eigen, a lightweight C++ template library
2
- // for linear algebra.
3
- //
4
- // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
5
- //
6
- // This Source Code Form is subject to the terms of the Mozilla
7
- // Public License v. 2.0. If a copy of the MPL was not distributed
8
- // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9
-
10
- #ifndef EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_GPU_H
11
- #define EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_GPU_H
12
-
13
- namespace Eigen {
14
- namespace internal {
15
-
16
-
17
- #if defined(EIGEN_USE_GPU) && defined(EIGEN_GPUCC)
18
- // Full reducers for GPU, don't vectorize for now
19
-
20
- // Reducer function that enables multiple gpu thread to safely accumulate at the same
21
- // output address. It basically reads the current value of the output variable, and
22
- // attempts to update it with the new value. If in the meantime another gpu thread
23
- // updated the content of the output address it will try again.
24
- template <typename T, typename R>
25
- __device__ EIGEN_ALWAYS_INLINE void atomicReduce(T* output, T accum, R& reducer) {
26
- #if (defined(EIGEN_HIP_DEVICE_COMPILE) && defined(__HIP_ARCH_HAS_WARP_SHUFFLE__)) || (EIGEN_CUDA_ARCH >= 300)
27
- if (sizeof(T) == 4)
28
- {
29
- unsigned int oldval = *reinterpret_cast<unsigned int*>(output);
30
- unsigned int newval = oldval;
31
- reducer.reduce(accum, reinterpret_cast<T*>(&newval));
32
- if (newval == oldval) {
33
- return;
34
- }
35
- unsigned int readback;
36
- while ((readback = atomicCAS((unsigned int*)output, oldval, newval)) != oldval) {
37
- oldval = readback;
38
- newval = oldval;
39
- reducer.reduce(accum, reinterpret_cast<T*>(&newval));
40
- if (newval == oldval) {
41
- return;
42
- }
43
- }
44
- }
45
- else if (sizeof(T) == 8) {
46
- unsigned long long oldval = *reinterpret_cast<unsigned long long*>(output);
47
- unsigned long long newval = oldval;
48
- reducer.reduce(accum, reinterpret_cast<T*>(&newval));
49
- if (newval == oldval) {
50
- return;
51
- }
52
- unsigned long long readback;
53
- while ((readback = atomicCAS((unsigned long long*)output, oldval, newval)) != oldval) {
54
- oldval = readback;
55
- newval = oldval;
56
- reducer.reduce(accum, reinterpret_cast<T*>(&newval));
57
- if (newval == oldval) {
58
- return;
59
- }
60
- }
61
- }
62
- else {
63
- gpu_assert(0 && "Wordsize not supported");
64
- }
65
- #else // EIGEN_CUDA_ARCH >= 300
66
- gpu_assert(0 && "Shouldn't be called on unsupported device");
67
- #endif // EIGEN_CUDA_ARCH >= 300
68
- }
69
-
70
- // We extend atomicExch to support extra data types
71
- template <typename Type>
72
- __device__ inline Type atomicExchCustom(Type* address, Type val) {
73
- return atomicExch(address, val);
74
- }
75
-
76
- template <>
77
- __device__ inline double atomicExchCustom(double* address, double val) {
78
- unsigned long long int* address_as_ull = reinterpret_cast<unsigned long long int*>(address);
79
- return __longlong_as_double(atomicExch(address_as_ull, __double_as_longlong(val)));
80
- }
81
-
82
- #ifdef EIGEN_HAS_GPU_FP16
83
- template <typename R>
84
- __device__ inline void atomicReduce(half2* output, half2 accum, R& reducer) {
85
- unsigned int oldval = *reinterpret_cast<unsigned int*>(output);
86
- unsigned int newval = oldval;
87
- reducer.reducePacket(accum, reinterpret_cast<half2*>(&newval));
88
- if (newval == oldval) {
89
- return;
90
- }
91
- unsigned int readback;
92
- while ((readback = atomicCAS((unsigned int*)output, oldval, newval)) != oldval) {
93
- oldval = readback;
94
- newval = oldval;
95
- reducer.reducePacket(accum, reinterpret_cast<half2*>(&newval));
96
- if (newval == oldval) {
97
- return;
98
- }
99
- }
100
- }
101
- // reduction should be associative since reduction is not atomic in wide vector but atomic in half2 operations
102
- template <typename R>
103
- __device__ inline void atomicReduce(Packet4h2* output, Packet4h2 accum, R& reducer) {
104
- half2* houtput=reinterpret_cast<half2*>(output);
105
- half2* haccum=reinterpret_cast<half2*>(&accum);
106
- for(int i=0;i<4;++i){
107
- atomicReduce(houtput+i,*(haccum+i),reducer);
108
- }
109
- }
110
- #endif // EIGEN_HAS_GPU_FP16
111
-
112
- template <>
113
- __device__ inline void atomicReduce(float* output, float accum, SumReducer<float>&) {
114
- #if (defined(EIGEN_HIP_DEVICE_COMPILE) && defined(__HIP_ARCH_HAS_WARP_SHUFFLE__)) || (EIGEN_CUDA_ARCH >= 300)
115
- atomicAdd(output, accum);
116
- #else // EIGEN_CUDA_ARCH >= 300
117
- gpu_assert(0 && "Shouldn't be called on unsupported device");
118
- #endif // EIGEN_CUDA_ARCH >= 300
119
- }
120
-
121
-
122
- template <typename CoeffType, typename Index>
123
- __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void ReductionInitKernel(const CoeffType val, Index num_preserved_coeffs, CoeffType* output) {
124
- const Index thread_id = blockIdx.x * blockDim.x + threadIdx.x;
125
- const Index num_threads = blockDim.x * gridDim.x;
126
- for (Index i = thread_id; i < num_preserved_coeffs; i += num_threads) {
127
- output[i] = val;
128
- }
129
- }
130
-
131
-
132
- template <int BlockSize, int NumPerThread, typename Self,
133
- typename Reducer, typename Index>
134
- __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void FullReductionKernel(Reducer reducer, const Self input, Index num_coeffs,
135
- typename Self::CoeffReturnType* output, unsigned int* semaphore) {
136
- #if (defined(EIGEN_HIP_DEVICE_COMPILE) && defined(__HIP_ARCH_HAS_WARP_SHUFFLE__)) || (EIGEN_CUDA_ARCH >= 300)
137
- // Initialize the output value
138
- const Index first_index = blockIdx.x * BlockSize * NumPerThread + threadIdx.x;
139
- if (gridDim.x == 1) {
140
- if (first_index == 0) {
141
- *output = reducer.initialize();
142
- }
143
- }
144
- else {
145
- if (threadIdx.x == 0) {
146
- unsigned int block = atomicCAS(semaphore, 0u, 1u);
147
- if (block == 0) {
148
- // We're the first block to run, initialize the output value
149
- atomicExchCustom(output, reducer.initialize());
150
- __threadfence();
151
- atomicExch(semaphore, 2u);
152
- }
153
- else {
154
- // Wait for the first block to initialize the output value.
155
- // Use atomicCAS here to ensure that the reads aren't cached
156
- unsigned int val;
157
- do {
158
- val = atomicCAS(semaphore, 2u, 2u);
159
- }
160
- while (val < 2u);
161
- }
162
- }
163
- }
164
-
165
- __syncthreads();
166
-
167
- eigen_assert(gridDim.x == 1 || *semaphore >= 2u);
168
-
169
- typename Self::CoeffReturnType accum = reducer.initialize();
170
- Index max_iter = numext::mini<Index>(num_coeffs - first_index, NumPerThread*BlockSize);
171
- for (Index i = 0; i < max_iter; i+=BlockSize) {
172
- const Index index = first_index + i;
173
- eigen_assert(index < num_coeffs);
174
- typename Self::CoeffReturnType val = input.m_impl.coeff(index);
175
- reducer.reduce(val, &accum);
176
- }
177
-
178
- #pragma unroll
179
- for (int offset = warpSize/2; offset > 0; offset /= 2) {
180
- #if defined(EIGEN_HIPCC)
181
- // use std::is_floating_point to determine the type of reduced_val
182
- // This is needed because when Type == double, hipcc will give a "call to __shfl_down is ambguous" error
183
- // and list the float and int versions of __shfl_down as the candidate functions.
184
- if (std::is_floating_point<typename Self::CoeffReturnType>::value) {
185
- reducer.reduce(__shfl_down(static_cast<float>(accum), offset, warpSize), &accum);
186
- } else {
187
- reducer.reduce(__shfl_down(static_cast<int>(accum), offset, warpSize), &accum);
188
- }
189
- #elif defined(EIGEN_CUDA_SDK_VER) && EIGEN_CUDA_SDK_VER < 90000
190
- reducer.reduce(__shfl_down(accum, offset, warpSize), &accum);
191
- #else
192
- reducer.reduce(__shfl_down_sync(0xFFFFFFFF, accum, offset, warpSize), &accum);
193
- #endif
194
- }
195
-
196
- if ((threadIdx.x & (warpSize - 1)) == 0) {
197
- atomicReduce(output, accum, reducer);
198
- }
199
-
200
- if (gridDim.x > 1 && threadIdx.x == 0) {
201
- // Let the last block reset the semaphore
202
- atomicInc(semaphore, gridDim.x + 1);
203
- #if defined(EIGEN_HIPCC)
204
- __threadfence_system();
205
- #endif
206
- }
207
- #else // EIGEN_CUDA_ARCH >= 300
208
- gpu_assert(0 && "Shouldn't be called on unsupported device");
209
- #endif // EIGEN_CUDA_ARCH >= 300
210
- }
211
-
212
-
213
- #ifdef EIGEN_HAS_GPU_FP16
214
- template <typename Self,
215
- typename Reducer, typename Index>
216
- __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void ReductionInitFullReduxKernelHalfFloat(Reducer reducer, const Self input, Index num_coeffs,
217
- packet_traits<Eigen::half>::type* scratch) {
218
- eigen_assert(blockDim.x == 1);
219
- eigen_assert(gridDim.x == 1);
220
- typedef packet_traits<Eigen::half>::type packet_type;
221
- Index packet_remainder =
222
- num_coeffs % Index(unpacket_traits<packet_type>::size);
223
- if (packet_remainder != 0) {
224
- half2* h2scratch = reinterpret_cast<half2*>(scratch);
225
- for (Index i = num_coeffs - packet_remainder; i + 2 <= num_coeffs; i += 2) {
226
- *h2scratch =
227
- __halves2half2(input.m_impl.coeff(i), input.m_impl.coeff(i + 1));
228
- h2scratch++;
229
- }
230
- if ((num_coeffs & 1) != 0) {
231
- half lastCoeff = input.m_impl.coeff(num_coeffs - 1);
232
- *h2scratch = __halves2half2(lastCoeff, reducer.initialize());
233
- }
234
- } else {
235
- *scratch = reducer.template initializePacket<packet_type>();
236
- }
237
- }
238
-
239
- template <typename Self,
240
- typename Reducer, typename Index>
241
- __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void ReductionInitKernelHalfFloat(Reducer reducer, const Self input, Index num_coeffs, half* output) {
242
- const Index thread_id = blockIdx.x * blockDim.x + threadIdx.x;
243
- const Index num_threads = blockDim.x * gridDim.x;
244
- typedef typename packet_traits<Eigen::half>::type PacketType;
245
-
246
- const Index num_packets =
247
- num_coeffs / Index(unpacket_traits<PacketType>::size);
248
- PacketType* p_output = reinterpret_cast<PacketType*>(output);
249
- for (Index i = thread_id; i < num_packets; i += num_threads) {
250
- p_output[i] = reducer.template initializePacket<PacketType>();
251
- }
252
- Index packet_remainder =
253
- num_coeffs % Index(unpacket_traits<PacketType>::size);
254
- if (thread_id < packet_remainder) {
255
- output[num_coeffs - packet_remainder + thread_id] = reducer.initialize();
256
- }
257
- }
258
-
259
- template <int BlockSize, int NumPerThread, typename Self,
260
- typename Reducer, typename Index>
261
- __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void FullReductionKernelHalfFloat(Reducer reducer, const Self input, Index num_coeffs,
262
- half* output, packet_traits<Eigen::half>::type* scratch) {
263
- typedef typename packet_traits<Eigen::half>::type PacketType;
264
- const int packet_width = unpacket_traits<PacketType>::size;
265
- eigen_assert(NumPerThread % packet_width == 0);
266
- const Index first_index =
267
- blockIdx.x * BlockSize * NumPerThread + packet_width * threadIdx.x;
268
-
269
- // Initialize the output value if it wasn't initialized by the ReductionInitKernel
270
-
271
- if (gridDim.x == 1) {
272
- if (first_index == 0) {
273
- int rem = num_coeffs % packet_width;
274
- if (rem != 0) {
275
- half2* p_scratch = reinterpret_cast<half2*>(scratch);
276
- *scratch = reducer.template initializePacket<PacketType>();
277
- for (int i = 0; i < rem / 2; i++) {
278
- *p_scratch = __halves2half2(
279
- input.m_impl.coeff(num_coeffs - packet_width + 2 * i),
280
- input.m_impl.coeff(num_coeffs - packet_width + 2 * i + 1));
281
- p_scratch++;
282
- }
283
- if ((num_coeffs & 1) != 0) {
284
- half last = input.m_impl.coeff(num_coeffs - 1);
285
- *p_scratch = __halves2half2(last, reducer.initialize());
286
- }
287
- } else {
288
- *scratch = reducer.template initializePacket<PacketType>();
289
- }
290
- }
291
- __syncthreads();
292
- }
293
-
294
- PacketType accum = reducer.template initializePacket<PacketType>();
295
- const Index max_iter =
296
- numext::mini<Index>((num_coeffs - first_index) / packet_width,
297
- NumPerThread * BlockSize / packet_width);
298
- for (Index i = 0; i < max_iter; i += BlockSize) {
299
- const Index index = first_index + packet_width * i;
300
- eigen_assert(index + packet_width < num_coeffs);
301
- PacketType val = input.m_impl.template packet<Unaligned>(index);
302
- reducer.reducePacket(val, &accum);
303
- }
304
-
305
- #pragma unroll
306
- for (int offset = warpSize/2; offset > 0; offset /= 2) {
307
- #if defined(EIGEN_HIPCC)
308
- PacketType r1;
309
- half2* hr = reinterpret_cast<half2*>(&r1);
310
- half2* hacc = reinterpret_cast<half2*>(&accum);
311
- for (int i = 0; i < packet_width / 2; i++) {
312
- // FIXME : remove this workaround once we have native half/half2 support for __shfl_down
313
- union { int i; half2 h; } wka_in, wka_out;
314
- wka_in.h = hacc[i];
315
- wka_out.i = __shfl_down(wka_in.i, offset, warpSize);
316
- hr[i] = wka_out.h;
317
- }
318
- reducer.reducePacket(r1, &accum);
319
- #elif defined(EIGEN_CUDA_SDK_VER) && EIGEN_CUDA_SDK_VER < 90000
320
- PacketType r1;
321
- half2* hr = reinterpret_cast<half2*>(&r1);
322
- half2* hacc = reinterpret_cast<half2*>(&accum);
323
- for (int i = 0; i < packet_width / 2; i++) {
324
- hr[i] = __shfl_down(hacc[i], offset, warpSize);
325
- }
326
- reducer.reducePacket(r1, &accum);
327
- #else
328
- PacketType r1;
329
- half2* hr = reinterpret_cast<half2*>(&r1);
330
- half2* hacc = reinterpret_cast<half2*>(&accum);
331
- for (int i = 0; i < packet_width / 2; i++) {
332
- hr[i] = __shfl_down_sync(0xFFFFFFFF, hacc[i], (unsigned)offset, warpSize);
333
- }
334
- reducer.reducePacket(r1, &accum);
335
-
336
- #endif
337
- }
338
-
339
- if ((threadIdx.x & (warpSize - 1)) == 0) {
340
- atomicReduce(scratch, accum, reducer);
341
- }
342
-
343
- __syncthreads();
344
- half2* rv1 = reinterpret_cast<half2*>(scratch);
345
- if (packet_width > 2) {
346
- reducer.reducePacket(rv1[2], rv1);
347
- reducer.reducePacket(rv1[3], rv1 + 1);
348
- reducer.reducePacket(rv1[1], rv1);
349
- }
350
- if (gridDim.x == 1) {
351
- if (first_index == 0) {
352
- half tmp = __low2half(*rv1);
353
- reducer.reduce(__high2half(*rv1), &tmp);
354
- *output = tmp;
355
- }
356
- }
357
- }
358
-
359
- template <typename Op>
360
- __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void ReductionCleanupKernelHalfFloat(Op reducer, half* output, packet_traits<Eigen::half>::type* scratch) {
361
- eigen_assert(threadIdx.x == 1);
362
- half2* pscratch = reinterpret_cast<half2*>(scratch);
363
- half tmp = __float2half(0.f);
364
- typedef packet_traits<Eigen::half>::type packet_type;
365
- for (int i = 0; i < unpacket_traits<packet_type>::size; i += 2) {
366
- reducer.reduce(__low2half(*pscratch), &tmp);
367
- reducer.reduce(__high2half(*pscratch), &tmp);
368
- pscratch++;
369
- }
370
- *output = tmp;
371
- }
372
-
373
- #endif // EIGEN_HAS_GPU_FP16
374
-
375
- template <typename Self, typename Op, typename OutputType, bool PacketAccess, typename Enabled = void>
376
- struct FullReductionLauncher {
377
- static void run(const Self&, Op&, const GpuDevice&, OutputType*, typename Self::Index) {
378
- gpu_assert(false && "Should only be called on doubles, floats and half floats");
379
- }
380
- };
381
-
382
- // Specialization for float and double
383
- template <typename Self, typename Op, typename OutputType, bool PacketAccess>
384
- struct FullReductionLauncher<
385
- Self, Op, OutputType, PacketAccess,
386
- typename internal::enable_if<
387
- internal::is_same<float, OutputType>::value ||
388
- internal::is_same<double, OutputType>::value,
389
- void>::type> {
390
- static void run(const Self& self, Op& reducer, const GpuDevice& device, OutputType* output, typename Self::Index num_coeffs) {
391
-
392
- typedef typename Self::Index Index;
393
- const int block_size = 256;
394
- const int num_per_thread = 128;
395
- const int num_blocks = divup<int>(num_coeffs, block_size * num_per_thread);
396
-
397
- unsigned int* semaphore = NULL;
398
- if (num_blocks > 1) {
399
- semaphore = device.semaphore();
400
- }
401
-
402
- LAUNCH_GPU_KERNEL((FullReductionKernel<block_size, num_per_thread, Self, Op, Index>),
403
- num_blocks, block_size, 0, device, reducer, self, num_coeffs, output, semaphore);
404
- }
405
- };
406
-
407
- #ifdef EIGEN_HAS_GPU_FP16
408
- template <typename Self, typename Op>
409
- struct FullReductionLauncher<Self, Op, Eigen::half, false> {
410
- static void run(const Self&, Op&, const GpuDevice&, half*, typename Self::Index) {
411
- gpu_assert(false && "Should not be called since there is no packet accessor");
412
- }
413
- };
414
-
415
- template <typename Self, typename Op>
416
- struct FullReductionLauncher<Self, Op, Eigen::half, true> {
417
- static void run(const Self& self, Op& reducer, const GpuDevice& device, half* output, typename Self::Index num_coeffs) {
418
- typedef typename Self::Index Index;
419
- typedef typename packet_traits<Eigen::half>::type PacketType;
420
-
421
- const int block_size = 256;
422
- const int num_per_thread = 128;
423
- const int num_blocks = divup<int>(num_coeffs, block_size * num_per_thread);
424
- PacketType* scratch = static_cast<PacketType*>(device.scratchpad());
425
- // half2* scratch = static_cast<half2*>(device.scratchpad());
426
-
427
- if (num_blocks > 1) {
428
- // We initialize the output and the scrathpad outside the reduction kernel when we can't be sure that there
429
- // won't be a race conditions between multiple thread blocks.
430
- LAUNCH_GPU_KERNEL((ReductionInitFullReduxKernelHalfFloat<Self, Op, Index>),
431
- 1, 1, 0, device, reducer, self, num_coeffs, scratch);
432
- }
433
-
434
- LAUNCH_GPU_KERNEL((FullReductionKernelHalfFloat<block_size, num_per_thread, Self, Op, Index>),
435
- num_blocks, block_size, 0, device, reducer, self, num_coeffs, output, scratch);
436
-
437
- if (num_blocks > 1) {
438
- LAUNCH_GPU_KERNEL((ReductionCleanupKernelHalfFloat<Op>),
439
- 1, 1, 0, device, reducer, output, scratch);
440
- }
441
- }
442
- };
443
- #endif // EIGEN_HAS_GPU_FP16
444
-
445
-
446
- template <typename Self, typename Op, bool Vectorizable>
447
- struct FullReducer<Self, Op, GpuDevice, Vectorizable> {
448
- // Unfortunately nvidia doesn't support well exotic types such as complex,
449
- // so reduce the scope of the optimized version of the code to the simple cases
450
- // of doubles, floats and half floats
451
- #ifdef EIGEN_HAS_GPU_FP16
452
- static const bool HasOptimizedImplementation = !Self::ReducerTraits::IsStateful &&
453
- (internal::is_same<typename Self::CoeffReturnType, float>::value ||
454
- internal::is_same<typename Self::CoeffReturnType, double>::value ||
455
- (internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && reducer_traits<Op, GpuDevice>::PacketAccess));
456
- #else // EIGEN_HAS_GPU_FP16
457
- static const bool HasOptimizedImplementation = !Self::ReducerTraits::IsStateful &&
458
- (internal::is_same<typename Self::CoeffReturnType, float>::value ||
459
- internal::is_same<typename Self::CoeffReturnType, double>::value);
460
- #endif // EIGEN_HAS_GPU_FP16
461
-
462
- template <typename OutputType>
463
- static void run(const Self& self, Op& reducer, const GpuDevice& device, OutputType* output) {
464
- gpu_assert(HasOptimizedImplementation && "Should only be called on doubles, floats or half floats");
465
- const Index num_coeffs = array_prod(self.m_impl.dimensions());
466
- // Don't crash when we're called with an input tensor of size 0.
467
- if (num_coeffs == 0) {
468
- return;
469
- }
470
-
471
- FullReductionLauncher<Self, Op, OutputType, reducer_traits<Op, GpuDevice>::PacketAccess>::run(self, reducer, device, output, num_coeffs);
472
- }
473
- };
474
-
475
-
476
- template <int NumPerThread, typename Self,
477
- typename Reducer, typename Index>
478
- __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void InnerReductionKernel(Reducer reducer, const Self input, Index num_coeffs_to_reduce, Index num_preserved_coeffs,
479
- typename Self::CoeffReturnType* output) {
480
- #if (defined(EIGEN_HIP_DEVICE_COMPILE) && defined(__HIP_ARCH_HAS_WARP_SHUFFLE__)) || (EIGEN_CUDA_ARCH >= 300)
481
- typedef typename Self::CoeffReturnType Type;
482
- eigen_assert(blockDim.y == 1);
483
- eigen_assert(blockDim.z == 1);
484
- eigen_assert(gridDim.y == 1);
485
- eigen_assert(gridDim.z == 1);
486
-
487
- const int unroll_times = 16;
488
- eigen_assert(NumPerThread % unroll_times == 0);
489
-
490
- const Index input_col_blocks = divup<Index>(num_coeffs_to_reduce, blockDim.x * NumPerThread);
491
- const Index num_input_blocks = input_col_blocks * num_preserved_coeffs;
492
-
493
- const Index num_threads = blockDim.x * gridDim.x;
494
- const Index thread_id = blockIdx.x * blockDim.x + threadIdx.x;
495
-
496
- // Initialize the output values if they weren't initialized by the ReductionInitKernel
497
- if (gridDim.x == 1) {
498
- for (Index i = thread_id; i < num_preserved_coeffs; i += num_threads) {
499
- output[i] = reducer.initialize();
500
- }
501
- __syncthreads();
502
- }
503
-
504
- for (Index i = blockIdx.x; i < num_input_blocks; i += gridDim.x) {
505
- const Index row = i / input_col_blocks;
506
-
507
- if (row < num_preserved_coeffs) {
508
- const Index col_block = i % input_col_blocks;
509
- const Index col_begin = col_block * blockDim.x * NumPerThread + threadIdx.x;
510
-
511
- Type reduced_val = reducer.initialize();
512
-
513
- for (Index j = 0; j < NumPerThread; j += unroll_times) {
514
- const Index last_col = col_begin + blockDim.x * (j + unroll_times - 1);
515
- if (last_col >= num_coeffs_to_reduce) {
516
- for (Index col = col_begin + blockDim.x * j; col < num_coeffs_to_reduce; col += blockDim.x) {
517
- const Type val = input.m_impl.coeff(row * num_coeffs_to_reduce + col);
518
- reducer.reduce(val, &reduced_val);
519
- }
520
- break;
521
- } else {
522
- // Faster version of the loop with no branches after unrolling.
523
- #pragma unroll
524
- for (int k = 0; k < unroll_times; ++k) {
525
- const Index col = col_begin + blockDim.x * (j + k);
526
- reducer.reduce(input.m_impl.coeff(row * num_coeffs_to_reduce + col), &reduced_val);
527
- }
528
- }
529
- }
530
-
531
- #pragma unroll
532
- for (int offset = warpSize/2; offset > 0; offset /= 2) {
533
- #if defined(EIGEN_HIPCC)
534
- // use std::is_floating_point to determine the type of reduced_val
535
- // This is needed because when Type == double, hipcc will give a "call to __shfl_down is ambguous" error
536
- // and list the float and int versions of __shfl_down as the candidate functions.
537
- if (std::is_floating_point<Type>::value) {
538
- reducer.reduce(__shfl_down(static_cast<float>(reduced_val), offset), &reduced_val);
539
- } else {
540
- reducer.reduce(__shfl_down(static_cast<int>(reduced_val), offset), &reduced_val);
541
- }
542
- #elif defined(EIGEN_CUDA_SDK_VER) && EIGEN_CUDA_SDK_VER < 90000
543
- reducer.reduce(__shfl_down(reduced_val, offset), &reduced_val);
544
- #else
545
- reducer.reduce(__shfl_down_sync(0xFFFFFFFF, reduced_val, offset), &reduced_val);
546
- #endif
547
- }
548
-
549
- if ((threadIdx.x & (warpSize - 1)) == 0) {
550
- atomicReduce(&(output[row]), reduced_val, reducer);
551
- }
552
- }
553
- }
554
- #else // EIGEN_CUDA_ARCH >= 300
555
- gpu_assert(0 && "Shouldn't be called on unsupported device");
556
- #endif // EIGEN_CUDA_ARCH >= 300
557
- }
558
-
559
- #ifdef EIGEN_HAS_GPU_FP16
560
-
561
- template <int NumPerThread, typename Self,
562
- typename Reducer, typename Index>
563
- __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void InnerReductionKernelHalfFloat(Reducer reducer, const Self input, Index num_coeffs_to_reduce, Index num_preserved_coeffs,
564
- half* output) {
565
- eigen_assert(blockDim.y == 1);
566
- eigen_assert(blockDim.z == 1);
567
- eigen_assert(gridDim.y == 1);
568
- eigen_assert(gridDim.z == 1);
569
-
570
- typedef typename packet_traits<Eigen::half>::type PacketType;
571
- const int packet_width = unpacket_traits<PacketType>::size;
572
- const int unroll_times = 16 / packet_width;
573
- eigen_assert(NumPerThread % unroll_times == 0);
574
- eigen_assert(unroll_times % 2 == 0);
575
-
576
- const Index input_col_blocks = divup<Index>(num_coeffs_to_reduce, blockDim.x * NumPerThread * 2);
577
- const Index num_input_blocks = divup<Index>(input_col_blocks * num_preserved_coeffs, 2);
578
-
579
- const Index num_threads = blockDim.x * gridDim.x;
580
- const Index thread_id = blockIdx.x * blockDim.x + threadIdx.x;
581
-
582
- // Initialize the output values if they weren't initialized by the ReductionInitKernel
583
- if (gridDim.x == 1) {
584
- Index i = packet_width * thread_id;
585
- for (; i + packet_width <= num_preserved_coeffs;
586
- i += packet_width * num_threads) {
587
- PacketType* poutput = reinterpret_cast<PacketType*>(output + i);
588
- *poutput = reducer.template initializePacket<PacketType>();
589
- }
590
- if (i < num_preserved_coeffs) {
591
- output[i] = reducer.initialize();
592
- }
593
- __syncthreads();
594
- }
595
-
596
- for (Index i = blockIdx.x; i < num_input_blocks; i += gridDim.x) {
597
- const Index row = 2 * (i / input_col_blocks); // everybody takes 2 rows
598
-
599
- if (row + 1 < num_preserved_coeffs) {
600
- const Index col_block = i % input_col_blocks;
601
- const Index col_begin =
602
- packet_width * (col_block * blockDim.x * NumPerThread + threadIdx.x);
603
-
604
- PacketType reduced_val1 = reducer.template initializePacket<PacketType>();
605
- PacketType reduced_val2 = reducer.template initializePacket<PacketType>();
606
-
607
- for (Index j = 0; j < NumPerThread; j += unroll_times) {
608
- const Index last_col =
609
- col_begin + blockDim.x * (j + unroll_times - 1) * packet_width;
610
- if (last_col >= num_coeffs_to_reduce) {
611
- Index col = col_begin + blockDim.x * j;
612
- for (; col + packet_width <= num_coeffs_to_reduce;
613
- col += blockDim.x) {
614
- const PacketType val1 = input.m_impl.template packet<Unaligned>(
615
- row * num_coeffs_to_reduce + col);
616
- reducer.reducePacket(val1, &reduced_val1);
617
- const PacketType val2 = input.m_impl.template packet<Unaligned>(
618
- (row + 1) * num_coeffs_to_reduce + col);
619
- reducer.reducePacket(val2, &reduced_val2);
620
- }
621
- if (col < num_coeffs_to_reduce) {
622
- PacketType r1 = reducer.template initializePacket<PacketType>();
623
- PacketType r2 = reducer.template initializePacket<PacketType>();
624
- half2* hr1 = reinterpret_cast<half2*>(&r1);
625
- half2* hr2 = reinterpret_cast<half2*>(&r2);
626
- while (col + 1 < num_coeffs_to_reduce) {
627
- *hr1 = __halves2half2(
628
- input.m_impl.coeff(row * num_coeffs_to_reduce + col),
629
- input.m_impl.coeff(row * num_coeffs_to_reduce + col + 1));
630
- *hr2 = __halves2half2(
631
- input.m_impl.coeff((row + 1) * num_coeffs_to_reduce + col),
632
- input.m_impl.coeff((row + 1) * num_coeffs_to_reduce + col +
633
- 1));
634
- hr1++;
635
- hr2++;
636
- col += 2;
637
- }
638
- if (col < num_coeffs_to_reduce) {
639
- // Peel;
640
- const half last1 =
641
- input.m_impl.coeff(row * num_coeffs_to_reduce + col);
642
- *hr1 = __halves2half2(last1, reducer.initialize());
643
- const half last2 =
644
- input.m_impl.coeff((row + 1) * num_coeffs_to_reduce + col);
645
- *hr2 = __halves2half2(last2, reducer.initialize());
646
- }
647
- reducer.reducePacket(r1, &reduced_val1);
648
- reducer.reducePacket(r2, &reduced_val2);
649
- }
650
- break;
651
- } else {
652
- // Faster version of the loop with no branches after unrolling.
653
- #pragma unroll
654
- for (int k = 0; k < unroll_times; ++k) {
655
- const Index col = col_begin + blockDim.x * (j + k) * packet_width;
656
- reducer.reducePacket(input.m_impl.template packet<Unaligned>(
657
- row * num_coeffs_to_reduce + col),
658
- &reduced_val1);
659
- reducer.reducePacket(input.m_impl.template packet<Unaligned>(
660
- (row + 1) * num_coeffs_to_reduce + col),
661
- &reduced_val2);
662
- }
663
- }
664
- }
665
-
666
- #pragma unroll
667
- for (int offset = warpSize/2; offset > 0; offset /= 2) {
668
- #if defined(EIGEN_HIPCC)
669
- PacketType r1;
670
- PacketType r2;
671
- half2* hr1 = reinterpret_cast<half2*>(&r1);
672
- half2* hr2 = reinterpret_cast<half2*>(&r2);
673
- half2* rv1 = reinterpret_cast<half2*>(&reduced_val1);
674
- half2* rv2 = reinterpret_cast<half2*>(&reduced_val2);
675
- for (int i = 0; i < packet_width / 2; i++) {
676
- // FIXME : remove this workaround once we have native half/half2 support for __shfl_down
677
- union { int i; half2 h; } wka_in1, wka_out1;
678
- wka_in1.h = rv1[i];
679
- wka_out1.i = __shfl_down(wka_in1.i, offset, warpSize);
680
- hr1[i] = wka_out1.h;
681
-
682
- union { int i; half2 h; } wka_in2, wka_out2;
683
- wka_in2.h = rv2[i];
684
- wka_out2.i = __shfl_down(wka_in2.i, offset, warpSize);
685
- hr2[i] = wka_out2.h;
686
- }
687
- reducer.reducePacket(r1, &reduced_val1);
688
- reducer.reducePacket(r2, &reduced_val2);
689
- #elif defined(EIGEN_CUDA_SDK_VER) && EIGEN_CUDA_SDK_VER < 90000
690
- PacketType r1;
691
- PacketType r2;
692
- half2* hr1 = reinterpret_cast<half2*>(&r1);
693
- half2* hr2 = reinterpret_cast<half2*>(&r2);
694
- half2* rv1 = reinterpret_cast<half2*>(&reduced_val1);
695
- half2* rv2 = reinterpret_cast<half2*>(&reduced_val2);
696
- for (int i = 0; i < packet_width / 2; i++) {
697
- hr1[i] = __shfl_down(rv1[i], offset, warpSize);
698
- hr2[i] = __shfl_down(rv2[i], offset, warpSize);
699
- }
700
- reducer.reducePacket(r1, &reduced_val1);
701
- reducer.reducePacket(r2, &reduced_val2);
702
- #else
703
- PacketType r1;
704
- PacketType r2;
705
- half2* hr1 = reinterpret_cast<half2*>(&r1);
706
- half2* hr2 = reinterpret_cast<half2*>(&r2);
707
- half2* rr1 = reinterpret_cast<half2*>(&reduced_val1);
708
- half2* rr2 = reinterpret_cast<half2*>(&reduced_val2);
709
- for (int i = 0; i < packet_width / 2; i++) {
710
- hr1[i] =
711
- __shfl_down_sync(0xFFFFFFFF, rr1[i], (unsigned)offset, warpSize);
712
- hr2[i] =
713
- __shfl_down_sync(0xFFFFFFFF, rr2[i], (unsigned)offset, warpSize);
714
- }
715
- reducer.reducePacket(r1, &reduced_val1);
716
- reducer.reducePacket(r2, &reduced_val2);
717
-
718
- #endif
719
- }
720
- half2* rv1 = reinterpret_cast<half2*>(&reduced_val1);
721
- half2* rv2 = reinterpret_cast<half2*>(&reduced_val2);
722
- half2 val;
723
- if (packet_width > 2) {
724
- reducer.reducePacket(rv1[2], rv1);
725
- reducer.reducePacket(rv1[3], rv1 + 1);
726
- reducer.reducePacket(rv1[1], rv1);
727
- reducer.reducePacket(rv2[2], rv2);
728
- reducer.reducePacket(rv2[3], rv2 + 1);
729
- reducer.reducePacket(rv2[1], rv2);
730
- }
731
- half val1 = __low2half(*rv1);
732
- reducer.reduce(__high2half(*rv1), &val1);
733
- half val2 = __low2half(*rv2);
734
- reducer.reduce(__high2half(*rv2), &val2);
735
- val = __halves2half2(val1, val2);
736
- if ((threadIdx.x & (warpSize - 1)) == 0) {
737
- half* loc = output + row;
738
- atomicReduce((half2*)loc, val, reducer);
739
- }
740
- }
741
- }
742
- }
743
-
744
- #endif // EIGEN_HAS_GPU_FP16
745
-
746
- template <typename Self, typename Op, typename OutputType, bool PacketAccess, typename Enabled = void>
747
- struct InnerReductionLauncher {
748
- static EIGEN_DEVICE_FUNC bool run(const Self&, Op&, const GpuDevice&, OutputType*, typename Self::Index, typename Self::Index) {
749
- gpu_assert(false && "Should only be called to reduce doubles, floats and half floats on a gpu device");
750
- return true;
751
- }
752
- };
753
-
754
- // Specialization for float and double
755
- template <typename Self, typename Op, typename OutputType, bool PacketAccess>
756
- struct InnerReductionLauncher<
757
- Self, Op, OutputType, PacketAccess,
758
- typename internal::enable_if<
759
- internal::is_same<float, OutputType>::value ||
760
- internal::is_same<double, OutputType>::value,
761
- void>::type> {
762
- static bool run(const Self& self, Op& reducer, const GpuDevice& device, OutputType* output, typename Self::Index num_coeffs_to_reduce, typename Self::Index num_preserved_vals) {
763
- typedef typename Self::Index Index;
764
-
765
- const Index num_coeffs = num_coeffs_to_reduce * num_preserved_vals;
766
- const int block_size = 256;
767
- const int num_per_thread = 128;
768
- const int dyn_blocks = divup<int>(num_coeffs, block_size * num_per_thread);
769
- const int max_blocks = device.getNumGpuMultiProcessors() *
770
- device.maxGpuThreadsPerMultiProcessor() / block_size;
771
- const int num_blocks = numext::mini<int>(max_blocks, dyn_blocks);
772
-
773
- if (num_blocks > 1) {
774
- // We initialize the outputs outside the reduction kernel when we can't be sure that there
775
- // won't be a race conditions between multiple thread blocks.
776
- const int dyn_blocks = divup<int>(num_preserved_vals, 1024);
777
- const int max_blocks = device.getNumGpuMultiProcessors() *
778
- device.maxGpuThreadsPerMultiProcessor() / 1024;
779
- const int num_blocks = numext::mini<int>(max_blocks, dyn_blocks);
780
- LAUNCH_GPU_KERNEL((ReductionInitKernel<OutputType, Index>),
781
- num_blocks, 1024, 0, device, reducer.initialize(),
782
- num_preserved_vals, output);
783
- }
784
-
785
- LAUNCH_GPU_KERNEL((InnerReductionKernel<num_per_thread, Self, Op, Index>),
786
- num_blocks, block_size, 0, device, reducer, self, num_coeffs_to_reduce, num_preserved_vals, output);
787
-
788
- return false;
789
- }
790
- };
791
-
792
- #ifdef EIGEN_HAS_GPU_FP16
793
- template <typename Self, typename Op>
794
- struct InnerReductionLauncher<Self, Op, Eigen::half, false> {
795
- static bool run(const Self&, Op&, const GpuDevice&, half*, typename Self::Index, typename Self::Index) {
796
- gpu_assert(false && "Should not be called since there is no packet accessor");
797
- return true;
798
- }
799
- };
800
-
801
- template <typename Self, typename Op>
802
- struct InnerReductionLauncher<Self, Op, Eigen::half, true> {
803
- static bool run(const Self& self, Op& reducer, const GpuDevice& device, half* output, typename Self::Index num_coeffs_to_reduce, typename Self::Index num_preserved_vals) {
804
- typedef typename Self::Index Index;
805
-
806
- if (num_preserved_vals % 2 != 0) {
807
- // Not supported yet, revert to the slower code path
808
- return true;
809
- }
810
-
811
- const Index num_coeffs = num_coeffs_to_reduce * num_preserved_vals;
812
- const int block_size = /*256*/128;
813
- const int num_per_thread = /*128*/64;
814
- const int dyn_blocks = divup<int>(num_coeffs, block_size * num_per_thread);
815
- const int max_blocks = device.getNumGpuMultiProcessors() *
816
- device.maxGpuThreadsPerMultiProcessor() / block_size;
817
- const int num_blocks = numext::mini<int>(max_blocks, dyn_blocks);
818
-
819
- if (num_blocks > 1) {
820
- // We initialize the outputs outside the reduction kernel when we can't be sure that there
821
- // won't be a race conditions between multiple thread blocks.
822
- LAUNCH_GPU_KERNEL((ReductionInitKernelHalfFloat<Self, Op, Index>),
823
- 1, 1, 0, device, reducer, self, num_preserved_vals, output);
824
- }
825
-
826
- LAUNCH_GPU_KERNEL((InnerReductionKernelHalfFloat<num_per_thread, Self, Op, Index>),
827
- num_blocks, block_size, 0, device, reducer, self, num_coeffs_to_reduce, num_preserved_vals, output);
828
-
829
- return false;
830
- }
831
- };
832
- #endif // EIGEN_HAS_GPU_FP16
833
-
834
-
835
- template <typename Self, typename Op>
836
- struct InnerReducer<Self, Op, GpuDevice> {
837
- // Unfortunately nvidia doesn't support well exotic types such as complex,
838
- // so reduce the scope of the optimized version of the code to the simple case
839
- // of floats and half floats.
840
- #ifdef EIGEN_HAS_GPU_FP16
841
- static const bool HasOptimizedImplementation = !Self::ReducerTraits::IsStateful &&
842
- (internal::is_same<typename Self::CoeffReturnType, float>::value ||
843
- internal::is_same<typename Self::CoeffReturnType, double>::value ||
844
- (internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && reducer_traits<Op, GpuDevice>::PacketAccess));
845
- #else // EIGEN_HAS_GPU_FP16
846
- static const bool HasOptimizedImplementation = !Self::ReducerTraits::IsStateful &&
847
- (internal::is_same<typename Self::CoeffReturnType, float>::value ||
848
- internal::is_same<typename Self::CoeffReturnType, double>::value);
849
- #endif // EIGEN_HAS_GPU_FP16
850
-
851
- template <typename OutputType>
852
- static bool run(const Self& self, Op& reducer, const GpuDevice& device, OutputType* output, typename Self::Index num_coeffs_to_reduce, typename Self::Index num_preserved_vals) {
853
- gpu_assert(HasOptimizedImplementation && "Should only be called on doubles, floats or half floats");
854
- const Index num_coeffs = array_prod(self.m_impl.dimensions());
855
- // Don't crash when we're called with an input tensor of size 0.
856
- if (num_coeffs == 0) {
857
- return true;
858
- }
859
- // It's faster to use the usual code.
860
- if (num_coeffs_to_reduce <= 128) {
861
- return true;
862
- }
863
-
864
- return InnerReductionLauncher<Self, Op, OutputType, reducer_traits<Op, GpuDevice>::PacketAccess>::run(self, reducer, device, output, num_coeffs_to_reduce, num_preserved_vals);
865
- }
866
- };
867
-
868
- template <int NumPerThread, typename Self,
869
- typename Reducer, typename Index>
870
- __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void OuterReductionKernel(Reducer reducer, const Self input, Index num_coeffs_to_reduce, Index num_preserved_coeffs,
871
- typename Self::CoeffReturnType* output) {
872
- const Index num_threads = blockDim.x * gridDim.x;
873
- const Index thread_id = blockIdx.x * blockDim.x + threadIdx.x;
874
- // Initialize the output values if they weren't initialized by the ReductionInitKernel
875
- if (gridDim.x == 1) {
876
- for (Index i = thread_id; i < num_preserved_coeffs; i += num_threads) {
877
- output[i] = reducer.initialize();
878
- }
879
- __syncthreads();
880
- }
881
-
882
- // Do the reduction.
883
- const Index max_iter = num_preserved_coeffs * divup<Index>(num_coeffs_to_reduce, NumPerThread);
884
- for (Index i = thread_id; i < max_iter; i += num_threads) {
885
- const Index input_col = i % num_preserved_coeffs;
886
- const Index input_row = (i / num_preserved_coeffs) * NumPerThread;
887
- typename Self::CoeffReturnType reduced_val = reducer.initialize();
888
- const Index max_row = numext::mini(input_row + NumPerThread, num_coeffs_to_reduce);
889
- for (Index j = input_row; j < max_row; j++) {
890
- typename Self::CoeffReturnType val = input.m_impl.coeff(j * num_preserved_coeffs + input_col);
891
- reducer.reduce(val, &reduced_val);
892
- }
893
- atomicReduce(&(output[input_col]), reduced_val, reducer);
894
- }
895
- }
896
-
897
-
898
- template <typename Self, typename Op>
899
- struct OuterReducer<Self, Op, GpuDevice> {
900
- // Unfortunately nvidia doesn't support well exotic types such as complex,
901
- // so reduce the scope of the optimized version of the code to the simple case
902
- // of floats.
903
- static const bool HasOptimizedImplementation = !Self::ReducerTraits::IsStateful &&
904
- (internal::is_same<typename Self::CoeffReturnType, float>::value ||
905
- internal::is_same<typename Self::CoeffReturnType, double>::value);
906
- template <typename Device, typename OutputType>
907
- static
908
- #if !defined(EIGEN_HIPCC)
909
- // FIXME : leaving this EIGEN_DEVICE_FUNC in, results in the following runtime error
910
- // (in the cxx11_tensor_reduction_gpu test)
911
- //
912
- // terminate called after throwing an instance of 'std::runtime_error'
913
- // what(): No device code available for function: _ZN5Eigen8internal20OuterReductionKernelIL...
914
- //
915
- // don't know why this happens (and why is it a runtime error instead of a compile time error)
916
- //
917
- // this will be fixed by HIP PR#457
918
- EIGEN_DEVICE_FUNC
919
- #endif
920
- bool run(const Self&, Op&, const Device&, OutputType*, typename Self::Index, typename Self::Index) {
921
- gpu_assert(false && "Should only be called to reduce doubles or floats on a gpu device");
922
- return true;
923
- }
924
-
925
- static bool run(const Self& self, Op& reducer, const GpuDevice& device, float* output, typename Self::Index num_coeffs_to_reduce, typename Self::Index num_preserved_vals) {
926
- typedef typename Self::Index Index;
927
-
928
- // It's faster to use the usual code.
929
- if (num_coeffs_to_reduce <= 32) {
930
- return true;
931
- }
932
-
933
- const Index num_coeffs = num_coeffs_to_reduce * num_preserved_vals;
934
- const int block_size = 256;
935
- const int num_per_thread = 16;
936
- const int dyn_blocks = divup<int>(num_coeffs, block_size * num_per_thread);
937
- const int max_blocks = device.getNumGpuMultiProcessors() *
938
- device.maxGpuThreadsPerMultiProcessor() / block_size;
939
- const int num_blocks = numext::mini<int>(max_blocks, dyn_blocks);
940
-
941
- if (num_blocks > 1) {
942
- // We initialize the outputs in the reduction kernel itself when we don't have to worry
943
- // about race conditions between multiple thread blocks.
944
- const int dyn_blocks = divup<int>(num_preserved_vals, 1024);
945
- const int max_blocks = device.getNumGpuMultiProcessors() *
946
- device.maxGpuThreadsPerMultiProcessor() / 1024;
947
- const int num_blocks = numext::mini<int>(max_blocks, dyn_blocks);
948
- LAUNCH_GPU_KERNEL((ReductionInitKernel<float, Index>),
949
- num_blocks, 1024, 0, device, reducer.initialize(),
950
- num_preserved_vals, output);
951
- }
952
-
953
- LAUNCH_GPU_KERNEL((OuterReductionKernel<num_per_thread, Self, Op, Index>),
954
- num_blocks, block_size, 0, device, reducer, self, num_coeffs_to_reduce, num_preserved_vals, output);
955
-
956
- return false;
957
- }
958
- };
959
-
960
- #endif // defined(EIGEN_USE_GPU) && defined(EIGEN_GPUCC)
961
-
962
-
963
- } // end namespace internal
964
- } // end namespace Eigen
965
-
966
- #endif // EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_GPU_H