sequenzo 0.1.18__cp310-cp310-win_amd64.whl → 0.1.19__cp310-cp310-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sequenzo might be problematic. Click here for more details.

Files changed (396)
  1. sequenzo/__init__.py +39 -7
  2. sequenzo/big_data/clara/utils/get_weighted_diss.c +154 -154
  3. sequenzo/big_data/clara/utils/get_weighted_diss.cp310-win_amd64.pyd +0 -0
  4. sequenzo/clustering/KMedoids.py +39 -0
  5. sequenzo/clustering/clustering_c_code.cp310-win_amd64.pyd +0 -0
  6. sequenzo/clustering/hierarchical_clustering.py +107 -5
  7. sequenzo/define_sequence_data.py +10 -1
  8. sequenzo/dissimilarity_measures/c_code.cp310-win_amd64.pyd +0 -0
  9. sequenzo/dissimilarity_measures/get_distance_matrix.py +1 -2
  10. sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.c +154 -154
  11. sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.cp310-win_amd64.pyd +0 -0
  12. sequenzo/dissimilarity_measures/utils/seqconc.c +154 -154
  13. sequenzo/dissimilarity_measures/utils/seqconc.cp310-win_amd64.pyd +0 -0
  14. sequenzo/dissimilarity_measures/utils/seqdss.c +154 -154
  15. sequenzo/dissimilarity_measures/utils/seqdss.cp310-win_amd64.pyd +0 -0
  16. sequenzo/dissimilarity_measures/utils/seqdur.c +154 -154
  17. sequenzo/dissimilarity_measures/utils/seqdur.cp310-win_amd64.pyd +0 -0
  18. sequenzo/dissimilarity_measures/utils/seqlength.c +154 -154
  19. sequenzo/dissimilarity_measures/utils/seqlength.cp310-win_amd64.pyd +0 -0
  20. sequenzo/multidomain/cat.py +0 -53
  21. sequenzo/multidomain/idcd.py +0 -1
  22. sequenzo/openmp_setup.py +233 -0
  23. sequenzo/visualization/plot_transition_matrix.py +21 -22
  24. {sequenzo-0.1.18.dist-info → sequenzo-0.1.19.dist-info}/METADATA +43 -10
  25. sequenzo-0.1.19.dist-info/RECORD +272 -0
  26. sequenzo/dissimilarity_measures/setup.py +0 -35
  27. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Cholesky/LDLT.h +0 -688
  28. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Cholesky/LLT.h +0 -558
  29. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h +0 -99
  30. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +0 -682
  31. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +0 -346
  32. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +0 -462
  33. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +0 -91
  34. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/EigenSolver.h +0 -622
  35. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +0 -418
  36. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +0 -226
  37. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +0 -374
  38. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +0 -158
  39. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/RealQZ.h +0 -657
  40. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/RealSchur.h +0 -558
  41. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +0 -77
  42. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +0 -904
  43. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +0 -87
  44. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +0 -561
  45. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/AlignedBox.h +0 -486
  46. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/AngleAxis.h +0 -247
  47. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/EulerAngles.h +0 -114
  48. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/Homogeneous.h +0 -501
  49. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/Hyperplane.h +0 -282
  50. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/OrthoMethods.h +0 -235
  51. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/ParametrizedLine.h +0 -232
  52. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/Quaternion.h +0 -870
  53. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/Rotation2D.h +0 -199
  54. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/RotationBase.h +0 -206
  55. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/Scaling.h +0 -188
  56. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/Transform.h +0 -1563
  57. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/Translation.h +0 -202
  58. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/Umeyama.h +0 -166
  59. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +0 -168
  60. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Householder/BlockHouseholder.h +0 -110
  61. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Householder/Householder.h +0 -176
  62. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Householder/HouseholderSequence.h +0 -545
  63. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +0 -226
  64. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +0 -212
  65. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +0 -229
  66. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +0 -394
  67. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +0 -453
  68. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +0 -444
  69. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +0 -198
  70. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +0 -117
  71. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Jacobi/Jacobi.h +0 -483
  72. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/KLUSupport/KLUSupport.h +0 -358
  73. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/LU/Determinant.h +0 -117
  74. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/LU/FullPivLU.h +0 -877
  75. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/LU/InverseImpl.h +0 -432
  76. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/LU/PartialPivLU.h +0 -624
  77. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h +0 -83
  78. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/LU/arch/InverseSize4.h +0 -351
  79. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/MetisSupport/MetisSupport.h +0 -137
  80. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/OrderingMethods/Amd.h +0 -435
  81. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +0 -1863
  82. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/OrderingMethods/Ordering.h +0 -153
  83. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +0 -678
  84. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +0 -545
  85. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/QR/ColPivHouseholderQR.h +0 -674
  86. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +0 -97
  87. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +0 -635
  88. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/QR/FullPivHouseholderQR.h +0 -713
  89. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/QR/HouseholderQR.h +0 -434
  90. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h +0 -68
  91. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +0 -335
  92. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SVD/BDCSVD.h +0 -1366
  93. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SVD/JacobiSVD.h +0 -812
  94. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +0 -91
  95. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SVD/SVDBase.h +0 -376
  96. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SVD/UpperBidiagonalization.h +0 -414
  97. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +0 -697
  98. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +0 -174
  99. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/AmbiVector.h +0 -378
  100. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/CompressedStorage.h +0 -274
  101. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +0 -352
  102. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h +0 -67
  103. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseAssign.h +0 -270
  104. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseBlock.h +0 -571
  105. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseColEtree.h +0 -206
  106. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +0 -370
  107. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +0 -722
  108. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +0 -150
  109. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +0 -342
  110. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +0 -138
  111. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseDot.h +0 -98
  112. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseFuzzy.h +0 -29
  113. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseMap.h +0 -305
  114. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseMatrix.h +0 -1518
  115. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +0 -398
  116. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparsePermutation.h +0 -178
  117. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseProduct.h +0 -181
  118. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseRedux.h +0 -49
  119. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseRef.h +0 -397
  120. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +0 -659
  121. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseSolverBase.h +0 -124
  122. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +0 -198
  123. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseTranspose.h +0 -92
  124. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseTriangularView.h +0 -189
  125. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseUtil.h +0 -186
  126. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseVector.h +0 -478
  127. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseView.h +0 -254
  128. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/TriangularSolver.h +0 -315
  129. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU.h +0 -923
  130. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLUImpl.h +0 -66
  131. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +0 -226
  132. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_Structs.h +0 -110
  133. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +0 -375
  134. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_Utils.h +0 -80
  135. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h +0 -181
  136. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +0 -179
  137. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +0 -107
  138. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +0 -280
  139. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +0 -126
  140. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +0 -130
  141. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +0 -223
  142. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h +0 -258
  143. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h +0 -137
  144. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h +0 -136
  145. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h +0 -83
  146. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseQR/SparseQR.h +0 -758
  147. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/StlSupport/StdDeque.h +0 -116
  148. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/StlSupport/StdList.h +0 -106
  149. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/StlSupport/StdVector.h +0 -131
  150. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/StlSupport/details.h +0 -84
  151. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +0 -1025
  152. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +0 -642
  153. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/misc/Image.h +0 -82
  154. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/misc/Kernel.h +0 -79
  155. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/misc/RealSvd2x2.h +0 -55
  156. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/misc/blas.h +0 -440
  157. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/misc/lapack.h +0 -152
  158. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/misc/lapacke.h +0 -16292
  159. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/misc/lapacke_mangling.h +0 -17
  160. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +0 -358
  161. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +0 -696
  162. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/BlockMethods.h +0 -1442
  163. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h +0 -115
  164. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +0 -177
  165. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/IndexedViewMethods.h +0 -262
  166. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +0 -152
  167. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +0 -95
  168. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/ReshapedMethods.h +0 -149
  169. sequenzo/dissimilarity_measures/src/eigen/bench/BenchSparseUtil.h +0 -149
  170. sequenzo/dissimilarity_measures/src/eigen/bench/BenchTimer.h +0 -199
  171. sequenzo/dissimilarity_measures/src/eigen/bench/BenchUtil.h +0 -92
  172. sequenzo/dissimilarity_measures/src/eigen/bench/basicbenchmark.h +0 -63
  173. sequenzo/dissimilarity_measures/src/eigen/bench/btl/generic_bench/utils/utilities.h +0 -90
  174. sequenzo/dissimilarity_measures/src/eigen/bench/btl/libs/BLAS/blas.h +0 -675
  175. sequenzo/dissimilarity_measures/src/eigen/bench/btl/libs/BLAS/c_interface_base.h +0 -73
  176. sequenzo/dissimilarity_measures/src/eigen/bench/perf_monitoring/gemm_common.h +0 -67
  177. sequenzo/dissimilarity_measures/src/eigen/bench/perf_monitoring/gemv_common.h +0 -69
  178. sequenzo/dissimilarity_measures/src/eigen/bench/spbench/spbenchsolver.h +0 -573
  179. sequenzo/dissimilarity_measures/src/eigen/bench/spbench/spbenchstyle.h +0 -95
  180. sequenzo/dissimilarity_measures/src/eigen/bench/tensors/benchmark.h +0 -49
  181. sequenzo/dissimilarity_measures/src/eigen/bench/tensors/tensor_benchmarks.h +0 -597
  182. sequenzo/dissimilarity_measures/src/eigen/blas/BandTriangularSolver.h +0 -97
  183. sequenzo/dissimilarity_measures/src/eigen/blas/GeneralRank1Update.h +0 -44
  184. sequenzo/dissimilarity_measures/src/eigen/blas/PackedSelfadjointProduct.h +0 -53
  185. sequenzo/dissimilarity_measures/src/eigen/blas/PackedTriangularMatrixVector.h +0 -79
  186. sequenzo/dissimilarity_measures/src/eigen/blas/PackedTriangularSolverVector.h +0 -88
  187. sequenzo/dissimilarity_measures/src/eigen/blas/Rank2Update.h +0 -57
  188. sequenzo/dissimilarity_measures/src/eigen/blas/common.h +0 -175
  189. sequenzo/dissimilarity_measures/src/eigen/blas/f2c/datatypes.h +0 -24
  190. sequenzo/dissimilarity_measures/src/eigen/blas/level1_cplx_impl.h +0 -155
  191. sequenzo/dissimilarity_measures/src/eigen/blas/level1_impl.h +0 -144
  192. sequenzo/dissimilarity_measures/src/eigen/blas/level1_real_impl.h +0 -122
  193. sequenzo/dissimilarity_measures/src/eigen/blas/level2_cplx_impl.h +0 -360
  194. sequenzo/dissimilarity_measures/src/eigen/blas/level2_impl.h +0 -553
  195. sequenzo/dissimilarity_measures/src/eigen/blas/level2_real_impl.h +0 -306
  196. sequenzo/dissimilarity_measures/src/eigen/blas/level3_impl.h +0 -702
  197. sequenzo/dissimilarity_measures/src/eigen/debug/gdb/__init__.py +0 -1
  198. sequenzo/dissimilarity_measures/src/eigen/debug/gdb/printers.py +0 -314
  199. sequenzo/dissimilarity_measures/src/eigen/demos/mandelbrot/mandelbrot.h +0 -71
  200. sequenzo/dissimilarity_measures/src/eigen/demos/mix_eigen_and_c/binary_library.h +0 -71
  201. sequenzo/dissimilarity_measures/src/eigen/demos/opengl/camera.h +0 -118
  202. sequenzo/dissimilarity_measures/src/eigen/demos/opengl/gpuhelper.h +0 -207
  203. sequenzo/dissimilarity_measures/src/eigen/demos/opengl/icosphere.h +0 -30
  204. sequenzo/dissimilarity_measures/src/eigen/demos/opengl/quaternion_demo.h +0 -114
  205. sequenzo/dissimilarity_measures/src/eigen/demos/opengl/trackball.h +0 -42
  206. sequenzo/dissimilarity_measures/src/eigen/lapack/lapack_common.h +0 -29
  207. sequenzo/dissimilarity_measures/src/eigen/scripts/relicense.py +0 -69
  208. sequenzo/dissimilarity_measures/src/eigen/test/AnnoyingScalar.h +0 -165
  209. sequenzo/dissimilarity_measures/src/eigen/test/MovableScalar.h +0 -35
  210. sequenzo/dissimilarity_measures/src/eigen/test/SafeScalar.h +0 -30
  211. sequenzo/dissimilarity_measures/src/eigen/test/bug1213.h +0 -8
  212. sequenzo/dissimilarity_measures/src/eigen/test/evaluator_common.h +0 -0
  213. sequenzo/dissimilarity_measures/src/eigen/test/gpu_common.h +0 -176
  214. sequenzo/dissimilarity_measures/src/eigen/test/main.h +0 -857
  215. sequenzo/dissimilarity_measures/src/eigen/test/packetmath_test_shared.h +0 -275
  216. sequenzo/dissimilarity_measures/src/eigen/test/product.h +0 -259
  217. sequenzo/dissimilarity_measures/src/eigen/test/random_without_cast_overflow.h +0 -152
  218. sequenzo/dissimilarity_measures/src/eigen/test/solverbase.h +0 -36
  219. sequenzo/dissimilarity_measures/src/eigen/test/sparse.h +0 -204
  220. sequenzo/dissimilarity_measures/src/eigen/test/sparse_solver.h +0 -699
  221. sequenzo/dissimilarity_measures/src/eigen/test/split_test_helper.h +0 -5994
  222. sequenzo/dissimilarity_measures/src/eigen/test/svd_common.h +0 -521
  223. sequenzo/dissimilarity_measures/src/eigen/test/svd_fill.h +0 -118
  224. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/Tensor.h +0 -554
  225. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h +0 -329
  226. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h +0 -247
  227. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +0 -1176
  228. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h +0 -1559
  229. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h +0 -1093
  230. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h +0 -518
  231. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h +0 -377
  232. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h +0 -1023
  233. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h +0 -73
  234. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h +0 -6
  235. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h +0 -1413
  236. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h +0 -575
  237. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionSycl.h +0 -1650
  238. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h +0 -1679
  239. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h +0 -456
  240. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h +0 -1132
  241. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConvolutionSycl.h +0 -544
  242. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h +0 -214
  243. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h +0 -347
  244. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h +0 -137
  245. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h +0 -6
  246. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h +0 -104
  247. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceGpu.h +0 -389
  248. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h +0 -1048
  249. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h +0 -409
  250. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h +0 -236
  251. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h +0 -490
  252. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h +0 -236
  253. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h +0 -983
  254. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h +0 -703
  255. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h +0 -388
  256. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h +0 -669
  257. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h +0 -379
  258. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h +0 -237
  259. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h +0 -191
  260. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h +0 -488
  261. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h +0 -302
  262. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorGlobalFunctions.h +0 -33
  263. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h +0 -99
  264. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaUndefines.h +0 -44
  265. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h +0 -79
  266. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h +0 -603
  267. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h +0 -738
  268. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h +0 -247
  269. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorInitializer.h +0 -82
  270. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h +0 -263
  271. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h +0 -216
  272. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMacros.h +0 -98
  273. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h +0 -327
  274. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h +0 -311
  275. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h +0 -1102
  276. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h +0 -708
  277. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h +0 -291
  278. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h +0 -322
  279. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h +0 -998
  280. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h +0 -6
  281. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h +0 -966
  282. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReductionSycl.h +0 -582
  283. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h +0 -454
  284. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h +0 -465
  285. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h +0 -528
  286. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorScanSycl.h +0 -513
  287. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h +0 -471
  288. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h +0 -161
  289. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h +0 -346
  290. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorTrace.h +0 -303
  291. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h +0 -264
  292. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h +0 -249
  293. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h +0 -629
  294. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/TensorSymmetry/DynamicSymmetry.h +0 -293
  295. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/TensorSymmetry/StaticSymmetry.h +0 -236
  296. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/TensorSymmetry/Symmetry.h +0 -338
  297. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/TensorSymmetry/util/TemplateGroupTheory.h +0 -669
  298. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/Barrier.h +0 -67
  299. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/EventCount.h +0 -249
  300. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h +0 -486
  301. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/RunQueue.h +0 -236
  302. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadCancel.h +0 -23
  303. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadEnvironment.h +0 -40
  304. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadLocal.h +0 -301
  305. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadPoolInterface.h +0 -48
  306. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadYield.h +0 -20
  307. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/util/CXX11Meta.h +0 -537
  308. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/util/CXX11Workarounds.h +0 -88
  309. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/util/EmulateArray.h +0 -261
  310. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/util/MaxSizeVector.h +0 -158
  311. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/AutoDiff/AutoDiffJacobian.h +0 -108
  312. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h +0 -730
  313. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/AutoDiff/AutoDiffVector.h +0 -220
  314. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/BVH/BVAlgorithms.h +0 -293
  315. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/BVH/KdBVH.h +0 -223
  316. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Eigenvalues/ArpackSelfAdjointEigenSolver.h +0 -790
  317. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/EulerAngles/EulerAngles.h +0 -355
  318. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/EulerAngles/EulerSystem.h +0 -305
  319. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/FFT/ei_fftw_impl.h +0 -261
  320. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/FFT/ei_kissfft_impl.h +0 -449
  321. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/IterativeSolvers/ConstrainedConjGrad.h +0 -187
  322. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/IterativeSolvers/DGMRES.h +0 -511
  323. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/IterativeSolvers/GMRES.h +0 -335
  324. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/IterativeSolvers/IDRS.h +0 -436
  325. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/IterativeSolvers/IncompleteLU.h +0 -90
  326. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/IterativeSolvers/IterationController.h +0 -154
  327. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/IterativeSolvers/MINRES.h +0 -267
  328. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/IterativeSolvers/Scaling.h +0 -193
  329. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h +0 -305
  330. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/LevenbergMarquardt/LMcovar.h +0 -84
  331. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/LevenbergMarquardt/LMonestep.h +0 -202
  332. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/LevenbergMarquardt/LMpar.h +0 -160
  333. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/LevenbergMarquardt/LMqrsolv.h +0 -188
  334. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/LevenbergMarquardt/LevenbergMarquardt.h +0 -396
  335. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h +0 -441
  336. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h +0 -569
  337. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h +0 -373
  338. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/MatrixFunctions/MatrixPower.h +0 -705
  339. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/MatrixFunctions/MatrixSquareRoot.h +0 -368
  340. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/MatrixFunctions/StemFunction.h +0 -117
  341. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/MoreVectorization/MathFunctions.h +0 -95
  342. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/HybridNonLinearSolver.h +0 -601
  343. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/LevenbergMarquardt.h +0 -657
  344. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/chkder.h +0 -66
  345. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/covar.h +0 -70
  346. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/dogleg.h +0 -107
  347. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/fdjac1.h +0 -79
  348. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/lmpar.h +0 -298
  349. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/qrsolv.h +0 -91
  350. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/r1mpyq.h +0 -30
  351. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/r1updt.h +0 -99
  352. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/rwupdt.h +0 -49
  353. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NumericalDiff/NumericalDiff.h +0 -130
  354. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Polynomials/Companion.h +0 -280
  355. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Polynomials/PolynomialSolver.h +0 -428
  356. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Polynomials/PolynomialUtils.h +0 -143
  357. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Skyline/SkylineInplaceLU.h +0 -352
  358. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Skyline/SkylineMatrix.h +0 -862
  359. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Skyline/SkylineMatrixBase.h +0 -212
  360. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Skyline/SkylineProduct.h +0 -295
  361. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Skyline/SkylineStorage.h +0 -259
  362. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Skyline/SkylineUtil.h +0 -89
  363. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SparseExtra/BlockOfDynamicSparseMatrix.h +0 -122
  364. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SparseExtra/BlockSparseMatrix.h +0 -1079
  365. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SparseExtra/DynamicSparseMatrix.h +0 -404
  366. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SparseExtra/MarketIO.h +0 -282
  367. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SparseExtra/MatrixMarketIterator.h +0 -247
  368. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SparseExtra/RandomSetter.h +0 -349
  369. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsArrayAPI.h +0 -286
  370. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsBFloat16.h +0 -68
  371. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsFunctors.h +0 -357
  372. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsHalf.h +0 -66
  373. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsImpl.h +0 -1959
  374. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsPacketMath.h +0 -118
  375. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/HipVectorCompatibility.h +0 -67
  376. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsArrayAPI.h +0 -167
  377. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsBFloat16.h +0 -58
  378. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h +0 -330
  379. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h +0 -58
  380. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h +0 -2045
  381. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h +0 -79
  382. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/arch/AVX/BesselFunctions.h +0 -46
  383. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/arch/AVX/SpecialFunctions.h +0 -16
  384. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/arch/AVX512/BesselFunctions.h +0 -46
  385. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/arch/AVX512/SpecialFunctions.h +0 -16
  386. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/arch/GPU/SpecialFunctions.h +0 -369
  387. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/arch/NEON/BesselFunctions.h +0 -54
  388. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/arch/NEON/SpecialFunctions.h +0 -34
  389. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Splines/Spline.h +0 -507
  390. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Splines/SplineFitting.h +0 -431
  391. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Splines/SplineFwd.h +0 -93
  392. sequenzo/dissimilarity_measures/src/eigen/unsupported/test/matrix_functions.h +0 -67
  393. sequenzo-0.1.18.dist-info/RECORD +0 -638
  394. {sequenzo-0.1.18.dist-info → sequenzo-0.1.19.dist-info}/WHEEL +0 -0
  395. {sequenzo-0.1.18.dist-info → sequenzo-0.1.19.dist-info}/licenses/LICENSE +0 -0
  396. {sequenzo-0.1.18.dist-info → sequenzo-0.1.19.dist-info}/top_level.txt +0 -0
@@ -1,966 +0,0 @@
1
- // This file is part of Eigen, a lightweight C++ template library
2
- // for linear algebra.
3
- //
4
- // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
5
- //
6
- // This Source Code Form is subject to the terms of the Mozilla
7
- // Public License v. 2.0. If a copy of the MPL was not distributed
8
- // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9
-
10
- #ifndef EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_GPU_H
11
- #define EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_GPU_H
12
-
13
- namespace Eigen {
14
- namespace internal {
15
-
16
-
17
- #if defined(EIGEN_USE_GPU) && defined(EIGEN_GPUCC)
18
- // Full reducers for GPU, don't vectorize for now
19
-
20
- // Reducer function that enables multiple gpu thread to safely accumulate at the same
21
- // output address. It basically reads the current value of the output variable, and
22
- // attempts to update it with the new value. If in the meantime another gpu thread
23
- // updated the content of the output address it will try again.
24
- template <typename T, typename R>
25
- __device__ EIGEN_ALWAYS_INLINE void atomicReduce(T* output, T accum, R& reducer) {
26
- #if (defined(EIGEN_HIP_DEVICE_COMPILE) && defined(__HIP_ARCH_HAS_WARP_SHUFFLE__)) || (EIGEN_CUDA_ARCH >= 300)
27
- if (sizeof(T) == 4)
28
- {
29
- unsigned int oldval = *reinterpret_cast<unsigned int*>(output);
30
- unsigned int newval = oldval;
31
- reducer.reduce(accum, reinterpret_cast<T*>(&newval));
32
- if (newval == oldval) {
33
- return;
34
- }
35
- unsigned int readback;
36
- while ((readback = atomicCAS((unsigned int*)output, oldval, newval)) != oldval) {
37
- oldval = readback;
38
- newval = oldval;
39
- reducer.reduce(accum, reinterpret_cast<T*>(&newval));
40
- if (newval == oldval) {
41
- return;
42
- }
43
- }
44
- }
45
- else if (sizeof(T) == 8) {
46
- unsigned long long oldval = *reinterpret_cast<unsigned long long*>(output);
47
- unsigned long long newval = oldval;
48
- reducer.reduce(accum, reinterpret_cast<T*>(&newval));
49
- if (newval == oldval) {
50
- return;
51
- }
52
- unsigned long long readback;
53
- while ((readback = atomicCAS((unsigned long long*)output, oldval, newval)) != oldval) {
54
- oldval = readback;
55
- newval = oldval;
56
- reducer.reduce(accum, reinterpret_cast<T*>(&newval));
57
- if (newval == oldval) {
58
- return;
59
- }
60
- }
61
- }
62
- else {
63
- gpu_assert(0 && "Wordsize not supported");
64
- }
65
- #else // EIGEN_CUDA_ARCH >= 300
66
- gpu_assert(0 && "Shouldn't be called on unsupported device");
67
- #endif // EIGEN_CUDA_ARCH >= 300
68
- }
69
-
70
- // We extend atomicExch to support extra data types
71
- template <typename Type>
72
- __device__ inline Type atomicExchCustom(Type* address, Type val) {
73
- return atomicExch(address, val);
74
- }
75
-
76
- template <>
77
- __device__ inline double atomicExchCustom(double* address, double val) {
78
- unsigned long long int* address_as_ull = reinterpret_cast<unsigned long long int*>(address);
79
- return __longlong_as_double(atomicExch(address_as_ull, __double_as_longlong(val)));
80
- }
81
-
82
- #ifdef EIGEN_HAS_GPU_FP16
83
- template <typename R>
84
- __device__ inline void atomicReduce(half2* output, half2 accum, R& reducer) {
85
- unsigned int oldval = *reinterpret_cast<unsigned int*>(output);
86
- unsigned int newval = oldval;
87
- reducer.reducePacket(accum, reinterpret_cast<half2*>(&newval));
88
- if (newval == oldval) {
89
- return;
90
- }
91
- unsigned int readback;
92
- while ((readback = atomicCAS((unsigned int*)output, oldval, newval)) != oldval) {
93
- oldval = readback;
94
- newval = oldval;
95
- reducer.reducePacket(accum, reinterpret_cast<half2*>(&newval));
96
- if (newval == oldval) {
97
- return;
98
- }
99
- }
100
- }
101
- // reduction should be associative since reduction is not atomic in wide vector but atomic in half2 operations
102
- template <typename R>
103
- __device__ inline void atomicReduce(Packet4h2* output, Packet4h2 accum, R& reducer) {
104
- half2* houtput=reinterpret_cast<half2*>(output);
105
- half2* haccum=reinterpret_cast<half2*>(&accum);
106
- for(int i=0;i<4;++i){
107
- atomicReduce(houtput+i,*(haccum+i),reducer);
108
- }
109
- }
110
- #endif // EIGEN_HAS_GPU_FP16
111
-
112
- template <>
113
- __device__ inline void atomicReduce(float* output, float accum, SumReducer<float>&) {
114
- #if (defined(EIGEN_HIP_DEVICE_COMPILE) && defined(__HIP_ARCH_HAS_WARP_SHUFFLE__)) || (EIGEN_CUDA_ARCH >= 300)
115
- atomicAdd(output, accum);
116
- #else // EIGEN_CUDA_ARCH >= 300
117
- gpu_assert(0 && "Shouldn't be called on unsupported device");
118
- #endif // EIGEN_CUDA_ARCH >= 300
119
- }
120
-
121
-
122
- template <typename CoeffType, typename Index>
123
- __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void ReductionInitKernel(const CoeffType val, Index num_preserved_coeffs, CoeffType* output) {
124
- const Index thread_id = blockIdx.x * blockDim.x + threadIdx.x;
125
- const Index num_threads = blockDim.x * gridDim.x;
126
- for (Index i = thread_id; i < num_preserved_coeffs; i += num_threads) {
127
- output[i] = val;
128
- }
129
- }
130
-
131
-
132
- template <int BlockSize, int NumPerThread, typename Self,
133
- typename Reducer, typename Index>
134
- __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void FullReductionKernel(Reducer reducer, const Self input, Index num_coeffs,
135
- typename Self::CoeffReturnType* output, unsigned int* semaphore) {
136
- #if (defined(EIGEN_HIP_DEVICE_COMPILE) && defined(__HIP_ARCH_HAS_WARP_SHUFFLE__)) || (EIGEN_CUDA_ARCH >= 300)
137
- // Initialize the output value
138
- const Index first_index = blockIdx.x * BlockSize * NumPerThread + threadIdx.x;
139
- if (gridDim.x == 1) {
140
- if (first_index == 0) {
141
- *output = reducer.initialize();
142
- }
143
- }
144
- else {
145
- if (threadIdx.x == 0) {
146
- unsigned int block = atomicCAS(semaphore, 0u, 1u);
147
- if (block == 0) {
148
- // We're the first block to run, initialize the output value
149
- atomicExchCustom(output, reducer.initialize());
150
- __threadfence();
151
- atomicExch(semaphore, 2u);
152
- }
153
- else {
154
- // Wait for the first block to initialize the output value.
155
- // Use atomicCAS here to ensure that the reads aren't cached
156
- unsigned int val;
157
- do {
158
- val = atomicCAS(semaphore, 2u, 2u);
159
- }
160
- while (val < 2u);
161
- }
162
- }
163
- }
164
-
165
- __syncthreads();
166
-
167
- eigen_assert(gridDim.x == 1 || *semaphore >= 2u);
168
-
169
- typename Self::CoeffReturnType accum = reducer.initialize();
170
- Index max_iter = numext::mini<Index>(num_coeffs - first_index, NumPerThread*BlockSize);
171
- for (Index i = 0; i < max_iter; i+=BlockSize) {
172
- const Index index = first_index + i;
173
- eigen_assert(index < num_coeffs);
174
- typename Self::CoeffReturnType val = input.m_impl.coeff(index);
175
- reducer.reduce(val, &accum);
176
- }
177
-
178
- #pragma unroll
179
- for (int offset = warpSize/2; offset > 0; offset /= 2) {
180
- #if defined(EIGEN_HIPCC)
181
- // use std::is_floating_point to determine the type of reduced_val
182
- // This is needed because when Type == double, hipcc will give a "call to __shfl_down is ambguous" error
183
- // and list the float and int versions of __shfl_down as the candidate functions.
184
- if (std::is_floating_point<typename Self::CoeffReturnType>::value) {
185
- reducer.reduce(__shfl_down(static_cast<float>(accum), offset, warpSize), &accum);
186
- } else {
187
- reducer.reduce(__shfl_down(static_cast<int>(accum), offset, warpSize), &accum);
188
- }
189
- #elif defined(EIGEN_CUDA_SDK_VER) && EIGEN_CUDA_SDK_VER < 90000
190
- reducer.reduce(__shfl_down(accum, offset, warpSize), &accum);
191
- #else
192
- reducer.reduce(__shfl_down_sync(0xFFFFFFFF, accum, offset, warpSize), &accum);
193
- #endif
194
- }
195
-
196
- if ((threadIdx.x & (warpSize - 1)) == 0) {
197
- atomicReduce(output, accum, reducer);
198
- }
199
-
200
- if (gridDim.x > 1 && threadIdx.x == 0) {
201
- // Let the last block reset the semaphore
202
- atomicInc(semaphore, gridDim.x + 1);
203
- #if defined(EIGEN_HIPCC)
204
- __threadfence_system();
205
- #endif
206
- }
207
- #else // EIGEN_CUDA_ARCH >= 300
208
- gpu_assert(0 && "Shouldn't be called on unsupported device");
209
- #endif // EIGEN_CUDA_ARCH >= 300
210
- }
211
-
212
-
213
- #ifdef EIGEN_HAS_GPU_FP16
214
- template <typename Self,
215
- typename Reducer, typename Index>
216
- __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void ReductionInitFullReduxKernelHalfFloat(Reducer reducer, const Self input, Index num_coeffs,
217
- packet_traits<Eigen::half>::type* scratch) {
218
- eigen_assert(blockDim.x == 1);
219
- eigen_assert(gridDim.x == 1);
220
- typedef packet_traits<Eigen::half>::type packet_type;
221
- Index packet_remainder =
222
- num_coeffs % Index(unpacket_traits<packet_type>::size);
223
- if (packet_remainder != 0) {
224
- half2* h2scratch = reinterpret_cast<half2*>(scratch);
225
- for (Index i = num_coeffs - packet_remainder; i + 2 <= num_coeffs; i += 2) {
226
- *h2scratch =
227
- __halves2half2(input.m_impl.coeff(i), input.m_impl.coeff(i + 1));
228
- h2scratch++;
229
- }
230
- if ((num_coeffs & 1) != 0) {
231
- half lastCoeff = input.m_impl.coeff(num_coeffs - 1);
232
- *h2scratch = __halves2half2(lastCoeff, reducer.initialize());
233
- }
234
- } else {
235
- *scratch = reducer.template initializePacket<packet_type>();
236
- }
237
- }
238
-
239
- template <typename Self,
240
- typename Reducer, typename Index>
241
- __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void ReductionInitKernelHalfFloat(Reducer reducer, const Self input, Index num_coeffs, half* output) {
242
- const Index thread_id = blockIdx.x * blockDim.x + threadIdx.x;
243
- const Index num_threads = blockDim.x * gridDim.x;
244
- typedef typename packet_traits<Eigen::half>::type PacketType;
245
-
246
- const Index num_packets =
247
- num_coeffs / Index(unpacket_traits<PacketType>::size);
248
- PacketType* p_output = reinterpret_cast<PacketType*>(output);
249
- for (Index i = thread_id; i < num_packets; i += num_threads) {
250
- p_output[i] = reducer.template initializePacket<PacketType>();
251
- }
252
- Index packet_remainder =
253
- num_coeffs % Index(unpacket_traits<PacketType>::size);
254
- if (thread_id < packet_remainder) {
255
- output[num_coeffs - packet_remainder + thread_id] = reducer.initialize();
256
- }
257
- }
258
-
259
- template <int BlockSize, int NumPerThread, typename Self,
260
- typename Reducer, typename Index>
261
- __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void FullReductionKernelHalfFloat(Reducer reducer, const Self input, Index num_coeffs,
262
- half* output, packet_traits<Eigen::half>::type* scratch) {
263
- typedef typename packet_traits<Eigen::half>::type PacketType;
264
- const int packet_width = unpacket_traits<PacketType>::size;
265
- eigen_assert(NumPerThread % packet_width == 0);
266
- const Index first_index =
267
- blockIdx.x * BlockSize * NumPerThread + packet_width * threadIdx.x;
268
-
269
- // Initialize the output value if it wasn't initialized by the ReductionInitKernel
270
-
271
- if (gridDim.x == 1) {
272
- if (first_index == 0) {
273
- int rem = num_coeffs % packet_width;
274
- if (rem != 0) {
275
- half2* p_scratch = reinterpret_cast<half2*>(scratch);
276
- *scratch = reducer.template initializePacket<PacketType>();
277
- for (int i = 0; i < rem / 2; i++) {
278
- *p_scratch = __halves2half2(
279
- input.m_impl.coeff(num_coeffs - packet_width + 2 * i),
280
- input.m_impl.coeff(num_coeffs - packet_width + 2 * i + 1));
281
- p_scratch++;
282
- }
283
- if ((num_coeffs & 1) != 0) {
284
- half last = input.m_impl.coeff(num_coeffs - 1);
285
- *p_scratch = __halves2half2(last, reducer.initialize());
286
- }
287
- } else {
288
- *scratch = reducer.template initializePacket<PacketType>();
289
- }
290
- }
291
- __syncthreads();
292
- }
293
-
294
- PacketType accum = reducer.template initializePacket<PacketType>();
295
- const Index max_iter =
296
- numext::mini<Index>((num_coeffs - first_index) / packet_width,
297
- NumPerThread * BlockSize / packet_width);
298
- for (Index i = 0; i < max_iter; i += BlockSize) {
299
- const Index index = first_index + packet_width * i;
300
- eigen_assert(index + packet_width < num_coeffs);
301
- PacketType val = input.m_impl.template packet<Unaligned>(index);
302
- reducer.reducePacket(val, &accum);
303
- }
304
-
305
- #pragma unroll
306
- for (int offset = warpSize/2; offset > 0; offset /= 2) {
307
- #if defined(EIGEN_HIPCC)
308
- PacketType r1;
309
- half2* hr = reinterpret_cast<half2*>(&r1);
310
- half2* hacc = reinterpret_cast<half2*>(&accum);
311
- for (int i = 0; i < packet_width / 2; i++) {
312
- // FIXME : remove this workaround once we have native half/half2 support for __shfl_down
313
- union { int i; half2 h; } wka_in, wka_out;
314
- wka_in.h = hacc[i];
315
- wka_out.i = __shfl_down(wka_in.i, offset, warpSize);
316
- hr[i] = wka_out.h;
317
- }
318
- reducer.reducePacket(r1, &accum);
319
- #elif defined(EIGEN_CUDA_SDK_VER) && EIGEN_CUDA_SDK_VER < 90000
320
- PacketType r1;
321
- half2* hr = reinterpret_cast<half2*>(&r1);
322
- half2* hacc = reinterpret_cast<half2*>(&accum);
323
- for (int i = 0; i < packet_width / 2; i++) {
324
- hr[i] = __shfl_down(hacc[i], offset, warpSize);
325
- }
326
- reducer.reducePacket(r1, &accum);
327
- #else
328
- PacketType r1;
329
- half2* hr = reinterpret_cast<half2*>(&r1);
330
- half2* hacc = reinterpret_cast<half2*>(&accum);
331
- for (int i = 0; i < packet_width / 2; i++) {
332
- hr[i] = __shfl_down_sync(0xFFFFFFFF, hacc[i], (unsigned)offset, warpSize);
333
- }
334
- reducer.reducePacket(r1, &accum);
335
-
336
- #endif
337
- }
338
-
339
- if ((threadIdx.x & (warpSize - 1)) == 0) {
340
- atomicReduce(scratch, accum, reducer);
341
- }
342
-
343
- __syncthreads();
344
- half2* rv1 = reinterpret_cast<half2*>(scratch);
345
- if (packet_width > 2) {
346
- reducer.reducePacket(rv1[2], rv1);
347
- reducer.reducePacket(rv1[3], rv1 + 1);
348
- reducer.reducePacket(rv1[1], rv1);
349
- }
350
- if (gridDim.x == 1) {
351
- if (first_index == 0) {
352
- half tmp = __low2half(*rv1);
353
- reducer.reduce(__high2half(*rv1), &tmp);
354
- *output = tmp;
355
- }
356
- }
357
- }
358
-
359
- template <typename Op>
360
- __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void ReductionCleanupKernelHalfFloat(Op reducer, half* output, packet_traits<Eigen::half>::type* scratch) {
361
- eigen_assert(threadIdx.x == 1);
362
- half2* pscratch = reinterpret_cast<half2*>(scratch);
363
- half tmp = __float2half(0.f);
364
- typedef packet_traits<Eigen::half>::type packet_type;
365
- for (int i = 0; i < unpacket_traits<packet_type>::size; i += 2) {
366
- reducer.reduce(__low2half(*pscratch), &tmp);
367
- reducer.reduce(__high2half(*pscratch), &tmp);
368
- pscratch++;
369
- }
370
- *output = tmp;
371
- }
372
-
373
- #endif // EIGEN_HAS_GPU_FP16
374
-
375
- template <typename Self, typename Op, typename OutputType, bool PacketAccess, typename Enabled = void>
376
- struct FullReductionLauncher {
377
- static void run(const Self&, Op&, const GpuDevice&, OutputType*, typename Self::Index) {
378
- gpu_assert(false && "Should only be called on doubles, floats and half floats");
379
- }
380
- };
381
-
382
- // Specialization for float and double
383
- template <typename Self, typename Op, typename OutputType, bool PacketAccess>
384
- struct FullReductionLauncher<
385
- Self, Op, OutputType, PacketAccess,
386
- typename internal::enable_if<
387
- internal::is_same<float, OutputType>::value ||
388
- internal::is_same<double, OutputType>::value,
389
- void>::type> {
390
- static void run(const Self& self, Op& reducer, const GpuDevice& device, OutputType* output, typename Self::Index num_coeffs) {
391
-
392
- typedef typename Self::Index Index;
393
- const int block_size = 256;
394
- const int num_per_thread = 128;
395
- const int num_blocks = divup<int>(num_coeffs, block_size * num_per_thread);
396
-
397
- unsigned int* semaphore = NULL;
398
- if (num_blocks > 1) {
399
- semaphore = device.semaphore();
400
- }
401
-
402
- LAUNCH_GPU_KERNEL((FullReductionKernel<block_size, num_per_thread, Self, Op, Index>),
403
- num_blocks, block_size, 0, device, reducer, self, num_coeffs, output, semaphore);
404
- }
405
- };
406
-
407
- #ifdef EIGEN_HAS_GPU_FP16
408
- template <typename Self, typename Op>
409
- struct FullReductionLauncher<Self, Op, Eigen::half, false> {
410
- static void run(const Self&, Op&, const GpuDevice&, half*, typename Self::Index) {
411
- gpu_assert(false && "Should not be called since there is no packet accessor");
412
- }
413
- };
414
-
415
- template <typename Self, typename Op>
416
- struct FullReductionLauncher<Self, Op, Eigen::half, true> {
417
- static void run(const Self& self, Op& reducer, const GpuDevice& device, half* output, typename Self::Index num_coeffs) {
418
- typedef typename Self::Index Index;
419
- typedef typename packet_traits<Eigen::half>::type PacketType;
420
-
421
- const int block_size = 256;
422
- const int num_per_thread = 128;
423
- const int num_blocks = divup<int>(num_coeffs, block_size * num_per_thread);
424
- PacketType* scratch = static_cast<PacketType*>(device.scratchpad());
425
- // half2* scratch = static_cast<half2*>(device.scratchpad());
426
-
427
- if (num_blocks > 1) {
428
- // We initialize the output and the scrathpad outside the reduction kernel when we can't be sure that there
429
- // won't be a race conditions between multiple thread blocks.
430
- LAUNCH_GPU_KERNEL((ReductionInitFullReduxKernelHalfFloat<Self, Op, Index>),
431
- 1, 1, 0, device, reducer, self, num_coeffs, scratch);
432
- }
433
-
434
- LAUNCH_GPU_KERNEL((FullReductionKernelHalfFloat<block_size, num_per_thread, Self, Op, Index>),
435
- num_blocks, block_size, 0, device, reducer, self, num_coeffs, output, scratch);
436
-
437
- if (num_blocks > 1) {
438
- LAUNCH_GPU_KERNEL((ReductionCleanupKernelHalfFloat<Op>),
439
- 1, 1, 0, device, reducer, output, scratch);
440
- }
441
- }
442
- };
443
- #endif // EIGEN_HAS_GPU_FP16
444
-
445
-
446
- template <typename Self, typename Op, bool Vectorizable>
447
- struct FullReducer<Self, Op, GpuDevice, Vectorizable> {
448
- // Unfortunately nvidia doesn't support well exotic types such as complex,
449
- // so reduce the scope of the optimized version of the code to the simple cases
450
- // of doubles, floats and half floats
451
- #ifdef EIGEN_HAS_GPU_FP16
452
- static const bool HasOptimizedImplementation = !Self::ReducerTraits::IsStateful &&
453
- (internal::is_same<typename Self::CoeffReturnType, float>::value ||
454
- internal::is_same<typename Self::CoeffReturnType, double>::value ||
455
- (internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && reducer_traits<Op, GpuDevice>::PacketAccess));
456
- #else // EIGEN_HAS_GPU_FP16
457
- static const bool HasOptimizedImplementation = !Self::ReducerTraits::IsStateful &&
458
- (internal::is_same<typename Self::CoeffReturnType, float>::value ||
459
- internal::is_same<typename Self::CoeffReturnType, double>::value);
460
- #endif // EIGEN_HAS_GPU_FP16
461
-
462
- template <typename OutputType>
463
- static void run(const Self& self, Op& reducer, const GpuDevice& device, OutputType* output) {
464
- gpu_assert(HasOptimizedImplementation && "Should only be called on doubles, floats or half floats");
465
- const Index num_coeffs = array_prod(self.m_impl.dimensions());
466
- // Don't crash when we're called with an input tensor of size 0.
467
- if (num_coeffs == 0) {
468
- return;
469
- }
470
-
471
- FullReductionLauncher<Self, Op, OutputType, reducer_traits<Op, GpuDevice>::PacketAccess>::run(self, reducer, device, output, num_coeffs);
472
- }
473
- };
474
-
475
-
476
- template <int NumPerThread, typename Self,
477
- typename Reducer, typename Index>
478
- __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void InnerReductionKernel(Reducer reducer, const Self input, Index num_coeffs_to_reduce, Index num_preserved_coeffs,
479
- typename Self::CoeffReturnType* output) {
480
- #if (defined(EIGEN_HIP_DEVICE_COMPILE) && defined(__HIP_ARCH_HAS_WARP_SHUFFLE__)) || (EIGEN_CUDA_ARCH >= 300)
481
- typedef typename Self::CoeffReturnType Type;
482
- eigen_assert(blockDim.y == 1);
483
- eigen_assert(blockDim.z == 1);
484
- eigen_assert(gridDim.y == 1);
485
- eigen_assert(gridDim.z == 1);
486
-
487
- const int unroll_times = 16;
488
- eigen_assert(NumPerThread % unroll_times == 0);
489
-
490
- const Index input_col_blocks = divup<Index>(num_coeffs_to_reduce, blockDim.x * NumPerThread);
491
- const Index num_input_blocks = input_col_blocks * num_preserved_coeffs;
492
-
493
- const Index num_threads = blockDim.x * gridDim.x;
494
- const Index thread_id = blockIdx.x * blockDim.x + threadIdx.x;
495
-
496
- // Initialize the output values if they weren't initialized by the ReductionInitKernel
497
- if (gridDim.x == 1) {
498
- for (Index i = thread_id; i < num_preserved_coeffs; i += num_threads) {
499
- output[i] = reducer.initialize();
500
- }
501
- __syncthreads();
502
- }
503
-
504
- for (Index i = blockIdx.x; i < num_input_blocks; i += gridDim.x) {
505
- const Index row = i / input_col_blocks;
506
-
507
- if (row < num_preserved_coeffs) {
508
- const Index col_block = i % input_col_blocks;
509
- const Index col_begin = col_block * blockDim.x * NumPerThread + threadIdx.x;
510
-
511
- Type reduced_val = reducer.initialize();
512
-
513
- for (Index j = 0; j < NumPerThread; j += unroll_times) {
514
- const Index last_col = col_begin + blockDim.x * (j + unroll_times - 1);
515
- if (last_col >= num_coeffs_to_reduce) {
516
- for (Index col = col_begin + blockDim.x * j; col < num_coeffs_to_reduce; col += blockDim.x) {
517
- const Type val = input.m_impl.coeff(row * num_coeffs_to_reduce + col);
518
- reducer.reduce(val, &reduced_val);
519
- }
520
- break;
521
- } else {
522
- // Faster version of the loop with no branches after unrolling.
523
- #pragma unroll
524
- for (int k = 0; k < unroll_times; ++k) {
525
- const Index col = col_begin + blockDim.x * (j + k);
526
- reducer.reduce(input.m_impl.coeff(row * num_coeffs_to_reduce + col), &reduced_val);
527
- }
528
- }
529
- }
530
-
531
- #pragma unroll
532
- for (int offset = warpSize/2; offset > 0; offset /= 2) {
533
- #if defined(EIGEN_HIPCC)
534
- // use std::is_floating_point to determine the type of reduced_val
535
- // This is needed because when Type == double, hipcc will give a "call to __shfl_down is ambguous" error
536
- // and list the float and int versions of __shfl_down as the candidate functions.
537
- if (std::is_floating_point<Type>::value) {
538
- reducer.reduce(__shfl_down(static_cast<float>(reduced_val), offset), &reduced_val);
539
- } else {
540
- reducer.reduce(__shfl_down(static_cast<int>(reduced_val), offset), &reduced_val);
541
- }
542
- #elif defined(EIGEN_CUDA_SDK_VER) && EIGEN_CUDA_SDK_VER < 90000
543
- reducer.reduce(__shfl_down(reduced_val, offset), &reduced_val);
544
- #else
545
- reducer.reduce(__shfl_down_sync(0xFFFFFFFF, reduced_val, offset), &reduced_val);
546
- #endif
547
- }
548
-
549
- if ((threadIdx.x & (warpSize - 1)) == 0) {
550
- atomicReduce(&(output[row]), reduced_val, reducer);
551
- }
552
- }
553
- }
554
- #else // EIGEN_CUDA_ARCH >= 300
555
- gpu_assert(0 && "Shouldn't be called on unsupported device");
556
- #endif // EIGEN_CUDA_ARCH >= 300
557
- }
558
-
559
- #ifdef EIGEN_HAS_GPU_FP16
560
-
561
- template <int NumPerThread, typename Self,
562
- typename Reducer, typename Index>
563
- __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void InnerReductionKernelHalfFloat(Reducer reducer, const Self input, Index num_coeffs_to_reduce, Index num_preserved_coeffs,
564
- half* output) {
565
- eigen_assert(blockDim.y == 1);
566
- eigen_assert(blockDim.z == 1);
567
- eigen_assert(gridDim.y == 1);
568
- eigen_assert(gridDim.z == 1);
569
-
570
- typedef typename packet_traits<Eigen::half>::type PacketType;
571
- const int packet_width = unpacket_traits<PacketType>::size;
572
- const int unroll_times = 16 / packet_width;
573
- eigen_assert(NumPerThread % unroll_times == 0);
574
- eigen_assert(unroll_times % 2 == 0);
575
-
576
- const Index input_col_blocks = divup<Index>(num_coeffs_to_reduce, blockDim.x * NumPerThread * 2);
577
- const Index num_input_blocks = divup<Index>(input_col_blocks * num_preserved_coeffs, 2);
578
-
579
- const Index num_threads = blockDim.x * gridDim.x;
580
- const Index thread_id = blockIdx.x * blockDim.x + threadIdx.x;
581
-
582
- // Initialize the output values if they weren't initialized by the ReductionInitKernel
583
- if (gridDim.x == 1) {
584
- Index i = packet_width * thread_id;
585
- for (; i + packet_width <= num_preserved_coeffs;
586
- i += packet_width * num_threads) {
587
- PacketType* poutput = reinterpret_cast<PacketType*>(output + i);
588
- *poutput = reducer.template initializePacket<PacketType>();
589
- }
590
- if (i < num_preserved_coeffs) {
591
- output[i] = reducer.initialize();
592
- }
593
- __syncthreads();
594
- }
595
-
596
- for (Index i = blockIdx.x; i < num_input_blocks; i += gridDim.x) {
597
- const Index row = 2 * (i / input_col_blocks); // everybody takes 2 rows
598
-
599
- if (row + 1 < num_preserved_coeffs) {
600
- const Index col_block = i % input_col_blocks;
601
- const Index col_begin =
602
- packet_width * (col_block * blockDim.x * NumPerThread + threadIdx.x);
603
-
604
- PacketType reduced_val1 = reducer.template initializePacket<PacketType>();
605
- PacketType reduced_val2 = reducer.template initializePacket<PacketType>();
606
-
607
- for (Index j = 0; j < NumPerThread; j += unroll_times) {
608
- const Index last_col =
609
- col_begin + blockDim.x * (j + unroll_times - 1) * packet_width;
610
- if (last_col >= num_coeffs_to_reduce) {
611
- Index col = col_begin + blockDim.x * j;
612
- for (; col + packet_width <= num_coeffs_to_reduce;
613
- col += blockDim.x) {
614
- const PacketType val1 = input.m_impl.template packet<Unaligned>(
615
- row * num_coeffs_to_reduce + col);
616
- reducer.reducePacket(val1, &reduced_val1);
617
- const PacketType val2 = input.m_impl.template packet<Unaligned>(
618
- (row + 1) * num_coeffs_to_reduce + col);
619
- reducer.reducePacket(val2, &reduced_val2);
620
- }
621
- if (col < num_coeffs_to_reduce) {
622
- PacketType r1 = reducer.template initializePacket<PacketType>();
623
- PacketType r2 = reducer.template initializePacket<PacketType>();
624
- half2* hr1 = reinterpret_cast<half2*>(&r1);
625
- half2* hr2 = reinterpret_cast<half2*>(&r2);
626
- while (col + 1 < num_coeffs_to_reduce) {
627
- *hr1 = __halves2half2(
628
- input.m_impl.coeff(row * num_coeffs_to_reduce + col),
629
- input.m_impl.coeff(row * num_coeffs_to_reduce + col + 1));
630
- *hr2 = __halves2half2(
631
- input.m_impl.coeff((row + 1) * num_coeffs_to_reduce + col),
632
- input.m_impl.coeff((row + 1) * num_coeffs_to_reduce + col +
633
- 1));
634
- hr1++;
635
- hr2++;
636
- col += 2;
637
- }
638
- if (col < num_coeffs_to_reduce) {
639
- // Peel;
640
- const half last1 =
641
- input.m_impl.coeff(row * num_coeffs_to_reduce + col);
642
- *hr1 = __halves2half2(last1, reducer.initialize());
643
- const half last2 =
644
- input.m_impl.coeff((row + 1) * num_coeffs_to_reduce + col);
645
- *hr2 = __halves2half2(last2, reducer.initialize());
646
- }
647
- reducer.reducePacket(r1, &reduced_val1);
648
- reducer.reducePacket(r2, &reduced_val2);
649
- }
650
- break;
651
- } else {
652
- // Faster version of the loop with no branches after unrolling.
653
- #pragma unroll
654
- for (int k = 0; k < unroll_times; ++k) {
655
- const Index col = col_begin + blockDim.x * (j + k) * packet_width;
656
- reducer.reducePacket(input.m_impl.template packet<Unaligned>(
657
- row * num_coeffs_to_reduce + col),
658
- &reduced_val1);
659
- reducer.reducePacket(input.m_impl.template packet<Unaligned>(
660
- (row + 1) * num_coeffs_to_reduce + col),
661
- &reduced_val2);
662
- }
663
- }
664
- }
665
-
666
- #pragma unroll
667
- for (int offset = warpSize/2; offset > 0; offset /= 2) {
668
- #if defined(EIGEN_HIPCC)
669
- PacketType r1;
670
- PacketType r2;
671
- half2* hr1 = reinterpret_cast<half2*>(&r1);
672
- half2* hr2 = reinterpret_cast<half2*>(&r2);
673
- half2* rv1 = reinterpret_cast<half2*>(&reduced_val1);
674
- half2* rv2 = reinterpret_cast<half2*>(&reduced_val2);
675
- for (int i = 0; i < packet_width / 2; i++) {
676
- // FIXME : remove this workaround once we have native half/half2 support for __shfl_down
677
- union { int i; half2 h; } wka_in1, wka_out1;
678
- wka_in1.h = rv1[i];
679
- wka_out1.i = __shfl_down(wka_in1.i, offset, warpSize);
680
- hr1[i] = wka_out1.h;
681
-
682
- union { int i; half2 h; } wka_in2, wka_out2;
683
- wka_in2.h = rv2[i];
684
- wka_out2.i = __shfl_down(wka_in2.i, offset, warpSize);
685
- hr2[i] = wka_out2.h;
686
- }
687
- reducer.reducePacket(r1, &reduced_val1);
688
- reducer.reducePacket(r2, &reduced_val2);
689
- #elif defined(EIGEN_CUDA_SDK_VER) && EIGEN_CUDA_SDK_VER < 90000
690
- PacketType r1;
691
- PacketType r2;
692
- half2* hr1 = reinterpret_cast<half2*>(&r1);
693
- half2* hr2 = reinterpret_cast<half2*>(&r2);
694
- half2* rv1 = reinterpret_cast<half2*>(&reduced_val1);
695
- half2* rv2 = reinterpret_cast<half2*>(&reduced_val2);
696
- for (int i = 0; i < packet_width / 2; i++) {
697
- hr1[i] = __shfl_down(rv1[i], offset, warpSize);
698
- hr2[i] = __shfl_down(rv2[i], offset, warpSize);
699
- }
700
- reducer.reducePacket(r1, &reduced_val1);
701
- reducer.reducePacket(r2, &reduced_val2);
702
- #else
703
- PacketType r1;
704
- PacketType r2;
705
- half2* hr1 = reinterpret_cast<half2*>(&r1);
706
- half2* hr2 = reinterpret_cast<half2*>(&r2);
707
- half2* rr1 = reinterpret_cast<half2*>(&reduced_val1);
708
- half2* rr2 = reinterpret_cast<half2*>(&reduced_val2);
709
- for (int i = 0; i < packet_width / 2; i++) {
710
- hr1[i] =
711
- __shfl_down_sync(0xFFFFFFFF, rr1[i], (unsigned)offset, warpSize);
712
- hr2[i] =
713
- __shfl_down_sync(0xFFFFFFFF, rr2[i], (unsigned)offset, warpSize);
714
- }
715
- reducer.reducePacket(r1, &reduced_val1);
716
- reducer.reducePacket(r2, &reduced_val2);
717
-
718
- #endif
719
- }
720
- half2* rv1 = reinterpret_cast<half2*>(&reduced_val1);
721
- half2* rv2 = reinterpret_cast<half2*>(&reduced_val2);
722
- half2 val;
723
- if (packet_width > 2) {
724
- reducer.reducePacket(rv1[2], rv1);
725
- reducer.reducePacket(rv1[3], rv1 + 1);
726
- reducer.reducePacket(rv1[1], rv1);
727
- reducer.reducePacket(rv2[2], rv2);
728
- reducer.reducePacket(rv2[3], rv2 + 1);
729
- reducer.reducePacket(rv2[1], rv2);
730
- }
731
- half val1 = __low2half(*rv1);
732
- reducer.reduce(__high2half(*rv1), &val1);
733
- half val2 = __low2half(*rv2);
734
- reducer.reduce(__high2half(*rv2), &val2);
735
- val = __halves2half2(val1, val2);
736
- if ((threadIdx.x & (warpSize - 1)) == 0) {
737
- half* loc = output + row;
738
- atomicReduce((half2*)loc, val, reducer);
739
- }
740
- }
741
- }
742
- }
743
-
744
- #endif // EIGEN_HAS_GPU_FP16
745
-
746
- template <typename Self, typename Op, typename OutputType, bool PacketAccess, typename Enabled = void>
747
- struct InnerReductionLauncher {
748
- static EIGEN_DEVICE_FUNC bool run(const Self&, Op&, const GpuDevice&, OutputType*, typename Self::Index, typename Self::Index) {
749
- gpu_assert(false && "Should only be called to reduce doubles, floats and half floats on a gpu device");
750
- return true;
751
- }
752
- };
753
-
754
- // Specialization for float and double
755
- template <typename Self, typename Op, typename OutputType, bool PacketAccess>
756
- struct InnerReductionLauncher<
757
- Self, Op, OutputType, PacketAccess,
758
- typename internal::enable_if<
759
- internal::is_same<float, OutputType>::value ||
760
- internal::is_same<double, OutputType>::value,
761
- void>::type> {
762
- static bool run(const Self& self, Op& reducer, const GpuDevice& device, OutputType* output, typename Self::Index num_coeffs_to_reduce, typename Self::Index num_preserved_vals) {
763
- typedef typename Self::Index Index;
764
-
765
- const Index num_coeffs = num_coeffs_to_reduce * num_preserved_vals;
766
- const int block_size = 256;
767
- const int num_per_thread = 128;
768
- const int dyn_blocks = divup<int>(num_coeffs, block_size * num_per_thread);
769
- const int max_blocks = device.getNumGpuMultiProcessors() *
770
- device.maxGpuThreadsPerMultiProcessor() / block_size;
771
- const int num_blocks = numext::mini<int>(max_blocks, dyn_blocks);
772
-
773
- if (num_blocks > 1) {
774
- // We initialize the outputs outside the reduction kernel when we can't be sure that there
775
- // won't be a race conditions between multiple thread blocks.
776
- const int dyn_blocks = divup<int>(num_preserved_vals, 1024);
777
- const int max_blocks = device.getNumGpuMultiProcessors() *
778
- device.maxGpuThreadsPerMultiProcessor() / 1024;
779
- const int num_blocks = numext::mini<int>(max_blocks, dyn_blocks);
780
- LAUNCH_GPU_KERNEL((ReductionInitKernel<OutputType, Index>),
781
- num_blocks, 1024, 0, device, reducer.initialize(),
782
- num_preserved_vals, output);
783
- }
784
-
785
- LAUNCH_GPU_KERNEL((InnerReductionKernel<num_per_thread, Self, Op, Index>),
786
- num_blocks, block_size, 0, device, reducer, self, num_coeffs_to_reduce, num_preserved_vals, output);
787
-
788
- return false;
789
- }
790
- };
791
-
792
- #ifdef EIGEN_HAS_GPU_FP16
793
- template <typename Self, typename Op>
794
- struct InnerReductionLauncher<Self, Op, Eigen::half, false> {
795
- static bool run(const Self&, Op&, const GpuDevice&, half*, typename Self::Index, typename Self::Index) {
796
- gpu_assert(false && "Should not be called since there is no packet accessor");
797
- return true;
798
- }
799
- };
800
-
801
- template <typename Self, typename Op>
802
- struct InnerReductionLauncher<Self, Op, Eigen::half, true> {
803
- static bool run(const Self& self, Op& reducer, const GpuDevice& device, half* output, typename Self::Index num_coeffs_to_reduce, typename Self::Index num_preserved_vals) {
804
- typedef typename Self::Index Index;
805
-
806
- if (num_preserved_vals % 2 != 0) {
807
- // Not supported yet, revert to the slower code path
808
- return true;
809
- }
810
-
811
- const Index num_coeffs = num_coeffs_to_reduce * num_preserved_vals;
812
- const int block_size = /*256*/128;
813
- const int num_per_thread = /*128*/64;
814
- const int dyn_blocks = divup<int>(num_coeffs, block_size * num_per_thread);
815
- const int max_blocks = device.getNumGpuMultiProcessors() *
816
- device.maxGpuThreadsPerMultiProcessor() / block_size;
817
- const int num_blocks = numext::mini<int>(max_blocks, dyn_blocks);
818
-
819
- if (num_blocks > 1) {
820
- // We initialize the outputs outside the reduction kernel when we can't be sure that there
821
- // won't be a race conditions between multiple thread blocks.
822
- LAUNCH_GPU_KERNEL((ReductionInitKernelHalfFloat<Self, Op, Index>),
823
- 1, 1, 0, device, reducer, self, num_preserved_vals, output);
824
- }
825
-
826
- LAUNCH_GPU_KERNEL((InnerReductionKernelHalfFloat<num_per_thread, Self, Op, Index>),
827
- num_blocks, block_size, 0, device, reducer, self, num_coeffs_to_reduce, num_preserved_vals, output);
828
-
829
- return false;
830
- }
831
- };
832
- #endif // EIGEN_HAS_GPU_FP16
833
-
834
-
835
- template <typename Self, typename Op>
836
- struct InnerReducer<Self, Op, GpuDevice> {
837
- // Unfortunately nvidia doesn't support well exotic types such as complex,
838
- // so reduce the scope of the optimized version of the code to the simple case
839
- // of floats and half floats.
840
- #ifdef EIGEN_HAS_GPU_FP16
841
- static const bool HasOptimizedImplementation = !Self::ReducerTraits::IsStateful &&
842
- (internal::is_same<typename Self::CoeffReturnType, float>::value ||
843
- internal::is_same<typename Self::CoeffReturnType, double>::value ||
844
- (internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && reducer_traits<Op, GpuDevice>::PacketAccess));
845
- #else // EIGEN_HAS_GPU_FP16
846
- static const bool HasOptimizedImplementation = !Self::ReducerTraits::IsStateful &&
847
- (internal::is_same<typename Self::CoeffReturnType, float>::value ||
848
- internal::is_same<typename Self::CoeffReturnType, double>::value);
849
- #endif // EIGEN_HAS_GPU_FP16
850
-
851
- template <typename OutputType>
852
- static bool run(const Self& self, Op& reducer, const GpuDevice& device, OutputType* output, typename Self::Index num_coeffs_to_reduce, typename Self::Index num_preserved_vals) {
853
- gpu_assert(HasOptimizedImplementation && "Should only be called on doubles, floats or half floats");
854
- const Index num_coeffs = array_prod(self.m_impl.dimensions());
855
- // Don't crash when we're called with an input tensor of size 0.
856
- if (num_coeffs == 0) {
857
- return true;
858
- }
859
- // It's faster to use the usual code.
860
- if (num_coeffs_to_reduce <= 128) {
861
- return true;
862
- }
863
-
864
- return InnerReductionLauncher<Self, Op, OutputType, reducer_traits<Op, GpuDevice>::PacketAccess>::run(self, reducer, device, output, num_coeffs_to_reduce, num_preserved_vals);
865
- }
866
- };
867
-
868
- template <int NumPerThread, typename Self,
869
- typename Reducer, typename Index>
870
- __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void OuterReductionKernel(Reducer reducer, const Self input, Index num_coeffs_to_reduce, Index num_preserved_coeffs,
871
- typename Self::CoeffReturnType* output) {
872
- const Index num_threads = blockDim.x * gridDim.x;
873
- const Index thread_id = blockIdx.x * blockDim.x + threadIdx.x;
874
- // Initialize the output values if they weren't initialized by the ReductionInitKernel
875
- if (gridDim.x == 1) {
876
- for (Index i = thread_id; i < num_preserved_coeffs; i += num_threads) {
877
- output[i] = reducer.initialize();
878
- }
879
- __syncthreads();
880
- }
881
-
882
- // Do the reduction.
883
- const Index max_iter = num_preserved_coeffs * divup<Index>(num_coeffs_to_reduce, NumPerThread);
884
- for (Index i = thread_id; i < max_iter; i += num_threads) {
885
- const Index input_col = i % num_preserved_coeffs;
886
- const Index input_row = (i / num_preserved_coeffs) * NumPerThread;
887
- typename Self::CoeffReturnType reduced_val = reducer.initialize();
888
- const Index max_row = numext::mini(input_row + NumPerThread, num_coeffs_to_reduce);
889
- for (Index j = input_row; j < max_row; j++) {
890
- typename Self::CoeffReturnType val = input.m_impl.coeff(j * num_preserved_coeffs + input_col);
891
- reducer.reduce(val, &reduced_val);
892
- }
893
- atomicReduce(&(output[input_col]), reduced_val, reducer);
894
- }
895
- }
896
-
897
-
898
- template <typename Self, typename Op>
899
- struct OuterReducer<Self, Op, GpuDevice> {
900
- // Unfortunately nvidia doesn't support well exotic types such as complex,
901
- // so reduce the scope of the optimized version of the code to the simple case
902
- // of floats.
903
- static const bool HasOptimizedImplementation = !Self::ReducerTraits::IsStateful &&
904
- (internal::is_same<typename Self::CoeffReturnType, float>::value ||
905
- internal::is_same<typename Self::CoeffReturnType, double>::value);
906
- template <typename Device, typename OutputType>
907
- static
908
- #if !defined(EIGEN_HIPCC)
909
- // FIXME : leaving this EIGEN_DEVICE_FUNC in, results in the following runtime error
910
- // (in the cxx11_tensor_reduction_gpu test)
911
- //
912
- // terminate called after throwing an instance of 'std::runtime_error'
913
- // what(): No device code available for function: _ZN5Eigen8internal20OuterReductionKernelIL...
914
- //
915
- // don't know why this happens (and why is it a runtime error instead of a compile time error)
916
- //
917
- // this will be fixed by HIP PR#457
918
- EIGEN_DEVICE_FUNC
919
- #endif
920
- bool run(const Self&, Op&, const Device&, OutputType*, typename Self::Index, typename Self::Index) {
921
- gpu_assert(false && "Should only be called to reduce doubles or floats on a gpu device");
922
- return true;
923
- }
924
-
925
- static bool run(const Self& self, Op& reducer, const GpuDevice& device, float* output, typename Self::Index num_coeffs_to_reduce, typename Self::Index num_preserved_vals) {
926
- typedef typename Self::Index Index;
927
-
928
- // It's faster to use the usual code.
929
- if (num_coeffs_to_reduce <= 32) {
930
- return true;
931
- }
932
-
933
- const Index num_coeffs = num_coeffs_to_reduce * num_preserved_vals;
934
- const int block_size = 256;
935
- const int num_per_thread = 16;
936
- const int dyn_blocks = divup<int>(num_coeffs, block_size * num_per_thread);
937
- const int max_blocks = device.getNumGpuMultiProcessors() *
938
- device.maxGpuThreadsPerMultiProcessor() / block_size;
939
- const int num_blocks = numext::mini<int>(max_blocks, dyn_blocks);
940
-
941
- if (num_blocks > 1) {
942
- // We initialize the outputs in the reduction kernel itself when we don't have to worry
943
- // about race conditions between multiple thread blocks.
944
- const int dyn_blocks = divup<int>(num_preserved_vals, 1024);
945
- const int max_blocks = device.getNumGpuMultiProcessors() *
946
- device.maxGpuThreadsPerMultiProcessor() / 1024;
947
- const int num_blocks = numext::mini<int>(max_blocks, dyn_blocks);
948
- LAUNCH_GPU_KERNEL((ReductionInitKernel<float, Index>),
949
- num_blocks, 1024, 0, device, reducer.initialize(),
950
- num_preserved_vals, output);
951
- }
952
-
953
- LAUNCH_GPU_KERNEL((OuterReductionKernel<num_per_thread, Self, Op, Index>),
954
- num_blocks, block_size, 0, device, reducer, self, num_coeffs_to_reduce, num_preserved_vals, output);
955
-
956
- return false;
957
- }
958
- };
959
-
960
- #endif // defined(EIGEN_USE_GPU) && defined(EIGEN_GPUCC)
961
-
962
-
963
- } // end namespace internal
964
- } // end namespace Eigen
965
-
966
- #endif // EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_GPU_H