sequenzo 0.1.17__cp310-cp310-macosx_10_9_universal2.whl → 0.1.19__cp310-cp310-macosx_10_9_universal2.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sequenzo might be problematic. Click here for more details.

Files changed (423)
  1. sequenzo/__init__.py +64 -8
  2. sequenzo/big_data/clara/clara.py +1 -1
  3. sequenzo/big_data/clara/utils/get_weighted_diss.c +155 -155
  4. sequenzo/big_data/clara/utils/get_weighted_diss.cpython-310-darwin.so +0 -0
  5. sequenzo/clustering/KMedoids.py +39 -0
  6. sequenzo/clustering/hierarchical_clustering.py +304 -8
  7. sequenzo/define_sequence_data.py +44 -3
  8. sequenzo/dissimilarity_measures/c_code.cpython-310-darwin.so +0 -0
  9. sequenzo/dissimilarity_measures/get_distance_matrix.py +1 -2
  10. sequenzo/dissimilarity_measures/get_substitution_cost_matrix.py +1 -1
  11. sequenzo/dissimilarity_measures/src/DHDdistance.cpp +13 -37
  12. sequenzo/dissimilarity_measures/src/LCPdistance.cpp +13 -37
  13. sequenzo/dissimilarity_measures/src/OMdistance.cpp +12 -47
  14. sequenzo/dissimilarity_measures/src/OMspellDistance.cpp +103 -67
  15. sequenzo/dissimilarity_measures/src/dp_utils.h +160 -0
  16. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_arithmetic.hpp +41 -16
  17. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_complex.hpp +4 -0
  18. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_details.hpp +7 -0
  19. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_logical.hpp +10 -0
  20. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_math.hpp +127 -43
  21. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_memory.hpp +30 -2
  22. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_swizzle.hpp +174 -0
  23. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_trigo.hpp +14 -5
  24. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx.hpp +111 -54
  25. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx2.hpp +131 -9
  26. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512bw.hpp +11 -113
  27. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512dq.hpp +39 -7
  28. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512f.hpp +336 -30
  29. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vbmi.hpp +9 -37
  30. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vbmi2.hpp +58 -0
  31. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_common.hpp +1 -0
  32. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_common_fwd.hpp +35 -2
  33. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_constants.hpp +3 -1
  34. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_emulated.hpp +17 -0
  35. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_avx.hpp +13 -0
  36. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_sse.hpp +18 -0
  37. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma4.hpp +13 -0
  38. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_isa.hpp +8 -0
  39. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_neon.hpp +363 -34
  40. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_neon64.hpp +7 -0
  41. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_rvv.hpp +13 -0
  42. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_scalar.hpp +41 -4
  43. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse2.hpp +252 -16
  44. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse3.hpp +9 -0
  45. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_ssse3.hpp +12 -1
  46. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sve.hpp +7 -0
  47. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_vsx.hpp +892 -0
  48. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_wasm.hpp +78 -1
  49. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_arch.hpp +3 -1
  50. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_config.hpp +13 -2
  51. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_cpuid.hpp +5 -0
  52. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_inline.hpp +5 -1
  53. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_all_registers.hpp +2 -0
  54. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_api.hpp +64 -1
  55. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_batch.hpp +36 -0
  56. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_rvv_register.hpp +40 -31
  57. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_traits.hpp +8 -0
  58. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_vsx_register.hpp +77 -0
  59. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/xsimd.hpp +6 -0
  60. sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.c +155 -155
  61. sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.cpython-310-darwin.so +0 -0
  62. sequenzo/dissimilarity_measures/utils/seqconc.c +155 -155
  63. sequenzo/dissimilarity_measures/utils/seqconc.cpython-310-darwin.so +0 -0
  64. sequenzo/dissimilarity_measures/utils/seqdss.c +155 -155
  65. sequenzo/dissimilarity_measures/utils/seqdss.cpython-310-darwin.so +0 -0
  66. sequenzo/dissimilarity_measures/utils/seqdur.c +155 -155
  67. sequenzo/dissimilarity_measures/utils/seqdur.cpython-310-darwin.so +0 -0
  68. sequenzo/dissimilarity_measures/utils/seqlength.c +155 -155
  69. sequenzo/dissimilarity_measures/utils/seqlength.cpython-310-darwin.so +0 -0
  70. sequenzo/multidomain/cat.py +0 -53
  71. sequenzo/multidomain/idcd.py +0 -1
  72. sequenzo/openmp_setup.py +233 -0
  73. sequenzo/sequence_characteristics/__init__.py +4 -0
  74. sequenzo/sequence_characteristics/complexity_index.py +17 -57
  75. sequenzo/sequence_characteristics/overall_cross_sectional_entropy.py +177 -111
  76. sequenzo/sequence_characteristics/plot_characteristics.py +30 -11
  77. sequenzo/sequence_characteristics/simple_characteristics.py +1 -0
  78. sequenzo/sequence_characteristics/state_frequencies_and_entropy_per_sequence.py +9 -3
  79. sequenzo/sequence_characteristics/turbulence.py +47 -67
  80. sequenzo/sequence_characteristics/variance_of_spell_durations.py +19 -9
  81. sequenzo/sequence_characteristics/within_sequence_entropy.py +5 -58
  82. sequenzo/visualization/plot_sequence_index.py +58 -35
  83. sequenzo/visualization/plot_state_distribution.py +57 -36
  84. sequenzo/visualization/plot_transition_matrix.py +21 -22
  85. sequenzo/with_event_history_analysis/__init__.py +35 -0
  86. sequenzo/with_event_history_analysis/sequence_analysis_multi_state_model.py +850 -0
  87. sequenzo/with_event_history_analysis/sequence_history_analysis.py +283 -0
  88. {sequenzo-0.1.17.dist-info → sequenzo-0.1.19.dist-info}/METADATA +48 -14
  89. sequenzo-0.1.19.dist-info/RECORD +215 -0
  90. sequenzo/dissimilarity_measures/setup.py +0 -35
  91. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Cholesky/LDLT.h +0 -688
  92. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Cholesky/LLT.h +0 -558
  93. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h +0 -99
  94. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +0 -682
  95. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +0 -346
  96. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +0 -462
  97. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +0 -91
  98. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/EigenSolver.h +0 -622
  99. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +0 -418
  100. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +0 -226
  101. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +0 -374
  102. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +0 -158
  103. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/RealQZ.h +0 -657
  104. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/RealSchur.h +0 -558
  105. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +0 -77
  106. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +0 -904
  107. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +0 -87
  108. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +0 -561
  109. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/AlignedBox.h +0 -486
  110. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/AngleAxis.h +0 -247
  111. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/EulerAngles.h +0 -114
  112. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/Homogeneous.h +0 -501
  113. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/Hyperplane.h +0 -282
  114. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/OrthoMethods.h +0 -235
  115. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/ParametrizedLine.h +0 -232
  116. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/Quaternion.h +0 -870
  117. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/Rotation2D.h +0 -199
  118. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/RotationBase.h +0 -206
  119. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/Scaling.h +0 -188
  120. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/Transform.h +0 -1563
  121. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/Translation.h +0 -202
  122. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/Umeyama.h +0 -166
  123. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +0 -168
  124. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Householder/BlockHouseholder.h +0 -110
  125. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Householder/Householder.h +0 -176
  126. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Householder/HouseholderSequence.h +0 -545
  127. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +0 -226
  128. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +0 -212
  129. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +0 -229
  130. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +0 -394
  131. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +0 -453
  132. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +0 -444
  133. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +0 -198
  134. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +0 -117
  135. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Jacobi/Jacobi.h +0 -483
  136. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/KLUSupport/KLUSupport.h +0 -358
  137. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/LU/Determinant.h +0 -117
  138. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/LU/FullPivLU.h +0 -877
  139. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/LU/InverseImpl.h +0 -432
  140. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/LU/PartialPivLU.h +0 -624
  141. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h +0 -83
  142. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/LU/arch/InverseSize4.h +0 -351
  143. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/MetisSupport/MetisSupport.h +0 -137
  144. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/OrderingMethods/Amd.h +0 -435
  145. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +0 -1863
  146. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/OrderingMethods/Ordering.h +0 -153
  147. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +0 -678
  148. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +0 -545
  149. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/QR/ColPivHouseholderQR.h +0 -674
  150. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +0 -97
  151. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +0 -635
  152. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/QR/FullPivHouseholderQR.h +0 -713
  153. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/QR/HouseholderQR.h +0 -434
  154. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h +0 -68
  155. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +0 -335
  156. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SVD/BDCSVD.h +0 -1366
  157. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SVD/JacobiSVD.h +0 -812
  158. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +0 -91
  159. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SVD/SVDBase.h +0 -376
  160. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SVD/UpperBidiagonalization.h +0 -414
  161. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +0 -697
  162. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +0 -174
  163. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/AmbiVector.h +0 -378
  164. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/CompressedStorage.h +0 -274
  165. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +0 -352
  166. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h +0 -67
  167. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseAssign.h +0 -270
  168. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseBlock.h +0 -571
  169. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseColEtree.h +0 -206
  170. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +0 -370
  171. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +0 -722
  172. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +0 -150
  173. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +0 -342
  174. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +0 -138
  175. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseDot.h +0 -98
  176. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseFuzzy.h +0 -29
  177. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseMap.h +0 -305
  178. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseMatrix.h +0 -1518
  179. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +0 -398
  180. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparsePermutation.h +0 -178
  181. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseProduct.h +0 -181
  182. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseRedux.h +0 -49
  183. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseRef.h +0 -397
  184. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +0 -659
  185. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseSolverBase.h +0 -124
  186. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +0 -198
  187. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseTranspose.h +0 -92
  188. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseTriangularView.h +0 -189
  189. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseUtil.h +0 -186
  190. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseVector.h +0 -478
  191. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseView.h +0 -254
  192. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/TriangularSolver.h +0 -315
  193. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU.h +0 -923
  194. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLUImpl.h +0 -66
  195. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +0 -226
  196. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_Structs.h +0 -110
  197. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +0 -375
  198. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_Utils.h +0 -80
  199. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h +0 -181
  200. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +0 -179
  201. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +0 -107
  202. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +0 -280
  203. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +0 -126
  204. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +0 -130
  205. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +0 -223
  206. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h +0 -258
  207. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h +0 -137
  208. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h +0 -136
  209. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h +0 -83
  210. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseQR/SparseQR.h +0 -758
  211. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/StlSupport/StdDeque.h +0 -116
  212. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/StlSupport/StdList.h +0 -106
  213. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/StlSupport/StdVector.h +0 -131
  214. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/StlSupport/details.h +0 -84
  215. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +0 -1025
  216. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +0 -642
  217. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/misc/Image.h +0 -82
  218. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/misc/Kernel.h +0 -79
  219. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/misc/RealSvd2x2.h +0 -55
  220. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/misc/blas.h +0 -440
  221. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/misc/lapack.h +0 -152
  222. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/misc/lapacke.h +0 -16292
  223. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/misc/lapacke_mangling.h +0 -17
  224. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +0 -358
  225. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +0 -696
  226. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/BlockMethods.h +0 -1442
  227. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h +0 -115
  228. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +0 -177
  229. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/IndexedViewMethods.h +0 -262
  230. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +0 -152
  231. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +0 -95
  232. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/ReshapedMethods.h +0 -149
  233. sequenzo/dissimilarity_measures/src/eigen/blas/BandTriangularSolver.h +0 -97
  234. sequenzo/dissimilarity_measures/src/eigen/blas/GeneralRank1Update.h +0 -44
  235. sequenzo/dissimilarity_measures/src/eigen/blas/PackedSelfadjointProduct.h +0 -53
  236. sequenzo/dissimilarity_measures/src/eigen/blas/PackedTriangularMatrixVector.h +0 -79
  237. sequenzo/dissimilarity_measures/src/eigen/blas/PackedTriangularSolverVector.h +0 -88
  238. sequenzo/dissimilarity_measures/src/eigen/blas/Rank2Update.h +0 -57
  239. sequenzo/dissimilarity_measures/src/eigen/blas/common.h +0 -175
  240. sequenzo/dissimilarity_measures/src/eigen/blas/f2c/datatypes.h +0 -24
  241. sequenzo/dissimilarity_measures/src/eigen/blas/level1_cplx_impl.h +0 -155
  242. sequenzo/dissimilarity_measures/src/eigen/blas/level1_impl.h +0 -144
  243. sequenzo/dissimilarity_measures/src/eigen/blas/level1_real_impl.h +0 -122
  244. sequenzo/dissimilarity_measures/src/eigen/blas/level2_cplx_impl.h +0 -360
  245. sequenzo/dissimilarity_measures/src/eigen/blas/level2_impl.h +0 -553
  246. sequenzo/dissimilarity_measures/src/eigen/blas/level2_real_impl.h +0 -306
  247. sequenzo/dissimilarity_measures/src/eigen/blas/level3_impl.h +0 -702
  248. sequenzo/dissimilarity_measures/src/eigen/debug/gdb/__init__.py +0 -1
  249. sequenzo/dissimilarity_measures/src/eigen/debug/gdb/printers.py +0 -314
  250. sequenzo/dissimilarity_measures/src/eigen/lapack/lapack_common.h +0 -29
  251. sequenzo/dissimilarity_measures/src/eigen/scripts/relicense.py +0 -69
  252. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/Tensor.h +0 -554
  253. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h +0 -329
  254. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h +0 -247
  255. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +0 -1176
  256. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h +0 -1559
  257. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h +0 -1093
  258. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h +0 -518
  259. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h +0 -377
  260. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h +0 -1023
  261. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h +0 -73
  262. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h +0 -6
  263. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h +0 -1413
  264. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h +0 -575
  265. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionSycl.h +0 -1650
  266. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h +0 -1679
  267. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h +0 -456
  268. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h +0 -1132
  269. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConvolutionSycl.h +0 -544
  270. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h +0 -214
  271. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h +0 -347
  272. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h +0 -137
  273. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h +0 -6
  274. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h +0 -104
  275. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceGpu.h +0 -389
  276. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h +0 -1048
  277. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h +0 -409
  278. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h +0 -236
  279. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h +0 -490
  280. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h +0 -236
  281. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h +0 -983
  282. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h +0 -703
  283. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h +0 -388
  284. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h +0 -669
  285. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h +0 -379
  286. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h +0 -237
  287. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h +0 -191
  288. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h +0 -488
  289. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h +0 -302
  290. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorGlobalFunctions.h +0 -33
  291. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h +0 -99
  292. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaUndefines.h +0 -44
  293. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h +0 -79
  294. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h +0 -603
  295. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h +0 -738
  296. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h +0 -247
  297. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorInitializer.h +0 -82
  298. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h +0 -263
  299. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h +0 -216
  300. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMacros.h +0 -98
  301. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h +0 -327
  302. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h +0 -311
  303. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h +0 -1102
  304. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h +0 -708
  305. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h +0 -291
  306. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h +0 -322
  307. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h +0 -998
  308. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h +0 -6
  309. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h +0 -966
  310. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReductionSycl.h +0 -582
  311. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h +0 -454
  312. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h +0 -465
  313. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h +0 -528
  314. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorScanSycl.h +0 -513
  315. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h +0 -471
  316. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h +0 -161
  317. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h +0 -346
  318. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorTrace.h +0 -303
  319. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h +0 -264
  320. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h +0 -249
  321. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h +0 -629
  322. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/TensorSymmetry/DynamicSymmetry.h +0 -293
  323. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/TensorSymmetry/StaticSymmetry.h +0 -236
  324. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/TensorSymmetry/Symmetry.h +0 -338
  325. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/TensorSymmetry/util/TemplateGroupTheory.h +0 -669
  326. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/Barrier.h +0 -67
  327. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/EventCount.h +0 -249
  328. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h +0 -486
  329. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/RunQueue.h +0 -236
  330. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadCancel.h +0 -23
  331. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadEnvironment.h +0 -40
  332. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadLocal.h +0 -301
  333. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadPoolInterface.h +0 -48
  334. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadYield.h +0 -20
  335. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/util/CXX11Meta.h +0 -537
  336. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/util/CXX11Workarounds.h +0 -88
  337. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/util/EmulateArray.h +0 -261
  338. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/util/MaxSizeVector.h +0 -158
  339. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/AutoDiff/AutoDiffJacobian.h +0 -108
  340. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h +0 -730
  341. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/AutoDiff/AutoDiffVector.h +0 -220
  342. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/BVH/BVAlgorithms.h +0 -293
  343. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/BVH/KdBVH.h +0 -223
  344. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Eigenvalues/ArpackSelfAdjointEigenSolver.h +0 -790
  345. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/EulerAngles/EulerAngles.h +0 -355
  346. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/EulerAngles/EulerSystem.h +0 -305
  347. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/FFT/ei_fftw_impl.h +0 -261
  348. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/FFT/ei_kissfft_impl.h +0 -449
  349. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/IterativeSolvers/ConstrainedConjGrad.h +0 -187
  350. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/IterativeSolvers/DGMRES.h +0 -511
  351. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/IterativeSolvers/GMRES.h +0 -335
  352. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/IterativeSolvers/IDRS.h +0 -436
  353. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/IterativeSolvers/IncompleteLU.h +0 -90
  354. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/IterativeSolvers/IterationController.h +0 -154
  355. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/IterativeSolvers/MINRES.h +0 -267
  356. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/IterativeSolvers/Scaling.h +0 -193
  357. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h +0 -305
  358. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/LevenbergMarquardt/LMcovar.h +0 -84
  359. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/LevenbergMarquardt/LMonestep.h +0 -202
  360. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/LevenbergMarquardt/LMpar.h +0 -160
  361. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/LevenbergMarquardt/LMqrsolv.h +0 -188
  362. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/LevenbergMarquardt/LevenbergMarquardt.h +0 -396
  363. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h +0 -441
  364. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h +0 -569
  365. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h +0 -373
  366. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/MatrixFunctions/MatrixPower.h +0 -705
  367. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/MatrixFunctions/MatrixSquareRoot.h +0 -368
  368. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/MatrixFunctions/StemFunction.h +0 -117
  369. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/MoreVectorization/MathFunctions.h +0 -95
  370. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/HybridNonLinearSolver.h +0 -601
  371. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/LevenbergMarquardt.h +0 -657
  372. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/chkder.h +0 -66
  373. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/covar.h +0 -70
  374. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/dogleg.h +0 -107
  375. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/fdjac1.h +0 -79
  376. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/lmpar.h +0 -298
  377. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/qrsolv.h +0 -91
  378. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/r1mpyq.h +0 -30
  379. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/r1updt.h +0 -99
  380. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/rwupdt.h +0 -49
  381. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NumericalDiff/NumericalDiff.h +0 -130
  382. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Polynomials/Companion.h +0 -280
  383. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Polynomials/PolynomialSolver.h +0 -428
  384. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Polynomials/PolynomialUtils.h +0 -143
  385. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Skyline/SkylineInplaceLU.h +0 -352
  386. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Skyline/SkylineMatrix.h +0 -862
  387. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Skyline/SkylineMatrixBase.h +0 -212
  388. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Skyline/SkylineProduct.h +0 -295
  389. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Skyline/SkylineStorage.h +0 -259
  390. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Skyline/SkylineUtil.h +0 -89
  391. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SparseExtra/BlockOfDynamicSparseMatrix.h +0 -122
  392. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SparseExtra/BlockSparseMatrix.h +0 -1079
  393. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SparseExtra/DynamicSparseMatrix.h +0 -404
  394. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SparseExtra/MarketIO.h +0 -282
  395. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SparseExtra/MatrixMarketIterator.h +0 -247
  396. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SparseExtra/RandomSetter.h +0 -349
  397. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsArrayAPI.h +0 -286
  398. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsBFloat16.h +0 -68
  399. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsFunctors.h +0 -357
  400. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsHalf.h +0 -66
  401. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsImpl.h +0 -1959
  402. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsPacketMath.h +0 -118
  403. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/HipVectorCompatibility.h +0 -67
  404. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsArrayAPI.h +0 -167
  405. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsBFloat16.h +0 -58
  406. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h +0 -330
  407. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h +0 -58
  408. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h +0 -2045
  409. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h +0 -79
  410. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/arch/AVX/BesselFunctions.h +0 -46
  411. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/arch/AVX/SpecialFunctions.h +0 -16
  412. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/arch/AVX512/BesselFunctions.h +0 -46
  413. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/arch/AVX512/SpecialFunctions.h +0 -16
  414. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/arch/GPU/SpecialFunctions.h +0 -369
  415. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/arch/NEON/BesselFunctions.h +0 -54
  416. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/arch/NEON/SpecialFunctions.h +0 -34
  417. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Splines/Spline.h +0 -507
  418. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Splines/SplineFitting.h +0 -431
  419. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Splines/SplineFwd.h +0 -93
  420. sequenzo-0.1.17.dist-info/RECORD +0 -537
  421. {sequenzo-0.1.17.dist-info → sequenzo-0.1.19.dist-info}/WHEEL +0 -0
  422. {sequenzo-0.1.17.dist-info → sequenzo-0.1.19.dist-info}/licenses/LICENSE +0 -0
  423. {sequenzo-0.1.17.dist-info → sequenzo-0.1.19.dist-info}/top_level.txt +0 -0
@@ -1,966 +0,0 @@
1
- // This file is part of Eigen, a lightweight C++ template library
2
- // for linear algebra.
3
- //
4
- // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
5
- //
6
- // This Source Code Form is subject to the terms of the Mozilla
7
- // Public License v. 2.0. If a copy of the MPL was not distributed
8
- // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9
-
10
- #ifndef EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_GPU_H
11
- #define EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_GPU_H
12
-
13
- namespace Eigen {
14
- namespace internal {
15
-
16
-
17
- #if defined(EIGEN_USE_GPU) && defined(EIGEN_GPUCC)
18
- // Full reducers for GPU, don't vectorize for now
19
-
20
- // Reducer function that enables multiple gpu thread to safely accumulate at the same
21
- // output address. It basically reads the current value of the output variable, and
22
- // attempts to update it with the new value. If in the meantime another gpu thread
23
- // updated the content of the output address it will try again.
24
- template <typename T, typename R>
25
- __device__ EIGEN_ALWAYS_INLINE void atomicReduce(T* output, T accum, R& reducer) {
26
- #if (defined(EIGEN_HIP_DEVICE_COMPILE) && defined(__HIP_ARCH_HAS_WARP_SHUFFLE__)) || (EIGEN_CUDA_ARCH >= 300)
27
- if (sizeof(T) == 4)
28
- {
29
- unsigned int oldval = *reinterpret_cast<unsigned int*>(output);
30
- unsigned int newval = oldval;
31
- reducer.reduce(accum, reinterpret_cast<T*>(&newval));
32
- if (newval == oldval) {
33
- return;
34
- }
35
- unsigned int readback;
36
- while ((readback = atomicCAS((unsigned int*)output, oldval, newval)) != oldval) {
37
- oldval = readback;
38
- newval = oldval;
39
- reducer.reduce(accum, reinterpret_cast<T*>(&newval));
40
- if (newval == oldval) {
41
- return;
42
- }
43
- }
44
- }
45
- else if (sizeof(T) == 8) {
46
- unsigned long long oldval = *reinterpret_cast<unsigned long long*>(output);
47
- unsigned long long newval = oldval;
48
- reducer.reduce(accum, reinterpret_cast<T*>(&newval));
49
- if (newval == oldval) {
50
- return;
51
- }
52
- unsigned long long readback;
53
- while ((readback = atomicCAS((unsigned long long*)output, oldval, newval)) != oldval) {
54
- oldval = readback;
55
- newval = oldval;
56
- reducer.reduce(accum, reinterpret_cast<T*>(&newval));
57
- if (newval == oldval) {
58
- return;
59
- }
60
- }
61
- }
62
- else {
63
- gpu_assert(0 && "Wordsize not supported");
64
- }
65
- #else // EIGEN_CUDA_ARCH >= 300
66
- gpu_assert(0 && "Shouldn't be called on unsupported device");
67
- #endif // EIGEN_CUDA_ARCH >= 300
68
- }
69
-
70
- // We extend atomicExch to support extra data types
71
- template <typename Type>
72
- __device__ inline Type atomicExchCustom(Type* address, Type val) {
73
- return atomicExch(address, val);
74
- }
75
-
76
- template <>
77
- __device__ inline double atomicExchCustom(double* address, double val) {
78
- unsigned long long int* address_as_ull = reinterpret_cast<unsigned long long int*>(address);
79
- return __longlong_as_double(atomicExch(address_as_ull, __double_as_longlong(val)));
80
- }
81
-
82
- #ifdef EIGEN_HAS_GPU_FP16
83
- template <typename R>
84
- __device__ inline void atomicReduce(half2* output, half2 accum, R& reducer) {
85
- unsigned int oldval = *reinterpret_cast<unsigned int*>(output);
86
- unsigned int newval = oldval;
87
- reducer.reducePacket(accum, reinterpret_cast<half2*>(&newval));
88
- if (newval == oldval) {
89
- return;
90
- }
91
- unsigned int readback;
92
- while ((readback = atomicCAS((unsigned int*)output, oldval, newval)) != oldval) {
93
- oldval = readback;
94
- newval = oldval;
95
- reducer.reducePacket(accum, reinterpret_cast<half2*>(&newval));
96
- if (newval == oldval) {
97
- return;
98
- }
99
- }
100
- }
101
- // reduction should be associative since reduction is not atomic in wide vector but atomic in half2 operations
102
- template <typename R>
103
- __device__ inline void atomicReduce(Packet4h2* output, Packet4h2 accum, R& reducer) {
104
- half2* houtput=reinterpret_cast<half2*>(output);
105
- half2* haccum=reinterpret_cast<half2*>(&accum);
106
- for(int i=0;i<4;++i){
107
- atomicReduce(houtput+i,*(haccum+i),reducer);
108
- }
109
- }
110
- #endif // EIGEN_HAS_GPU_FP16
111
-
112
- template <>
113
- __device__ inline void atomicReduce(float* output, float accum, SumReducer<float>&) {
114
- #if (defined(EIGEN_HIP_DEVICE_COMPILE) && defined(__HIP_ARCH_HAS_WARP_SHUFFLE__)) || (EIGEN_CUDA_ARCH >= 300)
115
- atomicAdd(output, accum);
116
- #else // EIGEN_CUDA_ARCH >= 300
117
- gpu_assert(0 && "Shouldn't be called on unsupported device");
118
- #endif // EIGEN_CUDA_ARCH >= 300
119
- }
120
-
121
-
122
- template <typename CoeffType, typename Index>
123
- __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void ReductionInitKernel(const CoeffType val, Index num_preserved_coeffs, CoeffType* output) {
124
- const Index thread_id = blockIdx.x * blockDim.x + threadIdx.x;
125
- const Index num_threads = blockDim.x * gridDim.x;
126
- for (Index i = thread_id; i < num_preserved_coeffs; i += num_threads) {
127
- output[i] = val;
128
- }
129
- }
130
-
131
-
132
- template <int BlockSize, int NumPerThread, typename Self,
133
- typename Reducer, typename Index>
134
- __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void FullReductionKernel(Reducer reducer, const Self input, Index num_coeffs,
135
- typename Self::CoeffReturnType* output, unsigned int* semaphore) {
136
- #if (defined(EIGEN_HIP_DEVICE_COMPILE) && defined(__HIP_ARCH_HAS_WARP_SHUFFLE__)) || (EIGEN_CUDA_ARCH >= 300)
137
- // Initialize the output value
138
- const Index first_index = blockIdx.x * BlockSize * NumPerThread + threadIdx.x;
139
- if (gridDim.x == 1) {
140
- if (first_index == 0) {
141
- *output = reducer.initialize();
142
- }
143
- }
144
- else {
145
- if (threadIdx.x == 0) {
146
- unsigned int block = atomicCAS(semaphore, 0u, 1u);
147
- if (block == 0) {
148
- // We're the first block to run, initialize the output value
149
- atomicExchCustom(output, reducer.initialize());
150
- __threadfence();
151
- atomicExch(semaphore, 2u);
152
- }
153
- else {
154
- // Wait for the first block to initialize the output value.
155
- // Use atomicCAS here to ensure that the reads aren't cached
156
- unsigned int val;
157
- do {
158
- val = atomicCAS(semaphore, 2u, 2u);
159
- }
160
- while (val < 2u);
161
- }
162
- }
163
- }
164
-
165
- __syncthreads();
166
-
167
- eigen_assert(gridDim.x == 1 || *semaphore >= 2u);
168
-
169
- typename Self::CoeffReturnType accum = reducer.initialize();
170
- Index max_iter = numext::mini<Index>(num_coeffs - first_index, NumPerThread*BlockSize);
171
- for (Index i = 0; i < max_iter; i+=BlockSize) {
172
- const Index index = first_index + i;
173
- eigen_assert(index < num_coeffs);
174
- typename Self::CoeffReturnType val = input.m_impl.coeff(index);
175
- reducer.reduce(val, &accum);
176
- }
177
-
178
- #pragma unroll
179
- for (int offset = warpSize/2; offset > 0; offset /= 2) {
180
- #if defined(EIGEN_HIPCC)
181
- // use std::is_floating_point to determine the type of reduced_val
182
- // This is needed because when Type == double, hipcc will give a "call to __shfl_down is ambguous" error
183
- // and list the float and int versions of __shfl_down as the candidate functions.
184
- if (std::is_floating_point<typename Self::CoeffReturnType>::value) {
185
- reducer.reduce(__shfl_down(static_cast<float>(accum), offset, warpSize), &accum);
186
- } else {
187
- reducer.reduce(__shfl_down(static_cast<int>(accum), offset, warpSize), &accum);
188
- }
189
- #elif defined(EIGEN_CUDA_SDK_VER) && EIGEN_CUDA_SDK_VER < 90000
190
- reducer.reduce(__shfl_down(accum, offset, warpSize), &accum);
191
- #else
192
- reducer.reduce(__shfl_down_sync(0xFFFFFFFF, accum, offset, warpSize), &accum);
193
- #endif
194
- }
195
-
196
- if ((threadIdx.x & (warpSize - 1)) == 0) {
197
- atomicReduce(output, accum, reducer);
198
- }
199
-
200
- if (gridDim.x > 1 && threadIdx.x == 0) {
201
- // Let the last block reset the semaphore
202
- atomicInc(semaphore, gridDim.x + 1);
203
- #if defined(EIGEN_HIPCC)
204
- __threadfence_system();
205
- #endif
206
- }
207
- #else // EIGEN_CUDA_ARCH >= 300
208
- gpu_assert(0 && "Shouldn't be called on unsupported device");
209
- #endif // EIGEN_CUDA_ARCH >= 300
210
- }
211
-
212
-
213
- #ifdef EIGEN_HAS_GPU_FP16
214
- template <typename Self,
215
- typename Reducer, typename Index>
216
- __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void ReductionInitFullReduxKernelHalfFloat(Reducer reducer, const Self input, Index num_coeffs,
217
- packet_traits<Eigen::half>::type* scratch) {
218
- eigen_assert(blockDim.x == 1);
219
- eigen_assert(gridDim.x == 1);
220
- typedef packet_traits<Eigen::half>::type packet_type;
221
- Index packet_remainder =
222
- num_coeffs % Index(unpacket_traits<packet_type>::size);
223
- if (packet_remainder != 0) {
224
- half2* h2scratch = reinterpret_cast<half2*>(scratch);
225
- for (Index i = num_coeffs - packet_remainder; i + 2 <= num_coeffs; i += 2) {
226
- *h2scratch =
227
- __halves2half2(input.m_impl.coeff(i), input.m_impl.coeff(i + 1));
228
- h2scratch++;
229
- }
230
- if ((num_coeffs & 1) != 0) {
231
- half lastCoeff = input.m_impl.coeff(num_coeffs - 1);
232
- *h2scratch = __halves2half2(lastCoeff, reducer.initialize());
233
- }
234
- } else {
235
- *scratch = reducer.template initializePacket<packet_type>();
236
- }
237
- }
238
-
239
- template <typename Self,
240
- typename Reducer, typename Index>
241
- __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void ReductionInitKernelHalfFloat(Reducer reducer, const Self input, Index num_coeffs, half* output) {
242
- const Index thread_id = blockIdx.x * blockDim.x + threadIdx.x;
243
- const Index num_threads = blockDim.x * gridDim.x;
244
- typedef typename packet_traits<Eigen::half>::type PacketType;
245
-
246
- const Index num_packets =
247
- num_coeffs / Index(unpacket_traits<PacketType>::size);
248
- PacketType* p_output = reinterpret_cast<PacketType*>(output);
249
- for (Index i = thread_id; i < num_packets; i += num_threads) {
250
- p_output[i] = reducer.template initializePacket<PacketType>();
251
- }
252
- Index packet_remainder =
253
- num_coeffs % Index(unpacket_traits<PacketType>::size);
254
- if (thread_id < packet_remainder) {
255
- output[num_coeffs - packet_remainder + thread_id] = reducer.initialize();
256
- }
257
- }
258
-
259
- template <int BlockSize, int NumPerThread, typename Self,
260
- typename Reducer, typename Index>
261
- __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void FullReductionKernelHalfFloat(Reducer reducer, const Self input, Index num_coeffs,
262
- half* output, packet_traits<Eigen::half>::type* scratch) {
263
- typedef typename packet_traits<Eigen::half>::type PacketType;
264
- const int packet_width = unpacket_traits<PacketType>::size;
265
- eigen_assert(NumPerThread % packet_width == 0);
266
- const Index first_index =
267
- blockIdx.x * BlockSize * NumPerThread + packet_width * threadIdx.x;
268
-
269
- // Initialize the output value if it wasn't initialized by the ReductionInitKernel
270
-
271
- if (gridDim.x == 1) {
272
- if (first_index == 0) {
273
- int rem = num_coeffs % packet_width;
274
- if (rem != 0) {
275
- half2* p_scratch = reinterpret_cast<half2*>(scratch);
276
- *scratch = reducer.template initializePacket<PacketType>();
277
- for (int i = 0; i < rem / 2; i++) {
278
- *p_scratch = __halves2half2(
279
- input.m_impl.coeff(num_coeffs - packet_width + 2 * i),
280
- input.m_impl.coeff(num_coeffs - packet_width + 2 * i + 1));
281
- p_scratch++;
282
- }
283
- if ((num_coeffs & 1) != 0) {
284
- half last = input.m_impl.coeff(num_coeffs - 1);
285
- *p_scratch = __halves2half2(last, reducer.initialize());
286
- }
287
- } else {
288
- *scratch = reducer.template initializePacket<PacketType>();
289
- }
290
- }
291
- __syncthreads();
292
- }
293
-
294
- PacketType accum = reducer.template initializePacket<PacketType>();
295
- const Index max_iter =
296
- numext::mini<Index>((num_coeffs - first_index) / packet_width,
297
- NumPerThread * BlockSize / packet_width);
298
- for (Index i = 0; i < max_iter; i += BlockSize) {
299
- const Index index = first_index + packet_width * i;
300
- eigen_assert(index + packet_width < num_coeffs);
301
- PacketType val = input.m_impl.template packet<Unaligned>(index);
302
- reducer.reducePacket(val, &accum);
303
- }
304
-
305
- #pragma unroll
306
- for (int offset = warpSize/2; offset > 0; offset /= 2) {
307
- #if defined(EIGEN_HIPCC)
308
- PacketType r1;
309
- half2* hr = reinterpret_cast<half2*>(&r1);
310
- half2* hacc = reinterpret_cast<half2*>(&accum);
311
- for (int i = 0; i < packet_width / 2; i++) {
312
- // FIXME : remove this workaround once we have native half/half2 support for __shfl_down
313
- union { int i; half2 h; } wka_in, wka_out;
314
- wka_in.h = hacc[i];
315
- wka_out.i = __shfl_down(wka_in.i, offset, warpSize);
316
- hr[i] = wka_out.h;
317
- }
318
- reducer.reducePacket(r1, &accum);
319
- #elif defined(EIGEN_CUDA_SDK_VER) && EIGEN_CUDA_SDK_VER < 90000
320
- PacketType r1;
321
- half2* hr = reinterpret_cast<half2*>(&r1);
322
- half2* hacc = reinterpret_cast<half2*>(&accum);
323
- for (int i = 0; i < packet_width / 2; i++) {
324
- hr[i] = __shfl_down(hacc[i], offset, warpSize);
325
- }
326
- reducer.reducePacket(r1, &accum);
327
- #else
328
- PacketType r1;
329
- half2* hr = reinterpret_cast<half2*>(&r1);
330
- half2* hacc = reinterpret_cast<half2*>(&accum);
331
- for (int i = 0; i < packet_width / 2; i++) {
332
- hr[i] = __shfl_down_sync(0xFFFFFFFF, hacc[i], (unsigned)offset, warpSize);
333
- }
334
- reducer.reducePacket(r1, &accum);
335
-
336
- #endif
337
- }
338
-
339
- if ((threadIdx.x & (warpSize - 1)) == 0) {
340
- atomicReduce(scratch, accum, reducer);
341
- }
342
-
343
- __syncthreads();
344
- half2* rv1 = reinterpret_cast<half2*>(scratch);
345
- if (packet_width > 2) {
346
- reducer.reducePacket(rv1[2], rv1);
347
- reducer.reducePacket(rv1[3], rv1 + 1);
348
- reducer.reducePacket(rv1[1], rv1);
349
- }
350
- if (gridDim.x == 1) {
351
- if (first_index == 0) {
352
- half tmp = __low2half(*rv1);
353
- reducer.reduce(__high2half(*rv1), &tmp);
354
- *output = tmp;
355
- }
356
- }
357
- }
358
-
359
- template <typename Op>
360
- __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void ReductionCleanupKernelHalfFloat(Op reducer, half* output, packet_traits<Eigen::half>::type* scratch) {
361
- eigen_assert(threadIdx.x == 1);
362
- half2* pscratch = reinterpret_cast<half2*>(scratch);
363
- half tmp = __float2half(0.f);
364
- typedef packet_traits<Eigen::half>::type packet_type;
365
- for (int i = 0; i < unpacket_traits<packet_type>::size; i += 2) {
366
- reducer.reduce(__low2half(*pscratch), &tmp);
367
- reducer.reduce(__high2half(*pscratch), &tmp);
368
- pscratch++;
369
- }
370
- *output = tmp;
371
- }
372
-
373
- #endif // EIGEN_HAS_GPU_FP16
374
-
375
- template <typename Self, typename Op, typename OutputType, bool PacketAccess, typename Enabled = void>
376
- struct FullReductionLauncher {
377
- static void run(const Self&, Op&, const GpuDevice&, OutputType*, typename Self::Index) {
378
- gpu_assert(false && "Should only be called on doubles, floats and half floats");
379
- }
380
- };
381
-
382
- // Specialization for float and double
383
- template <typename Self, typename Op, typename OutputType, bool PacketAccess>
384
- struct FullReductionLauncher<
385
- Self, Op, OutputType, PacketAccess,
386
- typename internal::enable_if<
387
- internal::is_same<float, OutputType>::value ||
388
- internal::is_same<double, OutputType>::value,
389
- void>::type> {
390
- static void run(const Self& self, Op& reducer, const GpuDevice& device, OutputType* output, typename Self::Index num_coeffs) {
391
-
392
- typedef typename Self::Index Index;
393
- const int block_size = 256;
394
- const int num_per_thread = 128;
395
- const int num_blocks = divup<int>(num_coeffs, block_size * num_per_thread);
396
-
397
- unsigned int* semaphore = NULL;
398
- if (num_blocks > 1) {
399
- semaphore = device.semaphore();
400
- }
401
-
402
- LAUNCH_GPU_KERNEL((FullReductionKernel<block_size, num_per_thread, Self, Op, Index>),
403
- num_blocks, block_size, 0, device, reducer, self, num_coeffs, output, semaphore);
404
- }
405
- };
406
-
407
- #ifdef EIGEN_HAS_GPU_FP16
408
- template <typename Self, typename Op>
409
- struct FullReductionLauncher<Self, Op, Eigen::half, false> {
410
- static void run(const Self&, Op&, const GpuDevice&, half*, typename Self::Index) {
411
- gpu_assert(false && "Should not be called since there is no packet accessor");
412
- }
413
- };
414
-
415
- template <typename Self, typename Op>
416
- struct FullReductionLauncher<Self, Op, Eigen::half, true> {
417
- static void run(const Self& self, Op& reducer, const GpuDevice& device, half* output, typename Self::Index num_coeffs) {
418
- typedef typename Self::Index Index;
419
- typedef typename packet_traits<Eigen::half>::type PacketType;
420
-
421
- const int block_size = 256;
422
- const int num_per_thread = 128;
423
- const int num_blocks = divup<int>(num_coeffs, block_size * num_per_thread);
424
- PacketType* scratch = static_cast<PacketType*>(device.scratchpad());
425
- // half2* scratch = static_cast<half2*>(device.scratchpad());
426
-
427
- if (num_blocks > 1) {
428
- // We initialize the output and the scrathpad outside the reduction kernel when we can't be sure that there
429
- // won't be a race conditions between multiple thread blocks.
430
- LAUNCH_GPU_KERNEL((ReductionInitFullReduxKernelHalfFloat<Self, Op, Index>),
431
- 1, 1, 0, device, reducer, self, num_coeffs, scratch);
432
- }
433
-
434
- LAUNCH_GPU_KERNEL((FullReductionKernelHalfFloat<block_size, num_per_thread, Self, Op, Index>),
435
- num_blocks, block_size, 0, device, reducer, self, num_coeffs, output, scratch);
436
-
437
- if (num_blocks > 1) {
438
- LAUNCH_GPU_KERNEL((ReductionCleanupKernelHalfFloat<Op>),
439
- 1, 1, 0, device, reducer, output, scratch);
440
- }
441
- }
442
- };
443
- #endif // EIGEN_HAS_GPU_FP16
444
-
445
-
446
- template <typename Self, typename Op, bool Vectorizable>
447
- struct FullReducer<Self, Op, GpuDevice, Vectorizable> {
448
- // Unfortunately nvidia doesn't support well exotic types such as complex,
449
- // so reduce the scope of the optimized version of the code to the simple cases
450
- // of doubles, floats and half floats
451
- #ifdef EIGEN_HAS_GPU_FP16
452
- static const bool HasOptimizedImplementation = !Self::ReducerTraits::IsStateful &&
453
- (internal::is_same<typename Self::CoeffReturnType, float>::value ||
454
- internal::is_same<typename Self::CoeffReturnType, double>::value ||
455
- (internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && reducer_traits<Op, GpuDevice>::PacketAccess));
456
- #else // EIGEN_HAS_GPU_FP16
457
- static const bool HasOptimizedImplementation = !Self::ReducerTraits::IsStateful &&
458
- (internal::is_same<typename Self::CoeffReturnType, float>::value ||
459
- internal::is_same<typename Self::CoeffReturnType, double>::value);
460
- #endif // EIGEN_HAS_GPU_FP16
461
-
462
- template <typename OutputType>
463
- static void run(const Self& self, Op& reducer, const GpuDevice& device, OutputType* output) {
464
- gpu_assert(HasOptimizedImplementation && "Should only be called on doubles, floats or half floats");
465
- const Index num_coeffs = array_prod(self.m_impl.dimensions());
466
- // Don't crash when we're called with an input tensor of size 0.
467
- if (num_coeffs == 0) {
468
- return;
469
- }
470
-
471
- FullReductionLauncher<Self, Op, OutputType, reducer_traits<Op, GpuDevice>::PacketAccess>::run(self, reducer, device, output, num_coeffs);
472
- }
473
- };
474
-
475
-
476
- template <int NumPerThread, typename Self,
477
- typename Reducer, typename Index>
478
- __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void InnerReductionKernel(Reducer reducer, const Self input, Index num_coeffs_to_reduce, Index num_preserved_coeffs,
479
- typename Self::CoeffReturnType* output) {
480
- #if (defined(EIGEN_HIP_DEVICE_COMPILE) && defined(__HIP_ARCH_HAS_WARP_SHUFFLE__)) || (EIGEN_CUDA_ARCH >= 300)
481
- typedef typename Self::CoeffReturnType Type;
482
- eigen_assert(blockDim.y == 1);
483
- eigen_assert(blockDim.z == 1);
484
- eigen_assert(gridDim.y == 1);
485
- eigen_assert(gridDim.z == 1);
486
-
487
- const int unroll_times = 16;
488
- eigen_assert(NumPerThread % unroll_times == 0);
489
-
490
- const Index input_col_blocks = divup<Index>(num_coeffs_to_reduce, blockDim.x * NumPerThread);
491
- const Index num_input_blocks = input_col_blocks * num_preserved_coeffs;
492
-
493
- const Index num_threads = blockDim.x * gridDim.x;
494
- const Index thread_id = blockIdx.x * blockDim.x + threadIdx.x;
495
-
496
- // Initialize the output values if they weren't initialized by the ReductionInitKernel
497
- if (gridDim.x == 1) {
498
- for (Index i = thread_id; i < num_preserved_coeffs; i += num_threads) {
499
- output[i] = reducer.initialize();
500
- }
501
- __syncthreads();
502
- }
503
-
504
- for (Index i = blockIdx.x; i < num_input_blocks; i += gridDim.x) {
505
- const Index row = i / input_col_blocks;
506
-
507
- if (row < num_preserved_coeffs) {
508
- const Index col_block = i % input_col_blocks;
509
- const Index col_begin = col_block * blockDim.x * NumPerThread + threadIdx.x;
510
-
511
- Type reduced_val = reducer.initialize();
512
-
513
- for (Index j = 0; j < NumPerThread; j += unroll_times) {
514
- const Index last_col = col_begin + blockDim.x * (j + unroll_times - 1);
515
- if (last_col >= num_coeffs_to_reduce) {
516
- for (Index col = col_begin + blockDim.x * j; col < num_coeffs_to_reduce; col += blockDim.x) {
517
- const Type val = input.m_impl.coeff(row * num_coeffs_to_reduce + col);
518
- reducer.reduce(val, &reduced_val);
519
- }
520
- break;
521
- } else {
522
- // Faster version of the loop with no branches after unrolling.
523
- #pragma unroll
524
- for (int k = 0; k < unroll_times; ++k) {
525
- const Index col = col_begin + blockDim.x * (j + k);
526
- reducer.reduce(input.m_impl.coeff(row * num_coeffs_to_reduce + col), &reduced_val);
527
- }
528
- }
529
- }
530
-
531
- #pragma unroll
532
- for (int offset = warpSize/2; offset > 0; offset /= 2) {
533
- #if defined(EIGEN_HIPCC)
534
- // use std::is_floating_point to determine the type of reduced_val
535
- // This is needed because when Type == double, hipcc will give a "call to __shfl_down is ambguous" error
536
- // and list the float and int versions of __shfl_down as the candidate functions.
537
- if (std::is_floating_point<Type>::value) {
538
- reducer.reduce(__shfl_down(static_cast<float>(reduced_val), offset), &reduced_val);
539
- } else {
540
- reducer.reduce(__shfl_down(static_cast<int>(reduced_val), offset), &reduced_val);
541
- }
542
- #elif defined(EIGEN_CUDA_SDK_VER) && EIGEN_CUDA_SDK_VER < 90000
543
- reducer.reduce(__shfl_down(reduced_val, offset), &reduced_val);
544
- #else
545
- reducer.reduce(__shfl_down_sync(0xFFFFFFFF, reduced_val, offset), &reduced_val);
546
- #endif
547
- }
548
-
549
- if ((threadIdx.x & (warpSize - 1)) == 0) {
550
- atomicReduce(&(output[row]), reduced_val, reducer);
551
- }
552
- }
553
- }
554
- #else // EIGEN_CUDA_ARCH >= 300
555
- gpu_assert(0 && "Shouldn't be called on unsupported device");
556
- #endif // EIGEN_CUDA_ARCH >= 300
557
- }
558
-
559
- #ifdef EIGEN_HAS_GPU_FP16
560
-
561
- template <int NumPerThread, typename Self,
562
- typename Reducer, typename Index>
563
- __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void InnerReductionKernelHalfFloat(Reducer reducer, const Self input, Index num_coeffs_to_reduce, Index num_preserved_coeffs,
564
- half* output) {
565
- eigen_assert(blockDim.y == 1);
566
- eigen_assert(blockDim.z == 1);
567
- eigen_assert(gridDim.y == 1);
568
- eigen_assert(gridDim.z == 1);
569
-
570
- typedef typename packet_traits<Eigen::half>::type PacketType;
571
- const int packet_width = unpacket_traits<PacketType>::size;
572
- const int unroll_times = 16 / packet_width;
573
- eigen_assert(NumPerThread % unroll_times == 0);
574
- eigen_assert(unroll_times % 2 == 0);
575
-
576
- const Index input_col_blocks = divup<Index>(num_coeffs_to_reduce, blockDim.x * NumPerThread * 2);
577
- const Index num_input_blocks = divup<Index>(input_col_blocks * num_preserved_coeffs, 2);
578
-
579
- const Index num_threads = blockDim.x * gridDim.x;
580
- const Index thread_id = blockIdx.x * blockDim.x + threadIdx.x;
581
-
582
- // Initialize the output values if they weren't initialized by the ReductionInitKernel
583
- if (gridDim.x == 1) {
584
- Index i = packet_width * thread_id;
585
- for (; i + packet_width <= num_preserved_coeffs;
586
- i += packet_width * num_threads) {
587
- PacketType* poutput = reinterpret_cast<PacketType*>(output + i);
588
- *poutput = reducer.template initializePacket<PacketType>();
589
- }
590
- if (i < num_preserved_coeffs) {
591
- output[i] = reducer.initialize();
592
- }
593
- __syncthreads();
594
- }
595
-
596
- for (Index i = blockIdx.x; i < num_input_blocks; i += gridDim.x) {
597
- const Index row = 2 * (i / input_col_blocks); // everybody takes 2 rows
598
-
599
- if (row + 1 < num_preserved_coeffs) {
600
- const Index col_block = i % input_col_blocks;
601
- const Index col_begin =
602
- packet_width * (col_block * blockDim.x * NumPerThread + threadIdx.x);
603
-
604
- PacketType reduced_val1 = reducer.template initializePacket<PacketType>();
605
- PacketType reduced_val2 = reducer.template initializePacket<PacketType>();
606
-
607
- for (Index j = 0; j < NumPerThread; j += unroll_times) {
608
- const Index last_col =
609
- col_begin + blockDim.x * (j + unroll_times - 1) * packet_width;
610
- if (last_col >= num_coeffs_to_reduce) {
611
- Index col = col_begin + blockDim.x * j;
612
- for (; col + packet_width <= num_coeffs_to_reduce;
613
- col += blockDim.x) {
614
- const PacketType val1 = input.m_impl.template packet<Unaligned>(
615
- row * num_coeffs_to_reduce + col);
616
- reducer.reducePacket(val1, &reduced_val1);
617
- const PacketType val2 = input.m_impl.template packet<Unaligned>(
618
- (row + 1) * num_coeffs_to_reduce + col);
619
- reducer.reducePacket(val2, &reduced_val2);
620
- }
621
- if (col < num_coeffs_to_reduce) {
622
- PacketType r1 = reducer.template initializePacket<PacketType>();
623
- PacketType r2 = reducer.template initializePacket<PacketType>();
624
- half2* hr1 = reinterpret_cast<half2*>(&r1);
625
- half2* hr2 = reinterpret_cast<half2*>(&r2);
626
- while (col + 1 < num_coeffs_to_reduce) {
627
- *hr1 = __halves2half2(
628
- input.m_impl.coeff(row * num_coeffs_to_reduce + col),
629
- input.m_impl.coeff(row * num_coeffs_to_reduce + col + 1));
630
- *hr2 = __halves2half2(
631
- input.m_impl.coeff((row + 1) * num_coeffs_to_reduce + col),
632
- input.m_impl.coeff((row + 1) * num_coeffs_to_reduce + col +
633
- 1));
634
- hr1++;
635
- hr2++;
636
- col += 2;
637
- }
638
- if (col < num_coeffs_to_reduce) {
639
- // Peel;
640
- const half last1 =
641
- input.m_impl.coeff(row * num_coeffs_to_reduce + col);
642
- *hr1 = __halves2half2(last1, reducer.initialize());
643
- const half last2 =
644
- input.m_impl.coeff((row + 1) * num_coeffs_to_reduce + col);
645
- *hr2 = __halves2half2(last2, reducer.initialize());
646
- }
647
- reducer.reducePacket(r1, &reduced_val1);
648
- reducer.reducePacket(r2, &reduced_val2);
649
- }
650
- break;
651
- } else {
652
- // Faster version of the loop with no branches after unrolling.
653
- #pragma unroll
654
- for (int k = 0; k < unroll_times; ++k) {
655
- const Index col = col_begin + blockDim.x * (j + k) * packet_width;
656
- reducer.reducePacket(input.m_impl.template packet<Unaligned>(
657
- row * num_coeffs_to_reduce + col),
658
- &reduced_val1);
659
- reducer.reducePacket(input.m_impl.template packet<Unaligned>(
660
- (row + 1) * num_coeffs_to_reduce + col),
661
- &reduced_val2);
662
- }
663
- }
664
- }
665
-
666
- #pragma unroll
667
- for (int offset = warpSize/2; offset > 0; offset /= 2) {
668
- #if defined(EIGEN_HIPCC)
669
- PacketType r1;
670
- PacketType r2;
671
- half2* hr1 = reinterpret_cast<half2*>(&r1);
672
- half2* hr2 = reinterpret_cast<half2*>(&r2);
673
- half2* rv1 = reinterpret_cast<half2*>(&reduced_val1);
674
- half2* rv2 = reinterpret_cast<half2*>(&reduced_val2);
675
- for (int i = 0; i < packet_width / 2; i++) {
676
- // FIXME : remove this workaround once we have native half/half2 support for __shfl_down
677
- union { int i; half2 h; } wka_in1, wka_out1;
678
- wka_in1.h = rv1[i];
679
- wka_out1.i = __shfl_down(wka_in1.i, offset, warpSize);
680
- hr1[i] = wka_out1.h;
681
-
682
- union { int i; half2 h; } wka_in2, wka_out2;
683
- wka_in2.h = rv2[i];
684
- wka_out2.i = __shfl_down(wka_in2.i, offset, warpSize);
685
- hr2[i] = wka_out2.h;
686
- }
687
- reducer.reducePacket(r1, &reduced_val1);
688
- reducer.reducePacket(r2, &reduced_val2);
689
- #elif defined(EIGEN_CUDA_SDK_VER) && EIGEN_CUDA_SDK_VER < 90000
690
- PacketType r1;
691
- PacketType r2;
692
- half2* hr1 = reinterpret_cast<half2*>(&r1);
693
- half2* hr2 = reinterpret_cast<half2*>(&r2);
694
- half2* rv1 = reinterpret_cast<half2*>(&reduced_val1);
695
- half2* rv2 = reinterpret_cast<half2*>(&reduced_val2);
696
- for (int i = 0; i < packet_width / 2; i++) {
697
- hr1[i] = __shfl_down(rv1[i], offset, warpSize);
698
- hr2[i] = __shfl_down(rv2[i], offset, warpSize);
699
- }
700
- reducer.reducePacket(r1, &reduced_val1);
701
- reducer.reducePacket(r2, &reduced_val2);
702
- #else
703
- PacketType r1;
704
- PacketType r2;
705
- half2* hr1 = reinterpret_cast<half2*>(&r1);
706
- half2* hr2 = reinterpret_cast<half2*>(&r2);
707
- half2* rr1 = reinterpret_cast<half2*>(&reduced_val1);
708
- half2* rr2 = reinterpret_cast<half2*>(&reduced_val2);
709
- for (int i = 0; i < packet_width / 2; i++) {
710
- hr1[i] =
711
- __shfl_down_sync(0xFFFFFFFF, rr1[i], (unsigned)offset, warpSize);
712
- hr2[i] =
713
- __shfl_down_sync(0xFFFFFFFF, rr2[i], (unsigned)offset, warpSize);
714
- }
715
- reducer.reducePacket(r1, &reduced_val1);
716
- reducer.reducePacket(r2, &reduced_val2);
717
-
718
- #endif
719
- }
720
- half2* rv1 = reinterpret_cast<half2*>(&reduced_val1);
721
- half2* rv2 = reinterpret_cast<half2*>(&reduced_val2);
722
- half2 val;
723
- if (packet_width > 2) {
724
- reducer.reducePacket(rv1[2], rv1);
725
- reducer.reducePacket(rv1[3], rv1 + 1);
726
- reducer.reducePacket(rv1[1], rv1);
727
- reducer.reducePacket(rv2[2], rv2);
728
- reducer.reducePacket(rv2[3], rv2 + 1);
729
- reducer.reducePacket(rv2[1], rv2);
730
- }
731
- half val1 = __low2half(*rv1);
732
- reducer.reduce(__high2half(*rv1), &val1);
733
- half val2 = __low2half(*rv2);
734
- reducer.reduce(__high2half(*rv2), &val2);
735
- val = __halves2half2(val1, val2);
736
- if ((threadIdx.x & (warpSize - 1)) == 0) {
737
- half* loc = output + row;
738
- atomicReduce((half2*)loc, val, reducer);
739
- }
740
- }
741
- }
742
- }
743
-
744
- #endif // EIGEN_HAS_GPU_FP16
745
-
746
- template <typename Self, typename Op, typename OutputType, bool PacketAccess, typename Enabled = void>
747
- struct InnerReductionLauncher {
748
- static EIGEN_DEVICE_FUNC bool run(const Self&, Op&, const GpuDevice&, OutputType*, typename Self::Index, typename Self::Index) {
749
- gpu_assert(false && "Should only be called to reduce doubles, floats and half floats on a gpu device");
750
- return true;
751
- }
752
- };
753
-
754
- // Specialization for float and double
755
- template <typename Self, typename Op, typename OutputType, bool PacketAccess>
756
- struct InnerReductionLauncher<
757
- Self, Op, OutputType, PacketAccess,
758
- typename internal::enable_if<
759
- internal::is_same<float, OutputType>::value ||
760
- internal::is_same<double, OutputType>::value,
761
- void>::type> {
762
- static bool run(const Self& self, Op& reducer, const GpuDevice& device, OutputType* output, typename Self::Index num_coeffs_to_reduce, typename Self::Index num_preserved_vals) {
763
- typedef typename Self::Index Index;
764
-
765
- const Index num_coeffs = num_coeffs_to_reduce * num_preserved_vals;
766
- const int block_size = 256;
767
- const int num_per_thread = 128;
768
- const int dyn_blocks = divup<int>(num_coeffs, block_size * num_per_thread);
769
- const int max_blocks = device.getNumGpuMultiProcessors() *
770
- device.maxGpuThreadsPerMultiProcessor() / block_size;
771
- const int num_blocks = numext::mini<int>(max_blocks, dyn_blocks);
772
-
773
- if (num_blocks > 1) {
774
- // We initialize the outputs outside the reduction kernel when we can't be sure that there
775
- // won't be a race conditions between multiple thread blocks.
776
- const int dyn_blocks = divup<int>(num_preserved_vals, 1024);
777
- const int max_blocks = device.getNumGpuMultiProcessors() *
778
- device.maxGpuThreadsPerMultiProcessor() / 1024;
779
- const int num_blocks = numext::mini<int>(max_blocks, dyn_blocks);
780
- LAUNCH_GPU_KERNEL((ReductionInitKernel<OutputType, Index>),
781
- num_blocks, 1024, 0, device, reducer.initialize(),
782
- num_preserved_vals, output);
783
- }
784
-
785
- LAUNCH_GPU_KERNEL((InnerReductionKernel<num_per_thread, Self, Op, Index>),
786
- num_blocks, block_size, 0, device, reducer, self, num_coeffs_to_reduce, num_preserved_vals, output);
787
-
788
- return false;
789
- }
790
- };
791
-
792
- #ifdef EIGEN_HAS_GPU_FP16
793
- template <typename Self, typename Op>
794
- struct InnerReductionLauncher<Self, Op, Eigen::half, false> {
795
- static bool run(const Self&, Op&, const GpuDevice&, half*, typename Self::Index, typename Self::Index) {
796
- gpu_assert(false && "Should not be called since there is no packet accessor");
797
- return true;
798
- }
799
- };
800
-
801
- template <typename Self, typename Op>
802
- struct InnerReductionLauncher<Self, Op, Eigen::half, true> {
803
- static bool run(const Self& self, Op& reducer, const GpuDevice& device, half* output, typename Self::Index num_coeffs_to_reduce, typename Self::Index num_preserved_vals) {
804
- typedef typename Self::Index Index;
805
-
806
- if (num_preserved_vals % 2 != 0) {
807
- // Not supported yet, revert to the slower code path
808
- return true;
809
- }
810
-
811
- const Index num_coeffs = num_coeffs_to_reduce * num_preserved_vals;
812
- const int block_size = /*256*/128;
813
- const int num_per_thread = /*128*/64;
814
- const int dyn_blocks = divup<int>(num_coeffs, block_size * num_per_thread);
815
- const int max_blocks = device.getNumGpuMultiProcessors() *
816
- device.maxGpuThreadsPerMultiProcessor() / block_size;
817
- const int num_blocks = numext::mini<int>(max_blocks, dyn_blocks);
818
-
819
- if (num_blocks > 1) {
820
- // We initialize the outputs outside the reduction kernel when we can't be sure that there
821
- // won't be a race conditions between multiple thread blocks.
822
- LAUNCH_GPU_KERNEL((ReductionInitKernelHalfFloat<Self, Op, Index>),
823
- 1, 1, 0, device, reducer, self, num_preserved_vals, output);
824
- }
825
-
826
- LAUNCH_GPU_KERNEL((InnerReductionKernelHalfFloat<num_per_thread, Self, Op, Index>),
827
- num_blocks, block_size, 0, device, reducer, self, num_coeffs_to_reduce, num_preserved_vals, output);
828
-
829
- return false;
830
- }
831
- };
832
- #endif // EIGEN_HAS_GPU_FP16
833
-
834
-
835
- template <typename Self, typename Op>
836
- struct InnerReducer<Self, Op, GpuDevice> {
837
- // Unfortunately nvidia doesn't support well exotic types such as complex,
838
- // so reduce the scope of the optimized version of the code to the simple case
839
- // of floats and half floats.
840
- #ifdef EIGEN_HAS_GPU_FP16
841
- static const bool HasOptimizedImplementation = !Self::ReducerTraits::IsStateful &&
842
- (internal::is_same<typename Self::CoeffReturnType, float>::value ||
843
- internal::is_same<typename Self::CoeffReturnType, double>::value ||
844
- (internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && reducer_traits<Op, GpuDevice>::PacketAccess));
845
- #else // EIGEN_HAS_GPU_FP16
846
- static const bool HasOptimizedImplementation = !Self::ReducerTraits::IsStateful &&
847
- (internal::is_same<typename Self::CoeffReturnType, float>::value ||
848
- internal::is_same<typename Self::CoeffReturnType, double>::value);
849
- #endif // EIGEN_HAS_GPU_FP16
850
-
851
- template <typename OutputType>
852
- static bool run(const Self& self, Op& reducer, const GpuDevice& device, OutputType* output, typename Self::Index num_coeffs_to_reduce, typename Self::Index num_preserved_vals) {
853
- gpu_assert(HasOptimizedImplementation && "Should only be called on doubles, floats or half floats");
854
- const Index num_coeffs = array_prod(self.m_impl.dimensions());
855
- // Don't crash when we're called with an input tensor of size 0.
856
- if (num_coeffs == 0) {
857
- return true;
858
- }
859
- // It's faster to use the usual code.
860
- if (num_coeffs_to_reduce <= 128) {
861
- return true;
862
- }
863
-
864
- return InnerReductionLauncher<Self, Op, OutputType, reducer_traits<Op, GpuDevice>::PacketAccess>::run(self, reducer, device, output, num_coeffs_to_reduce, num_preserved_vals);
865
- }
866
- };
867
-
868
- template <int NumPerThread, typename Self,
869
- typename Reducer, typename Index>
870
- __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void OuterReductionKernel(Reducer reducer, const Self input, Index num_coeffs_to_reduce, Index num_preserved_coeffs,
871
- typename Self::CoeffReturnType* output) {
872
- const Index num_threads = blockDim.x * gridDim.x;
873
- const Index thread_id = blockIdx.x * blockDim.x + threadIdx.x;
874
- // Initialize the output values if they weren't initialized by the ReductionInitKernel
875
- if (gridDim.x == 1) {
876
- for (Index i = thread_id; i < num_preserved_coeffs; i += num_threads) {
877
- output[i] = reducer.initialize();
878
- }
879
- __syncthreads();
880
- }
881
-
882
- // Do the reduction.
883
- const Index max_iter = num_preserved_coeffs * divup<Index>(num_coeffs_to_reduce, NumPerThread);
884
- for (Index i = thread_id; i < max_iter; i += num_threads) {
885
- const Index input_col = i % num_preserved_coeffs;
886
- const Index input_row = (i / num_preserved_coeffs) * NumPerThread;
887
- typename Self::CoeffReturnType reduced_val = reducer.initialize();
888
- const Index max_row = numext::mini(input_row + NumPerThread, num_coeffs_to_reduce);
889
- for (Index j = input_row; j < max_row; j++) {
890
- typename Self::CoeffReturnType val = input.m_impl.coeff(j * num_preserved_coeffs + input_col);
891
- reducer.reduce(val, &reduced_val);
892
- }
893
- atomicReduce(&(output[input_col]), reduced_val, reducer);
894
- }
895
- }
896
-
897
-
898
- template <typename Self, typename Op>
899
- struct OuterReducer<Self, Op, GpuDevice> {
900
- // Unfortunately nvidia doesn't support well exotic types such as complex,
901
- // so reduce the scope of the optimized version of the code to the simple case
902
- // of floats.
903
- static const bool HasOptimizedImplementation = !Self::ReducerTraits::IsStateful &&
904
- (internal::is_same<typename Self::CoeffReturnType, float>::value ||
905
- internal::is_same<typename Self::CoeffReturnType, double>::value);
906
- template <typename Device, typename OutputType>
907
- static
908
- #if !defined(EIGEN_HIPCC)
909
- // FIXME : leaving this EIGEN_DEVICE_FUNC in, results in the following runtime error
910
- // (in the cxx11_tensor_reduction_gpu test)
911
- //
912
- // terminate called after throwing an instance of 'std::runtime_error'
913
- // what(): No device code available for function: _ZN5Eigen8internal20OuterReductionKernelIL...
914
- //
915
- // don't know why this happens (and why is it a runtime error instead of a compile time error)
916
- //
917
- // this will be fixed by HIP PR#457
918
- EIGEN_DEVICE_FUNC
919
- #endif
920
- bool run(const Self&, Op&, const Device&, OutputType*, typename Self::Index, typename Self::Index) {
921
- gpu_assert(false && "Should only be called to reduce doubles or floats on a gpu device");
922
- return true;
923
- }
924
-
925
- static bool run(const Self& self, Op& reducer, const GpuDevice& device, float* output, typename Self::Index num_coeffs_to_reduce, typename Self::Index num_preserved_vals) {
926
- typedef typename Self::Index Index;
927
-
928
- // It's faster to use the usual code.
929
- if (num_coeffs_to_reduce <= 32) {
930
- return true;
931
- }
932
-
933
- const Index num_coeffs = num_coeffs_to_reduce * num_preserved_vals;
934
- const int block_size = 256;
935
- const int num_per_thread = 16;
936
- const int dyn_blocks = divup<int>(num_coeffs, block_size * num_per_thread);
937
- const int max_blocks = device.getNumGpuMultiProcessors() *
938
- device.maxGpuThreadsPerMultiProcessor() / block_size;
939
- const int num_blocks = numext::mini<int>(max_blocks, dyn_blocks);
940
-
941
- if (num_blocks > 1) {
942
- // We initialize the outputs in the reduction kernel itself when we don't have to worry
943
- // about race conditions between multiple thread blocks.
944
- const int dyn_blocks = divup<int>(num_preserved_vals, 1024);
945
- const int max_blocks = device.getNumGpuMultiProcessors() *
946
- device.maxGpuThreadsPerMultiProcessor() / 1024;
947
- const int num_blocks = numext::mini<int>(max_blocks, dyn_blocks);
948
- LAUNCH_GPU_KERNEL((ReductionInitKernel<float, Index>),
949
- num_blocks, 1024, 0, device, reducer.initialize(),
950
- num_preserved_vals, output);
951
- }
952
-
953
- LAUNCH_GPU_KERNEL((OuterReductionKernel<num_per_thread, Self, Op, Index>),
954
- num_blocks, block_size, 0, device, reducer, self, num_coeffs_to_reduce, num_preserved_vals, output);
955
-
956
- return false;
957
- }
958
- };
959
-
960
- #endif // defined(EIGEN_USE_GPU) && defined(EIGEN_GPUCC)
961
-
962
-
963
- } // end namespace internal
964
- } // end namespace Eigen
965
-
966
- #endif // EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_GPU_H