sequenzo 0.1.17__cp311-cp311-macosx_10_9_universal2.whl → 0.1.19__cp311-cp311-macosx_10_9_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sequenzo might be problematic. Click here for more details.

Files changed (423)
  1. sequenzo/__init__.py +64 -8
  2. sequenzo/big_data/clara/clara.py +1 -1
  3. sequenzo/big_data/clara/utils/get_weighted_diss.c +155 -155
  4. sequenzo/big_data/clara/utils/get_weighted_diss.cpython-311-darwin.so +0 -0
  5. sequenzo/clustering/KMedoids.py +39 -0
  6. sequenzo/clustering/hierarchical_clustering.py +304 -8
  7. sequenzo/define_sequence_data.py +44 -3
  8. sequenzo/dissimilarity_measures/c_code.cpython-311-darwin.so +0 -0
  9. sequenzo/dissimilarity_measures/get_distance_matrix.py +1 -2
  10. sequenzo/dissimilarity_measures/get_substitution_cost_matrix.py +1 -1
  11. sequenzo/dissimilarity_measures/src/DHDdistance.cpp +13 -37
  12. sequenzo/dissimilarity_measures/src/LCPdistance.cpp +13 -37
  13. sequenzo/dissimilarity_measures/src/OMdistance.cpp +12 -47
  14. sequenzo/dissimilarity_measures/src/OMspellDistance.cpp +103 -67
  15. sequenzo/dissimilarity_measures/src/dp_utils.h +160 -0
  16. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_arithmetic.hpp +41 -16
  17. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_complex.hpp +4 -0
  18. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_details.hpp +7 -0
  19. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_logical.hpp +10 -0
  20. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_math.hpp +127 -43
  21. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_memory.hpp +30 -2
  22. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_swizzle.hpp +174 -0
  23. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_trigo.hpp +14 -5
  24. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx.hpp +111 -54
  25. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx2.hpp +131 -9
  26. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512bw.hpp +11 -113
  27. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512dq.hpp +39 -7
  28. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512f.hpp +336 -30
  29. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vbmi.hpp +9 -37
  30. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vbmi2.hpp +58 -0
  31. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_common.hpp +1 -0
  32. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_common_fwd.hpp +35 -2
  33. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_constants.hpp +3 -1
  34. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_emulated.hpp +17 -0
  35. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_avx.hpp +13 -0
  36. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_sse.hpp +18 -0
  37. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma4.hpp +13 -0
  38. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_isa.hpp +8 -0
  39. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_neon.hpp +363 -34
  40. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_neon64.hpp +7 -0
  41. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_rvv.hpp +13 -0
  42. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_scalar.hpp +41 -4
  43. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse2.hpp +252 -16
  44. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse3.hpp +9 -0
  45. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_ssse3.hpp +12 -1
  46. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sve.hpp +7 -0
  47. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_vsx.hpp +892 -0
  48. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_wasm.hpp +78 -1
  49. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_arch.hpp +3 -1
  50. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_config.hpp +13 -2
  51. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_cpuid.hpp +5 -0
  52. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_inline.hpp +5 -1
  53. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_all_registers.hpp +2 -0
  54. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_api.hpp +64 -1
  55. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_batch.hpp +36 -0
  56. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_rvv_register.hpp +40 -31
  57. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_traits.hpp +8 -0
  58. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_vsx_register.hpp +77 -0
  59. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/xsimd.hpp +6 -0
  60. sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.c +155 -155
  61. sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.cpython-311-darwin.so +0 -0
  62. sequenzo/dissimilarity_measures/utils/seqconc.c +155 -155
  63. sequenzo/dissimilarity_measures/utils/seqconc.cpython-311-darwin.so +0 -0
  64. sequenzo/dissimilarity_measures/utils/seqdss.c +155 -155
  65. sequenzo/dissimilarity_measures/utils/seqdss.cpython-311-darwin.so +0 -0
  66. sequenzo/dissimilarity_measures/utils/seqdur.c +155 -155
  67. sequenzo/dissimilarity_measures/utils/seqdur.cpython-311-darwin.so +0 -0
  68. sequenzo/dissimilarity_measures/utils/seqlength.c +155 -155
  69. sequenzo/dissimilarity_measures/utils/seqlength.cpython-311-darwin.so +0 -0
  70. sequenzo/multidomain/cat.py +0 -53
  71. sequenzo/multidomain/idcd.py +0 -1
  72. sequenzo/openmp_setup.py +233 -0
  73. sequenzo/sequence_characteristics/__init__.py +4 -0
  74. sequenzo/sequence_characteristics/complexity_index.py +17 -57
  75. sequenzo/sequence_characteristics/overall_cross_sectional_entropy.py +177 -111
  76. sequenzo/sequence_characteristics/plot_characteristics.py +30 -11
  77. sequenzo/sequence_characteristics/simple_characteristics.py +1 -0
  78. sequenzo/sequence_characteristics/state_frequencies_and_entropy_per_sequence.py +9 -3
  79. sequenzo/sequence_characteristics/turbulence.py +47 -67
  80. sequenzo/sequence_characteristics/variance_of_spell_durations.py +19 -9
  81. sequenzo/sequence_characteristics/within_sequence_entropy.py +5 -58
  82. sequenzo/visualization/plot_sequence_index.py +58 -35
  83. sequenzo/visualization/plot_state_distribution.py +57 -36
  84. sequenzo/visualization/plot_transition_matrix.py +21 -22
  85. sequenzo/with_event_history_analysis/__init__.py +35 -0
  86. sequenzo/with_event_history_analysis/sequence_analysis_multi_state_model.py +850 -0
  87. sequenzo/with_event_history_analysis/sequence_history_analysis.py +283 -0
  88. {sequenzo-0.1.17.dist-info → sequenzo-0.1.19.dist-info}/METADATA +48 -14
  89. sequenzo-0.1.19.dist-info/RECORD +215 -0
  90. sequenzo/dissimilarity_measures/setup.py +0 -35
  91. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Cholesky/LDLT.h +0 -688
  92. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Cholesky/LLT.h +0 -558
  93. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h +0 -99
  94. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +0 -682
  95. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +0 -346
  96. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +0 -462
  97. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +0 -91
  98. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/EigenSolver.h +0 -622
  99. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +0 -418
  100. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +0 -226
  101. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +0 -374
  102. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +0 -158
  103. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/RealQZ.h +0 -657
  104. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/RealSchur.h +0 -558
  105. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +0 -77
  106. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +0 -904
  107. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +0 -87
  108. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +0 -561
  109. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/AlignedBox.h +0 -486
  110. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/AngleAxis.h +0 -247
  111. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/EulerAngles.h +0 -114
  112. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/Homogeneous.h +0 -501
  113. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/Hyperplane.h +0 -282
  114. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/OrthoMethods.h +0 -235
  115. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/ParametrizedLine.h +0 -232
  116. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/Quaternion.h +0 -870
  117. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/Rotation2D.h +0 -199
  118. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/RotationBase.h +0 -206
  119. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/Scaling.h +0 -188
  120. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/Transform.h +0 -1563
  121. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/Translation.h +0 -202
  122. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/Umeyama.h +0 -166
  123. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +0 -168
  124. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Householder/BlockHouseholder.h +0 -110
  125. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Householder/Householder.h +0 -176
  126. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Householder/HouseholderSequence.h +0 -545
  127. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +0 -226
  128. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +0 -212
  129. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +0 -229
  130. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +0 -394
  131. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +0 -453
  132. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +0 -444
  133. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +0 -198
  134. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +0 -117
  135. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Jacobi/Jacobi.h +0 -483
  136. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/KLUSupport/KLUSupport.h +0 -358
  137. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/LU/Determinant.h +0 -117
  138. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/LU/FullPivLU.h +0 -877
  139. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/LU/InverseImpl.h +0 -432
  140. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/LU/PartialPivLU.h +0 -624
  141. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h +0 -83
  142. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/LU/arch/InverseSize4.h +0 -351
  143. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/MetisSupport/MetisSupport.h +0 -137
  144. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/OrderingMethods/Amd.h +0 -435
  145. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +0 -1863
  146. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/OrderingMethods/Ordering.h +0 -153
  147. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +0 -678
  148. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +0 -545
  149. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/QR/ColPivHouseholderQR.h +0 -674
  150. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +0 -97
  151. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +0 -635
  152. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/QR/FullPivHouseholderQR.h +0 -713
  153. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/QR/HouseholderQR.h +0 -434
  154. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h +0 -68
  155. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +0 -335
  156. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SVD/BDCSVD.h +0 -1366
  157. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SVD/JacobiSVD.h +0 -812
  158. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +0 -91
  159. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SVD/SVDBase.h +0 -376
  160. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SVD/UpperBidiagonalization.h +0 -414
  161. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +0 -697
  162. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +0 -174
  163. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/AmbiVector.h +0 -378
  164. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/CompressedStorage.h +0 -274
  165. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +0 -352
  166. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h +0 -67
  167. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseAssign.h +0 -270
  168. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseBlock.h +0 -571
  169. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseColEtree.h +0 -206
  170. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +0 -370
  171. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +0 -722
  172. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +0 -150
  173. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +0 -342
  174. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +0 -138
  175. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseDot.h +0 -98
  176. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseFuzzy.h +0 -29
  177. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseMap.h +0 -305
  178. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseMatrix.h +0 -1518
  179. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +0 -398
  180. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparsePermutation.h +0 -178
  181. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseProduct.h +0 -181
  182. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseRedux.h +0 -49
  183. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseRef.h +0 -397
  184. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +0 -659
  185. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseSolverBase.h +0 -124
  186. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +0 -198
  187. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseTranspose.h +0 -92
  188. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseTriangularView.h +0 -189
  189. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseUtil.h +0 -186
  190. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseVector.h +0 -478
  191. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseView.h +0 -254
  192. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/TriangularSolver.h +0 -315
  193. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU.h +0 -923
  194. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLUImpl.h +0 -66
  195. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +0 -226
  196. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_Structs.h +0 -110
  197. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +0 -375
  198. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_Utils.h +0 -80
  199. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h +0 -181
  200. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +0 -179
  201. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +0 -107
  202. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +0 -280
  203. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +0 -126
  204. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +0 -130
  205. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +0 -223
  206. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h +0 -258
  207. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h +0 -137
  208. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h +0 -136
  209. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h +0 -83
  210. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseQR/SparseQR.h +0 -758
  211. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/StlSupport/StdDeque.h +0 -116
  212. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/StlSupport/StdList.h +0 -106
  213. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/StlSupport/StdVector.h +0 -131
  214. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/StlSupport/details.h +0 -84
  215. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +0 -1025
  216. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +0 -642
  217. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/misc/Image.h +0 -82
  218. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/misc/Kernel.h +0 -79
  219. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/misc/RealSvd2x2.h +0 -55
  220. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/misc/blas.h +0 -440
  221. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/misc/lapack.h +0 -152
  222. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/misc/lapacke.h +0 -16292
  223. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/misc/lapacke_mangling.h +0 -17
  224. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +0 -358
  225. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +0 -696
  226. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/BlockMethods.h +0 -1442
  227. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h +0 -115
  228. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +0 -177
  229. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/IndexedViewMethods.h +0 -262
  230. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +0 -152
  231. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +0 -95
  232. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/ReshapedMethods.h +0 -149
  233. sequenzo/dissimilarity_measures/src/eigen/blas/BandTriangularSolver.h +0 -97
  234. sequenzo/dissimilarity_measures/src/eigen/blas/GeneralRank1Update.h +0 -44
  235. sequenzo/dissimilarity_measures/src/eigen/blas/PackedSelfadjointProduct.h +0 -53
  236. sequenzo/dissimilarity_measures/src/eigen/blas/PackedTriangularMatrixVector.h +0 -79
  237. sequenzo/dissimilarity_measures/src/eigen/blas/PackedTriangularSolverVector.h +0 -88
  238. sequenzo/dissimilarity_measures/src/eigen/blas/Rank2Update.h +0 -57
  239. sequenzo/dissimilarity_measures/src/eigen/blas/common.h +0 -175
  240. sequenzo/dissimilarity_measures/src/eigen/blas/f2c/datatypes.h +0 -24
  241. sequenzo/dissimilarity_measures/src/eigen/blas/level1_cplx_impl.h +0 -155
  242. sequenzo/dissimilarity_measures/src/eigen/blas/level1_impl.h +0 -144
  243. sequenzo/dissimilarity_measures/src/eigen/blas/level1_real_impl.h +0 -122
  244. sequenzo/dissimilarity_measures/src/eigen/blas/level2_cplx_impl.h +0 -360
  245. sequenzo/dissimilarity_measures/src/eigen/blas/level2_impl.h +0 -553
  246. sequenzo/dissimilarity_measures/src/eigen/blas/level2_real_impl.h +0 -306
  247. sequenzo/dissimilarity_measures/src/eigen/blas/level3_impl.h +0 -702
  248. sequenzo/dissimilarity_measures/src/eigen/debug/gdb/__init__.py +0 -1
  249. sequenzo/dissimilarity_measures/src/eigen/debug/gdb/printers.py +0 -314
  250. sequenzo/dissimilarity_measures/src/eigen/lapack/lapack_common.h +0 -29
  251. sequenzo/dissimilarity_measures/src/eigen/scripts/relicense.py +0 -69
  252. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/Tensor.h +0 -554
  253. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h +0 -329
  254. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h +0 -247
  255. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +0 -1176
  256. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h +0 -1559
  257. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h +0 -1093
  258. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h +0 -518
  259. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h +0 -377
  260. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h +0 -1023
  261. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h +0 -73
  262. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h +0 -6
  263. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h +0 -1413
  264. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h +0 -575
  265. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionSycl.h +0 -1650
  266. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h +0 -1679
  267. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h +0 -456
  268. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h +0 -1132
  269. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConvolutionSycl.h +0 -544
  270. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h +0 -214
  271. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h +0 -347
  272. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h +0 -137
  273. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h +0 -6
  274. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h +0 -104
  275. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceGpu.h +0 -389
  276. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h +0 -1048
  277. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h +0 -409
  278. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h +0 -236
  279. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h +0 -490
  280. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h +0 -236
  281. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h +0 -983
  282. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h +0 -703
  283. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h +0 -388
  284. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h +0 -669
  285. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h +0 -379
  286. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h +0 -237
  287. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h +0 -191
  288. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h +0 -488
  289. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h +0 -302
  290. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorGlobalFunctions.h +0 -33
  291. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h +0 -99
  292. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaUndefines.h +0 -44
  293. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h +0 -79
  294. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h +0 -603
  295. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h +0 -738
  296. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h +0 -247
  297. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorInitializer.h +0 -82
  298. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h +0 -263
  299. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h +0 -216
  300. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMacros.h +0 -98
  301. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h +0 -327
  302. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h +0 -311
  303. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h +0 -1102
  304. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h +0 -708
  305. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h +0 -291
  306. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h +0 -322
  307. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h +0 -998
  308. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h +0 -6
  309. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h +0 -966
  310. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReductionSycl.h +0 -582
  311. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h +0 -454
  312. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h +0 -465
  313. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h +0 -528
  314. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorScanSycl.h +0 -513
  315. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h +0 -471
  316. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h +0 -161
  317. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h +0 -346
  318. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorTrace.h +0 -303
  319. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h +0 -264
  320. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h +0 -249
  321. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h +0 -629
  322. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/TensorSymmetry/DynamicSymmetry.h +0 -293
  323. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/TensorSymmetry/StaticSymmetry.h +0 -236
  324. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/TensorSymmetry/Symmetry.h +0 -338
  325. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/TensorSymmetry/util/TemplateGroupTheory.h +0 -669
  326. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/Barrier.h +0 -67
  327. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/EventCount.h +0 -249
  328. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h +0 -486
  329. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/RunQueue.h +0 -236
  330. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadCancel.h +0 -23
  331. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadEnvironment.h +0 -40
  332. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadLocal.h +0 -301
  333. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadPoolInterface.h +0 -48
  334. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadYield.h +0 -20
  335. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/util/CXX11Meta.h +0 -537
  336. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/util/CXX11Workarounds.h +0 -88
  337. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/util/EmulateArray.h +0 -261
  338. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/util/MaxSizeVector.h +0 -158
  339. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/AutoDiff/AutoDiffJacobian.h +0 -108
  340. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h +0 -730
  341. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/AutoDiff/AutoDiffVector.h +0 -220
  342. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/BVH/BVAlgorithms.h +0 -293
  343. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/BVH/KdBVH.h +0 -223
  344. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Eigenvalues/ArpackSelfAdjointEigenSolver.h +0 -790
  345. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/EulerAngles/EulerAngles.h +0 -355
  346. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/EulerAngles/EulerSystem.h +0 -305
  347. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/FFT/ei_fftw_impl.h +0 -261
  348. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/FFT/ei_kissfft_impl.h +0 -449
  349. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/IterativeSolvers/ConstrainedConjGrad.h +0 -187
  350. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/IterativeSolvers/DGMRES.h +0 -511
  351. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/IterativeSolvers/GMRES.h +0 -335
  352. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/IterativeSolvers/IDRS.h +0 -436
  353. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/IterativeSolvers/IncompleteLU.h +0 -90
  354. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/IterativeSolvers/IterationController.h +0 -154
  355. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/IterativeSolvers/MINRES.h +0 -267
  356. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/IterativeSolvers/Scaling.h +0 -193
  357. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h +0 -305
  358. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/LevenbergMarquardt/LMcovar.h +0 -84
  359. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/LevenbergMarquardt/LMonestep.h +0 -202
  360. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/LevenbergMarquardt/LMpar.h +0 -160
  361. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/LevenbergMarquardt/LMqrsolv.h +0 -188
  362. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/LevenbergMarquardt/LevenbergMarquardt.h +0 -396
  363. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h +0 -441
  364. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h +0 -569
  365. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h +0 -373
  366. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/MatrixFunctions/MatrixPower.h +0 -705
  367. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/MatrixFunctions/MatrixSquareRoot.h +0 -368
  368. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/MatrixFunctions/StemFunction.h +0 -117
  369. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/MoreVectorization/MathFunctions.h +0 -95
  370. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/HybridNonLinearSolver.h +0 -601
  371. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/LevenbergMarquardt.h +0 -657
  372. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/chkder.h +0 -66
  373. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/covar.h +0 -70
  374. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/dogleg.h +0 -107
  375. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/fdjac1.h +0 -79
  376. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/lmpar.h +0 -298
  377. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/qrsolv.h +0 -91
  378. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/r1mpyq.h +0 -30
  379. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/r1updt.h +0 -99
  380. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/rwupdt.h +0 -49
  381. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NumericalDiff/NumericalDiff.h +0 -130
  382. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Polynomials/Companion.h +0 -280
  383. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Polynomials/PolynomialSolver.h +0 -428
  384. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Polynomials/PolynomialUtils.h +0 -143
  385. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Skyline/SkylineInplaceLU.h +0 -352
  386. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Skyline/SkylineMatrix.h +0 -862
  387. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Skyline/SkylineMatrixBase.h +0 -212
  388. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Skyline/SkylineProduct.h +0 -295
  389. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Skyline/SkylineStorage.h +0 -259
  390. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Skyline/SkylineUtil.h +0 -89
  391. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SparseExtra/BlockOfDynamicSparseMatrix.h +0 -122
  392. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SparseExtra/BlockSparseMatrix.h +0 -1079
  393. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SparseExtra/DynamicSparseMatrix.h +0 -404
  394. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SparseExtra/MarketIO.h +0 -282
  395. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SparseExtra/MatrixMarketIterator.h +0 -247
  396. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SparseExtra/RandomSetter.h +0 -349
  397. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsArrayAPI.h +0 -286
  398. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsBFloat16.h +0 -68
  399. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsFunctors.h +0 -357
  400. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsHalf.h +0 -66
  401. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsImpl.h +0 -1959
  402. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsPacketMath.h +0 -118
  403. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/HipVectorCompatibility.h +0 -67
  404. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsArrayAPI.h +0 -167
  405. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsBFloat16.h +0 -58
  406. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h +0 -330
  407. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h +0 -58
  408. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h +0 -2045
  409. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h +0 -79
  410. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/arch/AVX/BesselFunctions.h +0 -46
  411. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/arch/AVX/SpecialFunctions.h +0 -16
  412. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/arch/AVX512/BesselFunctions.h +0 -46
  413. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/arch/AVX512/SpecialFunctions.h +0 -16
  414. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/arch/GPU/SpecialFunctions.h +0 -369
  415. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/arch/NEON/BesselFunctions.h +0 -54
  416. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/arch/NEON/SpecialFunctions.h +0 -34
  417. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Splines/Spline.h +0 -507
  418. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Splines/SplineFitting.h +0 -431
  419. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Splines/SplineFwd.h +0 -93
  420. sequenzo-0.1.17.dist-info/RECORD +0 -537
  421. {sequenzo-0.1.17.dist-info → sequenzo-0.1.19.dist-info}/WHEEL +0 -0
  422. {sequenzo-0.1.17.dist-info → sequenzo-0.1.19.dist-info}/licenses/LICENSE +0 -0
  423. {sequenzo-0.1.17.dist-info → sequenzo-0.1.19.dist-info}/top_level.txt +0 -0
@@ -1,1132 +0,0 @@
1
- // This file is part of Eigen, a lightweight C++ template library
2
- // for linear algebra.
3
- //
4
- // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
5
- //
6
- // This Source Code Form is subject to the terms of the Mozilla
7
- // Public License v. 2.0. If a copy of the MPL was not distributed
8
- // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9
-
10
- #ifndef EIGEN_CXX11_TENSOR_TENSOR_CONVOLUTION_H
11
- #define EIGEN_CXX11_TENSOR_TENSOR_CONVOLUTION_H
12
-
13
- namespace Eigen {
14
-
15
- /** \class TensorConvolution
16
- * \ingroup CXX11_Tensor_Module
17
- *
18
- * \brief Tensor convolution class.
19
- *
20
- *
21
- */
22
- namespace internal {
23
-
24
- template <typename Index, typename InputDims, int NumKernelDims, int Layout>
25
- class IndexMapper {
26
- public:
27
- IndexMapper(const InputDims& input_dims, const array<Index, NumKernelDims>& kernel_dims,
28
- const array<Index, NumKernelDims>& indices) {
29
-
30
- array<Index, NumDims> dimensions = input_dims;
31
- for (int i = 0; i < NumKernelDims; ++i) {
32
- const Index index = indices[i];
33
- const Index input_dim = input_dims[index];
34
- const Index kernel_dim = kernel_dims[i];
35
- const Index result_dim = input_dim - kernel_dim + 1;
36
- dimensions[index] = result_dim;
37
- }
38
-
39
- array<Index, NumDims> inputStrides;
40
- array<Index, NumDims> outputStrides;
41
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
42
- inputStrides[0] = 1;
43
- outputStrides[0] = 1;
44
- for (int i = 1; i < NumDims; ++i) {
45
- inputStrides[i] = inputStrides[i-1] * input_dims[i-1];
46
- outputStrides[i] = outputStrides[i-1] * dimensions[i-1];
47
- }
48
- } else {
49
- inputStrides[NumDims - 1] = 1;
50
- outputStrides[NumDims - 1] = 1;
51
- for (int i = static_cast<int>(NumDims) - 2; i >= 0; --i) {
52
- inputStrides[i] = inputStrides[i + 1] * input_dims[i + 1];
53
- outputStrides[i] = outputStrides[i + 1] * dimensions[i + 1];
54
- }
55
- }
56
-
57
- array<Index, NumDims> gpuInputDimensions;
58
- array<Index, NumDims> gpuOutputDimensions;
59
- array<Index, NumDims> tmp = dimensions;
60
- array<Index, NumDims> ordering;
61
- const size_t offset = static_cast<int>(Layout) == static_cast<int>(ColMajor)
62
- ? 0
63
- : NumDims - NumKernelDims;
64
- for (int i = 0; i < NumKernelDims; ++i) {
65
- const Index index = i + offset;
66
- ordering[index] = indices[i];
67
- tmp[indices[i]] = -1;
68
- gpuInputDimensions[index] = input_dims[indices[i]];
69
- gpuOutputDimensions[index] = dimensions[indices[i]];
70
- }
71
-
72
- int written = static_cast<int>(Layout) == static_cast<int>(ColMajor)
73
- ? NumKernelDims
74
- : 0;
75
- for (int i = 0; i < NumDims; ++i) {
76
- if (tmp[i] >= 0) {
77
- ordering[written] = i;
78
- gpuInputDimensions[written] = input_dims[i];
79
- gpuOutputDimensions[written] = dimensions[i];
80
- ++written;
81
- }
82
- }
83
-
84
- for (int i = 0; i < NumDims; ++i) {
85
- m_inputStrides[i] = inputStrides[ordering[i]];
86
- m_outputStrides[i] = outputStrides[ordering[i]];
87
- }
88
-
89
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
90
- for (int i = 0; i < NumDims; ++i) {
91
- if (i > NumKernelDims) {
92
- m_gpuInputStrides[i] =
93
- m_gpuInputStrides[i - 1] * gpuInputDimensions[i - 1];
94
- m_gpuOutputStrides[i] =
95
- m_gpuOutputStrides[i - 1] * gpuOutputDimensions[i - 1];
96
- } else {
97
- m_gpuInputStrides[i] = 1;
98
- m_gpuOutputStrides[i] = 1;
99
- }
100
- }
101
- } else {
102
- for (int i = NumDims - 1; i >= 0; --i) {
103
- if (static_cast<size_t>(i + 1) < offset) {
104
- m_gpuInputStrides[i] =
105
- m_gpuInputStrides[i + 1] * gpuInputDimensions[i + 1];
106
- m_gpuOutputStrides[i] =
107
- m_gpuOutputStrides[i + 1] * gpuOutputDimensions[i + 1];
108
- } else {
109
- m_gpuInputStrides[i] = 1;
110
- m_gpuOutputStrides[i] = 1;
111
- }
112
- }
113
- }
114
- }
115
-
116
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapGpuInputPlaneToTensorInputOffset(Index p) const {
117
- Index inputIndex = 0;
118
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
119
- for (int d = NumDims - 1; d > NumKernelDims; --d) {
120
- const Index idx = p / m_gpuInputStrides[d];
121
- inputIndex += idx * m_inputStrides[d];
122
- p -= idx * m_gpuInputStrides[d];
123
- }
124
- inputIndex += p * m_inputStrides[NumKernelDims];
125
- } else {
126
- std::ptrdiff_t limit = 0;
127
- if (NumKernelDims < NumDims) {
128
- limit = NumDims - NumKernelDims - 1;
129
- }
130
- for (int d = 0; d < limit; ++d) {
131
- const Index idx = p / m_gpuInputStrides[d];
132
- inputIndex += idx * m_inputStrides[d];
133
- p -= idx * m_gpuInputStrides[d];
134
- }
135
- inputIndex += p * m_inputStrides[limit];
136
- }
137
- return inputIndex;
138
- }
139
-
140
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapGpuOutputPlaneToTensorOutputOffset(Index p) const {
141
- Index outputIndex = 0;
142
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
143
- for (int d = NumDims - 1; d > NumKernelDims; --d) {
144
- const Index idx = p / m_gpuOutputStrides[d];
145
- outputIndex += idx * m_outputStrides[d];
146
- p -= idx * m_gpuOutputStrides[d];
147
- }
148
- outputIndex += p * m_outputStrides[NumKernelDims];
149
- } else {
150
- std::ptrdiff_t limit = 0;
151
- if (NumKernelDims < NumDims) {
152
- limit = NumDims - NumKernelDims - 1;
153
- }
154
- for (int d = 0; d < limit; ++d) {
155
- const Index idx = p / m_gpuOutputStrides[d];
156
- outputIndex += idx * m_outputStrides[d];
157
- p -= idx * m_gpuOutputStrides[d];
158
- }
159
- outputIndex += p * m_outputStrides[limit];
160
- }
161
- return outputIndex;
162
- }
163
-
164
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapGpuInputKernelToTensorInputOffset(Index i) const {
165
- const size_t offset = static_cast<int>(Layout) == static_cast<int>(ColMajor)
166
- ? 0
167
- : NumDims - NumKernelDims;
168
- return i * m_inputStrides[offset];
169
- }
170
-
171
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapGpuOutputKernelToTensorOutputOffset(Index i) const {
172
- const size_t offset = static_cast<int>(Layout) == static_cast<int>(ColMajor)
173
- ? 0
174
- : NumDims - NumKernelDims;
175
- return i * m_outputStrides[offset];
176
- }
177
-
178
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapGpuInputKernelToTensorInputOffset(Index i, Index j) const {
179
- const size_t offset = static_cast<int>(Layout) == static_cast<int>(ColMajor)
180
- ? 0
181
- : NumDims - NumKernelDims;
182
- return i * m_inputStrides[offset] + j * m_inputStrides[offset + 1];
183
- }
184
-
185
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapGpuOutputKernelToTensorOutputOffset(Index i, Index j) const {
186
- const size_t offset = static_cast<int>(Layout) == static_cast<int>(ColMajor)
187
- ? 0
188
- : NumDims - NumKernelDims;
189
- return i * m_outputStrides[offset] + j * m_outputStrides[offset + 1];
190
- }
191
-
192
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapGpuInputKernelToTensorInputOffset(Index i, Index j, Index k) const {
193
- const size_t offset = static_cast<int>(Layout) == static_cast<int>(ColMajor)
194
- ? 0
195
- : NumDims - NumKernelDims;
196
- return i * m_inputStrides[offset] + j * m_inputStrides[offset + 1] +
197
- k * m_inputStrides[offset + 2];
198
- }
199
-
200
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapGpuOutputKernelToTensorOutputOffset(Index i, Index j, Index k) const {
201
- const size_t offset = static_cast<int>(Layout) == static_cast<int>(ColMajor)
202
- ? 0
203
- : NumDims - NumKernelDims;
204
- return i * m_outputStrides[offset] + j * m_outputStrides[offset + 1] +
205
- k * m_outputStrides[offset + 2];
206
- }
207
-
208
- private:
209
- static const int NumDims = internal::array_size<InputDims>::value;
210
- array<Index, NumDims> m_inputStrides;
211
- array<Index, NumDims> m_outputStrides;
212
- array<Index, NumDims> m_gpuInputStrides;
213
- array<Index, NumDims> m_gpuOutputStrides;
214
- };
215
-
216
-
217
-
218
- template<typename Dimensions, typename InputXprType, typename KernelXprType>
219
- struct traits<TensorConvolutionOp<Dimensions, InputXprType, KernelXprType> >
220
- {
221
- // Type promotion to handle the case where the types of the lhs and the rhs are different.
222
- typedef typename promote_storage_type<typename InputXprType::Scalar,
223
- typename KernelXprType::Scalar>::ret Scalar;
224
- typedef typename promote_storage_type<typename traits<InputXprType>::StorageKind,
225
- typename traits<KernelXprType>::StorageKind>::ret StorageKind;
226
- typedef typename promote_index_type<typename traits<InputXprType>::Index,
227
- typename traits<KernelXprType>::Index>::type Index;
228
- typedef typename InputXprType::Nested LhsNested;
229
- typedef typename KernelXprType::Nested RhsNested;
230
- typedef typename remove_reference<LhsNested>::type _LhsNested;
231
- typedef typename remove_reference<RhsNested>::type _RhsNested;
232
- static const int NumDimensions = traits<InputXprType>::NumDimensions;
233
- static const int Layout = traits<InputXprType>::Layout;
234
- typedef typename conditional<Pointer_type_promotion<typename InputXprType::Scalar, Scalar>::val,
235
- typename traits<InputXprType>::PointerType, typename traits<KernelXprType>::PointerType>::type PointerType;
236
-
237
- enum {
238
- Flags = 0
239
- };
240
- };
241
-
242
- template<typename Dimensions, typename InputXprType, typename KernelXprType>
243
- struct eval<TensorConvolutionOp<Dimensions, InputXprType, KernelXprType>, Eigen::Dense>
244
- {
245
- typedef const TensorConvolutionOp<Dimensions, InputXprType, KernelXprType>& type;
246
- };
247
-
248
- template<typename Dimensions, typename InputXprType, typename KernelXprType>
249
- struct nested<TensorConvolutionOp<Dimensions, InputXprType, KernelXprType>, 1, typename eval<TensorConvolutionOp<Dimensions, InputXprType, KernelXprType> >::type>
250
- {
251
- typedef TensorConvolutionOp<Dimensions, InputXprType, KernelXprType> type;
252
- };
253
-
254
- } // end namespace internal
255
-
256
-
257
-
258
- template<typename Indices, typename InputXprType, typename KernelXprType>
259
- class TensorConvolutionOp : public TensorBase<TensorConvolutionOp<Indices, InputXprType, KernelXprType>, ReadOnlyAccessors>
260
- {
261
- public:
262
- typedef typename Eigen::internal::traits<TensorConvolutionOp>::Scalar Scalar;
263
- typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
264
- typedef typename internal::promote_storage_type<typename InputXprType::CoeffReturnType,
265
- typename KernelXprType::CoeffReturnType>::ret CoeffReturnType;
266
- typedef typename Eigen::internal::nested<TensorConvolutionOp>::type Nested;
267
- typedef typename Eigen::internal::traits<TensorConvolutionOp>::StorageKind StorageKind;
268
- typedef typename Eigen::internal::traits<TensorConvolutionOp>::Index Index;
269
-
270
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorConvolutionOp(const InputXprType& input, const KernelXprType& kernel, const Indices& dims)
271
- : m_input_xpr(input), m_kernel_xpr(kernel), m_indices(dims) {}
272
-
273
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
274
- const Indices& indices() const { return m_indices; }
275
-
276
- /** \returns the nested expressions */
277
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
278
- const typename internal::remove_all<typename InputXprType::Nested>::type&
279
- inputExpression() const { return m_input_xpr; }
280
-
281
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
282
- const typename internal::remove_all<typename KernelXprType::Nested>::type&
283
- kernelExpression() const { return m_kernel_xpr; }
284
-
285
- protected:
286
- typename InputXprType::Nested m_input_xpr;
287
- typename KernelXprType::Nested m_kernel_xpr;
288
- const Indices m_indices;
289
- };
290
-
291
-
292
- template<typename Indices, typename InputArgType, typename KernelArgType, typename Device>
293
- struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelArgType>, Device>
294
- {
295
- typedef TensorConvolutionOp<Indices, InputArgType, KernelArgType> XprType;
296
-
297
- static const int NumDims = internal::array_size<typename TensorEvaluator<InputArgType, Device>::Dimensions>::value;
298
- static const int NumKernelDims = internal::array_size<Indices>::value;
299
- typedef typename XprType::Index Index;
300
- typedef DSizes<Index, NumDims> Dimensions;
301
-
302
- typedef typename XprType::Scalar Scalar;
303
- typedef typename XprType::CoeffReturnType CoeffReturnType;
304
- typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
305
- static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
306
- typedef StorageMemory<Scalar, Device> Storage;
307
- typedef typename Storage::Type EvaluatorPointerType;
308
-
309
- enum {
310
- IsAligned = int(TensorEvaluator<InputArgType, Device>::IsAligned) & int(TensorEvaluator<KernelArgType, Device>::IsAligned),
311
- PacketAccess = int(TensorEvaluator<InputArgType, Device>::PacketAccess) & int(TensorEvaluator<KernelArgType, Device>::PacketAccess),
312
- BlockAccess = false,
313
- PreferBlockAccess = false,
314
- Layout = TensorEvaluator<InputArgType, Device>::Layout,
315
- CoordAccess = false, // to be implemented
316
- RawAccess = false
317
- };
318
-
319
- //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
320
- typedef internal::TensorBlockNotImplemented TensorBlock;
321
- //===--------------------------------------------------------------------===//
322
-
323
- EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
324
- : m_inputImpl(op.inputExpression(), device), m_kernelImpl(op.kernelExpression(), device), m_kernelArg(op.kernelExpression()), m_kernel(NULL), m_local_kernel(false), m_device(device)
325
- {
326
- EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<InputArgType, Device>::Layout) == static_cast<int>(TensorEvaluator<KernelArgType, Device>::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE);
327
-
328
- const typename TensorEvaluator<InputArgType, Device>::Dimensions& input_dims = m_inputImpl.dimensions();
329
- const typename TensorEvaluator<KernelArgType, Device>::Dimensions& kernel_dims = m_kernelImpl.dimensions();
330
-
331
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
332
- m_inputStride[0] = 1;
333
- for (int i = 1; i < NumDims; ++i) {
334
- m_inputStride[i] = m_inputStride[i - 1] * input_dims[i - 1];
335
- }
336
- } else {
337
- m_inputStride[NumDims - 1] = 1;
338
- for (int i = NumDims - 2; i >= 0; --i) {
339
- m_inputStride[i] = m_inputStride[i + 1] * input_dims[i + 1];
340
- }
341
- }
342
-
343
- m_dimensions = m_inputImpl.dimensions();
344
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
345
- for (int i = 0; i < NumKernelDims; ++i) {
346
- const Index index = op.indices()[i];
347
- const Index input_dim = input_dims[index];
348
- const Index kernel_dim = kernel_dims[i];
349
- const Index result_dim = input_dim - kernel_dim + 1;
350
- m_dimensions[index] = result_dim;
351
- if (i > 0) {
352
- m_kernelStride[i] = m_kernelStride[i - 1] * kernel_dims[i - 1];
353
- } else {
354
- m_kernelStride[0] = 1;
355
- }
356
- m_indexStride[i] = m_inputStride[index];
357
- }
358
-
359
- m_outputStride[0] = 1;
360
- for (int i = 1; i < NumDims; ++i) {
361
- m_outputStride[i] = m_outputStride[i - 1] * m_dimensions[i - 1];
362
- }
363
- } else {
364
- for (int i = NumKernelDims - 1; i >= 0; --i) {
365
- const Index index = op.indices()[i];
366
- const Index input_dim = input_dims[index];
367
- const Index kernel_dim = kernel_dims[i];
368
- const Index result_dim = input_dim - kernel_dim + 1;
369
- m_dimensions[index] = result_dim;
370
- if (i < NumKernelDims - 1) {
371
- m_kernelStride[i] = m_kernelStride[i + 1] * kernel_dims[i + 1];
372
- } else {
373
- m_kernelStride[NumKernelDims - 1] = 1;
374
- }
375
- m_indexStride[i] = m_inputStride[index];
376
- }
377
-
378
- m_outputStride[NumDims - 1] = 1;
379
- for (int i = NumDims - 2; i >= 0; --i) {
380
- m_outputStride[i] = m_outputStride[i + 1] * m_dimensions[i + 1];
381
- }
382
- }
383
- }
384
-
385
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
386
-
387
- EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) {
388
- m_inputImpl.evalSubExprsIfNeeded(NULL);
389
- preloadKernel();
390
- return true;
391
- }
392
- EIGEN_STRONG_INLINE void cleanup() {
393
- m_inputImpl.cleanup();
394
- if (m_local_kernel) {
395
- m_device.deallocate((void*)m_kernel);
396
- m_local_kernel = false;
397
- }
398
- m_kernel = NULL;
399
- }
400
-
401
- void evalTo(typename XprType::Scalar* buffer) {
402
- evalSubExprsIfNeeded(NULL);
403
- for (int i = 0; i < dimensions().TotalSize(); ++i) {
404
- buffer[i] += coeff(i);
405
- }
406
- cleanup();
407
- }
408
-
409
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
410
- {
411
- CoeffReturnType result = CoeffReturnType(0);
412
- convolve(firstInput(index), 0, NumKernelDims-1, result);
413
- return result;
414
- }
415
-
416
- template<int LoadMode>
417
- EIGEN_DEVICE_FUNC PacketReturnType packet(const Index index) const
418
- {
419
- Index indices[2] = {index, index+PacketSize-1};
420
- Index startInputs[2] = {0, 0};
421
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
422
- for (int i = NumDims - 1; i > 0; --i) {
423
- const Index idx0 = indices[0] / m_outputStride[i];
424
- const Index idx1 = indices[1] / m_outputStride[i];
425
- startInputs[0] += idx0 * m_inputStride[i];
426
- startInputs[1] += idx1 * m_inputStride[i];
427
- indices[0] -= idx0 * m_outputStride[i];
428
- indices[1] -= idx1 * m_outputStride[i];
429
- }
430
- } else {
431
- for (int i = 0; i < NumDims - 1; ++i) {
432
- const Index idx0 = indices[0] / m_outputStride[i];
433
- const Index idx1 = indices[1] / m_outputStride[i];
434
- startInputs[0] += idx0 * m_inputStride[i];
435
- startInputs[1] += idx1 * m_inputStride[i];
436
- indices[0] -= idx0 * m_outputStride[i];
437
- indices[1] -= idx1 * m_outputStride[i];
438
- }
439
- }
440
- startInputs[0] += indices[0];
441
- startInputs[1] += indices[1];
442
-
443
- if (startInputs[1]-startInputs[0] == PacketSize-1) {
444
- PacketReturnType result = internal::pset1<PacketReturnType>(0);
445
- convolvePacket(startInputs[0], 0, NumKernelDims-1, result);
446
- return result;
447
- } else {
448
- EIGEN_ALIGN_MAX Scalar data[PacketSize];
449
- data[0] = Scalar(0);
450
- convolve(startInputs[0], 0, NumKernelDims-1, data[0]);
451
- for (int i = 1; i < PacketSize-1; ++i) {
452
- data[i] = Scalar(0);
453
- convolve(firstInput(index+i), 0, NumKernelDims-1, data[i]);
454
- }
455
- data[PacketSize-1] = Scalar(0);
456
- convolve(startInputs[1], 0, NumKernelDims-1, data[PacketSize-1]);
457
- return internal::pload<PacketReturnType>(data);
458
- }
459
- }
460
-
461
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
462
- costPerCoeff(bool vectorized) const {
463
- const double kernel_size = m_kernelImpl.dimensions().TotalSize();
464
- // We ignore the use of fused multiply-add.
465
- const double convolve_compute_cost =
466
- TensorOpCost::AddCost<Scalar>() + TensorOpCost::MulCost<Scalar>();
467
- const double firstIndex_compute_cost =
468
- NumDims *
469
- (2 * TensorOpCost::AddCost<Index>() + 2 * TensorOpCost::MulCost<Index>() +
470
- TensorOpCost::DivCost<Index>());
471
- return TensorOpCost(0, 0, firstIndex_compute_cost, vectorized, PacketSize) +
472
- kernel_size * (m_inputImpl.costPerCoeff(vectorized) +
473
- m_kernelImpl.costPerCoeff(vectorized) +
474
- TensorOpCost(0, 0, convolve_compute_cost, vectorized,
475
- PacketSize));
476
- }
477
-
478
- EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }
479
-
480
- private:
481
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index firstInput(Index index) const {
482
- Index startInput = 0;
483
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
484
- for (int i = NumDims - 1; i > 0; --i) {
485
- const Index idx = index / m_outputStride[i];
486
- startInput += idx * m_inputStride[i];
487
- index -= idx * m_outputStride[i];
488
- }
489
- } else {
490
- for (int i = 0; i < NumDims - 1; ++i) {
491
- const Index idx = index / m_outputStride[i];
492
- startInput += idx * m_inputStride[i];
493
- index -= idx * m_outputStride[i];
494
- }
495
- }
496
- startInput += index;
497
- return startInput;
498
- }
499
-
500
- EIGEN_DEVICE_FUNC void convolve(Index firstIndex, Index firstKernel, int DimIndex, CoeffReturnType& accum) const {
501
- for (int j = 0; j < m_kernelImpl.dimensions()[DimIndex]; ++j) {
502
- const Index input = firstIndex + j * m_indexStride[DimIndex];
503
- const Index kernel = firstKernel + j * m_kernelStride[DimIndex];
504
- if (DimIndex > 0) {
505
- convolve(input, kernel, DimIndex-1, accum);
506
- } else {
507
- accum += m_inputImpl.coeff(input) * m_kernel[kernel];
508
- }
509
- }
510
- }
511
-
512
- template <typename Packet>
513
- EIGEN_DEVICE_FUNC void convolvePacket(Index firstIndex, Index firstKernel, int DimIndex, Packet& accum) const {
514
- for (int j = 0; j < m_kernelImpl.dimensions()[DimIndex]; ++j) {
515
- const Index input = firstIndex + j * m_indexStride[DimIndex];
516
- const Index kernel = firstKernel + j * m_kernelStride[DimIndex];
517
- if (DimIndex > 0) {
518
- convolvePacket(input, kernel, DimIndex-1, accum);
519
- } else {
520
- accum = internal::pmadd<Packet>(m_inputImpl.template packet<Unaligned>(input), internal::pset1<Packet>(m_kernel[kernel]), accum);
521
- }
522
- }
523
- }
524
-
525
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void preloadKernel() {
526
- // Don't make a local copy of the kernel unless we have to (i.e. it's an
527
- // expression that needs to be evaluated)
528
- const Scalar* in_place = m_kernelImpl.data();
529
- if (in_place) {
530
- m_kernel = in_place;
531
- m_local_kernel = false;
532
- } else {
533
- size_t kernel_sz = m_kernelImpl.dimensions().TotalSize() * sizeof(Scalar);
534
- Scalar* local = (Scalar*)m_device.allocate_temp(kernel_sz);
535
- typedef TensorEvalToOp<const KernelArgType> EvalTo;
536
- EvalTo evalToTmp(local, m_kernelArg);
537
- const bool Vectorize = internal::IsVectorizable<Device, KernelArgType>::value;
538
- internal::TensorExecutor<const EvalTo, Device, Vectorize>::run(evalToTmp, m_device);
539
-
540
- m_kernel = local;
541
- m_local_kernel = true;
542
- }
543
- }
544
-
545
- array<Index, NumDims> m_inputStride;
546
- array<Index, NumDims> m_outputStride;
547
-
548
- array<Index, NumKernelDims> m_indexStride;
549
- array<Index, NumKernelDims> m_kernelStride;
550
- TensorEvaluator<InputArgType, Device> m_inputImpl;
551
- TensorEvaluator<KernelArgType, Device> m_kernelImpl;
552
- Dimensions m_dimensions;
553
-
554
- KernelArgType m_kernelArg;
555
- const Scalar* m_kernel;
556
- bool m_local_kernel;
557
- const Device EIGEN_DEVICE_REF m_device;
558
- };
559
-
560
-
561
-
562
-
563
- // Use an optimized implementation of the evaluation code for GPUs whenever possible.
564
- #if defined(EIGEN_USE_GPU) && defined(EIGEN_GPUCC)
565
-
566
- template <int StaticKernelSize>
567
- struct GetKernelSize {
568
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int operator() (const int /*kernelSize*/) const {
569
- return StaticKernelSize;
570
- }
571
- };
572
- template <>
573
- struct GetKernelSize<Dynamic> {
574
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int operator() (const int kernelSize) const {
575
- return kernelSize;
576
- }
577
- };
578
-
579
- template <typename InputEvaluator, typename Index, typename InputDims,
580
- int StaticKernelSize>
581
- __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void EigenConvolutionKernel1D(
582
- InputEvaluator eval,
583
- const internal::IndexMapper<Index, InputDims, 1, InputEvaluator::Layout>
584
- indexMapper,
585
- const float* __restrict kernel, const int numPlanes, const int numX,
586
- const int maxX, const int kernelSize, float* buffer) {
587
- #if defined(EIGEN_HIPCC)
588
- HIP_DYNAMIC_SHARED(float, s)
589
- #else
590
- extern __shared__ float s[];
591
- #endif
592
-
593
- const int first_x = blockIdx.x * maxX;
594
- const int last_x = (first_x + maxX < numX ? first_x + maxX : numX) - 1;
595
- const int num_x_input = last_x - first_x + GetKernelSize<StaticKernelSize>()(kernelSize);
596
- const int num_x_output = last_x - first_x + 1;
597
-
598
- const int first_plane = blockIdx.y * blockDim.y;
599
- const int plane_stride = blockDim.y * gridDim.y;
600
-
601
- for (int p = first_plane + threadIdx.y; p < numPlanes; p += plane_stride) {
602
- // Load inputs to shared memory
603
- const int plane_input_offset = indexMapper.mapGpuInputPlaneToTensorInputOffset(p);
604
- const int plane_kernel_offset = threadIdx.y * num_x_input;
605
- #pragma unroll
606
- for (int i = threadIdx.x; i < num_x_input; i += blockDim.x) {
607
- const int tensor_index = plane_input_offset + indexMapper.mapGpuInputKernelToTensorInputOffset(i+first_x);
608
- s[i + plane_kernel_offset] = eval.coeff(tensor_index);
609
- }
610
-
611
- __syncthreads();
612
-
613
- // Compute the convolution
614
- const int plane_output_offset = indexMapper.mapGpuOutputPlaneToTensorOutputOffset(p);
615
-
616
- #pragma unroll
617
- for (int i = threadIdx.x; i < num_x_output; i += blockDim.x) {
618
- const int kernel_offset = plane_kernel_offset + i;
619
- float result = 0.0f;
620
- #pragma unroll
621
- for (int k = 0; k < GetKernelSize<StaticKernelSize>()(kernelSize); ++k) {
622
- result += s[k + kernel_offset] * kernel[k];
623
- }
624
- const int tensor_index = plane_output_offset + indexMapper.mapGpuOutputKernelToTensorOutputOffset(i+first_x);
625
- buffer[tensor_index] = result;
626
- }
627
- __syncthreads();
628
- }
629
- };
630
-
631
- template <typename InputEvaluator, typename Index, typename InputDims,
632
- int StaticKernelSizeX, int StaticKernelSizeY>
633
- __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void EigenConvolutionKernel2D(
634
- InputEvaluator eval,
635
- const internal::IndexMapper<Index, InputDims, 2, InputEvaluator::Layout>
636
- indexMapper,
637
- const float* __restrict kernel, const int numPlanes, const int numX,
638
- const int maxX, const int numY, const int maxY, const int kernelSizeX,
639
- const int kernelSizeY, float* buffer) {
640
- #if defined(EIGEN_HIPCC)
641
- HIP_DYNAMIC_SHARED(float, s)
642
- #else
643
- extern __shared__ float s[];
644
- #endif
645
-
646
- const int first_x = blockIdx.x * maxX;
647
- const int last_x = (first_x + maxX < numX ? first_x + maxX : numX) - 1;
648
- const int num_x_input = last_x - first_x + GetKernelSize<StaticKernelSizeX>()(kernelSizeX);
649
- const int num_x_output = last_x - first_x + 1;
650
-
651
- const int first_y = blockIdx.y * maxY;
652
- const int last_y = (first_y + maxY < numY ? first_y + maxY : numY) - 1;
653
- const int num_y_input = last_y - first_y + GetKernelSize<StaticKernelSizeY>()(kernelSizeY);
654
- const int num_y_output = last_y - first_y + 1;
655
-
656
- const int first_plane = blockIdx.z * blockDim.z;
657
- const int plane_stride = blockDim.z * gridDim.z;
658
-
659
- for (int p = first_plane + threadIdx.z; p < numPlanes; p += plane_stride) {
660
-
661
- const int plane_input_offset = indexMapper.mapGpuInputPlaneToTensorInputOffset(p);
662
- const int plane_kernel_offset = threadIdx.z * num_y_input;
663
-
664
- // Load inputs to shared memory
665
- #pragma unroll
666
- for (int j = threadIdx.y; j < num_y_input; j += blockDim.y) {
667
- const int input_offset = num_x_input * (j + plane_kernel_offset);
668
- #pragma unroll
669
- for (int i = threadIdx.x; i < num_x_input; i += blockDim.x) {
670
- const int tensor_index = plane_input_offset + indexMapper.mapGpuInputKernelToTensorInputOffset(i+first_x, j+first_y);
671
- s[i + input_offset] = eval.coeff(tensor_index);
672
- }
673
- }
674
-
675
- __syncthreads();
676
-
677
- // Convolution
678
- const int plane_output_offset = indexMapper.mapGpuOutputPlaneToTensorOutputOffset(p);
679
-
680
- #pragma unroll
681
- for (int j = threadIdx.y; j < num_y_output; j += blockDim.y) {
682
- #pragma unroll
683
- for (int i = threadIdx.x; i < num_x_output; i += blockDim.x) {
684
- float result = 0.0f;
685
- #pragma unroll
686
- for (int l = 0; l < GetKernelSize<StaticKernelSizeY>()(kernelSizeY); ++l) {
687
- const int kernel_offset = kernelSizeX * l;
688
- const int input_offset = i + num_x_input * (j + l + plane_kernel_offset);
689
- #pragma unroll
690
- for (int k = 0; k < GetKernelSize<StaticKernelSizeX>()(kernelSizeX); ++k) {
691
- result += s[k + input_offset] * kernel[k + kernel_offset];
692
- }
693
- }
694
- const int tensor_index = plane_output_offset + indexMapper.mapGpuOutputKernelToTensorOutputOffset(i+first_x, j+first_y);
695
- buffer[tensor_index] = result;
696
- }
697
- }
698
-
699
- __syncthreads();
700
- }
701
- };
702
-
703
- template <typename InputEvaluator, typename Index, typename InputDims>
704
- __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void EigenConvolutionKernel3D(
705
- InputEvaluator eval,
706
- const internal::IndexMapper<Index, InputDims, 3, InputEvaluator::Layout>
707
- indexMapper,
708
- const float* __restrict kernel, const size_t numPlanes, const size_t numX,
709
- const size_t maxX, const size_t numY, const size_t maxY, const size_t numZ,
710
- const size_t maxZ, const size_t kernelSizeX, const size_t kernelSizeY,
711
- const size_t kernelSizeZ, float* buffer) {
712
- #if defined(EIGEN_HIPCC)
713
- HIP_DYNAMIC_SHARED(float, s)
714
- #else
715
- extern __shared__ float s[];
716
- #endif
717
-
718
- // Load inputs to shared memory
719
- const int first_x = blockIdx.x * maxX;
720
- const int last_x = (first_x + maxX < numX ? first_x + maxX : numX) - 1;
721
- const int num_x_input = last_x - first_x + kernelSizeX;
722
-
723
- const int first_y = blockIdx.y * maxY;
724
- const int last_y = (first_y + maxY < numY ? first_y + maxY : numY) - 1;
725
- const int num_y_input = last_y - first_y + kernelSizeY;
726
-
727
- const int first_z = blockIdx.z * maxZ;
728
- const int last_z = (first_z + maxZ < numZ ? first_z + maxZ : numZ) - 1;
729
- const int num_z_input = last_z - first_z + kernelSizeZ;
730
-
731
- for (int p = 0; p < numPlanes; ++p) {
732
-
733
- const int plane_input_offset = indexMapper.mapGpuInputPlaneToTensorInputOffset(p);
734
- const int plane_kernel_offset = 0;
735
-
736
- for (int k = threadIdx.z; k < num_z_input; k += blockDim.z) {
737
- for (int j = threadIdx.y; j < num_y_input; j += blockDim.y) {
738
- for (int i = threadIdx.x; i < num_x_input; i += blockDim.x) {
739
- const int tensor_index = plane_input_offset + indexMapper.mapGpuInputKernelToTensorInputOffset(i+first_x, j+first_y, k+first_z);
740
- s[i + num_x_input * (j + num_y_input * (k + plane_kernel_offset))] = eval.coeff(tensor_index);
741
- }
742
- }
743
- }
744
-
745
- __syncthreads();
746
-
747
- // Convolution
748
- const int num_z_output = last_z - first_z + 1;
749
- const int num_y_output = last_y - first_y + 1;
750
- const int num_x_output = last_x - first_x + 1;
751
- const int plane_output_offset = indexMapper.mapGpuOutputPlaneToTensorOutputOffset(p);
752
-
753
- for (int k = threadIdx.z; k < num_z_output; k += blockDim.z) {
754
- for (int j = threadIdx.y; j < num_y_output; j += blockDim.y) {
755
- for (int i = threadIdx.x; i < num_x_output; i += blockDim.x) {
756
- float result = 0.0f;
757
- for (int n = 0; n < kernelSizeZ; ++n) {
758
- for (int m = 0; m < kernelSizeY; ++m) {
759
- for (int l = 0; l < kernelSizeX; ++l) {
760
- result += s[i + l + num_x_input * (j + m + num_y_input * (k + n + plane_kernel_offset))] * kernel[l + kernelSizeX * (m + kernelSizeY * n)];
761
- }
762
- }
763
- }
764
- const int tensor_index = plane_output_offset + indexMapper.mapGpuOutputKernelToTensorOutputOffset(i+first_x, j+first_y, k+first_z);
765
- buffer[tensor_index] = result;
766
- }
767
- }
768
- }
769
- __syncthreads();
770
- }
771
- };
772
-
773
-
774
-
775
- template<typename Indices, typename InputArgType, typename KernelArgType>
776
- struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelArgType>, GpuDevice>
777
- {
778
- typedef TensorConvolutionOp<Indices, InputArgType, KernelArgType> XprType;
779
-
780
- static const int NumDims = internal::array_size<typename TensorEvaluator<InputArgType, GpuDevice>::Dimensions>::value;
781
- static const int NumKernelDims = internal::array_size<Indices>::value;
782
- typedef typename XprType::Index Index;
783
- typedef DSizes<Index, NumDims> Dimensions;
784
- typedef typename TensorEvaluator<KernelArgType, GpuDevice>::Dimensions KernelDimensions;
785
-
786
- enum {
787
- IsAligned = TensorEvaluator<InputArgType, GpuDevice>::IsAligned & TensorEvaluator<KernelArgType, GpuDevice>::IsAligned,
788
- PacketAccess = false,
789
- BlockAccess = false,
790
- PreferBlockAccess = false,
791
- Layout = TensorEvaluator<InputArgType, GpuDevice>::Layout,
792
- CoordAccess = false, // to be implemented
793
- RawAccess = false
794
- };
795
-
796
- //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
797
- typedef internal::TensorBlockNotImplemented TensorBlock;
798
- //===--------------------------------------------------------------------===//
799
-
800
- TensorEvaluator(const XprType& op, const GpuDevice& device)
801
- : m_inputImpl(op.inputExpression(), device), m_kernelImpl(op.kernelExpression(), device), m_kernelArg(op.kernelExpression()), m_indices(op.indices()), m_buf(NULL), m_kernel(NULL), m_local_kernel(false), m_device(device)
802
- {
803
- EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<InputArgType, GpuDevice>::Layout) == static_cast<int>(TensorEvaluator<KernelArgType, GpuDevice>::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE);
804
-
805
- const typename TensorEvaluator<InputArgType, GpuDevice>::Dimensions& input_dims = m_inputImpl.dimensions();
806
- const typename TensorEvaluator<KernelArgType, GpuDevice>::Dimensions& kernel_dims = m_kernelImpl.dimensions();
807
-
808
- m_dimensions = m_inputImpl.dimensions();
809
- for (int i = 0; i < NumKernelDims; ++i) {
810
- const Index index = op.indices()[i];
811
- const Index input_dim = input_dims[index];
812
- const Index kernel_dim = kernel_dims[i];
813
- const Index result_dim = input_dim - kernel_dim + 1;
814
- m_dimensions[index] = result_dim;
815
- }
816
- }
817
-
818
- typedef typename XprType::CoeffReturnType CoeffReturnType;
819
- typedef typename PacketType<CoeffReturnType, GpuDevice>::type PacketReturnType;
820
- typedef typename InputArgType::Scalar Scalar;
821
- static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
822
-
823
- EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_dimensions; }
824
-
825
- EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data) {
826
- preloadKernel();
827
- m_inputImpl.evalSubExprsIfNeeded(NULL);
828
- if (data) {
829
- executeEval(data);
830
- return false;
831
- } else {
832
- m_buf = (Scalar*)m_device.allocate(dimensions().TotalSize() * sizeof(Scalar));
833
- executeEval(m_buf);
834
- return true;
835
- }
836
- }
837
-
838
- EIGEN_STRONG_INLINE void cleanup() {
839
- m_inputImpl.cleanup();
840
- if (m_buf) {
841
- m_device.deallocate(m_buf);
842
- m_buf = NULL;
843
- }
844
- if (m_local_kernel) {
845
- m_device.deallocate((void*)m_kernel);
846
- m_local_kernel = false;
847
- }
848
- m_kernel = NULL;
849
- }
850
-
851
- EIGEN_STRONG_INLINE void preloadKernel() {
852
- // Don't make a local copy of the kernel unless we have to (i.e. it's an
853
- // expression that needs to be evaluated)
854
- const Scalar* in_place = m_kernelImpl.data();
855
- if (in_place) {
856
- m_kernel = in_place;
857
- m_local_kernel = false;
858
- } else {
859
- size_t kernel_sz = m_kernelImpl.dimensions().TotalSize() * sizeof(Scalar);
860
- Scalar* local = (Scalar*)m_device.allocate(kernel_sz);
861
- typedef TensorEvalToOp<const KernelArgType> EvalTo;
862
- EvalTo evalToTmp(local, m_kernelArg);
863
- const bool PacketAccess = internal::IsVectorizable<GpuDevice, KernelArgType>::value;
864
- internal::TensorExecutor<const EvalTo, GpuDevice, PacketAccess>::run(evalToTmp, m_device);
865
-
866
- m_kernel = local;
867
- m_local_kernel = true;
868
- }
869
- }
870
-
871
- static unsigned int ceil(unsigned int num, unsigned int denom) {
872
- const unsigned int rounded_toward_zero = num / denom;
873
- if (num > rounded_toward_zero * denom) {
874
- return rounded_toward_zero + 1;
875
- }
876
- return rounded_toward_zero;
877
- }
878
-
879
- void executeEval(Scalar* data) const {
880
- typedef typename TensorEvaluator<InputArgType, GpuDevice>::Dimensions InputDims;
881
-
882
- const int maxSharedMem = m_device.sharedMemPerBlock();
883
- const int maxThreadsPerBlock = m_device.maxGpuThreadsPerBlock();
884
- const int maxBlocksPerProcessor = m_device.maxGpuThreadsPerMultiProcessor() / maxThreadsPerBlock;
885
- const int numMultiProcessors = m_device.getNumGpuMultiProcessors();
886
- const int warpSize = 32;
887
-
888
- switch (NumKernelDims) {
889
- case 1: {
890
- const int kernel_size = m_kernelImpl.dimensions().TotalSize();
891
-
892
- const int numX = dimensions()[m_indices[0]];
893
- const int numP = dimensions().TotalSize() / numX;
894
- int maxX;
895
- dim3 block_size;
896
-
897
- const int single_stride_dim =
898
- static_cast<int>(Layout) == static_cast<int>(ColMajor)
899
- ? 0
900
- : m_inputImpl.dimensions().rank() - 1;
901
- if (m_indices[0] == single_stride_dim) {
902
- // Maximum the reuse
903
- const int inner_dim = ((maxSharedMem / (sizeof(Scalar)) - kernel_size + 1 + 31) / 32) * 32;
904
- maxX = numext::mini<int>(inner_dim, numX);
905
- const int maxP = numext::mini<int>(maxSharedMem / ((kernel_size - 1 + maxX) * sizeof(Scalar)), numP);
906
- block_size.x = numext::mini(maxThreadsPerBlock, maxX);
907
- block_size.y = numext::mini<int>(maxThreadsPerBlock / block_size.x, maxP);
908
- }
909
- else {
910
- // Read as much as possible alongside the inner most dimension, that is the plane
911
- const int inner_dim = maxSharedMem / ((warpSize + kernel_size) * sizeof(Scalar));
912
- const int maxP = numext::mini<int>(inner_dim, numP);
913
- maxX = numext::mini<int>(maxSharedMem / (inner_dim * sizeof(Scalar)) - kernel_size + 1, numX);
914
-
915
- block_size.x = numext::mini(warpSize, maxX);
916
- block_size.y = numext::mini<int>(maxThreadsPerBlock/block_size.x, maxP);
917
- }
918
-
919
- const int shared_mem = block_size.y * (maxX + kernel_size - 1) * sizeof(Scalar);
920
- gpu_assert(shared_mem <= maxSharedMem);
921
-
922
- const int num_x_blocks = ceil(numX, maxX);
923
- const int blocksPerProcessor = numext::mini(maxBlocksPerProcessor, maxSharedMem / shared_mem);
924
- const int num_y_blocks = ceil(numMultiProcessors * blocksPerProcessor, num_x_blocks);
925
-
926
- dim3 num_blocks(num_x_blocks, numext::mini<int>(num_y_blocks, ceil(numP, block_size.y)));
927
-
928
-
929
- //cout << "launching 1D kernel with block_size.x: " << block_size.x << " block_size.y: " << block_size.y << " num_blocks.x: " << num_blocks.x << " num_blocks.y: " << num_blocks.y << " maxX: " << maxX << " shared_mem: " << shared_mem << " in stream " << m_device.stream() << endl;
930
-
931
- const array<Index, 1> indices(m_indices[0]);
932
- const array<Index, 1> kernel_dims(m_kernelImpl.dimensions()[0]);
933
- internal::IndexMapper<Index, InputDims, 1, Layout> indexMapper(
934
- m_inputImpl.dimensions(), kernel_dims, indices);
935
- switch(kernel_size) {
936
- case 4: {
937
- LAUNCH_GPU_KERNEL((EigenConvolutionKernel1D<TensorEvaluator<InputArgType, GpuDevice>, Index, InputDims, 4>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, 4, data);
938
- break;
939
- }
940
- case 7: {
941
- LAUNCH_GPU_KERNEL((EigenConvolutionKernel1D<TensorEvaluator<InputArgType, GpuDevice>, Index, InputDims, 7>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, 7, data);
942
- break;
943
- }
944
- default: {
945
- LAUNCH_GPU_KERNEL((EigenConvolutionKernel1D<TensorEvaluator<InputArgType, GpuDevice>, Index, InputDims, Dynamic>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, kernel_size, data);
946
- }
947
- }
948
- break;
949
- }
950
-
951
- case 2: {
952
- const int idxX =
953
- static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 0 : 1;
954
- const int idxY =
955
- static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 1 : 0;
956
- const int kernel_size_x = m_kernelImpl.dimensions()[idxX];
957
- const int kernel_size_y = m_kernelImpl.dimensions()[idxY];
958
-
959
- const int numX = dimensions()[m_indices[idxX]];
960
- const int numY = dimensions()[m_indices[idxY]];
961
- const int numP = dimensions().TotalSize() / (numX*numY);
962
-
963
- const float scaling_factor = sqrtf(static_cast<float>(maxSharedMem) / (sizeof(Scalar) * kernel_size_y * kernel_size_x));
964
-
965
- // Snap maxX to warp size
966
- int inner_dim = ((static_cast<int>(scaling_factor * kernel_size_x) - kernel_size_x + 1 + 32) / 32) * 32;
967
- const int maxX = numext::mini<int>(inner_dim, numX);
968
- const int maxY = numext::mini<int>(maxSharedMem / (sizeof(Scalar) * (maxX + kernel_size_x - 1)) - kernel_size_y + 1, numY);
969
- const int maxP = numext::mini<int>(maxSharedMem / ((kernel_size_x - 1 + maxX) * (kernel_size_y - 1 + maxY) * sizeof(Scalar)), numP);
970
-
971
- dim3 block_size;
972
- block_size.x = numext::mini(1024, maxX);
973
- block_size.y = numext::mini<int>(1024/block_size.x, maxY);
974
- block_size.z = numext::mini<int>(1024/(block_size.x*block_size.y), maxP);
975
-
976
- const int shared_mem = block_size.z * (maxX + kernel_size_x - 1) * (maxY + kernel_size_y - 1) * sizeof(Scalar);
977
- gpu_assert(shared_mem <= maxSharedMem);
978
-
979
- const int num_x_blocks = ceil(numX, maxX);
980
- const int num_y_blocks = ceil(numY, maxY);
981
- const int blocksPerProcessor = numext::mini(maxBlocksPerProcessor, maxSharedMem / shared_mem);
982
- const int num_z_blocks = ceil(numMultiProcessors * blocksPerProcessor, num_x_blocks * num_y_blocks);
983
-
984
- dim3 num_blocks(num_x_blocks, num_y_blocks, numext::mini<int>(num_z_blocks, ceil(numP, block_size.z)));
985
-
986
-
987
- //cout << "launching 2D kernel with block_size.x: " << block_size.x << " block_size.y: " << block_size.y << " block_size.z: " << block_size.z << " num_blocks.x: " << num_blocks.x << " num_blocks.y: " << num_blocks.y << " num_blocks.z: " << num_blocks.z << " maxX: " << maxX << " maxY: " << maxY << " maxP: " << maxP << " shared_mem: " << shared_mem << " in stream " << m_device.stream() << endl;
988
-
989
- const array<Index, 2> indices(m_indices[idxX], m_indices[idxY]);
990
- const array<Index, 2> kernel_dims(m_kernelImpl.dimensions()[idxX],
991
- m_kernelImpl.dimensions()[idxY]);
992
- internal::IndexMapper<Index, InputDims, 2, Layout> indexMapper(
993
- m_inputImpl.dimensions(), kernel_dims, indices);
994
- switch (kernel_size_x) {
995
- case 4: {
996
- switch (kernel_size_y) {
997
- case 7: {
998
- LAUNCH_GPU_KERNEL((EigenConvolutionKernel2D<TensorEvaluator<InputArgType, GpuDevice>, Index, InputDims, 4, 7>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, numY, maxY, 4, 7, data);
999
- break;
1000
- }
1001
- default: {
1002
- LAUNCH_GPU_KERNEL((EigenConvolutionKernel2D<TensorEvaluator<InputArgType, GpuDevice>, Index, InputDims, 4, Dynamic>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, numY, maxY, 4, kernel_size_y, data);
1003
- break;
1004
- }
1005
- }
1006
- break;
1007
- }
1008
- case 7: {
1009
- switch (kernel_size_y) {
1010
- case 4: {
1011
- LAUNCH_GPU_KERNEL((EigenConvolutionKernel2D<TensorEvaluator<InputArgType, GpuDevice>, Index, InputDims, 7, 4>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, numY, maxY, 7, 4, data);
1012
- break;
1013
- }
1014
- default: {
1015
- LAUNCH_GPU_KERNEL((EigenConvolutionKernel2D<TensorEvaluator<InputArgType, GpuDevice>, Index, InputDims, 7, Dynamic>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, numY, maxY, 7, kernel_size_y, data);
1016
- break;
1017
- }
1018
- }
1019
- break;
1020
- }
1021
- default: {
1022
- LAUNCH_GPU_KERNEL((EigenConvolutionKernel2D<TensorEvaluator<InputArgType, GpuDevice>, Index, InputDims, Dynamic, Dynamic>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, numY, maxY, kernel_size_x, kernel_size_y, data);
1023
- break;
1024
- }
1025
- }
1026
- break;
1027
- }
1028
-
1029
- case 3: {
1030
- const int idxX =
1031
- static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 0 : 2;
1032
- const int idxY =
1033
- static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 1 : 1;
1034
- const int idxZ =
1035
- static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 2 : 0;
1036
-
1037
- const int kernel_size_x = m_kernelImpl.dimensions()[idxX];
1038
- const int kernel_size_y = m_kernelImpl.dimensions()[idxY];
1039
- const int kernel_size_z = m_kernelImpl.dimensions()[idxZ];
1040
-
1041
- const int numX = dimensions()[m_indices[idxX]];
1042
- const int numY = dimensions()[m_indices[idxY]];
1043
- const int numZ = dimensions()[m_indices[idxZ]];
1044
- const int numP = dimensions().TotalSize() / (numX*numY*numZ);
1045
-
1046
- const int maxX = numext::mini<int>(128, numext::mini<int>(maxSharedMem / (sizeof(Scalar) * kernel_size_y * kernel_size_z) - kernel_size_x + 1, numX));
1047
- const int maxY = numext::mini<int>(128, numext::mini<int>(maxSharedMem / (sizeof(Scalar) * (maxX + kernel_size_x - 1) * kernel_size_z) - kernel_size_y + 1, numY));
1048
- const int maxZ = numext::mini<int>(128, numext::mini<int>(maxSharedMem / (sizeof(Scalar) * (maxX + kernel_size_x - 1) * (maxY + kernel_size_y - 1)) - kernel_size_z + 1, numZ));
1049
-
1050
- dim3 block_size;
1051
- block_size.x = numext::mini(32, maxX);
1052
- block_size.y = numext::mini(32, maxY);
1053
- block_size.z = numext::mini<int>(1024/(block_size.x*block_size.y), maxZ);
1054
- dim3 num_blocks(ceil(numX, maxX), ceil(numY, maxY), ceil(numZ, maxZ));
1055
-
1056
- const int shared_mem = (maxX + kernel_size_x - 1) * (maxY + kernel_size_y - 1) * (maxZ + kernel_size_z - 1) * sizeof(Scalar);
1057
- gpu_assert(shared_mem <= maxSharedMem);
1058
-
1059
- //cout << "launching 3D kernel with block_size.x: " << block_size.x << " block_size.y: " << block_size.y << " block_size.z: " << block_size.z << " num_blocks.x: " << num_blocks.x << " num_blocks.y: " << num_blocks.y << " num_blocks.z: " << num_blocks.z << " shared_mem: " << shared_mem << " in stream " << m_device.stream() << endl;
1060
- const array<Index, 3> indices(m_indices[idxX], m_indices[idxY],
1061
- m_indices[idxZ]);
1062
- const array<Index, 3> kernel_dims(m_kernelImpl.dimensions()[idxX],
1063
- m_kernelImpl.dimensions()[idxY],
1064
- m_kernelImpl.dimensions()[idxZ]);
1065
- internal::IndexMapper<Index, InputDims, 3, Layout> indexMapper(
1066
- m_inputImpl.dimensions(), kernel_dims, indices);
1067
-
1068
- LAUNCH_GPU_KERNEL((EigenConvolutionKernel3D<TensorEvaluator<InputArgType, GpuDevice>, Index, InputDims>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, numY, maxY, numZ, maxZ, kernel_size_x, kernel_size_y, kernel_size_z, data);
1069
- break;
1070
- }
1071
-
1072
- default: {
1073
- EIGEN_STATIC_ASSERT((NumKernelDims >= 1 && NumKernelDims <= 3), THIS_METHOD_IS_ONLY_FOR_OBJECTS_OF_A_SPECIFIC_SIZE);
1074
- }
1075
- }
1076
- }
1077
-
1078
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
1079
- {
1080
- eigen_assert(m_buf);
1081
- eigen_assert(index < m_dimensions.TotalSize());
1082
- return m_buf[index];
1083
- }
1084
-
1085
- template<int LoadMode>
1086
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(const Index index) const
1087
- {
1088
- eigen_assert(m_buf);
1089
- eigen_assert(index < m_dimensions.TotalSize());
1090
- return internal::ploadt<PacketReturnType, LoadMode>(m_buf+index);
1091
- }
1092
-
1093
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
1094
- costPerCoeff(bool vectorized) const {
1095
- // TODO(rmlarsen): FIXME: For now, this is just a copy of the CPU cost
1096
- // model.
1097
- const double kernel_size = m_kernelImpl.dimensions().TotalSize();
1098
- // We ignore the use of fused multiply-add.
1099
- const double convolve_compute_cost =
1100
- TensorOpCost::AddCost<Scalar>() + TensorOpCost::MulCost<Scalar>();
1101
- const double firstIndex_compute_cost =
1102
- NumDims *
1103
- (2 * TensorOpCost::AddCost<Index>() + 2 * TensorOpCost::MulCost<Index>() +
1104
- TensorOpCost::DivCost<Index>());
1105
- return TensorOpCost(0, 0, firstIndex_compute_cost, vectorized, PacketSize) +
1106
- kernel_size * (m_inputImpl.costPerCoeff(vectorized) +
1107
- m_kernelImpl.costPerCoeff(vectorized) +
1108
- TensorOpCost(0, 0, convolve_compute_cost, vectorized,
1109
- PacketSize));
1110
- }
1111
-
1112
- private:
1113
- // No assignment (copies are needed by the kernels)
1114
- TensorEvaluator& operator = (const TensorEvaluator&);
1115
-
1116
- TensorEvaluator<InputArgType, GpuDevice> m_inputImpl;
1117
- TensorEvaluator<KernelArgType, GpuDevice> m_kernelImpl;
1118
- KernelArgType m_kernelArg;
1119
- Indices m_indices;
1120
- Dimensions m_dimensions;
1121
- Scalar* m_buf;
1122
- const Scalar* m_kernel;
1123
- bool m_local_kernel;
1124
-
1125
- const GpuDevice& m_device;
1126
- };
1127
- #endif
1128
-
1129
-
1130
- } // end namespace Eigen
1131
-
1132
- #endif // EIGEN_CXX11_TENSOR_TENSOR_CONVOLUTION_H