sequenzo 0.1.18__cp39-cp39-macosx_10_9_universal2.whl → 0.1.20__cp39-cp39-macosx_10_9_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sequenzo might be problematic. Click here for more details.

Files changed (360) hide show
  1. sequenzo/__init__.py +39 -7
  2. sequenzo/big_data/clara/utils/get_weighted_diss.c +157 -157
  3. sequenzo/big_data/clara/utils/get_weighted_diss.cpython-39-darwin.so +0 -0
  4. sequenzo/clustering/KMedoids.py +39 -0
  5. sequenzo/clustering/hierarchical_clustering.py +108 -6
  6. sequenzo/define_sequence_data.py +10 -1
  7. sequenzo/dissimilarity_measures/get_distance_matrix.py +2 -3
  8. sequenzo/dissimilarity_measures/get_substitution_cost_matrix.py +1 -1
  9. sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.c +157 -157
  10. sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.cpython-39-darwin.so +0 -0
  11. sequenzo/dissimilarity_measures/utils/seqconc.c +157 -157
  12. sequenzo/dissimilarity_measures/utils/seqconc.cpython-39-darwin.so +0 -0
  13. sequenzo/dissimilarity_measures/utils/seqdss.c +157 -157
  14. sequenzo/dissimilarity_measures/utils/seqdss.cpython-39-darwin.so +0 -0
  15. sequenzo/dissimilarity_measures/utils/seqdur.c +157 -157
  16. sequenzo/dissimilarity_measures/utils/seqdur.cpython-39-darwin.so +0 -0
  17. sequenzo/dissimilarity_measures/utils/seqlength.c +157 -157
  18. sequenzo/dissimilarity_measures/utils/seqlength.cpython-39-darwin.so +0 -0
  19. sequenzo/multidomain/cat.py +0 -53
  20. sequenzo/multidomain/dat.py +11 -3
  21. sequenzo/multidomain/idcd.py +0 -3
  22. sequenzo/multidomain/linked_polyad.py +0 -1
  23. sequenzo/openmp_setup.py +233 -0
  24. sequenzo/visualization/plot_transition_matrix.py +21 -22
  25. {sequenzo-0.1.18.dist-info → sequenzo-0.1.20.dist-info}/METADATA +71 -10
  26. sequenzo-0.1.20.dist-info/RECORD +215 -0
  27. sequenzo/dissimilarity_measures/setup.py +0 -35
  28. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Cholesky/LDLT.h +0 -688
  29. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Cholesky/LLT.h +0 -558
  30. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h +0 -99
  31. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +0 -682
  32. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +0 -346
  33. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +0 -462
  34. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +0 -91
  35. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/EigenSolver.h +0 -622
  36. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +0 -418
  37. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +0 -226
  38. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +0 -374
  39. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +0 -158
  40. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/RealQZ.h +0 -657
  41. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/RealSchur.h +0 -558
  42. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +0 -77
  43. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +0 -904
  44. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +0 -87
  45. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +0 -561
  46. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/AlignedBox.h +0 -486
  47. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/AngleAxis.h +0 -247
  48. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/EulerAngles.h +0 -114
  49. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/Homogeneous.h +0 -501
  50. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/Hyperplane.h +0 -282
  51. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/OrthoMethods.h +0 -235
  52. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/ParametrizedLine.h +0 -232
  53. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/Quaternion.h +0 -870
  54. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/Rotation2D.h +0 -199
  55. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/RotationBase.h +0 -206
  56. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/Scaling.h +0 -188
  57. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/Transform.h +0 -1563
  58. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/Translation.h +0 -202
  59. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/Umeyama.h +0 -166
  60. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +0 -168
  61. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Householder/BlockHouseholder.h +0 -110
  62. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Householder/Householder.h +0 -176
  63. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Householder/HouseholderSequence.h +0 -545
  64. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +0 -226
  65. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +0 -212
  66. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +0 -229
  67. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +0 -394
  68. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +0 -453
  69. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +0 -444
  70. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +0 -198
  71. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +0 -117
  72. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Jacobi/Jacobi.h +0 -483
  73. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/KLUSupport/KLUSupport.h +0 -358
  74. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/LU/Determinant.h +0 -117
  75. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/LU/FullPivLU.h +0 -877
  76. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/LU/InverseImpl.h +0 -432
  77. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/LU/PartialPivLU.h +0 -624
  78. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h +0 -83
  79. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/LU/arch/InverseSize4.h +0 -351
  80. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/MetisSupport/MetisSupport.h +0 -137
  81. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/OrderingMethods/Amd.h +0 -435
  82. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +0 -1863
  83. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/OrderingMethods/Ordering.h +0 -153
  84. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +0 -678
  85. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +0 -545
  86. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/QR/ColPivHouseholderQR.h +0 -674
  87. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +0 -97
  88. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +0 -635
  89. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/QR/FullPivHouseholderQR.h +0 -713
  90. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/QR/HouseholderQR.h +0 -434
  91. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h +0 -68
  92. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +0 -335
  93. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SVD/BDCSVD.h +0 -1366
  94. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SVD/JacobiSVD.h +0 -812
  95. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +0 -91
  96. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SVD/SVDBase.h +0 -376
  97. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SVD/UpperBidiagonalization.h +0 -414
  98. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +0 -697
  99. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +0 -174
  100. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/AmbiVector.h +0 -378
  101. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/CompressedStorage.h +0 -274
  102. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +0 -352
  103. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h +0 -67
  104. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseAssign.h +0 -270
  105. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseBlock.h +0 -571
  106. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseColEtree.h +0 -206
  107. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +0 -370
  108. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +0 -722
  109. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +0 -150
  110. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +0 -342
  111. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +0 -138
  112. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseDot.h +0 -98
  113. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseFuzzy.h +0 -29
  114. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseMap.h +0 -305
  115. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseMatrix.h +0 -1518
  116. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +0 -398
  117. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparsePermutation.h +0 -178
  118. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseProduct.h +0 -181
  119. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseRedux.h +0 -49
  120. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseRef.h +0 -397
  121. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +0 -659
  122. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseSolverBase.h +0 -124
  123. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +0 -198
  124. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseTranspose.h +0 -92
  125. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseTriangularView.h +0 -189
  126. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseUtil.h +0 -186
  127. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseVector.h +0 -478
  128. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseView.h +0 -254
  129. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/TriangularSolver.h +0 -315
  130. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU.h +0 -923
  131. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLUImpl.h +0 -66
  132. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +0 -226
  133. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_Structs.h +0 -110
  134. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +0 -375
  135. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_Utils.h +0 -80
  136. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h +0 -181
  137. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +0 -179
  138. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +0 -107
  139. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +0 -280
  140. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +0 -126
  141. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +0 -130
  142. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +0 -223
  143. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h +0 -258
  144. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h +0 -137
  145. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h +0 -136
  146. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h +0 -83
  147. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseQR/SparseQR.h +0 -758
  148. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/StlSupport/StdDeque.h +0 -116
  149. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/StlSupport/StdList.h +0 -106
  150. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/StlSupport/StdVector.h +0 -131
  151. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/StlSupport/details.h +0 -84
  152. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +0 -1025
  153. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +0 -642
  154. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/misc/Image.h +0 -82
  155. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/misc/Kernel.h +0 -79
  156. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/misc/RealSvd2x2.h +0 -55
  157. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/misc/blas.h +0 -440
  158. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/misc/lapack.h +0 -152
  159. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/misc/lapacke.h +0 -16292
  160. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/misc/lapacke_mangling.h +0 -17
  161. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +0 -358
  162. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +0 -696
  163. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/BlockMethods.h +0 -1442
  164. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h +0 -115
  165. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +0 -177
  166. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/IndexedViewMethods.h +0 -262
  167. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +0 -152
  168. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +0 -95
  169. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/ReshapedMethods.h +0 -149
  170. sequenzo/dissimilarity_measures/src/eigen/blas/BandTriangularSolver.h +0 -97
  171. sequenzo/dissimilarity_measures/src/eigen/blas/GeneralRank1Update.h +0 -44
  172. sequenzo/dissimilarity_measures/src/eigen/blas/PackedSelfadjointProduct.h +0 -53
  173. sequenzo/dissimilarity_measures/src/eigen/blas/PackedTriangularMatrixVector.h +0 -79
  174. sequenzo/dissimilarity_measures/src/eigen/blas/PackedTriangularSolverVector.h +0 -88
  175. sequenzo/dissimilarity_measures/src/eigen/blas/Rank2Update.h +0 -57
  176. sequenzo/dissimilarity_measures/src/eigen/blas/common.h +0 -175
  177. sequenzo/dissimilarity_measures/src/eigen/blas/f2c/datatypes.h +0 -24
  178. sequenzo/dissimilarity_measures/src/eigen/blas/level1_cplx_impl.h +0 -155
  179. sequenzo/dissimilarity_measures/src/eigen/blas/level1_impl.h +0 -144
  180. sequenzo/dissimilarity_measures/src/eigen/blas/level1_real_impl.h +0 -122
  181. sequenzo/dissimilarity_measures/src/eigen/blas/level2_cplx_impl.h +0 -360
  182. sequenzo/dissimilarity_measures/src/eigen/blas/level2_impl.h +0 -553
  183. sequenzo/dissimilarity_measures/src/eigen/blas/level2_real_impl.h +0 -306
  184. sequenzo/dissimilarity_measures/src/eigen/blas/level3_impl.h +0 -702
  185. sequenzo/dissimilarity_measures/src/eigen/debug/gdb/__init__.py +0 -1
  186. sequenzo/dissimilarity_measures/src/eigen/debug/gdb/printers.py +0 -314
  187. sequenzo/dissimilarity_measures/src/eigen/lapack/lapack_common.h +0 -29
  188. sequenzo/dissimilarity_measures/src/eigen/scripts/relicense.py +0 -69
  189. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/Tensor.h +0 -554
  190. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h +0 -329
  191. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h +0 -247
  192. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +0 -1176
  193. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h +0 -1559
  194. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h +0 -1093
  195. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h +0 -518
  196. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h +0 -377
  197. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h +0 -1023
  198. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h +0 -73
  199. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h +0 -6
  200. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h +0 -1413
  201. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h +0 -575
  202. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionSycl.h +0 -1650
  203. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h +0 -1679
  204. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h +0 -456
  205. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h +0 -1132
  206. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConvolutionSycl.h +0 -544
  207. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h +0 -214
  208. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h +0 -347
  209. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h +0 -137
  210. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h +0 -6
  211. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h +0 -104
  212. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceGpu.h +0 -389
  213. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h +0 -1048
  214. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h +0 -409
  215. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h +0 -236
  216. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h +0 -490
  217. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h +0 -236
  218. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h +0 -983
  219. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h +0 -703
  220. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h +0 -388
  221. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h +0 -669
  222. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h +0 -379
  223. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h +0 -237
  224. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h +0 -191
  225. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h +0 -488
  226. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h +0 -302
  227. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorGlobalFunctions.h +0 -33
  228. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h +0 -99
  229. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaUndefines.h +0 -44
  230. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h +0 -79
  231. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h +0 -603
  232. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h +0 -738
  233. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h +0 -247
  234. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorInitializer.h +0 -82
  235. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h +0 -263
  236. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h +0 -216
  237. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMacros.h +0 -98
  238. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h +0 -327
  239. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h +0 -311
  240. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h +0 -1102
  241. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h +0 -708
  242. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h +0 -291
  243. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h +0 -322
  244. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h +0 -998
  245. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h +0 -6
  246. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h +0 -966
  247. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReductionSycl.h +0 -582
  248. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h +0 -454
  249. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h +0 -465
  250. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h +0 -528
  251. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorScanSycl.h +0 -513
  252. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h +0 -471
  253. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h +0 -161
  254. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h +0 -346
  255. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorTrace.h +0 -303
  256. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h +0 -264
  257. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h +0 -249
  258. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h +0 -629
  259. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/TensorSymmetry/DynamicSymmetry.h +0 -293
  260. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/TensorSymmetry/StaticSymmetry.h +0 -236
  261. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/TensorSymmetry/Symmetry.h +0 -338
  262. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/TensorSymmetry/util/TemplateGroupTheory.h +0 -669
  263. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/Barrier.h +0 -67
  264. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/EventCount.h +0 -249
  265. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h +0 -486
  266. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/RunQueue.h +0 -236
  267. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadCancel.h +0 -23
  268. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadEnvironment.h +0 -40
  269. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadLocal.h +0 -301
  270. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadPoolInterface.h +0 -48
  271. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadYield.h +0 -20
  272. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/util/CXX11Meta.h +0 -537
  273. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/util/CXX11Workarounds.h +0 -88
  274. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/util/EmulateArray.h +0 -261
  275. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/util/MaxSizeVector.h +0 -158
  276. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/AutoDiff/AutoDiffJacobian.h +0 -108
  277. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h +0 -730
  278. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/AutoDiff/AutoDiffVector.h +0 -220
  279. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/BVH/BVAlgorithms.h +0 -293
  280. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/BVH/KdBVH.h +0 -223
  281. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Eigenvalues/ArpackSelfAdjointEigenSolver.h +0 -790
  282. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/EulerAngles/EulerAngles.h +0 -355
  283. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/EulerAngles/EulerSystem.h +0 -305
  284. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/FFT/ei_fftw_impl.h +0 -261
  285. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/FFT/ei_kissfft_impl.h +0 -449
  286. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/IterativeSolvers/ConstrainedConjGrad.h +0 -187
  287. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/IterativeSolvers/DGMRES.h +0 -511
  288. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/IterativeSolvers/GMRES.h +0 -335
  289. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/IterativeSolvers/IDRS.h +0 -436
  290. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/IterativeSolvers/IncompleteLU.h +0 -90
  291. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/IterativeSolvers/IterationController.h +0 -154
  292. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/IterativeSolvers/MINRES.h +0 -267
  293. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/IterativeSolvers/Scaling.h +0 -193
  294. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h +0 -305
  295. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/LevenbergMarquardt/LMcovar.h +0 -84
  296. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/LevenbergMarquardt/LMonestep.h +0 -202
  297. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/LevenbergMarquardt/LMpar.h +0 -160
  298. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/LevenbergMarquardt/LMqrsolv.h +0 -188
  299. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/LevenbergMarquardt/LevenbergMarquardt.h +0 -396
  300. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h +0 -441
  301. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h +0 -569
  302. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h +0 -373
  303. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/MatrixFunctions/MatrixPower.h +0 -705
  304. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/MatrixFunctions/MatrixSquareRoot.h +0 -368
  305. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/MatrixFunctions/StemFunction.h +0 -117
  306. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/MoreVectorization/MathFunctions.h +0 -95
  307. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/HybridNonLinearSolver.h +0 -601
  308. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/LevenbergMarquardt.h +0 -657
  309. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/chkder.h +0 -66
  310. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/covar.h +0 -70
  311. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/dogleg.h +0 -107
  312. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/fdjac1.h +0 -79
  313. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/lmpar.h +0 -298
  314. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/qrsolv.h +0 -91
  315. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/r1mpyq.h +0 -30
  316. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/r1updt.h +0 -99
  317. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/rwupdt.h +0 -49
  318. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NumericalDiff/NumericalDiff.h +0 -130
  319. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Polynomials/Companion.h +0 -280
  320. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Polynomials/PolynomialSolver.h +0 -428
  321. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Polynomials/PolynomialUtils.h +0 -143
  322. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Skyline/SkylineInplaceLU.h +0 -352
  323. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Skyline/SkylineMatrix.h +0 -862
  324. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Skyline/SkylineMatrixBase.h +0 -212
  325. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Skyline/SkylineProduct.h +0 -295
  326. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Skyline/SkylineStorage.h +0 -259
  327. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Skyline/SkylineUtil.h +0 -89
  328. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SparseExtra/BlockOfDynamicSparseMatrix.h +0 -122
  329. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SparseExtra/BlockSparseMatrix.h +0 -1079
  330. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SparseExtra/DynamicSparseMatrix.h +0 -404
  331. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SparseExtra/MarketIO.h +0 -282
  332. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SparseExtra/MatrixMarketIterator.h +0 -247
  333. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SparseExtra/RandomSetter.h +0 -349
  334. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsArrayAPI.h +0 -286
  335. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsBFloat16.h +0 -68
  336. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsFunctors.h +0 -357
  337. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsHalf.h +0 -66
  338. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsImpl.h +0 -1959
  339. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsPacketMath.h +0 -118
  340. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/HipVectorCompatibility.h +0 -67
  341. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsArrayAPI.h +0 -167
  342. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsBFloat16.h +0 -58
  343. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h +0 -330
  344. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h +0 -58
  345. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h +0 -2045
  346. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h +0 -79
  347. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/arch/AVX/BesselFunctions.h +0 -46
  348. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/arch/AVX/SpecialFunctions.h +0 -16
  349. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/arch/AVX512/BesselFunctions.h +0 -46
  350. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/arch/AVX512/SpecialFunctions.h +0 -16
  351. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/arch/GPU/SpecialFunctions.h +0 -369
  352. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/arch/NEON/BesselFunctions.h +0 -54
  353. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/arch/NEON/SpecialFunctions.h +0 -34
  354. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Splines/Spline.h +0 -507
  355. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Splines/SplineFitting.h +0 -431
  356. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Splines/SplineFwd.h +0 -93
  357. sequenzo-0.1.18.dist-info/RECORD +0 -544
  358. {sequenzo-0.1.18.dist-info → sequenzo-0.1.20.dist-info}/WHEEL +0 -0
  359. {sequenzo-0.1.18.dist-info → sequenzo-0.1.20.dist-info}/licenses/LICENSE +0 -0
  360. {sequenzo-0.1.18.dist-info → sequenzo-0.1.20.dist-info}/top_level.txt +0 -0
@@ -1,582 +0,0 @@
1
- // This file is part of Eigen, a lightweight C++ template library
2
- // for linear algebra.
3
- //
4
- // Mehdi Goli Codeplay Software Ltd.
5
- // Ralph Potter Codeplay Software Ltd.
6
- // Luke Iwanski Codeplay Software Ltd.
7
- // Contact: <eigen@codeplay.com>
8
- //
9
- // This Source Code Form is subject to the terms of the Mozilla
10
- // Public License v. 2.0. If a copy of the MPL was not distributed
11
- // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
12
-
13
- /*****************************************************************
14
- * TensorReductionSycl.h
15
- *
16
- * \brief:
17
- * This is the specialization of the reduction operation. Two phase reduction approach
18
- * is used since the GPU does not have Global Synchronization for global memory among
19
- * different work-group/thread block. To solve the problem, we need to create two kernels
20
- * to reduce the data, where the first kernel reduce the data locally and each local
21
- * workgroup/thread-block save the input data into global memory. In the second phase (global reduction)
22
- * one work-group uses one work-group/thread-block to reduces the intermediate data into one single element.
23
- * Here is an NVIDIA presentation explaining the optimized two phase reduction algorithm on GPU:
24
- * https://developer.download.nvidia.com/assets/cuda/files/reduction.pdf
25
- *
26
- *****************************************************************/
27
-
28
- #ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSOR_REDUCTION_SYCL_HPP
29
- #define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSOR_REDUCTION_SYCL_HPP
30
- namespace Eigen {
31
- namespace TensorSycl {
32
- namespace internal {
33
-
34
- template <typename Op, typename CoeffReturnType, typename Index, bool Vectorizable>
35
- struct OpDefiner {
36
- typedef typename Vectorise<CoeffReturnType, Eigen::SyclDevice, Vectorizable>::PacketReturnType PacketReturnType;
37
- typedef Op type;
38
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE type get_op(Op &op) { return op; }
39
-
40
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType finalise_op(const PacketReturnType &accumulator,
41
- const Index &) {
42
- return accumulator;
43
- }
44
- };
45
-
46
- template <typename CoeffReturnType, typename Index>
47
- struct OpDefiner<Eigen::internal::MeanReducer<CoeffReturnType>, CoeffReturnType, Index, false> {
48
- typedef Eigen::internal::SumReducer<CoeffReturnType> type;
49
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE type get_op(Eigen::internal::MeanReducer<CoeffReturnType> &) {
50
- return type();
51
- }
52
-
53
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType finalise_op(const CoeffReturnType &accumulator,
54
- const Index &scale) {
55
- ::Eigen::internal::scalar_quotient_op<CoeffReturnType> quotient_op;
56
- return quotient_op(accumulator, CoeffReturnType(scale));
57
- }
58
- };
59
-
60
- template <typename CoeffReturnType, typename Index>
61
- struct OpDefiner<Eigen::internal::MeanReducer<CoeffReturnType>, CoeffReturnType, Index, true> {
62
- typedef typename Vectorise<CoeffReturnType, Eigen::SyclDevice, true>::PacketReturnType PacketReturnType;
63
- typedef Eigen::internal::SumReducer<CoeffReturnType> type;
64
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE type get_op(Eigen::internal::MeanReducer<CoeffReturnType> &) {
65
- return type();
66
- }
67
-
68
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType finalise_op(const PacketReturnType &accumulator,
69
- const Index &scale) {
70
- return ::Eigen::internal::pdiv(accumulator, ::Eigen::internal::pset1<PacketReturnType>(CoeffReturnType(scale)));
71
- }
72
- };
73
-
74
- template <typename CoeffReturnType, typename OpType, typename InputAccessor, typename OutputAccessor, typename Index,
75
- Index local_range>
76
- struct SecondStepFullReducer {
77
- typedef cl::sycl::accessor<CoeffReturnType, 1, cl::sycl::access::mode::read_write, cl::sycl::access::target::local>
78
- LocalAccessor;
79
- typedef OpDefiner<OpType, CoeffReturnType, Index, true> OpDef;
80
- typedef typename OpDef::type Op;
81
- LocalAccessor scratch;
82
- InputAccessor aI;
83
- OutputAccessor outAcc;
84
- Op op;
85
- SecondStepFullReducer(LocalAccessor scratch_, InputAccessor aI_, OutputAccessor outAcc_, OpType op_)
86
- : scratch(scratch_), aI(aI_), outAcc(outAcc_), op(OpDef::get_op(op_)) {}
87
-
88
- void operator()(cl::sycl::nd_item<1> itemID) {
89
- // Our empirical research shows that the best performance will be achieved
90
- // when there is only one element per thread to reduce in the second step.
91
- // in this step the second step reduction time is almost negligible.
92
- // Hence, in the second step of reduction the input size is fixed to the
93
- // local size, thus, there is only one element read per thread. The
94
- // algorithm must be changed if the number of reduce per thread in the
95
- // second step is greater than 1. Otherwise, the result will be wrong.
96
- const Index localid = itemID.get_local_id(0);
97
- auto aInPtr = aI.get_pointer() + localid;
98
- auto aOutPtr = outAcc.get_pointer();
99
- CoeffReturnType *scratchptr = scratch.get_pointer();
100
- CoeffReturnType accumulator = *aInPtr;
101
-
102
- scratchptr[localid] = op.finalize(accumulator);
103
- for (Index offset = itemID.get_local_range(0) / 2; offset > 0; offset /= 2) {
104
- itemID.barrier(cl::sycl::access::fence_space::local_space);
105
- if (localid < offset) {
106
- op.reduce(scratchptr[localid + offset], &accumulator);
107
- scratchptr[localid] = op.finalize(accumulator);
108
- }
109
- }
110
- if (localid == 0) *aOutPtr = op.finalize(accumulator);
111
- }
112
- };
113
-
114
- // Full reduction first phase. In this version the vectorization is true and the reduction accept
115
- // any generic reducerOp e.g( max, min, sum, mean, iamax, iamin, etc ).
116
- template <typename Evaluator, typename OpType, typename Evaluator::Index local_range>
117
- class FullReductionKernelFunctor {
118
- public:
119
- typedef typename Evaluator::CoeffReturnType CoeffReturnType;
120
- typedef typename Evaluator::Index Index;
121
- typedef OpDefiner<OpType, typename Evaluator::CoeffReturnType, Index,
122
- (Evaluator::ReducerTraits::PacketAccess & Evaluator::InputPacketAccess)>
123
- OpDef;
124
-
125
- typedef typename OpDef::type Op;
126
- typedef typename Evaluator::EvaluatorPointerType EvaluatorPointerType;
127
- typedef typename Evaluator::PacketReturnType PacketReturnType;
128
- typedef
129
- typename ::Eigen::internal::conditional<(Evaluator::ReducerTraits::PacketAccess & Evaluator::InputPacketAccess),
130
- PacketReturnType, CoeffReturnType>::type OutType;
131
- typedef cl::sycl::accessor<OutType, 1, cl::sycl::access::mode::read_write, cl::sycl::access::target::local>
132
- LocalAccessor;
133
- LocalAccessor scratch;
134
- Evaluator evaluator;
135
- EvaluatorPointerType final_output;
136
- Index rng;
137
- Op op;
138
-
139
- FullReductionKernelFunctor(LocalAccessor scratch_, Evaluator evaluator_, EvaluatorPointerType final_output_,
140
- Index rng_, OpType op_)
141
- : scratch(scratch_), evaluator(evaluator_), final_output(final_output_), rng(rng_), op(OpDef::get_op(op_)) {}
142
-
143
- void operator()(cl::sycl::nd_item<1> itemID) { compute_reduction(itemID); }
144
-
145
- template <bool Vect = (Evaluator::ReducerTraits::PacketAccess & Evaluator::InputPacketAccess)>
146
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename ::Eigen::internal::enable_if<Vect>::type compute_reduction(
147
- const cl::sycl::nd_item<1> &itemID) {
148
- auto output_ptr = final_output.get_pointer();
149
- Index VectorizedRange = (rng / Evaluator::PacketSize) * Evaluator::PacketSize;
150
- Index globalid = itemID.get_global_id(0);
151
- Index localid = itemID.get_local_id(0);
152
- Index step = Evaluator::PacketSize * itemID.get_global_range(0);
153
- Index start = Evaluator::PacketSize * globalid;
154
- // vectorizable parts
155
- PacketReturnType packetAccumulator = op.template initializePacket<PacketReturnType>();
156
- for (Index i = start; i < VectorizedRange; i += step) {
157
- op.template reducePacket<PacketReturnType>(evaluator.impl().template packet<Unaligned>(i), &packetAccumulator);
158
- }
159
- globalid += VectorizedRange;
160
- // non vectorizable parts
161
- for (Index i = globalid; i < rng; i += itemID.get_global_range(0)) {
162
- op.template reducePacket<PacketReturnType>(
163
- ::Eigen::TensorSycl::internal::PacketWrapper<PacketReturnType, Evaluator::PacketSize>::convert_to_packet_type(
164
- evaluator.impl().coeff(i), op.initialize()),
165
- &packetAccumulator);
166
- }
167
- scratch[localid] = packetAccumulator =
168
- OpDef::finalise_op(op.template finalizePacket<PacketReturnType>(packetAccumulator), rng);
169
- // reduction parts // Local size is always power of 2
170
- EIGEN_UNROLL_LOOP
171
- for (Index offset = local_range / 2; offset > 0; offset /= 2) {
172
- itemID.barrier(cl::sycl::access::fence_space::local_space);
173
- if (localid < offset) {
174
- op.template reducePacket<PacketReturnType>(scratch[localid + offset], &packetAccumulator);
175
- scratch[localid] = op.template finalizePacket<PacketReturnType>(packetAccumulator);
176
- }
177
- }
178
- if (localid == 0) {
179
- output_ptr[itemID.get_group(0)] =
180
- op.finalizeBoth(op.initialize(), op.template finalizePacket<PacketReturnType>(packetAccumulator));
181
- }
182
- }
183
-
184
- template <bool Vect = (Evaluator::ReducerTraits::PacketAccess & Evaluator::InputPacketAccess)>
185
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename ::Eigen::internal::enable_if<!Vect>::type compute_reduction(
186
- const cl::sycl::nd_item<1> &itemID) {
187
- auto output_ptr = final_output.get_pointer();
188
- Index globalid = itemID.get_global_id(0);
189
- Index localid = itemID.get_local_id(0);
190
- // vectorizable parts
191
- CoeffReturnType accumulator = op.initialize();
192
- // non vectorizable parts
193
- for (Index i = globalid; i < rng; i += itemID.get_global_range(0)) {
194
- op.reduce(evaluator.impl().coeff(i), &accumulator);
195
- }
196
- scratch[localid] = accumulator = OpDef::finalise_op(op.finalize(accumulator), rng);
197
-
198
- // reduction parts. the local size is always power of 2
199
- EIGEN_UNROLL_LOOP
200
- for (Index offset = local_range / 2; offset > 0; offset /= 2) {
201
- itemID.barrier(cl::sycl::access::fence_space::local_space);
202
- if (localid < offset) {
203
- op.reduce(scratch[localid + offset], &accumulator);
204
- scratch[localid] = op.finalize(accumulator);
205
- }
206
- }
207
- if (localid == 0) {
208
- output_ptr[itemID.get_group(0)] = op.finalize(accumulator);
209
- }
210
- }
211
- };
212
-
213
- template <typename Evaluator, typename OpType>
214
- class GenericNondeterministicReducer {
215
- public:
216
- typedef typename Evaluator::CoeffReturnType CoeffReturnType;
217
- typedef typename Evaluator::EvaluatorPointerType EvaluatorPointerType;
218
- typedef typename Evaluator::Index Index;
219
- typedef OpDefiner<OpType, CoeffReturnType, Index, false> OpDef;
220
- typedef typename OpDef::type Op;
221
- template <typename Scratch>
222
- GenericNondeterministicReducer(Scratch, Evaluator evaluator_, EvaluatorPointerType output_accessor_, OpType functor_,
223
- Index range_, Index num_values_to_reduce_)
224
- : evaluator(evaluator_),
225
- output_accessor(output_accessor_),
226
- functor(OpDef::get_op(functor_)),
227
- range(range_),
228
- num_values_to_reduce(num_values_to_reduce_) {}
229
-
230
- void operator()(cl::sycl::nd_item<1> itemID) {
231
- auto output_accessor_ptr = output_accessor.get_pointer();
232
- /// const cast added as a naive solution to solve the qualifier drop error
233
- Index globalid = static_cast<Index>(itemID.get_global_linear_id());
234
- if (globalid < range) {
235
- CoeffReturnType accum = functor.initialize();
236
- Eigen::internal::GenericDimReducer<Evaluator::NumReducedDims - 1, Evaluator, Op>::reduce(
237
- evaluator, evaluator.firstInput(globalid), functor, &accum);
238
- output_accessor_ptr[globalid] = OpDef::finalise_op(functor.finalize(accum), num_values_to_reduce);
239
- }
240
- }
241
-
242
- private:
243
- Evaluator evaluator;
244
- EvaluatorPointerType output_accessor;
245
- Op functor;
246
- Index range;
247
- Index num_values_to_reduce;
248
- };
249
-
250
- enum class reduction_dim { inner_most, outer_most };
251
- // default is preserver
252
- template <typename Evaluator, typename OpType, typename PannelParameters, reduction_dim rt>
253
- struct PartialReductionKernel {
254
- typedef typename Evaluator::CoeffReturnType CoeffReturnType;
255
- typedef typename Evaluator::EvaluatorPointerType EvaluatorPointerType;
256
- typedef typename Evaluator::Index Index;
257
- typedef OpDefiner<OpType, CoeffReturnType, Index, false> OpDef;
258
- typedef typename OpDef::type Op;
259
- typedef cl::sycl::accessor<CoeffReturnType, 1, cl::sycl::access::mode::read_write, cl::sycl::access::target::local>
260
- ScratchAcc;
261
- ScratchAcc scratch;
262
- Evaluator evaluator;
263
- EvaluatorPointerType output_accessor;
264
- Op op;
265
- const Index preserve_elements_num_groups;
266
- const Index reduce_elements_num_groups;
267
- const Index num_coeffs_to_preserve;
268
- const Index num_coeffs_to_reduce;
269
-
270
- PartialReductionKernel(ScratchAcc scratch_, Evaluator evaluator_, EvaluatorPointerType output_accessor_, OpType op_,
271
- const Index preserve_elements_num_groups_, const Index reduce_elements_num_groups_,
272
- const Index num_coeffs_to_preserve_, const Index num_coeffs_to_reduce_)
273
- : scratch(scratch_),
274
- evaluator(evaluator_),
275
- output_accessor(output_accessor_),
276
- op(OpDef::get_op(op_)),
277
- preserve_elements_num_groups(preserve_elements_num_groups_),
278
- reduce_elements_num_groups(reduce_elements_num_groups_),
279
- num_coeffs_to_preserve(num_coeffs_to_preserve_),
280
- num_coeffs_to_reduce(num_coeffs_to_reduce_) {}
281
-
282
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void element_wise_reduce(Index globalRId, Index globalPId,
283
- CoeffReturnType &accumulator) {
284
- if (globalPId >= num_coeffs_to_preserve) {
285
- return;
286
- }
287
- Index global_offset = rt == reduction_dim::outer_most ? globalPId + (globalRId * num_coeffs_to_preserve)
288
- : globalRId + (globalPId * num_coeffs_to_reduce);
289
- Index localOffset = globalRId;
290
-
291
- const Index per_thread_local_stride = PannelParameters::LocalThreadSizeR * reduce_elements_num_groups;
292
- const Index per_thread_global_stride =
293
- rt == reduction_dim::outer_most ? num_coeffs_to_preserve * per_thread_local_stride : per_thread_local_stride;
294
- for (Index i = globalRId; i < num_coeffs_to_reduce; i += per_thread_local_stride) {
295
- op.reduce(evaluator.impl().coeff(global_offset), &accumulator);
296
- localOffset += per_thread_local_stride;
297
- global_offset += per_thread_global_stride;
298
- }
299
- }
300
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator()(cl::sycl::nd_item<1> itemID) {
301
- const Index linearLocalThreadId = itemID.get_local_id(0);
302
- Index pLocalThreadId = rt == reduction_dim::outer_most ? linearLocalThreadId % PannelParameters::LocalThreadSizeP
303
- : linearLocalThreadId / PannelParameters::LocalThreadSizeR;
304
- Index rLocalThreadId = rt == reduction_dim::outer_most ? linearLocalThreadId / PannelParameters::LocalThreadSizeP
305
- : linearLocalThreadId % PannelParameters::LocalThreadSizeR;
306
- const Index pGroupId = rt == reduction_dim::outer_most ? itemID.get_group(0) % preserve_elements_num_groups
307
- : itemID.get_group(0) / reduce_elements_num_groups;
308
- const Index rGroupId = rt == reduction_dim::outer_most ? itemID.get_group(0) / preserve_elements_num_groups
309
- : itemID.get_group(0) % reduce_elements_num_groups;
310
-
311
- Index globalPId = pGroupId * PannelParameters::LocalThreadSizeP + pLocalThreadId;
312
- const Index globalRId = rGroupId * PannelParameters::LocalThreadSizeR + rLocalThreadId;
313
- auto scratchPtr = scratch.get_pointer().get();
314
- auto outPtr =
315
- output_accessor.get_pointer() + (reduce_elements_num_groups > 1 ? rGroupId * num_coeffs_to_preserve : 0);
316
- CoeffReturnType accumulator = op.initialize();
317
-
318
- element_wise_reduce(globalRId, globalPId, accumulator);
319
-
320
- accumulator = OpDef::finalise_op(op.finalize(accumulator), num_coeffs_to_reduce);
321
- scratchPtr[pLocalThreadId + rLocalThreadId * (PannelParameters::LocalThreadSizeP + PannelParameters::BC)] =
322
- accumulator;
323
- if (rt == reduction_dim::inner_most) {
324
- pLocalThreadId = linearLocalThreadId % PannelParameters::LocalThreadSizeP;
325
- rLocalThreadId = linearLocalThreadId / PannelParameters::LocalThreadSizeP;
326
- globalPId = pGroupId * PannelParameters::LocalThreadSizeP + pLocalThreadId;
327
- }
328
-
329
- /* Apply the reduction operation between the current local
330
- * id and the one on the other half of the vector. */
331
- auto out_scratch_ptr =
332
- scratchPtr + (pLocalThreadId + (rLocalThreadId * (PannelParameters::LocalThreadSizeP + PannelParameters::BC)));
333
- itemID.barrier(cl::sycl::access::fence_space::local_space);
334
- if (rt == reduction_dim::inner_most) {
335
- accumulator = *out_scratch_ptr;
336
- }
337
- // The Local LocalThreadSizeR is always power of 2
338
- EIGEN_UNROLL_LOOP
339
- for (Index offset = PannelParameters::LocalThreadSizeR >> 1; offset > 0; offset >>= 1) {
340
- if (rLocalThreadId < offset) {
341
- op.reduce(out_scratch_ptr[(PannelParameters::LocalThreadSizeP + PannelParameters::BC) * offset], &accumulator);
342
- // The result has already been divided for mean reducer in the
343
- // previous reduction so no need to divide furthermore
344
- *out_scratch_ptr = op.finalize(accumulator);
345
- }
346
- /* All threads collectively read from global memory into local.
347
- * The barrier ensures all threads' IO is resolved before
348
- * execution continues (strictly speaking, all threads within
349
- * a single work-group - there is no co-ordination between
350
- * work-groups, only work-items). */
351
- itemID.barrier(cl::sycl::access::fence_space::local_space);
352
- }
353
-
354
- if (rLocalThreadId == 0 && (globalPId < num_coeffs_to_preserve)) {
355
- outPtr[globalPId] = op.finalize(accumulator);
356
- }
357
- }
358
- };
359
-
360
- template <typename OutScalar, typename Index, typename InputAccessor, typename OutputAccessor, typename OpType>
361
- struct SecondStepPartialReduction {
362
- typedef OpDefiner<OpType, OutScalar, Index, false> OpDef;
363
- typedef typename OpDef::type Op;
364
- typedef cl::sycl::accessor<OutScalar, 1, cl::sycl::access::mode::read_write, cl::sycl::access::target::local>
365
- ScratchAccessor;
366
- InputAccessor input_accessor;
367
- OutputAccessor output_accessor;
368
- Op op;
369
- const Index num_coeffs_to_preserve;
370
- const Index num_coeffs_to_reduce;
371
-
372
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE SecondStepPartialReduction(ScratchAccessor, InputAccessor input_accessor_,
373
- OutputAccessor output_accessor_, OpType op_,
374
- const Index num_coeffs_to_preserve_,
375
- const Index num_coeffs_to_reduce_)
376
- : input_accessor(input_accessor_),
377
- output_accessor(output_accessor_),
378
- op(OpDef::get_op(op_)),
379
- num_coeffs_to_preserve(num_coeffs_to_preserve_),
380
- num_coeffs_to_reduce(num_coeffs_to_reduce_) {}
381
-
382
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator()(cl::sycl::nd_item<1> itemID) {
383
- const Index globalId = itemID.get_global_id(0);
384
-
385
- if (globalId >= num_coeffs_to_preserve) return;
386
-
387
- auto in_ptr = input_accessor.get_pointer() + globalId;
388
-
389
- OutScalar accumulator = op.initialize();
390
- // num_coeffs_to_reduce is not bigger that 256
391
- for (Index i = 0; i < num_coeffs_to_reduce; i++) {
392
- op.reduce(*in_ptr, &accumulator);
393
- in_ptr += num_coeffs_to_preserve;
394
- }
395
- output_accessor.get_pointer()[globalId] = op.finalize(accumulator);
396
- }
397
- }; // namespace internal
398
-
399
- template <typename Index, Index LTP, Index LTR, bool BC_>
400
- struct ReductionPannel {
401
- static EIGEN_CONSTEXPR Index LocalThreadSizeP = LTP;
402
- static EIGEN_CONSTEXPR Index LocalThreadSizeR = LTR;
403
- static EIGEN_CONSTEXPR bool BC = BC_;
404
- };
405
-
406
- template <typename Self, typename Op, TensorSycl::internal::reduction_dim rt>
407
- struct PartialReducerLauncher {
408
- typedef typename Self::EvaluatorPointerType EvaluatorPointerType;
409
- typedef typename Self::CoeffReturnType CoeffReturnType;
410
- typedef typename Self::Storage Storage;
411
- typedef typename Self::Index Index;
412
- typedef ReductionPannel<typename Self::Index, EIGEN_SYCL_LOCAL_THREAD_DIM0, EIGEN_SYCL_LOCAL_THREAD_DIM1, true>
413
- PannelParameters;
414
-
415
- typedef PartialReductionKernel<Self, Op, PannelParameters, rt> SyclReducerKerneType;
416
-
417
- static bool run(const Self &self, const Op &reducer, const Eigen::SyclDevice &dev, EvaluatorPointerType output,
418
- Index num_coeffs_to_reduce, Index num_coeffs_to_preserve) {
419
- Index roundUpP = roundUp(num_coeffs_to_preserve, PannelParameters::LocalThreadSizeP);
420
-
421
- // getPowerOfTwo makes sure local range is power of 2 and <=
422
- // maxSyclThreadPerBlock this will help us to avoid extra check on the
423
- // kernel
424
- static_assert(!((PannelParameters::LocalThreadSizeP * PannelParameters::LocalThreadSizeR) &
425
- (PannelParameters::LocalThreadSizeP * PannelParameters::LocalThreadSizeR - 1)),
426
- "The Local thread size must be a power of 2 for the reduction "
427
- "operation");
428
-
429
- EIGEN_CONSTEXPR Index localRange = PannelParameters::LocalThreadSizeP * PannelParameters::LocalThreadSizeR;
430
- // In this step, we force the code not to be more than 2-step reduction:
431
- // Our empirical research shows that if each thread reduces at least 64
432
- // elemnts individually, we get better performance. However, this can change
433
- // on different platforms. In this step we force the code not to be
434
- // morthan step reduction: Our empirical research shows that for inner_most
435
- // dim reducer, it is better to have 8 group in a reduce dimension for sizes
436
- // > 1024 to achieve the best performance.
437
- const Index reductionPerThread = 64;
438
- Index cu = dev.getPowerOfTwo(dev.getNumSyclMultiProcessors(), true);
439
- const Index pNumGroups = roundUpP / PannelParameters::LocalThreadSizeP;
440
- Index rGroups = (cu + pNumGroups - 1) / pNumGroups;
441
- const Index rNumGroups = num_coeffs_to_reduce > reductionPerThread * localRange ? std::min(rGroups, localRange) : 1;
442
- const Index globalRange = pNumGroups * rNumGroups * localRange;
443
-
444
- EIGEN_CONSTEXPR Index scratchSize =
445
- PannelParameters::LocalThreadSizeR * (PannelParameters::LocalThreadSizeP + PannelParameters::BC);
446
- auto thread_range = cl::sycl::nd_range<1>(cl::sycl::range<1>(globalRange), cl::sycl::range<1>(localRange));
447
- if (rNumGroups > 1) {
448
- CoeffReturnType *temp_pointer = static_cast<CoeffReturnType *>(
449
- dev.allocate_temp(num_coeffs_to_preserve * rNumGroups * sizeof(CoeffReturnType)));
450
- EvaluatorPointerType temp_accessor = dev.get(temp_pointer);
451
- dev.template unary_kernel_launcher<CoeffReturnType, SyclReducerKerneType>(
452
- self, temp_accessor, thread_range, scratchSize, reducer, pNumGroups, rNumGroups, num_coeffs_to_preserve,
453
- num_coeffs_to_reduce);
454
-
455
- typedef SecondStepPartialReduction<CoeffReturnType, Index, EvaluatorPointerType, EvaluatorPointerType, Op>
456
- SecondStepPartialReductionKernel;
457
-
458
- dev.template unary_kernel_launcher<CoeffReturnType, SecondStepPartialReductionKernel>(
459
- temp_accessor, output,
460
- cl::sycl::nd_range<1>(cl::sycl::range<1>(pNumGroups * localRange), cl::sycl::range<1>(localRange)), Index(1),
461
- reducer, num_coeffs_to_preserve, rNumGroups);
462
-
463
- self.device().deallocate_temp(temp_pointer);
464
- } else {
465
- dev.template unary_kernel_launcher<CoeffReturnType, SyclReducerKerneType>(
466
- self, output, thread_range, scratchSize, reducer, pNumGroups, rNumGroups, num_coeffs_to_preserve,
467
- num_coeffs_to_reduce);
468
- }
469
- return false;
470
- }
471
- };
472
- } // namespace internal
473
- } // namespace TensorSycl
474
-
475
- namespace internal {
476
-
477
- template <typename Self, typename Op, bool Vectorizable>
478
- struct FullReducer<Self, Op, Eigen::SyclDevice, Vectorizable> {
479
- typedef typename Self::CoeffReturnType CoeffReturnType;
480
- typedef typename Self::EvaluatorPointerType EvaluatorPointerType;
481
- static EIGEN_CONSTEXPR bool HasOptimizedImplementation = true;
482
- static EIGEN_CONSTEXPR int PacketSize = Self::PacketAccess ? Self::PacketSize : 1;
483
- static void run(const Self &self, Op &reducer, const Eigen::SyclDevice &dev, EvaluatorPointerType data) {
484
- typedef typename conditional<Self::PacketAccess, typename Self::PacketReturnType, CoeffReturnType>::type OutType;
485
- static_assert(!((EIGEN_SYCL_LOCAL_THREAD_DIM0 * EIGEN_SYCL_LOCAL_THREAD_DIM1) &
486
- (EIGEN_SYCL_LOCAL_THREAD_DIM0 * EIGEN_SYCL_LOCAL_THREAD_DIM1 - 1)),
487
- "The Local thread size must be a power of 2 for the reduction "
488
- "operation");
489
- EIGEN_CONSTEXPR Index local_range = EIGEN_SYCL_LOCAL_THREAD_DIM0 * EIGEN_SYCL_LOCAL_THREAD_DIM1;
490
-
491
- typename Self::Index inputSize = self.impl().dimensions().TotalSize();
492
- // In this step we force the code not to be more than 2-step reduction:
493
- // Our empirical research shows that if each thread reduces at least 512
494
- // elemnts individually, we get better performance.
495
- const Index reductionPerThread = 2048;
496
- // const Index num_work_group =
497
- Index reductionGroup = dev.getPowerOfTwo(
498
- (inputSize + (reductionPerThread * local_range - 1)) / (reductionPerThread * local_range), true);
499
- const Index num_work_group = std::min(reductionGroup, local_range);
500
- // 1
501
- // ? local_range
502
- // : 1);
503
- const Index global_range = num_work_group * local_range;
504
-
505
- auto thread_range = cl::sycl::nd_range<1>(cl::sycl::range<1>(global_range), cl::sycl::range<1>(local_range));
506
- typedef TensorSycl::internal::FullReductionKernelFunctor<Self, Op, local_range> reduction_kernel_t;
507
- if (num_work_group > 1) {
508
- CoeffReturnType *temp_pointer =
509
- static_cast<CoeffReturnType *>(dev.allocate_temp(num_work_group * sizeof(CoeffReturnType)));
510
- typename Self::EvaluatorPointerType tmp_global_accessor = dev.get(temp_pointer);
511
- dev.template unary_kernel_launcher<OutType, reduction_kernel_t>(self, tmp_global_accessor, thread_range,
512
- local_range, inputSize, reducer);
513
-
514
- typedef TensorSycl::internal::SecondStepFullReducer<CoeffReturnType, Op, EvaluatorPointerType,
515
- EvaluatorPointerType, Index, local_range>
516
- GenericRKernel;
517
- dev.template unary_kernel_launcher<CoeffReturnType, GenericRKernel>(
518
- tmp_global_accessor, data,
519
- cl::sycl::nd_range<1>(cl::sycl::range<1>(num_work_group), cl::sycl::range<1>(num_work_group)), num_work_group,
520
- reducer);
521
-
522
- dev.deallocate_temp(temp_pointer);
523
- } else {
524
- dev.template unary_kernel_launcher<OutType, reduction_kernel_t>(self, data, thread_range, local_range, inputSize,
525
- reducer);
526
- }
527
- }
528
- };
529
- // vectorizable inner_most most dim preserver
530
- // col reduction
531
- template <typename Self, typename Op>
532
- struct OuterReducer<Self, Op, Eigen::SyclDevice> {
533
- static EIGEN_CONSTEXPR bool HasOptimizedImplementation = true;
534
-
535
- static bool run(const Self &self, const Op &reducer, const Eigen::SyclDevice &dev,
536
- typename Self::EvaluatorPointerType output, typename Self::Index num_coeffs_to_reduce,
537
- typename Self::Index num_coeffs_to_preserve) {
538
- return ::Eigen::TensorSycl::internal::PartialReducerLauncher<
539
- Self, Op, ::Eigen::TensorSycl::internal::reduction_dim::outer_most>::run(self, reducer, dev, output,
540
- num_coeffs_to_reduce,
541
- num_coeffs_to_preserve);
542
- }
543
- };
544
- // row reduction
545
- template <typename Self, typename Op>
546
- struct InnerReducer<Self, Op, Eigen::SyclDevice> {
547
- static EIGEN_CONSTEXPR bool HasOptimizedImplementation = true;
548
-
549
- static bool run(const Self &self, const Op &reducer, const Eigen::SyclDevice &dev,
550
- typename Self::EvaluatorPointerType output, typename Self::Index num_coeffs_to_reduce,
551
- typename Self::Index num_coeffs_to_preserve) {
552
- return ::Eigen::TensorSycl::internal::PartialReducerLauncher<
553
- Self, Op, ::Eigen::TensorSycl::internal::reduction_dim::inner_most>::run(self, reducer, dev, output,
554
- num_coeffs_to_reduce,
555
- num_coeffs_to_preserve);
556
- }
557
- };
558
-
559
- // ArmgMax uses this kernel for partial reduction//
560
- // TODO(@mehdi.goli) come up with a better kernel
561
- // generic partial reduction
562
- template <typename Self, typename Op>
563
- struct GenericReducer<Self, Op, Eigen::SyclDevice> {
564
- static EIGEN_CONSTEXPR bool HasOptimizedImplementation = false;
565
- static bool run(const Self &self, const Op &reducer, const Eigen::SyclDevice &dev,
566
- typename Self::EvaluatorPointerType output, typename Self::Index num_values_to_reduce,
567
- typename Self::Index num_coeffs_to_preserve) {
568
- typename Self::Index range, GRange, tileSize;
569
- dev.parallel_for_setup(num_coeffs_to_preserve, tileSize, range, GRange);
570
-
571
- dev.template unary_kernel_launcher<typename Self::CoeffReturnType,
572
- TensorSycl::internal::GenericNondeterministicReducer<Self, Op>>(
573
- self, output, cl::sycl::nd_range<1>(cl::sycl::range<1>(GRange), cl::sycl::range<1>(tileSize)), Index(1),
574
- reducer, range, (num_values_to_reduce != 0) ? num_values_to_reduce : static_cast<Index>(1));
575
- return false;
576
- }
577
- };
578
-
579
- } // namespace internal
580
- } // namespace Eigen
581
-
582
- #endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSOR_REDUCTION_SYCL_HPP