sequenzo 0.1.18__cp310-cp310-macosx_10_9_universal2.whl → 0.1.19__cp310-cp310-macosx_10_9_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sequenzo might be problematic. Click here for more details.

Files changed (357)
  1. sequenzo/__init__.py +39 -7
  2. sequenzo/big_data/clara/utils/get_weighted_diss.c +155 -155
  3. sequenzo/big_data/clara/utils/get_weighted_diss.cpython-310-darwin.so +0 -0
  4. sequenzo/clustering/KMedoids.py +39 -0
  5. sequenzo/clustering/hierarchical_clustering.py +107 -5
  6. sequenzo/define_sequence_data.py +10 -1
  7. sequenzo/dissimilarity_measures/get_distance_matrix.py +1 -2
  8. sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.c +155 -155
  9. sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.cpython-310-darwin.so +0 -0
  10. sequenzo/dissimilarity_measures/utils/seqconc.c +155 -155
  11. sequenzo/dissimilarity_measures/utils/seqconc.cpython-310-darwin.so +0 -0
  12. sequenzo/dissimilarity_measures/utils/seqdss.c +155 -155
  13. sequenzo/dissimilarity_measures/utils/seqdss.cpython-310-darwin.so +0 -0
  14. sequenzo/dissimilarity_measures/utils/seqdur.c +155 -155
  15. sequenzo/dissimilarity_measures/utils/seqdur.cpython-310-darwin.so +0 -0
  16. sequenzo/dissimilarity_measures/utils/seqlength.c +155 -155
  17. sequenzo/dissimilarity_measures/utils/seqlength.cpython-310-darwin.so +0 -0
  18. sequenzo/multidomain/cat.py +0 -53
  19. sequenzo/multidomain/idcd.py +0 -1
  20. sequenzo/openmp_setup.py +233 -0
  21. sequenzo/visualization/plot_transition_matrix.py +21 -22
  22. {sequenzo-0.1.18.dist-info → sequenzo-0.1.19.dist-info}/METADATA +43 -10
  23. sequenzo-0.1.19.dist-info/RECORD +215 -0
  24. sequenzo/dissimilarity_measures/setup.py +0 -35
  25. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Cholesky/LDLT.h +0 -688
  26. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Cholesky/LLT.h +0 -558
  27. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h +0 -99
  28. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +0 -682
  29. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +0 -346
  30. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +0 -462
  31. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +0 -91
  32. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/EigenSolver.h +0 -622
  33. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +0 -418
  34. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +0 -226
  35. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +0 -374
  36. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +0 -158
  37. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/RealQZ.h +0 -657
  38. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/RealSchur.h +0 -558
  39. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +0 -77
  40. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +0 -904
  41. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +0 -87
  42. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +0 -561
  43. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/AlignedBox.h +0 -486
  44. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/AngleAxis.h +0 -247
  45. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/EulerAngles.h +0 -114
  46. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/Homogeneous.h +0 -501
  47. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/Hyperplane.h +0 -282
  48. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/OrthoMethods.h +0 -235
  49. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/ParametrizedLine.h +0 -232
  50. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/Quaternion.h +0 -870
  51. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/Rotation2D.h +0 -199
  52. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/RotationBase.h +0 -206
  53. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/Scaling.h +0 -188
  54. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/Transform.h +0 -1563
  55. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/Translation.h +0 -202
  56. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/Umeyama.h +0 -166
  57. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +0 -168
  58. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Householder/BlockHouseholder.h +0 -110
  59. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Householder/Householder.h +0 -176
  60. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Householder/HouseholderSequence.h +0 -545
  61. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +0 -226
  62. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +0 -212
  63. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +0 -229
  64. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +0 -394
  65. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +0 -453
  66. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +0 -444
  67. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +0 -198
  68. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +0 -117
  69. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/Jacobi/Jacobi.h +0 -483
  70. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/KLUSupport/KLUSupport.h +0 -358
  71. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/LU/Determinant.h +0 -117
  72. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/LU/FullPivLU.h +0 -877
  73. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/LU/InverseImpl.h +0 -432
  74. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/LU/PartialPivLU.h +0 -624
  75. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h +0 -83
  76. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/LU/arch/InverseSize4.h +0 -351
  77. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/MetisSupport/MetisSupport.h +0 -137
  78. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/OrderingMethods/Amd.h +0 -435
  79. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +0 -1863
  80. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/OrderingMethods/Ordering.h +0 -153
  81. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +0 -678
  82. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +0 -545
  83. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/QR/ColPivHouseholderQR.h +0 -674
  84. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +0 -97
  85. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +0 -635
  86. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/QR/FullPivHouseholderQR.h +0 -713
  87. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/QR/HouseholderQR.h +0 -434
  88. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h +0 -68
  89. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +0 -335
  90. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SVD/BDCSVD.h +0 -1366
  91. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SVD/JacobiSVD.h +0 -812
  92. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +0 -91
  93. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SVD/SVDBase.h +0 -376
  94. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SVD/UpperBidiagonalization.h +0 -414
  95. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +0 -697
  96. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +0 -174
  97. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/AmbiVector.h +0 -378
  98. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/CompressedStorage.h +0 -274
  99. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +0 -352
  100. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h +0 -67
  101. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseAssign.h +0 -270
  102. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseBlock.h +0 -571
  103. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseColEtree.h +0 -206
  104. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +0 -370
  105. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +0 -722
  106. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +0 -150
  107. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +0 -342
  108. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +0 -138
  109. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseDot.h +0 -98
  110. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseFuzzy.h +0 -29
  111. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseMap.h +0 -305
  112. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseMatrix.h +0 -1518
  113. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +0 -398
  114. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparsePermutation.h +0 -178
  115. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseProduct.h +0 -181
  116. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseRedux.h +0 -49
  117. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseRef.h +0 -397
  118. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +0 -659
  119. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseSolverBase.h +0 -124
  120. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +0 -198
  121. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseTranspose.h +0 -92
  122. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseTriangularView.h +0 -189
  123. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseUtil.h +0 -186
  124. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseVector.h +0 -478
  125. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/SparseView.h +0 -254
  126. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseCore/TriangularSolver.h +0 -315
  127. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU.h +0 -923
  128. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLUImpl.h +0 -66
  129. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +0 -226
  130. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_Structs.h +0 -110
  131. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +0 -375
  132. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_Utils.h +0 -80
  133. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h +0 -181
  134. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +0 -179
  135. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +0 -107
  136. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +0 -280
  137. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +0 -126
  138. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +0 -130
  139. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +0 -223
  140. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h +0 -258
  141. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h +0 -137
  142. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h +0 -136
  143. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h +0 -83
  144. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SparseQR/SparseQR.h +0 -758
  145. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/StlSupport/StdDeque.h +0 -116
  146. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/StlSupport/StdList.h +0 -106
  147. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/StlSupport/StdVector.h +0 -131
  148. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/StlSupport/details.h +0 -84
  149. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +0 -1025
  150. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +0 -642
  151. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/misc/Image.h +0 -82
  152. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/misc/Kernel.h +0 -79
  153. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/misc/RealSvd2x2.h +0 -55
  154. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/misc/blas.h +0 -440
  155. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/misc/lapack.h +0 -152
  156. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/misc/lapacke.h +0 -16292
  157. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/misc/lapacke_mangling.h +0 -17
  158. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +0 -358
  159. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +0 -696
  160. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/BlockMethods.h +0 -1442
  161. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h +0 -115
  162. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +0 -177
  163. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/IndexedViewMethods.h +0 -262
  164. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +0 -152
  165. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +0 -95
  166. sequenzo/dissimilarity_measures/src/eigen/Eigen/src/plugins/ReshapedMethods.h +0 -149
  167. sequenzo/dissimilarity_measures/src/eigen/blas/BandTriangularSolver.h +0 -97
  168. sequenzo/dissimilarity_measures/src/eigen/blas/GeneralRank1Update.h +0 -44
  169. sequenzo/dissimilarity_measures/src/eigen/blas/PackedSelfadjointProduct.h +0 -53
  170. sequenzo/dissimilarity_measures/src/eigen/blas/PackedTriangularMatrixVector.h +0 -79
  171. sequenzo/dissimilarity_measures/src/eigen/blas/PackedTriangularSolverVector.h +0 -88
  172. sequenzo/dissimilarity_measures/src/eigen/blas/Rank2Update.h +0 -57
  173. sequenzo/dissimilarity_measures/src/eigen/blas/common.h +0 -175
  174. sequenzo/dissimilarity_measures/src/eigen/blas/f2c/datatypes.h +0 -24
  175. sequenzo/dissimilarity_measures/src/eigen/blas/level1_cplx_impl.h +0 -155
  176. sequenzo/dissimilarity_measures/src/eigen/blas/level1_impl.h +0 -144
  177. sequenzo/dissimilarity_measures/src/eigen/blas/level1_real_impl.h +0 -122
  178. sequenzo/dissimilarity_measures/src/eigen/blas/level2_cplx_impl.h +0 -360
  179. sequenzo/dissimilarity_measures/src/eigen/blas/level2_impl.h +0 -553
  180. sequenzo/dissimilarity_measures/src/eigen/blas/level2_real_impl.h +0 -306
  181. sequenzo/dissimilarity_measures/src/eigen/blas/level3_impl.h +0 -702
  182. sequenzo/dissimilarity_measures/src/eigen/debug/gdb/__init__.py +0 -1
  183. sequenzo/dissimilarity_measures/src/eigen/debug/gdb/printers.py +0 -314
  184. sequenzo/dissimilarity_measures/src/eigen/lapack/lapack_common.h +0 -29
  185. sequenzo/dissimilarity_measures/src/eigen/scripts/relicense.py +0 -69
  186. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/Tensor.h +0 -554
  187. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h +0 -329
  188. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h +0 -247
  189. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +0 -1176
  190. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h +0 -1559
  191. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h +0 -1093
  192. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h +0 -518
  193. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h +0 -377
  194. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h +0 -1023
  195. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h +0 -73
  196. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h +0 -6
  197. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h +0 -1413
  198. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h +0 -575
  199. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionSycl.h +0 -1650
  200. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h +0 -1679
  201. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h +0 -456
  202. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h +0 -1132
  203. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConvolutionSycl.h +0 -544
  204. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h +0 -214
  205. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h +0 -347
  206. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h +0 -137
  207. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h +0 -6
  208. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h +0 -104
  209. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceGpu.h +0 -389
  210. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h +0 -1048
  211. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h +0 -409
  212. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h +0 -236
  213. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h +0 -490
  214. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h +0 -236
  215. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h +0 -983
  216. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h +0 -703
  217. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h +0 -388
  218. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h +0 -669
  219. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h +0 -379
  220. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h +0 -237
  221. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h +0 -191
  222. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h +0 -488
  223. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h +0 -302
  224. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorGlobalFunctions.h +0 -33
  225. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h +0 -99
  226. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaUndefines.h +0 -44
  227. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h +0 -79
  228. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h +0 -603
  229. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h +0 -738
  230. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h +0 -247
  231. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorInitializer.h +0 -82
  232. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h +0 -263
  233. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h +0 -216
  234. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMacros.h +0 -98
  235. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h +0 -327
  236. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h +0 -311
  237. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h +0 -1102
  238. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h +0 -708
  239. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h +0 -291
  240. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h +0 -322
  241. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h +0 -998
  242. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h +0 -6
  243. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h +0 -966
  244. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReductionSycl.h +0 -582
  245. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h +0 -454
  246. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h +0 -465
  247. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h +0 -528
  248. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorScanSycl.h +0 -513
  249. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h +0 -471
  250. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h +0 -161
  251. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h +0 -346
  252. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorTrace.h +0 -303
  253. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h +0 -264
  254. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h +0 -249
  255. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h +0 -629
  256. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/TensorSymmetry/DynamicSymmetry.h +0 -293
  257. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/TensorSymmetry/StaticSymmetry.h +0 -236
  258. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/TensorSymmetry/Symmetry.h +0 -338
  259. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/TensorSymmetry/util/TemplateGroupTheory.h +0 -669
  260. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/Barrier.h +0 -67
  261. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/EventCount.h +0 -249
  262. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h +0 -486
  263. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/RunQueue.h +0 -236
  264. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadCancel.h +0 -23
  265. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadEnvironment.h +0 -40
  266. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadLocal.h +0 -301
  267. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadPoolInterface.h +0 -48
  268. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadYield.h +0 -20
  269. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/util/CXX11Meta.h +0 -537
  270. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/util/CXX11Workarounds.h +0 -88
  271. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/util/EmulateArray.h +0 -261
  272. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/CXX11/src/util/MaxSizeVector.h +0 -158
  273. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/AutoDiff/AutoDiffJacobian.h +0 -108
  274. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h +0 -730
  275. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/AutoDiff/AutoDiffVector.h +0 -220
  276. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/BVH/BVAlgorithms.h +0 -293
  277. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/BVH/KdBVH.h +0 -223
  278. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Eigenvalues/ArpackSelfAdjointEigenSolver.h +0 -790
  279. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/EulerAngles/EulerAngles.h +0 -355
  280. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/EulerAngles/EulerSystem.h +0 -305
  281. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/FFT/ei_fftw_impl.h +0 -261
  282. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/FFT/ei_kissfft_impl.h +0 -449
  283. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/IterativeSolvers/ConstrainedConjGrad.h +0 -187
  284. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/IterativeSolvers/DGMRES.h +0 -511
  285. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/IterativeSolvers/GMRES.h +0 -335
  286. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/IterativeSolvers/IDRS.h +0 -436
  287. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/IterativeSolvers/IncompleteLU.h +0 -90
  288. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/IterativeSolvers/IterationController.h +0 -154
  289. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/IterativeSolvers/MINRES.h +0 -267
  290. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/IterativeSolvers/Scaling.h +0 -193
  291. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h +0 -305
  292. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/LevenbergMarquardt/LMcovar.h +0 -84
  293. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/LevenbergMarquardt/LMonestep.h +0 -202
  294. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/LevenbergMarquardt/LMpar.h +0 -160
  295. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/LevenbergMarquardt/LMqrsolv.h +0 -188
  296. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/LevenbergMarquardt/LevenbergMarquardt.h +0 -396
  297. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h +0 -441
  298. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h +0 -569
  299. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h +0 -373
  300. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/MatrixFunctions/MatrixPower.h +0 -705
  301. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/MatrixFunctions/MatrixSquareRoot.h +0 -368
  302. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/MatrixFunctions/StemFunction.h +0 -117
  303. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/MoreVectorization/MathFunctions.h +0 -95
  304. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/HybridNonLinearSolver.h +0 -601
  305. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/LevenbergMarquardt.h +0 -657
  306. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/chkder.h +0 -66
  307. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/covar.h +0 -70
  308. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/dogleg.h +0 -107
  309. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/fdjac1.h +0 -79
  310. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/lmpar.h +0 -298
  311. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/qrsolv.h +0 -91
  312. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/r1mpyq.h +0 -30
  313. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/r1updt.h +0 -99
  314. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NonLinearOptimization/rwupdt.h +0 -49
  315. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/NumericalDiff/NumericalDiff.h +0 -130
  316. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Polynomials/Companion.h +0 -280
  317. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Polynomials/PolynomialSolver.h +0 -428
  318. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Polynomials/PolynomialUtils.h +0 -143
  319. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Skyline/SkylineInplaceLU.h +0 -352
  320. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Skyline/SkylineMatrix.h +0 -862
  321. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Skyline/SkylineMatrixBase.h +0 -212
  322. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Skyline/SkylineProduct.h +0 -295
  323. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Skyline/SkylineStorage.h +0 -259
  324. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Skyline/SkylineUtil.h +0 -89
  325. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SparseExtra/BlockOfDynamicSparseMatrix.h +0 -122
  326. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SparseExtra/BlockSparseMatrix.h +0 -1079
  327. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SparseExtra/DynamicSparseMatrix.h +0 -404
  328. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SparseExtra/MarketIO.h +0 -282
  329. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SparseExtra/MatrixMarketIterator.h +0 -247
  330. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SparseExtra/RandomSetter.h +0 -349
  331. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsArrayAPI.h +0 -286
  332. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsBFloat16.h +0 -68
  333. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsFunctors.h +0 -357
  334. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsHalf.h +0 -66
  335. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsImpl.h +0 -1959
  336. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsPacketMath.h +0 -118
  337. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/HipVectorCompatibility.h +0 -67
  338. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsArrayAPI.h +0 -167
  339. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsBFloat16.h +0 -58
  340. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h +0 -330
  341. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h +0 -58
  342. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h +0 -2045
  343. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h +0 -79
  344. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/arch/AVX/BesselFunctions.h +0 -46
  345. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/arch/AVX/SpecialFunctions.h +0 -16
  346. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/arch/AVX512/BesselFunctions.h +0 -46
  347. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/arch/AVX512/SpecialFunctions.h +0 -16
  348. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/arch/GPU/SpecialFunctions.h +0 -369
  349. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/arch/NEON/BesselFunctions.h +0 -54
  350. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/SpecialFunctions/arch/NEON/SpecialFunctions.h +0 -34
  351. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Splines/Spline.h +0 -507
  352. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Splines/SplineFitting.h +0 -431
  353. sequenzo/dissimilarity_measures/src/eigen/unsupported/Eigen/src/Splines/SplineFwd.h +0 -93
  354. sequenzo-0.1.18.dist-info/RECORD +0 -544
  355. {sequenzo-0.1.18.dist-info → sequenzo-0.1.19.dist-info}/WHEEL +0 -0
  356. {sequenzo-0.1.18.dist-info → sequenzo-0.1.19.dist-info}/licenses/LICENSE +0 -0
  357. {sequenzo-0.1.18.dist-info → sequenzo-0.1.19.dist-info}/top_level.txt +0 -0
@@ -1,582 +0,0 @@
1
- // This file is part of Eigen, a lightweight C++ template library
2
- // for linear algebra.
3
- //
4
- // Mehdi Goli Codeplay Software Ltd.
5
- // Ralph Potter Codeplay Software Ltd.
6
- // Luke Iwanski Codeplay Software Ltd.
7
- // Contact: <eigen@codeplay.com>
8
- //
9
- // This Source Code Form is subject to the terms of the Mozilla
10
- // Public License v. 2.0. If a copy of the MPL was not distributed
11
- // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
12
-
13
- /*****************************************************************
14
- * TensorReductionSycl.h
15
- *
16
- * \brief:
17
- * This is the specialization of the reduction operation. Two phase reduction approach
18
- * is used since the GPU does not have Global Synchronization for global memory among
19
- * different work-group/thread block. To solve the problem, we need to create two kernels
20
- * to reduce the data, where the first kernel reduces the data locally and each local
21
- * workgroup/thread-block saves its partial result into global memory. In the second phase (global reduction)
22
- * a single work-group/thread-block reduces the intermediate data into one single element.
23
- * Here is an NVIDIA presentation explaining the optimized two phase reduction algorithm on GPU:
24
- * https://developer.download.nvidia.com/assets/cuda/files/reduction.pdf
25
- *
26
- *****************************************************************/
27
-
28
- #ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSOR_REDUCTION_SYCL_HPP
29
- #define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSOR_REDUCTION_SYCL_HPP
30
- namespace Eigen {
31
- namespace TensorSycl {
32
- namespace internal {
33
-
34
- template <typename Op, typename CoeffReturnType, typename Index, bool Vectorizable>
35
- struct OpDefiner {
36
- typedef typename Vectorise<CoeffReturnType, Eigen::SyclDevice, Vectorizable>::PacketReturnType PacketReturnType;
37
- typedef Op type;
38
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE type get_op(Op &op) { return op; }
39
-
40
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType finalise_op(const PacketReturnType &accumulator,
41
- const Index &) {
42
- return accumulator;
43
- }
44
- };
45
-
46
- template <typename CoeffReturnType, typename Index>
47
- struct OpDefiner<Eigen::internal::MeanReducer<CoeffReturnType>, CoeffReturnType, Index, false> {
48
- typedef Eigen::internal::SumReducer<CoeffReturnType> type;
49
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE type get_op(Eigen::internal::MeanReducer<CoeffReturnType> &) {
50
- return type();
51
- }
52
-
53
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType finalise_op(const CoeffReturnType &accumulator,
54
- const Index &scale) {
55
- ::Eigen::internal::scalar_quotient_op<CoeffReturnType> quotient_op;
56
- return quotient_op(accumulator, CoeffReturnType(scale));
57
- }
58
- };
59
-
60
- template <typename CoeffReturnType, typename Index>
61
- struct OpDefiner<Eigen::internal::MeanReducer<CoeffReturnType>, CoeffReturnType, Index, true> {
62
- typedef typename Vectorise<CoeffReturnType, Eigen::SyclDevice, true>::PacketReturnType PacketReturnType;
63
- typedef Eigen::internal::SumReducer<CoeffReturnType> type;
64
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE type get_op(Eigen::internal::MeanReducer<CoeffReturnType> &) {
65
- return type();
66
- }
67
-
68
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType finalise_op(const PacketReturnType &accumulator,
69
- const Index &scale) {
70
- return ::Eigen::internal::pdiv(accumulator, ::Eigen::internal::pset1<PacketReturnType>(CoeffReturnType(scale)));
71
- }
72
- };
73
-
74
- template <typename CoeffReturnType, typename OpType, typename InputAccessor, typename OutputAccessor, typename Index,
75
- Index local_range>
76
- struct SecondStepFullReducer {
77
- typedef cl::sycl::accessor<CoeffReturnType, 1, cl::sycl::access::mode::read_write, cl::sycl::access::target::local>
78
- LocalAccessor;
79
- typedef OpDefiner<OpType, CoeffReturnType, Index, true> OpDef;
80
- typedef typename OpDef::type Op;
81
- LocalAccessor scratch;
82
- InputAccessor aI;
83
- OutputAccessor outAcc;
84
- Op op;
85
- SecondStepFullReducer(LocalAccessor scratch_, InputAccessor aI_, OutputAccessor outAcc_, OpType op_)
86
- : scratch(scratch_), aI(aI_), outAcc(outAcc_), op(OpDef::get_op(op_)) {}
87
-
88
- void operator()(cl::sycl::nd_item<1> itemID) {
89
- // Our empirical research shows that the best performance will be achieved
90
- // when there is only one element per thread to reduce in the second step.
91
- // In that case the second-step reduction time is almost negligible.
92
- // Hence, in the second step of reduction the input size is fixed to the
93
- // local size, thus, there is only one element read per thread. The
94
- // algorithm must be changed if the number of elements reduced per thread in the
95
- // second step is greater than 1. Otherwise, the result will be wrong.
96
- const Index localid = itemID.get_local_id(0);
97
- auto aInPtr = aI.get_pointer() + localid;
98
- auto aOutPtr = outAcc.get_pointer();
99
- CoeffReturnType *scratchptr = scratch.get_pointer();
100
- CoeffReturnType accumulator = *aInPtr;
101
-
102
- scratchptr[localid] = op.finalize(accumulator);
103
- for (Index offset = itemID.get_local_range(0) / 2; offset > 0; offset /= 2) {
104
- itemID.barrier(cl::sycl::access::fence_space::local_space);
105
- if (localid < offset) {
106
- op.reduce(scratchptr[localid + offset], &accumulator);
107
- scratchptr[localid] = op.finalize(accumulator);
108
- }
109
- }
110
- if (localid == 0) *aOutPtr = op.finalize(accumulator);
111
- }
112
- };
113
-
114
- // Full reduction first phase. In this version the vectorization is true and the reduction accepts
115
- // any generic reducerOp, e.g. (max, min, sum, mean, iamax, iamin, etc.).
116
- template <typename Evaluator, typename OpType, typename Evaluator::Index local_range>
117
- class FullReductionKernelFunctor {
118
- public:
119
- typedef typename Evaluator::CoeffReturnType CoeffReturnType;
120
- typedef typename Evaluator::Index Index;
121
- typedef OpDefiner<OpType, typename Evaluator::CoeffReturnType, Index,
122
- (Evaluator::ReducerTraits::PacketAccess & Evaluator::InputPacketAccess)>
123
- OpDef;
124
-
125
- typedef typename OpDef::type Op;
126
- typedef typename Evaluator::EvaluatorPointerType EvaluatorPointerType;
127
- typedef typename Evaluator::PacketReturnType PacketReturnType;
128
- typedef
129
- typename ::Eigen::internal::conditional<(Evaluator::ReducerTraits::PacketAccess & Evaluator::InputPacketAccess),
130
- PacketReturnType, CoeffReturnType>::type OutType;
131
- typedef cl::sycl::accessor<OutType, 1, cl::sycl::access::mode::read_write, cl::sycl::access::target::local>
132
- LocalAccessor;
133
- LocalAccessor scratch;
134
- Evaluator evaluator;
135
- EvaluatorPointerType final_output;
136
- Index rng;
137
- Op op;
138
-
139
- FullReductionKernelFunctor(LocalAccessor scratch_, Evaluator evaluator_, EvaluatorPointerType final_output_,
140
- Index rng_, OpType op_)
141
- : scratch(scratch_), evaluator(evaluator_), final_output(final_output_), rng(rng_), op(OpDef::get_op(op_)) {}
142
-
143
- void operator()(cl::sycl::nd_item<1> itemID) { compute_reduction(itemID); }
144
-
145
- template <bool Vect = (Evaluator::ReducerTraits::PacketAccess & Evaluator::InputPacketAccess)>
146
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename ::Eigen::internal::enable_if<Vect>::type compute_reduction(
147
- const cl::sycl::nd_item<1> &itemID) {
148
- auto output_ptr = final_output.get_pointer();
149
- Index VectorizedRange = (rng / Evaluator::PacketSize) * Evaluator::PacketSize;
150
- Index globalid = itemID.get_global_id(0);
151
- Index localid = itemID.get_local_id(0);
152
- Index step = Evaluator::PacketSize * itemID.get_global_range(0);
153
- Index start = Evaluator::PacketSize * globalid;
154
- // vectorizable parts
155
- PacketReturnType packetAccumulator = op.template initializePacket<PacketReturnType>();
156
- for (Index i = start; i < VectorizedRange; i += step) {
157
- op.template reducePacket<PacketReturnType>(evaluator.impl().template packet<Unaligned>(i), &packetAccumulator);
158
- }
159
- globalid += VectorizedRange;
160
- // non vectorizable parts
161
- for (Index i = globalid; i < rng; i += itemID.get_global_range(0)) {
162
- op.template reducePacket<PacketReturnType>(
163
- ::Eigen::TensorSycl::internal::PacketWrapper<PacketReturnType, Evaluator::PacketSize>::convert_to_packet_type(
164
- evaluator.impl().coeff(i), op.initialize()),
165
- &packetAccumulator);
166
- }
167
- scratch[localid] = packetAccumulator =
168
- OpDef::finalise_op(op.template finalizePacket<PacketReturnType>(packetAccumulator), rng);
169
- // reduction parts // Local size is always power of 2
170
- EIGEN_UNROLL_LOOP
171
- for (Index offset = local_range / 2; offset > 0; offset /= 2) {
172
- itemID.barrier(cl::sycl::access::fence_space::local_space);
173
- if (localid < offset) {
174
- op.template reducePacket<PacketReturnType>(scratch[localid + offset], &packetAccumulator);
175
- scratch[localid] = op.template finalizePacket<PacketReturnType>(packetAccumulator);
176
- }
177
- }
178
- if (localid == 0) {
179
- output_ptr[itemID.get_group(0)] =
180
- op.finalizeBoth(op.initialize(), op.template finalizePacket<PacketReturnType>(packetAccumulator));
181
- }
182
- }
183
-
184
- template <bool Vect = (Evaluator::ReducerTraits::PacketAccess & Evaluator::InputPacketAccess)>
185
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename ::Eigen::internal::enable_if<!Vect>::type compute_reduction(
186
- const cl::sycl::nd_item<1> &itemID) {
187
- auto output_ptr = final_output.get_pointer();
188
- Index globalid = itemID.get_global_id(0);
189
- Index localid = itemID.get_local_id(0);
190
- // scalar accumulator (this overload has no vectorizable part)
191
- CoeffReturnType accumulator = op.initialize();
192
- // non vectorizable parts
193
- for (Index i = globalid; i < rng; i += itemID.get_global_range(0)) {
194
- op.reduce(evaluator.impl().coeff(i), &accumulator);
195
- }
196
- scratch[localid] = accumulator = OpDef::finalise_op(op.finalize(accumulator), rng);
197
-
198
- // reduction parts. the local size is always power of 2
199
- EIGEN_UNROLL_LOOP
200
- for (Index offset = local_range / 2; offset > 0; offset /= 2) {
201
- itemID.barrier(cl::sycl::access::fence_space::local_space);
202
- if (localid < offset) {
203
- op.reduce(scratch[localid + offset], &accumulator);
204
- scratch[localid] = op.finalize(accumulator);
205
- }
206
- }
207
- if (localid == 0) {
208
- output_ptr[itemID.get_group(0)] = op.finalize(accumulator);
209
- }
210
- }
211
- };
212
-
213
- template <typename Evaluator, typename OpType>
214
- class GenericNondeterministicReducer {
215
- public:
216
- typedef typename Evaluator::CoeffReturnType CoeffReturnType;
217
- typedef typename Evaluator::EvaluatorPointerType EvaluatorPointerType;
218
- typedef typename Evaluator::Index Index;
219
- typedef OpDefiner<OpType, CoeffReturnType, Index, false> OpDef;
220
- typedef typename OpDef::type Op;
221
- template <typename Scratch>
222
- GenericNondeterministicReducer(Scratch, Evaluator evaluator_, EvaluatorPointerType output_accessor_, OpType functor_,
223
- Index range_, Index num_values_to_reduce_)
224
- : evaluator(evaluator_),
225
- output_accessor(output_accessor_),
226
- functor(OpDef::get_op(functor_)),
227
- range(range_),
228
- num_values_to_reduce(num_values_to_reduce_) {}
229
-
230
- void operator()(cl::sycl::nd_item<1> itemID) {
231
- auto output_accessor_ptr = output_accessor.get_pointer();
232
- /// const cast added as a naive solution to solve the qualifier drop error
233
- Index globalid = static_cast<Index>(itemID.get_global_linear_id());
234
- if (globalid < range) {
235
- CoeffReturnType accum = functor.initialize();
236
- Eigen::internal::GenericDimReducer<Evaluator::NumReducedDims - 1, Evaluator, Op>::reduce(
237
- evaluator, evaluator.firstInput(globalid), functor, &accum);
238
- output_accessor_ptr[globalid] = OpDef::finalise_op(functor.finalize(accum), num_values_to_reduce);
239
- }
240
- }
241
-
242
- private:
243
- Evaluator evaluator;
244
- EvaluatorPointerType output_accessor;
245
- Op functor;
246
- Index range;
247
- Index num_values_to_reduce;
248
- };
249
-
250
- enum class reduction_dim { inner_most, outer_most };
251
- // default is preserver
252
- template <typename Evaluator, typename OpType, typename PannelParameters, reduction_dim rt>
253
- struct PartialReductionKernel {
254
- typedef typename Evaluator::CoeffReturnType CoeffReturnType;
255
- typedef typename Evaluator::EvaluatorPointerType EvaluatorPointerType;
256
- typedef typename Evaluator::Index Index;
257
- typedef OpDefiner<OpType, CoeffReturnType, Index, false> OpDef;
258
- typedef typename OpDef::type Op;
259
- typedef cl::sycl::accessor<CoeffReturnType, 1, cl::sycl::access::mode::read_write, cl::sycl::access::target::local>
260
- ScratchAcc;
261
- ScratchAcc scratch;
262
- Evaluator evaluator;
263
- EvaluatorPointerType output_accessor;
264
- Op op;
265
- const Index preserve_elements_num_groups;
266
- const Index reduce_elements_num_groups;
267
- const Index num_coeffs_to_preserve;
268
- const Index num_coeffs_to_reduce;
269
-
270
- PartialReductionKernel(ScratchAcc scratch_, Evaluator evaluator_, EvaluatorPointerType output_accessor_, OpType op_,
271
- const Index preserve_elements_num_groups_, const Index reduce_elements_num_groups_,
272
- const Index num_coeffs_to_preserve_, const Index num_coeffs_to_reduce_)
273
- : scratch(scratch_),
274
- evaluator(evaluator_),
275
- output_accessor(output_accessor_),
276
- op(OpDef::get_op(op_)),
277
- preserve_elements_num_groups(preserve_elements_num_groups_),
278
- reduce_elements_num_groups(reduce_elements_num_groups_),
279
- num_coeffs_to_preserve(num_coeffs_to_preserve_),
280
- num_coeffs_to_reduce(num_coeffs_to_reduce_) {}
281
-
282
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void element_wise_reduce(Index globalRId, Index globalPId,
283
- CoeffReturnType &accumulator) {
284
- if (globalPId >= num_coeffs_to_preserve) {
285
- return;
286
- }
287
- Index global_offset = rt == reduction_dim::outer_most ? globalPId + (globalRId * num_coeffs_to_preserve)
288
- : globalRId + (globalPId * num_coeffs_to_reduce);
289
- Index localOffset = globalRId;
290
-
291
- const Index per_thread_local_stride = PannelParameters::LocalThreadSizeR * reduce_elements_num_groups;
292
- const Index per_thread_global_stride =
293
- rt == reduction_dim::outer_most ? num_coeffs_to_preserve * per_thread_local_stride : per_thread_local_stride;
294
- for (Index i = globalRId; i < num_coeffs_to_reduce; i += per_thread_local_stride) {
295
- op.reduce(evaluator.impl().coeff(global_offset), &accumulator);
296
- localOffset += per_thread_local_stride;
297
- global_offset += per_thread_global_stride;
298
- }
299
- }
300
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator()(cl::sycl::nd_item<1> itemID) {
301
- const Index linearLocalThreadId = itemID.get_local_id(0);
302
- Index pLocalThreadId = rt == reduction_dim::outer_most ? linearLocalThreadId % PannelParameters::LocalThreadSizeP
303
- : linearLocalThreadId / PannelParameters::LocalThreadSizeR;
304
- Index rLocalThreadId = rt == reduction_dim::outer_most ? linearLocalThreadId / PannelParameters::LocalThreadSizeP
305
- : linearLocalThreadId % PannelParameters::LocalThreadSizeR;
306
- const Index pGroupId = rt == reduction_dim::outer_most ? itemID.get_group(0) % preserve_elements_num_groups
307
- : itemID.get_group(0) / reduce_elements_num_groups;
308
- const Index rGroupId = rt == reduction_dim::outer_most ? itemID.get_group(0) / preserve_elements_num_groups
309
- : itemID.get_group(0) % reduce_elements_num_groups;
310
-
311
- Index globalPId = pGroupId * PannelParameters::LocalThreadSizeP + pLocalThreadId;
312
- const Index globalRId = rGroupId * PannelParameters::LocalThreadSizeR + rLocalThreadId;
313
- auto scratchPtr = scratch.get_pointer().get();
314
- auto outPtr =
315
- output_accessor.get_pointer() + (reduce_elements_num_groups > 1 ? rGroupId * num_coeffs_to_preserve : 0);
316
- CoeffReturnType accumulator = op.initialize();
317
-
318
- element_wise_reduce(globalRId, globalPId, accumulator);
319
-
320
- accumulator = OpDef::finalise_op(op.finalize(accumulator), num_coeffs_to_reduce);
321
- scratchPtr[pLocalThreadId + rLocalThreadId * (PannelParameters::LocalThreadSizeP + PannelParameters::BC)] =
322
- accumulator;
323
- if (rt == reduction_dim::inner_most) {
324
- pLocalThreadId = linearLocalThreadId % PannelParameters::LocalThreadSizeP;
325
- rLocalThreadId = linearLocalThreadId / PannelParameters::LocalThreadSizeP;
326
- globalPId = pGroupId * PannelParameters::LocalThreadSizeP + pLocalThreadId;
327
- }
328
-
329
- /* Apply the reduction operation between the current local
330
- * id and the one on the other half of the vector. */
331
- auto out_scratch_ptr =
332
- scratchPtr + (pLocalThreadId + (rLocalThreadId * (PannelParameters::LocalThreadSizeP + PannelParameters::BC)));
333
- itemID.barrier(cl::sycl::access::fence_space::local_space);
334
- if (rt == reduction_dim::inner_most) {
335
- accumulator = *out_scratch_ptr;
336
- }
337
- // LocalThreadSizeR is always a power of 2
338
- EIGEN_UNROLL_LOOP
339
- for (Index offset = PannelParameters::LocalThreadSizeR >> 1; offset > 0; offset >>= 1) {
340
- if (rLocalThreadId < offset) {
341
- op.reduce(out_scratch_ptr[(PannelParameters::LocalThreadSizeP + PannelParameters::BC) * offset], &accumulator);
342
- // The result has already been divided for mean reducer in the
343
- // previous reduction so no need to divide furthermore
344
- *out_scratch_ptr = op.finalize(accumulator);
345
- }
346
- /* All threads collectively read from global memory into local.
347
- * The barrier ensures all threads' IO is resolved before
348
- * execution continues (strictly speaking, all threads within
349
- * a single work-group - there is no co-ordination between
350
- * work-groups, only work-items). */
351
- itemID.barrier(cl::sycl::access::fence_space::local_space);
352
- }
353
-
354
- if (rLocalThreadId == 0 && (globalPId < num_coeffs_to_preserve)) {
355
- outPtr[globalPId] = op.finalize(accumulator);
356
- }
357
- }
358
- };
359
-
360
- template <typename OutScalar, typename Index, typename InputAccessor, typename OutputAccessor, typename OpType>
361
- struct SecondStepPartialReduction {
362
- typedef OpDefiner<OpType, OutScalar, Index, false> OpDef;
363
- typedef typename OpDef::type Op;
364
- typedef cl::sycl::accessor<OutScalar, 1, cl::sycl::access::mode::read_write, cl::sycl::access::target::local>
365
- ScratchAccessor;
366
- InputAccessor input_accessor;
367
- OutputAccessor output_accessor;
368
- Op op;
369
- const Index num_coeffs_to_preserve;
370
- const Index num_coeffs_to_reduce;
371
-
372
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE SecondStepPartialReduction(ScratchAccessor, InputAccessor input_accessor_,
373
- OutputAccessor output_accessor_, OpType op_,
374
- const Index num_coeffs_to_preserve_,
375
- const Index num_coeffs_to_reduce_)
376
- : input_accessor(input_accessor_),
377
- output_accessor(output_accessor_),
378
- op(OpDef::get_op(op_)),
379
- num_coeffs_to_preserve(num_coeffs_to_preserve_),
380
- num_coeffs_to_reduce(num_coeffs_to_reduce_) {}
381
-
382
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator()(cl::sycl::nd_item<1> itemID) {
383
- const Index globalId = itemID.get_global_id(0);
384
-
385
- if (globalId >= num_coeffs_to_preserve) return;
386
-
387
- auto in_ptr = input_accessor.get_pointer() + globalId;
388
-
389
- OutScalar accumulator = op.initialize();
390
- // num_coeffs_to_reduce is not bigger than 256
391
- for (Index i = 0; i < num_coeffs_to_reduce; i++) {
392
- op.reduce(*in_ptr, &accumulator);
393
- in_ptr += num_coeffs_to_preserve;
394
- }
395
- output_accessor.get_pointer()[globalId] = op.finalize(accumulator);
396
- }
397
- }; // struct SecondStepPartialReduction
398
-
399
- template <typename Index, Index LTP, Index LTR, bool BC_>
400
- struct ReductionPannel {
401
- static EIGEN_CONSTEXPR Index LocalThreadSizeP = LTP;
402
- static EIGEN_CONSTEXPR Index LocalThreadSizeR = LTR;
403
- static EIGEN_CONSTEXPR bool BC = BC_;
404
- };
405
-
406
- template <typename Self, typename Op, TensorSycl::internal::reduction_dim rt>
407
- struct PartialReducerLauncher {
408
- typedef typename Self::EvaluatorPointerType EvaluatorPointerType;
409
- typedef typename Self::CoeffReturnType CoeffReturnType;
410
- typedef typename Self::Storage Storage;
411
- typedef typename Self::Index Index;
412
- typedef ReductionPannel<typename Self::Index, EIGEN_SYCL_LOCAL_THREAD_DIM0, EIGEN_SYCL_LOCAL_THREAD_DIM1, true>
413
- PannelParameters;
414
-
415
- typedef PartialReductionKernel<Self, Op, PannelParameters, rt> SyclReducerKerneType;
416
-
417
- static bool run(const Self &self, const Op &reducer, const Eigen::SyclDevice &dev, EvaluatorPointerType output,
418
- Index num_coeffs_to_reduce, Index num_coeffs_to_preserve) {
419
- Index roundUpP = roundUp(num_coeffs_to_preserve, PannelParameters::LocalThreadSizeP);
420
-
421
- // getPowerOfTwo makes sure local range is power of 2 and <=
422
- // maxSyclThreadPerBlock this will help us to avoid extra check on the
423
- // kernel
424
- static_assert(!((PannelParameters::LocalThreadSizeP * PannelParameters::LocalThreadSizeR) &
425
- (PannelParameters::LocalThreadSizeP * PannelParameters::LocalThreadSizeR - 1)),
426
- "The Local thread size must be a power of 2 for the reduction "
427
- "operation");
428
-
429
- EIGEN_CONSTEXPR Index localRange = PannelParameters::LocalThreadSizeP * PannelParameters::LocalThreadSizeR;
430
- // In this step, we force the code not to be more than a 2-step reduction:
431
- // Our empirical research shows that if each thread reduces at least 64
432
- // elements individually, we get better performance. However, this can change
433
- // on different platforms. In this step we force the code not to be
434
- // more than a 2-step reduction: Our empirical research shows that for inner_most
435
- // dim reducer, it is better to have 8 groups in a reduce dimension for sizes
436
- // > 1024 to achieve the best performance.
437
- const Index reductionPerThread = 64;
438
- Index cu = dev.getPowerOfTwo(dev.getNumSyclMultiProcessors(), true);
439
- const Index pNumGroups = roundUpP / PannelParameters::LocalThreadSizeP;
440
- Index rGroups = (cu + pNumGroups - 1) / pNumGroups;
441
- const Index rNumGroups = num_coeffs_to_reduce > reductionPerThread * localRange ? std::min(rGroups, localRange) : 1;
442
- const Index globalRange = pNumGroups * rNumGroups * localRange;
443
-
444
- EIGEN_CONSTEXPR Index scratchSize =
445
- PannelParameters::LocalThreadSizeR * (PannelParameters::LocalThreadSizeP + PannelParameters::BC);
446
- auto thread_range = cl::sycl::nd_range<1>(cl::sycl::range<1>(globalRange), cl::sycl::range<1>(localRange));
447
- if (rNumGroups > 1) {
448
- CoeffReturnType *temp_pointer = static_cast<CoeffReturnType *>(
449
- dev.allocate_temp(num_coeffs_to_preserve * rNumGroups * sizeof(CoeffReturnType)));
450
- EvaluatorPointerType temp_accessor = dev.get(temp_pointer);
451
- dev.template unary_kernel_launcher<CoeffReturnType, SyclReducerKerneType>(
452
- self, temp_accessor, thread_range, scratchSize, reducer, pNumGroups, rNumGroups, num_coeffs_to_preserve,
453
- num_coeffs_to_reduce);
454
-
455
- typedef SecondStepPartialReduction<CoeffReturnType, Index, EvaluatorPointerType, EvaluatorPointerType, Op>
456
- SecondStepPartialReductionKernel;
457
-
458
- dev.template unary_kernel_launcher<CoeffReturnType, SecondStepPartialReductionKernel>(
459
- temp_accessor, output,
460
- cl::sycl::nd_range<1>(cl::sycl::range<1>(pNumGroups * localRange), cl::sycl::range<1>(localRange)), Index(1),
461
- reducer, num_coeffs_to_preserve, rNumGroups);
462
-
463
- self.device().deallocate_temp(temp_pointer);
464
- } else {
465
- dev.template unary_kernel_launcher<CoeffReturnType, SyclReducerKerneType>(
466
- self, output, thread_range, scratchSize, reducer, pNumGroups, rNumGroups, num_coeffs_to_preserve,
467
- num_coeffs_to_reduce);
468
- }
469
- return false;
470
- }
471
- };
472
- } // namespace internal
473
- } // namespace TensorSycl
474
-
475
- namespace internal {
476
-
477
- template <typename Self, typename Op, bool Vectorizable>
478
- struct FullReducer<Self, Op, Eigen::SyclDevice, Vectorizable> {
479
- typedef typename Self::CoeffReturnType CoeffReturnType;
480
- typedef typename Self::EvaluatorPointerType EvaluatorPointerType;
481
- static EIGEN_CONSTEXPR bool HasOptimizedImplementation = true;
482
- static EIGEN_CONSTEXPR int PacketSize = Self::PacketAccess ? Self::PacketSize : 1;
483
- static void run(const Self &self, Op &reducer, const Eigen::SyclDevice &dev, EvaluatorPointerType data) {
484
- typedef typename conditional<Self::PacketAccess, typename Self::PacketReturnType, CoeffReturnType>::type OutType;
485
- static_assert(!((EIGEN_SYCL_LOCAL_THREAD_DIM0 * EIGEN_SYCL_LOCAL_THREAD_DIM1) &
486
- (EIGEN_SYCL_LOCAL_THREAD_DIM0 * EIGEN_SYCL_LOCAL_THREAD_DIM1 - 1)),
487
- "The Local thread size must be a power of 2 for the reduction "
488
- "operation");
489
- EIGEN_CONSTEXPR Index local_range = EIGEN_SYCL_LOCAL_THREAD_DIM0 * EIGEN_SYCL_LOCAL_THREAD_DIM1;
490
-
491
- typename Self::Index inputSize = self.impl().dimensions().TotalSize();
492
- // In this step we force the code not to be more than 2-step reduction:
493
- // Our empirical research shows that if each thread reduces at least 512
494
- // elements individually, we get better performance.
495
- const Index reductionPerThread = 2048;
496
- // const Index num_work_group =
497
- Index reductionGroup = dev.getPowerOfTwo(
498
- (inputSize + (reductionPerThread * local_range - 1)) / (reductionPerThread * local_range), true);
499
- const Index num_work_group = std::min(reductionGroup, local_range);
500
- // 1
501
- // ? local_range
502
- // : 1);
503
- const Index global_range = num_work_group * local_range;
504
-
505
- auto thread_range = cl::sycl::nd_range<1>(cl::sycl::range<1>(global_range), cl::sycl::range<1>(local_range));
506
- typedef TensorSycl::internal::FullReductionKernelFunctor<Self, Op, local_range> reduction_kernel_t;
507
- if (num_work_group > 1) {
508
- CoeffReturnType *temp_pointer =
509
- static_cast<CoeffReturnType *>(dev.allocate_temp(num_work_group * sizeof(CoeffReturnType)));
510
- typename Self::EvaluatorPointerType tmp_global_accessor = dev.get(temp_pointer);
511
- dev.template unary_kernel_launcher<OutType, reduction_kernel_t>(self, tmp_global_accessor, thread_range,
512
- local_range, inputSize, reducer);
513
-
514
- typedef TensorSycl::internal::SecondStepFullReducer<CoeffReturnType, Op, EvaluatorPointerType,
515
- EvaluatorPointerType, Index, local_range>
516
- GenericRKernel;
517
- dev.template unary_kernel_launcher<CoeffReturnType, GenericRKernel>(
518
- tmp_global_accessor, data,
519
- cl::sycl::nd_range<1>(cl::sycl::range<1>(num_work_group), cl::sycl::range<1>(num_work_group)), num_work_group,
520
- reducer);
521
-
522
- dev.deallocate_temp(temp_pointer);
523
- } else {
524
- dev.template unary_kernel_launcher<OutType, reduction_kernel_t>(self, data, thread_range, local_range, inputSize,
525
- reducer);
526
- }
527
- }
528
- };
529
- // vectorizable inner_most dim preserver
530
- // col reduction
531
- template <typename Self, typename Op>
532
- struct OuterReducer<Self, Op, Eigen::SyclDevice> {
533
- static EIGEN_CONSTEXPR bool HasOptimizedImplementation = true;
534
-
535
- static bool run(const Self &self, const Op &reducer, const Eigen::SyclDevice &dev,
536
- typename Self::EvaluatorPointerType output, typename Self::Index num_coeffs_to_reduce,
537
- typename Self::Index num_coeffs_to_preserve) {
538
- return ::Eigen::TensorSycl::internal::PartialReducerLauncher<
539
- Self, Op, ::Eigen::TensorSycl::internal::reduction_dim::outer_most>::run(self, reducer, dev, output,
540
- num_coeffs_to_reduce,
541
- num_coeffs_to_preserve);
542
- }
543
- };
544
- // row reduction
545
- template <typename Self, typename Op>
546
- struct InnerReducer<Self, Op, Eigen::SyclDevice> {
547
- static EIGEN_CONSTEXPR bool HasOptimizedImplementation = true;
548
-
549
- static bool run(const Self &self, const Op &reducer, const Eigen::SyclDevice &dev,
550
- typename Self::EvaluatorPointerType output, typename Self::Index num_coeffs_to_reduce,
551
- typename Self::Index num_coeffs_to_preserve) {
552
- return ::Eigen::TensorSycl::internal::PartialReducerLauncher<
553
- Self, Op, ::Eigen::TensorSycl::internal::reduction_dim::inner_most>::run(self, reducer, dev, output,
554
- num_coeffs_to_reduce,
555
- num_coeffs_to_preserve);
556
- }
557
- };
558
-
559
- // ArgMax uses this kernel for partial reduction
560
- // TODO(@mehdi.goli) come up with a better kernel
561
- // generic partial reduction
562
- template <typename Self, typename Op>
563
- struct GenericReducer<Self, Op, Eigen::SyclDevice> {
564
- static EIGEN_CONSTEXPR bool HasOptimizedImplementation = false;
565
- static bool run(const Self &self, const Op &reducer, const Eigen::SyclDevice &dev,
566
- typename Self::EvaluatorPointerType output, typename Self::Index num_values_to_reduce,
567
- typename Self::Index num_coeffs_to_preserve) {
568
- typename Self::Index range, GRange, tileSize;
569
- dev.parallel_for_setup(num_coeffs_to_preserve, tileSize, range, GRange);
570
-
571
- dev.template unary_kernel_launcher<typename Self::CoeffReturnType,
572
- TensorSycl::internal::GenericNondeterministicReducer<Self, Op>>(
573
- self, output, cl::sycl::nd_range<1>(cl::sycl::range<1>(GRange), cl::sycl::range<1>(tileSize)), Index(1),
574
- reducer, range, (num_values_to_reduce != 0) ? num_values_to_reduce : static_cast<Index>(1));
575
- return false;
576
- }
577
- };
578
-
579
- } // namespace internal
580
- } // namespace Eigen
581
-
582
- #endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSOR_REDUCTION_SYCL_HPP