sequenzo 0.1.24__cp311-cp311-macosx_10_9_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sequenzo might be problematic. Click here for more details.

Files changed (264)
  1. _sequenzo_fastcluster.cpython-311-darwin.so +0 -0
  2. sequenzo/__init__.py +240 -0
  3. sequenzo/big_data/__init__.py +12 -0
  4. sequenzo/big_data/clara/__init__.py +26 -0
  5. sequenzo/big_data/clara/clara.py +474 -0
  6. sequenzo/big_data/clara/utils/__init__.py +27 -0
  7. sequenzo/big_data/clara/utils/aggregatecases.py +92 -0
  8. sequenzo/big_data/clara/utils/davies_bouldin.py +91 -0
  9. sequenzo/big_data/clara/utils/get_weighted_diss.cpython-311-darwin.so +0 -0
  10. sequenzo/big_data/clara/utils/wfcmdd.py +205 -0
  11. sequenzo/big_data/clara/visualization.py +88 -0
  12. sequenzo/clustering/KMedoids.py +178 -0
  13. sequenzo/clustering/__init__.py +30 -0
  14. sequenzo/clustering/clustering_c_code.cpython-311-darwin.so +0 -0
  15. sequenzo/clustering/hierarchical_clustering.py +1256 -0
  16. sequenzo/clustering/sequenzo_fastcluster/fastcluster.py +495 -0
  17. sequenzo/clustering/sequenzo_fastcluster/src/fastcluster.cpp +1877 -0
  18. sequenzo/clustering/sequenzo_fastcluster/src/fastcluster_python.cpp +1264 -0
  19. sequenzo/clustering/src/KMedoid.cpp +263 -0
  20. sequenzo/clustering/src/PAM.cpp +237 -0
  21. sequenzo/clustering/src/PAMonce.cpp +265 -0
  22. sequenzo/clustering/src/cluster_quality.cpp +496 -0
  23. sequenzo/clustering/src/cluster_quality.h +128 -0
  24. sequenzo/clustering/src/cluster_quality_backup.cpp +570 -0
  25. sequenzo/clustering/src/module.cpp +228 -0
  26. sequenzo/clustering/src/weightedinertia.cpp +111 -0
  27. sequenzo/clustering/utils/__init__.py +27 -0
  28. sequenzo/clustering/utils/disscenter.py +122 -0
  29. sequenzo/data_preprocessing/__init__.py +20 -0
  30. sequenzo/data_preprocessing/helpers.py +256 -0
  31. sequenzo/datasets/__init__.py +41 -0
  32. sequenzo/datasets/biofam.csv +2001 -0
  33. sequenzo/datasets/biofam_child_domain.csv +2001 -0
  34. sequenzo/datasets/biofam_left_domain.csv +2001 -0
  35. sequenzo/datasets/biofam_married_domain.csv +2001 -0
  36. sequenzo/datasets/chinese_colonial_territories.csv +12 -0
  37. sequenzo/datasets/country_co2_emissions.csv +194 -0
  38. sequenzo/datasets/country_co2_emissions_global_deciles.csv +195 -0
  39. sequenzo/datasets/country_co2_emissions_global_quintiles.csv +195 -0
  40. sequenzo/datasets/country_co2_emissions_local_deciles.csv +195 -0
  41. sequenzo/datasets/country_co2_emissions_local_quintiles.csv +195 -0
  42. sequenzo/datasets/country_gdp_per_capita.csv +194 -0
  43. sequenzo/datasets/mvad.csv +713 -0
  44. sequenzo/datasets/pairfam_family.csv +1867 -0
  45. sequenzo/datasets/polyadic_samplec1.csv +61 -0
  46. sequenzo/datasets/polyadic_samplep1.csv +61 -0
  47. sequenzo/datasets/polyadic_seqc1.csv +61 -0
  48. sequenzo/datasets/polyadic_seqp1.csv +61 -0
  49. sequenzo/define_sequence_data.py +609 -0
  50. sequenzo/dissimilarity_measures/__init__.py +31 -0
  51. sequenzo/dissimilarity_measures/c_code.cpython-311-darwin.so +0 -0
  52. sequenzo/dissimilarity_measures/get_distance_matrix.py +702 -0
  53. sequenzo/dissimilarity_measures/get_substitution_cost_matrix.py +241 -0
  54. sequenzo/dissimilarity_measures/src/DHDdistance.cpp +148 -0
  55. sequenzo/dissimilarity_measures/src/LCPdistance.cpp +114 -0
  56. sequenzo/dissimilarity_measures/src/OMdistance.cpp +247 -0
  57. sequenzo/dissimilarity_measures/src/OMspellDistance.cpp +281 -0
  58. sequenzo/dissimilarity_measures/src/__init__.py +0 -0
  59. sequenzo/dissimilarity_measures/src/dist2matrix.cpp +63 -0
  60. sequenzo/dissimilarity_measures/src/dp_utils.h +160 -0
  61. sequenzo/dissimilarity_measures/src/module.cpp +34 -0
  62. sequenzo/dissimilarity_measures/src/setup.py +30 -0
  63. sequenzo/dissimilarity_measures/src/utils.h +25 -0
  64. sequenzo/dissimilarity_measures/src/xsimd/.github/cmake-test/main.cpp +6 -0
  65. sequenzo/dissimilarity_measures/src/xsimd/benchmark/main.cpp +159 -0
  66. sequenzo/dissimilarity_measures/src/xsimd/benchmark/xsimd_benchmark.hpp +565 -0
  67. sequenzo/dissimilarity_measures/src/xsimd/docs/source/conf.py +37 -0
  68. sequenzo/dissimilarity_measures/src/xsimd/examples/mandelbrot.cpp +330 -0
  69. sequenzo/dissimilarity_measures/src/xsimd/examples/pico_bench.hpp +246 -0
  70. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_arithmetic.hpp +266 -0
  71. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_complex.hpp +112 -0
  72. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_details.hpp +323 -0
  73. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_logical.hpp +218 -0
  74. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_math.hpp +2583 -0
  75. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_memory.hpp +880 -0
  76. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_rounding.hpp +72 -0
  77. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_swizzle.hpp +174 -0
  78. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_trigo.hpp +978 -0
  79. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx.hpp +1924 -0
  80. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx2.hpp +1144 -0
  81. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512bw.hpp +656 -0
  82. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512cd.hpp +28 -0
  83. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512dq.hpp +244 -0
  84. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512er.hpp +20 -0
  85. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512f.hpp +2650 -0
  86. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512ifma.hpp +20 -0
  87. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512pf.hpp +20 -0
  88. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vbmi.hpp +77 -0
  89. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vbmi2.hpp +131 -0
  90. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vnni_avx512bw.hpp +20 -0
  91. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vnni_avx512vbmi2.hpp +20 -0
  92. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avxvnni.hpp +20 -0
  93. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_common.hpp +24 -0
  94. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_common_fwd.hpp +77 -0
  95. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_constants.hpp +393 -0
  96. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_emulated.hpp +788 -0
  97. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_avx.hpp +93 -0
  98. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_avx2.hpp +46 -0
  99. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_sse.hpp +97 -0
  100. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma4.hpp +92 -0
  101. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_i8mm_neon64.hpp +17 -0
  102. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_isa.hpp +142 -0
  103. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_neon.hpp +3142 -0
  104. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_neon64.hpp +1543 -0
  105. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_rvv.hpp +1513 -0
  106. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_scalar.hpp +1260 -0
  107. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse2.hpp +2024 -0
  108. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse3.hpp +67 -0
  109. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse4_1.hpp +339 -0
  110. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse4_2.hpp +44 -0
  111. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_ssse3.hpp +186 -0
  112. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sve.hpp +1155 -0
  113. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_vsx.hpp +892 -0
  114. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_wasm.hpp +1780 -0
  115. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_arch.hpp +240 -0
  116. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_config.hpp +484 -0
  117. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_cpuid.hpp +269 -0
  118. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_inline.hpp +27 -0
  119. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/math/xsimd_rem_pio2.hpp +719 -0
  120. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/memory/xsimd_aligned_allocator.hpp +349 -0
  121. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/memory/xsimd_alignment.hpp +91 -0
  122. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_all_registers.hpp +55 -0
  123. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_api.hpp +2765 -0
  124. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx2_register.hpp +44 -0
  125. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512bw_register.hpp +51 -0
  126. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512cd_register.hpp +51 -0
  127. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512dq_register.hpp +51 -0
  128. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512er_register.hpp +51 -0
  129. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512f_register.hpp +77 -0
  130. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512ifma_register.hpp +51 -0
  131. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512pf_register.hpp +51 -0
  132. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vbmi2_register.hpp +51 -0
  133. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vbmi_register.hpp +51 -0
  134. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512bw_register.hpp +54 -0
  135. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512vbmi2_register.hpp +53 -0
  136. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx_register.hpp +64 -0
  137. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avxvnni_register.hpp +44 -0
  138. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_batch.hpp +1524 -0
  139. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_batch_constant.hpp +300 -0
  140. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_common_arch.hpp +47 -0
  141. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_emulated_register.hpp +80 -0
  142. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_avx2_register.hpp +50 -0
  143. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_avx_register.hpp +50 -0
  144. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_sse_register.hpp +50 -0
  145. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma4_register.hpp +50 -0
  146. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_i8mm_neon64_register.hpp +55 -0
  147. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_neon64_register.hpp +55 -0
  148. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_neon_register.hpp +154 -0
  149. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_register.hpp +94 -0
  150. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_rvv_register.hpp +506 -0
  151. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse2_register.hpp +59 -0
  152. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse3_register.hpp +49 -0
  153. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse4_1_register.hpp +48 -0
  154. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse4_2_register.hpp +48 -0
  155. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_ssse3_register.hpp +48 -0
  156. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sve_register.hpp +156 -0
  157. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_traits.hpp +337 -0
  158. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_utils.hpp +536 -0
  159. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_vsx_register.hpp +77 -0
  160. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_wasm_register.hpp +59 -0
  161. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/xsimd.hpp +75 -0
  162. sequenzo/dissimilarity_measures/src/xsimd/test/architectures/dummy.cpp +7 -0
  163. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set.cpp +13 -0
  164. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean.cpp +24 -0
  165. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_aligned.cpp +25 -0
  166. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_arch_independent.cpp +28 -0
  167. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_tag_dispatch.cpp +25 -0
  168. sequenzo/dissimilarity_measures/src/xsimd/test/doc/manipulating_abstract_batches.cpp +7 -0
  169. sequenzo/dissimilarity_measures/src/xsimd/test/doc/manipulating_parametric_batches.cpp +8 -0
  170. sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum.hpp +31 -0
  171. sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum_avx2.cpp +3 -0
  172. sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum_sse2.cpp +3 -0
  173. sequenzo/dissimilarity_measures/src/xsimd/test/doc/writing_vectorized_code.cpp +11 -0
  174. sequenzo/dissimilarity_measures/src/xsimd/test/main.cpp +31 -0
  175. sequenzo/dissimilarity_measures/src/xsimd/test/test_api.cpp +230 -0
  176. sequenzo/dissimilarity_measures/src/xsimd/test/test_arch.cpp +217 -0
  177. sequenzo/dissimilarity_measures/src/xsimd/test/test_basic_math.cpp +183 -0
  178. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch.cpp +1049 -0
  179. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_bool.cpp +508 -0
  180. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_cast.cpp +409 -0
  181. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_complex.cpp +712 -0
  182. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_constant.cpp +286 -0
  183. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_float.cpp +141 -0
  184. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_int.cpp +365 -0
  185. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_manip.cpp +308 -0
  186. sequenzo/dissimilarity_measures/src/xsimd/test/test_bitwise_cast.cpp +222 -0
  187. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_exponential.cpp +226 -0
  188. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_hyperbolic.cpp +183 -0
  189. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_power.cpp +265 -0
  190. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_trigonometric.cpp +236 -0
  191. sequenzo/dissimilarity_measures/src/xsimd/test/test_conversion.cpp +248 -0
  192. sequenzo/dissimilarity_measures/src/xsimd/test/test_custom_default_arch.cpp +28 -0
  193. sequenzo/dissimilarity_measures/src/xsimd/test/test_error_gamma.cpp +170 -0
  194. sequenzo/dissimilarity_measures/src/xsimd/test/test_explicit_batch_instantiation.cpp +32 -0
  195. sequenzo/dissimilarity_measures/src/xsimd/test/test_exponential.cpp +202 -0
  196. sequenzo/dissimilarity_measures/src/xsimd/test/test_extract_pair.cpp +92 -0
  197. sequenzo/dissimilarity_measures/src/xsimd/test/test_fp_manipulation.cpp +77 -0
  198. sequenzo/dissimilarity_measures/src/xsimd/test/test_gnu_source.cpp +30 -0
  199. sequenzo/dissimilarity_measures/src/xsimd/test/test_hyperbolic.cpp +167 -0
  200. sequenzo/dissimilarity_measures/src/xsimd/test/test_load_store.cpp +304 -0
  201. sequenzo/dissimilarity_measures/src/xsimd/test/test_memory.cpp +61 -0
  202. sequenzo/dissimilarity_measures/src/xsimd/test/test_poly_evaluation.cpp +64 -0
  203. sequenzo/dissimilarity_measures/src/xsimd/test/test_power.cpp +184 -0
  204. sequenzo/dissimilarity_measures/src/xsimd/test/test_rounding.cpp +199 -0
  205. sequenzo/dissimilarity_measures/src/xsimd/test/test_select.cpp +101 -0
  206. sequenzo/dissimilarity_measures/src/xsimd/test/test_shuffle.cpp +760 -0
  207. sequenzo/dissimilarity_measures/src/xsimd/test/test_sum.cpp +4 -0
  208. sequenzo/dissimilarity_measures/src/xsimd/test/test_sum.hpp +34 -0
  209. sequenzo/dissimilarity_measures/src/xsimd/test/test_traits.cpp +172 -0
  210. sequenzo/dissimilarity_measures/src/xsimd/test/test_trigonometric.cpp +208 -0
  211. sequenzo/dissimilarity_measures/src/xsimd/test/test_utils.hpp +611 -0
  212. sequenzo/dissimilarity_measures/src/xsimd/test/test_wasm/test_wasm_playwright.py +123 -0
  213. sequenzo/dissimilarity_measures/src/xsimd/test/test_xsimd_api.cpp +1460 -0
  214. sequenzo/dissimilarity_measures/utils/__init__.py +16 -0
  215. sequenzo/dissimilarity_measures/utils/get_LCP_length_for_2_seq.py +44 -0
  216. sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.cpython-311-darwin.so +0 -0
  217. sequenzo/dissimilarity_measures/utils/seqconc.cpython-311-darwin.so +0 -0
  218. sequenzo/dissimilarity_measures/utils/seqdss.cpython-311-darwin.so +0 -0
  219. sequenzo/dissimilarity_measures/utils/seqdur.cpython-311-darwin.so +0 -0
  220. sequenzo/dissimilarity_measures/utils/seqlength.cpython-311-darwin.so +0 -0
  221. sequenzo/multidomain/__init__.py +23 -0
  222. sequenzo/multidomain/association_between_domains.py +311 -0
  223. sequenzo/multidomain/cat.py +431 -0
  224. sequenzo/multidomain/combt.py +519 -0
  225. sequenzo/multidomain/dat.py +89 -0
  226. sequenzo/multidomain/idcd.py +139 -0
  227. sequenzo/multidomain/linked_polyad.py +292 -0
  228. sequenzo/openmp_setup.py +233 -0
  229. sequenzo/prefix_tree/__init__.py +43 -0
  230. sequenzo/prefix_tree/individual_level_indicators.py +1274 -0
  231. sequenzo/prefix_tree/system_level_indicators.py +465 -0
  232. sequenzo/prefix_tree/utils.py +54 -0
  233. sequenzo/sequence_characteristics/__init__.py +40 -0
  234. sequenzo/sequence_characteristics/complexity_index.py +49 -0
  235. sequenzo/sequence_characteristics/overall_cross_sectional_entropy.py +220 -0
  236. sequenzo/sequence_characteristics/plot_characteristics.py +593 -0
  237. sequenzo/sequence_characteristics/simple_characteristics.py +311 -0
  238. sequenzo/sequence_characteristics/state_frequencies_and_entropy_per_sequence.py +39 -0
  239. sequenzo/sequence_characteristics/turbulence.py +155 -0
  240. sequenzo/sequence_characteristics/variance_of_spell_durations.py +86 -0
  241. sequenzo/sequence_characteristics/within_sequence_entropy.py +43 -0
  242. sequenzo/suffix_tree/__init__.py +48 -0
  243. sequenzo/suffix_tree/individual_level_indicators.py +1638 -0
  244. sequenzo/suffix_tree/system_level_indicators.py +456 -0
  245. sequenzo/suffix_tree/utils.py +56 -0
  246. sequenzo/visualization/__init__.py +29 -0
  247. sequenzo/visualization/plot_mean_time.py +194 -0
  248. sequenzo/visualization/plot_modal_state.py +276 -0
  249. sequenzo/visualization/plot_most_frequent_sequences.py +147 -0
  250. sequenzo/visualization/plot_relative_frequency.py +404 -0
  251. sequenzo/visualization/plot_sequence_index.py +951 -0
  252. sequenzo/visualization/plot_single_medoid.py +153 -0
  253. sequenzo/visualization/plot_state_distribution.py +627 -0
  254. sequenzo/visualization/plot_transition_matrix.py +190 -0
  255. sequenzo/visualization/utils/__init__.py +23 -0
  256. sequenzo/visualization/utils/utils.py +310 -0
  257. sequenzo/with_event_history_analysis/__init__.py +35 -0
  258. sequenzo/with_event_history_analysis/sequence_analysis_multi_state_model.py +850 -0
  259. sequenzo/with_event_history_analysis/sequence_history_analysis.py +283 -0
  260. sequenzo-0.1.24.dist-info/METADATA +255 -0
  261. sequenzo-0.1.24.dist-info/RECORD +264 -0
  262. sequenzo-0.1.24.dist-info/WHEEL +5 -0
  263. sequenzo-0.1.24.dist-info/licenses/LICENSE +28 -0
  264. sequenzo-0.1.24.dist-info/top_level.txt +2 -0
@@ -0,0 +1,205 @@
1
+ """
2
+ @Author : 李欣怡
3
+ @File : wfcmdd.py
4
+ @Time : 2024/12/28 13:38
5
+ @Desc :
6
+ """
7
+
8
+ import numpy as np
9
+ import pandas as pd
10
+ import warnings
11
+
12
+
13
def wfcmdd(diss, memb, weights=None, method="FCMdd", m=2, dnoise=None, eta=None, alpha=0.001,
           iter_max=100, verbose=False, dlambda=None):
    """
    Weighted medoid-based clustering with fuzzy, noise or possibilistic memberships.

    The routine alternates between (1) picking, for every cluster, the case that
    minimises the weighted, membership-scaled sum of dissimilarities (the medoid)
    and (2) recomputing the membership matrix from the distances to those medoids,
    until the memberships stop changing.

    :param diss: Square (n x n) dissimilarity matrix (array-like or DataFrame).
    :param memb: Starting point: either an (n x k) membership matrix
                 (ndarray / DataFrame) or a list of k 0-based seed row indices.
                 The caller's object is never modified.
    :param weights: Optional per-case weights (any 1-D array-like of length n).
                    Defaults to a vector of ones.
    :param method: One of "NCdd", "HNCdd", "FCMdd", "PCMdd".
    :param m: Exponent applied to memberships; forced to 1 for "HNCdd".
    :param dnoise: Distance to the noise cluster ("NCdd" / "HNCdd").
    :param eta: Per-cluster reference distances, required for "PCMdd".
    :param alpha: Convergence threshold on the largest membership change.
    :param iter_max: Iteration cap; only evaluated from the third iteration on.
    :param verbose: Print one dot per iteration and a final summary line.
    :param dlambda: When given with "NCdd", dnoise is re-estimated every iteration
                    as dlambda times the weighted mean distance to the medoids.
    :return: dict with keys "dnoise", "memb" (final membership matrix),
             "mobileCenters" (float array of 0-based medoid indices) and
             "functional" (value of the objective at the final state).
    :raises ValueError: on an unknown method, missing method-specific arguments,
                        an unusable ``memb``, or when no medoid candidate is left.
    """
    # Setting and checking argument values
    METHODS = ["NCdd", "HNCdd", "FCMdd", "PCMdd"]

    if method not in METHODS:
        raise ValueError(f" [!] Method must be one of {METHODS}.")

    d = np.asarray(diss, dtype=float)
    n = d.shape[0]

    # The reference R implementation has no handling for weights = NULL;
    # here missing weights mean "every case counts once". Coercing to an
    # ndarray lets lists / Series be used with the `weights[:, None]` broadcasts.
    if weights is None:
        weights = np.ones(n, dtype=float)
    else:
        weights = np.asarray(weights, dtype=float).ravel()

    if method == "NCdd":
        if dnoise is None and dlambda is None:
            raise ValueError(" [!] Must provide a value for dnoise or dlambda.")
        if dlambda is not None:
            # Placeholder only: dnoise is re-estimated inside the loop.
            dnoise = 1
    elif method == "HNCdd":
        if dnoise is None:
            raise ValueError(" [!] Must provide a value for dnoise.")
        m = 1
    elif method == "PCMdd":
        if eta is None:
            raise ValueError(" [!] Must provide a vector of values for eta.")

    # Checking the membership matrix (memb)
    if isinstance(memb, (pd.DataFrame, np.ndarray)):
        if memb.shape[0] != d.shape[1]:
            raise ValueError(" [!] The number of rows in memb must be the same as the number of rows and columns of d.")
        # Work on a float copy so the caller's matrix is never written to.
        u = np.array(memb, dtype=float)

    elif isinstance(memb, list) and all(isinstance(x, (int, float)) for x in memb):
        # Vector of seeds: cluster k starts with full membership on its seed row.
        u = np.zeros((n, len(memb)))
        for k, seed in enumerate(memb):
            u[seed, k] = 1

    else:
        raise ValueError("[!] Provide a number, a vector of seeds, or membership matrix for mobile clusters.")

    kMov = u.shape[1]
    med = np.full(kMov, np.nan)

    if method == "PCMdd":
        eta = np.atleast_1d(np.asarray(eta, dtype=float))
        if len(eta) != kMov:
            raise ValueError(" [!] Vector of reference distances (eta) must have a length equal to the number of clusters.")

    if method in ["NCdd", "HNCdd"]:
        # Extra trailing column = membership to the noise cluster.
        u = np.hstack([u, np.zeros((n, 1))])

    kMovNC = u.shape[1]
    uPrev = np.zeros((n, kMovNC))
    uPrev2 = np.zeros((n, kMovNC))

    dist2med = np.zeros((n, kMovNC))
    if method in ["NCdd", "HNCdd"]:
        dist2med[:, kMovNC - 1] = dnoise

    row_ids = np.arange(n)  # 0-based, same convention as the indices stored in med
    continue_flag = True
    iter_count = 1

    while continue_flag:
        # Finding centers
        for k in range(kMov):
            # A row is eligible as medoid of cluster k when it is not a full member
            # of any other cluster and has not already been taken by an earlier
            # cluster in this pass. `initial` keeps the reduction defined when
            # there is no other column (single cluster).
            other_max = np.max(np.delete(u, k, axis=1), axis=1, initial=-np.inf)
            candidates = np.flatnonzero((other_max < 1) & ~np.isin(row_ids, med[:k]))
            if candidates.size == 0:
                raise ValueError(f" [!] No candidate medoid left for cluster {k}.")

            # Weighted, membership-scaled total distance to every candidate.
            cost = (u[:, k] ** m * weights) @ d[:, candidates]
            med[k] = candidates[np.argmin(cost)]
            dist2med[:, k] = d[:, int(med[k])]

        # Updating dnoise for adaptive dnoise clustering
        if dlambda is not None and method == "NCdd":
            dnoise = dlambda * np.sum(dist2med[:, :kMov] * weights[:, None]) / (kMov * np.sum(weights))
            dist2med[:, kMovNC - 1] = dnoise

        # Updating membership (every branch binds u to a freshly allocated array)
        if method == "HNCdd":
            # Hard assignment to the closest medoid or to the noise cluster,
            # whose distance already sits in the last column of dist2med.
            closest = np.argmin(dist2med, axis=1)
            u = np.zeros_like(u)
            u[row_ids, closest] = 1

        elif method in ["FCMdd", "NCdd"]:
            # Zero distances yield inf / nan here; those cells are overwritten below.
            with np.errstate(all="ignore"):
                u = (1 / dist2med) ** (1 / (m - 1))
                u /= np.sum(u, axis=1, keepdims=True)
            u[dist2med == 0] = 1

        elif method == "PCMdd":
            u = 1 / (1 + (dist2med / eta) ** (1 / (m - 1)))
            u[dist2med == 0] = 1

        # Checking convergence: stop when memberships match either of the two
        # previous iterations or the iteration cap is exceeded.
        if iter_count > 2:
            continue_flag = (
                np.max(np.abs(u - uPrev)) > alpha
                and iter_count <= iter_max
                and np.max(np.abs(u - uPrev2)) > alpha
            )

        if continue_flag:
            uPrev2 = uPrev
            uPrev = u.copy()  # snapshot, never an alias of the live matrix
            iter_count += 1
            if verbose:
                print(".", end="")

    # Calculate the functional value
    functional = np.sum(dist2med * (u ** m) * weights[:, None])
    if method == "PCMdd":
        functional += np.sum(eta * (1 - u) ** m * weights[:, None])

    if verbose:
        print(f"\nIterations: {iter_count}, Functional: {functional}")

    mobile_centers = med[:kMov]

    return {
        "dnoise": dnoise,
        "memb": u,
        "mobileCenters": mobile_centers,
        "functional": functional
    }
187
+
188
+
189
if __name__ == "__main__":
    # Minimal demo: three cases, two clusters, default FCMdd settings.
    demo_distances = pd.DataFrame(
        np.array([
            [0.0, 1.0, 2.0],
            [1.0, 0.0, 1.0],
            [2.0, 1.0, 0.0],
        ])
    )
    demo_memberships = np.array([
        [0.7, 0.3],
        [0.2, 0.8],
        [0.5, 0.5],
    ])

    result = wfcmdd(diss=demo_distances, memb=demo_memberships, method="FCMdd")

    # Dump every entry of the returned dictionary.
    print("result['dnoise'] = ", result['dnoise'])
    print("result['memb'] =")
    print(result['memb'])
    print("result['mobileCenters'] = ", result['mobileCenters'])
    print("result['functional'] = ", result['functional'])
@@ -0,0 +1,88 @@
1
+ """
2
+ @Author : Yuqi Liang 梁彧祺
3
+ @File : visualization.py
4
+ @Time : 04/04/2025 15:21
5
+ @Desc :
6
+
7
+ """
8
+ import matplotlib.pyplot as plt
9
+ import seaborn as sns
10
+ import numpy as np
11
+ import pandas as pd
12
+
13
+
14
def plot_scores_from_dataframe(df,
                               k_col="k",
                               metrics=None,
                               norm="zscore",
                               title="CLARA Cluster Quality Metrics",
                               palette="Set2",
                               line_width=2,
                               style="whitegrid",
                               xlabel="Number of Clusters",
                               ylabel="Normalized Score",
                               grid=True,
                               save_as=None,
                               dpi=200,
                               figsize=(12, 8)):
    """
    Draw one line per clustering metric against the number of clusters.

    The input frame is copied and sorted by ``k_col``; each metric column is
    optionally normalised and then plotted on a shared axis.

    :param df: DataFrame holding the metrics; must contain the ``k_col`` column.
    :param k_col: Name of the column with the number of clusters.
    :param metrics: Metric columns to draw. When None, every float/int column
                    except ``k_col`` and "Best iter" is used.
    :param norm: 'zscore', 'range', or any other value for no normalisation.
    :param title: Plot title
    :param palette: Seaborn palette name used for the lines
    :param line_width: Width of plotted lines
    :param style: Seaborn style applied before plotting
    :param xlabel: X-axis label
    :param ylabel: Y-axis label
    :param grid: Whether to show grid lines
    :param save_as: Optional file path; when truthy the figure is saved there
    :param dpi: DPI for the saved image
    :param figsize: Figure size in inches
    """
    ordered = df.copy().sort_values(by=k_col)

    if metrics is None:
        # "Best iter" is bookkeeping, not a cluster-quality indicator.
        excluded = ("Best iter", k_col)
        numeric_cols = ordered.select_dtypes(include=[float, int]).columns.tolist()
        metrics = [name for name in numeric_cols if name not in excluded]

    def _rescale(series):
        # Degenerate columns (zero spread / zero range) are returned untouched.
        if norm == "zscore":
            center = np.nanmean(series)
            spread = np.nanstd(series)
            return (series - center) / spread if spread > 0 else series
        if norm == "range":
            lowest = np.nanmin(series)
            highest = np.nanmax(series)
            return (series - lowest) / (highest - lowest) if highest > lowest else series
        return series

    curves = {name: _rescale(ordered[name].values.astype(float)) for name in metrics}

    sns.set(style=style)
    colors = sns.color_palette(palette, len(metrics))
    plt.figure(figsize=figsize)

    for name, color in zip(metrics, colors):
        plt.plot(ordered[k_col], curves[name],
                 label=name,
                 linewidth=line_width,
                 color=color)

    plt.title(title, fontsize=14, fontweight="bold")
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.xticks(ordered[k_col])
    plt.grid(grid, linestyle="--", alpha=0.6)
    plt.legend(title="Metric", fontsize=10)
    plt.tight_layout()

    if save_as:
        plt.savefig(save_as, dpi=dpi)
    plt.show()
@@ -0,0 +1,178 @@
1
+ """
2
+ @Author : 李欣怡 Xinyi Li
3
+ @File : KMedoids.py
4
+ @Time : 2025/2/8 11:53
5
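+ @Desc : K-medoids, PAM and PAMonce clustering on a precomputed dissimilarity matrix, backed by the compiled clustering_c_code extension.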
6
+ """
7
+
8
+ import numpy as np
9
+ from scipy.cluster.hierarchy import cut_tree
10
+
11
+ import importlib
12
+ import sequenzo.clustering.clustering_c_code
13
+ clustering_c_code = importlib.import_module("sequenzo.clustering.clustering_c_code")
14
+
15
+ from sequenzo.clustering.utils.disscenter import disscentertrim
16
+
17
def KMedoids(diss, k, weights=None, npass=1, initialclust=None, method='PAMonce', cluster_only=False, verbose=True):
    """
    Run a medoid-based clustering on a precomputed dissimilarity matrix.

    This wrapper normalises and validates its arguments, works out the
    starting medoids, and then delegates the actual clustering to the
    compiled ``clustering_c_code`` extension.

    :param diss: Square dissimilarity matrix. Must expose ``.shape`` and
        ``.astype`` (e.g. a NumPy array).
    :param k: Number of clusters; must satisfy ``2 <= k <= diss.shape[0]``.
    :param weights: Optional per-observation weights of length
        ``diss.shape[0]``. Defaults to a vector of ones.
    :param npass: Forwarded unchanged to the C++ routine (presumably the
        number of random restarts -- confirm against the extension). Must not
        be negative. Forced to 0 whenever ``initialclust`` is supplied.
    :param initialclust: Optional starting solution. Accepted forms:

        * a linkage matrix (2-D float64 array with 4 columns), which is cut
          into ``k`` groups with ``scipy.cluster.hierarchy.cut_tree``;
        * a membership vector of length ``diss.shape[0]``, which is reduced
          to medoids through ``disscentertrim``;
        * a sequence of ``k`` 0-based medoid indices.

        When omitted, ``k`` distinct indices are drawn at random.
    :param method: ``'KMedoids'``, ``'PAM'`` or ``'PAMonce'``
        (case-insensitive), or the equivalent integer code 1, 2 or 3.
    :param cluster_only: Currently unused; kept for interface compatibility.
    :param verbose: Whether to print progress messages.
    :return: Whatever ``runclusterloop()`` of the selected C++ object returns.
    :raises ValueError: If the method is unknown, ``diss`` is not square,
        ``k`` is out of range, ``weights`` or ``initialclust`` have the wrong
        length, a starting medoid is out of bounds, or ``npass`` is negative.
    """
    # Lazily import the c_code module to avoid circular dependencies during installation
    # from .__init__ import _import_c_code
    # c_code = _import_c_code()

    # Normalise `method` to its 1-based integer code.
    method_original = method
    if isinstance(method, str):
        method_map = ["kmedoids", "pam", "pamonce"]
        method = method.lower()
        if method in method_map:
            method = method_map.index(method) + 1  # 1-based index

    if not (isinstance(method, int) and method in {1, 2, 3}):
        raise ValueError(f"[!] Unknown clustering method: {method_original}.")

    if verbose:
        method_names = ["KMedoids", "PAM", "PAMonce"]
        method_name = method_names[method - 1]
        print(f"[>] Starting KMedoids clustering (method: {method_name}, k={k})...")

    nelements = diss.shape[0]
    if nelements != diss.shape[1]:
        raise ValueError(
            f"[!] Dissimilarity matrix must be square, got shape {tuple(diss.shape)}."
        )

    # Validate `k` before it is used for sampling or tree cutting, so the
    # caller gets this message rather than a less specific downstream error.
    if k < 2 or k > nelements:
        raise ValueError(f" [!] 'k' should be in [2, {nelements}]")

    if weights is None:
        weights = np.ones(nelements, dtype=float)
    else:
        # Accept any sequence (list, tuple, Series, ...), not only ndarrays.
        weights = np.asarray(weights, dtype=np.float64)

    if len(weights) != nelements:
        raise ValueError(f"[!] 'weights' should be a vector of length {nelements}.")

    if initialclust is None:
        # 0-based indices, usable directly as medoids.
        initialclust = np.random.choice(nelements, k, replace=False)
    else:
        if _validate_linkage_matrix(initialclust):
            # initialclust = fcluster(initialclust, k, criterion='maxclust')  # 1-based index
            initialclust = cut_tree(initialclust, n_clusters=k).flatten() + 1  # 1-based index

        # TODO: A grouping is already available at this point, so why not use
        #       it directly to initialise PAMonce/PAM instead of using it to
        #       pick medoids? Why is initialising the medoids more necessary
        #       than initialising the groups? Whether or not the initial
        #       medoids are good, iterating should reach good ones anyway.
        # TODO: Even when extending from a subsample to the full data, the
        #       grouping passed in as an argument would work as well?
        if len(initialclust) == nelements:
            # A full membership vector: reduce it to one medoid per group.
            initialclust = disscentertrim(diss=diss, group=initialclust, medoids_index="first", weights=weights)

            if len(initialclust) != k:
                raise ValueError(f"[!] 'initialclust' should be a vector of cluster membership with k={k}.")

        # Starting medoids are fixed, so no extra passes are requested.
        npass = 0

    if len(initialclust) != k:
        raise ValueError(f"[!] 'initialclust' should be a vector of medoids index of length :{k}.")

    # Convert unconditionally so tuples and other sequences support the
    # element-wise comparison below, not just lists.
    initialclust = np.asarray(initialclust)
    if np.any((initialclust >= nelements) | (initialclust < 0)):
        raise ValueError(f"[!] Starting medoids should be in 0:{nelements - 1}")

    # Zero is a legal value here: it is what gets set above when starting
    # medoids are supplied.
    if npass < 0:
        raise ValueError("[!] 'npass' should be greater than 0")

    # Method codes 1, 2, 3 map onto these extension classes, which share one
    # constructor signature.
    algorithm = getattr(clustering_c_code, ("KMedoid", "PAM", "PAMonce")[method - 1])
    memb = algorithm(nelements,
                     diss.astype(np.float64),
                     initialclust.astype(np.int32),
                     npass,
                     weights.astype(np.float64))

    memb_matrix = memb.runclusterloop()

    if verbose:
        print("[>] Computed Successfully.")

    return memb_matrix
108
+
109
+ def _validate_linkage_matrix(initialclust):
110
+ """
111
+ Check that the passed matrix matches the linkage matrix type requirements
112
+ """
113
+ if not isinstance(initialclust, np.ndarray):
114
+ return False # Linkage matrix must be a NumPy array
115
+
116
+ if initialclust.ndim != 2 or initialclust.shape[1] != 4:
117
+ return False # Linkage matrix must be a 2D array with 4 columns
118
+
119
+ if initialclust.dtype != np.float64:
120
+ return False # Linkage matrix 'Z' must contain doubles (np.float64).
121
+
122
+ return True
123
+
124
+
125
if __name__ == '__main__':
    # Manual smoke test: build a distance matrix from a CSV of sequences and
    # cluster it with KMedoids.
    #
    # TODO: importing KMedoids under Python 3.11 runs into a numpy problem
    # TODO: the KMedoids module could not be found in sequenzo 0.1.14
    #       (this was an __init__ problem and has since been fixed)

    import sys

    import pandas as pd

    # Explicit names instead of `from sequenzo import *`, so the demo cannot
    # silently rebind names already defined in this module.
    from sequenzo import SequenceData, get_distance_matrix

    # =====
    # CO2
    # =====
    # df = load_dataset('country_co2_emissions')
    # time = list(df.columns)[1:]
    # states = ['Very Low', 'Low', 'Middle', 'High', 'Very High']
    # sequence_data = SequenceData(df, time=time, id_col="country", states=states)

    # =========
    # synthetic
    # =========
    # The default path only exists on the original author's machine; pass a
    # CSV path as the first command-line argument to run the demo elsewhere.
    default_csv = "/Users/xinyi/Projects/sequenzo/sequenzo/data_and_output/orignal data/not_real_detailed_data/synthetic_detailed_U5_N10000.csv"
    csv_path = sys.argv[1] if len(sys.argv) > 1 else default_csv

    df = pd.read_csv(csv_path)
    # NOTE(review): `_time` is taken from the full column list before `df` is
    # narrowed to 'id' + '1'..'10' -- confirm both describe the same columns.
    _time = list(df.columns)[2:]
    states = ["Data", "Data science", "Hardware", "Research", "Software", "Support & test", "Systems & infrastructure"]
    df = df[['id', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10']]
    sequence_data = SequenceData(df, time=_time, id_col="id", states=states)

    om = get_distance_matrix(sequence_data, method="OM", sm="TRATE", indel="auto")
    centroid_indices = [0, 50, 100, 150, 190]  # only used by the commented-out Example 2
    n_pass = 1

    weights = np.ones(len(om))

    # Example 1: KMedoids algorithm without specifying the center point
    # clustering = KMedoids(diss=om,
    #                       k=5,
    #                       method='KMedoids',
    #                       npass=n_pass,
    #                       weights=weights)
    #
    # # Example 2: PAM algorithm with a specified center point
    # clustering = KMedoids(diss=om,
    #                       k=5,
    #                       method='PAM',
    #                       initialclust=centroid_indices,
    #                       npass=n_pass,
    #                       weights=weights)

    # Example 3: PAMonce algorithm with default parameters
    clustering = KMedoids(diss=om,
                          k=5,
                          method='PAMonce',
                          npass=n_pass,
                          weights=weights)
    print(clustering)
    uniq = np.unique(clustering)
    print(uniq.min(), uniq.max())
@@ -0,0 +1,30 @@
1
+ """
2
+ @Author : Yuqi Liang 梁彧祺
3
+ @File : __init__.py
4
+ @Time : 27/02/2025 09:58
5
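+ @Desc : Public entry points of the clustering subpackage (Cluster, ClusterResults, ClusterQuality, KMedoids).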
6
+ """
7
+ from .hierarchical_clustering import Cluster, ClusterResults, ClusterQuality
8
+ from .KMedoids import KMedoids
9
+
10
+
11
+ def _import_c_code():
12
+ """Lazily import the c_code module to avoid circular dependencies during installation"""
13
+ try:
14
+ # Import built pybind11 extension placed under this package
15
+ from sequenzo.clustering import clustering_c_code
16
+ return clustering_c_code
17
+ except ImportError:
18
+ # If the C extension cannot be imported, return None
19
+ print(
20
+ "Warning: The C++ extension (c_code) could not be imported. Please ensure the extension module is compiled correctly.")
21
+ return None
22
+
23
+
24
# Names exported by `from sequenzo.clustering import *`.
# `_import_c_code` is intentionally absent: it is a private helper.
__all__ = [
    "Cluster",
    "ClusterResults",
    "ClusterQuality",
    "KMedoids",
    # Add other functions as needed
]