sequenzo 0.1.24__cp311-cp311-macosx_10_9_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sequenzo might be problematic. Click here for more details.

Files changed (264)
  1. _sequenzo_fastcluster.cpython-311-darwin.so +0 -0
  2. sequenzo/__init__.py +240 -0
  3. sequenzo/big_data/__init__.py +12 -0
  4. sequenzo/big_data/clara/__init__.py +26 -0
  5. sequenzo/big_data/clara/clara.py +474 -0
  6. sequenzo/big_data/clara/utils/__init__.py +27 -0
  7. sequenzo/big_data/clara/utils/aggregatecases.py +92 -0
  8. sequenzo/big_data/clara/utils/davies_bouldin.py +91 -0
  9. sequenzo/big_data/clara/utils/get_weighted_diss.cpython-311-darwin.so +0 -0
  10. sequenzo/big_data/clara/utils/wfcmdd.py +205 -0
  11. sequenzo/big_data/clara/visualization.py +88 -0
  12. sequenzo/clustering/KMedoids.py +178 -0
  13. sequenzo/clustering/__init__.py +30 -0
  14. sequenzo/clustering/clustering_c_code.cpython-311-darwin.so +0 -0
  15. sequenzo/clustering/hierarchical_clustering.py +1256 -0
  16. sequenzo/clustering/sequenzo_fastcluster/fastcluster.py +495 -0
  17. sequenzo/clustering/sequenzo_fastcluster/src/fastcluster.cpp +1877 -0
  18. sequenzo/clustering/sequenzo_fastcluster/src/fastcluster_python.cpp +1264 -0
  19. sequenzo/clustering/src/KMedoid.cpp +263 -0
  20. sequenzo/clustering/src/PAM.cpp +237 -0
  21. sequenzo/clustering/src/PAMonce.cpp +265 -0
  22. sequenzo/clustering/src/cluster_quality.cpp +496 -0
  23. sequenzo/clustering/src/cluster_quality.h +128 -0
  24. sequenzo/clustering/src/cluster_quality_backup.cpp +570 -0
  25. sequenzo/clustering/src/module.cpp +228 -0
  26. sequenzo/clustering/src/weightedinertia.cpp +111 -0
  27. sequenzo/clustering/utils/__init__.py +27 -0
  28. sequenzo/clustering/utils/disscenter.py +122 -0
  29. sequenzo/data_preprocessing/__init__.py +20 -0
  30. sequenzo/data_preprocessing/helpers.py +256 -0
  31. sequenzo/datasets/__init__.py +41 -0
  32. sequenzo/datasets/biofam.csv +2001 -0
  33. sequenzo/datasets/biofam_child_domain.csv +2001 -0
  34. sequenzo/datasets/biofam_left_domain.csv +2001 -0
  35. sequenzo/datasets/biofam_married_domain.csv +2001 -0
  36. sequenzo/datasets/chinese_colonial_territories.csv +12 -0
  37. sequenzo/datasets/country_co2_emissions.csv +194 -0
  38. sequenzo/datasets/country_co2_emissions_global_deciles.csv +195 -0
  39. sequenzo/datasets/country_co2_emissions_global_quintiles.csv +195 -0
  40. sequenzo/datasets/country_co2_emissions_local_deciles.csv +195 -0
  41. sequenzo/datasets/country_co2_emissions_local_quintiles.csv +195 -0
  42. sequenzo/datasets/country_gdp_per_capita.csv +194 -0
  43. sequenzo/datasets/mvad.csv +713 -0
  44. sequenzo/datasets/pairfam_family.csv +1867 -0
  45. sequenzo/datasets/polyadic_samplec1.csv +61 -0
  46. sequenzo/datasets/polyadic_samplep1.csv +61 -0
  47. sequenzo/datasets/polyadic_seqc1.csv +61 -0
  48. sequenzo/datasets/polyadic_seqp1.csv +61 -0
  49. sequenzo/define_sequence_data.py +609 -0
  50. sequenzo/dissimilarity_measures/__init__.py +31 -0
  51. sequenzo/dissimilarity_measures/c_code.cpython-311-darwin.so +0 -0
  52. sequenzo/dissimilarity_measures/get_distance_matrix.py +702 -0
  53. sequenzo/dissimilarity_measures/get_substitution_cost_matrix.py +241 -0
  54. sequenzo/dissimilarity_measures/src/DHDdistance.cpp +148 -0
  55. sequenzo/dissimilarity_measures/src/LCPdistance.cpp +114 -0
  56. sequenzo/dissimilarity_measures/src/OMdistance.cpp +247 -0
  57. sequenzo/dissimilarity_measures/src/OMspellDistance.cpp +281 -0
  58. sequenzo/dissimilarity_measures/src/__init__.py +0 -0
  59. sequenzo/dissimilarity_measures/src/dist2matrix.cpp +63 -0
  60. sequenzo/dissimilarity_measures/src/dp_utils.h +160 -0
  61. sequenzo/dissimilarity_measures/src/module.cpp +34 -0
  62. sequenzo/dissimilarity_measures/src/setup.py +30 -0
  63. sequenzo/dissimilarity_measures/src/utils.h +25 -0
  64. sequenzo/dissimilarity_measures/src/xsimd/.github/cmake-test/main.cpp +6 -0
  65. sequenzo/dissimilarity_measures/src/xsimd/benchmark/main.cpp +159 -0
  66. sequenzo/dissimilarity_measures/src/xsimd/benchmark/xsimd_benchmark.hpp +565 -0
  67. sequenzo/dissimilarity_measures/src/xsimd/docs/source/conf.py +37 -0
  68. sequenzo/dissimilarity_measures/src/xsimd/examples/mandelbrot.cpp +330 -0
  69. sequenzo/dissimilarity_measures/src/xsimd/examples/pico_bench.hpp +246 -0
  70. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_arithmetic.hpp +266 -0
  71. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_complex.hpp +112 -0
  72. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_details.hpp +323 -0
  73. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_logical.hpp +218 -0
  74. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_math.hpp +2583 -0
  75. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_memory.hpp +880 -0
  76. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_rounding.hpp +72 -0
  77. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_swizzle.hpp +174 -0
  78. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_trigo.hpp +978 -0
  79. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx.hpp +1924 -0
  80. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx2.hpp +1144 -0
  81. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512bw.hpp +656 -0
  82. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512cd.hpp +28 -0
  83. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512dq.hpp +244 -0
  84. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512er.hpp +20 -0
  85. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512f.hpp +2650 -0
  86. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512ifma.hpp +20 -0
  87. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512pf.hpp +20 -0
  88. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vbmi.hpp +77 -0
  89. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vbmi2.hpp +131 -0
  90. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vnni_avx512bw.hpp +20 -0
  91. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vnni_avx512vbmi2.hpp +20 -0
  92. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avxvnni.hpp +20 -0
  93. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_common.hpp +24 -0
  94. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_common_fwd.hpp +77 -0
  95. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_constants.hpp +393 -0
  96. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_emulated.hpp +788 -0
  97. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_avx.hpp +93 -0
  98. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_avx2.hpp +46 -0
  99. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_sse.hpp +97 -0
  100. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma4.hpp +92 -0
  101. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_i8mm_neon64.hpp +17 -0
  102. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_isa.hpp +142 -0
  103. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_neon.hpp +3142 -0
  104. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_neon64.hpp +1543 -0
  105. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_rvv.hpp +1513 -0
  106. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_scalar.hpp +1260 -0
  107. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse2.hpp +2024 -0
  108. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse3.hpp +67 -0
  109. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse4_1.hpp +339 -0
  110. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse4_2.hpp +44 -0
  111. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_ssse3.hpp +186 -0
  112. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sve.hpp +1155 -0
  113. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_vsx.hpp +892 -0
  114. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_wasm.hpp +1780 -0
  115. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_arch.hpp +240 -0
  116. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_config.hpp +484 -0
  117. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_cpuid.hpp +269 -0
  118. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_inline.hpp +27 -0
  119. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/math/xsimd_rem_pio2.hpp +719 -0
  120. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/memory/xsimd_aligned_allocator.hpp +349 -0
  121. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/memory/xsimd_alignment.hpp +91 -0
  122. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_all_registers.hpp +55 -0
  123. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_api.hpp +2765 -0
  124. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx2_register.hpp +44 -0
  125. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512bw_register.hpp +51 -0
  126. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512cd_register.hpp +51 -0
  127. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512dq_register.hpp +51 -0
  128. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512er_register.hpp +51 -0
  129. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512f_register.hpp +77 -0
  130. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512ifma_register.hpp +51 -0
  131. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512pf_register.hpp +51 -0
  132. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vbmi2_register.hpp +51 -0
  133. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vbmi_register.hpp +51 -0
  134. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512bw_register.hpp +54 -0
  135. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512vbmi2_register.hpp +53 -0
  136. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx_register.hpp +64 -0
  137. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avxvnni_register.hpp +44 -0
  138. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_batch.hpp +1524 -0
  139. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_batch_constant.hpp +300 -0
  140. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_common_arch.hpp +47 -0
  141. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_emulated_register.hpp +80 -0
  142. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_avx2_register.hpp +50 -0
  143. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_avx_register.hpp +50 -0
  144. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_sse_register.hpp +50 -0
  145. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma4_register.hpp +50 -0
  146. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_i8mm_neon64_register.hpp +55 -0
  147. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_neon64_register.hpp +55 -0
  148. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_neon_register.hpp +154 -0
  149. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_register.hpp +94 -0
  150. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_rvv_register.hpp +506 -0
  151. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse2_register.hpp +59 -0
  152. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse3_register.hpp +49 -0
  153. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse4_1_register.hpp +48 -0
  154. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse4_2_register.hpp +48 -0
  155. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_ssse3_register.hpp +48 -0
  156. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sve_register.hpp +156 -0
  157. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_traits.hpp +337 -0
  158. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_utils.hpp +536 -0
  159. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_vsx_register.hpp +77 -0
  160. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_wasm_register.hpp +59 -0
  161. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/xsimd.hpp +75 -0
  162. sequenzo/dissimilarity_measures/src/xsimd/test/architectures/dummy.cpp +7 -0
  163. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set.cpp +13 -0
  164. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean.cpp +24 -0
  165. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_aligned.cpp +25 -0
  166. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_arch_independent.cpp +28 -0
  167. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_tag_dispatch.cpp +25 -0
  168. sequenzo/dissimilarity_measures/src/xsimd/test/doc/manipulating_abstract_batches.cpp +7 -0
  169. sequenzo/dissimilarity_measures/src/xsimd/test/doc/manipulating_parametric_batches.cpp +8 -0
  170. sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum.hpp +31 -0
  171. sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum_avx2.cpp +3 -0
  172. sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum_sse2.cpp +3 -0
  173. sequenzo/dissimilarity_measures/src/xsimd/test/doc/writing_vectorized_code.cpp +11 -0
  174. sequenzo/dissimilarity_measures/src/xsimd/test/main.cpp +31 -0
  175. sequenzo/dissimilarity_measures/src/xsimd/test/test_api.cpp +230 -0
  176. sequenzo/dissimilarity_measures/src/xsimd/test/test_arch.cpp +217 -0
  177. sequenzo/dissimilarity_measures/src/xsimd/test/test_basic_math.cpp +183 -0
  178. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch.cpp +1049 -0
  179. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_bool.cpp +508 -0
  180. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_cast.cpp +409 -0
  181. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_complex.cpp +712 -0
  182. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_constant.cpp +286 -0
  183. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_float.cpp +141 -0
  184. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_int.cpp +365 -0
  185. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_manip.cpp +308 -0
  186. sequenzo/dissimilarity_measures/src/xsimd/test/test_bitwise_cast.cpp +222 -0
  187. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_exponential.cpp +226 -0
  188. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_hyperbolic.cpp +183 -0
  189. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_power.cpp +265 -0
  190. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_trigonometric.cpp +236 -0
  191. sequenzo/dissimilarity_measures/src/xsimd/test/test_conversion.cpp +248 -0
  192. sequenzo/dissimilarity_measures/src/xsimd/test/test_custom_default_arch.cpp +28 -0
  193. sequenzo/dissimilarity_measures/src/xsimd/test/test_error_gamma.cpp +170 -0
  194. sequenzo/dissimilarity_measures/src/xsimd/test/test_explicit_batch_instantiation.cpp +32 -0
  195. sequenzo/dissimilarity_measures/src/xsimd/test/test_exponential.cpp +202 -0
  196. sequenzo/dissimilarity_measures/src/xsimd/test/test_extract_pair.cpp +92 -0
  197. sequenzo/dissimilarity_measures/src/xsimd/test/test_fp_manipulation.cpp +77 -0
  198. sequenzo/dissimilarity_measures/src/xsimd/test/test_gnu_source.cpp +30 -0
  199. sequenzo/dissimilarity_measures/src/xsimd/test/test_hyperbolic.cpp +167 -0
  200. sequenzo/dissimilarity_measures/src/xsimd/test/test_load_store.cpp +304 -0
  201. sequenzo/dissimilarity_measures/src/xsimd/test/test_memory.cpp +61 -0
  202. sequenzo/dissimilarity_measures/src/xsimd/test/test_poly_evaluation.cpp +64 -0
  203. sequenzo/dissimilarity_measures/src/xsimd/test/test_power.cpp +184 -0
  204. sequenzo/dissimilarity_measures/src/xsimd/test/test_rounding.cpp +199 -0
  205. sequenzo/dissimilarity_measures/src/xsimd/test/test_select.cpp +101 -0
  206. sequenzo/dissimilarity_measures/src/xsimd/test/test_shuffle.cpp +760 -0
  207. sequenzo/dissimilarity_measures/src/xsimd/test/test_sum.cpp +4 -0
  208. sequenzo/dissimilarity_measures/src/xsimd/test/test_sum.hpp +34 -0
  209. sequenzo/dissimilarity_measures/src/xsimd/test/test_traits.cpp +172 -0
  210. sequenzo/dissimilarity_measures/src/xsimd/test/test_trigonometric.cpp +208 -0
  211. sequenzo/dissimilarity_measures/src/xsimd/test/test_utils.hpp +611 -0
  212. sequenzo/dissimilarity_measures/src/xsimd/test/test_wasm/test_wasm_playwright.py +123 -0
  213. sequenzo/dissimilarity_measures/src/xsimd/test/test_xsimd_api.cpp +1460 -0
  214. sequenzo/dissimilarity_measures/utils/__init__.py +16 -0
  215. sequenzo/dissimilarity_measures/utils/get_LCP_length_for_2_seq.py +44 -0
  216. sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.cpython-311-darwin.so +0 -0
  217. sequenzo/dissimilarity_measures/utils/seqconc.cpython-311-darwin.so +0 -0
  218. sequenzo/dissimilarity_measures/utils/seqdss.cpython-311-darwin.so +0 -0
  219. sequenzo/dissimilarity_measures/utils/seqdur.cpython-311-darwin.so +0 -0
  220. sequenzo/dissimilarity_measures/utils/seqlength.cpython-311-darwin.so +0 -0
  221. sequenzo/multidomain/__init__.py +23 -0
  222. sequenzo/multidomain/association_between_domains.py +311 -0
  223. sequenzo/multidomain/cat.py +431 -0
  224. sequenzo/multidomain/combt.py +519 -0
  225. sequenzo/multidomain/dat.py +89 -0
  226. sequenzo/multidomain/idcd.py +139 -0
  227. sequenzo/multidomain/linked_polyad.py +292 -0
  228. sequenzo/openmp_setup.py +233 -0
  229. sequenzo/prefix_tree/__init__.py +43 -0
  230. sequenzo/prefix_tree/individual_level_indicators.py +1274 -0
  231. sequenzo/prefix_tree/system_level_indicators.py +465 -0
  232. sequenzo/prefix_tree/utils.py +54 -0
  233. sequenzo/sequence_characteristics/__init__.py +40 -0
  234. sequenzo/sequence_characteristics/complexity_index.py +49 -0
  235. sequenzo/sequence_characteristics/overall_cross_sectional_entropy.py +220 -0
  236. sequenzo/sequence_characteristics/plot_characteristics.py +593 -0
  237. sequenzo/sequence_characteristics/simple_characteristics.py +311 -0
  238. sequenzo/sequence_characteristics/state_frequencies_and_entropy_per_sequence.py +39 -0
  239. sequenzo/sequence_characteristics/turbulence.py +155 -0
  240. sequenzo/sequence_characteristics/variance_of_spell_durations.py +86 -0
  241. sequenzo/sequence_characteristics/within_sequence_entropy.py +43 -0
  242. sequenzo/suffix_tree/__init__.py +48 -0
  243. sequenzo/suffix_tree/individual_level_indicators.py +1638 -0
  244. sequenzo/suffix_tree/system_level_indicators.py +456 -0
  245. sequenzo/suffix_tree/utils.py +56 -0
  246. sequenzo/visualization/__init__.py +29 -0
  247. sequenzo/visualization/plot_mean_time.py +194 -0
  248. sequenzo/visualization/plot_modal_state.py +276 -0
  249. sequenzo/visualization/plot_most_frequent_sequences.py +147 -0
  250. sequenzo/visualization/plot_relative_frequency.py +404 -0
  251. sequenzo/visualization/plot_sequence_index.py +951 -0
  252. sequenzo/visualization/plot_single_medoid.py +153 -0
  253. sequenzo/visualization/plot_state_distribution.py +627 -0
  254. sequenzo/visualization/plot_transition_matrix.py +190 -0
  255. sequenzo/visualization/utils/__init__.py +23 -0
  256. sequenzo/visualization/utils/utils.py +310 -0
  257. sequenzo/with_event_history_analysis/__init__.py +35 -0
  258. sequenzo/with_event_history_analysis/sequence_analysis_multi_state_model.py +850 -0
  259. sequenzo/with_event_history_analysis/sequence_history_analysis.py +283 -0
  260. sequenzo-0.1.24.dist-info/METADATA +255 -0
  261. sequenzo-0.1.24.dist-info/RECORD +264 -0
  262. sequenzo-0.1.24.dist-info/WHEEL +5 -0
  263. sequenzo-0.1.24.dist-info/licenses/LICENSE +28 -0
  264. sequenzo-0.1.24.dist-info/top_level.txt +2 -0
@@ -0,0 +1,627 @@
1
+ """
2
+ @Author : Yuqi Liang 梁彧祺
3
+ @File : plot_state_distribution.py
4
+ @Time : 15/02/2025 22:03
5
+ @Desc :
6
+ """
7
+ import numpy as np
8
+ import pandas as pd
9
+ import matplotlib.pyplot as plt
10
+
11
+ from sequenzo import SequenceData
12
+ from sequenzo.visualization.utils import (
13
+ set_up_time_labels_for_x_axis,
14
+ save_figure_to_buffer,
15
+ create_standalone_legend,
16
+ combine_plot_with_legend,
17
+ save_and_show_results,
18
+ determine_layout,
19
+ show_plot_title,
20
+ show_group_title
21
+ )
22
+
23
+
24
def smart_sort_groups(groups):
    """
    Sort group names so that names starting with digits come first, in numeric order.

    Every item is converted with ``str()`` for inspection. If that text begins
    with one or more digits, the integer value of those digits is the primary
    sort key; otherwise the item sorts after all digit-prefixed items. The
    string form of the item is the secondary key, so ties are ordered
    alphabetically.

    Note that a leading minus sign is not a digit, so a name such as "-1"
    is treated as having no numeric prefix.

    :param groups: Iterable of group names (any objects; inspected via ``str()``)
    :return: New list containing the original items in sorted order
    """
    import re

    # Compiled once per call and shared by every key computation below.
    leading_digits = re.compile(r'^(\d+)')

    def sort_key(item):
        text = str(item)
        match = leading_digits.match(text)
        # float('inf') compares greater than any int, pushing unnumbered names last.
        prefix = int(match.group(1)) if match else float('inf')
        return (prefix, text)

    return sorted(groups, key=sort_key)
41
+
42
+
43
+ def plot_state_distribution(seqdata: SequenceData,
44
+ # Grouping parameters
45
+ group_by_column=None,
46
+ group_dataframe=None,
47
+ group_column_name=None,
48
+ group_labels=None,
49
+ # Other parameters
50
+ weights="auto",
51
+ figsize=(12, 7),
52
+ plot_style="standard",
53
+ title=None,
54
+ xlabel="Time",
55
+ ylabel="State Distribution (%)",
56
+ save_as=None,
57
+ dpi=200,
58
+ layout='column',
59
+ nrows: int = None,
60
+ ncols: int = None,
61
+ stacked=True,
62
+ show=True,
63
+ include_legend=True,
64
+ group_order=None,
65
+ fontsize=12,
66
+ sort_groups='auto',
67
+ show_group_titles: bool = True) -> None:
68
+ """
69
+ Creates state distribution plots for different groups, showing how state
70
+ prevalence changes over time within each group.
71
+
72
+ **Two API modes for grouping:**
73
+
74
+ 1. **Simplified API** (when grouping info is already in the data):
75
+ ```python
76
+ plot_state_distribution(seqdata, group_by_column="Cluster", group_labels=cluster_labels)
77
+ ```
78
+
79
+ 2. **Complete API** (when grouping info is in a separate dataframe):
80
+ ```python
81
+ plot_state_distribution(seqdata, group_dataframe=membership_df,
82
+ group_column_name="Cluster", group_labels=cluster_labels)
83
+ ```
84
+
85
+ :param seqdata: (SequenceData) A SequenceData object containing sequences
86
+
87
+ **Grouping parameters:**
88
+ :param group_by_column: (str, optional) Column name from seqdata.data to group by.
89
+ Use this when grouping information is already in your data.
90
+ Example: "Cluster", "sex", "education"
91
+ :param group_dataframe: (pd.DataFrame, optional) Separate dataframe containing grouping information.
92
+ Use this when grouping info is in a separate table (e.g., clustering results).
93
+ Must contain ID column and grouping column.
94
+ :param group_column_name: (str, optional) Name of the grouping column in group_dataframe.
95
+ Required when using group_dataframe.
96
+ :param group_labels: (dict, optional) Custom labels for group values.
97
+ Example: {1: "Late Family Formation", 2: "Early Partnership"}
98
+ Maps original values to display labels.
99
+ :param weights: (np.ndarray or "auto") Weights for sequences. If "auto", uses seqdata.weights if available
100
+ :param figsize: (tuple) Size of the figure (only used when plot_style="custom")
101
+ :param plot_style: Plot aspect style:
102
+ - 'standard': Standard proportions (12, 7) - balanced view
103
+ - 'compact': Compact/vertical proportions (10, 8) - more vertical like R plots
104
+ - 'wide': Wide proportions (14, 5) - emphasizes time progression
105
+ - 'narrow': Narrow/tall proportions (9, 11) - moderately vertical
106
+ - 'custom': Use the provided figsize parameter
107
+ :param title: (str) Optional title for the plot
108
+ :param xlabel: (str) Label for the x-axis
109
+ :param ylabel: (str) Label for the y-axis
110
+ :param save_as: (str) Optional file path to save the plot
111
+ :param dpi: (int) Resolution of the saved plot
112
+ :param layout: (str) Layout style - 'column' (default, 3xn), 'grid' (nxn)
113
+ :param stacked: (bool) Whether to create stacked area plots (True) or line plots (False)
114
+ :param group_order: List, manually specify group order (overrides sort_groups)
115
+ :param sort_groups: String, sorting method: 'auto'(smart numeric), 'numeric'(numeric prefix), 'alpha'(alphabetical), 'none'(original order)
116
+
117
+ :return: None
118
+ """
119
+ # Determine figure size based on plot style
120
+ style_sizes = {
121
+ 'standard': (12, 7), # Balanced view
122
+ 'compact': (10, 8), # More square, like R plots
123
+ 'wide': (14, 5), # Wide, emphasizes time
124
+ 'narrow': (9, 11), # Moderately vertical
125
+ 'custom': figsize # User-provided
126
+ }
127
+
128
+ if plot_style not in style_sizes:
129
+ raise ValueError(f"Invalid plot_style '{plot_style}'. "
130
+ f"Supported styles: {list(style_sizes.keys())}")
131
+
132
+ # Special validation for custom plot style
133
+ if plot_style == 'custom' and figsize == (12, 7):
134
+ raise ValueError(
135
+ "When using plot_style='custom', you must explicitly provide a figsize parameter "
136
+ "that differs from the default (12, 7). "
137
+ "Suggested custom sizes:\n"
138
+ " - For wide plots: figsize=(16, 6)\n"
139
+ " - For tall plots: figsize=(8, 12)\n"
140
+ " - For square plots: figsize=(10, 10)\n"
141
+ " - For small plots: figsize=(8, 5)\n"
142
+ "Example: plot_state_distribution(data, plot_style='custom', figsize=(14, 9))"
143
+ )
144
+
145
+ actual_figsize = style_sizes[plot_style]
146
+
147
+ # Handle the simplified API: group_by_column
148
+ if group_by_column is not None:
149
+ # Validate that the column exists in the original data
150
+ if group_by_column not in seqdata.data.columns:
151
+ available_cols = [col for col in seqdata.data.columns if col not in seqdata.time and col != seqdata.id_col]
152
+ raise ValueError(
153
+ f"Column '{group_by_column}' not found in the data. "
154
+ f"Available columns for grouping: {available_cols}"
155
+ )
156
+
157
+ # Automatically create group_dataframe and group_column_name from the simplified API
158
+ group_dataframe = seqdata.data[[seqdata.id_col, group_by_column]].copy()
159
+ group_dataframe.columns = ['Entity ID', 'Category']
160
+ group_column_name = 'Category'
161
+
162
+ # Handle group labels - flexible and user-controllable
163
+ unique_values = seqdata.data[group_by_column].unique()
164
+
165
+ if group_labels is not None:
166
+ # User provided custom labels - use them
167
+ missing_keys = set(unique_values) - set(group_labels.keys())
168
+ if missing_keys:
169
+ raise ValueError(
170
+ f"group_labels missing mappings for values: {missing_keys}. "
171
+ f"Please provide labels for all unique values in '{group_by_column}': {sorted(unique_values)}"
172
+ )
173
+ group_dataframe['Category'] = group_dataframe['Category'].map(group_labels)
174
+ else:
175
+ # No custom labels provided - use smart defaults
176
+ if all(isinstance(v, (int, float, np.integer, np.floating)) and not pd.isna(v) for v in unique_values):
177
+ # Numeric values - keep as is (user can provide group_labels if they want custom names)
178
+ pass
179
+ # For string/categorical values, keep original values
180
+ # This handles cases where users already have meaningful labels like "Male"/"Female"
181
+
182
+ print(f"[>] Creating grouped plots by '{group_by_column}' with {len(unique_values)} categories")
183
+
184
+ # If no grouping information, create a single plot
185
+ if group_dataframe is None or group_column_name is None:
186
+ return _plot_state_distribution_single(
187
+ seqdata=seqdata, weights=weights, figsize=actual_figsize,
188
+ plot_style=plot_style, title=title, xlabel=xlabel, ylabel=ylabel,
189
+ save_as=save_as, dpi=dpi, stacked=stacked,
190
+ show=show, include_legend=include_legend, fontsize=fontsize
191
+ )
192
+
193
+ # Process weights
194
+ if isinstance(weights, str) and weights == "auto":
195
+ weights = getattr(seqdata, "weights", None)
196
+
197
+ if weights is not None:
198
+ weights = np.asarray(weights, dtype=float).reshape(-1)
199
+ if len(weights) != len(seqdata.values):
200
+ raise ValueError("Length of weights must equal number of sequences.")
201
+
202
+ # Ensure ID columns match (convert if needed)
203
+ id_col_name = "Entity ID" if "Entity ID" in group_dataframe.columns else group_dataframe.columns[0]
204
+
205
+ # Apply group_labels if provided (for group_dataframe API)
206
+ if group_labels is not None and group_column_name in group_dataframe.columns:
207
+ # Validate that all values in the group column have labels
208
+ unique_values = group_dataframe[group_column_name].unique()
209
+ missing_keys = set(unique_values) - set(group_labels.keys())
210
+ if missing_keys:
211
+ raise ValueError(
212
+ f"group_labels missing mappings for values: {missing_keys}. "
213
+ f"Please provide labels for all unique values in '{group_column_name}': {sorted(unique_values)}"
214
+ )
215
+ # Apply the labels mapping
216
+ group_dataframe = group_dataframe.copy() # Avoid modifying original
217
+ group_dataframe[group_column_name] = group_dataframe[group_column_name].map(group_labels)
218
+
219
+ # Get unique groups and sort them based on user preference
220
+ if group_order:
221
+ # Use manually specified order, filter out non-existing groups
222
+ groups = [g for g in group_order if g in group_dataframe[group_column_name].unique()]
223
+ missing_groups = [g for g in group_dataframe[group_column_name].unique() if g not in group_order]
224
+ if missing_groups:
225
+ print(f"[Warning] Groups not in group_order will be excluded: {missing_groups}")
226
+ elif sort_groups == 'numeric' or sort_groups == 'auto':
227
+ groups = smart_sort_groups(group_dataframe[group_column_name].unique())
228
+ elif sort_groups == 'alpha':
229
+ groups = sorted(group_dataframe[group_column_name].unique())
230
+ elif sort_groups == 'none':
231
+ groups = list(group_dataframe[group_column_name].unique())
232
+ else:
233
+ raise ValueError(f"Invalid sort_groups value: {sort_groups}. Use 'auto', 'numeric', 'alpha', or 'none'.")
234
+
235
+ num_groups = len(groups)
236
+
237
+ # Calculate figure size and layout based on number of groups and specified layout
238
+ nrows, ncols = determine_layout(num_groups, layout=layout, nrows=nrows, ncols=ncols)
239
+
240
+ fig, axes = plt.subplots(
241
+ nrows=nrows,
242
+ ncols=ncols,
243
+ figsize=(actual_figsize[0] * ncols, actual_figsize[1] * nrows),
244
+ gridspec_kw={'wspace': 0.15, 'hspace': 0.25} # Reduced spacing for tighter layout
245
+ )
246
+ axes = axes.flatten()
247
+
248
+ # Create state mapping from numerical values back to state names
249
+ inv_state_mapping = {v: k for k, v in seqdata.state_mapping.items()}
250
+
251
+ # Process each group
252
+ for i, group in enumerate(groups):
253
+ # Get IDs for this group
254
+ group_ids = group_dataframe[group_dataframe[group_column_name] == group][id_col_name].values
255
+
256
+ # Match IDs with sequence data
257
+ mask = np.isin(seqdata.ids, group_ids)
258
+ if not np.any(mask):
259
+ print(f"Warning: No matching sequences found for group '{group}'")
260
+ continue
261
+
262
+ # Get sequences for this group
263
+ group_seq_df = seqdata.to_dataframe().loc[mask]
264
+
265
+ # Get weights for this group
266
+ if weights is None:
267
+ w = np.ones(len(group_seq_df))
268
+ else:
269
+ w = np.asarray(weights)[mask]
270
+
271
+ # Broadcast weights to each time point
272
+ W = np.repeat(w[:, None], group_seq_df.shape[1], axis=1)
273
+
274
+ # Calculate weighted state distributions at each time point
275
+ distributions = []
276
+ for t, col in enumerate(group_seq_df.columns):
277
+ col_vals = group_seq_df[col].to_numpy()
278
+
279
+ # Calculate weighted counts for each state
280
+ sums = {s: float(W[col_vals == s, t].sum()) for s in range(1, len(seqdata.states)+1)}
281
+ totw = float(W[:, t].sum())
282
+
283
+ # Convert to weighted percentages
284
+ dist = {inv_state_mapping.get(s, 'Missing'): 100.0 * (sums[s] / totw if totw > 0 else 0.0)
285
+ for s in range(1, len(seqdata.states) + 1)}
286
+
287
+ # Add time point and distribution to the list
288
+ distributions.append(dict({"time": col, **{str(k): v for k, v in dist.items()}}))
289
+
290
+ # Ensure percentages sum to exactly 100% to avoid gaps
291
+ for j in range(len(distributions)):
292
+ total_percentage = sum(distributions[j][str(state)] for state in seqdata.states)
293
+ if total_percentage < 100:
294
+ top_state = str(seqdata.states[-1])
295
+ distributions[j][str(top_state)] += (100 - total_percentage)
296
+
297
+ # Convert to DataFrame for plotting
298
+ dist_df = pd.DataFrame(distributions)
299
+
300
+ # Plot on the corresponding axis
301
+ ax = axes[i]
302
+
303
+ # Get colors for each state
304
+ # seqdata.states are integer encodings (e.g., 1, 2, ...)
305
+ # seqdata.state_mapping[state] maps integers to labels (e.g., 'Married', 'Single')
306
+ # seqdata.color_map[...] gets color by label
307
+ base_colors = [seqdata.color_map[seqdata.state_mapping[state]] for state in seqdata.states]
308
+
309
+ # Plot the data
310
+ if stacked:
311
+ # Create a stacked area plot
312
+ ax.stackplot(range(len(dist_df)),
313
+ [dist_df[str(state)] for state in seqdata.states],
314
+ labels=seqdata.labels,
315
+ colors=base_colors,
316
+ alpha=1.0)
317
+
318
+ # Add grid lines behind the stack plot
319
+ ax.grid(axis='y', linestyle='-', alpha=0.2)
320
+ ax.set_axisbelow(True)
321
+ else:
322
+ # Create a line plot
323
+ for state, label, color in zip(seqdata.states, seqdata.labels, base_colors):
324
+ ax.plot(range(len(dist_df)), dist_df[str(state)],
325
+ label=label, color=color,
326
+ linewidth=2.5, marker='o', markersize=5)
327
+
328
+ # Add grid lines
329
+ ax.grid(True, linestyle='-', alpha=0.2)
330
+
331
+ # Set group title with weighted sample size
332
+ # Check if we have effective weights (not all 1.0) and they were provided by user
333
+ original_weights = getattr(seqdata, "weights", None)
334
+ if original_weights is not None and not np.allclose(original_weights, 1.0):
335
+ sum_w = float(w.sum())
336
+ group_title = f"{group} (n = {len(group_seq_df)}, total weight = {sum_w:.1f})"
337
+ else:
338
+ group_title = f"{group} (n = {len(group_seq_df)})"
339
+ if show_group_titles:
340
+ show_group_title(ax, group_title, show=True, fontsize=fontsize)
341
+
342
+ # Set y-axis limits from 0 to 100%
343
+ ax.set_ylim(0, 100)
344
+
345
+ # Clean up axis aesthetics
346
+ ax.spines['top'].set_visible(False)
347
+ ax.spines['right'].set_visible(False)
348
+ ax.spines['left'].set_color('gray')
349
+ ax.spines['bottom'].set_color('gray')
350
+ ax.spines['left'].set_linewidth(0.7)
351
+ ax.spines['bottom'].set_linewidth(0.7)
352
+
353
+ # Move spines slightly away from the plot area for better aesthetics (same as index plot)
354
+ ax.spines['left'].set_position(('outward', 5))
355
+ ax.spines['bottom'].set_position(('outward', 5))
356
+
357
+ ax.tick_params(axis='x', colors='gray', length=4, width=0.7)
358
+ ax.tick_params(axis='y', colors='gray', length=4, width=0.7)
359
+
360
+ # Set x-axis labels
361
+ set_up_time_labels_for_x_axis(seqdata, ax)
362
+
363
+ # Set x-axis range to prevent over-extension like in the reference image
364
+ ax.set_xlim(-0.5, len(seqdata.cleaned_time) - 0.5)
365
+
366
+ # Add axis labels
367
+ if i % ncols == 0:
368
+ ax.set_ylabel(ylabel, fontsize=fontsize, labelpad=10, color='black')
369
+
370
+ # if i >= num_groups - ncols:
371
+ ax.set_xlabel(xlabel, fontsize=fontsize, labelpad=10, color='black')
372
+
373
+ # Hide unused subplots
374
+ for j in range(i + 1, len(axes)):
375
+ axes[j].set_visible(False)
376
+
377
+ # Add a common title if provided
378
+ if title:
379
+ fig.suptitle(title, fontsize=fontsize+2, y=1.02)
380
+
381
+ # Adjust layout to remove tight_layout warning and eliminate extra right space
382
+ fig.subplots_adjust(wspace=0.15, hspace=0.25, bottom=0.1, top=0.9, right=0.98, left=0.08)
383
+
384
+ # Save main figure to memory
385
+ main_buffer = save_figure_to_buffer(fig, dpi=dpi)
386
+
387
+ if include_legend:
388
+ # Create standalone legend
389
+ colors = seqdata.color_map_by_label
390
+ legend_buffer = create_standalone_legend(
391
+ colors=colors,
392
+ labels=seqdata.labels,
393
+ ncol=min(5, len(seqdata.states)),
394
+ figsize=(actual_figsize[0] * ncols, 1),
395
+ fontsize=fontsize-2,
396
+ dpi=dpi
397
+ )
398
+
399
+ # Combine plot with legend
400
+ if save_as and not save_as.lower().endswith(('.png', '.jpg', '.jpeg', '.pdf')):
401
+ save_as = save_as + '.png'
402
+
403
+ combined_img = combine_plot_with_legend(
404
+ main_buffer,
405
+ legend_buffer,
406
+ output_path=save_as,
407
+ dpi=dpi,
408
+ padding=20
409
+ )
410
+
411
+ # Display combined image
412
+ plt.figure(figsize=(actual_figsize[0] * ncols, actual_figsize[1] * nrows + 1))
413
+ plt.imshow(combined_img)
414
+ plt.axis('off')
415
+ if show or save_as: # Show if displaying or saving is needed
416
+ plt.show()
417
+ plt.close()
418
+ else:
419
+ # Display plot without legend
420
+ if save_as and not save_as.lower().endswith(('.png', '.jpg', '.jpeg', '.pdf')):
421
+ save_as = save_as + '.png'
422
+
423
+ # Save or show the main plot directly
424
+ plt.figure(figsize=(actual_figsize[0] * ncols, actual_figsize[1] * nrows))
425
+ plt.imshow(main_buffer)
426
+ plt.axis('off')
427
+
428
+ if save_as:
429
+ plt.savefig(save_as, dpi=dpi, bbox_inches='tight')
430
+ if show:
431
+ plt.show()
432
+ plt.close()
433
+
434
+ # No longer return fig to avoid duplicate rendering by environment
435
+ return None
436
+ # return fig
437
+
438
+
439
def _plot_state_distribution_single(seqdata: SequenceData,
                                    weights="auto",
                                    figsize=(12, 7),
                                    plot_style="standard",
                                    title=None,
                                    xlabel="Time",
                                    ylabel="State Distribution (%)",
                                    stacked=True,
                                    save_as=None,
                                    dpi=200,
                                    show=False,
                                    include_legend=True,
                                    fontsize=12) -> None:
    """
    Draw a single state distribution plot: the (weighted) percentage of sequences
    in each state at every time point, as a stacked area chart or as lines.

    :param seqdata: (SequenceData) A SequenceData object containing sequences
    :param weights: (np.ndarray or "auto") Weights for sequences. If "auto", uses
        seqdata.weights if available; None means every sequence counts as 1
    :param figsize: (tuple) Size of the figure (only used when plot_style="custom")
    :param plot_style: Plot aspect style ('standard', 'compact', 'wide', 'narrow', 'custom')
    :param title: (str) Optional title for the plot
    :param xlabel: (str) Label for the x-axis
    :param ylabel: (str) Label for the y-axis
    :param stacked: (bool) Whether to create a stacked area plot (True) or line plot (False)
    :param save_as: (str) Optional file path, forwarded to save_and_show_results
    :param dpi: (int) Resolution, forwarded to save_and_show_results
    :param show: (bool) Forwarded to save_and_show_results
    :param include_legend: (bool) Whether to draw a legend to the right of the axes
    :param fontsize: (int) Font size of the axis labels; the title uses fontsize + 2
        and the legend fontsize - 2

    :raises ValueError: if plot_style is unknown, if plot_style='custom' is used with
        the default figsize, or if the number of weights differs from the number of
        sequences

    :return: None
    """
    # Determine figure size based on plot style
    style_sizes = {
        'standard': (12, 7),   # Balanced view
        'compact': (10, 8),    # More square, like R plots
        'wide': (14, 5),       # Wide, emphasizes time
        'narrow': (9, 11),     # Moderately vertical
        'custom': figsize      # User-provided
    }

    if plot_style not in style_sizes:
        raise ValueError(f"Invalid plot_style '{plot_style}'. "
                         f"Supported styles: {list(style_sizes.keys())}")

    # 'custom' only makes sense with an explicit, non-default figsize
    if plot_style == 'custom' and figsize == (12, 7):
        raise ValueError(
            "When using plot_style='custom', you must explicitly provide a figsize parameter "
            "that differs from the default (12, 7). "
            "Suggested custom sizes:\n"
            " - For wide plots: figsize=(16, 6)\n"
            " - For tall plots: figsize=(8, 12)\n"
            " - For square plots: figsize=(10, 10)\n"
            " - For small plots: figsize=(8, 5)\n"
            "Example: plot_state_distribution(data, plot_style='custom', figsize=(14, 9))"
        )

    actual_figsize = style_sizes[plot_style]

    # Resolve weights: "auto" falls back to the weights stored on seqdata (if any)
    if isinstance(weights, str) and weights == "auto":
        weights = getattr(seqdata, "weights", None)

    if weights is not None:
        weights = np.asarray(weights, dtype=float).reshape(-1)
        if len(weights) != len(seqdata.values):
            raise ValueError("Length of weights must equal number of sequences.")

    # One row per sequence, one column per time point
    seq_df = seqdata.to_dataframe()

    # Per-sequence weight vector. The same vector applies at every time point, so
    # no (n_sequences x n_timepoints) matrix is needed and the total is loop-invariant.
    w = np.ones(len(seq_df)) if weights is None else weights
    total_weight = float(w.sum())

    # Map numeric codes back to state names.
    # NOTE(review): the loop below assumes the codes are 1..len(seqdata.states);
    # confirm against SequenceData.state_mapping.
    inv_state_mapping = {v: k for k, v in seqdata.state_mapping.items()}
    state_codes = range(1, len(seqdata.states) + 1)

    # Weighted percentage of each state at each time point
    distributions = []
    for col in seq_df.columns:
        col_vals = seq_df[col].to_numpy()
        row = {"time": col}
        for code in state_codes:
            weighted_count = float(w[col_vals == code].sum())
            share = weighted_count / total_weight if total_weight > 0 else 0.0
            row[str(inv_state_mapping.get(code, 'Missing'))] = 100.0 * share
        distributions.append(row)

    # Ensure percentages sum to exactly 100% to avoid gaps: any shortfall is
    # assigned to the last (top-most) state of the stack
    for row in distributions:
        total_percentage = sum(row[str(state)] for state in seqdata.states)
        if total_percentage < 100:
            row[str(seqdata.states[-1])] += (100 - total_percentage)

    # Convert to DataFrame for plotting
    dist_df = pd.DataFrame(distributions)

    # Create the plot
    plt.style.use('default')  # Start with default style for clean slate
    _, ax = plt.subplots(figsize=actual_figsize)

    # One color per state, in seqdata.states order
    base_colors = [seqdata.color_map[seqdata.state_mapping[state]] for state in seqdata.states]
    x_positions = range(len(dist_df))

    if stacked:
        # Stacked area plot at full opacity
        ax.stackplot(x_positions,
                     [dist_df[str(state)] for state in seqdata.states],
                     labels=seqdata.labels,
                     colors=base_colors,
                     alpha=1.0)

        # Add grid lines behind the stack plot
        ax.grid(axis='y', linestyle='-', alpha=0.2)
        ax.set_axisbelow(True)
    else:
        # One line per state. Legend entries use seqdata.labels, the same text as
        # the stacked variant, so the legend does not depend on `stacked`.
        for state, label, color in zip(seqdata.states, seqdata.labels, base_colors):
            ax.plot(x_positions, dist_df[str(state)],
                    label=label, color=color,
                    linewidth=2.5, marker='o', markersize=5)

        # Add grid lines
        ax.grid(True, linestyle='-', alpha=0.2)

    # Set axis labels and title
    ax.set_xlabel(xlabel, fontsize=fontsize, labelpad=10)
    ax.set_ylabel(ylabel, fontsize=fontsize, labelpad=10)

    if title:
        ax.set_title(title, fontsize=fontsize+2, fontweight='bold', pad=20)

    # Set x-axis labels based on time points
    set_up_time_labels_for_x_axis(seqdata, ax)

    # Half a unit of padding on each side of the time axis
    ax.set_xlim(-0.5, len(seqdata.cleaned_time) - 0.5)

    # Enhance aesthetics: hide top/right spines, thin gray left/bottom spines
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.spines['left'].set_color('gray')
    ax.spines['bottom'].set_color('gray')
    ax.spines['left'].set_linewidth(0.7)
    ax.spines['bottom'].set_linewidth(0.7)

    # Move spines slightly away from the plot area
    ax.spines['left'].set_position(('outward', 5))
    ax.spines['bottom'].set_position(('outward', 5))

    # Ensure ticks are visible and styled consistently
    ax.tick_params(axis='x', colors='gray', length=4, width=0.7, which='major')
    ax.tick_params(axis='y', colors='gray', length=4, width=0.7, which='major')

    # Set y-axis limits from 0 to 100%
    ax.set_ylim(0, 100)

    # Legend sits outside the axes, vertically centered on the right
    if include_legend:
        ax.legend(loc='center left', bbox_to_anchor=(1.01, 0.5),
                  frameon=False, fontsize=fontsize-2)

    # Adjust layout to make room for the legend
    plt.tight_layout()

    save_and_show_results(save_as, dpi=dpi, show=show)

    # The figure is deliberately not returned, to avoid duplicate rendering by
    # the environment
    return None
626
+
627
+