sequenzo 0.1.21__cp39-cp39-macosx_10_9_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sequenzo might be problematic. Click here for more details.

Files changed (260) hide show
  1. sequenzo/__init__.py +240 -0
  2. sequenzo/big_data/__init__.py +12 -0
  3. sequenzo/big_data/clara/__init__.py +26 -0
  4. sequenzo/big_data/clara/clara.py +467 -0
  5. sequenzo/big_data/clara/utils/__init__.py +27 -0
  6. sequenzo/big_data/clara/utils/aggregatecases.py +92 -0
  7. sequenzo/big_data/clara/utils/davies_bouldin.py +91 -0
  8. sequenzo/big_data/clara/utils/get_weighted_diss.cpython-39-darwin.so +0 -0
  9. sequenzo/big_data/clara/utils/wfcmdd.py +205 -0
  10. sequenzo/big_data/clara/visualization.py +88 -0
  11. sequenzo/clustering/KMedoids.py +196 -0
  12. sequenzo/clustering/__init__.py +30 -0
  13. sequenzo/clustering/clustering_c_code.cpython-39-darwin.so +0 -0
  14. sequenzo/clustering/hierarchical_clustering.py +1380 -0
  15. sequenzo/clustering/src/KMedoid.cpp +262 -0
  16. sequenzo/clustering/src/PAM.cpp +236 -0
  17. sequenzo/clustering/src/PAMonce.cpp +234 -0
  18. sequenzo/clustering/src/cluster_quality.cpp +496 -0
  19. sequenzo/clustering/src/cluster_quality.h +128 -0
  20. sequenzo/clustering/src/cluster_quality_backup.cpp +570 -0
  21. sequenzo/clustering/src/module.cpp +228 -0
  22. sequenzo/clustering/src/weightedinertia.cpp +111 -0
  23. sequenzo/clustering/utils/__init__.py +27 -0
  24. sequenzo/clustering/utils/disscenter.py +122 -0
  25. sequenzo/data_preprocessing/__init__.py +20 -0
  26. sequenzo/data_preprocessing/helpers.py +256 -0
  27. sequenzo/datasets/__init__.py +41 -0
  28. sequenzo/datasets/biofam.csv +2001 -0
  29. sequenzo/datasets/biofam_child_domain.csv +2001 -0
  30. sequenzo/datasets/biofam_left_domain.csv +2001 -0
  31. sequenzo/datasets/biofam_married_domain.csv +2001 -0
  32. sequenzo/datasets/chinese_colonial_territories.csv +12 -0
  33. sequenzo/datasets/country_co2_emissions.csv +194 -0
  34. sequenzo/datasets/country_co2_emissions_global_deciles.csv +195 -0
  35. sequenzo/datasets/country_co2_emissions_global_quintiles.csv +195 -0
  36. sequenzo/datasets/country_co2_emissions_local_deciles.csv +195 -0
  37. sequenzo/datasets/country_co2_emissions_local_quintiles.csv +195 -0
  38. sequenzo/datasets/country_gdp_per_capita.csv +194 -0
  39. sequenzo/datasets/mvad.csv +713 -0
  40. sequenzo/datasets/pairfam_family.csv +1867 -0
  41. sequenzo/datasets/polyadic_samplec1.csv +61 -0
  42. sequenzo/datasets/polyadic_samplep1.csv +61 -0
  43. sequenzo/datasets/polyadic_seqc1.csv +61 -0
  44. sequenzo/datasets/polyadic_seqp1.csv +61 -0
  45. sequenzo/define_sequence_data.py +609 -0
  46. sequenzo/dissimilarity_measures/__init__.py +31 -0
  47. sequenzo/dissimilarity_measures/c_code.cpython-39-darwin.so +0 -0
  48. sequenzo/dissimilarity_measures/get_distance_matrix.py +702 -0
  49. sequenzo/dissimilarity_measures/get_substitution_cost_matrix.py +241 -0
  50. sequenzo/dissimilarity_measures/src/DHDdistance.cpp +148 -0
  51. sequenzo/dissimilarity_measures/src/LCPdistance.cpp +114 -0
  52. sequenzo/dissimilarity_measures/src/OMdistance.cpp +247 -0
  53. sequenzo/dissimilarity_measures/src/OMspellDistance.cpp +281 -0
  54. sequenzo/dissimilarity_measures/src/__init__.py +0 -0
  55. sequenzo/dissimilarity_measures/src/dist2matrix.cpp +63 -0
  56. sequenzo/dissimilarity_measures/src/dp_utils.h +160 -0
  57. sequenzo/dissimilarity_measures/src/module.cpp +34 -0
  58. sequenzo/dissimilarity_measures/src/setup.py +30 -0
  59. sequenzo/dissimilarity_measures/src/utils.h +25 -0
  60. sequenzo/dissimilarity_measures/src/xsimd/.github/cmake-test/main.cpp +6 -0
  61. sequenzo/dissimilarity_measures/src/xsimd/benchmark/main.cpp +159 -0
  62. sequenzo/dissimilarity_measures/src/xsimd/benchmark/xsimd_benchmark.hpp +565 -0
  63. sequenzo/dissimilarity_measures/src/xsimd/docs/source/conf.py +37 -0
  64. sequenzo/dissimilarity_measures/src/xsimd/examples/mandelbrot.cpp +330 -0
  65. sequenzo/dissimilarity_measures/src/xsimd/examples/pico_bench.hpp +246 -0
  66. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_arithmetic.hpp +266 -0
  67. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_complex.hpp +112 -0
  68. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_details.hpp +323 -0
  69. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_logical.hpp +218 -0
  70. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_math.hpp +2583 -0
  71. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_memory.hpp +880 -0
  72. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_rounding.hpp +72 -0
  73. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_swizzle.hpp +174 -0
  74. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_trigo.hpp +978 -0
  75. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx.hpp +1924 -0
  76. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx2.hpp +1144 -0
  77. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512bw.hpp +656 -0
  78. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512cd.hpp +28 -0
  79. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512dq.hpp +244 -0
  80. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512er.hpp +20 -0
  81. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512f.hpp +2650 -0
  82. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512ifma.hpp +20 -0
  83. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512pf.hpp +20 -0
  84. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vbmi.hpp +77 -0
  85. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vbmi2.hpp +131 -0
  86. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vnni_avx512bw.hpp +20 -0
  87. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vnni_avx512vbmi2.hpp +20 -0
  88. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avxvnni.hpp +20 -0
  89. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_common.hpp +24 -0
  90. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_common_fwd.hpp +77 -0
  91. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_constants.hpp +393 -0
  92. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_emulated.hpp +788 -0
  93. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_avx.hpp +93 -0
  94. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_avx2.hpp +46 -0
  95. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_sse.hpp +97 -0
  96. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma4.hpp +92 -0
  97. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_i8mm_neon64.hpp +17 -0
  98. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_isa.hpp +142 -0
  99. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_neon.hpp +3142 -0
  100. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_neon64.hpp +1543 -0
  101. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_rvv.hpp +1513 -0
  102. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_scalar.hpp +1260 -0
  103. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse2.hpp +2024 -0
  104. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse3.hpp +67 -0
  105. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse4_1.hpp +339 -0
  106. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse4_2.hpp +44 -0
  107. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_ssse3.hpp +186 -0
  108. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sve.hpp +1155 -0
  109. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_vsx.hpp +892 -0
  110. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_wasm.hpp +1780 -0
  111. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_arch.hpp +240 -0
  112. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_config.hpp +484 -0
  113. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_cpuid.hpp +269 -0
  114. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_inline.hpp +27 -0
  115. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/math/xsimd_rem_pio2.hpp +719 -0
  116. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/memory/xsimd_aligned_allocator.hpp +349 -0
  117. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/memory/xsimd_alignment.hpp +91 -0
  118. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_all_registers.hpp +55 -0
  119. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_api.hpp +2765 -0
  120. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx2_register.hpp +44 -0
  121. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512bw_register.hpp +51 -0
  122. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512cd_register.hpp +51 -0
  123. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512dq_register.hpp +51 -0
  124. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512er_register.hpp +51 -0
  125. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512f_register.hpp +77 -0
  126. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512ifma_register.hpp +51 -0
  127. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512pf_register.hpp +51 -0
  128. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vbmi2_register.hpp +51 -0
  129. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vbmi_register.hpp +51 -0
  130. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512bw_register.hpp +54 -0
  131. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512vbmi2_register.hpp +53 -0
  132. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx_register.hpp +64 -0
  133. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avxvnni_register.hpp +44 -0
  134. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_batch.hpp +1524 -0
  135. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_batch_constant.hpp +300 -0
  136. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_common_arch.hpp +47 -0
  137. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_emulated_register.hpp +80 -0
  138. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_avx2_register.hpp +50 -0
  139. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_avx_register.hpp +50 -0
  140. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_sse_register.hpp +50 -0
  141. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma4_register.hpp +50 -0
  142. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_i8mm_neon64_register.hpp +55 -0
  143. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_neon64_register.hpp +55 -0
  144. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_neon_register.hpp +154 -0
  145. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_register.hpp +94 -0
  146. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_rvv_register.hpp +506 -0
  147. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse2_register.hpp +59 -0
  148. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse3_register.hpp +49 -0
  149. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse4_1_register.hpp +48 -0
  150. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse4_2_register.hpp +48 -0
  151. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_ssse3_register.hpp +48 -0
  152. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sve_register.hpp +156 -0
  153. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_traits.hpp +337 -0
  154. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_utils.hpp +536 -0
  155. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_vsx_register.hpp +77 -0
  156. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_wasm_register.hpp +59 -0
  157. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/xsimd.hpp +75 -0
  158. sequenzo/dissimilarity_measures/src/xsimd/test/architectures/dummy.cpp +7 -0
  159. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set.cpp +13 -0
  160. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean.cpp +24 -0
  161. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_aligned.cpp +25 -0
  162. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_arch_independent.cpp +28 -0
  163. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_tag_dispatch.cpp +25 -0
  164. sequenzo/dissimilarity_measures/src/xsimd/test/doc/manipulating_abstract_batches.cpp +7 -0
  165. sequenzo/dissimilarity_measures/src/xsimd/test/doc/manipulating_parametric_batches.cpp +8 -0
  166. sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum.hpp +31 -0
  167. sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum_avx2.cpp +3 -0
  168. sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum_sse2.cpp +3 -0
  169. sequenzo/dissimilarity_measures/src/xsimd/test/doc/writing_vectorized_code.cpp +11 -0
  170. sequenzo/dissimilarity_measures/src/xsimd/test/main.cpp +31 -0
  171. sequenzo/dissimilarity_measures/src/xsimd/test/test_api.cpp +230 -0
  172. sequenzo/dissimilarity_measures/src/xsimd/test/test_arch.cpp +217 -0
  173. sequenzo/dissimilarity_measures/src/xsimd/test/test_basic_math.cpp +183 -0
  174. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch.cpp +1049 -0
  175. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_bool.cpp +508 -0
  176. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_cast.cpp +409 -0
  177. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_complex.cpp +712 -0
  178. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_constant.cpp +286 -0
  179. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_float.cpp +141 -0
  180. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_int.cpp +365 -0
  181. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_manip.cpp +308 -0
  182. sequenzo/dissimilarity_measures/src/xsimd/test/test_bitwise_cast.cpp +222 -0
  183. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_exponential.cpp +226 -0
  184. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_hyperbolic.cpp +183 -0
  185. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_power.cpp +265 -0
  186. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_trigonometric.cpp +236 -0
  187. sequenzo/dissimilarity_measures/src/xsimd/test/test_conversion.cpp +248 -0
  188. sequenzo/dissimilarity_measures/src/xsimd/test/test_custom_default_arch.cpp +28 -0
  189. sequenzo/dissimilarity_measures/src/xsimd/test/test_error_gamma.cpp +170 -0
  190. sequenzo/dissimilarity_measures/src/xsimd/test/test_explicit_batch_instantiation.cpp +32 -0
  191. sequenzo/dissimilarity_measures/src/xsimd/test/test_exponential.cpp +202 -0
  192. sequenzo/dissimilarity_measures/src/xsimd/test/test_extract_pair.cpp +92 -0
  193. sequenzo/dissimilarity_measures/src/xsimd/test/test_fp_manipulation.cpp +77 -0
  194. sequenzo/dissimilarity_measures/src/xsimd/test/test_gnu_source.cpp +30 -0
  195. sequenzo/dissimilarity_measures/src/xsimd/test/test_hyperbolic.cpp +167 -0
  196. sequenzo/dissimilarity_measures/src/xsimd/test/test_load_store.cpp +304 -0
  197. sequenzo/dissimilarity_measures/src/xsimd/test/test_memory.cpp +61 -0
  198. sequenzo/dissimilarity_measures/src/xsimd/test/test_poly_evaluation.cpp +64 -0
  199. sequenzo/dissimilarity_measures/src/xsimd/test/test_power.cpp +184 -0
  200. sequenzo/dissimilarity_measures/src/xsimd/test/test_rounding.cpp +199 -0
  201. sequenzo/dissimilarity_measures/src/xsimd/test/test_select.cpp +101 -0
  202. sequenzo/dissimilarity_measures/src/xsimd/test/test_shuffle.cpp +760 -0
  203. sequenzo/dissimilarity_measures/src/xsimd/test/test_sum.cpp +4 -0
  204. sequenzo/dissimilarity_measures/src/xsimd/test/test_sum.hpp +34 -0
  205. sequenzo/dissimilarity_measures/src/xsimd/test/test_traits.cpp +172 -0
  206. sequenzo/dissimilarity_measures/src/xsimd/test/test_trigonometric.cpp +208 -0
  207. sequenzo/dissimilarity_measures/src/xsimd/test/test_utils.hpp +611 -0
  208. sequenzo/dissimilarity_measures/src/xsimd/test/test_wasm/test_wasm_playwright.py +123 -0
  209. sequenzo/dissimilarity_measures/src/xsimd/test/test_xsimd_api.cpp +1460 -0
  210. sequenzo/dissimilarity_measures/utils/__init__.py +16 -0
  211. sequenzo/dissimilarity_measures/utils/get_LCP_length_for_2_seq.py +44 -0
  212. sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.cpython-39-darwin.so +0 -0
  213. sequenzo/dissimilarity_measures/utils/seqconc.cpython-39-darwin.so +0 -0
  214. sequenzo/dissimilarity_measures/utils/seqdss.cpython-39-darwin.so +0 -0
  215. sequenzo/dissimilarity_measures/utils/seqdur.cpython-39-darwin.so +0 -0
  216. sequenzo/dissimilarity_measures/utils/seqlength.cpython-39-darwin.so +0 -0
  217. sequenzo/multidomain/__init__.py +23 -0
  218. sequenzo/multidomain/association_between_domains.py +311 -0
  219. sequenzo/multidomain/cat.py +431 -0
  220. sequenzo/multidomain/combt.py +519 -0
  221. sequenzo/multidomain/dat.py +89 -0
  222. sequenzo/multidomain/idcd.py +139 -0
  223. sequenzo/multidomain/linked_polyad.py +292 -0
  224. sequenzo/openmp_setup.py +233 -0
  225. sequenzo/prefix_tree/__init__.py +43 -0
  226. sequenzo/prefix_tree/individual_level_indicators.py +1274 -0
  227. sequenzo/prefix_tree/system_level_indicators.py +465 -0
  228. sequenzo/prefix_tree/utils.py +54 -0
  229. sequenzo/sequence_characteristics/__init__.py +40 -0
  230. sequenzo/sequence_characteristics/complexity_index.py +49 -0
  231. sequenzo/sequence_characteristics/overall_cross_sectional_entropy.py +220 -0
  232. sequenzo/sequence_characteristics/plot_characteristics.py +593 -0
  233. sequenzo/sequence_characteristics/simple_characteristics.py +311 -0
  234. sequenzo/sequence_characteristics/state_frequencies_and_entropy_per_sequence.py +39 -0
  235. sequenzo/sequence_characteristics/turbulence.py +155 -0
  236. sequenzo/sequence_characteristics/variance_of_spell_durations.py +86 -0
  237. sequenzo/sequence_characteristics/within_sequence_entropy.py +43 -0
  238. sequenzo/suffix_tree/__init__.py +48 -0
  239. sequenzo/suffix_tree/individual_level_indicators.py +1638 -0
  240. sequenzo/suffix_tree/system_level_indicators.py +456 -0
  241. sequenzo/suffix_tree/utils.py +56 -0
  242. sequenzo/visualization/__init__.py +29 -0
  243. sequenzo/visualization/plot_mean_time.py +194 -0
  244. sequenzo/visualization/plot_modal_state.py +276 -0
  245. sequenzo/visualization/plot_most_frequent_sequences.py +147 -0
  246. sequenzo/visualization/plot_relative_frequency.py +404 -0
  247. sequenzo/visualization/plot_sequence_index.py +937 -0
  248. sequenzo/visualization/plot_single_medoid.py +153 -0
  249. sequenzo/visualization/plot_state_distribution.py +613 -0
  250. sequenzo/visualization/plot_transition_matrix.py +190 -0
  251. sequenzo/visualization/utils/__init__.py +23 -0
  252. sequenzo/visualization/utils/utils.py +310 -0
  253. sequenzo/with_event_history_analysis/__init__.py +35 -0
  254. sequenzo/with_event_history_analysis/sequence_analysis_multi_state_model.py +850 -0
  255. sequenzo/with_event_history_analysis/sequence_history_analysis.py +283 -0
  256. sequenzo-0.1.21.dist-info/METADATA +308 -0
  257. sequenzo-0.1.21.dist-info/RECORD +254 -0
  258. sequenzo-0.1.21.dist-info/WHEEL +5 -0
  259. sequenzo-0.1.21.dist-info/licenses/LICENSE +28 -0
  260. sequenzo-0.1.21.dist-info/top_level.txt +1 -0
@@ -0,0 +1,613 @@
1
+ """
2
+ @Author : Yuqi Liang 梁彧祺
3
+ @File : plot_state_distribution.py
4
+ @Time : 15/02/2025 22:03
5
+ @Desc :
6
+ """
7
+ import numpy as np
8
+ import pandas as pd
9
+ import matplotlib.pyplot as plt
10
+
11
+ from sequenzo import SequenceData
12
+ from sequenzo.visualization.utils import (
13
+ set_up_time_labels_for_x_axis,
14
+ save_figure_to_buffer,
15
+ create_standalone_legend,
16
+ combine_plot_with_legend,
17
+ save_and_show_results,
18
+ determine_layout,
19
+ show_plot_title,
20
+ show_group_title
21
+ )
22
+
23
+
24
def smart_sort_groups(groups):
    """
    Smart sorting: order groups numerically where possible, otherwise by string.

    The sort key for each item is chosen as follows:

    - Real numbers (int, float, NumPy integer/floating scalars; excluding
      bool and NaN) are ordered by their own value, so negative and
      fractional values sort correctly (e.g. -10 < -2 < 3).
    - Any other item is converted with ``str``; if that text starts with
      digits, it is ordered by that integer prefix
      (e.g. "2 - Late" comes before "10 - Early").
    - Everything else (plain labels, NaN, bools) is placed last and ordered
      by its string form.

    Ties on the numeric part are broken by the string form of the item.

    :param groups: Iterable of group names/values
    :return: Sorted list of group names
    """
    import numbers
    import re

    # Compiled once per call (not once per item) and reused by sort_key
    numeric_pattern = re.compile(r'^(\d+)')

    def sort_key(item):
        text = str(item)

        # bool is a subclass of int but is a label here, not a magnitude;
        # NaN (item != item) cannot be ordered, so it falls through to the
        # string-based path below.
        is_number = isinstance(item, numbers.Real) and not isinstance(item, bool)
        if is_number and item == item:
            return (item, text)

        match = numeric_pattern.match(text)
        return (int(match.group(1)), text) if match else (float('inf'), text)

    return sorted(groups, key=sort_key)
41
+
42
+
43
+ def plot_state_distribution(seqdata: SequenceData,
44
+ # Grouping parameters
45
+ group_by_column=None,
46
+ group_dataframe=None,
47
+ group_column_name=None,
48
+ group_labels=None,
49
+ # Other parameters
50
+ weights="auto",
51
+ figsize=(12, 7),
52
+ plot_style="standard",
53
+ title=None,
54
+ xlabel="Time",
55
+ ylabel="State Distribution (%)",
56
+ save_as=None,
57
+ dpi=200,
58
+ layout='column',
59
+ nrows: int = None,
60
+ ncols: int = None,
61
+ stacked=True,
62
+ show=True,
63
+ include_legend=True,
64
+ group_order=None,
65
+ fontsize=12,
66
+ sort_groups='auto',
67
+ show_group_titles: bool = True) -> None:
68
+ """
69
+ Creates state distribution plots for different groups, showing how state
70
+ prevalence changes over time within each group.
71
+
72
+ **Two API modes for grouping:**
73
+
74
+ 1. **Simplified API** (when grouping info is already in the data):
75
+ ```python
76
+ plot_state_distribution(seqdata, group_by_column="Cluster", group_labels=cluster_labels)
77
+ ```
78
+
79
+ 2. **Complete API** (when grouping info is in a separate dataframe):
80
+ ```python
81
+ plot_state_distribution(seqdata, group_dataframe=membership_df,
82
+ group_column_name="Cluster", group_labels=cluster_labels)
83
+ ```
84
+
85
+ :param seqdata: (SequenceData) A SequenceData object containing sequences
86
+
87
+ **Grouping parameters:**
88
+ :param group_by_column: (str, optional) Column name from seqdata.data to group by.
89
+ Use this when grouping information is already in your data.
90
+ Example: "Cluster", "sex", "education"
91
+ :param group_dataframe: (pd.DataFrame, optional) Separate dataframe containing grouping information.
92
+ Use this when grouping info is in a separate table (e.g., clustering results).
93
+ Must contain ID column and grouping column.
94
+ :param group_column_name: (str, optional) Name of the grouping column in group_dataframe.
95
+ Required when using group_dataframe.
96
+ :param group_labels: (dict, optional) Custom labels for group values.
97
+ Example: {1: "Late Family Formation", 2: "Early Partnership"}
98
+ Maps original values to display labels.
99
+ :param weights: (np.ndarray or "auto") Weights for sequences. If "auto", uses seqdata.weights if available
100
+ :param figsize: (tuple) Size of the figure (only used when plot_style="custom")
101
+ :param plot_style: Plot aspect style:
102
+ - 'standard': Standard proportions (12, 7) - balanced view
103
+ - 'compact': Compact/vertical proportions (10, 8) - more vertical like R plots
104
+ - 'wide': Wide proportions (14, 5) - emphasizes time progression
105
+ - 'narrow': Narrow/tall proportions (9, 11) - moderately vertical
106
+ - 'custom': Use the provided figsize parameter
107
+ :param title: (str) Optional title for the plot
108
+ :param xlabel: (str) Label for the x-axis
109
+ :param ylabel: (str) Label for the y-axis
110
+ :param save_as: (str) Optional file path to save the plot
111
+ :param dpi: (int) Resolution of the saved plot
112
+ :param layout: (str) Layout style - 'column' (default, 3xn), 'grid' (nxn)
113
+ :param stacked: (bool) Whether to create stacked area plots (True) or line plots (False)
114
+ :param group_order: List, manually specify group order (overrides sort_groups)
115
+ :param sort_groups: String, sorting method: 'auto'(smart numeric), 'numeric'(numeric prefix), 'alpha'(alphabetical), 'none'(original order)
116
+
117
+ :return: None
118
+ """
119
+ # Determine figure size based on plot style
120
+ style_sizes = {
121
+ 'standard': (12, 7), # Balanced view
122
+ 'compact': (10, 8), # More square, like R plots
123
+ 'wide': (14, 5), # Wide, emphasizes time
124
+ 'narrow': (9, 11), # Moderately vertical
125
+ 'custom': figsize # User-provided
126
+ }
127
+
128
+ if plot_style not in style_sizes:
129
+ raise ValueError(f"Invalid plot_style '{plot_style}'. "
130
+ f"Supported styles: {list(style_sizes.keys())}")
131
+
132
+ # Special validation for custom plot style
133
+ if plot_style == 'custom' and figsize == (12, 7):
134
+ raise ValueError(
135
+ "When using plot_style='custom', you must explicitly provide a figsize parameter "
136
+ "that differs from the default (12, 7). "
137
+ "Suggested custom sizes:\n"
138
+ " - For wide plots: figsize=(16, 6)\n"
139
+ " - For tall plots: figsize=(8, 12)\n"
140
+ " - For square plots: figsize=(10, 10)\n"
141
+ " - For small plots: figsize=(8, 5)\n"
142
+ "Example: plot_state_distribution(data, plot_style='custom', figsize=(14, 9))"
143
+ )
144
+
145
+ actual_figsize = style_sizes[plot_style]
146
+
147
+ # Handle the simplified API: group_by_column
148
+ if group_by_column is not None:
149
+ # Validate that the column exists in the original data
150
+ if group_by_column not in seqdata.data.columns:
151
+ available_cols = [col for col in seqdata.data.columns if col not in seqdata.time and col != seqdata.id_col]
152
+ raise ValueError(
153
+ f"Column '{group_by_column}' not found in the data. "
154
+ f"Available columns for grouping: {available_cols}"
155
+ )
156
+
157
+ # Automatically create group_dataframe and group_column_name from the simplified API
158
+ group_dataframe = seqdata.data[[seqdata.id_col, group_by_column]].copy()
159
+ group_dataframe.columns = ['Entity ID', 'Category']
160
+ group_column_name = 'Category'
161
+
162
+ # Handle group labels - flexible and user-controllable
163
+ unique_values = seqdata.data[group_by_column].unique()
164
+
165
+ if group_labels is not None:
166
+ # User provided custom labels - use them
167
+ missing_keys = set(unique_values) - set(group_labels.keys())
168
+ if missing_keys:
169
+ raise ValueError(
170
+ f"group_labels missing mappings for values: {missing_keys}. "
171
+ f"Please provide labels for all unique values in '{group_by_column}': {sorted(unique_values)}"
172
+ )
173
+ group_dataframe['Category'] = group_dataframe['Category'].map(group_labels)
174
+ else:
175
+ # No custom labels provided - use smart defaults
176
+ if all(isinstance(v, (int, float, np.integer, np.floating)) and not pd.isna(v) for v in unique_values):
177
+ # Numeric values - keep as is (user can provide group_labels if they want custom names)
178
+ pass
179
+ # For string/categorical values, keep original values
180
+ # This handles cases where users already have meaningful labels like "Male"/"Female"
181
+
182
+ print(f"[>] Creating grouped plots by '{group_by_column}' with {len(unique_values)} categories")
183
+
184
+ # If no grouping information, create a single plot
185
+ if group_dataframe is None or group_column_name is None:
186
+ return _plot_state_distribution_single(
187
+ seqdata=seqdata, weights=weights, figsize=actual_figsize,
188
+ plot_style=plot_style, title=title, xlabel=xlabel, ylabel=ylabel,
189
+ save_as=save_as, dpi=dpi, stacked=stacked,
190
+ show=show, include_legend=include_legend, fontsize=fontsize
191
+ )
192
+
193
+ # Process weights
194
+ if isinstance(weights, str) and weights == "auto":
195
+ weights = getattr(seqdata, "weights", None)
196
+
197
+ if weights is not None:
198
+ weights = np.asarray(weights, dtype=float).reshape(-1)
199
+ if len(weights) != len(seqdata.values):
200
+ raise ValueError("Length of weights must equal number of sequences.")
201
+
202
+ # Ensure ID columns match (convert if needed)
203
+ id_col_name = "Entity ID" if "Entity ID" in group_dataframe.columns else group_dataframe.columns[0]
204
+
205
+ # Get unique groups and sort them based on user preference
206
+ if group_order:
207
+ # Use manually specified order, filter out non-existing groups
208
+ groups = [g for g in group_order if g in group_dataframe[group_column_name].unique()]
209
+ missing_groups = [g for g in group_dataframe[group_column_name].unique() if g not in group_order]
210
+ if missing_groups:
211
+ print(f"[Warning] Groups not in group_order will be excluded: {missing_groups}")
212
+ elif sort_groups == 'numeric' or sort_groups == 'auto':
213
+ groups = smart_sort_groups(group_dataframe[group_column_name].unique())
214
+ elif sort_groups == 'alpha':
215
+ groups = sorted(group_dataframe[group_column_name].unique())
216
+ elif sort_groups == 'none':
217
+ groups = list(group_dataframe[group_column_name].unique())
218
+ else:
219
+ raise ValueError(f"Invalid sort_groups value: {sort_groups}. Use 'auto', 'numeric', 'alpha', or 'none'.")
220
+
221
+ num_groups = len(groups)
222
+
223
+ # Calculate figure size and layout based on number of groups and specified layout
224
+ nrows, ncols = determine_layout(num_groups, layout=layout, nrows=nrows, ncols=ncols)
225
+
226
+ fig, axes = plt.subplots(
227
+ nrows=nrows,
228
+ ncols=ncols,
229
+ figsize=(actual_figsize[0] * ncols, actual_figsize[1] * nrows),
230
+ gridspec_kw={'wspace': 0.15, 'hspace': 0.25} # Reduced spacing for tighter layout
231
+ )
232
+ axes = axes.flatten()
233
+
234
+ # Create state mapping from numerical values back to state names
235
+ inv_state_mapping = {v: k for k, v in seqdata.state_mapping.items()}
236
+
237
+ # Process each group
238
+ for i, group in enumerate(groups):
239
+ # Get IDs for this group
240
+ group_ids = group_dataframe[group_dataframe[group_column_name] == group][id_col_name].values
241
+
242
+ # Match IDs with sequence data
243
+ mask = np.isin(seqdata.ids, group_ids)
244
+ if not np.any(mask):
245
+ print(f"Warning: No matching sequences found for group '{group}'")
246
+ continue
247
+
248
+ # Get sequences for this group
249
+ group_seq_df = seqdata.to_dataframe().loc[mask]
250
+
251
+ # Get weights for this group
252
+ if weights is None:
253
+ w = np.ones(len(group_seq_df))
254
+ else:
255
+ w = np.asarray(weights)[mask]
256
+
257
+ # Broadcast weights to each time point
258
+ W = np.repeat(w[:, None], group_seq_df.shape[1], axis=1)
259
+
260
+ # Calculate weighted state distributions at each time point
261
+ distributions = []
262
+ for t, col in enumerate(group_seq_df.columns):
263
+ col_vals = group_seq_df[col].to_numpy()
264
+
265
+ # Calculate weighted counts for each state
266
+ sums = {s: float(W[col_vals == s, t].sum()) for s in range(1, len(seqdata.states)+1)}
267
+ totw = float(W[:, t].sum())
268
+
269
+ # Convert to weighted percentages
270
+ dist = {inv_state_mapping.get(s, 'Missing'): 100.0 * (sums[s] / totw if totw > 0 else 0.0)
271
+ for s in range(1, len(seqdata.states) + 1)}
272
+
273
+ # Add time point and distribution to the list
274
+ distributions.append(dict({"time": col, **{str(k): v for k, v in dist.items()}}))
275
+
276
+ # Ensure percentages sum to exactly 100% to avoid gaps
277
+ for j in range(len(distributions)):
278
+ total_percentage = sum(distributions[j][str(state)] for state in seqdata.states)
279
+ if total_percentage < 100:
280
+ top_state = str(seqdata.states[-1])
281
+ distributions[j][str(top_state)] += (100 - total_percentage)
282
+
283
+ # Convert to DataFrame for plotting
284
+ dist_df = pd.DataFrame(distributions)
285
+
286
+ # Plot on the corresponding axis
287
+ ax = axes[i]
288
+
289
+ # Get colors for each state
290
+ # seqdata.states are integer encodings (e.g., 1, 2, ...)
291
+ # seqdata.state_mapping[state] maps integers to labels (e.g., 'Married', 'Single')
292
+ # seqdata.color_map[...] gets color by label
293
+ base_colors = [seqdata.color_map[seqdata.state_mapping[state]] for state in seqdata.states]
294
+
295
+ # Plot the data
296
+ if stacked:
297
+ # Create a stacked area plot
298
+ ax.stackplot(range(len(dist_df)),
299
+ [dist_df[str(state)] for state in seqdata.states],
300
+ labels=seqdata.labels,
301
+ colors=base_colors,
302
+ alpha=1.0)
303
+
304
+ # Add grid lines behind the stack plot
305
+ ax.grid(axis='y', linestyle='-', alpha=0.2)
306
+ ax.set_axisbelow(True)
307
+ else:
308
+ # Create a line plot
309
+ for state, label, color in zip(seqdata.states, seqdata.labels, base_colors):
310
+ ax.plot(range(len(dist_df)), dist_df[str(state)],
311
+ label=label, color=color,
312
+ linewidth=2.5, marker='o', markersize=5)
313
+
314
+ # Add grid lines
315
+ ax.grid(True, linestyle='-', alpha=0.2)
316
+
317
+ # Set group title with weighted sample size
318
+ # Check if we have effective weights (not all 1.0) and they were provided by user
319
+ original_weights = getattr(seqdata, "weights", None)
320
+ if original_weights is not None and not np.allclose(original_weights, 1.0):
321
+ sum_w = float(w.sum())
322
+ group_title = f"{group} (n = {len(group_seq_df)}, total weight = {sum_w:.1f})"
323
+ else:
324
+ group_title = f"{group} (n = {len(group_seq_df)})"
325
+ if show_group_titles:
326
+ show_group_title(ax, group_title, show=True, fontsize=fontsize)
327
+
328
+ # Set y-axis limits from 0 to 100%
329
+ ax.set_ylim(0, 100)
330
+
331
+ # Clean up axis aesthetics
332
+ ax.spines['top'].set_visible(False)
333
+ ax.spines['right'].set_visible(False)
334
+ ax.spines['left'].set_color('gray')
335
+ ax.spines['bottom'].set_color('gray')
336
+ ax.spines['left'].set_linewidth(0.7)
337
+ ax.spines['bottom'].set_linewidth(0.7)
338
+
339
+ # Move spines slightly away from the plot area for better aesthetics (same as index plot)
340
+ ax.spines['left'].set_position(('outward', 5))
341
+ ax.spines['bottom'].set_position(('outward', 5))
342
+
343
+ ax.tick_params(axis='x', colors='gray', length=4, width=0.7)
344
+ ax.tick_params(axis='y', colors='gray', length=4, width=0.7)
345
+
346
+ # Set x-axis labels
347
+ set_up_time_labels_for_x_axis(seqdata, ax)
348
+
349
+ # Set x-axis range to prevent over-extension like in the reference image
350
+ ax.set_xlim(-0.5, len(seqdata.cleaned_time) - 0.5)
351
+
352
+ # Add axis labels
353
+ if i % ncols == 0:
354
+ ax.set_ylabel(ylabel, fontsize=fontsize, labelpad=10, color='black')
355
+
356
+ # if i >= num_groups - ncols:
357
+ ax.set_xlabel(xlabel, fontsize=fontsize, labelpad=10, color='black')
358
+
359
+ # Hide unused subplots
360
+ for j in range(i + 1, len(axes)):
361
+ axes[j].set_visible(False)
362
+
363
+ # Add a common title if provided
364
+ if title:
365
+ fig.suptitle(title, fontsize=fontsize+2, y=1.02)
366
+
367
+ # Adjust layout to remove tight_layout warning and eliminate extra right space
368
+ fig.subplots_adjust(wspace=0.15, hspace=0.25, bottom=0.1, top=0.9, right=0.98, left=0.08)
369
+
370
+ # Save main figure to memory
371
+ main_buffer = save_figure_to_buffer(fig, dpi=dpi)
372
+
373
+ if include_legend:
374
+ # Create standalone legend
375
+ colors = seqdata.color_map_by_label
376
+ legend_buffer = create_standalone_legend(
377
+ colors=colors,
378
+ labels=seqdata.labels,
379
+ ncol=min(5, len(seqdata.states)),
380
+ figsize=(actual_figsize[0] * ncols, 1),
381
+ fontsize=fontsize-2,
382
+ dpi=dpi
383
+ )
384
+
385
+ # Combine plot with legend
386
+ if save_as and not save_as.lower().endswith(('.png', '.jpg', '.jpeg', '.pdf')):
387
+ save_as = save_as + '.png'
388
+
389
+ combined_img = combine_plot_with_legend(
390
+ main_buffer,
391
+ legend_buffer,
392
+ output_path=save_as,
393
+ dpi=dpi,
394
+ padding=20
395
+ )
396
+
397
+ # Display combined image
398
+ plt.figure(figsize=(actual_figsize[0] * ncols, actual_figsize[1] * nrows + 1))
399
+ plt.imshow(combined_img)
400
+ plt.axis('off')
401
+ if show or save_as: # Show if displaying or saving is needed
402
+ plt.show()
403
+ plt.close()
404
+ else:
405
+ # Display plot without legend
406
+ if save_as and not save_as.lower().endswith(('.png', '.jpg', '.jpeg', '.pdf')):
407
+ save_as = save_as + '.png'
408
+
409
+ # Save or show the main plot directly
410
+ plt.figure(figsize=(actual_figsize[0] * ncols, actual_figsize[1] * nrows))
411
+ plt.imshow(main_buffer)
412
+ plt.axis('off')
413
+
414
+ if save_as:
415
+ plt.savefig(save_as, dpi=dpi, bbox_inches='tight')
416
+ if show:
417
+ plt.show()
418
+ plt.close()
419
+
420
+ # No longer return fig to avoid duplicate rendering by environment
421
+ return None
422
+ # return fig
423
+
424
+
425
def _plot_state_distribution_single(seqdata: SequenceData,
                                    weights="auto",
                                    figsize=(12, 7),
                                    plot_style="standard",
                                    title=None,
                                    xlabel="Time",
                                    ylabel="State Distribution (%)",
                                    stacked=True,
                                    save_as=None,
                                    dpi=200,
                                    show=False,
                                    include_legend=True,
                                    fontsize=12) -> None:
    """
    Plot how the (weighted) share of each state changes over time for all sequences.

    For every time column of ``seqdata.to_dataframe()`` the weighted percentage of
    sequences in each state is computed and drawn either as a stacked area plot or
    as one line per state.

    :param seqdata: (SequenceData) A SequenceData object containing sequences
    :param weights: (np.ndarray, None or "auto") Weights for sequences. If "auto", uses
        ``seqdata.weights`` when that attribute exists; ``None`` means every sequence
        counts with weight 1
    :param figsize: (tuple) Size of the figure (only used when plot_style="custom")
    :param plot_style: Plot aspect style ('standard', 'compact', 'wide', 'narrow', 'custom')
    :param title: (str) Optional title for the plot
    :param xlabel: (str) Label for the x-axis
    :param ylabel: (str) Label for the y-axis
    :param stacked: (bool) Whether to create a stacked area plot (True) or line plot (False)
    :param save_as: (str) Optional file path, forwarded to ``save_and_show_results``
    :param dpi: (int) Resolution, forwarded to ``save_and_show_results``
    :param show: (bool) Forwarded to ``save_and_show_results``
    :param include_legend: (bool) Whether to draw a legend to the right of the axes
    :param fontsize: (int) Font size of the axis labels; the title uses ``fontsize + 2``
        and the legend ``fontsize - 2``

    :raises ValueError: if ``plot_style`` is unknown, if ``plot_style='custom'`` is used
        with the default ``figsize``, or if ``weights`` does not have one entry per sequence

    :return: None
    """
    # Determine figure size based on plot style
    style_sizes = {
        'standard': (12, 7),   # Balanced view
        'compact': (10, 8),    # More square, like R plots
        'wide': (14, 5),       # Wide, emphasizes time
        'narrow': (9, 11),     # Moderately vertical
        'custom': figsize      # User-provided
    }

    if plot_style not in style_sizes:
        raise ValueError(f"Invalid plot_style '{plot_style}'. "
                         f"Supported styles: {list(style_sizes.keys())}")

    # 'custom' with the default figsize is treated as "figsize was not provided"
    if plot_style == 'custom' and figsize == (12, 7):
        raise ValueError(
            "When using plot_style='custom', you must explicitly provide a figsize parameter "
            "that differs from the default (12, 7). "
            "Suggested custom sizes:\n"
            " - For wide plots: figsize=(16, 6)\n"
            " - For tall plots: figsize=(8, 12)\n"
            " - For square plots: figsize=(10, 10)\n"
            " - For small plots: figsize=(8, 5)\n"
            "Example: plot_state_distribution(data, plot_style='custom', figsize=(14, 9))"
        )

    actual_figsize = style_sizes[plot_style]

    # Resolve weights: "auto" falls back to the weights stored on seqdata (if any)
    if isinstance(weights, str) and weights == "auto":
        weights = getattr(seqdata, "weights", None)

    if weights is not None:
        weights = np.asarray(weights, dtype=float).reshape(-1)
        if len(weights) != len(seqdata.values):
            raise ValueError("Length of weights must equal number of sequences.")

    # Get sequence data as a DataFrame (one column per time point)
    seq_df = seqdata.to_dataframe()

    # One weight per sequence; unit weights when none were supplied
    w = np.ones(len(seq_df)) if weights is None else weights

    # The total weight is identical at every time point, so compute it once
    total_weight = float(w.sum())

    # Map numeric codes back to state names
    inv_state_mapping = {v: k for k, v in seqdata.state_mapping.items()}
    state_codes = range(1, len(seqdata.states) + 1)

    # Calculate weighted state distributions (in percent) at each time point
    distributions = []
    for col in seq_df.columns:
        col_vals = seq_df[col].to_numpy()
        row = {"time": col}
        for code in state_codes:
            weighted_count = float(w[col_vals == code].sum())
            share = weighted_count / total_weight if total_weight > 0 else 0.0
            row[str(inv_state_mapping.get(code, 'Missing'))] = 100.0 * share
        distributions.append(row)

    # Ensure percentages sum to exactly 100% to avoid gaps at the top of the stack:
    # any shortfall is added to the last (top-most) state.
    for row in distributions:
        total_percentage = sum(row[str(state)] for state in seqdata.states)
        if total_percentage < 100:
            top_state = str(seqdata.states[-1])
            row[top_state] += (100 - total_percentage)

    # Convert to DataFrame for plotting
    dist_df = pd.DataFrame(distributions)

    # Create the plot
    plt.style.use('default')  # Start with default style for clean slate
    fig, ax = plt.subplots(figsize=actual_figsize)

    # One color per state, looked up through the state's numeric code
    base_colors = [seqdata.color_map[seqdata.state_mapping[state]] for state in seqdata.states]
    x_positions = range(len(dist_df))

    if stacked:
        # Stacked area plot at full opacity
        ax.stackplot(x_positions,
                     [dist_df[str(state)] for state in seqdata.states],
                     labels=seqdata.labels,
                     colors=base_colors,
                     alpha=1.0)

        # Add grid lines behind the stack plot
        ax.grid(axis='y', linestyle='-', alpha=0.2)
        ax.set_axisbelow(True)
    else:
        # Line plot; legend entries use seqdata.labels, matching the stacked branch
        for state, label, color in zip(seqdata.states, seqdata.labels, base_colors):
            ax.plot(x_positions, dist_df[str(state)],
                    label=label, color=color,
                    linewidth=2.5, marker='o', markersize=5)

        # Add grid lines
        ax.grid(True, linestyle='-', alpha=0.2)

    # Set axis labels and title
    ax.set_xlabel(xlabel, fontsize=fontsize, labelpad=10)
    ax.set_ylabel(ylabel, fontsize=fontsize, labelpad=10)

    if title:
        ax.set_title(title, fontsize=fontsize+2, fontweight='bold', pad=20)

    # Set x-axis labels based on time points
    set_up_time_labels_for_x_axis(seqdata, ax)

    # Set x-axis range to prevent over-extension
    ax.set_xlim(-0.5, len(seqdata.cleaned_time) - 0.5)

    # Enhance aesthetics
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.spines['left'].set_color('gray')
    ax.spines['bottom'].set_color('gray')
    ax.spines['left'].set_linewidth(0.7)
    ax.spines['bottom'].set_linewidth(0.7)

    # Move spines slightly away from the plot area for better aesthetics (same as index plot)
    ax.spines['left'].set_position(('outward', 5))
    ax.spines['bottom'].set_position(('outward', 5))

    # Ensure ticks are visible and styled consistently
    ax.tick_params(axis='x', colors='gray', length=4, width=0.7, which='major')
    ax.tick_params(axis='y', colors='gray', length=4, width=0.7, which='major')

    # Set y-axis limits from 0 to 100%
    ax.set_ylim(0, 100)

    # Add legend to the right of the axes
    if include_legend:
        ax.legend(loc='center left', bbox_to_anchor=(1.01, 0.5),
                  frameon=False, fontsize=fontsize-2)

    # Adjust layout to make room for the legend
    plt.tight_layout()

    save_and_show_results(save_as, dpi=dpi, show=show)

    # No longer return fig to avoid duplicate rendering by environment
    return None
612
+
613
+