sequenzo 0.1.31__cp310-cp310-macosx_10_9_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (299) hide show
  1. _sequenzo_fastcluster.cpython-310-darwin.so +0 -0
  2. sequenzo/__init__.py +349 -0
  3. sequenzo/big_data/__init__.py +12 -0
  4. sequenzo/big_data/clara/__init__.py +26 -0
  5. sequenzo/big_data/clara/clara.py +476 -0
  6. sequenzo/big_data/clara/utils/__init__.py +27 -0
  7. sequenzo/big_data/clara/utils/aggregatecases.py +92 -0
  8. sequenzo/big_data/clara/utils/davies_bouldin.py +91 -0
  9. sequenzo/big_data/clara/utils/get_weighted_diss.cpython-310-darwin.so +0 -0
  10. sequenzo/big_data/clara/utils/wfcmdd.py +205 -0
  11. sequenzo/big_data/clara/visualization.py +88 -0
  12. sequenzo/clustering/KMedoids.py +178 -0
  13. sequenzo/clustering/__init__.py +30 -0
  14. sequenzo/clustering/clustering_c_code.cpython-310-darwin.so +0 -0
  15. sequenzo/clustering/hierarchical_clustering.py +1256 -0
  16. sequenzo/clustering/sequenzo_fastcluster/fastcluster.py +495 -0
  17. sequenzo/clustering/sequenzo_fastcluster/src/fastcluster.cpp +1877 -0
  18. sequenzo/clustering/sequenzo_fastcluster/src/fastcluster_python.cpp +1264 -0
  19. sequenzo/clustering/src/KMedoid.cpp +263 -0
  20. sequenzo/clustering/src/PAM.cpp +237 -0
  21. sequenzo/clustering/src/PAMonce.cpp +265 -0
  22. sequenzo/clustering/src/cluster_quality.cpp +496 -0
  23. sequenzo/clustering/src/cluster_quality.h +128 -0
  24. sequenzo/clustering/src/cluster_quality_backup.cpp +570 -0
  25. sequenzo/clustering/src/module.cpp +228 -0
  26. sequenzo/clustering/src/weightedinertia.cpp +111 -0
  27. sequenzo/clustering/utils/__init__.py +27 -0
  28. sequenzo/clustering/utils/disscenter.py +122 -0
  29. sequenzo/data_preprocessing/__init__.py +22 -0
  30. sequenzo/data_preprocessing/helpers.py +303 -0
  31. sequenzo/datasets/__init__.py +41 -0
  32. sequenzo/datasets/biofam.csv +2001 -0
  33. sequenzo/datasets/biofam_child_domain.csv +2001 -0
  34. sequenzo/datasets/biofam_left_domain.csv +2001 -0
  35. sequenzo/datasets/biofam_married_domain.csv +2001 -0
  36. sequenzo/datasets/chinese_colonial_territories.csv +12 -0
  37. sequenzo/datasets/country_co2_emissions.csv +194 -0
  38. sequenzo/datasets/country_co2_emissions_global_deciles.csv +195 -0
  39. sequenzo/datasets/country_co2_emissions_global_quintiles.csv +195 -0
  40. sequenzo/datasets/country_co2_emissions_local_deciles.csv +195 -0
  41. sequenzo/datasets/country_co2_emissions_local_quintiles.csv +195 -0
  42. sequenzo/datasets/country_gdp_per_capita.csv +194 -0
  43. sequenzo/datasets/dyadic_children.csv +61 -0
  44. sequenzo/datasets/dyadic_parents.csv +61 -0
  45. sequenzo/datasets/mvad.csv +713 -0
  46. sequenzo/datasets/pairfam_activity_by_month.csv +1028 -0
  47. sequenzo/datasets/pairfam_activity_by_year.csv +1028 -0
  48. sequenzo/datasets/pairfam_family_by_month.csv +1028 -0
  49. sequenzo/datasets/pairfam_family_by_year.csv +1028 -0
  50. sequenzo/datasets/political_science_aid_shock.csv +166 -0
  51. sequenzo/datasets/political_science_donor_fragmentation.csv +157 -0
  52. sequenzo/define_sequence_data.py +1400 -0
  53. sequenzo/dissimilarity_measures/__init__.py +31 -0
  54. sequenzo/dissimilarity_measures/c_code.cpython-310-darwin.so +0 -0
  55. sequenzo/dissimilarity_measures/get_distance_matrix.py +762 -0
  56. sequenzo/dissimilarity_measures/get_substitution_cost_matrix.py +246 -0
  57. sequenzo/dissimilarity_measures/src/DHDdistance.cpp +148 -0
  58. sequenzo/dissimilarity_measures/src/LCPdistance.cpp +114 -0
  59. sequenzo/dissimilarity_measures/src/LCPspellDistance.cpp +215 -0
  60. sequenzo/dissimilarity_measures/src/OMdistance.cpp +247 -0
  61. sequenzo/dissimilarity_measures/src/OMspellDistance.cpp +281 -0
  62. sequenzo/dissimilarity_measures/src/__init__.py +0 -0
  63. sequenzo/dissimilarity_measures/src/dist2matrix.cpp +63 -0
  64. sequenzo/dissimilarity_measures/src/dp_utils.h +160 -0
  65. sequenzo/dissimilarity_measures/src/module.cpp +40 -0
  66. sequenzo/dissimilarity_measures/src/setup.py +30 -0
  67. sequenzo/dissimilarity_measures/src/utils.h +25 -0
  68. sequenzo/dissimilarity_measures/src/xsimd/.github/cmake-test/main.cpp +6 -0
  69. sequenzo/dissimilarity_measures/src/xsimd/benchmark/main.cpp +159 -0
  70. sequenzo/dissimilarity_measures/src/xsimd/benchmark/xsimd_benchmark.hpp +565 -0
  71. sequenzo/dissimilarity_measures/src/xsimd/docs/source/conf.py +37 -0
  72. sequenzo/dissimilarity_measures/src/xsimd/examples/mandelbrot.cpp +330 -0
  73. sequenzo/dissimilarity_measures/src/xsimd/examples/pico_bench.hpp +246 -0
  74. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_arithmetic.hpp +266 -0
  75. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_complex.hpp +112 -0
  76. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_details.hpp +323 -0
  77. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_logical.hpp +218 -0
  78. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_math.hpp +2583 -0
  79. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_memory.hpp +880 -0
  80. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_rounding.hpp +72 -0
  81. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_swizzle.hpp +174 -0
  82. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_trigo.hpp +978 -0
  83. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx.hpp +1924 -0
  84. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx2.hpp +1144 -0
  85. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512bw.hpp +656 -0
  86. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512cd.hpp +28 -0
  87. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512dq.hpp +244 -0
  88. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512er.hpp +20 -0
  89. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512f.hpp +2650 -0
  90. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512ifma.hpp +20 -0
  91. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512pf.hpp +20 -0
  92. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vbmi.hpp +77 -0
  93. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vbmi2.hpp +131 -0
  94. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vnni_avx512bw.hpp +20 -0
  95. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vnni_avx512vbmi2.hpp +20 -0
  96. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avxvnni.hpp +20 -0
  97. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_common.hpp +24 -0
  98. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_common_fwd.hpp +77 -0
  99. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_constants.hpp +393 -0
  100. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_emulated.hpp +788 -0
  101. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_avx.hpp +93 -0
  102. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_avx2.hpp +46 -0
  103. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_sse.hpp +97 -0
  104. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma4.hpp +92 -0
  105. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_i8mm_neon64.hpp +17 -0
  106. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_isa.hpp +142 -0
  107. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_neon.hpp +3142 -0
  108. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_neon64.hpp +1543 -0
  109. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_rvv.hpp +1513 -0
  110. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_scalar.hpp +1260 -0
  111. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse2.hpp +2024 -0
  112. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse3.hpp +67 -0
  113. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse4_1.hpp +339 -0
  114. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse4_2.hpp +44 -0
  115. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_ssse3.hpp +186 -0
  116. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sve.hpp +1155 -0
  117. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_vsx.hpp +892 -0
  118. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_wasm.hpp +1780 -0
  119. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_arch.hpp +240 -0
  120. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_config.hpp +484 -0
  121. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_cpuid.hpp +269 -0
  122. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_inline.hpp +27 -0
  123. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/math/xsimd_rem_pio2.hpp +719 -0
  124. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/memory/xsimd_aligned_allocator.hpp +349 -0
  125. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/memory/xsimd_alignment.hpp +91 -0
  126. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_all_registers.hpp +55 -0
  127. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_api.hpp +2765 -0
  128. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx2_register.hpp +44 -0
  129. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512bw_register.hpp +51 -0
  130. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512cd_register.hpp +51 -0
  131. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512dq_register.hpp +51 -0
  132. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512er_register.hpp +51 -0
  133. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512f_register.hpp +77 -0
  134. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512ifma_register.hpp +51 -0
  135. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512pf_register.hpp +51 -0
  136. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vbmi2_register.hpp +51 -0
  137. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vbmi_register.hpp +51 -0
  138. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512bw_register.hpp +54 -0
  139. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512vbmi2_register.hpp +53 -0
  140. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx_register.hpp +64 -0
  141. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avxvnni_register.hpp +44 -0
  142. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_batch.hpp +1524 -0
  143. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_batch_constant.hpp +300 -0
  144. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_common_arch.hpp +47 -0
  145. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_emulated_register.hpp +80 -0
  146. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_avx2_register.hpp +50 -0
  147. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_avx_register.hpp +50 -0
  148. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_sse_register.hpp +50 -0
  149. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma4_register.hpp +50 -0
  150. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_i8mm_neon64_register.hpp +55 -0
  151. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_neon64_register.hpp +55 -0
  152. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_neon_register.hpp +154 -0
  153. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_register.hpp +94 -0
  154. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_rvv_register.hpp +506 -0
  155. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse2_register.hpp +59 -0
  156. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse3_register.hpp +49 -0
  157. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse4_1_register.hpp +48 -0
  158. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse4_2_register.hpp +48 -0
  159. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_ssse3_register.hpp +48 -0
  160. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sve_register.hpp +156 -0
  161. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_traits.hpp +337 -0
  162. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_utils.hpp +536 -0
  163. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_vsx_register.hpp +77 -0
  164. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_wasm_register.hpp +59 -0
  165. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/xsimd.hpp +75 -0
  166. sequenzo/dissimilarity_measures/src/xsimd/test/architectures/dummy.cpp +7 -0
  167. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set.cpp +13 -0
  168. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean.cpp +24 -0
  169. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_aligned.cpp +25 -0
  170. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_arch_independent.cpp +28 -0
  171. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_tag_dispatch.cpp +25 -0
  172. sequenzo/dissimilarity_measures/src/xsimd/test/doc/manipulating_abstract_batches.cpp +7 -0
  173. sequenzo/dissimilarity_measures/src/xsimd/test/doc/manipulating_parametric_batches.cpp +8 -0
  174. sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum.hpp +31 -0
  175. sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum_avx2.cpp +3 -0
  176. sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum_sse2.cpp +3 -0
  177. sequenzo/dissimilarity_measures/src/xsimd/test/doc/writing_vectorized_code.cpp +11 -0
  178. sequenzo/dissimilarity_measures/src/xsimd/test/main.cpp +31 -0
  179. sequenzo/dissimilarity_measures/src/xsimd/test/test_api.cpp +230 -0
  180. sequenzo/dissimilarity_measures/src/xsimd/test/test_arch.cpp +217 -0
  181. sequenzo/dissimilarity_measures/src/xsimd/test/test_basic_math.cpp +183 -0
  182. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch.cpp +1049 -0
  183. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_bool.cpp +508 -0
  184. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_cast.cpp +409 -0
  185. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_complex.cpp +712 -0
  186. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_constant.cpp +286 -0
  187. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_float.cpp +141 -0
  188. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_int.cpp +365 -0
  189. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_manip.cpp +308 -0
  190. sequenzo/dissimilarity_measures/src/xsimd/test/test_bitwise_cast.cpp +222 -0
  191. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_exponential.cpp +226 -0
  192. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_hyperbolic.cpp +183 -0
  193. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_power.cpp +265 -0
  194. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_trigonometric.cpp +236 -0
  195. sequenzo/dissimilarity_measures/src/xsimd/test/test_conversion.cpp +248 -0
  196. sequenzo/dissimilarity_measures/src/xsimd/test/test_custom_default_arch.cpp +28 -0
  197. sequenzo/dissimilarity_measures/src/xsimd/test/test_error_gamma.cpp +170 -0
  198. sequenzo/dissimilarity_measures/src/xsimd/test/test_explicit_batch_instantiation.cpp +32 -0
  199. sequenzo/dissimilarity_measures/src/xsimd/test/test_exponential.cpp +202 -0
  200. sequenzo/dissimilarity_measures/src/xsimd/test/test_extract_pair.cpp +92 -0
  201. sequenzo/dissimilarity_measures/src/xsimd/test/test_fp_manipulation.cpp +77 -0
  202. sequenzo/dissimilarity_measures/src/xsimd/test/test_gnu_source.cpp +30 -0
  203. sequenzo/dissimilarity_measures/src/xsimd/test/test_hyperbolic.cpp +167 -0
  204. sequenzo/dissimilarity_measures/src/xsimd/test/test_load_store.cpp +304 -0
  205. sequenzo/dissimilarity_measures/src/xsimd/test/test_memory.cpp +61 -0
  206. sequenzo/dissimilarity_measures/src/xsimd/test/test_poly_evaluation.cpp +64 -0
  207. sequenzo/dissimilarity_measures/src/xsimd/test/test_power.cpp +184 -0
  208. sequenzo/dissimilarity_measures/src/xsimd/test/test_rounding.cpp +199 -0
  209. sequenzo/dissimilarity_measures/src/xsimd/test/test_select.cpp +101 -0
  210. sequenzo/dissimilarity_measures/src/xsimd/test/test_shuffle.cpp +760 -0
  211. sequenzo/dissimilarity_measures/src/xsimd/test/test_sum.cpp +4 -0
  212. sequenzo/dissimilarity_measures/src/xsimd/test/test_sum.hpp +34 -0
  213. sequenzo/dissimilarity_measures/src/xsimd/test/test_traits.cpp +172 -0
  214. sequenzo/dissimilarity_measures/src/xsimd/test/test_trigonometric.cpp +208 -0
  215. sequenzo/dissimilarity_measures/src/xsimd/test/test_utils.hpp +611 -0
  216. sequenzo/dissimilarity_measures/src/xsimd/test/test_wasm/test_wasm_playwright.py +123 -0
  217. sequenzo/dissimilarity_measures/src/xsimd/test/test_xsimd_api.cpp +1460 -0
  218. sequenzo/dissimilarity_measures/utils/__init__.py +16 -0
  219. sequenzo/dissimilarity_measures/utils/get_LCP_length_for_2_seq.py +44 -0
  220. sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.cpython-310-darwin.so +0 -0
  221. sequenzo/dissimilarity_measures/utils/seqconc.cpython-310-darwin.so +0 -0
  222. sequenzo/dissimilarity_measures/utils/seqdss.cpython-310-darwin.so +0 -0
  223. sequenzo/dissimilarity_measures/utils/seqdur.cpython-310-darwin.so +0 -0
  224. sequenzo/dissimilarity_measures/utils/seqlength.cpython-310-darwin.so +0 -0
  225. sequenzo/multidomain/__init__.py +23 -0
  226. sequenzo/multidomain/association_between_domains.py +311 -0
  227. sequenzo/multidomain/cat.py +597 -0
  228. sequenzo/multidomain/combt.py +519 -0
  229. sequenzo/multidomain/dat.py +81 -0
  230. sequenzo/multidomain/idcd.py +139 -0
  231. sequenzo/multidomain/linked_polyad.py +292 -0
  232. sequenzo/openmp_setup.py +233 -0
  233. sequenzo/prefix_tree/__init__.py +62 -0
  234. sequenzo/prefix_tree/hub.py +114 -0
  235. sequenzo/prefix_tree/individual_level_indicators.py +1321 -0
  236. sequenzo/prefix_tree/spell_individual_level_indicators.py +580 -0
  237. sequenzo/prefix_tree/spell_level_indicators.py +297 -0
  238. sequenzo/prefix_tree/system_level_indicators.py +544 -0
  239. sequenzo/prefix_tree/utils.py +54 -0
  240. sequenzo/seqhmm/__init__.py +95 -0
  241. sequenzo/seqhmm/advanced_optimization.py +305 -0
  242. sequenzo/seqhmm/bootstrap.py +411 -0
  243. sequenzo/seqhmm/build_hmm.py +142 -0
  244. sequenzo/seqhmm/build_mhmm.py +136 -0
  245. sequenzo/seqhmm/build_nhmm.py +121 -0
  246. sequenzo/seqhmm/fit_mhmm.py +62 -0
  247. sequenzo/seqhmm/fit_model.py +61 -0
  248. sequenzo/seqhmm/fit_nhmm.py +76 -0
  249. sequenzo/seqhmm/formulas.py +289 -0
  250. sequenzo/seqhmm/forward_backward_nhmm.py +276 -0
  251. sequenzo/seqhmm/gradients_nhmm.py +306 -0
  252. sequenzo/seqhmm/hmm.py +291 -0
  253. sequenzo/seqhmm/mhmm.py +314 -0
  254. sequenzo/seqhmm/model_comparison.py +238 -0
  255. sequenzo/seqhmm/multichannel_em.py +282 -0
  256. sequenzo/seqhmm/multichannel_utils.py +138 -0
  257. sequenzo/seqhmm/nhmm.py +270 -0
  258. sequenzo/seqhmm/nhmm_utils.py +191 -0
  259. sequenzo/seqhmm/predict.py +137 -0
  260. sequenzo/seqhmm/predict_mhmm.py +142 -0
  261. sequenzo/seqhmm/simulate.py +878 -0
  262. sequenzo/seqhmm/utils.py +218 -0
  263. sequenzo/seqhmm/visualization.py +910 -0
  264. sequenzo/sequence_characteristics/__init__.py +40 -0
  265. sequenzo/sequence_characteristics/complexity_index.py +49 -0
  266. sequenzo/sequence_characteristics/overall_cross_sectional_entropy.py +220 -0
  267. sequenzo/sequence_characteristics/plot_characteristics.py +593 -0
  268. sequenzo/sequence_characteristics/simple_characteristics.py +311 -0
  269. sequenzo/sequence_characteristics/state_frequencies_and_entropy_per_sequence.py +39 -0
  270. sequenzo/sequence_characteristics/turbulence.py +155 -0
  271. sequenzo/sequence_characteristics/variance_of_spell_durations.py +86 -0
  272. sequenzo/sequence_characteristics/within_sequence_entropy.py +43 -0
  273. sequenzo/suffix_tree/__init__.py +66 -0
  274. sequenzo/suffix_tree/hub.py +114 -0
  275. sequenzo/suffix_tree/individual_level_indicators.py +1679 -0
  276. sequenzo/suffix_tree/spell_individual_level_indicators.py +493 -0
  277. sequenzo/suffix_tree/spell_level_indicators.py +248 -0
  278. sequenzo/suffix_tree/system_level_indicators.py +535 -0
  279. sequenzo/suffix_tree/utils.py +56 -0
  280. sequenzo/version_check.py +283 -0
  281. sequenzo/visualization/__init__.py +29 -0
  282. sequenzo/visualization/plot_mean_time.py +222 -0
  283. sequenzo/visualization/plot_modal_state.py +276 -0
  284. sequenzo/visualization/plot_most_frequent_sequences.py +147 -0
  285. sequenzo/visualization/plot_relative_frequency.py +405 -0
  286. sequenzo/visualization/plot_sequence_index.py +1175 -0
  287. sequenzo/visualization/plot_single_medoid.py +153 -0
  288. sequenzo/visualization/plot_state_distribution.py +651 -0
  289. sequenzo/visualization/plot_transition_matrix.py +190 -0
  290. sequenzo/visualization/utils/__init__.py +23 -0
  291. sequenzo/visualization/utils/utils.py +310 -0
  292. sequenzo/with_event_history_analysis/__init__.py +35 -0
  293. sequenzo/with_event_history_analysis/sequence_analysis_multi_state_model.py +850 -0
  294. sequenzo/with_event_history_analysis/sequence_history_analysis.py +283 -0
  295. sequenzo-0.1.31.dist-info/METADATA +286 -0
  296. sequenzo-0.1.31.dist-info/RECORD +299 -0
  297. sequenzo-0.1.31.dist-info/WHEEL +5 -0
  298. sequenzo-0.1.31.dist-info/licenses/LICENSE +28 -0
  299. sequenzo-0.1.31.dist-info/top_level.txt +2 -0
@@ -0,0 +1,544 @@
1
+ """
2
+ @Author : Yuqi Liang 梁彧祺
3
+ @File : system_level_indicators.py
4
+ @Time : 02/05/2025 11:06
5
+ @Desc :
6
+ This module includes tools for building prefix trees, computing prefix counts, branching factors, and Jensen-Shannon divergence,
7
+ as well as generating composite scores to summarize system-level sequence diversity and complexity over time.
8
+ Visualization functions are also provided to plot these indicators and their distributions,
9
+ supporting comprehensive analysis of sequence system dynamics.
10
+ """
11
+ from collections import defaultdict, Counter
12
+ import numpy as np
13
+ from scipy.stats import zscore
14
+ from numpy import array
15
+ from scipy.spatial.distance import jensenshannon
16
+
17
+ from sequenzo.visualization.utils import save_and_show_results
18
+ import matplotlib.pyplot as plt
19
+ import seaborn as sns
20
+ from typing import List, Optional, Dict, Any, Tuple
21
+
22
+
23
class PrefixTree:
    """Trie over state sequences that also tallies every prefix it walks.

    The tree itself is a nest of plain dicts (state -> subtree). Alongside it,
    ``counts`` records how many times each prefix (as a tuple of states) has
    been traversed by :meth:`insert`.
    """

    def __init__(self):
        # Nested dicts: each key is a state, each value is the subtree below it.
        self.root = {}
        # prefix tuple -> number of times insert() has passed through it.
        self.counts = defaultdict(int)
        # Never modified by insert(); whoever builds the tree sets it.
        self.total_sequences = 0

    def insert(self, sequence):
        """Add ``sequence`` to the tree and bump the count of each of its prefixes.

        Every leading sub-sequence (length 1, 2, ..., len(sequence)) gets its
        entry in ``counts`` incremented by one, and any missing node along the
        path is created.
        """
        walked = ()
        current = self.root
        for state in sequence:
            walked = walked + (state,)
            self.counts[walked] += 1
            # setdefault creates the child on first visit and returns it either way.
            current = current.setdefault(state, {})

    def get_prefixes_at_depth(self, depth):
        """Return all recorded prefixes whose length equals ``depth``."""
        matches = []
        for key in self.counts:
            if len(key) == depth:
                matches.append(key)
        return matches

    def get_children(self, prefix):
        """
        Look up the node reached by ``prefix`` (a list or tuple of states).

        Returns:
            dict: mapping from child state -> subtree dict; an empty dict when
            the prefix is not present in the tree.
        """
        current = self.root
        for state in prefix:
            if state not in current:
                # Unknown path: nothing below it.
                return {}
            current = current[state]
        return current

    def get_children_count(self, prefix):
        """Return how many distinct states directly follow ``prefix``."""
        return len(self.get_children(prefix))

    def describe(self):
        """Print a multi-line report: totals plus the prefix count at each level."""
        deepest = max(map(len, self.counts), default=0)
        print("\n[PrefixTree Overview]")
        print(f"[>] Total sequences inserted: {self.total_sequences}")
        print(f"[>] Max depth (time points): {deepest}")
        print(f"[>] Total distinct prefixes: {len(self.counts)}")

        for level in range(1, deepest + 1):
            n_at_level = len(self.get_prefixes_at_depth(level))
            print(f" Level {level}: {n_at_level} unique prefixes")

    def __repr__(self):
        """
        One-line summary of the tree (max depth and number of distinct prefixes).

        Note:
            This is deliberately lightweight. For the per-level breakdown,
            call ``.describe()`` instead.
        """
        deepest = max(map(len, self.counts), default=0)
        return f"PrefixTree(max_depth={deepest}, total_prefixes={len(self.counts)})"
85
+
86
+
87
def get_depth_stats(tree: "PrefixTree") -> Dict[str, Any]:
    """
    Group the tree's recorded prefixes by length in one sweep over ``tree.counts``.

    The result can be handed to both compute_prefix_count (whole dict) and
    compute_branching_factor (its 'depth_to_prefixes' entry) so that neither
    has to iterate over the prefixes again.

    Returns:
        dict with keys:
        - 'depth_counts': dict depth -> number of distinct prefixes at that depth
        - 'depth_to_prefixes': dict depth -> list of prefix tuples at that depth
    """
    grouped = {}
    for prefix in tree.counts:
        grouped.setdefault(len(prefix), []).append(prefix)
    # The count at each depth is simply the size of that depth's group.
    sizes = {depth: len(members) for depth, members in grouped.items()}
    return {
        "depth_counts": sizes,
        "depth_to_prefixes": grouped,
    }
108
+
109
+
110
def compute_prefix_count(
    tree, max_depth, depth_stats: Optional[Dict[str, Any]] = None
) -> List[int]:
    """
    Number of distinct prefixes at each time step 1..max_depth.

    If ``depth_stats`` (the dict produced by get_depth_stats(tree)) is supplied,
    its 'depth_counts' entry is used directly and ``tree`` is not read;
    otherwise the prefixes in ``tree.counts`` are tallied by length here.
    Depths with no prefixes yield 0.
    """
    if depth_stats is not None:
        per_depth = depth_stats["depth_counts"]
    else:
        per_depth = Counter(len(prefix) for prefix in tree.counts)
    return [per_depth.get(step, 0) for step in range(1, max_depth + 1)]
126
+
127
+
128
def compute_branching_factor(
    tree, max_depth, depth_prefixes: Optional[Dict[int, List[Tuple]]] = None
) -> List[float]:
    """
    Mean number of children per prefix, for each time step 1..max_depth.

    Element ``t`` (1-based) is the average of ``tree.get_children_count(p)``
    over all prefixes ``p`` of length ``t - 1``. The first element is always
    0.0 so the list lines up index-for-index with compute_prefix_count.

    Parameters:
        tree: object exposing ``counts`` (iterable of prefix tuples) and
            ``get_children_count(prefix)``.
        max_depth: number of time steps to report.
        depth_prefixes: optional mapping depth -> list of prefix tuples, e.g.
            get_depth_stats(tree)['depth_to_prefixes']; when given,
            ``tree.counts`` is not scanned.

    Returns:
        List of ``max_depth`` floats; an empty list when ``max_depth < 1``
        (so the length always equals that of compute_prefix_count).
    """
    # Without this guard the leading pad alone would produce a one-element
    # list for max_depth <= 0, breaking alignment with compute_prefix_count.
    if max_depth < 1:
        return []

    if depth_prefixes is None:
        depth_to_prefixes = defaultdict(list)
        for k in tree.counts:
            depth_to_prefixes[len(k)].append(k)
        depth_to_prefixes = dict(depth_to_prefixes)
    else:
        depth_to_prefixes = depth_prefixes

    result = [0.0]  # pad for t = 1, which has no parent level
    for t in range(2, max_depth + 1):
        parents = depth_to_prefixes.get(t - 1, [])
        if not parents:
            # No prefixes at the previous depth: report 0 instead of averaging nothing.
            result.append(0.0)
            continue
        child_counts = [tree.get_children_count(p) for p in parents]
        result.append(float(np.mean(child_counts)))
    return result
152
+
153
+
154
def compute_js_divergence(sequences, state_set):
    """
    Jensen-Shannon score between the state distributions of consecutive time steps.

    For each time index t, the share of sequences in each state is computed;
    the score at t compares that distribution with the one at t - 1. The first
    element is 0.0 because the first time step has no predecessor.

    Note: the value comes from ``scipy.spatial.distance.jensenshannon``, which
    returns the Jensen-Shannon *distance* (the square root of the divergence).

    Parameters:
        sequences: indexable collection of sequences; the length of the first
            sequence defines the number of time steps T, and only the first T
            positions of every sequence are read.
        state_set: iterable of all states that may occur; a state missing
            from it raises KeyError.

    Returns:
        List of floats, one per time step; an empty list when ``sequences``
        is empty.
    """
    N = len(sequences)
    if N == 0:
        # Nothing to compare; previously this crashed on sequences[0].
        return []

    T = len(sequences[0])
    state_list = list(state_set)
    n_states = len(state_list)
    state_to_idx = {s: i for i, s in enumerate(state_list)}

    # (N, T) matrix of state indices, filled row by row.
    mat = np.empty((N, T), dtype=np.intp)
    for i, seq in enumerate(sequences):
        mat[i] = [state_to_idx[seq[t]] for t in range(T)]

    # Per-time distributions via bincount; N > 0 here, so each column sums to N.
    distros = np.zeros((T, n_states), dtype=float)
    for t in range(T):
        distros[t] = np.bincount(mat[:, t], minlength=n_states) / N

    js_scores = [0.0]
    for t in range(1, T):
        js_scores.append(float(jensenshannon(distros[t], distros[t - 1])))
    return js_scores
184
+
185
+
186
def _build_prefix_tree_position(sequences):
    """Internal: build position-based prefix tree (level = time index).

    Sets ``total_sequences`` to ``len(sequences)`` and inserts every leading
    slice ``seq[:1]``, ``seq[:2]``, ..., ``seq[:len(seq)]`` of each sequence.

    NOTE(review): PrefixTree.insert already increments the count of every
    prefix of whatever it is given, so a prefix of length k ends up
    incremented ``len(seq) - k + 1`` times per sequence here. The set of
    distinct prefixes is unaffected; confirm whether the inflated count
    values are intended before relying on them.
    """
    built = PrefixTree()
    built.total_sequences = len(sequences)
    for sequence in sequences:
        n_steps = len(sequence)
        for stop in range(1, n_steps + 1):
            built.insert(sequence[:stop])
    return built
194
+
195
+
196
def build_prefix_tree(sequences):
    """
    Public entry point for the position-based prefix tree (level = time index).

    Delegates to the internal position-based builder and returns its tree.

    For a spell-based tree, or the unified hub with mode/expcost, use:
        from sequenzo.prefix_tree.hub import build_prefix_tree
        tree = build_prefix_tree(seqdata, mode="spell", expcost=0)
    """
    position_tree = _build_prefix_tree_position(sequences)
    return position_tree
205
+
206
+
207
def plot_system_indicators(
    prefix_counts: List[float],
    branching_factors: List[float],
    js_divergence: Optional[List[float]] = None,
    x_values: Optional[List] = None,
    x_label: str = "Time (t)",
    legend_loc: str = 'lower right',
    legend_fontsize: int = 10,
    save_as: Optional[str] = None,
    figsize: Optional[tuple] = None,
    dpi: int = 300,
    custom_colors: Optional[Dict[str, str]] = None,
    show: bool = True,
    plot_distributions: bool = False,
    style: Optional[str] = None
) -> None:
    """
    Plot a single group's system-level indicators using the same visual style as
    `plot_system_indicators_multiple_comparison`, but for one subplot.

    Design:
    - Left y-axis: raw Prefix Count
    - Right y-axis: z-score of Branching Factor and (optionally) JS Divergence
    - Consistent colors/markers and legend handling with the multi-comparison API

    Parameters:
    - prefix_counts: List[float]
        Raw prefix counts per time step. Its length defines the data length T.
    - branching_factors: List[float]
        Branching factor per time step. Must have length T.
    - js_divergence: Optional[List[float]]
        JS divergence per time step. If None or empty, only the branching factor is
        shown on the right axis. When given, must have length T.
    - x_values: Optional[List]
        Custom x-axis values (e.g., years). If None, uses 1..T. Length must equal T.
    - x_label: str
        Label for x-axis. Default: "Time (t)"
    - legend_loc: str
        Legend location, e.g., 'upper left', 'upper right', 'lower right', 'best', etc.
        Default: 'lower right'
    - legend_fontsize: int
        Font size for legend text. Default: 10
    - save_as: Optional[str]
        If provided, passed to `save_and_show_results` as the output path for the
        main figure. When `plot_distributions` is True, the distribution figure is
        saved next to it with "_distributions" inserted before the file extension
        (or appended when the path has no extension).
    - figsize: Optional[tuple]
        Figure size (width, height). Default: (12, 6)
    - dpi: int
        DPI forwarded to `save_and_show_results`. Default: 300
    - custom_colors: Optional[Dict[str, str]]
        Optional color overrides. Keys: "Prefix Count", "Branching Factor", "JS Divergence"
    - show: bool
        Forwarded to `save_and_show_results`; whether to display the figure.
    - plot_distributions: bool
        If True, additionally draw a histogram (with KDE overlay) per raw indicator.
    - style: Optional[str]
        Style to apply. One of 'whitegrid', 'darkgrid', 'white', 'dark', 'ticks' is
        applied with `sns.set_style`; any other value is passed to `plt.style.use`.
        If None, no style is applied. Default: None

    Raises:
    - ValueError: if `x_values`, `branching_factors`, or a non-empty `js_divergence`
      does not have the same length as `prefix_counts`.

    Example:
    >>> plot_system_indicators(
    ...     prefix_counts=india_prefix_counts,
    ...     branching_factors=india_branching_factors,
    ...     js_divergence=india_js_scores,
    ...     x_values=[2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019],
    ...     x_label="Year",
    ...     legend_loc="lower right",
    ...     figsize=(12, 6),
    ...     dpi=300,
    ... )
    """
    # Local import: only needed to derive the distribution figure's file name.
    import os

    T = len(prefix_counts)
    # Set x values to align with multi-group API
    if x_values is None:
        x_values = list(range(1, T + 1))
    if len(x_values) != T:
        raise ValueError("Length of x_values must match data length")

    # Fail early with a clear message instead of a matplotlib shape error
    # raised after the figure has already been created.
    if len(branching_factors) != T:
        raise ValueError("Length of branching_factors must match length of prefix_counts")

    # Explicit None/length test: a bare truthiness check raises for NumPy arrays.
    has_js = js_divergence is not None and len(js_divergence) > 0
    if has_js and len(js_divergence) != T:
        raise ValueError("Length of js_divergence must match length of prefix_counts")

    # Normalize the right-axis indicators
    bf_z = zscore(array(branching_factors))
    js_z = zscore(array(js_divergence)) if has_js else None

    color_defaults = {
        "Prefix Count": "#6BB6FF",      # Soft sky blue (like Monet's water lilies)
        "Branching Factor": "#FFB347",  # Warm peach/coral (like sunset reflections)
        "JS Divergence": "#F4A6CD",     # Soft rose pink (divergence = different paths)
    }
    # Caller-supplied colors take precedence over the defaults.
    colors = {**color_defaults, **(custom_colors or {})}

    # --- Main line plot with dual axes ---
    if figsize is None:
        figsize = (12, 6)

    # Apply style if specified
    if style is not None:
        seaborn_styles = ['whitegrid', 'darkgrid', 'white', 'dark', 'ticks']
        if style in seaborn_styles:
            sns.set_style(style)
        else:
            plt.style.use(style)

    fig, ax1 = plt.subplots(figsize=figsize)
    ax1.set_xlabel(x_label)
    ax1.set_ylabel("Prefix Count", color=colors["Prefix Count"])
    ax1.plot(x_values, prefix_counts, marker='o', color=colors["Prefix Count"], label="Prefix Count")
    ax1.tick_params(axis='y', labelcolor=colors["Prefix Count"])

    ax2 = ax1.twinx()
    ax2.set_ylabel("Z-score (Other Indicators)")
    ax2.plot(x_values, bf_z, marker='s', label='Branching Factor (z)', color=colors["Branching Factor"])
    if js_z is not None:
        ax2.plot(x_values, js_z, marker='^', label='JS Divergence (z)', color=colors["JS Divergence"])

    # Merge the handles of both axes so a single legend lists every series.
    lines1, labels1 = ax1.get_legend_handles_labels()
    lines2, labels2 = ax2.get_legend_handles_labels()
    ax2.legend(lines1 + lines2, labels1 + labels2, loc=legend_loc, fontsize=legend_fontsize)

    ax1.set_title("System-Level Trajectory Indicators: Raw vs. Normalized")
    fig.tight_layout()

    save_and_show_results(save_as=save_as, dpi=dpi, show=show)

    # --- Distribution plots if requested ---
    if plot_distributions:
        raw_data = {
            "Prefix Count": prefix_counts,
            "Branching Factor": branching_factors,
        }
        if has_js:
            raw_data["JS Divergence"] = js_divergence

        # raw_data always holds two or three series, so `axes` is always an array.
        n = len(raw_data)
        fig, axes = plt.subplots(1, n, figsize=(4 * n, 4))

        for ax, (label, values) in zip(axes, raw_data.items()):
            sns.histplot(values, kde=True, ax=ax, color=colors.get(label, None))
            ax.set_title(f"{label} Distribution")
            ax.set_xlabel("Value")
            # histplot is called with its default statistic (counts per bin).
            ax.set_ylabel("Count")

        fig.tight_layout()

        # Insert the suffix before the extension so the distribution figure never
        # reuses the main figure's path (a plain ".png" replace is a no-op for
        # paths without that exact suffix).
        dist_path = None
        if save_as:
            root, ext = os.path.splitext(save_as)
            dist_path = f"{root}_distributions{ext}"
        save_and_show_results(save_as=dist_path, dpi=dpi, show=show)
351
+
352
+
353
def plot_system_indicators_multiple_comparison(
    groups_data: Dict[str, Dict[str, List[float]]],
    group_names: Optional[List[str]] = None,
    subplot_titles: Optional[List[str]] = None,
    x_values: Optional[List] = None,
    x_label: str = "Time (t)",
    legend_loc: str = 'lower right',
    legend_fontsize: int = 10,
    save_as: Optional[str] = None,
    figsize: Optional[tuple] = None,
    dpi: int = 300,
    custom_colors: Optional[Dict[str, str]] = None,
    show: bool = True,
    style: Optional[str] = None
) -> None:
    """
    Plot system-level indicators comparison across multiple groups using dual y-axis design.

    One subplot is drawn per group, stacked vertically. In each subplot the left
    y-axis shows the raw prefix counts and the right y-axis shows the z-scores of
    the branching factor and JS divergence.

    Parameters:
    -----------
    groups_data : Dict[str, Dict[str, List[float]]]
        Dictionary with group names as keys and data dictionaries as values.
        Each data dict must contain 'prefix_counts', 'branching_factors', and 'js_divergence'.
        Example: {
            "Group1": {
                "prefix_counts": [10, 15, 20, ...],
                "branching_factors": [1.2, 1.5, 1.8, ...],
                "js_divergence": [0.1, 0.2, 0.15, ...]
            },
            "Group2": {...}
        }
    group_names : Optional[List[str]]
        Custom names for groups, matched to `groups_data` in iteration order.
        If None, uses keys from groups_data.
        Used for default subplot titles if subplot_titles is not provided.
    subplot_titles : Optional[List[str]]
        Custom titles for each subplot. If None, uses default format:
        "{group_name} - System-Level Trajectory Indicators: Raw vs. Normalized"
    x_values : Optional[List]
        Custom x-axis values. If None, uses 1, 2, 3, ... up to the length of the
        first group's 'prefix_counts'.
    x_label : str
        Label for x-axis (set on the bottom subplot only). Default: "Time (t)"
    legend_loc : str
        Legend location. Options: 'upper left', 'upper right', 'lower left',
        'lower right', 'center', 'best', etc. Default: 'lower right'
    legend_fontsize : int
        Font size for legend text. Default: 10
    save_as : Optional[str]
        File path to save the plot; forwarded to `save_and_show_results`.
    figsize : Optional[tuple]
        Figure size (width, height). If None, uses (12, 4 * n_groups + 2).
    dpi : int
        DPI for saving. Default: 300
    custom_colors : Optional[Dict[str, str]]
        Custom colors for indicators. Keys: "Prefix Count", "Branching Factor",
        "JS Divergence". Missing keys fall back to the default colors.
    show : bool
        Whether to show the plot. Default: True
    style : Optional[str]
        Style to apply. Seaborn styles ('whitegrid', 'darkgrid', 'white', 'dark', 'ticks')
        or matplotlib styles. If None, no style is applied. Default: None

    Raises:
    -------
    ValueError
        If `groups_data` is empty, if `group_names` or `subplot_titles` does not
        have one entry per group, if any group lacks a required key, or if
        `x_values` does not match the data length.

    Example:
    --------
    >>> data = {
    ...     "India": {
    ...         "prefix_counts": india_prefix_counts,
    ...         "branching_factors": india_branching_factors,
    ...         "js_divergence": india_js_divergence
    ...     },
    ...     "US": {
    ...         "prefix_counts": us_prefix_counts,
    ...         "branching_factors": us_branching_factors,
    ...         "js_divergence": us_js_divergence
    ...     }
    ... }
    >>> plot_system_indicators_multiple_comparison(
    ...     groups_data=data,
    ...     x_label="Years",
    ...     legend_loc='upper right',
    ...     save_as="multi_country_comparison"
    ... )

    >>> # With custom subplot titles
    >>> plot_system_indicators_multiple_comparison(
    ...     groups_data=data,
    ...     subplot_titles=["India trajectory", "US trajectory"],
    ...     x_label="Year",
    ...     save_as="custom_titles_comparison"
    ... )
    """

    # Validate input
    if not groups_data:
        raise ValueError("groups_data cannot be empty")

    # Get group names
    if group_names is None:
        group_names = list(groups_data.keys())

    if len(group_names) != len(groups_data):
        raise ValueError("Length of group_names must match number of groups in groups_data")

    # Validate subplot_titles
    if subplot_titles is not None and len(subplot_titles) != len(groups_data):
        raise ValueError("Length of subplot_titles must match number of groups in groups_data")

    # Validate data completeness for every group up front, before any key is read
    # or any figure is created, so a malformed group always surfaces as ValueError
    # and never leaves a half-drawn figure open.
    required_keys = ('prefix_counts', 'branching_factors', 'js_divergence')
    for group_key, group_data in groups_data.items():
        for key in required_keys:
            if key not in group_data:
                raise ValueError(f"Missing '{key}' in data for group '{group_key}'")

    # The first group determines the data length
    first_group_data = next(iter(groups_data.values()))
    T = len(first_group_data['prefix_counts'])

    # Set x values
    if x_values is None:
        x_values = list(range(1, T + 1))

    if len(x_values) != T:
        raise ValueError("Length of x_values must match data length")

    # Color settings - Monet-inspired watercolor palette for divergence analysis
    color_defaults = {
        "Prefix Count": "#6BB6FF",      # Soft sky blue (like Monet's water lilies)
        "Branching Factor": "#FFB347",  # Warm peach/coral (like sunset reflections)
        "JS Divergence": "#F4A6CD",     # Soft rose pink (divergence = different paths)
    }
    # Caller-supplied colors take precedence over the defaults.
    colors = {**color_defaults, **(custom_colors or {})}

    # Calculate figure size
    n_groups = len(groups_data)
    if figsize is None:
        figsize = (12, 4 * n_groups + 2)  # Dynamic height based on number of groups

    # Apply style if specified
    if style is not None:
        seaborn_styles = ['whitegrid', 'darkgrid', 'white', 'dark', 'ticks']
        if style in seaborn_styles:
            sns.set_style(style)
        else:
            plt.style.use(style)

    # Create subplots
    fig, axes = plt.subplots(n_groups, 1, figsize=figsize)

    # With a single group, plt.subplots returns a bare Axes rather than an array
    if n_groups == 1:
        axes = [axes]

    # Plot each group
    for i, (data, group_name) in enumerate(zip(groups_data.values(), group_names)):
        ax = axes[i]

        # Normalize data (z-score)
        bf_z = zscore(array(data['branching_factors']))
        js_z = zscore(array(data['js_divergence']))

        # Left y-axis: raw prefix counts
        ax.set_ylabel("Prefix Count", color=colors["Prefix Count"])
        ax.plot(x_values, data['prefix_counts'], marker='o',
                color=colors["Prefix Count"], label="Prefix Count")
        ax.tick_params(axis='y', labelcolor=colors["Prefix Count"])

        # Right y-axis: normalized indicators
        ax_twin = ax.twinx()
        ax_twin.set_ylabel("Z-score (Other Indicators)")
        ax_twin.plot(x_values, bf_z, marker='s',
                     label='Branching Factor (z)', color=colors["Branching Factor"])
        ax_twin.plot(x_values, js_z, marker='^',
                     label='JS Divergence (z)', color=colors["JS Divergence"])

        # Legend: merge the handles of both axes into a single legend
        lines1, labels1 = ax.get_legend_handles_labels()
        lines2, labels2 = ax_twin.get_legend_handles_labels()
        ax_twin.legend(lines1 + lines2, labels1 + labels2, loc=legend_loc, fontsize=legend_fontsize)

        # Title and labels
        if subplot_titles is not None:
            title = subplot_titles[i]
        else:
            title = f"{group_name} - System-Level Trajectory Indicators: Raw vs. Normalized"
        ax.set_title(title)

        # Only set x-label for the bottom subplot
        if i == n_groups - 1:
            ax.set_xlabel(x_label)

    plt.tight_layout()
    save_and_show_results(save_as=save_as, dpi=dpi, show=show)
@@ -0,0 +1,54 @@
1
+ """
2
+ @Author : Yuqi Liang 梁彧祺
3
+ @File : utils.py
4
+ @Time : 02/05/2025 12:26
5
+ @Desc :
6
+ """
7
+ import pandas as pd
8
+ from typing import List, Tuple
9
+
10
+
11
def extract_sequences(df: pd.DataFrame, time_cols: List[str]) -> List[List[str]]:
    """
    Pull the sequence columns out of a DataFrame as nested Python lists.

    Parameters:
        df (pd.DataFrame): Input DataFrame.
        time_cols (List[str]): Columns representing the sequence over time,
            in the order they should appear in each sequence.

    Returns:
        List[List[str]]: One inner list per row of ``df``, holding that row's
        values for ``time_cols``.
    """
    sequence_frame = df[time_cols]
    state_matrix = sequence_frame.values
    return state_matrix.tolist()
23
+
24
+
25
def get_state_space(sequences: List[List[str]]) -> List[str]:
    """
    Collect every distinct state that occurs in any of the sequences.

    Parameters:
        sequences (List[List[str]]): Sequence data.

    Returns:
        List[str]: The unique states, sorted in ascending order.
    """
    unique_states = {state for sequence in sequences for state in sequence}
    return sorted(unique_states)
39
+
40
+
41
def convert_to_prefix_tree_data(df: pd.DataFrame, time_cols: List[str]) -> Tuple[List[List[str]], List[str]]:
    """
    Produce both the sequences and their state space from a DataFrame.

    Parameters:
        df (pd.DataFrame): Input DataFrame.
        time_cols (List[str]): Sequence columns (e.g., ['C1', ..., 'C10'])

    Returns:
        Tuple[List[List[str]], List[str]]: ``(sequences, states)`` where
        ``sequences`` holds one list of column values per row and ``states``
        is the sorted list of unique values found in them.
    """
    # Same extraction as `extract_sequences`; reuse it rather than repeating it.
    sequences = extract_sequences(df, time_cols)
    return sequences, get_state_space(sequences)
@@ -0,0 +1,95 @@
1
+ """
2
+ @Author : Yuqi Liang 梁彧祺
3
+ @File : __init__.py
4
+ @Time : 2025-11-13 19:27
5
+ @Desc : Hidden Markov Models for sequence analysis in Sequenzo
6
+
7
+ This module provides Hidden Markov Model (HMM) functionality for sequence analysis,
8
+ inspired by the seqHMM R package but implemented natively in Python using hmmlearn.
9
+
10
+ Main features:
11
+ - Basic HMM: Standard hidden Markov models for sequence data
12
+ - Model building: Create HMM models from SequenceData
13
+ - Parameter estimation: Fit models using EM algorithm
14
+ - Prediction: Predict hidden states and compute posterior probabilities
15
+ - Visualization: Plot HMM models and results
16
+ """
17
+
18
+ from .hmm import HMM
19
+ from .build_hmm import build_hmm
20
+ from .fit_model import fit_model
21
+ from .predict import predict, posterior_probs
22
+ from .visualization import plot_hmm
23
+
24
+ # Mixture HMM
25
+ from .mhmm import MHMM
26
+ from .build_mhmm import build_mhmm
27
+ from .fit_mhmm import fit_mhmm
28
+ from .predict_mhmm import predict_mhmm, posterior_probs_mhmm
29
+ from .visualization import plot_mhmm
30
+
31
+ # Non-homogeneous HMM
32
+ from .nhmm import NHMM
33
+ from .build_nhmm import build_nhmm
34
+ from .fit_nhmm import fit_nhmm
35
+
36
+ # Model comparison and simulation
37
+ from .model_comparison import aic, bic, compare_models, compute_n_parameters, compute_n_observations
38
+ from .simulate import simulate_hmm, simulate_mhmm, simulate_nhmm
39
+ from .bootstrap import bootstrap_model
40
+
41
+ # Forward-backward for NHMM
42
+ from .forward_backward_nhmm import forward_backward_nhmm, log_likelihood_nhmm
43
+
44
+ # Gradients for NHMM
45
+ from .gradients_nhmm import compute_gradient_nhmm
46
+
47
+ # Formulas for NHMM and MHMM simulation
48
+ from .formulas import Formula, create_model_matrix, create_model_matrix_time_constant
49
+
50
+ # Advanced optimization
51
+ from .advanced_optimization import fit_model_advanced
52
+
53
# Public API of the HMM subpackage, grouped in the same order as the imports above.
__all__ = [
    # Basic HMM
    "HMM",
    "build_hmm",
    "fit_model",
    "predict",
    "posterior_probs",
    "plot_hmm",
    # Mixture HMM
    "MHMM",
    "build_mhmm",
    "fit_mhmm",
    "predict_mhmm",
    "posterior_probs_mhmm",
    "plot_mhmm",
    # Non-homogeneous HMM
    "NHMM",
    "build_nhmm",
    "fit_nhmm",
    # Model comparison
    "aic",
    "bic",
    "compare_models",
    "compute_n_parameters",
    "compute_n_observations",
    # Simulation
    "simulate_hmm",
    "simulate_mhmm",
    "simulate_nhmm",
    # Bootstrap
    "bootstrap_model",
    # Forward-backward for NHMM
    "forward_backward_nhmm",
    "log_likelihood_nhmm",
    # Gradients for NHMM
    "compute_gradient_nhmm",
    # Formulas for NHMM and MHMM simulation
    "Formula",
    "create_model_matrix",
    "create_model_matrix_time_constant",
    # Advanced optimization
    "fit_model_advanced",
]