sequenzo 0.1.31__cp310-cp310-macosx_10_9_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (299) hide show
  1. _sequenzo_fastcluster.cpython-310-darwin.so +0 -0
  2. sequenzo/__init__.py +349 -0
  3. sequenzo/big_data/__init__.py +12 -0
  4. sequenzo/big_data/clara/__init__.py +26 -0
  5. sequenzo/big_data/clara/clara.py +476 -0
  6. sequenzo/big_data/clara/utils/__init__.py +27 -0
  7. sequenzo/big_data/clara/utils/aggregatecases.py +92 -0
  8. sequenzo/big_data/clara/utils/davies_bouldin.py +91 -0
  9. sequenzo/big_data/clara/utils/get_weighted_diss.cpython-310-darwin.so +0 -0
  10. sequenzo/big_data/clara/utils/wfcmdd.py +205 -0
  11. sequenzo/big_data/clara/visualization.py +88 -0
  12. sequenzo/clustering/KMedoids.py +178 -0
  13. sequenzo/clustering/__init__.py +30 -0
  14. sequenzo/clustering/clustering_c_code.cpython-310-darwin.so +0 -0
  15. sequenzo/clustering/hierarchical_clustering.py +1256 -0
  16. sequenzo/clustering/sequenzo_fastcluster/fastcluster.py +495 -0
  17. sequenzo/clustering/sequenzo_fastcluster/src/fastcluster.cpp +1877 -0
  18. sequenzo/clustering/sequenzo_fastcluster/src/fastcluster_python.cpp +1264 -0
  19. sequenzo/clustering/src/KMedoid.cpp +263 -0
  20. sequenzo/clustering/src/PAM.cpp +237 -0
  21. sequenzo/clustering/src/PAMonce.cpp +265 -0
  22. sequenzo/clustering/src/cluster_quality.cpp +496 -0
  23. sequenzo/clustering/src/cluster_quality.h +128 -0
  24. sequenzo/clustering/src/cluster_quality_backup.cpp +570 -0
  25. sequenzo/clustering/src/module.cpp +228 -0
  26. sequenzo/clustering/src/weightedinertia.cpp +111 -0
  27. sequenzo/clustering/utils/__init__.py +27 -0
  28. sequenzo/clustering/utils/disscenter.py +122 -0
  29. sequenzo/data_preprocessing/__init__.py +22 -0
  30. sequenzo/data_preprocessing/helpers.py +303 -0
  31. sequenzo/datasets/__init__.py +41 -0
  32. sequenzo/datasets/biofam.csv +2001 -0
  33. sequenzo/datasets/biofam_child_domain.csv +2001 -0
  34. sequenzo/datasets/biofam_left_domain.csv +2001 -0
  35. sequenzo/datasets/biofam_married_domain.csv +2001 -0
  36. sequenzo/datasets/chinese_colonial_territories.csv +12 -0
  37. sequenzo/datasets/country_co2_emissions.csv +194 -0
  38. sequenzo/datasets/country_co2_emissions_global_deciles.csv +195 -0
  39. sequenzo/datasets/country_co2_emissions_global_quintiles.csv +195 -0
  40. sequenzo/datasets/country_co2_emissions_local_deciles.csv +195 -0
  41. sequenzo/datasets/country_co2_emissions_local_quintiles.csv +195 -0
  42. sequenzo/datasets/country_gdp_per_capita.csv +194 -0
  43. sequenzo/datasets/dyadic_children.csv +61 -0
  44. sequenzo/datasets/dyadic_parents.csv +61 -0
  45. sequenzo/datasets/mvad.csv +713 -0
  46. sequenzo/datasets/pairfam_activity_by_month.csv +1028 -0
  47. sequenzo/datasets/pairfam_activity_by_year.csv +1028 -0
  48. sequenzo/datasets/pairfam_family_by_month.csv +1028 -0
  49. sequenzo/datasets/pairfam_family_by_year.csv +1028 -0
  50. sequenzo/datasets/political_science_aid_shock.csv +166 -0
  51. sequenzo/datasets/political_science_donor_fragmentation.csv +157 -0
  52. sequenzo/define_sequence_data.py +1400 -0
  53. sequenzo/dissimilarity_measures/__init__.py +31 -0
  54. sequenzo/dissimilarity_measures/c_code.cpython-310-darwin.so +0 -0
  55. sequenzo/dissimilarity_measures/get_distance_matrix.py +762 -0
  56. sequenzo/dissimilarity_measures/get_substitution_cost_matrix.py +246 -0
  57. sequenzo/dissimilarity_measures/src/DHDdistance.cpp +148 -0
  58. sequenzo/dissimilarity_measures/src/LCPdistance.cpp +114 -0
  59. sequenzo/dissimilarity_measures/src/LCPspellDistance.cpp +215 -0
  60. sequenzo/dissimilarity_measures/src/OMdistance.cpp +247 -0
  61. sequenzo/dissimilarity_measures/src/OMspellDistance.cpp +281 -0
  62. sequenzo/dissimilarity_measures/src/__init__.py +0 -0
  63. sequenzo/dissimilarity_measures/src/dist2matrix.cpp +63 -0
  64. sequenzo/dissimilarity_measures/src/dp_utils.h +160 -0
  65. sequenzo/dissimilarity_measures/src/module.cpp +40 -0
  66. sequenzo/dissimilarity_measures/src/setup.py +30 -0
  67. sequenzo/dissimilarity_measures/src/utils.h +25 -0
  68. sequenzo/dissimilarity_measures/src/xsimd/.github/cmake-test/main.cpp +6 -0
  69. sequenzo/dissimilarity_measures/src/xsimd/benchmark/main.cpp +159 -0
  70. sequenzo/dissimilarity_measures/src/xsimd/benchmark/xsimd_benchmark.hpp +565 -0
  71. sequenzo/dissimilarity_measures/src/xsimd/docs/source/conf.py +37 -0
  72. sequenzo/dissimilarity_measures/src/xsimd/examples/mandelbrot.cpp +330 -0
  73. sequenzo/dissimilarity_measures/src/xsimd/examples/pico_bench.hpp +246 -0
  74. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_arithmetic.hpp +266 -0
  75. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_complex.hpp +112 -0
  76. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_details.hpp +323 -0
  77. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_logical.hpp +218 -0
  78. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_math.hpp +2583 -0
  79. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_memory.hpp +880 -0
  80. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_rounding.hpp +72 -0
  81. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_swizzle.hpp +174 -0
  82. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_trigo.hpp +978 -0
  83. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx.hpp +1924 -0
  84. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx2.hpp +1144 -0
  85. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512bw.hpp +656 -0
  86. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512cd.hpp +28 -0
  87. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512dq.hpp +244 -0
  88. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512er.hpp +20 -0
  89. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512f.hpp +2650 -0
  90. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512ifma.hpp +20 -0
  91. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512pf.hpp +20 -0
  92. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vbmi.hpp +77 -0
  93. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vbmi2.hpp +131 -0
  94. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vnni_avx512bw.hpp +20 -0
  95. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vnni_avx512vbmi2.hpp +20 -0
  96. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avxvnni.hpp +20 -0
  97. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_common.hpp +24 -0
  98. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_common_fwd.hpp +77 -0
  99. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_constants.hpp +393 -0
  100. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_emulated.hpp +788 -0
  101. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_avx.hpp +93 -0
  102. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_avx2.hpp +46 -0
  103. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_sse.hpp +97 -0
  104. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma4.hpp +92 -0
  105. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_i8mm_neon64.hpp +17 -0
  106. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_isa.hpp +142 -0
  107. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_neon.hpp +3142 -0
  108. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_neon64.hpp +1543 -0
  109. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_rvv.hpp +1513 -0
  110. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_scalar.hpp +1260 -0
  111. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse2.hpp +2024 -0
  112. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse3.hpp +67 -0
  113. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse4_1.hpp +339 -0
  114. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse4_2.hpp +44 -0
  115. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_ssse3.hpp +186 -0
  116. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sve.hpp +1155 -0
  117. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_vsx.hpp +892 -0
  118. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_wasm.hpp +1780 -0
  119. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_arch.hpp +240 -0
  120. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_config.hpp +484 -0
  121. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_cpuid.hpp +269 -0
  122. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_inline.hpp +27 -0
  123. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/math/xsimd_rem_pio2.hpp +719 -0
  124. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/memory/xsimd_aligned_allocator.hpp +349 -0
  125. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/memory/xsimd_alignment.hpp +91 -0
  126. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_all_registers.hpp +55 -0
  127. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_api.hpp +2765 -0
  128. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx2_register.hpp +44 -0
  129. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512bw_register.hpp +51 -0
  130. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512cd_register.hpp +51 -0
  131. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512dq_register.hpp +51 -0
  132. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512er_register.hpp +51 -0
  133. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512f_register.hpp +77 -0
  134. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512ifma_register.hpp +51 -0
  135. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512pf_register.hpp +51 -0
  136. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vbmi2_register.hpp +51 -0
  137. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vbmi_register.hpp +51 -0
  138. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512bw_register.hpp +54 -0
  139. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512vbmi2_register.hpp +53 -0
  140. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx_register.hpp +64 -0
  141. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avxvnni_register.hpp +44 -0
  142. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_batch.hpp +1524 -0
  143. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_batch_constant.hpp +300 -0
  144. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_common_arch.hpp +47 -0
  145. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_emulated_register.hpp +80 -0
  146. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_avx2_register.hpp +50 -0
  147. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_avx_register.hpp +50 -0
  148. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_sse_register.hpp +50 -0
  149. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma4_register.hpp +50 -0
  150. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_i8mm_neon64_register.hpp +55 -0
  151. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_neon64_register.hpp +55 -0
  152. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_neon_register.hpp +154 -0
  153. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_register.hpp +94 -0
  154. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_rvv_register.hpp +506 -0
  155. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse2_register.hpp +59 -0
  156. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse3_register.hpp +49 -0
  157. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse4_1_register.hpp +48 -0
  158. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse4_2_register.hpp +48 -0
  159. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_ssse3_register.hpp +48 -0
  160. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sve_register.hpp +156 -0
  161. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_traits.hpp +337 -0
  162. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_utils.hpp +536 -0
  163. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_vsx_register.hpp +77 -0
  164. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_wasm_register.hpp +59 -0
  165. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/xsimd.hpp +75 -0
  166. sequenzo/dissimilarity_measures/src/xsimd/test/architectures/dummy.cpp +7 -0
  167. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set.cpp +13 -0
  168. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean.cpp +24 -0
  169. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_aligned.cpp +25 -0
  170. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_arch_independent.cpp +28 -0
  171. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_tag_dispatch.cpp +25 -0
  172. sequenzo/dissimilarity_measures/src/xsimd/test/doc/manipulating_abstract_batches.cpp +7 -0
  173. sequenzo/dissimilarity_measures/src/xsimd/test/doc/manipulating_parametric_batches.cpp +8 -0
  174. sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum.hpp +31 -0
  175. sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum_avx2.cpp +3 -0
  176. sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum_sse2.cpp +3 -0
  177. sequenzo/dissimilarity_measures/src/xsimd/test/doc/writing_vectorized_code.cpp +11 -0
  178. sequenzo/dissimilarity_measures/src/xsimd/test/main.cpp +31 -0
  179. sequenzo/dissimilarity_measures/src/xsimd/test/test_api.cpp +230 -0
  180. sequenzo/dissimilarity_measures/src/xsimd/test/test_arch.cpp +217 -0
  181. sequenzo/dissimilarity_measures/src/xsimd/test/test_basic_math.cpp +183 -0
  182. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch.cpp +1049 -0
  183. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_bool.cpp +508 -0
  184. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_cast.cpp +409 -0
  185. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_complex.cpp +712 -0
  186. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_constant.cpp +286 -0
  187. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_float.cpp +141 -0
  188. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_int.cpp +365 -0
  189. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_manip.cpp +308 -0
  190. sequenzo/dissimilarity_measures/src/xsimd/test/test_bitwise_cast.cpp +222 -0
  191. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_exponential.cpp +226 -0
  192. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_hyperbolic.cpp +183 -0
  193. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_power.cpp +265 -0
  194. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_trigonometric.cpp +236 -0
  195. sequenzo/dissimilarity_measures/src/xsimd/test/test_conversion.cpp +248 -0
  196. sequenzo/dissimilarity_measures/src/xsimd/test/test_custom_default_arch.cpp +28 -0
  197. sequenzo/dissimilarity_measures/src/xsimd/test/test_error_gamma.cpp +170 -0
  198. sequenzo/dissimilarity_measures/src/xsimd/test/test_explicit_batch_instantiation.cpp +32 -0
  199. sequenzo/dissimilarity_measures/src/xsimd/test/test_exponential.cpp +202 -0
  200. sequenzo/dissimilarity_measures/src/xsimd/test/test_extract_pair.cpp +92 -0
  201. sequenzo/dissimilarity_measures/src/xsimd/test/test_fp_manipulation.cpp +77 -0
  202. sequenzo/dissimilarity_measures/src/xsimd/test/test_gnu_source.cpp +30 -0
  203. sequenzo/dissimilarity_measures/src/xsimd/test/test_hyperbolic.cpp +167 -0
  204. sequenzo/dissimilarity_measures/src/xsimd/test/test_load_store.cpp +304 -0
  205. sequenzo/dissimilarity_measures/src/xsimd/test/test_memory.cpp +61 -0
  206. sequenzo/dissimilarity_measures/src/xsimd/test/test_poly_evaluation.cpp +64 -0
  207. sequenzo/dissimilarity_measures/src/xsimd/test/test_power.cpp +184 -0
  208. sequenzo/dissimilarity_measures/src/xsimd/test/test_rounding.cpp +199 -0
  209. sequenzo/dissimilarity_measures/src/xsimd/test/test_select.cpp +101 -0
  210. sequenzo/dissimilarity_measures/src/xsimd/test/test_shuffle.cpp +760 -0
  211. sequenzo/dissimilarity_measures/src/xsimd/test/test_sum.cpp +4 -0
  212. sequenzo/dissimilarity_measures/src/xsimd/test/test_sum.hpp +34 -0
  213. sequenzo/dissimilarity_measures/src/xsimd/test/test_traits.cpp +172 -0
  214. sequenzo/dissimilarity_measures/src/xsimd/test/test_trigonometric.cpp +208 -0
  215. sequenzo/dissimilarity_measures/src/xsimd/test/test_utils.hpp +611 -0
  216. sequenzo/dissimilarity_measures/src/xsimd/test/test_wasm/test_wasm_playwright.py +123 -0
  217. sequenzo/dissimilarity_measures/src/xsimd/test/test_xsimd_api.cpp +1460 -0
  218. sequenzo/dissimilarity_measures/utils/__init__.py +16 -0
  219. sequenzo/dissimilarity_measures/utils/get_LCP_length_for_2_seq.py +44 -0
  220. sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.cpython-310-darwin.so +0 -0
  221. sequenzo/dissimilarity_measures/utils/seqconc.cpython-310-darwin.so +0 -0
  222. sequenzo/dissimilarity_measures/utils/seqdss.cpython-310-darwin.so +0 -0
  223. sequenzo/dissimilarity_measures/utils/seqdur.cpython-310-darwin.so +0 -0
  224. sequenzo/dissimilarity_measures/utils/seqlength.cpython-310-darwin.so +0 -0
  225. sequenzo/multidomain/__init__.py +23 -0
  226. sequenzo/multidomain/association_between_domains.py +311 -0
  227. sequenzo/multidomain/cat.py +597 -0
  228. sequenzo/multidomain/combt.py +519 -0
  229. sequenzo/multidomain/dat.py +81 -0
  230. sequenzo/multidomain/idcd.py +139 -0
  231. sequenzo/multidomain/linked_polyad.py +292 -0
  232. sequenzo/openmp_setup.py +233 -0
  233. sequenzo/prefix_tree/__init__.py +62 -0
  234. sequenzo/prefix_tree/hub.py +114 -0
  235. sequenzo/prefix_tree/individual_level_indicators.py +1321 -0
  236. sequenzo/prefix_tree/spell_individual_level_indicators.py +580 -0
  237. sequenzo/prefix_tree/spell_level_indicators.py +297 -0
  238. sequenzo/prefix_tree/system_level_indicators.py +544 -0
  239. sequenzo/prefix_tree/utils.py +54 -0
  240. sequenzo/seqhmm/__init__.py +95 -0
  241. sequenzo/seqhmm/advanced_optimization.py +305 -0
  242. sequenzo/seqhmm/bootstrap.py +411 -0
  243. sequenzo/seqhmm/build_hmm.py +142 -0
  244. sequenzo/seqhmm/build_mhmm.py +136 -0
  245. sequenzo/seqhmm/build_nhmm.py +121 -0
  246. sequenzo/seqhmm/fit_mhmm.py +62 -0
  247. sequenzo/seqhmm/fit_model.py +61 -0
  248. sequenzo/seqhmm/fit_nhmm.py +76 -0
  249. sequenzo/seqhmm/formulas.py +289 -0
  250. sequenzo/seqhmm/forward_backward_nhmm.py +276 -0
  251. sequenzo/seqhmm/gradients_nhmm.py +306 -0
  252. sequenzo/seqhmm/hmm.py +291 -0
  253. sequenzo/seqhmm/mhmm.py +314 -0
  254. sequenzo/seqhmm/model_comparison.py +238 -0
  255. sequenzo/seqhmm/multichannel_em.py +282 -0
  256. sequenzo/seqhmm/multichannel_utils.py +138 -0
  257. sequenzo/seqhmm/nhmm.py +270 -0
  258. sequenzo/seqhmm/nhmm_utils.py +191 -0
  259. sequenzo/seqhmm/predict.py +137 -0
  260. sequenzo/seqhmm/predict_mhmm.py +142 -0
  261. sequenzo/seqhmm/simulate.py +878 -0
  262. sequenzo/seqhmm/utils.py +218 -0
  263. sequenzo/seqhmm/visualization.py +910 -0
  264. sequenzo/sequence_characteristics/__init__.py +40 -0
  265. sequenzo/sequence_characteristics/complexity_index.py +49 -0
  266. sequenzo/sequence_characteristics/overall_cross_sectional_entropy.py +220 -0
  267. sequenzo/sequence_characteristics/plot_characteristics.py +593 -0
  268. sequenzo/sequence_characteristics/simple_characteristics.py +311 -0
  269. sequenzo/sequence_characteristics/state_frequencies_and_entropy_per_sequence.py +39 -0
  270. sequenzo/sequence_characteristics/turbulence.py +155 -0
  271. sequenzo/sequence_characteristics/variance_of_spell_durations.py +86 -0
  272. sequenzo/sequence_characteristics/within_sequence_entropy.py +43 -0
  273. sequenzo/suffix_tree/__init__.py +66 -0
  274. sequenzo/suffix_tree/hub.py +114 -0
  275. sequenzo/suffix_tree/individual_level_indicators.py +1679 -0
  276. sequenzo/suffix_tree/spell_individual_level_indicators.py +493 -0
  277. sequenzo/suffix_tree/spell_level_indicators.py +248 -0
  278. sequenzo/suffix_tree/system_level_indicators.py +535 -0
  279. sequenzo/suffix_tree/utils.py +56 -0
  280. sequenzo/version_check.py +283 -0
  281. sequenzo/visualization/__init__.py +29 -0
  282. sequenzo/visualization/plot_mean_time.py +222 -0
  283. sequenzo/visualization/plot_modal_state.py +276 -0
  284. sequenzo/visualization/plot_most_frequent_sequences.py +147 -0
  285. sequenzo/visualization/plot_relative_frequency.py +405 -0
  286. sequenzo/visualization/plot_sequence_index.py +1175 -0
  287. sequenzo/visualization/plot_single_medoid.py +153 -0
  288. sequenzo/visualization/plot_state_distribution.py +651 -0
  289. sequenzo/visualization/plot_transition_matrix.py +190 -0
  290. sequenzo/visualization/utils/__init__.py +23 -0
  291. sequenzo/visualization/utils/utils.py +310 -0
  292. sequenzo/with_event_history_analysis/__init__.py +35 -0
  293. sequenzo/with_event_history_analysis/sequence_analysis_multi_state_model.py +850 -0
  294. sequenzo/with_event_history_analysis/sequence_history_analysis.py +283 -0
  295. sequenzo-0.1.31.dist-info/METADATA +286 -0
  296. sequenzo-0.1.31.dist-info/RECORD +299 -0
  297. sequenzo-0.1.31.dist-info/WHEEL +5 -0
  298. sequenzo-0.1.31.dist-info/licenses/LICENSE +28 -0
  299. sequenzo-0.1.31.dist-info/top_level.txt +2 -0
@@ -0,0 +1,535 @@
1
+ """
2
+ @Author : Yuqi Liang 梁彧祺
3
+ @File : system_level_indicators.py
4
+ @Time : 02/05/2025 11:06
5
+ @Desc :
6
+ This module includes tools for building suffix trees, computing suffix counts, merging factors, and Jensen-Shannon convergence,
7
+ as well as generating composite scores to summarize system-level sequence convergence and consolidation over time.
8
+ Visualization functions are also provided to plot these indicators and their distributions,
9
+ supporting comprehensive analysis of sequence system dynamics.
10
+ """
11
+ from collections import defaultdict, Counter
12
+ import numpy as np
13
+ from scipy.stats import zscore
14
+ from numpy import array
15
+ from scipy.spatial.distance import jensenshannon
16
+ from typing import Optional, Dict, List, Tuple, Any
17
+
18
+ from sequenzo.visualization.utils import save_and_show_results
19
+ import matplotlib.pyplot as plt
20
+ import seaborn as sns
21
+
22
+
23
class SuffixTree:
    """
    Count-annotated trie over state sequences.

    Each call to `insert` walks one sequence from its first element. Every
    leading sub-path of that sequence is stored as a tuple key in `counts`,
    and the nested-dict trie under `root` gains one node per state on the path.

    Attributes:
        root: nested dicts mapping state -> subtree dict
        counts: tuple of states -> number of times that path was walked
        total_sequences: sequence counter; `insert` does NOT update it, so
            whoever builds the tree is responsible for setting it
    """

    def __init__(self):
        self.root = {}
        self.counts = defaultdict(int)  # suffix -> count
        self.total_sequences = 0

    def insert(self, sequence):
        """
        Add one sequence to the tree.

        For every position along `sequence`, the path walked so far is
        counted in `self.counts` and the matching trie node is created if it
        does not exist yet.
        """
        path = []
        node = self.root
        for state in sequence:
            path.append(state)
            self.counts[tuple(path)] += 1
            # setdefault creates the child node on first visit, then descends
            node = node.setdefault(state, {})

    def get_suffixes_at_depth(self, depth):
        """Return all recorded paths (tuples) whose length equals `depth`."""
        return [k for k in self.counts if len(k) == depth]

    def get_children(self, suffix):
        """
        Given a suffix (as a list or tuple), return its immediate children in the tree.

        A path that is not present in the tree yields an empty dict. For a
        path that is present, the returned dict is the tree's own subtree
        object, not a copy.

        Returns:
            dict: mapping from child state -> subtree dict
        """
        node = self.root
        for state in suffix:
            node = node.get(state, {})
        return node

    def get_children_count(self, suffix):
        """Return how many distinct states directly follow `suffix` (0 if unknown)."""
        return len(self.get_children(suffix))

    def describe(self):
        """
        Print a multi-line report: sequence total, max depth, number of
        distinct suffixes, and the number of distinct suffixes per level.
        """
        # One pass over the keys instead of re-filtering them for every level
        level_sizes = Counter(len(k) for k in self.counts)
        max_depth = max(level_sizes, default=0)
        total_suffixes = len(self.counts)
        print("\n[SuffixTree Overview]")
        print(f"[>] Total sequences inserted: {self.total_sequences}")
        print(f"[>] Max depth (time points): {max_depth}")
        print(f"[>] Total distinct suffixes: {total_suffixes}")

        for t in range(1, max_depth + 1):
            print(f" Level {t}: {level_sizes[t]} unique suffixes")

    def __repr__(self):
        """
        Returns a brief textual summary of the suffix tree object.

        Note:
            This method is intended to provide a lightweight, one-line overview
            (e.g., max depth and total suffix count). For a full structural report
            including per-level statistics, use the `.describe()` method instead.
        """
        max_depth = max((len(k) for k in self.counts), default=0)
        return f"SuffixTree(max_depth={max_depth}, total_suffixes={len(self.counts)})"
85
+
86
+
87
def get_depth_stats(tree: "SuffixTree") -> Dict[str, Any]:
    """
    Group the tree's recorded suffixes by length in one scan of `tree.counts`.

    Compute this once and hand the result to both compute_suffix_count and
    compute_merging_factor so that neither has to scan the tree again.

    Returns:
        dict with keys:
            - 'depth_counts': dict depth -> number of distinct suffixes at that depth
            - 'depth_to_suffixes': dict depth -> list of suffix tuples at that depth
    """
    suffixes_by_depth = {}
    for suffix in tree.counts:
        suffixes_by_depth.setdefault(len(suffix), []).append(suffix)

    # The per-depth count is simply the size of each group
    counts_by_depth = {
        depth: len(group) for depth, group in suffixes_by_depth.items()
    }
    return {
        "depth_counts": counts_by_depth,
        "depth_to_suffixes": suffixes_by_depth,
    }
108
+
109
+
110
def compute_suffix_count(
    tree, max_depth, depth_stats: Optional[Dict[str, Any]] = None
) -> List[int]:
    """
    Number of distinct suffixes at each depth 1..max_depth.

    If `depth_stats` (the result of get_depth_stats(tree)) is supplied, its
    'depth_counts' entry is used and `tree` is not scanned; otherwise the
    counts are tallied from `tree.counts` here. Depths with no suffix
    contribute 0.
    """
    per_depth = (
        Counter(map(len, tree.counts))
        if depth_stats is None
        else depth_stats["depth_counts"]
    )
    return [per_depth.get(step, 0) for step in range(1, max_depth + 1)]
126
+
127
+
128
def compute_merging_factor(
    tree, max_depth, depth_suffixes: Optional[Dict[int, List[Tuple]]] = None
) -> List[float]:
    """
    Mean number of children per suffix, one value per time step.

    The entry for step t (t >= 2) is the average of
    `tree.get_children_count(s)` over the suffixes `s` of length t - 1, or
    0.0 when there are none. The leading entry is a fixed 0.0 so the list
    lines up with the output of compute_suffix_count.

    Pass get_depth_stats(tree)['depth_to_suffixes'] as `depth_suffixes` to
    skip the grouping scan over `tree.counts`.
    """
    if depth_suffixes is not None:
        by_depth = depth_suffixes
    else:
        by_depth = {}
        for suffix in tree.counts:
            by_depth.setdefault(len(suffix), []).append(suffix)

    factors = [0.0]  # padding entry for the first time step
    for parent_depth in range(1, max_depth):
        parents = by_depth.get(parent_depth, [])
        if parents:
            fanout = [tree.get_children_count(parent) for parent in parents]
            factors.append(float(np.mean(fanout)))
        else:
            factors.append(0.0)
    return factors
152
+
153
+
154
def compute_js_convergence(sequences, state_set):
    """
    Jensen-Shannon distance between consecutive time-step state distributions.

    For each time step the share of sequences in each state is computed, and
    every step from the second onward is compared with the step before it via
    `scipy.spatial.distance.jensenshannon`. That function returns the
    Jensen-Shannon *distance* (the square root of the divergence), so the
    values here are distances.

    Parameters:
        sequences: indexable collection of sequences. The number of time
            steps is taken from the first sequence, and each sequence is
            indexed up to that length.
        state_set: iterable of every state that can appear in `sequences`.

    Returns:
        list of floats, one per time step; the first entry is always 0.0
        because the first step has no predecessor. An empty `sequences`
        yields an empty list.

    Raises:
        KeyError: if a sequence contains a state that is not in `state_set`.
        IndexError: if a sequence is shorter than the first one.
    """
    N = len(sequences)
    if N == 0:
        # No data means no time steps to compare
        return []

    T = len(sequences[0])
    state_list = list(state_set)
    n_states = len(state_list)
    state_to_idx = {s: i for i, s in enumerate(state_list)}

    # Build (N, T) matrix of state indices in one pass
    mat = np.empty((N, T), dtype=np.intp)
    for i, seq in enumerate(sequences):
        for t in range(T):
            mat[i, t] = state_to_idx[seq[t]]

    # Per-time distributions via bincount; each column holds exactly N
    # observations, so dividing by N gives proportions
    distros = np.zeros((T, n_states), dtype=float)
    for t in range(T):
        distros[t] = np.bincount(mat[:, t], minlength=n_states) / N

    js_scores = [0.0]
    for t in range(1, T):
        js_scores.append(float(jensenshannon(distros[t], distros[t - 1])))
    return js_scores
184
+
185
+
186
def _build_suffix_tree_position(sequences):
    """
    Internal: build the position-based suffix tree.

    Every tail slice `sequence[start:]` of every sequence is inserted into a
    fresh SuffixTree, and the tree's `total_sequences` is set to the number
    of input sequences.
    """
    suffix_tree = SuffixTree()
    suffix_tree.total_sequences = len(sequences)
    for sequence in sequences:
        n_positions = len(sequence)
        for start in range(n_positions):
            suffix_tree.insert(sequence[start:])
    return suffix_tree
194
+
195
+
196
def build_suffix_tree(sequences):
    """
    Public entry point for the position-based suffix tree.

    Delegates to `_build_suffix_tree_position` and returns its tree unchanged.

    For spell-based tree or unified hub with mode/expcost, use:
        from sequenzo.suffix_tree.hub import build_suffix_tree
        tree = build_suffix_tree(seqdata, mode="spell", expcost=0)
    """
    position_tree = _build_suffix_tree_position(sequences)
    return position_tree
205
+
206
+
207
def plot_system_indicators(
    suffix_counts: List[float],
    merging_factors: List[float],
    js_convergence: Optional[List[float]] = None,
    x_values: Optional[List] = None,
    x_label: str = "Time (t)",
    legend_loc: str = 'lower right',
    save_as: Optional[str] = None,
    figsize: Optional[tuple] = None,
    dpi: int = 300,
    custom_colors: Optional[Dict[str, str]] = None,
    show: bool = True,
    plot_distributions: bool = False,
    style: Optional[str] = None
) -> None:
    """
    Plot a single group's system-level indicators using the same visual style as
    `plot_system_indicators_multiple_comparison`, but for one subplot.

    Design:
    - Left y-axis: raw Suffix Count
    - Right y-axis: z-score of Merging Factor and (optionally) JS Convergence
    - Consistent colors/markers and legend handling with the multi-comparison API

    Parameters:
    - suffix_counts: List[float]
        Raw suffix counts per time step. Its length defines the data length T.
    - merging_factors: List[float]
        Merging factor per time step. Must have length T.
    - js_convergence: Optional[List[float]]
        JS convergence per time step. If None or empty, only the merging factor
        is shown on the right axis. Otherwise must have length T. Any sequence
        with a length (including NumPy arrays) is accepted.
    - x_values: Optional[List]
        Custom x-axis values (e.g., years). If None, uses 1..T. Length must equal T.
    - x_label: str
        Label for x-axis. Default: "Time (t)"
    - legend_loc: str
        Legend location, e.g., 'upper left', 'upper right', 'lower right', 'best', etc.
        Default: 'lower right'
    - save_as: Optional[str]
        If provided, forwarded to `save_and_show_results` as the save path of the
        main figure. When `plot_distributions` is True, the distribution figure is
        saved under the same name with "_distributions" inserted before the file
        extension (or appended when there is no extension).
    - figsize: Optional[tuple]
        Figure size (width, height). Default: (12, 6)
    - dpi: int
        DPI forwarded to `save_and_show_results`. Default: 300
    - custom_colors: Optional[Dict[str, str]]
        Optional color overrides. Keys: "Suffix Count", "Merging Factor", "JS Convergence"
    - show: bool
        Forwarded to `save_and_show_results`; whether to display the figure
    - plot_distributions: bool
        If True, additionally draw one histogram (with KDE) per raw indicator
    - style: Optional[str]
        Style to apply before plotting. One of the seaborn styles ('whitegrid',
        'darkgrid', 'white', 'dark', 'ticks') is applied via `sns.set_style`;
        any other value is passed to `plt.style.use`. If None, no style is applied.

    Raises:
    - ValueError
        If `x_values`, `merging_factors` or a non-empty `js_convergence` does not
        have the same length as `suffix_counts`.

    Example:
        >>> plot_system_indicators(
        ...     suffix_counts=india_suffix_counts,
        ...     merging_factors=india_merging_factors,
        ...     js_convergence=india_js_scores,
        ...     x_values=[2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019],
        ...     x_label="Year",
        ...     legend_loc="lower right",
        ...     figsize=(12, 6),
        ...     dpi=300,
        ... )
    """
    import os  # local import: only needed to derive the distribution file name

    T = len(suffix_counts)
    # Set x values to align with multi-group API
    if x_values is None:
        x_values = list(range(1, T + 1))
    if len(x_values) != T:
        raise ValueError("Length of x_values must match data length")

    # Test for None/emptiness explicitly: plain truthiness raises for NumPy
    # arrays with more than one element.
    has_js = js_convergence is not None and len(js_convergence) > 0

    # Fail early with a clear message instead of an opaque plotting error.
    if len(merging_factors) != T:
        raise ValueError("Length of merging_factors must match data length")
    if has_js and len(js_convergence) != T:
        raise ValueError("Length of js_convergence must match data length")

    # Normalize others
    mf_z = zscore(array(merging_factors))
    js_z = zscore(array(js_convergence)) if has_js else None

    color_defaults = {
        "Suffix Count": "#6BB6FF",    # Soft sky blue
        "Merging Factor": "#FFB347",  # Warm peach/coral
        "JS Convergence": "#98D8C8",  # Soft mint green
    }
    colors = {**color_defaults, **(custom_colors or {})}

    # --- Main line plot with dual axes ---
    if figsize is None:
        figsize = (12, 6)

    # Apply style if specified
    if style is not None:
        # Seaborn style names go through seaborn; everything else through matplotlib
        seaborn_styles = ['whitegrid', 'darkgrid', 'white', 'dark', 'ticks']
        if style in seaborn_styles:
            sns.set_style(style)
        else:
            plt.style.use(style)

    fig, ax1 = plt.subplots(figsize=figsize)
    ax1.set_xlabel(x_label)
    ax1.set_ylabel("Suffix Count", color=colors["Suffix Count"])
    ax1.plot(x_values, suffix_counts, marker='o', color=colors["Suffix Count"], label="Suffix Count")
    ax1.tick_params(axis='y', labelcolor=colors["Suffix Count"])

    ax2 = ax1.twinx()
    ax2.set_ylabel("Z-score (Other Indicators)")
    ax2.plot(x_values, mf_z, marker='s', label='Merging Factor (z)', color=colors["Merging Factor"])
    if js_z is not None:
        ax2.plot(x_values, js_z, marker='^', label='JS Convergence (z)', color=colors["JS Convergence"])

    # Merge the handles of both axes so a single legend lists every line
    lines1, labels1 = ax1.get_legend_handles_labels()
    lines2, labels2 = ax2.get_legend_handles_labels()
    ax2.legend(lines1 + lines2, labels1 + labels2, loc=legend_loc)

    ax1.set_title("System-Level Trajectory Indicators: Raw vs. Normalized")
    fig.tight_layout()

    save_and_show_results(save_as=save_as, dpi=dpi, show=show)

    # --- Distribution plots if requested ---
    if plot_distributions:
        raw_data = {
            "Suffix Count": suffix_counts,
            "Merging Factor": merging_factors,
        }
        if has_js:
            raw_data["JS Convergence"] = js_convergence

        n = len(raw_data)
        fig, axes = plt.subplots(1, n, figsize=(4 * n, 4))
        if n == 1:
            axes = [axes]

        for ax, (label, values) in zip(axes, raw_data.items()):
            sns.histplot(values, kde=True, ax=ax, color=colors.get(label, None))
            ax.set_title(f"{label} Distribution")
            ax.set_xlabel("Value")
            ax.set_ylabel("Density")

        fig.tight_layout()

        # Insert the suffix before the extension (if any) so the distribution
        # figure never ends up with the same path as the main figure.
        dist_path = None
        if save_as:
            root, ext = os.path.splitext(save_as)
            dist_path = f"{root}_distributions{ext}"
        save_and_show_results(save_as=dist_path, dpi=dpi, show=show)
345
+
346
+
347
def plot_system_indicators_multiple_comparison(
    groups_data: Dict[str, Dict[str, List[float]]],
    group_names: Optional[List[str]] = None,
    subplot_titles: Optional[List[str]] = None,
    x_values: Optional[List] = None,
    x_label: str = "Time (t)",
    legend_loc: str = 'lower right',
    save_as: Optional[str] = None,
    figsize: Optional[tuple] = None,
    dpi: int = 300,
    custom_colors: Optional[Dict[str, str]] = None,
    show: bool = True,
    style: Optional[str] = None
) -> None:
    """
    Plot system-level indicators comparison across multiple groups using dual y-axis design.

    One subplot is drawn per group, stacked vertically. In each subplot the left
    y-axis shows the raw suffix counts and the right y-axis shows the z-scores of
    the merging factors and JS convergence.

    Parameters:
    -----------
    groups_data : Dict[str, Dict[str, List[float]]]
        Dictionary with group names as keys and data dictionaries as values.
        Each data dict must contain 'suffix_counts', 'merging_factors', and
        'js_convergence', all of the same length across all groups.
        Example: {
            "Group1": {
                "suffix_counts": [10, 15, 20, ...],
                "merging_factors": [1.2, 1.5, 1.8, ...],
                "js_convergence": [0.1, 0.2, 0.15, ...]
            },
            "Group2": {...}
        }
    group_names : Optional[List[str]]
        Custom names for groups, paired with the groups in iteration order of
        groups_data. If None, uses keys from groups_data.
        Used for default subplot titles if subplot_titles is not provided.
    subplot_titles : Optional[List[str]]
        Custom titles for each subplot. If None, uses default format:
        "{group_name} - System-Level Trajectory Indicators: Raw vs. Normalized"
    x_values : Optional[List]
        Custom x-axis values. If None, uses 1, 2, 3, ...
    x_label : str
        Label for x-axis (set on the bottom subplot only). Default: "Time (t)"
    legend_loc : str
        Legend location. Options: 'upper left', 'upper right', 'lower left',
        'lower right', 'center', 'best', etc. Default: 'lower right'
    save_as : Optional[str]
        Save path forwarded to `save_and_show_results`.
    figsize : Optional[tuple]
        Figure size (width, height). If None, uses (12, 4 * n_groups + 2).
    dpi : int
        DPI forwarded to `save_and_show_results`. Default: 300
    custom_colors : Optional[Dict[str, str]]
        Custom colors for indicators. Keys: "Suffix Count", "Merging Factor",
        "JS Convergence". Missing keys fall back to the defaults.
    show : bool
        Forwarded to `save_and_show_results`; whether to show the plot. Default: True
    style : Optional[str]
        Style to apply. Seaborn styles ('whitegrid', 'darkgrid', 'white', 'dark', 'ticks')
        or matplotlib styles. If None, uses default style. Default: None

    Raises:
    -------
    ValueError
        If groups_data is empty, if group_names or subplot_titles do not match the
        number of groups, if a group lacks a required key, or if x_values or any
        group's series does not match the data length of the first group.

    Example:
    --------
    >>> data = {
    ...     "India": {
    ...         "suffix_counts": india_suffix_counts,
    ...         "merging_factors": india_merging_factors,
    ...         "js_convergence": india_js_scores
    ...     },
    ...     "US": {
    ...         "suffix_counts": us_suffix_counts,
    ...         "merging_factors": us_merging_factors,
    ...         "js_convergence": us_js_scores
    ...     }
    ... }
    >>> plot_system_indicators_multiple_comparison(
    ...     groups_data=data,
    ...     x_label="Years",
    ...     legend_loc='upper right',
    ...     save_as="multi_country_comparison"
    ... )

    >>> # With custom subplot titles
    >>> plot_system_indicators_multiple_comparison(
    ...     groups_data=data,
    ...     subplot_titles=["India trajectory", "US trajectory"],
    ...     x_label="Year",
    ...     save_as="custom_titles_comparison"
    ... )
    """

    # Validate input
    if not groups_data:
        raise ValueError("groups_data cannot be empty")

    # Get group names
    if group_names is None:
        group_names = list(groups_data.keys())

    if len(group_names) != len(groups_data):
        raise ValueError("Length of group_names must match number of groups in groups_data")

    # Validate subplot_titles
    if subplot_titles is not None and len(subplot_titles) != len(groups_data):
        raise ValueError("Length of subplot_titles must match number of groups in groups_data")

    # Validate data completeness for every group before anything is read or
    # drawn, so a missing key is reported as ValueError (not a bare KeyError)
    # and no half-built figure is left behind.
    required_keys = ('suffix_counts', 'merging_factors', 'js_convergence')
    for group_key, data in groups_data.items():
        for key in required_keys:
            if key not in data:
                raise ValueError(f"Missing '{key}' in data for group '{group_key}'")

    # The first group determines the data length
    first_group_data = next(iter(groups_data.values()))
    T = len(first_group_data['suffix_counts'])

    # Set x values
    if x_values is None:
        x_values = list(range(1, T + 1))

    if len(x_values) != T:
        raise ValueError("Length of x_values must match data length")

    # All groups share x_values, so every series must have length T
    for group_key, data in groups_data.items():
        for key in required_keys:
            if len(data[key]) != T:
                raise ValueError(
                    f"Length of '{key}' for group '{group_key}' must match data length ({T})"
                )

    # Color settings
    color_defaults = {
        "Suffix Count": "#6BB6FF",    # Soft sky blue
        "Merging Factor": "#FFB347",  # Warm peach/coral
        "JS Convergence": "#98D8C8",  # Soft mint green
    }
    colors = {**color_defaults, **(custom_colors or {})}

    # Calculate figure size
    n_groups = len(groups_data)
    if figsize is None:
        figsize = (12, 4 * n_groups + 2)  # Dynamic height based on number of groups

    # Apply style if specified
    if style is not None:
        # Seaborn style names go through seaborn; everything else through matplotlib
        seaborn_styles = ['whitegrid', 'darkgrid', 'white', 'dark', 'ticks']
        if style in seaborn_styles:
            sns.set_style(style)
        else:
            plt.style.use(style)

    # Create subplots
    fig, axes = plt.subplots(n_groups, 1, figsize=figsize)

    # plt.subplots returns a bare Axes (not a sequence) for a single subplot
    if n_groups == 1:
        axes = [axes]

    # Plot each group
    for i, ((group_key, data), group_name) in enumerate(zip(groups_data.items(), group_names)):
        ax = axes[i]

        # Normalize data (z-score)
        mf_z = zscore(array(data['merging_factors']))
        js_z = zscore(array(data['js_convergence']))

        # Left y-axis: raw suffix counts
        ax.set_ylabel("Suffix Count", color=colors["Suffix Count"])
        ax.plot(x_values, data['suffix_counts'], marker='o',
                color=colors["Suffix Count"], label="Suffix Count")
        ax.tick_params(axis='y', labelcolor=colors["Suffix Count"])

        # Right y-axis: normalized indicators
        ax_twin = ax.twinx()
        ax_twin.set_ylabel("Z-score (Other Indicators)")
        ax_twin.plot(x_values, mf_z, marker='s',
                     label='Merging Factor (z)', color=colors["Merging Factor"])
        ax_twin.plot(x_values, js_z, marker='^',
                     label='JS Convergence (z)', color=colors["JS Convergence"])

        # Legend: merge the handles of both axes into one legend
        lines1, labels1 = ax.get_legend_handles_labels()
        lines2, labels2 = ax_twin.get_legend_handles_labels()
        ax_twin.legend(lines1 + lines2, labels1 + labels2, loc=legend_loc)

        # Title and labels
        if subplot_titles is not None:
            title = subplot_titles[i]
        else:
            title = f"{group_name} - System-Level Trajectory Indicators: Raw vs. Normalized"
        ax.set_title(title)

        # Only set x-label for the bottom subplot
        if i == n_groups - 1:
            ax.set_xlabel(x_label)

    fig.tight_layout()
    save_and_show_results(save_as=save_as, dpi=dpi, show=show)
@@ -0,0 +1,56 @@
1
+ """
2
+ @Author : Yuqi Liang 梁彧祺
3
+ @File : utils.py
4
+ @Time : 08/08/2025 12:26
5
+ @Desc :
6
+ Utility functions for suffix tree convergence analysis,
7
+ including sequence extraction and state space processing.
8
+ """
9
+ import pandas as pd
10
+ from typing import List, Tuple
11
+
12
+
13
def extract_sequences(df: pd.DataFrame, time_cols: List[str]) -> List[List[str]]:
    """
    Pull the sequence columns out of a DataFrame as nested Python lists.

    Parameters:
        df (pd.DataFrame): Input DataFrame.
        time_cols (List[str]): Columns holding the sequence, in time order.

    Returns:
        List[List[str]]: One inner list per row of ``df``, containing that
        row's values for ``time_cols`` in the given column order.
    """
    selected_columns = df[time_cols]
    state_matrix = selected_columns.values
    return state_matrix.tolist()
25
+
26
+
27
def get_state_space(sequences: List[List[str]]) -> List[str]:
    """
    Collect the unique states that occur in a list of sequences.

    Parameters:
        sequences (List[List[str]]): Sequence data. States must be hashable.

    Returns:
        List[str]: Sorted list of unique states. When the states are mutually
        comparable (the usual all-string case) this is their natural sort
        order. When they are not (e.g. string states mixed with float NaN from
        missing cells), states are grouped by type name and sorted within each
        group, so a deterministic list is returned instead of raising
        ``TypeError``.
    """
    seen = set()
    for seq in sequences:
        seen.update(seq)

    try:
        return sorted(seen)
    except TypeError:
        # Mixed, non-comparable types: order by type name first, then sort
        # each same-type group on its own.
        groups = {}
        for state in seen:
            groups.setdefault(type(state).__name__, []).append(state)

        ordered = []
        for type_name in sorted(groups):
            members = groups[type_name]
            try:
                members.sort()
            except TypeError:
                # Same type name but still not orderable: fall back to repr
                members.sort(key=repr)
            ordered.extend(members)
        return ordered
41
+
42
+
43
def convert_to_suffix_tree_data(df: pd.DataFrame, time_cols: List[str]) -> Tuple[List[List[str]], List[str]]:
    """
    Prepare a DataFrame for suffix tree analysis in a single call.

    Extracts the row-wise sequences from ``time_cols`` and derives the state
    space found in those sequences.

    Parameters:
        df (pd.DataFrame): Input DataFrame.
        time_cols (List[str]): Sequence columns (e.g., ['C1', ..., 'C10'])

    Returns:
        Tuple[List[List[str]], List[str]]: ``(sequences, states)`` where
        ``sequences`` holds one list per row and ``states`` is the result of
        ``get_state_space(sequences)``.
    """
    sequences = extract_sequences(df, time_cols)
    return sequences, get_state_space(sequences)