sequenzo 0.1.21__cp310-cp310-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sequenzo might be problematic. Click here for more details.

Files changed (260) hide show
  1. sequenzo/__init__.py +240 -0
  2. sequenzo/big_data/__init__.py +12 -0
  3. sequenzo/big_data/clara/__init__.py +26 -0
  4. sequenzo/big_data/clara/clara.py +467 -0
  5. sequenzo/big_data/clara/utils/__init__.py +27 -0
  6. sequenzo/big_data/clara/utils/aggregatecases.py +92 -0
  7. sequenzo/big_data/clara/utils/davies_bouldin.py +91 -0
  8. sequenzo/big_data/clara/utils/get_weighted_diss.cpython-310-darwin.so +0 -0
  9. sequenzo/big_data/clara/utils/wfcmdd.py +205 -0
  10. sequenzo/big_data/clara/visualization.py +88 -0
  11. sequenzo/clustering/KMedoids.py +196 -0
  12. sequenzo/clustering/__init__.py +30 -0
  13. sequenzo/clustering/clustering_c_code.cpython-310-darwin.so +0 -0
  14. sequenzo/clustering/hierarchical_clustering.py +1380 -0
  15. sequenzo/clustering/src/KMedoid.cpp +262 -0
  16. sequenzo/clustering/src/PAM.cpp +236 -0
  17. sequenzo/clustering/src/PAMonce.cpp +234 -0
  18. sequenzo/clustering/src/cluster_quality.cpp +496 -0
  19. sequenzo/clustering/src/cluster_quality.h +128 -0
  20. sequenzo/clustering/src/cluster_quality_backup.cpp +570 -0
  21. sequenzo/clustering/src/module.cpp +228 -0
  22. sequenzo/clustering/src/weightedinertia.cpp +111 -0
  23. sequenzo/clustering/utils/__init__.py +27 -0
  24. sequenzo/clustering/utils/disscenter.py +122 -0
  25. sequenzo/data_preprocessing/__init__.py +20 -0
  26. sequenzo/data_preprocessing/helpers.py +256 -0
  27. sequenzo/datasets/__init__.py +41 -0
  28. sequenzo/datasets/biofam.csv +2001 -0
  29. sequenzo/datasets/biofam_child_domain.csv +2001 -0
  30. sequenzo/datasets/biofam_left_domain.csv +2001 -0
  31. sequenzo/datasets/biofam_married_domain.csv +2001 -0
  32. sequenzo/datasets/chinese_colonial_territories.csv +12 -0
  33. sequenzo/datasets/country_co2_emissions.csv +194 -0
  34. sequenzo/datasets/country_co2_emissions_global_deciles.csv +195 -0
  35. sequenzo/datasets/country_co2_emissions_global_quintiles.csv +195 -0
  36. sequenzo/datasets/country_co2_emissions_local_deciles.csv +195 -0
  37. sequenzo/datasets/country_co2_emissions_local_quintiles.csv +195 -0
  38. sequenzo/datasets/country_gdp_per_capita.csv +194 -0
  39. sequenzo/datasets/mvad.csv +713 -0
  40. sequenzo/datasets/pairfam_family.csv +1867 -0
  41. sequenzo/datasets/polyadic_samplec1.csv +61 -0
  42. sequenzo/datasets/polyadic_samplep1.csv +61 -0
  43. sequenzo/datasets/polyadic_seqc1.csv +61 -0
  44. sequenzo/datasets/polyadic_seqp1.csv +61 -0
  45. sequenzo/define_sequence_data.py +609 -0
  46. sequenzo/dissimilarity_measures/__init__.py +31 -0
  47. sequenzo/dissimilarity_measures/c_code.cpython-310-darwin.so +0 -0
  48. sequenzo/dissimilarity_measures/get_distance_matrix.py +702 -0
  49. sequenzo/dissimilarity_measures/get_substitution_cost_matrix.py +241 -0
  50. sequenzo/dissimilarity_measures/src/DHDdistance.cpp +148 -0
  51. sequenzo/dissimilarity_measures/src/LCPdistance.cpp +114 -0
  52. sequenzo/dissimilarity_measures/src/OMdistance.cpp +247 -0
  53. sequenzo/dissimilarity_measures/src/OMspellDistance.cpp +281 -0
  54. sequenzo/dissimilarity_measures/src/__init__.py +0 -0
  55. sequenzo/dissimilarity_measures/src/dist2matrix.cpp +63 -0
  56. sequenzo/dissimilarity_measures/src/dp_utils.h +160 -0
  57. sequenzo/dissimilarity_measures/src/module.cpp +34 -0
  58. sequenzo/dissimilarity_measures/src/setup.py +30 -0
  59. sequenzo/dissimilarity_measures/src/utils.h +25 -0
  60. sequenzo/dissimilarity_measures/src/xsimd/.github/cmake-test/main.cpp +6 -0
  61. sequenzo/dissimilarity_measures/src/xsimd/benchmark/main.cpp +159 -0
  62. sequenzo/dissimilarity_measures/src/xsimd/benchmark/xsimd_benchmark.hpp +565 -0
  63. sequenzo/dissimilarity_measures/src/xsimd/docs/source/conf.py +37 -0
  64. sequenzo/dissimilarity_measures/src/xsimd/examples/mandelbrot.cpp +330 -0
  65. sequenzo/dissimilarity_measures/src/xsimd/examples/pico_bench.hpp +246 -0
  66. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_arithmetic.hpp +266 -0
  67. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_complex.hpp +112 -0
  68. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_details.hpp +323 -0
  69. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_logical.hpp +218 -0
  70. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_math.hpp +2583 -0
  71. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_memory.hpp +880 -0
  72. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_rounding.hpp +72 -0
  73. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_swizzle.hpp +174 -0
  74. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_trigo.hpp +978 -0
  75. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx.hpp +1924 -0
  76. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx2.hpp +1144 -0
  77. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512bw.hpp +656 -0
  78. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512cd.hpp +28 -0
  79. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512dq.hpp +244 -0
  80. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512er.hpp +20 -0
  81. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512f.hpp +2650 -0
  82. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512ifma.hpp +20 -0
  83. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512pf.hpp +20 -0
  84. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vbmi.hpp +77 -0
  85. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vbmi2.hpp +131 -0
  86. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vnni_avx512bw.hpp +20 -0
  87. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vnni_avx512vbmi2.hpp +20 -0
  88. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avxvnni.hpp +20 -0
  89. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_common.hpp +24 -0
  90. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_common_fwd.hpp +77 -0
  91. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_constants.hpp +393 -0
  92. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_emulated.hpp +788 -0
  93. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_avx.hpp +93 -0
  94. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_avx2.hpp +46 -0
  95. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_sse.hpp +97 -0
  96. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma4.hpp +92 -0
  97. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_i8mm_neon64.hpp +17 -0
  98. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_isa.hpp +142 -0
  99. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_neon.hpp +3142 -0
  100. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_neon64.hpp +1543 -0
  101. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_rvv.hpp +1513 -0
  102. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_scalar.hpp +1260 -0
  103. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse2.hpp +2024 -0
  104. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse3.hpp +67 -0
  105. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse4_1.hpp +339 -0
  106. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse4_2.hpp +44 -0
  107. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_ssse3.hpp +186 -0
  108. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sve.hpp +1155 -0
  109. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_vsx.hpp +892 -0
  110. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_wasm.hpp +1780 -0
  111. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_arch.hpp +240 -0
  112. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_config.hpp +484 -0
  113. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_cpuid.hpp +269 -0
  114. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_inline.hpp +27 -0
  115. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/math/xsimd_rem_pio2.hpp +719 -0
  116. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/memory/xsimd_aligned_allocator.hpp +349 -0
  117. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/memory/xsimd_alignment.hpp +91 -0
  118. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_all_registers.hpp +55 -0
  119. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_api.hpp +2765 -0
  120. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx2_register.hpp +44 -0
  121. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512bw_register.hpp +51 -0
  122. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512cd_register.hpp +51 -0
  123. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512dq_register.hpp +51 -0
  124. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512er_register.hpp +51 -0
  125. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512f_register.hpp +77 -0
  126. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512ifma_register.hpp +51 -0
  127. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512pf_register.hpp +51 -0
  128. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vbmi2_register.hpp +51 -0
  129. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vbmi_register.hpp +51 -0
  130. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512bw_register.hpp +54 -0
  131. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512vbmi2_register.hpp +53 -0
  132. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx_register.hpp +64 -0
  133. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avxvnni_register.hpp +44 -0
  134. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_batch.hpp +1524 -0
  135. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_batch_constant.hpp +300 -0
  136. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_common_arch.hpp +47 -0
  137. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_emulated_register.hpp +80 -0
  138. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_avx2_register.hpp +50 -0
  139. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_avx_register.hpp +50 -0
  140. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_sse_register.hpp +50 -0
  141. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma4_register.hpp +50 -0
  142. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_i8mm_neon64_register.hpp +55 -0
  143. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_neon64_register.hpp +55 -0
  144. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_neon_register.hpp +154 -0
  145. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_register.hpp +94 -0
  146. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_rvv_register.hpp +506 -0
  147. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse2_register.hpp +59 -0
  148. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse3_register.hpp +49 -0
  149. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse4_1_register.hpp +48 -0
  150. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse4_2_register.hpp +48 -0
  151. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_ssse3_register.hpp +48 -0
  152. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sve_register.hpp +156 -0
  153. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_traits.hpp +337 -0
  154. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_utils.hpp +536 -0
  155. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_vsx_register.hpp +77 -0
  156. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_wasm_register.hpp +59 -0
  157. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/xsimd.hpp +75 -0
  158. sequenzo/dissimilarity_measures/src/xsimd/test/architectures/dummy.cpp +7 -0
  159. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set.cpp +13 -0
  160. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean.cpp +24 -0
  161. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_aligned.cpp +25 -0
  162. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_arch_independent.cpp +28 -0
  163. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_tag_dispatch.cpp +25 -0
  164. sequenzo/dissimilarity_measures/src/xsimd/test/doc/manipulating_abstract_batches.cpp +7 -0
  165. sequenzo/dissimilarity_measures/src/xsimd/test/doc/manipulating_parametric_batches.cpp +8 -0
  166. sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum.hpp +31 -0
  167. sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum_avx2.cpp +3 -0
  168. sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum_sse2.cpp +3 -0
  169. sequenzo/dissimilarity_measures/src/xsimd/test/doc/writing_vectorized_code.cpp +11 -0
  170. sequenzo/dissimilarity_measures/src/xsimd/test/main.cpp +31 -0
  171. sequenzo/dissimilarity_measures/src/xsimd/test/test_api.cpp +230 -0
  172. sequenzo/dissimilarity_measures/src/xsimd/test/test_arch.cpp +217 -0
  173. sequenzo/dissimilarity_measures/src/xsimd/test/test_basic_math.cpp +183 -0
  174. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch.cpp +1049 -0
  175. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_bool.cpp +508 -0
  176. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_cast.cpp +409 -0
  177. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_complex.cpp +712 -0
  178. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_constant.cpp +286 -0
  179. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_float.cpp +141 -0
  180. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_int.cpp +365 -0
  181. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_manip.cpp +308 -0
  182. sequenzo/dissimilarity_measures/src/xsimd/test/test_bitwise_cast.cpp +222 -0
  183. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_exponential.cpp +226 -0
  184. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_hyperbolic.cpp +183 -0
  185. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_power.cpp +265 -0
  186. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_trigonometric.cpp +236 -0
  187. sequenzo/dissimilarity_measures/src/xsimd/test/test_conversion.cpp +248 -0
  188. sequenzo/dissimilarity_measures/src/xsimd/test/test_custom_default_arch.cpp +28 -0
  189. sequenzo/dissimilarity_measures/src/xsimd/test/test_error_gamma.cpp +170 -0
  190. sequenzo/dissimilarity_measures/src/xsimd/test/test_explicit_batch_instantiation.cpp +32 -0
  191. sequenzo/dissimilarity_measures/src/xsimd/test/test_exponential.cpp +202 -0
  192. sequenzo/dissimilarity_measures/src/xsimd/test/test_extract_pair.cpp +92 -0
  193. sequenzo/dissimilarity_measures/src/xsimd/test/test_fp_manipulation.cpp +77 -0
  194. sequenzo/dissimilarity_measures/src/xsimd/test/test_gnu_source.cpp +30 -0
  195. sequenzo/dissimilarity_measures/src/xsimd/test/test_hyperbolic.cpp +167 -0
  196. sequenzo/dissimilarity_measures/src/xsimd/test/test_load_store.cpp +304 -0
  197. sequenzo/dissimilarity_measures/src/xsimd/test/test_memory.cpp +61 -0
  198. sequenzo/dissimilarity_measures/src/xsimd/test/test_poly_evaluation.cpp +64 -0
  199. sequenzo/dissimilarity_measures/src/xsimd/test/test_power.cpp +184 -0
  200. sequenzo/dissimilarity_measures/src/xsimd/test/test_rounding.cpp +199 -0
  201. sequenzo/dissimilarity_measures/src/xsimd/test/test_select.cpp +101 -0
  202. sequenzo/dissimilarity_measures/src/xsimd/test/test_shuffle.cpp +760 -0
  203. sequenzo/dissimilarity_measures/src/xsimd/test/test_sum.cpp +4 -0
  204. sequenzo/dissimilarity_measures/src/xsimd/test/test_sum.hpp +34 -0
  205. sequenzo/dissimilarity_measures/src/xsimd/test/test_traits.cpp +172 -0
  206. sequenzo/dissimilarity_measures/src/xsimd/test/test_trigonometric.cpp +208 -0
  207. sequenzo/dissimilarity_measures/src/xsimd/test/test_utils.hpp +611 -0
  208. sequenzo/dissimilarity_measures/src/xsimd/test/test_wasm/test_wasm_playwright.py +123 -0
  209. sequenzo/dissimilarity_measures/src/xsimd/test/test_xsimd_api.cpp +1460 -0
  210. sequenzo/dissimilarity_measures/utils/__init__.py +16 -0
  211. sequenzo/dissimilarity_measures/utils/get_LCP_length_for_2_seq.py +44 -0
  212. sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.cpython-310-darwin.so +0 -0
  213. sequenzo/dissimilarity_measures/utils/seqconc.cpython-310-darwin.so +0 -0
  214. sequenzo/dissimilarity_measures/utils/seqdss.cpython-310-darwin.so +0 -0
  215. sequenzo/dissimilarity_measures/utils/seqdur.cpython-310-darwin.so +0 -0
  216. sequenzo/dissimilarity_measures/utils/seqlength.cpython-310-darwin.so +0 -0
  217. sequenzo/multidomain/__init__.py +23 -0
  218. sequenzo/multidomain/association_between_domains.py +311 -0
  219. sequenzo/multidomain/cat.py +431 -0
  220. sequenzo/multidomain/combt.py +519 -0
  221. sequenzo/multidomain/dat.py +89 -0
  222. sequenzo/multidomain/idcd.py +139 -0
  223. sequenzo/multidomain/linked_polyad.py +292 -0
  224. sequenzo/openmp_setup.py +233 -0
  225. sequenzo/prefix_tree/__init__.py +43 -0
  226. sequenzo/prefix_tree/individual_level_indicators.py +1274 -0
  227. sequenzo/prefix_tree/system_level_indicators.py +465 -0
  228. sequenzo/prefix_tree/utils.py +54 -0
  229. sequenzo/sequence_characteristics/__init__.py +40 -0
  230. sequenzo/sequence_characteristics/complexity_index.py +49 -0
  231. sequenzo/sequence_characteristics/overall_cross_sectional_entropy.py +220 -0
  232. sequenzo/sequence_characteristics/plot_characteristics.py +593 -0
  233. sequenzo/sequence_characteristics/simple_characteristics.py +311 -0
  234. sequenzo/sequence_characteristics/state_frequencies_and_entropy_per_sequence.py +39 -0
  235. sequenzo/sequence_characteristics/turbulence.py +155 -0
  236. sequenzo/sequence_characteristics/variance_of_spell_durations.py +86 -0
  237. sequenzo/sequence_characteristics/within_sequence_entropy.py +43 -0
  238. sequenzo/suffix_tree/__init__.py +48 -0
  239. sequenzo/suffix_tree/individual_level_indicators.py +1638 -0
  240. sequenzo/suffix_tree/system_level_indicators.py +456 -0
  241. sequenzo/suffix_tree/utils.py +56 -0
  242. sequenzo/visualization/__init__.py +29 -0
  243. sequenzo/visualization/plot_mean_time.py +194 -0
  244. sequenzo/visualization/plot_modal_state.py +276 -0
  245. sequenzo/visualization/plot_most_frequent_sequences.py +147 -0
  246. sequenzo/visualization/plot_relative_frequency.py +404 -0
  247. sequenzo/visualization/plot_sequence_index.py +937 -0
  248. sequenzo/visualization/plot_single_medoid.py +153 -0
  249. sequenzo/visualization/plot_state_distribution.py +613 -0
  250. sequenzo/visualization/plot_transition_matrix.py +190 -0
  251. sequenzo/visualization/utils/__init__.py +23 -0
  252. sequenzo/visualization/utils/utils.py +310 -0
  253. sequenzo/with_event_history_analysis/__init__.py +35 -0
  254. sequenzo/with_event_history_analysis/sequence_analysis_multi_state_model.py +850 -0
  255. sequenzo/with_event_history_analysis/sequence_history_analysis.py +283 -0
  256. sequenzo-0.1.21.dist-info/METADATA +308 -0
  257. sequenzo-0.1.21.dist-info/RECORD +254 -0
  258. sequenzo-0.1.21.dist-info/WHEEL +5 -0
  259. sequenzo-0.1.21.dist-info/licenses/LICENSE +28 -0
  260. sequenzo-0.1.21.dist-info/top_level.txt +1 -0
@@ -0,0 +1,465 @@
1
+ """
2
+ @Author : Yuqi Liang 梁彧祺
3
+ @File : system_level_indicators.py
4
+ @Time : 02/05/2025 11:06
5
+ @Desc :
6
+ This module includes tools for building prefix trees, computing prefix counts, branching factors, and Jensen-Shannon divergence,
7
+ as well as generating composite scores to summarize system-level sequence diversity and complexity over time.
8
+ Visualization functions are also provided to plot these indicators and their distributions,
9
+ supporting comprehensive analysis of sequence system dynamics.
10
+ """
11
+ from collections import defaultdict, Counter
12
+ import numpy as np
13
+ from scipy.stats import zscore
14
+ from numpy import array
15
+ from scipy.spatial.distance import jensenshannon
16
+
17
+ from sequenzo.visualization.utils import save_and_show_results
18
+ import matplotlib.pyplot as plt
19
+ import seaborn as sns
20
+ from typing import List, Optional, Dict
21
+
22
+
23
class PrefixTree:
    """
    Prefix tree (trie) over sequences of hashable states.

    The tree is stored twice: as nested dictionaries (``root``) for child
    lookups, and as a flat ``counts`` mapping keyed by prefix tuples for
    per-depth queries.
    """

    def __init__(self):
        # Nested dicts: each key is a state, each value is the subtree below it.
        self.root = {}
        # Prefix (tuple of states) -> number of times `insert` walked through it.
        self.counts = defaultdict(int)  # prefix -> count
        # Not maintained by `insert`; `build_prefix_tree` assigns it directly.
        self.total_sequences = 0

    def insert(self, sequence):
        """
        Register `sequence` and every one of its non-empty prefixes.

        Each prefix counter is incremented by one and missing tree nodes are
        created along the way.
        """
        prefix = []
        node = self.root
        for state in sequence:
            prefix.append(state)
            self.counts[tuple(prefix)] += 1
            # setdefault creates the child node on first visit and returns it.
            node = node.setdefault(state, {})

    def get_prefixes_at_depth(self, depth):
        """Return all stored prefixes (tuples) whose length equals `depth`."""
        return [k for k in self.counts if len(k) == depth]

    def get_children(self, prefix):
        """
        Given a prefix (as a list or tuple), return its immediate children in the tree.

        Returns:
            dict: mapping from child state -> subtree dict. An empty dict is
            returned when the prefix is not present in the tree.
        """
        node = self.root
        for state in prefix:
            node = node.get(state)
            if node is None:
                # Unknown prefix: nothing below it.
                return {}
        return node

    def get_children_count(self, prefix):
        """Return the number of distinct states that directly follow `prefix`."""
        return len(self.get_children(prefix))

    def describe(self):
        """Print an overview of the tree, including the number of distinct prefixes per level."""
        # One pass over the stored prefixes yields the size of every level.
        level_sizes = Counter(len(k) for k in self.counts)
        max_depth = max(level_sizes) if level_sizes else 0
        total_prefixes = len(self.counts)
        print("\n[PrefixTree Overview]")
        print(f"[>] Total sequences inserted: {self.total_sequences}")
        print(f"[>] Max depth (time points): {max_depth}")
        print(f"[>] Total distinct prefixes: {total_prefixes}")

        for t in range(1, max_depth + 1):
            print(f" Level {t}: {level_sizes[t]} unique prefixes")

    def __repr__(self):
        """
        Returns a brief textual summary of the prefix tree object.

        Note:
            This method is intended to provide a lightweight, one-line overview
            (e.g., max depth and total prefix count). For a full structural report
            including per-level statistics, use the `.describe()` method instead.
        """
        max_depth = max((len(k) for k in self.counts), default=0)
        return f"PrefixTree(max_depth={max_depth}, total_prefixes={len(self.counts)})"
85
+
86
+
87
def compute_prefix_count(tree, max_depth):
    """
    Count the distinct prefixes stored at each depth of `tree`.

    Returns a list of length `max_depth` whose entry at index ``d - 1`` is the
    number of prefixes reported by ``tree.get_prefixes_at_depth(d)``.
    """
    counts_per_depth = []
    for depth in range(1, max_depth + 1):
        prefixes_here = tree.get_prefixes_at_depth(depth)
        counts_per_depth.append(len(prefixes_here))
    return counts_per_depth
89
+
90
+
91
def compute_branching_factor(tree, max_depth):
    """
    Compute the mean number of children per prefix for each time step.

    For depth ``t`` (2..max_depth) the value is the average of
    ``tree.get_children_count(p)`` over all prefixes ``p`` of length ``t - 1``;
    it is 0 when no prefix of that length exists. The first entry is a 0
    placeholder, because depth 1 has no preceding prefixes, so the result has
    the same length as ``compute_prefix_count(tree, max_depth)``.

    Returns:
        list: `max_depth` values; an empty list when ``max_depth < 1``.
    """
    if max_depth < 1:
        # Without this guard the padding below would yield [0], which is one
        # element longer than the (empty) prefix-count series.
        return []

    result = [0]  # pad to align with prefix count
    for t in range(2, max_depth + 1):
        prefixes = tree.get_prefixes_at_depth(t - 1)
        if not prefixes:
            result.append(0)
            continue
        child_counts = [tree.get_children_count(p) for p in prefixes]
        result.append(np.mean(child_counts))
    return result
101
+
102
+
103
def compute_js_divergence(sequences, state_set):
    """
    Measure how much the cross-sectional state distribution changes between
    consecutive time points.

    For every time point the relative frequency of each state in `state_set`
    is computed across all sequences. Consecutive distributions are compared
    with ``scipy.spatial.distance.jensenshannon``, which returns the
    Jensen-Shannon *distance* (the square root of the divergence).

    Parameters:
        sequences: indexable collection of sequences. The number of time
            points is taken from the first sequence, and every sequence is
            indexed at each of those positions.
        state_set: iterable of states; its iteration order fixes the order of
            the probability vector.

    Returns:
        list: one score per time point. The first score is always 0.0 because
            there is no earlier time point to compare with. An empty list is
            returned when there are no sequences or no time points.

    Note:
        If none of the states in `state_set` occurs at a time point, the
        normalisation divides by zero and the affected scores are NaN.
    """
    # len() rather than truthiness so NumPy arrays are accepted as well.
    if len(sequences) == 0:
        return []

    T = len(sequences[0])
    if T == 0:
        # No time points: keep the result aligned with the (empty) time axis.
        return []

    distros = []
    for t in range(T):
        counter = Counter(seq[t] for seq in sequences)
        dist = np.array([counter[s] for s in state_set], dtype=float)
        dist = dist / dist.sum()
        distros.append(dist)

    js_scores = [0.0]
    for t in range(1, T):
        js_scores.append(jensenshannon(distros[t], distros[t - 1]))
    return js_scores
117
+
118
+
119
def build_prefix_tree(sequences):
    """
    Build a `PrefixTree` from a collection of sequences.

    Each sequence is inserted exactly once. `PrefixTree.insert` already
    registers every prefix of the sequence it receives, so inserting the
    prefixes separately would count a prefix of length k
    ``len(seq) - k + 1`` times per sequence instead of once.

    Parameters:
        sequences: sized collection of sequences of hashable states.

    Returns:
        PrefixTree: the populated tree, with `total_sequences` set to
        ``len(sequences)``.
    """
    tree = PrefixTree()
    tree.total_sequences = len(sequences)
    for seq in sequences:
        tree.insert(seq)
    return tree
126
+
127
+
128
def plot_system_indicators(
    prefix_counts: List[float],
    branching_factors: List[float],
    js_divergence: Optional[List[float]] = None,
    x_values: Optional[List] = None,
    x_label: str = "Time (t)",
    legend_loc: str = 'lower right',
    legend_fontsize: int = 10,
    save_as: Optional[str] = None,
    figsize: Optional[tuple] = None,
    dpi: int = 300,
    custom_colors: Optional[Dict[str, str]] = None,
    show: bool = True,
    plot_distributions: bool = False,
    style: Optional[str] = None
) -> None:
    """
    Plot a single group's system-level indicators using the same visual style as
    `plot_system_indicators_multiple_comparison`, but for one subplot.

    Design:
    - Left y-axis: raw Prefix Count
    - Right y-axis: z-score of Branching Factor and (optionally) JS Divergence
    - Consistent colors/markers and legend handling with the multi-comparison API

    Parameters:
    - prefix_counts: List[float]
        Raw prefix counts per time step
    - branching_factors: List[float]
        Branching factor per time step. Length must equal len(prefix_counts)
    - js_divergence: Optional[List[float]]
        JS divergence per time step; if None or empty, only branching factor is shown
        on the right axis. Lists and NumPy arrays are both accepted
    - x_values: Optional[List]
        Custom x-axis ticks (e.g., years). If None, uses 1..T. Length must equal data length
    - x_label: str
        Label for x-axis. Default: "Time (t)"
    - legend_loc: str
        Legend location, e.g., 'upper left', 'upper right', 'lower right', 'best', etc. Default: 'lower right'
    - legend_fontsize: int
        Font size for legend text. Default: 10
    - save_as: Optional[str]
        If provided, save the figure to this path (png). DPI controlled by `dpi`.
        When `plot_distributions` is True, the distribution figure is saved next to it
        with "_distributions" inserted before the file extension
    - figsize: Optional[tuple]
        Figure size (width, height). Default: (12, 6)
    - dpi: int
        Figure DPI when saving. Default: 300
    - custom_colors: Optional[Dict[str, str]]
        Optional color overrides. Keys: "Prefix Count", "Branching Factor", "JS Divergence"
    - show: bool
        Whether to display the figure
    - plot_distributions: bool
        If True, additionally show raw distributions (histograms) of indicators
    - style: Optional[str]
        Matplotlib/seaborn style to apply. Common options: 'whitegrid', 'darkgrid',
        'white', 'dark', 'ticks'. If None, uses default style. Default: None

    Raises:
        ValueError: if `x_values`, `branching_factors` or `js_divergence` do not have
        the same length as `prefix_counts`.

    Example:
        >>> plot_system_indicators(
        ...     prefix_counts=india_prefix_counts,
        ...     branching_factors=india_branching_factors,
        ...     js_divergence=india_js_scores,
        ...     x_values=[2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019],
        ...     x_label="Year",
        ...     legend_loc="lower right",
        ...     figsize=(12, 6),
        ...     dpi=300,
        ... )
    """
    import os  # local import: only needed to derive the distribution file name

    T = len(prefix_counts)
    # Set x values to align with multi-group API
    if x_values is None:
        x_values = list(range(1, T + 1))
    if len(x_values) != T:
        raise ValueError("Length of x_values must match data length")

    # Explicit None/len test: the truth value of a NumPy array is ambiguous and
    # would raise if the caller passes an array instead of a list.
    has_js = js_divergence is not None and len(js_divergence) > 0

    # Fail early with a clear message instead of a shape error from matplotlib.
    if len(branching_factors) != T:
        raise ValueError("Length of branching_factors must match length of prefix_counts")
    if has_js and len(js_divergence) != T:
        raise ValueError("Length of js_divergence must match length of prefix_counts")

    # Normalize others
    bf_z = zscore(array(branching_factors))
    js_z = zscore(array(js_divergence)) if has_js else None

    color_defaults = {
        "Prefix Count": "#6BB6FF",      # Soft sky blue (like Monet's water lilies)
        "Branching Factor": "#FFB347",  # Warm peach/coral (like sunset reflections)
        "JS Divergence": "#F4A6CD",     # Soft rose pink (divergence = different paths)
    }
    colors = {**color_defaults, **(custom_colors or {})}

    # --- Main line plot with dual axes ---
    if figsize is None:
        figsize = (12, 6)

    # Apply style if specified
    if style is not None:
        # Seaborn style names go through seaborn; anything else is treated as a matplotlib style.
        seaborn_styles = ['whitegrid', 'darkgrid', 'white', 'dark', 'ticks']
        if style in seaborn_styles:
            sns.set_style(style)
        else:
            plt.style.use(style)

    fig, ax1 = plt.subplots(figsize=figsize)
    ax1.set_xlabel(x_label)
    ax1.set_ylabel("Prefix Count", color=colors["Prefix Count"])
    ax1.plot(x_values, prefix_counts, marker='o', color=colors["Prefix Count"], label="Prefix Count")
    ax1.tick_params(axis='y', labelcolor=colors["Prefix Count"])

    ax2 = ax1.twinx()
    ax2.set_ylabel("Z-score (Other Indicators)")
    ax2.plot(x_values, bf_z, marker='s', label='Branching Factor (z)', color=colors["Branching Factor"])
    if js_z is not None:
        ax2.plot(x_values, js_z, marker='^', label='JS Divergence (z)', color=colors["JS Divergence"])

    # Merge the handles of both axes so a single legend covers all lines.
    lines1, labels1 = ax1.get_legend_handles_labels()
    lines2, labels2 = ax2.get_legend_handles_labels()
    ax2.legend(lines1 + lines2, labels1 + labels2, loc=legend_loc, fontsize=legend_fontsize)

    ax1.set_title("System-Level Trajectory Indicators: Raw vs. Normalized")
    fig.tight_layout()

    save_and_show_results(save_as=save_as, dpi=dpi, show=show)

    # --- Distribution plots if requested ---
    if plot_distributions:
        raw_data = {
            "Prefix Count": prefix_counts,
            "Branching Factor": branching_factors,
        }
        if has_js:
            raw_data["JS Divergence"] = js_divergence

        n = len(raw_data)
        fig, axes = plt.subplots(1, n, figsize=(4 * n, 4))
        if n == 1:
            axes = [axes]

        for ax, (label, values) in zip(axes, raw_data.items()):
            sns.histplot(values, kde=True, ax=ax, color=colors.get(label, None))
            ax.set_title(f"{label} Distribution")
            ax.set_xlabel("Value")
            # histplot's default statistic is the bin count, not a density.
            ax.set_ylabel("Count")

        fig.tight_layout()
        if save_as:
            # Insert the suffix before the extension (if any). A plain
            # str.replace(".png", ...) leaves paths without ".png" unchanged,
            # so the distribution figure would be saved to the main figure's path.
            root, ext = os.path.splitext(save_as)
            dist_path = f"{root}_distributions{ext}"
        else:
            dist_path = None
        save_and_show_results(save_as=dist_path, dpi=dpi, show=show)
272
+
273
+
274
def plot_system_indicators_multiple_comparison(
    groups_data: Dict[str, Dict[str, List[float]]],
    group_names: Optional[List[str]] = None,
    subplot_titles: Optional[List[str]] = None,
    x_values: Optional[List] = None,
    x_label: str = "Time (t)",
    legend_loc: str = 'lower right',
    legend_fontsize: int = 10,
    save_as: Optional[str] = None,
    figsize: Optional[tuple] = None,
    dpi: int = 300,
    custom_colors: Optional[Dict[str, str]] = None,
    show: bool = True,
    style: Optional[str] = None
) -> None:
    """
    Plot system-level indicators comparison across multiple groups using dual y-axis design.

    One subplot is drawn per group, stacked vertically. In each subplot the
    left y-axis shows the raw prefix counts and the right (twin) y-axis shows
    the branching factors and JS divergence after ``zscore`` normalization.
    All input validation is performed before any figure is created.

    Parameters:
    -----------
    groups_data : Dict[str, Dict[str, List[float]]]
        Dictionary with group names as keys and data dictionaries as values.
        Each data dict should contain 'prefix_counts', 'branching_factors', and 'js_divergence'.
        All three series of every group must have the same length.
        Example: {
            "Group1": {
                "prefix_counts": [10, 15, 20, ...],
                "branching_factors": [1.2, 1.5, 1.8, ...],
                "js_divergence": [0.1, 0.2, 0.15, ...]
            },
            "Group2": {...}
        }
    group_names : Optional[List[str]]
        Custom names for groups. If None, uses keys from groups_data.
        Used for default subplot titles if subplot_titles is not provided.
    subplot_titles : Optional[List[str]]
        Custom titles for each subplot. If None, uses default format:
        "{group_name} - System-Level Trajectory Indicators: Raw vs. Normalized"
    x_values : Optional[List]
        Custom x-axis values. If None, uses 1, 2, 3, ...
    x_label : str
        Label for x-axis (only set on the bottom subplot). Default: "Time (t)"
    legend_loc : str
        Legend location. Options: 'upper left', 'upper right', 'lower left',
        'lower right', 'center', 'best', etc. Default: 'lower right'
    legend_fontsize : int
        Font size for legend text. Default: 10
    save_as : Optional[str]
        File path to save the plot (without extension)
    figsize : Optional[tuple]
        Figure size (width, height). If None, auto-calculated based on number of groups
    dpi : int
        DPI for saving. Default: 300
    custom_colors : Optional[Dict[str, str]]
        Custom colors for indicators, keyed by "Prefix Count", "Branching Factor"
        and/or "JS Divergence". Missing keys fall back to the default palette.
    show : bool
        Whether to show the plot. Default: True
    style : Optional[str]
        Style to apply. Seaborn styles ('whitegrid', 'darkgrid', 'white', 'dark', 'ticks')
        or matplotlib styles. If None, uses default style. Default: None

    Raises:
    -------
    ValueError
        If groups_data is empty, if group_names / subplot_titles / x_values do
        not match the data in length, if a group lacks one of the required
        series, or if a series length differs from the shared data length.

    Example:
    --------
    >>> data = {
    ...     "India": {
    ...         "prefix_counts": india_prefix_counts,
    ...         "branching_factors": india_branching_factors,
    ...         "js_divergence": india_js_divergence
    ...     },
    ...     "US": {
    ...         "prefix_counts": us_prefix_counts,
    ...         "branching_factors": us_branching_factors,
    ...         "js_divergence": us_js_divergence
    ...     }
    ... }
    >>> plot_system_indicators_multiple_comparison(
    ...     groups_data=data,
    ...     x_label="Years",
    ...     legend_loc='upper right',
    ...     save_as="multi_country_comparison"
    ... )

    >>> # With custom subplot titles
    >>> plot_system_indicators_multiple_comparison(
    ...     groups_data=data,
    ...     subplot_titles=["India development trajectory", "US development trajectory"],
    ...     x_label="Year",
    ...     save_as="custom_titles_comparison"
    ... )
    """

    # Validate input
    if not groups_data:
        raise ValueError("groups_data cannot be empty")

    # Get group names
    if group_names is None:
        group_names = list(groups_data.keys())

    if len(group_names) != len(groups_data):
        raise ValueError("Length of group_names must match number of groups in groups_data")

    # Validate subplot_titles
    if subplot_titles is not None and len(subplot_titles) != len(groups_data):
        raise ValueError("Length of subplot_titles must match number of groups in groups_data")

    # Validate data completeness for every group up front, so a missing series
    # is reported as a ValueError before any key is read or any figure is created.
    required_keys = ('prefix_counts', 'branching_factors', 'js_divergence')
    for group_key, data in groups_data.items():
        for key in required_keys:
            if key not in data:
                raise ValueError(f"Missing '{key}' in data for group '{group_key}'")

    # Get first group to determine data length
    first_group_data = next(iter(groups_data.values()))
    T = len(first_group_data['prefix_counts'])

    # Set x values
    if x_values is None:
        x_values = list(range(1, T + 1))

    if len(x_values) != T:
        raise ValueError("Length of x_values must match data length")

    # Every series is plotted against the same x_values, so each must have length T.
    for group_key, data in groups_data.items():
        for key in required_keys:
            if len(data[key]) != T:
                raise ValueError(
                    f"Length of '{key}' for group '{group_key}' is {len(data[key])}, expected {T}"
                )

    # Color settings - Monet-inspired watercolor palette for divergence analysis
    color_defaults = {
        "Prefix Count": "#6BB6FF",      # Soft sky blue (like Monet's water lilies)
        "Branching Factor": "#FFB347",  # Warm peach/coral (like sunset reflections)
        "JS Divergence": "#F4A6CD",     # Soft rose pink (divergence = different paths)
    }
    colors = {**color_defaults, **(custom_colors or {})}

    # Calculate figure size
    n_groups = len(groups_data)
    if figsize is None:
        figsize = (12, 4 * n_groups + 2)  # Dynamic height based on number of groups

    # Apply style if specified
    if style is not None:
        # Seaborn style names go through seaborn; anything else is handed to matplotlib
        seaborn_styles = ['whitegrid', 'darkgrid', 'white', 'dark', 'ticks']
        if style in seaborn_styles:
            sns.set_style(style)
        else:
            plt.style.use(style)

    # Create subplots
    fig, axes = plt.subplots(n_groups, 1, figsize=figsize)

    # Handle single group case: wrap the lone Axes so it can be indexed like the multi-group result
    if n_groups == 1:
        axes = [axes]

    # Plot each group
    for i, (group_key, group_name) in enumerate(zip(groups_data.keys(), group_names)):
        data = groups_data[group_key]
        ax = axes[i]

        # Normalize data (z-score)
        bf_z = zscore(array(data['branching_factors']))
        js_z = zscore(array(data['js_divergence']))

        # Left y-axis: raw prefix counts
        ax.set_ylabel("Prefix Count", color=colors["Prefix Count"])
        ax.plot(x_values, data['prefix_counts'], marker='o',
                color=colors["Prefix Count"], label="Prefix Count")
        ax.tick_params(axis='y', labelcolor=colors["Prefix Count"])

        # Right y-axis: normalized indicators
        ax_twin = ax.twinx()
        ax_twin.set_ylabel("Z-score (Other Indicators)")
        ax_twin.plot(x_values, bf_z, marker='s',
                     label='Branching Factor (z)', color=colors["Branching Factor"])
        ax_twin.plot(x_values, js_z, marker='^',
                     label='JS Divergence (z)', color=colors["JS Divergence"])

        # Legend: merge the handles of both axes into a single legend box
        lines1, labels1 = ax.get_legend_handles_labels()
        lines2, labels2 = ax_twin.get_legend_handles_labels()
        ax_twin.legend(lines1 + lines2, labels1 + labels2, loc=legend_loc, fontsize=legend_fontsize)

        # Title and labels
        if subplot_titles is not None:
            title = subplot_titles[i]
        else:
            title = f"{group_name} - System-Level Trajectory Indicators: Raw vs. Normalized"
        ax.set_title(title)

        # Only set x-label for the bottom subplot
        if i == n_groups - 1:
            ax.set_xlabel(x_label)

    fig.tight_layout()
    save_and_show_results(save_as=save_as, dpi=dpi, show=show)
@@ -0,0 +1,54 @@
1
+ """
2
+ @Author : Yuqi Liang 梁彧祺
3
+ @File : utils.py
4
+ @Time : 02/05/2025 12:26
5
+ @Desc :
6
+ """
7
+ import pandas as pd
8
+ from typing import List, Tuple
9
+
10
+
11
def extract_sequences(df: pd.DataFrame, time_cols: List[str]) -> List[List[str]]:
    """
    Turn the selected time columns into one plain Python list per row.

    Parameters:
        df (pd.DataFrame): Input DataFrame.
        time_cols (List[str]): Columns holding the sequence, in time order.

    Returns:
        List[List[str]]: One list of states per DataFrame row, with the
        states ordered as in ``time_cols``.
    """
    # Select the columns first, then convert the underlying array in one go.
    time_frame = df[time_cols]
    state_matrix = time_frame.values
    return state_matrix.tolist()
23
+
24
+
25
def get_state_space(sequences: List[List[str]]) -> List[str]:
    """
    Collect every distinct state that occurs in any of the sequences.

    Parameters:
        sequences (List[List[str]]): Sequence data.

    Returns:
        List[str]: The unique states in sorted order.
    """
    # A set comprehension deduplicates across all sequences in a single pass.
    distinct_states = {state for sequence in sequences for state in sequence}
    return sorted(distinct_states)
39
+
40
+
41
def convert_to_prefix_tree_data(df: pd.DataFrame, time_cols: List[str]) -> Tuple[List[List[str]], List[str]]:
    """
    Build the two inputs derived from a DataFrame in one call: the row-wise
    sequences and the sorted set of states they contain.

    Parameters:
        df (pd.DataFrame): Input DataFrame.
        time_cols (List[str]): Sequence columns (e.g., ['C1', ..., 'C10'])

    Returns:
        Tuple[List[List[str]], List[str]]: sequences, unique states
    """
    # Delegate to the module's own helpers instead of repeating their logic here.
    row_sequences = extract_sequences(df, time_cols)
    return row_sequences, get_state_space(row_sequences)
@@ -0,0 +1,40 @@
1
+ """
2
+ @Author : Yuqi Liang 梁彧祺
3
+ @File : __init__.py
4
+ @Time : 22/09/2025 22:46
5
+ @Desc :
6
+ """
7
+ from .simple_characteristics import (get_subsequences_in_single_sequence,
8
+ get_subsequences_all_sequences,
9
+ get_number_of_transitions)
10
+
11
+ from .state_frequencies_and_entropy_per_sequence import get_state_freq_and_entropy_per_seq
12
+
13
+ from .within_sequence_entropy import get_within_sequence_entropy
14
+
15
+ from .overall_cross_sectional_entropy import get_cross_sectional_entropy
16
+
17
+ from .variance_of_spell_durations import get_spell_duration_variance
18
+
19
+ from .turbulence import get_turbulence
20
+
21
+ from .complexity_index import get_complexity_index
22
+
23
+ from .plot_characteristics import plot_longitudinal_characteristics, plot_cross_sectional_characteristics
24
+
25
+ __all__ = [
26
+ "get_subsequences_in_single_sequence",
27
+ "get_subsequences_all_sequences",
28
+ "get_number_of_transitions",
29
+
30
+ "get_complexity_index",
31
+
32
+ "get_state_freq_and_entropy_per_seq",
33
+ "get_within_sequence_entropy",
34
+ "get_cross_sectional_entropy",
35
+ "get_spell_duration_variance",
36
+ "get_turbulence",
37
+
38
+ "plot_longitudinal_characteristics",
39
+ "plot_cross_sectional_characteristics"
40
+ ]
@@ -0,0 +1,49 @@
1
+ """
2
+ @Author : 李欣怡
3
+ @File : complexity_index.py
4
+ @Time : 2025/9/23 23:45
5
+ @Desc : Complexity index
6
+
7
+ The corresponding function name in TraMineR is seqici.R,
8
+ with the source code available at: https://github.com/cran/TraMineR/blob/master/R/seqici.R
9
+ """
10
+ import os
11
+ from contextlib import redirect_stdout
12
+ import pandas as pd
13
+ import numpy as np
14
+
15
+ from sequenzo.define_sequence_data import SequenceData
16
+ from .simple_characteristics import get_number_of_transitions
17
+ from .within_sequence_entropy import get_within_sequence_entropy
18
+
19
def get_complexity_index(seqdata, silent=True):
    """
    Compute a complexity index for every sequence in ``seqdata``.

    The index is the square root of the product of two per-sequence values,
    both requested with ``norm=True``: the number of transitions and the
    within-sequence entropy. Each is taken from the second column of the
    frame returned by its helper.

    Parameters:
        seqdata (SequenceData): The sequence object to analyse.
        silent (bool): When False, print a one-line progress message.

    Returns:
        pd.DataFrame: A frame with a 'Complexity Index' column, built on the
        index of ``seqdata.seqdata`` and then passed through ``reset_index``
        (a resulting 'index' column is renamed to 'ID').

    Raises:
        ValueError: If ``seqdata`` is not a ``SequenceData`` instance.
    """
    if not isinstance(seqdata, SequenceData):
        raise ValueError("[!] data is NOT a sequence object, see SequenceData function to create one.")

    if not silent:
        print(f"[>] Computing complexity index for {seqdata.seqdata.shape[0]} sequences ...")

    # Second column of the helper's output holds the per-sequence value.
    transitions = get_number_of_transitions(seqdata=seqdata, norm=True).iloc[:, 1]
    transitions.index = seqdata.seqdata.index

    # Anything the entropy helper prints to stdout is discarded.
    with open(os.devnull, 'w') as sink, redirect_stdout(sink):
        entropy = get_within_sequence_entropy(seqdata=seqdata, norm=True).iloc[:, 1]
    entropy.index = seqdata.seqdata.index

    complexity = np.sqrt(transitions * entropy)
    result = pd.DataFrame(complexity, index=seqdata.seqdata.index, columns=['Complexity Index'])
    return result.reset_index().rename(columns={'index': 'ID'})
39
+
40
if __name__ == '__main__':
    # Manual smoke run: compute the complexity index on a bundled dataset and
    # print it. Only the one extra name needed is imported (no wildcard import).
    from sequenzo import load_dataset

    df = load_dataset("country_co2_emissions")
    # Every column after the first is treated as a time point.
    _time = list(df.columns)[1:]
    states = ['Very Low', 'Low', 'Middle', 'High', 'Very High']
    sequence_data = SequenceData(df, time=_time, id_col="country", states=states)

    res = get_complexity_index(sequence_data)
    # A bare `res` expression only displays in a notebook; print so a script run shows the result.
    print(res)