sequenzo 0.1.31__cp310-cp310-macosx_10_9_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (299) hide show
  1. _sequenzo_fastcluster.cpython-310-darwin.so +0 -0
  2. sequenzo/__init__.py +349 -0
  3. sequenzo/big_data/__init__.py +12 -0
  4. sequenzo/big_data/clara/__init__.py +26 -0
  5. sequenzo/big_data/clara/clara.py +476 -0
  6. sequenzo/big_data/clara/utils/__init__.py +27 -0
  7. sequenzo/big_data/clara/utils/aggregatecases.py +92 -0
  8. sequenzo/big_data/clara/utils/davies_bouldin.py +91 -0
  9. sequenzo/big_data/clara/utils/get_weighted_diss.cpython-310-darwin.so +0 -0
  10. sequenzo/big_data/clara/utils/wfcmdd.py +205 -0
  11. sequenzo/big_data/clara/visualization.py +88 -0
  12. sequenzo/clustering/KMedoids.py +178 -0
  13. sequenzo/clustering/__init__.py +30 -0
  14. sequenzo/clustering/clustering_c_code.cpython-310-darwin.so +0 -0
  15. sequenzo/clustering/hierarchical_clustering.py +1256 -0
  16. sequenzo/clustering/sequenzo_fastcluster/fastcluster.py +495 -0
  17. sequenzo/clustering/sequenzo_fastcluster/src/fastcluster.cpp +1877 -0
  18. sequenzo/clustering/sequenzo_fastcluster/src/fastcluster_python.cpp +1264 -0
  19. sequenzo/clustering/src/KMedoid.cpp +263 -0
  20. sequenzo/clustering/src/PAM.cpp +237 -0
  21. sequenzo/clustering/src/PAMonce.cpp +265 -0
  22. sequenzo/clustering/src/cluster_quality.cpp +496 -0
  23. sequenzo/clustering/src/cluster_quality.h +128 -0
  24. sequenzo/clustering/src/cluster_quality_backup.cpp +570 -0
  25. sequenzo/clustering/src/module.cpp +228 -0
  26. sequenzo/clustering/src/weightedinertia.cpp +111 -0
  27. sequenzo/clustering/utils/__init__.py +27 -0
  28. sequenzo/clustering/utils/disscenter.py +122 -0
  29. sequenzo/data_preprocessing/__init__.py +22 -0
  30. sequenzo/data_preprocessing/helpers.py +303 -0
  31. sequenzo/datasets/__init__.py +41 -0
  32. sequenzo/datasets/biofam.csv +2001 -0
  33. sequenzo/datasets/biofam_child_domain.csv +2001 -0
  34. sequenzo/datasets/biofam_left_domain.csv +2001 -0
  35. sequenzo/datasets/biofam_married_domain.csv +2001 -0
  36. sequenzo/datasets/chinese_colonial_territories.csv +12 -0
  37. sequenzo/datasets/country_co2_emissions.csv +194 -0
  38. sequenzo/datasets/country_co2_emissions_global_deciles.csv +195 -0
  39. sequenzo/datasets/country_co2_emissions_global_quintiles.csv +195 -0
  40. sequenzo/datasets/country_co2_emissions_local_deciles.csv +195 -0
  41. sequenzo/datasets/country_co2_emissions_local_quintiles.csv +195 -0
  42. sequenzo/datasets/country_gdp_per_capita.csv +194 -0
  43. sequenzo/datasets/dyadic_children.csv +61 -0
  44. sequenzo/datasets/dyadic_parents.csv +61 -0
  45. sequenzo/datasets/mvad.csv +713 -0
  46. sequenzo/datasets/pairfam_activity_by_month.csv +1028 -0
  47. sequenzo/datasets/pairfam_activity_by_year.csv +1028 -0
  48. sequenzo/datasets/pairfam_family_by_month.csv +1028 -0
  49. sequenzo/datasets/pairfam_family_by_year.csv +1028 -0
  50. sequenzo/datasets/political_science_aid_shock.csv +166 -0
  51. sequenzo/datasets/political_science_donor_fragmentation.csv +157 -0
  52. sequenzo/define_sequence_data.py +1400 -0
  53. sequenzo/dissimilarity_measures/__init__.py +31 -0
  54. sequenzo/dissimilarity_measures/c_code.cpython-310-darwin.so +0 -0
  55. sequenzo/dissimilarity_measures/get_distance_matrix.py +762 -0
  56. sequenzo/dissimilarity_measures/get_substitution_cost_matrix.py +246 -0
  57. sequenzo/dissimilarity_measures/src/DHDdistance.cpp +148 -0
  58. sequenzo/dissimilarity_measures/src/LCPdistance.cpp +114 -0
  59. sequenzo/dissimilarity_measures/src/LCPspellDistance.cpp +215 -0
  60. sequenzo/dissimilarity_measures/src/OMdistance.cpp +247 -0
  61. sequenzo/dissimilarity_measures/src/OMspellDistance.cpp +281 -0
  62. sequenzo/dissimilarity_measures/src/__init__.py +0 -0
  63. sequenzo/dissimilarity_measures/src/dist2matrix.cpp +63 -0
  64. sequenzo/dissimilarity_measures/src/dp_utils.h +160 -0
  65. sequenzo/dissimilarity_measures/src/module.cpp +40 -0
  66. sequenzo/dissimilarity_measures/src/setup.py +30 -0
  67. sequenzo/dissimilarity_measures/src/utils.h +25 -0
  68. sequenzo/dissimilarity_measures/src/xsimd/.github/cmake-test/main.cpp +6 -0
  69. sequenzo/dissimilarity_measures/src/xsimd/benchmark/main.cpp +159 -0
  70. sequenzo/dissimilarity_measures/src/xsimd/benchmark/xsimd_benchmark.hpp +565 -0
  71. sequenzo/dissimilarity_measures/src/xsimd/docs/source/conf.py +37 -0
  72. sequenzo/dissimilarity_measures/src/xsimd/examples/mandelbrot.cpp +330 -0
  73. sequenzo/dissimilarity_measures/src/xsimd/examples/pico_bench.hpp +246 -0
  74. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_arithmetic.hpp +266 -0
  75. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_complex.hpp +112 -0
  76. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_details.hpp +323 -0
  77. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_logical.hpp +218 -0
  78. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_math.hpp +2583 -0
  79. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_memory.hpp +880 -0
  80. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_rounding.hpp +72 -0
  81. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_swizzle.hpp +174 -0
  82. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_trigo.hpp +978 -0
  83. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx.hpp +1924 -0
  84. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx2.hpp +1144 -0
  85. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512bw.hpp +656 -0
  86. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512cd.hpp +28 -0
  87. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512dq.hpp +244 -0
  88. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512er.hpp +20 -0
  89. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512f.hpp +2650 -0
  90. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512ifma.hpp +20 -0
  91. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512pf.hpp +20 -0
  92. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vbmi.hpp +77 -0
  93. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vbmi2.hpp +131 -0
  94. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vnni_avx512bw.hpp +20 -0
  95. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vnni_avx512vbmi2.hpp +20 -0
  96. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avxvnni.hpp +20 -0
  97. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_common.hpp +24 -0
  98. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_common_fwd.hpp +77 -0
  99. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_constants.hpp +393 -0
  100. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_emulated.hpp +788 -0
  101. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_avx.hpp +93 -0
  102. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_avx2.hpp +46 -0
  103. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_sse.hpp +97 -0
  104. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma4.hpp +92 -0
  105. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_i8mm_neon64.hpp +17 -0
  106. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_isa.hpp +142 -0
  107. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_neon.hpp +3142 -0
  108. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_neon64.hpp +1543 -0
  109. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_rvv.hpp +1513 -0
  110. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_scalar.hpp +1260 -0
  111. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse2.hpp +2024 -0
  112. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse3.hpp +67 -0
  113. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse4_1.hpp +339 -0
  114. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse4_2.hpp +44 -0
  115. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_ssse3.hpp +186 -0
  116. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sve.hpp +1155 -0
  117. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_vsx.hpp +892 -0
  118. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_wasm.hpp +1780 -0
  119. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_arch.hpp +240 -0
  120. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_config.hpp +484 -0
  121. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_cpuid.hpp +269 -0
  122. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_inline.hpp +27 -0
  123. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/math/xsimd_rem_pio2.hpp +719 -0
  124. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/memory/xsimd_aligned_allocator.hpp +349 -0
  125. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/memory/xsimd_alignment.hpp +91 -0
  126. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_all_registers.hpp +55 -0
  127. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_api.hpp +2765 -0
  128. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx2_register.hpp +44 -0
  129. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512bw_register.hpp +51 -0
  130. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512cd_register.hpp +51 -0
  131. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512dq_register.hpp +51 -0
  132. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512er_register.hpp +51 -0
  133. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512f_register.hpp +77 -0
  134. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512ifma_register.hpp +51 -0
  135. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512pf_register.hpp +51 -0
  136. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vbmi2_register.hpp +51 -0
  137. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vbmi_register.hpp +51 -0
  138. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512bw_register.hpp +54 -0
  139. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512vbmi2_register.hpp +53 -0
  140. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx_register.hpp +64 -0
  141. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avxvnni_register.hpp +44 -0
  142. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_batch.hpp +1524 -0
  143. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_batch_constant.hpp +300 -0
  144. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_common_arch.hpp +47 -0
  145. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_emulated_register.hpp +80 -0
  146. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_avx2_register.hpp +50 -0
  147. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_avx_register.hpp +50 -0
  148. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_sse_register.hpp +50 -0
  149. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma4_register.hpp +50 -0
  150. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_i8mm_neon64_register.hpp +55 -0
  151. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_neon64_register.hpp +55 -0
  152. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_neon_register.hpp +154 -0
  153. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_register.hpp +94 -0
  154. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_rvv_register.hpp +506 -0
  155. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse2_register.hpp +59 -0
  156. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse3_register.hpp +49 -0
  157. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse4_1_register.hpp +48 -0
  158. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse4_2_register.hpp +48 -0
  159. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_ssse3_register.hpp +48 -0
  160. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sve_register.hpp +156 -0
  161. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_traits.hpp +337 -0
  162. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_utils.hpp +536 -0
  163. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_vsx_register.hpp +77 -0
  164. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_wasm_register.hpp +59 -0
  165. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/xsimd.hpp +75 -0
  166. sequenzo/dissimilarity_measures/src/xsimd/test/architectures/dummy.cpp +7 -0
  167. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set.cpp +13 -0
  168. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean.cpp +24 -0
  169. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_aligned.cpp +25 -0
  170. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_arch_independent.cpp +28 -0
  171. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_tag_dispatch.cpp +25 -0
  172. sequenzo/dissimilarity_measures/src/xsimd/test/doc/manipulating_abstract_batches.cpp +7 -0
  173. sequenzo/dissimilarity_measures/src/xsimd/test/doc/manipulating_parametric_batches.cpp +8 -0
  174. sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum.hpp +31 -0
  175. sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum_avx2.cpp +3 -0
  176. sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum_sse2.cpp +3 -0
  177. sequenzo/dissimilarity_measures/src/xsimd/test/doc/writing_vectorized_code.cpp +11 -0
  178. sequenzo/dissimilarity_measures/src/xsimd/test/main.cpp +31 -0
  179. sequenzo/dissimilarity_measures/src/xsimd/test/test_api.cpp +230 -0
  180. sequenzo/dissimilarity_measures/src/xsimd/test/test_arch.cpp +217 -0
  181. sequenzo/dissimilarity_measures/src/xsimd/test/test_basic_math.cpp +183 -0
  182. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch.cpp +1049 -0
  183. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_bool.cpp +508 -0
  184. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_cast.cpp +409 -0
  185. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_complex.cpp +712 -0
  186. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_constant.cpp +286 -0
  187. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_float.cpp +141 -0
  188. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_int.cpp +365 -0
  189. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_manip.cpp +308 -0
  190. sequenzo/dissimilarity_measures/src/xsimd/test/test_bitwise_cast.cpp +222 -0
  191. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_exponential.cpp +226 -0
  192. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_hyperbolic.cpp +183 -0
  193. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_power.cpp +265 -0
  194. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_trigonometric.cpp +236 -0
  195. sequenzo/dissimilarity_measures/src/xsimd/test/test_conversion.cpp +248 -0
  196. sequenzo/dissimilarity_measures/src/xsimd/test/test_custom_default_arch.cpp +28 -0
  197. sequenzo/dissimilarity_measures/src/xsimd/test/test_error_gamma.cpp +170 -0
  198. sequenzo/dissimilarity_measures/src/xsimd/test/test_explicit_batch_instantiation.cpp +32 -0
  199. sequenzo/dissimilarity_measures/src/xsimd/test/test_exponential.cpp +202 -0
  200. sequenzo/dissimilarity_measures/src/xsimd/test/test_extract_pair.cpp +92 -0
  201. sequenzo/dissimilarity_measures/src/xsimd/test/test_fp_manipulation.cpp +77 -0
  202. sequenzo/dissimilarity_measures/src/xsimd/test/test_gnu_source.cpp +30 -0
  203. sequenzo/dissimilarity_measures/src/xsimd/test/test_hyperbolic.cpp +167 -0
  204. sequenzo/dissimilarity_measures/src/xsimd/test/test_load_store.cpp +304 -0
  205. sequenzo/dissimilarity_measures/src/xsimd/test/test_memory.cpp +61 -0
  206. sequenzo/dissimilarity_measures/src/xsimd/test/test_poly_evaluation.cpp +64 -0
  207. sequenzo/dissimilarity_measures/src/xsimd/test/test_power.cpp +184 -0
  208. sequenzo/dissimilarity_measures/src/xsimd/test/test_rounding.cpp +199 -0
  209. sequenzo/dissimilarity_measures/src/xsimd/test/test_select.cpp +101 -0
  210. sequenzo/dissimilarity_measures/src/xsimd/test/test_shuffle.cpp +760 -0
  211. sequenzo/dissimilarity_measures/src/xsimd/test/test_sum.cpp +4 -0
  212. sequenzo/dissimilarity_measures/src/xsimd/test/test_sum.hpp +34 -0
  213. sequenzo/dissimilarity_measures/src/xsimd/test/test_traits.cpp +172 -0
  214. sequenzo/dissimilarity_measures/src/xsimd/test/test_trigonometric.cpp +208 -0
  215. sequenzo/dissimilarity_measures/src/xsimd/test/test_utils.hpp +611 -0
  216. sequenzo/dissimilarity_measures/src/xsimd/test/test_wasm/test_wasm_playwright.py +123 -0
  217. sequenzo/dissimilarity_measures/src/xsimd/test/test_xsimd_api.cpp +1460 -0
  218. sequenzo/dissimilarity_measures/utils/__init__.py +16 -0
  219. sequenzo/dissimilarity_measures/utils/get_LCP_length_for_2_seq.py +44 -0
  220. sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.cpython-310-darwin.so +0 -0
  221. sequenzo/dissimilarity_measures/utils/seqconc.cpython-310-darwin.so +0 -0
  222. sequenzo/dissimilarity_measures/utils/seqdss.cpython-310-darwin.so +0 -0
  223. sequenzo/dissimilarity_measures/utils/seqdur.cpython-310-darwin.so +0 -0
  224. sequenzo/dissimilarity_measures/utils/seqlength.cpython-310-darwin.so +0 -0
  225. sequenzo/multidomain/__init__.py +23 -0
  226. sequenzo/multidomain/association_between_domains.py +311 -0
  227. sequenzo/multidomain/cat.py +597 -0
  228. sequenzo/multidomain/combt.py +519 -0
  229. sequenzo/multidomain/dat.py +81 -0
  230. sequenzo/multidomain/idcd.py +139 -0
  231. sequenzo/multidomain/linked_polyad.py +292 -0
  232. sequenzo/openmp_setup.py +233 -0
  233. sequenzo/prefix_tree/__init__.py +62 -0
  234. sequenzo/prefix_tree/hub.py +114 -0
  235. sequenzo/prefix_tree/individual_level_indicators.py +1321 -0
  236. sequenzo/prefix_tree/spell_individual_level_indicators.py +580 -0
  237. sequenzo/prefix_tree/spell_level_indicators.py +297 -0
  238. sequenzo/prefix_tree/system_level_indicators.py +544 -0
  239. sequenzo/prefix_tree/utils.py +54 -0
  240. sequenzo/seqhmm/__init__.py +95 -0
  241. sequenzo/seqhmm/advanced_optimization.py +305 -0
  242. sequenzo/seqhmm/bootstrap.py +411 -0
  243. sequenzo/seqhmm/build_hmm.py +142 -0
  244. sequenzo/seqhmm/build_mhmm.py +136 -0
  245. sequenzo/seqhmm/build_nhmm.py +121 -0
  246. sequenzo/seqhmm/fit_mhmm.py +62 -0
  247. sequenzo/seqhmm/fit_model.py +61 -0
  248. sequenzo/seqhmm/fit_nhmm.py +76 -0
  249. sequenzo/seqhmm/formulas.py +289 -0
  250. sequenzo/seqhmm/forward_backward_nhmm.py +276 -0
  251. sequenzo/seqhmm/gradients_nhmm.py +306 -0
  252. sequenzo/seqhmm/hmm.py +291 -0
  253. sequenzo/seqhmm/mhmm.py +314 -0
  254. sequenzo/seqhmm/model_comparison.py +238 -0
  255. sequenzo/seqhmm/multichannel_em.py +282 -0
  256. sequenzo/seqhmm/multichannel_utils.py +138 -0
  257. sequenzo/seqhmm/nhmm.py +270 -0
  258. sequenzo/seqhmm/nhmm_utils.py +191 -0
  259. sequenzo/seqhmm/predict.py +137 -0
  260. sequenzo/seqhmm/predict_mhmm.py +142 -0
  261. sequenzo/seqhmm/simulate.py +878 -0
  262. sequenzo/seqhmm/utils.py +218 -0
  263. sequenzo/seqhmm/visualization.py +910 -0
  264. sequenzo/sequence_characteristics/__init__.py +40 -0
  265. sequenzo/sequence_characteristics/complexity_index.py +49 -0
  266. sequenzo/sequence_characteristics/overall_cross_sectional_entropy.py +220 -0
  267. sequenzo/sequence_characteristics/plot_characteristics.py +593 -0
  268. sequenzo/sequence_characteristics/simple_characteristics.py +311 -0
  269. sequenzo/sequence_characteristics/state_frequencies_and_entropy_per_sequence.py +39 -0
  270. sequenzo/sequence_characteristics/turbulence.py +155 -0
  271. sequenzo/sequence_characteristics/variance_of_spell_durations.py +86 -0
  272. sequenzo/sequence_characteristics/within_sequence_entropy.py +43 -0
  273. sequenzo/suffix_tree/__init__.py +66 -0
  274. sequenzo/suffix_tree/hub.py +114 -0
  275. sequenzo/suffix_tree/individual_level_indicators.py +1679 -0
  276. sequenzo/suffix_tree/spell_individual_level_indicators.py +493 -0
  277. sequenzo/suffix_tree/spell_level_indicators.py +248 -0
  278. sequenzo/suffix_tree/system_level_indicators.py +535 -0
  279. sequenzo/suffix_tree/utils.py +56 -0
  280. sequenzo/version_check.py +283 -0
  281. sequenzo/visualization/__init__.py +29 -0
  282. sequenzo/visualization/plot_mean_time.py +222 -0
  283. sequenzo/visualization/plot_modal_state.py +276 -0
  284. sequenzo/visualization/plot_most_frequent_sequences.py +147 -0
  285. sequenzo/visualization/plot_relative_frequency.py +405 -0
  286. sequenzo/visualization/plot_sequence_index.py +1175 -0
  287. sequenzo/visualization/plot_single_medoid.py +153 -0
  288. sequenzo/visualization/plot_state_distribution.py +651 -0
  289. sequenzo/visualization/plot_transition_matrix.py +190 -0
  290. sequenzo/visualization/utils/__init__.py +23 -0
  291. sequenzo/visualization/utils/utils.py +310 -0
  292. sequenzo/with_event_history_analysis/__init__.py +35 -0
  293. sequenzo/with_event_history_analysis/sequence_analysis_multi_state_model.py +850 -0
  294. sequenzo/with_event_history_analysis/sequence_history_analysis.py +283 -0
  295. sequenzo-0.1.31.dist-info/METADATA +286 -0
  296. sequenzo-0.1.31.dist-info/RECORD +299 -0
  297. sequenzo-0.1.31.dist-info/WHEEL +5 -0
  298. sequenzo-0.1.31.dist-info/licenses/LICENSE +28 -0
  299. sequenzo-0.1.31.dist-info/top_level.txt +2 -0
@@ -0,0 +1,276 @@
1
+ """
2
+ @Author : Yuqi Liang 梁彧祺
3
+ @File : plot_modal_state.py
4
+ @Time : 01/03/2025 13:45
5
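+ @Desc : Modal state plot: the most frequent state at each time point and its relative frequency, optionally split by group.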
6
+ """
7
+ import numpy as np
8
+ import pandas as pd
9
+ import matplotlib.pyplot as plt
10
+ from typing import Optional, Union, List
11
+ from sequenzo import SequenceData
12
+ from sequenzo.visualization.utils import (
13
+ set_up_time_labels_for_x_axis,
14
+ create_standalone_legend,
15
+ save_figure_to_buffer,
16
+ combine_plot_with_legend,
17
+ save_and_show_results,
18
+ show_plot_title
19
+ )
20
+ from PIL import Image
21
+
22
+
23
def plot_modal_state(seqdata: SequenceData,
                     group_by: Optional[Union[str, pd.Series, np.ndarray]] = None,
                     group_labels: Optional[List[str]] = None,
                     weights="auto",
                     xlabel: str = "Time",
                     ylabel: str = "Rel. Freq.",
                     fig_width: int = 12,
                     fig_height: Optional[int] = None,
                     show_counts: bool = True,
                     show_group_titles: bool = True,
                     fontsize: int = 12,
                     save_as: Optional[str] = None,
                     dpi: int = 200) -> None:
    """
    Draw a modal state frequency plot, one panel per group.

    For every time point (each column of ``seqdata.to_dataframe()``) the state
    with the largest weighted count is drawn as a bar whose height is that
    state's weighted relative frequency within the group. The stacked panels
    and a standalone legend are rendered to images, combined, optionally
    written to ``save_as`` and finally shown with matplotlib.

    :param seqdata: SequenceData object containing sequence information
    :param group_by: Grouping variable. ``None`` puts all sequences into a
        single "All Sequences" group. A string names a column of
        ``seqdata.data``. A list/array is matched to the sequences by
        position. A ``pd.Series`` is matched by index label when its index
        holds exactly the same unique labels as ``seqdata.data.index``, and by
        position otherwise.
    :param group_labels: Group values to plot, in panel order. Defaults to the
        sorted unique values of the grouping variable.
    :param weights: (np.ndarray or "auto") Weights for sequences. If "auto",
        uses seqdata.weights if available; ``None`` means unit weights.
    :param xlabel: Label for the x-axis (set on the bottom panel)
    :param ylabel: Label for the y-axis (set on the middle panel)
    :param fig_width: Width of the figure
    :param fig_height: Height of the figure (auto-calculated based on groups if None)
    :param show_counts: Whether to append the number of sequences (and the
        total weight, when the weights are not all 1) to each group title
    :param show_group_titles: Whether to draw a title above each group panel
    :param fontsize: Font size for titles and axis labels; the legend uses
        ``fontsize - 2``
    :param save_as: Optional output path, handed to
        ``combine_plot_with_legend``. ".png" is appended when the path does
        not end in .png/.jpg/.jpeg/.pdf.
    :param dpi: Resolution used when rendering the plot and the legend

    :raises ValueError: If ``weights`` or ``group_by`` does not have one entry
        per sequence, if ``group_by`` names a column that does not exist, or
        if there is no group to plot.
    :return: None
    """
    # Resolve the weights: "auto" defers to the SequenceData object.
    if isinstance(weights, str) and weights == "auto":
        weights = getattr(seqdata, "weights", None)

    if weights is not None:
        weights = np.asarray(weights, dtype=float).reshape(-1)
        if len(weights) != len(seqdata.values):
            raise ValueError("Length of weights must equal number of sequences.")

    # Sequence data as a DataFrame, re-indexed like the original data so that
    # column-based grouping variables line up with the sequences.
    seq_df = seqdata.to_dataframe()
    seq_df.index = seqdata.data.index

    w_all = np.ones(len(seq_df)) if weights is None else weights

    # Numeric code -> state key.
    inv_state_mapping = {v: k for k, v in seqdata.state_mapping.items()}

    # Build `groups`: one group value per sequence, always carrying
    # seq_df.index so the boolean masks derived from it are positionally
    # consistent with both seq_df and w_all.
    if group_by is None:
        groups = pd.Series(["All Sequences"] * len(seq_df), index=seq_df.index)
        if group_labels is None:
            group_labels = ["All Sequences"]
    elif isinstance(group_by, str):
        if group_by not in seqdata.data.columns:
            raise ValueError(f"Column '{group_by}' not found in sequence data")
        groups = seqdata.data[group_by]
        if group_labels is None:
            group_labels = sorted(groups.unique())
    else:
        if len(group_by) != len(seq_df):
            raise ValueError("Length of group_by must match number of sequences")
        if isinstance(group_by, pd.Series):
            # Same length + both indexes unique + every label known
            # => identical label sets, so label alignment is well defined.
            label_alignable = (
                group_by.index.is_unique
                and seq_df.index.is_unique
                and bool(group_by.index.isin(seq_df.index).all())
            )
            if label_alignable:
                groups = group_by.reindex(seq_df.index)
            else:
                # Foreign index: fall back to positional matching instead of
                # failing later with an unalignable boolean indexer.
                groups = pd.Series(group_by.to_numpy(), index=seq_df.index)
        else:
            # Plain list/array: positional by construction.
            groups = pd.Series(group_by, index=seq_df.index)
        if group_labels is None:
            group_labels = sorted(set(groups))

    n_groups = len(group_labels)
    if n_groups == 0:
        raise ValueError("No groups to plot: group_labels is empty.")
    n_time_points = len(seq_df.columns)

    if fig_height is None:
        # Auto-calculate height based on number of groups
        fig_height = max(4, 3 * n_groups)

    # No figure-level title is drawn, but one extra unit of height is kept so
    # the stacked panels have enough vertical room between them.
    title_height = 1
    adjusted_fig_height = fig_height + title_height

    main_fig = plt.figure(figsize=(fig_width, adjusted_fig_height))
    plot_gs = main_fig.add_gridspec(nrows=n_groups, height_ratios=[1] * n_groups, hspace=0.3)

    # One panel per group, all sharing the first panel's x and y scales.
    axes = [main_fig.add_subplot(plot_gs[i]) for i in range(n_groups)]
    for ax in axes[1:]:
        ax.sharex(axes[0])
        ax.sharey(axes[0])

    colors = seqdata.color_map_by_label

    # Integer encodings of the states.
    # NOTE(review): assumes state_mapping values are 1..len(states) - confirm.
    state_codes = range(1, len(seqdata.states) + 1)

    x = np.arange(n_time_points)
    bar_width = 0.8  # Fixed width for all bars

    for i, group in enumerate(group_labels):
        ax = axes[i]

        # Positional boolean mask for this group.
        mask = (groups == group).to_numpy()
        group_count = mask.sum()

        # Skip if no sequences in this group (the panel is left untouched).
        if group_count == 0:
            continue

        group_data = seq_df[mask]
        w = w_all[mask]
        totw = float(w.sum())  # identical for every time point

        # Modal state and its weighted relative frequency per time point.
        modal_states = []
        modal_freqs = []
        for col in group_data.columns:
            states_idx = group_data[col].to_numpy()
            if totw > 0:
                weighted_sum = {
                    s_num: float(w[states_idx == s_num].sum())
                    for s_num in state_codes
                }
                # Ties resolve to the lowest state code (first max in dict order).
                modal_s = max(weighted_sum, key=weighted_sum.get)
                modal_state = inv_state_mapping[modal_s]
                modal_freq = weighted_sum[modal_s] / totw
            else:
                modal_state, modal_freq = None, 0.0
            modal_states.append(modal_state)
            modal_freqs.append(modal_freq)

        for j, (state, freq) in enumerate(zip(modal_states, modal_freqs)):
            if state is not None:
                # NOTE(review): the colour map is keyed "by label" while
                # `state` is a state_mapping key - presumably these coincide;
                # confirm for data whose labels differ from its states.
                ax.bar(x[j], freq, width=bar_width, color=colors[state],
                       edgecolor='white', linewidth=0.5)

        # Set group title with count if requested
        if show_group_titles:
            if show_counts:
                if weights is not None and not np.allclose(weights, 1.0):
                    sum_w = float(w.sum())
                    title_text = f"{group} (n={group_count}, total weight={sum_w:.1f})"
                else:
                    title_text = f"{group} (n={group_count})"
            else:
                title_text = group
            show_plot_title(ax, title_text, show=True, fontsize=fontsize, pad=15)

        # Relative frequencies live in [0, 1].
        ax.set_ylim(0, 1.0)
        ax.set_yticks([0, 0.25, 0.5, 0.75, 1.0])

        # Light horizontal grid drawn behind the bars.
        ax.grid(axis='y', color='#E0E0E0', linestyle='-', linewidth=0.5)
        ax.set_axisbelow(True)

        # Clean up borders
        for spine in ax.spines.values():
            spine.set_color('#CCCCCC')
            spine.set_linewidth(0.5)

        # Add y-label only for the middle subplot
        if i == n_groups // 2:
            ax.set_ylabel(ylabel, fontsize=fontsize)

    # Set up X-axis (time) labels on the bottom subplot
    set_up_time_labels_for_x_axis(seqdata, axes[-1])
    axes[-1].set_xlabel(xlabel, fontsize=fontsize, labelpad=10)

    # Render the panels to an in-memory image.
    main_buffer = save_figure_to_buffer(main_fig, dpi=dpi)
    # Only the combined image below is displayed, so release the working
    # figure now; otherwise repeated calls accumulate open figures. Closing a
    # figure that is already closed is a no-op.
    plt.close(main_fig)

    # Create standalone legend
    legend_buffer = create_standalone_legend(
        colors=colors,
        labels=seqdata.labels,
        ncol=min(5, len(seqdata.states)),
        figsize=(fig_width, 1),
        fontsize=fontsize-2,
        dpi=dpi
    )

    if save_as and not save_as.lower().endswith(('.png', '.jpg', '.jpeg', '.pdf')):
        save_as = save_as + '.png'

    # Combine main plot with legend
    combined_img = combine_plot_with_legend(
        main_buffer,
        legend_buffer,
        output_path=save_as,
        dpi=dpi,
        padding=20  # Increased padding between plot and legend
    )

    # Display combined image
    display_fig = plt.figure(figsize=(fig_width, adjusted_fig_height + 1))
    plt.imshow(combined_img)
    plt.axis('off')
    plt.show()
    plt.close(display_fig)
251
+
252
+
253
+
254
if __name__ == '__main__':
    # Small manual demo: plot the modal states of the bundled CO2 emissions data.
    # Names are imported explicitly (rather than `from sequenzo import *`) so it
    # is clear where each one comes from; SequenceData is already imported at
    # the top of this module.
    from sequenzo import list_datasets, load_dataset

    # List all the available datasets in Sequenzo
    print('Available datasets in Sequenzo: ', list_datasets())

    # Load the data that we would like to explore in this tutorial
    # `df` is the short for `dataframe`, which is a common variable name for a dataset
    df = load_dataset('country_co2_emissions')

    # Define the time-span variable: every column after the first one
    time = list(df.columns)[1:]

    states = ['Very Low', 'Low', 'Middle', 'High', 'Very High']

    # Create a SequenceData object from the dataset
    sequence_data = SequenceData(df, time=time, time_type="year", id_col="country", states=states)

    plot_modal_state(sequence_data)
276
+
@@ -0,0 +1,147 @@
1
+ """
2
+ @Author : Yuqi Liang 梁彧祺
3
+ @File : plot_most_frequent_sequences.py
4
+ @Time : 12/02/2025 10:40
5
+ @Desc :
6
+ Generate sequence frequency plots.
7
+
8
+ This module plots the `top_n` most frequent sequences (10 by default),
9
+ similar to `seqfplot` in R's TraMineR package.
10
+ """
11
+
12
+ import pandas as pd
13
+ import matplotlib.pyplot as plt
14
+ import numpy as np
15
+
16
+ from sequenzo.define_sequence_data import SequenceData
17
+ from sequenzo.visualization.utils import (
18
+ set_up_time_labels_for_x_axis,
19
+ save_and_show_results,
20
+ show_plot_title
21
+ )
22
+
23
+
24
+ def plot_most_frequent_sequences(seqdata: SequenceData, top_n: int = 10, weights="auto", title=None, fontsize=12, save_as=None, dpi=200, show_title: bool = True):
25
+ """
26
+ Generate a sequence frequency plot, similar to R's seqfplot.
27
+
28
+ :param seqdata: (SequenceData) A SequenceData object containing sequences.
29
+ :param top_n: (int) Number of most frequent sequences to display.
30
+ :param weights: (np.ndarray or "auto") Weights for sequences. If "auto", uses seqdata.weights if available
31
+ :param title: (str, optional) Title for the plot. If None, no title will be displayed.
32
+ :param fontsize: (int) Base font size for text elements
33
+ :param save_as: (str, optional) Path to save the plot.
34
+ :param dpi: (int) Resolution of the saved plot.
35
+ """
36
+ sequences = seqdata.values.tolist()
37
+
38
+ # Process weights
39
+ if isinstance(weights, str) and weights == "auto":
40
+ weights = getattr(seqdata, "weights", None)
41
+
42
+ if weights is not None:
43
+ weights = np.asarray(weights, dtype=float).reshape(-1)
44
+ if len(weights) != len(seqdata.values):
45
+ raise ValueError("Length of weights must equal number of sequences.")
46
+
47
+ if weights is None:
48
+ weights = np.ones(len(sequences))
49
+
50
+ # Weighted counting of sequences
51
+ agg = {}
52
+ for seq, w in zip(sequences, weights):
53
+ key = tuple(seq)
54
+ agg[key] = agg.get(key, 0.0) + float(w)
55
+
56
+ # Select Top-N by weighted frequency
57
+ items = sorted(agg.items(), key=lambda kv: kv[1], reverse=True)[:top_n]
58
+ df = pd.DataFrame(items, columns=['sequence', 'wcount'])
59
+ totw = float(np.sum(weights))
60
+ df['freq'] = df['wcount'] / (totw if totw > 0 else 1.0) * 100.0
61
+
62
+ # **Ensure colors match seqdef**
63
+ # Use numeric color map directly to avoid label/state-name mismatches
64
+ inv_state_mapping = {v: k for k, v in seqdata.state_mapping.items()} # Reverse mapping kept if needed elsewhere
65
+
66
+ # **Plot settings**
67
+ fig, ax = plt.subplots(figsize=(10, 6))
68
+
69
+ # **Adjust y_positions calculation to ensure sequences fill the entire y-axis**
70
+ y_positions = df['freq'].cumsum() - df['freq'] / 2 # Center the bars
71
+
72
+ for i, (seq, freq) in enumerate(zip(df['sequence'], df['freq'])):
73
+ left = 0 # Starting x position
74
+ for t, state_idx in enumerate(seq):
75
+ # Use numeric-coded color map; if unknown, fall back to gray
76
+ color = seqdata.color_map.get(int(state_idx), "gray")
77
+
78
+ width = 1 # Width of each time slice
79
+ ax.barh(y=y_positions[i], width=width * 1.01, left=left - 0.005,
80
+ height=freq, color=color, linewidth=0,
81
+ antialiased=False)
82
+ left += width # Move to the next time slice
83
+
84
+ # **Formatting**
85
+ ax.set_xlabel("Time", fontsize=fontsize)
86
+ # Check if we have effective weights (not all 1.0) and they were provided by user
87
+ original_weights = getattr(seqdata, "weights", None)
88
+ if original_weights is not None and not np.allclose(original_weights, 1.0):
89
+ # Show both count and weighted total if weights are used
90
+ ax.set_ylabel("Cumulative Frequency (%)\nN={:,}, total weight={:.1f}".format(len(sequences), totw), fontsize=fontsize)
91
+ else:
92
+ ax.set_ylabel("Cumulative Frequency (%)\nN={:,}".format(len(sequences)), fontsize=fontsize)
93
+ if show_title and title is not None:
94
+ show_plot_title(ax, title, show=True, fontsize=fontsize+2, pad=20)
95
+
96
+ # **Optimize X-axis ticks: align to the center of each bar**
97
+ set_up_time_labels_for_x_axis(seqdata, ax)
98
+
99
+ # **Set Y-axis ticks and labels**
100
+ sum_freq_top_10 = df['freq'].sum() # Cumulative frequency of top 10 sequences
101
+ max_freq = df['freq'].max() # Frequency of the top 1 sequence
102
+
103
+ # Set Y-axis ticks: 0%, top1 frequency, top10 cumulative frequency
104
+ y_ticks = [0, max_freq, sum_freq_top_10]
105
+ ax.set_yticks(y_ticks)
106
+ ax.set_yticklabels([f"{ytick:.1f}%" for ytick in y_ticks], fontsize=fontsize-2)
107
+
108
+ # **Set Y-axis range to ensure the highest tick is the top10 cumulative frequency**
109
+ # Force Y-axis range to be from 0 to sum_freq_top_10
110
+ ax.set_ylim(0, sum_freq_top_10)
111
+
112
+ # **Annotate the frequency percentage on the left side of the highest frequency sequence**
113
+ ax.annotate(f"{max_freq:.1f}%", xy=(-0.5, y_positions.iloc[0]),
114
+ xycoords="data", fontsize=fontsize, color="black", ha="left", va="center")
115
+
116
+ # **Annotate 0% at the bottom of the Y-axis**
117
+ ax.annotate("0%", xy=(-0.5, 0), xycoords="data", fontsize=fontsize, color="black", ha="left", va="center")
118
+
119
+ # **Clean up axis aesthetics like plot_state_distribution**
120
+ ax.spines['top'].set_visible(False)
121
+ ax.spines['right'].set_visible(False)
122
+ ax.spines['left'].set_visible(True) # Keep the left border like state_distribution
123
+ ax.spines['bottom'].set_visible(True) # Show bottom border to connect with left
124
+
125
+ # Style the left spine to match plot_state_distribution
126
+ ax.spines['left'].set_color('gray')
127
+ ax.spines['left'].set_linewidth(0.7)
128
+ ax.spines['bottom'].set_color('gray')
129
+ ax.spines['bottom'].set_linewidth(0.7)
130
+
131
+ # Style the tick parameters
132
+ ax.tick_params(axis='y', colors='gray', length=4, width=0.7)
133
+ ax.tick_params(axis='x', colors='gray', length=4, width=0.7)
134
+
135
+ # Extend the left spine slightly beyond the plot area
136
+ ax.spines['left'].set_bounds(0, sum_freq_top_10)
137
+ ax.spines['left'].set_position(('outward', 5)) # Move spine 5 points to the left
138
+
139
+ # Align bottom spine with the left spine position
140
+ ax.spines['bottom'].set_position(('outward', 5)) # Move bottom spine to align with left
141
+
142
+ # Use legend from SequenceData
143
+ ax.legend(*seqdata.get_legend(), bbox_to_anchor=(1.05, 1), loc='upper left')
144
+
145
+ save_and_show_results(save_as, dpi=200)
146
+
147
+