sequenzo 0.1.31__cp310-cp310-macosx_10_9_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (299) hide show
  1. _sequenzo_fastcluster.cpython-310-darwin.so +0 -0
  2. sequenzo/__init__.py +349 -0
  3. sequenzo/big_data/__init__.py +12 -0
  4. sequenzo/big_data/clara/__init__.py +26 -0
  5. sequenzo/big_data/clara/clara.py +476 -0
  6. sequenzo/big_data/clara/utils/__init__.py +27 -0
  7. sequenzo/big_data/clara/utils/aggregatecases.py +92 -0
  8. sequenzo/big_data/clara/utils/davies_bouldin.py +91 -0
  9. sequenzo/big_data/clara/utils/get_weighted_diss.cpython-310-darwin.so +0 -0
  10. sequenzo/big_data/clara/utils/wfcmdd.py +205 -0
  11. sequenzo/big_data/clara/visualization.py +88 -0
  12. sequenzo/clustering/KMedoids.py +178 -0
  13. sequenzo/clustering/__init__.py +30 -0
  14. sequenzo/clustering/clustering_c_code.cpython-310-darwin.so +0 -0
  15. sequenzo/clustering/hierarchical_clustering.py +1256 -0
  16. sequenzo/clustering/sequenzo_fastcluster/fastcluster.py +495 -0
  17. sequenzo/clustering/sequenzo_fastcluster/src/fastcluster.cpp +1877 -0
  18. sequenzo/clustering/sequenzo_fastcluster/src/fastcluster_python.cpp +1264 -0
  19. sequenzo/clustering/src/KMedoid.cpp +263 -0
  20. sequenzo/clustering/src/PAM.cpp +237 -0
  21. sequenzo/clustering/src/PAMonce.cpp +265 -0
  22. sequenzo/clustering/src/cluster_quality.cpp +496 -0
  23. sequenzo/clustering/src/cluster_quality.h +128 -0
  24. sequenzo/clustering/src/cluster_quality_backup.cpp +570 -0
  25. sequenzo/clustering/src/module.cpp +228 -0
  26. sequenzo/clustering/src/weightedinertia.cpp +111 -0
  27. sequenzo/clustering/utils/__init__.py +27 -0
  28. sequenzo/clustering/utils/disscenter.py +122 -0
  29. sequenzo/data_preprocessing/__init__.py +22 -0
  30. sequenzo/data_preprocessing/helpers.py +303 -0
  31. sequenzo/datasets/__init__.py +41 -0
  32. sequenzo/datasets/biofam.csv +2001 -0
  33. sequenzo/datasets/biofam_child_domain.csv +2001 -0
  34. sequenzo/datasets/biofam_left_domain.csv +2001 -0
  35. sequenzo/datasets/biofam_married_domain.csv +2001 -0
  36. sequenzo/datasets/chinese_colonial_territories.csv +12 -0
  37. sequenzo/datasets/country_co2_emissions.csv +194 -0
  38. sequenzo/datasets/country_co2_emissions_global_deciles.csv +195 -0
  39. sequenzo/datasets/country_co2_emissions_global_quintiles.csv +195 -0
  40. sequenzo/datasets/country_co2_emissions_local_deciles.csv +195 -0
  41. sequenzo/datasets/country_co2_emissions_local_quintiles.csv +195 -0
  42. sequenzo/datasets/country_gdp_per_capita.csv +194 -0
  43. sequenzo/datasets/dyadic_children.csv +61 -0
  44. sequenzo/datasets/dyadic_parents.csv +61 -0
  45. sequenzo/datasets/mvad.csv +713 -0
  46. sequenzo/datasets/pairfam_activity_by_month.csv +1028 -0
  47. sequenzo/datasets/pairfam_activity_by_year.csv +1028 -0
  48. sequenzo/datasets/pairfam_family_by_month.csv +1028 -0
  49. sequenzo/datasets/pairfam_family_by_year.csv +1028 -0
  50. sequenzo/datasets/political_science_aid_shock.csv +166 -0
  51. sequenzo/datasets/political_science_donor_fragmentation.csv +157 -0
  52. sequenzo/define_sequence_data.py +1400 -0
  53. sequenzo/dissimilarity_measures/__init__.py +31 -0
  54. sequenzo/dissimilarity_measures/c_code.cpython-310-darwin.so +0 -0
  55. sequenzo/dissimilarity_measures/get_distance_matrix.py +762 -0
  56. sequenzo/dissimilarity_measures/get_substitution_cost_matrix.py +246 -0
  57. sequenzo/dissimilarity_measures/src/DHDdistance.cpp +148 -0
  58. sequenzo/dissimilarity_measures/src/LCPdistance.cpp +114 -0
  59. sequenzo/dissimilarity_measures/src/LCPspellDistance.cpp +215 -0
  60. sequenzo/dissimilarity_measures/src/OMdistance.cpp +247 -0
  61. sequenzo/dissimilarity_measures/src/OMspellDistance.cpp +281 -0
  62. sequenzo/dissimilarity_measures/src/__init__.py +0 -0
  63. sequenzo/dissimilarity_measures/src/dist2matrix.cpp +63 -0
  64. sequenzo/dissimilarity_measures/src/dp_utils.h +160 -0
  65. sequenzo/dissimilarity_measures/src/module.cpp +40 -0
  66. sequenzo/dissimilarity_measures/src/setup.py +30 -0
  67. sequenzo/dissimilarity_measures/src/utils.h +25 -0
  68. sequenzo/dissimilarity_measures/src/xsimd/.github/cmake-test/main.cpp +6 -0
  69. sequenzo/dissimilarity_measures/src/xsimd/benchmark/main.cpp +159 -0
  70. sequenzo/dissimilarity_measures/src/xsimd/benchmark/xsimd_benchmark.hpp +565 -0
  71. sequenzo/dissimilarity_measures/src/xsimd/docs/source/conf.py +37 -0
  72. sequenzo/dissimilarity_measures/src/xsimd/examples/mandelbrot.cpp +330 -0
  73. sequenzo/dissimilarity_measures/src/xsimd/examples/pico_bench.hpp +246 -0
  74. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_arithmetic.hpp +266 -0
  75. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_complex.hpp +112 -0
  76. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_details.hpp +323 -0
  77. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_logical.hpp +218 -0
  78. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_math.hpp +2583 -0
  79. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_memory.hpp +880 -0
  80. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_rounding.hpp +72 -0
  81. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_swizzle.hpp +174 -0
  82. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_trigo.hpp +978 -0
  83. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx.hpp +1924 -0
  84. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx2.hpp +1144 -0
  85. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512bw.hpp +656 -0
  86. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512cd.hpp +28 -0
  87. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512dq.hpp +244 -0
  88. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512er.hpp +20 -0
  89. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512f.hpp +2650 -0
  90. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512ifma.hpp +20 -0
  91. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512pf.hpp +20 -0
  92. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vbmi.hpp +77 -0
  93. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vbmi2.hpp +131 -0
  94. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vnni_avx512bw.hpp +20 -0
  95. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vnni_avx512vbmi2.hpp +20 -0
  96. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avxvnni.hpp +20 -0
  97. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_common.hpp +24 -0
  98. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_common_fwd.hpp +77 -0
  99. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_constants.hpp +393 -0
  100. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_emulated.hpp +788 -0
  101. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_avx.hpp +93 -0
  102. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_avx2.hpp +46 -0
  103. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_sse.hpp +97 -0
  104. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma4.hpp +92 -0
  105. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_i8mm_neon64.hpp +17 -0
  106. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_isa.hpp +142 -0
  107. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_neon.hpp +3142 -0
  108. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_neon64.hpp +1543 -0
  109. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_rvv.hpp +1513 -0
  110. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_scalar.hpp +1260 -0
  111. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse2.hpp +2024 -0
  112. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse3.hpp +67 -0
  113. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse4_1.hpp +339 -0
  114. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse4_2.hpp +44 -0
  115. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_ssse3.hpp +186 -0
  116. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sve.hpp +1155 -0
  117. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_vsx.hpp +892 -0
  118. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_wasm.hpp +1780 -0
  119. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_arch.hpp +240 -0
  120. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_config.hpp +484 -0
  121. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_cpuid.hpp +269 -0
  122. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_inline.hpp +27 -0
  123. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/math/xsimd_rem_pio2.hpp +719 -0
  124. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/memory/xsimd_aligned_allocator.hpp +349 -0
  125. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/memory/xsimd_alignment.hpp +91 -0
  126. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_all_registers.hpp +55 -0
  127. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_api.hpp +2765 -0
  128. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx2_register.hpp +44 -0
  129. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512bw_register.hpp +51 -0
  130. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512cd_register.hpp +51 -0
  131. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512dq_register.hpp +51 -0
  132. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512er_register.hpp +51 -0
  133. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512f_register.hpp +77 -0
  134. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512ifma_register.hpp +51 -0
  135. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512pf_register.hpp +51 -0
  136. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vbmi2_register.hpp +51 -0
  137. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vbmi_register.hpp +51 -0
  138. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512bw_register.hpp +54 -0
  139. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512vbmi2_register.hpp +53 -0
  140. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx_register.hpp +64 -0
  141. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avxvnni_register.hpp +44 -0
  142. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_batch.hpp +1524 -0
  143. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_batch_constant.hpp +300 -0
  144. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_common_arch.hpp +47 -0
  145. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_emulated_register.hpp +80 -0
  146. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_avx2_register.hpp +50 -0
  147. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_avx_register.hpp +50 -0
  148. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_sse_register.hpp +50 -0
  149. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma4_register.hpp +50 -0
  150. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_i8mm_neon64_register.hpp +55 -0
  151. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_neon64_register.hpp +55 -0
  152. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_neon_register.hpp +154 -0
  153. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_register.hpp +94 -0
  154. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_rvv_register.hpp +506 -0
  155. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse2_register.hpp +59 -0
  156. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse3_register.hpp +49 -0
  157. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse4_1_register.hpp +48 -0
  158. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse4_2_register.hpp +48 -0
  159. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_ssse3_register.hpp +48 -0
  160. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sve_register.hpp +156 -0
  161. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_traits.hpp +337 -0
  162. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_utils.hpp +536 -0
  163. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_vsx_register.hpp +77 -0
  164. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_wasm_register.hpp +59 -0
  165. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/xsimd.hpp +75 -0
  166. sequenzo/dissimilarity_measures/src/xsimd/test/architectures/dummy.cpp +7 -0
  167. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set.cpp +13 -0
  168. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean.cpp +24 -0
  169. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_aligned.cpp +25 -0
  170. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_arch_independent.cpp +28 -0
  171. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_tag_dispatch.cpp +25 -0
  172. sequenzo/dissimilarity_measures/src/xsimd/test/doc/manipulating_abstract_batches.cpp +7 -0
  173. sequenzo/dissimilarity_measures/src/xsimd/test/doc/manipulating_parametric_batches.cpp +8 -0
  174. sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum.hpp +31 -0
  175. sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum_avx2.cpp +3 -0
  176. sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum_sse2.cpp +3 -0
  177. sequenzo/dissimilarity_measures/src/xsimd/test/doc/writing_vectorized_code.cpp +11 -0
  178. sequenzo/dissimilarity_measures/src/xsimd/test/main.cpp +31 -0
  179. sequenzo/dissimilarity_measures/src/xsimd/test/test_api.cpp +230 -0
  180. sequenzo/dissimilarity_measures/src/xsimd/test/test_arch.cpp +217 -0
  181. sequenzo/dissimilarity_measures/src/xsimd/test/test_basic_math.cpp +183 -0
  182. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch.cpp +1049 -0
  183. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_bool.cpp +508 -0
  184. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_cast.cpp +409 -0
  185. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_complex.cpp +712 -0
  186. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_constant.cpp +286 -0
  187. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_float.cpp +141 -0
  188. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_int.cpp +365 -0
  189. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_manip.cpp +308 -0
  190. sequenzo/dissimilarity_measures/src/xsimd/test/test_bitwise_cast.cpp +222 -0
  191. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_exponential.cpp +226 -0
  192. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_hyperbolic.cpp +183 -0
  193. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_power.cpp +265 -0
  194. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_trigonometric.cpp +236 -0
  195. sequenzo/dissimilarity_measures/src/xsimd/test/test_conversion.cpp +248 -0
  196. sequenzo/dissimilarity_measures/src/xsimd/test/test_custom_default_arch.cpp +28 -0
  197. sequenzo/dissimilarity_measures/src/xsimd/test/test_error_gamma.cpp +170 -0
  198. sequenzo/dissimilarity_measures/src/xsimd/test/test_explicit_batch_instantiation.cpp +32 -0
  199. sequenzo/dissimilarity_measures/src/xsimd/test/test_exponential.cpp +202 -0
  200. sequenzo/dissimilarity_measures/src/xsimd/test/test_extract_pair.cpp +92 -0
  201. sequenzo/dissimilarity_measures/src/xsimd/test/test_fp_manipulation.cpp +77 -0
  202. sequenzo/dissimilarity_measures/src/xsimd/test/test_gnu_source.cpp +30 -0
  203. sequenzo/dissimilarity_measures/src/xsimd/test/test_hyperbolic.cpp +167 -0
  204. sequenzo/dissimilarity_measures/src/xsimd/test/test_load_store.cpp +304 -0
  205. sequenzo/dissimilarity_measures/src/xsimd/test/test_memory.cpp +61 -0
  206. sequenzo/dissimilarity_measures/src/xsimd/test/test_poly_evaluation.cpp +64 -0
  207. sequenzo/dissimilarity_measures/src/xsimd/test/test_power.cpp +184 -0
  208. sequenzo/dissimilarity_measures/src/xsimd/test/test_rounding.cpp +199 -0
  209. sequenzo/dissimilarity_measures/src/xsimd/test/test_select.cpp +101 -0
  210. sequenzo/dissimilarity_measures/src/xsimd/test/test_shuffle.cpp +760 -0
  211. sequenzo/dissimilarity_measures/src/xsimd/test/test_sum.cpp +4 -0
  212. sequenzo/dissimilarity_measures/src/xsimd/test/test_sum.hpp +34 -0
  213. sequenzo/dissimilarity_measures/src/xsimd/test/test_traits.cpp +172 -0
  214. sequenzo/dissimilarity_measures/src/xsimd/test/test_trigonometric.cpp +208 -0
  215. sequenzo/dissimilarity_measures/src/xsimd/test/test_utils.hpp +611 -0
  216. sequenzo/dissimilarity_measures/src/xsimd/test/test_wasm/test_wasm_playwright.py +123 -0
  217. sequenzo/dissimilarity_measures/src/xsimd/test/test_xsimd_api.cpp +1460 -0
  218. sequenzo/dissimilarity_measures/utils/__init__.py +16 -0
  219. sequenzo/dissimilarity_measures/utils/get_LCP_length_for_2_seq.py +44 -0
  220. sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.cpython-310-darwin.so +0 -0
  221. sequenzo/dissimilarity_measures/utils/seqconc.cpython-310-darwin.so +0 -0
  222. sequenzo/dissimilarity_measures/utils/seqdss.cpython-310-darwin.so +0 -0
  223. sequenzo/dissimilarity_measures/utils/seqdur.cpython-310-darwin.so +0 -0
  224. sequenzo/dissimilarity_measures/utils/seqlength.cpython-310-darwin.so +0 -0
  225. sequenzo/multidomain/__init__.py +23 -0
  226. sequenzo/multidomain/association_between_domains.py +311 -0
  227. sequenzo/multidomain/cat.py +597 -0
  228. sequenzo/multidomain/combt.py +519 -0
  229. sequenzo/multidomain/dat.py +81 -0
  230. sequenzo/multidomain/idcd.py +139 -0
  231. sequenzo/multidomain/linked_polyad.py +292 -0
  232. sequenzo/openmp_setup.py +233 -0
  233. sequenzo/prefix_tree/__init__.py +62 -0
  234. sequenzo/prefix_tree/hub.py +114 -0
  235. sequenzo/prefix_tree/individual_level_indicators.py +1321 -0
  236. sequenzo/prefix_tree/spell_individual_level_indicators.py +580 -0
  237. sequenzo/prefix_tree/spell_level_indicators.py +297 -0
  238. sequenzo/prefix_tree/system_level_indicators.py +544 -0
  239. sequenzo/prefix_tree/utils.py +54 -0
  240. sequenzo/seqhmm/__init__.py +95 -0
  241. sequenzo/seqhmm/advanced_optimization.py +305 -0
  242. sequenzo/seqhmm/bootstrap.py +411 -0
  243. sequenzo/seqhmm/build_hmm.py +142 -0
  244. sequenzo/seqhmm/build_mhmm.py +136 -0
  245. sequenzo/seqhmm/build_nhmm.py +121 -0
  246. sequenzo/seqhmm/fit_mhmm.py +62 -0
  247. sequenzo/seqhmm/fit_model.py +61 -0
  248. sequenzo/seqhmm/fit_nhmm.py +76 -0
  249. sequenzo/seqhmm/formulas.py +289 -0
  250. sequenzo/seqhmm/forward_backward_nhmm.py +276 -0
  251. sequenzo/seqhmm/gradients_nhmm.py +306 -0
  252. sequenzo/seqhmm/hmm.py +291 -0
  253. sequenzo/seqhmm/mhmm.py +314 -0
  254. sequenzo/seqhmm/model_comparison.py +238 -0
  255. sequenzo/seqhmm/multichannel_em.py +282 -0
  256. sequenzo/seqhmm/multichannel_utils.py +138 -0
  257. sequenzo/seqhmm/nhmm.py +270 -0
  258. sequenzo/seqhmm/nhmm_utils.py +191 -0
  259. sequenzo/seqhmm/predict.py +137 -0
  260. sequenzo/seqhmm/predict_mhmm.py +142 -0
  261. sequenzo/seqhmm/simulate.py +878 -0
  262. sequenzo/seqhmm/utils.py +218 -0
  263. sequenzo/seqhmm/visualization.py +910 -0
  264. sequenzo/sequence_characteristics/__init__.py +40 -0
  265. sequenzo/sequence_characteristics/complexity_index.py +49 -0
  266. sequenzo/sequence_characteristics/overall_cross_sectional_entropy.py +220 -0
  267. sequenzo/sequence_characteristics/plot_characteristics.py +593 -0
  268. sequenzo/sequence_characteristics/simple_characteristics.py +311 -0
  269. sequenzo/sequence_characteristics/state_frequencies_and_entropy_per_sequence.py +39 -0
  270. sequenzo/sequence_characteristics/turbulence.py +155 -0
  271. sequenzo/sequence_characteristics/variance_of_spell_durations.py +86 -0
  272. sequenzo/sequence_characteristics/within_sequence_entropy.py +43 -0
  273. sequenzo/suffix_tree/__init__.py +66 -0
  274. sequenzo/suffix_tree/hub.py +114 -0
  275. sequenzo/suffix_tree/individual_level_indicators.py +1679 -0
  276. sequenzo/suffix_tree/spell_individual_level_indicators.py +493 -0
  277. sequenzo/suffix_tree/spell_level_indicators.py +248 -0
  278. sequenzo/suffix_tree/system_level_indicators.py +535 -0
  279. sequenzo/suffix_tree/utils.py +56 -0
  280. sequenzo/version_check.py +283 -0
  281. sequenzo/visualization/__init__.py +29 -0
  282. sequenzo/visualization/plot_mean_time.py +222 -0
  283. sequenzo/visualization/plot_modal_state.py +276 -0
  284. sequenzo/visualization/plot_most_frequent_sequences.py +147 -0
  285. sequenzo/visualization/plot_relative_frequency.py +405 -0
  286. sequenzo/visualization/plot_sequence_index.py +1175 -0
  287. sequenzo/visualization/plot_single_medoid.py +153 -0
  288. sequenzo/visualization/plot_state_distribution.py +651 -0
  289. sequenzo/visualization/plot_transition_matrix.py +190 -0
  290. sequenzo/visualization/utils/__init__.py +23 -0
  291. sequenzo/visualization/utils/utils.py +310 -0
  292. sequenzo/with_event_history_analysis/__init__.py +35 -0
  293. sequenzo/with_event_history_analysis/sequence_analysis_multi_state_model.py +850 -0
  294. sequenzo/with_event_history_analysis/sequence_history_analysis.py +283 -0
  295. sequenzo-0.1.31.dist-info/METADATA +286 -0
  296. sequenzo-0.1.31.dist-info/RECORD +299 -0
  297. sequenzo-0.1.31.dist-info/WHEEL +5 -0
  298. sequenzo-0.1.31.dist-info/licenses/LICENSE +28 -0
  299. sequenzo-0.1.31.dist-info/top_level.txt +2 -0
@@ -0,0 +1,40 @@
1
+ """
2
+ @Author : Yuqi Liang 梁彧祺
3
+ @File : __init__.py
4
+ @Time : 22/09/2025 22:46
5
+ @Desc :
6
+ """
7
+ from .simple_characteristics import (get_subsequences_in_single_sequence,
8
+ get_subsequences_all_sequences,
9
+ get_number_of_transitions)
10
+
11
+ from .state_frequencies_and_entropy_per_sequence import get_state_freq_and_entropy_per_seq
12
+
13
+ from .within_sequence_entropy import get_within_sequence_entropy
14
+
15
+ from .overall_cross_sectional_entropy import get_cross_sectional_entropy
16
+
17
+ from .variance_of_spell_durations import get_spell_duration_variance
18
+
19
+ from .turbulence import get_turbulence
20
+
21
+ from .complexity_index import get_complexity_index
22
+
23
+ from .plot_characteristics import plot_longitudinal_characteristics, plot_cross_sectional_characteristics
24
+
25
+ __all__ = [
26
+ "get_subsequences_in_single_sequence",
27
+ "get_subsequences_all_sequences",
28
+ "get_number_of_transitions",
29
+
30
+ "get_complexity_index",
31
+
32
+ "get_state_freq_and_entropy_per_seq",
33
+ "get_within_sequence_entropy",
34
+ "get_cross_sectional_entropy",
35
+ "get_spell_duration_variance",
36
+ "get_turbulence",
37
+
38
+ "plot_longitudinal_characteristics",
39
+ "plot_cross_sectional_characteristics"
40
+ ]
@@ -0,0 +1,49 @@
1
+ """
2
+ @Author : 李欣怡
3
+ @File : complexity_index.py
4
+ @Time : 2025/9/23 23:45
5
+ @Desc : Complexity index
6
+
7
+ The corresponding function name in TraMineR is seqici.R,
8
+ with the source code available at: https://github.com/cran/TraMineR/blob/master/R/seqici.R
9
+ """
10
+ import os
11
+ from contextlib import redirect_stdout
12
+ import pandas as pd
13
+ import numpy as np
14
+
15
+ from sequenzo.define_sequence_data import SequenceData
16
+ from .simple_characteristics import get_number_of_transitions
17
+ from .within_sequence_entropy import get_within_sequence_entropy
18
+
19
def get_complexity_index(seqdata, silent=True):
    """
    Compute the complexity index of every sequence.

    The index combines two normalized per-sequence measures, the number of
    transitions and the within-sequence entropy, as
    ``sqrt(transitions * entropy)``. The TraMineR counterpart is ``seqici``.

    Parameters
    ----------
    seqdata : SequenceData
        Sequence object to analyse.
    silent : bool, default True
        If False, print a one-line progress message before computing.

    Returns
    -------
    pd.DataFrame
        One row per sequence. The index of ``seqdata.seqdata`` is moved into
        a regular column (renamed to 'ID' when ``reset_index`` produces the
        default 'index' label), followed by a 'Complexity Index' column.

    Raises
    ------
    ValueError
        If ``seqdata`` is not a ``SequenceData`` instance.
    """
    if not isinstance(seqdata, SequenceData):
        raise ValueError("[!] data is NOT a sequence object, see SequenceData function to create one.")

    sequence_index = seqdata.seqdata.index

    if not silent:
        print(f"[>] Computing complexity index for {seqdata.seqdata.shape[0]} sequences ...")

    # Both helpers return a table; the per-sequence value is taken from the
    # second column and re-indexed so the two Series align one-to-one.
    transitions = get_number_of_transitions(seqdata=seqdata, norm=True).iloc[:, 1]
    transitions.index = sequence_index

    # Anything the entropy helper writes to stdout is discarded.
    with open(os.devnull, 'w') as fnull, redirect_stdout(fnull):
        within_entropy = get_within_sequence_entropy(seqdata=seqdata, norm=True).iloc[:, 1]
    within_entropy.index = sequence_index

    # to_frame(name=...) labels the column directly. Building the frame with
    # pd.DataFrame(series, columns=[...]) would instead *select* by the Series
    # name and yield an all-NaN column whenever the product keeps a name.
    complexity = np.sqrt(transitions * within_entropy).to_frame(name='Complexity Index')

    return complexity.reset_index().rename(columns={'index': 'ID'})
39
+
40
if __name__ == '__main__':
    # Manual smoke test on the CO2 emissions dataset shipped with the package.
    # Import only the loader that is needed instead of a wildcard import, so
    # names already defined in this module are not silently rebound.
    from sequenzo import load_dataset

    df = load_dataset("country_co2_emissions")
    # The first column is skipped; the remaining columns are used as time points.
    _time = list(df.columns)[1:]
    states = ['Very Low', 'Low', 'Middle', 'High', 'Very High']
    sequence_data = SequenceData(df, time=_time, id_col="country", states=states)

    res = get_complexity_index(sequence_data)
    # A bare `res` expression shows nothing outside a notebook; print it.
    print(res)
@@ -0,0 +1,220 @@
1
+ """
2
+ @Author : Yuqi Liang, Xinyi Li
3
+ @File : overall_cross_sectional_entropy.py
4
+ @Time : 2025/9/15 21:52
5
+ @Desc : States frequency by time unit
6
+
7
+ The corresponding function name in TraMineR is seqstatd.R,
8
+ with the source code available at: https://github.com/cran/TraMineR/blob/master/R/seqstatd.R
9
+ """
10
+
11
+ import numpy as np
12
+ import pandas as pd
13
+ from scipy.stats import entropy
14
+ from sequenzo.define_sequence_data import SequenceData
15
+
16
def get_cross_sectional_entropy(
        seqdata: SequenceData,
        weighted: bool = True,
        norm: bool = True,
        return_format: str = "tidy",  # "tidy" | "wide" | "dict"
        include_effective_states: bool = True,
        add_topk: int = 1,  # Mark top K dominant states at each time point
        round_decimals: int = 6
):
    """
    Cross-sectional state distribution by time point, with its entropy.

    For every time point (column of ``seqdata.seqdata``) the share of each
    state is computed, together with the Shannon entropy of that distribution.
    A short human-readable summary is printed to stdout on every call.

    Columns of the tidy output, in order::

        time, state, freq, per_time_entropy_norm (or Entropy when norm=False),
        N_valid, Effective States (optional), rank, is_top

    Parameters
    ----------
    seqdata : SequenceData
        A sequence object created by SequenceData.
    weighted : bool, default True
        If True, state counts are sums of ``seqdata.weights``. The flag is
        switched off automatically when the weights are missing or all equal
        to 1, in which case plain counts are used.
    norm : bool, default True
        If True, the entropy is divided by the maximum possible entropy
        (that of a uniform distribution over the states, i.e. log|S|), so it
        lies in [0, 1].
    return_format : str, default "tidy"
        "tidy" for a long-form table, "wide" for a dict of wide objects; any
        other value (conventionally "dict") yields the legacy dict layout.
    include_effective_states : bool, default True
        If True, also report exp(H), the "effective number of states", where
        H is the unnormalized entropy.
    add_topk : int, default 1
        The ``add_topk`` most frequent states at each time point get
        ``is_top=True``. Zero, negative or falsy values mark nothing.
    round_decimals : int, default 6
        Number of decimal places used when rounding the outputs.

    Returns
    -------
    pd.DataFrame or dict
        - "tidy": long-form DataFrame; the summary dict is stored in
          ``.attrs["summary"]``.
        - "wide": dict with "Frequencies" (states x time DataFrame),
          "N_valid", the entropy Series and, optionally, "Effective States".
        - otherwise: legacy dict with "Frequencies", "ValidStates",
          "Entropy", "per_time_entropy_norm", "Effective States" and an
          "__attrs__" sub-dict holding metadata and the summary.

    Raises
    ------
    ValueError
        If ``seqdata`` is not a ``SequenceData`` instance.
    """
    if not isinstance(seqdata, SequenceData):
        raise ValueError("[!] data is NOT a sequence object, see SequenceData.")

    # Basic metadata
    states_labels = list(seqdata.states)  # Human-readable state labels
    S = len(states_labels)
    T = seqdata.seqdata.shape[1]  # Number of time points
    times = list(seqdata.seqdata.columns)

    # Color attributes (passed through untouched to the outputs)
    cpal = seqdata.custom_colors

    # Weights: fall back to unit weights, and treat an all-ones vector as
    # "unweighted". A plain array keeps the boolean indexing below positional
    # rather than dependent on index alignment.
    w = seqdata.weights if seqdata.weights is not None else np.ones(seqdata.seqdata.shape[0])
    w = np.asarray(w)
    if np.all(w == 1):
        weighted = False

    # Frequency matrix (S x T). A cell counts for state i when it equals
    # either the 1-based state code (i + 1) or the state label itself.
    # NOTE(review): if the labels are themselves integers overlapping 1..S in
    # a different order, one cell could match two states -- confirm encoding.
    freq_counts = np.zeros((S, T), dtype=float)
    for j in range(T):
        col = seqdata.seqdata.iloc[:, j]
        for i, label in enumerate(states_labels):
            mask = ((col == (i + 1)) | (col == label)).to_numpy()
            freq_counts[i, j] = w[mask].sum() if weighted else mask.sum()

    N_valid = freq_counts.sum(axis=0)  # Valid weight/sample size per time point

    # `where=` without `out=` leaves the skipped entries uninitialised, so an
    # explicit zero-filled output is required for time points with no data.
    P = np.divide(freq_counts, N_valid, out=np.zeros_like(freq_counts), where=(N_valid > 0))

    # Entropy per time point (0 where there is no valid observation)
    H = np.array([entropy(P[:, j][P[:, j] > 0]) if N_valid[j] > 0 else 0.0 for j in range(T)])

    if norm:
        Hmax = entropy(np.ones(S) / S) if S > 0 else 1.0
        # Hmax is 0 for a single state; leave H as is to avoid dividing by 0.
        H_norm = H / Hmax if Hmax > 0 else H
    else:
        H_norm = H

    # Effective number of states (equivalent "how many equiprobable states")
    H_eff = np.exp(H) if include_effective_states else None

    # Organize output: wide format
    entropy_col = "per_time_entropy_norm" if norm else "Entropy"
    freq_df_wide = pd.DataFrame(P, index=states_labels, columns=times).round(round_decimals)
    entropy_s = pd.Series(H_norm, index=times, name=entropy_col).round(round_decimals)
    valid_s = pd.Series(N_valid, index=times, name="N_valid").round(round_decimals)
    eff_s = (pd.Series(H_eff, index=times, name="Effective States").round(round_decimals)
             if include_effective_states else None)

    # Generate tidy table (interpretation-friendly)
    tidy = (
        freq_df_wide
        .reset_index()
        .melt(id_vars="index", var_name="time", value_name="freq")
        .rename(columns={"index": "state"})
        .sort_values(["time", "freq"], ascending=[True, False])
    )

    # Ranking + topK annotation
    tidy["rank"] = tidy.groupby("time")["freq"].rank(method="first", ascending=False).astype(int)
    if add_topk and add_topk > 0:
        tidy["is_top"] = tidy["rank"] <= add_topk
    else:
        tidy["is_top"] = False

    # Merge entropy/sample size/effective states (one value per time point)
    per_time_series = [entropy_s, valid_s]
    if eff_s is not None:
        per_time_series.append(eff_s)
    for series in per_time_series:
        tidy = tidy.merge(series.reset_index().rename(columns={"index": "time"}), on="time", how="left")

    # Friendly column order
    cols = ["time", "state", "freq", entropy_col, "N_valid"]
    if include_effective_states:
        cols += ["Effective States"]
    cols += ["rank", "is_top"]
    tidy = tidy[cols]

    # Summary: key statistics that can be explained in one sentence
    has_rows = not tidy.empty  # idxmax/idxmin raise on an empty table
    summary = {
        "states": states_labels,
        "n_states": S,
        "n_timepoints": T,
        "avg_entropy_norm": float(tidy["per_time_entropy_norm"].mean()) if norm else None,
        "avg_entropy": float(entropy_s.mean()) if not norm else None,
        "peak_entropy_time": tidy.loc[tidy[entropy_col].idxmax(), "time"] if has_rows else None,
        "lowest_entropy_time": tidy.loc[tidy[entropy_col].idxmin(), "time"] if has_rows else None,
        "dominant_stability_ratio": float(tidy.query("rank==1")["freq"].mean()),  # Average proportion of dominant state
        "cpal": cpal
    }

    # Print descriptive statistics
    print("\n" + "="*70)
    print("Cross-Sectional Entropy Summary")
    print("="*70)
    print(f"[>] Number of states: {summary['n_states']}")
    print(f"[>] Number of time points: {summary['n_timepoints']}")
    print(f"[>] On average, the most common state accounts for {summary['dominant_stability_ratio']:.1%} of cases")
    print(f"[>] Entropy is highest at time point {summary['peak_entropy_time']}")
    print(f"[>] Entropy is lowest at time point {summary['lowest_entropy_time']}")
    if norm:
        print(f"[>] Average normalized entropy: {summary['avg_entropy_norm']:.3f} (range: 0 = fully concentrated, 1 = evenly distributed)")
    print("="*70 + "\n")

    # Compatible with different return formats
    if return_format == "tidy":
        tidy.attrs = {"summary": summary}
        return tidy

    if return_format == "wide":
        out = {
            "Frequencies": freq_df_wide,
            "N_valid": valid_s,
            entropy_col: entropy_s
        }
        if eff_s is not None:
            out["Effective States"] = eff_s
        return out

    # "dict" (and any unrecognised value): legacy layout
    return {
        "Frequencies": freq_df_wide,
        "ValidStates": valid_s,
        "Entropy": entropy_s if not norm else None,
        "per_time_entropy_norm": entropy_s if norm else None,
        "Effective States": eff_s,
        "__attrs__": {
            "nbseq": float(valid_s.iloc[0]) if len(valid_s) > 0 else None,
            "cpal": cpal,
            "xtlab": times,
            "xtstep": getattr(seqdata, "xtstep", None),
            "tick_last": getattr(seqdata, "tick_last", None),
            "weighted": weighted,
            "norm": norm,
            "summary": summary
        }
    }