sequenzo 0.1.31__cp310-cp310-macosx_10_9_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (299) hide show
  1. _sequenzo_fastcluster.cpython-310-darwin.so +0 -0
  2. sequenzo/__init__.py +349 -0
  3. sequenzo/big_data/__init__.py +12 -0
  4. sequenzo/big_data/clara/__init__.py +26 -0
  5. sequenzo/big_data/clara/clara.py +476 -0
  6. sequenzo/big_data/clara/utils/__init__.py +27 -0
  7. sequenzo/big_data/clara/utils/aggregatecases.py +92 -0
  8. sequenzo/big_data/clara/utils/davies_bouldin.py +91 -0
  9. sequenzo/big_data/clara/utils/get_weighted_diss.cpython-310-darwin.so +0 -0
  10. sequenzo/big_data/clara/utils/wfcmdd.py +205 -0
  11. sequenzo/big_data/clara/visualization.py +88 -0
  12. sequenzo/clustering/KMedoids.py +178 -0
  13. sequenzo/clustering/__init__.py +30 -0
  14. sequenzo/clustering/clustering_c_code.cpython-310-darwin.so +0 -0
  15. sequenzo/clustering/hierarchical_clustering.py +1256 -0
  16. sequenzo/clustering/sequenzo_fastcluster/fastcluster.py +495 -0
  17. sequenzo/clustering/sequenzo_fastcluster/src/fastcluster.cpp +1877 -0
  18. sequenzo/clustering/sequenzo_fastcluster/src/fastcluster_python.cpp +1264 -0
  19. sequenzo/clustering/src/KMedoid.cpp +263 -0
  20. sequenzo/clustering/src/PAM.cpp +237 -0
  21. sequenzo/clustering/src/PAMonce.cpp +265 -0
  22. sequenzo/clustering/src/cluster_quality.cpp +496 -0
  23. sequenzo/clustering/src/cluster_quality.h +128 -0
  24. sequenzo/clustering/src/cluster_quality_backup.cpp +570 -0
  25. sequenzo/clustering/src/module.cpp +228 -0
  26. sequenzo/clustering/src/weightedinertia.cpp +111 -0
  27. sequenzo/clustering/utils/__init__.py +27 -0
  28. sequenzo/clustering/utils/disscenter.py +122 -0
  29. sequenzo/data_preprocessing/__init__.py +22 -0
  30. sequenzo/data_preprocessing/helpers.py +303 -0
  31. sequenzo/datasets/__init__.py +41 -0
  32. sequenzo/datasets/biofam.csv +2001 -0
  33. sequenzo/datasets/biofam_child_domain.csv +2001 -0
  34. sequenzo/datasets/biofam_left_domain.csv +2001 -0
  35. sequenzo/datasets/biofam_married_domain.csv +2001 -0
  36. sequenzo/datasets/chinese_colonial_territories.csv +12 -0
  37. sequenzo/datasets/country_co2_emissions.csv +194 -0
  38. sequenzo/datasets/country_co2_emissions_global_deciles.csv +195 -0
  39. sequenzo/datasets/country_co2_emissions_global_quintiles.csv +195 -0
  40. sequenzo/datasets/country_co2_emissions_local_deciles.csv +195 -0
  41. sequenzo/datasets/country_co2_emissions_local_quintiles.csv +195 -0
  42. sequenzo/datasets/country_gdp_per_capita.csv +194 -0
  43. sequenzo/datasets/dyadic_children.csv +61 -0
  44. sequenzo/datasets/dyadic_parents.csv +61 -0
  45. sequenzo/datasets/mvad.csv +713 -0
  46. sequenzo/datasets/pairfam_activity_by_month.csv +1028 -0
  47. sequenzo/datasets/pairfam_activity_by_year.csv +1028 -0
  48. sequenzo/datasets/pairfam_family_by_month.csv +1028 -0
  49. sequenzo/datasets/pairfam_family_by_year.csv +1028 -0
  50. sequenzo/datasets/political_science_aid_shock.csv +166 -0
  51. sequenzo/datasets/political_science_donor_fragmentation.csv +157 -0
  52. sequenzo/define_sequence_data.py +1400 -0
  53. sequenzo/dissimilarity_measures/__init__.py +31 -0
  54. sequenzo/dissimilarity_measures/c_code.cpython-310-darwin.so +0 -0
  55. sequenzo/dissimilarity_measures/get_distance_matrix.py +762 -0
  56. sequenzo/dissimilarity_measures/get_substitution_cost_matrix.py +246 -0
  57. sequenzo/dissimilarity_measures/src/DHDdistance.cpp +148 -0
  58. sequenzo/dissimilarity_measures/src/LCPdistance.cpp +114 -0
  59. sequenzo/dissimilarity_measures/src/LCPspellDistance.cpp +215 -0
  60. sequenzo/dissimilarity_measures/src/OMdistance.cpp +247 -0
  61. sequenzo/dissimilarity_measures/src/OMspellDistance.cpp +281 -0
  62. sequenzo/dissimilarity_measures/src/__init__.py +0 -0
  63. sequenzo/dissimilarity_measures/src/dist2matrix.cpp +63 -0
  64. sequenzo/dissimilarity_measures/src/dp_utils.h +160 -0
  65. sequenzo/dissimilarity_measures/src/module.cpp +40 -0
  66. sequenzo/dissimilarity_measures/src/setup.py +30 -0
  67. sequenzo/dissimilarity_measures/src/utils.h +25 -0
  68. sequenzo/dissimilarity_measures/src/xsimd/.github/cmake-test/main.cpp +6 -0
  69. sequenzo/dissimilarity_measures/src/xsimd/benchmark/main.cpp +159 -0
  70. sequenzo/dissimilarity_measures/src/xsimd/benchmark/xsimd_benchmark.hpp +565 -0
  71. sequenzo/dissimilarity_measures/src/xsimd/docs/source/conf.py +37 -0
  72. sequenzo/dissimilarity_measures/src/xsimd/examples/mandelbrot.cpp +330 -0
  73. sequenzo/dissimilarity_measures/src/xsimd/examples/pico_bench.hpp +246 -0
  74. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_arithmetic.hpp +266 -0
  75. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_complex.hpp +112 -0
  76. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_details.hpp +323 -0
  77. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_logical.hpp +218 -0
  78. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_math.hpp +2583 -0
  79. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_memory.hpp +880 -0
  80. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_rounding.hpp +72 -0
  81. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_swizzle.hpp +174 -0
  82. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_trigo.hpp +978 -0
  83. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx.hpp +1924 -0
  84. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx2.hpp +1144 -0
  85. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512bw.hpp +656 -0
  86. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512cd.hpp +28 -0
  87. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512dq.hpp +244 -0
  88. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512er.hpp +20 -0
  89. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512f.hpp +2650 -0
  90. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512ifma.hpp +20 -0
  91. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512pf.hpp +20 -0
  92. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vbmi.hpp +77 -0
  93. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vbmi2.hpp +131 -0
  94. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vnni_avx512bw.hpp +20 -0
  95. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vnni_avx512vbmi2.hpp +20 -0
  96. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avxvnni.hpp +20 -0
  97. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_common.hpp +24 -0
  98. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_common_fwd.hpp +77 -0
  99. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_constants.hpp +393 -0
  100. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_emulated.hpp +788 -0
  101. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_avx.hpp +93 -0
  102. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_avx2.hpp +46 -0
  103. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_sse.hpp +97 -0
  104. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma4.hpp +92 -0
  105. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_i8mm_neon64.hpp +17 -0
  106. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_isa.hpp +142 -0
  107. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_neon.hpp +3142 -0
  108. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_neon64.hpp +1543 -0
  109. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_rvv.hpp +1513 -0
  110. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_scalar.hpp +1260 -0
  111. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse2.hpp +2024 -0
  112. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse3.hpp +67 -0
  113. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse4_1.hpp +339 -0
  114. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse4_2.hpp +44 -0
  115. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_ssse3.hpp +186 -0
  116. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sve.hpp +1155 -0
  117. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_vsx.hpp +892 -0
  118. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_wasm.hpp +1780 -0
  119. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_arch.hpp +240 -0
  120. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_config.hpp +484 -0
  121. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_cpuid.hpp +269 -0
  122. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_inline.hpp +27 -0
  123. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/math/xsimd_rem_pio2.hpp +719 -0
  124. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/memory/xsimd_aligned_allocator.hpp +349 -0
  125. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/memory/xsimd_alignment.hpp +91 -0
  126. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_all_registers.hpp +55 -0
  127. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_api.hpp +2765 -0
  128. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx2_register.hpp +44 -0
  129. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512bw_register.hpp +51 -0
  130. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512cd_register.hpp +51 -0
  131. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512dq_register.hpp +51 -0
  132. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512er_register.hpp +51 -0
  133. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512f_register.hpp +77 -0
  134. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512ifma_register.hpp +51 -0
  135. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512pf_register.hpp +51 -0
  136. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vbmi2_register.hpp +51 -0
  137. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vbmi_register.hpp +51 -0
  138. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512bw_register.hpp +54 -0
  139. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512vbmi2_register.hpp +53 -0
  140. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx_register.hpp +64 -0
  141. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avxvnni_register.hpp +44 -0
  142. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_batch.hpp +1524 -0
  143. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_batch_constant.hpp +300 -0
  144. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_common_arch.hpp +47 -0
  145. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_emulated_register.hpp +80 -0
  146. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_avx2_register.hpp +50 -0
  147. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_avx_register.hpp +50 -0
  148. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_sse_register.hpp +50 -0
  149. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma4_register.hpp +50 -0
  150. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_i8mm_neon64_register.hpp +55 -0
  151. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_neon64_register.hpp +55 -0
  152. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_neon_register.hpp +154 -0
  153. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_register.hpp +94 -0
  154. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_rvv_register.hpp +506 -0
  155. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse2_register.hpp +59 -0
  156. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse3_register.hpp +49 -0
  157. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse4_1_register.hpp +48 -0
  158. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse4_2_register.hpp +48 -0
  159. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_ssse3_register.hpp +48 -0
  160. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sve_register.hpp +156 -0
  161. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_traits.hpp +337 -0
  162. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_utils.hpp +536 -0
  163. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_vsx_register.hpp +77 -0
  164. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_wasm_register.hpp +59 -0
  165. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/xsimd.hpp +75 -0
  166. sequenzo/dissimilarity_measures/src/xsimd/test/architectures/dummy.cpp +7 -0
  167. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set.cpp +13 -0
  168. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean.cpp +24 -0
  169. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_aligned.cpp +25 -0
  170. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_arch_independent.cpp +28 -0
  171. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_tag_dispatch.cpp +25 -0
  172. sequenzo/dissimilarity_measures/src/xsimd/test/doc/manipulating_abstract_batches.cpp +7 -0
  173. sequenzo/dissimilarity_measures/src/xsimd/test/doc/manipulating_parametric_batches.cpp +8 -0
  174. sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum.hpp +31 -0
  175. sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum_avx2.cpp +3 -0
  176. sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum_sse2.cpp +3 -0
  177. sequenzo/dissimilarity_measures/src/xsimd/test/doc/writing_vectorized_code.cpp +11 -0
  178. sequenzo/dissimilarity_measures/src/xsimd/test/main.cpp +31 -0
  179. sequenzo/dissimilarity_measures/src/xsimd/test/test_api.cpp +230 -0
  180. sequenzo/dissimilarity_measures/src/xsimd/test/test_arch.cpp +217 -0
  181. sequenzo/dissimilarity_measures/src/xsimd/test/test_basic_math.cpp +183 -0
  182. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch.cpp +1049 -0
  183. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_bool.cpp +508 -0
  184. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_cast.cpp +409 -0
  185. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_complex.cpp +712 -0
  186. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_constant.cpp +286 -0
  187. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_float.cpp +141 -0
  188. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_int.cpp +365 -0
  189. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_manip.cpp +308 -0
  190. sequenzo/dissimilarity_measures/src/xsimd/test/test_bitwise_cast.cpp +222 -0
  191. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_exponential.cpp +226 -0
  192. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_hyperbolic.cpp +183 -0
  193. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_power.cpp +265 -0
  194. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_trigonometric.cpp +236 -0
  195. sequenzo/dissimilarity_measures/src/xsimd/test/test_conversion.cpp +248 -0
  196. sequenzo/dissimilarity_measures/src/xsimd/test/test_custom_default_arch.cpp +28 -0
  197. sequenzo/dissimilarity_measures/src/xsimd/test/test_error_gamma.cpp +170 -0
  198. sequenzo/dissimilarity_measures/src/xsimd/test/test_explicit_batch_instantiation.cpp +32 -0
  199. sequenzo/dissimilarity_measures/src/xsimd/test/test_exponential.cpp +202 -0
  200. sequenzo/dissimilarity_measures/src/xsimd/test/test_extract_pair.cpp +92 -0
  201. sequenzo/dissimilarity_measures/src/xsimd/test/test_fp_manipulation.cpp +77 -0
  202. sequenzo/dissimilarity_measures/src/xsimd/test/test_gnu_source.cpp +30 -0
  203. sequenzo/dissimilarity_measures/src/xsimd/test/test_hyperbolic.cpp +167 -0
  204. sequenzo/dissimilarity_measures/src/xsimd/test/test_load_store.cpp +304 -0
  205. sequenzo/dissimilarity_measures/src/xsimd/test/test_memory.cpp +61 -0
  206. sequenzo/dissimilarity_measures/src/xsimd/test/test_poly_evaluation.cpp +64 -0
  207. sequenzo/dissimilarity_measures/src/xsimd/test/test_power.cpp +184 -0
  208. sequenzo/dissimilarity_measures/src/xsimd/test/test_rounding.cpp +199 -0
  209. sequenzo/dissimilarity_measures/src/xsimd/test/test_select.cpp +101 -0
  210. sequenzo/dissimilarity_measures/src/xsimd/test/test_shuffle.cpp +760 -0
  211. sequenzo/dissimilarity_measures/src/xsimd/test/test_sum.cpp +4 -0
  212. sequenzo/dissimilarity_measures/src/xsimd/test/test_sum.hpp +34 -0
  213. sequenzo/dissimilarity_measures/src/xsimd/test/test_traits.cpp +172 -0
  214. sequenzo/dissimilarity_measures/src/xsimd/test/test_trigonometric.cpp +208 -0
  215. sequenzo/dissimilarity_measures/src/xsimd/test/test_utils.hpp +611 -0
  216. sequenzo/dissimilarity_measures/src/xsimd/test/test_wasm/test_wasm_playwright.py +123 -0
  217. sequenzo/dissimilarity_measures/src/xsimd/test/test_xsimd_api.cpp +1460 -0
  218. sequenzo/dissimilarity_measures/utils/__init__.py +16 -0
  219. sequenzo/dissimilarity_measures/utils/get_LCP_length_for_2_seq.py +44 -0
  220. sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.cpython-310-darwin.so +0 -0
  221. sequenzo/dissimilarity_measures/utils/seqconc.cpython-310-darwin.so +0 -0
  222. sequenzo/dissimilarity_measures/utils/seqdss.cpython-310-darwin.so +0 -0
  223. sequenzo/dissimilarity_measures/utils/seqdur.cpython-310-darwin.so +0 -0
  224. sequenzo/dissimilarity_measures/utils/seqlength.cpython-310-darwin.so +0 -0
  225. sequenzo/multidomain/__init__.py +23 -0
  226. sequenzo/multidomain/association_between_domains.py +311 -0
  227. sequenzo/multidomain/cat.py +597 -0
  228. sequenzo/multidomain/combt.py +519 -0
  229. sequenzo/multidomain/dat.py +81 -0
  230. sequenzo/multidomain/idcd.py +139 -0
  231. sequenzo/multidomain/linked_polyad.py +292 -0
  232. sequenzo/openmp_setup.py +233 -0
  233. sequenzo/prefix_tree/__init__.py +62 -0
  234. sequenzo/prefix_tree/hub.py +114 -0
  235. sequenzo/prefix_tree/individual_level_indicators.py +1321 -0
  236. sequenzo/prefix_tree/spell_individual_level_indicators.py +580 -0
  237. sequenzo/prefix_tree/spell_level_indicators.py +297 -0
  238. sequenzo/prefix_tree/system_level_indicators.py +544 -0
  239. sequenzo/prefix_tree/utils.py +54 -0
  240. sequenzo/seqhmm/__init__.py +95 -0
  241. sequenzo/seqhmm/advanced_optimization.py +305 -0
  242. sequenzo/seqhmm/bootstrap.py +411 -0
  243. sequenzo/seqhmm/build_hmm.py +142 -0
  244. sequenzo/seqhmm/build_mhmm.py +136 -0
  245. sequenzo/seqhmm/build_nhmm.py +121 -0
  246. sequenzo/seqhmm/fit_mhmm.py +62 -0
  247. sequenzo/seqhmm/fit_model.py +61 -0
  248. sequenzo/seqhmm/fit_nhmm.py +76 -0
  249. sequenzo/seqhmm/formulas.py +289 -0
  250. sequenzo/seqhmm/forward_backward_nhmm.py +276 -0
  251. sequenzo/seqhmm/gradients_nhmm.py +306 -0
  252. sequenzo/seqhmm/hmm.py +291 -0
  253. sequenzo/seqhmm/mhmm.py +314 -0
  254. sequenzo/seqhmm/model_comparison.py +238 -0
  255. sequenzo/seqhmm/multichannel_em.py +282 -0
  256. sequenzo/seqhmm/multichannel_utils.py +138 -0
  257. sequenzo/seqhmm/nhmm.py +270 -0
  258. sequenzo/seqhmm/nhmm_utils.py +191 -0
  259. sequenzo/seqhmm/predict.py +137 -0
  260. sequenzo/seqhmm/predict_mhmm.py +142 -0
  261. sequenzo/seqhmm/simulate.py +878 -0
  262. sequenzo/seqhmm/utils.py +218 -0
  263. sequenzo/seqhmm/visualization.py +910 -0
  264. sequenzo/sequence_characteristics/__init__.py +40 -0
  265. sequenzo/sequence_characteristics/complexity_index.py +49 -0
  266. sequenzo/sequence_characteristics/overall_cross_sectional_entropy.py +220 -0
  267. sequenzo/sequence_characteristics/plot_characteristics.py +593 -0
  268. sequenzo/sequence_characteristics/simple_characteristics.py +311 -0
  269. sequenzo/sequence_characteristics/state_frequencies_and_entropy_per_sequence.py +39 -0
  270. sequenzo/sequence_characteristics/turbulence.py +155 -0
  271. sequenzo/sequence_characteristics/variance_of_spell_durations.py +86 -0
  272. sequenzo/sequence_characteristics/within_sequence_entropy.py +43 -0
  273. sequenzo/suffix_tree/__init__.py +66 -0
  274. sequenzo/suffix_tree/hub.py +114 -0
  275. sequenzo/suffix_tree/individual_level_indicators.py +1679 -0
  276. sequenzo/suffix_tree/spell_individual_level_indicators.py +493 -0
  277. sequenzo/suffix_tree/spell_level_indicators.py +248 -0
  278. sequenzo/suffix_tree/system_level_indicators.py +535 -0
  279. sequenzo/suffix_tree/utils.py +56 -0
  280. sequenzo/version_check.py +283 -0
  281. sequenzo/visualization/__init__.py +29 -0
  282. sequenzo/visualization/plot_mean_time.py +222 -0
  283. sequenzo/visualization/plot_modal_state.py +276 -0
  284. sequenzo/visualization/plot_most_frequent_sequences.py +147 -0
  285. sequenzo/visualization/plot_relative_frequency.py +405 -0
  286. sequenzo/visualization/plot_sequence_index.py +1175 -0
  287. sequenzo/visualization/plot_single_medoid.py +153 -0
  288. sequenzo/visualization/plot_state_distribution.py +651 -0
  289. sequenzo/visualization/plot_transition_matrix.py +190 -0
  290. sequenzo/visualization/utils/__init__.py +23 -0
  291. sequenzo/visualization/utils/utils.py +310 -0
  292. sequenzo/with_event_history_analysis/__init__.py +35 -0
  293. sequenzo/with_event_history_analysis/sequence_analysis_multi_state_model.py +850 -0
  294. sequenzo/with_event_history_analysis/sequence_history_analysis.py +283 -0
  295. sequenzo-0.1.31.dist-info/METADATA +286 -0
  296. sequenzo-0.1.31.dist-info/RECORD +299 -0
  297. sequenzo-0.1.31.dist-info/WHEEL +5 -0
  298. sequenzo-0.1.31.dist-info/licenses/LICENSE +28 -0
  299. sequenzo-0.1.31.dist-info/top_level.txt +2 -0
@@ -0,0 +1,283 @@
1
+ """
2
+ @Author : Yuqi Liang 梁彧祺
3
+ @File : sequence_history_analysis.py
4
+ @Time : 30/09/2025 21:08
5
+ @Desc : Sequence History Analysis - Convert person-level sequence data to person-period format
6
+ """
7
+
8
+ import numpy as np
9
+ import pandas as pd
10
+
11
+
12
def person_level_to_person_period(data, id_col="id", period_col="time", event_col="event"):
    """
    Convert person-level data to person-period format.

    This function expands each person's single row into multiple rows,
    one for each time period they are observed.

    Parameters
    ----------
    data : pandas.DataFrame
        Input data with one row per person
    id_col : str, optional
        Name of the ID column (default: "id")
    period_col : str, optional
        Name of the time period column (default: "time"). Values must be
        non-negative whole numbers (integral floats such as 3.0 are accepted).
        A person with 0 periods contributes no rows to the output.
    event_col : str, optional
        Name of the event indicator column (default: "event")

    Returns
    -------
    pandas.DataFrame
        Expanded data with one row per person-period. The period column is
        renumbered 1, 2, 3, ... within each ID and the event column is True
        only on a person's final period (and only if that person's event
        indicator is truthy).

    Raises
    ------
    ValueError
        If the ID, period or event column contains missing values, or if the
        period column contains negative or non-integral values.

    Examples
    --------
    >>> data = pd.DataFrame({'id': [1, 2], 'time': [3, 2], 'event': [True, False]})
    >>> person_level_to_person_period(data)
       id  time  event
    0   1     1  False
    1   1     2  False
    2   1     3   True
    3   2     1  False
    4   2     2  False
    """
    # Check for missing values in critical columns
    if data[[id_col, period_col, event_col]].isna().any().any():
        raise ValueError("Cannot handle missing data in the time or event variables")

    # np.repeat only accepts integer repeat counts, so normalise the durations
    # first: integral floats (e.g. 3.0) are converted, anything else is rejected.
    durations = np.asarray(data[period_col])
    if durations.dtype.kind not in "iu":
        as_int = durations.astype(np.int64)
        if not np.array_equal(as_int, durations):
            raise ValueError("Time periods must be non-negative whole numbers")
        durations = as_int
    if (durations < 0).any():
        raise ValueError("Time periods must be non-negative whole numbers")

    # Create an index that repeats each row based on the time value
    # For example, if time=3, that row will be repeated 3 times
    index = np.repeat(np.arange(len(data)), durations)

    # Expand the data by repeating rows
    dat = data.iloc[index].copy()
    dat.reset_index(drop=True, inplace=True)

    # Create sequential time periods for each ID (1, 2, 3, ...)
    dat[period_col] = dat.groupby(id_col).cumcount() + 1

    # Set all events to False initially
    dat[event_col] = False

    # Position of each person's final row in the expanded frame. A person with
    # zero periods has no rows at all; without the mask their entry would point
    # at the previous person's last row (or at -1) and corrupt that flag.
    observed = durations > 0
    last_rows = np.cumsum(durations)[observed] - 1

    # Set events to True only at the final period for each person
    # Convert to bool to avoid dtype incompatibility warning
    event_flags = data[event_col].values.astype(bool)
    if last_rows.size:
        dat.loc[last_rows, event_col] = event_flags[observed]

    return dat
72
+
73
+
74
def _extract_sequence_dataframe(seqdata):
    """
    Extract sequence DataFrame from various input types.

    Parameters
    ----------
    seqdata : SequenceData, pandas.DataFrame, or numpy.ndarray
        Input sequence data

    Returns
    -------
    pandas.DataFrame
        Sequence data as a DataFrame. For DataFrame input and for objects
        exposing a ``seqdata`` attribute the result is a copy, so callers can
        modify it without touching the input.
    """
    # Test for a plain DataFrame first: pandas exposes columns as attributes,
    # so a DataFrame with a column literally named "seqdata" would otherwise
    # be mistaken for a SequenceData wrapper and yield a Series.
    if isinstance(seqdata, pd.DataFrame):
        return seqdata.copy()

    # Objects carrying a `seqdata` attribute are treated as SequenceData
    if hasattr(seqdata, 'seqdata'):
        return seqdata.seqdata.copy()

    # Assume it's array-like
    return pd.DataFrame(seqdata)
97
+
98
+
99
def seqsha(seqdata, time, event, include_present=False, align_end=False, covar=None):
    """
    Sequence History Analysis: Create person-period format with sequence history.

    This function converts sequence data into a person-period format where each
    row represents a time point for a person, with columns showing their sequence
    history up to that point.

    Parameters
    ----------
    seqdata : SequenceData, pandas.DataFrame, or numpy.ndarray
        Sequence data where each row is a person and each column is a time point.
        Can be a SequenceData object, DataFrame, or array.
    time : array-like
        Duration or time until event for each person. Length should equal the
        number of sequences. Each value indicates how many time periods that
        person is observed. For example, if all persons are observed for the
        full sequence length, use: np.full(n_persons, sequence_length)
    event : array-like
        Event indicator for each person (True/False or 1/0). Length should
        equal the number of sequences.
    include_present : bool, optional
        If True, include the current time point in the history (default: False)
        If False, only include past time points (recommended for most analyses)
    align_end : bool, optional
        If True, align sequences from the end (right-aligned) (default: False)
        If False, align sequences from the start (left-aligned)
    covar : pandas.DataFrame or numpy.ndarray, optional
        Additional covariates to merge with the output (default: None)
        Should have the same number of rows as seqdata

    Returns
    -------
    pandas.DataFrame
        Person-period data with the following columns:
        - id: Person identifier (1-based position of the sequence in seqdata)
        - time: Time period within person
        - event: Event indicator (True only at the final period for each person)
        - Sequence history columns (varies based on align_end parameter);
          states are stored as strings, positions outside the history are NaN
        - Additional covariate columns (if covar is provided)

    Raises
    ------
    ValueError
        If 'time' or 'event' does not have one entry per sequence, or if the
        maximum time exceeds the length of the longest sequence

    Examples
    --------
    Example 1: Basic usage with DataFrame
    >>> import pandas as pd
    >>> import numpy as np
    >>> seqdata = pd.DataFrame([[1, 2, 3, 4], [1, 1, 2, 2]])
    >>> time = np.array([3, 2])
    >>> event = np.array([True, False])
    >>> result = seqsha(seqdata, time, event)

    Example 2: Usage with SequenceData object (recommended)
    >>> from sequenzo import SequenceData, load_dataset
    >>> df = load_dataset('pairfam_family')
    >>> time_cols = [str(i) for i in range(1, 265)]
    >>> seq_data = SequenceData(df, time=time_cols, id_col='id',
    ...                         states=list(range(1, 10)))
    >>> # All persons observed for 264 months
    >>> time = np.full(len(df), 264)
    >>> event = df['highschool'].values
    >>> result = seqsha(seq_data, time, event)

    Example 3: With covariates
    >>> covar = df[['sex', 'yeduc', 'east']]
    >>> result = seqsha(seq_data, time, event, covar=covar)

    Example 4: Right-aligned sequences
    >>> result = seqsha(seq_data, time, event, align_end=True)

    Notes
    -----
    - The time parameter represents observation duration, not calendar time
    - When include_present=False (default), only past states are included
    - Use align_end=True when analyzing sequences leading up to an event
    - Missing values in the original sequence are converted to "NA_orig"
    """
    # Extract sequence DataFrame from input (handles SequenceData, DataFrame, or array)
    seq_df = _extract_sequence_dataframe(seqdata)

    # Convert time and event to numpy arrays for consistency
    time_array = np.asarray(time)
    event_array = np.asarray(event)

    # Check that dimensions match
    n_sequences = len(seq_df)
    if len(time_array) != n_sequences:
        raise ValueError(
            f"Length of 'time' ({len(time_array)}) must match number of sequences ({n_sequences})"
        )
    if len(event_array) != n_sequences:
        raise ValueError(
            f"Length of 'event' ({len(event_array)}) must match number of sequences ({n_sequences})"
        )

    # Create base time data: one row per person with their time and event
    basetime = pd.DataFrame({
        'id': np.arange(1, n_sequences + 1),
        'time': time_array,
        'event': event_array
    })

    # Convert to person-period format (expand rows)
    persper = person_level_to_person_period(basetime, "id", "time", "event")

    # Convert sequence data to a matrix of strings and mark missing values.
    # astype(str) yields a fixed-width unicode array whose width is set by the
    # longest existing entry, so writing "NA_orig" into it directly would be
    # truncated (e.g. to "NA_"). Re-boxing as object keeps the full marker.
    raw_states = seq_df.values
    sdata = raw_states.astype(str).astype(object)
    sdata[pd.isna(raw_states)] = "NA_orig"

    # Get the time periods for each row in person-period data
    age = persper['time'].values
    ma = int(np.max(age))
    n_cols = seq_df.shape[1]

    # Check if time values are valid
    if ma > n_cols:
        raise ValueError("Maximum time of event occurrence is higher than the longest sequence!")

    # Create empty matrix to store past sequence states
    past = np.full((len(persper), n_cols), np.nan, dtype=object)

    # 0-based row of the originating sequence for every person-period row
    person_idx = persper['id'].values - 1

    if align_end:
        # Right-align the sequences (align from the end)
        # Without the present there is no history at time 1, so start at 2
        start = 1 if include_present else 2

        for aa in range(start, ma + 1):
            # Find rows where time equals aa
            cond = age == aa
            ids_a = person_idx[cond]

            if include_present:
                # Include current time point: the aa states 1..aa fill the last aa columns
                past[cond, (n_cols - aa):n_cols] = sdata[ids_a, 0:aa]
            else:
                # Exclude current time point: the aa-1 states 1..aa-1 fill the last aa-1 columns
                past[cond, (n_cols - aa + 1):n_cols] = sdata[ids_a, 0:(aa - 1)]

        # Create column names counting backwards
        col_names = [f"Tm{i}" for i in range(n_cols, 0, -1)]
    else:
        # Left-align the sequences (align from the start)
        for aa in range(1, ma + 1):
            if include_present:
                # The state at time aa is visible from period aa onwards
                cond = age >= aa
            else:
                # The state at time aa is visible only in strictly later periods
                cond = age > aa

            ids_a = person_idx[cond]

            # Fill in the sequence state at position aa-1 (0-based)
            past[cond, aa - 1] = sdata[ids_a, aa - 1]

        # Use original column names or create default ones
        if seq_df.columns is not None and len(seq_df.columns) > 0:
            col_names = [str(col) for col in seq_df.columns[:ma]]
            # Pad with additional column names if needed
            col_names += [f"col_{i}" for i in range(ma, n_cols)]
        else:
            col_names = [f"col_{i}" for i in range(n_cols)]

    # Convert past matrix to DataFrame
    past_df = pd.DataFrame(past, columns=col_names)

    # Combine person-period data with sequence history
    alldata = pd.concat([persper.reset_index(drop=True), past_df], axis=1)

    # Add covariates if provided
    if covar is not None:
        # Repeat each person's covariate row once per person-period row
        # (ids are 1-based, hence the -1 for positional indexing)
        if isinstance(covar, pd.DataFrame):
            covar_subset = covar.iloc[alldata['id'].values - 1].reset_index(drop=True)
            alldata = pd.concat([alldata, covar_subset], axis=1)
        else:
            covar_array = np.array(covar)
            covar_subset = covar_array[alldata['id'].values - 1]
            alldata = pd.concat([alldata, pd.DataFrame(covar_subset)], axis=1)

    return alldata
@@ -0,0 +1,286 @@
1
+ Metadata-Version: 2.4
2
+ Name: sequenzo
3
+ Version: 0.1.31
4
+ Summary: A fast, scalable and intuitive Python package for social sequence analysis.
5
+ Author-email: Yuqi Liang <yuqi.liang.1900@gmail.com>, Xinyi Li <1836724126@qq.com>, Jan Heinrich Ernst Meyerhoff-Liang <jan.meyerhoff1@gmail.com>
6
+ License: BSD 3-Clause License
7
+
8
+ Copyright (c) 2025, Yuqi Liang
9
+
10
+ Redistribution and use in source and binary forms, with or without
11
+ modification, are permitted provided that the following conditions are met:
12
+
13
+ 1. Redistributions of source code must retain the above copyright notice, this
14
+ list of conditions and the following disclaimer.
15
+
16
+ 2. Redistributions in binary form must reproduce the above copyright notice,
17
+ this list of conditions and the following disclaimer in the documentation
18
+ and/or other materials provided with the distribution.
19
+
20
+ 3. Neither the name of the copyright holder nor the names of its
21
+ contributors may be used to endorse or promote products derived from
22
+ this software without specific prior written permission.
23
+
24
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
27
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
28
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
31
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
32
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
33
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34
+
35
+ Project-URL: Homepage, https://github.com/Liang-Team/Sequenzo
36
+ Project-URL: Documentation, https://sequenzo.yuqi-liang.tech
37
+ Classifier: Development Status :: 3 - Alpha
38
+ Classifier: Intended Audience :: Science/Research
39
+ Classifier: Intended Audience :: Developers
40
+ Classifier: Programming Language :: Python :: 3.9
41
+ Classifier: Programming Language :: Python :: 3.10
42
+ Classifier: Programming Language :: Python :: 3.11
43
+ Classifier: Programming Language :: Python :: 3.12
44
+ Requires-Python: <3.13,>=3.9
45
+ Description-Content-Type: text/markdown
46
+ License-File: LICENSE
47
+ Requires-Dist: numpy>=1.21.0
48
+ Requires-Dist: pandas>=1.2.5
49
+ Requires-Dist: matplotlib>=3.4.3
50
+ Requires-Dist: seaborn>=0.11.2
51
+ Requires-Dist: Pillow>=8.3.2
52
+ Requires-Dist: pybind11>=2.6.0
53
+ Requires-Dist: cython>=0.29.21
54
+ Requires-Dist: scipy<1.17,>=1.6.3
55
+ Requires-Dist: scikit-learn>=0.24.2
56
+ Requires-Dist: joblib>=1.0.1
57
+ Requires-Dist: docutils>=0.17
58
+ Requires-Dist: tqdm<5.0.0,>=4.62.3
59
+ Requires-Dist: missingno<0.6.0,>=0.5.2
60
+ Requires-Dist: cffi>=1.15.0
61
+ Requires-Dist: hmmlearn>=0.2.0
62
+ Provides-Extra: dev
63
+ Requires-Dist: pytest>=6.2.5; extra == "dev"
64
+ Requires-Dist: flake8>=3.9.2; extra == "dev"
65
+ Dynamic: license-file
66
+
67
+ <p align="center">
68
+ <img src="https://raw.githubusercontent.com/Liang-Team/Sequenzo/main/assets/logo/FullLogo_NoBuffer.jpg" alt="Sequenzo Logo" width="300">
69
+ </p>
70
+
71
+ <p align="center">
72
+ <!-- ✅ PyPI Latest Version Badge -->
73
+ <a href="https://pypi.org/project/sequenzo/">
74
+ <img alt="PyPI - Version" src="https://img.shields.io/pypi/v/sequenzo?color=blue">
75
+ </a>
76
+
77
+ <!-- 📦 Downloads Badge (optional) -->
78
+ <a href="https://pypi.org/project/sequenzo/">
79
+ <img alt="Downloads" src="https://static.pepy.tech/badge/sequenzo">
80
+ </a>
81
+
82
+ <!-- 📄 License Badge -->
83
+ <a href="https://github.com/Liang-Team/Sequenzo/blob/main/LICENSE">
84
+ <img alt="License" src="https://img.shields.io/github/license/Liang-Team/Sequenzo">
85
+ </a>
86
+ </p>
87
+
88
+ # Sequenzo: Fast, scalable, and intuitive social sequence analysis in Python
89
+
90
+ Sequenzo is a high-performance Python package designed for social sequence analysis.
91
+
92
+ Its methodological foundations build upon prior work in the R ecosystem, especially
93
+ [the TraMineR (Gabadinho et al. 2011) R package](https://traminer.unige.ch/), which
94
+ established many of the core concepts, representations, and analytical tools in
95
+ sequence analysis, as well as subsequent methodological extensions developed within
96
+ [TraMineRextras (Gabadinho et al. 2021)](https://cran.r-project.org/web/packages/TraMineRextras/index.html).
97
+
98
+ Sequenzo also draws inspiration from
99
+ [WeightedCluster (Studer, 2013)](https://mephisto.unige.ch/weightedcluster/) for
100
+ typology construction,
101
+ [seqHMM (Helske & Helske, 2019)](https://www.jstatsoft.org/article/view/v088i03) for
102
+ probabilistic modeling using hidden Markov models,
103
+ and [ggseqplot (Raab, 2022)](https://maraab23.github.io/ggseqplot/) for the design of
104
+ relative frequency sequence visualizations (Fasang & Liao, 2013).
105
+
106
+ We gratefully acknowledge these pioneering contributions and the broader community in social sequence analysis whose work enables everything we do. Sequenzo seeks to expand the social sequence analysis ecosystem by connecting long-standing methodological traditions with the computational practices of the Python data science community, particularly in machine learning and deep learning.
107
+
108
+ ---
109
+
110
+ Sequenzo is built to analyze **any sequence of categorical events**, from individual career paths and migration patterns to corporate growth and urban development.
111
+ Whether you are working with **people, places, or policies**, Sequenzo helps uncover meaningful patterns efficiently.
112
+
113
+ Sequenzo outperforms traditional R-based tools in social sequence analysis, delivering faster processing and superior efficiency, especially for large-scale datasets. **No big data? No problem. You don’t need big data to benefit, as Sequenzo is designed to enhance sequence analysis at any scale, making complex methods accessible to everyone.**
114
+
115
+ > 🚀 **Explore the official documentation at [sequenzo.yuqi-liang.tech](https://sequenzo.yuqi-liang.tech/en/)**
116
+ > with tutorials, practical examples, and API references to help you get started quickly.
117
+ >
118
+ > 📖 Available in **English and Chinese**, our docs are written to be approachable, practical, and easy to follow.
119
+ >
120
+ > ✍️ Regarding **how to cite us**, please see [here](https://sequenzo.yuqi-liang.tech/en/basics/how-to-cite).
121
+
122
+ ## ✨ Be part of the Sequenzo community
123
+ Join our Discord channel to discuss ideas, get help, and hear about upcoming Sequenzo versions, tutorials, and workshops first.
124
+
125
+ ➑️ https://discord.gg/RHfDuu4ETz
126
+
127
+ ## Target Users
128
+
129
+ Sequenzo is designed for:
130
+
131
+ - Quantitative researchers in sociology, demography, political science, economics, management, etc.
132
+ - Data scientists, data analysts, and business analysts working on trajectory/time-series clustering
133
+ - Educators teaching courses involving social sequence data
134
+ - Users familiar with R packages such as `TraMineR` who want a Python-native alternative
135
+
136
+ ## Why Choose Sequenzo?
137
+
138
+ 🚀 **High Performance**
139
+
140
+ Leverages Python’s computational power to achieve 8× faster processing than traditional R-based tools like TraMineR.
141
+
142
+ 🎯 **Easy-to-Use API**
143
+
144
+ Designed with simplicity in mind: intuitive functions streamline complex sequence analysis without compromising flexibility.
145
+
146
+ 🌍 **Flexible for Any Scenario**
147
+
148
+ Perfect for research, policy, and business, enabling seamless analysis of categorical data and its evolution over time.
149
+
150
+ ## Platform Compatibility
151
+
152
+ Sequenzo provides pre-built Python wheels for maximum compatibility — no need to compile from source.
153
+
154
+ | Platform | Architecture | Python Versions | Status |
155
+ |------------------|--------------------------------|-----------------------|-------------------|
156
+ | **macOS** | Intel & Apple Silicon (64-bit) | 3.9, 3.10, 3.11, 3.12 | ✅ Pre-built wheel |
157
+ | **Windows** | `AMD64` (64-bit) | 3.9, 3.10, 3.11, 3.12 | ✅ Pre-built wheel |
158
+ | **Linux (glibc)** | `x86_64` (standard Linux) | 3.9, 3.10, 3.11, 3.12 | ✅ Pre-built wheel |
159
+ | **Linux (musl)** | `x86_64` (Alpine Linux) | 3.9, 3.10, 3.11, 3.12 | ✅ Pre-built wheel |
160
+
161
+
162
+ What do these terms mean?
163
+ - **macosx_arm64 (macOS)**: One wheel supports Apple Silicon Macs.
164
+ - **macosx_x86_64 (macOS)**: One wheel supports Intel Macs.
165
+ - **manylinux2014_x86_64 (glibc-based Linux)**: Compatible with most mainstream Linux distributions (e.g., Ubuntu, Debian, CentOS).
166
+ - **musllinux_1_2 (musl-based Linux)**: For lightweight Alpine Linux environments, common in Docker containers.
167
+ - **AMD64 (Windows)**: Standard 64-bit Windows system architecture.
168
+
169
+ All of these wheels are pre-built and available on PyPI — so `pip install sequenzo` should work on supported platforms, without needing a compiler.
170
+
171
+ **Windows (win32)** and **Linux (i686)** are dropped due to:
172
+
173
+ - Extremely low usage in modern systems (post-2020)
174
+ - Memory limitations (≤ 4GB) unsuitable for scientific computing workloads
175
+ - Increasing incompatibility with packages such as `numpy`, `scipy`, and `pybind11`
176
+ - Frequent build failures and maintenance overhead in CI/CD pipelines
177
+
178
+
179
+ ## Installation
180
+
181
+ If you haven't installed Python, please follow [Yuqi's tutorial about how to set up Python and your virtual environment](https://www.yuqi-liang.tech/blog/setup-python-virtual-environment/).
182
+
183
+ Once Python is installed, we highly recommend using [PyCharm](https://www.jetbrains.com/pycharm/download/) as your IDE (Integrated Development Environment — the place where you open your folder and files to work with Python), rather than Visual Studio. PyCharm has excellent built-in support for managing virtual environments, making your workflow much easier and more reliable.
184
+
185
+ In PyCharm, please make sure to select a virtual environment using Python 3.9, 3.10, 3.11, or 3.12, as these versions are fully supported by `sequenzo`.
186
+
187
+ Then, you can open the built-in terminal by clicking the Terminal icon
188
+ <img src="https://github.com/user-attachments/assets/1e9e3af0-4286-47ba-aa88-29c3288cb7cb" alt="terminal icon" width="30" style="display:inline; vertical-align:middle;">
189
+ in the left sidebar (usually near the bottom). It looks like a small command-line window icon.
190
+
191
+ Once it’s open, type the following to install `sequenzo`:
192
+
193
+ ```
194
+ pip install sequenzo
195
+ ```
196
+
197
+ If you have some issues with the installation, it might be because you have both Python 2 and Python 3 installed on your computer. In this case, you can try to use `pip3` instead of `pip` to install the package.
198
+
199
+ ```
200
+ pip3 install sequenzo
201
+ ```
202
+
203
+ ## Documentation
204
+
205
+ Explore the full Sequenzo documentation [here](https://sequenzo.yuqi-liang.tech). Even though the documentation website is still under construction, you can already find some useful information there.
206
+
207
+ **Where to start on the documentation website?**
208
+ * New to Sequenzo or social sequence analysis? Begin with "About Sequenzo" → "Quickstart Guide" for a smooth introduction.
209
+ * Got your own data? After going through "About Sequenzo" and "Quickstart Guide", you are ready to dive in and start analyzing.
210
+ * Looking for more? Check out our example datasets and tutorials to deepen your understanding.
211
+
212
+ For Chinese users, additional tutorials are available on [Yuqi's video tutorials on Bilibili](https://space.bilibili.com/263594713/lists/4147974).
213
+
214
+ ## Join the Community
215
+
216
+ 💬 **Have a question or found a bug?**
217
+
218
+ Please submit an issue on [GitHub Issues](https://github.com/Liang-Team/Sequenzo/issues) by following [this instruction](https://sequenzo.yuqi-liang.tech/en/faq/bug_reports_and_feature_requests).
219
+
220
+ * We will respond as quickly as possible.
221
+ * For requests that are not too large, we aim to fix or implement the feature **within one week** from our response time.
222
+ * Timeline may vary depending on how many requests we receive.
223
+
224
+ 🌟 **Enjoying Sequenzo?**
225
+
226
+ Support the project by starring ⭐ the GitHub repo and spreading the word!
227
+
228
+ 🛠 **Interested in contributing?**
229
+
230
+ Check out our contribution guide for more details (work in progress).
231
+
232
+ * Write code? Submit a pull request to enhance Sequenzo.
233
+ * Testing? Try Sequenzo and share your feedback. Every suggestion counts!
234
+
235
+ If you're contributing or debugging, use:
236
+
237
+ ```bash
238
+ pip install -r requirements/requirements-3.10.txt # Or matching your Python version
239
+ ```
240
+
241
+ For standard installation, use:
242
+
243
+ ```bash
244
+ pip install . # Uses pyproject.toml
245
+ ```
246
+
247
+ ## Team
248
+
249
+ **Paper Authors**
250
+ * [Yuqi Liang, University of Oxford](https://www.yuqi-liang.tech/)
251
+ * [Xinyi Li, Northeastern University](https://github.com/Fantasy201)
252
+ * [Jan Heinrich Ernst Meyerhoff-Liang, Institute for New Economic Thinking Oxford](https://www.linkedin.com/in/jan-meyerhoff-liang-97999a170/)
253
+
254
+ **Package Contributors**
255
+
256
+ Coding contributors:
257
+ * [Sebastian Daza](https://sdaza.com/)
258
+ * [Cheng Deng](https://github.com/de-de-de-de-de)
259
+ * [Liangxingyun He, Stockholm School of Economics, Sweden](https://www.linkedin.com/in/liangxingyun-he-6aa128304/)
260
+
261
+ Documentation contributors:
262
+ * [Liangxingyun He, Stockholm School of Economics, Sweden](https://www.linkedin.com/in/liangxingyun-he-6aa128304/)
263
+ * [Yukun Ming, Universidad Carlos III de Madrid (Spain)](https://www.linkedin.com/in/yukun)
264
+ * [Sizhu Qu, Northeastern University (US)](https://www.linkedin.com/in/sizhuq)
265
+ * [Ziting Yang, Rochester University (US)](https://www.linkedin.com/in/ziting-yang-7b33832bb)
266
+
267
+ Others
268
+ * With special thanks to our initial testers (alphabetically ordered): [Joji Chia](https://sociology.illinois.edu/directory/profile/jbchia2), [Kass Gonzalez](https://www.linkedin.com/in/kass-gonzalez-72a778276/), [Sinyee Lu](https://sociology.illinois.edu/directory/profile/qianyil4), [Sohee Shin](https://sociology.illinois.edu/directory/profile/sohees2)
269
+ * Website and related technical support: [Mactavish](https://github.com/mactavishz)
270
+ * Sequence data sources compilation - History: Jingrui Chen
271
+ * Visual design consultant: Changyu Yi
272
+
273
+ **Acknowledgements**
274
+
275
+ * Amazing authors of R packages in social sequence analysis, including [TraMineR (Gabadinho et al. 2011)](https://traminer.unige.ch/), [WeightedCluster (Studer, 2013)](https://mephisto.unige.ch/weightedcluster/), and [seqHMM (Helske & Helske, 2019)](https://cran.r-project.org/web/packages/seqHMM/index.html)
276
+ * Methodological advisor in sequence analysis: [Professor Tim Liao (University of Illinois Urbana-Champaign)](https://sociology.illinois.edu/directory/profile/tfliao)
277
+ * [Social Sequence Analysis Association](https://sequenceanalysis.org/)
278
+ * Helpful discussions and comments:
279
+ * [Gilbert Ritschard](https://mephisto.unige.ch/Gilbert/)
280
+ * [Matthias Studer](https://www.unige.ch/sciences-societe/ideso/membres/matthias-studer)
281
+ * [Emanuela Struffolino](https://emastruffolino.github.io/)
282
+ * [Marcel Raab](https://marcelraab.de/)
283
+ * [Heyi Zhang](https://profiles.ucl.ac.uk/100967-heyi-zhang)
284
+ * Yuqi's PhD advisor [Professor Ridhi Kashyap (University of Oxford)](https://www.nuffield.ox.ac.uk/people/profiles/ridhi-kashyap/), and mentor [Charles Rahal (University of Oxford)](https://crahal.com/)
285
+ * Yuqi's original programming mentor: [JiangHuShiNian](https://github.com/jianghushinian)
286
+