sequenzo 0.1.31__cp310-cp310-macosx_10_9_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (299)
  1. _sequenzo_fastcluster.cpython-310-darwin.so +0 -0
  2. sequenzo/__init__.py +349 -0
  3. sequenzo/big_data/__init__.py +12 -0
  4. sequenzo/big_data/clara/__init__.py +26 -0
  5. sequenzo/big_data/clara/clara.py +476 -0
  6. sequenzo/big_data/clara/utils/__init__.py +27 -0
  7. sequenzo/big_data/clara/utils/aggregatecases.py +92 -0
  8. sequenzo/big_data/clara/utils/davies_bouldin.py +91 -0
  9. sequenzo/big_data/clara/utils/get_weighted_diss.cpython-310-darwin.so +0 -0
  10. sequenzo/big_data/clara/utils/wfcmdd.py +205 -0
  11. sequenzo/big_data/clara/visualization.py +88 -0
  12. sequenzo/clustering/KMedoids.py +178 -0
  13. sequenzo/clustering/__init__.py +30 -0
  14. sequenzo/clustering/clustering_c_code.cpython-310-darwin.so +0 -0
  15. sequenzo/clustering/hierarchical_clustering.py +1256 -0
  16. sequenzo/clustering/sequenzo_fastcluster/fastcluster.py +495 -0
  17. sequenzo/clustering/sequenzo_fastcluster/src/fastcluster.cpp +1877 -0
  18. sequenzo/clustering/sequenzo_fastcluster/src/fastcluster_python.cpp +1264 -0
  19. sequenzo/clustering/src/KMedoid.cpp +263 -0
  20. sequenzo/clustering/src/PAM.cpp +237 -0
  21. sequenzo/clustering/src/PAMonce.cpp +265 -0
  22. sequenzo/clustering/src/cluster_quality.cpp +496 -0
  23. sequenzo/clustering/src/cluster_quality.h +128 -0
  24. sequenzo/clustering/src/cluster_quality_backup.cpp +570 -0
  25. sequenzo/clustering/src/module.cpp +228 -0
  26. sequenzo/clustering/src/weightedinertia.cpp +111 -0
  27. sequenzo/clustering/utils/__init__.py +27 -0
  28. sequenzo/clustering/utils/disscenter.py +122 -0
  29. sequenzo/data_preprocessing/__init__.py +22 -0
  30. sequenzo/data_preprocessing/helpers.py +303 -0
  31. sequenzo/datasets/__init__.py +41 -0
  32. sequenzo/datasets/biofam.csv +2001 -0
  33. sequenzo/datasets/biofam_child_domain.csv +2001 -0
  34. sequenzo/datasets/biofam_left_domain.csv +2001 -0
  35. sequenzo/datasets/biofam_married_domain.csv +2001 -0
  36. sequenzo/datasets/chinese_colonial_territories.csv +12 -0
  37. sequenzo/datasets/country_co2_emissions.csv +194 -0
  38. sequenzo/datasets/country_co2_emissions_global_deciles.csv +195 -0
  39. sequenzo/datasets/country_co2_emissions_global_quintiles.csv +195 -0
  40. sequenzo/datasets/country_co2_emissions_local_deciles.csv +195 -0
  41. sequenzo/datasets/country_co2_emissions_local_quintiles.csv +195 -0
  42. sequenzo/datasets/country_gdp_per_capita.csv +194 -0
  43. sequenzo/datasets/dyadic_children.csv +61 -0
  44. sequenzo/datasets/dyadic_parents.csv +61 -0
  45. sequenzo/datasets/mvad.csv +713 -0
  46. sequenzo/datasets/pairfam_activity_by_month.csv +1028 -0
  47. sequenzo/datasets/pairfam_activity_by_year.csv +1028 -0
  48. sequenzo/datasets/pairfam_family_by_month.csv +1028 -0
  49. sequenzo/datasets/pairfam_family_by_year.csv +1028 -0
  50. sequenzo/datasets/political_science_aid_shock.csv +166 -0
  51. sequenzo/datasets/political_science_donor_fragmentation.csv +157 -0
  52. sequenzo/define_sequence_data.py +1400 -0
  53. sequenzo/dissimilarity_measures/__init__.py +31 -0
  54. sequenzo/dissimilarity_measures/c_code.cpython-310-darwin.so +0 -0
  55. sequenzo/dissimilarity_measures/get_distance_matrix.py +762 -0
  56. sequenzo/dissimilarity_measures/get_substitution_cost_matrix.py +246 -0
  57. sequenzo/dissimilarity_measures/src/DHDdistance.cpp +148 -0
  58. sequenzo/dissimilarity_measures/src/LCPdistance.cpp +114 -0
  59. sequenzo/dissimilarity_measures/src/LCPspellDistance.cpp +215 -0
  60. sequenzo/dissimilarity_measures/src/OMdistance.cpp +247 -0
  61. sequenzo/dissimilarity_measures/src/OMspellDistance.cpp +281 -0
  62. sequenzo/dissimilarity_measures/src/__init__.py +0 -0
  63. sequenzo/dissimilarity_measures/src/dist2matrix.cpp +63 -0
  64. sequenzo/dissimilarity_measures/src/dp_utils.h +160 -0
  65. sequenzo/dissimilarity_measures/src/module.cpp +40 -0
  66. sequenzo/dissimilarity_measures/src/setup.py +30 -0
  67. sequenzo/dissimilarity_measures/src/utils.h +25 -0
  68. sequenzo/dissimilarity_measures/src/xsimd/.github/cmake-test/main.cpp +6 -0
  69. sequenzo/dissimilarity_measures/src/xsimd/benchmark/main.cpp +159 -0
  70. sequenzo/dissimilarity_measures/src/xsimd/benchmark/xsimd_benchmark.hpp +565 -0
  71. sequenzo/dissimilarity_measures/src/xsimd/docs/source/conf.py +37 -0
  72. sequenzo/dissimilarity_measures/src/xsimd/examples/mandelbrot.cpp +330 -0
  73. sequenzo/dissimilarity_measures/src/xsimd/examples/pico_bench.hpp +246 -0
  74. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_arithmetic.hpp +266 -0
  75. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_complex.hpp +112 -0
  76. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_details.hpp +323 -0
  77. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_logical.hpp +218 -0
  78. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_math.hpp +2583 -0
  79. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_memory.hpp +880 -0
  80. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_rounding.hpp +72 -0
  81. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_swizzle.hpp +174 -0
  82. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_trigo.hpp +978 -0
  83. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx.hpp +1924 -0
  84. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx2.hpp +1144 -0
  85. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512bw.hpp +656 -0
  86. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512cd.hpp +28 -0
  87. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512dq.hpp +244 -0
  88. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512er.hpp +20 -0
  89. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512f.hpp +2650 -0
  90. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512ifma.hpp +20 -0
  91. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512pf.hpp +20 -0
  92. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vbmi.hpp +77 -0
  93. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vbmi2.hpp +131 -0
  94. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vnni_avx512bw.hpp +20 -0
  95. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vnni_avx512vbmi2.hpp +20 -0
  96. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avxvnni.hpp +20 -0
  97. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_common.hpp +24 -0
  98. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_common_fwd.hpp +77 -0
  99. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_constants.hpp +393 -0
  100. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_emulated.hpp +788 -0
  101. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_avx.hpp +93 -0
  102. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_avx2.hpp +46 -0
  103. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_sse.hpp +97 -0
  104. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma4.hpp +92 -0
  105. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_i8mm_neon64.hpp +17 -0
  106. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_isa.hpp +142 -0
  107. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_neon.hpp +3142 -0
  108. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_neon64.hpp +1543 -0
  109. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_rvv.hpp +1513 -0
  110. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_scalar.hpp +1260 -0
  111. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse2.hpp +2024 -0
  112. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse3.hpp +67 -0
  113. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse4_1.hpp +339 -0
  114. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse4_2.hpp +44 -0
  115. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_ssse3.hpp +186 -0
  116. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sve.hpp +1155 -0
  117. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_vsx.hpp +892 -0
  118. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_wasm.hpp +1780 -0
  119. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_arch.hpp +240 -0
  120. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_config.hpp +484 -0
  121. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_cpuid.hpp +269 -0
  122. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_inline.hpp +27 -0
  123. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/math/xsimd_rem_pio2.hpp +719 -0
  124. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/memory/xsimd_aligned_allocator.hpp +349 -0
  125. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/memory/xsimd_alignment.hpp +91 -0
  126. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_all_registers.hpp +55 -0
  127. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_api.hpp +2765 -0
  128. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx2_register.hpp +44 -0
  129. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512bw_register.hpp +51 -0
  130. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512cd_register.hpp +51 -0
  131. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512dq_register.hpp +51 -0
  132. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512er_register.hpp +51 -0
  133. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512f_register.hpp +77 -0
  134. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512ifma_register.hpp +51 -0
  135. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512pf_register.hpp +51 -0
  136. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vbmi2_register.hpp +51 -0
  137. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vbmi_register.hpp +51 -0
  138. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512bw_register.hpp +54 -0
  139. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512vbmi2_register.hpp +53 -0
  140. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx_register.hpp +64 -0
  141. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avxvnni_register.hpp +44 -0
  142. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_batch.hpp +1524 -0
  143. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_batch_constant.hpp +300 -0
  144. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_common_arch.hpp +47 -0
  145. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_emulated_register.hpp +80 -0
  146. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_avx2_register.hpp +50 -0
  147. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_avx_register.hpp +50 -0
  148. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_sse_register.hpp +50 -0
  149. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma4_register.hpp +50 -0
  150. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_i8mm_neon64_register.hpp +55 -0
  151. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_neon64_register.hpp +55 -0
  152. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_neon_register.hpp +154 -0
  153. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_register.hpp +94 -0
  154. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_rvv_register.hpp +506 -0
  155. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse2_register.hpp +59 -0
  156. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse3_register.hpp +49 -0
  157. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse4_1_register.hpp +48 -0
  158. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse4_2_register.hpp +48 -0
  159. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_ssse3_register.hpp +48 -0
  160. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sve_register.hpp +156 -0
  161. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_traits.hpp +337 -0
  162. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_utils.hpp +536 -0
  163. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_vsx_register.hpp +77 -0
  164. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_wasm_register.hpp +59 -0
  165. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/xsimd.hpp +75 -0
  166. sequenzo/dissimilarity_measures/src/xsimd/test/architectures/dummy.cpp +7 -0
  167. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set.cpp +13 -0
  168. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean.cpp +24 -0
  169. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_aligned.cpp +25 -0
  170. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_arch_independent.cpp +28 -0
  171. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_tag_dispatch.cpp +25 -0
  172. sequenzo/dissimilarity_measures/src/xsimd/test/doc/manipulating_abstract_batches.cpp +7 -0
  173. sequenzo/dissimilarity_measures/src/xsimd/test/doc/manipulating_parametric_batches.cpp +8 -0
  174. sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum.hpp +31 -0
  175. sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum_avx2.cpp +3 -0
  176. sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum_sse2.cpp +3 -0
  177. sequenzo/dissimilarity_measures/src/xsimd/test/doc/writing_vectorized_code.cpp +11 -0
  178. sequenzo/dissimilarity_measures/src/xsimd/test/main.cpp +31 -0
  179. sequenzo/dissimilarity_measures/src/xsimd/test/test_api.cpp +230 -0
  180. sequenzo/dissimilarity_measures/src/xsimd/test/test_arch.cpp +217 -0
  181. sequenzo/dissimilarity_measures/src/xsimd/test/test_basic_math.cpp +183 -0
  182. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch.cpp +1049 -0
  183. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_bool.cpp +508 -0
  184. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_cast.cpp +409 -0
  185. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_complex.cpp +712 -0
  186. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_constant.cpp +286 -0
  187. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_float.cpp +141 -0
  188. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_int.cpp +365 -0
  189. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_manip.cpp +308 -0
  190. sequenzo/dissimilarity_measures/src/xsimd/test/test_bitwise_cast.cpp +222 -0
  191. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_exponential.cpp +226 -0
  192. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_hyperbolic.cpp +183 -0
  193. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_power.cpp +265 -0
  194. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_trigonometric.cpp +236 -0
  195. sequenzo/dissimilarity_measures/src/xsimd/test/test_conversion.cpp +248 -0
  196. sequenzo/dissimilarity_measures/src/xsimd/test/test_custom_default_arch.cpp +28 -0
  197. sequenzo/dissimilarity_measures/src/xsimd/test/test_error_gamma.cpp +170 -0
  198. sequenzo/dissimilarity_measures/src/xsimd/test/test_explicit_batch_instantiation.cpp +32 -0
  199. sequenzo/dissimilarity_measures/src/xsimd/test/test_exponential.cpp +202 -0
  200. sequenzo/dissimilarity_measures/src/xsimd/test/test_extract_pair.cpp +92 -0
  201. sequenzo/dissimilarity_measures/src/xsimd/test/test_fp_manipulation.cpp +77 -0
  202. sequenzo/dissimilarity_measures/src/xsimd/test/test_gnu_source.cpp +30 -0
  203. sequenzo/dissimilarity_measures/src/xsimd/test/test_hyperbolic.cpp +167 -0
  204. sequenzo/dissimilarity_measures/src/xsimd/test/test_load_store.cpp +304 -0
  205. sequenzo/dissimilarity_measures/src/xsimd/test/test_memory.cpp +61 -0
  206. sequenzo/dissimilarity_measures/src/xsimd/test/test_poly_evaluation.cpp +64 -0
  207. sequenzo/dissimilarity_measures/src/xsimd/test/test_power.cpp +184 -0
  208. sequenzo/dissimilarity_measures/src/xsimd/test/test_rounding.cpp +199 -0
  209. sequenzo/dissimilarity_measures/src/xsimd/test/test_select.cpp +101 -0
  210. sequenzo/dissimilarity_measures/src/xsimd/test/test_shuffle.cpp +760 -0
  211. sequenzo/dissimilarity_measures/src/xsimd/test/test_sum.cpp +4 -0
  212. sequenzo/dissimilarity_measures/src/xsimd/test/test_sum.hpp +34 -0
  213. sequenzo/dissimilarity_measures/src/xsimd/test/test_traits.cpp +172 -0
  214. sequenzo/dissimilarity_measures/src/xsimd/test/test_trigonometric.cpp +208 -0
  215. sequenzo/dissimilarity_measures/src/xsimd/test/test_utils.hpp +611 -0
  216. sequenzo/dissimilarity_measures/src/xsimd/test/test_wasm/test_wasm_playwright.py +123 -0
  217. sequenzo/dissimilarity_measures/src/xsimd/test/test_xsimd_api.cpp +1460 -0
  218. sequenzo/dissimilarity_measures/utils/__init__.py +16 -0
  219. sequenzo/dissimilarity_measures/utils/get_LCP_length_for_2_seq.py +44 -0
  220. sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.cpython-310-darwin.so +0 -0
  221. sequenzo/dissimilarity_measures/utils/seqconc.cpython-310-darwin.so +0 -0
  222. sequenzo/dissimilarity_measures/utils/seqdss.cpython-310-darwin.so +0 -0
  223. sequenzo/dissimilarity_measures/utils/seqdur.cpython-310-darwin.so +0 -0
  224. sequenzo/dissimilarity_measures/utils/seqlength.cpython-310-darwin.so +0 -0
  225. sequenzo/multidomain/__init__.py +23 -0
  226. sequenzo/multidomain/association_between_domains.py +311 -0
  227. sequenzo/multidomain/cat.py +597 -0
  228. sequenzo/multidomain/combt.py +519 -0
  229. sequenzo/multidomain/dat.py +81 -0
  230. sequenzo/multidomain/idcd.py +139 -0
  231. sequenzo/multidomain/linked_polyad.py +292 -0
  232. sequenzo/openmp_setup.py +233 -0
  233. sequenzo/prefix_tree/__init__.py +62 -0
  234. sequenzo/prefix_tree/hub.py +114 -0
  235. sequenzo/prefix_tree/individual_level_indicators.py +1321 -0
  236. sequenzo/prefix_tree/spell_individual_level_indicators.py +580 -0
  237. sequenzo/prefix_tree/spell_level_indicators.py +297 -0
  238. sequenzo/prefix_tree/system_level_indicators.py +544 -0
  239. sequenzo/prefix_tree/utils.py +54 -0
  240. sequenzo/seqhmm/__init__.py +95 -0
  241. sequenzo/seqhmm/advanced_optimization.py +305 -0
  242. sequenzo/seqhmm/bootstrap.py +411 -0
  243. sequenzo/seqhmm/build_hmm.py +142 -0
  244. sequenzo/seqhmm/build_mhmm.py +136 -0
  245. sequenzo/seqhmm/build_nhmm.py +121 -0
  246. sequenzo/seqhmm/fit_mhmm.py +62 -0
  247. sequenzo/seqhmm/fit_model.py +61 -0
  248. sequenzo/seqhmm/fit_nhmm.py +76 -0
  249. sequenzo/seqhmm/formulas.py +289 -0
  250. sequenzo/seqhmm/forward_backward_nhmm.py +276 -0
  251. sequenzo/seqhmm/gradients_nhmm.py +306 -0
  252. sequenzo/seqhmm/hmm.py +291 -0
  253. sequenzo/seqhmm/mhmm.py +314 -0
  254. sequenzo/seqhmm/model_comparison.py +238 -0
  255. sequenzo/seqhmm/multichannel_em.py +282 -0
  256. sequenzo/seqhmm/multichannel_utils.py +138 -0
  257. sequenzo/seqhmm/nhmm.py +270 -0
  258. sequenzo/seqhmm/nhmm_utils.py +191 -0
  259. sequenzo/seqhmm/predict.py +137 -0
  260. sequenzo/seqhmm/predict_mhmm.py +142 -0
  261. sequenzo/seqhmm/simulate.py +878 -0
  262. sequenzo/seqhmm/utils.py +218 -0
  263. sequenzo/seqhmm/visualization.py +910 -0
  264. sequenzo/sequence_characteristics/__init__.py +40 -0
  265. sequenzo/sequence_characteristics/complexity_index.py +49 -0
  266. sequenzo/sequence_characteristics/overall_cross_sectional_entropy.py +220 -0
  267. sequenzo/sequence_characteristics/plot_characteristics.py +593 -0
  268. sequenzo/sequence_characteristics/simple_characteristics.py +311 -0
  269. sequenzo/sequence_characteristics/state_frequencies_and_entropy_per_sequence.py +39 -0
  270. sequenzo/sequence_characteristics/turbulence.py +155 -0
  271. sequenzo/sequence_characteristics/variance_of_spell_durations.py +86 -0
  272. sequenzo/sequence_characteristics/within_sequence_entropy.py +43 -0
  273. sequenzo/suffix_tree/__init__.py +66 -0
  274. sequenzo/suffix_tree/hub.py +114 -0
  275. sequenzo/suffix_tree/individual_level_indicators.py +1679 -0
  276. sequenzo/suffix_tree/spell_individual_level_indicators.py +493 -0
  277. sequenzo/suffix_tree/spell_level_indicators.py +248 -0
  278. sequenzo/suffix_tree/system_level_indicators.py +535 -0
  279. sequenzo/suffix_tree/utils.py +56 -0
  280. sequenzo/version_check.py +283 -0
  281. sequenzo/visualization/__init__.py +29 -0
  282. sequenzo/visualization/plot_mean_time.py +222 -0
  283. sequenzo/visualization/plot_modal_state.py +276 -0
  284. sequenzo/visualization/plot_most_frequent_sequences.py +147 -0
  285. sequenzo/visualization/plot_relative_frequency.py +405 -0
  286. sequenzo/visualization/plot_sequence_index.py +1175 -0
  287. sequenzo/visualization/plot_single_medoid.py +153 -0
  288. sequenzo/visualization/plot_state_distribution.py +651 -0
  289. sequenzo/visualization/plot_transition_matrix.py +190 -0
  290. sequenzo/visualization/utils/__init__.py +23 -0
  291. sequenzo/visualization/utils/utils.py +310 -0
  292. sequenzo/with_event_history_analysis/__init__.py +35 -0
  293. sequenzo/with_event_history_analysis/sequence_analysis_multi_state_model.py +850 -0
  294. sequenzo/with_event_history_analysis/sequence_history_analysis.py +283 -0
  295. sequenzo-0.1.31.dist-info/METADATA +286 -0
  296. sequenzo-0.1.31.dist-info/RECORD +299 -0
  297. sequenzo-0.1.31.dist-info/WHEEL +5 -0
  298. sequenzo-0.1.31.dist-info/licenses/LICENSE +28 -0
  299. sequenzo-0.1.31.dist-info/top_level.txt +2 -0
@@ -0,0 +1,228 @@
1
+ #include "PAM.cpp"
2
+ #include "KMedoid.cpp"
3
+ #include "PAMonce.cpp"
4
+ #include "weightedinertia.cpp"
5
+ #include "cluster_quality.cpp"
6
+
7
+ namespace py = pybind11;
8
+
9
+ PYBIND11_MODULE(clustering_c_code, m) {
10
+ py::class_<PAM>(m, "PAM")
11
+ .def(py::init<int, py::array_t<double>, py::array_t<int>, int, py::array_t<double>>())
12
+ .def("runclusterloop", &PAM::runclusterloop);
13
+
14
+ py::class_<KMedoid>(m, "KMedoid")
15
+ .def(py::init<int, py::array_t<double>, py::array_t<int>, int, py::array_t<double>>())
16
+ .def("runclusterloop", &KMedoid::runclusterloop);
17
+
18
+ py::class_<PAMonce>(m, "PAMonce")
19
+ .def(py::init<int, py::array_t<double>, py::array_t<int>, int, py::array_t<double>>())
20
+ .def("runclusterloop", &PAMonce::runclusterloop);
21
+
22
+ py::class_<weightedinertia>(m, "weightedinertia")
23
+ .def(py::init<py::array_t<double>, py::array_t<int>, py::array_t<double>>())
24
+ .def("tmrWeightedInertiaContrib", &weightedinertia::tmrWeightedInertiaContrib);
25
+
26
+ // Cluster Quality functions
27
+ m.def("cluster_quality", [](py::array_t<double> diss_matrix,
28
+ py::array_t<int> cluster_labels,
29
+ py::array_t<double> weights,
30
+ int nclusters) -> py::dict {
31
+ auto diss_buf = diss_matrix.request();
32
+ auto cluster_buf = cluster_labels.request();
33
+ auto weights_buf = weights.request();
34
+
35
+ if (diss_buf.ndim != 2 || diss_buf.shape[0] != diss_buf.shape[1]) {
36
+ throw std::runtime_error("Distance matrix must be square");
37
+ }
38
+
39
+ int n = diss_buf.shape[0];
40
+
41
+ if (cluster_buf.size != n || weights_buf.size != n) {
42
+ throw std::runtime_error("Cluster labels and weights must have same length as matrix dimension");
43
+ }
44
+
45
+ double* diss_ptr = static_cast<double*>(diss_buf.ptr);
46
+ int* cluster_ptr = static_cast<int*>(cluster_buf.ptr);
47
+ double* weights_ptr = static_cast<double*>(weights_buf.ptr);
48
+
49
+ // Prepare output arrays
50
+ std::vector<double> stats(ClusterQualNumStat);
51
+ std::vector<double> asw(2 * nclusters);
52
+
53
+ // Create Kendall tree for caching
54
+ KendallTree kendall;
55
+
56
+ // Call core function
57
+ clusterquality(diss_ptr, cluster_ptr, weights_ptr, n,
58
+ stats.data(), nclusters, asw.data(), kendall);
59
+
60
+ // Clean up Kendall tree
61
+ finalizeKendall(kendall);
62
+
63
+ // Return results as dictionary
64
+ py::dict result;
65
+ result["PBC"] = stats[ClusterQualHPG]; // PBC is stored in HPG position
66
+ result["HG"] = stats[ClusterQualHG];
67
+ result["HGSD"] = stats[ClusterQualHGSD];
68
+ result["ASW"] = stats[ClusterQualASWi];
69
+ result["ASWw"] = stats[ClusterQualASWw];
70
+ result["CH"] = stats[ClusterQualF];
71
+ result["R2"] = stats[ClusterQualR];
72
+ result["CHsq"] = stats[ClusterQualF2];
73
+ result["R2sq"] = stats[ClusterQualR2];
74
+ result["HC"] = stats[ClusterQualHC];
75
+
76
+ // Convert ASW array to numpy array
77
+ auto asw_array = py::array_t<double>(2 * nclusters);
78
+ auto asw_buf = asw_array.request();
79
+ double* asw_out = static_cast<double*>(asw_buf.ptr);
80
+ std::copy(asw.begin(), asw.end(), asw_out);
81
+ result["cluster_asw"] = asw_array;
82
+
83
+ return result;
84
+ }, "Compute cluster quality indicators for full distance matrix");
85
+
86
+ m.def("cluster_quality_condensed", [](py::array_t<double> diss_condensed,
87
+ py::array_t<int> cluster_labels,
88
+ py::array_t<double> weights,
89
+ int n, int nclusters) -> py::dict {
90
+ auto diss_buf = diss_condensed.request();
91
+ auto cluster_buf = cluster_labels.request();
92
+ auto weights_buf = weights.request();
93
+
94
+ int expected_size = n * (n - 1) / 2;
95
+ if (diss_buf.size != expected_size) {
96
+ throw std::runtime_error("Condensed distance array size mismatch");
97
+ }
98
+
99
+ if (cluster_buf.size != n || weights_buf.size != n) {
100
+ throw std::runtime_error("Cluster labels and weights must have length n");
101
+ }
102
+
103
+ double* diss_ptr = static_cast<double*>(diss_buf.ptr);
104
+ int* cluster_ptr = static_cast<int*>(cluster_buf.ptr);
105
+ double* weights_ptr = static_cast<double*>(weights_buf.ptr);
106
+
107
+ // Prepare output arrays
108
+ std::vector<double> stats(ClusterQualNumStat);
109
+ std::vector<double> asw(2 * nclusters);
110
+
111
+ // Create Kendall tree for caching
112
+ KendallTree kendall;
113
+
114
+ // Call core function
115
+ clusterquality_dist(diss_ptr, cluster_ptr, weights_ptr, n,
116
+ stats.data(), nclusters, asw.data(), kendall);
117
+
118
+ // Clean up Kendall tree
119
+ finalizeKendall(kendall);
120
+
121
+ // Return results as dictionary
122
+ py::dict result;
123
+ result["PBC"] = stats[ClusterQualHPG]; // PBC is stored in HPG position
124
+ result["HG"] = stats[ClusterQualHG];
125
+ result["HGSD"] = stats[ClusterQualHGSD];
126
+ result["ASW"] = stats[ClusterQualASWi];
127
+ result["ASWw"] = stats[ClusterQualASWw];
128
+ result["CH"] = stats[ClusterQualF];
129
+ result["R2"] = stats[ClusterQualR];
130
+ result["CHsq"] = stats[ClusterQualF2];
131
+ result["R2sq"] = stats[ClusterQualR2];
132
+ result["HC"] = stats[ClusterQualHC];
133
+
134
+ // Convert ASW array to numpy array
135
+ auto asw_array = py::array_t<double>(2 * nclusters);
136
+ auto asw_buf = asw_array.request();
137
+ double* asw_out = static_cast<double*>(asw_buf.ptr);
138
+ std::copy(asw.begin(), asw.end(), asw_out);
139
+ result["cluster_asw"] = asw_array;
140
+
141
+ return result;
142
+ }, "Compute cluster quality indicators for condensed distance array");
143
+
144
+ m.def("individual_asw", [](py::array_t<double> diss_matrix,
145
+ py::array_t<int> cluster_labels,
146
+ py::array_t<double> weights,
147
+ int nclusters) -> py::dict {
148
+ auto diss_buf = diss_matrix.request();
149
+ auto cluster_buf = cluster_labels.request();
150
+ auto weights_buf = weights.request();
151
+
152
+ if (diss_buf.ndim != 2 || diss_buf.shape[0] != diss_buf.shape[1]) {
153
+ throw std::runtime_error("Distance matrix must be square");
154
+ }
155
+
156
+ int n = diss_buf.shape[0];
157
+
158
+ if (cluster_buf.size != n || weights_buf.size != n) {
159
+ throw std::runtime_error("Cluster labels and weights must have same length as matrix dimension");
160
+ }
161
+
162
+ double* diss_ptr = static_cast<double*>(diss_buf.ptr);
163
+ int* cluster_ptr = static_cast<int*>(cluster_buf.ptr);
164
+ double* weights_ptr = static_cast<double*>(weights_buf.ptr);
165
+
166
+ // Prepare output arrays
167
+ auto asw_i = py::array_t<double>(n);
168
+ auto asw_w = py::array_t<double>(n);
169
+
170
+ auto asw_i_buf = asw_i.request();
171
+ auto asw_w_buf = asw_w.request();
172
+
173
+ double* asw_i_ptr = static_cast<double*>(asw_i_buf.ptr);
174
+ double* asw_w_ptr = static_cast<double*>(asw_w_buf.ptr);
175
+
176
+ // Call core function
177
+ indiv_asw(diss_ptr, cluster_ptr, weights_ptr, n, nclusters, asw_i_ptr, asw_w_ptr);
178
+
179
+ // Return results as dictionary
180
+ py::dict result;
181
+ result["asw_individual"] = asw_i;
182
+ result["asw_weighted"] = asw_w;
183
+
184
+ return result;
185
+ }, "Compute individual ASW scores for all samples");
186
+
187
+ m.def("individual_asw_condensed", [](py::array_t<double> diss_condensed,
188
+ py::array_t<int> cluster_labels,
189
+ py::array_t<double> weights,
190
+ int n, int nclusters) -> py::dict {
191
+ auto diss_buf = diss_condensed.request();
192
+ auto cluster_buf = cluster_labels.request();
193
+ auto weights_buf = weights.request();
194
+
195
+ int expected_size = n * (n - 1) / 2;
196
+ if (diss_buf.size != expected_size) {
197
+ throw std::runtime_error("Condensed distance array size mismatch");
198
+ }
199
+
200
+ if (cluster_buf.size != n || weights_buf.size != n) {
201
+ throw std::runtime_error("Cluster labels and weights must have length n");
202
+ }
203
+
204
+ double* diss_ptr = static_cast<double*>(diss_buf.ptr);
205
+ int* cluster_ptr = static_cast<int*>(cluster_buf.ptr);
206
+ double* weights_ptr = static_cast<double*>(weights_buf.ptr);
207
+
208
+ // Prepare output arrays
209
+ auto asw_i = py::array_t<double>(n);
210
+ auto asw_w = py::array_t<double>(n);
211
+
212
+ auto asw_i_buf = asw_i.request();
213
+ auto asw_w_buf = asw_w.request();
214
+
215
+ double* asw_i_ptr = static_cast<double*>(asw_i_buf.ptr);
216
+ double* asw_w_ptr = static_cast<double*>(asw_w_buf.ptr);
217
+
218
+ // Call core function
219
+ indiv_asw_dist(diss_ptr, cluster_ptr, weights_ptr, n, nclusters, asw_i_ptr, asw_w_ptr);
220
+
221
+ // Return results as dictionary
222
+ py::dict result;
223
+ result["asw_individual"] = asw_i;
224
+ result["asw_weighted"] = asw_w;
225
+
226
+ return result;
227
+ }, "Compute individual ASW scores for condensed distance array");
228
+ }
@@ -0,0 +1,111 @@
1
+ #include <pybind11/pybind11.h>
2
+ #include <pybind11/numpy.h>
3
+ #include <vector>
4
+ #include <iostream>
5
+ #ifdef _OPENMP
6
+ #include <omp.h>
7
+ #endif
8
+ #include <cfloat>
9
+ #include <cmath>
10
+
11
+ namespace py = pybind11;
12
+
13
+ class weightedinertia {
14
+ public:
15
+ weightedinertia(py::array_t<double> distmatrix, py::array_t<int> individuals, py::array_t<double> weights) {
16
+ std::cout << std::flush; // 刷新 C++ 输出
17
+
18
+ try {
19
+ this->distmatrix = distmatrix;
20
+ this->individuals = individuals;
21
+ this->weights = weights;
22
+
23
+ ilen = individuals.size();
24
+
25
+ result = py::array_t<double>(ilen);
26
+ } catch (const std::exception& e) {
27
+ py::print("Error in constructor: ", e.what());
28
+ throw;
29
+ }
30
+ }
31
+
32
+ py::array_t<double> tmrWeightedInertiaContrib() {
33
+ auto ptr_dist = distmatrix.unchecked<2>();
34
+ auto ptr_indiv = individuals.unchecked<1>();
35
+ auto ptr_weights = weights.unchecked<1>();
36
+
37
+ py::array_t<double> result_local(ilen);
38
+ auto ptr_result = result_local.mutable_unchecked<1>();
39
+
40
+ for (int i = 0; i < ilen; i++) {
41
+ ptr_result(i) = 0.0;
42
+ }
43
+
44
+ double totweights = 0.0;
45
+
46
+ #pragma omp parallel for reduction(+:totweights)
47
+ for (int i = 0; i < ilen; i++) {
48
+ totweights += ptr_weights(ptr_indiv(i));
49
+ }
50
+
51
+ // 每个线程使用局部 result 副本,最后归约合并
52
+ int nthreads = 1;
53
+ #ifdef _OPENMP
54
+ #pragma omp parallel
55
+ {
56
+ #pragma omp single
57
+ nthreads = omp_get_num_threads();
58
+ }
59
+ #endif
60
+
61
+ std::vector<std::vector<double>> result_private(nthreads, std::vector<double>(ilen, 0.0));
62
+
63
+ #pragma omp parallel
64
+ {
65
+ #ifdef _OPENMP
66
+ int tid = omp_get_thread_num();
67
+ #else
68
+ int tid = 0;
69
+ #endif
70
+ auto& local = result_private[tid];
71
+
72
+ #pragma omp for schedule(static)
73
+ for (int i = 0; i < ilen; ++i) {
74
+ int pos_i = ptr_indiv(i);
75
+ double i_weight = ptr_weights(pos_i);
76
+
77
+ for (int j = i + 1; j < ilen; ++j) {
78
+ int pos_j = ptr_indiv(j);
79
+ double diss = ptr_dist(pos_i, pos_j);
80
+
81
+ local[i] += diss * ptr_weights(pos_j);
82
+ local[j] += diss * i_weight;
83
+ }
84
+ }
85
+ }
86
+
87
+ // 合并各线程的 result_private 到主 result
88
+ for (int t = 0; t < nthreads; ++t) {
89
+ for (int i = 0; i < ilen; ++i) {
90
+ ptr_result(i) += result_private[t][i];
91
+ }
92
+ }
93
+
94
+ if (totweights > 0) {
95
+ #pragma omp parallel for
96
+ for (int i = 0; i < ilen; ++i) {
97
+ ptr_result(i) /= totweights;
98
+ }
99
+ }
100
+
101
+ return result_local;
102
+ }
103
+
104
+ private:
105
+ py::array_t<double> distmatrix; // 距离矩阵
106
+ py::array_t<int> individuals; // 某组数据点的集合
107
+ py::array_t<double> weights; // 权重数组
108
+
109
+ int ilen;
110
+ py::array_t<double> result;
111
+ };
@@ -0,0 +1,27 @@
1
+ """
2
+ @Author : Yuqi Liang 梁彧祺
3
+ @File : __init__.py
4
+ @Time : 27/02/2025 10:38
5
+ @Desc :
6
+ """
7
+ # utils/__init__.py
8
+
9
+ from .disscenter import disscentertrim
10
+
11
+
12
+ def _import_c_code():
13
+ """Lazily import the c_code module to avoid circular dependencies during installation"""
14
+ try:
15
+ from sequenzo.dissimilarity_measures.src import c_code
16
+ return c_code
17
+ except ImportError:
18
+ # If the C extension cannot be imported, return None
19
+ print(
20
+ "Warning: The C++ extension (c_code) could not be imported. Please ensure the extension module is compiled correctly.")
21
+ return None
22
+
23
+
24
+ __all__ = [
25
+ "disscentertrim",
26
+ # Add other functions as needed
27
+ ]
@@ -0,0 +1,122 @@
1
+ """
2
+ @Author : 李欣怡
3
+ @File : disscenter.py
4
+ @Time : 2025/2/8 12:57
5
+ @Desc :
6
+ Utility function for the k_medoids algorithm.
7
+ """
8
+
9
+ import numpy as np
10
+ import pandas as pd
11
+
12
+ import importlib
13
+ import sequenzo.clustering.clustering_c_code
14
+
15
+ clustering_c_code = importlib.import_module("sequenzo.clustering.clustering_c_code")
16
+
17
def disscentertrim(diss, group=None, medoids_index=None, allcenter=False, weights=None, squared=False, trim=0):
    """Compute dissimilarities to the group centres, or the group medoids.

    For every group, the weighted inertia contribution of each member is
    obtained from the compiled ``weightedinertia`` helper and then shifted by
    half of the group's weighted mean contribution. The member(s) with the
    smallest resulting value are the group's medoid(s).

    Args:
        diss: Square dissimilarity matrix (array-like, ``n x n``).
        group: Group label of each observation (length ``n``). ``None`` puts
            all observations into a single group.
        medoids_index: ``None`` to return the dissimilarities to the centre;
            ``"first"`` (or ``True``) to return, per group, the position of
            the first medoid; ``"all"`` to return every tied medoid per group.
            Strings are matched case-insensitively; ``False`` means ``None``.
        allcenter: Not implemented yet. When ``True`` (and no medoids are
            requested) a warning is emitted and an all-zero one-column
            ``DataFrame`` is returned.
        weights: Observation weights (length ``n``). Defaults to all ones.
        squared: If ``True``, the dissimilarities are squared first.
        trim: Not implemented yet; a positive value is ignored with a warning.

    Returns:
        * ``medoids_index is None``: ``numpy.ndarray`` of length ``n`` with
          each observation's value relative to its own group.
        * ``"first"``: integer array of medoid positions, one per group (a
          single integer when there is only one group).
        * ``"all"``: list with one array of medoid positions per group (just
          that array when there is only one group).

    Raises:
        ValueError: If ``medoids_index`` is not one of the accepted values.
    """
    import warnings

    # Booleans are shorthand for "first"/None. The old code mapped True to
    # "First" and then rejected it in the validation below.
    if isinstance(medoids_index, bool):
        medoids_index = "first" if medoids_index else None
    elif isinstance(medoids_index, str):
        medoids_index = medoids_index.lower()

    if medoids_index not in (None, "first", "all"):
        raise ValueError('\'medoids_index\' argument should be one of "First", "all" or None')

    retmedoids = medoids_index is not None  # Whether medoids need to be returned
    allmedoids = medoids_index == "all"
    if retmedoids:
        allcenter = False

    # Convert once here instead of copying the full matrix for every group.
    diss = np.asarray(diss, dtype=np.float64)
    n_obs = diss.shape[0]

    if weights is None:
        weights = np.ones(n_obs, dtype=np.float64)
    else:
        weights = np.asarray(weights, dtype=np.float64)

    if squared:
        diss = np.square(diss)

    if group is None:
        group = np.ones(n_obs, dtype=int)

    ind = np.arange(n_obs)
    grp = np.asarray(group)
    lgrp = np.unique(grp)

    if allcenter:
        # TODO: distances to every group centre are not implemented yet.
        warnings.warn("disscentertrim: 'allcenter=True' is not implemented yet; "
                      "an all-zero result is returned.",
                      RuntimeWarning, stacklevel=2)
        return pd.DataFrame(np.zeros((n_obs, 1)))

    if trim > 0:
        # TODO: trimming is not implemented yet.
        warnings.warn("disscentertrim: 'trim' is not implemented yet and is ignored.",
                      RuntimeWarning, stacklevel=2)

    ret = np.zeros(n_obs)

    if retmedoids:
        # Medoids are positions into ``diss``, so keep them integer-typed.
        medoids = [] if allmedoids else np.zeros(len(lgrp), dtype=np.intp)

    for i, label in enumerate(lgrp):
        cond = (grp == label)
        grpindiv = ind[cond]  # 0-based positions of this group's members

        inertia = clustering_c_code.weightedinertia(diss,
                                                    grpindiv.astype(np.int32),
                                                    weights)
        dc = inertia.tmrWeightedInertiaContrib()
        dc = dc - np.average(dc, weights=weights[cond]) / 2

        ret[grpindiv] = dc

        if retmedoids:
            # ``grpindiv`` is ascending, so the first hit is the lowest position.
            best = grpindiv[dc == np.min(dc)]
            if allmedoids:
                medoids.append(best)
            else:
                medoids[i] = best[0]

    if retmedoids:
        if len(lgrp) == 1:
            # Single group: unwrap the only entry (was the 1-based `[[1]]`).
            return medoids[0]

        return medoids

    return ret
100
+
101
+
102
if __name__ == '__main__':
    # Manual smoke run: cluster the CO2 emissions example data with CLARA.
    from sequenzo import *

    # `df` is short for `dataframe`, a common variable name for a dataset
    df = load_dataset('country_co2_emissions')

    # Every column after the first one is passed as a time point.
    time_columns = list(df.columns)[1:]
    state_labels = ['Very Low', 'Low', 'Middle', 'High', 'Very High']

    sequence_data = SequenceData(df, time=time_columns, states=state_labels)

    clara_output = clara(
        seqdata=sequence_data,
        R=2,
        kvals=range(2, 21),
        sample_size=3000,
        criteria=['distance', 'pbm'],
        parallel=True,
        stability=True,
    )
    all_stats = clara_output['allstat']
122
+
@@ -0,0 +1,22 @@
1
+ """
2
+ @Author : Yuqi Liang 梁彧祺
3
+ @File : __init__.py
4
+ @Time : 01/05/2025 09:27
5
+ @Desc :
6
+ """
7
+ from .helpers import (clean_time_columns_auto,
8
+ assign_unique_ids,
9
+ wide_to_long_format_data,
10
+ long_to_wide_format_data,
11
+ summarize_missing_values,
12
+ replace_cluster_id_by_labels)
13
+
14
+
15
+ __all__ = [
16
+ "clean_time_columns_auto",
17
+ "assign_unique_ids",
18
+ "wide_to_long_format_data",
19
+ "long_to_wide_format_data",
20
+ "summarize_missing_values",
21
+ "replace_cluster_id_by_labels"
22
+ ]