sequenzo 0.1.21__cp310-cp310-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sequenzo might be problematic. Click here for more details.

Files changed (260) hide show
  1. sequenzo/__init__.py +240 -0
  2. sequenzo/big_data/__init__.py +12 -0
  3. sequenzo/big_data/clara/__init__.py +26 -0
  4. sequenzo/big_data/clara/clara.py +467 -0
  5. sequenzo/big_data/clara/utils/__init__.py +27 -0
  6. sequenzo/big_data/clara/utils/aggregatecases.py +92 -0
  7. sequenzo/big_data/clara/utils/davies_bouldin.py +91 -0
  8. sequenzo/big_data/clara/utils/get_weighted_diss.cpython-310-darwin.so +0 -0
  9. sequenzo/big_data/clara/utils/wfcmdd.py +205 -0
  10. sequenzo/big_data/clara/visualization.py +88 -0
  11. sequenzo/clustering/KMedoids.py +196 -0
  12. sequenzo/clustering/__init__.py +30 -0
  13. sequenzo/clustering/clustering_c_code.cpython-310-darwin.so +0 -0
  14. sequenzo/clustering/hierarchical_clustering.py +1380 -0
  15. sequenzo/clustering/src/KMedoid.cpp +262 -0
  16. sequenzo/clustering/src/PAM.cpp +236 -0
  17. sequenzo/clustering/src/PAMonce.cpp +234 -0
  18. sequenzo/clustering/src/cluster_quality.cpp +496 -0
  19. sequenzo/clustering/src/cluster_quality.h +128 -0
  20. sequenzo/clustering/src/cluster_quality_backup.cpp +570 -0
  21. sequenzo/clustering/src/module.cpp +228 -0
  22. sequenzo/clustering/src/weightedinertia.cpp +111 -0
  23. sequenzo/clustering/utils/__init__.py +27 -0
  24. sequenzo/clustering/utils/disscenter.py +122 -0
  25. sequenzo/data_preprocessing/__init__.py +20 -0
  26. sequenzo/data_preprocessing/helpers.py +256 -0
  27. sequenzo/datasets/__init__.py +41 -0
  28. sequenzo/datasets/biofam.csv +2001 -0
  29. sequenzo/datasets/biofam_child_domain.csv +2001 -0
  30. sequenzo/datasets/biofam_left_domain.csv +2001 -0
  31. sequenzo/datasets/biofam_married_domain.csv +2001 -0
  32. sequenzo/datasets/chinese_colonial_territories.csv +12 -0
  33. sequenzo/datasets/country_co2_emissions.csv +194 -0
  34. sequenzo/datasets/country_co2_emissions_global_deciles.csv +195 -0
  35. sequenzo/datasets/country_co2_emissions_global_quintiles.csv +195 -0
  36. sequenzo/datasets/country_co2_emissions_local_deciles.csv +195 -0
  37. sequenzo/datasets/country_co2_emissions_local_quintiles.csv +195 -0
  38. sequenzo/datasets/country_gdp_per_capita.csv +194 -0
  39. sequenzo/datasets/mvad.csv +713 -0
  40. sequenzo/datasets/pairfam_family.csv +1867 -0
  41. sequenzo/datasets/polyadic_samplec1.csv +61 -0
  42. sequenzo/datasets/polyadic_samplep1.csv +61 -0
  43. sequenzo/datasets/polyadic_seqc1.csv +61 -0
  44. sequenzo/datasets/polyadic_seqp1.csv +61 -0
  45. sequenzo/define_sequence_data.py +609 -0
  46. sequenzo/dissimilarity_measures/__init__.py +31 -0
  47. sequenzo/dissimilarity_measures/c_code.cpython-310-darwin.so +0 -0
  48. sequenzo/dissimilarity_measures/get_distance_matrix.py +702 -0
  49. sequenzo/dissimilarity_measures/get_substitution_cost_matrix.py +241 -0
  50. sequenzo/dissimilarity_measures/src/DHDdistance.cpp +148 -0
  51. sequenzo/dissimilarity_measures/src/LCPdistance.cpp +114 -0
  52. sequenzo/dissimilarity_measures/src/OMdistance.cpp +247 -0
  53. sequenzo/dissimilarity_measures/src/OMspellDistance.cpp +281 -0
  54. sequenzo/dissimilarity_measures/src/__init__.py +0 -0
  55. sequenzo/dissimilarity_measures/src/dist2matrix.cpp +63 -0
  56. sequenzo/dissimilarity_measures/src/dp_utils.h +160 -0
  57. sequenzo/dissimilarity_measures/src/module.cpp +34 -0
  58. sequenzo/dissimilarity_measures/src/setup.py +30 -0
  59. sequenzo/dissimilarity_measures/src/utils.h +25 -0
  60. sequenzo/dissimilarity_measures/src/xsimd/.github/cmake-test/main.cpp +6 -0
  61. sequenzo/dissimilarity_measures/src/xsimd/benchmark/main.cpp +159 -0
  62. sequenzo/dissimilarity_measures/src/xsimd/benchmark/xsimd_benchmark.hpp +565 -0
  63. sequenzo/dissimilarity_measures/src/xsimd/docs/source/conf.py +37 -0
  64. sequenzo/dissimilarity_measures/src/xsimd/examples/mandelbrot.cpp +330 -0
  65. sequenzo/dissimilarity_measures/src/xsimd/examples/pico_bench.hpp +246 -0
  66. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_arithmetic.hpp +266 -0
  67. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_complex.hpp +112 -0
  68. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_details.hpp +323 -0
  69. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_logical.hpp +218 -0
  70. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_math.hpp +2583 -0
  71. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_memory.hpp +880 -0
  72. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_rounding.hpp +72 -0
  73. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_swizzle.hpp +174 -0
  74. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_trigo.hpp +978 -0
  75. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx.hpp +1924 -0
  76. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx2.hpp +1144 -0
  77. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512bw.hpp +656 -0
  78. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512cd.hpp +28 -0
  79. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512dq.hpp +244 -0
  80. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512er.hpp +20 -0
  81. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512f.hpp +2650 -0
  82. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512ifma.hpp +20 -0
  83. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512pf.hpp +20 -0
  84. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vbmi.hpp +77 -0
  85. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vbmi2.hpp +131 -0
  86. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vnni_avx512bw.hpp +20 -0
  87. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vnni_avx512vbmi2.hpp +20 -0
  88. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avxvnni.hpp +20 -0
  89. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_common.hpp +24 -0
  90. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_common_fwd.hpp +77 -0
  91. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_constants.hpp +393 -0
  92. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_emulated.hpp +788 -0
  93. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_avx.hpp +93 -0
  94. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_avx2.hpp +46 -0
  95. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_sse.hpp +97 -0
  96. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma4.hpp +92 -0
  97. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_i8mm_neon64.hpp +17 -0
  98. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_isa.hpp +142 -0
  99. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_neon.hpp +3142 -0
  100. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_neon64.hpp +1543 -0
  101. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_rvv.hpp +1513 -0
  102. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_scalar.hpp +1260 -0
  103. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse2.hpp +2024 -0
  104. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse3.hpp +67 -0
  105. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse4_1.hpp +339 -0
  106. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse4_2.hpp +44 -0
  107. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_ssse3.hpp +186 -0
  108. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sve.hpp +1155 -0
  109. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_vsx.hpp +892 -0
  110. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_wasm.hpp +1780 -0
  111. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_arch.hpp +240 -0
  112. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_config.hpp +484 -0
  113. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_cpuid.hpp +269 -0
  114. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_inline.hpp +27 -0
  115. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/math/xsimd_rem_pio2.hpp +719 -0
  116. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/memory/xsimd_aligned_allocator.hpp +349 -0
  117. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/memory/xsimd_alignment.hpp +91 -0
  118. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_all_registers.hpp +55 -0
  119. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_api.hpp +2765 -0
  120. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx2_register.hpp +44 -0
  121. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512bw_register.hpp +51 -0
  122. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512cd_register.hpp +51 -0
  123. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512dq_register.hpp +51 -0
  124. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512er_register.hpp +51 -0
  125. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512f_register.hpp +77 -0
  126. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512ifma_register.hpp +51 -0
  127. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512pf_register.hpp +51 -0
  128. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vbmi2_register.hpp +51 -0
  129. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vbmi_register.hpp +51 -0
  130. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512bw_register.hpp +54 -0
  131. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512vbmi2_register.hpp +53 -0
  132. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx_register.hpp +64 -0
  133. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avxvnni_register.hpp +44 -0
  134. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_batch.hpp +1524 -0
  135. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_batch_constant.hpp +300 -0
  136. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_common_arch.hpp +47 -0
  137. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_emulated_register.hpp +80 -0
  138. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_avx2_register.hpp +50 -0
  139. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_avx_register.hpp +50 -0
  140. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_sse_register.hpp +50 -0
  141. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma4_register.hpp +50 -0
  142. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_i8mm_neon64_register.hpp +55 -0
  143. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_neon64_register.hpp +55 -0
  144. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_neon_register.hpp +154 -0
  145. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_register.hpp +94 -0
  146. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_rvv_register.hpp +506 -0
  147. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse2_register.hpp +59 -0
  148. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse3_register.hpp +49 -0
  149. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse4_1_register.hpp +48 -0
  150. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse4_2_register.hpp +48 -0
  151. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_ssse3_register.hpp +48 -0
  152. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sve_register.hpp +156 -0
  153. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_traits.hpp +337 -0
  154. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_utils.hpp +536 -0
  155. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_vsx_register.hpp +77 -0
  156. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_wasm_register.hpp +59 -0
  157. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/xsimd.hpp +75 -0
  158. sequenzo/dissimilarity_measures/src/xsimd/test/architectures/dummy.cpp +7 -0
  159. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set.cpp +13 -0
  160. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean.cpp +24 -0
  161. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_aligned.cpp +25 -0
  162. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_arch_independent.cpp +28 -0
  163. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_tag_dispatch.cpp +25 -0
  164. sequenzo/dissimilarity_measures/src/xsimd/test/doc/manipulating_abstract_batches.cpp +7 -0
  165. sequenzo/dissimilarity_measures/src/xsimd/test/doc/manipulating_parametric_batches.cpp +8 -0
  166. sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum.hpp +31 -0
  167. sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum_avx2.cpp +3 -0
  168. sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum_sse2.cpp +3 -0
  169. sequenzo/dissimilarity_measures/src/xsimd/test/doc/writing_vectorized_code.cpp +11 -0
  170. sequenzo/dissimilarity_measures/src/xsimd/test/main.cpp +31 -0
  171. sequenzo/dissimilarity_measures/src/xsimd/test/test_api.cpp +230 -0
  172. sequenzo/dissimilarity_measures/src/xsimd/test/test_arch.cpp +217 -0
  173. sequenzo/dissimilarity_measures/src/xsimd/test/test_basic_math.cpp +183 -0
  174. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch.cpp +1049 -0
  175. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_bool.cpp +508 -0
  176. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_cast.cpp +409 -0
  177. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_complex.cpp +712 -0
  178. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_constant.cpp +286 -0
  179. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_float.cpp +141 -0
  180. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_int.cpp +365 -0
  181. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_manip.cpp +308 -0
  182. sequenzo/dissimilarity_measures/src/xsimd/test/test_bitwise_cast.cpp +222 -0
  183. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_exponential.cpp +226 -0
  184. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_hyperbolic.cpp +183 -0
  185. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_power.cpp +265 -0
  186. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_trigonometric.cpp +236 -0
  187. sequenzo/dissimilarity_measures/src/xsimd/test/test_conversion.cpp +248 -0
  188. sequenzo/dissimilarity_measures/src/xsimd/test/test_custom_default_arch.cpp +28 -0
  189. sequenzo/dissimilarity_measures/src/xsimd/test/test_error_gamma.cpp +170 -0
  190. sequenzo/dissimilarity_measures/src/xsimd/test/test_explicit_batch_instantiation.cpp +32 -0
  191. sequenzo/dissimilarity_measures/src/xsimd/test/test_exponential.cpp +202 -0
  192. sequenzo/dissimilarity_measures/src/xsimd/test/test_extract_pair.cpp +92 -0
  193. sequenzo/dissimilarity_measures/src/xsimd/test/test_fp_manipulation.cpp +77 -0
  194. sequenzo/dissimilarity_measures/src/xsimd/test/test_gnu_source.cpp +30 -0
  195. sequenzo/dissimilarity_measures/src/xsimd/test/test_hyperbolic.cpp +167 -0
  196. sequenzo/dissimilarity_measures/src/xsimd/test/test_load_store.cpp +304 -0
  197. sequenzo/dissimilarity_measures/src/xsimd/test/test_memory.cpp +61 -0
  198. sequenzo/dissimilarity_measures/src/xsimd/test/test_poly_evaluation.cpp +64 -0
  199. sequenzo/dissimilarity_measures/src/xsimd/test/test_power.cpp +184 -0
  200. sequenzo/dissimilarity_measures/src/xsimd/test/test_rounding.cpp +199 -0
  201. sequenzo/dissimilarity_measures/src/xsimd/test/test_select.cpp +101 -0
  202. sequenzo/dissimilarity_measures/src/xsimd/test/test_shuffle.cpp +760 -0
  203. sequenzo/dissimilarity_measures/src/xsimd/test/test_sum.cpp +4 -0
  204. sequenzo/dissimilarity_measures/src/xsimd/test/test_sum.hpp +34 -0
  205. sequenzo/dissimilarity_measures/src/xsimd/test/test_traits.cpp +172 -0
  206. sequenzo/dissimilarity_measures/src/xsimd/test/test_trigonometric.cpp +208 -0
  207. sequenzo/dissimilarity_measures/src/xsimd/test/test_utils.hpp +611 -0
  208. sequenzo/dissimilarity_measures/src/xsimd/test/test_wasm/test_wasm_playwright.py +123 -0
  209. sequenzo/dissimilarity_measures/src/xsimd/test/test_xsimd_api.cpp +1460 -0
  210. sequenzo/dissimilarity_measures/utils/__init__.py +16 -0
  211. sequenzo/dissimilarity_measures/utils/get_LCP_length_for_2_seq.py +44 -0
  212. sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.cpython-310-darwin.so +0 -0
  213. sequenzo/dissimilarity_measures/utils/seqconc.cpython-310-darwin.so +0 -0
  214. sequenzo/dissimilarity_measures/utils/seqdss.cpython-310-darwin.so +0 -0
  215. sequenzo/dissimilarity_measures/utils/seqdur.cpython-310-darwin.so +0 -0
  216. sequenzo/dissimilarity_measures/utils/seqlength.cpython-310-darwin.so +0 -0
  217. sequenzo/multidomain/__init__.py +23 -0
  218. sequenzo/multidomain/association_between_domains.py +311 -0
  219. sequenzo/multidomain/cat.py +431 -0
  220. sequenzo/multidomain/combt.py +519 -0
  221. sequenzo/multidomain/dat.py +89 -0
  222. sequenzo/multidomain/idcd.py +139 -0
  223. sequenzo/multidomain/linked_polyad.py +292 -0
  224. sequenzo/openmp_setup.py +233 -0
  225. sequenzo/prefix_tree/__init__.py +43 -0
  226. sequenzo/prefix_tree/individual_level_indicators.py +1274 -0
  227. sequenzo/prefix_tree/system_level_indicators.py +465 -0
  228. sequenzo/prefix_tree/utils.py +54 -0
  229. sequenzo/sequence_characteristics/__init__.py +40 -0
  230. sequenzo/sequence_characteristics/complexity_index.py +49 -0
  231. sequenzo/sequence_characteristics/overall_cross_sectional_entropy.py +220 -0
  232. sequenzo/sequence_characteristics/plot_characteristics.py +593 -0
  233. sequenzo/sequence_characteristics/simple_characteristics.py +311 -0
  234. sequenzo/sequence_characteristics/state_frequencies_and_entropy_per_sequence.py +39 -0
  235. sequenzo/sequence_characteristics/turbulence.py +155 -0
  236. sequenzo/sequence_characteristics/variance_of_spell_durations.py +86 -0
  237. sequenzo/sequence_characteristics/within_sequence_entropy.py +43 -0
  238. sequenzo/suffix_tree/__init__.py +48 -0
  239. sequenzo/suffix_tree/individual_level_indicators.py +1638 -0
  240. sequenzo/suffix_tree/system_level_indicators.py +456 -0
  241. sequenzo/suffix_tree/utils.py +56 -0
  242. sequenzo/visualization/__init__.py +29 -0
  243. sequenzo/visualization/plot_mean_time.py +194 -0
  244. sequenzo/visualization/plot_modal_state.py +276 -0
  245. sequenzo/visualization/plot_most_frequent_sequences.py +147 -0
  246. sequenzo/visualization/plot_relative_frequency.py +404 -0
  247. sequenzo/visualization/plot_sequence_index.py +937 -0
  248. sequenzo/visualization/plot_single_medoid.py +153 -0
  249. sequenzo/visualization/plot_state_distribution.py +613 -0
  250. sequenzo/visualization/plot_transition_matrix.py +190 -0
  251. sequenzo/visualization/utils/__init__.py +23 -0
  252. sequenzo/visualization/utils/utils.py +310 -0
  253. sequenzo/with_event_history_analysis/__init__.py +35 -0
  254. sequenzo/with_event_history_analysis/sequence_analysis_multi_state_model.py +850 -0
  255. sequenzo/with_event_history_analysis/sequence_history_analysis.py +283 -0
  256. sequenzo-0.1.21.dist-info/METADATA +308 -0
  257. sequenzo-0.1.21.dist-info/RECORD +254 -0
  258. sequenzo-0.1.21.dist-info/WHEEL +5 -0
  259. sequenzo-0.1.21.dist-info/licenses/LICENSE +28 -0
  260. sequenzo-0.1.21.dist-info/top_level.txt +1 -0
@@ -0,0 +1,205 @@
1
+ """
2
+ @Author : 李欣怡
3
+ @File : wfcmdd.py
4
+ @Time : 2024/12/28 13:38
5
+ @Desc :
6
+ """
7
+
8
+ import numpy as np
9
+ import pandas as pd
10
+ import warnings
11
+
12
+
13
def wfcmdd(diss, memb, weights=None, method="FCMdd", m=2, dnoise=None, eta=None, alpha=0.001,
           iter_max=100, verbose=False, dlambda=None):
    """
    Weighted fuzzy clustering around medoids on a dissimilarity matrix.

    Alternates between (1) choosing one medoid per cluster that minimises the
    weighted, membership-powered sum of dissimilarities and (2) recomputing the
    membership matrix from the distances to those medoids, until the memberships
    stop changing by more than ``alpha`` or ``iter_max`` is exceeded.

    :param diss: Square dissimilarity matrix (anything ``np.array`` accepts, e.g. a
        DataFrame or 2-D ndarray).
    :param memb: Either an initial membership matrix (DataFrame / 2-D ndarray with one
        row per object and one column per cluster) or a list of 0-based seed row
        indices, one per cluster. The caller's object is never modified.
    :param weights: Per-object weights (array-like). Defaults to unit weights.
    :param method: One of "NCdd", "HNCdd", "FCMdd", "PCMdd".
    :param m: Fuzziness exponent. Forced to 1 for "HNCdd".
    :param dnoise: Distance to the noise cluster ("NCdd" / "HNCdd").
    :param eta: Reference distances, one per cluster (required for "PCMdd").
    :param alpha: Convergence threshold on the largest membership change.
    :param iter_max: Maximum number of iterations.
    :param verbose: If True, print a dot per iteration and a final summary.
    :param dlambda: If given with "NCdd", ``dnoise`` is re-estimated every iteration as
        ``dlambda`` times the weighted mean distance to the medoids.
    :return: dict with keys "dnoise", "memb" (final membership matrix),
        "mobileCenters" (float array of 0-based medoid row indices) and "functional".
    :raises ValueError: On an unknown method, missing method-specific arguments, or an
        unusable ``memb``.
    """
    # Setting and checking argument values
    METHODS = ["NCdd", "HNCdd", "FCMdd", "PCMdd"]

    if method not in METHODS:
        raise ValueError(f" [!] Method must be one of {METHODS}.")

    d = np.array(diss)
    n = d.shape[0]

    # The R source has no handling for missing weights; fall back to unit weights.
    # Coerce to ndarray so list / Series weights support the `weights[:, None]` broadcasts below.
    if weights is None:
        weights = np.ones(len(diss), dtype=int)
    else:
        weights = np.asarray(weights)

    if method == "NCdd":
        if dnoise is None and dlambda is None:
            raise ValueError(" [!] Must provide a value for dnoise or dlambda.")
        if dlambda is not None:
            # Placeholder; the real value is estimated inside the loop.
            dnoise = 1
    elif method == "HNCdd":
        if dnoise is None:
            raise ValueError(" [!] Must provide a value for dnoise.")
        m = 1
    elif method == "PCMdd":
        if eta is None:
            raise ValueError(" [!] Must provide a vector of values for eta.")

    # Checking the membership matrix (memb)
    if isinstance(memb, (pd.DataFrame, np.ndarray)):
        if memb.shape[0] != d.shape[1]:
            raise ValueError(" [!] The number of rows in memb must be the same as the number of rows and columns of d.")
        # Private float copy: keeps the caller's array untouched and avoids
        # truncation when memb has an integer dtype.
        u = np.array(memb, dtype=float)

    elif isinstance(memb, list) and all(isinstance(x, (int, float)) for x in memb):
        # Vector of seeds: each seed is a crisp member of its own cluster.
        u = np.zeros((n, len(memb)))
        for k, seed in enumerate(memb):
            u[seed, k] = 1

    else:
        raise ValueError("[!] Provide a number, a vector of seeds, or membership matrix for mobile clusters.")

    kMov = u.shape[1]
    med = np.full(kMov, np.nan)

    if method == "PCMdd" and len(eta) != kMov:
        raise ValueError(" [!] Vector of reference distances (eta) must have a length equal to the number of clusters.")
    eta_arr = np.asarray(eta, dtype=float) if method == "PCMdd" else None

    noise_methods = ("NCdd", "HNCdd")
    if method in noise_methods:
        # Extra trailing column holds the membership to the noise cluster.
        u = np.hstack([u, np.zeros((n, 1))])

    kMovNC = u.shape[1]
    uPrev = np.zeros((n, kMovNC))
    dist2med = np.zeros((n, kMovNC))

    if method in noise_methods:
        dist2med[:, kMovNC - 1] = dnoise

    row_idx = np.arange(n)  # 0-based, matching the indices stored in `med`
    continue_flag = True
    iter_count = 1
    uPrev2 = 0

    while continue_flag:
        # Finding centers
        for k in range(kMov):
            others = np.delete(u, k, axis=1)
            if others.shape[1]:
                others_max = np.max(others, axis=1)
            else:
                # Single cluster: there is no other column that could disqualify a row.
                others_max = np.full(n, -np.inf)

            # A row may become medoid k only if it is not a crisp member of another
            # cluster and has not already been picked as an earlier medoid.
            candidates = np.where((others_max < 1) & ~np.isin(row_idx, med[:k]))[0]

            # Weighted, membership-powered total distance to each candidate.
            cost = (u[:, k] ** m * weights) @ d[:, candidates]
            med[k] = candidates[np.argmin(cost)]
            dist2med[:, k] = d[:, int(med[k])]

        # Updating dnoise for adaptive dnoise clustering
        if dlambda is not None and method == "NCdd":
            dnoise = dlambda * np.sum(dist2med[:, :-1] * weights[:, None]) / (kMov * np.sum(weights))
            dist2med[:, kMovNC - 1] = dnoise

        # Updating membership (always into a fresh array so uPrev / uPrev2 keep
        # the values of earlier iterations).
        if method == "HNCdd":
            # The last column of dist2med already holds dnoise, so the closest
            # column is either a medoid or the noise cluster.
            nearest = np.argmin(dist2med, axis=1)
            u = np.zeros_like(u)
            u[row_idx, nearest] = 1

        elif method in ["FCMdd", "NCdd"]:
            # Zero distances yield inf / nan intermediates; they are overwritten below.
            with np.errstate(all="ignore"):
                u = (1 / dist2med) ** (1 / (m - 1))
                u /= np.sum(u, axis=1, keepdims=True)
            u[dist2med == 0] = 1

        elif method == "PCMdd":
            u = 1 / (1 + (dist2med / eta_arr) ** (1 / (m - 1)))
            u[dist2med == 0] = 1

        # Checking convergence (also stops on a period-2 oscillation via uPrev2)
        if iter_count > 2:
            continue_flag = np.max(np.abs(u - uPrev)) > alpha and iter_count <= iter_max \
                            and np.max(np.abs(u - uPrev2)) > alpha

        if continue_flag:
            uPrev2 = uPrev
            uPrev = u
            iter_count += 1
            if verbose:
                print(".", end="")

    # Calculate the functional value
    functional = np.sum(dist2med * (u ** m) * weights[:, None])
    if method == "PCMdd":
        for k in range(kMov):
            functional += np.sum(eta_arr[k] * (1 - u[:, k]) ** m * weights)

    if verbose:
        print(f"\nIterations: {iter_count}, Functional: {functional}")

    mobile_centers = med[:kMov]

    return {
        "dnoise": dnoise,
        "memb": u,
        "mobileCenters": mobile_centers,
        "functional": functional
    }
187
+
188
+
189
if __name__ == "__main__":
    # Minimal smoke run: three objects on a line, two fuzzy clusters.
    demo_diss = pd.DataFrame(
        np.array([
            [0.0, 1.0, 2.0],
            [1.0, 0.0, 1.0],
            [2.0, 1.0, 0.0],
        ])
    )
    demo_memb = np.array([
        [0.7, 0.3],
        [0.2, 0.8],
        [0.5, 0.5],
    ])

    outcome = wfcmdd(diss=demo_diss, memb=demo_memb, method="FCMdd")

    # Print every returned entry; the membership matrix goes on its own line.
    print("result['dnoise'] = ", outcome["dnoise"])
    print("result['memb'] =")
    print(outcome["memb"])
    print("result['mobileCenters'] = ", outcome["mobileCenters"])
    print("result['functional'] = ", outcome["functional"])
@@ -0,0 +1,88 @@
1
+ """
2
+ @Author : Yuqi Liang 梁彧祺
3
+ @File : visualization.py
4
+ @Time : 04/04/2025 15:21
5
+ @Desc :
6
+
7
+ """
8
+ import matplotlib.pyplot as plt
9
+ import seaborn as sns
10
+ import numpy as np
11
+ import pandas as pd
12
+
13
+
14
def plot_scores_from_dataframe(df,
                               k_col="k",
                               metrics=None,
                               norm="zscore",
                               title="CLARA Cluster Quality Metrics",
                               palette="Set2",
                               line_width=2,
                               style="whitegrid",
                               xlabel="Number of Clusters",
                               ylabel="Normalized Score",
                               grid=True,
                               save_as=None,
                               dpi=200,
                               figsize=(12, 8)):
    """
    Draw one line per clustering metric against the number of clusters.

    The input is a summary DataFrame (e.g., loaded from CSV); it is copied and sorted
    by ``k_col`` before plotting, so the caller's frame is left untouched.

    :param df: DataFrame with clustering metrics. Must contain the ``k_col`` column.
    :param k_col: Column name indicating the number of clusters.
    :param metrics: Metric columns to plot. If None, every float/int column except
        ``k_col`` and "Best iter" is used.
    :param norm: 'zscore' or 'range' rescales each metric; any other value plots raw values.
    :param title: Plot title
    :param palette: Seaborn color palette name
    :param line_width: Width of plotted lines
    :param style: Seaborn style for the plot
    :param xlabel: X-axis label
    :param ylabel: Y-axis label
    :param grid: Whether to show grid lines
    :param save_as: File path to save the plot (optional)
    :param dpi: DPI for saved image
    :param figsize: Figure size in inches
    """
    data = df.copy().sort_values(by=k_col)

    if metrics is None:
        # "Best iter" is bookkeeping, not a cluster quality indicator.
        excluded = ("Best iter", k_col)
        numeric_cols = data.select_dtypes(include=[float, int]).columns.tolist()
        metrics = [name for name in numeric_cols if name not in excluded]

    def _rescale(values):
        # Constant (or all-NaN) columns are returned unchanged to avoid dividing by zero.
        if norm == "zscore":
            center = np.nanmean(values)
            spread = np.nanstd(values)
            return (values - center) / spread if spread > 0 else values
        if norm == "range":
            low = np.nanmin(values)
            high = np.nanmax(values)
            return (values - low) / (high - low) if high > low else values
        return values

    normed = {name: _rescale(data[name].values.astype(float)) for name in metrics}

    sns.set(style=style)
    colors = sns.color_palette(palette, len(metrics))
    plt.figure(figsize=figsize)

    x_values = data[k_col]
    for color, name in zip(colors, metrics):
        plt.plot(x_values, normed[name],
                 label=name,
                 linewidth=line_width,
                 color=color)

    plt.title(title, fontsize=14, fontweight="bold")
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.xticks(data[k_col])
    plt.grid(grid, linestyle="--", alpha=0.6)
    plt.legend(title="Metric", fontsize=10)
    plt.tight_layout()

    if save_as:
        plt.savefig(save_as, dpi=dpi)
    plt.show()
@@ -0,0 +1,196 @@
1
+ """
2
+ @Author : 李欣怡 Xinyi Li
3
+ @File : KMedoids.py
4
+ @Time : 2025/2/8 11:53
5
+ @Desc :
6
+ """
7
+
8
+ import numpy as np
9
+ from scipy.cluster.hierarchy import cut_tree
10
+
11
+ import importlib
12
+ import sequenzo.clustering.clustering_c_code
13
+ clustering_c_code = importlib.import_module("sequenzo.clustering.clustering_c_code")
14
+
15
+ from sequenzo.clustering.utils.disscenter import disscentertrim
16
+
17
+ import glob
18
+ import os
19
+ import sys
20
+ import cffi
21
+
22
# cffi handle used below to open the compiled library found next to this file.
ffi = cffi.FFI()

# Look for compiled libraries in this module's directory: "*.pyd" on Windows,
# "*.so" on every other platform.
if sys.platform.startswith("win"):
    files = glob.glob(os.path.join(os.path.dirname(__file__), "*.pyd"))
else:
    files = glob.glob(os.path.join(os.path.dirname(__file__), "*.so"))

if not files:
    raise FileNotFoundError("No compiled library found")

# NOTE(review): only the first match is used; if several compiled libraries
# live in this directory the one picked depends on the order glob returns.
lib_file = files[0]

# NOTE(review): `lib` is not referenced in the visible part of this file; the
# clustering routines are reached through `clustering_c_code` instead.
try:
    # Redirect stderr to suppress the error messages printed by cffi
    import io
    old_stderr = sys.stderr
    sys.stderr = io.StringIO()
    try:
        lib = ffi.dlopen(lib_file)
    finally:
        # Restore stderr
        sys.stderr = old_stderr
except ImportError as e:
    if sys.platform.startswith("win") and 'cffi mode "ANY" is only "ABI"' in str(e):
        # On Windows, fall back to ABI mode, suppressing error output the same way.
        # NOTE(review): this retry issues exactly the same dlopen call as above.
        old_stderr = sys.stderr
        sys.stderr = io.StringIO()
        try:
            lib = ffi.dlopen(lib_file)
        finally:
            sys.stderr = old_stderr
    else:
        raise
55
+
56
def KMedoids(diss, k, weights=None, npass=1, initialclust=None, method='PAMonce', cluster_only=False):
    """
    Cluster observations around medoids from a precomputed dissimilarity matrix.

    :param diss: Square dissimilarity matrix (must expose ``shape`` and ``astype``,
                 e.g. a NumPy array).
    :param k: Number of clusters; must lie in ``[2, number of observations]``.
    :param weights: Optional per-observation weights (any array-like). Defaults to a
                    vector of ones.
    :param npass: Number of passes handed to the compiled routine. It is forced to 0
                  whenever ``initialclust`` is supplied.
    :param initialclust: Optional starting point. One of: ``k`` 0-based medoid indices;
                         a membership vector with one entry per observation (medoids are
                         then derived with ``disscentertrim``); or a SciPy linkage matrix
                         (float64, 4 columns), which is first cut into ``k`` groups.
                         When omitted, ``k`` distinct observations are drawn at random.
    :param method: ``'KMedoids'``, ``'PAM'`` or ``'PAMonce'`` (case-insensitive), or the
                   equivalent integer code 1, 2 or 3.
    :param cluster_only: Accepted for interface compatibility; currently unused.
    :return: The value returned by ``runclusterloop()`` of the compiled clustering object.
    :raises ValueError: If the method is unknown, ``diss`` is not square, ``k`` or
                        ``npass`` is out of range, or ``weights`` / ``initialclust``
                        have an inconsistent length or out-of-range medoid indices.
    """
    if isinstance(method, str):
        method = method.lower()
        method_map = ["kmedoids", "pam", "pamonce"]
        if method in method_map:
            method = method_map.index(method) + 1  # 1-based code

    if not (isinstance(method, int) and method in {1, 2, 3}):
        raise ValueError(f"[!] Unknown clustering method: {method}.")

    if len(diss.shape) != 2 or diss.shape[0] != diss.shape[1]:
        raise ValueError(
            f"[!] Dissimilarity matrix must be square, but has shape {tuple(diss.shape)}."
        )
    nelements = diss.shape[0]

    # Validate k before it is used for sampling, so that an out-of-range value is
    # reported as such instead of surfacing as a NumPy sampling error.
    if k < 2 or k > nelements:
        raise ValueError(f" [!] 'k' should be in [2, {nelements}]")

    # Accept any array-like so that the later ``astype`` call cannot fail on a list.
    if weights is None:
        weights = np.ones(nelements, dtype=float)
    else:
        weights = np.asarray(weights)

    if weights.ndim == 0 or len(weights) != nelements:
        raise ValueError(f"[!] 'weights' should be a vector of length {nelements}.")

    if initialclust is None:
        # Random 0-based starting medoids, drawn without replacement.
        initialclust = np.random.choice(nelements, k, replace=False)
    else:
        if _validate_linkage_matrix(initialclust):
            # Cut the hierarchy into k groups; labels are shifted to be 1-based.
            initialclust = cut_tree(initialclust, n_clusters=k).flatten() + 1
            # TODO: a grouping is already available at this point, so why is it only
            #       used to pick medoids instead of initialising PAM/PAMonce directly?
            #       Why are initial medoids more necessary than an initial grouping?
            #       Iterating should reach good medoids whatever the starting point.
            # TODO: even when extending from a subsample to the full data, the group
            #       passed in as an argument should be usable as well.
        if len(initialclust) == nelements:
            # One label per observation: derive one medoid per group.
            initialclust = disscentertrim(diss=diss, group=initialclust,
                                          medoids_index="first", weights=weights)

            if len(initialclust) != k:
                raise ValueError(
                    f"[!] 'initialclust' should be a vector of cluster membership with k={k}.")

        # A user-supplied start is used as is, without additional passes.
        npass = 0

    # Lists, tuples and arrays are all handled uniformly from here on.
    initialclust = np.asarray(initialclust)

    if len(initialclust) != k:
        raise ValueError(f"[!] 'initialclust' should be a vector of medoids index of length :{k}.")

    if np.any((initialclust >= nelements) | (initialclust < 0)):
        raise ValueError(f"[!] Starting medoids should be in 0:{nelements - 1}")

    if npass < 0:
        raise ValueError("[!] 'npass' should be greater than or equal to 0")

    # Codes 1, 2, 3 map to the compiled classes KMedoid, PAM and PAMonce, which share
    # one constructor signature.
    algorithm = getattr(clustering_c_code, ("KMedoid", "PAM", "PAMonce")[method - 1])
    memb = algorithm(nelements,
                     diss.astype(np.float64),
                     initialclust.astype(np.int32),
                     npass,
                     weights.astype(np.float64))

    memb_matrix = memb.runclusterloop()

    print("[>] Computed successfully.")

    return memb_matrix
139
+
140
+ def _validate_linkage_matrix(initialclust):
141
+ """
142
+ Check that the passed matrix matches the linkage matrix type requirements
143
+ """
144
+ if not isinstance(initialclust, np.ndarray):
145
+ return False # Linkage matrix must be a NumPy array
146
+
147
+ if initialclust.ndim != 2 or initialclust.shape[1] != 4:
148
+ return False # Linkage matrix must be a 2D array with 4 columns
149
+
150
+ if initialclust.dtype != np.float64:
151
+ return False # Linkage matrix 'Z' must contain doubles (np.float64).
152
+
153
+ return True
154
+
155
+
156
if __name__ == '__main__':
    # Demo: run the three medoid-based algorithms on a bundled dataset.
    # TODO: importing KMedoids under Python 3.11 runs into a numpy problem
    # TODO: sequenzo 0.1.14 could not find the KMedoids module (this was an
    #       __init__ problem and has since been fixed)

    # Import only the names the demo needs rather than using a wildcard import,
    # so that nothing defined in this module is shadowed.
    from sequenzo import SequenceData, get_distance_matrix, load_dataset

    df = load_dataset('country_co2_emissions')

    # Every column after the first one is used as a time point.
    time = list(df.columns)[1:]
    states = ['Very Low', 'Low', 'Middle', 'High', 'Very High']

    sequence_data = SequenceData(df, time_type="age", time=time, id_col="country", states=states)

    om = get_distance_matrix(sequence_data, method="OM", sm="TRATE", indel="auto")

    centroid_indices = [0, 50, 100, 150, 190]
    n_pass = 10

    weights = np.ones(len(om))

    # Example 1: KMedoids algorithm without specifying the center point
    clustering = KMedoids(diss=om,
                          k=5,
                          method='KMedoids',
                          npass=n_pass,
                          weights=weights)

    # Example 2: PAM algorithm with a specified center point
    clustering = KMedoids(diss=om,
                          k=5,
                          method='PAM',
                          initialclust=centroid_indices,
                          npass=n_pass,
                          weights=weights)

    # Example 3: PAMonce algorithm with default parameters
    clustering = KMedoids(diss=om,
                          k=5,
                          method='PAMonce',
                          npass=n_pass,
                          weights=weights)
@@ -0,0 +1,30 @@
1
+ """
2
+ @Author : Yuqi Liang 梁彧祺
3
+ @File : __init__.py
4
+ @Time : 27/02/2025 09:58
5
+ @Desc :
6
+ """
7
+ from .hierarchical_clustering import Cluster, ClusterResults, ClusterQuality
8
+ from .KMedoids import KMedoids
9
+
10
+
11
+ def _import_c_code():
12
+ """Lazily import the c_code module to avoid circular dependencies during installation"""
13
+ try:
14
+ # Import built pybind11 extension placed under this package
15
+ from sequenzo.clustering import clustering_c_code
16
+ return clustering_c_code
17
+ except ImportError:
18
+ # If the C extension cannot be imported, return None
19
+ print(
20
+ "Warning: The C++ extension (c_code) could not be imported. Please ensure the extension module is compiled correctly.")
21
+ return None
22
+
23
+
24
+ __all__ = [
25
+ "Cluster",
26
+ "ClusterResults",
27
+ "ClusterQuality",
28
+ "KMedoids",
29
+ # Add other functions as needed
30
+ ]