sequenzo 0.1.21__cp312-cp312-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sequenzo might be problematic. Click here for more details.

Files changed (260) hide show
  1. sequenzo/__init__.py +240 -0
  2. sequenzo/big_data/__init__.py +12 -0
  3. sequenzo/big_data/clara/__init__.py +26 -0
  4. sequenzo/big_data/clara/clara.py +467 -0
  5. sequenzo/big_data/clara/utils/__init__.py +27 -0
  6. sequenzo/big_data/clara/utils/aggregatecases.py +92 -0
  7. sequenzo/big_data/clara/utils/davies_bouldin.py +91 -0
  8. sequenzo/big_data/clara/utils/get_weighted_diss.cpython-312-darwin.so +0 -0
  9. sequenzo/big_data/clara/utils/wfcmdd.py +205 -0
  10. sequenzo/big_data/clara/visualization.py +88 -0
  11. sequenzo/clustering/KMedoids.py +196 -0
  12. sequenzo/clustering/__init__.py +30 -0
  13. sequenzo/clustering/clustering_c_code.cpython-312-darwin.so +0 -0
  14. sequenzo/clustering/hierarchical_clustering.py +1380 -0
  15. sequenzo/clustering/src/KMedoid.cpp +262 -0
  16. sequenzo/clustering/src/PAM.cpp +236 -0
  17. sequenzo/clustering/src/PAMonce.cpp +234 -0
  18. sequenzo/clustering/src/cluster_quality.cpp +496 -0
  19. sequenzo/clustering/src/cluster_quality.h +128 -0
  20. sequenzo/clustering/src/cluster_quality_backup.cpp +570 -0
  21. sequenzo/clustering/src/module.cpp +228 -0
  22. sequenzo/clustering/src/weightedinertia.cpp +111 -0
  23. sequenzo/clustering/utils/__init__.py +27 -0
  24. sequenzo/clustering/utils/disscenter.py +122 -0
  25. sequenzo/data_preprocessing/__init__.py +20 -0
  26. sequenzo/data_preprocessing/helpers.py +256 -0
  27. sequenzo/datasets/__init__.py +41 -0
  28. sequenzo/datasets/biofam.csv +2001 -0
  29. sequenzo/datasets/biofam_child_domain.csv +2001 -0
  30. sequenzo/datasets/biofam_left_domain.csv +2001 -0
  31. sequenzo/datasets/biofam_married_domain.csv +2001 -0
  32. sequenzo/datasets/chinese_colonial_territories.csv +12 -0
  33. sequenzo/datasets/country_co2_emissions.csv +194 -0
  34. sequenzo/datasets/country_co2_emissions_global_deciles.csv +195 -0
  35. sequenzo/datasets/country_co2_emissions_global_quintiles.csv +195 -0
  36. sequenzo/datasets/country_co2_emissions_local_deciles.csv +195 -0
  37. sequenzo/datasets/country_co2_emissions_local_quintiles.csv +195 -0
  38. sequenzo/datasets/country_gdp_per_capita.csv +194 -0
  39. sequenzo/datasets/mvad.csv +713 -0
  40. sequenzo/datasets/pairfam_family.csv +1867 -0
  41. sequenzo/datasets/polyadic_samplec1.csv +61 -0
  42. sequenzo/datasets/polyadic_samplep1.csv +61 -0
  43. sequenzo/datasets/polyadic_seqc1.csv +61 -0
  44. sequenzo/datasets/polyadic_seqp1.csv +61 -0
  45. sequenzo/define_sequence_data.py +609 -0
  46. sequenzo/dissimilarity_measures/__init__.py +31 -0
  47. sequenzo/dissimilarity_measures/c_code.cpython-312-darwin.so +0 -0
  48. sequenzo/dissimilarity_measures/get_distance_matrix.py +702 -0
  49. sequenzo/dissimilarity_measures/get_substitution_cost_matrix.py +241 -0
  50. sequenzo/dissimilarity_measures/src/DHDdistance.cpp +148 -0
  51. sequenzo/dissimilarity_measures/src/LCPdistance.cpp +114 -0
  52. sequenzo/dissimilarity_measures/src/OMdistance.cpp +247 -0
  53. sequenzo/dissimilarity_measures/src/OMspellDistance.cpp +281 -0
  54. sequenzo/dissimilarity_measures/src/__init__.py +0 -0
  55. sequenzo/dissimilarity_measures/src/dist2matrix.cpp +63 -0
  56. sequenzo/dissimilarity_measures/src/dp_utils.h +160 -0
  57. sequenzo/dissimilarity_measures/src/module.cpp +34 -0
  58. sequenzo/dissimilarity_measures/src/setup.py +30 -0
  59. sequenzo/dissimilarity_measures/src/utils.h +25 -0
  60. sequenzo/dissimilarity_measures/src/xsimd/.github/cmake-test/main.cpp +6 -0
  61. sequenzo/dissimilarity_measures/src/xsimd/benchmark/main.cpp +159 -0
  62. sequenzo/dissimilarity_measures/src/xsimd/benchmark/xsimd_benchmark.hpp +565 -0
  63. sequenzo/dissimilarity_measures/src/xsimd/docs/source/conf.py +37 -0
  64. sequenzo/dissimilarity_measures/src/xsimd/examples/mandelbrot.cpp +330 -0
  65. sequenzo/dissimilarity_measures/src/xsimd/examples/pico_bench.hpp +246 -0
  66. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_arithmetic.hpp +266 -0
  67. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_complex.hpp +112 -0
  68. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_details.hpp +323 -0
  69. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_logical.hpp +218 -0
  70. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_math.hpp +2583 -0
  71. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_memory.hpp +880 -0
  72. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_rounding.hpp +72 -0
  73. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_swizzle.hpp +174 -0
  74. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_trigo.hpp +978 -0
  75. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx.hpp +1924 -0
  76. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx2.hpp +1144 -0
  77. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512bw.hpp +656 -0
  78. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512cd.hpp +28 -0
  79. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512dq.hpp +244 -0
  80. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512er.hpp +20 -0
  81. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512f.hpp +2650 -0
  82. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512ifma.hpp +20 -0
  83. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512pf.hpp +20 -0
  84. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vbmi.hpp +77 -0
  85. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vbmi2.hpp +131 -0
  86. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vnni_avx512bw.hpp +20 -0
  87. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vnni_avx512vbmi2.hpp +20 -0
  88. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avxvnni.hpp +20 -0
  89. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_common.hpp +24 -0
  90. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_common_fwd.hpp +77 -0
  91. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_constants.hpp +393 -0
  92. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_emulated.hpp +788 -0
  93. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_avx.hpp +93 -0
  94. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_avx2.hpp +46 -0
  95. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_sse.hpp +97 -0
  96. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma4.hpp +92 -0
  97. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_i8mm_neon64.hpp +17 -0
  98. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_isa.hpp +142 -0
  99. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_neon.hpp +3142 -0
  100. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_neon64.hpp +1543 -0
  101. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_rvv.hpp +1513 -0
  102. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_scalar.hpp +1260 -0
  103. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse2.hpp +2024 -0
  104. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse3.hpp +67 -0
  105. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse4_1.hpp +339 -0
  106. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse4_2.hpp +44 -0
  107. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_ssse3.hpp +186 -0
  108. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sve.hpp +1155 -0
  109. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_vsx.hpp +892 -0
  110. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_wasm.hpp +1780 -0
  111. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_arch.hpp +240 -0
  112. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_config.hpp +484 -0
  113. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_cpuid.hpp +269 -0
  114. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_inline.hpp +27 -0
  115. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/math/xsimd_rem_pio2.hpp +719 -0
  116. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/memory/xsimd_aligned_allocator.hpp +349 -0
  117. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/memory/xsimd_alignment.hpp +91 -0
  118. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_all_registers.hpp +55 -0
  119. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_api.hpp +2765 -0
  120. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx2_register.hpp +44 -0
  121. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512bw_register.hpp +51 -0
  122. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512cd_register.hpp +51 -0
  123. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512dq_register.hpp +51 -0
  124. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512er_register.hpp +51 -0
  125. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512f_register.hpp +77 -0
  126. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512ifma_register.hpp +51 -0
  127. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512pf_register.hpp +51 -0
  128. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vbmi2_register.hpp +51 -0
  129. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vbmi_register.hpp +51 -0
  130. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512bw_register.hpp +54 -0
  131. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512vbmi2_register.hpp +53 -0
  132. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx_register.hpp +64 -0
  133. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avxvnni_register.hpp +44 -0
  134. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_batch.hpp +1524 -0
  135. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_batch_constant.hpp +300 -0
  136. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_common_arch.hpp +47 -0
  137. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_emulated_register.hpp +80 -0
  138. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_avx2_register.hpp +50 -0
  139. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_avx_register.hpp +50 -0
  140. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_sse_register.hpp +50 -0
  141. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma4_register.hpp +50 -0
  142. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_i8mm_neon64_register.hpp +55 -0
  143. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_neon64_register.hpp +55 -0
  144. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_neon_register.hpp +154 -0
  145. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_register.hpp +94 -0
  146. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_rvv_register.hpp +506 -0
  147. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse2_register.hpp +59 -0
  148. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse3_register.hpp +49 -0
  149. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse4_1_register.hpp +48 -0
  150. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse4_2_register.hpp +48 -0
  151. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_ssse3_register.hpp +48 -0
  152. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sve_register.hpp +156 -0
  153. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_traits.hpp +337 -0
  154. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_utils.hpp +536 -0
  155. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_vsx_register.hpp +77 -0
  156. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_wasm_register.hpp +59 -0
  157. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/xsimd.hpp +75 -0
  158. sequenzo/dissimilarity_measures/src/xsimd/test/architectures/dummy.cpp +7 -0
  159. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set.cpp +13 -0
  160. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean.cpp +24 -0
  161. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_aligned.cpp +25 -0
  162. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_arch_independent.cpp +28 -0
  163. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_tag_dispatch.cpp +25 -0
  164. sequenzo/dissimilarity_measures/src/xsimd/test/doc/manipulating_abstract_batches.cpp +7 -0
  165. sequenzo/dissimilarity_measures/src/xsimd/test/doc/manipulating_parametric_batches.cpp +8 -0
  166. sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum.hpp +31 -0
  167. sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum_avx2.cpp +3 -0
  168. sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum_sse2.cpp +3 -0
  169. sequenzo/dissimilarity_measures/src/xsimd/test/doc/writing_vectorized_code.cpp +11 -0
  170. sequenzo/dissimilarity_measures/src/xsimd/test/main.cpp +31 -0
  171. sequenzo/dissimilarity_measures/src/xsimd/test/test_api.cpp +230 -0
  172. sequenzo/dissimilarity_measures/src/xsimd/test/test_arch.cpp +217 -0
  173. sequenzo/dissimilarity_measures/src/xsimd/test/test_basic_math.cpp +183 -0
  174. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch.cpp +1049 -0
  175. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_bool.cpp +508 -0
  176. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_cast.cpp +409 -0
  177. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_complex.cpp +712 -0
  178. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_constant.cpp +286 -0
  179. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_float.cpp +141 -0
  180. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_int.cpp +365 -0
  181. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_manip.cpp +308 -0
  182. sequenzo/dissimilarity_measures/src/xsimd/test/test_bitwise_cast.cpp +222 -0
  183. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_exponential.cpp +226 -0
  184. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_hyperbolic.cpp +183 -0
  185. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_power.cpp +265 -0
  186. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_trigonometric.cpp +236 -0
  187. sequenzo/dissimilarity_measures/src/xsimd/test/test_conversion.cpp +248 -0
  188. sequenzo/dissimilarity_measures/src/xsimd/test/test_custom_default_arch.cpp +28 -0
  189. sequenzo/dissimilarity_measures/src/xsimd/test/test_error_gamma.cpp +170 -0
  190. sequenzo/dissimilarity_measures/src/xsimd/test/test_explicit_batch_instantiation.cpp +32 -0
  191. sequenzo/dissimilarity_measures/src/xsimd/test/test_exponential.cpp +202 -0
  192. sequenzo/dissimilarity_measures/src/xsimd/test/test_extract_pair.cpp +92 -0
  193. sequenzo/dissimilarity_measures/src/xsimd/test/test_fp_manipulation.cpp +77 -0
  194. sequenzo/dissimilarity_measures/src/xsimd/test/test_gnu_source.cpp +30 -0
  195. sequenzo/dissimilarity_measures/src/xsimd/test/test_hyperbolic.cpp +167 -0
  196. sequenzo/dissimilarity_measures/src/xsimd/test/test_load_store.cpp +304 -0
  197. sequenzo/dissimilarity_measures/src/xsimd/test/test_memory.cpp +61 -0
  198. sequenzo/dissimilarity_measures/src/xsimd/test/test_poly_evaluation.cpp +64 -0
  199. sequenzo/dissimilarity_measures/src/xsimd/test/test_power.cpp +184 -0
  200. sequenzo/dissimilarity_measures/src/xsimd/test/test_rounding.cpp +199 -0
  201. sequenzo/dissimilarity_measures/src/xsimd/test/test_select.cpp +101 -0
  202. sequenzo/dissimilarity_measures/src/xsimd/test/test_shuffle.cpp +760 -0
  203. sequenzo/dissimilarity_measures/src/xsimd/test/test_sum.cpp +4 -0
  204. sequenzo/dissimilarity_measures/src/xsimd/test/test_sum.hpp +34 -0
  205. sequenzo/dissimilarity_measures/src/xsimd/test/test_traits.cpp +172 -0
  206. sequenzo/dissimilarity_measures/src/xsimd/test/test_trigonometric.cpp +208 -0
  207. sequenzo/dissimilarity_measures/src/xsimd/test/test_utils.hpp +611 -0
  208. sequenzo/dissimilarity_measures/src/xsimd/test/test_wasm/test_wasm_playwright.py +123 -0
  209. sequenzo/dissimilarity_measures/src/xsimd/test/test_xsimd_api.cpp +1460 -0
  210. sequenzo/dissimilarity_measures/utils/__init__.py +16 -0
  211. sequenzo/dissimilarity_measures/utils/get_LCP_length_for_2_seq.py +44 -0
  212. sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.cpython-312-darwin.so +0 -0
  213. sequenzo/dissimilarity_measures/utils/seqconc.cpython-312-darwin.so +0 -0
  214. sequenzo/dissimilarity_measures/utils/seqdss.cpython-312-darwin.so +0 -0
  215. sequenzo/dissimilarity_measures/utils/seqdur.cpython-312-darwin.so +0 -0
  216. sequenzo/dissimilarity_measures/utils/seqlength.cpython-312-darwin.so +0 -0
  217. sequenzo/multidomain/__init__.py +23 -0
  218. sequenzo/multidomain/association_between_domains.py +311 -0
  219. sequenzo/multidomain/cat.py +431 -0
  220. sequenzo/multidomain/combt.py +519 -0
  221. sequenzo/multidomain/dat.py +89 -0
  222. sequenzo/multidomain/idcd.py +139 -0
  223. sequenzo/multidomain/linked_polyad.py +292 -0
  224. sequenzo/openmp_setup.py +233 -0
  225. sequenzo/prefix_tree/__init__.py +43 -0
  226. sequenzo/prefix_tree/individual_level_indicators.py +1274 -0
  227. sequenzo/prefix_tree/system_level_indicators.py +465 -0
  228. sequenzo/prefix_tree/utils.py +54 -0
  229. sequenzo/sequence_characteristics/__init__.py +40 -0
  230. sequenzo/sequence_characteristics/complexity_index.py +49 -0
  231. sequenzo/sequence_characteristics/overall_cross_sectional_entropy.py +220 -0
  232. sequenzo/sequence_characteristics/plot_characteristics.py +593 -0
  233. sequenzo/sequence_characteristics/simple_characteristics.py +311 -0
  234. sequenzo/sequence_characteristics/state_frequencies_and_entropy_per_sequence.py +39 -0
  235. sequenzo/sequence_characteristics/turbulence.py +155 -0
  236. sequenzo/sequence_characteristics/variance_of_spell_durations.py +86 -0
  237. sequenzo/sequence_characteristics/within_sequence_entropy.py +43 -0
  238. sequenzo/suffix_tree/__init__.py +48 -0
  239. sequenzo/suffix_tree/individual_level_indicators.py +1638 -0
  240. sequenzo/suffix_tree/system_level_indicators.py +456 -0
  241. sequenzo/suffix_tree/utils.py +56 -0
  242. sequenzo/visualization/__init__.py +29 -0
  243. sequenzo/visualization/plot_mean_time.py +194 -0
  244. sequenzo/visualization/plot_modal_state.py +276 -0
  245. sequenzo/visualization/plot_most_frequent_sequences.py +147 -0
  246. sequenzo/visualization/plot_relative_frequency.py +404 -0
  247. sequenzo/visualization/plot_sequence_index.py +937 -0
  248. sequenzo/visualization/plot_single_medoid.py +153 -0
  249. sequenzo/visualization/plot_state_distribution.py +613 -0
  250. sequenzo/visualization/plot_transition_matrix.py +190 -0
  251. sequenzo/visualization/utils/__init__.py +23 -0
  252. sequenzo/visualization/utils/utils.py +310 -0
  253. sequenzo/with_event_history_analysis/__init__.py +35 -0
  254. sequenzo/with_event_history_analysis/sequence_analysis_multi_state_model.py +850 -0
  255. sequenzo/with_event_history_analysis/sequence_history_analysis.py +283 -0
  256. sequenzo-0.1.21.dist-info/METADATA +308 -0
  257. sequenzo-0.1.21.dist-info/RECORD +254 -0
  258. sequenzo-0.1.21.dist-info/WHEEL +5 -0
  259. sequenzo-0.1.21.dist-info/licenses/LICENSE +28 -0
  260. sequenzo-0.1.21.dist-info/top_level.txt +1 -0
@@ -0,0 +1,16 @@
1
+ """
2
+ @Author : 李欣怡
3
+ @File : __init__.py.py
4
+ @Time : 2025/2/27 20:15
5
+ @Desc :
6
+ """
7
+ from .get_sm_trate_substitution_cost_matrix import get_sm_trate_substitution_cost_matrix
8
+ from .seqconc import seqconc
9
+ from .seqdss import seqdss
10
+ from .seqdur import seqdur
11
+ from .seqlength import seqlength
12
+ from .get_LCP_length_for_2_seq import get_LCP_length_for_2_seq
13
+
14
+ __all__ = [
15
+ 'get_LCP_length_for_2_seq'
16
+ ]
@@ -0,0 +1,44 @@
1
+ """
2
+ @Author : 李欣怡
3
+ @File : get_LCP_length_for_2_seq.py
4
+ @Time : 2025/5/20 11:25
5
+ @Desc :
6
+ """
7
+
8
+ from sequenzo.define_sequence_data import SequenceData
9
+
10
def get_LCP_length_for_2_seq(data1=None, data2=None, id1=None, id2=None):
    """
    Return the length of the longest common prefix (LCP) of two sequences.

    The sequences are row ``id1`` of ``data1.seqdata`` and row ``id2`` of
    ``data2.seqdata``; rows are addressed positionally (``.iloc``).

    Parameters:
        data1 (SequenceData): Sequence object holding the first sequence.
        data2 (SequenceData): Sequence object holding the second sequence.
        id1 (int): Zero-based row position of the first sequence in ``data1``.
        id2 (int): Zero-based row position of the second sequence in ``data2``.

    Returns:
        int: Number of leading positions at which both sequences hold equal
        values (0 when they already differ at the first position, and the
        length of the shorter sequence when one is a prefix of the other).

    Raises:
        ValueError: If an argument is missing, an id is out of range, or the
            two objects do not share the same ``states`` alphabet.
        TypeError: If the data objects are not ``SequenceData`` or the ids
            are not ``int``.
    """
    # Check that every parameter was supplied
    if data1 is None or data2 is None:
        raise ValueError("[!] 'data1' and 'data2' must be set.")

    if id1 is None or id2 is None:
        raise ValueError("[!] 'id1' and 'id2' must be set.")

    # Check types
    if not isinstance(data1, SequenceData) or not isinstance(data2, SequenceData):
        raise TypeError("[!] sequences must be sequence objects")

    if not isinstance(id1, int) or not isinstance(id2, int):
        raise TypeError("[!] 'id1' and 'id2' must be int.")

    # Check ids: valid positional rows are 0 .. n_rows - 1, so an id equal to
    # the row count is already out of range.
    n_rows1 = data1.seqdata.shape[0]
    n_rows2 = data2.seqdata.shape[0]
    if not (0 <= id1 < n_rows1) or not (0 <= id2 < n_rows2):
        raise ValueError("[!] 'data1' or 'data2' has no such id.")

    # Check states: both objects must use the same alphabet, in the same order
    if len(data1.states) != len(data2.states) or any(a != b for a, b in zip(data1.states, data2.states)):
        raise ValueError("[!] The alphabet of both sequences have to be same.")

    # Get the two sequences which are compared
    seq1 = data1.seqdata.iloc[id1].to_numpy()
    seq2 = data2.seqdata.iloc[id2].to_numpy()

    # Compute LCP length. zip() stops at the shorter sequence, so no index is
    # ever read past the end (the previous while-loop tested the elements
    # before the bound and raised IndexError for prefix-equal sequences).
    length = 0
    for state1, state2 in zip(seq1, seq2):
        if not state1 == state2:
            break
        length += 1

    return length
@@ -0,0 +1,23 @@
1
+ """
2
+ @Author : Yuqi Liang 梁彧祺
3
+ @File : __init__.py.py
4
+ @Time : 14/04/2025 21:40
5
+ @Desc :
6
+ """
7
+ from .idcd import create_idcd_sequence_from_csvs
8
+ from .cat import compute_cat_distance_matrix
9
+ from .dat import compute_dat_distance_matrix
10
+ from .combt import get_interactive_combined_typology, merge_sparse_combt_types
11
+ from .association_between_domains import get_association_between_domains
12
+ from .linked_polyad import linked_polyadic_sequence_analysis
13
+
14
+
15
+ __all__ = [
16
+ "create_idcd_sequence_from_csvs",
17
+ "compute_cat_distance_matrix",
18
+ "compute_dat_distance_matrix",
19
+ "get_interactive_combined_typology",
20
+ "merge_sparse_combt_types",
21
+ "get_association_between_domains",
22
+ "linked_polyadic_sequence_analysis"
23
+ ]
@@ -0,0 +1,311 @@
1
+ """
2
+ @Author : Yuqi Liang 梁彧祺
3
+ @File : association_between_domains.py
4
+ @Time : 14/04/2025 21:15
5
+ @Desc :
6
+ This module provides functionality for measuring the association
7
+ between multiple domains of sequence data. It is a Python implementation
8
+ of the R function `seqdomassoc`, and supports calculating statistical
9
+ measures such as Cramer's V and the likelihood ratio test (LRT)
10
+ between pairs of sequence dimensions.
11
+
12
+ Currently, only the "overall" comparison method is supported,
13
+ which compares sequences position by position. Support for
14
+ representative sequences and group medoids can be added in the future.
15
+
16
+ Key features:
17
+ - Pairwise association analysis between sequence domains
18
+ - Support for weighted sequences
19
+ - Cramer's V and LRT calculation with p-values
20
+ - Cross-tabulation matrix extraction
21
+ - Interpretation of association strength
22
+ - Optimized performance using NumPy and SciPy
23
+ """
24
+ import numpy as np
25
+ import pandas as pd
26
+ import scipy.stats as stats
27
+ import itertools
28
+ import sys
29
+
30
+
31
+ def _chi_cramers_v(xtab, chi2, df):
32
+ """
33
+ Computes Cramer's V and its associated p-value.
34
+
35
+ Parameters:
36
+ xtab (np.ndarray): Contingency table.
37
+ chi2 (float): Chi-square statistic.
38
+ df (int): Degrees of freedom.
39
+
40
+ Returns:
41
+ tuple: (Cramer's V value, p-value)
42
+ """
43
+ n = xtab.sum()
44
+ nr, nc = xtab.shape
45
+ min_dim = min(nr, nc)
46
+ V = np.sqrt(chi2 / (n * (min_dim - 1)))
47
+ p_val = 1 - stats.chi2.cdf(chi2, df)
48
+ return V, p_val
49
+
50
+
51
+ def _log_likelihood_ratio_test(xtab, struct_zero=True):
52
+ """
53
+ Computes the likelihood ratio test statistic for independence.
54
+
55
+ Parameters:
56
+ xtab (np.ndarray): Contingency table.
57
+ struct_zero (bool): Adjust degrees of freedom for structural zeros.
58
+
59
+ Returns:
60
+ tuple: (LRT statistic, degrees of freedom, p-value)
61
+ """
62
+ observed = xtab.copy()
63
+ row_totals = observed.sum(axis=1, keepdims=True)
64
+ col_totals = observed.sum(axis=0, keepdims=True)
65
+ total = observed.sum()
66
+ expected = row_totals @ col_totals / total
67
+
68
+ if struct_zero:
69
+ zero_mask = (observed == 0)
70
+ expected[zero_mask] = 1
71
+ observed[zero_mask] = 1
72
+
73
+ with np.errstate(divide='ignore', invalid='ignore'):
74
+ lrt_terms = np.where(observed > 0, observed * np.log(observed / expected), 0)
75
+ lrt_stat = 2 * np.sum(lrt_terms)
76
+ df = (observed.shape[0] - 1) * (observed.shape[1] - 1)
77
+ if struct_zero:
78
+ df -= np.sum(observed == 0)
79
+ df = max(df, 1)
80
+ p_val = 1 - stats.chi2.cdf(lrt_stat, df)
81
+ return lrt_stat, df, p_val
82
+
83
+
84
+ def _classify_strength(v):
85
+ """
86
+ Classifies the strength of association based on Cramer's V value.
87
+
88
+ Parameters:
89
+ v (float): Cramer's V statistic (0 to 1).
90
+
91
+ Returns:
92
+ str: Strength level as a descriptive label.
93
+ """
94
+ if v < 0.1:
95
+ print(
96
+ "\n[!] Note: A Cramer's V below 0.1 suggests no strong linear association. "
97
+ "However, non-linear dependencies may still exist and are not captured by Cramer's V."
98
+ )
99
+ return "None"
100
+ elif v < 0.3:
101
+ return "Weak"
102
+ elif v < 0.5:
103
+ return "Moderate"
104
+ else:
105
+ return "Strong"
106
+
107
+
108
+ def _pvalue_to_stars(p):
109
+ """
110
+ Converts a p-value to significance stars.
111
+ Parameters:
112
+ p (float): P-value
113
+ Returns:
114
+ str: Significance stars string
115
+ """
116
+ if pd.isna(p):
117
+ return ""
118
+ if p < 0.001:
119
+ return "***"
120
+ elif p < 0.01:
121
+ return "**"
122
+ elif p < 0.05:
123
+ return "*"
124
+ else:
125
+ return ""
126
+
127
+
128
+ def _explain_association(result_df):
129
+ """
130
+ Generates human-readable explanations from result DataFrame.
131
+
132
+ Parameters:
133
+ result_df (pd.DataFrame): Result table with Cramer's V values.
134
+
135
+ Returns:
136
+ list of str: Explanation strings.
137
+ """
138
+ explanations = []
139
+ for idx, row in result_df.iterrows():
140
+ v = row.get("v", np.nan)
141
+ if not pd.isna(v):
142
+ strength = _classify_strength(v)
143
+ text = f"{idx.replace('_with_', ' vs ')}: {strength} association (Cramer's V = {v:.3f})"
144
+ explanations.append(text)
145
+ return explanations
146
+
147
+
148
+ def _attach_explanations(result_df):
149
+ """
150
+ Adds interpretation column ('strength') to result DataFrame.
151
+
152
+ Parameters:
153
+ result_df (pd.DataFrame): Original result table.
154
+
155
+ Returns:
156
+ pd.DataFrame: Updated DataFrame with new columns.
157
+ """
158
+ result_df["strength"] = result_df["v"].apply(
159
+ lambda v: _classify_strength(v) if not pd.isna(v) else ""
160
+ )
161
+ return result_df
162
+
163
+
164
+ def _show_full_dataframe(df):
165
+ """
166
+ Print full DataFrame in full width (for terminal or notebook).
167
+ Handles environments without IPython gracefully.
168
+
169
+ Parameters:
170
+ df (pd.DataFrame): Original result table.
171
+
172
+ Returns:
173
+ pd.DataFrame: Full DataFrame.
174
+ """
175
+
176
+ # print(df.to_string(index=True))
177
+
178
+ with pd.option_context('display.max_columns', None, 'display.width', None, 'display.colheader_justify', 'left'):
179
+ if 'ipykernel' in sys.modules:
180
+ try:
181
+ from IPython.display import display
182
+ display(df)
183
+ except ImportError:
184
+ print(df.to_string(index=True))
185
+ else:
186
+ print(df.to_string(index=True))
187
+
188
+
189
def get_association_between_domains(seqdata_dom, assoc=("LRT", "V"), rep_method="overall",
                                    wrange=None, p_value=True, struct_zero=True, cross_table=False,
                                    with_missing=False, weighted=True, dnames=None,
                                    explain=True):
    """
    Computes pairwise associations between multiple sequence domains using statistical tests.

    Parameters:
        seqdata_dom (list): List of SequenceData objects, one per domain (at least two).
        assoc (tuple or str): Which association measures to compute: "LRT", "V", or both.
            Case-insensitive; a single measure may be given as a plain string.
        rep_method (str): Method to determine which sequences to compare (currently only "overall").
        wrange (tuple or None): Not implemented yet (for time window comparison); currently unused.
        p_value (bool): Whether to compute p-values.
        struct_zero (bool): Whether to treat structural zeros as affecting degrees of freedom.
        cross_table (bool): If True, attach cross-tabulations to result attributes.
        with_missing (bool): If False, all-zero rows/columns of each cross table are dropped.
        weighted (bool): Forwarded to ``get_xtabs`` of the first domain of each pair.
        dnames (list or None): Names of the domains. If None, will auto-name them as Dom1, Dom2, ...
        explain (bool): If True, add the `strength` interpretation column.

    Returns:
        pd.DataFrame: A result table (rows = domain pairs; columns = df, LRT, p(LRT), v, p(v)),
        plus a `strength` column when `explain=True`. The p-value columns are formatted as
        strings with significance stars. If `cross_table=True`, the cross tables are stored
        (as nested lists) under ``.attrs["cross.tables"]``.

    Raises:
        ValueError: If `assoc` contains an unknown measure, fewer than two domains are given,
            or `dnames` has fewer entries than there are domains.
        NotImplementedError: If `rep_method` is not "overall".
    """
    # A bare string such as "V" would otherwise be iterated character by character.
    if isinstance(assoc, str):
        assoc = [assoc]
    assoc = [a.upper() for a in assoc]
    valid_assoc = {"LRT", "V"}
    if not set(assoc).issubset(valid_assoc):
        raise ValueError(f"assoc must be a subset of {valid_assoc}")

    if len(seqdata_dom) < 2:
        raise ValueError("seqdata_dom must be a list of at least two SequenceData objects")

    if rep_method != "overall":
        raise NotImplementedError("Only rep_method='overall' is supported in this version")

    ndom = len(seqdata_dom)
    if dnames is None:
        dnames = [f"Dom{i + 1}" for i in range(ndom)]
    elif len(dnames) < ndom:
        # Fail early with a clear message instead of an IndexError inside the loop.
        raise ValueError("dnames must provide one name per domain in seqdata_dom")

    cross_tables = {}
    results = []
    tabnames = []

    for i, j in itertools.combinations(range(ndom), 2):
        d1, d2 = seqdata_dom[i], seqdata_dom[j]
        name1, name2 = dnames[i], dnames[j]
        tabname = f"{name1} vs {name2}"
        tabnames.append(tabname)

        xtab = d1.get_xtabs(d2, weighted=weighted)

        if not with_missing:
            # Drop rows, then columns, that carry no mass at all.
            xtab = xtab[(xtab.sum(axis=1) > 0), :]
            xtab = xtab[:, (xtab.sum(axis=0) > 0)]

        res = {"df": None, "LRT": None, "p(LRT)": None, "v": None, "p(v)": None}

        if assoc:
            # The LRT helper also supplies the degrees of freedom that the
            # Cramer's V p-value uses, so run it once for either measure.
            lrt, dof, plrt = _log_likelihood_ratio_test(xtab, struct_zero)
            res["df"] = dof

            if "LRT" in assoc:
                res["LRT"] = lrt
                if p_value:
                    res["p(LRT)"] = plrt

            if "V" in assoc:
                chi2 = stats.chi2_contingency(xtab, correction=False)[0]
                v, pv = _chi_cramers_v(xtab, chi2, res["df"])
                res["v"] = v
                if p_value:
                    res["p(v)"] = pv

        results.append(res)
        if cross_table:
            cross_tables[tabname] = xtab

    colnames = ["df", "LRT", "p(LRT)", "v", "p(v)"]
    # Measures that were not requested stay NaN in the numeric result matrix.
    result_matrix = np.full((len(results), len(colnames)), np.nan)
    for idx, res in enumerate(results):
        for col_idx, col in enumerate(colnames):
            if res[col] is not None:
                result_matrix[idx, col_idx] = res[col]

    result_df = pd.DataFrame(result_matrix, columns=colnames, index=tabnames)

    # Safely attach cross tables without causing Pandas printing issues
    if cross_table:
        # Only store *serializable* data to avoid print issues
        result_df.attrs["cross.tables"] = {
            k: xtab.tolist() for k, xtab in cross_tables.items()
        }
    else:
        # Completely clear attrs to avoid ValueError when printing
        result_df.attrs.clear()

    if explain:
        result_df = _attach_explanations(result_df)

    # NOTE(review): the reporting below is kept unconditional (not nested under
    # `if explain`) — confirm against the intended behaviour of `explain=False`.

    # Convert p(LRT) to string with stars
    result_df["p(LRT)"] = result_df["p(LRT)"].apply(
        lambda p: f"{p:.3f} {_pvalue_to_stars(p)}".strip() if not pd.isna(p) else ""
    )

    # Convert p(v) to string with stars
    result_df["p(v)"] = result_df["p(v)"].apply(
        lambda p: f"{p:.3f} {_pvalue_to_stars(p)}".strip() if not pd.isna(p) else ""
    )

    print("\n📜 Full results table:")
    _show_full_dataframe(result_df)

    print("\n📘 Column explanations:")
    print(" - df : Degrees of freedom for the test (typically 1 for binary state sequences).")
    print(" - LRT : Likelihood Ratio Test statistic (higher = stronger dependence).")
    print(" - p(LRT) : p-value for LRT + significance stars: * (p<.05), ** (p<.01), *** (p<.001)")
    print(" - v : Cramer's V statistic (0 to 1, measures association strength).")
    print(" - p(v) : p-value for Cramer's V (based on chi-squared test) + significance stars: * (p<.05), ** (p<.01), *** (p<.001)")
    print(" - strength : Qualitative label for association strength based on Cramer's V:")
    print(" 0.00-0.09 -> None, 0.10-0.29 -> Weak, 0.30-0.49 -> Moderate, >=0.50 -> Strong")

    # The documented contract is to hand the table back to the caller.
    return result_df
311
+