sequenzo 0.1.21__cp312-cp312-macosx_10_9_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sequenzo might be problematic. Click here for more details.

Files changed (260) hide show
  1. sequenzo/__init__.py +240 -0
  2. sequenzo/big_data/__init__.py +12 -0
  3. sequenzo/big_data/clara/__init__.py +26 -0
  4. sequenzo/big_data/clara/clara.py +467 -0
  5. sequenzo/big_data/clara/utils/__init__.py +27 -0
  6. sequenzo/big_data/clara/utils/aggregatecases.py +92 -0
  7. sequenzo/big_data/clara/utils/davies_bouldin.py +91 -0
  8. sequenzo/big_data/clara/utils/get_weighted_diss.cpython-312-darwin.so +0 -0
  9. sequenzo/big_data/clara/utils/wfcmdd.py +205 -0
  10. sequenzo/big_data/clara/visualization.py +88 -0
  11. sequenzo/clustering/KMedoids.py +196 -0
  12. sequenzo/clustering/__init__.py +30 -0
  13. sequenzo/clustering/clustering_c_code.cpython-312-darwin.so +0 -0
  14. sequenzo/clustering/hierarchical_clustering.py +1380 -0
  15. sequenzo/clustering/src/KMedoid.cpp +262 -0
  16. sequenzo/clustering/src/PAM.cpp +236 -0
  17. sequenzo/clustering/src/PAMonce.cpp +234 -0
  18. sequenzo/clustering/src/cluster_quality.cpp +496 -0
  19. sequenzo/clustering/src/cluster_quality.h +128 -0
  20. sequenzo/clustering/src/cluster_quality_backup.cpp +570 -0
  21. sequenzo/clustering/src/module.cpp +228 -0
  22. sequenzo/clustering/src/weightedinertia.cpp +111 -0
  23. sequenzo/clustering/utils/__init__.py +27 -0
  24. sequenzo/clustering/utils/disscenter.py +122 -0
  25. sequenzo/data_preprocessing/__init__.py +20 -0
  26. sequenzo/data_preprocessing/helpers.py +256 -0
  27. sequenzo/datasets/__init__.py +41 -0
  28. sequenzo/datasets/biofam.csv +2001 -0
  29. sequenzo/datasets/biofam_child_domain.csv +2001 -0
  30. sequenzo/datasets/biofam_left_domain.csv +2001 -0
  31. sequenzo/datasets/biofam_married_domain.csv +2001 -0
  32. sequenzo/datasets/chinese_colonial_territories.csv +12 -0
  33. sequenzo/datasets/country_co2_emissions.csv +194 -0
  34. sequenzo/datasets/country_co2_emissions_global_deciles.csv +195 -0
  35. sequenzo/datasets/country_co2_emissions_global_quintiles.csv +195 -0
  36. sequenzo/datasets/country_co2_emissions_local_deciles.csv +195 -0
  37. sequenzo/datasets/country_co2_emissions_local_quintiles.csv +195 -0
  38. sequenzo/datasets/country_gdp_per_capita.csv +194 -0
  39. sequenzo/datasets/mvad.csv +713 -0
  40. sequenzo/datasets/pairfam_family.csv +1867 -0
  41. sequenzo/datasets/polyadic_samplec1.csv +61 -0
  42. sequenzo/datasets/polyadic_samplep1.csv +61 -0
  43. sequenzo/datasets/polyadic_seqc1.csv +61 -0
  44. sequenzo/datasets/polyadic_seqp1.csv +61 -0
  45. sequenzo/define_sequence_data.py +609 -0
  46. sequenzo/dissimilarity_measures/__init__.py +31 -0
  47. sequenzo/dissimilarity_measures/c_code.cpython-312-darwin.so +0 -0
  48. sequenzo/dissimilarity_measures/get_distance_matrix.py +702 -0
  49. sequenzo/dissimilarity_measures/get_substitution_cost_matrix.py +241 -0
  50. sequenzo/dissimilarity_measures/src/DHDdistance.cpp +148 -0
  51. sequenzo/dissimilarity_measures/src/LCPdistance.cpp +114 -0
  52. sequenzo/dissimilarity_measures/src/OMdistance.cpp +247 -0
  53. sequenzo/dissimilarity_measures/src/OMspellDistance.cpp +281 -0
  54. sequenzo/dissimilarity_measures/src/__init__.py +0 -0
  55. sequenzo/dissimilarity_measures/src/dist2matrix.cpp +63 -0
  56. sequenzo/dissimilarity_measures/src/dp_utils.h +160 -0
  57. sequenzo/dissimilarity_measures/src/module.cpp +34 -0
  58. sequenzo/dissimilarity_measures/src/setup.py +30 -0
  59. sequenzo/dissimilarity_measures/src/utils.h +25 -0
  60. sequenzo/dissimilarity_measures/src/xsimd/.github/cmake-test/main.cpp +6 -0
  61. sequenzo/dissimilarity_measures/src/xsimd/benchmark/main.cpp +159 -0
  62. sequenzo/dissimilarity_measures/src/xsimd/benchmark/xsimd_benchmark.hpp +565 -0
  63. sequenzo/dissimilarity_measures/src/xsimd/docs/source/conf.py +37 -0
  64. sequenzo/dissimilarity_measures/src/xsimd/examples/mandelbrot.cpp +330 -0
  65. sequenzo/dissimilarity_measures/src/xsimd/examples/pico_bench.hpp +246 -0
  66. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_arithmetic.hpp +266 -0
  67. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_complex.hpp +112 -0
  68. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_details.hpp +323 -0
  69. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_logical.hpp +218 -0
  70. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_math.hpp +2583 -0
  71. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_memory.hpp +880 -0
  72. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_rounding.hpp +72 -0
  73. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_swizzle.hpp +174 -0
  74. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_trigo.hpp +978 -0
  75. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx.hpp +1924 -0
  76. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx2.hpp +1144 -0
  77. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512bw.hpp +656 -0
  78. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512cd.hpp +28 -0
  79. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512dq.hpp +244 -0
  80. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512er.hpp +20 -0
  81. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512f.hpp +2650 -0
  82. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512ifma.hpp +20 -0
  83. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512pf.hpp +20 -0
  84. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vbmi.hpp +77 -0
  85. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vbmi2.hpp +131 -0
  86. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vnni_avx512bw.hpp +20 -0
  87. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vnni_avx512vbmi2.hpp +20 -0
  88. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avxvnni.hpp +20 -0
  89. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_common.hpp +24 -0
  90. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_common_fwd.hpp +77 -0
  91. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_constants.hpp +393 -0
  92. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_emulated.hpp +788 -0
  93. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_avx.hpp +93 -0
  94. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_avx2.hpp +46 -0
  95. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_sse.hpp +97 -0
  96. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma4.hpp +92 -0
  97. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_i8mm_neon64.hpp +17 -0
  98. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_isa.hpp +142 -0
  99. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_neon.hpp +3142 -0
  100. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_neon64.hpp +1543 -0
  101. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_rvv.hpp +1513 -0
  102. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_scalar.hpp +1260 -0
  103. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse2.hpp +2024 -0
  104. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse3.hpp +67 -0
  105. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse4_1.hpp +339 -0
  106. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse4_2.hpp +44 -0
  107. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_ssse3.hpp +186 -0
  108. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sve.hpp +1155 -0
  109. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_vsx.hpp +892 -0
  110. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_wasm.hpp +1780 -0
  111. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_arch.hpp +240 -0
  112. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_config.hpp +484 -0
  113. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_cpuid.hpp +269 -0
  114. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_inline.hpp +27 -0
  115. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/math/xsimd_rem_pio2.hpp +719 -0
  116. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/memory/xsimd_aligned_allocator.hpp +349 -0
  117. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/memory/xsimd_alignment.hpp +91 -0
  118. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_all_registers.hpp +55 -0
  119. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_api.hpp +2765 -0
  120. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx2_register.hpp +44 -0
  121. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512bw_register.hpp +51 -0
  122. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512cd_register.hpp +51 -0
  123. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512dq_register.hpp +51 -0
  124. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512er_register.hpp +51 -0
  125. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512f_register.hpp +77 -0
  126. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512ifma_register.hpp +51 -0
  127. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512pf_register.hpp +51 -0
  128. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vbmi2_register.hpp +51 -0
  129. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vbmi_register.hpp +51 -0
  130. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512bw_register.hpp +54 -0
  131. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512vbmi2_register.hpp +53 -0
  132. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx_register.hpp +64 -0
  133. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avxvnni_register.hpp +44 -0
  134. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_batch.hpp +1524 -0
  135. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_batch_constant.hpp +300 -0
  136. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_common_arch.hpp +47 -0
  137. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_emulated_register.hpp +80 -0
  138. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_avx2_register.hpp +50 -0
  139. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_avx_register.hpp +50 -0
  140. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_sse_register.hpp +50 -0
  141. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma4_register.hpp +50 -0
  142. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_i8mm_neon64_register.hpp +55 -0
  143. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_neon64_register.hpp +55 -0
  144. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_neon_register.hpp +154 -0
  145. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_register.hpp +94 -0
  146. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_rvv_register.hpp +506 -0
  147. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse2_register.hpp +59 -0
  148. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse3_register.hpp +49 -0
  149. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse4_1_register.hpp +48 -0
  150. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse4_2_register.hpp +48 -0
  151. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_ssse3_register.hpp +48 -0
  152. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sve_register.hpp +156 -0
  153. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_traits.hpp +337 -0
  154. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_utils.hpp +536 -0
  155. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_vsx_register.hpp +77 -0
  156. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_wasm_register.hpp +59 -0
  157. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/xsimd.hpp +75 -0
  158. sequenzo/dissimilarity_measures/src/xsimd/test/architectures/dummy.cpp +7 -0
  159. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set.cpp +13 -0
  160. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean.cpp +24 -0
  161. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_aligned.cpp +25 -0
  162. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_arch_independent.cpp +28 -0
  163. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_tag_dispatch.cpp +25 -0
  164. sequenzo/dissimilarity_measures/src/xsimd/test/doc/manipulating_abstract_batches.cpp +7 -0
  165. sequenzo/dissimilarity_measures/src/xsimd/test/doc/manipulating_parametric_batches.cpp +8 -0
  166. sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum.hpp +31 -0
  167. sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum_avx2.cpp +3 -0
  168. sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum_sse2.cpp +3 -0
  169. sequenzo/dissimilarity_measures/src/xsimd/test/doc/writing_vectorized_code.cpp +11 -0
  170. sequenzo/dissimilarity_measures/src/xsimd/test/main.cpp +31 -0
  171. sequenzo/dissimilarity_measures/src/xsimd/test/test_api.cpp +230 -0
  172. sequenzo/dissimilarity_measures/src/xsimd/test/test_arch.cpp +217 -0
  173. sequenzo/dissimilarity_measures/src/xsimd/test/test_basic_math.cpp +183 -0
  174. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch.cpp +1049 -0
  175. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_bool.cpp +508 -0
  176. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_cast.cpp +409 -0
  177. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_complex.cpp +712 -0
  178. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_constant.cpp +286 -0
  179. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_float.cpp +141 -0
  180. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_int.cpp +365 -0
  181. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_manip.cpp +308 -0
  182. sequenzo/dissimilarity_measures/src/xsimd/test/test_bitwise_cast.cpp +222 -0
  183. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_exponential.cpp +226 -0
  184. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_hyperbolic.cpp +183 -0
  185. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_power.cpp +265 -0
  186. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_trigonometric.cpp +236 -0
  187. sequenzo/dissimilarity_measures/src/xsimd/test/test_conversion.cpp +248 -0
  188. sequenzo/dissimilarity_measures/src/xsimd/test/test_custom_default_arch.cpp +28 -0
  189. sequenzo/dissimilarity_measures/src/xsimd/test/test_error_gamma.cpp +170 -0
  190. sequenzo/dissimilarity_measures/src/xsimd/test/test_explicit_batch_instantiation.cpp +32 -0
  191. sequenzo/dissimilarity_measures/src/xsimd/test/test_exponential.cpp +202 -0
  192. sequenzo/dissimilarity_measures/src/xsimd/test/test_extract_pair.cpp +92 -0
  193. sequenzo/dissimilarity_measures/src/xsimd/test/test_fp_manipulation.cpp +77 -0
  194. sequenzo/dissimilarity_measures/src/xsimd/test/test_gnu_source.cpp +30 -0
  195. sequenzo/dissimilarity_measures/src/xsimd/test/test_hyperbolic.cpp +167 -0
  196. sequenzo/dissimilarity_measures/src/xsimd/test/test_load_store.cpp +304 -0
  197. sequenzo/dissimilarity_measures/src/xsimd/test/test_memory.cpp +61 -0
  198. sequenzo/dissimilarity_measures/src/xsimd/test/test_poly_evaluation.cpp +64 -0
  199. sequenzo/dissimilarity_measures/src/xsimd/test/test_power.cpp +184 -0
  200. sequenzo/dissimilarity_measures/src/xsimd/test/test_rounding.cpp +199 -0
  201. sequenzo/dissimilarity_measures/src/xsimd/test/test_select.cpp +101 -0
  202. sequenzo/dissimilarity_measures/src/xsimd/test/test_shuffle.cpp +760 -0
  203. sequenzo/dissimilarity_measures/src/xsimd/test/test_sum.cpp +4 -0
  204. sequenzo/dissimilarity_measures/src/xsimd/test/test_sum.hpp +34 -0
  205. sequenzo/dissimilarity_measures/src/xsimd/test/test_traits.cpp +172 -0
  206. sequenzo/dissimilarity_measures/src/xsimd/test/test_trigonometric.cpp +208 -0
  207. sequenzo/dissimilarity_measures/src/xsimd/test/test_utils.hpp +611 -0
  208. sequenzo/dissimilarity_measures/src/xsimd/test/test_wasm/test_wasm_playwright.py +123 -0
  209. sequenzo/dissimilarity_measures/src/xsimd/test/test_xsimd_api.cpp +1460 -0
  210. sequenzo/dissimilarity_measures/utils/__init__.py +16 -0
  211. sequenzo/dissimilarity_measures/utils/get_LCP_length_for_2_seq.py +44 -0
  212. sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.cpython-312-darwin.so +0 -0
  213. sequenzo/dissimilarity_measures/utils/seqconc.cpython-312-darwin.so +0 -0
  214. sequenzo/dissimilarity_measures/utils/seqdss.cpython-312-darwin.so +0 -0
  215. sequenzo/dissimilarity_measures/utils/seqdur.cpython-312-darwin.so +0 -0
  216. sequenzo/dissimilarity_measures/utils/seqlength.cpython-312-darwin.so +0 -0
  217. sequenzo/multidomain/__init__.py +23 -0
  218. sequenzo/multidomain/association_between_domains.py +311 -0
  219. sequenzo/multidomain/cat.py +431 -0
  220. sequenzo/multidomain/combt.py +519 -0
  221. sequenzo/multidomain/dat.py +89 -0
  222. sequenzo/multidomain/idcd.py +139 -0
  223. sequenzo/multidomain/linked_polyad.py +292 -0
  224. sequenzo/openmp_setup.py +233 -0
  225. sequenzo/prefix_tree/__init__.py +43 -0
  226. sequenzo/prefix_tree/individual_level_indicators.py +1274 -0
  227. sequenzo/prefix_tree/system_level_indicators.py +465 -0
  228. sequenzo/prefix_tree/utils.py +54 -0
  229. sequenzo/sequence_characteristics/__init__.py +40 -0
  230. sequenzo/sequence_characteristics/complexity_index.py +49 -0
  231. sequenzo/sequence_characteristics/overall_cross_sectional_entropy.py +220 -0
  232. sequenzo/sequence_characteristics/plot_characteristics.py +593 -0
  233. sequenzo/sequence_characteristics/simple_characteristics.py +311 -0
  234. sequenzo/sequence_characteristics/state_frequencies_and_entropy_per_sequence.py +39 -0
  235. sequenzo/sequence_characteristics/turbulence.py +155 -0
  236. sequenzo/sequence_characteristics/variance_of_spell_durations.py +86 -0
  237. sequenzo/sequence_characteristics/within_sequence_entropy.py +43 -0
  238. sequenzo/suffix_tree/__init__.py +48 -0
  239. sequenzo/suffix_tree/individual_level_indicators.py +1638 -0
  240. sequenzo/suffix_tree/system_level_indicators.py +456 -0
  241. sequenzo/suffix_tree/utils.py +56 -0
  242. sequenzo/visualization/__init__.py +29 -0
  243. sequenzo/visualization/plot_mean_time.py +194 -0
  244. sequenzo/visualization/plot_modal_state.py +276 -0
  245. sequenzo/visualization/plot_most_frequent_sequences.py +147 -0
  246. sequenzo/visualization/plot_relative_frequency.py +404 -0
  247. sequenzo/visualization/plot_sequence_index.py +937 -0
  248. sequenzo/visualization/plot_single_medoid.py +153 -0
  249. sequenzo/visualization/plot_state_distribution.py +613 -0
  250. sequenzo/visualization/plot_transition_matrix.py +190 -0
  251. sequenzo/visualization/utils/__init__.py +23 -0
  252. sequenzo/visualization/utils/utils.py +310 -0
  253. sequenzo/with_event_history_analysis/__init__.py +35 -0
  254. sequenzo/with_event_history_analysis/sequence_analysis_multi_state_model.py +850 -0
  255. sequenzo/with_event_history_analysis/sequence_history_analysis.py +283 -0
  256. sequenzo-0.1.21.dist-info/METADATA +308 -0
  257. sequenzo-0.1.21.dist-info/RECORD +254 -0
  258. sequenzo-0.1.21.dist-info/WHEEL +5 -0
  259. sequenzo-0.1.21.dist-info/licenses/LICENSE +28 -0
  260. sequenzo-0.1.21.dist-info/top_level.txt +1 -0
@@ -0,0 +1,262 @@
1
+ #include <pybind11/pybind11.h>
2
+ #include <pybind11/numpy.h>
3
+ #include <vector>
4
+ #include <iostream>
5
+ #ifdef _OPENMP
6
+ #include <omp.h>
7
+ #endif
8
+ #include <random>
9
+ #include <cfloat>
10
+ #include <climits>
11
+ #include <cmath>
12
+
13
+ using namespace std;
14
+ namespace py = pybind11;
15
+
16
+ class KMedoid {
17
+ protected:
18
+ int nelements; // Number of elements (data points)
19
+ int nclusters; // Number of clusters (medoids)
20
+ int npass; // Maximum number of iterations
21
+
22
+ vector<int> tclusterid; // Temporary cluster assignment for each element
23
+ vector<int> saved; // Saved cluster assignments to check for convergence
24
+ vector<int> clusterMembership; // Cluster membership indices (flattened 2D: nclusters x nelements)
25
+ vector<int> clusterSize; // Size of each cluster
26
+
27
+ py::array_t<double> diss; // Distance matrix (2D numpy array)
28
+ py::array_t<int> centroids; // Medoid indices (1D numpy array)
29
+ py::array_t<double> weights; // Weights of elements (1D numpy array)
30
+
31
+ public:
32
+ // Constructor initializes members and allocates necessary storage
33
+ KMedoid(int nelements, py::array_t<double> diss,
34
+ py::array_t<int> centroids, int npass,
35
+ py::array_t<double> weights)
36
+ : nelements(nelements),
37
+ diss(diss),
38
+ centroids(centroids),
39
+ npass(npass),
40
+ weights(weights),
41
+ nclusters(static_cast<int>(centroids.size())) {
42
+ py::print("[>] Starting KMedoids...");
43
+
44
+ tclusterid.resize(nelements);
45
+ saved.resize(nelements);
46
+ clusterMembership.resize(nelements * nclusters);
47
+ clusterSize.resize(nclusters);
48
+ fill(clusterSize.begin(), clusterSize.end(), 0);
49
+ }
50
+
51
+ // Initialize medoids using a k-means++ style seeding method for better starting points
52
+ void init_medoids() {
53
+ auto ptr_diss = diss.unchecked<2>();
54
+ auto ptr_centroids = centroids.mutable_unchecked<1>();
55
+ auto ptr_weights = weights.unchecked<1>();
56
+
57
+ vector<int> selected; // Indices of selected medoids
58
+ vector<double> min_dists(nelements, DBL_MAX); // Minimum distance to selected medoids
59
+
60
+ mt19937 rng(random_device{}()); // 3. Random number generator initialization (non-deterministic seed)
61
+ uniform_int_distribution<> dist(0, nelements - 1);
62
+
63
+ // Randomly choose the first medoid
64
+ int first = dist(rng);
65
+ selected.push_back(first);
66
+ ptr_centroids[0] = first;
67
+
68
+ for (int k = 1; k < nclusters; ++k) {
69
+ int last = selected.back();
70
+
71
+ // Update min_dists using only the last selected medoid
72
+ for (int i = 0; i < nelements; ++i) {
73
+ double d = ptr_diss(i, last);
74
+ if (d < min_dists[i]) min_dists[i] = d;
75
+ }
76
+
77
+ // Compute weighted total distance
78
+ double total_weight = 0.0;
79
+ for (int i = 0; i < nelements; ++i) {
80
+ total_weight += min_dists[i] * ptr_weights[i];
81
+ }
82
+
83
+ // Handle degenerate case
84
+ if (total_weight <= 1e-10) {
85
+ int fallback = dist(rng);
86
+ selected.push_back(fallback);
87
+ ptr_centroids[k] = fallback;
88
+ continue;
89
+ }
90
+
91
+ // Select next medoid using weighted probability
92
+ uniform_real_distribution<double> rdist(0, total_weight);
93
+ double r = rdist(rng), accumulator = 0.0;
94
+ int next = -1;
95
+
96
+ for (int i = 0; i < nelements; ++i) {
97
+ accumulator += min_dists[i] * ptr_weights[i];
98
+ if (accumulator >= r) {
99
+ next = i;
100
+ break;
101
+ }
102
+ }
103
+
104
+ if (next == -1) next = dist(rng); // fallback again just in case
105
+ selected.push_back(next);
106
+ ptr_centroids[k] = next;
107
+ }
108
+ }
109
+
110
+
111
+ // Update medoids by selecting the element minimizing the sum of weighted distances to all other elements in the cluster
112
+ void getclustermedoids() {
113
+ auto ptr_weights = weights.unchecked<1>();
114
+ auto ptr_diss = diss.unchecked<2>();
115
+ auto ptr_centroids = centroids.mutable_unchecked<1>();
116
+
117
+ #pragma omp parallel for schedule(dynamic)
118
+ for (int k = 0; k < nclusters; ++k) {
119
+ int size = clusterSize[k];
120
+ double best = DBL_MAX;
121
+ int bestID = 0;
122
+
123
+ // Iterate over all members of cluster k to find the best medoid
124
+ for (int i = 0; i < size; ++i) {
125
+ int ii = clusterMembership[k * nelements + i];
126
+ double current = 0;
127
+
128
+ // Sum weighted distances from candidate medoid ii to all other members
129
+ for (int j = 0; j < size; ++j) {
130
+ if (i == j) continue;
131
+ int jj = clusterMembership[k * nelements + j];
132
+ current += ptr_weights[jj] * ptr_diss(ii, jj);
133
+ if (current >= best) break; // Early stop if worse than current best
134
+ }
135
+
136
+ if (current < best) {
137
+ best = current;
138
+ bestID = ii;
139
+ }
140
+ }
141
+
142
+ ptr_centroids[k] = bestID; // Assign best medoid for cluster k
143
+ }
144
+ }
145
+
146
+ // Main loop to run the clustering process until convergence or max iterations
147
+ py::array_t<int> runclusterloop() {
148
+ auto ptr_weights = weights.unchecked<1>();
149
+ auto ptr_diss = diss.unchecked<2>();
150
+ auto ptr_centroids = centroids.mutable_unchecked<1>();
151
+
152
+ double total = DBL_MAX;
153
+ int counter = 0;
154
+ int period = 10; // Frequency to save cluster assignments for convergence checking
155
+
156
+ while (counter <= npass) {
157
+ PyErr_CheckSignals(); // Allow Python interruption
158
+
159
+ double prev = total;
160
+ total = 0;
161
+
162
+ if (counter > 0) getclustermedoids();
163
+
164
+ // Periodically save cluster assignment to check for convergence
165
+ if (counter % period == 0) {
166
+ for (int i = 0; i < nelements; ++i)
167
+ saved[i] = tclusterid[i];
168
+
169
+ if (period < INT_MAX / 2) period *= 2; // Exponentially increase period
170
+ }
171
+
172
+ counter++;
173
+
174
+ vector<vector<int>> localMembers(nclusters);
175
+
176
+ // Parallel assignment of elements to closest medoid
177
+ #pragma omp parallel
178
+ {
179
+ vector<vector<int>> threadLocal(nclusters);
180
+
181
+ #pragma omp for reduction(+:total) schedule(static)
182
+ for (int i = 0; i < nelements; ++i) {
183
+ double dist = DBL_MAX;
184
+ int assign = 0;
185
+
186
+ // Find nearest medoid
187
+ for (int k = 0; k < nclusters; ++k) {
188
+ int j = ptr_centroids[k];
189
+ double tdistance = ptr_diss(i, j);
190
+
191
+ if (tdistance < dist) {
192
+ dist = tdistance;
193
+ assign = k;
194
+ }
195
+ }
196
+
197
+ tclusterid[i] = assign;
198
+ threadLocal[assign].push_back(i);
199
+ total += ptr_weights[i] * dist;
200
+ }
201
+
202
+ // Merge thread local cluster memberships into shared vector safely
203
+ #pragma omp critical
204
+ {
205
+ for (int k = 0; k < nclusters; ++k) {
206
+ localMembers[k].insert(
207
+ localMembers[k].end(),
208
+ threadLocal[k].begin(),
209
+ threadLocal[k].end()
210
+ );
211
+ }
212
+ }
213
+ }
214
+
215
+ // Update cluster membership and sizes
216
+ for (int k = 0; k < nclusters; ++k) {
217
+ clusterSize[k] = static_cast<int>(localMembers[k].size());
218
+
219
+ // If a cluster is empty, reinitialize medoids and restart
220
+ if (clusterSize[k] == 0) {
221
+ init_medoids();
222
+ counter = 0;
223
+ break;
224
+ }
225
+
226
+ for (int i = 0; i < clusterSize[k]; ++i) {
227
+ clusterMembership[k * nelements + i] = localMembers[k][i];
228
+ }
229
+ }
230
+
231
+ // Convergence check based on total cost change
232
+ if (abs(total - prev) < 1e-6) break;
233
+
234
+ // Check if cluster assignments are unchanged from last saved
235
+ bool same = true;
236
+ for (int i = 0; i < nelements; ++i) {
237
+ if (saved[i] != tclusterid[i]) {
238
+ same = false;
239
+ break;
240
+ }
241
+ }
242
+
243
+ if (same) break;
244
+ }
245
+
246
+ return getResultArray();
247
+ }
248
+
249
+ // Construct and return the final array of medoid assignments for each element
250
+ py::array_t<int> getResultArray() const {
251
+ py::array_t<int> result(nelements);
252
+ auto results = result.mutable_unchecked<1>();
253
+ auto centroid = centroids.unchecked<1>();
254
+
255
+ #pragma omp parallel for schedule(static)
256
+ for (int i = 0; i < nelements; ++i) {
257
+ results(i) = centroid(tclusterid[i]);
258
+ }
259
+
260
+ return result;
261
+ }
262
+ };
@@ -0,0 +1,236 @@
1
+ #include <pybind11/pybind11.h>
2
+ #include <pybind11/numpy.h>
3
+ #include <vector>
4
+ #include <iostream>
5
+ #ifdef _OPENMP
6
+ #include <omp.h>
7
+ #endif
8
+ #include <cfloat>
9
+ #include <climits>
10
+ #include <cmath>
11
+ #define WEIGHTED_CLUST_TOL -1e-10
12
+ using namespace std;
13
+ namespace py = pybind11;
14
+
15
+ class PAM {
16
+ public:
17
+ // Constructor: Initializes the PAM algorithm with required parameters.
18
+ PAM(int nelements, py::array_t<double> diss,
19
+ py::array_t<int> centroids, int npass, py::array_t<double> weights) {
20
+ py::print("[>] Starting Partitioning Around Medoids (PAM)...");
21
+
22
+ try {
23
+ this->nelements = nelements;
24
+ this->centroids = centroids;
25
+ this->npass = npass;
26
+ this->weights = weights;
27
+ this->diss = diss;
28
+ this->maxdist = 0.0;
29
+ this->nclusters = static_cast<int>(centroids.size()); // Number of clusters
30
+ this->tclusterid.resize(nelements); // Initialize cluster id vector
31
+ this->computeMaxDist(); // Compute the maximum distance for use later
32
+
33
+ // Initialize dysma and dysmb with maxdist
34
+ dysma.resize(nelements, maxdist);
35
+ dysmb.resize(nelements, maxdist);
36
+ } catch (const exception &e) {
37
+ py::print("Error: ", e.what()); // Error handling
38
+ }
39
+ }
40
+
41
+ // Computes the maximum distance between any two elements in the distance matrix.
42
+ void computeMaxDist() {
43
+ auto ptr_diss = diss.unchecked<2>();
44
+
45
+ // The manual array collects the thread maxima
46
+ int nthreads = 1;
47
+ #ifdef _OPENMP
48
+ #pragma omp parallel
49
+ {
50
+ #pragma omp single
51
+ nthreads = omp_get_num_threads();
52
+ }
53
+ #endif
54
+
55
+ std::vector<double> thread_max(nthreads, 0.0);
56
+
57
+ #ifdef _OPENMP
58
+ #pragma omp parallel
59
+ {
60
+ int tid = omp_get_thread_num();
61
+ #else
62
+ {
63
+ int tid = 0;
64
+ #endif
65
+ double local = 0.0;
66
+
67
+ #ifdef _OPENMP
68
+ #pragma omp for schedule(static)
69
+ #endif
70
+ for (int i = 0; i < nelements; ++i) {
71
+ for (int j = i + 1; j < nelements; ++j) {
72
+ double val = ptr_diss(i, j);
73
+ if (val > local) local = val;
74
+ }
75
+ }
76
+
77
+ thread_max[tid] = local;
78
+ }
79
+
80
+ // Final reduction (serial, fast)
81
+ double max_val = 0.0;
82
+ for (double val : thread_max) {
83
+ if (val > max_val) max_val = val;
84
+ }
85
+
86
+ maxdist = 1.1 * max_val + 1.0;
87
+ }
88
+
89
+
90
+ // Runs the PAM clustering loop, repeatedly updating centroids and assigning elements to clusters.
91
+ py::array_t<int> runclusterloop() {
92
+ auto ptr_weights = weights.unchecked<1>(); // Access to the weights
93
+ auto ptr_diss = diss.unchecked<2>(); // Access to the distance matrix
94
+ auto ptr_centroids = centroids.mutable_unchecked<1>(); // Access to the centroids
95
+
96
+ double dzsky;
97
+ int ipass = 0;
98
+ int hbest = -1;
99
+ int nbest = -1;
100
+ int k, icluster, h;
101
+ double total = -1.0;
102
+ int nclusters = static_cast<int>(centroids.size());
103
+
104
+ do {
105
+ // Parallel loop to update dysma and dysmb based on current centroids
106
+ #pragma omp parallel for schedule(static)
107
+ for (int i = 0; i < nelements; i++) {
108
+ dysmb[i] = maxdist;
109
+ dysma[i] = maxdist;
110
+
111
+ // Update dysma and dysmb values based on the distance to centroids
112
+ for (int k = 0; k < nclusters; k++) {
113
+ int icluster = ptr_centroids(k);
114
+ double dist = ptr_diss(i, icluster);
115
+
116
+ if (dysma[i] > dist) {
117
+ dysmb[i] = dysma[i];
118
+ dysma[i] = dist;
119
+ tclusterid[i] = k; // Assign element to the current cluster
120
+ } else if (dysmb[i] > dist) {
121
+ dysmb[i] = dist;
122
+ }
123
+ }
124
+ }
125
+
126
+ // If total hasn't been calculated yet, calculate it
127
+ if (total < 0) {
128
+ total = 0;
129
+
130
+ // Parallel loop to calculate the total weighted distance
131
+ #pragma omp parallel for reduction(+:total) schedule(static)
132
+ for (int i = 0; i < nelements; i++) {
133
+ total += ptr_weights[i] * dysma[i];
134
+ }
135
+ }
136
+
137
+ dzsky = 1; // Initialize dzsky to 1 for the change cost comparison
138
+
139
+ // Parallel loop to compute the cost of switching elements' medoids
140
+ #pragma omp parallel for schedule(dynamic)
141
+ for (int h = 0; h < nelements; h++) {
142
+ bool is_current_medoid = false;
143
+ for (int k = 0; k < nclusters; k++) {
144
+ if (h == ptr_centroids[k]) {
145
+ is_current_medoid = true;
146
+ break;
147
+ }
148
+ }
149
+
150
+ if (is_current_medoid) // Skip if the element is already a medoid
151
+ continue;
152
+
153
+ double local_dzsky = dzsky;
154
+ int local_hbest = -1;
155
+ int local_nbest = -1;
156
+
157
+ // Evaluate the change cost for switching each element with a new medoid
158
+ for (int k = 0; k < nclusters; k++) {
159
+ int i = ptr_centroids[k];
160
+ double dz = 0.0;
161
+
162
+ for (int j = 0; j < nelements; j++) {
163
+ if (ptr_diss(i, j) == dysma[j]) {
164
+ double small = (dysmb[j] > ptr_diss(h, j)) ? ptr_diss(h, j) : dysmb[j];
165
+ dz += ptr_weights[j] * (-dysma[j] + small); // Update change cost
166
+ } else if (ptr_diss(h, j) < dysma[j]) {
167
+ dz += ptr_weights[j] * (-dysma[j] + ptr_diss(h, j));
168
+ }
169
+ }
170
+
171
+ // Keep track of the best change
172
+ if (dz < local_dzsky) {
173
+ local_dzsky = dz;
174
+ local_hbest = h;
175
+ local_nbest = i;
176
+ }
177
+ }
178
+
179
+ // Critical section to update dzsky with the best change
180
+ #pragma omp critical
181
+ {
182
+ if (local_dzsky < dzsky) {
183
+ dzsky = local_dzsky;
184
+ hbest = local_hbest;
185
+ nbest = local_nbest;
186
+ }
187
+ }
188
+ }
189
+
190
+ // If there was an improvement in the total cost, update the centroids
191
+ if (dzsky < 0) {
192
+ for (k = 0; k < nclusters; k++) {
193
+ if (ptr_centroids[k] == nbest) {
194
+ ptr_centroids[k] = hbest; // Swap the medoids
195
+ }
196
+ }
197
+
198
+ total += dzsky; // Update the total cost
199
+ }
200
+
201
+ ipass++; // Increment pass count
202
+ if (ipass >= npass) {
203
+ break; // Break if max passes reached
204
+ }
205
+ } while (dzsky < 0); // Repeat until no improvement
206
+
207
+ return getResultArray(); // Return the final cluster assignments
208
+ }
209
+
210
+ // Returns an array of cluster assignments for each element
211
+ py::array_t<int> getResultArray() const {
212
+ py::array_t<int> result(nelements);
213
+ auto results = result.mutable_unchecked<1>();
214
+ auto centroid = centroids.unchecked<1>();
215
+
216
+ #pragma omp parallel for schedule(static)
217
+ for (int i = 0; i < nelements; ++i) {
218
+ results(i) = centroid(tclusterid[i]);
219
+ }
220
+
221
+ return result;
222
+ }
223
+
224
+
225
+ protected:
226
+ int nelements; // Number of elements to cluster
227
+ py::array_t<double> diss; // Distance matrix
228
+ py::array_t<int> centroids; // Initial centroids
229
+ int npass; // Number of passes for the algorithm
230
+ py::array_t<double> weights; // Element weights
231
+ vector<int> tclusterid; // Cluster IDs for each element
232
+ vector<double> dysmb; // Temporary variable for distances
233
+ int nclusters; // Number of clusters
234
+ double maxdist; // Maximum distance value
235
+ vector<double> dysma; // Temporary variable for distances
236
+ };