sequenzo 0.1.31__cp310-cp310-macosx_10_9_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (299) hide show
  1. _sequenzo_fastcluster.cpython-310-darwin.so +0 -0
  2. sequenzo/__init__.py +349 -0
  3. sequenzo/big_data/__init__.py +12 -0
  4. sequenzo/big_data/clara/__init__.py +26 -0
  5. sequenzo/big_data/clara/clara.py +476 -0
  6. sequenzo/big_data/clara/utils/__init__.py +27 -0
  7. sequenzo/big_data/clara/utils/aggregatecases.py +92 -0
  8. sequenzo/big_data/clara/utils/davies_bouldin.py +91 -0
  9. sequenzo/big_data/clara/utils/get_weighted_diss.cpython-310-darwin.so +0 -0
  10. sequenzo/big_data/clara/utils/wfcmdd.py +205 -0
  11. sequenzo/big_data/clara/visualization.py +88 -0
  12. sequenzo/clustering/KMedoids.py +178 -0
  13. sequenzo/clustering/__init__.py +30 -0
  14. sequenzo/clustering/clustering_c_code.cpython-310-darwin.so +0 -0
  15. sequenzo/clustering/hierarchical_clustering.py +1256 -0
  16. sequenzo/clustering/sequenzo_fastcluster/fastcluster.py +495 -0
  17. sequenzo/clustering/sequenzo_fastcluster/src/fastcluster.cpp +1877 -0
  18. sequenzo/clustering/sequenzo_fastcluster/src/fastcluster_python.cpp +1264 -0
  19. sequenzo/clustering/src/KMedoid.cpp +263 -0
  20. sequenzo/clustering/src/PAM.cpp +237 -0
  21. sequenzo/clustering/src/PAMonce.cpp +265 -0
  22. sequenzo/clustering/src/cluster_quality.cpp +496 -0
  23. sequenzo/clustering/src/cluster_quality.h +128 -0
  24. sequenzo/clustering/src/cluster_quality_backup.cpp +570 -0
  25. sequenzo/clustering/src/module.cpp +228 -0
  26. sequenzo/clustering/src/weightedinertia.cpp +111 -0
  27. sequenzo/clustering/utils/__init__.py +27 -0
  28. sequenzo/clustering/utils/disscenter.py +122 -0
  29. sequenzo/data_preprocessing/__init__.py +22 -0
  30. sequenzo/data_preprocessing/helpers.py +303 -0
  31. sequenzo/datasets/__init__.py +41 -0
  32. sequenzo/datasets/biofam.csv +2001 -0
  33. sequenzo/datasets/biofam_child_domain.csv +2001 -0
  34. sequenzo/datasets/biofam_left_domain.csv +2001 -0
  35. sequenzo/datasets/biofam_married_domain.csv +2001 -0
  36. sequenzo/datasets/chinese_colonial_territories.csv +12 -0
  37. sequenzo/datasets/country_co2_emissions.csv +194 -0
  38. sequenzo/datasets/country_co2_emissions_global_deciles.csv +195 -0
  39. sequenzo/datasets/country_co2_emissions_global_quintiles.csv +195 -0
  40. sequenzo/datasets/country_co2_emissions_local_deciles.csv +195 -0
  41. sequenzo/datasets/country_co2_emissions_local_quintiles.csv +195 -0
  42. sequenzo/datasets/country_gdp_per_capita.csv +194 -0
  43. sequenzo/datasets/dyadic_children.csv +61 -0
  44. sequenzo/datasets/dyadic_parents.csv +61 -0
  45. sequenzo/datasets/mvad.csv +713 -0
  46. sequenzo/datasets/pairfam_activity_by_month.csv +1028 -0
  47. sequenzo/datasets/pairfam_activity_by_year.csv +1028 -0
  48. sequenzo/datasets/pairfam_family_by_month.csv +1028 -0
  49. sequenzo/datasets/pairfam_family_by_year.csv +1028 -0
  50. sequenzo/datasets/political_science_aid_shock.csv +166 -0
  51. sequenzo/datasets/political_science_donor_fragmentation.csv +157 -0
  52. sequenzo/define_sequence_data.py +1400 -0
  53. sequenzo/dissimilarity_measures/__init__.py +31 -0
  54. sequenzo/dissimilarity_measures/c_code.cpython-310-darwin.so +0 -0
  55. sequenzo/dissimilarity_measures/get_distance_matrix.py +762 -0
  56. sequenzo/dissimilarity_measures/get_substitution_cost_matrix.py +246 -0
  57. sequenzo/dissimilarity_measures/src/DHDdistance.cpp +148 -0
  58. sequenzo/dissimilarity_measures/src/LCPdistance.cpp +114 -0
  59. sequenzo/dissimilarity_measures/src/LCPspellDistance.cpp +215 -0
  60. sequenzo/dissimilarity_measures/src/OMdistance.cpp +247 -0
  61. sequenzo/dissimilarity_measures/src/OMspellDistance.cpp +281 -0
  62. sequenzo/dissimilarity_measures/src/__init__.py +0 -0
  63. sequenzo/dissimilarity_measures/src/dist2matrix.cpp +63 -0
  64. sequenzo/dissimilarity_measures/src/dp_utils.h +160 -0
  65. sequenzo/dissimilarity_measures/src/module.cpp +40 -0
  66. sequenzo/dissimilarity_measures/src/setup.py +30 -0
  67. sequenzo/dissimilarity_measures/src/utils.h +25 -0
  68. sequenzo/dissimilarity_measures/src/xsimd/.github/cmake-test/main.cpp +6 -0
  69. sequenzo/dissimilarity_measures/src/xsimd/benchmark/main.cpp +159 -0
  70. sequenzo/dissimilarity_measures/src/xsimd/benchmark/xsimd_benchmark.hpp +565 -0
  71. sequenzo/dissimilarity_measures/src/xsimd/docs/source/conf.py +37 -0
  72. sequenzo/dissimilarity_measures/src/xsimd/examples/mandelbrot.cpp +330 -0
  73. sequenzo/dissimilarity_measures/src/xsimd/examples/pico_bench.hpp +246 -0
  74. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_arithmetic.hpp +266 -0
  75. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_complex.hpp +112 -0
  76. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_details.hpp +323 -0
  77. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_logical.hpp +218 -0
  78. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_math.hpp +2583 -0
  79. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_memory.hpp +880 -0
  80. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_rounding.hpp +72 -0
  81. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_swizzle.hpp +174 -0
  82. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_trigo.hpp +978 -0
  83. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx.hpp +1924 -0
  84. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx2.hpp +1144 -0
  85. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512bw.hpp +656 -0
  86. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512cd.hpp +28 -0
  87. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512dq.hpp +244 -0
  88. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512er.hpp +20 -0
  89. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512f.hpp +2650 -0
  90. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512ifma.hpp +20 -0
  91. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512pf.hpp +20 -0
  92. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vbmi.hpp +77 -0
  93. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vbmi2.hpp +131 -0
  94. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vnni_avx512bw.hpp +20 -0
  95. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vnni_avx512vbmi2.hpp +20 -0
  96. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avxvnni.hpp +20 -0
  97. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_common.hpp +24 -0
  98. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_common_fwd.hpp +77 -0
  99. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_constants.hpp +393 -0
  100. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_emulated.hpp +788 -0
  101. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_avx.hpp +93 -0
  102. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_avx2.hpp +46 -0
  103. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_sse.hpp +97 -0
  104. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma4.hpp +92 -0
  105. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_i8mm_neon64.hpp +17 -0
  106. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_isa.hpp +142 -0
  107. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_neon.hpp +3142 -0
  108. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_neon64.hpp +1543 -0
  109. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_rvv.hpp +1513 -0
  110. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_scalar.hpp +1260 -0
  111. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse2.hpp +2024 -0
  112. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse3.hpp +67 -0
  113. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse4_1.hpp +339 -0
  114. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse4_2.hpp +44 -0
  115. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_ssse3.hpp +186 -0
  116. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sve.hpp +1155 -0
  117. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_vsx.hpp +892 -0
  118. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_wasm.hpp +1780 -0
  119. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_arch.hpp +240 -0
  120. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_config.hpp +484 -0
  121. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_cpuid.hpp +269 -0
  122. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_inline.hpp +27 -0
  123. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/math/xsimd_rem_pio2.hpp +719 -0
  124. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/memory/xsimd_aligned_allocator.hpp +349 -0
  125. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/memory/xsimd_alignment.hpp +91 -0
  126. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_all_registers.hpp +55 -0
  127. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_api.hpp +2765 -0
  128. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx2_register.hpp +44 -0
  129. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512bw_register.hpp +51 -0
  130. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512cd_register.hpp +51 -0
  131. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512dq_register.hpp +51 -0
  132. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512er_register.hpp +51 -0
  133. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512f_register.hpp +77 -0
  134. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512ifma_register.hpp +51 -0
  135. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512pf_register.hpp +51 -0
  136. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vbmi2_register.hpp +51 -0
  137. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vbmi_register.hpp +51 -0
  138. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512bw_register.hpp +54 -0
  139. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512vbmi2_register.hpp +53 -0
  140. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx_register.hpp +64 -0
  141. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avxvnni_register.hpp +44 -0
  142. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_batch.hpp +1524 -0
  143. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_batch_constant.hpp +300 -0
  144. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_common_arch.hpp +47 -0
  145. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_emulated_register.hpp +80 -0
  146. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_avx2_register.hpp +50 -0
  147. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_avx_register.hpp +50 -0
  148. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_sse_register.hpp +50 -0
  149. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma4_register.hpp +50 -0
  150. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_i8mm_neon64_register.hpp +55 -0
  151. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_neon64_register.hpp +55 -0
  152. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_neon_register.hpp +154 -0
  153. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_register.hpp +94 -0
  154. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_rvv_register.hpp +506 -0
  155. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse2_register.hpp +59 -0
  156. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse3_register.hpp +49 -0
  157. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse4_1_register.hpp +48 -0
  158. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse4_2_register.hpp +48 -0
  159. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_ssse3_register.hpp +48 -0
  160. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sve_register.hpp +156 -0
  161. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_traits.hpp +337 -0
  162. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_utils.hpp +536 -0
  163. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_vsx_register.hpp +77 -0
  164. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_wasm_register.hpp +59 -0
  165. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/xsimd.hpp +75 -0
  166. sequenzo/dissimilarity_measures/src/xsimd/test/architectures/dummy.cpp +7 -0
  167. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set.cpp +13 -0
  168. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean.cpp +24 -0
  169. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_aligned.cpp +25 -0
  170. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_arch_independent.cpp +28 -0
  171. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_tag_dispatch.cpp +25 -0
  172. sequenzo/dissimilarity_measures/src/xsimd/test/doc/manipulating_abstract_batches.cpp +7 -0
  173. sequenzo/dissimilarity_measures/src/xsimd/test/doc/manipulating_parametric_batches.cpp +8 -0
  174. sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum.hpp +31 -0
  175. sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum_avx2.cpp +3 -0
  176. sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum_sse2.cpp +3 -0
  177. sequenzo/dissimilarity_measures/src/xsimd/test/doc/writing_vectorized_code.cpp +11 -0
  178. sequenzo/dissimilarity_measures/src/xsimd/test/main.cpp +31 -0
  179. sequenzo/dissimilarity_measures/src/xsimd/test/test_api.cpp +230 -0
  180. sequenzo/dissimilarity_measures/src/xsimd/test/test_arch.cpp +217 -0
  181. sequenzo/dissimilarity_measures/src/xsimd/test/test_basic_math.cpp +183 -0
  182. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch.cpp +1049 -0
  183. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_bool.cpp +508 -0
  184. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_cast.cpp +409 -0
  185. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_complex.cpp +712 -0
  186. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_constant.cpp +286 -0
  187. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_float.cpp +141 -0
  188. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_int.cpp +365 -0
  189. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_manip.cpp +308 -0
  190. sequenzo/dissimilarity_measures/src/xsimd/test/test_bitwise_cast.cpp +222 -0
  191. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_exponential.cpp +226 -0
  192. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_hyperbolic.cpp +183 -0
  193. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_power.cpp +265 -0
  194. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_trigonometric.cpp +236 -0
  195. sequenzo/dissimilarity_measures/src/xsimd/test/test_conversion.cpp +248 -0
  196. sequenzo/dissimilarity_measures/src/xsimd/test/test_custom_default_arch.cpp +28 -0
  197. sequenzo/dissimilarity_measures/src/xsimd/test/test_error_gamma.cpp +170 -0
  198. sequenzo/dissimilarity_measures/src/xsimd/test/test_explicit_batch_instantiation.cpp +32 -0
  199. sequenzo/dissimilarity_measures/src/xsimd/test/test_exponential.cpp +202 -0
  200. sequenzo/dissimilarity_measures/src/xsimd/test/test_extract_pair.cpp +92 -0
  201. sequenzo/dissimilarity_measures/src/xsimd/test/test_fp_manipulation.cpp +77 -0
  202. sequenzo/dissimilarity_measures/src/xsimd/test/test_gnu_source.cpp +30 -0
  203. sequenzo/dissimilarity_measures/src/xsimd/test/test_hyperbolic.cpp +167 -0
  204. sequenzo/dissimilarity_measures/src/xsimd/test/test_load_store.cpp +304 -0
  205. sequenzo/dissimilarity_measures/src/xsimd/test/test_memory.cpp +61 -0
  206. sequenzo/dissimilarity_measures/src/xsimd/test/test_poly_evaluation.cpp +64 -0
  207. sequenzo/dissimilarity_measures/src/xsimd/test/test_power.cpp +184 -0
  208. sequenzo/dissimilarity_measures/src/xsimd/test/test_rounding.cpp +199 -0
  209. sequenzo/dissimilarity_measures/src/xsimd/test/test_select.cpp +101 -0
  210. sequenzo/dissimilarity_measures/src/xsimd/test/test_shuffle.cpp +760 -0
  211. sequenzo/dissimilarity_measures/src/xsimd/test/test_sum.cpp +4 -0
  212. sequenzo/dissimilarity_measures/src/xsimd/test/test_sum.hpp +34 -0
  213. sequenzo/dissimilarity_measures/src/xsimd/test/test_traits.cpp +172 -0
  214. sequenzo/dissimilarity_measures/src/xsimd/test/test_trigonometric.cpp +208 -0
  215. sequenzo/dissimilarity_measures/src/xsimd/test/test_utils.hpp +611 -0
  216. sequenzo/dissimilarity_measures/src/xsimd/test/test_wasm/test_wasm_playwright.py +123 -0
  217. sequenzo/dissimilarity_measures/src/xsimd/test/test_xsimd_api.cpp +1460 -0
  218. sequenzo/dissimilarity_measures/utils/__init__.py +16 -0
  219. sequenzo/dissimilarity_measures/utils/get_LCP_length_for_2_seq.py +44 -0
  220. sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.cpython-310-darwin.so +0 -0
  221. sequenzo/dissimilarity_measures/utils/seqconc.cpython-310-darwin.so +0 -0
  222. sequenzo/dissimilarity_measures/utils/seqdss.cpython-310-darwin.so +0 -0
  223. sequenzo/dissimilarity_measures/utils/seqdur.cpython-310-darwin.so +0 -0
  224. sequenzo/dissimilarity_measures/utils/seqlength.cpython-310-darwin.so +0 -0
  225. sequenzo/multidomain/__init__.py +23 -0
  226. sequenzo/multidomain/association_between_domains.py +311 -0
  227. sequenzo/multidomain/cat.py +597 -0
  228. sequenzo/multidomain/combt.py +519 -0
  229. sequenzo/multidomain/dat.py +81 -0
  230. sequenzo/multidomain/idcd.py +139 -0
  231. sequenzo/multidomain/linked_polyad.py +292 -0
  232. sequenzo/openmp_setup.py +233 -0
  233. sequenzo/prefix_tree/__init__.py +62 -0
  234. sequenzo/prefix_tree/hub.py +114 -0
  235. sequenzo/prefix_tree/individual_level_indicators.py +1321 -0
  236. sequenzo/prefix_tree/spell_individual_level_indicators.py +580 -0
  237. sequenzo/prefix_tree/spell_level_indicators.py +297 -0
  238. sequenzo/prefix_tree/system_level_indicators.py +544 -0
  239. sequenzo/prefix_tree/utils.py +54 -0
  240. sequenzo/seqhmm/__init__.py +95 -0
  241. sequenzo/seqhmm/advanced_optimization.py +305 -0
  242. sequenzo/seqhmm/bootstrap.py +411 -0
  243. sequenzo/seqhmm/build_hmm.py +142 -0
  244. sequenzo/seqhmm/build_mhmm.py +136 -0
  245. sequenzo/seqhmm/build_nhmm.py +121 -0
  246. sequenzo/seqhmm/fit_mhmm.py +62 -0
  247. sequenzo/seqhmm/fit_model.py +61 -0
  248. sequenzo/seqhmm/fit_nhmm.py +76 -0
  249. sequenzo/seqhmm/formulas.py +289 -0
  250. sequenzo/seqhmm/forward_backward_nhmm.py +276 -0
  251. sequenzo/seqhmm/gradients_nhmm.py +306 -0
  252. sequenzo/seqhmm/hmm.py +291 -0
  253. sequenzo/seqhmm/mhmm.py +314 -0
  254. sequenzo/seqhmm/model_comparison.py +238 -0
  255. sequenzo/seqhmm/multichannel_em.py +282 -0
  256. sequenzo/seqhmm/multichannel_utils.py +138 -0
  257. sequenzo/seqhmm/nhmm.py +270 -0
  258. sequenzo/seqhmm/nhmm_utils.py +191 -0
  259. sequenzo/seqhmm/predict.py +137 -0
  260. sequenzo/seqhmm/predict_mhmm.py +142 -0
  261. sequenzo/seqhmm/simulate.py +878 -0
  262. sequenzo/seqhmm/utils.py +218 -0
  263. sequenzo/seqhmm/visualization.py +910 -0
  264. sequenzo/sequence_characteristics/__init__.py +40 -0
  265. sequenzo/sequence_characteristics/complexity_index.py +49 -0
  266. sequenzo/sequence_characteristics/overall_cross_sectional_entropy.py +220 -0
  267. sequenzo/sequence_characteristics/plot_characteristics.py +593 -0
  268. sequenzo/sequence_characteristics/simple_characteristics.py +311 -0
  269. sequenzo/sequence_characteristics/state_frequencies_and_entropy_per_sequence.py +39 -0
  270. sequenzo/sequence_characteristics/turbulence.py +155 -0
  271. sequenzo/sequence_characteristics/variance_of_spell_durations.py +86 -0
  272. sequenzo/sequence_characteristics/within_sequence_entropy.py +43 -0
  273. sequenzo/suffix_tree/__init__.py +66 -0
  274. sequenzo/suffix_tree/hub.py +114 -0
  275. sequenzo/suffix_tree/individual_level_indicators.py +1679 -0
  276. sequenzo/suffix_tree/spell_individual_level_indicators.py +493 -0
  277. sequenzo/suffix_tree/spell_level_indicators.py +248 -0
  278. sequenzo/suffix_tree/system_level_indicators.py +535 -0
  279. sequenzo/suffix_tree/utils.py +56 -0
  280. sequenzo/version_check.py +283 -0
  281. sequenzo/visualization/__init__.py +29 -0
  282. sequenzo/visualization/plot_mean_time.py +222 -0
  283. sequenzo/visualization/plot_modal_state.py +276 -0
  284. sequenzo/visualization/plot_most_frequent_sequences.py +147 -0
  285. sequenzo/visualization/plot_relative_frequency.py +405 -0
  286. sequenzo/visualization/plot_sequence_index.py +1175 -0
  287. sequenzo/visualization/plot_single_medoid.py +153 -0
  288. sequenzo/visualization/plot_state_distribution.py +651 -0
  289. sequenzo/visualization/plot_transition_matrix.py +190 -0
  290. sequenzo/visualization/utils/__init__.py +23 -0
  291. sequenzo/visualization/utils/utils.py +310 -0
  292. sequenzo/with_event_history_analysis/__init__.py +35 -0
  293. sequenzo/with_event_history_analysis/sequence_analysis_multi_state_model.py +850 -0
  294. sequenzo/with_event_history_analysis/sequence_history_analysis.py +283 -0
  295. sequenzo-0.1.31.dist-info/METADATA +286 -0
  296. sequenzo-0.1.31.dist-info/RECORD +299 -0
  297. sequenzo-0.1.31.dist-info/WHEEL +5 -0
  298. sequenzo-0.1.31.dist-info/licenses/LICENSE +28 -0
  299. sequenzo-0.1.31.dist-info/top_level.txt +2 -0
@@ -0,0 +1,263 @@
1
+ #include <pybind11/pybind11.h>
2
+ #include <pybind11/numpy.h>
3
+ #include <vector>
4
+ #include <iostream>
5
+ #ifdef _OPENMP
6
+ #include <omp.h>
7
+ #endif
8
+ #include <random>
9
+ #include <cfloat>
10
+ #include <climits>
11
+ #include <cmath>
12
+
13
+ using namespace std;
14
+ namespace py = pybind11;
15
+
16
+ class KMedoid {
17
+ protected:
18
+ int nelements; // Number of elements (data points)
19
+ int nclusters; // Number of clusters (medoids)
20
+ int npass; // Maximum number of iterations
21
+
22
+ vector<int> tclusterid; // Temporary cluster assignment for each element
23
+ vector<int> saved; // Saved cluster assignments to check for convergence
24
+ vector<int> clusterMembership; // Cluster membership indices (flattened 2D: nclusters x nelements)
25
+ vector<int> clusterSize; // Size of each cluster
26
+
27
+ py::array_t<double> diss; // Distance matrix (2D numpy array)
28
+ py::array_t<int> centroids; // Medoid indices (1D numpy array)
29
+ py::array_t<double> weights; // Weights of elements (1D numpy array)
30
+
31
+ public:
32
+ // Constructor initializes members and allocates necessary storage
33
+ KMedoid(int nelements, py::array_t<double> diss,
34
+ py::array_t<int> centroids, int npass,
35
+ py::array_t<double> weights)
36
+ : nelements(nelements),
37
+ diss(diss),
38
+ centroids(centroids),
39
+ npass(npass),
40
+ weights(weights),
41
+ nclusters(static_cast<int>(centroids.size())) {
42
+ // 注释掉信息性打印,避免在并行环境(如 CLARA)中降低性能
43
+ // py::print("[>] Starting KMedoids...");
44
+
45
+ tclusterid.resize(nelements);
46
+ saved.resize(nelements);
47
+ clusterMembership.resize(nelements * nclusters);
48
+ clusterSize.resize(nclusters);
49
+ fill(clusterSize.begin(), clusterSize.end(), 0);
50
+ }
51
+
52
+ // Initialize medoids using a k-means++ style seeding method for better starting points
53
+ void init_medoids() {
54
+ auto ptr_diss = diss.unchecked<2>();
55
+ auto ptr_centroids = centroids.mutable_unchecked<1>();
56
+ auto ptr_weights = weights.unchecked<1>();
57
+
58
+ vector<int> selected; // Indices of selected medoids
59
+ vector<double> min_dists(nelements, DBL_MAX); // Minimum distance to selected medoids
60
+
61
+ mt19937 rng(random_device{}()); // 3. Random number generator initialization (non-deterministic seed)
62
+ uniform_int_distribution<> dist(0, nelements - 1);
63
+
64
+ // Randomly choose the first medoid
65
+ int first = dist(rng);
66
+ selected.push_back(first);
67
+ ptr_centroids[0] = first;
68
+
69
+ for (int k = 1; k < nclusters; ++k) {
70
+ int last = selected.back();
71
+
72
+ // Update min_dists using only the last selected medoid
73
+ for (int i = 0; i < nelements; ++i) {
74
+ double d = ptr_diss(i, last);
75
+ if (d < min_dists[i]) min_dists[i] = d;
76
+ }
77
+
78
+ // Compute weighted total distance
79
+ double total_weight = 0.0;
80
+ for (int i = 0; i < nelements; ++i) {
81
+ total_weight += min_dists[i] * ptr_weights[i];
82
+ }
83
+
84
+ // Handle degenerate case
85
+ if (total_weight <= 1e-10) {
86
+ int fallback = dist(rng);
87
+ selected.push_back(fallback);
88
+ ptr_centroids[k] = fallback;
89
+ continue;
90
+ }
91
+
92
+ // Select next medoid using weighted probability
93
+ uniform_real_distribution<double> rdist(0, total_weight);
94
+ double r = rdist(rng), accumulator = 0.0;
95
+ int next = -1;
96
+
97
+ for (int i = 0; i < nelements; ++i) {
98
+ accumulator += min_dists[i] * ptr_weights[i];
99
+ if (accumulator >= r) {
100
+ next = i;
101
+ break;
102
+ }
103
+ }
104
+
105
+ if (next == -1) next = dist(rng); // fallback again just in case
106
+ selected.push_back(next);
107
+ ptr_centroids[k] = next;
108
+ }
109
+ }
110
+
111
+
112
+ // Update medoids by selecting the element minimizing the sum of weighted distances to all other elements in the cluster
113
+ void getclustermedoids() {
114
+ auto ptr_weights = weights.unchecked<1>();
115
+ auto ptr_diss = diss.unchecked<2>();
116
+ auto ptr_centroids = centroids.mutable_unchecked<1>();
117
+
118
+ #pragma omp parallel for schedule(dynamic)
119
+ for (int k = 0; k < nclusters; ++k) {
120
+ int size = clusterSize[k];
121
+ double best = DBL_MAX;
122
+ int bestID = 0;
123
+
124
+ // Iterate over all members of cluster k to find the best medoid
125
+ for (int i = 0; i < size; ++i) {
126
+ int ii = clusterMembership[k * nelements + i];
127
+ double current = 0;
128
+
129
+ // Sum weighted distances from candidate medoid ii to all other members
130
+ for (int j = 0; j < size; ++j) {
131
+ if (i == j) continue;
132
+ int jj = clusterMembership[k * nelements + j];
133
+ current += ptr_weights[jj] * ptr_diss(ii, jj);
134
+ if (current >= best) break; // Early stop if worse than current best
135
+ }
136
+
137
+ if (current < best) {
138
+ best = current;
139
+ bestID = ii;
140
+ }
141
+ }
142
+
143
+ ptr_centroids[k] = bestID; // Assign best medoid for cluster k
144
+ }
145
+ }
146
+
147
+ // Main loop to run the clustering process until convergence or max iterations
148
+ py::array_t<int> runclusterloop() {
149
+ auto ptr_weights = weights.unchecked<1>();
150
+ auto ptr_diss = diss.unchecked<2>();
151
+ auto ptr_centroids = centroids.mutable_unchecked<1>();
152
+
153
+ double total = DBL_MAX;
154
+ int counter = 0;
155
+ int period = 10; // Frequency to save cluster assignments for convergence checking
156
+
157
+ while (counter <= npass) {
158
+ PyErr_CheckSignals(); // Allow Python interruption
159
+
160
+ double prev = total;
161
+ total = 0;
162
+
163
+ if (counter > 0) getclustermedoids();
164
+
165
+ // Periodically save cluster assignment to check for convergence
166
+ if (counter % period == 0) {
167
+ for (int i = 0; i < nelements; ++i)
168
+ saved[i] = tclusterid[i];
169
+
170
+ if (period < INT_MAX / 2) period *= 2; // Exponentially increase period
171
+ }
172
+
173
+ counter++;
174
+
175
+ vector<vector<int>> localMembers(nclusters);
176
+
177
+ // Parallel assignment of elements to closest medoid
178
+ #pragma omp parallel
179
+ {
180
+ vector<vector<int>> threadLocal(nclusters);
181
+
182
+ #pragma omp for reduction(+:total) schedule(static)
183
+ for (int i = 0; i < nelements; ++i) {
184
+ double dist = DBL_MAX;
185
+ int assign = 0;
186
+
187
+ // Find nearest medoid
188
+ for (int k = 0; k < nclusters; ++k) {
189
+ int j = ptr_centroids[k];
190
+ double tdistance = ptr_diss(i, j);
191
+
192
+ if (tdistance < dist) {
193
+ dist = tdistance;
194
+ assign = k;
195
+ }
196
+ }
197
+
198
+ tclusterid[i] = assign;
199
+ threadLocal[assign].push_back(i);
200
+ total += ptr_weights[i] * dist;
201
+ }
202
+
203
+ // Merge thread local cluster memberships into shared vector safely
204
+ #pragma omp critical
205
+ {
206
+ for (int k = 0; k < nclusters; ++k) {
207
+ localMembers[k].insert(
208
+ localMembers[k].end(),
209
+ threadLocal[k].begin(),
210
+ threadLocal[k].end()
211
+ );
212
+ }
213
+ }
214
+ }
215
+
216
+ // Update cluster membership and sizes
217
+ for (int k = 0; k < nclusters; ++k) {
218
+ clusterSize[k] = static_cast<int>(localMembers[k].size());
219
+
220
+ // If a cluster is empty, reinitialize medoids and restart
221
+ if (clusterSize[k] == 0) {
222
+ init_medoids();
223
+ counter = 0;
224
+ break;
225
+ }
226
+
227
+ for (int i = 0; i < clusterSize[k]; ++i) {
228
+ clusterMembership[k * nelements + i] = localMembers[k][i];
229
+ }
230
+ }
231
+
232
+ // Convergence check based on total cost change
233
+ if (abs(total - prev) < 1e-6) break;
234
+
235
+ // Check if cluster assignments are unchanged from last saved
236
+ bool same = true;
237
+ for (int i = 0; i < nelements; ++i) {
238
+ if (saved[i] != tclusterid[i]) {
239
+ same = false;
240
+ break;
241
+ }
242
+ }
243
+
244
+ if (same) break;
245
+ }
246
+
247
+ return getResultArray();
248
+ }
249
+
250
+ // Construct and return the final array of medoid assignments for each element
251
+ py::array_t<int> getResultArray() const {
252
+ py::array_t<int> result(nelements);
253
+ auto results = result.mutable_unchecked<1>();
254
+ auto centroid = centroids.unchecked<1>();
255
+
256
+ #pragma omp parallel for schedule(static)
257
+ for (int i = 0; i < nelements; ++i) {
258
+ results(i) = centroid(tclusterid[i]);
259
+ }
260
+
261
+ return result;
262
+ }
263
+ };
@@ -0,0 +1,237 @@
1
+ #include <pybind11/pybind11.h>
2
+ #include <pybind11/numpy.h>
3
+ #include <vector>
4
+ #include <iostream>
5
+ #ifdef _OPENMP
6
+ #include <omp.h>
7
+ #endif
8
+ #include <cfloat>
9
+ #include <climits>
10
+ #include <cmath>
11
+ #define WEIGHTED_CLUST_TOL -1e-10
12
+ using namespace std;
13
+ namespace py = pybind11;
14
+
15
+ class PAM {
16
+ public:
17
+ // Constructor: Initializes the PAM algorithm with required parameters.
18
+ PAM(int nelements, py::array_t<double> diss,
19
+ py::array_t<int> centroids, int npass, py::array_t<double> weights) {
20
+ // 注释掉信息性打印,避免在并行环境(如 CLARA)中降低性能
21
+ // py::print("[>] Starting Partitioning Around Medoids (PAM)...");
22
+
23
+ try {
24
+ this->nelements = nelements;
25
+ this->centroids = centroids;
26
+ this->npass = npass;
27
+ this->weights = weights;
28
+ this->diss = diss;
29
+ this->maxdist = 0.0;
30
+ this->nclusters = static_cast<int>(centroids.size()); // Number of clusters
31
+ this->tclusterid.resize(nelements); // Initialize cluster id vector
32
+ this->computeMaxDist(); // Compute the maximum distance for use later
33
+
34
+ // Initialize dysma and dysmb with maxdist
35
+ dysma.resize(nelements, maxdist);
36
+ dysmb.resize(nelements, maxdist);
37
+ } catch (const exception &e) {
38
+ py::print("Error: ", e.what()); // Error handling
39
+ }
40
+ }
41
+
42
+ // Computes the maximum distance between any two elements in the distance matrix.
43
+ void computeMaxDist() {
44
+ auto ptr_diss = diss.unchecked<2>();
45
+
46
+ // The manual array collects the thread maxima
47
+ int nthreads = 1;
48
+ #ifdef _OPENMP
49
+ #pragma omp parallel
50
+ {
51
+ #pragma omp single
52
+ nthreads = omp_get_num_threads();
53
+ }
54
+ #endif
55
+
56
+ std::vector<double> thread_max(nthreads, 0.0);
57
+
58
+ #ifdef _OPENMP
59
+ #pragma omp parallel
60
+ {
61
+ int tid = omp_get_thread_num();
62
+ #else
63
+ {
64
+ int tid = 0;
65
+ #endif
66
+ double local = 0.0;
67
+
68
+ #ifdef _OPENMP
69
+ #pragma omp for schedule(static)
70
+ #endif
71
+ for (int i = 0; i < nelements; ++i) {
72
+ for (int j = i + 1; j < nelements; ++j) {
73
+ double val = ptr_diss(i, j);
74
+ if (val > local) local = val;
75
+ }
76
+ }
77
+
78
+ thread_max[tid] = local;
79
+ }
80
+
81
+ // Final reduction (serial, fast)
82
+ double max_val = 0.0;
83
+ for (double val : thread_max) {
84
+ if (val > max_val) max_val = val;
85
+ }
86
+
87
+ maxdist = 1.1 * max_val + 1.0;
88
+ }
89
+
90
+
91
+ // Runs the PAM clustering loop, repeatedly updating centroids and assigning elements to clusters.
92
+ py::array_t<int> runclusterloop() {
93
+ auto ptr_weights = weights.unchecked<1>(); // Access to the weights
94
+ auto ptr_diss = diss.unchecked<2>(); // Access to the distance matrix
95
+ auto ptr_centroids = centroids.mutable_unchecked<1>(); // Access to the centroids
96
+
97
+ double dzsky;
98
+ int ipass = 0;
99
+ int hbest = -1;
100
+ int nbest = -1;
101
+ int k, icluster, h;
102
+ double total = -1.0;
103
+ int nclusters = static_cast<int>(centroids.size());
104
+
105
+ do {
106
+ // Parallel loop to update dysma and dysmb based on current centroids
107
+ #pragma omp parallel for schedule(static)
108
+ for (int i = 0; i < nelements; i++) {
109
+ dysmb[i] = maxdist;
110
+ dysma[i] = maxdist;
111
+
112
+ // Update dysma and dysmb values based on the distance to centroids
113
+ for (int k = 0; k < nclusters; k++) {
114
+ int icluster = ptr_centroids(k);
115
+ double dist = ptr_diss(i, icluster);
116
+
117
+ if (dysma[i] > dist) {
118
+ dysmb[i] = dysma[i];
119
+ dysma[i] = dist;
120
+ tclusterid[i] = k; // Assign element to the current cluster
121
+ } else if (dysmb[i] > dist) {
122
+ dysmb[i] = dist;
123
+ }
124
+ }
125
+ }
126
+
127
+ // If total hasn't been calculated yet, calculate it
128
+ if (total < 0) {
129
+ total = 0;
130
+
131
+ // Parallel loop to calculate the total weighted distance
132
+ #pragma omp parallel for reduction(+:total) schedule(static)
133
+ for (int i = 0; i < nelements; i++) {
134
+ total += ptr_weights[i] * dysma[i];
135
+ }
136
+ }
137
+
138
+ dzsky = 1; // Initialize dzsky to 1 for the change cost comparison
139
+
140
+ // Parallel loop to compute the cost of switching elements' medoids
141
+ #pragma omp parallel for schedule(dynamic)
142
+ for (int h = 0; h < nelements; h++) {
143
+ bool is_current_medoid = false;
144
+ for (int k = 0; k < nclusters; k++) {
145
+ if (h == ptr_centroids[k]) {
146
+ is_current_medoid = true;
147
+ break;
148
+ }
149
+ }
150
+
151
+ if (is_current_medoid) // Skip if the element is already a medoid
152
+ continue;
153
+
154
+ double local_dzsky = dzsky;
155
+ int local_hbest = -1;
156
+ int local_nbest = -1;
157
+
158
+ // Evaluate the change cost for switching each element with a new medoid
159
+ for (int k = 0; k < nclusters; k++) {
160
+ int i = ptr_centroids[k];
161
+ double dz = 0.0;
162
+
163
+ for (int j = 0; j < nelements; j++) {
164
+ if (ptr_diss(i, j) == dysma[j]) {
165
+ double small = (dysmb[j] > ptr_diss(h, j)) ? ptr_diss(h, j) : dysmb[j];
166
+ dz += ptr_weights[j] * (-dysma[j] + small); // Update change cost
167
+ } else if (ptr_diss(h, j) < dysma[j]) {
168
+ dz += ptr_weights[j] * (-dysma[j] + ptr_diss(h, j));
169
+ }
170
+ }
171
+
172
+ // Keep track of the best change
173
+ if (dz < local_dzsky) {
174
+ local_dzsky = dz;
175
+ local_hbest = h;
176
+ local_nbest = i;
177
+ }
178
+ }
179
+
180
+ // Critical section to update dzsky with the best change
181
+ #pragma omp critical
182
+ {
183
+ if (local_dzsky < dzsky) {
184
+ dzsky = local_dzsky;
185
+ hbest = local_hbest;
186
+ nbest = local_nbest;
187
+ }
188
+ }
189
+ }
190
+
191
+ // If there was an improvement in the total cost, update the centroids
192
+ if (dzsky < 0) {
193
+ for (k = 0; k < nclusters; k++) {
194
+ if (ptr_centroids[k] == nbest) {
195
+ ptr_centroids[k] = hbest; // Swap the medoids
196
+ }
197
+ }
198
+
199
+ total += dzsky; // Update the total cost
200
+ }
201
+
202
+ ipass++; // Increment pass count
203
+ if (ipass >= npass) {
204
+ break; // Break if max passes reached
205
+ }
206
+ } while (dzsky < 0); // Repeat until no improvement
207
+
208
+ return getResultArray(); // Return the final cluster assignments
209
+ }
210
+
211
+ // Returns an array of cluster assignments for each element
212
+ py::array_t<int> getResultArray() const {
213
+ py::array_t<int> result(nelements);
214
+ auto results = result.mutable_unchecked<1>();
215
+ auto centroid = centroids.unchecked<1>();
216
+
217
+ #pragma omp parallel for schedule(static)
218
+ for (int i = 0; i < nelements; ++i) {
219
+ results(i) = centroid(tclusterid[i]);
220
+ }
221
+
222
+ return result;
223
+ }
224
+
225
+
226
+ protected:
227
+ int nelements; // Number of elements to cluster
228
+ py::array_t<double> diss; // Distance matrix
229
+ py::array_t<int> centroids; // Initial centroids
230
+ int npass; // Number of passes for the algorithm
231
+ py::array_t<double> weights; // Element weights
232
+ vector<int> tclusterid; // Cluster IDs for each element
233
+ vector<double> dysmb; // Temporary variable for distances
234
+ int nclusters; // Number of clusters
235
+ double maxdist; // Maximum distance value
236
+ vector<double> dysma; // Temporary variable for distances
237
+ };