sequenzo 0.1.31__cp310-cp310-macosx_10_9_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (299) hide show
  1. _sequenzo_fastcluster.cpython-310-darwin.so +0 -0
  2. sequenzo/__init__.py +349 -0
  3. sequenzo/big_data/__init__.py +12 -0
  4. sequenzo/big_data/clara/__init__.py +26 -0
  5. sequenzo/big_data/clara/clara.py +476 -0
  6. sequenzo/big_data/clara/utils/__init__.py +27 -0
  7. sequenzo/big_data/clara/utils/aggregatecases.py +92 -0
  8. sequenzo/big_data/clara/utils/davies_bouldin.py +91 -0
  9. sequenzo/big_data/clara/utils/get_weighted_diss.cpython-310-darwin.so +0 -0
  10. sequenzo/big_data/clara/utils/wfcmdd.py +205 -0
  11. sequenzo/big_data/clara/visualization.py +88 -0
  12. sequenzo/clustering/KMedoids.py +178 -0
  13. sequenzo/clustering/__init__.py +30 -0
  14. sequenzo/clustering/clustering_c_code.cpython-310-darwin.so +0 -0
  15. sequenzo/clustering/hierarchical_clustering.py +1256 -0
  16. sequenzo/clustering/sequenzo_fastcluster/fastcluster.py +495 -0
  17. sequenzo/clustering/sequenzo_fastcluster/src/fastcluster.cpp +1877 -0
  18. sequenzo/clustering/sequenzo_fastcluster/src/fastcluster_python.cpp +1264 -0
  19. sequenzo/clustering/src/KMedoid.cpp +263 -0
  20. sequenzo/clustering/src/PAM.cpp +237 -0
  21. sequenzo/clustering/src/PAMonce.cpp +265 -0
  22. sequenzo/clustering/src/cluster_quality.cpp +496 -0
  23. sequenzo/clustering/src/cluster_quality.h +128 -0
  24. sequenzo/clustering/src/cluster_quality_backup.cpp +570 -0
  25. sequenzo/clustering/src/module.cpp +228 -0
  26. sequenzo/clustering/src/weightedinertia.cpp +111 -0
  27. sequenzo/clustering/utils/__init__.py +27 -0
  28. sequenzo/clustering/utils/disscenter.py +122 -0
  29. sequenzo/data_preprocessing/__init__.py +22 -0
  30. sequenzo/data_preprocessing/helpers.py +303 -0
  31. sequenzo/datasets/__init__.py +41 -0
  32. sequenzo/datasets/biofam.csv +2001 -0
  33. sequenzo/datasets/biofam_child_domain.csv +2001 -0
  34. sequenzo/datasets/biofam_left_domain.csv +2001 -0
  35. sequenzo/datasets/biofam_married_domain.csv +2001 -0
  36. sequenzo/datasets/chinese_colonial_territories.csv +12 -0
  37. sequenzo/datasets/country_co2_emissions.csv +194 -0
  38. sequenzo/datasets/country_co2_emissions_global_deciles.csv +195 -0
  39. sequenzo/datasets/country_co2_emissions_global_quintiles.csv +195 -0
  40. sequenzo/datasets/country_co2_emissions_local_deciles.csv +195 -0
  41. sequenzo/datasets/country_co2_emissions_local_quintiles.csv +195 -0
  42. sequenzo/datasets/country_gdp_per_capita.csv +194 -0
  43. sequenzo/datasets/dyadic_children.csv +61 -0
  44. sequenzo/datasets/dyadic_parents.csv +61 -0
  45. sequenzo/datasets/mvad.csv +713 -0
  46. sequenzo/datasets/pairfam_activity_by_month.csv +1028 -0
  47. sequenzo/datasets/pairfam_activity_by_year.csv +1028 -0
  48. sequenzo/datasets/pairfam_family_by_month.csv +1028 -0
  49. sequenzo/datasets/pairfam_family_by_year.csv +1028 -0
  50. sequenzo/datasets/political_science_aid_shock.csv +166 -0
  51. sequenzo/datasets/political_science_donor_fragmentation.csv +157 -0
  52. sequenzo/define_sequence_data.py +1400 -0
  53. sequenzo/dissimilarity_measures/__init__.py +31 -0
  54. sequenzo/dissimilarity_measures/c_code.cpython-310-darwin.so +0 -0
  55. sequenzo/dissimilarity_measures/get_distance_matrix.py +762 -0
  56. sequenzo/dissimilarity_measures/get_substitution_cost_matrix.py +246 -0
  57. sequenzo/dissimilarity_measures/src/DHDdistance.cpp +148 -0
  58. sequenzo/dissimilarity_measures/src/LCPdistance.cpp +114 -0
  59. sequenzo/dissimilarity_measures/src/LCPspellDistance.cpp +215 -0
  60. sequenzo/dissimilarity_measures/src/OMdistance.cpp +247 -0
  61. sequenzo/dissimilarity_measures/src/OMspellDistance.cpp +281 -0
  62. sequenzo/dissimilarity_measures/src/__init__.py +0 -0
  63. sequenzo/dissimilarity_measures/src/dist2matrix.cpp +63 -0
  64. sequenzo/dissimilarity_measures/src/dp_utils.h +160 -0
  65. sequenzo/dissimilarity_measures/src/module.cpp +40 -0
  66. sequenzo/dissimilarity_measures/src/setup.py +30 -0
  67. sequenzo/dissimilarity_measures/src/utils.h +25 -0
  68. sequenzo/dissimilarity_measures/src/xsimd/.github/cmake-test/main.cpp +6 -0
  69. sequenzo/dissimilarity_measures/src/xsimd/benchmark/main.cpp +159 -0
  70. sequenzo/dissimilarity_measures/src/xsimd/benchmark/xsimd_benchmark.hpp +565 -0
  71. sequenzo/dissimilarity_measures/src/xsimd/docs/source/conf.py +37 -0
  72. sequenzo/dissimilarity_measures/src/xsimd/examples/mandelbrot.cpp +330 -0
  73. sequenzo/dissimilarity_measures/src/xsimd/examples/pico_bench.hpp +246 -0
  74. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_arithmetic.hpp +266 -0
  75. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_complex.hpp +112 -0
  76. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_details.hpp +323 -0
  77. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_logical.hpp +218 -0
  78. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_math.hpp +2583 -0
  79. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_memory.hpp +880 -0
  80. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_rounding.hpp +72 -0
  81. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_swizzle.hpp +174 -0
  82. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_trigo.hpp +978 -0
  83. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx.hpp +1924 -0
  84. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx2.hpp +1144 -0
  85. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512bw.hpp +656 -0
  86. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512cd.hpp +28 -0
  87. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512dq.hpp +244 -0
  88. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512er.hpp +20 -0
  89. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512f.hpp +2650 -0
  90. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512ifma.hpp +20 -0
  91. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512pf.hpp +20 -0
  92. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vbmi.hpp +77 -0
  93. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vbmi2.hpp +131 -0
  94. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vnni_avx512bw.hpp +20 -0
  95. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vnni_avx512vbmi2.hpp +20 -0
  96. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avxvnni.hpp +20 -0
  97. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_common.hpp +24 -0
  98. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_common_fwd.hpp +77 -0
  99. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_constants.hpp +393 -0
  100. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_emulated.hpp +788 -0
  101. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_avx.hpp +93 -0
  102. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_avx2.hpp +46 -0
  103. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_sse.hpp +97 -0
  104. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma4.hpp +92 -0
  105. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_i8mm_neon64.hpp +17 -0
  106. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_isa.hpp +142 -0
  107. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_neon.hpp +3142 -0
  108. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_neon64.hpp +1543 -0
  109. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_rvv.hpp +1513 -0
  110. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_scalar.hpp +1260 -0
  111. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse2.hpp +2024 -0
  112. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse3.hpp +67 -0
  113. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse4_1.hpp +339 -0
  114. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse4_2.hpp +44 -0
  115. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_ssse3.hpp +186 -0
  116. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sve.hpp +1155 -0
  117. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_vsx.hpp +892 -0
  118. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_wasm.hpp +1780 -0
  119. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_arch.hpp +240 -0
  120. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_config.hpp +484 -0
  121. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_cpuid.hpp +269 -0
  122. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_inline.hpp +27 -0
  123. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/math/xsimd_rem_pio2.hpp +719 -0
  124. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/memory/xsimd_aligned_allocator.hpp +349 -0
  125. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/memory/xsimd_alignment.hpp +91 -0
  126. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_all_registers.hpp +55 -0
  127. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_api.hpp +2765 -0
  128. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx2_register.hpp +44 -0
  129. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512bw_register.hpp +51 -0
  130. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512cd_register.hpp +51 -0
  131. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512dq_register.hpp +51 -0
  132. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512er_register.hpp +51 -0
  133. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512f_register.hpp +77 -0
  134. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512ifma_register.hpp +51 -0
  135. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512pf_register.hpp +51 -0
  136. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vbmi2_register.hpp +51 -0
  137. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vbmi_register.hpp +51 -0
  138. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512bw_register.hpp +54 -0
  139. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512vbmi2_register.hpp +53 -0
  140. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx_register.hpp +64 -0
  141. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avxvnni_register.hpp +44 -0
  142. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_batch.hpp +1524 -0
  143. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_batch_constant.hpp +300 -0
  144. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_common_arch.hpp +47 -0
  145. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_emulated_register.hpp +80 -0
  146. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_avx2_register.hpp +50 -0
  147. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_avx_register.hpp +50 -0
  148. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_sse_register.hpp +50 -0
  149. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma4_register.hpp +50 -0
  150. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_i8mm_neon64_register.hpp +55 -0
  151. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_neon64_register.hpp +55 -0
  152. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_neon_register.hpp +154 -0
  153. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_register.hpp +94 -0
  154. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_rvv_register.hpp +506 -0
  155. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse2_register.hpp +59 -0
  156. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse3_register.hpp +49 -0
  157. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse4_1_register.hpp +48 -0
  158. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse4_2_register.hpp +48 -0
  159. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_ssse3_register.hpp +48 -0
  160. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sve_register.hpp +156 -0
  161. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_traits.hpp +337 -0
  162. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_utils.hpp +536 -0
  163. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_vsx_register.hpp +77 -0
  164. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_wasm_register.hpp +59 -0
  165. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/xsimd.hpp +75 -0
  166. sequenzo/dissimilarity_measures/src/xsimd/test/architectures/dummy.cpp +7 -0
  167. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set.cpp +13 -0
  168. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean.cpp +24 -0
  169. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_aligned.cpp +25 -0
  170. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_arch_independent.cpp +28 -0
  171. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_tag_dispatch.cpp +25 -0
  172. sequenzo/dissimilarity_measures/src/xsimd/test/doc/manipulating_abstract_batches.cpp +7 -0
  173. sequenzo/dissimilarity_measures/src/xsimd/test/doc/manipulating_parametric_batches.cpp +8 -0
  174. sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum.hpp +31 -0
  175. sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum_avx2.cpp +3 -0
  176. sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum_sse2.cpp +3 -0
  177. sequenzo/dissimilarity_measures/src/xsimd/test/doc/writing_vectorized_code.cpp +11 -0
  178. sequenzo/dissimilarity_measures/src/xsimd/test/main.cpp +31 -0
  179. sequenzo/dissimilarity_measures/src/xsimd/test/test_api.cpp +230 -0
  180. sequenzo/dissimilarity_measures/src/xsimd/test/test_arch.cpp +217 -0
  181. sequenzo/dissimilarity_measures/src/xsimd/test/test_basic_math.cpp +183 -0
  182. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch.cpp +1049 -0
  183. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_bool.cpp +508 -0
  184. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_cast.cpp +409 -0
  185. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_complex.cpp +712 -0
  186. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_constant.cpp +286 -0
  187. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_float.cpp +141 -0
  188. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_int.cpp +365 -0
  189. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_manip.cpp +308 -0
  190. sequenzo/dissimilarity_measures/src/xsimd/test/test_bitwise_cast.cpp +222 -0
  191. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_exponential.cpp +226 -0
  192. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_hyperbolic.cpp +183 -0
  193. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_power.cpp +265 -0
  194. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_trigonometric.cpp +236 -0
  195. sequenzo/dissimilarity_measures/src/xsimd/test/test_conversion.cpp +248 -0
  196. sequenzo/dissimilarity_measures/src/xsimd/test/test_custom_default_arch.cpp +28 -0
  197. sequenzo/dissimilarity_measures/src/xsimd/test/test_error_gamma.cpp +170 -0
  198. sequenzo/dissimilarity_measures/src/xsimd/test/test_explicit_batch_instantiation.cpp +32 -0
  199. sequenzo/dissimilarity_measures/src/xsimd/test/test_exponential.cpp +202 -0
  200. sequenzo/dissimilarity_measures/src/xsimd/test/test_extract_pair.cpp +92 -0
  201. sequenzo/dissimilarity_measures/src/xsimd/test/test_fp_manipulation.cpp +77 -0
  202. sequenzo/dissimilarity_measures/src/xsimd/test/test_gnu_source.cpp +30 -0
  203. sequenzo/dissimilarity_measures/src/xsimd/test/test_hyperbolic.cpp +167 -0
  204. sequenzo/dissimilarity_measures/src/xsimd/test/test_load_store.cpp +304 -0
  205. sequenzo/dissimilarity_measures/src/xsimd/test/test_memory.cpp +61 -0
  206. sequenzo/dissimilarity_measures/src/xsimd/test/test_poly_evaluation.cpp +64 -0
  207. sequenzo/dissimilarity_measures/src/xsimd/test/test_power.cpp +184 -0
  208. sequenzo/dissimilarity_measures/src/xsimd/test/test_rounding.cpp +199 -0
  209. sequenzo/dissimilarity_measures/src/xsimd/test/test_select.cpp +101 -0
  210. sequenzo/dissimilarity_measures/src/xsimd/test/test_shuffle.cpp +760 -0
  211. sequenzo/dissimilarity_measures/src/xsimd/test/test_sum.cpp +4 -0
  212. sequenzo/dissimilarity_measures/src/xsimd/test/test_sum.hpp +34 -0
  213. sequenzo/dissimilarity_measures/src/xsimd/test/test_traits.cpp +172 -0
  214. sequenzo/dissimilarity_measures/src/xsimd/test/test_trigonometric.cpp +208 -0
  215. sequenzo/dissimilarity_measures/src/xsimd/test/test_utils.hpp +611 -0
  216. sequenzo/dissimilarity_measures/src/xsimd/test/test_wasm/test_wasm_playwright.py +123 -0
  217. sequenzo/dissimilarity_measures/src/xsimd/test/test_xsimd_api.cpp +1460 -0
  218. sequenzo/dissimilarity_measures/utils/__init__.py +16 -0
  219. sequenzo/dissimilarity_measures/utils/get_LCP_length_for_2_seq.py +44 -0
  220. sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.cpython-310-darwin.so +0 -0
  221. sequenzo/dissimilarity_measures/utils/seqconc.cpython-310-darwin.so +0 -0
  222. sequenzo/dissimilarity_measures/utils/seqdss.cpython-310-darwin.so +0 -0
  223. sequenzo/dissimilarity_measures/utils/seqdur.cpython-310-darwin.so +0 -0
  224. sequenzo/dissimilarity_measures/utils/seqlength.cpython-310-darwin.so +0 -0
  225. sequenzo/multidomain/__init__.py +23 -0
  226. sequenzo/multidomain/association_between_domains.py +311 -0
  227. sequenzo/multidomain/cat.py +597 -0
  228. sequenzo/multidomain/combt.py +519 -0
  229. sequenzo/multidomain/dat.py +81 -0
  230. sequenzo/multidomain/idcd.py +139 -0
  231. sequenzo/multidomain/linked_polyad.py +292 -0
  232. sequenzo/openmp_setup.py +233 -0
  233. sequenzo/prefix_tree/__init__.py +62 -0
  234. sequenzo/prefix_tree/hub.py +114 -0
  235. sequenzo/prefix_tree/individual_level_indicators.py +1321 -0
  236. sequenzo/prefix_tree/spell_individual_level_indicators.py +580 -0
  237. sequenzo/prefix_tree/spell_level_indicators.py +297 -0
  238. sequenzo/prefix_tree/system_level_indicators.py +544 -0
  239. sequenzo/prefix_tree/utils.py +54 -0
  240. sequenzo/seqhmm/__init__.py +95 -0
  241. sequenzo/seqhmm/advanced_optimization.py +305 -0
  242. sequenzo/seqhmm/bootstrap.py +411 -0
  243. sequenzo/seqhmm/build_hmm.py +142 -0
  244. sequenzo/seqhmm/build_mhmm.py +136 -0
  245. sequenzo/seqhmm/build_nhmm.py +121 -0
  246. sequenzo/seqhmm/fit_mhmm.py +62 -0
  247. sequenzo/seqhmm/fit_model.py +61 -0
  248. sequenzo/seqhmm/fit_nhmm.py +76 -0
  249. sequenzo/seqhmm/formulas.py +289 -0
  250. sequenzo/seqhmm/forward_backward_nhmm.py +276 -0
  251. sequenzo/seqhmm/gradients_nhmm.py +306 -0
  252. sequenzo/seqhmm/hmm.py +291 -0
  253. sequenzo/seqhmm/mhmm.py +314 -0
  254. sequenzo/seqhmm/model_comparison.py +238 -0
  255. sequenzo/seqhmm/multichannel_em.py +282 -0
  256. sequenzo/seqhmm/multichannel_utils.py +138 -0
  257. sequenzo/seqhmm/nhmm.py +270 -0
  258. sequenzo/seqhmm/nhmm_utils.py +191 -0
  259. sequenzo/seqhmm/predict.py +137 -0
  260. sequenzo/seqhmm/predict_mhmm.py +142 -0
  261. sequenzo/seqhmm/simulate.py +878 -0
  262. sequenzo/seqhmm/utils.py +218 -0
  263. sequenzo/seqhmm/visualization.py +910 -0
  264. sequenzo/sequence_characteristics/__init__.py +40 -0
  265. sequenzo/sequence_characteristics/complexity_index.py +49 -0
  266. sequenzo/sequence_characteristics/overall_cross_sectional_entropy.py +220 -0
  267. sequenzo/sequence_characteristics/plot_characteristics.py +593 -0
  268. sequenzo/sequence_characteristics/simple_characteristics.py +311 -0
  269. sequenzo/sequence_characteristics/state_frequencies_and_entropy_per_sequence.py +39 -0
  270. sequenzo/sequence_characteristics/turbulence.py +155 -0
  271. sequenzo/sequence_characteristics/variance_of_spell_durations.py +86 -0
  272. sequenzo/sequence_characteristics/within_sequence_entropy.py +43 -0
  273. sequenzo/suffix_tree/__init__.py +66 -0
  274. sequenzo/suffix_tree/hub.py +114 -0
  275. sequenzo/suffix_tree/individual_level_indicators.py +1679 -0
  276. sequenzo/suffix_tree/spell_individual_level_indicators.py +493 -0
  277. sequenzo/suffix_tree/spell_level_indicators.py +248 -0
  278. sequenzo/suffix_tree/system_level_indicators.py +535 -0
  279. sequenzo/suffix_tree/utils.py +56 -0
  280. sequenzo/version_check.py +283 -0
  281. sequenzo/visualization/__init__.py +29 -0
  282. sequenzo/visualization/plot_mean_time.py +222 -0
  283. sequenzo/visualization/plot_modal_state.py +276 -0
  284. sequenzo/visualization/plot_most_frequent_sequences.py +147 -0
  285. sequenzo/visualization/plot_relative_frequency.py +405 -0
  286. sequenzo/visualization/plot_sequence_index.py +1175 -0
  287. sequenzo/visualization/plot_single_medoid.py +153 -0
  288. sequenzo/visualization/plot_state_distribution.py +651 -0
  289. sequenzo/visualization/plot_transition_matrix.py +190 -0
  290. sequenzo/visualization/utils/__init__.py +23 -0
  291. sequenzo/visualization/utils/utils.py +310 -0
  292. sequenzo/with_event_history_analysis/__init__.py +35 -0
  293. sequenzo/with_event_history_analysis/sequence_analysis_multi_state_model.py +850 -0
  294. sequenzo/with_event_history_analysis/sequence_history_analysis.py +283 -0
  295. sequenzo-0.1.31.dist-info/METADATA +286 -0
  296. sequenzo-0.1.31.dist-info/RECORD +299 -0
  297. sequenzo-0.1.31.dist-info/WHEEL +5 -0
  298. sequenzo-0.1.31.dist-info/licenses/LICENSE +28 -0
  299. sequenzo-0.1.31.dist-info/top_level.txt +2 -0
@@ -0,0 +1,139 @@
1
+ """
2
+ @Author : Yuqi Liang 梁彧祺
3
+ @File : idcd.py
4
+ @Time : 15/04/2025 16:38
5
+ @Desc :
6
+ IDCD strategy for multidomain sequence analysis in Python, with custom time, states, and labels.
7
+ """
8
+ from typing import List, Dict
9
+ import pandas as pd
10
+ from sequenzo.define_sequence_data import SequenceData
11
+
12
+
13
def _generate_combined_sequence_from_csv(csv_paths: List[str],
                                         time_cols: List[str],
                                         id_col: str = "id") -> pd.DataFrame:
    """
    Load one CSV per domain and combine them into a single multidomain sequence table.

    Every file is read, validated, sorted by ``id_col`` and re-indexed. For each row
    and each time column, the per-domain values are converted with ``str`` and joined
    with ``'+'`` in the order of ``csv_paths`` (e.g. ``"1+0+2"``). Only combinations
    that actually occur in the data are produced.

    Parameters:
        csv_paths: List of file paths, each containing one domain's sequence data
        time_cols: Time columns to extract and align
        id_col: ID column to align on

    Returns:
        combined_df: DataFrame whose first column is ``id_col`` (taken from the first
            file after sorting) followed by one combined-state column per entry
            of ``time_cols``

    Raises:
        ValueError: If ``csv_paths`` is empty, a CSV cannot be read, a CSV is missing
            required columns, or the sorted IDs of a file differ from the first file
    """
    paths = list(csv_paths)
    if not paths:
        raise ValueError("csv_paths must contain at least one CSV file path.")

    domain_dfs = []
    for path in paths:
        try:
            df = pd.read_csv(path)
        except Exception as e:
            # Keep the original cause attached so the underlying I/O or parser
            # error stays visible in the traceback.
            raise ValueError(f"Failed to read CSV at '{path}': {str(e)}") from e

        # Check if ID column exists
        if id_col not in df.columns:
            raise ValueError(
                f"Missing ID column '{id_col}' in file: {path}\n"
                f"Available columns: {list(df.columns)}"
            )

        # Check if all time columns exist
        missing_cols = [col for col in time_cols if col not in df.columns]
        if missing_cols:
            raise ValueError(
                f"Missing time columns {missing_cols} in file: {path}\n"
                f"Available columns: {list(df.columns)}"
            )

        # Sorting + resetting the index makes row position i refer to the same
        # rank of ID in every domain.
        df = df.sort_values(by=id_col).reset_index(drop=True)
        domain_dfs.append(df)

    # Rows are paired purely by position, so every domain must hold exactly the
    # same IDs; otherwise states of different individuals would be merged silently.
    reference_ids = domain_dfs[0][id_col].tolist()
    for path, df in zip(paths[1:], domain_dfs[1:]):
        if df[id_col].tolist() != reference_ids:
            raise ValueError(
                f"ID column '{id_col}' in file: {path} does not match the IDs in "
                f"file: {paths[0]}. All domain files must contain the same IDs."
            )

    # Pull each (time column, domain) pair out once instead of doing a scalar
    # DataFrame lookup for every cell.
    values_by_time = [[df[t].tolist() for df in domain_dfs] for t in time_cols]

    # Combine states row-wise
    combined_matrix = [
        ['+'.join(str(domain_values[i]) for domain_values in per_domain)
         for per_domain in values_by_time]
        for i in range(len(reference_ids))
    ]

    combined_df = pd.DataFrame(combined_matrix, columns=time_cols)
    combined_df.insert(0, id_col, domain_dfs[0][id_col].values)

    return combined_df
73
+
74
+
75
def _lookup_domain_label(domain_state_labels, position, token):
    """
    Return the display label for one domain's raw state token.

    Falls back to the raw token when no label mapping exists for this domain
    position or when the mapping has no entry for the token.
    """
    # Fewer label dicts than domains: keep the raw token for the extra domains.
    if position >= len(domain_state_labels):
        return token

    mapping = domain_state_labels[position]
    if mapping is None or not hasattr(mapping, "get"):
        return token

    # Tokens are always strings here (the combined state is a '+'-joined string).
    # Digit tokens are tried as int first, then as the raw string, so label dicts
    # keyed by either 1 or "1" are honoured.
    keys = [token]
    if token.isdigit():
        try:
            keys.insert(0, int(token))
        except ValueError:
            # str.isdigit() accepts characters (e.g. superscripts) int() rejects.
            pass

    missing = object()
    for key in keys:
        label = mapping.get(key, missing)
        if label is not missing:
            # Coerce so that ' + '.join() below never sees a non-string label.
            return str(label)
    return token


def create_idcd_sequence_from_csvs(
    csv_paths: List[str],
    time_cols: List[str],
    id_col: str = "id",
    domain_state_labels: List[Dict] = None
) -> SequenceData:
    """
    Create IDCD-style SequenceData from multiple CSVs.
    Combines real observed joint states and builds sequence data.

    The per-domain CSVs are merged into '+'-joined joint states, the observed
    joint states are counted, a frequency/proportion table is printed, and a
    SequenceData object is built whose alphabet is the observed joint states
    ordered by descending frequency.

    Parameters:
    - csv_paths: List of paths to domain CSVs
    - time_cols: List of time column names to use
    - id_col: ID column name
    - domain_state_labels: List of dictionaries mapping raw state values to labels
      for each domain, in the same order as csv_paths. Domains or states without
      an entry keep their raw value as label. If omitted, the joint state strings
      themselves are used as labels.

    Returns:
    - SequenceData object with expanded alphabet of observed joint states
    """
    combined_df = _generate_combined_sequence_from_csv(csv_paths, time_cols, id_col=id_col)

    # Get observed states only
    flat_vals = combined_df[time_cols].values.ravel()
    observed_states = pd.Series(flat_vals).value_counts()
    proportions = observed_states / len(flat_vals) * 100
    states = observed_states.index.tolist()

    # Construct label mapping if provided
    if domain_state_labels:
        # NOTE: joint states are split on '+', so a raw state value that itself
        # contains '+' yields more tokens than domains; surplus tokens keep their
        # raw text.
        pretty_labels = [
            ' + '.join(  # spaced separator keeps the joined label readable
                _lookup_domain_label(domain_state_labels, i, token)
                for i, token in enumerate(state.split("+"))
            )
            for state in states
        ]
    else:
        pretty_labels = list(states)

    # Print frequency + proportion table
    freq_table = pd.DataFrame({
        "State": states,
        "Label": pretty_labels,
        "Frequency": observed_states.values,
        "Proportion (%)": proportions.round(2).values
    })

    print("\n[IDCD] Observed Combined States Frequency Table:")
    print(freq_table.to_string(index=False))

    return SequenceData(
        data=combined_df,
        time=time_cols,
        states=states,
        labels=pretty_labels,
        id_col=id_col
    )
136
+
137
+
138
+
139
+
@@ -0,0 +1,292 @@
1
+ """
2
+ @Author : Yuqi Liang 梁彧祺
3
+ @File : linked_polyad.py
4
+ @Time : 28/04/2025 21:19
5
+ @Desc :
6
+ This module implements the full Python version of Tim Liao and Gilbert Ritschard's
7
+ seqpolyads function (R version 1.0.2, 29.12.20) for linked polyadic sequence analysis.
8
+
9
+ Provided functionalities:
10
+ 1. Customizable pairwise weighting (pair_weights)
11
+ 2. Support for role-specific weights (role_weights)
12
+ 3. Support for weighted sampling (weights)
13
+ 4. Randomization method selection: a=1 (resample sequences), a=2 (resample states)
14
+ 5. Multi-core parallel processing (n_jobs)
15
+ 6. Full reproducibility via random_seed control
16
+ 7. Outputs include observed distances, randomized distances, U, V, V>0.95 dummy, and mean observed/random distances
17
+
18
+ All calculations faithfully replicate the logic and outputs of the original R implementation.
19
+
20
+ Note:
21
+ You may encounter the following error during execution, especially when running the script inside PyCharm:
22
+
23
+ Traceback (most recent call last):
24
+ File "/Applications/PyCharm.app/Contents/plugins/python/helpers/pydev/_pydevd_bundle/pydevd_comm.py", line 293, in _on_run
25
+ r = self.sock.recv(1024)
26
+ OSError: [Errno 9] Bad file descriptor
27
+ This error is related to PyCharm's debugger trying to manage communication sockets while multiprocessing or background progress bars (like tqdm) are active.
28
+ It does not affect the actual computation or results of the linked_polyad function. You can safely ignore it.
29
+
30
+ To suppress it or avoid seeing it:
31
+
32
+ Run the script outside the PyCharm debugger (e.g., from terminal or using “Run” instead of “Debug”).
33
+
34
+ Alternatively, disable progress bars or multiprocessing (e.g., set n_jobs=1 and disable=True in tqdm, if available in the function).
35
+ """
36
+ import numpy as np
37
+ import random
38
+ from typing import List, Dict, Union, Tuple, Any
39
+ from tqdm import tqdm
40
+ from joblib import Parallel, delayed
41
+ import pandas as pd
42
+ from sequenzo.dissimilarity_measures import get_distance_matrix
43
+ from sequenzo.define_sequence_data import SequenceData
44
+
45
import multiprocessing
import platform
import warnings

# Process-wide side effect at import time: every UserWarning is silenced from
# here on, not only those emitted by this module.
warnings.filterwarnings("ignore", category=UserWarning)

# On every platform except Windows, force the "fork" start method.
# `force=True` replaces any start method that was selected earlier.
if platform.system() != "Windows":
    multiprocessing.set_start_method("fork", force=True)
54
+
55
+
56
def linked_polyadic_sequence_analysis(seqlist: List[SequenceData],
                                      a: int = 1,
                                      method: str = "OM",
                                      distance_parameters: dict = None,
                                      weights: np.ndarray = None,
                                      rand_weight_type: int = 1,
                                      role_weights: List[float] = None,
                                      pair_weights: np.ndarray = None,
                                      T: int = 1000,
                                      random_seed: int = 36963,
                                      replace: bool = True,
                                      n_jobs: int = 1,
                                      verbose: bool = True,
                                      return_df: bool = False,
                                      return_merged_seqdata: bool = False) -> Union[Dict,
                                                                                    pd.DataFrame,
                                                                                    Tuple[Dict, SequenceData],
                                                                                    Tuple[pd.DataFrame, SequenceData]
                                                                                    ]:
    """
    Calculate U and V statistics for linked polyadic sequence data.

    The sequences of all roles are stacked into one merged SequenceData, a single
    dissimilarity matrix is computed on it, and the observed within-polyad distance
    of each polyad is compared against T randomly assembled polyads.

    :param seqlist: List of SequenceData objects to analyze (one per role). All must
        have the same number of sequences and the same number of time steps.
    :param a: Randomization type. 1 = resample sequences; 2 = resample states within sequences.
    :param method: Distance measure method ('HAM', 'OM', 'CHI2', etc.).
    :param distance_parameters: Dictionary of additional keyword arguments for distance calculation.
    :param weights: Sampling weights for sequences when generating random polyads
        (one value per polyad). Defaults to uniform weights.
    :param rand_weight_type: Strategy for computing randomization weights (1 = uniform, 2 = sample-weight-based).
    :param role_weights: Role-specific importance weights for different sequence sources.
        Defaults to 1 / (number of roles) for every role.
    :param pair_weights: Pairwise weights for distance averaging, one per unordered pair of roles.
        Defaults to all ones.
    :param T: Number of randomizations performed.
    :param random_seed: Seed for random number generator to ensure reproducibility.
        Randomization ``i`` uses the seed ``random_seed + i``.
    :param replace: Whether to sample with replacement during randomization.
    :param n_jobs: Number of parallel workers for randomization; set to -1 to use all CPUs.
    :param verbose: Whether to display a progress bar during randomization.
    :param return_df: If True, return results as a pandas DataFrame (ObservedDist, U, V, V>0.95).
    :param return_merged_seqdata: If True, also return the merged SequenceData object used internally
        for distance computation.

    :return:
        - If `return_df=True` and `return_merged_seqdata=False`: a pandas DataFrame indexed by
          PolyadID (starting at 1) with columns [ObservedDist, U, V, V>0.95].
        - If `return_df=False` and `return_merged_seqdata=True`: a tuple (result_dict, merged_seqdata),
          where result_dict contains:
            - 'mean.dist': Mean observed and random distances
            - 'U': Array of U values (mean random distance - observed distance)
            - 'V': Array of V values (proportion observed < random)
            - 'V.95': Binary array where V > 0.95
            - 'observed.dist': Array of observed polyadic distances
            - 'random.dist': Array of randomized polyadic distances
        - If `return_df=True` and `return_merged_seqdata=True`: a tuple (result_df, merged_seqdata).

    :raises ValueError: If both `return_df` and `return_merged_seqdata` are False, or if `a`
        is neither 1 nor 2. Both conditions are checked before any computation starts.
    :raises AssertionError: If an element of `seqlist` is not a SequenceData or its dimensions
        differ from those of the first element.
    """
    # Fail fast: this configuration returns nothing, so reject it before the
    # (potentially very expensive) distance computation instead of after it.
    if not return_df and not return_merged_seqdata:
        raise ValueError(
            "[Error] Both `return_df` and `return_merged_seqdata` are set to False.\n"
            " This configuration will omit both summary statistics (U, V, etc.) and the merged sequence data,\n"
            " which are essential for interpretation and further analysis.\n\n"
            " Recommendation:\n"
            " Set `return_df=True` to obtain the polyad-level summary statistics (ObservedDist, U, V, V>0.95),\n"
            " AND/OR set `return_merged_seqdata=True` to obtain the merged SequenceData for clustering, visualization, etc.\n\n"
            " Example usage:\n"
            " result_df, merged_seq = linked_polyadic_sequence_analysis(...,\n"
            " return_df=True,\n"
            " return_merged_seqdata=True)\n"
        )

    # Same reasoning: an invalid randomization type used to surface only inside
    # the first randomization, after the full distance matrix had been built.
    if a not in (1, 2):
        raise ValueError("Invalid randomization type 'a'. Should be 1 or 2.")

    if distance_parameters is None:
        distance_parameters = {}

    print("[Step 1] Validating sequence data inputs...")
    P = len(seqlist)
    n = seqlist[0].n_sequences
    seq_length = seqlist[0].n_steps

    # Kept as assertions so callers that catch AssertionError keep working.
    for sd in seqlist:
        assert isinstance(sd, SequenceData), "Every element of seqlist must be a SequenceData"
        assert sd.n_sequences == n, "All SequenceData objects must contain the same number of sequences"
        assert sd.n_steps == seq_length, "All SequenceData objects must have the same number of time steps"

    if role_weights is None:
        role_weights = [1.0 / P] * P
    role_weights = np.array(role_weights)

    if pair_weights is None:
        pair_weights = np.ones(int(P * (P - 1) / 2))

    print(f"[Step 2] Constructing merged polyadic sequence data... (Total polyads: {n}, Roles per polyad: {P})")

    # Stack the roles on top of each other; row (n * role + k) holds the
    # sequence of polyad k in that role. Fresh IDs keep the rows unique.
    tagged_dfs = []
    for i, sd in enumerate(seqlist):
        df = sd.to_dataframe().copy()
        df["__id__"] = [f"R{i}_{j}" for j in range(sd.n_sequences)]
        tagged_dfs.append(df)

    data_concat = pd.concat(tagged_dfs, axis=0).reset_index(drop=True)
    labels = seqlist[0].labels

    merged_seqdata = SequenceData(
        data=data_concat,
        time=seqlist[0].time,
        states=list(range(1, len(seqlist[0].states) + 1)),
        labels=labels,
        id_col="__id__"
    )

    print("[Step 3] Computing all pairwise dissimilarities using method:", method)
    alldist = np.asarray(get_distance_matrix(merged_seqdata, method=method, **distance_parameters))
    print(" -> Dissimilarity matrix shape:", alldist.shape)

    # Row offset of each role's block inside the merged data.
    cj = n * np.arange(P)

    if weights is None:
        weights = np.ones(n) / n
    # Plain float array so positional indexing works for lists and pandas Series too.
    weights = np.asarray(weights, dtype=float)

    # Strict upper triangle = one entry per unordered pair of roles.
    upper_pairs = np.triu_indices(P, 1)

    def weighted_mean(mat):
        return np.average(mat[upper_pairs], weights=pair_weights)

    print(f"[Step 4] Starting {T} randomized polyad simulations (randomization type: a={a})...")

    def random_sample_once(i):
        """Run randomization `i`; return (random polyad distance, sampled member indices)."""
        local_rng = np.random.default_rng(random_seed + i)
        sampled = local_rng.choice(n, size=P, replace=replace, p=weights)
        sample_indices = cj + sampled

        if a == 1:
            mat = alldist[np.ix_(sample_indices, sample_indices)]
            return weighted_mean(mat), sampled

        # a == 2: additionally resample the states within each sampled sequence
        # and recompute the distances on the shuffled sequences.
        df = merged_seqdata.to_dataframe().drop(columns="__id__")
        sampled_df = df.iloc[sample_indices].reset_index(drop=True)
        shuffled = sampled_df.apply(lambda row: local_rng.choice(row, size=seq_length, replace=replace),
                                    axis=1, result_type="broadcast")
        shuffled["__id__"] = [f"Rand_{i}_{j}" for j in range(len(shuffled))]
        seq_shuffled = SequenceData(
            data=shuffled,
            time=merged_seqdata.time,
            states=merged_seqdata.states,
            labels=merged_seqdata.labels,
            id_col="__id__"
        )
        dmat = np.asarray(get_distance_matrix(seq_shuffled, method=method, **distance_parameters))
        return weighted_mean(dmat), sampled

    iterator = tqdm(range(T), desc="-> Randomizing polyads") if verbose else range(T)
    outcomes = Parallel(n_jobs=n_jobs)(delayed(random_sample_once)(i) for i in iterator)

    # The samples are returned by the workers rather than written into a shared
    # array: with process-based workers a write made inside the worker never
    # reaches the parent, which left the sample table all zeros for n_jobs != 1.
    random_dists = np.array([dist for dist, _ in outcomes])
    l_m = np.array([sampled for _, sampled in outcomes], dtype=int).reshape(T, P)

    print("[Step 5] Computing observed distances for each polyad...")
    observed_dists = []
    for i in range(n):
        indices = cj + i
        mat = alldist[np.ix_(indices, indices)]
        observed_dists.append(weighted_mean(mat))
    observed_dists = np.array(observed_dists)

    print("[Step 6] Calculating U, V, and significance threshold V > 0.95...")

    if rand_weight_type == 2:
        # Weight of a random polyad: role-weighted share of its members' sampling weights.
        l_weights = np.array(
            [np.sum(weights[sampled] * role_weights / np.sum(weights[sampled])) for sampled in l_m],
            dtype=float,
        )
    else:
        l_weights = np.ones(T)
    l_weights /= np.sum(l_weights)

    mean_rand_dist = np.sum(random_dists * l_weights)
    U = mean_rand_dist - observed_dists
    V = np.array([np.sum((observed < random_dists) * l_weights) for observed in observed_dists])
    V_95 = (V > 0.95).astype(int)

    print(
        f"[Step 7] Final summary: mean observed = {np.mean(observed_dists):.2f}, mean randomized = {mean_rand_dist:.2f}")
    print(f" -> Significant polyads (V > 0.95): {np.sum(V_95)} / {n}")

    result = {
        "mean.dist": {"Obs": np.mean(observed_dists), "Rand": mean_rand_dist},
        "U": U,
        "V": V,
        "V.95": V_95,
        "observed.dist": observed_dists,
        "random.dist": random_dists
    }

    def build_result_df():
        """Polyad-level summary table, indexed by 1-based PolyadID."""
        return pd.DataFrame({
            'ObservedDist': result['observed.dist'],
            'U': result['U'],
            'V': result['V'],
            'V>0.95': result['V.95']
        }, index=pd.RangeIndex(start=1, stop=len(result['U']) + 1, name="PolyadID"))

    if return_df and return_merged_seqdata:
        result_df = build_result_df()
        print("[Step 8] Returning result_df and merged_seqdata.")
        print("[Program Completed] Please continue to use result_df and merged_seqdata for further analysis.")
        return result_df, merged_seqdata

    if return_df:
        # The messages previously named "result_dict" although a DataFrame is returned here.
        print("[Step 8] Returning result_df only.")
        print("[Info] Program completed successfully.")
        print("[Info] You can now explore the result_df for detailed statistics (U, V, observed distances, etc.).")
        print("[Info] For further analysis like clustering or sequence visualization,")
        print(" please re-run this function with `return_merged_seqdata=True`.")
        print(" Example:")
        print(" result, merged_seq = linked_polyadic_sequence_analysis(..., return_merged_seqdata=True)")
        return build_result_df()

    # Only return_merged_seqdata is set (the "neither" case was rejected at the top).
    print("[Step 8] Returning result_dict and merged_seqdata.")
    print("[Program Completed] Please continue to use result_dict and merged_seqdata for further analysis.")
    return result, merged_seqdata
289
+
290
+
291
+ if __name__ == '__main__':
292
+ pass
@@ -0,0 +1,233 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ @Author : Yuqi Liang 梁彧祺
4
+ @File : openmp_setup.py
5
+ @Time : 07/10/2025 10:42
6
+ @Desc :
7
+
8
+ OpenMP Setup for Apple Silicon Macs
9
+
10
+ This module provides automatic OpenMP dependency management for Apple Silicon Macs.
11
+ It ensures that libomp is available for parallel computation without requiring
12
+ manual user intervention.
13
+ """
14
+
15
+ import sys
16
+ import os
17
+ import subprocess
18
+ import platform
19
+ import ctypes
20
+ from pathlib import Path
21
+
22
+
23
def check_libomp_availability():
    """
    Report whether the libomp dynamic library can be loaded.

    The bare library name is tried first; if that fails, the two well-known
    Homebrew install locations are probed in order.

    Returns:
        bool: True as soon as one candidate loads, False if none does
    """
    def _loads(library):
        # ctypes signals a library it cannot open with OSError.
        try:
            ctypes.CDLL(library)
        except OSError:
            return False
        return True

    # Let the dynamic loader resolve the plain library name on its own.
    if _loads('libomp.dylib'):
        return True

    # Otherwise fall back to the usual Homebrew locations; a path is only
    # handed to ctypes when the file actually exists.
    homebrew_candidates = (
        '/opt/homebrew/lib/libomp.dylib',  # Apple Silicon
        '/usr/local/lib/libomp.dylib',     # Intel Mac
    )
    return any(
        os.path.exists(candidate) and _loads(candidate)
        for candidate in homebrew_candidates
    )
52
+
53
+
54
def check_homebrew_available():
    """
    Probe for a working Homebrew installation by running ``brew --version``.

    Returns:
        bool: True when the command runs and exits successfully; False when
        the executable is not found or exits with a non-zero status
    """
    probe_command = ['brew', '--version']
    # Output is irrelevant here, only the exit status matters.
    silenced = {'stdout': subprocess.DEVNULL, 'stderr': subprocess.DEVNULL}
    try:
        subprocess.run(probe_command, check=True, **silenced)
    except (subprocess.CalledProcessError, FileNotFoundError):
        return False
    else:
        return True
69
+
70
+
71
def install_libomp_via_homebrew():
    """
    Install libomp by running ``brew install libomp``.

    Progress and the outcome are printed. When brew exits with an error, the
    text it wrote to stderr is printed as well so the cause is visible to the
    user instead of only the exit status.

    Returns:
        bool: True if installation successful, False otherwise (including
        when the ``brew`` executable cannot be started)
    """
    try:
        print("🔧 Installing libomp via Homebrew...")
        # Output is captured so a successful install stays quiet.
        subprocess.run(['brew', 'install', 'libomp'],
                       check=True,
                       stdout=subprocess.PIPE,
                       stderr=subprocess.PIPE)
        print("[>] libomp installed successfully!")
        return True
    except subprocess.CalledProcessError as e:
        print(f"[>] libomp installation failed: {e}")
        # Decode leniently: the diagnostics must never raise themselves.
        brew_stderr = e.stderr.decode(errors="replace").strip() if e.stderr else ""
        if brew_stderr:
            print(f"    brew stderr: {brew_stderr}")
        return False
    except Exception as e:
        # Best-effort boundary: e.g. brew missing from PATH ends up here.
        print(f"[>] Error during installation: {e}")
        return False
92
+
93
+
94
def setup_openmp_environment():
    """
    Set up OpenMP environment variables for Apple Silicon.

    Queries ``brew --prefix`` and puts the Homebrew ``lib`` / ``include``
    directories at the front of ``DYLD_LIBRARY_PATH``, ``LDFLAGS`` and
    ``CPPFLAGS`` in ``os.environ``. Existing values are kept behind the new
    entry. A variable that was unset or blank receives just the new entry
    (no dangling ``:`` or trailing space), and a variable that already starts
    with the entry is left untouched, so repeated calls do not pile up
    duplicates.

    Returns:
        bool: True if setup successful, False otherwise (any failure,
        including a missing ``brew`` executable, is reported and swallowed)
    """
    def _put_first(variable, entry, separator):
        """Make `entry` the leading element of the environment variable."""
        current = os.environ.get(variable, '')
        if current == entry or current.startswith(entry + separator):
            return  # already in front; nothing to do
        if current.strip():
            os.environ[variable] = f"{entry}{separator}{current}"
        else:
            # An empty value must not leave a separator behind: a trailing ':'
            # would add an empty entry to the search path.
            os.environ[variable] = entry

    try:
        # Get Homebrew prefix
        result = subprocess.run(['brew', '--prefix'],
                                capture_output=True, text=True, check=True)
        homebrew_prefix = result.stdout.strip()

        lib_path = f"{homebrew_prefix}/lib"
        include_path = f"{homebrew_prefix}/include"

        _put_first('DYLD_LIBRARY_PATH', lib_path, ':')
        _put_first('LDFLAGS', f"-L{lib_path}", ' ')
        _put_first('CPPFLAGS', f"-I{include_path}", ' ')

        print(f"[>] OpenMP environment variables set")
        print(f" - Library path: {lib_path}")
        print(f" - Include path: {include_path}")
        return True

    except Exception as e:
        print(f"[>] Failed to set environment variables: {e}")
        return False
123
+
124
+
125
def ensure_openmp_support():
    """
    Make sure OpenMP (libomp) is usable on Apple Silicon Macs.

    Anything that is not macOS on arm64, and any Conda environment, is left
    alone and reported as fine. Otherwise the steps are, in order: check
    whether libomp already loads, check that Homebrew exists, reuse a libomp
    that Homebrew already has installed, and finally try to install it.

    Returns:
        bool: True if OpenMP is available, False otherwise
    """
    # Nothing to do unless this is macOS running on Apple Silicon.
    on_apple_silicon = sys.platform == 'darwin' and platform.machine() == 'arm64'
    if not on_apple_silicon:
        return True

    # Conda manages its own toolchain; do not interfere with it.
    if os.environ.get('CONDA_DEFAULT_ENV'):
        print("[>] Detected Conda environment, skipping OpenMP auto-setup")
        return True

    print("[>] Detected Apple Silicon Mac, checking OpenMP support...")

    if check_libomp_availability():
        print("[>] OpenMP support is available")
        return True

    # Without Homebrew there is no automatic route; tell the user what to do.
    if not check_homebrew_available():
        print("""
[>] OpenMP Dependency Detection

On Apple Silicon Mac, Sequenzo requires OpenMP support for parallel computation.

Please run the following command to install OpenMP support:
brew install libomp

If you don't have Homebrew installed, please visit https://brew.sh to install Homebrew first.
""")
        return False

    # `brew list libomp` exits non-zero when the formula is not installed.
    try:
        subprocess.run(['brew', 'list', 'libomp'],
                       stdout=subprocess.DEVNULL,
                       stderr=subprocess.DEVNULL,
                       check=True)
    except subprocess.CalledProcessError:
        installed_via_homebrew = False
    else:
        installed_via_homebrew = True

    if installed_via_homebrew:
        print("[>] libomp is already installed via Homebrew")
        setup_openmp_environment()
        return True

    # Last resort: install it ourselves.
    if not install_libomp_via_homebrew():
        print("""
[>] Automatic OpenMP installation failed

Please manually run the following command:
brew install libomp

After installation, please restart Python or re-import sequenzo.
""")
        return False

    setup_openmp_environment()
    return True
196
+
197
+
198
def get_openmp_status():
    """
    Collect a snapshot of the facts that decide whether OpenMP can be used.

    Returns:
        dict: keys 'platform', 'architecture', 'is_apple_silicon',
        'libomp_available', 'homebrew_available' and 'conda_environment'
    """
    machine = platform.machine()

    report = {}
    report['platform'] = sys.platform
    report['architecture'] = machine
    report['is_apple_silicon'] = sys.platform == 'darwin' and machine == 'arm64'
    report['libomp_available'] = check_libomp_availability()
    report['homebrew_available'] = check_homebrew_available()
    # Any non-empty CONDA_DEFAULT_ENV counts as "inside Conda".
    report['conda_environment'] = bool(os.environ.get('CONDA_DEFAULT_ENV'))
    return report
215
+
216
+
217
if __name__ == "__main__":
    # Script entry point: attempt the setup, then report what was found.
    success = ensure_openmp_support()
    print("[>] OpenMP support is ready!" if success
          else "[>] OpenMP support unavailable, will use serial computation")

    status = get_openmp_status()
    report_lines = [
        f"\n[>] System Status:",
        f" - Platform: {status['platform']}",
        f" - Architecture: {status['architecture']}",
        f" - Apple Silicon: {status['is_apple_silicon']}",
        f" - libomp available: {status['libomp_available']}",
        f" - Homebrew available: {status['homebrew_available']}",
        f" - Conda environment: {status['conda_environment']}",
    ]
    for report_line in report_lines:
        print(report_line)