sequenzo 0.1.31__cp310-cp310-macosx_10_9_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (299) hide show
  1. _sequenzo_fastcluster.cpython-310-darwin.so +0 -0
  2. sequenzo/__init__.py +349 -0
  3. sequenzo/big_data/__init__.py +12 -0
  4. sequenzo/big_data/clara/__init__.py +26 -0
  5. sequenzo/big_data/clara/clara.py +476 -0
  6. sequenzo/big_data/clara/utils/__init__.py +27 -0
  7. sequenzo/big_data/clara/utils/aggregatecases.py +92 -0
  8. sequenzo/big_data/clara/utils/davies_bouldin.py +91 -0
  9. sequenzo/big_data/clara/utils/get_weighted_diss.cpython-310-darwin.so +0 -0
  10. sequenzo/big_data/clara/utils/wfcmdd.py +205 -0
  11. sequenzo/big_data/clara/visualization.py +88 -0
  12. sequenzo/clustering/KMedoids.py +178 -0
  13. sequenzo/clustering/__init__.py +30 -0
  14. sequenzo/clustering/clustering_c_code.cpython-310-darwin.so +0 -0
  15. sequenzo/clustering/hierarchical_clustering.py +1256 -0
  16. sequenzo/clustering/sequenzo_fastcluster/fastcluster.py +495 -0
  17. sequenzo/clustering/sequenzo_fastcluster/src/fastcluster.cpp +1877 -0
  18. sequenzo/clustering/sequenzo_fastcluster/src/fastcluster_python.cpp +1264 -0
  19. sequenzo/clustering/src/KMedoid.cpp +263 -0
  20. sequenzo/clustering/src/PAM.cpp +237 -0
  21. sequenzo/clustering/src/PAMonce.cpp +265 -0
  22. sequenzo/clustering/src/cluster_quality.cpp +496 -0
  23. sequenzo/clustering/src/cluster_quality.h +128 -0
  24. sequenzo/clustering/src/cluster_quality_backup.cpp +570 -0
  25. sequenzo/clustering/src/module.cpp +228 -0
  26. sequenzo/clustering/src/weightedinertia.cpp +111 -0
  27. sequenzo/clustering/utils/__init__.py +27 -0
  28. sequenzo/clustering/utils/disscenter.py +122 -0
  29. sequenzo/data_preprocessing/__init__.py +22 -0
  30. sequenzo/data_preprocessing/helpers.py +303 -0
  31. sequenzo/datasets/__init__.py +41 -0
  32. sequenzo/datasets/biofam.csv +2001 -0
  33. sequenzo/datasets/biofam_child_domain.csv +2001 -0
  34. sequenzo/datasets/biofam_left_domain.csv +2001 -0
  35. sequenzo/datasets/biofam_married_domain.csv +2001 -0
  36. sequenzo/datasets/chinese_colonial_territories.csv +12 -0
  37. sequenzo/datasets/country_co2_emissions.csv +194 -0
  38. sequenzo/datasets/country_co2_emissions_global_deciles.csv +195 -0
  39. sequenzo/datasets/country_co2_emissions_global_quintiles.csv +195 -0
  40. sequenzo/datasets/country_co2_emissions_local_deciles.csv +195 -0
  41. sequenzo/datasets/country_co2_emissions_local_quintiles.csv +195 -0
  42. sequenzo/datasets/country_gdp_per_capita.csv +194 -0
  43. sequenzo/datasets/dyadic_children.csv +61 -0
  44. sequenzo/datasets/dyadic_parents.csv +61 -0
  45. sequenzo/datasets/mvad.csv +713 -0
  46. sequenzo/datasets/pairfam_activity_by_month.csv +1028 -0
  47. sequenzo/datasets/pairfam_activity_by_year.csv +1028 -0
  48. sequenzo/datasets/pairfam_family_by_month.csv +1028 -0
  49. sequenzo/datasets/pairfam_family_by_year.csv +1028 -0
  50. sequenzo/datasets/political_science_aid_shock.csv +166 -0
  51. sequenzo/datasets/political_science_donor_fragmentation.csv +157 -0
  52. sequenzo/define_sequence_data.py +1400 -0
  53. sequenzo/dissimilarity_measures/__init__.py +31 -0
  54. sequenzo/dissimilarity_measures/c_code.cpython-310-darwin.so +0 -0
  55. sequenzo/dissimilarity_measures/get_distance_matrix.py +762 -0
  56. sequenzo/dissimilarity_measures/get_substitution_cost_matrix.py +246 -0
  57. sequenzo/dissimilarity_measures/src/DHDdistance.cpp +148 -0
  58. sequenzo/dissimilarity_measures/src/LCPdistance.cpp +114 -0
  59. sequenzo/dissimilarity_measures/src/LCPspellDistance.cpp +215 -0
  60. sequenzo/dissimilarity_measures/src/OMdistance.cpp +247 -0
  61. sequenzo/dissimilarity_measures/src/OMspellDistance.cpp +281 -0
  62. sequenzo/dissimilarity_measures/src/__init__.py +0 -0
  63. sequenzo/dissimilarity_measures/src/dist2matrix.cpp +63 -0
  64. sequenzo/dissimilarity_measures/src/dp_utils.h +160 -0
  65. sequenzo/dissimilarity_measures/src/module.cpp +40 -0
  66. sequenzo/dissimilarity_measures/src/setup.py +30 -0
  67. sequenzo/dissimilarity_measures/src/utils.h +25 -0
  68. sequenzo/dissimilarity_measures/src/xsimd/.github/cmake-test/main.cpp +6 -0
  69. sequenzo/dissimilarity_measures/src/xsimd/benchmark/main.cpp +159 -0
  70. sequenzo/dissimilarity_measures/src/xsimd/benchmark/xsimd_benchmark.hpp +565 -0
  71. sequenzo/dissimilarity_measures/src/xsimd/docs/source/conf.py +37 -0
  72. sequenzo/dissimilarity_measures/src/xsimd/examples/mandelbrot.cpp +330 -0
  73. sequenzo/dissimilarity_measures/src/xsimd/examples/pico_bench.hpp +246 -0
  74. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_arithmetic.hpp +266 -0
  75. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_complex.hpp +112 -0
  76. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_details.hpp +323 -0
  77. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_logical.hpp +218 -0
  78. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_math.hpp +2583 -0
  79. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_memory.hpp +880 -0
  80. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_rounding.hpp +72 -0
  81. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_swizzle.hpp +174 -0
  82. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_trigo.hpp +978 -0
  83. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx.hpp +1924 -0
  84. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx2.hpp +1144 -0
  85. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512bw.hpp +656 -0
  86. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512cd.hpp +28 -0
  87. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512dq.hpp +244 -0
  88. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512er.hpp +20 -0
  89. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512f.hpp +2650 -0
  90. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512ifma.hpp +20 -0
  91. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512pf.hpp +20 -0
  92. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vbmi.hpp +77 -0
  93. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vbmi2.hpp +131 -0
  94. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vnni_avx512bw.hpp +20 -0
  95. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vnni_avx512vbmi2.hpp +20 -0
  96. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avxvnni.hpp +20 -0
  97. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_common.hpp +24 -0
  98. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_common_fwd.hpp +77 -0
  99. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_constants.hpp +393 -0
  100. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_emulated.hpp +788 -0
  101. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_avx.hpp +93 -0
  102. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_avx2.hpp +46 -0
  103. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_sse.hpp +97 -0
  104. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma4.hpp +92 -0
  105. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_i8mm_neon64.hpp +17 -0
  106. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_isa.hpp +142 -0
  107. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_neon.hpp +3142 -0
  108. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_neon64.hpp +1543 -0
  109. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_rvv.hpp +1513 -0
  110. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_scalar.hpp +1260 -0
  111. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse2.hpp +2024 -0
  112. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse3.hpp +67 -0
  113. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse4_1.hpp +339 -0
  114. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse4_2.hpp +44 -0
  115. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_ssse3.hpp +186 -0
  116. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sve.hpp +1155 -0
  117. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_vsx.hpp +892 -0
  118. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_wasm.hpp +1780 -0
  119. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_arch.hpp +240 -0
  120. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_config.hpp +484 -0
  121. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_cpuid.hpp +269 -0
  122. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_inline.hpp +27 -0
  123. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/math/xsimd_rem_pio2.hpp +719 -0
  124. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/memory/xsimd_aligned_allocator.hpp +349 -0
  125. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/memory/xsimd_alignment.hpp +91 -0
  126. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_all_registers.hpp +55 -0
  127. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_api.hpp +2765 -0
  128. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx2_register.hpp +44 -0
  129. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512bw_register.hpp +51 -0
  130. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512cd_register.hpp +51 -0
  131. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512dq_register.hpp +51 -0
  132. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512er_register.hpp +51 -0
  133. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512f_register.hpp +77 -0
  134. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512ifma_register.hpp +51 -0
  135. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512pf_register.hpp +51 -0
  136. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vbmi2_register.hpp +51 -0
  137. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vbmi_register.hpp +51 -0
  138. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512bw_register.hpp +54 -0
  139. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512vbmi2_register.hpp +53 -0
  140. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx_register.hpp +64 -0
  141. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avxvnni_register.hpp +44 -0
  142. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_batch.hpp +1524 -0
  143. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_batch_constant.hpp +300 -0
  144. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_common_arch.hpp +47 -0
  145. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_emulated_register.hpp +80 -0
  146. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_avx2_register.hpp +50 -0
  147. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_avx_register.hpp +50 -0
  148. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_sse_register.hpp +50 -0
  149. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma4_register.hpp +50 -0
  150. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_i8mm_neon64_register.hpp +55 -0
  151. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_neon64_register.hpp +55 -0
  152. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_neon_register.hpp +154 -0
  153. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_register.hpp +94 -0
  154. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_rvv_register.hpp +506 -0
  155. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse2_register.hpp +59 -0
  156. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse3_register.hpp +49 -0
  157. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse4_1_register.hpp +48 -0
  158. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse4_2_register.hpp +48 -0
  159. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_ssse3_register.hpp +48 -0
  160. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sve_register.hpp +156 -0
  161. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_traits.hpp +337 -0
  162. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_utils.hpp +536 -0
  163. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_vsx_register.hpp +77 -0
  164. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_wasm_register.hpp +59 -0
  165. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/xsimd.hpp +75 -0
  166. sequenzo/dissimilarity_measures/src/xsimd/test/architectures/dummy.cpp +7 -0
  167. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set.cpp +13 -0
  168. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean.cpp +24 -0
  169. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_aligned.cpp +25 -0
  170. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_arch_independent.cpp +28 -0
  171. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_tag_dispatch.cpp +25 -0
  172. sequenzo/dissimilarity_measures/src/xsimd/test/doc/manipulating_abstract_batches.cpp +7 -0
  173. sequenzo/dissimilarity_measures/src/xsimd/test/doc/manipulating_parametric_batches.cpp +8 -0
  174. sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum.hpp +31 -0
  175. sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum_avx2.cpp +3 -0
  176. sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum_sse2.cpp +3 -0
  177. sequenzo/dissimilarity_measures/src/xsimd/test/doc/writing_vectorized_code.cpp +11 -0
  178. sequenzo/dissimilarity_measures/src/xsimd/test/main.cpp +31 -0
  179. sequenzo/dissimilarity_measures/src/xsimd/test/test_api.cpp +230 -0
  180. sequenzo/dissimilarity_measures/src/xsimd/test/test_arch.cpp +217 -0
  181. sequenzo/dissimilarity_measures/src/xsimd/test/test_basic_math.cpp +183 -0
  182. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch.cpp +1049 -0
  183. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_bool.cpp +508 -0
  184. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_cast.cpp +409 -0
  185. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_complex.cpp +712 -0
  186. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_constant.cpp +286 -0
  187. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_float.cpp +141 -0
  188. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_int.cpp +365 -0
  189. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_manip.cpp +308 -0
  190. sequenzo/dissimilarity_measures/src/xsimd/test/test_bitwise_cast.cpp +222 -0
  191. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_exponential.cpp +226 -0
  192. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_hyperbolic.cpp +183 -0
  193. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_power.cpp +265 -0
  194. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_trigonometric.cpp +236 -0
  195. sequenzo/dissimilarity_measures/src/xsimd/test/test_conversion.cpp +248 -0
  196. sequenzo/dissimilarity_measures/src/xsimd/test/test_custom_default_arch.cpp +28 -0
  197. sequenzo/dissimilarity_measures/src/xsimd/test/test_error_gamma.cpp +170 -0
  198. sequenzo/dissimilarity_measures/src/xsimd/test/test_explicit_batch_instantiation.cpp +32 -0
  199. sequenzo/dissimilarity_measures/src/xsimd/test/test_exponential.cpp +202 -0
  200. sequenzo/dissimilarity_measures/src/xsimd/test/test_extract_pair.cpp +92 -0
  201. sequenzo/dissimilarity_measures/src/xsimd/test/test_fp_manipulation.cpp +77 -0
  202. sequenzo/dissimilarity_measures/src/xsimd/test/test_gnu_source.cpp +30 -0
  203. sequenzo/dissimilarity_measures/src/xsimd/test/test_hyperbolic.cpp +167 -0
  204. sequenzo/dissimilarity_measures/src/xsimd/test/test_load_store.cpp +304 -0
  205. sequenzo/dissimilarity_measures/src/xsimd/test/test_memory.cpp +61 -0
  206. sequenzo/dissimilarity_measures/src/xsimd/test/test_poly_evaluation.cpp +64 -0
  207. sequenzo/dissimilarity_measures/src/xsimd/test/test_power.cpp +184 -0
  208. sequenzo/dissimilarity_measures/src/xsimd/test/test_rounding.cpp +199 -0
  209. sequenzo/dissimilarity_measures/src/xsimd/test/test_select.cpp +101 -0
  210. sequenzo/dissimilarity_measures/src/xsimd/test/test_shuffle.cpp +760 -0
  211. sequenzo/dissimilarity_measures/src/xsimd/test/test_sum.cpp +4 -0
  212. sequenzo/dissimilarity_measures/src/xsimd/test/test_sum.hpp +34 -0
  213. sequenzo/dissimilarity_measures/src/xsimd/test/test_traits.cpp +172 -0
  214. sequenzo/dissimilarity_measures/src/xsimd/test/test_trigonometric.cpp +208 -0
  215. sequenzo/dissimilarity_measures/src/xsimd/test/test_utils.hpp +611 -0
  216. sequenzo/dissimilarity_measures/src/xsimd/test/test_wasm/test_wasm_playwright.py +123 -0
  217. sequenzo/dissimilarity_measures/src/xsimd/test/test_xsimd_api.cpp +1460 -0
  218. sequenzo/dissimilarity_measures/utils/__init__.py +16 -0
  219. sequenzo/dissimilarity_measures/utils/get_LCP_length_for_2_seq.py +44 -0
  220. sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.cpython-310-darwin.so +0 -0
  221. sequenzo/dissimilarity_measures/utils/seqconc.cpython-310-darwin.so +0 -0
  222. sequenzo/dissimilarity_measures/utils/seqdss.cpython-310-darwin.so +0 -0
  223. sequenzo/dissimilarity_measures/utils/seqdur.cpython-310-darwin.so +0 -0
  224. sequenzo/dissimilarity_measures/utils/seqlength.cpython-310-darwin.so +0 -0
  225. sequenzo/multidomain/__init__.py +23 -0
  226. sequenzo/multidomain/association_between_domains.py +311 -0
  227. sequenzo/multidomain/cat.py +597 -0
  228. sequenzo/multidomain/combt.py +519 -0
  229. sequenzo/multidomain/dat.py +81 -0
  230. sequenzo/multidomain/idcd.py +139 -0
  231. sequenzo/multidomain/linked_polyad.py +292 -0
  232. sequenzo/openmp_setup.py +233 -0
  233. sequenzo/prefix_tree/__init__.py +62 -0
  234. sequenzo/prefix_tree/hub.py +114 -0
  235. sequenzo/prefix_tree/individual_level_indicators.py +1321 -0
  236. sequenzo/prefix_tree/spell_individual_level_indicators.py +580 -0
  237. sequenzo/prefix_tree/spell_level_indicators.py +297 -0
  238. sequenzo/prefix_tree/system_level_indicators.py +544 -0
  239. sequenzo/prefix_tree/utils.py +54 -0
  240. sequenzo/seqhmm/__init__.py +95 -0
  241. sequenzo/seqhmm/advanced_optimization.py +305 -0
  242. sequenzo/seqhmm/bootstrap.py +411 -0
  243. sequenzo/seqhmm/build_hmm.py +142 -0
  244. sequenzo/seqhmm/build_mhmm.py +136 -0
  245. sequenzo/seqhmm/build_nhmm.py +121 -0
  246. sequenzo/seqhmm/fit_mhmm.py +62 -0
  247. sequenzo/seqhmm/fit_model.py +61 -0
  248. sequenzo/seqhmm/fit_nhmm.py +76 -0
  249. sequenzo/seqhmm/formulas.py +289 -0
  250. sequenzo/seqhmm/forward_backward_nhmm.py +276 -0
  251. sequenzo/seqhmm/gradients_nhmm.py +306 -0
  252. sequenzo/seqhmm/hmm.py +291 -0
  253. sequenzo/seqhmm/mhmm.py +314 -0
  254. sequenzo/seqhmm/model_comparison.py +238 -0
  255. sequenzo/seqhmm/multichannel_em.py +282 -0
  256. sequenzo/seqhmm/multichannel_utils.py +138 -0
  257. sequenzo/seqhmm/nhmm.py +270 -0
  258. sequenzo/seqhmm/nhmm_utils.py +191 -0
  259. sequenzo/seqhmm/predict.py +137 -0
  260. sequenzo/seqhmm/predict_mhmm.py +142 -0
  261. sequenzo/seqhmm/simulate.py +878 -0
  262. sequenzo/seqhmm/utils.py +218 -0
  263. sequenzo/seqhmm/visualization.py +910 -0
  264. sequenzo/sequence_characteristics/__init__.py +40 -0
  265. sequenzo/sequence_characteristics/complexity_index.py +49 -0
  266. sequenzo/sequence_characteristics/overall_cross_sectional_entropy.py +220 -0
  267. sequenzo/sequence_characteristics/plot_characteristics.py +593 -0
  268. sequenzo/sequence_characteristics/simple_characteristics.py +311 -0
  269. sequenzo/sequence_characteristics/state_frequencies_and_entropy_per_sequence.py +39 -0
  270. sequenzo/sequence_characteristics/turbulence.py +155 -0
  271. sequenzo/sequence_characteristics/variance_of_spell_durations.py +86 -0
  272. sequenzo/sequence_characteristics/within_sequence_entropy.py +43 -0
  273. sequenzo/suffix_tree/__init__.py +66 -0
  274. sequenzo/suffix_tree/hub.py +114 -0
  275. sequenzo/suffix_tree/individual_level_indicators.py +1679 -0
  276. sequenzo/suffix_tree/spell_individual_level_indicators.py +493 -0
  277. sequenzo/suffix_tree/spell_level_indicators.py +248 -0
  278. sequenzo/suffix_tree/system_level_indicators.py +535 -0
  279. sequenzo/suffix_tree/utils.py +56 -0
  280. sequenzo/version_check.py +283 -0
  281. sequenzo/visualization/__init__.py +29 -0
  282. sequenzo/visualization/plot_mean_time.py +222 -0
  283. sequenzo/visualization/plot_modal_state.py +276 -0
  284. sequenzo/visualization/plot_most_frequent_sequences.py +147 -0
  285. sequenzo/visualization/plot_relative_frequency.py +405 -0
  286. sequenzo/visualization/plot_sequence_index.py +1175 -0
  287. sequenzo/visualization/plot_single_medoid.py +153 -0
  288. sequenzo/visualization/plot_state_distribution.py +651 -0
  289. sequenzo/visualization/plot_transition_matrix.py +190 -0
  290. sequenzo/visualization/utils/__init__.py +23 -0
  291. sequenzo/visualization/utils/utils.py +310 -0
  292. sequenzo/with_event_history_analysis/__init__.py +35 -0
  293. sequenzo/with_event_history_analysis/sequence_analysis_multi_state_model.py +850 -0
  294. sequenzo/with_event_history_analysis/sequence_history_analysis.py +283 -0
  295. sequenzo-0.1.31.dist-info/METADATA +286 -0
  296. sequenzo-0.1.31.dist-info/RECORD +299 -0
  297. sequenzo-0.1.31.dist-info/WHEEL +5 -0
  298. sequenzo-0.1.31.dist-info/licenses/LICENSE +28 -0
  299. sequenzo-0.1.31.dist-info/top_level.txt +2 -0
@@ -0,0 +1,405 @@
1
+ """
2
+ @Author : Yuqi Liang 梁彧祺
3
+ @File : plot_relative_frequency.py
4
+ @Time : 06/02/2025 10:17
5
+ @Desc :
6
+ Generate sequence relative frequency plots with medoids and dissimilarities.
7
+ TODO: Update the xticks.
8
+ """
9
+ import pandas as pd
10
+ import numpy as np
11
+ from scipy.stats import f_oneway
12
+ # from sklearn.preprocessing import StandardScaler
13
+
14
+ import matplotlib.pyplot as plt
15
+ from matplotlib.patches import Rectangle
16
+ import seaborn as sns
17
+
18
+ from sequenzo.define_sequence_data import SequenceData
19
+ from sequenzo.visualization.utils import (
20
+ save_and_show_results,
21
+ set_up_time_labels_for_x_axis,
22
+ show_plot_title
23
+ )
24
+
25
+
26
+ # Delay imports to avoid circular dependency issues during installation
27
+ def _get_standard_scaler():
28
+ try:
29
+ from sklearn.preprocessing import StandardScaler
30
+ return StandardScaler
31
+ except ImportError:
32
+ print(
33
+ "Warning: Not able to install StandardScaler。Please ensure that you have installed scikit-learn successfully.")
34
+ return None
35
+
36
+
37
def plot_relative_frequency(seqdata: SequenceData,
                            distance_matrix: np.ndarray,
                            num_groups: int = 12,
                            weights="auto",
                            grouping_method="first",
                            fontsize=12,
                            save_as=None,
                            dpi=200,
                            show_title: bool = True):
    """
    Generate a sequence relative frequency (seqrf) plot.

    The figure has two panels: the left one draws the medoid sequence of each
    frequency group, the right one draws a horizontal box plot of the
    dissimilarities of each group's sequences to that medoid. A state legend
    and a line of representation-quality statistics are placed below.

    :param seqdata: (SequenceData) The SequenceData object.
    :param distance_matrix: (np.ndarray or pd.DataFrame) A 2D pairwise distance
        matrix with one row and one column per sequence.
    :param num_groups: (int) Number of frequency groups.
    :param weights: (np.ndarray or "auto") Weights for sequences. If "auto", uses seqdata.weights if available
    :param grouping_method: (str) Grouping method: "first" (equal size) or "prop" (weighted grouping).
        When weights are in effect and "first" is requested, "prop" is used instead.
    :param fontsize: (int) Base font size; titles use ``fontsize + 2`` and tick
        labels use ``fontsize - 2``.
    :param save_as: (str, optional) File path to save the plot.
    :param dpi: (int) Resolution of the saved plot.
    :param show_title: (bool) Whether to draw the title of the medoid (left) panel.

    :raises ValueError: If ``distance_matrix`` is not a square 2D matrix matching
        the number of sequences, or if ``weights`` has the wrong length.
    """
    if isinstance(distance_matrix, pd.DataFrame):
        distance_matrix = distance_matrix.to_numpy()
    else:
        distance_matrix = np.asarray(distance_matrix)

    n_sequences = len(seqdata.values)

    # Fail early with a clear message, mirroring the weights-length check below,
    # instead of letting a mismatched matrix surface as an obscure error later.
    if (distance_matrix.ndim != 2
            or distance_matrix.shape[0] != distance_matrix.shape[1]
            or distance_matrix.shape[0] != n_sequences):
        raise ValueError(
            "distance_matrix must be a square 2D matrix with one row and one "
            "column per sequence."
        )

    # Process weights
    if isinstance(weights, str) and weights == "auto":
        weights = getattr(seqdata, "weights", None)

    if weights is not None:
        weights = np.asarray(weights, dtype=float).reshape(-1)
        if len(weights) != n_sequences:
            raise ValueError("Length of weights must equal number of sequences.")

    # Auto-switch to weighted grouping if weights are provided
    if weights is not None and grouping_method == "first":
        grouping_method = "prop"

    # Compute medoids and dissimilarities
    rep_sequences, dissimilarities, group_labels = _compute_seqrf(
        seqdata, distance_matrix, num_groups,
        weights=weights, grouping_method=grouping_method
    )

    # **Auto-adjust figure ratio**: dynamically scale aspect ratio
    num_seq = len(rep_sequences)
    fig_width = 14  # Fixed width
    fig_height = max(6, num_seq / 20)  # Adjust height based on the number of sequences

    fig, axes = plt.subplots(1, 2, figsize=(fig_width, fig_height), gridspec_kw={'width_ratios': [2.5, 1]})
    sns.set_palette("muted")

    # Use color mapping stored in SequenceData
    state_palette = seqdata.color_map

    # **LEFT PLOT: Group Medoids (Sequence Index Plot)**
    ax = axes[0]
    for i, seq in enumerate(rep_sequences):
        for t, state_idx in enumerate(seq):
            # Look up the color directly by the integer state code.
            color = state_palette.get(state_idx, "gray")
            ax.add_patch(Rectangle((t, i + 0.5), 1, 1, color=color))

    ax.set_xlim(0, seqdata.values.shape[1])
    ax.set_ylim(0.5, num_seq + 0.5)

    # Add weight information to title if weights are used
    if show_title:
        if weights is not None and not np.allclose(weights, 1.0):
            total_w = float(np.sum(weights))
            title_text = f"Group Medoids (n={n_sequences}, total weight={total_w:.1f})"
        else:
            title_text = f"Group Medoids (n={n_sequences})"
        show_plot_title(ax, title_text, show=True, fontsize=fontsize+2)
    ax.set_xlabel("Time", fontsize=fontsize)
    ax.set_ylabel("Frequency Group", fontsize=fontsize)

    # X-axis labels
    # TODO: stopgap. Tick labels behave differently between the quickstart and
    # the multidomain main_tutorial because time values are numeric in one and
    # strings in the other, so this does not yet match the index plot.
    # Show only a subset of the x ticks to avoid overcrowding.
    xtick_positions = np.arange(len(seqdata.cleaned_time))
    skip = max(1, len(seqdata.cleaned_time) // 8)  # show every `skip`-th tick (tunable)
    visible_positions = xtick_positions[::skip]
    visible_labels = [seqdata.cleaned_time[i] for i in visible_positions]

    ax.set_xticks(visible_positions)
    ax.set_xticklabels(visible_labels, fontsize=fontsize-2, rotation=0, ha='right', color='gray')

    # Y-axis labels (shared tick positions for both panels)
    group_ticks = range(1, num_groups + 1, max(1, num_groups // 10))
    ax.set_yticks(group_ticks)
    ax.set_yticklabels(group_ticks, fontsize=fontsize-2, color='gray')

    # **Remove unwanted black outlines**
    for side in ("top", "right", "left", "bottom"):
        ax.spines[side].set_visible(False)

    # **RIGHT PLOT: Dissimilarity Box Plot**
    box_ax = axes[1]

    # Set box plot styling
    box_ax.boxplot(
        dissimilarities,
        vert=False,  # Horizontal box plot
        positions=range(1, num_groups + 1),  # Align boxes with y-axis ticks
        patch_artist=True,  # Allow fill color
        boxprops=dict(facecolor='lightblue', edgecolor='gray', linewidth=1),  # Box style
        whiskerprops=dict(color='gray', linewidth=1),  # Whisker style
        capprops=dict(color='gray', linewidth=1),  # Cap line style
        medianprops=dict(color='red', linewidth=2),  # Median line style
        flierprops=dict(marker='o', markerfacecolor='gray', markersize=5, markeredgecolor='none')  # Outlier style
    )

    # Y-axis labels
    box_ax.set_yticks(group_ticks)
    box_ax.set_yticklabels(group_ticks, fontsize=fontsize-2, color='black')

    # Keep only the left and bottom axes visible
    box_ax.spines["top"].set_visible(False)
    box_ax.spines["right"].set_visible(False)
    box_ax.spines["left"].set_visible(True)
    box_ax.spines["bottom"].set_visible(True)

    # Set titles and labels
    box_ax.set_title("Dissimilarities to Medoid", fontsize=fontsize+2)
    box_ax.set_xlabel("Dissimilarity", fontsize=fontsize)
    box_ax.set_ylabel("Group", fontsize=fontsize)

    # Adjust layout
    # TODO: known trouble spot - the layout breaks when there are many states
    # (quickstart) but is fine with few states (Tutorial/multidomain/main_tutorial).
    num_legend_items = len(state_palette)
    bottom_margin = min(0.33, 0.17 + num_legend_items * 0.015)
    plt.subplots_adjust(bottom=bottom_margin, wspace=0.4)

    # **Representation Quality Stats**
    r_squared, f_statistic, p_value = _compute_r2_f_statistic(distance_matrix, group_labels)

    def get_p_value_stars(p):
        """Map a p-value to its significance marker ("***", "**", "*" or "")."""
        if p < 0.001:
            return "***"
        if p < 0.01:
            return "**"
        if p < 0.05:
            return "*"
        return ""

    # Format p-value for display
    p_value_stars = get_p_value_stars(p_value)
    p_value_text = f"p = {p_value:.2e} {p_value_stars}"

    # Explanation of p-value significance levels
    stars_explanation = "*: p < 0.05, **: p < 0.01, ***: p < 0.001"

    stats_text = (f"Representation quality: Pseudo/medoid-based R² = {r_squared:.2f}, F statistic = {f_statistic:.2f}, "
                  f"{p_value_text} ({stars_explanation})")

    # **LEGEND BELOW PLOTS**
    legend_patches = [
        Rectangle((0, 0), 1, 1, color=seqdata.color_map_by_label[label], label=label)
        for label in seqdata.labels
    ]

    # Estimate how many rows are needed for the legend. The count is taken from
    # the handles actually drawn so that the layout always matches the legend.
    max_items_per_row = 5
    n_legend_entries = len(legend_patches)
    ncol = max(1, min(max_items_per_row, n_legend_entries))
    nrow = (n_legend_entries + max_items_per_row - 1) // max_items_per_row  # ceiling division

    fig.legend(
        handles=legend_patches,
        loc='lower center',
        ncol=ncol,
        fontsize=fontsize,
        frameon=False,
        # Shift the legend up as rows are added so it does not cover the stats text.
        bbox_to_anchor=(0.5, 0.05 + 0.015 * (nrow - 1))
    )

    # Display statistical information below the legend
    fig.text(
        0.5, 0.02,  # Adjust position, place below the legend
        stats_text,
        ha="center",
        fontsize=fontsize,
        color="black"
    )

    # **Save or Show Plot**
    save_and_show_results(save_as, dpi)
237
+
238
+
239
def _compute_seqrf(seqdata: SequenceData, distance_matrix: np.ndarray, n_groups: int = 10,
                   weights: np.ndarray = None, grouping_method: str = "first"):
    """
    Compute the representative sequences (medoids) for each frequency group in a SequenceData object.

    Sequences are ordered along the first classic-MDS axis of ``distance_matrix``,
    cut into ``n_groups`` consecutive groups, and each group is summarised by its
    weighted medoid.

    :param seqdata: A SequenceData object; only ``seqdata.values`` is read.
    :param distance_matrix: A 2D pairwise distance matrix.
    :param n_groups: The number of frequency groups to divide sequences into (must be >= 1).
    :param weights: Optional weight vector for sequences (one weight per sequence).
        Defaults to equal weights.
    :param grouping_method: Grouping method, either "first" (equal size, the remainder
        goes to the last group) or "prop" (boundaries placed on the cumulative weights).

    :return: (Tuple[np.ndarray, list, np.ndarray])
        - rep_sequences: Representative sequences (medoids), one row per non-empty group.
        - dissimilarities: List of 1D arrays with the distances of the sequences in
          each group to that group's medoid.
        - group_labels: Group assignment (0-based, float dtype) for each sequence.

    :raises ValueError: If ``n_groups`` < 1, ``grouping_method`` is unknown, or
        ``weights`` does not hold one value per sequence.
    :raises ImportError: If scikit-learn's StandardScaler is unavailable.

    Groups that end up empty (e.g. more groups requested than sequences, or very
    uneven weights with "prop") are dropped instead of crashing the medoid step,
    so fewer than ``n_groups`` groups may be returned.
    """
    if n_groups < 1:
        raise ValueError("n_groups must be a positive integer.")
    if grouping_method not in ("first", "prop"):
        raise ValueError("Invalid grouping_method! Use 'first' or 'prop'.")

    n_sequences = seqdata.values.shape[0]
    if weights is None:
        weights = np.ones(n_sequences)  # Default to equal weights
    else:
        # Accept lists / Series / column vectors as well as 1D arrays.
        weights = np.asarray(weights, dtype=float).ravel()
        if weights.size != n_sequences:
            raise ValueError("weights must contain exactly one value per sequence.")

    # **Step 1: Compute MDS using cmdscale()**
    mds_coords = _cmdscale(distance_matrix)  # Classic MDS
    if mds_coords.shape[1] == 0:
        # No positive eigenvalue (e.g. all distances are zero): every sequence
        # shares the same coordinate, so fall back to a constant axis.
        mds_coords_1d = np.zeros(n_sequences)
    else:
        mds_coords_1d = mds_coords[:, 0]  # Take only the first dimension

    # **Step 2: Standardize MDS coordinates and sort**
    # Fetch the StandardScaler class (None when scikit-learn is not installed).
    scaler_class = _get_standard_scaler()
    if scaler_class is None:
        raise ImportError("需要 scikit-learn 来执行此功能。请安装: pip install scikit-learn")
    scaler = scaler_class()  # Instantiate the scaler
    mds_coords_1d = scaler.fit_transform(mds_coords_1d.reshape(-1, 1)).flatten()

    # Eigenvector direction in np.linalg.eigh() may differ from R, causing cmdscale() to output reversed coordinates.
    mds_coords_1d = -mds_coords_1d  # Reverse direction
    sorted_indices = np.argsort(mds_coords_1d)  # Sort in ascending order

    # **Step 3: Perform grouping based on different methods**
    if grouping_method == "first":
        # Equal-size groups; the last group absorbs the remainder.
        group_size = n_sequences // n_groups
        cut_points = group_size * np.arange(1, n_groups)
    else:  # "prop"
        # Place each boundary where the cumulative weight reaches i * (total / n_groups).
        cumweights = np.cumsum(weights[sorted_indices])
        gsize = np.sum(weights) / n_groups  # Target weight for each group
        cut_points = np.searchsorted(cumweights, gsize * np.arange(1, n_groups))

    # np.split yields [0:c0], [c0:c1], ..., [c_last:], i.e. the last group takes the rest.
    frequency_groups = [
        group for group in np.split(sorted_indices, cut_points) if group.size > 0
    ]

    # **Step 4: Compute the medoid for each group**
    medoid_indices = np.array([
        _compute_group_medoid(distance_matrix, group, weights[group]) for group in frequency_groups
    ])
    rep_sequences = seqdata.values[medoid_indices]

    # **Step 5: Compute distances to medoid for each group**
    dissimilarities = [
        distance_matrix[group, medoid_idx]
        for group, medoid_idx in zip(frequency_groups, medoid_indices)
    ]

    # **Step 6: Assign group labels**
    group_labels = np.zeros(n_sequences)
    for label, group in enumerate(frequency_groups):
        group_labels[group] = label

    return rep_sequences, dissimilarities, group_labels
322
+
323
+
324
def _cmdscale(D):
    """
    Classic Multidimensional Scaling (MDS), equivalent to R's cmdscale()
    How Traminer uses cmdscale(): https://github.com/cran/TraMineR/blob/master/R/dissrf.R

    :param D: A NxN symmetric distance matrix (any array-like accepted by np.asarray)
    :return: Y, a Nxd coordinate matrix, where d is the number of strictly positive
        eigenvalues; columns are ordered by decreasing eigenvalue. d may be 0.
    :raises ValueError: If D is not a square 2D matrix.
    """
    D = np.asarray(D, dtype=float)
    if D.ndim != 2 or D.shape[0] != D.shape[1]:
        raise ValueError("D must be a square (N x N) distance matrix.")

    n = D.shape[0]
    if n == 0:
        # Nothing to embed; avoid dividing by zero in the centering matrix.
        return np.empty((0, 0))

    # Step 1: Compute the centering matrix
    H = np.eye(n) - np.ones((n, n)) / n

    # Step 2: Compute the double centered (squared) distance matrix
    B = -0.5 * H @ (D ** 2) @ H

    # Step 3: Compute eigenvalues and eigenvectors (B is symmetric)
    eigvals, eigvecs = np.linalg.eigh(B)

    # Step 4: eigh returns ascending eigenvalues; reorder to descending
    order = np.argsort(eigvals)[::-1]
    eigvals = eigvals[order]
    eigvecs = eigvecs[:, order]

    # Step 5: Keep only the dimensions with positive eigenvalues
    positive = eigvals > 0

    # Scale each eigenvector by sqrt(eigenvalue); broadcasting over columns is
    # equivalent to multiplying by the diagonal matrix of square roots.
    return eigvecs[:, positive] * np.sqrt(eigvals[positive])
355
+
356
+
357
def _compute_group_medoid(distance_matrix: np.ndarray, group_indices: np.ndarray, weights: np.ndarray = None) -> int:
    """Compute the weighted medoid of a given frequency group,
    matching R's disscenter() implementation.

    :param distance_matrix: (np.ndarray) A 2D symmetric pairwise distance matrix.
    :param group_indices: (np.ndarray) An array of indices representing the sequences in the group.
    :param weights: (np.ndarray, optional) Weights of the group members, aligned with
        ``group_indices``. Defaults to equal weights if not provided.

    :return: (int)
        The index (into ``distance_matrix``) of the medoid sequence, i.e. the group
        member with the minimum weighted sum of distances to the other members.
        Ties are resolved in favour of the member listed first in ``group_indices``.
    :raises ValueError: If the group is empty or ``weights`` does not match its size.
    """
    distance_matrix = np.asarray(distance_matrix)
    group_indices = np.asarray(group_indices)
    if group_indices.size == 0:
        # np.argmin on an empty array would fail with an unhelpful message.
        raise ValueError("Cannot compute the medoid of an empty group.")

    if weights is None:
        weights = np.ones(group_indices.size)  # Default to equal weights
    else:
        weights = np.asarray(weights, dtype=float).ravel()
        if weights.size != group_indices.size:
            raise ValueError("weights must contain exactly one value per group member.")

    # Pairwise distances restricted to the group members.
    group_distances = distance_matrix[np.ix_(group_indices, group_indices)]

    # For each candidate medoid m: sum_i w_i * D(i, m)
    total_distances = group_distances.T @ weights

    # Map the within-group position back to the original index; return a plain int.
    return int(group_indices[np.argmin(total_distances)])
379
+
380
+
381
def _compute_r2_f_statistic(distance_matrix: np.ndarray, group_labels: np.ndarray):
    """
    Compute the pseudo R² and F-statistic for sequence frequency grouping.

    Every entry in the rows of ``distance_matrix`` belonging to a group is treated
    as one observation of that group (the same samples that are passed to
    ``f_oneway``). R² is the share of the total sum of squares explained by the
    group means, so it always lies in [0, 1].

    :param distance_matrix: (np.ndarray) A 2D pairwise distance matrix.
    :param group_labels: (np.ndarray) Group label of each row of ``distance_matrix``.

    :return: (Tuple[float, float, float]) ``(r_squared, f_statistic, p_value)``.
        ``r_squared`` is 0.0 when the matrix has no variance. ``f_statistic`` and
        ``p_value`` are NaN unless at least two groups contain more than one row.
    """
    distance_matrix = np.asarray(distance_matrix, dtype=float)
    group_labels = np.asarray(group_labels)

    grand_mean = distance_matrix.mean()
    # Total sum of squares over all entries (same units as ss_between below).
    ss_total = ((distance_matrix - grand_mean) ** 2).sum()

    ss_between = 0.0
    anova_samples = []
    for g in np.unique(group_labels):
        rows = distance_matrix[group_labels == g]
        # Weight by the number of observations (entries), not by the number of
        # rows, so that ss_between / ss_total is a proper proportion.
        ss_between += rows.size * (rows.mean() - grand_mean) ** 2
        # Ensure valid ANOVA conditions: only groups with more than one row.
        if rows.shape[0] > 1:
            anova_samples.append(rows.ravel())

    if len(anova_samples) > 1:
        f_statistic, p_value = f_oneway(*anova_samples)
    else:
        f_statistic, p_value = np.nan, np.nan

    r_squared = float(ss_between / ss_total) if ss_total > 0 else 0.0
    return r_squared, float(f_statistic), float(p_value)