sequenzo 0.1.31__cp310-cp310-macosx_10_9_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (299)
  1. _sequenzo_fastcluster.cpython-310-darwin.so +0 -0
  2. sequenzo/__init__.py +349 -0
  3. sequenzo/big_data/__init__.py +12 -0
  4. sequenzo/big_data/clara/__init__.py +26 -0
  5. sequenzo/big_data/clara/clara.py +476 -0
  6. sequenzo/big_data/clara/utils/__init__.py +27 -0
  7. sequenzo/big_data/clara/utils/aggregatecases.py +92 -0
  8. sequenzo/big_data/clara/utils/davies_bouldin.py +91 -0
  9. sequenzo/big_data/clara/utils/get_weighted_diss.cpython-310-darwin.so +0 -0
  10. sequenzo/big_data/clara/utils/wfcmdd.py +205 -0
  11. sequenzo/big_data/clara/visualization.py +88 -0
  12. sequenzo/clustering/KMedoids.py +178 -0
  13. sequenzo/clustering/__init__.py +30 -0
  14. sequenzo/clustering/clustering_c_code.cpython-310-darwin.so +0 -0
  15. sequenzo/clustering/hierarchical_clustering.py +1256 -0
  16. sequenzo/clustering/sequenzo_fastcluster/fastcluster.py +495 -0
  17. sequenzo/clustering/sequenzo_fastcluster/src/fastcluster.cpp +1877 -0
  18. sequenzo/clustering/sequenzo_fastcluster/src/fastcluster_python.cpp +1264 -0
  19. sequenzo/clustering/src/KMedoid.cpp +263 -0
  20. sequenzo/clustering/src/PAM.cpp +237 -0
  21. sequenzo/clustering/src/PAMonce.cpp +265 -0
  22. sequenzo/clustering/src/cluster_quality.cpp +496 -0
  23. sequenzo/clustering/src/cluster_quality.h +128 -0
  24. sequenzo/clustering/src/cluster_quality_backup.cpp +570 -0
  25. sequenzo/clustering/src/module.cpp +228 -0
  26. sequenzo/clustering/src/weightedinertia.cpp +111 -0
  27. sequenzo/clustering/utils/__init__.py +27 -0
  28. sequenzo/clustering/utils/disscenter.py +122 -0
  29. sequenzo/data_preprocessing/__init__.py +22 -0
  30. sequenzo/data_preprocessing/helpers.py +303 -0
  31. sequenzo/datasets/__init__.py +41 -0
  32. sequenzo/datasets/biofam.csv +2001 -0
  33. sequenzo/datasets/biofam_child_domain.csv +2001 -0
  34. sequenzo/datasets/biofam_left_domain.csv +2001 -0
  35. sequenzo/datasets/biofam_married_domain.csv +2001 -0
  36. sequenzo/datasets/chinese_colonial_territories.csv +12 -0
  37. sequenzo/datasets/country_co2_emissions.csv +194 -0
  38. sequenzo/datasets/country_co2_emissions_global_deciles.csv +195 -0
  39. sequenzo/datasets/country_co2_emissions_global_quintiles.csv +195 -0
  40. sequenzo/datasets/country_co2_emissions_local_deciles.csv +195 -0
  41. sequenzo/datasets/country_co2_emissions_local_quintiles.csv +195 -0
  42. sequenzo/datasets/country_gdp_per_capita.csv +194 -0
  43. sequenzo/datasets/dyadic_children.csv +61 -0
  44. sequenzo/datasets/dyadic_parents.csv +61 -0
  45. sequenzo/datasets/mvad.csv +713 -0
  46. sequenzo/datasets/pairfam_activity_by_month.csv +1028 -0
  47. sequenzo/datasets/pairfam_activity_by_year.csv +1028 -0
  48. sequenzo/datasets/pairfam_family_by_month.csv +1028 -0
  49. sequenzo/datasets/pairfam_family_by_year.csv +1028 -0
  50. sequenzo/datasets/political_science_aid_shock.csv +166 -0
  51. sequenzo/datasets/political_science_donor_fragmentation.csv +157 -0
  52. sequenzo/define_sequence_data.py +1400 -0
  53. sequenzo/dissimilarity_measures/__init__.py +31 -0
  54. sequenzo/dissimilarity_measures/c_code.cpython-310-darwin.so +0 -0
  55. sequenzo/dissimilarity_measures/get_distance_matrix.py +762 -0
  56. sequenzo/dissimilarity_measures/get_substitution_cost_matrix.py +246 -0
  57. sequenzo/dissimilarity_measures/src/DHDdistance.cpp +148 -0
  58. sequenzo/dissimilarity_measures/src/LCPdistance.cpp +114 -0
  59. sequenzo/dissimilarity_measures/src/LCPspellDistance.cpp +215 -0
  60. sequenzo/dissimilarity_measures/src/OMdistance.cpp +247 -0
  61. sequenzo/dissimilarity_measures/src/OMspellDistance.cpp +281 -0
  62. sequenzo/dissimilarity_measures/src/__init__.py +0 -0
  63. sequenzo/dissimilarity_measures/src/dist2matrix.cpp +63 -0
  64. sequenzo/dissimilarity_measures/src/dp_utils.h +160 -0
  65. sequenzo/dissimilarity_measures/src/module.cpp +40 -0
  66. sequenzo/dissimilarity_measures/src/setup.py +30 -0
  67. sequenzo/dissimilarity_measures/src/utils.h +25 -0
  68. sequenzo/dissimilarity_measures/src/xsimd/.github/cmake-test/main.cpp +6 -0
  69. sequenzo/dissimilarity_measures/src/xsimd/benchmark/main.cpp +159 -0
  70. sequenzo/dissimilarity_measures/src/xsimd/benchmark/xsimd_benchmark.hpp +565 -0
  71. sequenzo/dissimilarity_measures/src/xsimd/docs/source/conf.py +37 -0
  72. sequenzo/dissimilarity_measures/src/xsimd/examples/mandelbrot.cpp +330 -0
  73. sequenzo/dissimilarity_measures/src/xsimd/examples/pico_bench.hpp +246 -0
  74. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_arithmetic.hpp +266 -0
  75. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_complex.hpp +112 -0
  76. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_details.hpp +323 -0
  77. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_logical.hpp +218 -0
  78. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_math.hpp +2583 -0
  79. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_memory.hpp +880 -0
  80. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_rounding.hpp +72 -0
  81. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_swizzle.hpp +174 -0
  82. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_trigo.hpp +978 -0
  83. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx.hpp +1924 -0
  84. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx2.hpp +1144 -0
  85. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512bw.hpp +656 -0
  86. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512cd.hpp +28 -0
  87. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512dq.hpp +244 -0
  88. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512er.hpp +20 -0
  89. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512f.hpp +2650 -0
  90. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512ifma.hpp +20 -0
  91. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512pf.hpp +20 -0
  92. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vbmi.hpp +77 -0
  93. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vbmi2.hpp +131 -0
  94. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vnni_avx512bw.hpp +20 -0
  95. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vnni_avx512vbmi2.hpp +20 -0
  96. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avxvnni.hpp +20 -0
  97. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_common.hpp +24 -0
  98. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_common_fwd.hpp +77 -0
  99. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_constants.hpp +393 -0
  100. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_emulated.hpp +788 -0
  101. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_avx.hpp +93 -0
  102. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_avx2.hpp +46 -0
  103. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_sse.hpp +97 -0
  104. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma4.hpp +92 -0
  105. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_i8mm_neon64.hpp +17 -0
  106. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_isa.hpp +142 -0
  107. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_neon.hpp +3142 -0
  108. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_neon64.hpp +1543 -0
  109. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_rvv.hpp +1513 -0
  110. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_scalar.hpp +1260 -0
  111. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse2.hpp +2024 -0
  112. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse3.hpp +67 -0
  113. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse4_1.hpp +339 -0
  114. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse4_2.hpp +44 -0
  115. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_ssse3.hpp +186 -0
  116. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sve.hpp +1155 -0
  117. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_vsx.hpp +892 -0
  118. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_wasm.hpp +1780 -0
  119. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_arch.hpp +240 -0
  120. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_config.hpp +484 -0
  121. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_cpuid.hpp +269 -0
  122. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_inline.hpp +27 -0
  123. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/math/xsimd_rem_pio2.hpp +719 -0
  124. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/memory/xsimd_aligned_allocator.hpp +349 -0
  125. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/memory/xsimd_alignment.hpp +91 -0
  126. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_all_registers.hpp +55 -0
  127. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_api.hpp +2765 -0
  128. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx2_register.hpp +44 -0
  129. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512bw_register.hpp +51 -0
  130. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512cd_register.hpp +51 -0
  131. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512dq_register.hpp +51 -0
  132. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512er_register.hpp +51 -0
  133. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512f_register.hpp +77 -0
  134. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512ifma_register.hpp +51 -0
  135. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512pf_register.hpp +51 -0
  136. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vbmi2_register.hpp +51 -0
  137. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vbmi_register.hpp +51 -0
  138. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512bw_register.hpp +54 -0
  139. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512vbmi2_register.hpp +53 -0
  140. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx_register.hpp +64 -0
  141. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avxvnni_register.hpp +44 -0
  142. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_batch.hpp +1524 -0
  143. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_batch_constant.hpp +300 -0
  144. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_common_arch.hpp +47 -0
  145. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_emulated_register.hpp +80 -0
  146. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_avx2_register.hpp +50 -0
  147. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_avx_register.hpp +50 -0
  148. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_sse_register.hpp +50 -0
  149. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma4_register.hpp +50 -0
  150. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_i8mm_neon64_register.hpp +55 -0
  151. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_neon64_register.hpp +55 -0
  152. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_neon_register.hpp +154 -0
  153. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_register.hpp +94 -0
  154. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_rvv_register.hpp +506 -0
  155. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse2_register.hpp +59 -0
  156. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse3_register.hpp +49 -0
  157. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse4_1_register.hpp +48 -0
  158. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse4_2_register.hpp +48 -0
  159. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_ssse3_register.hpp +48 -0
  160. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sve_register.hpp +156 -0
  161. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_traits.hpp +337 -0
  162. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_utils.hpp +536 -0
  163. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_vsx_register.hpp +77 -0
  164. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_wasm_register.hpp +59 -0
  165. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/xsimd.hpp +75 -0
  166. sequenzo/dissimilarity_measures/src/xsimd/test/architectures/dummy.cpp +7 -0
  167. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set.cpp +13 -0
  168. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean.cpp +24 -0
  169. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_aligned.cpp +25 -0
  170. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_arch_independent.cpp +28 -0
  171. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_tag_dispatch.cpp +25 -0
  172. sequenzo/dissimilarity_measures/src/xsimd/test/doc/manipulating_abstract_batches.cpp +7 -0
  173. sequenzo/dissimilarity_measures/src/xsimd/test/doc/manipulating_parametric_batches.cpp +8 -0
  174. sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum.hpp +31 -0
  175. sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum_avx2.cpp +3 -0
  176. sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum_sse2.cpp +3 -0
  177. sequenzo/dissimilarity_measures/src/xsimd/test/doc/writing_vectorized_code.cpp +11 -0
  178. sequenzo/dissimilarity_measures/src/xsimd/test/main.cpp +31 -0
  179. sequenzo/dissimilarity_measures/src/xsimd/test/test_api.cpp +230 -0
  180. sequenzo/dissimilarity_measures/src/xsimd/test/test_arch.cpp +217 -0
  181. sequenzo/dissimilarity_measures/src/xsimd/test/test_basic_math.cpp +183 -0
  182. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch.cpp +1049 -0
  183. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_bool.cpp +508 -0
  184. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_cast.cpp +409 -0
  185. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_complex.cpp +712 -0
  186. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_constant.cpp +286 -0
  187. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_float.cpp +141 -0
  188. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_int.cpp +365 -0
  189. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_manip.cpp +308 -0
  190. sequenzo/dissimilarity_measures/src/xsimd/test/test_bitwise_cast.cpp +222 -0
  191. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_exponential.cpp +226 -0
  192. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_hyperbolic.cpp +183 -0
  193. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_power.cpp +265 -0
  194. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_trigonometric.cpp +236 -0
  195. sequenzo/dissimilarity_measures/src/xsimd/test/test_conversion.cpp +248 -0
  196. sequenzo/dissimilarity_measures/src/xsimd/test/test_custom_default_arch.cpp +28 -0
  197. sequenzo/dissimilarity_measures/src/xsimd/test/test_error_gamma.cpp +170 -0
  198. sequenzo/dissimilarity_measures/src/xsimd/test/test_explicit_batch_instantiation.cpp +32 -0
  199. sequenzo/dissimilarity_measures/src/xsimd/test/test_exponential.cpp +202 -0
  200. sequenzo/dissimilarity_measures/src/xsimd/test/test_extract_pair.cpp +92 -0
  201. sequenzo/dissimilarity_measures/src/xsimd/test/test_fp_manipulation.cpp +77 -0
  202. sequenzo/dissimilarity_measures/src/xsimd/test/test_gnu_source.cpp +30 -0
  203. sequenzo/dissimilarity_measures/src/xsimd/test/test_hyperbolic.cpp +167 -0
  204. sequenzo/dissimilarity_measures/src/xsimd/test/test_load_store.cpp +304 -0
  205. sequenzo/dissimilarity_measures/src/xsimd/test/test_memory.cpp +61 -0
  206. sequenzo/dissimilarity_measures/src/xsimd/test/test_poly_evaluation.cpp +64 -0
  207. sequenzo/dissimilarity_measures/src/xsimd/test/test_power.cpp +184 -0
  208. sequenzo/dissimilarity_measures/src/xsimd/test/test_rounding.cpp +199 -0
  209. sequenzo/dissimilarity_measures/src/xsimd/test/test_select.cpp +101 -0
  210. sequenzo/dissimilarity_measures/src/xsimd/test/test_shuffle.cpp +760 -0
  211. sequenzo/dissimilarity_measures/src/xsimd/test/test_sum.cpp +4 -0
  212. sequenzo/dissimilarity_measures/src/xsimd/test/test_sum.hpp +34 -0
  213. sequenzo/dissimilarity_measures/src/xsimd/test/test_traits.cpp +172 -0
  214. sequenzo/dissimilarity_measures/src/xsimd/test/test_trigonometric.cpp +208 -0
  215. sequenzo/dissimilarity_measures/src/xsimd/test/test_utils.hpp +611 -0
  216. sequenzo/dissimilarity_measures/src/xsimd/test/test_wasm/test_wasm_playwright.py +123 -0
  217. sequenzo/dissimilarity_measures/src/xsimd/test/test_xsimd_api.cpp +1460 -0
  218. sequenzo/dissimilarity_measures/utils/__init__.py +16 -0
  219. sequenzo/dissimilarity_measures/utils/get_LCP_length_for_2_seq.py +44 -0
  220. sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.cpython-310-darwin.so +0 -0
  221. sequenzo/dissimilarity_measures/utils/seqconc.cpython-310-darwin.so +0 -0
  222. sequenzo/dissimilarity_measures/utils/seqdss.cpython-310-darwin.so +0 -0
  223. sequenzo/dissimilarity_measures/utils/seqdur.cpython-310-darwin.so +0 -0
  224. sequenzo/dissimilarity_measures/utils/seqlength.cpython-310-darwin.so +0 -0
  225. sequenzo/multidomain/__init__.py +23 -0
  226. sequenzo/multidomain/association_between_domains.py +311 -0
  227. sequenzo/multidomain/cat.py +597 -0
  228. sequenzo/multidomain/combt.py +519 -0
  229. sequenzo/multidomain/dat.py +81 -0
  230. sequenzo/multidomain/idcd.py +139 -0
  231. sequenzo/multidomain/linked_polyad.py +292 -0
  232. sequenzo/openmp_setup.py +233 -0
  233. sequenzo/prefix_tree/__init__.py +62 -0
  234. sequenzo/prefix_tree/hub.py +114 -0
  235. sequenzo/prefix_tree/individual_level_indicators.py +1321 -0
  236. sequenzo/prefix_tree/spell_individual_level_indicators.py +580 -0
  237. sequenzo/prefix_tree/spell_level_indicators.py +297 -0
  238. sequenzo/prefix_tree/system_level_indicators.py +544 -0
  239. sequenzo/prefix_tree/utils.py +54 -0
  240. sequenzo/seqhmm/__init__.py +95 -0
  241. sequenzo/seqhmm/advanced_optimization.py +305 -0
  242. sequenzo/seqhmm/bootstrap.py +411 -0
  243. sequenzo/seqhmm/build_hmm.py +142 -0
  244. sequenzo/seqhmm/build_mhmm.py +136 -0
  245. sequenzo/seqhmm/build_nhmm.py +121 -0
  246. sequenzo/seqhmm/fit_mhmm.py +62 -0
  247. sequenzo/seqhmm/fit_model.py +61 -0
  248. sequenzo/seqhmm/fit_nhmm.py +76 -0
  249. sequenzo/seqhmm/formulas.py +289 -0
  250. sequenzo/seqhmm/forward_backward_nhmm.py +276 -0
  251. sequenzo/seqhmm/gradients_nhmm.py +306 -0
  252. sequenzo/seqhmm/hmm.py +291 -0
  253. sequenzo/seqhmm/mhmm.py +314 -0
  254. sequenzo/seqhmm/model_comparison.py +238 -0
  255. sequenzo/seqhmm/multichannel_em.py +282 -0
  256. sequenzo/seqhmm/multichannel_utils.py +138 -0
  257. sequenzo/seqhmm/nhmm.py +270 -0
  258. sequenzo/seqhmm/nhmm_utils.py +191 -0
  259. sequenzo/seqhmm/predict.py +137 -0
  260. sequenzo/seqhmm/predict_mhmm.py +142 -0
  261. sequenzo/seqhmm/simulate.py +878 -0
  262. sequenzo/seqhmm/utils.py +218 -0
  263. sequenzo/seqhmm/visualization.py +910 -0
  264. sequenzo/sequence_characteristics/__init__.py +40 -0
  265. sequenzo/sequence_characteristics/complexity_index.py +49 -0
  266. sequenzo/sequence_characteristics/overall_cross_sectional_entropy.py +220 -0
  267. sequenzo/sequence_characteristics/plot_characteristics.py +593 -0
  268. sequenzo/sequence_characteristics/simple_characteristics.py +311 -0
  269. sequenzo/sequence_characteristics/state_frequencies_and_entropy_per_sequence.py +39 -0
  270. sequenzo/sequence_characteristics/turbulence.py +155 -0
  271. sequenzo/sequence_characteristics/variance_of_spell_durations.py +86 -0
  272. sequenzo/sequence_characteristics/within_sequence_entropy.py +43 -0
  273. sequenzo/suffix_tree/__init__.py +66 -0
  274. sequenzo/suffix_tree/hub.py +114 -0
  275. sequenzo/suffix_tree/individual_level_indicators.py +1679 -0
  276. sequenzo/suffix_tree/spell_individual_level_indicators.py +493 -0
  277. sequenzo/suffix_tree/spell_level_indicators.py +248 -0
  278. sequenzo/suffix_tree/system_level_indicators.py +535 -0
  279. sequenzo/suffix_tree/utils.py +56 -0
  280. sequenzo/version_check.py +283 -0
  281. sequenzo/visualization/__init__.py +29 -0
  282. sequenzo/visualization/plot_mean_time.py +222 -0
  283. sequenzo/visualization/plot_modal_state.py +276 -0
  284. sequenzo/visualization/plot_most_frequent_sequences.py +147 -0
  285. sequenzo/visualization/plot_relative_frequency.py +405 -0
  286. sequenzo/visualization/plot_sequence_index.py +1175 -0
  287. sequenzo/visualization/plot_single_medoid.py +153 -0
  288. sequenzo/visualization/plot_state_distribution.py +651 -0
  289. sequenzo/visualization/plot_transition_matrix.py +190 -0
  290. sequenzo/visualization/utils/__init__.py +23 -0
  291. sequenzo/visualization/utils/utils.py +310 -0
  292. sequenzo/with_event_history_analysis/__init__.py +35 -0
  293. sequenzo/with_event_history_analysis/sequence_analysis_multi_state_model.py +850 -0
  294. sequenzo/with_event_history_analysis/sequence_history_analysis.py +283 -0
  295. sequenzo-0.1.31.dist-info/METADATA +286 -0
  296. sequenzo-0.1.31.dist-info/RECORD +299 -0
  297. sequenzo-0.1.31.dist-info/WHEEL +5 -0
  298. sequenzo-0.1.31.dist-info/licenses/LICENSE +28 -0
  299. sequenzo-0.1.31.dist-info/top_level.txt +2 -0
@@ -0,0 +1,570 @@
1
+ #include "cluster_quality.h"
2
+ #include <iostream>
3
+ #include <limits>
4
+ #include <cstring>
5
+
6
+ #ifdef _OPENMP
7
+ #include <omp.h>
8
+ #endif
9
+
10
+ /**
11
+ * Implementation of cluster quality indicators matching R WeightedCluster package
12
+ *
13
+ * This implementation closely follows the logic in R's clusterquality.cpp
14
+ * to ensure numerical consistency with the WeightedCluster package.
15
+ */
16
+
17
+ void resetKendallTree(KendallTree& kendall) {
18
+ for (auto& pair : kendall) {
19
+ pair.second->clustDist0 = 0.0L;
20
+ pair.second->clustDist1 = 0.0L;
21
+ }
22
+ }
23
+
24
+ void finalizeKendall(KendallTree& kendall) {
25
+ for (auto& pair : kendall) {
26
+ delete pair.second;
27
+ }
28
+ kendall.clear();
29
+ }
30
+
31
/**
 * Compute individual silhouette (ASW) scores from a full distance matrix.
 *
 * @param diss       Dense n x n dissimilarity matrix; entry (i, j) is read
 *                   from diss[i * n + j].
 * @param cluster    Cluster label of each observation. Only labels in
 *                   1..nclusters are considered valid.
 * @param weights    Weight of each observation, used when averaging distances.
 * @param n          Number of observations.
 * @param nclusters  Largest valid cluster label.
 * @param asw_i      Output array of length n receiving the silhouette scores.
 * @param asw_w      Output array of length n; receives the same per-observation
 *                   value as asw_i.
 *
 * An output entry stays NaN when the observation has an invalid label,
 * belongs to a cluster with at most one member, or when no other cluster has
 * a positive total weight.
 */
void indiv_asw(const double* diss, const int* cluster, const double* weights,
               int n, int nclusters, double* asw_i, double* asw_w) {
    if (n <= 0) {
        return;  // nothing to fill; also avoids forming an invalid output range
    }

    // Initialize output arrays
    const double nan_value = std::numeric_limits<double>::quiet_NaN();
    std::fill(asw_i, asw_i + n, nan_value);
    std::fill(asw_w, asw_w + n, nan_value);

    if (nclusters < 1) {
        return;  // no valid label can exist, every score stays NaN
    }

    // Count cluster sizes, ignoring labels outside 1..nclusters
    std::vector<int> cluster_sizes(static_cast<std::size_t>(nclusters) + 1, 0);
    for (int i = 0; i < n; i++) {
        if (cluster[i] >= 1 && cluster[i] <= nclusters) {
            cluster_sizes[cluster[i]]++;
        }
    }

    const std::size_t row_length = static_cast<std::size_t>(n);
    const double no_neighbour = std::numeric_limits<double>::max();

    #pragma omp parallel for
    for (int i = 0; i < n; i++) {
        const int ci = cluster[i];
        if (ci < 1 || ci > nclusters || cluster_sizes[ci] <= 1) {
            continue;  // Skip singletons or invalid clusters
        }

        // Row offset is computed in size_t: the int product i * n overflows
        // once n exceeds 46340.
        const double* row = diss + static_cast<std::size_t>(i) * row_length;

        // Within-cluster weighted average distance (a_i), excluding i itself
        double sum_within = 0.0;
        double weight_within = 0.0;
        for (int j = 0; j < n; j++) {
            if (i != j && cluster[j] == ci) {
                sum_within += row[j] * weights[j];
                weight_within += weights[j];
            }
        }
        const double a_i = (weight_within > 0) ? sum_within / weight_within : 0.0;

        // Smallest weighted average distance to any other cluster (b_i)
        double b_i = no_neighbour;
        for (int k = 1; k <= nclusters; k++) {
            if (k == ci || cluster_sizes[k] == 0) continue;

            double sum_between = 0.0;
            double weight_between = 0.0;
            for (int j = 0; j < n; j++) {
                if (cluster[j] == k) {
                    sum_between += row[j] * weights[j];
                    weight_between += weights[j];
                }
            }

            if (weight_between > 0) {
                b_i = std::min(b_i, sum_between / weight_between);
            }
        }

        // Silhouette score; left as NaN when no other cluster was usable
        if (b_i != no_neighbour) {
            const double max_ab = std::max(a_i, b_i);
            const double score = (max_ab > 0) ? (b_i - a_i) / max_ab : 0.0;
            asw_i[i] = score;
            asw_w[i] = score;  // For individual scores, weighted = unweighted
        }
    }
}
109
+
110
+ /**
111
+ * Compute individual ASW scores for condensed distance array
112
+ */
113
+ void indiv_asw_dist(const double* diss, const int* cluster, const double* weights,
114
+ int n, int nclusters, double* asw_i, double* asw_w) {
115
+
116
+ // Initialize output arrays
117
+ std::fill(asw_i, asw_i + n, std::numeric_limits<double>::quiet_NaN());
118
+ std::fill(asw_w, asw_w + n, std::numeric_limits<double>::quiet_NaN());
119
+
120
+ // Count cluster sizes and validate
121
+ std::vector<int> cluster_sizes(nclusters + 1, 0);
122
+ for (int i = 0; i < n; i++) {
123
+ if (cluster[i] >= 1 && cluster[i] <= nclusters) {
124
+ cluster_sizes[cluster[i]]++;
125
+ }
126
+ }
127
+
128
+ #pragma omp parallel for
129
+ for (int i = 0; i < n; i++) {
130
+ int ci = cluster[i];
131
+ if (ci < 1 || ci > nclusters || cluster_sizes[ci] <= 1) {
132
+ continue; // Skip singletons or invalid clusters
133
+ }
134
+
135
+ double a_i = 0.0; // Within-cluster average distance
136
+ double b_i = std::numeric_limits<double>::max(); // Min between-cluster average
137
+
138
+ // Calculate within-cluster average (a_i)
139
+ double sum_within = 0.0;
140
+ double weight_within = 0.0;
141
+
142
+ for (int j = 0; j < n; j++) {
143
+ if (i != j && cluster[j] == ci) {
144
+ double dist = getDistanceFromCondensed(diss, i, j, n);
145
+ sum_within += dist * weights[j];
146
+ weight_within += weights[j];
147
+ }
148
+ }
149
+
150
+ if (weight_within > 0) {
151
+ a_i = sum_within / weight_within;
152
+ }
153
+
154
+ // Calculate minimum between-cluster average (b_i)
155
+ for (int k = 1; k <= nclusters; k++) {
156
+ if (k == ci || cluster_sizes[k] == 0) continue;
157
+
158
+ double sum_between = 0.0;
159
+ double weight_between = 0.0;
160
+
161
+ for (int j = 0; j < n; j++) {
162
+ if (cluster[j] == k) {
163
+ double dist = getDistanceFromCondensed(diss, i, j, n);
164
+ sum_between += dist * weights[j];
165
+ weight_between += weights[j];
166
+ }
167
+ }
168
+
169
+ if (weight_between > 0) {
170
+ double avg_between = sum_between / weight_between;
171
+ b_i = std::min(b_i, avg_between);
172
+ }
173
+ }
174
+
175
+ // Calculate silhouette scores
176
+ if (b_i != std::numeric_limits<double>::max()) {
177
+ double max_ab = std::max(a_i, b_i);
178
+ if (max_ab > 0) {
179
+ asw_i[i] = (b_i - a_i) / max_ab;
180
+ asw_w[i] = asw_i[i]; // For individual scores, weighted = unweighted
181
+ } else {
182
+ asw_i[i] = 0.0;
183
+ asw_w[i] = 0.0;
184
+ }
185
+ }
186
+ }
187
+ }
188
+
189
+ /**
190
+ * Core function to compute all cluster quality indicators
191
+ * This follows the R implementation logic exactly
192
+ */
193
+ template<bool UseCondensed>
194
+ void compute_cluster_quality_core(const double* diss, const int* cluster, const double* weights,
195
+ int n, double* stats, int nclusters, double* asw,
196
+ KendallTree& kendall) {
197
+
198
+ // Initialize all statistics to NaN
199
+ std::fill(stats, stats + ClusterQualNumStat, std::numeric_limits<double>::quiet_NaN());
200
+ std::fill(asw, asw + 2 * nclusters, std::numeric_limits<double>::quiet_NaN());
201
+
202
+ // Validate input - return all NaN for invalid cases
203
+ if (n < 2 || nclusters < 1 || nclusters >= n) {
204
+ return;
205
+ }
206
+
207
+ // Count cluster sizes and compute total weight
208
+ std::vector<int> cluster_sizes(nclusters + 1, 0);
209
+ std::vector<double> cluster_weights(nclusters + 1, 0.0);
210
+ double total_weight = 0.0;
211
+
212
+ for (int i = 0; i < n; i++) {
213
+ if (cluster[i] >= 1 && cluster[i] <= nclusters) {
214
+ cluster_sizes[cluster[i]]++;
215
+ cluster_weights[cluster[i]] += weights[i];
216
+ }
217
+ total_weight += weights[i];
218
+ }
219
+
220
+ // Check for valid clustering - need at least 2 non-empty clusters
221
+ int valid_clusters = 0;
222
+ for (int c = 1; c <= nclusters; c++) {
223
+ if (cluster_sizes[c] > 0) valid_clusters++;
224
+ }
225
+ if (valid_clusters < 2) {
226
+ // All stats remain NaN for invalid clustering
227
+ return;
228
+ }
229
+
230
+ // ===== Compute ASW (both individual and weighted) =====
231
+ std::vector<double> asw_individual(n);
232
+ std::vector<double> asw_weighted(n);
233
+
234
+ if constexpr (UseCondensed) {
235
+ indiv_asw_dist(diss, cluster, weights, n, nclusters, asw_individual.data(), asw_weighted.data());
236
+ } else {
237
+ indiv_asw(diss, cluster, weights, n, nclusters, asw_individual.data(), asw_weighted.data());
238
+ }
239
+
240
+ // Aggregate ASW by cluster
241
+ std::vector<double> cluster_asw_sum(nclusters + 1, 0.0);
242
+ std::vector<double> cluster_asw_weight(nclusters + 1, 0.0);
243
+ std::vector<double> cluster_asw_weighted_sum(nclusters + 1, 0.0);
244
+
245
+ for (int i = 0; i < n; i++) {
246
+ int ci = cluster[i];
247
+ if (ci >= 1 && ci <= nclusters && !std::isnan(asw_individual[i])) {
248
+ cluster_asw_sum[ci] += asw_individual[i];
249
+ cluster_asw_weighted_sum[ci] += asw_weighted[i] * weights[i];
250
+ cluster_asw_weight[ci] += weights[i];
251
+ }
252
+ }
253
+
254
+ // Store cluster-level ASW
255
+ double global_asw = 0.0, global_asw_weighted = 0.0;
256
+ double global_weight = 0.0;
257
+ int global_count = 0;
258
+
259
+ for (int c = 1; c <= nclusters; c++) {
260
+ if (cluster_sizes[c] > 1) { // Only include clusters with more than 1 member for ASW calculation
261
+ // Count valid individuals in this cluster (those with non-NaN ASW)
262
+ int valid_individuals = 0;
263
+ for (int i = 0; i < n; i++) {
264
+ if (cluster[i] == c && !std::isnan(asw_individual[i])) {
265
+ valid_individuals++;
266
+ }
267
+ }
268
+
269
+ if (valid_individuals > 0) {
270
+ asw[2 * (c - 1)] = cluster_asw_sum[c] / valid_individuals; // Unweighted ASW
271
+ if (cluster_asw_weight[c] > 0) {
272
+ asw[2 * (c - 1) + 1] = cluster_asw_weighted_sum[c] / cluster_asw_weight[c]; // Weighted ASW
273
+ }
274
+
275
+ global_asw += cluster_asw_sum[c];
276
+ global_asw_weighted += cluster_asw_weighted_sum[c];
277
+ global_weight += cluster_asw_weight[c];
278
+ global_count += valid_individuals;
279
+ }
280
+ }
281
+ }
282
+
283
+ stats[ClusterQualASWi] = (global_count > 0) ? global_asw / global_count : 0.0;
284
+ stats[ClusterQualASWw] = (global_weight > 0) ? global_asw_weighted / global_weight : 0.0;
285
+
286
+ // ===== Compute R² (weighted) =====
287
+ long double D_bar = 0.0L; // Global weighted mean of distances
288
+ long double total_pair_weight = 0.0L;
289
+
290
+ // Calculate global weighted mean (using upper triangle only)
291
+ for (int i = 0; i < n - 1; i++) {
292
+ for (int j = i + 1; j < n; j++) {
293
+ double dist;
294
+ if constexpr (UseCondensed) {
295
+ dist = diss[getCondensedIndex(i, j, n)];
296
+ } else {
297
+ dist = diss[i * n + j];
298
+ }
299
+ long double pair_weight = static_cast<long double>(weights[i]) * weights[j];
300
+ D_bar += dist * pair_weight;
301
+ total_pair_weight += pair_weight;
302
+ }
303
+ }
304
+ if (total_pair_weight > 0) {
305
+ D_bar /= total_pair_weight;
306
+ }
307
+
308
+ // Calculate total sum of squares
309
+ long double total_ss = 0.0L;
310
+ for (int i = 0; i < n - 1; i++) {
311
+ for (int j = i + 1; j < n; j++) {
312
+ double dist;
313
+ if constexpr (UseCondensed) {
314
+ dist = diss[getCondensedIndex(i, j, n)];
315
+ } else {
316
+ dist = diss[i * n + j];
317
+ }
318
+ long double pair_weight = static_cast<long double>(weights[i]) * weights[j];
319
+ long double diff = dist - D_bar;
320
+ total_ss += pair_weight * diff * diff;
321
+ }
322
+ }
323
+
324
+ // Calculate within-cluster sum of squares
325
+ long double within_ss = 0.0L;
326
+ for (int c = 1; c <= nclusters; c++) {
327
+ if (cluster_sizes[c] < 2) continue;
328
+
329
+ // Get cluster members
330
+ std::vector<int> cluster_members;
331
+ for (int i = 0; i < n; i++) {
332
+ if (cluster[i] == c) {
333
+ cluster_members.push_back(i);
334
+ }
335
+ }
336
+
337
+ // Calculate cluster weighted mean
338
+ long double cluster_sum = 0.0L;
339
+ long double cluster_weight = 0.0L;
340
+ for (size_t ii = 0; ii < cluster_members.size() - 1; ii++) {
341
+ for (size_t jj = ii + 1; jj < cluster_members.size(); jj++) {
342
+ int i = cluster_members[ii];
343
+ int j = cluster_members[jj];
344
+ double dist;
345
+ if constexpr (UseCondensed) {
346
+ dist = diss[getCondensedIndex(i, j, n)];
347
+ } else {
348
+ dist = diss[i * n + j];
349
+ }
350
+ long double pair_weight = static_cast<long double>(weights[i]) * weights[j];
351
+ cluster_sum += dist * pair_weight;
352
+ cluster_weight += pair_weight;
353
+ }
354
+ }
355
+
356
+ if (cluster_weight > 0) {
357
+ long double cluster_mean = cluster_sum / cluster_weight;
358
+
359
+ // Add to within-cluster sum of squares
360
+ for (size_t ii = 0; ii < cluster_members.size() - 1; ii++) {
361
+ for (size_t jj = ii + 1; jj < cluster_members.size(); jj++) {
362
+ int i = cluster_members[ii];
363
+ int j = cluster_members[jj];
364
+ double dist;
365
+ if constexpr (UseCondensed) {
366
+ dist = diss[getCondensedIndex(i, j, n)];
367
+ } else {
368
+ dist = diss[i * n + j];
369
+ }
370
+ long double pair_weight = static_cast<long double>(weights[i]) * weights[j];
371
+ long double diff = dist - cluster_mean;
372
+ within_ss += pair_weight * diff * diff;
373
+ }
374
+ }
375
+ }
376
+ }
377
+
378
+ stats[ClusterQualR] = (total_ss > 0) ? static_cast<double>(1.0L - within_ss / total_ss) : 0.0;
379
+ stats[ClusterQualR2] = stats[ClusterQualR] * stats[ClusterQualR];
380
+
381
+ // ===== Compute Calinski-Harabasz =====
382
+ long double between_ss = total_ss - within_ss;
383
+ if (within_ss > 0 && nclusters > 1 && n > nclusters) {
384
+ long double f_stat = (between_ss / (nclusters - 1)) / (within_ss / (n - nclusters));
385
+ stats[ClusterQualF] = static_cast<double>(f_stat);
386
+ stats[ClusterQualF2] = stats[ClusterQualF] * stats[ClusterQualF];
387
+ }
388
+
389
+ // ===== Compute HPG (weighted point-biserial correlation) =====
390
+ {
391
+ long double sum_w = 0.0L; // Σ wij
392
+ long double sum_xw = 0.0L; // Σ wij * d_ij
393
+ long double sum_yw = 0.0L; // Σ wij * y_ij
394
+ long double sum_x2w= 0.0L; // Σ wij * d_ij^2
395
+ long double sum_y2w= 0.0L; // Σ wij * y_ij^2 (y^2==y 因为 y∈{0,1})
396
+ long double sum_xyw= 0.0L; // Σ wij * d_ij * y_ij
397
+
398
+ for (int i = 0; i < n - 1; ++i) {
399
+ for (int j = i + 1; j < n; ++j) {
400
+ const double wij = weights[i] * weights[j];
401
+ if (wij <= 0) continue;
402
+ const double dij = (UseCondensed ? diss[getCondensedIndex(i,j,n)]
403
+ : diss[i*n + j]);
404
+ const double yij = (cluster[i] == cluster[j]) ? 1.0 : 0.0;
405
+
406
+ sum_w += wij;
407
+ sum_xw += wij * dij;
408
+ sum_yw += wij * yij;
409
+ sum_x2w += wij * dij * dij;
410
+ sum_y2w += wij * yij; // yij^2 == yij
411
+ sum_xyw += wij * dij * yij;
412
+ }
413
+ }
414
+
415
+ if (sum_w > 0) {
416
+ const long double mx = sum_xw / sum_w;
417
+ const long double my = sum_yw / sum_w;
418
+ const long double cov_xy = (sum_xyw / sum_w) - mx * my;
419
+ const long double var_x = (sum_x2w / sum_w) - mx * mx;
420
+ const long double var_y = (sum_y2w / sum_w) - my * my;
421
+
422
+ if (var_x > 0 && var_y > 0) {
423
+ stats[ClusterQualHPG] = static_cast<double>(cov_xy / std::sqrt(var_x * var_y));
424
+ }
425
+ }
426
+ }
427
+
428
+ // ===== Compute HG and HGSD (Hubert's Gamma) =====
429
+ // Based on R WeightedCluster implementation - correct Kendall tau calculation
430
+
431
+ // Reset Kendall tree
432
+ for (auto& pair : kendall) {
433
+ pair.second->clustDist0 = 0.0L;
434
+ pair.second->clustDist1 = 0.0L;
435
+ }
436
+
437
+ // Build distance groups with cluster memberships
438
+ for (int i = 0; i < n - 1; i++) {
439
+ for (int j = i + 1; j < n; j++) {
440
+ double dist_ij;
441
+ if constexpr (UseCondensed) {
442
+ dist_ij = diss[getCondensedIndex(i, j, n)];
443
+ } else {
444
+ dist_ij = diss[i * n + j];
445
+ }
446
+
447
+ // Get or create entry in Kendall tree
448
+ auto it = kendall.find(dist_ij);
449
+ CmpCluster* cmp;
450
+ if (it == kendall.end()) {
451
+ cmp = new CmpCluster();
452
+ kendall[dist_ij] = cmp;
453
+ } else {
454
+ cmp = it->second;
455
+ }
456
+
457
+ // Count pairs: clustDist1 = same cluster, clustDist0 = different clusters
458
+ long double weight_pair = static_cast<long double>(weights[i]) * weights[j];
459
+ if (cluster[i] == cluster[j]) {
460
+ cmp->clustDist1 += weight_pair;
461
+ } else {
462
+ cmp->clustDist0 += weight_pair;
463
+ }
464
+ }
465
+ }
466
+
467
+ // Calculate Kendall's tau (Gamma) from the tree
468
+ long double gamma_concordant = 0.0L;
469
+ long double gamma_discordant = 0.0L;
470
+
471
+ for (auto it1 = kendall.begin(); it1 != kendall.end(); ++it1) {
472
+ for (auto it2 = std::next(it1); it2 != kendall.end(); ++it2) {
473
+ double d1 = it1->first;
474
+ double d2 = it2->first;
475
+ CmpCluster* cmp1 = it1->second;
476
+ CmpCluster* cmp2 = it2->second;
477
+
478
+ if (d1 < d2) {
479
+ // For distances d1 < d2, we expect same-cluster pairs to be more common at d1
480
+ // Concordant: more same-cluster pairs at smaller distance
481
+ gamma_concordant += cmp1->clustDist1 * cmp2->clustDist0;
482
+ // Discordant: more different-cluster pairs at smaller distance
483
+ gamma_discordant += cmp1->clustDist0 * cmp2->clustDist1;
484
+ }
485
+ }
486
+ }
487
+
488
+ long double gamma_total = gamma_concordant + gamma_discordant;
489
+ if (gamma_total > 0) {
490
+ stats[ClusterQualHG] = static_cast<double>((gamma_concordant - gamma_discordant) / gamma_total);
491
+ }
492
+
493
+ // HGSD: Temporarily set to NaN until exact R formula is ported
494
+ stats[ClusterQualHGSD] = std::numeric_limits<double>::quiet_NaN();
495
+
496
+ // ===== Compute HC (Hierarchical Criterion) =====
497
+ // This is a simplified version - the full implementation would need the dendrogram
498
+ std::vector<double> cluster_means(nclusters + 1, 0.0);
499
+ for (int c = 1; c <= nclusters; c++) {
500
+ if (cluster_sizes[c] > 0) {
501
+ // Calculate mean within-cluster distance
502
+ double sum_dist = 0.0;
503
+ int count = 0;
504
+ for (int i = 0; i < n; i++) {
505
+ if (cluster[i] == c) {
506
+ for (int j = i + 1; j < n; j++) {
507
+ if (cluster[j] == c) {
508
+ double dist;
509
+ if constexpr (UseCondensed) {
510
+ dist = diss[getCondensedIndex(i, j, n)];
511
+ } else {
512
+ dist = diss[i * n + j];
513
+ }
514
+ sum_dist += dist;
515
+ count++;
516
+ }
517
+ }
518
+ }
519
+ }
520
+ cluster_means[c] = (count > 0) ? sum_dist / count : 0.0;
521
+ }
522
+ }
523
+
524
+ double mean_of_means = 0.0;
525
+ int valid_mean_count = 0;
526
+ for (int c = 1; c <= nclusters; c++) {
527
+ if (cluster_sizes[c] > 0) {
528
+ mean_of_means += cluster_means[c];
529
+ valid_mean_count++;
530
+ }
531
+ }
532
+ mean_of_means /= valid_mean_count;
533
+
534
+ double variance = 0.0;
535
+ for (int c = 1; c <= nclusters; c++) {
536
+ if (cluster_sizes[c] > 0) {
537
+ variance += (cluster_means[c] - mean_of_means) * (cluster_means[c] - mean_of_means);
538
+ }
539
+ }
540
+ // HC: Temporarily set to NaN until exact R formula is ported
541
+ stats[ClusterQualHC] = std::numeric_limits<double>::quiet_NaN();
542
+ }
543
+
544
+ // Non-template entry points: each forwards to the core instantiated for one dissimilarity layout
545
+ void clusterquality(const double* diss, const int* cluster, const double* weights,
546
+ int n, double* stats, int nclusters, double* asw,
547
+ KendallTree& kendall) {
548
+ compute_cluster_quality_core<false>(diss, cluster, weights, n, stats, nclusters, asw, kendall);
549
+ }
550
+
551
+ void clusterquality_dist(const double* diss, const int* cluster, const double* weights,
552
+ int n, double* stats, int nclusters, double* asw,
553
+ KendallTree& kendall) {
554
+ compute_cluster_quality_core<true>(diss, cluster, weights, n, stats, nclusters, asw, kendall);
555
+ }
556
+
557
+ // Convenience versions: same statistics, computed with a temporary Kendall tree that is finalized before returning
558
+ void clusterqualitySimple(const double* diss, const int* cluster, const double* weights,
559
+ int n, double* stats, int nclusters, double* asw) {
560
+ KendallTree kendall; // Local Kendall tree for simple version
561
+ clusterquality(diss, cluster, weights, n, stats, nclusters, asw, kendall);
562
+ finalizeKendall(kendall);
563
+ }
564
+
565
+ void clusterqualitySimple_dist(const double* diss, const int* cluster, const double* weights,
566
+ int n, double* stats, int nclusters, double* asw) {
567
+ KendallTree kendall; // Local Kendall tree for simple version
568
+ clusterquality_dist(diss, cluster, weights, n, stats, nclusters, asw, kendall);
569
+ finalizeKendall(kendall);
570
+ }