sequenzo 0.1.31__cp310-cp310-macosx_10_9_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (299)
  1. _sequenzo_fastcluster.cpython-310-darwin.so +0 -0
  2. sequenzo/__init__.py +349 -0
  3. sequenzo/big_data/__init__.py +12 -0
  4. sequenzo/big_data/clara/__init__.py +26 -0
  5. sequenzo/big_data/clara/clara.py +476 -0
  6. sequenzo/big_data/clara/utils/__init__.py +27 -0
  7. sequenzo/big_data/clara/utils/aggregatecases.py +92 -0
  8. sequenzo/big_data/clara/utils/davies_bouldin.py +91 -0
  9. sequenzo/big_data/clara/utils/get_weighted_diss.cpython-310-darwin.so +0 -0
  10. sequenzo/big_data/clara/utils/wfcmdd.py +205 -0
  11. sequenzo/big_data/clara/visualization.py +88 -0
  12. sequenzo/clustering/KMedoids.py +178 -0
  13. sequenzo/clustering/__init__.py +30 -0
  14. sequenzo/clustering/clustering_c_code.cpython-310-darwin.so +0 -0
  15. sequenzo/clustering/hierarchical_clustering.py +1256 -0
  16. sequenzo/clustering/sequenzo_fastcluster/fastcluster.py +495 -0
  17. sequenzo/clustering/sequenzo_fastcluster/src/fastcluster.cpp +1877 -0
  18. sequenzo/clustering/sequenzo_fastcluster/src/fastcluster_python.cpp +1264 -0
  19. sequenzo/clustering/src/KMedoid.cpp +263 -0
  20. sequenzo/clustering/src/PAM.cpp +237 -0
  21. sequenzo/clustering/src/PAMonce.cpp +265 -0
  22. sequenzo/clustering/src/cluster_quality.cpp +496 -0
  23. sequenzo/clustering/src/cluster_quality.h +128 -0
  24. sequenzo/clustering/src/cluster_quality_backup.cpp +570 -0
  25. sequenzo/clustering/src/module.cpp +228 -0
  26. sequenzo/clustering/src/weightedinertia.cpp +111 -0
  27. sequenzo/clustering/utils/__init__.py +27 -0
  28. sequenzo/clustering/utils/disscenter.py +122 -0
  29. sequenzo/data_preprocessing/__init__.py +22 -0
  30. sequenzo/data_preprocessing/helpers.py +303 -0
  31. sequenzo/datasets/__init__.py +41 -0
  32. sequenzo/datasets/biofam.csv +2001 -0
  33. sequenzo/datasets/biofam_child_domain.csv +2001 -0
  34. sequenzo/datasets/biofam_left_domain.csv +2001 -0
  35. sequenzo/datasets/biofam_married_domain.csv +2001 -0
  36. sequenzo/datasets/chinese_colonial_territories.csv +12 -0
  37. sequenzo/datasets/country_co2_emissions.csv +194 -0
  38. sequenzo/datasets/country_co2_emissions_global_deciles.csv +195 -0
  39. sequenzo/datasets/country_co2_emissions_global_quintiles.csv +195 -0
  40. sequenzo/datasets/country_co2_emissions_local_deciles.csv +195 -0
  41. sequenzo/datasets/country_co2_emissions_local_quintiles.csv +195 -0
  42. sequenzo/datasets/country_gdp_per_capita.csv +194 -0
  43. sequenzo/datasets/dyadic_children.csv +61 -0
  44. sequenzo/datasets/dyadic_parents.csv +61 -0
  45. sequenzo/datasets/mvad.csv +713 -0
  46. sequenzo/datasets/pairfam_activity_by_month.csv +1028 -0
  47. sequenzo/datasets/pairfam_activity_by_year.csv +1028 -0
  48. sequenzo/datasets/pairfam_family_by_month.csv +1028 -0
  49. sequenzo/datasets/pairfam_family_by_year.csv +1028 -0
  50. sequenzo/datasets/political_science_aid_shock.csv +166 -0
  51. sequenzo/datasets/political_science_donor_fragmentation.csv +157 -0
  52. sequenzo/define_sequence_data.py +1400 -0
  53. sequenzo/dissimilarity_measures/__init__.py +31 -0
  54. sequenzo/dissimilarity_measures/c_code.cpython-310-darwin.so +0 -0
  55. sequenzo/dissimilarity_measures/get_distance_matrix.py +762 -0
  56. sequenzo/dissimilarity_measures/get_substitution_cost_matrix.py +246 -0
  57. sequenzo/dissimilarity_measures/src/DHDdistance.cpp +148 -0
  58. sequenzo/dissimilarity_measures/src/LCPdistance.cpp +114 -0
  59. sequenzo/dissimilarity_measures/src/LCPspellDistance.cpp +215 -0
  60. sequenzo/dissimilarity_measures/src/OMdistance.cpp +247 -0
  61. sequenzo/dissimilarity_measures/src/OMspellDistance.cpp +281 -0
  62. sequenzo/dissimilarity_measures/src/__init__.py +0 -0
  63. sequenzo/dissimilarity_measures/src/dist2matrix.cpp +63 -0
  64. sequenzo/dissimilarity_measures/src/dp_utils.h +160 -0
  65. sequenzo/dissimilarity_measures/src/module.cpp +40 -0
  66. sequenzo/dissimilarity_measures/src/setup.py +30 -0
  67. sequenzo/dissimilarity_measures/src/utils.h +25 -0
  68. sequenzo/dissimilarity_measures/src/xsimd/.github/cmake-test/main.cpp +6 -0
  69. sequenzo/dissimilarity_measures/src/xsimd/benchmark/main.cpp +159 -0
  70. sequenzo/dissimilarity_measures/src/xsimd/benchmark/xsimd_benchmark.hpp +565 -0
  71. sequenzo/dissimilarity_measures/src/xsimd/docs/source/conf.py +37 -0
  72. sequenzo/dissimilarity_measures/src/xsimd/examples/mandelbrot.cpp +330 -0
  73. sequenzo/dissimilarity_measures/src/xsimd/examples/pico_bench.hpp +246 -0
  74. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_arithmetic.hpp +266 -0
  75. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_complex.hpp +112 -0
  76. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_details.hpp +323 -0
  77. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_logical.hpp +218 -0
  78. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_math.hpp +2583 -0
  79. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_memory.hpp +880 -0
  80. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_rounding.hpp +72 -0
  81. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_swizzle.hpp +174 -0
  82. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_trigo.hpp +978 -0
  83. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx.hpp +1924 -0
  84. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx2.hpp +1144 -0
  85. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512bw.hpp +656 -0
  86. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512cd.hpp +28 -0
  87. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512dq.hpp +244 -0
  88. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512er.hpp +20 -0
  89. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512f.hpp +2650 -0
  90. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512ifma.hpp +20 -0
  91. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512pf.hpp +20 -0
  92. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vbmi.hpp +77 -0
  93. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vbmi2.hpp +131 -0
  94. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vnni_avx512bw.hpp +20 -0
  95. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vnni_avx512vbmi2.hpp +20 -0
  96. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avxvnni.hpp +20 -0
  97. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_common.hpp +24 -0
  98. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_common_fwd.hpp +77 -0
  99. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_constants.hpp +393 -0
  100. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_emulated.hpp +788 -0
  101. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_avx.hpp +93 -0
  102. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_avx2.hpp +46 -0
  103. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_sse.hpp +97 -0
  104. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma4.hpp +92 -0
  105. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_i8mm_neon64.hpp +17 -0
  106. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_isa.hpp +142 -0
  107. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_neon.hpp +3142 -0
  108. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_neon64.hpp +1543 -0
  109. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_rvv.hpp +1513 -0
  110. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_scalar.hpp +1260 -0
  111. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse2.hpp +2024 -0
  112. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse3.hpp +67 -0
  113. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse4_1.hpp +339 -0
  114. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse4_2.hpp +44 -0
  115. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_ssse3.hpp +186 -0
  116. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sve.hpp +1155 -0
  117. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_vsx.hpp +892 -0
  118. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_wasm.hpp +1780 -0
  119. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_arch.hpp +240 -0
  120. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_config.hpp +484 -0
  121. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_cpuid.hpp +269 -0
  122. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_inline.hpp +27 -0
  123. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/math/xsimd_rem_pio2.hpp +719 -0
  124. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/memory/xsimd_aligned_allocator.hpp +349 -0
  125. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/memory/xsimd_alignment.hpp +91 -0
  126. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_all_registers.hpp +55 -0
  127. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_api.hpp +2765 -0
  128. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx2_register.hpp +44 -0
  129. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512bw_register.hpp +51 -0
  130. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512cd_register.hpp +51 -0
  131. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512dq_register.hpp +51 -0
  132. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512er_register.hpp +51 -0
  133. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512f_register.hpp +77 -0
  134. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512ifma_register.hpp +51 -0
  135. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512pf_register.hpp +51 -0
  136. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vbmi2_register.hpp +51 -0
  137. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vbmi_register.hpp +51 -0
  138. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512bw_register.hpp +54 -0
  139. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512vbmi2_register.hpp +53 -0
  140. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx_register.hpp +64 -0
  141. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avxvnni_register.hpp +44 -0
  142. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_batch.hpp +1524 -0
  143. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_batch_constant.hpp +300 -0
  144. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_common_arch.hpp +47 -0
  145. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_emulated_register.hpp +80 -0
  146. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_avx2_register.hpp +50 -0
  147. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_avx_register.hpp +50 -0
  148. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_sse_register.hpp +50 -0
  149. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma4_register.hpp +50 -0
  150. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_i8mm_neon64_register.hpp +55 -0
  151. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_neon64_register.hpp +55 -0
  152. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_neon_register.hpp +154 -0
  153. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_register.hpp +94 -0
  154. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_rvv_register.hpp +506 -0
  155. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse2_register.hpp +59 -0
  156. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse3_register.hpp +49 -0
  157. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse4_1_register.hpp +48 -0
  158. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse4_2_register.hpp +48 -0
  159. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_ssse3_register.hpp +48 -0
  160. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sve_register.hpp +156 -0
  161. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_traits.hpp +337 -0
  162. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_utils.hpp +536 -0
  163. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_vsx_register.hpp +77 -0
  164. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_wasm_register.hpp +59 -0
  165. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/xsimd.hpp +75 -0
  166. sequenzo/dissimilarity_measures/src/xsimd/test/architectures/dummy.cpp +7 -0
  167. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set.cpp +13 -0
  168. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean.cpp +24 -0
  169. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_aligned.cpp +25 -0
  170. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_arch_independent.cpp +28 -0
  171. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_tag_dispatch.cpp +25 -0
  172. sequenzo/dissimilarity_measures/src/xsimd/test/doc/manipulating_abstract_batches.cpp +7 -0
  173. sequenzo/dissimilarity_measures/src/xsimd/test/doc/manipulating_parametric_batches.cpp +8 -0
  174. sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum.hpp +31 -0
  175. sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum_avx2.cpp +3 -0
  176. sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum_sse2.cpp +3 -0
  177. sequenzo/dissimilarity_measures/src/xsimd/test/doc/writing_vectorized_code.cpp +11 -0
  178. sequenzo/dissimilarity_measures/src/xsimd/test/main.cpp +31 -0
  179. sequenzo/dissimilarity_measures/src/xsimd/test/test_api.cpp +230 -0
  180. sequenzo/dissimilarity_measures/src/xsimd/test/test_arch.cpp +217 -0
  181. sequenzo/dissimilarity_measures/src/xsimd/test/test_basic_math.cpp +183 -0
  182. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch.cpp +1049 -0
  183. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_bool.cpp +508 -0
  184. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_cast.cpp +409 -0
  185. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_complex.cpp +712 -0
  186. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_constant.cpp +286 -0
  187. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_float.cpp +141 -0
  188. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_int.cpp +365 -0
  189. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_manip.cpp +308 -0
  190. sequenzo/dissimilarity_measures/src/xsimd/test/test_bitwise_cast.cpp +222 -0
  191. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_exponential.cpp +226 -0
  192. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_hyperbolic.cpp +183 -0
  193. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_power.cpp +265 -0
  194. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_trigonometric.cpp +236 -0
  195. sequenzo/dissimilarity_measures/src/xsimd/test/test_conversion.cpp +248 -0
  196. sequenzo/dissimilarity_measures/src/xsimd/test/test_custom_default_arch.cpp +28 -0
  197. sequenzo/dissimilarity_measures/src/xsimd/test/test_error_gamma.cpp +170 -0
  198. sequenzo/dissimilarity_measures/src/xsimd/test/test_explicit_batch_instantiation.cpp +32 -0
  199. sequenzo/dissimilarity_measures/src/xsimd/test/test_exponential.cpp +202 -0
  200. sequenzo/dissimilarity_measures/src/xsimd/test/test_extract_pair.cpp +92 -0
  201. sequenzo/dissimilarity_measures/src/xsimd/test/test_fp_manipulation.cpp +77 -0
  202. sequenzo/dissimilarity_measures/src/xsimd/test/test_gnu_source.cpp +30 -0
  203. sequenzo/dissimilarity_measures/src/xsimd/test/test_hyperbolic.cpp +167 -0
  204. sequenzo/dissimilarity_measures/src/xsimd/test/test_load_store.cpp +304 -0
  205. sequenzo/dissimilarity_measures/src/xsimd/test/test_memory.cpp +61 -0
  206. sequenzo/dissimilarity_measures/src/xsimd/test/test_poly_evaluation.cpp +64 -0
  207. sequenzo/dissimilarity_measures/src/xsimd/test/test_power.cpp +184 -0
  208. sequenzo/dissimilarity_measures/src/xsimd/test/test_rounding.cpp +199 -0
  209. sequenzo/dissimilarity_measures/src/xsimd/test/test_select.cpp +101 -0
  210. sequenzo/dissimilarity_measures/src/xsimd/test/test_shuffle.cpp +760 -0
  211. sequenzo/dissimilarity_measures/src/xsimd/test/test_sum.cpp +4 -0
  212. sequenzo/dissimilarity_measures/src/xsimd/test/test_sum.hpp +34 -0
  213. sequenzo/dissimilarity_measures/src/xsimd/test/test_traits.cpp +172 -0
  214. sequenzo/dissimilarity_measures/src/xsimd/test/test_trigonometric.cpp +208 -0
  215. sequenzo/dissimilarity_measures/src/xsimd/test/test_utils.hpp +611 -0
  216. sequenzo/dissimilarity_measures/src/xsimd/test/test_wasm/test_wasm_playwright.py +123 -0
  217. sequenzo/dissimilarity_measures/src/xsimd/test/test_xsimd_api.cpp +1460 -0
  218. sequenzo/dissimilarity_measures/utils/__init__.py +16 -0
  219. sequenzo/dissimilarity_measures/utils/get_LCP_length_for_2_seq.py +44 -0
  220. sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.cpython-310-darwin.so +0 -0
  221. sequenzo/dissimilarity_measures/utils/seqconc.cpython-310-darwin.so +0 -0
  222. sequenzo/dissimilarity_measures/utils/seqdss.cpython-310-darwin.so +0 -0
  223. sequenzo/dissimilarity_measures/utils/seqdur.cpython-310-darwin.so +0 -0
  224. sequenzo/dissimilarity_measures/utils/seqlength.cpython-310-darwin.so +0 -0
  225. sequenzo/multidomain/__init__.py +23 -0
  226. sequenzo/multidomain/association_between_domains.py +311 -0
  227. sequenzo/multidomain/cat.py +597 -0
  228. sequenzo/multidomain/combt.py +519 -0
  229. sequenzo/multidomain/dat.py +81 -0
  230. sequenzo/multidomain/idcd.py +139 -0
  231. sequenzo/multidomain/linked_polyad.py +292 -0
  232. sequenzo/openmp_setup.py +233 -0
  233. sequenzo/prefix_tree/__init__.py +62 -0
  234. sequenzo/prefix_tree/hub.py +114 -0
  235. sequenzo/prefix_tree/individual_level_indicators.py +1321 -0
  236. sequenzo/prefix_tree/spell_individual_level_indicators.py +580 -0
  237. sequenzo/prefix_tree/spell_level_indicators.py +297 -0
  238. sequenzo/prefix_tree/system_level_indicators.py +544 -0
  239. sequenzo/prefix_tree/utils.py +54 -0
  240. sequenzo/seqhmm/__init__.py +95 -0
  241. sequenzo/seqhmm/advanced_optimization.py +305 -0
  242. sequenzo/seqhmm/bootstrap.py +411 -0
  243. sequenzo/seqhmm/build_hmm.py +142 -0
  244. sequenzo/seqhmm/build_mhmm.py +136 -0
  245. sequenzo/seqhmm/build_nhmm.py +121 -0
  246. sequenzo/seqhmm/fit_mhmm.py +62 -0
  247. sequenzo/seqhmm/fit_model.py +61 -0
  248. sequenzo/seqhmm/fit_nhmm.py +76 -0
  249. sequenzo/seqhmm/formulas.py +289 -0
  250. sequenzo/seqhmm/forward_backward_nhmm.py +276 -0
  251. sequenzo/seqhmm/gradients_nhmm.py +306 -0
  252. sequenzo/seqhmm/hmm.py +291 -0
  253. sequenzo/seqhmm/mhmm.py +314 -0
  254. sequenzo/seqhmm/model_comparison.py +238 -0
  255. sequenzo/seqhmm/multichannel_em.py +282 -0
  256. sequenzo/seqhmm/multichannel_utils.py +138 -0
  257. sequenzo/seqhmm/nhmm.py +270 -0
  258. sequenzo/seqhmm/nhmm_utils.py +191 -0
  259. sequenzo/seqhmm/predict.py +137 -0
  260. sequenzo/seqhmm/predict_mhmm.py +142 -0
  261. sequenzo/seqhmm/simulate.py +878 -0
  262. sequenzo/seqhmm/utils.py +218 -0
  263. sequenzo/seqhmm/visualization.py +910 -0
  264. sequenzo/sequence_characteristics/__init__.py +40 -0
  265. sequenzo/sequence_characteristics/complexity_index.py +49 -0
  266. sequenzo/sequence_characteristics/overall_cross_sectional_entropy.py +220 -0
  267. sequenzo/sequence_characteristics/plot_characteristics.py +593 -0
  268. sequenzo/sequence_characteristics/simple_characteristics.py +311 -0
  269. sequenzo/sequence_characteristics/state_frequencies_and_entropy_per_sequence.py +39 -0
  270. sequenzo/sequence_characteristics/turbulence.py +155 -0
  271. sequenzo/sequence_characteristics/variance_of_spell_durations.py +86 -0
  272. sequenzo/sequence_characteristics/within_sequence_entropy.py +43 -0
  273. sequenzo/suffix_tree/__init__.py +66 -0
  274. sequenzo/suffix_tree/hub.py +114 -0
  275. sequenzo/suffix_tree/individual_level_indicators.py +1679 -0
  276. sequenzo/suffix_tree/spell_individual_level_indicators.py +493 -0
  277. sequenzo/suffix_tree/spell_level_indicators.py +248 -0
  278. sequenzo/suffix_tree/system_level_indicators.py +535 -0
  279. sequenzo/suffix_tree/utils.py +56 -0
  280. sequenzo/version_check.py +283 -0
  281. sequenzo/visualization/__init__.py +29 -0
  282. sequenzo/visualization/plot_mean_time.py +222 -0
  283. sequenzo/visualization/plot_modal_state.py +276 -0
  284. sequenzo/visualization/plot_most_frequent_sequences.py +147 -0
  285. sequenzo/visualization/plot_relative_frequency.py +405 -0
  286. sequenzo/visualization/plot_sequence_index.py +1175 -0
  287. sequenzo/visualization/plot_single_medoid.py +153 -0
  288. sequenzo/visualization/plot_state_distribution.py +651 -0
  289. sequenzo/visualization/plot_transition_matrix.py +190 -0
  290. sequenzo/visualization/utils/__init__.py +23 -0
  291. sequenzo/visualization/utils/utils.py +310 -0
  292. sequenzo/with_event_history_analysis/__init__.py +35 -0
  293. sequenzo/with_event_history_analysis/sequence_analysis_multi_state_model.py +850 -0
  294. sequenzo/with_event_history_analysis/sequence_history_analysis.py +283 -0
  295. sequenzo-0.1.31.dist-info/METADATA +286 -0
  296. sequenzo-0.1.31.dist-info/RECORD +299 -0
  297. sequenzo-0.1.31.dist-info/WHEEL +5 -0
  298. sequenzo-0.1.31.dist-info/licenses/LICENSE +28 -0
  299. sequenzo-0.1.31.dist-info/top_level.txt +2 -0
@@ -0,0 +1,580 @@
1
+ """
2
+ Spell-based Prefix Tree: Individual-level divergence indicators.
3
+
4
+ Provides per-sequence (per-individual) rarity and divergence measures when the
5
+ unit of analysis is SPELL rather than time index. Each "level" is one spell;
6
+ rarity and divergence are defined along spell levels (1st spell, 2nd spell, ...).
7
+ Variable-length sequences are supported: individuals with fewer spells have NaN
8
+ at spell levels beyond their length.
9
+
10
+ Design mirrors: sequenzo/prefix_tree/individual_level_indicators.py (position-based).
11
+ - Position version: level = time index t, prefix = states up to year t.
12
+ - Spell version: level = spell index k, prefix = states of first k spells.
13
+
14
+ Usage:
15
+ from sequenzo.prefix_tree import build_spell_prefix_tree
16
+ from sequenzo.prefix_tree.spell_individual_level_indicators import SpellIndividualDivergence
17
+
18
+ tree = build_spell_prefix_tree(seqdata, expcost=0)
19
+ ind = SpellIndividualDivergence(tree)
20
+ rarity_per_spell = ind.compute_prefix_rarity_per_spell()
21
+ diverged = ind.compute_diverged(method="zscore", z_threshold=1.5)
22
+
23
+ @Author : Yuqi Liang 梁彧祺
24
+ @File : spell_individual_level_indicators.py
25
+ @Time : 2026/1/30
26
+ @Desc : Individual-level indicators for spell-based prefix tree analysis.
27
+ """
28
+ from typing import Any, Dict, List, Optional
29
+
30
+ import numpy as np
31
+ import pandas as pd
32
+
33
+ from .spell_level_indicators import SpellPrefixTree
34
+
35
+
36
+ # Small constant to avoid log(0) in rarity computation
37
+ _EPS = 1e-10
38
+
39
+
40
+ class SpellIndividualDivergence:
41
+ """
42
+ Individual-level divergence and rarity for spell-based prefix trees.
43
+
44
+ Requires a SpellPrefixTree that was built with build_spell_prefix_tree(seqdata, ...),
45
+ so that tree._spell_states and tree._spell_durations exist and tree.counts /
46
+ tree.total_sequences are populated.
47
+ """
48
+
49
def __init__(self, tree: SpellPrefixTree):
    """
    Bind this indicator object to a spell-based prefix tree.

    Parameters
    ----------
    tree : SpellPrefixTree
        Tree exposing ``_spell_states``, ``_spell_durations`` and
        ``total_sequences``. The error messages below point to
        ``build_spell_prefix_tree(seqdata)`` as the way to obtain such a tree.

    Raises
    ------
    TypeError
        If ``tree`` is not a SpellPrefixTree instance.
    ValueError
        If ``tree`` lacks ``_spell_states`` or ``_spell_durations``.
    """
    if not isinstance(tree, SpellPrefixTree):
        raise TypeError(
            "[!] SpellIndividualDivergence requires a SpellPrefixTree. "
            "Use: build_spell_prefix_tree(seqdata) then SpellIndividualDivergence(tree)"
        )

    # Both per-individual spell attributes must be present on the tree.
    required_attrs = ("_spell_states", "_spell_durations")
    if not all(hasattr(tree, attr) for attr in required_attrs):
        raise ValueError(
            "[!] SpellPrefixTree must be built with build_spell_prefix_tree(seqdata) "
            "so that _spell_states and _spell_durations are attached."
        )

    self.tree = tree
    self.spell_states = tree._spell_states
    self.spell_durations = tree._spell_durations
    self.N = tree.total_sequences

    # Longest spell sequence defines the number of spell levels (0 when empty).
    if self.spell_states:
        self.max_spells = max(map(len, self.spell_states))
    else:
        self.max_spells = 0
65
+
66
def _build_rarity_matrix(self) -> np.ndarray:
    """
    Return the (N, max_spells) matrix of prefix rarity per spell level.

    For individual i and spell level k the cell holds
    ``-log(count(prefix_{i,k}) / max(N, 1) + _EPS)``, where ``prefix_{i,k}``
    is the tuple of the first k spell states of individual i and the count is
    looked up in ``self.tree.counts`` (a missing prefix counts as 0).
    Cells at levels beyond an individual's number of spells stay ``np.nan``.
    """
    n_individuals = self.N
    prefix_counts = self.tree.counts
    denominator = max(n_individuals, 1)  # guards against division by zero
    matrix = np.full((n_individuals, self.max_spells), np.nan, dtype=float)

    for row, states in enumerate(self.spell_states):
        path = tuple(states)
        # path[:depth] is the prefix made of the first `depth` spells.
        for depth in range(1, len(path) + 1):
            share = prefix_counts.get(path[:depth], 0) / denominator
            matrix[row, depth - 1] = -np.log(share + _EPS)
    return matrix
83
+
84
def compute_prefix_rarity_per_spell(
    self,
    as_dataframe: bool = True,
    column_prefix: str = "k",
    zscore: bool = False,
):
    """
    Return prefix rarity for every individual at every spell level.

    The raw values come from ``self._build_rarity_matrix()``: one row per
    individual, one column per spell level, NaN where the individual has no
    spell at that level.

    Parameters
    ----------
    as_dataframe : bool, default True
        True  -> pandas DataFrame with columns ``<column_prefix>1``,
        ``<column_prefix>2``, ...
        False -> the underlying NumPy array.
    column_prefix : str, default "k"
        Prefix of the DataFrame column names.
    zscore : bool, default False
        If True, standardize each column with its NaN-ignoring mean and
        sample standard deviation (ddof=1). Non-finite results (e.g. from a
        zero or undefined standard deviation) are replaced by NaN.

    Returns
    -------
    pandas.DataFrame or np.ndarray
        Rarity per spell level, optionally z-scored.
    """
    values = self._build_rarity_matrix()

    if zscore:
        mu = np.nanmean(values, axis=0)
        sigma = np.nanstd(values, axis=0, ddof=1)
        with np.errstate(invalid="ignore", divide="ignore"):
            values = (values - mu) / sigma
        # The division produced a fresh array, so masking in place is safe.
        values[~np.isfinite(values)] = np.nan

    if as_dataframe:
        labels = [f"{column_prefix}{level}" for level in range(1, self.max_spells + 1)]
        return pd.DataFrame(values, columns=labels)
    return values
126
+
127
def compute_prefix_rarity_score(self) -> List[float]:
    """
    Return one aggregated rarity score per individual.

    Each score is the sum of the finite entries in that individual's row of
    the rarity matrix, so only spell levels the individual actually has
    contribute. A row with no finite entry yields NaN.
    """
    matrix = self._build_rarity_matrix()

    def _row_total(values):
        # Keep only the levels that exist for this individual.
        finite = values[np.isfinite(values)]
        return float(np.sum(finite)) if finite.size else np.nan

    return [_row_total(matrix[i, :]) for i in range(self.N)]
141
+
142
def compute_standardized_rarity_score(
    self,
    min_k: int = 2,
    window: int = 1,
) -> List[float]:
    """
    Standardized rarity score per individual for divergence classification.

    For each individual i:
        score_i = max over window starts of (min over the window of z_{i,k})
    where z_{i,k} is the column-wise (per spell level) z-score of rarity,
    computed with NaN-ignoring mean and sample std (ddof=1). Only windows
    that start at spell level >= min_k, fit entirely inside the available
    spell levels, and contain no NaN are considered.

    Parameters
    ----------
    min_k : int, default 2
        Minimum spell level (1-indexed) at which a window may start.
        Values below 1 are treated as 1.
    window : int, default 1
        Number of consecutive spell levels in a window. Must be >= 1.

    Returns
    -------
    List[float]
        One score per individual; higher = more atypical. NaN when no valid
        window exists (including when ``window`` exceeds the number of
        spell levels).

    Raises
    ------
    ValueError
        If ``window`` is smaller than 1.
    """
    if window < 1:
        raise ValueError("[!] window must be a positive integer (>= 1).")

    rarity = self._build_rarity_matrix()
    col_means = np.nanmean(rarity, axis=0)
    col_stds = np.nanstd(rarity, axis=0, ddof=1)
    with np.errstate(invalid="ignore", divide="ignore"):
        rarity_z = (rarity - col_means) / col_stds
    # Zero/undefined std gives inf or NaN; treat all of those as missing.
    rarity_z = np.where(np.isfinite(rarity_z), rarity_z, np.nan)

    n_levels = rarity_z.shape[1]
    # min_k is 1-indexed; clamp so a value < 1 cannot turn into a negative
    # index that would wrap around to the last columns.
    first_start = max(0, min_k - 1)
    # Last start at which a full window still fits. Deliberately NOT clamped
    # to 0: when window > n_levels this is negative and the range below is
    # empty, instead of indexing past the end of the row.
    last_start = n_levels - window

    standardized_scores = []
    for z_row in rarity_z:
        candidate_values = []
        for t0 in range(first_start, last_start + 1):
            segment = z_row[t0:t0 + window]
            if not np.all(np.isfinite(segment)):
                continue
            candidate_values.append(float(np.min(segment)))
        standardized_scores.append(
            float(np.max(candidate_values)) if candidate_values else np.nan
        )
    return standardized_scores
191
+
192
def compute_diverged(
    self,
    z_threshold: float = 1.5,
    min_k: int = 2,
    window: int = 1,
    inclusive: bool = False,
    group_labels: Optional[Any] = None,
    *,
    method: str = "zscore",
    proportion: Optional[float] = None,
    quantile_p: Optional[float] = None,
    min_count: int = 1,
) -> List[int]:
    """
    Compute binary divergence flags (0/1) per individual using spell-level rarity.

    Methods
    -------
    - "zscore": diverged if there exists a window of length `window` starting at a
      spell level >= `min_k` such that every z-score of rarity in that window is
      finite and above `z_threshold`.
    - "top_proportion": flag the top `proportion` of individuals ranked by the
      standardized rarity score (within each group, or globally).
    - "quantile": flag individuals whose standardized rarity score is >= the
      `quantile_p` quantile of the finite scores (within each group, or globally).

    Any other method name falls through to the "zscore" logic.

    Parameters
    ----------
    z_threshold : float, default 1.5
        Used for method "zscore". Diverged when z > z_threshold (or >= if inclusive).
    min_k : int, default 2
        Minimum spell level (1-indexed) to consider.
    window : int, default 1
        Window length for the zscore method and for the standardized score.
    inclusive : bool, default False
        If True, use >= instead of > for z_threshold.
    group_labels : array-like or None
        If provided, top_proportion and quantile are applied within each group.
        Ignored by the zscore method.
    method : str, default "zscore"
        One of "zscore", "top_proportion" (aliases: "topk", "proportion", "rank"),
        "quantile". Case-insensitive; None is treated as "zscore".
    proportion : float or None
        For top_proportion. Defaults to 0.10 when None.
    quantile_p : float or None
        For quantile. Defaults to 0.90 when None.
    min_count : int, default 1
        Lower bound on the number selected per group for top_proportion
        (capped at the number of finite scores in the group).

    Returns
    -------
    List[int]
        0/1 per individual (1 = diverged).
    """
    N = self.N
    method_norm = (method or "zscore").lower()
    rank_aliases = {"top_proportion", "topk", "proportion", "rank"}

    # --- top_proportion / quantile: use standardized scores ---
    if method_norm in rank_aliases or method_norm == "quantile":
        scores = np.asarray(
            self.compute_standardized_rarity_score(min_k=min_k, window=window), dtype=float
        )
        # The global case is handled as one group containing every individual.
        if group_labels is None:
            groups = [np.arange(N)]
        else:
            labels = np.asarray(group_labels)
            groups = [np.where(labels == g)[0] for g in pd.unique(labels)]

        flags = np.zeros(N, dtype=int)
        for idx in groups:
            vals = scores[idx]
            finite_mask = np.isfinite(vals)
            n_valid = int(np.sum(finite_mask))
            if n_valid == 0:
                continue

            if method_norm == "quantile":
                q = quantile_p if quantile_p is not None else 0.90
                xq = float(np.quantile(vals[finite_mask], q))
                # NaN scores compare False and therefore stay unflagged.
                with np.errstate(invalid="ignore"):
                    flags[idx[vals >= xq]] = 1
                continue

            p = proportion if proportion is not None else 0.10
            k = min(max(int(np.floor(p * n_valid)), int(min_count)), n_valid)
            if k < 1:
                continue
            # Non-finite scores are mapped to -inf so they sort first; the stable
            # sort keeps tie-breaking deterministic.
            order_local = np.argsort(np.where(finite_mask, vals, -np.inf), kind="mergesort")
            flags[idx[order_local[-k:]]] = 1
        return flags.tolist()

    # --- zscore: build rarity matrix, z-score by column, then window logic ---
    rarity = self._build_rarity_matrix()
    col_means = np.nanmean(rarity, axis=0)
    col_stds = np.nanstd(rarity, axis=0, ddof=1)
    with np.errstate(invalid="ignore", divide="ignore"):
        rarity_z = (rarity - col_means) / col_stds
    rarity_z = np.where(np.isfinite(rarity_z), rarity_z, np.nan)

    start_min = min_k - 1
    # Deliberately not clamped to 0: when the window is longer than the number of
    # spell levels there is no valid start, so the range below must be empty
    # instead of indexing past the last column.
    last_start = self.max_spells - window

    # assumes the rarity matrix has one row per individual (N rows) — TODO confirm
    diverged = np.zeros(N, dtype=bool)
    offsets = np.arange(window)
    with np.errstate(invalid="ignore"):
        for t0 in range(start_min, last_start + 1):
            block = rarity_z[:, t0 + offsets]
            above = (block >= z_threshold) if inclusive else (block > z_threshold)
            # A window counts only if every z in it is finite and above threshold.
            diverged |= np.all(np.isfinite(block) & above, axis=1)
    return diverged.astype(int).tolist()
344
+
345
def _compute_window_max_list(self, min_k: int, window: int) -> np.ndarray:
    """
    Per-individual, per starting spell level: max z in that window.

    Used by compute_first_divergence_spell for rank/quantile methods.

    Parameters
    ----------
    min_k : int
        Minimum spell level (1-indexed); column 0 of the result corresponds to
        the window starting at 0-based index ``min_k - 1``.
    window : int
        Number of consecutive spell levels in each window.

    Returns
    -------
    np.ndarray
        Float array of shape ``(self.N, n_starts)``. An entry is the maximum
        z-score of the window when every z in the window is finite, else NaN.
        ``n_starts`` is 0 when no window fits (e.g. ``window > self.max_spells``).
    """
    rarity = self._build_rarity_matrix()
    col_means = np.nanmean(rarity, axis=0)
    col_stds = np.nanstd(rarity, axis=0, ddof=1)
    with np.errstate(invalid="ignore", divide="ignore"):
        rarity_z = (rarity - col_means) / col_stds
    rarity_z = np.where(np.isfinite(rarity_z), rarity_z, np.nan)

    start_min = min_k - 1
    # Not clamped to 0: a window longer than the number of spell levels has no
    # valid start, so n_starts must become 0 rather than indexing out of bounds.
    last_start = self.max_spells - window
    n_starts = max(0, last_start - start_min + 1)

    # assumes the rarity matrix has self.N rows — TODO confirm
    window_maxes = np.full((self.N, n_starts), np.nan, dtype=float)
    offsets = np.arange(window)
    for col, t0 in enumerate(range(start_min, last_start + 1)):
        block = rarity_z[:, t0 + offsets]
        complete = np.all(np.isfinite(block), axis=1)
        # Only rows whose whole window is finite get a value; the rest stay NaN.
        window_maxes[complete, col] = block[complete].max(axis=1)
    return window_maxes
368
+
369
def compute_first_divergence_spell(
    self,
    z_threshold: float = 1.5,
    min_k: int = 2,
    window: int = 1,
    inclusive: bool = False,
    group_labels: Optional[Any] = None,
    *,
    method: str = "zscore",
    proportion: Optional[float] = None,
    quantile_p: Optional[float] = None,
    min_count: int = 1,
) -> List[Optional[int]]:
    """
    First spell level (1-indexed) at which the individual is diverged, or None.

    Same methods as compute_diverged.

    - "zscore": first starting spell level (>= min_k) whose window of z-scores is
      entirely finite and above the threshold.
    - "top_proportion" (aliases "topk", "proportion", "rank") and "quantile":
      individuals are selected on the standardized rarity score exactly as in
      compute_diverged; for each selected individual the result is the first
      starting level whose window-max z-score reaches the selection threshold
      (the k-th largest score, or the quantile value). Unselected individuals
      get None.

    Any other method name falls through to the "zscore" logic.

    Parameters
    ----------
    z_threshold : float, default 1.5
        Threshold for the zscore method.
    min_k : int, default 2
        Minimum spell level (1-indexed) to consider.
    window : int, default 1
        Window length.
    inclusive : bool, default False
        If True, the zscore method uses >= instead of >.
    group_labels : array-like or None
        If provided, rank/quantile selection is done within each group.
    method : str, default "zscore"
        Selection method; case-insensitive, None is treated as "zscore".
    proportion : float or None
        For top_proportion. Defaults to 0.10 when None.
    quantile_p : float or None
        For quantile. Defaults to 0.90 when None.
    min_count : int, default 1
        Lower bound on the number selected per group for top_proportion.

    Returns
    -------
    List[Optional[int]]
        One entry per individual: 1-indexed spell level, or None if never diverged.
    """
    N = self.N
    method_norm = (method or "zscore").lower()
    rank_aliases = {"top_proportion", "topk", "proportion", "rank"}

    if method_norm in rank_aliases or method_norm == "quantile":
        agg_scores = np.asarray(
            self.compute_standardized_rarity_score(min_k=min_k, window=window), dtype=float
        )
        per_start_window_max = self._compute_window_max_list(min_k, window)

        def _first_start(row: np.ndarray, threshold: float) -> Optional[int]:
            # Column t of the window-max matrix is the window starting at spell
            # level t + min_k (1-indexed).
            with np.errstate(invalid="ignore"):
                hits = np.flatnonzero(np.isfinite(row) & (row >= threshold))
            return int(hits[0]) + min_k if hits.size else None

        # The global case is handled as one group containing every individual.
        if group_labels is None:
            groups = [np.arange(N)]
        else:
            labels = np.asarray(group_labels)
            groups = [np.where(labels == g)[0] for g in pd.unique(labels)]

        spells: List[Optional[int]] = [None] * N
        for idx in groups:
            vals = agg_scores[idx]
            finite_mask = np.isfinite(vals)
            n_valid = int(np.sum(finite_mask))
            if n_valid == 0:
                continue

            if method_norm == "quantile":
                q = quantile_p if quantile_p is not None else 0.90
                xq = float(np.quantile(vals[finite_mask], q))
                with np.errstate(invalid="ignore"):
                    chosen_local = np.flatnonzero(finite_mask & (vals >= xq))
                threshold = xq
            else:
                p = proportion if proportion is not None else 0.10
                k = min(max(int(np.floor(p * n_valid)), int(min_count)), n_valid)
                if k < 1:
                    continue
                # Non-finite scores sort first (as -inf); stable sort for ties.
                order_local = np.argsort(
                    np.where(finite_mask, vals, -np.inf), kind="mergesort"
                )
                chosen_local = order_local[-k:]
                # Smallest score among the selected individuals.
                threshold = float(vals[order_local[-k]])
                if not np.isfinite(threshold):
                    continue

            for j_local in chosen_local:
                i_global = int(idx[j_local])
                spells[i_global] = _first_start(per_start_window_max[i_global, :], threshold)
        return spells

    # --- zscore: first window where all z above threshold ---
    rarity = self._build_rarity_matrix()
    col_means = np.nanmean(rarity, axis=0)
    col_stds = np.nanstd(rarity, axis=0, ddof=1)
    with np.errstate(invalid="ignore", divide="ignore"):
        rarity_z = (rarity - col_means) / col_stds
    rarity_z = np.where(np.isfinite(rarity_z), rarity_z, np.nan)

    start_min = min_k - 1
    # Not clamped to 0: when the window is longer than the number of spell
    # levels no start is valid, so the scan must be empty rather than index
    # past the last column.
    last_start = self.max_spells - window

    # assumes the rarity matrix has one row per individual (N rows) — TODO confirm
    spells = [None] * N
    pending = np.ones(N, dtype=bool)
    offsets = np.arange(window)
    with np.errstate(invalid="ignore"):
        for t0 in range(start_min, last_start + 1):
            if not pending.any():
                break
            block = rarity_z[:, t0 + offsets]
            above = (block >= z_threshold) if inclusive else (block > z_threshold)
            hit = pending & np.all(np.isfinite(block) & above, axis=1)
            for i in np.flatnonzero(hit):
                spells[int(i)] = t0 + 1
            # Individuals already assigned keep their earliest level.
            pending &= ~hit
    return spells
526
+
527
def compute_path_uniqueness(self) -> List[int]:
    """
    Per individual: count of spell levels at which the spell prefix is unique (freq == 1).

    For each individual's spell-state sequence, every prefix (first state, first
    two states, ...) is looked up as a tuple in ``self.tree.counts``; prefixes
    missing from the mapping count as frequency 0. The result is the number of
    prefixes whose recorded frequency is exactly 1.

    Returns
    -------
    List[int]
        One count per entry of ``self.spell_states``, in the same order.
    """
    counts = self.tree.counts
    uniqueness = []
    for states in self.spell_states:
        prefix = ()
        n_unique = 0
        for state in states:
            # Grow the prefix as a tuple so it can be used directly as the key.
            prefix += (state,)
            if counts.get(prefix, 0) == 1:
                n_unique += 1
        uniqueness.append(n_unique)
    return uniqueness
546
+
547
def diagnose_divergence_calculation(
    self,
    z_threshold: float = 1.5,
    min_k: int = 2,
    window: int = 1,
) -> Dict[str, Any]:
    """
    Summarize the inputs and outcome of the zscore spell-level divergence computation.

    Parameters
    ----------
    z_threshold : float, default 1.5
        Passed through to compute_first_divergence_spell.
    min_k : int, default 2
        Passed through to compute_first_divergence_spell.
    window : int, default 1
        Passed through to compute_first_divergence_spell.

    Returns
    -------
    dict
        - "rarity_std_by_spell": sample std (ddof=1) of each rarity column.
        - "spell_levels_with_zero_variance": 1-indexed levels whose std is NaN
          or below 1e-10.
        - "n_individuals_with_divergence": number of non-None first-divergence levels.
        - "divergence_spell_distribution": value counts of the first-divergence
          levels (missing values included), sorted by level.
        - "total_individuals": self.N.
        - "parameters_used": the z_threshold, min_k and window that were used.
    """
    std_per_level = pd.DataFrame(self._build_rarity_matrix()).std(axis=0, ddof=1)

    # Levels where z-scoring is degenerate: undefined or (numerically) zero spread.
    flat_levels = []
    for level_idx in range(self.max_spells):
        spread = std_per_level.iloc[level_idx]
        if pd.isna(spread) or spread < 1e-10:
            flat_levels.append(level_idx + 1)

    first_levels = self.compute_first_divergence_spell(
        z_threshold=z_threshold, min_k=min_k, window=window, method="zscore"
    )
    diverged_total = len([level for level in first_levels if level is not None])
    level_counts = pd.Series(first_levels).value_counts(dropna=False)
    distribution = level_counts.sort_index().to_dict()

    report: Dict[str, Any] = {}
    report["rarity_std_by_spell"] = std_per_level.tolist()
    report["spell_levels_with_zero_variance"] = flat_levels
    report["n_individuals_with_divergence"] = diverged_total
    report["divergence_spell_distribution"] = distribution
    report["total_individuals"] = self.N
    report["parameters_used"] = {"z_threshold": z_threshold, "min_k": min_k, "window": window}
    return report