sequenzo 0.1.31__cp310-cp310-macosx_10_9_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (299) hide show
  1. _sequenzo_fastcluster.cpython-310-darwin.so +0 -0
  2. sequenzo/__init__.py +349 -0
  3. sequenzo/big_data/__init__.py +12 -0
  4. sequenzo/big_data/clara/__init__.py +26 -0
  5. sequenzo/big_data/clara/clara.py +476 -0
  6. sequenzo/big_data/clara/utils/__init__.py +27 -0
  7. sequenzo/big_data/clara/utils/aggregatecases.py +92 -0
  8. sequenzo/big_data/clara/utils/davies_bouldin.py +91 -0
  9. sequenzo/big_data/clara/utils/get_weighted_diss.cpython-310-darwin.so +0 -0
  10. sequenzo/big_data/clara/utils/wfcmdd.py +205 -0
  11. sequenzo/big_data/clara/visualization.py +88 -0
  12. sequenzo/clustering/KMedoids.py +178 -0
  13. sequenzo/clustering/__init__.py +30 -0
  14. sequenzo/clustering/clustering_c_code.cpython-310-darwin.so +0 -0
  15. sequenzo/clustering/hierarchical_clustering.py +1256 -0
  16. sequenzo/clustering/sequenzo_fastcluster/fastcluster.py +495 -0
  17. sequenzo/clustering/sequenzo_fastcluster/src/fastcluster.cpp +1877 -0
  18. sequenzo/clustering/sequenzo_fastcluster/src/fastcluster_python.cpp +1264 -0
  19. sequenzo/clustering/src/KMedoid.cpp +263 -0
  20. sequenzo/clustering/src/PAM.cpp +237 -0
  21. sequenzo/clustering/src/PAMonce.cpp +265 -0
  22. sequenzo/clustering/src/cluster_quality.cpp +496 -0
  23. sequenzo/clustering/src/cluster_quality.h +128 -0
  24. sequenzo/clustering/src/cluster_quality_backup.cpp +570 -0
  25. sequenzo/clustering/src/module.cpp +228 -0
  26. sequenzo/clustering/src/weightedinertia.cpp +111 -0
  27. sequenzo/clustering/utils/__init__.py +27 -0
  28. sequenzo/clustering/utils/disscenter.py +122 -0
  29. sequenzo/data_preprocessing/__init__.py +22 -0
  30. sequenzo/data_preprocessing/helpers.py +303 -0
  31. sequenzo/datasets/__init__.py +41 -0
  32. sequenzo/datasets/biofam.csv +2001 -0
  33. sequenzo/datasets/biofam_child_domain.csv +2001 -0
  34. sequenzo/datasets/biofam_left_domain.csv +2001 -0
  35. sequenzo/datasets/biofam_married_domain.csv +2001 -0
  36. sequenzo/datasets/chinese_colonial_territories.csv +12 -0
  37. sequenzo/datasets/country_co2_emissions.csv +194 -0
  38. sequenzo/datasets/country_co2_emissions_global_deciles.csv +195 -0
  39. sequenzo/datasets/country_co2_emissions_global_quintiles.csv +195 -0
  40. sequenzo/datasets/country_co2_emissions_local_deciles.csv +195 -0
  41. sequenzo/datasets/country_co2_emissions_local_quintiles.csv +195 -0
  42. sequenzo/datasets/country_gdp_per_capita.csv +194 -0
  43. sequenzo/datasets/dyadic_children.csv +61 -0
  44. sequenzo/datasets/dyadic_parents.csv +61 -0
  45. sequenzo/datasets/mvad.csv +713 -0
  46. sequenzo/datasets/pairfam_activity_by_month.csv +1028 -0
  47. sequenzo/datasets/pairfam_activity_by_year.csv +1028 -0
  48. sequenzo/datasets/pairfam_family_by_month.csv +1028 -0
  49. sequenzo/datasets/pairfam_family_by_year.csv +1028 -0
  50. sequenzo/datasets/political_science_aid_shock.csv +166 -0
  51. sequenzo/datasets/political_science_donor_fragmentation.csv +157 -0
  52. sequenzo/define_sequence_data.py +1400 -0
  53. sequenzo/dissimilarity_measures/__init__.py +31 -0
  54. sequenzo/dissimilarity_measures/c_code.cpython-310-darwin.so +0 -0
  55. sequenzo/dissimilarity_measures/get_distance_matrix.py +762 -0
  56. sequenzo/dissimilarity_measures/get_substitution_cost_matrix.py +246 -0
  57. sequenzo/dissimilarity_measures/src/DHDdistance.cpp +148 -0
  58. sequenzo/dissimilarity_measures/src/LCPdistance.cpp +114 -0
  59. sequenzo/dissimilarity_measures/src/LCPspellDistance.cpp +215 -0
  60. sequenzo/dissimilarity_measures/src/OMdistance.cpp +247 -0
  61. sequenzo/dissimilarity_measures/src/OMspellDistance.cpp +281 -0
  62. sequenzo/dissimilarity_measures/src/__init__.py +0 -0
  63. sequenzo/dissimilarity_measures/src/dist2matrix.cpp +63 -0
  64. sequenzo/dissimilarity_measures/src/dp_utils.h +160 -0
  65. sequenzo/dissimilarity_measures/src/module.cpp +40 -0
  66. sequenzo/dissimilarity_measures/src/setup.py +30 -0
  67. sequenzo/dissimilarity_measures/src/utils.h +25 -0
  68. sequenzo/dissimilarity_measures/src/xsimd/.github/cmake-test/main.cpp +6 -0
  69. sequenzo/dissimilarity_measures/src/xsimd/benchmark/main.cpp +159 -0
  70. sequenzo/dissimilarity_measures/src/xsimd/benchmark/xsimd_benchmark.hpp +565 -0
  71. sequenzo/dissimilarity_measures/src/xsimd/docs/source/conf.py +37 -0
  72. sequenzo/dissimilarity_measures/src/xsimd/examples/mandelbrot.cpp +330 -0
  73. sequenzo/dissimilarity_measures/src/xsimd/examples/pico_bench.hpp +246 -0
  74. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_arithmetic.hpp +266 -0
  75. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_complex.hpp +112 -0
  76. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_details.hpp +323 -0
  77. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_logical.hpp +218 -0
  78. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_math.hpp +2583 -0
  79. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_memory.hpp +880 -0
  80. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_rounding.hpp +72 -0
  81. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_swizzle.hpp +174 -0
  82. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_trigo.hpp +978 -0
  83. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx.hpp +1924 -0
  84. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx2.hpp +1144 -0
  85. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512bw.hpp +656 -0
  86. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512cd.hpp +28 -0
  87. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512dq.hpp +244 -0
  88. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512er.hpp +20 -0
  89. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512f.hpp +2650 -0
  90. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512ifma.hpp +20 -0
  91. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512pf.hpp +20 -0
  92. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vbmi.hpp +77 -0
  93. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vbmi2.hpp +131 -0
  94. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vnni_avx512bw.hpp +20 -0
  95. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vnni_avx512vbmi2.hpp +20 -0
  96. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avxvnni.hpp +20 -0
  97. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_common.hpp +24 -0
  98. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_common_fwd.hpp +77 -0
  99. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_constants.hpp +393 -0
  100. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_emulated.hpp +788 -0
  101. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_avx.hpp +93 -0
  102. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_avx2.hpp +46 -0
  103. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_sse.hpp +97 -0
  104. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma4.hpp +92 -0
  105. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_i8mm_neon64.hpp +17 -0
  106. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_isa.hpp +142 -0
  107. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_neon.hpp +3142 -0
  108. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_neon64.hpp +1543 -0
  109. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_rvv.hpp +1513 -0
  110. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_scalar.hpp +1260 -0
  111. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse2.hpp +2024 -0
  112. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse3.hpp +67 -0
  113. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse4_1.hpp +339 -0
  114. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse4_2.hpp +44 -0
  115. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_ssse3.hpp +186 -0
  116. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sve.hpp +1155 -0
  117. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_vsx.hpp +892 -0
  118. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_wasm.hpp +1780 -0
  119. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_arch.hpp +240 -0
  120. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_config.hpp +484 -0
  121. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_cpuid.hpp +269 -0
  122. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_inline.hpp +27 -0
  123. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/math/xsimd_rem_pio2.hpp +719 -0
  124. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/memory/xsimd_aligned_allocator.hpp +349 -0
  125. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/memory/xsimd_alignment.hpp +91 -0
  126. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_all_registers.hpp +55 -0
  127. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_api.hpp +2765 -0
  128. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx2_register.hpp +44 -0
  129. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512bw_register.hpp +51 -0
  130. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512cd_register.hpp +51 -0
  131. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512dq_register.hpp +51 -0
  132. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512er_register.hpp +51 -0
  133. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512f_register.hpp +77 -0
  134. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512ifma_register.hpp +51 -0
  135. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512pf_register.hpp +51 -0
  136. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vbmi2_register.hpp +51 -0
  137. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vbmi_register.hpp +51 -0
  138. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512bw_register.hpp +54 -0
  139. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512vbmi2_register.hpp +53 -0
  140. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx_register.hpp +64 -0
  141. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avxvnni_register.hpp +44 -0
  142. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_batch.hpp +1524 -0
  143. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_batch_constant.hpp +300 -0
  144. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_common_arch.hpp +47 -0
  145. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_emulated_register.hpp +80 -0
  146. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_avx2_register.hpp +50 -0
  147. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_avx_register.hpp +50 -0
  148. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_sse_register.hpp +50 -0
  149. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma4_register.hpp +50 -0
  150. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_i8mm_neon64_register.hpp +55 -0
  151. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_neon64_register.hpp +55 -0
  152. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_neon_register.hpp +154 -0
  153. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_register.hpp +94 -0
  154. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_rvv_register.hpp +506 -0
  155. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse2_register.hpp +59 -0
  156. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse3_register.hpp +49 -0
  157. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse4_1_register.hpp +48 -0
  158. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse4_2_register.hpp +48 -0
  159. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_ssse3_register.hpp +48 -0
  160. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sve_register.hpp +156 -0
  161. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_traits.hpp +337 -0
  162. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_utils.hpp +536 -0
  163. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_vsx_register.hpp +77 -0
  164. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_wasm_register.hpp +59 -0
  165. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/xsimd.hpp +75 -0
  166. sequenzo/dissimilarity_measures/src/xsimd/test/architectures/dummy.cpp +7 -0
  167. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set.cpp +13 -0
  168. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean.cpp +24 -0
  169. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_aligned.cpp +25 -0
  170. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_arch_independent.cpp +28 -0
  171. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_tag_dispatch.cpp +25 -0
  172. sequenzo/dissimilarity_measures/src/xsimd/test/doc/manipulating_abstract_batches.cpp +7 -0
  173. sequenzo/dissimilarity_measures/src/xsimd/test/doc/manipulating_parametric_batches.cpp +8 -0
  174. sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum.hpp +31 -0
  175. sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum_avx2.cpp +3 -0
  176. sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum_sse2.cpp +3 -0
  177. sequenzo/dissimilarity_measures/src/xsimd/test/doc/writing_vectorized_code.cpp +11 -0
  178. sequenzo/dissimilarity_measures/src/xsimd/test/main.cpp +31 -0
  179. sequenzo/dissimilarity_measures/src/xsimd/test/test_api.cpp +230 -0
  180. sequenzo/dissimilarity_measures/src/xsimd/test/test_arch.cpp +217 -0
  181. sequenzo/dissimilarity_measures/src/xsimd/test/test_basic_math.cpp +183 -0
  182. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch.cpp +1049 -0
  183. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_bool.cpp +508 -0
  184. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_cast.cpp +409 -0
  185. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_complex.cpp +712 -0
  186. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_constant.cpp +286 -0
  187. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_float.cpp +141 -0
  188. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_int.cpp +365 -0
  189. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_manip.cpp +308 -0
  190. sequenzo/dissimilarity_measures/src/xsimd/test/test_bitwise_cast.cpp +222 -0
  191. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_exponential.cpp +226 -0
  192. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_hyperbolic.cpp +183 -0
  193. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_power.cpp +265 -0
  194. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_trigonometric.cpp +236 -0
  195. sequenzo/dissimilarity_measures/src/xsimd/test/test_conversion.cpp +248 -0
  196. sequenzo/dissimilarity_measures/src/xsimd/test/test_custom_default_arch.cpp +28 -0
  197. sequenzo/dissimilarity_measures/src/xsimd/test/test_error_gamma.cpp +170 -0
  198. sequenzo/dissimilarity_measures/src/xsimd/test/test_explicit_batch_instantiation.cpp +32 -0
  199. sequenzo/dissimilarity_measures/src/xsimd/test/test_exponential.cpp +202 -0
  200. sequenzo/dissimilarity_measures/src/xsimd/test/test_extract_pair.cpp +92 -0
  201. sequenzo/dissimilarity_measures/src/xsimd/test/test_fp_manipulation.cpp +77 -0
  202. sequenzo/dissimilarity_measures/src/xsimd/test/test_gnu_source.cpp +30 -0
  203. sequenzo/dissimilarity_measures/src/xsimd/test/test_hyperbolic.cpp +167 -0
  204. sequenzo/dissimilarity_measures/src/xsimd/test/test_load_store.cpp +304 -0
  205. sequenzo/dissimilarity_measures/src/xsimd/test/test_memory.cpp +61 -0
  206. sequenzo/dissimilarity_measures/src/xsimd/test/test_poly_evaluation.cpp +64 -0
  207. sequenzo/dissimilarity_measures/src/xsimd/test/test_power.cpp +184 -0
  208. sequenzo/dissimilarity_measures/src/xsimd/test/test_rounding.cpp +199 -0
  209. sequenzo/dissimilarity_measures/src/xsimd/test/test_select.cpp +101 -0
  210. sequenzo/dissimilarity_measures/src/xsimd/test/test_shuffle.cpp +760 -0
  211. sequenzo/dissimilarity_measures/src/xsimd/test/test_sum.cpp +4 -0
  212. sequenzo/dissimilarity_measures/src/xsimd/test/test_sum.hpp +34 -0
  213. sequenzo/dissimilarity_measures/src/xsimd/test/test_traits.cpp +172 -0
  214. sequenzo/dissimilarity_measures/src/xsimd/test/test_trigonometric.cpp +208 -0
  215. sequenzo/dissimilarity_measures/src/xsimd/test/test_utils.hpp +611 -0
  216. sequenzo/dissimilarity_measures/src/xsimd/test/test_wasm/test_wasm_playwright.py +123 -0
  217. sequenzo/dissimilarity_measures/src/xsimd/test/test_xsimd_api.cpp +1460 -0
  218. sequenzo/dissimilarity_measures/utils/__init__.py +16 -0
  219. sequenzo/dissimilarity_measures/utils/get_LCP_length_for_2_seq.py +44 -0
  220. sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.cpython-310-darwin.so +0 -0
  221. sequenzo/dissimilarity_measures/utils/seqconc.cpython-310-darwin.so +0 -0
  222. sequenzo/dissimilarity_measures/utils/seqdss.cpython-310-darwin.so +0 -0
  223. sequenzo/dissimilarity_measures/utils/seqdur.cpython-310-darwin.so +0 -0
  224. sequenzo/dissimilarity_measures/utils/seqlength.cpython-310-darwin.so +0 -0
  225. sequenzo/multidomain/__init__.py +23 -0
  226. sequenzo/multidomain/association_between_domains.py +311 -0
  227. sequenzo/multidomain/cat.py +597 -0
  228. sequenzo/multidomain/combt.py +519 -0
  229. sequenzo/multidomain/dat.py +81 -0
  230. sequenzo/multidomain/idcd.py +139 -0
  231. sequenzo/multidomain/linked_polyad.py +292 -0
  232. sequenzo/openmp_setup.py +233 -0
  233. sequenzo/prefix_tree/__init__.py +62 -0
  234. sequenzo/prefix_tree/hub.py +114 -0
  235. sequenzo/prefix_tree/individual_level_indicators.py +1321 -0
  236. sequenzo/prefix_tree/spell_individual_level_indicators.py +580 -0
  237. sequenzo/prefix_tree/spell_level_indicators.py +297 -0
  238. sequenzo/prefix_tree/system_level_indicators.py +544 -0
  239. sequenzo/prefix_tree/utils.py +54 -0
  240. sequenzo/seqhmm/__init__.py +95 -0
  241. sequenzo/seqhmm/advanced_optimization.py +305 -0
  242. sequenzo/seqhmm/bootstrap.py +411 -0
  243. sequenzo/seqhmm/build_hmm.py +142 -0
  244. sequenzo/seqhmm/build_mhmm.py +136 -0
  245. sequenzo/seqhmm/build_nhmm.py +121 -0
  246. sequenzo/seqhmm/fit_mhmm.py +62 -0
  247. sequenzo/seqhmm/fit_model.py +61 -0
  248. sequenzo/seqhmm/fit_nhmm.py +76 -0
  249. sequenzo/seqhmm/formulas.py +289 -0
  250. sequenzo/seqhmm/forward_backward_nhmm.py +276 -0
  251. sequenzo/seqhmm/gradients_nhmm.py +306 -0
  252. sequenzo/seqhmm/hmm.py +291 -0
  253. sequenzo/seqhmm/mhmm.py +314 -0
  254. sequenzo/seqhmm/model_comparison.py +238 -0
  255. sequenzo/seqhmm/multichannel_em.py +282 -0
  256. sequenzo/seqhmm/multichannel_utils.py +138 -0
  257. sequenzo/seqhmm/nhmm.py +270 -0
  258. sequenzo/seqhmm/nhmm_utils.py +191 -0
  259. sequenzo/seqhmm/predict.py +137 -0
  260. sequenzo/seqhmm/predict_mhmm.py +142 -0
  261. sequenzo/seqhmm/simulate.py +878 -0
  262. sequenzo/seqhmm/utils.py +218 -0
  263. sequenzo/seqhmm/visualization.py +910 -0
  264. sequenzo/sequence_characteristics/__init__.py +40 -0
  265. sequenzo/sequence_characteristics/complexity_index.py +49 -0
  266. sequenzo/sequence_characteristics/overall_cross_sectional_entropy.py +220 -0
  267. sequenzo/sequence_characteristics/plot_characteristics.py +593 -0
  268. sequenzo/sequence_characteristics/simple_characteristics.py +311 -0
  269. sequenzo/sequence_characteristics/state_frequencies_and_entropy_per_sequence.py +39 -0
  270. sequenzo/sequence_characteristics/turbulence.py +155 -0
  271. sequenzo/sequence_characteristics/variance_of_spell_durations.py +86 -0
  272. sequenzo/sequence_characteristics/within_sequence_entropy.py +43 -0
  273. sequenzo/suffix_tree/__init__.py +66 -0
  274. sequenzo/suffix_tree/hub.py +114 -0
  275. sequenzo/suffix_tree/individual_level_indicators.py +1679 -0
  276. sequenzo/suffix_tree/spell_individual_level_indicators.py +493 -0
  277. sequenzo/suffix_tree/spell_level_indicators.py +248 -0
  278. sequenzo/suffix_tree/system_level_indicators.py +535 -0
  279. sequenzo/suffix_tree/utils.py +56 -0
  280. sequenzo/version_check.py +283 -0
  281. sequenzo/visualization/__init__.py +29 -0
  282. sequenzo/visualization/plot_mean_time.py +222 -0
  283. sequenzo/visualization/plot_modal_state.py +276 -0
  284. sequenzo/visualization/plot_most_frequent_sequences.py +147 -0
  285. sequenzo/visualization/plot_relative_frequency.py +405 -0
  286. sequenzo/visualization/plot_sequence_index.py +1175 -0
  287. sequenzo/visualization/plot_single_medoid.py +153 -0
  288. sequenzo/visualization/plot_state_distribution.py +651 -0
  289. sequenzo/visualization/plot_transition_matrix.py +190 -0
  290. sequenzo/visualization/utils/__init__.py +23 -0
  291. sequenzo/visualization/utils/utils.py +310 -0
  292. sequenzo/with_event_history_analysis/__init__.py +35 -0
  293. sequenzo/with_event_history_analysis/sequence_analysis_multi_state_model.py +850 -0
  294. sequenzo/with_event_history_analysis/sequence_history_analysis.py +283 -0
  295. sequenzo-0.1.31.dist-info/METADATA +286 -0
  296. sequenzo-0.1.31.dist-info/RECORD +299 -0
  297. sequenzo-0.1.31.dist-info/WHEEL +5 -0
  298. sequenzo-0.1.31.dist-info/licenses/LICENSE +28 -0
  299. sequenzo-0.1.31.dist-info/top_level.txt +2 -0
@@ -0,0 +1,493 @@
1
+ """
2
+ @Author : Yuqi Liang 梁彧祺
3
+ @File : spell_individual_level_indicators.py
4
+ @Time : 2026/1/30 15:57
5
+ @Desc : Individual-level indicators for spell-based suffix tree analysis.
6
+
7
+ Spell-based Suffix Tree: Individual-level convergence indicators.
8
+
9
+ Provides per-sequence (per-individual) rarity and convergence measures when the
10
+ unit of analysis is SPELL from the end. Each "level" is one spell from the end
11
+ (last spell, last two spells, ...). Lower rarity = more typical ending pattern.
12
+ Variable-length sequences are supported: individuals with fewer spells have NaN
13
+ at spell levels beyond their length.
14
+
15
+ Design mirrors: sequenzo/suffix_tree/individual_level_indicators.py (position-based).
16
+ - Position version: level = time index from end, suffix = states from year t to end.
17
+ - Spell version: level = spell index from end, suffix = last k spells.
18
+
19
+ Usage:
20
+ from sequenzo.suffix_tree import build_spell_suffix_tree
21
+ from sequenzo.suffix_tree.spell_individual_level_indicators import SpellIndividualConvergence
22
+
23
+ tree = build_spell_suffix_tree(seqdata, expcost=0)
24
+ ind = SpellIndividualConvergence(tree)
25
+ rarity_per_spell = ind.compute_suffix_rarity_per_spell()
26
+ converged = ind.compute_converged(method="zscore", z_threshold=1.5)
27
+ """
28
+ from typing import Any, Dict, List, Optional
29
+
30
+ import numpy as np
31
+ import pandas as pd
32
+
33
+ from .spell_level_indicators import SpellSuffixTree
34
+
35
+
36
+ _EPS = 1e-10
37
+
38
+
39
+ class SpellIndividualConvergence:
40
+ """
41
+ Individual-level convergence and rarity for spell-based suffix trees.
42
+
43
+ Requires a SpellSuffixTree built with build_spell_suffix_tree(seqdata, ...),
44
+ so that tree._spell_states and tree._spell_durations exist and tree.counts /
45
+ tree.total_sequences are populated. Suffix at level k = last k spells (from end).
46
+ Lower rarity = more typical ending; converged = low rarity (z < -z_threshold).
47
+ """
48
+
49
def __init__(self, tree: SpellSuffixTree):
    """
    Bind the indicator object to a spell-based suffix tree.

    :param tree: A SpellSuffixTree that carries ``_spell_states`` and
        ``_spell_durations`` attributes plus ``total_sequences``.
    :raises TypeError: If ``tree`` is not a SpellSuffixTree instance.
    :raises ValueError: If the per-sequence spell attributes are missing.
    """
    if not isinstance(tree, SpellSuffixTree):
        raise TypeError(
            "[!] SpellIndividualConvergence requires a SpellSuffixTree. "
            "Use: build_spell_suffix_tree(seqdata) then SpellIndividualConvergence(tree)"
        )

    has_spell_data = hasattr(tree, "_spell_states") and hasattr(tree, "_spell_durations")
    if not has_spell_data:
        raise ValueError(
            "[!] SpellSuffixTree must be built with build_spell_suffix_tree(seqdata) "
            "so that _spell_states and _spell_durations are attached."
        )

    self.tree = tree
    self.spell_states = tree._spell_states
    self.spell_durations = tree._spell_durations
    self.N = tree.total_sequences

    # Deepest spell level present in the data; 0 when there are no sequences.
    if self.spell_states:
        self.max_spells = max(map(len, self.spell_states))
    else:
        self.max_spells = 0
65
+
66
def _build_rarity_matrix(self) -> np.ndarray:
    """
    Return an (N, max_spells) array of suffix rarity per individual and level.

    Column k (0-based) describes the suffix made of the last k + 1 spells,
    read from the final spell backwards. Each filled cell holds
    -log(count(suffix) / N + _EPS), where counts come from ``self.tree.counts``
    (a missing suffix counts as 0). Cells beyond an individual's number of
    spells stay NaN.
    """
    n_seq = self.N
    n_levels = self.max_spells
    suffix_counts = self.tree.counts
    denom = max(n_seq, 1)  # guards the division when there are no sequences

    matrix = np.full((n_seq, n_levels), np.nan, dtype=float)
    for row, states in enumerate(self.spell_states):
        suffix = ()
        for level, state in enumerate(reversed(states)):
            # Grow the suffix one spell at a time, starting from the last spell.
            suffix = suffix + (state,)
            share = suffix_counts.get(suffix, 0) / denom
            matrix[row, level] = -np.log(share + _EPS)
    return matrix
82
+
83
def compute_suffix_rarity_per_spell(
    self,
    as_dataframe: bool = True,
    column_prefix: str = "k",
    zscore: bool = False,
):
    """
    Suffix rarity of every individual at every spell level (counted from the end).

    :param as_dataframe: Return a pandas DataFrame when True, otherwise the raw
        NumPy array.
    :param column_prefix: Prefix of the DataFrame column names; columns are
        ``<prefix>1`` ... ``<prefix>max_spells``.
    :param zscore: When True, standardize each level (column) with its NaN-aware
        mean and sample standard deviation (ddof=1); non-finite results become NaN.
    :return: DataFrame or ndarray of shape (N, max_spells). Levels beyond an
        individual's number of spells are NaN.
    """
    values = self._build_rarity_matrix()

    if zscore:
        level_mean = np.nanmean(values, axis=0)
        level_std = np.nanstd(values, axis=0, ddof=1)
        with np.errstate(invalid="ignore", divide="ignore"):
            standardized = (values - level_mean) / level_std
            # A zero or undefined spread yields inf/NaN; report those as NaN.
            values = np.where(np.isfinite(standardized), standardized, np.nan)

    if as_dataframe:
        labels = [f"{column_prefix}{level}" for level in range(1, self.max_spells + 1)]
        return pd.DataFrame(values, columns=labels)
    return values
106
+
107
def compute_suffix_rarity_score(self) -> List[float]:
    """
    Aggregate rarity per individual: the sum of the finite per-level rarities.

    Higher values mean a rarer ending path. An individual with no finite
    rarity at any level gets NaN.
    """
    matrix = self._build_rarity_matrix()

    def _row_total(index):
        # Sum only the levels this individual actually has.
        row = matrix[index, :]
        finite_vals = row[np.isfinite(row)]
        if finite_vals.size:
            return float(np.sum(finite_vals))
        return np.nan

    return [_row_total(i) for i in range(self.N)]
119
+
120
def compute_standardized_rarity_score(
    self,
    min_k: int = 1,
    window: int = 1,
) -> List[float]:
    """
    Standardized rarity score per individual for convergence classification.

    Rarity is z-scored per spell level (NaN-aware mean, sample standard
    deviation with ddof=1). For every window of ``window`` consecutive levels
    starting at level ``min_k`` or later, the maximum z-score inside the window
    is taken; the score is the minimum of those maxima. Lower = more typical.

    :param min_k: First (1-based) spell level, counted from the end, at which a
        window may start. Values below 1 are treated as 1.
    :param window: Number of consecutive spell levels per window (>= 1).
    :return: One float per individual; NaN when no window is fully observed,
        including when ``window`` exceeds the deepest available spell level.
    :raises ValueError: If ``window`` is smaller than 1.
    """
    if window < 1:
        raise ValueError("[!] window must be >= 1.")

    rarity = self._build_rarity_matrix()
    col_means = np.nanmean(rarity, axis=0)
    col_stds = np.nanstd(rarity, axis=0, ddof=1)
    with np.errstate(invalid="ignore", divide="ignore"):
        rarity_z = (rarity - col_means) / col_stds
        # Zero/undefined spread gives inf/NaN; treat both as "not available".
        rarity_z = np.where(np.isfinite(rarity_z), rarity_z, np.nan)

    # Clamp the first start so a min_k below 1 cannot wrap around to the last
    # column through negative indexing.
    first_start = max(0, min_k - 1)
    # No clamping to 0 here: when the window is longer than the matrix is wide
    # the range below is empty instead of indexing past the last column.
    last_start = self.max_spells - window

    standardized_scores = []
    for i in range(self.N):
        z_row = rarity_z[i, :]
        window_maxima = []
        for t0 in range(first_start, last_start + 1):
            chunk = z_row[t0:t0 + window]
            # Only windows that are completely observed count.
            if np.all(np.isfinite(chunk)):
                window_maxima.append(float(np.max(chunk)))
        standardized_scores.append(min(window_maxima) if window_maxima else np.nan)
    return standardized_scores
151
+
152
def compute_converged(
    self,
    z_threshold: float = 1.5,
    min_k: int = 1,
    window: int = 1,
    inclusive: bool = False,
    group_labels: Optional[Any] = None,
    *,
    method: str = "zscore",
    proportion: Optional[float] = None,
    quantile_p: Optional[float] = None,
    min_count: int = 1,
) -> List[int]:
    """
    Compute binary convergence flags (0/1) per individual. Converged = low rarity (typical).

    Methods (case-insensitive):

    - "zscore" (default, also used for any unrecognized value): converged if
      some window of ``window`` consecutive spell levels has all z-scores
      < -z_threshold (<= if ``inclusive``).
    - "top_proportion" (aliases "topk", "proportion", "rank"): flag the
      ``proportion`` (default 0.10) of individuals with the smallest
      standardized scores, at least ``min_count`` and at most the number of
      individuals with a finite score.
    - "quantile": converged if the standardized score is <= the ``quantile_p``
      quantile (default 0.10) of the finite scores.

    :param z_threshold: Threshold for the "zscore" method.
    :param min_k: First (1-based) spell level from the end at which a window may
        start. Values below 1 are treated as 1.
    :param window: Number of consecutive spell levels per window (>= 1).
    :param inclusive: Use <= instead of < for the "zscore" comparison.
    :param group_labels: Optional per-individual labels; when given, the
        "top_proportion" and "quantile" selections are made within each group.
        Not used by "zscore".
    :return: List of N ints (0 or 1).
    :raises ValueError: If ``window`` is smaller than 1.
    """
    if window < 1:
        # An empty window would vacuously satisfy "all z-scores below threshold".
        raise ValueError("[!] window must be >= 1.")

    N = self.N
    first_start = max(0, min_k - 1)  # never index from the end via negatives
    last_start = self.max_spells - window
    if last_start < 0:
        # The window is longer than the deepest spell level: nothing can be
        # evaluated, so nobody is flagged (and no out-of-range indexing occurs).
        return [0] * N

    method_norm = (method or "zscore").lower()
    rank_methods = {"top_proportion", "topk", "proportion", "rank"}
    use_rank = method_norm in rank_methods
    use_quantile = method_norm == "quantile"

    if use_rank or use_quantile:
        scores = np.asarray(
            self.compute_standardized_rarity_score(min_k=min_k, window=window), dtype=float
        )
        flags = np.zeros(N, dtype=int)

        # Without labels everyone forms one group, which lets the grouped and
        # ungrouped cases share a single code path.
        if group_labels is None:
            groups = [np.arange(N)]
        else:
            labels = np.asarray(group_labels)
            groups = [np.where(labels == g)[0] for g in pd.unique(labels)]

        if use_rank:
            share = proportion if proportion is not None else 0.10
        else:
            q = quantile_p if quantile_p is not None else 0.10

        for idx in groups:
            vals = scores[idx]
            finite = np.isfinite(vals)
            n_valid = int(np.sum(finite))
            if n_valid == 0:
                continue
            if use_rank:
                k = int(np.floor(share * n_valid))
                k = min(max(k, int(min_count)), n_valid)
                if k >= 1:
                    # Stable sort keeps ties in input order; NaNs sort last.
                    order = np.argsort(np.where(finite, vals, np.inf), kind="mergesort")
                    flags[idx[order[:k]]] = 1
            else:
                cutoff = float(np.quantile(vals[finite], q))
                # NaN scores compare False and therefore stay unflagged.
                flags[idx[vals <= cutoff]] = 1
        return flags.tolist()

    rarity = self._build_rarity_matrix()
    col_means = np.nanmean(rarity, axis=0)
    col_stds = np.nanstd(rarity, axis=0, ddof=1)
    with np.errstate(invalid="ignore", divide="ignore"):
        rarity_z = (rarity - col_means) / col_stds
        rarity_z = np.where(np.isfinite(rarity_z), rarity_z, np.nan)

    flags = []
    for i in range(N):
        z_row = rarity_z[i, :]
        converged = 0
        for t0 in range(first_start, last_start + 1):
            chunk = z_row[t0:t0 + window]
            if not np.all(np.isfinite(chunk)):
                continue  # window not fully observed for this individual
            if inclusive:
                hit = bool(np.all(chunk <= -z_threshold))
            else:
                hit = bool(np.all(chunk < -z_threshold))
            if hit:
                converged = 1
                break
        flags.append(converged)
    return flags
274
+
275
def _compute_window_max_list(self, min_k: int, window: int) -> np.ndarray:
    """
    Largest column-standardised rarity value inside each sliding window.

    The matrix returned by ``self._build_rarity_matrix()`` is z-scored column by
    column (NaN-aware mean, sample standard deviation with ``ddof=1``); any
    non-finite z-value (e.g. from a zero-variance column) is treated as NaN.
    For every individual (row) and every window start ``t0`` in
    ``range(min_k - 1, max(0, self.max_spells - window) + 1)`` the maximum of
    the ``window`` consecutive z-values starting at ``t0`` is stored.

    Parameters
    ----------
    min_k : int
        1-based first start level; column 0 of the result corresponds to it.
    window : int
        Number of consecutive levels per window; must be at least 1.

    Returns
    -------
    np.ndarray
        Float array of shape ``(self.N, n_starts)``. An entry is NaN when the
        window contains a non-finite z-value or does not fit inside the matrix.

    Raises
    ------
    ValueError
        If ``window`` is smaller than 1.
    """
    if window < 1:
        # The max of an empty window is undefined; fail with a clear message
        # instead of NumPy's "zero-size array" reduction error.
        raise ValueError(f"window must be >= 1, got {window}")

    rarity = self._build_rarity_matrix()
    col_means = np.nanmean(rarity, axis=0)
    col_stds = np.nanstd(rarity, axis=0, ddof=1)
    with np.errstate(invalid="ignore", divide="ignore"):
        rarity_z = (rarity - col_means) / col_stds
    rarity_z = np.where(np.isfinite(rarity_z), rarity_z, np.nan)

    start_min = min_k - 1
    start_max = max(0, self.max_spells - window)
    n_starts = max(0, start_max - start_min + 1)
    window_maxes = np.full((self.N, n_starts), np.nan, dtype=float)

    n_levels = rarity_z.shape[1]
    for col, t0 in enumerate(range(start_min, start_max + 1)):
        # A window that starts before the first level or runs past the last
        # one is not a real window: leave NaN rather than wrapping around
        # (negative index) or raising IndexError (window > available levels).
        if t0 < 0 or t0 + window > n_levels:
            continue
        block = rarity_z[: self.N, t0:t0 + window]
        complete = np.isfinite(block).all(axis=1)
        window_maxes[complete, col] = block[complete].max(axis=1)
    return window_maxes
295
+
296
def compute_first_convergence_spell(
    self,
    z_threshold: float = 1.5,
    min_k: int = 1,
    window: int = 1,
    inclusive: bool = False,
    group_labels: Optional[Any] = None,
    *,
    method: str = "zscore",
    proportion: Optional[float] = None,
    quantile_p: Optional[float] = None,
    min_count: int = 1,
) -> List[Optional[int]]:
    """
    First spell level (1-indexed from end) at which the individual is converged, or None.
    Level 1 = last spell, level 2 = last two spells, etc.

    All methods scan the per-window maxima produced by
    ``self._compute_window_max_list(min_k, window)`` from the first start level
    onwards and report the first window whose maximum is at or below a threshold;
    windows with a non-finite maximum are skipped.

    Parameters
    ----------
    z_threshold : float
        z-score method only: a window qualifies when its maximum is below
        ``-z_threshold``.
    min_k : int
        1-based first start level considered.
    window : int
        Number of consecutive levels that must all satisfy the condition.
    inclusive : bool
        z-score method only: use ``<=`` instead of ``<`` against ``-z_threshold``.
    group_labels : array-like, optional
        Rank/quantile methods only: one label per individual; selection and
        threshold are then computed separately inside each label group.
    method : str
        ``"top_proportion"``/``"topk"``/``"proportion"``/``"rank"`` (rank based),
        ``"quantile"``, or anything else (including ``None``) for the z-score rule.
        Matching is case-insensitive.
    proportion : float, optional
        Rank methods: share of finite scores to select (default 0.10).
    quantile_p : float, optional
        Quantile method: quantile of the scores used as cut-off (default 0.10).
    min_count : int
        Rank methods: lower bound on the number selected per group (capped at
        the number of finite scores).

    Returns
    -------
    list of (int or None)
        One entry per individual, ``None`` when no qualifying window exists or
        the individual was not selected.
    """
    n_individuals = self.N
    method_norm = (method or "zscore").lower()
    is_rank = method_norm in {"top_proportion", "topk", "proportion", "rank"}
    is_quantile = method_norm == "quantile"

    def _first_level(window_max_row, threshold, allow_equal=True):
        """Return the 1-based level of the first finite window max under *threshold*."""
        with np.errstate(invalid="ignore"):
            if allow_equal:
                below = window_max_row <= threshold
            else:
                below = window_max_row < threshold
        hits = np.flatnonzero(below & np.isfinite(window_max_row))
        # Column 0 of the window-max matrix is start level ``min_k``.
        return int(hits[0]) + min_k if hits.size else None

    if not (is_rank or is_quantile):
        # z-score rule: "every value in the window is below -z" is the same as
        # "the window maximum is below -z", so the shared helper is reused
        # instead of re-deriving the z-scores here.
        window_max = self._compute_window_max_list(min_k, window)
        return [
            _first_level(window_max[i, :], -z_threshold, inclusive)
            for i in range(n_individuals)
        ]

    agg_scores = np.asarray(
        self.compute_standardized_rarity_score(min_k=min_k, window=window), dtype=float
    )
    window_max = self._compute_window_max_list(min_k, window)

    # One index array per group; the whole population is a single group when
    # no labels are given.
    if group_labels is None:
        groups = [np.arange(n_individuals)]
    else:
        labels = np.asarray(group_labels)
        groups = [np.where(labels == g)[0] for g in pd.unique(labels)]

    if is_rank:
        share = proportion if proportion is not None else 0.10
    else:
        q = quantile_p if quantile_p is not None else 0.10

    spells: List[Optional[int]] = [None] * n_individuals
    for members in groups:
        scores = agg_scores[members]
        finite = np.isfinite(scores)
        n_valid = int(finite.sum())
        if n_valid == 0:
            continue

        if is_rank:
            k = int(np.floor(share * n_valid))
            k = min(max(k, int(min_count)), n_valid)
            if k < 1:
                continue
            # Stable sort with non-finite scores pushed to the end, so the
            # first k positions are the k lowest finite scores.
            order = np.argsort(np.where(finite, scores, np.inf), kind="mergesort")
            chosen = order[:k]
            threshold = float(scores[order[k - 1]])
        else:
            threshold = float(np.nanquantile(scores, q))
            with np.errstate(invalid="ignore"):
                chosen = np.flatnonzero(finite & (scores <= threshold))

        for local in chosen:
            i = int(members[local])
            spells[i] = _first_level(window_max[i, :], threshold)
    return spells
448
+
449
def compute_path_uniqueness(self) -> List[int]:
    """
    Count, per individual, the depths at which the reversed state path is unique.

    Each entry of ``self.spell_states`` is read from its last state backwards;
    for every depth ``d`` (1 .. length) the tuple of the last ``d`` states is
    looked up in ``self.tree.counts``. A depth contributes when the stored
    count is exactly 1; keys missing from the table count as 0.

    Returns
    -------
    list of int
        One count per entry of ``self.spell_states``, in the same order.
    """
    suffix_counts = self.tree.counts
    result = []
    for states in self.spell_states:
        backwards = tuple(reversed(states))
        n_unique = sum(
            1
            for depth in range(1, len(backwards) + 1)
            if suffix_counts.get(backwards[:depth], 0) == 1
        )
        result.append(n_unique)
    return result
464
+
465
def diagnose_convergence_calculation(
    self,
    z_threshold: float = 1.5,
    min_k: int = 1,
    window: int = 1,
) -> Dict[str, Any]:
    """
    Summarise the inputs and outcome of the z-score convergence rule.

    Reports the sample standard deviation (``ddof=1``) of every column of
    ``self._build_rarity_matrix()``, the 1-based levels (up to
    ``self.max_spells``) whose deviation is NaN or below ``1e-10``, and the
    result of ``self.compute_first_convergence_spell`` run with
    ``method="zscore"`` and the given parameters.

    Returns
    -------
    dict
        Keys: ``rarity_std_by_spell``, ``spell_levels_with_zero_variance``,
        ``n_individuals_with_convergence``, ``convergence_spell_distribution``,
        ``total_individuals`` and ``parameters_used``.
    """
    per_level_std = pd.DataFrame(self._build_rarity_matrix()).std(axis=0, ddof=1)

    # Levels where z-scoring is degenerate (no spread, or spread undefined).
    degenerate_levels = []
    for level in range(1, self.max_spells + 1):
        spread = per_level_std.iloc[level - 1]
        if pd.isna(spread) or spread < 1e-10:
            degenerate_levels.append(level)

    first_spells = self.compute_first_convergence_spell(
        z_threshold=z_threshold, min_k=min_k, window=window, method="zscore"
    )
    converged_total = len([s for s in first_spells if s is not None])
    distribution = (
        pd.Series(first_spells).value_counts(dropna=False).sort_index().to_dict()
    )

    parameters = {"z_threshold": z_threshold, "min_k": min_k, "window": window}
    return {
        "rarity_std_by_spell": per_level_std.tolist(),
        "spell_levels_with_zero_variance": degenerate_levels,
        "n_individuals_with_convergence": converged_total,
        "convergence_spell_distribution": distribution,
        "total_individuals": self.N,
        "parameters_used": parameters,
    }