sequenzo 0.1.31__cp310-cp310-macosx_10_9_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (299) hide show
  1. _sequenzo_fastcluster.cpython-310-darwin.so +0 -0
  2. sequenzo/__init__.py +349 -0
  3. sequenzo/big_data/__init__.py +12 -0
  4. sequenzo/big_data/clara/__init__.py +26 -0
  5. sequenzo/big_data/clara/clara.py +476 -0
  6. sequenzo/big_data/clara/utils/__init__.py +27 -0
  7. sequenzo/big_data/clara/utils/aggregatecases.py +92 -0
  8. sequenzo/big_data/clara/utils/davies_bouldin.py +91 -0
  9. sequenzo/big_data/clara/utils/get_weighted_diss.cpython-310-darwin.so +0 -0
  10. sequenzo/big_data/clara/utils/wfcmdd.py +205 -0
  11. sequenzo/big_data/clara/visualization.py +88 -0
  12. sequenzo/clustering/KMedoids.py +178 -0
  13. sequenzo/clustering/__init__.py +30 -0
  14. sequenzo/clustering/clustering_c_code.cpython-310-darwin.so +0 -0
  15. sequenzo/clustering/hierarchical_clustering.py +1256 -0
  16. sequenzo/clustering/sequenzo_fastcluster/fastcluster.py +495 -0
  17. sequenzo/clustering/sequenzo_fastcluster/src/fastcluster.cpp +1877 -0
  18. sequenzo/clustering/sequenzo_fastcluster/src/fastcluster_python.cpp +1264 -0
  19. sequenzo/clustering/src/KMedoid.cpp +263 -0
  20. sequenzo/clustering/src/PAM.cpp +237 -0
  21. sequenzo/clustering/src/PAMonce.cpp +265 -0
  22. sequenzo/clustering/src/cluster_quality.cpp +496 -0
  23. sequenzo/clustering/src/cluster_quality.h +128 -0
  24. sequenzo/clustering/src/cluster_quality_backup.cpp +570 -0
  25. sequenzo/clustering/src/module.cpp +228 -0
  26. sequenzo/clustering/src/weightedinertia.cpp +111 -0
  27. sequenzo/clustering/utils/__init__.py +27 -0
  28. sequenzo/clustering/utils/disscenter.py +122 -0
  29. sequenzo/data_preprocessing/__init__.py +22 -0
  30. sequenzo/data_preprocessing/helpers.py +303 -0
  31. sequenzo/datasets/__init__.py +41 -0
  32. sequenzo/datasets/biofam.csv +2001 -0
  33. sequenzo/datasets/biofam_child_domain.csv +2001 -0
  34. sequenzo/datasets/biofam_left_domain.csv +2001 -0
  35. sequenzo/datasets/biofam_married_domain.csv +2001 -0
  36. sequenzo/datasets/chinese_colonial_territories.csv +12 -0
  37. sequenzo/datasets/country_co2_emissions.csv +194 -0
  38. sequenzo/datasets/country_co2_emissions_global_deciles.csv +195 -0
  39. sequenzo/datasets/country_co2_emissions_global_quintiles.csv +195 -0
  40. sequenzo/datasets/country_co2_emissions_local_deciles.csv +195 -0
  41. sequenzo/datasets/country_co2_emissions_local_quintiles.csv +195 -0
  42. sequenzo/datasets/country_gdp_per_capita.csv +194 -0
  43. sequenzo/datasets/dyadic_children.csv +61 -0
  44. sequenzo/datasets/dyadic_parents.csv +61 -0
  45. sequenzo/datasets/mvad.csv +713 -0
  46. sequenzo/datasets/pairfam_activity_by_month.csv +1028 -0
  47. sequenzo/datasets/pairfam_activity_by_year.csv +1028 -0
  48. sequenzo/datasets/pairfam_family_by_month.csv +1028 -0
  49. sequenzo/datasets/pairfam_family_by_year.csv +1028 -0
  50. sequenzo/datasets/political_science_aid_shock.csv +166 -0
  51. sequenzo/datasets/political_science_donor_fragmentation.csv +157 -0
  52. sequenzo/define_sequence_data.py +1400 -0
  53. sequenzo/dissimilarity_measures/__init__.py +31 -0
  54. sequenzo/dissimilarity_measures/c_code.cpython-310-darwin.so +0 -0
  55. sequenzo/dissimilarity_measures/get_distance_matrix.py +762 -0
  56. sequenzo/dissimilarity_measures/get_substitution_cost_matrix.py +246 -0
  57. sequenzo/dissimilarity_measures/src/DHDdistance.cpp +148 -0
  58. sequenzo/dissimilarity_measures/src/LCPdistance.cpp +114 -0
  59. sequenzo/dissimilarity_measures/src/LCPspellDistance.cpp +215 -0
  60. sequenzo/dissimilarity_measures/src/OMdistance.cpp +247 -0
  61. sequenzo/dissimilarity_measures/src/OMspellDistance.cpp +281 -0
  62. sequenzo/dissimilarity_measures/src/__init__.py +0 -0
  63. sequenzo/dissimilarity_measures/src/dist2matrix.cpp +63 -0
  64. sequenzo/dissimilarity_measures/src/dp_utils.h +160 -0
  65. sequenzo/dissimilarity_measures/src/module.cpp +40 -0
  66. sequenzo/dissimilarity_measures/src/setup.py +30 -0
  67. sequenzo/dissimilarity_measures/src/utils.h +25 -0
  68. sequenzo/dissimilarity_measures/src/xsimd/.github/cmake-test/main.cpp +6 -0
  69. sequenzo/dissimilarity_measures/src/xsimd/benchmark/main.cpp +159 -0
  70. sequenzo/dissimilarity_measures/src/xsimd/benchmark/xsimd_benchmark.hpp +565 -0
  71. sequenzo/dissimilarity_measures/src/xsimd/docs/source/conf.py +37 -0
  72. sequenzo/dissimilarity_measures/src/xsimd/examples/mandelbrot.cpp +330 -0
  73. sequenzo/dissimilarity_measures/src/xsimd/examples/pico_bench.hpp +246 -0
  74. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_arithmetic.hpp +266 -0
  75. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_complex.hpp +112 -0
  76. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_details.hpp +323 -0
  77. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_logical.hpp +218 -0
  78. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_math.hpp +2583 -0
  79. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_memory.hpp +880 -0
  80. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_rounding.hpp +72 -0
  81. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_swizzle.hpp +174 -0
  82. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_trigo.hpp +978 -0
  83. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx.hpp +1924 -0
  84. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx2.hpp +1144 -0
  85. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512bw.hpp +656 -0
  86. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512cd.hpp +28 -0
  87. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512dq.hpp +244 -0
  88. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512er.hpp +20 -0
  89. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512f.hpp +2650 -0
  90. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512ifma.hpp +20 -0
  91. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512pf.hpp +20 -0
  92. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vbmi.hpp +77 -0
  93. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vbmi2.hpp +131 -0
  94. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vnni_avx512bw.hpp +20 -0
  95. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vnni_avx512vbmi2.hpp +20 -0
  96. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avxvnni.hpp +20 -0
  97. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_common.hpp +24 -0
  98. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_common_fwd.hpp +77 -0
  99. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_constants.hpp +393 -0
  100. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_emulated.hpp +788 -0
  101. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_avx.hpp +93 -0
  102. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_avx2.hpp +46 -0
  103. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_sse.hpp +97 -0
  104. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma4.hpp +92 -0
  105. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_i8mm_neon64.hpp +17 -0
  106. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_isa.hpp +142 -0
  107. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_neon.hpp +3142 -0
  108. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_neon64.hpp +1543 -0
  109. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_rvv.hpp +1513 -0
  110. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_scalar.hpp +1260 -0
  111. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse2.hpp +2024 -0
  112. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse3.hpp +67 -0
  113. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse4_1.hpp +339 -0
  114. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse4_2.hpp +44 -0
  115. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_ssse3.hpp +186 -0
  116. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sve.hpp +1155 -0
  117. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_vsx.hpp +892 -0
  118. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_wasm.hpp +1780 -0
  119. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_arch.hpp +240 -0
  120. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_config.hpp +484 -0
  121. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_cpuid.hpp +269 -0
  122. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_inline.hpp +27 -0
  123. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/math/xsimd_rem_pio2.hpp +719 -0
  124. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/memory/xsimd_aligned_allocator.hpp +349 -0
  125. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/memory/xsimd_alignment.hpp +91 -0
  126. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_all_registers.hpp +55 -0
  127. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_api.hpp +2765 -0
  128. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx2_register.hpp +44 -0
  129. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512bw_register.hpp +51 -0
  130. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512cd_register.hpp +51 -0
  131. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512dq_register.hpp +51 -0
  132. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512er_register.hpp +51 -0
  133. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512f_register.hpp +77 -0
  134. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512ifma_register.hpp +51 -0
  135. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512pf_register.hpp +51 -0
  136. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vbmi2_register.hpp +51 -0
  137. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vbmi_register.hpp +51 -0
  138. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512bw_register.hpp +54 -0
  139. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512vbmi2_register.hpp +53 -0
  140. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx_register.hpp +64 -0
  141. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avxvnni_register.hpp +44 -0
  142. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_batch.hpp +1524 -0
  143. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_batch_constant.hpp +300 -0
  144. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_common_arch.hpp +47 -0
  145. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_emulated_register.hpp +80 -0
  146. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_avx2_register.hpp +50 -0
  147. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_avx_register.hpp +50 -0
  148. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_sse_register.hpp +50 -0
  149. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma4_register.hpp +50 -0
  150. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_i8mm_neon64_register.hpp +55 -0
  151. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_neon64_register.hpp +55 -0
  152. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_neon_register.hpp +154 -0
  153. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_register.hpp +94 -0
  154. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_rvv_register.hpp +506 -0
  155. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse2_register.hpp +59 -0
  156. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse3_register.hpp +49 -0
  157. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse4_1_register.hpp +48 -0
  158. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse4_2_register.hpp +48 -0
  159. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_ssse3_register.hpp +48 -0
  160. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sve_register.hpp +156 -0
  161. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_traits.hpp +337 -0
  162. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_utils.hpp +536 -0
  163. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_vsx_register.hpp +77 -0
  164. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_wasm_register.hpp +59 -0
  165. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/xsimd.hpp +75 -0
  166. sequenzo/dissimilarity_measures/src/xsimd/test/architectures/dummy.cpp +7 -0
  167. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set.cpp +13 -0
  168. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean.cpp +24 -0
  169. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_aligned.cpp +25 -0
  170. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_arch_independent.cpp +28 -0
  171. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_tag_dispatch.cpp +25 -0
  172. sequenzo/dissimilarity_measures/src/xsimd/test/doc/manipulating_abstract_batches.cpp +7 -0
  173. sequenzo/dissimilarity_measures/src/xsimd/test/doc/manipulating_parametric_batches.cpp +8 -0
  174. sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum.hpp +31 -0
  175. sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum_avx2.cpp +3 -0
  176. sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum_sse2.cpp +3 -0
  177. sequenzo/dissimilarity_measures/src/xsimd/test/doc/writing_vectorized_code.cpp +11 -0
  178. sequenzo/dissimilarity_measures/src/xsimd/test/main.cpp +31 -0
  179. sequenzo/dissimilarity_measures/src/xsimd/test/test_api.cpp +230 -0
  180. sequenzo/dissimilarity_measures/src/xsimd/test/test_arch.cpp +217 -0
  181. sequenzo/dissimilarity_measures/src/xsimd/test/test_basic_math.cpp +183 -0
  182. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch.cpp +1049 -0
  183. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_bool.cpp +508 -0
  184. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_cast.cpp +409 -0
  185. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_complex.cpp +712 -0
  186. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_constant.cpp +286 -0
  187. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_float.cpp +141 -0
  188. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_int.cpp +365 -0
  189. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_manip.cpp +308 -0
  190. sequenzo/dissimilarity_measures/src/xsimd/test/test_bitwise_cast.cpp +222 -0
  191. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_exponential.cpp +226 -0
  192. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_hyperbolic.cpp +183 -0
  193. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_power.cpp +265 -0
  194. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_trigonometric.cpp +236 -0
  195. sequenzo/dissimilarity_measures/src/xsimd/test/test_conversion.cpp +248 -0
  196. sequenzo/dissimilarity_measures/src/xsimd/test/test_custom_default_arch.cpp +28 -0
  197. sequenzo/dissimilarity_measures/src/xsimd/test/test_error_gamma.cpp +170 -0
  198. sequenzo/dissimilarity_measures/src/xsimd/test/test_explicit_batch_instantiation.cpp +32 -0
  199. sequenzo/dissimilarity_measures/src/xsimd/test/test_exponential.cpp +202 -0
  200. sequenzo/dissimilarity_measures/src/xsimd/test/test_extract_pair.cpp +92 -0
  201. sequenzo/dissimilarity_measures/src/xsimd/test/test_fp_manipulation.cpp +77 -0
  202. sequenzo/dissimilarity_measures/src/xsimd/test/test_gnu_source.cpp +30 -0
  203. sequenzo/dissimilarity_measures/src/xsimd/test/test_hyperbolic.cpp +167 -0
  204. sequenzo/dissimilarity_measures/src/xsimd/test/test_load_store.cpp +304 -0
  205. sequenzo/dissimilarity_measures/src/xsimd/test/test_memory.cpp +61 -0
  206. sequenzo/dissimilarity_measures/src/xsimd/test/test_poly_evaluation.cpp +64 -0
  207. sequenzo/dissimilarity_measures/src/xsimd/test/test_power.cpp +184 -0
  208. sequenzo/dissimilarity_measures/src/xsimd/test/test_rounding.cpp +199 -0
  209. sequenzo/dissimilarity_measures/src/xsimd/test/test_select.cpp +101 -0
  210. sequenzo/dissimilarity_measures/src/xsimd/test/test_shuffle.cpp +760 -0
  211. sequenzo/dissimilarity_measures/src/xsimd/test/test_sum.cpp +4 -0
  212. sequenzo/dissimilarity_measures/src/xsimd/test/test_sum.hpp +34 -0
  213. sequenzo/dissimilarity_measures/src/xsimd/test/test_traits.cpp +172 -0
  214. sequenzo/dissimilarity_measures/src/xsimd/test/test_trigonometric.cpp +208 -0
  215. sequenzo/dissimilarity_measures/src/xsimd/test/test_utils.hpp +611 -0
  216. sequenzo/dissimilarity_measures/src/xsimd/test/test_wasm/test_wasm_playwright.py +123 -0
  217. sequenzo/dissimilarity_measures/src/xsimd/test/test_xsimd_api.cpp +1460 -0
  218. sequenzo/dissimilarity_measures/utils/__init__.py +16 -0
  219. sequenzo/dissimilarity_measures/utils/get_LCP_length_for_2_seq.py +44 -0
  220. sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.cpython-310-darwin.so +0 -0
  221. sequenzo/dissimilarity_measures/utils/seqconc.cpython-310-darwin.so +0 -0
  222. sequenzo/dissimilarity_measures/utils/seqdss.cpython-310-darwin.so +0 -0
  223. sequenzo/dissimilarity_measures/utils/seqdur.cpython-310-darwin.so +0 -0
  224. sequenzo/dissimilarity_measures/utils/seqlength.cpython-310-darwin.so +0 -0
  225. sequenzo/multidomain/__init__.py +23 -0
  226. sequenzo/multidomain/association_between_domains.py +311 -0
  227. sequenzo/multidomain/cat.py +597 -0
  228. sequenzo/multidomain/combt.py +519 -0
  229. sequenzo/multidomain/dat.py +81 -0
  230. sequenzo/multidomain/idcd.py +139 -0
  231. sequenzo/multidomain/linked_polyad.py +292 -0
  232. sequenzo/openmp_setup.py +233 -0
  233. sequenzo/prefix_tree/__init__.py +62 -0
  234. sequenzo/prefix_tree/hub.py +114 -0
  235. sequenzo/prefix_tree/individual_level_indicators.py +1321 -0
  236. sequenzo/prefix_tree/spell_individual_level_indicators.py +580 -0
  237. sequenzo/prefix_tree/spell_level_indicators.py +297 -0
  238. sequenzo/prefix_tree/system_level_indicators.py +544 -0
  239. sequenzo/prefix_tree/utils.py +54 -0
  240. sequenzo/seqhmm/__init__.py +95 -0
  241. sequenzo/seqhmm/advanced_optimization.py +305 -0
  242. sequenzo/seqhmm/bootstrap.py +411 -0
  243. sequenzo/seqhmm/build_hmm.py +142 -0
  244. sequenzo/seqhmm/build_mhmm.py +136 -0
  245. sequenzo/seqhmm/build_nhmm.py +121 -0
  246. sequenzo/seqhmm/fit_mhmm.py +62 -0
  247. sequenzo/seqhmm/fit_model.py +61 -0
  248. sequenzo/seqhmm/fit_nhmm.py +76 -0
  249. sequenzo/seqhmm/formulas.py +289 -0
  250. sequenzo/seqhmm/forward_backward_nhmm.py +276 -0
  251. sequenzo/seqhmm/gradients_nhmm.py +306 -0
  252. sequenzo/seqhmm/hmm.py +291 -0
  253. sequenzo/seqhmm/mhmm.py +314 -0
  254. sequenzo/seqhmm/model_comparison.py +238 -0
  255. sequenzo/seqhmm/multichannel_em.py +282 -0
  256. sequenzo/seqhmm/multichannel_utils.py +138 -0
  257. sequenzo/seqhmm/nhmm.py +270 -0
  258. sequenzo/seqhmm/nhmm_utils.py +191 -0
  259. sequenzo/seqhmm/predict.py +137 -0
  260. sequenzo/seqhmm/predict_mhmm.py +142 -0
  261. sequenzo/seqhmm/simulate.py +878 -0
  262. sequenzo/seqhmm/utils.py +218 -0
  263. sequenzo/seqhmm/visualization.py +910 -0
  264. sequenzo/sequence_characteristics/__init__.py +40 -0
  265. sequenzo/sequence_characteristics/complexity_index.py +49 -0
  266. sequenzo/sequence_characteristics/overall_cross_sectional_entropy.py +220 -0
  267. sequenzo/sequence_characteristics/plot_characteristics.py +593 -0
  268. sequenzo/sequence_characteristics/simple_characteristics.py +311 -0
  269. sequenzo/sequence_characteristics/state_frequencies_and_entropy_per_sequence.py +39 -0
  270. sequenzo/sequence_characteristics/turbulence.py +155 -0
  271. sequenzo/sequence_characteristics/variance_of_spell_durations.py +86 -0
  272. sequenzo/sequence_characteristics/within_sequence_entropy.py +43 -0
  273. sequenzo/suffix_tree/__init__.py +66 -0
  274. sequenzo/suffix_tree/hub.py +114 -0
  275. sequenzo/suffix_tree/individual_level_indicators.py +1679 -0
  276. sequenzo/suffix_tree/spell_individual_level_indicators.py +493 -0
  277. sequenzo/suffix_tree/spell_level_indicators.py +248 -0
  278. sequenzo/suffix_tree/system_level_indicators.py +535 -0
  279. sequenzo/suffix_tree/utils.py +56 -0
  280. sequenzo/version_check.py +283 -0
  281. sequenzo/visualization/__init__.py +29 -0
  282. sequenzo/visualization/plot_mean_time.py +222 -0
  283. sequenzo/visualization/plot_modal_state.py +276 -0
  284. sequenzo/visualization/plot_most_frequent_sequences.py +147 -0
  285. sequenzo/visualization/plot_relative_frequency.py +405 -0
  286. sequenzo/visualization/plot_sequence_index.py +1175 -0
  287. sequenzo/visualization/plot_single_medoid.py +153 -0
  288. sequenzo/visualization/plot_state_distribution.py +651 -0
  289. sequenzo/visualization/plot_transition_matrix.py +190 -0
  290. sequenzo/visualization/utils/__init__.py +23 -0
  291. sequenzo/visualization/utils/utils.py +310 -0
  292. sequenzo/with_event_history_analysis/__init__.py +35 -0
  293. sequenzo/with_event_history_analysis/sequence_analysis_multi_state_model.py +850 -0
  294. sequenzo/with_event_history_analysis/sequence_history_analysis.py +283 -0
  295. sequenzo-0.1.31.dist-info/METADATA +286 -0
  296. sequenzo-0.1.31.dist-info/RECORD +299 -0
  297. sequenzo-0.1.31.dist-info/WHEEL +5 -0
  298. sequenzo-0.1.31.dist-info/licenses/LICENSE +28 -0
  299. sequenzo-0.1.31.dist-info/top_level.txt +2 -0
@@ -0,0 +1,276 @@
1
+ """
2
+ @Author : Yuqi Liang 梁彧祺
3
+ @File : forward_backward_nhmm.py
4
+ @Time : 2025-10-20 09:41
5
+ @Desc : Forward-Backward algorithm for Non-homogeneous HMM
6
+
7
+ This module provides the forward-backward algorithm implementation for NHMM,
8
+ which handles time-varying transition and emission probabilities.
9
+ This is similar to seqHMM's forward_backward.nhmm() function in R.
10
+ """
11
+
12
+ import numpy as np
13
+ import pandas as pd
14
+ from typing import Optional, Tuple
15
+ from .nhmm import NHMM
16
+ from .nhmm_utils import (
17
+ compute_transition_probs_with_covariates,
18
+ compute_emission_probs_with_covariates,
19
+ compute_initial_probs_with_covariates
20
+ )
21
+ from .utils import sequence_data_to_hmmlearn_format
22
+
23
+
24
def forward_backward_nhmm(
    model: NHMM,
    sequences: Optional = None,
    forward_only: bool = False
) -> pd.DataFrame:
    """
    Compute forward and backward log-probabilities for a Non-homogeneous HMM.

    For every sequence the time-varying initial, transition and emission
    probabilities produced by ``model._compute_probs()`` are fed to the
    log-space forward (and optionally backward) recursion, and the result is
    returned in long format: one row per (sequence, time point, hidden state).

    This is similar to seqHMM's forward_backward.nhmm() function in R.

    Args:
        model: Fitted NHMM model object (``model.log_likelihood`` must not be None).
        sequences: Optional SequenceData (uses model.observations if None).
        forward_only: If True, only compute forward probabilities. Default is False.

    Returns:
        pandas DataFrame with columns:
            - id: 0-based position of the sequence in the converted input
            - time: Time point (1-indexed)
            - state: Hidden state index (0-based)
            - log_alpha: Log forward probability
            - log_beta: Log backward probability (only if forward_only=False)

    Raises:
        ValueError: If the model has not been fitted yet.

    Examples:
        >>> from sequenzo.seqhmm import build_nhmm, fit_nhmm, forward_backward_nhmm
        >>>
        >>> nhmm = build_nhmm(seq, n_states=4, X=X, random_state=42)
        >>> nhmm = fit_nhmm(nhmm)
        >>>
        >>> # Compute forward-backward probabilities
        >>> fb = forward_backward_nhmm(nhmm)
        >>> print(fb.head())
    """
    if model.log_likelihood is None:
        raise ValueError("Model must be fitted before computing forward-backward probabilities.")

    if sequences is None:
        sequences = model.observations

    # Flatten the sequences into one integer-coded column plus per-sequence lengths.
    X_int, lengths = sequence_data_to_hmmlearn_format(sequences)

    # NOTE(review): the probabilities are always computed from the model's own
    # data/covariates, even when `sequences` is supplied explicitly -- confirm
    # that callers only pass sequences aligned with the model's covariates.
    initial_probs, transition_probs, emission_probs = model._compute_probs()

    n_states = model.n_states
    results = []

    # Running offset into the flattened observations; avoids re-summing
    # lengths[:seq_idx] for every sequence (quadratic in the number of sequences).
    start_idx = 0
    for seq_idx, seq_length in enumerate(lengths):
        seq_length = int(seq_length)
        end_idx = start_idx + seq_length

        # Integer-coded observations of this sequence, shape (seq_length,)
        obs_seq = X_int[start_idx:end_idx, 0]

        # Per-sequence probabilities, truncated to the actual sequence length
        seq_initial = initial_probs[seq_idx, :]
        seq_transition = transition_probs[seq_idx, :seq_length, :, :]
        seq_emission = emission_probs[seq_idx, :seq_length, :, :]

        log_alpha = _forward_nhmm(seq_initial, seq_transition, seq_emission, obs_seq, n_states)
        log_beta = (
            None if forward_only
            else _backward_nhmm(seq_transition, seq_emission, obs_seq, n_states)
        )

        # Emit one long-format row per (time, state) pair.
        for t in range(seq_length):
            for state_idx in range(n_states):
                row = {
                    'id': seq_idx,
                    'time': t + 1,  # 1-indexed
                    'state': state_idx,
                    'log_alpha': log_alpha[state_idx, t],
                }
                if log_beta is not None:
                    row['log_beta'] = log_beta[state_idx, t]
                results.append(row)

        start_idx = end_idx

    return pd.DataFrame(results)
114
+
115
+
116
def _forward_nhmm(
    initial_probs: np.ndarray,
    transition_probs: np.ndarray,
    emission_probs: np.ndarray,
    observations: np.ndarray,
    n_states: int
) -> np.ndarray:
    """
    Forward algorithm for Non-homogeneous HMM (log-space implementation).

    Computes log forward probabilities, where
    alpha[i, t] = P(obs[0..t], state_t = i).

    For NHMM, transition and emission probabilities vary with time, so the
    time-specific matrices are used at each step: the transition from time
    t-1 to t uses ``transition_probs[t-1]`` and the emission at time t uses
    ``emission_probs[t]``.

    A small constant (1e-10) is added to every probability before taking the
    log, so zero probabilities map to log(1e-10) instead of -inf.

    Args:
        initial_probs: Initial state probabilities (n_states,)
        transition_probs: Time-varying transition matrix (T, n_states, n_states)
        emission_probs: Time-varying emission matrix (T, n_states, n_symbols)
        observations: Observed sequence (T,) with integer observations (0-indexed)
        n_states: Number of hidden states

    Returns:
        numpy array: Log forward probabilities (n_states, T). For an empty
        observation sequence an array of shape (n_states, 0) is returned.
    """
    eps = 1e-10
    obs = np.asarray(observations)
    T = len(obs)
    log_alpha = np.zeros((n_states, T))

    # Nothing to recurse over for an empty sequence (previously an IndexError).
    if T == 0:
        return log_alpha

    # Hoist all logs out of the recursion.
    # log_emit[t, j] = log B_t[j, obs[t]]; the two advanced indices are
    # separated by a slice, so the result has shape (T, n_states).
    log_emit = np.log(np.asarray(emission_probs)[np.arange(T), :n_states, obs] + eps)
    # log_trans[t, i, j] = log A_t[i, j]; only steps 0..T-2 are ever used.
    log_trans = np.log(np.asarray(transition_probs)[:T - 1, :n_states, :n_states] + eps)

    # Initialization: alpha[i, 0] = pi[i] * B_0[i, obs[0]]
    log_alpha[:, 0] = np.log(np.asarray(initial_probs)[:n_states] + eps) + log_emit[0]

    # Recursion: alpha[j, t] = sum_i alpha[i, t-1] * A_{t-1}[i, j] * B_t[j, obs[t]]
    for t in range(1, T):
        # scores[i, j] is the log of one summand; reduce over the source state i.
        scores = (log_alpha[:, t - 1, None] + log_trans[t - 1]) + log_emit[t]
        log_alpha[:, t] = np.logaddexp.reduce(scores, axis=0)

    return log_alpha
171
+
172
+
173
def _backward_nhmm(
    transition_probs: np.ndarray,
    emission_probs: np.ndarray,
    observations: np.ndarray,
    n_states: int
) -> np.ndarray:
    """
    Backward algorithm for Non-homogeneous HMM (log-space implementation).

    Computes log backward probabilities, where
    beta[i, t] = P(obs[t+1..T-1] | state_t = i).

    The transition from time t to t+1 uses ``transition_probs[t]`` and the
    emission at time t+1 uses ``emission_probs[t+1]``. A small constant
    (1e-10) is added to every probability before taking the log, so zero
    probabilities map to log(1e-10) instead of -inf.

    Args:
        transition_probs: Time-varying transition matrix (T, n_states, n_states)
        emission_probs: Time-varying emission matrix (T, n_states, n_symbols)
        observations: Observed sequence (T,) with integer observations (0-indexed)
        n_states: Number of hidden states

    Returns:
        numpy array: Log backward probabilities (n_states, T). For an empty
        observation sequence an array of shape (n_states, 0) is returned.
    """
    eps = 1e-10
    obs = np.asarray(observations)
    T = len(obs)

    # Zeros double as the initialization beta[i, T-1] = 1, i.e. log(1) = 0.
    log_beta = np.zeros((n_states, T))

    # T == 0: nothing to compute (previously an IndexError);
    # T == 1: only the terminal column exists and it is already log(1) = 0.
    if T < 2:
        return log_beta

    # Hoist all logs out of the recursion.
    # log_emit[t, j] = log B_t[j, obs[t]]; the two advanced indices are
    # separated by a slice, so the result has shape (T, n_states).
    log_emit = np.log(np.asarray(emission_probs)[np.arange(T), :n_states, obs] + eps)
    # log_trans[t, i, j] = log A_t[i, j]; only steps 0..T-2 are ever used.
    log_trans = np.log(np.asarray(transition_probs)[:T - 1, :n_states, :n_states] + eps)

    # Recursion: beta[i, t] = sum_j A_t[i, j] * B_{t+1}[j, obs[t+1]] * beta[j, t+1]
    for t in range(T - 2, -1, -1):
        # scores[i, j] is the log of one summand; reduce over the target state j.
        scores = (log_trans[t] + log_emit[t + 1]) + log_beta[:, t + 1]
        log_beta[:, t] = np.logaddexp.reduce(scores, axis=1)

    return log_beta
218
+
219
+
220
def log_likelihood_nhmm(model: NHMM, sequences: Optional = None) -> float:
    """
    Compute log-likelihood for NHMM using forward algorithm.

    For each sequence the log-likelihood is the log of the sum of the forward
    probabilities at the final time point (evaluated with log-sum-exp for
    numerical stability); the per-sequence values are then added up.

    This is similar to seqHMM's logLik.nhmm() function in R.

    Args:
        model: NHMM model object providing ``observations``, ``n_states`` and
            ``_compute_probs()``
        sequences: Optional SequenceData (uses model.observations if None)

    Returns:
        float: Total log-likelihood across all sequences
    """
    if sequences is None:
        sequences = model.observations

    # Flatten the sequences into one integer-coded column plus per-sequence lengths.
    X_int, lengths = sequence_data_to_hmmlearn_format(sequences)

    # NOTE(review): the probabilities are always computed from the model's own
    # data/covariates, even when `sequences` is supplied explicitly -- confirm
    # that callers only pass sequences aligned with the model's covariates.
    initial_probs, transition_probs, emission_probs = model._compute_probs()

    n_states = model.n_states
    total_log_lik = 0.0

    # Running offset into the flattened observations; avoids re-summing
    # lengths[:seq_idx] for every sequence (quadratic in the number of sequences).
    start_idx = 0
    for seq_idx, seq_length in enumerate(lengths):
        seq_length = int(seq_length)
        end_idx = start_idx + seq_length

        # Integer-coded observations of this sequence
        obs_seq = X_int[start_idx:end_idx, 0]

        # Forward pass with this sequence's probabilities, truncated to its length
        log_alpha = _forward_nhmm(
            initial_probs[seq_idx, :],
            transition_probs[seq_idx, :seq_length, :, :],
            emission_probs[seq_idx, :seq_length, :, :],
            obs_seq,
            n_states,
        )

        # log P(obs) = log sum_i alpha[i, T-1], accumulated in log space
        total_log_lik += np.logaddexp.reduce(log_alpha[:, seq_length - 1])

        start_idx = end_idx

    return total_log_lik
@@ -0,0 +1,306 @@
1
+ """
2
+ @Author : Yuqi Liang 梁彧祺
3
+ @File : gradients_nhmm.py
4
+ @Time : 2025-10-22 15:18
5
+ @Desc : Analytical gradient computation for Non-homogeneous HMM
6
+
7
+ This module provides functions for computing analytical gradients of the
8
+ log-likelihood with respect to model parameters (eta_pi, eta_A, eta_B).
9
+ This is similar to seqHMM's gradient computation in objective_functions.R.
10
+
11
+ Note: This is a complex implementation. The gradients are computed using
12
+ the forward-backward algorithm and chain rule through the Softmax function.
13
+ """
14
+
15
+ import numpy as np
16
+ from typing import Tuple
17
+ from .nhmm import NHMM
18
+ from .forward_backward_nhmm import _forward_nhmm, _backward_nhmm
19
+ from .nhmm_utils import softmax
20
+ from .utils import sequence_data_to_hmmlearn_format
21
+
22
+
23
def compute_gradient_nhmm(model: NHMM) -> np.ndarray:
    """
    Compute analytical gradient of log-likelihood with respect to parameters.

    For every sequence in ``model.observations`` the function:

    1. runs ``_forward_nhmm`` / ``_backward_nhmm`` to get log forward and
       backward probabilities (indexed [state, time]);
    2. derives the state posteriors ``gamma[i, t]`` and the pairwise
       posteriors ``xi[i, j, t]``;
    3. turns them into gradients w.r.t. the initial, transition and emission
       probabilities and pushes those through the Softmax chain rule to get
       gradients w.r.t. ``eta_pi``, ``eta_A`` and ``eta_B``.

    The contributions of all sequences are summed.

    This is similar to seqHMM's gradient computation for NHMM.

    Args:
        model: Fitted NHMM model object. The function reads
            ``observations``, ``X``, ``n_states``, ``n_symbols``,
            ``eta_pi``, ``eta_A``, ``eta_B`` and calls ``_compute_probs()``.

    Returns:
        numpy array: Flattened gradient vector [grad_eta_pi, grad_eta_A, grad_eta_B]
    """
    # Convert sequences to a stacked integer column plus per-sequence lengths
    X_int, lengths = sequence_data_to_hmmlearn_format(model.observations)
    n_sequences = len(lengths)
    n_states = model.n_states
    n_symbols = model.n_symbols

    # Compute probabilities
    initial_probs, transition_probs, emission_probs = model._compute_probs()

    # Gradient accumulators, shaped like the parameters they belong to
    grad_eta_pi = np.zeros_like(model.eta_pi)
    grad_eta_A = np.zeros_like(model.eta_A)
    grad_eta_B = np.zeros_like(model.eta_B)

    # End offset of every sequence inside X_int, computed once instead of
    # re-summing ``lengths[:seq_idx]`` per iteration (quadratic in n_sequences).
    seq_ends = np.cumsum(lengths)

    for seq_idx in range(n_sequences):
        seq_length = lengths[seq_idx]
        end_idx = seq_ends[seq_idx]
        start_idx = end_idx - seq_length

        # Observed symbols of this sequence
        obs_seq = X_int[start_idx:end_idx, 0]

        # Probabilities restricted to this sequence and its valid time points
        seq_initial = initial_probs[seq_idx, :]
        seq_transition = transition_probs[seq_idx, :seq_length, :, :]
        seq_emission = emission_probs[seq_idx, :seq_length, :, :]

        # Covariates for this sequence, indexed [time, covariate]
        X_seq = model.X[seq_idx, :seq_length, :]

        # Forward and backward log-probabilities, indexed [state, time]
        log_alpha = _forward_nhmm(seq_initial, seq_transition, seq_emission, obs_seq, n_states)
        log_beta = _backward_nhmm(seq_transition, seq_emission, obs_seq, n_states)

        # log P(obs): stable log-sum-exp over states at the last time point
        log_lik_seq = np.logaddexp.reduce(log_alpha[:n_states, seq_length - 1])

        # gamma[i, t] = P(state_t = i | obs) = alpha[i, t] * beta[i, t] / P(obs)
        gamma = np.exp(
            log_alpha[:n_states, :seq_length]
            + log_beta[:n_states, :seq_length]
            - log_lik_seq
        )

        # xi[i, j, t] = P(state_t = i, state_{t+1} = j | obs).  Each time step
        # is one broadcast over (i, j); the operands are added in the same
        # order as the scalar formula so the values are unchanged.
        xi = np.zeros((n_states, n_states, seq_length - 1))
        for t in range(seq_length - 1):
            log_trans = np.log(seq_transition[t, :n_states, :n_states] + 1e-10)
            log_emit_next = np.log(seq_emission[t + 1, :n_states, obs_seq[t + 1]] + 1e-10)
            xi[:, :, t] = np.exp(
                log_alpha[:n_states, t][:, None]
                + log_trans
                + log_emit_next[None, :]
                + log_beta[:n_states, t + 1][None, :]
                - log_lik_seq
            )

        # Gradient w.r.t. eta_pi (initial probabilities); the initial
        # distribution uses the covariates of the first time point.
        grad_pi = _gradient_initial_probs(gamma, seq_initial, n_states)
        grad_eta_pi += _gradient_softmax_to_eta(grad_pi, seq_initial, X_seq[0, :], n_states)

        # Gradient w.r.t. eta_A (transition probabilities)
        for t in range(seq_length - 1):
            grad_A_t = _gradient_transition_probs(
                xi[:, :, t], gamma[:, t], seq_transition[t, :, :], n_states
            )
            grad_eta_A += _gradient_softmax_to_eta_transition(
                grad_A_t, seq_transition[t, :, :], X_seq[t, :], n_states
            )

        # Gradient w.r.t. eta_B (emission probabilities)
        for t in range(seq_length):
            grad_B_t = _gradient_emission_probs(
                gamma[:, t], obs_seq[t], seq_emission[t, :, :], n_states, n_symbols
            )
            grad_eta_B += _gradient_softmax_to_eta_emission(
                grad_B_t, seq_emission[t, :, :], X_seq[t, :], n_states, n_symbols
            )

    # Flatten gradients in the order [eta_pi, eta_A, eta_B]
    grad_flat = np.concatenate([
        grad_eta_pi.flatten(),
        grad_eta_A.flatten(),
        grad_eta_B.flatten(),
    ])

    return grad_flat
127
+
128
+
129
def _gradient_initial_probs(gamma: np.ndarray, initial_probs: np.ndarray, n_states: int) -> np.ndarray:
    """
    Gradient of the log-likelihood with respect to the initial probabilities.

    Uses dL/dpi[i] = gamma[i, 0] / pi[i], with a small constant added to the
    denominator to avoid division by zero.

    Args:
        gamma: Posterior probabilities (n_states, T)
        initial_probs: Initial probabilities (n_states,)
        n_states: Number of states (not used by the computation)

    Returns:
        numpy array: Gradient w.r.t. initial probabilities (n_states,)
    """
    posterior_at_start = gamma[:, 0]
    guarded_pi = initial_probs + 1e-10
    return posterior_at_start / guarded_pi
144
+
145
+
146
def _gradient_transition_probs(
    xi: np.ndarray,
    gamma_t: np.ndarray,
    transition_probs: np.ndarray,
    n_states: int
) -> np.ndarray:
    """
    Gradient of the log-likelihood with respect to the transition probabilities.

    Uses dL/dA[i, j] = xi[i, j] / A[i, j], with a small constant added to the
    denominator to avoid division by zero.

    Args:
        xi: Joint probabilities (n_states, n_states)
        gamma_t: Posterior probabilities at time t (n_states,); not used by
            the computation
        transition_probs: Transition probabilities (n_states, n_states)
        n_states: Number of states (not used by the computation)

    Returns:
        numpy array: Gradient w.r.t. transition probabilities (n_states, n_states)
    """
    guarded_A = transition_probs + 1e-10
    return xi / guarded_A
167
+
168
+
169
def _gradient_emission_probs(
    gamma_t: np.ndarray,
    obs: int,
    emission_probs: np.ndarray,
    n_states: int,
    n_symbols: int
) -> np.ndarray:
    """
    Gradient of the log-likelihood with respect to the emission probabilities.

    Only the column of the observed symbol is non-zero:
    dL/dB[i, j] = gamma[i] / B[i, j] if j == obs, else 0. A small constant is
    added to the denominator to avoid division by zero.

    Args:
        gamma_t: Posterior probabilities at time t (n_states,)
        obs: Observed symbol (integer, 0-indexed)
        emission_probs: Emission probabilities (n_states, n_symbols)
        n_states: Number of states
        n_symbols: Number of symbols

    Returns:
        numpy array: Gradient w.r.t. emission probabilities (n_states, n_symbols)
    """
    result = np.zeros((n_states, n_symbols))
    # Fill the observed-symbol column for all states in one assignment
    observed_column = emission_probs[:n_states, obs] + 1e-10
    result[:, obs] = gamma_t[:n_states] / observed_column
    return result
194
+
195
+
196
def _gradient_softmax_to_eta(
    grad_gamma: np.ndarray,
    gamma: np.ndarray,
    x: np.ndarray,
    n_categories: int
) -> np.ndarray:
    """
    Compute gradient w.r.t. eta from gradient w.r.t. gamma (Softmax chain rule).

    For initial probabilities: gamma = softmax(eta), where eta = X @ eta_pi

    The Softmax Jacobian is dgamma[j]/deta[i] = gamma[j] * (delta[i,j] - gamma[i]),
    so the product with the upstream gradient g collapses to

        dL/deta[i] = gamma[i] * (g[i] - sum_j g[j] * gamma[j])

    and, because eta is linear in the covariates, the gradient w.r.t. the
    coefficient of covariate c and category i is x[c] * dL/deta[i]. This
    closed form replaces an explicit loop over every (covariate, i, j)
    triple that rebuilt the same Jacobian product for each covariate.

    Args:
        grad_gamma: Gradient w.r.t. gamma (n_categories,)
        gamma: Probabilities (n_categories,)
        x: Covariates (n_covariates,)
        n_categories: Number of categories (n_states for initial probs)

    Returns:
        numpy array: Gradient w.r.t. eta_pi (n_covariates, n_states)
    """
    probs = np.asarray(gamma, dtype=float)[:n_categories]
    upstream = np.asarray(grad_gamma, dtype=float)[:n_categories]
    covariates = np.asarray(x, dtype=float)

    # Jacobian-vector product of the Softmax, independent of the covariates
    dl_deta = probs * (upstream - np.dot(upstream, probs))

    # Linear predictor: each covariate scales the per-category gradient
    return np.outer(covariates, dl_deta)
231
+
232
+
233
def _gradient_softmax_to_eta_transition(
    grad_A: np.ndarray,
    A: np.ndarray,
    x: np.ndarray,
    n_states: int
) -> np.ndarray:
    """
    Compute gradient w.r.t. eta_A from gradient w.r.t. A (Softmax chain rule).

    For transition probabilities: A[i, :] = softmax(eta[i, :]), where eta[i, j] = X @ eta_A[:, i, j]

    Each row i is an independent Softmax, so the Jacobian product with the
    upstream gradient G collapses row-wise to

        dL/deta[i, j] = A[i, j] * (G[i, j] - sum_k G[i, k] * A[i, k])

    and the gradient w.r.t. the coefficient of covariate c is
    x[c] * dL/deta[i, j]. This closed form replaces a four-deep loop that
    rebuilt the same Jacobian product for every covariate.

    Args:
        grad_A: Gradient w.r.t. A (n_states, n_states)
        A: Transition probabilities (n_states, n_states)
        x: Covariates (n_covariates,)
        n_states: Number of states

    Returns:
        numpy array: Gradient w.r.t. eta_A (n_covariates, n_states, n_states)
    """
    probs = np.asarray(A, dtype=float)[:n_states, :n_states]
    upstream = np.asarray(grad_A, dtype=float)[:n_states, :n_states]
    covariates = np.asarray(x, dtype=float)

    # Per-row inner product <G[i, :], A[i, :]>, kept 2-D for broadcasting
    row_inner = np.sum(upstream * probs, axis=1, keepdims=True)
    dl_deta = probs * (upstream - row_inner)

    # Broadcast the covariates over the (from-state, to-state) gradient
    return covariates[:, None, None] * dl_deta[None, :, :]
268
+
269
+
270
def _gradient_softmax_to_eta_emission(
    grad_B: np.ndarray,
    B: np.ndarray,
    x: np.ndarray,
    n_states: int,
    n_symbols: int
) -> np.ndarray:
    """
    Compute gradient w.r.t. eta_B from gradient w.r.t. B (Softmax chain rule).

    For emission probabilities: B[i, :] = softmax(eta[i, :]), where eta[i, j] = X @ eta_B[:, i, j]

    Each state's emission row is an independent Softmax over symbols, so the
    Jacobian product with the upstream gradient G collapses row-wise to

        dL/deta[i, j] = B[i, j] * (G[i, j] - sum_k G[i, k] * B[i, k])

    and the gradient w.r.t. the coefficient of covariate c is
    x[c] * dL/deta[i, j]. This closed form replaces a four-deep loop that
    rebuilt the same Jacobian product for every covariate.

    Args:
        grad_B: Gradient w.r.t. B (n_states, n_symbols)
        B: Emission probabilities (n_states, n_symbols)
        x: Covariates (n_covariates,)
        n_states: Number of states
        n_symbols: Number of symbols

    Returns:
        numpy array: Gradient w.r.t. eta_B (n_covariates, n_states, n_symbols)
    """
    probs = np.asarray(B, dtype=float)[:n_states, :n_symbols]
    upstream = np.asarray(grad_B, dtype=float)[:n_states, :n_symbols]
    covariates = np.asarray(x, dtype=float)

    # Per-state inner product <G[i, :], B[i, :]>, kept 2-D for broadcasting
    row_inner = np.sum(upstream * probs, axis=1, keepdims=True)
    dl_deta = probs * (upstream - row_inner)

    # Broadcast the covariates over the (state, symbol) gradient
    return covariates[:, None, None] * dl_deta[None, :, :]