sequenzo 0.1.31__cp310-cp310-macosx_10_9_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _sequenzo_fastcluster.cpython-310-darwin.so +0 -0
- sequenzo/__init__.py +349 -0
- sequenzo/big_data/__init__.py +12 -0
- sequenzo/big_data/clara/__init__.py +26 -0
- sequenzo/big_data/clara/clara.py +476 -0
- sequenzo/big_data/clara/utils/__init__.py +27 -0
- sequenzo/big_data/clara/utils/aggregatecases.py +92 -0
- sequenzo/big_data/clara/utils/davies_bouldin.py +91 -0
- sequenzo/big_data/clara/utils/get_weighted_diss.cpython-310-darwin.so +0 -0
- sequenzo/big_data/clara/utils/wfcmdd.py +205 -0
- sequenzo/big_data/clara/visualization.py +88 -0
- sequenzo/clustering/KMedoids.py +178 -0
- sequenzo/clustering/__init__.py +30 -0
- sequenzo/clustering/clustering_c_code.cpython-310-darwin.so +0 -0
- sequenzo/clustering/hierarchical_clustering.py +1256 -0
- sequenzo/clustering/sequenzo_fastcluster/fastcluster.py +495 -0
- sequenzo/clustering/sequenzo_fastcluster/src/fastcluster.cpp +1877 -0
- sequenzo/clustering/sequenzo_fastcluster/src/fastcluster_python.cpp +1264 -0
- sequenzo/clustering/src/KMedoid.cpp +263 -0
- sequenzo/clustering/src/PAM.cpp +237 -0
- sequenzo/clustering/src/PAMonce.cpp +265 -0
- sequenzo/clustering/src/cluster_quality.cpp +496 -0
- sequenzo/clustering/src/cluster_quality.h +128 -0
- sequenzo/clustering/src/cluster_quality_backup.cpp +570 -0
- sequenzo/clustering/src/module.cpp +228 -0
- sequenzo/clustering/src/weightedinertia.cpp +111 -0
- sequenzo/clustering/utils/__init__.py +27 -0
- sequenzo/clustering/utils/disscenter.py +122 -0
- sequenzo/data_preprocessing/__init__.py +22 -0
- sequenzo/data_preprocessing/helpers.py +303 -0
- sequenzo/datasets/__init__.py +41 -0
- sequenzo/datasets/biofam.csv +2001 -0
- sequenzo/datasets/biofam_child_domain.csv +2001 -0
- sequenzo/datasets/biofam_left_domain.csv +2001 -0
- sequenzo/datasets/biofam_married_domain.csv +2001 -0
- sequenzo/datasets/chinese_colonial_territories.csv +12 -0
- sequenzo/datasets/country_co2_emissions.csv +194 -0
- sequenzo/datasets/country_co2_emissions_global_deciles.csv +195 -0
- sequenzo/datasets/country_co2_emissions_global_quintiles.csv +195 -0
- sequenzo/datasets/country_co2_emissions_local_deciles.csv +195 -0
- sequenzo/datasets/country_co2_emissions_local_quintiles.csv +195 -0
- sequenzo/datasets/country_gdp_per_capita.csv +194 -0
- sequenzo/datasets/dyadic_children.csv +61 -0
- sequenzo/datasets/dyadic_parents.csv +61 -0
- sequenzo/datasets/mvad.csv +713 -0
- sequenzo/datasets/pairfam_activity_by_month.csv +1028 -0
- sequenzo/datasets/pairfam_activity_by_year.csv +1028 -0
- sequenzo/datasets/pairfam_family_by_month.csv +1028 -0
- sequenzo/datasets/pairfam_family_by_year.csv +1028 -0
- sequenzo/datasets/political_science_aid_shock.csv +166 -0
- sequenzo/datasets/political_science_donor_fragmentation.csv +157 -0
- sequenzo/define_sequence_data.py +1400 -0
- sequenzo/dissimilarity_measures/__init__.py +31 -0
- sequenzo/dissimilarity_measures/c_code.cpython-310-darwin.so +0 -0
- sequenzo/dissimilarity_measures/get_distance_matrix.py +762 -0
- sequenzo/dissimilarity_measures/get_substitution_cost_matrix.py +246 -0
- sequenzo/dissimilarity_measures/src/DHDdistance.cpp +148 -0
- sequenzo/dissimilarity_measures/src/LCPdistance.cpp +114 -0
- sequenzo/dissimilarity_measures/src/LCPspellDistance.cpp +215 -0
- sequenzo/dissimilarity_measures/src/OMdistance.cpp +247 -0
- sequenzo/dissimilarity_measures/src/OMspellDistance.cpp +281 -0
- sequenzo/dissimilarity_measures/src/__init__.py +0 -0
- sequenzo/dissimilarity_measures/src/dist2matrix.cpp +63 -0
- sequenzo/dissimilarity_measures/src/dp_utils.h +160 -0
- sequenzo/dissimilarity_measures/src/module.cpp +40 -0
- sequenzo/dissimilarity_measures/src/setup.py +30 -0
- sequenzo/dissimilarity_measures/src/utils.h +25 -0
- sequenzo/dissimilarity_measures/src/xsimd/.github/cmake-test/main.cpp +6 -0
- sequenzo/dissimilarity_measures/src/xsimd/benchmark/main.cpp +159 -0
- sequenzo/dissimilarity_measures/src/xsimd/benchmark/xsimd_benchmark.hpp +565 -0
- sequenzo/dissimilarity_measures/src/xsimd/docs/source/conf.py +37 -0
- sequenzo/dissimilarity_measures/src/xsimd/examples/mandelbrot.cpp +330 -0
- sequenzo/dissimilarity_measures/src/xsimd/examples/pico_bench.hpp +246 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_arithmetic.hpp +266 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_complex.hpp +112 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_details.hpp +323 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_logical.hpp +218 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_math.hpp +2583 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_memory.hpp +880 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_rounding.hpp +72 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_swizzle.hpp +174 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_trigo.hpp +978 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx.hpp +1924 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx2.hpp +1144 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512bw.hpp +656 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512cd.hpp +28 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512dq.hpp +244 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512er.hpp +20 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512f.hpp +2650 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512ifma.hpp +20 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512pf.hpp +20 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vbmi.hpp +77 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vbmi2.hpp +131 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vnni_avx512bw.hpp +20 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vnni_avx512vbmi2.hpp +20 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avxvnni.hpp +20 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_common.hpp +24 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_common_fwd.hpp +77 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_constants.hpp +393 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_emulated.hpp +788 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_avx.hpp +93 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_avx2.hpp +46 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_sse.hpp +97 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma4.hpp +92 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_i8mm_neon64.hpp +17 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_isa.hpp +142 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_neon.hpp +3142 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_neon64.hpp +1543 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_rvv.hpp +1513 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_scalar.hpp +1260 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse2.hpp +2024 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse3.hpp +67 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse4_1.hpp +339 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse4_2.hpp +44 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_ssse3.hpp +186 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sve.hpp +1155 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_vsx.hpp +892 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_wasm.hpp +1780 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_arch.hpp +240 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_config.hpp +484 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_cpuid.hpp +269 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_inline.hpp +27 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/math/xsimd_rem_pio2.hpp +719 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/memory/xsimd_aligned_allocator.hpp +349 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/memory/xsimd_alignment.hpp +91 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_all_registers.hpp +55 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_api.hpp +2765 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx2_register.hpp +44 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512bw_register.hpp +51 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512cd_register.hpp +51 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512dq_register.hpp +51 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512er_register.hpp +51 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512f_register.hpp +77 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512ifma_register.hpp +51 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512pf_register.hpp +51 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vbmi2_register.hpp +51 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vbmi_register.hpp +51 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512bw_register.hpp +54 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512vbmi2_register.hpp +53 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx_register.hpp +64 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avxvnni_register.hpp +44 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_batch.hpp +1524 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_batch_constant.hpp +300 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_common_arch.hpp +47 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_emulated_register.hpp +80 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_avx2_register.hpp +50 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_avx_register.hpp +50 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_sse_register.hpp +50 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma4_register.hpp +50 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_i8mm_neon64_register.hpp +55 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_neon64_register.hpp +55 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_neon_register.hpp +154 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_register.hpp +94 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_rvv_register.hpp +506 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse2_register.hpp +59 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse3_register.hpp +49 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse4_1_register.hpp +48 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse4_2_register.hpp +48 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_ssse3_register.hpp +48 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sve_register.hpp +156 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_traits.hpp +337 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_utils.hpp +536 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_vsx_register.hpp +77 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_wasm_register.hpp +59 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/xsimd.hpp +75 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/architectures/dummy.cpp +7 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set.cpp +13 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean.cpp +24 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_aligned.cpp +25 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_arch_independent.cpp +28 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_tag_dispatch.cpp +25 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/doc/manipulating_abstract_batches.cpp +7 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/doc/manipulating_parametric_batches.cpp +8 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum.hpp +31 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum_avx2.cpp +3 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum_sse2.cpp +3 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/doc/writing_vectorized_code.cpp +11 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/main.cpp +31 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_api.cpp +230 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_arch.cpp +217 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_basic_math.cpp +183 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_batch.cpp +1049 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_bool.cpp +508 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_cast.cpp +409 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_complex.cpp +712 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_constant.cpp +286 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_float.cpp +141 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_int.cpp +365 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_manip.cpp +308 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_bitwise_cast.cpp +222 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_exponential.cpp +226 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_hyperbolic.cpp +183 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_power.cpp +265 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_trigonometric.cpp +236 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_conversion.cpp +248 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_custom_default_arch.cpp +28 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_error_gamma.cpp +170 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_explicit_batch_instantiation.cpp +32 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_exponential.cpp +202 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_extract_pair.cpp +92 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_fp_manipulation.cpp +77 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_gnu_source.cpp +30 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_hyperbolic.cpp +167 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_load_store.cpp +304 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_memory.cpp +61 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_poly_evaluation.cpp +64 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_power.cpp +184 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_rounding.cpp +199 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_select.cpp +101 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_shuffle.cpp +760 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_sum.cpp +4 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_sum.hpp +34 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_traits.cpp +172 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_trigonometric.cpp +208 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_utils.hpp +611 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_wasm/test_wasm_playwright.py +123 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_xsimd_api.cpp +1460 -0
- sequenzo/dissimilarity_measures/utils/__init__.py +16 -0
- sequenzo/dissimilarity_measures/utils/get_LCP_length_for_2_seq.py +44 -0
- sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.cpython-310-darwin.so +0 -0
- sequenzo/dissimilarity_measures/utils/seqconc.cpython-310-darwin.so +0 -0
- sequenzo/dissimilarity_measures/utils/seqdss.cpython-310-darwin.so +0 -0
- sequenzo/dissimilarity_measures/utils/seqdur.cpython-310-darwin.so +0 -0
- sequenzo/dissimilarity_measures/utils/seqlength.cpython-310-darwin.so +0 -0
- sequenzo/multidomain/__init__.py +23 -0
- sequenzo/multidomain/association_between_domains.py +311 -0
- sequenzo/multidomain/cat.py +597 -0
- sequenzo/multidomain/combt.py +519 -0
- sequenzo/multidomain/dat.py +81 -0
- sequenzo/multidomain/idcd.py +139 -0
- sequenzo/multidomain/linked_polyad.py +292 -0
- sequenzo/openmp_setup.py +233 -0
- sequenzo/prefix_tree/__init__.py +62 -0
- sequenzo/prefix_tree/hub.py +114 -0
- sequenzo/prefix_tree/individual_level_indicators.py +1321 -0
- sequenzo/prefix_tree/spell_individual_level_indicators.py +580 -0
- sequenzo/prefix_tree/spell_level_indicators.py +297 -0
- sequenzo/prefix_tree/system_level_indicators.py +544 -0
- sequenzo/prefix_tree/utils.py +54 -0
- sequenzo/seqhmm/__init__.py +95 -0
- sequenzo/seqhmm/advanced_optimization.py +305 -0
- sequenzo/seqhmm/bootstrap.py +411 -0
- sequenzo/seqhmm/build_hmm.py +142 -0
- sequenzo/seqhmm/build_mhmm.py +136 -0
- sequenzo/seqhmm/build_nhmm.py +121 -0
- sequenzo/seqhmm/fit_mhmm.py +62 -0
- sequenzo/seqhmm/fit_model.py +61 -0
- sequenzo/seqhmm/fit_nhmm.py +76 -0
- sequenzo/seqhmm/formulas.py +289 -0
- sequenzo/seqhmm/forward_backward_nhmm.py +276 -0
- sequenzo/seqhmm/gradients_nhmm.py +306 -0
- sequenzo/seqhmm/hmm.py +291 -0
- sequenzo/seqhmm/mhmm.py +314 -0
- sequenzo/seqhmm/model_comparison.py +238 -0
- sequenzo/seqhmm/multichannel_em.py +282 -0
- sequenzo/seqhmm/multichannel_utils.py +138 -0
- sequenzo/seqhmm/nhmm.py +270 -0
- sequenzo/seqhmm/nhmm_utils.py +191 -0
- sequenzo/seqhmm/predict.py +137 -0
- sequenzo/seqhmm/predict_mhmm.py +142 -0
- sequenzo/seqhmm/simulate.py +878 -0
- sequenzo/seqhmm/utils.py +218 -0
- sequenzo/seqhmm/visualization.py +910 -0
- sequenzo/sequence_characteristics/__init__.py +40 -0
- sequenzo/sequence_characteristics/complexity_index.py +49 -0
- sequenzo/sequence_characteristics/overall_cross_sectional_entropy.py +220 -0
- sequenzo/sequence_characteristics/plot_characteristics.py +593 -0
- sequenzo/sequence_characteristics/simple_characteristics.py +311 -0
- sequenzo/sequence_characteristics/state_frequencies_and_entropy_per_sequence.py +39 -0
- sequenzo/sequence_characteristics/turbulence.py +155 -0
- sequenzo/sequence_characteristics/variance_of_spell_durations.py +86 -0
- sequenzo/sequence_characteristics/within_sequence_entropy.py +43 -0
- sequenzo/suffix_tree/__init__.py +66 -0
- sequenzo/suffix_tree/hub.py +114 -0
- sequenzo/suffix_tree/individual_level_indicators.py +1679 -0
- sequenzo/suffix_tree/spell_individual_level_indicators.py +493 -0
- sequenzo/suffix_tree/spell_level_indicators.py +248 -0
- sequenzo/suffix_tree/system_level_indicators.py +535 -0
- sequenzo/suffix_tree/utils.py +56 -0
- sequenzo/version_check.py +283 -0
- sequenzo/visualization/__init__.py +29 -0
- sequenzo/visualization/plot_mean_time.py +222 -0
- sequenzo/visualization/plot_modal_state.py +276 -0
- sequenzo/visualization/plot_most_frequent_sequences.py +147 -0
- sequenzo/visualization/plot_relative_frequency.py +405 -0
- sequenzo/visualization/plot_sequence_index.py +1175 -0
- sequenzo/visualization/plot_single_medoid.py +153 -0
- sequenzo/visualization/plot_state_distribution.py +651 -0
- sequenzo/visualization/plot_transition_matrix.py +190 -0
- sequenzo/visualization/utils/__init__.py +23 -0
- sequenzo/visualization/utils/utils.py +310 -0
- sequenzo/with_event_history_analysis/__init__.py +35 -0
- sequenzo/with_event_history_analysis/sequence_analysis_multi_state_model.py +850 -0
- sequenzo/with_event_history_analysis/sequence_history_analysis.py +283 -0
- sequenzo-0.1.31.dist-info/METADATA +286 -0
- sequenzo-0.1.31.dist-info/RECORD +299 -0
- sequenzo-0.1.31.dist-info/WHEEL +5 -0
- sequenzo-0.1.31.dist-info/licenses/LICENSE +28 -0
- sequenzo-0.1.31.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,283 @@
|
|
|
1
|
+
"""
|
|
2
|
+
@Author : Yuqi Liang 梁彧祺
|
|
3
|
+
@File : version_check.py
|
|
4
|
+
@Time : 2025-11-20 07:27
|
|
5
|
+
@Desc : Version check utility for Sequenzo
|
|
6
|
+
|
|
7
|
+
This module checks if the installed version of Sequenzo is up-to-date
|
|
8
|
+
by comparing it with the latest version available on PyPI.
|
|
9
|
+
Similar to pip's version notice functionality.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import sys
|
|
13
|
+
import warnings
|
|
14
|
+
from typing import Optional, Tuple
|
|
15
|
+
|
|
16
|
+
# Try to import packaging for version comparison, fallback to simple comparison
|
|
17
|
+
try:
|
|
18
|
+
from packaging import version as packaging_version
|
|
19
|
+
HAS_PACKAGING = True
|
|
20
|
+
except ImportError:
|
|
21
|
+
HAS_PACKAGING = False
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def get_installed_version() -> Optional[str]:
    """
    Get the currently installed version of Sequenzo.

    Three sources are tried in order; the first one that yields a non-empty
    version string wins:

    1. ``sequenzo.__version__`` of the importable ``sequenzo`` module
       (works in development mode when the project directory is in sys.path)
    2. The installed distribution metadata (``importlib.metadata``, or
       ``pkg_resources`` on Python < 3.8)
    3. A ``version = "..."`` line in a ``pyproject.toml`` located one or two
       directories above this file (fallback for development checkouts)

    Returns:
        Optional[str]: The installed version string (e.g., "0.1.24"), or
        None when none of the sources could provide one.
    """
    # Method 1: version attribute of the importable sequenzo module.
    try:
        import sequenzo

        raw_version = getattr(sequenzo, "__version__", None)
        if isinstance(raw_version, str):
            # Keep only the first whitespace-separated token (drops any git
            # output accidentally appended to the version string). A
            # whitespace-only value yields no tokens and falls through to the
            # next method instead of raising IndexError.
            tokens = raw_version.split()
            if tokens:
                return tokens[0]
    except (ImportError, AttributeError):
        pass

    # Method 2: distribution metadata of the installed package.
    try:
        if sys.version_info >= (3, 8):
            from importlib.metadata import version as get_package_version
            return get_package_version("sequenzo")
        else:
            # Fallback for Python < 3.8
            import pkg_resources
            return pkg_resources.get_distribution("sequenzo").version
    except Exception:
        # Deliberate best effort: the package may simply not be installed.
        pass

    # Method 3: read pyproject.toml with a simple regex (avoids a dependency
    # on tomli/tomllib).
    try:
        import os
        import re

        package_dir = os.path.dirname(os.path.abspath(__file__))
        parent_dir = os.path.dirname(package_dir)
        # The parent directory is the repository root for a flat layout
        # (<root>/sequenzo/version_check.py); the grandparent covers a
        # "src"-style layout and is the only location the previous
        # implementation looked at.
        for candidate_dir in (parent_dir, os.path.dirname(parent_dir)):
            pyproject_path = os.path.join(candidate_dir, "pyproject.toml")
            if not os.path.exists(pyproject_path):
                continue
            with open(pyproject_path, "r", encoding="utf-8") as f:
                content = f.read()
            # Look for pattern: version = "0.1.30" or version = '0.1.30'
            match = re.search(r'^\s*version\s*=\s*["\']([^"\']+)["\']', content, re.MULTILINE)
            if match:
                return match.group(1)
    except Exception:
        # Deliberate best effort: an unreadable file must not break the check.
        pass

    # If all methods fail, return None
    return None
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def get_latest_version_from_pypi(package_name: str = "sequenzo", timeout: float = 1.0) -> Optional[str]:
    """
    Ask the PyPI JSON API for the newest released version of a package.

    Any failure (network error, timeout, unexpected payload) is swallowed on
    purpose so that the version check can never break the caller.

    Args:
        package_name: Name of the package on PyPI (default: "sequenzo")
        timeout: Timeout in seconds for the HTTP request (default: 1.0)

    Returns:
        Optional[str]: The value of ``info.version`` in the PyPI response,
        or None when the lookup failed.
    """
    try:
        import json
        import urllib.request

        # Build the request for the package's JSON endpoint and identify
        # ourselves with the installed Sequenzo version.
        pypi_request = urllib.request.Request(f"https://pypi.org/pypi/{package_name}/json")
        pypi_request.add_header("User-Agent", f"sequenzo/{get_installed_version() or 'unknown'}")

        # The timeout keeps a slow or missing network from blocking callers.
        with urllib.request.urlopen(pypi_request, timeout=timeout) as response:
            raw_body = response.read()
        payload = json.loads(raw_body.decode())
        info = payload.get("info", {})
        return info.get("version")
    except Exception:
        # Silently fail if we can't check (network issues, etc.)
        return None
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def _simple_version_compare(installed: str, latest: str) -> bool:
|
|
123
|
+
"""
|
|
124
|
+
Simple version comparison without external dependencies.
|
|
125
|
+
|
|
126
|
+
This function compares version strings by splitting on '.' and comparing
|
|
127
|
+
each component numerically. This works for most standard version formats
|
|
128
|
+
like "0.1.24" but may not handle all edge cases.
|
|
129
|
+
|
|
130
|
+
Args:
|
|
131
|
+
installed: Currently installed version
|
|
132
|
+
latest: Latest available version
|
|
133
|
+
|
|
134
|
+
Returns:
|
|
135
|
+
bool: True if installed >= latest, False otherwise
|
|
136
|
+
"""
|
|
137
|
+
try:
|
|
138
|
+
# Split versions into components
|
|
139
|
+
installed_parts = [int(x) for x in installed.split('.')]
|
|
140
|
+
latest_parts = [int(x) for x in latest.split('.')]
|
|
141
|
+
|
|
142
|
+
# Pad shorter version with zeros
|
|
143
|
+
max_len = max(len(installed_parts), len(latest_parts))
|
|
144
|
+
installed_parts.extend([0] * (max_len - len(installed_parts)))
|
|
145
|
+
latest_parts.extend([0] * (max_len - len(latest_parts)))
|
|
146
|
+
|
|
147
|
+
# Compare component by component
|
|
148
|
+
for i, l in zip(installed_parts, latest_parts):
|
|
149
|
+
if i > l:
|
|
150
|
+
return True # installed is newer
|
|
151
|
+
elif i < l:
|
|
152
|
+
return False # installed is older
|
|
153
|
+
|
|
154
|
+
return True # versions are equal
|
|
155
|
+
except (ValueError, AttributeError):
|
|
156
|
+
# If parsing fails, do string comparison as fallback
|
|
157
|
+
return installed >= latest
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def compare_versions(installed: str, latest: str) -> Tuple[bool, str]:
    """
    Decide whether the installed version is at least as new as the latest one.

    The ``packaging`` library is used when it was importable at module load
    (``HAS_PACKAGING``); otherwise the dependency-free
    ``_simple_version_compare`` is used.

    Args:
        installed: Currently installed version
        latest: Latest available version

    Returns:
        Tuple[bool, str]: (is_up_to_date, message)
            - is_up_to_date: True if installed version >= latest version
            - message: Human-readable comparison message
    """
    try:
        if HAS_PACKAGING:
            # Accurate comparison through the packaging library
            is_current = packaging_version.parse(installed) >= packaging_version.parse(latest)
        else:
            # Dependency-free fallback
            is_current = _simple_version_compare(installed, latest)

        if is_current:
            return True, f"Installed version {installed} is up-to-date"
        return False, f"Installed version {installed} < latest version {latest}"
    except Exception:
        # If version parsing fails, assume up-to-date to avoid false positives
        return True, "Could not compare versions"
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def check_version_update(
    show_notice: bool = True,
    timeout: float = 1.0
) -> Optional[str]:
    """
    Look up the newest Sequenzo release on PyPI and optionally announce it.

    The installed version is compared with the latest version reported by
    PyPI; when the installed one is older and ``show_notice`` is set, a
    two-line notice is written to stderr.

    Args:
        show_notice: If True, display a notice when update is available (default: True)
        timeout: Timeout in seconds for PyPI API request (default: 1.0)

    Returns:
        Optional[str]: The latest version string if it could be determined,
        None when either the installed or the latest version is unknown.

    Examples:
        >>> from sequenzo.version_check import check_version_update
        >>> latest = check_version_update(show_notice=True)
        >>> if latest:
        ...     print(f"Latest version available: {latest}")
    """
    current_version = get_installed_version()
    if not current_version:
        # Without a known installed version there is nothing to compare.
        return None

    newest_version = get_latest_version_from_pypi(timeout=timeout)
    if not newest_version:
        # PyPI could not be reached (network issue, etc.); skip the check.
        return None

    up_to_date, _message = compare_versions(current_version, newest_version)

    if not up_to_date:
        if show_notice:
            print(
                f"[notice] A new release of sequenzo is available: {current_version} -> {newest_version}",
                file=sys.stderr
            )
            print(
                f"[notice] To update, run: pip install --upgrade sequenzo=={newest_version}",
                file=sys.stderr
            )

    return newest_version
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
def check_version_update_async():
    """
    Start the version check in a background thread and return immediately.

    Intended to be called during package import: the check runs on a daemon
    thread so it neither blocks the import nor keeps the interpreter alive
    at exit. Every failure (in the check itself or while starting the
    thread) is swallowed on purpose, because a version check must never
    break an import.
    """
    def _run_check_quietly():
        """Thread target: run the version check and discard any error."""
        try:
            check_version_update(show_notice=True, timeout=1.0)
        except Exception:
            # Best-effort only: a failed check is simply ignored.
            pass

    try:
        import threading

        # daemon=True so a still-running check cannot prevent process exit.
        worker = threading.Thread(target=_run_check_quietly, daemon=True)
        worker.start()
    except Exception:
        # If the thread cannot be created or started, skip the check.
        pass
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
if __name__ == "__main__":
    # Manual entry point: run this module as a script to check for updates.
    print("Checking for Sequenzo updates...")
    latest = check_version_update(show_notice=True)
    if not latest:
        # None means the installed or the latest version could not be determined.
        print("Could not check for updates (network issue or package not found)")
    else:
        installed = get_installed_version()
        print(f"\nInstalled version: {installed}")
        print(f"Latest version: {latest}")
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
"""
|
|
2
|
+
@Author : 梁彧祺
|
|
3
|
+
@File : __init__.py
|
|
4
|
+
@Time : 11/02/2025 16:42
|
|
5
|
+
@Desc :
|
|
6
|
+
"""
|
|
7
|
+
# sequenzo/visualization/__init__.py
|
|
8
|
+
|
|
9
|
+
from .plot_sequence_index import plot_sequence_index
|
|
10
|
+
from .plot_most_frequent_sequences import plot_most_frequent_sequences
|
|
11
|
+
from .plot_relative_frequency import plot_relative_frequency
|
|
12
|
+
from .plot_transition_matrix import compute_transition_matrix, print_transition_matrix, plot_transition_matrix
|
|
13
|
+
from .plot_mean_time import plot_mean_time
|
|
14
|
+
from .plot_single_medoid import plot_single_medoid, compute_medoids_from_distance_matrix
|
|
15
|
+
from .plot_state_distribution import plot_state_distribution
|
|
16
|
+
from .plot_modal_state import plot_modal_state
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
# Public API of this package. Every name imported at the top of this module is
# listed, so ``from sequenzo.visualization import *`` exposes the same names
# as the explicit imports. Add new public functions here when importing them.
__all__ = [
    "compute_medoids_from_distance_matrix",
    "compute_transition_matrix",
    "plot_mean_time",
    "plot_most_frequent_sequences",
    "plot_relative_frequency",
    "plot_sequence_index",
    "plot_single_medoid",
    "plot_state_distribution",
    "plot_transition_matrix",
    "plot_modal_state",
    "print_transition_matrix",
]
|
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
"""
|
|
2
|
+
@Author : Yuqi Liang 梁彧祺
|
|
3
|
+
@File : plot_mean_time.py
|
|
4
|
+
@Time : 14/02/2025 10:12
|
|
5
|
+
@Desc :
|
|
6
|
+
Implementation of Mean Time Plot for social sequence analysis,
|
|
7
|
+
closely following ggseqplot's `ggseqmtplot` function,
|
|
8
|
+
and TraMineR's `plot.stslist.meant.Rd` for mean time calculation.
|
|
9
|
+
"""
|
|
10
|
+
import numpy as np
|
|
11
|
+
import pandas as pd
|
|
12
|
+
import matplotlib.pyplot as plt
|
|
13
|
+
from typing import Optional
|
|
14
|
+
from sequenzo.define_sequence_data import SequenceData
|
|
15
|
+
from sequenzo.visualization.utils import (
|
|
16
|
+
show_plot_title
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _compute_mean_time(seqdata: SequenceData, weights="auto") -> pd.DataFrame:
    """
    Compute the (weighted) mean total time spent in each state.

    For every state, the number of time points each sequence spends in that
    state is counted; the weighted average of these per-sequence counts is
    the state's mean time.

    :param seqdata: SequenceData object containing sequence information
    :param weights: (np.ndarray or "auto") Weights for sequences. If "auto",
        uses seqdata.weights if available. None gives every sequence weight 1.
    :return: DataFrame with columns 'State', 'MeanTime' and 'StandardError',
        sorted by ascending 'MeanTime'. The index is not reset after sorting,
        so row label ``i`` still identifies numeric state code ``i + 1``.
    :raises ValueError: if the number of weights differs from the number of sequences
    """
    # Resolve weights
    if isinstance(weights, str) and weights == "auto":
        weights = getattr(seqdata, "weights", None)

    if weights is not None:
        weights = np.asarray(weights, dtype=float).reshape(-1)
        if len(weights) != len(seqdata.values):
            raise ValueError("Length of weights must equal number of sequences.")

    seq_df = seqdata.to_dataframe()

    # Use inverse_state_mapping if available, otherwise build it
    if hasattr(seqdata, 'inverse_state_mapping'):
        inv = seqdata.inverse_state_mapping
    else:
        inv = {v: k for k, v in seqdata.state_mapping.items()}

    states = list(range(1, len(seqdata.states) + 1))  # Numerical state codes
    n_sequences = len(seq_df)

    w = np.ones(n_sequences) if weights is None else weights

    mean_times = {}
    se = {}
    for s in states:
        # Time points spent in state s, per sequence (computed once and
        # reused for both the mean and its standard error).
        state_counts = (seq_df == s).sum(axis=1)

        mean_times[s] = np.average(state_counts, weights=w) if len(state_counts) > 0 else 0.0

        if n_sequences > 1:
            weighted_var = np.average((state_counts - mean_times[s]) ** 2, weights=w)
            # The comparison is False for NaN, which then falls back to 0.0.
            se[s] = np.sqrt(weighted_var / n_sequences) if weighted_var >= 0 else 0.0
        else:
            # A single sequence carries no spread information.
            se[s] = 0.0

    # Convert state codes to display names; unmapped codes, NaN and the
    # strings 'nan'/'NaN' are all shown as 'missing'.
    state_names = []
    for s in states:
        state_name = inv.get(s, None)
        if pd.isna(state_name) or (isinstance(state_name, str) and state_name.lower() == 'nan'):
            state_name = 'missing'
        state_names.append(state_name)

    mean_time_df = pd.DataFrame({
        'State': state_names,
        'MeanTime': [mean_times[s] for s in states],
        'StandardError': [se[s] for s in states]
    })

    mean_time_df.sort_values(by='MeanTime', ascending=True, inplace=True)

    return mean_time_df
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def plot_mean_time(seqdata: SequenceData,
                   weights="auto",
                   show_error_bar: bool = True,
                   title=None,
                   x_label="Mean Time",
                   y_label="State",
                   fontsize: int = 12,
                   save_as: Optional[str] = None,
                   dpi: int = 200) -> None:
    """
    Plot Mean Time Plot for sequence data with clean white background.

    Draws one horizontal bar per state (sorted by ascending mean time), each
    in the colour of that state in ``seqdata.get_colormap()``.

    :param seqdata: SequenceData object containing sequence information
    :param weights: (np.ndarray or "auto") Weights for sequences. If "auto", uses seqdata.weights if available
    :param show_error_bar: Boolean flag to show or hide error bars
    :param title: Optional title for the plot
    :param x_label: Label for the x-axis
    :param y_label: Label for the y-axis
    :param fontsize: Base font size; tick labels and the note use fontsize-2, the title fontsize+2
    :param save_as: Optional file path to save the plot ('.png' is appended
        unless the path ends with .png, .jpg, .jpeg or .pdf)
    :param dpi: Resolution of the saved plot
    """
    # Use default style as base (note: this changes matplotlib's global style)
    plt.style.use('default')

    mean_time_df = _compute_mean_time(seqdata, weights)

    fig = plt.figure(figsize=(12, 7))
    ax = fig.add_subplot(111)

    # Colormap entry i belongs to the i-th state - use original colors without enhancement
    cmap = seqdata.get_colormap()
    colors = [cmap.colors[i] for i in range(len(seqdata.states))]

    # _compute_mean_time sorts without resetting the index, so each row label
    # is still the state's position in the colormap. Colouring by that label
    # keeps every bar in its own state's colour; ranking the state labels
    # alphabetically (as pd.Categorical codes do) would mix colours up.
    bar_colors = [colors[state_pos] for state_pos in mean_time_df.index]

    # Create custom barplot
    for y_pos, (bar_width, bar_color) in enumerate(zip(mean_time_df['MeanTime'], bar_colors)):
        ax.barh(y=y_pos, width=bar_width, height=0.7,
                color=bar_color, edgecolor='white', linewidth=0.5)

    # Set y-axis ticks and labels, replacing 'nan' with 'missing' for display
    y_labels = mean_time_df['State'].copy()
    y_labels = y_labels.replace('nan', 'missing')
    ax.set_yticks(range(len(mean_time_df)))
    ax.set_yticklabels(y_labels, fontsize=fontsize-2)

    # Add error bars if needed
    if show_error_bar:
        ax.errorbar(
            x=mean_time_df["MeanTime"],
            y=range(len(mean_time_df)),
            xerr=mean_time_df["StandardError"],
            fmt='none',
            ecolor='black',
            capsize=3,
            capthick=1,
            elinewidth=1.5
        )

    # Set plot properties
    if title:
        show_plot_title(ax, title, show=True, fontsize=fontsize+2, fontweight='bold', pad=20)
    ax.set_xlabel(x_label, fontsize=fontsize)
    ax.set_ylabel(y_label, fontsize=fontsize, labelpad=15)

    # Clean white background with light grid
    ax.set_facecolor('white')
    ax.grid(axis='x', color='#E0E0E0', linestyle='-', linewidth=0.5)
    ax.set_axisbelow(True)  # Place grid lines behind the bars

    # Customize borders
    for spine in ax.spines.values():
        spine.set_color('#CCCCCC')  # Light gray border
        spine.set_linewidth(0.5)

    # Adjust layout (1/2)
    plt.subplots_adjust(left=0.3)

    # Add a note when some bar is non-zero but too short to be visible next
    # to the longest bar. Bars that are exactly zero do not trigger the note,
    # because the note claims the hidden values are non-zero.
    relative_threshold = 0.01
    mean_values = mean_time_df['MeanTime']
    max_val = mean_values.max()
    has_tiny_bars = ((mean_values > 0) & (mean_values < relative_threshold * max_val)).any()
    if has_tiny_bars:
        norm_note = "Note: Some bars may appear as zero, but actually have small non-zero values."
        fig.text(0.5, -0.02, norm_note, ha='center', fontsize=fontsize-2, style='italic')

    # Adjust layout (2/2) before saving
    fig.tight_layout()

    # Handle save_as extension
    if save_as and not save_as.lower().endswith(('.png', '.jpg', '.jpeg', '.pdf')):
        save_as = save_as + '.png'

    # Save figure if needed
    if save_as:
        fig.savefig(save_as, dpi=dpi, bbox_inches='tight', facecolor='white')

    # Display plot, then release this figure explicitly
    plt.show()
    plt.close(fig)
|
|
222
|
+
|