sequenzo 0.1.31__cp310-cp310-macosx_10_9_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _sequenzo_fastcluster.cpython-310-darwin.so +0 -0
- sequenzo/__init__.py +349 -0
- sequenzo/big_data/__init__.py +12 -0
- sequenzo/big_data/clara/__init__.py +26 -0
- sequenzo/big_data/clara/clara.py +476 -0
- sequenzo/big_data/clara/utils/__init__.py +27 -0
- sequenzo/big_data/clara/utils/aggregatecases.py +92 -0
- sequenzo/big_data/clara/utils/davies_bouldin.py +91 -0
- sequenzo/big_data/clara/utils/get_weighted_diss.cpython-310-darwin.so +0 -0
- sequenzo/big_data/clara/utils/wfcmdd.py +205 -0
- sequenzo/big_data/clara/visualization.py +88 -0
- sequenzo/clustering/KMedoids.py +178 -0
- sequenzo/clustering/__init__.py +30 -0
- sequenzo/clustering/clustering_c_code.cpython-310-darwin.so +0 -0
- sequenzo/clustering/hierarchical_clustering.py +1256 -0
- sequenzo/clustering/sequenzo_fastcluster/fastcluster.py +495 -0
- sequenzo/clustering/sequenzo_fastcluster/src/fastcluster.cpp +1877 -0
- sequenzo/clustering/sequenzo_fastcluster/src/fastcluster_python.cpp +1264 -0
- sequenzo/clustering/src/KMedoid.cpp +263 -0
- sequenzo/clustering/src/PAM.cpp +237 -0
- sequenzo/clustering/src/PAMonce.cpp +265 -0
- sequenzo/clustering/src/cluster_quality.cpp +496 -0
- sequenzo/clustering/src/cluster_quality.h +128 -0
- sequenzo/clustering/src/cluster_quality_backup.cpp +570 -0
- sequenzo/clustering/src/module.cpp +228 -0
- sequenzo/clustering/src/weightedinertia.cpp +111 -0
- sequenzo/clustering/utils/__init__.py +27 -0
- sequenzo/clustering/utils/disscenter.py +122 -0
- sequenzo/data_preprocessing/__init__.py +22 -0
- sequenzo/data_preprocessing/helpers.py +303 -0
- sequenzo/datasets/__init__.py +41 -0
- sequenzo/datasets/biofam.csv +2001 -0
- sequenzo/datasets/biofam_child_domain.csv +2001 -0
- sequenzo/datasets/biofam_left_domain.csv +2001 -0
- sequenzo/datasets/biofam_married_domain.csv +2001 -0
- sequenzo/datasets/chinese_colonial_territories.csv +12 -0
- sequenzo/datasets/country_co2_emissions.csv +194 -0
- sequenzo/datasets/country_co2_emissions_global_deciles.csv +195 -0
- sequenzo/datasets/country_co2_emissions_global_quintiles.csv +195 -0
- sequenzo/datasets/country_co2_emissions_local_deciles.csv +195 -0
- sequenzo/datasets/country_co2_emissions_local_quintiles.csv +195 -0
- sequenzo/datasets/country_gdp_per_capita.csv +194 -0
- sequenzo/datasets/dyadic_children.csv +61 -0
- sequenzo/datasets/dyadic_parents.csv +61 -0
- sequenzo/datasets/mvad.csv +713 -0
- sequenzo/datasets/pairfam_activity_by_month.csv +1028 -0
- sequenzo/datasets/pairfam_activity_by_year.csv +1028 -0
- sequenzo/datasets/pairfam_family_by_month.csv +1028 -0
- sequenzo/datasets/pairfam_family_by_year.csv +1028 -0
- sequenzo/datasets/political_science_aid_shock.csv +166 -0
- sequenzo/datasets/political_science_donor_fragmentation.csv +157 -0
- sequenzo/define_sequence_data.py +1400 -0
- sequenzo/dissimilarity_measures/__init__.py +31 -0
- sequenzo/dissimilarity_measures/c_code.cpython-310-darwin.so +0 -0
- sequenzo/dissimilarity_measures/get_distance_matrix.py +762 -0
- sequenzo/dissimilarity_measures/get_substitution_cost_matrix.py +246 -0
- sequenzo/dissimilarity_measures/src/DHDdistance.cpp +148 -0
- sequenzo/dissimilarity_measures/src/LCPdistance.cpp +114 -0
- sequenzo/dissimilarity_measures/src/LCPspellDistance.cpp +215 -0
- sequenzo/dissimilarity_measures/src/OMdistance.cpp +247 -0
- sequenzo/dissimilarity_measures/src/OMspellDistance.cpp +281 -0
- sequenzo/dissimilarity_measures/src/__init__.py +0 -0
- sequenzo/dissimilarity_measures/src/dist2matrix.cpp +63 -0
- sequenzo/dissimilarity_measures/src/dp_utils.h +160 -0
- sequenzo/dissimilarity_measures/src/module.cpp +40 -0
- sequenzo/dissimilarity_measures/src/setup.py +30 -0
- sequenzo/dissimilarity_measures/src/utils.h +25 -0
- sequenzo/dissimilarity_measures/src/xsimd/.github/cmake-test/main.cpp +6 -0
- sequenzo/dissimilarity_measures/src/xsimd/benchmark/main.cpp +159 -0
- sequenzo/dissimilarity_measures/src/xsimd/benchmark/xsimd_benchmark.hpp +565 -0
- sequenzo/dissimilarity_measures/src/xsimd/docs/source/conf.py +37 -0
- sequenzo/dissimilarity_measures/src/xsimd/examples/mandelbrot.cpp +330 -0
- sequenzo/dissimilarity_measures/src/xsimd/examples/pico_bench.hpp +246 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_arithmetic.hpp +266 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_complex.hpp +112 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_details.hpp +323 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_logical.hpp +218 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_math.hpp +2583 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_memory.hpp +880 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_rounding.hpp +72 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_swizzle.hpp +174 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_trigo.hpp +978 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx.hpp +1924 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx2.hpp +1144 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512bw.hpp +656 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512cd.hpp +28 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512dq.hpp +244 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512er.hpp +20 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512f.hpp +2650 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512ifma.hpp +20 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512pf.hpp +20 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vbmi.hpp +77 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vbmi2.hpp +131 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vnni_avx512bw.hpp +20 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vnni_avx512vbmi2.hpp +20 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avxvnni.hpp +20 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_common.hpp +24 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_common_fwd.hpp +77 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_constants.hpp +393 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_emulated.hpp +788 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_avx.hpp +93 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_avx2.hpp +46 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_sse.hpp +97 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma4.hpp +92 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_i8mm_neon64.hpp +17 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_isa.hpp +142 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_neon.hpp +3142 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_neon64.hpp +1543 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_rvv.hpp +1513 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_scalar.hpp +1260 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse2.hpp +2024 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse3.hpp +67 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse4_1.hpp +339 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse4_2.hpp +44 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_ssse3.hpp +186 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sve.hpp +1155 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_vsx.hpp +892 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_wasm.hpp +1780 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_arch.hpp +240 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_config.hpp +484 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_cpuid.hpp +269 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_inline.hpp +27 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/math/xsimd_rem_pio2.hpp +719 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/memory/xsimd_aligned_allocator.hpp +349 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/memory/xsimd_alignment.hpp +91 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_all_registers.hpp +55 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_api.hpp +2765 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx2_register.hpp +44 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512bw_register.hpp +51 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512cd_register.hpp +51 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512dq_register.hpp +51 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512er_register.hpp +51 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512f_register.hpp +77 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512ifma_register.hpp +51 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512pf_register.hpp +51 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vbmi2_register.hpp +51 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vbmi_register.hpp +51 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512bw_register.hpp +54 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512vbmi2_register.hpp +53 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx_register.hpp +64 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avxvnni_register.hpp +44 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_batch.hpp +1524 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_batch_constant.hpp +300 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_common_arch.hpp +47 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_emulated_register.hpp +80 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_avx2_register.hpp +50 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_avx_register.hpp +50 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_sse_register.hpp +50 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma4_register.hpp +50 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_i8mm_neon64_register.hpp +55 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_neon64_register.hpp +55 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_neon_register.hpp +154 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_register.hpp +94 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_rvv_register.hpp +506 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse2_register.hpp +59 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse3_register.hpp +49 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse4_1_register.hpp +48 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse4_2_register.hpp +48 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_ssse3_register.hpp +48 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sve_register.hpp +156 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_traits.hpp +337 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_utils.hpp +536 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_vsx_register.hpp +77 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_wasm_register.hpp +59 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/xsimd.hpp +75 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/architectures/dummy.cpp +7 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set.cpp +13 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean.cpp +24 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_aligned.cpp +25 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_arch_independent.cpp +28 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_tag_dispatch.cpp +25 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/doc/manipulating_abstract_batches.cpp +7 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/doc/manipulating_parametric_batches.cpp +8 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum.hpp +31 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum_avx2.cpp +3 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum_sse2.cpp +3 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/doc/writing_vectorized_code.cpp +11 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/main.cpp +31 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_api.cpp +230 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_arch.cpp +217 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_basic_math.cpp +183 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_batch.cpp +1049 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_bool.cpp +508 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_cast.cpp +409 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_complex.cpp +712 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_constant.cpp +286 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_float.cpp +141 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_int.cpp +365 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_manip.cpp +308 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_bitwise_cast.cpp +222 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_exponential.cpp +226 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_hyperbolic.cpp +183 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_power.cpp +265 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_trigonometric.cpp +236 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_conversion.cpp +248 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_custom_default_arch.cpp +28 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_error_gamma.cpp +170 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_explicit_batch_instantiation.cpp +32 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_exponential.cpp +202 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_extract_pair.cpp +92 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_fp_manipulation.cpp +77 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_gnu_source.cpp +30 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_hyperbolic.cpp +167 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_load_store.cpp +304 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_memory.cpp +61 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_poly_evaluation.cpp +64 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_power.cpp +184 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_rounding.cpp +199 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_select.cpp +101 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_shuffle.cpp +760 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_sum.cpp +4 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_sum.hpp +34 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_traits.cpp +172 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_trigonometric.cpp +208 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_utils.hpp +611 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_wasm/test_wasm_playwright.py +123 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_xsimd_api.cpp +1460 -0
- sequenzo/dissimilarity_measures/utils/__init__.py +16 -0
- sequenzo/dissimilarity_measures/utils/get_LCP_length_for_2_seq.py +44 -0
- sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.cpython-310-darwin.so +0 -0
- sequenzo/dissimilarity_measures/utils/seqconc.cpython-310-darwin.so +0 -0
- sequenzo/dissimilarity_measures/utils/seqdss.cpython-310-darwin.so +0 -0
- sequenzo/dissimilarity_measures/utils/seqdur.cpython-310-darwin.so +0 -0
- sequenzo/dissimilarity_measures/utils/seqlength.cpython-310-darwin.so +0 -0
- sequenzo/multidomain/__init__.py +23 -0
- sequenzo/multidomain/association_between_domains.py +311 -0
- sequenzo/multidomain/cat.py +597 -0
- sequenzo/multidomain/combt.py +519 -0
- sequenzo/multidomain/dat.py +81 -0
- sequenzo/multidomain/idcd.py +139 -0
- sequenzo/multidomain/linked_polyad.py +292 -0
- sequenzo/openmp_setup.py +233 -0
- sequenzo/prefix_tree/__init__.py +62 -0
- sequenzo/prefix_tree/hub.py +114 -0
- sequenzo/prefix_tree/individual_level_indicators.py +1321 -0
- sequenzo/prefix_tree/spell_individual_level_indicators.py +580 -0
- sequenzo/prefix_tree/spell_level_indicators.py +297 -0
- sequenzo/prefix_tree/system_level_indicators.py +544 -0
- sequenzo/prefix_tree/utils.py +54 -0
- sequenzo/seqhmm/__init__.py +95 -0
- sequenzo/seqhmm/advanced_optimization.py +305 -0
- sequenzo/seqhmm/bootstrap.py +411 -0
- sequenzo/seqhmm/build_hmm.py +142 -0
- sequenzo/seqhmm/build_mhmm.py +136 -0
- sequenzo/seqhmm/build_nhmm.py +121 -0
- sequenzo/seqhmm/fit_mhmm.py +62 -0
- sequenzo/seqhmm/fit_model.py +61 -0
- sequenzo/seqhmm/fit_nhmm.py +76 -0
- sequenzo/seqhmm/formulas.py +289 -0
- sequenzo/seqhmm/forward_backward_nhmm.py +276 -0
- sequenzo/seqhmm/gradients_nhmm.py +306 -0
- sequenzo/seqhmm/hmm.py +291 -0
- sequenzo/seqhmm/mhmm.py +314 -0
- sequenzo/seqhmm/model_comparison.py +238 -0
- sequenzo/seqhmm/multichannel_em.py +282 -0
- sequenzo/seqhmm/multichannel_utils.py +138 -0
- sequenzo/seqhmm/nhmm.py +270 -0
- sequenzo/seqhmm/nhmm_utils.py +191 -0
- sequenzo/seqhmm/predict.py +137 -0
- sequenzo/seqhmm/predict_mhmm.py +142 -0
- sequenzo/seqhmm/simulate.py +878 -0
- sequenzo/seqhmm/utils.py +218 -0
- sequenzo/seqhmm/visualization.py +910 -0
- sequenzo/sequence_characteristics/__init__.py +40 -0
- sequenzo/sequence_characteristics/complexity_index.py +49 -0
- sequenzo/sequence_characteristics/overall_cross_sectional_entropy.py +220 -0
- sequenzo/sequence_characteristics/plot_characteristics.py +593 -0
- sequenzo/sequence_characteristics/simple_characteristics.py +311 -0
- sequenzo/sequence_characteristics/state_frequencies_and_entropy_per_sequence.py +39 -0
- sequenzo/sequence_characteristics/turbulence.py +155 -0
- sequenzo/sequence_characteristics/variance_of_spell_durations.py +86 -0
- sequenzo/sequence_characteristics/within_sequence_entropy.py +43 -0
- sequenzo/suffix_tree/__init__.py +66 -0
- sequenzo/suffix_tree/hub.py +114 -0
- sequenzo/suffix_tree/individual_level_indicators.py +1679 -0
- sequenzo/suffix_tree/spell_individual_level_indicators.py +493 -0
- sequenzo/suffix_tree/spell_level_indicators.py +248 -0
- sequenzo/suffix_tree/system_level_indicators.py +535 -0
- sequenzo/suffix_tree/utils.py +56 -0
- sequenzo/version_check.py +283 -0
- sequenzo/visualization/__init__.py +29 -0
- sequenzo/visualization/plot_mean_time.py +222 -0
- sequenzo/visualization/plot_modal_state.py +276 -0
- sequenzo/visualization/plot_most_frequent_sequences.py +147 -0
- sequenzo/visualization/plot_relative_frequency.py +405 -0
- sequenzo/visualization/plot_sequence_index.py +1175 -0
- sequenzo/visualization/plot_single_medoid.py +153 -0
- sequenzo/visualization/plot_state_distribution.py +651 -0
- sequenzo/visualization/plot_transition_matrix.py +190 -0
- sequenzo/visualization/utils/__init__.py +23 -0
- sequenzo/visualization/utils/utils.py +310 -0
- sequenzo/with_event_history_analysis/__init__.py +35 -0
- sequenzo/with_event_history_analysis/sequence_analysis_multi_state_model.py +850 -0
- sequenzo/with_event_history_analysis/sequence_history_analysis.py +283 -0
- sequenzo-0.1.31.dist-info/METADATA +286 -0
- sequenzo-0.1.31.dist-info/RECORD +299 -0
- sequenzo-0.1.31.dist-info/WHEEL +5 -0
- sequenzo-0.1.31.dist-info/licenses/LICENSE +28 -0
- sequenzo-0.1.31.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,297 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Spell-based Prefix Tree: System-level divergence indicators.
|
|
3
|
+
|
|
4
|
+
Unlike position-based prefix trees (which use time-index alignment),
|
|
5
|
+
spell-based trees use SPELL as the unit: the k-th level = after the k-th spell.
|
|
6
|
+
Same state sequence merges into the same path regardless of spell duration
|
|
7
|
+
(analogous to LCPspell with expcost=0 for structure).
|
|
8
|
+
Duration can optionally influence derived indicators via expcost.
|
|
9
|
+
|
|
10
|
+
Design aligned with: sequenzo/dissimilarity_measures/src/LCPspellDistance.cpp
|
|
11
|
+
- Position LCP : compare state at same time index (t)
|
|
12
|
+
- Spell LCPspell: compare k-th spell of A with k-th spell of B
|
|
13
|
+
|
|
14
|
+
Usage:
|
|
15
|
+
from sequenzo import SequenceData, build_prefix_tree
|
|
16
|
+
|
|
17
|
+
seqdata = SequenceData(df, time=time_cols, id_col="id", states=states)
|
|
18
|
+
|
|
19
|
+
# Spell-based prefix tree (divergence from first spell onward)
|
|
20
|
+
tree = build_prefix_tree(seqdata, mode="spell", expcost=0)
|
|
21
|
+
|
|
22
|
+
# expcost=0: structure ignores duration (state-only merge)
|
|
23
|
+
# expcost>0: duration influences JS divergence (spell-length weighting)
|
|
24
|
+
|
|
25
|
+
@Author : Yuqi Liang 梁彧祺
|
|
26
|
+
@File : spell_level_indicators.py
|
|
27
|
+
@Time : 2026/1/30 08:47
|
|
28
|
+
@Desc : Spell-based prefix tree for sequence divergence analysis.
|
|
29
|
+
"""
|
|
30
|
+
from collections import defaultdict
|
|
31
|
+
from typing import Dict, Any, List, Optional, Tuple
|
|
32
|
+
|
|
33
|
+
import numpy as np
|
|
34
|
+
from scipy.spatial.distance import jensenshannon
|
|
35
|
+
|
|
36
|
+
from sequenzo.define_sequence_data import SequenceData
|
|
37
|
+
from sequenzo.dissimilarity_measures.utils.seqdss import seqdss
|
|
38
|
+
from sequenzo.dissimilarity_measures.utils.seqdur import seqdur
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def convert_seqdata_to_spells(
    seqdata: SequenceData,
) -> Tuple[List[List[Any]], List[List[float]], List[Any]]:
    """
    Turn a SequenceData object into per-sequence spell lists.

    A spell is a maximal run of identical consecutive states. The distinct
    successive states come from ``seqdss`` and the matching run lengths from
    ``seqdur`` (the same helpers used by the LCPspell / OMspell distances).

    Parameters
    ----------
    seqdata : SequenceData
        The sequence data object to convert.

    Returns
    -------
    spell_states : list of list
        One inner list per sequence holding the state label of each spell,
        in spell order.
    spell_durations : list of list
        One inner list per sequence holding the duration of each spell,
        as floats, aligned with ``spell_states``.
    state_list : list
        ``list(seqdata.states)``, the label ordering used for the mapping.

    Raises
    ------
    TypeError
        If ``seqdata`` is not a SequenceData instance.
    """
    if not isinstance(seqdata, SequenceData):
        raise TypeError(
            "[!] Spell mode requires SequenceData. "
            "Use: SequenceData(df, time=..., id_col=..., states=...) "
            "Then: build_prefix_tree(seqdata, mode='spell')"
        )

    # Both helpers are indexed below as 2-D [sequence, spell] tables.
    spell_codes = seqdss(seqdata)
    spell_lengths = seqdur(seqdata)

    state_list = list(seqdata.states)
    n_labels = len(state_list)
    n_rows = spell_codes.shape[0]
    n_cols = spell_codes.shape[1]

    spell_states = []
    spell_durations = []

    for row in range(n_rows):
        row_states = []
        row_durations = []
        col = 0
        while col < n_cols:
            code = int(spell_codes[row, col])
            if code < 0:
                # A negative code marks padding: this sequence has no more spells.
                break
            # NOTE(review): indexing state_list[code] assumes seqdss emits
            # 0-based positions into seqdata.states -- confirm against seqdss.
            # Codes outside the label list are kept as raw integers.
            row_states.append(state_list[code] if code < n_labels else code)
            row_durations.append(float(spell_lengths[row, col]))
            col += 1
        spell_states.append(row_states)
        spell_durations.append(row_durations)

    return spell_states, spell_durations, state_list
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
class SpellPrefixTree:
    """
    Prefix tree in which each level corresponds to one spell.

    Depth k of the tree means "after the k-th spell". Only the ordered state
    labels decide which path a sequence follows, so two sequences with the
    same spell order share a path whatever their spell durations are.

    Attributes
    ----------
    root : dict
        Nested dicts: ``state -> subtree`` for every observed continuation.
    counts : defaultdict(int)
        Maps each spell-prefix (tuple of states) to the number of inserted
        sequences that pass through it.
    total_sequences : int
        Not updated by ``insert``; it is expected to be set by the builder.
    prefix_durations : defaultdict(list)
        Maps a spell-prefix to ``(seq_idx, durations_so_far)`` entries; only
        filled when ``insert`` receives durations.
    """

    def __init__(self):
        self.root = {}
        # NOTE: defaultdict -- indexing a missing prefix creates a zero entry,
        # so use ``.get`` / ``in`` for lookups that must not grow the table.
        self.counts = defaultdict(int)  # prefix (tuple of states) -> count
        self.total_sequences = 0
        # Optional: duration info per prefix for expcost-weighted indicators
        self.prefix_durations = defaultdict(list)  # prefix -> list of (seq_idx, durations)

    def insert(self, spell_sequence: List[Any], seq_idx: int = 0, durations: Optional[List[float]] = None):
        """
        Insert one spell sequence.

        States alone determine the tree structure. When ``durations`` is
        given, the durations of the spells up to and including level k are
        recorded under the level-k prefix together with ``seq_idx``.
        """
        prefix = []
        node = self.root
        for k, state in enumerate(spell_sequence):
            prefix.append(state)
            key = tuple(prefix)
            self.counts[key] += 1
            if durations is not None and k < len(durations):
                self.prefix_durations[key].append((seq_idx, durations[: k + 1]))
            # Descend, creating the child subtree on first visit.
            node = node.setdefault(state, {})

    def get_prefixes_at_depth(self, depth: int) -> List[Tuple]:
        """Return all prefix tuples at spell-depth `depth` (1 = first spell, 2 = first two spells, ...)."""
        return [prefix for prefix in self.counts if len(prefix) == depth]

    def get_children(self, prefix) -> Dict:
        """Return immediate children of `prefix` (mapping: child_state -> subtree).

        An unknown prefix yields an empty dict.
        """
        node = self.root
        for state in prefix:
            if state not in node:
                return {}
            node = node[state]
        return node

    def get_children_count(self, prefix) -> int:
        """Number of distinct child states (branching) at this prefix."""
        return len(self.get_children(prefix))

    def _depth_histogram(self) -> Dict[int, int]:
        """Count the stored prefixes per depth in a single pass over ``counts``."""
        histogram = defaultdict(int)
        for prefix in self.counts:
            histogram[len(prefix)] += 1
        return dict(histogram)

    def describe(self):
        """Print a short overview: sequence total, max depth, prefixes per level."""
        # One pass over the prefixes instead of one full scan per depth level.
        histogram = self._depth_histogram()
        max_d = max(histogram, default=0)
        print("\n[SpellPrefixTree Overview]")
        print(f"[>] Total sequences: {self.total_sequences}")
        print(f"[>] Max spell depth: {max_d}")
        print(f"[>] Total distinct spell-prefixes: {len(self.counts)}")
        for t in range(1, max_d + 1):
            n = histogram.get(t, 0)
            print(f" Spell level {t}: {n} unique prefixes")

    def __repr__(self):
        max_d = max(self._depth_histogram(), default=0)
        return f"SpellPrefixTree(max_spell_depth={max_d}, total_prefixes={len(self.counts)})"
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def build_spell_prefix_tree(
    seqdata: SequenceData,
    expcost: float = 0.0,
) -> SpellPrefixTree:
    """
    Construct a SpellPrefixTree from a SequenceData object.

    Parameters
    ----------
    seqdata : SequenceData
        Sequence data to convert into spells and insert into the tree.
    expcost : float, default 0.0
        Duration weight kept on the tree for derived indicators.
        When it is exactly 0, no durations are passed to ``insert``;
        for any other value each sequence's spell durations are stored
        alongside its prefixes.

    Returns
    -------
    SpellPrefixTree
        Tree whose level k corresponds to the k-th spell. The tree also
        carries ``_expcost``, ``_spell_states`` and ``_spell_durations``.
    """
    spell_states, spell_durations, _ = convert_seqdata_to_spells(seqdata)
    store_durations = expcost != 0

    tree = SpellPrefixTree()
    # Counts every sequence, including ones with no spells that are skipped below.
    tree.total_sequences = len(spell_states)
    tree._expcost = expcost
    tree._spell_states = spell_states
    tree._spell_durations = spell_durations

    for seq_idx, (states, durs) in enumerate(zip(spell_states, spell_durations)):
        if states:
            tree.insert(
                states,
                seq_idx=seq_idx,
                durations=durs if store_durations else None,
            )

    return tree
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
def get_depth_stats_spell(tree: SpellPrefixTree) -> Dict[str, Any]:
    """
    Group the tree's prefixes by spell depth.

    Returns a dict with two plain-dict entries:
    ``"depth_counts"`` (depth -> number of distinct prefixes) and
    ``"depth_to_prefixes"`` (depth -> list of prefix tuples, in the order
    they appear in ``tree.counts``).
    """
    prefixes_by_depth = {}
    for prefix in tree.counts:
        prefixes_by_depth.setdefault(len(prefix), []).append(prefix)

    # The per-depth count is simply the size of each group.
    counts_by_depth = {depth: len(group) for depth, group in prefixes_by_depth.items()}
    return {"depth_counts": counts_by_depth, "depth_to_prefixes": prefixes_by_depth}
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
def compute_prefix_count_spell(
    tree: SpellPrefixTree,
    max_depth: int,
    depth_stats: Optional[Dict[str, Any]] = None,
) -> List[int]:
    """
    Number of distinct prefixes at each spell level 1..max_depth.

    If ``depth_stats`` is supplied, its ``"depth_counts"`` mapping is used
    directly; otherwise the counts are derived from ``tree.counts``.
    Levels with no prefixes are reported as 0.
    """
    if depth_stats is not None:
        per_depth = depth_stats["depth_counts"]
    else:
        per_depth = {}
        for prefix in tree.counts:
            depth = len(prefix)
            per_depth[depth] = per_depth.get(depth, 0) + 1

    levels = range(1, max_depth + 1)
    return [per_depth.get(level, 0) for level in levels]
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
def compute_branching_factor_spell(
    tree: SpellPrefixTree,
    max_depth: int,
    depth_prefixes: Optional[Dict[int, List[Tuple]]] = None,
) -> List[float]:
    """
    Mean number of children per prefix, reported per spell level.

    The first entry is always 0.0. The entry for level t (t >= 2) is the
    average of ``tree.get_children_count(p)`` over the prefixes p at depth
    t - 1, or 0.0 when there are none. ``depth_prefixes`` may supply a
    precomputed depth -> prefixes mapping; otherwise it is built from
    ``tree.counts``.
    """
    if depth_prefixes is not None:
        by_depth = depth_prefixes
    else:
        by_depth = {}
        for prefix in tree.counts:
            by_depth.setdefault(len(prefix), []).append(prefix)

    # Level 1 has no preceding level to branch from.
    factors = [0.0]
    for level in range(2, max_depth + 1):
        parents = by_depth.get(level - 1, [])
        if parents:
            fan_out = [tree.get_children_count(parent) for parent in parents]
            factors.append(float(np.mean(fan_out)))
        else:
            factors.append(0.0)
    return factors
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
def compute_js_divergence_spell(
    spell_states: List[List[Any]],
    spell_durations: List[List[float]],
    state_set: List[Any],
    expcost: float = 0.0,
) -> List[float]:
    """
    Jensen-Shannon score between consecutive spell-level state distributions.

    For every spell level k, a distribution over ``state_set`` is built from
    the sequences that have a k-th spell:

    - ``expcost <= 0``: each such sequence contributes weight 1.
    - ``expcost > 0``: each contributes ``duration ** expcost`` (when a
      duration is available for that spell), so longer spells weigh more.

    States not present in ``state_set`` are ignored.

    Parameters
    ----------
    spell_states : list of list
        State label of each spell, one inner list per sequence.
    spell_durations : list of list
        Spell durations aligned with ``spell_states``.
    state_set : list
        State labels defining the distribution's support and ordering.
    expcost : float, default 0.0
        Exponent applied to durations when weighting (see above).

    Returns
    -------
    list of float
        One value per spell level. The first is always 0.0; entry k compares
        level k with level k - 1. Returns ``[0.0]`` when there are fewer
        than two spell levels, including when ``spell_states`` is empty.

    Notes
    -----
    Values come from ``scipy.spatial.distance.jensenshannon``, which returns
    the Jensen-Shannon *distance* (square root of the divergence).
    """
    state_list = list(state_set)
    n_states = len(state_list)
    state_to_idx = {s: i for i, s in enumerate(state_list)}

    # default=0 keeps an empty input from raising ValueError in max().
    max_spells = max((len(s) for s in spell_states), default=0)
    if max_spells < 2:
        return [0.0]

    weight_by_duration = expcost > 0

    # Per-spell-level distributions
    distros = []
    for k in range(max_spells):
        counts = np.zeros(n_states, dtype=float)
        total = 0.0
        for states_i, durs_i in zip(spell_states, spell_durations):
            if k >= len(states_i):
                continue  # this sequence ended before spell k
            idx = state_to_idx.get(states_i[k])
            if idx is None:
                continue  # state outside state_set
            w = 1.0
            if weight_by_duration and k < len(durs_i):
                w = float(durs_i[k]) ** expcost  # duration weighting
            counts[idx] += w
            total += w
        # Normalise only when some mass was observed; otherwise keep zeros.
        distros.append(counts / total if total > 0 else counts)

    js_scores = [0.0]
    for k in range(1, len(distros)):
        p, q = distros[k], distros[k - 1]
        if np.sum(p) <= 0 or np.sum(q) <= 0:
            # An all-zero level cannot be compared; report no divergence.
            js_scores.append(0.0)
        else:
            js_scores.append(float(jensenshannon(p, q)))
    return js_scores
|