sequenzo 0.1.31__cp310-cp310-macosx_10_9_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _sequenzo_fastcluster.cpython-310-darwin.so +0 -0
- sequenzo/__init__.py +349 -0
- sequenzo/big_data/__init__.py +12 -0
- sequenzo/big_data/clara/__init__.py +26 -0
- sequenzo/big_data/clara/clara.py +476 -0
- sequenzo/big_data/clara/utils/__init__.py +27 -0
- sequenzo/big_data/clara/utils/aggregatecases.py +92 -0
- sequenzo/big_data/clara/utils/davies_bouldin.py +91 -0
- sequenzo/big_data/clara/utils/get_weighted_diss.cpython-310-darwin.so +0 -0
- sequenzo/big_data/clara/utils/wfcmdd.py +205 -0
- sequenzo/big_data/clara/visualization.py +88 -0
- sequenzo/clustering/KMedoids.py +178 -0
- sequenzo/clustering/__init__.py +30 -0
- sequenzo/clustering/clustering_c_code.cpython-310-darwin.so +0 -0
- sequenzo/clustering/hierarchical_clustering.py +1256 -0
- sequenzo/clustering/sequenzo_fastcluster/fastcluster.py +495 -0
- sequenzo/clustering/sequenzo_fastcluster/src/fastcluster.cpp +1877 -0
- sequenzo/clustering/sequenzo_fastcluster/src/fastcluster_python.cpp +1264 -0
- sequenzo/clustering/src/KMedoid.cpp +263 -0
- sequenzo/clustering/src/PAM.cpp +237 -0
- sequenzo/clustering/src/PAMonce.cpp +265 -0
- sequenzo/clustering/src/cluster_quality.cpp +496 -0
- sequenzo/clustering/src/cluster_quality.h +128 -0
- sequenzo/clustering/src/cluster_quality_backup.cpp +570 -0
- sequenzo/clustering/src/module.cpp +228 -0
- sequenzo/clustering/src/weightedinertia.cpp +111 -0
- sequenzo/clustering/utils/__init__.py +27 -0
- sequenzo/clustering/utils/disscenter.py +122 -0
- sequenzo/data_preprocessing/__init__.py +22 -0
- sequenzo/data_preprocessing/helpers.py +303 -0
- sequenzo/datasets/__init__.py +41 -0
- sequenzo/datasets/biofam.csv +2001 -0
- sequenzo/datasets/biofam_child_domain.csv +2001 -0
- sequenzo/datasets/biofam_left_domain.csv +2001 -0
- sequenzo/datasets/biofam_married_domain.csv +2001 -0
- sequenzo/datasets/chinese_colonial_territories.csv +12 -0
- sequenzo/datasets/country_co2_emissions.csv +194 -0
- sequenzo/datasets/country_co2_emissions_global_deciles.csv +195 -0
- sequenzo/datasets/country_co2_emissions_global_quintiles.csv +195 -0
- sequenzo/datasets/country_co2_emissions_local_deciles.csv +195 -0
- sequenzo/datasets/country_co2_emissions_local_quintiles.csv +195 -0
- sequenzo/datasets/country_gdp_per_capita.csv +194 -0
- sequenzo/datasets/dyadic_children.csv +61 -0
- sequenzo/datasets/dyadic_parents.csv +61 -0
- sequenzo/datasets/mvad.csv +713 -0
- sequenzo/datasets/pairfam_activity_by_month.csv +1028 -0
- sequenzo/datasets/pairfam_activity_by_year.csv +1028 -0
- sequenzo/datasets/pairfam_family_by_month.csv +1028 -0
- sequenzo/datasets/pairfam_family_by_year.csv +1028 -0
- sequenzo/datasets/political_science_aid_shock.csv +166 -0
- sequenzo/datasets/political_science_donor_fragmentation.csv +157 -0
- sequenzo/define_sequence_data.py +1400 -0
- sequenzo/dissimilarity_measures/__init__.py +31 -0
- sequenzo/dissimilarity_measures/c_code.cpython-310-darwin.so +0 -0
- sequenzo/dissimilarity_measures/get_distance_matrix.py +762 -0
- sequenzo/dissimilarity_measures/get_substitution_cost_matrix.py +246 -0
- sequenzo/dissimilarity_measures/src/DHDdistance.cpp +148 -0
- sequenzo/dissimilarity_measures/src/LCPdistance.cpp +114 -0
- sequenzo/dissimilarity_measures/src/LCPspellDistance.cpp +215 -0
- sequenzo/dissimilarity_measures/src/OMdistance.cpp +247 -0
- sequenzo/dissimilarity_measures/src/OMspellDistance.cpp +281 -0
- sequenzo/dissimilarity_measures/src/__init__.py +0 -0
- sequenzo/dissimilarity_measures/src/dist2matrix.cpp +63 -0
- sequenzo/dissimilarity_measures/src/dp_utils.h +160 -0
- sequenzo/dissimilarity_measures/src/module.cpp +40 -0
- sequenzo/dissimilarity_measures/src/setup.py +30 -0
- sequenzo/dissimilarity_measures/src/utils.h +25 -0
- sequenzo/dissimilarity_measures/src/xsimd/.github/cmake-test/main.cpp +6 -0
- sequenzo/dissimilarity_measures/src/xsimd/benchmark/main.cpp +159 -0
- sequenzo/dissimilarity_measures/src/xsimd/benchmark/xsimd_benchmark.hpp +565 -0
- sequenzo/dissimilarity_measures/src/xsimd/docs/source/conf.py +37 -0
- sequenzo/dissimilarity_measures/src/xsimd/examples/mandelbrot.cpp +330 -0
- sequenzo/dissimilarity_measures/src/xsimd/examples/pico_bench.hpp +246 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_arithmetic.hpp +266 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_complex.hpp +112 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_details.hpp +323 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_logical.hpp +218 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_math.hpp +2583 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_memory.hpp +880 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_rounding.hpp +72 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_swizzle.hpp +174 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_trigo.hpp +978 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx.hpp +1924 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx2.hpp +1144 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512bw.hpp +656 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512cd.hpp +28 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512dq.hpp +244 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512er.hpp +20 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512f.hpp +2650 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512ifma.hpp +20 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512pf.hpp +20 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vbmi.hpp +77 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vbmi2.hpp +131 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vnni_avx512bw.hpp +20 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vnni_avx512vbmi2.hpp +20 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avxvnni.hpp +20 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_common.hpp +24 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_common_fwd.hpp +77 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_constants.hpp +393 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_emulated.hpp +788 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_avx.hpp +93 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_avx2.hpp +46 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_sse.hpp +97 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma4.hpp +92 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_i8mm_neon64.hpp +17 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_isa.hpp +142 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_neon.hpp +3142 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_neon64.hpp +1543 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_rvv.hpp +1513 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_scalar.hpp +1260 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse2.hpp +2024 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse3.hpp +67 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse4_1.hpp +339 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse4_2.hpp +44 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_ssse3.hpp +186 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sve.hpp +1155 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_vsx.hpp +892 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_wasm.hpp +1780 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_arch.hpp +240 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_config.hpp +484 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_cpuid.hpp +269 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_inline.hpp +27 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/math/xsimd_rem_pio2.hpp +719 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/memory/xsimd_aligned_allocator.hpp +349 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/memory/xsimd_alignment.hpp +91 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_all_registers.hpp +55 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_api.hpp +2765 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx2_register.hpp +44 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512bw_register.hpp +51 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512cd_register.hpp +51 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512dq_register.hpp +51 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512er_register.hpp +51 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512f_register.hpp +77 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512ifma_register.hpp +51 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512pf_register.hpp +51 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vbmi2_register.hpp +51 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vbmi_register.hpp +51 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512bw_register.hpp +54 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512vbmi2_register.hpp +53 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx_register.hpp +64 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avxvnni_register.hpp +44 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_batch.hpp +1524 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_batch_constant.hpp +300 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_common_arch.hpp +47 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_emulated_register.hpp +80 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_avx2_register.hpp +50 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_avx_register.hpp +50 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_sse_register.hpp +50 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma4_register.hpp +50 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_i8mm_neon64_register.hpp +55 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_neon64_register.hpp +55 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_neon_register.hpp +154 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_register.hpp +94 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_rvv_register.hpp +506 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse2_register.hpp +59 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse3_register.hpp +49 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse4_1_register.hpp +48 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse4_2_register.hpp +48 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_ssse3_register.hpp +48 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sve_register.hpp +156 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_traits.hpp +337 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_utils.hpp +536 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_vsx_register.hpp +77 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_wasm_register.hpp +59 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/xsimd.hpp +75 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/architectures/dummy.cpp +7 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set.cpp +13 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean.cpp +24 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_aligned.cpp +25 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_arch_independent.cpp +28 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_tag_dispatch.cpp +25 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/doc/manipulating_abstract_batches.cpp +7 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/doc/manipulating_parametric_batches.cpp +8 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum.hpp +31 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum_avx2.cpp +3 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum_sse2.cpp +3 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/doc/writing_vectorized_code.cpp +11 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/main.cpp +31 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_api.cpp +230 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_arch.cpp +217 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_basic_math.cpp +183 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_batch.cpp +1049 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_bool.cpp +508 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_cast.cpp +409 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_complex.cpp +712 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_constant.cpp +286 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_float.cpp +141 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_int.cpp +365 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_manip.cpp +308 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_bitwise_cast.cpp +222 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_exponential.cpp +226 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_hyperbolic.cpp +183 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_power.cpp +265 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_trigonometric.cpp +236 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_conversion.cpp +248 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_custom_default_arch.cpp +28 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_error_gamma.cpp +170 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_explicit_batch_instantiation.cpp +32 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_exponential.cpp +202 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_extract_pair.cpp +92 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_fp_manipulation.cpp +77 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_gnu_source.cpp +30 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_hyperbolic.cpp +167 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_load_store.cpp +304 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_memory.cpp +61 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_poly_evaluation.cpp +64 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_power.cpp +184 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_rounding.cpp +199 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_select.cpp +101 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_shuffle.cpp +760 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_sum.cpp +4 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_sum.hpp +34 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_traits.cpp +172 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_trigonometric.cpp +208 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_utils.hpp +611 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_wasm/test_wasm_playwright.py +123 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_xsimd_api.cpp +1460 -0
- sequenzo/dissimilarity_measures/utils/__init__.py +16 -0
- sequenzo/dissimilarity_measures/utils/get_LCP_length_for_2_seq.py +44 -0
- sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.cpython-310-darwin.so +0 -0
- sequenzo/dissimilarity_measures/utils/seqconc.cpython-310-darwin.so +0 -0
- sequenzo/dissimilarity_measures/utils/seqdss.cpython-310-darwin.so +0 -0
- sequenzo/dissimilarity_measures/utils/seqdur.cpython-310-darwin.so +0 -0
- sequenzo/dissimilarity_measures/utils/seqlength.cpython-310-darwin.so +0 -0
- sequenzo/multidomain/__init__.py +23 -0
- sequenzo/multidomain/association_between_domains.py +311 -0
- sequenzo/multidomain/cat.py +597 -0
- sequenzo/multidomain/combt.py +519 -0
- sequenzo/multidomain/dat.py +81 -0
- sequenzo/multidomain/idcd.py +139 -0
- sequenzo/multidomain/linked_polyad.py +292 -0
- sequenzo/openmp_setup.py +233 -0
- sequenzo/prefix_tree/__init__.py +62 -0
- sequenzo/prefix_tree/hub.py +114 -0
- sequenzo/prefix_tree/individual_level_indicators.py +1321 -0
- sequenzo/prefix_tree/spell_individual_level_indicators.py +580 -0
- sequenzo/prefix_tree/spell_level_indicators.py +297 -0
- sequenzo/prefix_tree/system_level_indicators.py +544 -0
- sequenzo/prefix_tree/utils.py +54 -0
- sequenzo/seqhmm/__init__.py +95 -0
- sequenzo/seqhmm/advanced_optimization.py +305 -0
- sequenzo/seqhmm/bootstrap.py +411 -0
- sequenzo/seqhmm/build_hmm.py +142 -0
- sequenzo/seqhmm/build_mhmm.py +136 -0
- sequenzo/seqhmm/build_nhmm.py +121 -0
- sequenzo/seqhmm/fit_mhmm.py +62 -0
- sequenzo/seqhmm/fit_model.py +61 -0
- sequenzo/seqhmm/fit_nhmm.py +76 -0
- sequenzo/seqhmm/formulas.py +289 -0
- sequenzo/seqhmm/forward_backward_nhmm.py +276 -0
- sequenzo/seqhmm/gradients_nhmm.py +306 -0
- sequenzo/seqhmm/hmm.py +291 -0
- sequenzo/seqhmm/mhmm.py +314 -0
- sequenzo/seqhmm/model_comparison.py +238 -0
- sequenzo/seqhmm/multichannel_em.py +282 -0
- sequenzo/seqhmm/multichannel_utils.py +138 -0
- sequenzo/seqhmm/nhmm.py +270 -0
- sequenzo/seqhmm/nhmm_utils.py +191 -0
- sequenzo/seqhmm/predict.py +137 -0
- sequenzo/seqhmm/predict_mhmm.py +142 -0
- sequenzo/seqhmm/simulate.py +878 -0
- sequenzo/seqhmm/utils.py +218 -0
- sequenzo/seqhmm/visualization.py +910 -0
- sequenzo/sequence_characteristics/__init__.py +40 -0
- sequenzo/sequence_characteristics/complexity_index.py +49 -0
- sequenzo/sequence_characteristics/overall_cross_sectional_entropy.py +220 -0
- sequenzo/sequence_characteristics/plot_characteristics.py +593 -0
- sequenzo/sequence_characteristics/simple_characteristics.py +311 -0
- sequenzo/sequence_characteristics/state_frequencies_and_entropy_per_sequence.py +39 -0
- sequenzo/sequence_characteristics/turbulence.py +155 -0
- sequenzo/sequence_characteristics/variance_of_spell_durations.py +86 -0
- sequenzo/sequence_characteristics/within_sequence_entropy.py +43 -0
- sequenzo/suffix_tree/__init__.py +66 -0
- sequenzo/suffix_tree/hub.py +114 -0
- sequenzo/suffix_tree/individual_level_indicators.py +1679 -0
- sequenzo/suffix_tree/spell_individual_level_indicators.py +493 -0
- sequenzo/suffix_tree/spell_level_indicators.py +248 -0
- sequenzo/suffix_tree/system_level_indicators.py +535 -0
- sequenzo/suffix_tree/utils.py +56 -0
- sequenzo/version_check.py +283 -0
- sequenzo/visualization/__init__.py +29 -0
- sequenzo/visualization/plot_mean_time.py +222 -0
- sequenzo/visualization/plot_modal_state.py +276 -0
- sequenzo/visualization/plot_most_frequent_sequences.py +147 -0
- sequenzo/visualization/plot_relative_frequency.py +405 -0
- sequenzo/visualization/plot_sequence_index.py +1175 -0
- sequenzo/visualization/plot_single_medoid.py +153 -0
- sequenzo/visualization/plot_state_distribution.py +651 -0
- sequenzo/visualization/plot_transition_matrix.py +190 -0
- sequenzo/visualization/utils/__init__.py +23 -0
- sequenzo/visualization/utils/utils.py +310 -0
- sequenzo/with_event_history_analysis/__init__.py +35 -0
- sequenzo/with_event_history_analysis/sequence_analysis_multi_state_model.py +850 -0
- sequenzo/with_event_history_analysis/sequence_history_analysis.py +283 -0
- sequenzo-0.1.31.dist-info/METADATA +286 -0
- sequenzo-0.1.31.dist-info/RECORD +299 -0
- sequenzo-0.1.31.dist-info/WHEEL +5 -0
- sequenzo-0.1.31.dist-info/licenses/LICENSE +28 -0
- sequenzo-0.1.31.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,228 @@
|
|
|
1
|
+
#include "PAM.cpp"
|
|
2
|
+
#include "KMedoid.cpp"
|
|
3
|
+
#include "PAMonce.cpp"
|
|
4
|
+
#include "weightedinertia.cpp"
|
|
5
|
+
#include "cluster_quality.cpp"
|
|
6
|
+
|
|
7
|
+
namespace py = pybind11;
|
|
8
|
+
|
|
9
|
+
PYBIND11_MODULE(clustering_c_code, m) {
|
|
10
|
+
py::class_<PAM>(m, "PAM")
|
|
11
|
+
.def(py::init<int, py::array_t<double>, py::array_t<int>, int, py::array_t<double>>())
|
|
12
|
+
.def("runclusterloop", &PAM::runclusterloop);
|
|
13
|
+
|
|
14
|
+
py::class_<KMedoid>(m, "KMedoid")
|
|
15
|
+
.def(py::init<int, py::array_t<double>, py::array_t<int>, int, py::array_t<double>>())
|
|
16
|
+
.def("runclusterloop", &KMedoid::runclusterloop);
|
|
17
|
+
|
|
18
|
+
py::class_<PAMonce>(m, "PAMonce")
|
|
19
|
+
.def(py::init<int, py::array_t<double>, py::array_t<int>, int, py::array_t<double>>())
|
|
20
|
+
.def("runclusterloop", &PAMonce::runclusterloop);
|
|
21
|
+
|
|
22
|
+
py::class_<weightedinertia>(m, "weightedinertia")
|
|
23
|
+
.def(py::init<py::array_t<double>, py::array_t<int>, py::array_t<double>>())
|
|
24
|
+
.def("tmrWeightedInertiaContrib", &weightedinertia::tmrWeightedInertiaContrib);
|
|
25
|
+
|
|
26
|
+
// Cluster Quality functions
|
|
27
|
+
m.def("cluster_quality", [](py::array_t<double> diss_matrix,
|
|
28
|
+
py::array_t<int> cluster_labels,
|
|
29
|
+
py::array_t<double> weights,
|
|
30
|
+
int nclusters) -> py::dict {
|
|
31
|
+
auto diss_buf = diss_matrix.request();
|
|
32
|
+
auto cluster_buf = cluster_labels.request();
|
|
33
|
+
auto weights_buf = weights.request();
|
|
34
|
+
|
|
35
|
+
if (diss_buf.ndim != 2 || diss_buf.shape[0] != diss_buf.shape[1]) {
|
|
36
|
+
throw std::runtime_error("Distance matrix must be square");
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
int n = diss_buf.shape[0];
|
|
40
|
+
|
|
41
|
+
if (cluster_buf.size != n || weights_buf.size != n) {
|
|
42
|
+
throw std::runtime_error("Cluster labels and weights must have same length as matrix dimension");
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
double* diss_ptr = static_cast<double*>(diss_buf.ptr);
|
|
46
|
+
int* cluster_ptr = static_cast<int*>(cluster_buf.ptr);
|
|
47
|
+
double* weights_ptr = static_cast<double*>(weights_buf.ptr);
|
|
48
|
+
|
|
49
|
+
// Prepare output arrays
|
|
50
|
+
std::vector<double> stats(ClusterQualNumStat);
|
|
51
|
+
std::vector<double> asw(2 * nclusters);
|
|
52
|
+
|
|
53
|
+
// Create Kendall tree for caching
|
|
54
|
+
KendallTree kendall;
|
|
55
|
+
|
|
56
|
+
// Call core function
|
|
57
|
+
clusterquality(diss_ptr, cluster_ptr, weights_ptr, n,
|
|
58
|
+
stats.data(), nclusters, asw.data(), kendall);
|
|
59
|
+
|
|
60
|
+
// Clean up Kendall tree
|
|
61
|
+
finalizeKendall(kendall);
|
|
62
|
+
|
|
63
|
+
// Return results as dictionary
|
|
64
|
+
py::dict result;
|
|
65
|
+
result["PBC"] = stats[ClusterQualHPG]; // PBC is stored in HPG position
|
|
66
|
+
result["HG"] = stats[ClusterQualHG];
|
|
67
|
+
result["HGSD"] = stats[ClusterQualHGSD];
|
|
68
|
+
result["ASW"] = stats[ClusterQualASWi];
|
|
69
|
+
result["ASWw"] = stats[ClusterQualASWw];
|
|
70
|
+
result["CH"] = stats[ClusterQualF];
|
|
71
|
+
result["R2"] = stats[ClusterQualR];
|
|
72
|
+
result["CHsq"] = stats[ClusterQualF2];
|
|
73
|
+
result["R2sq"] = stats[ClusterQualR2];
|
|
74
|
+
result["HC"] = stats[ClusterQualHC];
|
|
75
|
+
|
|
76
|
+
// Convert ASW array to numpy array
|
|
77
|
+
auto asw_array = py::array_t<double>(2 * nclusters);
|
|
78
|
+
auto asw_buf = asw_array.request();
|
|
79
|
+
double* asw_out = static_cast<double*>(asw_buf.ptr);
|
|
80
|
+
std::copy(asw.begin(), asw.end(), asw_out);
|
|
81
|
+
result["cluster_asw"] = asw_array;
|
|
82
|
+
|
|
83
|
+
return result;
|
|
84
|
+
}, "Compute cluster quality indicators for full distance matrix");
|
|
85
|
+
|
|
86
|
+
m.def("cluster_quality_condensed", [](py::array_t<double> diss_condensed,
|
|
87
|
+
py::array_t<int> cluster_labels,
|
|
88
|
+
py::array_t<double> weights,
|
|
89
|
+
int n, int nclusters) -> py::dict {
|
|
90
|
+
auto diss_buf = diss_condensed.request();
|
|
91
|
+
auto cluster_buf = cluster_labels.request();
|
|
92
|
+
auto weights_buf = weights.request();
|
|
93
|
+
|
|
94
|
+
int expected_size = n * (n - 1) / 2;
|
|
95
|
+
if (diss_buf.size != expected_size) {
|
|
96
|
+
throw std::runtime_error("Condensed distance array size mismatch");
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
if (cluster_buf.size != n || weights_buf.size != n) {
|
|
100
|
+
throw std::runtime_error("Cluster labels and weights must have length n");
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
double* diss_ptr = static_cast<double*>(diss_buf.ptr);
|
|
104
|
+
int* cluster_ptr = static_cast<int*>(cluster_buf.ptr);
|
|
105
|
+
double* weights_ptr = static_cast<double*>(weights_buf.ptr);
|
|
106
|
+
|
|
107
|
+
// Prepare output arrays
|
|
108
|
+
std::vector<double> stats(ClusterQualNumStat);
|
|
109
|
+
std::vector<double> asw(2 * nclusters);
|
|
110
|
+
|
|
111
|
+
// Create Kendall tree for caching
|
|
112
|
+
KendallTree kendall;
|
|
113
|
+
|
|
114
|
+
// Call core function
|
|
115
|
+
clusterquality_dist(diss_ptr, cluster_ptr, weights_ptr, n,
|
|
116
|
+
stats.data(), nclusters, asw.data(), kendall);
|
|
117
|
+
|
|
118
|
+
// Clean up Kendall tree
|
|
119
|
+
finalizeKendall(kendall);
|
|
120
|
+
|
|
121
|
+
// Return results as dictionary
|
|
122
|
+
py::dict result;
|
|
123
|
+
result["PBC"] = stats[ClusterQualHPG]; // PBC is stored in HPG position
|
|
124
|
+
result["HG"] = stats[ClusterQualHG];
|
|
125
|
+
result["HGSD"] = stats[ClusterQualHGSD];
|
|
126
|
+
result["ASW"] = stats[ClusterQualASWi];
|
|
127
|
+
result["ASWw"] = stats[ClusterQualASWw];
|
|
128
|
+
result["CH"] = stats[ClusterQualF];
|
|
129
|
+
result["R2"] = stats[ClusterQualR];
|
|
130
|
+
result["CHsq"] = stats[ClusterQualF2];
|
|
131
|
+
result["R2sq"] = stats[ClusterQualR2];
|
|
132
|
+
result["HC"] = stats[ClusterQualHC];
|
|
133
|
+
|
|
134
|
+
// Convert ASW array to numpy array
|
|
135
|
+
auto asw_array = py::array_t<double>(2 * nclusters);
|
|
136
|
+
auto asw_buf = asw_array.request();
|
|
137
|
+
double* asw_out = static_cast<double*>(asw_buf.ptr);
|
|
138
|
+
std::copy(asw.begin(), asw.end(), asw_out);
|
|
139
|
+
result["cluster_asw"] = asw_array;
|
|
140
|
+
|
|
141
|
+
return result;
|
|
142
|
+
}, "Compute cluster quality indicators for condensed distance array");
|
|
143
|
+
|
|
144
|
+
m.def("individual_asw", [](py::array_t<double> diss_matrix,
|
|
145
|
+
py::array_t<int> cluster_labels,
|
|
146
|
+
py::array_t<double> weights,
|
|
147
|
+
int nclusters) -> py::dict {
|
|
148
|
+
auto diss_buf = diss_matrix.request();
|
|
149
|
+
auto cluster_buf = cluster_labels.request();
|
|
150
|
+
auto weights_buf = weights.request();
|
|
151
|
+
|
|
152
|
+
if (diss_buf.ndim != 2 || diss_buf.shape[0] != diss_buf.shape[1]) {
|
|
153
|
+
throw std::runtime_error("Distance matrix must be square");
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
int n = diss_buf.shape[0];
|
|
157
|
+
|
|
158
|
+
if (cluster_buf.size != n || weights_buf.size != n) {
|
|
159
|
+
throw std::runtime_error("Cluster labels and weights must have same length as matrix dimension");
|
|
160
|
+
}
|
|
161
|
+
|
|
162
|
+
double* diss_ptr = static_cast<double*>(diss_buf.ptr);
|
|
163
|
+
int* cluster_ptr = static_cast<int*>(cluster_buf.ptr);
|
|
164
|
+
double* weights_ptr = static_cast<double*>(weights_buf.ptr);
|
|
165
|
+
|
|
166
|
+
// Prepare output arrays
|
|
167
|
+
auto asw_i = py::array_t<double>(n);
|
|
168
|
+
auto asw_w = py::array_t<double>(n);
|
|
169
|
+
|
|
170
|
+
auto asw_i_buf = asw_i.request();
|
|
171
|
+
auto asw_w_buf = asw_w.request();
|
|
172
|
+
|
|
173
|
+
double* asw_i_ptr = static_cast<double*>(asw_i_buf.ptr);
|
|
174
|
+
double* asw_w_ptr = static_cast<double*>(asw_w_buf.ptr);
|
|
175
|
+
|
|
176
|
+
// Call core function
|
|
177
|
+
indiv_asw(diss_ptr, cluster_ptr, weights_ptr, n, nclusters, asw_i_ptr, asw_w_ptr);
|
|
178
|
+
|
|
179
|
+
// Return results as dictionary
|
|
180
|
+
py::dict result;
|
|
181
|
+
result["asw_individual"] = asw_i;
|
|
182
|
+
result["asw_weighted"] = asw_w;
|
|
183
|
+
|
|
184
|
+
return result;
|
|
185
|
+
}, "Compute individual ASW scores for all samples");
|
|
186
|
+
|
|
187
|
+
m.def("individual_asw_condensed", [](py::array_t<double> diss_condensed,
|
|
188
|
+
py::array_t<int> cluster_labels,
|
|
189
|
+
py::array_t<double> weights,
|
|
190
|
+
int n, int nclusters) -> py::dict {
|
|
191
|
+
auto diss_buf = diss_condensed.request();
|
|
192
|
+
auto cluster_buf = cluster_labels.request();
|
|
193
|
+
auto weights_buf = weights.request();
|
|
194
|
+
|
|
195
|
+
int expected_size = n * (n - 1) / 2;
|
|
196
|
+
if (diss_buf.size != expected_size) {
|
|
197
|
+
throw std::runtime_error("Condensed distance array size mismatch");
|
|
198
|
+
}
|
|
199
|
+
|
|
200
|
+
if (cluster_buf.size != n || weights_buf.size != n) {
|
|
201
|
+
throw std::runtime_error("Cluster labels and weights must have length n");
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
double* diss_ptr = static_cast<double*>(diss_buf.ptr);
|
|
205
|
+
int* cluster_ptr = static_cast<int*>(cluster_buf.ptr);
|
|
206
|
+
double* weights_ptr = static_cast<double*>(weights_buf.ptr);
|
|
207
|
+
|
|
208
|
+
// Prepare output arrays
|
|
209
|
+
auto asw_i = py::array_t<double>(n);
|
|
210
|
+
auto asw_w = py::array_t<double>(n);
|
|
211
|
+
|
|
212
|
+
auto asw_i_buf = asw_i.request();
|
|
213
|
+
auto asw_w_buf = asw_w.request();
|
|
214
|
+
|
|
215
|
+
double* asw_i_ptr = static_cast<double*>(asw_i_buf.ptr);
|
|
216
|
+
double* asw_w_ptr = static_cast<double*>(asw_w_buf.ptr);
|
|
217
|
+
|
|
218
|
+
// Call core function
|
|
219
|
+
indiv_asw_dist(diss_ptr, cluster_ptr, weights_ptr, n, nclusters, asw_i_ptr, asw_w_ptr);
|
|
220
|
+
|
|
221
|
+
// Return results as dictionary
|
|
222
|
+
py::dict result;
|
|
223
|
+
result["asw_individual"] = asw_i;
|
|
224
|
+
result["asw_weighted"] = asw_w;
|
|
225
|
+
|
|
226
|
+
return result;
|
|
227
|
+
}, "Compute individual ASW scores for condensed distance array");
|
|
228
|
+
}
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
#include <pybind11/pybind11.h>
|
|
2
|
+
#include <pybind11/numpy.h>
|
|
3
|
+
#include <vector>
|
|
4
|
+
#include <iostream>
|
|
5
|
+
#ifdef _OPENMP
|
|
6
|
+
#include <omp.h>
|
|
7
|
+
#endif
|
|
8
|
+
#include <cfloat>
|
|
9
|
+
#include <cmath>
|
|
10
|
+
|
|
11
|
+
namespace py = pybind11;
|
|
12
|
+
|
|
13
|
+
class weightedinertia {
|
|
14
|
+
public:
|
|
15
|
+
weightedinertia(py::array_t<double> distmatrix, py::array_t<int> individuals, py::array_t<double> weights) {
|
|
16
|
+
std::cout << std::flush; // 刷新 C++ 输出
|
|
17
|
+
|
|
18
|
+
try {
|
|
19
|
+
this->distmatrix = distmatrix;
|
|
20
|
+
this->individuals = individuals;
|
|
21
|
+
this->weights = weights;
|
|
22
|
+
|
|
23
|
+
ilen = individuals.size();
|
|
24
|
+
|
|
25
|
+
result = py::array_t<double>(ilen);
|
|
26
|
+
} catch (const std::exception& e) {
|
|
27
|
+
py::print("Error in constructor: ", e.what());
|
|
28
|
+
throw;
|
|
29
|
+
}
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
py::array_t<double> tmrWeightedInertiaContrib() {
|
|
33
|
+
auto ptr_dist = distmatrix.unchecked<2>();
|
|
34
|
+
auto ptr_indiv = individuals.unchecked<1>();
|
|
35
|
+
auto ptr_weights = weights.unchecked<1>();
|
|
36
|
+
|
|
37
|
+
py::array_t<double> result_local(ilen);
|
|
38
|
+
auto ptr_result = result_local.mutable_unchecked<1>();
|
|
39
|
+
|
|
40
|
+
for (int i = 0; i < ilen; i++) {
|
|
41
|
+
ptr_result(i) = 0.0;
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
double totweights = 0.0;
|
|
45
|
+
|
|
46
|
+
#pragma omp parallel for reduction(+:totweights)
|
|
47
|
+
for (int i = 0; i < ilen; i++) {
|
|
48
|
+
totweights += ptr_weights(ptr_indiv(i));
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
// 每个线程使用局部 result 副本,最后归约合并
|
|
52
|
+
int nthreads = 1;
|
|
53
|
+
#ifdef _OPENMP
|
|
54
|
+
#pragma omp parallel
|
|
55
|
+
{
|
|
56
|
+
#pragma omp single
|
|
57
|
+
nthreads = omp_get_num_threads();
|
|
58
|
+
}
|
|
59
|
+
#endif
|
|
60
|
+
|
|
61
|
+
std::vector<std::vector<double>> result_private(nthreads, std::vector<double>(ilen, 0.0));
|
|
62
|
+
|
|
63
|
+
#pragma omp parallel
|
|
64
|
+
{
|
|
65
|
+
#ifdef _OPENMP
|
|
66
|
+
int tid = omp_get_thread_num();
|
|
67
|
+
#else
|
|
68
|
+
int tid = 0;
|
|
69
|
+
#endif
|
|
70
|
+
auto& local = result_private[tid];
|
|
71
|
+
|
|
72
|
+
#pragma omp for schedule(static)
|
|
73
|
+
for (int i = 0; i < ilen; ++i) {
|
|
74
|
+
int pos_i = ptr_indiv(i);
|
|
75
|
+
double i_weight = ptr_weights(pos_i);
|
|
76
|
+
|
|
77
|
+
for (int j = i + 1; j < ilen; ++j) {
|
|
78
|
+
int pos_j = ptr_indiv(j);
|
|
79
|
+
double diss = ptr_dist(pos_i, pos_j);
|
|
80
|
+
|
|
81
|
+
local[i] += diss * ptr_weights(pos_j);
|
|
82
|
+
local[j] += diss * i_weight;
|
|
83
|
+
}
|
|
84
|
+
}
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
// 合并各线程的 result_private 到主 result
|
|
88
|
+
for (int t = 0; t < nthreads; ++t) {
|
|
89
|
+
for (int i = 0; i < ilen; ++i) {
|
|
90
|
+
ptr_result(i) += result_private[t][i];
|
|
91
|
+
}
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
if (totweights > 0) {
|
|
95
|
+
#pragma omp parallel for
|
|
96
|
+
for (int i = 0; i < ilen; ++i) {
|
|
97
|
+
ptr_result(i) /= totweights;
|
|
98
|
+
}
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
return result_local;
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
private:
|
|
105
|
+
py::array_t<double> distmatrix; // 距离矩阵
|
|
106
|
+
py::array_t<int> individuals; // 某组数据点的集合
|
|
107
|
+
py::array_t<double> weights; // 权重数组
|
|
108
|
+
|
|
109
|
+
int ilen;
|
|
110
|
+
py::array_t<double> result;
|
|
111
|
+
};
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
"""
|
|
2
|
+
@Author : Yuqi Liang 梁彧祺
|
|
3
|
+
@File : __init__.py
|
|
4
|
+
@Time : 27/02/2025 10:38
|
|
5
|
+
@Desc : Utilities for the clustering subpackage; re-exports `disscentertrim`.
|
|
6
|
+
"""
|
|
7
|
+
# utils/__init__.py
|
|
8
|
+
|
|
9
|
+
from .disscenter import disscentertrim
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _import_c_code():
    """Return the compiled ``c_code`` extension module, or ``None`` when it is unavailable.

    The import happens at call time rather than at package import time, to
    avoid circular dependencies during installation. When the import fails a
    warning line is printed and ``None`` is returned.
    """
    try:
        from sequenzo.dissimilarity_measures.src import c_code as extension
    except ImportError:
        # Extension missing or not built: report it and signal the absence with None.
        print(
            "Warning: The C++ extension (c_code) could not be imported. Please ensure the extension module is compiled correctly.")
        return None
    else:
        return extension
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
# Names exported by a star import of this package.
__all__ = [
    "disscentertrim",
    # Add other functions as needed
]
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
"""
|
|
2
|
+
@Author : 李欣怡
|
|
3
|
+
@File : disscenter.py
|
|
4
|
+
@Time : 2025/2/8 12:57
|
|
5
|
+
@Desc :
|
|
6
|
+
Utility function for the k_medoids algorithm.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import numpy as np
|
|
10
|
+
import pandas as pd
|
|
11
|
+
|
|
12
|
+
import importlib
|
|
13
|
+
import sequenzo.clustering.clustering_c_code
|
|
14
|
+
|
|
15
|
+
clustering_c_code = importlib.import_module("sequenzo.clustering.clustering_c_code")
|
|
16
|
+
|
|
17
|
+
def disscentertrim(diss, group=None, medoids_index=None, allcenter=False, weights=None, squared=False, trim=0):
    """Compute each observation's weighted dissimilarity to the centre of its group.

    Parameters
    ----------
    diss : array-like, shape (n, n)
        Square dissimilarity matrix.
    group : array-like of length n, optional
        Group label of every observation. Defaults to a single group.
    medoids_index : {None, "first", "all"} or bool, optional
        ``None`` (or ``False``) returns the dissimilarities to the centre.
        ``"first"`` (or ``True``) returns, per group, the index of the first
        observation with the smallest value; ``"all"`` returns every such
        index. Strings are matched case-insensitively.
    allcenter : bool, optional
        Not implemented yet. Ignored when ``medoids_index`` is given.
    weights : array-like of length n, optional
        Observation weights. Defaults to all ones.
    squared : bool, optional
        Square the dissimilarities before use.
    trim : float, optional
        Not implemented yet; a positive value only prints a placeholder message.

    Returns
    -------
    numpy.ndarray
        When ``medoids_index`` is ``None``: array of length n with the
        dissimilarity of each observation to its group centre.
    numpy.ndarray or list
        When medoids are requested: an integer array with one index per group
        (``"first"``) or a list of index arrays (``"all"``). With a single
        group only that group's entry is returned.

    Raises
    ------
    ValueError
        If ``medoids_index`` is not recognised, or if ``diss``, ``group`` or
        ``weights`` have inconsistent shapes.
    NotImplementedError
        If ``allcenter`` is requested.
    """
    # Booleans are a shorthand: True -> "first", False -> None.
    if isinstance(medoids_index, bool):
        medoids_index = "first" if medoids_index else None
    elif isinstance(medoids_index, str):
        # Accept "First"/"first"/"ALL"...; the error message itself advertises "First".
        medoids_index = medoids_index.lower()

    retmedoids = medoids_index is not None  # Whether medoids need to be returned
    if retmedoids:
        allcenter = False

    allmedoids = False
    if retmedoids:
        if medoids_index == "all":
            allmedoids = True
        elif medoids_index != "first":
            raise ValueError('\'medoids_index\' argument should be one of "First", "all" or None')

    if allcenter:
        # This branch was never implemented; fail clearly instead of with a NameError later.
        raise NotImplementedError("allcenter=True is not implemented yet")

    # Convert once up front so the matrix is not copied again for every group.
    diss = np.asarray(diss, dtype=np.float64)
    if diss.ndim != 2 or diss.shape[0] != diss.shape[1]:
        raise ValueError("'diss' must be a square 2-dimensional dissimilarity matrix")
    n_obs = diss.shape[0]

    if weights is None:
        weights = np.ones(n_obs, dtype=np.float64)
    else:
        weights = np.asarray(weights, dtype=np.float64)
        if weights.shape != (n_obs,):
            raise ValueError("'weights' must have one entry per row of 'diss'")

    if squared:
        diss = np.square(diss)

    if group is None:
        group = np.ones(n_obs, dtype=int)

    ind = np.arange(n_obs)
    grp = np.asarray(group)
    if grp.shape != (n_obs,):
        raise ValueError("'group' must have one entry per row of 'diss'")
    lgrp = np.unique(grp)

    ret = np.zeros(n_obs)

    if retmedoids:
        # Medoids are positions in `diss`, so keep them as integers.
        medoids = [] if allmedoids else np.zeros(len(lgrp), dtype=int)

    for i, label in enumerate(lgrp):
        cond = (grp == label)
        grpindiv = ind[cond]  # 0-based positions of the members of group i

        inertia = clustering_c_code.weightedinertia(diss,
                                                    grpindiv.astype(np.int32),
                                                    weights)
        dc = inertia.tmrWeightedInertiaContrib()
        dc = dc - np.average(dc, weights=weights[cond]) / 2

        if trim > 0:
            # TODO: trimming is not implemented yet; the values are left untrimmed.
            print("以后再补充")

        ret[grpindiv] = dc

        if retmedoids:
            # Positions (within the whole data set) of the members reaching the minimum.
            best = grpindiv[dc == np.min(dc)]
            if allmedoids:
                medoids.append(best)
            else:
                medoids[i] = best[0]

    if retmedoids:
        if len(lgrp) == 1:
            # Single group: return its entry directly rather than a 1-element container.
            return medoids[0]

        return medoids

    return ret
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
if __name__ == '__main__':
    # Ad-hoc demo, executed only when this file is run as a script.
    # NOTE(review): nothing below calls disscentertrim; the demo runs `clara` instead.
    # NOTE(review): load_dataset, SequenceData and clara are expected to come from the
    # star import below - confirm that the sequenzo package exports all three.

    # Load the data that we would like to explore in this tutorial
    # `df` is the short for `dataframe`, which is a common variable name for a dataset
    from sequenzo import *
    df = load_dataset('country_co2_emissions')

    # Every column except the first one is passed as the time axis.
    time = list(df.columns)[1:]

    states = ['Very Low', 'Low', 'Middle', 'High', 'Very High']

    sequence_data = SequenceData(df, time=time, states=states)

    result = clara(seqdata=sequence_data,
                   R=2,
                   kvals=range(2, 21),
                   sample_size=3000,
                   criteria=['distance', 'pbm'],
                   parallel=True,
                   stability=True)
    # Keep only the 'allstat' entry of the returned mapping.
    result = result['allstat']
|
|
122
|
+
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
"""
|
|
2
|
+
@Author : Yuqi Liang 梁彧祺
|
|
3
|
+
@File : __init__.py
|
|
4
|
+
@Time : 01/05/2025 09:27
|
|
5
|
+
@Desc : Public entry points of the data_preprocessing subpackage, re-exported from `helpers`.
|
|
6
|
+
"""
|
|
7
|
+
from .helpers import (clean_time_columns_auto,
|
|
8
|
+
assign_unique_ids,
|
|
9
|
+
wide_to_long_format_data,
|
|
10
|
+
long_to_wide_format_data,
|
|
11
|
+
summarize_missing_values,
|
|
12
|
+
replace_cluster_id_by_labels)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
# Names exported by a star import of this package; they mirror the
# helpers imported from `.helpers` above.
__all__ = [
    "clean_time_columns_auto",
    "assign_unique_ids",
    "wide_to_long_format_data",
    "long_to_wide_format_data",
    "summarize_missing_values",
    "replace_cluster_id_by_labels"
]
|