sequenzo 0.1.31__cp310-cp310-macosx_10_9_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _sequenzo_fastcluster.cpython-310-darwin.so +0 -0
- sequenzo/__init__.py +349 -0
- sequenzo/big_data/__init__.py +12 -0
- sequenzo/big_data/clara/__init__.py +26 -0
- sequenzo/big_data/clara/clara.py +476 -0
- sequenzo/big_data/clara/utils/__init__.py +27 -0
- sequenzo/big_data/clara/utils/aggregatecases.py +92 -0
- sequenzo/big_data/clara/utils/davies_bouldin.py +91 -0
- sequenzo/big_data/clara/utils/get_weighted_diss.cpython-310-darwin.so +0 -0
- sequenzo/big_data/clara/utils/wfcmdd.py +205 -0
- sequenzo/big_data/clara/visualization.py +88 -0
- sequenzo/clustering/KMedoids.py +178 -0
- sequenzo/clustering/__init__.py +30 -0
- sequenzo/clustering/clustering_c_code.cpython-310-darwin.so +0 -0
- sequenzo/clustering/hierarchical_clustering.py +1256 -0
- sequenzo/clustering/sequenzo_fastcluster/fastcluster.py +495 -0
- sequenzo/clustering/sequenzo_fastcluster/src/fastcluster.cpp +1877 -0
- sequenzo/clustering/sequenzo_fastcluster/src/fastcluster_python.cpp +1264 -0
- sequenzo/clustering/src/KMedoid.cpp +263 -0
- sequenzo/clustering/src/PAM.cpp +237 -0
- sequenzo/clustering/src/PAMonce.cpp +265 -0
- sequenzo/clustering/src/cluster_quality.cpp +496 -0
- sequenzo/clustering/src/cluster_quality.h +128 -0
- sequenzo/clustering/src/cluster_quality_backup.cpp +570 -0
- sequenzo/clustering/src/module.cpp +228 -0
- sequenzo/clustering/src/weightedinertia.cpp +111 -0
- sequenzo/clustering/utils/__init__.py +27 -0
- sequenzo/clustering/utils/disscenter.py +122 -0
- sequenzo/data_preprocessing/__init__.py +22 -0
- sequenzo/data_preprocessing/helpers.py +303 -0
- sequenzo/datasets/__init__.py +41 -0
- sequenzo/datasets/biofam.csv +2001 -0
- sequenzo/datasets/biofam_child_domain.csv +2001 -0
- sequenzo/datasets/biofam_left_domain.csv +2001 -0
- sequenzo/datasets/biofam_married_domain.csv +2001 -0
- sequenzo/datasets/chinese_colonial_territories.csv +12 -0
- sequenzo/datasets/country_co2_emissions.csv +194 -0
- sequenzo/datasets/country_co2_emissions_global_deciles.csv +195 -0
- sequenzo/datasets/country_co2_emissions_global_quintiles.csv +195 -0
- sequenzo/datasets/country_co2_emissions_local_deciles.csv +195 -0
- sequenzo/datasets/country_co2_emissions_local_quintiles.csv +195 -0
- sequenzo/datasets/country_gdp_per_capita.csv +194 -0
- sequenzo/datasets/dyadic_children.csv +61 -0
- sequenzo/datasets/dyadic_parents.csv +61 -0
- sequenzo/datasets/mvad.csv +713 -0
- sequenzo/datasets/pairfam_activity_by_month.csv +1028 -0
- sequenzo/datasets/pairfam_activity_by_year.csv +1028 -0
- sequenzo/datasets/pairfam_family_by_month.csv +1028 -0
- sequenzo/datasets/pairfam_family_by_year.csv +1028 -0
- sequenzo/datasets/political_science_aid_shock.csv +166 -0
- sequenzo/datasets/political_science_donor_fragmentation.csv +157 -0
- sequenzo/define_sequence_data.py +1400 -0
- sequenzo/dissimilarity_measures/__init__.py +31 -0
- sequenzo/dissimilarity_measures/c_code.cpython-310-darwin.so +0 -0
- sequenzo/dissimilarity_measures/get_distance_matrix.py +762 -0
- sequenzo/dissimilarity_measures/get_substitution_cost_matrix.py +246 -0
- sequenzo/dissimilarity_measures/src/DHDdistance.cpp +148 -0
- sequenzo/dissimilarity_measures/src/LCPdistance.cpp +114 -0
- sequenzo/dissimilarity_measures/src/LCPspellDistance.cpp +215 -0
- sequenzo/dissimilarity_measures/src/OMdistance.cpp +247 -0
- sequenzo/dissimilarity_measures/src/OMspellDistance.cpp +281 -0
- sequenzo/dissimilarity_measures/src/__init__.py +0 -0
- sequenzo/dissimilarity_measures/src/dist2matrix.cpp +63 -0
- sequenzo/dissimilarity_measures/src/dp_utils.h +160 -0
- sequenzo/dissimilarity_measures/src/module.cpp +40 -0
- sequenzo/dissimilarity_measures/src/setup.py +30 -0
- sequenzo/dissimilarity_measures/src/utils.h +25 -0
- sequenzo/dissimilarity_measures/src/xsimd/.github/cmake-test/main.cpp +6 -0
- sequenzo/dissimilarity_measures/src/xsimd/benchmark/main.cpp +159 -0
- sequenzo/dissimilarity_measures/src/xsimd/benchmark/xsimd_benchmark.hpp +565 -0
- sequenzo/dissimilarity_measures/src/xsimd/docs/source/conf.py +37 -0
- sequenzo/dissimilarity_measures/src/xsimd/examples/mandelbrot.cpp +330 -0
- sequenzo/dissimilarity_measures/src/xsimd/examples/pico_bench.hpp +246 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_arithmetic.hpp +266 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_complex.hpp +112 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_details.hpp +323 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_logical.hpp +218 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_math.hpp +2583 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_memory.hpp +880 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_rounding.hpp +72 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_swizzle.hpp +174 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_trigo.hpp +978 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx.hpp +1924 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx2.hpp +1144 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512bw.hpp +656 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512cd.hpp +28 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512dq.hpp +244 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512er.hpp +20 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512f.hpp +2650 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512ifma.hpp +20 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512pf.hpp +20 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vbmi.hpp +77 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vbmi2.hpp +131 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vnni_avx512bw.hpp +20 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vnni_avx512vbmi2.hpp +20 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avxvnni.hpp +20 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_common.hpp +24 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_common_fwd.hpp +77 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_constants.hpp +393 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_emulated.hpp +788 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_avx.hpp +93 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_avx2.hpp +46 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_sse.hpp +97 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma4.hpp +92 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_i8mm_neon64.hpp +17 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_isa.hpp +142 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_neon.hpp +3142 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_neon64.hpp +1543 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_rvv.hpp +1513 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_scalar.hpp +1260 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse2.hpp +2024 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse3.hpp +67 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse4_1.hpp +339 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse4_2.hpp +44 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_ssse3.hpp +186 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sve.hpp +1155 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_vsx.hpp +892 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_wasm.hpp +1780 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_arch.hpp +240 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_config.hpp +484 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_cpuid.hpp +269 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_inline.hpp +27 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/math/xsimd_rem_pio2.hpp +719 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/memory/xsimd_aligned_allocator.hpp +349 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/memory/xsimd_alignment.hpp +91 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_all_registers.hpp +55 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_api.hpp +2765 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx2_register.hpp +44 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512bw_register.hpp +51 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512cd_register.hpp +51 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512dq_register.hpp +51 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512er_register.hpp +51 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512f_register.hpp +77 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512ifma_register.hpp +51 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512pf_register.hpp +51 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vbmi2_register.hpp +51 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vbmi_register.hpp +51 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512bw_register.hpp +54 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512vbmi2_register.hpp +53 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx_register.hpp +64 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avxvnni_register.hpp +44 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_batch.hpp +1524 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_batch_constant.hpp +300 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_common_arch.hpp +47 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_emulated_register.hpp +80 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_avx2_register.hpp +50 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_avx_register.hpp +50 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_sse_register.hpp +50 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma4_register.hpp +50 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_i8mm_neon64_register.hpp +55 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_neon64_register.hpp +55 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_neon_register.hpp +154 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_register.hpp +94 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_rvv_register.hpp +506 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse2_register.hpp +59 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse3_register.hpp +49 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse4_1_register.hpp +48 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse4_2_register.hpp +48 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_ssse3_register.hpp +48 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sve_register.hpp +156 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_traits.hpp +337 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_utils.hpp +536 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_vsx_register.hpp +77 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_wasm_register.hpp +59 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/xsimd.hpp +75 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/architectures/dummy.cpp +7 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set.cpp +13 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean.cpp +24 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_aligned.cpp +25 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_arch_independent.cpp +28 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_tag_dispatch.cpp +25 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/doc/manipulating_abstract_batches.cpp +7 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/doc/manipulating_parametric_batches.cpp +8 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum.hpp +31 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum_avx2.cpp +3 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum_sse2.cpp +3 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/doc/writing_vectorized_code.cpp +11 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/main.cpp +31 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_api.cpp +230 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_arch.cpp +217 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_basic_math.cpp +183 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_batch.cpp +1049 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_bool.cpp +508 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_cast.cpp +409 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_complex.cpp +712 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_constant.cpp +286 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_float.cpp +141 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_int.cpp +365 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_manip.cpp +308 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_bitwise_cast.cpp +222 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_exponential.cpp +226 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_hyperbolic.cpp +183 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_power.cpp +265 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_trigonometric.cpp +236 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_conversion.cpp +248 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_custom_default_arch.cpp +28 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_error_gamma.cpp +170 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_explicit_batch_instantiation.cpp +32 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_exponential.cpp +202 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_extract_pair.cpp +92 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_fp_manipulation.cpp +77 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_gnu_source.cpp +30 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_hyperbolic.cpp +167 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_load_store.cpp +304 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_memory.cpp +61 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_poly_evaluation.cpp +64 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_power.cpp +184 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_rounding.cpp +199 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_select.cpp +101 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_shuffle.cpp +760 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_sum.cpp +4 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_sum.hpp +34 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_traits.cpp +172 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_trigonometric.cpp +208 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_utils.hpp +611 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_wasm/test_wasm_playwright.py +123 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_xsimd_api.cpp +1460 -0
- sequenzo/dissimilarity_measures/utils/__init__.py +16 -0
- sequenzo/dissimilarity_measures/utils/get_LCP_length_for_2_seq.py +44 -0
- sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.cpython-310-darwin.so +0 -0
- sequenzo/dissimilarity_measures/utils/seqconc.cpython-310-darwin.so +0 -0
- sequenzo/dissimilarity_measures/utils/seqdss.cpython-310-darwin.so +0 -0
- sequenzo/dissimilarity_measures/utils/seqdur.cpython-310-darwin.so +0 -0
- sequenzo/dissimilarity_measures/utils/seqlength.cpython-310-darwin.so +0 -0
- sequenzo/multidomain/__init__.py +23 -0
- sequenzo/multidomain/association_between_domains.py +311 -0
- sequenzo/multidomain/cat.py +597 -0
- sequenzo/multidomain/combt.py +519 -0
- sequenzo/multidomain/dat.py +81 -0
- sequenzo/multidomain/idcd.py +139 -0
- sequenzo/multidomain/linked_polyad.py +292 -0
- sequenzo/openmp_setup.py +233 -0
- sequenzo/prefix_tree/__init__.py +62 -0
- sequenzo/prefix_tree/hub.py +114 -0
- sequenzo/prefix_tree/individual_level_indicators.py +1321 -0
- sequenzo/prefix_tree/spell_individual_level_indicators.py +580 -0
- sequenzo/prefix_tree/spell_level_indicators.py +297 -0
- sequenzo/prefix_tree/system_level_indicators.py +544 -0
- sequenzo/prefix_tree/utils.py +54 -0
- sequenzo/seqhmm/__init__.py +95 -0
- sequenzo/seqhmm/advanced_optimization.py +305 -0
- sequenzo/seqhmm/bootstrap.py +411 -0
- sequenzo/seqhmm/build_hmm.py +142 -0
- sequenzo/seqhmm/build_mhmm.py +136 -0
- sequenzo/seqhmm/build_nhmm.py +121 -0
- sequenzo/seqhmm/fit_mhmm.py +62 -0
- sequenzo/seqhmm/fit_model.py +61 -0
- sequenzo/seqhmm/fit_nhmm.py +76 -0
- sequenzo/seqhmm/formulas.py +289 -0
- sequenzo/seqhmm/forward_backward_nhmm.py +276 -0
- sequenzo/seqhmm/gradients_nhmm.py +306 -0
- sequenzo/seqhmm/hmm.py +291 -0
- sequenzo/seqhmm/mhmm.py +314 -0
- sequenzo/seqhmm/model_comparison.py +238 -0
- sequenzo/seqhmm/multichannel_em.py +282 -0
- sequenzo/seqhmm/multichannel_utils.py +138 -0
- sequenzo/seqhmm/nhmm.py +270 -0
- sequenzo/seqhmm/nhmm_utils.py +191 -0
- sequenzo/seqhmm/predict.py +137 -0
- sequenzo/seqhmm/predict_mhmm.py +142 -0
- sequenzo/seqhmm/simulate.py +878 -0
- sequenzo/seqhmm/utils.py +218 -0
- sequenzo/seqhmm/visualization.py +910 -0
- sequenzo/sequence_characteristics/__init__.py +40 -0
- sequenzo/sequence_characteristics/complexity_index.py +49 -0
- sequenzo/sequence_characteristics/overall_cross_sectional_entropy.py +220 -0
- sequenzo/sequence_characteristics/plot_characteristics.py +593 -0
- sequenzo/sequence_characteristics/simple_characteristics.py +311 -0
- sequenzo/sequence_characteristics/state_frequencies_and_entropy_per_sequence.py +39 -0
- sequenzo/sequence_characteristics/turbulence.py +155 -0
- sequenzo/sequence_characteristics/variance_of_spell_durations.py +86 -0
- sequenzo/sequence_characteristics/within_sequence_entropy.py +43 -0
- sequenzo/suffix_tree/__init__.py +66 -0
- sequenzo/suffix_tree/hub.py +114 -0
- sequenzo/suffix_tree/individual_level_indicators.py +1679 -0
- sequenzo/suffix_tree/spell_individual_level_indicators.py +493 -0
- sequenzo/suffix_tree/spell_level_indicators.py +248 -0
- sequenzo/suffix_tree/system_level_indicators.py +535 -0
- sequenzo/suffix_tree/utils.py +56 -0
- sequenzo/version_check.py +283 -0
- sequenzo/visualization/__init__.py +29 -0
- sequenzo/visualization/plot_mean_time.py +222 -0
- sequenzo/visualization/plot_modal_state.py +276 -0
- sequenzo/visualization/plot_most_frequent_sequences.py +147 -0
- sequenzo/visualization/plot_relative_frequency.py +405 -0
- sequenzo/visualization/plot_sequence_index.py +1175 -0
- sequenzo/visualization/plot_single_medoid.py +153 -0
- sequenzo/visualization/plot_state_distribution.py +651 -0
- sequenzo/visualization/plot_transition_matrix.py +190 -0
- sequenzo/visualization/utils/__init__.py +23 -0
- sequenzo/visualization/utils/utils.py +310 -0
- sequenzo/with_event_history_analysis/__init__.py +35 -0
- sequenzo/with_event_history_analysis/sequence_analysis_multi_state_model.py +850 -0
- sequenzo/with_event_history_analysis/sequence_history_analysis.py +283 -0
- sequenzo-0.1.31.dist-info/METADATA +286 -0
- sequenzo-0.1.31.dist-info/RECORD +299 -0
- sequenzo-0.1.31.dist-info/WHEEL +5 -0
- sequenzo-0.1.31.dist-info/licenses/LICENSE +28 -0
- sequenzo-0.1.31.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,215 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* LCPspellDistance: Spell-based Longest Common Prefix distance.
|
|
3
|
+
*
|
|
4
|
+
* Unlike position-wise LCP (which compares state at the same time index),
|
|
5
|
+
* LCPspell compares sequences spell-by-spell: the k-th spell of sequence A
|
|
6
|
+
* is compared with the k-th spell of sequence B. Two spells "match" if they
|
|
7
|
+
* have the same state; we do not require the same start time (e.g. "state 1
|
|
8
|
+
* from 2000" and "state 1 from 2005" both count as the same spell state).
|
|
9
|
+
*
|
|
10
|
+
* expcost (timecost in C++):
|
|
11
|
+
* - expcost = 0: ignore duration; only state equality matters (same state
|
|
12
|
+
* in the same spell order gives a match regardless of spell length).
|
|
13
|
+
* - expcost > 0: duration-aware; when two spells have the same state, we
|
|
14
|
+
* add a penalty proportional to |dur_A - dur_B|, similar to OMspell.
|
|
15
|
+
* Larger expcost makes "same state, different length" more distant.
|
|
16
|
+
*
|
|
17
|
+
* Usage (Python):
|
|
18
|
+
* from sequenzo import load_dataset, SequenceData, get_distance_matrix
|
|
19
|
+
*
|
|
20
|
+
* seqdata = SequenceData(df, time=time_list, id_col="country",
|
|
21
|
+
* states=states, labels=states)
|
|
22
|
+
*
|
|
23
|
+
* # State-only: ignore duration (expcost=0)
|
|
24
|
+
* d = get_distance_matrix(seqdata, method="LCPspell", norm="gmean", expcost=0)
|
|
25
|
+
*
|
|
26
|
+
* # Duration-aware: same state but different length adds distance (like OMspell)
|
|
27
|
+
* d2 = get_distance_matrix(seqdata, method="LCPspell", norm="gmean", expcost=0.5)
|
|
28
|
+
*
|
|
29
|
+
* # Reverse: compare from the last spell (RLCPspell)
|
|
30
|
+
* d3 = get_distance_matrix(seqdata, method="RLCPspell", norm="gmean", expcost=0.5)
|
|
31
|
+
*
|
|
32
|
+
* @Author : Yuqi Liang 梁彧祺
|
|
33
|
+
* @File : LCPspellDistance.cpp
|
|
34
|
+
* @Time : 2026/1/29 22:42
|
|
35
|
+
* @Desc : Spell-based Longest Common Prefix distance.
|
|
36
|
+
*/
|
|
37
|
+
|
|
38
|
+
#include <pybind11/pybind11.h>
|
|
39
|
+
#include <pybind11/numpy.h>
|
|
40
|
+
#include <cmath>
|
|
41
|
+
#include <iostream>
|
|
42
|
+
#include "utils.h"
|
|
43
|
+
#include "dp_utils.h"
|
|
44
|
+
|
|
45
|
+
namespace py = pybind11;
|
|
46
|
+
|
|
47
|
+
class LCPspellDistance {
|
|
48
|
+
public:
|
|
49
|
+
/*
|
|
50
|
+
* Constructor.
|
|
51
|
+
* - sequences: spell states, shape (nseq, max_spells); row i holds the
|
|
52
|
+
* state of each spell for sequence i (only positions 0..seqlength(i)-1
|
|
53
|
+
* are valid).
|
|
54
|
+
* - seqdur: spell durations, shape (nseq, max_spells).
|
|
55
|
+
* - seqlength: number of spells per sequence, shape (nseq,).
|
|
56
|
+
* - norm: normalization index (see utils.h).
|
|
57
|
+
* - sign: 1 = forward LCPspell (compare from first spell), -1 = reverse
|
|
58
|
+
* RLCPspell (compare from last spell).
|
|
59
|
+
* - refseqS: reference sequence indices [rseq1, rseq2).
|
|
60
|
+
* - timecost: expcost from Python. 0 = ignore duration (state-only match);
|
|
61
|
+
* >0 = add penalty proportional to |dur_A - dur_B| on matched spells (like OMspell).
|
|
62
|
+
*/
|
|
63
|
+
LCPspellDistance(py::array_t<int> sequences,
|
|
64
|
+
py::array_t<double> seqdur,
|
|
65
|
+
py::array_t<int> seqlength,
|
|
66
|
+
int norm,
|
|
67
|
+
int sign,
|
|
68
|
+
py::array_t<int> refseqS,
|
|
69
|
+
double timecost)
|
|
70
|
+
: norm(norm), sign(sign), timecost(timecost) {
|
|
71
|
+
py::print("[>] Starting (Reverse) Longest Common Prefix on spells (LCPspell/RLCPspell)...");
|
|
72
|
+
std::cout << std::flush;
|
|
73
|
+
|
|
74
|
+
try {
|
|
75
|
+
this->sequences = sequences;
|
|
76
|
+
this->seqdur = seqdur;
|
|
77
|
+
this->seqlength = seqlength;
|
|
78
|
+
|
|
79
|
+
auto seq_shape = sequences.shape();
|
|
80
|
+
nseq = static_cast<int>(seq_shape[0]);
|
|
81
|
+
max_spells = static_cast<int>(seq_shape[1]);
|
|
82
|
+
|
|
83
|
+
dist_matrix = py::array_t<double>({nseq, nseq});
|
|
84
|
+
|
|
85
|
+
// Compute maximum duration over all valid spell positions (used for
|
|
86
|
+
// normalizing the duration-penalty term when timecost > 0).
|
|
87
|
+
max_dur = 0.0;
|
|
88
|
+
auto ptr_dur = seqdur.unchecked<2>();
|
|
89
|
+
auto ptr_len = seqlength.unchecked<1>();
|
|
90
|
+
for (int i = 0; i < nseq; i++) {
|
|
91
|
+
int len_i = ptr_len(i);
|
|
92
|
+
for (int k = 0; k < len_i && k < max_spells; k++) {
|
|
93
|
+
double d = ptr_dur(i, k);
|
|
94
|
+
if (d > max_dur) max_dur = d;
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
// Reference sequence range (same convention as LCPdistance / OMspell).
|
|
99
|
+
nans = nseq;
|
|
100
|
+
rseq1 = refseqS.at(0);
|
|
101
|
+
rseq2 = refseqS.at(1);
|
|
102
|
+
if (rseq1 < rseq2) {
|
|
103
|
+
nseq = rseq1;
|
|
104
|
+
nans = nseq * (rseq2 - rseq1);
|
|
105
|
+
} else {
|
|
106
|
+
rseq1 = rseq1 - 1;
|
|
107
|
+
}
|
|
108
|
+
refdist_matrix = py::array_t<double>({nseq, (rseq2 - rseq1)});
|
|
109
|
+
} catch (const std::exception& e) {
|
|
110
|
+
py::print("Error in LCPspellDistance constructor: ", e.what());
|
|
111
|
+
throw;
|
|
112
|
+
}
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
/*
|
|
116
|
+
* Compute spell-based LCP distance between sequence is and sequence js.
|
|
117
|
+
* - Forward (sign > 0): compare spell 0 with spell 0, spell 1 with 1, ...
|
|
118
|
+
* and count how many consecutive spells have the same state.
|
|
119
|
+
* - Reverse (sign < 0): compare last spell with last spell, then
|
|
120
|
+
* second-to-last with second-to-last, ...
|
|
121
|
+
* For each matched spell we add timecost * |dur_A - dur_B| to the
|
|
122
|
+
* distance (duration penalty). When timecost == 0, only state equality
|
|
123
|
+
* matters.
|
|
124
|
+
*/
|
|
125
|
+
double compute_distance(int is, int js) {
|
|
126
|
+
try {
|
|
127
|
+
auto ptr_seq = sequences.unchecked<2>();
|
|
128
|
+
auto ptr_dur = seqdur.unchecked<2>();
|
|
129
|
+
auto ptr_len = seqlength.unchecked<1>();
|
|
130
|
+
|
|
131
|
+
int n = ptr_len(is);
|
|
132
|
+
int m = ptr_len(js);
|
|
133
|
+
int min_nm = (n < m) ? n : m;
|
|
134
|
+
|
|
135
|
+
if (min_nm == 0) {
|
|
136
|
+
double raw = static_cast<double>(n + m);
|
|
137
|
+
double maxdist = raw;
|
|
138
|
+
double d = normalize_distance(raw, maxdist, static_cast<double>(n), static_cast<double>(m), norm);
|
|
139
|
+
return (d < 0.0) ? 0.0 : (d > 1.0 ? 1.0 : d);
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
int L = 0; // length of spell-based common prefix
|
|
143
|
+
double duration_penalty = 0.0;
|
|
144
|
+
|
|
145
|
+
if (sign > 0) {
|
|
146
|
+
// Forward: compare first spell with first spell, second with second, ...
|
|
147
|
+
while (L < min_nm && ptr_seq(is, L) == ptr_seq(js, L)) {
|
|
148
|
+
duration_penalty += std::fabs(ptr_dur(is, L) - ptr_dur(js, L));
|
|
149
|
+
L++;
|
|
150
|
+
}
|
|
151
|
+
} else {
|
|
152
|
+
// Reverse: compare last spell with last spell, then second-to-last, ...
|
|
153
|
+
while (L < min_nm && ptr_seq(is, n - 1 - L) == ptr_seq(js, m - 1 - L)) {
|
|
154
|
+
duration_penalty += std::fabs(ptr_dur(is, n - 1 - L) - ptr_dur(js, m - 1 - L));
|
|
155
|
+
L++;
|
|
156
|
+
}
|
|
157
|
+
}
|
|
158
|
+
|
|
159
|
+
double raw = (n + m - 2.0 * L) + timecost * duration_penalty;
|
|
160
|
+
// Use same maxdist as position-wise LCP (n+m) so normalization is comparable;
|
|
161
|
+
// raw can exceed maxdist when timecost > 0, so we clamp the result to [0, 1].
|
|
162
|
+
double maxdist = static_cast<double>(n + m);
|
|
163
|
+
double d = normalize_distance(raw, maxdist, static_cast<double>(n), static_cast<double>(m), norm);
|
|
164
|
+
return (d < 0.0) ? 0.0 : (d > 1.0 ? 1.0 : d);
|
|
165
|
+
} catch (const std::exception& e) {
|
|
166
|
+
py::print("Error in LCPspellDistance::compute_distance: ", e.what());
|
|
167
|
+
throw;
|
|
168
|
+
}
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
py::array_t<double> compute_all_distances() {
|
|
172
|
+
try {
|
|
173
|
+
return dp_utils::compute_all_distances_simple(
|
|
174
|
+
nseq,
|
|
175
|
+
dist_matrix,
|
|
176
|
+
[this](int i, int j) { return this->compute_distance(i, j); }
|
|
177
|
+
);
|
|
178
|
+
} catch (const std::exception& e) {
|
|
179
|
+
py::print("Error in LCPspellDistance::compute_all_distances: ", e.what());
|
|
180
|
+
throw;
|
|
181
|
+
}
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
py::array_t<double> compute_refseq_distances() {
|
|
185
|
+
try {
|
|
186
|
+
return dp_utils::compute_refseq_distances_simple(
|
|
187
|
+
nseq,
|
|
188
|
+
rseq1,
|
|
189
|
+
rseq2,
|
|
190
|
+
refdist_matrix,
|
|
191
|
+
[this](int is, int rseq) { return this->compute_distance(is, rseq); }
|
|
192
|
+
);
|
|
193
|
+
} catch (const std::exception& e) {
|
|
194
|
+
py::print("Error in LCPspellDistance::compute_refseq_distances: ", e.what());
|
|
195
|
+
throw;
|
|
196
|
+
}
|
|
197
|
+
}
|
|
198
|
+
|
|
199
|
+
private:
|
|
200
|
+
py::array_t<int> sequences;
|
|
201
|
+
py::array_t<double> seqdur;
|
|
202
|
+
py::array_t<int> seqlength;
|
|
203
|
+
int norm;
|
|
204
|
+
int sign;
|
|
205
|
+
double timecost;
|
|
206
|
+
int nseq;
|
|
207
|
+
int max_spells;
|
|
208
|
+
double max_dur;
|
|
209
|
+
py::array_t<double> dist_matrix;
|
|
210
|
+
|
|
211
|
+
int nans;
|
|
212
|
+
int rseq1;
|
|
213
|
+
int rseq2;
|
|
214
|
+
py::array_t<double> refdist_matrix;
|
|
215
|
+
};
|
|
@@ -0,0 +1,247 @@
|
|
|
1
|
+
#include <pybind11/pybind11.h>
|
|
2
|
+
#include <pybind11/numpy.h>
|
|
3
|
+
#include <xsimd/xsimd.hpp>
|
|
4
|
+
#include <vector>
|
|
5
|
+
#include <cmath>
|
|
6
|
+
#include <iostream>
|
|
7
|
+
#include "utils.h"
|
|
8
|
+
#include "dp_utils.h"
|
|
9
|
+
#ifdef _OPENMP
|
|
10
|
+
#include <omp.h>
|
|
11
|
+
#endif
|
|
12
|
+
|
|
13
|
+
namespace py = pybind11;
|
|
14
|
+
|
|
15
|
+
class OMdistance {
|
|
16
|
+
public:
|
|
17
|
+
OMdistance(py::array_t<int> sequences, py::array_t<double> sm, double indel, int norm, py::array_t<int> seqlength,py::array_t<int> refseqS)
|
|
18
|
+
: indel(indel), norm(norm) {
|
|
19
|
+
|
|
20
|
+
py::print("[>] Starting Optimal Matching(OM)...");
|
|
21
|
+
std::cout << std::flush;
|
|
22
|
+
|
|
23
|
+
try {
|
|
24
|
+
// =========================
|
|
25
|
+
// parameter : sequences, sm
|
|
26
|
+
// =========================
|
|
27
|
+
this->sequences = sequences;
|
|
28
|
+
this->sm = sm;
|
|
29
|
+
this->seqlength = seqlength;
|
|
30
|
+
|
|
31
|
+
auto seq_shape = sequences.shape();
|
|
32
|
+
nseq = seq_shape[0];
|
|
33
|
+
seqlen = seq_shape[1];
|
|
34
|
+
alphasize = sm.shape()[0];
|
|
35
|
+
|
|
36
|
+
dist_matrix = py::array_t<double>({nseq, nseq});
|
|
37
|
+
|
|
38
|
+
fmatsize = seqlen + 1;
|
|
39
|
+
|
|
40
|
+
// ==================
|
|
41
|
+
// initialize maxcost
|
|
42
|
+
// ==================
|
|
43
|
+
if(norm == 4){
|
|
44
|
+
maxscost = 2 * indel;
|
|
45
|
+
}else{
|
|
46
|
+
auto ptr = sm.mutable_unchecked<2>();
|
|
47
|
+
for(int i = 0; i < alphasize; i++){
|
|
48
|
+
for(int j = i+1; j < alphasize; j++){
|
|
49
|
+
if(ptr(i, j) > maxscost){
|
|
50
|
+
maxscost = ptr(i, j);
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
maxscost = std::min(maxscost, 2 * indel);
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
// about reference sequences :
|
|
58
|
+
nans = nseq;
|
|
59
|
+
|
|
60
|
+
rseq1 = refseqS.at(0);
|
|
61
|
+
rseq2 = refseqS.at(1);
|
|
62
|
+
if(rseq1 < rseq2){
|
|
63
|
+
nseq = rseq1;
|
|
64
|
+
nans = nseq * (rseq2 - rseq1);
|
|
65
|
+
}else{
|
|
66
|
+
rseq1 = rseq1 - 1;
|
|
67
|
+
}
|
|
68
|
+
refdist_matrix = py::array_t<double>({nseq, (rseq2-rseq1)});
|
|
69
|
+
} catch (const std::exception& e) {
|
|
70
|
+
py::print("Error in constructor: ", e.what());
|
|
71
|
+
throw;
|
|
72
|
+
}
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
double compute_distance(int is, int js, double* prev, double* curr) {
|
|
76
|
+
try {
|
|
77
|
+
auto ptr_len = seqlength.unchecked<1>();
|
|
78
|
+
int m_full = ptr_len(is);
|
|
79
|
+
int n_full = ptr_len(js);
|
|
80
|
+
int mSuf = m_full + 1, nSuf = n_full + 1;
|
|
81
|
+
int prefix = 0;
|
|
82
|
+
|
|
83
|
+
auto ptr_seq = sequences.unchecked<2>();
|
|
84
|
+
auto ptr_sm = sm.unchecked<2>();
|
|
85
|
+
|
|
86
|
+
// Skipping common prefix
|
|
87
|
+
int ii = 1, jj = 1;
|
|
88
|
+
while (ii < mSuf && jj < nSuf && ptr_seq(is, ii-1) == ptr_seq(js, jj-1)) {
|
|
89
|
+
ii++; jj++; prefix++;
|
|
90
|
+
}
|
|
91
|
+
// Skipping common suffix
|
|
92
|
+
while (mSuf > ii && nSuf > jj && ptr_seq(is, mSuf - 2) == ptr_seq(js, nSuf - 2)) {
|
|
93
|
+
mSuf--; nSuf--;
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
int m = mSuf - prefix;
|
|
97
|
+
int n = nSuf - prefix;
|
|
98
|
+
|
|
99
|
+
// 预处理
|
|
100
|
+
if (m == 0 && n == 0)
|
|
101
|
+
return normalize_distance(0.0, 0.0, 0.0, 0.0, norm);
|
|
102
|
+
if (m == 0) {
|
|
103
|
+
double cost = double(n) * indel;
|
|
104
|
+
double maxpossiblecost = std::abs(n - m) * indel + maxscost * std::min(m, n);
|
|
105
|
+
return normalize_distance(cost, maxpossiblecost, 0.0, double(n) * indel, norm);
|
|
106
|
+
}
|
|
107
|
+
if (n == 0) {
|
|
108
|
+
double cost = double(m) * indel;
|
|
109
|
+
double maxpossiblecost = std::abs(n - m) * indel + maxscost * std::min(m, n);
|
|
110
|
+
return normalize_distance(cost, maxpossiblecost, double(m) * indel, 0.0, norm);
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
using batch_t = xsimd::batch<double>;
|
|
114
|
+
constexpr std::size_t B = batch_t::size;
|
|
115
|
+
|
|
116
|
+
#pragma omp simd
|
|
117
|
+
for (int x = 0; x < n; ++x) prev[x] = double(x) * indel;
|
|
118
|
+
|
|
119
|
+
for (int i = prefix + 1; i < mSuf; ++i) {
|
|
120
|
+
curr[0] = indel * double(i - prefix);
|
|
121
|
+
int ai = ptr_seq(is, i - 1);
|
|
122
|
+
|
|
123
|
+
int j = prefix + 1;
|
|
124
|
+
for (; j + (int)B <= nSuf; j += (int)B) {
|
|
125
|
+
// load prev[j .. j+B-1], prev[j-1 .. j+B-2]
|
|
126
|
+
const double* prev_ptr = prev + (j - prefix);
|
|
127
|
+
const double* prevm1_ptr = prev + (j - 1 - prefix);
|
|
128
|
+
|
|
129
|
+
batch_t prevj = batch_t::load_unaligned(prev_ptr);
|
|
130
|
+
batch_t prevjm1 = batch_t::load_unaligned(prevm1_ptr);
|
|
131
|
+
|
|
132
|
+
// substitution costs
|
|
133
|
+
alignas(64) double subs[B];
|
|
134
|
+
for (std::size_t b = 0; b < B; ++b) {
|
|
135
|
+
int jj_idx = j + int(b);
|
|
136
|
+
int bj = ptr_seq(js, jj_idx - 1);
|
|
137
|
+
subs[b] = (ai == bj) ? 0.0 : ptr_sm(ai, bj);
|
|
138
|
+
}
|
|
139
|
+
batch_t sub_batch = batch_t::load_unaligned(subs);
|
|
140
|
+
|
|
141
|
+
// Vectorize independent candidates: del and sub
|
|
142
|
+
batch_t cand_del = prevj + batch_t(indel);
|
|
143
|
+
batch_t cand_sub = prevjm1 + sub_batch;
|
|
144
|
+
batch_t vert = xsimd::min(cand_del, cand_sub);
|
|
145
|
+
|
|
146
|
+
// Sequential propagation for insert dependencies (low overhead)
|
|
147
|
+
double running_ins = curr[j - prefix - 1] + indel;
|
|
148
|
+
for (std::size_t b = 0; b < B; ++b) {
|
|
149
|
+
double v = vert.get(b);
|
|
150
|
+
double c = std::min(v, running_ins);
|
|
151
|
+
curr[j + int(b) - prefix] = c;
|
|
152
|
+
running_ins = c + indel;
|
|
153
|
+
}
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
// 补足尾部
|
|
157
|
+
for (; j < nSuf; ++j) {
|
|
158
|
+
int bj = ptr_seq(js, j-1);
|
|
159
|
+
double subcost = (ai == bj) ? 0.0 : ptr_sm(ai, bj);
|
|
160
|
+
double delcost = prev[j - prefix] + indel;
|
|
161
|
+
double inscost = curr[j - 1 - prefix] + indel;
|
|
162
|
+
double subval = prev[j - 1 - prefix] + subcost;
|
|
163
|
+
curr[j - prefix] = std::min({ delcost, inscost, subval });
|
|
164
|
+
}
|
|
165
|
+
|
|
166
|
+
std::swap(prev, curr);
|
|
167
|
+
}
|
|
168
|
+
|
|
169
|
+
double final_cost = prev[nSuf - 1 - prefix];
|
|
170
|
+
double maxpossiblecost = std::abs(n - m) * indel + maxscost * std::min(m, n);
|
|
171
|
+
double ml = double(m) * indel;
|
|
172
|
+
double nl = double(n) * indel;
|
|
173
|
+
return normalize_distance(final_cost, maxpossiblecost, ml, nl, norm);
|
|
174
|
+
|
|
175
|
+
} catch (const std::exception& e) {
|
|
176
|
+
py::print("Error in SIMD-batch compute_distance: ", e.what());
|
|
177
|
+
throw;
|
|
178
|
+
}
|
|
179
|
+
}
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
py::array_t<double> compute_all_distances() {
|
|
183
|
+
try {
|
|
184
|
+
return dp_utils::compute_all_distances(
|
|
185
|
+
nseq,
|
|
186
|
+
fmatsize,
|
|
187
|
+
dist_matrix,
|
|
188
|
+
[this](int i, int j, double* prev, double* curr) {
|
|
189
|
+
return this->compute_distance(i, j, prev, curr);
|
|
190
|
+
}
|
|
191
|
+
);
|
|
192
|
+
} catch (const std::exception& e) {
|
|
193
|
+
py::print("Error in compute_all_distances: ", e.what());
|
|
194
|
+
throw;
|
|
195
|
+
}
|
|
196
|
+
}
|
|
197
|
+
|
|
198
|
+
py::array_t<double> compute_refseq_distances() {
|
|
199
|
+
try {
|
|
200
|
+
auto buffer = refdist_matrix.mutable_unchecked<2>();
|
|
201
|
+
|
|
202
|
+
#pragma omp parallel
|
|
203
|
+
{
|
|
204
|
+
double* prev = dp_utils::aligned_alloc_double(static_cast<size_t>(fmatsize));
|
|
205
|
+
double* curr = dp_utils::aligned_alloc_double(static_cast<size_t>(fmatsize));
|
|
206
|
+
|
|
207
|
+
#pragma omp for schedule(static)
|
|
208
|
+
for (int rseq = rseq1; rseq < rseq2; rseq ++) {
|
|
209
|
+
for (int is = 0; is < nseq; is ++) {
|
|
210
|
+
double cmpres = 0;
|
|
211
|
+
if(is != rseq){
|
|
212
|
+
cmpres = compute_distance(is, rseq, prev, curr);
|
|
213
|
+
}
|
|
214
|
+
|
|
215
|
+
buffer(is, rseq - rseq1) = cmpres;
|
|
216
|
+
}
|
|
217
|
+
}
|
|
218
|
+
dp_utils::aligned_free_double(prev);
|
|
219
|
+
dp_utils::aligned_free_double(curr);
|
|
220
|
+
}
|
|
221
|
+
|
|
222
|
+
return refdist_matrix;
|
|
223
|
+
} catch (const std::exception& e) {
|
|
224
|
+
py::print("Error in compute_all_distances: ", e.what());
|
|
225
|
+
throw;
|
|
226
|
+
}
|
|
227
|
+
}
|
|
228
|
+
|
|
229
|
+
private:
|
|
230
|
+
py::array_t<int> sequences;
|
|
231
|
+
py::array_t<double> sm;
|
|
232
|
+
double indel;
|
|
233
|
+
int norm;
|
|
234
|
+
int nseq;
|
|
235
|
+
int seqlen;
|
|
236
|
+
int alphasize;
|
|
237
|
+
int fmatsize;
|
|
238
|
+
py::array_t<int> seqlength;
|
|
239
|
+
py::array_t<double> dist_matrix;
|
|
240
|
+
double maxscost;
|
|
241
|
+
|
|
242
|
+
// about reference sequences :
|
|
243
|
+
int nans = -1;
|
|
244
|
+
int rseq1 = -1;
|
|
245
|
+
int rseq2 = -1;
|
|
246
|
+
py::array_t<double> refdist_matrix;
|
|
247
|
+
};
|