sequenzo 0.1.21__cp310-cp310-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of sequenzo might be problematic. Click here for more details.
- sequenzo/__init__.py +240 -0
- sequenzo/big_data/__init__.py +12 -0
- sequenzo/big_data/clara/__init__.py +26 -0
- sequenzo/big_data/clara/clara.py +467 -0
- sequenzo/big_data/clara/utils/__init__.py +27 -0
- sequenzo/big_data/clara/utils/aggregatecases.py +92 -0
- sequenzo/big_data/clara/utils/davies_bouldin.py +91 -0
- sequenzo/big_data/clara/utils/get_weighted_diss.cpython-310-darwin.so +0 -0
- sequenzo/big_data/clara/utils/wfcmdd.py +205 -0
- sequenzo/big_data/clara/visualization.py +88 -0
- sequenzo/clustering/KMedoids.py +196 -0
- sequenzo/clustering/__init__.py +30 -0
- sequenzo/clustering/clustering_c_code.cpython-310-darwin.so +0 -0
- sequenzo/clustering/hierarchical_clustering.py +1380 -0
- sequenzo/clustering/src/KMedoid.cpp +262 -0
- sequenzo/clustering/src/PAM.cpp +236 -0
- sequenzo/clustering/src/PAMonce.cpp +234 -0
- sequenzo/clustering/src/cluster_quality.cpp +496 -0
- sequenzo/clustering/src/cluster_quality.h +128 -0
- sequenzo/clustering/src/cluster_quality_backup.cpp +570 -0
- sequenzo/clustering/src/module.cpp +228 -0
- sequenzo/clustering/src/weightedinertia.cpp +111 -0
- sequenzo/clustering/utils/__init__.py +27 -0
- sequenzo/clustering/utils/disscenter.py +122 -0
- sequenzo/data_preprocessing/__init__.py +20 -0
- sequenzo/data_preprocessing/helpers.py +256 -0
- sequenzo/datasets/__init__.py +41 -0
- sequenzo/datasets/biofam.csv +2001 -0
- sequenzo/datasets/biofam_child_domain.csv +2001 -0
- sequenzo/datasets/biofam_left_domain.csv +2001 -0
- sequenzo/datasets/biofam_married_domain.csv +2001 -0
- sequenzo/datasets/chinese_colonial_territories.csv +12 -0
- sequenzo/datasets/country_co2_emissions.csv +194 -0
- sequenzo/datasets/country_co2_emissions_global_deciles.csv +195 -0
- sequenzo/datasets/country_co2_emissions_global_quintiles.csv +195 -0
- sequenzo/datasets/country_co2_emissions_local_deciles.csv +195 -0
- sequenzo/datasets/country_co2_emissions_local_quintiles.csv +195 -0
- sequenzo/datasets/country_gdp_per_capita.csv +194 -0
- sequenzo/datasets/mvad.csv +713 -0
- sequenzo/datasets/pairfam_family.csv +1867 -0
- sequenzo/datasets/polyadic_samplec1.csv +61 -0
- sequenzo/datasets/polyadic_samplep1.csv +61 -0
- sequenzo/datasets/polyadic_seqc1.csv +61 -0
- sequenzo/datasets/polyadic_seqp1.csv +61 -0
- sequenzo/define_sequence_data.py +609 -0
- sequenzo/dissimilarity_measures/__init__.py +31 -0
- sequenzo/dissimilarity_measures/c_code.cpython-310-darwin.so +0 -0
- sequenzo/dissimilarity_measures/get_distance_matrix.py +702 -0
- sequenzo/dissimilarity_measures/get_substitution_cost_matrix.py +241 -0
- sequenzo/dissimilarity_measures/src/DHDdistance.cpp +148 -0
- sequenzo/dissimilarity_measures/src/LCPdistance.cpp +114 -0
- sequenzo/dissimilarity_measures/src/OMdistance.cpp +247 -0
- sequenzo/dissimilarity_measures/src/OMspellDistance.cpp +281 -0
- sequenzo/dissimilarity_measures/src/__init__.py +0 -0
- sequenzo/dissimilarity_measures/src/dist2matrix.cpp +63 -0
- sequenzo/dissimilarity_measures/src/dp_utils.h +160 -0
- sequenzo/dissimilarity_measures/src/module.cpp +34 -0
- sequenzo/dissimilarity_measures/src/setup.py +30 -0
- sequenzo/dissimilarity_measures/src/utils.h +25 -0
- sequenzo/dissimilarity_measures/src/xsimd/.github/cmake-test/main.cpp +6 -0
- sequenzo/dissimilarity_measures/src/xsimd/benchmark/main.cpp +159 -0
- sequenzo/dissimilarity_measures/src/xsimd/benchmark/xsimd_benchmark.hpp +565 -0
- sequenzo/dissimilarity_measures/src/xsimd/docs/source/conf.py +37 -0
- sequenzo/dissimilarity_measures/src/xsimd/examples/mandelbrot.cpp +330 -0
- sequenzo/dissimilarity_measures/src/xsimd/examples/pico_bench.hpp +246 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_arithmetic.hpp +266 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_complex.hpp +112 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_details.hpp +323 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_logical.hpp +218 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_math.hpp +2583 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_memory.hpp +880 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_rounding.hpp +72 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_swizzle.hpp +174 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_trigo.hpp +978 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx.hpp +1924 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx2.hpp +1144 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512bw.hpp +656 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512cd.hpp +28 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512dq.hpp +244 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512er.hpp +20 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512f.hpp +2650 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512ifma.hpp +20 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512pf.hpp +20 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vbmi.hpp +77 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vbmi2.hpp +131 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vnni_avx512bw.hpp +20 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vnni_avx512vbmi2.hpp +20 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avxvnni.hpp +20 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_common.hpp +24 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_common_fwd.hpp +77 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_constants.hpp +393 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_emulated.hpp +788 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_avx.hpp +93 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_avx2.hpp +46 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_sse.hpp +97 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma4.hpp +92 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_i8mm_neon64.hpp +17 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_isa.hpp +142 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_neon.hpp +3142 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_neon64.hpp +1543 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_rvv.hpp +1513 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_scalar.hpp +1260 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse2.hpp +2024 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse3.hpp +67 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse4_1.hpp +339 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse4_2.hpp +44 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_ssse3.hpp +186 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sve.hpp +1155 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_vsx.hpp +892 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_wasm.hpp +1780 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_arch.hpp +240 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_config.hpp +484 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_cpuid.hpp +269 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_inline.hpp +27 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/math/xsimd_rem_pio2.hpp +719 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/memory/xsimd_aligned_allocator.hpp +349 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/memory/xsimd_alignment.hpp +91 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_all_registers.hpp +55 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_api.hpp +2765 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx2_register.hpp +44 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512bw_register.hpp +51 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512cd_register.hpp +51 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512dq_register.hpp +51 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512er_register.hpp +51 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512f_register.hpp +77 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512ifma_register.hpp +51 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512pf_register.hpp +51 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vbmi2_register.hpp +51 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vbmi_register.hpp +51 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512bw_register.hpp +54 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512vbmi2_register.hpp +53 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx_register.hpp +64 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avxvnni_register.hpp +44 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_batch.hpp +1524 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_batch_constant.hpp +300 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_common_arch.hpp +47 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_emulated_register.hpp +80 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_avx2_register.hpp +50 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_avx_register.hpp +50 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_sse_register.hpp +50 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma4_register.hpp +50 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_i8mm_neon64_register.hpp +55 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_neon64_register.hpp +55 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_neon_register.hpp +154 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_register.hpp +94 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_rvv_register.hpp +506 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse2_register.hpp +59 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse3_register.hpp +49 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse4_1_register.hpp +48 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse4_2_register.hpp +48 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_ssse3_register.hpp +48 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sve_register.hpp +156 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_traits.hpp +337 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_utils.hpp +536 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_vsx_register.hpp +77 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_wasm_register.hpp +59 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/xsimd.hpp +75 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/architectures/dummy.cpp +7 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set.cpp +13 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean.cpp +24 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_aligned.cpp +25 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_arch_independent.cpp +28 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_tag_dispatch.cpp +25 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/doc/manipulating_abstract_batches.cpp +7 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/doc/manipulating_parametric_batches.cpp +8 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum.hpp +31 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum_avx2.cpp +3 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum_sse2.cpp +3 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/doc/writing_vectorized_code.cpp +11 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/main.cpp +31 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_api.cpp +230 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_arch.cpp +217 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_basic_math.cpp +183 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_batch.cpp +1049 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_bool.cpp +508 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_cast.cpp +409 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_complex.cpp +712 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_constant.cpp +286 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_float.cpp +141 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_int.cpp +365 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_manip.cpp +308 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_bitwise_cast.cpp +222 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_exponential.cpp +226 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_hyperbolic.cpp +183 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_power.cpp +265 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_trigonometric.cpp +236 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_conversion.cpp +248 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_custom_default_arch.cpp +28 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_error_gamma.cpp +170 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_explicit_batch_instantiation.cpp +32 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_exponential.cpp +202 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_extract_pair.cpp +92 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_fp_manipulation.cpp +77 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_gnu_source.cpp +30 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_hyperbolic.cpp +167 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_load_store.cpp +304 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_memory.cpp +61 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_poly_evaluation.cpp +64 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_power.cpp +184 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_rounding.cpp +199 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_select.cpp +101 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_shuffle.cpp +760 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_sum.cpp +4 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_sum.hpp +34 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_traits.cpp +172 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_trigonometric.cpp +208 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_utils.hpp +611 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_wasm/test_wasm_playwright.py +123 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_xsimd_api.cpp +1460 -0
- sequenzo/dissimilarity_measures/utils/__init__.py +16 -0
- sequenzo/dissimilarity_measures/utils/get_LCP_length_for_2_seq.py +44 -0
- sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.cpython-310-darwin.so +0 -0
- sequenzo/dissimilarity_measures/utils/seqconc.cpython-310-darwin.so +0 -0
- sequenzo/dissimilarity_measures/utils/seqdss.cpython-310-darwin.so +0 -0
- sequenzo/dissimilarity_measures/utils/seqdur.cpython-310-darwin.so +0 -0
- sequenzo/dissimilarity_measures/utils/seqlength.cpython-310-darwin.so +0 -0
- sequenzo/multidomain/__init__.py +23 -0
- sequenzo/multidomain/association_between_domains.py +311 -0
- sequenzo/multidomain/cat.py +431 -0
- sequenzo/multidomain/combt.py +519 -0
- sequenzo/multidomain/dat.py +89 -0
- sequenzo/multidomain/idcd.py +139 -0
- sequenzo/multidomain/linked_polyad.py +292 -0
- sequenzo/openmp_setup.py +233 -0
- sequenzo/prefix_tree/__init__.py +43 -0
- sequenzo/prefix_tree/individual_level_indicators.py +1274 -0
- sequenzo/prefix_tree/system_level_indicators.py +465 -0
- sequenzo/prefix_tree/utils.py +54 -0
- sequenzo/sequence_characteristics/__init__.py +40 -0
- sequenzo/sequence_characteristics/complexity_index.py +49 -0
- sequenzo/sequence_characteristics/overall_cross_sectional_entropy.py +220 -0
- sequenzo/sequence_characteristics/plot_characteristics.py +593 -0
- sequenzo/sequence_characteristics/simple_characteristics.py +311 -0
- sequenzo/sequence_characteristics/state_frequencies_and_entropy_per_sequence.py +39 -0
- sequenzo/sequence_characteristics/turbulence.py +155 -0
- sequenzo/sequence_characteristics/variance_of_spell_durations.py +86 -0
- sequenzo/sequence_characteristics/within_sequence_entropy.py +43 -0
- sequenzo/suffix_tree/__init__.py +48 -0
- sequenzo/suffix_tree/individual_level_indicators.py +1638 -0
- sequenzo/suffix_tree/system_level_indicators.py +456 -0
- sequenzo/suffix_tree/utils.py +56 -0
- sequenzo/visualization/__init__.py +29 -0
- sequenzo/visualization/plot_mean_time.py +194 -0
- sequenzo/visualization/plot_modal_state.py +276 -0
- sequenzo/visualization/plot_most_frequent_sequences.py +147 -0
- sequenzo/visualization/plot_relative_frequency.py +404 -0
- sequenzo/visualization/plot_sequence_index.py +937 -0
- sequenzo/visualization/plot_single_medoid.py +153 -0
- sequenzo/visualization/plot_state_distribution.py +613 -0
- sequenzo/visualization/plot_transition_matrix.py +190 -0
- sequenzo/visualization/utils/__init__.py +23 -0
- sequenzo/visualization/utils/utils.py +310 -0
- sequenzo/with_event_history_analysis/__init__.py +35 -0
- sequenzo/with_event_history_analysis/sequence_analysis_multi_state_model.py +850 -0
- sequenzo/with_event_history_analysis/sequence_history_analysis.py +283 -0
- sequenzo-0.1.21.dist-info/METADATA +308 -0
- sequenzo-0.1.21.dist-info/RECORD +254 -0
- sequenzo-0.1.21.dist-info/WHEEL +5 -0
- sequenzo-0.1.21.dist-info/licenses/LICENSE +28 -0
- sequenzo-0.1.21.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,247 @@
|
|
|
1
|
+
#include <pybind11/pybind11.h>
|
|
2
|
+
#include <pybind11/numpy.h>
|
|
3
|
+
#include <xsimd/xsimd.hpp>
|
|
4
|
+
#include <vector>
|
|
5
|
+
#include <cmath>
|
|
6
|
+
#include <iostream>
|
|
7
|
+
#include "utils.h"
|
|
8
|
+
#include "dp_utils.h"
|
|
9
|
+
#ifdef _OPENMP
|
|
10
|
+
#include <omp.h>
|
|
11
|
+
#endif
|
|
12
|
+
|
|
13
|
+
namespace py = pybind11;
|
|
14
|
+
|
|
15
|
+
class OMdistance {
|
|
16
|
+
public:
|
|
17
|
+
OMdistance(py::array_t<int> sequences, py::array_t<double> sm, double indel, int norm, py::array_t<int> seqlength,py::array_t<int> refseqS)
|
|
18
|
+
: indel(indel), norm(norm) {
|
|
19
|
+
|
|
20
|
+
py::print("[>] Starting Optimal Matching(OM)...");
|
|
21
|
+
std::cout << std::flush;
|
|
22
|
+
|
|
23
|
+
try {
|
|
24
|
+
// =========================
|
|
25
|
+
// parameter : sequences, sm
|
|
26
|
+
// =========================
|
|
27
|
+
this->sequences = sequences;
|
|
28
|
+
this->sm = sm;
|
|
29
|
+
this->seqlength = seqlength;
|
|
30
|
+
|
|
31
|
+
auto seq_shape = sequences.shape();
|
|
32
|
+
nseq = seq_shape[0];
|
|
33
|
+
seqlen = seq_shape[1];
|
|
34
|
+
alphasize = sm.shape()[0];
|
|
35
|
+
|
|
36
|
+
dist_matrix = py::array_t<double>({nseq, nseq});
|
|
37
|
+
|
|
38
|
+
fmatsize = seqlen + 1;
|
|
39
|
+
|
|
40
|
+
// ==================
|
|
41
|
+
// initialize maxcost
|
|
42
|
+
// ==================
|
|
43
|
+
if(norm == 4){
|
|
44
|
+
maxscost = 2 * indel;
|
|
45
|
+
}else{
|
|
46
|
+
auto ptr = sm.mutable_unchecked<2>();
|
|
47
|
+
for(int i = 0; i < alphasize; i++){
|
|
48
|
+
for(int j = i+1; j < alphasize; j++){
|
|
49
|
+
if(ptr(i, j) > maxscost){
|
|
50
|
+
maxscost = ptr(i, j);
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
maxscost = std::min(maxscost, 2 * indel);
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
// about reference sequences :
|
|
58
|
+
nans = nseq;
|
|
59
|
+
|
|
60
|
+
rseq1 = refseqS.at(0);
|
|
61
|
+
rseq2 = refseqS.at(1);
|
|
62
|
+
if(rseq1 < rseq2){
|
|
63
|
+
nseq = rseq1;
|
|
64
|
+
nans = nseq * (rseq2 - rseq1);
|
|
65
|
+
}else{
|
|
66
|
+
rseq1 = rseq1 - 1;
|
|
67
|
+
}
|
|
68
|
+
refdist_matrix = py::array_t<double>({nseq, (rseq2-rseq1)});
|
|
69
|
+
} catch (const std::exception& e) {
|
|
70
|
+
py::print("Error in constructor: ", e.what());
|
|
71
|
+
throw;
|
|
72
|
+
}
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
double compute_distance(int is, int js, double* prev, double* curr) {
|
|
76
|
+
try {
|
|
77
|
+
auto ptr_len = seqlength.unchecked<1>();
|
|
78
|
+
int m_full = ptr_len(is);
|
|
79
|
+
int n_full = ptr_len(js);
|
|
80
|
+
int mSuf = m_full + 1, nSuf = n_full + 1;
|
|
81
|
+
int prefix = 0;
|
|
82
|
+
|
|
83
|
+
auto ptr_seq = sequences.unchecked<2>();
|
|
84
|
+
auto ptr_sm = sm.unchecked<2>();
|
|
85
|
+
|
|
86
|
+
// Skipping common prefix
|
|
87
|
+
int ii = 1, jj = 1;
|
|
88
|
+
while (ii < mSuf && jj < nSuf && ptr_seq(is, ii-1) == ptr_seq(js, jj-1)) {
|
|
89
|
+
ii++; jj++; prefix++;
|
|
90
|
+
}
|
|
91
|
+
// Skipping common suffix
|
|
92
|
+
while (mSuf > ii && nSuf > jj && ptr_seq(is, mSuf - 2) == ptr_seq(js, nSuf - 2)) {
|
|
93
|
+
mSuf--; nSuf--;
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
int m = mSuf - prefix;
|
|
97
|
+
int n = nSuf - prefix;
|
|
98
|
+
|
|
99
|
+
// 预处理
|
|
100
|
+
if (m == 0 && n == 0)
|
|
101
|
+
return normalize_distance(0.0, 0.0, 0.0, 0.0, norm);
|
|
102
|
+
if (m == 0) {
|
|
103
|
+
double cost = double(n) * indel;
|
|
104
|
+
double maxpossiblecost = std::abs(n - m) * indel + maxscost * std::min(m, n);
|
|
105
|
+
return normalize_distance(cost, maxpossiblecost, 0.0, double(n) * indel, norm);
|
|
106
|
+
}
|
|
107
|
+
if (n == 0) {
|
|
108
|
+
double cost = double(m) * indel;
|
|
109
|
+
double maxpossiblecost = std::abs(n - m) * indel + maxscost * std::min(m, n);
|
|
110
|
+
return normalize_distance(cost, maxpossiblecost, double(m) * indel, 0.0, norm);
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
using batch_t = xsimd::batch<double>;
|
|
114
|
+
constexpr std::size_t B = batch_t::size;
|
|
115
|
+
|
|
116
|
+
#pragma omp simd
|
|
117
|
+
for (int x = 0; x < n; ++x) prev[x] = double(x) * indel;
|
|
118
|
+
|
|
119
|
+
for (int i = prefix + 1; i < mSuf; ++i) {
|
|
120
|
+
curr[0] = indel * double(i - prefix);
|
|
121
|
+
int ai = ptr_seq(is, i - 1);
|
|
122
|
+
|
|
123
|
+
int j = prefix + 1;
|
|
124
|
+
for (; j + (int)B <= nSuf; j += (int)B) {
|
|
125
|
+
// load prev[j .. j+B-1], prev[j-1 .. j+B-2]
|
|
126
|
+
const double* prev_ptr = prev + (j - prefix);
|
|
127
|
+
const double* prevm1_ptr = prev + (j - 1 - prefix);
|
|
128
|
+
|
|
129
|
+
batch_t prevj = batch_t::load_unaligned(prev_ptr);
|
|
130
|
+
batch_t prevjm1 = batch_t::load_unaligned(prevm1_ptr);
|
|
131
|
+
|
|
132
|
+
// substitution costs
|
|
133
|
+
alignas(64) double subs[B];
|
|
134
|
+
for (std::size_t b = 0; b < B; ++b) {
|
|
135
|
+
int jj_idx = j + int(b);
|
|
136
|
+
int bj = ptr_seq(js, jj_idx - 1);
|
|
137
|
+
subs[b] = (ai == bj) ? 0.0 : ptr_sm(ai, bj);
|
|
138
|
+
}
|
|
139
|
+
batch_t sub_batch = batch_t::load_unaligned(subs);
|
|
140
|
+
|
|
141
|
+
// Vectorize independent candidates: del and sub
|
|
142
|
+
batch_t cand_del = prevj + batch_t(indel);
|
|
143
|
+
batch_t cand_sub = prevjm1 + sub_batch;
|
|
144
|
+
batch_t vert = xsimd::min(cand_del, cand_sub);
|
|
145
|
+
|
|
146
|
+
// Sequential propagation for insert dependencies (low overhead)
|
|
147
|
+
double running_ins = curr[j - prefix - 1] + indel;
|
|
148
|
+
for (std::size_t b = 0; b < B; ++b) {
|
|
149
|
+
double v = vert.get(b);
|
|
150
|
+
double c = std::min(v, running_ins);
|
|
151
|
+
curr[j + int(b) - prefix] = c;
|
|
152
|
+
running_ins = c + indel;
|
|
153
|
+
}
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
// 补足尾部
|
|
157
|
+
for (; j < nSuf; ++j) {
|
|
158
|
+
int bj = ptr_seq(js, j-1);
|
|
159
|
+
double subcost = (ai == bj) ? 0.0 : ptr_sm(ai, bj);
|
|
160
|
+
double delcost = prev[j - prefix] + indel;
|
|
161
|
+
double inscost = curr[j - 1 - prefix] + indel;
|
|
162
|
+
double subval = prev[j - 1 - prefix] + subcost;
|
|
163
|
+
curr[j - prefix] = std::min({ delcost, inscost, subval });
|
|
164
|
+
}
|
|
165
|
+
|
|
166
|
+
std::swap(prev, curr);
|
|
167
|
+
}
|
|
168
|
+
|
|
169
|
+
double final_cost = prev[nSuf - 1 - prefix];
|
|
170
|
+
double maxpossiblecost = std::abs(n - m) * indel + maxscost * std::min(m, n);
|
|
171
|
+
double ml = double(m) * indel;
|
|
172
|
+
double nl = double(n) * indel;
|
|
173
|
+
return normalize_distance(final_cost, maxpossiblecost, ml, nl, norm);
|
|
174
|
+
|
|
175
|
+
} catch (const std::exception& e) {
|
|
176
|
+
py::print("Error in SIMD-batch compute_distance: ", e.what());
|
|
177
|
+
throw;
|
|
178
|
+
}
|
|
179
|
+
}
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
py::array_t<double> compute_all_distances() {
|
|
183
|
+
try {
|
|
184
|
+
return dp_utils::compute_all_distances(
|
|
185
|
+
nseq,
|
|
186
|
+
fmatsize,
|
|
187
|
+
dist_matrix,
|
|
188
|
+
[this](int i, int j, double* prev, double* curr) {
|
|
189
|
+
return this->compute_distance(i, j, prev, curr);
|
|
190
|
+
}
|
|
191
|
+
);
|
|
192
|
+
} catch (const std::exception& e) {
|
|
193
|
+
py::print("Error in compute_all_distances: ", e.what());
|
|
194
|
+
throw;
|
|
195
|
+
}
|
|
196
|
+
}
|
|
197
|
+
|
|
198
|
+
py::array_t<double> compute_refseq_distances() {
|
|
199
|
+
try {
|
|
200
|
+
auto buffer = refdist_matrix.mutable_unchecked<2>();
|
|
201
|
+
|
|
202
|
+
#pragma omp parallel
|
|
203
|
+
{
|
|
204
|
+
double* prev = dp_utils::aligned_alloc_double(static_cast<size_t>(fmatsize));
|
|
205
|
+
double* curr = dp_utils::aligned_alloc_double(static_cast<size_t>(fmatsize));
|
|
206
|
+
|
|
207
|
+
#pragma omp for schedule(static)
|
|
208
|
+
for (int rseq = rseq1; rseq < rseq2; rseq ++) {
|
|
209
|
+
for (int is = 0; is < nseq; is ++) {
|
|
210
|
+
double cmpres = 0;
|
|
211
|
+
if(is != rseq){
|
|
212
|
+
cmpres = compute_distance(is, rseq, prev, curr);
|
|
213
|
+
}
|
|
214
|
+
|
|
215
|
+
buffer(is, rseq - rseq1) = cmpres;
|
|
216
|
+
}
|
|
217
|
+
}
|
|
218
|
+
dp_utils::aligned_free_double(prev);
|
|
219
|
+
dp_utils::aligned_free_double(curr);
|
|
220
|
+
}
|
|
221
|
+
|
|
222
|
+
return refdist_matrix;
|
|
223
|
+
} catch (const std::exception& e) {
|
|
224
|
+
py::print("Error in compute_all_distances: ", e.what());
|
|
225
|
+
throw;
|
|
226
|
+
}
|
|
227
|
+
}
|
|
228
|
+
|
|
229
|
+
private:
|
|
230
|
+
py::array_t<int> sequences;
|
|
231
|
+
py::array_t<double> sm;
|
|
232
|
+
double indel;
|
|
233
|
+
int norm;
|
|
234
|
+
int nseq;
|
|
235
|
+
int seqlen;
|
|
236
|
+
int alphasize;
|
|
237
|
+
int fmatsize;
|
|
238
|
+
py::array_t<int> seqlength;
|
|
239
|
+
py::array_t<double> dist_matrix;
|
|
240
|
+
double maxscost;
|
|
241
|
+
|
|
242
|
+
// about reference sequences :
|
|
243
|
+
int nans = -1;
|
|
244
|
+
int rseq1 = -1;
|
|
245
|
+
int rseq2 = -1;
|
|
246
|
+
py::array_t<double> refdist_matrix;
|
|
247
|
+
};
|
|
@@ -0,0 +1,281 @@
|
|
|
1
|
+
#include <pybind11/pybind11.h>
|
|
2
|
+
#include <pybind11/numpy.h>
|
|
3
|
+
#include <vector>
|
|
4
|
+
#include <cmath>
|
|
5
|
+
#include <iostream>
|
|
6
|
+
#include "utils.h"
|
|
7
|
+
#include "dp_utils.h"
|
|
8
|
+
#ifdef _OPENMP
|
|
9
|
+
#include <omp.h>
|
|
10
|
+
#endif
|
|
11
|
+
#include <xsimd/xsimd.hpp>
|
|
12
|
+
|
|
13
|
+
namespace py = pybind11;
|
|
14
|
+
|
|
15
|
+
class OMspellDistance {
|
|
16
|
+
public:
|
|
17
|
+
// Builds the OMspell distance calculator.
//
// sequences : 2-D int array, one spell sequence per row.
// sm        : 2-D substitution-cost matrix; only its strict upper triangle is
//             scanned for the maximum cost.
// indel     : base insertion/deletion cost used to cap the substitution cost.
// norm      : normalization selector; the value 4 fixes maxscost at 2 * indel.
// refseqS   : at least two ints (read with bounds-checked .at(0)/.at(1))
//             describing the reference-sequence range.
// timecost  : weight applied to spell durations.
// seqdur    : spell durations, stored as given.
// indellist : per-state indel costs, stored as given.
// seqlength : per-sequence lengths, stored as given.
//
// Any std::exception raised while setting up is printed and rethrown.
OMspellDistance(py::array_t<int> sequences, py::array_t<double> sm, double indel, int norm, py::array_t<int> refseqS,
                double timecost, py::array_t<double> seqdur, py::array_t<double> indellist, py::array_t<int> seqlength)
        : indel(indel), norm(norm), timecost(timecost) {

    py::print("[>] Starting Optimal Matching with spell(OMspell)...");
    std::cout << std::flush;

    try {
        // ============================================
        // parameter : sequences, sm, seqdur, indellist
        // ============================================
        this->sequences = sequences;
        this->sm = sm;

        this->seqdur = seqdur;
        this->indellist = indellist;

        this->seqlength = seqlength;

        // ====================================================
        // initialize nseq, seqlen, dist_matrix, fmatsize, fmat
        // ====================================================
        auto seq_shape = sequences.shape();
        nseq = seq_shape[0];
        len = seq_shape[1];

        dist_matrix = py::array_t<double>({nseq, nseq});

        // One DP row holds len + 1 cells.
        fmatsize = len + 1;

        // ====================
        // initialize alphasize
        // ====================
        auto sm_shape = sm.shape();
        alphasize = sm_shape[0];

        // ==================
        // initialize maxcost
        // ==================
        // Read-only view: the matrix is only inspected, so a non-writeable
        // array must not be rejected (the 2-D check is still performed).
        auto costs = sm.unchecked<2>();

        if (norm == 4) {
            maxscost = 2 * indel;
        } else {
            // Start the max-scan from a defined value so the comparison below
            // never depends on how (or whether) the member was initialized.
            maxscost = 0.0;
            for (int i = 0; i < alphasize; i++) {
                for (int j = i + 1; j < alphasize; j++) {
                    if (costs(i, j) > maxscost) {
                        maxscost = costs(i, j);
                    }
                }
            }
            // A substitution never needs to cost more than delete + insert.
            maxscost = std::min(maxscost, 2 * indel);
        }

        // about reference sequences :
        nans = nseq;

        rseq1 = refseqS.at(0);
        rseq2 = refseqS.at(1);
        if (rseq1 < rseq2) {
            // Rows [0, rseq1) are compared against rows [rseq1, rseq2).
            nseq = rseq1;
            nans = nseq * (rseq2 - rseq1);
        } else {
            // Shift rseq1 down by one so [rseq1, rseq2) is a usable range.
            rseq1 = rseq1 - 1;
        }
        refdist_matrix = py::array_t<double>({nseq, (rseq2 - rseq1)});
    } catch (const std::exception& e) {
        py::print("Error in constructor: ", e.what());
        throw;
    }
}
|
|
88
|
+
|
|
89
|
+
// 对齐分配函数 moved to dp_utils.h
|
|
90
|
+
|
|
91
|
+
double getIndel(int i, int j, int state){
|
|
92
|
+
auto ptr_indel = indellist.mutable_unchecked<1>();
|
|
93
|
+
auto ptr_dur = seqdur.mutable_unchecked<2>();
|
|
94
|
+
|
|
95
|
+
return ptr_indel(state) + timecost * ptr_dur(i, j);
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
double getSubCost(int i_state, int j_state, int i_x, int i_y, int j_x, int j_y){
|
|
99
|
+
auto ptr_dur = seqdur.mutable_unchecked<2>();
|
|
100
|
+
|
|
101
|
+
if(i_state == j_state){
|
|
102
|
+
double diffdur = ptr_dur(i_x, i_y) - ptr_dur(j_x, j_y);
|
|
103
|
+
|
|
104
|
+
return abs(timecost * diffdur);
|
|
105
|
+
}else{
|
|
106
|
+
auto ptr_sm = sm.mutable_unchecked<2>();
|
|
107
|
+
|
|
108
|
+
return ptr_sm(i_state, j_state) +
|
|
109
|
+
(ptr_dur(i_x, i_y) + ptr_dur(j_x, j_y)) * timecost;
|
|
110
|
+
}
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
double compute_distance(int is, int js, double* prev, double* curr) {
|
|
114
|
+
try {
|
|
115
|
+
auto ptr_seq = sequences.unchecked<2>();
|
|
116
|
+
auto ptr_len = seqlength.unchecked<1>();
|
|
117
|
+
auto ptr_sm = sm.unchecked<2>();
|
|
118
|
+
auto ptr_dur = seqdur.unchecked<2>();
|
|
119
|
+
auto ptr_indel = indellist.unchecked<1>();
|
|
120
|
+
|
|
121
|
+
int i_state = 0, j_state = 0;
|
|
122
|
+
int mm = ptr_len(is);
|
|
123
|
+
int nn = ptr_len(js);
|
|
124
|
+
int mSuf = mm + 1;
|
|
125
|
+
int nSuf = nn + 1;
|
|
126
|
+
|
|
127
|
+
prev[0] = 0;
|
|
128
|
+
curr[0] = 0;
|
|
129
|
+
|
|
130
|
+
// initialize first row: cumulative insertions into js along columns
|
|
131
|
+
for (int jj = 1; jj < nSuf; jj++) {
|
|
132
|
+
int bj = ptr_seq(js, jj - 1);
|
|
133
|
+
prev[jj] = prev[jj - 1] + (ptr_indel(bj) + timecost * ptr_dur(js, jj - 1));
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
using batch_t = xsimd::batch<double>;
|
|
137
|
+
constexpr std::size_t B = batch_t::size;
|
|
138
|
+
|
|
139
|
+
for (int i = 1; i < mSuf; i++) {
|
|
140
|
+
i_state = ptr_seq(is, i - 1);
|
|
141
|
+
// per-row deletion cost (depends only on i_state and i position)
|
|
142
|
+
double dur_i = ptr_dur(is, i - 1);
|
|
143
|
+
double del_cost_i = ptr_indel(i_state) + timecost * dur_i;
|
|
144
|
+
|
|
145
|
+
// first column: cumulative deletions D[i][0] = D[i-1][0] + del_cost_i
|
|
146
|
+
curr[0] = prev[0] + del_cost_i;
|
|
147
|
+
|
|
148
|
+
int j = 1;
|
|
149
|
+
for (; j + (int)B <= nSuf; j += (int)B) {
|
|
150
|
+
const double* prev_ptr = prev + j;
|
|
151
|
+
const double* prevm1_ptr = prev + (j - 1);
|
|
152
|
+
|
|
153
|
+
batch_t prevj = batch_t::load_unaligned(prev_ptr);
|
|
154
|
+
batch_t prevjm1 = batch_t::load_unaligned(prevm1_ptr);
|
|
155
|
+
|
|
156
|
+
alignas(64) double subs[B];
|
|
157
|
+
alignas(64) double ins[B];
|
|
158
|
+
for (std::size_t b = 0; b < B; ++b) {
|
|
159
|
+
int jj_idx = j + (int)b - 1;
|
|
160
|
+
int bj = ptr_seq(js, jj_idx);
|
|
161
|
+
double dur_j = ptr_dur(js, jj_idx);
|
|
162
|
+
|
|
163
|
+
if (i_state == bj) {
|
|
164
|
+
subs[b] = std::abs(timecost * (dur_i - dur_j));
|
|
165
|
+
} else {
|
|
166
|
+
subs[b] = ptr_sm(i_state, bj) + (dur_i + dur_j) * timecost;
|
|
167
|
+
}
|
|
168
|
+
ins[b] = ptr_indel(bj) + timecost * dur_j;
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
batch_t sub_batch = batch_t::load_unaligned(subs);
|
|
172
|
+
batch_t cand_del = prevj + batch_t(del_cost_i);
|
|
173
|
+
batch_t cand_sub = prevjm1 + sub_batch;
|
|
174
|
+
batch_t vert = xsimd::min(cand_del, cand_sub);
|
|
175
|
+
|
|
176
|
+
double running = curr[j - 1] + ins[0];
|
|
177
|
+
for (std::size_t b = 0; b < B; ++b) {
|
|
178
|
+
double v = vert.get(b);
|
|
179
|
+
double c = std::min(v, running);
|
|
180
|
+
curr[j + (int)b] = c;
|
|
181
|
+
if (b + 1 < B) running = c + ins[b + 1];
|
|
182
|
+
}
|
|
183
|
+
}
|
|
184
|
+
|
|
185
|
+
// tail scalar handling
|
|
186
|
+
for (; j < nSuf; ++j) {
|
|
187
|
+
j_state = ptr_seq(js, j - 1);
|
|
188
|
+
double minimum = prev[j] + del_cost_i;
|
|
189
|
+
double j_indel = curr[j - 1] + (ptr_indel(j_state) + timecost * ptr_dur(js, j - 1));
|
|
190
|
+
double sub = prev[j - 1] + (
|
|
191
|
+
(i_state == j_state)
|
|
192
|
+
? std::abs(timecost * (dur_i - ptr_dur(js, j - 1)))
|
|
193
|
+
: (ptr_sm(i_state, j_state) + (dur_i + ptr_dur(js, j - 1)) * timecost)
|
|
194
|
+
);
|
|
195
|
+
curr[j] = std::min({ minimum, j_indel, sub });
|
|
196
|
+
}
|
|
197
|
+
|
|
198
|
+
std::swap(prev, curr);
|
|
199
|
+
}
|
|
200
|
+
|
|
201
|
+
double maxpossiblecost = std::abs(nn - mm) * indel + maxscost * std::min(mm, nn);
|
|
202
|
+
double ml = double(mm) * indel;
|
|
203
|
+
double nl = double(nn) * indel;
|
|
204
|
+
|
|
205
|
+
return normalize_distance(prev[nSuf - 1], maxpossiblecost, ml, nl, norm);
|
|
206
|
+
} catch (const std::exception& e) {
|
|
207
|
+
py::print("Error in compute_distance: ", e.what());
|
|
208
|
+
throw;
|
|
209
|
+
}
|
|
210
|
+
}
|
|
211
|
+
|
|
212
|
+
// Computes the distance matrix by delegating to
// dp_utils::compute_all_distances, which receives the number of sequences,
// the scratch-row size, the output matrix and a callback that evaluates one
// pair through compute_distance().
// Any std::exception is printed and rethrown.
py::array_t<double> compute_all_distances() {
    try {
        auto pair_distance = [this](int i, int j, double* prev, double* curr) -> double {
            return compute_distance(i, j, prev, curr);
        };

        return dp_utils::compute_all_distances(nseq, fmatsize, dist_matrix, pair_distance);
    } catch (const std::exception& e) {
        py::print("Error in compute_all_distances: ", e.what());
        throw;
    }
}
|
|
227
|
+
|
|
228
|
+
// Fills and returns refdist_matrix: entry (is, rseq - rseq1) is the distance
// between sequence `is` (0 <= is < nseq) and reference sequence `rseq`
// (rseq1 <= rseq < rseq2). A sequence compared with itself gets 0.
//
// Each OpenMP thread allocates its own pair of scratch rows of `fmatsize`
// doubles and frees them at the end of the parallel region.
// Any std::exception is printed and rethrown.
py::array_t<double> compute_refseq_distances() {
    try {
        auto buffer = refdist_matrix.mutable_unchecked<2>();

        #pragma omp parallel
        {
            // Per-thread scratch rows for the two-row DP in compute_distance.
            double* prev = dp_utils::aligned_alloc_double(static_cast<size_t>(fmatsize));
            double* curr = dp_utils::aligned_alloc_double(static_cast<size_t>(fmatsize));

            #pragma omp for schedule(static)
            for (int rseq = rseq1; rseq < rseq2; rseq ++) {
                for (int is = 0; is < nseq; is ++) {
                    double cmpres = 0;
                    if(is != rseq){
                        cmpres = compute_distance(is, rseq, prev, curr);
                    }

                    buffer(is, rseq - rseq1) = cmpres;
                }
            }

            dp_utils::aligned_free_double(prev);
            dp_utils::aligned_free_double(curr);
        }

        return refdist_matrix;
    } catch (const std::exception& e) {
        // Report the function that actually failed (was mislabelled as
        // compute_all_distances).
        py::print("Error in compute_refseq_distances: ", e.what());
        throw;
    }
}
|
|
258
|
+
|
|
259
|
+
private:
|
|
260
|
+
py::array_t<int> sequences;
|
|
261
|
+
py::array_t<int> seqlength;
|
|
262
|
+
py::array_t<double> sm;
|
|
263
|
+
double indel;
|
|
264
|
+
int norm;
|
|
265
|
+
int nseq;
|
|
266
|
+
int len;
|
|
267
|
+
int alphasize;
|
|
268
|
+
int fmatsize;
|
|
269
|
+
py::array_t<double> dist_matrix;
|
|
270
|
+
double maxscost;
|
|
271
|
+
|
|
272
|
+
double timecost;
|
|
273
|
+
py::array_t<double> seqdur;
|
|
274
|
+
py::array_t<double> indellist;
|
|
275
|
+
|
|
276
|
+
// about reference sequences :
|
|
277
|
+
int nans = -1;
|
|
278
|
+
int rseq1 = -1;
|
|
279
|
+
int rseq2 = -1;
|
|
280
|
+
py::array_t<double> refdist_matrix;
|
|
281
|
+
};
|
|
File without changes
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
#include <pybind11/pybind11.h>
|
|
2
|
+
#include <pybind11/numpy.h>
|
|
3
|
+
#include <vector>
|
|
4
|
+
#include <iostream>
|
|
5
|
+
#ifdef _OPENMP
|
|
6
|
+
#include <omp.h>
|
|
7
|
+
#endif
|
|
8
|
+
|
|
9
|
+
namespace py = pybind11;
|
|
10
|
+
|
|
11
|
+
class dist2matrix {
|
|
12
|
+
public:
|
|
13
|
+
dist2matrix(int nseq, py::array_t<int> seqdata_didxs, py::array_t<double> dist_dseqs_num)
|
|
14
|
+
: nseq(nseq) {
|
|
15
|
+
|
|
16
|
+
py::print("[>] Computing all pairwise distances...");
|
|
17
|
+
std::cout << std::flush;
|
|
18
|
+
|
|
19
|
+
try {
|
|
20
|
+
this->seqdata_didxs = seqdata_didxs;
|
|
21
|
+
this->dist_dseqs_num = dist_dseqs_num;
|
|
22
|
+
|
|
23
|
+
dist_matrix = py::array_t<double>({nseq, nseq});
|
|
24
|
+
} catch (const std::exception& e) {
|
|
25
|
+
py::print("Error in constructor: ", e.what());
|
|
26
|
+
throw;
|
|
27
|
+
}
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
py::array_t<double> padding_matrix() {
|
|
31
|
+
try {
|
|
32
|
+
auto idxs_buf = seqdata_didxs.unchecked<1>();
|
|
33
|
+
auto dist_buf = dist_dseqs_num.unchecked<2>();
|
|
34
|
+
auto buffer = dist_matrix.mutable_unchecked<2>();
|
|
35
|
+
|
|
36
|
+
#pragma omp parallel for schedule(static)
|
|
37
|
+
for (int i = 0; i < nseq; ++i) {
|
|
38
|
+
for (int j = i; j < nseq; ++j) {
|
|
39
|
+
buffer(i, j) = dist_buf(idxs_buf(i), idxs_buf(j));
|
|
40
|
+
}
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
#pragma omp parallel for schedule(static)
|
|
44
|
+
for (int i = 0; i < nseq; ++i) {
|
|
45
|
+
for (int j = i + 1; j < nseq; ++j) {
|
|
46
|
+
buffer(j, i) = buffer(i, j);
|
|
47
|
+
}
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
return dist_matrix;
|
|
51
|
+
} catch (const std::exception& e) {
|
|
52
|
+
py::print("Error in compute_all_distances: ", e.what());
|
|
53
|
+
throw;
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
private:
|
|
58
|
+
py::array_t<int> seqdata_didxs;
|
|
59
|
+
py::array_t<double> dist_dseqs_num;
|
|
60
|
+
int nseq = 0;
|
|
61
|
+
|
|
62
|
+
py::array_t<double> dist_matrix;
|
|
63
|
+
};
|