sequenzo 0.1.24__cp311-cp311-macosx_10_9_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of sequenzo might be problematic. Click here for more details.
- _sequenzo_fastcluster.cpython-311-darwin.so +0 -0
- sequenzo/__init__.py +240 -0
- sequenzo/big_data/__init__.py +12 -0
- sequenzo/big_data/clara/__init__.py +26 -0
- sequenzo/big_data/clara/clara.py +474 -0
- sequenzo/big_data/clara/utils/__init__.py +27 -0
- sequenzo/big_data/clara/utils/aggregatecases.py +92 -0
- sequenzo/big_data/clara/utils/davies_bouldin.py +91 -0
- sequenzo/big_data/clara/utils/get_weighted_diss.cpython-311-darwin.so +0 -0
- sequenzo/big_data/clara/utils/wfcmdd.py +205 -0
- sequenzo/big_data/clara/visualization.py +88 -0
- sequenzo/clustering/KMedoids.py +178 -0
- sequenzo/clustering/__init__.py +30 -0
- sequenzo/clustering/clustering_c_code.cpython-311-darwin.so +0 -0
- sequenzo/clustering/hierarchical_clustering.py +1256 -0
- sequenzo/clustering/sequenzo_fastcluster/fastcluster.py +495 -0
- sequenzo/clustering/sequenzo_fastcluster/src/fastcluster.cpp +1877 -0
- sequenzo/clustering/sequenzo_fastcluster/src/fastcluster_python.cpp +1264 -0
- sequenzo/clustering/src/KMedoid.cpp +263 -0
- sequenzo/clustering/src/PAM.cpp +237 -0
- sequenzo/clustering/src/PAMonce.cpp +265 -0
- sequenzo/clustering/src/cluster_quality.cpp +496 -0
- sequenzo/clustering/src/cluster_quality.h +128 -0
- sequenzo/clustering/src/cluster_quality_backup.cpp +570 -0
- sequenzo/clustering/src/module.cpp +228 -0
- sequenzo/clustering/src/weightedinertia.cpp +111 -0
- sequenzo/clustering/utils/__init__.py +27 -0
- sequenzo/clustering/utils/disscenter.py +122 -0
- sequenzo/data_preprocessing/__init__.py +20 -0
- sequenzo/data_preprocessing/helpers.py +256 -0
- sequenzo/datasets/__init__.py +41 -0
- sequenzo/datasets/biofam.csv +2001 -0
- sequenzo/datasets/biofam_child_domain.csv +2001 -0
- sequenzo/datasets/biofam_left_domain.csv +2001 -0
- sequenzo/datasets/biofam_married_domain.csv +2001 -0
- sequenzo/datasets/chinese_colonial_territories.csv +12 -0
- sequenzo/datasets/country_co2_emissions.csv +194 -0
- sequenzo/datasets/country_co2_emissions_global_deciles.csv +195 -0
- sequenzo/datasets/country_co2_emissions_global_quintiles.csv +195 -0
- sequenzo/datasets/country_co2_emissions_local_deciles.csv +195 -0
- sequenzo/datasets/country_co2_emissions_local_quintiles.csv +195 -0
- sequenzo/datasets/country_gdp_per_capita.csv +194 -0
- sequenzo/datasets/mvad.csv +713 -0
- sequenzo/datasets/pairfam_family.csv +1867 -0
- sequenzo/datasets/polyadic_samplec1.csv +61 -0
- sequenzo/datasets/polyadic_samplep1.csv +61 -0
- sequenzo/datasets/polyadic_seqc1.csv +61 -0
- sequenzo/datasets/polyadic_seqp1.csv +61 -0
- sequenzo/define_sequence_data.py +609 -0
- sequenzo/dissimilarity_measures/__init__.py +31 -0
- sequenzo/dissimilarity_measures/c_code.cpython-311-darwin.so +0 -0
- sequenzo/dissimilarity_measures/get_distance_matrix.py +702 -0
- sequenzo/dissimilarity_measures/get_substitution_cost_matrix.py +241 -0
- sequenzo/dissimilarity_measures/src/DHDdistance.cpp +148 -0
- sequenzo/dissimilarity_measures/src/LCPdistance.cpp +114 -0
- sequenzo/dissimilarity_measures/src/OMdistance.cpp +247 -0
- sequenzo/dissimilarity_measures/src/OMspellDistance.cpp +281 -0
- sequenzo/dissimilarity_measures/src/__init__.py +0 -0
- sequenzo/dissimilarity_measures/src/dist2matrix.cpp +63 -0
- sequenzo/dissimilarity_measures/src/dp_utils.h +160 -0
- sequenzo/dissimilarity_measures/src/module.cpp +34 -0
- sequenzo/dissimilarity_measures/src/setup.py +30 -0
- sequenzo/dissimilarity_measures/src/utils.h +25 -0
- sequenzo/dissimilarity_measures/src/xsimd/.github/cmake-test/main.cpp +6 -0
- sequenzo/dissimilarity_measures/src/xsimd/benchmark/main.cpp +159 -0
- sequenzo/dissimilarity_measures/src/xsimd/benchmark/xsimd_benchmark.hpp +565 -0
- sequenzo/dissimilarity_measures/src/xsimd/docs/source/conf.py +37 -0
- sequenzo/dissimilarity_measures/src/xsimd/examples/mandelbrot.cpp +330 -0
- sequenzo/dissimilarity_measures/src/xsimd/examples/pico_bench.hpp +246 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_arithmetic.hpp +266 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_complex.hpp +112 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_details.hpp +323 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_logical.hpp +218 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_math.hpp +2583 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_memory.hpp +880 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_rounding.hpp +72 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_swizzle.hpp +174 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_trigo.hpp +978 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx.hpp +1924 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx2.hpp +1144 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512bw.hpp +656 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512cd.hpp +28 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512dq.hpp +244 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512er.hpp +20 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512f.hpp +2650 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512ifma.hpp +20 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512pf.hpp +20 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vbmi.hpp +77 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vbmi2.hpp +131 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vnni_avx512bw.hpp +20 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vnni_avx512vbmi2.hpp +20 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avxvnni.hpp +20 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_common.hpp +24 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_common_fwd.hpp +77 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_constants.hpp +393 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_emulated.hpp +788 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_avx.hpp +93 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_avx2.hpp +46 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_sse.hpp +97 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma4.hpp +92 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_i8mm_neon64.hpp +17 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_isa.hpp +142 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_neon.hpp +3142 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_neon64.hpp +1543 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_rvv.hpp +1513 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_scalar.hpp +1260 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse2.hpp +2024 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse3.hpp +67 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse4_1.hpp +339 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse4_2.hpp +44 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_ssse3.hpp +186 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sve.hpp +1155 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_vsx.hpp +892 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_wasm.hpp +1780 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_arch.hpp +240 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_config.hpp +484 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_cpuid.hpp +269 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_inline.hpp +27 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/math/xsimd_rem_pio2.hpp +719 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/memory/xsimd_aligned_allocator.hpp +349 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/memory/xsimd_alignment.hpp +91 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_all_registers.hpp +55 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_api.hpp +2765 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx2_register.hpp +44 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512bw_register.hpp +51 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512cd_register.hpp +51 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512dq_register.hpp +51 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512er_register.hpp +51 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512f_register.hpp +77 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512ifma_register.hpp +51 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512pf_register.hpp +51 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vbmi2_register.hpp +51 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vbmi_register.hpp +51 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512bw_register.hpp +54 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512vbmi2_register.hpp +53 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx_register.hpp +64 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avxvnni_register.hpp +44 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_batch.hpp +1524 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_batch_constant.hpp +300 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_common_arch.hpp +47 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_emulated_register.hpp +80 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_avx2_register.hpp +50 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_avx_register.hpp +50 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_sse_register.hpp +50 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma4_register.hpp +50 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_i8mm_neon64_register.hpp +55 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_neon64_register.hpp +55 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_neon_register.hpp +154 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_register.hpp +94 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_rvv_register.hpp +506 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse2_register.hpp +59 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse3_register.hpp +49 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse4_1_register.hpp +48 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse4_2_register.hpp +48 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_ssse3_register.hpp +48 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sve_register.hpp +156 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_traits.hpp +337 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_utils.hpp +536 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_vsx_register.hpp +77 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_wasm_register.hpp +59 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/xsimd.hpp +75 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/architectures/dummy.cpp +7 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set.cpp +13 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean.cpp +24 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_aligned.cpp +25 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_arch_independent.cpp +28 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_tag_dispatch.cpp +25 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/doc/manipulating_abstract_batches.cpp +7 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/doc/manipulating_parametric_batches.cpp +8 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum.hpp +31 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum_avx2.cpp +3 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum_sse2.cpp +3 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/doc/writing_vectorized_code.cpp +11 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/main.cpp +31 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_api.cpp +230 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_arch.cpp +217 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_basic_math.cpp +183 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_batch.cpp +1049 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_bool.cpp +508 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_cast.cpp +409 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_complex.cpp +712 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_constant.cpp +286 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_float.cpp +141 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_int.cpp +365 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_manip.cpp +308 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_bitwise_cast.cpp +222 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_exponential.cpp +226 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_hyperbolic.cpp +183 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_power.cpp +265 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_trigonometric.cpp +236 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_conversion.cpp +248 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_custom_default_arch.cpp +28 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_error_gamma.cpp +170 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_explicit_batch_instantiation.cpp +32 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_exponential.cpp +202 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_extract_pair.cpp +92 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_fp_manipulation.cpp +77 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_gnu_source.cpp +30 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_hyperbolic.cpp +167 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_load_store.cpp +304 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_memory.cpp +61 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_poly_evaluation.cpp +64 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_power.cpp +184 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_rounding.cpp +199 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_select.cpp +101 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_shuffle.cpp +760 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_sum.cpp +4 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_sum.hpp +34 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_traits.cpp +172 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_trigonometric.cpp +208 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_utils.hpp +611 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_wasm/test_wasm_playwright.py +123 -0
- sequenzo/dissimilarity_measures/src/xsimd/test/test_xsimd_api.cpp +1460 -0
- sequenzo/dissimilarity_measures/utils/__init__.py +16 -0
- sequenzo/dissimilarity_measures/utils/get_LCP_length_for_2_seq.py +44 -0
- sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.cpython-311-darwin.so +0 -0
- sequenzo/dissimilarity_measures/utils/seqconc.cpython-311-darwin.so +0 -0
- sequenzo/dissimilarity_measures/utils/seqdss.cpython-311-darwin.so +0 -0
- sequenzo/dissimilarity_measures/utils/seqdur.cpython-311-darwin.so +0 -0
- sequenzo/dissimilarity_measures/utils/seqlength.cpython-311-darwin.so +0 -0
- sequenzo/multidomain/__init__.py +23 -0
- sequenzo/multidomain/association_between_domains.py +311 -0
- sequenzo/multidomain/cat.py +431 -0
- sequenzo/multidomain/combt.py +519 -0
- sequenzo/multidomain/dat.py +89 -0
- sequenzo/multidomain/idcd.py +139 -0
- sequenzo/multidomain/linked_polyad.py +292 -0
- sequenzo/openmp_setup.py +233 -0
- sequenzo/prefix_tree/__init__.py +43 -0
- sequenzo/prefix_tree/individual_level_indicators.py +1274 -0
- sequenzo/prefix_tree/system_level_indicators.py +465 -0
- sequenzo/prefix_tree/utils.py +54 -0
- sequenzo/sequence_characteristics/__init__.py +40 -0
- sequenzo/sequence_characteristics/complexity_index.py +49 -0
- sequenzo/sequence_characteristics/overall_cross_sectional_entropy.py +220 -0
- sequenzo/sequence_characteristics/plot_characteristics.py +593 -0
- sequenzo/sequence_characteristics/simple_characteristics.py +311 -0
- sequenzo/sequence_characteristics/state_frequencies_and_entropy_per_sequence.py +39 -0
- sequenzo/sequence_characteristics/turbulence.py +155 -0
- sequenzo/sequence_characteristics/variance_of_spell_durations.py +86 -0
- sequenzo/sequence_characteristics/within_sequence_entropy.py +43 -0
- sequenzo/suffix_tree/__init__.py +48 -0
- sequenzo/suffix_tree/individual_level_indicators.py +1638 -0
- sequenzo/suffix_tree/system_level_indicators.py +456 -0
- sequenzo/suffix_tree/utils.py +56 -0
- sequenzo/visualization/__init__.py +29 -0
- sequenzo/visualization/plot_mean_time.py +194 -0
- sequenzo/visualization/plot_modal_state.py +276 -0
- sequenzo/visualization/plot_most_frequent_sequences.py +147 -0
- sequenzo/visualization/plot_relative_frequency.py +404 -0
- sequenzo/visualization/plot_sequence_index.py +951 -0
- sequenzo/visualization/plot_single_medoid.py +153 -0
- sequenzo/visualization/plot_state_distribution.py +627 -0
- sequenzo/visualization/plot_transition_matrix.py +190 -0
- sequenzo/visualization/utils/__init__.py +23 -0
- sequenzo/visualization/utils/utils.py +310 -0
- sequenzo/with_event_history_analysis/__init__.py +35 -0
- sequenzo/with_event_history_analysis/sequence_analysis_multi_state_model.py +850 -0
- sequenzo/with_event_history_analysis/sequence_history_analysis.py +283 -0
- sequenzo-0.1.24.dist-info/METADATA +255 -0
- sequenzo-0.1.24.dist-info/RECORD +264 -0
- sequenzo-0.1.24.dist-info/WHEEL +5 -0
- sequenzo-0.1.24.dist-info/licenses/LICENSE +28 -0
- sequenzo-0.1.24.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,263 @@
|
|
|
1
|
+
#include <pybind11/pybind11.h>
|
|
2
|
+
#include <pybind11/numpy.h>
|
|
3
|
+
#include <vector>
|
|
4
|
+
#include <iostream>
|
|
5
|
+
#ifdef _OPENMP
|
|
6
|
+
#include <omp.h>
|
|
7
|
+
#endif
|
|
8
|
+
#include <random>
|
|
9
|
+
#include <cfloat>
|
|
10
|
+
#include <climits>
|
|
11
|
+
#include <cmath>
|
|
12
|
+
|
|
13
|
+
using namespace std;
|
|
14
|
+
namespace py = pybind11;
|
|
15
|
+
|
|
16
|
+
class KMedoid {
|
|
17
|
+
protected:
|
|
18
|
+
int nelements; // Number of elements (data points)
|
|
19
|
+
int nclusters; // Number of clusters (medoids)
|
|
20
|
+
int npass; // Maximum number of iterations
|
|
21
|
+
|
|
22
|
+
vector<int> tclusterid; // Temporary cluster assignment for each element
|
|
23
|
+
vector<int> saved; // Saved cluster assignments to check for convergence
|
|
24
|
+
vector<int> clusterMembership; // Cluster membership indices (flattened 2D: nclusters x nelements)
|
|
25
|
+
vector<int> clusterSize; // Size of each cluster
|
|
26
|
+
|
|
27
|
+
py::array_t<double> diss; // Distance matrix (2D numpy array)
|
|
28
|
+
py::array_t<int> centroids; // Medoid indices (1D numpy array)
|
|
29
|
+
py::array_t<double> weights; // Weights of elements (1D numpy array)
|
|
30
|
+
|
|
31
|
+
public:
|
|
32
|
+
// Constructor initializes members and allocates necessary storage
|
|
33
|
+
KMedoid(int nelements, py::array_t<double> diss,
|
|
34
|
+
py::array_t<int> centroids, int npass,
|
|
35
|
+
py::array_t<double> weights)
|
|
36
|
+
: nelements(nelements),
|
|
37
|
+
diss(diss),
|
|
38
|
+
centroids(centroids),
|
|
39
|
+
npass(npass),
|
|
40
|
+
weights(weights),
|
|
41
|
+
nclusters(static_cast<int>(centroids.size())) {
|
|
42
|
+
// 注释掉信息性打印,避免在并行环境(如 CLARA)中降低性能
|
|
43
|
+
// py::print("[>] Starting KMedoids...");
|
|
44
|
+
|
|
45
|
+
tclusterid.resize(nelements);
|
|
46
|
+
saved.resize(nelements);
|
|
47
|
+
clusterMembership.resize(nelements * nclusters);
|
|
48
|
+
clusterSize.resize(nclusters);
|
|
49
|
+
fill(clusterSize.begin(), clusterSize.end(), 0);
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
// Initialize medoids using a k-means++ style seeding method for better starting points
|
|
53
|
+
void init_medoids() {
|
|
54
|
+
auto ptr_diss = diss.unchecked<2>();
|
|
55
|
+
auto ptr_centroids = centroids.mutable_unchecked<1>();
|
|
56
|
+
auto ptr_weights = weights.unchecked<1>();
|
|
57
|
+
|
|
58
|
+
vector<int> selected; // Indices of selected medoids
|
|
59
|
+
vector<double> min_dists(nelements, DBL_MAX); // Minimum distance to selected medoids
|
|
60
|
+
|
|
61
|
+
mt19937 rng(random_device{}()); // 3. Random number generator initialization (non-deterministic seed)
|
|
62
|
+
uniform_int_distribution<> dist(0, nelements - 1);
|
|
63
|
+
|
|
64
|
+
// Randomly choose the first medoid
|
|
65
|
+
int first = dist(rng);
|
|
66
|
+
selected.push_back(first);
|
|
67
|
+
ptr_centroids[0] = first;
|
|
68
|
+
|
|
69
|
+
for (int k = 1; k < nclusters; ++k) {
|
|
70
|
+
int last = selected.back();
|
|
71
|
+
|
|
72
|
+
// Update min_dists using only the last selected medoid
|
|
73
|
+
for (int i = 0; i < nelements; ++i) {
|
|
74
|
+
double d = ptr_diss(i, last);
|
|
75
|
+
if (d < min_dists[i]) min_dists[i] = d;
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
// Compute weighted total distance
|
|
79
|
+
double total_weight = 0.0;
|
|
80
|
+
for (int i = 0; i < nelements; ++i) {
|
|
81
|
+
total_weight += min_dists[i] * ptr_weights[i];
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
// Handle degenerate case
|
|
85
|
+
if (total_weight <= 1e-10) {
|
|
86
|
+
int fallback = dist(rng);
|
|
87
|
+
selected.push_back(fallback);
|
|
88
|
+
ptr_centroids[k] = fallback;
|
|
89
|
+
continue;
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
// Select next medoid using weighted probability
|
|
93
|
+
uniform_real_distribution<double> rdist(0, total_weight);
|
|
94
|
+
double r = rdist(rng), accumulator = 0.0;
|
|
95
|
+
int next = -1;
|
|
96
|
+
|
|
97
|
+
for (int i = 0; i < nelements; ++i) {
|
|
98
|
+
accumulator += min_dists[i] * ptr_weights[i];
|
|
99
|
+
if (accumulator >= r) {
|
|
100
|
+
next = i;
|
|
101
|
+
break;
|
|
102
|
+
}
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
if (next == -1) next = dist(rng); // fallback again just in case
|
|
106
|
+
selected.push_back(next);
|
|
107
|
+
ptr_centroids[k] = next;
|
|
108
|
+
}
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
// Update medoids by selecting the element minimizing the sum of weighted distances to all other elements in the cluster
|
|
113
|
+
void getclustermedoids() {
|
|
114
|
+
auto ptr_weights = weights.unchecked<1>();
|
|
115
|
+
auto ptr_diss = diss.unchecked<2>();
|
|
116
|
+
auto ptr_centroids = centroids.mutable_unchecked<1>();
|
|
117
|
+
|
|
118
|
+
#pragma omp parallel for schedule(dynamic)
|
|
119
|
+
for (int k = 0; k < nclusters; ++k) {
|
|
120
|
+
int size = clusterSize[k];
|
|
121
|
+
double best = DBL_MAX;
|
|
122
|
+
int bestID = 0;
|
|
123
|
+
|
|
124
|
+
// Iterate over all members of cluster k to find the best medoid
|
|
125
|
+
for (int i = 0; i < size; ++i) {
|
|
126
|
+
int ii = clusterMembership[k * nelements + i];
|
|
127
|
+
double current = 0;
|
|
128
|
+
|
|
129
|
+
// Sum weighted distances from candidate medoid ii to all other members
|
|
130
|
+
for (int j = 0; j < size; ++j) {
|
|
131
|
+
if (i == j) continue;
|
|
132
|
+
int jj = clusterMembership[k * nelements + j];
|
|
133
|
+
current += ptr_weights[jj] * ptr_diss(ii, jj);
|
|
134
|
+
if (current >= best) break; // Early stop if worse than current best
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
if (current < best) {
|
|
138
|
+
best = current;
|
|
139
|
+
bestID = ii;
|
|
140
|
+
}
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
ptr_centroids[k] = bestID; // Assign best medoid for cluster k
|
|
144
|
+
}
|
|
145
|
+
}
|
|
146
|
+
|
|
147
|
+
// Main loop to run the clustering process until convergence or max iterations
|
|
148
|
+
py::array_t<int> runclusterloop() {
|
|
149
|
+
auto ptr_weights = weights.unchecked<1>();
|
|
150
|
+
auto ptr_diss = diss.unchecked<2>();
|
|
151
|
+
auto ptr_centroids = centroids.mutable_unchecked<1>();
|
|
152
|
+
|
|
153
|
+
double total = DBL_MAX;
|
|
154
|
+
int counter = 0;
|
|
155
|
+
int period = 10; // Frequency to save cluster assignments for convergence checking
|
|
156
|
+
|
|
157
|
+
while (counter <= npass) {
|
|
158
|
+
PyErr_CheckSignals(); // Allow Python interruption
|
|
159
|
+
|
|
160
|
+
double prev = total;
|
|
161
|
+
total = 0;
|
|
162
|
+
|
|
163
|
+
if (counter > 0) getclustermedoids();
|
|
164
|
+
|
|
165
|
+
// Periodically save cluster assignment to check for convergence
|
|
166
|
+
if (counter % period == 0) {
|
|
167
|
+
for (int i = 0; i < nelements; ++i)
|
|
168
|
+
saved[i] = tclusterid[i];
|
|
169
|
+
|
|
170
|
+
if (period < INT_MAX / 2) period *= 2; // Exponentially increase period
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
counter++;
|
|
174
|
+
|
|
175
|
+
vector<vector<int>> localMembers(nclusters);
|
|
176
|
+
|
|
177
|
+
// Parallel assignment of elements to closest medoid
|
|
178
|
+
#pragma omp parallel
|
|
179
|
+
{
|
|
180
|
+
vector<vector<int>> threadLocal(nclusters);
|
|
181
|
+
|
|
182
|
+
#pragma omp for reduction(+:total) schedule(static)
|
|
183
|
+
for (int i = 0; i < nelements; ++i) {
|
|
184
|
+
double dist = DBL_MAX;
|
|
185
|
+
int assign = 0;
|
|
186
|
+
|
|
187
|
+
// Find nearest medoid
|
|
188
|
+
for (int k = 0; k < nclusters; ++k) {
|
|
189
|
+
int j = ptr_centroids[k];
|
|
190
|
+
double tdistance = ptr_diss(i, j);
|
|
191
|
+
|
|
192
|
+
if (tdistance < dist) {
|
|
193
|
+
dist = tdistance;
|
|
194
|
+
assign = k;
|
|
195
|
+
}
|
|
196
|
+
}
|
|
197
|
+
|
|
198
|
+
tclusterid[i] = assign;
|
|
199
|
+
threadLocal[assign].push_back(i);
|
|
200
|
+
total += ptr_weights[i] * dist;
|
|
201
|
+
}
|
|
202
|
+
|
|
203
|
+
// Merge thread local cluster memberships into shared vector safely
|
|
204
|
+
#pragma omp critical
|
|
205
|
+
{
|
|
206
|
+
for (int k = 0; k < nclusters; ++k) {
|
|
207
|
+
localMembers[k].insert(
|
|
208
|
+
localMembers[k].end(),
|
|
209
|
+
threadLocal[k].begin(),
|
|
210
|
+
threadLocal[k].end()
|
|
211
|
+
);
|
|
212
|
+
}
|
|
213
|
+
}
|
|
214
|
+
}
|
|
215
|
+
|
|
216
|
+
// Update cluster membership and sizes
|
|
217
|
+
for (int k = 0; k < nclusters; ++k) {
|
|
218
|
+
clusterSize[k] = static_cast<int>(localMembers[k].size());
|
|
219
|
+
|
|
220
|
+
// If a cluster is empty, reinitialize medoids and restart
|
|
221
|
+
if (clusterSize[k] == 0) {
|
|
222
|
+
init_medoids();
|
|
223
|
+
counter = 0;
|
|
224
|
+
break;
|
|
225
|
+
}
|
|
226
|
+
|
|
227
|
+
for (int i = 0; i < clusterSize[k]; ++i) {
|
|
228
|
+
clusterMembership[k * nelements + i] = localMembers[k][i];
|
|
229
|
+
}
|
|
230
|
+
}
|
|
231
|
+
|
|
232
|
+
// Convergence check based on total cost change
|
|
233
|
+
if (abs(total - prev) < 1e-6) break;
|
|
234
|
+
|
|
235
|
+
// Check if cluster assignments are unchanged from last saved
|
|
236
|
+
bool same = true;
|
|
237
|
+
for (int i = 0; i < nelements; ++i) {
|
|
238
|
+
if (saved[i] != tclusterid[i]) {
|
|
239
|
+
same = false;
|
|
240
|
+
break;
|
|
241
|
+
}
|
|
242
|
+
}
|
|
243
|
+
|
|
244
|
+
if (same) break;
|
|
245
|
+
}
|
|
246
|
+
|
|
247
|
+
return getResultArray();
|
|
248
|
+
}
|
|
249
|
+
|
|
250
|
+
// Construct and return the final array of medoid assignments for each element
|
|
251
|
+
py::array_t<int> getResultArray() const {
|
|
252
|
+
py::array_t<int> result(nelements);
|
|
253
|
+
auto results = result.mutable_unchecked<1>();
|
|
254
|
+
auto centroid = centroids.unchecked<1>();
|
|
255
|
+
|
|
256
|
+
#pragma omp parallel for schedule(static)
|
|
257
|
+
for (int i = 0; i < nelements; ++i) {
|
|
258
|
+
results(i) = centroid(tclusterid[i]);
|
|
259
|
+
}
|
|
260
|
+
|
|
261
|
+
return result;
|
|
262
|
+
}
|
|
263
|
+
};
|
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
#include <pybind11/pybind11.h>
|
|
2
|
+
#include <pybind11/numpy.h>
|
|
3
|
+
#include <vector>
|
|
4
|
+
#include <iostream>
|
|
5
|
+
#ifdef _OPENMP
|
|
6
|
+
#include <omp.h>
|
|
7
|
+
#endif
|
|
8
|
+
#include <cfloat>
|
|
9
|
+
#include <climits>
|
|
10
|
+
#include <cmath>
|
|
11
|
+
#define WEIGHTED_CLUST_TOL -1e-10
|
|
12
|
+
using namespace std;
|
|
13
|
+
namespace py = pybind11;
|
|
14
|
+
|
|
15
|
+
class PAM {
|
|
16
|
+
public:
|
|
17
|
+
// Constructor: Initializes the PAM algorithm with required parameters.
|
|
18
|
+
PAM(int nelements, py::array_t<double> diss,
|
|
19
|
+
py::array_t<int> centroids, int npass, py::array_t<double> weights) {
|
|
20
|
+
// 注释掉信息性打印,避免在并行环境(如 CLARA)中降低性能
|
|
21
|
+
// py::print("[>] Starting Partitioning Around Medoids (PAM)...");
|
|
22
|
+
|
|
23
|
+
try {
|
|
24
|
+
this->nelements = nelements;
|
|
25
|
+
this->centroids = centroids;
|
|
26
|
+
this->npass = npass;
|
|
27
|
+
this->weights = weights;
|
|
28
|
+
this->diss = diss;
|
|
29
|
+
this->maxdist = 0.0;
|
|
30
|
+
this->nclusters = static_cast<int>(centroids.size()); // Number of clusters
|
|
31
|
+
this->tclusterid.resize(nelements); // Initialize cluster id vector
|
|
32
|
+
this->computeMaxDist(); // Compute the maximum distance for use later
|
|
33
|
+
|
|
34
|
+
// Initialize dysma and dysmb with maxdist
|
|
35
|
+
dysma.resize(nelements, maxdist);
|
|
36
|
+
dysmb.resize(nelements, maxdist);
|
|
37
|
+
} catch (const exception &e) {
|
|
38
|
+
py::print("Error: ", e.what()); // Error handling
|
|
39
|
+
}
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
// Computes the maximum distance between any two elements in the distance matrix.
|
|
43
|
+
void computeMaxDist() {
|
|
44
|
+
auto ptr_diss = diss.unchecked<2>();
|
|
45
|
+
|
|
46
|
+
// The manual array collects the thread maxima
|
|
47
|
+
int nthreads = 1;
|
|
48
|
+
#ifdef _OPENMP
|
|
49
|
+
#pragma omp parallel
|
|
50
|
+
{
|
|
51
|
+
#pragma omp single
|
|
52
|
+
nthreads = omp_get_num_threads();
|
|
53
|
+
}
|
|
54
|
+
#endif
|
|
55
|
+
|
|
56
|
+
std::vector<double> thread_max(nthreads, 0.0);
|
|
57
|
+
|
|
58
|
+
#ifdef _OPENMP
|
|
59
|
+
#pragma omp parallel
|
|
60
|
+
{
|
|
61
|
+
int tid = omp_get_thread_num();
|
|
62
|
+
#else
|
|
63
|
+
{
|
|
64
|
+
int tid = 0;
|
|
65
|
+
#endif
|
|
66
|
+
double local = 0.0;
|
|
67
|
+
|
|
68
|
+
#ifdef _OPENMP
|
|
69
|
+
#pragma omp for schedule(static)
|
|
70
|
+
#endif
|
|
71
|
+
for (int i = 0; i < nelements; ++i) {
|
|
72
|
+
for (int j = i + 1; j < nelements; ++j) {
|
|
73
|
+
double val = ptr_diss(i, j);
|
|
74
|
+
if (val > local) local = val;
|
|
75
|
+
}
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
thread_max[tid] = local;
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
// Final reduction (serial, fast)
|
|
82
|
+
double max_val = 0.0;
|
|
83
|
+
for (double val : thread_max) {
|
|
84
|
+
if (val > max_val) max_val = val;
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
maxdist = 1.1 * max_val + 1.0;
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
// Runs the PAM clustering loop, repeatedly updating centroids and assigning elements to clusters.
|
|
92
|
+
py::array_t<int> runclusterloop() {
|
|
93
|
+
auto ptr_weights = weights.unchecked<1>(); // Access to the weights
|
|
94
|
+
auto ptr_diss = diss.unchecked<2>(); // Access to the distance matrix
|
|
95
|
+
auto ptr_centroids = centroids.mutable_unchecked<1>(); // Access to the centroids
|
|
96
|
+
|
|
97
|
+
double dzsky;
|
|
98
|
+
int ipass = 0;
|
|
99
|
+
int hbest = -1;
|
|
100
|
+
int nbest = -1;
|
|
101
|
+
int k, icluster, h;
|
|
102
|
+
double total = -1.0;
|
|
103
|
+
int nclusters = static_cast<int>(centroids.size());
|
|
104
|
+
|
|
105
|
+
do {
|
|
106
|
+
// Parallel loop to update dysma and dysmb based on current centroids
|
|
107
|
+
#pragma omp parallel for schedule(static)
|
|
108
|
+
for (int i = 0; i < nelements; i++) {
|
|
109
|
+
dysmb[i] = maxdist;
|
|
110
|
+
dysma[i] = maxdist;
|
|
111
|
+
|
|
112
|
+
// Update dysma and dysmb values based on the distance to centroids
|
|
113
|
+
for (int k = 0; k < nclusters; k++) {
|
|
114
|
+
int icluster = ptr_centroids(k);
|
|
115
|
+
double dist = ptr_diss(i, icluster);
|
|
116
|
+
|
|
117
|
+
if (dysma[i] > dist) {
|
|
118
|
+
dysmb[i] = dysma[i];
|
|
119
|
+
dysma[i] = dist;
|
|
120
|
+
tclusterid[i] = k; // Assign element to the current cluster
|
|
121
|
+
} else if (dysmb[i] > dist) {
|
|
122
|
+
dysmb[i] = dist;
|
|
123
|
+
}
|
|
124
|
+
}
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
// If total hasn't been calculated yet, calculate it
|
|
128
|
+
if (total < 0) {
|
|
129
|
+
total = 0;
|
|
130
|
+
|
|
131
|
+
// Parallel loop to calculate the total weighted distance
|
|
132
|
+
#pragma omp parallel for reduction(+:total) schedule(static)
|
|
133
|
+
for (int i = 0; i < nelements; i++) {
|
|
134
|
+
total += ptr_weights[i] * dysma[i];
|
|
135
|
+
}
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
dzsky = 1; // Initialize dzsky to 1 for the change cost comparison
|
|
139
|
+
|
|
140
|
+
// Parallel loop to compute the cost of switching elements' medoids
|
|
141
|
+
#pragma omp parallel for schedule(dynamic)
|
|
142
|
+
for (int h = 0; h < nelements; h++) {
|
|
143
|
+
bool is_current_medoid = false;
|
|
144
|
+
for (int k = 0; k < nclusters; k++) {
|
|
145
|
+
if (h == ptr_centroids[k]) {
|
|
146
|
+
is_current_medoid = true;
|
|
147
|
+
break;
|
|
148
|
+
}
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
if (is_current_medoid) // Skip if the element is already a medoid
|
|
152
|
+
continue;
|
|
153
|
+
|
|
154
|
+
double local_dzsky = dzsky;
|
|
155
|
+
int local_hbest = -1;
|
|
156
|
+
int local_nbest = -1;
|
|
157
|
+
|
|
158
|
+
// Evaluate the change cost for switching each element with a new medoid
|
|
159
|
+
for (int k = 0; k < nclusters; k++) {
|
|
160
|
+
int i = ptr_centroids[k];
|
|
161
|
+
double dz = 0.0;
|
|
162
|
+
|
|
163
|
+
for (int j = 0; j < nelements; j++) {
|
|
164
|
+
if (ptr_diss(i, j) == dysma[j]) {
|
|
165
|
+
double small = (dysmb[j] > ptr_diss(h, j)) ? ptr_diss(h, j) : dysmb[j];
|
|
166
|
+
dz += ptr_weights[j] * (-dysma[j] + small); // Update change cost
|
|
167
|
+
} else if (ptr_diss(h, j) < dysma[j]) {
|
|
168
|
+
dz += ptr_weights[j] * (-dysma[j] + ptr_diss(h, j));
|
|
169
|
+
}
|
|
170
|
+
}
|
|
171
|
+
|
|
172
|
+
// Keep track of the best change
|
|
173
|
+
if (dz < local_dzsky) {
|
|
174
|
+
local_dzsky = dz;
|
|
175
|
+
local_hbest = h;
|
|
176
|
+
local_nbest = i;
|
|
177
|
+
}
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
// Critical section to update dzsky with the best change
|
|
181
|
+
#pragma omp critical
|
|
182
|
+
{
|
|
183
|
+
if (local_dzsky < dzsky) {
|
|
184
|
+
dzsky = local_dzsky;
|
|
185
|
+
hbest = local_hbest;
|
|
186
|
+
nbest = local_nbest;
|
|
187
|
+
}
|
|
188
|
+
}
|
|
189
|
+
}
|
|
190
|
+
|
|
191
|
+
// If there was an improvement in the total cost, update the centroids
|
|
192
|
+
if (dzsky < 0) {
|
|
193
|
+
for (k = 0; k < nclusters; k++) {
|
|
194
|
+
if (ptr_centroids[k] == nbest) {
|
|
195
|
+
ptr_centroids[k] = hbest; // Swap the medoids
|
|
196
|
+
}
|
|
197
|
+
}
|
|
198
|
+
|
|
199
|
+
total += dzsky; // Update the total cost
|
|
200
|
+
}
|
|
201
|
+
|
|
202
|
+
ipass++; // Increment pass count
|
|
203
|
+
if (ipass >= npass) {
|
|
204
|
+
break; // Break if max passes reached
|
|
205
|
+
}
|
|
206
|
+
} while (dzsky < 0); // Repeat until no improvement
|
|
207
|
+
|
|
208
|
+
return getResultArray(); // Return the final cluster assignments
|
|
209
|
+
}
|
|
210
|
+
|
|
211
|
+
// Returns an array of cluster assignments for each element
|
|
212
|
+
py::array_t<int> getResultArray() const {
|
|
213
|
+
py::array_t<int> result(nelements);
|
|
214
|
+
auto results = result.mutable_unchecked<1>();
|
|
215
|
+
auto centroid = centroids.unchecked<1>();
|
|
216
|
+
|
|
217
|
+
#pragma omp parallel for schedule(static)
|
|
218
|
+
for (int i = 0; i < nelements; ++i) {
|
|
219
|
+
results(i) = centroid(tclusterid[i]);
|
|
220
|
+
}
|
|
221
|
+
|
|
222
|
+
return result;
|
|
223
|
+
}
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
protected:
|
|
227
|
+
int nelements; // Number of elements to cluster
|
|
228
|
+
py::array_t<double> diss; // Distance matrix
|
|
229
|
+
py::array_t<int> centroids; // Initial centroids
|
|
230
|
+
int npass; // Number of passes for the algorithm
|
|
231
|
+
py::array_t<double> weights; // Element weights
|
|
232
|
+
vector<int> tclusterid; // Cluster IDs for each element
|
|
233
|
+
vector<double> dysmb; // Temporary variable for distances
|
|
234
|
+
int nclusters; // Number of clusters
|
|
235
|
+
double maxdist; // Maximum distance value
|
|
236
|
+
vector<double> dysma; // Temporary variable for distances
|
|
237
|
+
};
|