sequenzo 0.1.24__cp311-cp311-macosx_10_9_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sequenzo might be problematic. Click here for more details.

Files changed (264) hide show
  1. _sequenzo_fastcluster.cpython-311-darwin.so +0 -0
  2. sequenzo/__init__.py +240 -0
  3. sequenzo/big_data/__init__.py +12 -0
  4. sequenzo/big_data/clara/__init__.py +26 -0
  5. sequenzo/big_data/clara/clara.py +474 -0
  6. sequenzo/big_data/clara/utils/__init__.py +27 -0
  7. sequenzo/big_data/clara/utils/aggregatecases.py +92 -0
  8. sequenzo/big_data/clara/utils/davies_bouldin.py +91 -0
  9. sequenzo/big_data/clara/utils/get_weighted_diss.cpython-311-darwin.so +0 -0
  10. sequenzo/big_data/clara/utils/wfcmdd.py +205 -0
  11. sequenzo/big_data/clara/visualization.py +88 -0
  12. sequenzo/clustering/KMedoids.py +178 -0
  13. sequenzo/clustering/__init__.py +30 -0
  14. sequenzo/clustering/clustering_c_code.cpython-311-darwin.so +0 -0
  15. sequenzo/clustering/hierarchical_clustering.py +1256 -0
  16. sequenzo/clustering/sequenzo_fastcluster/fastcluster.py +495 -0
  17. sequenzo/clustering/sequenzo_fastcluster/src/fastcluster.cpp +1877 -0
  18. sequenzo/clustering/sequenzo_fastcluster/src/fastcluster_python.cpp +1264 -0
  19. sequenzo/clustering/src/KMedoid.cpp +263 -0
  20. sequenzo/clustering/src/PAM.cpp +237 -0
  21. sequenzo/clustering/src/PAMonce.cpp +265 -0
  22. sequenzo/clustering/src/cluster_quality.cpp +496 -0
  23. sequenzo/clustering/src/cluster_quality.h +128 -0
  24. sequenzo/clustering/src/cluster_quality_backup.cpp +570 -0
  25. sequenzo/clustering/src/module.cpp +228 -0
  26. sequenzo/clustering/src/weightedinertia.cpp +111 -0
  27. sequenzo/clustering/utils/__init__.py +27 -0
  28. sequenzo/clustering/utils/disscenter.py +122 -0
  29. sequenzo/data_preprocessing/__init__.py +20 -0
  30. sequenzo/data_preprocessing/helpers.py +256 -0
  31. sequenzo/datasets/__init__.py +41 -0
  32. sequenzo/datasets/biofam.csv +2001 -0
  33. sequenzo/datasets/biofam_child_domain.csv +2001 -0
  34. sequenzo/datasets/biofam_left_domain.csv +2001 -0
  35. sequenzo/datasets/biofam_married_domain.csv +2001 -0
  36. sequenzo/datasets/chinese_colonial_territories.csv +12 -0
  37. sequenzo/datasets/country_co2_emissions.csv +194 -0
  38. sequenzo/datasets/country_co2_emissions_global_deciles.csv +195 -0
  39. sequenzo/datasets/country_co2_emissions_global_quintiles.csv +195 -0
  40. sequenzo/datasets/country_co2_emissions_local_deciles.csv +195 -0
  41. sequenzo/datasets/country_co2_emissions_local_quintiles.csv +195 -0
  42. sequenzo/datasets/country_gdp_per_capita.csv +194 -0
  43. sequenzo/datasets/mvad.csv +713 -0
  44. sequenzo/datasets/pairfam_family.csv +1867 -0
  45. sequenzo/datasets/polyadic_samplec1.csv +61 -0
  46. sequenzo/datasets/polyadic_samplep1.csv +61 -0
  47. sequenzo/datasets/polyadic_seqc1.csv +61 -0
  48. sequenzo/datasets/polyadic_seqp1.csv +61 -0
  49. sequenzo/define_sequence_data.py +609 -0
  50. sequenzo/dissimilarity_measures/__init__.py +31 -0
  51. sequenzo/dissimilarity_measures/c_code.cpython-311-darwin.so +0 -0
  52. sequenzo/dissimilarity_measures/get_distance_matrix.py +702 -0
  53. sequenzo/dissimilarity_measures/get_substitution_cost_matrix.py +241 -0
  54. sequenzo/dissimilarity_measures/src/DHDdistance.cpp +148 -0
  55. sequenzo/dissimilarity_measures/src/LCPdistance.cpp +114 -0
  56. sequenzo/dissimilarity_measures/src/OMdistance.cpp +247 -0
  57. sequenzo/dissimilarity_measures/src/OMspellDistance.cpp +281 -0
  58. sequenzo/dissimilarity_measures/src/__init__.py +0 -0
  59. sequenzo/dissimilarity_measures/src/dist2matrix.cpp +63 -0
  60. sequenzo/dissimilarity_measures/src/dp_utils.h +160 -0
  61. sequenzo/dissimilarity_measures/src/module.cpp +34 -0
  62. sequenzo/dissimilarity_measures/src/setup.py +30 -0
  63. sequenzo/dissimilarity_measures/src/utils.h +25 -0
  64. sequenzo/dissimilarity_measures/src/xsimd/.github/cmake-test/main.cpp +6 -0
  65. sequenzo/dissimilarity_measures/src/xsimd/benchmark/main.cpp +159 -0
  66. sequenzo/dissimilarity_measures/src/xsimd/benchmark/xsimd_benchmark.hpp +565 -0
  67. sequenzo/dissimilarity_measures/src/xsimd/docs/source/conf.py +37 -0
  68. sequenzo/dissimilarity_measures/src/xsimd/examples/mandelbrot.cpp +330 -0
  69. sequenzo/dissimilarity_measures/src/xsimd/examples/pico_bench.hpp +246 -0
  70. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_arithmetic.hpp +266 -0
  71. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_complex.hpp +112 -0
  72. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_details.hpp +323 -0
  73. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_logical.hpp +218 -0
  74. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_math.hpp +2583 -0
  75. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_memory.hpp +880 -0
  76. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_rounding.hpp +72 -0
  77. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_swizzle.hpp +174 -0
  78. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_trigo.hpp +978 -0
  79. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx.hpp +1924 -0
  80. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx2.hpp +1144 -0
  81. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512bw.hpp +656 -0
  82. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512cd.hpp +28 -0
  83. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512dq.hpp +244 -0
  84. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512er.hpp +20 -0
  85. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512f.hpp +2650 -0
  86. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512ifma.hpp +20 -0
  87. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512pf.hpp +20 -0
  88. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vbmi.hpp +77 -0
  89. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vbmi2.hpp +131 -0
  90. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vnni_avx512bw.hpp +20 -0
  91. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vnni_avx512vbmi2.hpp +20 -0
  92. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avxvnni.hpp +20 -0
  93. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_common.hpp +24 -0
  94. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_common_fwd.hpp +77 -0
  95. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_constants.hpp +393 -0
  96. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_emulated.hpp +788 -0
  97. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_avx.hpp +93 -0
  98. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_avx2.hpp +46 -0
  99. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_sse.hpp +97 -0
  100. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma4.hpp +92 -0
  101. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_i8mm_neon64.hpp +17 -0
  102. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_isa.hpp +142 -0
  103. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_neon.hpp +3142 -0
  104. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_neon64.hpp +1543 -0
  105. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_rvv.hpp +1513 -0
  106. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_scalar.hpp +1260 -0
  107. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse2.hpp +2024 -0
  108. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse3.hpp +67 -0
  109. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse4_1.hpp +339 -0
  110. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse4_2.hpp +44 -0
  111. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_ssse3.hpp +186 -0
  112. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sve.hpp +1155 -0
  113. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_vsx.hpp +892 -0
  114. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_wasm.hpp +1780 -0
  115. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_arch.hpp +240 -0
  116. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_config.hpp +484 -0
  117. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_cpuid.hpp +269 -0
  118. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_inline.hpp +27 -0
  119. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/math/xsimd_rem_pio2.hpp +719 -0
  120. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/memory/xsimd_aligned_allocator.hpp +349 -0
  121. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/memory/xsimd_alignment.hpp +91 -0
  122. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_all_registers.hpp +55 -0
  123. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_api.hpp +2765 -0
  124. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx2_register.hpp +44 -0
  125. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512bw_register.hpp +51 -0
  126. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512cd_register.hpp +51 -0
  127. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512dq_register.hpp +51 -0
  128. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512er_register.hpp +51 -0
  129. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512f_register.hpp +77 -0
  130. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512ifma_register.hpp +51 -0
  131. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512pf_register.hpp +51 -0
  132. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vbmi2_register.hpp +51 -0
  133. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vbmi_register.hpp +51 -0
  134. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512bw_register.hpp +54 -0
  135. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512vbmi2_register.hpp +53 -0
  136. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx_register.hpp +64 -0
  137. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avxvnni_register.hpp +44 -0
  138. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_batch.hpp +1524 -0
  139. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_batch_constant.hpp +300 -0
  140. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_common_arch.hpp +47 -0
  141. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_emulated_register.hpp +80 -0
  142. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_avx2_register.hpp +50 -0
  143. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_avx_register.hpp +50 -0
  144. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_sse_register.hpp +50 -0
  145. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma4_register.hpp +50 -0
  146. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_i8mm_neon64_register.hpp +55 -0
  147. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_neon64_register.hpp +55 -0
  148. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_neon_register.hpp +154 -0
  149. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_register.hpp +94 -0
  150. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_rvv_register.hpp +506 -0
  151. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse2_register.hpp +59 -0
  152. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse3_register.hpp +49 -0
  153. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse4_1_register.hpp +48 -0
  154. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse4_2_register.hpp +48 -0
  155. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_ssse3_register.hpp +48 -0
  156. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sve_register.hpp +156 -0
  157. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_traits.hpp +337 -0
  158. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_utils.hpp +536 -0
  159. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_vsx_register.hpp +77 -0
  160. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_wasm_register.hpp +59 -0
  161. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/xsimd.hpp +75 -0
  162. sequenzo/dissimilarity_measures/src/xsimd/test/architectures/dummy.cpp +7 -0
  163. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set.cpp +13 -0
  164. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean.cpp +24 -0
  165. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_aligned.cpp +25 -0
  166. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_arch_independent.cpp +28 -0
  167. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_tag_dispatch.cpp +25 -0
  168. sequenzo/dissimilarity_measures/src/xsimd/test/doc/manipulating_abstract_batches.cpp +7 -0
  169. sequenzo/dissimilarity_measures/src/xsimd/test/doc/manipulating_parametric_batches.cpp +8 -0
  170. sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum.hpp +31 -0
  171. sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum_avx2.cpp +3 -0
  172. sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum_sse2.cpp +3 -0
  173. sequenzo/dissimilarity_measures/src/xsimd/test/doc/writing_vectorized_code.cpp +11 -0
  174. sequenzo/dissimilarity_measures/src/xsimd/test/main.cpp +31 -0
  175. sequenzo/dissimilarity_measures/src/xsimd/test/test_api.cpp +230 -0
  176. sequenzo/dissimilarity_measures/src/xsimd/test/test_arch.cpp +217 -0
  177. sequenzo/dissimilarity_measures/src/xsimd/test/test_basic_math.cpp +183 -0
  178. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch.cpp +1049 -0
  179. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_bool.cpp +508 -0
  180. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_cast.cpp +409 -0
  181. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_complex.cpp +712 -0
  182. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_constant.cpp +286 -0
  183. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_float.cpp +141 -0
  184. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_int.cpp +365 -0
  185. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_manip.cpp +308 -0
  186. sequenzo/dissimilarity_measures/src/xsimd/test/test_bitwise_cast.cpp +222 -0
  187. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_exponential.cpp +226 -0
  188. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_hyperbolic.cpp +183 -0
  189. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_power.cpp +265 -0
  190. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_trigonometric.cpp +236 -0
  191. sequenzo/dissimilarity_measures/src/xsimd/test/test_conversion.cpp +248 -0
  192. sequenzo/dissimilarity_measures/src/xsimd/test/test_custom_default_arch.cpp +28 -0
  193. sequenzo/dissimilarity_measures/src/xsimd/test/test_error_gamma.cpp +170 -0
  194. sequenzo/dissimilarity_measures/src/xsimd/test/test_explicit_batch_instantiation.cpp +32 -0
  195. sequenzo/dissimilarity_measures/src/xsimd/test/test_exponential.cpp +202 -0
  196. sequenzo/dissimilarity_measures/src/xsimd/test/test_extract_pair.cpp +92 -0
  197. sequenzo/dissimilarity_measures/src/xsimd/test/test_fp_manipulation.cpp +77 -0
  198. sequenzo/dissimilarity_measures/src/xsimd/test/test_gnu_source.cpp +30 -0
  199. sequenzo/dissimilarity_measures/src/xsimd/test/test_hyperbolic.cpp +167 -0
  200. sequenzo/dissimilarity_measures/src/xsimd/test/test_load_store.cpp +304 -0
  201. sequenzo/dissimilarity_measures/src/xsimd/test/test_memory.cpp +61 -0
  202. sequenzo/dissimilarity_measures/src/xsimd/test/test_poly_evaluation.cpp +64 -0
  203. sequenzo/dissimilarity_measures/src/xsimd/test/test_power.cpp +184 -0
  204. sequenzo/dissimilarity_measures/src/xsimd/test/test_rounding.cpp +199 -0
  205. sequenzo/dissimilarity_measures/src/xsimd/test/test_select.cpp +101 -0
  206. sequenzo/dissimilarity_measures/src/xsimd/test/test_shuffle.cpp +760 -0
  207. sequenzo/dissimilarity_measures/src/xsimd/test/test_sum.cpp +4 -0
  208. sequenzo/dissimilarity_measures/src/xsimd/test/test_sum.hpp +34 -0
  209. sequenzo/dissimilarity_measures/src/xsimd/test/test_traits.cpp +172 -0
  210. sequenzo/dissimilarity_measures/src/xsimd/test/test_trigonometric.cpp +208 -0
  211. sequenzo/dissimilarity_measures/src/xsimd/test/test_utils.hpp +611 -0
  212. sequenzo/dissimilarity_measures/src/xsimd/test/test_wasm/test_wasm_playwright.py +123 -0
  213. sequenzo/dissimilarity_measures/src/xsimd/test/test_xsimd_api.cpp +1460 -0
  214. sequenzo/dissimilarity_measures/utils/__init__.py +16 -0
  215. sequenzo/dissimilarity_measures/utils/get_LCP_length_for_2_seq.py +44 -0
  216. sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.cpython-311-darwin.so +0 -0
  217. sequenzo/dissimilarity_measures/utils/seqconc.cpython-311-darwin.so +0 -0
  218. sequenzo/dissimilarity_measures/utils/seqdss.cpython-311-darwin.so +0 -0
  219. sequenzo/dissimilarity_measures/utils/seqdur.cpython-311-darwin.so +0 -0
  220. sequenzo/dissimilarity_measures/utils/seqlength.cpython-311-darwin.so +0 -0
  221. sequenzo/multidomain/__init__.py +23 -0
  222. sequenzo/multidomain/association_between_domains.py +311 -0
  223. sequenzo/multidomain/cat.py +431 -0
  224. sequenzo/multidomain/combt.py +519 -0
  225. sequenzo/multidomain/dat.py +89 -0
  226. sequenzo/multidomain/idcd.py +139 -0
  227. sequenzo/multidomain/linked_polyad.py +292 -0
  228. sequenzo/openmp_setup.py +233 -0
  229. sequenzo/prefix_tree/__init__.py +43 -0
  230. sequenzo/prefix_tree/individual_level_indicators.py +1274 -0
  231. sequenzo/prefix_tree/system_level_indicators.py +465 -0
  232. sequenzo/prefix_tree/utils.py +54 -0
  233. sequenzo/sequence_characteristics/__init__.py +40 -0
  234. sequenzo/sequence_characteristics/complexity_index.py +49 -0
  235. sequenzo/sequence_characteristics/overall_cross_sectional_entropy.py +220 -0
  236. sequenzo/sequence_characteristics/plot_characteristics.py +593 -0
  237. sequenzo/sequence_characteristics/simple_characteristics.py +311 -0
  238. sequenzo/sequence_characteristics/state_frequencies_and_entropy_per_sequence.py +39 -0
  239. sequenzo/sequence_characteristics/turbulence.py +155 -0
  240. sequenzo/sequence_characteristics/variance_of_spell_durations.py +86 -0
  241. sequenzo/sequence_characteristics/within_sequence_entropy.py +43 -0
  242. sequenzo/suffix_tree/__init__.py +48 -0
  243. sequenzo/suffix_tree/individual_level_indicators.py +1638 -0
  244. sequenzo/suffix_tree/system_level_indicators.py +456 -0
  245. sequenzo/suffix_tree/utils.py +56 -0
  246. sequenzo/visualization/__init__.py +29 -0
  247. sequenzo/visualization/plot_mean_time.py +194 -0
  248. sequenzo/visualization/plot_modal_state.py +276 -0
  249. sequenzo/visualization/plot_most_frequent_sequences.py +147 -0
  250. sequenzo/visualization/plot_relative_frequency.py +404 -0
  251. sequenzo/visualization/plot_sequence_index.py +951 -0
  252. sequenzo/visualization/plot_single_medoid.py +153 -0
  253. sequenzo/visualization/plot_state_distribution.py +627 -0
  254. sequenzo/visualization/plot_transition_matrix.py +190 -0
  255. sequenzo/visualization/utils/__init__.py +23 -0
  256. sequenzo/visualization/utils/utils.py +310 -0
  257. sequenzo/with_event_history_analysis/__init__.py +35 -0
  258. sequenzo/with_event_history_analysis/sequence_analysis_multi_state_model.py +850 -0
  259. sequenzo/with_event_history_analysis/sequence_history_analysis.py +283 -0
  260. sequenzo-0.1.24.dist-info/METADATA +255 -0
  261. sequenzo-0.1.24.dist-info/RECORD +264 -0
  262. sequenzo-0.1.24.dist-info/WHEEL +5 -0
  263. sequenzo-0.1.24.dist-info/licenses/LICENSE +28 -0
  264. sequenzo-0.1.24.dist-info/top_level.txt +2 -0
@@ -0,0 +1,495 @@
1
+ # -*- coding: utf-8 -*-
2
+ __doc__ = """Fast hierarchical clustering routines for R and Python
3
+
4
+ Copyright:
5
+ Until package version 1.1.23: © 2011 Daniel Müllner <https://danifold.net>
6
+ All changes from version 1.1.24 on: © Google Inc. <https://www.google.com>
7
+
8
+ This module provides fast hierarchical clustering routines. The "linkage"
9
+ method is designed to provide a replacement for the “linkage” function and
10
+ its siblings in the scipy.cluster.hierarchy module. You may use the methods
11
+ in this module with the same syntax as the corresponding SciPy functions but
12
+ with the benefit of much faster performance.
13
+
14
+ The method "linkage_vector" performs clustering of vector data with memory-
15
+ saving algorithms.
16
+
17
+ Refer to the User's manual "fastcluster.pdf" for comprehensive details. It
18
+ is located in the directory inst/doc/ in the source distribution and may
19
+ also be obtained at <https://danifold.net/fastcluster.html>.
20
+ """
21
+
22
+ __all__ = ['single', 'complete', 'average', 'weighted', 'ward', 'centroid', 'median', 'linkage', 'linkage_vector']
23
+ __version_info__ = ('1', '3', '0')
24
+ __version__ = '.'.join(__version_info__)
25
+
26
+ from numpy import double, empty, array, ndarray, var, cov, dot, expand_dims, \
27
+ ceil, sqrt
28
+ from numpy.linalg import inv
29
+ try:
30
+ from scipy.spatial.distance import pdist
31
+ except ImportError:
32
+ def pdist(*args, **kwargs):
33
+ raise ImportError('The fastcluster.linkage function cannot process '
34
+ 'vector data since the function '
35
+ 'scipy.spatial.distance.pdist could not be '
36
+ 'imported.')
37
+ # from _fastcluster import linkage_wrap, linkage_vector_wrap
38
+ from _sequenzo_fastcluster import linkage_wrap, linkage_vector_wrap
39
+
40
def single(D):
    '''Alias for single linkage clustering.

    Forwards D to the “linkage” function with method='single'; refer to the
    help on “linkage” for the accepted input and the output format.'''
    dendrogram = linkage(D, method='single')
    return dendrogram
44
+
45
def complete(D):
    '''Alias for complete linkage clustering.

    Forwards D to the “linkage” function with method='complete'; refer to the
    help on “linkage” for the accepted input and the output format.'''
    dendrogram = linkage(D, method='complete')
    return dendrogram
49
+
50
def average(D):
    '''Alias for clustering with the “average” distance update formula.

    Forwards D to the “linkage” function with method='average'; refer to the
    help on “linkage” for the accepted input and the output format.'''
    dendrogram = linkage(D, method='average')
    return dendrogram
54
+
55
def weighted(D):
    '''Alias for clustering with the “weighted” distance update formula.

    Forwards D to the “linkage” function with method='weighted'; refer to the
    help on “linkage” for the accepted input and the output format.'''
    dendrogram = linkage(D, method='weighted')
    return dendrogram
59
+
60
def ward(D):
    '''Alias for clustering with the “Ward” distance update formula.

    Forwards D to the “linkage” function with method='ward'; refer to the
    help on “linkage” for the accepted input and the output format.'''
    dendrogram = linkage(D, method='ward')
    return dendrogram
64
+
65
def ward_d2(D):
    '''Alias for the “linkage” function called with method='ward_d2'.

    Refer to the help on “linkage” for the accepted input and the output
    format. NOTE(review): the 'ward_d2' update formula lives in the compiled
    extension and is not described in this module -- confirm its exact
    definition there.'''
    dendrogram = linkage(D, method='ward_d2')
    return dendrogram
67
+
68
def centroid(D):
    '''Alias for clustering with the “centroid” distance update formula.

    Forwards D to the “linkage” function with method='centroid'; refer to the
    help on “linkage” for the accepted input and the output format.'''
    dendrogram = linkage(D, method='centroid')
    return dendrogram
72
+
73
def median(D):
    '''Alias for clustering with the “median” distance update formula.

    Forwards D to the “linkage” function with method='median'; refer to the
    help on “linkage” for the accepted input and the output format.'''
    dendrogram = linkage(D, method='median')
    return dendrogram
77
+
78
# Maps each clustering method name to the integer code handed to the
# compiled extension. This dictionary must agree with the enum method_codes
# in fastcluster.cpp.
mthidx = {
    'single': 0,
    'complete': 1,
    'average': 2,
    'weighted': 3,
    'ward': 4,
    'ward_d2': 7,
    'centroid': 5,
    'median': 6,
}
87
+
88
+ def linkage(X, method='single', metric='euclidean', preserve_input=True):
89
+ r'''Hierarchical, agglomerative clustering on a dissimilarity matrix or on
90
+ Euclidean data.
91
+
92
+ Apart from the argument 'preserve_input', the method has the same input
93
+ parameters and output format as the functions of the same name in the
94
+ module scipy.cluster.hierarchy.
95
+
96
+ The argument X is preferably a NumPy array with floating point entries
97
+ (X.dtype==numpy.double). Any other data format will be converted before
98
+ it is processed.
99
+
100
+ If X is a one-dimensional array, it is considered a condensed matrix of
101
+ pairwise dissimilarities in the format which is returned by
102
+ scipy.spatial.distance.pdist. It contains the flattened, upper-
103
+ triangular part of a pairwise dissimilarity matrix. That is, if there
104
+ are N data points and the matrix d contains the dissimilarity between
105
+ the i-th and j-th observation at position d(i,j), the vector X has
106
+ length N(N-1)/2 and is ordered as follows:
107
+
108
+ [ d(0,1), d(0,2), ..., d(0,n-1), d(1,2), ..., d(1,n-1), ...,
109
+ d(n-2,n-1) ]
110
+
111
+ The 'metric' argument is ignored in case of dissimilarity input.
112
+
113
+ The optional argument 'preserve_input' specifies whether the method
114
+ makes a working copy of the dissimilarity vector or writes temporary
115
+ data into the existing array. If the dissimilarities are generated for
116
+ the clustering step only and are not needed afterward, approximately
117
+ half the memory can be saved by specifying 'preserve_input=False'. Note
118
+ that the input array X contains unspecified values after this procedure.
119
+ It is therefore safer to write
120
+
121
+ linkage(X, method="...", preserve_input=False)
122
+ del X
123
+
124
+ to make sure that the matrix X is not accessed accidentally after it has
125
+ been used as scratch memory. (The single linkage algorithm does not
126
+ write to the distance matrix or its copy anyway, so the 'preserve_input'
127
+ flag has no effect in this case.)
128
+
129
+ If X contains vector data, it must be a two-dimensional array with N
130
+ observations in D dimensions as an (N×D) array. The preserve_input
131
+ argument is ignored in this case. The specified metric is used to
132
+ generate pairwise distances from the input. The following two function
133
+ calls yield the same output:
134
+
135
+ linkage(pdist(X, metric), method="...", preserve_input=False)
136
+ linkage(X, metric=metric, method="...")
137
+
138
+ The general scheme of the agglomerative clustering procedure is as
139
+ follows:
140
+
141
+ 1. Start with N singleton clusters (nodes) labeled 0,...,N−1, which
142
+ represent the input points.
143
+ 2. Find a pair of nodes with minimal distance among all pairwise
144
+ distances.
145
+ 3. Join the two nodes into a new node and remove the two old nodes.
146
+ The new nodes are labeled consecutively N, N+1, ...
147
+ 4. The distances from the new node to all other nodes is determined by
148
+ the method parameter (see below).
149
+ 5. Repeat N−1 times from step 2, until there is one big node, which
150
+ contains all original input points.
151
+
152
+ The output of linkage is stepwise dendrogram, which is represented as an
153
+ (N−1)×4 NumPy array with floating point entries (dtype=numpy.double).
154
+ The first two columns contain the node indices which are joined in each
155
+ step. The input nodes are labeled 0,...,N−1, and the newly generated
156
+ nodes have the labels N,...,2N−2. The third column contains the distance
157
+ between the two nodes at each step, ie. the current minimal distance at
158
+ the time of the merge. The fourth column counts the number of points
159
+ which comprise each new node.
160
+
161
+ The parameter method specifies which clustering scheme to use. The
162
+ clustering scheme determines the distance from a new node to the other
163
+ nodes. Denote the dissimilarities by d, the nodes to be joined by I, J,
164
+ the new node by K and any other node by L. The symbol |I| denotes the
165
+ size of the cluster I.
166
+
167
+ method='single': d(K,L) = min(d(I,L), d(J,L))
168
+
169
+ The distance between two clusters A, B is the closest distance between
170
+ any two points in each cluster:
171
+
172
+ d(A,B) = min{ d(a,b) | a∈A, b∈B }
173
+
174
+ method='complete': d(K,L) = max(d(I,L), d(J,L))
175
+
176
+ The distance between two clusters A, B is the maximal distance between
177
+ any two points in each cluster:
178
+
179
+ d(A,B) = max{ d(a,b) | a∈A, b∈B }
180
+
181
+ method='average': d(K,L) = ( |I|·d(I,L) + |J|·d(J,L) ) / (|I|+|J|)
182
+
183
+ The distance between two clusters A, B is the average distance between
184
+ the points in the two clusters:
185
+
186
+ d(A,B) = (|A|·|B|)^(-1) · \sum { d(a,b) | a∈A, b∈B }
187
+
188
+ method='weighted': d(K,L) = (d(I,L)+d(J,L))/2
189
+
190
+ There is no global description for the distance between clusters since
191
+ the distance depends on the order of the merging steps.
192
+
193
+ The following three methods are intended for Euclidean data only, ie.
194
+ when X contains the pairwise (non-squared!) distances between vectors in
195
+ Euclidean space. The algorithm will work on any input, however, and it
196
+ is up to the user to make sure that applying the methods makes sense.
197
+
198
+ method='centroid': d(K,L) = ( (|I|·d(I,L) + |J|·d(J,L)) / (|I|+|J|)
199
+ − |I|·|J|·d(I,J)/(|I|+|J|)^2 )^(1/2)
200
+
201
+ There is a geometric interpretation: d(A,B) is the distance between
202
+ the centroids (ie. barycenters) of the clusters in Euclidean space:
203
+
204
+ d(A,B) = ‖c_A−c_B∥,
205
+
206
+ where c_A denotes the centroid of the points in cluster A.
207
+
208
+ method='median': d(K,L) = ( d(I,L)/2 + d(J,L)/2 − d(I,J)/4 )^(1/2)
209
+
210
+ Define the midpoint w_K of a cluster K iteratively as w_K=k if K={k}
211
+ is a singleton and as the midpoint (w_I+w_J)/2 if K is formed by
212
+ joining I and J. Then we have
213
+
214
+ d(A,B) = ∥w_A−w_B∥
215
+
216
+ in Euclidean space for all nodes A,B. Notice however that this
217
+ distance depends on the order of the merging steps.
218
+
219
+ method='ward': d(K,L) = ( ((|I|+|L)d(I,L) + (|J|+|L|)d(J,L) − |L|d(I,J))
220
+ / (|I|+|J|+|L|) )^(1/2)
221
+
222
+ The global cluster dissimilarity can be expressed as
223
+
224
+ d(A,B) = ( 2|A|·|B|/(|A|+|B|) )^(1/2) · ‖c_A−c_B∥,
225
+
226
+ where c_A again denotes the centroid of the points in cluster A.
227
+
228
+ The clustering algorithm handles infinite values correctly, as long as the
229
+ chosen distance update formula makes sense. If a NaN value occurs, either
230
+ in the original dissimilarities or as an updated dissimilarity, an error is
231
+ raised.
232
+
233
+ The linkage method does not treat NumPy's masked arrays as special
234
+ and simply ignores the mask.'''
235
+ X = array(X, subok=True)
236
+ if X.ndim==1:
237
+ if method=='single':
238
+ preserve_input = False
239
+ X = array(X, dtype=double, copy=True if preserve_input else None,
240
+ order='C', subok=True)
241
+ NN = len(X)
242
+ N = int(ceil(sqrt(NN*2)))
243
+ if (N*(N-1)//2) != NN:
244
+ raise ValueError(r'The length of the condensed distance matrix '
245
+ r'must be (k \choose 2) for k data points!')
246
+ else:
247
+ assert X.ndim==2
248
+ N = len(X)
249
+ X = pdist(X, metric=metric)
250
+ X = array(X, dtype=double, order='C', subok=True)
251
+ Z = empty((N-1,4))
252
+ if N > 1:
253
+ linkage_wrap(N, X, Z, mthidx[method])
254
+ return Z
255
+
256
# Metric name -> integer code handed to the compiled extension.
# This dictionary must agree with the enum metric_codes in
# fastcluster_python.cpp.
mtridx = {
    'euclidean': 0,
    'minkowski': 1,
    'cityblock': 2,
    'seuclidean': 3,
    'sqeuclidean': 4,
    'cosine': 5,
    'hamming': 6,
    'jaccard': 7,
    'chebychev': 8,
    'canberra': 9,
    'braycurtis': 10,
    'mahalanobis': 11,
    'yule': 12,
    'matching': 13,
    'sokalmichener': 13,  # an alias for 'matching'
    'dice': 14,
    'rogerstanimoto': 15,
    'russellrao': 16,
    'sokalsneath': 17,
    'kulsinski': 18,
    # Code used by linkage_vector when 'metric' is a callable, not a name.
    'USER': 19,
}
279
+
280
# Names of the metrics that are defined on Boolean vectors. For single
# linkage, linkage_vector converts its input to dtype bool when the chosen
# metric is listed here. Each name appears exactly once.
booleanmetrics = ('jaccard', 'yule', 'matching', 'dice', 'kulsinski',
                  'rogerstanimoto', 'sokalmichener', 'russellrao',
                  'sokalsneath')
283
+
284
def linkage_vector(X, method='single', metric='euclidean', extraarg=None):
    r'''Hierarchical (agglomerative) clustering on Euclidean data.

    Compared to the 'linkage' method, 'linkage_vector' uses a memory-saving
    algorithm. While the linkage method requires Θ(N^2) memory for
    clustering of N points, this method needs Θ(ND) for N points in R^D,
    which is usually much smaller.

    The argument X has the same format as before, when X describes vector
    data, i.e. it is an (N×D) array. Also the output array has the same
    format. The parameter method must be one of 'single', 'centroid',
    'median', 'ward', i.e. only for these methods there exist memory-saving
    algorithms currently. If 'method' is one of 'centroid', 'median',
    'ward', the 'metric' must be 'euclidean'.

    For single linkage clustering, any dissimilarity function may be chosen.
    Basically, every metric which is implemented in the method
    scipy.spatial.distance.pdist is reimplemented here. However, the metrics
    differ in some instances since a number of mistakes and typos (both in
    the code and in the documentation) were corrected in the fastcluster
    package.

    Therefore, the available metrics with their definitions are listed below
    as a reference. The symbols u and v mostly denote vectors in R^D with
    coordinates u_j and v_j respectively. See below for additional metrics
    for Boolean vectors. Unless otherwise stated, the input array X is
    converted to a floating point array (X.dtype==numpy.double) if it does
    not already have the required data type. Some metrics accept Boolean
    input; in this case this is stated explicitly below.

    If a NaN value occurs, either in the original dissimilarities or as an
    updated dissimilarity, an error is raised. In principle, the clustering
    algorithm handles infinite values correctly, but the user is advised to
    carefully check the behavior of the metric and distance update formulas
    under these circumstances.

    The distance formulas combined with the clustering in the
    'linkage_vector' method do not have specified behavior if the data X
    contains infinite or NaN values. Also, the masks in NumPy’s masked
    arrays are simply ignored.

    metric='euclidean': Euclidean metric, L_2 norm

        d(u,v) = ∥u−v∥ = ( \sum_j { (u_j−v_j)^2 } )^(1/2)

    metric='sqeuclidean': squared Euclidean metric

        d(u,v) = ∥u−v∥^2 = \sum_j { (u_j−v_j)^2 }

    metric='seuclidean': standardized Euclidean metric

        d(u,v) = ( \sum_j { (u_j−v_j)^2 / V_j } )^(1/2)

      The vector V=(V_0,...,V_{D−1}) is given as the 'extraarg' argument. If
      no 'extraarg' is given, V_j is by default the unbiased sample variance
      of all observations in the j-th coordinate:

        V_j = Var_i (X(i,j) ) = 1/(N−1) · \sum_i ( X(i,j)^2 − μ(X_j)^2 )

      (Here, μ(X_j) denotes as usual the mean of X(i,j) over all rows i.)

    metric='mahalanobis': Mahalanobis distance

        d(u,v) = ( transpose(u−v) V (u−v) )^(1/2)

      Here, V=extraarg, a (D×D)-matrix. If V is not specified, the inverse
      of the covariance matrix numpy.linalg.inv(numpy.cov(X, rowvar=False))
      is used.

    metric='cityblock': the Manhattan distance, L_1 norm

        d(u,v) = \sum_j |u_j−v_j|

    metric='chebychev': the supremum norm, L_∞ norm

        d(u,v) = max_j { |u_j−v_j| }

    metric='minkowski': the L_p norm

        d(u,v) = ( \sum_j |u_j−v_j|^p ) ^(1/p)

      This metric coincides with the cityblock, euclidean and chebychev
      metrics for p=1, p=2 and p=∞ (numpy.inf), respectively. The parameter
      p is given as the 'extraarg' argument.

    metric='cosine'

        d(u,v) = 1 − ⟨u,v⟩ / (∥u∥·∥v∥)
               = 1 − (\sum_j u_j·v_j) / ( (\sum u_j^2)(\sum v_j^2) )^(1/2)

    metric='correlation': This method first mean-centers the rows of X and
      then applies the 'cosine' distance. Equivalently, the correlation
      distance measures 1 − (Pearson’s correlation coefficient).

        d(u,v) = 1 − ⟨u−μ(u),v−μ(v)⟩ / (∥u−μ(u)∥·∥v−μ(v)∥)

    metric='canberra'

        d(u,v) = \sum_j ( |u_j−v_j| / (|u_j|+|v_j|) )

      Summands with u_j=v_j=0 contribute 0 to the sum.

    metric='braycurtis'

        d(u,v) = (\sum_j |u_j-v_j|) / (\sum_j |u_j+v_j|)

    metric=(user function): The parameter metric may also be a function
      which accepts two NumPy floating point vectors and returns a number.
      E.g. the Euclidean distance could be emulated with

        fn = lambda u, v: numpy.sqrt(((u-v)*(u-v)).sum())
        linkage_vector(X, method='single', metric=fn)

      This method, however, is much slower than the built-in function.

    metric='hamming': The Hamming distance accepts a Boolean array
      (X.dtype==bool) for efficient storage. Any other data type is
      converted to numpy.double.

        d(u,v) = |{j | u_j≠v_j }|

    The following metrics are designed for Boolean vectors. The input array
    is converted to the 'bool' data type if it is not Boolean already. Use
    the following abbreviations to count the number of True/False
    combinations:

        a = |{j | u_j ∧ v_j }|
        b = |{j | u_j ∧ (¬v_j) }|
        c = |{j | (¬u_j) ∧ v_j }|
        d = |{j | (¬u_j) ∧ (¬v_j) }|

    Recall that D denotes the number of dimensions, hence D=a+b+c+d.

    metric='jaccard'

        d(u,v) = (b+c) / (b+c+d)
        d(0,0) = 0

    metric='yule'

        d(u,v) = 2bc / (ad+bc)  if bc≠0
        d(u,v) = 0              if bc=0

    metric='dice':

        d(u,v) = (b+c) / (2a+b+c)
        d(0,0) = 0

    metric='rogerstanimoto':

        d(u,v) = 2(b+c) / (b+c+D)

    metric='russellrao':

        d(u,v) = (b+c+d) / D

    metric='sokalsneath':

        d(u,v) = 2(b+c)/ ( a+2(b+c))
        d(0,0) = 0

    metric='kulsinski'

        d(u,v) = (b/(a+b) + c/(a+c)) / 2

    metric='matching':

        d(u,v) = (b+c)/D

      Notice that when given a Boolean array, the 'matching' and 'hamming'
      distance are the same. The 'matching' distance formula, however,
      converts every input to Boolean first. Hence, the vectors (0,1) and
      (0,2) have zero 'matching' distance since they are both converted to
      (False, True) but the Hamming distance is 0.5.

    metric='sokalmichener' is an alias for 'matching'.'''
    # --- Step 1: bring X into the dtype/layout the extension expects. -----
    if method != 'single':
        # The memory-saving centroid/median/ward algorithms are Euclidean only.
        assert metric=='euclidean'
        # NOTE(review): a copy is forced only for 'ward', presumably because
        # that routine modifies its input in place -- confirm against the
        # C++ implementation.
        copy_flag = True if method=='ward' else None
        X = array(X, dtype=double, copy=copy_flag, order='C', subok=True)
    else:
        # 'USER' is the internal code for callable metrics (assigned below);
        # it is not accepted as a metric name from the caller.
        assert metric!='USER'
        if metric == 'hamming':
            # Hamming keeps a Boolean input as bool; anything else -> double.
            X = array(X, subok=True)
            as_bool = X.dtype==bool
        else:
            as_bool = metric in booleanmetrics
        X = array(X, dtype=(bool if as_bool else double), order='C',
                  subok=True)

    assert X.ndim==2
    N = len(X)
    # One output row per merge step; allocated uninitialised.
    Z = empty((N-1,4))

    # --- Step 2: metric-specific defaults and preprocessing. --------------
    if metric=='seuclidean':
        if extraarg is None:
            # Default V: unbiased per-column sample variance.
            extraarg = var(X, axis=0, ddof=1)
    elif metric=='mahalanobis':
        if extraarg is None:
            extraarg = inv(cov(X, rowvar=False))
        # instead of the inverse covariance matrix, pass the matrix product
        # with the data matrix!
        extraarg = array(dot(X,extraarg), dtype=double, order='C', subok=True)
    elif metric=='correlation':
        # Correlation distance == cosine distance on row-centered data.
        row_means = expand_dims(X.mean(axis=1),1)
        X = X-row_means
        metric='cosine'
    elif not isinstance(metric, str):
        # A callable metric: hand the function over via 'extraarg'.
        assert extraarg is None
        metric, extraarg = 'USER', metric
    elif metric!='minkowski':
        # Only 'minkowski' (its exponent p) takes a user-supplied extraarg
        # among the remaining metrics.
        assert extraarg is None

    # --- Step 3: run the compiled clustering routine. ---------------------
    if N > 1:
        method_code = mthidx[method]
        metric_code = mtridx[metric]
        linkage_vector_wrap(X, Z, method_code, metric_code, extraarg)
    return Z