sequenzo-0.1.21-cp312-cp312-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sequenzo might be problematic.

Files changed (260)
  1. sequenzo/__init__.py +240 -0
  2. sequenzo/big_data/__init__.py +12 -0
  3. sequenzo/big_data/clara/__init__.py +26 -0
  4. sequenzo/big_data/clara/clara.py +467 -0
  5. sequenzo/big_data/clara/utils/__init__.py +27 -0
  6. sequenzo/big_data/clara/utils/aggregatecases.py +92 -0
  7. sequenzo/big_data/clara/utils/davies_bouldin.py +91 -0
  8. sequenzo/big_data/clara/utils/get_weighted_diss.cpython-312-darwin.so +0 -0
  9. sequenzo/big_data/clara/utils/wfcmdd.py +205 -0
  10. sequenzo/big_data/clara/visualization.py +88 -0
  11. sequenzo/clustering/KMedoids.py +196 -0
  12. sequenzo/clustering/__init__.py +30 -0
  13. sequenzo/clustering/clustering_c_code.cpython-312-darwin.so +0 -0
  14. sequenzo/clustering/hierarchical_clustering.py +1380 -0
  15. sequenzo/clustering/src/KMedoid.cpp +262 -0
  16. sequenzo/clustering/src/PAM.cpp +236 -0
  17. sequenzo/clustering/src/PAMonce.cpp +234 -0
  18. sequenzo/clustering/src/cluster_quality.cpp +496 -0
  19. sequenzo/clustering/src/cluster_quality.h +128 -0
  20. sequenzo/clustering/src/cluster_quality_backup.cpp +570 -0
  21. sequenzo/clustering/src/module.cpp +228 -0
  22. sequenzo/clustering/src/weightedinertia.cpp +111 -0
  23. sequenzo/clustering/utils/__init__.py +27 -0
  24. sequenzo/clustering/utils/disscenter.py +122 -0
  25. sequenzo/data_preprocessing/__init__.py +20 -0
  26. sequenzo/data_preprocessing/helpers.py +256 -0
  27. sequenzo/datasets/__init__.py +41 -0
  28. sequenzo/datasets/biofam.csv +2001 -0
  29. sequenzo/datasets/biofam_child_domain.csv +2001 -0
  30. sequenzo/datasets/biofam_left_domain.csv +2001 -0
  31. sequenzo/datasets/biofam_married_domain.csv +2001 -0
  32. sequenzo/datasets/chinese_colonial_territories.csv +12 -0
  33. sequenzo/datasets/country_co2_emissions.csv +194 -0
  34. sequenzo/datasets/country_co2_emissions_global_deciles.csv +195 -0
  35. sequenzo/datasets/country_co2_emissions_global_quintiles.csv +195 -0
  36. sequenzo/datasets/country_co2_emissions_local_deciles.csv +195 -0
  37. sequenzo/datasets/country_co2_emissions_local_quintiles.csv +195 -0
  38. sequenzo/datasets/country_gdp_per_capita.csv +194 -0
  39. sequenzo/datasets/mvad.csv +713 -0
  40. sequenzo/datasets/pairfam_family.csv +1867 -0
  41. sequenzo/datasets/polyadic_samplec1.csv +61 -0
  42. sequenzo/datasets/polyadic_samplep1.csv +61 -0
  43. sequenzo/datasets/polyadic_seqc1.csv +61 -0
  44. sequenzo/datasets/polyadic_seqp1.csv +61 -0
  45. sequenzo/define_sequence_data.py +609 -0
  46. sequenzo/dissimilarity_measures/__init__.py +31 -0
  47. sequenzo/dissimilarity_measures/c_code.cpython-312-darwin.so +0 -0
  48. sequenzo/dissimilarity_measures/get_distance_matrix.py +702 -0
  49. sequenzo/dissimilarity_measures/get_substitution_cost_matrix.py +241 -0
  50. sequenzo/dissimilarity_measures/src/DHDdistance.cpp +148 -0
  51. sequenzo/dissimilarity_measures/src/LCPdistance.cpp +114 -0
  52. sequenzo/dissimilarity_measures/src/OMdistance.cpp +247 -0
  53. sequenzo/dissimilarity_measures/src/OMspellDistance.cpp +281 -0
  54. sequenzo/dissimilarity_measures/src/__init__.py +0 -0
  55. sequenzo/dissimilarity_measures/src/dist2matrix.cpp +63 -0
  56. sequenzo/dissimilarity_measures/src/dp_utils.h +160 -0
  57. sequenzo/dissimilarity_measures/src/module.cpp +34 -0
  58. sequenzo/dissimilarity_measures/src/setup.py +30 -0
  59. sequenzo/dissimilarity_measures/src/utils.h +25 -0
  60. sequenzo/dissimilarity_measures/src/xsimd/.github/cmake-test/main.cpp +6 -0
  61. sequenzo/dissimilarity_measures/src/xsimd/benchmark/main.cpp +159 -0
  62. sequenzo/dissimilarity_measures/src/xsimd/benchmark/xsimd_benchmark.hpp +565 -0
  63. sequenzo/dissimilarity_measures/src/xsimd/docs/source/conf.py +37 -0
  64. sequenzo/dissimilarity_measures/src/xsimd/examples/mandelbrot.cpp +330 -0
  65. sequenzo/dissimilarity_measures/src/xsimd/examples/pico_bench.hpp +246 -0
  66. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_arithmetic.hpp +266 -0
  67. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_complex.hpp +112 -0
  68. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_details.hpp +323 -0
  69. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_logical.hpp +218 -0
  70. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_math.hpp +2583 -0
  71. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_memory.hpp +880 -0
  72. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_rounding.hpp +72 -0
  73. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_swizzle.hpp +174 -0
  74. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_trigo.hpp +978 -0
  75. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx.hpp +1924 -0
  76. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx2.hpp +1144 -0
  77. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512bw.hpp +656 -0
  78. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512cd.hpp +28 -0
  79. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512dq.hpp +244 -0
  80. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512er.hpp +20 -0
  81. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512f.hpp +2650 -0
  82. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512ifma.hpp +20 -0
  83. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512pf.hpp +20 -0
  84. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vbmi.hpp +77 -0
  85. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vbmi2.hpp +131 -0
  86. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vnni_avx512bw.hpp +20 -0
  87. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vnni_avx512vbmi2.hpp +20 -0
  88. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avxvnni.hpp +20 -0
  89. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_common.hpp +24 -0
  90. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_common_fwd.hpp +77 -0
  91. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_constants.hpp +393 -0
  92. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_emulated.hpp +788 -0
  93. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_avx.hpp +93 -0
  94. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_avx2.hpp +46 -0
  95. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_sse.hpp +97 -0
  96. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma4.hpp +92 -0
  97. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_i8mm_neon64.hpp +17 -0
  98. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_isa.hpp +142 -0
  99. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_neon.hpp +3142 -0
  100. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_neon64.hpp +1543 -0
  101. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_rvv.hpp +1513 -0
  102. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_scalar.hpp +1260 -0
  103. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse2.hpp +2024 -0
  104. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse3.hpp +67 -0
  105. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse4_1.hpp +339 -0
  106. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse4_2.hpp +44 -0
  107. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_ssse3.hpp +186 -0
  108. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sve.hpp +1155 -0
  109. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_vsx.hpp +892 -0
  110. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_wasm.hpp +1780 -0
  111. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_arch.hpp +240 -0
  112. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_config.hpp +484 -0
  113. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_cpuid.hpp +269 -0
  114. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_inline.hpp +27 -0
  115. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/math/xsimd_rem_pio2.hpp +719 -0
  116. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/memory/xsimd_aligned_allocator.hpp +349 -0
  117. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/memory/xsimd_alignment.hpp +91 -0
  118. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_all_registers.hpp +55 -0
  119. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_api.hpp +2765 -0
  120. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx2_register.hpp +44 -0
  121. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512bw_register.hpp +51 -0
  122. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512cd_register.hpp +51 -0
  123. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512dq_register.hpp +51 -0
  124. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512er_register.hpp +51 -0
  125. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512f_register.hpp +77 -0
  126. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512ifma_register.hpp +51 -0
  127. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512pf_register.hpp +51 -0
  128. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vbmi2_register.hpp +51 -0
  129. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vbmi_register.hpp +51 -0
  130. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512bw_register.hpp +54 -0
  131. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512vbmi2_register.hpp +53 -0
  132. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx_register.hpp +64 -0
  133. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avxvnni_register.hpp +44 -0
  134. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_batch.hpp +1524 -0
  135. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_batch_constant.hpp +300 -0
  136. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_common_arch.hpp +47 -0
  137. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_emulated_register.hpp +80 -0
  138. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_avx2_register.hpp +50 -0
  139. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_avx_register.hpp +50 -0
  140. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_sse_register.hpp +50 -0
  141. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma4_register.hpp +50 -0
  142. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_i8mm_neon64_register.hpp +55 -0
  143. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_neon64_register.hpp +55 -0
  144. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_neon_register.hpp +154 -0
  145. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_register.hpp +94 -0
  146. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_rvv_register.hpp +506 -0
  147. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse2_register.hpp +59 -0
  148. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse3_register.hpp +49 -0
  149. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse4_1_register.hpp +48 -0
  150. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse4_2_register.hpp +48 -0
  151. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_ssse3_register.hpp +48 -0
  152. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sve_register.hpp +156 -0
  153. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_traits.hpp +337 -0
  154. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_utils.hpp +536 -0
  155. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_vsx_register.hpp +77 -0
  156. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_wasm_register.hpp +59 -0
  157. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/xsimd.hpp +75 -0
  158. sequenzo/dissimilarity_measures/src/xsimd/test/architectures/dummy.cpp +7 -0
  159. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set.cpp +13 -0
  160. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean.cpp +24 -0
  161. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_aligned.cpp +25 -0
  162. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_arch_independent.cpp +28 -0
  163. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_tag_dispatch.cpp +25 -0
  164. sequenzo/dissimilarity_measures/src/xsimd/test/doc/manipulating_abstract_batches.cpp +7 -0
  165. sequenzo/dissimilarity_measures/src/xsimd/test/doc/manipulating_parametric_batches.cpp +8 -0
  166. sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum.hpp +31 -0
  167. sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum_avx2.cpp +3 -0
  168. sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum_sse2.cpp +3 -0
  169. sequenzo/dissimilarity_measures/src/xsimd/test/doc/writing_vectorized_code.cpp +11 -0
  170. sequenzo/dissimilarity_measures/src/xsimd/test/main.cpp +31 -0
  171. sequenzo/dissimilarity_measures/src/xsimd/test/test_api.cpp +230 -0
  172. sequenzo/dissimilarity_measures/src/xsimd/test/test_arch.cpp +217 -0
  173. sequenzo/dissimilarity_measures/src/xsimd/test/test_basic_math.cpp +183 -0
  174. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch.cpp +1049 -0
  175. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_bool.cpp +508 -0
  176. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_cast.cpp +409 -0
  177. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_complex.cpp +712 -0
  178. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_constant.cpp +286 -0
  179. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_float.cpp +141 -0
  180. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_int.cpp +365 -0
  181. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_manip.cpp +308 -0
  182. sequenzo/dissimilarity_measures/src/xsimd/test/test_bitwise_cast.cpp +222 -0
  183. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_exponential.cpp +226 -0
  184. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_hyperbolic.cpp +183 -0
  185. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_power.cpp +265 -0
  186. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_trigonometric.cpp +236 -0
  187. sequenzo/dissimilarity_measures/src/xsimd/test/test_conversion.cpp +248 -0
  188. sequenzo/dissimilarity_measures/src/xsimd/test/test_custom_default_arch.cpp +28 -0
  189. sequenzo/dissimilarity_measures/src/xsimd/test/test_error_gamma.cpp +170 -0
  190. sequenzo/dissimilarity_measures/src/xsimd/test/test_explicit_batch_instantiation.cpp +32 -0
  191. sequenzo/dissimilarity_measures/src/xsimd/test/test_exponential.cpp +202 -0
  192. sequenzo/dissimilarity_measures/src/xsimd/test/test_extract_pair.cpp +92 -0
  193. sequenzo/dissimilarity_measures/src/xsimd/test/test_fp_manipulation.cpp +77 -0
  194. sequenzo/dissimilarity_measures/src/xsimd/test/test_gnu_source.cpp +30 -0
  195. sequenzo/dissimilarity_measures/src/xsimd/test/test_hyperbolic.cpp +167 -0
  196. sequenzo/dissimilarity_measures/src/xsimd/test/test_load_store.cpp +304 -0
  197. sequenzo/dissimilarity_measures/src/xsimd/test/test_memory.cpp +61 -0
  198. sequenzo/dissimilarity_measures/src/xsimd/test/test_poly_evaluation.cpp +64 -0
  199. sequenzo/dissimilarity_measures/src/xsimd/test/test_power.cpp +184 -0
  200. sequenzo/dissimilarity_measures/src/xsimd/test/test_rounding.cpp +199 -0
  201. sequenzo/dissimilarity_measures/src/xsimd/test/test_select.cpp +101 -0
  202. sequenzo/dissimilarity_measures/src/xsimd/test/test_shuffle.cpp +760 -0
  203. sequenzo/dissimilarity_measures/src/xsimd/test/test_sum.cpp +4 -0
  204. sequenzo/dissimilarity_measures/src/xsimd/test/test_sum.hpp +34 -0
  205. sequenzo/dissimilarity_measures/src/xsimd/test/test_traits.cpp +172 -0
  206. sequenzo/dissimilarity_measures/src/xsimd/test/test_trigonometric.cpp +208 -0
  207. sequenzo/dissimilarity_measures/src/xsimd/test/test_utils.hpp +611 -0
  208. sequenzo/dissimilarity_measures/src/xsimd/test/test_wasm/test_wasm_playwright.py +123 -0
  209. sequenzo/dissimilarity_measures/src/xsimd/test/test_xsimd_api.cpp +1460 -0
  210. sequenzo/dissimilarity_measures/utils/__init__.py +16 -0
  211. sequenzo/dissimilarity_measures/utils/get_LCP_length_for_2_seq.py +44 -0
  212. sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.cpython-312-darwin.so +0 -0
  213. sequenzo/dissimilarity_measures/utils/seqconc.cpython-312-darwin.so +0 -0
  214. sequenzo/dissimilarity_measures/utils/seqdss.cpython-312-darwin.so +0 -0
  215. sequenzo/dissimilarity_measures/utils/seqdur.cpython-312-darwin.so +0 -0
  216. sequenzo/dissimilarity_measures/utils/seqlength.cpython-312-darwin.so +0 -0
  217. sequenzo/multidomain/__init__.py +23 -0
  218. sequenzo/multidomain/association_between_domains.py +311 -0
  219. sequenzo/multidomain/cat.py +431 -0
  220. sequenzo/multidomain/combt.py +519 -0
  221. sequenzo/multidomain/dat.py +89 -0
  222. sequenzo/multidomain/idcd.py +139 -0
  223. sequenzo/multidomain/linked_polyad.py +292 -0
  224. sequenzo/openmp_setup.py +233 -0
  225. sequenzo/prefix_tree/__init__.py +43 -0
  226. sequenzo/prefix_tree/individual_level_indicators.py +1274 -0
  227. sequenzo/prefix_tree/system_level_indicators.py +465 -0
  228. sequenzo/prefix_tree/utils.py +54 -0
  229. sequenzo/sequence_characteristics/__init__.py +40 -0
  230. sequenzo/sequence_characteristics/complexity_index.py +49 -0
  231. sequenzo/sequence_characteristics/overall_cross_sectional_entropy.py +220 -0
  232. sequenzo/sequence_characteristics/plot_characteristics.py +593 -0
  233. sequenzo/sequence_characteristics/simple_characteristics.py +311 -0
  234. sequenzo/sequence_characteristics/state_frequencies_and_entropy_per_sequence.py +39 -0
  235. sequenzo/sequence_characteristics/turbulence.py +155 -0
  236. sequenzo/sequence_characteristics/variance_of_spell_durations.py +86 -0
  237. sequenzo/sequence_characteristics/within_sequence_entropy.py +43 -0
  238. sequenzo/suffix_tree/__init__.py +48 -0
  239. sequenzo/suffix_tree/individual_level_indicators.py +1638 -0
  240. sequenzo/suffix_tree/system_level_indicators.py +456 -0
  241. sequenzo/suffix_tree/utils.py +56 -0
  242. sequenzo/visualization/__init__.py +29 -0
  243. sequenzo/visualization/plot_mean_time.py +194 -0
  244. sequenzo/visualization/plot_modal_state.py +276 -0
  245. sequenzo/visualization/plot_most_frequent_sequences.py +147 -0
  246. sequenzo/visualization/plot_relative_frequency.py +404 -0
  247. sequenzo/visualization/plot_sequence_index.py +937 -0
  248. sequenzo/visualization/plot_single_medoid.py +153 -0
  249. sequenzo/visualization/plot_state_distribution.py +613 -0
  250. sequenzo/visualization/plot_transition_matrix.py +190 -0
  251. sequenzo/visualization/utils/__init__.py +23 -0
  252. sequenzo/visualization/utils/utils.py +310 -0
  253. sequenzo/with_event_history_analysis/__init__.py +35 -0
  254. sequenzo/with_event_history_analysis/sequence_analysis_multi_state_model.py +850 -0
  255. sequenzo/with_event_history_analysis/sequence_history_analysis.py +283 -0
  256. sequenzo-0.1.21.dist-info/METADATA +308 -0
  257. sequenzo-0.1.21.dist-info/RECORD +254 -0
  258. sequenzo-0.1.21.dist-info/WHEEL +5 -0
  259. sequenzo-0.1.21.dist-info/licenses/LICENSE +28 -0
  260. sequenzo-0.1.21.dist-info/top_level.txt +1 -0
sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx2.hpp
@@ -0,0 +1,1144 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
+ * Martin Renou *
+ * Copyright (c) QuantStack *
+ * Copyright (c) Serge Guelton *
+ * *
+ * Distributed under the terms of the BSD 3-Clause License. *
+ * *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XSIMD_AVX2_HPP
+#define XSIMD_AVX2_HPP
+
+#include <complex>
+#include <type_traits>
+
+#include "../types/xsimd_avx2_register.hpp"
+
+#include <limits>
+
+namespace xsimd
+{
+
+    namespace kernel
+    {
+        using namespace types;
+
+        // abs
+        template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
+        XSIMD_INLINE batch<T, A> abs(batch<T, A> const& self, requires_arch<avx2>) noexcept
+        {
+            if (std::is_signed<T>::value)
+            {
+                XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
+                {
+                    return _mm256_abs_epi8(self);
+                }
+                else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
+                {
+                    return _mm256_abs_epi16(self);
+                }
+                else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
+                {
+                    return _mm256_abs_epi32(self);
+                }
+                else
+                {
+                    return abs(self, avx {});
+                }
+            }
+            return self;
+        }
+
+        // add
+        template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
+        XSIMD_INLINE batch<T, A> add(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx2>) noexcept
+        {
+            XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
+            {
+                return _mm256_add_epi8(self, other);
+            }
+            else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
+            {
+                return _mm256_add_epi16(self, other);
+            }
+            else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
+            {
+                return _mm256_add_epi32(self, other);
+            }
+            else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
+            {
+                return _mm256_add_epi64(self, other);
+            }
+            else
+            {
+                return add(self, other, avx {});
+            }
+        }
+
+        // avgr
+        template <class A, class T, class = typename std::enable_if<std::is_unsigned<T>::value, void>::type>
+        XSIMD_INLINE batch<T, A> avgr(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx2>) noexcept
+        {
+            XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
+            {
+                return _mm256_avg_epu8(self, other);
+            }
+            else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
+            {
+                return _mm256_avg_epu16(self, other);
+            }
+            else
+            {
+                return avgr(self, other, common {});
+            }
+        }
+
+        // avg
+        template <class A, class T, class = typename std::enable_if<std::is_unsigned<T>::value, void>::type>
+        XSIMD_INLINE batch<T, A> avg(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx2>) noexcept
+        {
+            XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
+            {
+                auto adj = ((self ^ other) << 7) >> 7;
+                return avgr(self, other, A {}) - adj;
+            }
+            else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
+            {
+                auto adj = ((self ^ other) << 15) >> 15;
+                return avgr(self, other, A {}) - adj;
+            }
+            else
+            {
+                return avg(self, other, common {});
+            }
+        }
+
+        // bitwise_and
+        template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
+        XSIMD_INLINE batch<T, A> bitwise_and(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx2>) noexcept
+        {
+            return _mm256_and_si256(self, other);
+        }
+        template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
+        XSIMD_INLINE batch_bool<T, A> bitwise_and(batch_bool<T, A> const& self, batch_bool<T, A> const& other, requires_arch<avx2>) noexcept
+        {
+            return _mm256_and_si256(self, other);
+        }
+
+        // bitwise_andnot
+        template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
+        XSIMD_INLINE batch<T, A> bitwise_andnot(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx2>) noexcept
+        {
+            return _mm256_andnot_si256(other, self);
+        }
+        template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
+        XSIMD_INLINE batch_bool<T, A> bitwise_andnot(batch_bool<T, A> const& self, batch_bool<T, A> const& other, requires_arch<avx2>) noexcept
+        {
+            return _mm256_andnot_si256(other, self);
+        }
+
+        // bitwise_not
+        template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
+        XSIMD_INLINE batch<T, A> bitwise_not(batch<T, A> const& self, requires_arch<avx2>) noexcept
+        {
+            return _mm256_xor_si256(self, _mm256_set1_epi32(-1));
+        }
+        template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
+        XSIMD_INLINE batch_bool<T, A> bitwise_not(batch_bool<T, A> const& self, requires_arch<avx2>) noexcept
+        {
+            return _mm256_xor_si256(self, _mm256_set1_epi32(-1));
+        }
+
+        // bitwise_lshift
+        template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
+        XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& self, int32_t other, requires_arch<avx2>) noexcept
+        {
+            XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
+            {
+                return _mm256_slli_epi16(self, other);
+            }
+            else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
+            {
+                return _mm256_slli_epi32(self, other);
+            }
+            else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
+            {
+                return _mm256_slli_epi64(self, other);
+            }
+            else
+            {
+                return bitwise_lshift(self, other, avx {});
+            }
+        }
+
+        template <size_t shift, class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
+        XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& self, requires_arch<avx2>) noexcept
+        {
+            constexpr auto bits = std::numeric_limits<T>::digits + std::numeric_limits<T>::is_signed;
+            static_assert(shift < bits, "Shift must be less than the number of bits in T");
+            XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
+            {
+                return _mm256_slli_epi16(self, shift);
+            }
+            else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
+            {
+                return _mm256_slli_epi32(self, shift);
+            }
+            else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
+            {
+                return _mm256_slli_epi64(self, shift);
+            }
+            else
+            {
+                return bitwise_lshift<shift>(self, avx {});
+            }
+        }
+
+        template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
+        XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx2>) noexcept
+        {
+            XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
+            {
+                return _mm256_sllv_epi32(self, other);
+            }
+            else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
+            {
+                return _mm256_sllv_epi64(self, other);
+            }
+            else
+            {
+                return bitwise_lshift(self, other, avx {});
+            }
+        }
+
+        // bitwise_or
+        template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
+        XSIMD_INLINE batch<T, A> bitwise_or(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx2>) noexcept
+        {
+            return _mm256_or_si256(self, other);
+        }
+        template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
+        XSIMD_INLINE batch_bool<T, A> bitwise_or(batch_bool<T, A> const& self, batch_bool<T, A> const& other, requires_arch<avx2>) noexcept
+        {
+            return _mm256_or_si256(self, other);
+        }
+
+        // bitwise_rshift
+        template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
+        XSIMD_INLINE batch<T, A> bitwise_rshift(batch<T, A> const& self, int32_t other, requires_arch<avx2>) noexcept
+        {
+            if (std::is_signed<T>::value)
+            {
+                XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
+                {
+                    __m256i sign_mask = _mm256_set1_epi16((0xFF00 >> other) & 0x00FF);
+                    __m256i cmp_is_negative = _mm256_cmpgt_epi8(_mm256_setzero_si256(), self);
+                    __m256i res = _mm256_srai_epi16(self, other);
+                    return _mm256_or_si256(
+                        detail::fwd_to_sse([](__m128i s, __m128i o) noexcept
+                                           { return bitwise_and(batch<T, sse4_2>(s), batch<T, sse4_2>(o), sse4_2 {}); },
+                                           sign_mask, cmp_is_negative),
+                        _mm256_andnot_si256(sign_mask, res));
+                }
+                else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
+                {
+                    return _mm256_srai_epi16(self, other);
+                }
+                else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
+                {
+                    return _mm256_srai_epi32(self, other);
+                }
+                else
+                {
+                    return bitwise_rshift(self, other, avx {});
+                }
+            }
+            else
+            {
+                XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
+                {
+                    return _mm256_srli_epi16(self, other);
+                }
+                else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
+                {
+                    return _mm256_srli_epi32(self, other);
+                }
+                else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
+                {
+                    return _mm256_srli_epi64(self, other);
+                }
+                else
+                {
+                    return bitwise_rshift(self, other, avx {});
+                }
+            }
+        }
+
+        template <size_t shift, class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
+        XSIMD_INLINE batch<T, A> bitwise_rshift(batch<T, A> const& self, requires_arch<avx2>) noexcept
+        {
+            constexpr auto bits = std::numeric_limits<T>::digits + std::numeric_limits<T>::is_signed;
+            static_assert(shift < bits, "Shift amount must be less than the number of bits in T");
+            if (std::is_signed<T>::value)
+            {
+                XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
+                {
+                    __m256i sign_mask = _mm256_set1_epi16((0xFF00 >> shift) & 0x00FF);
+                    __m256i cmp_is_negative = _mm256_cmpgt_epi8(_mm256_setzero_si256(), self);
+                    __m256i res = _mm256_srai_epi16(self, shift);
+                    return _mm256_or_si256(
+                        detail::fwd_to_sse([](__m128i s, __m128i o) noexcept
+                                           { return bitwise_and(batch<T, sse4_2>(s), batch<T, sse4_2>(o), sse4_2 {}); },
+                                           sign_mask, cmp_is_negative),
+                        _mm256_andnot_si256(sign_mask, res));
+                }
+                else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
+                {
+                    return _mm256_srai_epi16(self, shift);
+                }
+                else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
+                {
+                    return _mm256_srai_epi32(self, shift);
+                }
+                else
+                {
+                    return bitwise_rshift<shift>(self, avx {});
+                }
+            }
+            else
+            {
+                XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
+                {
+                    const __m256i byte_mask = _mm256_set1_epi16(0x00FF);
+                    __m256i u16 = _mm256_and_si256(self, byte_mask);
+                    __m256i r16 = _mm256_srli_epi16(u16, shift);
+                    return _mm256_and_si256(r16, byte_mask);
+                }
+                XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
+                {
+                    return _mm256_srli_epi16(self, shift);
+                }
+                else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
+                {
+                    return _mm256_srli_epi32(self, shift);
+                }
+                else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
+                {
+                    return _mm256_srli_epi64(self, shift);
+                }
+                else
+                {
+                    return bitwise_rshift<shift>(self, avx {});
+                }
+            }
+        }
+
+        template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
+        XSIMD_INLINE batch<T, A> bitwise_rshift(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx2>) noexcept
+        {
+            if (std::is_signed<T>::value)
+            {
+                XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
+                {
+                    return _mm256_srav_epi32(self, other);
+                }
+                else
+                {
+                    return bitwise_rshift(self, other, avx {});
+                }
+            }
+            else
+            {
+                XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
+                {
+                    return _mm256_srlv_epi32(self, other);
+                }
+                else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
+                {
+                    return _mm256_srlv_epi64(self, other);
+                }
+                else
+                {
+                    return bitwise_rshift(self, other, avx {});
+                }
+            }
+        }
+
+        // bitwise_xor
+        template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
+        XSIMD_INLINE batch<T, A> bitwise_xor(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx2>) noexcept
+        {
+            return _mm256_xor_si256(self, other);
+        }
+        template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
+        XSIMD_INLINE batch<T, A> bitwise_xor(batch_bool<T, A> const& self, batch_bool<T, A> const& other, requires_arch<avx2>) noexcept
+        {
+            return _mm256_xor_si256(self, other);
+        }
+
+        // complex_low
+        template <class A>
+        XSIMD_INLINE batch<double, A> complex_low(batch<std::complex<double>, A> const& self, requires_arch<avx2>) noexcept
+        {
+            __m256d tmp0 = _mm256_permute4x64_pd(self.real(), _MM_SHUFFLE(3, 1, 1, 0));
+            __m256d tmp1 = _mm256_permute4x64_pd(self.imag(), _MM_SHUFFLE(1, 2, 0, 0));
+            return _mm256_blend_pd(tmp0, tmp1, 10);
+        }
+
+        // complex_high
+        template <class A>
+        XSIMD_INLINE batch<double, A> complex_high(batch<std::complex<double>, A> const& self, requires_arch<avx2>) noexcept
+        {
+            __m256d tmp0 = _mm256_permute4x64_pd(self.real(), _MM_SHUFFLE(3, 3, 1, 2));
+            __m256d tmp1 = _mm256_permute4x64_pd(self.imag(), _MM_SHUFFLE(3, 2, 2, 0));
+            return _mm256_blend_pd(tmp0, tmp1, 10);
+        }
+
+        // fast_cast
+        namespace detail
+        {
+
+            template <class A>
+            XSIMD_INLINE batch<double, A> fast_cast(batch<uint64_t, A> const& x, batch<double, A> const&, requires_arch<avx2>) noexcept
+            {
+                // from https://stackoverflow.com/questions/41144668/how-to-efficiently-perform-double-int64-conversions-with-sse-avx
+                // adapted to avx
+                __m256i xH = _mm256_srli_epi64(x, 32);
+                xH = _mm256_or_si256(xH, _mm256_castpd_si256(_mm256_set1_pd(19342813113834066795298816.))); // 2^84
+                __m256i mask = _mm256_setr_epi16(0xFFFF, 0xFFFF, 0x0000, 0x0000, 0xFFFF, 0xFFFF, 0x0000, 0x0000,
+                                                 0xFFFF, 0xFFFF, 0x0000, 0x0000, 0xFFFF, 0xFFFF, 0x0000, 0x0000);
+                __m256i xL = _mm256_or_si256(_mm256_and_si256(mask, x), _mm256_andnot_si256(mask, _mm256_castpd_si256(_mm256_set1_pd(0x0010000000000000)))); // 2^52
+                __m256d f = _mm256_sub_pd(_mm256_castsi256_pd(xH), _mm256_set1_pd(19342813118337666422669312.)); // 2^84 + 2^52
+                return _mm256_add_pd(f, _mm256_castsi256_pd(xL));
+            }
+
+            template <class A>
+            XSIMD_INLINE batch<double, A> fast_cast(batch<int64_t, A> const& x, batch<double, A> const&, requires_arch<avx2>) noexcept
+            {
+                // from https://stackoverflow.com/questions/41144668/how-to-efficiently-perform-double-int64-conversions-with-sse-avx
+                // adapted to avx
+                __m256i xH = _mm256_srai_epi32(x, 16);
+                xH = _mm256_and_si256(xH, _mm256_setr_epi16(0x0000, 0x0000, 0xFFFF, 0xFFFF, 0x0000, 0x0000, 0xFFFF, 0xFFFF, 0x0000, 0x0000, 0xFFFF, 0xFFFF, 0x0000, 0x0000, 0xFFFF, 0xFFFF));
+                xH = _mm256_add_epi64(xH, _mm256_castpd_si256(_mm256_set1_pd(442721857769029238784.))); // 3*2^67
+                __m256i mask = _mm256_setr_epi16(0xFFFF, 0xFFFF, 0xFFFF, 0x0000, 0xFFFF, 0xFFFF, 0xFFFF, 0x0000,
+                                                 0xFFFF, 0xFFFF, 0xFFFF, 0x0000, 0xFFFF, 0xFFFF, 0xFFFF, 0x0000);
+                __m256i xL = _mm256_or_si256(_mm256_and_si256(mask, x), _mm256_andnot_si256(mask, _mm256_castpd_si256(_mm256_set1_pd(0x0010000000000000)))); // 2^52
+                __m256d f = _mm256_sub_pd(_mm256_castsi256_pd(xH), _mm256_set1_pd(442726361368656609280.)); // 3*2^67 + 2^52
+                return _mm256_add_pd(f, _mm256_castsi256_pd(xL));
+            }
+        }
+
+        // eq
+        template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
+        XSIMD_INLINE batch_bool<T, A> eq(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx2>) noexcept
+        {
+            XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
+            {
+                return _mm256_cmpeq_epi8(self, other);
+            }
+            else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
+            {
+                return _mm256_cmpeq_epi16(self, other);
+            }
+            else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
+            {
+                return _mm256_cmpeq_epi32(self, other);
+            }
+            else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
+            {
+                return _mm256_cmpeq_epi64(self, other);
+            }
+            else
+            {
+                return eq(self, other, avx {});
+            }
+        }
+
+        // gather
+        template <class T, class A, class U, detail::enable_sized_integral_t<T, 4> = 0, detail::enable_sized_integral_t<U, 4> = 0>
+        XSIMD_INLINE batch<T, A> gather(batch<T, A> const&, T const* src, batch<U, A> const& index,
+                                        kernel::requires_arch<avx2>) noexcept
+        {
+            // scatter for this one is AVX512F+AVX512VL
+            return _mm256_i32gather_epi32(reinterpret_cast<const int*>(src), index, sizeof(T));
+        }
+
+        template <class T, class A, class U, detail::enable_sized_integral_t<T, 8> = 0, detail::enable_sized_integral_t<U, 8> = 0>
+        XSIMD_INLINE batch<T, A> gather(batch<T, A> const&, T const* src, batch<U, A> const& index,
+                                        kernel::requires_arch<avx2>) noexcept
+        {
+            // scatter for this one is AVX512F+AVX512VL
+            return _mm256_i64gather_epi64(reinterpret_cast<const long long int*>(src), index, sizeof(T));
+        }
+
+        template <class A, class U,
+                  detail::enable_sized_integral_t<U, 4> = 0>
+        XSIMD_INLINE batch<float, A> gather(batch<float, A> const&, float const* src,
+                                            batch<U, A> const& index,
+                                            kernel::requires_arch<avx2>) noexcept
+        {
+            // scatter for this one is AVX512F+AVX512VL
+            return _mm256_i32gather_ps(src, index, sizeof(float));
+        }
+
+        template <class A, class U, detail::enable_sized_integral_t<U, 8> = 0>
+        XSIMD_INLINE batch<double, A> gather(batch<double, A> const&, double const* src,
+                                             batch<U, A> const& index,
+                                             requires_arch<avx2>) noexcept
+        {
+            // scatter for this one is AVX512F+AVX512VL
+            return _mm256_i64gather_pd(src, index, sizeof(double));
+        }
+
+        // gather: handmade conversions
+        template <class A, class V, detail::enable_sized_integral_t<V, 4> = 0>
+        XSIMD_INLINE batch<float, A> gather(batch<float, A> const&, double const* src,
+                                            batch<V, A> const& index,
+                                            requires_arch<avx2>) noexcept
+        {
+            const batch<double, A> low(_mm256_i32gather_pd(src, _mm256_castsi256_si128(index.data), sizeof(double)));
+            const batch<double, A> high(_mm256_i32gather_pd(src, _mm256_extractf128_si256(index.data, 1), sizeof(double)));
+            return detail::merge_sse(_mm256_cvtpd_ps(low.data), _mm256_cvtpd_ps(high.data));
+        }
+
+        template <class A, class V, detail::enable_sized_integral_t<V, 4> = 0>
+        XSIMD_INLINE batch<int32_t, A> gather(batch<int32_t, A> const&, double const* src,
+                                              batch<V, A> const& index,
+                                              requires_arch<avx2>) noexcept
+        {
+            const batch<double, A> low(_mm256_i32gather_pd(src, _mm256_castsi256_si128(index.data), sizeof(double)));
+            const batch<double, A> high(_mm256_i32gather_pd(src, _mm256_extractf128_si256(index.data, 1), sizeof(double)));
+            return detail::merge_sse(_mm256_cvtpd_epi32(low.data), _mm256_cvtpd_epi32(high.data));
+        }
+
+        // lt
+        template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
+        XSIMD_INLINE batch_bool<T, A> lt(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx2>) noexcept
+        {
+            if (std::is_signed<T>::value)
+            {
+                XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
+                {
+                    return _mm256_cmpgt_epi8(other, self);
+                }
+                else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
+                {
+                    return _mm256_cmpgt_epi16(other, self);
+                }
+                else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
+                {
+                    return _mm256_cmpgt_epi32(other, self);
+                }
+                else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
+                {
+                    return _mm256_cmpgt_epi64(other, self);
+                }
+                else
+                {
+                    return lt(self, other, avx {});
+                }
+            }
+            else
+            {
+                return lt(self, other, avx {});
+            }
+        }
+
+        // load_complex
+        template <class A>
+        XSIMD_INLINE batch<std::complex<float>, A> load_complex(batch<float, A> const& hi, batch<float, A> const& lo, requires_arch<avx2>) noexcept
+        {
+            using batch_type = batch<float, A>;
+            batch_type real = _mm256_castpd_ps(
+                _mm256_permute4x64_pd(
+                    _mm256_castps_pd(_mm256_shuffle_ps(hi, lo, _MM_SHUFFLE(2, 0, 2, 0))),
+                    _MM_SHUFFLE(3, 1, 2, 0)));
+            batch_type imag = _mm256_castpd_ps(
+                _mm256_permute4x64_pd(
+                    _mm256_castps_pd(_mm256_shuffle_ps(hi, lo, _MM_SHUFFLE(3, 1, 3, 1))),
+                    _MM_SHUFFLE(3, 1, 2, 0)));
+            return { real, imag };
+        }
+        template <class A>
+        XSIMD_INLINE batch<std::complex<double>, A> load_complex(batch<double, A> const& hi, batch<double, A> const& lo, requires_arch<avx2>) noexcept
+        {
+            using batch_type = batch<double, A>;
+            batch_type real = _mm256_permute4x64_pd(_mm256_unpacklo_pd(hi, lo), _MM_SHUFFLE(3, 1, 2, 0));
+            batch_type imag = _mm256_permute4x64_pd(_mm256_unpackhi_pd(hi, lo), _MM_SHUFFLE(3, 1, 2, 0));
+            return { real, imag };
+        }
+        // mask
+        template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
+        XSIMD_INLINE uint64_t mask(batch_bool<T, A> const& self, requires_arch<avx2>) noexcept
+        {
+            XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
+            {
+                return 0xFFFFFFFF & (uint64_t)_mm256_movemask_epi8(self);
+            }
+            else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
+            {
+                uint64_t mask8 = 0xFFFFFFFF & (uint64_t)_mm256_movemask_epi8(self);
+                return detail::mask_lut(mask8) | (detail::mask_lut(mask8 >> 8) << 4) | (detail::mask_lut(mask8 >> 16) << 8) | (detail::mask_lut(mask8 >> 24) << 12);
+            }
+            else
+            {
+                return mask(self, avx {});
+            }
+        }
+
+        // max
+        template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
+        XSIMD_INLINE batch<T, A> max(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx2>) noexcept
+        {
+            if (std::is_signed<T>::value)
+            {
+                XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
+                {
+                    return _mm256_max_epi8(self, other);
+                }
+                else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
+                {
+                    return _mm256_max_epi16(self, other);
+                }
+                else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
+                {
+                    return _mm256_max_epi32(self, other);
+                }
+                else
+                {
+                    return max(self, other, avx {});
+                }
+            }
+            else
+            {
+                XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
+                {
+                    return _mm256_max_epu8(self, other);
+                }
+                else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
+                {
+                    return _mm256_max_epu16(self, other);
+                }
+                else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
+                {
+                    return _mm256_max_epu32(self, other);
+                }
+                else
+                {
+                    return max(self, other, avx {});
+                }
+            }
+        }
+
+        // min
+        template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
+        XSIMD_INLINE batch<T, A> min(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx2>) noexcept
+        {
+            if (std::is_signed<T>::value)
+            {
+                XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
+                {
+                    return _mm256_min_epi8(self, other);
+                }
+                else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
+                {
+                    return _mm256_min_epi16(self, other);
+                }
+                else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
+                {
+                    return _mm256_min_epi32(self, other);
+                }
+                else
+                {
+                    return min(self, other, avx {});
+                }
+            }
+            else
+            {
+                XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
+                {
+                    return _mm256_min_epu8(self, other);
+                }
+                else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
+                {
+                    return _mm256_min_epu16(self, other);
+                }
+                else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
+                {
+                    return _mm256_min_epu32(self, other);
+                }
+                else
+                {
+                    return min(self, other, avx {});
+                }
+            }
+        }
+
+        // mul
+        template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
+        XSIMD_INLINE batch<T, A> mul(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx2>) noexcept
+        {
+            XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
+            {
+                __m256i mask_hi = _mm256_set1_epi32(0xFF00FF00);
+                __m256i res_lo = _mm256_mullo_epi16(self, other);
+                __m256i other_hi = _mm256_srli_epi16(other, 8);
+                __m256i self_hi = _mm256_and_si256(self, mask_hi);
+                __m256i res_hi = _mm256_mullo_epi16(self_hi, other_hi);
+                __m256i res = _mm256_blendv_epi8(res_lo, res_hi, mask_hi);
+                return res;
+            }
+            else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
+            {
+                return _mm256_mullo_epi16(self, other);
+            }
+            else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
+            {
+                return _mm256_mullo_epi32(self, other);
+            }
+            else
+            {
+                return mul(self, other, avx {});
+            }
+        }
+
+        // reduce_add
+        template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
+        XSIMD_INLINE T reduce_add(batch<T, A> const& self, requires_arch<avx2>) noexcept
+        {
+            XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
+            {
+                __m256i tmp1 = _mm256_hadd_epi32(self, self);
+                __m256i tmp2 = _mm256_hadd_epi32(tmp1, tmp1);
+                __m128i tmp3 = _mm256_extracti128_si256(tmp2, 1);
+                __m128i tmp4 = _mm_add_epi32(_mm256_castsi256_si128(tmp2), tmp3);
+                return _mm_cvtsi128_si32(tmp4);
+            }
+            else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
+            {
+                __m256i tmp1 = _mm256_shuffle_epi32(self, 0x0E);
+                __m256i tmp2 = _mm256_add_epi64(self, tmp1);
+                __m128i tmp3 = _mm256_extracti128_si256(tmp2, 1);
+                __m128i res = _mm_add_epi64(_mm256_castsi256_si128(tmp2), tmp3);
+#if defined(__x86_64__)
+                return _mm_cvtsi128_si64(res);
+#else
+                __m128i m;
+                _mm_storel_epi64(&m, res);
+                int64_t i;
+                std::memcpy(&i, &m, sizeof(i));
+                return i;
+#endif
+            }
+            else
+            {
+                return reduce_add(self, avx {});
+            }
+        }
+
+        // rotate_left
+        template <size_t N, class A>
+        XSIMD_INLINE batch<uint8_t, A> rotate_left(batch<uint8_t, A> const& self, requires_arch<avx2>) noexcept
+        {
+            auto other = _mm256_permute2x128_si256(self, self, 0x1);
+            if (N < 16)
+            {
+                return _mm256_alignr_epi8(other, self, N);
+            }
+            else
+            {
+                return _mm256_alignr_epi8(self, other, N - 16);
+            }
+        }
+        template <size_t N, class A>
+        XSIMD_INLINE batch<int8_t, A> rotate_left(batch<int8_t, A> const& self, requires_arch<avx2>) noexcept
+        {
+            return bitwise_cast<int8_t>(rotate_left<N, A>(bitwise_cast<uint8_t>(self), avx2 {}));
+        }
+        template <size_t N, class A>
+        XSIMD_INLINE batch<uint16_t, A> rotate_left(batch<uint16_t, A> const& self, requires_arch<avx2>) noexcept
+        {
+            auto other = _mm256_permute2x128_si256(self, self, 0x1);
+            if (N < 8)
+            {
+                return _mm256_alignr_epi8(other, self, 2 * N);
+            }
+            else
+            {
+                return _mm256_alignr_epi8(self, other, 2 * (N - 8));
+            }
+        }
+        template <size_t N, class A>
+        XSIMD_INLINE batch<int16_t, A> rotate_left(batch<int16_t, A> const& self, requires_arch<avx2>) noexcept
+        {
+            return bitwise_cast<int16_t>(rotate_left<N, A>(bitwise_cast<uint16_t>(self), avx2 {}));
+        }
+
+        // sadd
+        template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
+        XSIMD_INLINE batch<T, A> sadd(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx2>) noexcept
+        {
+            if (std::is_signed<T>::value)
+            {
+                XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
+                {
+                    return _mm256_adds_epi8(self, other);
+                }
+                else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
+                {
+                    return _mm256_adds_epi16(self, other);
+                }
+                else
+                {
+                    return sadd(self, other, avx {});
+                }
+            }
+            else
+            {
+                XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
+                {
+                    return _mm256_adds_epu8(self, other);
+                }
+                else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
+                {
+                    return _mm256_adds_epu16(self, other);
+                }
+                else
+                {
+                    return sadd(self, other, avx {});
+                }
+            }
+        }
+
+        // select
+        template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
+        XSIMD_INLINE batch<T, A> select(batch_bool<T, A> const& cond, batch<T, A> const& true_br, batch<T, A> const& false_br, requires_arch<avx2>) noexcept
+        {
+            XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
+            {
+                return _mm256_blendv_epi8(false_br, true_br, cond);
+            }
+            else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
+            {
+                return _mm256_blendv_epi8(false_br, true_br, cond);
+            }
+            else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
+            {
+                return _mm256_blendv_epi8(false_br, true_br, cond);
+            }
+            else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
+            {
+                return _mm256_blendv_epi8(false_br, true_br, cond);
+            }
+            else
+            {
+                return select(cond, true_br, false_br, avx {});
+            }
+        }
+        template <class A, class T, bool... Values, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
+        XSIMD_INLINE batch<T, A> select(batch_bool_constant<T, A, Values...> const&, batch<T, A> const& true_br, batch<T, A> const& false_br, requires_arch<avx2>) noexcept
+        {
+            // FIXME: for some reason mask here is not considered as an immediate,
+            // but it's okay for _mm256_blend_epi32
+            // case 2: return _mm256_blend_epi16(false_br, true_br, mask);
+            XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
+            {
+                constexpr int mask = batch_bool_constant<T, A, Values...>::mask();
+                return _mm256_blend_epi32(false_br, true_br, mask);
+            }
+            else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
+            {
+                constexpr int mask = batch_bool_constant<T, A, Values...>::mask();
+                constexpr int imask = detail::interleave(mask);
+                return _mm256_blend_epi32(false_br, true_br, imask);
+            }
+            else
+            {
+                return select(batch_bool<T, A> { Values... }, true_br, false_br, avx2 {});
+            }
+        }
+
+        // slide_left
+        template <size_t N, class A, class T>
+        XSIMD_INLINE batch<T, A> slide_left(batch<T, A> const& x, requires_arch<avx2>) noexcept
+        {
+            constexpr unsigned BitCount = N * 8;
+            if (BitCount == 0)
+            {
+                return x;
+            }
+            if (BitCount >= 256)
+            {
+                return batch<T, A>(T(0));
+            }
+            if (BitCount > 128)
+            {
+                constexpr unsigned M = (BitCount - 128) / 8;
+                auto y = _mm256_bslli_epi128(x, M);
+                return _mm256_permute2x128_si256(y, y, 0x28);
+            }
+            if (BitCount == 128)
+            {
+                return _mm256_permute2x128_si256(x, x, 0x28);
+            }
+            // shifting by [0, 128[ bits
+            constexpr unsigned M = BitCount / 8;
+            auto y = _mm256_bslli_epi128(x, M);
+            auto z = _mm256_bsrli_epi128(x, 16 - M);
+            auto w = _mm256_permute2x128_si256(z, z, 0x28);
+            return _mm256_or_si256(y, w);
+        }
+
+        // slide_right
+        template <size_t N, class A, class T>
+        XSIMD_INLINE batch<T, A> slide_right(batch<T, A> const& x, requires_arch<avx2>) noexcept
+        {
+            constexpr unsigned BitCount = N * 8;
+            if (BitCount == 0)
+            {
+                return x;
+            }
+            if (BitCount >= 256)
+            {
+                return batch<T, A>(T(0));
+            }
+            if (BitCount > 128)
+            {
+                constexpr unsigned M = (BitCount - 128) / 8;
+                auto y = _mm256_bsrli_epi128(x, M);
+                return _mm256_permute2x128_si256(y, y, 0x81);
+            }
+            if (BitCount == 128)
+            {
+                return _mm256_permute2x128_si256(x, x, 0x81);
+            }
+            // shifting by [0, 128[ bits
+            constexpr unsigned M = BitCount / 8;
+            auto y = _mm256_bsrli_epi128(x, M);
+            auto z = _mm256_bslli_epi128(x, 16 - M);
+            auto w = _mm256_permute2x128_si256(z, z, 0x81);
+            return _mm256_or_si256(y, w);
+        }
+
+        // ssub
+        template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
+        XSIMD_INLINE batch<T, A> ssub(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx2>) noexcept
+        {
+            if (std::is_signed<T>::value)
+            {
+                XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
+                {
+                    return _mm256_subs_epi8(self, other);
+                }
+                else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
+                {
+                    return _mm256_subs_epi16(self, other);
+                }
+                else
+                {
+                    return ssub(self, other, avx {});
+                }
+            }
+            else
+            {
+                XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
+                {
+                    return _mm256_subs_epu8(self, other);
+                }
+                else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
+                {
+                    return _mm256_subs_epu16(self, other);
+                }
+                else
+                {
+                    return ssub(self, other, avx {});
+                }
+            }
+        }
+
+        // sub
+        template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
+        XSIMD_INLINE batch<T, A> sub(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx2>) noexcept
+        {
+            XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
+            {
+                return _mm256_sub_epi8(self, other);
+            }
+            else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
+            {
+                return _mm256_sub_epi16(self, other);
+            }
+            else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
+            {
+                return _mm256_sub_epi32(self, other);
+            }
+            else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
+            {
+                return _mm256_sub_epi64(self, other);
+            }
+            else
+            {
+                return sub(self, other, avx {});
+            }
+        }
+
+        // swizzle (dynamic mask)
+        template <class A>
+        XSIMD_INLINE batch<float, A> swizzle(batch<float, A> const& self, batch<uint32_t, A> mask, requires_arch<avx2>) noexcept
+        {
+            return swizzle(self, mask, avx {});
+        }
+        template <class A>
+        XSIMD_INLINE batch<double, A> swizzle(batch<double, A> const& self, batch<uint64_t, A> mask, requires_arch<avx2>) noexcept
+        {
+            batch<uint32_t, A> broadcaster = { 0, 1, 0, 1, 0, 1, 0, 1 };
+            constexpr uint64_t comb = 0x0000000100000001ul * 2;
+            return bitwise_cast<double>(swizzle(bitwise_cast<float>(self), bitwise_cast<uint32_t>(mask * comb) + broadcaster, avx2 {}));
+        }
+
+        template <class A>
+        XSIMD_INLINE batch<uint64_t, A> swizzle(batch<uint64_t, A> const& self, batch<uint64_t, A> mask, requires_arch<avx2>) noexcept
+        {
+            return bitwise_cast<uint64_t>(swizzle(bitwise_cast<double>(self), mask, avx2 {}));
+        }
+        template <class A>
+        XSIMD_INLINE batch<int64_t, A> swizzle(batch<int64_t, A> const& self, batch<uint64_t, A> mask, requires_arch<avx2>) noexcept
+        {
+            return bitwise_cast<int64_t>(swizzle(bitwise_cast<double>(self), mask, avx2 {}));
+        }
+        template <class A>
+        XSIMD_INLINE batch<uint32_t, A> swizzle(batch<uint32_t, A> const& self, batch<uint32_t, A> mask, requires_arch<avx2>) noexcept
+        {
+            return swizzle(self, mask, avx {});
+        }
+        template <class A>
+        XSIMD_INLINE batch<int32_t, A> swizzle(batch<int32_t, A> const& self, batch<uint32_t, A> mask, requires_arch<avx2>) noexcept
+        {
+            return bitwise_cast<int32_t>(swizzle(bitwise_cast<uint32_t>(self), mask, avx2 {}));
+        }
+
+        // swizzle (constant mask)
+        template <class A, uint32_t V0, uint32_t V1, uint32_t V2, uint32_t V3, uint32_t V4, uint32_t V5, uint32_t V6, uint32_t V7>
+        XSIMD_INLINE batch<float, A> swizzle(batch<float, A> const& self, batch_constant<uint32_t, A, V0, V1, V2, V3, V4, V5, V6, V7> mask, requires_arch<avx2>) noexcept
+        {
+            XSIMD_IF_CONSTEXPR(detail::is_all_different(mask) && !detail::is_identity(mask))
+            {
+                // The intrinsic does NOT allow to copy the same element of the source vector to more than one element of the destination vector.
+                // one-shot 8-lane permute
+                return _mm256_permutevar8x32_ps(self, mask.as_batch());
+            }
+            return swizzle(self, mask, avx {});
+        }
+
+        template <class A, uint64_t V0, uint64_t V1, uint64_t V2, uint64_t V3>
+        XSIMD_INLINE batch<double, A> swizzle(batch<double, A> const& self, batch_constant<uint64_t, A, V0, V1, V2, V3> mask, requires_arch<avx2>) noexcept
+        {
+            XSIMD_IF_CONSTEXPR(detail::is_identity(mask)) { return self; }
+            XSIMD_IF_CONSTEXPR(!detail::is_cross_lane(mask))
+            {
+                constexpr auto imm = ((V0 & 1) << 0) | ((V1 & 1) << 1) | ((V2 & 1) << 2) | ((V3 & 1) << 3);
+                return _mm256_permute_pd(self, imm);
+            }
+            constexpr auto imm = detail::mod_shuffle(V0, V1, V2, V3);
+            // fallback to full 4-element permute
+            return _mm256_permute4x64_pd(self, imm);
+        }
+
+        template <class A, uint64_t V0, uint64_t V1, uint64_t V2, uint64_t V3>
+        XSIMD_INLINE batch<uint64_t, A> swizzle(batch<uint64_t, A> const& self, batch_constant<uint64_t, A, V0, V1, V2, V3>, requires_arch<avx2>) noexcept
+        {
+            constexpr auto mask = detail::mod_shuffle(V0, V1, V2, V3);
+            return _mm256_permute4x64_epi64(self, mask);
+        }
+        template <class A, uint64_t V0, uint64_t V1, uint64_t V2, uint64_t V3>
+        XSIMD_INLINE batch<int64_t, A> swizzle(batch<int64_t, A> const& self, batch_constant<uint64_t, A, V0, V1, V2, V3> mask, requires_arch<avx2>) noexcept
+        {
+            return bitwise_cast<int64_t>(swizzle(bitwise_cast<uint64_t>(self), mask, avx2 {}));
+        }
+        template <class A, uint32_t V0, uint32_t V1, uint32_t V2, uint32_t V3, uint32_t V4, uint32_t V5, uint32_t V6, uint32_t V7>
+        XSIMD_INLINE batch<uint32_t, A> swizzle(batch<uint32_t, A> const& self, batch_constant<uint32_t, A, V0, V1, V2, V3, V4, V5, V6, V7> mask, requires_arch<avx2>) noexcept
+        {
+            return _mm256_permutevar8x32_epi32(self, mask.as_batch());
+        }
+        template <class A, uint32_t V0, uint32_t V1, uint32_t V2, uint32_t V3, uint32_t V4, uint32_t V5, uint32_t V6, uint32_t V7>
+        XSIMD_INLINE batch<int32_t, A> swizzle(batch<int32_t, A> const& self, batch_constant<uint32_t, A, V0, V1, V2, V3, V4, V5, V6, V7> mask, requires_arch<avx2>) noexcept
+        {
+            return bitwise_cast<int32_t>(swizzle(bitwise_cast<uint32_t>(self), mask, avx2 {}));
+        }
+
+        // zip_hi
+        template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
+        XSIMD_INLINE batch<T, A> zip_hi(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx2>) noexcept
+        {
+            XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
+            {
+                auto lo = _mm256_unpacklo_epi8(self, other);
+                auto hi = _mm256_unpackhi_epi8(self, other);
+                return _mm256_permute2f128_si256(lo, hi, 0x31);
+            }
+            else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
+            {
+                auto lo = _mm256_unpacklo_epi16(self, other);
+                auto hi = _mm256_unpackhi_epi16(self, other);
+                return _mm256_permute2f128_si256(lo, hi, 0x31);
+            }
+            else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
+            {
+                auto lo = _mm256_unpacklo_epi32(self, other);
+                auto hi = _mm256_unpackhi_epi32(self, other);
+                return _mm256_permute2f128_si256(lo, hi, 0x31);
+            }
+            else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
+            {
+                auto lo = _mm256_unpacklo_epi64(self, other);
+                auto hi = _mm256_unpackhi_epi64(self, other);
+                return _mm256_permute2f128_si256(lo, hi, 0x31);
+            }
+            else
+            {
+                assert(false && "unsupported arch/op combination");
+                return {};
+            }
+        }
+
+        // zip_lo
+        template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
+        XSIMD_INLINE batch<T, A> zip_lo(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx2>) noexcept
+        {
+            XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
+            {
+                auto lo = _mm256_unpacklo_epi8(self, other);
+                auto hi = _mm256_unpackhi_epi8(self, other);
+                return _mm256_inserti128_si256(lo, _mm256_castsi256_si128(hi), 1);
+            }
+            else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
+            {
+                auto lo = _mm256_unpacklo_epi16(self, other);
+                auto hi = _mm256_unpackhi_epi16(self, other);
+                return _mm256_inserti128_si256(lo, _mm256_castsi256_si128(hi), 1);
+            }
+            else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
+            {
+                auto lo = _mm256_unpacklo_epi32(self, other);
+                auto hi = _mm256_unpackhi_epi32(self, other);
+                return _mm256_inserti128_si256(lo, _mm256_castsi256_si128(hi), 1);
+            }
+            else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
+            {
+                auto lo = _mm256_unpacklo_epi64(self, other);
+                auto hi = _mm256_unpackhi_epi64(self, other);
+                return _mm256_inserti128_si256(lo, _mm256_castsi256_si128(hi), 1);
+            }
+            else
+            {
+                assert(false && "unsupported arch/op combination");
+                return {};
+            }
+        }
+    }
+}
+
+#endif