sequenzo-0.1.17-cp39-cp39-win_amd64.whl → sequenzo-0.1.18-cp39-cp39-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sequenzo might be problematic.

Files changed (101)
  1. sequenzo/__init__.py +25 -1
  2. sequenzo/big_data/clara/clara.py +1 -1
  3. sequenzo/big_data/clara/utils/get_weighted_diss.c +156 -156
  4. sequenzo/big_data/clara/utils/get_weighted_diss.cp39-win_amd64.pyd +0 -0
  5. sequenzo/clustering/clustering_c_code.cp39-win_amd64.pyd +0 -0
  6. sequenzo/clustering/hierarchical_clustering.py +202 -8
  7. sequenzo/define_sequence_data.py +34 -2
  8. sequenzo/dissimilarity_measures/c_code.cp39-win_amd64.pyd +0 -0
  9. sequenzo/dissimilarity_measures/get_substitution_cost_matrix.py +1 -1
  10. sequenzo/dissimilarity_measures/src/DHDdistance.cpp +13 -37
  11. sequenzo/dissimilarity_measures/src/LCPdistance.cpp +13 -37
  12. sequenzo/dissimilarity_measures/src/OMdistance.cpp +12 -47
  13. sequenzo/dissimilarity_measures/src/OMspellDistance.cpp +103 -67
  14. sequenzo/dissimilarity_measures/src/dp_utils.h +160 -0
  15. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_arithmetic.hpp +41 -16
  16. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_complex.hpp +4 -0
  17. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_details.hpp +7 -0
  18. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_logical.hpp +10 -0
  19. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_math.hpp +127 -43
  20. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_memory.hpp +30 -2
  21. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_swizzle.hpp +174 -0
  22. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_trigo.hpp +14 -5
  23. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx.hpp +111 -54
  24. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx2.hpp +131 -9
  25. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512bw.hpp +11 -113
  26. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512dq.hpp +39 -7
  27. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512f.hpp +336 -30
  28. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vbmi.hpp +9 -37
  29. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vbmi2.hpp +58 -0
  30. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_common.hpp +1 -0
  31. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_common_fwd.hpp +35 -2
  32. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_constants.hpp +3 -1
  33. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_emulated.hpp +17 -0
  34. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_avx.hpp +13 -0
  35. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_sse.hpp +18 -0
  36. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma4.hpp +13 -0
  37. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_isa.hpp +8 -0
  38. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_neon.hpp +363 -34
  39. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_neon64.hpp +7 -0
  40. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_rvv.hpp +13 -0
  41. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_scalar.hpp +41 -4
  42. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse2.hpp +252 -16
  43. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse3.hpp +9 -0
  44. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_ssse3.hpp +12 -1
  45. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sve.hpp +7 -0
  46. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_vsx.hpp +892 -0
  47. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_wasm.hpp +78 -1
  48. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_arch.hpp +3 -1
  49. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_config.hpp +13 -2
  50. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_cpuid.hpp +5 -0
  51. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_inline.hpp +5 -1
  52. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_all_registers.hpp +2 -0
  53. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_api.hpp +64 -1
  54. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_batch.hpp +36 -0
  55. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_rvv_register.hpp +40 -31
  56. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_traits.hpp +8 -0
  57. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_vsx_register.hpp +77 -0
  58. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/xsimd.hpp +6 -0
  59. sequenzo/dissimilarity_measures/src/xsimd/test/test_basic_math.cpp +6 -0
  60. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch.cpp +54 -2
  61. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_bool.cpp +8 -0
  62. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_cast.cpp +11 -4
  63. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_complex.cpp +18 -0
  64. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_int.cpp +8 -14
  65. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_manip.cpp +216 -173
  66. sequenzo/dissimilarity_measures/src/xsimd/test/test_load_store.cpp +6 -0
  67. sequenzo/dissimilarity_measures/src/xsimd/test/test_memory.cpp +1 -1
  68. sequenzo/dissimilarity_measures/src/xsimd/test/test_power.cpp +7 -4
  69. sequenzo/dissimilarity_measures/src/xsimd/test/test_select.cpp +6 -2
  70. sequenzo/dissimilarity_measures/src/xsimd/test/test_shuffle.cpp +32 -18
  71. sequenzo/dissimilarity_measures/src/xsimd/test/test_utils.hpp +21 -24
  72. sequenzo/dissimilarity_measures/src/xsimd/test/test_xsimd_api.cpp +69 -9
  73. sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.c +156 -156
  74. sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.cp39-win_amd64.pyd +0 -0
  75. sequenzo/dissimilarity_measures/utils/seqconc.c +156 -156
  76. sequenzo/dissimilarity_measures/utils/seqconc.cp39-win_amd64.pyd +0 -0
  77. sequenzo/dissimilarity_measures/utils/seqdss.c +156 -156
  78. sequenzo/dissimilarity_measures/utils/seqdss.cp39-win_amd64.pyd +0 -0
  79. sequenzo/dissimilarity_measures/utils/seqdur.c +156 -156
  80. sequenzo/dissimilarity_measures/utils/seqdur.cp39-win_amd64.pyd +0 -0
  81. sequenzo/dissimilarity_measures/utils/seqlength.c +156 -156
  82. sequenzo/dissimilarity_measures/utils/seqlength.cp39-win_amd64.pyd +0 -0
  83. sequenzo/sequence_characteristics/__init__.py +4 -0
  84. sequenzo/sequence_characteristics/complexity_index.py +17 -57
  85. sequenzo/sequence_characteristics/overall_cross_sectional_entropy.py +177 -111
  86. sequenzo/sequence_characteristics/plot_characteristics.py +30 -11
  87. sequenzo/sequence_characteristics/simple_characteristics.py +1 -0
  88. sequenzo/sequence_characteristics/state_frequencies_and_entropy_per_sequence.py +9 -3
  89. sequenzo/sequence_characteristics/turbulence.py +47 -67
  90. sequenzo/sequence_characteristics/variance_of_spell_durations.py +19 -9
  91. sequenzo/sequence_characteristics/within_sequence_entropy.py +5 -58
  92. sequenzo/visualization/plot_sequence_index.py +58 -35
  93. sequenzo/visualization/plot_state_distribution.py +57 -36
  94. sequenzo/with_event_history_analysis/__init__.py +35 -0
  95. sequenzo/with_event_history_analysis/sequence_analysis_multi_state_model.py +850 -0
  96. sequenzo/with_event_history_analysis/sequence_history_analysis.py +283 -0
  97. {sequenzo-0.1.17.dist-info → sequenzo-0.1.18.dist-info}/METADATA +7 -6
  98. {sequenzo-0.1.17.dist-info → sequenzo-0.1.18.dist-info}/RECORD +101 -94
  99. {sequenzo-0.1.17.dist-info → sequenzo-0.1.18.dist-info}/WHEEL +0 -0
  100. {sequenzo-0.1.17.dist-info → sequenzo-0.1.18.dist-info}/licenses/LICENSE +0 -0
  101. {sequenzo-0.1.17.dist-info → sequenzo-0.1.18.dist-info}/top_level.txt +0 -0
sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_neon.hpp
@@ -13,6 +13,7 @@
 #define XSIMD_NEON_HPP
 
 #include <algorithm>
+#include <array>
 #include <complex>
 #include <tuple>
 #include <type_traits>
@@ -717,16 +718,10 @@ namespace xsimd
             return vnegq_s32(rhs);
         }
 
-        template <class A, class T, detail::enable_sized_unsigned_t<T, 8> = 0>
-        XSIMD_INLINE batch<T, A> neg(batch<T, A> const& rhs, requires_arch<neon>) noexcept
-        {
-            return batch<T, A> { -rhs.get(0), -rhs.get(1) };
-        }
-
-        template <class A, class T, detail::enable_sized_signed_t<T, 8> = 0>
+        template <class A, class T, detail::enable_sized_integral_t<T, 8> = 0>
         XSIMD_INLINE batch<T, A> neg(batch<T, A> const& rhs, requires_arch<neon>) noexcept
         {
-            return batch<T, A> { -rhs.get(0), -rhs.get(1) };
+            return 0 - rhs;
         }
 
         template <class A>
@@ -923,16 +918,28 @@ namespace xsimd
             return dispatcher.apply(register_type(lhs), register_type(rhs));
         }
 
-        template <class A, class T, detail::enable_sized_integral_t<T, 8> = 0>
+        template <class A, class T, detail::enable_sized_unsigned_t<T, 8> = 0>
         XSIMD_INLINE batch_bool<T, A> eq(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>) noexcept
         {
-            return batch_bool<T, A>({ lhs.get(0) == rhs.get(0), lhs.get(1) == rhs.get(1) });
+            auto eq32 = vceqq_u32(vreinterpretq_u32_u64(lhs.data), vreinterpretq_u32_u64(rhs.data));
+            auto rev32 = vrev64q_u32(eq32);
+            auto eq64 = vandq_u32(eq32, rev32);
+            return batch_bool<T, A>(vreinterpretq_u64_u32(eq64));
+        }
+
+        template <class A, class T, detail::enable_sized_signed_t<T, 8> = 0>
+        XSIMD_INLINE batch_bool<T, A> eq(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>) noexcept
+        {
+            auto eq32 = vceqq_u32(vreinterpretq_u32_s64(lhs.data), vreinterpretq_u32_s64(rhs.data));
+            auto rev32 = vrev64q_u32(eq32);
+            auto eq64 = vandq_u32(eq32, rev32);
+            return batch_bool<T, A>(vreinterpretq_u64_u32(eq64));
         }
 
         template <class A, class T, detail::enable_sized_integral_t<T, 8> = 0>
         XSIMD_INLINE batch_bool<T, A> eq(batch_bool<T, A> const& lhs, batch_bool<T, A> const& rhs, requires_arch<neon>) noexcept
         {
-            return batch_bool<T, A>({ lhs.get(0) == rhs.get(0), lhs.get(1) == rhs.get(1) });
+            return eq(batch<T, A> { lhs.data }, batch<T, A> { rhs.data }, A {});
         }
 
         /*************
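The hunk above replaces a per-lane scalar fallback with a branch-free emulation: ARMv7 NEON has no 64-bit compare, so each 64-bit lane is compared as two 32-bit halves. A minimal scalar model of the trick (illustrative names, not part of the package):

    #include <cstdint>

    // Compare one 64-bit lane as two 32-bit halves, mirroring
    // vceqq_u32 -> vrev64q_u32 -> vandq_u32 above.
    uint64_t eq64_model(uint64_t lhs, uint64_t rhs)
    {
        // vceqq_u32: per-half all-ones/all-zeros masks.
        uint32_t lo = (static_cast<uint32_t>(lhs) == static_cast<uint32_t>(rhs)) ? ~0u : 0u;
        uint32_t hi = ((lhs >> 32) == (rhs >> 32)) ? ~0u : 0u;
        // vrev64q_u32 swaps the two halves within the lane, so ANDing
        // mixes each half's verdict with its sibling's.
        uint32_t combined = lo & hi;
        // The lane is all-ones iff both halves matched, i.e. iff lhs == rhs.
        return (static_cast<uint64_t>(combined) << 32) | combined;
    }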
@@ -985,10 +992,19 @@ namespace xsimd
             return dispatcher.apply(register_type(lhs), register_type(rhs));
         }
 
-        template <class A, class T, detail::enable_sized_integral_t<T, 8> = 0>
+        template <class A, class T, detail::enable_sized_signed_t<T, 8> = 0>
+        XSIMD_INLINE batch_bool<T, A> lt(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>) noexcept
+        {
+            using register_type = typename batch<T, A>::register_type;
+            return batch_bool<T, A>(vreinterpretq_u64_s64(vshrq_n_s64(vqsubq_s64(register_type(lhs), register_type(rhs)), 63)));
+        }
+
+        template <class A, class T, detail::enable_sized_unsigned_t<T, 8> = 0>
         XSIMD_INLINE batch_bool<T, A> lt(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>) noexcept
         {
-            return batch_bool<T, A>({ lhs.get(0) < rhs.get(0), lhs.get(1) < rhs.get(1) });
+            using register_type = typename batch<T, A>::register_type;
+            register_type acc = { 0x7FFFFFFFFFFFFFFFull, 0x7FFFFFFFFFFFFFFFull };
+            return batch_bool<T, A>(vreinterpretq_u64_s64(vshrq_n_s64(vreinterpretq_s64_u64(vqaddq_u64(vqsubq_u64(register_type(rhs), register_type(lhs)), acc)), 63)));
        }
 
         /******
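The new lt kernels replace scalar comparisons with saturating arithmetic: for signed lanes, vqsubq_s64 cannot wrap, so the sign of lhs - rhs is the comparison result, and vshrq_n_s64(..., 63) broadcasts that sign bit into an all-ones or all-zeros mask. For unsigned lanes, rhs saturating-minus lhs is nonzero exactly when lhs < rhs, and saturating-adding 2^63 - 1 turns "nonzero" into "bit 63 set". A scalar sketch of both paths (illustrative; __int128 is a GCC/Clang extension used only to model saturation):

    #include <cstdint>
    #include <limits>

    // Signed path: the saturated difference has the sign of the true
    // difference; an arithmetic shift by 63 broadcasts it (vshrq_n_s64).
    int64_t lt_s64_model(int64_t lhs, int64_t rhs)
    {
        __int128 d = static_cast<__int128>(lhs) - rhs; // wide type only to model vqsubq_s64
        int64_t sat = d > std::numeric_limits<int64_t>::max()   ? std::numeric_limits<int64_t>::max()
                      : d < std::numeric_limits<int64_t>::min() ? std::numeric_limits<int64_t>::min()
                                                                : static_cast<int64_t>(d);
        return sat >> 63; // all-ones iff lhs < rhs (arithmetic shift assumed)
    }

    // Unsigned path: diff != 0 iff lhs < rhs; adding 2^63 - 1 with
    // saturation sets bit 63 exactly in that case.
    uint64_t lt_u64_model(uint64_t lhs, uint64_t rhs)
    {
        uint64_t diff = rhs > lhs ? rhs - lhs : 0;                     // vqsubq_u64
        uint64_t sum = diff > 0x8000000000000000ull
                           ? ~0ull
                           : diff + 0x7FFFFFFFFFFFFFFFull;             // vqaddq_u64
        return static_cast<uint64_t>(static_cast<int64_t>(sum) >> 63); // broadcast bit 63
    }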
@@ -1012,12 +1028,24 @@ namespace xsimd
         template <class A, class T, detail::enable_sized_integral_t<T, 8> = 0>
         XSIMD_INLINE batch_bool<T, A> le(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>) noexcept
         {
-            return batch_bool<T, A>({ lhs.get(0) <= rhs.get(0), lhs.get(1) <= rhs.get(1) });
+            return !(lhs > rhs);
         }
 
         /******
          * gt *
          ******/
+        namespace detail
+        {
+            XSIMD_INLINE int64x2_t bitwise_not_s64(int64x2_t arg) noexcept
+            {
+                return vreinterpretq_s64_s32(vmvnq_s32(vreinterpretq_s32_s64(arg)));
+            }
+
+            XSIMD_INLINE uint64x2_t bitwise_not_u64(uint64x2_t arg) noexcept
+            {
+                return vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(arg)));
+            }
+        }
 
         WRAP_BINARY_INT_EXCLUDING_64(vcgtq, detail::comp_return_type)
         WRAP_BINARY_FLOAT(vcgtq, detail::comp_return_type)
@@ -1033,10 +1061,19 @@ namespace xsimd
             return dispatcher.apply(register_type(lhs), register_type(rhs));
         }
 
-        template <class A, class T, detail::enable_sized_integral_t<T, 8> = 0>
+        template <class A, class T, detail::enable_sized_signed_t<T, 8> = 0>
         XSIMD_INLINE batch_bool<T, A> gt(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>) noexcept
         {
-            return batch_bool<T, A>({ lhs.get(0) > rhs.get(0), lhs.get(1) > rhs.get(1) });
+            using register_type = typename batch<T, A>::register_type;
+            return batch_bool<T, A>(vreinterpretq_u64_s64(vshrq_n_s64(vqsubq_s64(register_type(rhs), register_type(lhs)), 63)));
+        }
+
+        template <class A, class T, detail::enable_sized_unsigned_t<T, 8> = 0>
+        XSIMD_INLINE batch_bool<T, A> gt(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>) noexcept
+        {
+            using register_type = typename batch<T, A>::register_type;
+            register_type acc = { 0x7FFFFFFFFFFFFFFFull, 0x7FFFFFFFFFFFFFFFull };
+            return batch_bool<T, A>(vreinterpretq_u64_s64(vshrq_n_s64(vreinterpretq_s64_u64(vqaddq_u64(vqsubq_u64(register_type(lhs), register_type(rhs)), acc)), 63)));
         }
 
         /******
@@ -1060,7 +1097,7 @@ namespace xsimd
         template <class A, class T, detail::enable_sized_integral_t<T, 8> = 0>
         XSIMD_INLINE batch_bool<T, A> ge(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>) noexcept
         {
-            return batch_bool<T, A>({ lhs.get(0) >= rhs.get(0), lhs.get(1) >= rhs.get(1) });
+            return !(lhs < rhs);
         }
 
         /*******************
@@ -1212,16 +1249,6 @@ namespace xsimd
 
         namespace detail
         {
-            XSIMD_INLINE int64x2_t bitwise_not_s64(int64x2_t arg) noexcept
-            {
-                return vreinterpretq_s64_s32(vmvnq_s32(vreinterpretq_s32_s64(arg)));
-            }
-
-            XSIMD_INLINE uint64x2_t bitwise_not_u64(uint64x2_t arg) noexcept
-            {
-                return vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(arg)));
-            }
-
             XSIMD_INLINE float32x4_t bitwise_not_f32(float32x4_t arg) noexcept
             {
                 return vreinterpretq_f32_u32(vmvnq_u32(vreinterpretq_u32_f32(arg)));
@@ -1314,7 +1341,7 @@ namespace xsimd
         template <class A, class T, detail::enable_sized_integral_t<T, 8> = 0>
         XSIMD_INLINE batch<T, A> min(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>) noexcept
         {
-            return { std::min(lhs.get(0), rhs.get(0)), std::min(lhs.get(1), rhs.get(1)) };
+            return select(lhs > rhs, rhs, lhs);
         }
 
         /*******
@@ -1338,7 +1365,7 @@ namespace xsimd
         template <class A, class T, detail::enable_sized_integral_t<T, 8> = 0>
         XSIMD_INLINE batch<T, A> max(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>) noexcept
         {
-            return { std::max(lhs.get(0), rhs.get(0)), std::max(lhs.get(1), rhs.get(1)) };
+            return select(lhs > rhs, lhs, rhs);
         }
 
         /*******
@@ -1678,14 +1705,21 @@ namespace xsimd
          * reduce_max *
          **************/
 
-        // Using common implementation because ARM doe snot provide intrinsics
+        // Using common implementation because ARM does not provide intrinsics
         // for this operation
 
         /**************
          * reduce_min *
          **************/
 
-        // Using common implementation because ARM doe snot provide intrinsics
+        // Using common implementation because ARM does not provide intrinsics
+        // for this operation
+
+        /**************
+         * reduce_mul *
+         **************/
+
+        // Using common implementation because ARM does not provide intrinsics
         // for this operation
 
         /**********
@@ -2280,6 +2314,55 @@ namespace xsimd
             return vshlq_s64(lhs, rhs);
         }
 
+        // immediate variant
+        template <size_t shift, class A, class T, detail::enable_sized_unsigned_t<T, 1> = 0>
+        XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& x, requires_arch<neon>) noexcept
+        {
+            return vshlq_n_u8(x, shift);
+        }
+
+        template <size_t shift, class A, class T, detail::enable_sized_signed_t<T, 1> = 0>
+        XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& x, requires_arch<neon>) noexcept
+        {
+            return vshlq_n_s8(x, shift);
+        }
+
+        template <size_t shift, class A, class T, detail::enable_sized_unsigned_t<T, 2> = 0>
+        XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& x, requires_arch<neon>) noexcept
+        {
+            return vshlq_n_u16(x, shift);
+        }
+
+        template <size_t shift, class A, class T, detail::enable_sized_signed_t<T, 2> = 0>
+        XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& x, requires_arch<neon>) noexcept
+        {
+            return vshlq_n_s16(x, shift);
+        }
+
+        template <size_t shift, class A, class T, detail::enable_sized_unsigned_t<T, 4> = 0>
+        XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& x, requires_arch<neon>) noexcept
+        {
+            return vshlq_n_u32(x, shift);
+        }
+
+        template <size_t shift, class A, class T, detail::enable_sized_signed_t<T, 4> = 0>
+        XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& x, requires_arch<neon>) noexcept
+        {
+            return vshlq_n_s32(x, shift);
+        }
+
+        template <size_t shift, class A, class T, detail::enable_sized_unsigned_t<T, 8> = 0>
+        XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& x, requires_arch<neon>) noexcept
+        {
+            return vshlq_n_u64(x, shift);
+        }
+
+        template <size_t shift, class A, class T, detail::enable_sized_signed_t<T, 8> = 0>
+        XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& x, requires_arch<neon>) noexcept
+        {
+            return vshlq_n_s64(x, shift);
+        }
+
         /******************
          * bitwise_rshift *
          ******************/
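The immediate variants exist because the vshlq_n_* / vshrq_n_* intrinsics require the shift count to be a compile-time constant; carrying it as a template parameter keeps it constant all the way through dispatch. A hypothetical caller on an ARM target (illustrative sketch, not part of the package):

    #include <arm_neon.h> // compiles for ARM/NEON targets only
    #include <cstddef>

    // Shift every u32 lane left by a compile-time constant. The template
    // parameter is what lets vshlq_n_u32 see a literal immediate.
    template <size_t Shift>
    uint32x4_t lshift_lanes(uint32x4_t x)
    {
        static_assert(Shift < 32, "immediate must fit the lane width");
        return vshlq_n_u32(x, Shift);
    }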
@@ -2455,6 +2538,110 @@ namespace xsimd
             return vshlq_s32(lhs, vnegq_s32(rhs));
         }
 
+        // immediate variant
+        template <size_t shift, class A, class T, detail::enable_sized_unsigned_t<T, 1> = 0>
+        XSIMD_INLINE batch<T, A> bitwise_rshift(batch<T, A> const& x, requires_arch<neon>) noexcept
+        {
+            return vshrq_n_u8(x, shift);
+        }
+
+        template <size_t shift, class A, class T, detail::enable_sized_signed_t<T, 1> = 0>
+        XSIMD_INLINE batch<T, A> bitwise_rshift(batch<T, A> const& x, requires_arch<neon>) noexcept
+        {
+            return vshrq_n_s8(x, shift);
+        }
+
+        template <size_t shift, class A, class T, detail::enable_sized_unsigned_t<T, 2> = 0>
+        XSIMD_INLINE batch<T, A> bitwise_rshift(batch<T, A> const& x, requires_arch<neon>) noexcept
+        {
+            return vshrq_n_u16(x, shift);
+        }
+
+        template <size_t shift, class A, class T, detail::enable_sized_signed_t<T, 2> = 0>
+        XSIMD_INLINE batch<T, A> bitwise_rshift(batch<T, A> const& x, requires_arch<neon>) noexcept
+        {
+            return vshrq_n_s16(x, shift);
+        }
+
+        template <size_t shift, class A, class T, detail::enable_sized_unsigned_t<T, 4> = 0>
+        XSIMD_INLINE batch<T, A> bitwise_rshift(batch<T, A> const& x, requires_arch<neon>) noexcept
+        {
+            return vshrq_n_u32(x, shift);
+        }
+
+        template <size_t shift, class A, class T, detail::enable_sized_signed_t<T, 4> = 0>
+        XSIMD_INLINE batch<T, A> bitwise_rshift(batch<T, A> const& x, requires_arch<neon>) noexcept
+        {
+            return vshrq_n_s32(x, shift);
+        }
+
+        template <size_t shift, class A, class T, detail::enable_sized_unsigned_t<T, 8> = 0>
+        XSIMD_INLINE batch<T, A> bitwise_rshift(batch<T, A> const& x, requires_arch<neon>) noexcept
+        {
+            return vshrq_n_u64(x, shift);
+        }
+
+        template <size_t shift, class A, class T, detail::enable_sized_signed_t<T, 8> = 0>
+        XSIMD_INLINE batch<T, A> bitwise_rshift(batch<T, A> const& x, requires_arch<neon>) noexcept
+        {
+            return vshrq_n_s64(x, shift);
+        }
+
+        // first
+        template <class A>
+        XSIMD_INLINE float first(batch<float, A> const& self, requires_arch<neon>) noexcept
+        {
+            return vgetq_lane_f32(self, 0);
+        }
+
+        template <class A, class T, detail::enable_sized_unsigned_t<T, 1> = 0>
+        XSIMD_INLINE T first(batch<T, A> val, requires_arch<neon>) noexcept
+        {
+            return vgetq_lane_u8(val, 0);
+        }
+
+        template <class A, class T, detail::enable_sized_signed_t<T, 1> = 0>
+        XSIMD_INLINE T first(batch<T, A> val, requires_arch<neon>) noexcept
+        {
+            return vgetq_lane_s8(val, 0);
+        }
+
+        template <class A, class T, detail::enable_sized_unsigned_t<T, 2> = 0>
+        XSIMD_INLINE T first(batch<T, A> val, requires_arch<neon>) noexcept
+        {
+            return vgetq_lane_u16(val, 0);
+        }
+
+        template <class A, class T, detail::enable_sized_signed_t<T, 2> = 0>
+        XSIMD_INLINE T first(batch<T, A> val, requires_arch<neon>) noexcept
+        {
+            return vgetq_lane_s16(val, 0);
+        }
+
+        template <class A, class T, detail::enable_sized_unsigned_t<T, 4> = 0>
+        XSIMD_INLINE T first(batch<T, A> val, requires_arch<neon>) noexcept
+        {
+            return vgetq_lane_u32(val, 0);
+        }
+
+        template <class A, class T, detail::enable_sized_signed_t<T, 4> = 0>
+        XSIMD_INLINE T first(batch<T, A> val, requires_arch<neon>) noexcept
+        {
+            return vgetq_lane_s32(val, 0);
+        }
+
+        template <class A, class T, detail::enable_sized_unsigned_t<T, 8> = 0>
+        XSIMD_INLINE T first(batch<T, A> val, requires_arch<neon>) noexcept
+        {
+            return vgetq_lane_u64(val, 0);
+        }
+
+        template <class A, class T, detail::enable_sized_signed_t<T, 8> = 0>
+        XSIMD_INLINE T first(batch<T, A> val, requires_arch<neon>) noexcept
+        {
+            return vgetq_lane_s64(val, 0);
+        }
+
         // Overloads of bitwise shifts accepting two batches of uint64/int64 are not available with ARMv7
 
         /*******
@@ -2771,10 +2958,11 @@ namespace xsimd
         XSIMD_INLINE batch<T, A> rotate_left(batch<T, A> const& a, requires_arch<neon>) noexcept
         {
             using register_type = typename batch<T, A>::register_type;
+            // Adding modulo to avoid warning.
             const detail::neon_dispatcher::binary dispatcher = {
-                std::make_tuple(wrap::rotate_left_u8<N>, wrap::rotate_left_s8<N>, wrap::rotate_left_u16<N>, wrap::rotate_left_s16<N>,
-                                wrap::rotate_left_u32<N>, wrap::rotate_left_s32<N>, wrap::rotate_left_u64<N>, wrap::rotate_left_s64<N>,
-                                wrap::rotate_left_f32<N>)
+                std::make_tuple(wrap::rotate_left_u8<N>, wrap::rotate_left_s8<N>, wrap::rotate_left_u16<N % 8>, wrap::rotate_left_s16<N % 8>,
+                                wrap::rotate_left_u32<N % 4>, wrap::rotate_left_s32<N % 4>, wrap::rotate_left_u64<N % 2>, wrap::rotate_left_s64<N % 2>,
+                                wrap::rotate_left_f32<N % 4>)
             };
             return dispatcher.apply(register_type(a), register_type(a));
         }
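The N % lanes change matters because building the dispatch tuple instantiates every wrapper, including the ones never called for the current element type, and each wrapper's immediate must be in range for its own lane count. A simplified model of the failure mode (illustrative names; static_assert stands in for the intrinsics' immediate-range diagnostics):

    #include <cstddef>

    // Stand-ins for two wrappers with different lane counts.
    template <size_t N>
    int rotate_u8(int x) { static_assert(N < 16, "u8: 16 lanes"); return x; }

    template <size_t N>
    int rotate_u16(int x) { static_assert(N < 8, "u16: 8 lanes"); return x; }

    template <size_t N>
    void build_dispatch_table()
    {
        // Both wrappers are instantiated to build the table, even though a
        // caller only ever invokes one of them. Without the modulo, N == 10
        // (valid for 16 u8 lanes) would trip the u16 check.
        auto u8_entry = &rotate_u8<N>;
        auto u16_entry = &rotate_u16<N % 8>; // the fix from the hunk above
        (void)u8_entry;
        (void)u16_entry;
    }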
@@ -2799,6 +2987,147 @@ namespace xsimd
             self.store_aligned(data.data());
             return set(batch<T, A>(), A(), data[idx]...);
         }
+
+        template <class A, uint64_t V0, uint64_t V1>
+        XSIMD_INLINE batch<uint64_t, A> swizzle(batch<uint64_t, A> const& self,
+                                                batch_constant<uint64_t, A, V0, V1>,
+                                                requires_arch<neon>) noexcept
+        {
+            XSIMD_IF_CONSTEXPR(V0 == 0 && V1 == 0)
+            {
+                auto lo = vget_low_u64(self);
+                return vcombine_u64(lo, lo);
+            }
+            XSIMD_IF_CONSTEXPR(V0 == 1 && V1 == 1)
+            {
+                auto hi = vget_high_u64(self);
+                return vcombine_u64(hi, hi);
+            }
+            XSIMD_IF_CONSTEXPR(V0 == 0 && V1 == 1)
+            {
+                return self;
+            }
+            else
+            {
+                return vextq_u64(self, self, 1);
+            }
+        }
+
+        template <class A, uint64_t V0, uint64_t V1>
+        XSIMD_INLINE batch<int64_t, A> swizzle(batch<int64_t, A> const& self,
+                                               batch_constant<int64_t, A, V0, V1> mask,
+                                               requires_arch<neon>) noexcept
+        {
+            return vreinterpretq_s64_u64(swizzle(vreinterpretq_u64_s64(self), mask, A {}));
+        }
+
+        namespace detail
+        {
+            template <uint32_t Va, uint32_t Vb>
+            XSIMD_INLINE uint8x8_t make_mask()
+            {
+                uint8x8_t res = {
+                    static_cast<uint8_t>((Va % 2) * 4 + 0),
+                    static_cast<uint8_t>((Va % 2) * 4 + 1),
+                    static_cast<uint8_t>((Va % 2) * 4 + 2),
+                    static_cast<uint8_t>((Va % 2) * 4 + 3),
+                    static_cast<uint8_t>((Vb % 2) * 4 + 0),
+                    static_cast<uint8_t>((Vb % 2) * 4 + 1),
+                    static_cast<uint8_t>((Vb % 2) * 4 + 2),
+                    static_cast<uint8_t>((Vb % 2) * 4 + 3),
+                };
+                return res;
+            }
+        }
+
+        template <class A, uint32_t V0, uint32_t V1, uint32_t V2, uint32_t V3>
+        XSIMD_INLINE batch<uint32_t, A> swizzle(batch<uint32_t, A> const& self,
+                                                batch_constant<uint32_t, A, V0, V1, V2, V3> mask,
+                                                requires_arch<neon>) noexcept
+        {
+            constexpr bool is_identity = detail::is_identity(mask);
+            constexpr bool is_dup_lo = detail::is_dup_lo(mask);
+            constexpr bool is_dup_hi = detail::is_dup_hi(mask);
+
+            XSIMD_IF_CONSTEXPR(is_identity)
+            {
+                return self;
+            }
+            XSIMD_IF_CONSTEXPR(is_dup_lo)
+            {
+                XSIMD_IF_CONSTEXPR(V0 == 0 && V1 == 1)
+                {
+                    return vreinterpretq_u32_u64(vdupq_lane_u64(vget_low_u64(vreinterpretq_u64_u32(self)), 0));
+                }
+                XSIMD_IF_CONSTEXPR(V0 == 1 && V1 == 0)
+                {
+                    return vreinterpretq_u32_u64(vdupq_lane_u64(vreinterpret_u64_u32(vrev64_u32(vget_low_u32(self))), 0));
+                }
+                return vdupq_n_u32(vgetq_lane_u32(self, V0));
+            }
+            XSIMD_IF_CONSTEXPR(is_dup_hi)
+            {
+                XSIMD_IF_CONSTEXPR(V0 == 2 && V1 == 3)
+                {
+                    return vreinterpretq_u32_u64(vdupq_lane_u64(vget_high_u64(vreinterpretq_u64_u32(self)), 0));
+                }
+                XSIMD_IF_CONSTEXPR(V0 == 3 && V1 == 2)
+                {
+                    return vreinterpretq_u32_u64(vdupq_lane_u64(vreinterpret_u64_u32(vrev64_u32(vget_high_u32(self))), 0));
+                }
+                return vdupq_n_u32(vgetq_lane_u32(self, V0));
+            }
+            XSIMD_IF_CONSTEXPR(V0 < 2 && V1 < 2 && V2 < 2 && V3 < 2)
+            {
+                uint8x8_t low = vreinterpret_u8_u64(vget_low_u64(vreinterpretq_u64_u32(self)));
+                uint8x8_t mask_lo = detail::make_mask<V0, V1>();
+                uint8x8_t mask_hi = detail::make_mask<V2, V3>();
+                uint8x8_t lo = vtbl1_u8(low, mask_lo);
+                uint8x8_t hi = vtbl1_u8(low, mask_hi);
+                return vreinterpretq_u32_u8(vcombine_u8(lo, hi));
+            }
+            XSIMD_IF_CONSTEXPR(V0 >= 2 && V1 >= 2 && V2 >= 2 && V3 >= 2)
+            {
+                uint8x8_t high = vreinterpret_u8_u64(vget_high_u64(vreinterpretq_u64_u32(self)));
+                uint8x8_t mask_lo = detail::make_mask<V0, V1>();
+                uint8x8_t mask_hi = detail::make_mask<V2, V3>();
+                uint8x8_t lo = vtbl1_u8(high, mask_lo);
+                uint8x8_t hi = vtbl1_u8(high, mask_hi);
+                return vreinterpretq_u32_u8(vcombine_u8(lo, hi));
+            }
+
+            uint8x8_t mask_lo = detail::make_mask<V0, V1>();
+            uint8x8_t mask_hi = detail::make_mask<V2, V3>();
+
+            uint8x8_t low = vreinterpret_u8_u64(vget_low_u64(vreinterpretq_u64_u32(self)));
+            uint8x8_t lol = vtbl1_u8(low, mask_lo);
+            uint8x8_t loh = vtbl1_u8(low, mask_hi);
+            uint32x4_t true_br = vreinterpretq_u32_u8(vcombine_u8(lol, loh));
+
+            uint8x8_t high = vreinterpret_u8_u64(vget_high_u64(vreinterpretq_u64_u32(self)));
+            uint8x8_t hil = vtbl1_u8(high, mask_lo);
+            uint8x8_t hih = vtbl1_u8(high, mask_hi);
+            uint32x4_t false_br = vreinterpretq_u32_u8(vcombine_u8(hil, hih));
+
+            batch_bool_constant<uint32_t, A, (V0 < 2), (V1 < 2), (V2 < 2), (V3 < 2)> blend_mask;
+            return select(blend_mask, batch<uint32_t, A>(true_br), batch<uint32_t, A>(false_br), A {});
+        }
+
+        template <class A, uint32_t V0, uint32_t V1, uint32_t V2, uint32_t V3>
+        XSIMD_INLINE batch<int32_t, A> swizzle(batch<int32_t, A> const& self,
+                                               batch_constant<int32_t, A, V0, V1, V2, V3> mask,
+                                               requires_arch<neon>) noexcept
+        {
+            return vreinterpretq_s32_u32(swizzle(vreinterpretq_u32_s32(self), mask, A {}));
+        }
+
+        template <class A, uint32_t V0, uint32_t V1, uint32_t V2, uint32_t V3>
+        XSIMD_INLINE batch<float, A> swizzle(batch<float, A> const& self,
+                                             batch_constant<uint32_t, A, V0, V1, V2, V3> mask,
+                                             requires_arch<neon>) noexcept
+        {
+            return vreinterpretq_f32_u32(swizzle(batch<uint32_t, A>(vreinterpretq_u32_f32(self)), mask, A {}));
+        }
     }
 
 }
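The generic 32-bit swizzle above falls back on vtbl1_u8, a byte-granular table lookup, so make_mask converts 32-bit lane indices into byte indices: (V % 2) * 4 + k picks byte k of word V within a 64-bit half. A scalar model of one half (illustrative, not the package's code):

    #include <cstdint>
    #include <cstring>

    // Permute the two 32-bit words of a 64-bit value: pick word Va, then Vb.
    template <uint32_t Va, uint32_t Vb>
    uint64_t swizzle_half_model(uint64_t half)
    {
        uint8_t bytes[8], out[8];
        std::memcpy(bytes, &half, 8);
        // Same index arithmetic as make_mask: (V % 2) * 4 + k selects
        // byte k of word V; vtbl1_u8 gathers these bytes in one step.
        for (int k = 0; k < 4; ++k)
        {
            out[k] = bytes[(Va % 2) * 4 + k];
            out[4 + k] = bytes[(Vb % 2) * 4 + k];
        }
        uint64_t r;
        std::memcpy(&r, out, 8);
        return r;
    }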
sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_neon64.hpp
@@ -28,6 +28,13 @@ namespace xsimd
     {
         using namespace types;
 
+        // first
+        template <class A>
+        XSIMD_INLINE double first(batch<double, A> const& self, requires_arch<neon64>) noexcept
+        {
+            return vgetq_lane_f64(self, 0);
+        }
+
         /*******
          * all *
          *******/
sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_rvv.hpp
@@ -1335,6 +1335,19 @@ namespace xsimd
             return result;
         }
 
+        // first
+        template <class A, class T, detail::rvv_enable_all_t<T> = 0>
+        XSIMD_INLINE T first(batch<T, A> const& arg, requires_arch<rvv>) noexcept
+        {
+            return detail::rvvmv_lane0(arg);
+        }
+
+        template <class A, class T, detail::rvv_enable_all_t<T> = 0>
+        XSIMD_INLINE std::complex<T> first(batch<std::complex<T>, A> const& arg, requires_arch<rvv>) noexcept
+        {
+            return std::complex<T> { detail::rvvmv_lane0(arg.real()), detail::rvvmv_lane0(arg.imag()) };
+        }
+
         // insert
         template <class A, class T, size_t I, detail::rvv_enable_all_t<T> = 0>
         XSIMD_INLINE batch<T, A> insert(batch<T, A> const& arg, T val, index<I>, requires_arch<rvv>) noexcept
sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_scalar.hpp
@@ -300,12 +300,29 @@ namespace xsimd
         return x << shift;
    }
 
+    template <size_t shift, class T>
+    XSIMD_INLINE typename std::enable_if<std::is_integral<T>::value, T>::type
+    bitwise_lshift(T x) noexcept
+    {
+        constexpr auto bits = std::numeric_limits<T>::digits + std::numeric_limits<T>::is_signed;
+        static_assert(shift < bits, "Count must be less than the number of bits in T");
+        return x << shift;
+    }
+
     template <class T0, class T1>
     XSIMD_INLINE typename std::enable_if<std::is_integral<T0>::value && std::is_integral<T1>::value, T0>::type
     bitwise_rshift(T0 x, T1 shift) noexcept
     {
         return x >> shift;
     }
+    template <size_t shift, class T>
+    XSIMD_INLINE typename std::enable_if<std::is_integral<T>::value, T>::type
+    bitwise_rshift(T x) noexcept
+    {
+        constexpr auto bits = std::numeric_limits<T>::digits + std::numeric_limits<T>::is_signed;
+        static_assert(shift < bits, "Count must be less than the number of bits in T");
+        return x >> shift;
+    }
 
     template <class T>
     XSIMD_INLINE typename std::enable_if<std::is_integral<T>::value, T>::type
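Hypothetical usage of the new compile-time scalar shifts (assuming the overloads above are reachable through the public xsimd header): the template form turns an out-of-range shift from undefined behavior at runtime into a compile-time error.

    #include <cstdint>
    #include <xsimd/xsimd.hpp> // assumed include path for the overloads above

    int demo()
    {
        uint32_t x = 0x0F0Fu;
        auto a = xsimd::bitwise_lshift<4>(x);  // OK: 4 < 32, compiles
        auto b = xsimd::bitwise_rshift<8>(x);  // OK: 8 < 32
        // xsimd::bitwise_lshift<32>(x);       // would not compile: static_assert fires
        return static_cast<int>(a + b);
    }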
@@ -447,16 +464,32 @@ namespace xsimd
     XSIMD_INLINE typename std::enable_if<std::is_integral<T0>::value && std::is_integral<T1>::value, T0>::type
     rotl(T0 x, T1 shift) noexcept
     {
-        constexpr auto N = std::numeric_limits<T0>::digits;
-        return (x << shift) | (x >> (N - shift));
+        constexpr auto bits = std::numeric_limits<T0>::digits + std::numeric_limits<T0>::is_signed;
+        return (x << shift) | (x >> (bits - shift));
+    }
+    template <size_t count, class T>
+    XSIMD_INLINE typename std::enable_if<std::is_integral<T>::value, T>::type
+    rotl(T x) noexcept
+    {
+        constexpr auto bits = std::numeric_limits<T>::digits + std::numeric_limits<T>::is_signed;
+        static_assert(count < bits, "Count must be less than the number of bits in T");
+        return (x << count) | (x >> (bits - count));
     }
 
     template <class T0, class T1>
     XSIMD_INLINE typename std::enable_if<std::is_integral<T0>::value && std::is_integral<T1>::value, T0>::type
     rotr(T0 x, T1 shift) noexcept
     {
-        constexpr auto N = std::numeric_limits<T0>::digits;
-        return (x >> shift) | (x << (N - shift));
+        constexpr auto bits = std::numeric_limits<T0>::digits + std::numeric_limits<T0>::is_signed;
+        return (x >> shift) | (x << (bits - shift));
+    }
+    template <size_t count, class T>
+    XSIMD_INLINE typename std::enable_if<std::is_integral<T>::value, T>::type
+    rotr(T x) noexcept
+    {
+        constexpr auto bits = std::numeric_limits<T>::digits + std::numeric_limits<T>::is_signed;
+        static_assert(count < bits, "Count must be less than the number of bits in T");
+        return (x >> count) | (x << (bits - count));
     }
 
     template <class T>
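The digits + is_signed change fixes the rotation width for signed types: std::numeric_limits<T>::digits excludes the sign bit, so the old code rotated int32_t values across 31 bits instead of 32. A few static_asserts make the arithmetic concrete:

    #include <cstdint>
    #include <limits>

    static_assert(std::numeric_limits<int32_t>::digits == 31,
                  "digits excludes the sign bit for signed types");
    static_assert(std::numeric_limits<int32_t>::digits
                      + std::numeric_limits<int32_t>::is_signed == 32,
                  "adding is_signed restores the full bit width");
    static_assert(std::numeric_limits<uint32_t>::digits == 32,
                  "unchanged for unsigned types");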
@@ -510,7 +543,11 @@ namespace xsimd
     template <class T, class = typename std::enable_if<std::is_scalar<T>::value>::type>
     XSIMD_INLINE bool is_flint(const T& x) noexcept
     {
+#ifdef __FAST_MATH__
+        return (x - std::trunc(x)) == T(0);
+#else
         return std::isnan(x - x) ? false : (x - std::trunc(x)) == T(0);
+#endif
     }
 
     template <class T, class = typename std::enable_if<std::is_scalar<T>::value>::type>
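Context for the __FAST_MATH__ branch: the std::isnan(x - x) guard exists because x - x is NaN when x is NaN or infinite, rejecting non-finite inputs before the trunc comparison. Under -ffast-math, GCC and Clang define __FAST_MATH__ and may assume NaNs never occur, so the guard can be folded away regardless; the new branch simply drops it explicitly. A standalone model of the non-fast-math path (illustrative):

    #include <cmath>
    #include <limits>

    // Without fast-math: inf - inf and NaN - NaN are both NaN, so the
    // guard catches non-finite inputs before comparing against trunc.
    bool is_flint_model(double x)
    {
        return std::isnan(x - x) ? false : (x - std::trunc(x)) == 0.0;
    }

    // is_flint_model(2.0) -> true   (integral value)
    // is_flint_model(2.5) -> false
    // is_flint_model(std::numeric_limits<double>::infinity()) -> false (guard)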