sequenzo 0.1.17-cp39-cp39-macosx_10_9_universal2.whl → 0.1.18-cp39-cp39-macosx_10_9_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of sequenzo might be problematic.
- sequenzo/__init__.py +25 -1
- sequenzo/big_data/clara/clara.py +1 -1
- sequenzo/big_data/clara/utils/get_weighted_diss.c +157 -157
- sequenzo/big_data/clara/utils/get_weighted_diss.cpython-39-darwin.so +0 -0
- sequenzo/clustering/hierarchical_clustering.py +202 -8
- sequenzo/define_sequence_data.py +34 -2
- sequenzo/dissimilarity_measures/c_code.cpython-39-darwin.so +0 -0
- sequenzo/dissimilarity_measures/get_substitution_cost_matrix.py +1 -1
- sequenzo/dissimilarity_measures/src/DHDdistance.cpp +13 -37
- sequenzo/dissimilarity_measures/src/LCPdistance.cpp +13 -37
- sequenzo/dissimilarity_measures/src/OMdistance.cpp +12 -47
- sequenzo/dissimilarity_measures/src/OMspellDistance.cpp +103 -67
- sequenzo/dissimilarity_measures/src/dp_utils.h +160 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_arithmetic.hpp +41 -16
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_complex.hpp +4 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_details.hpp +7 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_logical.hpp +10 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_math.hpp +127 -43
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_memory.hpp +30 -2
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_swizzle.hpp +174 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_trigo.hpp +14 -5
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx.hpp +111 -54
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx2.hpp +131 -9
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512bw.hpp +11 -113
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512dq.hpp +39 -7
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512f.hpp +336 -30
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vbmi.hpp +9 -37
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vbmi2.hpp +58 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_common.hpp +1 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_common_fwd.hpp +35 -2
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_constants.hpp +3 -1
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_emulated.hpp +17 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_avx.hpp +13 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_sse.hpp +18 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma4.hpp +13 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_isa.hpp +8 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_neon.hpp +363 -34
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_neon64.hpp +7 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_rvv.hpp +13 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_scalar.hpp +41 -4
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse2.hpp +252 -16
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse3.hpp +9 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_ssse3.hpp +12 -1
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sve.hpp +7 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_vsx.hpp +892 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_wasm.hpp +78 -1
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_arch.hpp +3 -1
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_config.hpp +13 -2
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_cpuid.hpp +5 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_inline.hpp +5 -1
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_all_registers.hpp +2 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_api.hpp +64 -1
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_batch.hpp +36 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_rvv_register.hpp +40 -31
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_traits.hpp +8 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_vsx_register.hpp +77 -0
- sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/xsimd.hpp +6 -0
- sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.c +157 -157
- sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.cpython-39-darwin.so +0 -0
- sequenzo/dissimilarity_measures/utils/seqconc.c +157 -157
- sequenzo/dissimilarity_measures/utils/seqconc.cpython-39-darwin.so +0 -0
- sequenzo/dissimilarity_measures/utils/seqdss.c +157 -157
- sequenzo/dissimilarity_measures/utils/seqdss.cpython-39-darwin.so +0 -0
- sequenzo/dissimilarity_measures/utils/seqdur.c +157 -157
- sequenzo/dissimilarity_measures/utils/seqdur.cpython-39-darwin.so +0 -0
- sequenzo/dissimilarity_measures/utils/seqlength.c +157 -157
- sequenzo/dissimilarity_measures/utils/seqlength.cpython-39-darwin.so +0 -0
- sequenzo/sequence_characteristics/__init__.py +4 -0
- sequenzo/sequence_characteristics/complexity_index.py +17 -57
- sequenzo/sequence_characteristics/overall_cross_sectional_entropy.py +177 -111
- sequenzo/sequence_characteristics/plot_characteristics.py +30 -11
- sequenzo/sequence_characteristics/simple_characteristics.py +1 -0
- sequenzo/sequence_characteristics/state_frequencies_and_entropy_per_sequence.py +9 -3
- sequenzo/sequence_characteristics/turbulence.py +47 -67
- sequenzo/sequence_characteristics/variance_of_spell_durations.py +19 -9
- sequenzo/sequence_characteristics/within_sequence_entropy.py +5 -58
- sequenzo/visualization/plot_sequence_index.py +58 -35
- sequenzo/visualization/plot_state_distribution.py +57 -36
- sequenzo/with_event_history_analysis/__init__.py +35 -0
- sequenzo/with_event_history_analysis/sequence_analysis_multi_state_model.py +850 -0
- sequenzo/with_event_history_analysis/sequence_history_analysis.py +283 -0
- {sequenzo-0.1.17.dist-info → sequenzo-0.1.18.dist-info}/METADATA +7 -6
- {sequenzo-0.1.17.dist-info → sequenzo-0.1.18.dist-info}/RECORD +86 -79
- {sequenzo-0.1.17.dist-info → sequenzo-0.1.18.dist-info}/WHEEL +0 -0
- {sequenzo-0.1.17.dist-info → sequenzo-0.1.18.dist-info}/licenses/LICENSE +0 -0
- {sequenzo-0.1.17.dist-info → sequenzo-0.1.18.dist-info}/top_level.txt +0 -0
sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse2.hpp
@@ -292,6 +292,36 @@ namespace xsimd
                 return {};
             }
         }
+        template <size_t shift, class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
+        XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& self, requires_arch<sse2>) noexcept
+        {
+            constexpr auto bits = std::numeric_limits<T>::digits + std::numeric_limits<T>::is_signed;
+            static_assert(shift < bits, "Count must be less than the number of bits in T");
+            XSIMD_IF_CONSTEXPR(shift == 0)
+            {
+                return self;
+            }
+            else XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
+            {
+                // 8-bit left shift via 16-bit shift + mask
+                __m128i shifted = _mm_slli_epi16(self, static_cast<int>(shift));
+                __m128i mask = _mm_set1_epi8(static_cast<char>(0xFF << shift));
+                return _mm_and_si128(shifted, mask);
+            }
+            else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
+            {
+                return _mm_slli_epi16(self, static_cast<int>(shift));
+            }
+            else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
+            {
+                return _mm_slli_epi32(self, static_cast<int>(shift));
+            }
+            else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
+            {
+                return _mm_slli_epi64(self, static_cast<int>(shift));
+            }
+            return bitwise_lshift<shift>(self, common {});
+        }

         // bitwise_not
         template <class A>
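Note on the hunk above: SSE2 has no per-byte shift instruction, so the new sizeof(T) == 1 path shifts 16-bit lanes and masks off the bits that spill in from the neighbouring byte. A minimal standalone sketch of the same trick, using raw intrinsics rather than the xsimd wrappers (assumes an SSE2-capable x86 target; the helper name is illustrative):

    #include <emmintrin.h> // SSE2 intrinsics
    #include <cstdint>
    #include <cstdio>

    template <int Shift>
    __m128i lshift_epi8(__m128i v)
    {
        // Shift each 16-bit lane, then clear the low Shift bits of every byte,
        // which is exactly where bits from the byte below have leaked in.
        __m128i shifted = _mm_slli_epi16(v, Shift);
        __m128i mask = _mm_set1_epi8(static_cast<char>(0xFF << Shift));
        return _mm_and_si128(shifted, mask);
    }

    int main()
    {
        alignas(16) std::uint8_t in[16], out[16];
        for (int i = 0; i < 16; ++i) in[i] = static_cast<std::uint8_t>(i * 17);
        __m128i v = _mm_load_si128(reinterpret_cast<const __m128i*>(in));
        _mm_store_si128(reinterpret_cast<__m128i*>(out), lshift_epi8<3>(v));
        for (int i = 0; i < 16; ++i)
            std::printf("%3d -> %3d (scalar %3d)\n", in[i], out[i],
                        static_cast<std::uint8_t>(in[i] << 3));
        return 0;
    }

The sizeof(T) >= 2 cases map directly onto _mm_slli_epi16/_mm_slli_epi32/_mm_slli_epi64, so only the 8-bit case needs this workaround.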
@@ -420,6 +450,63 @@ namespace xsimd
                 }
             }
         }
+        template <size_t shift, class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
+        XSIMD_INLINE batch<T, A> bitwise_rshift(batch<T, A> const& self, requires_arch<sse2>) noexcept
+        {
+            constexpr auto bits = std::numeric_limits<T>::digits + std::numeric_limits<T>::is_signed;
+            static_assert(shift < bits,
+                          "Shift must be less than the number of value bits in the type");
+
+            XSIMD_IF_CONSTEXPR(shift == 0)
+            {
+                return self;
+            }
+
+            XSIMD_IF_CONSTEXPR(std::is_signed<T>::value)
+            {
+                XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
+                {
+                    // 8-bit arithmetic right shift via 16-bit shift + sign-extension handling.
+                    __m128i shifted = _mm_srai_epi16(self, static_cast<int>(shift));
+                    __m128i sign_mask = _mm_set1_epi16(static_cast<short>(0xFF00 >> shift));
+                    __m128i cmp_negative = _mm_cmpgt_epi8(_mm_setzero_si128(), self);
+                    return _mm_or_si128(_mm_and_si128(sign_mask, cmp_negative),
+                                        _mm_andnot_si128(sign_mask, shifted));
+                }
+                else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
+                {
+                    return _mm_srai_epi16(self, static_cast<int>(shift));
+                }
+                else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
+                {
+                    return _mm_srai_epi32(self, static_cast<int>(shift));
+                }
+                // No 64-bit arithmetic right shift in SSE2; fall back
+                return bitwise_rshift<shift>(self, common {});
+            }
+            else // unsigned / logical right shift
+            {
+                XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
+                {
+                    // Emulate byte-wise logical right shift using 16-bit shifts + per-byte mask.
+                    __m128i s16 = _mm_srli_epi16(self, static_cast<int>(shift));
+                    __m128i mask = _mm_set1_epi8(static_cast<char>(0xFFu >> shift));
+                    return _mm_and_si128(s16, mask);
+                }
+                else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
+                {
+                    return _mm_srli_epi16(self, static_cast<int>(shift));
+                }
+                else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
+                {
+                    return _mm_srli_epi32(self, static_cast<int>(shift));
+                }
+                else // sizeof(T) == 8
+                {
+                    return _mm_srli_epi64(self, static_cast<int>(shift));
+                }
+            }
+        }

         // bitwise_xor
         template <class A>
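The unsigned sizeof(T) == 1 branch of the new bitwise_rshift works the same way in the other direction: a 16-bit logical shift followed by a per-byte mask that drops the bits pulled down from the byte above. A standalone sketch of that part only (SSE2 assumed; names are illustrative):

    #include <emmintrin.h>
    #include <cstdint>
    #include <cstdio>

    template <int Shift>
    __m128i logical_rshift_epi8(__m128i v)
    {
        // 16-bit logical shift; the top Shift bits of each byte now hold bits
        // from the byte above, so keep only the low (8 - Shift) bits per byte.
        __m128i shifted = _mm_srli_epi16(v, Shift);
        __m128i mask = _mm_set1_epi8(static_cast<char>(0xFFu >> Shift));
        return _mm_and_si128(shifted, mask);
    }

    int main()
    {
        alignas(16) std::uint8_t in[16], out[16];
        for (int i = 0; i < 16; ++i) in[i] = static_cast<std::uint8_t>(255 - i * 13);
        __m128i v = _mm_load_si128(reinterpret_cast<const __m128i*>(in));
        _mm_store_si128(reinterpret_cast<__m128i*>(out), logical_rshift_epi8<3>(v));
        for (int i = 0; i < 16; ++i)
            std::printf("%3d -> %3d (scalar %3d)\n", in[i], out[i], in[i] >> 3);
        return 0;
    }

The signed path in the hunk additionally patches sign bits back in via _mm_cmpgt_epi8, and the signed 64-bit case falls back to the common implementation because SSE2 has no 64-bit arithmetic right shift.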
@@ -673,6 +760,53 @@ namespace xsimd
             return _mm_castsi128_pd(_mm_cmpeq_epi32(_mm_castpd_si128(self), _mm_castpd_si128(other)));
         }

+        // first
+        template <class A>
+        XSIMD_INLINE float first(batch<float, A> const& self, requires_arch<sse2>) noexcept
+        {
+            return _mm_cvtss_f32(self);
+        }
+
+        template <class A>
+        XSIMD_INLINE double first(batch<double, A> const& self, requires_arch<sse2>) noexcept
+        {
+            return _mm_cvtsd_f64(self);
+        }
+
+        template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
+        XSIMD_INLINE T first(batch<T, A> const& self, requires_arch<sse2>) noexcept
+        {
+            XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
+            {
+                return static_cast<T>(_mm_cvtsi128_si32(self) & 0xFF);
+            }
+            else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
+            {
+                return static_cast<T>(_mm_cvtsi128_si32(self) & 0xFFFF);
+            }
+            else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
+            {
+                return static_cast<T>(_mm_cvtsi128_si32(self));
+            }
+            else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
+            {
+#if defined(__x86_64__)
+                return static_cast<T>(_mm_cvtsi128_si64(self));
+#else
+                __m128i m;
+                _mm_storel_epi64(&m, self);
+                int64_t i;
+                std::memcpy(&i, &m, sizeof(i));
+                return i;
+#endif
+            }
+            else
+            {
+                assert(false && "unsupported arch/op combination");
+                return {};
+            }
+        }
+
         // from_mask
         template <class A>
         XSIMD_INLINE batch_bool<float, A> from_mask(batch_bool<float, A> const&, uint64_t mask, requires_arch<sse2>) noexcept
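The new first kernels return lane 0 of a batch without a round trip through memory. The underlying intrinsics behave as in this small standalone snippet (SSE2 target assumed; not sequenzo/xsimd code):

    #include <emmintrin.h>
    #include <cstdio>

    int main()
    {
        __m128  f = _mm_set_ps(4.f, 3.f, 2.f, 1.f); // lane 0 is the last argument
        __m128d d = _mm_set_pd(2.0, 1.0);           // lane 0 = 1.0
        __m128i i = _mm_set_epi32(40, 30, 20, 10);  // lane 0 = 10

        std::printf("%g %g %d\n",
                    _mm_cvtss_f32(f),      // 1
                    _mm_cvtsd_f64(d),      // 1
                    _mm_cvtsi128_si32(i)); // 10
        return 0;
    }

For 8- and 16-bit integers the hunk masks the 32-bit extraction down to the element width, and on 32-bit builds the 64-bit case goes through a store plus memcpy because _mm_cvtsi128_si64 is x86-64 only.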
@@ -1090,7 +1224,7 @@ namespace xsimd
         template <class A>
         XSIMD_INLINE batch<float, A> max(batch<float, A> const& self, batch<float, A> const& other, requires_arch<sse2>) noexcept
         {
-            return _mm_max_ps(
+            return _mm_max_ps(other, self);
         }
         template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
         XSIMD_INLINE batch<T, A> max(batch<T, A> const& self, batch<T, A> const& other, requires_arch<sse2>) noexcept
@@ -1100,14 +1234,14 @@ namespace xsimd
         template <class A>
         XSIMD_INLINE batch<double, A> max(batch<double, A> const& self, batch<double, A> const& other, requires_arch<sse2>) noexcept
         {
-            return _mm_max_pd(
+            return _mm_max_pd(other, self);
         }

         // min
         template <class A>
         XSIMD_INLINE batch<float, A> min(batch<float, A> const& self, batch<float, A> const& other, requires_arch<sse2>) noexcept
         {
-            return _mm_min_ps(
+            return _mm_min_ps(other, self);
         }
         template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
         XSIMD_INLINE batch<T, A> min(batch<T, A> const& self, batch<T, A> const& other, requires_arch<sse2>) noexcept
@@ -1117,7 +1251,7 @@ namespace xsimd
         template <class A>
         XSIMD_INLINE batch<double, A> min(batch<double, A> const& self, batch<double, A> const& other, requires_arch<sse2>) noexcept
         {
-            return _mm_min_pd(
+            return _mm_min_pd(other, self);
         }

         // mul
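The float/double min and max kernels above now pass the operands to the intrinsics in reverse order. With SSE MINPS/MAXPS/MINPD/MAXPD, whenever the comparison is unordered (a NaN is involved) the second source operand is returned, so the argument order decides which input wins in the NaN case, as this small check illustrates (SSE2 assumed, illustrative only):

    #include <emmintrin.h>
    #include <cmath>
    #include <cstdio>

    int main()
    {
        __m128 self  = _mm_set1_ps(std::nanf(""));
        __m128 other = _mm_set1_ps(1.0f);

        // MAXPS returns its second operand when the comparison is unordered.
        float a = _mm_cvtss_f32(_mm_max_ps(self, other)); // 1.0f
        float b = _mm_cvtss_f32(_mm_max_ps(other, self)); // NaN

        std::printf("max(self, other) lane 0 = %g\n", a);
        std::printf("max(other, self) lane 0 = %g\n", b);
        return 0;
    }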
@@ -1243,7 +1377,7 @@ namespace xsimd
             }
             else
             {
-                return
+                return reduce_add(self, common {});
             }
         }

@@ -1269,10 +1403,10 @@ namespace xsimd
             batch<T, A> step2 = _mm_shufflelo_epi16(acc1, mask2);
             batch<T, A> acc2 = max(acc1, step2);
             if (sizeof(T) == 2)
-                return acc2
+                return first(acc2, A {});
             batch<T, A> step3 = bitwise_cast<T>(bitwise_cast<uint16_t>(acc2) >> 8);
             batch<T, A> acc3 = max(acc2, step3);
-            return acc3
+            return first(acc3, A {});
         }

         // reduce_min
@@ -1291,10 +1425,56 @@ namespace xsimd
             batch<T, A> step2 = _mm_shufflelo_epi16(acc1, mask2);
             batch<T, A> acc2 = min(acc1, step2);
             if (sizeof(T) == 2)
-                return acc2
+                return first(acc2, A {});
             batch<T, A> step3 = bitwise_cast<T>(bitwise_cast<uint16_t>(acc2) >> 8);
             batch<T, A> acc3 = min(acc2, step3);
-            return acc3
+            return first(acc3, A {});
+        }
+
+        // reduce_mul
+        template <class A>
+        XSIMD_INLINE float reduce_mul(batch<float, A> const& self, requires_arch<sse2>) noexcept
+        {
+            __m128 tmp0 = _mm_mul_ps(self, _mm_movehl_ps(self, self));
+            __m128 tmp1 = _mm_mul_ss(tmp0, _mm_shuffle_ps(tmp0, tmp0, 1));
+            return _mm_cvtss_f32(tmp1);
+        }
+
+        template <class A>
+        XSIMD_INLINE double reduce_mul(batch<double, A> const& self, requires_arch<sse2>) noexcept
+        {
+            return _mm_cvtsd_f64(_mm_mul_sd(self, _mm_unpackhi_pd(self, self)));
+        }
+
+        template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
+        XSIMD_INLINE T reduce_mul(batch<T, A> const& self, requires_arch<sse2>) noexcept
+        {
+            XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
+            {
+                batch<T, A> tmp1 = _mm_shuffle_epi32(self, _MM_SHUFFLE(0, 1, 2, 3));
+                tmp1 = tmp1 * self;
+                batch<T, A> tmp2 = _mm_unpackhi_epi32(tmp1, tmp1);
+                tmp2 = tmp2 * tmp1;
+                return _mm_cvtsi128_si32(tmp2);
+            }
+            else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
+            {
+                batch<T, A> tmp1 = _mm_unpackhi_epi64(self, self);
+                auto tmp2 = tmp1 * self;
+#if defined(__x86_64__)
+                return _mm_cvtsi128_si64(tmp2);
+#else
+                __m128i m;
+                _mm_storel_epi64(&m, tmp2);
+                int64_t i;
+                std::memcpy(&i, &m, sizeof(i));
+                return i;
+#endif
+            }
+            else
+            {
+                return reduce_mul(self, common {});
+            }
         }

         // rsqrt
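The new SSE2 reduce_mul computes a horizontal product: multiply the high half into the low half, then fold the remaining two lanes. A standalone version of the same shuffle pattern for four floats (SSE2 assumed; hprod is an illustrative name):

    #include <emmintrin.h>
    #include <cstdio>

    static float hprod(__m128 v)
    {
        // {v0*v2, v1*v3, ...}: fold the high 64 bits onto the low 64 bits.
        __m128 t0 = _mm_mul_ps(v, _mm_movehl_ps(v, v));
        // Multiply the two surviving lanes; only lane 0 matters from here on.
        __m128 t1 = _mm_mul_ss(t0, _mm_shuffle_ps(t0, t0, 1));
        return _mm_cvtss_f32(t1);
    }

    int main()
    {
        __m128 v = _mm_set_ps(5.f, 4.f, 3.f, 2.f); // lanes {2, 3, 4, 5}
        std::printf("%g (expected %g)\n", hprod(v), 2.f * 3.f * 4.f * 5.f);
        return 0;
    }

The SSE3 variant further below replaces the final shuffle with _mm_movehdup_ps, and the integer versions fold with _mm_shuffle_epi32/_mm_unpackhi_epi32 in the same spirit.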
@@ -1641,22 +1821,78 @@ namespace xsimd
         }

         template <class A, uint16_t V0, uint16_t V1, uint16_t V2, uint16_t V3, uint16_t V4, uint16_t V5, uint16_t V6, uint16_t V7>
-        XSIMD_INLINE batch<uint16_t, A> swizzle(batch<uint16_t, A> const& self, batch_constant<uint16_t, A, V0, V1, V2, V3, V4, V5, V6, V7
+        XSIMD_INLINE batch<uint16_t, A> swizzle(batch<uint16_t, A> const& self, batch_constant<uint16_t, A, V0, V1, V2, V3, V4, V5, V6, V7> mask, requires_arch<sse2>) noexcept
         {
-
+            constexpr bool is_identity = detail::is_identity(mask);
+            constexpr bool is_dup_lo = detail::is_dup_lo(mask);
+            constexpr bool is_dup_hi = detail::is_dup_hi(mask);
+
+            XSIMD_IF_CONSTEXPR(is_identity)
+            {
+                return self;
+            }
+            XSIMD_IF_CONSTEXPR(is_dup_lo)
+            {
+                // permute the low half
+                constexpr int imm = detail::mod_shuffle(V0, V1, V2, V3);
+                const auto lo = _mm_shufflelo_epi16(self, imm);
+                // broadcast that 64-bit low half into both halves
+                const auto lo_all = _mm_unpacklo_epi64(lo, lo);
+                return lo_all;
+            }
+            XSIMD_IF_CONSTEXPR(is_dup_hi)
+            {
+                // permute the high half
+                constexpr int imm = detail::mod_shuffle(V4, V5, V6, V7);
+                const auto hi = _mm_shufflehi_epi16(self, imm);
+                // broadcast that 64-bit high half into both halves
+                const auto hi_all = _mm_unpackhi_epi64(hi, hi);
+                return hi_all;
+            }
+            // Only pick elements from the low lane
+            XSIMD_IF_CONSTEXPR((V0 < 4) && (V1 < 4) && (V2 < 4) && (V3 < 4) && (V4 < 4) && (V5 < 4) && (V6 < 4) && (V7 < 4))
+            {
+                // permute within each sub lane
+                constexpr auto mask_lo = detail::mod_shuffle(V0, V1, V2, V3);
+                constexpr auto mask_hi = detail::mod_shuffle(V4, V5, V6, V7);
+                __m128i lol = _mm_shufflelo_epi16(self, mask_lo);
+                __m128i loh = _mm_shufflelo_epi16(self, mask_hi);
+
+                // generate temporary lanes
+                return _mm_unpacklo_epi64(lol, loh);
+            }
+            // Only pick elements from the high lane
+            XSIMD_IF_CONSTEXPR((V0 >= 4) && (V1 >= 4) && (V2 >= 4) && (V3 >= 4) && (V4 >= 4) && (V5 >= 4) && (V6 >= 4) && (V7 >= 4))
+            {
+                // permute within each sub lane
+                constexpr auto mask_lo = detail::mod_shuffle(V0, V1, V2, V3);
+                constexpr auto mask_hi = detail::mod_shuffle(V4, V5, V6, V7);
+                __m128i hil = _mm_shufflehi_epi16(self, mask_lo);
+                __m128i hih = _mm_shufflehi_epi16(self, mask_hi);
+
+                // generate temporary lanes
+                return _mm_unpackhi_epi64(hil, hih);
+            }
+
+            // Generic case
+
+            // permute within each sub lane
             constexpr auto mask_lo = detail::mod_shuffle(V0, V1, V2, V3);
             constexpr auto mask_hi = detail::mod_shuffle(V4, V5, V6, V7);
-            __m128i
-            __m128i
+            __m128i lol = _mm_shufflelo_epi16(self, mask_lo);
+            __m128i loh = _mm_shufflelo_epi16(self, mask_hi);
+            __m128i hil = _mm_shufflehi_epi16(self, mask_lo);
+            __m128i hih = _mm_shufflehi_epi16(self, mask_hi);

-
-            __m128i
+            // generate temporary lanes
+            __m128i lo = _mm_unpacklo_epi64(lol, loh);
+            __m128i hi = _mm_unpackhi_epi64(hil, hih);

             // mask to choose the right lane
             batch_bool_constant<uint16_t, A, (V0 < 4), (V1 < 4), (V2 < 4), (V3 < 4), (V4 < 4), (V5 < 4), (V6 < 4), (V7 < 4)> blend_mask;

             // blend the two permutes
-            return select(blend_mask, batch<uint16_t, A>(
+            return select(blend_mask, batch<uint16_t, A>(lo), batch<uint16_t, A>(hi));
         }

         template <class A, uint16_t V0, uint16_t V1, uint16_t V2, uint16_t V3, uint16_t V4, uint16_t V5, uint16_t V6, uint16_t V7>
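The reworked uint16_t swizzle adds compile-time fast paths: an identity mask returns the input unchanged, and masks that duplicate one 64-bit half (or read only from one half) are served by a single _mm_shufflelo_epi16/_mm_shufflehi_epi16 plus an unpack, skipping the blend of the generic path. A standalone illustration of the dup-lo pattern (SSE2 assumed; values are arbitrary):

    #include <emmintrin.h>
    #include <cstdint>
    #include <cstdio>

    int main()
    {
        // Lanes 0..7 hold 0..7 (_mm_set_epi16 takes arguments high lane first).
        __m128i v = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);

        // Reorder the low four 16-bit lanes (here: reversed) ...
        __m128i lo = _mm_shufflelo_epi16(v, _MM_SHUFFLE(0, 1, 2, 3));
        // ... and broadcast that 64-bit half into both halves of the register.
        __m128i dup_lo = _mm_unpacklo_epi64(lo, lo);

        alignas(16) std::uint16_t r[8];
        _mm_store_si128(reinterpret_cast<__m128i*>(r), dup_lo);
        for (int i = 0; i < 8; ++i) std::printf("%d ", r[i]); // 3 2 1 0 3 2 1 0
        std::printf("\n");
        return 0;
    }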
sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse3.hpp
@@ -51,6 +51,15 @@ namespace xsimd
             return _mm_cvtss_f32(tmp1);
         }

+        // reduce_mul
+        template <class A>
+        XSIMD_INLINE float reduce_mul(batch<float, A> const& self, requires_arch<sse3>) noexcept
+        {
+            __m128 tmp1 = _mm_mul_ps(self, _mm_movehl_ps(self, self));
+            __m128 tmp2 = _mm_mul_ps(tmp1, _mm_movehdup_ps(tmp1));
+            return _mm_cvtss_f32(tmp2);
+        }
+
     }

 }
sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_ssse3.hpp
@@ -107,11 +107,22 @@ namespace xsimd

         // rotate_left
         template <size_t N, class A>
-        XSIMD_INLINE batch<
+        XSIMD_INLINE batch<uint8_t, A> rotate_left(batch<uint8_t, A> const& self, requires_arch<ssse3>) noexcept
         {
             return _mm_alignr_epi8(self, self, N);
         }
         template <size_t N, class A>
+        XSIMD_INLINE batch<int8_t, A> rotate_left(batch<int8_t, A> const& self, requires_arch<ssse3>) noexcept
+        {
+            return bitwise_cast<int8_t>(rotate_left<N, A>(bitwise_cast<uint8_t>(self), ssse3 {}));
+        }
+
+        template <size_t N, class A>
+        XSIMD_INLINE batch<uint16_t, A> rotate_left(batch<uint16_t, A> const& self, requires_arch<ssse3>) noexcept
+        {
+            return _mm_alignr_epi8(self, self, 2 * N);
+        }
+        template <size_t N, class A>
         XSIMD_INLINE batch<int16_t, A> rotate_left(batch<int16_t, A> const& self, requires_arch<ssse3>) noexcept
         {
             return bitwise_cast<int16_t>(rotate_left<N, A>(bitwise_cast<uint16_t>(self), ssse3 {}));
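The SSSE3 rotate_left hunk fills in the 8-bit overloads and a uint16_t overload built on _mm_alignr_epi8, which concatenates two registers and extracts a byte-shifted window; passing the same register twice turns that into a rotation, and the 16-bit overload rotates by 2 * N bytes to move whole lanes. A standalone sketch of the intrinsic's behaviour (SSSE3 assumed):

    #include <tmmintrin.h> // SSSE3: _mm_alignr_epi8
    #include <cstdint>
    #include <cstdio>

    int main()
    {
        alignas(16) std::uint8_t in[16], out[16];
        for (int i = 0; i < 16; ++i) in[i] = static_cast<std::uint8_t>(i);
        __m128i v = _mm_load_si128(reinterpret_cast<const __m128i*>(in));

        // Concatenate v:v and take the 16-byte window starting 3 bytes in,
        // i.e. a byte-wise rotation: result lane i holds input lane (i + 3) mod 16.
        __m128i r = _mm_alignr_epi8(v, v, 3);

        _mm_store_si128(reinterpret_cast<__m128i*>(out), r);
        for (int i = 0; i < 16; ++i) std::printf("%d ", out[i]); // 3 4 ... 15 0 1 2
        std::printf("\n");
        return 0;
    }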
sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sve.hpp
@@ -949,6 +949,13 @@ namespace xsimd
             return svsel(index_predicate, broadcast<A, T>(val, sve {}), arg);
         }

+        // first
+        template <class A, class T, detail::sve_enable_all_t<T> = 0>
+        XSIMD_INLINE T first(batch<T, A> const& self, requires_arch<sve>) noexcept
+        {
+            return self.data[0];
+        }
+
         // all
         template <class A, class T, detail::sve_enable_all_t<T> = 0>
         XSIMD_INLINE bool all(batch_bool<T, A> const& arg, requires_arch<sve>) noexcept