sequenzo 0.1.17__cp39-cp39-macosx_10_9_universal2.whl → 0.1.18__cp39-cp39-macosx_10_9_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sequenzo might be problematic. Click here for more details.

Files changed (86) hide show
  1. sequenzo/__init__.py +25 -1
  2. sequenzo/big_data/clara/clara.py +1 -1
  3. sequenzo/big_data/clara/utils/get_weighted_diss.c +157 -157
  4. sequenzo/big_data/clara/utils/get_weighted_diss.cpython-39-darwin.so +0 -0
  5. sequenzo/clustering/hierarchical_clustering.py +202 -8
  6. sequenzo/define_sequence_data.py +34 -2
  7. sequenzo/dissimilarity_measures/c_code.cpython-39-darwin.so +0 -0
  8. sequenzo/dissimilarity_measures/get_substitution_cost_matrix.py +1 -1
  9. sequenzo/dissimilarity_measures/src/DHDdistance.cpp +13 -37
  10. sequenzo/dissimilarity_measures/src/LCPdistance.cpp +13 -37
  11. sequenzo/dissimilarity_measures/src/OMdistance.cpp +12 -47
  12. sequenzo/dissimilarity_measures/src/OMspellDistance.cpp +103 -67
  13. sequenzo/dissimilarity_measures/src/dp_utils.h +160 -0
  14. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_arithmetic.hpp +41 -16
  15. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_complex.hpp +4 -0
  16. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_details.hpp +7 -0
  17. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_logical.hpp +10 -0
  18. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_math.hpp +127 -43
  19. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_memory.hpp +30 -2
  20. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_swizzle.hpp +174 -0
  21. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_trigo.hpp +14 -5
  22. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx.hpp +111 -54
  23. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx2.hpp +131 -9
  24. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512bw.hpp +11 -113
  25. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512dq.hpp +39 -7
  26. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512f.hpp +336 -30
  27. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vbmi.hpp +9 -37
  28. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vbmi2.hpp +58 -0
  29. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_common.hpp +1 -0
  30. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_common_fwd.hpp +35 -2
  31. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_constants.hpp +3 -1
  32. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_emulated.hpp +17 -0
  33. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_avx.hpp +13 -0
  34. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_sse.hpp +18 -0
  35. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma4.hpp +13 -0
  36. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_isa.hpp +8 -0
  37. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_neon.hpp +363 -34
  38. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_neon64.hpp +7 -0
  39. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_rvv.hpp +13 -0
  40. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_scalar.hpp +41 -4
  41. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse2.hpp +252 -16
  42. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse3.hpp +9 -0
  43. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_ssse3.hpp +12 -1
  44. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sve.hpp +7 -0
  45. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_vsx.hpp +892 -0
  46. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_wasm.hpp +78 -1
  47. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_arch.hpp +3 -1
  48. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_config.hpp +13 -2
  49. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_cpuid.hpp +5 -0
  50. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_inline.hpp +5 -1
  51. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_all_registers.hpp +2 -0
  52. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_api.hpp +64 -1
  53. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_batch.hpp +36 -0
  54. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_rvv_register.hpp +40 -31
  55. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_traits.hpp +8 -0
  56. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_vsx_register.hpp +77 -0
  57. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/xsimd.hpp +6 -0
  58. sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.c +157 -157
  59. sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.cpython-39-darwin.so +0 -0
  60. sequenzo/dissimilarity_measures/utils/seqconc.c +157 -157
  61. sequenzo/dissimilarity_measures/utils/seqconc.cpython-39-darwin.so +0 -0
  62. sequenzo/dissimilarity_measures/utils/seqdss.c +157 -157
  63. sequenzo/dissimilarity_measures/utils/seqdss.cpython-39-darwin.so +0 -0
  64. sequenzo/dissimilarity_measures/utils/seqdur.c +157 -157
  65. sequenzo/dissimilarity_measures/utils/seqdur.cpython-39-darwin.so +0 -0
  66. sequenzo/dissimilarity_measures/utils/seqlength.c +157 -157
  67. sequenzo/dissimilarity_measures/utils/seqlength.cpython-39-darwin.so +0 -0
  68. sequenzo/sequence_characteristics/__init__.py +4 -0
  69. sequenzo/sequence_characteristics/complexity_index.py +17 -57
  70. sequenzo/sequence_characteristics/overall_cross_sectional_entropy.py +177 -111
  71. sequenzo/sequence_characteristics/plot_characteristics.py +30 -11
  72. sequenzo/sequence_characteristics/simple_characteristics.py +1 -0
  73. sequenzo/sequence_characteristics/state_frequencies_and_entropy_per_sequence.py +9 -3
  74. sequenzo/sequence_characteristics/turbulence.py +47 -67
  75. sequenzo/sequence_characteristics/variance_of_spell_durations.py +19 -9
  76. sequenzo/sequence_characteristics/within_sequence_entropy.py +5 -58
  77. sequenzo/visualization/plot_sequence_index.py +58 -35
  78. sequenzo/visualization/plot_state_distribution.py +57 -36
  79. sequenzo/with_event_history_analysis/__init__.py +35 -0
  80. sequenzo/with_event_history_analysis/sequence_analysis_multi_state_model.py +850 -0
  81. sequenzo/with_event_history_analysis/sequence_history_analysis.py +283 -0
  82. {sequenzo-0.1.17.dist-info → sequenzo-0.1.18.dist-info}/METADATA +7 -6
  83. {sequenzo-0.1.17.dist-info → sequenzo-0.1.18.dist-info}/RECORD +86 -79
  84. {sequenzo-0.1.17.dist-info → sequenzo-0.1.18.dist-info}/WHEEL +0 -0
  85. {sequenzo-0.1.17.dist-info → sequenzo-0.1.18.dist-info}/licenses/LICENSE +0 -0
  86. {sequenzo-0.1.17.dist-info → sequenzo-0.1.18.dist-info}/top_level.txt +0 -0
@@ -3,11 +3,12 @@
3
3
  #include <vector>
4
4
  #include <cmath>
5
5
  #include <iostream>
6
- #include <xsimd/xsimd.hpp>
7
6
  #include "utils.h"
7
+ #include "dp_utils.h"
8
8
  #ifdef _OPENMP
9
9
  #include <omp.h>
10
10
  #endif
11
+ #include <xsimd/xsimd.hpp>
11
12
 
12
13
  namespace py = pybind11;
13
14
 
@@ -85,83 +86,119 @@ public:
85
86
  }
86
87
  }
87
88
 
88
- double getIndel(int i, int j, int state) {
89
+ // 对齐分配函数 moved to dp_utils.h
90
+
91
+ double getIndel(int i, int j, int state){
89
92
  auto ptr_indel = indellist.mutable_unchecked<1>();
90
93
  auto ptr_dur = seqdur.mutable_unchecked<2>();
91
94
 
92
- xsimd::batch<double, xsimd::default_arch> state_vec(ptr_indel(state));
93
- xsimd::batch<double, xsimd::default_arch> timecost_vec(timecost);
94
-
95
- xsimd::batch<double, xsimd::default_arch> dur_vec(ptr_dur(i, j));
96
- xsimd::batch<double, xsimd::default_arch> result = state_vec + timecost_vec * dur_vec;
97
-
98
- return result.get(0);
95
+ return ptr_indel(state) + timecost * ptr_dur(i, j);
99
96
  }
100
97
 
101
-
102
- double getSubCost(int i_state, int j_state, int i_x, int i_y, int j_x, int j_y) {
98
+ double getSubCost(int i_state, int j_state, int i_x, int i_y, int j_x, int j_y){
103
99
  auto ptr_dur = seqdur.mutable_unchecked<2>();
104
100
 
105
- if (i_state == j_state) {
101
+ if(i_state == j_state){
106
102
  double diffdur = ptr_dur(i_x, i_y) - ptr_dur(j_x, j_y);
107
- return std::abs(timecost * diffdur);
108
- } else {
109
- auto ptr_sm = sm.mutable_unchecked<2>();
110
103
 
111
- double d1 = ptr_dur(i_x, i_y);
112
- double d2 = ptr_dur(j_x, j_y);
113
-
114
- xsimd::batch<double, xsimd::default_arch> d1_vec = xsimd::batch<double, xsimd::default_arch>::broadcast(d1);
115
- xsimd::batch<double, xsimd::default_arch> d2_vec = xsimd::batch<double, xsimd::default_arch>::broadcast(d2);
116
- xsimd::batch<double, xsimd::default_arch> cost = xsimd::batch<double, xsimd::default_arch>::broadcast(timecost);
117
- xsimd::batch<double, xsimd::default_arch> sum = (d1_vec + d2_vec) * cost;
104
+ return abs(timecost * diffdur);
105
+ }else{
106
+ auto ptr_sm = sm.mutable_unchecked<2>();
118
107
 
119
- return ptr_sm(i_state, j_state) + sum.get(0);
108
+ return ptr_sm(i_state, j_state) +
109
+ (ptr_dur(i_x, i_y) + ptr_dur(j_x, j_y)) * timecost;
120
110
  }
121
111
  }
122
112
 
123
-
124
- double compute_distance(int is, int js) {
113
+ double compute_distance(int is, int js, double* prev, double* curr) {
125
114
  try {
126
115
  auto ptr_seq = sequences.unchecked<2>();
127
116
  auto ptr_len = seqlength.unchecked<1>();
117
+ auto ptr_sm = sm.unchecked<2>();
118
+ auto ptr_dur = seqdur.unchecked<2>();
119
+ auto ptr_indel = indellist.unchecked<1>();
128
120
 
129
121
  int i_state = 0, j_state = 0;
130
- double maxpossiblecost;
131
122
  int mm = ptr_len(is);
132
123
  int nn = ptr_len(js);
133
- int mSuf = mm + 1, nSuf = nn + 1;
134
-
135
- std::vector<double> prev(fmatsize, 0.0);
136
- std::vector<double> curr(fmatsize, 0.0);
124
+ int mSuf = mm + 1;
125
+ int nSuf = nn + 1;
137
126
 
138
127
  prev[0] = 0;
139
128
  curr[0] = 0;
140
129
 
141
- for (int ii = 1; ii < nSuf; ii++) {
142
- j_state = ptr_seq(js, ii - 1);
143
- prev[ii] = prev[ii-1] + getIndel(js, ii-1, j_state);
130
+ // initialize first row: cumulative insertions into js along columns
131
+ for (int jj = 1; jj < nSuf; jj++) {
132
+ int bj = ptr_seq(js, jj - 1);
133
+ prev[jj] = prev[jj - 1] + (ptr_indel(bj) + timecost * ptr_dur(js, jj - 1));
144
134
  }
145
135
 
136
+ using batch_t = xsimd::batch<double>;
137
+ constexpr std::size_t B = batch_t::size;
138
+
146
139
  for (int i = 1; i < mSuf; i++) {
147
140
  i_state = ptr_seq(is, i - 1);
148
- curr[0] = prev[0] + getIndel(is, i - 1, i_state);
149
-
150
- for (int j = 1; j < nSuf; j++) {
151
- j_state = ptr_seq(js, j - 1);
141
+ // per-row deletion cost (depends only on i_state and i position)
142
+ double dur_i = ptr_dur(is, i - 1);
143
+ double del_cost_i = ptr_indel(i_state) + timecost * dur_i;
144
+
145
+ // first column: cumulative deletions D[i][0] = D[i-1][0] + del_cost_i
146
+ curr[0] = prev[0] + del_cost_i;
147
+
148
+ int j = 1;
149
+ for (; j + (int)B <= nSuf; j += (int)B) {
150
+ const double* prev_ptr = prev + j;
151
+ const double* prevm1_ptr = prev + (j - 1);
152
+
153
+ batch_t prevj = batch_t::load_unaligned(prev_ptr);
154
+ batch_t prevjm1 = batch_t::load_unaligned(prevm1_ptr);
155
+
156
+ alignas(64) double subs[B];
157
+ alignas(64) double ins[B];
158
+ for (std::size_t b = 0; b < B; ++b) {
159
+ int jj_idx = j + (int)b - 1;
160
+ int bj = ptr_seq(js, jj_idx);
161
+ double dur_j = ptr_dur(js, jj_idx);
162
+
163
+ if (i_state == bj) {
164
+ subs[b] = std::abs(timecost * (dur_i - dur_j));
165
+ } else {
166
+ subs[b] = ptr_sm(i_state, bj) + (dur_i + dur_j) * timecost;
167
+ }
168
+ ins[b] = ptr_indel(bj) + timecost * dur_j;
169
+ }
152
170
 
153
- xsimd::batch<double, xsimd::default_arch> minimum_batch = prev[j] + getIndel(is, i - 1, i_state);
154
- xsimd::batch<double, xsimd::default_arch> j_indel_batch = curr[j - 1] + getIndel(js, j - 1, j_state);
155
- xsimd::batch<double, xsimd::default_arch> sub_batch = prev[j - 1] + getSubCost(i_state, j_state, is, i - 1, js, j - 1);
171
+ batch_t sub_batch = batch_t::load_unaligned(subs);
172
+ batch_t cand_del = prevj + batch_t(del_cost_i);
173
+ batch_t cand_sub = prevjm1 + sub_batch;
174
+ batch_t vert = xsimd::min(cand_del, cand_sub);
175
+
176
+ double running = curr[j - 1] + ins[0];
177
+ for (std::size_t b = 0; b < B; ++b) {
178
+ double v = vert.get(b);
179
+ double c = std::min(v, running);
180
+ curr[j + (int)b] = c;
181
+ if (b + 1 < B) running = c + ins[b + 1];
182
+ }
183
+ }
156
184
 
157
- xsimd::batch<double> result = xsimd::min(xsimd::min(minimum_batch, j_indel_batch), sub_batch);
158
- curr[j] = result.get(0);
185
+ // tail scalar handling
186
+ for (; j < nSuf; ++j) {
187
+ j_state = ptr_seq(js, j - 1);
188
+ double minimum = prev[j] + del_cost_i;
189
+ double j_indel = curr[j - 1] + (ptr_indel(j_state) + timecost * ptr_dur(js, j - 1));
190
+ double sub = prev[j - 1] + (
191
+ (i_state == j_state)
192
+ ? std::abs(timecost * (dur_i - ptr_dur(js, j - 1)))
193
+ : (ptr_sm(i_state, j_state) + (dur_i + ptr_dur(js, j - 1)) * timecost)
194
+ );
195
+ curr[j] = std::min({ minimum, j_indel, sub });
159
196
  }
160
197
 
161
198
  std::swap(prev, curr);
162
199
  }
163
200
 
164
- maxpossiblecost = std::abs(nn - mm) * indel + maxscost * std::min(mm, nn);
201
+ double maxpossiblecost = std::abs(nn - mm) * indel + maxscost * std::min(mm, nn);
165
202
  double ml = double(mm) * indel;
166
203
  double nl = double(nn) * indel;
167
204
 
@@ -172,26 +209,16 @@ public:
172
209
  }
173
210
  }
174
211
 
175
-
176
212
  py::array_t<double> compute_all_distances() {
177
213
  try {
178
- auto buffer = dist_matrix.mutable_unchecked<2>();
179
-
180
- #pragma omp parallel for schedule(dynamic)
181
- for (int i = 0; i < nseq; i++) {
182
- for (int j = i; j < nseq; j++) {
183
- buffer(i, j) = compute_distance(i, j);
214
+ return dp_utils::compute_all_distances(
215
+ nseq,
216
+ fmatsize,
217
+ dist_matrix,
218
+ [this](int i, int j, double* prev, double* curr) {
219
+ return this->compute_distance(i, j, prev, curr);
184
220
  }
185
- }
186
-
187
- #pragma omp parallel for schedule(dynamic)
188
- for (int i = 0; i < nseq; i++) {
189
- for (int j = i + 1; j < nseq; j++) {
190
- buffer(j, i) = buffer(i, j);
191
- }
192
- }
193
-
194
- return dist_matrix;
221
+ );
195
222
  } catch (const std::exception& e) {
196
223
  py::print("Error in compute_all_distances: ", e.what());
197
224
  throw;
@@ -202,15 +229,24 @@ public:
202
229
  try {
203
230
  auto buffer = refdist_matrix.mutable_unchecked<2>();
204
231
 
205
- #pragma omp parallel for schedule(static)
206
- for (int rseq = rseq1; rseq < rseq2; rseq ++) {
207
- for (int is = 0; is < nseq; is ++) {
208
- if(is == rseq){
209
- buffer(is, rseq-rseq1) = 0;
210
- }else{
211
- buffer(is, rseq-rseq1) = compute_distance(is, rseq);
232
+ #pragma omp parallel
233
+ {
234
+ double* prev = dp_utils::aligned_alloc_double(static_cast<size_t>(fmatsize));
235
+ double* curr = dp_utils::aligned_alloc_double(static_cast<size_t>(fmatsize));
236
+
237
+ #pragma omp for schedule(static)
238
+ for (int rseq = rseq1; rseq < rseq2; rseq ++) {
239
+ for (int is = 0; is < nseq; is ++) {
240
+ double cmpres = 0;
241
+ if(is != rseq){
242
+ cmpres = compute_distance(is, rseq, prev, curr);
243
+ }
244
+
245
+ buffer(is, rseq - rseq1) = cmpres;
212
246
  }
213
247
  }
248
+ dp_utils::aligned_free_double(prev);
249
+ dp_utils::aligned_free_double(curr);
214
250
  }
215
251
 
216
252
  return refdist_matrix;
@@ -0,0 +1,160 @@
1
+ #pragma once
2
+
3
+ #include <pybind11/pybind11.h>
4
+ #include <pybind11/numpy.h>
5
+ #ifdef _OPENMP
6
+ #include <omp.h>
7
+ #endif
8
+ #include <cstdlib>
9
+ #include <new>
10
+
11
+ namespace dp_utils {
12
+
13
+ // Cross-platform aligned allocation for double buffers
14
+ #ifdef _WIN32
15
+ inline double* aligned_alloc_double(size_t size, size_t align = 64) {
16
+ return reinterpret_cast<double*>(_aligned_malloc(size * sizeof(double), align));
17
+ }
18
+ inline void aligned_free_double(double* ptr) {
19
+ _aligned_free(ptr);
20
+ }
21
+ #else
22
+ inline double* aligned_alloc_double(size_t size, size_t align = 64) {
23
+ void* ptr = nullptr;
24
+ if (posix_memalign(&ptr, align, size * sizeof(double)) != 0) throw std::bad_alloc();
25
+ return reinterpret_cast<double*>(ptr);
26
+ }
27
+ inline void aligned_free_double(double* ptr) { free(ptr); }
28
+ #endif
29
+
30
+ // Generic pairwise symmetric computation helper
31
+ // ComputeFn signature: double(int i, int j, double* prev, double* curr)
32
+ template <typename ComputeFn>
33
+ inline pybind11::array_t<double> compute_all_distances(
34
+ int nseq,
35
+ int fmatsize,
36
+ pybind11::array_t<double>& dist_matrix,
37
+ ComputeFn&& compute_fn
38
+ ) {
39
+ auto buffer = dist_matrix.mutable_unchecked<2>();
40
+
41
+ #pragma omp parallel
42
+ {
43
+ double* prev = aligned_alloc_double(static_cast<size_t>(fmatsize));
44
+ double* curr = aligned_alloc_double(static_cast<size_t>(fmatsize));
45
+
46
+ #pragma omp for schedule(static)
47
+ for (int i = 0; i < nseq; i++) {
48
+ for (int j = i; j < nseq; j++) {
49
+ buffer(i, j) = compute_fn(i, j, prev, curr);
50
+ }
51
+ }
52
+
53
+ aligned_free_double(prev);
54
+ aligned_free_double(curr);
55
+ }
56
+
57
+ #pragma omp parallel for schedule(static)
58
+ for (int i = 0; i < nseq; i++) {
59
+ for (int j = i + 1; j < nseq; j++) {
60
+ buffer(j, i) = buffer(i, j);
61
+ }
62
+ }
63
+
64
+ return dist_matrix;
65
+ }
66
+
67
+ // Generic pairwise symmetric computation helper (no buffers)
68
+ // ComputeFn signature: double(int i, int j)
69
+ template <typename ComputeFn>
70
+ inline pybind11::array_t<double> compute_all_distances_simple(
71
+ int nseq,
72
+ pybind11::array_t<double>& dist_matrix,
73
+ ComputeFn&& compute_fn
74
+ ) {
75
+ auto buffer = dist_matrix.mutable_unchecked<2>();
76
+
77
+ #pragma omp parallel
78
+ {
79
+ #pragma omp for schedule(static)
80
+ for (int i = 0; i < nseq; i++) {
81
+ for (int j = i; j < nseq; j++) {
82
+ buffer(i, j) = compute_fn(i, j);
83
+ }
84
+ }
85
+ }
86
+
87
+ #pragma omp parallel for schedule(static)
88
+ for (int i = 0; i < nseq; ++i) {
89
+ for (int j = i + 1; j < nseq; ++j) {
90
+ buffer(j, i) = buffer(i, j);
91
+ }
92
+ }
93
+
94
+ return dist_matrix;
95
+ }
96
+
97
+ // Generic reference-sequence computation helper (no buffers)
98
+ // ComputeFn signature: double(int is, int rseq)
99
+ template <typename ComputeFn>
100
+ inline pybind11::array_t<double> compute_refseq_distances_simple(
101
+ int nseq,
102
+ int rseq1,
103
+ int rseq2,
104
+ pybind11::array_t<double>& refdist_matrix,
105
+ ComputeFn&& compute_fn
106
+ ) {
107
+ auto buffer = refdist_matrix.mutable_unchecked<2>();
108
+
109
+ #pragma omp parallel
110
+ {
111
+ #pragma omp for schedule(guided)
112
+ for (int rseq = rseq1; rseq < rseq2; rseq++) {
113
+ for (int is = 0; is < nseq; is++) {
114
+ buffer(is, rseq - rseq1) = (is == rseq) ? 0.0 : compute_fn(is, rseq);
115
+ }
116
+ }
117
+ }
118
+
119
+ return refdist_matrix;
120
+ }
121
+
122
+ // Generic reference-sequence computation helper (with DP buffers)
123
+ // ComputeFn signature: double(int is, int rseq, double* prev, double* curr)
124
+ template <typename ComputeFn>
125
+ inline pybind11::array_t<double> compute_refseq_distances_buffered(
126
+ int nseq,
127
+ int rseq1,
128
+ int rseq2,
129
+ int fmatsize,
130
+ pybind11::array_t<double>& refdist_matrix,
131
+ ComputeFn&& compute_fn
132
+ ) {
133
+ auto buffer = refdist_matrix.mutable_unchecked<2>();
134
+
135
+ #pragma omp parallel
136
+ {
137
+ double* prev = aligned_alloc_double(static_cast<size_t>(fmatsize));
138
+ double* curr = aligned_alloc_double(static_cast<size_t>(fmatsize));
139
+
140
+ #pragma omp for schedule(static)
141
+ for (int rseq = rseq1; rseq < rseq2; rseq++) {
142
+ for (int is = 0; is < nseq; is++) {
143
+ double cmpres = 0.0;
144
+ if (is != rseq) {
145
+ cmpres = compute_fn(is, rseq, prev, curr);
146
+ }
147
+ buffer(is, rseq - rseq1) = cmpres;
148
+ }
149
+ }
150
+
151
+ aligned_free_double(prev);
152
+ aligned_free_double(curr);
153
+ }
154
+
155
+ return refdist_matrix;
156
+ }
157
+
158
+ } // namespace dp_utils
159
+
160
+
@@ -34,6 +34,13 @@ namespace xsimd
34
34
  { return x << y; },
35
35
  self, other);
36
36
  }
37
+ template <size_t shift, class A, class T, class /*=typename std::enable_if<std::is_integral<T>::value, void>::type*/>
38
+ XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& self, requires_arch<common>) noexcept
39
+ {
40
+ constexpr auto bits = std::numeric_limits<T>::digits + std::numeric_limits<T>::is_signed;
41
+ static_assert(shift < bits, "Shift must be less than the number of bits in T");
42
+ return bitwise_lshift(self, shift, A {});
43
+ }
37
44
 
38
45
  // bitwise_rshift
39
46
  template <class A, class T, class /*=typename std::enable_if<std::is_integral<T>::value, void>::type*/>
@@ -43,6 +50,13 @@ namespace xsimd
43
50
  { return x >> y; },
44
51
  self, other);
45
52
  }
53
+ template <size_t shift, class A, class T, class /*=typename std::enable_if<std::is_integral<T>::value, void>::type*/>
54
+ XSIMD_INLINE batch<T, A> bitwise_rshift(batch<T, A> const& self, requires_arch<common>) noexcept
55
+ {
56
+ constexpr auto bits = std::numeric_limits<T>::digits + std::numeric_limits<T>::is_signed;
57
+ static_assert(shift < bits, "Shift must be less than the number of bits in T");
58
+ return bitwise_rshift(self, shift, A {});
59
+ }
46
60
 
47
61
  // decr
48
62
  template <class A, class T>
@@ -127,18 +141,16 @@ namespace xsimd
127
141
  return { res_r, res_i };
128
142
  }
129
143
 
130
- // hadd
131
- template <class A, class T, class /*=typename std::enable_if<std::is_integral<T>::value, void>::type*/>
132
- XSIMD_INLINE T hadd(batch<T, A> const& self, requires_arch<common>) noexcept
144
+ // fmas
145
+ template <class A, class T>
146
+ XSIMD_INLINE batch<T, A> fmas(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z, requires_arch<common>) noexcept
133
147
  {
134
- alignas(A::alignment()) T buffer[batch<T, A>::size];
135
- self.store_aligned(buffer);
136
- T res = 0;
137
- for (T val : buffer)
148
+ struct even_lane
138
149
  {
139
- res += val;
140
- }
141
- return res;
150
+ static constexpr bool get(unsigned const i, unsigned) noexcept { return (i & 1u) == 0; }
151
+ };
152
+ const auto mask = make_batch_bool_constant<T, even_lane, A>();
153
+ return fma(x, y, select(mask, neg(z), z));
142
154
  }
143
155
 
144
156
  // incr
@@ -168,16 +180,30 @@ namespace xsimd
168
180
  template <class A, class T, class STy>
169
181
  XSIMD_INLINE batch<T, A> rotl(batch<T, A> const& self, STy other, requires_arch<common>) noexcept
170
182
  {
171
- constexpr auto N = std::numeric_limits<T>::digits;
172
- return (self << other) | (self >> (N - other));
183
+ constexpr auto bits = std::numeric_limits<T>::digits + std::numeric_limits<T>::is_signed;
184
+ return (self << other) | (self >> (bits - other));
185
+ }
186
+ template <size_t count, class A, class T>
187
+ XSIMD_INLINE batch<T, A> rotl(batch<T, A> const& self, requires_arch<common>) noexcept
188
+ {
189
+ constexpr auto bits = std::numeric_limits<T>::digits + std::numeric_limits<T>::is_signed;
190
+ static_assert(count < bits, "Count amount must be less than the number of bits in T");
191
+ return bitwise_lshift<count>(self) | bitwise_rshift<bits - count>(self);
173
192
  }
174
193
 
175
194
  // rotr
176
195
  template <class A, class T, class STy>
177
196
  XSIMD_INLINE batch<T, A> rotr(batch<T, A> const& self, STy other, requires_arch<common>) noexcept
178
197
  {
179
- constexpr auto N = std::numeric_limits<T>::digits;
180
- return (self >> other) | (self << (N - other));
198
+ constexpr auto bits = std::numeric_limits<T>::digits + std::numeric_limits<T>::is_signed;
199
+ return (self >> other) | (self << (bits - other));
200
+ }
201
+ template <size_t count, class A, class T>
202
+ XSIMD_INLINE batch<T, A> rotr(batch<T, A> const& self, requires_arch<common>) noexcept
203
+ {
204
+ constexpr auto bits = std::numeric_limits<T>::digits + std::numeric_limits<T>::is_signed;
205
+ static_assert(count < bits, "Count must be less than the number of bits in T");
206
+ return bitwise_rshift<count>(self) | bitwise_lshift<bits - count>(self);
181
207
  }
182
208
 
183
209
  // sadd
@@ -191,10 +217,9 @@ namespace xsimd
191
217
  {
192
218
  if (std::is_signed<T>::value)
193
219
  {
194
- auto mask = (other >> (8 * sizeof(T) - 1));
195
220
  auto self_pos_branch = min(std::numeric_limits<T>::max() - other, self);
196
221
  auto self_neg_branch = max(std::numeric_limits<T>::min() - other, self);
197
- return other + select(batch_bool<T, A>(mask.data), self_neg_branch, self_pos_branch);
222
+ return other + select(other >= 0, self_pos_branch, self_neg_branch);
198
223
  }
199
224
  else
200
225
  {
@@ -78,11 +78,15 @@ namespace xsimd
78
78
  using batch_type = complex_batch_type_t<batch<T, A>>;
79
79
  using real_batch = typename batch_type::real_batch;
80
80
  using real_value_type = typename real_batch::value_type;
81
+ #ifdef __FAST_MATH__
82
+ return { self };
83
+ #else
81
84
  auto cond = xsimd::isinf(real(self)) || xsimd::isinf(imag(self));
82
85
  return select(cond,
83
86
  batch_type(constants::infinity<real_batch>(),
84
87
  copysign(real_batch(real_value_type(0)), imag(self))),
85
88
  batch_type(self));
89
+ #endif
86
90
  }
87
91
 
88
92
  template <class A, class T>
@@ -47,6 +47,8 @@ namespace xsimd
47
47
  template <class T, class A>
48
48
  XSIMD_INLINE batch<T, A> fms(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z) noexcept;
49
49
  template <class T, class A>
50
+ XSIMD_INLINE batch<T, A> fmas(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z) noexcept;
51
+ template <class T, class A>
50
52
  XSIMD_INLINE batch<T, A> frexp(const batch<T, A>& x, const batch<as_integer_t<T>, A>& e) noexcept;
51
53
  template <class T, class A, uint64_t... Coefs>
52
54
  XSIMD_INLINE batch<T, A> horner(const batch<T, A>& self) noexcept;
@@ -75,6 +77,8 @@ namespace xsimd
75
77
  template <class T, class A>
76
78
  XSIMD_INLINE T reduce_add(batch<T, A> const&) noexcept;
77
79
  template <class T, class A>
80
+ XSIMD_INLINE T reduce_mul(batch<T, A> const&) noexcept;
81
+ template <class T, class A>
78
82
  XSIMD_INLINE batch<T, A> select(batch_bool<T, A> const&, batch<T, A> const&, batch<T, A> const&) noexcept;
79
83
  template <class T, class A>
80
84
  XSIMD_INLINE batch<std::complex<T>, A> select(batch_bool<T, A> const&, batch<std::complex<T>, A> const&, batch<std::complex<T>, A> const&) noexcept;
@@ -90,6 +94,9 @@ namespace xsimd
90
94
  XSIMD_INLINE std::pair<batch<T, A>, batch<T, A>> sincos(batch<T, A> const& self) noexcept;
91
95
  template <class T, class A>
92
96
  XSIMD_INLINE batch<T, A> sqrt(batch<T, A> const& self) noexcept;
97
+ template <class T, class A, class Vt, Vt... Values>
98
+ XSIMD_INLINE typename std::enable_if<std::is_arithmetic<T>::value, batch<T, A>>::type
99
+ swizzle(batch<T, A> const& x, batch_constant<Vt, A, Values...> mask) noexcept;
93
100
  template <class T, class A>
94
101
  XSIMD_INLINE batch<T, A> tan(batch<T, A> const& self) noexcept;
95
102
  template <class T, class A>
@@ -124,12 +124,22 @@ namespace xsimd
124
124
  template <class A>
125
125
  XSIMD_INLINE batch_bool<float, A> isinf(batch<float, A> const& self, requires_arch<common>) noexcept
126
126
  {
127
+ #ifdef __FAST_MATH__
128
+ (void)self;
129
+ return { false };
130
+ #else
127
131
  return abs(self) == std::numeric_limits<float>::infinity();
132
+ #endif
128
133
  }
129
134
  template <class A>
130
135
  XSIMD_INLINE batch_bool<double, A> isinf(batch<double, A> const& self, requires_arch<common>) noexcept
131
136
  {
137
+ #ifdef __FAST_MATH__
138
+ (void)self;
139
+ return { false };
140
+ #else
132
141
  return abs(self) == std::numeric_limits<double>::infinity();
142
+ #endif
133
143
  }
134
144
 
135
145
  // isfinite