PyPI - sequenzo - Versions diffs - 0.1.17__cp39-cp39-win_amd64.whl → 0.1.18__cp39-cp39-win_amd64.whl - Mend

sequenzo 0.1.17__cp39-cp39-win_amd64.whl → 0.1.18__cp39-cp39-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of sequenzo might be problematic. Click here for more details.

Files changed (101)

sequenzo/dissimilarity_measures/src/OMspellDistance.cpp CHANGED

@@ -3,11 +3,12 @@
 #include <vector>
 #include <cmath>
 #include <iostream>
-#include <xsimd/xsimd.hpp>
 #include "utils.h"
+#include "dp_utils.h"
 #ifdef _OPENMP
     #include <omp.h>
 #endif
+#include <xsimd/xsimd.hpp>
 namespace py = pybind11;
@@ -85,83 +86,119 @@ public:
         }
     }
-    double getIndel(int i, int j, int state) {
+    // Aligned-allocation helpers moved to dp_utils.h
+    double getIndel(int i, int j, int state){
         auto ptr_indel = indellist.mutable_unchecked<1>();
         auto ptr_dur = seqdur.mutable_unchecked<2>();
-        xsimd::batch<double, xsimd::default_arch> state_vec(ptr_indel(state));
-        xsimd::batch<double, xsimd::default_arch> timecost_vec(timecost);
-        xsimd::batch<double, xsimd::default_arch> dur_vec(ptr_dur(i, j));
-        xsimd::batch<double, xsimd::default_arch> result = state_vec + timecost_vec * dur_vec;
-        return result.get(0);
+        return ptr_indel(state) + timecost * ptr_dur(i, j);
     }
-    double getSubCost(int i_state, int j_state, int i_x, int i_y, int j_x, int j_y) {
+    double getSubCost(int i_state, int j_state, int i_x, int i_y, int j_x, int j_y){
         auto ptr_dur = seqdur.mutable_unchecked<2>();
-        if (i_state == j_state) {
+        if(i_state == j_state){
             double diffdur = ptr_dur(i_x, i_y) - ptr_dur(j_x, j_y);
-            return std::abs(timecost * diffdur);
-        } else {
-            auto ptr_sm = sm.mutable_unchecked<2>();
-            double d1 = ptr_dur(i_x, i_y);
-            double d2 = ptr_dur(j_x, j_y);
-            xsimd::batch<double, xsimd::default_arch> d1_vec = xsimd::batch<double, xsimd::default_arch>::broadcast(d1);
-            xsimd::batch<double, xsimd::default_arch> d2_vec = xsimd::batch<double, xsimd::default_arch>::broadcast(d2);
-            xsimd::batch<double, xsimd::default_arch> cost = xsimd::batch<double, xsimd::default_arch>::broadcast(timecost);
-            xsimd::batch<double, xsimd::default_arch> sum = (d1_vec + d2_vec) * cost;
+            return abs(timecost * diffdur);
+        }else{
+            auto ptr_sm = sm.mutable_unchecked<2>();
-            return ptr_sm(i_state, j_state) + sum.get(0);
+            return ptr_sm(i_state, j_state) +
+                    (ptr_dur(i_x, i_y) + ptr_dur(j_x, j_y)) * timecost;
         }
     }
-    double compute_distance(int is, int js) {
+    double compute_distance(int is, int js, double* prev, double* curr) {
         try {
             auto ptr_seq = sequences.unchecked<2>();
             auto ptr_len = seqlength.unchecked<1>();
+            auto ptr_sm = sm.unchecked<2>();
+            auto ptr_dur = seqdur.unchecked<2>();
+            auto ptr_indel = indellist.unchecked<1>();
             int i_state = 0, j_state = 0;
-            double maxpossiblecost;
             int mm = ptr_len(is);
             int nn = ptr_len(js);
-            int mSuf = mm + 1, nSuf = nn + 1;
-            std::vector<double> prev(fmatsize, 0.0);
-            std::vector<double> curr(fmatsize, 0.0);
+            int mSuf = mm + 1;
+            int nSuf = nn + 1;
             prev[0] = 0;
             curr[0] = 0;
-            for (int ii = 1; ii < nSuf; ii++) {
-                j_state = ptr_seq(js, ii - 1);
-                prev[ii] = prev[ii-1] + getIndel(js, ii-1, j_state);
+            // initialize first row: cumulative insertions into js along columns
+            for (int jj = 1; jj < nSuf; jj++) {
+                int bj = ptr_seq(js, jj - 1);
+                prev[jj] = prev[jj - 1] + (ptr_indel(bj) + timecost * ptr_dur(js, jj - 1));
             }
+            using batch_t = xsimd::batch<double>;
+            constexpr std::size_t B = batch_t::size;
             for (int i = 1; i < mSuf; i++) {
                 i_state = ptr_seq(is, i - 1);
-                curr[0] = prev[0] + getIndel(is, i - 1, i_state);
-                for (int j = 1; j < nSuf; j++) {
-                    j_state = ptr_seq(js, j - 1);
+                // per-row deletion cost (depends only on i_state and i position)
+                double dur_i = ptr_dur(is, i - 1);
+                double del_cost_i = ptr_indel(i_state) + timecost * dur_i;
+                // first column: cumulative deletions D[i][0] = D[i-1][0] + del_cost_i
+                curr[0] = prev[0] + del_cost_i;
+                int j = 1;
+                for (; j + (int)B <= nSuf; j += (int)B) {
+                    const double* prev_ptr = prev + j;
+                    const double* prevm1_ptr = prev + (j - 1);
+                    batch_t prevj = batch_t::load_unaligned(prev_ptr);
+                    batch_t prevjm1 = batch_t::load_unaligned(prevm1_ptr);
+                    alignas(64) double subs[B];
+                    alignas(64) double ins[B];
+                    for (std::size_t b = 0; b < B; ++b) {
+                        int jj_idx = j + (int)b - 1;
+                        int bj = ptr_seq(js, jj_idx);
+                        double dur_j = ptr_dur(js, jj_idx);
+                        if (i_state == bj) {
+                            subs[b] = std::abs(timecost * (dur_i - dur_j));
+                        } else {
+                            subs[b] = ptr_sm(i_state, bj) + (dur_i + dur_j) * timecost;
+                        }
+                        ins[b] = ptr_indel(bj) + timecost * dur_j;
+                    }
-                    xsimd::batch<double, xsimd::default_arch> minimum_batch = prev[j] + getIndel(is, i - 1, i_state);
-                    xsimd::batch<double, xsimd::default_arch> j_indel_batch = curr[j - 1] + getIndel(js, j - 1, j_state);
-                    xsimd::batch<double, xsimd::default_arch> sub_batch = prev[j - 1] + getSubCost(i_state, j_state, is, i - 1, js, j - 1);
+                    batch_t sub_batch = batch_t::load_unaligned(subs);
+                    batch_t cand_del = prevj + batch_t(del_cost_i);
+                    batch_t cand_sub = prevjm1 + sub_batch;
+                    batch_t vert = xsimd::min(cand_del, cand_sub);
+                    double running = curr[j - 1] + ins[0];
+                    for (std::size_t b = 0; b < B; ++b) {
+                        double v = vert.get(b);
+                        double c = std::min(v, running);
+                        curr[j + (int)b] = c;
+                        if (b + 1 < B) running = c + ins[b + 1];
+                    }
+                }
-                    xsimd::batch<double> result = xsimd::min(xsimd::min(minimum_batch, j_indel_batch), sub_batch);
-                    curr[j] = result.get(0);
+                // tail scalar handling
+                for (; j < nSuf; ++j) {
+                    j_state = ptr_seq(js, j - 1);
+                    double minimum = prev[j] + del_cost_i;
+                    double j_indel = curr[j - 1] + (ptr_indel(j_state) + timecost * ptr_dur(js, j - 1));
+                    double sub = prev[j - 1] + (
+                        (i_state == j_state)
+                        ? std::abs(timecost * (dur_i - ptr_dur(js, j - 1)))
+                        : (ptr_sm(i_state, j_state) + (dur_i + ptr_dur(js, j - 1)) * timecost)
+                    );
+                    curr[j] = std::min({ minimum, j_indel, sub });
                 }
                 std::swap(prev, curr);
             }
-            maxpossiblecost = std::abs(nn - mm) * indel + maxscost * std::min(mm, nn);
+            double maxpossiblecost = std::abs(nn - mm) * indel + maxscost * std::min(mm, nn);
             double ml = double(mm) * indel;
             double nl = double(nn) * indel;
@@ -172,26 +209,16 @@ public:
         }
     }
     py::array_t<double> compute_all_distances() {
         try {
-            auto buffer = dist_matrix.mutable_unchecked<2>();
-            #pragma omp parallel for schedule(dynamic)
-            for (int i = 0; i < nseq; i++) {
-                for (int j = i; j < nseq; j++) {
-                    buffer(i, j) = compute_distance(i, j);
+            return dp_utils::compute_all_distances(
+                nseq,
+                fmatsize,
+                dist_matrix,
+                [this](int i, int j, double* prev, double* curr) {
+                    return this->compute_distance(i, j, prev, curr);
                 }
-            }
-            #pragma omp parallel for schedule(dynamic)
-            for (int i = 0; i < nseq; i++) {
-                for (int j = i + 1; j < nseq; j++) {
-                    buffer(j, i) = buffer(i, j);
-                }
-            }
-            return dist_matrix;
+            );
         } catch (const std::exception& e) {
             py::print("Error in compute_all_distances: ", e.what());
             throw;
@@ -202,15 +229,24 @@ public:
         try {
             auto buffer = refdist_matrix.mutable_unchecked<2>();
-            #pragma omp parallel for schedule(static)
-            for (int rseq = rseq1; rseq < rseq2; rseq ++) {
-                for (int is = 0; is < nseq; is ++) {
-                    if(is == rseq){
-                        buffer(is, rseq-rseq1) = 0;
-                    }else{
-                        buffer(is, rseq-rseq1) = compute_distance(is, rseq);
+            #pragma omp parallel
+            {
+                double* prev = dp_utils::aligned_alloc_double(static_cast<size_t>(fmatsize));
+                double* curr = dp_utils::aligned_alloc_double(static_cast<size_t>(fmatsize));
+                #pragma omp for schedule(static)
+                for (int rseq = rseq1; rseq < rseq2; rseq ++) {
+                    for (int is = 0; is < nseq; is ++) {
+                        double cmpres = 0;
+                        if(is != rseq){
+                            cmpres = compute_distance(is, rseq, prev, curr);
+                        }
+                        buffer(is, rseq - rseq1) = cmpres;
                     }
                 }
+                dp_utils::aligned_free_double(prev);
+                dp_utils::aligned_free_double(curr);
             }
             return refdist_matrix;

sequenzo/dissimilarity_measures/src/dp_utils.h ADDED

@@ -0,0 +1,160 @@
+#pragma once
+#include <pybind11/pybind11.h>
+#include <pybind11/numpy.h>
+#ifdef _OPENMP
+    #include <omp.h>
+#endif
+#include <cstdlib>
+#include <new>
+namespace dp_utils {
+// Cross-platform aligned allocation for double buffers
+#ifdef _WIN32
+inline double* aligned_alloc_double(size_t size, size_t align = 64) {
+    return reinterpret_cast<double*>(_aligned_malloc(size * sizeof(double), align));
+}
+inline void aligned_free_double(double* ptr) {
+    _aligned_free(ptr);
+}
+#else
+inline double* aligned_alloc_double(size_t size, size_t align = 64) {
+    void* ptr = nullptr;
+    if (posix_memalign(&ptr, align, size * sizeof(double)) != 0) throw std::bad_alloc();
+    return reinterpret_cast<double*>(ptr);
+}
+inline void aligned_free_double(double* ptr) { free(ptr); }
+#endif
+// Generic pairwise symmetric computation helper
+// ComputeFn signature: double(int i, int j, double* prev, double* curr)
+template <typename ComputeFn>
+inline pybind11::array_t<double> compute_all_distances(
+    int nseq,
+    int fmatsize,
+    pybind11::array_t<double>& dist_matrix,
+    ComputeFn&& compute_fn
+) {
+    auto buffer = dist_matrix.mutable_unchecked<2>();
+    #pragma omp parallel
+    {
+        double* prev = aligned_alloc_double(static_cast<size_t>(fmatsize));
+        double* curr = aligned_alloc_double(static_cast<size_t>(fmatsize));
+        #pragma omp for schedule(static)
+        for (int i = 0; i < nseq; i++) {
+            for (int j = i; j < nseq; j++) {
+                buffer(i, j) = compute_fn(i, j, prev, curr);
+            }
+        }
+        aligned_free_double(prev);
+        aligned_free_double(curr);
+    }
+    #pragma omp parallel for schedule(static)
+    for (int i = 0; i < nseq; i++) {
+        for (int j = i + 1; j < nseq; j++) {
+            buffer(j, i) = buffer(i, j);
+        }
+    }
+    return dist_matrix;
+}
+// Generic pairwise symmetric computation helper (no buffers)
+// ComputeFn signature: double(int i, int j)
+template <typename ComputeFn>
+inline pybind11::array_t<double> compute_all_distances_simple(
+    int nseq,
+    pybind11::array_t<double>& dist_matrix,
+    ComputeFn&& compute_fn
+) {
+    auto buffer = dist_matrix.mutable_unchecked<2>();
+    #pragma omp parallel
+    {
+        #pragma omp for schedule(static)
+        for (int i = 0; i < nseq; i++) {
+            for (int j = i; j < nseq; j++) {
+                buffer(i, j) = compute_fn(i, j);
+            }
+        }
+    }
+    #pragma omp parallel for schedule(static)
+    for (int i = 0; i < nseq; ++i) {
+        for (int j = i + 1; j < nseq; ++j) {
+            buffer(j, i) = buffer(i, j);
+        }
+    }
+    return dist_matrix;
+}
+// Generic reference-sequence computation helper (no buffers)
+// ComputeFn signature: double(int is, int rseq)
+template <typename ComputeFn>
+inline pybind11::array_t<double> compute_refseq_distances_simple(
+    int nseq,
+    int rseq1,
+    int rseq2,
+    pybind11::array_t<double>& refdist_matrix,
+    ComputeFn&& compute_fn
+) {
+    auto buffer = refdist_matrix.mutable_unchecked<2>();
+    #pragma omp parallel
+    {
+        #pragma omp for schedule(guided)
+        for (int rseq = rseq1; rseq < rseq2; rseq++) {
+            for (int is = 0; is < nseq; is++) {
+                buffer(is, rseq - rseq1) = (is == rseq) ? 0.0 : compute_fn(is, rseq);
+            }
+        }
+    }
+    return refdist_matrix;
+}
+// Generic reference-sequence computation helper (with DP buffers)
+// ComputeFn signature: double(int is, int rseq, double* prev, double* curr)
+template <typename ComputeFn>
+inline pybind11::array_t<double> compute_refseq_distances_buffered(
+    int nseq,
+    int rseq1,
+    int rseq2,
+    int fmatsize,
+    pybind11::array_t<double>& refdist_matrix,
+    ComputeFn&& compute_fn
+) {
+    auto buffer = refdist_matrix.mutable_unchecked<2>();
+    #pragma omp parallel
+    {
+        double* prev = aligned_alloc_double(static_cast<size_t>(fmatsize));
+        double* curr = aligned_alloc_double(static_cast<size_t>(fmatsize));
+        #pragma omp for schedule(static)
+        for (int rseq = rseq1; rseq < rseq2; rseq++) {
+            for (int is = 0; is < nseq; is++) {
+                double cmpres = 0.0;
+                if (is != rseq) {
+                    cmpres = compute_fn(is, rseq, prev, curr);
+                }
+                buffer(is, rseq - rseq1) = cmpres;
+            }
+        }
+        aligned_free_double(prev);
+        aligned_free_double(curr);
+    }
+    return refdist_matrix;
+}
+} // namespace dp_utils

sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_arithmetic.hpp CHANGED

@@ -34,6 +34,13 @@ namespace xsimd
                                  { return x << y; },
                                  self, other);
         }
+        template <size_t shift, class A, class T, class /*=typename std::enable_if<std::is_integral<T>::value, void>::type*/>
+        XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& self, requires_arch<common>) noexcept
+        {
+            constexpr auto bits = std::numeric_limits<T>::digits + std::numeric_limits<T>::is_signed;
+            static_assert(shift < bits, "Shift must be less than the number of bits in T");
+            return bitwise_lshift(self, shift, A {});
+        }
         // bitwise_rshift
         template <class A, class T, class /*=typename std::enable_if<std::is_integral<T>::value, void>::type*/>
@@ -43,6 +50,13 @@ namespace xsimd
                                  { return x >> y; },
                                  self, other);
         }
+        template <size_t shift, class A, class T, class /*=typename std::enable_if<std::is_integral<T>::value, void>::type*/>
+        XSIMD_INLINE batch<T, A> bitwise_rshift(batch<T, A> const& self, requires_arch<common>) noexcept
+        {
+            constexpr auto bits = std::numeric_limits<T>::digits + std::numeric_limits<T>::is_signed;
+            static_assert(shift < bits, "Shift must be less than the number of bits in T");
+            return bitwise_rshift(self, shift, A {});
+        }
         // decr
         template <class A, class T>
@@ -127,18 +141,16 @@ namespace xsimd
             return { res_r, res_i };
         }
-        // hadd
-        template <class A, class T, class /*=typename std::enable_if<std::is_integral<T>::value, void>::type*/>
-        XSIMD_INLINE T hadd(batch<T, A> const& self, requires_arch<common>) noexcept
+        // fmas
+        template <class A, class T>
+        XSIMD_INLINE batch<T, A> fmas(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z, requires_arch<common>) noexcept
         {
-            alignas(A::alignment()) T buffer[batch<T, A>::size];
-            self.store_aligned(buffer);
-            T res = 0;
-            for (T val : buffer)
+            struct even_lane
             {
-                res += val;
-            }
-            return res;
+                static constexpr bool get(unsigned const i, unsigned) noexcept { return (i & 1u) == 0; }
+            };
+            const auto mask = make_batch_bool_constant<T, even_lane, A>();
+            return fma(x, y, select(mask, neg(z), z));
         }
         // incr
@@ -168,16 +180,30 @@ namespace xsimd
         template <class A, class T, class STy>
         XSIMD_INLINE batch<T, A> rotl(batch<T, A> const& self, STy other, requires_arch<common>) noexcept
         {
-            constexpr auto N = std::numeric_limits<T>::digits;
-            return (self << other) | (self >> (N - other));
+            constexpr auto bits = std::numeric_limits<T>::digits + std::numeric_limits<T>::is_signed;
+            return (self << other) | (self >> (bits - other));
+        }
+        template <size_t count, class A, class T>
+        XSIMD_INLINE batch<T, A> rotl(batch<T, A> const& self, requires_arch<common>) noexcept
+        {
+            constexpr auto bits = std::numeric_limits<T>::digits + std::numeric_limits<T>::is_signed;
+            static_assert(count < bits, "Count amount must be less than the number of bits in T");
+            return bitwise_lshift<count>(self) | bitwise_rshift<bits - count>(self);
         }
         // rotr
         template <class A, class T, class STy>
         XSIMD_INLINE batch<T, A> rotr(batch<T, A> const& self, STy other, requires_arch<common>) noexcept
         {
-            constexpr auto N = std::numeric_limits<T>::digits;
-            return (self >> other) | (self << (N - other));
+            constexpr auto bits = std::numeric_limits<T>::digits + std::numeric_limits<T>::is_signed;
+            return (self >> other) | (self << (bits - other));
+        }
+        template <size_t count, class A, class T>
+        XSIMD_INLINE batch<T, A> rotr(batch<T, A> const& self, requires_arch<common>) noexcept
+        {
+            constexpr auto bits = std::numeric_limits<T>::digits + std::numeric_limits<T>::is_signed;
+            static_assert(count < bits, "Count must be less than the number of bits in T");
+            return bitwise_rshift<count>(self) | bitwise_lshift<bits - count>(self);
         }
         // sadd
@@ -191,10 +217,9 @@ namespace xsimd
         {
             if (std::is_signed<T>::value)
             {
-                auto mask = (other >> (8 * sizeof(T) - 1));
                 auto self_pos_branch = min(std::numeric_limits<T>::max() - other, self);
                 auto self_neg_branch = max(std::numeric_limits<T>::min() - other, self);
-                return other + select(batch_bool<T, A>(mask.data), self_neg_branch, self_pos_branch);
+                return other + select(other >= 0, self_pos_branch, self_neg_branch);
             }
             else
             {

sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_complex.hpp CHANGED

@@ -78,11 +78,15 @@ namespace xsimd
             using batch_type = complex_batch_type_t<batch<T, A>>;
             using real_batch = typename batch_type::real_batch;
             using real_value_type = typename real_batch::value_type;
+#ifdef __FAST_MATH__
+            return { self };
+#else
             auto cond = xsimd::isinf(real(self)) || xsimd::isinf(imag(self));
             return select(cond,
                           batch_type(constants::infinity<real_batch>(),
                                      copysign(real_batch(real_value_type(0)), imag(self))),
                           batch_type(self));
+#endif
         }
         template <class A, class T>

sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_details.hpp CHANGED

@@ -47,6 +47,8 @@ namespace xsimd
     template <class T, class A>
     XSIMD_INLINE batch<T, A> fms(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z) noexcept;
     template <class T, class A>
+    XSIMD_INLINE batch<T, A> fmas(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z) noexcept;
+    template <class T, class A>
     XSIMD_INLINE batch<T, A> frexp(const batch<T, A>& x, const batch<as_integer_t<T>, A>& e) noexcept;
     template <class T, class A, uint64_t... Coefs>
     XSIMD_INLINE batch<T, A> horner(const batch<T, A>& self) noexcept;
@@ -75,6 +77,8 @@ namespace xsimd
     template <class T, class A>
     XSIMD_INLINE T reduce_add(batch<T, A> const&) noexcept;
     template <class T, class A>
+    XSIMD_INLINE T reduce_mul(batch<T, A> const&) noexcept;
+    template <class T, class A>
     XSIMD_INLINE batch<T, A> select(batch_bool<T, A> const&, batch<T, A> const&, batch<T, A> const&) noexcept;
     template <class T, class A>
     XSIMD_INLINE batch<std::complex<T>, A> select(batch_bool<T, A> const&, batch<std::complex<T>, A> const&, batch<std::complex<T>, A> const&) noexcept;
@@ -90,6 +94,9 @@ namespace xsimd
     XSIMD_INLINE std::pair<batch<T, A>, batch<T, A>> sincos(batch<T, A> const& self) noexcept;
     template <class T, class A>
     XSIMD_INLINE batch<T, A> sqrt(batch<T, A> const& self) noexcept;
+    template <class T, class A, class Vt, Vt... Values>
+    XSIMD_INLINE typename std::enable_if<std::is_arithmetic<T>::value, batch<T, A>>::type
+    swizzle(batch<T, A> const& x, batch_constant<Vt, A, Values...> mask) noexcept;
     template <class T, class A>
     XSIMD_INLINE batch<T, A> tan(batch<T, A> const& self) noexcept;
     template <class T, class A>

sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_logical.hpp CHANGED

@@ -124,12 +124,22 @@ namespace xsimd
         template <class A>
         XSIMD_INLINE batch_bool<float, A> isinf(batch<float, A> const& self, requires_arch<common>) noexcept
         {
+#ifdef __FAST_MATH__
+            (void)self;
+            return { false };
+#else
             return abs(self) == std::numeric_limits<float>::infinity();
+#endif
         }
         template <class A>
         XSIMD_INLINE batch_bool<double, A> isinf(batch<double, A> const& self, requires_arch<common>) noexcept
         {
+#ifdef __FAST_MATH__
+            (void)self;
+            return { false };
+#else
             return abs(self) == std::numeric_limits<double>::infinity();
+#endif
         }
         // isfinite