RubyGems - anomaly_detection - Versions diffs - 0.3.0 → 0.3.1 - Mend

anomaly_detection 0.3.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8)

checksums.yaml +4 -4
data/CHANGELOG.md +4 -0
data/README.md +3 -3
data/ext/anomaly_detection/anomaly_detection.hpp +76 -30
data/ext/anomaly_detection/ext.cpp +4 -1
data/ext/anomaly_detection/stl.hpp +415 -100
data/lib/anomaly_detection/version.rb +1 -1
metadata +5 -9

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: cd8b32a5d482c312deb58949c67489b21ce336ddfd90d6215a0f5cf7650f8067
-  data.tar.gz: 3ebd3c26210e64df4f07a13dccf7e0f0276e4cc2e825d0684a9947cbc5d94bc8
+  metadata.gz: 8321bd70889b5f58c93c1e0830513c2984ec6512ec4e9b1418c075fe475485a6
+  data.tar.gz: e62683ce7f8eb5d7e6451252d50b226bb8d0b585c1b9778a193cb1fb8365ff0a
 SHA512:
-  metadata.gz: fd3972f75c65c104057b0b3b4102462e66cf490a901b3d18b2aebd623916d058d0c7eeaa0528d5013838605e36ae29efacaaf2bcbb76c74f90740bb36c191fca
-  data.tar.gz: 0dccba6f00ee2b41683aaa3833108b61095ac02f1be9cf4c6d581a822765022fa8fbd99f7e6afee499768eda69bd2355775af90592a46672ff829b9c38b0735d
+  metadata.gz: 48d887f12f33052e6f5c428cba99ff8a625e269591a03c51c4b3714326c32155154ec494eeefc95a48bbbe115ce14469d26c0fabfd3d1e3453aafaa19ca70a3b
+  data.tar.gz: 03bd43a3819a44218e53ffdf556c2d8d2993e3f6acb8819da1b935682b519ec2c203238e4c38f8d3c0ae7eb278d358021dae978c32b1a88aabf18de520077b55

data/CHANGELOG.md CHANGED Viewed

@@ -1,3 +1,7 @@
+## 0.3.1 (2025-10-26)
+- Fixed error with Rice 4.7
 ## 0.3.0 (2024-10-22)
 - Dropped support for Ruby < 3.1

data/README.md CHANGED Viewed

@@ -20,9 +20,9 @@ Detect anomalies in a time series
 ```ruby
 series = {
-  Date.parse("2020-01-01") => 100,
-  Date.parse("2020-01-02") => 150,
-  Date.parse("2020-01-03") => 136,
+  Date.parse("2025-01-01") => 100,
+  Date.parse("2025-01-02") => 150,
+  Date.parse("2025-01-03") => 136,
   # ...
 }

data/ext/anomaly_detection/anomaly_detection.hpp CHANGED Viewed

@@ -1,5 +1,5 @@
 /*!
- * AnomalyDetection.cpp v0.1.3
+ * AnomalyDetection.cpp v0.2.1
  * https://github.com/ankane/AnomalyDetection.cpp
  * GPL-3.0-or-later License
  */
@@ -12,35 +12,53 @@
 #include <numeric>
 #include <vector>
+#if __cplusplus >= 202002L
+#include <span>
+#endif
 #include "dist.h"
 #include "stl.hpp"
 namespace anomaly_detection {
-enum Direction { Positive, Negative, Both };
+/// The direction to detect anomalies.
+enum class Direction {
+    /// Positive direction.
+    Positive,
+    /// Negative direction.
+    Negative,
+    /// Both directions.
+    Both
+};
+namespace {
-float median_sorted(const std::vector<float>& sorted) {
+template<typename T>
+T median_sorted(const std::vector<T>& sorted) {
     return (sorted[(sorted.size() - 1) / 2] + sorted[sorted.size() / 2]) / 2.0;
 }
-float median(const std::vector<float>& data) {
-    std::vector<float> sorted(data);
+template<typename T>
+T median(const T* data, size_t data_size) {
+    std::vector<T> sorted(data, data + data_size);
     std::sort(sorted.begin(), sorted.end());
     return median_sorted(sorted);
 }
-float mad(const std::vector<float>& data, float med) {
-    std::vector<float> res;
+template<typename T>
+T mad(const std::vector<T>& data, T med) {
+    std::vector<T> res;
     res.reserve(data.size());
     for (auto v : data) {
-        res.push_back(fabs(v - med));
+        res.push_back(std::abs(v - med));
     }
     std::sort(res.begin(), res.end());
     return 1.4826 * median_sorted(res);
 }
-std::vector<size_t> detect_anoms(const std::vector<float>& data, size_t num_obs_per_period, float k, float alpha, bool one_tail, bool upper_tail, bool verbose, std::function<void()> callback) {
-    auto n = data.size();
+template<typename T>
+std::vector<size_t> detect_anoms(const T* data, size_t data_size, size_t num_obs_per_period, float k, float alpha, bool one_tail, bool upper_tail, bool verbose, std::function<void()> callback) {
+    auto n = data_size;
     // Check to make sure we have at least two periods worth of data for anomaly context
     if (n < num_obs_per_period * 2) {
@@ -48,18 +66,20 @@ std::vector<size_t> detect_anoms(const std::vector<float>& data, size_t num_obs_
     }
     // Handle NANs
-    auto nan = std::count_if(data.begin(), data.end(), [](const auto& value) { return std::isnan(value); });
+    auto nan = std::count_if(data, data + data_size, [](const auto& value) {
+        return std::isnan(value);
+    });
     if (nan > 0) {
         throw std::invalid_argument("series contains NANs");
     }
-    std::vector<float> data2;
+    std::vector<T> data2;
     data2.reserve(n);
-    auto med = median(data);
+    auto med = median(data, data_size);
     if (num_obs_per_period > 1) {
         // Decompose data. This returns a univariate remainder which will be used for anomaly detection. Optionally, we might NOT decompose.
-        auto data_decomp = stl::params().robust(true).seasonal_length(data.size() * 10 + 1).fit(data, num_obs_per_period);
+        auto data_decomp = stl::params().robust(true).seasonal_length(data_size * 10 + 1).fit(data, data_size, num_obs_per_period);
         auto seasonal = data_decomp.seasonal;
         for (size_t i = 0; i < n; i++) {
@@ -80,7 +100,9 @@ std::vector<size_t> detect_anoms(const std::vector<float>& data, size_t num_obs_
     // Use stable sort for indexes for deterministic results
     std::vector<size_t> indexes(n);
     std::iota(indexes.begin(), indexes.end(), 0);
-    std::stable_sort(indexes.begin(), indexes.end(), [&data2](size_t a, size_t b) { return data2[a] < data2[b]; });
+    std::stable_sort(indexes.begin(), indexes.end(), [&data2](size_t a, size_t b) {
+        return data2[a] < data2[b];
+    });
     std::sort(data2.begin(), data2.end());
     // Compute test statistic until r=max_outliers values have been removed from the sample
@@ -91,7 +113,7 @@ std::vector<size_t> detect_anoms(const std::vector<float>& data, size_t num_obs_
         // TODO Improve performance between loop iterations
         auto ma = median_sorted(data2);
-        std::vector<float> ares;
+        std::vector<T> ares;
         ares.reserve(data2.size());
         if (one_tail) {
             if (upper_tail) {
@@ -105,7 +127,7 @@ std::vector<size_t> detect_anoms(const std::vector<float>& data, size_t num_obs_
             }
         } else {
             for (auto v : data2) {
-                ares.push_back(fabs(v - ma));
+                ares.push_back(std::abs(v - ma));
             }
         }
@@ -126,7 +148,7 @@ std::vector<size_t> detect_anoms(const std::vector<float>& data, size_t num_obs_
         indexes.erase(indexes.begin() + r_idx_i);
         // Compute critical value
-        float p;
+        double p;
         if (one_tail) {
             p = 1.0 - alpha / (n - i + 1);
         } else {
@@ -134,7 +156,7 @@ std::vector<size_t> detect_anoms(const std::vector<float>& data, size_t num_obs_
         }
         auto t = students_t_ppf(p, n - i - 1);
-        auto lam = t * (n - i) / sqrt(((n - i - 1) + t * t) * (n - i + 1));
+        auto lam = t * (n - i) / std::sqrt(((n - i - 1) + t * t) * (n - i + 1));
         if (r > lam) {
             num_anoms = i;
@@ -153,11 +175,16 @@ std::vector<size_t> detect_anoms(const std::vector<float>& data, size_t num_obs_
     return anomalies;
 }
+}
+/// An anomaly detection result.
 class AnomalyDetectionResult {
 public:
+    /// Returns the anomalies.
     std::vector<size_t> anomalies;
 };
+/// A set of anomaly detection parameters.
 class AnomalyDetectionParams {
     float alpha_ = 0.05;
     float max_anoms_ = 0.1;
@@ -166,45 +193,64 @@ class AnomalyDetectionParams {
     std::function<void()> callback_ = nullptr;
 public:
+    /// Sets the level of statistical significance.
     inline AnomalyDetectionParams alpha(float alpha) {
         this->alpha_ = alpha;
         return *this;
     };
+    /// Sets the maximum number of anomalies as percent of data.
     inline AnomalyDetectionParams max_anoms(float max_anoms) {
         this->max_anoms_ = max_anoms;
         return *this;
     };
+    /// Sets the direction.
     inline AnomalyDetectionParams direction(Direction direction) {
         this->direction_ = direction;
         return *this;
     };
+    /// Sets whether to show progress.
     inline AnomalyDetectionParams verbose(bool verbose) {
         this->verbose_ = verbose;
         return *this;
     };
+    /// Sets a callback for each iteration.
     inline AnomalyDetectionParams callback(std::function<void()> callback) {
         this->callback_ = callback;
         return *this;
     };
-    AnomalyDetectionResult fit(const std::vector<float>& series, size_t period);
-};
+    /// Detects anomalies in a time series from an array.
+    template<typename T>
+    inline AnomalyDetectionResult fit(const T* series, size_t series_size, size_t period) const {
+        bool one_tail = this->direction_ != Direction::Both;
+        bool upper_tail = this->direction_ == Direction::Positive;
-AnomalyDetectionParams params() {
-    return AnomalyDetectionParams();
-}
+        auto anomalies = detect_anoms(series, series_size, period, this->max_anoms_, this->alpha_, one_tail, upper_tail, this->verbose_, this->callback_);
+        return AnomalyDetectionResult { anomalies };
+    }
-AnomalyDetectionResult AnomalyDetectionParams::fit(const std::vector<float>& series, size_t period) {
-    bool one_tail = this->direction_ != Direction::Both;
-    bool upper_tail = this->direction_ == Direction::Positive;
+    /// Detects anomalies in a time series from a vector.
+    template<typename T>
+    inline AnomalyDetectionResult fit(const std::vector<T>& series, size_t period) const {
+        return fit(series.data(), series.size(), period);
+    }
-    auto res = AnomalyDetectionResult();
-    res.anomalies = detect_anoms(series, period, this->max_anoms_, this->alpha_, one_tail, upper_tail, this->verbose_, this->callback_);
-    return res;
+#if __cplusplus >= 202002L
+    /// Detects anomalies in a time series from a span.
+    template<typename T>
+    inline AnomalyDetectionResult fit(std::span<const T> series, size_t period) const {
+        return fit(series.data(), series.size(), period);
+    }
+#endif
+};
+/// Creates a new set of parameters.
+inline AnomalyDetectionParams params() {
+    return AnomalyDetectionParams();
 }
 }

data/ext/anomaly_detection/ext.cpp CHANGED Viewed

@@ -1,3 +1,6 @@
+#include <string>
+#include <vector>
 #include <rice/rice.hpp>
 #include <rice/stl.hpp>
@@ -34,7 +37,7 @@ void Init_ext() {
         auto a = Rice::Array();
         for (auto v : res.anomalies) {
-          a.push(v);
+          a.push(v, false);
         }
         return a;
       });

data/ext/anomaly_detection/stl.hpp CHANGED Viewed

@@ -1,5 +1,5 @@
 /*!
- * STL C++ v0.1.3
+ * STL C++ v0.2.0
  * https://github.com/ankane/stl-cpp
  * Unlicense OR MIT License
  *
@@ -8,6 +8,10 @@
  * Cleveland, R. B., Cleveland, W. S., McRae, J. E., & Terpenning, I. (1990).
  * STL: A Seasonal-Trend Decomposition Procedure Based on Loess.
  * Journal of Official Statistics, 6(1), 3-33.
+ *
+ * Bandara, K., Hyndman, R. J., & Bergmeir, C. (2021).
+ * MSTL: A Seasonal-Trend Decomposition Algorithm for Time Series with Multiple Seasonal Patterns.
+ * arXiv:2107.13462 [stat.AP]. https://doi.org/10.48550/arXiv.2107.13462
  */
 #pragma once
@@ -17,16 +21,24 @@
 #include <numeric>
 #include <optional>
 #include <stdexcept>
+#include <tuple>
 #include <vector>
+#if __cplusplus >= 202002L
+#include <span>
+#endif
 namespace stl {
-bool est(const float* y, size_t n, size_t len, int ideg, float xs, float* ys, size_t nleft, size_t nright, float* w, bool userw, const float* rw) {
-    auto range = ((float) n) - 1.0;
-    auto h = std::max(xs - ((float) nleft), ((float) nright) - xs);
+namespace {
+template<typename T>
+bool est(const T* y, size_t n, size_t len, int ideg, T xs, T* ys, size_t nleft, size_t nright, T* w, bool userw, const T* rw) {
+    auto range = ((T) n) - 1.0;
+    auto h = std::max(xs - ((T) nleft), ((T) nright) - xs);
     if (len > n) {
-        h += (float) ((len - n) / 2);
+        h += (T) ((len - n) / 2);
     }
     auto h9 = 0.999 * h;
@@ -36,12 +48,12 @@ bool est(const float* y, size_t n, size_t len, int ideg, float xs, float* ys, si
     auto a = 0.0;
     for (auto j = nleft; j <= nright; j++) {
         w[j - 1] = 0.0;
-        auto r = fabs(((float) j) - xs);
+        auto r = std::abs(((T) j) - xs);
         if (r <= h9) {
             if (r <= h1) {
                 w[j - 1] = 1.0;
             } else {
-                w[j - 1] = pow(1.0 - pow(r / h, 3), 3);
+                w[j - 1] = (T) std::pow(1.0 - std::pow(r / h, 3), 3);
             }
             if (userw) {
                 w[j - 1] *= rw[j - 1];
@@ -54,25 +66,25 @@ bool est(const float* y, size_t n, size_t len, int ideg, float xs, float* ys, si
         return false;
     } else { // weighted least squares
         for (auto j = nleft; j <= nright; j++) { // make sum of w(j) == 1
-            w[j - 1] /= a;
+            w[j - 1] /= (T) a;
         }
         if (h > 0.0 && ideg > 0) { // use linear fit
             auto a = 0.0;
             for (auto j = nleft; j <= nright; j++) { // weighted center of x values
-                a += w[j - 1] * ((float) j);
+                a += w[j - 1] * ((T) j);
             }
             auto b = xs - a;
             auto c = 0.0;
             for (auto j = nleft; j <= nright; j++) {
-                c += w[j - 1] * pow(((float) j) - a, 2);
+                c += w[j - 1] * std::pow(((T) j) - a, 2);
             }
-            if (sqrt(c) > 0.001 * range) {
+            if (std::sqrt(c) > 0.001 * range) {
                 b /= c;
                 // points are spread out enough to compute slope
                 for (auto j = nleft; j <= nright; j++) {
-                    w[j - 1] *= b * (((float) j) - a) + 1.0;
+                    w[j - 1] *= (T) (b * (((T) j) - a) + 1.0);
                 }
             }
         }
@@ -86,7 +98,8 @@ bool est(const float* y, size_t n, size_t len, int ideg, float xs, float* ys, si
     }
 }
-void ess(const float* y, size_t n, size_t len, int ideg, size_t njump, bool userw, const float* rw, float* ys, float* res) {
+template<typename T>
+void ess(const T* y, size_t n, size_t len, int ideg, size_t njump, bool userw, const T* rw, T* ys, T* res) {
     if (n < 2) {
         ys[0] = y[0];
         return;
@@ -100,7 +113,7 @@ void ess(const float* y, size_t n, size_t len, int ideg, size_t njump, bool user
         nleft = 1;
         nright = n;
         for (size_t i = 1; i <= n; i += newnj) {
-            auto ok = est(y, n, len, ideg, (float) i, &ys[i - 1], nleft, nright, res, userw, rw);
+            auto ok = est(y, n, len, ideg, (T) i, &ys[i - 1], nleft, nright, res, userw, rw);
             if (!ok) {
                 ys[i - 1] = y[i - 1];
             }
@@ -114,7 +127,7 @@ void ess(const float* y, size_t n, size_t len, int ideg, size_t njump, bool user
                 nleft += 1;
                 nright += 1;
             }
-            auto ok = est(y, n, len, ideg, (float) i, &ys[i - 1], nleft, nright, res, userw, rw);
+            auto ok = est(y, n, len, ideg, (T) i, &ys[i - 1], nleft, nright, res, userw, rw);
             if (!ok) {
                 ys[i - 1] = y[i - 1];
             }
@@ -132,7 +145,7 @@ void ess(const float* y, size_t n, size_t len, int ideg, size_t njump, bool user
                 nleft = i - nsh + 1;
                 nright = len + i - nsh;
             }
-            auto ok = est(y, n, len, ideg, (float) i, &ys[i - 1], nleft, nright, res, userw, rw);
+            auto ok = est(y, n, len, ideg, (T) i, &ys[i - 1], nleft, nright, res, userw, rw);
             if (!ok) {
                 ys[i - 1] = y[i - 1];
             }
@@ -141,60 +154,63 @@ void ess(const float* y, size_t n, size_t len, int ideg, size_t njump, bool user
     if (newnj != 1) {
         for (size_t i = 1; i <= n - newnj; i += newnj) {
-            auto delta = (ys[i + newnj - 1] - ys[i - 1]) / ((float) newnj);
+            auto delta = (ys[i + newnj - 1] - ys[i - 1]) / ((T) newnj);
             for (auto j = i + 1; j <= i + newnj - 1; j++) {
-                ys[j - 1] = ys[i - 1] + delta * ((float) (j - i));
+                ys[j - 1] = ys[i - 1] + delta * ((T) (j - i));
             }
         }
         auto k = ((n - 1) / newnj) * newnj + 1;
         if (k != n) {
-            auto ok = est(y, n, len, ideg, (float) n, &ys[n - 1], nleft, nright, res, userw, rw);
+            auto ok = est(y, n, len, ideg, (T) n, &ys[n - 1], nleft, nright, res, userw, rw);
             if (!ok) {
                 ys[n - 1] = y[n - 1];
             }
             if (k != n - 1) {
-                auto delta = (ys[n - 1] - ys[k - 1]) / ((float) (n - k));
+                auto delta = (ys[n - 1] - ys[k - 1]) / ((T) (n - k));
                 for (auto j = k + 1; j <= n - 1; j++) {
-                    ys[j - 1] = ys[k - 1] + delta * ((float) (j - k));
+                    ys[j - 1] = ys[k - 1] + delta * ((T) (j - k));
                 }
             }
         }
     }
 }
-void ma(const float* x, size_t n, size_t len, float* ave) {
+template<typename T>
+void ma(const T* x, size_t n, size_t len, T* ave) {
     auto newn = n - len + 1;
-    auto flen = (float) len;
-    auto v = 0.0;
+    double flen = (T) len;
+    double v = 0.0;
     // get the first average
     for (size_t i = 0; i < len; i++) {
         v += x[i];
     }
-    ave[0] = v / flen;
+    ave[0] = (T) (v / flen);
     if (newn > 1) {
-        auto k = len;
-        auto m = 0;
+        size_t k = len;
+        size_t m = 0;
         for (size_t j = 1; j < newn; j++) {
             // window down the array
             v = v - x[m] + x[k];
-            ave[j] = v / flen;
+            ave[j] = (T) (v / flen);
             k += 1;
             m += 1;
         }
     }
 }
-void fts(const float* x, size_t n, size_t np, float* trend, float* work) {
+template<typename T>
+void fts(const T* x, size_t n, size_t np, T* trend, T* work) {
     ma(x, n, np, trend);
     ma(trend, n - np + 1, np, work);
     ma(work, n - 2 * np + 2, 3, trend);
 }
-void rwts(const float* y, size_t n, const float* fit, float* rw) {
+template<typename T>
+void rwts(const T* y, size_t n, const T* fit, T* rw) {
     for (size_t i = 0; i < n; i++) {
-        rw[i] = fabs(y[i] - fit[i]);
+        rw[i] = std::abs(y[i] - fit[i]);
     }
     auto mid1 = (n - 1) / 2;
@@ -208,18 +224,19 @@ void rwts(const float* y, size_t n, const float* fit, float* rw) {
     auto c1 = 0.001 * cmad;
     for (size_t i = 0; i < n; i++) {
-        auto r = fabs(y[i] - fit[i]);
+        auto r = std::abs(y[i] - fit[i]);
         if (r <= c1) {
             rw[i] = 1.0;
         } else if (r <= c9) {
-            rw[i] = pow(1.0 - pow(r / cmad, 2), 2);
+            rw[i] = (T) std::pow(1.0 - std::pow(r / cmad, 2), 2);
         } else {
             rw[i] = 0.0;
         }
     }
 }
-void ss(const float* y, size_t n, size_t np, size_t ns, int isdeg, size_t nsjump, bool userw, float* rw, float* season, float* work1, float* work2, float* work3, float* work4) {
+template<typename T>
+void ss(const T* y, size_t n, size_t np, size_t ns, int isdeg, size_t nsjump, bool userw, T* rw, T* season, T* work1, T* work2, T* work3, T* work4) {
     for (size_t j = 1; j <= np; j++) {
         size_t k = (n - j) / np + 1;
@@ -232,14 +249,14 @@ void ss(const float* y, size_t n, size_t np, size_t ns, int isdeg, size_t nsjump
             }
         }
         ess(work1, k, ns, isdeg, nsjump, userw, work3, work2 + 1, work4);
-        auto xs = 0.0;
+        T xs = 0.0;
         auto nright = std::min(ns, k);
         auto ok = est(work1, k, ns, isdeg, xs, &work2[0], 1, nright, work4, userw, work3);
         if (!ok) {
             work2[0] = work2[1];
         }
         xs = k + 1;
-        size_t nleft = std::max(1, (int) k - (int) ns + 1);
+        size_t nleft = (size_t) std::max(1, (int) k - (int) ns + 1);
         ok = est(work1, k, ns, isdeg, xs, &work2[k + 1], nleft, k, work4, userw, work3);
         if (!ok) {
             work2[k + 1] = work2[k];
@@ -250,7 +267,8 @@ void ss(const float* y, size_t n, size_t np, size_t ns, int isdeg, size_t nsjump
     }
 }
-void onestp(const float* y, size_t n, size_t np, size_t ns, size_t nt, size_t nl, int isdeg, int itdeg, int ildeg, size_t nsjump, size_t ntjump, size_t nljump, size_t ni, bool userw, float* rw, float* season, float* trend, float* work1, float* work2, float* work3, float* work4, float* work5) {
+template<typename T>
+void onestp(const T* y, size_t n, size_t np, size_t ns, size_t nt, size_t nl, int isdeg, int itdeg, int ildeg, size_t nsjump, size_t ntjump, size_t nljump, size_t ni, bool userw, T* rw, T* season, T* trend, T* work1, T* work2, T* work3, T* work4, T* work5) {
     for (size_t j = 0; j < ni; j++) {
         for (size_t i = 0; i < n; i++) {
             work1[i] = y[i] - trend[i];
@@ -269,7 +287,8 @@ void onestp(const float* y, size_t n, size_t np, size_t ns, size_t nt, size_t nl
     }
 }
-void stl(const float* y, size_t n, size_t np, size_t ns, size_t nt, size_t nl, int isdeg, int itdeg, int ildeg, size_t nsjump, size_t ntjump, size_t nljump, size_t ni, size_t no, float* rw, float* season, float* trend) {
+template<typename T>
+void stl(const T* y, size_t n, size_t np, size_t ns, size_t nt, size_t nl, int isdeg, int itdeg, int ildeg, size_t nsjump, size_t ntjump, size_t nljump, size_t ni, size_t no, T* rw, T* season, T* trend) {
     if (ns < 3) {
         throw std::invalid_argument("seasonal_length must be at least 3");
     }
@@ -303,11 +322,11 @@ void stl(const float* y, size_t n, size_t np, size_t ns, size_t nt, size_t nl, i
         throw std::invalid_argument("low_pass_length must be odd");
     }
-    auto work1 = std::vector<float>(n + 2 * np);
-    auto work2 = std::vector<float>(n + 2 * np);
-    auto work3 = std::vector<float>(n + 2 * np);
-    auto work4 = std::vector<float>(n + 2 * np);
-    auto work5 = std::vector<float>(n + 2 * np);
+    auto work1 = std::vector<T>(n + 2 * np);
+    auto work2 = std::vector<T>(n + 2 * np);
+    auto work3 = std::vector<T>(n + 2 * np);
+    auto work4 = std::vector<T>(n + 2 * np);
+    auto work5 = std::vector<T>(n + 2 * np);
     auto userw = false;
     size_t k = 0;
@@ -332,44 +351,62 @@ void stl(const float* y, size_t n, size_t np, size_t ns, size_t nt, size_t nl, i
     }
 }
-float var(const std::vector<float>& series) {
+template<typename T>
+double var(const std::vector<T>& series) {
     auto mean = std::accumulate(series.begin(), series.end(), 0.0) / series.size();
-    std::vector<float> tmp;
-    tmp.reserve(series.size());
+    double sum = 0.0;
     for (auto v : series) {
-        tmp.push_back(pow(v - mean, 2));
+        double diff = v - mean;
+        sum += diff * diff;
+    }
+    return sum / (series.size() - 1);
+}
+template<typename T>
+double strength(const std::vector<T>& component, const std::vector<T>& remainder) {
+    std::vector<T> sr;
+    sr.reserve(remainder.size());
+    for (size_t i = 0; i < remainder.size(); i++) {
+        sr.push_back(component[i] + remainder[i]);
     }
-    return std::accumulate(tmp.begin(), tmp.end(), 0.0) / (series.size() - 1);
+    return std::max(0.0, 1.0 - var(remainder) / var(sr));
 }
+}
+/// A STL result.
+template<typename T = float>
 class StlResult {
 public:
-    std::vector<float> seasonal;
-    std::vector<float> trend;
-    std::vector<float> remainder;
-    std::vector<float> weights;
-    inline float seasonal_strength() {
-        std::vector<float> sr;
-        sr.reserve(remainder.size());
-        for (size_t i = 0; i < remainder.size(); i++) {
-            sr.push_back(seasonal[i] + remainder[i]);
-        }
-        return std::max(0.0, 1.0 - var(remainder) / var(sr));
+    /// Returns the seasonal component.
+    std::vector<T> seasonal;
+    /// Returns the trend component.
+    std::vector<T> trend;
+    /// Returns the remainder.
+    std::vector<T> remainder;
+    /// Returns the weights.
+    std::vector<T> weights;
+    /// Returns the seasonal strength.
+    inline double seasonal_strength() const {
+        return strength(seasonal, remainder);
     }
-    inline float trend_strength() {
-        std::vector<float> tr;
-        tr.reserve(remainder.size());
-        for (size_t i = 0; i < remainder.size(); i++) {
-            tr.push_back(trend[i] + remainder[i]);
-        }
-        return std::max(0.0, 1.0 - var(remainder) / var(tr));
+    /// Returns the trend strength.
+    inline double trend_strength() const {
+        return strength(trend, remainder);
     }
 };
+/// A set of STL parameters.
 class StlParams {
+public:
+    /// @private
     std::optional<size_t> ns_ = std::nullopt;
+private:
     std::optional<size_t> nt_ = std::nullopt;
     std::optional<size_t> nl_ = std::nullopt;
     int isdeg_ = 0;
@@ -383,75 +420,104 @@ class StlParams {
     bool robust_ = false;
 public:
-    inline StlParams seasonal_length(size_t ns) {
-        this->ns_ = ns;
+    /// Sets the length of the seasonal smoother.
+    inline StlParams seasonal_length(size_t length) {
+        this->ns_ = length;
         return *this;
     }
-    inline StlParams trend_length(size_t nt) {
-        this->nt_ = nt;
+    /// Sets the length of the trend smoother.
+    inline StlParams trend_length(size_t length) {
+        this->nt_ = length;
         return *this;
     }
-    inline StlParams low_pass_length(size_t nl) {
-        this->nl_ = nl;
+    /// Sets the length of the low-pass filter.
+    inline StlParams low_pass_length(size_t length) {
+        this->nl_ = length;
         return *this;
     }
-    inline StlParams seasonal_degree(int isdeg) {
-        this->isdeg_ = isdeg;
+    /// Sets the degree of locally-fitted polynomial in seasonal smoothing.
+    inline StlParams seasonal_degree(int degree) {
+        this->isdeg_ = degree;
         return *this;
     }
-    inline StlParams trend_degree(int itdeg) {
-        this->itdeg_ = itdeg;
+    /// Sets the degree of locally-fitted polynomial in trend smoothing.
+    inline StlParams trend_degree(int degree) {
+        this->itdeg_ = degree;
         return *this;
     }
-    inline StlParams low_pass_degree(int ildeg) {
-        this->ildeg_ = ildeg;
+    /// Sets the degree of locally-fitted polynomial in low-pass smoothing.
+    inline StlParams low_pass_degree(int degree) {
+        this->ildeg_ = degree;
         return *this;
     }
-    inline StlParams seasonal_jump(size_t nsjump) {
-        this->nsjump_ = nsjump;
+    /// Sets the skipping value for seasonal smoothing.
+    inline StlParams seasonal_jump(size_t jump) {
+        this->nsjump_ = jump;
         return *this;
     }
-    inline StlParams trend_jump(size_t ntjump) {
-        this->ntjump_ = ntjump;
+    /// Sets the skipping value for trend smoothing.
+    inline StlParams trend_jump(size_t jump) {
+        this->ntjump_ = jump;
         return *this;
     }
-    inline StlParams low_pass_jump(size_t nljump) {
-        this->nljump_ = nljump;
+    /// Sets the skipping value for low-pass smoothing.
+    inline StlParams low_pass_jump(size_t jump) {
+        this->nljump_ = jump;
         return *this;
     }
-    inline StlParams inner_loops(bool ni) {
-        this->ni_ = ni;
+    /// Sets the number of loops for updating the seasonal and trend components.
+    inline StlParams inner_loops(size_t loops) {
+        this->ni_ = loops;
         return *this;
     }
-    inline StlParams outer_loops(bool no) {
-        this->no_ = no;
+    /// Sets the number of iterations of robust fitting.
+    inline StlParams outer_loops(size_t loops) {
+        this->no_ = loops;
         return *this;
     }
+    /// Sets whether robustness iterations are to be used.
     inline StlParams robust(bool robust) {
         this->robust_ = robust;
         return *this;
     }
-    StlResult fit(const float* y, size_t n, size_t np);
-    StlResult fit(const std::vector<float>& y, size_t np);
+    /// Decomposes a time series from an array.
+    template<typename T>
+    StlResult<T> fit(const T* series, size_t series_size, size_t period) const;
+    /// Decomposes a time series from a vector.
+    template<typename T>
+    StlResult<T> fit(const std::vector<T>& series, size_t period) const;
+#if __cplusplus >= 202002L
+    /// Decomposes a time series from a span.
+    template<typename T>
+    StlResult<T> fit(std::span<const T> series, size_t period) const;
+#endif
 };
-StlParams params() {
+/// Creates a new set of STL parameters.
+inline StlParams params() {
     return StlParams();
 }
-StlResult StlParams::fit(const float* y, size_t n, size_t np) {
+template<typename T>
+StlResult<T> StlParams::fit(const T* series, size_t series_size, size_t period) const {
+    auto y = series;
+    auto np = period;
+    auto n = series_size;
     if (n < 2 * np) {
         throw std::invalid_argument("series has less than two periods");
     }
@@ -461,11 +527,11 @@ StlResult StlParams::fit(const float* y, size_t n, size_t np) {
     auto isdeg = this->isdeg_;
     auto itdeg = this->itdeg_;
-    auto res = StlResult {
-        std::vector<float>(n),
-        std::vector<float>(n),
-        std::vector<float>(),
-        std::vector<float>(n)
+    auto res = StlResult<T> {
+        std::vector<T>(n),
+        std::vector<T>(n),
+        std::vector<T>(),
+        std::vector<T>(n)
     };
     auto ildeg = this->ildeg_.value_or(itdeg);
@@ -504,8 +570,257 @@ StlResult StlParams::fit(const float* y, size_t n, size_t np) {
     return res;
 }
-StlResult StlParams::fit(const std::vector<float>& y, size_t np) {
-    return StlParams::fit(y.data(), y.size(), np);
+template<typename T>
+StlResult<T> StlParams::fit(const std::vector<T>& series, size_t period) const {
+    return StlParams::fit(series.data(), series.size(), period);  // forwards the vector's data pointer and size to the pointer-based overload
+}
+#if __cplusplus >= 202002L  // std::span is a C++20 facility, so this overload is only compiled for C++20 and later
+template<typename T>
+StlResult<T> StlParams::fit(std::span<const T> series, size_t period) const {
+    return StlParams::fit(series.data(), series.size(), period);  // forwards the span's data pointer and size to the pointer-based overload
+}
+#endif
+/// An MSTL result: the seasonal components, trend, and remainder of a decomposed series (element type T, float by default).
+template<typename T = float>
+class MstlResult {
+public:
+    /// The seasonal components: one vector per period, in the order the periods were passed to fit().
+    std::vector<std::vector<T>> seasonal;
+    /// The trend component.
+    std::vector<T> trend;
+    /// The remainder: what is left of the series after the seasonal components and the trend are removed.
+    std::vector<T> remainder;
+    /// Returns the seasonal strength of each seasonal component (strength() helper applied to the component and the remainder), in the same order as `seasonal`.
+    inline std::vector<double> seasonal_strength() const {
+        std::vector<double> res;
+        for (auto& s : seasonal) {
+            res.push_back(strength(s, remainder));
+        }
+        return res;
+    }
+    /// Returns the trend strength (strength() helper applied to the trend and the remainder).
+    inline double trend_strength() const {
+        return strength(trend, remainder);
+    }
+};
+/// A set of MSTL parameters. Each setter stores its argument and returns a copy of the updated parameter set, so calls can be chained.
+class MstlParams {
+    size_t iterate_ = 2;  // number of passes over all periods (default 2)
+    std::optional<float> lambda_ = std::nullopt;  // Box-Cox lambda; no transformation is applied when unset
+    std::optional<std::vector<size_t>> swin_ = std::nullopt;  // seasonal smoother length per period; when unset, the length comes from stl_params_ or a built-in default
+    StlParams stl_params_;  // base STL parameters used for every underlying STL fit
+public:
+    /// Sets the number of iterations (default 2); a single iteration is always used when only one period is given.
+    inline MstlParams iterations(size_t iterations) {
+        this->iterate_ = iterations;
+        return *this;
+    }
+    /// Sets lambda for the Box-Cox transformation; fit() throws std::invalid_argument unless it lies between 0 and 1 (both accepted).
+    inline MstlParams lambda(float lambda) {
+        this->lambda_ = lambda;
+        return *this;
+    }
+    /// Sets the lengths of the seasonal smoothers; fit() throws std::invalid_argument unless there is exactly one length per period.
+    inline MstlParams seasonal_lengths(const std::vector<size_t>& lengths) {
+        this->swin_ = lengths;
+        return *this;
+    }
+    /// Sets the STL parameters used as the base for each underlying STL fit.
+    inline MstlParams stl_params(const StlParams& stl_params) {
+        this->stl_params_ = stl_params;
+        return *this;
+    }
+    /// Decomposes a time series from an array of `series_size` values, using an array of `periods_size` periods; throws std::invalid_argument on invalid input.
+    template<typename T>
+    MstlResult<T> fit(const T* series, size_t series_size, const size_t* periods, size_t periods_size) const;
+    /// Decomposes a time series from a vector of values and a vector of periods.
+    template<typename T>
+    MstlResult<T> fit(const std::vector<T>& series, const std::vector<size_t>& periods) const;
+#if __cplusplus >= 202002L
+    /// Decomposes a time series from a span of values and a span of periods (only available when compiled as C++20 or later).
+    template<typename T>
+    MstlResult<T> fit(std::span<const T> series, std::span<const size_t> periods) const;
+#endif
+};
+/// Creates a default-constructed set of MSTL parameters, as the starting point for the chained setters.
+inline MstlParams mstl_params() {
+    return MstlParams();
+}
+namespace {
+template<typename T>
+std::vector<T> box_cox(const T* y, size_t y_size, float lambda) {  // Box-Cox transform of the y_size values at y; returns a new vector and leaves the input untouched
+    std::vector<T> res;
+    res.reserve(y_size);
+    if (lambda != 0.0) {  // general case: (y^lambda - 1) / lambda
+        for (size_t i = 0; i < y_size; i++) {
+            res.push_back((T) (std::pow(y[i], lambda) - 1.0) / lambda);  // the cast to T binds to (pow(...) - 1.0) only; the division by lambda happens afterwards
+        }
+    } else {  // lambda == 0: the transform is the natural logarithm
+        for (size_t i = 0; i < y_size; i++) {
+            res.push_back(std::log(y[i]));
+        }
+    }
+    return res;
+}
+template<typename T>
+std::tuple<std::vector<T>, std::vector<T>, std::vector<std::vector<T>>> mstl(  // core MSTL loop; returns (trend, remainder, seasonal components)
+    const T* x,  // series values
+    size_t k,  // number of values at x
+    const size_t* seas_ids,  // seasonal periods
+    size_t seas_size,  // number of periods at seas_ids; throws std::invalid_argument when 0
+    size_t iterate,  // number of passes over all periods
+    std::optional<float> lambda,  // Box-Cox lambda; the series is used untransformed when empty
+    const std::optional<std::vector<size_t>>& swin,  // optional seasonal smoother length per period, indexed like seas_ids
+    const StlParams& stl_params  // base STL parameters for every fit
+) {
+    // sort a list of indices by ascending period instead of sorting seas_ids itself,
+    // so fitting runs from the shortest to the longest period while seasonality keeps the caller's period order
+    std::vector<size_t> indices;
+    for (size_t i = 0; i < seas_size; i++) {
+        indices.push_back(i);
+    }
+    std::sort(indices.begin(), indices.end(), [&seas_ids](size_t a, size_t b) {
+        return seas_ids[a] < seas_ids[b];
+    });
+    if (seas_size == 1) {  // with a single period one pass is used, whatever count was requested
+        iterate = 1;
+    }
+    std::vector<std::vector<T>> seasonality;
+    seasonality.reserve(seas_size);
+    std::vector<T> trend;
+    auto deseas = lambda.has_value() ? box_cox(x, k, lambda.value()) : std::vector<T>(x, x + k);  // working copy of the series, Box-Cox transformed when lambda is set
+    if (seas_size != 0) {
+        for (size_t i = 0; i < seas_size; i++) {  // start with one empty seasonal component per period
+            seasonality.push_back(std::vector<T>());
+        }
+        for (size_t j = 0; j < iterate; j++) {
+            for (size_t i = 0; i < indices.size(); i++) {
+                auto idx = indices[i];
+                if (j > 0) {  // after the first pass, add this period's previous seasonal estimate back before refitting it
+                    for (size_t ii = 0; ii < deseas.size(); ii++) {
+                        deseas[ii] += seasonality[idx][ii];
+                    }
+                }
+                StlResult<T> fit;
+                if (swin) {  // an explicit seasonal smoother length was supplied for this period
+                    StlParams clone = stl_params;  // modify a copy so the caller's parameters stay unchanged
+                    fit = clone.seasonal_length((*swin)[idx]).fit(deseas, seas_ids[idx]);
+                } else if (stl_params.ns_.has_value()) {  // ns_ is already set on the STL parameters (presumably the seasonal length; its setter is outside this view): fit with them as given
+                    fit = stl_params.fit(deseas, seas_ids[idx]);
+                } else {  // default seasonal length 7 + 4 * (i + 1), where i is the period's rank in ascending order
+                    StlParams clone = stl_params;
+                    fit = clone.seasonal_length(7 + 4 * (i + 1)).fit(deseas, seas_ids[idx]);
+                }
+                seasonality[idx] = fit.seasonal;
+                trend = fit.trend;  // overwritten by every fit, so the returned trend comes from the last fit performed
+                for (size_t ii = 0; ii < deseas.size(); ii++) {  // remove the updated seasonal component from the working series
+                    deseas[ii] -= seasonality[idx][ii];
+                }
+            }
+        }
+    } else {
+        // TODO use Friedman's Super Smoother for trend
+        throw std::invalid_argument("periods must not be empty");
+    }
+    std::vector<T> remainder;
+    remainder.reserve(k);
+    for (size_t i = 0; i < k; i++) {
+        remainder.push_back(deseas[i] - trend[i]);  // deseasonalized series minus the trend
+    }
+    return std::make_tuple(trend, remainder, seasonality);
+}
+}
+template<typename T>
+MstlResult<T> MstlParams::fit(const T* series, size_t series_size, const size_t* periods, size_t periods_size) const {  // validates the inputs (std::invalid_argument on failure), then runs mstl() with the stored parameters
+    // reject periods below 2 with an error, to be consistent with stl
+    // and ensure seasonal is always same length as periods
+    for (size_t i = 0; i < periods_size; i++) {
+        if (periods[i] < 2) {
+            throw std::invalid_argument("periods must be at least 2");
+        }
+    }
+    // reject periods that do not fit at least twice into the series, to be consistent with stl
+    // and ensure seasonal is always same length as periods
+    for (size_t i = 0; i < periods_size; i++) {
+        if (series_size < periods[i] * 2) {
+            throw std::invalid_argument("series has less than two periods");
+        }
+    }
+    if (lambda_.has_value()) {
+        auto lambda = lambda_.value();
+        if (lambda < 0 || lambda > 1) {  // 0 and 1 themselves are accepted
+            throw std::invalid_argument("lambda must be between 0 and 1");
+        }
+    }
+    if (swin_.has_value()) {
+        auto swin = swin_.value();  // note: this copies the vector although only its size is inspected
+        if (swin.size() != periods_size) {
+            throw std::invalid_argument("seasonal_lengths must have the same length as periods");
+        }
+    }
+    auto [trend, remainder, seasonal] = mstl(
+        series,
+        series_size,
+        periods,
+        periods_size,
+        iterate_,
+        lambda_,
+        swin_,
+        stl_params_
+    );
+    return MstlResult<T> {  // members are initialized in declaration order: seasonal, trend, remainder
+        seasonal,
+        trend,
+        remainder
+    };
+}
+template<typename T>
+MstlResult<T> MstlParams::fit(const std::vector<T>& series, const std::vector<size_t>& periods) const {
+    return MstlParams::fit(series.data(), series.size(), periods.data(), periods.size());  // forwards both vectors' data pointers and sizes to the pointer-based overload
+}
+#if __cplusplus >= 202002L  // std::span is a C++20 facility, so this overload is only compiled for C++20 and later
+template<typename T>
+MstlResult<T> MstlParams::fit(std::span<const T> series, std::span<const size_t> periods) const {
+    return MstlParams::fit(series.data(), series.size(), periods.data(), periods.size());  // forwards both spans' data pointers and sizes to the pointer-based overload
 }
+#endif
 }

data/lib/anomaly_detection/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module AnomalyDetection
-  VERSION = "0.3.0"
+  VERSION = "0.3.1"
 end

metadata CHANGED Viewed

@@ -1,14 +1,13 @@
 --- !ruby/object:Gem::Specification
 name: anomaly_detection
 version: !ruby/object:Gem::Version
-  version: 0.3.0
+  version: 0.3.1
 platform: ruby
 authors:
 - Andrew Kane
-autorequire:
 bindir: bin
 cert_chain: []
-date: 2024-10-22 00:00:00.000000000 Z
+date: 1980-01-02 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rice
@@ -16,15 +15,14 @@ dependencies:
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        version: 4.3.3
+        version: '4.7'
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        version: 4.3.3
-description:
+        version: '4.7'
 email: andrew@ankane.org
 executables: []
 extensions:
@@ -52,7 +50,6 @@ homepage: https://github.com/ankane/AnomalyDetection.rb
 licenses:
 - GPL-3.0-or-later
 metadata: {}
-post_install_message:
 rdoc_options: []
 require_paths:
 - lib
@@ -67,8 +64,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 3.5.16
-signing_key:
+rubygems_version: 3.6.9
 specification_version: 4
 summary: Time series anomaly detection for Ruby
 test_files: []