RubyGems - anomaly_detection - Versions diffs - 0.1.4 → 0.2.0 - Mend

anomaly_detection 0.1.4 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

checksums.yaml +4 -4
data/CHANGELOG.md +6 -0
data/NOTICE.txt +1 -1
data/ext/anomaly_detection/anomaly_detection.hpp +15 -8
data/ext/anomaly_detection/dist.h +90 -44
data/ext/anomaly_detection/stl.hpp +103 -50
data/lib/anomaly_detection/version.rb +1 -1
data/lib/anomaly_detection.rb +57 -2
data/licenses/NOTICE-AnomalyDetection-cpp.txt +15 -0
metadata +5 -4

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: c60bb6d75cb8523ecd0926f391d79413a1cb2eb131cd579fd381bb6683f82da3
-  data.tar.gz: '01594d0f0a97ad8cbb7b0b50cb30894bd0d773d4db45b3158345567ce1732efb'
+  metadata.gz: da5eb71023f77a4c05e6322c020ef602e8e22b7b5ba516fce99679af702c881d
+  data.tar.gz: 26560c8dd893c491bd3094202ff82ae33eefdcdba74fe4386b006f7f522906df
 SHA512:
-  metadata.gz: fe09cc140a5d6543f3b00983a754861f6a3a3a436f8a8afecc80d202f1112bb6ea180df794072ee4711c044508a559a015c885cfd61d2c5be9378fc7b6590d96
-  data.tar.gz: 5616e6075888b4521355e6c0fb33f7a94361c971c7f58f9ae6a61e5d8529a3e1938deba10be22c24ec5849f1909f48cba07b97fd1e6ea8b47ae4f66626eb703e
+  metadata.gz: ec2e1459ca2410ee6ab1bce3fe9c528d6419b75e10c6448f1fe5b3030a2e3d8de320a23a9bded17702a01fd23d112007b909c8611e2da6c1ff4f8521352c89ac
+  data.tar.gz: ad150705d6e32a111c3bc044ef7f99910beebe572e07799719e72118422b7a9e6439943cac8b86d613537d7d0fb52cba86a668faf78d135abc888ce3737f8104

data/CHANGELOG.md CHANGED

@@ -1,3 +1,9 @@
+## 0.2.0 (2023-01-31)
+- Added experimental support for auto-detecting period
+- Fixed result when no seasonality (period is less than 2)
+- Dropped support for Ruby < 2.7
 ## 0.1.4 (2022-03-19)
 - Fixed initial median calculation

data/NOTICE.txt CHANGED

@@ -1,5 +1,5 @@
 Copyright (C) 2015 Twitter, Inc and other contributors
-Copyright (C) 2021 Andrew Kane
+Copyright (C) 2021-2023 Andrew Kane
 This program is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by

data/ext/anomaly_detection/anomaly_detection.hpp CHANGED

@@ -1,5 +1,5 @@
 /*!
- * AnomalyDetection.cpp v0.1.0
+ * AnomalyDetection.cpp v0.1.3
  * https://github.com/ankane/AnomalyDetection.cpp
  * GPL-3.0-or-later License
  */
@@ -39,7 +39,7 @@ float mad(const std::vector<float>& data, float med) {
     return 1.4826 * median_sorted(res);
 }
-std::vector<size_t> detect_anoms(const std::vector<float>& data, int num_obs_per_period, float k, float alpha, bool one_tail, bool upper_tail, bool verbose, std::function<void()> callback) {
+std::vector<size_t> detect_anoms(const std::vector<float>& data, size_t num_obs_per_period, float k, float alpha, bool one_tail, bool upper_tail, bool verbose, std::function<void()> callback) {
     auto n = data.size();
     // Check to make sure we have at least two periods worth of data for anomaly context
@@ -53,15 +53,22 @@ std::vector<size_t> detect_anoms(const std::vector<float>& data, int num_obs_per
         throw std::invalid_argument("series contains NANs");
     }
-    // Decompose data. This returns a univarite remainder which will be used for anomaly detection. Optionally, we might NOT decompose.
-    auto data_decomp = stl::params().robust(true).seasonal_length(data.size() * 10 + 1).fit(data, num_obs_per_period);
-    auto seasonal = data_decomp.seasonal;
     std::vector<float> data2;
     data2.reserve(n);
     auto med = median(data);
-    for (auto i = 0; i < n; i++) {
-        data2.push_back(data[i] - seasonal[i] - med);
+    if (num_obs_per_period > 1) {
+        // Decompose data. This returns a univarite remainder which will be used for anomaly detection. Optionally, we might NOT decompose.
+        auto data_decomp = stl::params().robust(true).seasonal_length(data.size() * 10 + 1).fit(data, num_obs_per_period);
+        auto seasonal = data_decomp.seasonal;
+        for (size_t i = 0; i < n; i++) {
+            data2.push_back(data[i] - seasonal[i] - med);
+        }
+    } else {
+        for (size_t i = 0; i < n; i++) {
+            data2.push_back(data[i] - med);
+        }
     }
     auto num_anoms = 0;

data/ext/anomaly_detection/dist.h CHANGED

@@ -1,12 +1,11 @@
 /*!
- * dist.h v0.1.1
+ * dist.h v0.3.0
  * https://github.com/ankane/dist.h
  * Unlicense OR MIT License
  */
 #pragma once
-#include <assert.h>
 #include <math.h>
 #ifdef M_E
@@ -21,53 +20,77 @@
 #define DIST_PI 3.14159265358979323846
 #endif
-// Winitzki, S. (2008).
-// A handy approximation for the error function and its inverse.
-// https://drive.google.com/file/d/0B2Mt7luZYBrwZlctV3A3eF82VGM/view?resourcekey=0-UQpPhwZgzP0sF4LHBDlLtg
-// from https://sites.google.com/site/winitzki
-double erf(double x) {
-    double sign = x < 0 ? -1.0 : 1.0;
-    x = x < 0 ? -x : x;
-    double a = 0.14;
-    double x2 = x * x;
-    return sign * sqrt(1.0 - exp(-x2 * (4.0 / DIST_PI + a * x2) / (1.0 + a * x2)));
-}
-// Winitzki, S. (2008).
-// A handy approximation for the error function and its inverse.
-// https://drive.google.com/file/d/0B2Mt7luZYBrwZlctV3A3eF82VGM/view?resourcekey=0-UQpPhwZgzP0sF4LHBDlLtg
-// from https://sites.google.com/site/winitzki
-double inverse_erf(double x) {
-    double sign = x < 0 ? -1.0 : 1.0;
-    x = x < 0 ? -x : x;
-    double a = 0.147;
-    double ln = log(1.0 - x * x);
-    double f1 = 2.0 / (DIST_PI * a);
-    double f2 = ln / 2.0;
-    double f3 = f1 + f2;
-    double f4 = 1.0 / a * ln;
-    return sign * sqrt(-f1 - f2 + sqrt(f3 * f3 - f4));
-}
+#ifdef M_SQRT2
+#define DIST_SQRT2 M_SQRT2
+#else
+#define DIST_SQRT2 1.41421356237309504880
+#endif
 double normal_pdf(double x, double mean, double std_dev) {
-    double var = std_dev * std_dev;
-    return (1.0 / (var * sqrt(2.0 * DIST_PI))) * pow(DIST_E, -0.5 * pow((x - mean) / var, 2));
+    if (std_dev <= 0) {
+        return NAN;
+    }
+    double n = (x - mean) / std_dev;
+    return (1.0 / (std_dev * sqrt(2.0 * DIST_PI))) * pow(DIST_E, -0.5 * n * n);
 }
 double normal_cdf(double x, double mean, double std_dev) {
-    return 0.5 * (1.0 + erf((x - mean) / (std_dev * std_dev * sqrt(2))));
+    if (std_dev <= 0) {
+        return NAN;
+    }
+    return 0.5 * (1.0 + erf((x - mean) / (std_dev * DIST_SQRT2)));
 }
+// Wichura, M. J. (1988).
+// Algorithm AS 241: The Percentage Points of the Normal Distribution.
+// Journal of the Royal Statistical Society. Series C (Applied Statistics), 37(3), 477-484.
 double normal_ppf(double p, double mean, double std_dev) {
-    assert(p >= 0 && p <= 1);
+    if (p < 0 || p > 1 || std_dev <= 0 || isnan(mean) || isnan(std_dev)) {
+        return NAN;
+    }
+    if (p == 0) {
+        return -INFINITY;
+    }
-    return mean + (std_dev * std_dev) * sqrt(2) * inverse_erf(2.0 * p - 1.0);
+    if (p == 1) {
+        return INFINITY;
+    }
+    double q = p - 0.5;
+    if (fabs(q) < 0.425) {
+        double r = 0.180625 - q * q;
+        return mean + std_dev * q *
+            (((((((2.5090809287301226727e3 * r + 3.3430575583588128105e4) * r + 6.7265770927008700853e4) * r + 4.5921953931549871457e4) * r + 1.3731693765509461125e4) * r + 1.9715909503065514427e3) * r + 1.3314166789178437745e2) * r + 3.3871328727963666080e0) /
+            (((((((5.2264952788528545610e3 * r + 2.8729085735721942674e4) * r + 3.9307895800092710610e4) * r + 2.1213794301586595867e4) * r + 5.3941960214247511077e3) * r + 6.8718700749205790830e2) * r + 4.2313330701600911252e1) * r + 1);
+    } else {
+        double r = q < 0 ? p : 1 - p;
+        r = sqrt(-log(r));
+        double sign = q < 0 ? -1 : 1;
+        if (r < 5) {
+            r -= 1.6;
+            return mean + std_dev * sign *
+                (((((((7.74545014278341407640e-4 * r + 2.27238449892691845833e-2) * r + 2.41780725177450611770e-1) * r + 1.27045825245236838258e0) * r + 3.64784832476320460504e0) * r + 5.76949722146069140550e0) * r + 4.63033784615654529590e0) * r + 1.42343711074968357734e0) /
+                (((((((1.05075007164441684324e-9 * r + 5.47593808499534494600e-4) * r + 1.51986665636164571966e-2) * r + 1.48103976427480074590e-1) * r + 6.89767334985100004550e-1) * r + 1.67638483018380384940e0) * r + 2.05319162663775882187e0) * r + 1);
+        } else {
+            r -= 5;
+            return mean + std_dev * sign *
+                (((((((2.01033439929228813265e-7 * r + 2.71155556874348757815e-5) * r + 1.24266094738807843860e-3) * r + 2.65321895265761230930e-2) * r + 2.96560571828504891230e-1) * r + 1.78482653991729133580e0) * r + 5.46378491116411436990e0) * r + 6.65790464350110377720e0) /
+                (((((((2.04426310338993978564e-15 * r + 1.42151175831644588870e-7) * r + 1.84631831751005468180e-5) * r + 7.86869131145613259100e-4) * r + 1.48753612908506148525e-2) * r + 1.36929880922735805310e-1) * r + 5.99832206555887937690e-1) * r + 1);
+        }
+    }
 }
-double students_t_pdf(double x, unsigned int n) {
-    assert(n >= 1);
+double students_t_pdf(double x, double n) {
+    if (n <= 0) {
+        return NAN;
+    }
+    if (n == INFINITY) {
+        return normal_pdf(x, 0, 1);
+    }
     return tgamma((n + 1.0) / 2.0) / (sqrt(n * DIST_PI) * tgamma(n / 2.0)) * pow(1.0 + x * x / n, -(n + 1.0) / 2.0);
 }
@@ -75,8 +98,22 @@ double students_t_pdf(double x, unsigned int n) {
 // Hill, G. W. (1970).
 // Algorithm 395: Student's t-distribution.
 // Communications of the ACM, 13(10), 617-619.
-double students_t_cdf(double x, unsigned int n) {
-    assert(n >= 1);
+double students_t_cdf(double x, double n) {
+    if (n < 1) {
+        return NAN;
+    }
+    if (isnan(x)) {
+        return NAN;
+    }
+    if (!isfinite(x)) {
+        return x < 0 ? 0 : 1;
+    }
+    if (n == INFINITY) {
+        return normal_cdf(x, 0, 1);
+    }
     double start = x < 0 ? 0 : 1;
     double sign = x < 0 ? 1 : -1;
@@ -86,7 +123,7 @@ double students_t_cdf(double x, unsigned int n) {
     double y = t / n;
     double b = 1.0 + y;
-    if ((n >= 20 && t < n) || n > 200) {
+    if (n > floor(n) || (n >= 20 && t < n) || n > 200) {
         // asymptotic series for large or noninteger n
         if (y > 10e-6) {
             y = log(b);
@@ -98,6 +135,10 @@ double students_t_cdf(double x, unsigned int n) {
         return start + sign * normal_cdf(-y, 0.0, 1.0);
     }
+    // make n int
+    // n is int between 1 and 200 if made it here
+    n = (int) n;
     if (n < 20 && t < 4.0) {
         // nested summation of cosine series
         y = sqrt(y);
@@ -144,9 +185,14 @@ double students_t_cdf(double x, unsigned int n) {
 // Hill, G. W. (1970).
 // Algorithm 396: Student's t-quantiles.
 // Communications of the ACM, 13(10), 619-620.
-double students_t_ppf(double p, unsigned int n) {
-    assert(p >= 0 && p <= 1);
-    assert(n >= 1);
+double students_t_ppf(double p, double n) {
+    if (p < 0 || p > 1 || n < 1) {
+        return NAN;
+    }
+    if (n == INFINITY) {
+        return normal_ppf(p, 0, 1);
+    }
     // distribution is symmetric
     double sign = p < 0.5 ? -1 : 1;

data/ext/anomaly_detection/stl.hpp CHANGED

@@ -1,5 +1,5 @@
 /*!
- * STL C++ v0.1.0
+ * STL C++ v0.1.2
  * https://github.com/ankane/stl-cpp
  * Unlicense OR MIT License
  *
@@ -13,9 +13,10 @@
 #pragma once
 #include <algorithm>
-#include <cassert>
 #include <cmath>
+#include <numeric>
 #include <optional>
+#include <stdexcept>
 #include <vector>
 namespace stl {
@@ -91,14 +92,14 @@ void ess(const float* y, size_t n, size_t len, int ideg, size_t njump, bool user
         return;
     }
-    auto nleft = 0;
-    auto nright = 0;
+    size_t nleft = 0;
+    size_t nright = 0;
     auto newnj = std::min(njump, n - 1);
     if (len >= n) {
         nleft = 1;
         nright = n;
-        for (auto i = 1; i <= n; i += newnj) {
+        for (size_t i = 1; i <= n; i += newnj) {
             auto ok = est(y, n, len, ideg, (float) i, &ys[i - 1], nleft, nright, res, userw, rw);
             if (!ok) {
                 ys[i - 1] = y[i - 1];
@@ -108,7 +109,7 @@ void ess(const float* y, size_t n, size_t len, int ideg, size_t njump, bool user
         auto nsh = (len + 1) / 2;
         nleft = 1;
         nright = len;
-        for (auto i = 1; i <= n; i++) { // fitted value at i
+        for (size_t i = 1; i <= n; i++) { // fitted value at i
             if (i > nsh && nright != n) {
                 nleft += 1;
                 nright += 1;
@@ -120,7 +121,7 @@ void ess(const float* y, size_t n, size_t len, int ideg, size_t njump, bool user
         }
     } else { // newnj greater than one, len less than n
         auto nsh = (len + 1) / 2;
-        for (auto i = 1; i <= n; i += newnj) { // fitted value at i
+        for (size_t i = 1; i <= n; i += newnj) { // fitted value at i
             if (i < nsh) {
                 nleft = 1;
                 nright = len;
@@ -139,7 +140,7 @@ void ess(const float* y, size_t n, size_t len, int ideg, size_t njump, bool user
     }
     if (newnj != 1) {
-        for (auto i = 1; i <= n - newnj; i += newnj) {
+        for (size_t i = 1; i <= n - newnj; i += newnj) {
             auto delta = (ys[i + newnj - 1] - ys[i - 1]) / ((float) newnj);
             for (auto j = i + 1; j <= i + newnj - 1; j++) {
                 ys[j - 1] = ys[i - 1] + delta * ((float) (j - i));
@@ -167,7 +168,7 @@ void ma(const float* x, size_t n, size_t len, float* ave) {
     auto v = 0.0;
     // get the first average
-    for (auto i = 0; i < len; i++) {
+    for (size_t i = 0; i < len; i++) {
         v += x[i];
     }
@@ -175,7 +176,7 @@ void ma(const float* x, size_t n, size_t len, float* ave) {
     if (newn > 1) {
         auto k = len;
         auto m = 0;
-        for (auto j = 1; j < newn; j++) {
+        for (size_t j = 1; j < newn; j++) {
             // window down the array
             v = v - x[m] + x[k];
             ave[j] = v / flen;
@@ -192,7 +193,7 @@ void fts(const float* x, size_t n, size_t np, float* trend, float* work) {
 }
 void rwts(const float* y, size_t n, const float* fit, float* rw) {
-    for (auto i = 0; i < n; i++) {
+    for (size_t i = 0; i < n; i++) {
         rw[i] = fabs(y[i] - fit[i]);
     }
@@ -206,7 +207,7 @@ void rwts(const float* y, size_t n, const float* fit, float* rw) {
     auto c9 = 0.999 * cmad;
     auto c1 = 0.001 * cmad;
-    for (auto i = 0; i < n; i++) {
+    for (size_t i = 0; i < n; i++) {
         auto r = fabs(y[i] - fit[i]);
         if (r <= c1) {
             rw[i] = 1.0;
@@ -219,14 +220,14 @@ void rwts(const float* y, size_t n, const float* fit, float* rw) {
 }
 void ss(const float* y, size_t n, size_t np, size_t ns, int isdeg, size_t nsjump, bool userw, float* rw, float* season, float* work1, float* work2, float* work3, float* work4) {
-    for (auto j = 1; j <= np; j++) {
-        auto k = (n - j) / np + 1;
+    for (size_t j = 1; j <= np; j++) {
+        size_t k = (n - j) / np + 1;
-        for (auto i = 1; i <= k; i++) {
+        for (size_t i = 1; i <= k; i++) {
             work1[i - 1] = y[(i - 1) * np + j - 1];
         }
         if (userw) {
-            for (auto i = 1; i <= k; i++) {
+            for (size_t i = 1; i <= k; i++) {
                 work3[i - 1] = rw[(i - 1) * np + j - 1];
             }
         }
@@ -243,25 +244,25 @@ void ss(const float* y, size_t n, size_t np, size_t ns, int isdeg, size_t nsjump
         if (!ok) {
             work2[k + 1] = work2[k];
         }
-        for (auto m = 1; m <= k + 2; m++) {
+        for (size_t m = 1; m <= k + 2; m++) {
             season[(m - 1) * np + j - 1] = work2[m - 1];
         }
     }
 }
 void onestp(const float* y, size_t n, size_t np, size_t ns, size_t nt, size_t nl, int isdeg, int itdeg, int ildeg, size_t nsjump, size_t ntjump, size_t nljump, size_t ni, bool userw, float* rw, float* season, float* trend, float* work1, float* work2, float* work3, float* work4, float* work5) {
-    for (auto j = 0; j < ni; j++) {
-        for (auto i = 0; i < n; i++) {
+    for (size_t j = 0; j < ni; j++) {
+        for (size_t i = 0; i < n; i++) {
             work1[i] = y[i] - trend[i];
         }
         ss(work1, n, np, ns, isdeg, nsjump, userw, rw, work2, work3, work4, work5, season);
         fts(work2, n + 2 * np, np, work3, work1);
         ess(work3, n, nl, ildeg, nljump, false, work4, work1, work5);
-        for (auto i = 0; i < n; i++) {
+        for (size_t i = 0; i < n; i++) {
             season[i] = work2[np + i] - work1[i];
         }
-        for (auto i = 0; i < n; i++) {
+        for (size_t i = 0; i < n; i++) {
             work1[i] = y[i] - season[i];
         }
         ess(work1, n, nt, itdeg, ntjump, userw, rw, trend, work3);
@@ -269,6 +270,39 @@ void onestp(const float* y, size_t n, size_t np, size_t ns, size_t nt, size_t nl
 }
 void stl(const float* y, size_t n, size_t np, size_t ns, size_t nt, size_t nl, int isdeg, int itdeg, int ildeg, size_t nsjump, size_t ntjump, size_t nljump, size_t ni, size_t no, float* rw, float* season, float* trend) {
+    if (ns < 3) {
+        throw std::invalid_argument("seasonal_length must be at least 3");
+    }
+    if (nt < 3) {
+        throw std::invalid_argument("trend_length must be at least 3");
+    }
+    if (nl < 3) {
+        throw std::invalid_argument("low_pass_length must be at least 3");
+    }
+    if (np < 2) {
+        throw std::invalid_argument("period must be at least 2");
+    }
+    if (isdeg != 0 && isdeg != 1) {
+        throw std::invalid_argument("seasonal_degree must be 0 or 1");
+    }
+    if (itdeg != 0 && itdeg != 1) {
+        throw std::invalid_argument("trend_degree must be 0 or 1");
+    }
+    if (ildeg != 0 && ildeg != 1) {
+        throw std::invalid_argument("low_pass_degree must be 0 or 1");
+    }
+    if (ns % 2 != 1) {
+        throw std::invalid_argument("seasonal_length must be odd");
+    }
+    if (nt % 2 != 1) {
+        throw std::invalid_argument("trend_length must be odd");
+    }
+    if (nl % 2 != 1) {
+        throw std::invalid_argument("low_pass_length must be odd");
+    }
     auto work1 = std::vector<float>(n + 2 * np);
     auto work2 = std::vector<float>(n + 2 * np);
     auto work3 = std::vector<float>(n + 2 * np);
@@ -276,20 +310,7 @@ void stl(const float* y, size_t n, size_t np, size_t ns, size_t nt, size_t nl, i
     auto work5 = std::vector<float>(n + 2 * np);
     auto userw = false;
-    auto k = 0;
-    assert(ns >= 3);
-    assert(nt >= 3);
-    assert(nl >= 3);
-    assert(np >= 2);
-    assert(isdeg == 0 || isdeg == 1);
-    assert(itdeg == 0 || itdeg == 1);
-    assert(ildeg == 0 || ildeg == 1);
-    assert(ns % 2 == 1);
-    assert(nt % 2 == 1);
-    assert(nl % 2 == 1);
+    size_t k = 0;
     while (true) {
         onestp(y, n, np, ns, nt, nl, isdeg, itdeg, ildeg, nsjump, ntjump, nljump, ni, userw, rw, season, trend, work1.data(), work2.data(), work3.data(), work4.data(), work5.data());
@@ -297,7 +318,7 @@ void stl(const float* y, size_t n, size_t np, size_t ns, size_t nt, size_t nl, i
         if (k > no) {
             break;
         }
-        for (auto i = 0; i < n; i++) {
+        for (size_t i = 0; i < n; i++) {
             work1[i] = trend[i] + season[i];
         }
         rwts(y, n, work1.data(), rw);
@@ -305,18 +326,46 @@ void stl(const float* y, size_t n, size_t np, size_t ns, size_t nt, size_t nl, i
     }
     if (no <= 0) {
-        for (auto i = 0; i < n; i++) {
+        for (size_t i = 0; i < n; i++) {
             rw[i] = 1.0;
         }
     }
 }
+float var(const std::vector<float>& series) {
+    auto mean = std::accumulate(series.begin(), series.end(), 0.0) / series.size();
+    std::vector<float> tmp;
+    tmp.reserve(series.size());
+    for (auto v : series) {
+        tmp.push_back(pow(v - mean, 2));
+    }
+    return std::accumulate(tmp.begin(), tmp.end(), 0.0) / (series.size() - 1);
+}
 class StlResult {
 public:
     std::vector<float> seasonal;
     std::vector<float> trend;
     std::vector<float> remainder;
     std::vector<float> weights;
+    inline float seasonal_strength() {
+        std::vector<float> sr;
+        sr.reserve(remainder.size());
+        for (size_t i = 0; i < remainder.size(); i++) {
+            sr.push_back(seasonal[i] + remainder[i]);
+        }
+        return std::max(0.0, 1.0 - var(remainder) / var(sr));
+    }
+    inline float trend_strength() {
+        std::vector<float> tr;
+        tr.reserve(remainder.size());
+        for (size_t i = 0; i < remainder.size(); i++) {
+            tr.push_back(trend[i] + remainder[i]);
+        }
+        return std::max(0.0, 1.0 - var(remainder) / var(tr));
+    }
 };
 class StlParams {
@@ -337,62 +386,62 @@ public:
     inline StlParams seasonal_length(size_t ns) {
         this->ns_ = ns;
         return *this;
-    };
+    }
     inline StlParams trend_length(size_t nt) {
         this->nt_ = nt;
         return *this;
-    };
+    }
     inline StlParams low_pass_length(size_t nl) {
         this->nl_ = nl;
         return *this;
-    };
+    }
     inline StlParams seasonal_degree(int isdeg) {
         this->isdeg_ = isdeg;
         return *this;
-    };
+    }
     inline StlParams trend_degree(int itdeg) {
         this->itdeg_ = itdeg;
         return *this;
-    };
+    }
     inline StlParams low_pass_degree(int ildeg) {
         this->ildeg_ = ildeg;
         return *this;
-    };
+    }
     inline StlParams seasonal_jump(size_t nsjump) {
         this->nsjump_ = nsjump;
         return *this;
-    };
+    }
     inline StlParams trend_jump(size_t ntjump) {
         this->ntjump_ = ntjump;
         return *this;
-    };
+    }
     inline StlParams low_pass_jump(size_t nljump) {
         this->nljump_ = nljump;
         return *this;
-    };
+    }
     inline StlParams inner_loops(bool ni) {
         this->ni_ = ni;
         return *this;
-    };
+    }
     inline StlParams outer_loops(bool no) {
         this->no_ = no;
         return *this;
-    };
+    }
     inline StlParams robust(bool robust) {
         this->robust_ = robust;
         return *this;
-    };
+    }
     StlResult fit(const float* y, size_t n, size_t np);
     StlResult fit(const std::vector<float>& y, size_t np);
@@ -403,6 +452,10 @@ StlParams params() {
 }
 StlResult StlParams::fit(const float* y, size_t n, size_t np) {
+    if (n < 2 * np) {
+        throw std::invalid_argument("series has less than two periods");
+    }
     auto ns = this->ns_.value_or(np);
     auto isdeg = this->isdeg_;
@@ -444,7 +497,7 @@ StlResult StlParams::fit(const float* y, size_t n, size_t np) {
     stl(y, n, newnp, newns, nt, nl, isdeg, itdeg, ildeg, nsjump, ntjump, nljump, ni, no, res.weights.data(), res.seasonal.data(), res.trend.data());
     res.remainder.reserve(n);
-    for (auto i = 0; i < n; i++) {
+    for (size_t i = 0; i < n; i++) {
         res.remainder.push_back(y[i] - res.seasonal[i] - res.trend[i]);
     }

data/lib/anomaly_detection/version.rb CHANGED

@@ -1,3 +1,3 @@
 module AnomalyDetection
-  VERSION = "0.1.4"
+  VERSION = "0.2.0"
 end

data/lib/anomaly_detection.rb CHANGED

@@ -1,12 +1,19 @@
 # extensions
-require "anomaly_detection/ext"
+require_relative "anomaly_detection/ext"
 # modules
-require "anomaly_detection/version"
+require_relative "anomaly_detection/version"
 module AnomalyDetection
   class << self
     def detect(series, period:, max_anoms: 0.1, alpha: 0.05, direction: "both", plot: false, verbose: false)
+      if period == :auto
+        period = determine_period(series)
+        puts "Set period to #{period}" if verbose
+      elsif period.nil?
+        period = 1
+      end
       raise ArgumentError, "series must contain at least 2 periods" if series.size < period * 2
       if series.is_a?(Hash)
@@ -16,6 +23,9 @@ module AnomalyDetection
         x = series
       end
+      # flush Ruby output since std::endl flushes C++ output
+      $stdout.flush if verbose
       res = _detect(x, period, max_anoms, alpha, direction, verbose)
       res.map! { |i| sorted[i][0] } if series.is_a?(Hash)
       res
@@ -63,6 +73,51 @@ module AnomalyDetection
         .config(axis: {title: nil, labelFontSize: 12})
     end
+    # determine period based on time keys (experimental)
+    # in future, could use an approach that looks at values
+    # like https://stats.stackexchange.com/a/1214
+    def determine_period(series)
+      unless series.is_a?(Hash)
+        raise ArgumentError, "series must be a hash for :auto period"
+      end
+      times = series.keys.map(&:to_time)
+      second = times.all? { |t| t.nsec == 0 }
+      minute = second && times.all? { |t| t.sec == 0 }
+      hour = minute && times.all? { |t| t.min == 0 }
+      day = hour && times.all? { |t| t.hour == 0 }
+      week = day && times.map { |k| k.wday }.uniq.size == 1
+      month = day && times.all? { |k| k.day == 1 }
+      quarter = month && times.all? { |k| k.month % 3 == 1 }
+      year = quarter && times.all? { |k| k.month == 1 }
+      period =
+        if year
+          1
+        elsif quarter
+          4
+        elsif month
+          12
+        elsif week
+          52
+        elsif day
+          7
+        elsif hour
+          24 # or 24 * 7
+        elsif minute
+          60 # or 60 * 24
+        elsif second
+          60 # or 60 * 60
+        end
+      if series.size < period * 2
+        1
+      else
+        period
+      end
+    end
     private
     def iso8601(v)

data/licenses/NOTICE-AnomalyDetection-cpp.txt ADDED

@@ -0,0 +1,15 @@
+Copyright (C) 2015 Twitter, Inc and other contributors
+Copyright (C) 2022 Andrew Kane
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+You should have received a copy of the GNU General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>.

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: anomaly_detection
 version: !ruby/object:Gem::Version
-  version: 0.1.4
+  version: 0.2.0
 platform: ruby
 authors:
 - Andrew Kane
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2022-03-19 00:00:00.000000000 Z
+date: 2023-02-01 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rice
@@ -45,6 +45,7 @@ files:
 - licenses/LICENSE-AnomalyDetection-cpp.txt
 - licenses/LICENSE-MIT-dist-h.txt
 - licenses/LICENSE-MIT-stl-cpp.txt
+- licenses/NOTICE-AnomalyDetection-cpp.txt
 - licenses/UNLICENSE-dist-h.txt
 - licenses/UNLICENSE-stl-cpp.txt
 homepage: https://github.com/ankane/AnomalyDetection.rb
@@ -59,14 +60,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
-      version: '2.6'
+      version: '2.7'
 required_rubygems_version: !ruby/object:Gem::Requirement
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 3.3.7
+rubygems_version: 3.4.1
 signing_key:
 specification_version: 4
 summary: Time series anomaly detection for Ruby