RubyGems - anomaly_detection - Versions diffs - 0.1.2 → 0.1.3 - Mend

anomaly_detection 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +4 -0
data/README.md +2 -2
data/ext/anomaly_detection/anomaly_detection.cpp +6 -21
data/ext/anomaly_detection/dist.h +180 -0
data/lib/anomaly_detection/version.rb +1 -1
data/licenses/LICENSE-MIT-dist-h.txt +21 -0
data/licenses/UNLICENSE-dist-h.txt +24 -0
metadata +6 -6
data/ext/anomaly_detection/cdflib.cpp +0 -12126
data/ext/anomaly_detection/cdflib.hpp +0 -123
data/licenses/LICENSE-cdflib.txt +0 -165

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 94e6edb7ef4ce6db5fdb7b01ab40322bc8daceb222c0388abe19704ef2ca7f99
-  data.tar.gz: 41e4c55f5f42251a25fc337941ee85ba306df15ca925ebfe6d1d59129cc650cc
+  metadata.gz: cfb709e43863f4221a67e8f675f28b5361f6bf33a0d0f6fa4f52cdc0cad01796
+  data.tar.gz: 40965f08bb75cdb673d43e42c7fc47403fbfb1b082de10cab86bac569916d5b8
 SHA512:
-  metadata.gz: a534a1903b14e7e3287b86b8da3936ee889f0e93690a55f5cbba5a384dfd06baab1ded6afb71acec6eb2461c0ac0d6645d28a5093ef33a4c28dce0319ef9ae75
-  data.tar.gz: 90fcd64ce191e8aaff1a46384e003f8eeea7b6e9732357fdd7726066bf4e3ad1a8fe91043042716f57e931b05c048d984c60178c05e38d127d21216fe5a2ee2f
+  metadata.gz: '0496d044ecbe143be64164bd88092c5a5bd660f5fec6425e9f6ea0759f2ab7dbaa41a055c9d03a78c8e5a661b86bb25629c5e7a809614ba415d941f4d63fc9cb'
+  data.tar.gz: 8cc3f28c981d0be5cdb3dbfd054910d45469d258a662e163d7b01379af93e6714874d043ff1521ae19506f742b63eb93bf631f6f895c11842c4c15028c66a4b8

data/CHANGELOG.md CHANGED Viewed

@@ -1,3 +1,7 @@
+## 0.1.3 (2022-01-03)
+- Switched to dist.h
 ## 0.1.2 (2021-10-20)
 - Added `plot` method

data/README.md CHANGED Viewed

@@ -58,7 +58,7 @@ AnomalyDetection.detect(
 )
 ```
-## Plotting [unreleased]
+## Plotting
 Add [Vega](https://github.com/ankane/vega) to your application’s Gemfile:
@@ -74,7 +74,7 @@ AnomalyDetection.plot(series, anomalies)
 ## Credits
-This library was ported from the [AnomalyDetection](https://github.com/twitter/AnomalyDetection) R package and is available under the same license. It uses [cdflib](https://people.sc.fsu.edu/~jburkardt/cpp_src/cdflib/cdflib.html) for the quantile function.
+This library was ported from the [AnomalyDetection](https://github.com/twitter/AnomalyDetection) R package and is available under the same license. It uses [stl-cpp](https://github.com/ankane/stl-cpp) for seasonal-trend decomposition and [dist.h](https://github.com/ankane/dist.h) for the quantile function.
 ## References

data/ext/anomaly_detection/anomaly_detection.cpp CHANGED Viewed

@@ -6,7 +6,7 @@
 #include <vector>
 #include "anomaly_detection.hpp"
-#include "cdflib.hpp"
+#include "dist.h"
 #include "stl.hpp"
 namespace anomaly_detection {
@@ -25,21 +25,7 @@ float mad(const std::vector<float>& data, float med) {
     return 1.4826 * median(res);
 }
-float qt(double p, double df) {
-    int which = 2;
-    double q = 1 - p;
-    double t;
-    int status;
-    double bound;
-    cdft(&which, &p, &q, &t, &df, &status, &bound);
-    if (status != 0) {
-        throw std::invalid_argument("Bad status");
-    }
-    return t;
-}
-std::vector<size_t> detect_anoms(const std::vector<float>& data, int num_obs_per_period, float k, float alpha, bool one_tail, bool upper_tail, bool verbose, std::function<void()> interrupt) {
+std::vector<size_t> detect_anoms(const std::vector<float>& data, int num_obs_per_period, float k, float alpha, bool one_tail, bool upper_tail, bool verbose, std::function<void()> check_for_interrupts) {
     auto n = data.size();
     // Check to make sure we have at least two periods worth of data for anomaly context
@@ -78,8 +64,7 @@ std::vector<size_t> detect_anoms(const std::vector<float>& data, int num_obs_per
     // Compute test statistic until r=max_outliers values have been removed from the sample
     for (auto i = 1; i <= max_outliers; i++) {
-        // Check for interrupts
-        interrupt();
+        check_for_interrupts();
         if (verbose) {
             std::cout << i << " / " << max_outliers << " completed" << std::endl;
@@ -129,7 +114,7 @@ std::vector<size_t> detect_anoms(const std::vector<float>& data, int num_obs_per
             p = 1.0 - alpha / (2.0 * (n - i + 1));
         }
-        auto t = qt(p, n - i - 1);
+        auto t = students_t_ppf(p, n - i - 1);
         auto lam = t * (n - i) / sqrt(((n - i - 1) + powf(t, 2.0)) * (n - i + 1));
         if (r > lam) {
@@ -145,10 +130,10 @@ std::vector<size_t> detect_anoms(const std::vector<float>& data, int num_obs_per
     return anomalies;
 }
-std::vector<size_t> anomalies(const std::vector<float>& x, int period, float k, float alpha, Direction direction, bool verbose, std::function<void()> interrupt) {
+std::vector<size_t> anomalies(const std::vector<float>& x, int period, float k, float alpha, Direction direction, bool verbose, std::function<void()> check_for_interrupts) {
     bool one_tail = direction != Direction::Both;
     bool upper_tail = direction == Direction::Positive;
-    return detect_anoms(x, period, k, alpha, one_tail, upper_tail, verbose, interrupt);
+    return detect_anoms(x, period, k, alpha, one_tail, upper_tail, verbose, check_for_interrupts);
 }
 }

data/ext/anomaly_detection/dist.h ADDED Viewed

@@ -0,0 +1,180 @@
+/*!
+ * dist.h v0.1.0
+ * https://github.com/ankane/dist.h
+ * Unlicense OR MIT License
+ */
+#pragma once
+#define _USE_MATH_DEFINES
+#include <assert.h>
+#include <math.h>
+// Winitzki, S. (2008).
+// A handy approximation for the error function and its inverse.
+// https://drive.google.com/file/d/0B2Mt7luZYBrwZlctV3A3eF82VGM/view?resourcekey=0-UQpPhwZgzP0sF4LHBDlLtg
+// from https://sites.google.com/site/winitzki
+double erf(double x) {
+    double sign = x < 0 ? -1.0 : 1.0;
+    x = x < 0 ? -x : x;
+    double a = 0.14;
+    double x2 = x * x;
+    return sign * sqrt(1.0 - exp(-x2 * (4.0 / M_PI + a * x2) / (1.0 + a * x2)));
+}
+// Winitzki, S. (2008).
+// A handy approximation for the error function and its inverse.
+// https://drive.google.com/file/d/0B2Mt7luZYBrwZlctV3A3eF82VGM/view?resourcekey=0-UQpPhwZgzP0sF4LHBDlLtg
+// from https://sites.google.com/site/winitzki
+double inverse_erf(double x) {
+    double sign = x < 0 ? -1.0 : 1.0;
+    x = x < 0 ? -x : x;
+    double a = 0.147;
+    double ln = log(1.0 - x * x);
+    double f1 = 2.0 / (M_PI * a);
+    double f2 = ln / 2.0;
+    double f3 = f1 + f2;
+    double f4 = 1.0 / a * ln;
+    return sign * sqrt(-f1 - f2 + sqrt(f3 * f3 - f4));
+}
+// Probability density of the normal distribution with the given mean and
+// standard deviation, evaluated at x. std_dev must be non-zero (it is used
+// as a divisor).
+double normal_pdf(double x, double mean, double std_dev) {
+    // Standardize with the standard deviation; scaling by the variance
+    // (std_dev * std_dev) is only correct when std_dev == 1.
+    double z = (x - mean) / std_dev;
+    return exp(-0.5 * z * z) / (std_dev * sqrt(2.0 * M_PI));
+}
+// Cumulative distribution function of the normal distribution with the given
+// mean and standard deviation, evaluated at x. Built on erf().
+double normal_cdf(double x, double mean, double std_dev) {
+    return 0.5 * (1.0 + erf((x - mean) / (std_dev * sqrt(2.0))));
+}
+// Quantile (inverse CDF) of the normal distribution with the given mean and
+// standard deviation for probability p. Requires 0 <= p <= 1 (asserted).
+// Built on inverse_erf().
+double normal_ppf(double p, double mean, double std_dev) {
+    assert(p >= 0 && p <= 1);
+    return mean + std_dev * sqrt(2.0) * inverse_erf(2.0 * p - 1.0);
+}
+// Probability density of Student's t-distribution with n degrees of freedom,
+// evaluated at x. Requires n >= 1 (asserted).
+double students_t_pdf(double x, unsigned int n) {
+    assert(n >= 1);
+    // normalizing constant built from the gamma function
+    double scale = tgamma((n + 1.0) / 2.0) / (sqrt(n * M_PI) * tgamma(n / 2.0));
+    // x-dependent factor
+    double kernel = pow(1.0 + x * x / n, -(n + 1.0) / 2.0);
+    return scale * kernel;
+}
+// Hill, G. W. (1970).
+// Algorithm 395: Student's t-distribution.
+// Communications of the ACM, 13(10), 617-619.
+//
+// Cumulative distribution function of Student's t-distribution with n degrees
+// of freedom, evaluated at x. Requires n >= 1 (asserted).
+// Every branch works on t = x * x and returns start + sign * value, so
+// x < 0 yields 0 + value and x >= 0 yields 1 - value.
+double students_t_cdf(double x, unsigned int n) {
+    assert(n >= 1);
+    double start = x < 0 ? 0 : 1;
+    double sign = x < 0 ? 1 : -1;
+    double z = 1.0;
+    double t = x * x;
+    double y = t / n;
+    double b = 1.0 + y;
+    if ((n >= 20 && t < n) || n > 200) {
+        // asymptotic series for large or noninteger n
+        // above the threshold use log(b) = log(1 + y); otherwise keep y itself
+        // NOTE(review): 10e-6 equals 1e-5 - confirm the threshold against Hill's paper
+        if (y > 10e-6) {
+            y = log(b);
+        }
+        double a = n - 0.5;
+        b = 48.0 * a * a;
+        y = a * y;
+        y = (((((-0.4 * y - 3.3) * y - 24.0) * y - 85.5) / (0.8 * y * y + 100.0 + b) + y + 3.0) / b + 1.0) * sqrt(y);
+        // y is now a normal deviate; evaluate it against the standard normal (mean 0, std_dev 1)
+        return start + sign * normal_cdf(-y, 0.0, 1.0);
+    }
+    if (n < 20 && t < 4.0) {
+        // nested summation of cosine series
+        y = sqrt(y);
+        double a = y;
+        if (n == 1) {
+            a = 0.0;
+        }
+        // loop
+        // n is passed by value, so stepping it down by 2 only changes the local copy
+        if (n > 1) {
+            n -= 2;
+            while (n > 1) {
+                a = (n - 1) / (b * n) * a + y;
+                n -= 2;
+            }
+        }
+        // n ends at 0 or 1 here; the two cases use different closing terms
+        a = n == 0 ? a / sqrt(b) : (atan(y) + a / b) * (2.0 / M_PI);
+        return start + sign * (z - a) / 2;
+    }
+    // tail series expansion for large t-values
+    double a = sqrt(b);
+    y = a * n;
+    int j = 0;
+    // add terms until the running sum a stops changing in double precision
+    while (a != z) {
+        j += 2;
+        z = a;
+        y = y * (j - 1) / (b * j);
+        a = a + y / (n + j);
+    }
+    z = 0.0;
+    y = 0.0;
+    a = -a;
+    // loop (without n + 2 and n - 2)
+    while (n > 1) {
+        a = (n - 1) / (b * n) * a + y;
+        n -= 2;
+    }
+    // same closing step as the cosine-series branch, with y and z reset to 0
+    a = n == 0 ? a / sqrt(b) : (atan(y) + a / b) * (2.0 / M_PI);
+    return start + sign * (z - a) / 2;
+}
+// Hill, G. W. (1970).
+// Algorithm 396: Student's t-quantiles.
+// Communications of the ACM, 13(10), 619-620.
+//
+// Quantile (inverse CDF) of Student's t-distribution with n degrees of
+// freedom for probability p. Requires 0 <= p <= 1 and n >= 1 (asserted).
+double students_t_ppf(double p, unsigned int n) {
+    assert(p >= 0 && p <= 1);
+    assert(n >= 1);
+    // distribution is symmetric
+    // fold p < 0.5 onto the upper half and remember the sign of the result
+    double sign = p < 0.5 ? -1 : 1;
+    p = p < 0.5 ? 1 - p : p;
+    // two-tail to one-tail
+    // from here on p holds 2 * (1 - p)
+    p = 2.0 * (1.0 - p);
+    // n == 2 and n == 1 are handled by closed-form expressions
+    if (n == 2) {
+        return sign * sqrt(2.0 / (p * (2.0 - p)) - 2.0);
+    }
+    double half_pi = M_PI / 2.0;
+    if (n == 1) {
+        p = p * half_pi;
+        // cos(p) / sin(p) is the cotangent of p * pi / 2
+        return sign * cos(p) / sin(p);
+    }
+    double a = 1.0 / (n - 0.5);
+    double b = 48.0 / (a * a);
+    double c = ((20700.0 * a / b - 98.0) * a - 16.0) * a + 96.36;
+    double d = ((94.5 / (b + c) - 3.0) / b + 1.0) * sqrt(a * half_pi) * n;
+    double x = d * p;
+    double y = pow(x, 2.0 / n);
+    if (y > 0.05 + a) {
+        // asymptotic inverse expansion about normal
+        // standard normal quantile (mean 0, std_dev 1) at p / 2
+        x = normal_ppf(p * 0.5, 0.0, 1.0);
+        y = x * x;
+        if (n < 5) {
+            c += 0.3 * (n - 4.5) * (x + 0.6);
+        }
+        c = (((0.05 * d * x - 5.0) * x - 7.0) * x - 2.0) * x + b + c;
+        y = (((((0.4 * y + 6.3) * y + 36.0) * y + 94.5) / c - y - 3.0) / b + 1.0) * x;
+        y = a * y * y;
+        // for small y use the two-term series y + y^2 / 2 in place of exp(y) - 1
+        y = y > 0.002 ? exp(y) - 1.0 : 0.5 * y * y + y;
+    } else {
+        y = ((1.0 / (((n + 6.0) / (n * y) - 0.089 * d - 0.822) * (n + 2.0) * 3.0) + 0.5 / (n + 4.0)) * y - 1.0) * (n + 1.0) / (n + 2.0) + 1.0 / y;
+    }
+    // y holds t^2 / n at this point; restore the sign chosen above
+    return sign * sqrt(n * y);
+}

data/lib/anomaly_detection/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module AnomalyDetection
-  VERSION = "0.1.2"
+  VERSION = "0.1.3"
 end

data/licenses/LICENSE-MIT-dist-h.txt ADDED Viewed

@@ -0,0 +1,21 @@
+The MIT License (MIT)
+Copyright (c) 2021 Contributors
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.

data/licenses/UNLICENSE-dist-h.txt ADDED Viewed

@@ -0,0 +1,24 @@
+This is free and unencumbered software released into the public domain.
+Anyone is free to copy, modify, publish, use, compile, sell, or
+distribute this software, either in source code form or as a compiled
+binary, for any purpose, commercial or non-commercial, and by any
+means.
+In jurisdictions that recognize copyright laws, the author or authors
+of this software dedicate any and all copyright interest in the
+software to the public domain. We make this dedication for the benefit
+of the public at large and to the detriment of our heirs and
+successors. We intend this dedication to be an overt act of
+relinquishment in perpetuity of all present and future rights to this
+software under copyright law.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
+For more information, please refer to <http://unlicense.org/>

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: anomaly_detection
 version: !ruby/object:Gem::Version
-  version: 0.1.2
+  version: 0.1.3
 platform: ruby
 authors:
 - Andrew Kane
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2021-10-20 00:00:00.000000000 Z
+date: 2022-01-04 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rice
@@ -37,15 +37,15 @@ files:
 - README.md
 - ext/anomaly_detection/anomaly_detection.cpp
 - ext/anomaly_detection/anomaly_detection.hpp
-- ext/anomaly_detection/cdflib.cpp
-- ext/anomaly_detection/cdflib.hpp
+- ext/anomaly_detection/dist.h
 - ext/anomaly_detection/ext.cpp
 - ext/anomaly_detection/extconf.rb
 - ext/anomaly_detection/stl.hpp
 - lib/anomaly_detection.rb
 - lib/anomaly_detection/version.rb
+- licenses/LICENSE-MIT-dist-h.txt
 - licenses/LICENSE-MIT-stl-cpp.txt
-- licenses/LICENSE-cdflib.txt
+- licenses/UNLICENSE-dist-h.txt
 - licenses/UNLICENSE-stl-cpp.txt
 homepage: https://github.com/ankane/AnomalyDetection.rb
 licenses:
@@ -66,7 +66,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 3.2.22
+rubygems_version: 3.2.32
 signing_key:
 specification_version: 4
 summary: Time series anomaly detection for Ruby