anomaly_detection 0.1.2 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 94e6edb7ef4ce6db5fdb7b01ab40322bc8daceb222c0388abe19704ef2ca7f99
4
- data.tar.gz: 41e4c55f5f42251a25fc337941ee85ba306df15ca925ebfe6d1d59129cc650cc
3
+ metadata.gz: cfb709e43863f4221a67e8f675f28b5361f6bf33a0d0f6fa4f52cdc0cad01796
4
+ data.tar.gz: 40965f08bb75cdb673d43e42c7fc47403fbfb1b082de10cab86bac569916d5b8
5
5
  SHA512:
6
- metadata.gz: a534a1903b14e7e3287b86b8da3936ee889f0e93690a55f5cbba5a384dfd06baab1ded6afb71acec6eb2461c0ac0d6645d28a5093ef33a4c28dce0319ef9ae75
7
- data.tar.gz: 90fcd64ce191e8aaff1a46384e003f8eeea7b6e9732357fdd7726066bf4e3ad1a8fe91043042716f57e931b05c048d984c60178c05e38d127d21216fe5a2ee2f
6
+ metadata.gz: '0496d044ecbe143be64164bd88092c5a5bd660f5fec6425e9f6ea0759f2ab7dbaa41a055c9d03a78c8e5a661b86bb25629c5e7a809614ba415d941f4d63fc9cb'
7
+ data.tar.gz: 8cc3f28c981d0be5cdb3dbfd054910d45469d258a662e163d7b01379af93e6714874d043ff1521ae19506f742b63eb93bf631f6f895c11842c4c15028c66a4b8
data/CHANGELOG.md CHANGED
@@ -1,3 +1,7 @@
1
+ ## 0.1.3 (2022-01-03)
2
+
3
+ - Switched to dist.h
4
+
1
5
  ## 0.1.2 (2021-10-20)
2
6
 
3
7
  - Added `plot` method
data/README.md CHANGED
@@ -58,7 +58,7 @@ AnomalyDetection.detect(
58
58
  )
59
59
  ```
60
60
 
61
- ## Plotting [unreleased]
61
+ ## Plotting
62
62
 
63
63
  Add [Vega](https://github.com/ankane/vega) to your application’s Gemfile:
64
64
 
@@ -74,7 +74,7 @@ AnomalyDetection.plot(series, anomalies)
74
74
 
75
75
  ## Credits
76
76
 
77
- This library was ported from the [AnomalyDetection](https://github.com/twitter/AnomalyDetection) R package and is available under the same license. It uses [cdflib](https://people.sc.fsu.edu/~jburkardt/cpp_src/cdflib/cdflib.html) for the quantile function.
77
+ This library was ported from the [AnomalyDetection](https://github.com/twitter/AnomalyDetection) R package and is available under the same license. It uses [stl-cpp](https://github.com/ankane/stl-cpp) for seasonal-trend decomposition and [dist.h](https://github.com/ankane/dist.h) for the quantile function.
78
78
 
79
79
  ## References
80
80
 
@@ -6,7 +6,7 @@
6
6
  #include <vector>
7
7
 
8
8
  #include "anomaly_detection.hpp"
9
- #include "cdflib.hpp"
9
+ #include "dist.h"
10
10
  #include "stl.hpp"
11
11
 
12
12
  namespace anomaly_detection {
@@ -25,21 +25,7 @@ float mad(const std::vector<float>& data, float med) {
25
25
  return 1.4826 * median(res);
26
26
  }
27
27
 
28
- float qt(double p, double df) {
29
- int which = 2;
30
- double q = 1 - p;
31
- double t;
32
- int status;
33
- double bound;
34
- cdft(&which, &p, &q, &t, &df, &status, &bound);
35
-
36
- if (status != 0) {
37
- throw std::invalid_argument("Bad status");
38
- }
39
- return t;
40
- }
41
-
42
- std::vector<size_t> detect_anoms(const std::vector<float>& data, int num_obs_per_period, float k, float alpha, bool one_tail, bool upper_tail, bool verbose, std::function<void()> interrupt) {
28
+ std::vector<size_t> detect_anoms(const std::vector<float>& data, int num_obs_per_period, float k, float alpha, bool one_tail, bool upper_tail, bool verbose, std::function<void()> check_for_interrupts) {
43
29
  auto n = data.size();
44
30
 
45
31
  // Check to make sure we have at least two periods worth of data for anomaly context
@@ -78,8 +64,7 @@ std::vector<size_t> detect_anoms(const std::vector<float>& data, int num_obs_per
78
64
 
79
65
  // Compute test statistic until r=max_outliers values have been removed from the sample
80
66
  for (auto i = 1; i <= max_outliers; i++) {
81
- // Check for interrupts
82
- interrupt();
67
+ check_for_interrupts();
83
68
 
84
69
  if (verbose) {
85
70
  std::cout << i << " / " << max_outliers << " completed" << std::endl;
@@ -129,7 +114,7 @@ std::vector<size_t> detect_anoms(const std::vector<float>& data, int num_obs_per
129
114
  p = 1.0 - alpha / (2.0 * (n - i + 1));
130
115
  }
131
116
 
132
- auto t = qt(p, n - i - 1);
117
+ auto t = students_t_ppf(p, n - i - 1);
133
118
  auto lam = t * (n - i) / sqrt(((n - i - 1) + powf(t, 2.0)) * (n - i + 1));
134
119
 
135
120
  if (r > lam) {
@@ -145,10 +130,10 @@ std::vector<size_t> detect_anoms(const std::vector<float>& data, int num_obs_per
145
130
  return anomalies;
146
131
  }
147
132
 
148
- std::vector<size_t> anomalies(const std::vector<float>& x, int period, float k, float alpha, Direction direction, bool verbose, std::function<void()> interrupt) {
133
+ std::vector<size_t> anomalies(const std::vector<float>& x, int period, float k, float alpha, Direction direction, bool verbose, std::function<void()> check_for_interrupts) {
149
134
  bool one_tail = direction != Direction::Both;
150
135
  bool upper_tail = direction == Direction::Positive;
151
- return detect_anoms(x, period, k, alpha, one_tail, upper_tail, verbose, interrupt);
136
+ return detect_anoms(x, period, k, alpha, one_tail, upper_tail, verbose, check_for_interrupts);
152
137
  }
153
138
 
154
139
  }
@@ -0,0 +1,180 @@
1
+ /*!
2
+ * dist.h v0.1.0
3
+ * https://github.com/ankane/dist.h
4
+ * Unlicense OR MIT License
5
+ */
6
+
7
+ #pragma once
8
+
9
+ #define _USE_MATH_DEFINES
10
+
11
+ #include <assert.h>
12
+ #include <math.h>
13
+
14
// Closed-form approximation of the error function.
//
// Winitzki, S. (2008).
// A handy approximation for the error function and its inverse.
// https://drive.google.com/file/d/0B2Mt7luZYBrwZlctV3A3eF82VGM/view?resourcekey=0-UQpPhwZgzP0sF4LHBDlLtg
// from https://sites.google.com/site/winitzki
//
// The formula is evaluated on |x|; the sign of x is re-applied to the result.
double erf(double x) {
    const double direction = x < 0 ? -1.0 : 1.0;
    const double magnitude = x < 0 ? -x : x;

    // tuning constant of the approximation
    const double a = 0.14;
    const double sq = magnitude * magnitude;
    const double exponent = -sq * (4.0 / M_PI + a * sq) / (1.0 + a * sq);
    return direction * sqrt(1.0 - exp(exponent));
}
26
+
27
// Closed-form approximation of the inverse error function.
//
// Winitzki, S. (2008).
// A handy approximation for the error function and its inverse.
// https://drive.google.com/file/d/0B2Mt7luZYBrwZlctV3A3eF82VGM/view?resourcekey=0-UQpPhwZgzP0sF4LHBDlLtg
// from https://sites.google.com/site/winitzki
//
// The formula is evaluated on |x|; the sign of x is re-applied to the result.
double inverse_erf(double x) {
    double direction = 1.0;
    if (x < 0) {
        direction = -1.0;
        x = -x;
    }

    // tuning constant of the approximation
    const double a = 0.147;
    const double log_term = log(1.0 - x * x);
    const double pi_term = 2.0 / (M_PI * a);
    const double half_log = log_term / 2.0;
    const double combined = pi_term + half_log;
    const double scaled_log = 1.0 / a * log_term;
    return direction * sqrt(-pi_term - half_log + sqrt(combined * combined - scaled_log));
}
43
+
44
// Probability density of the normal distribution at x.
//
// mean    - location of the distribution
// std_dev - standard deviation (not the variance)
//
// The density is exp(-z^2 / 2) / (std_dev * sqrt(2 * pi)) with
// z = (x - mean) / std_dev. Both the z-score and the normalising factor use
// the standard deviation; scaling by the variance gives wrong values for any
// std_dev other than 1.
double normal_pdf(double x, double mean, double std_dev) {
    const double z = (x - mean) / std_dev;
    return exp(-0.5 * z * z) / (std_dev * sqrt(2.0 * M_PI));
}
48
+
49
// Cumulative distribution function of the normal distribution at x.
//
// mean    - location of the distribution
// std_dev - standard deviation (not the variance)
//
// Computed as 0.5 * (1 + erf((x - mean) / (std_dev * sqrt(2)))). The erf
// argument is scaled by the standard deviation; scaling by the variance gives
// wrong values for any std_dev other than 1.
double normal_cdf(double x, double mean, double std_dev) {
    return 0.5 * (1.0 + erf((x - mean) / (std_dev * sqrt(2.0))));
}
52
+
53
+ double normal_ppf(double p, double mean, double std_dev) {
54
+ assert(p >= 0 && p <= 1);
55
+
56
+ return mean + (std_dev * std_dev) * sqrt(2) * inverse_erf(2.0 * p - 1.0);
57
+ }
58
+
59
// Probability density of Student's t-distribution at x.
//
// n - degrees of freedom, asserted to be at least 1
double students_t_pdf(double x, unsigned int n) {
    assert(n >= 1);

    // gamma-function ratio that normalises the density
    const double normalizer = tgamma((n + 1.0) / 2.0) / (sqrt(n * M_PI) * tgamma(n / 2.0));
    // x-dependent part of the density
    const double kernel = pow(1.0 + x * x / n, -(n + 1.0) / 2.0);
    return normalizer * kernel;
}
64
+
65
+ // Hill, G. W. (1970).
66
+ // Algorithm 395: Student's t-distribution.
67
+ // Communications of the ACM, 13(10), 617-619.
68
+ double students_t_cdf(double x, unsigned int n) {
69
+ assert(n >= 1);
70
+
71
+ double start = x < 0 ? 0 : 1;
72
+ double sign = x < 0 ? 1 : -1;
73
+
74
+ double z = 1.0;
75
+ double t = x * x;
76
+ double y = t / n;
77
+ double b = 1.0 + y;
78
+
79
+ if ((n >= 20 && t < n) || n > 200) {
80
+ // asymptotic series for large or noninteger n
81
+ if (y > 10e-6) {
82
+ y = log(b);
83
+ }
84
+ double a = n - 0.5;
85
+ b = 48.0 * a * a;
86
+ y = a * y;
87
+ y = (((((-0.4 * y - 3.3) * y - 24.0) * y - 85.5) / (0.8 * y * y + 100.0 + b) + y + 3.0) / b + 1.0) * sqrt(y);
88
+ return start + sign * normal_cdf(-y, 0.0, 1.0);
89
+ }
90
+
91
+ if (n < 20 && t < 4.0) {
92
+ // nested summation of cosine series
93
+ y = sqrt(y);
94
+ double a = y;
95
+ if (n == 1) {
96
+ a = 0.0;
97
+ }
98
+
99
+ // loop
100
+ if (n > 1) {
101
+ n -= 2;
102
+ while (n > 1) {
103
+ a = (n - 1) / (b * n) * a + y;
104
+ n -= 2;
105
+ }
106
+ }
107
+ a = n == 0 ? a / sqrt(b) : (atan(y) + a / b) * (2.0 / M_PI);
108
+ return start + sign * (z - a) / 2;
109
+ }
110
+
111
+ // tail series expanation for large t-values
112
+ double a = sqrt(b);
113
+ y = a * n;
114
+ int j = 0;
115
+ while (a != z) {
116
+ j += 2;
117
+ z = a;
118
+ y = y * (j - 1) / (b * j);
119
+ a = a + y / (n + j);
120
+ }
121
+ z = 0.0;
122
+ y = 0.0;
123
+ a = -a;
124
+
125
+ // loop (without n + 2 and n - 2)
126
+ while (n > 1) {
127
+ a = (n - 1) / (b * n) * a + y;
128
+ n -= 2;
129
+ }
130
+ a = n == 0 ? a / sqrt(b) : (atan(y) + a / b) * (2.0 / M_PI);
131
+ return start + sign * (z - a) / 2;
132
+ }
133
+
134
+ // Hill, G. W. (1970).
135
+ // Algorithm 396: Student's t-quantiles.
136
+ // Communications of the ACM, 13(10), 619-620.
137
+ double students_t_ppf(double p, unsigned int n) {
138
+ assert(p >= 0 && p <= 1);
139
+ assert(n >= 1);
140
+
141
+ // distribution is symmetric
142
+ double sign = p < 0.5 ? -1 : 1;
143
+ p = p < 0.5 ? 1 - p : p;
144
+
145
+ // two-tail to one-tail
146
+ p = 2.0 * (1.0 - p);
147
+
148
+ if (n == 2) {
149
+ return sign * sqrt(2.0 / (p * (2.0 - p)) - 2.0);
150
+ }
151
+
152
+ double half_pi = M_PI / 2.0;
153
+
154
+ if (n == 1) {
155
+ p = p * half_pi;
156
+ return sign * cos(p) / sin(p);
157
+ }
158
+
159
+ double a = 1.0 / (n - 0.5);
160
+ double b = 48.0 / (a * a);
161
+ double c = ((20700.0 * a / b - 98.0) * a - 16.0) * a + 96.36;
162
+ double d = ((94.5 / (b + c) - 3.0) / b + 1.0) * sqrt(a * half_pi) * n;
163
+ double x = d * p;
164
+ double y = pow(x, 2.0 / n);
165
+ if (y > 0.05 + a) {
166
+ // asymptotic inverse expansion about normal
167
+ x = normal_ppf(p * 0.5, 0.0, 1.0);
168
+ y = x * x;
169
+ if (n < 5) {
170
+ c += 0.3 * (n - 4.5) * (x + 0.6);
171
+ }
172
+ c = (((0.05 * d * x - 5.0) * x - 7.0) * x - 2.0) * x + b + c;
173
+ y = (((((0.4 * y + 6.3) * y + 36.0) * y + 94.5) / c - y - 3.0) / b + 1.0) * x;
174
+ y = a * y * y;
175
+ y = y > 0.002 ? exp(y) - 1.0 : 0.5 * y * y + y;
176
+ } else {
177
+ y = ((1.0 / (((n + 6.0) / (n * y) - 0.089 * d - 0.822) * (n + 2.0) * 3.0) + 0.5 / (n + 4.0)) * y - 1.0) * (n + 1.0) / (n + 2.0) + 1.0 / y;
178
+ }
179
+ return sign * sqrt(n * y);
180
+ }
@@ -1,3 +1,3 @@
1
1
  module AnomalyDetection
2
- VERSION = "0.1.2"
2
+ VERSION = "0.1.3"
3
3
  end
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2021 Contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
@@ -0,0 +1,24 @@
1
+ This is free and unencumbered software released into the public domain.
2
+
3
+ Anyone is free to copy, modify, publish, use, compile, sell, or
4
+ distribute this software, either in source code form or as a compiled
5
+ binary, for any purpose, commercial or non-commercial, and by any
6
+ means.
7
+
8
+ In jurisdictions that recognize copyright laws, the author or authors
9
+ of this software dedicate any and all copyright interest in the
10
+ software to the public domain. We make this dedication for the benefit
11
+ of the public at large and to the detriment of our heirs and
12
+ successors. We intend this dedication to be an overt act of
13
+ relinquishment in perpetuity of all present and future rights to this
14
+ software under copyright law.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19
+ IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20
+ OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21
+ ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22
+ OTHER DEALINGS IN THE SOFTWARE.
23
+
24
+ For more information, please refer to <http://unlicense.org/>
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: anomaly_detection
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-10-20 00:00:00.000000000 Z
11
+ date: 2022-01-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rice
@@ -37,15 +37,15 @@ files:
37
37
  - README.md
38
38
  - ext/anomaly_detection/anomaly_detection.cpp
39
39
  - ext/anomaly_detection/anomaly_detection.hpp
40
- - ext/anomaly_detection/cdflib.cpp
41
- - ext/anomaly_detection/cdflib.hpp
40
+ - ext/anomaly_detection/dist.h
42
41
  - ext/anomaly_detection/ext.cpp
43
42
  - ext/anomaly_detection/extconf.rb
44
43
  - ext/anomaly_detection/stl.hpp
45
44
  - lib/anomaly_detection.rb
46
45
  - lib/anomaly_detection/version.rb
46
+ - licenses/LICENSE-MIT-dist-h.txt
47
47
  - licenses/LICENSE-MIT-stl-cpp.txt
48
- - licenses/LICENSE-cdflib.txt
48
+ - licenses/UNLICENSE-dist-h.txt
49
49
  - licenses/UNLICENSE-stl-cpp.txt
50
50
  homepage: https://github.com/ankane/AnomalyDetection.rb
51
51
  licenses:
@@ -66,7 +66,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
66
66
  - !ruby/object:Gem::Version
67
67
  version: '0'
68
68
  requirements: []
69
- rubygems_version: 3.2.22
69
+ rubygems_version: 3.2.32
70
70
  signing_key:
71
71
  specification_version: 4
72
72
  summary: Time series anomaly detection for Ruby