anomaly_detection 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/README.md +2 -2
- data/ext/anomaly_detection/anomaly_detection.cpp +6 -21
- data/ext/anomaly_detection/dist.h +180 -0
- data/lib/anomaly_detection/version.rb +1 -1
- data/licenses/LICENSE-MIT-dist-h.txt +21 -0
- data/licenses/UNLICENSE-dist-h.txt +24 -0
- metadata +6 -6
- data/ext/anomaly_detection/cdflib.cpp +0 -12126
- data/ext/anomaly_detection/cdflib.hpp +0 -123
- data/licenses/LICENSE-cdflib.txt +0 -165
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cfb709e43863f4221a67e8f675f28b5361f6bf33a0d0f6fa4f52cdc0cad01796
|
4
|
+
data.tar.gz: 40965f08bb75cdb673d43e42c7fc47403fbfb1b082de10cab86bac569916d5b8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: '0496d044ecbe143be64164bd88092c5a5bd660f5fec6425e9f6ea0759f2ab7dbaa41a055c9d03a78c8e5a661b86bb25629c5e7a809614ba415d941f4d63fc9cb'
|
7
|
+
data.tar.gz: 8cc3f28c981d0be5cdb3dbfd054910d45469d258a662e163d7b01379af93e6714874d043ff1521ae19506f742b63eb93bf631f6f895c11842c4c15028c66a4b8
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -58,7 +58,7 @@ AnomalyDetection.detect(
|
|
58
58
|
)
|
59
59
|
```
|
60
60
|
|
61
|
-
## Plotting
|
61
|
+
## Plotting
|
62
62
|
|
63
63
|
Add [Vega](https://github.com/ankane/vega) to your application’s Gemfile:
|
64
64
|
|
@@ -74,7 +74,7 @@ AnomalyDetection.plot(series, anomalies)
|
|
74
74
|
|
75
75
|
## Credits
|
76
76
|
|
77
|
-
This library was ported from the [AnomalyDetection](https://github.com/twitter/AnomalyDetection) R package and is available under the same license. It uses [
|
77
|
+
This library was ported from the [AnomalyDetection](https://github.com/twitter/AnomalyDetection) R package and is available under the same license. It uses [stl-cpp](https://github.com/ankane/stl-cpp) for seasonal-trend decomposition and [dist.h](https://github.com/ankane/dist.h) for the quantile function.
|
78
78
|
|
79
79
|
## References
|
80
80
|
|
@@ -6,7 +6,7 @@
|
|
6
6
|
#include <vector>
|
7
7
|
|
8
8
|
#include "anomaly_detection.hpp"
|
9
|
-
#include "cdflib.hpp"
|
9
|
+
#include "dist.h"
|
10
10
|
#include "stl.hpp"
|
11
11
|
|
12
12
|
namespace anomaly_detection {
|
@@ -25,21 +25,7 @@ float mad(const std::vector<float>& data, float med) {
|
|
25
25
|
return 1.4826 * median(res);
|
26
26
|
}
|
27
27
|
|
28
|
-
|
29
|
-
int which = 2;
|
30
|
-
double q = 1 - p;
|
31
|
-
double t;
|
32
|
-
int status;
|
33
|
-
double bound;
|
34
|
-
cdft(&which, &p, &q, &t, &df, &status, &bound);
|
35
|
-
|
36
|
-
if (status != 0) {
|
37
|
-
throw std::invalid_argument("Bad status");
|
38
|
-
}
|
39
|
-
return t;
|
40
|
-
}
|
41
|
-
|
42
|
-
std::vector<size_t> detect_anoms(const std::vector<float>& data, int num_obs_per_period, float k, float alpha, bool one_tail, bool upper_tail, bool verbose, std::function<void()> interrupt) {
|
28
|
+
std::vector<size_t> detect_anoms(const std::vector<float>& data, int num_obs_per_period, float k, float alpha, bool one_tail, bool upper_tail, bool verbose, std::function<void()> check_for_interrupts) {
|
43
29
|
auto n = data.size();
|
44
30
|
|
45
31
|
// Check to make sure we have at least two periods worth of data for anomaly context
|
@@ -78,8 +64,7 @@ std::vector<size_t> detect_anoms(const std::vector<float>& data, int num_obs_per
|
|
78
64
|
|
79
65
|
// Compute test statistic until r=max_outliers values have been removed from the sample
|
80
66
|
for (auto i = 1; i <= max_outliers; i++) {
|
81
|
-
|
82
|
-
interrupt();
|
67
|
+
check_for_interrupts();
|
83
68
|
|
84
69
|
if (verbose) {
|
85
70
|
std::cout << i << " / " << max_outliers << " completed" << std::endl;
|
@@ -129,7 +114,7 @@ std::vector<size_t> detect_anoms(const std::vector<float>& data, int num_obs_per
|
|
129
114
|
p = 1.0 - alpha / (2.0 * (n - i + 1));
|
130
115
|
}
|
131
116
|
|
132
|
-
auto t =
|
117
|
+
auto t = students_t_ppf(p, n - i - 1);
|
133
118
|
auto lam = t * (n - i) / sqrt(((n - i - 1) + powf(t, 2.0)) * (n - i + 1));
|
134
119
|
|
135
120
|
if (r > lam) {
|
@@ -145,10 +130,10 @@ std::vector<size_t> detect_anoms(const std::vector<float>& data, int num_obs_per
|
|
145
130
|
return anomalies;
|
146
131
|
}
|
147
132
|
|
148
|
-
std::vector<size_t> anomalies(const std::vector<float>& x, int period, float k, float alpha, Direction direction, bool verbose, std::function<void()> interrupt) {
|
133
|
+
std::vector<size_t> anomalies(const std::vector<float>& x, int period, float k, float alpha, Direction direction, bool verbose, std::function<void()> check_for_interrupts) {
|
149
134
|
bool one_tail = direction != Direction::Both;
|
150
135
|
bool upper_tail = direction == Direction::Positive;
|
151
|
-
return detect_anoms(x, period, k, alpha, one_tail, upper_tail, verbose, interrupt);
|
136
|
+
return detect_anoms(x, period, k, alpha, one_tail, upper_tail, verbose, check_for_interrupts);
|
152
137
|
}
|
153
138
|
|
154
139
|
}
|
@@ -0,0 +1,180 @@
|
|
1
|
+
/*!
|
2
|
+
* dist.h v0.1.0
|
3
|
+
* https://github.com/ankane/dist.h
|
4
|
+
* Unlicense OR MIT License
|
5
|
+
*/
|
6
|
+
|
7
|
+
#pragma once
|
8
|
+
|
9
|
+
#define _USE_MATH_DEFINES
|
10
|
+
|
11
|
+
#include <assert.h>
|
12
|
+
#include <math.h>
|
13
|
+
|
14
|
+
// Winitzki, S. (2008).
|
15
|
+
// A handy approximation for the error function and its inverse.
|
16
|
+
// https://drive.google.com/file/d/0B2Mt7luZYBrwZlctV3A3eF82VGM/view?resourcekey=0-UQpPhwZgzP0sF4LHBDlLtg
|
17
|
+
// from https://sites.google.com/site/winitzki
|
18
|
+
double erf(double x) {
|
19
|
+
double sign = x < 0 ? -1.0 : 1.0;
|
20
|
+
x = x < 0 ? -x : x;
|
21
|
+
|
22
|
+
double a = 0.14;
|
23
|
+
double x2 = x * x;
|
24
|
+
return sign * sqrt(1.0 - exp(-x2 * (4.0 / M_PI + a * x2) / (1.0 + a * x2)));
|
25
|
+
}
|
26
|
+
|
27
|
+
// Winitzki, S. (2008).
|
28
|
+
// A handy approximation for the error function and its inverse.
|
29
|
+
// https://drive.google.com/file/d/0B2Mt7luZYBrwZlctV3A3eF82VGM/view?resourcekey=0-UQpPhwZgzP0sF4LHBDlLtg
|
30
|
+
// from https://sites.google.com/site/winitzki
|
31
|
+
double inverse_erf(double x) {
|
32
|
+
double sign = x < 0 ? -1.0 : 1.0;
|
33
|
+
x = x < 0 ? -x : x;
|
34
|
+
|
35
|
+
double a = 0.147;
|
36
|
+
double ln = log(1.0 - x * x);
|
37
|
+
double f1 = 2.0 / (M_PI * a);
|
38
|
+
double f2 = ln / 2.0;
|
39
|
+
double f3 = f1 + f2;
|
40
|
+
double f4 = 1.0 / a * ln;
|
41
|
+
return sign * sqrt(-f1 - f2 + sqrt(f3 * f3 - f4));
|
42
|
+
}
|
43
|
+
|
44
|
+
double normal_pdf(double x, double mean, double std_dev) {
|
45
|
+
double var = std_dev * std_dev;
|
46
|
+
return (1.0 / (var * sqrt(2.0 * M_PI))) * pow(M_E, -0.5 * pow((x - mean) / var, 2));
|
47
|
+
}
|
48
|
+
|
49
|
+
double normal_cdf(double x, double mean, double std_dev) {
|
50
|
+
return 0.5 * (1.0 + erf((x - mean) / (std_dev * std_dev * sqrt(2))));
|
51
|
+
}
|
52
|
+
|
53
|
+
double normal_ppf(double p, double mean, double std_dev) {
|
54
|
+
assert(p >= 0 && p <= 1);
|
55
|
+
|
56
|
+
return mean + (std_dev * std_dev) * sqrt(2) * inverse_erf(2.0 * p - 1.0);
|
57
|
+
}
|
58
|
+
|
59
|
+
double students_t_pdf(double x, unsigned int n) {
|
60
|
+
assert(n >= 1);
|
61
|
+
|
62
|
+
return tgamma((n + 1.0) / 2.0) / (sqrt(n * M_PI) * tgamma(n / 2.0)) * pow(1.0 + x * x / n, -(n + 1.0) / 2.0);
|
63
|
+
}
|
64
|
+
|
65
|
+
// Hill, G. W. (1970).
|
66
|
+
// Algorithm 395: Student's t-distribution.
|
67
|
+
// Communications of the ACM, 13(10), 617-619.
|
68
|
+
double students_t_cdf(double x, unsigned int n) {
|
69
|
+
assert(n >= 1);
|
70
|
+
|
71
|
+
double start = x < 0 ? 0 : 1;
|
72
|
+
double sign = x < 0 ? 1 : -1;
|
73
|
+
|
74
|
+
double z = 1.0;
|
75
|
+
double t = x * x;
|
76
|
+
double y = t / n;
|
77
|
+
double b = 1.0 + y;
|
78
|
+
|
79
|
+
if ((n >= 20 && t < n) || n > 200) {
|
80
|
+
// asymptotic series for large or noninteger n
|
81
|
+
if (y > 10e-6) {
|
82
|
+
y = log(b);
|
83
|
+
}
|
84
|
+
double a = n - 0.5;
|
85
|
+
b = 48.0 * a * a;
|
86
|
+
y = a * y;
|
87
|
+
y = (((((-0.4 * y - 3.3) * y - 24.0) * y - 85.5) / (0.8 * y * y + 100.0 + b) + y + 3.0) / b + 1.0) * sqrt(y);
|
88
|
+
return start + sign * normal_cdf(-y, 0.0, 1.0);
|
89
|
+
}
|
90
|
+
|
91
|
+
if (n < 20 && t < 4.0) {
|
92
|
+
// nested summation of cosine series
|
93
|
+
y = sqrt(y);
|
94
|
+
double a = y;
|
95
|
+
if (n == 1) {
|
96
|
+
a = 0.0;
|
97
|
+
}
|
98
|
+
|
99
|
+
// loop
|
100
|
+
if (n > 1) {
|
101
|
+
n -= 2;
|
102
|
+
while (n > 1) {
|
103
|
+
a = (n - 1) / (b * n) * a + y;
|
104
|
+
n -= 2;
|
105
|
+
}
|
106
|
+
}
|
107
|
+
a = n == 0 ? a / sqrt(b) : (atan(y) + a / b) * (2.0 / M_PI);
|
108
|
+
return start + sign * (z - a) / 2;
|
109
|
+
}
|
110
|
+
|
111
|
+
// tail series expanation for large t-values
|
112
|
+
double a = sqrt(b);
|
113
|
+
y = a * n;
|
114
|
+
int j = 0;
|
115
|
+
while (a != z) {
|
116
|
+
j += 2;
|
117
|
+
z = a;
|
118
|
+
y = y * (j - 1) / (b * j);
|
119
|
+
a = a + y / (n + j);
|
120
|
+
}
|
121
|
+
z = 0.0;
|
122
|
+
y = 0.0;
|
123
|
+
a = -a;
|
124
|
+
|
125
|
+
// loop (without n + 2 and n - 2)
|
126
|
+
while (n > 1) {
|
127
|
+
a = (n - 1) / (b * n) * a + y;
|
128
|
+
n -= 2;
|
129
|
+
}
|
130
|
+
a = n == 0 ? a / sqrt(b) : (atan(y) + a / b) * (2.0 / M_PI);
|
131
|
+
return start + sign * (z - a) / 2;
|
132
|
+
}
|
133
|
+
|
134
|
+
// Hill, G. W. (1970).
|
135
|
+
// Algorithm 396: Student's t-quantiles.
|
136
|
+
// Communications of the ACM, 13(10), 619-620.
|
137
|
+
double students_t_ppf(double p, unsigned int n) {
|
138
|
+
assert(p >= 0 && p <= 1);
|
139
|
+
assert(n >= 1);
|
140
|
+
|
141
|
+
// distribution is symmetric
|
142
|
+
double sign = p < 0.5 ? -1 : 1;
|
143
|
+
p = p < 0.5 ? 1 - p : p;
|
144
|
+
|
145
|
+
// two-tail to one-tail
|
146
|
+
p = 2.0 * (1.0 - p);
|
147
|
+
|
148
|
+
if (n == 2) {
|
149
|
+
return sign * sqrt(2.0 / (p * (2.0 - p)) - 2.0);
|
150
|
+
}
|
151
|
+
|
152
|
+
double half_pi = M_PI / 2.0;
|
153
|
+
|
154
|
+
if (n == 1) {
|
155
|
+
p = p * half_pi;
|
156
|
+
return sign * cos(p) / sin(p);
|
157
|
+
}
|
158
|
+
|
159
|
+
double a = 1.0 / (n - 0.5);
|
160
|
+
double b = 48.0 / (a * a);
|
161
|
+
double c = ((20700.0 * a / b - 98.0) * a - 16.0) * a + 96.36;
|
162
|
+
double d = ((94.5 / (b + c) - 3.0) / b + 1.0) * sqrt(a * half_pi) * n;
|
163
|
+
double x = d * p;
|
164
|
+
double y = pow(x, 2.0 / n);
|
165
|
+
if (y > 0.05 + a) {
|
166
|
+
// asymptotic inverse expansion about normal
|
167
|
+
x = normal_ppf(p * 0.5, 0.0, 1.0);
|
168
|
+
y = x * x;
|
169
|
+
if (n < 5) {
|
170
|
+
c += 0.3 * (n - 4.5) * (x + 0.6);
|
171
|
+
}
|
172
|
+
c = (((0.05 * d * x - 5.0) * x - 7.0) * x - 2.0) * x + b + c;
|
173
|
+
y = (((((0.4 * y + 6.3) * y + 36.0) * y + 94.5) / c - y - 3.0) / b + 1.0) * x;
|
174
|
+
y = a * y * y;
|
175
|
+
y = y > 0.002 ? exp(y) - 1.0 : 0.5 * y * y + y;
|
176
|
+
} else {
|
177
|
+
y = ((1.0 / (((n + 6.0) / (n * y) - 0.089 * d - 0.822) * (n + 2.0) * 3.0) + 0.5 / (n + 4.0)) * y - 1.0) * (n + 1.0) / (n + 2.0) + 1.0 / y;
|
178
|
+
}
|
179
|
+
return sign * sqrt(n * y);
|
180
|
+
}
|
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2021 Contributors
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
@@ -0,0 +1,24 @@
|
|
1
|
+
This is free and unencumbered software released into the public domain.
|
2
|
+
|
3
|
+
Anyone is free to copy, modify, publish, use, compile, sell, or
|
4
|
+
distribute this software, either in source code form or as a compiled
|
5
|
+
binary, for any purpose, commercial or non-commercial, and by any
|
6
|
+
means.
|
7
|
+
|
8
|
+
In jurisdictions that recognize copyright laws, the author or authors
|
9
|
+
of this software dedicate any and all copyright interest in the
|
10
|
+
software to the public domain. We make this dedication for the benefit
|
11
|
+
of the public at large and to the detriment of our heirs and
|
12
|
+
successors. We intend this dedication to be an overt act of
|
13
|
+
relinquishment in perpetuity of all present and future rights to this
|
14
|
+
software under copyright law.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
19
|
+
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
20
|
+
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
21
|
+
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
22
|
+
OTHER DEALINGS IN THE SOFTWARE.
|
23
|
+
|
24
|
+
For more information, please refer to <http://unlicense.org/>
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: anomaly_detection
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-01-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rice
|
@@ -37,15 +37,15 @@ files:
|
|
37
37
|
- README.md
|
38
38
|
- ext/anomaly_detection/anomaly_detection.cpp
|
39
39
|
- ext/anomaly_detection/anomaly_detection.hpp
|
40
|
-
- ext/anomaly_detection/cdflib.cpp
|
41
|
-
- ext/anomaly_detection/cdflib.hpp
|
40
|
+
- ext/anomaly_detection/dist.h
|
42
41
|
- ext/anomaly_detection/ext.cpp
|
43
42
|
- ext/anomaly_detection/extconf.rb
|
44
43
|
- ext/anomaly_detection/stl.hpp
|
45
44
|
- lib/anomaly_detection.rb
|
46
45
|
- lib/anomaly_detection/version.rb
|
46
|
+
- licenses/LICENSE-MIT-dist-h.txt
|
47
47
|
- licenses/LICENSE-MIT-stl-cpp.txt
|
48
|
-
- licenses/LICENSE-cdflib.txt
|
48
|
+
- licenses/UNLICENSE-dist-h.txt
|
49
49
|
- licenses/UNLICENSE-stl-cpp.txt
|
50
50
|
homepage: https://github.com/ankane/AnomalyDetection.rb
|
51
51
|
licenses:
|
@@ -66,7 +66,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
66
66
|
- !ruby/object:Gem::Version
|
67
67
|
version: '0'
|
68
68
|
requirements: []
|
69
|
-
rubygems_version: 3.2.
|
69
|
+
rubygems_version: 3.2.32
|
70
70
|
signing_key:
|
71
71
|
specification_version: 4
|
72
72
|
summary: Time series anomaly detection for Ruby
|